Example #1
0
    def forward(self, architecture: Architecture, real_features: Tensor, fake_features: Tensor,
                **additional_inputs: Tensor) -> Tensor:
        """Return the critic loss for one batch of real and one batch of fake features.

        Both batches are scored by ``architecture.discriminator``, which also
        receives ``additional_inputs``. The result is
        ``critic_loss_function(fake scores) - critic_loss_function(real scores)``.
        """
        critic = architecture.discriminator

        # real samples: their term enters the total with a negative sign
        real_scores = critic(real_features, **additional_inputs)
        loss_on_real = - critic_loss_function(real_scores)

        # fake samples: their term enters the total unchanged
        fake_scores = critic(fake_features, **additional_inputs)
        loss_on_fake = critic_loss_function(fake_scores)

        return loss_on_real + loss_on_fake
Example #2
0
 def forward(self, architecture: Architecture, fake_features: Tensor,
             **additional_inputs: Tensor) -> Tensor:
     """Return the BCE loss of the discriminator predictions on fake features.

     The targets are the labels produced by ``generate_positive_labels``, one
     per prediction, built with ``self.smooth_positive_labels``.
     """
     discriminator = architecture.discriminator
     scores = discriminator(fake_features, **additional_inputs)

     # the fake predictions are compared against positive targets
     targets = generate_positive_labels(len(scores), self.smooth_positive_labels)
     return self.bce_loss(scores, targets)
Example #3
0
    def train_generator_step(self, configuration: Configuration, metadata: Metadata,
                             architecture: Architecture) -> float:
        """Perform a single generator update and return the loss as a Python float.

        A batch of ``configuration.batch_size`` fake samples is drawn with
        ``self.sample_fake`` and scored by ``architecture.generator_loss``; the
        gradients are then applied through ``architecture.generator_optimizer``.
        """
        optimizer = architecture.generator_optimizer

        # start from zeroed gradients
        optimizer.zero_grad()

        # no condition is passed on unless the architecture is conditional
        condition = None
        if "conditional" in architecture.arguments:
            # one value per sample, drawn uniformly between 0 and the size of the
            # dependent variable (another distribution could be plugged in here)
            number_of_conditions = metadata.get_dependent_variable().get_size()
            raw_condition = FloatTensor(configuration.batch_size).uniform_(0, number_of_conditions)
            condition = to_gpu_if_available(raw_condition)

        # sample a full batch of fake features and score it
        fake_features = self.sample_fake(architecture, configuration.batch_size, condition=condition)
        loss = architecture.generator_loss(architecture, fake_features, condition=condition)

        # back-propagate and update the generator weights
        loss.backward()
        optimizer.step()

        return to_cpu_if_was_in_gpu(loss).item()
Example #4
0
    def forward(self, architecture: Architecture, real_features: Tensor, fake_features: Tensor,
                **additional_inputs: Tensor) -> Tensor:
        """Return the parent critic loss plus a gradient penalty.

        The penalty is evaluated on random interpolations between the real and
        the fake features: the gradient of the discriminator output with respect
        to the interpolated input is taken, and
        ``mean((norm_2(gradient, dim=1) - 1) ** 2) * self.weight`` is added to
        the loss computed by the parent class.
        """
        base_loss = super(WGANCriticLossWithGradientPenalty, self).forward(
            architecture, real_features, fake_features, **additional_inputs)

        # one random mixing coefficient per sample, repeated to the feature shape
        # NOTE(review): expanding an (n, 1) tensor assumes features are (batch, features) -- confirm
        mix = rand(len(real_features), 1).expand(real_features.size())
        mix = to_gpu_if_available(mix)

        mixed_features = mix * real_features + ((1 - mix) * fake_features)
        mixed_features.requires_grad_()

        # the additional inputs are passed as they are (not interpolated)
        mixed_scores = architecture.discriminator(mixed_features, **additional_inputs)

        # create_graph=True keeps the penalty itself differentiable
        all_gradients = grad(outputs=mixed_scores,
                             inputs=mixed_features,
                             grad_outputs=to_gpu_if_available(ones_like(mixed_scores)),
                             create_graph=True,
                             retain_graph=True,
                             only_inputs=True)
        gradient_norms = all_gradients[0].norm(2, dim=1)

        penalty = ((gradient_norms - 1) ** 2).mean() * self.weight

        return base_loss + penalty
Example #5
0
def compute_parameter_size(architecture: Architecture) -> int:
    """Count the trainable parameters held by the modules of an architecture.

    Components that are not ``Module`` instances (for example optimizers) are
    skipped, and so are parameters whose ``requires_grad`` flag is off.
    """
    modules = (component for component in architecture.values() if isinstance(component, Module))
    return sum(
        parameter.numel()
        for module in modules
        for parameter in module.parameters()
        if parameter.requires_grad
    )
Example #6
0
    def forward(self, architecture: Architecture, real_features: Tensor,
                fake_features: Tensor, **additional_inputs: Tensor) -> Tensor:
        """Return the discriminator BCE loss over a real and a fake batch.

        Real predictions are compared against the labels from
        ``generate_positive_labels`` (built with ``self.smooth_positive_labels``)
        and fake predictions against zeros; the two losses are added.
        """
        discriminator = architecture.discriminator

        # real batch against positive targets
        real_scores = discriminator(real_features, **additional_inputs)
        real_targets = generate_positive_labels(len(real_scores), self.smooth_positive_labels)
        loss_on_real = self.bce_loss(real_scores, real_targets)

        # fake batch against all-zero targets
        fake_scores = discriminator(fake_features, **additional_inputs)
        fake_targets = to_gpu_if_available(zeros(len(fake_scores)))
        loss_on_fake = self.bce_loss(fake_scores, fake_targets)

        return loss_on_real + loss_on_fake
Example #7
0
 def generate_sample(self, configuration: Configuration, metadata: Metadata,
                     architecture: Architecture,
                     **additional_inputs: Tensor) -> Tensor:
     """Generate one batch of samples by decoding the generator output.

     Normal noise of shape ``(configuration.batch_size, noise_size)`` is fed to
     the generator, and the resulting code is passed to
     ``architecture.autoencoder.decode``. Both networks are put in evaluation
     mode first and both receive ``additional_inputs``.
     """
     autoencoder = architecture.autoencoder
     generator = architecture.generator

     noise_shape = (configuration.batch_size, architecture.arguments.noise_size)
     noise = to_gpu_if_available(FloatTensor(*noise_shape).normal_())

     # evaluation mode for both networks before generating
     autoencoder.eval()
     generator.eval()

     code = generator(noise, **additional_inputs)
     return autoencoder.decode(code, **additional_inputs)
Example #8
0
    def train_generator_step(configuration: Configuration, metadata: Metadata,
                             architecture: Architecture,
                             batch: Batch) -> float:
        """Perform one generator update on an imputation batch and return the loss.

        The generator output replaces the positions flagged by
        ``batch["missing_mask"]`` in ``batch["raw_features"]``; the imputed
        batch, a freshly generated hint and the inverse of the missing mask are
        handed to ``architecture.generator_loss``.
        """
        optimizer = architecture.generator_optimizer

        # start from zeroed gradients
        optimizer.zero_grad()

        # the generated batch has the same size as the real feature batch
        missing_mask = batch["missing_mask"]
        generated = architecture.generator(batch["features"], missing_mask=missing_mask)

        # generated values where the mask is one, raw values where it is zero
        raw_features = batch["raw_features"]
        imputed = compose_with_mask(
            mask=missing_mask,
            differentiable=True,  # the original note says there are no NaNs at this point
            where_one=generated,
            where_zero=raw_features)

        hint = generate_hint(missing_mask, configuration.hint_probability, metadata)
        non_missing_mask = inverse_mask(missing_mask)

        loss = architecture.generator_loss(architecture=architecture,
                                           features=raw_features,
                                           generated=generated,
                                           imputed=imputed,
                                           hint=hint,
                                           non_missing_mask=non_missing_mask)

        # back-propagate and update the generator weights
        loss.backward()
        optimizer.step()

        return to_cpu_if_was_in_gpu(loss).item()
Example #9
0
    def forward(self, architecture: Architecture, features: Tensor,
                generated: Tensor, imputed: Tensor, hint: Tensor,
                non_missing_mask: Tensor) -> Tensor:
        """Return the generator loss: adversarial term plus weighted reconstruction.

        The adversarial term is the BCE between the discriminator predictions on
        ``imputed`` (with ``hint`` passed as ``missing_mask``) and
        ``non_missing_mask``. The reconstruction term compares ``generated`` with
        ``features`` under ``non_missing_mask`` and is scaled by
        ``self.reconstruction_loss_weight``.
        """
        # the discriminator tries to tell imputed positions from real ones;
        # the generator is scored against the inverse mask to fool it
        predictions = architecture.discriminator(imputed, missing_mask=hint)
        adversarial_term = self.bce_loss(predictions, non_missing_mask)

        # how well the non-missing values are reproduced
        reconstruction_term = self.reconstruction_loss(generated, features, non_missing_mask)
        weighted_reconstruction = self.reconstruction_loss_weight * reconstruction_term

        return adversarial_term + weighted_reconstruction
Example #10
0
    def train_discriminator_step(self, configuration: Configuration, metadata: Metadata, architecture: Architecture,
                                 batch: Batch) -> float:
        """Perform a single discriminator update and return the loss as a Python float.

        As many fake samples as there are real ones in ``batch["features"]`` are
        drawn with ``self.sample_fake``; ``batch.get("labels")`` is used as the
        condition for both the sampling and the loss.
        """
        optimizer = architecture.discriminator_optimizer

        # start from zeroed gradients
        optimizer.zero_grad()

        real_features = batch["features"]
        labels = batch.get("labels")

        # detached so that nothing propagates back into the generator
        fake_features = self.sample_fake(architecture, len(real_features), condition=labels).detach()

        loss = architecture.discriminator_loss(architecture, real_features, fake_features, condition=labels)

        # back-propagate and update the discriminator weights
        loss.backward()
        optimizer.step()

        return to_cpu_if_was_in_gpu(loss).item()
Example #11
0
 def extract_states(sources: Architecture) -> Checkpoint:
     """Map every component name in ``sources`` to that component's ``state_dict()``."""
     return {name: component.state_dict() for name, component in sources.items()}
Example #12
0
 def val_batch(architecture: Architecture, batch: Batch,
               post_processing: PostProcessing) -> float:
     """Run the generator on one batch and return its validation loss as a float.

     The loss is computed by ``architecture.val_loss`` from the post-processing
     object, the generated output and the batch itself.
     """
     missing_mask = batch["missing_mask"]
     generated = architecture.generator(batch["features"], missing_mask=missing_mask)
     validation_loss = architecture.val_loss(post_processing, generated, batch)
     return to_cpu_if_was_in_gpu(validation_loss).item()
Example #13
0
 def load_states(sources: Checkpoint, targets: Architecture) -> None:
     """Load into every component of ``targets`` the state stored under its name.

     Only the names present in ``targets`` are looked up in ``sources``; a name
     missing from ``sources`` raises ``KeyError``.
     """
     for component_name, component in targets.items():
         stored_state = sources[component_name]
         component.load_state_dict(stored_state)
Example #14
0
    def impute(self, configuration: Configuration, metadata: Metadata,
               architecture: Architecture, batch: Dict[str, Tensor]) -> Tensor:
        """Impute a batch by optimising the noise fed to the generator.

        The generator is put in evaluation mode and only the noise tensor is
        given to the Adam optimizer. Each iteration minimises the masked
        reconstruction loss between the generated sample and
        ``batch["features"]`` on the non-missing positions. The search stops
        after ``configuration.max_iterations`` iterations, or as soon as that
        loss drops below ``configuration.get("tolerance", 1e-5)``.

        When ``configuration.get("log_missing_loss", False)`` is truthy, two
        extra losses against ``batch["raw_features"]`` are logged for debugging.

        Returns the sample generated from the final noise.
        """
        # loss function
        loss_function = create_component(architecture, metadata,
                                         configuration.reconstruction_loss)
        masked_loss_function = MaskedReconstructionLoss(loss_function)
        # normaliser of the unmasked debugging loss (rows times columns)
        batch_size = batch["features"].shape[0] * batch["features"].shape[1]
        # we need the non missing mask for the loss
        non_missing_mask = inverse_mask(batch["missing_mask"])

        # initial noise
        noise = to_gpu_if_available(
            FloatTensor(len(batch["features"]),
                        architecture.arguments.noise_size).normal_())
        noise.requires_grad_()

        # it is the noise that is updated, not the generator
        optimizer = Adam([noise],
                         weight_decay=0,
                         lr=configuration.noise_learning_rate)
        architecture.generator.eval()

        # logger
        log_path = create_parent_directories_if_needed(configuration.logs)
        logger = TrainLogger(self.logger, log_path, False)

        # the condition does not change between iterations
        condition = batch.get("labels")

        # initial generation
        logger.start_timer()
        generated = architecture.generator(noise, condition=condition)

        # settings that stay the same for every iteration are read once
        max_iterations = configuration.max_iterations
        tolerance = configuration.get("tolerance", 1e-5)
        log_missing_loss = configuration.get("log_missing_loss", False)

        # iterate until the maximum number of iterations is reached
        # or until the non missing loss is small enough
        for iteration in range(1, max_iterations + 1):
            # compute the loss on the non-missing values
            non_missing_loss = masked_loss_function(generated,
                                                    batch["features"],
                                                    non_missing_mask)
            # read the scalar once; it is used for logging and for the stopping test
            non_missing_loss_value = to_cpu_if_was_in_gpu(non_missing_loss).item()
            logger.log(iteration, max_iterations, "non_missing_loss",
                       non_missing_loss_value)

            # this loss only makes sense if the ground truth is present
            # only used for debugging
            if log_missing_loss:
                # this part should not affect the gradient calculation
                with torch.no_grad():
                    missing_loss = masked_loss_function(
                        generated, batch["raw_features"],
                        batch["missing_mask"])
                    logger.log(iteration, max_iterations, "missing_loss",
                               to_cpu_if_was_in_gpu(missing_loss).item())

                    loss = loss_function(generated,
                                         batch["raw_features"]) / batch_size
                    logger.log(iteration, max_iterations, "loss",
                               to_cpu_if_was_in_gpu(loss).item())

            # if the generation is good enough we stop
            if non_missing_loss_value < tolerance:
                break

            # clear previous gradients
            optimizer.zero_grad()
            # compute the gradients
            non_missing_loss.backward()
            # update the noise
            optimizer.step()

            # generate again from the updated noise
            logger.start_timer()
            generated = architecture.generator(noise, condition=condition)

        return generated
Example #15
0
 def forward(self, architecture: Architecture, imputed: Tensor,
             hint: Tensor, missing_mask: Tensor) -> Tensor:
     """Return the BCE between the discriminator predictions and the missing mask.

     The discriminator scores ``imputed`` with ``hint`` passed as its
     ``missing_mask`` argument; predicting the mask amounts to telling which
     positions were imputed and which ones were real.
     """
     discriminator = architecture.discriminator
     predicted_mask = discriminator(imputed, missing_mask=hint)
     return self.bce_loss(predicted_mask, missing_mask)
Example #16
0
 def impute(self, configuration: Configuration, metadata: Metadata,
            architecture: Architecture, batch: Dict[str, Tensor]) -> Tensor:
     """Impute a batch with a single forward pass of the generator.

     The generator receives ``batch["features"]`` and, as ``missing_mask``,
     ``batch["missing_mask"]``; its output is returned unchanged.
     """
     generator = architecture.generator
     features = batch["features"]
     missing_mask = batch["missing_mask"]
     return generator(features, missing_mask=missing_mask)
Example #17
0
 def impute(self, configuration: Configuration, metadata: Metadata,
            architecture: Architecture, batch: Dict[str, Tensor]) -> Tensor:
     """Impute a batch with the autoencoder.

     The autoencoder is called on ``batch["features"]`` with
     ``batch.get("labels")`` as condition, and the ``"reconstructed"`` entry of
     its output is returned.
     """
     outputs = architecture.autoencoder(batch["features"], condition=batch.get("labels"))
     return outputs["reconstructed"]
Example #18
0
 def sample_fake(self, architecture: Architecture, size: int, **additional_inputs: Tensor) -> Tensor:
     """Generate ``size`` fake samples from normally distributed noise.

     The noise has shape ``(size, architecture.arguments.noise_size)``; the
     generator also receives ``additional_inputs``.
     """
     # normal noise for now; another distribution could be used instead
     noise_shape = (size, architecture.arguments.noise_size)
     noise = FloatTensor(*noise_shape).normal_()
     return architecture.generator(to_gpu_if_available(noise), **additional_inputs)
Example #19
0
 def forward(self, architecture: Architecture, fake_features: Tensor, **additional_inputs: Tensor) -> Tensor:
     """Return the negated ``critic_loss_function`` of the discriminator scores on fake features."""
     scores = architecture.discriminator(fake_features, **additional_inputs)
     critic_term = critic_loss_function(scores)
     return - critic_term
Example #20
0
def create_architecture(metadata: Metadata,
                        configuration: Configuration) -> Architecture:
    """Create every configured component, dependencies first.

    Each entry of ``configuration.components`` names a factory, looked up in
    ``factory_by_name``. The factory reports which other components it needs,
    and a component is only created once all of them are already present in
    the architecture (a topological sort of the dependency graph).

    The creation order is deterministic: components without dependencies are
    created in configuration order, and the others follow in the order in
    which their last dependency gets satisfied. This keeps the order, and
    with it anything that depends on it such as random weight initialisation,
    the same from one run to the next.

    Raises:
        KeyError: if a component depends on a name that is not one of the
            configured components.
        Exception: if some components can never be created because their
            dependencies cannot be met (for instance a dependency cycle).
    """
    architecture = Architecture(configuration.arguments)

    # nodes are component names, kept in configuration order
    names = list(configuration.components.keys())

    # unmet[name]: dependencies of the component that were not created yet
    # dependents[name]: components waiting for this one, in registration order
    unmet = {}
    dependents = {}
    for name in names:
        unmet[name] = set()
        dependents[name] = []

    # create the dependency edges
    for name, component_configuration in configuration.components.items():
        factory = factory_by_name[component_configuration.factory]
        dependencies = factory.dependencies(
            component_configuration.get("arguments", {}))
        for dependency in dependencies:
            # a dependency listed more than once counts as a single edge
            if dependency not in unmet[name]:
                unmet[name].add(dependency)
                dependents[dependency].append(name)

    # components that can be created right away; the list doubles as a queue
    ready = [name for name in names if len(unmet[name]) == 0]

    position = 0
    while position < len(ready):
        name = ready[position]
        position += 1

        # create the component
        architecture[name] = create_component(architecture, metadata,
                                              configuration.components[name])

        # the components waiting for this one have one dependency less
        for dependent in dependents[name]:
            unmet[dependent].remove(name)
            if len(unmet[dependent]) == 0:
                ready.append(dependent)

    # whatever never became ready is part of a loop or depends on one
    if len(ready) < len(names):
        remaining = [name for name in names if len(unmet[name]) > 0]
        raise Exception(
            "Dependencies cannot be met for components: {}.".format(
                ", ".join(remaining)))

    return architecture