def forward(self, architecture: Architecture, real_features: Tensor, fake_features: Tensor,
            **additional_inputs: Tensor) -> Tensor:
    loss = super(WGANCriticLossWithGradientPenalty, self).forward(
        architecture, real_features, fake_features, **additional_inputs)

    # calculate gradient penalty
    alpha = rand(len(real_features), 1)
    alpha = alpha.expand(real_features.size())
    alpha = to_gpu_if_available(alpha)

    interpolates = alpha * real_features + ((1 - alpha) * fake_features)
    interpolates.requires_grad_()

    # we do not interpolate the conditions because they are the same for fake and real features
    discriminator_interpolates = architecture.discriminator(interpolates, **additional_inputs)

    gradients = grad(outputs=discriminator_interpolates,
                     inputs=interpolates,
                     grad_outputs=to_gpu_if_available(ones_like(discriminator_interpolates)),
                     create_graph=True,
                     retain_graph=True,
                     only_inputs=True)[0]

    gradient_penalty = ((gradients.norm(2, dim=1) - 1) ** 2).mean() * self.weight

    # return total loss
    return loss + gradient_penalty
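# Illustrative only: a minimal self-contained sketch of the gradient penalty term above,
# using plain PyTorch and a hypothetical toy linear critic (neither is part of this codebase).
# The penalty pushes the critic's gradient norm on interpolated points towards 1:
#   penalty = weight * E[(||grad_x D(x_hat)||_2 - 1)^2]
import torch

def toy_gradient_penalty(critic: torch.nn.Module, real: torch.Tensor, fake: torch.Tensor,
                         weight: float = 10.0) -> torch.Tensor:
    alpha = torch.rand(len(real), 1).expand(real.size())
    interpolates = (alpha * real + (1 - alpha) * fake).requires_grad_()
    scores = critic(interpolates)
    gradients = torch.autograd.grad(outputs=scores, inputs=interpolates,
                                    grad_outputs=torch.ones_like(scores),
                                    create_graph=True, retain_graph=True, only_inputs=True)[0]
    return ((gradients.norm(2, dim=1) - 1) ** 2).mean() * weight

# usage sketch
critic = torch.nn.Linear(8, 1)
penalty = toy_gradient_penalty(critic, torch.randn(4, 8), torch.randn(4, 8))
penalty.backward()  # thanks to create_graph=True, gradients flow back into the critic parameters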
def run(self, configuration: Configuration) -> None:
    seed_all(configuration.get("seed"))

    metadata = load_metadata(configuration.metadata)

    architecture_configuration = load_configuration(configuration.architecture)
    self.validate_architecture_configuration(architecture_configuration)
    architecture = create_architecture(metadata, architecture_configuration)
    architecture.to_gpu_if_available()

    checkpoints = Checkpoints()
    checkpoint = checkpoints.load(configuration.checkpoint)
    if "best_architecture" in checkpoint:
        checkpoints.load_states(checkpoint["best_architecture"], architecture)
    else:
        checkpoints.load_states(checkpoint["architecture"], architecture)

    # pre-processing
    imputation = create_component(architecture, metadata, configuration.imputation)
    pre_processing = PreProcessing(imputation)

    # post-processing
    if "scale_transform" in configuration:
        scale_transform = load_scale_transform(configuration.scale_transform)
    else:
        scale_transform = None

    post_processing = PostProcessing(metadata, scale_transform)

    # load the features
    features = to_gpu_if_available(torch.from_numpy(np.load(configuration.features)).float())
    missing_mask = to_gpu_if_available(torch.from_numpy(np.load(configuration.missing_mask)).float())

    # initial imputation
    batch = pre_processing.transform({"features": features, "missing_mask": missing_mask})

    # generate the model outputs
    output = self.impute(configuration, metadata, architecture, batch)

    # imputation
    output = compose_with_mask(mask=missing_mask,
                               differentiable=False,
                               where_one=output,
                               where_zero=features)

    # post-process
    output = post_processing.transform(output)

    # save the imputation
    output = to_cpu_if_was_in_gpu(output)
    output = output.numpy()
    np.save(configuration.output, output)
def train_generator_step(self, configuration: Configuration, metadata: Metadata,
                         architecture: Architecture) -> float:
    # clean previous gradients
    architecture.generator_optimizer.zero_grad()

    # conditional
    if "conditional" in architecture.arguments:
        # for now a uniform distribution is used, but it could be controlled in a different way
        # this also works for both binary and categorical dependent variables
        number_of_conditions = metadata.get_dependent_variable().get_size()
        condition = to_gpu_if_available(FloatTensor(configuration.batch_size).uniform_(0, number_of_conditions))
    # non-conditional
    else:
        condition = None

    # generate a full batch of fake features
    fake_features = self.sample_fake(architecture, configuration.batch_size, condition=condition)

    # calculate loss
    loss = architecture.generator_loss(architecture, fake_features, condition=condition)

    # calculate gradients
    loss.backward()

    # update the generator weights
    architecture.generator_optimizer.step()

    # return the loss
    return to_cpu_if_was_in_gpu(loss).item()
def generate_sample(self, configuration: Configuration, metadata: Metadata, architecture: Architecture,
                    **additional_inputs: Tensor) -> Tensor:
    code = to_gpu_if_available(FloatTensor(configuration.batch_size, architecture.arguments.code_size).normal_())
    architecture.autoencoder.eval()
    return architecture.autoencoder.decode(code, **additional_inputs)
def generate_hint(missing_mask: Tensor, hint_probability: float, metadata: Metadata) -> Tensor:
    # the GAIN paper goes on and on about using a more complex hint mechanism
    # but then in the online code example they use this technique
    # see: https://github.com/jsyoon0823/GAIN/issues/2

    # create a mask with "hint probability" of having ones
    hint_mask = to_gpu_if_available(generate_mask_for(missing_mask, hint_probability, metadata))

    # leave the mask untouched where there are hints (hint_mask=1)
    # but put zeros where there are no hints (hint_mask=0)
    return missing_mask * hint_mask
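# Illustrative only: the hint computation above on a tiny example. A Bernoulli mask
# stands in for generate_mask_for here, which is an assumption about its behaviour.
import torch

missing_mask = torch.tensor([[1., 0., 1.],
                             [0., 1., 1.]])
hint_mask = (torch.rand_like(missing_mask) < 0.9).float()  # hint_probability = 0.9
hint = missing_mask * hint_mask
# where hint_mask is 1 the discriminator sees the true missing_mask entry;
# where hint_mask is 0 the entry is zeroed out and the missingness information is hidden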
def run(self, configuration: Configuration) -> None:
    seed_all(configuration.get("seed"))

    metadata = load_metadata(configuration.metadata)

    architecture_configuration = load_configuration(configuration.architecture)
    self.validate_architecture_configuration(architecture_configuration)
    architecture = create_architecture(metadata, architecture_configuration)
    architecture.to_gpu_if_available()

    checkpoints = Checkpoints()
    checkpoint = checkpoints.load(configuration.checkpoint)
    if "best_architecture" in checkpoint:
        checkpoints.load_states(checkpoint["best_architecture"], architecture)
    else:
        checkpoints.load_states(checkpoint["architecture"], architecture)

    # load the features
    features = to_gpu_if_available(torch.from_numpy(np.load(configuration.features)).float())

    # conditional
    if "labels" in configuration:
        condition = to_gpu_if_available(torch.from_numpy(np.load(configuration.labels)).float())
    else:
        condition = None

    # encode
    with torch.no_grad():
        code = architecture.autoencoder.encode(features, condition=condition)["code"]

    # save the code
    code = to_cpu_if_was_in_gpu(code)
    code = code.numpy()
    np.save(configuration.output, code)
def forward(self, inputs: Tensor) -> Tensor:
    # dropout only during training
    if self.training:
        # create a missing mask using the drop probability
        drop_mask = to_gpu_if_available(generate_mask_for(inputs, self.drop_probability, self.metadata))

        # put zeros where the drop mask is one and leave the inputs where the drop mask is zero
        return compose_with_mask(mask=drop_mask,
                                 where_one=torch.zeros_like(inputs),
                                 where_zero=inputs,
                                 differentiable=True)
    # don't touch the inputs during evaluation
    else:
        return inputs
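# Illustrative only: the composition above on a toy batch, with a fixed mask standing in
# for generate_mask_for; compose_with_mask is assumed here to compute
# mask * where_one + (1 - mask) * where_zero.
import torch

inputs = torch.tensor([[1., 2.], [3., 4.]])
drop_mask = torch.tensor([[1., 0.], [0., 1.]])
dropped = drop_mask * torch.zeros_like(inputs) + (1 - drop_mask) * inputs
# dropped == tensor([[0., 2.], [3., 0.]])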
def forward(self, architecture: Architecture, real_features: Tensor, fake_features: Tensor,
            **additional_inputs: Tensor) -> Tensor:
    # real loss
    real_predictions = architecture.discriminator(real_features, **additional_inputs)
    positive_labels = generate_positive_labels(len(real_predictions), self.smooth_positive_labels)
    real_loss = self.bce_loss(real_predictions, positive_labels)

    # fake loss
    fake_predictions = architecture.discriminator(fake_features, **additional_inputs)
    negative_labels = to_gpu_if_available(zeros(len(fake_predictions)))
    fake_loss = self.bce_loss(fake_predictions, negative_labels)

    # total loss
    return real_loss + fake_loss
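# Illustrative only: the two BCE terms above on toy predictions, assuming the
# discriminator outputs probabilities in (0, 1) (hence BCELoss rather than BCEWithLogitsLoss).
import torch

bce = torch.nn.BCELoss()
real_predictions = torch.tensor([0.9, 0.8])
fake_predictions = torch.tensor([0.2, 0.1])
real_loss = bce(real_predictions, torch.ones(2))   # or smoothed positive labels
fake_loss = bce(fake_predictions, torch.zeros(2))
total_loss = real_loss + fake_loss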
def generate_sample(self, sampler: Sampler, configuration: Configuration, metadata: Metadata) -> Tensor:
    condition = to_gpu_if_available(torch.ones(configuration.batch_size, dtype=torch.float) * self.condition)
    return sampler.generate_sample(condition=condition)
def sample_fake(self, architecture: Architecture, size: int, **additional_inputs: Tensor) -> Tensor:
    # for now the noise comes from a normal distribution, but it could come from another distribution
    noise = to_gpu_if_available(FloatTensor(size, architecture.arguments.noise_size).normal_())
    return architecture.generator(noise, **additional_inputs)
def impute(self, configuration: Configuration, metadata: Metadata, architecture: Architecture,
           batch: Dict[str, Tensor]) -> Tensor:
    # loss function
    loss_function = create_component(architecture, metadata, configuration.reconstruction_loss)
    masked_loss_function = MaskedReconstructionLoss(loss_function)
    batch_size = batch["features"].shape[0] * batch["features"].shape[1]

    # we need the non-missing mask for the loss
    non_missing_mask = inverse_mask(batch["missing_mask"])

    # initial noise
    noise = to_gpu_if_available(FloatTensor(len(batch["features"]), architecture.arguments.noise_size).normal_())
    noise.requires_grad_()

    # it is not the generator that we are updating, it is the noise
    optimizer = Adam([noise], weight_decay=0, lr=configuration.noise_learning_rate)
    architecture.generator.eval()

    # logger
    log_path = create_parent_directories_if_needed(configuration.logs)
    logger = TrainLogger(self.logger, log_path, False)

    # initial generation
    logger.start_timer()
    generated = architecture.generator(noise, condition=batch.get("labels"))

    # iterate until we reach the maximum number of iterations or until the non-missing loss is small enough
    max_iterations = configuration.max_iterations
    for iteration in range(1, max_iterations + 1):
        # compute the loss on the non-missing values
        non_missing_loss = masked_loss_function(generated, batch["features"], non_missing_mask)
        logger.log(iteration, max_iterations, "non_missing_loss", to_cpu_if_was_in_gpu(non_missing_loss).item())

        # this loss only makes sense if the ground truth is present
        # only used for debugging
        if configuration.get("log_missing_loss", False):
            # this part should not affect the gradient calculation
            with torch.no_grad():
                missing_loss = masked_loss_function(generated, batch["raw_features"], batch["missing_mask"])
                logger.log(iteration, max_iterations, "missing_loss", to_cpu_if_was_in_gpu(missing_loss).item())

                loss = loss_function(generated, batch["raw_features"]) / batch_size
                logger.log(iteration, max_iterations, "loss", to_cpu_if_was_in_gpu(loss).item())

        # if the generation is good enough we stop
        if to_cpu_if_was_in_gpu(non_missing_loss).item() < configuration.get("tolerance", 1e-5):
            break

        # clear previous gradients
        optimizer.zero_grad()

        # compute the gradients
        non_missing_loss.backward()

        # update the noise
        optimizer.step()

        # generate the next candidate
        logger.start_timer()
        generated = architecture.generator(noise, condition=batch.get("labels"))

    return generated
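# Illustrative only: the core idea of the loop above in a few lines, with a hypothetical
# toy generator. The generator stays frozen; only the noise tensor is updated so the
# generated sample matches the target on the observed (non-missing) positions.
import torch

generator = torch.nn.Linear(3, 5)
for parameter in generator.parameters():
    parameter.requires_grad_(False)

noise = torch.randn(2, 3, requires_grad=True)
optimizer = torch.optim.Adam([noise], lr=0.01)

target = torch.randn(2, 5)
observed_mask = (torch.rand(2, 5) < 0.5).float()  # 1 = observed, 0 = missing

for _ in range(100):
    optimizer.zero_grad()
    # masked squared error on the observed positions only
    loss = (((generator(noise) - target) * observed_mask) ** 2).sum() / observed_mask.sum()
    loss.backward()
    optimizer.step()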
def create(self, architecture: Architecture, metadata: Metadata, arguments: Configuration) -> Any:
    return MeanAndModesImputationLayer(
        to_gpu_if_available(torch.from_numpy(np.load(arguments.path)).float()),
        **arguments.get_all_defined(["differentiable"]))
def run(self, configuration: Configuration) -> None:
    seed_all(configuration.get("seed"))

    datasets = Datasets()
    for dataset_name, dataset_path in configuration.data.items():
        datasets[dataset_name] = to_gpu_if_available(torch.from_numpy(np.load(dataset_path)).float())

    metadata = load_metadata(configuration.metadata)

    architecture_configuration = load_configuration(configuration.architecture)
    self.validate_architecture_configuration(architecture_configuration)
    architecture = create_architecture(metadata, architecture_configuration)
    architecture.to_gpu_if_available()

    create_parent_directories_if_needed(configuration.checkpoints.output)
    checkpoints = Checkpoints()

    # no input checkpoint by default
    checkpoint = None

    # continue from an output checkpoint (has priority over the input checkpoint)
    if configuration.checkpoints.get("continue_from_output", default=False) \
            and checkpoints.exists(configuration.checkpoints.output):
        checkpoint = checkpoints.load(configuration.checkpoints.output)
    # continue from an input checkpoint
    elif "input" in configuration.checkpoints:
        checkpoint = checkpoints.load(configuration.checkpoints.input)
        if configuration.checkpoints.get("ignore_input_epochs", default=False):
            checkpoint["epoch"] = 0
        if configuration.checkpoints.get("use_best_input", default=False):
            checkpoint["architecture"] = checkpoint.pop("best_architecture")
            checkpoint.pop("best_epoch")
            checkpoint.pop("best_metric")

    # if there is no starting checkpoint then initialize
    if checkpoint is None:
        architecture.initialize()
        checkpoint = {
            "architecture": checkpoints.extract_states(architecture),
            "epoch": 0
        }
    # if there is a starting checkpoint then load it
    else:
        checkpoints.load_states(checkpoint["architecture"], architecture)

    log_path = create_parent_directories_if_needed(configuration.logs)
    logger = TrainLogger(self.logger, log_path, checkpoint["epoch"] > 0)

    # pre-processing
    if "imputation" in configuration:
        imputation = create_component(architecture, metadata, configuration.imputation)
    else:
        imputation = None

    pre_processing = PreProcessing(imputation)

    # post-processing
    if "scale_transform" in configuration:
        scale_transform = load_scale_transform(configuration.scale_transform)
    else:
        scale_transform = None

    post_processing = PostProcessing(metadata, scale_transform)

    for epoch in range(checkpoint["epoch"] + 1, configuration.epochs + 1):
        # train discriminator and generator
        logger.start_timer()
        metrics = self.train_epoch(configuration, metadata, architecture, datasets,
                                   pre_processing, post_processing)
        for metric_name, metric_value in metrics.items():
            logger.log(epoch, configuration.epochs, metric_name, metric_value)

        # update the checkpoint
        checkpoint["architecture"] = checkpoints.extract_states(architecture)
        checkpoint["epoch"] = epoch

        # if the best architecture parameters should be kept
        if "keep_checkpoint_by_metric" in configuration:
            # get the metric used to compare checkpoints
            checkpoint_metric = metrics[configuration.keep_checkpoint_by_metric]

            # check if this is the best checkpoint (or the first one)
            if "best_metric" not in checkpoint or checkpoint_metric < checkpoint["best_metric"]:
                checkpoint["best_architecture"] = checkpoint["architecture"]
                checkpoint["best_epoch"] = epoch
                checkpoint["best_metric"] = checkpoint_metric

        # save the checkpoint
        checkpoints.delayed_save(checkpoint, configuration.checkpoints.output, configuration.checkpoints.max_delay)

    # force the save of the last checkpoint
    checkpoints.save(checkpoint, configuration.checkpoints.output)

    # finish
    logger.close()
def generate_positive_labels(size: int, smooth: bool) -> Tensor:
    if smooth:
        return to_gpu_if_available(FloatTensor(size).uniform_(0.9, 1))
    else:
        return to_gpu_if_available(ones(size))
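# Illustrative only: one-sided label smoothing as used above. Positive targets are drawn
# from U(0.9, 1) while negative targets stay exactly 0, which keeps the discriminator
# from becoming over-confident on real samples.
import torch

smooth_positives = torch.empty(4).uniform_(0.9, 1)  # values in [0.9, 1)
hard_positives = torch.ones(4)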
def to_gpu_if_available(self) -> None:
    for name, component in self.items():
        if isinstance(component, Module):  # skip optimizers
            self[name] = to_gpu_if_available(component)