def forward(self, outputs: Dict[str, Tensor], batch: Dict[str, Tensor]) -> Tensor:
    if self.masked:
        # only penalize the reconstruction of values that are actually present
        return self.reconstruction_loss(outputs["reconstructed"],
                                        batch["features"],
                                        inverse_mask(batch["missing_mask"]))
    else:
        return self.reconstruction_loss(outputs["reconstructed"], batch["features"])
def forward(self, outputs: Dict[str, Tensor], batch: Dict[str, Tensor]) -> Tensor:
    if self.masked:
        # only penalize the reconstruction of values that are actually present
        reconstruction_loss = self.reconstruction_loss(outputs["reconstructed"],
                                                       batch["features"],
                                                       inverse_mask(batch["missing_mask"]))
    else:
        reconstruction_loss = self.reconstruction_loss(outputs["reconstructed"],
                                                       batch["features"])

    # KL divergence between the approximate posterior and a standard normal prior
    kld_loss = -0.5 * torch.sum(1 + outputs["log_var"] - outputs["mu"].pow(2) - outputs["log_var"].exp())

    return reconstruction_loss + kld_loss
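# The two forward methods above depend on an `inverse_mask` helper and on a
# reconstruction loss that accepts an optional mask. Their real implementations
# live elsewhere in this code base; the following is only a minimal sketch of
# the assumed behaviour (1 = keep the position, 0 = ignore it), not the actual code.
import torch
from torch import Tensor
from torch.nn.functional import mse_loss


def inverse_mask(mask: Tensor) -> Tensor:
    # flip a binary mask: a missing mask becomes a non-missing mask and vice versa
    return 1 - mask


def masked_mse(reconstructed: Tensor, original: Tensor, mask: Tensor) -> Tensor:
    # penalize only the positions selected by the mask and average over them
    return mse_loss(reconstructed * mask, original * mask, reduction="sum") / mask.sum()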
def run(self, configuration: Configuration) -> None:
    metadata = load_metadata(configuration.metadata)

    inputs = torch.from_numpy(np.load(configuration.inputs))
    missing_mask = torch.from_numpy(np.load(configuration.missing_mask))
    non_missing_mask = inverse_mask(missing_mask)

    assert inputs.shape == missing_mask.shape

    filling_values = torch.zeros(metadata.get_num_features(), dtype=inputs.dtype)

    for variable_metadata in metadata.get_by_independent_variable():
        index = variable_metadata.get_feature_index()
        size = variable_metadata.get_size()

        # binary
        if variable_metadata.is_binary():
            # count how many ones the variable has where the non-missing mask is one
            one_count = inputs[non_missing_mask[:, index] == 1, index].sum()
            # count how many non-missing values the variable has and subtract the ones
            zero_count = non_missing_mask[:, index].sum() - one_count
            # fill with a one if there are more ones than zeros, otherwise fill with a zero
            filling_value = (1 if one_count >= zero_count else 0)

        # categorical
        elif variable_metadata.is_categorical():
            # how many ones per column (per categorical variable value)
            column_count = torch.zeros(size)
            for offset in range(size):
                column_count[offset] = inputs[non_missing_mask[:, index + offset] == 1, index + offset].sum()
            # get the most common value as a one-hot vector
            filling_value = one_hot(column_count.argmax(), num_classes=size)

        # numerical
        else:
            # take the mean of the values where the non-missing mask is one
            filling_value = inputs[non_missing_mask[:, index] == 1, index].mean()

        # fill the variable
        filling_values[index:index + size] = filling_value

    # save the filling values
    np.save(configuration.outputs, filling_values.numpy())
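# Hedged usage sketch: once the filling values computed above are saved, missing
# entries can be imputed by broadcasting them over the rows selected by the
# missing mask. The file names below are illustrative assumptions, not the
# actual configuration values used by this code base.
import numpy as np
import torch

inputs = torch.from_numpy(np.load("inputs.npy"))
missing_mask = torch.from_numpy(np.load("missing_mask.npy"))
filling_values = torch.from_numpy(np.load("filling_values.npy"))

# keep observed entries, replace missing ones with the per-feature filling value
imputed = torch.where(missing_mask.bool(), filling_values.expand_as(inputs), inputs)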
def train_generator_step(configuration: Configuration, metadata: Metadata,
                         architecture: Architecture, batch: Batch) -> float:
    # clean previous gradients
    architecture.generator_optimizer.zero_grad()

    # generate a batch of fake features with the same size as the real feature batch
    generated = architecture.generator(batch["features"], missing_mask=batch["missing_mask"])

    # replace the missing features by the generated ones
    imputed = compose_with_mask(mask=batch["missing_mask"],
                                differentiable=True,  # now there are no NaNs and this should be used
                                where_one=generated,
                                where_zero=batch["raw_features"])

    # generate hint
    hint = generate_hint(batch["missing_mask"], configuration.hint_probability, metadata)

    # calculate loss
    loss = architecture.generator_loss(architecture=architecture,
                                       features=batch["raw_features"],
                                       generated=generated,
                                       imputed=imputed,
                                       hint=hint,
                                       non_missing_mask=inverse_mask(batch["missing_mask"]))

    # calculate gradients
    loss.backward()

    # update the generator weights
    architecture.generator_optimizer.step()

    # return the loss
    return to_cpu_if_was_in_gpu(loss).item()
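# `compose_with_mask` and `generate_hint` are defined elsewhere in the code base.
# The sketch below only illustrates the behaviour of `compose_with_mask` that is
# assumed by its use above (1 in the mask selects `where_one`, 0 selects
# `where_zero`); the handling of the `differentiable` flag is an assumption.
import torch
from torch import Tensor


def compose_with_mask(mask: Tensor, where_one: Tensor, where_zero: Tensor,
                      differentiable: bool = False) -> Tensor:
    if differentiable:
        # convex combination keeps gradients flowing through both inputs
        return mask * where_one + (1 - mask) * where_zero
    # hard selection, no gradient through the discarded side
    return torch.where(mask.bool(), where_one, where_zero)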
def impute(self, configuration: Configuration, metadata: Metadata,
           architecture: Architecture, batch: Dict[str, Tensor]) -> Tensor:
    # loss function
    loss_function = create_component(architecture, metadata, configuration.reconstruction_loss)
    masked_loss_function = MaskedReconstructionLoss(loss_function)
    batch_size = batch["features"].shape[0] * batch["features"].shape[1]

    # we need the non-missing mask for the loss
    non_missing_mask = inverse_mask(batch["missing_mask"])

    # initial noise
    noise = to_gpu_if_available(FloatTensor(len(batch["features"]), architecture.arguments.noise_size).normal_())
    noise.requires_grad_()

    # we are not updating the generator, only the noise
    optimizer = Adam([noise], weight_decay=0, lr=configuration.noise_learning_rate)
    architecture.generator.eval()

    # logger
    log_path = create_parent_directories_if_needed(configuration.logs)
    logger = TrainLogger(self.logger, log_path, False)

    # initial generation
    logger.start_timer()
    generated = architecture.generator(noise, condition=batch.get("labels"))

    # iterate until we reach the maximum number of iterations or until the non-missing loss is small enough
    max_iterations = configuration.max_iterations
    for iteration in range(1, max_iterations + 1):
        # compute the loss on the non-missing values
        non_missing_loss = masked_loss_function(generated, batch["features"], non_missing_mask)
        logger.log(iteration, max_iterations, "non_missing_loss", to_cpu_if_was_in_gpu(non_missing_loss).item())

        # this loss only makes sense if the ground truth is present
        # only used for debugging
        if configuration.get("log_missing_loss", False):
            # this part should not affect the gradient calculation
            with torch.no_grad():
                missing_loss = masked_loss_function(generated, batch["raw_features"], batch["missing_mask"])
                logger.log(iteration, max_iterations, "missing_loss", to_cpu_if_was_in_gpu(missing_loss).item())

                loss = loss_function(generated, batch["raw_features"]) / batch_size
                logger.log(iteration, max_iterations, "loss", to_cpu_if_was_in_gpu(loss).item())

        # if the generation is good enough we stop
        if to_cpu_if_was_in_gpu(non_missing_loss).item() < configuration.get("tolerance", 1e-5):
            break

        # clear previous gradients
        optimizer.zero_grad()
        # compute the gradients
        non_missing_loss.backward()
        # update the noise
        optimizer.step()

        # generate next
        logger.start_timer()
        generated = architecture.generator(noise, condition=batch.get("labels"))

    return generated
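# A condensed, standalone illustration of the idea implemented in `impute` above:
# freeze a trained generator and optimize the latent noise so that the generated
# sample matches the observed (non-missing) entries. Everything below (the
# stand-in generator, shapes, learning rate, iteration count) is an illustrative
# assumption, not the configuration used by the code above.
import torch
from torch.optim import Adam

torch.manual_seed(0)
generator = torch.nn.Linear(4, 8)                      # stand-in for a trained generator
features = torch.randn(16, 8)                          # batch with some entries treated as missing
non_missing_mask = (torch.rand(16, 8) > 0.3).float()   # 1 = observed, 0 = missing

noise = torch.randn(16, 4, requires_grad=True)
optimizer = Adam([noise], lr=0.05)

for _ in range(200):
    generated = generator(noise)
    # penalize disagreement only on the observed entries
    loss = ((generated - features) * non_missing_mask).pow(2).sum()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()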