Example #1
    def forward(self, inputs: Tensor, missing_mask: Tensor) -> Tensor:
        filling_values = self.means_and_modes.repeat(len(inputs), 1)

        return compose_with_mask(missing_mask,
                                 where_one=filling_values,
                                 where_zero=inputs,
                                 differentiable=self.differentiable)
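
All of these examples revolve around compose_with_mask. A minimal sketch of its assumed behavior (the real implementation lives in the repository's utilities and may differ): it selects where_one at positions where the mask is 1 and where_zero elsewhere, with the differentiable flag choosing between a convex combination and a hard selection.

    import torch
    from torch import Tensor

    def compose_with_mask(mask: Tensor, where_one: Tensor, where_zero: Tensor,
                          differentiable: bool = False) -> Tensor:
        if differentiable:
            # convex combination: gradients flow into both branches,
            # but a NaN in either branch would poison the output
            return mask * where_one + (1 - mask) * where_zero
        # hard selection: values from the unused branch (even NaNs) are ignored
        return torch.where(mask.bool(), where_one, where_zero)

This also explains the "cannot be differentiable with nans!" comments below: 0 * NaN is still NaN, so the blended form breaks as soon as one branch contains NaNs.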
Example #2
    def forward(self, post_processing: PostProcessing, prediction: Tensor,
                batch: Dict[str, Tensor]) -> Tensor:
        imputed = compose_with_mask(
            mask=batch["missing_mask"],
            differentiable=False,  # backpropagation is not needed here
            where_one=prediction,
            where_zero=batch["raw_features"])

        return self.reconstruction_loss(
            post_processing.transform(imputed),
            post_processing.transform(batch["raw_features"]))
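
Since imputed agrees with batch["raw_features"] wherever the mask is 0, only the missing positions can contribute to the loss. A tiny self-contained illustration, assuming reconstruction_loss is plain MSE (an assumption; the actual loss is injected into this module):

    import torch

    raw = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
    prediction = torch.tensor([[1.5, 2.0], [3.0, 9.0]])
    mask = torch.tensor([[1.0, 0.0], [0.0, 1.0]])  # 1 marks a missing value

    imputed = torch.where(mask.bool(), prediction, raw)
    loss = torch.nn.functional.mse_loss(imputed, raw)
    # only the two masked entries contribute: ((1.5 - 1)^2 + (9 - 4)^2) / 4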
Example #3
    def run(self, configuration: Configuration) -> None:
        seed_all(configuration.get("seed"))

        metadata = load_metadata(configuration.metadata)

        architecture_configuration = load_configuration(configuration.architecture)
        self.validate_architecture_configuration(architecture_configuration)
        architecture = create_architecture(metadata, architecture_configuration)
        architecture.to_gpu_if_available()

        checkpoints = Checkpoints()
        checkpoint = checkpoints.load(configuration.checkpoint)
        if "best_architecture" in checkpoint:
            checkpoints.load_states(checkpoint["best_architecture"], architecture)
        else:
            checkpoints.load_states(checkpoint["architecture"], architecture)

        # pre-processing
        imputation = create_component(architecture, metadata, configuration.imputation)

        pre_processing = PreProcessing(imputation)

        # post-processing
        if "scale_transform" in configuration:
            scale_transform = load_scale_transform(configuration.scale_transform)
        else:
            scale_transform = None

        post_processing = PostProcessing(metadata, scale_transform)

        # load the features
        features = to_gpu_if_available(torch.from_numpy(np.load(configuration.features)).float())
        missing_mask = to_gpu_if_available(torch.from_numpy(np.load(configuration.missing_mask)).float())

        # initial imputation
        batch = pre_processing.transform({"features": features, "missing_mask": missing_mask})

        # generate the model outputs
        output = self.impute(configuration, metadata, architecture, batch)

        # imputation
        output = compose_with_mask(mask=missing_mask, differentiable=False, where_one=output, where_zero=features)

        # post-process
        output = post_processing.transform(output)

        # save the imputation
        output = to_cpu_if_was_in_gpu(output)
        output = output.numpy()
        np.save(configuration.output, output)
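
For reference, these are the keys the run reads from its Configuration, collected into a plain dict; the file names are illustrative, and whether the real Configuration is parsed from JSON or YAML is an assumption:

    configuration = {
        "seed": 42,
        "metadata": "metadata.json",          # variable/column metadata
        "architecture": "architecture.json",  # model definition
        "checkpoint": "checkpoint.pt",        # trained weights to restore
        "imputation": "imputation.json",      # imputation component settings
        "scale_transform": "scale.pkl",       # optional; omit to skip scaling
        "features": "features.npy",
        "missing_mask": "missing_mask.npy",
        "output": "imputed.npy",
    }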
Example #4
    def forward(self, inputs: Tensor) -> Tensor:
        # dropout only during training
        if self.training:
            # create a missing mask using the drop probability
            drop_mask = to_gpu_if_available(
                generate_mask_for(inputs, self.drop_probability,
                                  self.metadata))

            # put zeros where the drop mask is one and leave the inputs where the drop mask is zero
            return compose_with_mask(mask=drop_mask,
                                     where_one=torch.zeros_like(inputs),
                                     where_zero=inputs,
                                     differentiable=True)

        # don't touch the inputs during evaluation
        else:
            return inputs
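
A plausible sketch of generate_mask_for, assuming it samples an independent Bernoulli per value; the real helper also receives the metadata, presumably so multi-column categorical variables are dropped as a unit, which this sketch ignores:

    import torch
    from torch import Tensor

    def generate_mask_for(inputs: Tensor, probability: float,
                          metadata=None) -> Tensor:
        # 1 with the given probability (drop), 0 otherwise (keep)
        return (torch.rand_like(inputs) < probability).float()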
Example #5
    def impute(self, configuration: Configuration, metadata: Metadata,
               scaled_inputs: Tensor, missing_mask: Tensor) -> Tensor:
        with open(configuration.model, "rb") as model_file:
            model = pickle.load(model_file)

        # the model needs np.nan in the missing values to work
        scaled_inputs = compose_with_mask(
            missing_mask,
            where_one=torch.empty_like(scaled_inputs).fill_(np.nan),
            where_zero=scaled_inputs,
            differentiable=False)  # cannot be differentiable with nans!

        # impute with the scikit-learn model
        imputed = model.transform(scaled_inputs)

        # go back to torch (annoying)
        return torch.from_numpy(imputed).float()
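
Note that model.transform receives a CPU tensor directly: scikit-learn coerces its input through np.asarray, and NumPy accepts CPU torch tensors, so only the return trip needs to be explicit. A quick demonstration of that boundary:

    import numpy as np
    import torch

    t = torch.tensor([[1.0, float("nan")]])
    a = np.asarray(t)            # CPU tensor -> ndarray, no copy
    back = torch.from_numpy(a)   # ndarray -> tensor, no copy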
Example #6
    def run(self, configuration: Configuration) -> None:
        inputs = torch.from_numpy(np.load(configuration.inputs))
        missing_mask = torch.from_numpy(np.load(configuration.missing_mask))

        assert inputs.shape == missing_mask.shape

        # the model needs np.nan in the missing values to work
        inputs = compose_with_mask(missing_mask,
                                   where_one=torch.empty_like(inputs).fill_(np.nan),
                                   where_zero=inputs,
                                   differentiable=False)  # cannot be differentiable with nans!

        # create the model
        model = IterativeImputer(random_state=configuration.get("seed", 0),
                                 estimator=ExtraTreeRegressor(),
                                 missing_values=np.nan)

        # fit the model (scikit-learn works on numpy arrays)
        model.fit(inputs.numpy())

        # save the model
        with open(create_parent_directories_if_needed(configuration.outputs), "wb") as model_file:
            pickle.dump(model, model_file)
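
For completeness, the imports this snippet depends on; note that IterativeImputer is still experimental in scikit-learn and must be enabled explicitly before it can be imported:

    import pickle

    import numpy as np
    import torch

    from sklearn.experimental import enable_iterative_imputer  # noqa: F401
    from sklearn.impute import IterativeImputer
    from sklearn.tree import ExtraTreeRegressor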
Example #7
    def train_generator_step(configuration: Configuration, metadata: Metadata,
                             architecture: Architecture,
                             batch: Batch) -> float:
        # clean previous gradients
        architecture.generator_optimizer.zero_grad()

        # generate a batch of fake features with the same size as the real feature batch
        generated = architecture.generator(batch["features"],
                                           missing_mask=batch["missing_mask"])
        # replace the missing features with the generated ones
        imputed = compose_with_mask(
            mask=batch["missing_mask"],
            differentiable=True,  # no NaNs here, so the differentiable path is safe
            where_one=generated,
            where_zero=batch["raw_features"])
        # generate hint
        hint = generate_hint(batch["missing_mask"],
                             configuration.hint_probability, metadata)

        # calculate loss
        loss = architecture.generator_loss(architecture=architecture,
                                           features=batch["raw_features"],
                                           generated=generated,
                                           imputed=imputed,
                                           hint=hint,
                                           non_missing_mask=inverse_mask(
                                               batch["missing_mask"]))

        # calculate gradients
        loss.backward()

        # update the generator weights
        architecture.generator_optimizer.step()

        # return the loss
        return to_cpu_if_was_in_gpu(loss).item()
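
A sketch of what generate_hint might do, following the hint mechanism of GAIN (Yoon et al., 2018): each mask entry is revealed to the discriminator with the given probability, and hidden entries are set to 0.5. The real helper also takes the metadata (presumably to treat multi-column variables consistently), which this sketch ignores; inverse_mask is presumably just 1 - mask.

    import torch
    from torch import Tensor

    def generate_hint(missing_mask: Tensor, hint_probability: float,
                      metadata=None) -> Tensor:
        revealed = (torch.rand_like(missing_mask) < hint_probability).float()
        return missing_mask * revealed + 0.5 * (1 - revealed)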
Example #8
    def forward(self, inputs: Tensor, missing_mask: Tensor) -> Tensor:
        return compose_with_mask(missing_mask,
                                 where_one=torch.zeros_like(inputs),
                                 where_zero=inputs,
                                 differentiable=self.differentiable)
Example #9
    def forward(self, inputs: Tensor, missing_mask: Tensor) -> Tensor:
        return compose_with_mask(missing_mask,
                                 where_one=torch.empty_like(inputs).normal_(
                                     self.noise_mean, self.noise_std),
                                 where_zero=inputs,
                                 differentiable=self.differentiable)
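
Examples #8 and #9 share Example #1's interface: a tensor of inputs plus a same-shaped 0/1 missing mask in, a filled tensor of the same shape out. A self-contained sanity check of that contract, with torch.where standing in for compose_with_mask and the zero imputation of Example #8 written out directly:

    import torch

    torch.manual_seed(0)
    inputs = torch.randn(4, 3)
    missing_mask = (torch.rand(4, 3) < 0.3).float()  # 1 marks a missing value

    # zero imputation (Example #8) without the helper
    filled = torch.where(missing_mask.bool(), torch.zeros_like(inputs), inputs)

    assert filled.shape == inputs.shape
    assert (filled[missing_mask.bool()] == 0).all()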