def create_test_performance_estimators(self):
     """Build, store and return the estimators tracked during testing.

     The list holds, in order: a ``FloatHelper`` named "reconstruction_loss",
     a ``LossHelper`` named "test_loss", an ``AccuracyHelper`` constructed
     with "test_" and a ``FloatHelper`` named "weight". The same list is
     assigned to ``self.test_performance_estimators``.
     """
     estimators = PerformanceList()
     helpers = (
         FloatHelper("reconstruction_loss"),
         LossHelper("test_loss"),
         AccuracyHelper("test_"),
         FloatHelper("weight"),
     )
     for helper in helpers:
         estimators += [helper]
     self.test_performance_estimators = estimators
     return estimators
 def create_training_performance_estimators(self):
     """Build, store and return the estimators tracked during training.

     One ``FloatHelper`` is registered per metric name, in the order listed
     below. The same list is assigned to
     ``self.training_performance_estimators``.
     """
     metric_names = (
         "train_critic_loss",
         "train_encoder_loss",
         "train_accuracy",
         "train_encoded_accuracy",
         "ratio",
     )
     estimators = PerformanceList()
     for metric_name in metric_names:
         estimators += [FloatHelper(metric_name)]
     self.training_performance_estimators = estimators
     return estimators
 def create_training_performance_estimators(self):
     """Build, store and return the estimators tracked during training.

     One ``FloatHelper`` is registered per metric name, in the order listed
     below. The same list is assigned to
     ``self.training_performance_estimators``.
     """
     metric_names = (
         "reconstruction_loss",
         "discriminator_loss",
         "generator_loss",
         "semisup_loss",
         "weight",
     )
     estimators = PerformanceList()
     for metric_name in metric_names:
         estimators += [FloatHelper(metric_name)]
     self.training_performance_estimators = estimators
     return estimators
예제 #4
0
    def train_autoencoder(self, epoch, performance_estimators=None):
        """Train ``self.net`` for one pass, using each input as its own target.

        Args:
            epoch: Epoch number; only used in the banner printed when the
                default estimators are created.
            performance_estimators: Optional estimator list to update. When
                ``None``, a list holding a "train_loss" ``LossHelper`` and a
                "train_grad_norm" ``FloatHelper`` is created.

        Returns:
            The estimator list, updated with the metrics of each batch.
        """
        if performance_estimators is None:
            performance_estimators = PerformanceList()
            performance_estimators += [LossHelper("train_loss")]
            performance_estimators += [FloatHelper("train_grad_norm")]
            print('\nTraining, epoch: %d' % epoch)

        self.net.train()
        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()

        train_loader_subset = self.problem.train_loader_subset_range(
            0, self.args.num_training)

        for batch_idx, (_, data_dict) in enumerate(train_loader_subset):
            inputs = data_dict["input"].to(self.device)

            # The reconstruction target is the input itself, with gradients
            # disabled on the target side.
            inputs, targets = Variable(inputs), Variable(inputs,
                                                         requires_grad=False)
            self.optimizer_training.zero_grad()
            outputs = self.net(inputs)

            supervised_loss = self.criterion(outputs, targets)
            optimized_loss = supervised_loss
            optimized_loss.backward()
            self.optimizer_training.step()
            performance_estimators.set_metric_with_outputs(
                batch_idx, "train_loss", supervised_loss.item(), outputs,
                targets)

            # Gradients are still populated after the optimizer step, so the
            # norm reported here is the one that produced this update.
            supervised_grad_norm = grad_norm(self.net.parameters())
            performance_estimators.set_metric(batch_idx, "train_grad_norm",
                                              supervised_grad_norm)

            performance_estimators.set_metric_with_outputs(
                batch_idx, "optimized_loss", optimized_loss.item(), outputs,
                targets)

            progress_bar(
                batch_idx * self.mini_batch_size, self.max_training_examples,
                " ".join([
                    performance_estimator.progress_message()
                    for performance_estimator in performance_estimators
                ]))

            # Stop once the configured number of training examples was seen.
            if (batch_idx +
                    1) * self.mini_batch_size > self.max_training_examples:
                break

        return performance_estimators
예제 #5
0
    def train_supervised(self, epoch):
        """Run one supervised training pass over the training subset.

        Batches are pulled through a ``MultiThreadedCpuGpuDataProvider`` and
        handed to ``self.train_one_batch``; the provider is always closed,
        even when a batch raises.

        Args:
            epoch: Epoch number, only used in the printed banner.

        Returns:
            The ``PerformanceList`` ("supervised_loss" plus the estimator
            built by ``AccuracyHelper("train_")``) passed to each
            ``train_one_batch`` call.
        """
        performance_estimators = PerformanceList()
        performance_estimators += [FloatHelper("supervised_loss")]
        performance_estimators += [AccuracyHelper("train_")]
        if self.use_cuda:
            self.tensor_cache.cuda()
        print('\nTraining, epoch: %d' % epoch)

        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()

        train_loader_subset = self.problem.train_loader_subset_range(
            0, self.args.num_training)
        data_provider = MultiThreadedCpuGpuDataProvider(
            iterator=zip(train_loader_subset),
            is_cuda=self.use_cuda,
            batch_names=["training"],
            requires_grad={"training": ["sbi"]},
            volatile={"training": ["metaData"]},
            recode_functions={
                "softmaxGenotype":
                lambda x: recode_for_label_smoothing(x, self.epsilon),
            })
        cudnn.benchmark = False
        try:
            for batch_idx, (_, data_dict) in enumerate(data_provider):
                training_batch = data_dict["training"]
                sbi = training_batch["sbi"]
                target_s = training_batch["softmaxGenotype"]
                metadata = training_batch["metaData"]

                self.train_one_batch(performance_estimators, batch_idx, sbi,
                                     target_s, metadata)
                # Stop once the configured number of examples was seen.
                if (batch_idx +
                        1) * self.mini_batch_size > self.max_training_examples:
                    break
        finally:
            data_provider.close()

        return performance_estimators
 def create_training_performance_estimators(self):
     """Build, store and return the estimators tracked during training.

     The list holds a ``FloatHelper`` named "supervised_loss" followed by an
     ``AccuracyHelper`` constructed with "train_". The same list is assigned
     to ``self.training_performance_estimators``.
     """
     estimators = PerformanceList()
     for helper in (FloatHelper("supervised_loss"), AccuracyHelper("train_")):
         estimators += [helper]
     self.training_performance_estimators = estimators
     return estimators
    def supervised_somatic(self, epoch, performance_estimators=None):
        """Train ``self.net`` for one pass on the two somatic targets.

        The network returns a pair ``(output_mut, output_frequency)``. The
        first is scored with cross-entropy against the class index derived
        from "isBaseMutated", the second with MSE against "somaticFrequency".
        The optimized loss is the sum of the two.

        Args:
            epoch: Epoch number; only used in the banner printed when the
                default estimators are created.
            performance_estimators: Optional estimator list to update. When
                ``None``, a list with "train_loss", "classification_loss",
                "frequency_loss" and "train_grad_norm" is created.

        Returns:
            The estimator list, updated with the metrics of each batch.
        """
        if performance_estimators is None:
            performance_estimators = PerformanceList()
            performance_estimators += [LossHelper("train_loss")]
            performance_estimators += [LossHelper("classification_loss")]
            performance_estimators += [LossHelper("frequency_loss")]
            performance_estimators += [FloatHelper("train_grad_norm")]
            print('\nTraining, epoch: %d' % epoch)

        self.net.train()
        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()

        train_loader_subset = self.problem.train_loader_subset_range(
            0, self.args.num_training)
        cross_entropy_loss = CrossEntropyLoss()
        mse_loss = MSELoss()

        for batch_idx, (_, data_dict) in enumerate(train_loader_subset):
            inputs = data_dict["input"].to(self.device)
            # Transform the one-hot encoding into a class index. The maximum
            # values are discarded; binding them to `max` would shadow the
            # builtin.
            _, is_mutated_base_target = data_dict["isBaseMutated"].to(
                self.device).max(dim=1)
            somatic_frequency_target = data_dict["somaticFrequency"].to(
                self.device)

            self.optimizer_training.zero_grad()
            output_mut, output_frequency = self.net(inputs)

            classification_loss = cross_entropy_loss(output_mut,
                                                     is_mutated_base_target)
            frequency_loss = mse_loss(output_frequency,
                                      somatic_frequency_target)
            optimized_loss = classification_loss + frequency_loss

            optimized_loss.backward()
            self.optimizer_training.step()
            performance_estimators.set_metric(batch_idx, "train_loss",
                                              optimized_loss.item())
            performance_estimators.set_metric(batch_idx, "classification_loss",
                                              classification_loss.item())
            performance_estimators.set_metric(batch_idx, "frequency_loss",
                                              frequency_loss.item())

            supervised_grad_norm = grad_norm(self.net.parameters())
            performance_estimators.set_metric(batch_idx, "train_grad_norm",
                                              supervised_grad_norm)

            progress_bar(
                batch_idx * self.mini_batch_size, self.max_training_examples,
                performance_estimators.progress_message(
                    ["classification_loss", "frequency_loss"]))

            # Stop once the configured number of training examples was seen.
            if (batch_idx +
                    1) * self.mini_batch_size > self.max_training_examples:
                break

        return performance_estimators
예제 #8
0
    def train_semisup(self, epoch):
        """Run one semi-supervised pass: classifier plus autoencoder losses.

        Labeled and unlabeled batches are drawn in lockstep. The optimized
        loss is the classifier loss on the labeled batch plus a
        reconstruction term: ``self.args.gamma`` times the reconstruction
        loss on the unlabeled batch, plus the reconstruction loss on the
        labeled batch. The data provider is always closed, even on error.

        Args:
            epoch: Epoch number, only used in the printed banner.

        Returns:
            The ``PerformanceList`` updated with the metrics of each batch.
        """
        performance_estimators = PerformanceList()
        performance_estimators += [FloatHelper("optimized_loss")]
        performance_estimators += [FloatHelper("supervised_loss")]
        performance_estimators += [FloatHelper("reconstruction_loss")]
        performance_estimators += [AccuracyHelper("train_")]

        print('\nTraining, epoch: %d' % epoch)

        self.net.train()

        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()

        train_loader_subset = self.problem.train_loader_subset_range(0, self.args.num_training)
        unlabeled_loader = self.problem.unlabeled_loader()
        data_provider = MultiThreadedCpuGpuDataProvider(
            iterator=zip(train_loader_subset, unlabeled_loader),
            is_cuda=self.use_cuda,
            batch_names=["training", "unlabeled"],
            requires_grad={"training": ["input"], "unlabeled": ["input"]},
            volatile={"training": ["metaData"], "unlabeled": []},
            recode_functions={"softmaxGenotype": lambda x: recode_for_label_smoothing(x, self.epsilon)})
        self.net.autoencoder.train()
        try:
            for batch_idx, (_, data_dict) in enumerate(data_provider):
                input_s = data_dict["training"]["input"]
                metadata = data_dict["training"]["metaData"]
                target_s = data_dict["training"]["softmaxGenotype"]
                input_u = data_dict["unlabeled"]["input"]

                # Reconstruction targets: copies of the inputs that do not
                # take part in the gradient computation.
                target_u = Variable(input_u.data, requires_grad=False)
                target_output_s = Variable(input_s.data, requires_grad=False)

                # Zero gradients:
                self.net.zero_grad()
                self.net.autoencoder.zero_grad()
                self.optimizer_training.zero_grad()

                output_s = self.net(input_s)
                output_u = self.net.autoencoder(input_u)
                input_output_s = self.net.autoencoder(input_s)
                output_s_p = self.get_p(output_s)

                _, target_index = torch.max(target_s, dim=1)
                supervised_loss = self.criterion_classifier(output_s, target_s)
                reconstruction_loss_unsup = self.criterion_autoencoder(output_u, target_u)
                reconstruction_loss_sup = self.criterion_autoencoder(input_output_s, target_output_s)
                reconstruction_loss = self.args.gamma * reconstruction_loss_unsup + reconstruction_loss_sup
                optimized_loss = supervised_loss + reconstruction_loss
                optimized_loss.backward()
                self.optimizer_training.step()

                # .item() extracts the Python scalar; indexing a 0-dim loss
                # with .data[0] fails on PyTorch >= 0.4.
                supervised_loss_value = supervised_loss.item()
                performance_estimators.set_metric(batch_idx, "supervised_loss", supervised_loss_value)
                performance_estimators.set_metric(batch_idx, "reconstruction_loss", reconstruction_loss.item())
                performance_estimators.set_metric(batch_idx, "optimized_loss", optimized_loss.item())
                performance_estimators.set_metric_with_outputs(batch_idx, "train_accuracy", supervised_loss_value,
                                                               output_s_p, targets=target_index)

                progress_bar(batch_idx * self.mini_batch_size,
                             self.max_training_examples,
                             performance_estimators.progress_message(["supervised_loss", "reconstruction_loss",
                                                                      "train_accuracy"]))

                # Stop once the configured number of examples was seen.
                if (batch_idx + 1) * self.mini_batch_size > self.max_training_examples:
                    break
        finally:
            data_provider.close()

        return performance_estimators
예제 #9
0
    def train_semisup_aae(self, epoch, performance_estimators=None):
        """Run one training pass of the semi-supervised adversarial autoencoder.

        Each batch goes through four phases, each followed by a step of the
        matching optimizer(s) and a reset of all gradients:

        1. reconstruction (``decoder_opt``, ``encoder_reconstruction_opt``),
        2. discriminators (``discriminator_cat_opt``,
           ``discriminator_prior_opt``),
        3. generator (``encoder_generator_opt``),
        4. weighted supervised loss (``encoder_semisup_opt``).

        The data provider is always closed, even when a batch raises.

        Args:
            epoch: Epoch number; only used in the banner printed when the
                default estimators are created.
            performance_estimators: Optional estimator list to update. When
                ``None``, a list with "reconstruction_loss",
                "discriminator_loss", "generator_loss", "supervised_loss"
                and "weight" is created.

        Returns:
            The estimator list, updated with the metrics of each batch.
        """
        if performance_estimators is None:
            performance_estimators = PerformanceList()
            performance_estimators += [FloatHelper("reconstruction_loss")]
            performance_estimators += [FloatHelper("discriminator_loss")]
            performance_estimators += [FloatHelper("generator_loss")]
            performance_estimators += [FloatHelper("supervised_loss")]
            performance_estimators += [FloatHelper("weight")]
            print('\nTraining, epoch: %d' % epoch)
        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()

        self.net.train()

        # Two independent iterations over the same training range, so each
        # step sees a pair of labeled batches.
        train_loader_subset1 = self.problem.train_loader_subset_range(
            0, self.args.num_training)
        train_loader_subset2 = self.problem.train_loader_subset_range(
            0, self.args.num_training)

        data_provider = MultiThreadedCpuGpuDataProvider(
            iterator=zip(train_loader_subset1, train_loader_subset2),
            is_cuda=self.use_cuda,
            batch_names=["training1", "training2"],
            requires_grad={
                "training1": ["input"],
                "training2": ["input"]
            },
            volatile={
                "training1": ["metaData"],
                "training2": ["metaData"]
            },
            recode_functions={
                "softmaxGenotype": recode_for_label_smoothing,
                "input": self.normalize_inputs
            })

        indel_weight = self.args.indel_weight_factor
        snp_weight = 1.0

        try:
            for batch_idx, (_, data_dict) in enumerate(data_provider):
                input_s1 = data_dict["training1"]["input"]
                input_s2 = data_dict["training2"]["input"]
                target_s1 = data_dict["training1"]["softmaxGenotype"]
                target_s2 = data_dict["training2"]["softmaxGenotype"]

                meta_data1 = data_dict["training1"]["metaData"]
                meta_data2 = data_dict["training2"]["metaData"]
                self.zero_grad_all_optimizers()

                # Train reconstruction phase:
                self.net.decoder.train()
                reconstruction_loss = self.net.get_crossconstruction_loss(
                    input_s1, input_s2, target_s2)
                reconstruction_loss.backward()
                for opt in [self.decoder_opt, self.encoder_reconstruction_opt]:
                    opt.step()

                # Train discriminators:
                self.net.encoder.train()
                self.net.discriminator_cat.train()
                self.net.discriminator_prior.train()
                self.zero_grad_all_optimizers()
                genotype_frequencies = self.class_frequencies[
                    "softmaxGenotype"]
                # Normalize the class frequencies so they sum to one.
                category_prior = (genotype_frequencies /
                                  torch.sum(genotype_frequencies)).numpy()
                discriminator_loss = self.net.get_discriminator_loss(
                    input_s1, category_prior=category_prior)
                discriminator_loss.backward()
                for opt in [
                        self.discriminator_cat_opt,
                        self.discriminator_prior_opt
                ]:
                    opt.step()
                self.zero_grad_all_optimizers()

                # Train generator:
                self.net.encoder.train()
                generator_loss = self.net.get_generator_loss(input_s1)
                generator_loss.backward()
                for opt in [self.encoder_generator_opt]:
                    opt.step()
                self.zero_grad_all_optimizers()

                # Train the supervised phase, scaling the loss by a weight
                # estimated either from the latent code or from the metadata.
                if self.use_pdf:
                    self.net.encoder.train()
                    _, latent_code = self.net.encoder(input_s1)
                    weight = self.estimate_example_density_weight(latent_code)
                else:
                    weight = self.estimate_batch_weight(
                        meta_data1,
                        indel_weight=indel_weight,
                        snp_weight=snp_weight)
                self.net.encoder.train()
                supervised_loss = self.net.get_crossencoder_supervised_loss(
                    input_s1, target_s1) * weight
                supervised_loss.backward()

                for opt in [self.encoder_semisup_opt]:
                    opt.step()
                self.zero_grad_all_optimizers()

                # .item() extracts the Python scalar; indexing a 0-dim loss
                # with .data[0] fails on PyTorch >= 0.4.
                performance_estimators.set_metric(batch_idx,
                                                  "reconstruction_loss",
                                                  reconstruction_loss.item())
                performance_estimators.set_metric(batch_idx,
                                                  "discriminator_loss",
                                                  discriminator_loss.item())
                performance_estimators.set_metric(batch_idx, "generator_loss",
                                                  generator_loss.item())
                performance_estimators.set_metric(batch_idx, "supervised_loss",
                                                  supervised_loss.item())
                performance_estimators.set_metric(batch_idx, "weight", weight)
                if not self.args.no_progress:
                    # Report the metric under the name it is recorded with
                    # above ("supervised_loss").
                    progress_bar(
                        batch_idx * self.mini_batch_size,
                        self.max_training_examples,
                        performance_estimators.progress_message([
                            "reconstruction_loss", "discriminator_loss",
                            "generator_loss", "supervised_loss"
                        ]))
                # Stop once the configured number of examples was seen.
                if ((batch_idx + 1) *
                        self.mini_batch_size) > self.max_training_examples:
                    break
        finally:
            data_provider.close()

        return performance_estimators
예제 #10
0
    def test_semisup_aae(self, epoch, performance_estimators=None):
        """Evaluate the adversarial autoencoder on the validation subset.

        For every validation batch this records the reconstruction loss, the
        example weight, the accuracy of the predicted categories and the
        weighted category loss. Afterwards, unless
        ``self.args.constant_learning_rates`` is set, every scheduler in
        ``self.schedulers`` is stepped with the metric named by
        ``self.get_test_metric_name()``.

        Args:
            epoch: Epoch number, printed in the banner and passed to the
                schedulers.
            performance_estimators: Optional estimator list to update. When
                ``None``, a list with "reconstruction_loss", "test_loss",
                the estimator built by ``AccuracyHelper("test_")`` and
                "weight" is created.

        Returns:
            The estimator list, updated with the metrics of each batch.

        Raises:
            AssertionError: If the test metric is not among the estimators.
        """
        print('\nTesting, epoch: %d' % epoch)
        if performance_estimators is None:
            performance_estimators = PerformanceList()
            performance_estimators += [FloatHelper("reconstruction_loss")]
            performance_estimators += [LossHelper("test_loss")]
            performance_estimators += [AccuracyHelper("test_")]
            performance_estimators += [FloatHelper("weight")]

        self.net.eval()
        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()
        validation_loader_subset = self.problem.validation_loader_range(
            0, self.args.num_validation)
        data_provider = MultiThreadedCpuGpuDataProvider(
            iterator=zip(validation_loader_subset),
            is_cuda=self.use_cuda,
            batch_names=["validation"],
            requires_grad={"validation": []},
            volatile={
                "validation": ["input", "softmaxGenotype"],
            },
            recode_functions={"input": self.normalize_inputs})
        try:
            for batch_idx, (_, data_dict) in enumerate(data_provider):
                input_s = data_dict["validation"]["input"]
                target_s = data_dict["validation"]["softmaxGenotype"]

                # Estimate the reconstruction loss on validation examples:
                reconstruction_loss = self.net.get_crossconstruction_loss(
                    input_s, input_s, target_s)

                # now evaluate prediction of categories:
                categories_predicted, latent_code = self.net.encoder(input_s)

                categories_predicted_p = self.get_p(categories_predicted)
                # NaN is the only value not equal to itself: replace NaN
                # probabilities with zero.
                categories_predicted_p[
                    categories_predicted_p != categories_predicted_p] = 0.0
                _, target_index = torch.max(target_s, dim=1)
                categories_loss = self.net.semisup_loss_criterion(
                    categories_predicted, target_s)

                weight = self.estimate_example_density_weight(latent_code)
                # .item() extracts the Python scalar; indexing a 0-dim loss
                # with .data[0] fails on PyTorch >= 0.4.
                reconstruction_loss_value = reconstruction_loss.item()
                performance_estimators.set_metric(batch_idx,
                                                  "reconstruction_loss",
                                                  reconstruction_loss_value)
                performance_estimators.set_metric(batch_idx, "weight", weight)
                # NOTE(review): the loss passed along with "test_accuracy" is
                # the reconstruction loss, not the category loss; confirm the
                # accuracy estimator does not depend on it.
                performance_estimators.set_metric_with_outputs(
                    batch_idx, "test_accuracy", reconstruction_loss_value,
                    categories_predicted_p, target_index)
                performance_estimators.set_metric_with_outputs(
                    batch_idx, "test_loss", categories_loss.item() * weight,
                    categories_predicted_p, target_s)

                if not self.args.no_progress:
                    progress_bar(
                        batch_idx * self.mini_batch_size,
                        self.max_validation_examples,
                        performance_estimators.progress_message([
                            "test_loss", "test_accuracy", "reconstruction_loss"
                        ]))

                # Stop once the configured number of examples was seen.
                if ((batch_idx + 1) *
                        self.mini_batch_size) > self.max_validation_examples:
                    break
        finally:
            data_provider.close()
        # Apply learning rate schedules:
        test_metric = performance_estimators.get_metric(
            self.get_test_metric_name())
        assert test_metric is not None, (
            self.get_test_metric_name() +
            " must be found among estimated performance metrics")
        if not self.args.constant_learning_rates:
            for scheduler in self.schedulers:
                scheduler.step(test_metric, epoch)
        # Run the garbage collector to try to release memory we no longer need:
        import gc
        gc.collect()
        return performance_estimators