Exemple #1
0
    def test_somatic_classifer(self, epoch, performance_estimators=None):
        """Evaluate the somatic classifier on validation batches for one epoch.

        Puts ``self.net`` in eval mode and iterates over
        ``self.problem.validation_loader_range(0, self.args.num_validation)``.
        For each batch it records the cross-entropy loss on the mutated-base
        output ("classification_loss"), the MSE loss on the somatic frequency
        output ("frequency_loss") and their sum ("test_loss"). Unless
        ``self.args.constant_learning_rates`` is set, the value returned by
        ``get_metric("test_loss")`` is passed to ``self.scheduler_train.step``.

        :param epoch: index of the current epoch (printed, and forwarded to
            the scheduler).
        :param performance_estimators: estimators to fill; when None, a list
            holding "test_loss", "classification_loss" and "frequency_loss"
            loss helpers is created.
        :return: the performance estimators populated during this pass.
        """
        print('\nTesting, epoch: %d' % epoch)
        if performance_estimators is None:
            performance_estimators = PerformanceList()
            performance_estimators += [LossHelper("test_loss")]
            performance_estimators += [LossHelper("classification_loss")]
            performance_estimators += [LossHelper("frequency_loss")]

        self.net.eval()
        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()
        cross_entropy_loss = CrossEntropyLoss()
        mse_loss = MSELoss()
        for batch_idx, (_, data_dict) in enumerate(
                self.problem.validation_loader_range(
                    0, self.args.num_validation)):
            inputs = data_dict["input"]
            is_mutated_base_target = data_dict["isBaseMutated"]
            # transform one-hot encoding into a class index
            # (the max values themselves are not needed):
            _, is_mutated_base_target = is_mutated_base_target.max(dim=1)
            somatic_frequency_target = data_dict["somaticFrequency"]
            if self.use_cuda:
                inputs = inputs.cuda()
                is_mutated_base_target = is_mutated_base_target.cuda()
                somatic_frequency_target = somatic_frequency_target.cuda()

            # Mark the inputs volatile as well as the targets, so that the
            # forward pass does not keep an autograd graph during evaluation.
            inputs = Variable(inputs, volatile=True)
            mut_targets = Variable(is_mutated_base_target, volatile=True)
            freq_targets = Variable(somatic_frequency_target, volatile=True)

            is_base_mutated, output_frequency = self.net(inputs)
            classification_loss = cross_entropy_loss(is_base_mutated,
                                                     mut_targets)
            frequency_loss = mse_loss(output_frequency, freq_targets)
            test_loss = classification_loss + frequency_loss

            performance_estimators.set_metric(batch_idx, "test_loss",
                                              test_loss.data[0])
            performance_estimators.set_metric(batch_idx, "classification_loss",
                                              classification_loss.data[0])
            performance_estimators.set_metric(batch_idx, "frequency_loss",
                                              frequency_loss.data[0])

            progress_bar(
                batch_idx * self.mini_batch_size, self.max_validation_examples,
                performance_estimators.progress_message(["test_loss"]))

            # Stop once the configured number of validation examples is seen.
            if ((batch_idx + 1) *
                    self.mini_batch_size) > self.max_validation_examples:
                break

        # Apply learning rate schedule, driven by the test loss:
        test_loss_value = performance_estimators.get_metric("test_loss")
        assert test_loss_value is not None, "test_loss must be found among estimated performance metrics"
        if not self.args.constant_learning_rates:
            self.scheduler_train.step(test_loss_value, epoch)
        return performance_estimators
Exemple #2
0
    def test_semi_sup(self, epoch):
        """Evaluate the semi-supervised model on validation batches for one epoch.

        Puts ``self.net`` in eval mode, then for each validation batch computes
        the classifier loss of ``self.net`` against the "softmaxGenotype"
        target and the reconstruction loss of ``self.net.autoencoder`` against
        its own input. Both losses and the classification accuracy are
        recorded. Unless ``self.args.constant_learning_rates`` is set, the
        metric named by ``self.get_test_metric_name()`` is passed to
        ``self.scheduler_train.step``.

        :param epoch: index of the current epoch (printed, and forwarded to
            the scheduler).
        :return: the performance estimators populated during this pass.
        """
        print('\nTesting, epoch: %d' % epoch)

        performance_estimators = PerformanceList()
        performance_estimators += [LossHelper("test_supervised_loss")]
        performance_estimators += [LossHelper("test_reconstruction_loss")]
        performance_estimators += [AccuracyHelper("test_")]

        self.net.eval()
        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()
        validation_loader_subset = self.problem.validation_loader_range(0, self.args.num_validation)
        data_provider = MultiThreadedCpuGpuDataProvider(iterator=zip(validation_loader_subset), is_cuda=self.use_cuda,
                                                        batch_names=["validation"],
                                                        requires_grad={"validation": []},
                                                        volatile={"validation": ["input", "softmaxGenotype"]})
        try:
            for batch_idx, (_, data_dict) in enumerate(data_provider):
                input_s = data_dict["validation"]["input"]
                target_s = data_dict["validation"]["softmaxGenotype"]
                # we need copies of the same tensors: the autoencoder is
                # scored on how well it reproduces its own input.
                input_u, target_u = Variable(input_s.data, volatile=True), Variable(input_s.data, volatile=True)

                output_s = self.net(input_s)
                output_u = self.net.autoencoder(input_u)
                output_s_p = self.get_p(output_s)

                # Index of the largest target entry along dim 1, used as the
                # expected class when scoring accuracy.
                _, target_index = torch.max(target_s, dim=1)

                supervised_loss = self.criterion_classifier(output_s, target_s)
                reconstruction_loss = self.criterion_autoencoder(output_u, target_u)

                performance_estimators.set_metric(batch_idx, "test_supervised_loss", supervised_loss.data[0])
                performance_estimators.set_metric(batch_idx, "test_reconstruction_loss", reconstruction_loss.data[0])
                performance_estimators.set_metric_with_outputs(batch_idx, "test_accuracy", supervised_loss.data[0],
                                                               output_s_p, targets=target_index)

                progress_bar(batch_idx * self.mini_batch_size, self.max_validation_examples,
                             performance_estimators.progress_message(["test_supervised_loss", "test_reconstruction_loss",
                                                                      "test_accuracy"]))

                # Stop once the configured number of validation examples is seen.
                if ((batch_idx + 1) * self.mini_batch_size) > self.max_validation_examples:
                    break
        finally:
            # Always release the data provider, even if a batch failed.
            data_provider.close()
        metric_name = self.get_test_metric_name()
        test_metric = performance_estimators.get_metric(metric_name)
        # The separating space was missing, which glued the metric name to
        # the rest of the message.
        assert test_metric is not None, (
            metric_name + " must be found among estimated performance metrics")
        if not self.args.constant_learning_rates:
            self.scheduler_train.step(test_metric, epoch)
        return performance_estimators
Exemple #3
0
    def train_autoencoder(self, epoch, performance_estimators=None):
        """Train the autoencoder for one epoch on the training subset.

        Each batch's "input" is used both as network input and as the
        reconstruction target. For every batch the method zeroes the
        gradients of ``self.optimizer_training``, runs a forward pass,
        back-propagates ``self.criterion(outputs, targets)``, takes an
        optimizer step and records the loss and the gradient norm.

        :param epoch: index of the current epoch (only printed, and only when
            the default estimators are created).
        :param performance_estimators: estimators to fill; when None, a list
            holding a "train_loss" loss helper and a "train_grad_norm" float
            helper is created.
        :return: the performance estimators populated during this pass.
        """
        if performance_estimators is None:
            performance_estimators = PerformanceList()
            performance_estimators += [LossHelper("train_loss")]
            performance_estimators += [FloatHelper("train_grad_norm")]
            print('\nTraining, epoch: %d' % epoch)

        self.net.train()
        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()

        train_loader_subset = self.problem.train_loader_subset_range(
            0, self.args.num_training)

        for batch_idx, (_, data_dict) in enumerate(train_loader_subset):
            inputs = data_dict["input"].to(self.device)

            # The reconstruction target is the input itself.
            inputs, targets = Variable(inputs), Variable(inputs,
                                                         requires_grad=False)
            self.optimizer_training.zero_grad()
            outputs = self.net(inputs)

            supervised_loss = self.criterion(outputs, targets)
            optimized_loss = supervised_loss
            optimized_loss.backward()
            self.optimizer_training.step()
            performance_estimators.set_metric_with_outputs(
                batch_idx, "train_loss", supervised_loss.item(), outputs,
                targets)

            # Gradients are still populated after the optimizer step.
            supervised_grad_norm = grad_norm(self.net.parameters())
            performance_estimators.set_metric(batch_idx, "train_grad_norm",
                                              supervised_grad_norm)

            # NOTE(review): the default estimator list created above has no
            # "optimized_loss" entry -- confirm this call is meant for
            # caller-supplied estimators.
            performance_estimators.set_metric_with_outputs(
                batch_idx, "optimized_loss", optimized_loss.item(), outputs,
                targets)

            progress_bar(
                batch_idx * self.mini_batch_size, self.max_training_examples,
                " ".join([
                    performance_estimator.progress_message()
                    for performance_estimator in performance_estimators
                ]))

            # Stop once the configured number of training examples is seen.
            if (batch_idx +
                    1) * self.mini_batch_size > self.max_training_examples:
                break

        return performance_estimators
 def create_test_performance_estimators(self):
     """Build the test-time estimator list, store it on the instance and return it.

     The list holds loss and accuracy helpers for the plain and the
     "encoded" supervised outputs, and is also kept on
     ``self.test_performance_estimators``.
     """
     estimators = PerformanceList()
     for helper in (
             LossHelper("test_supervised_loss"),
             AccuracyHelper("test_"),
             LossHelper("test_encoded_supervised_loss"),
             AccuracyHelper("test_encoded_"),
     ):
         estimators += [helper]
     self.test_performance_estimators = estimators
     return estimators
 def create_test_performance_estimators(self):
     """Build the test-time estimator list, store it on the instance and return it.

     The list tracks "reconstruction_loss", "test_loss", accuracy (prefix
     "test_") and "weight", and is also kept on
     ``self.test_performance_estimators``.
     """
     estimators = PerformanceList()
     for helper in (
             FloatHelper("reconstruction_loss"),
             LossHelper("test_loss"),
             AccuracyHelper("test_"),
             FloatHelper("weight"),
     ):
         estimators += [helper]
     self.test_performance_estimators = estimators
     return estimators
 def create_training_performance_estimators(self):
     """Build the training estimator list, store it on the instance and return it.

     Every entry is a float helper; the list is also kept on
     ``self.training_performance_estimators``.
     """
     estimators = PerformanceList()
     for metric_name in ("train_critic_loss",
                         "train_encoder_loss",
                         "train_accuracy",
                         "train_encoded_accuracy",
                         "ratio"):
         estimators += [FloatHelper(metric_name)]
     self.training_performance_estimators = estimators
     return estimators
 def create_training_performance_estimators(self):
     """Build the training estimator list, store it on the instance and return it.

     Every entry is a float helper; the list is also kept on
     ``self.training_performance_estimators``.
     """
     estimators = PerformanceList()
     for metric_name in ("reconstruction_loss",
                         "discriminator_loss",
                         "generator_loss",
                         "semisup_loss",
                         "weight"):
         estimators += [FloatHelper(metric_name)]
     self.training_performance_estimators = estimators
     return estimators
    def training_loops(self,
                       training_loop_method=None,
                       testing_loop_method=None):
        """Run the configured number of epochs, alternating training and testing.

        For each epoch from ``self.start_epoch`` over ``self.args.num_epochs``
        epochs, calls ``training_loop_method(epoch)``, appends the learning
        rate helper, and adds test performance. Testing is run on the first
        epoch and on every epoch for which ``self.epoch_is_test_epoch(epoch)``
        is true; on other epochs the most recent test results are reused so
        that every epoch logs the same set of metrics. The loop stops early
        when ``self.log_performance_metrics`` requests it.

        :param training_loop_method: callable taking the epoch and returning
            the training performance estimators. Required.
        :param testing_loop_method: callable taking the epoch and returning
            the test performance estimators. Required.
        :return list of performance estimators that observed performance on the last epoch run.
        """
        assert training_loop_method is not None, "training_loop_method is required"
        assert testing_loop_method is not None, "testing_loop_method is required"
        header_written = False

        lr_train_helper = LearningRateHelper(scheduler=self.scheduler_train,
                                             learning_rate_name="train_lr")
        previous_test_perfs = None
        perfs = PerformanceList()
        self.optimizer_training = self.get_default_optimizer_training(
            self.net, self.args.optimizer, self.args)

        for epoch in range(self.start_epoch,
                           self.start_epoch + self.args.num_epochs):
            perfs = PerformanceList()
            perfs += training_loop_method(epoch)

            perfs += [lr_train_helper]
            # previous_test_perfs was never assigned before, so the
            # "is None" test was always true and epoch_is_test_epoch() was
            # never consulted. Remember the latest test results and reuse
            # them on epochs where testing is skipped.
            if previous_test_perfs is None or self.epoch_is_test_epoch(epoch):
                previous_test_perfs = testing_loop_method(epoch)
            perfs += previous_test_perfs

            if not header_written:
                header_written = True
                self.log_performance_header(perfs)

            early_stop, perfs = self.log_performance_metrics(epoch, perfs)
            if early_stop:
                # early stopping requested.
                return perfs

        return perfs
Exemple #9
0
    def train_supervised(self, epoch):
        """Run one epoch of supervised training over the training subset.

        Batches are pulled through a ``MultiThreadedCpuGpuDataProvider`` and
        each one is handed to ``self.train_one_batch`` together with the
        performance estimators. "softmaxGenotype" targets are recoded with
        ``recode_for_label_smoothing(x, self.epsilon)``. The provider is
        closed when the loop ends, whether normally or through an exception.

        :param epoch: index of the current epoch (only printed).
        :return: the performance estimators ("supervised_loss" and accuracy
            with prefix "train_") populated during this pass.
        """
        performance_estimators = PerformanceList()
        performance_estimators += [FloatHelper("supervised_loss")]
        performance_estimators += [AccuracyHelper("train_")]
        if self.use_cuda:
            self.tensor_cache.cuda()
        print('\nTraining, epoch: %d' % epoch)

        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()

        train_loader_subset = self.problem.train_loader_subset_range(
            0, self.args.num_training)
        data_provider = MultiThreadedCpuGpuDataProvider(
            iterator=zip(train_loader_subset),
            is_cuda=self.use_cuda,
            batch_names=["training"],
            requires_grad={"training": ["sbi"]},
            volatile={"training": ["metaData"]},
            recode_functions={
                "softmaxGenotype":
                lambda x: recode_for_label_smoothing(x, self.epsilon),
            })
        cudnn.benchmark = False
        try:
            for batch_idx, (_, data_dict) in enumerate(data_provider):
                sbi = data_dict["training"]["sbi"]
                target_s = data_dict["training"]["softmaxGenotype"]
                metadata = data_dict["training"]["metaData"]

                self.train_one_batch(performance_estimators, batch_idx, sbi,
                                     target_s, metadata)
                # Stop once the configured number of training examples is seen.
                if (batch_idx +
                        1) * self.mini_batch_size > self.max_training_examples:
                    break
        finally:
            # Always release the data provider, even if a batch failed.
            data_provider.close()

        return performance_estimators
Exemple #10
0
    def test_autoencoder(self, epoch, performance_estimators=None):
        """Evaluate the autoencoder on validation batches for one epoch.

        With the network in eval mode, each validation batch's "input" is
        fed to ``self.net`` and scored with ``self.criterion`` against the
        same input. Unless ``self.args.constant_learning_rates`` is set, the
        value returned by ``get_metric("test_loss")`` is passed to
        ``self.scheduler_train.step``.

        :param epoch: index of the current epoch (printed, and forwarded to
            the scheduler).
        :param performance_estimators: estimators to fill; when None, a list
            holding a single "test_loss" loss helper is created.
        :return: the performance estimators populated during this pass.
        """
        print('\nTesting, epoch: %d' % epoch)
        if performance_estimators is None:
            performance_estimators = PerformanceList()
            performance_estimators += [LossHelper("test_loss")]

        self.net.eval()
        for estimator in performance_estimators:
            estimator.init_performance_metrics()

        validation_batches = self.problem.validation_loader_range(
            0, self.args.num_validation)
        for batch_idx, (_, data_dict) in enumerate(validation_batches):
            batch_input = data_dict["input"]
            if self.use_cuda:
                batch_input = batch_input.cuda()

            # The reconstruction target is the input itself.
            inputs = Variable(batch_input, volatile=True)
            targets = Variable(batch_input, volatile=True)

            outputs = self.net(inputs)
            loss = self.criterion(outputs, targets)

            performance_estimators.set_metric_with_outputs(
                batch_idx, "test_loss", loss.data[0], outputs, targets)

            examples_before_batch = batch_idx * self.mini_batch_size
            progress_bar(
                examples_before_batch, self.max_validation_examples,
                performance_estimators.progress_message(["test_loss"]))

            # Stop once the configured number of validation examples is seen.
            examples_after_batch = (batch_idx + 1) * self.mini_batch_size
            if examples_after_batch > self.max_validation_examples:
                break

        # Apply learning rate schedule, driven by the test loss:
        validation_loss = performance_estimators.get_metric("test_loss")
        assert validation_loss is not None, "test_loss must be found among estimated performance metrics"
        if not self.args.constant_learning_rates:
            self.scheduler_train.step(validation_loss, epoch)
        return performance_estimators
 def create_training_performance_estimators(self):
     """Build the training estimator list, store it on the instance and return it.

     The list tracks "supervised_loss" and accuracy (prefix "train_"), and
     is also kept on ``self.training_performance_estimators``.
     """
     estimators = PerformanceList()
     for helper in (FloatHelper("supervised_loss"),
                    AccuracyHelper("train_")):
         estimators += [helper]
     self.training_performance_estimators = estimators
     return estimators
    def supervised_somatic(self, epoch, performance_estimators=None):
        """Train the somatic model for one epoch on the training subset.

        For every batch the optimized loss is the sum of a cross-entropy loss
        on the mutated-base output and an MSE loss on the somatic frequency
        output. The method zeroes the gradients of
        ``self.optimizer_training``, back-propagates the summed loss, takes
        an optimizer step and records the three losses and the gradient norm.

        :param epoch: index of the current epoch (only printed, and only when
            the default estimators are created).
        :param performance_estimators: estimators to fill; when None, a list
            holding "train_loss", "classification_loss", "frequency_loss" and
            "train_grad_norm" helpers is created.
        :return: the performance estimators populated during this pass.
        """
        if performance_estimators is None:
            performance_estimators = PerformanceList()
            performance_estimators += [LossHelper("train_loss")]
            performance_estimators += [LossHelper("classification_loss")]
            performance_estimators += [LossHelper("frequency_loss")]
            performance_estimators += [FloatHelper("train_grad_norm")]
            print('\nTraining, epoch: %d' % epoch)

        self.net.train()
        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()

        train_loader_subset = self.problem.train_loader_subset_range(
            0, self.args.num_training)
        cross_entropy_loss = CrossEntropyLoss()
        mse_loss = MSELoss()

        for batch_idx, (_, data_dict) in enumerate(train_loader_subset):
            inputs = data_dict["input"].to(self.device)
            is_mutated_base_target = data_dict["isBaseMutated"].to(self.device)
            # transform one-hot encoding into a class index
            # (the max values themselves are not needed):
            _, is_mutated_base_target = is_mutated_base_target.max(dim=1)
            somatic_frequency_target = data_dict["somaticFrequency"].to(
                self.device)

            self.optimizer_training.zero_grad()
            output_mut, output_frequency = self.net(inputs)

            classification_loss = cross_entropy_loss(output_mut,
                                                     is_mutated_base_target)
            frequency_loss = mse_loss(output_frequency,
                                      somatic_frequency_target)
            optimized_loss = classification_loss + frequency_loss

            optimized_loss.backward()
            self.optimizer_training.step()
            performance_estimators.set_metric(batch_idx, "train_loss",
                                              optimized_loss.item())
            performance_estimators.set_metric(batch_idx, "classification_loss",
                                              classification_loss.item())
            performance_estimators.set_metric(batch_idx, "frequency_loss",
                                              frequency_loss.item())

            # Gradients are still populated after the optimizer step.
            supervised_grad_norm = grad_norm(self.net.parameters())
            performance_estimators.set_metric(batch_idx, "train_grad_norm",
                                              supervised_grad_norm)

            progress_bar(
                batch_idx * self.mini_batch_size, self.max_training_examples,
                performance_estimators.progress_message(
                    ["classification_loss", "frequency_loss"]))

            # Stop once the configured number of training examples is seen.
            if (batch_idx +
                    1) * self.mini_batch_size > self.max_training_examples:
                break

        return performance_estimators
Exemple #13
0
    def train_semisup(self, epoch):
        """Run one epoch of semi-supervised training.

        Labeled batches (from the training subset) and unlabeled batches are
        drawn together. The optimized loss is the classifier loss on the
        labeled input plus a reconstruction loss, where the reconstruction
        loss is ``self.args.gamma`` times the autoencoder loss on the
        unlabeled input plus the autoencoder loss on the labeled input.
        The data provider is closed when the loop ends, whether normally or
        through an exception.

        :param epoch: index of the current epoch (only printed).
        :return: the performance estimators ("optimized_loss",
            "supervised_loss", "reconstruction_loss" and accuracy with prefix
            "train_") populated during this pass.
        """
        performance_estimators = PerformanceList()
        performance_estimators += [FloatHelper("optimized_loss")]
        performance_estimators += [FloatHelper("supervised_loss")]
        performance_estimators += [FloatHelper("reconstruction_loss")]
        performance_estimators += [AccuracyHelper("train_")]

        print('\nTraining, epoch: %d' % epoch)

        self.net.train()

        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()

        train_loader_subset = self.problem.train_loader_subset_range(0, self.args.num_training)
        unlabeled_loader = self.problem.unlabeled_loader()
        data_provider = MultiThreadedCpuGpuDataProvider(
            iterator=zip(train_loader_subset, unlabeled_loader),
            is_cuda=self.use_cuda,
            batch_names=["training", "unlabeled"],
            requires_grad={"training": ["input"], "unlabeled": ["input"]},
            volatile={"training": ["metaData"], "unlabeled": []},
            recode_functions={"softmaxGenotype": lambda x: recode_for_label_smoothing(x, self.epsilon)})
        self.net.autoencoder.train()
        try:
            for batch_idx, (_, data_dict) in enumerate(data_provider):
                input_s = data_dict["training"]["input"]
                target_s = data_dict["training"]["softmaxGenotype"]
                input_u = data_dict["unlabeled"]["input"]

                # need a copy of input_u and input_s as output: the
                # autoencoder is trained to reproduce its own input.
                target_u = Variable(input_u.data, requires_grad=False)
                target_output_s = Variable(input_s.data, requires_grad=False)

                # Zero gradients:
                self.net.zero_grad()
                self.net.autoencoder.zero_grad()
                self.optimizer_training.zero_grad()

                output_s = self.net(input_s)
                output_u = self.net.autoencoder(input_u)
                input_output_s = self.net.autoencoder(input_s)
                output_s_p = self.get_p(output_s)

                # Index of the largest target entry along dim 1, used as the
                # expected class when scoring accuracy.
                _, target_index = torch.max(target_s, dim=1)
                supervised_loss = self.criterion_classifier(output_s, target_s)
                reconstruction_loss_unsup = self.criterion_autoencoder(output_u, target_u)
                reconstruction_loss_sup = self.criterion_autoencoder(input_output_s, target_output_s)
                # gamma scales only the unlabeled reconstruction term.
                reconstruction_loss = self.args.gamma * reconstruction_loss_unsup + reconstruction_loss_sup
                optimized_loss = supervised_loss + reconstruction_loss
                optimized_loss.backward()
                self.optimizer_training.step()
                performance_estimators.set_metric(batch_idx, "supervised_loss", supervised_loss.data[0])
                performance_estimators.set_metric(batch_idx, "reconstruction_loss", reconstruction_loss.data[0])
                performance_estimators.set_metric(batch_idx, "optimized_loss", optimized_loss.data[0])
                performance_estimators.set_metric_with_outputs(batch_idx, "train_accuracy", supervised_loss.data[0],
                                                               output_s_p, targets=target_index)

                progress_bar(batch_idx * self.mini_batch_size,
                             self.max_training_examples,
                             performance_estimators.progress_message(["supervised_loss", "reconstruction_loss",
                                                                      "train_accuracy"]))

                # Stop once the configured number of training examples is seen.
                if (batch_idx + 1) * self.mini_batch_size > self.max_training_examples:
                    break
        finally:
            # Always release the data provider, even if a batch failed.
            data_provider.close()

        return performance_estimators
Exemple #14
0
    def train_semisup_aae(self, epoch, performance_estimators=None):
        """Run one epoch of semi-supervised adversarial autoencoder training.

        Two independent passes over the training subset are zipped together.
        Each batch goes through four phases, each followed by a step of the
        matching optimizers and a reset of all gradients:

        1. reconstruction (``self.decoder_opt``,
           ``self.encoder_reconstruction_opt``);
        2. discriminators (``self.discriminator_cat_opt``,
           ``self.discriminator_prior_opt``);
        3. generator (``self.encoder_generator_opt``);
        4. supervised loss scaled by a per-batch weight
           (``self.encoder_semisup_opt``).

        The data provider is closed when the loop ends, whether normally or
        through an exception.

        :param epoch: index of the current epoch (only printed, and only when
            the default estimators are created).
        :param performance_estimators: estimators to fill; when None, a list
            of float helpers for "reconstruction_loss", "discriminator_loss",
            "generator_loss", "supervised_loss" and "weight" is created.
        :return: the performance estimators populated during this pass.
        """
        if performance_estimators is None:
            performance_estimators = PerformanceList()
            performance_estimators += [FloatHelper("reconstruction_loss")]
            performance_estimators += [FloatHelper("discriminator_loss")]
            performance_estimators += [FloatHelper("generator_loss")]
            performance_estimators += [FloatHelper("supervised_loss")]
            performance_estimators += [FloatHelper("weight")]
            print('\nTraining, epoch: %d' % epoch)

        self.net.train()
        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()

        train_loader_subset1 = self.problem.train_loader_subset_range(
            0, self.args.num_training)
        train_loader_subset2 = self.problem.train_loader_subset_range(
            0, self.args.num_training)

        data_provider = MultiThreadedCpuGpuDataProvider(
            iterator=zip(train_loader_subset1, train_loader_subset2),
            is_cuda=self.use_cuda,
            batch_names=["training1", "training2"],
            requires_grad={
                "training1": ["input"],
                "training2": ["input"]
            },
            volatile={
                "training1": ["metaData"],
                "training2": ["metaData"]
            },
            recode_functions={
                "softmaxGenotype": recode_for_label_smoothing,
                "input": self.normalize_inputs
            })

        indel_weight = self.args.indel_weight_factor
        snp_weight = 1.0

        try:
            for batch_idx, (_, data_dict) in enumerate(data_provider):
                input_s1 = data_dict["training1"]["input"]
                input_s2 = data_dict["training2"]["input"]
                target_s1 = data_dict["training1"]["softmaxGenotype"]
                target_s2 = data_dict["training2"]["softmaxGenotype"]

                meta_data1 = data_dict["training1"]["metaData"]
                self.zero_grad_all_optimizers()

                # Train reconstruction phase:
                self.net.decoder.train()
                reconstruction_loss = self.net.get_crossconstruction_loss(
                    input_s1, input_s2, target_s2)
                reconstruction_loss.backward()
                for opt in [self.decoder_opt, self.encoder_reconstruction_opt]:
                    opt.step()

                # Train discriminators:
                self.net.encoder.train()
                self.net.discriminator_cat.train()
                self.net.discriminator_prior.train()
                self.zero_grad_all_optimizers()
                genotype_frequencies = self.class_frequencies[
                    "softmaxGenotype"]
                # Normalize the class frequencies so they sum to one.
                category_prior = (genotype_frequencies /
                                  torch.sum(genotype_frequencies)).numpy()
                discriminator_loss = self.net.get_discriminator_loss(
                    input_s1, category_prior=category_prior)
                discriminator_loss.backward()
                for opt in [
                        self.discriminator_cat_opt,
                        self.discriminator_prior_opt
                ]:
                    opt.step()
                self.zero_grad_all_optimizers()

                # Train generator:
                self.net.encoder.train()
                generator_loss = self.net.get_generator_loss(input_s1)
                generator_loss.backward()
                for opt in [self.encoder_generator_opt]:
                    opt.step()
                self.zero_grad_all_optimizers()

                # Weight the supervised loss either by an estimate derived
                # from the latent code, or by a weight computed from the
                # batch metadata with the indel/SNP factors.
                if self.use_pdf:
                    self.net.encoder.train()
                    _, latent_code = self.net.encoder(input_s1)
                    weight = self.estimate_example_density_weight(latent_code)
                else:
                    weight = self.estimate_batch_weight(
                        meta_data1,
                        indel_weight=indel_weight,
                        snp_weight=snp_weight)
                self.net.encoder.train()
                supervised_loss = self.net.get_crossencoder_supervised_loss(
                    input_s1, target_s1) * weight
                supervised_loss.backward()

                for opt in [self.encoder_semisup_opt]:
                    opt.step()
                self.zero_grad_all_optimizers()

                performance_estimators.set_metric(batch_idx,
                                                  "reconstruction_loss",
                                                  reconstruction_loss.data[0])
                performance_estimators.set_metric(batch_idx,
                                                  "discriminator_loss",
                                                  discriminator_loss.data[0])
                performance_estimators.set_metric(batch_idx, "generator_loss",
                                                  generator_loss.data[0])
                performance_estimators.set_metric(batch_idx, "supervised_loss",
                                                  supervised_loss.data[0])
                performance_estimators.set_metric(batch_idx, "weight", weight)
                if not self.args.no_progress:
                    # Report the metric under the name it is recorded with
                    # above ("supervised_loss"); the progress bar previously
                    # asked for "semisup_loss", which this method never sets.
                    progress_bar(
                        batch_idx * self.mini_batch_size,
                        self.max_training_examples,
                        performance_estimators.progress_message([
                            "reconstruction_loss", "discriminator_loss",
                            "generator_loss", "supervised_loss"
                        ]))
                # Stop once the configured number of training examples is seen.
                if ((batch_idx + 1) *
                        self.mini_batch_size) > self.max_training_examples:
                    break
        finally:
            # Always release the data provider, even if a batch failed.
            data_provider.close()

        return performance_estimators
Exemple #15
0
    def test_semisup_aae(self, epoch, performance_estimators=None):
        """Evaluate the semi-supervised adversarial autoencoder for one epoch.

        With the network in eval mode, each validation batch is scored for
        reconstruction loss, for the category loss of the encoder's
        predictions (multiplied by a weight estimated from the latent code)
        and for classification accuracy. Unless
        ``self.args.constant_learning_rates`` is set, the metric named by
        ``self.get_test_metric_name()`` is passed to every scheduler in
        ``self.schedulers``. The data provider is closed when the loop ends,
        whether normally or through an exception.

        :param epoch: index of the current epoch (printed, and forwarded to
            the schedulers).
        :param performance_estimators: estimators to fill; when None, a list
            tracking "reconstruction_loss", "test_loss", accuracy (prefix
            "test_") and "weight" is created.
        :return: the performance estimators populated during this pass.
        """
        print('\nTesting, epoch: %d' % epoch)
        if performance_estimators is None:
            performance_estimators = PerformanceList()
            performance_estimators += [FloatHelper("reconstruction_loss")]
            performance_estimators += [LossHelper("test_loss")]
            performance_estimators += [AccuracyHelper("test_")]
            performance_estimators += [FloatHelper("weight")]

        self.net.eval()
        for performance_estimator in performance_estimators:
            performance_estimator.init_performance_metrics()
        validation_loader_subset = self.problem.validation_loader_range(
            0, self.args.num_validation)
        data_provider = MultiThreadedCpuGpuDataProvider(
            iterator=zip(validation_loader_subset),
            is_cuda=self.use_cuda,
            batch_names=["validation"],
            requires_grad={"validation": []},
            volatile={
                "validation": ["input", "softmaxGenotype"],
            },
            recode_functions={"input": self.normalize_inputs})
        try:
            for batch_idx, (_, data_dict) in enumerate(data_provider):
                input_s = data_dict["validation"]["input"]
                target_s = data_dict["validation"]["softmaxGenotype"]

                # Estimate the reconstruction loss on validation examples:
                reconstruction_loss = self.net.get_crossconstruction_loss(
                    input_s, input_s, target_s)

                # now evaluate prediction of categories:
                categories_predicted, latent_code = self.net.encoder(input_s)

                categories_predicted_p = self.get_p(categories_predicted)
                # x != x holds only for NaN: replace NaN probabilities by 0.
                categories_predicted_p[
                    categories_predicted_p != categories_predicted_p] = 0.0
                _, target_index = torch.max(target_s, dim=1)
                categories_loss = self.net.semisup_loss_criterion(
                    categories_predicted, target_s)

                weight = self.estimate_example_density_weight(latent_code)
                performance_estimators.set_metric(batch_idx,
                                                  "reconstruction_loss",
                                                  reconstruction_loss.data[0])
                performance_estimators.set_metric(batch_idx, "weight", weight)
                # NOTE(review): the loss passed along with "test_accuracy" is
                # the reconstruction loss -- confirm that is intended.
                performance_estimators.set_metric_with_outputs(
                    batch_idx, "test_accuracy", reconstruction_loss.data[0],
                    categories_predicted_p, target_index)
                performance_estimators.set_metric_with_outputs(
                    batch_idx, "test_loss", categories_loss.data[0] * weight,
                    categories_predicted_p, target_s)

                if not self.args.no_progress:
                    progress_bar(
                        batch_idx * self.mini_batch_size,
                        self.max_validation_examples,
                        performance_estimators.progress_message([
                            "test_loss", "test_accuracy", "reconstruction_loss"
                        ]))

                # Stop once the configured number of validation examples is seen.
                if ((batch_idx + 1) *
                        self.mini_batch_size) > self.max_validation_examples:
                    break
        finally:
            # Always release the data provider, even if a batch failed.
            data_provider.close()
        # Apply learning rate schedules:
        metric_name = self.get_test_metric_name()
        test_metric = performance_estimators.get_metric(metric_name)
        # The separating space was missing, which glued the metric name to
        # the rest of the message.
        assert test_metric is not None, (
            metric_name + " must be found among estimated performance metrics")
        if not self.args.constant_learning_rates:
            for scheduler in self.schedulers:
                scheduler.step(test_metric, epoch)
        # Run the garbage collector to try to release memory we no longer need:
        import gc
        gc.collect()
        return performance_estimators
Exemple #16
0
    def do_testing(epoch, thread_executor):
        """Evaluate every active trainer on the validation subset for one epoch.

        Each validation batch is handed to every trainer through
        ``thread_executor``; once all batches are processed, the metrics of each
        trainer are logged, early stopping is honoured and the learning rate
        scheduler is stepped.

        Relies on ``trainers``, ``problem``, ``args``, ``device`` and
        ``global_lock``, which are defined outside this function.

        Args:
            epoch: Index of the epoch being tested. Used for logging and passed
                to the learning rate scheduler.
            thread_executor: Executor whose ``submit`` method is used to run one
                evaluation task per trainer and per batch.

        Raises:
            AssertionError: If the test metric named by a trainer is missing
                from its test performance estimators.
        """
        print('Testing, epoch: %d' % epoch)
        for model_trainer in trainers:
            model_trainer.test_performance_estimators.init_performance_metrics()
            model_trainer.reset_before_test_epoch()

        def evaluate_batch(model_trainer, batch_idx, input_s, target_s, metadata, errors):
            # Work on clones so that tasks running for different trainers never
            # share the tensors handed out by the data provider.
            input_s_local = input_s.clone()
            target_s_local = target_s.clone()
            metadata_local = metadata.clone()
            target_smoothed = recode_for_label_smoothing(
                target_s_local, model_trainer.args.epsilon_label_smoothing)

            model_trainer.net.eval()
            model_trainer.test_one_batch(model_trainer.test_performance_estimators,
                                         batch_idx, input_s_local, target_smoothed,
                                         metadata=metadata_local, errors=errors)

        validation_loader_subset = problem.validation_loader_range(0, args.num_validation)
        data_provider = DataProvider(
            iterator=zip(validation_loader_subset),
            device=device,
            batch_names=["validation"],
            requires_grad={"validation": []},
            vectors_to_keep=["input", "softmaxGenotype", "metaData"]
        )

        try:
            for batch_idx, (_, data_dict) in enumerate(data_provider):
                validation_batch = data_dict["validation"]
                input_s = validation_batch["input"]
                target_s = validation_batch["softmaxGenotype"]
                metadata = validation_batch["metaData"]
                # batch_idx is passed explicitly rather than captured by the
                # worker, so each task is bound to the batch it was created for.
                futures = [
                    thread_executor.submit(evaluate_batch, model_trainer, batch_idx,
                                           input_s, target_s, metadata, None)
                    for model_trainer in trainers
                ]
                concurrent.futures.wait(futures)
                # Report any exceptions encountered in the submitted tasks:
                raise_to_do_exceptions(futures)
        finally:
            data_provider.close()
            del validation_loader_subset

        # Iterate over a snapshot: a trainer that requests early stopping is
        # removed from the shared list below, and the remaining trainers must
        # still be logged and have their schedulers stepped for this epoch.
        for model_trainer in list(trainers):
            perfs = PerformanceList()
            perfs += model_trainer.training_performance_estimators
            perfs += model_trainer.test_performance_estimators
            if epoch == 0:
                model_trainer.log_performance_header(perfs)

            early_stop, perfs = model_trainer.log_performance_metrics(epoch, perfs)

            if early_stop:
                # early stopping requested, no longer train this model:
                with global_lock:
                    trainers.remove(model_trainer)
                continue

            # Apply learning rate schedule:
            test_metric_name = model_trainer.get_test_metric_name()
            test_metric = model_trainer.test_performance_estimators.get_metric(test_metric_name)
            assert test_metric is not None, (
                test_metric_name + " must be found among estimated performance metrics")
            if not model_trainer.args.constant_learning_rates:
                model_trainer.scheduler_train.step(test_metric, epoch)
            model_trainer.compute_after_test_epoch()