Example #1
0
    def _risk_assessment_helper(self, experiment_class, exp_path, debug=False, other=None, num_final_runs=3):
        """Select the best configuration, then retrain and assess it on the hold-out split.

        Runs model selection via ``self.model_selector``, retrains an experiment with the
        winning configuration, repeats the final train/test ``num_final_runs`` times to
        mitigate bad random initializations, and writes the averaged scores (plus the best
        config) as JSON into the hold-out folder.

        Args:
            experiment_class: Callable building an experiment from (config, exp_path).
            exp_path: Directory for this experiment's artifacts and logs.
            debug: Forwarded to the model selector.
            other: Opaque extra data forwarded to model selection and ``run_test``
                (semantics defined by the experiment implementation — not visible here).
            num_final_runs: Number of independent final runs averaged together
                (default 3, matching the original hard-coded behavior).

        Raises:
            ValueError: If ``num_final_runs`` is not a positive integer.
        """
        if num_final_runs < 1:
            raise ValueError(f'num_final_runs must be >= 1, got {num_final_runs}')

        dataset_getter = DatasetGetter(None)

        best_config = self.model_selector.model_selection(dataset_getter, experiment_class, exp_path,
                                                          self.model_configs, debug, other)

        # Retrain with the best configuration and test
        experiment = experiment_class(best_config['config'], exp_path)

        # Set up a log file for this experiment (I am in a forked process)
        logger = Logger(str(os.path.join(experiment.exp_path, 'experiment.log')), mode='a')

        dataset_getter.set_inner_k(None)

        training_scores, test_scores = [], []

        # Average several independent final runs to mitigate bad random initializations
        for i in range(num_final_runs):
            training_score, test_score = experiment.run_test(dataset_getter, logger, other)
            print(f'Final training run {i + 1}: {training_score}, {test_score}')

            training_scores.append(training_score)
            test_scores.append(test_score)

        # Divide by the actual run count rather than a duplicated magic constant,
        # so the loop bound and the divisor can never drift apart.
        training_score = sum(training_scores) / len(training_scores)
        test_score = sum(test_scores) / len(test_scores)

        logger.log('TR score: ' + str(training_score) + ' TS score: ' + str(test_score))

        # Persist the assessment result for the outer (risk-assessment) procedure.
        with open(os.path.join(self._HOLDOUT_FOLDER, self._ASSESSMENT_FILENAME), 'w') as fp:
            json.dump({'best_config': best_config, 'HOLDOUT_TR': training_score, 'HOLDOUT_TS': test_score}, fp)
Example #2
0
        learning_rate = config_file['learning_rate'][0]
        batch_size = config_file['batch_size'][0]
        num_epochs = config_file['num_epochs'][0]
        #drop_out = config_file['drop_out']
        #seed = config_file['seed']
        clipping = config_file['gradient_clipping'][0]
        scheduler_info = config_file['scheduler'][0]

        dataset_class = dataset_classes[dataset_name]  # dataset_class()
        dataset = dataset_class()

        accs = []
        best_val_epoch = []
        for fold in range(10):  #10 fold cross validation
            begin_time = time.time()
            dataset_getter = DatasetGetter(fold)
            train_loader, val_loader = dataset_getter.get_train_val(
                dataset, batch_size, shuffle=True)
            test_loader = dataset_getter.get_test(dataset,
                                                  batch_size,
                                                  shuffle=False)

            for i in range(5):

                #initialize the model
                model = GCN(dim_features=dataset._dim_features,
                            dim_target=dataset._dim_target,
                            config={
                                'embedding_dim':
                                config_file['embedding_dim'][0],
                                'num_layers': config_file['num_layers'][0],