コード例 #1
0
def create_experiment(yaml_filename, seed=9859295):
    """Create an experiment from a yaml file.

    Parameters
    ----------
    yaml_filename : str
        Path of the yaml file; its content is parsed with
        ``yaml_parse.load``.
    seed : int, optional
        Seed of the lasagne random generator, set before the layers are
        loaded. Should be the same seed as in experiment_runner.py.

    Returns
    -------
    Experiment
        Built from the last loaded layer, ``train_dict['dataset']``,
        ``train_dict['dataset_splitter']`` and ``train_dict['exp_args']``.

    Raises
    ------
    AssertionError
        If a loaded layer is not among the layers returned by
        ``lasagne.layers.get_all_layers`` for the final layer.
    """
    # for reproducibility for layer weights
    # should be same seed as in experiment_runner.py
    lasagne.random.set_rng(RandomState(seed))
    # Use a context manager so the file handle is closed after parsing.
    # NOTE(review): presumably yaml_parse.load instantiates the objects
    # named in the file -- only pass trusted files; confirm.
    with open(yaml_filename, 'r') as yaml_file:
        train_dict = yaml_parse.load(yaml_file)
    layers = load_layers_from_dict(train_dict)
    final_layer = layers[-1]
    dataset = train_dict['dataset']
    splitter = train_dict['dataset_splitter']
    exp_args = train_dict['exp_args']
    if any(hasattr(layer, 'n_stride') for layer in layers):
        n_sample_preds = get_n_sample_preds(final_layer)
        # for backwards compatibility input time length also
        input_time_length = get_input_time_length(final_layer)
        log.info("Setting n_sample preds automatically to {:d}".format(
            n_sample_preds))
        for monitor in exp_args['monitors']:
            if hasattr(monitor, 'n_sample_preds'):
                monitor.n_sample_preds = n_sample_preds
            if hasattr(monitor, 'input_time_length'):
                monitor.input_time_length = input_time_length

        exp_args['iterator'].n_sample_preds = n_sample_preds
        log.info("Input window length is {:d}".format(
            get_model_input_window(final_layer)))
    # add early stop chan, necessary for backwards compatibility:
    # only fill in the defaults when the yaml file did not set them
    exp_args.setdefault('remember_best_chan', 'valid_misclass')
    exp_args.setdefault('run_after_early_stop', True)
    exp = Experiment(final_layer, dataset, splitter, **exp_args)
    # Compute the unused layers once, for both the check and the message.
    unused_layers = np.setdiff1d(
        layers, lasagne.layers.get_all_layers(final_layer))
    assert len(unused_layers) == 0, (
        "All layers should be used, unused {:s}".format(str(unused_layers)))
    return exp
コード例 #2
0
ファイル: experiment.py プロジェクト: robintibor/braindecode
def create_experiment(yaml_filename, seed=9859295):
    """Utility function to create an experiment from a yaml file.

    Parameters
    ----------
    yaml_filename : str
        Path of the yaml file; its content is parsed with
        ``yaml_parse.load``.
    seed : int, optional
        Seed of the lasagne random generator, set before the layers are
        loaded. Should be the same seed as in experiment_runner.py.

    Returns
    -------
    Experiment
        Built from the last loaded layer, ``train_dict['dataset']``,
        ``train_dict['dataset_splitter']`` and ``train_dict['exp_args']``.

    Raises
    ------
    AssertionError
        If a loaded layer is not among the layers returned by
        ``lasagne.layers.get_all_layers`` for the final layer.
    """
    # for reproducibility for layer weights
    # should be same seed as in experiment_runner.py
    lasagne.random.set_rng(RandomState(seed))
    # Close the yaml file as soon as it is parsed instead of leaking the
    # handle.
    # NOTE(review): presumably yaml_parse.load instantiates the objects
    # named in the file -- only pass trusted files; confirm.
    with open(yaml_filename, 'r') as yaml_file:
        train_dict = yaml_parse.load(yaml_file)
    layers = load_layers_from_dict(train_dict)
    final_layer = layers[-1]
    dataset = train_dict['dataset']
    splitter = train_dict['dataset_splitter']
    exp_args = train_dict['exp_args']
    if any(hasattr(layer, 'n_stride') for layer in layers):
        n_sample_preds = get_n_sample_preds(final_layer)
        # for backwards compatibility input time length also
        input_time_length = get_input_time_length(final_layer)
        log.info("Setting n_sample preds automatically to {:d}".format(
            n_sample_preds))
        for monitor in exp_args['monitors']:
            if hasattr(monitor, 'n_sample_preds'):
                monitor.n_sample_preds = n_sample_preds
            if hasattr(monitor, 'input_time_length'):
                monitor.input_time_length = input_time_length

        exp_args['iterator'].n_sample_preds = n_sample_preds
        log.info("Input window length is {:d}".format(
            get_model_input_window(final_layer)))
    # add early stop chan, necessary for backwards compatibility:
    # only fill in the defaults when the yaml file did not set them
    exp_args.setdefault('remember_best_chan', 'valid_misclass')
    exp_args.setdefault('run_after_early_stop', True)
    exp = Experiment(final_layer, dataset, splitter, **exp_args)
    # Compute the unused layers once, for both the check and the message.
    unused_layers = np.setdiff1d(
        layers, lasagne.layers.get_all_layers(final_layer))
    assert len(unused_layers) == 0, (
        "All layers should be used, unused {:s}".format(str(unused_layers)))
    return exp
コード例 #3
0
    def _run_experiments_with_string(self, experiment_index, train_str):
        """Run the experiment given by a train string and save the results.

        The train string is saved and parsed. Unless this is a dry run, the
        layers are loaded from it and an experiment is created and run.
        Afterwards the result is pickled, the parameter values of the model
        are stored in a ``.npy`` file and, for the Kaggle grasp-lift
        datasets with ``use_test_as_valid`` set on the splitter, a
        submission csv is created.

        In batch test mode the model is instead run on a ``RandomSet`` with
        increasing batch sizes and the method returns before any result is
        saved.

        Parameters
        ----------
        experiment_index : int
            Index of the experiment, has to lie in
            ``[self._get_start_id(), self._get_stop_id())``.
        train_str : str
            Train string in yaml format. The placeholders ``in_sensors``,
            ``in_rows`` and ``in_cols`` are replaced before the layers are
            loaded (not for an ``EpilepsySet``).

        Returns
        -------
        None
        """
        assert experiment_index >= self._get_start_id()
        assert experiment_index < self._get_stop_id()
        lasagne.random.set_rng(RandomState(9859295))
        # Save train string now, will be overwritten later after
        # input dimensions determined, save now for debug in
        # case of crash
        if not self._dry_run:
            self._save_train_string(train_str, experiment_index)
        starttime = time.time()

        train_dict = self._load_without_layers(train_str)
        log.info("With params...")
        if not self._quiet:
            pprint(train_dict['original_params'])
        if self._dry_run:
            # Do not do the loading or training...
            # Only go until here to show the train params
            return

        if self._batch_test:
            # TODO: put into function
            # load layers, load data with dimensions of the layer
            # create experiment with max epochs 2, run
            from braindecode.datasets.random import RandomSet
            train_str = train_str.replace('in_cols', '1')
            train_str = train_str.replace('in_sensors', '32')
            train_dict = yaml_parse.load(train_str)
            layers = load_layers_from_dict(train_dict)
            final_layer = layers[-1]
            n_chans = layers[0].shape[1]
            n_classes = final_layer.output_shape[1]
            n_samples = 500000
            # set n sample preds in case of cnt model
            if any(hasattr(layer, 'n_stride') for layer in layers):
                n_sample_preds = get_n_sample_preds(final_layer)
                log.info("Setting n_sample preds automatically to {:d}".format(
                    n_sample_preds))
                for monitor in train_dict['exp_args']['monitors']:
                    if hasattr(monitor, 'n_sample_preds'):
                        monitor.n_sample_preds = n_sample_preds
                train_dict['exp_args'][
                    'iterator'].n_sample_preds = n_sample_preds
                log.info("Input window length is {:d}".format(
                    get_model_input_window(final_layer)))
                # make at least batches
                n_samples = int(n_sample_preds * 1.5 * 200)
            dataset = RandomSet(topo_shape=[n_samples, n_chans, 1, 1],
                                y_shape=[n_samples, n_classes])
            dataset.load()
            splitter = FixedTrialSplitter(n_train_trials=int(n_samples * 0.8),
                                          valid_set_fraction=0.1)
            train_dict['exp_args']['preprocessor'] = None
            train_dict['exp_args']['stop_criterion'] = MaxEpochs(1)
            train_dict['exp_args']['iterator'].batch_size = 1
            # TODO: set stop criterion to max epochs =1
            #  change batch_size in iterator
            exp = Experiment(final_layer, dataset, splitter,
                             **train_dict['exp_args'])
            exp.setup()
            exp.run_until_early_stop()
            datasets = exp.dataset_provider.get_train_valid_test(exp.dataset)
            for batch_size in range(32, 200, 5):
                # allow two more epochs for every batch size that is tried
                train_dict['exp_args']['stop_criterion'].num_epochs += 2
                log.info("Running with batch size {:d}".format(batch_size))
                train_dict['exp_args']['iterator'].batch_size = batch_size
                exp.run_until_stop(datasets, remember_best=False)
            return

        dataset = train_dict['dataset']
        dataset.load()
        iterator = train_dict['exp_args']['iterator']
        splitter = train_dict['dataset_splitter']
        if dataset.__class__.__name__ == 'EpilepsySet':
            log.info("Reducing to float16 for epilepsy set...")
            dataset.seizure_topo = np.float16(dataset.seizure_topo)
            dataset.non_seizure_topo = np.float16(dataset.non_seizure_topo)
        else:
            # todo: remove this?
            log.info(
                "Determining dataset dimensions to set possible model params..."
            )
            train_set = splitter.split_into_train_valid_test(dataset)['train']
            batch_gen = iterator.get_batches(train_set, shuffle=True)
            # Only the first batch is needed, to read off its dimensions.
            dummy_batch_topo = next(batch_gen)[0]
            del train_set
            # not for ultrasound: assert 'in_sensors' in train_str
            # not for cnt net assert 'in_rows' in train_str
            # not for resnet: assert 'in_cols' in train_str
            train_str = train_str.replace('in_sensors',
                                          str(dummy_batch_topo.shape[1]))
            train_str = train_str.replace('in_rows',
                                          str(dummy_batch_topo.shape[2]))
            train_str = train_str.replace('in_cols',
                                          str(dummy_batch_topo.shape[3]))

        self._save_train_string(train_str, experiment_index)

        # reset rng for actual loading of layers, so you can reproduce it
        # when you load the file later
        lasagne.random.set_rng(RandomState(9859295))
        train_dict = yaml_parse.load(train_str)

        layers = load_layers_from_dict(train_dict)
        final_layer = layers[-1]
        # Compute the unused layers once, for both the check and the message.
        unused_layers = np.setdiff1d(
            layers, lasagne.layers.get_all_layers(final_layer))
        assert len(unused_layers) == 0, (
            "All layers should be used, unused {:s}".format(
                str(unused_layers)))
        # Set n sample preds in case of cnt model
        if any(hasattr(layer, 'n_stride') for layer in layers):
            # Can this be moved up and duplication in if clause( batch test,
            # more above) be removed?
            n_sample_preds = get_n_sample_preds(final_layer)
            log.info("Setting n_sample preds automatically to {:d}".format(
                n_sample_preds))
            for monitor in train_dict['exp_args']['monitors']:
                if hasattr(monitor, 'n_sample_preds'):
                    monitor.n_sample_preds = n_sample_preds
            train_dict['exp_args']['iterator'].n_sample_preds = n_sample_preds
            log.info("Input window length is {:d}".format(
                get_model_input_window(final_layer)))

        if not self._cross_validation:
            # An EpilepsyExperiment with a "pred loss hack" was once created
            # here for the EpilepsySet; for now lets not do that, current
            # models seem fine again.
            exp = Experiment(final_layer, dataset, splitter,
                             **train_dict['exp_args'])
            exp.setup()
            exp.run()
            endtime = time.time()

            model = exp.final_layer

            # dummy predictions targets
            predictions = [0, 3, 1, 2, 3, 4]
            targets = [3, 4, 1, 2, 3, 4]

            result_or_results = Result(
                parameters=train_dict['original_params'],
                templates={},
                training_time=endtime - starttime,
                monitor_channels=exp.monitor_chans,
                predictions=predictions,
                targets=targets)

        else:  # cross validation
            # This branch is deliberately disabled by the assert below.
            assert False, (
                "cross validation not used in long time, not up to date"
                " for example targets predictions not added")
            # default 5 folds for now
            n_folds = train_dict['num_cv_folds']
            exp_cv = ExperimentCrossValidation(final_layer,
                                               dataset,
                                               exp_args=train_dict['exp_args'],
                                               n_folds=n_folds,
                                               shuffle=self._shuffle)
            exp_cv.run()
            endtime = time.time()
            result_or_results = []
            for i_fold in xrange(n_folds):
                res = Result(parameters=train_dict['original_params'],
                             templates={},
                             training_time=endtime - starttime,
                             monitor_channels=exp_cv.all_monitor_chans[i_fold],
                             predictions=[0, 3, 1, 2, 3, 4],
                             targets=[3, 4, 1, 2, 3, 4])
                result_or_results.append(res)
            model = exp_cv.all_layers

        if not os.path.exists(self._folder_paths[experiment_index]):
            os.makedirs(self._folder_paths[experiment_index])

        result_file_name = self._get_result_save_path(experiment_index)

        log.info("Saving result to {:s}...".format(result_file_name))
        # Pickle data has to be written to a file opened in binary mode.
        with open(result_file_name, 'wb') as resultfile:
            pickle.dump(result_or_results, resultfile)

        model_file_name = self._get_model_save_path(experiment_index)
        param_file_name = model_file_name.replace('.pkl', '.npy')
        np.save(param_file_name, lasagne.layers.get_all_param_values(model))

        # Possibly make kaggle submission file
        if isinstance(dataset,
                      KaggleGraspLiftSet) and splitter.use_test_as_valid:
            experiment_save_id = int(
                self._base_save_paths[experiment_index].split("/")[-1])
            create_submission_csv_for_one_subject(
                self._folder_paths[experiment_index], exp.dataset, iterator,
                train_dict['exp_args']['preprocessor'], final_layer,
                experiment_save_id)
        elif isinstance(
                dataset,
                AllSubjectsKaggleGraspLiftSet) and splitter.use_test_as_valid:
            experiment_save_id = int(
                self._base_save_paths[experiment_index].split("/")[-1])
            create_submission_csv_for_all_subject_model(
                self._folder_paths[experiment_index], exp.dataset,
                exp.dataset_provider, iterator, final_layer,
                experiment_save_id)
        elif isinstance(splitter, SeveralSetsSplitter):
            pass  # nothing to do in this case

        # very hacky create predictions targets :)
        # Not done earlier as there were weird theano crashes
        # The length check avoids an IndexError, after everything has been
        # saved already, for experiments with fewer than three monitors.
        if (len(exp.monitors) > 2 and
                exp.monitors[2].__class__.__name__ ==
                'CntTrialMisclassMonitor'):
            # drop the references before the labels are added
            del dataset
            del exp
            add_labels_to_cnt_exp_result(
                self._base_save_paths[experiment_index])
コード例 #4
0
    def _run_experiments_with_string(self, experiment_index, train_str):
        """Run the experiment given by a train string and save the results.

        The train string is saved and parsed. Unless this is a dry run, the
        layers are loaded from it and an experiment is created and run.
        Afterwards the result is pickled, the parameter values of the model
        are stored in a ``.npy`` file and, for the Kaggle grasp-lift
        datasets with ``use_test_as_valid`` set on the splitter, a
        submission csv is created.

        In batch test mode the model is instead run on a ``RandomSet`` with
        increasing batch sizes and the method returns before any result is
        saved.

        Parameters
        ----------
        experiment_index : int
            Index of the experiment, has to lie in
            ``[self._get_start_id(), self._get_stop_id())``.
        train_str : str
            Train string in yaml format. The placeholders ``in_sensors``,
            ``in_rows`` and ``in_cols`` are replaced before the layers are
            loaded (not for an ``EpilepsySet``).

        Returns
        -------
        None
        """
        assert experiment_index >= self._get_start_id()
        assert experiment_index < self._get_stop_id()
        lasagne.random.set_rng(RandomState(9859295))
        # Save train string now, will be overwritten later after
        # input dimensions determined, save now for debug in
        # case of crash
        if not self._dry_run:
            self._save_train_string(train_str, experiment_index)
        starttime = time.time()

        train_dict = self._load_without_layers(train_str)
        log.info("With params...")
        if not self._quiet:
            pprint(train_dict['original_params'])
        if self._dry_run:
            # Do not do the loading or training...
            # Only go until here to show the train params
            return

        if self._batch_test:
            # TODO: put into function
            # load layers, load data with dimensions of the layer
            # create experiment with max epochs 2, run
            from braindecode.datasets.random import RandomSet
            train_str = train_str.replace('in_cols', '1')
            train_str = train_str.replace('in_sensors', '32')
            train_dict = yaml_parse.load(train_str)
            layers = load_layers_from_dict(train_dict)
            final_layer = layers[-1]
            n_chans = layers[0].shape[1]
            n_classes = final_layer.output_shape[1]
            n_samples = 500000
            # set n sample preds in case of cnt model
            if any(hasattr(layer, 'n_stride') for layer in layers):
                n_sample_preds = get_n_sample_preds(final_layer)
                log.info("Setting n_sample preds automatically to {:d}".format(
                    n_sample_preds))
                for monitor in train_dict['exp_args']['monitors']:
                    if hasattr(monitor, 'n_sample_preds'):
                        monitor.n_sample_preds = n_sample_preds
                train_dict['exp_args']['iterator'].n_sample_preds = (
                    n_sample_preds)
                log.info("Input window length is {:d}".format(
                    get_model_input_window(final_layer)))
                # make at least batches
                n_samples = int(n_sample_preds * 1.5 * 200)
            dataset = RandomSet(topo_shape=[n_samples, n_chans, 1, 1],
                                y_shape=[n_samples, n_classes])
            dataset.load()
            splitter = FixedTrialSplitter(n_train_trials=int(n_samples * 0.8),
                                          valid_set_fraction=0.1)
            train_dict['exp_args']['preprocessor'] = None
            train_dict['exp_args']['stop_criterion'] = MaxEpochs(1)
            train_dict['exp_args']['iterator'].batch_size = 1
            # TODO: set stop criterion to max epochs =1
            #  change batch_size in iterator
            exp = Experiment(final_layer, dataset, splitter,
                             **train_dict['exp_args'])
            exp.setup()
            exp.run_until_early_stop()
            datasets = exp.dataset_provider.get_train_valid_test(exp.dataset)
            for batch_size in range(32, 200, 5):
                # allow two more epochs for every batch size that is tried
                train_dict['exp_args']['stop_criterion'].num_epochs += 2
                log.info("Running with batch size {:d}".format(batch_size))
                train_dict['exp_args']['iterator'].batch_size = batch_size
                exp.run_until_stop(datasets, remember_best=False)
            return

        dataset = train_dict['dataset']
        dataset.load()
        iterator = train_dict['exp_args']['iterator']
        splitter = train_dict['dataset_splitter']
        if dataset.__class__.__name__ == 'EpilepsySet':
            log.info("Reducing to float16 for epilepsy set...")
            dataset.seizure_topo = np.float16(dataset.seizure_topo)
            dataset.non_seizure_topo = np.float16(dataset.non_seizure_topo)
        else:
            # todo: remove this?
            log.info("Determining dataset dimensions to set possible model params...")
            train_set = splitter.split_into_train_valid_test(dataset)['train']
            batch_gen = iterator.get_batches(train_set, shuffle=True)
            # Only the first batch is needed, to read off its dimensions.
            dummy_batch_topo = next(batch_gen)[0]
            del train_set
            # not for ultrasound: assert 'in_sensors' in train_str
            # not for cnt net assert 'in_rows' in train_str
            # not for resnet: assert 'in_cols' in train_str
            train_str = train_str.replace('in_sensors',
                                          str(dummy_batch_topo.shape[1]))
            train_str = train_str.replace('in_rows',
                                          str(dummy_batch_topo.shape[2]))
            train_str = train_str.replace('in_cols',
                                          str(dummy_batch_topo.shape[3]))

        self._save_train_string(train_str, experiment_index)

        # reset rng for actual loading of layers, so you can reproduce it
        # when you load the file later
        lasagne.random.set_rng(RandomState(9859295))
        train_dict = yaml_parse.load(train_str)

        layers = load_layers_from_dict(train_dict)
        final_layer = layers[-1]
        # Compute the unused layers once, for both the check and the message.
        unused_layers = np.setdiff1d(
            layers, lasagne.layers.get_all_layers(final_layer))
        assert len(unused_layers) == 0, (
            "All layers should be used, unused {:s}".format(
                str(unused_layers)))
        # Set n sample preds in case of cnt model
        if any(hasattr(layer, 'n_stride') for layer in layers):
            # Can this be moved up and duplication in if clause( batch test,
            # more above) be removed?
            n_sample_preds = get_n_sample_preds(final_layer)
            log.info("Setting n_sample preds automatically to {:d}".format(
                n_sample_preds))
            for monitor in train_dict['exp_args']['monitors']:
                if hasattr(monitor, 'n_sample_preds'):
                    monitor.n_sample_preds = n_sample_preds
            train_dict['exp_args']['iterator'].n_sample_preds = n_sample_preds
            log.info("Input window length is {:d}".format(
                get_model_input_window(final_layer)))

        if not self._cross_validation:
            # An EpilepsyExperiment with a "pred loss hack" was once created
            # here for the EpilepsySet; for now lets not do that, current
            # models seem fine again.
            exp = Experiment(final_layer, dataset, splitter,
                             **train_dict['exp_args'])
            exp.setup()
            exp.run()
            endtime = time.time()

            model = exp.final_layer

            # dummy predictions targets
            predictions = [0, 3, 1, 2, 3, 4]
            targets = [3, 4, 1, 2, 3, 4]

            result_or_results = Result(
                parameters=train_dict['original_params'],
                templates={},
                training_time=endtime - starttime,
                monitor_channels=exp.monitor_chans,
                predictions=predictions,
                targets=targets)

        else:  # cross validation
            # This branch is deliberately disabled by the assert below.
            assert False, ("cross validation not used in long time, not up to date"
                " for example targets predictions not added")
            # default 5 folds for now
            n_folds = train_dict['num_cv_folds']
            exp_cv = ExperimentCrossValidation(final_layer,
                                               dataset,
                                               exp_args=train_dict['exp_args'],
                                               n_folds=n_folds,
                                               shuffle=self._shuffle)
            exp_cv.run()
            endtime = time.time()
            result_or_results = []
            for i_fold in xrange(n_folds):
                res = Result(parameters=train_dict['original_params'],
                             templates={},
                             training_time=endtime - starttime,
                             monitor_channels=exp_cv.all_monitor_chans[i_fold],
                             predictions=[0, 3, 1, 2, 3, 4],
                             targets=[3, 4, 1, 2, 3, 4])
                result_or_results.append(res)
            model = exp_cv.all_layers

        if not os.path.exists(self._folder_paths[experiment_index]):
            os.makedirs(self._folder_paths[experiment_index])

        result_file_name = self._get_result_save_path(experiment_index)

        log.info("Saving result...")
        # Pickle data has to be written to a file opened in binary mode.
        with open(result_file_name, 'wb') as resultfile:
            pickle.dump(result_or_results, resultfile)

        model_file_name = self._get_model_save_path(experiment_index)
        param_file_name = model_file_name.replace('.pkl', '.npy')
        np.save(param_file_name, lasagne.layers.get_all_param_values(model))

        # Possibly make kaggle submission file
        if isinstance(dataset,
                      KaggleGraspLiftSet) and splitter.use_test_as_valid:
            experiment_save_id = int(
                self._base_save_paths[experiment_index].split("/")[-1])
            create_submission_csv_for_one_subject(
                self._folder_paths[experiment_index], exp.dataset, iterator,
                train_dict['exp_args']['preprocessor'], final_layer,
                experiment_save_id)
        elif isinstance(
                dataset,
                AllSubjectsKaggleGraspLiftSet) and splitter.use_test_as_valid:
            experiment_save_id = int(
                self._base_save_paths[experiment_index].split("/")[-1])
            create_submission_csv_for_all_subject_model(
                self._folder_paths[experiment_index], exp.dataset,
                exp.dataset_provider, iterator, final_layer,
                experiment_save_id)
        elif isinstance(splitter, SeveralSetsSplitter):
            pass  # nothing to do in this case

        # very hacky create predictions targets :)
        # Not done earlier as there were weird theano crashes
        # The length check avoids an IndexError, after everything has been
        # saved already, for experiments with fewer than three monitors.
        if (len(exp.monitors) > 2 and
                exp.monitors[2].__class__.__name__ ==
                'CntTrialMisclassMonitor'):
            # drop the references before the labels are added
            del dataset
            del exp
            add_labels_to_cnt_exp_result(
                self._base_save_paths[experiment_index])