def create_experiment(yaml_filename, seed=9859295):
    """Utility function to create an experiment from a yaml file."""
    # for reproducibility of layer weights
    # should be the same seed as in experiment_runner.py
    lasagne.random.set_rng(RandomState(seed))
    train_dict = yaml_parse.load(open(yaml_filename, 'r'))
    layers = load_layers_from_dict(train_dict)
    final_layer = layers[-1]
    dataset = train_dict['dataset']
    splitter = train_dict['dataset_splitter']
    if np.any([hasattr(l, 'n_stride') for l in layers]):
        n_sample_preds = get_n_sample_preds(final_layer)
        # for backwards compatibility, also set input time length
        input_time_length = get_input_time_length(final_layer)
        log.info("Setting n_sample_preds automatically to {:d}".format(
            n_sample_preds))
        for monitor in train_dict['exp_args']['monitors']:
            if hasattr(monitor, 'n_sample_preds'):
                monitor.n_sample_preds = n_sample_preds
            if hasattr(monitor, 'input_time_length'):
                monitor.input_time_length = input_time_length
        train_dict['exp_args']['iterator'].n_sample_preds = n_sample_preds
        log.info("Input window length is {:d}".format(
            get_model_input_window(final_layer)))
    # add early stop channel, necessary for backwards compatibility
    exp_args = train_dict['exp_args']
    exp_args['remember_best_chan'] = exp_args.pop(
        'remember_best_chan', 'valid_misclass')
    exp_args['run_after_early_stop'] = exp_args.pop(
        'run_after_early_stop', True)
    exp = Experiment(final_layer, dataset, splitter, **exp_args)
    unused_layers = np.setdiff1d(
        layers, lasagne.layers.get_all_layers(final_layer))
    assert len(unused_layers) == 0, (
        "All layers should be used, unused: {:s}".format(str(unused_layers)))
    return exp
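# A minimal usage sketch (the yaml path is hypothetical; it assumes a config
# that defines 'dataset', 'dataset_splitter', 'layers' and 'exp_args' entries,
# matching what is parsed above). setup()/run() mirror the call sequence used
# in _run_experiments_with_string below:
#
#     exp = create_experiment('configs/experiments/my_experiment.yaml')
#     exp.setup()
#     exp.run()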
def _run_experiments_with_string(self, experiment_index, train_str):
    assert experiment_index >= self._get_start_id()
    assert experiment_index < self._get_stop_id()
    lasagne.random.set_rng(RandomState(9859295))
    # Save train string now, will be overwritten later after
    # input dimensions are determined; save now for debugging in
    # case of a crash
    if not self._dry_run:
        self._save_train_string(train_str, experiment_index)
    starttime = time.time()
    train_dict = self._load_without_layers(train_str)
    log.info("With params...")
    if not self._quiet:
        pprint(train_dict['original_params'])
    if self._dry_run:
        # Do not do the loading or training...
        # Only go until here to show the train params
        return
    if self._batch_test:
        # TODO: put into function
        # load layers, load data with the dimensions of the layer,
        # create experiment with max epochs 2, run
        from braindecode.datasets.random import RandomSet
        train_str = train_str.replace('in_cols', '1')
        train_str = train_str.replace('in_sensors', '32')
        train_dict = yaml_parse.load(train_str)
        layers = load_layers_from_dict(train_dict)
        final_layer = layers[-1]
        n_chans = layers[0].shape[1]
        n_classes = final_layer.output_shape[1]
        n_samples = 500000
        # set n_sample_preds in case of cnt model
        if np.any([hasattr(l, 'n_stride') for l in layers]):
            n_sample_preds = get_n_sample_preds(final_layer)
            log.info("Setting n_sample_preds automatically to {:d}".format(
                n_sample_preds))
            for monitor in train_dict['exp_args']['monitors']:
                if hasattr(monitor, 'n_sample_preds'):
                    monitor.n_sample_preds = n_sample_preds
            train_dict['exp_args']['iterator'].n_sample_preds = n_sample_preds
            log.info("Input window length is {:d}".format(
                get_model_input_window(final_layer)))
            # make sure there are enough samples for the batches
            n_samples = int(n_sample_preds * 1.5 * 200)
        dataset = RandomSet(topo_shape=[n_samples, n_chans, 1, 1],
                            y_shape=[n_samples, n_classes])
        dataset.load()
        splitter = FixedTrialSplitter(n_train_trials=int(n_samples * 0.8),
                                      valid_set_fraction=0.1)
        train_dict['exp_args']['preprocessor'] = None
        train_dict['exp_args']['stop_criterion'] = MaxEpochs(1)
        train_dict['exp_args']['iterator'].batch_size = 1
        exp = Experiment(final_layer, dataset, splitter,
                         **train_dict['exp_args'])
        exp.setup()
        exp.run_until_early_stop()
        datasets = exp.dataset_provider.get_train_valid_test(exp.dataset)
        for batch_size in range(32, 200, 5):
            train_dict['exp_args']['stop_criterion'].num_epochs += 2
            log.info("Running with batch size {:d}".format(batch_size))
            train_dict['exp_args']['iterator'].batch_size = batch_size
            exp.run_until_stop(datasets, remember_best=False)
        return
    dataset = train_dict['dataset']
    dataset.load()
    iterator = train_dict['exp_args']['iterator']
    splitter = train_dict['dataset_splitter']
    if dataset.__class__.__name__ == 'EpilepsySet':
        log.info("Reducing to float16 for epilepsy set...")
        dataset.seizure_topo = np.float16(dataset.seizure_topo)
        dataset.non_seizure_topo = np.float16(dataset.non_seizure_topo)
    else:
        # todo: remove this?
        log.info("Determining dataset dimensions to set "
                 "possible model params...")
        train_set = splitter.split_into_train_valid_test(dataset)['train']
        batch_gen = iterator.get_batches(train_set, shuffle=True)
        dummy_batch_topo = batch_gen.next()[0]
        del train_set
        # not for ultrasound: assert 'in_sensors' in train_str
        # not for cnt net: assert 'in_rows' in train_str
        # not for resnet: assert 'in_cols' in train_str
        train_str = train_str.replace('in_sensors',
                                      str(dummy_batch_topo.shape[1]))
        train_str = train_str.replace('in_rows',
                                      str(dummy_batch_topo.shape[2]))
        train_str = train_str.replace('in_cols',
                                      str(dummy_batch_topo.shape[3]))
    self._save_train_string(train_str, experiment_index)
    # reset rng for the actual loading of layers, so you can reproduce it
    # when you load the file later
    lasagne.random.set_rng(RandomState(9859295))
    train_dict = yaml_parse.load(train_str)
    layers = load_layers_from_dict(train_dict)
    final_layer = layers[-1]
    unused_layers = np.setdiff1d(
        layers, lasagne.layers.get_all_layers(final_layer))
    assert len(unused_layers) == 0, (
        "All layers should be used, unused: {:s}".format(str(unused_layers)))
    # Set n_sample_preds in case of cnt model
    if np.any([hasattr(l, 'n_stride') for l in layers]):
        # Can this be moved up and the duplication in the batch-test
        # if-clause above be removed?
        n_sample_preds = get_n_sample_preds(final_layer)
        log.info("Setting n_sample_preds automatically to {:d}".format(
            n_sample_preds))
        for monitor in train_dict['exp_args']['monitors']:
            if hasattr(monitor, 'n_sample_preds'):
                monitor.n_sample_preds = n_sample_preds
        train_dict['exp_args']['iterator'].n_sample_preds = n_sample_preds
        log.info("Input window length is {:d}".format(
            get_model_input_window(final_layer)))
    if not self._cross_validation:
        # for now let's not do that, current models seem fine again.
        # if (dataset.__class__.__name__ == 'EpilepsySet') and self._pred_loss_hack:
        #     from braindecode.epilepsy.experiment import EpilepsyExperiment
        #     log.info("Creating epilepsy experiment with the pred loss hack")
        #     exp = EpilepsyExperiment(final_layer, dataset, splitter,
        #                              **train_dict['exp_args'])
        # else:
        exp = Experiment(final_layer, dataset, splitter,
                         **train_dict['exp_args'])
        exp.setup()
        exp.run()
        endtime = time.time()
        model = exp.final_layer
        # dummy predictions/targets
        predictions = [0, 3, 1, 2, 3, 4]
        targets = [3, 4, 1, 2, 3, 4]
        result_or_results = Result(
            parameters=train_dict['original_params'],
            templates={},
            training_time=endtime - starttime,
            monitor_channels=exp.monitor_chans,
            predictions=predictions,
            targets=targets)
    else:  # cross validation
        assert False, (
            "cross validation not used in a long time, not up to date;"
            " for example, targets/predictions not added")
        # default 5 folds for now
        n_folds = train_dict['num_cv_folds']
        exp_cv = ExperimentCrossValidation(
            final_layer, dataset, exp_args=train_dict['exp_args'],
            n_folds=n_folds, shuffle=self._shuffle)
        exp_cv.run()
        endtime = time.time()
        result_or_results = []
        for i_fold in xrange(n_folds):
            res = Result(parameters=train_dict['original_params'],
                         templates={},
                         training_time=endtime - starttime,
                         monitor_channels=exp_cv.all_monitor_chans[i_fold],
                         predictions=[0, 3, 1, 2, 3, 4],
                         targets=[3, 4, 1, 2, 3, 4])
            result_or_results.append(res)
        model = exp_cv.all_layers
    if not os.path.exists(self._folder_paths[experiment_index]):
        os.makedirs(self._folder_paths[experiment_index])
    result_file_name = self._get_result_save_path(experiment_index)
    log.info("Saving result to {:s}...".format(result_file_name))
    with open(result_file_name, 'w') as resultfile:
        pickle.dump(result_or_results, resultfile)
    model_file_name = self._get_model_save_path(experiment_index)
    param_file_name = model_file_name.replace('.pkl', '.npy')
    np.save(param_file_name, lasagne.layers.get_all_param_values(model))
    # Possibly make kaggle submission file
    if isinstance(dataset, KaggleGraspLiftSet) and splitter.use_test_as_valid:
        experiment_save_id = int(
            self._base_save_paths[experiment_index].split("/")[-1])
        create_submission_csv_for_one_subject(
            self._folder_paths[experiment_index], exp.dataset, iterator,
            train_dict['exp_args']['preprocessor'], final_layer,
            experiment_save_id)
    elif (isinstance(dataset, AllSubjectsKaggleGraspLiftSet) and
            splitter.use_test_as_valid):
        experiment_save_id = int(
            self._base_save_paths[experiment_index].split("/")[-1])
        create_submission_csv_for_all_subject_model(
            self._folder_paths[experiment_index], exp.dataset,
            exp.dataset_provider, iterator, final_layer, experiment_save_id)
    elif isinstance(splitter, SeveralSetsSplitter):
        pass  # nothing to do in this case
    # very hacky way to create predictions/targets :)
    # Not done earlier as there were weird theano crashes
    if exp.monitors[2].__class__.__name__ == 'CntTrialMisclassMonitor':
        del dataset
        del exp
        add_labels_to_cnt_exp_result(self._base_save_paths[experiment_index])
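# A minimal sketch of reloading the artifacts saved above (file names are
# hypothetical; the real paths come from _get_result_save_path /
# _get_model_save_path). The result pickle holds a single Result, or a list
# of Results for cross-validation runs; the .npy file holds parameter arrays
# in lasagne.layers.get_all_param_values order, so they can be restored into
# an identically constructed model:
#
#     with open('data/models/5.result.pkl', 'r') as result_file:
#         result = pickle.load(result_file)
#     param_values = np.load('data/models/5.npy')
#     lasagne.layers.set_all_param_values(final_layer, param_values)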