# NOTE(review): the line below is a whitespace-mangled fragment — newlines were
# stripped, collapsing the tail of a load_results(experiment_root)-style function
# (its 'def' is outside this chunk; cf. the 'return train, model') together with
# a 'generate_plots' __main__ script into one physical line. Because it begins
# with '#', Python currently treats the whole line as a comment. Left verbatim;
# restore the original line breaks against version control before editing.
# load train train_yaml_file = os.path.join(experiment_root, 'train.yaml'); train_yaml = load_yaml_template(train_yaml_file); # fix dataset path localizer = PathLocalizer(); train_yaml = localizer.localize_yaml(train_yaml); with log_timing(log, 'loading train from {}'.format(train_yaml_file)): train = load_yaml(train_yaml)[0]; return train, model; if __name__ == '__main__': init_logging(pylearn2_loglevel=logging.INFO); parser = argparse.ArgumentParser(prog='generate_plots', description='generates plots ;-)'); # global options parser.add_argument('path', help='root path of the experiment'); args = parser.parse_args(); experiment_root = args.path; # experiment_root = '/Users/sstober/git/deepbeat/deepbeat/spearmint/h0_input47/20041_h0_pattern_width_[47]_h0_patterns_[30]_h0_pool_size_[1]_learning_rate_[0.01]' # path = '/Users/sstober/git/deepbeat/deepbeat/spearmint/best/h0_1bar_nophase_49bins' train, model = load_results(experiment_root);
# NOTE(review): whitespace-mangled fragment — the tail of an import_dataset()
# function (starts mid-metadata-dict; 'metadata', 'trial_data', 'metadb',
# 'metadb_file', 'label', 'channel_ids' are locals defined before this view)
# plus its __main__ block, collapsed onto one line. Not valid Python as-is.
# Left verbatim; restore line breaks from version control before editing.
'condition': 'n/a', 'channels': channel_ids, } # save data savepath = generate_filepath_from_metadata(metadata) save(os.path.join(target_path, savepath), (trial_data, metadata), mkdirs=True) # save metadata metadb[savepath] = metadata log.debug('imported {}={} as {}'.format( label, metadata['meta_label'], savepath)) save(metadb_file, metadb, mkdirs=True) log.info('import finished') if __name__ == '__main__': import deepthought from deepthought.util.config_util import init_logging init_logging() source_path = os.path.join(deepthought.DATA_PATH, 'rwanda2013rhythms', 'eeg') target_path = os.path.join(deepthought.DATA_PATH, 'rwanda2013rhythms', 'multichannel') import_dataset(source_path, target_path)
'''
Created on Jun 4, 2014

@author: sstober

Analyze a cached pylearn2 model (mlp.pkl): extract the monitor channels and
determine the best epochs w.r.t. validation misclassification.

NOTE(review): this block had its newlines stripped (everything was collapsed
onto one physical line, which is a syntax error); the structure below restores
the original statement boundaries.
'''
import itertools
import logging

from pylearn2.utils import serial

from deepthought.util.config_util import init_logging
from deepthought.experiments.ismir2014.extract_results import \
    _extract_best_results, _get_best_epochs

if __name__ == '__main__':
    init_logging(pylearn2_loglevel=logging.INFO)

    # using cached result
    model_file = 'mlp.pkl'
    model = serial.load(model_file)
    channels = model.monitor.channels

    # directly analyze the model from the train object
    best_results = _extract_best_results(
        channels=channels,
        mode='misclass',
        check_dataset='valid',
        check_channels=['_y_misclass'],
    )
    best_epochs = _get_best_epochs(best_results)
def run_job(job_id, meta_job_path, yaml_template_file, base_config_path,
            hyper_params, cache_path=None):
    """Run one hyper-parameter evaluation job, reusing a cached model if present.

    Flattens the YAML train template with the given hyper-parameters, trains
    the model (unless a cached result for the identical parameter string
    exists), and returns the validation misclassification of the best epoch.

    Parameters
    ----------
    job_id : job identifier; used for the per-job output link and logging.
    meta_job_path : root of the meta-job; 'cache' and 'output' live below it.
    yaml_template_file : path of the YAML train template to flatten.
    base_config_path : path of the base config file.
    hyper_params : dict of hyper-parameters; each value arrives as a
        1-element sequence (presumably spearmint-style — confirm with caller)
        and is unwrapped in place.
    cache_path : optional override for the cache directory
        (defaults to <meta_job_path>/cache).

    Returns
    -------
    float : validation misclassification of the best epoch, or
        BAD_SOLUTION_RETURN_VALUE (1) for configurations that fail the
        structural sanity check or crash during training.
    """
    # ConstrainedGPEIOptChooser requires NaN or inf to recognize constraints
    # BAD_SOLUTION_RETURN_VALUE = np.inf
    BAD_SOLUTION_RETURN_VALUE = 1

    # TODO: nice-to-have: make logging a little nicer
    init_logging(pylearn2_loglevel=logging.INFO)

    # unwrap the 1-element value sequences in place
    for key, value in hyper_params.items():
        hyper_params[key] = value[0]
        log.debug('{} = {}'.format(key, hyper_params[key]))

    base_config = load_config_file(base_config_path)

    # fix dataset path
    localizer_class = base_config.get(
        'localizer_class',
        'deepthought.datasets.rwanda2013rhythms.PathLocalizer')  # for compatibility with old code
    localizer = load_class(localizer_class)
    base_config = localizer.localize_config(base_config)

    # FIX: hyper_params is a plain dict, so the original
    # hasattr(hyper_params, 'random_seed') was always False and a
    # caller-supplied 'random_seed' would have been overwritten;
    # a membership test checks the dict contents correctly.
    if not hasattr(base_config, 'random_seed') \
            and 'random_seed' not in hyper_params:
        random_seed = random.randint(0, 100)
        hyper_params['random_seed'] = random_seed
        log.debug('using random seed {}'.format(random_seed))

    # build a verbose, deterministic job id from the sorted parameters
    param_str = ''
    for key in sorted(hyper_params.keys()):  # deterministic order
        param_str += '_{}_{}'.format(key, hyper_params[key])
    verbose_job_id = str(job_id) + param_str
    base_config.verbose_job_id = verbose_job_id

    if cache_path is None:
        cache_path = os.path.join(meta_job_path, 'cache')
    job_output_path = os.path.join(meta_job_path, 'output', str(job_id))
    output_path = os.path.join(cache_path, convert_to_valid_filename(param_str))

    # check whether cached result already exists
    model = None
    failed_file = os.path.join(output_path, 'failed')
    if os.path.exists(output_path):
        # create a link to job-id
        symlink(output_path, job_output_path, override=True, ignore_errors=True)

        # using cached result
        model_file = os.path.join(output_path, 'mlp.pkl')
        if os.path.exists(model_file):
            try:
                with log_timing(
                        log, 'loading cached model from {}'.format(model_file)):
                    model = serial.load(model_file)
                channels = model.monitor.channels
            except Exception as e:
                log.error('unexpected exception loading model from {}: {} \n{}'
                          .format(model_file, e, traceback.format_exc()))
        else:
            # if mlp.pkl is missing but mlp-best.pkl is there,
            # then it was a bad configuration
            if os.path.exists(failed_file):
                log.info('cache contains \'failed\' flag')
                return BAD_SOLUTION_RETURN_VALUE

    if model is None:
        # needs to go here to get the internal reference resolved
        base_config.output_path = output_path

        # sanity check of structural parameters:
        if not structural_param_check(
                merge_params(base_config, hyper_params),
                raise_error=False):
            touch(failed_file, mkdirs=True)  # set marker
            return BAD_SOLUTION_RETURN_VALUE

        ensure_dir_exists(output_path)
        symlink(output_path, job_output_path, override=True, ignore_errors=True)

        yaml = flatten_yaml(
            yaml_file_path=yaml_template_file,
            base_config=base_config,
            hyper_params=hyper_params,
        )
        save_yaml_file(yaml_str=yaml,
                       yaml_file_path=os.path.join(output_path, 'train.yaml'))

        with log_timing(log, 'loading yaml for job {}'.format(job_id)):
            train = load_yaml(yaml)[0]

        with log_timing(log, 'running job {} '.format(job_id)):
            try:
                train.main_loop()
            except Exception as e:
                log.error('unexpected exception during training: {} \n{}'
                          .format(e, traceback.format_exc()))
                touch(failed_file, mkdirs=True)  # set marker
                return BAD_SOLUTION_RETURN_VALUE

        channels = train.model.monitor.channels

    # directly analyze the model from the train object
    best_results = _extract_best_results(
        channels=channels,
        mode='misclass',
        check_dataset='valid',
        check_channels=['_y_misclass'],
    )
    best_epochs = _get_best_epochs(best_results)
    best_epoch = best_epochs[-1]  # take last entry -> more stable???

    # report every available dataset/measure channel for the best epoch
    # (single-argument print(...) prints identically under Python 2 and 3)
    datasets = ['train', 'valid', 'test', 'post']
    measures = ['_y_misclass', '_objective', '_nll']
    print('results for job {}'.format(job_id))
    for measure, dataset in itertools.product(measures, datasets):
        channel = dataset + measure
        if channel in channels:
            value = float(channels[channel].val_record[best_epoch])
            print('{:>30} : {:.4f}'.format(channel, value))

    # return float(channels['test_y_misclass'].val_record[best_epoch])
    return float(channels['valid_y_misclass'].val_record[best_epoch])
def run_job(job_id, meta_job_path, yaml_template_file, base_config_path,
            hyper_params, cache_path=None):
    """Evaluate one hyper-parameter configuration, with result caching.

    The YAML train template is flattened with *hyper_params*; if a cached
    model for the same parameter string exists it is reused, otherwise the
    model is trained. The score returned to the optimizer is the validation
    misclassification of the best epoch.

    Parameters
    ----------
    job_id : identifier of this job (output link name, log messages).
    meta_job_path : meta-job root; cache/ and output/ are created below it.
    yaml_template_file : YAML train template to flatten.
    base_config_path : base config file to load.
    hyper_params : dict whose values are 1-element sequences (presumably
        spearmint-style — confirm with caller); unwrapped in place.
    cache_path : cache directory; defaults to <meta_job_path>/cache.

    Returns
    -------
    float : best-epoch validation misclassification, or
        BAD_SOLUTION_RETURN_VALUE (1) when the configuration is rejected by
        the sanity check or training raises.
    """
    # ConstrainedGPEIOptChooser requires NaN or inf to recognize constraints
    # BAD_SOLUTION_RETURN_VALUE = np.inf
    BAD_SOLUTION_RETURN_VALUE = 1

    # TODO: nice-to-have: make logging a little nicer
    init_logging(pylearn2_loglevel=logging.INFO)

    # each hyper-parameter value arrives wrapped in a 1-element sequence
    for key, value in hyper_params.items():
        hyper_params[key] = value[0]
        log.debug('{} = {}'.format(key, hyper_params[key]))

    base_config = load_config_file(base_config_path)

    # fix dataset path
    localizer_class = base_config.get(
        'localizer_class',
        'deepthought.datasets.rwanda2013rhythms.PathLocalizer')  # for compatibility with old code
    localizer = load_class(localizer_class)
    base_config = localizer.localize_config(base_config)

    # FIX: the original used hasattr(hyper_params, 'random_seed'), which is
    # always False for a dict — an optimizer-supplied 'random_seed' would
    # have been clobbered. Test dict membership instead.
    if not hasattr(base_config, 'random_seed') \
            and 'random_seed' not in hyper_params:
        random_seed = random.randint(0, 100)
        hyper_params['random_seed'] = random_seed
        log.debug('using random seed {}'.format(random_seed))

    # deterministic verbose id: parameters in sorted-key order
    param_str = ''
    for key in sorted(hyper_params.keys()):  # deterministic order
        param_str += '_{}_{}'.format(key, hyper_params[key])
    verbose_job_id = str(job_id) + param_str
    base_config.verbose_job_id = verbose_job_id

    if cache_path is None:
        cache_path = os.path.join(meta_job_path, 'cache')
    job_output_path = os.path.join(meta_job_path, 'output', str(job_id))
    output_path = os.path.join(
        cache_path,
        convert_to_valid_filename(param_str),
    )

    # check whether cached result already exists
    model = None
    failed_file = os.path.join(output_path, 'failed')
    if os.path.exists(output_path):
        # create a link to job-id
        symlink(output_path, job_output_path, override=True, ignore_errors=True)

        # using cached result
        model_file = os.path.join(output_path, 'mlp.pkl')
        if os.path.exists(model_file):
            try:
                with log_timing(
                        log, 'loading cached model from {}'.format(model_file)):
                    model = serial.load(model_file)
                channels = model.monitor.channels
            except Exception as e:
                log.error('unexpected exception loading model from {}: {} \n{}'
                          .format(model_file, e, traceback.format_exc()))
        else:
            # if mlp.pkl is missing but mlp-best.pkl is there,
            # then it was a bad configuration
            if os.path.exists(failed_file):
                log.info('cache contains \'failed\' flag')
                return BAD_SOLUTION_RETURN_VALUE

    if model is None:
        # needs to go here to get the internal reference resolved
        base_config.output_path = output_path

        # sanity check of structural parameters:
        if not structural_param_check(
                merge_params(base_config, hyper_params),
                raise_error=False):
            touch(failed_file, mkdirs=True)  # set marker
            return BAD_SOLUTION_RETURN_VALUE

        ensure_dir_exists(output_path)
        symlink(output_path, job_output_path, override=True, ignore_errors=True)

        yaml = flatten_yaml(
            yaml_file_path=yaml_template_file,
            base_config=base_config,
            hyper_params=hyper_params,
        )
        save_yaml_file(
            yaml_str=yaml,
            yaml_file_path=os.path.join(output_path, 'train.yaml'),
        )

        with log_timing(log, 'loading yaml for job {}'.format(job_id)):
            train = load_yaml(yaml)[0]

        with log_timing(log, 'running job {} '.format(job_id)):
            try:
                train.main_loop()
            except Exception as e:
                log.error('unexpected exception during training: {} \n{}'
                          .format(e, traceback.format_exc()))
                touch(failed_file, mkdirs=True)  # set marker
                return BAD_SOLUTION_RETURN_VALUE

        channels = train.model.monitor.channels

    # directly analyze the model from the train object
    best_results = _extract_best_results(
        channels=channels,
        mode='misclass',
        check_dataset='valid',
        check_channels=['_y_misclass'],
    )
    best_epochs = _get_best_epochs(best_results)
    best_epoch = best_epochs[-1]  # take last entry -> more stable???

    # print best-epoch values for all dataset/measure channels present
    # (single-argument print(...) behaves the same under Python 2 and 3)
    datasets = ['train', 'valid', 'test', 'post']
    measures = ['_y_misclass', '_objective', '_nll']
    print('results for job {}'.format(job_id))
    for measure, dataset in itertools.product(measures, datasets):
        channel = dataset + measure
        if channel in channels:
            value = float(channels[channel].val_record[best_epoch])
            print('{:>30} : {:.4f}'.format(channel, value))

    # return float(channels['test_y_misclass'].val_record[best_epoch])
    return float(channels['valid_y_misclass'].val_record[best_epoch])
# NOTE(review): whitespace-mangled fragment — the tail of an import_dataset()
# function collapsed onto one line (it begins mid-metadata-dict; 'stimulus',
# 'label_converter', 'trial_data', 'metadb', 'metadb_file', 'label',
# 'channel_ids' are locals defined before this view) followed by the module's
# __main__ block. Not valid Python as-is; apparently a longer variant of the
# same fragment seen earlier in this file. Left verbatim — restore the
# original line breaks from version control before editing.
'stimulus' : stimulus, 'stimulus_id' : stimulus_id, 'rhythm_type' : label_converter.get_label(stimulus_id, 'rhythm'), 'tempo' : label_converter.get_label(stimulus_id, 'tempo'), 'audio_file' : label_converter.get_label(stimulus_id, 'audio_file'), 'trial_no' : 1, 'trial_type' : 'perception', 'condition' : 'n/a', 'channels' : channel_ids, } # save data savepath = generate_filepath_from_metadata(metadata) save(os.path.join(target_path, savepath), (trial_data, metadata), mkdirs=True) # save metadata metadb[savepath] = metadata log.debug('imported {}={} as {}'.format(label, metadata['meta_label'], savepath)) save(metadb_file, metadb, mkdirs=True) log.info('import finished') if __name__ == '__main__': import deepthought from deepthought.util.config_util import init_logging init_logging() source_path = os.path.join(deepthought.DATA_PATH, 'rwanda2013rhythms', 'eeg') target_path = os.path.join(deepthought.DATA_PATH, 'rwanda2013rhythms', 'multichannel') import_dataset(source_path, target_path)