import itertools
import logging
import os
import random
import traceback

from pylearn2.utils import serial

# NOTE: project-specific helpers (log, log_timing, load_yaml,
# load_yaml_template, load_config_file, empty_config, load_class,
# PathLocalizer, flatten_yaml, save_yaml_file, structural_param_check,
# merge_params, convert_to_valid_filename, symlink, touch, ensure_dir_exists,
# init_logging, _extract_best_results, _get_best_epochs) are assumed to be
# imported from the deepthought codebase.


def load_results(experiment_root):
    # load the model (mlp_best.pkl)
    model_file = os.path.join(experiment_root, 'mlp_best.pkl')
    with log_timing(log, 'loading model from {}'.format(model_file)):
        model = serial.load(model_file)

    # load train
    train_yaml_file = os.path.join(experiment_root, 'train.yaml')
    train_yaml = load_yaml_template(train_yaml_file)

    # fix dataset path
    localizer = PathLocalizer()
    train_yaml = localizer.localize_yaml(train_yaml)

    with log_timing(log, 'loading train from {}'.format(train_yaml_file)):
        train = load_yaml(train_yaml)[0]

    return train, model
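# Usage sketch (hypothetical experiment path; assumes the imports above and a
# configured `log`):
#
#   train, model = load_results('experiments/run1')
#   print model.monitor.channels.keys()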
# fragment from the training script: assumes `args` (parsed command-line
# arguments) and `train_yaml` (the YAML template string) are provided by the
# surrounding code

# load optional settings
if args.config is not None:
    config = load_config_file(args.config)
else:
    config = empty_config()

if not hasattr(config, 'random_seed'):
    random_seed = random.randint(0, 100)
    config.random_seed = random_seed
    log.debug('using random seed {}'.format(random_seed))

# load optional localizer
if args.localizer is not None:
    localizer_class = args.localizer
else:
    localizer_class = config.get(
        'localizer_class',
        'deepthought.datasets.rwanda2013rhythms.PathLocalizer')  # for compatibility with old code
localizer = load_class(localizer_class)

# localize settings
config = localizer.localize_config(config)

# apply settings
train_yaml = train_yaml % config

# localize YAML
train_yaml = localizer.localize_yaml(train_yaml)

train, _ = load_yaml(train_yaml)
train.main_loop()
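# Sketch of the `train_yaml % config` step above (assumed semantics): the YAML
# template uses %-style mapping placeholders that are filled from the config
# before the localizer rewrites machine-specific paths. Illustrative values:
#
#   template = "dataset_root: %(dataset_root)s"
#   template % {'dataset_root': '/data/rhythms'}  # -> "dataset_root: /data/rhythms"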
def run_job(job_id, meta_job_path, yaml_template_file, base_config_path,
            hyper_params, cache_path=None):
    # ConstrainedGPEIOptChooser requires NaN or inf to recognize constraints
    # BAD_SOLUTION_RETURN_VALUE = np.inf
    BAD_SOLUTION_RETURN_VALUE = 1

    # TODO: nice-to-have: make logging a little nicer
    init_logging(pylearn2_loglevel=logging.INFO)

    # the optimizer passes each hyper-parameter value wrapped in a sequence
    for key, value in hyper_params.items():
        hyper_params[key] = value[0]
        log.debug('{} = {}'.format(key, hyper_params[key]))

    base_config = load_config_file(base_config_path)

    # fix dataset path
    localizer_class = base_config.get(
        'localizer_class',
        'deepthought.datasets.rwanda2013rhythms.PathLocalizer')  # for compatibility with old code
    localizer = load_class(localizer_class)
    base_config = localizer.localize_config(base_config)

    if not hasattr(base_config, 'random_seed') \
            and 'random_seed' not in hyper_params:  # dict lookup, not hasattr
        random_seed = random.randint(0, 100)
        hyper_params['random_seed'] = random_seed
        log.debug('using random seed {}'.format(random_seed))

    param_str = ''
    for key in sorted(hyper_params.keys()):  # deterministic order
        param_str += '_{}_{}'.format(key, hyper_params[key])
    verbose_job_id = str(job_id) + param_str
    base_config.verbose_job_id = verbose_job_id

    if cache_path is None:
        cache_path = os.path.join(meta_job_path, 'cache')
    job_output_path = os.path.join(meta_job_path, 'output', str(job_id))
    output_path = os.path.join(cache_path, convert_to_valid_filename(param_str))

    # check whether a cached result already exists
    model = None
    failed_file = os.path.join(output_path, 'failed')
    if os.path.exists(output_path):
        # create a link to the job id
        symlink(output_path, job_output_path, override=True, ignore_errors=True)

        # use the cached result
        model_file = os.path.join(output_path, 'mlp.pkl')
        if os.path.exists(model_file):
            try:
                with log_timing(log, 'loading cached model from {}'.format(model_file)):
                    model = serial.load(model_file)
                channels = model.monitor.channels
            except Exception as e:
                log.error('unexpected exception loading model from {}: {} \n{}'
                          .format(model_file, e, traceback.format_exc()))
        elif os.path.exists(failed_file):
            # mlp.pkl is missing and the 'failed' marker is set
            # -> this was a bad configuration
            log.info('cache contains \'failed\' flag')
            return BAD_SOLUTION_RETURN_VALUE

    if model is None:
        # output_path = os.path.join(
        #     meta_job_path, 'output', convert_to_valid_filename(verbose_job_id))

        # needs to go here to get the internal reference resolved
        base_config.output_path = output_path

        # sanity check of structural parameters
        if not structural_param_check(
                merge_params(base_config, hyper_params),
                raise_error=False):
            touch(failed_file, mkdirs=True)  # set marker
            return BAD_SOLUTION_RETURN_VALUE

        ensure_dir_exists(output_path)
        symlink(output_path, job_output_path, override=True, ignore_errors=True)

        yaml = flatten_yaml(
            yaml_file_path=yaml_template_file,
            base_config=base_config,
            hyper_params=hyper_params)
        save_yaml_file(
            yaml_str=yaml,
            yaml_file_path=os.path.join(output_path, 'train.yaml'))

        with log_timing(log, 'loading yaml for job {}'.format(job_id)):
            train = load_yaml(yaml)[0]

        with log_timing(log, 'running job {}'.format(job_id)):
            try:
                train.main_loop()
            except Exception as e:
                log.error('unexpected exception during training: {} \n{}'
                          .format(e, traceback.format_exc()))
                touch(failed_file, mkdirs=True)  # set marker
                return BAD_SOLUTION_RETURN_VALUE

        # directly analyze the model from the train object
        channels = train.model.monitor.channels

    best_results = _extract_best_results(
        channels=channels,
        mode='misclass',
        check_dataset='valid',
        check_channels=['_y_misclass'])
    best_epochs = _get_best_epochs(best_results)
    best_epoch = best_epochs[-1]  # take last entry -> more stable???

    datasets = ['train', 'valid', 'test', 'post']
    measures = ['_y_misclass', '_objective', '_nll']
    print 'results for job {}'.format(job_id)
    for measure, dataset in itertools.product(measures, datasets):
        channel = dataset + measure
        if channel in channels:
            value = float(channels[channel].val_record[best_epoch])
            print '{:>30} : {:.4f}'.format(channel, value)

    # return float(channels['test_y_misclass'].val_record[best_epoch])
    return float(channels['valid_y_misclass'].val_record[best_epoch])
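# Usage sketch (hypothetical paths and hyper-parameter): run_job is meant to be
# driven by a hyper-parameter optimizer such as spearmint's
# ConstrainedGPEIOptChooser (referenced above), which supplies each value
# wrapped in a sequence -- hence the value[0] unpacking at the top:
#
#   misclass = run_job(
#       job_id=1,
#       meta_job_path='jobs/meta',
#       yaml_template_file='train.yaml.template',
#       base_config_path='base_config.yaml',
#       hyper_params={'learning_rate': [0.01]},
#   )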