예제 #1
0
def load_results(experiment_root):
    """Load the best model and the train object stored for an experiment.

    The model is read from ``mlp_best.pkl`` and the train description from
    ``train.yaml``; both files are looked up directly in ``experiment_root``.
    The loaded YAML template is passed through
    ``PathLocalizer.localize_yaml`` (to fix the dataset path) before it is
    handed to ``load_yaml``.

    Returns the tuple ``(train, model)``, where ``train`` is the first item
    of the ``load_yaml`` result.
    """
    # best model snapshot (mlp_best.pkl)
    best_model_path = os.path.join(experiment_root, 'mlp_best.pkl')
    model_msg = 'loading model from {}'.format(best_model_path)
    with log_timing(log, model_msg):
        model = serial.load(best_model_path)

    # train description
    yaml_path = os.path.join(experiment_root, 'train.yaml')
    yaml_template = load_yaml_template(yaml_path)

    # fix dataset path
    path_localizer = PathLocalizer()
    localized_yaml = path_localizer.localize_yaml(yaml_template)

    train_msg = 'loading train from {}'.format(yaml_path)
    with log_timing(log, train_msg):
        loaded = load_yaml(localized_yaml)
        train = loaded[0]

    return train, model
예제 #2
0
def load_results(experiment_root):
    """Return ``(train, model)`` restored from an experiment directory.

    ``model`` is deserialized from ``<experiment_root>/mlp_best.pkl``.
    ``train`` is the first item returned by ``load_yaml`` for the template
    in ``<experiment_root>/train.yaml``, after the template has been run
    through ``PathLocalizer.localize_yaml`` to fix the dataset path.
    """
    def in_root(file_name):
        # Resolve a file name relative to the experiment directory.
        return os.path.join(experiment_root, file_name)

    # load the model (mlp_best.pkl)
    pkl_path = in_root('mlp_best.pkl')
    with log_timing(log, 'loading model from {}'.format(pkl_path)):
        model = serial.load(pkl_path)

    # load the train template, then fix the dataset path in it
    template_path = in_root('train.yaml')
    template = load_yaml_template(template_path)
    template = PathLocalizer().localize_yaml(template)

    with log_timing(log, 'loading train from {}'.format(template_path)):
        train = load_yaml(template)[0]

    return train, model
예제 #3
0
        config = load_config_file(args.config)
    else:
        config = empty_config()

    if not hasattr(config, 'random_seed'):
        random_seed = random.randint(0, 100)
        config.random_seed = random_seed
        log.debug('using random seed {}'.format(random_seed))

    # load optional localizer
    if args.localizer is not None:
        localizer_class = args.localizer
    else:
        localizer_class = config.get(
            'localizer_class',
            'deepthought.datasets.rwanda2013rhythms.PathLocalizer')
        # for compatibility with old code
    localizer = load_class(localizer_class)

    # localize settings
    config = localizer.localize_config(config)

    # apply settings
    train_yaml = train_yaml % config

    # localize YAML
    train_yaml = localizer.localize_yaml(train_yaml)

    train, _ = load_yaml(train_yaml)

    train.main_loop()
예제 #4
0
def run_job(job_id,
            meta_job_path,
            yaml_template_file,
            base_config_path,
            hyper_params,
            cache_path=None):
    """Run one hyper-parameter job, reusing a cached result when possible.

    Parameters
    ----------
    job_id
        Identifier of the job; used for the per-job output link and logging.
    meta_job_path
        Directory under which ``output/<job_id>`` and, by default, ``cache``
        are placed.
    yaml_template_file
        Path of the YAML template passed to ``flatten_yaml``.
    base_config_path
        Path of the base configuration loaded with ``load_config_file``.
    hyper_params
        Mapping from parameter name to an indexable value.  Every entry is
        replaced IN PLACE by its first element, and a ``random_seed`` entry
        is added when neither the base config nor this mapping has one.
    cache_path
        Directory holding cached results; defaults to
        ``<meta_job_path>/cache``.

    Returns
    -------
    float or int
        ``valid_y_misclass`` at the selected best epoch, or
        ``BAD_SOLUTION_RETURN_VALUE`` (1) when the cache holds a ``failed``
        marker, the structural parameter check fails, or training raises.
    """
    # ConstrainedGPEIOptChooser requires NaN or inf to recognize constraints
    #     BAD_SOLUTION_RETURN_VALUE = np.inf;
    BAD_SOLUTION_RETURN_VALUE = 1

    # TODO: nice-to-have: make logging a little nicer
    init_logging(pylearn2_loglevel=logging.INFO)

    # every hyper-parameter arrives wrapped in a sequence; unwrap in place
    # (iterate over a snapshot so the mapping may be safely reassigned)
    for key, value in list(hyper_params.items()):
        hyper_params[key] = value[0]
        log.debug('{} = {}'.format(key, hyper_params[key]))

    base_config = load_config_file(base_config_path)

    # fix dataset path
    localizer_class = base_config.get(
        'localizer_class',
        'deepthought.datasets.rwanda2013rhythms.PathLocalizer')
    # for compatibility with old code
    localizer = load_class(localizer_class)
    base_config = localizer.localize_config(base_config)

    # hyper_params is a mapping, so a seed supplied there must be detected
    # by key membership; hasattr() on a plain dict never finds it and the
    # supplied seed would be overwritten.
    if not hasattr(base_config, 'random_seed') \
            and 'random_seed' not in hyper_params:
        random_seed = random.randint(0, 100)
        hyper_params['random_seed'] = random_seed
        log.debug('using random seed {}'.format(random_seed))

    param_str = ''
    for key in sorted(hyper_params.keys()):  # deterministic order
        param_str += '_{}_{}'.format(key, hyper_params[key])

    verbose_job_id = str(job_id) + param_str
    base_config.verbose_job_id = verbose_job_id

    if cache_path is None:
        cache_path = os.path.join(meta_job_path, 'cache')

    job_output_path = os.path.join(meta_job_path, 'output', str(job_id))
    output_path = os.path.join(cache_path,
                               convert_to_valid_filename(param_str))

    # check whether cached result already exists
    model = None
    channels = None
    failed_file = os.path.join(output_path, 'failed')
    if os.path.exists(output_path):
        # create a link to job-id
        symlink(output_path,
                job_output_path,
                override=True,
                ignore_errors=True)

        # using cached result
        model_file = os.path.join(output_path, 'mlp.pkl')
        if os.path.exists(model_file):
            try:
                with log_timing(
                        log,
                        'loading cached model from {}'.format(model_file)):
                    model = serial.load(model_file)

                    channels = model.monitor.channels
            except Exception as e:
                log.error('unexpected exception loading model from {}: {} \n{}'\
                      .format(model_file, e, traceback.format_exc()))
                # an unusable cache entry must not leave a half-loaded
                # model behind (channels would be unset): retrain instead
                model = None
                channels = None
        else:
            # if mlp.pkl is missing but the 'failed' marker is there,
            # then it was a bad configuration
            if os.path.exists(failed_file):
                log.info('cache contains \'failed\' flag')
                return BAD_SOLUTION_RETURN_VALUE

    if model is None:

        # needs to go here to get the internal reference resolved
        base_config.output_path = output_path

        # sanity check of structural parameters:
        if not structural_param_check(
                merge_params(base_config, hyper_params),
                raise_error=False,
        ):
            touch(failed_file, mkdirs=True)
            # set marker
            return BAD_SOLUTION_RETURN_VALUE

        ensure_dir_exists(output_path)
        symlink(output_path,
                job_output_path,
                override=True,
                ignore_errors=True)

        yaml = flatten_yaml(
            yaml_file_path=yaml_template_file,
            base_config=base_config,
            hyper_params=hyper_params,
        )

        save_yaml_file(yaml_str=yaml,
                       yaml_file_path=os.path.join(output_path, 'train.yaml'))

        with log_timing(log, 'loading yaml for job {}'.format(job_id)):
            train = load_yaml(yaml)[0]

        with log_timing(log, 'running job {} '.format(job_id)):
            try:
                train.main_loop()
            except Exception as e:
                log.error('unexpected exception during training: {} \n{}'\
                          .format(e, traceback.format_exc()))
                touch(failed_file, mkdirs=True)
                # set marker
                return BAD_SOLUTION_RETURN_VALUE

        channels = train.model.monitor.channels

    # directly analyze the model from the train object
    best_results = _extract_best_results(
        channels=channels,
        mode='misclass',
        check_dataset='valid',
        check_channels=['_y_misclass'],
    )
    best_epochs = _get_best_epochs(best_results)
    best_epoch = best_epochs[-1]
    # take last entry -> more stable???

    datasets = ['train', 'valid', 'test', 'post']
    measures = ['_y_misclass', '_objective', '_nll']

    # single-argument print(...) behaves the same as a statement (Python 2)
    # and as a function call (Python 3)
    print('results for job {}'.format(job_id))
    for measure, dataset in itertools.product(measures, datasets):
        channel = dataset + measure
        if channel in channels:
            value = float(channels[channel].val_record[best_epoch])
            print('{:>30} : {:.4f}'.format(channel, value))

#     return float(channels['test_y_misclass'].val_record[best_epoch]);
    return float(channels['valid_y_misclass'].val_record[best_epoch])
예제 #5
0
 # load optional settings
 if args.config is not None:
     config = load_config_file(args.config);
 else:
     config = empty_config();
     
 if not hasattr(config, 'random_seed'):
     random_seed = random.randint(0, 100);
     config.random_seed = random_seed;
     log.debug('using random seed {}'.format(random_seed))
 
 # load optional localizer
 if args.localizer is not None:
     localizer_class = args.localizer;
 else:
     localizer_class = config.get('localizer_class', 
                                   'deepthought.datasets.rwanda2013rhythms.PathLocalizer'); # for compatibility with old code    
 localizer = load_class(localizer_class);
 
 # localize settings
 config = localizer.localize_config(config);
 
 # apply settings    
 train_yaml = train_yaml % config;
 
 # localize YAML
 train_yaml = localizer.localize_yaml(train_yaml);   
      
 train, _ = load_yaml(train_yaml);
 
 train.main_loop();
예제 #6
0
def run_job(job_id, meta_job_path, yaml_template_file, base_config_path, hyper_params, cache_path=None):
    """Train the model for one hyper-parameter setting, or reuse its cache.

    Parameters
    ----------
    job_id
        Identifier of the job; used for the per-job output link and logging.
    meta_job_path
        Directory under which ``output/<job_id>`` and, by default, ``cache``
        are placed.
    yaml_template_file
        Path of the YAML template passed to ``flatten_yaml``.
    base_config_path
        Path of the base configuration loaded with ``load_config_file``.
    hyper_params
        Mapping from parameter name to an indexable value.  Every entry is
        replaced IN PLACE by its first element, and a ``random_seed`` entry
        is added when neither the base config nor this mapping has one.
    cache_path
        Directory holding cached results; defaults to
        ``<meta_job_path>/cache``.

    Returns
    -------
    float or int
        ``valid_y_misclass`` at the selected best epoch, or
        ``BAD_SOLUTION_RETURN_VALUE`` (1) when the cache holds a ``failed``
        marker, the structural parameter check fails, or training raises.
    """
    # ConstrainedGPEIOptChooser requires NaN or inf to recognize constraints
    #     BAD_SOLUTION_RETURN_VALUE = np.inf;
    BAD_SOLUTION_RETURN_VALUE = 1

    # TODO: nice-to-have: make logging a little nicer
    init_logging(pylearn2_loglevel=logging.INFO)

    # every hyper-parameter arrives wrapped in a sequence; unwrap in place
    # (iterate over a snapshot so the mapping may be safely reassigned)
    for key, value in list(hyper_params.items()):
        hyper_params[key] = value[0]
        log.debug('{} = {}'.format(key, hyper_params[key]))

    base_config = load_config_file(base_config_path)

    # fix dataset path
    localizer_class = base_config.get(
        'localizer_class',
        'deepthought.datasets.rwanda2013rhythms.PathLocalizer')  # for compatibility with old code
    localizer = load_class(localizer_class)
    base_config = localizer.localize_config(base_config)

    # A seed passed in hyper_params is a mapping KEY; hasattr() on a plain
    # dict never sees it, which would silently overwrite the supplied seed.
    seed_in_config = hasattr(base_config, 'random_seed')
    seed_in_params = 'random_seed' in hyper_params
    if not seed_in_config and not seed_in_params:
        random_seed = random.randint(0, 100)
        hyper_params['random_seed'] = random_seed
        log.debug('using random seed {}'.format(random_seed))

    param_str = ''
    for key in sorted(hyper_params.keys()):  # deterministic order
        param_str += '_{}_{}'.format(key, hyper_params[key])

    verbose_job_id = str(job_id) + param_str
    base_config.verbose_job_id = verbose_job_id

    if cache_path is None:
        cache_path = os.path.join(meta_job_path, 'cache')

    job_output_path = os.path.join(meta_job_path, 'output', str(job_id))
    output_path = os.path.join(cache_path,
                               convert_to_valid_filename(param_str))

    # check whether cached result already exists
    model = None
    channels = None
    failed_file = os.path.join(output_path, 'failed')
    if os.path.exists(output_path):
        # create a link to job-id
        symlink(output_path, job_output_path, override=True, ignore_errors=True)

        # using cached result
        model_file = os.path.join(output_path, 'mlp.pkl')
        if os.path.exists(model_file):
            try:
                with log_timing(log, 'loading cached model from {}'.format(model_file)):
                    model = serial.load(model_file)

                    channels = model.monitor.channels
            except Exception as e:
                log.error('unexpected exception loading model from {}: {} \n{}'\
                      .format(model_file, e, traceback.format_exc()))
                # a cache entry that cannot be fully read must not leave a
                # model without channels behind: fall through and retrain
                model = None
                channels = None
        else:
            # if mlp.pkl is missing but the 'failed' marker is there,
            # then it was a bad configuration
            if os.path.exists(failed_file):
                log.info('cache contains \'failed\' flag')
                return BAD_SOLUTION_RETURN_VALUE

    if model is None:

        # needs to go here to get the internal reference resolved
        base_config.output_path = output_path

        # sanity check of structural parameters:
        if not structural_param_check(
                merge_params(base_config, hyper_params),
                raise_error=False,
        ):
            touch(failed_file, mkdirs=True)  # set marker
            return BAD_SOLUTION_RETURN_VALUE

        ensure_dir_exists(output_path)
        symlink(output_path, job_output_path, override=True, ignore_errors=True)

        yaml = flatten_yaml(
            yaml_file_path=yaml_template_file,
            base_config=base_config,
            hyper_params=hyper_params,
        )

        save_yaml_file(
            yaml_str=yaml,
            yaml_file_path=os.path.join(output_path, 'train.yaml'),
        )

        with log_timing(log, 'loading yaml for job {}'.format(job_id)):
            train = load_yaml(yaml)[0]

        with log_timing(log, 'running job {} '.format(job_id)):
            try:
                train.main_loop()
            except Exception as e:
                log.error('unexpected exception during training: {} \n{}'\
                          .format(e, traceback.format_exc()))
                touch(failed_file, mkdirs=True)  # set marker
                return BAD_SOLUTION_RETURN_VALUE

        channels = train.model.monitor.channels

    # directly analyze the model from the train object
    best_results = _extract_best_results(
        channels=channels,
        mode='misclass',
        check_dataset='valid',
        check_channels=['_y_misclass'],
    )
    best_epochs = _get_best_epochs(best_results)
    best_epoch = best_epochs[-1]  # take last entry -> more stable???

    datasets = ['train', 'valid', 'test', 'post']
    measures = ['_y_misclass', '_objective', '_nll']

    # single-argument print(...) behaves the same as a statement (Python 2)
    # and as a function call (Python 3)
    print('results for job {}'.format(job_id))
    for measure, dataset in itertools.product(measures, datasets):
        channel = dataset + measure
        if channel in channels:
            value = float(channels[channel].val_record[best_epoch])
            print('{:>30} : {:.4f}'.format(channel, value))

#     return float(channels['test_y_misclass'].val_record[best_epoch]);
    return float(channels['valid_y_misclass'].val_record[best_epoch])