Example #1
def run_epoch(trainobj):
    """
    Runs an epoch. Returns True to continue or
    False to terminate.
    """

    if trainobj.first_callbacks_and_monitoring:
        trainobj.run_callbacks_and_monitoring()
        trainobj.first_callbacks_and_monitoring = False
        return True

    rval = True
    if trainobj.algorithm is None:
        rval = trainobj.model.train_all(dataset=trainobj.dataset)
        if rval is not None:
            raise ValueError("Model.train_all should not return " +
                             "anything. Use Model.continue_learning " +
                             "to control whether learning continues.")
        rval = post_epoch(trainobj)
    else:
        with log_timing(logger, None, level=logging.DEBUG,
                        callbacks=[trainobj.total_seconds.set_value]):
            with log_timing(logger, None, final_msg='Time this epoch:',
                            callbacks=[trainobj.training_seconds.set_value]):
                rval = trainobj.algorithm.train(dataset=trainobj.dataset)
            if rval is not None:
                raise ValueError("TrainingAlgorithm.train should not "
                                 "return anything. Use "
                                 "TrainingAlgorithm.continue_learning "
                                 "to control whether learning "
                                 "continues.")
            rval = post_epoch(trainobj)
    return rval
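All of these examples lean on a log_timing context manager that logs how long the wrapped block took and can forward the elapsed seconds to optional callbacks (above, the set_value methods of shared variables). The snippet below is only a minimal sketch of such a helper, written here for orientation; it is not the actual implementation, and its signature is inferred from how the examples call it.

import logging
import time
from contextlib import contextmanager


@contextmanager
def log_timing(logger, task, final_msg=None, callbacks=None,
               level=logging.INFO):
    # Minimal sketch (assumed behaviour): time the wrapped block, log the
    # result, and pass the elapsed seconds to any callbacks.
    if task is not None:
        logger.log(level, task)
    start = time.time()
    try:
        yield
    finally:
        seconds = time.time() - start
        logger.log(level, '%s %.2fs', final_msg or 'Done after', seconds)
        for callback in (callbacks or []):
            callback(seconds)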
Example #2
def main():
    parser = argparse.ArgumentParser(description='Collect statistics.')
    parser.add_argument('infile',
                        nargs='+',
                        type=argparse.FileType('r'),
                        help="The pickle files to read.")
    parser.add_argument('-O',
                        '--output',
                        type=argparse.FileType('w'),
                        help="Output CSV file to write.")
    args = parser.parse_args()
    names = [(a.name, a) for a in args.infile]
    indices = [b[0] for b in names]
    columns = ['weight_scaling_error', 'geometric_error', 'arithmetic_error']
    df = pd.DataFrame(index=indices, columns=columns)
    try:
        dataset = None
        for i, (name, model_handle) in enumerate(names):
            with log_timing(
                    log,
                    "Processing %s [%d / %d]" % (name, i + 1, len(names))):
                with model_handle as f:
                    model = cPickle.load(f)
                if dataset is None:
                    with log_timing(log,
                                    "Loading test set",
                                    final_msg="Loaded."):
                        d = None
                        # HACK HACK HACK
                        for k in model.monitor._datasets:
                            if 'valid' in k or '50000' in k:
                                d = k
                                break
                        if d is None:
                            log.warning("No validation set found, using "
                                        "first dataset in monitor.")
                            d = model.monitor._datasets[0]
                        dataset = yload(d).get_test_set()
                d = compare_ensemble(model,
                                     dataset,
                                     input_scales={
                                         'h1': 2.,
                                         'y': 2.
                                     })
                df['weight_scaling_error'][name] = d['weight_scaling_error']
                df['geometric_error'][name] = d['geometric_error']
                df['arithmetic_error'][name] = d['arithmetic_error']
                np.save(model_handle.name + '.sca.npy',
                        d['weight_scaling_output'])
                np.save(model_handle.name + '.geo.npy', d['geometric_output'])
                np.save(model_handle.name + '.ari.npy', d['arithmetic_output'])
    finally:
        df.to_csv(args.output)
Example #3
def process_dataset(model, dataset, data_specs=None, output_fn=None,
                    batch_size=128):

    if data_specs is None:
        data_specs = (CompositeSpace((model.get_input_space(),
                                      model.get_output_space())),
                      ("features", "targets"))

    if output_fn is None:
        with log_timing(log, 'compiling output_fn'):
            minibatch = model.get_input_space().make_theano_batch()
            output_fn = theano.function(inputs=[minibatch],
                                        outputs=model.fprop(minibatch))

    it = dataset.iterator(mode='sequential',
                          batch_size=batch_size,
                          data_specs=data_specs)
    y_pred = []
    y_real = []
    output = []
    for minibatch, target in it:
        out = output_fn(minibatch)  # this hangs for convnet on Jeep2
        output.append(out)
        y_pred.append(np.argmax(out, axis=1))
        y_real.append(np.argmax(target, axis=1))
    y_pred = np.hstack(y_pred)
    y_real = np.hstack(y_real)
    output = np.vstack(output)

    return y_real, y_pred, output
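A quick way to sanity-check the outputs of process_dataset is to compare the predicted and true labels directly. This usage sketch assumes model and dataset are an already-trained pylearn2 model and a compatible dataset, with np and log available as in the examples:

# Hypothetical usage; `model` and `dataset` are assumed to exist already.
y_real, y_pred, output = process_dataset(model, dataset, batch_size=128)

# fraction of correctly classified examples
accuracy = np.mean(y_real == y_pred)
log.info('accuracy: {:.2%} on {} examples'.format(accuracy, len(y_real)))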
Example #4
    def on_monitor(self, model, dataset, algorithm):
        """
        Checks whether the model performs better than it did previously.
        If so, saves the model.

        Parameters
        ----------
        model : pylearn2.models.model.Model
            model.monitor must contain a channel with name given by
            self.channel_name
        dataset : pylearn2.datasets.dataset.Dataset
            Not used
        algorithm : TrainingAlgorithm
            Not used
        """
        monitor = model.monitor
        channels = monitor.channels
        channel = channels[self.channel_name]
        val_record = channel.val_record
        new_cost = val_record[-1]

        if self.coeff * new_cost < self.coeff * self.best_cost:
            self.best_cost = new_cost
            # Update the tag of the model object before saving it.
            self._update_tag(model)
            if self.store_best_model:
                self.best_model = deepcopy(model)
            if self.save_path is not None:
                with log_timing(log, 'Saving to ' + self.save_path):
                    serial.save(self.save_path, model, on_overwrite='backup')
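Hooks like on_monitor above belong to pylearn2 train extensions, which the Train object invokes after every monitoring pass. The wiring below is a hedged sketch: SaveBestExtension is a placeholder name for the class this method belongs to, the channel name is only illustrative, and train_set, model and sgd are assumed to be defined elsewhere; only Train and its extensions argument come from pylearn2.

from pylearn2.train import Train

train = Train(dataset=train_set,
              model=model,
              algorithm=sgd,
              extensions=[SaveBestExtension(channel_name='valid_y_misclass',
                                            save_path='mlp_best.pkl')])
train.main_loop()  # on_monitor() is called after each epoch's monitoring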
Example #5
    def on_monitor(self, model, dataset, algorithm):

        epoch = algorithm.monitor._epochs_seen
        model_file = self.save_path + self.save_prefix + str(epoch) + '.pkl'

        with log_timing(log, 'saving model to {}'.format(model_file)):
            serial.save(model_file, model, on_overwrite='backup')
Example #6
def process_dataset(model, dataset, data_specs=None, output_fn=None):

    if data_specs is None:
        data_specs = (CompositeSpace((model.get_input_space(),
                                      model.get_output_space())),
                      ("features", "targets"))

    if output_fn is None:
        with log_timing(log, 'compiling output_fn'):
            minibatch = model.get_input_space().make_theano_batch()
            output_fn = theano.function(inputs=[minibatch],
                                        outputs=model.fprop(minibatch))

    it = dataset.iterator('sequential',
                          batch_size=100,
                          data_specs=data_specs)
    y_pred = []
    y_real = []
    output = []
    for minibatch, target in it:
        out = output_fn(minibatch)  # this hangs for convnet on Jeep2
        output.append(out)
        y_pred.append(np.argmax(out, axis=1))
        y_real.append(np.argmax(target, axis=1))
    y_pred = np.hstack(y_pred)
    y_real = np.hstack(y_real)
    output = np.vstack(output)

    return y_real, y_pred, output
Example #7
    def on_monitor(self, model, dataset, algorithm):

        epoch = algorithm.monitor._epochs_seen
        model_file = self.save_path + self.save_prefix + str(epoch) + '.pkl'

        with log_timing(log, 'saving model to {}'.format(model_file)):
            serial.save(model_file, model, on_overwrite='backup')
Example #8
    def on_monitor(self, model, dataset, algorithm):
        """
        Checks whether the model performs better than, or (with this
        modification) at least as well as, it did previously.
        If so, saves the model.

        Parameters
        ----------
        model : pylearn2.models.model.Model
            model.monitor must contain a channel with name given by
            self.channel_name
        dataset : pylearn2.datasets.dataset.Dataset
            Not used
        algorithm : TrainingAlgorithm
            Not used
        """
        monitor = model.monitor
        channels = monitor.channels
        channel = channels[self.channel_name]
        val_record = channel.val_record
        new_cost = val_record[-1]

        if self.coeff * new_cost <= self.coeff * self.best_cost and \
           monitor._epochs_seen >= self.start_epoch:
            self.best_cost = new_cost
            # Update the tag of the model object before saving it.
            self._update_tag(model)
            if self.store_best_model:
                self.best_model = deepcopy(model)
            if self.save_path is not None:
                with log_timing(log, 'Saving to ' + self.save_path):
                    serial.save(self.save_path, model, on_overwrite='backup')
Example #9
def extract_output(config, best_epoch):
    # load best model
    model_file = os.path.join(config.experiment_root, 'epochs',
                              'epoch{}.pkl'.format(best_epoch))
    print 'loading ' + model_file
    model = serial.load(model_file)

    #     print model;

    # additional dataset params
    config.start_sample = 11200
    config.stop_sample = 12800
    config.name = 'test'

    # load dataset
    dataset, dataset_yaml = load_yaml_file(
        os.path.join(os.path.dirname(__file__), '..', 'run',
                     'dataset_template.yaml'),
        params=config,
    )

    with log_timing(log, 'processing dataset'):
        y_real, y_pred, output = process_dataset(model, dataset)

    return y_real, y_pred, output
Example #10
    def main_loop(self):
        """
        Repeatedly runs an epoch of the training algorithm, runs any
        epoch-level callbacks, and saves the model.
        """
        if self.algorithm is None:
            self.model.monitor = Monitor.get_monitor(self.model)
            self.setup_extensions()
            self.run_callbacks_and_monitoring()
            while True:
                rval = self.model.train_all(dataset=self.dataset)
                if rval is not None:
                    raise ValueError("Model.train_all should not return anything. Use Model.continue_learning to control whether learning continues.")
                self.model.monitor.report_epoch()
                if self.save_freq > 0 and self.model.monitor.epochs_seen % self.save_freq == 0:
                    self.save()
                continue_learning = self.model.continue_learning()
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break
        else:
            self.algorithm.setup(model=self.model, dataset=self.dataset)
            self.setup_extensions()
            if not hasattr(self.model, 'monitor'):
                # TODO: is this really necessary? I just put this error here
                # to prevent an AttributeError later, but I think we could
                # rewrite to avoid the AttributeError
                raise RuntimeError("The algorithm is responsible for setting"
                        " up the Monitor, but failed to.")
            if len(self.model.monitor._datasets)>0:
                # This monitoring channel keeps track of a shared variable,
                # which does not need inputs nor data.
                self.model.monitor.add_channel(name="monitor_seconds_per_epoch",
                                               ipt=None,
                                               val=self.monitor_time,
                                               data_specs=(NullSpace(), ''),
                                               dataset=self.model.monitor._datasets[0])
            self.run_callbacks_and_monitoring()
            while True:
                with log_timing(log, None, final_msg='Time this epoch:',
                                callbacks=[self.monitor_time.set_value]):
                    import time
                    print 'current time', time.strftime("%H:%M:%S")
                    rval = self.algorithm.train(dataset=self.dataset)
                if rval is not None:
                    raise ValueError("TrainingAlgorithm.train should not return anything. Use TrainingAlgorithm.continue_learning to control whether learning continues.")
                self.model.monitor.report_epoch()
                self.run_callbacks_and_monitoring()
                if self.save_freq > 0 and self.model.monitor._epochs_seen % self.save_freq == 0:
                    self.save()
                continue_learning =  self.algorithm.continue_learning(self.model)
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break

        self.model.monitor.training_succeeded = True

        if self.save_freq > 0:
            self.save()
Example #11
    def main_loop(self):
        """
        Repeatedly runs an epoch of the training algorithm, runs any
        epoch-level callbacks, and saves the model.
        """
        if self.algorithm is None:
            self.model.monitor = Monitor.get_monitor(self.model)
            self.setup_extensions()
            self.run_callbacks_and_monitoring()
            while True:
                rval = self.model.train_all(dataset=self.dataset)
                if rval is not None:
                    raise ValueError("Model.train_all should not return anything. Use Model.continue_learning to control whether learning continues.")
                self.model.monitor.report_epoch()
                if self.save_freq > 0 and self.model.monitor.epochs_seen % self.save_freq == 0:
                    self.save()
                continue_learning = self.model.continue_learning()
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break
        else:
            self.algorithm.setup(model=self.model, dataset=self.dataset)
            self.setup_extensions()
            if not hasattr(self.model, 'monitor'):
                # TODO: is this really necessary? I just put this error here
                # to prevent an AttributeError later, but I think we could
                # rewrite to avoid the AttributeError
                raise RuntimeError("The algorithm is responsible for setting"
                        " up the Monitor, but failed to.")
            if len(self.model.monitor._datasets)>0:
                # This monitoring channel keeps track of a shared variable,
                # which does not need inputs nor data.
                self.model.monitor.add_channel(name="monitor_seconds_per_epoch",
                                               ipt=None,
                                               val=self.monitor_time,
                                               data_specs=(NullSpace(), ''),
                                               dataset=self.model.monitor._datasets[0])
            self.run_callbacks_and_monitoring()
            while True:
                with log_timing(log, None, final_msg='Time this epoch:',
                                callbacks=[self.monitor_time.set_value]):
                    rval = self.algorithm.train(dataset=self.dataset)
                if rval is not None:
                    raise ValueError("TrainingAlgorithm.train should not return anything. Use TrainingAlgorithm.continue_learning to control whether learning continues.")
                self.model.monitor.report_epoch()
                self.run_callbacks_and_monitoring()
                if self.save_freq > 0 and self.model.monitor._epochs_seen % self.save_freq == 0:
                    self.save()
                continue_learning =  self.algorithm.continue_learning(self.model)
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break

        self.model.monitor.training_succeeded = True

        if self.save_freq > 0:
            self.save()
Example #12
def load_results(experiment_root):
    # load the model (mlp_best.pkl)
    model_file = os.path.join(experiment_root, 'mlp_best.pkl')
    with log_timing(log, 'loading model from {}'.format(model_file)):
        model = serial.load(model_file)

    # load train
    train_yaml_file = os.path.join(experiment_root, 'train.yaml')
    train_yaml = load_yaml_template(train_yaml_file)

    # fix dataset path
    localizer = PathLocalizer()
    train_yaml = localizer.localize_yaml(train_yaml)

    with log_timing(log, 'loading train from {}'.format(train_yaml_file)):
        train = load_yaml(train_yaml)[0]

    return train, model
Example #13
def load_results(experiment_root):
    # load the model (mlp_best.pkl)
    model_file = os.path.join(experiment_root, 'mlp_best.pkl')
    with log_timing(log, 'loading model from {}'.format(model_file)):
        model = serial.load(model_file)

    # load train
    train_yaml_file = os.path.join(experiment_root, 'train.yaml')
    train_yaml = load_yaml_template(train_yaml_file)

    # fix dataset path
    localizer = PathLocalizer()
    train_yaml = localizer.localize_yaml(train_yaml)

    with log_timing(log, 'loading train from {}'.format(train_yaml_file)):
        train = load_yaml(train_yaml)[0]

    return train, model
Example #14
def train_mlp(params):
    train, yaml_str = load_yaml_file(
        os.path.join(os.path.dirname(__file__), 'cross_trial_template.yaml'),
        params=params,
    )

    save_yaml_file(yaml_str,
                   os.path.join(params.experiment_root, 'settings.yaml'))

    with log_timing(log, 'training network'):
        train.main_loop()
Example #15
def extract_output(experiment_root):
    train, model = load_results(experiment_root)

    # get the datasets with their names from the monitor
    for key, dataset in train.algorithm.monitoring_dataset.items():
        # process each dataset
        with log_timing(log, 'processing dataset \'{}\''.format(key)):
            y_real, y_pred, output = process_dataset(model, dataset)

            save(os.path.join(experiment_root, 'cache', key + '_output.pklz'),
                 (y_real, y_pred, output))
Example #16
def train_mlp(params):
    train, yaml_str = load_yaml_file(
        os.path.join(os.path.dirname(__file__), 'cross_trial_template.yaml'),
        params=params,
    )

    save_yaml_file(yaml_str,
                   os.path.join(params.experiment_root, 'settings.yaml'))

    with log_timing(log, 'training network'):
        train.main_loop()
Example #17
 def __init__(self, filepath):
     self.filepath = filepath
     with log_timing(log, 'loading data from {}'.format(filepath)):
         tmp = load(filepath)
         if len(tmp) == 2:
             self.data, self.metadata = tmp
             self.targets = None
         elif len(tmp) == 3:
             self.data, self.metadata, self.targets = tmp
         else:
             raise ValueError('got {} objects instead of 2 or 3.'.format(len(tmp)))
Example #18
def save_yaml_file(yaml_str, yaml_file_path):
    if yaml_file_path is not None:
        with log_timing(log, 'saving yaml to {}'.format(yaml_file_path)):
            save_dir = os.path.dirname(yaml_file_path)
            if save_dir == '':
                save_dir = '.'
            if not os.path.exists(save_dir):
                os.makedirs(save_dir)
            with open(yaml_file_path, 'w') as yaml_file:
                yaml_file.write(yaml_str)
Example #19
def extract_output(experiment_root):
    train, model = load_results(experiment_root)

    # get the datasets with their names from the monitor
    for key, dataset in train.algorithm.monitoring_dataset.items():
        # process each dataset
        with log_timing(log, 'processing dataset \'{}\''.format(key)):
            y_real, y_pred, output = process_dataset(model, dataset)

            save(os.path.join(experiment_root, 'cache', key + '_output.pklz'),
                 (y_real, y_pred, output))
Example #20
def train_convnet(config):

    train, yaml_str = load_yaml_file(
        os.path.join(os.path.dirname(__file__), 'train_convnet_template.yaml'),
        params=config,
    )

    save_yaml_file(yaml_str,
                   os.path.join(config.experiment_root, 'settings.yaml'))

    with log_timing(log, 'training network'):
        train.main_loop()
Example #21
def load_data_file(filename):

    # data = np.loadtxt(filename, dtype=float, delimiter=' ', skiprows=1)
    with log_timing(log, 'loading data from {}'.format(filename)):
        data = np.genfromtxt(filename, dtype=theano.config.floatX,
                             delimiter=' ', skip_header=1, autostrip=True)
    log.info('loaded {}'.format(data.shape))

    return data
Example #23
 def __init__(self, filepath):
     self.filepath = filepath
     with log_timing(log, 'loading data from {}'.format(filepath)):
         tmp = load(filepath)
         if len(tmp) == 2:
             self.data, self.metadata = tmp
             self.targets = None
         elif len(tmp) == 3:
             self.data, self.metadata, self.targets = tmp
         else:
             raise ValueError('got {} objects instead of 2 or 3.'.format(
                 len(tmp)))
Example #24
def train_convnet(config):

    train, yaml_str = load_yaml_file(
        os.path.join(os.path.dirname(__file__), 'train_convnet_template.yaml'),
        params=config,
    )

    save_yaml_file(yaml_str,
                   os.path.join(config.experiment_root, 'settings.yaml'))

    with log_timing(log, 'training network'):
        train.main_loop()
Example #25
def load_yaml(yaml_template, params=None):    
    log.debug('params: {}'.format(params))
    
    if params is not None:
        yaml_str = yaml_template % params
    else:
        yaml_str = yaml_template
    log.debug(yaml_str)

    with log_timing(log, 'parsing yaml'):    
        obj = yaml_parse.load(yaml_str)
    
    return obj, yaml_str
Example #26
def load_yaml(yaml_template, params=None):
    print params

    if params is not None:
        yaml_str = yaml_template % params
    else:
        yaml_str = yaml_template
    print yaml_str

    with log_timing(log, 'parsing yaml'):
        obj = yaml_parse.load(yaml_str)

    return obj, yaml_str
Example #27
def load_yaml(yaml_template, params=None):
    log.debug('params: {}'.format(params))

    if params is not None:
        yaml_str = yaml_template % params
    else:
        yaml_str = yaml_template
    log.debug(yaml_str)

    with log_timing(log, 'parsing yaml'):
        obj = yaml_parse.load(yaml_str)

    return obj, yaml_str
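The params argument is applied to the template with Python's %-formatting before parsing, so the template may contain %(name)s style placeholders. The snippet below is a small, hypothetical illustration (a plain YAML mapping rather than a full pylearn2 !obj: spec):

# Hypothetical template with %(...)s placeholders filled from `params`.
template = "{learning_rate: %(lr)f, batch_size: %(batch_size)d}"
obj, yaml_str = load_yaml(template, params={'lr': 0.05, 'batch_size': 128})
# obj is now the parsed mapping, e.g. {'learning_rate': 0.05, 'batch_size': 128}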
Example #28
    def main_loop(self):
        """
        Repeatedly runs an epoch of the training algorithm, runs any
        epoch-level callbacks, and saves the model.
        """
        if self.algorithm is None:
            self.model.monitor = Monitor.get_monitor(self.model)
            self.run_callbacks_and_monitoring()
            while True:
                rval = self.model.train_all(dataset=self.dataset)
                if rval is not None:
                    raise ValueError(
                        "Model.train_all should not return anything. Use Model.continue_learning to control whether learning continues."
                    )
                self.model.monitor.report_epoch()
                if self.save_freq > 0 and self.model.monitor.epochs_seen % self.save_freq == 0:
                    self.save()
                continue_learning = self.model.continue_learning()
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break
        else:
            self.algorithm.setup(model=self.model, dataset=self.dataset)
            if not hasattr(self.model, 'monitor'):
                # TODO: is this really necessary? I just put this error here
                # to prevent an AttributeError later, but I think we could
                # rewrite to avoid the AttributeError
                raise RuntimeError("The algorithm is responsible for setting"
                                   " up the Monitor, but failed to.")
            self.run_callbacks_and_monitoring()
            while True:
                with log_timing(log, None, final_msg='Time this epoch:'):
                    rval = self.algorithm.train(dataset=self.dataset)
                if rval is not None:
                    raise ValueError(
                        "TrainingAlgorithm.train should not return anything. Use TrainingAlgorithm.continue_learning to control whether learning continues."
                    )
                self.model.monitor.report_epoch()
                self.run_callbacks_and_monitoring()
                if self.save_freq > 0 and self.model.monitor._epochs_seen % self.save_freq == 0:
                    self.save()
                continue_learning = self.algorithm.continue_learning(
                    self.model)
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break

        self.model.monitor.training_succeeded = True

        if self.save_freq > 0:
            self.save()
Example #29
    def main_loop(self):
        """
        Repeatedly runs an epoch of the training algorithm, runs any
        epoch-level callbacks, and saves the model.
        """
        if self.algorithm is None:
            self.model.monitor = Monitor.get_monitor(self.model)
            self.setup_extensions()
            self.run_callbacks_and_monitoring()
            while True:
                rval = self.model.train_all(dataset=self.dataset)
                if rval is not None:
                    raise ValueError("Model.train_all should not return anything. Use Model.continue_learning to control whether learning continues.")
                self.model.monitor.report_epoch()
                if self.save_freq > 0 and self.model.monitor.epochs_seen % self.save_freq == 0:
                    self.save()
                continue_learning = self.model.continue_learning()
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break
        else:
            self.algorithm.setup(model=self.model, dataset=self.dataset)
            self.setup_extensions()
            if not hasattr(self.model, 'monitor'):
                # TODO: is this really necessary? I just put this error here
                # to prevent an AttributeError later, but I think we could
                # rewrite to avoid the AttributeError
                raise RuntimeError("The algorithm is responsible for setting"
                        " up the Monitor, but failed to.")
            self.run_callbacks_and_monitoring()
            while True:
                with log_timing(log, None, final_msg='Time this epoch:'):
                    rval = self.algorithm.train(dataset=self.dataset)
                if rval is not None:
                    raise ValueError("TrainingAlgorithm.train should not return anything. Use TrainingAlgorithm.continue_learning to control whether learning continues.")
                self.model.monitor.report_epoch()
                self.run_callbacks_and_monitoring()
                if self.save_freq > 0 and self.model.monitor._epochs_seen % self.save_freq == 0:
                    self.save()
                continue_learning =  self.algorithm.continue_learning(self.model)
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break

        self.model.monitor.training_succeeded = True

        if self.save_freq > 0:
            self.save()
Example #30
    def __init__(self, save_dir):
        PYLEARN2_TRAIN_DIR = preprocess('${PYLEARN2_TRAIN_DIR}')
        PYLEARN2_TRAIN_BASE_NAME = preprocess('${PYLEARN2_TRAIN_BASE_NAME}')

        src = os.path.join(PYLEARN2_TRAIN_DIR, PYLEARN2_TRAIN_BASE_NAME)
        dst = os.path.join(save_dir, PYLEARN2_TRAIN_BASE_NAME)

        if not os.path.exists(save_dir):
            os.makedirs(save_dir)
        if os.path.exists(save_dir) and not os.path.isdir(save_dir):
            raise IOError("save path %s exists, not a directory" % save_dir)
        elif not os.access(save_dir, os.W_OK):
            raise IOError("permission error creating %s" % dst)

        with log_timing(log, 'copying yaml from {} to {}'.format(src, dst)):
            copyfile(src, dst)
Example #32
    def run_one_epoch(self, datasets, remember_best):
        batch_generator = self.iterator.get_batches(datasets['train'],
                                                    shuffle=True)
        with log_timing(log, None, final_msg='Time updates following epoch:'):
            for inputs, targets in batch_generator:
                if self.batch_modifier is not None:
                    inputs, targets = self.batch_modifier.process(
                        inputs, targets)
                # could happen that batch modifier has removed all inputs...
                if len(inputs) > 0:
                    self.train_func(inputs, targets)

        self.monitor_epoch(datasets)
        self.print_epoch()
        if remember_best:
            self.remember_extension.remember_epoch(self.monitor_chans,
                                                   self.all_params)
Example #33
    def run_one_epoch(self, datasets, remember_best):
        batch_generator = self.iterator.get_batches(datasets['train'],
                                                    shuffle=True)
        with log_timing(log, None, final_msg='Time updates following epoch:'):
            for inputs, targets in batch_generator:
                if self.batch_modifier is not None:
                    inputs, targets = self.batch_modifier.process(inputs,
                                                                  targets)
                # could happen that batch modifier has removed all inputs...
                if len(inputs) > 0:
                    self.train_func(inputs, targets)

        self.monitor_epoch(datasets)
        self.print_epoch()
        if remember_best:
            self.remember_extension.remember_epoch(self.monitor_chans,
                                                   self.all_params)
Example #34
def train_mlp(params):

    #     sda_file = os.path.join(params.experiment_root, 'sda', 'sda_all.pkl');

    # check whether pre-trained SDA is there
    pretrained = True
    for i in xrange(len(params.hidden_layers_sizes)):
        sda_layer_file = params.get(('layer{}_content').format(i))
        if not os.path.isfile(sda_layer_file):
            log.info(
                'did not find pre-trained SDA layer model at {}. re-computing SDA'
                .format(sda_layer_file))
            pretrained = False
            break
        else:
            log.info('found pre-trained SDA layer model at {}'.format(
                sda_layer_file))

    if not pretrained:
        train_sda(params)

    n_layers = len(params.hidden_layers_sizes)

    if params.learning_rule == 'AdaDelta':
        yaml_template = 'train_sda_mlp_template.AdaDelta.yaml'
    else:
        if n_layers == 3:
            yaml_template = 'train_sda_mlp_template.Momentum.yaml'
        elif n_layers == 2:
            yaml_template = 'train_sda_mlp_template.Momentum.2layers.yaml'
        else:
            raise ValueError('{} layers not supported'.format(n_layers))

    train, train_yaml_str = load_yaml_file(
        os.path.join(os.path.dirname(__file__), yaml_template),
        params=params,
    )

    save_yaml_file(train_yaml_str,
                   os.path.join(params.experiment_root, 'mlp_train.yaml'))

    with log_timing(log, 'training MLP'):
        train.main_loop()

    log.info('done')
Example #35
 def save(self):
     """Saves the model."""
     # TODO-- save state of training algorithm so training can be
     # resumed after a crash
     for extension in self.extensions:
         extension.on_save(self.model, self.dataset, self.algorithm)
     if self.save_path is not None:
         with log_timing(log, "Saving to " + self.save_path):
             if self.first_save and (not self.allow_overwrite) and os.path.exists(self.save_path):
                 # Every job overwrites its own output on the second save
                 # and every save thereafter. The "allow_overwrite" flag
                 # only pertains to overwriting the output of previous jobs.
                 raise IOError("Trying to overwrite file when not allowed.")
             try:
                 # Make sure that saving does not serialize the dataset
                 self.dataset._serialization_guard = SerializationGuard()
                 serial.save(self.save_path, self.model, on_overwrite="backup")
             finally:
                 self.dataset._serialization_guard = None
         self.first_save = False
Example #36
def train_mlp(params):

    # sda_file = os.path.join(params.experiment_root, 'sda', 'sda_all.pkl')

    # check whether pre-trained SDA is there
    pretrained = True
    for i in xrange(len(params.hidden_layers_sizes)):
        sda_layer_file = params.get(('layer{}_content').format(i))
        if not os.path.isfile(sda_layer_file):
            log.info(
                'did not find pre-trained SDA layer model at {}. re-computing SDA'
                .format(sda_layer_file))
            pretrained = False
            break
        else:
            log.info('found pre-trained SDA layer model at {}'.format(
                sda_layer_file))

    if not pretrained:
        train_sda(params)

    n_layers = len(params.hidden_layers_sizes)

    if params.learning_rule == 'AdaDelta':
        yaml_template = 'train_sda_mlp_template.AdaDelta.yaml'
    else:
        if n_layers == 3:
            yaml_template = 'train_sda_mlp_template.Momentum.yaml'
        elif n_layers == 2:
            yaml_template = 'train_sda_mlp_template.Momentum.2layers.yaml'
        else:
            raise ValueError('{} layers not supported'.format(n_layers))

    train, train_yaml_str = load_yaml_file(
        os.path.join(os.path.dirname(__file__), yaml_template),
        params=params,
    )

    save_yaml_file(train_yaml_str,
                   os.path.join(params.experiment_root, 'mlp_train.yaml'))

    with log_timing(log, 'training MLP'):
        train.main_loop()

    log.info('done')
Example #37
 def save(self):
     """Saves the model."""
     #TODO-- save state of training algorithm so training can be
     # resumed after a crash
     for extension in self.extensions:
         extension.on_save(self.model, self.dataset, self.algorithm)
     if self.save_path is not None:
         with log_timing(log, 'Saving to ' + self.save_path):
             if self.first_save and (not self.allow_overwrite) \
                and os.path.exists(self.save_path):
                 # Every job overwrites its own output on the second save
                 # and every save thereafter. The "allow_overwrite" flag
                 # only pertains to overwriting the output of previous jobs.
                 raise IOError("Trying to overwrite file when not allowed.")
             try:
                 # Make sure that saving does not serialize the dataset
                 self.dataset._serialization_guard = SerializationGuard()
                 serial.save(self.save_path, self.model,
                             on_overwrite='backup')
             finally:
                 self.dataset._serialization_guard = None
         self.first_save = False
Example #38
def split_trial(path, trial_len):

    log.info('processing {}'.format(path))

    datafile = glob.glob(os.path.join(path, '*.txt'))[0]
    metafile = glob.glob(os.path.join(path, '*_Trials_Onsets.xlsx'))[0]

    log.debug('data file: {}'.format(datafile))
    log.debug('meta file: {}'.format(metafile))

    onsets = load_xlsx_meta_file(metafile)
    data = load_data_file(datafile)
    log.debug(onsets)

    onsets.append([len(data), 'end'])
    # artificial last marker

    trials = {}
    for i in xrange(len(onsets) - 1):
        onset, label = onsets[i]
        next_onset = onsets[i + 1][0]

        # rounding to integers
        onset = int(math.floor(float(onset)))
        next_onset = int(math.floor(float(next_onset)))

        next_onset = min(onset + trial_len, next_onset)

        log.debug('[{}..{}) -> {}'.format(onset, next_onset, label))
        trial_data = np.vstack(data[onset:next_onset])
        log.debug('{} samples extracted'.format(trial_data.shape))

        trials[label] = trial_data

    filename = os.path.join(path, 'trials.pklz')
    with log_timing(log, 'saving to {}'.format(filename)):
        save(filename, trials)

    return trials
Example #39
def extract_output(config, best_epoch):
    # load best model
    model_file = os.path.join(config.experiment_root, "epochs", "epoch{}.pkl".format(best_epoch))
    print "loading " + model_file
    model = serial.load(model_file)

    #     print model;

    # additional dataset params
    config.start_sample = 11200
    config.stop_sample = 12800
    config.name = "test"

    # load dataset
    dataset, dataset_yaml = load_yaml_file(
        os.path.join(os.path.dirname(__file__), "..", "run", "dataset_template.yaml"), params=config
    )

    with log_timing(log, "processing dataset"):
        y_real, y_pred, output = process_dataset(model, dataset)

    return y_real, y_pred, output
Example #40
def split_trial(path, trial_len):

    log.info('processing {}'.format(path))

    datafile = glob.glob(os.path.join(path, '*.txt'))[0]
    metafile = glob.glob(os.path.join(path, '*_Trials_Onsets.xlsx'))[0]

    log.debug('data file: {}'.format(datafile))
    log.debug('meta file: {}'.format(metafile))

    onsets = load_xlsx_meta_file(metafile)
    data = load_data_file(datafile)
    log.debug(onsets)

    onsets.append([len(data), 'end'])  # artificial last marker

    trials = {}
    for i in xrange(len(onsets) - 1):
        onset, label = onsets[i]
        next_onset = onsets[i + 1][0]

        # rounding to integers
        onset = int(math.floor(float(onset)))
        next_onset = int(math.floor(float(next_onset)))

        next_onset = min(onset + trial_len, next_onset)

        log.debug('[{}..{}) -> {}'.format(onset, next_onset, label))
        trial_data = np.vstack(data[onset:next_onset])
        log.debug('{} samples extracted'.format(trial_data.shape))

        trials[label] = trial_data

    filename = os.path.join(path, 'trials.pklz')
    with log_timing(log, 'saving to {}'.format(filename)):
        save(filename, trials)

    return trials
Example #41
def extract_output(config, best_epoch):
    # load best model
    model_file = os.path.join(config.experiment_root, 'epochs',
                              'epoch{}.pkl'.format(best_epoch))
    print 'loading ' + model_file
    model = serial.load(model_file)

    # print model

    # additional dataset params
    config.start_sample = 11200
    config.stop_sample = 12800
    config.name = 'test'

    # load dataset
    dataset, dataset_yaml = load_yaml_file(
        os.path.join(os.path.dirname(__file__), '..', 'run',
                     'dataset_template.yaml'),
        params=config,
    )

    with log_timing(log, 'processing dataset'):
        y_real, y_pred, output = process_dataset(model, dataset)

    return y_real, y_pred, output
Example #42
 def process_markers(self, markers):
     # Check if a trial has ended with last samples
     # need marker samples with some overlap
     # so we do not miss trial boundaries inbetween two sample blocks
     marker_samples_with_overlap = np.copy(
         self.marker_buffer[-len(markers)-2:])
     trial_has_ended = np.sum(np.diff(marker_samples_with_overlap) < 0) > 0
     if trial_has_ended:
         trial_starts, trial_stops = self.get_trial_start_stop_indices(
             self.marker_buffer)
         trial_start = trial_starts[-1]
         trial_stop = trial_stops[-1]
         log.info("Trial has ended for class {:d}".format(
             self.marker_buffer[trial_start]))
         assert trial_start < trial_stop, ("trial start {:d} should be "
             "before trial stop {:d}, markers: {:s}").format(trial_start, 
                 trial_stop, str(marker_samples_with_overlap))
         self.add_blocks(trial_start + self.trial_start_offset, trial_stop,
             self.data_processor.sample_buffer,
             self.marker_buffer)
         log.info("Now {:d} trials (including breaks)".format(
             len(self.data_batches)))
         
         with log_timing(log, None, final_msg='Time for training:'):
             self.train()
     trial_has_started = np.sum(np.diff(marker_samples_with_overlap) > 0) > 0
     if trial_has_started:
         trial_end_in_marker_buffer = np.sum(np.diff(self.marker_buffer) < 0) > 0
         if trial_end_in_marker_buffer:
             # +1 necessary since diff removes one index
             trial_start = np.flatnonzero(np.diff(self.marker_buffer) > 0)[-1] + 1
             trial_stop = np.flatnonzero(np.diff(self.marker_buffer) < 0)[-1] + 1
             assert trial_start > trial_stop, ("If trial has just started "
                 "expect this to be after stop of last trial")
             self.add_break(break_start=trial_stop, break_stop=trial_start,
                 all_samples=self.data_processor.sample_buffer,
                 all_markers=self.marker_buffer)
Example #43
    def main_loop(self, time_budget=None):
        """
        Repeatedly runs an epoch of the training algorithm, runs any
        epoch-level callbacks, and saves the model.

        Parameters
        ----------
        time_budget : int, optional
            The maximum number of seconds before interrupting
            training. Default is `None`, no time limit.
        """
        t0 = datetime.now()
        self.setup()
        if self.algorithm is None:
            self.run_callbacks_and_monitoring()
            while True:
                if self.exceeded_time_budget(t0, time_budget):
                    break

                rval = self.model.train_all(dataset=self.dataset)
                if rval is not None:
                    raise ValueError("Model.train_all should not return " +
                                     "anything. Use Model.continue_learning " +
                                     "to control whether learning continues.")
                self.model.monitor.report_epoch()
                extension_continue = self.run_callbacks_and_monitoring()
                freq = self.save_freq
                if freq > 0 and self.model.monitor.get_epochs_seen() % freq == 0:
                    self.save()
                continue_learning = (self.model.continue_learning() and
                                     extension_continue)
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break
        else:
            if not hasattr(self.model, 'monitor'):
                # TODO: is this really necessary? I just put this error here
                # to prevent an AttributeError later, but I think we could
                # rewrite to avoid the AttributeError
                raise RuntimeError("The algorithm is responsible for setting"
                                   " up the Monitor, but failed to.")
            if len(self.model.monitor._datasets) > 0:
                # This monitoring channel keeps track of a shared variable,
                # which does not need inputs nor data.
                self.training_seconds.__doc__ = """\
The number of seconds that were spent in actual training during the most
recent epoch. This excludes seconds that were spent running callbacks for
the extensions, computing monitoring channels, etc."""
                self.model.monitor.add_channel(
                    name="training_seconds_this_epoch",
                    ipt=None,
                    val=self.training_seconds,
                    data_specs=(NullSpace(), ''),
                    dataset=self.model.monitor._datasets[0])
                self.total_seconds.__doc__ = """\
The number of seconds that were spent on the entirety of processing for the
previous epoch. This includes not only training but also the computation of
the monitoring channels, running TrainExtension callbacks, etc. This value
is reported for the *previous* epoch because the amount of time spent on
monitoring for this epoch is not known until the monitoring channels have
already been reported."""
                self.model.monitor.add_channel(
                    name="total_seconds_last_epoch",
                    ipt=None,
                    val=self.total_seconds,
                    data_specs=(NullSpace(), ''),
                    dataset=self.model.monitor._datasets[0])
            self.run_callbacks_and_monitoring()
            while True:
                if self.exceeded_time_budget(t0, time_budget):
                    break

                with log_timing(log, None, level=logging.DEBUG,
                                callbacks=[self.total_seconds.set_value]):
                    with log_timing(
                            log, None, final_msg='Time this epoch:',
                            callbacks=[self.training_seconds.set_value]):
                        rval = self.algorithm.train(dataset=self.dataset)
                    if rval is not None:
                        raise ValueError("TrainingAlgorithm.train should not "
                                         "return anything. Use "
                                         "TrainingAlgorithm.continue_learning "
                                         "to control whether learning "
                                         "continues.")
                    self.model.monitor.report_epoch()
                    extension_continue = self.run_callbacks_and_monitoring()
                    if self.save_freq > 0 and \
                       self.model.monitor.get_epochs_seen() % self.save_freq == 0:
                        self.save()
                continue_learning = (
                    self.algorithm.continue_learning(self.model) and
                    extension_continue
                )
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break

        self.model.monitor.training_succeeded = True

        if self.save_freq > 0:
            self.save()
Example #44
    def main_loop(self, time_budget=None):
        """
        Repeatedly runs an epoch of the training algorithm, runs any
        epoch-level callbacks, and saves the model.

        Parameters
        ----------
        time_budget : int, optional
            The maximum number of seconds before interrupting
            training. Default is `None`, no time limit.
        """
        t0 = datetime.now()
        if self.algorithm is None:
            self.model.monitor = Monitor.get_monitor(self.model)
            self.model.monitor.time_budget_exceeded = False
            self.setup_extensions()
            # Model.censor_updates is used by the training algorithm to
            # enforce constraints after each step of learning. Here we
            # make sure the constraints are enforced from the start.
            self.model.enforce_constraints()
            self.run_callbacks_and_monitoring()
            while True:
                if self.exceeded_time_budget(t0, time_budget):
                    break

                rval = self.model.train_all(dataset=self.dataset)
                if rval is not None:
                    raise ValueError("Model.train_all should not return " +
                                     "anything. Use Model.continue_learning " +
                                     "to control whether learning continues.")
                self.model.monitor.report_epoch()
                extension_continue = self.run_callbacks_and_monitoring()
                freq = self.save_freq
                if freq > 0 and self.model.monitor.epochs_seen % freq == 0:
                    self.save()
                continue_learning = (self.model.continue_learning() and
                                     extension_continue)
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break
        else:
            self.algorithm.setup(model=self.model, dataset=self.dataset)
            self.setup_extensions()
            # Model.censor_updates is used by the training algorithm to
            # enforce constraints after each step of learning. Here we
            # make sure the constraints are enforced from the start.
            self.model.enforce_constraints()
            if not hasattr(self.model, 'monitor'):
                # TODO: is this really necessary? I just put this error here
                # to prevent an AttributeError later, but I think we could
                # rewrite to avoid the AttributeError
                raise RuntimeError("The algorithm is responsible for setting"
                                   " up the Monitor, but failed to.")
            if len(self.model.monitor._datasets)>0:
                # This monitoring channel keeps track of a shared variable,
                # which does not need inputs nor data.
                self.model.monitor.add_channel(name="training_seconds_this_epoch",
                                               ipt=None,
                                               val=self.training_seconds,
                                               data_specs=(NullSpace(), ''),
                                               dataset=self.model.monitor._datasets[0])
                self.model.monitor.add_channel(name="total_seconds_last_epoch",
                                               ipt=None,
                                               val=self.total_seconds,
                                               data_specs=(NullSpace(), ''),
                                               dataset=self.model.monitor._datasets[0])
            self.run_callbacks_and_monitoring()
            while True:
                if self.exceeded_time_budget(t0, time_budget):
                    break

                with log_timing(log, None, level=logging.DEBUG,
                                callbacks=[self.total_seconds.set_value]):
                    with log_timing(log, None, final_msg='Time this epoch:',
                                    callbacks=[self.training_seconds.set_value]):
                        rval = self.algorithm.train(dataset=self.dataset)
                    if rval is not None:
                        raise ValueError("TrainingAlgorithm.train should not " +
                                         "return anything. Use " +
                                         "TrainingAlgorithm.continue_learning " +
                                         "to control whether learning continues.")
                    self.model.monitor.report_epoch()
                    extension_continue = self.run_callbacks_and_monitoring()
                    if self.save_freq > 0 and self.model.monitor._epochs_seen % self.save_freq == 0:
                        self.save()
                continue_learning = (
                    self.algorithm.continue_learning(self.model) and
                    extension_continue
                )
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break

        self.model.monitor.training_succeeded = True

        if self.save_freq > 0:
            self.save()
Example #45
    def redo_theano(self):
        """
        Recompiles Theano functions used by this monitor.

        This is needed so that if new channels are added, Theano's
        optimizations make sure (to the extent that they can) that the new
        channels and old channels don't have any redundant calculations.

        It is also needed to regenerate Theano functions after pickling and
        unpickling, since Theano functions should not be pickled.
        """
        self._dirty = False

        init_names = dir(self)
        self.prereqs = OrderedDict()
        for channel in self.channels.values():
            if channel.prereqs is not None:
                dataset = channel.dataset
                if dataset not in self.prereqs:
                    self.prereqs[dataset] = []
                prereqs = self.prereqs[dataset]
                for prereq in channel.prereqs:
                    if prereq not in prereqs:
                        prereqs.append(prereq)

        updates = OrderedDict()
        for channel in self.channels.values():
            updates[channel.val_shared] = np.cast[config.floatX](0.0)
        with log_timing(log, "compiling begin_record_entry"):
            self.begin_record_entry = function(inputs=[], updates=updates, mode=self.theano_function_mode,
                    name = 'Monitor.begin_record_entry')
        updates = OrderedDict()
        givens = OrderedDict()
        #Get the appropriate kind of theano variable to represent the data the model
        #acts on
        X = self.model.get_input_space().make_theano_batch(name = "monitoring_X")
        if config.compute_test_value != 'off':
            m = self.model.get_test_batch_size()
            test_value = self.model.get_input_space().get_origin_batch(m)
            X.tag.test_value = np.cast[X.type.dtype](test_value)
        if self.require_label:
            Y = self.model.get_output_space().make_theano_batch(name = "monitoring_Y")

        log.info('Monitored channels: ')
        for key in sorted(self.channels.keys()):
            mode = self.theano_function_mode
            if mode is not None and hasattr(mode, 'record'):
                mode.record.handle_line('compiling monitor including channel '+key+'\n')
            log.info('\t%s' % key)
        it = [d.iterator(mode=i, num_batches=n, batch_size=b, topo=self.topo) \
              for d, i, n, b in safe_izip(self._datasets, self._iteration_mode,
                                    self._num_batches, self._batch_size)]
        num_examples = [np.cast[config.floatX](float(i.num_examples)) for i in it]
        givens = [OrderedDict() for d in self._datasets]
        updates = [OrderedDict() for d in self._datasets]
        for channel in self.channels.values():
            index = self._datasets.index(channel.dataset)
            d = self._datasets[index]
            g = givens[index]
            n = num_examples[index]
            u = updates[index]
            if isinstance(channel.graph_input, (list, tuple)):
                g[channel.graph_input[0]] = X
                g[channel.graph_input[1]] = Y
            else:
                g[channel.graph_input] = X
            if n == 0:
                raise ValueError("Iterating over 0 examples results in divide by 0")
            if self.topo:
                batch_index = d.get_topo_batch_axis()
            else:
                batch_index = 0
            val = channel.val * T.cast(X.shape[batch_index], config.floatX) / n
            u[channel.val_shared] = channel.val_shared + val

        with log_timing(log, "Compiling accum"):
            # Check type of update expressions
            for up in updates:
                for key in up:
                    if key.dtype != up[key].dtype:
                        raise TypeError('Monitoring channel shared variable ' +
                                        key.name + ' has dtype ' + key.dtype +
                                        ' but is driven by an expression ' +
                                        'with type ' + up[key].dtype)

            self.accum = []
            for idx, packed in enumerate(safe_izip(givens, updates)):
                g, u = packed
                mode = self.theano_function_mode
                if mode is not None and hasattr(mode, 'record'):
                    for elem in g:
                        mode.record.handle_line('g key ' +
                                                var_descriptor(elem) + '\n')
                        mode.record.handle_line('g val ' +
                                                var_descriptor(g[elem]) + '\n')
                    for elem in u:
                        mode.record.handle_line('u key ' +
                                                var_descriptor(elem) + '\n')
                        mode.record.handle_line('u val ' +
                                                var_descriptor(u[elem]) + '\n')
                function_name = 'Monitor.accum[%d]' % idx
                if self.require_label:
                    if mode is not None and hasattr(mode, 'record'):
                        mode.record.handle_line('compiling supervised accum\n')
                    # Some channels may not depend on the data, i.e., they
                    # might just monitor the model parameters or some shared
                    # variable updated by the training algorithm, so we need
                    # to ignore the unused input error.
                    self.accum.append(function([X, Y],
                                               givens=g,
                                               updates=u,
                                               mode=self.theano_function_mode,
                                               name=function_name))
                else:
                    if mode is not None and hasattr(mode, 'record'):
                        mode.record.handle_line('compiling unsupervised accum\n')
                    self.accum.append(function([X],
                                               givens=g,
                                               updates=u,
                                               mode=self.theano_function_mode,
                                               name=function_name))
            for a in self.accum:
                if mode is not None and hasattr(mode, 'record'):
                    for elem in a.maker.fgraph.outputs:
                        mode.record.handle_line('accum output '+var_descriptor(elem)+'\n')
                log.info("graph size: %d" % len(a.maker.fgraph.toposort()))
        final_names = dir(self)
        self.register_names_to_del([name for name in final_names
                                    if name not in init_names])
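
# Aside (hedged): the docstring notes that compiled Theano functions must not
# be pickled and therefore have to be rebuilt after unpickling. A minimal,
# pylearn2-free sketch of that pattern - drop the compiled attribute in
# __getstate__ and recompile lazily on first use. The class and names below
# are illustrative only, not part of the Monitor API.
class LazilyCompiled(object):

    def __init__(self):
        self._compiled_fn = None  # placeholder for an expensive-to-build object

    def _compile(self):
        # stand-in for the Theano compilation done in redo_theano()
        self._compiled_fn = lambda x: x * 2

    def __call__(self, x):
        if self._compiled_fn is None:  # first use, or freshly unpickled
            self._compile()
        return self._compiled_fn(x)

    def __getstate__(self):
        state = self.__dict__.copy()
        state['_compiled_fn'] = None   # never pickle the compiled function
        return state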
Beispiel #46
0
    def redo_theano(self):
        """
        Recompiles Theano functions used by this monitor.

        This is called any time we need to evaluate the channels and the
        channel definitions have changed since last we called it, or if the
        theano functions are unavailable for any other reason (first time they
        are needed after construction or deserialization, etc.)

        All channels are compiled as part of the same theano function so that
        the theano optimizations can eliminate subexpressions that are shared
        between multiple channels.
        """
        self._dirty = False

        # Recompute the data specs, since the channels may have changed.
        self._build_data_specs()

        init_names = dir(self)
        self.prereqs = OrderedDict()
        for channel in self.channels.values():
            if channel.prereqs is not None:
                dataset = channel.dataset
                if dataset not in self.prereqs:
                    self.prereqs[dataset] = []
                prereqs = self.prereqs[dataset]
                for prereq in channel.prereqs:
                    if prereq not in prereqs:
                        prereqs.append(prereq)

        updates = OrderedDict()
        for channel in self.channels.values():
            updates[channel.val_shared] = np.cast[config.floatX](0.0)
        with log_timing(log, "compiling begin_record_entry"):
            self.begin_record_entry = function(
                inputs=[],
                updates=updates,
                mode=self.theano_function_mode,
                name='Monitor.begin_record_entry')
        updates = OrderedDict()
        givens = OrderedDict()
        # Get the appropriate kind of theano variable to represent the data
        # the model acts on
        batch_names = ['monitoring_%s' % s for s in self._flat_data_specs[1]]
        theano_args = self._flat_data_specs[0].make_theano_batch(batch_names)

        # Get a symbolic expression of the batch size
        # We do it here, rather than for each channel, because channels with an
        # empty data_specs do not use data, and are unable to extract the batch
        # size. The case where the whole data specs is empty is not supported.
        batch_size = self._flat_data_specs[0].batch_size(theano_args)

        # Also get a nested representation, for joint iteration
        # with each of channel.graph_input
        nested_theano_args = self._data_specs_mapping.nest(theano_args)
        if not isinstance(nested_theano_args, tuple):
            nested_theano_args = (nested_theano_args, )
        assert len(nested_theano_args) == (len(self.channels) + 1)

        log.info('Monitored channels: ')
        for key in sorted(self.channels.keys()):
            mode = self.theano_function_mode
            if mode is not None and hasattr(mode, 'record'):
                mode.record.handle_line('compiling monitor including ' +
                                        'channel ' + key + '\n')
            log.info('\t%s' % key)
        it = [
            d.iterator(mode=i,
                       num_batches=n,
                       batch_size=b,
                       data_specs=self._flat_data_specs,
                       return_tuple=True)
            for d, i, n, b in safe_izip(self._datasets, self._iteration_mode,
                                        self._num_batches, self._batch_size)
        ]
        self.num_examples = [
            np.cast[config.floatX](float(i.num_examples)) for i in it
        ]
        givens = [OrderedDict() for d in self._datasets]
        updates = [OrderedDict() for d in self._datasets]
        for i, channel in enumerate(self.channels.values()):
            index = self._datasets.index(channel.dataset)
            d = self._datasets[index]
            g = givens[index]
            cur_num_examples = self.num_examples[index]
            u = updates[index]

            # Flatten channel.graph_input and the appropriate part of
            # nested_theano_args, to iterate jointly over them.
            c_mapping = DataSpecsMapping(channel.data_specs)
            channel_inputs = c_mapping.flatten(channel.graph_input,
                                               return_tuple=True)
            inputs = c_mapping.flatten(nested_theano_args[i + 1],
                                       return_tuple=True)

            for (channel_X, X) in safe_izip(channel_inputs, inputs):
                assert channel_X not in g or g[channel_X] is X
                assert channel_X.type == X.type, (channel_X.type, X.type)
                g[channel_X] = X

            if batch_size == 0:
                # No channel needs any data, so there is no need to
                # average results, and we will call the accum functions
                # only once.
                # TODO: better handling of channels not needing data when
                # some other channels need data.
                assert len(self._flat_data_specs[1]) == 0
                val = channel.val
            else:
                if cur_num_examples == 0:
                    raise ValueError("Iterating over 0 examples results in " +
                                     "divide by 0")
                val = (channel.val * T.cast(batch_size, config.floatX) /
                       cur_num_examples)
            u[channel.val_shared] = channel.val_shared + val

        with log_timing(log, "Compiling accum"):
            # Check type of update expressions
            for up in updates:
                for key in up:
                    if key.dtype != up[key].dtype:
                        raise TypeError('Monitoring channel shared variable ' +
                                        key.name + ' has dtype ' + key.dtype +
                                        ' but is driven by an expression ' +
                                        'with type ' + up[key].dtype)

            self.accum = []
            for idx, packed in enumerate(safe_izip(givens, updates)):
                g, u = packed
                mode = self.theano_function_mode
                if mode is not None and hasattr(mode, 'record'):
                    for elem in g:
                        mode.record.handle_line('g key ' +
                                                var_descriptor(elem) + '\n')
                        mode.record.handle_line('g val ' +
                                                var_descriptor(g[elem]) + '\n')
                    for elem in u:
                        mode.record.handle_line('u key ' +
                                                var_descriptor(elem) + '\n')
                        mode.record.handle_line('u val ' +
                                                var_descriptor(u[elem]) + '\n')
                function_name = 'Monitor.accum[%d]' % idx
                if mode is not None and hasattr(mode, 'record'):
                    mode.record.handle_line('compiling supervised accum\n')
                # Some channels may not depend on the data, ie, they might just
                # monitor the model parameters, or some shared variable updated
                # by the training algorithm, so we need to ignore the unused
                # input error
                self.accum.append(
                    function(theano_args,
                             givens=g,
                             updates=u,
                             mode=self.theano_function_mode,
                             name=function_name))
            for a in self.accum:
                if mode is not None and hasattr(mode, 'record'):
                    for elem in a.maker.fgraph.outputs:
                        mode.record.handle_line('accum output ' +
                                                var_descriptor(elem) + '\n')
                log.info("graph size: %d" % len(a.maker.fgraph.toposort()))
        final_names = dir(self)
        self.register_names_to_del(
            [name for name in final_names if name not in init_names])
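
# Aside (hedged): each accum function above adds
# channel.val * batch_size / num_examples to a shared accumulator, so after a
# full pass the accumulator holds the per-example mean even with uneven batch
# sizes. A numpy-only sketch of that running-mean trick (no Theano involved):
import numpy as np

def mean_over_batches(batches, stat_fn):
    num_examples = float(sum(len(b) for b in batches))
    acc = 0.0                        # plays the role of channel.val_shared
    for batch in batches:
        batch_mean = stat_fn(batch)  # plays the role of channel.val
        acc += batch_mean * len(batch) / num_examples
    return acc

# uneven batch sizes still give the exact dataset mean:
# mean_over_batches([np.array([1., 2., 3.]), np.array([10.])], np.mean) == 4.0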
Beispiel #47
0
def analyze(config):
    output_path = config.get('output_path')
#     model_file = os.path.join(output_path, 'eeg', 'conv3', 'convolutional_network.pkl')
#     model_file = os.path.join(output_path, 'eeg', 'conv10', 'epochs', 'cnn_epoch94.pkl')
    model_file = '../../../debug/debug_run4/debug_network.pkl'
    with log_timing(log, 'loading convnet model from {}'.format(model_file)):
        model = serial.load(model_file)

    input_shape = model.get_input_space().shape

    config = config.eeg
    hyper_params = {
        # 25+151-1+301-1; this should leave a single value per channel after convolution
        'input_length': input_shape[0],
        'hop_size': 5,  # reduce amount of data by factor 5
        'dataset_root': config.get('dataset_root'),
        'dataset_suffix': config.get('dataset_suffix'),
        'save_path': config.get('save_path'),
    }
        
    dataset_yaml = '''
    !obj:deepthought.datasets.rwanda2013rhythms.EEGDataset.EEGDataset {
                                 name : 'testset',
                                 path : %(dataset_root)s, 
                                 suffix : '_channels', # %(dataset_suffix)s,
                                 subjects : [0],
                                 resample : [400, 100],
                                 start_sample : 2500,
                                 stop_sample  : 3200,     # None (empty) = end of sequence
                  # FIXME:                
#                                  n_fft : 24,
#                                  frame_size : 10, # %(input_length)i,                                
                                 frame_size : %(input_length)i,
                                 
                                 hop_size : %(hop_size)i,           
                                 label_mode : 'rhythm_type',
#                                  save_matrix_path: '../../../debug/debug.pkl'
                            }
'''
    dataset_yaml = dataset_yaml % hyper_params
    print dataset_yaml

    with log_timing(log, 'parsing yaml'):
        testset = yaml_parse.load(dataset_yaml)

#     print testset.subject_partitions
#     print testset.sequence_partitions

    seq_starts = testset.sequence_partitions
#     return;
    
#     axes=['b', 0, 1, 'c']
#     def dimshuffle(b01c):
#         default = ('b', 0, 1, 'c')
#         return b01c.transpose(*[default.index(axis) for axis in axes])
#     data = dimshuffle(testset.X);
    
#     design_matrix = model.get_design_matrix()

#     view_converter = DefaultViewConverter([475, 1, 1]);
#     data = view_converter.


#     ## get the labels
#     data_specs= (model.get_output_space(), "targets");
#     it = testset.iterator(
#                            mode='sequential', 
#                            batch_size=100,
#                            data_specs=data_specs);
#     labels = np.hstack([np.argmax(minibatch, axis = 1) for minibatch in it])
#     print labels[0:1000]
# 
#     ## get the predictions
#     minibatch = model.get_input_space().make_theano_batch();
#     output_fn = theano.function(inputs=[minibatch], 
#                                 outputs=T.argmax(model.fprop(minibatch), axis = 1));
#     print "function compiled"
# #     data_specs= (CompositeSpace((
# #                                 model.get_input_space(), 
# #                                 model.get_output_space())), 
# #                 ("features", "targets"));
#                 
#     data_specs= (model.get_input_space(), "features");    
#     it = testset.iterator(
#                             mode='sequential', 
#                             batch_size=100,
#                             data_specs=data_specs);
#     print "iterator ready"
#         
#     y_pred = np.hstack([output_fn(minibatch) for minibatch in it])
#     
#     print y_pred[0:1000]
    
    
    minibatch = model.get_input_space().make_theano_batch()
    output_fn = theano.function(inputs=[minibatch],
                                outputs=T.argmax(model.fprop(minibatch), axis=1))
    print "function compiled"

    data_specs = (CompositeSpace((model.get_input_space(),
                                  model.get_output_space())),
                  ("features", "targets"))
    it = testset.iterator('sequential',
                          batch_size=100,
                          data_specs=data_specs)
    print "iterator ready"

    y_pred = []
    y_real = []
    for minibatch, target in it:
        y_pred.append(output_fn(minibatch))
        y_real.append(np.argmax(target, axis=1))
    y_pred = np.hstack(y_pred)
    y_real = np.hstack(y_real)

    print y_pred[0:1000]

    print classification_report(y_real, y_pred)
    print confusion_matrix(y_real, y_pred)

    misclass = (y_real != y_pred)
    print misclass.mean()

    correct = 0
    s_real = []
    s_pred = []
    s_pred_agg = []

    n_channels = 16
    channel_scores = np.zeros(n_channels, dtype=np.int)
    
    for i in xrange(len(seq_starts)):

        start = seq_starts[i]
        if i < len(seq_starts) - 1:
            stop = seq_starts[i + 1]
        else:
            stop = None

        s_real.append(y_real[start])

#         print np.bincount(y_pred[start:stop])
#         print np.argmax(np.bincount(y_pred[start:stop]))

        s_pred.append(np.argmax(np.bincount(y_pred[start:stop])))

        # works only for binary classification
        s_pred_agg.append(np.mean(y_pred[start:stop]))

        seq_misclass = misclass[start:stop].mean()
#         print '{} [{}{}]: {}'.format(i, start, stop, seq_misclass)

        if seq_misclass < 0.5:  # more correct than incorrect
            correct += 1
            channel_scores[i % n_channels] += 1

    s_real = np.hstack(s_real)
    s_pred = np.hstack(s_pred)

    print s_real
    print s_pred
    print s_pred_agg

    print 'aggregated'
    print classification_report(s_real, s_pred)
    print confusion_matrix(s_real, s_pred)

    s_misclass = (s_real != s_pred)
    print s_misclass.mean()

    print channel_scores

    return

    # NOTE: the code below is unreachable; it appears to be kept from an
    # earlier version of this analysis.
    input_shape = model.get_input_space().shape
    
    print input_shape

    view_converter = DefaultViewConverter((input_shape[0], input_shape[1], 1))

    data = view_converter.design_mat_to_topo_view(testset.X)
    print data.shape

    X = model.get_input_space().make_theano_batch()
    Y = model.fprop(X)
    Y = T.argmax(Y, axis=1)  # needed - otherwise not a single value per example
    output_fn = theano.function([X], Y)

#     y_pred = output_fn( data );

    batch_size = 1000
    y_pred = []
    batch_start = 0
    while batch_start < data.shape[0]:
        batch_stop = min(data.shape[0], batch_start + batch_size)
        y_pred.append(output_fn(data[batch_start:batch_stop]))
#         if batch_start == 0: print y_pred
        batch_start = batch_stop
    y_pred = np.hstack(y_pred)

    print testset.labels[0:1000]
    print y_pred[0:1000]

    print classification_report(testset.labels, y_pred)
    print confusion_matrix(testset.labels, y_pred)

    labels = np.argmax(testset.y, axis=1)
    print classification_report(labels, y_pred)
    print confusion_matrix(labels, y_pred)
    
    misclass = (labels != y_pred).mean()
    print misclass
    
#     # alternative version from KeepBestParams
#     minibatch = T.matrix('minibatch')
#     output_fn = theano.function(inputs=[minibatch],outputs=T.argmax( model.fprop(minibatch), axis = 1 ));
#     it = testset.iterator('sequential', batch_size=batch_size, targets=False);
#     y_pred = [output_fn(mbatch) for mbatch in it];

#             y_hat = T.argmax(state, axis=1)
#             y = T.argmax(target, axis=1)
#             misclass = T.neq(y, y_hat).mean()
#             misclass = T.cast(misclass, config.floatX)
#             rval['misclass'] = misclass
#             rval['nll'] = self.cost(Y_hat=state, Y=target)

    log.debug('done')
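
# Aside (hedged): the per-sequence aggregation above is a majority vote over
# the frame-level predictions between consecutive sequence_partitions offsets.
# A compact numpy sketch of that vote; the function itself is illustrative,
# not part of the original script.
import numpy as np

def aggregate_sequences(y_pred, seq_starts):
    seq_pred = []
    for i, start in enumerate(seq_starts):
        stop = seq_starts[i + 1] if i + 1 < len(seq_starts) else None
        votes = np.bincount(y_pred[start:stop])  # histogram of predicted labels
        seq_pred.append(np.argmax(votes))        # most frequent label wins
    return np.asarray(seq_pred)

# e.g. aggregate_sequences(np.array([0, 0, 1, 1, 1, 2, 2, 0]), [0, 5])
# -> array([1, 2])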
Beispiel #48
0
    def redo_theano(self):
        """
        Recompiles Theano functions used by this monitor.

        This is needed so that if new channels are added, Theano's
        optimizations make sure (to the extent that they can) that the new
        channels and old channels don't have any redundant calculations.

        It is also needed to regenerate Theano functions after pickling and
        unpickling, since Theano functions should not be pickled.
        """
        self._dirty = False

        init_names = dir(self)
        self.prereqs = OrderedDict()
        for channel in self.channels.values():
            if channel.prereqs is not None:
                dataset = channel.dataset
                if dataset not in self.prereqs:
                    self.prereqs[dataset] = []
                prereqs = self.prereqs[dataset]
                for prereq in channel.prereqs:
                    if prereq not in prereqs:
                        prereqs.append(prereq)

        updates = OrderedDict()
        for channel in self.channels.values():
            updates[channel.val_shared] = np.cast[config.floatX](0.0)
        with log_timing(log, "compiling begin_record_entry"):
            self.begin_record_entry = function(
                inputs=[],
                updates=updates,
                mode=self.theano_function_mode,
                name='Monitor.begin_record_entry')
        updates = OrderedDict()
        givens = OrderedDict()
        # Get the appropriate kind of theano variable to represent the data the model
        # acts on
        X = self.model.get_input_space().make_theano_batch(name="monitoring_X")
        if config.compute_test_value != 'off':
            m = self.model.get_test_batch_size()
            test_value = self.model.get_input_space().get_origin_batch(m)
            X.tag.test_value = np.cast[X.type.dtype](test_value)
        if self.require_label:
            Y = self.model.get_output_space().make_theano_batch(
                name="monitoring_Y")

        log.info('Monitored channels: ')
        for key in sorted(self.channels.keys()):
            mode = self.theano_function_mode
            if mode is not None and hasattr(mode, 'record'):
                mode.record.handle_line(
                    'compiling monitor including channel ' + key + '\n')
            log.info('\t%s' % key)
        it = [d.iterator(mode=i, num_batches=n, batch_size=b, topo=self.topo)
              for d, i, n, b in safe_izip(self._datasets, self._iteration_mode,
                                          self._num_batches, self._batch_size)]
        self.num_examples = [
            np.cast[config.floatX](float(i.num_examples)) for i in it
        ]
        givens = [OrderedDict() for d in self._datasets]
        updates = [OrderedDict() for d in self._datasets]
        for channel in self.channels.values():
            index = self._datasets.index(channel.dataset)
            d = self._datasets[index]
            g = givens[index]
            cur_num_examples = self.num_examples[index]
            u = updates[index]
            if isinstance(channel.graph_input, (list, tuple)):
                channel_X, channel_Y = channel.graph_input
                assert channel_X not in g or g[channel_X] is X
                assert channel_Y not in g or g[channel_Y] is Y
                g[channel_X] = X
                g[channel_Y] = Y
            else:
                channel_X = channel.graph_input
                assert channel_X not in g or g[channel_X] is X
                g[channel_X] = X
            if cur_num_examples == 0:
                raise ValueError(
                    "Iterating over 0 examples results in divide by 0")
            if self.topo:
                batch_index = d.get_topo_batch_axis()
            else:
                batch_index = 0
            val = channel.val * T.cast(X.shape[batch_index],
                                       config.floatX) / cur_num_examples
            u[channel.val_shared] = channel.val_shared + val

        with log_timing(log, "Compiling accum"):
            # Check type of update expressions
            for up in updates:
                for key in up:
                    if key.dtype != up[key].dtype:
                        raise TypeError('Monitoring channel shared variable ' +
                                        key.name + ' has dtype ' + key.dtype +
                                        ' but is driven by an expression ' +
                                        'with type ' + up[key].dtype)

            self.accum = []
            for idx, packed in enumerate(safe_izip(givens, updates)):
                g, u = packed
                mode = self.theano_function_mode
                if mode is not None and hasattr(mode, 'record'):
                    for elem in g:
                        mode.record.handle_line('g key ' +
                                                var_descriptor(elem) + '\n')
                        mode.record.handle_line('g val ' +
                                                var_descriptor(g[elem]) + '\n')
                    for elem in u:
                        mode.record.handle_line('u key ' +
                                                var_descriptor(elem) + '\n')
                        mode.record.handle_line('u val ' +
                                                var_descriptor(u[elem]) + '\n')
                function_name = 'Monitor.accum[%d]' % idx
                if self.require_label:
                    if mode is not None and hasattr(mode, 'record'):
                        mode.record.handle_line('compiling supervised accum\n')
                    # Some channels may not depend on the data, i.e., they
                    # might just monitor the model parameters or some shared
                    # variable updated by the training algorithm, so we need
                    # to ignore the unused input error.
                    self.accum.append(
                        function([X, Y],
                                 givens=g,
                                 updates=u,
                                 mode=self.theano_function_mode,
                                 name=function_name))
                else:
                    if mode is not None and hasattr(mode, 'record'):
                        mode.record.handle_line(
                            'compiling unsupervised accum\n')
                    self.accum.append(
                        function([X],
                                 givens=g,
                                 updates=u,
                                 mode=self.theano_function_mode,
                                 name=function_name))
            for a in self.accum:
                if mode is not None and hasattr(mode, 'record'):
                    for elem in a.maker.fgraph.outputs:
                        mode.record.handle_line('accum output ' +
                                                var_descriptor(elem) + '\n')
                log.info("graph size: %d" % len(a.maker.fgraph.toposort()))
        final_names = dir(self)
        self.register_names_to_del(
            [name for name in final_names if name not in init_names])
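
# Aside (hedged): the `givens` dictionaries built above replace each channel's
# own symbolic input with the single shared monitoring batch, so every channel
# can be evaluated by one compiled function. A tiny standalone Theano
# illustration of that substitution (assumes Theano is importable; not part of
# the Monitor code):
import numpy as np
import theano
import theano.tensor as T

x = T.vector('monitoring_X')               # the shared monitoring input
channel_input = T.vector('channel_input')  # what the channel was built on
channel_val = channel_input.mean()         # the channel's symbolic value

# `givens` rewires the channel's input onto the monitoring batch, mirroring
# g[channel_X] = X in redo_theano above
f = theano.function([x], channel_val, givens={channel_input: x})
print(f(np.asarray([1., 2., 3.], dtype=x.dtype)))  # 2.0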
Beispiel #49
0
    def __init__(
        self,
        path,
        name='',  # optional name

        # selectors
        subjects='all',  # optional selector (list) or 'all'
        trial_types='all',  # optional selector (list) or 'all'
        trial_numbers='all',  # optional selector (list) or 'all'
        conditions='all',  # optional selector (list) or 'all'
        partitioner=None,
        channel_filter=NoChannelFilter(),  # optional channel filter, default: keep all
        channel_names=None,  # optional channel names (for metadata)
        label_map=None,  # optional conversion of labels
        remove_dc_offset=False,  # optional subtraction of channel mean, usually done already earlier
        resample=None,  # optional down-sampling

        # optional sub-sequences selection
        start_sample=0,
        stop_sample=None,  # optional for selection of sub-sequences

        # optional signal filter to be applied before splitting the signal
        signal_filter=None,

        # windowing parameters
        frame_size=-1,
        hop_size=-1,  # values > 0 will lead to windowing
        hop_fraction=None,  # alternative to specifying absolute hop_size

        # optional spectrum parameters, n_fft = 0 keeps raw data
        n_fft=0,
        n_freq_bins=None,
        spectrum_log_amplitude=False,
        spectrum_normalization_mode=None,
        include_phase=False,
        flatten_channels=False,
        layout='tf',  # (0,1)-axes layout tf=time x features or ft=features x time
        save_matrix_path=None,
        keep_metadata=False,
    ):
        '''
        Constructor
        '''

        # save params
        self.params = locals().copy()
        del self.params['self']
        # print self.params

        # TODO: get the whole filtering into an extra class

        datafiles_metadata, metadb = load_datafiles_metadata(path)

        #         print datafiles_metadata

        def apply_filters(filters, node):
            if isinstance(node, dict):
                filtered = []
                keepkeys = filters[0]
                for key, value in node.items():
                    if keepkeys == 'all' or key in keepkeys:
                        filtered.extend(apply_filters(filters[1:], value))
                return filtered
            else:
                return node  # [node]

        # keep only files that match the metadata filters
        self.datafiles = apply_filters(
            [subjects, trial_types, trial_numbers, conditions],
            datafiles_metadata)

        # copy metadata for retained files
        self.metadb = {}
        for datafile in self.datafiles:
            self.metadb[datafile] = metadb[datafile]

#         print self.datafiles
#         print self.metadb

        self.name = name

        if partitioner is not None:
            self.datafiles = partitioner.get_partition(self.name, self.metadb)

        self.include_phase = include_phase
        self.spectrum_normalization_mode = spectrum_normalization_mode
        self.spectrum_log_amplitude = spectrum_log_amplitude

        self.sequence_partitions = []  # used to keep track of original sequences

        # metadata: [subject, trial_no, stimulus, channel, start, ]
        self.metadata = []

        sequences = []
        labels = []
        n_sequences = 0

        if frame_size > 0 and hop_size == -1 and hop_fraction is not None:
            hop_size = np.ceil(frame_size / hop_fraction)

        for i in xrange(len(self.datafiles)):
            with log_timing(log,
                            'loading data from {}'.format(self.datafiles[i])):

                # save start of next sequence
                self.sequence_partitions.append(n_sequences)

                data, metadata = load(os.path.join(path, self.datafiles[i]))

                label = metadata['label']
                if label_map is not None:
                    label = label_map[label]

                multi_channel_frames = []

                # process 1 channel at a time
                for channel in xrange(data.shape[1]):
                    # filter channels
                    if not channel_filter.keep_channel(channel):
                        continue

                    samples = data[:, channel]

                    # subtract channel mean
                    if remove_dc_offset:
                        samples -= samples.mean()

                    # down-sample if requested
                    if resample is not None and resample[0] != resample[1]:
                        samples = librosa.resample(samples, resample[0],
                                                   resample[1])

                    # apply optional signal filter after down-sampling -> requires lower order
                    if signal_filter is not None:
                        samples = signal_filter.process(samples)

                    # get sub-sequence in resampled space
                    # log.info('using samples {}..{} of {}'.format(start_sample,stop_sample, samples.shape))
                    samples = samples[start_sample:stop_sample]

                    if n_fft is not None and n_fft > 0:  # Optionally:
                        ### frequency spectrum branch ###

                        # transform to spectogram
                        hop_length = n_fft / 4
                        '''
                        from http://theremin.ucsd.edu/~bmcfee/librosadoc/librosa.html
                        >>> # Get a power spectrogram from a waveform y
                        >>> S       = np.abs(librosa.stft(y)) ** 2
                        >>> log_S   = librosa.logamplitude(S)
                        '''

                        S = librosa.core.stft(samples,
                                              n_fft=n_fft,
                                              hop_length=hop_length)
                        # mag = np.abs(S)        # magnitude spectrum
                        mag = np.abs(S)**2  # power spectrum

                        # include phase information if requested
                        if self.include_phase:
                            # phase = np.unwrap(np.angle(S))
                            phase = np.angle(S)

                        # Optionally: cut off high bands
                        if n_freq_bins is not None:
                            mag = mag[0:n_freq_bins, :]
                            if self.include_phase:
                                phase = phase[0:n_freq_bins, :]

                        if self.spectrum_log_amplitude:
                            mag = librosa.logamplitude(mag)

                        s = mag  # for normalization
                        '''
                        NOTE on normalization:
                        It depends on the structure of the neural network and (even more)
                        on the properties of the data. There is no best normalization
                        algorithm, because if there were one, it would be used everywhere
                        by default...

                        In theory, there is no requirement for the data to be normalized
                        at all. This is a purely practical matter: in practice, convergence
                        can take forever if the input is spread out too much. The simplest
                        approach is to normalize by scaling the data to (-1,1) (or (0,1),
                        depending on the activation function), and in most cases that works.
                        If the algorithm converges well, that is your answer. If not, there
                        are too many possible problems and methods to outline here without
                        knowing the actual data.
                        '''

                        ## normalize to mean 0, std 1
                        if self.spectrum_normalization_mode == 'mean0_std1':
                            # s = preprocessing.scale(s, axis=0);
                            mean = np.mean(s)
                            std = np.std(s)
                            s = (s - mean) / std

                        ## normalize by linear transform to [0,1]
                        elif self.spectrum_normalization_mode == 'linear_0_1':
                            s = s / np.max(s)

                        ## normalize by linear transform to [-1,1]
                        elif self.spectrum_normalization_mode == 'linear_-1_1':
                            s = -1 + 2 * (s - np.min(s)) / (np.max(s) -
                                                            np.min(s))

                        elif self.spectrum_normalization_mode is not None:
                            raise ValueError(
                                'unsupported spectrum normalization mode {}'.
                                format(self.spectrum_normalization_mode))

                        #print s.mean(axis=0)
                        #print s.std(axis=0)

                        # include phase information if requested
                        if self.include_phase:
                            # normalize phase to [-1.1]
                            phase = phase / np.pi
                            s = np.vstack([s, phase])

                        # transpose to fit pylearn2 layout
                        s = np.transpose(s)
                        # print s.shape

                        ### end of frequency spectrum branch ###
                    else:
                        ### raw waveform branch ###

                        # normalize to max amplitude 1
                        s = librosa.util.normalize(samples)

                        # add 2nd data dimension
                        s = s.reshape(s.shape[0], 1)
                        # print s.shape

                        ### end of raw waveform branch ###

                    s = np.asfarray(s, dtype='float32')

                    if frame_size > 0 and hop_size > 0:
                        # FIXME: the copy is necessary in
                        # MultiChannelEEGSequencesDataset - otherwise the
                        # following op does not work
                        s = s.copy()
                        frames = frame(s,
                                       frame_length=frame_size,
                                       hop_length=hop_size)
                    else:
                        frames = s
                    del s
                    # print frames.shape

                    if flatten_channels:
                        # add artificial channel dimension
                        frames = frames.reshape(
                            (frames.shape[0], frames.shape[1], frames.shape[2],
                             1))
                        # print frames.shape

                        sequences.append(frames)

                        # increment counter by new number of frames
                        n_sequences += frames.shape[0]

                        if keep_metadata:
                            # determine channel name
                            channel_name = None
                            if channel_names is not None:
                                channel_name = channel_names[channel]
                            elif 'channels' in metadata:
                                channel_name = metadata['channels'][channel]

                            self.metadata.append({
                                'subject': metadata['subject'],
                                'trial_type': metadata['trial_type'],
                                'trial_no': metadata['trial_no'],
                                'condition': metadata['condition'],
                                'channel': channel,
                                'channel_name': channel_name,
                                'start': self.sequence_partitions[-1],
                                'stop': n_sequences
                            })

                        for _ in xrange(frames.shape[0]):
                            labels.append(label)
                    else:
                        multi_channel_frames.append(frames)

                    ### end of channel iteration ###

                if not flatten_channels:
                    # turn list into array
                    multi_channel_frames = np.asfarray(multi_channel_frames,
                                                       dtype='float32')
                    # [channels x frames x time x freq] -> cb01
                    # [channels x frames x time x 1] -> cb0.

                    # move channel dimension to end
                    multi_channel_frames = np.rollaxis(multi_channel_frames, 0,
                                                       4)
                    # print multi_channel_frames.shape
                    # log.debug(multi_channel_frames.shape)

                    sequences.append(multi_channel_frames)

                    # increment counter by new number of frames
                    n_sequences += multi_channel_frames.shape[0]

                    if keep_metadata:
                        self.metadata.append({
                            'subject': metadata['subject'],
                            'trial_type': metadata['trial_type'],
                            'trial_no': metadata['trial_no'],
                            'condition': metadata['condition'],
                            'channel': 'all',
                            'start': self.sequence_partitions[-1],
                            'stop': n_sequences
                        })

                    for _ in xrange(multi_channel_frames.shape[0]):
                        labels.append(label)

                ### end of datafile iteration ###

        # turn into numpy arrays
        sequences = np.vstack(sequences)
        # print sequences.shape;

        labels = np.hstack(labels)

        # one_hot_y = one_hot(labels)
        one_hot_formatter = OneHotFormatter(labels.max() + 1)  # FIXME!
        one_hot_y = one_hot_formatter.format(labels)

        self.labels = labels

        if layout == 'ft':  # swap axes to (batch, feature, time, channels)
            sequences = sequences.swapaxes(1, 2)

        log.debug('final dataset shape: {} (b,0,1,c)'.format(sequences.shape))
        super(MultiChannelEEGDataset, self).__init__(topo_view=sequences,
                                                     y=one_hot_y,
                                                     axes=['b', 0, 1, 'c'])

        log.info(
            'generated dataset "{}" with shape X={}={} y={} labels={} '.format(
                self.name, self.X.shape, sequences.shape, self.y.shape,
                self.labels.shape))

        if save_matrix_path is not None:
            matrix = DenseDesignMatrix(topo_view=sequences,
                                       y=one_hot_y,
                                       axes=['b', 0, 1, 'c'])
            with log_timing(
                    log,
                    'saving DenseDesignMatrix to {}'.format(save_matrix_path)):
                serial.save(save_matrix_path, matrix)
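
# Aside (hedged): the constructor above supports three spectrum normalization
# modes ('mean0_std1', 'linear_0_1', 'linear_-1_1'). The same transforms,
# pulled out into a standalone numpy helper for clarity (illustrative only):
import numpy as np

def normalize_spectrum(s, mode):
    if mode == 'mean0_std1':
        return (s - np.mean(s)) / np.std(s)
    elif mode == 'linear_0_1':
        return s / np.max(s)
    elif mode == 'linear_-1_1':
        return -1 + 2 * (s - np.min(s)) / (np.max(s) - np.min(s))
    elif mode is None:
        return s
    raise ValueError('unsupported spectrum normalization mode {}'.format(mode))

# e.g. normalize_spectrum(np.array([[1., 2.], [3., 5.]]), 'linear_-1_1')
# maps the smallest bin to -1.0 and the largest to 1.0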
Beispiel #50
0
        def get_func(learn_discriminator, learn_generator,
                     dont_you_fucking_dare_touch_the_generator=False):

            updates = OrderedDict()

            assert (learn_discriminator or learn_generator) and not (learn_discriminator and learn_generator)

            if learn_discriminator:
                cur_params = model.discriminator.get_params()
            else:
                cur_params = model.generator.get_params()

            def check():
                for param in params:
                    if param not in cur_params:
                        assert param not in updates

            cur_grads = OrderedDict()
            for param in cur_params:
                cur_grads[param] = grads[param]

            for param in grads:
                if grads[param].name is None and cost_value is not None:
                    grads[param].name = ('grad(%(costname)s, %(paramname)s)' %
                                         {'costname': cost_value.name,
                                          'paramname': param.name})
                assert grads[param].dtype == param.dtype

            cur_lr_scalers = OrderedDict()
            for param in cur_params:
                if param in lr_scalers:
                    lr_scaler = lr_scalers[param]
                    cur_lr_scalers[param] = lr_scaler

            log.info('Parameter and initial learning rate summary:')
            for param in cur_params:
                param_name = param.name
                if param_name is None:
                    param_name = 'anon_param'
                lr = learning_rate.get_value() * cur_lr_scalers.get(param,1.)
                log.info('\t' + param_name + ': ' + str(lr))

            updates.update(self.learning_rule.get_updates(
                    learning_rate, cur_grads, cur_lr_scalers))
            check()

            for param in cur_params:
                if updates[param].name is None:
                    updates[param].name = 'sgd_update(' + param.name + ')'
            check()
            model.modify_updates(updates)
            check()
            for param in cur_params:
                update = updates[param]
                if update.name is None:
                    update.name = 'censor(sgd_update(' + param.name + '))'
                for update_val in get_debug_values(update):
                    if np.any(np.isinf(update_val)):
                        raise ValueError("debug value of %s contains infs" %
                                update.name)
                    if np.any(np.isnan(update_val)):
                        raise ValueError("debug value of %s contains nans" %
                                update.name)

            check()

            if dont_you_fucking_dare_touch_the_generator:
                for param in model.generator.get_params():
                    assert param not in updates

            with log_timing(log, 'Compiling sgd_update'):
                return function(theano_args,
                                updates=updates,
                                name='sgd_update',
                                on_unused_input='ignore',
                                mode=self.theano_function_mode)
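
# Aside (hedged): get_func above builds an update function that touches only
# the discriminator's parameters or only the generator's. A sketch of how such
# a pair of compiled update functions is typically alternated during GAN
# training; `batches`, `discriminator_step` and `generator_step` below are
# placeholders, not the pylearn2 objects.
def adversarial_training(batches, discriminator_step, generator_step,
                         k_discriminator=1):
    for step, batch in enumerate(batches):
        if step % (k_discriminator + 1) < k_discriminator:
            # e.g. the function returned by
            # get_func(learn_discriminator=True, learn_generator=False)
            discriminator_step(*batch)
        else:
            # e.g. the function returned by
            # get_func(learn_discriminator=False, learn_generator=True)
            generator_step(*batch)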
Beispiel #51
0
    def setup(self, model, dataset):

        if self.cost is None:
            self.cost = model.get_default_cost()

        inf_params = [param for param in model.get_params()
                      if np.any(np.isinf(param.get_value()))]
        if len(inf_params) > 0:
            raise ValueError("These params are Inf: " + str(inf_params))
        if any([np.any(np.isnan(param.get_value()))
                for param in model.get_params()]):
            nan_params = [param for param in model.get_params()
                          if np.any(np.isnan(param.get_value()))]
            raise ValueError("These params are NaN: " + str(nan_params))
        self.model = model

        batch_size = self.batch_size
        if hasattr(model, "force_batch_size"):
            if model.force_batch_size > 0:
                if batch_size is not None:
                    if batch_size != model.force_batch_size:
                        if self.set_batch_size:
                            model.set_batch_size(batch_size)
                        else:
                            raise ValueError("batch_size argument to SGD conflicts with model's force_batch_size attribute")
                else:
                    self.batch_size = model.force_batch_size
        model._test_batch_size = self.batch_size
        self.monitor = Monitor.get_monitor(model)
        # TODO: come up with some standard scheme for associating training runs
        # with monitors / pushing the monitor automatically, instead of just
        # enforcing that people have called push_monitor
        assert self.monitor.get_examples_seen() == 0
        self.monitor._sanity_check()

        X = model.get_input_space().make_theano_batch(
            name="%s[X]" % self.__class__.__name__)
        self.topo = not X.ndim == 2

        if config.compute_test_value == 'raise':
            if self.topo:
                X.tag.test_value = dataset.get_batch_topo(self.batch_size)
            else:
                X.tag.test_value = dataset.get_batch_design(self.batch_size)

        Y = T.matrix(name="%s[Y]" % self.__class__.__name__)


        if self.cost.supervised:
            if config.compute_test_value == 'raise':
                _, Y.tag.test_value = dataset.get_batch_design(self.batch_size, True)

            self.supervised = True
            cost_value = self.cost(model, X, Y)

        else:
            self.supervised = False
            cost_value = self.cost(model, X)
        if cost_value is not None and cost_value.name is None:
            if self.supervised:
                cost_value.name = 'objective(' + X.name + ', ' + Y.name + ')'
            else:
                cost_value.name = 'objective(' + X.name + ')'

        # Set up monitor to model the objective value, learning rate,
        # momentum (if applicable), and extra channels defined by
        # the cost
        learning_rate = self.learning_rate
        if self.monitoring_dataset is not None:
            self.monitor.setup(dataset=self.monitoring_dataset,
                               cost=self.cost,
                               batch_size=self.batch_size,
                               num_batches=self.monitoring_batches,
                               extra_costs=self.monitoring_costs)
            if self.supervised:
                ipt = (X, Y)
            else:
                ipt = X
            dataset_name = self.monitoring_dataset.keys()[0]
            monitoring_dataset = self.monitoring_dataset[dataset_name]
            # TODO: have Monitor support non-data-dependent channels
            self.monitor.add_channel(name='learning_rate', ipt=ipt,
                                     val=learning_rate,
                                     dataset=monitoring_dataset)
            if self.momentum:
                self.monitor.add_channel(name='momentum', ipt=ipt,
                                         val=self.momentum,
                                         dataset=monitoring_dataset)

        params = list(model.get_params())
        assert len(params) > 0
        for i, param in enumerate(params):
            if param.name is None:
                param.name = 'sgd_params[%d]' % i

        if self.cost.supervised:
            grads, updates = self.cost.get_gradients(model, X, Y)
        else:
            grads, updates = self.cost.get_gradients(model, X)

        for param in grads:
            assert param in params
        for param in params:
            assert param in grads

        for param in grads:
            if grads[param].name is None and cost_value is not None:
                grads[param].name = ('grad(%(costname)s, %(paramname)s)' %
                                     {'costname': cost_value.name,
                                      'paramname': param.name})

        lr_scalers = model.get_lr_scalers()

        for key in lr_scalers:
            if key not in params:
                raise ValueError("Tried to scale the learning rate on " +\
                        str(key)+" which is not an optimization parameter.")

        log.info('Parameter and initial learning rate summary:')
        for param in params:
            param_name = param.name
            if param_name is None:
                param_name = 'anon_param'
            lr = learning_rate.get_value() * lr_scalers.get(param,1.)
            log.info('\t' + param_name + ': ' + str(lr))

        if self.momentum is None:
            updates.update(dict(safe_zip(params,
                                         [param - learning_rate *
                                          lr_scalers.get(param, 1.) *
                                          grads[param]
                                          for param in params])))
        else:
            for param in params:
                inc = sharedX(param.get_value() * 0.)
                if param.name is not None:
                    inc.name = 'inc_' + param.name
                updated_inc = (self.momentum * inc -
                               learning_rate * lr_scalers.get(param, 1.) *
                               grads[param])
                updates[inc] = updated_inc
                updates[param] = param + updated_inc


        for param in params:
            if updates[param].name is None:
                updates[param].name = 'sgd_update(' + param.name + ')'
        model.censor_updates(updates)
        for param in params:
            update = updates[param]
            if update.name is None:
                update.name = 'censor(sgd_update(' + param.name + '))'
            for update_val in get_debug_values(update):
                if np.any(np.isinf(update_val)):
                    raise ValueError("debug value of %s contains infs" % update.name)
                if np.any(np.isnan(update_val)):
                    raise ValueError("debug value of %s contains nans" % update.name)


        with log_timing(log, 'Compiling sgd_update'):
            if self.supervised:
                fn_inputs = [X, Y]
            else:
                fn_inputs = [X]
            self.sgd_update = function(fn_inputs, updates=updates,
                                       name='sgd_update',
                                       on_unused_input='ignore',
                                       mode=self.theano_function_mode)
        self.params = params
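
# Aside (hedged): when momentum is enabled above, each parameter gets a
# persistent velocity `inc` that is decayed and pushed along the negative
# gradient before being added to the parameter. The same rule in plain numpy
# for a single parameter (illustrative only):
import numpy as np

def momentum_step(param, grad, inc, learning_rate, momentum, lr_scaler=1.0):
    updated_inc = momentum * inc - learning_rate * lr_scaler * grad
    return param + updated_inc, updated_inc

param, inc = np.array([1.0]), np.array([0.0])
for _ in range(3):
    grad = 2 * param  # gradient of f(p) = p**2
    param, inc = momentum_step(param, grad, inc,
                               learning_rate=0.1, momentum=0.9)
print(param)  # moves toward the minimum at 0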
Beispiel #52
0
    def setup(self, model, dataset):
        """
        Compiles the theano functions needed for the train method.

        Parameters
        ----------
        model : a Model instance
        dataset : Dataset
        """
        if self.cost is None:
            self.cost = model.get_default_cost()

        inf_params = [param for param in model.get_params()
                      if contains_inf(param.get_value())]
        if len(inf_params) > 0:
            raise ValueError("These params are Inf: "+str(inf_params))
        if any([contains_nan(param.get_value())
                for param in model.get_params()]):
            nan_params = [param for param in model.get_params()
                          if contains_nan(param.get_value())]
            raise ValueError("These params are NaN: "+str(nan_params))
        self.model = model

        self._synchronize_batch_size(model)
        model._test_batch_size = self.batch_size
        self.monitor = Monitor.get_monitor(model)
        self.monitor._sanity_check()

        # test if force batch size and batch size
        has_force_batch_size = getattr(model, "force_batch_size", False)
        train_dataset_is_uneven = \
            dataset.get_num_examples() % self.batch_size != 0

        has_monitoring_datasets = \
            self.monitoring_dataset is not None and \
            len(self.monitoring_dataset) > 0

        if has_monitoring_datasets:
            monitoring_datasets_are_uneven = \
                any(d.get_num_examples() % self.batch_size
                    != 0 for d in self.monitoring_dataset.values())
        else:
            monitoring_datasets_are_uneven = False  # or True it doesn't matter

        if has_force_batch_size and train_dataset_is_uneven and \
           not has_uniform_batch_size(self.train_iteration_mode):

            raise ValueError("Dataset size is not a multiple of batch size."
                             "You should set train_iteration_mode (and "
                             "maybe monitor_iteration_mode) to "
                             "even_sequential, even_shuffled_sequential or "
                             "even_batchwise_shuffled_sequential")

        if has_force_batch_size and has_monitoring_datasets and \
           monitoring_datasets_are_uneven and \
           not has_uniform_batch_size(self.monitor_iteration_mode):

            raise ValueError("Dataset size is not a multiple of batch size."
                             "You should set monitor_iteration_mode to "
                             "even_sequential, even_shuffled_sequential or "
                             "even_batchwise_shuffled_sequential")

        data_specs = self.cost.get_data_specs(self.model)
        mapping = DataSpecsMapping(data_specs)
        space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
        source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

        # Build a flat tuple of Theano Variables, one for each space.
        # We want that so that if the same space/source is specified
        # more than once in data_specs, only one Theano Variable
        # is generated for it, and the corresponding value is passed
        # only once to the compiled Theano function.
        theano_args = []
        for space, source in safe_zip(space_tuple, source_tuple):
            name = '%s[%s]' % (self.__class__.__name__, source)
            arg = space.make_theano_batch(name=name,
                                          batch_size=self.batch_size)
            theano_args.append(arg)
        theano_args = tuple(theano_args)
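        # Illustration (not part of the original code): for a simple
        # supervised cost the flattened specs might look like
        #     space_tuple  = (VectorSpace(dim=784), VectorSpace(dim=10))
        #     source_tuple = ('features', 'targets')
        # giving one Theano batch per (space, source) pair, e.g.
        # (SGD[features], SGD[targets]); a space/source pair repeated in
        # data_specs still yields a single shared variable here.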

        # Methods of `self.cost` need args to be passed in a format compatible
        # with data_specs
        nested_args = mapping.nest(theano_args)
        fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args)
        self.on_load_batch = fixed_var_descr.on_load_batch

        cost_value = self.cost.expr(model, nested_args,
                                    ** fixed_var_descr.fixed_vars)

        if cost_value is not None and cost_value.name is None:
            # Concatenate the name of all tensors in theano_args !?
            cost_value.name = 'objective'

        learning_rate = self.learning_rate
        params = list(model.get_params())
        assert len(params) > 0
        for i, param in enumerate(params):
            if param.name is None:
                param.name = 'sgd_params[%d]' % i

        grads, updates = self.cost.get_gradients(model, nested_args,
                                                 ** fixed_var_descr.fixed_vars)
        if not isinstance(grads, OrderedDict):
            raise TypeError(str(type(self.cost)) + ".get_gradients returned " +
                            "something with" + str(type(grads)) + "as its " +
                            "first member. Expected OrderedDict.")

        for param in grads:
            assert param in params
        for param in params:
            assert param in grads

        for param in grads:
            if grads[param].name is None and cost_value is not None:
                grads[param].name = ('grad(%(costname)s, %(paramname)s)' %
                                     {'costname': cost_value.name,
                                      'paramname': param.name})
            assert grads[param].dtype == param.dtype

        lr_scalers = model.get_lr_scalers()

        for key in lr_scalers:
            if key not in params:
                raise ValueError("Tried to scale the learning rate on " +\
                        str(key)+" which is not an optimization parameter.")

        log.info('Parameter and initial learning rate summary:')
        for param in params:
            param_name = param.name
            if param_name is None:
                param_name = 'anon_param'
            lr = learning_rate.get_value() * lr_scalers.get(param,1.)
            log.info('\t' + param_name + ': ' + str(lr))

        if self.learning_rule:
            updates.update(self.learning_rule.get_updates(
                learning_rate, grads, lr_scalers))
        else:
            # Use standard SGD updates with fixed learning rate.
            updates.update(dict(safe_zip(
                params,
                [param - learning_rate * lr_scalers.get(param, 1.) *
                 grads[param] for param in params])))

        for param in params:
            if updates[param].name is None:
                updates[param].name = 'sgd_update(' + param.name + ')'
        model.modify_updates(updates)
        for param in params:
            update = updates[param]
            if update.name is None:
                update.name = 'censor(sgd_update(' + param.name + '))'
            for update_val in get_debug_values(update):
                if contains_inf(update_val):
                    raise ValueError("debug value of %s contains infs" %
                            update.name)
                if contains_nan(update_val):
                    raise ValueError("debug value of %s contains nans" %
                            update.name)


        # Set up the monitor to track the objective value, learning rate,
        # momentum (if applicable), and extra channels defined by
        # the cost.
        # We have to do that after learning_rule.get_updates has been
        # called, since it may have an effect on
        # learning_rule.add_channels_to_monitor (that is currently the case
        # for AdaDelta and RMSProp).
        self._setup_monitor()

        with log_timing(log, 'Compiling sgd_update'):
            self.sgd_update = function(theano_args,
                                       updates=updates,
                                       name='sgd_update',
                                       on_unused_input='ignore',
                                       mode=self.theano_function_mode)
        self.params = params
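
The checks near the top of the method above refuse to train when the model forces a fixed batch size, the number of examples is not a multiple of it, and the chosen iteration mode cannot guarantee uniform batches. A minimal standalone sketch of that rule, using a hypothetical helper that is not part of pylearn2:

def check_batch_divisibility(num_examples, batch_size, iteration_mode):
    """Raise if uneven batches would be produced (hypothetical helper)."""
    uniform_modes = ('even_sequential',
                     'even_shuffled_sequential',
                     'even_batchwise_shuffled_sequential')
    if iteration_mode in uniform_modes:
        return  # these modes guarantee uniform (full) batches
    if num_examples % batch_size != 0:
        raise ValueError("Dataset size %d is not a multiple of batch size %d; "
                         "use one of %s." %
                         (num_examples, batch_size, ', '.join(uniform_modes)))

check_batch_divisibility(50000, 100, 'sequential')       # ok: 500 full batches
check_batch_divisibility(50001, 100, 'even_sequential')  # ok: uniform mode
# check_batch_divisibility(50001, 100, 'sequential')     # would raise ValueError
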
Beispiel #53
0
    def setup(self, model, dataset):
        """
        Compiles the theano functions needed for the train method.
        """
        if self.cost is None:
            self.cost = model.get_default_cost()

        inf_params = [param for param in model.get_params()
                      if np.any(np.isinf(param.get_value()))]
        if len(inf_params) > 0:
            raise ValueError("These params are Inf: "+str(inf_params))
        if any([np.any(np.isnan(param.get_value()))
                for param in model.get_params()]):
            nan_params = [param for param in model.get_params()
                          if np.any(np.isnan(param.get_value()))]
            raise ValueError("These params are NaN: "+str(nan_params))
        self.model = model

        batch_size = self.batch_size
        if hasattr(model, "force_batch_size"):
            if model.force_batch_size > 0:
                if batch_size is not None:
                    if batch_size != model.force_batch_size:
                        if self.set_batch_size:
                            model.set_batch_size(batch_size)
                        else:
                            raise ValueError("batch_size argument to SGD " +
                                             "conflicts with model's " +
                                             "force_batch_size attribute")
                else:
                    self.batch_size = model.force_batch_size
        model._test_batch_size = self.batch_size
        self.monitor = Monitor.get_monitor(model)
        self.monitor._sanity_check()

        data_specs = self.cost.get_data_specs(self.model)
        mapping = DataSpecsMapping(data_specs)
        space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
        source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

        # Build a flat tuple of Theano Variables, one for each space.
        # We want that so that if the same space/source is specified
        # more than once in data_specs, only one Theano Variable
        # is generated for it, and the corresponding value is passed
        # only once to the compiled Theano function.
        theano_args = []
        for space, source in safe_zip(space_tuple, source_tuple):
            name = '%s[%s]' % (self.__class__.__name__, source)
            arg = space.make_theano_batch(name=name,
                                          batch_size=self.batch_size)
            theano_args.append(arg)
        theano_args = tuple(theano_args)

        # Methods of `self.cost` need args to be passed in a format compatible
        # with data_specs
        nested_args = mapping.nest(theano_args)
        fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args)
        self.on_load_batch = fixed_var_descr.on_load_batch

        cost_value = self.cost.expr(model, nested_args,
                                    ** fixed_var_descr.fixed_vars)

        if cost_value is not None and cost_value.name is None:
            # Give the unnamed cost expression a generic name.
            cost_value.name = 'objective'

        # Set up the monitor to track the objective value, learning rate,
        # momentum (if applicable), and extra channels defined by
        # the cost
        learning_rate = self.learning_rate
        if self.monitoring_dataset is not None:
            self.monitor.setup(
                    dataset=self.monitoring_dataset,
                    cost=self.cost,
                    batch_size=self.batch_size,
                    num_batches=self.monitoring_batches,
                    extra_costs=self.monitoring_costs,
                    mode=self.monitor_iteration_mode
                    )
            dataset_name = list(self.monitoring_dataset.keys())[0]
            monitoring_dataset = self.monitoring_dataset[dataset_name]
            #TODO: have Monitor support non-data-dependent channels
            self.monitor.add_channel(name='learning_rate',
                                     ipt=None,
                                     val=learning_rate,
                                     data_specs=(NullSpace(), ''),
                                     dataset=monitoring_dataset)

            if self.learning_rule:
                self.learning_rule.add_channels_to_monitor(
                        self.monitor,
                        monitoring_dataset)

        params = list(model.get_params())
        assert len(params) > 0
        for i, param in enumerate(params):
            if param.name is None:
                param.name = 'sgd_params[%d]' % i

        grads, updates = self.cost.get_gradients(model, nested_args,
                                                 ** fixed_var_descr.fixed_vars)
        if not isinstance(grads, OrderedDict):
            raise TypeError(str(type(self.cost)) + ".get_gradients returned " +
                            "something with" + str(type(grads)) + "as its " +
                            "first member. Expected OrderedDict.")

        for param in grads:
            assert param in params
        for param in params:
            assert param in grads

        for param in grads:
            if grads[param].name is None and cost_value is not None:
                grads[param].name = ('grad(%(costname)s, %(paramname)s)' %
                                     {'costname': cost_value.name,
                                      'paramname': param.name})
            assert grads[param].dtype == param.dtype

        lr_scalers = model.get_lr_scalers()

        for key in lr_scalers:
            if key not in params:
                raise ValueError("Tried to scale the learning rate on " +\
                        str(key)+" which is not an optimization parameter.")

        log.info('Parameter and initial learning rate summary:')
        for param in params:
            param_name = param.name
            if param_name is None:
                param_name = 'anon_param'
            lr = learning_rate.get_value() * lr_scalers.get(param,1.)
            log.info('\t' + param_name + ': ' + str(lr))

        if self.learning_rule:
            updates.update(self.learning_rule.get_updates(
                learning_rate, grads, lr_scalers))
        else:
            # Use standard SGD updates with fixed learning rate.
            updates.update(dict(safe_zip(
                params,
                [param - learning_rate * lr_scalers.get(param, 1.) *
                 grads[param] for param in params])))

        for param in params:
            if updates[param].name is None:
                updates[param].name = 'sgd_update(' + param.name + ')'
        model.censor_updates(updates)
        for param in params:
            update = updates[param]
            if update.name is None:
                update.name = 'censor(sgd_update(' + param.name + '))'
            for update_val in get_debug_values(update):
                if np.any(np.isinf(update_val)):
                    raise ValueError("debug value of %s contains infs" % update.name)
                if np.any(np.isnan(update_val)):
                    raise ValueError("debug value of %s contains nans" % update.name)


        with log_timing(log, 'Compiling sgd_update'):
            self.sgd_update = function(theano_args,
                                       updates=updates,
                                       name='sgd_update',
                                       on_unused_input='ignore',
                                       mode=self.theano_function_mode)
        self.params = params
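
When no learning_rule is given, the branch above applies plain SGD with a fixed learning rate and optional per-parameter scaling. A minimal NumPy sketch of that rule, with hypothetical names and arrays standing in for shared variables:

import numpy as np

def sgd_updates(params, grads, learning_rate, lr_scalers=None):
    """new_param = param - learning_rate * lr_scaler * grad, per parameter."""
    lr_scalers = lr_scalers or {}
    return {name: value - learning_rate * lr_scalers.get(name, 1.) * grads[name]
            for name, value in params.items()}

params = {'W': np.ones((2, 2)), 'b': np.zeros(2)}
grads = {'W': np.full((2, 2), 0.5), 'b': np.full(2, 0.1)}
new_params = sgd_updates(params, grads, learning_rate=0.01,
                         lr_scalers={'b': 2.})   # 'b' uses a doubled rate
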
Beispiel #54
0
    def setup(self, model, dataset):
        """
        Compiles the theano functions needed for the train method.
        """
        if self.cost is None:
            self.cost = model.get_default_cost()

        inf_params = [param for param in model.get_params()
                      if np.any(np.isinf(param.get_value()))]
        if len(inf_params) > 0:
            raise ValueError("These params are Inf: "+str(inf_params))
        if any([np.any(np.isnan(param.get_value()))
                for param in model.get_params()]):
            nan_params = [param for param in model.get_params()
                          if np.any(np.isnan(param.get_value()))]
            raise ValueError("These params are NaN: "+str(nan_params))
        self.model = model

        self._synchronize_batch_size(model)
        model._test_batch_size = self.batch_size
        self.monitor = Monitor.get_monitor(model)
        self.monitor._sanity_check()

        data_specs = self.cost.get_data_specs(self.model)
        mapping = DataSpecsMapping(data_specs)
        space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
        source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

        # Build a flat tuple of Theano Variables, one for each space.
        # We want that so that if the same space/source is specified
        # more than once in data_specs, only one Theano Variable
        # is generated for it, and the corresponding value is passed
        # only once to the compiled Theano function.
        theano_args = []
        for space, source in safe_zip(space_tuple, source_tuple):
            name = '%s[%s]' % (self.__class__.__name__, source)
            arg = space.make_theano_batch(name=name,
                                          batch_size=self.batch_size)
            theano_args.append(arg)
        theano_args = tuple(theano_args)

        # Methods of `self.cost` need args to be passed in a format compatible
        # with data_specs
        nested_args = mapping.nest(theano_args)
        fixed_var_descr = self.cost.get_fixed_var_descr(model, nested_args)
        self.on_load_batch = fixed_var_descr.on_load_batch

        cost_value = self.cost.expr(model, nested_args,
                                    ** fixed_var_descr.fixed_vars)

        if cost_value is not None and cost_value.name is None:
            # Give the unnamed cost expression a generic name.
            cost_value.name = 'objective'

        # Set up the monitor to track the objective value, learning rate,
        # momentum (if applicable), and extra channels defined by
        # the cost
        learning_rate = self.learning_rate
        if self.monitoring_dataset is not None:
            self.monitor.setup(dataset=self.monitoring_dataset,
                               cost=self.cost,
                               batch_size=self.batch_size,
                               num_batches=self.monitoring_batches,
                               extra_costs=self.monitoring_costs,
                               mode=self.monitor_iteration_mode)
            dataset_name = list(self.monitoring_dataset.keys())[0]
            monitoring_dataset = self.monitoring_dataset[dataset_name]
            #TODO: have Monitor support non-data-dependent channels
            self.monitor.add_channel(name='learning_rate',
                                     ipt=None,
                                     val=learning_rate,
                                     data_specs=(NullSpace(), ''),
                                     dataset=monitoring_dataset)

            if self.learning_rule:
                self.learning_rule.add_channels_to_monitor(
                        self.monitor,
                        monitoring_dataset)

        params = list(model.get_params())
        assert len(params) > 0
        for i, param in enumerate(params):
            if param.name is None:
                param.name = 'sgd_params[%d]' % i

        grads, updates = self.cost.get_gradients(model, nested_args,
                                                 ** fixed_var_descr.fixed_vars)
        if not isinstance(grads, OrderedDict):
            raise TypeError(str(type(self.cost)) + ".get_gradients returned " +
                            "something with" + str(type(grads)) + "as its " +
                            "first member. Expected OrderedDict.")

        for param in grads:
            assert param in params
        for param in params:
            assert param in grads

        for param in grads:
            if grads[param].name is None and cost_value is not None:
                grads[param].name = ('grad(%(costname)s, %(paramname)s)' %
                                     {'costname': cost_value.name,
                                      'paramname': param.name})
            assert grads[param].dtype == param.dtype

        lr_scalers = model.get_lr_scalers()

        for key in lr_scalers:
            if key not in params:
                raise ValueError("Tried to scale the learning rate on " +\
                        str(key)+" which is not an optimization parameter.")

        log.info('Parameter and initial learning rate summary:')
        for param in params:
            param_name = param.name
            if param_name is None:
                param_name = 'anon_param'
            lr = learning_rate.get_value() * lr_scalers.get(param,1.)
            log.info('\t' + param_name + ': ' + str(lr))

        if self.learning_rule:
            updates.update(self.learning_rule.get_updates(
                learning_rate, grads, lr_scalers))
        else:
            # Use standard SGD updates with fixed learning rate.
            updates.update(dict(safe_zip(
                params,
                [param - learning_rate * lr_scalers.get(param, 1.) *
                 grads[param] for param in params])))

        for param in params:
            if updates[param].name is None:
                updates[param].name = 'sgd_update(' + param.name + ')'
        model.censor_updates(updates)
        for param in params:
            update = updates[param]
            if update.name is None:
                update.name = 'censor(sgd_update(' + param.name + '))'
            for update_val in get_debug_values(update):
                if np.any(np.isinf(update_val)):
                    raise ValueError("debug value of %s contains infs" %
                            update.name)
                if np.any(np.isnan(update_val)):
                    raise ValueError("debug value of %s contains nans" %
                            update.name)


        with log_timing(log, 'Compiling sgd_update'):
            self.sgd_update = function(theano_args,
                                       updates=updates,
                                       name='sgd_update',
                                       on_unused_input='ignore',
                                       mode=self.theano_function_mode)
        self.params = params
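
The DataSpecsMapping calls above flatten the cost's (space, source) specification into a tuple with duplicates collapsed, build one Theano variable per unique entry, and then nest the variables back into the layout the cost expects. An illustrative stand-in for that round trip over a flat list (plain Python, not the pylearn2 class, which also handles nested composite spaces):

def flatten_specs(pairs):
    """Collapse duplicate (space, source) pairs; remember original positions."""
    flat, index = [], {}
    for pair in pairs:
        if pair not in index:
            index[pair] = len(flat)
            flat.append(pair)
    return tuple(flat), [index[pair] for pair in pairs]

def nest_args(flat_args, positions):
    """Rebuild the original layout from the de-duplicated arguments."""
    return tuple(flat_args[i] for i in positions)

pairs = [('VectorSpace(784)', 'features'),
         ('VectorSpace(10)', 'targets'),
         ('VectorSpace(784)', 'features')]      # 'features' requested twice
flat, positions = flatten_specs(pairs)
assert len(flat) == 2                           # one placeholder per unique pair
assert nest_args(('X', 'y'), positions) == ('X', 'y', 'X')
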
Beispiel #55
0
    def setup(self, model, dataset):

        if self.cost is None:
            self.cost = model.get_default_cost()

        inf_params = [
            param for param in model.get_params()
            if np.any(np.isinf(param.get_value()))
        ]
        if len(inf_params) > 0:
            raise ValueError("These params are Inf: " + str(inf_params))
        if any([
                np.any(np.isnan(param.get_value()))
                for param in model.get_params()
        ]):
            nan_params = [
                param for param in model.get_params()
                if np.any(np.isnan(param.get_value()))
            ]
            raise ValueError("These params are NaN: " + str(nan_params))
        self.model = model

        batch_size = self.batch_size
        if hasattr(model, "force_batch_size"):
            if model.force_batch_size > 0:
                if batch_size is not None:
                    if batch_size != model.force_batch_size:
                        if self.set_batch_size:
                            model.set_batch_size(batch_size)
                        else:
                            raise ValueError(
                                "batch_size argument to SGD conflicts with model's force_batch_size attribute"
                            )
                else:
                    self.batch_size = model.force_batch_size
        model._test_batch_size = self.batch_size
        self.monitor = Monitor.get_monitor(model)
        self.monitor._sanity_check()

        X = model.get_input_space().make_theano_batch(name="%s[X]" %
                                                      self.__class__.__name__)
        self.topo = X.ndim != 2

        if config.compute_test_value == 'raise':
            if self.topo:
                X.tag.test_value = dataset.get_batch_topo(self.batch_size)
            else:
                X.tag.test_value = dataset.get_batch_design(self.batch_size)

        Y = T.matrix(name="%s[Y]" % self.__class__.__name__)

        fixed_var_descr = self.cost.get_fixed_var_descr(model, X, Y)
        self.on_load_batch = fixed_var_descr.on_load_batch

        if self.cost.supervised:
            if config.compute_test_value == 'raise':
                _, Y.tag.test_value = dataset.get_batch_design(
                    self.batch_size, True)

            self.supervised = True
            cost_value = self.cost(model, X, Y, **fixed_var_descr.fixed_vars)

        else:
            self.supervised = False
            cost_value = self.cost(model, X, **fixed_var_descr.fixed_vars)
        if cost_value is not None and cost_value.name is None:
            if self.supervised:
                cost_value.name = 'objective(' + X.name + ', ' + Y.name + ')'
            else:
                cost_value.name = 'objective(' + X.name + ')'

        # Set up the monitor to track the objective value, learning rate,
        # momentum (if applicable), and extra channels defined by
        # the cost
        learning_rate = self.learning_rate
        if self.monitoring_dataset is not None:
            self.monitor.setup(dataset=self.monitoring_dataset,
                               cost=self.cost,
                               batch_size=self.batch_size,
                               num_batches=self.monitoring_batches,
                               extra_costs=self.monitoring_costs,
                               mode=self.monitor_iteration_mode)
            if self.supervised:
                ipt = (X, Y)
            else:
                ipt = X
            dataset_name = list(self.monitoring_dataset.keys())[0]
            monitoring_dataset = self.monitoring_dataset[dataset_name]
            #TODO: have Monitor support non-data-dependent channels
            self.monitor.add_channel(name='learning_rate',
                                     ipt=ipt,
                                     val=learning_rate,
                                     dataset=monitoring_dataset)
            if self.momentum:
                self.monitor.add_channel(name='momentum',
                                         ipt=ipt,
                                         val=self.momentum,
                                         dataset=monitoring_dataset)

        params = list(model.get_params())
        assert len(params) > 0
        for i, param in enumerate(params):
            if param.name is None:
                param.name = 'sgd_params[%d]' % i

        if self.cost.supervised:
            grads, updates = self.cost.get_gradients(
                model, X, Y, **fixed_var_descr.fixed_vars)
        else:
            grads, updates = self.cost.get_gradients(
                model, X, **fixed_var_descr.fixed_vars)

        for param in grads:
            assert param in params
        for param in params:
            assert param in grads

        for param in grads:
            if grads[param].name is None and cost_value is not None:
                grads[param].name = ('grad(%(costname)s, %(paramname)s)' % {
                    'costname': cost_value.name,
                    'paramname': param.name
                })

        lr_scalers = model.get_lr_scalers()

        for key in lr_scalers:
            if key not in params:
                raise ValueError("Tried to scale the learning rate on " +\
                        str(key)+" which is not an optimization parameter.")

        log.info('Parameter and initial learning rate summary:')
        for param in params:
            param_name = param.name
            if param_name is None:
                param_name = 'anon_param'
            lr = learning_rate.get_value() * lr_scalers.get(param, 1.)
            log.info('\t' + param_name + ': ' + str(lr))

        if self.momentum is None:
            updates.update(dict(safe_zip(
                params,
                [param - learning_rate * lr_scalers.get(param, 1.) *
                 grads[param] for param in params])))
        else:
            for param in params:
                inc = sharedX(param.get_value() * 0.)
                if param.name is not None:
                    inc.name = 'inc_' + param.name
                updated_inc = (self.momentum * inc -
                               learning_rate * lr_scalers.get(param, 1.) *
                               grads[param])
                updates[inc] = updated_inc
                updates[param] = param + updated_inc

        for param in params:
            if updates[param].name is None:
                updates[param].name = 'sgd_update(' + param.name + ')'
        model.censor_updates(updates)
        for param in params:
            update = updates[param]
            if update.name is None:
                update.name = 'censor(sgd_update(' + param.name + '))'
            for update_val in get_debug_values(update):
                if np.any(np.isinf(update_val)):
                    raise ValueError("debug value of %s contains infs" %
                                     update.name)
                if np.any(np.isnan(update_val)):
                    raise ValueError("debug value of %s contains nans" %
                                     update.name)

        with log_timing(log, 'Compiling sgd_update'):
            if self.supervised:
                fn_inputs = [X, Y]
            else:
                fn_inputs = [X]

            self.sgd_update = function(fn_inputs,
                                       updates=updates,
                                       name='sgd_update',
                                       on_unused_input='ignore',
                                       mode=self.theano_function_mode)
        self.params = params
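
The momentum branch above keeps one velocity (`inc`) per parameter: the velocity is decayed by the momentum coefficient, pushed against the scaled gradient, and then added to the parameter. A minimal NumPy sketch of that rule with hypothetical names:

import numpy as np

def momentum_step(param, grad, inc, learning_rate, momentum, lr_scaler=1.):
    """Classical momentum: decay the velocity, subtract the scaled gradient."""
    new_inc = momentum * inc - learning_rate * lr_scaler * grad
    return param + new_inc, new_inc

param = np.zeros(3)
inc = np.zeros(3)
grad = np.array([1., -2., 0.5])
for _ in range(3):                    # repeated steps accelerate along the gradient
    param, inc = momentum_step(param, grad, inc,
                               learning_rate=0.1, momentum=0.9)
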
Beispiel #56
0
    def main_loop(self, time_budget=None):
        """
        Repeatedly runs an epoch of the training algorithm, runs any
        epoch-level callbacks, and saves the model.

        Parameters
        ----------
        time_budget : int, optional
            The maximum number of seconds before interrupting
            training. Default is `None`, no time limit.
        """
        t0 = datetime.now()
        if self.algorithm is None:
            self.model.monitor = Monitor.get_monitor(self.model)
            self.model.monitor.time_budget_exceeded = False
            self.setup_extensions()
            # Model.censor_updates is used by the training algorithm to
            # enforce constraints after each step of learning. Here we
            # make sure the constraints are enforced from the start.
            self.model.enforce_constraints()
            self.run_callbacks_and_monitoring()
            while True:
                if self.exceeded_time_budget(t0, time_budget):
                    break

                rval = self.model.train_all(dataset=self.dataset)
                if rval is not None:
                    raise ValueError("Model.train_all should not return " +
                                     "anything. Use Model.continue_learning " +
                                     "to control whether learning continues.")
                self.model.monitor.report_epoch()
                extension_continue = self.run_callbacks_and_monitoring()
                freq = self.save_freq
                if freq > 0 and self.model.monitor.epochs_seen % freq == 0:
                    self.save()
                continue_learning = (self.model.continue_learning()
                                     and extension_continue)
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break
        else:
            self.algorithm.setup(model=self.model, dataset=self.dataset)
            self.setup_extensions()
            # Model.censor_updates is used by the training algorithm to
            # enforce constraints after each step of learning. Here we
            # make sure the constraints are enforced from the start.
            self.model.enforce_constraints()
            if not hasattr(self.model, 'monitor'):
                # TODO: is this really necessary? I just put this error here
                # to prevent an AttributeError later, but I think we could
                # rewrite to avoid the AttributeError
                raise RuntimeError("The algorithm is responsible for setting"
                                   " up the Monitor, but failed to.")
            if len(self.model.monitor._datasets) > 0:
                # This monitoring channel keeps track of a shared variable,
                # which does not need inputs nor data.
                self.training_seconds.__doc__ = """\
The number of seconds that were spent in actual training during the most
recent epoch. This excludes seconds that were spent running callbacks for
the extensions, computing monitoring channels, etc."""
                self.model.monitor.add_channel(
                    name="training_seconds_this_epoch",
                    ipt=None,
                    val=self.training_seconds,
                    data_specs=(NullSpace(), ''),
                    dataset=self.model.monitor._datasets[0])
                self.total_seconds.__doc__ = """\
The number of seconds that were spent on the entirety of processing for the
previous epoch. This includes not only training but also the computation of
the monitoring channels, running TrainExtension callbacks, etc. This value
is reported for the *previous* epoch because the amount of time spent on
monitoring for this epoch is not known until the monitoring channels have
already been reported."""
                self.model.monitor.add_channel(
                    name="total_seconds_last_epoch",
                    ipt=None,
                    val=self.total_seconds,
                    data_specs=(NullSpace(), ''),
                    dataset=self.model.monitor._datasets[0])
            self.run_callbacks_and_monitoring()
            while True:
                if self.exceeded_time_budget(t0, time_budget):
                    break

                with log_timing(log,
                                None,
                                level=logging.DEBUG,
                                callbacks=[self.total_seconds.set_value]):
                    with log_timing(
                            log,
                            None,
                            final_msg='Time this epoch:',
                            callbacks=[self.training_seconds.set_value]):
                        rval = self.algorithm.train(dataset=self.dataset)
                    if rval is not None:
                        raise ValueError("TrainingAlgorithm.train should not "
                                         "return anything. Use "
                                         "TrainingAlgorithm.continue_learning "
                                         "to control whether learning "
                                         "continues.")
                    self.model.monitor.report_epoch()
                    extension_continue = self.run_callbacks_and_monitoring()
                    if self.save_freq > 0 and \
                       self.model.monitor._epochs_seen % self.save_freq == 0:
                        self.save()
                continue_learning = (self.algorithm.continue_learning(
                    self.model) and extension_continue)
                assert continue_learning in [True, False, 0, 1]
                if not continue_learning:
                    break

        self.model.monitor.training_succeeded = True

        if self.save_freq > 0:
            self.save()
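
Stripped of the monitoring channels and timing, main_loop above is a loop that trains one epoch, lets the extensions and the model vote on whether to continue, respects an optional time budget, and saves every `save_freq` epochs. A schematic sketch, with hypothetical callables standing in for the algorithm, model and extensions:

from datetime import datetime

def main_loop_sketch(train_one_epoch, continue_learning, extension_callbacks,
                     save, save_freq=1, time_budget=None):
    """All arguments are hypothetical callables, not the pylearn2 objects."""
    t0 = datetime.now()
    epochs_seen = 0
    while True:
        if time_budget is not None and \
           (datetime.now() - t0).total_seconds() > time_budget:
            break
        train_one_epoch()
        epochs_seen += 1
        keep_going = all(cb() for cb in extension_callbacks)  # extensions may veto
        if save_freq > 0 and epochs_seen % save_freq == 0:
            save()
        if not (continue_learning() and keep_going):
            break
    if save_freq > 0:
        save()  # final save, mirroring the end of main_loop
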
Beispiel #57
0
        def get_func(learn_discriminator, learn_generator):

            updates = OrderedDict()

            # Exactly one of the two networks must be selected per call.
            assert ((learn_discriminator or learn_generator) and
                    not (learn_discriminator and learn_generator))

            if learn_discriminator:
                cur_params = model.discriminator.get_params()
            else:
                cur_params = model.generator.get_params()

            cur_grads = OrderedDict()
            for param in cur_params:
                cur_grads[param] = grads[param]

            for param in grads:
                if grads[param].name is None and cost_value is not None:
                    grads[param].name = ('grad(%(costname)s, %(paramname)s)' %
                                         {
                                             'costname': cost_value.name,
                                             'paramname': param.name
                                         })
                assert grads[param].dtype == param.dtype

            cur_lr_scalers = OrderedDict()
            for param in cur_params:
                if param in lr_scalers:
                    lr_scaler = lr_scalers[param]
                    cur_lr_scalers[param] = lr_scaler

            log.info('Parameter and initial learning rate summary:')
            for param in cur_params:
                param_name = param.name
                if param_name is None:
                    param_name = 'anon_param'
                lr = learning_rate.get_value() * cur_lr_scalers.get(param, 1.)
                log.info('\t' + param_name + ': ' + str(lr))

            if self.learning_rule:
                updates.update(
                    self.learning_rule.get_updates(learning_rate, cur_grads,
                                                   cur_lr_scalers))
            else:
                # Use standard SGD updates with fixed learning rate.
                updates.update(dict(safe_zip(
                    cur_params,
                    [param - learning_rate * cur_lr_scalers.get(param, 1.) *
                     cur_grads[param] for param in cur_params])))

            for param in cur_params:
                if updates[param].name is None:
                    updates[param].name = 'sgd_update(' + param.name + ')'
            model.modify_updates(updates)
            for param in cur_params:
                update = updates[param]
                if update.name is None:
                    update.name = 'censor(sgd_update(' + param.name + '))'
                for update_val in get_debug_values(update):
                    if np.any(np.isinf(update_val)):
                        raise ValueError("debug value of %s contains infs" %
                                         update.name)
                    if np.any(np.isnan(update_val)):
                        raise ValueError("debug value of %s contains nans" %
                                         update.name)

            with log_timing(log, 'Compiling sgd_update'):
                return function(theano_args,
                                updates=updates,
                                name='sgd_update',
                                on_unused_input='ignore',
                                mode=self.theano_function_mode)
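
get_func above compiles one update function per network, and the assertion guarantees that each call touches exactly one of the two parameter sets. A small sketch of that selection with hypothetical parameter names:

def select_params(discriminator_params, generator_params,
                  learn_discriminator, learn_generator):
    """Exactly one of the two flags may be set per compiled function."""
    assert ((learn_discriminator or learn_generator) and
            not (learn_discriminator and learn_generator))
    return discriminator_params if learn_discriminator else generator_params

d_params, g_params = ['W_d', 'b_d'], ['W_g', 'b_g']
assert select_params(d_params, g_params, True, False) == d_params
assert select_params(d_params, g_params, False, True) == g_params
# A typical training loop then alternates the two compiled functions:
#     d_update(...)   # step the discriminator
#     g_update(...)   # step the generator
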