Example #1
0
    def train(self, dataset):
        """
        Run one pass of optimizer-driven training over `dataset`.

        Batches are requested from `dataset.iterator` in
        "shuffled_sequential" mode. Each batch is forwarded to
        `self.optimizer.minimize`, after which the number of examples it
        contained is reported to the model's monitor.

        Parameters
        ----------
        dataset : object
            Source of the training batches; only its `iterator` method is
            used here.

        Raises
        ------
        AssertionError
            If `self.bSetup` is false.
        """
        assert self.bSetup
        model = self.model

        train_iteration_mode = "shuffled_sequential"
        rng = self.rng
        # Only stochastic iteration modes are given an rng.
        if not is_stochastic(train_iteration_mode):
            rng = None

        iterator = dataset.iterator(
            mode=train_iteration_mode,
            batch_size=self.batch_size,
            targets=self.cost.supervised,
            num_batches=self.batches_per_iter,
            topo=self.topo,
            rng=rng,
        )
        for data in iterator:
            if self.cost.supervised:
                # Supervised batches are (inputs, targets) pairs; both are
                # passed to the optimizer.
                args = data
                X, _ = data
            else:
                args = [data]
                X = data
            self.optimizer.minimize(*args)
            # NOTE(review): axis 0 is taken as the batch axis even when
            # self.topo is set -- confirm this holds for topological batches.
            model.monitor.report_batch(X.shape[0])
Example #2
0
 def train(self, dataset):
     """
     Run one pass of SGD over `dataset`.

     Every batch produced by `dataset.iterator` is passed to
     `self.sgd_update`; its size is then reported to `self.monitor` and
     each callable in `self.update_callbacks` is invoked with `self`.

     Parameters
     ----------
     dataset : object
         Source of the training batches; only its `iterator` method is
         used here.

     Raises
     ------
     Exception
         If `sgd_update` has not been created yet, or if a parameter
         contains a NaN or Inf value before training starts.
     """
     if not hasattr(self, "sgd_update"):
         raise Exception("train called without first calling setup")

     # Refuse to train from parameters that are already corrupted.
     for param in self.params:
         value = param.get_value(borrow=True)
         if not np.all(np.isfinite(value)):
             # str() keeps the message usable for unnamed parameters,
             # whose name is None.
             raise Exception("NaN in " + str(param.name))

     self.first = False
     rng = self.rng
     # Only stochastic iteration modes are given an rng.
     if not is_stochastic(self.train_iteration_mode):
         rng = None

     supervised = self.supervised
     iterator = dataset.iterator(
         mode=self.train_iteration_mode,
         batch_size=self.batch_size,
         targets=supervised,
         topo=self.topo,
         rng=rng,
     )

     for batch in iterator:
         if supervised:
             batch_in, batch_target = batch
             self.sgd_update(batch_in, batch_target)
         else:
             batch_in = batch
             self.sgd_update(batch_in)
         # The iterator might return a smaller batch if the dataset size
         # isn't divisible by batch_size, so measure the actual batch.
         self.monitor.report_batch(batch_in.shape[0])
         for callback in self.update_callbacks:
             callback(self)
Example #3
0
    def train(self, dataset):
        """
        Run one pass of SGD over `dataset`, using the cost's data specs.

        The data specs returned by `self.cost.get_data_specs` are flattened
        so the iterator yields flat tuples. Each batch is shown (re-nested)
        to the `self.on_load_batch` callbacks, passed to `self.sgd_update`,
        reported to `self.monitor`, and followed by the
        `self.update_callbacks`.

        Parameters
        ----------
        dataset : object
            Source of the training batches; only its `iterator` method is
            used here.

        Raises
        ------
        Exception
            If `sgd_update` has not been created yet, or if a parameter
            contains a NaN or Inf value before or after the pass.
        NotImplementedError
            If the flattened data specs contain no space at all.
        """
        if not hasattr(self, 'sgd_update'):
            raise Exception("train called without first calling setup")

        def check_params():
            # Make sure none of the parameters have bad values.
            for param in self.params:
                value = param.get_value(borrow=True)
                if not np.all(np.isfinite(value)):
                    # str() keeps the message usable for unnamed
                    # parameters, whose name is None.
                    raise Exception("NaN in " + str(param.name))

        check_params()

        self.first = False
        rng = self.rng
        # Only stochastic iteration modes are given an rng.
        if not is_stochastic(self.train_iteration_mode):
            rng = None

        data_specs = self.cost.get_data_specs(self.model)

        # The iterator should be built from flat data specs, so it returns
        # flat, non-redundant tuples of data.
        mapping = DataSpecsMapping(data_specs)
        space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
        source_tuple = mapping.flatten(data_specs[1], return_tuple=True)
        if len(space_tuple) == 0:
            # No data will be returned by the iterator, and it is impossible
            # to know the size of the actual batch.
            # It is not decided yet what the right thing to do should be.
            raise NotImplementedError("Unable to train with SGD, because "
                    "the cost does not actually use data from the data set. "
                    "data_specs: %s" % str(data_specs))
        flat_data_specs = (CompositeSpace(space_tuple), source_tuple)

        iterator = dataset.iterator(mode=self.train_iteration_mode,
                                    batch_size=self.batch_size,
                                    data_specs=flat_data_specs,
                                    return_tuple=True,
                                    rng=rng,
                                    num_batches=self.batches_per_iter)

        on_load_batch = self.on_load_batch
        for batch in iterator:
            for callback in on_load_batch:
                # Callbacks see the batch in the cost's nested layout.
                callback(mapping.nest(batch))
            self.sgd_update(*batch)
            # iterator might return a smaller batch if dataset size
            # isn't divisible by batch_size
            # Note: if data_specs[0] is a NullSpace, there is no way to know
            # how many examples would actually have been in the batch,
            # since it was empty, so actual_batch_size would be reported as 0.
            actual_batch_size = flat_data_specs[0].np_batch_size(batch)
            self.monitor.report_batch(actual_batch_size)
            for callback in self.update_callbacks:
                callback(self)

        check_params()
Example #4
0
    def train(self, dataset):
        """
        Run one pass of SGD over `dataset`.

        Each batch is shown to the `self.on_load_batch` callbacks as
        `(inputs, targets)` (targets is None when training is not
        supervised), passed to `self.sgd_update`, reported to
        `self.monitor`, and followed by the `self.update_callbacks`.

        Parameters
        ----------
        dataset : object
            Source of the training batches. Its `iterator` method is always
            used; `get_topo_batch_axis` is used when `self.topo` is set.

        Raises
        ------
        Exception
            If `sgd_update` has not been created yet, or if a parameter
            contains a NaN or Inf value before or after the pass.
        """
        if not hasattr(self, "sgd_update"):
            raise Exception("train called without first calling setup")

        def check_params():
            # Make sure none of the parameters have bad values.
            for param in self.params:
                value = param.get_value(borrow=True)
                if not np.all(np.isfinite(value)):
                    # str() keeps the message usable for unnamed
                    # parameters, whose name is None.
                    raise Exception("NaN in " + str(param.name))

        check_params()

        self.first = False
        rng = self.rng
        # Only stochastic iteration modes are given an rng.
        if not is_stochastic(self.train_iteration_mode):
            rng = None

        supervised = self.supervised
        iterator = dataset.iterator(
            mode=self.train_iteration_mode,
            batch_size=self.batch_size,
            targets=supervised,
            topo=self.topo,
            rng=rng,
            num_batches=self.batches_per_iter,
        )

        # Axis along which examples are stacked in the batches.
        if self.topo:
            batch_idx = dataset.get_topo_batch_axis()
        else:
            batch_idx = 0

        on_load_batch = self.on_load_batch
        for batch in iterator:
            if supervised:
                batch_in, batch_target = batch
                update_args = (batch_in, batch_target)
            else:
                batch_in, batch_target = batch, None
                update_args = (batch_in,)

            for callback in on_load_batch:
                callback(batch_in, batch_target)
            self.sgd_update(*update_args)

            # The iterator might return a smaller batch if the dataset size
            # isn't divisible by batch_size. The size is read along the
            # batch axis in both the supervised and unsupervised case, so
            # topological batches are counted correctly.
            actual_batch_size = batch_in.shape[batch_idx]
            self.monitor.report_batch(actual_batch_size)
            for callback in self.update_callbacks:
                callback(self)

        check_params()
Example #5
0
    def train(self, dataset):
        """
        Run one pass of optimizer-driven training over `dataset`.

        The data specs of `self.cost` are flattened and used to build a
        "shuffled_sequential" iterator. For every batch: the targets are
        logged through `mode.record` when the function mode has a `record`
        attribute, the `self.on_load_batch` callbacks receive the re-nested
        batch, and `self.optimizer.minimize` runs between `before_step` and
        `after_step`. The batch size is then reported to the model's
        monitor.

        Parameters
        ----------
        dataset : object
            Source of the training batches; only its `iterator` method is
            used here.

        Raises
        ------
        AssertionError
            If `self.bSetup` is false.
        NotImplementedError
            If the flattened data specs contain no space at all.
        """
        assert self.bSetup
        model = self.model

        iteration_mode = "shuffled_sequential"
        rng = self.rng
        # Only stochastic iteration modes are given an rng.
        if not is_stochastic(iteration_mode):
            rng = None

        data_specs = self.cost.get_data_specs(model)
        # Flatten the specs so the iterator yields flat tuples without
        # duplicated entries.
        mapping = DataSpecsMapping(data_specs)
        space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
        source_tuple = mapping.flatten(data_specs[1], return_tuple=True)
        if not space_tuple:
            # The iterator would return no data, so the actual batch size
            # could not be determined. The right behaviour for this case
            # has not been decided yet.
            message = (
                "Unable to train with BGD, because "
                "the cost does not actually use data from the data set. "
                "data_specs: %s" % str(data_specs)
            )
            raise NotImplementedError(message)
        flat_space = CompositeSpace(space_tuple)
        flat_data_specs = (flat_space, source_tuple)

        # Position of the targets in each flat batch, or None if the cost
        # does not request them.
        if "targets" in source_tuple:
            targets_pos = source_tuple.index("targets")
        else:
            targets_pos = None

        iterator = dataset.iterator(
            mode=iteration_mode,
            batch_size=self.batch_size,
            num_batches=self.batches_per_iter,
            data_specs=flat_data_specs,
            return_tuple=True,
            rng=rng,
        )

        theano_mode = self.theano_function_mode
        for batch in iterator:
            can_record = theano_mode is not None and hasattr(theano_mode, "record")
            if targets_pos is not None and can_record:
                # Log the targets on a single line.
                targets_text = str(batch[targets_pos]).replace("\n", " ")
                theano_mode.record.handle_line("data Y " + targets_text + "\n")

            for callback in self.on_load_batch:
                callback(mapping.nest(batch))

            self.before_step(model)
            self.optimizer.minimize(*batch)
            self.after_step(model)
            model.monitor.report_batch(flat_space.np_batch_size(batch))
Example #6
0
    def train(self, dataset):
        """
        Run one pass of SGD over `dataset`.

        Every batch produced by `dataset.iterator` is passed to
        `self.sgd_update`; its size is then reported to `self.monitor` and
        each callable in `self.update_callbacks` is invoked with `self`.

        Parameters
        ----------
        dataset : object
            Source of the training batches. Its `iterator` method is always
            used; `get_topo_batch_axis` is used when `self.topo` is set.

        Raises
        ------
        Exception
            If `sgd_update` has not been created yet, or if a parameter
            contains a NaN or Inf value before or after the pass.
        """
        if not hasattr(self, 'sgd_update'):
            raise Exception("train called without first calling setup")

        def check_params():
            # Make sure none of the parameters have bad values.
            # This is sometimes very slow, so the values are scanned in a
            # single isfinite pass rather than separate isnan/isinf passes.
            for param in self.params:
                value = param.get_value(borrow=True)
                if not np.all(np.isfinite(value)):
                    # str() keeps the message usable for unnamed
                    # parameters, whose name is None.
                    raise Exception("NaN in " + str(param.name))

        check_params()

        self.first = False
        rng = self.rng
        # Only stochastic iteration modes are given an rng.
        if not is_stochastic(self.train_iteration_mode):
            rng = None

        supervised = self.supervised
        iterator = dataset.iterator(mode=self.train_iteration_mode,
                                    batch_size=self.batch_size,
                                    targets=supervised,
                                    topo=self.topo,
                                    rng=rng,
                                    num_batches=self.batches_per_iter)

        # Axis along which examples are stacked in the batches.
        if self.topo:
            batch_idx = dataset.get_topo_batch_axis()
        else:
            batch_idx = 0

        for batch in iterator:
            if supervised:
                batch_in, batch_target = batch
                self.sgd_update(batch_in, batch_target)
            else:
                batch_in = batch
                self.sgd_update(batch_in)
            # The iterator might return a smaller batch if the dataset size
            # isn't divisible by batch_size. The size is read along the
            # batch axis in both the supervised and unsupervised case, so
            # topological batches are counted correctly.
            actual_batch_size = batch_in.shape[batch_idx]
            self.monitor.report_batch(actual_batch_size)
            for callback in self.update_callbacks:
                callback(self)

        check_params()
Example #7
0
    def train(self, dataset):
        """
        Run one pass of optimizer-driven training over `dataset`.

        Batches are requested from `dataset.iterator` in
        'shuffled_sequential' mode. For every batch the
        `self.on_load_batch` callbacks are called with `(X, Y)` (Y is None
        when the cost is not supervised), then `self.optimizer.minimize`
        runs between `before_step` and `after_step`, and the batch size is
        reported to the model's monitor. For supervised costs the targets
        are also logged through `mode.record` when the function mode has a
        `record` attribute.

        Parameters
        ----------
        dataset : object
            Source of the training batches; only its `iterator` method is
            used here.

        Raises
        ------
        AssertionError
            If `self.bSetup` is false.
        """
        assert self.bSetup
        model = self.model

        train_iteration_mode = 'shuffled_sequential'
        rng = self.rng
        # Only stochastic iteration modes are given an rng.
        if not is_stochastic(train_iteration_mode):
            rng = None

        iterator = dataset.iterator(mode=train_iteration_mode,
                                    batch_size=self.batch_size,
                                    targets=self.cost.supervised,
                                    num_batches=self.batches_per_iter,
                                    topo=self.topo,
                                    rng=rng)
        for data in iterator:
            if self.cost.supervised:
                args = data
                X, Y = data
                mode = self.theano_function_mode
                if mode is not None and hasattr(mode, 'record'):
                    # Log the targets on a single line.
                    stry = str(Y).replace('\n', ' ')
                    mode.record.handle_line('data Y ' + stry + '\n')
            else:
                args = [data]
                X, Y = data, None

            for on_load_batch in self.on_load_batch:
                on_load_batch(X, Y)

            self.before_step(model)
            self.optimizer.minimize(*args)
            self.after_step(model)
            # NOTE(review): axis 0 is taken as the batch axis even when
            # self.topo is set -- confirm this holds for topological batches.
            model.monitor.report_batch(X.shape[0])
Example #8
0
    def setup(self, dataset, cost, batch_size, num_batches=None,
              extra_costs=None, mode='sequential'):
        """
        Sets up the monitor for a cost minimization problem.
        Adds channels defined by both the model and the cost for
        the specified dataset(s), as well as a channel called 'objective'
        defined by the costs' __call__ method.

        Parameters
        ----------
        dataset : Dataset or dict
            A Dataset or dictionary mapping string names to Datasets.
            If string names are used, then for every dataset,
            each channel defined by the model or cost will be
            replicated with that dataset's name followed by an
            underscore as the prefix.
            For example, if your cost defines a channel called
            'misclass', and datasets is {'train' : train_dataset,
            'valid' : valid_dataset} you will get channels called
            'train_misclass' and 'valid_misclass'.
            If None, nothing is set up.
        cost : Cost
            Its value is added as the 'objective' channel and its
            `get_monitoring_channels` method supplies further channels.
        batch_size, num_batches, mode
            Forwarded unchanged to `self.add_dataset` for every dataset.
            A fixed seed is used when `mode` is stochastic.
        extra_costs : dict, optional
            Maps names to additional costs. Each one is monitored under its
            name, and its own monitoring channels are prefixed with that
            name and an underscore. The empty string is not allowed as a
            name. The mapping is copied, never modified.
        """
        if dataset is None:
            return
        if isinstance(dataset, Dataset):
            dataset = {'': dataset}
        else:
            assert isinstance(dataset, dict)
            assert all(isinstance(key, str) for key in dataset)
            assert all(isinstance(dataset[key], Dataset) for key in dataset)

        # Work on a copy so that registering the main cost under '' does not
        # leak into the caller's mapping (which would make a second call
        # with the same mapping fail the assertion below).
        if extra_costs is None:
            costs = {}
        else:
            costs = extra_costs.copy()
        assert '' not in costs
        costs[''] = cost

        supervised = any(c.supervised for c in costs.values())
        model = self.model

        X = model.get_input_space().make_theano_batch()
        X.name = 'monitor_X'

        if supervised:
            Y = model.get_output_space().make_theano_batch()
            Y.name = 'monitor_Y'
            ipt = (X, Y)
        else:
            Y = None
            ipt = X

        # Channels contributed by the costs (prefixed by the cost name) and
        # by the model (unprefixed; these win on a name clash).
        custom_channels = {}
        for cost_name in costs:
            if cost_name == '':
                prefix = ''
            else:
                prefix = cost_name + '_'
            raw_channels = costs[cost_name].get_monitoring_channels(model,
                                                                    X, Y)
            for name in raw_channels:
                custom_channels[prefix + name] = raw_channels[name]
        model_channels = model.get_monitoring_channels(X, Y)
        custom_channels.update(model_channels)

        if is_stochastic(mode):
            # Written without leading zeros: "02" is an octal literal in
            # Python 2 and a syntax error in Python 3; the value is the same.
            seed = [[2013, 2, 22]]
        else:
            seed = None

        for dataset_name in dataset:
            cur_dataset = dataset[dataset_name]
            self.add_dataset(dataset=cur_dataset,
                             mode=mode,
                             batch_size=batch_size,
                             num_batches=num_batches,
                             seed=seed)
            if dataset_name == '':
                dprefix = ''
            else:
                dprefix = dataset_name + '_'
            # The channel name 'objective' must not vary, since callbacks
            # that respond to the values in the monitor use the name to
            # find it.
            for cost_name in costs:
                cost_value = costs[cost_name](model, X, Y)
                if cost_value is not None:
                    if cost_name == '':
                        name = dprefix + 'objective'
                    else:
                        name = dprefix + cost_name
                    self.add_channel(name=name, ipt=ipt,
                                     val=cost_value, dataset=cur_dataset)
            for key in custom_channels:
                self.add_channel(name=dprefix + key, ipt=ipt,
                                 val=custom_channels[key],
                                 dataset=cur_dataset)
Example #9
0
    def setup(self, dataset, cost, batch_size, num_batches=None,
              extra_costs=None, mode='sequential', obj_prereqs=None,
              cost_monitoring_args=None):
        """
        Sets up the monitor for a cost minimization problem.
        Adds channels defined by both the model and the cost for
        the specified dataset(s), as well as a channel called
        'objective' defined by the costs' `expr` method.

        Parameters
        ----------
        dataset : pylearn2.datasets.Dataset
            Dataset or dictionary mapping string names to Datasets.
            If string names are used, then for every dataset, each
            channel defined by the model or cost will be replicated
            with that dataset's name followed by an underscore as the
            prefix. For example, if your cost defines a channel called
            'misclass', and datasets is
            {'train' : train_dataset, 'valid' : valid_dataset},
            you will get channels called 'train_misclass' and
            'valid_misclass'.
            If None, nothing is set up.
        cost : pylearn2.costs.Cost
            The cost being optimized by training. The value of the cost
            will appear as the `objective` channel. Its
            `get_monitoring_channels` method will also be used to
            supply other channels.
        batch_size, num_batches, mode
            Forwarded unchanged to `self.add_dataset` for every dataset.
            A fixed seed is used when `mode` is stochastic.
        extra_costs : OrderedDict, optional
            A dictionary mapping channel names to Cost objects.
            Their value will appear as the specified channel name.
            They will also provide more monitoring channels via their
            `get_monitoring_channels` method.
            The mapping is copied, never modified.
        obj_prereqs : None, or list of functions
            Functions to pass as prerequisites to the `objective` channel.
        cost_monitoring_args : dict
            Dictionary of kwargs that will be passed to
            `cost.get_monitoring_channels()`
            (but not for the extra_costs).
            NOTE(review): this method only defaults the argument to an
            empty dict; it is never forwarded to
            `get_monitoring_channels` here -- confirm whether it should be.
        """

        if dataset is None:
            return
        if isinstance(dataset, Dataset):
            dataset = {'': dataset}
        else:
            assert isinstance(dataset, dict)
            assert all(isinstance(key, str) for key in dataset)
            assert all(isinstance(dataset[key], Dataset) for key in dataset)

        # Work on a copy so that registering the main cost under '' does not
        # leak into the caller's mapping (which would make a second call
        # with the same mapping fail the assertion below).
        if extra_costs is None:
            costs = {}
        else:
            assert isinstance(extra_costs, (OrderedDict, dict))
            costs = extra_costs.copy()
        assert '' not in costs
        costs[''] = cost

        if cost_monitoring_args is None:
            cost_monitoring_args = {}

        model = self.model

        # Build a composite data_specs containing the specs for all costs,
        # then the specs of the model
        cost_names = sorted(costs.keys())
        spaces = []
        sources = []
        for c in cost_names:
            c_space, c_source = costs[c].get_data_specs(model)
            spaces.append(c_space)
            sources.append(c_source)

        # Ask the model for the data_specs needed
        m_space, m_source = model.get_monitoring_data_specs()
        spaces.append(m_space)
        sources.append(m_source)

        nested_space = CompositeSpace(spaces)
        nested_sources = tuple(sources)

        # Flatten this data_specs, so we build only one symbolic Theano
        # variable for each of the unique (space, source) pairs.
        mapping = DataSpecsMapping((nested_space, nested_sources))
        space_tuple = mapping.flatten(nested_space, return_tuple=True)
        source_tuple = mapping.flatten(nested_sources, return_tuple=True)
        flat_ipt = tuple(space.make_theano_batch(name='monitor_%s' % source,
                                                 batch_size=None)
                         for (space, source) in safe_zip(space_tuple,
                                                         source_tuple))

        # Build a nested tuple from the flat inputs, to dispatch the
        # appropriate parts of the input batch to each cost
        nested_ipt = mapping.nest(flat_ipt)

        custom_channels = {}
        for i, cost_name in enumerate(cost_names):
            if cost_name == '':
                prefix = ''
            else:
                prefix = cost_name + '_'
            cur_cost = costs[cost_name]
            cost_ipt = nested_ipt[i]
            raw_channels = cur_cost.get_monitoring_channels(model, cost_ipt)
            for name in raw_channels:
                # We need three things: the value itself (raw_channels[name]),
                # the input variables (cost_ipt), and the data_specs for
                # these input variables ((spaces[i], sources[i]))
                custom_channels[prefix + name] = (raw_channels[name],
                                                  cost_ipt,
                                                  (spaces[i], sources[i]))

        # Use the last inputs from nested_ipt for the model
        model_channels = model.get_monitoring_channels(nested_ipt[-1])
        for name in model_channels:
            # Note: some code used to consider that model_channels[name]
            # could be a (channel, prereqs) pair, this is not supported.
            custom_channels[name] = (model_channels[name],
                                     nested_ipt[-1],
                                     (spaces[-1], sources[-1]))

        if is_stochastic(mode):
            # Written without leading zeros: "02" is an octal literal in
            # Python 2 and a syntax error in Python 3; the value is the same.
            seed = [[2013, 2, 22]]
        else:
            seed = None

        for dataset_name in dataset:
            cur_dataset = dataset[dataset_name]
            self.add_dataset(dataset=cur_dataset,
                             mode=mode,
                             batch_size=batch_size,
                             num_batches=num_batches,
                             seed=seed)
            if dataset_name == '':
                dprefix = ''
            else:
                dprefix = dataset_name + '_'
            # The channel name 'objective' must not vary, since callbacks
            # that respond to the values in the monitor use the name to find
            # it.
            for i, cost_name in enumerate(cost_names):
                cur_cost = costs[cost_name]
                cost_ipt = nested_ipt[i]
                cost_value = cur_cost.expr(model, cost_ipt)
                if cost_value is not None:
                    if cost_name == '':
                        name = dprefix + 'objective'
                        prereqs = obj_prereqs
                    else:
                        name = dprefix + cost_name
                        prereqs = None

                    cost_data_specs = cur_cost.get_data_specs(model)
                    cost_data_specs[0].validate(cost_ipt)
                    self.add_channel(name=name,
                                     ipt=cost_ipt,
                                     val=cost_value,
                                     data_specs=cost_data_specs,
                                     dataset=cur_dataset,
                                     prereqs=prereqs)

            for key in custom_channels:
                val, channel_ipt, channel_specs = custom_channels[key]
                channel_specs[0].validate(channel_ipt)
                self.add_channel(name=dprefix + key,
                                 ipt=channel_ipt,
                                 val=val,
                                 data_specs=channel_specs,
                                 dataset=cur_dataset)
Example #10
0
def main():
    """
    Inspect a trained model stored in a results directory.

    Loads the train object (`model.yaml`) and the saved model
    (`model.pkl`), draws a fixed number of training and validation batches,
    plots the recorded train/valid objective per epoch (optionally saving
    linear- and log-scale figures), prints the id accuracy and x/y errors
    for every training batch, and finally opens an interactive shell unless
    `--quit` was given.

    Command-line arguments: `directory` (results directory, relative to the
    hard-coded results prefix), `-s/--save`, `-q/--quit`.
    """
    parser = argparse.ArgumentParser(description='Pylearn2 lab.')
    parser.add_argument('-s', '--save', action='store_true',
                        help='Save the resulting images')
    parser.add_argument('-q', '--quit', action='store_true',
                        help='Quit after plotting instead of dropping into IPython')
    parser.add_argument('directory', type=str,
                        help='Which results directory to use')
    args = parser.parse_args()

    # NOTE(review): the results prefix is a hard-coded, user-specific path.
    result_prefix = '/u/yosinski/s/galatea/fish/results/'
    result_dir = os.path.join(result_prefix, args.directory)

    print('loading train object...')
    train = serial.load_train_file(os.path.join(result_dir, 'model.yaml'))
    print('loading saved model...')
    model = serial.load(os.path.join(result_dir, 'model.pkl'))
    print('done.')

    print('model was trained on:')
    print(model.dataset_yaml_src)

    # Use the training cost's data specs, or the model's default cost when
    # the algorithm has none.
    if train.algorithm.cost is not None:
        data_specs = train.algorithm.cost.get_data_specs(model)
    else:
        data_specs = train.model.get_default_cost().get_data_specs(train.model)
    mapping = DataSpecsMapping(data_specs)
    space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
    source_tuple = mapping.flatten(data_specs[1], return_tuple=True)
    flat_data_specs = (CompositeSpace(space_tuple), source_tuple)

    num_frames = model.num_frames
    num_batches = 100
    batch_size = train.algorithm.batch_size if train.algorithm.batch_size else 20*num_frames

    train_dataset = train.dataset
    valid_dataset = train.algorithm.monitoring_dataset['valid']

    rng = train.algorithm.rng
    # Only stochastic iteration modes are given an rng.
    if not is_stochastic(train.algorithm.train_iteration_mode):
        rng = None

    train_iterator = train_dataset.iterator(mode=train.algorithm.train_iteration_mode,
                                            batch_size=batch_size,
                                            data_specs=flat_data_specs,
                                            return_tuple=True, rng=rng,
                                            num_batches=num_batches * 10)
    valid_iterator = valid_dataset.iterator(mode=train.algorithm.train_iteration_mode,
                                            batch_size=batch_size,
                                            data_specs=flat_data_specs,
                                            return_tuple=True,  # No rng override
                                            num_batches=num_batches * 10)

    # The builtin next() works on both Python 2.6+ and Python 3 iterators.
    train_batches = [next(train_iterator) for ii in range(num_batches)]
    valid_batches = [next(valid_iterator) for ii in range(num_batches)]

    print('got batches with shape:')
    for dat in train_batches[0]:
        print('   %s' % (dat.shape,))

    #########################
    # Plot costs
    #########################

    # Plot costs over time
    ch_train_objective = model.monitor.channels['train_objective']
    ch_valid_objective = model.monitor.channels['valid_objective']

    x_vals = ch_train_objective.epoch_record
    x_label = 'epoch'

    plot(x_vals, ch_train_objective.val_record, 'b-')
    plot(x_vals, ch_valid_objective.val_record, 'r-')
    legend(('train', 'valid'))

    if args.save:
        savefig(os.path.join(result_dir, 'costs_lin.png'))
        savefig(os.path.join(result_dir, 'costs_lin.pdf'))
        # Save the same figure on a log scale, then restore the linear scale.
        gca().set_yscale('log')
        savefig(os.path.join(result_dir, 'costs_log.png'))
        savefig(os.path.join(result_dir, 'costs_log.pdf'))
        gca().set_yscale('linear')

    #########################
    # Compute some accuracies
    #########################

    # Rebuild the compiled functions if the loaded model does not have them.
    try:
        model.fns.feat_to_compout
    except AttributeError:
        model.redo_theano()
    all_acc_id = []
    all_xy_errs = []

    print('Training set:')
    print('   acc_id\tx_err\ty_err')
    for bb, batch in enumerate(train_batches):
        feat, ids, xy = batch

        idsN_floatX = array(ids.argmax(1), dtype=theano.config.floatX)
        acc_id = model.fns.wiskott_id_accuracy(feat, idsN_floatX)
        all_acc_id.append(acc_id)

        xy_errs = model.fns.wiskott_xy_errors(feat, xy[:, 0:2])
        all_xy_errs.append(xy_errs)

        print('%2d:\t%g,\t%g,\t%g' % (bb, acc_id, xy_errs[0], xy_errs[1]))

    #########################
    # Embed
    #########################

    if not args.quit:
        # Start shell
        # NOTE(review): locals that look unused above (valid_batches,
        # x_label, all_acc_id, ...) are kept on purpose; presumably the
        # embedded shell exposes them -- confirm against ipshell's setup.
        ipshell()
    print('done.')
Example #11
0
    def train(self, dataset):
        """
        Runs one epoch of SGD training on the specified dataset.

        Parameters
        ----------
        dataset : Dataset
        """
        if not hasattr(self, 'sgd_update'):
            raise Exception("train called without first calling setup")

        # Make sure none of the parameters have bad values
        for param in self.params:
            value = param.get_value(borrow=True)
            if not isfinite(value):
                raise Exception("NaN in " + param.name)

        self.first = False
        rng = self.rng
        if not is_stochastic(self.train_iteration_mode):
            rng = None

        data_specs = self.cost.get_data_specs(self.model)

        # The iterator should be built from flat data specs, so it returns
        # flat, non-redundent tuples of data.
        mapping = DataSpecsMapping(data_specs)
        space_tuple = mapping.flatten(data_specs[0], return_tuple=True)

        # print 'space tuple', type(space_tuple), space_tuple
        from pylearn2.space import VectorSpace

        ###############################################
        # # # CHANGINGS TO THE ORIGINAL ALGORITHM # # #
        ###############################################

        # we have 3 classes in dataset (active, inactive, middle), but only two softmax neurons
        # therefore VectorSpace has dim = 2 and an error will be raised when trying to convert
        # label to a vector of length 2. So we change the vector length for a while and convert
        # things manually.
        space_tuple = (space_tuple[0], VectorSpace(dim=3))

        #############################
        # # #  END OF CHANGINGS # # #
        #############################

        source_tuple = mapping.flatten(data_specs[1], return_tuple=True)
        if len(space_tuple) == 0:
            # No data will be returned by the iterator, and it is impossible
            # to know the size of the actual batch.
            # It is not decided yet what the right thing to do should be.
            raise NotImplementedError(
                "Unable to train with SGD, because "
                "the cost does not actually use data from the data set. "
                "data_specs: %s" % str(data_specs))
        flat_data_specs = (CompositeSpace(space_tuple), source_tuple)

        iterator = dataset.iterator(mode=self.train_iteration_mode,
                                    batch_size=self.batch_size,
                                    data_specs=flat_data_specs,
                                    return_tuple=True, rng=rng,
                                    num_batches=self.batches_per_iter)

        # print 'flat data specs', type(flat_data_specs), flat_data_specs
        # flat data specs <type 'tuple'>
        # (CompositeSpace(Conv2DSpace(shape=(18, 3492), num_channels=1, axes=('c', 0, 1, 'b'), dtype=float64),
        #                             VectorSpace(dim=2, dtype=float64)),
        #                 'features', 'targets'))

        on_load_batch = self.on_load_batch
        for batch in iterator:
            # batch is a list with two numpy arrays: [sample, label]
            # self.params is a list with theano.tensor.sharedvar.TensorSharedVariables
            # theano.tensor.sharedvar.TensorSharedVariable.get_value() returns numpy.array
            # you can set value with theano.tensor.sharedvar.TensorSharedVariable.set_value(np.array_object)

            # this being here might cause troubles as batch is a nasty thing right now
            for callback in on_load_batch:
                callback(*batch)

            ###############################################
            # # # CHANGINGS TO THE ORIGINAL ALGORITHM # # #
            ###############################################

            self.print_params("on entering iteration", t.cyan)

            # GOOD ADVICE: if something is very wrong, check if the following map is valid
            # TODO: check this
            # active     1    [[ 0. 1. 0. ]]    [[ 0. 1. ]]
            # nonactive  0    [[ 1. 0. 0. ]]    [[ 1. 0. ]]
            # middle    -1    [[ 0. 0. 1. ]]

            batch_1_on_load = batch[1].copy()

            # if label was '0'
            if (batch[1] == np.array((1, 0, 0))).all():
                # print "example: nonactive"
                batch = (batch[0], np.reshape(np.array((1, 0)), (1, 2)))
                self.sgd_update(*batch)
            # if label was '1'
            elif (batch[1] == np.array((0, 1, 0))).all():
                # print "example: active"
                batch = (batch[0], np.reshape(np.array((0, 1)), (1, 2)))
                self.sgd_update(*batch)
            # else we have to deal with unlabeled example
            else:
                # print "example: middle"
                parameters_on_load = self.get_parameters()

                ######################################
                # # # RUNNING AS INACTIVE SAMPLE # # #
                ######################################
                # print 'running as inactive'
                # setting label as inactive
                batch = (batch[0], np.reshape(np.array((1, 0)), (1, 2)))
                self.print_params("on entering inactive", t.blue)
                # updating the model
                self.sgd_update(*batch)
                self.print_params("after update inactive", t.green)
                # remember changing in parameters
                params_after_inactive = self.get_parameters()
                diff_inactive = self.get_difference(params_after_inactive, parameters_on_load)
                self.print_dict_of_params(diff_inactive, "difference")
                # bring back on load parameters
                self.restore_parameters(parameters_on_load)
                self.print_params('after restore', t.yellow)
                ####################################
                # # # RUNNING AS ACTIVE SAMPLE # # #
                ####################################
                # print 'running as active'
                # setting label as active
                batch = (batch[0], np.reshape(np.array((0, 1)), (1, 2)))
                self.print_params('on entering active', t.blue)
                # updating the model
                self.sgd_update(*batch)
                self.print_params('after update active', t.green)
                # remember changing in parameters
                params_after_active = self.get_parameters()
                diff_active = self.get_difference(params_after_active, parameters_on_load)
                self.print_dict_of_params(diff_active, "difference")
                # bring back on load parameters
                self.restore_parameters(parameters_on_load)
                self.print_params('after restore', t.yellow)
                ##############################
                # # # UPDATING THE MODEL # # #
                ##############################
                update_vector = self.calculate_update(diff_active, diff_inactive)
                self.print_dict_of_params(update_vector, "update vector")
                self.update_non_classification_parameters(update_vector)

            # end of if

            self.print_params('on leaving', t.red)

            # iterator might return a smaller batch if dataset size
            # isn't divisible by batch_size
            # Note: if data_specs[0] is a NullSpace, there is no way to know
            # how many examples would actually have been in the batch,
            # since it was empty, so actual_batch_size would be reported as 0.

            # OK, the lines below need the batch in its previous size, so I just set the batch back to what it
            # used to be before my wicked transformations.
            batch = (batch[0], batch_1_on_load)

            self.print_self_debug()

            #############################
            # # #  END OF CHANGINGS # # #
            #############################
            actual_batch_size = flat_data_specs[0].np_batch_size(batch)
            self.monitor.report_batch(actual_batch_size)
            for callback in self.update_callbacks:
                callback(self)

        # Make sure none of the parameters have bad values
        for param in self.params:
            value = param.get_value(borrow=True)
            if not isfinite(value):
                raise Exception("NaN in " + param.name)
        self.second = True
Example #12
0
    def setup(self, dataset, cost, batch_size, num_batches=None,
              extra_costs=None, mode='sequential', obj_prereqs=None,
              cost_monitoring_args=None):

        if dataset is None:
            return
        if isinstance(dataset, Dataset):
            dataset = {'': dataset}
        else:
            assert isinstance(dataset, dict)
            assert all(isinstance(key, str) for key in dataset)
            assert all(isinstance(dataset[key], Dataset) for key in dataset)

        if extra_costs is None:
            costs = {}
        else:
            assert isinstance(extra_costs, (OrderedDict, dict))
            costs = extra_costs
        assert '' not in costs
        costs[''] = cost

        if cost_monitoring_args is None:
            cost_monitoring_args = {}

        model = self.model

        # Build a composite data_specs containing the specs for all costs,
        # then the specs of the model
        cost_names = sorted(costs.keys())
        spaces = []
        sources = []
        for c in cost_names:
            c_space, c_source = costs[c].get_data_specs(model)
            spaces.append(c_space)
            sources.append(c_source)

        # Ask the model for the data_specs needed
        m_space, m_source = model.get_monitoring_data_specs()
        spaces.append(m_space)
        sources.append(m_source)

        nested_space = CompositeSpace(spaces)
        nested_sources = tuple(sources)

        # Flatten this data_specs, so we build only one symbolic Theano
        # variable for each of the unique (space, source) pairs.
        mapping = DataSpecsMapping((nested_space, nested_sources))
        space_tuple = mapping.flatten(nested_space, return_tuple=True)
        source_tuple = mapping.flatten(nested_sources, return_tuple=True)
        ipt = tuple(space.make_theano_batch(name='monitor_%s' % source,
                                            batch_size=None)
                    for (space, source) in safe_zip(space_tuple, source_tuple))

        # Build a nested tuple from ipt, to dispatch the appropriate parts
        # of the ipt batch to each cost
        nested_ipt = mapping.nest(ipt)

        # custom_channels = {}
        # for i, cost_name in enumerate(cost_names):
        #     if cost_name == '':
        #         prefix = ''
        #     else:
        #         prefix = cost_name + '_'
        #     cost = costs[cost_name]
        #     cost_ipt = nested_ipt[i]
        #     raw_channels = cost.get_monitoring_channels(model, cost_ipt)
        #     channels = {}
        #     for name in raw_channels:
        #         # We need three things: the value itself (raw_channels[name]),
        #         # the input variables (cost_ipt), and the data_specs for
        #         # these input variables ((spaces[i], sources[i]))
        #         channels[prefix + name] = (raw_channels[name],
        #                                    cost_ipt,
        #                                    (spaces[i], sources[i]))
        #     custom_channels.update(channels)
        #
        # # Use the last inputs from nested_ipt for the model
        # model_channels = model.get_monitoring_channels(nested_ipt[-1])
        # channels = {}
        # for name in model_channels:
        #     # Note: some code used to consider that model_channels[name]
        #     # could be a a (channel, prereqs) pair, this is not supported.
        #     channels[name] = (model_channels[name],
        #                       nested_ipt[-1],
        #                       (spaces[-1], sources[-1]))
        # custom_channels.update(channels)

        if is_stochastic(mode):
            seed = [[2013, 2, 22]]
        else:
            seed = None

        for dataset_name in dataset:
            cur_dataset = dataset[dataset_name]
            self.add_dataset(dataset=cur_dataset,
                             mode=mode,
                             batch_size=batch_size,
                             num_batches=num_batches,
                             seed=seed)
            if dataset_name == '':
                dprefix = ''
            else:
                dprefix = dataset_name + '_'
            # These channel name 'objective' must not vary, since callbacks
            # that respond to the values in the monitor use the name to find
            # it.
            for i, cost_name in enumerate(cost_names):
                cost = costs[cost_name]
                cost_ipt = nested_ipt[i]
                cost_value_list = cost.expr(model, cost_ipt)
                cost_value = reduce(lambda x, y: x + y, cost_value_list)

                if cost_value is not None:
                    if cost_name == '':
                        name = dprefix + 'objective'
                        prereqs = obj_prereqs
                    else:
                        name = dprefix + cost_name
                        prereqs = None

                    cost.get_data_specs(model)[0].validate(cost_ipt)
                    self.add_channel(name=name,
                                     ipt=cost_ipt,
                                     val=cost_value,
                                     data_specs=cost.get_data_specs(model),
                                     dataset=cur_dataset,
                                     prereqs=prereqs)