def train(self, dataset):
    assert self.bSetup
    model = self.model
    batch_size = self.batch_size
    if self.topo:
        get_data = dataset.get_batch_topo
    else:
        get_data = dataset.get_batch_design
    rng = self.rng
    train_iteration_mode = 'shuffled_sequential'
    if not is_stochastic(train_iteration_mode):
        rng = None
    iterator = dataset.iterator(mode=train_iteration_mode,
                                batch_size=self.batch_size,
                                targets=self.cost.supervised,
                                num_batches=self.batches_per_iter,
                                topo=self.topo,
                                rng=rng)
    for data in iterator:
        if self.cost.supervised:
            args = data
            X, Y = data
        else:
            args = [data]
            X = data
        self.optimizer.minimize(*args)
        model.monitor.report_batch(X.shape[0])
def train(self, dataset):
    if not hasattr(self, 'sgd_update'):
        raise Exception("train called without first calling setup")
    model = self.model
    batch_size = self.batch_size
    # Make sure none of the parameters have bad values
    for param in self.params:
        value = param.get_value(borrow=True)
        if np.any(np.isnan(value)) or np.any(np.isinf(value)):
            raise Exception("NaN or Inf in " + param.name)
    self.first = False
    rng = self.rng
    if not is_stochastic(self.train_iteration_mode):
        rng = None
    iterator = dataset.iterator(mode=self.train_iteration_mode,
                                batch_size=self.batch_size,
                                targets=self.supervised,
                                topo=self.topo,
                                rng=rng)
    if self.supervised:
        for (batch_in, batch_target) in iterator:
            self.sgd_update(batch_in, batch_target)
            actual_batch_size = batch_in.shape[0]
            self.monitor.report_batch(actual_batch_size)
            for callback in self.update_callbacks:
                callback(self)
    else:
        for batch in iterator:
            self.sgd_update(batch)
            # The iterator may return a smaller final batch if the
            # dataset size is not divisible by batch_size.
            actual_batch_size = batch.shape[0]
            self.monitor.report_batch(actual_batch_size)
            for callback in self.update_callbacks:
                callback(self)
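# Hedged usage sketch: how a train() method like the one above is
# typically driven. In pylearn2, the Train loop calls the algorithm's
# setup() once and then train() once per epoch until the termination
# criterion says to stop. The attribute names follow the standard
# TrainingAlgorithm interface, but treat this as illustrative rather
# than the canonical main loop.
def run_training(algorithm, model, dataset):
    algorithm.setup(model=model, dataset=dataset)
    while algorithm.continue_learning(model):
        algorithm.train(dataset=dataset)
        model.monitor()  # run the monitoring channels once per epoch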
def train(self, dataset):
    if not hasattr(self, 'sgd_update'):
        raise Exception("train called without first calling setup")
    # Make sure none of the parameters have bad values
    for param in self.params:
        value = param.get_value(borrow=True)
        if np.any(np.isnan(value)) or np.any(np.isinf(value)):
            raise Exception("NaN or Inf in " + param.name)
    self.first = False
    rng = self.rng
    if not is_stochastic(self.train_iteration_mode):
        rng = None
    data_specs = self.cost.get_data_specs(self.model)

    # The iterator should be built from flat data specs, so it returns
    # flat, non-redundant tuples of data.
    mapping = DataSpecsMapping(data_specs)
    space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
    source_tuple = mapping.flatten(data_specs[1], return_tuple=True)
    if len(space_tuple) == 0:
        # No data will be returned by the iterator, and it is impossible
        # to know the size of the actual batch.
        # It is not decided yet what the right thing to do should be.
        raise NotImplementedError(
            "Unable to train with SGD, because "
            "the cost does not actually use data from the data set. "
            "data_specs: %s" % str(data_specs))
    flat_data_specs = (CompositeSpace(space_tuple), source_tuple)
    iterator = dataset.iterator(mode=self.train_iteration_mode,
                                batch_size=self.batch_size,
                                data_specs=flat_data_specs,
                                return_tuple=True,
                                rng=rng,
                                num_batches=self.batches_per_iter)

    on_load_batch = self.on_load_batch
    for batch in iterator:
        for callback in on_load_batch:
            callback(mapping.nest(batch))
        self.sgd_update(*batch)
        # The iterator may return a smaller final batch if the dataset
        # size is not divisible by batch_size.
        # Note: if data_specs[0] is a NullSpace, there is no way to know
        # how many examples would actually have been in the batch,
        # since it was empty, so actual_batch_size would be reported as 0.
        actual_batch_size = flat_data_specs[0].np_batch_size(batch)
        self.monitor.report_batch(actual_batch_size)
        for callback in self.update_callbacks:
            callback(self)

    # Make sure none of the parameters have bad values
    for param in self.params:
        value = param.get_value(borrow=True)
        if np.any(np.isnan(value)) or np.any(np.isinf(value)):
            raise Exception("NaN or Inf in " + param.name)
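# Illustrative sketch of the flatten/nest round trip used above,
# assuming the usual pylearn2 import paths; the dims are made up.
from pylearn2.space import CompositeSpace, VectorSpace
from pylearn2.utils.data_specs import DataSpecsMapping

data_specs = (CompositeSpace([VectorSpace(dim=784), VectorSpace(dim=10)]),
              ('features', 'targets'))
mapping = DataSpecsMapping(data_specs)
space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
source_tuple = mapping.flatten(data_specs[1], return_tuple=True)
flat_data_specs = (CompositeSpace(space_tuple), source_tuple)
# The iterator yields flat tuples matching flat_data_specs;
# mapping.nest(batch) rebuilds the nested structure that the
# on_load_batch callbacks above expect.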
def train(self, dataset):
    if not hasattr(self, 'sgd_update'):
        raise Exception("train called without first calling setup")
    model = self.model
    batch_size = self.batch_size
    # Make sure none of the parameters have bad values
    for param in self.params:
        value = param.get_value(borrow=True)
        if np.any(np.isnan(value)) or np.any(np.isinf(value)):
            raise Exception("NaN or Inf in " + param.name)
    self.first = False
    rng = self.rng
    if not is_stochastic(self.train_iteration_mode):
        rng = None
    iterator = dataset.iterator(mode=self.train_iteration_mode,
                                batch_size=self.batch_size,
                                targets=self.supervised,
                                topo=self.topo,
                                rng=rng,
                                num_batches=self.batches_per_iter)
    if self.topo:
        batch_idx = dataset.get_topo_batch_axis()
    else:
        batch_idx = 0
    on_load_batch = self.on_load_batch
    if self.supervised:
        for (batch_in, batch_target) in iterator:
            for callback in on_load_batch:
                callback(batch_in, batch_target)
            self.sgd_update(batch_in, batch_target)
            actual_batch_size = batch_in.shape[batch_idx]
            self.monitor.report_batch(actual_batch_size)
            for callback in self.update_callbacks:
                callback(self)
    else:
        for batch in iterator:
            for callback in on_load_batch:
                callback(batch, None)
            self.sgd_update(batch)
            # The iterator may return a smaller final batch if the
            # dataset size is not divisible by batch_size.
            actual_batch_size = batch.shape[0]
            self.monitor.report_batch(actual_batch_size)
            for callback in self.update_callbacks:
                callback(self)
    # Make sure none of the parameters have bad values
    for param in self.params:
        value = param.get_value(borrow=True)
        if np.any(np.isnan(value)) or np.any(np.isinf(value)):
            raise Exception("NaN or Inf in " + param.name)
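# A minimal on_load_batch callback compatible with the calling
# convention above, i.e. callback(batch_in, batch_target), with
# batch_target None in the unsupervised case. The checks are just an
# example of what such a hook might do.
def check_batch(batch_in, batch_target):
    assert str(batch_in.dtype).startswith('float')
    if batch_target is not None:
        assert batch_in.shape[0] == batch_target.shape[0]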
def train(self, dataset): """ .. todo:: WRITEME """ assert self.bSetup model = self.model rng = self.rng train_iteration_mode = "shuffled_sequential" if not is_stochastic(train_iteration_mode): rng = None data_specs = self.cost.get_data_specs(self.model) # The iterator should be built from flat data specs, so it returns # flat, non-redundent tuples of data. mapping = DataSpecsMapping(data_specs) space_tuple = mapping.flatten(data_specs[0], return_tuple=True) source_tuple = mapping.flatten(data_specs[1], return_tuple=True) if len(space_tuple) == 0: # No data will be returned by the iterator, and it is impossible # to know the size of the actual batch. # It is not decided yet what the right thing to do should be. raise NotImplementedError( "Unable to train with BGD, because " "the cost does not actually use data from the data set. " "data_specs: %s" % str(data_specs) ) flat_data_specs = (CompositeSpace(space_tuple), source_tuple) iterator = dataset.iterator( mode=train_iteration_mode, batch_size=self.batch_size, num_batches=self.batches_per_iter, data_specs=flat_data_specs, return_tuple=True, rng=rng, ) mode = self.theano_function_mode for data in iterator: if "targets" in source_tuple and mode is not None and hasattr(mode, "record"): Y = data[source_tuple.index("targets")] stry = str(Y).replace("\n", " ") mode.record.handle_line("data Y " + stry + "\n") for on_load_batch in self.on_load_batch: on_load_batch(mapping.nest(data)) self.before_step(model) self.optimizer.minimize(*data) self.after_step(model) actual_batch_size = flat_data_specs[0].np_batch_size(data) model.monitor.report_batch(actual_batch_size)
def train(self, dataset): """ .. todo:: WRITEME """ assert self.bSetup model = self.model rng = self.rng train_iteration_mode = 'shuffled_sequential' if not is_stochastic(train_iteration_mode): rng = None data_specs = self.cost.get_data_specs(self.model) # The iterator should be built from flat data specs, so it returns # flat, non-redundent tuples of data. mapping = DataSpecsMapping(data_specs) space_tuple = mapping.flatten(data_specs[0], return_tuple=True) source_tuple = mapping.flatten(data_specs[1], return_tuple=True) if len(space_tuple) == 0: # No data will be returned by the iterator, and it is impossible # to know the size of the actual batch. # It is not decided yet what the right thing to do should be. raise NotImplementedError( "Unable to train with BGD, because " "the cost does not actually use data from the data set. " "data_specs: %s" % str(data_specs)) flat_data_specs = (CompositeSpace(space_tuple), source_tuple) iterator = dataset.iterator(mode=train_iteration_mode, batch_size=self.batch_size, num_batches=self.batches_per_iter, data_specs=flat_data_specs, return_tuple=True, rng=rng) mode = self.theano_function_mode for data in iterator: if ('targets' in source_tuple and mode is not None and hasattr(mode, 'record')): Y = data[source_tuple.index('targets')] stry = str(Y).replace('\n', ' ') mode.record.handle_line('data Y ' + stry + '\n') for on_load_batch in self.on_load_batch: on_load_batch(mapping.nest(data)) self.before_step(model) self.optimizer.minimize(*data) self.after_step(model) actual_batch_size = flat_data_specs[0].np_batch_size(data) model.monitor.report_batch(actual_batch_size)
def train(self, dataset):
    if not hasattr(self, 'sgd_update'):
        raise Exception("train called without first calling setup")
    model = self.model
    batch_size = self.batch_size
    # Make sure none of the parameters have bad values.
    # This check is sometimes very slow; we could get a large speedup
    # if we could avoid running it on every call.
    for param in self.params:
        value = param.get_value(borrow=True)
        if np.any(np.isnan(value)) or np.any(np.isinf(value)):
            raise Exception("NaN or Inf in " + param.name)
    self.first = False
    rng = self.rng
    if not is_stochastic(self.train_iteration_mode):
        rng = None
    iterator = dataset.iterator(mode=self.train_iteration_mode,
                                batch_size=self.batch_size,
                                targets=self.supervised,
                                topo=self.topo,
                                rng=rng,
                                num_batches=self.batches_per_iter)
    if self.topo:
        batch_idx = dataset.get_topo_batch_axis()
    else:
        batch_idx = 0
    if self.supervised:
        for (batch_in, batch_target) in iterator:
            self.sgd_update(batch_in, batch_target)
            actual_batch_size = batch_in.shape[batch_idx]
            self.monitor.report_batch(actual_batch_size)
            for callback in self.update_callbacks:
                callback(self)
    else:
        for batch in iterator:
            self.sgd_update(batch)
            # The iterator may return a smaller final batch if the
            # dataset size is not divisible by batch_size.
            actual_batch_size = batch.shape[0]
            self.monitor.report_batch(actual_batch_size)
            for callback in self.update_callbacks:
                callback(self)
    # Make sure none of the parameters have bad values. This check is
    # also sometimes very slow; speeding it up would be a significant
    # win.
    for param in self.params:
        value = param.get_value(borrow=True)
        if np.any(np.isnan(value)) or np.any(np.isinf(value)):
            raise Exception("NaN or Inf in " + param.name)
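# The comments above note that the parameter check is sometimes very
# slow. A sketch of a cheaper variant: np.isfinite rejects both NaN and
# Inf in a single pass, instead of two separate full scans with
# np.isnan and np.isinf. (A later variant below wraps this in an
# isfinite() utility.)
import numpy as np

def check_params_finite(params):
    for param in params:
        value = param.get_value(borrow=True)
        if not np.isfinite(value).all():
            raise Exception("NaN or Inf in " + param.name)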
def train(self, dataset):
    assert self.bSetup
    model = self.model
    batch_size = self.batch_size
    if self.topo:
        get_data = dataset.get_batch_topo
    else:
        get_data = dataset.get_batch_design
    rng = self.rng
    train_iteration_mode = 'shuffled_sequential'
    if not is_stochastic(train_iteration_mode):
        rng = None
    iterator = dataset.iterator(mode=train_iteration_mode,
                                batch_size=self.batch_size,
                                targets=self.cost.supervised,
                                num_batches=self.batches_per_iter,
                                topo=self.topo,
                                rng=rng)
    for data in iterator:
        if self.cost.supervised:
            args = data
            X, Y = data
            mode = self.theano_function_mode
            if mode is not None and hasattr(mode, 'record'):
                stry = str(Y).replace('\n', ' ')
                mode.record.handle_line('data Y ' + stry + '\n')
            for on_load_batch in self.on_load_batch:
                on_load_batch(X, Y)
        else:
            args = [data]
            X = data
            for on_load_batch in self.on_load_batch:
                on_load_batch(X, None)
        self.before_step(model)
        self.optimizer.minimize(*args)
        self.after_step(model)
        model.monitor.report_batch(X.shape[0])
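# Illustrative before_step/after_step hooks matching the calls above;
# the timing logic is hypothetical and not part of the original class.
import time

def before_step(model):
    model._step_start = time.time()

def after_step(model):
    print 'optimizer step took %.3fs' % (time.time() - model._step_start)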
def setup(self, dataset, cost, batch_size, num_batches=None,
          extra_costs=None, mode='sequential'):
    """
    Sets up the monitor for a cost minimization problem.
    Adds channels defined by both the model and the cost for
    the specified dataset(s), as well as a channel called
    'objective' defined by the costs' __call__ method.

    dataset: a Dataset or a dictionary mapping string names to Datasets.
        If string names are used, then for every dataset, each channel
        defined by the model or cost will be replicated with that
        dataset's name followed by an underscore as the prefix. For
        example, if your cost defines a channel called 'misclass', and
        datasets is {'train': train_dataset, 'valid': valid_dataset},
        you will get channels called 'train_misclass' and
        'valid_misclass'.
    cost: a Cost
    """
    if dataset is None:
        return
    if isinstance(dataset, Dataset):
        dataset = {'': dataset}
    else:
        assert isinstance(dataset, dict)
        assert all(isinstance(key, str) for key in dataset)
        assert all(isinstance(dataset[key], Dataset) for key in dataset)

    if extra_costs is None:
        costs = {}
    else:
        costs = extra_costs
    assert '' not in costs
    costs[''] = cost

    supervised = any(cost.supervised for cost in costs.values())

    model = self.model
    X = model.get_input_space().make_theano_batch()
    X.name = 'monitor_X'
    if supervised:
        Y = model.get_output_space().make_theano_batch()
        Y.name = 'monitor_Y'
        ipt = (X, Y)
    else:
        Y = None
        ipt = X

    custom_channels = {}
    for cost_name in costs:
        if cost_name == '':
            prefix = ''
        else:
            prefix = cost_name + '_'
        cost = costs[cost_name]
        raw_channels = cost.get_monitoring_channels(model, X, Y)
        channels = {}
        for name in raw_channels:
            channels[prefix + name] = raw_channels[name]
        custom_channels.update(channels)
    model_channels = model.get_monitoring_channels(X, Y)
    custom_channels.update(model_channels)

    if is_stochastic(mode):
        seed = [[2013, 2, 22]]
    else:
        seed = None

    for dataset_name in dataset:
        cur_dataset = dataset[dataset_name]
        self.add_dataset(dataset=cur_dataset, mode=mode,
                         batch_size=batch_size,
                         num_batches=num_batches, seed=seed)
        if dataset_name == '':
            dprefix = ''
        else:
            dprefix = dataset_name + '_'
        # The channel named 'objective' must not vary, since callbacks
        # that respond to the values in the monitor use the name to
        # find it.
        for cost_name in costs:
            cost = costs[cost_name]
            cost_value = cost(model, X, Y)
            if cost_value is not None:
                if cost_name == '':
                    name = dprefix + 'objective'
                else:
                    name = dprefix + cost_name
                self.add_channel(name=name, ipt=ipt, val=cost_value,
                                 dataset=cur_dataset)
        for key in custom_channels:
            self.add_channel(name=dprefix + key, ipt=ipt,
                             val=custom_channels[key],
                             dataset=cur_dataset)
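# Hedged usage sketch of the setup() method above: with a dataset
# dictionary, every channel is replicated per dataset with the name as
# a prefix. The variable names are hypothetical; Monitor.get_monitor
# is the standard pylearn2 accessor.
monitor = Monitor.get_monitor(model)
monitor.setup(dataset={'train': train_set, 'valid': valid_set},
              cost=cost, batch_size=100)
# If the cost defines a 'misclass' channel, the monitor now has
# 'train_objective', 'valid_objective', 'train_misclass' and
# 'valid_misclass' channels.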
def main():
    parser = argparse.ArgumentParser(description='Pylearn2 lab.')
    parser.add_argument('-s', '--save', action='store_true',
                        help='Save the resulting images')
    parser.add_argument('-q', '--quit', action='store_true',
                        help='Quit after plotting instead of dropping '
                             'into IPython')
    parser.add_argument('directory', type=str,
                        help='Which results directory to use')
    args = parser.parse_args()

    result_prefix = '/u/yosinski/s/galatea/fish/results/'
    result_dir = os.path.join(result_prefix, args.directory)

    print 'loading train object...'
    train = serial.load_train_file(os.path.join(result_dir, 'model.yaml'))
    print 'loading saved model...'
    model = serial.load(os.path.join(result_dir, 'model.pkl'))
    print 'done.'

    print 'model was trained on:'
    print model.dataset_yaml_src

    if train.algorithm.cost is not None:
        data_specs = train.algorithm.cost.get_data_specs(model)
    else:
        data_specs = train.model.get_default_cost().get_data_specs(
            train.model)
    mapping = DataSpecsMapping(data_specs)
    space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
    source_tuple = mapping.flatten(data_specs[1], return_tuple=True)
    flat_data_specs = (CompositeSpace(space_tuple), source_tuple)

    num_frames = model.num_frames
    num_batches = 100
    batch_size = (train.algorithm.batch_size
                  if train.algorithm.batch_size else 20 * num_frames)

    train_dataset = train.dataset
    valid_dataset = train.algorithm.monitoring_dataset['valid']

    rng = train.algorithm.rng
    if not is_stochastic(train.algorithm.train_iteration_mode):
        rng = None

    train_iterator = train_dataset.iterator(
        mode=train.algorithm.train_iteration_mode,
        batch_size=batch_size,
        data_specs=flat_data_specs,
        return_tuple=True,
        rng=rng,
        num_batches=num_batches * 10)
    valid_iterator = valid_dataset.iterator(
        mode=train.algorithm.train_iteration_mode,
        batch_size=batch_size,
        data_specs=flat_data_specs,
        return_tuple=True,
        # No rng override
        num_batches=num_batches * 10)

    train_batches = [train_iterator.next() for ii in range(num_batches)]
    valid_batches = [valid_iterator.next() for ii in range(num_batches)]

    print 'got batches with shape:'
    for dat in train_batches[0]:
        print ' ', dat.shape

    #########################
    # Plot costs over time
    #########################
    ch_train_objective = model.monitor.channels['train_objective']
    ch_valid_objective = model.monitor.channels['valid_objective']
    x_vals = ch_train_objective.epoch_record
    plot(x_vals, ch_train_objective.val_record, 'b-')
    plot(x_vals, ch_valid_objective.val_record, 'r-')
    legend(('train', 'valid'))
    if args.save:
        savefig(os.path.join(result_dir, 'costs_lin.png'))
        savefig(os.path.join(result_dir, 'costs_lin.pdf'))
    if args.save:
        gca().set_yscale('log')
        savefig(os.path.join(result_dir, 'costs_log.png'))
        savefig(os.path.join(result_dir, 'costs_log.pdf'))
        gca().set_yscale('linear')

    #########################
    # Compute some accuracies
    #########################
    try:
        model.fns.feat_to_compout
    except AttributeError:
        model.redo_theano()

    all_acc_id = []
    all_xy_errs = []
    print 'Training set:'
    print '    acc_id\tx_err\ty_err'
    for bb, batch in enumerate(train_batches):
        feat, ids, xy = batch
        idsN_floatX = array(ids.argmax(1), dtype=theano.config.floatX)
        acc_id = model.fns.wiskott_id_accuracy(feat, idsN_floatX)
        all_acc_id.append(acc_id)
        xy_errs = model.fns.wiskott_xy_errors(feat, xy[:, 0:2])
        all_xy_errs.append(xy_errs)
        print '%2d:\t%g,\t%g,\t%g' % (bb, acc_id, xy_errs[0], xy_errs[1])

    #########################
    # Embed
    #########################
    if not args.quit:
        ipshell()  # Start an IPython shell
    print 'done.'
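# Hedged usage of the script above (the script and run-directory names
# are hypothetical):
#   python plot_results.py my_run        # plot, then drop into IPython
#   python plot_results.py -s -q my_run  # save PNGs/PDFs and exit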
def train(self, dataset): """ Runs one epoch of SGD training on the specified dataset. Parameters ---------- dataset : Dataset """ if not hasattr(self, 'sgd_update'): raise Exception("train called without first calling setup") # Make sure none of the parameters have bad values for param in self.params: value = param.get_value(borrow=True) if not isfinite(value): raise Exception("NaN in " + param.name) self.first = False rng = self.rng if not is_stochastic(self.train_iteration_mode): rng = None data_specs = self.cost.get_data_specs(self.model) # The iterator should be built from flat data specs, so it returns # flat, non-redundent tuples of data. mapping = DataSpecsMapping(data_specs) space_tuple = mapping.flatten(data_specs[0], return_tuple=True) # print 'space tuple', type(space_tuple), space_tuple from pylearn2.space import VectorSpace ############################################### # # # CHANGINGS TO THE ORIGINAL ALGORITHM # # # ############################################### # we have 3 classes in dataset (active, inactive, middle), but only two softmax neurons # therefore VectorSpace has dim = 2 and an error will be raised when trying to convert # label to a vector of length 2. So we change the vector length for a while and convert # things manually. space_tuple = (space_tuple[0], VectorSpace(dim=3)) ############################# # # # END OF CHANGINGS # # # ############################# source_tuple = mapping.flatten(data_specs[1], return_tuple=True) if len(space_tuple) == 0: # No data will be returned by the iterator, and it is impossible # to know the size of the actual batch. # It is not decided yet what the right thing to do should be. raise NotImplementedError( "Unable to train with SGD, because " "the cost does not actually use data from the data set. " "data_specs: %s" % str(data_specs)) flat_data_specs = (CompositeSpace(space_tuple), source_tuple) iterator = dataset.iterator(mode=self.train_iteration_mode, batch_size=self.batch_size, data_specs=flat_data_specs, return_tuple=True, rng=rng, num_batches=self.batches_per_iter) # print 'flat data specs', type(flat_data_specs), flat_data_specs # flat data specs <type 'tuple'> # (CompositeSpace(Conv2DSpace(shape=(18, 3492), num_channels=1, axes=('c', 0, 1, 'b'), dtype=float64), # VectorSpace(dim=2, dtype=float64)), # 'features', 'targets')) on_load_batch = self.on_load_batch for batch in iterator: # batch is a list with two numpy arrays: [sample, label] # self.params is a list with theano.tensor.sharedvar.TensorSharedVariables # theano.tensor.sharedvar.TensorSharedVariable.get_value() returns numpy.array # you can set value with theano.tensor.sharedvar.TensorSharedVariable.set_value(np.array_object) # this being here might cause troubles as batch is a nasty thing right now for callback in on_load_batch: callback(*batch) ############################################### # # # CHANGINGS TO THE ORIGINAL ALGORITHM # # # ############################################### self.print_params("on entering iteration", t.cyan) # GOOD ADVICE: if something is very wrong check it the following map is valid # TODO: check this # active 1 [[ 0. 1. 0. ]] [[ 0. 1. ]] # nonactive 0 [[ 1. 0. 0. ]] [[ 1. 0. ]] # middle -1 [[ 0. 0. 1. 
]] batch_1_on_load = batch[1].copy() # if label was '0' if (batch[1] == np.array((1, 0, 0))).all(): # print "example: nonactive" batch = (batch[0], np.reshape(np.array((1, 0)), (1, 2))) self.sgd_update(*batch) # if label was '1' elif (batch[1] == np.array((0, 1, 0))).all(): # print "example: active" batch = (batch[0], np.reshape(np.array((0, 1)), (1, 2))) self.sgd_update(*batch) # else we have to deal with unlabeled example else: # print "example: middle" parameters_on_load = self.get_parameters() ###################################### # # # RUNNING AS INACTIVE SAMPLE # # # ###################################### # print 'running as inactive' # setting label as inactive batch = (batch[0], np.reshape(np.array((1, 0)), (1, 2))) self.print_params("on entering inactive", t.blue) # updating the model self.sgd_update(*batch) self.print_params("after update inactive", t.green) # remember changing in parameters params_after_inactive = self.get_parameters() diff_inactive = self.get_difference(params_after_inactive, parameters_on_load) self.print_dict_of_params(diff_inactive, "difference") # bring back on load parameters self.restore_parameters(parameters_on_load) self.print_params('after restore', t.yellow) #################################### # # # RUNNING AS ACTIVE SAMPLE # # # #################################### # print 'running as active' # setting label as active batch = (batch[0], np.reshape(np.array((0, 1)), (1, 2))) self.print_params('on entering active', t.blue) # updating the model self.sgd_update(*batch) self.print_params('after update active', t.green) # remember changing in parameters params_after_active = self.get_parameters() diff_active = self.get_difference(params_after_active, parameters_on_load) self.print_dict_of_params(diff_active, "difference") # bring back on load parameters self.restore_parameters(parameters_on_load) self.print_params('after restore', t.yellow) ############################## # # # UPDATING THE MODEL # # # ############################## update_vector = self.calculate_update(diff_active, diff_inactive) self.print_dict_of_params(update_vector, "update vector") self.update_non_classification_parameters(update_vector) # end of if self.print_params('on leaving', t.red) # iterator might return a smaller batch if dataset size # isn't divisible by batch_size # Note: if data_specs[0] is a NullSpace, there is no way to know # how many examples would actually have been in the batch, # since it was empty, so actual_batch_size would be reported as 0. # OK, now lines below need batch in the previous size. So I just set the batch to what is used to be # before my wicked transformations. batch = (batch[0], batch_1_on_load) self.print_self_debug() ############################# # # # END OF CHANGINGS # # # ############################# actual_batch_size = flat_data_specs[0].np_batch_size(batch) self.monitor.report_batch(actual_batch_size) for callback in self.update_callbacks: callback(self) # Make sure none of the parameters have bad values for param in self.params: value = param.get_value(borrow=True) if not isfinite(value): raise Exception("NaN in " + param.name) self.second = True
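# Standalone sketch of the label remapping described in the comments
# above: the dataset one-hot-encodes three classes but the softmax has
# only two units, so labeled rows map onto length-2 one-hot vectors and
# 'middle' rows return None, signalling the two-sided update branch.
# This helper is hypothetical, written only to illustrate the mapping.
import numpy as np

def remap_label(row):
    if (row == np.array((1, 0, 0))).all():   # nonactive
        return np.reshape(np.array((1, 0)), (1, 2))
    if (row == np.array((0, 1, 0))).all():   # active
        return np.reshape(np.array((0, 1)), (1, 2))
    return None                              # middle / unlabeled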
def setup(self, dataset, cost, batch_size, num_batches=None,
          extra_costs=None, mode='sequential', obj_prereqs=None,
          cost_monitoring_args=None):
    """
    Sets up the monitor for a cost minimization problem.
    Adds channels defined by both the model and the cost for
    the specified dataset(s), as well as a channel called
    'objective' defined by the costs' __call__ method.

    Parameters
    ----------
    dataset : pylearn2.datasets.Dataset
        Dataset or dictionary mapping string names to Datasets.
        If string names are used, then for every dataset, each channel
        defined by the model or cost will be replicated with that
        dataset's name followed by an underscore as the prefix. For
        example, if your cost defines a channel called 'misclass', and
        datasets is {'train' : train_dataset, 'valid' : valid_dataset},
        you will get channels called 'train_misclass' and
        'valid_misclass'.
    cost : pylearn2.costs.Cost
        The cost being optimized by training. The value of the cost
        will appear as the `objective` channel. Its
        `get_monitoring_channels` method will also be used to supply
        other channels.
    extra_costs : OrderedDict, optional
        A dictionary mapping channel names to Cost objects. Their
        value will appear as the specified channel name. They will
        also provide more monitoring channels via their
        `get_monitoring_channels` method.
    obj_prereqs : None, or list of functions
        Functions to pass as prerequisites to the `objective` channel.
    cost_monitoring_args : dict
        Dictionary of kwargs that will be passed to
        `cost.get_monitoring_channels()` (but not for the extra_costs).
    """
    if dataset is None:
        return
    if isinstance(dataset, Dataset):
        dataset = {'': dataset}
    else:
        assert isinstance(dataset, dict)
        assert all(isinstance(key, str) for key in dataset)
        assert all(isinstance(dataset[key], Dataset) for key in dataset)

    if extra_costs is None:
        costs = {}
    else:
        assert isinstance(extra_costs, (OrderedDict, dict))
        costs = extra_costs
    assert '' not in costs
    costs[''] = cost

    if cost_monitoring_args is None:
        cost_monitoring_args = {}

    model = self.model

    # Build a composite data_specs containing the specs for all costs,
    # then the specs of the model
    cost_names = sorted(costs.keys())
    spaces = []
    sources = []
    for c in cost_names:
        c_space, c_source = costs[c].get_data_specs(model)
        spaces.append(c_space)
        sources.append(c_source)

    # Ask the model for the data_specs needed
    m_space, m_source = model.get_monitoring_data_specs()
    spaces.append(m_space)
    sources.append(m_source)

    nested_space = CompositeSpace(spaces)
    nested_sources = tuple(sources)

    # Flatten this data_specs, so we build only one symbolic Theano
    # variable for each of the unique (space, source) pairs.
    mapping = DataSpecsMapping((nested_space, nested_sources))
    space_tuple = mapping.flatten(nested_space, return_tuple=True)
    source_tuple = mapping.flatten(nested_sources, return_tuple=True)
    ipt = tuple(space.make_theano_batch(name='monitor_%s' % source,
                                        batch_size=None)
                for (space, source) in safe_zip(space_tuple,
                                                source_tuple))

    # Build a nested tuple from ipt, to dispatch the appropriate parts
    # of the ipt batch to each cost
    nested_ipt = mapping.nest(ipt)

    custom_channels = {}
    for i, cost_name in enumerate(cost_names):
        if cost_name == '':
            prefix = ''
        else:
            prefix = cost_name + '_'
        cost = costs[cost_name]
        cost_ipt = nested_ipt[i]
        raw_channels = cost.get_monitoring_channels(model, cost_ipt)
        channels = {}
        for name in raw_channels:
            # We need three things: the value itself
            # (raw_channels[name]), the input variables (cost_ipt),
            # and the data_specs for these input variables
            # ((spaces[i], sources[i]))
            channels[prefix + name] = (raw_channels[name],
                                       cost_ipt,
                                       (spaces[i], sources[i]))
        custom_channels.update(channels)

    # Use the last inputs from nested_ipt for the model
    model_channels = model.get_monitoring_channels(nested_ipt[-1])
    channels = {}
    for name in model_channels:
        # Note: some code used to consider that model_channels[name]
        # could be a (channel, prereqs) pair; this is not supported.
        channels[name] = (model_channels[name],
                          nested_ipt[-1],
                          (spaces[-1], sources[-1]))
    custom_channels.update(channels)

    if is_stochastic(mode):
        seed = [[2013, 2, 22]]
    else:
        seed = None

    for dataset_name in dataset:
        cur_dataset = dataset[dataset_name]
        self.add_dataset(dataset=cur_dataset, mode=mode,
                         batch_size=batch_size,
                         num_batches=num_batches, seed=seed)
        if dataset_name == '':
            dprefix = ''
        else:
            dprefix = dataset_name + '_'
        # The channel named 'objective' must not vary, since callbacks
        # that respond to the values in the monitor use the name to
        # find it.
        for i, cost_name in enumerate(cost_names):
            cost = costs[cost_name]
            cost_ipt = nested_ipt[i]
            cost_value = cost.expr(model, cost_ipt)
            if cost_value is not None:
                if cost_name == '':
                    name = dprefix + 'objective'
                    prereqs = obj_prereqs
                else:
                    name = dprefix + cost_name
                    prereqs = None
                cost.get_data_specs(model)[0].validate(cost_ipt)
                self.add_channel(name=name, ipt=cost_ipt,
                                 val=cost_value,
                                 data_specs=cost.get_data_specs(model),
                                 dataset=cur_dataset,
                                 prereqs=prereqs)
        for key in custom_channels:
            val, ipt, data_specs = custom_channels[key]
            data_specs[0].validate(ipt)
            self.add_channel(name=dprefix + key,
                             ipt=ipt,
                             val=val,
                             data_specs=data_specs,
                             dataset=cur_dataset)
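# Sketch of the deduplication noted in the comment above: two costs
# that request the same (space, source) pair share a single symbolic
# input after flattening. Assumes the usual pylearn2 import paths; the
# dim is made up.
from pylearn2.space import CompositeSpace, VectorSpace
from pylearn2.utils.data_specs import DataSpecsMapping

features = VectorSpace(dim=10)
nested = (CompositeSpace([features, features]), ('features', 'features'))
mapping = DataSpecsMapping(nested)
flat = mapping.flatten(nested[0], return_tuple=True)
# len(flat) should be 1: both costs dispatch from one Theano variable.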
def setup(self, dataset, cost, batch_size, num_batches=None,
          extra_costs=None, mode='sequential', obj_prereqs=None,
          cost_monitoring_args=None):
    if dataset is None:
        return
    if isinstance(dataset, Dataset):
        dataset = {'': dataset}
    else:
        assert isinstance(dataset, dict)
        assert all(isinstance(key, str) for key in dataset)
        assert all(isinstance(dataset[key], Dataset) for key in dataset)

    if extra_costs is None:
        costs = {}
    else:
        assert isinstance(extra_costs, (OrderedDict, dict))
        costs = extra_costs
    assert '' not in costs
    costs[''] = cost

    if cost_monitoring_args is None:
        cost_monitoring_args = {}

    model = self.model

    # Build a composite data_specs containing the specs for all costs,
    # then the specs of the model
    cost_names = sorted(costs.keys())
    spaces = []
    sources = []
    for c in cost_names:
        c_space, c_source = costs[c].get_data_specs(model)
        spaces.append(c_space)
        sources.append(c_source)

    # Ask the model for the data_specs needed
    m_space, m_source = model.get_monitoring_data_specs()
    spaces.append(m_space)
    sources.append(m_source)

    nested_space = CompositeSpace(spaces)
    nested_sources = tuple(sources)

    # Flatten this data_specs, so we build only one symbolic Theano
    # variable for each of the unique (space, source) pairs.
    mapping = DataSpecsMapping((nested_space, nested_sources))
    space_tuple = mapping.flatten(nested_space, return_tuple=True)
    source_tuple = mapping.flatten(nested_sources, return_tuple=True)
    ipt = tuple(space.make_theano_batch(name='monitor_%s' % source,
                                        batch_size=None)
                for (space, source) in safe_zip(space_tuple,
                                                source_tuple))

    # Build a nested tuple from ipt, to dispatch the appropriate parts
    # of the ipt batch to each cost
    nested_ipt = mapping.nest(ipt)

    # Note: in this variant, the construction of custom monitoring
    # channels from the costs and the model (present in the upstream
    # version above) is disabled; only the per-cost objective channels
    # below are added.

    if is_stochastic(mode):
        seed = [[2013, 2, 22]]
    else:
        seed = None

    for dataset_name in dataset:
        cur_dataset = dataset[dataset_name]
        self.add_dataset(dataset=cur_dataset, mode=mode,
                         batch_size=batch_size,
                         num_batches=num_batches, seed=seed)
        if dataset_name == '':
            dprefix = ''
        else:
            dprefix = dataset_name + '_'
        # The channel named 'objective' must not vary, since callbacks
        # that respond to the values in the monitor use the name to
        # find it.
        for i, cost_name in enumerate(cost_names):
            cost = costs[cost_name]
            cost_ipt = nested_ipt[i]
            # In this variant, cost.expr returns a list of terms,
            # which are summed into a single objective expression.
            cost_value_list = cost.expr(model, cost_ipt)
            cost_value = reduce(lambda x, y: x + y, cost_value_list)
            if cost_value is not None:
                if cost_name == '':
                    name = dprefix + 'objective'
                    prereqs = obj_prereqs
                else:
                    name = dprefix + cost_name
                    prereqs = None
                cost.get_data_specs(model)[0].validate(cost_ipt)
                self.add_channel(name=name, ipt=cost_ipt,
                                 val=cost_value,
                                 data_specs=cost.get_data_specs(model),
                                 dataset=cur_dataset,
                                 prereqs=prereqs)
def train(self, dataset):
    assert self.bSetup
    model = self.model
    if self.batch_size is None:
        batch_size = model.force_batch_size
    else:
        batch_size = self.batch_size
        if hasattr(model, 'force_batch_size'):
            assert (model.force_batch_size <= 0 or
                    batch_size == model.force_batch_size)
    assert self.batch_size % self.duplicate == 0
    rng = self.rng
    train_iteration_mode = 'shuffled_sequential'
    if not is_stochastic(train_iteration_mode):
        rng = None
    iterator = dataset.iterator(mode=train_iteration_mode,
                                batch_size=self.batch_size // self.duplicate,
                                num_batches=self.batches_per_iter,
                                targets=self.cost.supervised,
                                topo=self.X.ndim != 2,
                                rng=rng)
    accum_batches = []
    if self.accumulate:
        warnings.warn("InpaintAlg.train wastes time setting shared "
                      "variables only to pull their value back out.")
    for data in iterator:
        if self.cost.supervised:
            X, Y = data
            mode = self.theano_function_mode
            if mode is not None and hasattr(mode, 'record'):
                stry = str(Y).replace('\n', ' ')
                mode.record.handle_line('data Y ' + stry + '\n')
            if self.duplicate > 1:
                Y = np.concatenate([Y] * self.duplicate, axis=0)
            self.Y.set_value(Y)
        else:
            X = data
        if self.duplicate > 1:
            X = np.concatenate([X] * self.duplicate, axis=0)
        self.X.set_value(X)
        self.update_mask()
        if self.accumulate:
            accum_batches.append([elem.get_value()
                                  for elem in self.inputs])
            if len(accum_batches) == self.combine_batches:
                self.before_step(model)
                self.optimizer.minimize(*accum_batches)
                self.after_step(model)
                actual_batch_size = sum([batch[0].shape[0]
                                         for batch in accum_batches])
                model.monitor.report_batch(actual_batch_size)
                accum_batches = []
        else:
            self.before_step(model)
            self.optimizer.minimize()
            self.after_step(model)
            actual_batch_size = X.shape[0]
            model.monitor.report_batch(actual_batch_size)
    assert len(accum_batches) == 0
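# Minimal sketch (independent of the class above) of the accumulation
# pattern used when self.accumulate is set: collect combine_batches
# minibatches, take one optimizer step over all of them, and report
# the summed batch size. The function names are hypothetical.
import numpy as np

def accumulate_and_step(iterator, minimize, report_batch, combine_batches):
    accum = []
    for X in iterator:
        accum.append(X)
        if len(accum) == combine_batches:
            minimize(*accum)  # one step over several minibatches
            report_batch(sum(b.shape[0] for b in accum))
            accum = []
    assert len(accum) == 0  # batches_per_iter should be a multiple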