def getFeatures(indexes):
    """
    Sample one (source word, target word) pair from each selected sequence.

    This function reads `self` from the enclosing scope.

    Parameters
    ----------
    indexes : iterable of int
        Positions of the sequences to sample from. They index
        `self.samples_sequences` when `self._load_to_memory` is true and
        `self.node` otherwise.

    Returns
    -------
    X : numpy.ndarray
        One source word id per sequence, each wrapped in a length-1 row.
        Ids greater than or equal to `self.X_labels` are replaced by 1
        (unknown).

    Notes
    -----
    The targets are not returned. They are stored, together with
    `indexes`, in `self.lastY` as the tuple `(y, indexes)`.
    """
    if self._load_to_memory:
        sequences = [self.samples_sequences[i] for i in indexes]
    else:
        sequences = [self.node[i] for i in indexes]

    # Get random source word index for "ngram"
    source_i = [numpy.random.randint(self.frame_length/2 + 1,
                                     len(s) - self.frame_length/2, 1)[0]
                for s in sequences]
    # The target position is drawn from a normal distribution centred on
    # the source position, made non-negative with abs() and clipped to the
    # last valid index of the sequence.
    target_i = [min(abs(int(numpy.random.normal(s_i,
                                                self.frame_length/3.0))),
                    len(s) - 1)
                for s_i, s in safe_izip(source_i, sequences)]

    # Words mapped to integers greater than input max are set to 1
    # (unknown). The list is converted to an array *before* masking: on a
    # plain Python list `X >= n` is not an element-wise comparison, so the
    # mask would not select the out-of-range entries.
    X = numpy.asarray([numpy.asarray([s[i]])
                       for i, s in safe_izip(source_i, sequences)])
    X[X >= self.X_labels] = 1

    y = numpy.asarray([numpy.asarray([s[i]])
                       for i, s in safe_izip(target_i, sequences)])
    y[y >= self.X_labels] = 1

    # Store the targets generated by these indices.
    self.lastY = (y, indexes)
    return X
def iterator(self, mode=None, batch_size=None, num_batches=None,
             rng=None, data_specs=None, return_tuple=False):
    """
    Build a `VariableImageDatasetIterator` over this dataset.

    Parameters
    ----------
    mode : str, optional
        Iteration mode. When given it must be one of the names accepted
        below; when omitted `self._iter_subset_class` is used.
    batch_size : int, optional
        Falls back to `self._iter_batch_size` if that attribute exists.
    num_batches : int, optional
        Falls back to `self._iter_num_batches` if that attribute exists.
    rng : object, optional
        Falls back to `self.rng` when the chosen mode is stochastic.
    data_specs : (space, source) pair, optional
        Falls back to `self._iter_data_specs`.
    return_tuple : bool, optional
        Forwarded unchanged to the iterator.

    Returns
    -------
    VariableImageDatasetIterator

    Raises
    ------
    ValueError
        If `mode` is given but is not one of the supported mode names.
    """
    supported_modes = ('sequential', 'random_slice', 'even_sequential',
                       'batchwise_shuffled_sequential',
                       'even_batchwise_shuffled_sequential')
    if not (mode is None or mode in supported_modes):
        raise ValueError("Due to HDF5 limitations on advanced indexing, "
                         "the '" + mode + "' iteration mode is not "
                         "supported")

    if data_specs is None:
        data_specs = self._iter_data_specs
    space, source = data_specs
    if isinstance(space, CompositeSpace):
        sub_spaces, sub_sources = space.components, source
    else:
        sub_spaces, sub_sources = (space,), (source,)

    # One `None` (i.e. no conversion) per (sub-space, sub-source) pair.
    convert = [None for _ in safe_izip(sub_spaces, sub_sources)]

    if mode is None:
        subset_class = self._iter_subset_class
    else:
        subset_class = resolve_iterator_class(mode)

    if batch_size is None:
        batch_size = getattr(self, '_iter_batch_size', None)
    if num_batches is None:
        num_batches = getattr(self, '_iter_num_batches', None)
    if rng is None and subset_class.stochastic:
        rng = self.rng

    subset_iterator = subset_class(self.num_examples, batch_size,
                                   num_batches, rng)
    return VariableImageDatasetIterator(dataset=self,
                                        subset_iterator=subset_iterator,
                                        data_specs=data_specs,
                                        return_tuple=return_tuple,
                                        convert=convert)
def next(self):
    """
    Retrieve the next batch, zero-padding variable-length sequences.

    The raw batch is obtained from `self._next` when the dataset has a
    `get` attribute and from `self._fallback_next` otherwise. For every
    source whose space is a `SequenceDataSpace`, the samples are padded
    with zeros up to the longest sample of the batch, the padded array is
    transposed with axis order (1, 0, 2), and the result of
    `self._create_mask` on the raw samples is appended right after it.
    Other sources are passed through unchanged.

    Returns
    -------
    next_batch : object
        A tuple of batches, or the batch itself when exactly one entry
        was produced and `return_tuple` is false.
    """
    next_index = self._subset_iterator.next()
    if hasattr(self._dataset, 'get'):
        raw_data = self._next(next_index)
    else:
        raw_data = self._fallback_next(next_index)

    rvals = []
    # `source` and `fn` are not used in this loop; they are still zipped
    # so that all four per-source collections are iterated together.
    for space, source, data, fn in safe_izip(self._space, self._source,
                                             raw_data, self._convert):
        if isinstance(space, SequenceDataSpace):
            # Add padding
            max_sequence_length = max(len(sample) for sample in data)
            batch = np.zeros((len(data), max_sequence_length, space.dim),
                             dtype=space.dtype)
            for i, sample in enumerate(data):
                batch[i, :len(sample)] = sample
            rvals.append(np.transpose(batch, (1, 0, 2)))
            # Create mask
            rvals.append(self._create_mask(data))
        else:
            rvals.append(data)

    # The number of produced entries (not the length of the last batch)
    # decides whether to unwrap, and the unwrapped batch is returned as
    # is instead of being converted to a tuple of its rows.
    if not self._return_tuple and len(rvals) == 1:
        return rvals[0]
    return tuple(rvals)
def __call__(self, model, X, Y=None, **kwargs):
    """
    Sum the weight-decay terms of every layer in `model.layers`.

    Parameters
    ----------
    model : object
        Must expose `layers`; each layer is paired with the coefficient
        at the same position in `self.coeffs`.
    X, Y : object
        Not used by this method.

    Returns
    -------
    total_cost : theano variable
        A scalar named 'weight_decay', or a constant 0 named
        '0_weight_decay' when every layer cost equals 0.

    Raises
    ------
    NotImplementedError
        If a layer does not implement `get_weight_decay` and its
        coefficient is not 0.
    """
    def wrapped_layer_cost(layer, coef):
        # Use the helper's own argument. The previous code read `coeff`,
        # which only resolved because the comprehension variable of the
        # enclosing function leaked into that scope.
        try:
            return layer.get_weight_decay(coef)
        except NotImplementedError:
            if coef == 0.:
                return 0.
            raise NotImplementedError(
                str(type(layer)) + " does not implement get_weight_decay.")

    layer_costs = [wrapped_layer_cost(layer, coeff)
                   for layer, coeff in safe_izip(model.layers, self.coeffs)]

    assert T.scalar() != 0.  # make sure theano semantics do what I want
    layer_costs = [cost for cost in layer_costs if cost != 0.]

    if len(layer_costs) == 0:
        rval = T.as_tensor_variable(0.)
        rval.name = '0_weight_decay'
        return rval

    total_cost = reduce(lambda x, y: x + y, layer_costs)
    assert total_cost.ndim == 0
    total_cost.name = 'weight_decay'
    return total_cost
def next(self):
    """
    Return the next mini-batch.

    Returns
    -------
    next_batch : object
        One entry per raw data source, each indexed with the next subset
        index and passed through its converter when one is set. The
        entries come back as a tuple, except that a single entry is
        returned bare when `return_tuple` is false.

    Raises
    ------
    StopIteration
        When there are no more batches to return.
    """
    batch_index = self._subset_iterator.next()
    # TODO: handle fancy-index copies by allocating a buffer and
    # using np.take()
    pieces = []
    for data, fn in safe_izip(self._raw_data, self._convert):
        if fn:
            pieces.append(fn(data[batch_index]))
        else:
            pieces.append(data[batch_index])
    rval = tuple(pieces)
    if self._return_tuple or len(rval) != 1:
        return rval
    return rval[0]
def next(self):
    """
    Fetch the following batch of examples.

    Returns
    -------
    next_batch : object
        A tuple holding, for each raw data source, the slice selected by
        the next subset index (converted when a converter is set). If
        there is exactly one source and `return_tuple` is false, that
        single element is returned instead of a 1-tuple.

    Raises
    ------
    StopIteration
        When there are no more batches to return.
    """
    index = self._subset_iterator.next()

    def fetch(data, fn):
        # Apply the optional per-source converter to the selected slice.
        selected = data[index]
        return fn(selected) if fn else selected

    # TODO: handle fancy-index copies by allocating a buffer and
    # using np.take()
    rval = tuple([fetch(data, fn)
                  for data, fn in safe_izip(self._raw_data, self._convert)])
    unwrap = (not self._return_tuple) and len(rval) == 1
    if unwrap:
        rval = rval[0]
    return rval
def next(self):
    """
    Retrieve the next batch, zero-padding variable-length sequences.

    For every source whose space is a `SequenceDataSpace`, the selected
    samples are padded with zeros up to the longest sample of the batch,
    the first two axes are swapped, and the optional converter is
    applied. When the source is listed in `self.mask_needed`, a mask is
    appended right after the batch. Other sources are only converted.

    Returns
    -------
    next_batch : object
        A tuple of batches (and masks), or the batch itself when exactly
        one entry was produced and `return_tuple` is false.
    """
    next_index = self._subset_iterator.next()
    rvals = []
    for space, source, data, fn in safe_izip(self._space, self._source,
                                             self._raw_data, self._convert):
        rval = data[next_index]
        if isinstance(space, SequenceDataSpace):
            # Add padding
            max_sequence_length = max(len(sample) for sample in rval)
            batch = np.zeros(
                (len(rval), max_sequence_length) + data[0].shape[1:],
                dtype=data[0].dtype)
            for i, sample in enumerate(rval):
                batch[i, :len(sample)] = sample

            # Create mask from the raw, unpadded samples, before `rval`
            # is replaced by the padded batch.
            # NOTE(review): assumes `_create_mask` expects the list of
            # individual sequences -- confirm against its definition.
            needs_mask = source in self.mask_needed
            if needs_mask:
                mask = self._create_mask(rval)

            # Swapping the first two axes equals transpose (1, 0, 2) for
            # 3-D batches and also works for other ranks.
            rval = np.swapaxes(batch, 0, 1)
            if fn:
                rval = fn(rval)
            rvals.append(rval)
            if needs_mask:
                rvals.append(mask)
        else:
            if fn:
                rval = fn(rval)
            rvals.append(rval)

    # The number of produced entries (not the length of the last batch)
    # decides whether to unwrap, and the unwrapped batch is returned as
    # is instead of being converted to a tuple of its rows.
    if not self._return_tuple and len(rvals) == 1:
        return rvals[0]
    return tuple(rvals)
def expr(self, model, data, **kwargs):
    """
    Build the summed weight-decay expression over `model.hidden_layers`.

    Parameters
    ----------
    model : object
        Must expose `hidden_layers`; each layer is paired with the
        coefficient at the same position in `self.coeffs`.
    data : object
        Validated against the space of `self.get_data_specs(model)`;
        not used otherwise.

    Returns
    -------
    total_cost : theano variable
        A scalar named 'weight_decay', or a constant 0 named
        '0_weight_decay' when every layer cost equals 0.
    """
    self.get_data_specs(model)[0].validate(data)

    decay_terms = []
    for layer, coeff in safe_izip(model.hidden_layers, self.coeffs):
        decay_terms.append(layer.get_weight_decay(coeff))

    assert T.scalar() != 0.  # make sure theano semantics do what I want
    nonzero_terms = [term for term in decay_terms if term != 0.]

    if not nonzero_terms:
        zero = T.as_tensor_variable(0.)
        zero.name = '0_weight_decay'
        return zero

    # Left-to-right sum of the remaining terms.
    total_cost = nonzero_terms[0]
    for term in nonzero_terms[1:]:
        total_cost = total_cost + term
    total_cost.name = 'DBM_WeightDecay'
    assert total_cost.ndim == 0
    total_cost.name = 'weight_decay'
    return total_cost
def __call__(self, model, X, Y=None, **kwargs):
    """
    Sum the weight-decay terms of every layer in `model.layers`.

    Parameters
    ----------
    model : object
        Must expose `layers`; each layer is paired with the coefficient
        at the same position in `self.coeffs`.
    X, Y : object
        Not used by this method.

    Returns
    -------
    total_cost : theano variable
        A scalar named 'weight_decay', or a constant 0 named
        '0_weight_decay' when every layer cost equals 0.

    Raises
    ------
    NotImplementedError
        If a layer does not implement `get_weight_decay` and its
        coefficient is not 0.
    """
    def wrapped_layer_cost(layer, coef):
        # Use the helper's own argument. The previous code read `coeff`,
        # which only resolved because the comprehension variable of the
        # enclosing function leaked into that scope.
        try:
            return layer.get_weight_decay(coef)
        except NotImplementedError:
            if coef == 0.:
                return 0.
            raise NotImplementedError(
                str(type(layer)) + " does not implement get_weight_decay.")

    layer_costs = [wrapped_layer_cost(layer, coeff)
                   for layer, coeff in safe_izip(model.layers, self.coeffs)]

    assert T.scalar() != 0.  # make sure theano semantics do what I want
    layer_costs = [cost for cost in layer_costs if cost != 0.]

    if len(layer_costs) == 0:
        rval = T.as_tensor_variable(0.)
        rval.name = '0_weight_decay'
        return rval

    total_cost = reduce(lambda x, y: x + y, layer_costs)
    assert total_cost.ndim == 0
    total_cost.name = 'weight_decay'
    return total_cost
def _fallback_next(self, next_index):
    """
    Index every raw data source directly with `next_index`.

    Parameters
    ----------
    next_index : object
        Index applied to each entry of `self._raw_data`.

    Returns
    -------
    tuple
        One entry per raw data source, passed through the matching
        converter from `self._convert` when that converter is set.
    """
    # TODO: handle fancy-index copies by allocating a buffer and
    # using np.take()
    selected = []
    for data, fn in safe_izip(self._raw_data, self._convert):
        if fn:
            selected.append(fn(data[next_index]))
        else:
            selected.append(data[next_index])
    return tuple(selected)
def get_monitoring_channels(self, X=None, Y=None):
    """
    Collect monitoring channels from every layer of the wrapped MLP.

    The input is propagated layer by layer: it is first passed through
    `self.scale` with that layer's scale parameter, then through the
    layer's `fprop`. Each layer contributes its own channels and the
    channels computed from its output state, both prefixed with the
    layer name. `Y` is only handed to the last layer.

    Note: X and Y may both be None, in the case when this is a layer of
    a bigger MLP.

    Parameters
    ----------
    X : object, optional
        Input state.
    Y : object, optional
        Targets, forwarded to the last layer only.

    Returns
    -------
    rval : OrderedDict
        Channel name -> expression. For every scale parameter it also
        contains 'scale_input_to_<layer>_min', '..._mean' and '..._max'.
    """
    state = X
    rval = OrderedDict()

    for layer, scale in safe_izip(self.mlp.layers, self._params):
        state = self.scale(state, layer, scale)
        ch = layer.get_monitoring_channels()
        for key in ch:
            rval[layer.layer_name + '_' + key] = ch[key]
        state = layer.fprop(state)
        args = [state]
        if layer is self.mlp.layers[-1]:
            args.append(Y)
        ch = layer.get_monitoring_channels_from_state(*args)
        for key in ch:
            rval[layer.layer_name + '_' + key] = ch[key]

    for i, scale in enumerate(self._params):
        prefix = 'scale_input_to_' + self.mlp.layers[i].layer_name
        # Each statistic gets its own key; previously all three were
        # written to '_min', so only the max survived under that name.
        rval[prefix + '_min'] = scale.min()
        rval[prefix + '_mean'] = scale.mean()
        rval[prefix + '_max'] = scale.max()

    return rval
def next(self):
    """
    Return all frame windows of the next file as one batch.

    The subset iterator yields a position in `self._dataset.file_list`;
    the first element of that entry is the key into
    `self._dataset.file_index`, which provides
    `(offset, nframes, key, target)`. Window start positions are
    `offset + arange(0, nframes - tframes, int(tframes / thop))`.

    Returns
    -------
    next_batch : object
        A tuple with one entry per data source followed by the file
        identifier. For the 'targets' source the entry is the file's
        `target`; for any other source it is a design matrix with one
        flattened magnitude window per row, standardized by the dataset
        when `tframes > 1` and converted when a converter is set. A
        single-element result is unwrapped when `return_tuple` is false.
    """
    # next numerical index
    next_file_index = self._subset_iterator.next()

    # associate numerical index with file from the dataset
    # !!! added line to iterate over different index set !!!
    next_file = self._dataset.file_list[next_file_index][0]

    # lookup file's position in the hdf5 array
    offset, nframes, key, target = self._dataset.file_index[next_file]

    thop = 1.  # hardcoded and must match prepare_dataset.py!!!
    # Builtin int replaces the `np.int` alias, which newer NumPy
    # releases no longer provide; the value is identical.
    sup = np.arange(0, nframes - self._dataset.tframes,
                    int(self._dataset.tframes / thop))
    next_index = offset + sup

    spaces, sources = self._data_specs
    output = []

    for data, fn, source, space in safe_izip(self._raw_data, self._convert,
                                             sources, spaces.components):
        if source == 'targets':
            output.append(target)
            continue

        design_mat = []
        for index in next_index:
            # Only the magnitude is kept (phase is discarded).
            X = np.abs(data[index:index + self._dataset.tframes, :])
            design_mat.append(X.reshape((np.prod(X.shape),)))
        design_mat = np.vstack(design_mat)

        if self._dataset.tframes > 1:
            # ideally we'd standardize in a preprocessing layer
            # (so that standardization is built-in to the model rather
            # than the dataset) but i haven't quite figured out how to do
            # this yet for images, due to a memory error associated with
            # a really big diagonal scaling matrix
            # (however, it works fine for vectors)
            design_mat = self._dataset.standardize(design_mat)

        output.append(fn(design_mat) if fn else design_mat)

    output.append(next_file)

    rval = tuple(output)
    if not self._return_tuple and len(rval) == 1:
        rval, = rval
    return rval
def expr(self, model, data, **kwargs):
    """
    Build the summed weight-decay expression over `model.layers`.

    Parameters
    ----------
    model : object
        Must expose `layers`; each layer is paired with the coefficient
        at the same position in `self.coeffs`.
    data : object
        Validated against the space of `self.get_data_specs(model)`;
        not used otherwise.

    Returns
    -------
    total_cost : theano variable
        A scalar named 'weight_decay', or a constant 0 named
        '0_weight_decay' when every layer cost equals 0.
    """
    self.get_data_specs(model)[0].validate(data)

    per_layer = []
    for layer, coeff in safe_izip(model.layers, self.coeffs):
        per_layer.append(layer.get_weight_decay(coeff))

    assert T.scalar() != 0.  # make sure theano semantics do what I want
    kept = [cost for cost in per_layer if cost != 0.]

    if not kept:
        zero_cost = T.as_tensor_variable(0.)
        zero_cost.name = '0_weight_decay'
        return zero_cost

    # Left-to-right sum of the remaining terms.
    total_cost = kept[0]
    for cost in kept[1:]:
        total_cost = total_cost + cost
    total_cost.name = 'RNN_WeightDecay'
    assert total_cost.ndim == 0
    total_cost.name = 'weight_decay'
    return total_cost
def add_dataset(self, dataset, mode="sequential", batch_size=None,
                num_batches=None, seed=None):
    """
    Determines the data used to calculate the values of each channel.

    Every argument may be a single value or a list with one entry per
    dataset; single values are wrapped in a one-element list.

    Parameters
    ----------
    dataset : object
        A `pylearn2.datasets.Dataset` object.
    mode : str or object, optional
        Iteration mode; see the docstring of the `iterator` method
        on `pylearn2.datasets.Dataset` for details.
    batch_size : int, optional
        The size of an individual batch. Optional if `mode` is
        'sequential' and `num_batches` is specified (batch size
        will be calculated based on full dataset size).
    num_batches : int, optional
        The total number of batches. Unnecessary if `mode` is
        'sequential' and `batch_size` is specified (number of
        batches will be calculated based on full dataset size).
    seed : int, list or tuple, optional
        Passed to the dataset iterator as `rng`. Must be an int, list or
        tuple for stochastic iteration modes and None otherwise.

    Raises
    ------
    ValueError
        If the per-dataset lists do not all have one entry per dataset,
        or if a dataset rejects the iteration parameters.
    TypeError
        If a stochastic iterator is requested without an int, list or
        tuple seed.
    """
    # The user can omit using lists if only one dataset is set
    if not isinstance(dataset, list):
        dataset = [dataset]
    if not isinstance(mode, list):
        mode = [mode]
    if not isinstance(batch_size, list):
        batch_size = [batch_size]
    if not isinstance(num_batches, list):
        num_batches = [num_batches]
    if seed is None:
        seed = [None] * len(dataset)
    if not isinstance(seed, list):
        seed = [seed]

    # `num_batches` is validated as well, as the message promises;
    # previously a mismatch there escaped this check.
    per_dataset = [mode, batch_size, num_batches, seed]
    if any(len(l) != len(dataset) for l in per_dataset):
        raise ValueError("make sure each dataset has its iteration " +
                         "mode, batch size and number of batches.")

    for (d, m, b, n, sd) in safe_izip(dataset, mode, batch_size,
                                      num_batches, seed):
        try:
            it = d.iterator(mode=m, batch_size=b, num_batches=n,
                            topo=self.topo, targets=self.require_label,
                            rng=sd)
        except ValueError as exc:
            raise ValueError("invalid iteration parameters in "
                             "Monitor.add_dataset: " + str(exc))

        if it.stochastic:
            # must be a seed, not a random number generator
            # if it were a random number generator, different iterators
            # using it would update its state, so we would not get the
            # same iterator each time
            # Also, must not be None, because this makes the iterator
            # pick a seed based on the clock
            if not isinstance(sd, (list, tuple, int)):
                raise TypeError(
                    "Monitor requires a seed (not a random number "
                    "generator) when using stochastic iteration modes.")
        else:
            # the iterator should catch this, but let's double-check
            assert sd is None

        if d not in self._datasets:
            self._datasets.append(d)
            self._iteration_mode.append(m)
            self._batch_size.append(b)
            self._num_batches.append(n)
            self._rng_seed.append(sd)
def next(self):
    """
    Retrieve the next batch, zero-padding variable-length sequences.

    For every source whose space is a `SequenceDataSpace`, the selected
    samples are padded with zeros up to the longest sample of the batch,
    the first two axes are swapped, and the optional converter is
    applied. When the source is listed in `self.mask_needed`, the mask
    built from the raw samples is appended right after the batch. Other
    sources are only converted.

    Returns
    -------
    next_batch : object
        A tuple of batches (and masks), or the batch itself when exactly
        one entry was produced and `return_tuple` is false.
    """
    next_index = self._subset_iterator.next()
    rvals = []
    for space, source, data, fn in safe_izip(self._space, self._source,
                                             self._raw_data, self._convert):
        rval = data[next_index]
        if isinstance(space, SequenceDataSpace):
            # Add padding
            max_sequence_length = max(len(sample) for sample in rval)
            batch = np.zeros(
                (len(rval), max_sequence_length) + data[0].shape[1:],
                dtype=data[0].dtype)
            for i, sample in enumerate(rval):
                batch[i, :len(sample)] = sample

            # Create mask while `rval` still holds the raw samples
            needs_mask = source in self.mask_needed
            if needs_mask:
                mask = self._create_mask(rval)

            rval = np.swapaxes(batch, 0, 1)
            if fn:
                rval = fn(rval)
            rvals.append(rval)
            if needs_mask:
                rvals.append(mask)
        else:
            if fn:
                rval = fn(rval)
            rvals.append(rval)

    # The number of produced entries (not the length of the last batch)
    # decides whether to unwrap, and the unwrapped batch is returned as
    # is instead of being converted to a tuple of its rows.
    if not self._return_tuple and len(rvals) == 1:
        return rvals[0]
    return tuple(rvals)
def next(self):
    """
    Return the next batch fetched through the dataset's `get` method.

    Returns
    -------
    next_batch : object
        A tuple with one (optionally converted) batch per entry returned
        by `self._dataset.get(self._source, next_index)`. A single entry
        is returned bare when `return_tuple` is false.
    """
    batch_index = self._subset_iterator.next()
    fetched = self._dataset.get(self._source, batch_index)

    converted = []
    for batch, fn in safe_izip(fetched, self._convert):
        if fn:
            batch = fn(batch)
        converted.append(batch)

    rval = tuple(converted)
    if self._return_tuple or len(rval) != 1:
        return rval
    return rval[0]
def getFeatures(indexes):
    """
    Sample one source word per sequence and encode it as characters.

    This function reads `self` from the enclosing scope.

    Parameters
    ----------
    indexes : iterable of int
        Positions of the sequences to sample from. They index
        `self.samples_sequences` when `self._load_to_memory` is true and
        `self.node` otherwise.

    Returns
    -------
    X : numpy.ndarray
        One entry per sequence: the source word spelled as a column of
        character labels (0 for characters missing from
        `self._char_labels`) terminated by `self._eow`.

    Notes
    -----
    The target word ids (values of 30000 or more replaced by 1) are
    stored with `indexes` in `self.lastY`. When `self._use_words` is
    true the raw source word ids are stored in `self.lastPreX`.
    """
    pool = self.samples_sequences if self._load_to_memory else self.node
    sequences = [pool[i] for i in indexes]

    # Get random start point for ngram
    # Get random source word index for "ngram"
    source_i = [numpy.random.randint(self.frame_length/2 + 1,
                                     len(s) - self.frame_length/2, 1)[0]
                for s in sequences]
    target_i = [min(abs(int(numpy.random.normal(s_i,
                                                self.frame_length/3.0))),
                    len(s) - 1)
                for s_i, s in safe_izip(source_i, sequences)]

    preX = [s[i] for i, s in safe_izip(source_i, sequences)]

    def make_sequence(word):
        # One single-label row per character, then the end-of-word row.
        chars = self._inv_words[word]
        seq = [[self._char_labels.get(c, 0)] for c in chars]
        seq.append([self._eow])
        return numpy.asarray(seq)

    X = numpy.asarray([make_sequence(word) for word in preX])

    y = numpy.asarray([numpy.asarray([s[i]])
                       for i, s in safe_izip(target_i, sequences)])
    # Target Words mapped to integers greater than input max are set to
    # 1 (unknown)
    out_of_range = numpy.where(y >= 30000)
    y[out_of_range] = numpy.asarray([1])

    # Store the targets generated by these indices.
    self.lastY = (y, indexes)
    if self._use_words:
        self.lastPreX = preX
    return X
def next(self):
    """
    Return the next batch of frame windows.

    The index produced by the subset iterator is first mapped through
    `self._dataset.support`. The 'targets' source is indexed directly
    with the mapped indices; every other source yields a design matrix
    with one flattened magnitude window of `tframes` rows per index,
    standardized by the dataset when `tframes > 1`.

    Returns
    -------
    next_batch : object
        A tuple with one (optionally converted) entry per data source.
        A single entry is returned bare when `return_tuple` is false.

    Raises
    ------
    StopIteration
        When there are no more batches to return.
    """
    subset_index = self._subset_iterator.next()
    # !!! added line to iterate over different index set !!!
    window_starts = self._dataset.support[subset_index]

    spaces, sources = self._data_specs
    output = []
    for data, fn, source in safe_izip(self._raw_data, self._convert,
                                      sources):
        if source == 'targets':
            batch = data[window_starts, :]
        else:
            rows = []
            for start in window_starts:
                window = np.abs(data[start:start + self._dataset.tframes, :])
                rows.append(window.reshape((np.prod(window.shape),)))
            batch = np.vstack(rows)
            if self._dataset.tframes > 1:
                # ideally we'd standardize in a preprocessing layer
                # (so that standardization is built-in to the model rather
                # than the dataset) but i haven't quite figured out how to
                # do this yet for images, due to a memory error associated
                # with a really big diagonal scaling matrix
                # (however, it works fine for vectors)
                batch = self._dataset.standardize(batch)
        if fn:
            batch = fn(batch)
        output.append(batch)

    rval = tuple(output)
    if self._return_tuple or len(rval) != 1:
        return rval
    return rval[0]
def next(self):
    """
    Return all frame windows of the next file as one batch.

    The subset iterator yields a position in `self._dataset.file_list`;
    the first element of that entry is the key into
    `self._dataset.file_index`, which provides
    `(offset, nframes, key, target)`. Window start positions are
    `offset + arange(0, nframes - tframes, int(tframes / thop))`.

    Returns
    -------
    next_batch : object
        A tuple with one entry per data source followed by the file
        identifier. For the 'targets' source the entry is the file's
        `target`; for any other source it is a design matrix with one
        flattened magnitude window per row, standardized by the dataset
        when `tframes > 1` and converted when a converter is set. A
        single-element result is unwrapped when `return_tuple` is false.
    """
    # next numerical index
    next_file_index = self._subset_iterator.next()

    # associate numerical index with file from the dataset
    # !!! added line to iterate over different index set !!!
    next_file = self._dataset.file_list[next_file_index][0]

    # lookup file's position in the hdf5 array
    offset, nframes, key, target = self._dataset.file_index[next_file]

    thop = 1.  # hardcoded and must match prepare_dataset.py!!!
    # Builtin int replaces the `np.int` alias, which newer NumPy
    # releases no longer provide; the value is identical.
    sup = np.arange(0, nframes - self._dataset.tframes,
                    int(self._dataset.tframes / thop))
    next_index = offset + sup

    spaces, sources = self._data_specs
    output = []

    for data, fn, source, space in safe_izip(self._raw_data, self._convert,
                                             sources, spaces.components):
        if source == 'targets':
            output.append(target)
            continue

        design_mat = []
        for index in next_index:
            # Only the magnitude is kept (phase is discarded).
            X = np.abs(data[index:index + self._dataset.tframes, :])
            design_mat.append(X.reshape((np.prod(X.shape),)))
        design_mat = np.vstack(design_mat)

        if self._dataset.tframes > 1:
            # ideally we'd standardize in a preprocessing layer
            # (so that standardization is built-in to the model rather
            # than the dataset) but i haven't quite figured out how to do
            # this yet for images, due to a memory error associated with
            # a really big diagonal scaling matrix
            # (however, it works fine for vectors)
            design_mat = self._dataset.standardize(design_mat)

        output.append(fn(design_mat) if fn else design_mat)

    output.append(next_file)

    rval = tuple(output)
    if not self._return_tuple and len(rval) == 1:
        rval, = rval
    return rval
def next(self):
    """
    Fetch the next batch via `self._dataset.get`.

    Returns
    -------
    next_batch : object
        A tuple with one batch per entry returned by
        `self._dataset.get(self._source, next_index)`, each passed
        through its converter when one is set. If the tuple has exactly
        one element and `return_tuple` is false, that element is
        returned on its own.
    """
    index = self._subset_iterator.next()

    def convert(batch, fn):
        # Converters are optional; a falsy entry means "leave as is".
        return fn(batch) if fn else batch

    batches = self._dataset.get(self._source, index)
    rval = tuple([convert(batch, fn)
                  for batch, fn in safe_izip(batches, self._convert)])
    unwrap = (not self._return_tuple) and len(rval) == 1
    if unwrap:
        rval = rval[0]
    return rval
def get_objs():
    """
    Average every objective in `funcs` over the batches of `train`.

    Reads `models`, `funcs` and `train` from the enclosing scope and
    iterates `train` sequentially in batches of 5000 with targets.

    Returns
    -------
    list
        The running mean over batches of each function's value, in the
        order of `funcs`.

    Raises
    ------
    AssertionError
        If the total number of examples seen is not 10000.
    """
    running_means = [0. for _ in models]
    batches_seen = 0.
    examples_seen = 0

    for X, Y in train.iterator(batch_size=5000, mode='sequential',
                               targets=True):
        batch_objs = [func(X, Y) for func in funcs]
        batches_seen += 1.
        # Incremental mean update: mean += (value - mean) / count
        updated = []
        for ave, obj in safe_izip(running_means, batch_objs):
            updated.append(ave + (obj - ave) / batches_seen)
        running_means = updated
        examples_seen += X.shape[0]

    if examples_seen != 10000:
        raise AssertionError(str(examples_seen))
    return running_means
def expr(self, model, data, **kwargs):
    """
    Return a theano expression for the L1 penalty.

    Parameters
    ----------
    model : MLP
    data : tuple
        Should be a valid occupant of
        CompositeSpace(model.get_input_space(),
        model.get_output_space())

    Returns
    -------
    total_cost : theano.gof.Variable
        coeff * sum(abs(weights)) added up for each set of weights.
        Named 'l1_penalty', or a constant 0 named '0_l1_penalty' when no
        layer contributes a non-zero cost.

    Notes
    -----
    `self.coeffs` may be a list (paired with `model.layers` by position,
    which emits a warning) or a mapping keyed by layer name; with a
    mapping, layers whose name is absent are skipped.
    """
    assert T.scalar() != 0.  # make sure theano semantics do what I want
    self.get_data_specs(model)[0].validate(data)

    if isinstance(self.coeffs, list):
        warnings.warn("Coefficients should be given as a dictionary "
                      "with layer names as key. The support of "
                      "coefficients as list would be deprecated "
                      "from 03/06/2015")
        candidates = [layer.get_l1_weight_decay(coeff)
                      for layer, coeff in safe_izip(model.layers,
                                                    self.coeffs)]
    else:
        candidates = [layer.get_l1_weight_decay(self.coeffs[layer.layer_name])
                      for layer in model.layers
                      if layer.layer_name in self.coeffs]

    penalties = [cost for cost in candidates if cost != 0.]

    if not penalties:
        zero = T.as_tensor_variable(0.)
        zero.name = '0_l1_penalty'
        return zero

    # Left-to-right sum of the remaining terms.
    total_cost = penalties[0]
    for cost in penalties[1:]:
        total_cost = total_cost + cost
    total_cost.name = 'MLP_L1Penalty'
    assert total_cost.ndim == 0
    total_cost.name = 'l1_penalty'
    return total_cost
def expr(self, model, data, **kwargs):
    """
    Build the theano expression of the L1 weight penalty.

    Parameters
    ----------
    model : MLP
    data : tuple
        Should be a valid occupant of
        CompositeSpace(model.get_input_space(),
        model.get_output_space())

    Returns
    -------
    total_cost : theano.gof.Variable
        coeff * sum(abs(weights)) added up for each set of weights.
        Named 'l1_penalty', or a constant 0 named '0_l1_penalty' when no
        layer contributes a non-zero cost.

    Notes
    -----
    A list of coefficients is matched to `model.layers` by position and
    triggers a warning; otherwise `self.coeffs` is looked up by layer
    name and layers without an entry are ignored.
    """
    assert T.scalar() != 0.  # make sure theano semantics do what I want
    self.get_data_specs(model)[0].validate(data)

    layer_costs = []
    if isinstance(self.coeffs, list):
        warnings.warn("Coefficients should be given as a dictionary "
                      "with layer names as key. The support of "
                      "coefficients as list would be deprecated "
                      "from 03/06/2015")
        raw_costs = []
        for layer, coeff in safe_izip(model.layers, self.coeffs):
            raw_costs.append(layer.get_l1_weight_decay(coeff))
        for cost in raw_costs:
            if cost != 0.:
                layer_costs.append(cost)
    else:
        for layer in model.layers:
            name = layer.layer_name
            if name not in self.coeffs:
                continue
            cost = layer.get_l1_weight_decay(self.coeffs[name])
            if cost != 0.:
                layer_costs.append(cost)

    if len(layer_costs) > 0:
        total_cost = reduce(operator.add, layer_costs)
        total_cost.name = 'MLP_L1Penalty'
        assert total_cost.ndim == 0
        total_cost.name = 'l1_penalty'
        return total_cost

    rval = T.as_tensor_variable(0.)
    rval.name = '0_l1_penalty'
    return rval
def expr(self, model, data, **kwargs):
    """
    Return a theano expression for the weight-decay cost.

    Parameters
    ----------
    model : MLP
    data : tuple
        Should be a valid occupant of
        CompositeSpace(model.get_input_space(),
        model.get_output_space())

    Returns
    -------
    total_cost : theano.gof.Variable
        coeff * sum(sqr(weights)) added up for each set of weights.
        Named 'weight_decay', or a constant 0 named '0_weight_decay'
        when every layer cost equals 0.

    Raises
    ------
    NotImplementedError
        Through `reraise_as`, when a layer does not implement
        `get_weight_decay` and its coefficient is not 0.
    """
    self.get_data_specs(model)[0].validate(data)

    def wrapped_layer_cost(layer, coef):
        # Use the helper's own argument. The previous code read `coeff`,
        # which only resolved because the comprehension variable of the
        # enclosing function leaked into that scope.
        try:
            return layer.get_weight_decay(coef)
        except NotImplementedError:
            if coef == 0.:
                return 0.
            else:
                reraise_as(NotImplementedError(
                    str(type(layer)) +
                    " does not implement get_weight_decay."))

    layer_costs = [wrapped_layer_cost(layer, coeff)
                   for layer, coeff in safe_izip(model.layers, self.coeffs)]

    assert T.scalar() != 0.  # make sure theano semantics do what I want
    layer_costs = [cost for cost in layer_costs if cost != 0.]

    if len(layer_costs) == 0:
        rval = T.as_tensor_variable(0.)
        rval.name = '0_weight_decay'
        return rval

    total_cost = reduce(operator.add, layer_costs)
    assert total_cost.ndim == 0
    total_cost.name = 'weight_decay'
    return total_cost
def get(self, source, indexes):
    """
    Returns required examples for the required data sources, e.g. the
    first ten features and targets pairs or the last five targets

    Parameters
    ----------
    source : tuple of str
        Tuple of source names
    indexes : slice
        Examples to fetch. `start` and `stop` are shifted by
        `self.start` before the data is read.

    Returns
    -------
    tuple
        One batch per requested source, in the order of `source`.
        The 'features' entry is `self.X_buffer` itself: the buffer is
        reused while the batch size stays the same, so a later call
        overwrites the array returned earlier.
    """
    assert type(indexes) is slice
    # Translate indexes to stay in the [start, stop] range
    indexes = slice(indexes.start + self.start,
                    indexes.stop + self.start,
                    indexes.step)
    # Make sure that requested sources are provided by the dataset
    self._validate_source(source)

    rval = []
    # Axes for a single example
    single_axes = [a for a in self.axes if a != 'b']

    for so in source:
        if so == 'features':
            images = self.X[indexes]
            shapes = self.s[indexes]
            space = self.data_specs[0].components[0]
            # If batch size has changed, reallocate a buffer
            if self.X_buffer is None or len(self.X_buffer) != len(images):
                self.X_buffer = space.get_origin_batch(len(images))

            # Both permutations are the same for every image, so they
            # are computed once instead of once per example.
            to_01c = [single_axes.index(a) for a in (0, 1, 'c')]
            buffer_order = [('b', 0, 1, 'c').index(a) for a in self.axes]

            for i, (img, s) in enumerate(safe_izip(images, shapes)):
                # Transpose image in 'b01c' format to comply with
                # transformer interface
                b01c = img.reshape(s).transpose(to_01c)
                # Assign i'th example in the batch with the preprocessed
                # image
                self.X_buffer.transpose(buffer_order)[i] = \
                    self.transformer(b01c)

            if self.rescale is not None:
                self.X_buffer /= self.rescale
            rval.append(self.X_buffer)
        elif so == 'targets':
            targets = self.y[indexes]
            space = self.data_specs[0].components[1]
            # If batch size has changed, reallocate a buffer
            if self.y_buffer is None or len(self.y_buffer) != len(targets):
                self.y_buffer = space.get_origin_batch(len(targets))
            # Reuse the targets fetched above instead of reading
            # `self.y` a second time. `self.y_buffer` is only
            # (re)allocated here, never filled.
            rval.append(targets)

    return tuple(rval)
def get(self, source, indexes):
    """
    Returns required examples for the required data sources, e.g. the
    first ten features and targets pairs or the last five targets

    Parameters
    ----------
    source : tuple of str
        Tuple of source names
    indexes : slice
        Examples to fetch. `start` and `stop` are shifted by
        `self.start` before the data is read.

    Returns
    -------
    tuple
        One batch per requested source, in the order of `source`.
        The 'features' entry is `self.X_buffer` itself: the buffer is
        reused while the batch size stays the same, so a later call
        overwrites the array returned earlier.
    """
    assert type(indexes) is slice
    # Translate indexes to stay in the [start, stop] range
    indexes = slice(indexes.start + self.start,
                    indexes.stop + self.start,
                    indexes.step)
    # Make sure that requested sources are provided by the dataset
    self._validate_source(source)

    rval = []
    # Axes for a single example
    single_axes = [a for a in self.axes if a != 'b']

    for so in source:
        if so == 'features':
            images = self.X[indexes]
            shapes = self.s[indexes]
            space = self.data_specs[0].components[0]
            # If batch size has changed, reallocate a buffer
            if self.X_buffer is None or len(self.X_buffer) != len(images):
                self.X_buffer = space.get_origin_batch(len(images))

            # Both permutations are the same for every image, so they
            # are computed once instead of once per example.
            to_01c = [single_axes.index(a) for a in (0, 1, 'c')]
            buffer_order = [('b', 0, 1, 'c').index(a) for a in self.axes]

            for i, (img, s) in enumerate(safe_izip(images, shapes)):
                # Transpose image in 'b01c' format to comply with
                # transformer interface
                b01c = img.reshape(s).transpose(to_01c)
                # Assign i'th example in the batch with the preprocessed
                # image
                self.X_buffer.transpose(buffer_order)[i] = \
                    self.transformer(b01c)

            if self.rescale is not None:
                self.X_buffer /= self.rescale
            rval.append(self.X_buffer)
        elif so == 'targets':
            targets = self.y[indexes]
            space = self.data_specs[0].components[1]
            # If batch size has changed, reallocate a buffer
            if self.y_buffer is None or len(self.y_buffer) != len(targets):
                self.y_buffer = space.get_origin_batch(len(targets))
            # Reuse the targets fetched above instead of reading
            # `self.y` a second time. `self.y_buffer` is only
            # (re)allocated here, never filled.
            rval.append(targets)

    return tuple(rval)
def get_objs():
    """
    Average every objective in `funcs` over the batches of `train`.

    Reads `models`, `funcs` and `train` from the enclosing scope and
    iterates `train` sequentially in batches of 5000 with targets.

    Returns
    -------
    list
        The running mean over batches of each function's value, in the
        order of `funcs`.

    Raises
    ------
    AssertionError
        If the total number of examples seen is not 60000.
    """
    means = [0. for _ in models]
    num_batches = 0.
    num_examples = 0

    batches = train.iterator(batch_size=5000, mode='sequential',
                             targets=True)
    for X, Y in batches:
        values = [func(X, Y) for func in funcs]
        num_batches += 1.
        # Incremental mean update: mean += (value - mean) / count
        means = [mean + (value - mean) / num_batches
                 for mean, value in safe_izip(means, values)]
        num_examples += X.shape[0]

    if num_examples == 60000:
        return means
    raise AssertionError(str(num_examples))
def __init__(self, dataset, data_specs, subset_iterator,
             return_tuple=False, convert=None):
    """
    Set up the iterator, stripping mask sources from the data specs.

    Every `SequenceMaskSpace` entry is removed from `data_specs` before
    the parent constructor is called; the name of the source it refers
    to (the mask source name without its `_mask` suffix) is recorded in
    `self.mask_needed`.

    Parameters
    ----------
    dataset : object
        Forwarded to the parent constructor.
    data_specs : (space, source) pair
        `space` must expose `components` and `restrict`; `source` may be
        a single name or a tuple of names.
    subset_iterator : object
        Forwarded to the parent constructor.
    return_tuple : bool, optional
        Forwarded to the parent constructor.
    convert : list, optional
        Forwarded to the parent constructor.

    Raises
    ------
    ValueError
        If a mask source does not end with `_mask`, if the source it
        refers to is not among the requested sources, or if a mask is
        requested without any `SequenceDataSpace`.
    """
    # Unpack the data specs into two tuples
    space, source = data_specs
    if not isinstance(source, tuple):
        source = (source,)

    # Remove the requested mask from the data specs before calling
    # the parent constructor
    self._original_source = source
    mask_seen, sequence_seen = False, False
    self.mask_needed = []
    retain = []
    for i, (subspace, subsource) in enumerate(safe_izip(space.components,
                                                        source)):
        if isinstance(subspace, SequenceMaskSpace):
            if not subsource.endswith('_mask') or \
               subsource[:-5] not in source:
                # The offending source name is now interpolated into the
                # message (the `%s` used to be emitted literally).
                raise ValueError("SequenceDatasetIterator received "
                                 "data_specs containing a "
                                 "SequenceMaskSpace with corresponding "
                                 "source %s, but the source should end "
                                 "with `_mask` in order to match it to the "
                                 "correct SequenceDataSpace" % subsource)
            mask_seen = True
            self.mask_needed.append(subsource[:-5])
        else:
            retain.append(i)
            if isinstance(subspace, SequenceDataSpace):
                sequence_seen = True

    # Equivalent to the former
    # `mask_seen != sequence_seen and i + 1 != len(retain)`: entries are
    # dropped from `retain` exactly when a mask was seen. This form no
    # longer depends on the loop variable, which is unbound when there
    # are no components.
    if mask_seen and not sequence_seen:
        raise ValueError("SequenceDatasetIterator was asked to iterate "
                         "over a sequence mask without data or vice versa")

    space = space.restrict(retain)
    source = tuple(source[i] for i in retain)
    super(SequenceDatasetIterator, self).__init__(
        dataset, subset_iterator, (space, source),
        return_tuple=return_tuple, convert=convert)

    if not isinstance(space, CompositeSpace):
        space = (space,)
    else:
        space = space.components
    assert len(space) == len(source)
    # Despite the name, this holds the components left after the masks
    # were removed.
    self._original_space = space
def expr(self, model, data, **kwargs):
    """
    Return a theano expression for the weight-decay cost.

    Parameters
    ----------
    model : MLP
    data : tuple
        Should be a valid occupant of
        CompositeSpace(model.get_input_space(),
        model.get_output_space())

    Returns
    -------
    total_cost : theano.gof.Variable
        coeff * sum(sqr(weights)) added up for each set of weights.
        Named 'weight_decay', or a constant 0 named '0_weight_decay'
        when every layer cost equals 0.

    Raises
    ------
    NotImplementedError
        Through `reraise_as`, when a layer does not implement
        `get_weight_decay` and its coefficient is not 0.
    """
    self.get_data_specs(model)[0].validate(data)

    def wrapped_layer_cost(layer, coef):
        # Use the helper's own argument. The previous code read `coeff`,
        # which only resolved because the comprehension variable of the
        # enclosing function leaked into that scope.
        try:
            return layer.get_weight_decay(coef)
        except NotImplementedError:
            if coef == 0.:
                return 0.
            else:
                reraise_as(NotImplementedError(
                    str(type(layer)) +
                    " does not implement get_weight_decay."))

    layer_costs = [wrapped_layer_cost(layer, coeff)
                   for layer, coeff in safe_izip(model.layers, self.coeffs)]

    assert T.scalar() != 0.  # make sure theano semantics do what I want
    layer_costs = [cost for cost in layer_costs if cost != 0.]

    if len(layer_costs) == 0:
        rval = T.as_tensor_variable(0.)
        rval.name = '0_weight_decay'
        return rval

    total_cost = reduce(operator.add, layer_costs)
    assert total_cost.ndim == 0
    total_cost.name = 'weight_decay'
    return total_cost
def __init__(self, dataset, data_specs, subset_iterator,
             return_tuple=False, convert=None):
    """
    Set up the iterator, stripping mask sources from the data specs.

    Every `SequenceMaskSpace` entry is removed from `data_specs` before
    the parent constructor is called; the name of the source it refers
    to (the mask source name without its `_mask` suffix) is recorded in
    `self.mask_needed`.

    Parameters
    ----------
    dataset : object
        Forwarded to the parent constructor.
    data_specs : (space, source) pair
        `space` must expose `components` and `restrict`; `source` may be
        a single name or a tuple of names.
    subset_iterator : object
        Forwarded to the parent constructor.
    return_tuple : bool, optional
        Forwarded to the parent constructor.
    convert : list, optional
        Forwarded to the parent constructor.

    Raises
    ------
    ValueError
        If a mask source does not end with `_mask`, if the source it
        refers to is not among the requested sources, or if a mask is
        requested without any `SequenceDataSpace`.
    """
    # Unpack the data specs into two tuples
    space, source = data_specs
    if not isinstance(source, tuple):
        source = (source,)

    # Remove the requested mask from the data specs before calling
    # the parent constructor
    self._original_source = source
    mask_seen, sequence_seen = False, False
    self.mask_needed = []
    retain = []
    for i, (subspace, subsource) in enumerate(safe_izip(space.components,
                                                        source)):
        if isinstance(subspace, SequenceMaskSpace):
            if not subsource.endswith('_mask') or \
               subsource[:-5] not in source:
                # The offending source name is now interpolated into the
                # message (the `%s` used to be emitted literally).
                raise ValueError("SequenceDatasetIterator received "
                                 "data_specs containing a "
                                 "SequenceMaskSpace with corresponding "
                                 "source %s, but the source should end "
                                 "with `_mask` in order to match it to the "
                                 "correct SequenceDataSpace" % subsource)
            mask_seen = True
            self.mask_needed.append(subsource[:-5])
        else:
            retain.append(i)
            if isinstance(subspace, SequenceDataSpace):
                sequence_seen = True

    # Equivalent to the former
    # `mask_seen != sequence_seen and i + 1 != len(retain)`: entries are
    # dropped from `retain` exactly when a mask was seen. This form no
    # longer depends on the loop variable, which is unbound when there
    # are no components.
    if mask_seen and not sequence_seen:
        raise ValueError("SequenceDatasetIterator was asked to iterate "
                         "over a sequence mask without data or vice versa")

    space = space.restrict(retain)
    source = tuple(source[i] for i in retain)
    super(SequenceDatasetIterator, self).__init__(
        dataset, subset_iterator, (space, source),
        return_tuple=return_tuple, convert=convert)

    if not isinstance(space, CompositeSpace):
        space = (space,)
    else:
        space = space.components
    assert len(space) == len(source)
    # Despite the name, this holds the components left after the masks
    # were removed.
    self._original_space = space
def fprop(self, state_below, apply_dropout=False):
    """
    Propagate `state_below` through every layer in `self.layers`.

    Parameters
    ----------
    state_below : object
        Input to the first layer.
    apply_dropout : bool, optional
        When true, a warning is emitted, dropout is applied to the input
        with `self.dropout_input_include_prob` and to each layer's
        output with the matching entry of `self.dropout_include_probs`.

    Returns
    -------
    rval : object
        Output of the last layer (after dropout when enabled).
    """
    theano_rng = None
    if apply_dropout:
        warnings.warn("dropout should be implemented with fixed_var_descr to make sure it works with BGD, this is just a hack to get it working with SGD")
        theano_rng = MRG_RandomStreams(self.rng.randint(2**15))
        state_below = self.apply_dropout(
            state=state_below,
            include_prob=self.dropout_input_include_prob,
            theano_rng=theano_rng)

    # The first layer is handled separately; the remaining layers are
    # paired with the remaining include probabilities.
    first_layer = self.layers[0]
    rval = first_layer.fprop(state_below)
    if apply_dropout:
        first_prob = self.dropout_include_probs[0]
        rval = self.apply_dropout(state=rval, include_prob=first_prob,
                                  theano_rng=theano_rng)

    remaining = safe_izip(self.layers[1:], self.dropout_include_probs[1:])
    for layer, include_prob in remaining:
        rval = layer.fprop(rval)
        if not apply_dropout:
            continue
        rval = self.apply_dropout(state=rval, include_prob=include_prob,
                                  theano_rng=theano_rng)

    return rval
def __call__(self, model, X, Y=None, **kwargs):
    """
    Sum the weight-decay terms of every layer in `model.hidden_layers`.

    Parameters
    ----------
    model : object
        Must expose `hidden_layers`; each layer is paired with the
        coefficient at the same position in `self.coeffs`.
    X, Y : object
        Not used by this method.

    Returns
    -------
    total_cost : theano variable
        A scalar named 'weight_decay', or a constant 0 named
        '0_weight_decay' when every layer cost equals 0.
    """
    decay_terms = []
    for layer, coeff in safe_izip(model.hidden_layers, self.coeffs):
        decay_terms.append(layer.get_weight_decay(coeff))

    assert T.scalar() != 0.  # make sure theano semantics do what I want
    active_terms = [term for term in decay_terms if term != 0.]

    if not active_terms:
        zero = T.as_tensor_variable(0.)
        zero.name = '0_weight_decay'
        return zero

    # Left-to-right sum of the remaining terms.
    total_cost = active_terms[0]
    for term in active_terms[1:]:
        total_cost = total_cost + term
    total_cost.name = 'DBM_WeightDecay'
    assert total_cost.ndim == 0
    total_cost.name = 'weight_decay'
    return total_cost
def get_monitoring_channels(self, model, X, Y=None, drop_mask=None):
    """
    Build an ordered mapping of monitoring channels for this cost.

    The cost itself is invoked with `return_locals=True`; its `history`
    and `X_tilde` entries are then used to produce one objective channel
    per history step, difference channels between consecutive steps, and
    the per-layer channels of the final state.

    Returns
    -------
    rval : OrderedDict
        Channel name -> expression.
    """
    rval = OrderedDict()

    scratch = self(model, X, drop_mask, return_locals=True)
    history = scratch['history']
    X_tilde = scratch['X_tilde']

    for step, state in enumerate(history):
        rval['obj_after_' + str(step)] = self.cost_from_state(
            state, model, X, X_tilde)

        if step == 0:
            # Nothing to compare the first state against.
            continue

        earlier = history[step - 1]

        V_hat = state['V_hat']
        prev_V_hat = earlier['V_hat']
        assert V_hat is not prev_V_hat
        rval['max_pixel_diff[%d]' % step] = abs(V_hat - prev_V_hat).max()

        h0 = state['H_hat'][0]
        prev_h0 = earlier['H_hat'][0]
        assert h0 is not prev_h0
        rval['max_h0_diff[%d]' % step] = abs(h0[0] - prev_h0[0]).max()

    final_state = history[-1]

    layers = [model.visible_layer] + model.hidden_layers
    states = [final_state['V_hat']] + final_state['H_hat']

    for layer, layer_state in safe_izip(layers, states):
        layer_channels = layer.get_monitoring_channels_from_state(layer_state)
        for key in layer_channels:
            mod_key = 'final_denoise_' + layer.layer_name + '_' + key
            assert mod_key not in rval
            rval[mod_key] = layer_channels[key]

    return rval
def cost_from_state(self, state, dbm, X, X_tilde):
    """
    Build the cost expression for one inference state.

    The base term is the mean over examples of the squared difference
    between `beta * (X_tilde - V_hat)` and
    `self.noise_precision * (X_tilde - X)`, summed over axes 1, 2 and 3.
    When `self.l1_act_targets` is set, each hidden layer's
    `get_l1_activation_cost` is added.

    Returns
    -------
    total_cost : theano variable
        Named after `V_hat.name`.
    """
    V_hat = state['V_hat']

    diff = (dbm.visible_layer.beta * (X_tilde - V_hat)
            - self.noise_precision * (X_tilde - X))
    assert diff.ndim == 4

    total_cost = T.sqr(diff).sum(axis=(1, 2, 3)).mean()
    assert total_cost.ndim == 0

    if not hasattr(self, 'both_directions'):
        self.both_directions = False

    if self.l1_act_targets is not None:
        per_layer = safe_izip(state['H_hat'], self.l1_act_targets,
                              self.l1_act_coeffs, self.l1_act_eps,
                              dbm.hidden_layers)
        for mf_state, targets, coeffs, eps, layer in per_layer:
            assert not isinstance(targets, str)

            if not isinstance(targets, (list, tuple)):
                # Scalar specification: wrap everything in one-element
                # lists before handing it to the layer.
                assert not isinstance(mf_state, (list, tuple))
                mf_state = [mf_state]
                targets = [targets]
                coeffs = [coeffs]
                eps = [eps]

            total_cost += layer.get_l1_activation_cost(
                state=mf_state, targets=targets, coeffs=coeffs, eps=eps)

    total_cost.name = 'total_cost(V_hat = %s)' % V_hat.name

    return total_cost
def cost_from_state(self, state, dbm, X, X_tilde):
    """
    Return the cost expression associated with a single inference state.

    Combines a squared-difference term between the model term
    `beta * (X_tilde - V_hat)` and the noise term
    `self.noise_precision * (X_tilde - X)` with, when
    `self.l1_act_targets` is not None, the L1 activation cost reported by
    each hidden layer.

    Returns
    -------
    total_cost : theano variable
        Its name embeds `V_hat.name`.
    """
    V_hat = state['V_hat']
    visible_beta = dbm.visible_layer.beta

    from_model = visible_beta * (X_tilde - V_hat)
    from_noise = self.noise_precision * (X_tilde - X)
    mismatch = from_model - from_noise
    assert mismatch.ndim == 4

    squared = T.sqr(mismatch)
    total_cost = squared.sum(axis=(1, 2, 3)).mean()
    assert total_cost.ndim == 0

    if not hasattr(self, 'both_directions'):
        self.both_directions = False

    if self.l1_act_targets is None:
        total_cost.name = 'total_cost(V_hat = %s)' % V_hat.name
        return total_cost

    for mf_state, targets, coeffs, eps, layer in safe_izip(
            state['H_hat'], self.l1_act_targets, self.l1_act_coeffs,
            self.l1_act_eps, dbm.hidden_layers):
        assert not isinstance(targets, str)

        already_sequence = isinstance(targets, (list, tuple))
        if not already_sequence:
            assert not isinstance(mf_state, (list, tuple))
            mf_state, targets = [mf_state], [targets]
            coeffs, eps = [coeffs], [eps]

        total_cost += layer.get_l1_activation_cost(state=mf_state,
                                                   targets=targets,
                                                   coeffs=coeffs,
                                                   eps=eps)

    total_cost.name = 'total_cost(V_hat = %s)' % V_hat.name

    return total_cost
def expr(self, model, data, **kwargs):
    """
    Build the theano expression of the L1 weight penalty.

    Parameters
    ----------
    model : MLP
    data : tuple
        Validated against the space returned by
        `self.get_data_specs(model)`.

    Returns
    -------
    total_cost : theano.gof.Variable
        Sum of `layer.get_l1_weight_decay(coeff)` over the layers whose
        term is not 0, or a zero tensor if there is no such term.
    """
    data_space = self.get_data_specs(model)[0]
    data_space.validate(data)

    layer_costs = []
    for layer, coeff in safe_izip(model.layers, self.coeffs):
        layer_costs.append(layer.get_l1_weight_decay(coeff))

    # make sure theano semantics do what I want
    assert T.scalar() != 0.

    layer_costs = [term for term in layer_costs if term != 0.]

    if not layer_costs:
        rval = T.as_tensor_variable(0.)
        rval.name = '0_l1_penalty'
        return rval

    total_cost = reduce(operator.add, layer_costs)
    total_cost.name = 'MLP_L1Penalty'

    assert total_cost.ndim == 0

    total_cost.name = 'l1_penalty'

    return total_cost
def iterator(self, mode=None, batch_size=None, num_batches=None,
             rng=None, data_specs=None, return_tuple=False):
    """
    Create a `VariableImageDatasetIterator` over this dataset.

    Parameters
    ----------
    mode : str, optional
        One of the allowed iteration mode names; when None,
        `self._iter_subset_class` is used.
    batch_size : int, optional
        Falls back to `self._iter_batch_size` if that attribute exists.
    num_batches : int, optional
        Falls back to `self._iter_num_batches` if that attribute exists.
    rng : object, optional
        Falls back to `self.rng` when the resolved mode is stochastic.
    data_specs : tuple, optional
        `(space, source)` pair; defaults to `self._iter_data_specs`.
    return_tuple : bool, optional
        Forwarded to the iterator.

    Raises
    ------
    ValueError
        If `mode` is given but is not one of the allowed modes.
    """
    allowed_modes = ('sequential', 'random_slice', 'even_sequential',
                     'batchwise_shuffled_sequential',
                     'even_batchwise_shuffled_sequential')
    if mode is not None and mode not in allowed_modes:
        raise ValueError("Due to HDF5 limitations on advanced indexing, " +
                         "the '" + mode + "' iteration mode is not " +
                         "supported")

    if data_specs is None:
        data_specs = self._iter_data_specs

    space, source = data_specs
    if isinstance(space, CompositeSpace):
        sub_spaces, sub_sources = space.components, source
    else:
        sub_spaces, sub_sources = (space,), (source,)

    # One (empty) conversion slot per (space, source) pair.
    convert = [None for _pair in safe_izip(sub_spaces, sub_sources)]

    if mode is None:
        mode = self._iter_subset_class
    else:
        mode = resolve_iterator_class(mode)

    if batch_size is None:
        batch_size = getattr(self, '_iter_batch_size', None)
    if num_batches is None:
        num_batches = getattr(self, '_iter_num_batches', None)
    if rng is None and mode.stochastic:
        rng = self.rng

    subset_iterator = mode(self.num_examples, batch_size, num_batches, rng)
    return VariableImageDatasetIterator(dataset=self,
                                        subset_iterator=subset_iterator,
                                        data_specs=data_specs,
                                        return_tuple=return_tuple,
                                        convert=convert)
def expr(self, model, data, **kwargs):
    """
    Return the theano expression of the L1 penalty on the model weights.

    Parameters
    ----------
    model : MLP
    data : tuple
        Checked with `validate` against the space of
        `self.get_data_specs(model)`.

    Returns
    -------
    total_cost : theano.gof.Variable
        The non-zero `get_l1_weight_decay` terms of `model.layers` added
        together, named 'l1_penalty'; a zero tensor named '0_l1_penalty'
        when every term is 0.
    """
    self.get_data_specs(model)[0].validate(data)

    paired = safe_izip(model.layers, self.coeffs)
    layer_costs = [layer.get_l1_weight_decay(coeff)
                   for layer, coeff in paired]

    # make sure theano semantics do what I want
    assert T.scalar() != 0.

    layer_costs = [penalty for penalty in layer_costs if penalty != 0.]

    if len(layer_costs) > 0:
        total_cost = reduce(operator.add, layer_costs)
        total_cost.name = 'MLP_L1Penalty'

        assert total_cost.ndim == 0

        total_cost.name = 'l1_penalty'
        return total_cost

    rval = T.as_tensor_variable(0.)
    rval.name = '0_l1_penalty'
    return rval
def get_monitoring_channels(self, model, X, Y=None, drop_mask=None):
    """
    Collect the monitoring channels of this cost in an OrderedDict.

    Calls the cost with `return_locals=True` and reads its `history` and
    `X_tilde` entries. For every history step an 'obj_after_<i>' channel
    is added; for every step after the first, the maximum absolute change
    of `V_hat` and of the first `H_hat` entry is added; finally the
    channels reported by each layer for the last state are added with a
    'final_denoise_' prefix.

    Returns
    -------
    rval : OrderedDict
    """
    rval = OrderedDict()

    scratch = self(model, X, drop_mask, return_locals=True)
    history = scratch['history']
    X_tilde = scratch['X_tilde']

    for idx, current in enumerate(history):
        objective_key = 'obj_after_' + str(idx)
        rval[objective_key] = self.cost_from_state(current, model, X,
                                                   X_tilde)

        if idx > 0:
            before = history[idx - 1]

            V_hat = current['V_hat']
            prev_V_hat = before['V_hat']
            assert V_hat is not prev_V_hat
            pixel_change = abs(V_hat - prev_V_hat)
            rval['max_pixel_diff[%d]' % idx] = pixel_change.max()

            h0 = current['H_hat'][0]
            prev_h0 = before['H_hat'][0]
            assert h0 is not prev_h0
            hidden_change = abs(h0[0] - prev_h0[0])
            rval['max_h0_diff[%d]' % idx] = hidden_change.max()

    final_state = history[-1]

    all_layers = [model.visible_layer] + model.hidden_layers
    all_states = [final_state['V_hat']] + final_state['H_hat']

    for layer, layer_state in safe_izip(all_layers, all_states):
        reported = layer.get_monitoring_channels_from_state(layer_state)
        prefix = 'final_denoise_' + layer.layer_name + '_'
        for key in reported:
            mod_key = prefix + key
            assert mod_key not in rval
            rval[mod_key] = reported[key]

    return rval
def cost_from_states(self, state, new_state, dbm, X, Y, drop_mask,
                     drop_mask_Y, new_drop_mask, new_drop_mask_Y,
                     return_locals=False):
    """
    Build the total inpainting cost from one (or two) inference states.

    The base term comes from `self.get_inpaint_cost`. When `new_state` is
    given (which must coincide with `self.both_directions`), it is
    averaged with the reconstruction cost of the complementary mask.
    Optional range-reward, L1-activation and reweighted-activation terms
    are then added, depending on which attributes of `self` are set.

    Parameters
    ----------
    state : dict
        Must contain 'V_hat_unmasked' and 'H_hat'.
    new_state : dict or None
        State obtained with the complementary mask, or None.
    dbm : object
        Model providing `visible_layer` and `hidden_layers`.
    X, Y : theano variables
        Inputs and (if supervised) targets.
    drop_mask, drop_mask_Y, new_drop_mask, new_drop_mask_Y : objects
        Masks; the `_Y` masks must be None when not supervised.
    return_locals : bool, optional
        If true, `(total_cost, locals())` is returned. Callers look up
        entries of that dict by local-variable name, so the local names
        in this function are part of its contract.

    Returns
    -------
    total_cost : theano variable
        Or the pair described under `return_locals`.

    Raises
    ------
    NotImplementedError
        For the supervised case with a `new_state`, which is not
        implemented.
    """
    # Objects that lack this attribute get the default before the first
    # read below; previously the supervised branch read it before the
    # default was installed and could raise AttributeError.
    if not hasattr(self, 'both_directions'):
        self.both_directions = False

    if not self.supervised:
        assert drop_mask_Y is None
        assert new_drop_mask_Y is None
    if self.supervised:
        assert drop_mask_Y is not None
        if self.both_directions:
            assert new_drop_mask_Y is not None
        assert Y is not None

    V_hat_unmasked = state['V_hat_unmasked']
    assert V_hat_unmasked.ndim == X.ndim

    if not hasattr(self, 'use_sum'):
        self.use_sum = False

    inpaint_cost = self.get_inpaint_cost(dbm, X, V_hat_unmasked, drop_mask,
                                         state, Y, drop_mask_Y)

    assert self.both_directions == (new_state is not None)

    if new_state is not None:
        new_V_hat_unmasked = new_state['V_hat_unmasked']

        new_inpaint_cost = dbm.visible_layer.recons_cost(
            X, new_V_hat_unmasked, new_drop_mask)
        if self.supervised:
            # The Y reconstruction term for the complementary mask needs
            # a `scale` argument that was never defined, so this branch
            # has always raised.
            raise NotImplementedError(
                "This branch appears to be broken, needs to define scale.")
        # end if include_Y
        inpaint_cost = 0.5 * inpaint_cost + 0.5 * new_inpaint_cost
    # end if both directions

    total_cost = inpaint_cost

    if not hasattr(self, 'range_rewards'):
        self.range_rewards = None
    if self.range_rewards is not None:
        for layer, mf_state, coeffs in safe_izip(dbm.hidden_layers,
                                                 state['H_hat'],
                                                 self.range_rewards):
            try:
                layer_cost = layer.get_range_rewards(mf_state, coeffs)
            except NotImplementedError:
                # A layer without this feature is fine as long as its
                # coefficient is zero.
                if coeffs == 0.:
                    layer_cost = 0.
                else:
                    raise
            if layer_cost != 0.:
                total_cost += layer_cost

    if not hasattr(self, 'stdev_rewards'):
        self.stdev_rewards = None
    if self.stdev_rewards is not None:
        assert False  # not monitored yet
        for layer, mf_state, coeffs in safe_izip(dbm.hidden_layers,
                                                 state['H_hat'],
                                                 self.stdev_rewards):
            try:
                layer_cost = layer.get_stdev_rewards(mf_state, coeffs)
            except NotImplementedError:
                if coeffs == 0.:
                    layer_cost = 0.
                else:
                    raise
            if layer_cost != 0.:
                total_cost += layer_cost

    l1_act_cost = None
    if self.l1_act_targets is not None:
        l1_act_cost = 0.
        if self.l1_act_eps is None:
            self.l1_act_eps = [None] * len(self.l1_act_targets)
        for layer, mf_state, targets, coeffs, eps in \
                safe_izip(dbm.hidden_layers, state['H_hat'],
                          self.l1_act_targets, self.l1_act_coeffs,
                          self.l1_act_eps):
            assert not isinstance(targets, str)

            try:
                layer_cost = layer.get_l1_act_cost(mf_state, targets,
                                                   coeffs, eps)
            except NotImplementedError:
                if coeffs == 0.:
                    layer_cost = 0.
                else:
                    raise
            if layer_cost != 0.:
                l1_act_cost += layer_cost
        # end for layers
        total_cost += l1_act_cost
    # end if act penalty

    if not hasattr(self, 'hid_presynaptic_cost'):
        self.hid_presynaptic_cost = None
    if self.hid_presynaptic_cost is not None:
        assert False  # not monitored yet
        for c, s, in safe_izip(self.hid_presynaptic_cost, state['H_hat']):
            if c == 0.:
                continue
            s = s[1]
            assert hasattr(s, 'owner')
            owner = s.owner
            assert owner is not None
            op = owner.op

            if not hasattr(op, 'scalar_op'):
                raise ValueError(
                    "Expected V_hat_unmasked to be generated by an Elemwise op, got "
                    + str(op) + " of type " + str(type(op)))
            assert isinstance(op.scalar_op, T.nnet.sigm.ScalarSigmoid)
            z, = owner.inputs

            total_cost += c * T.sqr(z).mean()

    if not hasattr(self, 'reweighted_act_targets'):
        self.reweighted_act_targets = None
    reweighted_act_cost = None
    if self.reweighted_act_targets is not None:
        reweighted_act_cost = 0.
        warnings.warn(
            "reweighted_act_cost is hardcoded for sigmoid layers and doesn't check that this is "
            "what we get.")
        for c, t, s in safe_izip(self.reweighted_act_coeffs,
                                 self.reweighted_act_targets,
                                 state['H_hat']):
            if c == 0:
                continue
            s, _ = s
            m = s.mean(axis=0)
            d = T.sqr(m - t)
            weight = 1. / (1e-7 + s * (1 - s))
            reweighted_act_cost += c * (weight * d).mean()
        total_cost += reweighted_act_cost

    total_cost.name = 'total_cost(V_hat_unmasked = %s)' % V_hat_unmasked.name

    if return_locals:
        return total_cost, locals()

    return total_cost
def __call__(self):
    """
    Run the accumulation functions over every monitoring dataset and
    append one new record to each channel.
    """
    # Channels changed since the last compilation: rebuild the theano
    # functions before using them.
    if self._dirty:
        self.redo_theano()

    model = self.model
    datasets = self._datasets

    # Set all channels' val_shared to 0
    self.begin_record_entry()

    per_dataset = safe_izip(datasets, self._iteration_mode,
                            self._batch_size, self._num_batches,
                            self.accum, self._rng_seed)
    for d, i, b, n, a, sd in per_dataset:
        if isinstance(d, basestring):
            d = yaml_parse.load(d)
            raise NotImplementedError()
            # need to put d back into self._datasets

        myiterator = d.iterator(mode=i,
                                batch_size=b,
                                num_batches=n,
                                topo=self.topo,
                                targets=self.require_label,
                                rng=sd)

        for X in myiterator:
            if not self.require_label:
                self.run_prereqs(X, None, d)
                a(X)
                continue
            X, y = X
            self.run_prereqs(X, y, d)
            a(X, y)

    log.info("Monitoring step:")
    log.info("\tEpochs seen: %d" % self._epochs_seen)
    log.info("\tBatches seen: %d" % self._num_batches_seen)
    log.info("\tExamples seen: %d" % self._examples_seen)

    t = time.time() - self.t0
    ordered_names = sorted(self.channels.keys(),
                           key=number_aware_alphabetical_key)
    for channel_name in ordered_names:
        channel = self.channels[channel_name]

        channel.time_record.append(t)
        channel.batch_record.append(self._num_batches_seen)
        channel.example_record.append(self._examples_seen)
        channel.epoch_record.append(self._epochs_seen)

        val = channel.val_shared.get_value()
        channel.val_record.append(val)

        # TODO: use logging infrastructure so that user can configure
        # formatting
        val_str = str(val) if abs(val) < 1e4 else '%.3e' % val
        log.info("\t%s: %s" % (channel_name, val_str))
def redo_theano(self):
    """
    Recompile the Theano functions used by this monitor.

    Recompiling whenever channels are added lets Theano's optimizations
    remove (as far as they can) computations shared between new and old
    channels. It is also how the functions are regenerated after
    unpickling, since Theano functions should not be pickled.

    Attributes created on `self` during the call are passed to
    `self.register_names_to_del`.
    """
    self._dirty = False

    init_names = dir(self)

    # Gather, per dataset, the distinct prerequisites of all channels.
    self.prereqs = OrderedDict()
    for channel in self.channels.values():
        if channel.prereqs is None:
            continue
        dataset = channel.dataset
        if dataset not in self.prereqs:
            self.prereqs[dataset] = []
        prereqs = self.prereqs[dataset]
        for prereq in channel.prereqs:
            if prereq not in prereqs:
                prereqs.append(prereq)

    # Function resetting every channel accumulator to zero.
    updates = OrderedDict()
    for channel in self.channels.values():
        updates[channel.val_shared] = np.cast[config.floatX](0.0)
    with log_timing(log, "compiling begin_record_entry"):
        self.begin_record_entry = function(
            inputs=[],
            updates=updates,
            mode=self.theano_function_mode,
            name='Monitor.begin_record_entry')
    updates = OrderedDict()
    givens = OrderedDict()

    # Get the appropriate kind of theano variable to represent the data
    # the model acts on
    X = self.model.get_input_space().make_theano_batch(name="monitoring_X")
    if config.compute_test_value != 'off':
        m = self.model.get_test_batch_size()
        test_value = self.model.get_input_space().get_origin_batch(m)
        X.tag.test_value = np.cast[X.type.dtype](test_value)
    if self.require_label:
        Y = self.model.get_output_space().make_theano_batch(
            name="monitoring_Y")

    log.info('Monitored channels: ')
    for key in sorted(self.channels.keys()):
        mode = self.theano_function_mode
        if mode is not None and hasattr(mode, 'record'):
            mode.record.handle_line(
                'compiling monitor including channel ' + key + '\n')
        log.info('\t%s' % key)

    it = [dset.iterator(mode=iter_mode, num_batches=nb, batch_size=bs,
                        topo=self.topo)
          for dset, iter_mode, nb, bs in safe_izip(self._datasets,
                                                   self._iteration_mode,
                                                   self._num_batches,
                                                   self._batch_size)]
    num_examples = [np.cast[config.floatX](float(each.num_examples))
                    for each in it]

    givens = [OrderedDict() for _ in self._datasets]
    updates = [OrderedDict() for _ in self._datasets]
    for channel in self.channels.values():
        index = self._datasets.index(channel.dataset)
        d = self._datasets[index]
        g = givens[index]
        n = num_examples[index]
        u = updates[index]

        graph_input = channel.graph_input
        if isinstance(graph_input, (list, tuple)):
            g[graph_input[0]] = X
            g[graph_input[1]] = Y
        else:
            g[graph_input] = X

        if n == 0:
            raise ValueError(
                "Iterating over 0 examples results in divide by 0")

        batch_index = d.get_topo_batch_axis() if self.topo else 0
        # Weight the channel value by this batch's share of the examples.
        val = channel.val * T.cast(X.shape[batch_index], config.floatX) / n
        u[channel.val_shared] = channel.val_shared + val

    with log_timing(log, "Compiling accum"):
        # Check type of update expressions
        for up in updates:
            for key in up:
                if key.dtype != up[key].dtype:
                    raise TypeError('Monitoring channel shared variable '
                                    + key.name + ' has dtype ' + key.dtype
                                    + ' but is driven by an expression with type '
                                    + up[key].dtype)

        self.accum = []
        for idx, (g, u) in enumerate(safe_izip(givens, updates)):
            mode = self.theano_function_mode
            if mode is not None and hasattr(mode, 'record'):
                for elem in g:
                    mode.record.handle_line(
                        'g key ' + var_descriptor(elem) + '\n')
                    mode.record.handle_line(
                        'g val ' + var_descriptor(g[elem]) + '\n')
                for elem in u:
                    mode.record.handle_line(
                        'u key ' + var_descriptor(elem) + '\n')
                    mode.record.handle_line(
                        'u val ' + var_descriptor(u[elem]) + '\n')
            function_name = 'Monitor.accum[%d]' % idx
            if self.require_label:
                if mode is not None and hasattr(mode, 'record'):
                    mode.record.handle_line('compiling supervised accum\n')
                # Some channels may not depend on the data, ie, they might
                # just monitor the model parameters, or some shared
                # variable updated by the training algorithm, so we need
                # to ignore the unused input error
                self.accum.append(function([X, Y],
                                           givens=g,
                                           updates=u,
                                           mode=self.theano_function_mode,
                                           name=function_name))
            else:
                if mode is not None and hasattr(mode, 'record'):
                    mode.record.handle_line(
                        'compiling unsupervised accum\n')
                self.accum.append(function([X],
                                           givens=g,
                                           updates=u,
                                           mode=self.theano_function_mode,
                                           name=function_name))

        for a in self.accum:
            if mode is not None and hasattr(mode, 'record'):
                for elem in a.maker.fgraph.outputs:
                    mode.record.handle_line(
                        'accum output ' + var_descriptor(elem) + '\n')
            log.info("graph size: %d" % len(a.maker.fgraph.toposort()))

    final_names = dir(self)
    self.register_names_to_del(
        [name for name in final_names if name not in init_names])
def add_updates(old, new):
    """
    Record `old -> new` in `rval`, recursing in lockstep through nested
    lists/tuples.

    `rval` is not defined in this function; it is taken from the
    enclosing scope.
    """
    if not isinstance(old, (list, tuple)):
        rval[old] = new
        return

    for old_item, new_item in safe_izip(old, new):
        add_updates(old_item, new_item)
def mlp_pred(non_linearity):
    """
    Return the sum of the outputs of the two-layer paths defined by the
    paired weights in `model.W1` and `model.W2`.

    `X`, `model` and `T` are taken from the enclosing scope.

    Parameters
    ----------
    non_linearity : callable
        Applied to each first-layer product `T.dot(X, W)`.
    """
    pre_activations = [T.dot(X, first_W) for first_W in model.W1]
    hidden = map(non_linearity, pre_activations)
    outputs = [T.dot(h, second_W)
               for h, second_W in safe_izip(hidden, model.W2)]
    return sum(outputs)
def _next(self, next_index):
    """
    Fetch the batches for `next_index` from the dataset and return them
    as a tuple, applying the matching entry of `self._convert` to each
    batch when that entry is truthy.
    """
    batches = self._dataset.get(self._source, next_index)
    converted = []
    for batch, fn in safe_izip(batches, self._convert):
        converted.append(fn(batch) if fn else batch)
    return tuple(converted)
def redo_theano(self):
    """
    Recompile the Theano functions used by this monitor.

    This is called any time the channels need to be evaluated and the
    channel definitions have changed since the last call, or when the
    theano functions are unavailable for any other reason (first use
    after construction or deserialization, etc.)

    All channels attached to one dataset are compiled into the same
    theano function so that theano's optimizations can eliminate
    subexpressions shared between channels.

    Attributes created on `self` during the call are passed to
    `self.register_names_to_del`.

    Raises
    ------
    ValueError
        If a channel needs data but its dataset's iterator reports zero
        examples (the channel value is divided by that count).
    TypeError
        If a channel's shared variable and the expression driving it have
        different dtypes.
    """
    self._dirty = False

    # Recompute the data specs, since the channels may have changed.
    self._build_data_specs()

    init_names = dir(self)

    # Gather, per dataset, the distinct prerequisites of all channels.
    self.prereqs = OrderedDict()
    for channel in self.channels.values():
        if channel.prereqs is not None:
            dataset = channel.dataset
            if dataset not in self.prereqs:
                self.prereqs[dataset] = []
            prereqs = self.prereqs[dataset]
            for prereq in channel.prereqs:
                if prereq not in prereqs:
                    prereqs.append(prereq)

    # Function resetting every channel accumulator to zero.
    updates = OrderedDict()
    for channel in self.channels.values():
        updates[channel.val_shared] = np.cast[config.floatX](0.0)
    with log_timing(log, "compiling begin_record_entry"):
        self.begin_record_entry = function(
            inputs=[],
            updates=updates,
            mode=self.theano_function_mode,
            name='Monitor.begin_record_entry')

    # Get the appropriate kind of theano variable to represent the data
    # the model acts on
    batch_names = ['monitoring_%s' % s for s in self._flat_data_specs[1]]
    theano_args = self._flat_data_specs[0].make_theano_batch(batch_names)

    # Get a symbolic expression of the batch size
    # We do it here, rather than for each channel, because channels with an
    # empty data_specs do not use data, and are unable to extract the batch
    # size. The case where the whole data specs is empty is not supported.
    batch_size = self._flat_data_specs[0].batch_size(theano_args)

    # Also get a nested representation, for joint iteration
    # with each of channel.graph_input
    nested_theano_args = self._data_specs_mapping.nest(theano_args)
    if not isinstance(nested_theano_args, tuple):
        nested_theano_args = (nested_theano_args, )
    assert len(nested_theano_args) == (len(self.channels) + 1)

    log.info('Monitored channels: ')
    for key in sorted(self.channels.keys()):
        mode = self.theano_function_mode
        if mode is not None and hasattr(mode, 'record'):
            mode.record.handle_line('compiling monitor including ' +
                                    'channel ' + key + '\n')
        log.info('\t%s' % key)

    # Distinct loop-variable names: under Python 2 they leak into this
    # scope, and the old `n` was later mistaken for an example count.
    it = [dset.iterator(mode=iter_mode, num_batches=nb, batch_size=bs,
                        data_specs=self._flat_data_specs,
                        return_tuple=True)
          for dset, iter_mode, nb, bs in safe_izip(self._datasets,
                                                   self._iteration_mode,
                                                   self._num_batches,
                                                   self._batch_size)]
    self.num_examples = [np.cast[config.floatX](float(each.num_examples))
                         for each in it]

    givens = [OrderedDict() for _ in self._datasets]
    updates = [OrderedDict() for _ in self._datasets]
    for i, channel in enumerate(self.channels.values()):
        index = self._datasets.index(channel.dataset)
        d = self._datasets[index]
        g = givens[index]
        cur_num_examples = self.num_examples[index]
        u = updates[index]

        # Flatten channel.graph_input and the appropriate part of
        # nested_theano_args, to iterate jointly over them.
        c_mapping = DataSpecsMapping(channel.data_specs)
        channel_inputs = c_mapping.flatten(channel.graph_input,
                                           return_tuple=True)
        inputs = c_mapping.flatten(nested_theano_args[i + 1],
                                   return_tuple=True)

        for (channel_X, X) in safe_izip(channel_inputs, inputs):
            assert channel_X not in g or g[channel_X] is X
            assert channel_X.type == X.type, (channel_X.type, X.type)
            g[channel_X] = X

        if batch_size == 0:
            # No channel does need any data, so there is not need to
            # average results, and we will call the accum functions only
            # once.
            # TODO: better handling of channels not needing data when
            # some other channels need data.
            assert len(self._flat_data_specs[1]) == 0
            val = channel.val
        else:
            # Guard the divisor actually used below.
            if cur_num_examples == 0:
                raise ValueError("Iterating over 0 examples results in " +
                                 "divide by 0")
            val = (channel.val * T.cast(batch_size, config.floatX)
                   / cur_num_examples)
        u[channel.val_shared] = channel.val_shared + val

    with log_timing(log, "Compiling accum"):
        # Check type of update expressions
        for up in updates:
            for key in up:
                if key.dtype != up[key].dtype:
                    raise TypeError('Monitoring channel shared variable ' +
                                    key.name + ' has dtype ' + key.dtype +
                                    ' but is driven by an expression ' +
                                    'with type ' + up[key].dtype)

        self.accum = []
        for idx, packed in enumerate(safe_izip(givens, updates)):
            g, u = packed
            mode = self.theano_function_mode
            if mode is not None and hasattr(mode, 'record'):
                for elem in g:
                    mode.record.handle_line('g key ' +
                                            var_descriptor(elem) + '\n')
                    mode.record.handle_line('g val ' +
                                            var_descriptor(g[elem]) + '\n')
                for elem in u:
                    mode.record.handle_line('u key ' +
                                            var_descriptor(elem) + '\n')
                    mode.record.handle_line('u val ' +
                                            var_descriptor(u[elem]) + '\n')
            function_name = 'Monitor.accum[%d]' % idx
            if mode is not None and hasattr(mode, 'record'):
                mode.record.handle_line('compiling supervised accum\n')
            # Some channels may not depend on the data, ie, they might just
            # monitor the model parameters, or some shared variable updated
            # by the training algorithm, so we need to ignore the unused
            # input error
            self.accum.append(function(theano_args,
                                       givens=g,
                                       updates=u,
                                       mode=self.theano_function_mode,
                                       name=function_name))
        for a in self.accum:
            if mode is not None and hasattr(mode, 'record'):
                for elem in a.maker.fgraph.outputs:
                    mode.record.handle_line('accum output ' +
                                            var_descriptor(elem) + '\n')
            log.info("graph size: %d" % len(a.maker.fgraph.toposort()))

    final_names = dir(self)
    self.register_names_to_del(
        [name for name in final_names if name not in init_names])
def _fallback_next(self, next_index):
    """
    Index every entry of `self._raw_data` with `next_index` and return
    the results as a tuple, applying the matching entry of
    `self._convert` to each result when that entry is truthy.
    """
    # TODO: handle fancy-index copies by allocating a buffer and
    # using np.take()
    selected = []
    for data, fn in safe_izip(self._raw_data, self._convert):
        if fn:
            selected.append(fn(data[next_index]))
        else:
            selected.append(data[next_index])
    return tuple(selected)
def __call__(self, model, X, Y=None, drop_mask=None, drop_mask_Y=None,
             return_locals=False, include_toronto=True, **kwargs):
    """
    Build the total inpainting cost expression for `model` on `X`.

    Masks are generated with `self.mask_gen` when `drop_mask` is None,
    `model.do_inpainting` is run (a second time with the complementary
    masks when `self.both_directions` is set), and the resulting final
    states are passed to `self.cost_from_states`. Optional robustness and
    "toronto" activation terms are then added.

    Parameters
    ----------
    model : object
        Model providing `get_input_space`, `do_inpainting` and `mf`. It
        is given `cost` and `mask_gen` attributes if it lacks them.
    X : theano variable
    Y : theano variable, optional
        Required when `self.supervised`; ignored otherwise.
    drop_mask, drop_mask_Y : optional
        Masks to use instead of freshly generated ones.
    return_locals : bool, optional
        If true, `locals()` is returned instead of the cost. Callers read
        entries of that dict by local-variable name, so the names of the
        locals below must not change.
    include_toronto : bool, optional
        If false, the `toronto_act_targets` term is skipped.

    Returns
    -------
    total_cost : theano variable
        Named 'total_inpaint_cost'; or the dict described above.
    """

    if not self.supervised:
        assert drop_mask_Y is None
        Y = None  # ignore Y if some other cost is supervised and has made it get passed in
    if self.supervised:
        assert Y is not None
        if drop_mask is not None:
            assert drop_mask_Y is not None

    # Attach this cost and its mask generator to the model if missing.
    if not hasattr(model, 'cost'):
        model.cost = self
    if not hasattr(model, 'mask_gen'):
        model.mask_gen = self.mask_gen

    dbm = model

    X_space = model.get_input_space()

    if drop_mask is None:
        if self.supervised:
            drop_mask, drop_mask_Y = self.mask_gen(X, Y, X_space=X_space)
        else:
            drop_mask = self.mask_gen(X, X_space=X_space)

    if drop_mask_Y is not None:
        assert drop_mask_Y.ndim == 1

    # A mask with fewer dimensions than X gets a broadcastable 4th axis.
    if drop_mask.ndim < X.ndim:
        if self.mask_gen is not None:
            assert self.mask_gen.sync_channels
        if X.ndim != 4:
            raise NotImplementedError()
        drop_mask = drop_mask.dimshuffle(0, 1, 2, 'x')

    if not hasattr(self, 'noise'):
        self.noise = False

    history = dbm.do_inpainting(X, Y=Y, drop_mask=drop_mask, drop_mask_Y=drop_mask_Y, return_history=True, noise=self.noise, niter=self.niter, block_grad=self.block_grad)
    final_state = history[-1]

    # Defaults used when the complementary direction is not computed.
    new_drop_mask = None
    new_drop_mask_Y = None
    new_history = [None for state in history]

    if not hasattr(self, 'both_directions'):
        self.both_directions = False
    if self.both_directions:
        new_drop_mask = 1. - drop_mask
        if self.supervised:
            new_drop_mask_Y = 1. - drop_mask_Y
        new_history = dbm.do_inpainting(X, Y=Y, drop_mask=new_drop_mask, drop_mask_Y=new_drop_mask_Y, return_history=True, noise=self.noise, niter=self.niter, block_grad=self.block_grad)

    new_final_state = new_history[-1]

    total_cost, sublocals = self.cost_from_states(final_state, new_final_state, dbm, X, Y, drop_mask, drop_mask_Y, new_drop_mask, new_drop_mask_Y, return_locals=True)
    # Re-exposed as locals so they appear in the dict returned below.
    l1_act_cost = sublocals['l1_act_cost']
    inpaint_cost = sublocals['inpaint_cost']
    reweighted_act_cost = sublocals['reweighted_act_cost']

    if not hasattr(self, 'robustness'):
        self.robustness = None
    if self.robustness is not None:
        inpainting_H_hat = history[-1]['H_hat']
        mf_H_hat = dbm.mf(X, Y=Y)
        if self.supervised:
            inpainting_H_hat = inpainting_H_hat[:-1]
            mf_H_hat = mf_H_hat[:-1]
            for ihh, mhh in safe_izip(flatten(inpainting_H_hat), flatten(mf_H_hat)):
                total_cost += self.robustness * T.sqr(mhh - ihh).sum()

    if not hasattr(self, 'toronto_act_targets'):
        self.toronto_act_targets = None
    toronto_act_cost = None
    if self.toronto_act_targets is not None and include_toronto:
        toronto_act_cost = 0.
        H_hat = history[-1]['H_hat']
        for s, c, t in zip(H_hat, self.toronto_act_coeffs, self.toronto_act_targets):
            if c == 0.:
                continue
            s, _ = s
            m = s.mean(axis=0)
            toronto_act_cost += c * T.sqr(m - t).mean()
        total_cost += toronto_act_cost

    if return_locals:
        return locals()

    total_cost.name = 'total_inpaint_cost'

    return total_cost
def __call__(self):
    """
    Run the accumulation functions over every monitoring dataset and
    append one new record to each channel.

    Raises
    ------
    RuntimeError
        If the number of examples actually seen for a dataset differs
        from the count stored in `self.num_examples`.
    """
    # Channels changed since the last compilation: rebuild the theano
    # functions before using them.
    if self._dirty:
        self.redo_theano()

    model = self.model
    datasets = self._datasets

    # Set all channels' val_shared to 0
    self.begin_record_entry()

    per_dataset = safe_izip(datasets, self._iteration_mode,
                            self._batch_size, self._num_batches,
                            self.accum, self._rng_seed, self.num_examples)
    for d, i, b, n, a, sd, ne in per_dataset:
        if isinstance(d, basestring):
            d = yaml_parse.load(d)
            raise NotImplementedError()
            # need to put d back into self._datasets

        myiterator = d.iterator(mode=i,
                                batch_size=b,
                                num_batches=n,
                                topo=self.topo,
                                targets=self.require_label,
                                rng=sd)

        # Count the examples really delivered by the iterator.
        actual_ne = 0
        for X in myiterator:
            if not self.require_label:
                self.run_prereqs(X, None, d)
                a(X)
            else:
                X, y = X
                self.run_prereqs(X, y, d)
                a(X, y)

            batch_axis = 0 if X.ndim == 2 else d.get_topo_batch_axis()
            actual_batch_size = X.shape[batch_axis]
            actual_ne += actual_batch_size

        if actual_ne != ne:
            raise RuntimeError(
                "At compile time, your iterator said it had " + str(ne) +
                " examples total, but at runtime it gave us " +
                str(actual_ne) + ".")

    log.info("Monitoring step:")
    log.info("\tEpochs seen: %d" % self._epochs_seen)
    log.info("\tBatches seen: %d" % self._num_batches_seen)
    log.info("\tExamples seen: %d" % self._examples_seen)

    t = time.time() - self.t0
    ordered_names = sorted(self.channels.keys(),
                           key=number_aware_alphabetical_key)
    for channel_name in ordered_names:
        channel = self.channels[channel_name]

        channel.time_record.append(t)
        channel.batch_record.append(self._num_batches_seen)
        channel.example_record.append(self._examples_seen)
        channel.epoch_record.append(self._epochs_seen)

        val = channel.val_shared.get_value()
        channel.val_record.append(val)

        # TODO: use logging infrastructure so that user can configure
        # formatting
        val_str = str(val) if abs(val) < 1e4 else '%.3e' % val
        log.info("\t%s: %s" % (channel_name, val_str))
def _next(self, next_index):
    """
    Return the tuple of batches obtained from
    `self._dataset.get(self._source, next_index)`, each one passed
    through its entry of `self._convert` when that entry is truthy.
    """
    fetched = self._dataset.get(self._source, next_index)

    def _apply(fn, batch):
        # A falsy converter means "leave the batch untouched".
        return fn(batch) if fn else batch

    return tuple(_apply(fn, batch)
                 for batch, fn in safe_izip(fetched, self._convert))
def redo_theano(self):
    """
    Recompile the Theano functions used by this monitor.

    This is needed so that if new channels are added, Theano's
    optimizations make sure (to the extent that they can) that the new
    channels and old channels don't have any redundant calculations.

    It is also needed to regenerate Theano functions after pickling and
    unpickling, since Theano functions should not be pickled.

    Attributes created on `self` during the call are passed to
    `self.register_names_to_del`.

    Raises
    ------
    ValueError
        If a channel's dataset iterator reports zero examples (the
        channel value is divided by that count).
    TypeError
        If a channel's shared variable and the expression driving it have
        different dtypes.
    """
    self._dirty = False

    init_names = dir(self)

    # Gather, per dataset, the distinct prerequisites of all channels.
    self.prereqs = OrderedDict()
    for channel in self.channels.values():
        if channel.prereqs is not None:
            dataset = channel.dataset
            if dataset not in self.prereqs:
                self.prereqs[dataset] = []
            prereqs = self.prereqs[dataset]
            for prereq in channel.prereqs:
                if prereq not in prereqs:
                    prereqs.append(prereq)

    # Function resetting every channel accumulator to zero.
    updates = OrderedDict()
    for channel in self.channels.values():
        updates[channel.val_shared] = np.cast[config.floatX](0.0)
    with log_timing(log, "compiling begin_record_entry"):
        self.begin_record_entry = function(
            inputs=[],
            updates=updates,
            mode=self.theano_function_mode,
            name='Monitor.begin_record_entry')

    # Get the appropriate kind of theano variable to represent the data
    # the model acts on
    X = self.model.get_input_space().make_theano_batch(name="monitoring_X")
    if config.compute_test_value != 'off':
        m = self.model.get_test_batch_size()
        test_value = self.model.get_input_space().get_origin_batch(m)
        X.tag.test_value = np.cast[X.type.dtype](test_value)
    if self.require_label:
        Y = self.model.get_output_space().make_theano_batch(
            name="monitoring_Y")

    log.info('Monitored channels: ')
    for key in sorted(self.channels.keys()):
        mode = self.theano_function_mode
        if mode is not None and hasattr(mode, 'record'):
            mode.record.handle_line(
                'compiling monitor including channel ' + key + '\n')
        log.info('\t%s' % key)

    # Distinct loop-variable names: under Python 2 they leak into this
    # scope, and the old `n` was later mistaken for an example count.
    it = [dset.iterator(mode=iter_mode, num_batches=nb, batch_size=bs,
                        topo=self.topo)
          for dset, iter_mode, nb, bs in safe_izip(self._datasets,
                                                   self._iteration_mode,
                                                   self._num_batches,
                                                   self._batch_size)]
    self.num_examples = [np.cast[config.floatX](float(each.num_examples))
                         for each in it]

    givens = [OrderedDict() for _ in self._datasets]
    updates = [OrderedDict() for _ in self._datasets]
    for channel in self.channels.values():
        index = self._datasets.index(channel.dataset)
        d = self._datasets[index]
        g = givens[index]
        cur_num_examples = self.num_examples[index]
        u = updates[index]

        if isinstance(channel.graph_input, (list, tuple)):
            channel_X, channel_Y = channel.graph_input
            assert channel_X not in g or g[channel_X] is X
            assert channel_Y not in g or g[channel_Y] is Y
            g[channel_X] = X
            g[channel_Y] = Y
        else:
            channel_X = channel.graph_input
            assert channel_X not in g or g[channel_X] is X
            g[channel_X] = X

        # Guard the divisor actually used below.
        if cur_num_examples == 0:
            raise ValueError(
                "Iterating over 0 examples results in divide by 0")

        if self.topo:
            batch_index = d.get_topo_batch_axis()
        else:
            batch_index = 0
        # Weight the channel value by this batch's share of the examples.
        val = (channel.val * T.cast(X.shape[batch_index], config.floatX)
               / cur_num_examples)
        u[channel.val_shared] = channel.val_shared + val

    with log_timing(log, "Compiling accum"):
        # Check type of update expressions
        for up in updates:
            for key in up:
                if key.dtype != up[key].dtype:
                    raise TypeError('Monitoring channel shared variable '
                                    + key.name + ' has dtype ' + key.dtype
                                    + ' but is driven by an expression with type '
                                    + up[key].dtype)

        self.accum = []
        for idx, packed in enumerate(safe_izip(givens, updates)):
            g, u = packed
            mode = self.theano_function_mode
            if mode is not None and hasattr(mode, 'record'):
                for elem in g:
                    mode.record.handle_line('g key ' +
                                            var_descriptor(elem) + '\n')
                    mode.record.handle_line('g val ' +
                                            var_descriptor(g[elem]) + '\n')
                for elem in u:
                    mode.record.handle_line('u key ' +
                                            var_descriptor(elem) + '\n')
                    mode.record.handle_line('u val ' +
                                            var_descriptor(u[elem]) + '\n')
            function_name = 'Monitor.accum[%d]' % idx
            if self.require_label:
                if mode is not None and hasattr(mode, 'record'):
                    mode.record.handle_line('compiling supervised accum\n')
                # Some channels may not depend on the data, ie, they might
                # just monitor the model parameters, or some shared
                # variable updated by the training algorithm, so we need
                # to ignore the unused input error
                self.accum.append(function([X, Y],
                                           givens=g,
                                           updates=u,
                                           mode=self.theano_function_mode,
                                           name=function_name))
            else:
                if mode is not None and hasattr(mode, 'record'):
                    mode.record.handle_line(
                        'compiling unsupervised accum\n')
                self.accum.append(function([X],
                                           givens=g,
                                           updates=u,
                                           mode=self.theano_function_mode,
                                           name=function_name))
        for a in self.accum:
            if mode is not None and hasattr(mode, 'record'):
                for elem in a.maker.fgraph.outputs:
                    mode.record.handle_line('accum output ' +
                                            var_descriptor(elem) + '\n')
            log.info("graph size: %d" % len(a.maker.fgraph.toposort()))

    final_names = dir(self)
    self.register_names_to_del(
        [name for name in final_names if name not in init_names])
def get_monitoring_channels(self, model, X, Y=None, drop_mask=None,
                            drop_mask_Y=None, **kwargs):
    """
    Assemble the monitoring channels reported for this cost.

    The cost itself is evaluated once through ``self(..., return_locals=True)``
    and the returned dictionary of locals is mined for the quantities to
    monitor.

    Parameters
    ----------
    model : object
        Model being monitored. This method calls its `get_all_layers` and
        `mf` methods, and `inference_procedure.multi_infer` when
        `self.monitor_multi_inference` is set.
    X : object
        Input batch expression (presumably a Theano variable -- confirm
        against the caller). Only 4-dimensional inputs are supported when
        `drop_mask` has fewer dimensions than `X`.
    Y : object, optional
        Targets. Must not be None when `self.supervised` is true. They are
        reduced with `argmax` over axis 1, so they are presumably one-hot
        encoded -- confirm.
    drop_mask : object, optional
        Mask forwarded to `self(...)`. If it has fewer dimensions than `X`
        it is given a trailing broadcastable axis first.
    drop_mask_Y : object, optional
        Target mask forwarded to `self(...)`; in the supervised case it is
        replaced by the value found in the returned locals.
    **kwargs : dict
        Accepted and ignored.

    Returns
    -------
    channels : OrderedDict
        Maps channel names to the expressions to monitor.

    Raises
    ------
    NotImplementedError
        If `drop_mask` needs an extra axis and `X` is not 4-dimensional.
    """

    def cast_if_needed(var, dtype):
        # Only insert a cast when the dtype actually differs.
        if var.dtype != dtype:
            return T.cast(var, dtype)
        return var

    def misclass_rate(labels, prediction):
        # Mean disagreement between `labels` and the argmax of `prediction`,
        # expressed in the dtype of `prediction`.
        guess = cast_if_needed(T.argmax(prediction, axis=1), prediction.dtype)
        return cast_if_needed(T.neq(labels, guess).mean(), prediction.dtype)

    if self.supervised:
        assert Y is not None

    channels = OrderedDict()

    # TODO: shouldn't self() handle this?
    mask_lacks_axis = drop_mask is not None and drop_mask.ndim < X.ndim
    if mask_lacks_axis:
        if self.mask_gen is not None:
            assert self.mask_gen.sync_channels
        if X.ndim != 4:
            raise NotImplementedError()
        drop_mask = drop_mask.dimshuffle(0, 1, 2, 'x')

    scratch = self(model, X, Y, drop_mask=drop_mask,
                   drop_mask_Y=drop_mask_Y, return_locals=True)

    history = scratch['history']
    new_history = scratch['new_history']
    new_drop_mask = scratch['new_drop_mask']
    drop_mask = scratch['drop_mask']
    if self.supervised:
        drop_mask_Y = scratch['drop_mask_Y']
        new_drop_mask_Y = scratch['new_drop_mask_Y']
    else:
        new_drop_mask_Y = None

    # Number only the cost terms that are actually present.
    term_index = 0
    for term_name in ('inpaint_cost', 'l1_act_cost', 'toronto_act_cost',
                      'reweighted_act_cost'):
        term = scratch[term_name]
        if term is None:
            continue
        channels['total_inpaint_cost_term_' + str(term_index) + '_' +
                 term_name] = term
        term_index += 1

    if self.monitor_each_step:
        for step, (state, new_state) in enumerate(
                safe_izip(history, new_history)):
            channels['all_inpaint_costs_after_' + str(step)] = \
                self.cost_from_states(state, new_state, model, X, Y,
                                      drop_mask, drop_mask_Y,
                                      new_drop_mask, new_drop_mask_Y)

            if step > 0:
                # Largest change of V_hat with respect to the previous step
                delta = state['V_hat'] - history[step - 1]['V_hat']
                channels['max_pixel_diff[%d]' % step] = abs(delta).max()

    final_state = history[-1]

    #empirical beta code--should be moved to gaussian visible layer, should support topo data
    #V_hat = final_state['V_hat']
    #err = X - V_hat
    #masked_err = err * drop_mask
    #sum_sqr_err = T.sqr(masked_err).sum(axis=0)
    #recons_count = T.cast(drop_mask.sum(axis=0), 'float32')

    # empirical_beta = recons_count / sum_sqr_err
    # assert empirical_beta.ndim == 1

    #rval['empirical_beta_min'] = empirical_beta.min()
    #rval['empirical_beta_mean'] = empirical_beta.mean()
    #rval['empirical_beta_max'] = empirical_beta.max()

    layers = model.get_all_layers()
    states = [final_state['V_hat']] + final_state['H_hat']

    # Per-layer channels computed from the final state
    for layer, state in safe_izip(layers, states):
        layer_channels = layer.get_monitoring_channels_from_state(state)
        for key in layer_channels:
            mod_key = 'final_inpaint_' + layer.layer_name + '_' + key
            assert mod_key not in channels
            channels[mod_key] = layer_channels[key]

    if self.supervised:
        inpaint_Y_hat = history[-1]['H_hat'][-1]
        mistakes = T.neq(T.argmax(inpaint_Y_hat, axis=1),
                         T.argmax(Y, axis=1))
        assert mistakes.ndim == 1
        assert drop_mask_Y.ndim == 1
        # Error rate restricted to the examples selected by drop_mask_Y
        inpaint_err = T.dot(mistakes, drop_mask_Y) / drop_mask_Y.sum()
        channels['inpaint_err'] = cast_if_needed(inpaint_err,
                                                 inpaint_Y_hat.dtype)

        Y_hat = model.mf(X)[-1]
        labels = T.cast(T.argmax(Y, axis=1), Y_hat.dtype)
        channels['err'] = misclass_rate(labels, Y_hat)

        if self.monitor_multi_inference:
            multi_Y_hat = model.inference_procedure.multi_infer(X)
            channels['multi_err'] = misclass_rate(labels, multi_Y_hat)

    return channels
def add_updates(old, new):
    """
    Record `new` as the replacement for `old` in the enclosing `rval`.

    When `old` is a list or tuple, `old` and `new` are walked in lockstep
    and each pair is recorded recursively, so arbitrarily nested
    lists/tuples are flattened into individual `rval[old_leaf] = new_leaf`
    entries.

    Parameters
    ----------
    old : object
        Key to record, or a (possibly nested) list/tuple of keys.
    new : object
        Value to record, or a structure paired element-wise with `old`.
    """
    if not isinstance(old, (list, tuple)):
        # Leaf: `rval` is a free variable taken from the enclosing scope.
        rval[old] = new
        return

    for old_item, new_item in safe_izip(old, new):
        add_updates(old_item, new_item)
def mlp_pred(non_linearity):
    """
    Build the summed two-stage projection of `X`.

    `X`, `model` and `T` are free names resolved outside this function.
    `X` is multiplied by every matrix in `model.W1`, `non_linearity` is
    applied to each product, each activation is multiplied by the paired
    entry of `model.W2`, and the results are added together.

    Parameters
    ----------
    non_linearity : callable
        Applied to each first-stage product.

    Returns
    -------
    pred : object
        Sum of the second-stage products (`sum` starts from 0, so an
        empty `model.W1` yields 0).
    """
    pre_activations = [T.dot(X, w_in) for w_in in model.W1]
    activations = [non_linearity(pre) for pre in pre_activations]
    outputs = [T.dot(act, w_out)
               for act, w_out in safe_izip(activations, model.W2)]
    return sum(outputs)
def add_dataset(self, dataset, mode='sequential', batch_size=None,
                num_batches=None, seed=None):
    """
    Determines the data used to calculate the values of each channel.

    Every argument may be given either as a single value (one dataset) or
    as a list with one entry per dataset.

    Parameters
    ----------
    dataset : object
        A `pylearn2.datasets.Dataset` object.
    mode : str or object, optional
        Iteration mode; see the docstring of the `iterator` method
        on `pylearn2.datasets.Dataset` for details.
    batch_size : int, optional
        The size of an individual batch. Optional if `mode` is
        'sequential' and `num_batches` is specified (batch size
        will be calculated based on full dataset size).
    num_batches : int, optional
        The total number of batches. Unnecessary if `mode` is
        'sequential' and `batch_size` is specified (number of
        batches will be calculated based on full dataset size).
    seed : int, optional
        Optional. The seed to be used for random iteration modes.

    Raises
    ------
    ValueError
        If `mode`, `batch_size`, `num_batches` or `seed` does not supply
        exactly one entry per dataset, or if a dataset's `iterator`
        method rejects the requested parameters.
    TypeError
        If a stochastic iteration mode is requested without a seed, or
        with a seed that is not a list, tuple or int.
    """
    # The user can omit using lists if only one dataset is set
    if not isinstance(dataset, list):
        dataset = [dataset]
    if not isinstance(mode, list):
        mode = [mode]
    if not isinstance(batch_size, list):
        batch_size = [batch_size]
    if not isinstance(num_batches, list):
        num_batches = [num_batches]
    if seed is None:
        seed = [None] * len(dataset)
    if not isinstance(seed, list):
        seed = [seed]

    if len(mode) != len(dataset):
        raise ValueError("Received " + str(len(dataset)) +
                         " dataset but " + str(len(mode)) + " modes.")
    # num_batches is validated here as well: the message below already
    # refers to it, and an explicit ValueError is clearer than whatever
    # the lockstep iteration further down does with ragged inputs.
    if any(len(l) != len(dataset)
           for l in (batch_size, num_batches, seed)):
        raise ValueError("make sure each dataset has its iteration " +
                         "batch size and number of batches.")

    for (d, m, b, n, sd) in safe_izip(dataset, mode, batch_size,
                                      num_batches, seed):
        # Build an iterator once, purely to validate the parameters and to
        # find out whether the requested mode is stochastic.
        try:
            it = d.iterator(mode=m,
                            batch_size=b,
                            num_batches=n,
                            data_specs=self._flat_data_specs,
                            return_tuple=True,
                            rng=sd)
        except ValueError as exc:
            raise ValueError("invalid iteration parameters in " +
                             "Monitor.add_dataset: " + str(exc))
        if it.stochastic:
            # Must be a seed, not a random number generator. If it were a
            # random number generator, different iterators using it would
            # update its state, so we would not get the same iterator
            # each time. Also, must not be None, because this makes the
            # iterator pick a seed based on the clock
            if sd is None:
                raise TypeError("Monitor requires a seed when using " +
                                "stochastic iteration modes.")
            if not isinstance(sd, (list, tuple, int)):
                raise TypeError("Monitor requires a seed (not a random " +
                                "number generator) when using " +
                                "stochastic iteration modes.")
        else:
            # The iterator should catch this, but let's double-check
            assert sd is None

        # A dataset that is already registered keeps its original settings.
        if d not in self._datasets:
            self._datasets.append(d)
            self._iteration_mode.append(m)
            self._batch_size.append(b)
            self._num_batches.append(n)
            self._rng_seed.append(sd)
def mlp_pred(non_linearity):
    """
    Build the summed two-stage projection of `X`.

    `X`, `model` and `T` are free names resolved outside this function.
    `X` is multiplied by every matrix in `model.W1`, `non_linearity` is
    applied to each product, each activation is multiplied by the paired
    entry of `model.W2`, and the results are added together.

    Parameters
    ----------
    non_linearity : callable
        Applied to each first-stage product.

    Returns
    -------
    pred : object
        Sum of the second-stage products (`sum` starts from 0, so an
        empty `model.W1` yields 0).
    """
    Z = [T.dot(X, W) for W in model.W1]
    # Materialise the activations as a list. On Python 3 `map` returns a
    # one-shot iterator without a length, which is not a safe argument
    # for `safe_izip` (presumably it compares argument lengths -- confirm).
    # On Python 2 the result is the same list `map` produced.
    H = [non_linearity(z) for z in Z]
    Z = [T.dot(h, W) for h, W in safe_izip(H, model.W2)]
    pred = sum(Z)
    return pred
def __call__(self):
    """
    Runs the model on the monitoring dataset in order to add one
    data point to each of the channels.

    Every registered dataset is iterated with its stored iteration
    settings and each batch is passed to that dataset's accumulator
    function. Afterwards the current value of every channel is appended
    to the channel's records and logged.

    Raises
    ------
    NotImplementedError
        If a registered dataset is still a string.
    RuntimeError
        If the number of examples seen while iterating differs from the
        count stored in `self.num_examples` for that dataset.
    """
    # If the channels have changed at all, we need to recompile the theano
    # functions used to compute them
    if self._dirty:
        self.redo_theano()

    datasets = self._datasets

    # Set all channels' val_shared to 0
    self.begin_record_entry()

    per_dataset = safe_izip(datasets,
                            self._iteration_mode,
                            self._batch_size,
                            self._num_batches,
                            self.accum,
                            self._rng_seed,
                            self.num_examples)
    for (dataset, iter_mode, size, count, accumulate, rng_seed,
         expected_ne) in per_dataset:
        if isinstance(dataset, basestring):
            dataset = yaml_parse.load(dataset)
            raise NotImplementedError()
            # need to put d back into self._datasets
        batches = dataset.iterator(mode=iter_mode,
                                   batch_size=size,
                                   num_batches=count,
                                   data_specs=self._flat_data_specs,
                                   return_tuple=True,
                                   rng=rng_seed)

        if len(self._flat_data_specs[1]) != 0:
            seen_ne = 0
            for batch in batches:
                # batch is a flat (not nested) tuple
                self.run_prereqs(batch, dataset)
                accumulate(*batch)
                seen_ne += self._flat_data_specs[0].np_batch_size(batch)
            if seen_ne != expected_ne:
                raise RuntimeError("At compile time, your iterator said "
                                   "it had %d examples total, but at "
                                   "runtime it gave us %d." %
                                   (expected_ne, seen_ne))
        else:
            # If self._flat_data_specs is empty, no channel needs data,
            # so we do not need to call the iterator in order to average
            # the monitored values across different batches, we only
            # have to call them once.
            batch = ()
            self.run_prereqs(batch, dataset)
            accumulate(*batch)

    log.info("Monitoring step:")
    log.info("\tEpochs seen: %d" % self._epochs_seen)
    log.info("\tBatches seen: %d" % self._num_batches_seen)
    log.info("\tExamples seen: %d" % self._examples_seen)

    elapsed = time.time() - self.t0
    ordered_names = sorted(self.channels,
                           key=number_aware_alphabetical_key)
    for channel_name in ordered_names:
        channel = self.channels[channel_name]
        channel.time_record.append(elapsed)
        channel.batch_record.append(self._num_batches_seen)
        channel.example_record.append(self._examples_seen)
        channel.epoch_record.append(self._epochs_seen)
        val = channel.val_shared.get_value()
        channel.val_record.append(val)
        # TODO: use logging infrastructure so that user can configure
        # formatting
        # Large magnitudes are shown in scientific notation.
        val_str = str(val) if abs(val) < 1e4 else '%.3e' % val
        log.info("\t%s: %s" % (channel_name, val_str))