def get_gradients(self, model, X, Y=None, **kwargs):
    """
    .. todo::

        WRITEME
    """
    scratch = self(model, X, Y, include_toronto=False, return_locals=True,
                   **kwargs)

    total_cost = scratch['total_cost']

    params = list(model.get_params())
    grads = dict(safe_zip(params, T.grad(total_cost, params,
                                         disconnected_inputs='ignore')))

    if self.toronto_act_targets is not None:
        H_hat = scratch['history'][-1]['H_hat']
        for i, packed in enumerate(safe_zip(H_hat, self.toronto_act_coeffs,
                                            self.toronto_act_targets)):
            s, c, t = packed
            if c == 0.:
                continue
            s, _ = s
            m = s.mean(axis=0)
            m_cost = c * T.sqr(m - t).mean()
            real_grads = T.grad(m_cost, s)
            if i == 0:
                below = X
            else:
                below = H_hat[i - 1][0]
            W, = model.hidden_layers[i].transformer.get_params()
            assert W in grads
            b = model.hidden_layers[i].b

            ancestor = T.scalar()
            hack_W = W + ancestor
            hack_b = b + ancestor
            fake_s = T.dot(below, hack_W) + hack_b
            if fake_s.ndim != real_grads.ndim:
                print fake_s.ndim
                print real_grads.ndim
                assert False
            sources = [(fake_s, real_grads)]
            fake_grads = T.grad(cost=None, known_grads=dict(sources),
                                wrt=[below, ancestor, hack_W, hack_b])
            grads[W] = grads[W] + fake_grads[2]
            grads[b] = grads[b] + fake_grads[3]

    return grads, OrderedDict()
def expr(self, model, data, ** kwargs):
    """
    Returns the sum of the costs the SumOfCosts instance was given at
    initialization.

    Parameters
    ----------
    model : pylearn2.models.model.Model
        the model for which we want to calculate the sum of costs
    data : flat tuple of tensor_like variables.
        data has to follow the format defined by self.get_data_specs(),
        but this format will always be a flat tuple.
    """
    self.get_data_specs(model)[0].validate(data)
    composite_specs, mapping = self.get_composite_specs_and_mapping(model)
    nested_data = mapping.nest(data)
    costs = []
    for cost, cost_data in safe_zip(self.costs, nested_data):
        costs.append(cost.expr(model, cost_data, **kwargs))
    assert len(costs) > 0

    if any([cost is None for cost in costs]):
        sum_of_costs = None
    else:
        costs = [coeff * cost
                 for coeff, cost in safe_zip(self.coeffs, costs)]
        assert len(costs) > 0
        sum_of_costs = reduce(lambda x, y: x + y, costs)

    return sum_of_costs
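# Hedged usage sketch (not from the original source): how the weighted sum
# computed by SumOfCosts.expr above behaves, using plain Theano scalars in
# place of real per-cost expressions. All variable names here are
# illustrative assumptions.
import theano.tensor as T
from pylearn2.utils import safe_zip

coeffs = [0.5, 1.0]
costs = [T.scalar('cost_0'), T.scalar('cost_1')]
weighted = [coeff * cost for coeff, cost in safe_zip(coeffs, costs)]
# equivalent to 0.5 * cost_0 + 1.0 * cost_1
sum_of_costs = reduce(lambda x, y: x + y, weighted)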
def _read_hdf5(self, sources, aliases, load_all=False, use_h5py=True):
    """
    Loads elements from an HDF5 dataset using either h5py or tables. It
    can load either the whole object in memory or a reference to the
    object on disk, depending on the load_all parameter. Returns a list
    of objects.

    Parameters
    ----------
    sources : list of str
        List of HDF5 keys corresponding to the data to be loaded.
    load_all : bool, optional (default False)
        If true, load dataset into memory.
    use_h5py : bool, optional (default True)
        If true uses h5py, else tables.
    """
    data = alias_dict()
    if use_h5py:
        for s, a in safe_zip(sources, aliases):
            if load_all:
                data[s, a] = self._fhandler[s][:]
            else:
                data[s, a] = self._fhandler[s]
                # hdf5 handle has no ndim
                data[s].ndim = len(data[s].shape)
    else:
        for s, a in safe_zip(sources, aliases):
            if load_all:
                data[s, a] = self._fhandler.getNode('/', s)[:]
            else:
                data[s, a] = self._fhandler.getNode('/', s)
    return data
def _read_hdf5(self, sources, aliases, load_all=False, use_h5py=True):
    """
    Loads elements from an HDF5 dataset using either h5py or tables. It
    can load either the whole object in memory or a reference to the
    object on disk, depending on the load_all parameter. Returns a list
    of objects.

    Parameters
    ----------
    sources : list of str
        List of HDF5 keys corresponding to the data to be loaded.
    load_all : bool, optional (default False)
        If true, load dataset into memory.
    use_h5py : bool, optional (default True)
        If true uses h5py, else tables.
    """
    data = alias_dict()
    if use_h5py:
        for s, a in safe_zip(sources, aliases):
            if load_all:
                data[s, a] = self._fhandler[s][:]
            else:
                data[s, a] = self._fhandler[s]
                # hdf5 handle has no ndim
                data[s].ndim = len(data[s].shape)
    else:
        for s, a in safe_zip(sources, aliases):
            if load_all:
                data[s, a] = self._fhandler.getNode("/", s)[:]
            else:
                data[s, a] = self._fhandler.getNode("/", s)
    return data
def get_gradients(self, model, X, Y=None, **kwargs):
    """
    .. todo::

        WRITEME
    """
    if Y is None:
        data = X
    else:
        data = (X, Y)

    scratch = self.expr(model, data, include_toronto=False,
                        return_locals=True, **kwargs)

    total_cost = scratch['total_cost']

    params = list(model.get_params())
    grads = dict(safe_zip(params, T.grad(total_cost, params,
                                         disconnected_inputs='ignore')))

    if self.toronto_act_targets is not None:
        H_hat = scratch['history'][-1]['H_hat']
        for i, packed in enumerate(safe_zip(H_hat, self.toronto_act_coeffs,
                                            self.toronto_act_targets)):
            s, c, t = packed
            if c == 0.:
                continue
            s, _ = s
            m = s.mean(axis=0)
            m_cost = c * T.sqr(m - t).mean()
            real_grads = T.grad(m_cost, s)
            if i == 0:
                below = X
            else:
                below = H_hat[i - 1][0]
            W, = model.hidden_layers[i].transformer.get_params()
            assert W in grads
            b = model.hidden_layers[i].b

            ancestor = T.scalar()
            hack_W = W + ancestor
            hack_b = b + ancestor
            fake_s = T.dot(below, hack_W) + hack_b
            if fake_s.ndim != real_grads.ndim:
                print fake_s.ndim
                print real_grads.ndim
                assert False
            sources = [(fake_s, real_grads)]
            fake_grads = T.grad(cost=None, known_grads=dict(sources),
                                wrt=[below, ancestor, hack_W, hack_b])
            grads[W] = grads[W] + fake_grads[2]
            grads[b] = grads[b] + fake_grads[3]

    return grads, OrderedDict()
def get_gradients(self, model, data, **kwargs):
    self.get_data_specs(model)[0].validate(data)
    obj, scratch = self.base_cost(model, data, return_locals=True,
                                  **kwargs)
    if self.supervised:
        assert isinstance(data, (list, tuple))
        assert len(data) == 2
        (X, Y) = data
    else:
        X, = data

    interm_grads = OrderedDict()

    H_hat = scratch['H_hat']
    terms = scratch['terms']
    hidden_layers = scratch['hidden_layers']

    grads = OrderedDict()
    assert len(H_hat) == len(terms)
    assert len(terms) == len(hidden_layers)
    num_layers = len(hidden_layers)
    for i in xrange(num_layers):
        state = H_hat[i]
        layer = model.hidden_layers[i]
        term = terms[i]

        if term == 0.:
            continue
        else:
            print 'term is ', term

        if i == 0:
            state_below = X
            layer_below = model.visible_layer
        else:
            layer_below = model.hidden_layers[i - 1]
            state_below = H_hat[i - 1]
        state_below = layer_below.upward_state(state_below)

        components = flatten(state)

        real_grads = T.grad(term, components)

        fake_state = layer.linear_feed_forward_approximation(state_below)

        fake_components = flatten(fake_state)
        real_grads = OrderedDict(safe_zip(fake_components, real_grads))

        params = list(layer.get_params())
        fake_grads = T.grad(cost=None,
                            consider_constant=flatten(state_below),
                            wrt=params,
                            known_grads=real_grads)

        for param, grad in safe_zip(params, fake_grads):
            if param in grads:
                grads[param] = grads[param] + grad
            else:
                grads[param] = grad

    return grads, OrderedDict()
def get_gradients(self, model, X, Y=None, **kwargs):
    obj, scratch = self.base_cost(model, X, Y, return_locals=True,
                                  **kwargs)

    interm_grads = OrderedDict()

    H_hat = scratch['H_hat']
    terms = scratch['terms']
    hidden_layers = scratch['hidden_layers']

    grads = OrderedDict()
    assert len(H_hat) == len(terms)
    assert len(terms) == len(hidden_layers)
    num_layers = len(hidden_layers)
    for i in xrange(num_layers):
        state = H_hat[i]
        layer = model.hidden_layers[i]
        term = terms[i]

        if term == 0.:
            continue
        else:
            print 'term is ', term

        if i == 0:
            state_below = X
            layer_below = model.visible_layer
        else:
            layer_below = model.hidden_layers[i - 1]
            state_below = H_hat[i - 1]
        state_below = layer_below.upward_state(state_below)

        components = flatten(state)

        real_grads = T.grad(term, components)

        fake_state = layer.linear_feed_forward_approximation(state_below)

        fake_components = flatten(fake_state)
        real_grads = OrderedDict(safe_zip(fake_components, real_grads))

        params = list(layer.get_params())
        fake_grads = T.grad(cost=None,
                            consider_constant=flatten(state_below),
                            wrt=params,
                            known_grads=real_grads)

        for param, grad in safe_zip(params, fake_grads):
            if param in grads:
                grads[param] = grads[param] + grad
            else:
                grads[param] = grad

    return grads, OrderedDict()
def get_monitoring_channels(self, data):
    """
    .. todo::

        WRITEME
    """
    space, source = self.get_monitoring_data_specs()
    space.validate(data)
    X = data
    history = self.mf(X, return_history=True)
    q = history[-1]

    rval = OrderedDict()

    ch = self.visible_layer.get_monitoring_channels()
    for key in ch:
        rval['vis_' + key] = ch[key]

    for state, layer in safe_zip(q, self.hidden_layers):
        ch = layer.get_monitoring_channels()
        for key in ch:
            rval[layer.layer_name + '_' + key] = ch[key]
        ch = layer.get_monitoring_channels_from_state(state)
        for key in ch:
            rval['mf_' + layer.layer_name + '_' + key] = ch[key]

    if len(history) > 1:
        prev_q = history[-2]

        flat_q = flatten(q)
        flat_prev_q = flatten(prev_q)

        mx = None
        for new, old in safe_zip(flat_q, flat_prev_q):
            cur_mx = abs(new - old).max()
            if new is old:
                logger.error('{0} is {1}'.format(new, old))
                assert False
            if mx is None:
                mx = cur_mx
            else:
                mx = T.maximum(mx, cur_mx)

        rval['max_var_param_diff'] = mx

        for layer, new, old in safe_zip(self.hidden_layers, q, prev_q):
            sum_diff = 0.
            for sub_new, sub_old in safe_zip(flatten(new), flatten(old)):
                sum_diff += abs(sub_new - sub_old).sum()
            denom = self.batch_size * \
                layer.get_total_state_space().get_total_dimension()
            denom = np.cast[config.floatX](denom)
            rval['mean_' + layer.layer_name + '_var_param_diff'] = \
                sum_diff / denom

    return rval
def model(self, large=None, last_layer=None, seed=None):
    """
    Creates the MLP model based on internal attributes.

    Parameters
    ----------
    large : bool, optional
        The variant - large or small; by default, the value stored in
        the instance is used.
    last_layer : optional
        Last layer in the network
    seed : optional
        Seed for random number generator

    Returns
    -------
    model : pylearn2.models.mlp.MLP
        The model
    """
    laylist = self.layers()
    model = MLP(layers=laylist,
                input_space=Conv2DSpace(shape=self.shape,
                                        num_channels=3,
                                        axes=['b', 0, 1, 'c']),
                seed=seed)
    last_layer_std = None
    index = 0
    for lay in laylist[:last_layer_std]:
        if not isinstance(lay, (ZeroPad, Softmax)):
            # we simulate a get_weights method here as
            # the class does not provide one
            # It does provide a get_weights_topo() but that is useless
            # as the shape is changed
            # example:
            #     get_weights => (96, 3, 7, 7)
            #     get_weights_topo => (96, 7, 7, 3)
            crt_w = lay.transformer.get_params()[0].get_value()
            #crt_w = lay.get_weights_topo()
            crt_b = lay.get_biases()
            assert all([crt == new for crt, new in safe_zip(
                crt_w.shape, self.weights[index].shape)])
            assert all([crt == new for crt, new in safe_zip(
                crt_b.shape, self.biases[index].shape)])
            lay.set_weights(self.weights[index])
            lay.set_biases(self.biases[index])
            index = index + 1
    return model
def get_expected_warning(from_space, from_batch, to_space):

    # composite -> composite
    if isinstance(from_space, CompositeSpace) and \
       isinstance(to_space, CompositeSpace):
        for fs, fb, ts in safe_zip(from_space.components,
                                   from_batch,
                                   to_space.components):
            warning, message = get_expected_warning(fs, fb, ts)
            if warning is not None:
                return warning, message

        return None, None

    # composite -> simple
    if isinstance(from_space, CompositeSpace):
        for fs, fb in safe_zip(from_space.components, from_batch):
            warning, message = get_expected_warning(fs, fb, to_space)
            if warning is not None:
                return warning, message

        return None, None

    # simple -> composite
    if isinstance(to_space, CompositeSpace):
        if isinstance(from_space, VectorSpace) and \
           isinstance(from_batch, theano.sparse.SparseVariable):
            assert from_space.sparse
            return (UserWarning,
                    'Formatting from a sparse VectorSpace to a '
                    'CompositeSpace is currently (2 Jan 2014) a '
                    'non-differentiable action. This is because it '
                    'calls slicing operations on a sparse batch '
                    '(e.g. "my_matrix[r:R, c:C]"), which Theano does '
                    'not yet have a gradient operator for. If '
                    'autodifferentiation is reporting an error, '
                    'this may be why.')

        for ts in to_space.components:
            warning, message = get_expected_warning(from_space,
                                                    from_batch,
                                                    ts)
            if warning is not None:
                return warning, message

        return None, None

    # simple -> simple
    return None, None
def make_layer_to_state(self, num_examples, rng=None):
    """
    Makes and returns a dictionary mapping layers to states.

    By states, we mean here a real assignment, not a mean field state.
    For example, for a layer containing binary random variables, the
    state will be a shared variable containing values in {0,1}, not
    [0,1].

    The visible layer will be included.

    Uses a dictionary so it is easy to unambiguously index a layer
    without needing to remember rules like vis layer = 0, hiddens start
    at 1, etc.

    Parameters
    ----------
    num_examples : int
        Number of examples to make up the state
    rng : MRG_RandomStreams
        Random number generator, if None then use model's rng
    """

    # Make a list of all layers
    layers = [self.visible_layer] + self.hidden_layers

    if rng is None:
        rng = self.rng

    states = [layer.make_state(num_examples, rng) for layer in layers]

    def recurse_check(layer, state):
        if isinstance(state, (list, tuple)):
            for elem in state:
                recurse_check(layer, elem)
        else:
            val = state.get_value()
            m = val.shape[0]
            if m != num_examples:
                raise ValueError(
                    layer.layer_name + " gave state with " + str(m) +
                    " examples in some component. "
                    "We requested " + str(num_examples)
                )

    for layer, state in safe_zip(layers, states):
        recurse_check(layer, state)

    rval = OrderedDict(safe_zip(layers, states))

    return rval
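# Hedged usage sketch (not from the original source): indexing the dictionary
# returned by make_layer_to_state by layer object, as the docstring above
# describes. `dbm` is an illustrative, already-constructed DBM instance.
layer_to_state = dbm.make_layer_to_state(num_examples=100)
vis_state = layer_to_state[dbm.visible_layer]
first_hidden_state = layer_to_state[dbm.hidden_layers[0]]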
def get_gradients(self, model, data, **kwargs):
    space, sources = self.get_data_specs(model)
    space.validate(data)
    assert isinstance(model, CompressAdversaryPair)
    g = model.compressor
    d = model.discriminator

    # get raw gradients for d and g objectives...
    d_obj, g_obj = self.get_objectives(model, data)
    g_params = g.get_params()
    d_params = d.get_params()
    for param in g_params:
        assert param not in d_params
    for param in d_params:
        assert param not in g_params
    d_grads = T.grad(d_obj, d_params)
    g_grads = T.grad(g_obj, g_params)

    # if self.scale_grads:
    #     S_grad = T.grad(g_obj, S)
    #     scale = T.maximum(1., self.target_scale /
    #                       T.sqrt(T.sqr(S_grad).sum()))
    #     g_grads = [g_grad * scale for g_grad in g_grads]

    # adjust raw gradients with control signals
    rval = OrderedDict()
    zeros = itertools.repeat(theano.tensor.constant(0., dtype='float32'))

    if self.ever_train_discriminator:
        rval.update(OrderedDict(safe_zip(
            d_params,
            [self.now_train_discriminator * dg for dg in d_grads])))
    else:
        rval.update(OrderedDict(zip(d_params, zeros)))

    if self.ever_train_compressor:
        rval.update(OrderedDict(safe_zip(
            g_params,
            [self.now_train_compressor * gg for gg in g_grads])))
    else:
        rval.update(OrderedDict(zip(g_params, zeros)))

    # update control signals using the updates return functionality
    updates = OrderedDict()

    # first, the clock
    self.future_train_clock = T.switch(
        T.ge(self.train_clock,
             self.discriminator_steps + self.joint_steps +
             self.compressor_steps),
        1.,
        self.train_clock + 1.)
    updates[self.train_clock] = self.future_train_clock

    # then the control signals
    updates[self.now_train_discriminator] = T.switch(
        T.le(self.future_train_clock,
             self.discriminator_steps + self.joint_steps),
        1., 0.)
    updates[self.now_train_compressor] = T.switch(
        T.gt(self.future_train_clock, self.discriminator_steps),
        1., 0.)

    return rval, updates
def get_monitoring_channels(self, data):
    """
    data is a flat tuple, and can contain features, targets, or both
    """
    rval = super(PieceChangeMonitoringMLP,
                 self).get_monitoring_channels(data)
    X, Y = data
    state = X

    theano_rng = MRG_RandomStreams(self.rng.randint(2 ** 15))

    assert not isinstance(state, tuple)

    piece_ids_0 = self.piece_id(state, theano_rng)
    # piece_ids_0 = Print('piece_ids_0[0]')(piece_ids_0[0])
    piece_ids_1 = self.piece_id(state, theano_rng)

    assert len(piece_ids_0) == 2  # rm

    piece_changes = T.cast(
        sum([T.neq(ids_0, ids_1).sum()
             for ids_0, ids_1 in safe_zip(piece_ids_0, piece_ids_1)]),
        'float32')
    possible_changes = T.cast(
        sum([ids_0.size for ids_0 in piece_ids_0]),
        'float32')

    rval['piece_change_rate'] = piece_changes / possible_changes

    return rval
def load_model(self, model):
    """
    Slot that loads a model object (not file).

    Parameters
    ----------
    model : Model
        The model to load.
    """
    try:
        logger.debug('Loading model %s', str(model))

        pras_list = model.get_params()
        parv_list = model.get_param_values()
        for par, parv in safe_zip(pras_list, parv_list):
            tvi = QtGui.QTreeWidgetItem()
            tvi.setText(0, par.name)
            tvi.par = par
            tvi.parv = parv
            self.lv_top.addTopLevelItem(tvi)

        logger.debug('Model loaded')
    except Exception, exc:
        logger.error('Loading model failed', exc_info=True)
        QtGui.QMessageBox.warning(self, 'Exception', str(exc))
def setup(self, model, dataset):
    """
    Allows the training algorithm to do some preliminary configuration
    *before* we actually start training the model. The dataset is
    provided in case other derived training algorithms need to modify
    model based on the dataset.

    Parameters
    ----------
    model : a Python object representing the model to train loosely
        implementing the interface of models.model.Model.
    dataset : a pylearn2.datasets.dataset.Dataset object used to draw
        training data
    """
    self.model = model

    self.monitor = Monitor.get_monitor(model)

    if self.monitoring_dataset is not None:
        # Get the data specifications needed by the model
        space, source = model.get_monitoring_data_specs()

        # Create Theano variables for each of the individual components
        # of that data. Usually, it will be X for inputs and Y for
        # targets. First, we need to find these components, and put
        # them in a tuple
        mapping = DataSpecsMapping((space, source))
        space_tuple = mapping.flatten(space, return_tuple=True)
        source_tuple = mapping.flatten(source, return_tuple=True)
        # Then, build a flat tuple of these Theano variables
        ipt = tuple(sp.make_theano_batch(name='monitor_%s' % src)
                    for (sp, src) in safe_zip(space_tuple, source_tuple))
        # Finally, organize them back into a structure expected by the
        # monitoring channels of the model
        nested_ipt = mapping.nest(ipt)

        self.monitor.add_dataset(dataset=self.monitoring_dataset,
                                 mode="sequential",
                                 batch_size=self.batch_size,
                                 num_batches=self.monitoring_batches)

        channels = model.get_monitoring_channels(nested_ipt)
        if not isinstance(channels, dict):
            raise TypeError("model.get_monitoring_channels must return a "
                            "dictionary, but it returned " + str(channels))
        for name in channels:
            J = channels[name]
            if isinstance(J, tuple):
                assert len(J) == 2
                J, prereqs = J
            else:
                prereqs = None

            self.monitor.add_channel(name=name,
                                     ipt=nested_ipt,
                                     val=J,
                                     prereqs=prereqs,
                                     data_specs=(space, source))
    self.first = True
    self.bSetup = True
def get_gradients(model):
    cost = model.get_default_cost()

    data_specs = cost.get_data_specs(model)
    mapping = DataSpecsMapping(data_specs)
    space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
    source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

    theano_args = []
    for space, source in safe_zip(space_tuple, source_tuple):
        name = '%s[%s]' % (SGD.__name__, source)
        arg = space.make_theano_batch(name=name,
                                      batch_size=model.batch_size)
        theano_args.append(arg)
    theano_args = tuple(theano_args)

    nested_args = mapping.nest(theano_args)
    fixed_var_descr = cost.get_fixed_var_descr(model, nested_args)

    grads, updates = cost.get_gradients(model, nested_args,
                                        **fixed_var_descr.fixed_vars)

    params = list(model.get_params())
    for param in params:
        # sanity check: every parameter should have a gradient entry
        some = grads[param]
        print("ok")

    return grads
def _shared_inputs(self, inputs):
    """
    .. todo::

        WRITEME
    """
    return [elem for elem, shared in safe_zip(inputs, self._shared_mask)
            if shared]
def iterator(self, mode=None, batch_size=None, num_batches=None, rng=None, data_specs=None, return_tuple=False): """ Method inherited from `pylearn2.datasets.dataset.Dataset`. """ self.mode = mode self.batch_size = batch_size self._return_tuple = return_tuple # TODO: If there is a view_converter, we have to use it to convert # the stored data for "features" into one that the iterator can return. space, source = data_specs or (self.X_space, 'features') assert isinstance(space, CompositeSpace),\ "Unexpected input space for the data." sub_spaces = space.components sub_sources = source conv_fn = lambda x: x.todense().astype(theano.config.floatX) convert = [] for sp, src in safe_zip(sub_spaces, sub_sources): convert.append(conv_fn if src in ('features', 'targets') else None) assert mode is not None,\ "Iteration mode not provided for %s" % str(self) mode = resolve_iterator_class(mode) subset_iterator = mode(self.X.shape[0], batch_size, num_batches, rng) return FiniteDatasetIterator(self, subset_iterator, data_specs=data_specs, return_tuple=return_tuple, convert=convert)
def redraw():
    '''
    Draws the currently selected convolutional kernel.
    '''
    axes_list = all_axes.flatten()
    layer = conv_layers[layer_index]
    unit_index = unit_indices[layer_index, ...]
    weights = _get_conv_weights_bc01(layer)[unit_index, ...]

    active_axes = axes_list[:weights.shape[0]]

    for axes, weights in safe_zip(active_axes, weights):
        axes.set_visible(True)
        axes.imshow(weights, cmap='gray', interpolation='nearest')

    assert len(frozenset(active_axes)) == len(active_axes)

    unused_axes = axes_list[len(active_axes):]
    assert len(frozenset(unused_axes)) == len(unused_axes)
    assert len(axes_list) == len(active_axes) + len(unused_axes)

    for axes in unused_axes:
        axes.set_visible(False)

    title_text.set_text("Layer %s, unit %d" %
                        (layer.layer_name, unit_indices[layer_index]))
    figure.canvas.draw()
def draw(batch_pair):
    for axis, image_batch in safe_zip(axes, batch_pair):
        assert image_batch.shape[0] == 1
        grayscale_image = image_batch[0, :, :, 0]
        axis.imshow(grayscale_image, cmap='gray')
    figure.canvas.draw()
def _fill_mapping(self, space, source):
    """Builds a nested tuple of integers representing the mapping"""
    if isinstance(space, NullSpace):
        # This Space does not contain any data, and should not
        # be mapped to anything
        assert source == ''
        return None

    elif not isinstance(space, CompositeSpace):
        # Space is a simple Space, source should be a simple source
        if isinstance(source, tuple):
            source, = source

        # If (space, source) has not already been seen, insert it.
        # We need both the space and the source to match.
        if (space, source) in self.specs_to_index:
            spec_index = self.specs_to_index[(space, source)]
        else:
            spec_index = self.n_unique_specs
            self.specs_to_index[(space, source)] = spec_index
            self.n_unique_specs += 1

        return spec_index

    else:
        # Recursively fill the mapping, and return it
        spec_mapping = tuple(
            self._fill_mapping(sub_space, sub_source)
            for sub_space, sub_source in safe_zip(space.components, source))

        return spec_mapping
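# Hedged usage sketch (not from the original source): how the mapping built by
# _fill_mapping is typically exercised through DataSpecsMapping, flattening a
# nested (space, source) pair into flat tuples. The spaces and sources below
# are illustrative assumptions.
from pylearn2.space import CompositeSpace, VectorSpace
from pylearn2.utils.data_specs import DataSpecsMapping

space = CompositeSpace([VectorSpace(dim=10), VectorSpace(dim=2)])
source = ('features', 'targets')
mapping = DataSpecsMapping((space, source))
flat_space = mapping.flatten(space, return_tuple=True)
flat_source = mapping.flatten(source, return_tuple=True)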
def __call__(self, *batches):
    """
    .. todo::

        WRITEME
    """
    for batch in batches:
        if not isinstance(batch, list):
            raise TypeError("Expected each argument to be a list, but "
                            "one argument is " + str(batch) +
                            " of type " + str(type(batch)))

    total_examples = np.cast[config.floatX](
        sum([batch[0].shape[0] for batch in batches]))

    if self.has_updates:
        self._clear()

    augmented = self._true_inputs(batches[0]) + [total_examples]
    self._set_shared(batches[0])
    rval = self._func(*augmented)

    for batch in batches[1:]:
        augmented = self._true_inputs(batch) + [total_examples]
        self._set_shared(batch)
        # This works if there is no output,
        # because the output is an empty list
        cur_out = self._func(*augmented)
        rval = [x + y for x, y in safe_zip(rval, cur_out)]

    if len(rval) == 1:
        return rval[0]

    return rval
def _get_standard_neg(self, model, layer_to_chains):
    params = list(model.get_params())

    warnings.warn("""TODO: reduce variance of negative phase by
                  integrating out the even-numbered layers. The
                  Rao-Blackwellize method can do this for you when
                  expected gradient = gradient of expectation, but
                  doing this in general is trickier.""")
    #layer_to_chains = model.rao_blackwellize(layer_to_chains)

    expected_energy_p = model.energy(
        layer_to_chains[model.visible_layer],
        [layer_to_chains[layer] for layer in model.hidden_layers]
    ).mean()

    samples = flatten(layer_to_chains.values())
    for i, sample in enumerate(samples):
        if sample.name is None:
            sample.name = 'sample_' + str(i)

    neg_phase_grads = OrderedDict(
        safe_zip(params, T.grad(-expected_energy_p,
                                params,
                                consider_constant=samples,
                                disconnected_inputs='ignore'))
    )
    return neg_phase_grads
def inv_prop(self, state_above):
    if not isinstance(state_above, tuple):
        expected_space = VectorSpace(
            self.output_space.get_total_dimension())
        state_above = expected_space.format_as(state_above,
                                               self.output_space)

    self.output_space.validate(state_above)
    return tuple(layer.inv_prop(state)
                 for layer, state in safe_zip(self.layers, state_above))
def topo_view_to_design_mat(self, topo_array):
    """
    Returns a design matrix view/copy of topological matrix.

    Parameters
    ----------
    topo_array : numpy.ndarray
        An N-D array with axis order given by self.axes. Non-batch axes'
        dimension sizes must agree with corresponding sizes in
        self.shape.

    Returns
    -------
    numpy.ndarray
        A design matrix with data in rows. Data is laid out in memory
        according to the default axis order ('b', 'c', 0, 1). This will
        try to return a view into topo_array if possible; otherwise it
        will allocate a new ndarray.
    """
    for shape_elem, axis in safe_zip(self.shape, (0, 1, 2, 'c')):
        if topo_array.shape[self.axes.index(axis)] != shape_elem:
            raise ValueError(
                "topo_array's %s axis has a different size "
                "(%d) from the corresponding size (%d) in "
                "self.shape.\n"
                "  self.shape:       %s (uses standard axis order: 0, 1, "
                "'c')\n"
                "  self.axes:        %s\n"
                "  topo_array.shape: %s (should be in self.axes' order)"
                % (axis, topo_array.shape[self.axes.index(axis)],
                   shape_elem, self.shape, self.axes, topo_array.shape))

    topo_array_bc01 = topo_array.transpose([self.axes.index(ax)
                                            for ax in ('b', 'c', 0, 1, 2)])

    return topo_array_bc01.reshape((topo_array_bc01.shape[0],
                                    np.prod(topo_array_bc01.shape[1:])))
def iterator(self, mode=None, batch_size=None, num_batches=None, rng=None, data_specs=None, return_tuple=False): """ Copied from dense_design_matrix, in order to fix uneven problem. """ if data_specs is None: data_specs = self._iter_data_specs # If there is a view_converter, we have to use it to convert # the stored data for "features" into one that the iterator # can return. space, source = data_specs if isinstance(space, CompositeSpace): sub_spaces = space.components sub_sources = source else: sub_spaces = (space,) sub_sources = (source,) convert = [] for sp, src in safe_zip(sub_spaces, sub_sources): if src == 'features' and \ getattr(self, 'view_converter', None) is not None: conv_fn = (lambda batch, self=self, space=sp: self.view_converter.get_formatted_batch(batch, space)) else: conv_fn = None convert.append(conv_fn) # TODO: Refactor if mode is None: if hasattr(self, '_iter_subset_class'): mode = self._iter_subset_class else: raise ValueError('iteration mode not provided and no default ' 'mode set for %s' % str(self)) else: mode = resolve_iterator_class(mode) if batch_size is None: batch_size = getattr(self, '_iter_batch_size', None) if num_batches is None: num_batches = getattr(self, '_iter_num_batches', None) if rng is None and mode.stochastic: rng = self.rng # hack to make the online augmentations run FiniteDatasetIterator.uneven = False iterator = FiniteDatasetIterator(self, mode(self.X.shape[0], batch_size, num_batches, rng), data_specs=data_specs, return_tuple=return_tuple, convert=convert) return iterator
def next(self):
    next_index = self._subset_iterator.next()

    # TODO: handle fancy-index copies by allocating a buffer and
    # using numpy.take()
    # This saves us some memory (and time spent allocating it)
    # when the dataset dtype matches floatX and next_index is not a
    # fancy-index.
    if self._deprecated_interface:
        if self._needs_cast:
            features = numpy.cast[config.floatX](
                self._raw_data[next_index])
        else:
            features = self._raw_data[next_index]
        if self._topo:
            features = self._dataset.get_topological_view(features)
        if self._targets:
            targets = self._raw_targets[next_index]
            if self._targets_need_cast:
                targets = np.cast[config.floatX](targets)
            return features, targets
        else:
            return features
    else:
        rval = tuple(
            fn(data[next_index]) if fn else data[next_index]
            for data, fn in safe_zip(self._raw_data, self._convert))
        if not self._return_tuple and len(rval) == 1:
            rval, = rval
        return rval
def next(self):
    warnings.warn("This class is obsolete with the new interface change, "
                  "and will be removed around November 7th",
                  stacklevel=2)
    next_index = self._subset_iterator.next()

    if self._deprecated_interface:
        if isinstance(next_index, np.ndarray) and len(next_index) == 1:
            next_index = next_index[0]
        if self._needs_cast:
            features = numpy.cast[config.floatX](
                self._raw_data[next_index])
        else:
            features = self._raw_data[next_index, :]
        if self._topo:
            if len(features.shape) != 2:
                features = features.reshape((1, features.shape[0]))
            features = self._dataset.get_topological_view(features)
        if self._targets:
            targets = self._raw_targets[next_index, :]
            if len(targets.shape) != 2:
                targets = targets.reshape((1, targets.shape[0]))
            if self._targets_need_cast:
                targets = np.cast[config.floatX](targets)
            return features, targets
        else:
            return features
    else:
        rval = tuple(
            fn(data[next_index]) if fn else data[next_index]
            for data, fn in safe_zip(self._raw_data, self._convert))
        if not self._return_tuple and len(rval) == 1:
            rval, = rval
        return rval
def test_variational_cd():
    # Verifies that VariationalCD works well with
    # make_layer_to_symbolic_state
    visible_layer = BinaryVector(nvis=100)
    hidden_layer = BinaryVectorMaxPool(detector_layer_dim=500,
                                       pool_size=1,
                                       layer_name='h',
                                       irange=0.05,
                                       init_bias=-2.0)
    model = DBM(visible_layer=visible_layer,
                hidden_layers=[hidden_layer],
                batch_size=100,
                niter=1)

    cost = VariationalCD(num_chains=100, num_gibbs_steps=2)

    data_specs = cost.get_data_specs(model)
    mapping = DataSpecsMapping(data_specs)
    space_tuple = mapping.flatten(data_specs[0], return_tuple=True)
    source_tuple = mapping.flatten(data_specs[1], return_tuple=True)

    theano_args = []
    for space, source in safe_zip(space_tuple, source_tuple):
        name = '%s' % (source)
        arg = space.make_theano_batch(name=name)
        theano_args.append(arg)
    theano_args = tuple(theano_args)
    nested_args = mapping.nest(theano_args)

    grads, updates = cost.get_gradients(model, nested_args)
def _fill_flat(self, nested, mapping, rval):
    """Auxiliary recursive function used by self.flatten"""
    if isinstance(nested, CompositeSpace):
        nested = tuple(nested.components)

    if mapping is None:
        # The corresponding Space was a NullSpace, which does
        # not correspond to actual data, so nested should evaluate
        # to False, and should not be included in the flattened version
        if not isinstance(nested, NullSpace):
            assert not nested, ("The following element is mapped to "
                                "NullSpace, so it should evaluate to "
                                "False (for instance, None, an empty "
                                "string or an empty tuple), but is %s"
                                % nested)
        return

    if isinstance(mapping, int):
        # "nested" should actually be a single element
        idx = mapping
        if isinstance(nested, tuple):
            nested, = nested

        if rval[idx] is None:
            rval[idx] = nested
        else:
            assert rval[idx] == nested, (
                "This mapping was built with the same element occurring "
                "more than once in the nested representation, but "
                "current nested sequence has different values (%s and "
                "%s) at these positions." % (rval[idx], nested))
    else:
        for sub_nested, sub_mapping in safe_zip(nested, mapping):
            self._fill_flat(sub_nested, sub_mapping, rval)
def expr(self, model, data):
    if hasattr(model, 'autoencoders'):
        assert len(model.autoencoders) == len(self.coeffs)
    self.get_data_specs(model)[0].validate(data)
    X = data

    if hasattr(model, 'autoencoders'):
        layers = model.autoencoders
    else:
        layers = [model]

    layer_costs = []
    current = data
    for layer, coeff in safe_zip(layers, self.coeffs):
        current = layer.encode(current)
        cost = theano.tensor.abs_(current).sum(axis=1).mean()
        layer_costs.append(coeff * cost)

    assert theano.tensor.scalar() != 0.
    layer_costs = [cost_ for cost_ in layer_costs if cost_ != 0.]

    if len(layer_costs) == 0:
        return theano.tensor.as_tensor_variable(0.)
    else:
        total_cost = reduce(lambda x, y: x + y, layer_costs)
    total_cost.name = 'L1_ActCost'

    assert total_cost.ndim == 0

    return total_cost
def topo_view_to_design_mat(self, topo_array):
    """
    .. todo::

        WRITEME
    """
    for shape_elem, axis in safe_zip(self.shape, (0, 1, "c")):
        if topo_array.shape[self.axes.index(axis)] != shape_elem:
            raise ValueError(
                "topo_array's %s axis has a different size "
                "(%d) from the corresponding size (%d) in "
                "self.shape.\n"
                "  self.shape:       %s (uses standard axis order: 0, 1, "
                "'c')\n"
                "  self.axes:        %s\n"
                "  topo_array.shape: %s (should be in self.axes' order)"
                % (axis, topo_array.shape[self.axes.index(axis)],
                   shape_elem, self.shape, self.axes, topo_array.shape))

    if self.mask is not None:
        m = topo_array.shape[0]
        mask_idx = np.where(self.mask.transpose(
            [self.axes.index(ax) - 1 for ax in ("c", 0, 1)]
        ).flatten() == 1)[0].tolist()
        design_matrix = np.zeros((m, len(mask_idx)),
                                 dtype=topo_array.dtype)

        for i in range(m):
            topo_array_c01 = topo_array[i].transpose(
                [self.axes.index(ax) - 1 for ax in ("c", 0, 1)])
            design_matrix[i] = topo_array_c01.flatten()[mask_idx]
    else:
        topo_array_bc01 = topo_array.transpose(
            [self.axes.index(ax) for ax in ("b", "c", 0, 1)])
        design_matrix = topo_array_bc01.reshape(
            (topo_array.shape[0], np.prod(topo_array.shape[1:])))

    return design_matrix
def __call__(self, *batches):
    """
    .. todo::

        WRITEME
    """
    for batch in batches:
        if not isinstance(batch, list):
            raise TypeError("Expected each argument to be a list, but "
                            "one argument is " + str(batch) +
                            " of type " + str(type(batch)))

    total_examples = np.cast[config.floatX](sum(
        [batch[0].shape[0] for batch in batches]))

    if self.has_updates:
        self._clear()

    augmented = self._true_inputs(batches[0]) + [total_examples]
    self._set_shared(batches[0])
    rval = self._func(*augmented)

    for batch in batches[1:]:
        augmented = self._true_inputs(batch) + [total_examples]
        self._set_shared(batch)
        # This works if there is no output,
        # because the output is an empty list
        cur_out = self._func(*augmented)
        rval = [x + y for x, y in safe_zip(rval, cur_out)]

    if len(rval) == 1:
        return rval[0]

    return rval
def get_gradients(self, model, data, ** kwargs):
    indiv_results = []
    composite_specs, mapping = self.get_composite_specs_and_mapping(model)
    nested_data = mapping.nest(data)
    for cost, cost_data in safe_zip(self.costs, nested_data):
        result = cost.get_gradients(model, cost_data, ** kwargs)
        indiv_results.append(result)

    grads = OrderedDict()
    updates = OrderedDict()
    params = model.get_params()

    for coeff, packed in zip(self.coeffs, indiv_results):
        g, u = packed
        for param in g:
            if param not in params:
                raise ValueError("A shared variable (" +
                                 str(param) +
                                 ") that is not a parameter appeared "
                                 "in a cost gradient dictionary.")
        for param in g:
            assert param.ndim == g[param].ndim
            v = coeff * g[param]
            if param not in grads:
                grads[param] = v
            else:
                grads[param] = grads[param] + v
            assert grads[param].ndim == param.ndim
        assert not any([state in updates for state in u])
        assert not any([state in params for state in u])
        updates.update(u)

    return grads, updates
def _get_standard_neg(self, model, layer_to_chains):
    """
    .. todo::

        WRITEME
    """
    params = list(model.get_params())

    warnings.warn("""TODO: reduce variance of negative phase by
                  integrating out the even-numbered layers. The
                  Rao-Blackwellize method can do this for you when
                  expected gradient = gradient of expectation, but
                  doing this in general is trickier.""")
    #layer_to_chains = model.rao_blackwellize(layer_to_chains)
    expected_energy_p = model.energy(
        layer_to_chains[model.visible_layer],
        [layer_to_chains[layer] for layer in model.hidden_layers]).mean()

    samples = flatten(layer_to_chains.values())
    for i, sample in enumerate(samples):
        if sample.name is None:
            sample.name = 'sample_' + str(i)

    neg_phase_grads = OrderedDict(
        safe_zip(
            params,
            T.grad(-expected_energy_p,
                   params,
                   consider_constant=samples,
                   disconnected_inputs='ignore')))
    return neg_phase_grads
def make_layer_to_symbolic_state(self, num_examples, rng=None):
    """
    .. todo::

        Explain the difference with `make_layer_to_state`

    Makes and returns a dictionary mapping layers to states.

    By states, we mean here a real assignment, not a mean field state.
    For example, for a layer containing binary random variables, the
    state will be a shared variable containing values in {0,1}, not
    [0,1].

    The visible layer will be included.

    Uses a dictionary so it is easy to unambiguously index a layer
    without needing to remember rules like vis layer = 0, hiddens start
    at 1, etc.

    Parameters
    ----------
    num_examples : int
        WRITEME
    rng : WRITEME
    """

    # Make a list of all layers
    layers = [self.visible_layer] + self.hidden_layers

    assert rng is not None

    states = [layer.make_symbolic_state(num_examples, rng)
              for layer in layers]

    zipped = safe_zip(layers, states)

    rval = OrderedDict(zipped)

    return rval
def compile_f_step():
    prev = T.matrices(self.nlayers)
    if clamped:
        _initial = T.matrices(len(indices))
        _clamps = T.matrices(len(indices))

        z = self._update(copy.copy(prev),
                         clamped=safe_zip(indices, _initial, _clamps),
                         return_activations=True)
        f = theano.function(prev + _initial + _clamps,
                            z,
                            on_unused_input='ignore',
                            allow_input_downcast=True)
    else:
        z = self._update(copy.copy(prev), return_activations=True)
        f = theano.function(prev,
                            z,
                            on_unused_input='ignore',
                            allow_input_downcast=True)

    def wrapped(*args):
        data = f(*args)
        length = len(data) / 2
        return data[:length], data[length:]

    return wrapped
def __call__(self, model, X, Y=None, return_locals=False, **kwargs):
    """
    If return_locals is True, returns (objective, locals()).
    Note that this means adding / removing / changing the value of
    local variables is an interface change.
    In particular, TorontoSparsity depends on "terms" and "H_hat".
    """
    assert (Y is None) == (not self.supervised)

    H_hat = model.mf(X, Y=Y)

    terms = []

    hidden_layers = model.hidden_layers
    #if self.supervised:
    #    hidden_layers = hidden_layers[:-1]

    for layer, mf_state, targets, coeffs in \
            safe_zip(hidden_layers, H_hat, self.targets, self.coeffs):
        try:
            cost = layer.get_l2_act_cost(mf_state, targets, coeffs)
        except NotImplementedError:
            assert isinstance(coeffs, float) and coeffs == 0.
            cost = 0.
        terms.append(cost)

    objective = sum(terms)

    if return_locals:
        return objective, locals()
    return objective
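# Hedged usage sketch (not from the original source): how a caller such as
# TorontoSparsity is expected to reuse the locals() returned by __call__
# above when return_locals=True. `cost`, `model` and `X` are illustrative
# placeholders.
objective, scratch = cost(model, X, return_locals=True)
terms = scratch['terms']
H_hat = scratch['H_hat']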
def get_expected_warning(from_space, from_batch, to_space):

    # composite -> composite
    if isinstance(from_space, CompositeSpace) and \
       isinstance(to_space, CompositeSpace):
        for fs, fb, ts in safe_zip(from_space.components,
                                   from_batch,
                                   to_space.components):
            warning, message = get_expected_warning(fs, fb, ts)
            if warning is not None:
                return warning, message

        return None, None

    # composite -> simple
    if isinstance(from_space, CompositeSpace):
        for fs, fb in safe_zip(from_space.components, from_batch):
            warning, message = get_expected_warning(fs, fb, to_space)
            if warning is not None:
                return warning, message

        return None, None

    # simple -> composite
    if isinstance(to_space, CompositeSpace):
        if isinstance(from_space, VectorSpace) and \
           isinstance(from_batch, theano.sparse.SparseVariable):
            assert from_space.sparse
            return (UserWarning,
                    'Formatting from a sparse VectorSpace to a '
                    'CompositeSpace is currently (2 Jan 2014) a '
                    'non-differentiable action. This is because it '
                    'calls slicing operations on a sparse batch '
                    '(e.g. "my_matrix[r:R, c:C]"), which Theano does '
                    'not yet have a gradient operator for. If '
                    'autodifferentiation is reporting an error, '
                    'this may be why.')

        for ts in to_space.components:
            warning, message = get_expected_warning(from_space,
                                                    from_batch,
                                                    ts)
            if warning is not None:
                return warning, message

        return None, None

    # simple -> simple
    return None, None
def make_layer_to_state(self, num_examples, rng=None):
    """
    Makes and returns a dictionary mapping layers to states.

    By states, we mean here a real assignment, not a mean field state.
    For example, for a layer containing binary random variables, the
    state will be a shared variable containing values in {0,1}, not
    [0,1].

    The visible layer will be included.

    Uses a dictionary so it is easy to unambiguously index a layer
    without needing to remember rules like vis layer = 0, hiddens start
    at 1, etc.

    Parameters
    ----------
    num_examples : int
        Number of examples to make up the state
    rng : MRG_RandomStreams
        Random number generator, if None then use model's rng
    """

    # Make a list of all layers
    layers = [self.visible_layer] + self.hidden_layers

    if rng is None:
        rng = self.rng

    states = [layer.make_state(num_examples, rng) for layer in layers]

    def recurse_check(layer, state):
        if isinstance(state, (list, tuple)):
            for elem in state:
                recurse_check(layer, elem)
        else:
            val = state.get_value()
            m = val.shape[0]
            if m != num_examples:
                raise ValueError(layer.layer_name + " gave state with " +
                                 str(m) + " examples in some component. "
                                 "We requested " + str(num_examples))

    for layer, state in safe_zip(layers, states):
        recurse_check(layer, state)

    rval = OrderedDict(safe_zip(layers, states))

    return rval
def on_monitor(self, model, dataset, algorithm):
    """
    .. todo::

        WRITEME
    """
    monitor = model.monitor

    if self.first:
        self.first = False
        self.monitor_channel = sharedX(algorithm.scale_step)
        # TODO: make monitor accept channels not associated with any
        # dataset, so this hack won't be necessary
        hack = monitor.channels.values()[0]
        monitor.add_channel('scale_step', hack.graph_input,
                            self.monitor_channel, dataset=hack.dataset)
    channel = monitor.channels[self.channel]
    v = channel.val_record
    if len(v) == 1:
        return
    latest = v[-1]
    logger.info("Latest {0}: {1}".format(self.channel, latest))
    # Only compare to the previous step, not the best step so far.
    # Another extension can be in charge of saving the best parameters
    # ever seen. We want to keep learning as long as we're making
    # progress. We don't want to give up on a step size just because it
    # failed to undo the damage of the bigger one that preceded it in a
    # single epoch.
    logger.info("Previous is {0}".format(self.prev))
    cur = algorithm.scale_step
    if latest >= self.prev:
        logger.info("Looks like using {0} "
                    "isn't working out so great for us.".format(cur))
        cur *= self.scale
        if cur < self.giveup_after:
            logger.info("Guess we just have to give up.")
            self.continue_learning = False
            cur = self.giveup_after
        logger.info("Let's see how {0} does.".format(cur))
        logger.info("Reloading saved params from last call")
        for p, v in safe_zip(model.get_params(), self.stored_values):
            p.set_value(v)
        latest = self.prev
    elif latest <= self.prev and self.scale_up != 1.:
        logger.info("Looks like we're making progress "
                    "on the validation set, let's try speeding up")
        cur *= self.scale_up
        if cur > self.max_scale:
            cur = self.max_scale
    logger.info("New scale is {0}".format(cur))
    algorithm.scale_step = cur
    self.monitor_channel.set_value(np.cast[config.floatX](cur))
    self.prev = latest
    self.stored_values = [param.get_value() for param in
                          model.get_params()]
def load_model(model_paths, costs, batch_size=100):
    if type(costs) is not list:
        costs = len(model_paths) * [costs]

    model = {}
    model['layers'] = []
    model['costs'] = []
    model['comparative_costs'] = []
    model['weights'] = []
    model['encoders'] = []
    model['decoders'] = []

    for i, path in enumerate(model_paths):
        if os.path.isfile(path):
            model['layers'].append(serial.load(path))

            I = model['layers'][i].get_input_space().make_theano_batch(
                batch_size=batch_size)
            E = model['layers'][i].encode(I)
            model['encoders'].append(theano.function([I], E))

            H = model['layers'][i].get_output_space().make_theano_batch(
                batch_size=batch_size)
            D = model['layers'][i].decode(H)
            model['decoders'].append(theano.function([H], D))

            model['weights'].append(model['layers'][i].get_weights())

            data_specs = costs[i].get_data_specs(model['layers'][i])
            mapping = DataSpecsMapping(data_specs)
            space_tuple = mapping.flatten(data_specs[0],
                                          return_tuple=True)
            source_tuple = mapping.flatten(data_specs[1],
                                           return_tuple=True)

            # Build a flat tuple of Theano Variables, one for each space.
            # We want that so that if the same space/source is specified
            # more than once in data_specs, only one Theano Variable
            # is generated for it, and the corresponding value is passed
            # only once to the compiled Theano function.
            theano_args = []
            for space, source in safe_zip(space_tuple, source_tuple):
                arg = space.make_theano_batch(batch_size=batch_size)
                theano_args.append(arg)
            theano_args = tuple(theano_args)

            # Methods of `self.cost` need args to be passed in a format
            # compatible with data_specs
            nested_args = mapping.nest(theano_args)

            fixed_var_descr = costs[i].get_fixed_var_descr(
                model['layers'][i], nested_args)

            model['costs'].append(
                theano.function(
                    [nested_args],
                    costs[i].expr(model['layers'][i],
                                  nested_args,
                                  **fixed_var_descr.fixed_vars)))

            I2 = model['layers'][i].get_input_space().make_theano_batch(
                batch_size=batch_size)
            model['comparative_costs'].append(
                theano.function([I, I2],
                                costs[i].costs[0].cost(I, I2)))
        else:
            sys.exit("Whoa. " + path + " isn't a thing I know about!")

    return model
def _set_shared(self, inputs):
    """
    .. todo::

        WRITEME
    """
    for elem, mask, shared in safe_zip(inputs, self._shared_mask,
                                       self._shared):
        if mask:
            shared.set_value(elem)
def test_image_dtype(self):
    expected_dtypes = ('uint8', 'float32')
    norbs = (NORB(which_set='train', which_norb='small'),
             NORB(which_set='train',
                  which_norb='small',
                  image_dtype='float32'))

    for norb, expected_dtype in safe_zip(norbs, expected_dtypes):
        assert str(norb.X.dtype) == expected_dtype
def iterator(self, mode=None, batch_size=None, num_batches=None,
             topo=None, targets=None, rng=None, data_specs=None,
             return_tuple=False):
    """
    method inherited from Dataset
    """
    self.mode = mode
    self.batch_size = batch_size
    self._targets = targets
    self._return_tuple = return_tuple

    if data_specs is None:
        data_specs = self._iter_data_specs

    # If there is a view_converter, we have to use it to convert
    # the stored data for "features" into one that the iterator
    # can return.
    # if self.conv_fn = lambda x: x.todense()
    space, source = data_specs
    if isinstance(space, CompositeSpace):
        sub_spaces = space.components
        sub_sources = source
    else:
        sub_spaces = (space,)
        sub_sources = (source,)

    convert = []
    for sp, src in safe_zip(sub_spaces, sub_sources):
        # apply conv_fn to both features and targets
        if src in ('features', 'targets'):
            conv_fn = self.conv_fn
        else:
            conv_fn = None

        convert.append(conv_fn)

    if mode is None:
        if hasattr(self, '_iter_subset_class'):
            mode = self._iter_subset_class
        else:
            raise ValueError('iteration mode not provided and no default '
                             'mode set for %s' % str(self))
    else:
        mode = resolve_iterator_class(mode)

    return FiniteDatasetIterator(self,
                                 mode(self.X.shape[0],
                                      batch_size,
                                      num_batches,
                                      rng),
                                 data_specs=data_specs,
                                 return_tuple=return_tuple,
                                 convert=convert)
def after_step(self, model):
    """
    .. todo::

        WRITEME
    """
    if self.scale_step != 1:
        for param, value in safe_zip(self.params, self.value):
            value = (1. - self.scale_step) * value + \
                self.scale_step * param.get_value()
            param.set_value(value)