def test_masked_fprop():
    # Construct a dirt-simple linear network with identity weights.
    mlp = MLP(nvis=2, layers=[Linear(2, 'h0', irange=0),
                              Linear(2, 'h1', irange=0)])
    mlp.layers[0].set_weights(np.eye(2, dtype=mlp.get_weights().dtype))
    mlp.layers[1].set_weights(np.eye(2, dtype=mlp.get_weights().dtype))
    mlp.layers[0].set_biases(np.arange(1, 3, dtype=mlp.get_weights().dtype))
    mlp.layers[1].set_biases(np.arange(3, 5, dtype=mlp.get_weights().dtype))

    # Verify that get_total_input_dimension works.
    np.testing.assert_equal(mlp.get_total_input_dimension(['h0', 'h1']), 4)
    inp = theano.tensor.matrix()

    # Accumulate the sum of the outputs of all masked networks.
    l = []
    for mask in xrange(16):
        l.append(mlp.masked_fprop(inp, mask))
    outsum = reduce(lambda x, y: x + y, l)

    f = theano.function([inp], outsum, allow_input_downcast=True)
    np.testing.assert_equal(f([[5, 3]]), [[144., 144.]])
    np.testing.assert_equal(f([[2, 7]]), [[96., 208.]])

    np.testing.assert_raises(ValueError, mlp.masked_fprop, inp, 22)
    np.testing.assert_raises(ValueError, mlp.masked_fprop, inp, 2, ['h3'])
    np.testing.assert_raises(ValueError, mlp.masked_fprop, inp, 2, None,
                             2., {'h3': 4})
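# A hedged sanity check of the constants asserted above. Assuming that
# masked_fprop gates each layer's *input* with the corresponding mask bit
# and rescales surviving inputs by the default input scale of 2 (masked_net
# below is a hypothetical re-derivation, not the library implementation),
# summing over all 16 masks gives 16 * (x_j + b0_j + b1_j) per output unit:
import numpy as np
from itertools import product

def masked_net(x, bits, b0=np.array([1., 2.]), b1=np.array([3., 4.])):
    # Identity weights; mask bits gate each layer's input, surviving
    # inputs are rescaled by the assumed default input scale of 2.
    m0 = np.array(bits[:2], dtype=float)
    m1 = np.array(bits[2:], dtype=float)
    h0 = 2. * m0 * x + b0
    return 2. * m1 * h0 + b1

total = sum(masked_net(np.array([5., 3.]), bits)
            for bits in product([0, 1], repeat=4))
print(total)  # [144. 144.], matching the assertion in the test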
def expr(self, model, data, **kwargs):
    """
    Returns the sum of the costs the SumOfCosts instance was given at
    initialization.

    Parameters
    ----------
    model : pylearn2.models.model.Model
        The model for which we want to calculate the sum of costs.
    data : flat tuple of tensor_like variables
        data has to follow the format defined by self.get_data_specs(),
        but this format will always be a flat tuple.
    """
    self.get_data_specs(model)[0].validate(data)
    composite_specs, mapping = self.get_composite_specs_and_mapping(model)
    nested_data = mapping.nest(data)
    costs = []
    for cost, cost_data in safe_zip(self.costs, nested_data):
        costs.append(cost.expr(model, cost_data, **kwargs))
    assert len(costs) > 0

    if any([cost is None for cost in costs]):
        sum_of_costs = None
    else:
        costs = [coeff * cost
                 for coeff, cost in safe_zip(self.coeffs, costs)]
        assert len(costs) > 0
        sum_of_costs = reduce(lambda x, y: x + y, costs)

    return sum_of_costs
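# A hedged usage sketch: combining a data-dependent cost with a weight
# penalty. This assumes the usual pylearn2 convention that SumOfCosts
# accepts either bare Cost objects or (coefficient, Cost) pairs; Default
# and WeightDecay are the MLP costs of those names.
from pylearn2.costs.cost import SumOfCosts
from pylearn2.costs.mlp import Default, WeightDecay

cost = SumOfCosts(costs=[
    Default(),                               # the model's default cost
    (1e-5, WeightDecay(coeffs={'h0': 1.})),  # scaled L2 penalty
])
# cost.expr(model, data) then evaluates to
#   1.0 * Default().expr(...) + 1e-5 * WeightDecay(...).expr(...)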
def get_params(self):
    """
    Returns the parameters of every autoencoder in the stack,
    concatenated into a single list.
    """
    return reduce(operator.add,
                  [ae.get_params() for ae in self.autoencoders])
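# Since each get_params() call returns a list, folding operator.add over
# them simply concatenates the per-autoencoder parameter lists:
import operator
assert reduce(operator.add, [['W0', 'b0'], ['W1', 'b1']]) == \
    ['W0', 'b0', 'W1', 'b1']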
def expected_energy(self, V, mf_hidden):
    """
    Compute the expected energy of the model under the variational
    distribution, given visible samples and variational parameters.

    Parameters
    ----------
    V : tensor_like
        Theano batch of visible unit observations (must be SAMPLES, not
        mean field parameters: the random variables in the expectation
        are the hiddens only).
    mf_hidden : list
        List, one element per hidden layer, of batches of variational
        parameters (must be VARIATIONAL PARAMETERS, not samples. Layers
        with analytically determined variance parameters for their mean
        field parameters will use those to integrate over the
        variational distribution, so it's not generally the same thing
        as measuring the energy at a point.)

    Returns
    -------
    rval : tensor_like
        Vector containing the expected energy of each example under the
        corresponding variational distribution.
    """
    self.visible_layer.space.validate(V)
    assert isinstance(mf_hidden, (list, tuple))
    assert len(mf_hidden) == len(self.hidden_layers)

    terms = []
    terms.append(self.visible_layer.expected_energy_term(state=V,
                                                         average=False))

    # This condition could be relaxed, but current code assumes it.
    assert len(self.hidden_layers) > 0

    terms.append(self.hidden_layers[0].expected_energy_term(
        state_below=self.visible_layer.upward_state(V),
        average_below=False, state=mf_hidden[0], average=True))

    for i in xrange(1, len(self.hidden_layers)):
        layer = self.hidden_layers[i]
        layer_below = self.hidden_layers[i - 1]
        mf_below = layer_below.upward_state(mf_hidden[i - 1])
        mf = mf_hidden[i]
        terms.append(layer.expected_energy_term(state_below=mf_below,
                                                state=mf,
                                                average_below=True,
                                                average=True))

    assert len(terms) > 0
    rval = reduce(operator.add, terms)
    assert rval.ndim == 1
    return rval
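# For orientation, a hedged statement of what the terms above sum to: in
# the standard DBM parameterization (Salakhutdinov & Hinton, 2009), with
# a visible sample v and factorized variational means h_hat^1 ... h_hat^L,
# the expected energy decomposes into exactly one term per layer,
#
#   E_q[E(v, h)] = -b^T v - sum_{l=1..L} ( h_hat^{l-1 T} W^l h_hat^l
#                                          + c^{l T} h_hat^l ),
#   with h_hat^0 := v,
#
# which is why the code can build rval by folding operator.add over the
# per-layer expected_energy_term calls.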
def energy(self, V, hidden):
    """
    Compute the energy of the current model given visible and hidden
    samples.

    Parameters
    ----------
    V : tensor_like
        Theano batch of visible unit observations (must be SAMPLES,
        not mean field parameters)
    hidden : list
        List, one element per hidden layer, of batches of samples
        (must be SAMPLES, not mean field parameters)

    Returns
    -------
    rval : tensor_like
        Vector containing the energy of each sample

    Notes
    -----
    Applying this function to non-sample theano variables is not
    guaranteed to give you an expected energy in general, so don't use
    this that way.
    """
    terms = []
    terms.append(self.visible_layer.expected_energy_term(state=V,
                                                         average=False))

    # This condition could be relaxed, but current code assumes it.
    assert len(self.hidden_layers) > 0

    terms.append(self.hidden_layers[0].expected_energy_term(
        state_below=self.visible_layer.upward_state(V),
        state=hidden[0], average_below=False, average=False))

    for i in xrange(1, len(self.hidden_layers)):
        layer = self.hidden_layers[i]
        layer_below = self.hidden_layers[i - 1]
        samples_below = layer_below.upward_state(hidden[i - 1])
        samples = hidden[i]
        terms.append(layer.expected_energy_term(state_below=samples_below,
                                                state=samples,
                                                average_below=False,
                                                average=False))

    assert len(terms) > 0
    rval = reduce(operator.add, terms)
    assert rval.ndim == 1
    return rval
def expr(self, model, data, **kwargs):
    """Returns a theano expression for the cost function.

    Parameters
    ----------
    model : MLP
    data : tuple
        Should be a valid occupant of
        CompositeSpace(model.get_input_space(), model.get_output_space())

    Returns
    -------
    total_cost : theano.gof.Variable
        coeff * sum(abs(weights)) added up for each set of weights.
    """
    # Make sure theano semantics do what I want: a symbolic scalar never
    # compares equal to a literal 0., so the filtering below only drops
    # literal zeros, never theano variables.
    assert T.scalar() != 0.
    self.get_data_specs(model)[0].validate(data)
    if isinstance(self.coeffs, list):
        warnings.warn("Coefficients should be given as a dictionary "
                      "with layer names as keys. Support for "
                      "coefficients as a list will be deprecated "
                      "on 03/06/2015")
        layer_costs = [layer.get_l1_weight_decay(coeff)
                       for layer, coeff in safe_izip(model.layers,
                                                     self.coeffs)]
        layer_costs = [cost for cost in layer_costs if cost != 0.]
    else:
        layer_costs = []
        for layer in model.layers:
            layer_name = layer.layer_name
            if layer_name in self.coeffs:
                cost = layer.get_l1_weight_decay(self.coeffs[layer_name])
                if cost != 0.:
                    layer_costs.append(cost)

    if len(layer_costs) == 0:
        rval = T.as_tensor_variable(0.)
        rval.name = '0_l1_penalty'
        return rval
    else:
        total_cost = reduce(operator.add, layer_costs)
        assert total_cost.ndim == 0
        total_cost.name = 'l1_penalty'
        return total_cost
def expr(self, model, data, **kwargs):
    """Returns a theano expression for the cost function.

    Parameters
    ----------
    model : MLP
    data : tuple
        Should be a valid occupant of
        CompositeSpace(model.get_input_space(), model.get_output_space())

    Returns
    -------
    total_cost : theano.gof.Variable
        coeff * sum(sqr(weights)) added up for each set of weights.
    """
    self.get_data_specs(model)[0].validate(data)

    def wrapped_layer_cost(layer, coeff):
        # Tolerate layers that do not implement weight decay, but only
        # when their coefficient is zero anyway.
        try:
            return layer.get_weight_decay(coeff)
        except NotImplementedError:
            if coeff == 0.:
                return 0.
            else:
                reraise_as(NotImplementedError(
                    str(type(layer)) +
                    " does not implement get_weight_decay."))

    layer_costs = [wrapped_layer_cost(layer, coeff)
                   for layer, coeff in safe_izip(model.layers, self.coeffs)]

    assert T.scalar() != 0.  # make sure theano semantics do what I want
    layer_costs = [cost for cost in layer_costs if cost != 0.]

    if len(layer_costs) == 0:
        rval = T.as_tensor_variable(0.)
        rval.name = '0_weight_decay'
        return rval
    else:
        total_cost = reduce(operator.add, layer_costs)
        assert total_cost.ndim == 0
        total_cost.name = 'weight_decay'
        return total_cost
def restrict_instances(self, instances):
    """
    Restricts the dataset to the examples whose instance label is in
    `instances`, keeping X, y, and the instance array in sync.
    """
    # Element-wise logical OR of the per-instance equality masks.
    mask = reduce(np.maximum, [self.instance == ins for ins in instances])
    mask = mask.astype('bool')
    self.instance = self.instance[mask]
    self.X = self.X[mask, :]
    if self.y.ndim == 2:
        self.y = self.y[mask, :]
    else:
        self.y = self.y[mask]
    assert self.X.shape[0] == self.y.shape[0]

    expected = sum([(self.instance == ins).sum() for ins in instances])
    assert self.X.shape[0] == expected
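# The reduce(np.maximum, ...) idiom above builds the element-wise logical
# OR of the per-instance equality masks. A small sketch showing it agrees
# with numpy's built-in membership test np.in1d:
import numpy as np
instance = np.array([0, 1, 2, 1, 3])
wanted = [1, 3]
mask = reduce(np.maximum, [instance == ins for ins in wanted]).astype(bool)
assert (mask == np.in1d(instance, wanted)).all()  # [F, T, F, T, T]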
def expr(self, model, data, **kwargs):
    """Returns a theano expression for the cost function.

    Parameters
    ----------
    model : MLP
    data : tuple
        Should be a valid occupant of
        CompositeSpace(model.get_input_space(), model.get_output_space())

    Returns
    -------
    total_cost : theano.gof.Variable
        coeff * sum(abs(weights)) added up for each set of weights.
    """
    self.get_data_specs(model)[0].validate(data)
    layer_costs = [layer.get_l1_weight_decay(coeff)
                   for layer, coeff in safe_izip(model.layers, self.coeffs)]

    assert T.scalar() != 0.  # make sure theano semantics do what I want
    layer_costs = [cost for cost in layer_costs if cost != 0.]

    if len(layer_costs) == 0:
        rval = T.as_tensor_variable(0.)
        rval.name = '0_l1_penalty'
        return rval
    else:
        total_cost = reduce(operator.add, layer_costs)
        assert total_cost.ndim == 0
        total_cost.name = 'l1_penalty'
        return total_cost
def get_fixed_var_descr(self, model, data):
    """
    Merges the fixed variable descriptors of all the costs in the sum.

    Parameters
    ----------
    model : Model
    data : theano.gof.Variable or tuple
        A valid member of the Space defined by
        self.get_data_specs(model)[0]
    """
    data_specs = self.get_data_specs(model)
    data_specs[0].validate(data)
    composite_specs, mapping = self.get_composite_specs_and_mapping(model)
    nested_data = mapping.nest(data)
    descrs = [cost.get_fixed_var_descr(model, cost_data)
              for cost, cost_data in safe_zip(self.costs, nested_data)]
    return reduce(merge, descrs)
def energy(self, V, hidden):
    """
    Compute the energy of the current model given visible and hidden
    samples.

    Parameters
    ----------
    V : tensor_like
        Theano batch of visible unit observations (must be SAMPLES,
        not mean field parameters)
    hidden : list
        List, one element per hidden layer, of batches of samples
        (must be SAMPLES, not mean field parameters)

    Returns
    -------
    rval : tensor_like
        Vector containing the energy of each sample

    Notes
    -----
    Applying this function to non-sample theano variables is not
    guaranteed to give you an expected energy in general, so don't use
    this that way.
    """
    terms = []
    terms.append(self.visible_layer.expected_energy_term(state=V,
                                                         average=False))

    # This condition could be relaxed, but current code assumes it.
    assert len(self.hidden_layers) > 0

    # It does not matter whether we recompute D from the visible-layer
    # samples or reuse the initial D computed from the raw data: the
    # sampling on the visible layer already incorporates D, so the two
    # are guaranteed to agree.
    D = None
    if type(self.visible_layer) is ReplicatedSoftMaxLayer:
        state_below, D = self.visible_layer.upward_state(
            V, D_is_initialized=True)
    else:
        state_below = self.visible_layer.upward_state(V)

    terms.append(self.hidden_layers[0].expected_energy_term(
        state_below=state_below, state=hidden[0],
        average_below=False, average=False, D=D))

    for i in xrange(1, len(self.hidden_layers)):
        layer = self.hidden_layers[i]
        layer_below = self.hidden_layers[i - 1]
        samples_below = layer_below.upward_state(hidden[i - 1])
        samples = hidden[i]
        terms.append(layer.expected_energy_term(state_below=samples_below,
                                                state=samples,
                                                average_below=False,
                                                average=False))

    assert len(terms) > 0
    rval = reduce(operator.add, terms)
    assert rval.ndim == 1
    return rval
def expr(self, model, data, **kwargs):
    """Returns a theano expression for the cost function.

    Parameters
    ----------
    model : MLP
    data : tuple
        Should be a valid occupant of
        CompositeSpace(model.get_input_space(), model.get_output_space())

    Returns
    -------
    total_cost : theano.gof.Variable
        coeff * sum(sqr(weights)) added up for each set of weights.
    """
    self.get_data_specs(model)[0].validate(data)
    assert T.scalar() != 0.  # make sure theano semantics do what I want

    def wrapped_layer_cost(layer, coeff):
        try:
            return layer.get_weight_decay(coeff)
        except NotImplementedError:
            if coeff == 0.:
                return 0.
            else:
                reraise_as(NotImplementedError(
                    str(type(layer)) +
                    " does not implement get_weight_decay."))

    if isinstance(self.coeffs, list):
        warnings.warn("Coefficients should be given as a dictionary "
                      "with layer names as keys. Support for "
                      "coefficients as a list will be deprecated "
                      "on 03/06/2015")
        layer_costs = [wrapped_layer_cost(layer, coeff)
                       for layer, coeff in safe_izip(model.layers,
                                                     self.coeffs)]
        layer_costs = [cost for cost in layer_costs if cost != 0.]
    else:
        layer_costs = []
        for layer in model.layers:
            layer_name = layer.layer_name
            if layer_name in self.coeffs:
                cost = wrapped_layer_cost(layer, self.coeffs[layer_name])
                if cost != 0.:
                    layer_costs.append(cost)

    if len(layer_costs) == 0:
        rval = T.as_tensor_variable(0.)
        rval.name = '0_weight_decay'
        return rval
    else:
        total_cost = reduce(operator.add, layer_costs)
        assert total_cost.ndim == 0
        total_cost.name = 'weight_decay'
        return total_cost
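# A hedged usage sketch of the non-deprecated path above, with coefficients
# keyed by layer name. The layer names 'h0'/'h1' are placeholders.
from pylearn2.costs.mlp import WeightDecay
decay = WeightDecay(coeffs={'h0': 1e-4, 'h1': 1e-4})
# For an MLP whose layers are named 'h0' and 'h1', decay.expr(model, data)
# builds 1e-4 * sum(sqr(W_h0)) + 1e-4 * sum(sqr(W_h1)) and names the
# resulting scalar 'weight_decay'.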
def format(self, targets, mode='stack', sparse=False):
    """
    Formats a given array of target labels into a one-hot vector. If
    labels appear multiple times, their value in the one-hot vector is
    incremented.

    Parameters
    ----------
    targets : ndarray
        A 1D array of targets, or a batch (2D array) where each row is
        a list of targets.
    mode : string
        The way in which to convert the labels to arrays. Takes three
        different options:

        - "concatenate" : concatenates the one-hot vectors from
          multiple labels
        - "stack" : returns a matrix where each row is the one-hot
          vector of a label
        - "merge" : merges the one-hot vectors together to form a
          vector where the elements are the result of an indicator
          function. NB: As the result of an indicator function, the
          result is the same if a label is duplicated in the input.
    sparse : bool
        If true, the return value is a sparse matrix. Note that if
        sparse is True, mode cannot be 'stack', because sparse
        matrices need to be 2D.

    Returns
    -------
    one_hot : a NumPy array (can be 1D-3D depending on settings) where
        normally the first axis indexes the batch items, the second
        the labels, and the third the one-hot vectors. Can be dense or
        sparse.
    """
    if mode not in ('concatenate', 'stack', 'merge'):
        raise ValueError("%s got bad mode argument '%s'"
                         % (self.__class__.__name__, str(mode)))
    elif mode == 'stack' and sparse:
        raise ValueError("Sparse matrices need to be 2D, hence they "
                         "cannot be stacked")
    if targets.ndim > 2:
        raise ValueError("Targets need to be 1D or 2D, but received %d "
                         "dimensions" % targets.ndim)
    if 'int' not in str(targets.dtype):
        raise TypeError("need an integer array for targets")

    if sparse:
        if not scipy_available:
            raise RuntimeError("Converting indices to a sparse one-hot "
                               "vector requires scipy to be installed")
        if mode == 'concatenate':
            one_hot = scipy.sparse.csr_matrix(
                (np.ones(targets.size, dtype=self._dtype),
                 (targets.flatten() + np.arange(targets.size)
                  * self._max_labels) % (self._max_labels
                                         * targets.shape[1]),
                 np.arange(targets.shape[0] + 1) * targets.shape[1]),
                (targets.shape[0], self._max_labels * targets.shape[1]))
        elif mode == 'merge':
            one_hot = scipy.sparse.csr_matrix(
                (np.ones(targets.size), targets.flatten(),
                 np.arange(targets.shape[0] + 1) * targets.shape[1]),
                (targets.shape[0], self._max_labels))
    else:
        one_hot = np.zeros(targets.shape + (self._max_labels,),
                           dtype=self._dtype)
        shape = (np.prod(one_hot.shape[:-1]), one_hot.shape[-1])
        one_hot.reshape(shape)[np.arange(shape[0]),
                               targets.flatten()] = 1
        if mode == 'concatenate':
            shape = (one_hot.shape[-3:-2]
                     + (reduce(mul, one_hot.shape[-2:], 1),))
            one_hot = one_hot.reshape(shape)
        elif mode == 'merge':
            one_hot = np.minimum(one_hot.sum(axis=one_hot.ndim - 2), 1)
    return one_hot
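# A hedged illustration of the three modes, assuming a formatter built
# for three classes, as with pylearn2's OneHotFormatter, and a 2D integer
# batch of two labels per row:
import numpy as np
from pylearn2.format.target_format import OneHotFormatter  # assumed location

fmt = OneHotFormatter(max_labels=3)
targets = np.array([[1, 0], [2, 2]])
fmt.format(targets, mode='stack')        # shape (2, 2, 3): one one-hot
                                         # vector per label per batch item
fmt.format(targets, mode='concatenate')  # shape (2, 6): the per-label
                                         # vectors glued end to end
fmt.format(targets, mode='merge')        # shape (2, 3): indicator over
                                         # labels, here [[1, 1, 0], [0, 0, 1]]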
def dot(self, x):
    # reduce passes the accumulator first, so fold x through each
    # transformation in order: the first transformation in the list is
    # applied first. (The original lambda applied the accumulator's dot
    # to the transformation object, which is backwards.)
    return reduce(lambda acc, lt: lt.dot(acc),
                  self._linear_transformations, x)
def params(self):
    # Concatenate the parameter lists of all transformations; the
    # accumulator (a list) comes first in reduce's calling convention.
    return reduce(lambda acc, lt: acc + lt.params(),
                  self._linear_transformations, [])
def transpose_dot(self, x):
    # The transpose of a composition reverses the order: apply each
    # transformation's transpose_dot starting from the last one.
    return reduce(lambda acc, lt: lt.transpose_dot(acc),
                  reversed(self._linear_transformations), x)
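# A minimal sketch of the composition semantics, using the corrected folds
# above. ToyTransform is a hypothetical stand-in for a pylearn2
# LinearTransform; reduce is the builtin, as elsewhere in this code.
import numpy as np

class ToyTransform(object):
    """Hypothetical stand-in for a pylearn2 LinearTransform."""
    def __init__(self, W):
        self.W = W
    def dot(self, x):
        return self.W.dot(x)
    def transpose_dot(self, x):
        return self.W.T.dot(x)
    def params(self):
        return [self.W]

A = ToyTransform(np.array([[2., 0.], [0., 3.]]))
B = ToyTransform(np.array([[1., 1.], [0., 1.]]))
transforms = [A, B]  # plays the role of self._linear_transformations
x = np.array([1., 1.])

y = reduce(lambda acc, lt: lt.dot(acc), transforms, x)
assert np.allclose(y, B.W.dot(A.W).dot(x))  # B o A: first transform first

z = reduce(lambda acc, lt: lt.transpose_dot(acc), reversed(transforms), x)
assert np.allclose(z, B.W.dot(A.W).T.dot(x))  # transpose reverses the order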