def _allocate(self):
    parameter_shape = [1 if broadcast else dim
                       for dim, broadcast in zip(self.shape,
                                                 self.broadcastable)]
    self.gamma = shared_floatx_nans(parameter_shape, name='gamma')
    add_role(self.gamma, WEIGHT)
    self.parameters.append(self.gamma)
    self.add_auxiliary_variable(self.gamma.norm(2), name='gamma_norm')

def _allocate(self):
    W = shared_floatx_nans((self.length, self.dim), name='W_lookup')
    self.parameters.append(W)
    add_role(W, WEIGHT)
    b = shared_floatx_nans((self.dim,), name='b_lookup')
    self.parameters.append(b)
    add_role(b, BIAS)

def setup_mainloop(extensions):
    """Create a MainLoop, register the given extensions, and supply it
    with a DataStream and a minimal model/cost to optimize.
    """
    features = [numpy.array(f, dtype=floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    dataset = IterableDataset(dict(features=features))
    datastream = DataStream(dataset)

    W = shared_floatx([0, 0], name='W')
    add_role(W, PARAMETER)
    x = tensor.vector('features')
    cost = tensor.sum((x - W) ** 2)
    cost.name = "cost"

    algorithm = GradientDescent(cost=cost, parameters=[W],
                                step_rule=Scale(1e-3))
    main_loop = MainLoop(
        model=Model(cost), data_stream=datastream,
        algorithm=algorithm,
        extensions=[
            FinishAfter(after_n_epochs=1),
        ] + extensions)
    return main_loop

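A minimal usage sketch for setup_mainloop, assuming the Blocks imports already used above plus blocks.extensions.Printing; it exercises a candidate extension against the toy cost for one epoch.

# Hedged example: run an extension through the toy main loop defined above.
from blocks.extensions import Printing

main_loop = setup_mainloop([Printing()])
main_loop.run()  # one epoch over the three toy examples, printing the log
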
def _allocate(self): self.parameters.append(shared_floatx_nans((self.dim, self.dim), name="W")) add_role(self.parameters[0], WEIGHT) self.parameters.append(shared_floatx_zeros((self.dim,), name="initial_state")) add_role(self.parameters[1], INITIAL_STATE)
def apply(self, input_):
    aggregate_axes = [0] + [1 + i for i, b in enumerate(self.broadcastable)
                            if b]
    # NOTE: don't put batch_stats on self because apply may be
    # called multiple times
    batch_stats = dict(
        (stat, getattr(input_, stat)(axis=aggregate_axes, keepdims=True))
        for stat in self.stats)

    for stat, role in self.roles.items():
        graph.add_transform(
            [batch_stats[stat]],
            graph.ConstantTransform(
                # adding zero to ensure it's a TensorType(float32, row)
                # just like the corresponding batch_stat, rather than a
                # CudaNdarray(float32, row). -__-
                0 + T.patternbroadcast(
                    self.population_stats[stat],
                    [True] + self.broadcastable)),
            reason="population_normalization")

        # make the batch statistics identifiable to get_updates() below
        add_role(batch_stats[stat], self.roles[stat])
        batch_stats[stat] = self.annotated_statistic(batch_stats[stat])

    gamma = T.patternbroadcast(self.gamma, [True] + self.broadcastable)
    beta = T.patternbroadcast(self.beta, [True] + self.broadcastable)
    return theano.tensor.nnet.bn.batch_normalization(
        inputs=input_, gamma=gamma, beta=beta,
        mean=batch_stats["mean"],
        std=T.sqrt(batch_stats["var"] + self.epsilon))

def compute_step(self, parameter, previous_step):
    mean_square_step_tm1 = shared_floatx_zeros_matching(
        parameter, "mean_square_step_tm1")
    add_role(mean_square_step_tm1, ALGORITHM_BUFFER)
    mean_square_delta_x_tm1 = shared_floatx_zeros_matching(
        parameter, "mean_square_delta_x_tm1")
    add_role(mean_square_delta_x_tm1, ALGORITHM_BUFFER)

    mean_square_step_t = (
        self.decay_rate * mean_square_step_tm1 +
        (1 - self.decay_rate) * tensor.sqr(previous_step))
    rms_delta_x_tm1 = tensor.sqrt(mean_square_delta_x_tm1 + self.epsilon)
    rms_step_t = tensor.sqrt(mean_square_step_t + self.epsilon)
    delta_x_t = rms_delta_x_tm1 / rms_step_t * previous_step
    mean_square_delta_x_t = (
        self.decay_rate * mean_square_delta_x_tm1 +
        (1 - self.decay_rate) * tensor.sqr(delta_x_t))

    step = delta_x_t
    updates = [(mean_square_step_tm1, mean_square_step_t),
               (mean_square_delta_x_tm1, mean_square_delta_x_t)]
    return step, updates

def __init__(self, threshold, axis=None):
    axis = pack(axis) if axis is not None else ()
    self.axis = set(axis)
    self.threshold = shared_floatx(threshold, "threshold")
    add_role(self.threshold, ALGORITHM_HYPERPARAMETER)
    if len(axis) != len(self.axis):
        raise ValueError("axis must be unique")

def _allocate(self):
    parameter_shape = [1 if broadcast else dim
                       for dim, broadcast in zip(self.shape,
                                                 self.broadcastable)]
    self.b = shared_floatx_nans(parameter_shape, name='b')
    add_role(self.b, BIAS)
    self.parameters.append(self.b)
    self.add_auxiliary_variable(self.b.norm(2), name='b_norm')

def __init__(self, decay_rate=0.95, epsilon=1e-6):
    if not 0.0 <= decay_rate <= 1.0:
        raise ValueError("decay rate needs to be in [0, 1]")
    self.decay_rate = shared_floatx(decay_rate, "decay_rate")
    add_role(self.decay_rate, ALGORITHM_HYPERPARAMETER)
    self.epsilon = shared_floatx(epsilon, "epsilon")
    add_role(self.epsilon, ALGORITHM_HYPERPARAMETER)

def annotate_bn(self, var, id, var_type, mb_size, size, norm_ax):
    var_shape = np.array((1,) + size)
    # Integer division: the result is a dimension count
    out_dim = np.prod(var_shape) // np.prod(var_shape[list(norm_ax)])
    # Flatten the var - shared variable updating is not trivial otherwise,
    # as theano seems to believe a row vector is a matrix and will complain
    # about the updates
    orig_shape = var.shape
    var = var.flatten()
    # Here we add the name and role; the variables will later be identified
    # by these values
    var.name = id + '_%s_clean' % var_type
    add_role(var, BNPARAM)
    shared_var = self.shared(np.zeros(out_dim),
                             name='shared_%s' % var.name, role=None)

    # Update running average estimates. When the counter is reset to 1, it
    # will clear its memory
    cntr, c_up = self.counter()
    one = np.float32(1)
    run_avg = lambda new, old: one / cntr * new + (one - one / cntr) * old
    if var_type == 'mean':
        new_value = run_avg(var, shared_var)
    elif var_type == 'var':
        mb_size = T.cast(mb_size, 'float32')
        new_value = run_avg(mb_size / (mb_size - one) * var, shared_var)
    else:
        raise NotImplementedError('Unknown batch norm var %s' % var_type)

    # Add the counter update to the annotated update if it is the first
    # instance of a counter
    self.annotate_update([(shared_var, new_value)] + c_up, var)
    return var.reshape(orig_shape)

def _allocate(self):
    c_dim = self.get_dim('c')
    self.c_0 = shared_floatx_nans((c_dim,), name='c_0')
    add_role(self.c_0, PARAMETER)

    # add the theano shared variables to our parameter lists
    self.params.extend([self.c_0])
    return

def shared_param(init, name, cast_float32, role, **kwargs):
    # Optionally cast the initial value to float32 before wrapping it in a
    # shared variable (the original left `v` undefined when cast_float32
    # was False).
    v = np.float32(init) if cast_float32 else init
    p = theano.shared(v, name=name, **kwargs)
    if debug:
        p.tag.test_value = v
    add_role(p, role)
    return p

def __init__(self, decay_rate=0.9, max_scaling=1e5):
    if not 0.0 <= decay_rate <= 1.0:
        raise ValueError("decay rate needs to be in [0, 1]")
    if max_scaling <= 0:
        raise ValueError("max. scaling needs to be greater than 0")
    self.decay_rate = shared_floatx(decay_rate, "decay_rate")
    add_role(self.decay_rate, ALGORITHM_HYPERPARAMETER)
    self.epsilon = 1.0 / max_scaling

def __init__(self, learning_rate=0.002, beta1=0.1, beta2=0.001,
             epsilon=1e-8, decay_factor=(1 - 1e-8)):
    self.learning_rate = shared_floatx(learning_rate, "learning_rate")
    self.beta1 = shared_floatx(beta1, "beta1")
    self.beta2 = shared_floatx(beta2, "beta2")
    self.epsilon = shared_floatx(epsilon, "epsilon")
    self.decay_factor = shared_floatx(decay_factor, "decay_factor")
    for param in [self.learning_rate, self.beta1, self.beta2,
                  self.epsilon, self.decay_factor]:
        add_role(param, ALGORITHM_HYPERPARAMETER)

def _allocate(self):
    self.parameters.append(shared_floatx_nans((self.dim, self.dim),
                                               name='state_to_state'))
    self.parameters.append(shared_floatx_nans((self.dim, 2 * self.dim),
                                               name='state_to_gates'))
    for i in range(2):
        if self.parameters[i]:
            add_role(self.parameters[i], WEIGHT)

def _allocate(self):
    W = shared_floatx_nans((self.input_dim, self.attention_dim), name='W')
    add_role(W, WEIGHT)
    self.parameters.append(W)
    self.add_auxiliary_variable(W.norm(2), name='W_norm')
    if self.use_bias:
        b = shared_floatx_nans((1,), name='b')
        add_role(b, BIAS)
        self.parameters.append(b)

def compute_step(self, parameter, previous_step):
    mean_square_step_tm1 = shared_floatx_zeros_matching(
        parameter, "mean_square_step_tm1")
    add_role(mean_square_step_tm1, ALGORITHM_BUFFER)
    mean_square_step_t = (
        self.decay_rate * mean_square_step_tm1 +
        (1 - self.decay_rate) * tensor.sqr(previous_step))
    add_role(mean_square_step_t, ALGORITHM_BUFFER)
    rms_step_t = tensor.maximum(tensor.sqrt(mean_square_step_t),
                                self.epsilon)
    step = previous_step / rms_step_t
    updates = [(mean_square_step_tm1, mean_square_step_t)]
    return step, updates

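For context, a compute_step like the one above is invoked once per parameter by the training algorithm, which collects the returned (buffer, new_value) pairs into its update list. A hedged sketch of that wiring using Blocks' stock RMSProp and Scale rules; `cost` and `parameters` are assumed to come from an existing model or graph.

# Hedged example: plugging a step rule into GradientDescent.
from blocks.algorithms import CompositeRule, GradientDescent, RMSProp, Scale

algorithm = GradientDescent(
    cost=cost, parameters=parameters,
    step_rule=CompositeRule([RMSProp(decay_rate=0.9), Scale(0.01)]))
# compute_step() runs once per parameter, creating one mean_square_step_tm1
# buffer (tagged ALGORITHM_BUFFER) for each.
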
def _allocate(self):
    parameter_shape = [1 if broadcast else dim
                       for dim, broadcast in zip(self.shape,
                                                 self.broadcastable)]
    self.w = shared_floatx_nans(parameter_shape, name='w')
    add_role(self.w, WEIGHT)
    self.parameters.append(self.w)
    self.add_auxiliary_variable(self.w.norm(2), name='w_norm')

def __init__(self, eta=0, gamma=0.55, seed=180891):
    self.eta_sqrt = shared_floatx(sqrt(eta), "eta")
    add_role(self.eta_sqrt, ALGORITHM_HYPERPARAMETER)
    self.gamma_half = shared_floatx(gamma / 2, "gamma")
    add_role(self.gamma_half, ALGORITHM_HYPERPARAMETER)
    self.theano_random = rng_mrg.MRG_RandomStreams(seed=seed)

def add_auxiliary_variable(self, variable, roles=None, name=None):
    """Attach an auxiliary variable to the graph.

    Auxiliary variables are Theano variables that are not part of a
    brick's output, but can be useful nonetheless e.g. as a regularizer
    or to monitor during training progress.

    Parameters
    ----------
    variable : :class:`~tensor.TensorVariable`
        The variable you want to add.
    roles : list of :class:`.VariableRole` instances, optional
        The roles of this variable. The :const:`.AUXILIARY` role will
        automatically be added. Other options are :const:`.COST`,
        :const:`.WEIGHT`, etc.
    name : str, optional
        Name to give to the variable. If the variable already has a
        name it will be overwritten.

    Examples
    --------
    >>> from blocks.bricks.base import application, Brick
    >>> from blocks.graph import ComputationGraph
    >>> from blocks.roles import AUXILIARY, COST
    >>> from blocks.utils import shared_floatx_nans
    >>> class Foo(Brick):
    ...     def _allocate(self):
    ...         W = shared_floatx_nans((10, 10))
    ...         self.add_auxiliary_variable(W.mean(), name='mean_W')
    ...     @application
    ...     def apply(self, x, application_call):
    ...         application_call.add_auxiliary_variable(
    ...             x - 1, name='x_minus_1')
    ...         application_call.add_auxiliary_variable(
    ...             x.mean(), roles=[COST], name='mean_x')
    ...         return x + 1
    >>> from theano import tensor
    >>> x = tensor.vector()
    >>> y = Foo().apply(x)
    >>> from blocks.filter import VariableFilter
    >>> cg = ComputationGraph([y])
    >>> var_filter = VariableFilter(roles=[AUXILIARY])
    >>> var_filter(cg.variables)  # doctest: +SKIP
    {x_minus_1, mean_W, mean_x}
    >>> var_filter = VariableFilter(roles=[COST])
    >>> var_filter(cg.variables)  # doctest: +SKIP
    {mean_x}

    """
    add_annotation(variable, self)
    if name is not None:
        variable.name = name
        variable.tag.name = name
    add_role(variable, AUXILIARY)
    if roles is not None:
        for role in roles:
            add_role(variable, role)
    self.auxiliary_variables.append(variable)

def _allocate(self):
    parameter_shape = [1 if broadcast else dim
                       for dim, broadcast in zip(self.shape,
                                                 self.broadcastable)]
    self.beta = shared_floatx_nans(parameter_shape, name='beta')
    add_role(self.beta, BIAS)
    self.parameters.append(self.beta)
    self.add_auxiliary_variable(self.beta.norm(2), name='beta_norm')

def _create_intpic_histogram_for(param, pic_size, label_count):
    # The pic histogram is a 2d-array of pic_size.
    # For a 3d parameter, that ends up being a 5d tensor.
    # For a 1d parameter, that's a 3d tensor.
    shape = param.get_value().shape + (label_count,) + pic_size
    buf = shared_floatx_zeros(shape)
    buf.tag.for_parameter = param
    add_role(buf, INTPIC_STATISTICS)
    return buf

def _allocate(self):
    W = shared_floatx_nans((self.n_out, self.dwin * self.vector_size),
                           name='W')
    b = shared_floatx_nans((self.n_out,), name='b')
    add_role(b, BIAS)
    add_role(W, WEIGHT)
    self.parameters.append(W)
    self.parameters.append(b)
    self.mlp.allocate()

def __init__(self, input_, n_in, n_out, name='logisticRegression_rel',
             W=None, b=None, **kwargs):
    """ Initialize the parameters of the logistic regression

    :type input_: theano.tensor.TensorType
    :param input_: symbolic variable that describes the input of the
                   architecture (one minibatch)

    :type n_in: int
    :param n_in: number of input units, the dimension of the space in
                 which the datapoints lie

    :type n_out: int
    :param n_out: number of output units, the dimension of the space in
                  which the labels lie
    """
    print('******************no MIML')
    super(LogisticRegression, self).__init__(**kwargs)

    if W is None:
        # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
        W = theano.shared(value=numpy.zeros((n_in, n_out),
                                            dtype=theano.config.floatX),
                          name='W')
    # else:
    #     self.W = W
    if b is None:
        # initialize the biases b as a vector of n_out 0s
        b = theano.shared(value=numpy.zeros((n_out,),
                                            dtype=theano.config.floatX),
                          name='b')
    # else:
    #     self.b = b

    add_role(W, WEIGHT)
    add_role(b, BIAS)
    self.parameters = []
    self.parameters.append(W)
    self.parameters.append(b)
    self.add_auxiliary_variable(W.norm(2), name='W_norm')
    self.add_auxiliary_variable(b.norm(2), name='b_norm')
    self.allocated = True
    self.name = name

    self.p_y_given_x = T.nnet.softmax(
        T.dot(input_, self.parameters[0]) + self.parameters[1])
    # compute prediction as class whose probability is maximal in
    # symbolic form
    self.y_pred = T.argmax(self.p_y_given_x, axis=1)

def copy_and_tag_noise(variable, brick, role, name):
    """Helper method to copy a variable and annotate it."""
    copy = variable.copy()
    # Theano name
    copy.name = "{}_apply_{}".format(brick.name, name)
    add_annotation(copy, brick)
    # Blocks name
    copy.tag.name = name
    add_role(copy, role)
    return copy

def _allocate(self):
    if self.noise_batch_size is not None:
        if self.tied_noise:
            N = shared_floatx_zeros(
                (self.noise_batch_size, self.input_dim[0]), name='N')
        else:
            N = shared_floatx_zeros(
                (self.noise_batch_size,) + self.input_dim, name='N')
        add_role(N, NOISE)
        self.parameters.append(N)

def _allocate(self):
    W = shared_floatx_nans((self.input_dim, self.output_dim), name='W')
    add_role(W, WEIGHT)
    self.parameters.append(W)
    self.add_auxiliary_variable(W.norm(2), name='W_norm')
    if getattr(self, 'use_bias', True):
        b = shared_floatx_nans((self.output_dim,), name='b')
        add_role(b, BIAS)
        self.parameters.append(b)
        self.add_auxiliary_variable(b.norm(2), name='b_norm')

def allocate_parameters(self):
    parameters = Parameters()
    for parameter in [
            theano.shared(self.initial_gamma * ones(self.shape),
                          name="gammas"),
            theano.shared(self.initial_beta * ones(self.shape),
                          name="betas")]:
        add_role(parameter, PARAMETER)
        setattr(parameters, parameter.name, parameter)
        if self.name:
            parameter.name = "%s.%s" % (self.name, parameter.name)
    return parameters

def test_replace_variable_is_auxiliary():
    # Test if warning appears when variable is an AUXILIARY variable
    with warnings.catch_warnings(record=True) as w:
        x = tensor.scalar()
        y = x + 1
        add_role(y, AUXILIARY)
        cg = ComputationGraph([y])
        cg.replace([(y, 2 * y)])
        assert len(w) == 1
        assert "auxiliary" in str(w[-1].message)

def _allocate(self):
    W = shared_floatx_zeros((self.input_dim, self.output_dim), name='W')
    add_role(W, WEIGHTS)
    self.params.append(W)
    self.add_auxiliary_variable(W.norm(2), name='W_norm')
    if self.use_bias:
        b = shared_floatx_zeros((self.output_dim,), name='b')
        add_role(b, BIASES)
        self.params.append(b)
        self.add_auxiliary_variable(b.norm(2), name='b_norm')

def _allocate(self):
    W = shared_floatx_nans((self.input_dim, self.output_dim), name='W')
    add_role(W, WEIGHT)
    self.parameters.append(W)
    self.add_auxiliary_variable(W.norm(2), name='W_norm')
    if self.use_bias:
        b = shared_floatx_nans((self.output_dim,), name='b')
        add_role(b, BIAS)
        self.parameters.append(b)
        self.add_auxiliary_variable(b.norm(2), name='b_norm')

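The role tags attached in _allocate are what make parameters retrievable later. A hedged sketch using the stock Blocks Linear brick (whose _allocate is essentially the one above), recovering the WEIGHT-tagged parameter through a VariableFilter.

# Hedged example: allocate a Linear brick and filter its parameters by role.
from theano import tensor
from blocks.bricks import Linear
from blocks.initialization import Constant, IsotropicGaussian
from blocks.graph import ComputationGraph
from blocks.filter import VariableFilter
from blocks.roles import WEIGHT

x = tensor.matrix('x')
linear = Linear(input_dim=10, output_dim=5,
                weights_init=IsotropicGaussian(0.01),
                biases_init=Constant(0))
y = linear.apply(x)   # triggers allocation, hence _allocate and add_role
linear.initialize()

cg = ComputationGraph([y])
weights = VariableFilter(roles=[WEIGHT])(cg.variables)  # -> [W]
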
def _initialize(self):
    self.layers_features, self.data_input = create_theano_expressions()
    cg = ComputationGraph(self.layers_features[self.layer_name])

    for i, v in enumerate(cg.shared_variables):
        v.name = str(i)
        self.parameters.append(v)
        add_role(v, WEIGHT)

def _allocate(self):
    W = shared_floatx_nans((self.num_filters, self.input_dim,
                            self.filter_length, 1), name='W')
    add_role(W, FILTER)
    self.params.append(W)
    if self.use_bias:
        b = shared_floatx_nans((self.num_filters,), name='b')
        add_role(b, BIAS)
        self.params.append(b)

def _allocate(self):
    self.parameters.append(shared_floatx_nans((self.dim, self.dim),
                                               name='state_to_state'))
    self.parameters.append(shared_floatx_nans((self.dim, 2 * self.dim),
                                               name='state_to_gates'))
    self.parameters.append(shared_floatx_zeros((self.dim,),
                                               name="initial_state"))
    for i in range(2):
        if self.parameters[i]:
            add_role(self.parameters[i], WEIGHT)
    add_role(self.parameters[2], INITIAL_STATE)

def __init__(self, learning_rate=0.002, mu1=0.99, nu2=0.999,
             epsilon=1e-8, decay_prod=1.0):
    self.learning_rate = shared_floatx(learning_rate, "learning_rate")
    self.mu1 = shared_floatx(mu1, "mu1")
    self.nu2 = shared_floatx(nu2, "nu2")
    self.epsilon = shared_floatx(epsilon, "epsilon")
    self.decay_prod = shared_floatx(decay_prod, "decay_prod")
    for param in [self.learning_rate, self.mu1, self.nu2,
                  self.epsilon, self.decay_prod]:
        add_role(param, ALGORITHM_HYPERPARAMETER)

def cost(self, application_call, outputs, mask=None, **kwargs):
    # Compute the sum of costs
    costs = self.cost_matrix(outputs, mask=mask, **kwargs)
    cost = tensor.mean(costs.sum(axis=0))
    add_role(cost, COST)
    # Add auxiliary variable for per sequence element cost
    application_call.add_auxiliary_variable(
        (costs.sum() / mask.sum()) if mask is not None else costs.mean(),
        name='per_sequence_element')
    return cost

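The COST role attached above is what lets monitoring code pick the cost back out of a larger graph later; a small hedged sketch, assuming a ComputationGraph `cg` has already been built from such a cost.

# Hedged example: recover COST-tagged variables for monitoring.
from blocks.filter import VariableFilter
from blocks.roles import COST

cost_vars = VariableFilter(roles=[COST])(cg.variables)  # `cg` assumed to exist
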
def copy_and_tag(variable, role, name):
    """Helper method to copy a variable and annotate it."""
    copy = variable.copy()
    # Theano name
    copy.name = _variable_name(brick.name, self.name, name)
    add_annotation(copy, brick)
    add_annotation(copy, call)
    # Blocks name
    copy.tag.name = name
    add_role(copy, role)
    return copy

def _allocate(self):
    W = shared_floatx_nans(
        (self.num_filters, self.num_channels) + self.filter_size,
        name='W')
    add_role(W, FILTER)
    self.params.append(W)
    self.add_auxiliary_variable(W.norm(2), name='W_norm')
    if self.use_bias:
        b = shared_floatx_nans(self.get_dim('output'), name='b')
        add_role(b, BIAS)
        self.params.append(b)
        self.add_auxiliary_variable(b.norm(2), name='b_norm')

def apply(self, *args, **kwargs):
    out = self._apply(*args, **kwargs)
    # ====== add roles ====== #
    tmp = out
    if not isinstance(tmp, (tuple, list)):
        tmp = [out]
    for o in tmp:
        add_role(o, OUTPUT)
        add_annotation(o, self)
    # return outputs
    return out

def _allocate(self):
    W = shared_floatx_nans(
        (self.num_filters, self.num_channels) + self.filter_size,
        name='W')
    add_role(W, FILTERS)
    self.params.append(W)
    self.add_auxiliary_variable(W.norm(2), name='W_norm')
    if self.use_bias:
        b = shared_floatx_nans(self.get_dim('output'), name='b')
        add_role(b, BIASES)
        self.params.append(b)
        self.add_auxiliary_variable(b.norm(2), name='b_norm')

def _initialize(self):
    self.beta = shared_floatx_zeros((self.dim,), name='beta')
    self.gamma = shared_floatx_zeros((self.dim,), name='gamma')
    add_role(self.beta, PARAMETER)
    add_role(self.gamma, PARAMETER)
    self.parameters = [self.gamma, self.beta]
    self.beta_init.initialize(self.beta, self.rng)
    self.gamma_init.initialize(self.gamma, self.rng)

def allocate_parameters(self, args):
    if hasattr(self, "parameters"):
        return self.parameters
    self.parameters = Empty()

    h0 = theano.shared(zeros((args.num_hidden,)), name="h0")
    c0 = theano.shared(zeros((args.num_hidden,)), name="c0")
    if args.init == "id":
        Wa = theano.shared(np.concatenate([
            np.eye(args.num_hidden),
            orthogonal((args.num_hidden, 3 * args.num_hidden)),
        ], axis=1).astype(theano.config.floatX), name="Wa")
    else:
        Wa = theano.shared(orthogonal(
            (args.num_hidden, 4 * args.num_hidden)), name="Wa")
    Wx = theano.shared(orthogonal((1, 4 * args.num_hidden)), name="Wx")

    a_gammas = theano.shared(args.initial_gamma * ones(
        (4 * args.num_hidden,)), name="a_gammas")
    b_gammas = theano.shared(args.initial_gamma * ones(
        (4 * args.num_hidden,)), name="b_gammas")
    ab_betas = theano.shared(args.initial_beta * ones(
        (4 * args.num_hidden,)), name="ab_betas")

    # forget gate bias initialization
    forget_bias = ab_betas.get_value()
    forget_bias[args.num_hidden:2 * args.num_hidden] = 1.
    ab_betas.set_value(forget_bias)

    c_gammas = theano.shared(args.initial_gamma * ones(
        (args.num_hidden,)), name="c_gammas")
    c_betas = theano.shared(args.initial_beta * ones(
        (args.num_hidden,)), name="c_betas")

    if not args.baseline:
        parameters_list = [h0, c0, Wa, Wx, a_gammas, b_gammas, ab_betas,
                           c_gammas, c_betas]
    else:
        parameters_list = [h0, c0, Wa, Wx, ab_betas, c_betas]
    for parameter in parameters_list:
        print(parameter.name)
        add_role(parameter, PARAMETER)
        setattr(self.parameters, parameter.name, parameter)

    return self.parameters

def construct_graphs(args, nclasses, length):
    constructor = LSTM if args.lstm else RNN

    if args.permuted:
        permutation = np.random.randint(0, length, size=(length,))

    Wy = theano.shared(orthogonal((args.num_hidden, 1)), name="Wy")
    by = theano.shared(np.zeros((nclasses,), dtype=theano.config.floatX),
                       name="by")

    ### graph construction
    inputs = dict(features=T.tensor3("x"),
                  drops_state=T.tensor3('drops_state'),
                  drops_cell=T.tensor3('drops_cell'),
                  targets=T.matrix("y"))
    x, drops_state, drops_cell, y = (inputs["features"],
                                     inputs['drops_state'],
                                     inputs['drops_cell'],
                                     inputs["targets"])

    # theano.config.compute_test_value = "warn"
    # batch = next(get_stream(which_set="train",
    #                         num_examples=args.num_examples,
    #                         length=args.length,
    #                         batch_size=args.batch_size,
    #                         drop_prob_cell=args.drop_prob_cell,
    #                         drop_prob_state=args.drop_prob_state,
    #                         for_evaluation=False,
    #                         hidden_dim=args.num_hidden).get_epoch_iterator())
    # x.tag.test_value = batch[0]
    # y.tag.test_value = batch[1]
    # drops_state.tag.test_value = batch[2]
    # drops_cell.tag.test_value = batch[3]

    # x = x.dimshuffle(1, 0, 2)
    y = y.flatten(ndim=1)

    args.use_population_statistics = False
    turd = constructor(args, nclasses)
    (outputs, training_updates, dummy_states,
     popstats) = turd.construct_graph_popstats(args, x, drops_state,
                                               drops_cell, length)
    training_graph, training_extensions = construct_common_graph(
        "training", args, outputs, dummy_states, Wy, by, y)

    # args.use_population_statistics = True
    # (inf_outputs, inference_updates,
    #  dummy_states, _) = turd.construct_graph_popstats(
    #     args, x, drops_state, drops_cell, length, popstats=popstats)
    # inference_graph, inference_extensions = construct_common_graph(
    #     "inference", args, inf_outputs, dummy_states, Wy, by, y)

    add_role(Wy, PARAMETER)
    add_role(by, PARAMETER)

    args.use_population_statistics = False
    return (dict(training=training_graph, inference=training_graph),
            dict(training=training_extensions,
                 inference=training_extensions),
            dict(training=training_updates, inference=training_updates))

def cost(self, application_call, readouts, outputs):
    if readouts.ndim == 3:
        temp_shape = (readouts.shape[0] * readouts.shape[1],
                      readouts.shape[2])
        correct_mask = tensor.zeros(temp_shape)
        correct_mask = tensor.set_subtensor(
            correct_mask[tensor.arange(temp_shape[0]), outputs.flatten()],
            1)
        correct_mask = correct_mask.reshape(readouts.shape)

        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
        # WARNING:
        # this code only makes sense when the actual groundtruths
        # are plugged for groundtruths.
        #
        # This happens in SpeechRecognizer.get_cost_graph()
        # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
        groundtruth = outputs.copy()
        groundtruth.name = self.GROUNDTRUTH

        reward_matrix, gain_matrix = self.reward_op(groundtruth, outputs)
        gain_matrix = theano.tensor.maximum(gain_matrix, self.min_reward)
        gain_matrix.name = self.GAIN_MATRIX
        reward_matrix.name = self.REWARD_MATRIX

        predicted_gains = readouts.reshape(temp_shape)[
            tensor.arange(temp_shape[0]), outputs.flatten()]
        predicted_gains = predicted_gains.reshape(outputs.shape)
        predicted_gains = tensor.concatenate(
            [tensor.zeros((1, outputs.shape[1])), predicted_gains[1:]])
        predicted_rewards = predicted_gains.cumsum(axis=0)
        predicted_rewards = readouts + predicted_rewards[:, :, None]

        gain_mse_loss_matrix = ((readouts - gain_matrix) ** 2).sum(axis=-1)
        reward_mse_loss_matrix = (
            (predicted_rewards - reward_matrix) ** 2).sum(axis=-1)
        gain_mse_loss = gain_mse_loss_matrix.sum()
        gain_mse_loss.name = self.GAIN_MSE_LOSS
        reward_mse_loss = reward_mse_loss_matrix.sum()
        reward_mse_loss.name = self.REWARD_MSE_LOSS

        application_call.add_auxiliary_variable(gain_mse_loss)
        if self.criterion == 'mse_gain':
            add_role(reward_mse_loss, OTHER_LOSS)
            application_call.add_auxiliary_variable(reward_mse_loss)
            return gain_mse_loss_matrix
        else:
            add_role(gain_mse_loss, OTHER_LOSS)
            application_call.add_auxiliary_variable(gain_mse_loss)
            return reward_mse_loss_matrix
    return readouts[tensor.arange(readouts.shape[0]), outputs]

def _allocate(self):
    super(GaussianLayerFixedSigma, self)._allocate()
    dim_X, dim_H = self.dim_X, self.dim_H

    self.W_mean = shared_floatx_zeros((dim_H, dim_X), name='W_mean')
    add_role(self.W_mean, WEIGHT)
    self.b_mean = shared_floatx_zeros((dim_X,), name='b_mean')
    add_role(self.b_mean, BIAS)

    self.parameters = [self.W_mean, self.b_mean]

def compute_step(self, parameter, previous_step):
    mean_square_step_tm1 = shared_floatx(parameter.get_value() * 0.,
                                         "mean_square_step_tm1")
    add_role(mean_square_step_tm1, ALGORITHM_BUFFER)
    mean_square_step_t = (
        self.decay_rate * mean_square_step_tm1 +
        (1 - self.decay_rate) * tensor.sqr(previous_step))
    add_role(mean_square_step_t, ALGORITHM_BUFFER)
    rms_step_t = tensor.maximum(tensor.sqrt(mean_square_step_t),
                                self.epsilon)
    step = previous_step / rms_step_t
    updates = [(mean_square_step_tm1, mean_square_step_t)]
    return step, updates

def __init__(self, rng, W, b, filter_shape, image_shape, poolsize=(2, 2),
             name='ConvRel', **kwargs):
    """ Allocate a LeNetConvPoolLayer with shared variable internal parameters.

    :type rng: numpy.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type W: theano.matrix
    :param W: the weight matrix used for convolution

    :type b: theano vector
    :param b: the bias used for convolution

    :type input: theano.tensor.dtensor4
    :param input: symbolic image tensor, of shape image_shape

    :type filter_shape: tuple or list of length 4
    :param filter_shape: (number of filters, num input feature maps,
                          filter height, filter width)

    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num input feature maps,
                         image height, image width)

    :type poolsize: tuple or list of length 2
    :param poolsize: the downsampling (pooling) factor (#rows, #cols)
    """
    super(LeNetConvPoolLayer, self).__init__(**kwargs)

    assert image_shape[1] == filter_shape[1]
    self.input = input

    add_role(W, WEIGHT)
    add_role(b, BIAS)
    # store parameters of this layer
    self.parameters = []
    self.parameters.append(W)
    self.parameters.append(b)
    self.add_auxiliary_variable(W.norm(2), name='W_norm')
    self.add_auxiliary_variable(b.norm(2), name='b_norm')
    self.allocated = True
    self.name = name
    self.filter_shape = filter_shape
    self.poolsize = poolsize

def compute_step(self, parameter, previous_step):
    name = 'adagrad_sqs'
    if parameter.name:
        name += '_' + parameter.name
    ssq = shared_floatx(parameter.get_value() * 0., name=name)
    add_role(ssq, ALGORITHM_BUFFER)

    ssq_t = tensor.sqr(previous_step) + ssq
    step = (self.learning_rate * previous_step /
            (tensor.sqrt(ssq_t) + self.epsilon))

    updates = [(ssq, ssq_t)]
    return step, updates