Example #1
 def _allocate(self):
     parameter_shape = [1 if broadcast else dim
                        for dim, broadcast in zip(self.shape, self.broadcastable)]
     self.gamma = shared_floatx_nans(parameter_shape, name='gamma')
     add_role(self.gamma, WEIGHT)
     self.parameters.append(self.gamma)
     self.add_auxiliary_variable(self.gamma.norm(2), name='gamma_norm')
Example #2
 def _allocate(self):
     W = shared_floatx_nans((self.length, self.dim), name='W_lookup')
     self.parameters.append(W)
     add_role(W, WEIGHT)
     b = shared_floatx_nans((self.dim,), name='b_lookup')
     self.parameters.append(b)
     add_role(b, BIAS)
Example #3
def setup_mainloop(extensions):
    """Create a MainLoop, register the given extension, supply it with a
        DataStream and a minimal model/cost to optimize.
    """
    features = [numpy.array(f, dtype=floatX)
                for f in [[1, 2], [3, 4], [5, 6]]]
    dataset = IterableDataset(dict(features=features))
    datastream = DataStream(dataset)

    W = shared_floatx([0, 0], name='W')
    add_role(W, PARAMETER)
    x = tensor.vector('features')
    cost = tensor.sum((x-W)**2)
    cost.name = "cost"

    algorithm = GradientDescent(cost=cost, parameters=[W],
                                step_rule=Scale(1e-3))

    main_loop = MainLoop(
        model=Model(cost), data_stream=datastream,
        algorithm=algorithm,
        extensions=[
            FinishAfter(after_n_epochs=1),
            ] + extensions)

    return main_loop
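A minimal usage sketch for the helper above, assuming the usual Blocks imports are available; Printing is only an illustrative extra extension (FinishAfter is already registered inside setup_mainloop):

from blocks.extensions import Printing

# Build the loop with one extra extension and run it; training stops after
# the single epoch configured by FinishAfter, nudging W toward the feature mean.
main_loop = setup_mainloop([Printing()])
main_loop.run()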
Example #4
 def _allocate(self):
     self.parameters.append(shared_floatx_nans((self.dim, self.dim),
                                               name="W"))
     add_role(self.parameters[0], WEIGHT)
     self.parameters.append(shared_floatx_zeros((self.dim,),
                                                name="initial_state"))
     add_role(self.parameters[1], INITIAL_STATE)
Example #5
    def apply(self, input_):
        aggregate_axes = [0] + [1 + i for i, b in enumerate(self.broadcastable) if b]
        # NOTE: don't put batch_stats on self because apply may be
        # called multiple times
        batch_stats = dict(
            (stat, getattr(input_, stat)(axis=aggregate_axes,
                                         keepdims=True))
            for stat in self.stats)

        for stat, role in self.roles.items():
            graph.add_transform([batch_stats[stat]],
                                graph.ConstantTransform(
                                    # adding zero to ensure it's a TensorType(float32, row)
                                    # just like the corresponding batch_stat, rather than a
                                    # CudaNdarray(float32, row).  -__-
                                    0 + T.patternbroadcast(
                                        self.population_stats[stat],
                                        [True] + self.broadcastable)),
                                reason="population_normalization")

            # make the batch statistics identifiable to get_updates() below
            add_role(batch_stats[stat], self.roles[stat])
            batch_stats[stat] = self.annotated_statistic(batch_stats[stat])

        gamma = T.patternbroadcast(self.gamma, [True] + self.broadcastable)
        beta = T.patternbroadcast(self.beta, [True] + self.broadcastable)
        return theano.tensor.nnet.bn.batch_normalization(
            inputs=input_, gamma=gamma, beta=beta,
            mean=batch_stats["mean"],
            std=T.sqrt(batch_stats["var"] + self.epsilon))
Example #6
File: __init__.py | Project: treiden/blocks
    def compute_step(self, parameter, previous_step):
        mean_square_step_tm1 = shared_floatx_zeros_matching(
            parameter, "mean_square_step_tm1")
        add_role(mean_square_step_tm1, ALGORITHM_BUFFER)
        mean_square_delta_x_tm1 = shared_floatx_zeros_matching(
            parameter, "mean_square_delta_x_tm1")
        add_role(mean_square_delta_x_tm1, ALGORITHM_BUFFER)

        mean_square_step_t = (
            self.decay_rate * mean_square_step_tm1 +
            (1 - self.decay_rate) * tensor.sqr(previous_step)
        )

        rms_delta_x_tm1 = tensor.sqrt(mean_square_delta_x_tm1 + self.epsilon)
        rms_step_t = tensor.sqrt(mean_square_step_t + self.epsilon)
        delta_x_t = rms_delta_x_tm1 / rms_step_t * previous_step

        mean_square_delta_x_t = (
            self.decay_rate * mean_square_delta_x_tm1 +
            (1 - self.decay_rate) * tensor.sqr(delta_x_t)
        )

        step = delta_x_t
        updates = [(mean_square_step_tm1, mean_square_step_t),
                   (mean_square_delta_x_tm1, mean_square_delta_x_t)]
        return step, updates
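For context, compute_step is not called by user code; the step rule is handed to GradientDescent, which invokes it once per parameter and applies the returned updates. A minimal sketch, assuming placeholder cost and parameters and using Blocks' built-in AdaDelta as a stand-in for the class above:

from blocks.algorithms import GradientDescent, AdaDelta

# cost: a Theano scalar expression; parameters: a list of shared variables
algorithm = GradientDescent(cost=cost, parameters=parameters,
                            step_rule=AdaDelta(decay_rate=0.95, epsilon=1e-6))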
Example #7
 def __init__(self, threshold, axis=None):
     axis = pack(axis) if axis is not None else ()
     self.axis = set(axis)
     self.threshold = shared_floatx(threshold, "threshold")
     add_role(self.threshold, ALGORITHM_HYPERPARAMETER)
     if len(axis) != len(self.axis):
         raise ValueError("axis must be unique")
Example #8
File: masonry.py | Project: yingzha/tsa-rnn
 def _allocate(self):
     parameter_shape = [1 if broadcast else dim
                        for dim, broadcast in zip(self.shape, self.broadcastable)]
     self.b = shared_floatx_nans(parameter_shape, name='b')
     add_role(self.b, BIAS)
     self.parameters.append(self.b)
     self.add_auxiliary_variable(self.b.norm(2), name='b_norm')
Example #9
 def __init__(self, decay_rate=0.95, epsilon=1e-6):
     if not 0.0 <= decay_rate <= 1.0:
         raise ValueError("decay rate needs to be in [0, 1]")
     self.decay_rate = shared_floatx(decay_rate, "decay_rate")
     add_role(self.decay_rate, ALGORITHM_HYPERPARAMETER)
     self.epsilon = shared_floatx(epsilon, "epsilon")
     add_role(self.epsilon, ALGORITHM_HYPERPARAMETER)
Example #10
File: ladder.py | Project: fulldecent/LRE
    def annotate_bn(self, var, id, var_type, mb_size, size, norm_ax):
        var_shape = np.array((1,) + size)
        out_dim = np.prod(var_shape) // np.prod(var_shape[list(norm_ax)])
        # Flatten the var - shared variable updating is not trivial otherwise,
        # as theano seems to believe a row vector is a matrix and will complain
        # about the updates
        orig_shape = var.shape
        var = var.flatten()
        # Here we add the name and role, the variables will later be identified
        # by these values
        var.name = id + '_%s_clean' % var_type
        add_role(var, BNPARAM)
        shared_var = self.shared(np.zeros(out_dim),
                                 name='shared_%s' % var.name, role=None)

        # Update running average estimates. When the counter is reset to 1, it
        # will clear its memory
        cntr, c_up = self.counter()
        one = np.float32(1)
        run_avg = lambda new, old: one / cntr * new + (one - one / cntr) * old
        if var_type == 'mean':
            new_value = run_avg(var, shared_var)
        elif var_type == 'var':
            mb_size = T.cast(mb_size, 'float32')
            new_value = run_avg(mb_size / (mb_size - one) * var, shared_var)
        else:
            raise NotImplementedError('Unknown batch norm var %s' % var_type)
        # Add the counter update to the annotated update if it is the first
        # instance of a counter
        self.annotate_update([(shared_var, new_value)] + c_up, var)

        return var.reshape(orig_shape)
Example #11
 def _allocate(self):
     c_dim = self.get_dim('c')
     self.c_0 = shared_floatx_nans((c_dim,), name='c_0')
     add_role(self.c_0, PARAMETER)
     # add the theano shared variables to our parameter lists
     self.params.extend([ self.c_0 ])
     return
Example #12
File: utils.py | Project: fulldecent/LRE
def shared_param(init, name, cast_float32, role, **kwargs):
    if cast_float32:
        v = np.float32(init)
    else:
        v = init  # keep the caller-supplied value when not casting
    p = theano.shared(v, name=name, **kwargs)
    if debug:
        p.tag.test_value = v
    add_role(p, role)
    return p
Example #13
 def __init__(self, decay_rate=0.9, max_scaling=1e5):
     if not 0.0 <= decay_rate <= 1.0:
         raise ValueError("decay rate needs to be in [0, 1]")
     if max_scaling <= 0:
         raise ValueError("max. scaling needs to be greater than 0")
     self.decay_rate = shared_floatx(decay_rate, "decay_rate")
     add_role(self.decay_rate, ALGORITHM_HYPERPARAMETER)
     self.epsilon = 1.0 / max_scaling
Example #14
 def __init__(self, learning_rate=0.002, beta1=0.1, beta2=0.001, epsilon=1e-8, decay_factor=(1 - 1e-8)):
     self.learning_rate = shared_floatx(learning_rate, "learning_rate")
     self.beta1 = shared_floatx(beta1, "beta1")
     self.beta2 = shared_floatx(beta2, "beta2")
     self.epsilon = shared_floatx(epsilon, "epsilon")
     self.decay_factor = shared_floatx(decay_factor, "decay_factor")
     for param in [self.learning_rate, self.beta1, self.beta2, self.epsilon, self.decay_factor]:
         add_role(param, ALGORITHM_HYPERPARAMETER)
Example #15
 def _allocate(self):
     self.parameters.append(shared_floatx_nans((self.dim, self.dim),
                            name='state_to_state'))
     self.parameters.append(shared_floatx_nans((self.dim, 2 * self.dim),
                            name='state_to_gates'))
     for i in range(2):
         if self.parameters[i]:
             add_role(self.parameters[i], WEIGHT)
Example #16
 def _allocate(self):
     W = shared_floatx_nans((self.input_dim, self.attention_dim), name='W')
     add_role(W, WEIGHT)
     self.parameters.append(W)
     self.add_auxiliary_variable(W.norm(2), name='W_norm')
     if self.use_bias:
         b = shared_floatx_nans((1, ), name='b')
         add_role(b, BIAS)
         self.parameters.append(b)
Example #17
 def compute_step(self, parameter, previous_step):
     mean_square_step_tm1 = shared_floatx_zeros_matching(parameter, "mean_square_step_tm1")
     add_role(mean_square_step_tm1, ALGORITHM_BUFFER)
     mean_square_step_t = self.decay_rate * mean_square_step_tm1 + (1 - self.decay_rate) * tensor.sqr(previous_step)
     add_role(mean_square_step_t, ALGORITHM_BUFFER)
     rms_step_t = tensor.maximum(tensor.sqrt(mean_square_step_t), self.epsilon)
     step = previous_step / rms_step_t
     updates = [(mean_square_step_tm1, mean_square_step_t)]
     return step, updates
Example #18
File: masonry.py | Project: yingzha/tsa-rnn
 def _allocate(self):
     parameter_shape = [
         1 if broadcast else dim
         for dim, broadcast in zip(self.shape, self.broadcastable)
     ]
     self.w = shared_floatx_nans(parameter_shape, name='w')
     add_role(self.w, WEIGHT)
     self.parameters.append(self.w)
     self.add_auxiliary_variable(self.w.norm(2), name='w_norm')
Example #19
    def __init__(self, eta=0, gamma=0.55, seed=180891):

        self.eta_sqrt = shared_floatx(sqrt(eta), "eta")
        add_role(self.eta_sqrt, ALGORITHM_HYPERPARAMETER)

        self.gamma_half = shared_floatx(gamma/2, "gamma")
        add_role(self.gamma_half, ALGORITHM_HYPERPARAMETER)

        self.theano_random = rng_mrg.MRG_RandomStreams(seed=seed)
Example #20
File: graph.py | Project: Fdenpc/blocks
    def add_auxiliary_variable(self, variable, roles=None, name=None):
        """Attach an auxiliary variable to the graph.

        Auxiliary variables are Theano variables that are not part of a
        brick's output, but can be useful nonetheless e.g. as a regularizer
        or to monitor during training progress.

        Parameters
        ----------
        variable : :class:`~tensor.TensorVariable`
            The variable you want to add.
        roles : list of :class:`.VariableRole` instances, optional
            The roles of this variable. The :const:`.AUXILIARY`
            role will automatically be added. Other options are
            :const:`.COST`, :const:`.WEIGHT`, etc.
        name : str, optional
            Name to give to the variable. If the variable already has a
            name it will be overwritten.

        Examples
        --------
        >>> from blocks.bricks.base import application, Brick
        >>> from blocks.roles import COST
        >>> from blocks.utils import shared_floatx_nans
        >>> class Foo(Brick):
        ...     def _allocate(self):
        ...         W = shared_floatx_nans((10, 10))
        ...         self.add_auxiliary_variable(W.mean(), name='mean_W')
        ...     @application
        ...     def apply(self, x, application_call):
        ...         application_call.add_auxiliary_variable(
        ...             x - 1, name='x_minus_1')
        ...         application_call.add_auxiliary_variable(
        ...             x.mean(), roles=[COST], name='mean_x')
        ...         return x + 1
        >>> from theano import tensor
        >>> x = tensor.vector()
        >>> y = Foo().apply(x)
        >>> from blocks.filter import VariableFilter
        >>> cg = ComputationGraph([y])
        >>> var_filter = VariableFilter(roles=[AUXILIARY])
        >>> var_filter(cg.variables)  # doctest: +SKIP
        {x_minus_1, mean_W, mean_x}
        >>> var_filter = VariableFilter(roles=[COST])
        >>> var_filter(cg.variables)  # doctest: +SKIP
        {mean_x}

        """
        add_annotation(variable, self)
        if name is not None:
            variable.name = name
            variable.tag.name = name
        add_role(variable, AUXILIARY)
        if roles is not None:
            for role in roles:
                add_role(variable, role)
        self.auxiliary_variables.append(variable)
Example #21
    def add_auxiliary_variable(self, variable, roles=None, name=None):
        """Attach an auxiliary variable to the graph.

        Auxiliary variables are Theano variables that are not part of a
        brick's output, but can be useful nonetheless e.g. as a regularizer
        or to monitor during training progress.

        Parameters
        ----------
        variable : :class:`~tensor.TensorVariable`
            The variable you want to add.
        roles : list of :class:`.VariableRole` instances, optional
            The roles of this variable. The :const:`.AUXILIARY`
            role will automatically be added. Other options are
            :const:`.COST`, :const:`.WEIGHT`, etc.
        name : str, optional
            Name to give to the variable. If the variable already has a
            name it will be overwritten.

        Examples
        --------
        >>> from blocks.bricks.base import application, Brick
        >>> from blocks.roles import COST
        >>> from blocks.utils import shared_floatx_nans
        >>> class Foo(Brick):
        ...     def _allocate(self):
        ...         W = shared_floatx_nans((10, 10))
        ...         self.add_auxiliary_variable(W.mean(), name='mean_W')
        ...     @application
        ...     def apply(self, x, application_call):
        ...         application_call.add_auxiliary_variable(
        ...             x - 1, name='x_minus_1')
        ...         application_call.add_auxiliary_variable(
        ...             x.mean(), roles=[COST], name='mean_x')
        ...         return x + 1
        >>> from theano import tensor
        >>> x = tensor.vector()
        >>> y = Foo().apply(x)
        >>> from blocks.filter import VariableFilter
        >>> cg = ComputationGraph([y])
        >>> var_filter = VariableFilter(roles=[AUXILIARY])
        >>> var_filter(cg.variables)  # doctest: +SKIP
        {x_minus_1, mean_W, mean_x}
        >>> var_filter = VariableFilter(roles=[COST])
        >>> var_filter(cg.variables)  # doctest: +SKIP
        {mean_x}

        """
        add_annotation(variable, self)
        if name is not None:
            variable.name = name
            variable.tag.name = name
        add_role(variable, AUXILIARY)
        if roles is not None:
            for role in roles:
                add_role(variable, role)
        self.auxiliary_variables.append(variable)
Example #22
 def _allocate(self):
     parameter_shape = [
         1 if broadcast else dim
         for dim, broadcast in zip(self.shape, self.broadcastable)
     ]
     self.beta = shared_floatx_nans(parameter_shape, name='beta')
     add_role(self.beta, BIAS)
     self.parameters.append(self.beta)
     self.add_auxiliary_variable(self.beta.norm(2), name='beta_norm')
Example #23
 def _allocate(self):
     self.parameters.append(
         shared_floatx_nans((self.dim, self.dim), name='state_to_state'))
     self.parameters.append(
         shared_floatx_nans((self.dim, 2 * self.dim),
                            name='state_to_gates'))
     for i in range(2):
         if self.parameters[i]:
             add_role(self.parameters[i], WEIGHT)
Example #24
def _create_intpic_histogram_for(param, pic_size, label_count):
    # The pic histogram is a 2d-array of pic_size.
    # For a 3d parameter, that ends up being a 5d tensor.
    # For a 1d parameter, that's a 3d tensor.
    shape = param.get_value().shape + (label_count,) + pic_size
    buf = shared_floatx_zeros(shape)
    buf.tag.for_parameter = param
    add_role(buf, INTPIC_STATISTICS)
    return buf
Example #25
def _create_intpic_histogram_for(param, pic_size, label_count):
    # The pic histogram is a 2d-array of pic_size.
    # For a 3d parameter, that ends up being a 5d tensor.
    # For a 1d parameter, that's a 3d tensor.
    shape = param.get_value().shape + (label_count, ) + pic_size
    buf = shared_floatx_zeros(shape)
    buf.tag.for_parameter = param
    add_role(buf, INTPIC_STATISTICS)
    return buf
Example #26
 def _allocate(self):
     W = shared_floatx_nans((self.n_out, self.dwin * self.vector_size),
                            name='W')
     b = shared_floatx_nans((self.n_out, ), name='b')
     add_role(b, BIAS)
     add_role(W, WEIGHT)
     self.parameters.append(W)
     self.parameters.append(b)
     self.mlp.allocate()
Example #27
    def __init__(self,
                 input_,
                 n_in,
                 n_out,
                 name='logisticRegression_rel',
                 W=None,
                 b=None,
                 **kwargs):
        """ Initialize the parameters of the logistic regression

        :type input: theano.tensor.TensorType
        :param input: symbolic variable that describes the input of the
                      architecture (one minibatch)

        :type n_in: int
        :param n_in: number of input units, the dimension of the space in
                     which the datapoints lie

        :type n_out: int
        :param n_out: number of output units, the dimension of the space in
                      which the labels lie

        """
        print('******************no MIML')
        super(LogisticRegression, self).__init__(**kwargs)
        if W is None:
            # initialize with 0 the weights W as a matrix of shape (n_in, n_out)
            W = theano.shared(value=numpy.zeros((n_in, n_out),
                                                dtype=theano.config.floatX),
                              name='W')
    # else:
    #   self.W = W

        if b is None:
            # initialize the baises b as a vector of n_out 0s
            b = theano.shared(value=numpy.zeros((n_out, ),
                                                dtype=theano.config.floatX),
                              name='b')

    # else:
    #  self.b = b
        add_role(W, WEIGHT)
        add_role(b, BIAS)
        self.parameters = []
        self.parameters.append(W)
        self.parameters.append(b)
        self.add_auxiliary_variable(W.norm(2), name='W_norm')
        self.add_auxiliary_variable(b.norm(2), name='b_norm')
        self.allocated = True
        self.name = name
        self.p_y_given_x = T.nnet.softmax(
            T.dot(input_, self.parameters[0]) + self.parameters[1])

        # compute prediction as class whose probability is maximal in
        # symbolic form
        self.y_pred = T.argmax(self.p_y_given_x, axis=1)
Example #28
File: noisy.py | Project: chargen/net-intent
def copy_and_tag_noise(variable, brick, role, name):
    """Helper method to copy a variable and annotate it."""
    copy = variable.copy()
    # Theano name
    copy.name = "{}_apply_{}".format(brick.name, name)
    add_annotation(copy, brick)
    # Blocks name
    copy.tag.name = name
    add_role(copy, role)
    return copy
Example #29
 def _allocate(self):
     if self.noise_batch_size is not None:
         if self.tied_noise:
             N = shared_floatx_zeros(
                     (self.noise_batch_size, self.input_dim[0]), name='N')
         else:
             N = shared_floatx_zeros(
                     (self.noise_batch_size,) + self.input_dim, name='N')
         add_role(N, NOISE)
         self.parameters.append(N)
Example #30
 def _allocate(self):
     W = shared_floatx_nans((self.input_dim, self.output_dim), name='W')
     add_role(W, WEIGHT)
     self.parameters.append(W)
     self.add_auxiliary_variable(W.norm(2), name='W_norm')
     if getattr(self, 'use_bias', True):
         b = shared_floatx_nans((self.output_dim, ), name='b')
         add_role(b, BIAS)
         self.parameters.append(b)
         self.add_auxiliary_variable(b.norm(2), name='b_norm')
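The payoff of tagging every shared variable with add_role is that it can later be retrieved by role rather than by name. A small sketch, assuming cost is a Theano expression built from bricks allocated as above:

from blocks.graph import ComputationGraph
from blocks.filter import VariableFilter
from blocks.roles import WEIGHT, BIAS

cg = ComputationGraph([cost])
weights = VariableFilter(roles=[WEIGHT])(cg.variables)  # every W tagged above
biases = VariableFilter(roles=[BIAS])(cg.variables)     # every b tagged above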
Example #31
 def allocate_parameters(self):
     parameters = Parameters()
     for parameter in [
         theano.shared(self.initial_gamma * ones(self.shape), name="gammas"),
         theano.shared(self.initial_beta  * ones(self.shape), name="betas")]:
         add_role(parameter, PARAMETER)
         setattr(parameters, parameter.name, parameter)
         if self.name:
             parameter.name = "%s.%s" % (self.name, parameter.name)
     return parameters
Example #32
File: noisy.py | Project: chargen/net-intent
 def _allocate(self):
     if self.noise_batch_size is not None:
         if self.tied_noise:
             N = shared_floatx_zeros(
                 (self.noise_batch_size, self.input_dim[0]), name='N')
         else:
             N = shared_floatx_zeros(
                 (self.noise_batch_size, ) + self.input_dim, name='N')
         add_role(N, NOISE)
         self.parameters.append(N)
Example #33
File: test_graph.py | Project: zrustc/blocks
def test_replace_variable_is_auxiliary():
    # Test if warning appears when variable is an AUXILIARY variable
    with warnings.catch_warnings(record=True) as w:
        x = tensor.scalar()
        y = x + 1
        add_role(y, AUXILIARY)
        cg = ComputationGraph([y])
        cg.replace([(y, 2 * y)])
        assert len(w) == 1
        assert "auxiliary" in str(w[-1].message)
Example #34
 def _allocate(self):
     W = shared_floatx_zeros((self.input_dim, self.output_dim), name='W')
     add_role(W, WEIGHTS)
     self.params.append(W)
     self.add_auxiliary_variable(W.norm(2), name='W_norm')
     if self.use_bias:
         b = shared_floatx_zeros((self.output_dim, ), name='b')
         add_role(b, BIASES)
         self.params.append(b)
         self.add_auxiliary_variable(b.norm(2), name='b_norm')
Example #35
File: bricks.py | Project: EloiZ/DeepCube
 def _allocate(self):
     W = shared_floatx_nans((self.input_dim, self.output_dim), name='W')
     add_role(W, WEIGHT)
     self.parameters.append(W)
     self.add_auxiliary_variable(W.norm(2), name='W_norm')
     if self.use_bias:
         b = shared_floatx_nans((self.output_dim,), name='b')
         add_role(b, BIAS)
         self.parameters.append(b)
         self.add_auxiliary_variable(b.norm(2), name='b_norm')
Example #36
def copy_and_tag_noise(variable, brick, role, name):
    """Helper method to copy a variable and annotate it."""
    copy = variable.copy()
    # Theano name
    copy.name = "{}_apply_{}".format(brick.name, name)
    add_annotation(copy, brick)
    # Blocks name
    copy.tag.name = name
    add_role(copy, role)
    return copy
Example #37
    def _initialize(self):
        self.layers_features, self.data_input = create_theano_expressions()

        cg = ComputationGraph(self.layers_features[self.layer_name])
        i = 0
        for v in cg.shared_variables:
            v.name = str(i)
            self.parameters.append(v)
            add_role(v, WEIGHT)
            i += 1
Example #38
File: Conv1D.py | Project: caomw/MLFun
    def _allocate(self):
        W = shared_floatx_nans((self.num_filters, self.input_dim,
            self.filter_length, 1), name='W')
        add_role(W, FILTER)
        self.params.append(W)

        if self.use_bias:
            b = shared_floatx_nans((self.num_filters, ), name='b')
            add_role(b, BIAS)
            self.params.append(b)
Example #39
def test_replace_variable_is_auxiliary():
    # Test if warning appears when variable is an AUXILIARY variable
    with warnings.catch_warnings(record=True) as w:
        x = tensor.scalar()
        y = x + 1
        add_role(y, AUXILIARY)
        cg = ComputationGraph([y])
        cg.replace([(y, 2 * y)])
        assert len(w) == 1
        assert "auxiliary" in str(w[-1].message)
Example #40
 def allocate_parameters(self):
     parameters = Parameters()
     for parameter in [
         theano.shared(self.initial_gamma * ones(self.shape), name="gammas"),
         theano.shared(self.initial_beta  * ones(self.shape), name="betas")]:
         add_role(parameter, PARAMETER)
         setattr(parameters, parameter.name, parameter)
         if self.name:
             parameter.name = "%s.%s" % (self.name, parameter.name)
     return parameters
Example #41
 def _allocate(self):
     self.parameters.append(shared_floatx_nans((self.dim, self.dim),
                            name='state_to_state'))
     self.parameters.append(shared_floatx_nans((self.dim, 2 * self.dim),
                            name='state_to_gates'))
     self.parameters.append(shared_floatx_zeros((self.dim,),
                            name="initial_state"))
     for i in range(2):
         if self.parameters[i]:
             add_role(self.parameters[i], WEIGHT)
     add_role(self.parameters[2], INITIAL_STATE)
Example #42
 def __init__(self, learning_rate=0.002,
              mu1=0.99, nu2=0.999, epsilon=1e-8,
              decay_prod=(1.)):
     self.learning_rate = shared_floatx(learning_rate, "learning_rate")
     self.mu1 = shared_floatx(mu1, "mu1")
     self.nu2 = shared_floatx(nu2, "nu2")
     self.epsilon = shared_floatx(epsilon, "epsilon")
     self.decay_prod = shared_floatx(decay_prod, "decay_prod")
     for param in [self.learning_rate, self.mu1, self.nu2, self.epsilon,
                   self.decay_prod]:
         add_role(param, ALGORITHM_HYPERPARAMETER)
Example #43
    def cost(self, application_call, outputs, mask=None, **kwargs):
        # Compute the sum of costs
        costs = self.cost_matrix(outputs, mask=mask, **kwargs)
        cost = tensor.mean(costs.sum(axis=0))
        add_role(cost, COST)

        # Add auxiliary variable for per sequence element cost
        application_call.add_auxiliary_variable(
            (costs.sum() / mask.sum()) if mask is not None else costs.mean(),
            name='per_sequence_element')
        return cost
Example #44
File: base.py | Project: vikkamath/blocks
 def copy_and_tag(variable, role, name):
     """Helper method to copy a variable and annotate it."""
     copy = variable.copy()
     # Theano name
     copy.name = _variable_name(brick.name, self.name, name)
     add_annotation(copy, brick)
     add_annotation(copy, call)
     # Blocks name
     copy.tag.name = name
     add_role(copy, role)
     return copy
Example #45
File: conv.py | Project: jfsantos/blocks
 def _allocate(self):
     W = shared_floatx_nans(
         (self.num_filters, self.num_channels) + self.filter_size, name='W')
     add_role(W, FILTER)
     self.params.append(W)
     self.add_auxiliary_variable(W.norm(2), name='W_norm')
     if self.use_bias:
         b = shared_floatx_nans(self.get_dim('output'), name='b')
         add_role(b, BIAS)
         self.params.append(b)
         self.add_auxiliary_variable(b.norm(2), name='b_norm')
Example #46
File: base.py | Project: trungnt13/blocks
 def apply(self, *args, **kwargs):
     out = self._apply(*args, **kwargs)
     # ====== add roles ====== #
     tmp = out
     if not isinstance(tmp, (tuple, list)):
         tmp = [out]
     for o in tmp:
         add_role(o, OUTPUT)
         add_annotation(o, self)
     # return outputs
     return out
Example #47
File: conv.py | Project: kelvinxu/blocks
 def _allocate(self):
     W = shared_floatx_nans((self.num_filters, self.num_channels) +
                            self.filter_size, name='W')
     add_role(W, FILTERS)
     self.params.append(W)
     self.add_auxiliary_variable(W.norm(2), name='W_norm')
     if self.use_bias:
         b = shared_floatx_nans(self.get_dim('output'), name='b')
         add_role(b, BIASES)
         self.params.append(b)
         self.add_auxiliary_variable(b.norm(2), name='b_norm')
Example #48
 def __init__(self, learning_rate=0.002,
              beta1=0.1, beta2=0.001, epsilon=1e-8,
              decay_factor=(1 - 1e-8)):
     self.learning_rate = shared_floatx(learning_rate, "learning_rate")
     self.beta1 = shared_floatx(beta1, "beta1")
     self.beta2 = shared_floatx(beta2, "beta2")
     self.epsilon = shared_floatx(epsilon, "epsilon")
     self.decay_factor = shared_floatx(decay_factor, "decay_factor")
     for param in [self.learning_rate, self.beta1, self.beta2, self.epsilon,
                   self.decay_factor]:
         add_role(param, ALGORITHM_HYPERPARAMETER)
Example #49
    def _initialize(self):
        self.beta = shared_floatx_zeros((self.dim, ), name='beta')
        self.gamma = shared_floatx_zeros((self.dim, ), name='gamma')

        add_role(self.beta, PARAMETER)
        add_role(self.gamma, PARAMETER)

        self.parameters = [self.gamma, self.beta]

        self.beta_init.initialize(self.beta, self.rng)
        self.gamma_init.initialize(self.gamma, self.rng)
Example #50
 def _allocate(self):
     self.parameters.append(shared_floatx_nans((self.dim, self.dim),
                            name='state_to_state'))
     self.parameters.append(shared_floatx_nans((self.dim, 2 * self.dim),
                            name='state_to_gates'))
     self.parameters.append(shared_floatx_zeros((self.dim,),
                            name="initial_state"))
     for i in range(2):
         if self.parameters[i]:
             add_role(self.parameters[i], WEIGHT)
     add_role(self.parameters[2], INITIAL_STATE)
Example #51
    def allocate_parameters(self, args):
        if hasattr(self, "parameters"):
            return self.parameters

        self.parameters = Empty()

        h0 = theano.shared(zeros((args.num_hidden, )), name="h0")
        c0 = theano.shared(zeros((args.num_hidden, )), name="c0")
        if args.init == "id":
            Wa = theano.shared(np.concatenate([
                np.eye(args.num_hidden),
                orthogonal((args.num_hidden, 3 * args.num_hidden)),
            ],
                                              axis=1).astype(
                                                  theano.config.floatX),
                               name="Wa")
        else:
            Wa = theano.shared(orthogonal(
                (args.num_hidden, 4 * args.num_hidden)),
                               name="Wa")
        Wx = theano.shared(orthogonal((1, 4 * args.num_hidden)), name="Wx")
        a_gammas = theano.shared(args.initial_gamma * ones(
            (4 * args.num_hidden, )),
                                 name="a_gammas")
        b_gammas = theano.shared(args.initial_gamma * ones(
            (4 * args.num_hidden, )),
                                 name="b_gammas")
        ab_betas = theano.shared(args.initial_beta * ones(
            (4 * args.num_hidden, )),
                                 name="ab_betas")

        # forget gate bias initialization
        forget_biais = ab_betas.get_value()
        forget_biais[args.num_hidden:2 * args.num_hidden] = 1.
        ab_betas.set_value(forget_biais)

        c_gammas = theano.shared(args.initial_gamma * ones(
            (args.num_hidden, )),
                                 name="c_gammas")
        c_betas = theano.shared(args.initial_beta * ones((args.num_hidden, )),
                                name="c_betas")

        if not args.baseline:
            parameters_list = [
                h0, c0, Wa, Wx, a_gammas, b_gammas, ab_betas, c_gammas, c_betas
            ]
        else:
            parameters_list = [h0, c0, Wa, Wx, ab_betas, c_betas]
        for parameter in parameters_list:
            print(parameter.name)
            add_role(parameter, PARAMETER)
            setattr(self.parameters, parameter.name, parameter)

        return self.parameters
Example #52
def construct_graphs(args, nclasses, length):
    constructor = LSTM if args.lstm else RNN

    if args.permuted:
        permutation = np.random.randint(0, length, size=(length, ))

    Wy = theano.shared(orthogonal((args.num_hidden, 1)), name="Wy")
    by = theano.shared(np.zeros((nclasses, ), dtype=theano.config.floatX),
                       name="by")

    ### graph construction
    inputs = dict(features=T.tensor3("x"),
                  drops_state=T.tensor3('drops_state'),
                  drops_cell=T.tensor3('drops_cell'),
                  targets=T.matrix("y"))
    x, drops_state, drops_cell, y = inputs["features"], inputs[
        'drops_state'], inputs['drops_cell'], inputs["targets"]

    # theano.config.compute_test_value = "warn"
    # batch = next(get_stream(which_set="train",
    #                         num_examples=args.num_examples,
    #                         length=args.length,
    #                         batch_size=args.batch_size,
    #                         drop_prob_cell=args.drop_prob_cell,
    #                         drop_prob_state=args.drop_prob_state,
    #                         for_evaluation=False,
    #                         hidden_dim=args.num_hidden).get_epoch_iterator())
    # x.tag.test_value = batch[0]
    # y.tag.test_value = batch[1]
    # drops_state.tag.test_value = batch[2]
    # drops_cell.tag.test_value = batch[3]

    #x = x.dimshuffle(1, 0, 2)
    y = y.flatten(ndim=1)

    args.use_population_statistics = False
    turd = constructor(args, nclasses)
    (outputs, training_updates, dummy_states,
     popstats) = turd.construct_graph_popstats(args, x, drops_state,
                                               drops_cell, length)
    training_graph, training_extensions = construct_common_graph(
        "training", args, outputs, dummy_states, Wy, by, y)

    #args.use_population_statistics = True
    #(inf_outputs, inference_updates, dummy_states, _) = turd.construct_graph_popstats(args, x, drops_state, drops_cell,
    #                                                                                  length, popstats=popstats)
    #inference_graph, inference_extensions = construct_common_graph("inference", args, inf_outputs, dummy_states, Wy, by, y)

    add_role(Wy, PARAMETER)
    add_role(by, PARAMETER)
    args.use_population_statistics = False
    return (dict(training=training_graph, inference=training_graph),
            dict(training=training_extensions, inference=training_extensions),
            dict(training=training_updates, inference=training_updates))
Example #53
 def copy_and_tag(variable, role, name):
     """Helper method to copy a variable and annotate it."""
     copy = variable.copy()
     # Theano name
     copy.name = _variable_name(brick.name, self.name, name)
     add_annotation(copy, brick)
     add_annotation(copy, call)
     # Blocks name
     copy.tag.name = name
     add_role(copy, role)
     return copy
Example #54
    def _allocate(self):
        W = shared_floatx_nans(
            (self.num_filters, self.input_dim, self.filter_length, 1),
            name='W')
        add_role(W, FILTER)
        self.params.append(W)

        if self.use_bias:
            b = shared_floatx_nans((self.num_filters, ), name='b')
            add_role(b, BIAS)
            self.params.append(b)
Example #55
    def cost(self, application_call, readouts, outputs):
        if readouts.ndim == 3:
            temp_shape = (readouts.shape[0] * readouts.shape[1],
                          readouts.shape[2])
            correct_mask = tensor.zeros(temp_shape)
            correct_mask = tensor.set_subtensor(
                correct_mask[tensor.arange(temp_shape[0]),
                             outputs.flatten()], 1)
            correct_mask = correct_mask.reshape(readouts.shape)

            # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
            # WARNING:
            # this code only makes sense when the actual groundtruths
            # are plugged for groundtruths.
            #
            # This happens in SpeechRecognizer.get_cost_graph()
            # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
            groundtruth = outputs.copy()
            groundtruth.name = self.GROUNDTRUTH

            reward_matrix, gain_matrix = self.reward_op(groundtruth, outputs)
            gain_matrix = theano.tensor.maximum(gain_matrix, self.min_reward)
            gain_matrix.name = self.GAIN_MATRIX
            reward_matrix.name = self.REWARD_MATRIX

            predicted_gains = readouts.reshape(temp_shape)[
                tensor.arange(temp_shape[0]),
                outputs.flatten()]
            predicted_gains = predicted_gains.reshape(outputs.shape)
            predicted_gains = tensor.concatenate(
                [tensor.zeros((1, outputs.shape[1])), predicted_gains[1:]])
            predicted_rewards = predicted_gains.cumsum(axis=0)
            predicted_rewards = readouts + predicted_rewards[:, :, None]

            gain_mse_loss_matrix = ((readouts - gain_matrix)**2).sum(axis=-1)
            reward_mse_loss_matrix = ((predicted_rewards -
                                       reward_matrix)**2).sum(axis=-1)

            gain_mse_loss = gain_mse_loss_matrix.sum()
            gain_mse_loss.name = self.GAIN_MSE_LOSS
            reward_mse_loss = reward_mse_loss_matrix.sum()
            reward_mse_loss.name = self.REWARD_MSE_LOSS
            application_call.add_auxiliary_variable(gain_mse_loss)

            if self.criterion == 'mse_gain':
                add_role(reward_mse_loss, OTHER_LOSS)
                application_call.add_auxiliary_variable(reward_mse_loss)
                return gain_mse_loss_matrix
            else:
                add_role(gain_mse_loss, OTHER_LOSS)
                application_call.add_auxiliary_variable(gain_mse_loss)
                return reward_mse_loss_matrix
        return readouts[tensor.arange(readouts.shape[0]), outputs]
Example #56
    def _allocate(self):
        super(GaussianLayerFixedSigma, self)._allocate()

        dim_X, dim_H = self.dim_X, self.dim_H

        self.W_mean = shared_floatx_zeros((dim_H, dim_X), name='W_mean')
        add_role(self.W_mean, WEIGHT)

        self.b_mean = shared_floatx_zeros((dim_X, ), name='b_mean')
        add_role(self.b_mean, BIAS)

        self.parameters = [self.W_mean, self.b_mean]
Example #57
 def compute_step(self, parameter, previous_step):
     mean_square_step_tm1 = shared_floatx(parameter.get_value() * 0.,
                                          "mean_square_step_tm1")
     add_role(mean_square_step_tm1, ALGORITHM_BUFFER)
     mean_square_step_t = (
         self.decay_rate * mean_square_step_tm1 +
         (1 - self.decay_rate) * tensor.sqr(previous_step))
     add_role(mean_square_step_t, ALGORITHM_BUFFER)
     rms_step_t = tensor.maximum(tensor.sqrt(mean_square_step_t),
                                 self.epsilon)
     step = previous_step / rms_step_t
     updates = [(mean_square_step_tm1, mean_square_step_t)]
     return step, updates
Example #58
    def __init__(self,
                 rng,
                 W,
                 b,
                 filter_shape,
                 image_shape,
                 poolsize=(2, 2),
                 name='ConvRel',
                 **kwargs):
        """
        Allocate a LeNetConvPoolLayer with shared variable internal parameters.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type W: theano.matrix
        :param W: the weight matrix used for convolution

        :type b: theano vector
        :param b: the bias used for convolution

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height,filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows,#cols)
        """
        super(LeNetConvPoolLayer, self).__init__(**kwargs)
        assert image_shape[1] == filter_shape[1]
        self.input = input

        add_role(W, WEIGHT)
        add_role(b, BIAS)
        # store parameters of this layer
        self.parameters = []
        self.parameters.append(W)
        self.parameters.append(b)
        self.add_auxiliary_variable(W.norm(2), name='W_norm')
        self.add_auxiliary_variable(b.norm(2), name='b_norm')
        self.allocated = True
        self.name = name
        self.filter_shape = filter_shape
        self.poolsize = poolsize
Example #59
    def compute_step(self, parameter, previous_step):
        name = 'adagrad_sqs'
        if parameter.name:
            name += '_' + parameter.name
        ssq = shared_floatx(parameter.get_value() * 0., name=name)
        add_role(ssq, ALGORITHM_BUFFER)

        ssq_t = (tensor.sqr(previous_step) + ssq)
        step = (self.learning_rate * previous_step /
                (tensor.sqrt(ssq_t) + self.epsilon))

        updates = [(ssq, ssq_t)]

        return step, updates