class AttentionEUTHM2(AttentionEUTHM):
    def __init__(self, config, dataset, *args, **kwargs):
        '''
        Define user-text-hashtag model with negative sampling

        :param config:
        :param dataset: User-text-hashtag dataset
        '''
        super(AttentionEUTHM2, self).__init__(config, dataset)

    def _get_doc_embed(self, *args, **kwargs):
        text_vec = self._get_text_vec()
        user_vec = self.user_embed.apply(self.user)
        # Tile the user vector over every time step and concatenate it with
        # the word vectors along the feature axis.
        text_vec = tensor.concatenate(
            [text_vec,
             user_vec[None, :, :][tensor.zeros(shape=(text_vec.shape[0],),
                                               dtype='int32')]],
            axis=2)
        text_vec = self.word_shift.apply(text_vec) + \
            self.word_shift_bias.parameters[0][0]
        return self._encode_text_vec(text_vec)

    def _build_bricks(self, *args, **kwargs):
        super(AttentionEUTHM2, self)._build_bricks()
        self.word_shift = MLP(
            activations=[Tanh('word_shift_tanh')],
            dims=[self.config.user_embed_dim + self.config.word_embed_dim,
                  self.config.word_embed_dim],
            name='word_shift_mlp')
        self.word_shift.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim +
                               self.config.user_embed_dim))
        self.word_shift.biases_init = Constant(0)
        self.word_shift.initialize()
        self.word_shift_bias = Bias(dim=1, name='word_shift_bias')
        self.word_shift_bias.biases_init = Constant(0)
        self.word_shift_bias.initialize()
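# A note on the tiling trick in _get_doc_embed above: indexing a
# (1, batch, dim) tensor with a length-seq_len vector of zeros repeats the
# user vector at every time step, so it can be concatenated with the word
# vectors. A minimal numpy sketch of the same indexing (shapes hypothetical):
import numpy

seq_len, batch, user_dim = 5, 2, 3
user_vec = numpy.arange(batch * user_dim).reshape(batch, user_dim)

# (1, batch, user_dim) indexed by seq_len zeros -> (seq_len, batch, user_dim)
tiled = user_vec[None, :, :][numpy.zeros(seq_len, dtype='int32')]
assert tiled.shape == (seq_len, batch, user_dim)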
def __init__(self, ref_data, output_dim):
    input_dim = ref_data.shape[1]

    ref_data_sh = theano.shared(numpy.array(ref_data, dtype=numpy.float32),
                                name="ref_data")

    # Construct the model
    j = tensor.lvector("j")
    r = ref_data_sh[j, :]
    x = tensor.fmatrix("x")
    y = tensor.ivector("y")

    # input_dim must be nr
    mlp0 = MLP(activations=activation_functions_0,
               dims=[input_dim] + hidden_dims_0,
               name="e0")
    mlp0vs = MLP(activations=[None],
                 dims=[hidden_dims_0[-1], input_dim],
                 name="de0")
    mlp1 = MLP(activations=activation_functions_1,
               dims=[hidden_dims_0[-1]] + hidden_dims_1 + [n_inter],
               name="inter_gen")
    mlp2 = MLP(activations=activation_functions_2 + [None],
               dims=[n_inter] + hidden_dims_2 + [output_dim],
               name="end_mlp")

    encod = mlp0.apply(r)
    rprime = mlp0vs.apply(encod)
    inter_weights = mlp1.apply(encod)

    ibias = Bias(n_inter)
    ibias.biases_init = Constant(0)
    ibias.initialize()
    inter = inter_act_fun.apply(ibias.apply(tensor.dot(x, inter_weights)))

    final = mlp2.apply(inter)

    cost = Softmax().categorical_cross_entropy(y, final)
    confidence = Softmax().apply(final)

    pred = final.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters
    for brick in [mlp0, mlp0vs, mlp1, mlp2]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # apply regularization
    cg = ComputationGraph([cost, error_rate])

    s_dropout_vars = []  # default, so the a_dropout branch works when s_dropout == 0
    if r_dropout != 0:
        # - dropout on input vector r : r_dropout
        cg = apply_dropout(cg, [r], r_dropout)
    if s_dropout != 0:
        # - dropout on intermediate layers of first mlp : s_dropout
        s_dropout_vars = list(
            set(VariableFilter(bricks=[Tanh], name="output")
                (ComputationGraph([inter_weights]))) -
            set([inter_weights]))
        cg = apply_dropout(cg, s_dropout_vars, s_dropout)
    if i_dropout != 0:
        # - dropout on input to second mlp : i_dropout
        cg = apply_dropout(cg, [inter], i_dropout)
    if a_dropout != 0:
        # - dropout on hidden layers of second mlp : a_dropout
        a_dropout_vars = list(
            set(VariableFilter(bricks=[Tanh], name="output")
                (ComputationGraph([final]))) -
            set([inter_weights]) - set(s_dropout_vars))
        cg = apply_dropout(cg, a_dropout_vars, a_dropout)
    if w_noise_std != 0:
        # - apply noise on weight variables
        weight_vars = VariableFilter(roles=[WEIGHT])(cg)
        cg = apply_noise(cg, weight_vars, w_noise_std)

    [cost_reg, error_rate_reg] = cg.outputs

    # add reconstruction penalty for AE part
    penalty_val = tensor.sqrt(((r - rprime) ** 2).sum(axis=1)).mean()
    cost_reg = cost_reg + reconstruction_penalty * penalty_val

    self.cost = cost
    self.cost_reg = cost_reg
    self.error_rate = error_rate
    self.error_rate_reg = error_rate_reg
    self.pred = pred
    self.confidence = confidence
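# The constructor above only builds symbolic expressions; a training function
# still has to be compiled from cost_reg. The surrounding training wrapper is
# not shown in this section, so here is a standalone, self-contained sketch of
# the pattern: recover the symbolic inputs of a cost from its
# ComputationGraph instead of threading j, x, y around by hand. The toy cost
# and shapes are illustrative only.
import numpy
import theano
from theano import tensor
from blocks.graph import ComputationGraph

x = tensor.fmatrix('x')
y = tensor.ivector('y')
cost = ((x.sum(axis=1) - y) ** 2).mean()

cg = ComputationGraph([cost])
inputs = sorted(cg.inputs, key=lambda v: v.name)  # cg.inputs order is not guaranteed
f = theano.function(inputs, cost)
print(f(numpy.ones((3, 4), dtype='float32'), numpy.zeros(3, dtype='int32')))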
def __init__(self, ref_data, output_dim):
    input_dim = ref_data.shape[1]

    ref_data_sh = theano.shared(numpy.array(ref_data, dtype=numpy.float32),
                                name='ref_data')

    rng = RandomStreams()

    # Greedy layer-wise denoising autoencoder on the reference data: each
    # layer reconstructs its clean input from a noise-corrupted encoding,
    # with an L1 sparsity penalty on the code.
    ae_bricks = []
    ae_input = ref_data_sh
    ae_costs = []
    for i, (idim, odim) in enumerate(zip([input_dim] + ae_dims[:-1],
                                         ae_dims)):
        ae_mlp = MLP(activations=[ae_activations[i]],
                     dims=[idim, odim],
                     name='enc%i' % i)
        enc = ae_mlp.apply(ae_input)
        enc_n = ae_mlp.apply(ae_input + rng.normal(size=ae_input.shape,
                                                   std=ae_f_noise_std))
        ae_mlp_dec = MLP(activations=[ae_activations[i]],
                         dims=[odim, idim],
                         name='dec%i' % i)
        dec = ae_mlp_dec.apply(enc_n)

        cost = tensor.sqrt(((ae_input - dec) ** 2).sum(axis=1)).mean() + \
            ae_l1_pen * abs(enc).sum(axis=1).mean()
        ae_costs.append(cost)

        ae_input = enc
        ae_bricks = ae_bricks + [ae_mlp, ae_mlp_dec]

    self.ae_costs = ae_costs

    ref_data_enc = ae_input

    # Construct the model
    j = tensor.lvector('j')
    r = ref_data_enc[j, :]
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    # input_dim must be nr
    mlp = MLP(activations=activation_functions,
              dims=[ae_dims[-1]] + hidden_dims + [n_inter],
              name='inter_gen')
    mlp2 = MLP(activations=activation_functions_2 + [None],
               dims=[n_inter] + hidden_dims_2 + [output_dim],
               name='end_mlp')

    inter_weights = mlp.apply(r)

    if inter_bias is None:
        ibias = Bias(n_inter)
        ibias.biases_init = Constant(0)
        ibias.initialize()
        inter = ibias.apply(tensor.dot(x, inter_weights))
    else:
        inter = tensor.dot(x, inter_weights) - inter_bias
    inter = inter_act_fun.apply(inter)

    final = mlp2.apply(inter)

    cost = Softmax().categorical_cross_entropy(y, final)
    confidence = Softmax().apply(final)

    pred = final.argmax(axis=1)
    # error_rate = tensor.neq(y, pred).mean()
    ber = balanced_error_rate.ber(y, pred)

    # Initialize parameters
    for brick in ae_bricks + [mlp, mlp2]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # apply regularization
    cg = ComputationGraph([cost, ber])

    s_dropout_vars = []  # default, so the a_dropout branch works when s_dropout == 0
    if r_dropout != 0:
        # - dropout on input vector r : r_dropout
        cg = apply_dropout(cg, [r], r_dropout)
    if x_dropout != 0:
        cg = apply_dropout(cg, [x], x_dropout)
    if s_dropout != 0:
        # - dropout on intermediate layers of first mlp : s_dropout
        s_dropout_vars = list(
            set(VariableFilter(bricks=[Tanh], name='output')
                (ComputationGraph([inter_weights]))) -
            set([inter_weights]))
        cg = apply_dropout(cg, s_dropout_vars, s_dropout)
    if i_dropout != 0:
        # - dropout on input to second mlp : i_dropout
        cg = apply_dropout(cg, [inter], i_dropout)
    if a_dropout != 0:
        # - dropout on hidden layers of second mlp : a_dropout
        a_dropout_vars = list(
            set(VariableFilter(bricks=[Tanh], name='output')
                (ComputationGraph([final]))) -
            set([inter_weights]) - set(s_dropout_vars))
        cg = apply_dropout(cg, a_dropout_vars, a_dropout)
    if r_noise_std != 0:
        cg = apply_noise(cg, [r], r_noise_std)
    if w_noise_std != 0:
        # - apply noise on weight variables
        weight_vars = VariableFilter(roles=[WEIGHT])(cg)
        cg = apply_noise(cg, weight_vars, w_noise_std)

    [cost_reg, ber_reg] = cg.outputs

    if s_l1pen != 0:
        s_weights = VariableFilter(bricks=mlp.linear_transformations,
                                   roles=[WEIGHT])(cg)
        cost_reg = cost_reg + s_l1pen * sum(abs(w).sum() for w in s_weights)
    if i_l1pen != 0:
        cost_reg = cost_reg + i_l1pen * abs(inter).sum()
    if a_l1pen != 0:
        a_weights = VariableFilter(bricks=mlp2.linear_transformations,
                                   roles=[WEIGHT])(cg)
        cost_reg = cost_reg + a_l1pen * sum(abs(w).sum() for w in a_weights)

    self.cost = cost
    self.cost_reg = cost_reg
    self.ber = ber
    self.ber_reg = ber_reg
    self.pred = pred
    self.confidence = confidence
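# The per-layer costs in self.ae_costs exist so that the autoencoder stack
# can be trained before the classifier. Because the AE part reads only the
# shared ref_data, a compiled training step needs no inputs. Below is a
# standalone one-layer sketch of that loop; the dimensions, noise-free
# encoder, and learning rate are illustrative assumptions, not the original
# training setup.
import numpy
import theano
from theano import tensor

data = theano.shared(numpy.random.randn(50, 8).astype('float32'))
W = theano.shared(0.01 * numpy.random.randn(8, 4).astype('float32'))
V = theano.shared(0.01 * numpy.random.randn(4, 8).astype('float32'))

enc = tensor.tanh(data.dot(W))
dec = tensor.tanh(enc.dot(V))
ae_cost = tensor.sqrt(((data - dec) ** 2).sum(axis=1)).mean() \
    + 0.01 * abs(enc).sum(axis=1).mean()

grads = theano.grad(ae_cost, [W, V])
step = theano.function(
    [], ae_cost,
    updates=[(p, p - 0.1 * g) for p, g in zip([W, V], grads)])
for _ in range(100):  # a few plain gradient steps on this layer
    step()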
def __init__(self, ref_data, output_dim):
    input_dim = ref_data.shape[1]

    ref_data_sh = theano.shared(numpy.array(ref_data, dtype=numpy.float32),
                                name='ref_data')

    # Construct the model
    j = tensor.lvector('j')
    r = ref_data_sh[j, :]
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    # input_dim must be nr
    mlp0 = MLP(activations=activation_functions_0,
               dims=[input_dim] + hidden_dims_0,
               name='e0')
    mlp0vs = MLP(activations=[None],
                 dims=[hidden_dims_0[-1], input_dim],
                 name='de0')
    mlp1 = MLP(activations=activation_functions_1,
               dims=[hidden_dims_0[-1]] + hidden_dims_1 + [n_inter],
               name='inter_gen')
    mlp2 = MLP(activations=activation_functions_2 + [None],
               dims=[n_inter] + hidden_dims_2 + [output_dim],
               name='end_mlp')

    encod = mlp0.apply(r)
    rprime = mlp0vs.apply(encod)
    inter_weights = mlp1.apply(encod)

    ibias = Bias(n_inter)
    ibias.biases_init = Constant(0)
    ibias.initialize()
    inter = inter_act_fun.apply(ibias.apply(tensor.dot(x, inter_weights)))

    final = mlp2.apply(inter)

    cost = Softmax().categorical_cross_entropy(y, final)
    confidence = Softmax().apply(final)

    pred = final.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters
    for brick in [mlp0, mlp0vs, mlp1, mlp2]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # apply regularization
    cg = ComputationGraph([cost, error_rate])

    s_dropout_vars = []  # default, so the a_dropout branch works when s_dropout == 0
    if r_dropout != 0:
        # - dropout on input vector r : r_dropout
        cg = apply_dropout(cg, [r], r_dropout)
    if s_dropout != 0:
        # - dropout on intermediate layers of first mlp : s_dropout
        s_dropout_vars = list(
            set(VariableFilter(bricks=[Tanh], name='output')
                (ComputationGraph([inter_weights]))) -
            set([inter_weights]))
        cg = apply_dropout(cg, s_dropout_vars, s_dropout)
    if i_dropout != 0:
        # - dropout on input to second mlp : i_dropout
        cg = apply_dropout(cg, [inter], i_dropout)
    if a_dropout != 0:
        # - dropout on hidden layers of second mlp : a_dropout
        a_dropout_vars = list(
            set(VariableFilter(bricks=[Tanh], name='output')
                (ComputationGraph([final]))) -
            set([inter_weights]) - set(s_dropout_vars))
        cg = apply_dropout(cg, a_dropout_vars, a_dropout)
    if w_noise_std != 0:
        # - apply noise on weight variables
        weight_vars = VariableFilter(roles=[WEIGHT])(cg)
        cg = apply_noise(cg, weight_vars, w_noise_std)

    [cost_reg, error_rate_reg] = cg.outputs

    # add reconstruction penalty for AE part
    penalty_val = tensor.sqrt(((r - rprime) ** 2).sum(axis=1)).mean()
    cost_reg = cost_reg + reconstruction_penalty * penalty_val

    self.cost = cost
    self.cost_reg = cost_reg
    self.error_rate = error_rate
    self.error_rate_reg = error_rate_reg
    self.pred = pred
    self.confidence = confidence
def __init__(self, ref_data, output_dim):
    input_dim = ref_data.shape[1]

    ref_data_sh = theano.shared(numpy.array(ref_data, dtype=numpy.float32),
                                name='ref_data')

    rng = RandomStreams()

    # Greedy layer-wise denoising autoencoder on the reference data (see the
    # sketch after the previous snippet): each layer reconstructs its clean
    # input from a noise-corrupted encoding, with an L1 penalty on the code.
    ae_bricks = []
    ae_input = ref_data_sh
    ae_costs = []
    for i, (idim, odim) in enumerate(zip([input_dim] + ae_dims[:-1],
                                         ae_dims)):
        ae_mlp = MLP(activations=[ae_activations[i]],
                     dims=[idim, odim],
                     name='enc%i' % i)
        enc = ae_mlp.apply(ae_input)
        enc_n = ae_mlp.apply(ae_input + rng.normal(size=ae_input.shape,
                                                   std=ae_f_noise_std))
        ae_mlp_dec = MLP(activations=[ae_activations[i]],
                         dims=[odim, idim],
                         name='dec%i' % i)
        dec = ae_mlp_dec.apply(enc_n)

        cost = tensor.sqrt(((ae_input - dec) ** 2).sum(axis=1)).mean() + \
            ae_l1_pen * abs(enc).sum(axis=1).mean()
        ae_costs.append(cost)

        ae_input = enc
        ae_bricks = ae_bricks + [ae_mlp, ae_mlp_dec]

    self.ae_costs = ae_costs

    ref_data_enc = ae_input

    # Construct the model
    j = tensor.lvector('j')
    r = ref_data_enc[j, :]
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    # input_dim must be nr
    mlp = MLP(activations=activation_functions,
              dims=[ae_dims[-1]] + hidden_dims + [n_inter],
              name='inter_gen')
    mlp2 = MLP(activations=activation_functions_2 + [None],
               dims=[n_inter] + hidden_dims_2 + [output_dim],
               name='end_mlp')

    inter_weights = mlp.apply(r)

    if inter_bias is None:
        ibias = Bias(n_inter)
        ibias.biases_init = Constant(0)
        ibias.initialize()
        inter = ibias.apply(tensor.dot(x, inter_weights))
    else:
        inter = tensor.dot(x, inter_weights) - inter_bias
    inter = inter_act_fun.apply(inter)

    final = mlp2.apply(inter)

    cost = Softmax().categorical_cross_entropy(y, final)
    confidence = Softmax().apply(final)

    pred = final.argmax(axis=1)
    # error_rate = tensor.neq(y, pred).mean()
    ber = balanced_error_rate.ber(y, pred)

    # Initialize parameters
    for brick in ae_bricks + [mlp, mlp2]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # apply regularization
    cg = ComputationGraph([cost, ber])

    s_dropout_vars = []  # default, so the a_dropout branch works when s_dropout == 0
    if r_dropout != 0:
        # - dropout on input vector r : r_dropout
        cg = apply_dropout(cg, [r], r_dropout)
    if x_dropout != 0:
        cg = apply_dropout(cg, [x], x_dropout)
    if s_dropout != 0:
        # - dropout on intermediate layers of first mlp : s_dropout
        s_dropout_vars = list(
            set(VariableFilter(bricks=[Tanh], name='output')
                (ComputationGraph([inter_weights]))) -
            set([inter_weights]))
        cg = apply_dropout(cg, s_dropout_vars, s_dropout)
    if i_dropout != 0:
        # - dropout on input to second mlp : i_dropout
        cg = apply_dropout(cg, [inter], i_dropout)
    if a_dropout != 0:
        # - dropout on hidden layers of second mlp : a_dropout
        a_dropout_vars = list(
            set(VariableFilter(bricks=[Tanh], name='output')
                (ComputationGraph([final]))) -
            set([inter_weights]) - set(s_dropout_vars))
        cg = apply_dropout(cg, a_dropout_vars, a_dropout)
    if r_noise_std != 0:
        cg = apply_noise(cg, [r], r_noise_std)
    if w_noise_std != 0:
        # - apply noise on weight variables
        weight_vars = VariableFilter(roles=[WEIGHT])(cg)
        cg = apply_noise(cg, weight_vars, w_noise_std)

    [cost_reg, ber_reg] = cg.outputs

    if s_l1pen != 0:
        s_weights = VariableFilter(bricks=mlp.linear_transformations,
                                   roles=[WEIGHT])(cg)
        cost_reg = cost_reg + s_l1pen * sum(abs(w).sum() for w in s_weights)
    if i_l1pen != 0:
        cost_reg = cost_reg + i_l1pen * abs(inter).sum()
    if a_l1pen != 0:
        a_weights = VariableFilter(bricks=mlp2.linear_transformations,
                                   roles=[WEIGHT])(cg)
        cost_reg = cost_reg + a_l1pen * sum(abs(w).sum() for w in a_weights)

    self.cost = cost
    self.cost_reg = cost_reg
    self.ber = ber
    self.ber_reg = ber_reg
    self.pred = pred
    self.confidence = confidence
class ETHM(EUTHM):
    '''Model with only textual-hashtag information'''

    def __init__(self, config, dataset, *args, **kwargs):
        super(ETHM, self).__init__(config, dataset)

    def _build_model(self, *args, **kwargs):
        # Define inputs
        self._define_inputs()
        self._build_bricks()
        self._set_OV_value()
        # Transpose text
        self.text = self.text.dimshuffle(1, 0)
        self.text_mask = self.text_mask.dimshuffle(1, 0)
        self.sparse_word = self.sparse_word.dimshuffle(1, 0)
        self.sparse_word_mask = self.sparse_word_mask.dimshuffle(1, 0)
        # Turn words and hashtags into vector representations
        text_vec = self.word_embed.apply(self.text)
        # Apply hashtag word and url
        text_vec = self._apply_hashtag_word(text_vec)
        text_vec = self._apply_sparse_word(text_vec)
        # Encode text
        mlstm_hidden, mlstm_cell = self.mlstm.apply(
            inputs=self.mlstm_ins.apply(text_vec),
            mask=self.text_mask.astype(theano.config.floatX))
        text_encodes = mlstm_hidden[-1]
        input_vec = text_encodes
        self._get_cost(input_vec, None, None)

    def _define_inputs(self, *args, **kwargs):
        self.hashtag = tensor.ivector('hashtag')
        self.text = tensor.imatrix('text')
        self.text_mask = tensor.matrix('text_mask',
                                       dtype=theano.config.floatX)
        self.hashtag_word = tensor.ivector('hashtag_word')
        self.hashtag_sparse_mask = tensor.vector(
            'hashtag_word_sparse_mask', dtype=theano.config.floatX)
        self.hashtag_word_left_idx = tensor.ivector(
            'hashtag_word_idx_left_idx')
        self.hashtag_word_right_idx = tensor.ivector(
            'hashtag_word_idx_right_idx')
        self.sparse_word = tensor.imatrix('sparse_word')
        self.sparse_word_sparse_mask = tensor.vector(
            'sparse_word_sparse_mask', dtype=theano.config.floatX)
        self.sparse_word_mask = tensor.matrix('sparse_word_mask',
                                              dtype=theano.config.floatX)
        self.sparse_word_left_idx = tensor.ivector('sparse_word_idx_left_idx')
        self.sparse_word_right_idx = tensor.ivector(
            'sparse_word_idx_right_idx')

    def _build_bricks(self, *args, **kwargs):
        # Build lookup tables
        self.word_embed = self._embed(len(self.dataset.word2index),
                                      self.config.word_embed_dim,
                                      name='word_embed')
        self.hashtag_embed = self._embed(len(self.dataset.hashtag2index),
                                         self.config.lstm_dim,
                                         name='hashtag_embed')
        # Build text encoder
        self.mlstm_ins = Linear(input_dim=self.config.word_embed_dim,
                                output_dim=4 * self.config.lstm_dim,
                                name='mlstm_in')
        self.mlstm_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.word_embed_dim + self.config.lstm_dim))
        self.mlstm_ins.biases_init = Constant(0)
        self.mlstm_ins.initialize()
        self.mlstm = MLSTM(self.config.lstm_time,
                           self.config.lstm_dim,
                           shared=False)
        self.mlstm.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.word_embed_dim + self.config.lstm_dim))
        self.mlstm.biases_init = Constant(0)
        self.mlstm.initialize()
        self.hashtag2word = MLP(
            activations=[Tanh('hashtag2word_tanh')],
            dims=[self.config.lstm_dim, self.config.word_embed_dim],
            name='hashtag2word_mlp')
        self.hashtag2word.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.hashtag2word.biases_init = Constant(0)
        self.hashtag2word.initialize()
        self.hashtag2word_bias = Bias(dim=1, name='hashtag2word_bias')
        self.hashtag2word_bias.biases_init = Constant(0)
        self.hashtag2word_bias.initialize()
        # Build character embedding
        self.char_embed = self._embed(len(self.dataset.char2index),
                                      self.config.char_embed_dim,
                                      name='char_embed')
        # Build sparse word encoder
        self.rnn_ins = Linear(input_dim=self.config.char_embed_dim,
                              output_dim=self.config.word_embed_dim,
                              name='rnn_in')
        self.rnn_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.char_embed_dim +
                       self.config.word_embed_dim))
        self.rnn_ins.biases_init = Constant(0)
        self.rnn_ins.initialize()
        self.rnn = SimpleRecurrent(dim=self.config.word_embed_dim,
                                   activation=Tanh())
        self.rnn.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.rnn.initialize()

    def _apply_dropout(self, outputs, *args, **kwargs):
        variables = [self.word_embed.W, self.hashtag_embed.W]
        cgs = ComputationGraph(outputs)
        cg_dropouts = apply_dropout(cgs,
                                    variables,
                                    drop_prob=self.config.dropout_prob,
                                    seed=123).outputs
        return cg_dropouts

    def _apply_reg(self, cost, params=None, *args, **kwargs):
        # Guard against configs that do not define l2_norm
        try:
            if self.config.l2_norm > 0:
                cost = cost + self.config.l2_norm * \
                    theano_expressions.l2_norm(
                        tensors=[self.hashtag_embed.W,
                                 self.word_embed.W]) ** 2
        except Exception:
            pass
        return cost
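# The _apply_reg penalty relies on blocks.theano_expressions.l2_norm, whose
# square is just the sum of squared entries over all listed tensors. A small
# self-contained check of that identity (the shapes are arbitrary):
import numpy
import theano
from blocks import theano_expressions

A = theano.shared(numpy.random.randn(4, 3).astype('float32'))
B = theano.shared(numpy.random.randn(5, 2).astype('float32'))

penalty = theano_expressions.l2_norm(tensors=[A, B]) ** 2
check = (A ** 2).sum() + (B ** 2).sum()
p, c = theano.function([], [penalty, check])()
assert numpy.allclose(p, c, rtol=1e-4)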
class EUTHM(UTHM):
    '''UTH model with extended information'''

    def __init__(self, config, dataset, *args, **kwargs):
        super(EUTHM, self).__init__(config, dataset)

    def _define_inputs(self, *args, **kwargs):
        super(EUTHM, self)._define_inputs()
        self.user_word = tensor.ivector('user_word')
        self.user_word_sparse_mask = tensor.vector(
            'user_word_sparse_mask', dtype=theano.config.floatX)
        self.user_word_left_idx = tensor.ivector('user_word_idx_left_idx')
        self.user_word_right_idx = tensor.ivector('user_word_idx_right_idx')
        self.hashtag_word = tensor.ivector('hashtag_word')
        self.hashtag_sparse_mask = tensor.vector(
            'hashtag_word_sparse_mask', dtype=theano.config.floatX)
        self.hashtag_word_left_idx = tensor.ivector(
            'hashtag_word_idx_left_idx')
        self.hashtag_word_right_idx = tensor.ivector(
            'hashtag_word_idx_right_idx')
        self.sparse_word = tensor.imatrix('sparse_word')
        self.sparse_word_sparse_mask = tensor.vector(
            'sparse_word_sparse_mask', dtype=theano.config.floatX)
        self.sparse_word_mask = tensor.matrix('sparse_word_mask',
                                              dtype=theano.config.floatX)
        self.sparse_word_left_idx = tensor.ivector('sparse_word_idx_left_idx')
        self.sparse_word_right_idx = tensor.ivector(
            'sparse_word_idx_right_idx')

    def _build_bricks(self, *args, **kwargs):
        # Build lookup tables
        super(EUTHM, self)._build_bricks()
        self.user2word = MLP(
            activations=[Tanh('user2word_tanh')],
            dims=[self.config.user_embed_dim, self.config.word_embed_dim],
            name='user2word_mlp')
        self.user2word.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.user2word.biases_init = Constant(0)
        self.user2word.initialize()
        self.hashtag2word = MLP(
            activations=[Tanh('hashtag2word_tanh')],
            dims=[self.config.user_embed_dim + self.config.word_embed_dim,
                  self.config.word_embed_dim],
            name='hashtag2word_mlp')
        self.hashtag2word.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.hashtag2word.biases_init = Constant(0)
        self.hashtag2word.initialize()
        self.user2word_bias = Bias(dim=1, name='user2word_bias')
        self.user2word_bias.biases_init = Constant(0)
        self.user2word_bias.initialize()
        self.hashtag2word_bias = Bias(dim=1, name='hashtag2word_bias')
        self.hashtag2word_bias.biases_init = Constant(0)
        self.hashtag2word_bias.initialize()
        # Build character embedding
        self.char_embed = self._embed(len(self.dataset.char2index),
                                      self.config.char_embed_dim,
                                      name='char_embed')
        # Build sparse word encoder
        self.rnn_ins = Linear(input_dim=self.config.char_embed_dim,
                              output_dim=self.config.word_embed_dim,
                              name='rnn_in')
        self.rnn_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.char_embed_dim +
                       self.config.word_embed_dim))
        self.rnn_ins.biases_init = Constant(0)
        self.rnn_ins.initialize()
        self.rnn = SimpleRecurrent(dim=self.config.word_embed_dim,
                                   activation=Tanh())
        self.rnn.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.rnn.initialize()

    def _set_OV_value(self, *args, **kwargs):
        '''Set the <unk> character representation to zeros'''
        # tensor.set_subtensor builds a new symbolic variable rather than
        # modifying W in place, so update the shared value directly.
        W = self.char_embed.W.get_value()
        W[self.dataset.char2index['<unk>']] = numpy.zeros(
            self.config.char_embed_dim, dtype=theano.config.floatX)
        self.char_embed.W.set_value(W)

    def _get_text_vec(self, *args, **kwargs):
        # Transpose text
        self.text = self.text.dimshuffle(1, 0)
        self.text_mask = self.text_mask.dimshuffle(1, 0)
        self.sparse_word = self.sparse_word.dimshuffle(1, 0)
        self.sparse_word_mask = self.sparse_word_mask.dimshuffle(1, 0)
        # Turn words, users and hashtags into vector representations
        text_vec = self.word_embed.apply(self.text)
        # Apply user word, hashtag word and url
        text_vec = self._apply_user_word(text_vec)
        text_vec = self._apply_hashtag_word(text_vec)
        text_vec = self._apply_sparse_word(text_vec)
        return text_vec

    @abstractmethod
    def _apply_user_word(self, text_vec, *args, **kwargs):
        '''
        Replace @a with the transformed author vector

        :param text_vec:
        :param args:
        :param kwargs:
        :return:
        '''
        user_word_vec = self.user2word.apply(
            self.user_embed.apply(self.user_word)) + \
            self.user2word_bias.parameters[0][0]
        text_vec = tensor.set_subtensor(
            text_vec[self.user_word_right_idx, self.user_word_left_idx],
            text_vec[self.user_word_right_idx, self.user_word_left_idx] *
            (1 - self.user_word_sparse_mask[:, None]) +
            user_word_vec * self.user_word_sparse_mask[:, None])
        return text_vec

    @abstractmethod
    def _apply_hashtag_word(self, text_vec, *args, **kwargs):
        '''
        Replace #h with the transformed hashtag vector

        :param text_vec:
        :param args:
        :param kwargs:
        :return:
        '''
        hashtag_word_vec = self.hashtag2word.apply(
            self.hashtag_embed.apply(self.hashtag_word)) + \
            self.hashtag2word_bias.parameters[0][0]
        text_vec = tensor.set_subtensor(
            text_vec[self.hashtag_word_right_idx,
                     self.hashtag_word_left_idx],
            text_vec[self.hashtag_word_right_idx,
                     self.hashtag_word_left_idx] *
            (1 - self.hashtag_sparse_mask[:, None]) +
            hashtag_word_vec * self.hashtag_sparse_mask[:, None])
        return text_vec

    @abstractmethod
    def _apply_sparse_word(self, text_vec, *args, **kwargs):
        '''
        Replace sparse words with their character-level RNN encodings
        (an LSTM could be used instead)

        :param text_vec:
        :param args:
        :param kwargs:
        :return:
        '''
        sparse_word_vec = self.char_embed.apply(self.sparse_word)
        sparse_word_hiddens = self.rnn.apply(
            inputs=self.rnn_ins.apply(sparse_word_vec),
            mask=self.sparse_word_mask)
        tmp = sparse_word_hiddens[-1]
        text_vec = tensor.set_subtensor(
            text_vec[self.sparse_word_right_idx, self.sparse_word_left_idx],
            text_vec[self.sparse_word_right_idx, self.sparse_word_left_idx] *
            (1 - self.sparse_word_sparse_mask[:, None]) +
            tmp * self.sparse_word_sparse_mask[:, None])
        return text_vec
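# Each _apply_* hook above uses the same blend: gather the (time, batch)
# positions of the special tokens, then overwrite those word vectors with the
# transformed vectors wherever the 0/1 sparse mask is set, leaving padding
# entries untouched. A numpy sketch of the blend with hypothetical shapes:
import numpy

seq_len, batch, dim = 6, 4, 5
text_vec = numpy.zeros((seq_len, batch, dim))
word_vec = numpy.ones((3, dim))           # transformed token vectors
t_idx = numpy.array([2, 0, 5])            # time index of each occurrence
b_idx = numpy.array([0, 1, 3])            # batch index of each occurrence
mask = numpy.array([1., 1., 0.])          # 0 marks a padding entry

text_vec[t_idx, b_idx] = (
    text_vec[t_idx, b_idx] * (1 - mask[:, None]) + word_vec * mask[:, None])
assert text_vec[2, 0].sum() == dim and text_vec[5, 3].sum() == 0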
def __init__(self, ref_data, output_dim):
    if pca_dims is not None:
        # PCA via the eigendecomposition of the (uncentered) covariance
        # matrix; eigh suits the symmetric matrix and, unlike eig, cannot
        # return spurious complex parts.
        covmat = numpy.dot(ref_data.T, ref_data)
        ev, evec = numpy.linalg.eigh(covmat)
        best_i = ev.argsort()[-pca_dims:]
        best_evecs = evec[:, best_i]
        best_evecs = best_evecs / numpy.sqrt(
            (best_evecs ** 2).sum(axis=0))  # normalize
        ref_data = numpy.dot(ref_data, best_evecs)

    input_dim = ref_data.shape[1]

    ref_data_sh = theano.shared(numpy.array(ref_data, dtype=numpy.float32),
                                name='ref_data')

    # Construct the model
    j = tensor.lvector('j')
    r = ref_data_sh[j, :]
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    # input_dim must be nr
    mlp = MLP(activations=activation_functions,
              dims=[input_dim] + hidden_dims + [n_inter],
              name='inter_gen')
    mlp2 = MLP(activations=activation_functions_2 + [None],
               dims=[n_inter] + hidden_dims_2 + [output_dim],
               name='end_mlp')

    inter_weights = mlp.apply(r)

    if inter_bias is None:
        ibias = Bias(n_inter)
        ibias.biases_init = Constant(0)
        ibias.initialize()
        inter = ibias.apply(tensor.dot(x, inter_weights))
    else:
        inter = tensor.dot(x, inter_weights) - inter_bias
    inter = inter_act_fun.apply(inter)

    final = mlp2.apply(inter)

    cost = Softmax().categorical_cross_entropy(y, final)
    confidence = Softmax().apply(final)

    pred = final.argmax(axis=1)
    # error_rate = tensor.neq(y, pred).mean()
    ber = balanced_error_rate.ber(y, pred)

    # Initialize parameters
    for brick in [mlp, mlp2]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # apply regularization
    cg = ComputationGraph([cost, ber])

    s_dropout_vars = []  # default, so the a_dropout branch works when s_dropout == 0
    if r_dropout != 0:
        # - dropout on input vector r : r_dropout
        cg = apply_dropout(cg, [r], r_dropout)
    if x_dropout != 0:
        cg = apply_dropout(cg, [x], x_dropout)
    if s_dropout != 0:
        # - dropout on intermediate layers of first mlp : s_dropout
        s_dropout_vars = list(
            set(VariableFilter(bricks=[Tanh], name='output')
                (ComputationGraph([inter_weights]))) -
            set([inter_weights]))
        cg = apply_dropout(cg, s_dropout_vars, s_dropout)
    if i_dropout != 0:
        # - dropout on input to second mlp : i_dropout
        cg = apply_dropout(cg, [inter], i_dropout)
    if a_dropout != 0:
        # - dropout on hidden layers of second mlp : a_dropout
        a_dropout_vars = list(
            set(VariableFilter(bricks=[Tanh], name='output')
                (ComputationGraph([final]))) -
            set([inter_weights]) - set(s_dropout_vars))
        cg = apply_dropout(cg, a_dropout_vars, a_dropout)
    if r_noise_std != 0:
        cg = apply_noise(cg, [r], r_noise_std)
    if w_noise_std != 0:
        # - apply noise on weight variables
        weight_vars = VariableFilter(roles=[WEIGHT])(cg)
        cg = apply_noise(cg, weight_vars, w_noise_std)

    [cost_reg, ber_reg] = cg.outputs

    if s_l1pen != 0:
        s_weights = VariableFilter(bricks=mlp.linear_transformations,
                                   roles=[WEIGHT])(cg)
        cost_reg = cost_reg + s_l1pen * sum(abs(w).sum() for w in s_weights)
    if i_l1pen != 0:
        cost_reg = cost_reg + i_l1pen * abs(inter).sum()
    if a_l1pen != 0:
        a_weights = VariableFilter(bricks=mlp2.linear_transformations,
                                   roles=[WEIGHT])(cg)
        cost_reg = cost_reg + a_l1pen * sum(abs(w).sum() for w in a_weights)

    self.cost = cost
    self.cost_reg = cost_reg
    self.ber = ber
    self.ber_reg = ber_reg
    self.pred = pred
    self.confidence = confidence
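# The PCA step above projects ref_data onto the top eigenvectors of
# ref_data.T @ ref_data; the same subspace falls out of an SVD, which avoids
# forming the covariance matrix explicitly. A self-contained numerical check
# (the shapes are arbitrary):
import numpy

rng = numpy.random.RandomState(0)
data = rng.randn(100, 20)
k = 5

ev, evec = numpy.linalg.eigh(numpy.dot(data.T, data))
proj_eig = numpy.dot(data, evec[:, ev.argsort()[-k:]])

_, _, vt = numpy.linalg.svd(data, full_matrices=False)
proj_svd = numpy.dot(data, vt[:k].T)

# Columns may differ in sign and order, but the captured energy agrees.
assert numpy.isclose((proj_eig ** 2).sum(), (proj_svd ** 2).sum())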