class AttentionEUTHM2(AttentionEUTHM):
    def __init__(self, config, dataset, *args, **kwargs):
        '''
        Define user-text-hashtag model with negative sampling

        :param config:
        :param dataset: User-text-hashtag dataset
        '''
        super(AttentionEUTHM2, self).__init__(config, dataset)

    def _get_doc_embed(self, *args, **kwargs):
        text_vec = self._get_text_vec()
        user_vec = self.user_embed.apply(self.user)
        # Tile the user vector over every time step and concatenate it with
        # the word vectors along the feature axis.
        text_vec = tensor.concatenate(
            [text_vec,
             user_vec[None, :, :][tensor.zeros(shape=(text_vec.shape[0],),
                                               dtype='int32')]],
            axis=2)
        text_vec = self.word_shift.apply(text_vec) + \
            self.word_shift_bias.parameters[0][0]
        return self._encode_text_vec(text_vec)

    def _build_bricks(self, *args, **kwargs):
        super(AttentionEUTHM2, self)._build_bricks()
        self.word_shift = MLP(
            activations=[Tanh('word_shift_tanh')],
            dims=[self.config.user_embed_dim + self.config.word_embed_dim,
                  self.config.word_embed_dim],
            name='word_shift_mlp')
        self.word_shift.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim +
                               self.config.user_embed_dim))
        self.word_shift.biases_init = Constant(0)
        self.word_shift.initialize()
        self.word_shift_bias = Bias(dim=1, name='word_shift_bias')
        self.word_shift_bias.biases_init = Constant(0)
        self.word_shift_bias.initialize()
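# A note on the tiling trick in _get_doc_embed above: indexing a
# (1, batch, dim) tensor with a length-seq_len vector of zeros repeats the
# user vector at every time step, so it can be concatenated with the word
# vectors. A minimal numpy sketch of the same indexing (shapes hypothetical):
import numpy

seq_len, batch, user_dim = 5, 2, 3
user_vec = numpy.arange(batch * user_dim).reshape(batch, user_dim)

# (1, batch, user_dim) indexed by seq_len zeros -> (seq_len, batch, user_dim)
tiled = user_vec[None, :, :][numpy.zeros(seq_len, dtype='int32')]
assert tiled.shape == (seq_len, batch, user_dim)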
def __init__(self, ref_data, output_dim):
    input_dim = ref_data.shape[1]

    ref_data_sh = theano.shared(numpy.array(ref_data, dtype=numpy.float32),
                                name="ref_data")

    # Construct the model
    j = tensor.lvector("j")
    r = ref_data_sh[j, :]
    x = tensor.fmatrix("x")
    y = tensor.ivector("y")

    # input_dim must be nr
    mlp0 = MLP(activations=activation_functions_0,
               dims=[input_dim] + hidden_dims_0,
               name="e0")
    mlp0vs = MLP(activations=[None],
                 dims=[hidden_dims_0[-1], input_dim],
                 name="de0")
    mlp1 = MLP(activations=activation_functions_1,
               dims=[hidden_dims_0[-1]] + hidden_dims_1 + [n_inter],
               name="inter_gen")
    mlp2 = MLP(activations=activation_functions_2 + [None],
               dims=[n_inter] + hidden_dims_2 + [output_dim],
               name="end_mlp")

    encod = mlp0.apply(r)
    rprime = mlp0vs.apply(encod)
    inter_weights = mlp1.apply(encod)

    ibias = Bias(n_inter)
    ibias.biases_init = Constant(0)
    ibias.initialize()
    inter = inter_act_fun.apply(ibias.apply(tensor.dot(x, inter_weights)))

    final = mlp2.apply(inter)

    cost = Softmax().categorical_cross_entropy(y, final)
    confidence = Softmax().apply(final)

    pred = final.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters
    for brick in [mlp0, mlp0vs, mlp1, mlp2]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # apply regularization
    cg = ComputationGraph([cost, error_rate])

    s_dropout_vars = []  # default, so the a_dropout branch works when s_dropout == 0
    if r_dropout != 0:
        # - dropout on input vector r : r_dropout
        cg = apply_dropout(cg, [r], r_dropout)
    if s_dropout != 0:
        # - dropout on intermediate layers of first mlp : s_dropout
        s_dropout_vars = list(
            set(VariableFilter(bricks=[Tanh], name="output")
                (ComputationGraph([inter_weights]))) -
            set([inter_weights]))
        cg = apply_dropout(cg, s_dropout_vars, s_dropout)
    if i_dropout != 0:
        # - dropout on input to second mlp : i_dropout
        cg = apply_dropout(cg, [inter], i_dropout)
    if a_dropout != 0:
        # - dropout on hidden layers of second mlp : a_dropout
        a_dropout_vars = list(
            set(VariableFilter(bricks=[Tanh], name="output")
                (ComputationGraph([final]))) -
            set([inter_weights]) - set(s_dropout_vars))
        cg = apply_dropout(cg, a_dropout_vars, a_dropout)
    if w_noise_std != 0:
        # - apply noise on weight variables
        weight_vars = VariableFilter(roles=[WEIGHT])(cg)
        cg = apply_noise(cg, weight_vars, w_noise_std)

    [cost_reg, error_rate_reg] = cg.outputs

    # add reconstruction penalty for AE part
    penalty_val = tensor.sqrt(((r - rprime) ** 2).sum(axis=1)).mean()
    cost_reg = cost_reg + reconstruction_penalty * penalty_val

    self.cost = cost
    self.cost_reg = cost_reg
    self.error_rate = error_rate
    self.error_rate_reg = error_rate_reg
    self.pred = pred
    self.confidence = confidence
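# The constructor above only builds symbolic expressions; a training function
# still has to be compiled from cost_reg. The surrounding training wrapper is
# not shown in this section, so here is a standalone, self-contained sketch of
# the pattern: recover the symbolic inputs of a cost from its
# ComputationGraph instead of threading j, x, y around by hand. The toy cost
# and shapes are illustrative only.
import numpy
import theano
from theano import tensor
from blocks.graph import ComputationGraph

x = tensor.fmatrix('x')
y = tensor.ivector('y')
cost = ((x.sum(axis=1) - y) ** 2).mean()

cg = ComputationGraph([cost])
inputs = sorted(cg.inputs, key=lambda v: v.name)  # cg.inputs order is not guaranteed
f = theano.function(inputs, cost)
print(f(numpy.ones((3, 4), dtype='float32'), numpy.zeros(3, dtype='int32')))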
def __init__(self, ref_data, output_dim):
    input_dim = ref_data.shape[1]

    ref_data_sh = theano.shared(numpy.array(ref_data, dtype=numpy.float32),
                                name='ref_data')

    rng = RandomStreams()

    # Greedy layer-wise denoising autoencoder on the reference data: each
    # layer reconstructs its clean input from a noise-corrupted encoding,
    # with an L1 sparsity penalty on the code.
    ae_bricks = []
    ae_input = ref_data_sh
    ae_costs = []
    for i, (idim, odim) in enumerate(zip([input_dim] + ae_dims[:-1],
                                         ae_dims)):
        ae_mlp = MLP(activations=[ae_activations[i]],
                     dims=[idim, odim],
                     name='enc%i' % i)
        enc = ae_mlp.apply(ae_input)
        enc_n = ae_mlp.apply(ae_input + rng.normal(size=ae_input.shape,
                                                   std=ae_f_noise_std))
        ae_mlp_dec = MLP(activations=[ae_activations[i]],
                         dims=[odim, idim],
                         name='dec%i' % i)
        dec = ae_mlp_dec.apply(enc_n)

        cost = tensor.sqrt(((ae_input - dec) ** 2).sum(axis=1)).mean() + \
            ae_l1_pen * abs(enc).sum(axis=1).mean()
        ae_costs.append(cost)

        ae_input = enc
        ae_bricks = ae_bricks + [ae_mlp, ae_mlp_dec]

    self.ae_costs = ae_costs

    ref_data_enc = ae_input

    # Construct the model
    j = tensor.lvector('j')
    r = ref_data_enc[j, :]
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    # input_dim must be nr
    mlp = MLP(activations=activation_functions,
              dims=[ae_dims[-1]] + hidden_dims + [n_inter],
              name='inter_gen')
    mlp2 = MLP(activations=activation_functions_2 + [None],
               dims=[n_inter] + hidden_dims_2 + [output_dim],
               name='end_mlp')

    inter_weights = mlp.apply(r)

    if inter_bias is None:
        ibias = Bias(n_inter)
        ibias.biases_init = Constant(0)
        ibias.initialize()
        inter = ibias.apply(tensor.dot(x, inter_weights))
    else:
        inter = tensor.dot(x, inter_weights) - inter_bias
    inter = inter_act_fun.apply(inter)

    final = mlp2.apply(inter)

    cost = Softmax().categorical_cross_entropy(y, final)
    confidence = Softmax().apply(final)

    pred = final.argmax(axis=1)
    # error_rate = tensor.neq(y, pred).mean()
    ber = balanced_error_rate.ber(y, pred)

    # Initialize parameters
    for brick in ae_bricks + [mlp, mlp2]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # apply regularization
    cg = ComputationGraph([cost, ber])

    s_dropout_vars = []  # default, so the a_dropout branch works when s_dropout == 0
    if r_dropout != 0:
        # - dropout on input vector r : r_dropout
        cg = apply_dropout(cg, [r], r_dropout)
    if x_dropout != 0:
        cg = apply_dropout(cg, [x], x_dropout)
    if s_dropout != 0:
        # - dropout on intermediate layers of first mlp : s_dropout
        s_dropout_vars = list(
            set(VariableFilter(bricks=[Tanh], name='output')
                (ComputationGraph([inter_weights]))) -
            set([inter_weights]))
        cg = apply_dropout(cg, s_dropout_vars, s_dropout)
    if i_dropout != 0:
        # - dropout on input to second mlp : i_dropout
        cg = apply_dropout(cg, [inter], i_dropout)
    if a_dropout != 0:
        # - dropout on hidden layers of second mlp : a_dropout
        a_dropout_vars = list(
            set(VariableFilter(bricks=[Tanh], name='output')
                (ComputationGraph([final]))) -
            set([inter_weights]) - set(s_dropout_vars))
        cg = apply_dropout(cg, a_dropout_vars, a_dropout)
    if r_noise_std != 0:
        cg = apply_noise(cg, [r], r_noise_std)
    if w_noise_std != 0:
        # - apply noise on weight variables
        weight_vars = VariableFilter(roles=[WEIGHT])(cg)
        cg = apply_noise(cg, weight_vars, w_noise_std)

    [cost_reg, ber_reg] = cg.outputs

    if s_l1pen != 0:
        s_weights = VariableFilter(bricks=mlp.linear_transformations,
                                   roles=[WEIGHT])(cg)
        cost_reg = cost_reg + s_l1pen * sum(abs(w).sum() for w in s_weights)
    if i_l1pen != 0:
        cost_reg = cost_reg + i_l1pen * abs(inter).sum()
    if a_l1pen != 0:
        a_weights = VariableFilter(bricks=mlp2.linear_transformations,
                                   roles=[WEIGHT])(cg)
        cost_reg = cost_reg + a_l1pen * sum(abs(w).sum() for w in a_weights)

    self.cost = cost
    self.cost_reg = cost_reg
    self.ber = ber
    self.ber_reg = ber_reg
    self.pred = pred
    self.confidence = confidence
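# The per-layer costs in self.ae_costs exist so that the autoencoder stack
# can be trained before the classifier. Because the AE part reads only the
# shared ref_data, a compiled training step needs no inputs. Below is a
# standalone one-layer sketch of that loop; the dimensions, noise-free
# encoder, and learning rate are illustrative assumptions, not the original
# training setup.
import numpy
import theano
from theano import tensor

data = theano.shared(numpy.random.randn(50, 8).astype('float32'))
W = theano.shared(0.01 * numpy.random.randn(8, 4).astype('float32'))
V = theano.shared(0.01 * numpy.random.randn(4, 8).astype('float32'))

enc = tensor.tanh(data.dot(W))
dec = tensor.tanh(enc.dot(V))
ae_cost = tensor.sqrt(((data - dec) ** 2).sum(axis=1)).mean() \
    + 0.01 * abs(enc).sum(axis=1).mean()

grads = theano.grad(ae_cost, [W, V])
step = theano.function(
    [], ae_cost,
    updates=[(p, p - 0.1 * g) for p, g in zip([W, V], grads)])
for _ in range(100):  # a few plain gradient steps on this layer
    step()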
def __init__(self, ref_data, output_dim):
    input_dim = ref_data.shape[1]

    ref_data_sh = theano.shared(numpy.array(ref_data, dtype=numpy.float32),
                                name='ref_data')

    # Construct the model
    j = tensor.lvector('j')
    r = ref_data_sh[j, :]
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    # input_dim must be nr
    mlp0 = MLP(activations=activation_functions_0,
               dims=[input_dim] + hidden_dims_0,
               name='e0')
    mlp0vs = MLP(activations=[None],
                 dims=[hidden_dims_0[-1], input_dim],
                 name='de0')
    mlp1 = MLP(activations=activation_functions_1,
               dims=[hidden_dims_0[-1]] + hidden_dims_1 + [n_inter],
               name='inter_gen')
    mlp2 = MLP(activations=activation_functions_2 + [None],
               dims=[n_inter] + hidden_dims_2 + [output_dim],
               name='end_mlp')

    encod = mlp0.apply(r)
    rprime = mlp0vs.apply(encod)
    inter_weights = mlp1.apply(encod)

    ibias = Bias(n_inter)
    ibias.biases_init = Constant(0)
    ibias.initialize()
    inter = inter_act_fun.apply(ibias.apply(tensor.dot(x, inter_weights)))

    final = mlp2.apply(inter)

    cost = Softmax().categorical_cross_entropy(y, final)
    confidence = Softmax().apply(final)

    pred = final.argmax(axis=1)
    error_rate = tensor.neq(y, pred).mean()

    # Initialize parameters
    for brick in [mlp0, mlp0vs, mlp1, mlp2]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # apply regularization
    cg = ComputationGraph([cost, error_rate])

    s_dropout_vars = []  # default, so the a_dropout branch works when s_dropout == 0
    if r_dropout != 0:
        # - dropout on input vector r : r_dropout
        cg = apply_dropout(cg, [r], r_dropout)
    if s_dropout != 0:
        # - dropout on intermediate layers of first mlp : s_dropout
        s_dropout_vars = list(
            set(VariableFilter(bricks=[Tanh], name='output')
                (ComputationGraph([inter_weights]))) -
            set([inter_weights]))
        cg = apply_dropout(cg, s_dropout_vars, s_dropout)
    if i_dropout != 0:
        # - dropout on input to second mlp : i_dropout
        cg = apply_dropout(cg, [inter], i_dropout)
    if a_dropout != 0:
        # - dropout on hidden layers of second mlp : a_dropout
        a_dropout_vars = list(
            set(VariableFilter(bricks=[Tanh], name='output')
                (ComputationGraph([final]))) -
            set([inter_weights]) - set(s_dropout_vars))
        cg = apply_dropout(cg, a_dropout_vars, a_dropout)
    if w_noise_std != 0:
        # - apply noise on weight variables
        weight_vars = VariableFilter(roles=[WEIGHT])(cg)
        cg = apply_noise(cg, weight_vars, w_noise_std)

    [cost_reg, error_rate_reg] = cg.outputs

    # add reconstruction penalty for AE part
    penalty_val = tensor.sqrt(((r - rprime) ** 2).sum(axis=1)).mean()
    cost_reg = cost_reg + reconstruction_penalty * penalty_val

    self.cost = cost
    self.cost_reg = cost_reg
    self.error_rate = error_rate
    self.error_rate_reg = error_rate_reg
    self.pred = pred
    self.confidence = confidence
def __init__(self, ref_data, output_dim):
    input_dim = ref_data.shape[1]

    ref_data_sh = theano.shared(numpy.array(ref_data, dtype=numpy.float32),
                                name='ref_data')

    rng = RandomStreams()

    # Greedy layer-wise denoising autoencoder on the reference data (see the
    # sketch after the previous snippet): each layer reconstructs its clean
    # input from a noise-corrupted encoding, with an L1 penalty on the code.
    ae_bricks = []
    ae_input = ref_data_sh
    ae_costs = []
    for i, (idim, odim) in enumerate(zip([input_dim] + ae_dims[:-1],
                                         ae_dims)):
        ae_mlp = MLP(activations=[ae_activations[i]],
                     dims=[idim, odim],
                     name='enc%i' % i)
        enc = ae_mlp.apply(ae_input)
        enc_n = ae_mlp.apply(ae_input + rng.normal(size=ae_input.shape,
                                                   std=ae_f_noise_std))
        ae_mlp_dec = MLP(activations=[ae_activations[i]],
                         dims=[odim, idim],
                         name='dec%i' % i)
        dec = ae_mlp_dec.apply(enc_n)

        cost = tensor.sqrt(((ae_input - dec) ** 2).sum(axis=1)).mean() + \
            ae_l1_pen * abs(enc).sum(axis=1).mean()
        ae_costs.append(cost)

        ae_input = enc
        ae_bricks = ae_bricks + [ae_mlp, ae_mlp_dec]

    self.ae_costs = ae_costs

    ref_data_enc = ae_input

    # Construct the model
    j = tensor.lvector('j')
    r = ref_data_enc[j, :]
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    # input_dim must be nr
    mlp = MLP(activations=activation_functions,
              dims=[ae_dims[-1]] + hidden_dims + [n_inter],
              name='inter_gen')
    mlp2 = MLP(activations=activation_functions_2 + [None],
               dims=[n_inter] + hidden_dims_2 + [output_dim],
               name='end_mlp')

    inter_weights = mlp.apply(r)

    if inter_bias is None:
        ibias = Bias(n_inter)
        ibias.biases_init = Constant(0)
        ibias.initialize()
        inter = ibias.apply(tensor.dot(x, inter_weights))
    else:
        inter = tensor.dot(x, inter_weights) - inter_bias
    inter = inter_act_fun.apply(inter)

    final = mlp2.apply(inter)

    cost = Softmax().categorical_cross_entropy(y, final)
    confidence = Softmax().apply(final)

    pred = final.argmax(axis=1)
    # error_rate = tensor.neq(y, pred).mean()
    ber = balanced_error_rate.ber(y, pred)

    # Initialize parameters
    for brick in ae_bricks + [mlp, mlp2]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # apply regularization
    cg = ComputationGraph([cost, ber])

    s_dropout_vars = []  # default, so the a_dropout branch works when s_dropout == 0
    if r_dropout != 0:
        # - dropout on input vector r : r_dropout
        cg = apply_dropout(cg, [r], r_dropout)
    if x_dropout != 0:
        cg = apply_dropout(cg, [x], x_dropout)
    if s_dropout != 0:
        # - dropout on intermediate layers of first mlp : s_dropout
        s_dropout_vars = list(
            set(VariableFilter(bricks=[Tanh], name='output')
                (ComputationGraph([inter_weights]))) -
            set([inter_weights]))
        cg = apply_dropout(cg, s_dropout_vars, s_dropout)
    if i_dropout != 0:
        # - dropout on input to second mlp : i_dropout
        cg = apply_dropout(cg, [inter], i_dropout)
    if a_dropout != 0:
        # - dropout on hidden layers of second mlp : a_dropout
        a_dropout_vars = list(
            set(VariableFilter(bricks=[Tanh], name='output')
                (ComputationGraph([final]))) -
            set([inter_weights]) - set(s_dropout_vars))
        cg = apply_dropout(cg, a_dropout_vars, a_dropout)
    if r_noise_std != 0:
        cg = apply_noise(cg, [r], r_noise_std)
    if w_noise_std != 0:
        # - apply noise on weight variables
        weight_vars = VariableFilter(roles=[WEIGHT])(cg)
        cg = apply_noise(cg, weight_vars, w_noise_std)

    [cost_reg, ber_reg] = cg.outputs

    if s_l1pen != 0:
        s_weights = VariableFilter(bricks=mlp.linear_transformations,
                                   roles=[WEIGHT])(cg)
        cost_reg = cost_reg + s_l1pen * sum(abs(w).sum() for w in s_weights)
    if i_l1pen != 0:
        cost_reg = cost_reg + i_l1pen * abs(inter).sum()
    if a_l1pen != 0:
        a_weights = VariableFilter(bricks=mlp2.linear_transformations,
                                   roles=[WEIGHT])(cg)
        cost_reg = cost_reg + a_l1pen * sum(abs(w).sum() for w in a_weights)

    self.cost = cost
    self.cost_reg = cost_reg
    self.ber = ber
    self.ber_reg = ber_reg
    self.pred = pred
    self.confidence = confidence
class ETHM(EUTHM):
    '''Model with only textual-hashtag information'''

    def __init__(self, config, dataset, *args, **kwargs):
        super(ETHM, self).__init__(config, dataset)

    def _build_model(self, *args, **kwargs):
        # Define inputs
        self._define_inputs()
        self._build_bricks()
        self._set_OV_value()
        # Transpose text
        self.text = self.text.dimshuffle(1, 0)
        self.text_mask = self.text_mask.dimshuffle(1, 0)
        self.sparse_word = self.sparse_word.dimshuffle(1, 0)
        self.sparse_word_mask = self.sparse_word_mask.dimshuffle(1, 0)
        # Turn words and hashtags into vector representations
        text_vec = self.word_embed.apply(self.text)
        # Apply hashtag word and url
        text_vec = self._apply_hashtag_word(text_vec)
        text_vec = self._apply_sparse_word(text_vec)
        # Encode text
        mlstm_hidden, mlstm_cell = self.mlstm.apply(
            inputs=self.mlstm_ins.apply(text_vec),
            mask=self.text_mask.astype(theano.config.floatX))
        text_encodes = mlstm_hidden[-1]
        input_vec = text_encodes
        self._get_cost(input_vec, None, None)

    def _define_inputs(self, *args, **kwargs):
        self.hashtag = tensor.ivector('hashtag')
        self.text = tensor.imatrix('text')
        self.text_mask = tensor.matrix('text_mask',
                                       dtype=theano.config.floatX)
        self.hashtag_word = tensor.ivector('hashtag_word')
        self.hashtag_sparse_mask = tensor.vector(
            'hashtag_word_sparse_mask', dtype=theano.config.floatX)
        self.hashtag_word_left_idx = tensor.ivector(
            'hashtag_word_idx_left_idx')
        self.hashtag_word_right_idx = tensor.ivector(
            'hashtag_word_idx_right_idx')
        self.sparse_word = tensor.imatrix('sparse_word')
        self.sparse_word_sparse_mask = tensor.vector(
            'sparse_word_sparse_mask', dtype=theano.config.floatX)
        self.sparse_word_mask = tensor.matrix('sparse_word_mask',
                                              dtype=theano.config.floatX)
        self.sparse_word_left_idx = tensor.ivector('sparse_word_idx_left_idx')
        self.sparse_word_right_idx = tensor.ivector(
            'sparse_word_idx_right_idx')

    def _build_bricks(self, *args, **kwargs):
        # Build lookup tables
        self.word_embed = self._embed(len(self.dataset.word2index),
                                      self.config.word_embed_dim,
                                      name='word_embed')
        self.hashtag_embed = self._embed(len(self.dataset.hashtag2index),
                                         self.config.lstm_dim,
                                         name='hashtag_embed')
        # Build text encoder
        self.mlstm_ins = Linear(input_dim=self.config.word_embed_dim,
                                output_dim=4 * self.config.lstm_dim,
                                name='mlstm_in')
        self.mlstm_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.word_embed_dim + self.config.lstm_dim))
        self.mlstm_ins.biases_init = Constant(0)
        self.mlstm_ins.initialize()
        self.mlstm = MLSTM(self.config.lstm_time,
                           self.config.lstm_dim,
                           shared=False)
        self.mlstm.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.word_embed_dim + self.config.lstm_dim))
        self.mlstm.biases_init = Constant(0)
        self.mlstm.initialize()
        self.hashtag2word = MLP(
            activations=[Tanh('hashtag2word_tanh')],
            dims=[self.config.lstm_dim, self.config.word_embed_dim],
            name='hashtag2word_mlp')
        self.hashtag2word.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.hashtag2word.biases_init = Constant(0)
        self.hashtag2word.initialize()
        self.hashtag2word_bias = Bias(dim=1, name='hashtag2word_bias')
        self.hashtag2word_bias.biases_init = Constant(0)
        self.hashtag2word_bias.initialize()
        # Build character embedding
        self.char_embed = self._embed(len(self.dataset.char2index),
                                      self.config.char_embed_dim,
                                      name='char_embed')
        # Build sparse word encoder
        self.rnn_ins = Linear(input_dim=self.config.char_embed_dim,
                              output_dim=self.config.word_embed_dim,
                              name='rnn_in')
        self.rnn_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.char_embed_dim +
                       self.config.word_embed_dim))
        self.rnn_ins.biases_init = Constant(0)
        self.rnn_ins.initialize()
        self.rnn = SimpleRecurrent(dim=self.config.word_embed_dim,
                                   activation=Tanh())
        self.rnn.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.rnn.initialize()

    def _apply_dropout(self, outputs, *args, **kwargs):
        variables = [self.word_embed.W, self.hashtag_embed.W]
        cgs = ComputationGraph(outputs)
        cg_dropouts = apply_dropout(cgs,
                                    variables,
                                    drop_prob=self.config.dropout_prob,
                                    seed=123).outputs
        return cg_dropouts

    def _apply_reg(self, cost, params=None, *args, **kwargs):
        # Guard against configs that do not define l2_norm
        try:
            if self.config.l2_norm > 0:
                cost = cost + self.config.l2_norm * \
                    theano_expressions.l2_norm(
                        tensors=[self.hashtag_embed.W,
                                 self.word_embed.W]) ** 2
        except Exception:
            pass
        return cost
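# The _apply_reg penalty relies on blocks.theano_expressions.l2_norm, whose
# square is just the sum of squared entries over all listed tensors. A small
# self-contained check of that identity (the shapes are arbitrary):
import numpy
import theano
from blocks import theano_expressions

A = theano.shared(numpy.random.randn(4, 3).astype('float32'))
B = theano.shared(numpy.random.randn(5, 2).astype('float32'))

penalty = theano_expressions.l2_norm(tensors=[A, B]) ** 2
check = (A ** 2).sum() + (B ** 2).sum()
p, c = theano.function([], [penalty, check])()
assert numpy.allclose(p, c, rtol=1e-4)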
class EUTHM(UTHM):
    '''UTH model with extended information'''

    def __init__(self, config, dataset, *args, **kwargs):
        super(EUTHM, self).__init__(config, dataset)

    def _define_inputs(self, *args, **kwargs):
        super(EUTHM, self)._define_inputs()
        self.user_word = tensor.ivector('user_word')
        self.user_word_sparse_mask = tensor.vector(
            'user_word_sparse_mask', dtype=theano.config.floatX)
        self.user_word_left_idx = tensor.ivector('user_word_idx_left_idx')
        self.user_word_right_idx = tensor.ivector('user_word_idx_right_idx')
        self.hashtag_word = tensor.ivector('hashtag_word')
        self.hashtag_sparse_mask = tensor.vector(
            'hashtag_word_sparse_mask', dtype=theano.config.floatX)
        self.hashtag_word_left_idx = tensor.ivector(
            'hashtag_word_idx_left_idx')
        self.hashtag_word_right_idx = tensor.ivector(
            'hashtag_word_idx_right_idx')
        self.sparse_word = tensor.imatrix('sparse_word')
        self.sparse_word_sparse_mask = tensor.vector(
            'sparse_word_sparse_mask', dtype=theano.config.floatX)
        self.sparse_word_mask = tensor.matrix('sparse_word_mask',
                                              dtype=theano.config.floatX)
        self.sparse_word_left_idx = tensor.ivector('sparse_word_idx_left_idx')
        self.sparse_word_right_idx = tensor.ivector(
            'sparse_word_idx_right_idx')

    def _build_bricks(self, *args, **kwargs):
        # Build lookup tables
        super(EUTHM, self)._build_bricks()
        self.user2word = MLP(
            activations=[Tanh('user2word_tanh')],
            dims=[self.config.user_embed_dim, self.config.word_embed_dim],
            name='user2word_mlp')
        self.user2word.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.user2word.biases_init = Constant(0)
        self.user2word.initialize()
        self.hashtag2word = MLP(
            activations=[Tanh('hashtag2word_tanh')],
            dims=[self.config.user_embed_dim + self.config.word_embed_dim,
                  self.config.word_embed_dim],
            name='hashtag2word_mlp')
        self.hashtag2word.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.hashtag2word.biases_init = Constant(0)
        self.hashtag2word.initialize()
        self.user2word_bias = Bias(dim=1, name='user2word_bias')
        self.user2word_bias.biases_init = Constant(0)
        self.user2word_bias.initialize()
        self.hashtag2word_bias = Bias(dim=1, name='hashtag2word_bias')
        self.hashtag2word_bias.biases_init = Constant(0)
        self.hashtag2word_bias.initialize()
        # Build character embedding
        self.char_embed = self._embed(len(self.dataset.char2index),
                                      self.config.char_embed_dim,
                                      name='char_embed')
        # Build sparse word encoder
        self.rnn_ins = Linear(input_dim=self.config.char_embed_dim,
                              output_dim=self.config.word_embed_dim,
                              name='rnn_in')
        self.rnn_ins.weights_init = IsotropicGaussian(
            std=numpy.sqrt(2) /
            numpy.sqrt(self.config.char_embed_dim +
                       self.config.word_embed_dim))
        self.rnn_ins.biases_init = Constant(0)
        self.rnn_ins.initialize()
        self.rnn = SimpleRecurrent(dim=self.config.word_embed_dim,
                                   activation=Tanh())
        self.rnn.weights_init = IsotropicGaussian(
            std=1 / numpy.sqrt(self.config.word_embed_dim))
        self.rnn.initialize()

    def _set_OV_value(self, *args, **kwargs):
        '''Set the <unk> character representation to zeros'''
        # tensor.set_subtensor builds a new symbolic variable rather than
        # modifying W in place, so update the shared value directly.
        W = self.char_embed.W.get_value()
        W[self.dataset.char2index['<unk>']] = numpy.zeros(
            self.config.char_embed_dim, dtype=theano.config.floatX)
        self.char_embed.W.set_value(W)

    def _get_text_vec(self, *args, **kwargs):
        # Transpose text
        self.text = self.text.dimshuffle(1, 0)
        self.text_mask = self.text_mask.dimshuffle(1, 0)
        self.sparse_word = self.sparse_word.dimshuffle(1, 0)
        self.sparse_word_mask = self.sparse_word_mask.dimshuffle(1, 0)
        # Turn words, users and hashtags into vector representations
        text_vec = self.word_embed.apply(self.text)
        # Apply user word, hashtag word and url
        text_vec = self._apply_user_word(text_vec)
        text_vec = self._apply_hashtag_word(text_vec)
        text_vec = self._apply_sparse_word(text_vec)
        return text_vec

    @abstractmethod
    def _apply_user_word(self, text_vec, *args, **kwargs):
        '''
        Replace @a with the transformed author vector

        :param text_vec:
        :param args:
        :param kwargs:
        :return:
        '''
        user_word_vec = self.user2word.apply(
            self.user_embed.apply(self.user_word)) + \
            self.user2word_bias.parameters[0][0]
        text_vec = tensor.set_subtensor(
            text_vec[self.user_word_right_idx, self.user_word_left_idx],
            text_vec[self.user_word_right_idx, self.user_word_left_idx] *
            (1 - self.user_word_sparse_mask[:, None]) +
            user_word_vec * self.user_word_sparse_mask[:, None])
        return text_vec

    @abstractmethod
    def _apply_hashtag_word(self, text_vec, *args, **kwargs):
        '''
        Replace #h with the transformed hashtag vector

        :param text_vec:
        :param args:
        :param kwargs:
        :return:
        '''
        hashtag_word_vec = self.hashtag2word.apply(
            self.hashtag_embed.apply(self.hashtag_word)) + \
            self.hashtag2word_bias.parameters[0][0]
        text_vec = tensor.set_subtensor(
            text_vec[self.hashtag_word_right_idx,
                     self.hashtag_word_left_idx],
            text_vec[self.hashtag_word_right_idx,
                     self.hashtag_word_left_idx] *
            (1 - self.hashtag_sparse_mask[:, None]) +
            hashtag_word_vec * self.hashtag_sparse_mask[:, None])
        return text_vec

    @abstractmethod
    def _apply_sparse_word(self, text_vec, *args, **kwargs):
        '''
        Replace sparse words with their character-level RNN encodings
        (an LSTM could be used instead)

        :param text_vec:
        :param args:
        :param kwargs:
        :return:
        '''
        sparse_word_vec = self.char_embed.apply(self.sparse_word)
        sparse_word_hiddens = self.rnn.apply(
            inputs=self.rnn_ins.apply(sparse_word_vec),
            mask=self.sparse_word_mask)
        tmp = sparse_word_hiddens[-1]
        text_vec = tensor.set_subtensor(
            text_vec[self.sparse_word_right_idx, self.sparse_word_left_idx],
            text_vec[self.sparse_word_right_idx, self.sparse_word_left_idx] *
            (1 - self.sparse_word_sparse_mask[:, None]) +
            tmp * self.sparse_word_sparse_mask[:, None])
        return text_vec
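# Each _apply_* hook above uses the same blend: gather the (time, batch)
# positions of the special tokens, then overwrite those word vectors with the
# transformed vectors wherever the 0/1 sparse mask is set, leaving padding
# entries untouched. A numpy sketch of the blend with hypothetical shapes:
import numpy

seq_len, batch, dim = 6, 4, 5
text_vec = numpy.zeros((seq_len, batch, dim))
word_vec = numpy.ones((3, dim))           # transformed token vectors
t_idx = numpy.array([2, 0, 5])            # time index of each occurrence
b_idx = numpy.array([0, 1, 3])            # batch index of each occurrence
mask = numpy.array([1., 1., 0.])          # 0 marks a padding entry

text_vec[t_idx, b_idx] = (
    text_vec[t_idx, b_idx] * (1 - mask[:, None]) + word_vec * mask[:, None])
assert text_vec[2, 0].sum() == dim and text_vec[5, 3].sum() == 0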
def __init__(self, ref_data, output_dim):
    if pca_dims is not None:
        # PCA via the eigendecomposition of the (uncentered) covariance
        # matrix; eigh suits the symmetric matrix and, unlike eig, cannot
        # return spurious complex parts.
        covmat = numpy.dot(ref_data.T, ref_data)
        ev, evec = numpy.linalg.eigh(covmat)
        best_i = ev.argsort()[-pca_dims:]
        best_evecs = evec[:, best_i]
        best_evecs = best_evecs / numpy.sqrt(
            (best_evecs ** 2).sum(axis=0))  # normalize
        ref_data = numpy.dot(ref_data, best_evecs)

    input_dim = ref_data.shape[1]

    ref_data_sh = theano.shared(numpy.array(ref_data, dtype=numpy.float32),
                                name='ref_data')

    # Construct the model
    j = tensor.lvector('j')
    r = ref_data_sh[j, :]
    x = tensor.fmatrix('x')
    y = tensor.ivector('y')

    # input_dim must be nr
    mlp = MLP(activations=activation_functions,
              dims=[input_dim] + hidden_dims + [n_inter],
              name='inter_gen')
    mlp2 = MLP(activations=activation_functions_2 + [None],
               dims=[n_inter] + hidden_dims_2 + [output_dim],
               name='end_mlp')

    inter_weights = mlp.apply(r)

    if inter_bias is None:
        ibias = Bias(n_inter)
        ibias.biases_init = Constant(0)
        ibias.initialize()
        inter = ibias.apply(tensor.dot(x, inter_weights))
    else:
        inter = tensor.dot(x, inter_weights) - inter_bias
    inter = inter_act_fun.apply(inter)

    final = mlp2.apply(inter)

    cost = Softmax().categorical_cross_entropy(y, final)
    confidence = Softmax().apply(final)

    pred = final.argmax(axis=1)
    # error_rate = tensor.neq(y, pred).mean()
    ber = balanced_error_rate.ber(y, pred)

    # Initialize parameters
    for brick in [mlp, mlp2]:
        brick.weights_init = IsotropicGaussian(0.01)
        brick.biases_init = Constant(0.001)
        brick.initialize()

    # apply regularization
    cg = ComputationGraph([cost, ber])

    s_dropout_vars = []  # default, so the a_dropout branch works when s_dropout == 0
    if r_dropout != 0:
        # - dropout on input vector r : r_dropout
        cg = apply_dropout(cg, [r], r_dropout)
    if x_dropout != 0:
        cg = apply_dropout(cg, [x], x_dropout)
    if s_dropout != 0:
        # - dropout on intermediate layers of first mlp : s_dropout
        s_dropout_vars = list(
            set(VariableFilter(bricks=[Tanh], name='output')
                (ComputationGraph([inter_weights]))) -
            set([inter_weights]))
        cg = apply_dropout(cg, s_dropout_vars, s_dropout)
    if i_dropout != 0:
        # - dropout on input to second mlp : i_dropout
        cg = apply_dropout(cg, [inter], i_dropout)
    if a_dropout != 0:
        # - dropout on hidden layers of second mlp : a_dropout
        a_dropout_vars = list(
            set(VariableFilter(bricks=[Tanh], name='output')
                (ComputationGraph([final]))) -
            set([inter_weights]) - set(s_dropout_vars))
        cg = apply_dropout(cg, a_dropout_vars, a_dropout)
    if r_noise_std != 0:
        cg = apply_noise(cg, [r], r_noise_std)
    if w_noise_std != 0:
        # - apply noise on weight variables
        weight_vars = VariableFilter(roles=[WEIGHT])(cg)
        cg = apply_noise(cg, weight_vars, w_noise_std)

    [cost_reg, ber_reg] = cg.outputs

    if s_l1pen != 0:
        s_weights = VariableFilter(bricks=mlp.linear_transformations,
                                   roles=[WEIGHT])(cg)
        cost_reg = cost_reg + s_l1pen * sum(abs(w).sum() for w in s_weights)
    if i_l1pen != 0:
        cost_reg = cost_reg + i_l1pen * abs(inter).sum()
    if a_l1pen != 0:
        a_weights = VariableFilter(bricks=mlp2.linear_transformations,
                                   roles=[WEIGHT])(cg)
        cost_reg = cost_reg + a_l1pen * sum(abs(w).sum() for w in a_weights)

    self.cost = cost
    self.cost_reg = cost_reg
    self.ber = ber
    self.ber_reg = ber_reg
    self.pred = pred
    self.confidence = confidence
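# The PCA step above projects ref_data onto the top eigenvectors of
# ref_data.T @ ref_data; the same subspace falls out of an SVD, which avoids
# forming the covariance matrix explicitly. A self-contained numerical check
# (the shapes are arbitrary):
import numpy

rng = numpy.random.RandomState(0)
data = rng.randn(100, 20)
k = 5

ev, evec = numpy.linalg.eigh(numpy.dot(data.T, data))
proj_eig = numpy.dot(data, evec[:, ev.argsort()[-k:]])

_, _, vt = numpy.linalg.svd(data, full_matrices=False)
proj_svd = numpy.dot(data, vt[:k].T)

# Columns may differ in sign and order, but the captured energy agrees.
assert numpy.isclose((proj_eig ** 2).sum(), (proj_svd ** 2).sum())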