Example #1
    def __init__(self, n_input, n_hidden, n_output, optimizer=sgd, p=0.5):
        self.x = T.itensor3('batched_sequence_x') # (n_maxlen, n_batch, 2)
        self.x_mask_r = T.matrix('x_mask_r')
        self.x_mask_c = T.matrix('x_mask_c')
        self.y = T.itensor3('batched_sequence_y') # (n_maxlen, n_batch, 2)
        self.y_mask = T.matrix('y_mask')

        self.n_input = n_input
        self.n_hidden = n_hidden
        self.n_output = n_output
        self.floatX = theano.config.floatX
        self.E_table_size = int(np.ceil(np.sqrt(n_output)))
        self.p = p
        init_Er = np.asarray(np.random.uniform(low=-np.sqrt(1./self.n_output),
                                               high=np.sqrt(1./self.n_output),
                                               size=(self.E_table_size, self.n_input)),
                             dtype=self.floatX)
        self.Er = theano.shared(value=init_Er, name='row_word_embedding', borrow=True)
        init_Ec = np.asarray(np.random.uniform(low=-np.sqrt(1./self.n_output),
                                               high=np.sqrt(1./self.n_output),
                                               size=(self.E_table_size, self.n_input)),
                             dtype=self.floatX)
        self.Ec = theano.shared(value=init_Ec, name='column_word_embedding', borrow=True)

        self.optimizer = optimizer
        self.is_train = T.iscalar('is_train')
        self.n_batch = T.iscalar('n_batch')
        self.epsilon = 1.0e-15
        self.rng = RandomStreams(1234)
        self.build()
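The `(n_maxlen, n_batch, 2)` comments and the `ceil(sqrt(n_output))` table size above suggest each word id is factored into a (row, column) pair over the two small embedding tables Er and Ec. A minimal NumPy sketch of that indexing, under that assumed reading:

import numpy as np

V = 10                                      # vocabulary size (illustrative)
table_size = int(np.ceil(np.sqrt(V)))       # matches E_table_size above
word_ids = np.arange(V)
rows, cols = word_ids // table_size, word_ids % table_size
pairs = np.stack([rows, cols], axis=-1)     # trailing dimension of 2, as in x and y
assert len({tuple(p) for p in pairs}) == V  # every id gets a unique (row, col) pair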
Example #2
    def set_model(self):
        say('\n\nBUILD A MODEL\n')
        argv = self.argv

        #####################
        # Network variables #
        #####################
        c = T.itensor3('c')
        r = T.itensor3('r')
        a = T.ftensor3('a')
        y_r = T.ivector('y_r')
        y_a = T.imatrix('y_a')
        n_agents = T.iscalar('n_agents')

        max_n_agents = self.max_n_agents
        init_emb = self.init_emb
        n_vocab = self.vocab.size()

        #################
        # Build a model #
        #################
        say('MODEL: %s  Unit: %s  Opt: %s  Activation: %s  ' %
            (argv.model, argv.unit, argv.opt, argv.activation))

        if argv.model == 'static':
            model = StaticModel
        else:
            model = DynamicModel

        self.model = model(argv, max_n_agents, n_vocab, init_emb)
        self.model.compile(c=c, r=r, a=a, y_r=y_r, y_a=y_a, n_agents=n_agents)
Example #3
    def __init__(self,
                 input_dim,
                 proj_dim=128,
                 neg_samples=4,
                 init='uniform',
                 activation='tanh',
                 weights=None,
                 W_regularizer=None,
                 activity_regularizer=None,
                 **kwargs):

        super(WordTagContextProduct_tensor, self).__init__(**kwargs)
        self.input_dim = input_dim
        self.proj_dim = proj_dim
        self.samples = neg_samples + 1
        #np.random.seed(0)
        self.init = initializations.get(init)
        self.activation = activations.get(activation)
        self.W_regularizer = regularizers.get(W_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)

        #self.input = T.imatrix()
        #self.input = T.itensor3()
        self.input = [T.itensor3(), T.itensor3()]
        # two different embeddings for pivot word and its context
        # because p(w|c) != p(c|w)
        self.W_w = self.init((input_dim, proj_dim))
        self.W_c = self.init((input_dim, proj_dim))

        self.params = [self.W_w, self.W_c]

        if weights is not None:
            self.set_weights(weights)
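As the comment notes, pivot and context words get separate tables because p(w|c) != p(c|w): with a single shared table the dot-product score would be symmetric in w and c, while two tables make it asymmetric. A small NumPy sketch (names and shapes illustrative, not from the source):

import numpy as np

rng = np.random.default_rng(0)
W_w = rng.normal(size=(50, 8))      # pivot-word embeddings
W_c = rng.normal(size=(50, 8))      # context-word embeddings

def score(w, c):
    return float(np.tanh(W_w[w] @ W_c[c]))

print(score(3, 7), score(7, 3))     # asymmetric: swapping roles changes the score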
Example #4
    def _get_input_tensor_variables(self):
        # x_w: 1D: batch, 2D: n_prds, 3D: n_words, 4D: 5 + window; elem=word id
        # x_p: 1D: batch, 2D: n_prds, 3D: n_words; elem=posit id
        # y: 1D: batch, 2D: n_prds, 3D: n_words; elem=label id
        if self.argv.mark_phi:
            return [T.itensor4('x_w'), T.itensor3('x_p'), T.itensor3('y')]
        return [T.itensor4('x_w'), T.itensor3('y')]
Example #5
    def __init__(self, input_dim, proj_dim=128, neg_samples=4,
                 init='uniform', activation='tanh', weights=None,
                 W_regularizer=None, activity_regularizer=None, **kwargs):

        super(WordTagContextProduct_tensor, self).__init__(**kwargs)
        self.input_dim = input_dim
        self.proj_dim = proj_dim
        self.samples = neg_samples + 1
        #np.random.seed(0)
        self.init = initializations.get(init)
        self.activation = activations.get(activation)
        self.W_regularizer = regularizers.get(W_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)

        #self.input = T.imatrix()
        #self.input = T.itensor3()
        self.input = [T.itensor3(), T.itensor3()]
        # two different embeddings for pivot word and its context
        # because p(w|c) != p(c|w)
        self.W_w = self.init((input_dim, proj_dim))
        self.W_c = self.init((input_dim, proj_dim))
        

        self.params = [self.W_w, self.W_c]

        if weights is not None:
            self.set_weights(weights)
Example #6
    def _setup_training_graph(self):
        """
        Connect graphs together for training and store in/out ports & updates
        (propagation)  inputs  : input, target, step_size
                       outputs : loss
                       updates : prev_states[, grads]
        (param update) inputs  : lr
                       outputs : None
                       updates : params
        (optim init)   inputs  : None
                       outputs : None
                       updates : optimizer states
        """
        p_input_tbi = tt.itensor3(name='i_port_input')
        p_target_tbi = tt.itensor3(name='i_port_target')
        p_step_size = tt.iscalar(name='i_port_step_size')
        p_lr = tt.fscalar(name='i_port_lr')

        self._prev_state_updates = []
        losses = []  # list of s_loss
        gradss = []  # list of s_grads (i.e., list of list)

        for s in self._slices:
            s_step_size = s.transfer(p_step_size)
            s_output_tbi, prev_state_updates = self._setup_forward_graph \
                (s_input_tbi     = s.apply(p_input_tbi),
                 s_time_tb       = None,
                 s_next_prev_idx = s_step_size - 1,
                 v_params        = s.v_params,
                 v_prev_states   = s.v_prev_states)
            self._prev_state_updates += prev_state_updates

            s_loss = self._setup_loss_graph \
                (s_output_tbi = s_output_tbi,
                 s_target_tbi = s.apply(p_target_tbi),
                 s_step_size  = s_step_size)
            losses += [self.transfer(s_loss)]

            s_grads = self._setup_grads_graph \
                (s_loss = s_loss,
                 v_wrt  = list(itervalues(s.v_params)))
            gradss += [[self.transfer(s_grad) for s_grad in s_grads]]

        # sum losses and grads from all slices
        p_loss = sum(losses)
        s_new_grads = [sum(grad_tuple) for grad_tuple in zip(*gradss)]
        self._grad_updates = [u for u in zip(self._v_grads, s_new_grads)]

        self._optim_inits, self._optim_param_updates, s_increments = \
            self._setup_optimizer_graph(s_lr    = self.transfer(p_lr),
                                        v_grads = self._v_grads)

        for s in self._slices:
            self._optim_param_updates += \
                [(p, p + i) for p, i in zip(s.v_params.values(), s_increments)]

        self._prop_i_ports = [p_input_tbi, p_target_tbi, p_step_size]
        self._prop_o_ports = [p_loss]
        self._update_i_ports = [p_lr]
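The reduction `[sum(grad_tuple) for grad_tuple in zip(*gradss)]` above sums gradients parameter-wise across slices; a plain-Python sketch of that pattern with toy numbers:

gradss = [[1.0, 2.0, 3.0],      # grads from slice 0, one entry per parameter
          [10.0, 20.0, 30.0]]   # grads from slice 1
new_grads = [sum(per_param) for per_param in zip(*gradss)]
assert new_grads == [11.0, 22.0, 33.0]  # summed over slices, per parameter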
Example #7
    def __init__(self, K, vocab_size, num_chars, W_init, 
            nhidden, embed_dim, dropout, train_emb, char_dim, use_feat, gating_fn, 
            save_attn=False):
        self.nhidden = nhidden
        self.embed_dim = embed_dim
        self.dropout = dropout
        self.train_emb = train_emb
        self.char_dim = char_dim
        self.learning_rate = LEARNING_RATE
        self.num_chars = num_chars
        self.use_feat = use_feat
        self.save_attn = save_attn
        self.gating_fn = gating_fn

        self.use_chars = self.char_dim!=0
        if W_init is None: W_init = lasagne.init.GlorotNormal().sample((vocab_size, self.embed_dim))

        doc_var, query_var, cand_var = T.itensor3('doc'), T.itensor3('quer'), \
                T.wtensor3('cand')
        docmask_var, qmask_var, candmask_var = T.bmatrix('doc_mask'), T.bmatrix('q_mask'), \
                T.bmatrix('c_mask')
        target_var = T.ivector('ans')
        feat_var = T.imatrix('feat')
        doc_toks, qry_toks= T.imatrix('dchars'), T.imatrix('qchars')
        tok_var, tok_mask = T.imatrix('tok'), T.bmatrix('tok_mask')
        cloze_var = T.ivector('cloze')
        self.inps = [doc_var, doc_toks, query_var, qry_toks, cand_var, target_var, docmask_var,
                qmask_var, tok_var, tok_mask, candmask_var, feat_var, cloze_var]

        self.predicted_probs, predicted_probs_val, self.network, W_emb, attentions = (
                self.build_network(K, vocab_size, W_init))

        self.loss_fn = T.nnet.categorical_crossentropy(self.predicted_probs, target_var).mean()
        self.eval_fn = lasagne.objectives.categorical_accuracy(self.predicted_probs, 
                target_var).mean()

        loss_fn_val = T.nnet.categorical_crossentropy(predicted_probs_val, target_var).mean()
        eval_fn_val = lasagne.objectives.categorical_accuracy(predicted_probs_val, 
                target_var).mean()

        self.params = L.get_all_params(self.network, trainable=True)
        
        updates = lasagne.updates.adam(self.loss_fn, self.params, learning_rate=self.learning_rate)

        self.train_fn = theano.function(self.inps,
                [self.loss_fn, self.eval_fn, self.predicted_probs], 
                updates=updates,
                on_unused_input='warn')
        self.validate_fn = theano.function(self.inps, 
                [loss_fn_val, eval_fn_val, predicted_probs_val]+attentions,
                on_unused_input='warn')
Example #8
    def __init__(self, K, vocab_size, W_init, regularizer, rlambda, nhidden,
                 embed_dim, dropout, train_emb, subsample):
        self.nhidden = nhidden
        self.embed_dim = embed_dim
        self.dropout = dropout
        self.train_emb = train_emb
        self.subsample = subsample
        norm = lasagne.regularization.l2 if regularizer == 'l2' else lasagne.regularization.l1
        if W_init is None:
            W_init = lasagne.init.GlorotNormal().sample(
                (vocab_size, self.embed_dim))

        doc_var, query_var, cand_var = T.itensor3('doc'), T.itensor3(
            'quer'), T.wtensor3('cand')
        docmask_var, qmask_var, candmask_var = T.bmatrix('doc_mask'), T.bmatrix('q_mask'), \
                T.bmatrix('c_mask')
        target_var = T.ivector('ans')

        if rlambda > 0.:
            W_pert = W_init + lasagne.init.GlorotNormal().sample(W_init.shape)
        else:
            W_pert = W_init
        predicted_probs, predicted_probs_val, self.doc_net, self.q_net, W_emb = self.build_network(
            K, vocab_size, doc_var, query_var, cand_var, docmask_var,
            qmask_var, candmask_var, W_pert)

        loss_fn = T.nnet.categorical_crossentropy(predicted_probs, target_var).mean() + \
                rlambda*norm(W_emb-W_init)
        eval_fn = lasagne.objectives.categorical_accuracy(
            predicted_probs, target_var).mean()

        loss_fn_val = T.nnet.categorical_crossentropy(predicted_probs_val, target_var).mean() + \
                rlambda*norm(W_emb-W_init)
        eval_fn_val = lasagne.objectives.categorical_accuracy(
            predicted_probs_val, target_var).mean()

        params = L.get_all_params(self.doc_net, trainable=True) + \
                L.get_all_params(self.q_net, trainable=True)

        updates = lasagne.updates.adam(loss_fn,
                                       params,
                                       learning_rate=LEARNING_RATE)

        self.train_fn = theano.function([doc_var, query_var, cand_var, target_var, docmask_var, \
                qmask_var, candmask_var],
                [loss_fn, eval_fn, predicted_probs],
                updates=updates)
        self.validate_fn = theano.function([doc_var, query_var, cand_var, target_var, docmask_var, \
                qmask_var, candmask_var],
                [loss_fn_val, eval_fn_val, predicted_probs_val])
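The term `rlambda*norm(W_emb-W_init)` above penalizes drift of the embedding table away from its pretrained initialization rather than penalizing the weights themselves. A NumPy sketch of the same idea, with illustrative shapes:

import numpy as np

W_init = np.random.randn(1000, 128).astype('float32')                  # pretrained table
W_emb = W_init + 0.01 * np.random.randn(1000, 128).astype('float32')   # after some training
rlambda = 1e-4
drift_penalty = rlambda * np.sum((W_emb - W_init) ** 2)                # l2 variant of norm()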
Example #9
    def __init__(self, num_chars, char_dim, max_word_len, embed_dim):
        self.num_chars = num_chars
        self.char_dim = char_dim
        self.max_word_len = max_word_len
        self.embed_dim = embed_dim

        chars1, chars2 = T.itensor3(), T.itensor3()
        mask1, mask2 = T.btensor3(), T.btensor3()
        self.inps = [chars1, chars2, mask1, mask2]
        l_e1, l_e2 = self.build_network()

        self.fn = theano.function(
            self.inps,
            [L.get_output(l_e1), L.get_output(l_e2)])
Example #10
def build(word_embeddings, len_voc, word_emb_dim, args, freeze=False):
    # input theano vars
    posts = T.imatrix()
    post_masks = T.fmatrix()
    ques_list = T.itensor3()
    ques_masks_list = T.ftensor3()
    ans_list = T.itensor3()
    ans_masks_list = T.ftensor3()
    labels = T.imatrix()
    N = args.no_of_candidates

    post_out, post_lstm_params = build_lstm(posts, post_masks, args.post_max_len, \
                 word_embeddings, word_emb_dim, args.hidden_dim, len_voc, args.batch_size)
    ques_out, ques_emb_out, ques_lstm_params = build_list_lstm(ques_list, ques_masks_list, N, args.ques_max_len, \
              word_embeddings, word_emb_dim, args.hidden_dim, len_voc, args.batch_size)
    ans_out, ans_emb_out, ans_lstm_params = build_list_lstm(ans_list, ans_masks_list, N, args.ans_max_len, \
              word_embeddings, word_emb_dim, args.hidden_dim, len_voc, args.batch_size)

    ques_sim, pq_a_squared_errors, pq_a_loss, post_ques_dense_params \
               = answer_model(post_out, ques_out, ques_emb_out, ans_out, ans_emb_out, labels, args)

    all_params = post_lstm_params + ques_lstm_params + post_ques_dense_params

    post_out, post_lstm_params = build_lstm(posts, post_masks, args.post_max_len, \
                 word_embeddings, word_emb_dim, args.hidden_dim, len_voc, args.batch_size)
    ques_out, ques_emb_out, ques_lstm_params = build_list_lstm(ques_list, ques_masks_list, N, args.ques_max_len, \
              word_embeddings, word_emb_dim, args.hidden_dim, len_voc, args.batch_size)
    ans_out, ans_emb_out, ans_lstm_params = build_list_lstm(ans_list, ans_masks_list, N, args.ans_max_len, \
              word_embeddings, word_emb_dim, args.hidden_dim, len_voc, args.batch_size)

    pqa_loss, post_ques_ans_dense_params, pqa_preds = utility_calculator(post_out, ques_out, ques_emb_out, ans_out, \
                      ques_sim, pq_a_squared_errors, labels, args)

    all_params += post_lstm_params + ques_lstm_params + ans_lstm_params
    all_params += post_ques_ans_dense_params

    loss = pq_a_loss + pqa_loss
    loss += args.rho * sum(T.sum(l**2) for l in all_params)

    updates = lasagne.updates.adam(loss,
                                   all_params,
                                   learning_rate=args.learning_rate)

    train_fn = theano.function([posts, post_masks, ques_list, ques_masks_list, ans_list, ans_masks_list, labels], \
            [loss, pq_a_loss, pqa_loss] + pq_a_squared_errors + ques_sim + pqa_preds, updates=updates)
    test_fn = theano.function([posts, post_masks, ques_list, ques_masks_list, ans_list, ans_masks_list, labels], \
            [loss, pq_a_loss, pqa_loss] + pq_a_squared_errors + ques_sim + pqa_preds,)
    return train_fn, test_fn
Example #11
    def __theano_train__(self, n_size):
        """
        Pr(l|u, C(l)) = Pr(l|u) * Pr(l|C(l))
        Pr(u, l, t) = Pr(l|u, C(l))     if C(l) exists,
                      Pr(l|u)           otherwise.
        $Theta$ = argmax Pr(u, l, t)
        """
        tra_mask = T.ivector()
        seq_length = T.sum(tra_mask)  # effective (unmasked) length
        wl = T.concatenate((self.wl, self.wl_m))
        tidx, cidx, bidx, userid = T.ivector(), T.imatrix(), T.itensor3(), T.iscalar()
        pb = self.pb[bidx]  # (seq_length x 4 x depth x n_size)
        lrs = self.lrs[tidx]  # (seq_length x 4 x depth)
        # user preference
        xu = self.xu[userid]
        plu = softmax(T.dot(xu, self.wl.T))
        # geographical influence
        cl = T.sum(wl[cidx], axis=1)  # (seq_length x n_size)
        cl = cl.reshape((cl.shape[0], 1, 1, cl.shape[1]))
        br = sigmoid(T.sum(pb[:seq_length] * cl, axis=3) *
                     lrs[:seq_length]) * T.ceil(abs(T.mean(cl, axis=3)))
        path = T.prod(br, axis=2) * self.probs[tidx][:seq_length]
        # paths = T.prod((T.floor(1-path) + path), axis=1)
        paths = T.sum(path, axis=1)
        paths = T.floor(1 - paths) + paths
        # ----------------------------------------------------------------------------
        # cost, gradients, learning rate, l2 regularization
        lr, l2 = self.alpha_lambda[0], self.alpha_lambda[1]
        seq_l2_sq = T.sum([T.sum(par**2) for par in [xu, self.wl]])
        upq = -1 * T.sum(T.log(plu[tidx[:seq_length]] * paths)) / seq_length
        seq_costs = (upq + 0.5 * l2 * seq_l2_sq)
        seq_grads = T.grad(seq_costs, self.params)
        seq_updates = [(par, par - lr * gra)
                       for par, gra in zip(self.params, seq_grads)]
        pars_subs = [(self.xu, xu), (self.pb, pb)]
        seq_updates.extend([
            (par, T.set_subtensor(sub, sub - lr * T.grad(seq_costs, sub)))
            for par, sub in pars_subs
        ])
        # ----------------------------------------------------------------------------
        uidx = T.iscalar()  # T.iscalar() has type TensorType(int32, scalar)
        self.seq_train = theano.function(
            inputs=[uidx],
            outputs=upq,
            updates=seq_updates,
            givens={
                userid: uidx,
                tidx: self.tra_target_masks[uidx],
                cidx: self.tra_context_masks[T.arange(self.tra_accum_lens[uidx][0],
                                                      self.tra_accum_lens[uidx][1])],
                bidx: self.routes[self.tra_target_masks[uidx]],
                tra_mask: self.tra_masks[uidx]
                # tra_mask_cot: self.tra_masks_cot[T.arange(self.tra_accum_lens[uidx][0], self.tra_accum_lens[uidx][1])]
            })
Example #12
    def testSplitOutputByFilter(self):
        self.setSeeds()

        input_shape = (self.batch_size, self.max_seq_len,
                       self.n_filters * self.filter_width)
        output_shape = (self.batch_size, self.n_filters, self.max_seq_len,
                        self.filter_width)

        x = np.arange(np.prod(input_shape))
        x = x.reshape(input_shape).astype(np.int32)
        y = np.zeros_like(x)
        y = np.reshape(y, output_shape)

        for i in range(self.n_filters):
            s = x[:, :, i * self.filter_width:(i + 1) * self.filter_width]
            y[:, i, :, :] = s

        xt = T.itensor3('xt')
        layer = SplitOutputByFilter(self.n_filters, self.filter_width)
        yt = layer._get_output(xt)

        f = theano.function(inputs=[xt], outputs=yt)
        y_theano = f(x)

        self.assertEqual(y.shape, y_theano.shape)
        self.assertTrue(np.all(y == y_theano))
Example #13
def build_evpi_model(word_embeddings, len_voc, word_emb_dim, N, args, freeze=False):

	# input theano vars
	posts = T.imatrix()
	post_masks = T.fmatrix()
	ans_list = T.itensor3()
	ans_masks_list = T.ftensor3()
	labels = T.imatrix()
	
	utility_posts = T.imatrix()
	utility_post_masks = T.fmatrix()
	utility_labels = T.ivector()

	utility_preds, utility_post_ans_preds, utility_params = build_utility_lstm(utility_posts, utility_post_masks, \
																				posts, post_masks, ans_list, ans_masks_list, \
																				N, args.post_max_len, args.ans_max_len, \
																				word_embeddings, word_emb_dim, args.hidden_dim, len_voc)

	utility_loss = T.sum(lasagne.objectives.binary_crossentropy(utility_preds, utility_labels))
	utility_loss += T.sum(lasagne.objectives.binary_crossentropy(utility_preds, utility_labels)*2*utility_labels)
	loss = 0.0
	for i in range(N):
		loss += T.sum(lasagne.objectives.binary_crossentropy(utility_post_ans_preds[i], labels[:,i]))

	utility_loss += args.rho * sum(T.sum(l ** 2) for l in utility_params)

	# utility_updates = lasagne.updates.adam(utility_loss+loss, utility_params, learning_rate=args.learning_rate)
	utility_updates = lasagne.updates.adam(utility_loss, utility_params, learning_rate=args.learning_rate)

	utility_train_fn = theano.function([utility_posts, utility_post_masks, utility_labels, posts, post_masks, ans_list, ans_masks_list, labels], \
									 [utility_preds, utility_loss, loss] + utility_post_ans_preds, updates=utility_updates)
	utility_dev_fn = theano.function([utility_posts, utility_post_masks, utility_labels, posts, post_masks, ans_list, ans_masks_list, labels], \
									 [utility_preds, utility_loss, loss] + utility_post_ans_preds,)

	return utility_train_fn, utility_dev_fn
Example #14
def BuildModel(modelSpecs, forTrain=True):
    rng = np.random.RandomState()

    ## x is for sequential features
    x = T.tensor3('x')

    ## mask for x
    xmask = T.bmatrix('xmask')
    propertyPredictor = ResNet4Properties(rng, seqInput=x, mask_seq=xmask, modelSpecs=modelSpecs)

    ## labelList is a list of label matrices, each with shape (batchSize, seqLen, numLabels)
    labelList = []
    if forTrain:
        ## when this model is used for training, we need to define the label variables
        for res in modelSpecs['responses']:
            labelType = Response2LabelType(res)
            if labelType.startswith('Discrete'):
                labelList.append(T.itensor3('label4' + res))
            else:
                labelList.append(T.tensor3('label4' + res))

    ## weightList is a list of label weight matrices, each with shape (batchSize, seqLen, 1)
    ## we always use weights to deal with residues without 3D coordinates
    weightList = []
    if len(labelList) > 0:
        weightList = [T.tensor3('weight4' + res) for res in modelSpecs['responses']]

    if len(labelList) > 0:
        return propertyPredictor, x, xmask, labelList, weightList
    else:
        return propertyPredictor, x, xmask
Example #15
    def testSplitOutputByFilter(self):
        self.setSeeds()

        input_shape = (self.batch_size, self.max_seq_len,
                self.n_filters * self.filter_width)
        output_shape = (self.batch_size, self.n_filters,
                self.max_seq_len, self.filter_width)

        x = np.arange(np.prod(input_shape))
        x = x.reshape(input_shape).astype(np.int32)
        y = np.zeros_like(x)
        y = np.reshape(y, output_shape)

        for i in range(self.n_filters):
            s = x[:, :, i*self.filter_width:(i+1)*self.filter_width]
            y[:, i, :, :] = s

        xt = T.itensor3('xt')
        layer = SplitOutputByFilter(self.n_filters, self.filter_width)
        yt = layer._get_output(xt)

        f = theano.function(inputs=[xt], outputs=yt)
        y_theano = f(x)

        self.assertEqual(y.shape, y_theano.shape)
        self.assertTrue(np.all(y == y_theano))
Example #16
def main():
    xs = itensor3('xs')
    ins = ((None, None, 93), xs)
    gru = GRU(
        inputs=ins,
        hiddens=128,
        direction='bidirectional'
    )
    print("GRU output (hiddens) shape: ", gru.output_size)
    print("GRU params: ", gru.get_params())

    lstm = LSTM(
        inputs=ins,
        hiddens=128,
        direction='bidirectional'
    )
    print("LSTM output (hiddens) shape: ", lstm.output_size)
    print("LSTM params: ", lstm.get_params())

    rnn = RNN(
        inputs=ins,
        hiddens=128,
        direction='bidirectional'
    )
    print("RNN output (hiddens) shape: ", rnn.output_size)
    print("RNN params: ", rnn.get_params())
Example #17
    def make_node(self, x, x2, x3, x4, x5):
        # check that the theano version has support for __props__.
        # This next line looks like it has a typo,
        # but it's actually a way to detect whether the theano version
        # is sufficiently recent to support the use of __props__.
        assert hasattr(self, '_props'), "Your version of theano is too old to support __props__."
        x = tensor.as_tensor_variable(x)
        x2 = tensor.as_tensor_variable(x2)
        x3 = tensor.as_tensor_variable(x3)
        x4 = tensor.as_tensor_variable(x4)
        x5 = tensor.as_tensor_variable(x5)

        if prm.att_doc:
            if prm.compute_emb:
                td = tensor.itensor4().type()
            else:
                td = tensor.ftensor4().type()
            tm = tensor.ftensor3().type()
        else:
            if prm.compute_emb:
                td = tensor.itensor3().type()
            else:
                td = tensor.ftensor3().type()
            tm = tensor.fmatrix().type()
        return theano.Apply(self, [x, x2, x3, x4, x5],
                            [td, tm, tensor.fmatrix().type(), tensor.ivector().type()])
Example #18
    def __init__(self, batch_size, emb_X, num_words, lstm_params, conv_param, output_size, f1_classes):
        super().__init__(batch_size)
        self.num_words = num_words
        self.inputs = [T.itensor3('input'), T.tensor3('mask')]
        self.target = T.ivector('target')
        l = InputLayer((batch_size, num_words, None), self.inputs[0])
        l_mask = InputLayer((batch_size, num_words, None), self.inputs[1])
        l = ReshapeLayer(l, (-1, [2]))
        l_mask = ReshapeLayer(l_mask, (-1, [2]))
        l = EmbeddingLayer(l, emb_X.shape[0], emb_X.shape[1], W=emb_X)
        for lstm_param in lstm_params:
            l = LSTMLayer(
                l, lstm_param, grad_clipping=100, nonlinearity=tanh, mask_input=l_mask, only_return_final=True
            )
        l = ReshapeLayer(l, (batch_size, num_words, -1))
        l_convs = []
        for filter_size in conv_param[1]:
            l_cur = Conv1DLayer(l, conv_param[0], filter_size, pad='full', nonlinearity=rectify)
            l_cur = MaxPool1DLayer(l_cur, num_words + filter_size - 1, ignore_border=True)
            l_cur = FlattenLayer(l_cur)
            l_convs.append(l_cur)
        l = ConcatLayer(l_convs)
        l = DropoutLayer(l)
        l = DenseLayer(l, output_size, nonlinearity=log_softmax)
        self.constraints[l.W] = lambda u, v: norm_constraint(v, 3)
        self.pred = T.exp(get_output(l, deterministic=True))
        self.loss = T.mean(categorical_crossentropy_exp(self.target, get_output(l)))
        params = get_all_params(l, trainable=True)
        self.updates = adadelta(self.loss, params)
        self.metrics = {'train': [acc], 'val': [acc, f1(f1_classes)]}
        self.network = l
        self.compile()
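The reshape pattern above runs the LSTM once per word over its characters, then folds the per-word final states back into a per-document sequence for the convolutions. A shape walk-through with illustrative dimensions:

batch, num_words, seq_len, hidden = 16, 30, 20, 64
# (16, 30, 20) --ReshapeLayer(l, (-1, [2]))-->        (480, 20)
# --LSTMLayer(..., only_return_final=True)-->         (480, 64)
# --ReshapeLayer(l, (batch, num_words, -1))-->        (16, 30, 64)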
Example #19
def add_datasets_to_graph(list_of_datasets,
                          list_of_names,
                          graph,
                          strict=True,
                          list_of_test_values=None):
    assert len(list_of_datasets) == len(list_of_names)
    datasets_added = []
    for n, (dataset, name) in enumerate(zip(list_of_datasets, list_of_names)):
        if dataset.dtype != "int32":
            if len(dataset.shape) == 1:
                sym = tensor.vector()
            elif len(dataset.shape) == 2:
                sym = tensor.matrix()
            elif len(dataset.shape) == 3:
                sym = tensor.tensor3()
            else:
                raise ValueError("dataset %s has unsupported shape" % name)
        elif dataset.dtype == "int32":
            if len(dataset.shape) == 1:
                sym = tensor.ivector()
            elif len(dataset.shape) == 2:
                sym = tensor.imatrix()
            elif len(dataset.shape) == 3:
                sym = tensor.itensor3()
            else:
                raise ValueError("dataset %s has unsupported shape" % name)
        else:
            raise ValueError("dataset %s has unsupported dtype %s" %
                             (name, dataset.dtype))
        if list_of_test_values is not None:
            sym.tag.test_value = list_of_test_values[n]
        tag_expression(sym, name, dataset.shape)
        datasets_added.append(sym)
    graph["__datasets_added__"] = datasets_added
    return datasets_added
Example #20
    def __init__(self, vocab_size, W_init=lasagne.init.GlorotNormal()):

        input_var, mask_var, target_var = T.itensor3('dq_pair'), T.imatrix(
            'dq_mask'), T.ivector('ans')

        self.network = self.build_network(vocab_size, input_var, mask_var,
                                          W_init)
        predicted_probs = L.get_output(self.network)
        predicted_probs_val = L.get_output(self.network, deterministic=True)

        loss_fn = T.nnet.categorical_crossentropy(predicted_probs,
                                                  target_var).mean()
        eval_fn = lasagne.objectives.categorical_accuracy(
            predicted_probs, target_var).mean()

        loss_fn_val = T.nnet.categorical_crossentropy(predicted_probs_val,
                                                      target_var).mean()
        eval_fn_val = lasagne.objectives.categorical_accuracy(
            predicted_probs_val, target_var).mean()

        params = L.get_all_params(self.network, trainable=True)

        updates = lasagne.updates.rmsprop(loss_fn,
                                          params,
                                          rho=0.95,
                                          learning_rate=LEARNING_RATE)
        updates_with_momentum = lasagne.updates.apply_momentum(updates,
                                                               params=params)

        self.train_fn = theano.function([input_var, target_var, mask_var],
                                        [loss_fn, eval_fn, predicted_probs],
                                        updates=updates_with_momentum)
        self.validate_fn = theano.function(
            [input_var, target_var, mask_var],
            [loss_fn_val, eval_fn_val, predicted_probs_val])
Example #21
def TestEmbeddingLayer():
	n_in = 60
	a=np.random.uniform(0, 1, (20, 300, n_in)).round().astype(np.int32)
	n_out = 5

	x = T.itensor3('x')
	layer = MetaEmbeddingLayer(x, n_in, n_out)
	f = theano.function([x], [layer.output, layer.pcenters])

	b, pcenter = f(a)

	print(b[0, 1, 2])
	print(b[0, 1, 20])
	print(a.shape)
	batch=np.random.randint(0, 20)
	row1 = np.random.randint(0, 100)
	row2 = np.random.randint(0, 100)

	v1= a[batch][row1]
	v2= a[batch][row2]
	print(b.shape)
	print(b[batch][row1][row2])
	c = np.outer( v1, v2)
	d = c[:, :, np.newaxis ]
	e = np.sum( (d * layer.W.get_value() ), axis=(0,1))
	print(v1)
	print(v2)
	print(e)
	print('diff: ', abs(e - b[batch][row1][row2] ).sum())

	print(pcenter)
	center = [ np.sum( l.W.get_value(), axis=(0,1) ) for l in layer ]
	print(center)
	print(np.sum(np.square(center)))  # square the stacked center vectors elementwise before summing
Example #22
    def _setup_inference_graph(self):
        """
        Connect graphs together for inference and store in/out ports & updates
            inputs  : input, time
            outputs : output
            updates : prev_states
        """
        p_input_tbi = tt.itensor3(name='port_i_input')

        # step_size is a compile time constant for inference
        s_next_prev_idx = tt.alloc(np.int32(self._options['step_size'] - 1))

        outputs = []
        self._prev_state_updates = []

        for s in self._slices:
            s_output_tbi, prev_state_updates = self._setup_forward_graph \
                (s_input_tbi     = s.apply(p_input_tbi),
                 s_time_tb       = None,
                 s_next_prev_idx = s.transfer(s_next_prev_idx),
                 v_params        = s.v_params,
                 v_prev_states   = s.v_prev_states)
            outputs += [self.transfer(s_output_tbi)]
            self._prev_state_updates += prev_state_updates

        # merge outputs from all slices
        p_output_tbi = tt.concatenate(outputs, axis=1)

        self._prop_i_ports = [p_input_tbi]
        self._prop_o_ports = [p_output_tbi]
Example #23
    def ans_fn(self, num_samples, means_only=False):

        qo = T.itensor3('qo')
        o_mask = T.matrix('o_mask')

        N = qo.shape[0]

        qo_flat = qo.reshape((N * self.num_choices, self.max_length))

        qo_emb = embedder(qo_flat, self.embeddings)

        z, _, _ = self.rec_model.get_samples_and_means_and_covs(
            qo_flat, qo_emb, num_samples, means_only=means_only)

        z = z.reshape((N * num_samples, self.num_choices, self.z_dim))

        o_mask_rep = T.tile(o_mask, (num_samples, 1))

        probs = self.gen_model.get_probs(z, o_mask_rep)

        probs = probs.reshape((num_samples, N, self.num_choices))

        ans = T.argmax(T.mean(probs, axis=0), axis=-1)

        ans_fn = theano.function(inputs=[qo, o_mask],
                                 outputs=[ans, T.mean(probs, axis=0)],
                                 allow_input_downcast=True)

        return ans_fn
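`ans_fn` averages class probabilities over `num_samples` latent samples before taking the argmax. A NumPy stand-in for that reduction (shapes illustrative):

import numpy as np

num_samples, N, num_choices = 3, 2, 4
probs = np.random.dirichlet(np.ones(num_choices), size=(num_samples, N))
ans = np.argmax(probs.mean(axis=0), axis=-1)   # one answer per question
print(ans.shape)                               # (2,)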
Example #24
def add_datasets_to_graph(list_of_datasets, list_of_names, graph, strict=True,
                          list_of_test_values=None):
    assert len(list_of_datasets) == len(list_of_names)
    datasets_added = []
    for n, (dataset, name) in enumerate(zip(list_of_datasets, list_of_names)):
        if dataset.dtype != "int32":
            if len(dataset.shape) == 1:
                sym = tensor.vector()
            elif len(dataset.shape) == 2:
                sym = tensor.matrix()
            elif len(dataset.shape) == 3:
                sym = tensor.tensor3()
            else:
                raise ValueError("dataset %s has unsupported shape" % name)
        elif dataset.dtype == "int32":
            if len(dataset.shape) == 1:
                sym = tensor.ivector()
            elif len(dataset.shape) == 2:
                sym = tensor.imatrix()
            elif len(dataset.shape) == 3:
                sym = tensor.itensor3()
            else:
                raise ValueError("dataset %s has unsupported shape" % name)
        else:
            raise ValueError("dataset %s has unsupported dtype %s" % (
                name, dataset.dtype))
        if list_of_test_values is not None:
            sym.tag.test_value = list_of_test_values[n]
        tag_expression(sym, name, dataset.shape)
        datasets_added.append(sym)
    graph["__datasets_added__"] = datasets_added
    return datasets_added
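A hypothetical call (array names invented for illustration): rank-3 float32 data maps to `tensor.tensor3()`, rank-2 int32 data to `tensor.imatrix()`, and the symbolic variables come back in input order:

import numpy as np

graph = {}
X = np.zeros((5, 7, 3), dtype='float32')   # -> tensor.tensor3()
y = np.zeros((5, 7), dtype='int32')        # -> tensor.imatrix()
X_sym, y_sym = add_datasets_to_graph([X, y], ['X', 'y'], graph)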
Example #25
    def optimiser_fn(self, update, update_kwargs, saved_update=None):

        qo = T.itensor3('qo')
        o_mask = T.matrix('o_mask')
        a = T.ivector('a')
        learning_rate = T.scalar('learning_rate')

        p_a = self.obj(qo, o_mask, a, deterministic=False)

        grads = T.grad(-p_a, self.params, disconnected_inputs='ignore')

        update_kwargs['loss_or_grads'] = grads
        update_kwargs['params'] = self.params
        update_kwargs['learning_rate'] = learning_rate

        updates = update(**update_kwargs)

        if saved_update is not None:
            for u, v in zip(updates, saved_update.keys()):
                u.set_value(v.get_value())

        optimiser = theano.function(
            inputs=[qo, o_mask, a, learning_rate],
            outputs=p_a,
            updates=updates,
            allow_input_downcast=True,
        )

        return optimiser, updates
Example #26
    def make_node(self, x, x2, x3, x4, x5):
        # check that the theano version has support for __props__.
        # This next line looks like it has a typo,
        # but it's actually a way to detect whether the theano version
        # is sufficiently recent to support the use of __props__.
        assert hasattr(
            self, '_props'
        ), "Your version of theano is too old to support __props__."
        x = tensor.as_tensor_variable(x)
        x2 = tensor.as_tensor_variable(x2)
        x3 = tensor.as_tensor_variable(x3)
        x4 = tensor.as_tensor_variable(x4)
        x5 = tensor.as_tensor_variable(x5)

        if prm.att_doc:
            if prm.compute_emb:
                td = tensor.itensor4().type()
            else:
                td = tensor.ftensor4().type()
            tm = tensor.ftensor3().type()
        else:
            if prm.compute_emb:
                td = tensor.itensor3().type()
            else:
                td = tensor.ftensor3().type()
            tm = tensor.fmatrix().type()
        return theano.Apply(self, [x,x2,x3,x4,x5], [td, tm, \
                                           tensor.fmatrix().type(), tensor.ivector().type()])
Example #27
def create_theano_function(word_embed, char_embed, values=None):
    word_x = T.itensor3('word_x')
    word_mask = T.tensor3('word_mask')
    sent_mask = T.matrix('sent_mask')
    label_y = T.ivector('label_y')
    att_out, network_output, loss = fn.build_fn(word_x=word_x,
                                                word_mask=word_mask,
                                                sent_mask=sent_mask,
                                                label_y=label_y,
                                                word_embed=word_embed,
                                                char_embed=None,
                                                args=args)
    if values is not None:
        lasagne.layers.set_all_param_values(network_output,
                                            values,
                                            trainable=True)

    params = lasagne.layers.get_all_params(network_output, trainable=True)
    if args.optimizer == 'sgd':
        updates = lasagne.updates.sgd(loss, params, args.learning_rate)
    elif args.optimizer == 'momentum':
        updates = lasagne.updates.momentum(loss, params, args.learning_rate)
    train_fn = theano.function([word_x, word_mask, sent_mask, label_y],
                               loss,
                               updates=updates)

    prediction = lasagne.layers.get_output(network_output, deterministic=True)
    eval_fn = theano.function([word_x, word_mask, sent_mask], prediction)
    fn_check_attention = theano.function([word_x, word_mask, sent_mask],
                                         att_out)
    return fn_check_attention, eval_fn, train_fn, params
Example #28
def RelationStackMaker(chips, params, graph=False, weighted=False, batched=False):
	assert 'emb_matrices' in params or 'wemb_matrix' in params
	if 'emb_matrices' in params :
		assert type(params['emb_matrices']) == list
		num_inputs = len(params['emb_matrices'])
	else:
		num_inputs = 1
	if batched:
		emb_inputs = [T.itensor3('emb_input_'+str(i)) for i in range(num_inputs)]
		entities_tv = [T.fmatrix('enidx_'+str(i)).astype(theano.config.floatX) for i in range(params['num_entity'])]
		if graph:
			if weighted:
				masks = T.ftensor4('child_mask')
			else:
				masks = T.ftensor3('child_mask')
		else:
			masks = T.fmatrix('batch_mask')
	else:
		emb_inputs = [T.imatrix('emb_input_'+str(i)) for i in range(num_inputs)]
		entities_tv = [T.fvector('enidx_'+str(i)).astype(theano.config.floatX) for i in range(params['num_entity'])]
		if graph:
			if weighted:
				masks = T.ftensor3('child_mask')
			else:
				masks = T.fmatrix('child_mask')
		else:
			masks = None
	#print masks, type(masks), masks.ndim
	if len(emb_inputs) == 1:
		current_chip = Start(params['voc_size'], emb_inputs[0])
	else:
		current_chip = Start(params['voc_size'], emb_inputs)  
	print('\n', 'Building Stack now', '\n', 'Start: ', params['voc_size'])  #, 'out_tv size:', len(current_chip.output_tv)
	instantiated_chips = stackLayers(chips, current_chip, params, entity_size=params['num_entity'])
	trainable_parameters = computeLayers(instantiated_chips, current_chip, params, entities_input=entities_tv, mask=masks)
Example #29
def test_broadcasts():
    A = T.imatrix()
    A_S = A.dimshuffle(0, 'x',1)
    func_shuffle = theano.function([A], A_S)
    A_value = [[1,2], [3,4]]
    AS_value = func_shuffle(A_value)
    
    print(A_value)
    print(AS_value)
    print(AS_value.shape)
    
    
    B = T.itensor3()
    AB = A_S + B
    func_add = theano.function([A_S, B], AB)
    
    B_value = [ A_value,
                A_value]
    
    AB_value = func_add(AS_value, B_value)
    print(AB_value.shape)
    
    AA = A[[0,0,0,0]]
    func_embed = theano.function([A], AA)
    AA_value = func_embed(A_value)
    print(AA_value)
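A NumPy analogue of the `dimshuffle(0, 'x', 1)` broadcast being tested (same values, no Theano):

import numpy as np

A = np.array([[1, 2], [3, 4]], dtype=np.int32)
B = np.stack([A, A])       # (2, 2, 2)
AS = A[:, None, :]         # dimshuffle(0, 'x', 1) -> (2, 1, 2)
print((AS + B).shape)      # broadcasts to (2, 2, 2)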
    

    """    
    def __init__(self, input_dim, proj_dim=128, neg_samples = 4,
                 init='uniform', activation='sigmoid', weights=None, W_regularizer = None, activity_regularizer = None, **kwargs):

        super(WordTagContextProduct, self).__init__(**kwargs)
        self.input_dim = input_dim
        self.proj_dim = proj_dim
        self.samples = neg_samples + 1
        self.init = initializations.get(init)
        self.activation = activations.get(activation)
        self.W_regularizer = regularizers.get(W_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.input = [T.itensor3(), T.itensor3()]
        self.W_w = self.init((input_dim, proj_dim))
        self.params = [self.W_w]
        if weights is not None:
            self.set_weights(weights)
Example #31
    def load_pretrained_model(self, model_path, model_name):
        # Load model and dictionaries
        print("Loading model params...")
        params = load_params('%s/%s' % (model_path, model_name))
        print("Loading dictionaries...")
        with open('%s/dict.pkl' % model_path, 'rb') as f:
            self.chardict = pkl.load(f)
        with open('%s/label_dict.pkl' % model_path, 'rb') as f:
            labeldict = pkl.load(f)

        self.n_char = len(self.chardict.keys()) + 1
        n_classes = len(labeldict.keys())
        print "#classes:", n_classes
        print labeldict

        print("Building network...")
        # Tweet variables
        tweet = T.itensor3()
        targets = T.imatrix()
        # masks
        t_mask = T.fmatrix()
        # network for prediction
        predictions = classify(tweet, t_mask, params, n_classes, self.n_char)
        # Theano function
        print("Compiling theano functions...")
        self.predict = theano.function([tweet, t_mask], predictions)
Example #32
    def __init__(self, rng, model_params):
        self.input = T.itensor3('input') # the data is a minibatch
        self.label = T.imatrix('label') # label's shape (mini_batch size, max_term_per_sent)
        self.sent_length= T.ivector('sent_length') # sent_length is the number of terms in each sentence
        self.masks = T.imatrix('masks') # masks which used in error and likelihood calculation

        self.core = SentenceLevelNeuralModelCore(rng,self.input,self.label,self.sent_length,self.masks,model_params)

        self.params = self.core.wordvec.params() \
                + self.core.POSvec.params() \
                + self.core.wordpos_vec.params() \
                + self.core.verbpos_vec.params() \
                + self.core.conv_word.params() \
                + self.core.conv_POS.params() \
                + self.core.conv_wordpos.params() \
                + self.core.conv_verbpos.params() \
                + self.core.hidden_layer.params

        self.L2_sqr = (self.core.wordvec.embeddings ** 2).sum() \
                + (self.core.POSvec.embeddings ** 2).sum() \
                + (self.core.wordpos_vec.embeddings ** 2).sum() \
                + (self.core.verbpos_vec.embeddings ** 2).sum() \
                + (self.core.conv_word.W ** 2).sum() \
                + (self.core.conv_POS.W ** 2).sum() \
                + (self.core.conv_wordpos.W ** 2).sum() \
                + (self.core.conv_verbpos.W ** 2).sum() \
                + (self.core.hidden_layer.W ** 2).sum()

        self.negative_log_likelihood = self.core.likelihood()
        self.errors = self.core.errors()

        # we only use L2 regularization
        self.cost = self.negative_log_likelihood \
                + self.core.L2_reg * self.L2_sqr


        self.gparams = []
        for param in self.params:
            gparam = T.grad(self.cost, param)
            self.gparams.append(gparam)

        self.updates = []

        learning_rate = model_params['learning_rate']
        for param, gparam in zip(self.params, self.gparams):
            self.updates.append((param, param - learning_rate * gparam))


        #self.train_model = theano.function(inputs=[self.input,self.label,self.masks], outputs=self.core.conv_word.output,on_unused_input='ignore')
        #self.train_model = theano.function(inputs=[self.input,self.label,self.masks], outputs=self.core.conv_POS.output,on_unused_input='ignore')
        #self.train_model = theano.function(inputs=[self.input,self.label,self.masks], outputs=self.core.conv_verbpos.output,on_unused_input='ignore')
        #self.train_model = theano.function(inputs=[self.input,self.label,self.masks], outputs=self.core.conv_wordpos.output,on_unused_input='ignore')
        #self.train_model = theano.function(inputs=[self.input,self.label,self.masks], outputs=self.core.conv_out,on_unused_input='ignore')
        #self.train_model = theano.function(inputs=[self.input,self.label,self.masks], outputs=self.core.max_out,on_unused_input='ignore')
        #self.train_model = theano.function(inputs=[self.input,self.label,self.masks], outputs=self.core.hidden_layer.output,on_unused_input='ignore')
        #self.train_model = theano.function(inputs=[self.input,self.label,self.masks], outputs=self.core.negative_log_likelihood,on_unused_input='ignore')
        #self.train_model = theano.function(inputs=[self.input,self.label,self.masks], outputs=self.cost,on_unused_input='ignore')
        self.train_model = theano.function(inputs=[self.input,self.label,self.masks], outputs=self.cost,updates=self.updates,on_unused_input='ignore')
        self.valid_model = theano.function(inputs=[self.input,self.label,self.masks], outputs=[self.errors,self.core.sentce_loglikelihood.y_pred_pointwise],on_unused_input='ignore')
Example #33
    def __init__(self,batch_size=16, seed=1234,nhu=300,width=5,n_out=len(nerarray),activation_f="hardtanh",
                 embeddingfile=senna_embmtxfile,trainingfile=trainingfile,paramfile=None):
        modeldir=os.path.join(nerdir,"models",'model_%i'%(len(os.listdir(nerdir+"/models"))))
        os.mkdir(modeldir)   
        for handler in logging.root.handlers[:]:
            logging.root.removeHandler(handler)
        logging.basicConfig(filename=os.path.join(modeldir,'log.txt'), level=logging.INFO, 
                            format='%(asctime)s : %(levelname)s : %(message)s')    
        logger.info("\n"+"\n".join(["\t%s : "%key+str(val) for key,val in locals().iteritems() if key!="self"]))    
        self.modeldir=modeldir
        self.batch_size = batch_size
        activation=None        
        if activation_f=="hardtanh":
            activation=hardtanh
        elif activation_f=="tanh":
            activation=T.tanh
        self.load_data(embeddingfile,trainingfile,batch_size)
        #==============================================================================
        #         BUILD MODEL
        #==============================================================================
        logger.info('... building the model')
        # allocate symbolic variables for the data
        self.index = T.iscalar()  # index to a [mini]batch
        self.x = T.itensor3('x')  # the data is presented as matrix of integers
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
                            # [int] labels
        self.permutation = T.ivector('permutation')
        if paramfile is not None:
            params=pickle.load(open(paramfile,"rb"))
        else:
            params=None
        self.model = SennaNER(input=self.x, embeddings=self.embeddings,features=capsfeatures,n_out=n_out, mini_batch_size=batch_size,
                                       nhu=nhu,width=width,activation=activation,seed=seed,params=params)

        self.test_model = theano.function(inputs=[self.index],
                outputs=self.model.errors(self.y),
                givens={
                    self.x: self.test_set_x[self.index * batch_size:(self.index + 1) * batch_size],
                    self.y: self.test_set_y[self.index * batch_size:(self.index + 1) * batch_size]},
                name="test_model")
    
        self.validation_cost = theano.function(inputs=[self.index],
                outputs=self.model.negative_log_likelihood(self.y),
                givens={
                    self.x: self.valid_set_x[self.index * batch_size:(self.index + 1) * batch_size],
                    self.y: self.valid_set_y[self.index * batch_size:(self.index + 1) * batch_size]},
                name="validation_cost")
    
        self.predictions = theano.function(inputs=[self.index],
                outputs=self.model.predictions,
                givens={
                    self.x: self.test_set_x[self.index * batch_size:(self.index + 1) * batch_size]},
                name="predictions")
    
        self.visualize_hidden = theano.function(inputs=[self.index],
                outputs=self.model.HiddenLayer.output,
                givens={
                    self.x: self.valid_set_x[self.index * batch_size:(self.index + 1) * batch_size]},
                name="visualize_hidden")
Example #34
def ndim_itensor(ndim, name=None):
    if ndim == 2:
        return T.imatrix(name)
    elif ndim == 3:
        return T.itensor3(name)
    elif ndim == 4:
        return T.itensor4(name)
    return T.imatrix(name=name)
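Usage sketch for `ndim_itensor` (variable names illustrative): it dispatches on rank and falls back to `imatrix` for any other ndim:

x2 = ndim_itensor(2, 'pairs')    # T.imatrix
x3 = ndim_itensor(3, 'batch')    # T.itensor3
x4 = ndim_itensor(4, 'windows')  # T.itensor4
x_other = ndim_itensor(5)        # falls back to T.imatrix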
Example #35
    def save(self, repo, filename):
        params = getParams(self, T.itensor3())
        index = 0
        while os.path.isfile(os.path.join(repo, filename + "_" + str(index))):
            index += 1
        filename = filename + "_" + str(index)
        with closing(open(os.path.join(repo, filename), 'wb')) as f:
            pickle.dump(params, f, protocol=pickle.HIGHEST_PROTOCOL)
Example #36
def tensor_max():
    e = np.asarray([[[2, 4], [5, 1]], [[3, 5], [4, 6]]], dtype='int32')
    w = T.itensor3('w')

    y = T.max(w, axis=1)
    f = theano.function(inputs=[w], outputs=y)

    print(f(e))
Example #37
    def __init__(self, nh, nc, ne, de, cs):
        '''
        nh :: number of hidden-layer units
        nc :: number of output label classes
        ne :: vocabulary size
        de :: dimension of the word embeddings
        cs :: context window size
        '''
        # the embedding table is really (ne, de); one extra row is added for the boundary label -1
        self.emb = theano.shared(name='embeddings', value=0.2 * numpy.random.uniform(-1.0, 1.0, (ne+1, de)).astype(theano.config.floatX))  # word-embedding space
        self.wx = theano.shared(name='wx', value=0.2 * numpy.random.uniform(-1.0, 1.0, (de * cs, nh)).astype(theano.config.floatX))  # input-to-hidden weights
        self.wh = theano.shared(name='wh', value=0.2 * numpy.random.uniform(-1.0, 1.0, (nh, nh)).astype(theano.config.floatX))  # recurrent hidden-to-hidden weights
        self.w = theano.shared(name='w', value=0.2 * numpy.random.uniform(-1.0, 1.0, (nh, nc)).astype(theano.config.floatX))  # hidden-to-output weights
        self.bh = theano.shared(name='bh', value=numpy.zeros(nh, dtype=theano.config.floatX))  # hidden-layer bias
        self.b = theano.shared(name='b', value=numpy.zeros(nc, dtype=theano.config.floatX))  # output-layer bias

        self.h0 = theano.shared(name='h0', value=numpy.zeros(nh, dtype=theano.config.floatX))

        self.lastlabel = theano.shared(name='lastlabel', value=0.2 * numpy.random.uniform(-1.0, 1.0, (nc, nc)).astype(theano.config.floatX))
        self.prelabel = theano.shared(name='prelabel', value=0.2 * numpy.random.uniform(-1.0, 1.0, (nc, nc)).astype(theano.config.floatX))
        self.bhmm = theano.shared(name='bhmm', value=numpy.zeros(nc, dtype=theano.config.floatX))

        self.params = [self.emb, self.wx, self.wh, self.w, self.bh, self.b, self.h0, self.lastlabel, self.prelabel, self.bhmm]  # all trainable parameters
        lr = T.scalar('lr')  # learning rate, passed in later as an input

        idxs = T.itensor3()
        x = self.emb[idxs].reshape((idxs.shape[0], idxs.shape[1], de*idxs.shape[2]))
        y_sentence = T.imatrix('y_sentence')  # training labels, 2D: (batch, sentence)
        def step(x_t, h_tm1):
            h_t = T.nnet.sigmoid(T.dot(x_t, self.wx) + T.dot(h_tm1, self.wh) + self.bh)  # hidden state from h_{t-1} and x_t
            s_temp = T.dot(h_t, self.w) + self.b  # softmax does not support 3D tensors, so reshape to 2D, apply it, then reshape back to 3D
            return h_t, s_temp
        [h, s_temp], _ = theano.scan(step, sequences=x, outputs_info=[T.ones(shape=(x.shape[1], self.h0.shape[0])) * self.h0, None])
        p_y = T.nnet.softmax(T.reshape(s_temp, (s_temp.shape[0]*s_temp.shape[1], -1)))
        p_y = T.reshape(p_y, s_temp.shape)

        # add the constraint term from the previous time step's label
        y_label3d = T.ftensor3('y_sentence3d')
        p_ytrain = self.add_layer(p_y, y_label3d)
        loss = self.nll_multiclass(p_ytrain, y_sentence) + 0.0*((self.wx**2).sum() + (self.wh**2).sum() + (self.w**2).sum())
        # network output
        sentence_gradients = T.grad(loss, self.params)
        sentence_updates = OrderedDict((p, p - lr*g) for p, g in zip(self.params, sentence_gradients))
        self.sentence_traintemp = theano.function(inputs=[idxs, y_sentence, y_label3d, lr], outputs=loss, updates=sentence_updates)

        '''self.sentence_train = theano.function(inputs=[idxs,y_sentence,lr],outputs=loss,updates=sentence_updates)'''
        # normalize the word embeddings, since we want the learned vectors to be unit length
        self.normalize = theano.function(inputs=[], updates={self.emb: self.emb / T.sqrt((self.emb**2).sum(axis=1)).dimshuffle(0, 'x')})

        # build the prediction and training functions; each row of idxs is one sample
        # (the sequence of indices inside one context window)
        self.classify = theano.function(inputs=[idxs], outputs=p_y)
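A NumPy stand-in for the reshape trick noted in `step` above: softmax over the class axis of a 3D tensor by flattening the leading axes first, then restoring the shape:

import numpy as np

s = np.random.randn(6, 4, 3)                        # (time, batch, classes)
flat = s.reshape(-1, s.shape[-1])                   # (24, 3)
e = np.exp(flat - flat.max(axis=1, keepdims=True))
p = (e / e.sum(axis=1, keepdims=True)).reshape(s.shape)
assert np.allclose(p.sum(axis=-1), 1.0)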
Example #38
def build_image_only_network(d_word, d_hidden, lr, eps=1e-6):

    # input theano vars
    in_context_fc7 = T.tensor3(name='context_images')
    in_context_bb = T.tensor4(name='context_bb')
    in_bbmask = T.tensor3(name='bounding_box_mask')
    in_context = T.itensor4(name='context')
    in_cmask = T.tensor4(name='context_mask')
    in_answer_fc7 = T.matrix(name='answer_images')
    in_answer_bb = T.matrix(name='answer_bb')
    in_answers = T.itensor3(name='answers')
    in_amask = T.tensor3(name='answer_mask')
    in_labels = T.imatrix(name='labels')

    # define network
    l_context_fc7 = lasagne.layers.InputLayer(shape=(None, 3, 4096),
                                              input_var=in_context_fc7)
    l_answers = lasagne.layers.InputLayer(shape=(None, 3, max_words),
                                          input_var=in_answers)
    l_amask = lasagne.layers.InputLayer(shape=l_answers.shape,
                                        input_var=in_amask)

    # contexts and answers should share embeddings
    l_answer_emb = lasagne.layers.EmbeddingLayer(l_answers, len_voc, d_word)

    l_context_proj = lasagne.layers.DenseLayer(
        l_context_fc7,
        num_units=d_hidden,
        nonlinearity=lasagne.nonlinearities.rectify,
        num_leading_axes=2)
    l_context_final_reps = lasagne.layers.LSTMLayer(l_context_proj,
                                                    num_units=d_hidden,
                                                    only_return_final=True)
    l_ans_reps = SumAverageLayer([l_answer_emb, l_amask],
                                 compute_sum=True,
                                 num_dims=3)
    l_scores = InnerProductLayer([l_context_final_reps, l_ans_reps])

    preds = lasagne.layers.get_output(l_scores)
    loss = T.mean(lasagne.objectives.categorical_crossentropy(
        preds, in_labels))

    all_params = lasagne.layers.get_all_params(l_scores, trainable=True)
    updates = lasagne.updates.adam(loss, all_params, learning_rate=lr)
    train_fn = theano.function([
        in_context_fc7, in_context_bb, in_bbmask, in_context, in_cmask,
        in_answer_fc7, in_answer_bb, in_answers, in_amask, in_labels
    ],
                               loss,
                               updates=updates,
                               on_unused_input='warn')
    pred_fn = theano.function([
        in_context_fc7, in_context_bb, in_bbmask, in_context, in_cmask,
        in_answer_fc7, in_answer_bb, in_answers, in_amask
    ],
                              preds,
                              on_unused_input='warn')
    return train_fn, pred_fn, l_scores
Example #39
    def make_node(self, x1, x2, x3, x4):
        assert hasattr(self, '_props'), "Your version of theano is too old to support __props__."
        x1 = tensor.as_tensor_variable(x1)
        x2 = tensor.as_tensor_variable(x2)
        x3 = tensor.as_tensor_variable(x3)
        x4 = tensor.as_tensor_variable(x4)
        out = [tensor.fmatrix().type(), tensor.itensor3().type(), tensor.imatrix().type(), tensor.fmatrix().type()]

        return theano.Apply(self, [x1, x2, x3, x4], out)
Example #40
def attention_q():
    query = T.itensor3('query')
    cands = T.itensor3('cands')

    d = 2
    W1_c = theano.shared(np.random.randint(-3, 3, (d, d)))
#    W1_c = theano.shared(np.ones((d, d), dtype='int32'))
    W1_h = theano.shared(np.random.randint(-3, 3, (d, d)))
#    W1_h = theano.shared(np.ones((d, d), dtype='int32'))
    w    = theano.shared(np.ones((d,), dtype='float32'))
    W2_r = theano.shared(np.random.randint(-1, 1, (d, d)))
    W2_h = theano.shared(np.random.randint(-1, 1, (d, d)))
#    W2_r = theano.shared(np.ones((d, d), dtype='float32'))
#    W2_h = theano.shared(np.ones((d, d), dtype='float32'))

#    q_in = np.asarray([[[1, 2], [3, 4], [5, 6]]], dtype='int32')
    q_in = np.ones((1, 3, 2), dtype='int32')
#    C_in = np.ones((1, 3, 2), dtype='int32')
#    C_in = np.ones((4, 3, 3, 2), dtype='int32')
    C_in = np.asarray(np.random.randint(-2, 2, (1, 3, 2)), dtype='int32')

    def forward(query, cands, eps=1e-8):
        # cands: 1D: n_queries, 2D: n_cands-1, 3D: dim_h
        # query: 1D: n_queries, 2D: n_words, 3D: dim_h
        # mask: 1D: n_queries, 2D: n_cands, 3D: n_words

        # 1D: n_queries, 2D: n_cands-1, 3D: n_words, 4D: dim_h
        M = T.dot(query, W1_c).dimshuffle(0, 'x', 1, 2) + T.dot(cands, W1_h).dimshuffle(0, 1, 'x', 2)

        # 1D: n_queries, 2D: n_cands-1, 3D: n_words
        alpha = T.nnet.softmax(T.dot(M, w).reshape((cands.shape[0] * cands.shape[1], query.shape[1])))
        alpha = alpha.reshape((cands.shape[0], cands.shape[1], query.shape[1], 1))

        # 1D: n_queries, 2D: n_cands-1, 3D: n_words
        r = T.sum(query.dimshuffle((0, 'x', 1, 2)) * alpha, axis=2)  # 4 * 3 * 2

        # 1D: n_queries, 2D: n_cands, 3D: dim_h
        h_after = T.dot(r, W2_r)  # 4 * 3 * 2
#        return h_after, h_after
        return h_after, r, alpha.reshape((alpha.shape[0], alpha.shape[1], alpha.shape[2])), M

    y, a, b, c = forward(query, cands)
    f = theano.function(inputs=[query, cands], outputs=[y, a, b, c], on_unused_input='ignore')
    print f(q_in, C_in)
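A NumPy reference for the same attention computation, handy for checking the shapes and the word-axis softmax (a sketch reusing the variable names above):

import numpy as np

def np_forward(query, cands, W1_c, W1_h, w):
    # query: (n_queries, n_words, d); cands: (n_queries, n_cands, d)
    M = query.dot(W1_c)[:, None, :, :] + cands.dot(W1_h)[:, :, None, :]
    scores = M.dot(w)                                  # (n_queries, n_cands, n_words)
    e = np.exp(scores - scores.max(axis=-1, keepdims=True))
    alpha = e / e.sum(axis=-1, keepdims=True)          # softmax over the word axis
    r = (query[:, None, :, :] * alpha[..., None]).sum(axis=2)  # (n_queries, n_cands, d)
    return r, alpha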
Exemple #41
0
    def get_inps(use_mask=True,
                 vgen=None,
                 use_bow_out=False,
                 debug=False,
                 output_map=None):
        if use_mask:
            X, y, mask, cmask = TT.itensor3("X"), TT.imatrix("y"), TT.fmatrix("mask"), \
                    TT.fmatrix("cost_mask")
            qmask = TT.fmatrix("qmask")
            bow_out = TT.ftensor3("bow_out")

            if debug:
                theano.config.compute_test_value = "warn"
                batch = vgen.next()
                X.tag.test_value = batch['x'].astype("int32")
                y.tag.test_value = batch['y'].astype("int32")
                mask.tag.test_value = batch['mask'].astype("float32")
                cmask.tag.test_value = batch['cmask'].astype("float32")
                qmask.tag.test_value = batch["qmask"].astype("float32")
                if use_bow_out:
                    bow_out.tag.test_value = batch['bow_out'].astype("float32")

            if output_map:
                outs = {}
                outs["X"] = X
                outs["y"] = y
                outs["mask"] = mask
                outs["cmask"] = cmask
                if use_bow_out:
                    outs["bow_out"] = bow_out
                outs["qmask"] = qmask
            else:
                outs = [X, y, mask, cmask]
                if use_bow_out:
                    outs += [bow_out]
            return outs
        else:
            X, y = TT.itensor3("X"), TT.itensor3("y")
            if debug:
                theano.config.compute_test_value = "warn"
                batch = vgen.next()
                X.tag.test_value = batch['x']
                y.tag.test_value = batch['y']
            return [X, y]
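The test-value mechanism used above can be exercised on its own: with compute_test_value enabled, every expression is evaluated on the tagged test data at graph-construction time, so shape mismatches surface immediately rather than at the first function call. A minimal sketch:

import numpy as np
import theano
import theano.tensor as TT

theano.config.compute_test_value = "warn"
X = TT.itensor3("X")
mask = TT.fmatrix("mask")
X.tag.test_value = np.ones((5, 4, 3), dtype="int32")
mask.tag.test_value = np.ones((5, 4), dtype="float32")
s = X.sum(axis=2) * mask        # evaluated on the test values right here
print(s.tag.test_value.shape)   # (5, 4)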
Exemple #42
0
def test_lookup():
    a = T.itensor3()
    b = T.ivector()
    y = a[0][T.arange(b.shape[0]), b]
    f = theano.function(inputs=[a, b], outputs=[y])

    u = [[[1, 2], [2, 4]], [[3, 1], [2, 1]]]
    c = [0, 1]

    print f(u, c)
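The same lookup in NumPy, for reference: from u[0], row i contributes the element in column c[i]:

import numpy as np
u = np.asarray([[[1, 2], [2, 4]], [[3, 1], [2, 1]]], dtype='int32')
c = np.asarray([0, 1], dtype='int32')
print(u[0][np.arange(c.shape[0]), c])  # [1 4]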
Exemple #43
0
def copy():
    e = np.asarray([[[2, 4], [5, 1]], [[3, 5], [4, 6]]], dtype='int32')
    w = T.itensor3('w')
    u = T.ones(shape=(2, w.shape[2]))

    y = T.repeat(T.max(w, axis=1, keepdims=True), 2, 1)
#    y = T.max(w, axis=1, keepdims=True) * u
    f = theano.function(inputs=[w], outputs=y)

    print f(e)
Exemple #44
0
    def __init__(self,
                 rng,
                 embeddings,
                 char_embeddings,
                 hiddensize,
                 char_hiddensize,
                 embedding_dim,
                 char_embedding_dim,
                 window_size,
                 num_tags,
                 dic_size,
                 dropout_rate=0.7):
        self.rng = rng
        self.inputX = T.imatrix(
            'inputX')  # a sentence, shape (T, window_size)
        self.inputX_chars = T.itensor3(
            'inputX_chars'
        )  # a sentence, shape (T, window_size, max number of chars in a word)
        self.inputY = T.ivector('inputY')  # tags of a sentence
        self.is_train = T.iscalar('is_train')

        self.new_theta = T.fmatrix('new_theta')

        self.dropout_rate = dropout_rate
        self.nhidden = hiddensize
        self.char_nhidden = char_hiddensize  # for now set the number of hidden units the same
        self.embedding_dim = embedding_dim
        self.char_embedding_dim = char_embedding_dim
        self.window_size = window_size
        self.n_classes = num_tags
        self.dic_size = dic_size

        # test values for debugging during compilation
        self.inputX.tag.test_value = np.ones(
            (10, window_size)).astype(np.int32)
        self.inputX_chars.tag.test_value = np.ones(
            (10, window_size, 8)).astype(np.int32)
        self.inputY.tag.test_value = np.ones(10).astype(np.int32)

        self.Embeddings = theano.shared(value=embeddings,
                                        name="Embeddings",
                                        borrow=True)
        self.Char_Embeddings = theano.shared(value=char_embeddings,
                                             name="Char_Embeddings",
                                             borrow=True)

        # word embeddings
        self.inputW = self.Embeddings[self.inputX]

        # char embeddings
        self.inputC = self.Char_Embeddings[self.inputX_chars].dimshuffle(
            [2, 0, 1, 3])

        self.params = [self.Embeddings, self.Char_Embeddings]
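A shape walkthrough of the char-embedding lookup above, with illustrative sizes: indexing a shared (vocab, dim) matrix by an itensor3 of indices yields a 4D tensor, which the dimshuffle then reorders so the character axis comes first:

import numpy as np
import theano
import theano.tensor as T

char_E = theano.shared(np.random.randn(20, 5).astype(theano.config.floatX))
idx = T.itensor3('idx')              # (T, window_size, max_chars)
emb = char_E[idx]                    # (T, window_size, max_chars, 5)
out = emb.dimshuffle([2, 0, 1, 3])   # (max_chars, T, window_size, 5)
f = theano.function([idx], out)
print(f(np.ones((10, 3, 8), dtype='int32')).shape)  # (8, 10, 3, 5)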
Exemple #45
0
    def __init__(self, input_dim, proj_dim=128, neg_samples=4, tensor_slices=4, slice_dim=16,
                 init='uniform', activation='tanh', weights=None, W_regularizer=None,
                 activity_regularizer=None, **kwargs):

        super(WordTagContextProduct_tensor, self).__init__(**kwargs)
        self.input_dim = input_dim
        self.proj_dim = proj_dim
        self.samples = neg_samples + 1
        #np.random.seed(0)
        self.init = initializations.get(init)
        self.activation = activations.get(activation)
        self.W_regularizer = regularizers.get(W_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)
        self.input = [T.itensor3(), T.itensor3()]
        self.W_w = self.init((input_dim, proj_dim))
        self.tensor_slices = tensor_slices
        self.slice_dim = slice_dim
        self.params = [self.W_w]

        if weights is not None:
            self.set_weights(weights)
Exemple #46
0
 def test_get_output_for(self):
     X = T.itensor3()
     X1 = np.empty((2, 2, 10), dtype='int32')
     for i, is_ in enumerate(itertools.product(*(range(n) for n in X1.shape[:-1]))):
         X1[is_] = np.arange(i, 10 + i)
     X2 = np.empty((2, 2, 3), dtype='int32')
     for i, is_ in enumerate(itertools.product(*(range(n) for n in X2.shape[:-1]))):
         X2[is_] = np.arange(7 + i, 10 + i)
     self.assertTrue(np.array_equal(
         theano.function([X], KMaxPool1DLayer(InputLayer((100, 100)), 3).get_output_for(X))(X1), X2
     ))
Exemple #47
0
def dot():
    e1 = np.asarray([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], dtype='int32')
    e2 = np.asarray([[1, 2], [3, 1]], dtype='int32')
    w = T.itensor3('w')
    v = T.imatrix('v')

    y = T.batched_dot(v, w.dimshuffle(0, 2, 1))
    u = w.T
    f = theano.function(inputs=[v, w], outputs=y)
    f2 = theano.function(inputs=[w], outputs=u)

    print f(e2, e1)
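The batched_dot above multiplies v[i] with w[i].T for each batch index i; the NumPy equivalent:

import numpy as np
e1 = np.asarray([[[1, 2], [3, 4]], [[5, 6], [7, 8]]], dtype='int32')
e2 = np.asarray([[1, 2], [3, 1]], dtype='int32')
print(np.einsum('bj,bkj->bk', e2, e1))  # [[ 5 11] [21 29]], same as f(e2, e1)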
Exemple #48
0
     def __init__(self, nh, nc, ne, de, cs, bs): 
         ''' 
         nh :: dimension of the hidden layer 
         nc :: number of classes 
         ne :: number of word embeddings in the vocabulary 
         de :: dimension of the word embeddings 
         cs :: word window context size  
         bs :: batch size (number of samples)
         ''' 
         idxs = T.itensor3() # (time, samples, features): one row per word in the sentence, as many feature columns as the context window size
         # Data is given as a tensor (batch, sequence, context size)
         l_in = lasagne.layers.InputLayer((bs, None, cs), idxs)
         # We have a tensor (batch size, sequence length, concatenated context win. embeddings)
         l_emb = lasagne.layers.EmbeddingLayer(l_in, ne, de)
         l_flatt_emb = lasagne.layers.flatten(l_emb, outdim=3)
         
         print("Output of after embedding: {0}".format(lasagne.layers.get_output_shape(l_flatt_emb, (bs, 11, cs))))

         # Define recurent layer
         l_r = lasagne.layers.RecurrentLayer(l_flatt_emb, nh, nonlinearity=lasagne.nonlinearities.sigmoid)

         # Output shape should be (batch size, sequence, hidden)
         print("Output after recurrence: {0}".format(lasagne.layers.get_output_shape(l_r, (bs, 11, cs))))
         
         l_res = lasagne.layers.ReshapeLayer(l_r, (-1, l_r.output_shape[2]))
         print("Output after reshape: {0}".format(lasagne.layers.get_output_shape(l_res, (bs, 11, cs))))

         l_out = lasagne.layers.DenseLayer(l_res, nc, nonlinearity=lasagne.nonlinearities.softmax)
         print("Output shape: {0}".format(lasagne.layers.get_output_shape(l_out, (bs, 11, cs))))


         y_sentence = T.ivector('y_sentence')
         y_mask = T.vector('y_mask')
         pred = lasagne.layers.get_output(l_out)         
         c_pred = T.argmax(pred, axis = 1)
         sentence_nll = T.mean(lasagne.objectives.categorical_crossentropy(pred, y_sentence) * y_mask)
         sentence_error = T.sum(T.neq(c_pred, y_sentence)*y_mask)
         params = lasagne.layers.get_all_params(l_out)
         sentence_gradients = T.grad(sentence_nll, params)
         lr = 0.0627142536696559
         #sentence_updates = OrderedDict((p, p - lr*g) for p, g in zip(params, sentence_gradients))
         sentence_updates = lasagne.updates.momentum(sentence_nll, params, lr)

         self.train_sentence = theano.function(inputs  = [idxs, y_sentence, y_mask], 
                                               outputs = sentence_nll, 
                                               updates = sentence_updates) 

         self.normalize = theano.function( inputs = [], 
                                           updates = {l_emb.W: l_emb.W/T.sqrt((l_emb.W**2).sum(axis=1)).dimshuffle(0,'x')})  


         self.errors = theano.function(inputs=[idxs, y_sentence, y_mask], outputs=sentence_error)             
Exemple #49
0
    def test_recurrent_lookup_table(self):
        E = np.random.uniform(size=(self.vocab_size, self.n_in)).astype('float32')
        W = np.random.uniform(size=(self.n_out, self.vocab_size)).astype('float32')
        b = np.random.uniform(size=self.vocab_size).astype('float32')
        embeddings = LookupTable(vocab_size=self.vocab_size, embedding_size=self.n_in, window_size=1, E_init=E, advanced_indexing=True)
        net = Recurrent([embeddings, self.layer, LinearLayer(n_in=self.n_out, n_out=self.vocab_size, W_init=W, b_init=b), Softmax()])

        x = np.array([
            [
                [0],
                [2]
            ],
            [
                [3],
                [1]
            ]
        ]).astype('int32')

        x_var = T.itensor3()
        p_var = net.forward(x_var)

        # the predictions for the two examples
        y = np.array([0, 2]).astype('int32')
        y_one_hot = np.array([[1, 0, 0, 0, 0],
                              [0, 0, 1, 0, 0]]).astype('int32')

        # time 1
        h1, mem1 = self.recurrent_step(E[[0,3]], *self.mem0)
        z1 = h1.dot(W) + b
        p1 = np.exp(z1) / np.exp(z1).sum(axis=1)[:, np.newaxis]

        # time 2
        h2, mem2 = self.recurrent_step(E[[2,1]], *mem1)
        z2 = h2.dot(W) + b
        p2 = np.exp(z2) / np.exp(z2).sum(axis=1)[:, np.newaxis]

        f = function([x_var], p_var, allow_input_downcast=True, on_unused_input='warn')
        got_p = f(x)

        self.assertTrue(np.allclose(got_p, p2))

        y_var = T.ivector()
        loss = cross_entropy_loss(p_var, y_var, one_hot_num_classes=self.vocab_size)
        f_loss = function([x_var, y_var], [p_var, loss], allow_input_downcast=True, on_unused_input='warn')

        got_p_out, got_loss = f_loss(x, y)

        expect_p_out = p2
        expect_loss = -1 * np.sum(y_one_hot * np.log(expect_p_out)) / 2.

        self.assertTrue(np.allclose(got_p_out, expect_p_out))
        self.assertTrue(np.allclose(got_loss, expect_loss))
Exemple #50
0
def zero_pad_gate():
    dim_emb = 2
    window = 1
#    w = T.imatrix('w')
    w = T.itensor3('w')
    zero = T.zeros((1, 1, dim_emb * window), dtype=theano.config.floatX)

#    y = T.eq(w, zero)
    y = T.eq(T.sum(T.eq(w, zero), 2, keepdims=True), 0) * w
    f = theano.function(inputs=[w], outputs=[y])

    e = np.asarray([[[2, 4], [0, 0]], [[3, 2], [4, 1]]], dtype='int32')
    print f(e)
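In NumPy terms, the gate zeroes every row that contains a zero entry, which here means the all-zero padding row; all other rows pass through unchanged:

import numpy as np
e = np.asarray([[[2, 4], [0, 0]], [[3, 2], [4, 1]]], dtype='int32')
keep = (np.sum(e == 0, axis=2, keepdims=True) == 0).astype('int32')
print(keep * e)  # the [0, 0] row stays zero, the rest are untouched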
Exemple #51
0
  def make_node(self, x, y, len_x, len_y):
    x = theano.tensor.as_tensor_variable(x)
    assert x.ndim == 3  # tensor: nframes x nseqs x dim
    y = theano.tensor.as_tensor_variable(y)
    assert y.ndim == 2  # matrix: nseqs x max_labelling_length
    len_x = theano.tensor.as_tensor_variable(len_x)
    len_y = theano.tensor.as_tensor_variable(len_y)
    assert len_x.ndim == 1  # vector of seqs lengths
    assert len_x.dtype == "int32"
    assert len_y.ndim == 1  # vector of seqs lengths
    assert len_y.dtype == "int32"

    return theano.Apply(self, [x, y, len_x, len_y], [T.ftensor3(),T.itensor3()])
Exemple #52
0
def test_model(args):
    _, dev, test, vmap = load_dataset(args.tweet_file, args.testfile, args.vocab)
    labelmap = cPickle.load(open(args.label_file, 'rb'))
    nclasses = len(labelmap)

    X = T.itensor3('X')
    M = T.matrix('M')
    y = T.ivector('y')

    print "building model"
    network = build_model(vmap, nclasses, invar=X, maskvar=M)
    print "loading params"
    network = read_model_data(network, args.model_file)
Exemple #53
0
def main(data_path, model_path, dict_path, save_path):

    print("Preparing Data...")

    # Load data and dictionary
    X = []
    with io.open(data_path,'r',encoding='utf-8') as f:
        for line in f:
            X.append(line.rstrip('\n'))
    with open(dict_path, 'rb') as f:
        chardict = pkl.load(f)
    n_char = len(chardict.keys()) + 1

    # Prepare data for encoding
    batches = Batch(X)

    # Load model
    print("Loading model params...")
    params = load_params(model_path)

    # Build encoder
    print("Building encoder...")

    # Theano variables
    tweet = T.itensor3()
    t_mask = T.fmatrix()

    # Embeddings
    emb_t = tweet2vec(tweet, t_mask, params, n_char)[0]

    # Theano function
    f_enc = theano.function([tweet, t_mask], emb_t)

    # Encode
    print("Encoding data...")
    print("Input data {} samples".format(len(X)))
    features = np.zeros((len(X),WDIM), dtype='float32')
    it = 0
    for x,i in batches:
        if it % 100 == 0:
            print("Minibatch {}".format(it))
        it += 1

        xp, x_mask = prepare_data(x, chardict)
        ff = f_enc(xp, x_mask)
        for ind, idx in enumerate(i):
            features[idx] = ff[ind]

    # Save
    with open(save_path, 'wb') as o:
        np.save(o, features)
Exemple #54
0
    def _init_model(self, in_size, out_size, n_hid=10, learning_rate_sl=0.005, \
            learning_rate_rl=0.005, batch_size=32, ment=0.1):
        # 2-layer MLP
        self.in_size = in_size # x and y coordinate
        self.out_size = out_size # up, down, right, left
        self.batch_size = batch_size
        self.learning_rate = learning_rate_rl
        self.n_hid = n_hid

        input_var, turn_mask, act_mask, reward_var = T.ftensor3('in'), T.imatrix('tm'), \
                T.itensor3('am'), T.fvector('r')

        in_var = T.reshape(input_var, (input_var.shape[0]*input_var.shape[1],self.in_size))

        l_mask_in = L.InputLayer(shape=(None,None), input_var=turn_mask)

        pol_in = T.fmatrix('pol-h')
        l_in = L.InputLayer(shape=(None,None,self.in_size), input_var=input_var)
        l_pol_rnn = L.GRULayer(l_in, n_hid, hid_init=pol_in, mask_input=l_mask_in) # B x H x D
        pol_out = L.get_output(l_pol_rnn)[:,-1,:]
        l_den_in = L.ReshapeLayer(l_pol_rnn, (turn_mask.shape[0]*turn_mask.shape[1], n_hid)) # BH x D
        l_out = L.DenseLayer(l_den_in, self.out_size, nonlinearity=lasagne.nonlinearities.softmax)

        self.network = l_out
        self.params = L.get_all_params(self.network)

        # rl
        probs = L.get_output(self.network) # BH x A
        out_probs = T.reshape(probs, (input_var.shape[0],input_var.shape[1],self.out_size)) # B x H x A
        log_probs = T.log(out_probs)
        act_probs = (log_probs*act_mask).sum(axis=2) # B x H
        ep_probs = (act_probs*turn_mask).sum(axis=1) # B
        H_probs = -T.sum(T.sum(out_probs*log_probs,axis=2),axis=1) # B
        self.loss = -T.mean(ep_probs*reward_var + ment*H_probs)

        updates = lasagne.updates.rmsprop(self.loss, self.params, learning_rate=learning_rate_rl, \
                epsilon=1e-4)

        self.inps = [input_var, turn_mask, act_mask, reward_var, pol_in]
        self.train_fn = theano.function(self.inps, self.loss, updates=updates)
        self.obj_fn = theano.function(self.inps, self.loss)
        self.act_fn = theano.function([input_var, turn_mask, pol_in], [out_probs, pol_out])

        # sl
        sl_loss = -T.mean(ep_probs)
        sl_updates = lasagne.updates.rmsprop(sl_loss, self.params, learning_rate=learning_rate_sl, \
                epsilon=1e-4)

        self.sl_train_fn = theano.function([input_var, turn_mask, act_mask, pol_in], sl_loss, \
                updates=sl_updates)
        self.sl_obj_fn = theano.function([input_var, turn_mask, act_mask, pol_in], sl_loss)
Exemple #55
0
    def _setup_params(self):

        weight_scale = None

        # In this implementation, all hidden layers, terminal nodes should have same vector size
        assert self.input_n == self.output_n
        self.W_e1 = self.create_weight(self.input_n, self.output_n, "enc1", scale=weight_scale)
        self.W_e2 = self.create_weight(self.input_n, self.output_n, "enc2", scale=weight_scale)
        self.B_e = self.create_bias(self.output_n, "enc")

        self.W_d1 = self.create_weight(self.output_n, self.output_n, "dec1", scale=weight_scale)
        self.W_d2 = self.create_weight(self.output_n, self.input_n, "dec2", scale=weight_scale)
        self.B_d1 = self.create_bias(self.output_n, "dec1")
        self.B_d2 = self.create_bias(self.input_n, "dec2")

        self.init_gW_d1 = theano.shared(np.zeros_like(self.W_d1.get_value()))
        self.init_gW_d2 = theano.shared(np.zeros_like(self.W_d2.get_value()))
        self.init_gB_d1 = theano.shared(np.zeros_like(self.B_d1.get_value()))
        self.init_gB_d2 = theano.shared(np.zeros_like(self.B_d2.get_value()))

        self.h0 = None
        if self.additional_h:
            self.h0 = self.create_vector(self.output_n, "h0")

        self.W = []
        self.B = []
        self.params = [self.W_e1, self.W_e2, self.B_e, self.W_d1, self.W_d2, self.B_d1, self.B_d2]

        if self.deep:
            # Set parameters for deep encoding layer
            self.W_ee = self.create_weight(self.output_n, self.output_n, "deep_enc", scale=weight_scale)
            self.B_ee = self.create_bias(self.output_n, "deep_enc")
            self.params.extend([self.W_ee, self.B_ee])

        self.init_registers = self.create_matrix(self.max_reg + 1, self.output_n, "init_regs")
        self.zero_rep = self.create_vector(self.output_n, "zero_rep")

        # Inputs for all
        self._vars.seq = T.imatrix("seq")

        # Inputs for training
        self._vars.back_routes = T.itensor3("back_routes")
        self._vars.back_lens = T.ivector("back_lens")
        self.inputs = [self._vars.seq, self._vars.back_routes, self._vars.back_lens]

        # Just for decoding
        self._vars.n = T.iscalar("n")
        self._vars.p = T.vector("p", dtype=FLOATX)
        self.encode_inputs = [self._vars.x, self._vars.seq]
        self.decode_inputs = [self._vars.p, self._vars.seq]
Exemple #56
0
def repeat():
#    e = np.asarray([[[2, 4], [5, 1], [2, 1]]], dtype='int32')
    e = np.asarray([[[2, 4], [5, 1], [2, 10]], [[20, 4], [5, 10], [20, 1]]], dtype='int32')
#    e = np.asarray([[[4], [1], [2]], [[4], [5], [1]]], dtype='int32')
#    e = np.asarray([[2, 4], [5, 1]], dtype='int32')
    w = T.itensor3('w')
#    w = T.imatrix('w')

#    y = T.repeat(w, T.cast(w.shape[1], dtype='int32'), 0)[T.arange(w.shape[1]), 1:]
#    y = T.sum(w, axis=1)
    y = T.repeat(T.sum(w, axis=1), 2, axis=1).reshape((w.shape[0], 2, 2))
#    y = T.repeat(T.repeat(w, T.cast(w.shape[1], dtype='int32'), 0)[T.arange(w.shape[1]), 1:], 2, 0)
#    y = T.repeat(w, 2, 0)
    f = theano.function(inputs=[w], outputs=y)

    print f(e)
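What the uncommented line computes, in NumPy: the per-column sums are repeated and reshaped back into a (batch, 2, 2) block:

import numpy as np
e = np.asarray([[[2, 4], [5, 1], [2, 10]], [[20, 4], [5, 10], [20, 1]]], dtype='int32')
s = e.sum(axis=1)                                # (2, 2): [[9, 15], [45, 15]]
print(np.repeat(s, 2, axis=1).reshape(2, 2, 2))  # [[[9 9] [15 15]] [[45 45] [15 15]]]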
Exemple #57
0
def max3d():
    e = np.asarray([[[2, 1], [10, 3]], [[3, 5], [4, 6]]], dtype='int32')
    v = np.asarray([0, 1], dtype='int32')
    w = T.itensor3('w')
    a = T.ivector('a')

#    y = T.max(w, axis=1)
#    y = T.max(w[:, :, :1], axis=1)
#    y = w[T.arange(a.shape[0]), a]
    y = w[:, a]
#    y_a = y / a
#    y_r = y % a
#    y = T.max(w, axis=[1, 2])
#    f = theano.function(inputs=[w, a], outputs=[y, y_a, y_r])
    f = theano.function(inputs=[w, a], outputs=[y])

    print f(e, v)
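The two indexing forms tried above differ: w[:, a] takes rows a along axis 1 for every batch, while w[T.arange(a.shape[0]), a] pairs batch i with row a[i]. In NumPy:

import numpy as np
e = np.asarray([[[2, 1], [10, 3]], [[3, 5], [4, 6]]], dtype='int32')
v = np.asarray([0, 1], dtype='int32')
print(e[:, v].shape)                 # (2, 2, 2): both rows for every batch
print(e[np.arange(v.shape[0]), v])   # (2, 2): [[2 1] [4 6]], one row per batch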
Exemple #58
0
def lookup():
    e = theano.shared(np.asarray([[2, 1], [10, 3], [3, 5], [4, 6]], dtype='int32'))
#    w = T.imatrix('w')
#    v = T.imatrix('v')
#    v = T.itensor3('v')
#    w = T.ivector('w')
    v = T.ivector('v')
    w = T.itensor3('w')

    y = w[T.arange(w.shape[0]), v]
#    y = w[:, v]
    f = theano.function(inputs=[w, v], outputs=[y])

#    print f([[1, 2], [3, 4]],
#            [[0, 1, 1], [1, 0, -1]])
    print f([[[0, 0], [0, 1], [0, 1]], [[1, 1], [1, 0], [1, -1]]],
            [1, 2])
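In NumPy terms, element i of v picks row v[i] out of batch i of w:

import numpy as np
w = np.asarray([[[0, 0], [0, 1], [0, 1]], [[1, 1], [1, 0], [1, -1]]], dtype='int32')
v = np.asarray([1, 2], dtype='int32')
print(w[np.arange(w.shape[0]), v])  # [[ 0  1] [ 1 -1]]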