Example #1
    def __call__(self, query, options, gold, lengths, query_no):
        if len(options) == 1:
            return None, 0

        final = []
        if args.word_vectors:
            qvecs = [dy.lookup(self.pEmbedding, w) for w in query]
            qvec_max = dy.emax(qvecs)
            qvec_mean = dy.average(qvecs)
        for otext, features in options:
            inputs = dy.inputTensor(features)
            if args.word_vectors:
                ovecs = [dy.lookup(self.pEmbedding, w) for w in otext]
                ovec_max = dy.emax(ovecs)
                ovec_mean = dy.average(ovecs)
                inputs = dy.concatenate(
                    [inputs, qvec_max, qvec_mean, ovec_max, ovec_mean])
            if args.drop > 0:
                inputs = dy.dropout(inputs, args.drop)
            h = inputs
            for pH, pB in zip(self.hidden, self.bias):
                h = dy.affine_transform([pB, pH, h])
                if args.nonlin == "linear":
                    pass
                elif args.nonlin == "tanh":
                    h = dy.tanh(h)
                elif args.nonlin == "cube":
                    h = dy.cube(h)
                elif args.nonlin == "logistic":
                    h = dy.logistic(h)
                elif args.nonlin == "relu":
                    h = dy.rectify(h)
                elif args.nonlin == "elu":
                    h = dy.elu(h)
                elif args.nonlin == "selu":
                    h = dy.selu(h)
                elif args.nonlin == "softsign":
                    h = dy.softsign(h)
                elif args.nonlin == "swish":
                    h = dy.cmult(h, dy.logistic(h))
            final.append(dy.sum_dim(h, [0]))

        final = dy.concatenate(final)
        nll = -dy.log_softmax(final)
        dense_gold = []
        for i in range(len(options)):
            dense_gold.append(1.0 / len(gold) if i in gold else 0.0)
        answer = dy.inputTensor(dense_gold)
        loss = dy.transpose(answer) * nll
        predicted_link = np.argmax(final.npvalue())

        return loss, predicted_link
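
A minimal, self-contained sketch of the soft-target loss used above (the scores and gold set are toy assumptions): the gold probability mass is spread uniformly over the correct options and combined with the negative log-softmax of the option scores; dy.dot_product gives the same scalar as the transpose-times-vector form in the example.

import dynet as dy
import numpy as np

dy.renew_cg()
scores = dy.inputTensor([1.2, -0.3, 0.8, 0.1])  # one score per option (toy values)
gold = {0, 2}                                   # indices of the correct options (toy)
dense_gold = [1.0 / len(gold) if i in gold else 0.0 for i in range(4)]
nll = -dy.log_softmax(scores)                   # negative log-probabilities per option
loss = dy.dot_product(dy.inputTensor(dense_gold), nll)
predicted_link = int(np.argmax(scores.npvalue()))
print(loss.value(), predicted_link)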
Example #2
    def one_pass(self, datum):
        datum = dynet.inputTensor(datum)

        w1 = dynet.parameter(self.layers[0]['W'])
        b1 = dynet.parameter(self.layers[0]['b'])
        w2 = dynet.parameter(self.layers[1]['W'])
        b2 = dynet.parameter(self.layers[1]['b'])

        hidden = (datum * w1) + b1
        hidden_activation = dynet.logistic(hidden)
        output = (hidden_activation * w2) + b2
        output_activation = dynet.logistic(output)
        return output_activation
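
A hypothetical setup for the row-vector convention used by one_pass above (all names and sizes are assumptions, not the project's actual configuration): datum is a (1, input_dim) matrix so that datum * W yields a (1, hidden_dim) row, matching the order of the multiplications in the method.

import numpy as np
import dynet

model = dynet.ParameterCollection()
layers = [
    {'W': model.add_parameters((4, 8)), 'b': model.add_parameters((1, 8))},  # input -> hidden
    {'W': model.add_parameters((8, 1)), 'b': model.add_parameters((1, 1))},  # hidden -> output
]

dynet.renew_cg()
datum = dynet.inputTensor(np.array([[0.1, 0.2, 0.3, 0.4]]))  # a (1, 4) row vector
w1 = dynet.parameter(layers[0]['W'])
b1 = dynet.parameter(layers[0]['b'])
w2 = dynet.parameter(layers[1]['W'])
b2 = dynet.parameter(layers[1]['b'])
hidden_activation = dynet.logistic((datum * w1) + b1)               # shape (1, 8)
output_activation = dynet.logistic((hidden_activation * w2) + b2)   # shape (1, 1)
print(output_activation.npvalue())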
Example #3
    def transduce(self,inputs,masks,predict=False):

        if not self.init:
            print("No Initial state provided")
            return

        outputs = []
        batch_size = inputs[0].dim()[1]

        for idx,input_tensor in enumerate(inputs):
            recur_s = []
            cell_s = []
            out = []

            hidden = self.hidden_previous
            cell = self.cell_previous
            if not predict:
                input_tensor = dy.cmult(input_tensor,self.input_drop_mask)
                hidden = dy.cmult(hidden,self.recur_drop_mask)

            gates = dy.affine_transform([self.b.expr(),self.WXH.expr(),dy.concatenate([input_tensor,hidden])])
            iga = dy.pickrange(gates,0,self.recur_size)
            fga = dy.pickrange(gates,self.recur_size,2*self.recur_size)
            oga = dy.pickrange(gates,2*self.recur_size,3*self.recur_size)
            cga = dy.pickrange(gates,3*self.recur_size,4*self.recur_size)

            ig = dy.logistic(iga)
            fg = dy.logistic(fga)  # +self.forget_bias
            og = dy.logistic(oga)
            c_tilda = dy.tanh(cga)
            new_cell = dy.cmult(cell,fg) + dy.cmult(c_tilda,ig)
            new_hidden = dy.cmult(dy.tanh(new_cell),og)

            for jdx in range(batch_size):
                if masks[idx][jdx] == 1:
                    h_t = dy.pick_batch_elem(new_hidden,jdx)
                    recur_s.append(h_t)
                    cell_s.append(dy.pick_batch_elem(new_cell,jdx))
                    out.append(h_t)
                else:
                    recur_s.append(dy.pick_batch_elem(hidden,jdx))
                    cell_s.append(dy.pick_batch_elem(cell,jdx)) 
                    out.append(dy.zeros(self.recur_size))

            new_cell = dy.concatenate_to_batch(cell_s)
            new_hidden = dy.concatenate_to_batch(recur_s)
            self.cell_previous = new_cell
            self.hidden_previous = new_hidden
            outputs.append(dy.concatenate_to_batch(out))

        return outputs
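
The step above packs all four LSTM gate pre-activations into a single affine_transform and slices them apart with dy.pickrange. A self-contained sketch of that pattern, with assumed sizes and toy input vectors in place of real batched states, following the same gate order (input, forget, output, candidate):

import dynet as dy

hidden = 4
pc = dy.ParameterCollection()
pW = pc.add_parameters((4 * hidden, 2 * hidden))
pb = pc.add_parameters(4 * hidden)

dy.renew_cg()
W, b = dy.parameter(pW), dy.parameter(pb)
x = dy.inputVector([0.5, -0.1, 0.3, 0.2])        # stand-in input
h_prev = dy.inputVector([0.0, 0.1, -0.2, 0.4])   # stand-in previous hidden state
c_prev = dy.inputVector([0.2, 0.2, -0.1, 0.0])   # stand-in previous cell state
gates = dy.affine_transform([b, W, dy.concatenate([x, h_prev])])
i = dy.logistic(dy.pickrange(gates, 0, hidden))
f = dy.logistic(dy.pickrange(gates, hidden, 2 * hidden))
o = dy.logistic(dy.pickrange(gates, 2 * hidden, 3 * hidden))
c_tilda = dy.tanh(dy.pickrange(gates, 3 * hidden, 4 * hidden))
new_cell = dy.cmult(c_prev, f) + dy.cmult(c_tilda, i)
new_hidden = dy.cmult(dy.tanh(new_cell), o)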
Example #4
 def _calc_scores_embedded_mlp(self,
                               sentences,
                               W_emb,
                               W_mlp,
                               b_mlp,
                               V_mlp,
                               a_mlp,
                               meta_data=None):
     """
      calculating the score for a NN network (at a specific state along the learning phase)
     :param sentences: list
         list of lists of sentences (represented already as numbers and not letters)
     :param W_emb: lookup parameter (dynet obj). size: (emb_size x nwords)
         matrix holding the word embedding values
     :param W_mlp: model parameter (dynet obj). size: (hid_size, emb_size + meta_data_dim)
         matrix holding weights of the mlp phase
     :param b_mlp: model parameter (dynet obj). size: (hid_size,)
         vector holding weights of intercept for each hidden state
     :param V_mlp: model parameter (dynet obj). size: (2, hid_size)
          matrix holding weights of the logistic regression phase. 2 is there because this is a binary
         classification
     :param a_mlp: model parameter (dynet obj). size: (1,)
         intercept value for the logistic regression phase
     :param meta_data: dict or None
         meta data features for the model. If None - meta data is not used
     :return: dynet parameter. size: (2,)
         prediction of the instance to be a drawing one according to the model (vector of 2, first place is the
         probability to be a drawing team)
     """
     dy.renew_cg()
     # sentences_len = len(sentences)
     word_embs = [[dy.lookup(W_emb, w) for w in words]
                  for words in sentences]
     # taking the average over all words
     first_layer_avg = dy.average([dy.average(w_em) for w_em in word_embs])
     # case we don't wish to use meta features for the model
     if meta_data is None:
         h = dy.tanh((W_mlp * first_layer_avg) + b_mlp)
         prediction = dy.logistic((V_mlp * h) + a_mlp)
     else:
         meta_data_ordered = [
             value for key, value in sorted(meta_data.items())
         ]
         meta_data_vector = dy.inputVector(meta_data_ordered)
         first_layer_avg_and_meta_data = dy.concatenate(
             [first_layer_avg, meta_data_vector])
         h = dy.tanh((W_mlp * first_layer_avg_and_meta_data) + b_mlp)
         prediction = dy.logistic((V_mlp * h) + a_mlp)
     return prediction
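
A hypothetical parameter setup matching the sizes stated in the docstring above (the toy dimensions, values, and the model handle are assumptions, not the project's actual code):

import dynet as dy

emb_size, hid_size, nwords, meta_dim = 10, 6, 100, 3   # toy sizes
pc = dy.ParameterCollection()
W_emb = pc.add_lookup_parameters((nwords, emb_size))
W_mlp = pc.add_parameters((hid_size, emb_size + meta_dim))
b_mlp = pc.add_parameters(hid_size)
V_mlp = pc.add_parameters((2, hid_size))
a_mlp = pc.add_parameters(1)

sentences = [[1, 4, 7], [2, 9]]                            # word indices (toy)
meta_data = {'feat_a': 0.5, 'feat_b': 1.0, 'feat_c': 0.0}  # assumed meta features
# prediction = model._calc_scores_embedded_mlp(sentences, W_emb, W_mlp,
#                                              b_mlp, V_mlp, a_mlp, meta_data)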
Example #5
 def on_calc_additional_loss(self, reward):
     if not self.learn_segmentation:
         return None
     ret = LossBuilder()
     if self.length_prior_alpha > 0:
         reward += self.segment_length_prior * self.length_prior_alpha
     reward = dy.cdiv(reward - dy.mean_batches(reward),
                      dy.std_batches(reward))
     # Baseline Loss
     if self.use_baseline:
         baseline_loss = []
         for i, baseline in enumerate(self.bs):
             baseline_loss.append(dy.squared_distance(reward, baseline))
         ret.add_loss("Baseline", dy.esum(baseline_loss))
     # Reinforce Loss
     lmbd = self.lmbd.get_value(self.warmup_counter)
     if lmbd > 0.0:
         reinforce_loss = []
         # Calculating the loss of the baseline and reinforce
         for i in range(len(self.segment_decisions)):
             ll = dy.pick_batch(self.segment_logsoftmaxes[i],
                                self.segment_decisions[i])
             if self.use_baseline:
                 r_i = reward - self.bs[i]
             else:
                 r_i = reward
             reinforce_loss.append(dy.logistic(r_i) * ll)
         ret.add_loss("Reinforce", -dy.esum(reinforce_loss) * lmbd)
     # Total Loss
     return ret
Example #6
    def train(self, training_set):
        for sentence, eid, entity, trigger, label, pos, chars, rule in training_set:
            features = self.encode_sentence(sentence, pos, chars)
            loss = []            

            entity_embeds = features[entity]

            attention, context = self.self_attend(features)
            ty = dy.vecInput(len(sentence))
            ty.set([0 if i!=trigger else 1 for i in range(len(sentence))])
            loss.append(dy.binary_log_loss(dy.reshape(attention,(len(sentence),)), ty))
            h_t = dy.concatenate([context, entity_embeds])
            hidden = dy.tanh(self.lb.expr() * h_t + self.lb_bias.expr())
            out_vector = dy.reshape(dy.logistic(self.lb2.expr() * hidden + self.lb2_bias.expr()), (1,))
            label = dy.scalarInput(label)
            loss.append(dy.binary_log_loss(out_vector, label))

            pres = [0]
            for pattern in rule:
                probs = self.decoder(features, pres)
                loss.append(-dy.log(dy.pick(probs, pattern)))
                pres.append(pattern)

            loss = dy.esum(loss)
            loss.backward()
            self.trainer.update()
            dy.renew_cg()
Example #7
def test_item(model, document):
    word_lookups = []
    for preprocessed_sentence in document.preprocessed_sentences:
        seq = [
            model.wlookup[int(model.w2i.get(entry, 0))]
            for entry in preprocessed_sentence
        ]
        if len(seq) > 0:
            word_lookups.append(seq)

    sentences_lookups = []
    for seq in word_lookups:
        sentence_encode = encode_sequence(model, seq, model.sentence_rnn)
        global_max = max_pooling(sentence_encode)
        global_min = average_pooling(sentence_encode)
        if len(sentence_encode) > 0:
            last_out = sentence_encode[-1]
            context = dy.concatenate([last_out, global_max, global_min])
            sentences_lookups.append(context)

    document_encode = encode_sequence(model, sentences_lookups,
                                      model.document_rnn)
    global_max = max_pooling(document_encode)
    global_min = average_pooling(document_encode)
    if len(document_encode) > 0:
        last_out = document_encode[-1]  # last state of the document-level encoder
        context = dy.concatenate([last_out, global_max, global_min])
        y_pred = dy.logistic((model.mlp_w * context) + model.mlp_b)
        document.prediction_result = y_pred.scalar_value()
        dy.renew_cg()
        return document.prediction_result
    return 0
Example #8
def test_network(pWeight, input_dy):
    # add parameters to graph as expressions
    Weight = dy.parameter(pWeight)

    # return what the network returns
    output = dy.logistic(dy.tanh(Weight * input_dy))
    return output
Example #9
    def word_repr(self, char_seq, cembs):
        """
        obtain the word representation when given its character sequence
        :param char_seq: character index sequence
        :param cembs: character embedding sequence
        :return:
        """

        wlen = len(char_seq)
        if 'rgW%d' % wlen not in self.param_exprs:
            self.param_exprs['rgW%d' % wlen] = dy.parameter(
                self.params['reset_gate_W'][wlen - 1])
            self.param_exprs['rgb%d' % wlen] = dy.parameter(
                self.params['reset_gate_b'][wlen - 1])
            self.param_exprs['cW%d' % wlen] = dy.parameter(
                self.params['com_W'][wlen - 1])
            self.param_exprs['cb%d' % wlen] = dy.parameter(
                self.params['com_b'][wlen - 1])

        chars = dy.concatenate(cembs)  # [c1;c2...]
        # reset_gate = sigmoid(W_r_l * chars + b_r_l), shape: (m,char_dim)
        reset_gate = dy.logistic(self.param_exprs['rgW%d' % wlen] * chars +
                                 self.param_exprs['rgb%d' % wlen])
        # word = tanh(W_c_l * (reset_gate .* chars) + b_c_l)
        word = dy.tanh(self.param_exprs['cW%d' % wlen] *
                       dy.cmult(reset_gate, chars) +
                       self.param_exprs['cb%d' % wlen])
        if self.known_words is not None and tuple(
                char_seq) in self.known_words:
            # Frequent word = (word + word_embed) / 2
            return (word + dy.lookup(self.params['word_embed'],
                                     self.known_words[tuple(char_seq)])) / 2.
        return word
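
A self-contained sketch of the gated composition word_repr implements (dimensions and stand-in character embeddings are assumptions): a sigmoid reset gate over the concatenated character vectors, followed by a tanh projection down to the word dimension.

import dynet as dy

char_dim, wlen = 5, 3
word_dim = char_dim                      # assumed: the word vector keeps the character dimension
pc = dy.ParameterCollection()
p_rgW = pc.add_parameters((wlen * char_dim, wlen * char_dim))
p_rgb = pc.add_parameters(wlen * char_dim)
p_cW = pc.add_parameters((word_dim, wlen * char_dim))
p_cb = pc.add_parameters(word_dim)

dy.renew_cg()
rgW, rgb = dy.parameter(p_rgW), dy.parameter(p_rgb)
cW, cb = dy.parameter(p_cW), dy.parameter(p_cb)
cembs = [dy.inputVector([0.1 * i, -0.2, 0.3, 0.0, 0.1]) for i in range(wlen)]  # toy char embeddings
chars = dy.concatenate(cembs)
reset_gate = dy.logistic(rgW * chars + rgb)             # one gate value per character feature
word = dy.tanh(cW * dy.cmult(reset_gate, chars) + cb)   # compose down to a word vector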
Example #10
    def word_repr(self, char_seq, cembs):
        # obtain the word representation when given its character sequence

        wlen = len(char_seq)
        if 'rgW%d' % wlen not in self.param_exprs:
            self.param_exprs['rgW%d' % wlen] = dy.parameter(
                self.params['reset_gate_W'][wlen - 1])
            self.param_exprs['rgb%d' % wlen] = dy.parameter(
                self.params['reset_gate_b'][wlen - 1])
            self.param_exprs['cW%d' % wlen] = dy.parameter(
                self.params['com_W'][wlen - 1])
            self.param_exprs['cb%d' % wlen] = dy.parameter(
                self.params['com_b'][wlen - 1])

        chars = dy.concatenate(cembs)
        reset_gate = dy.logistic(self.param_exprs['rgW%d' % wlen] * chars +
                                 self.param_exprs['rgb%d' % wlen])
        word = dy.tanh(self.param_exprs['cW%d' % wlen] *
                       dy.cmult(reset_gate, chars) +
                       self.param_exprs['cb%d' % wlen])
        if self.known_words is not None and tuple(
                char_seq) in self.known_words:
            return (word + dy.lookup(self.params['word_embed'],
                                     self.known_words[tuple(char_seq)])) / 2.
        return word
Example #11
File: dy_model.py Project: jcyk/CWS
    def word_repr(self, char_seq):
        # obtain the word representation when given its character sequence
        wlen = len(char_seq)
        if 'rgW%d'%wlen not in self.param_exprs:
            self.param_exprs['rgW%d'%wlen] = dy.parameter(self.params['reset_gate_W'][wlen-1])
            self.param_exprs['rgb%d'%wlen] = dy.parameter(self.params['reset_gate_b'][wlen-1])
            self.param_exprs['cW%d'%wlen] = dy.parameter(self.params['com_W'][wlen-1])
            self.param_exprs['cb%d'%wlen] = dy.parameter(self.params['com_b'][wlen-1])
            self.param_exprs['ugW%d'%wlen] = dy.parameter(self.params['update_gate_W'][wlen-1])
            self.param_exprs['ugb%d'%wlen] = dy.parameter(self.params['update_gate_b'][wlen-1])
          
        chars = dy.concatenate(char_seq)
        reset_gate = dy.logistic(self.param_exprs['rgW%d'%wlen] * chars + self.param_exprs['rgb%d'%wlen])
        comb = dy.concatenate([dy.tanh(self.param_exprs['cW%d'%wlen] * dy.cmult(reset_gate,chars) + self.param_exprs['cb%d'%wlen]),chars])
        update_logits = self.param_exprs['ugW%d'%wlen] * comb + self.param_exprs['ugb%d'%wlen]
        
        update_gate = dy.transpose(dy.concatenate_cols([dy.softmax(dy.pickrange(update_logits,i*(wlen+1),(i+1)*(wlen+1))) for i in range(self.options['ndims'])]))
        
        # The following implementation of the softmax function is not safe, but faster...
        #exp_update_logits = dy.exp(dy.reshape(update_logits,(self.options['ndims'],wlen+1)))
        #update_gate = dy.cdiv(exp_update_logits, dy.concatenate_cols([dy.sum_cols(exp_update_logits)] *(wlen+1)))
        #assert (not np.isnan(update_gate.npvalue()).any())

        word = dy.sum_cols(dy.cmult(update_gate,dy.reshape(comb,(self.options['ndims'],wlen+1))))
        return word
Example #12
    def compute_loss(self, state, word):
        top_state = self.top_lstm.initial_state()
        top_state = top_state.set_s(self.top_initial_state)
        assert len(state.open_constits) == len(state.spine)
        for open_constit, spine_word in zip(state.open_constits, state.spine):
            constit_emb = open_constit.output()
            if self.residual and spine_word != -1:
                spine_word_emb = self.embed_word(spine_word)
                if False:
                    constit_emb += spine_word_emb
                else:
                    inp = dy.concatenate([constit_emb, spine_word_emb])
                    mask = self.gate_mlp(inp)
                    mask = dy.logistic(mask)
                    constit_emb = dy.cmult(1 - mask, constit_emb)
                    constit_emb = constit_emb + dy.cmult(mask, spine_word_emb)
            top_state = top_state.add_input(constit_emb)
        #debug_top_state = self.debug_embed()
        #assert np.isclose(top_state.output().npvalue(), debug_top_state.output().npvalue()).all()

        logits = self.final_mlp(top_state.output())
        loss = dy.pickneglogsoftmax(logits, word)

        #if not self.warned:
        #  sys.stderr.write('WARNING: compute_loss hacked to not include actual terminals.\n')
        #  self.warned = True
        #if word != 0 and word != 1:
        #  probs = -dy.softmax(logits)
        #  left_prob = dy.pick(probs, 0)
        #  right_prob = dy.pick(probs, 1)
        #  loss = dy.log(1 - left_prob - right_prob)
        #else:
        #  loss = dy.pickneglogsoftmax(logits, word)

        return loss
Example #13
    def __train(self, data):
        def encode_sequence(seq):
            rnn_forward = self.phrase_rnn[0].initial_state()
            for entry in seq:
                vec = self.wlookup[int(self.w2i.get(entry, 0))]
                rnn_forward = rnn_forward.add_input(vec)
            return rnn_forward.output()
        tagged_loss = 0
        untagged_loss = 0
        for index, sentence_report in enumerate(data):
            for phrase in sentence_report.all_phrases:
                loss = None
                encoded_phrase = encode_sequence(phrase)
                y_pred = dy.logistic((self.mlp_w*encoded_phrase) + self.mlp_b)

                if sentence_report.mark:
                    loss = dy.binary_log_loss(y_pred, dy.scalarInput(1))
                else:
                    loss = dy.binary_log_loss(y_pred, dy.scalarInput(0))
                if index % 1000 == 0:
                    print("Description : {}".format(index+1))
                    print("Marked {} Prediction Result {} : ".format(sentence_report.mark, y_pred.scalar_value()))
                    print("Tagged loss {} Untagged Loss {} Total loss {}".format(tagged_loss, untagged_loss, tagged_loss+untagged_loss))

                if sentence_report.mark:
                    tagged_loss += loss.scalar_value()/(index+1)
                else:
                    untagged_loss += loss.scalar_value()/(index+1)
                loss.backward()
                self.trainer.update()
                dy.renew_cg()
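
This example, and several later ones, all follow the same per-item training cycle: build a fresh graph, score with dy.logistic, take dy.binary_log_loss against a 0/1 target, backpropagate, and update. A compact, hedged sketch of that cycle with toy feature vectors standing in for the RNN encoder:

import dynet as dy

pc = dy.ParameterCollection()
trainer = dy.SimpleSGDTrainer(pc)
p_w = pc.add_parameters((1, 4))
p_b = pc.add_parameters(1)

toy_data = [([0.1, 0.4, -0.2, 0.3], 1), ([0.5, -0.1, 0.0, 0.2], 0)]  # (features, mark)
for features, mark in toy_data:
    dy.renew_cg()
    mlp_w, mlp_b = dy.parameter(p_w), dy.parameter(p_b)
    encoded_phrase = dy.inputVector(features)            # stand-in for the encoded phrase
    y_pred = dy.logistic((mlp_w * encoded_phrase) + mlp_b)
    loss = dy.binary_log_loss(y_pred, dy.scalarInput(mark))
    loss.backward()
    trainer.update()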
Example #14
    def _predict(self, batch, train=True):

        # load the network parameters
        W_hid = dy.parameter(self.W_hid)
        b_hid = dy.parameter(self.b_hid)
        w_clf = dy.parameter(self.w_clf)
        b_clf = dy.parameter(self.b_clf)

        probas = []
        # predict the probability of positive sentiment for each sentence
        for _, sent in batch:
            sent_embed = [dy.lookup(self.embed, w) for w in sent]
            dropout_embed = []
            # Task 3: apply dropout to the embeddings to regularize training
            if train:
                for embed in sent_embed:
                    embed = dy.dropout(embed, 0.5)
                    dropout_embed.append(embed)
                sent_embed = dy.average(dropout_embed)
            else:
                sent_embed = dy.average(sent_embed)

            # hid = tanh(b + W * sent_embed)
            # but it's faster to use affine_transform in dynet
            hid = dy.affine_transform([b_hid, W_hid, sent_embed])
            hid = dy.tanh(hid)

            y_score = dy.affine_transform([b_clf, w_clf, hid])
            y_proba = dy.logistic(y_score)
            probas.append(y_proba)

        return probas
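
The comment above notes that hid = tanh(b + W * sent_embed) is written with dy.affine_transform because the fused op is faster in DyNet. A small, hedged check (toy sizes assumed) that the two forms produce the same values:

import dynet as dy
import numpy as np

pc = dy.ParameterCollection()
p_W = pc.add_parameters((3, 4))
p_b = pc.add_parameters(3)

dy.renew_cg()
W, b = dy.parameter(p_W), dy.parameter(p_b)
x = dy.inputVector([0.1, 0.2, 0.3, 0.4])
fused = dy.affine_transform([b, W, x])   # b + W * x in one fused operation
naive = b + W * x
assert np.allclose(fused.npvalue(), naive.npvalue())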
Example #15
    def __call__(self, word_embeddings):
        highway_memories = word_embeddings
        for birnn_layer, highway_i_factor, \
            highway_o_factor, highway_bias in zip(self.birnn_layers,
                                                  self.highway_i_factors, self.highway_o_factors,
                                                  self.highway_biases):
            output_tensors = birnn_layer(highway_memories)

            if highway_memories is word_embeddings:
                highway_memories = output_tensors
            else:
                new_highway_memories = []
                for memory_vector, output_vector in zip(
                        highway_memories, output_tensors):
                    highway_bias_expr = highway_bias.expr()
                    highway_i_factor_expr = highway_i_factor.expr()
                    highway_o_factor_expr = highway_o_factor.expr()
                    transform_rate = dn.logistic(
                        dn.affine_transform([
                            highway_bias_expr, highway_i_factor_expr,
                            memory_vector, highway_o_factor_expr, output_vector
                        ]))

                    keep_rate = 1 - transform_rate
                    new_highway_memories.append(
                        dn.cmult(keep_rate, memory_vector) +
                        dn.cmult(transform_rate, output_vector))
                highway_memories = new_highway_memories
        return highway_memories
Example #16
def run_instance(tokens, polarity, model_elems, embeddings):

    # Renew the computational graph
    dy.renew_cg()

    builder = model_elems.builder
    V = model_elems.V
    W = model_elems.W
    b = model_elems.b

    # Fetch the embeddings for the current sentence
    words = tokens

    # print('words of a sentence:')
    # print([word for word in words])
    # print('embedding for empty character')
    # print(embeddings[''].npvalue())
    # input('press enter to continue')
    inputs = [embeddings[w] for w in words]

    # Run FF over the LSTM
    lstm = builder.initial_state()
    outputs = lstm.transduce(inputs)

    # Get the last embedding
    selected = outputs[-1]

    # Concatenate the polarity bit to the selected vector
    prediction_input = dy.concatenate([selected, dy.scalarInput(1 if polarity else 0)])
    #prediction_input = selected

    # Run the FF network for classification
    prediction = dy.logistic(V * (W * prediction_input + b))

    return prediction
Example #17
def __train(model, data):
    tagged_loss = 0
    untagged_loss = 0
    for index, sentence_report in enumerate(data):
        for phrase in sentence_report.all_phrases:
            loss = None
            encoded_phrase = __encode_sequence(model, phrase)

            if model.options.external_info != "no_info":
                encoded_phrase = dy.concatenate(
                    [encoded_phrase, model.doclookup[sentence_report.app_id]])

            y_pred = dy.logistic((model.mlp_w * encoded_phrase) + model.mlp_b)

            if sentence_report.mark:
                loss = dy.binary_log_loss(y_pred, dy.scalarInput(1))
            else:
                loss = dy.binary_log_loss(y_pred, dy.scalarInput(0))

            if sentence_report.mark:
                tagged_loss += loss.scalar_value() / (index + 1)
            else:
                untagged_loss += loss.scalar_value() / (index + 1)
            loss.backward()
            model.trainer.update()
            dy.renew_cg()
Example #18
    def expr_for_tree(self,xt,tree,node,is_train):
        if is_train:
            # in the training phase, perform dropout
            W_dropout = dy.dropout(self.WP, self.dropout_rate)
            WR_dropout = dy.dropout(self.WR, self.dropout_rate)
            WC_dropout = dy.dropout(self.WC, self.dropout_rate)
        else:
            W_dropout = self.WP
            WR_dropout = self.WR
            WC_dropout = self.WC
            
            
        if node is None or node.is_leaf():
            Wx = W_dropout * xt
#             h = dy.tanh(Wx + self.bc)
            h = dy.tanh(dy.affine_transform([self.bc, self.WC, xt]))
            return h
        
        #get child nodes        
        children=tree.children(node.identifier)
        children_sum=dy.zeros((self.n_out))
        for i in range(len(children)):
            hc=self.expr_for_tree(xt=xt,tree=tree,node=children[i],is_train=is_train)
            rt = dy.logistic(self.WR * xt +self.UR*hc+self.br)
            children_sum=children_sum+dy.cmult(rt, hc)
        
        Wx = W_dropout * xt
        h = dy.tanh(Wx + self.bp+self.UP*children_sum)
        return h     
Example #19
def train_item(args, model, sentence):
    loss = None
    seq = [
        model.wlookup[int(model.w2i.get(entry, 0))]
        for entry in sentence.preprocessed_sentence
    ]
    if len(seq) > 0:
        encoded_sequence = encode_sequence(model, seq, model.sentence_rnn)
        last_output = encoded_sequence[-1]
        global_max = max_pooling(encoded_sequence)
        global_min = average_pooling(encoded_sequence)
        context = dy.concatenate([last_output, global_max, global_min])
        y_pred = dy.logistic((model.mlp_w * context) + model.mlp_b)

        if sentence.permissions[args.permission_type]:
            loss = dy.binary_log_loss(y_pred, dy.scalarInput(1))
        else:
            loss = dy.binary_log_loss(y_pred, dy.scalarInput(0))

        loss.backward()
        model.trainer.update()
        loss_val = loss.scalar_value()
        dy.renew_cg()
        return loss_val
    return 0
Example #20
    def _predict(self, batch, train=True):

        # load the network parameters
        W_hid = dy.parameter(self.W_hid)
        b_hid = dy.parameter(self.b_hid)
        w_clf = dy.parameter(self.w_clf)
        b_clf = dy.parameter(self.b_clf)

        probas = []
        # predict the probability of positive sentiment for each sentence
        for _, sent in batch:

            sent_embed = [dy.lookup(self.embed, w) for w in sent]
            sent_embed = dy.average(sent_embed)

            # hid = tanh(b + W * sent_embed)
            # but it's faster to use affine_transform in dynet
            hid = dy.affine_transform([b_hid, W_hid, sent_embed])
            hid = dy.tanh(hid)

            y_score = dy.affine_transform([b_clf, w_clf, hid])
            y_proba = dy.logistic(y_score)
            probas.append(y_proba)

        return probas
Example #21
 def add_input(self, cur, x):
     h = cur.hidden_state
     c = cur.memory_cell
     i = dy.logistic(self._biaffine(x, self.Wi, h))
     f = dy.logistic(self._biaffine(x, self.Wf, h))
     o = dy.logistic(self._biaffine(x, self.Wo, h))
     u = dy.tanh(self._biaffine(x, self.Wu, h))
     c_out = dy.cmult(i, u) + dy.cmult(f, c)
     h_out = dy.cmult(o, dy.tanh(c_out))
     _cur = LSTMState(self,
                      cur.state_idx + 1,
                      prev_state=cur,
                      out=h_out,
                      hidden=h_out,
                      memory=c_out)
     return _cur
Example #22
 def _upsample(self, mgc, start, stop):
     mgc_index = start // len(self.upsample_w_s)
     ups_index = start % len(self.upsample_w_s)
     upsampled = []
     mgc_vect = dy.inputVector(mgc[mgc_index])
     for x in range(stop - start):
         sigm = dy.logistic(self.upsample_w_s[ups_index].expr(update=True) *
                            mgc_vect +
                            self.upsample_b_s[ups_index].expr(update=True))
         tnh = dy.tanh(self.upsample_w_t[ups_index].expr(update=True) *
                       mgc_vect +
                       self.upsample_b_t[ups_index].expr(update=True))
         r = dy.cmult(sigm, tnh)
         upsampled.append(r)
         ups_index += 1
         if ups_index == len(self.upsample_w_s):
             ups_index = 0
             mgc_index += 1
             if mgc_index == len(
                     mgc
             ):  # last frame is sometimes not processed, but it should have similar parameters
                 mgc_index -= 1
             else:
                 mgc_vect = dy.inputVector(mgc[mgc_index])
     return upsampled
Example #23
def test_item(model, sentence):
    seq = [
        model.wlookup[int(model.w2i.get(entry, 0))]
        for entry in sentence.preprocessed_sentence
    ]
    if len(seq) > 0:
        encoded_sequence = encode_sequence(model, seq, model.sentence_rnn)
        global_max = max_pooling(encoded_sequence)
        global_min = average_pooling(encoded_sequence)
        if len(encoded_sequence) > 0:
            att_mlp_outputs = []
            for e in encoded_sequence:
                mlp_out = (model.attention_w * e) + model.attention_b
                att_mlp_outputs.append(mlp_out)

            lst = []
            for o in att_mlp_outputs:
                lst.append(dy.exp(dy.sum_elems(dy.cmult(o,
                                                        model.att_context))))

            sum_all = dy.esum(lst)

            probs = [dy.cdiv(e, sum_all) for e in lst]
            att_context = dy.esum(
                [dy.cmult(p, h) for p, h in zip(probs, encoded_sequence)])
            context = dy.concatenate([att_context, global_max, global_min])
            y_pred = dy.logistic((model.mlp_w * context) + model.mlp_b)
            sentence.prediction_result = y_pred.scalar_value()
            dy.renew_cg()
            return sentence.prediction_result
    return 0
Example #24
        def add_input(self, input_vec):

            x = dynet.concatenate([input_vec, self.h])

            i = dynet.logistic(self.W_i * x + self.b_i)
            f = dynet.logistic(self.W_f * x + self.b_f)
            g = dynet.tanh(self.W_c * x + self.b_c)
            o = dynet.logistic(self.W_o * x + self.b_o)

            c = dynet.cmult(f, self.c) + dynet.cmult(i, g)
            h = dynet.cmult(o, dynet.tanh(c))

            self.c = c
            self.h = h
            self.outputs.append(h)

            return self
Example #25
 def highway(input_, train):
     for func, weight, bias in zip(funcs, weights, biases):
         proj = dy.rectify(func(input_, train))
         transform = dy.logistic(dy.affine_transform([bias, weight,
                                                      input_]))
         input_ = dy.cmult(transform, proj) + dy.cmult(
             input_, 1 - transform)
     return input_
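
highway above (and Example #37 later) applies the standard highway combination: a sigmoid transform gate decides, per dimension, how much of the projected input to take versus how much of the raw input to carry through. A minimal, self-contained sketch under assumed sizes, with the projection simplified to a single ReLU layer:

import dynet as dy

dim = 6
pc = dy.ParameterCollection()
p_proj = pc.add_parameters((dim, dim))
p_gate_W = pc.add_parameters((dim, dim))
p_gate_b = pc.add_parameters(dim)

dy.renew_cg()
W_proj = dy.parameter(p_proj)
W_gate, b_gate = dy.parameter(p_gate_W), dy.parameter(p_gate_b)
x = dy.inputVector([0.1, -0.2, 0.3, 0.0, 0.5, -0.4])                # stand-in input
proj = dy.rectify(W_proj * x)                                       # candidate update
transform = dy.logistic(dy.affine_transform([b_gate, W_gate, x]))   # transform gate
out = dy.cmult(transform, proj) + dy.cmult(x, 1 - transform)        # highway mix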
Example #26
    def step(self, x, hx, cx):
        if not self.test:
            if self.dropout_x > 0:
                x = dy.cmult(self.dropout_mask_x, x)
            if self.dropout_h > 0:
                hx = dy.cmult(self.dropout_mask_h, hx)

        gates = dy.affine_transform(
            [self.bias, self.weight_ih, x, self.weight_hh, hx])
        i = dy.pickrange(gates, 0, self.n_hidden)
        f = dy.pickrange(gates, self.n_hidden, self.n_hidden * 2)
        g = dy.pickrange(gates, self.n_hidden * 2, self.n_hidden * 3)
        o = dy.pickrange(gates, self.n_hidden * 3, self.n_hidden * 4)

        i, f, g, o = dy.logistic(i), dy.logistic(f), dy.tanh(g), dy.logistic(o)
        cy = dy.cmult(f, cx) + dy.cmult(i, g)
        hy = dy.cmult(o, dy.tanh(cy))
        return hy, cy
Example #27
  def transduce(self, embed_sent):
    src = embed_sent.as_tensor()

    sent_len = src.dim()[0][1]
    src_width = 1
    batch_size = src.dim()[1]
    pad_size = (self.window_receptor-1)//2 #TODO adapt it also for even window size

    src = dy.concatenate([dy.zeroes((self.input_dim,pad_size),batch_size=batch_size),src,dy.zeroes((self.input_dim,pad_size), batch_size=batch_size)],d=1)
    padded_sent_len = sent_len + 2*pad_size

    conv1 = dy.parameter(self.pConv1)
    bias1 = dy.parameter(self.pBias1)
    src_chn = dy.reshape(src,(self.input_dim,padded_sent_len,1),batch_size=batch_size)
    cnn_layer1 = dy.conv2d_bias(src_chn,conv1,bias1,stride=[1,1])

    hidden_layer = dy.reshape(cnn_layer1,(self.internal_dim,sent_len,1),batch_size=batch_size)
    if self.non_linearity == 'linear':
        hidden_layer = hidden_layer
    elif self.non_linearity == 'tanh':
        hidden_layer = dy.tanh(hidden_layer)
    elif self.non_linearity == 'relu':
        hidden_layer = dy.rectify(hidden_layer)
    elif self.non_linearity == 'sigmoid':
        hidden_layer = dy.logistic(hidden_layer)

    for conv_hid, bias_hid in self.builder_layers:
        hidden_layer = dy.conv2d_bias(hidden_layer, dy.parameter(conv_hid),dy.parameter(bias_hid),stride=[1,1])
        hidden_layer = dy.reshape(hidden_layer,(self.internal_dim,sent_len,1),batch_size=batch_size)
        if self.non_linearity == 'linear':
            hidden_layer = hidden_layer
        elif self.non_linearity == 'tanh':
            hidden_layer = dy.tanh(hidden_layer)
        elif self.non_linearity == 'relu':
            hidden_layer = dy.rectify(hidden_layer)
        elif self.non_linearity == 'sigmoid':
            hidden_layer = dy.logistic(hidden_layer)
    last_conv = dy.parameter(self.last_conv)
    last_bias = dy.parameter(self.last_bias)
    output = dy.conv2d_bias(hidden_layer,last_conv,last_bias,stride=[1,1])
    output = dy.reshape(output, (sent_len,self.output_dim),batch_size=batch_size)
    output_seq = ExpressionSequence(expr_tensor=output)
    self._final_states = [FinalTransducerState(output_seq[-1])]
    return output_seq
Example #28
    def _calc_scores_two_layers(self, sentences, W_emb, first_lstm, W_mlp, b_mlp, V_mlp, a_mlp, meta_data=None):
        """
        calculating the score for a parallel LSTM network (at a specific state along the learning phase)
        :param sentences: list
            list of lists of sentences (represented already as numbers and not letters)
        :param first_lstm:

        :param W_mlp: model parameter (dynet obj). size: (hid_size, emb_size + meta_data_dim)
            matrix holding weights of the mlp phase
        :param b_mlp: model parameter (dynet obj). size: (hid_size,)
            vector holding weights of intercept for each hidden state
        :param V_mlp: model parameter (dynet obj). size: (2, hid_size)
            matrix holding weights of the logistic regression phase. 2 is there because this is a binary
            classification
        :param a_mlp: model parameter (dynet obj). size: (1,)
            intercept value for the logistic regression phase
        :param meta_data: dict or None
            meta data features for the model. If None - meta data is not used
        :return: dynet parameter. size: (2,)
            prediction of the instance to be a drawing one according to the model (vector of 2, first place is the
            probability to be a drawing team)
        """
        dy.renew_cg()
        sentences_len = len(sentences)
        word_embs = [[dy.lookup(W_emb, w) for w in words] for words in sentences]
        first_init = first_lstm.initial_state()
        first_embs=[]
        for wb in word_embs:
            first_embs.append(first_init.transduce(wb))
        last_comp_in_first_layer = [i[-1] for i in first_embs]
        # calculating the avg over all last components of the LSTMs
        # to take the maximum instead, one can use dy.emax rather than dy.average (though this is not recommended)
        first_layer_avg = dy.average(last_comp_in_first_layer)
        if meta_data is None:
            h = dy.tanh((W_mlp * first_layer_avg) + b_mlp)
            prediction = dy.logistic((V_mlp * h) + a_mlp)
        else:
            meta_data_ordered = [value for key, value in sorted(meta_data.items())]
            meta_data_vector = dy.inputVector(meta_data_ordered)
            first_layer_avg_and_meta_data = dy.concatenate([first_layer_avg, meta_data_vector])
            h = dy.tanh((W_mlp * first_layer_avg_and_meta_data) + b_mlp)
            prediction = dy.logistic((V_mlp * h) + a_mlp)
        return prediction
Example #29
    def transduce(self, embed_sent):
        src = embed_sent.as_tensor()

        W = dy.parameter(self.pW)
        b = dy.parameter(self.pb)

        l1 = dy.affine_transform([b, W, src])
        output = l1
        if self.nonlinearity == 'linear':
            output = l1
        elif self.nonlinearity == 'sigmoid':
            output = dy.logistic(l1)
        elif self.nonlinearity == 'tanh':
            output = 2 * dy.logistic(l1) - 1
        elif self.nonlinearity == 'relu':
            output = dy.rectify(l1)
        output_seq = ExpressionSequence(expr_tensor=output)
        self._final_states = [FinalTransducerState(output_seq[-1])]
        return output_seq
Example #30
        def add_input(self, input_vec):
            """
            Note that this function updates the existing State object!
            """
            x = dynet.concatenate([input_vec, self.h])

            i = dynet.logistic(self.W_i * x + self.b_i)
            f = dynet.logistic(self.W_f * x + self.b_f)
            g = dynet.tanh(self.W_c * x + self.b_c)
            o = dynet.logistic(self.W_o * x + self.b_o)

            c = dynet.cmult(f, self.c) + dynet.cmult(i, g)
            h = dynet.cmult(o, dynet.tanh(c))

            self.c = c
            self.h = h
            self.outputs.append(h)

            return self
Example #31
 def __call__(self, src):
     src = src.as_tensor()
     # convolutional layer
     src = padding(src,
                   src.dim()[0][0],
                   src.dim()[0][1], self.filter_width, self.stride,
                   src.dim()[1])
     l1 = dy.rectify(
         dy.conv2d(src,
                   dy.parameter(self.filter_conv),
                   stride=[self.stride, self.stride],
                   is_valid=True))
     timestep = l1.dim()[0][1]
     features = l1.dim()[0][2]
     batch_size = l1.dim()[1]
      # transpose l1 to be (timestep, dim), but keep the batch_size.
     rhn_in = dy.reshape(l1, (timestep, features), batch_size=batch_size)
     rhn_in = [dy.pick(rhn_in, i) for i in range(timestep)]
     for l in range(self.rhn_num_hidden_layers):
         rhn_out = []
         # initialize a random vector for the first state vector, keep the same batch size.
         prev_state = dy.parameter(self.init[l])
         # begin recurrent high way network
         for t in range(timestep):
             for m in range(0, self.rhn_microsteps):
                 H = dy.affine_transform([
                     dy.parameter(self.recur[l][m][1]),
                     dy.parameter(self.recur[l][m][0]), prev_state
                 ])
                 T = dy.affine_transform([
                     dy.parameter(self.recur[l][m][3]),
                     dy.parameter(self.recur[l][m][2]), prev_state
                 ])
                 if m == 0:
                     H += dy.parameter(self.linear[l][0]) * rhn_in[t]
                     T += dy.parameter(self.linear[l][1]) * rhn_in[t]
                 H = dy.tanh(H)
                 T = dy.logistic(T)
                 prev_state = dy.cmult(1 - T, prev_state) + dy.cmult(
                     T, H)  # ((1024, ), batch_size)
             rhn_out.append(prev_state)
         if self.residual and l > 0:
             rhn_out = [sum(x) for x in zip(rhn_out, rhn_in)]
         rhn_in = rhn_out
     # Compute the attention-weighted average of the activations
     rhn_in = dy.concatenate_cols(rhn_in)
     scores = dy.transpose(dy.parameter(self.attention[0][1])) * dy.tanh(
         dy.parameter(self.attention[0][0]) *
         rhn_in)  # ((1,510), batch_size)
     scores = dy.reshape(scores, (scores.dim()[0][1], ),
                         batch_size=scores.dim()[1])
     attn_out = rhn_in * dy.softmax(
         scores
     )  # # rhn_in.as_tensor() is ((1024,510), batch_size) softmax is ((510,), batch_size)
     return ExpressionSequence(expr_tensor=attn_out)
Example #32
def calc_sent_loss(sent):
  # Create a computation graph
  dy.renew_cg()
  
  # Get embeddings for the sentence
  emb = [W_w_p[x] for x in sent]

  # Sample K negative words for each predicted word at each position
  all_neg_words = np.random.choice(nwords, size=2*N*K*len(emb), replace=True, p=word_probabilities)

  # W_w = dy.parameter(W_w_p)
  # Step through the sentence and calculate the negative and positive losses
  all_losses = [] 
  for i, my_emb in enumerate(emb):
    neg_words = all_neg_words[i*K*2*N:(i+1)*K*2*N]
    pos_words = ([sent[x] if x >= 0 else S for x in range(i-N,i)] +
                 [sent[x] if x < len(sent) else S for x in range(i+1,i+N+1)])
    neg_loss = -dy.log(dy.logistic(-dy.dot_product(my_emb, dy.lookup_batch(W_c_p, neg_words))))
    pos_loss = -dy.log(dy.logistic(dy.dot_product(my_emb, dy.lookup_batch(W_c_p, pos_words))))
    all_losses.append(dy.sum_batches(neg_loss) + dy.sum_batches(pos_loss))
  return dy.esum(all_losses)
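
calc_sent_loss above batches this idea over K sampled negatives per position; a hedged, single-pair illustration (toy indices and sizes) of the underlying logistic negative-sampling objective: maximize log sigmoid(e . c) for an observed context word and log sigmoid(-e . c) for a sampled negative.

import dynet as dy

emb_size, nwords = 8, 20                 # toy sizes
pc = dy.ParameterCollection()
W_w_p = pc.add_lookup_parameters((nwords, emb_size))   # word embeddings
W_c_p = pc.add_lookup_parameters((nwords, emb_size))   # context embeddings

dy.renew_cg()
word, pos_ctx, neg_ctx = 3, 5, 11        # toy word / context / negative indices
my_emb = dy.lookup(W_w_p, word)
pos_loss = -dy.log(dy.logistic(dy.dot_product(my_emb, dy.lookup(W_c_p, pos_ctx))))
neg_loss = -dy.log(dy.logistic(-dy.dot_product(my_emb, dy.lookup(W_c_p, neg_ctx))))
loss = pos_loss + neg_loss
print(loss.value())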
Example #33
 def expr_for_tree(self, tree):
     if tree.isleaf():
         return self.E[self.w2i.get(tree.label,0)]
     if len(tree.children) == 1:
         assert(tree.children[0].isleaf())
         emb = self.expr_for_tree(tree.children[0])
         Wi,Wo,Wu   = [dy.parameter(w) for w in self.WS]
         bi,bo,bu,_ = [dy.parameter(b) for b in self.BS]
         i = dy.logistic(Wi*emb + bi)
         o = dy.logistic(Wo*emb + bo)
         u = dy.tanh(    Wu*emb + bu)
         c = dy.cmult(i,u)
         expr = dy.cmult(o,dy.tanh(c))
         return expr
     assert(len(tree.children) == 2),tree.children[0]
     e1 = self.expr_for_tree(tree.children[0])
     e2 = self.expr_for_tree(tree.children[1])
     Ui,Uo,Uu = [dy.parameter(u) for u in self.US]
     Uf1,Uf2 = [dy.parameter(u) for u in self.UFS]
     bi,bo,bu,bf = [dy.parameter(b) for b in self.BS]
     e = dy.concatenate([e1,e2])
     i = dy.logistic(Ui*e + bi)
     o = dy.logistic(Uo*e + bo)
     f1 = dy.logistic(Uf1*e1 + bf)
     f2 = dy.logistic(Uf2*e2 + bf)
     u = dy.tanh(    Uu*e + bu)
     c = dy.cmult(i,u) + dy.cmult(f1,e1) + dy.cmult(f2,e2)
     h = dy.cmult(o,dy.tanh(c))
     expr = h
     return expr
Example #34
 def transduce(self, inputs, train):
     xs = inputs[:self.max_length]
     if not xs:
         return []
     for i in range(self.lstm_layers):
         for n, d in ("f", 1), ("b", -1):
             Wr, br, Wh = [self.params["%s%d%s" % (p, i, n)] for p in ("Wr", "br", "Wh")]
             hs_ = self.params["rnn%d%s" % (i, n)].initial_state().transduce(xs[::d])
             hs = [hs_[0]]
             for t in range(1, len(hs_)):
                 r = dy.logistic(Wr * dy.concatenate([hs[t - 1], xs[t]]) + br)
                 hs.append(dy.cmult(r, hs_[t]) + dy.cmult(1 - r, Wh * xs[t]))
             xs = hs
             if train:
                 x = dy.dropout_dim(dy.concatenate(xs, 1), 1, self.dropout)
                 xs = [dy.pick(x, i, 1) for i in range(len(xs))]
     return xs
Example #35
def calc_sent_loss(sent):
  # Create a computation graph
  dy.renew_cg()

  
  # Get embeddings for the sentence
  emb = [W_w_p[x] for x in sent]

  # Step through the sentence and calculate binary prediction losses
  all_losses = [] 
  for i, my_emb in enumerate(emb):
    scores = dy.logistic(W_c * my_emb)
    pos_words = ([sent[x] if x >= 0 else S for x in range(i-N,i)] +
                 [sent[x] if x < len(sent) else S for x in range(i+1,i+N+1)])
    word_repr = [[float(y) for y in np.binary_repr(x).zfill(nbits)] for x in pos_words]
    word_repr = [dy.inputVector(x) for x in word_repr]
    all_losses.extend([dy.binary_log_loss(scores, x) for x in word_repr])
  return dy.esum(all_losses)
Example #36
pa = m.add_parameters(1, device="CPU")

if len(sys.argv) == 2:
  m.populate_from_textfile(sys.argv[1])

dy.renew_cg()
W1, b1, W2, b2, V, a = dy.parameter(pW1, pb1, pW2, pb2, pV, pa)

x = dy.vecInput(2, "GPU:1")
y = dy.scalarInput(0, "CPU")
h1 = dy.tanh((W1*x) + b1)
h1_gpu0 = dy.to_device(h1, "GPU:0")
h2 = dy.tanh((W2*h1_gpu0) + b2)
h2_cpu = dy.to_device(h2, "CPU")
if xsent:
    y_pred = dy.logistic((V*h2_cpu) + a)
    loss = dy.binary_log_loss(y_pred, y)
    T = 1 
    F = 0 
else:
    y_pred = (V*h2_cpu) + a 
    loss = dy.squared_distance(y_pred, y)
    T = 1 
    F = -1


for iter in range(ITERATIONS):
    mloss = 0.0 
    for mi in range(4):
        x1 = mi % 2 
        x2 = (mi // 2) % 2 
Example #37
 def highway(input_, train):
     for func, weight, bias in zip(funcs, weights, biases):
         proj = dy.rectify(func(input_, train))
         transform = dy.logistic(dy.affine_transform([bias, weight, input_]))
         input_ = dy.cmult(transform, proj) + dy.cmult(input_, 1 - transform)
     return input_
Example #38
m = dy.Model()
trainer = dy.SimpleSGDTrainer(m)

W = m.add_parameters((HIDDEN_SIZE, 2))
b = m.add_parameters(HIDDEN_SIZE)
V = m.add_parameters((1, HIDDEN_SIZE))
a = m.add_parameters(1)

if len(sys.argv) == 2:
  m.populate_from_textfile(sys.argv[1])

x = dy.vecInput(2)
y = dy.scalarInput(0)
h = dy.tanh((W*x) + b)
if xsent:
    y_pred = dy.logistic((V*h) + a)
    loss = dy.binary_log_loss(y_pred, y)
    T = 1
    F = 0
else:
    y_pred = (V*h) + a
    loss = dy.squared_distance(y_pred, y)
    T = 1
    F = -1


for iter in range(ITERATIONS):
    mloss = 0.0
    for mi in range(4):
        x1 = mi % 2
        x2 = (mi // 2) % 2