Exemple #1
0
    def encode_pt(self, X, train=False):
        """Build one tag distribution per token of a sentence.

        A fresh computation graph is started on every call.  ``X`` is read as
        three parallel sequences: ``X[0][i]`` are the character ids of token
        ``i``, ``X[1][i]`` is its word id and ``X[2][i]`` its tag ids.  A word
        id or tag id of ``0`` is represented by an all-zero vector instead of
        a lookup.

        Args:
            X: the three parallel feature sequences described above.
            train: dropout is applied to each token vector only when this is
                exactly ``True``.

        Returns:
            A list holding one softmax expression per token.
        """
        dy.renew_cg()
        out_weight = dy.parameter(self.w_pos)
        out_bias = dy.parameter(self.b_pos)

        token_inputs = []
        for position, char_ids in enumerate(X[0]):
            word_id = X[1][position]
            tag_ids = X[2][position]

            # Character view: last output of the character sequence model.
            char_vec = self.char_seq_model.transduce(
                [self.UNI[char_id] for char_id in char_ids])[-1]

            # Tag view: sum of the tag embeddings (id 0 contributes zeros).
            tag_vec = dy.esum([
                dy.inputVector(np.zeros(self.dim_tag_emb)) if tag_id == 0
                else self.POS[tag_id]
                for tag_id in tag_ids
            ])

            # Word view: embedding lookup, or zeros for id 0.
            word_vec = (dy.inputVector(np.zeros(self.dim_word)) if word_id == 0
                        else self.WORD[word_id])

            token_vec = dy.concatenate([word_vec, char_vec, tag_vec])
            if train is True:
                token_vec = dy.dropout(token_vec, self.dropout_rate)
            token_inputs.append(token_vec)

        states = self.pos_model.transduce(token_inputs)
        return [dy.softmax(out_weight * state + out_bias) for state in states]
Exemple #2
0
    def forward_backward(self, observations):
        """Combine forward and backward messages over a two-tag chain.

        Both passes start from a ``[0, 0]`` message and run for
        ``len(observations) - 1`` steps, reducing candidate scores with
        ``self.log_sum_exp``.  The forward pass adds the score of the
        candidate tag (broadcast to both entries) taken from observation
        ``i``; the backward pass adds the whole observation vector, walking
        the observations from the last one backwards.

        Args:
            observations: sequence of per-position score expressions.

        Returns:
            A list with, for each step, the softmax probability of tag index 1
            computed from the summed forward and backward messages.
        """
        n_steps = len(observations) - 1

        # Forward messages.
        message = dy.inputVector([0, 0])
        forward_messages = []
        for step in range(n_steps):
            reduced = []
            for tag in range(2):
                emission = dy.concatenate([dy.pick(observations[step], tag)] * 2)
                candidate = message + self.transitions[tag] + emission
                reduced.append(self.log_sum_exp(candidate))
            message = dy.concatenate(reduced)
            forward_messages.append(message)

        # Backward messages, produced from the end of the sequence inwards.
        message = dy.inputVector([0, 0])
        backward_messages = []
        for step in range(n_steps):
            obs = observations[len(observations) - step - 1]
            reduced = [
                self.log_sum_exp(message + self.transitions[tag] + obs)
                for tag in range(2)
            ]
            message = dy.concatenate(reduced)
            backward_messages.append(message)

        # Align the two passes and turn each combined score into a probability.
        combined = [
            fwd + bwd
            for fwd, bwd in zip(forward_messages, reversed(backward_messages))
        ]
        return [dy.pick(dy.softmax(score), 1) for score in combined]
Exemple #3
0
    def scorer(self, q_d_hists, q_idf, bm25_score, overlap_features, p):
        """
        Compute the relevance score expression for one query/document pair.

        Every histogram in ``q_d_hists`` goes through the MLP (``W_1``/``b_1``,
        then ``mlp_layers`` hidden layers from ``W_n``/``b_n``, then
        ``W_last``/``b_last``).  The per-term outputs are weighted by gating
        values derived from ``q_idf`` and ``w_g``, concatenated with
        ``overlap_features`` and passed through the final linear layer
        (``W_scores``/``b_scores``).

        ``p`` is the probability of keeping the BM25 feature active in the
        inverted-dropout mask applied to it.
        NOTE(review): the BM25 feature is computed and masked but is not part
        of the returned expression -- confirm whether that is intended.
        """
        idf_expr = dy.inputVector(q_idf)
        bm25_expr = dy.scalarInput(bm25_score)
        overlap_expr = dy.inputVector(overlap_features)

        # One MLP pass per query term histogram.
        per_term = []
        for hist in q_d_hists:
            layer = dy.reshape(dy.inputVector(hist), (1, len(hist)))
            layer = dy.rectify(layer * self.W_1 + self.b_1)
            for depth in range(self.mlp_layers):
                layer = dy.rectify(layer * self.W_n[depth] + self.b_n[depth])
            per_term.append(layer * self.W_last + self.b_last)

        # Term gating.
        gate = idf_expr * self.w_g

        # BM25 branch with a sampled keep/drop mask scaled by 1/p.
        bm25_feature = bm25_expr * self.W_bm25 + self.b_bm25
        keep_mask = dy.scalarInput(1)
        keep_mask.set((np.random.rand(1) < p) / p)
        bm25_feature *= keep_mask

        # Gated combination of the per-term MLP outputs.
        stacked_terms = dy.transpose(dy.concatenate(per_term))
        drmm_score = stacked_terms * dy.reshape(gate, (len(q_idf), 1))

        # Extra-features layer.
        features = dy.transpose(dy.concatenate([drmm_score, overlap_expr]))
        return features * self.W_scores + self.b_scores
Exemple #4
0
  def transform(self, input_expr: dy.Expression, mask: Optional[batchers.Mask]=None):
    """
    Apply batch norm.

    In training mode the mean and standard deviation are computed from ``input_expr`` and the running population
    statistics are updated with momentum ``BN_MOMENTUM``; otherwise the stored population statistics are used.
    In both modes the output is ``gamma * (x - mean) / (std + BN_EPS) + beta``.

    Args:
      input_expr: input
      mask: compute statistics only over unmasked parts of the input expression

    Returns:
      The normalized expression; its dimensions are asserted to equal those of ``input_expr``.
    """
    dim_in = input_expr.dim()
    param_bn_gamma = dy.parameter(self.gamma)
    param_bn_beta = dy.parameter(self.beta)
    if self.train:
      num_unmasked = 0
      if mask is not None:
        input_expr = set_masked_to_mean(mask, input_expr, self.time_first)
        num_unmasked = (mask.np_arr.size - np.count_nonzero(mask.np_arr)) * broadcast_factor(mask, input_expr)
      bn_mean = dy.moment_dim(input_expr, self.get_stat_dimensions(), 1, True, num_unmasked)
      # Negate exactly once: the previous ``-reshape(-bn_mean)`` cancelled out to +mean, so training
      # normalized ``x + mean`` while the inference branch below uses ``x - mean``.
      neg_bn_mean_reshaped = -dy.reshape(bn_mean, self.get_normalizer_dimensionality())
      # Exponential moving average: running = (1 - momentum) * running + momentum * batch.
      self.population_running_mean += (-BN_MOMENTUM) * self.population_running_mean + BN_MOMENTUM * bn_mean.npvalue()
      bn_std = dy.std_dim(input_expr, self.get_stat_dimensions(), True, num_unmasked)
      self.population_running_std += (-BN_MOMENTUM) * self.population_running_std + BN_MOMENTUM * bn_std.npvalue()
    else:
      neg_bn_mean_reshaped = -dy.reshape(dy.inputVector(self.population_running_mean), self.get_normalizer_dimensionality())
      bn_std = dy.inputVector(self.population_running_std)
    bn_numerator = input_expr + neg_bn_mean_reshaped
    bn_xhat = dy.cdiv(bn_numerator, dy.reshape(bn_std, self.get_normalizer_dimensionality()) + BN_EPS)
    bn_y = dy.cmult(param_bn_gamma, bn_xhat) + param_bn_beta # y = gamma * xhat + beta
    dim_out = bn_y.dim()
    # Hand the (possibly updated) running statistics to the serialization hook.
    self.save_processed_arg("population_running_mean", self.population_running_mean)
    self.save_processed_arg("population_running_std", self.population_running_std)
    assert dim_out == dim_in
    return bn_y
Exemple #5
0
 def _upsample(self, mgc, start, stop):
     """Produce ``stop - start`` upsampled expressions from the frames in ``mgc``.

     Sample position ``start`` is mapped to a frame index and a slot within
     the frame using ``len(self.upsample_w_t)`` as the number of slots per
     frame.  Each output is ``tanh(W[slot] * pair + b[slot])`` where ``pair``
     is the current frame concatenated with its successor (the last frame is
     paired with itself).

     :param mgc: indexable sequence of per-frame vectors
     :param start: first sample position (inclusive)
     :param stop: last sample position (exclusive)
     :return: list of ``stop - start`` expressions
     """
     def frame_pair(index):
         # Current frame followed by the next one; the final frame repeats itself.
         following = index + 1
         if following == len(mgc):
             following -= 1
         return dy.concatenate([dy.inputVector(mgc[index]), dy.inputVector(mgc[following])])

     slots_per_frame = len(self.upsample_w_t)
     frame = int(start / slots_per_frame)
     slot = start % slots_per_frame
     context = frame_pair(frame)

     upsampled = []
     for _ in range(stop - start):
         weight = self.upsample_w_t[slot].expr(update=True)
         bias = self.upsample_b_t[slot].expr(update=True)
         upsampled.append(dy.tanh(weight * context + bias))

         slot += 1
         if slot != slots_per_frame:
             continue

         # Slot counter wrapped: move on to the next frame.
         slot = 0
         frame += 1
         if frame == len(mgc):
             # Past the last frame: stay on it and keep the current context.
             frame -= 1
         else:
             context = frame_pair(frame)
     return upsampled
 def train(self, epoch):
     """Train the network for ``epoch`` passes over ``self.minibathces``.

     For each minibatch a new computation graph is built, the per-word
     losses are averaged, and one trainer update is applied.  After every
     epoch the mean minibatch loss and the elapsed time in minutes are
     printed.

     :param epoch: number of passes to run
     :return: list with the mean minibatch loss of each epoch
     """
     history = []
     for epoch_index in range(epoch):
         started = time.time()
         epoch_loss = 0
         print('Epoch ' + str(epoch_index + 1) + ':')
         for batch in self.minibathces:
             dy.renew_cg()
             word_losses = []
             for word in batch:
                 sample = self.training_data[word]
                 inputs = dy.inputVector(sample[0])
                 target = dy.inputVector(sample[1])
                 prediction = self.feedForward(inputs)
                 word_losses.append(self.calculateLoss(prediction, target))
             batch_loss = dy.esum(word_losses) / len(word_losses)
             epoch_loss += batch_loss.value()
             batch_loss.backward()
             self.trainer.update()
         finished = time.time()
         mean_loss = epoch_loss / len(self.minibathces)
         print('Loss = {0}\nTime it takes = {1} minutes.'.format(
             mean_loss, (finished - started) / 60))
         history.append(mean_loss)
     return history
    def loss(self, instance):
        """Predict ``instance.ys_distr_vec`` from ``instance.xs_distr_vec``.

        The parameters are selected by ``instance.transformation``.  When that
        transformation is not in ``self.known_transformations`` a warning is
        written through ``tqdm`` and the transformation named by the first key
        of ``self.param_dict`` is used instead (an arbitrary choice).

        The model is ``W2 * tanh(W1 * x + b1) + b2``.

        Returns:
            A ``(prediction, loss)`` pair where ``loss`` is the squared
            distance between the prediction and the target vector.
        """
        trans = instance.transformation
        if trans not in self.known_transformations:
            fallback = list(self.param_dict.keys())[0][0]
            tqdm.write(
                "WARNING: unknown transformtion picked for instance {}; using transformation {}"
                .format(trans, fallback))
            trans = fallback

        bias_hidden, weight_hidden, bias_out, weight_out = (
            dy.parameter(self.param_dict[(trans, name)])
            for name in ('b1', 'W1', 'b2', 'W2'))

        source = dy.inputVector(instance.xs_distr_vec)
        target = dy.inputVector(instance.ys_distr_vec)

        hidden = dy.tanh(dy.affine_transform([bias_hidden, weight_hidden, source]))
        prediction = dy.affine_transform([bias_out, weight_out, hidden])

        return prediction, dy.squared_distance(prediction, target)
Exemple #8
0
    def encode(self, sent, train_mode=False):
        """Attach a feature vector to every token of ``sent``.

        For each token the representations of the features enabled in
        ``self.args.features`` ('word', 'lemma', 'upos', 'xpos', 'label',
        'char_lstm', 'morph') are collected and summed; the sum is stored in
        ``token.vecs['feat']``.

        Args:
            sent: object whose ``get_tokens()`` yields the tokens to encode.
            train_mode: when true, words/lemmas found in the drop lists are
                randomly replaced by their fallback index (probability 0.01
                and 0.1 respectively) and dropout is applied to the sum.
        """
        features = self.args.features

        for token in sent.get_tokens():
            parts = []

            if 'word' in features:
                drop_word = (train_mode and np.random.random() < 0.01
                             and token['word'] in self.word_drop_list)
                # Fallback index encodes whether the word is capitalised (0 or 1).
                fallback_idx = int(token['word'][0].isupper())
                if drop_word:
                    word_idx = fallback_idx
                else:
                    word_idx = self.word_map.get(token['word'], fallback_idx)

                # Index 0 is represented by zeros rather than a lookup.
                if word_idx:
                    parts.append(self.word_emb[word_idx])
                else:
                    parts.append(dy.inputVector(np.zeros(self.args.hid_dim)))

            if 'lemma' in features:
                drop_lemma = (train_mode and np.random.random() < 0.1
                              and token['lemma'] in self.lemma_drop_list)
                lemma_idx = 0 if drop_lemma else self.lemma_map.get(token['lemma'], 0)
                if lemma_idx:
                    parts.append(self.lemma_emb[lemma_idx])
                else:
                    parts.append(dy.inputVector(np.zeros(self.args.hid_dim)))

            if 'upos' in features:
                parts.append(self.upos_emb[self.upos_map.get(token['upos'], 0)])

            if 'xpos' in features:
                parts.append(self.xpos_emb[self.xpos_map.get(token['xpos'], 0)])

            if 'label' in features:
                parts.append(self.label_emb[self.label_map.get(token['label'], 0)])

            if 'char_lstm' in features:
                chars = [self.char_emb[self.char_map.get(c, 0)] for c in token['clemma']]
                forward_out = self.char_lstm_f_encoder.initial_state().transduce(chars)
                backward_out = self.char_lstm_b_encoder.initial_state().transduce(
                    reversed(chars))
                parts.append(dy.concatenate([forward_out[-1], backward_out[-1]]))

            if 'morph' in features:
                # The sequence starts with the UPOS tag when that feature is on,
                # otherwise with a placeholder symbol.
                head = [token['upos']] if 'upos' in features else ['<#m?>']
                morph_embs = [
                    self.morph_emb[self.morph_map.get(item, 0)]
                    for item in head + token['morph']
                ]
                parts.append(
                    self.morph_lstm_encoder.initial_state().transduce(morph_embs)[-1])

            combined = sum(parts)
            if train_mode:
                combined = dy.dropout(combined, self.args.dropout)
            token.vecs['feat'] = combined
    def compute_embeddings(self, word, runtime=True):
        """Encode ``word`` character by character into a single embedding.

        Digits are replaced by ``0``.  Every character becomes the
        concatenation of its (lower-cased) lookup embedding -- or the
        ``<UNK>`` embedding when unknown -- and a 3-way one-hot casing vector.
        The sequence is passed through the stacked forward/backward RNNs in
        ``self.rnn_fw`` / ``self.rnn_bw``; the last forward output, the last
        backward output and an attention summary are concatenated and
        projected through a tanh layer.

        Args:
            word: the word, either ``unicode`` or a UTF-8 encoded byte string.
            runtime: when true the RNN dropouts are set to ``(0, 0)``,
                otherwise to ``(0, 0.33)``.

        Returns:
            ``(embedding, rnn_outputs)`` where ``rnn_outputs`` are the
            per-character outputs of the top RNN layer.
        """
        if isinstance(word, unicode):
            import copy
            uniword = copy.deepcopy(word)
        else:
            uniword = unicode(word, 'utf-8')
        uniword = re.sub(r'\d', '0', uniword)

        char2int = self.encodings.char2int
        x_list = []
        for raw_char in uniword:
            lowered = raw_char.lower()
            if lowered == raw_char and raw_char.upper() == raw_char:
                casing = [1.0, 0.0, 0.0]  # character has no case distinction
            elif lowered == raw_char:
                casing = [0.0, 1.0, 0.0]  # lower-case character
            else:
                casing = [0.0, 0.0, 1.0]  # upper-case character
            style_emb = dy.inputVector(casing)

            key = lowered if lowered in char2int else '<UNK>'
            x_list.append(dy.concatenate([self.character_lookup[char2int[key]], style_emb]))

        rnn_outputs = x_list
        rnn_states_fw = None
        rnn_states_bw = None
        for builder_fw, builder_bw in zip(self.rnn_fw, self.rnn_bw):
            recurrent_dropout = 0 if runtime else 0.33
            builder_fw.set_dropouts(0, recurrent_dropout)
            builder_bw.set_dropouts(0, recurrent_dropout)

            state_fw = builder_fw.initial_state()
            state_bw = builder_bw.initial_state()

            # Left-to-right pass.
            rnn_states_fw = []
            for x in rnn_outputs:
                state_fw = state_fw.add_input(x)
                rnn_states_fw.append(state_fw)

            # Right-to-left pass.
            rnn_states_bw = []
            for x in reversed(rnn_outputs):
                state_bw = state_bw.add_input(x)
                rnn_states_bw.append(state_bw)

            fw = [state.output() for state in rnn_states_fw]
            bw = [state.output() for state in rnn_states_bw]

            # Re-align the backward outputs with the forward ones for the next layer.
            rnn_outputs = [dy.concatenate([left, right]) for left, right in zip(fw, reversed(bw))]

        attention = self._attend(rnn_outputs, rnn_states_fw[-1], rnn_states_bw[-1])

        pre_linear = dy.concatenate([fw[-1], bw[-1], attention])
        embedding = dy.tanh(self.linearW.expr() * pre_linear + self.linearB.expr())

        return embedding, rnn_outputs
Exemple #10
0
def do_one_batch(X_batch, Z_batch):
    """Score the similarity between one batch of two kinds of input.

    ``X_batch`` is passed through three convolution / ReLU / k-max-pooling
    stages, pooled to a single column, reshaped to ``(J[3],)`` and
    weight-normalised.  ``Z_batch`` is projected through
    ``w_embed``/``b_embed``.  The result is the rectified product of the two
    embeddings.

    The inputs are loaded either as batched expressions (``DO_BATCH``) or as
    a single matrix/vector pair.  The shape of the loaded ``x`` is always
    printed; ``PRINT_EMBED`` and ``PRINT_SIM`` enable further diagnostics.
    """
    batch_size = X_batch.shape[0]
    if DO_BATCH:
        # Flatten each batch into a 1-D vector (column-major) as a workaround.
        X_flat = X_batch.flatten('F')
        Z_flat = Z_batch.flatten('F')
        x = dy.reshape(dy.inputVector(X_flat), (nmf, nframes),
                       batch_size=batch_size)
        # NOTE(review): ``(nvgg)`` is a plain value, not a 1-tuple.
        z = dy.reshape(dy.inputVector(Z_flat), (nvgg),
                       batch_size=batch_size)
        scnn.add_input([X_batch[row] for row in range(X_batch.shape[0])])
        vgg.add_input([Z_batch[row] for row in range(X_batch.shape[0])])
    else:
        x = dy.matInput(X_batch.shape[0], X_batch.shape[1])
        x.set(X_batch.flatten('F'))
        z = dy.vecInput(Z_batch.shape[0])
        z.set(Z_batch.flatten('F'))
        x = dy.reshape(dy.transpose(x, [1, 0]),
                       (1, X_batch.shape[1], X_batch.shape[0]))
    print(x.npvalue().shape)

    # Three identical conv -> ReLU -> k-max pooling stages.
    stages = ((w_i, b_i, D[1]), (w_h1, b_h1, D[2]), (w_h2, b_h2, D[3]))
    feature_map = x
    for kernel, bias, keep in stages:
        activated = dy.rectify(
            dy.conv2d_bias(feature_map, kernel, bias, [1, 1], is_valid=False))
        feature_map = dy.kmax_pooling(activated, keep, d=1)

    pooled = dy.reshape(dy.kmax_pooling(feature_map, 1, d=1), (J[3], ))
    zem_sp = dy.weight_norm(pooled, dy.scalarInput(1.))
    zem_vgg = w_embed * z + b_embed

    sa = dy.transpose(zem_sp) * zem_vgg
    s = dy.rectify(sa)

    if PRINT_EMBED:
        print('Vgg embedding vector:', zem_vgg.npvalue().shape)
        print(zem_vgg.value())

        print('Speech embedding vector:', zem_sp.npvalue().shape)
        print(zem_sp.value())
    if PRINT_SIM:
        print('Raw Similarity:', sa.npvalue())
        print(sa.value())
        print('Similarity:', s.npvalue())
        print(s.value())

    return s
Exemple #11
0
    def learn(self, wave, mgc, batch_size):
        """Run one training pass over a single utterance.

        For every frame except the last, ``_predict_one`` generates an output
        that is compared with the matching ``UPSAMPLE_COUNT``-long slice of
        ``wave`` through two squared-distance terms: one on the samples
        themselves and one on their first-order differences.  Parameters are
        updated each time ``batch_size`` frame losses have accumulated, and
        once more for any remainder.  Progress is written to stdout in steps
        of 5.

        Args:
            wave: target samples; sliced per frame and subtracted
                element-wise (presumably a NumPy array -- TODO confirm
                against the caller).
            mgc: sequence of per-frame feature vectors.
            batch_size: number of frame losses to accumulate per update.

        Returns:
            The summed loss divided by the number of samples trained on, or
            ``0.0`` when no frame could be processed (fewer than two frames),
            which previously raised ``ZeroDivisionError``.
        """
        def apply_update(pending):
            # Backpropagate the summed losses, step the trainer and start a
            # fresh graph; returns the numeric value of the summed loss.
            batch_loss = dy.esum(pending)
            value = batch_loss.value()
            batch_loss.backward()
            self.trainer.update()
            dy.renew_cg()
            return value

        last_proc = 0
        dy.renew_cg()
        total_loss = 0
        losses = []
        cnt = 0
        hop = self.UPSAMPLE_COUNT
        noise = np.random.normal(0, 1.0, (len(wave) + hop))
        for mgc_index in range(len(mgc)):
            # Progress indicator.
            curr_proc = int((mgc_index + 1) * 100 / len(mgc))
            if curr_proc % 5 == 0 and curr_proc != last_proc:
                while last_proc < curr_proc:
                    last_proc += 5
                    sys.stdout.write(' ' + str(last_proc))
                    sys.stdout.flush()

            if mgc_index < len(mgc) - 1:
                offset = hop * mgc_index
                # Only the generated output is used for the loss; the other
                # return values are ignored (and no longer shadow ``filter``).
                output, _excitation, _filter, _vuv = self._predict_one(
                    mgc[mgc_index], noise[offset:offset + 2 * hop])

                # reconstruction error
                t_vect = wave[offset:offset + hop]
                loss = dy.squared_distance(output, dy.inputVector(t_vect))
                # dynamic error: match the sample-to-sample differences too
                o1 = dy.pickrange(output, 0, hop - 1)
                o2 = dy.pickrange(output, 1, hop)
                delta = o2 - o1
                real_delta = t_vect[1:hop] - t_vect[0:hop - 1]
                loss += dy.squared_distance(delta, dy.inputVector(real_delta))

                losses.append(loss)
                cnt += hop

            if len(losses) >= batch_size:
                total_loss += apply_update(losses)
                losses = []

        if len(losses) > 0:
            total_loss += apply_update(losses)

        if cnt == 0:
            # Nothing was trained on; avoid dividing by zero.
            return 0.0
        return total_loss / cnt
Exemple #12
0
    def forward(self, input, enc_output, teacher_forcing_ratio):
        """Run the decoder over ``input`` and collect its output scores.

        ``input`` is a list of token ids that starts with the begin-of-sequence
        id.  Every id except the last is fed to the decoder, so the loop runs
        ``len(input) - 1`` times.  At each step the attention context is
        recomputed from the current RNN state, concatenated with the
        (dropped-out) embedding of the input token and fed to the RNN, whose
        output is projected through ``output_linear_W`` / ``output_linear_b``.

        Args:
            input: list of token ids.
            enc_output: encoder outputs handed to ``self._attention``.
            teacher_forcing_ratio: accepted but not used by this
                implementation; the gold token is always fed.

        Returns:
            ``(output, attention_weights)``.  ``output`` starts with a one-hot
            vector (index 2 set) followed by one projected expression per
            step; ``attention_weights`` is always an empty list here.
        """
        seq_len = len(input)

        bos_one_hot = [0.] * self.dec_vocab_size
        bos_one_hot[2] = 1.
        output = [dy.inputVector(bos_one_hot)]
        attention_weights = []

        zero_states = [
            dy.inputVector(np.zeros(self.dec_hidden_dim))
            for _ in range(2 * self.dec_num_layers)
        ]
        rnn = self.rnn.initial_state(zero_states)

        # Initial context; it is recomputed at the start of every step below.
        context, _ = self._attention(rnn.s(), enc_output)

        # Stop before the final id: the step that is fed the last-but-one id
        # produces the last output.
        for step in range(seq_len - 1):
            context, _ = self._attention(rnn.s(), enc_output)

            word_embedding = dy.dropout(self.embedding[input[step]],
                                        self.dec_dropout)
            rnn = rnn.add_input(dy.concatenate([word_embedding, context]))

            # Map the decoder output into the decoder vocabulary space.
            projection = self.output_linear_W.expr(update=True)
            offset_source = rnn.output()
            scores = projection * offset_source + self.output_linear_b.expr(
                update=True)
            output.append(scores)

        return output, attention_weights
Exemple #13
0
 def __init__(self,cs=None,hs=None,full_vec=None,hidden_dim=None):
     """Build the state either from a flat vector or from expression lists.

     :param cs: list of c expressions (used together with ``hs``)
     :param hs: list of h expressions (used together with ``cs``)
     :param full_vec: flat sequence whose first half holds the c values and
         whose second half holds the h values; takes precedence over
         ``cs``/``hs`` when given together with ``hidden_dim``
     :param hidden_dim: length of each per-layer chunk cut from ``full_vec``
     :raises MissingInput: when neither ``full_vec``+``hidden_dim`` nor
         ``cs``+``hs`` are provided
     """
     # Identity checks instead of ``None in [...]``: the membership test
     # compares with ``==``, which is ambiguous for multi-element NumPy arrays.
     if full_vec is not None and hidden_dim is not None:
         half = len(full_vec) // 2
         cvec = full_vec[:half]
         hvec = full_vec[half:]
         n_chunks = half // hidden_dim
         self.cs = [dy.inputVector(cvec[i*hidden_dim:(i+1)*hidden_dim]) for i in range(n_chunks)]
         self.hs = [dy.inputVector(hvec[i*hidden_dim:(i+1)*hidden_dim]) for i in range(n_chunks)]
     elif cs is not None and hs is not None:
         self.cs = cs #list of c expressions
         self.hs = hs #list of h expressions
     else:
         raise MissingInput()
Exemple #14
0
    def attend(self, node):
        '''Return an attention-weighted sum of bi-LSTM vectors for ``node``.

        A pre-defined meta node (``snt_id == -1``) is represented directly by
        the bi-LSTM vector at its start index.  Otherwise the vectors of the
        node's words are extended with two positions of context on each side
        (zero vectors where the context falls outside the document), scored
        with ``self.attention_w`` through a tanh, normalised with a softmax
        and summed.
        '''
        if node.snt_id == -1:  # if node is a pre-defined meta node
            return self.bi_lstm[node.start_word_index_in_doc]

        first = node.start_word_index_in_doc
        last = node.end_word_index_in_doc
        doc_len = len(self.bi_lstm)

        def padding():
            # Stand-in for a context position outside the document.
            return dy.inputVector([0] * (self.size_lstm * 2))

        span = self.bi_lstm[first:last + 1]

        # Context window of +/- 2 words around the node.
        left_near = padding() if first <= 0 else self.bi_lstm[first - 1]
        left_far = padding() if first <= 1 else self.bi_lstm[first - 2]
        right_near = padding() if last >= doc_len - 1 else self.bi_lstm[last + 1]
        right_far = padding() if last >= doc_len - 2 else self.bi_lstm[last + 2]

        vectors = [left_far, left_near] + span + [right_near, right_far]
        input_mat = dy.concatenate_cols(vectors)

        attn_w = dy.parameter(self.attention_w)
        scores = dy.transpose(dy.tanh(attn_w * input_mat))
        att_weights = dy.softmax(scores)

        return input_mat * att_weights
Exemple #15
0
    def BuildLMGraph(self, sent, sent_args=None):
        """Build the language-model loss expression for one sentence.

        A new computation graph is started.  For each adjacent pair
        (current, next) of ``sent`` the current word is turned into an input
        vector -- taken from ``self.pron_dict.pdict`` when the word is listed
        there, otherwise the last state produced by ``self.s2s`` for the
        upper-cased spelling, with back-propagation blocked -- and fed to the
        RNN.  The negative log-softmax of the next word's index under
        ``bias + R * output`` is the step error.

        Args:
            sent: sequence of keys into ``self.vocab``.
            sent_args: not used by this implementation.

        Returns:
            The sum of the step errors.
        """
        dynet.renew_cg()
        state = self.rnn.initial_state()

        out_weights = dynet.parameter(self.R)
        out_bias = dynet.parameter(self.bias)

        errs = []
        for cur_key, next_key in zip(sent, sent[1:]):
            cur_word = self.vocab[cur_key]
            next_word = self.vocab[next_key]

            if cur_word.s in self.pron_dict.pdict:
                step_input = dynet.inputVector(self.pron_dict.pdict[cur_word.s])
            else:
                letter_ids = [
                    self.s2s.src_vocab[letter] for letter in cur_word.s.upper()
                ]
                encoded = self.s2s.encode_seq(self.s2s.embed_seq(letter_ids))
                step_input = dynet.nobackprop(encoded[-1])

            state = state.add_input(step_input)
            scores = out_bias + (out_weights * state.output())
            errs.append(dynet.pickneglogsoftmax(scores, int(next_word.i)))

        return dynet.esum(errs)
Exemple #16
0
 def viterbi(self, observations):
     """Decode the best two-tag sequence with max-product message passing.

     Starting from a ``[0, 0]`` message, each of the first
     ``len(observations) - 1`` steps picks, for every next tag, the best
     previous tag under ``message + transitions[next_tag]``, records it as a
     back-pointer, and adds the step's observation to the new message.

     :param observations: sequence of per-position score expressions
     :return: ``(best_path, path_score)`` -- the tag ids recovered by
         following the back-pointers (with the initial entry dropped) and the
         score expression of the best final tag
     """
     forward_mess = dy.inputVector([0, 0])
     transitions = [self.transitions[idx] for idx in range(2)]

     backpointers = []
     for step in range(len(observations) - 1):
         step_pointers = []
         step_scores = []
         for tag in range(2):
             candidates = forward_mess + transitions[tag]
             winner = np.argmax(candidates.npvalue())
             step_pointers.append(winner)
             step_scores.append(dy.pick(candidates, winner))
         forward_mess = dy.concatenate(step_scores) + observations[step]
         backpointers.append(step_pointers)

     # Highest-scoring final tag and its score.
     current = np.argmax(forward_mess.npvalue())
     path_score = dy.pick(forward_mess, current)

     # Follow the back-pointers from the end to the start.
     reversed_path = [current]
     for step_pointers in reversed(backpointers):
         current = step_pointers[current]
         reversed_path.append(current)

     # Drop the entry reached last (the initial one) and restore forward order.
     best_path = reversed_path[:-1][::-1]
     return best_path, path_score
Exemple #17
0
 def zero_input(dim):
     """
     Build the stand-in representation used for missing elements.

     :param dim: dimension of vector to return
     :return: input expression holding ``dim`` zeros (as in e.g. Kiperwasser and Goldberg 2016;
         an alternative could be to learn this value)
     """
     zeros = np.zeros(dim, dtype=float)
     return dy.inputVector(zeros)
    def forward(self, observations):
        """Run the forward recursion over ``observations`` in log space.

        The initial scores are ``-1e10`` for every tag except ``START_TAG``,
        which is ``0``.  At every observation, and for every next tag, the
        previous scores, the transition row of the next tag and the
        observation score of the next tag (broadcast over all tags) are added
        and reduced with a max-shifted log-sum-exp.  A final reduction adds the
        transition row of ``"<STOP>"``.

        Returns:
            The expression produced by the final log-sum-exp reduction.
        """
        n_tags = self.tagset_size

        def log_sum_exp(scores):
            # Subtract the maximum before exponentiating, add it back after.
            top = dy.pick(scores, np.argmax(scores.npvalue()))
            top_broadcast = dy.concatenate([top] * n_tags)
            shifted = dy.exp(scores - top_broadcast)
            return top + dy.log(dy.sum_cols(dy.transpose(shifted)))

        start_scores = [-1e10] * n_tags
        start_scores[t2i[START_TAG]] = 0
        for_expr = dy.inputVector(start_scores)

        for obs in observations:
            reduced = []
            for tag in range(n_tags):
                emission = dy.concatenate([dy.pick(obs, tag)] * n_tags)
                candidate = for_expr + self.transitions[tag] + emission
                reduced.append(log_sum_exp(candidate))
            for_expr = dy.concatenate(reduced)

        return log_sum_exp(for_expr + self.transitions[t2i["<STOP>"]])
Exemple #19
0
def augment(scores, oracle_index):
    """Add 1 to every entry of a score vector except the oracle entry.

    :param scores: one-dimensional DyNet expression (checked by assertion)
    :param oracle_index: position that receives no increment
    :return: DyNet expression ``scores + margin`` where ``margin`` is 1
        everywhere and 0 at ``oracle_index``
    """
    assert isinstance(scores, dy.Expression)
    dims = scores.dim()[0]
    assert len(dims) == 1
    margin = np.ones(dims)
    margin[oracle_index] = 0
    margin_expr = dy.inputVector(margin)
    return scores + margin_expr
Exemple #20
0
    def predict(self, sentence):
        """Decode tags and/or pick morphological analyses for one sentence.

        Which outputs are produced depends on ``self.parameters['active_models']``:
        values 0, 2 and 3 enable tag decoding; values 1, 2 and 3 enable
        morphological disambiguation. A disabled output is an empty list.

        :param sentence: sentence object handed to the representation helpers
        :return: tuple ``(selected_morph_analysis_representations, decoded_tags)``
        """
        ner_context, md_context = self.get_context_representations(
            sentence, training=False)

        top_layer_context, _, _ = self.get_last_layer_context_representations(
            sentence, ner_context, md_context)

        decoded_tags = []
        if self.parameters['active_models'] in (0, 2, 3):
            tag_scores = self.calculate_tag_scores(top_layer_context)
            observations = []
            for obs in tag_scores:
                # Append two very low scores to each observation before decoding
                # (viterbi_decoding is given vectors two entries longer than the tag scores).
                padding = dynet.inputVector([-1e10, -1e10])
                observations.append(dynet.concatenate([obs, padding], d=0))
            decoded_tags, _ = self.crf_module.viterbi_decoding(observations)

        selected_analyses = []
        if self.parameters['active_models'] in (1, 2, 3):
            _, analysis_scores = \
                self.get_morph_analysis_representations_and_scores(
                    sentence, md_context)
            selected_analyses = self.disambiguate_morph_analyzes(analysis_scores)

        return selected_analyses, decoded_tags
Exemple #21
0
 def viterbi_decoding(self, observations):
     """
     Find the highest-scoring tag path through a sequence of observations.
     :param observations: iterable of DyNet expressions added to the per-step best scores
     :return: tuple (best_path, path_score); best_path is a list of tag ids with the
              begin state removed, path_score is a DyNet expression
     """
     n_states = self.n_tags + 2
     history = []
     start_scores = [-1e10] * n_states
     start_scores[self.b_id] = 0  # only the begin state starts with a usable score
     prev = dynet.inputVector(start_scores)
     trans_rows = [self.transitions[k] for k in range(n_states)]
     for obs in observations:
         step_ptrs = []
         step_best = []
         for row in trans_rows:
             candidate = prev + row
             winner = np.argmax(candidate.npvalue())
             step_ptrs.append(winner)
             step_best.append(dynet.pick(candidate, winner))
         prev = dynet.concatenate(step_best) + obs
         history.append(step_ptrs)
     # Close the path with the transition row of the end state.
     final = prev + trans_rows[self.e_id]
     tag = np.argmax(final.npvalue())
     path_score = dynet.pick(final, tag)
     # Walk the backpointers from the last step to the first.
     best_path = [tag]
     for step_ptrs in reversed(history):
         tag = step_ptrs[tag]
         best_path.append(tag)
     first = best_path.pop()  # the entry reached before the first observation
     best_path.reverse()
     assert first == self.b_id
     return best_path, path_score
Exemple #22
0
    def forward(self, observations):
        """Accumulate forward (log-sum-exp) scores over a sequence of observations.

        :param observations: iterable of DyNet expressions; each one is indexed
            by tag id through ``dy.pick``
        :return: DyNet expression holding the log-sum-exp of the final scores
            after adding the transition row ``self.trans[self.sp_e]``
        """
        width = self.dim_output

        def log_sum_exp(scores):
            # Subtract the maximum before exponentiating, then add it back.
            peak = dy.pick(scores, np.argmax(scores.npvalue()))
            peak_tiled = dy.concatenate([peak] * width)
            shifted = dy.exp(scores - peak_tiled)
            return peak + dy.log(dy.sum_elems(dy.transpose(shifted)))

        # Every tag except the start symbol begins with a very low score.
        start_scores = [-1e10] * width
        start_scores[self.sp_s] = 0
        running = dy.inputVector(start_scores)

        for obs in observations:
            step_scores = []
            for tag in range(width):
                # Repeat the emission score of `tag` so it adds element-wise.
                emission = dy.concatenate([dy.pick(obs, tag)] * width)
                combined = running + self.trans[tag] + emission
                step_scores.append(log_sum_exp(combined))
            running = dy.concatenate(step_scores)

        return log_sum_exp(running + self.trans[self.sp_e])
Exemple #23
0
    def _forward(self, emissions):

        """Run the forward recursion and reduce all scores with log-sum-exp.

        :param emissions: List[dy.Expression]

        Returns:
            dy.Expression ((1,), B) according to the original documentation
        """
        start_scores = [-1e4] * self.n_tags
        start_scores[self.start_idx] = 0
        alphas = dy.inputVector(start_scores)

        trans = self.transitions
        # One recursion step per emission.
        for emission in emissions:
            with_emission = dy.colwise_add(trans, emission)
            scores = dy.colwise_add(dy.transpose(with_emission), alphas)
            # dy.logsumexp reduces element-wise across a list of expressions,
            # so the score matrix is split into a list by iterating over it.
            alphas = dy.logsumexp([part for part in scores])

        closing = alphas + dy.pick(trans, self.end_idx)
        return dy.logsumexp([part for part in closing])
Exemple #24
0
 def viterbi_decoding(self, observations):
     """
     Find the highest-scoring tag path through a sequence of observations.
     :param observations: iterable of DyNet expressions added to the per-step best scores
     :return: tuple (best_path, path_score); best_path is a list of tag ids with the
              start symbol removed, path_score is a DyNet expression
     """
     width = self.dim_output
     history = []
     start_scores = [-1e10] * width
     start_scores[self.sp_s] = 0  # only the start symbol begins with a usable score
     prev = dy.inputVector(start_scores)
     trans_rows = [self.trans[k] for k in range(width)]
     for obs in observations:
         step_ptrs = []
         step_best = []
         for row in trans_rows:
             candidate = prev + row
             winner = np.argmax(candidate.npvalue())
             step_ptrs.append(winner)
             step_best.append(dy.pick(candidate, winner))
         prev = dy.concatenate(step_best) + obs
         history.append(step_ptrs)
     # Close the path with the transition row of the end symbol.
     final = prev + trans_rows[self.sp_e]
     tag = np.argmax(final.npvalue())
     path_score = dy.pick(final, tag)
     # Walk the backpointers from the last step to the first.
     best_path = [tag]
     for step_ptrs in reversed(history):
         tag = step_ptrs[tag]
         best_path.append(tag)
     first = best_path.pop()
     best_path.reverse()
     assert first == self.sp_s
     return best_path, path_score
Exemple #25
0
    def forward(self, observations):
        """Accumulate forward (log-sum-exp) scores over a sequence of observations.

        :param observations: iterable of DyNet expressions; each one is indexed
            by tag id through ``dynet.pick``
        :return: DyNet expression holding the log-sum-exp of the final scores
            after adding the transition row ``self.transitions[self.e_id]``
        """
        n_states = self.n_tags + 2

        def log_sum_exp(scores):
            # Subtract the maximum before exponentiating, then add it back.
            peak = dynet.pick(scores, np.argmax(scores.npvalue()))
            peak_tiled = dynet.concatenate([peak] * n_states)
            shifted = dynet.exp(scores - peak_tiled)
            return peak + dynet.log(dynet.sum_cols(dynet.transpose(shifted)))

        # Every state except the begin state starts with a very low score.
        start_scores = [-1e10] * n_states
        start_scores[self.b_id] = 0
        running = dynet.inputVector(start_scores)

        for obs in observations:
            step_scores = []
            for tag in range(n_states):
                # Repeat the emission score of `tag` so it adds element-wise.
                emission = dynet.concatenate([dynet.pick(obs, tag)] * n_states)
                combined = running + self.transitions[tag] + emission
                step_scores.append(log_sum_exp(combined))
            running = dynet.concatenate(step_scores)

        return log_sum_exp(running + self.transitions[self.e_id])
def create_poems(one_hot_vecs, bigram_model, indexed_vocab, line_number):
    """Print generated poem lines and the bigram perplexity of each one.

    A one-hidden-layer network is rebuilt and its weights are loaded from
    "my_train.model". Every line starts from a random word; the next word is
    always the arg-max of the network output, until "EOL" is predicted.

    :param one_hot_vecs: sequence of one-hot vectors, one per vocabulary entry
    :param bigram_model: model handed to BigramModel.calculate_bigram_perplexity
    :param indexed_vocab: vocabulary handed to getWordFromIndexedVocab
    :param line_number: number of lines to generate (converted with int())
    """
    model = dy.Model()
    dy.renew_cg()
    vocab_size = len(one_hot_vecs)
    hidden_size = int(vocab_size / 100)
    learning_rate = 0.1

    # Parameters are created in the same order as before; populate() below
    # loads the saved values into them.
    pW = model.add_parameters((hidden_size, vocab_size))
    pb = model.add_parameters((hidden_size))
    pU = model.add_parameters((vocab_size, hidden_size))
    pd = model.add_parameters((vocab_size))
    _trainer = dy.SimpleSGDTrainer(model, learning_rate)

    model.populate("my_train.model")

    for _ in range(int(line_number)):
        word_idx = random.randrange(len(one_hot_vecs))
        poem = ""
        while True:
            x = dy.inputVector(one_hot_vecs[word_idx])
            y = pU * dy.tanh(pW * x + pb) + pd
            word_idx = np.argmax(y.value())
            word = getWordFromIndexedVocab(indexed_vocab, word_idx)
            if word == "EOL":
                print()
                poem = poem + "\n"
                break
            print(word, end=' ')
            poem = poem + word + " "

        print("POEM PERPLEXITY", BigramModel.calculate_bigram_perplexity(bigram_model, poem))
Exemple #27
0
    def getWordEmbeddings(self, sentence, train, options, test_embeddings=None):
        """Attach input vectors to every entry of a sentence and run the BiLSTMs.

        For each entry this sets ``entry.vecs`` (a dict of the individual
        word / elmo / pos / char / treebank vectors, ``None`` when a feature is
        disabled) and ``entry.vec`` (the concatenation of the vectors that are
        present). Afterwards every BiLSTM in ``self.bilstms`` is asked to set
        its token vectors on the sentence.

        :param sentence: sequence of entries with ``form``, ``norm``, ``cpos``,
            ``proxy_tbank`` and ``treebank_id`` attributes
        :param train: True during training; enables word dropout
        :param options: option object providing the ``*_emb_size`` settings,
            ``graph_based`` and ``forced_tbank_emb``
        :param test_embeddings: optional mapping with ``"words"`` and ``"chars"``
            sub-mappings of external embeddings used at prediction time.
            Defaults to an empty mapping created per call (previously a single
            default object was shared between calls and mutated by lookups).
        """
        if test_embeddings is None:
            test_embeddings = defaultdict(dict)

        if self.elmo:
            # Get full text of sentence - excluding root, which is loaded differently
            # for transition and graph-based parsers.
            if options.graph_based:
                sentence_text = " ".join([entry.form for entry in sentence[1:]])
            else:
                sentence_text = " ".join([entry.form for entry in sentence[:-1]])

            elmo_sentence_representation = \
                self.elmo.get_sentence_representation(sentence_text)

        for i, root in enumerate(sentence):
            root.vecs = defaultdict(lambda: None)  # all vecs are None by default (possibly a little risky?)
            if options.word_emb_size > 0:
                if train:
                    # Word dropout: the rarer the word, the more likely it is
                    # replaced by index 0.
                    word_count = float(self.word_counts.get(root.norm, 0))
                    dropFlag = random.random() > word_count / (0.25 + word_count)
                    root.vecs["word"] = self.word_lookup[self.words.get(root.norm, 0) if not dropFlag else 0]
                else:  # need to check in test_embeddings at prediction time
                    if root.norm in self.words:
                        root.vecs["word"] = self.word_lookup[self.words[root.norm]]
                    elif root.norm in test_embeddings["words"]:
                        root.vecs["word"] = dy.inputVector(test_embeddings["words"][root.norm])
                    else:
                        root.vecs["word"] = self.word_lookup[0]
            if options.pos_emb_size > 0:
                root.vecs["pos"] = self.pos_lookup[self.pos.get(root.cpos, 0)]
            if options.char_emb_size > 0:
                root.vecs["char"] = self.get_char_vector(root, train, test_embeddings["chars"])
            if options.tbank_emb_size > 0:
                if options.forced_tbank_emb:
                    treebank_id = options.forced_tbank_emb
                elif root.proxy_tbank:
                    treebank_id = root.proxy_tbank
                else:
                    treebank_id = root.treebank_id
                # this is a bit of a hack for models trained on an old version of the code
                # that used treebank name rather than id as the lookup
                if treebank_id not in self.treebanks and treebank_id in utils.reverse_iso_dict and \
                        utils.reverse_iso_dict[treebank_id] in self.treebanks:
                    treebank_id = utils.reverse_iso_dict[treebank_id]
                root.vecs["treebank"] = self.treebank_lookup[self.treebanks[treebank_id]]
            if self.elmo:
                if i < len(sentence) - 1:
                    # Don't look up the 'root' word
                    root.vecs["elmo"] = elmo_sentence_representation[i]
                else:
                    # TODO
                    root.vecs["elmo"] = dy.zeros(self.elmo.emb_dim)

            # Disabled features are None and are filtered out before concatenating.
            root.vec = dy.concatenate(list(filter(None, [root.vecs["word"],
                                                         root.vecs["elmo"],
                                                         root.vecs["pos"],
                                                         root.vecs["char"],
                                                         root.vecs["treebank"]])))

        for bilstm in self.bilstms:
            bilstm.set_token_vecs(sentence, train)
    def pz(self, eq):
        """
        Gumbel softmax on distribution over z.

        Computes softmax(W * eq), adds Gumbel noise of size num_clusters,
        divides by the temperature self.temp and applies a second softmax.

        NOTE(review): the noise is added to the probabilities themselves, not
        to their logarithm - confirm this is intended.
        """
        weights = dy.parameter(self.W)
        cluster_probs = dy.softmax(weights * eq)
        noise = dy.random_gumbel(self.num_clusters)

        noisy = dy.esum([cluster_probs, noise])
        temperature = dy.inputVector([self.temp])
        relaxed = dy.softmax(dy.cdiv(noisy, temperature))

        return relaxed
Exemple #29
0
    def _attend(self, input_list, decoder_state, last_pos=None):
        """Compute attention over the inputs for the current decoder state.

        :param input_list: list of DyNet expressions to attend over
        :param decoder_state: RNN state; the last element of ``s()`` is used
        :param last_pos: position attended at the previous step, or None.
            When given, the attention peak must lie in
            ``[last_pos, last_pos + 3)``; otherwise attention is forced onto
            ``last_pos + 1`` (clamped to the last input).
        :return: tuple ``(context_vector, attention_weights)``; when attention
            is forced, the context is the selected input itself and the
            weights are a one-hot vector
        """
        w1 = self.att_w1.expr(update=True)
        w2 = self.att_w2.expr(update=True)
        v = self.att_v.expr(update=True)

        state_term = w2 * dy.concatenate([decoder_state.s()[-1]])
        raw_scores = [v * dy.tanh(w1 * enc + state_term) for enc in input_list]
        attention = dy.softmax(dy.concatenate(raw_scores))

        # force incremental attention if this is runtime
        if last_pos is not None:
            peak = np.argmax(attention.value())
            inside_window = last_pos <= peak < last_pos + 3
            if not inside_window:
                forced = min(last_pos + 1, len(input_list) - 1)
                chosen = input_list[forced]
                one_hot = np.zeros((len(input_list)))
                one_hot[forced] = 1.0
                return chosen, dy.inputVector(one_hot)

        weighted = [vec * weight for vec, weight in zip(input_list, attention)]
        return dy.esum(weighted), attention
Exemple #30
0
    def learn(self, characters, target_mgc, guided_att=True):
        """Run one training step on a single example and return its mean loss.

        The loss is the sum of: an L1 distance per predicted frame; on every
        third frame an optional guided-attention loss; and on every third
        frame an L1 stop-signal loss whose target is -0.8 within the last six
        frames and 0.8 before that.

        :param characters: input character sequence handed to ``_predict``
        :param target_mgc: array of target frames; ``shape[0]`` is the frame count
        :param guided_att: when True, add the guided-attention loss
        :return: total loss value divided by the number of target frames
        """
        num_mgc = target_mgc.shape[0]
        dy.renew_cg()
        output_mgc, output_stop, output_attention = self._predict(
            characters, target_mgc)
        losses = []
        for index, (mgc, real_mgc) in enumerate(zip(output_mgc, target_mgc)):
            losses.append(dy.l1_distance(mgc, dy.inputVector(real_mgc)))

            if index % 3 != 0:
                continue

            # Attention and stop outputs exist once per group of three frames.
            # Floor division keeps the index an int on Python 3 as well
            # (plain "/" yields a float there, which cannot index a list).
            step = index // 3
            if guided_att:
                losses.append(
                    self._compute_guided_attention(output_attention[step], step,
                                                   len(characters) + 2,
                                                   num_mgc // 3))
            # EOS loss
            stop_target = -0.8 if index >= num_mgc - 6 else 0.8
            losses.append(
                dy.l1_distance(output_stop[step], dy.scalarInput(stop_target)))

        loss = dy.esum(losses)
        loss_val = loss.value() / num_mgc
        loss.backward()
        self.trainer.update()
        return loss_val
Exemple #31
0
def viterbi(emissions, transition, start_idx, end_idx, norm=False):
    """Decode the highest-scoring tag path with the Viterbi recursion.

    :param emissions: list of DyNet expressions, one per step; the first
        dimension of the first one gives the number of tags
    :param transition: DyNet expression with the transition scores
    :param start_idx: index of the start tag
    :param end_idx: index of the end tag
    :param norm: when True, the initial scores go through ``dy.log_softmax``
    :return: tuple ``(best_path, path_score)``; the path is a list of tag
        indices without the initial entry, the score is a DyNet expression
    """
    n_tags = emissions[0].dim()[0][0]

    start_scores = [-1e4] * n_tags
    start_scores[start_idx] = 0
    alphas = dy.inputVector(start_scores)
    if norm:
        alphas = dy.log_softmax(alphas)

    history = []
    for emission in emissions:
        candidates = dy.colwise_add(dy.transpose(transition), alphas)
        step_ptrs = np.argmax(candidates.npvalue(), 0)
        alphas = dy.max_dim(candidates, 0) + emission
        history.append(step_ptrs)

    final = alphas + dy.pick(transition, end_idx)
    tag = np.argmax(final.npvalue())
    path_score = dy.pick(final, tag)

    # Follow the backpointers from the last step to the first.
    best_path = [tag]
    for step_ptrs in reversed(history):
        tag = step_ptrs[tag]
        best_path.append(tag)
    best_path.pop()  # drop the entry reached before the first emission
    best_path.reverse()
    return best_path, path_score
Exemple #32
0
def generate_poem():
    """Sample a 25-word poem, five words per line, starting from '<s>'.

    Each step feeds the current word through the network, samples the next
    word with ``weightedChoice`` and appends the probability of the sampled
    word to the module-level ``prob_list``.

    :return: the poem text; every word is followed by a space and every
        fifth word by a newline
    """
    word = '<s>'
    pieces = []

    for i in range(25):
        dy.renew_cg()

        W = dy.parameter(pW)
        b = dy.parameter(pb)
        U = dy.parameter(pU)
        d = dy.parameter(pd)

        x_val = dy.inputVector(list(one_hot_encoded[word_index[word]]))
        hidden = dy.tanh(W * x_val + b)
        probs = dy.softmax(U * hidden + d)

        pieces.append(word + ' ')
        if (i + 1) % 5 == 0:
            # Break the line after every fifth word.
            pieces.append('\n')

        word = weightedChoice(probs.value(), unigrams)
        prob_list.append(probs[word_index[word]].value())

    return ''.join(pieces)
Exemple #33
0
 def ergm_score(self):
     """
     Multiply the current feature values (in feature_set order) by the ERGM weights.
     Does not populate any field.
     :return: ERGM score (dynet Expression) computed based on ERGM weights and features only
     """
     weights = dy.parameter(self.ergm_weights)
     values = [self.feature_vals[name] for name in self.feature_set]
     feature_row = dy.transpose(dy.inputVector(values))
     return feature_row * weights
def calc_loss(sents):
    """Compute the masked encoder-decoder loss for a minibatch.

    :param sents: list of ``(source, target)`` pairs of word-id sequences
    :return: tuple ``(loss, num_words)``: the loss summed over the batch as a
        DyNet expression, and the number of unpadded target positions
    """
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src_sents = [x[0] for x in sents]
    tgt_sents = [x[1] for x in sents]
    src_cws = []

    src_len = [len(sent) for sent in src_sents]
    max_src_len = np.max(src_len)
    num_words = 0

    # NOTE: the source side is not padded, so every source sentence in the
    # batch must have max_src_len words or sent[i] raises IndexError.
    for i in range(max_src_len):
        src_cws.append([sent[i] for sent in src_sents])

    # initialize the LSTM
    init_state_src = LSTM_SRC_BUILDER.initial_state()

    # get the output of the first LSTM
    src_output = init_state_src.add_inputs([dy.lookup_batch(LOOKUP_SRC, cws) for cws in src_cws])[-1].output()
    # now decode
    all_losses = []

    # Decoder
    # need to mask padding at end of sentence
    tgt_cws = []
    # Lengths are taken from the target sentences. Measuring the elements of
    # `sents` instead would give the length of each (source, target) pair.
    tgt_len = [len(sent) for sent in tgt_sents]
    max_tgt_len = np.max(tgt_len)
    masks = []

    for i in range(max_tgt_len):
        # Short sentences are padded with the end-of-sentence id and masked out.
        tgt_cws.append([sent[i] if len(sent) > i else eos_trg for sent in tgt_sents])
        mask = [(1 if len(sent) > i else 0) for sent in tgt_sents]
        masks.append(mask)
        num_words += sum(mask)

    current_state = LSTM_TRG_BUILDER.initial_state().set_s([src_output, dy.tanh(src_output)])
    prev_words = tgt_cws[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    for next_words, mask in zip(tgt_cws[1:], masks):
        # feed the previous words into the decoder
        current_state = current_state.add_input(dy.lookup_batch(LOOKUP_TRG, prev_words))
        output_embedding = current_state.output()

        s = dy.affine_transform([b_sm, W_sm, output_embedding])
        loss = (dy.pickneglogsoftmax_batch(s, next_words))
        # Turn the per-sentence mask into a batched scalar and zero out padding.
        mask_expr = dy.inputVector(mask)
        mask_expr = dy.reshape(mask_expr, (1,), len(sents))
        mask_loss = loss * mask_expr
        all_losses.append(mask_loss)
        prev_words = next_words
    return dy.sum_batches(dy.esum(all_losses)), num_words
Exemple #35
0
def calc_loss(sent):
    """Compute the KL and reconstruction losses of a sequence VAE for one pair.

    :param sent: pair ``(source, target)`` of word-id sequences
    :return: tuple ``(kl_loss, softmax_loss)`` of DyNet expressions
    """
    dy.renew_cg()

    src, trg = sent[0], sent[1]

    # Encode the source sentence and keep the last LSTM output.
    encoder_start = LSTM_SRC_BUILDER.initial_state()
    src_embeddings = [LOOKUP_SRC[word] for word in src]
    src_output = encoder_start.add_inputs(src_embeddings)[-1].output()

    # Parameters of the two MLPs that map the encoding to mean and log-variance.
    W_mean = dy.parameter(W_mean_p)
    V_mean = dy.parameter(V_mean_p)
    b_mean = dy.parameter(b_mean_p)

    W_var = dy.parameter(W_var_p)
    V_var = dy.parameter(V_var_p)
    b_var = dy.parameter(b_var_p)

    # Mean vector from the encoder.
    mu = mlp(src_output, W_mean, V_mean, b_mean)
    # Diagonal of the log-covariance matrix (treated as log variance).
    log_var = mlp(src_output, W_var, V_var, b_var)

    # KL[N(mu(x), sigma(x)) || N(0, I)]
    #   = -0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    kl_loss = -0.5 * dy.sum_elems(1 + log_var - dy.pow(mu, dy.inputVector([2])) - dy.exp(log_var))

    z = reparameterize(mu, log_var)

    # Decode the target sentence one word at a time, starting from z.
    step_losses = []

    decoder_state = LSTM_TRG_BUILDER.initial_state().set_s([z, dy.tanh(z)])
    previous = trg[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    for target_word in trg[1:]:
        decoder_state = decoder_state.add_input(LOOKUP_TRG[previous])
        logits = dy.affine_transform([b_sm, W_sm, decoder_state.output()])
        step_losses.append(dy.pickneglogsoftmax(logits, target_word))
        previous = target_word

    softmax_loss = dy.esum(step_losses)

    return kl_loss, softmax_loss
Exemple #36
0
 def init_features(self, embeddings, train=False):
     """
     Compute self.input_reps and self.empty_rep from embeddings of the whole input sequence.
     Does nothing when self.params is empty.
     :param embeddings: list of [(key, list of vectors embeddings per time step)] per feature
     :param train: are we training now?
     """
     if not self.params:
         return
     keys, per_feature = zip(*embeddings)
     inputs = []
     for step_vectors in zip(*per_feature):  # one tuple of vectors per time step
         inputs.append(self.mlp.evaluate(zip(keys, step_vectors), train=train))
     dropout_desc = self.dropout if train else "disabled"
     self.config.print("Transducing %d inputs with dropout %s" % (len(inputs), dropout_desc), level=4)
     self.input_reps = self.transduce(inputs, train)
     # Without a maximum length, every input must come back from transduce().
     limit = self.max_length or np.iinfo(int).max
     expected = min(len(inputs), limit)
     assert len(self.input_reps) == expected, \
         "transduce() returned incorrect number of elements: %d != %d" % (len(self.input_reps), expected)
     self.empty_rep = dy.inputVector(np.zeros(self.lstm_layer_dim, dtype=float))
Exemple #37
0
 def generate_inputs(self, features, axis):
     """
     Yield the network inputs for a feature dict, visiting the keys in sorted order.
     Numeric features yield (key, input vector); lookup features yield one (key, embedding)
     per value; indexed features are collected and evaluated by the BiRNNs of the axis
     after all other features; features without a lookup table are ignored.
     :param features: dict of feature key -> list of values
     :param axis: axis passed to self.get_birnns
     """
     pending_indices = []  # list, not set, in order to maintain consistent order
     for name, values in sorted(features.items()):
         spec = self.input_params[name]
         table = self.params.get(name)
         if spec.numeric:
             yield name, dy.inputVector(values)
         elif spec.indexed:
             # Collected now, looked up after the loop. DenseFeatureExtractor collapsed
             # features so there are no repetitions between them.
             pending_indices.extend(values)
         elif table is None:
             continue  # ignored feature: nothing yielded and nothing logged
         else:
             for x in values:
                 yield name, (self.get_empty_values(name) if x == MISSING_VALUE else table[x])
         self.config.print(lambda: "%s: %s" % (name, values), level=4)
     if pending_indices:
         for birnn in self.get_birnns(axis):
             yield from birnn.evaluate(pending_indices)
def calc_sent_loss(sent):
  """Sum the binary log losses of predicting the bit codes of context words.

  For every position, the N words on each side (padded with S outside the
  sentence) are encoded as nbits-wide binary vectors and compared with the
  logistic output computed from the centre word's embedding.

  :param sent: sequence of word ids
  :return: DyNet expression with the summed loss
  """
  # Create a computation graph
  dy.renew_cg()

  # Get embeddings for the sentence
  embeddings = [W_w_p[word] for word in sent]

  all_losses = []
  for i, centre in enumerate(embeddings):
    scores = dy.logistic(W_c * centre)
    left = [sent[j] if j >= 0 else S for j in range(i - N, i)]
    right = [sent[j] if j < len(sent) else S for j in range(i + 1, i + N + 1)]
    # Zero-padded binary code of each context word id.
    bit_codes = [[float(bit) for bit in np.binary_repr(word).zfill(nbits)]
                 for word in left + right]
    targets = [dy.inputVector(code) for code in bit_codes]
    for target in targets:
      all_losses.append(dy.binary_log_loss(scores, target))
  return dy.esum(all_losses)
Exemple #39
0
    def BuildLMGraph(self, sents):
        """Build the batched language-model loss for a minibatch of sentences.

        The number of steps is taken from the first sentence; shorter
        sentences are padded with the "<s>" id and their padded positions are
        masked out of the loss.

        :param sents: list of token sequences; tokens are looked up in ``vocab.w2i``
        :return: tuple ``(loss, tot_chars)``: the loss summed over the batch as
            a DyNet expression, and the number of unpadded positions
        """
        dy.renew_cg()
        # initialize the RNN
        init_state = self.builder.initial_state()
        # parameters -> expressions
        R = dy.parameter(self.R)
        bias = dy.parameter(self.bias)

        S = vocab.w2i["<s>"]
        batch_size = len(sents)

        # ids and masks for each step
        tot_chars = 0
        cids = []
        masks = []
        for pos in range(len(sents[0])):
            step_ids = []
            step_mask = []
            for sent in sents:
                has_token = len(sent) > pos
                step_ids.append(vocab.w2i[sent[pos]] if has_token else S)
                step_mask.append(1 if has_token else 0)
            cids.append(step_ids)
            masks.append(step_mask)
            tot_chars += sum(step_mask)

        # The first step's ids start the RNN.
        # NOTE(review): lookup_batch is called without the dy. prefix here,
        # unlike the call inside the loop below - confirm it is in scope.
        s = init_state.add_input(lookup_batch(self.lookup, cids[0]))

        losses = []

        # feed ids into the RNN and predict the next one
        for cid, mask in zip(cids[1:], masks[1:]):
            score = dy.affine_transform([bias, R, s.output()])
            loss = dy.pickneglogsoftmax_batch(score, cid)
            # Only the last sentence's mask entry is checked before masking.
            if mask[-1] != 1:
                mask_expr = dy.reshape(dy.inputVector(mask), (1,), batch_size)
                loss = loss * mask_expr

            losses.append(loss)
            # update the state of the RNN
            s = s.add_input(dy.lookup_batch(self.lookup, cid))

        return dy.sum_batches(dy.esum(losses)), tot_chars
Exemple #40
0
def calc_lm_loss(sents):
    """Build the batched language-model loss for a minibatch of sentences.

    The number of steps is taken from the first sentence; shorter sentences
    are padded with ``S`` and their padded positions are masked out.

    :param sents: list of word-id sequences
    :return: tuple ``(loss, tot_words)``: the loss summed over the batch as a
        DyNet expression, and the number of unpadded positions
    """
    dy.renew_cg()

    # initialize the RNN
    f_init = RNN.initial_state()
    batch_size = len(sents)

    # word ids and masks for each step
    tot_words = 0
    wids = []
    masks = []
    for pos in range(len(sents[0])):
        step_ids = []
        step_mask = []
        for sent in sents:
            has_word = len(sent) > pos
            step_ids.append(sent[pos] if has_word else S)
            step_mask.append(1 if has_word else 0)
        wids.append(step_ids)
        masks.append(step_mask)
        tot_words += sum(step_mask)

    # start the rnn by inputting S for every sentence in the batch
    s = f_init.add_input(dy.lookup_batch(WORDS_LOOKUP, [S] * batch_size))

    # feed word vectors into the RNN and predict the next word
    losses = []
    for wid, mask in zip(wids, masks):
        score = dy.affine_transform([b_exp, W_exp, s.output()])
        loss = dy.pickneglogsoftmax_batch(score, wid)
        # Only the last sentence's mask entry is checked before masking.
        if mask[-1] != 1:
            mask_expr = dy.reshape(dy.inputVector(mask), (1,), batch_size)
            loss = loss * mask_expr
        losses.append(loss)
        # update the state of the RNN
        s = s.add_input(dy.lookup_batch(WORDS_LOOKUP, wid))

    return dy.sum_batches(dy.esum(losses)), tot_words
Exemple #41
0
 def get_empty_values(self, key):
     """
     Return the cached zero vector for a feature key, creating and caching it on first use.
     :param key: feature key; self.input_params[key].dim gives the vector length
     :return: the value stored in self.empty_values for the key
     """
     cached = self.empty_values.get(key)
     if cached is not None:
         return cached
     fresh = dy.inputVector(np.zeros(self.input_params[key].dim, dtype=float))
     self.empty_values[key] = fresh
     return fresh
Exemple #42
0
  alpha = 0.05  # smoothing of training loss for reporting
  start = time.time()
  dev_time = 0
  report = args.minibatch_size * 30
  dev_report = args.minibatch_size * 600
  for epoch in range(50):
    random.shuffle(training)
    print(("Epoch {} starting".format(epoch+1)))
    i = 0
    while i < len(training):
      dy.renew_cg()
      mbsize = min(args.minibatch_size, len(training) - i)
      minibatch = training[i:i+mbsize]
      losses = []
      for lbl, img in minibatch:
        x = dy.inputVector(img)
        logits = classify(x, dropout=True)
        loss = dy.pickneglogsoftmax(logits, lbl)
        losses.append(loss)
      mbloss = dy.esum(losses) / mbsize
      mbloss.backward()
      sgd.update()

      # eloss is an exponentially smoothed loss.
      if eloss is None:
        eloss = mbloss.scalar_value()
      else:
        eloss = mbloss.scalar_value() * alpha + eloss * (1.0 - alpha)

      # Do dev evaluation here:
      if (i > 0) and (i % dev_report == 0):
def calc_loss(sents):
    """Compute the masked attentional encoder-decoder loss for a minibatch.

    :param sents: list of ``(source, target)`` pairs of word-id sequences
    :return: tuple ``(loss, num_words)``: the loss summed over the batch as a
        DyNet expression, and the number of unpadded target positions
    """
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src_sents = [x[0] for x in sents]
    tgt_sents = [x[1] for x in sents]
    src_cws = []

    src_len = [len(sent) for sent in src_sents]
    max_src_len = np.max(src_len)
    num_words = 0

    # NOTE: the source side is not padded, so every source sentence in the
    # batch must have max_src_len words or sent[i] raises IndexError.
    for i in range(max_src_len):
        src_cws.append([sent[i] for sent in src_sents])

    # get the outputs of the first LSTM; each step yields a pair of states
    # whose outputs are concatenated
    src_outputs = [dy.concatenate([x.output(), y.output()]) for x, y in LSTM_SRC.add_inputs([dy.lookup_batch(LOOKUP_SRC, cws) for cws in src_cws])]
    src_output = src_outputs[-1]

    # gets the parameters for the attention; the source-side projection does
    # not change between decoder steps, so it is computed once
    src_output_matrix = dy.concatenate_cols(src_outputs)
    w1_att_src = dy.parameter(w1_att_src_p)
    fixed_attentional_component = w1_att_src * src_output_matrix

    # now decode
    all_losses = []

    # Decoder
    # need to mask padding at end of sentence
    tgt_cws = []
    # Lengths are taken from the target sentences. Measuring the elements of
    # `sents` instead would give the length of each (source, target) pair.
    tgt_len = [len(sent) for sent in tgt_sents]
    max_tgt_len = np.max(tgt_len)
    masks = []

    for i in range(max_tgt_len):
        # Short sentences are padded with the end-of-sentence id and masked out.
        tgt_cws.append([sent[i] if len(sent) > i else eos_trg for sent in tgt_sents])
        mask = [(1 if len(sent) > i else 0) for sent in tgt_sents]
        masks.append(mask)
        num_words += sum(mask)

    current_state = LSTM_TRG_BUILDER.initial_state().set_s([src_output, dy.tanh(src_output)])
    prev_words = tgt_cws[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    W_m = dy.parameter(W_m_p)
    b_m = dy.parameter(b_m_p)

    for next_words, mask in zip(tgt_cws[1:], masks):
        # feed the previous words into the decoder
        current_state = current_state.add_input(dy.lookup_batch(LOOKUP_TRG, prev_words))
        output_embedding = current_state.output()
        att_output, _ = calc_attention(src_output_matrix, output_embedding, fixed_attentional_component)
        # Combine decoder output and attention context before the softmax layer.
        middle_expr = dy.tanh(dy.affine_transform([b_m, W_m, dy.concatenate([output_embedding, att_output])]))
        s = dy.affine_transform([b_sm, W_sm, middle_expr])
        loss = (dy.pickneglogsoftmax_batch(s, next_words))
        # Turn the per-sentence mask into a batched scalar and zero out padding.
        mask_expr = dy.inputVector(mask)
        mask_expr = dy.reshape(mask_expr, (1,), len(sents))
        mask_loss = loss * mask_expr
        all_losses.append(mask_loss)
        prev_words = next_words
    return dy.sum_batches(dy.esum(all_losses)), num_words