Example no. 1
    def calculate_LM_loss(self, sequence):

        # Renew the computation graph
        dy.renew_cg()

        # Initialize the RNN
        f_init = self.RNN.initial_state()

        # Initialize the parameters
        W_exp = dy.parameter(self.W_sm)
        b_exp = dy.parameter(self.b_sm)

        # Get the ids for ICD codes
        wids = [self.vw.w2i[w] for w in sequence]
        #print wids
        #print wids[0]
        #print dy.lookup(self.lookup, wids[0])

        # Start the RNN
        s = f_init.add_input(dy.lookup(self.lookup, wids[-1]))

        # Feed the vectors into the RNN and predict the next code
        losses = []
        for wid in wids:
            score = W_exp * s.output() + b_exp
            loss = dy.pickneglogsoftmax(score, wid)
            losses.append(loss)
            s = s.add_input(self.lookup[wid])
        return dy.esum(losses)
Example no. 2
def build_tagging_graph_lvl1(words, tags, builders):
    dy.renew_cg()
    f_init, b_init = [b.initial_state() for b in builders]

    wembs = [E[w] for w in words]
    wembs = [dy.noise(we, 0.1) for we in wembs]

    fw = [x.output() for x in f_init.add_inputs(wembs)]
    bw = [x.output() for x in b_init.add_inputs(reversed(wembs))]

    # fw_rnn_hidden_outs = [x.value() for x in fw]
    # bw_rnn_hidden_outs = [x.value() for x in bw]

    # print ("Transducing")
    # fw_rnn_hidden_outs = f_init.transduce(wembs)
    # bw_rnn_hidden_outs = b_init.transduce(reversed(wembs))

    if MLP:
        H = dy.parameter(pH)
        O = dy.parameter(pO)
    else:
        O = dy.parameter(pO)
    errs = []
    for f, b, t in zip(fw, reversed(bw), tags):
        f_b = dy.concatenate([f, b])
        if MLP:
            r_t = O * (dy.tanh(H * f_b))
        else:
            r_t = O * f_b
        err = dy.pickneglogsoftmax(r_t, t)
        errs.append(err)

    return {'err': dy.esum(errs), 'fw': fw, 'bw': bw}
Example no. 3
def softmax(
        edges,
        labels_exprs,
        label_dict,  # type: Dictionary
        is_train):
    labeled_edges = []
    loss = dn.scalarInput(0.0)
    for edge, r_scores_expr in zip(edges, labels_exprs):
        head, label, modifier = edge
        if head == 0:
            if not is_train:
                labeled_edges.append(
                    graph_utils.Edge(edge.source, "ROOT", edge.target))
            continue
        if is_train:
            gold_label_index = label_dict.word_to_int[label]
            loss += dn.pickneglogsoftmax(r_scores_expr, gold_label_index)
        else:
            label_index = np.argmax(r_scores_expr.value())
            label = label_dict.int_to_word[label_index]
            labeled_edges.append(
                graph_utils.Edge(edge.source, label, edge.target))
    if is_train:
        return loss
    else:
        return labeled_edges
Example no. 4
def trainAlgo(train_tokens, train_labels, num_epochs, num_batches_training,
              batch_size, w2i, embedding_parameters, pW, pb, modelPath,
              RNN_unit, trainer, RNN_model):
    # i = epoch index
    # j = batch index
    # k = sentence index (inside batch j)
    # l = token index (inside sentence k)

    epoch_losses = []
    overall_accuracies = []
    sentence_accuracies = []

    start_train_time = time.clock()
    for i in range(num_epochs):
        epoch_loss = []
        print("Starting epoch: " + str(i + 1))
        start_epoch_time = time.clock()
        for j in range(num_batches_training):
            # begin a clean computational graph
            dy.renew_cg()
            # build the batch
            batch_tokens = train_tokens[j * batch_size:(j + 1) * batch_size]
            batch_labels = train_labels[j * batch_size:(j + 1) * batch_size]
            # iterate through the batch
            for k in range(len(batch_tokens)):
                # prepare input: words to indexes
                seq_of_idxs = words2indexes(batch_tokens[k], w2i)
                # make a forward pass
                preds = forward_pass(seq_of_idxs, embedding_parameters, pW, pb,
                                     RNN_unit)
                # calculate loss for each token in each example
                loss = [
                    dy.pickneglogsoftmax(preds[l], batch_labels[k][l])
                    for l in range(len(preds))
                ]
                # sum the loss for each token
                sent_loss = dy.esum(loss)
                # backpropagate the loss for the sentence
                sent_loss.backward()
                trainer.update()
                epoch_loss.append(sent_loss.npvalue())
                # print("epoch: " + str(i+1) + " batch: " + str(j+1) + " loss: " + str(np.average(epoch_loss)) + "\r")
        # record epoch loss
        epoch_losses.append(np.sum(epoch_loss))
        print("Train loss after epoch: " + str(i + 1) + " loss: " +
              str(np.average(epoch_loss)))
        print("Epoch " + str(i + 1) + " Time Taken: " +
              str(time.clock() - start_epoch_time))
        # get accuracy on test set
        # # print("Train loss after epoch {}".format(i + 1))
        # epoch_predictions = test(train_tokens, train_labels, num_batches_training, w2i, embedding_parameters, pW, pb)
        # epoch_overall_accuracy, epoch_sentence_accuracy = evaluate(epoch_predictions, train_labels)
        # overall_accuracies.append(epoch_overall_accuracy)
        # sentence_accuracies.append(epoch_sentence_accuracy)

    print("Training Completed. Time taken: " +
          str(time.clock() - start_train_time))
    print("Saving model in " + str(modelPath))
    RNN_model.save(modelPath)
    print("Done!")
Example no. 5
def Train(instances, itercount):
    dy.renew_cg()
    ontoparser.initialize_graph_nodes(train=True)

    loss = []
    errors = 0.0
    for instance in instances:
        fexpr, sexpr, groundtruth = instance
        # context insensitive embeddings or local embeddings
        subtype = [sb.lower()
                   for sb in fexpr.split()]  #if sb.lower() not in stop]
        supertype = [sp.lower()
                     for sp in sexpr.split()]  #if sp.lower() not in stop]
        fembs, DSTATUS_X = ontoparser.get_linear_embd(subtype)
        sembs, DSTATUS_Y = ontoparser.get_linear_embd(supertype)

        #if (DSTATUS_X is False) or (DSTATUS_Y is False): continue
        fembs = fembs[0] if len(fembs) == 1 else dy.average(fembs)
        sembs = sembs[0] if len(sembs) == 1 else dy.average(sembs)

        x = dy.concatenate([fembs, sembs])

        #e_dist = dy.squared_distance(fembs, sembs)
        e_dist = 1 - distance.cosine(fembs.npvalue(), sembs.npvalue())
        #weighted_x = x * e_dist
        output = ontoparser.W2 * (dy.rectify(ontoparser.W1 * x) +
                                  ontoparser.b1) + ontoparser.b2

        prediction = np.argmax(output.npvalue())
        loss.append(
            dy.pickneglogsoftmax(output, ontoparser.meta.tdmaps[groundtruth]))
        #if ((ontoparser.meta.rmaps[prediction] == "Hypernym") and ("Hypernym" != groundtruth)) and (e_dist < 0.5):
        #    loss[-1] += -log(0.6)
        errors += 0 if groundtruth == ontoparser.meta.rmaps[prediction] else 1
    return loss, errors
Example no. 6
def build_tagging_graph_lvl2(embeds, words, tags, builders):
    # dy.renew_cg()
    f_init, b_init = [b.initial_state() for b in builders]

    # wembs = [E[w] for w in words]
    # wembs = [dy.noise(we,0.1) for we in wembs]

    fw = [x.output() for x in f_init.add_inputs(embeds)]
    bw = [x.output() for x in b_init.add_inputs(reversed(embeds))]

    # fw = [x.output() for x in f_init.add_inputs(wembs)]
    # bw = [x.output() for x in b_init.add_inputs(reversed(wembs))]

    # fw = f_init.transduce(embeds)
    # bw = b_init.transduce(reversed(embeds))

    if MLP:
        H = dy.parameter(pH)
        O = dy.parameter(pO)
    else:
        O = dy.parameter(pO)
    errs = []
    for f, b, t in zip(fw, reversed(bw), tags):
        f_b = dy.concatenate([f, b])
        if MLP:
            r_t = O * (dy.tanh(H * f_b))
        else:
            r_t = O * f_b
        err = dy.pickneglogsoftmax(r_t, t)
        errs.append(err)

    return dy.esum(errs)
Example no. 7
def calc_lm_loss(sent):

    dy.renew_cg()
    # parameters -> expressions
    W_exp = dy.parameter(W_sm)
    b_exp = dy.parameter(b_sm)

    # initialize the RNN
    f_init = RNN.initial_state()

    # get the word ids
    wids = [vw.w2i[w] for w in sent]

    # start the rnn by inputting "<s>"
    s = f_init.add_input(WORDS_LOOKUP[wids[-1]]) 

    # feed word vectors into the RNN and predict the next word
    losses = []
    for wid in wids:
        # calculate the softmax and loss
        score = W_exp * s.output() + b_exp
        loss = dy.pickneglogsoftmax(score, wid)
        losses.append(loss)
        # update the state of the RNN
        s = s.add_input(WORDS_LOOKUP[wid]) 
    
    return dy.esum(losses)
Example no. 8
    def decode_to_loss(self, vectors, output):
        w = dy.parameter(self.w_softmax)
        b = dy.parameter(self.b_softmax)
        w1 = dy.parameter(self.attention_source)
        output = list(output)

        encoded_states = dy.concatenate_cols(vectors)

        prev_output_embeddings = self.target_lookup[self.eos_target]
        current_state = self.decoder.initial_state().add_input(
            dy.concatenate(
                [dy.vecInput(self.hidden_size * 2), prev_output_embeddings]))
        losses = []
        attentional_component = w1 * encoded_states
        for next_word in output:

            vector = dy.concatenate([
                self.attention(encoded_states, current_state,
                               attentional_component), prev_output_embeddings
            ])

            current_state = current_state.add_input(vector)
            s = dy.affine_transform([b, w, current_state.output()])
            item_loss = dy.pickneglogsoftmax(s, next_word)
            losses.append(item_loss)
            prev_output_embeddings = self.target_lookup[next_word]

        loss = dy.esum(losses)
        return loss
Example no. 9
def sent_loss_precalc(words, tags, vecs):
    errs = []
    for v, t in zip(vecs, tags):
        tid = vt.w2i[t]
        err = dy.pickneglogsoftmax(v, tid)
        errs.append(err)
    return dy.esum(errs)
Example no. 10
    def encode(self, instance, wids):
        dy.renew_cg()
        W_y = dy.parameter(self.W_y)
        b_y = dy.parameter(self.b_y)
        src_sent = instance.split()
        losses = []
        total_words = 0

        # Encoder: feed each source word into the encoder RNN
        enc_state = self.enc_builder.initial_state()
        for current_word in src_sent:
            enc_state = enc_state.add_input(self.src_lookup[wids[current_word]])
        encoded = (W_y * enc_state.output()) + b_y

        # Decoder: initialize with the encoded source representation
        dec_state = self.dec_builder.initial_state(encoded)
        # Calculate the loss of predicting each next word from the current one
        for (cw, nw) in zip(src_sent, src_sent[1:]):
            dec_state = dec_state.add_input(self.tgt_lookup[wids[cw]])
            ystar = (W_y * dec_state.output()) + b_y
            losses.append(dy.pickneglogsoftmax(ystar, wids[nw]))
            total_words += 1
        return dy.esum(losses), total_words
Example no. 11
    def build_nnlm_graph(self, dictionary):
        dy.renew_cg()
        M = self.model.add_lookup_parameters((len(self.wids), self.EMB_SIZE))
        W_mh = self.model.add_parameters(
            (self.HID_SIZE, self.EMB_SIZE * (self.N - 1)))
        b_hh = self.model.add_parameters((self.HID_SIZE))
        W_hs = self.model.add_parameters((len(self.wids), self.HID_SIZE))
        b_s = self.model.add_parameters((len(self.wids)))

        w_xh = dy.parameter(W_mh)
        b_h = dy.parameter(b_hh)
        W_hy = dy.parameter(W_hs)
        b_y = dy.parameter(b_s)
        errs = []
        for context, next_word in dictionary:
            # look up the embeddings of the two context words
            k = M[self.wids[context.split()[0]]]
            kk = M[self.wids[context.split()[1]]]
            # concatenate the context embeddings as expressions so gradients
            # flow back into the lookup parameters
            x = dy.concatenate([k, kk])
            h_val = dy.tanh(w_xh * x + b_h)
            y_val = W_hy * h_val + b_y
            err = dy.pickneglogsoftmax(y_val, self.wids[next_word])
            errs.append(err)
        gen_err = dy.esum(errs)
        return gen_err
Example no. 12
 def sent_loss(self, words, tags, ltags):
     self.eval = False
     vecs = self.build_tagging_graph(words, ltags)
     for v, t in zip(vecs, tags):
         tid = self.meta.t2i[t]
         err = dy.pickneglogsoftmax(v, tid)
         self.loss.append(err)
Example no. 13
def calc_loss(sent):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src = sent[0]
    trg = sent[1]

    #initialize the LSTM
    init_state_src = LSTM_SRC_BUILDER.initial_state()

    #get the output of the first LSTM
    src_output = init_state_src.add_inputs([LOOKUP_SRC[x]
                                            for x in src])[-1].output()
    #now step through the output sentence
    all_losses = []

    current_state = LSTM_TRG_BUILDER.initial_state().set_s(
        [src_output, dy.tanh(src_output)])
    prev_word = trg[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    for next_word in trg[1:]:
        #feed the current state into the
        current_state = current_state.add_input(LOOKUP_TRG[prev_word])
        output_embedding = current_state.output()

        s = dy.affine_transform([b_sm, W_sm, output_embedding])
        all_losses.append(dy.pickneglogsoftmax(s, next_word))

        prev_word = next_word
    return dy.esum(all_losses)
Example no. 14
def train():

    # i = epoch index
    # j = batch index
    # k = sentence index (inside batch j)
    # l = token index (inside sentence k)

    for i in range(num_epochs):
        random.seed(i+100)
        random.shuffle(train_tokens) 
        random.seed(i+100)
        random.shuffle(train_labels) 
        for j in range(num_batches_training):
            # begin a clean computational graph
            dy.renew_cg()
            # build the batch
            batch_tokens = train_tokens[j*batch_size:(j+1)*batch_size]
            batch_labels = train_labels[j*batch_size:(j+1)*batch_size]
            # iterate through the batch
            for k in range(len(batch_tokens)):
                # prepare input: words to indexes
                seq_of_idxs = words2indexes(batch_tokens[k], w2i)
                # make a forward pass
                preds = forward_pass(seq_of_idxs)
                # calculate loss for each token in each example
                loss = [dy.pickneglogsoftmax(preds[l], batch_labels[k][l]) for l in range(len(preds))]
                # sum the loss for each token
                sent_loss = dy.esum(loss)
                # backpropagate the loss for the sentence
                sent_loss.backward()
                trainer.update()
Example no. 15
    def decode(self, states, y, encoded_input, train=False):
        def sample(probs):
            return np.argmax(probs)

        s = self.decoder_rnn.initial_state()

        start_encoded = self.l2e["sep"].encode("<s>")
        out = []
        loss = dy.scalarInput(0.)
        #s =  s.add_input(states[-1]) #s.add_input(dy.concatenate([start_encoded, states[-1]]))
        s = s.add_input(dy.concatenate([start_encoded, states[-1]]))

        generated_string = []

        for char in y:
            true_char_encoded = self.l2e["l"].encode(char)

            scores = self.predict_letter(s.output())

            generated_string.append(scores)

            weighted_states = self.attend(s.output(), states, encoded_input)
            #s = s.add_input(weighted_states) #s.add_input(dy.concatenate([true_char_encoded, weighted_states]))
            s = s.add_input(
                dy.concatenate([true_char_encoded, weighted_states]))
            if char in self.C2I:
                loss += dy.pickneglogsoftmax(scores, self.C2I[char])

        return loss, generated_string
Example no. 16
def calc_loss(sent):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src = sent[0]
    trg = sent[1]


    #initialize the LSTM
    init_state_src = LSTM_SRC_BUILDER.initial_state()

    #get the output of the first LSTM
    src_output = init_state_src.add_inputs([LOOKUP_SRC[x] for x in src])[-1].output()
    #now step through the output sentence
    all_losses = []

    current_state = LSTM_TRG_BUILDER.initial_state().set_s([src_output, dy.tanh(src_output)])
    prev_word = trg[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    for next_word in trg[1:]:
        #feed the current state into the 
        current_state = current_state.add_input(LOOKUP_TRG[prev_word])
        output_embedding = current_state.output()

        s = dy.affine_transform([b_sm, W_sm, output_embedding])
        all_losses.append(dy.pickneglogsoftmax(s, next_word))

        prev_word = next_word
    return dy.esum(all_losses)
Example no. 17
    def train_sentence(self, words, word_idxs):
        dy.renew_cg()
        forward_init, backward_init = [
            b.initial_state() for b in self.builders
        ]
        embed_words = words.tensor
        # entities = words.ents
        forward = forward_init.transduce(embed_words)
        backward = backward_init.transduce(reversed(embed_words))

        errors = []
        encodings = []
        good = bad = 0.0
        for f, b, tag in zip(forward, backward, word_idxs):
            r_t = self(dy.concatenate([f, b]))
            temp_val = dy.softmax(r_t).value()
            chosen = np.argmax(temp_val)
            encodings.append(temp_val)
            good += 1 if chosen == tag else 0
            bad += 1 if chosen != tag else 0
            error = dy.pickneglogsoftmax(r_t, tag)
            errors.append(error)

        sum_errors = dy.esum(errors)
        loss = sum_errors.scalar_value()
        sum_errors.backward()
        self.trainer.update()
        accuracy = 100 * (good / (good + bad))
        print(str(accuracy), str(loss))

        return encodings
Example no. 18
def calc_lm_loss(sent):
    dy.renew_cg()
    # parameters -> expressions
    W_exp = dy.parameter(W_sm)
    b_exp = dy.parameter(b_sm)

    # initialize the RNN
    f_init = RNN.initial_state()

    # get the wids and masks for each step
    tot_words = len(sent)

    # start the rnn by inputting "<s>"
    s = f_init.add_input(WORDS_LOOKUP[S])

    # feed word vectors into the RNN and predict the next word
    losses = []
    for wid in sent:
        # calculate the softmax and loss
        score = W_exp * s.output() + b_exp
        loss = dy.pickneglogsoftmax(score, wid)
        losses.append(loss)
        # update the state of the RNN
        wemb = WORDS_LOOKUP[wid]
        s = s.add_input(wemb)

    return dy.esum(losses), tot_words
Example no. 19
def Train(sentence, epoch, dynamic=True):
    parser.eval = False
    if parser.meta.palgo in ['standard', 'swap']:
        configuration = Configuration(sentence, standard=True)
    else:
        configuration = Configuration(sentence)
    pr_bi_exps, pos_errs = parser.feature_extraction(sentence[1:-1])
    while not parser.transitionSystem.inFinalState(configuration):
        xo = parser.predict(configuration, pr_bi_exps)
        if parser.meta.palgo in ['swap', 'standard']: # Static Oracle
            goldTransitionFunc, goldLabel = parser.transitionSystem.LabelledAction(configuration)
            goldTransition = goldTransitionFunc.__name__
            parser.loss.append(dy.pickneglogsoftmax(xo, parser.meta.td2i[(goldTransition, goldLabel)]))
            goldTransitionFunc(configuration, goldLabel)
        else: # Dynamic Oracle
            output_probs = dy.softmax(xo).npvalue()
            ranked_actions = sorted(zip(output_probs, range(len(output_probs))), reverse=True)
            pscore, paction = ranked_actions[0]

            #{0: <bound method arceager.SHIFT>}
            validTransitions, allmoves = parser.transitionSystem.get_valid_transitions(configuration) 
            while parser.transitionSystem.action_cost(\
                    configuration, parser.meta.i2td[paction], parser.meta.transitions, validTransitions) > 500:
               ranked_actions = ranked_actions[1:]
               pscore, paction = ranked_actions[0]

            gaction = None
            for i,(score, ltrans) in enumerate(ranked_actions):
               cost = parser.transitionSystem.action_cost(\
                            configuration, parser.meta.i2td[ltrans], parser.meta.transitions, validTransitions)
               if cost == 0:
                  gaction = ltrans
                  need_update = (i > 0)
                  break

            gtransitionstr, goldLabel = parser.meta.i2td[gaction]
            ptransitionstr, predictedLabel = parser.meta.i2td[paction]
            if dynamic and (epoch > 2) and (np.random.random() < 0.9):
               predictedTransitionFunc = allmoves[parser.meta.transitions[ptransitionstr]]
               predictedTransitionFunc(configuration, predictedLabel)
            else:
               goldTransitionFunc = allmoves[parser.meta.transitions[gtransitionstr]]
               goldTransitionFunc(configuration, goldLabel)
            parser.loss.append(dy.pickneglogsoftmax(xo, parser.meta.td2i[(gtransitionstr, goldLabel)])) #NOTE original

    parser.loss.extend(pos_errs)
Example no. 20
def calc_loss(words, tags, holder):
    vecs = build_graph(words, holder)
    losses = []
    for v, t in zip(vecs, tags):
        tid = holder.tag2index[t]
        loss = dy.pickneglogsoftmax(v, tid)
        losses.append(loss)
    return dy.esum(losses)
Example no. 21
    def get_loss(self, sentence):
        scores = self.propogate(sentence)
        errs = []
        for i, score in enumerate(scores):
            root_err = dy.pickneglogsoftmax(score, 0)
            errs.append(root_err)

        return dy.esum(errs)
Example no. 22
def sent_loss(words, tags):
    vecs = build_tagging_graph(words)
    errs = []
    for v,t in zip(vecs,tags):
        tid = vt.w2i[t]
        err = dy.pickneglogsoftmax(v, tid)
        errs.append(err)
    return dy.esum(errs)
Example no. 23
 def create_network_return_loss(self,
                                inputs,
                                expected_output,
                                dropout=False):
     out = self(inputs, dropout)
     loss = dy.pickneglogsoftmax(out, expected_output)
     # loss = -dy.log(dy.pick(out, expected_output))
     return loss
Example no. 24
def sent_loss(words, tags):
    vecs = build_tagging_graph(words)
    losses = []
    for v, t in zip(vecs, tags):
        tid = vt.w2i[t]
        loss = dy.pickneglogsoftmax(v, tid)  # cross-entropy loss for this token
        losses.append(loss)
    return dy.esum(losses)  # sum the per-token losses over the whole sentence
Example no. 25
 def calc_loss(self, context, ref_action):
   scores = self.get_scores(context)
   # single mode
   if not Batcher.is_batched(ref_action):
     return dy.pickneglogsoftmax(scores, ref_action)
   # minibatch mode
   else:
     return dy.pickneglogsoftmax_batch(scores, ref_action)
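The example above switches between the single-instance and minibatch forms of the same loss. As a rough, self-contained sketch of how the minibatch form is typically driven (the model, parameter names, and sizes here are illustrative assumptions, not taken from the example):

import dynet as dy

# hypothetical toy model: 10 input ids, 8-dim embeddings, 5 output classes
m = dy.ParameterCollection()
E_p = m.add_lookup_parameters((10, 8))
W_p = m.add_parameters((5, 8))
trainer = dy.SimpleSGDTrainer(m)

dy.renew_cg()
W = dy.parameter(W_p)
ids = [1, 4, 7, 2]               # one input id per batch element
refs = [0, 3, 1, 4]              # one gold class per batch element
x = dy.lookup_batch(E_p, ids)    # batched embedding lookup
scores = W * x                   # scores for all batch elements at once
# one negative log softmax per batch element, summed into a single scalar
loss = dy.sum_batches(dy.pickneglogsoftmax_batch(scores, refs))
loss.value()
loss.backward()
trainer.update()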
Example no. 26
def sent_loss(words, tags):
    vecs = build_tagging_graph(words)
    errs = []
    for v,t in zip(vecs,tags):
        tid = vt.w2i[t]
        err = dy.pickneglogsoftmax(v, tid)
        errs.append(err)
    return dy.esum(errs)
Example no. 27
def calc_loss(sent):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src = sent[0]
    trg = sent[1]

    # initialize the LSTM
    init_state_src = LSTM_SRC_BUILDER.initial_state()

    # get the output of the first LSTM
    src_output = init_state_src.add_inputs([LOOKUP_SRC[x]
                                            for x in src])[-1].output()

    # Now compute mean and standard deviation of source hidden state.
    W_mean = dy.parameter(W_mean_p)
    V_mean = dy.parameter(V_mean_p)
    b_mean = dy.parameter(b_mean_p)

    W_var = dy.parameter(W_var_p)
    V_var = dy.parameter(V_var_p)
    b_var = dy.parameter(b_var_p)

    # The mean vector from the encoder.
    mu = mlp(src_output, W_mean, V_mean, b_mean)
    # This is the diagonal vector of the log co-variance matrix from the encoder
    # (regarding this as the log variance makes future implementation easier)
    log_var = mlp(src_output, W_var, V_var, b_var)

    # Compute KL[N(u(x), sigma(x)) || N(0, I)]
    # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    kl_loss = -0.5 * dy.sum_elems(1 + log_var -
                                  dy.pow(mu, dy.inputVector([2])) -
                                  dy.exp(log_var))

    z = reparameterize(mu, log_var)

    # now step through the output sentence
    all_losses = []

    current_state = LSTM_TRG_BUILDER.initial_state().set_s([z, dy.tanh(z)])
    prev_word = trg[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    for next_word in trg[1:]:
        # feed the current state into the
        current_state = current_state.add_input(LOOKUP_TRG[prev_word])
        output_embedding = current_state.output()

        s = dy.affine_transform([b_sm, W_sm, output_embedding])
        all_losses.append(dy.pickneglogsoftmax(s, next_word))

        prev_word = next_word

    softmax_loss = dy.esum(all_losses)

    return kl_loss, softmax_loss
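This example calls two helpers, mlp and reparameterize, that are not shown here. A minimal sketch of plausible definitions for a Gaussian VAE follows; these are assumptions based on how the helpers are used above, not the original implementations:

import dynet as dy

def mlp(x, W, V, b):
    # one plausible form: a single hidden tanh layer followed by a linear output
    return V * dy.tanh(W * x) + b

def reparameterize(mu, log_var):
    # standard reparameterization trick: z = mu + sigma * eps, with eps ~ N(0, I)
    dim = mu.dim()[0][0]
    eps = dy.random_normal(dim)
    std = dy.exp(log_var * 0.5)
    return mu + dy.cmult(std, eps)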
Example no. 28
    def train(self, epochs=30):

        n = self.generator.get_train_size()
        print "size of training set: ", n
        print "Training..."

        iteration = 0
        losses = []
        loss_avg = 0.

        for i, batch in enumerate(self.generator.generate(mode="train")):

            dy.renew_cg()

            for j, training_example in enumerate(batch):

                x, y, data_sample = training_example
                preds = self._predict(x, training=True)

                loss = dy.scalarInput(0.)

                for agr, pred in zip(self.agreements, preds):

                    true_number = y[agr]
                    loss += dy.pickneglogsoftmax(pred, true_number)

                losses.append(loss)

                if iteration % n % 2500 == 0:
                    print "{}/{}".format(iteration % n, n)

                iteration += 1

                #stopping criteria

                if iteration > epochs * n:

                    return

                # report progress.

                if iteration % n == 0:

                    print "EPOCH {} / {}".format(iteration / n, epochs)
                    print "Average loss: {}".format(loss_avg / n)
                    loss_avg = 0.
                    self.evaluate(mode="dev")
                    #self.collector.collect()
                    losses = []

            if losses:

                loss_sum = dy.esum(losses)
                loss_sum.forward()
                loss_sum.backward()
                self.trainer.update()
                loss_avg += loss_sum.value()
                losses = []
Example no. 29
def Train(sentence, epoch, dynamic=True):
    loss = []
    totalError = 0
    parser.eval = False
    configuration = Configuration(sentence)
    pr_bi_exps, pos_errs = parser.feature_extraction(sentence[1:-1])
    while not parser.isFinalState(configuration):
        rfeatures = parser.basefeaturesEager(configuration.nodes,
                                             configuration.stack,
                                             configuration.b0)
        xi = dy.concatenate([
            pr_bi_exps[id - 1] if id > 0 else parser.pad
            for id, rform in rfeatures
        ])
        xh = parser.pr_W1 * xi
        xh = dy.rectify(xh) + parser.pr_b1
        xo = parser.pr_W2 * xh + parser.pr_b2
        output_probs = dy.softmax(xo).npvalue()
        ranked_actions = sorted(zip(output_probs, range(len(output_probs))),
                                reverse=True)
        pscore, paction = ranked_actions[0]

        validTransitions, allmoves = parser.get_valid_transitions(
            configuration)  #{0: <bound method arceager.SHIFT>}
        while parser.action_cost(configuration, parser.meta.i2td[paction],
                                 parser.meta.transitions,
                                 validTransitions) > 500:
            ranked_actions = ranked_actions[1:]
            pscore, paction = ranked_actions[0]

        gaction = None
        for i, (score, ltrans) in enumerate(ranked_actions):
            cost = parser.action_cost(configuration, parser.meta.i2td[ltrans],
                                      parser.meta.transitions,
                                      validTransitions)
            if cost == 0:
                gaction = ltrans
                need_update = (i > 0)
                break

        gtransitionstr, goldLabel = parser.meta.i2td[gaction]
        ptransitionstr, predictedLabel = parser.meta.i2td[paction]
        if dynamic and (epoch > 2) and (np.random.random() < 0.9):
            predictedTransitionFunc = allmoves[
                parser.meta.transitions[ptransitionstr]]
            predictedTransitionFunc(configuration, predictedLabel)
        else:
            goldTransitionFunc = allmoves[
                parser.meta.transitions[gtransitionstr]]
            goldTransitionFunc(configuration, goldLabel)
        loss.append(
            dy.pickneglogsoftmax(
                xo,
                parser.meta.td2i[(gtransitionstr, goldLabel)]))  #NOTE original

        if need_update: totalError += 1

    return dy.esum(loss) + dy.esum(pos_errs), totalError
Example no. 30
    def train(self, train_file, epochs):
        loss_values = []

        for i in range(epochs):
            print 'started epoch', (i + 1)
            losses = []
            train_data = open(train_file, 'r').read().strip().split('\n')

            # shuffle the training data.
            random.shuffle(train_data)

            step = 0
            for line in train_data:
                fields = line.strip().split(' ')
                # label here means action y, lazy to modify original start code
                features, label = fields[:-1], fields[-1]
                gold_label = self.vocab.action2id(label)
                result = self.build_graph(features)

                # getting loss with respect to negative log softmax function and the gold label.
                loss = dynet.pickneglogsoftmax(result, gold_label)

                # appending to the minibatch losses
                losses.append(loss)
                step += 1

                if len(losses) >= self.properties.minibatch_size:
                    # now we have enough loss values to get loss for minibatch
                    minibatch_loss = dynet.esum(losses) / len(losses)

                    # calling dynet to run forward computation for all minibatch items
                    minibatch_loss.forward()

                    # getting float value of the loss for current minibatch
                    minibatch_loss_value = minibatch_loss.value()

                    # printing info and plotting
                    loss_values.append(minibatch_loss_value)
                    if len(loss_values) % 10 == 0:
                        progress = round(100 * float(step) / len(train_data),
                                         2)
                        print 'current minibatch loss', minibatch_loss_value, 'progress:', progress, '%'

                    # calling dynet to run backpropagation
                    minibatch_loss.backward()

                    # calling dynet to change parameter values with respect to current backpropagation
                    self.updater.update()

                    # empty the loss vector
                    losses = []

                    # refresh the memory of dynet
                    dynet.renew_cg()

            # there are still some minibatch items in the memory but they are smaller than the minibatch size
            # so we ask dynet to forget them
            dynet.renew_cg()
Example no. 31
 def _build_lm_graph(self, sent):
     state = self.builder.initial_state()
     errs = []
     for (cw, nw) in zip(sent, sent[1:]):
         emb = dy.lookup(self.embs, cw)
         state = state.add_input(emb)
         scores = self._get_scores(state)
         errs.append(dy.pickneglogsoftmax(scores, nw))
     return errs
Example no. 32
 def sent_loss(self, sent):
     words, tags = map(list, zip(*sent))
     vecs = self.build_tagging_graph(words)
     errs = []
     for v, t in zip(vecs, tags):
         tid = self.vt.w2i[t]
         err = dy.pickneglogsoftmax(v, tid)
         errs.append(err)
     return dy.esum(errs)
Example no. 33
def calc_loss(sent):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    src = sent[0]
    trg = sent[1]

    # initialize the LSTM
    init_state_src = LSTM_SRC_BUILDER.initial_state()

    # get the output of the first LSTM
    src_output = init_state_src.add_inputs([LOOKUP_SRC[x] for x in src])[-1].output()

    # Now compute mean and standard deviation of source hidden state.
    W_mean = dy.parameter(W_mean_p)
    V_mean = dy.parameter(V_mean_p)
    b_mean = dy.parameter(b_mean_p)

    W_var = dy.parameter(W_var_p)
    V_var = dy.parameter(V_var_p)
    b_var = dy.parameter(b_var_p)

    # The mean vector from the encoder.
    mu = mlp(src_output, W_mean, V_mean, b_mean)
    # This is the diagonal vector of the log co-variance matrix from the encoder
    # (regarding this as the log variance makes future implementation easier)
    log_var = mlp(src_output, W_var, V_var, b_var)

    # Compute KL[N(u(x), sigma(x)) || N(0, I)]
    # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
    kl_loss = -0.5 * dy.sum_elems(1 + log_var - dy.pow(mu, dy.inputVector([2])) - dy.exp(log_var))

    z = reparameterize(mu, log_var)

    # now step through the output sentence
    all_losses = []

    current_state = LSTM_TRG_BUILDER.initial_state().set_s([z, dy.tanh(z)])
    prev_word = trg[0]
    W_sm = dy.parameter(W_sm_p)
    b_sm = dy.parameter(b_sm_p)

    for next_word in trg[1:]:
        # feed the current state into the
        current_state = current_state.add_input(LOOKUP_TRG[prev_word])
        output_embedding = current_state.output()

        s = dy.affine_transform([b_sm, W_sm, output_embedding])
        all_losses.append(dy.pickneglogsoftmax(s, next_word))

        prev_word = next_word

    softmax_loss = dy.esum(all_losses)

    return kl_loss, softmax_loss
Example no. 34
def transduce(seq,Y):
    seq = [E[i] for i in seq]
    fw = fwR.initial_state().transduce(seq)

    # this UNUSED part affects strategy 2
    XXX = fwR2.initial_state().transduce([E[3],E[5]])

    W = W_.expr()
    outs = [W*z for z in fw]
    losses = [dy.pickneglogsoftmax(o,y) for o,y in zip(outs,Y)]
    s = dy.esum(losses)
    return s
Example no. 35
def calc_sent_loss(sent):
  # Create a computation graph
  dy.renew_cg()
  # The initial history is equal to end of sentence symbols
  hist = [S] * N
  # Step through the sentence, including the end of sentence token
  all_losses = []
  for next_word in sent + [S]:
    s = calc_score_of_history(hist)
    all_losses.append(dy.pickneglogsoftmax(s, next_word))
    hist = hist[1:] + [next_word]
  return dy.esum(all_losses)
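The example above leans on a calc_score_of_history helper that is not shown. A plausible self-contained sketch for a feed-forward n-gram language model follows; the parameter names and sizes are illustrative assumptions, not the original code:

import dynet as dy

N, EMB_SIZE, HID_SIZE, VOCAB_SIZE = 2, 64, 128, 10000
model = dy.ParameterCollection()
W_emb = model.add_lookup_parameters((VOCAB_SIZE, EMB_SIZE))
W_h_p = model.add_parameters((HID_SIZE, EMB_SIZE * N))
b_h_p = model.add_parameters((HID_SIZE))
W_sm_p = model.add_parameters((VOCAB_SIZE, HID_SIZE))
b_sm_p = model.add_parameters((VOCAB_SIZE))

def calc_score_of_history(words):
    # concatenate the embeddings of the N history words
    emb = dy.concatenate([W_emb[x] for x in words])
    # one hidden tanh layer, then unnormalized scores over the vocabulary
    h = dy.tanh(dy.affine_transform([dy.parameter(b_h_p), dy.parameter(W_h_p), emb]))
    return dy.affine_transform([dy.parameter(b_sm_p), dy.parameter(W_sm_p), h])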
Example no. 36
 def sent_lm_loss(self, sent):
   rnn_cur = self.rnn.initial_state()
   losses = []
   prev_word = self.start
   for word in sent:
     x_t = self.embeddings[prev_word]
     rnn_cur = rnn_cur.add_input(x_t)
     logits = dy.affine_transform([self.lb,
                                   self.h2l,
                                   rnn_cur.output()])
     losses.append(dy.pickneglogsoftmax(logits, word))
     prev_word = word
   return dy.esum(losses)
Example no. 37
    def BuildLMGraph(self, sent):
        dy.renew_cg()
        init_state = self.builder.initial_state()

        errs = [] # will hold expressions
        es=[]
        state = init_state
        inputs = [self.lookup[int(cw)] for cw in sent[:-1]]
        expected_outputs = [int(nw) for nw in sent[1:]]
        outputs = state.transduce(inputs)
        r_ts = ((self.bias + (self.R * y_t)) for y_t in outputs)
        errs = [dy.pickneglogsoftmax(r_t, eo) for r_t, eo in zip(r_ts, expected_outputs)]
        nerr = dy.esum(errs)
        return nerr
Example no. 38
def calc_sent_loss(sent):
  # Create a computation graph
  dy.renew_cg()
  
  #add padding to the sentence equal to the size of the window
  #as we need to predict the eos as well, the future window at that point is N past it 
  padded_sent = [S] * N + sent + [S] * N
  padded_emb = [W_c_p[x] for x in padded_sent]

  # Step through the sentence
  all_losses = [] 
  for i in range(N,len(sent)+N):
    c = dy.esum(padded_emb[i-N:i] + padded_emb[i+1:i+N+1])
    s = W_w * c
    all_losses.append(dy.pickneglogsoftmax(s, padded_sent[i]))
  return dy.esum(all_losses)
Example no. 39
    def build_lm_graph(self, sent):
        dy.renew_cg()
        init_state = self.builder.initial_state()

        errs = [] # will hold expressions
        es=[]
        state = init_state
        for (cw,nw) in zip(sent,sent[1:]):
            # assume word is already a word-id
            x_t = dy.lookup(self.lookup, int(cw))
            state = state.add_input(x_t)
            y_t = state.output()
            r_t = self.bias + (self.R * y_t)
            err = dy.pickneglogsoftmax(r_t, int(nw))
            errs.append(err)
        nerr = dy.esum(errs)
        return nerr
Example no. 40
def calc_reinforce_loss(words, tags, delta):
    dy.renew_cg()

    # Transduce all batch elements with an LSTM
    word_reps = LSTM.transduce([LOOKUP[x] for x in words])

    # Softmax scores
    W = dy.parameter(W_sm)
    b = dy.parameter(b_sm)

    #calculate the probability distribution 
    scores = [dy.affine_transform([b, W, x]) for x in word_reps]
    losses = [dy.pickneglogsoftmax(score, tag) for score, tag in zip(scores, tags)]
    probs = [dy.softmax(score).npvalue() for score in scores]

    #then take samples from the probability distribution
    samples = [np.random.choice(range(len(x)), p=x) for x in probs]

    #calculate accuracy=reward
    correct = [sample == tag for sample, tag in zip(samples, tags)]
    r_i = float(sum(correct))/len(correct)
    r = dy.constant((1), r_i)
    # Reward baseline for each word
    W_bl = dy.parameter(W_bl_p)
    b_bl = dy.parameter(b_bl_p)
    r_b = [dy.affine_transform([b_bl, W_bl, dy.nobackprop(x)]) for x in word_reps]

    #we need to take the value in order to break the computation graph
    #as the reward portion is trained separately and not backpropagated through during the overall score
    rewards_over_baseline = [(r - dy.nobackprop(x)) for x in r_b]
    #the scores for training the baseline
    baseline_scores = [dy.square(r - x) for x in r_b]

    #then calculate the reinforce scores using reinforce
    reinforce_scores = [r_s*score for r_s, score in zip(rewards_over_baseline, scores)]

    #we want the first len(sent)-delta scores from xent then delta scores from reinforce
    #for mixer
    if len(scores) > delta:
        mixer_scores = scores[:len(scores)-delta] + reinforce_scores[delta-1:]
    else:
        mixer_scores = reinforce_scores
    return dy.esum(mixer_scores), dy.esum(baseline_scores)
Example no. 41
 def loss(self, input_, y):
     if self.batched:
         return dy.pickneglogsoftmax_batch(input_, y)
     return dy.pickneglogsoftmax(input_, y)
Example no. 42
def calc_loss(scores, tags):
    losses = [dy.pickneglogsoftmax(score, tag) for score, tag in zip(scores, tags)]
    return dy.esum(losses)
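This is the pattern at its most distilled: one pickneglogsoftmax per position, summed with esum. For reference, a tiny self-contained sketch (with arbitrary scores) showing that dy.pickneglogsoftmax(s, i) is just the cross-entropy -log(softmax(s)[i]):

import dynet as dy
import numpy as np

dy.renew_cg()
scores = dy.inputVector([1.0, 2.0, 0.5])   # unnormalized scores for 3 classes
gold = 1                                    # index of the correct class

loss = dy.pickneglogsoftmax(scores, gold)
manual = -np.log(dy.softmax(scores).npvalue()[gold])
print(loss.value(), manual)                 # should agree up to float error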
Example no. 43
    def train(self, train_file, epochs):
        # matplotlib config
        loss_values = []
        plt.ion()
        ax = plt.gca()
        ax.set_xlim([0, 10])
        ax.set_ylim([0, 3])
        plt.title("Loss over time")
        plt.xlabel("Minibatch")
        plt.ylabel("Loss")

        for i in range(epochs):
            print('started epoch', (i+1))
            losses = []
            train_data = open(train_file, 'r').read().strip().split('\n')

            # shuffle the training data.
            random.shuffle(train_data)

            step = 0
            for line in train_data:

                fields = line.strip().split()
                features, label = fields[:-1], fields[-1]
                gold_label = self.vocab.action2id(label)
                result = self.build_graph(features)

                # getting loss with respect to negative log softmax function and the gold label.
                loss = dynet.pickneglogsoftmax(result, gold_label)

                # appending to the minibatch losses
                losses.append(loss)
                step += 1

                if len(losses) >= self.properties.minibatch_size:
                    # now we have enough loss values to get loss for minibatch
                    minibatch_loss = dynet.esum(losses) / len(losses)

                    # calling dynet to run forward computation for all minibatch items
                    minibatch_loss.forward()

                    # getting float value of the loss for current minibatch
                    minibatch_loss_value = minibatch_loss.value()

                    # printing info and plotting
                    loss_values.append(minibatch_loss_value)
                    if len(loss_values)%10==0:
                        ax.set_xlim([0, len(loss_values)+10])
                        ax.plot(loss_values)
                        plt.draw()
                        plt.pause(0.0001)
                        progress = round(100 * float(step) / len(train_data), 2)
                        print('current minibatch loss', minibatch_loss_value, 'progress:', progress, '%')

                    # calling dynet to run backpropagation
                    minibatch_loss.backward()

                    # calling dynet to change parameter values with respect to current backpropagation
                    self.updater.update()

                    # empty the loss vector
                    losses = []

                    # refresh the memory of dynet
                    dynet.renew_cg()

            # there are still some minibatch items in the memory but they are smaller than the minibatch size
            # so we ask dynet to forget them
            dynet.renew_cg()
Example no. 44
 def calc_loss(self, scores, axis, true, importance):
     ret = [i * dy.pickneglogsoftmax(scores, t) for t, i in zip(true, importance)]
     if self.loss == "max_margin":
         ret.append(dy.max_dim(dy.log_softmax(scores, restrict=list(set(range(self.num_labels[axis])) - set(true)))))
     return ret
Example no. 45
 def create_network_return_loss(self, inputs, expected_output, dropout=False):
     out = self(inputs, dropout)
     loss = dy.pickneglogsoftmax(out, expected_output)
     # loss = -dy.log(dy.pick(out, expected_output))
     return loss
Example no. 46
def compute_loss(model, char_lookup, feat_lookup, R, bias, encoder_frnn, encoder_rrnn, decoder_rnn, W_c, W__a, U__a, v__a, lemma, feats, word, alphabet_index, feat_index,
                 feature_types):
    pc.renew_cg()

    # read the parameters
    # char_lookup = model["char_lookup"]
    # feat_lookup = model["feat_lookup"]
    # R = pc.parameter(model["R"])
    # bias = pc.parameter(model["bias"])
    # W_c = pc.parameter(model["W_c"])
    # W__a = pc.parameter(model["W__a"])
    # U__a = pc.parameter(model["U__a"])
    # v__a = pc.parameter(model["v__a"])

    R = pc.parameter(R)
    bias = pc.parameter(bias)
    W_c = pc.parameter(W_c)
    W__a = pc.parameter(W__a)
    U__a = pc.parameter(U__a)
    v__a = pc.parameter(v__a)

    blstm_outputs = encode_feats_and_chars(alphabet_index, char_lookup, encoder_frnn, encoder_rrnn, feat_index,
                                           feat_lookup, feats, feature_types, lemma)

    # initialize the decoder rnn
    s_0 = decoder_rnn.initial_state()
    s = s_0

    # set prev_output_vec for first lstm step as BEGIN_WORD
    prev_output_vec = char_lookup[alphabet_index[BEGIN_WORD]]
    loss = []
    padded_word = word + END_WORD

    # run the decoder through the output sequence and aggregate loss
    for i, output_char in enumerate(padded_word):

        # get current h of the decoder
        s = s.add_input(prev_output_vec)
        decoder_rnn_output = s.output()

        attention_output_vector, alphas, W = attend(blstm_outputs, decoder_rnn_output, W_c, v__a, W__a, U__a)

        # compute output probabilities
        # print 'computing readout layer...'
        readout = R * attention_output_vector + bias

        if output_char in alphabet_index:
            current_loss = pc.pickneglogsoftmax(readout, alphabet_index[output_char])
        else:
            current_loss = pc.pickneglogsoftmax(readout, alphabet_index[UNK])

        # print 'computed readout layer'
        loss.append(current_loss)

        # prepare for the next iteration - "feedback"
        # TODO: add "input feeding" - the attention_output_vector is also concatenated to the next decoder input
        if output_char in alphabet_index:
            prev_output_vec = char_lookup[alphabet_index[output_char]]
        else:
            prev_output_vec = char_lookup[alphabet_index[UNK]]

    total_sequence_loss = pc.esum(loss)
    # loss = average(loss)

    return total_sequence_loss
Example no. 47
b = model.add_parameters((ntags))                # Softmax bias

# A function to calculate scores for one value
def calc_scores(words):
  # Create a computation graph, and add parameters
  dy.renew_cg()
  # Take the sum of all the embedding vectors for each word
  score = dy.esum([dy.lookup(W, x) for x in words])
  # Add the bias vector and return
  return score + b

for ITER in range(100):
  # Perform training
  random.shuffle(train)
  train_loss = 0.0
  start = time.time()
  for words, tag in train:
    my_loss = dy.pickneglogsoftmax(calc_scores(words), tag)
    train_loss += my_loss.value()
    my_loss.backward()
    trainer.update()
  print("iter %r: train loss/sent=%.4f, time=%.2fs" % (ITER, train_loss/len(train), time.time()-start))
  # Perform testing
  test_correct = 0.0
  for words, tag in dev:
    scores = calc_scores(words).npvalue()
    predict = np.argmax(scores)
    if predict == tag:
      test_correct += 1
  print("iter %r: test acc=%.4f" % (ITER, test_correct/len(dev)))
Example no. 48
def node_iteration(rel, g, node, opts, assoc_model, trainer, log_file, is_source):
    """
    Perform one iteration of trying to score a node's neighbors above negative samples.
    """
    
    # true instances likelihood
    trues = targets(g, node) if is_source else sources(g, node)
    side = '->' if is_source else '<-'
    if len(trues) == 0: return 0.0
    
    if opts.debug:
        dy.renew_cg(immediate_compute = True, check_validity = True)
    else:
        dy.renew_cg()
    
    # compute association score as dynet expression (can't do this above due to staleness)
    true_scores = []
    for tr in trues:
        if is_source:
            j_assoc_score = assoc_model.word_assoc_score(node, tr, rel)
        else:
            j_assoc_score = assoc_model.word_assoc_score(tr, node, rel)
        if log_file is not None:
            log_file.write('{} {}\tTRUE_{}\t{:.3e}\n'\
                         .format(node, side, tr, j_assoc_score.scalar_value()))
        true_scores.append(j_assoc_score)


    # false targets likelihood - negative sampling (uniform)
    # collect negative samples
    if opts.nll:
        sample_scores = [[ts] for ts in true_scores]
    else:
        margins = []
    neg_samples = [np.random.choice(range(N)) for _ in range(opts.neg_samp * len(trues))]
    # remove source and true targets if applicable
    for t in [node] + trues:
        if t in neg_samples:
            neg_samples.remove(t)
            neg_samples.append(np.random.choice(range(N)))
    for (i,ns) in enumerate(neg_samples):
        # compute association score as dynet expression
        if is_source:
            ns_assoc_score = assoc_model.word_assoc_score(node, ns, rel)
        else:
            ns_assoc_score = assoc_model.word_assoc_score(ns, node, rel)
        if log_file is not None:
            log_file.write('{} {}\tNEG_{}\t{:.3e}\n'\
                         .format(node, side, ns, ns_assoc_score.scalar_value()))
        corresponding_true = i // opts.neg_samp
        if opts.nll:
            sample_scores[corresponding_true].append(ns_assoc_score)
        else:
            # TODO maybe use dy.hinge()
            ctt_score = true_scores[corresponding_true]
            margin = ctt_score - ns_assoc_score
            margins.append(dy.rectify(dy.scalarInput(1.0) - margin))


    # compute overall loss
    if opts.nll:
        if len(sample_scores) == 0:
            dy_loss = dy.scalarInput(0.0)
        else:
            dy_loss = dy.esum([dy.pickneglogsoftmax(dy.concatenate(scrs), 0) for scrs in sample_scores])
    else:
        if len(margins) == 0:
            dy_loss = dy.scalarInput(0.0)
        else:
            dy_loss = dy.esum(margins)
    sc_loss = dy_loss.scalar_value()
    if log_file is not None:
        log_file.write('{}\tLOSS\t{:.3e}\n'\
                         .format(node, sc_loss))
                         
    # backprop and recompute score
    if opts.v > 1:
        timeprint('overall loss for relation {}, node {} as {} = {:.6f}'\
                  .format(rel, node, 'source' if is_source else 'target', sc_loss))

    dy_loss.backward()
    trainer.update()

    return sc_loss
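In the opts.nll branch above, each true edge's score is concatenated in front of its negative samples' scores and pickneglogsoftmax is asked to rank index 0 on top. A small self-contained sketch of that trick (the scalar scores here are arbitrary):

import dynet as dy

dy.renew_cg()
true_score = dy.scalarInput(2.5)
neg_scores = [dy.scalarInput(s) for s in (1.0, 0.3, 2.1)]

# the true candidate always sits at index 0, so the target index is 0
candidates = dy.concatenate([true_score] + neg_scores)
loss = dy.pickneglogsoftmax(candidates, 0)
print(loss.value())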
Example no. 49
  dev_time = 0
  report = args.minibatch_size * 30
  dev_report = args.minibatch_size * 600
  for epoch in range(50):
    random.shuffle(training)
    print(("Epoch {} starting".format(epoch+1)))
    i = 0
    while i < len(training):
      dy.renew_cg()
      mbsize = min(args.minibatch_size, len(training) - i)
      minibatch = training[i:i+mbsize]
      losses = []
      for lbl, img in minibatch:
        x = dy.inputVector(img)
        logits = classify(x, dropout=True)
        loss = dy.pickneglogsoftmax(logits, lbl)
        losses.append(loss)
      mbloss = dy.esum(losses) / mbsize
      mbloss.backward()
      sgd.update()

      # eloss is an exponentially smoothed loss.
      if eloss is None:
        eloss = mbloss.scalar_value()
      else:
        eloss = mbloss.scalar_value() * alpha + eloss * (1.0 - alpha)

      # Do dev evaluation here:
      if (i > 0) and (i % dev_report == 0):
        confusion = [[0 for _ in range(10)] for _ in range(10)]
        correct = 0
Example no. 50
    return ngrams

for ITER in range(10):
    # Perform training
    random.shuffle(train)
    train_loss = 0.0
    train_correct = 0.0
    start = time.time()
    for _, wids, tag in train:
        scores = calc_scores(wids)
        predict = np.argmax(scores.npvalue())
        if predict == tag:
            train_correct += 1

        my_loss = dy.pickneglogsoftmax(scores, tag)
        train_loss += my_loss.value()
        my_loss.backward()
        trainer.update()
    print("iter %r: train loss/sent=%.4f, acc=%.4f, time=%.2fs" % (ITER, train_loss/len(train), train_correct/len(train), time.time()-start))
    # Perform testing
    test_correct = 0.0
    for _, wids, tag in dev:
        scores = calc_scores(wids).npvalue()
        predict = np.argmax(scores)
        if predict == tag:
            test_correct += 1
    print("iter %r: test acc=%.4f" % (ITER, test_correct/len(dev)))


for words, wids, tag in dev: