Example #1
def calc_sent_loss(sent):
    # Create a computation graph
    dy.renew_cg()

    # Get embeddings for the sentence
    emb = [W_w_p[x] for x in sent]

    # Sample K negative words for each predicted word at each position
    all_neg_words = np.random.choice(nwords,
                                     size=2 * N * K * len(emb),
                                     replace=True,
                                     p=word_probabilities)

    # W_w = dy.parameter(W_w_p)
    # Step through the sentence and calculate the negative and positive losses
    all_losses = []
    for i, my_emb in enumerate(emb):
        neg_words = all_neg_words[i * K * 2 * N:(i + 1) * K * 2 * N]
        pos_words = (
            [sent[x] if x >= 0 else S for x in range(i - N, i)] +
            [sent[x] if x < len(sent) else S for x in range(i + 1, i + N + 1)])
        neg_loss = -dy.log(
            dy.logistic(
                -dy.dot_product(my_emb, dy.lookup_batch(W_c_p, neg_words))))
        pos_loss = -dy.log(
            dy.logistic(
                dy.dot_product(my_emb, dy.lookup_batch(W_c_p, pos_words))))
        all_losses.append(dy.sum_batches(neg_loss) + dy.sum_batches(pos_loss))
    return dy.esum(all_losses)
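For context, calc_sent_loss builds one computation graph per sentence, so a driver only has to loop, take the value, backprop, and update. The sketch below is a minimal, hypothetical driver: the train list of word-id sentences, the trainer object, and the ITERATIONS constant are assumptions, not part of the example above.

# minimal driver sketch (hypothetical names: train, trainer, ITERATIONS)
for epoch in range(ITERATIONS):
    epoch_loss = 0.0
    for sent in train:
        loss = calc_sent_loss(sent)   # builds a fresh graph internally
        epoch_loss += loss.value()    # forward pass
        loss.backward()               # backward pass
        trainer.update()              # parameter update
    print("epoch %d: loss=%.4f" % (epoch, epoch_loss))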
Example #2
    def train(self, rnnlm, train_quatrains, dev_quatrains):
        min_dev_loss = sys.maxsize
        for i in tqdm(range(self.epochs), desc='Training'):

            losses = []
            tqdm.write('Epoch {}'.format(i))
            total_loss = 0
            state = rnnlm.initialize()

            for count, quatrain in enumerate(train_quatrains):
                for token, (next_word, _, _, _) in zip(quatrain, quatrain[1:]):
                    state, probs = rnnlm.add_input(state, token)
                    loss = -dy.log(dy.pick(probs, next_word))
                    losses.append(loss)

                if count % self.BATCH_SIZE == 0:
                    loss = dy.esum(losses)
                    total_loss += loss.value()
                    loss.backward()
                    self.trainer.update()
                    losses = []
                    dy.renew_cg()
                    state = rnnlm.initialize()

                #if (count + 1)% 4 == 0:
                #  dy.renew_cg()
                #  state = rnnlm.initialize()

            dev_loss = 0
            state = rnnlm.initialize()
            for count, quatrain in enumerate(dev_quatrains):
                for token, (next_word, _, _, _) in zip(quatrain, quatrain[1:]):
                    state, probs = rnnlm.add_input(state, token)
                    loss = -dy.log(dy.pick(probs, next_word))
                    dev_loss += loss.value()
                if (count + 1) % 4 == 0:
                    dy.renew_cg()
                    state = rnnlm.initialize()
            tqdm.write('Dev loss: {}'.format(dev_loss))
            if dev_loss < min_dev_loss:
                tqdm.write('Best dev loss. Saving parameters...')
                self.pc.save('model.pt')
                min_dev_loss = dev_loss
            else:
                tqdm.write('Not best dev loss. Restarting with a smaller learning rate...')
                self.lr = self.lr * .5
                self.trainer.restart(self.lr)

            tqdm.write('Training Loss: {}'.format(total_loss))
            rnnlm.generate(rnnlm.initialize())
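The train() loop above assumes an rnnlm object exposing initialize(), add_input(state, token) returning (state, probs), and generate(), none of which are shown. A rough sketch of such a wrapper in DyNet might look like the class below; the class name, dimensions, and the assumption that token reduces to an integer word id are all hypothetical, and generate() is omitted.

import dynet as dy

class RNNLM:
    def __init__(self, pc, vocab_size, emb_dim=128, hid_dim=256):
        self.embed = pc.add_lookup_parameters((vocab_size, emb_dim))
        self.rnn = dy.LSTMBuilder(1, emb_dim, hid_dim, pc)
        self.pW = pc.add_parameters((vocab_size, hid_dim))
        self.pb = pc.add_parameters((vocab_size,))

    def initialize(self):
        # fresh RNN state; called after every dy.renew_cg()
        return self.rnn.initial_state()

    def add_input(self, state, token):
        W = dy.parameter(self.pW)  # parameters -> expressions in the current graph
        b = dy.parameter(self.pb)
        state = state.add_input(self.embed[token])
        probs = dy.softmax(W * state.output() + b)
        return state, probs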
Example #3
def decode(dec_lstm, vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2int[c] for c in output]

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    input_mat = dy.concatenate_cols(vectors)
    w1dt = None

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(dy.concatenate([dy.vecInput(STATE_SIZE*2), last_output_embeddings]))
    loss = []

    for char in output:
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate([attend(input_mat, s, w1dt), last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        last_output_embeddings = output_lookup[char]
        loss.append(-dy.log(dy.pick(probs, char)))
    loss = dy.esum(loss)
    return loss
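The decoder above relies on an attend(input_mat, s, w1dt) helper that is not shown here. In the standard DyNet attention tutorial it is implemented roughly as below; attention_w2 and attention_v are assumed to be additional global parameters alongside attention_w1.

def attend(input_mat, state, w1dt):
    w2 = dy.parameter(attention_w2)
    v = dy.parameter(attention_v)
    # w1dt (attdim x seqlen) already holds w1 * input_mat, cached by the caller;
    # w2dt (attdim x 1) is recomputed from the current decoder state
    w2dt = w2 * dy.concatenate(list(state.s()))
    # one unnormalized score per encoder column
    unnormalized = dy.transpose(v * dy.tanh(dy.colwise_add(w1dt, w2dt)))
    att_weights = dy.softmax(unnormalized)
    # context vector: weighted sum of the encoder columns
    return input_mat * att_weights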
Example #5
def decode(dec_lstm, vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2int[c] for c in output]

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(dy.concatenate([dy.vecInput(STATE_SIZE*2), last_output_embeddings]))
    loss = []
    for char in output:
        vector = dy.concatenate([attend(vectors, s), last_output_embeddings])

        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        last_output_embeddings = output_lookup[char]
        loss.append(-dy.log(dy.pick(probs, char)))
    loss = dy.esum(loss)
    return loss
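Example #5 is the non-cached variant of the same decoder: its attend(vectors, s) recomputes the attention projections at every step instead of reusing w1dt. A sketch of what that helper typically looks like, again assuming attention_w1, attention_w2, and attention_v globals:

def attend(input_vectors, state):
    w1 = dy.parameter(attention_w1)
    w2 = dy.parameter(attention_w2)
    v = dy.parameter(attention_v)
    w2dt = w2 * dy.concatenate(list(state.s()))
    # one scalar score per encoder output
    scores = [v * dy.tanh(w1 * vec + w2dt) for vec in input_vectors]
    att_weights = dy.softmax(dy.concatenate(scores))
    # weighted sum of the encoder outputs
    return dy.esum([vec * dy.pick(att_weights, i)
                    for i, vec in enumerate(input_vectors)])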
Example #6
def create_network_return_loss(inputs, expected_output):
    '''
    inputs is a list of numbers
    '''
    dy.renew_cg()
    W = dy.parameter(pW) # from parameters to expressions
    b = dy.parameter(pB)
    
    if len(inputs) > documentLength:
        inputs = inputs[0:documentLength]
    
    emb_vectors = [lookup[i] for i in inputs]
    
    while len(emb_vectors) < documentLength:
        pad = dy.vecInput(embDimension)
        pad.set(np.zeros(embDimension))
        emb_vectors.append(pad)
    
    net_input = dy.concatenate(emb_vectors)
    net_output = dy.softmax((W * net_input) + b)
    loss = -dy.log(dy.pick(net_output, expected_output))
    return loss
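A hypothetical usage loop for create_network_return_loss (training_data and trainer are assumptions, not part of the example): each call rebuilds the graph via dy.renew_cg(), so one forward/backward/update happens per document.

for inputs, expected_output in training_data:
    loss = create_network_return_loss(inputs, expected_output)
    loss.value()      # forward pass
    loss.backward()   # backward pass
    trainer.update()  # parameter update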
Example #7
def CalculateLossForWord(word_obj, fValidation=False, fRunning=False):
    dy.renew_cg()

    if not fRunning: gold_lang = word_obj['tag']
    # add a bos before and after
    seq = ['*BOS*'] + list(word_obj['word']) + ['*BOS*']

    # get all the char encodings for the daf
    char_embeds = [let_enc(let) for let in seq]

    # run it through the bilstm
    char_bilstm_outputs = bilstm(char_embeds)
    bilstm_output = dy.concatenate([char_bilstm_outputs[0], char_bilstm_outputs[-1]])

    mlp_input = bilstm_output
    mlp_out = lang_mlp(mlp_input)
    predicted_lang = lang_tags[np.argmax(mlp_out.npvalue())]
    confidence = (mlp_out.npvalue()[:2] / np.sum(mlp_out.npvalue()[:2])).tolist() #skip ambiguous
    # if we aren't doing validation, calculate the loss
    if not fValidation and not fRunning:
        loss = -dy.log(dy.pick(mlp_out, gold_lang))
    # otherwise, set the answer to be the argmax
    elif not fRunning and fValidation:
        loss = None
        lang_conf_matrix(np.argmax(mlp_out.npvalue()), gold_lang)
    else:
        return predicted_lang, confidence

    pos_prec = 1 if predicted_lang == lang_tags[gold_lang] else 0

    tagged_word = {'word': word_obj['word'], 'tag': predicted_lang, 'confidence': confidence, 'gold_tag': lang_tags[gold_lang]}

    if fValidation:
        return pos_prec, tagged_word

    return loss, pos_prec
Example #8
def CalculateLossForDaf(daf, fValidation=False, fRunning=False):
    dy.renew_cg()
    tagged_daf = {"words":[],"file":daf["file"]}
    daf = daf["words"]

    # add a bos before and after
    seq = ['*BOS*'] + list(' '.join([word for word, _, _, _ in daf])) + ['*BOS*']

    # get all the char encodings for the daf
    char_embeds = [let_enc(let) for let in seq]

    # run it through the bilstm
    char_bilstm_outputs = bilstm(char_embeds)

    # now iterate and get all the separate word representations by concatenating the bilstm output
    # before and after the word
    word_bilstm_outputs = []
    iLet_start = 0
    for iLet, char in enumerate(seq):
        # if it is a bos, check if it's at the end of the sequence
        if char == '*BOS*':
            if iLet + 1 == len(seq):
                char = ' '
            else:
                continue
        # if we are at a space, take this bilstm output and the one at the letter start
        if char == ' ':
            cur_word_bilstm_output = dy.concatenate([char_bilstm_outputs[iLet_start], char_bilstm_outputs[iLet]])
            # add it in
            word_bilstm_outputs.append(cur_word_bilstm_output)

            # set the iLet_start counter to here
            iLet_start = iLet

    # sanity check: make sure the number of word bilstm outputs matches the daf length
    if len(word_bilstm_outputs) != len(daf):
        log_message('Size mismatch!! word_bilstm_outputs: ' + str(len(word_bilstm_outputs)) + ', daf: ' + str(len(daf)))

    prev_pos_lstm_state = prev_pos_lstm.initial_state().add_input(pos_enc('*BOS*'))

    all_losses = []
    pos_prec = 0.0
    rough_pos_prec = 0.0
    pos_items = 0
    class_prec = 0.0
    class_items = 0.0
    # now iterate through the bilstm outputs, and each word in the daf
    for (word, gold_word_class, gold_word_pos, gold_word_lang), bilstm_output in zip(daf, word_bilstm_outputs):
        should_backprop = gold_word_class == 1

        # create the mlp input: a concatenation of the bilstm output and the prev-pos lstm output
        mlp_input = dy.concatenate([bilstm_output, prev_pos_lstm_state.output()])

        # run through the class mlp
        class_mlp_output = class_mlp(mlp_input)

        predicted_word_class = np.argmax(class_mlp_output.npvalue())
        confidence = np.max(class_mlp_output.npvalue()) / np.sum(class_mlp_output.npvalue())


        # prec
        if should_backprop:
            class_prec += 1 if predicted_word_class == gold_word_class else 0
            class_items += 1

        # if we aren't doing validation, calculate the loss
        if not fValidation and not fRunning:
            if should_backprop: all_losses.append(-dy.log(dy.pick(class_mlp_output, gold_word_class)))
            word_class_ans = gold_word_class
        # otherwise, set the answer to be the argmax
        else:
            word_class_ans = predicted_word_class

        # if the word_class answer is 1, do the pos!
        # alternatively, if validating and it's Aramaic, do the pos!
        if word_class_ans or (fValidation and gold_word_lang) or (fRunning and gold_word_lang):
            # run the pos mlp output
            pos_mlp_output = pos_mlp(mlp_input)
            try:
                temp_pos_array = pos_mlp_output.npvalue()
                possible_pos_array = np.zeros(temp_pos_array.shape)
                pos_list = pos_hashtable[word]
                # pos_list.add('') #concat 'unknown' as possible pos
                possible_pos_indices = [pos_vocab[temp_pos] for temp_pos in pos_list]
                possible_pos_array[possible_pos_indices] = temp_pos_array[possible_pos_indices]
            except KeyError:
                possible_pos_array = pos_mlp_output.npvalue()
                # if fValidation:
                #    possible_pos_array[pos_vocab['']] = 0.0 # don't allow validation to guess UNK b/c it never trained against that TODO this makes sense, right?

            predicted_word_pos = pos_vocab.getItem(np.argmax(possible_pos_array))
            confidence = np.max(possible_pos_array) / np.sum(possible_pos_array)
            # prec
            if should_backprop:
                pos_prec += 1 if predicted_word_pos == gold_word_pos else 0
                rough_pos_prec += 1 if predicted_word_pos[0] == gold_word_pos[0] else 0 # you got at least the rough pos right
                pos_items += 1

            # if we aren't doing validation, calculate the loss
            if not fValidation and not fRunning:
                if should_backprop: all_losses.append(-dy.log(dy.pick(pos_mlp_output, pos_vocab[gold_word_pos])))
                word_pos_ans = gold_word_pos
            # otherwise, set the answer to be the argmax
            elif not fRunning and fValidation:
                if should_backprop: pos_conf_matrix(pos_vocab[predicted_word_pos], pos_vocab[gold_word_pos])
                word_pos_ans = predicted_word_pos
            else:
                word_pos_ans = predicted_word_pos

            # run through the prev-pos-mlp
            predicted = predicted_word_pos
            prev_pos_lstm_state = prev_pos_lstm_state.add_input(pos_enc(word_pos_ans))
        # if the answer is 0, put a '' through the prev-pos lstm
        else:
            predicted = 'UNK'
            prev_pos_lstm_state = prev_pos_lstm_state.add_input(pos_enc(''))

        tagged_daf["words"].append({"word":word,"gold_pos":gold_word_pos,"gold_class":gold_word_class,"predicted":predicted,"confidence":confidence, "lang": gold_word_lang})

    if fRunning:
        return tagged_daf

    pos_prec = pos_prec / pos_items if pos_items > 0 else None
    rough_pos_prec = rough_pos_prec / pos_items if pos_items > 0 else None
    class_prec = class_prec / class_items if class_items > 0 else None

    if fValidation:
        return class_prec, pos_prec, tagged_daf, rough_pos_prec

    total_loss = dy.esum(all_losses) if len(all_losses) > 0 else None
    return total_loss, class_prec, pos_prec, rough_pos_prec
Example #9
def CalculateLossForDaf(daf, fValidation=False, fRunning=False):
    dy.renew_cg()
    tagged_daf = {"words": []}

    # add a bos before and after
    seq = ['*BOS*'] + list(' '.join([word for word, _ in daf])) + ['*BOS*']

    # get all the char encodings for the daf
    char_embeds = [let_enc(let) for let in seq]

    # run it through the bilstm
    char_bilstm_outputs = bilstm(char_embeds)

    # now iterate and get all the separate word representations by concatenating the bilstm output
    # before and after the word
    word_bilstm_outputs = []
    iLet_start = 0
    for iLet, char in enumerate(seq):
        # if it is a bos, check if it's at the end of the sequence
        if char == '*BOS*':
            if iLet + 1 == len(seq):
                char = ' '
            else:
                continue
        # if we are at a space, take this bilstm output and the one at the letter start
        if char == ' ':
            cur_word_bilstm_output = dy.concatenate([char_bilstm_outputs[iLet_start], char_bilstm_outputs[iLet]])
            # add it in
            word_bilstm_outputs.append(cur_word_bilstm_output)

            # set the iLet_start counter to here
            iLet_start = iLet

    # sanity check: make sure the number of word bilstm outputs matches the daf length
    if len(word_bilstm_outputs) != len(daf):
        log_message('Size mismatch!! word_bilstm_outputs: ' + str(len(word_bilstm_outputs)) + ', daf: ' + str(len(daf)))

    prev_lang_lstm_state = prev_lang_lstm.initial_state().add_input(lang_enc('*BOS*'))

    all_losses = []
    lang_prec = 0.0
    lang_items = 0

    # now iterate through the bilstm outputs, and each word in the daf
    for (word, gold_word_lang), bilstm_output in zip(daf, word_bilstm_outputs):

        # create the mlp input: a concatenation of the bilstm output and the prev-lang lstm output
        mlp_input = dy.concatenate([bilstm_output, prev_lang_lstm_state.output()])

        # run through the class mlp
        lang_mlp_output = lang_mlp(mlp_input)
        predicted_word_lang = lang_vocab.getItem(np.argmax(lang_mlp_output.npvalue()))
        confidence = np.max(lang_mlp_output.npvalue()) / np.sum(lang_mlp_output.npvalue())
        lang_prec += 1 if predicted_word_lang == gold_word_lang else 0
        lang_items += 1


        tagged_daf["words"].append(
            {"word": word, "predicted_lang": predicted_word_lang, "confidence": confidence})
        # if we aren't doing validation, calculate the loss
        if not fValidation and not fRunning:
            all_losses.append(-dy.log(dy.pick(lang_mlp_output, lang_vocab[gold_word_lang])))
            word_lang_ans = gold_word_lang
        # otherwise, set the answer to be the argmax
        elif not fRunning and fValidation:
            lang_conf_matrix(lang_vocab[predicted_word_lang], lang_vocab[gold_word_lang])
            word_lang_ans = predicted_word_lang
        else:
            continue

        # feed the chosen language label through the prev-lang lstm
        prev_lang_lstm_state = prev_lang_lstm_state.add_input(lang_enc(word_lang_ans))

        # prev_pos_lstm_state = prev_pos_lstm_state.add_input(pos_enc(''))

    lang_prec = lang_prec / lang_items if lang_items > 0 else None
    # class_prec = class_prec / class_items if class_items > 0 else None

    if fValidation:
        return lang_prec, tagged_daf

    if fRunning:
        return tagged_daf

    total_loss = dy.esum(all_losses) if len(all_losses) > 0 else None
    return total_loss, lang_prec
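The three functions above all depend on character-level helpers (let_enc, bilstm) plus MLPs and encoders (lang_mlp, pos_enc, lang_enc, ...) that come from the surrounding repository and are not shown. As a rough, hypothetical sketch of the character side only (names and dimensions are assumptions):

def make_char_bilstm(pc, char_vocab, emb_dim=64, hid_dim=128):
    # char_vocab is assumed to map characters (including '*BOS*' and ' ') to ids
    char_lookup = pc.add_lookup_parameters((len(char_vocab), emb_dim))
    fwd = dy.LSTMBuilder(1, emb_dim, hid_dim, pc)
    bwd = dy.LSTMBuilder(1, emb_dim, hid_dim, pc)

    def let_enc(ch):
        # character -> embedding expression
        return char_lookup[char_vocab[ch]]

    def bilstm(embs):
        # run forward and backward LSTMs and concatenate per position
        f_outs = fwd.initial_state().transduce(embs)
        b_outs = list(reversed(bwd.initial_state().transduce(list(reversed(embs)))))
        return [dy.concatenate([f, b]) for f, b in zip(f_outs, b_outs)]

    return let_enc, bilstm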
Example #10
# regular lookup
a = lp[1].npvalue()
b = lp[2].npvalue()
c = lp[3].npvalue()

# batch lookup instead of single elements.
# two ways of doing this.
abc1 = dy.lookup_batch(lp, [1,2,3])
print(abc1.npvalue())

abc2 = lp.batch([1,2,3])
print(abc2.npvalue())

print(np.hstack([a,b,c]))


# use pick and pickneglogsoftmax in batch mode
# (must be used in conjunction with lookup_batch):
print("\nPick")
W = dy.parameter(m.add_parameters((5, 10)))
h = W * lp.batch([1,2,3])
print(h.npvalue())
print(dy.pick_batch(h,[1,2,3]).npvalue())
print(dy.pick(W*lp[1],1).value(), dy.pick(W*lp[2],2).value(), dy.pick(W*lp[3],3).value())

# using pickneglogsoftmax_batch
print("\nPick neg log softmax")
print((-dy.log(dy.softmax(h))).npvalue())
print(dy.pickneglogsoftmax_batch(h,[1,2,3]).npvalue())
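For a single (non-batched) column the same fusion can be checked directly; a small addition reusing W and lp from the snippet above:

h1 = W * lp[1]
print(dy.pickneglogsoftmax(h1, 1).value())            # fused version
print((-dy.log(dy.pick(dy.softmax(h1), 1))).value())  # explicit version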