def calc_test_loss(test_set=None, display=True):
    """Run the model over the test set; return loss, accuracy, BLEU and outputs."""
    if test_set is None:  # avoid a mutable default evaluated at definition time
        test_set = Dataset(test_x, test_y)
    accs = []
    worksum = len(test_x) // batch_size
    loss_list = []
    predict_list = []
    target_list = []
    source_list = []
    pb = ProgressBar(worksum=worksum, info="validating...", auto_display=display)
    pb.startjob()
    for j in range(worksum):
        batch_x, batch_y = test_set.next_batch(batch_size)
        lx = [seq_max_len] * batch_size
        ly = [seq_max_len] * batch_size
        # real (unpadded) lengths: count the non-zero (non-padding) ids
        bx = [np.sum(m > 0) for m in batch_x]
        by = [np.sum(m > 0) for m in batch_y]
        # decoder input: prepend <go> and drop the last token (teacher forcing)
        batch_y_in = np.concatenate(
            (np.ones((batch_y.shape[0], 1), dtype=int) * ch2ind['<go>'],
             batch_y[:, :-1]), axis=1)
        tmp_loss, tran = session.run(
            [train_loss, translations],
            feed_dict={x: batch_x, y: batch_y, y_in: batch_y_in,
                       x_len: lx, y_len: ly,
                       x_real_len: bx, y_real_len: by})
        loss_list.append(tmp_loss)
        accs.append(cal_acc(tran, batch_y))
        predict_list.extend(tran)
        target_list.extend(batch_y)
        source_list.extend(batch_x)
        pb.complete(1)
    return (np.average(loss_list), np.average(accs),
            get_bleu_score(predict_list, target_list),
            predict_list, target_list, source_list)
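The y_in construction above is the standard decoder-input shift for teacher forcing: prepend <go> and drop the last target token, so the decoder predicts token t from the tokens before it. A minimal standalone sketch (the token ids and the <go> index here are invented for illustration):

import numpy as np

ch2ind = {'<go>': 1, '<eos>': 2}           # hypothetical vocabulary indices
batch_y = np.array([[5, 6, 7, 2],          # targets end with <eos>
                    [8, 9, 2, 2]])
# prepend <go> and drop the last column: inputs are the targets shifted right
y_in = np.concatenate(
    (np.full((batch_y.shape[0], 1), ch2ind['<go>'], dtype=int),
     batch_y[:, :-1]), axis=1)
print(y_in)
# [[1 5 6 7]
#  [1 8 9 2]]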
Example 2
def calc_test_loss(test_x, test_y, display=True):
    """Evaluate on (test_x, test_y), padding each batch to its own maximum length."""
    accs = []
    worksum = len(test_x) // batch_size
    loss_list = []
    predict_list = []
    target_list = []
    source_list = []
    pb = ProgressBar(worksum=worksum, info="validating...", auto_display=display)
    pb.startjob()
    for j in range(0, len(test_x), batch_size):
        batch_x = test_x[j:j + batch_size]
        batch_y = test_y[j:j + batch_size]
        if len(batch_x) < batch_size:  # skip the final ragged batch
            continue
        # real lengths include the trailing <eos> token
        bx = [len(m) + 1 for m in batch_x]
        by = [len(m) + 1 for m in batch_y]
        # every sequence in the batch is padded to the batch maximum
        lx = [max(bx)] * batch_size
        ly = [max(by)] * batch_size
        batch_x = preprocessing.sequence.pad_sequences(
            batch_x, max(bx), padding='post', value=en2ind_oov['<eos>'])
        batch_y = preprocessing.sequence.pad_sequences(
            batch_y, max(by), padding='post', value=ch2ind_oov['<eos>'])
        # decoder input: prepend <go> and drop the last token (teacher forcing)
        batch_y_in = np.concatenate(
            (np.ones((batch_y.shape[0], 1), dtype=int) * ch2ind['<go>'],
             batch_y[:, :-1]), axis=1)
        tmp_loss, tran = session.run(
            [train_loss, translations],
            feed_dict={x: batch_x, y: batch_y, y_in: batch_y_in,
                       x_len: lx, y_len: ly,
                       x_real_len: bx, y_real_len: by,
                       y_max_len: max(by)})
        loss_list.append(tmp_loss)
        # translations carries a beam dimension; keep the top hypothesis
        accs.append(cal_acc(tran[:, :, 0], batch_y))
        predict_list.extend(tran[:, :, 0])
        target_list.extend(batch_y)
        source_list.extend(batch_x)
        pb.complete(1)
    return (np.average(loss_list), np.average(accs),
            get_bleu_score(predict_list, target_list),
            predict_list, target_list, source_list)
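The per-batch dynamic padding relies on Keras' pad_sequences with padding='post' and the <eos> id as the fill value. A quick sketch of that behaviour (the <eos> index here is made up):

from tensorflow.keras import preprocessing

eos = 2  # hypothetical <eos> index used as the padding value
batch = [[5, 6, 7], [8, 9]]
padded = preprocessing.sequence.pad_sequences(
    batch, maxlen=4, padding='post', value=eos)
print(padded)
# [[5 6 7 2]
#  [8 9 2 2]]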
Example 3
def get_most_common(a1, a2):
    """Count word frequencies over two parallel corpora and return the most
    frequent words of each, truncated to the configured vocabulary sizes."""
    temp_dict1 = {}
    temp_dict2 = {}
    pb = ProgressBar(worksum=len(a1), auto_display=False)
    pb.startjob()
    num = 0
    for s1, s2 in zip(a1, a2):
        num += 1
        pb.complete(1)
        # skip sentence pairs longer than the configured limit
        if args.max_words != -1 and (len(s1) > args.max_words
                                     or len(s2) > args.max_words):
            continue
        for w1 in s1:
            temp_dict1.setdefault(w1, 0)
            temp_dict1[w1] += 1
        for w2 in s2:
            temp_dict2.setdefault(w2, 0)
            temp_dict2[w2] += 1
        if num % 32 == 0:
            pb.display_progress_bar()
    sorted1 = sorted(temp_dict1.items(), key=lambda i: i[1], reverse=True)
    sorted2 = sorted(temp_dict2.items(), key=lambda i: i[1], reverse=True)
    return ([i[0] for i in sorted1[:args.vac_dict_ch]],
            [i[0] for i in sorted2[:args.vac_dict_en]])
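The manual setdefault counting above is equivalent to collections.Counter; the same idea in a shorter, self-contained sketch (vocab_size stands in for args.vac_dict_ch / args.vac_dict_en):

from collections import Counter

def most_common_words(sentences, vocab_size):
    # flatten the corpus and keep the vocab_size most frequent words
    counts = Counter(w for s in sentences for w in s)
    return [w for w, _ in counts.most_common(vocab_size)]

print(most_common_words([['a', 'b', 'a'], ['a', 'c']], 2))  # ['a', 'b']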
Example 4
    loss = tf.reduce_mean(loss)
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(loss)
#    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(loss)
# In[]
# =============================================================================
# RUN TRAINING
# =============================================================================
sess.run(tf.global_variables_initializer())
from utils import ProgressBar
losses = []
beginning_lr = 0.1
gen = data_generator(X_train, y_train, batch_size)
for one_epoch in range(1, 10):
    pb = ProgressBar(worksum=len(X_train))  # one epoch walks every sample once
    pb.startjob()
    for one_batch in range(0, len(X_train), batch_size):
        batch_x, batch_y = next(gen)
        batch_x_len = np.asarray([len(x) for x in batch_x])
        batch_lr = beginning_lr

        _, batch_loss = sess.run([optimizer, loss],
                                 feed_dict={
                                     X: batch_x,
                                     y: batch_y,
                                     X_len: batch_x_len,
                                     learning_rate: batch_lr
                                 })
        losses.append(batch_loss)
        pb.info = "EPOCH {} batch {} lr {} loss {}".format(
            one_epoch, one_batch, batch_lr, batch_loss)
        pb.complete(batch_size)
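data_generator is not shown in this snippet; a plausible minimal implementation consistent with how the loop consumes it (one (batch_x, batch_y) pair per next() call, cycling indefinitely across epochs) might look like:

def data_generator(X, y, batch_size):
    # yield successive batches forever, wrapping around at the end of the data
    while True:
        for i in range(0, len(X), batch_size):
            yield X[i:i + batch_size], y[i:i + batch_size]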