Example #1
    # Tail of read_word2embedding(); the rest of the function body is not shown here.
    return word2embedding

if __name__ == '__main__':
    word_index, word_cnt = create_word_index([TRAIN_DATA, DEV_DATA, TEST_DATA])

    # Invert word_index so model output indices can be mapped back to words.
    ind2word = {}
    for k, v in word_index.items():
        ind2word[v] = k

    # Word-level inputs and labels: training split, optional extra split, and held-out split.
    wx, y, m = read_data(TRAIN_DATA, word_index)
    if USE_DEV:
        dev_wx, dev_y, dev_m = read_data(TEST_DATA, word_index)
        wx, y, m = np.vstack((wx, dev_wx)), np.vstack((y, dev_y)), np.vstack((m, dev_m))
    twx, ty, tm = read_data(DEV_DATA, word_index)
    # Character-level inputs, built the same way from the shared character index.
    char_index, char_cnt = create_char_index([TRAIN_DATA, DEV_DATA, TEST_DATA])
    x, cm = read_char_data(TRAIN_DATA, char_index)
    if USE_DEV:
        dev_x, dev_cm = read_char_data(TEST_DATA, char_index)
        x, cm = np.vstack((x, dev_x)), np.vstack((cm, dev_cm))
    tx, tcm = read_char_data(DEV_DATA, char_index)
    model = cnn_rnn.cnn_rnn(char_cnt, len(LABEL_INDEX), word_cnt, ind2word)
    if LABELING_RATE < 1.0:
        # Keep only a LABELING_RATE fraction of the labeled training examples.
        ind = sample.create_sample_index(LABELING_RATE, x.shape[0])
        x, y, m, wx, cm = sample.sample_arrays((x, y, m, wx, cm), ind)
    model.add_data(x, y, m, wx, cm, None, tx, ty, tm, twx, tcm, None)
    model.build()
    word2embedding = read_word2embedding()
    model.set_embedding(word2embedding, word_index)
    model.train(evaluate)
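
Both examples thin out the labeled training set through sample.create_sample_index and sample.sample_arrays whenever the labeling rate is below 1.0. The helpers below are a minimal sketch of that interface, assuming create_sample_index draws a random subset of row indices and sample_arrays applies the same selection to every array in a tuple (passing None entries through, since the gaze argument in Example #2 can be None); the actual implementations in the sample module may differ.

import numpy as np

def create_sample_index(rate, n, seed=0):
    # Pick a random subset of row indices covering roughly `rate` of the n examples.
    rng = np.random.RandomState(seed)
    k = max(1, int(round(rate * n)))
    return rng.permutation(n)[:k]

def sample_arrays(arrays, ind):
    # Apply the same row selection to every array; leave None placeholders untouched.
    return tuple(a[ind] if a is not None else None for a in arrays)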

Example #2
                # (This excerpt begins partway through a loop over tasks; the enclosing
                # conditional that stacks the extra gaze features is not shown.)
                gaze = np.vstack((gaze, dev_gaze))
        else:
            gaze, tgaze = None, None
        model = cnn_rnn.cnn_rnn(char_cnt, len(t.LABEL_INDEX), word_cnt)
        model.min_epoch = MIN_PERIODS[i]

        #### important: set model parameters for different cases ####
        if task in ('ner_span', 'ner_ned'):
            model.w_embedding_size = 64
        else:
            model.w_embedding_size = 50
        model.joint = True

        if LABELING_RATES[i] < 1.0:
            ind = sample.create_sample_index(LABELING_RATES[i], x.shape[0])
            x, y, m, wx, cm, gaze = sample.sample_arrays((x, y, m, wx, cm, gaze), ind)
        model.add_data(x, y, m, wx, cm, gaze, tx, ty, tm, twx, tcm, tgaze)
        model.build()
        if task in ('ner_span', 'ner_ned'):
            words, embeddings = t.read_word2embedding(t.PKL_FILE)
            model.set_embedding_pkl(words, embeddings, word_index, lower=False)
        else:
            word2embedding = t.read_word2embedding()
            model.set_embedding(word2embedding, word_index)
        model.step_train_init()
        models.append(model)
        eval_funcs.append(t.evaluate)

    prev_params = None
    # Running best F1 per task.
    max_f1s = [0.0, 0.0, 0.0]
    print("\t".join(['task', 'epoch', 'iter', 'max_f1', 'f1', 'prec', 'recall']))
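
The printed header anticipates the joint training loop that follows this excerpt: one tab-separated row per evaluation with the task name, epoch, iteration, running best F1, and the current F1/precision/recall, with max_f1s tracking the best score per task and prev_params presumably carrying shared parameters between tasks. The sketch below is a hypothetical continuation under those assumptions; step_train and the (f1, prec, recall) return value of the evaluation functions are assumed interfaces, not the repository's actual API.

def joint_train(models, eval_funcs, tasks, max_f1s, n_epochs):
    # Hypothetical round-robin loop over the per-task models built above.
    for epoch in range(n_epochs):
        for i, (model, evaluate, task) in enumerate(zip(models, eval_funcs, tasks)):
            it = model.step_train()             # assumed: one training pass, returns an iteration count
            f1, prec, recall = evaluate(model)  # assumed return signature
            max_f1s[i] = max(max_f1s[i], f1)
            print("\t".join(str(v) for v in
                            [task, epoch, it, max_f1s[i], f1, prec, recall]))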