return word2embedding if __name__ == '__main__': word_index, word_cnt = create_word_index([TRAIN_DATA, DEV_DATA, TEST_DATA]) ind2word = {} for k, v in word_index.iteritems(): ind2word[v] = k wx, y, m = read_data(TRAIN_DATA, word_index) if USE_DEV: dev_wx, dev_y, dev_m = read_data(TEST_DATA, word_index) wx, y, m = np.vstack((wx, dev_wx)), np.vstack((y, dev_y)), np.vstack((m, dev_m)) twx, ty, tm = read_data(DEV_DATA, word_index) char_index, char_cnt= create_char_index([TRAIN_DATA, DEV_DATA, TEST_DATA]) x, cm = read_char_data(TRAIN_DATA, char_index) if USE_DEV: dev_x, dev_cm = read_char_data(TEST_DATA, char_index) x, cm = np.vstack((x, dev_x)), np.vstack((cm, dev_cm)) tx, tcm = read_char_data(DEV_DATA, char_index) model = cnn_rnn.cnn_rnn(char_cnt, len(LABEL_INDEX), word_cnt, ind2word) if LABELING_RATE < 1.0: ind = sample.create_sample_index(LABELING_RATE, x.shape[0]) x, y, m, wx, cm = sample.sample_arrays((x, y, m, wx, cm), ind) model.add_data(x, y, m, wx, cm, None, tx, ty, tm, twx, tcm, None) model.build() word2embedding = read_word2embedding() model.set_embedding(word2embedding, word_index) model.train(evaluate)
# NOTE(review): the line below is a whitespace-collapsed fragment.  It starts
# inside an `if` body whose header is not visible (the leading `else:` pairs
# with that hidden `if`) and reads `i`, `task` and `t`, which are bound
# outside the fragment -- presumably by an enclosing per-task loop; confirm
# against the full file before re-indenting.
#
# Visible steps, in order:
#   * stack `dev_gaze` under `gaze`; in the `else` branch set both `gaze`
#     and `tgaze` to None;
#   * construct `cnn_rnn.cnn_rnn(char_cnt, len(t.LABEL_INDEX), word_cnt)`
#     and set its `min_epoch` from `MIN_PERIODS[i]`;
#   * set `w_embedding_size` to 64 when `task` is 'ner_span' or 'ner_ned',
#     otherwise 50, then set `joint = True`;
#   * when `LABELING_RATES[i] < 1.0`, subsample x, y, m, wx, cm and gaze via
#     `sample.create_sample_index` / `sample.sample_arrays`;
#   * call `add_data` (gaze / tgaze occupy the 6th and 12th slots) and
#     `build`;
#   * load embeddings: for the two NER tasks, `t.read_word2embedding(t.PKL_FILE)`
#     followed by `set_embedding_pkl(..., lower=False)`; otherwise
#     `t.read_word2embedding()` followed by `set_embedding`;
#   * call `step_train_init`, append the model to `models` and `t.evaluate`
#     to `eval_funcs`;
#   * initialise `prev_params` to None and `max_f1s` to three zeros, then
#     print a tab-joined header row (Python 2 `print` statement).
#
# NOTE(review): as physically written, the inline `#### important ... ####`
# marker starts a comment that swallows the remainder of this line; the
# original line breaks must be restored for the later statements to execute.
gaze = np.vstack((gaze, dev_gaze)) else: gaze, tgaze = None, None model = cnn_rnn.cnn_rnn(char_cnt, len(t.LABEL_INDEX), word_cnt) model.min_epoch = MIN_PERIODS[i] #### important: set model parameters for different cases #### if task == 'ner_span' or task == 'ner_ned': model.w_embedding_size = 64 else: model.w_embedding_size = 50 model.joint = True if LABELING_RATES[i] < 1.0: ind = sample.create_sample_index(LABELING_RATES[i], x.shape[0]) x, y, m, wx, cm, gaze = sample.sample_arrays((x, y, m, wx, cm, gaze), ind) model.add_data(x, y, m, wx, cm, gaze, tx, ty, tm, twx, tcm, tgaze) model.build() if task == 'ner_span' or task == 'ner_ned': words, embeddings = t.read_word2embedding(t.PKL_FILE) model.set_embedding_pkl(words, embeddings, word_index, lower=False) else: word2embedding = t.read_word2embedding() model.set_embedding(word2embedding, word_index) model.step_train_init() models.append(model) eval_funcs.append(t.evaluate) prev_params = None max_f1s = [0.0, 0.0, 0.0] print "\t".join(['task', 'epoch', 'iter', 'max_f1', 'f1', 'prec', 'recall'])