# Build one training batch of (true, false) sequence pairs and fit the model
# on it. `true_data`, `false_data`, `fe` and `model` come from code outside
# this fragment.
# NOTE(review): the original indentation was lost; if this fragment belongs
# inside an enclosing loop, re-indent it accordingly.
true_word_seq, false_word_seq, label = [], [], []

for row, raw_true in enumerate(true_data):
    # Strip in place so both input lists keep holding the cleaned lines.
    true_data[row] = true_line = raw_true.strip()
    false_data[row] = false_line = false_data[row].strip()

    # Each line is a ';'-separated word sequence.
    true_word_seq.append(true_line.split(';'))
    false_word_seq.append(false_line.split(';'))

    # Target for the pair: 1 for the true sequence, 0 for the false one.
    label.append([[1], [0]])

x_true = fe.embedding(true_word_seq)
x_false = fe.embedding(false_word_seq)

# Pair the embedded sequences index by index: x[k] == [x_true[k], x_false[k]].
x = np.array([[x_true[k], x_false[k]] for k in range(len(x_true))])
y = np.array(label)

model.fit(x=x, y=y, batch_size=32, epochs=1)
# print(model.summary())

# Feature extractor that turns word sequences into the model's input array.
# NOTE(review): 5 and 500 are presumably the context size and the word2vec
# dimension -- confirm against FeatureExtractor.
fe = FeatureExtractor(5)
fe.set_w2v(w2v_pathname, 500, keep_alive=True)

# Word -> class-index lookup, built once instead of calling vocab.index() for
# every sample (a linear scan each time). setdefault keeps the FIRST position
# of a duplicated word, matching what vocab.index() would return.
# Assumes `vocab` is a list-like sequence of hashable words -- TODO confirm.
word_to_class = {}
for class_idx, vocab_word in enumerate(vocab):
    word_to_class.setdefault(vocab_word, class_idx)

# Continue training from the checkpoint written by the previous epoch and
# write a new checkpoint for each epoch processed here.
for epoch in range(9, 11):
    model = load_model('model/bengio_sgd_{}.h5'.format(epoch - 1))

    for i in range(1, 1001):
        filename = 'data/batch/bengio/6/{}.txt'.format(i)
        print(filename)

        word_seq = []
        label = []
        with open(filename) as file:
            for raw_line in file:
                line = raw_line.strip()
                if not line:
                    # Blank lines carry no sample.
                    continue

                # Line format: context words followed by the target word,
                # all separated by ';'.
                splitted = line.split(';')
                word_label = splitted[-1]

                one_idx = word_to_class.get(word_label)
                if one_idx is None:
                    # The original compared vocab.index() against -1, but
                    # list.index raises ValueError instead of returning -1,
                    # so an unknown word crashed the run. Report the word and
                    # drop the sample so x and y stay aligned.
                    print(word_label)
                    continue

                temp_one_hot = np.zeros(num_class)
                temp_one_hot[one_idx] = 1

                word_seq.append(splitted[:-1])
                label.append(temp_one_hot)

        if not word_seq:
            # Nothing usable in this file; do not call fit() on empty arrays.
            continue

        x = fe.embedding(word_seq)
        y = np.array(label)
        # print('X shape', x.shape)
        # print('y shape', y.shape)
        model.fit(x=x, y=y, batch_size=32, epochs=1)

    # NOTE(review): the original indentation was lost; saving once per epoch
    # is assumed because the next epoch loads the 'epoch - 1' checkpoint.
    model.save('model/bengio_sgd_{}.h5'.format(epoch))