# Sweep over several word-embedding sizes. For every size the test set is
# re-read and re-encoded, and one answer file 'ans_<word_dim>.csv' is written.
max_word_len = 14
word_dim_list = [50, 100, 150, 200, 250, 300, 350, 400]

for word_dim in word_dim_list:
    print('word dim=', word_dim)
    # A new DataManager and Vocabulary are created on every iteration.
    dm = DataManager()
    voc = Vocabulary()
    dm.word_dim = word_dim
    dm.word_len = max_word_len

    # The w2v model path carries the embedding size as its suffix.
    voc.word2vec('data/w2v_model/w2v_model_{}'.format(word_dim))
    print("reading data...", end='')
    dm.read_test_data('data/testing_data.csv', 'test_question', 'test_option')
    # '\r' moves back to the start of the line, so this overwrites the
    # unfinished "reading data..." message printed above.
    print("\rreading data...finish")

    print("construct data...")
    # NOTE(review): the same two .npy paths are passed for every word_dim.
    # If construct_data_seq2seq treats them as a cache, later sizes could pick
    # up arrays built for an earlier size -- verify against DataManager.
    dm.construct_data_seq2seq('test_question', voc, 'data/test_question.npy')
    dm.construct_data_seq2seq('test_option',
                              voc,
                              'data/test_option.npy',
                              multi_seq=True)
    print("construct data...finish")
    print('test_question_seq.shape: ' + str(dm.data['test_question'].shape))
    print('test_option.shape: ' + str(dm.data['test_option'].shape))

    # `test` is bound here on every iteration; no placeholder array is needed
    # before the loop because nothing reads `test` before this assignment.
    # Assumes dm.output returns a 2-D array with one column per answer option
    # -- TODO confirm.
    test = dm.output(dm.data['test_question'])
    # Index of the largest value along axis 1, i.e. one index per row.
    test_y = np.argmax(test, axis=1)
    dm.write(test_y, 'ans_{}.csv'.format(word_dim))
Example #2
0
'''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' ''''''
# Keys of the training splits held by the data manager.
train_keys = ('train1', 'train2', 'train3', 'train4', 'train5')

# Inputs drop the last element of each split and targets drop the first, so
# element i of train_x is paired with element i + 1 of the same split.
train_x = np.concatenate([dm.data[key][:-1] for key in train_keys])
train_y = np.concatenate([dm.data[key][1:] for key in train_keys])
print(train_x.shape)
print(train_y.shape)

# Train with shuffling; 5% of the samples are held out for validation.
model.fit(
    {'sequence_in': train_x},
    {'main_output': train_y},
    epochs=n_epoch,
    batch_size=n_batch,
    shuffle=True,
    validation_split=0.05,
    callbacks=callbacks_list,
    verbose=1,
)
'''
'''
'''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' ''''''
'''''' '''''' '''''' '''       save model                               '''
'''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' ''''''
# Save the current model to 'model.hdf5' (path is relative to the working
# directory).
model.save('model.hdf5')
'''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' ''''''
'''''' '''''' '''''' '''       writing output                           '''
'''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' ''''''
# Feed the encoded test questions to the model under its 'sequence_in' input.
predict_inputs = {'sequence_in': dm.data['test_question']}
test_y = model.predict(predict_inputs, batch_size=n_batch, verbose=1)

# Hand the predictions to dm.output and write whatever it returns to CSV.
output = dm.output(test_y)
dm.write(output, './output.csv')
'''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' ''''''
'''''' '''''' '''''' '''       loading model                            '''
'''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' ''''''
# Rebind `model` to whatever load_model restores from `continue_file`
# (both names are defined outside this excerpt).
model = load_model(continue_file)
'''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' ''''''
'''''' '''''' '''''' '''       print model                              '''
'''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' ''''''
# Print a summary of the currently bound model.
model.summary()
'''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' ''''''
'''''' '''''' '''''' '''       decoder                                  '''
'''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' ''''''
# Prepare the encoded test questions for decoding and print their shape.
data_in = dm.wrape_encoder(dm.data['test_question'], voc)
print(data_in.shape)

# Object handed to dm.decode_seq below; the meaning of 1024 is not visible
# here (presumably a hidden-state size -- confirm against DataManager).
test_model = dm.construct_seq2seq_test(model, 1024)

data_out = []
# NOTE(review): only indices 0-5 are decoded. A loop over the whole input,
# `for i in range(len(data_in)):`, was left commented out next to this one.
for i in range(6):
    print('\rdecoding... sequence: ' + str(i), end='')
    # Each sample is reshaped into a batch of one before decoding.
    single_batch = data_in[i].reshape((1, 14, 300))
    decoded = dm.decode_seq(single_batch, test_model, voc)
    data_out.append(decoded)
data_out = np.array(data_out)
'''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' ''''''
'''''' '''''' '''''' '''       writing output                           '''
'''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' '''''' ''''''
'''
'''
# Pass the decoded sequences through dm.output and write the result to
# './output_seq2seq.csv'.
output = dm.output(data_out)
dm.write(output, './output_seq2seq.csv')
'''
'''