import numpy as np
# Assumed module path: DataManager and Vocabulary are the project's own helper classes.
from util import DataManager, Vocabulary

max_word_len = 14
word_dim_list = [50, 100, 150, 200, 250, 300, 350, 400]
test = np.zeros((5060, 6))

# Evaluate the test set once per word-embedding dimension and write one answer file each.
for word_dim in word_dim_list:
    print('word dim=', word_dim)
    dm = DataManager()
    voc = Vocabulary()
    dm.word_dim = word_dim
    dm.word_len = max_word_len
    voc.word2vec('data/w2v_model/w2v_model_{}'.format(word_dim))

    print("reading data...", end='')
    dm.read_test_data('data/testing_data.csv', 'test_question', 'test_option')
    print("\rreading data...finish")

    print("construct data...")
    dm.construct_data_seq2seq('test_question', voc, 'data/test_question.npy')
    dm.construct_data_seq2seq('test_option', voc, 'data/test_option.npy', multi_seq=True)
    print("construct data...finish")
    print('test_question_seq.shape: ' + str(dm.data['test_question'].shape))
    print('test_option.shape: ' + str(dm.data['test_option'].shape))

    # Score the options for every question, pick the best one, and save the answers.
    test = dm.output(dm.data['test_question'])
    test_y = np.argmax(test, axis=1)
    dm.write(test_y, 'ans_{}.csv'.format(word_dim))
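
# The loop above loads one pre-trained word2vec model per dimension from
# 'data/w2v_model/w2v_model_{dim}'. A minimal sketch of how such files could be
# produced, assuming gensim was used for training (the corpus variable and the
# train_word2vec() wrapper below are hypothetical, not part of the original code):
from gensim.models import Word2Vec

def train_word2vec(sentences, word_dim, out_path):
    # sentences: list of tokenized sentences, e.g. [['how', 'are', 'you'], ...]
    w2v = Word2Vec(sentences=sentences, vector_size=word_dim, window=5,
                   min_count=1, workers=4)  # use size= instead of vector_size= on gensim < 4
    w2v.save(out_path)

# Example: train_word2vec(corpus, 300, 'data/w2v_model/w2v_model_300')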
# Inputs are all sequences except the last in each training file; targets are the
# same arrays shifted by one, so each input is paired with the sequence that follows it.
train_x = np.concatenate((dm.data['train1'][:-1], dm.data['train2'][:-1],
                          dm.data['train3'][:-1], dm.data['train4'][:-1],
                          dm.data['train5'][:-1]))
train_y = np.concatenate((dm.data['train1'][1:], dm.data['train2'][1:],
                          dm.data['train3'][1:], dm.data['train4'][1:],
                          dm.data['train5'][1:]))
print(train_x.shape)
print(train_y.shape)

model.fit({'sequence_in': train_x}, {'main_output': train_y},
          epochs=n_epoch, batch_size=n_batch, shuffle=True,
          validation_split=0.05, callbacks=callbacks_list, verbose=1)

''' save model '''
model.save('model.hdf5')

''' writing output '''
test_y = model.predict({'sequence_in': dm.data['test_question']},
                       batch_size=n_batch, verbose=1)
output = dm.output(test_y)
dm.write(output, './output.csv')
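
# n_epoch, n_batch and callbacks_list are defined elsewhere in the original
# script. A minimal sketch of a typical callbacks_list, assuming standard Keras
# callbacks (the checkpoint file name and patience value are illustrative only):
from keras.callbacks import ModelCheckpoint, EarlyStopping

callbacks_list = [
    # keep the weights with the best validation loss seen so far
    ModelCheckpoint('model_best.hdf5', monitor='val_loss', save_best_only=True),
    # stop training when validation loss has not improved for 5 epochs
    EarlyStopping(monitor='val_loss', patience=5),
]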
from keras.models import load_model  # use tensorflow.keras.models when running on tf.keras

''' loading model '''
model = load_model(continue_file)

''' print model '''
model.summary()

''' decoder '''
data_in = dm.wrape_encoder(dm.data['test_question'], voc)
print(data_in.shape)
test_model = dm.construct_seq2seq_test(model, 1024)
data_out = []
# for i in range(len(data_in)):
for i in range(6):  # only the first 6 sequences are decoded here
    print('\rdecoding... sequence: ' + str(i), end='')
    data_out.append(
        dm.decode_seq(data_in[i].reshape((1, 14, 300)), test_model, voc))
data_out = np.array(data_out)

''' writing output '''
'''
output = dm.output(data_out)
dm.write(output, './output_seq2seq.csv')
'''
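
# dm.decode_seq above turns one encoded question into an answer one step at a
# time. A minimal sketch of that kind of greedy decoding loop, assuming the
# usual Keras encoder/decoder inference pair; greedy_decode, vec_to_word and
# word_to_vec are hypothetical helpers, not part of the original DataManager:
import numpy as np

def greedy_decode(seq_in, encoder_model, decoder_model, vec_to_word, word_to_vec,
                  max_len=14):
    # seq_in: one embedded question, shape (1, max_word_len, word_dim)
    states = encoder_model.predict(seq_in)             # encoder final states [h, c]
    token = np.zeros((1, 1, seq_in.shape[-1]))         # start-of-sequence vector (assumed all zeros)
    decoded = []
    for _ in range(max_len):
        out, h, c = decoder_model.predict([token] + states)
        word = vec_to_word(out[0, -1])                 # map the output vector to the nearest word
        if word == '<EOS>':                            # assumed end-of-sequence marker
            break
        decoded.append(word)
        token = word_to_vec(word).reshape((1, 1, -1))  # feed the prediction back in
        states = [h, c]
    return decoded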