def main():
    """Train the decoder half of an encoder/decoder LSTM translation pair.

    Loads pickled English and Chinese sentence matrices, runs the
    pre-trained encoder over the first 10000 English sentences, then
    trains the decoder on those encodings in mini-batches of 500,
    checkpointing the decoder weights as it goes.
    """
    print('Loading Data')
    # Use context managers so the pickle file handles are always closed.
    with open('english_matrices.pkl', 'rb') as f:
        x = cPickle.load(f)
    with open('chinese_matrices.pkl', 'rb') as f:
        y = cPickle.load(f)
    print('Done')

    # LSTM(50, 100, 50): presumably (input_dim, hidden_dim, output_dim)
    # for 50-d word vectors -- TODO confirm against the LSTM class.
    encoder_lstm = LSTM(50, 100, 50)
    encoder_lstm.load_weights('encoder.pkl')

    # Encode up to the first 10000 English sentences; these encodings are
    # the decoder's training inputs. Slicing (rather than range(10000))
    # also works when fewer than 10000 sentences are available.
    outputs = [encoder_lstm.predict(mat) for mat in x[:10000]]

    # NOTE(review): embed() drops into an interactive shell and blocks the
    # run until it is exited -- this looks like a leftover debugging hook;
    # confirm it is still wanted before removing.
    embed()

    decoder_lstm = LSTM(50, 100, 50)
    batch_size = 500
    for _ in range(4):  # 4 passes over the data
        for i in range(20):  # 20 mini-batches of 500 sentences each
            idx_start = i * batch_size
            idx_end = min((i + 1) * batch_size, len(x))
            sys.stdout.write('\n\nTraining Data %d - %d' % (idx_start, idx_end))
            train(decoder_lstm, outputs[idx_start:idx_end],
                  y[idx_start:idx_end], 50, 'decoder')
            # Checkpoint after every batch so progress survives a crash.
            decoder_lstm.save_weights('decoder.pkl')
if __name__ == '__main__':
    # Translate the sentence given on the command line (one word per arg).
    parser = argparse.ArgumentParser(description='Test.')
    parser.add_argument(nargs='*', action='store', dest='input', type=str,
                        help='The text to parse.')
    args = parser.parse_args()
    sentence = args.input

    # Word -> embedding lookup tables for both languages. Pickle files
    # must be opened in binary mode; also close the handles via `with`.
    with open('english_dictionary.pkl', 'rb') as f:
        english_dict = cPickle.load(f)
    with open('chinese_dictionary.pkl', 'rb') as f:
        chinese_dict = cPickle.load(f)

    encoder = LSTM(50, 100, 50)
    encoder.load_weights('encoder.pkl')
    decoder = LSTM(50, 100, 50)
    # BUG FIX: the decoder was constructed with fresh (random) weights and
    # never loaded the weights trained by main(); load them so the
    # translation is meaningful.
    decoder.load_weights('decoder.pkl')

    # Build the (num_words, embedding_dim, 1) input tensor for the encoder.
    # NOTE(review): english_dict[word] raises KeyError for unknown words --
    # TODO decide on an out-of-vocabulary fallback.
    mat = np.array([english_dict[word] for word in sentence])
    mat = mat.reshape((mat.shape[0], mat.shape[1], 1))

    output = encoder.predict(mat)
    # Seed the decoder with the encoder's final state. NOTE(review): the
    # asymmetric arguments ([...].v wrapped in a list, .h bare) are kept
    # as in the original -- verify against LSTM.predict's signature.
    final = decoder.predict([output[-1].v], output[-1].h)

    # Map each decoder output vector back to its nearest Chinese word.
    translated_sentence = ''.join(findWord(word.v, chinese_dict)
                                  for word in final)
    # BUG FIX: the translation was computed but never shown to the user.
    print(translated_sentence)