def main():
    """Encode the English data with the saved encoder and train a decoder.

    Loads the pickled English (``x``) and Chinese (``y``) matrices, runs the
    encoder LSTM (weights read from 'encoder.pkl') over at most the first
    10000 English samples, and then passes 500-sample batches of the encoder
    outputs and the matching slices of ``y`` to ``train`` for a new decoder
    LSTM. The decoder weights are written to 'decoder.pkl' after every batch.
    """
    print('Loading Data')
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open('english_matrices.pkl', 'rb') as english_file:
        x = cPickle.load(english_file)
    with open('chinese_matrices.pkl', 'rb') as chinese_file:
        y = cPickle.load(chinese_file)
    print('Done')

    # Encoder training is not run here; its weights come from 'encoder.pkl'.
    encoder_lstm = LSTM(50, 100, 50)
    encoder_lstm.load_weights('encoder.pkl')

    # Slice instead of indexing 0..9999 so that a data set with fewer than
    # 10000 samples does not raise IndexError.
    outputs = [encoder_lstm.predict(sample) for sample in x[:10000]]

    # Pauses before decoder training; presumably IPython's interactive
    # embed() used to inspect `outputs` -- TODO confirm it is still wanted.
    embed()

    decoder_lstm = LSTM(50, 100, 50)
    for _ in range(4):
        for i in range(20):
            idx_start = i * 500
            # Clamp the last batch to the number of available samples.
            idx_end = min((i + 1) * 500, len(x))
            sys.stdout.write('\n\nTraining Data %d - %d' % (idx_start, idx_end))
            train(decoder_lstm, outputs[idx_start:idx_end], y[idx_start:idx_end], 50, 'decoder')
            # Checkpoint after every batch.
            decoder_lstm.save_weights('decoder.pkl')
Exemple #2
0
if __name__ == '__main__':
    # Command-line entry point: look up each input word in the English
    # dictionary, feed the stacked result through the encoder and then the
    # decoder, and join the findWord() result for each decoder output.
    parser = argparse.ArgumentParser(description='Test.')
    parser.add_argument('input',
                        nargs='*',
                        type=str,
                        help='The text to parse.')
    args = parser.parse_args()

    sentence = args.input
    if not sentence:
        # nargs='*' accepts zero words, but an empty matrix is 1-D and the
        # reshape below would fail with IndexError on mat.shape[1].
        parser.error('no input text given')

    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    # Binary mode matches how the other pickle files are opened.
    with open('english_dictionary.pkl', 'rb') as english_file:
        english_dict = cPickle.load(english_file)
    with open('chinese_dictionary.pkl', 'rb') as chinese_file:
        chinese_dict = cPickle.load(chinese_file)

    encoder = LSTM(50, 100, 50)
    encoder.load_weights('encoder.pkl')
    # NOTE(review): unlike the encoder, the decoder never loads saved weights
    # here -- confirm whether a load_weights() call is missing.
    decoder = LSTM(50, 100, 50)

    mat = []
    for word in sentence:
        try:
            mat.append(english_dict[word])
        except KeyError:
            # Report an unknown word as a usage error instead of a traceback.
            parser.error('word not in English dictionary: %r' % word)
    mat = np.array(mat)
    # Append a trailing axis of length 1 to the stacked dictionary entries.
    mat = mat.reshape((mat.shape[0], mat.shape[1], 1))

    output = encoder.predict(mat)
    # Only the last encoder output (its .v and .h) is handed to the decoder.
    final = decoder.predict([output[-1].v], output[-1].h)

    # NOTE(review): nothing visible here emits translated_sentence -- confirm
    # it is printed or returned after this point.
    translated_sentence = ''.join(findWord(word.v, chinese_dict) for word in final)