def test_AttentionSeq2Seq():
    x = np.random.random((samples, input_length, input_dim))
    y = np.random.random((samples, output_length, output_dim))

    models = []
    models += [
        AttentionSeq2Seq(output_dim=output_dim,
                         hidden_dim=hidden_dim,
                         output_length=output_length,
                         input_shape=(input_length, input_dim))
    ]
    models += [
        AttentionSeq2Seq(output_dim=output_dim,
                         hidden_dim=hidden_dim,
                         output_length=output_length,
                         input_shape=(input_length, input_dim),
                         depth=2)
    ]
    models += [
        AttentionSeq2Seq(output_dim=output_dim,
                         hidden_dim=hidden_dim,
                         output_length=output_length,
                         input_shape=(input_length, input_dim),
                         depth=3)
    ]

    # att = AttentionSeq2Seq(output_dim=tsp_output_dim, hidden_dim=tsp_hidden_dim, output_length=tsp_output_length, input_shape=(tsp_input_length, tsp_input_dim))

    for model in models:
        model.compile(loss='mse', optimizer='sgd')
        model.summary()
        model.fit(x, y, epochs=1)
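Example #1 relies on module-level constants (samples, input_length, input_dim, output_length, output_dim, hidden_dim) that the snippet does not show; the later test variants use similarly named globals. A minimal sketch of plausible values, purely an assumption to make the smoke test runnable:

# Assumed values -- the originals are defined elsewhere in the test module.
import numpy as np
from seq2seq.models import AttentionSeq2Seq

samples = 100        # number of random training sequences
input_length = 5     # encoder timesteps
input_dim = 10       # encoder feature size per timestep
output_length = 8    # decoder timesteps
output_dim = 20      # decoder feature size per timestep
hidden_dim = 24      # size of the recurrent hidden state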
Example #2
def test_AttentionSeq2Seq():
    print("test seq2seq-attention")
    x = np.random.random((batch, max_encoder_length, input_dim))
    y = np.random.random((batch, max_decoder_length, output_dim))

    models = []
    models += [
        AttentionSeq2Seq(output_dim=output_dim,
                         hidden_dim=hidden_dim,
                         output_length=max_decoder_length,
                         input_shape=(max_encoder_length, input_dim))
    ]
    models += [
        AttentionSeq2Seq(output_dim=output_dim,
                         hidden_dim=hidden_dim,
                         output_length=max_decoder_length,
                         input_shape=(max_encoder_length, input_dim),
                         depth=2)
    ]
    models += [
        AttentionSeq2Seq(output_dim=output_dim,
                         hidden_dim=hidden_dim,
                         output_length=max_decoder_length,
                         input_shape=(max_encoder_length, input_dim),
                         depth=3)
    ]

    for model in models:
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, epochs=1)
Example #3
def test_attention_seq2seq():
    x = np.random.random((samples, input_length, input_dim))
    y = np.random.random((samples, output_length, output_dim))

    models = []
    models += [
        AttentionSeq2Seq(output_dim=output_dim,
                         hidden_dim=hidden_dim,
                         output_length=output_length,
                         input_shape=(input_length, input_dim))
    ]
    models += [
        AttentionSeq2Seq(output_dim=output_dim,
                         hidden_dim=hidden_dim,
                         output_length=output_length,
                         input_shape=(input_length, input_dim),
                         depth=2)
    ]
    models += [
        AttentionSeq2Seq(output_dim=output_dim,
                         hidden_dim=hidden_dim,
                         output_length=output_length,
                         input_shape=(input_length, input_dim),
                         depth=3)
    ]

    for model in models:
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, epochs=epoch_num)
Example #4
def run():

    data_process = DataProcess(use_word2cut=False)

    input_length = data_process.enc_input_length
    output_length = data_process.dec_output_length
    enc_embedding_length = data_process.enc_embedding_length
    dec_embedding_length = data_process.dec_embedding_length

    model = AttentionSeq2Seq(output_dim=dec_embedding_length,
                             hidden_dim=data_process.hidden_dim,
                             output_length=output_length,
                             input_shape=(input_length, enc_embedding_length),
                             batch_size=1,
                             depth=data_process.layer_shape)

    model.compile(loss='mse', optimizer='rmsprop')

    model.load_weights("model/seq2seq_model_weights.h5")

    plot_model(model,
               to_file='model/seq2seq_model_structure.png',
               show_shapes=True,
               show_layer_names=True)

    text = u"碧水照嫩柳,桃花映春色"  #u"这围巾要火!"#u"你愿意嫁给我吗?"

    enc_padding_ids = data_to_padding_ids(text)
    enc_embedding = data_to_embedding(enc_padding_ids)

    prediction_words = predict_one_text(model, enc_embedding)

    print(prediction_words)

    print_score(model, enc_embedding)
Example #5
def test_AttentionSeq2Seq():
    x = np.random.random((samples, input_length, input_dim))
    y = np.random.random((samples, output_length, output_dim))

    models = []
    models += [AttentionSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim, output_length=output_length, input_shape=(input_length, input_dim))]
    #models += [AttentionSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim, output_length=output_length, input_shape=(input_length, input_dim), depth=2)]
    #models += [AttentionSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim, output_length=output_length, input_shape=(input_length, input_dim), depth=3)]

    for model in models:
        model.compile(loss='mse', optimizer='sgd')
        plot_model(model, to_file='model1.png', show_shapes=True)
Example #6
def train(x_train, y_train, epoch_num, hidden_dim, depth_num):
    input_length = x_train.shape[1]
    output_length = y_train.shape[1]
    
    input_dim = x_train.shape[2]
    output_dim = y_train.shape[2]
    model = AttentionSeq2Seq(
        input_shape=(input_length, input_dim), depth=depth_num,
        output_dim=output_dim, hidden_dim=hidden_dim, output_length=output_length)
    
    model.compile(loss='mse', optimizer="rmsprop")
    model.summary()
    
    model.fit(x_train, y_train, epochs=epoch_num)
    return model
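A hypothetical call to train(), assuming x_train and y_train are 3-D NumPy arrays shaped (samples, timesteps, features); the sizes below are illustrative only:

import numpy as np

# Illustrative shapes: 64 samples, 12 encoder steps x 8 features in,
# 6 decoder steps x 4 features out.
x_train = np.random.random((64, 12, 8))
y_train = np.random.random((64, 6, 4))
model = train(x_train, y_train, epoch_num=1, hidden_dim=16, depth_num=2)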
Example #8
    def create_nn_model(self):
        self.create_embedding()
        #input_seq = Input(shape=(MAX_SEQ_LEN,))

        # This converts the positive indices (integers) into a dense multi-dimensional representation.
        model = Sequential()
        model.add(
            Embedding(self.data_vocab_size + 1,
                      TOKEN_REPRESENTATION_SIZE,
                      weights=[self.embedding_matrix],
                      trainable=False,
                      input_shape=(MAX_SEQ_LEN, )))
        model.add(
            AttentionSeq2Seq(
                batch_input_shape=(None, MAX_SEQ_LEN,
                                   TOKEN_REPRESENTATION_SIZE),
                #hidden_dim=200, output_length=MAX_SEQ_LEN, output_dim=TOKEN_REPRESENTATION_SIZE,
                #depth=MAX_SEQ_LEN, peek=True))#, return_sequences=True))
                hidden_dim=100,
                output_length=MAX_SEQ_LEN,
                output_dim=TOKEN_REPRESENTATION_SIZE,
                depth=1))
        model.add(
            TimeDistributed(
                Dense(self.data_vocab_size + 1, activation="softmax")))
        model.compile(optimizer="rmsprop",
                      loss="categorical_crossentropy",
                      metrics=["accuracy"])
        self.autoencoder = model

        #embedding = Embedding(self.data_vocab_size+1, TOKEN_REPRESENTATION_SIZE,
        #       weights=[self.embedding_matrix], trainable=False)(input_seq)

        #encoder = LSTM(200)(embedding)
        #repeated = RepeatVector(MAX_SEQ_LEN)(encoder)
        #decoder = LSTM(200, return_sequences=True)(repeated)

        #time_dist = TimeDistributed(Dense(self.data_vocab_size+1, activation="softmax"))(decoder)

        #seq_autoencoder = Model(input_seq, time_dist)

        #seq_autoencoder.compile(optimizer="rmsprop", loss="categorical_crossentropy", metrics=["accuracy"])
        #seq_autoencoder.summary()

        #self.autoencoder = seq_autoencoder
        return self.autoencoder
Example #9
    def s2s_model(self):
        """Train s2s model here.

        Returns:
            A s2s model that can be used to predict result.

        Raises:
            IOError: An error occurred training xgb model.
        """
        
        model = AttentionSeq2Seq(input_dim=self.input_dim, input_length=self.input_len, 
                                 hidden_dim=16, output_length=self.output_len, 
                                 output_dim=self.output_dim, depth=(1,1),
                                 stateful=False, dropout=0.5)
        model.compile(loss='mape', optimizer='adam', metrics=['mse'])
        model.fit(self.train_X, self.train_Y, epochs=75, verbose=2, shuffle=True)

        return model 
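The docstring above says the returned model can be used for prediction; a hypothetical follow-up method, assuming the class also holds held-out inputs in self.test_X with shape (n, input_len, input_dim):

    # Hypothetical usage -- self.test_X is an assumption, not part of the original snippet.
    def s2s_predict(self):
        model = self.s2s_model()
        return model.predict(self.test_X)  # shape: (n, output_len, output_dim)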
Example #10
    def create_critic_model(self):
        model = Sequential()
        model.add(
            Embedding(self.data_vocab_size + 1,
                      TOKEN_REPRESENTATION_SIZE,
                      weights=[self.get_embedding_matrix()],
                      trainable=False,
                      input_shape=(MAX_SEQ_LEN, )))
        model.add(
            AttentionSeq2Seq(batch_input_shape=(None, MAX_SEQ_LEN,
                                                TOKEN_REPRESENTATION_SIZE),
                             hidden_dim=100,
                             output_length=MAX_SEQ_LEN,
                             output_dim=TOKEN_REPRESENTATION_SIZE,
                             depth=1))
        # The output is expected to be one scalar approximating the value of a state.
        model.add(TimeDistributed(Dense(1, activation="linear")))
        print("Critic model created")
        model.summary()
        model.compile(optimizer="rmsprop", loss="mse", metrics=["accuracy"])

        self.critic = model
Example #11
def run():

    enc_vec_model = gensim.models.Word2Vec.load(r'model/encoder_vector.m')
    dec_vec_model = gensim.models.Word2Vec.load(r'model/decoder_vector.m')

    batch_size = 9
    epochs = 30
    data_process = DataProcess(use_word2cut=False)
    documents_length = data_process.get_documents_size(
        data_process.enc_ids_file, data_process.dec_ids_file)
    input_length = data_process.enc_input_length
    output_length = data_process.dec_output_length
    enc_embedding_length = data_process.enc_embedding_length
    dec_embedding_length = data_process.dec_embedding_length

    if batch_size > documents_length:
        print("ERROR--->" + u"语料数据量过少,请再添加一些")
        return None

    if data_process.hidden_dim < data_process.enc_input_length:
        print("ERROR--->" + u"隐层神经元数目过少,请再添加一些")  # too few hidden-layer units, please add more
        return None

    model = AttentionSeq2Seq(output_dim=dec_embedding_length,
                             hidden_dim=data_process.hidden_dim,
                             output_length=output_length,
                             input_shape=(input_length, enc_embedding_length),
                             batch_size=batch_size,
                             depth=data_process.layer_shape)
    # keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)
    model.compile(loss='mse', optimizer='rmsprop')
    model.fit_generator(generator=generate_batch(batch_size=batch_size,
                                                 encoder_word2vec_model=enc_vec_model,
                                                 decoder_word2vec_model=dec_vec_model,
                                                 encoder_file_path=data_process.enc_ids_padding_file,
                                                 decoder_file_path=data_process.dec_ids_padding_file,
                                                 embedding_shape=(enc_embedding_length, dec_embedding_length)),
                        steps_per_epoch=int(documents_length / batch_size),
                        epochs=epochs, verbose=1, workers=1)

    model.save_weights("model/seq2seq_model_weights.h5", overwrite=True)
Example #12
def build_model(training=True):
    
    data_process = DataProcess(use_word2cut=False)
    
    embedding_matrix = get_encoder_embedding()
    vocab_size, embedding_size = embedding_matrix.shape
    embedding_layer = Embedding(
        vocab_size,
        embedding_size,
        weights=[embedding_matrix],
        input_length=data_process.enc_input_length,
        trainable=training,
        name='encoder_embedding')

    enc_normalization = BatchNormalization(epsilon=data_process.epsilon)

    seq2seq = AttentionSeq2Seq(
        bidirectional=False,
        output_dim=data_process.dec_embedding_length, 
        hidden_dim=data_process.hidden_dim, 
        output_length=data_process.dec_output_length, 
        input_shape=(data_process.enc_input_length, data_process.enc_embedding_length), 
        depth=data_process.layer_shape)
    
    model = Sequential()
    model.add(embedding_layer)
    model.add(enc_normalization)
    model.add(seq2seq)
    
    # from keras.optimizers import SGD
    # sgd = SGD(lr=0.001, decay=0, clipvalue=0.0)
    # from keras.optimizers import Adam
    # adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-04)
    # adam rmsprop sgd
    model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])
    
    return model
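The commented-out optimizer lines above suggest the author also experimented with explicit SGD/Adam instances; a hedged sketch of re-compiling the returned model with the Adam settings taken from that comment (the re-compile step itself is an assumption, not part of the snippet):

# Sketch only: swap the 'adam' string for a configured Adam instance.
from keras.optimizers import Adam

model = build_model(training=True)
model.compile(loss='mse',
              optimizer=Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-04),
              metrics=['accuracy'])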
    
Example #13
    def __init__(self, eval_corpus, eval_corpus_vectors_path, result,
                 model_path, model_config):
        super().__init__()
        self.eval_corpus = eval_corpus
        self.eval_corpus_vectors_path = eval_corpus_vectors_path
        self.result = result
        self.model_path = model_path
        self.model_config = model_config

        self.model = Sequential()
        self.model.add(
            AttentionSeq2Seq(
                output_dim=model_config.get("output_dim"),
                hidden_dim=model_config.get("hidden_dim"),
                output_length=model_config.get("output_length"),
                input_shape=(
                    model_config.get("input_length"),
                    model_config.get("input_dim"),
                ),
            ))
        self.model.add(TimeDistributed(Dense(model_config.get("output_dim"))))
        self.model.add(Activation("softmax"))
        self.model.load_weights(model_path)
Example #14
if not os.path.exists(directory):
    os.makedirs(directory)

checkpoint = ModelCheckpoint(model_weights_path,
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True,
                             save_weights_only=True,
                             mode='max')
callbacks_list = [checkpoint]

model = Sequential()
model.add(
    AttentionSeq2Seq(output_dim=output_dim,
                     hidden_dim=hidden_dim,
                     output_length=output_length,
                     input_shape=(X_train[0].shape[0], X_train[0].shape[1]),
                     dropout=drop_out_ratio,
                     depth=depth))
model.add(TimeDistributed(Dense(output_dim)))
model.add(Activation('softmax'))

model.summary()
optimizer = Adam()
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])
hist = model.fit(X_train,
                 y_train_flat,
                 epochs=epoch_num,
                 batch_size=batch_size,
                 validation_data=(X_val, y_val_flat),
                 callbacks=callbacks_list)
Example #15
# test_SimpleSeq2Seq()
# test_Seq2Seq()
# test_AttentionSeq2Seq()
from seq2seq.models import AttentionSeq2Seq

model = AttentionSeq2Seq(input_dim=5,
                         input_length=7,
                         hidden_dim=10,
                         output_length=8,
                         output_dim=20,
                         depth=4)
model.compile(loss='mse', optimizer='rmsprop')
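The snippet compiles the model but never trains it; a minimal smoke test, assuming NumPy and using random data that matches the declared shapes (input_length=7, input_dim=5, output_length=8, output_dim=20):

import numpy as np

# Random data shaped (samples, input_length, input_dim) / (samples, output_length, output_dim).
x = np.random.random((32, 7, 5))
y = np.random.random((32, 8, 20))
model.fit(x, y, epochs=1)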
Example #16
    def create_actor_model(self):
        # This converts the positive indices (integers) into a dense
        # multi-dimensional representation.

        def evaluation_output_shape(input_shapes):
            input_shape = input_shapes[0]
            return (input_shape[0], input_shape[1])

        self.create_critic_model()

        #####Sequential model with Attention#######

        model = Sequential()
        model.add(
            Embedding(self.data_vocab_size + 1,
                      TOKEN_REPRESENTATION_SIZE,
                      weights=[self.get_embedding_matrix()],
                      trainable=False,
                      input_shape=(MAX_SEQ_LEN, )))
        model.add(
            AttentionSeq2Seq(batch_input_shape=(None, MAX_SEQ_LEN,
                                                TOKEN_REPRESENTATION_SIZE),
                             hidden_dim=100,
                             output_length=MAX_SEQ_LEN,
                             output_dim=TOKEN_REPRESENTATION_SIZE,
                             depth=1))
        model.add(
            TimeDistributed(
                Dense(self.data_vocab_size + 1, activation="softmax")))
        model.summary()
        self.actor = model

        #####Functional model without Attention #######

        #input = Input(shape=(MAX_SEQ_LEN,))
        #embedding = Embedding(self.data_vocab_size+1,
        #        TOKEN_REPRESENTATION_SIZE,
        #        weights=[self.get_embedding_matrix()], trainable=False)(input)
        #encoder = LSTM(100)(embedding)
        #repeat = RepeatVector(MAX_SEQ_LEN)(encoder)
        #decoder = LSTM(TOKEN_REPRESENTATION_SIZE, return_sequences=True)(repeat)
        #prediction = TimeDistributed(Dense(self.data_vocab_size+1,
        #    activation="softmax"))(decoder)

        #self.actor = Model(input=input, output=prediction)

        optimizer = RMSprop()

        P = self.actor.output
        Q_pi = K.placeholder(shape=(MAX_SEQ_LEN, ))

        # loss = - evaluation
        # evaluation = Sum( prob * Q_pi )
        loss = -K.sum(K.dot(K.max(P), Q_pi))

        updates = optimizer.get_updates(self.actor.trainable_weights,
                                        self.actor.constraints, loss)

        self.evaluation_fn = K.function([self.actor.input, Q_pi], [loss],
                                        updates=updates)

        print("Actor model created")
Example #17
# hard-code output dim and length
output_dim = 10
output_length = 9

# zero padding inputs
max_length = max_fea_seq_length(X_train)
print('max_length of x_train is {}'.format(max_length))
X_test = pad_sequences(X_test, maxlen=max_length, dtype=np.float64)

# load model
model = Sequential()
if 'att' in net_name:
    model.add(
        AttentionSeq2Seq(output_dim=output_dim,
                         hidden_dim=hidden_dim,
                         output_length=output_length,
                         input_shape=(X_test[0].shape[0], X_test[0].shape[1]),
                         dropout=0,
                         depth=depth))
elif 'seq' in net_name:
    model.add(
        Seq2Seq(output_dim=output_dim,
                hidden_dim=hidden_dim,
                output_length=output_length,
                input_shape=(X_test[0].shape[0], X_test[0].shape[1]),
                depth=depth,
                peek=True))
model.add(TimeDistributed(Dense(10)))
model.add(Activation('softmax'))

model.load_weights(model_path)
optimizer = Adam()
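The snippet stops right after constructing the optimizer; a plausible continuation (an assumption, mirroring the compile settings of Example #14) that compiles the loaded model and runs inference on the padded X_test:

# Assumed continuation -- the original source is truncated at this point.
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])
y_pred = model.predict(X_test)  # (n_samples, output_length, 10) class probabilities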
Example #18
else:
    from torch import FloatTensor

training_task = SHAPESModuloTask()
param_list = []

for mod in training_task.module_dict.values():
    param_list.extend(list(mod.parameters()))
    if state_dict['GPU_SUPPORT']:
        mod.cuda()

attn_seq2seq = AttentionSeq2Seq(vocab_size_1=len(state_dict['VOCAB']),
                                vocab_size_2=len(state_dict['TOKENS']),
                                word_dim=args.word_dim,
                                hidden_dim=args.hidden_dim,
                                batch_size=state_dict['BATCH_SIZE'],
                                num_layers=args.num_layers,
                                use_dropout=args.use_dropout,
                                dropout=args.dropout,
                                use_cuda=state_dict['GPU_SUPPORT'])

param_list.extend(list(attn_seq2seq.parameters()))

print('Number of trainable parameters: {}'.format(
    sum(param.numel() for param in param_list if param.requires_grad)))

if state_dict['GPU_SUPPORT']:
    attn_seq2seq.cuda()

loss = BCEWithLogitsLoss()
Example #19
# plot_model(model, to_file='model2.png', show_shapes=True)

if __name__ == '__main__':

    datset = joblib.load('../data/data_seq.pkl')
    word_dict = joblib.load('../data/query_dict_seq.pkl')

    encoder_input_data = datset[0]
    decoder_input_data = datset[1]
    decoder_target_data = datset[2]
    input_token_index = word_dict[0]
    target_token_index = word_dict[1]

    model = AttentionSeq2Seq(output_dim=latent_dim,
                             hidden_dim=latent_dim,
                             output_length=max_decoder_seq_length,
                             input_shape=(max_encoder_seq_length,
                                          num_encoder_tokens))

    model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
    #plot_model(model, to_file='model4.png', show_shapes=True)
    model.fit(x=encoder_input_data,
              y=decoder_target_data,
              batch_size=batch_size,
              epochs=epochs)
    model.save('./model/seq2seq_attention_v2.h5')
Example #20
    args = parser.parse_args()

    if not os.path.exists("tmp.pkl"):
        distributes = KeyedVectors.load_word2vec_format(args.dist_path)
        with open("tmp.pkl", "wb") as f:
            pickle.dump(distributes, f)
    else:
        with open("tmp.pkl", "rb") as f:
            distributes = pickle.load(f)

    middle_dim, depth_num, _, input_length, output_length, _ = [
        int(i) for i in args.input_model.split(".")[:-1]
    ]
    dist_vec = get_distribute_vector(distributes,
                                     args.input_text)[:input_length, :]

    input_dim = 100
    output_dim = 100
    model = AttentionSeq2Seq(input_shape=(input_length, input_dim),
                             depth=depth_num,
                             output_dim=output_dim,
                             hidden_dim=middle_dim,
                             output_length=output_length)

    model.compile(loss='mse', optimizer="rmsprop")
    model.load_weights(args.input_model)
    y_pred = numpy.array([dist_vec])
    for i in range(args.iter):
        y_pred = model.predict(y_pred[:, -input_length:, :])
        for j in y_pred[0]:
            print(distributes.similar_by_vector(j, topn=1)[0][0], end=" ")
def train_keras(sentence_array, operator_chain_array):
    from seq2seq import SimpleSeq2Seq, Seq2Seq, AttentionSeq2Seq

    sentence_shape = sentence_array.shape
    length = sentence_shape[0]
    in_seq_length = sentence_shape[1]
    in_vocab_size = sentence_shape[2]

    op_shape = operator_chain_array.shape
    out_seq_length = op_shape[1]
    out_vocab_size = op_shape[2]

    print(sentence_shape, op_shape)

    batch_size = 64
    hidden_size, embedding_dim = 10, 10
    memory_dim = 200
    num_layers = 2

    # model = SimpleSeq2Seq(input_dim=in_vocab_size,
    #     hidden_dim=embedding_dim,
    #     output_length=out_seq_length,
    #     output_dim=out_vocab_size,
    #     depth=3)

    # model = Seq2Seq(batch_input_shape=(batch_size, in_seq_length, in_vocab_size),
    #     hidden_dim=embedding_dim,
    #     output_length=out_seq_length,
    #     output_dim=out_vocab_size,
    #     depth=num_layers)

    # model.compile(loss='mse', optimizer='rmsprop')
    # model = Sequential()
    # model.add(LSTM(10, return_sequences=True), batch_input_shape=(4, 5, 10))
    # model.add(TFAttentionRNNWrapper(LSTM(10, return_sequences=True, consume_less='gpu')))
    # model.add(Dense(5))
    # model.add(Activation('softmax'))
    # model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics = ['accuracy'])

    # model = Sequential()
    # model.add(InputLayer(batch_input_shape=(nb_samples, timesteps, embedding_dim)))
    # model.add(wrappers.Bidirectional(recurrent.LSTM(embedding_dim, input_dim=embedding_dim, return_sequences=True)))
    # model.add(Attention(recurrent.LSTM(output_dim, input_dim=embedding_dim, return_sequences=True, consume_less='mem')))
    # model.add(core.Activation('relu'))
    # model.compile(optimizer='rmsprop', loss='mse')
    # model.fit(x,y, nb_epoch=1, batch_size=nb_samples)

    def get_basic_model():
        RNN = recurrent.LSTM
        model = AttentionSeq2Seq(input_dim=in_vocab_size,
                                 input_length=in_seq_length,
                                 hidden_dim=10,
                                 output_length=out_seq_length,
                                 output_dim=out_vocab_size,
                                 depth=2,
                                 bidirectional=False)
        model.add(RNN(hidden_size, input_shape=(in_seq_length, in_vocab_size)))
        model.add(RepeatVector(out_seq_length))
        for _ in range(num_layers):
            model.add(RNN(hidden_size, return_sequences=True))
        model.add(TimeDistributed(Dense(out_vocab_size)))
        #model.add(Attention(recurrent.LSTM(out_vocab_size, input_dim=in_vocab_size, return_sequences=False, consume_less='mem')))
        model.add(Activation('softmax'))
        return model

    # model = get_basic_model()
    model = AttentionSeq2Seq(input_dim=in_vocab_size,
                             input_length=in_seq_length,
                             hidden_dim=2,
                             output_length=out_seq_length,
                             output_dim=out_vocab_size,
                             depth=2,
                             bidirectional=False)

    model.compile(loss='mse', optimizer='rmsprop', metrics=['accuracy'])

    num_train = int(0.9 * length)
    X_train = sentence_array[:num_train]
    X_val = sentence_array[num_train:]
    # human_sent, human_op = read_data_file("humantests.txt")
    # human_sentence_array = preprocess_english(human_sent)
    # human_operator_chain_array = preprocess_operators(human_op)
    #X_val = human_sentence_array

    y_train = operator_chain_array[:num_train]
    y_val = operator_chain_array[num_train:]
    #y_val = human_operator_chain_array

    # model.fit(sentence_array,
    #     operator_chain_array,
    #     batch_size=batch_size,
    #     nb_epoch=1,
    #     validation_data=(sentence_array, operator_chain_array))

    for iteration in range(1, 200):
        print()
        print('-' * 50)
        print('Iteration', iteration)
        a = model.fit(X_train,
                      y_train,
                      batch_size=batch_size,
                      epochs=1,
                      validation_data=(X_val, y_val))
        ###
        # Select 10 samples from the validation set at random so we can visualize errors
        for i in range(10):
            ind = np.random.randint(0, len(X_val))
            rowX, rowy = X_val[np.array([ind])], y_val[np.array([ind])]
            preds = model.predict(rowX, verbose=0)
            # q = ctable.decode(rowX[0])
            # correct = ctable.decode(rowy[0])
            # guess = ctable.decode(preds[0], calc_argmax=False)
            # print('Q', q[::-1] if INVERT else q)
            # print('T', correct)
            # print('---')
    return model