Example #1
def test_Seq2Seq():
    x = np.random.random((samples, input_length, input_dim))
    y = np.random.random((samples, output_length, output_dim))

    models = []
    models += [
        Seq2Seq(output_dim=output_dim,
                output_length=output_length,
                input_shape=(input_length, input_dim))
    ]
    models += [
        Seq2Seq(output_dim=output_dim,
                output_length=output_length,
                input_shape=(input_length, input_dim),
                peek=True)
    ]
    models += [
        Seq2Seq(output_dim=output_dim,
                output_length=output_length,
                input_shape=(input_length, input_dim),
                depth=2)
    ]
    models += [
        Seq2Seq(output_dim=output_dim,
                output_length=output_length,
                input_shape=(input_length, input_dim),
                peek=True,
                depth=2)
    ]

    for model in models:
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, epochs=1)
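Note: samples, input_length, input_dim, output_length, output_dim (and hidden_dim in the later variants of this test) are module-level constants not shown in the snippet, and Seq2Seq here is most likely the Keras seq2seq add-on model. A minimal, purely illustrative setup such as the following is enough to run it as a smoke test:

import numpy as np
from seq2seq.models import Seq2Seq  # assumed import; the seq2seq add-on package exposes the model here

# Hypothetical shapes; any small values work for a smoke test.
samples, input_length, input_dim = 8, 10, 4
output_length, output_dim, hidden_dim = 6, 5, 16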
Example #2
def start_test(args, data):
    # read one sentence of user input at a time
    if args.gpu == 0:
        use_cuda = False
    else:
        use_cuda = True
    input_dim = args.vocab_size
    output_dim = args.vocab_size
    embedding_dim = args.embedding_size
    hidden_dim = args.hidden_size
    args.mode = 'test'
    while True:
        input_word = input('Please enter your question: ')
        text = data.text2index(input_word)
        if use_cuda:
            seq2seq_model = Seq2Seq(input_dim, embedding_dim, hidden_dim, output_dim, use_cuda=use_cuda).cuda()
        else:
            seq2seq_model = Seq2Seq(input_dim, embedding_dim, hidden_dim, output_dim, use_cuda=use_cuda)
        seq2seq_model.load_state_dict(torch.load(args.module_path))
        seq2seq_model.eval()
        #predict_indices = seq2seq_model.seq2seq_predict(text)
        #predict_result = data.index2text(predict_indices)
        #print(predict_result)
        predict_sample_indices = seq2seq_model.beamsearch(text)
        for predict_indices in predict_sample_indices:
            predict_result = data.index2text(predict_indices)
            print("".join(predict_result[:-1]))
Example #3
    def __init__(self, config):
        super(AEMapper, self).__init__()

        self.config = config
        self.source_seq2seq = Seq2Seq(config)

        self.target_seq2seq = Seq2Seq(config)

        self.mapper = layers.FeedForward(config.context_size,
                                         config.num_layers * config.decoder_hidden_size,
                                         num_layers=1,
                                         activation=config.activation)
Example #4
def test_Seq2Seq():
    x = np.random.random((samples, input_length, input_dim))
    y = np.random.random((samples, output_length, output_dim))

    models = []
    models += [
        Seq2Seq(output_dim=output_dim,
                hidden_dim=hidden_dim,
                output_length=output_length,
                input_shape=(input_length, input_dim))
    ]
    models += [
        Seq2Seq(output_dim=output_dim,
                hidden_dim=hidden_dim,
                output_length=output_length,
                input_shape=(input_length, input_dim),
                peek=True)
    ]
    models += [
        Seq2Seq(output_dim=output_dim,
                hidden_dim=hidden_dim,
                output_length=output_length,
                input_shape=(input_length, input_dim),
                depth=2)
    ]
    models += [
        Seq2Seq(output_dim=output_dim,
                hidden_dim=hidden_dim,
                output_length=output_length,
                input_shape=(input_length, input_dim),
                peek=True,
                depth=2)
    ]

    # for model in models:
    #     model.compile(loss='mse', optimizer='sgd')
    #     model.summary()
    #     model.fit(x, y, epochs=1)

    model = Seq2Seq(output_dim=output_dim,
                    hidden_dim=hidden_dim,
                    output_length=output_length,
                    input_shape=(input_length, input_dim),
                    peek=True,
                    depth=2,
                    teacher_force=True)
    model.compile(loss='mse', optimizer='sgd')
    model.fit([x, y], y, epochs=1)
    for layer in model.layers:
        print(layer)
        if "RecurrentSequential" in str(layer):
            print(K.get_value(layer.x))
Example #5
def train():
    model = Seq2Seq()
    data_loader = DataLoader()

    with tf.Session() as sess:
        checkpoint = tf.train.latest_checkpoint(FLAGS.train_dir)
        if checkpoint:
            print('Restoring the model from the following file...', checkpoint)
            model.saver.restore(sess, checkpoint)
        else:
            print('Creating a new model...')
            sess.run(tf.global_variables_initializer())

        writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        input_batch, output_batch, target_batch = data_loader.make_batch(data_loader.seq_data)

        for step in range(FLAGS.max_steps):
            _, loss = model.train(
                sess, input_batch, output_batch, target_batch)
            
            model.write_logs(sess, writer, input_batch, output_batch, target_batch)

            print('Step', '%04d' % (step + 1), 'cost =', '{:.6f}'.format(loss))

        checkpoint_path = os.path.join(FLAGS.train_dir, FLAGS.checkpoint_name)
        model.saver.save(sess, checkpoint_path)
    print('Optimization finished!')
Example #6
def test():
    model = Seq2Seq()
    data_loader = DataLoader()

    print(FLAGS.input_size)

    with tf.Session() as sess:
        checkpoint = tf.train.latest_checkpoint(FLAGS.train_dir)
        if checkpoint:
            print('Restoring the model from the following file...', checkpoint)
            model.saver.restore(sess, checkpoint)

            while True:
                word = input('Enter an english word: ')
                seq_data = [word, 'P' * len(word)]

                input_batch, output_batch, target_batch = data_loader.make_batch([
                    seq_data])
                prediction = tf.argmax(model.logits, 2)
                result = sess.run(prediction, feed_dict={model.enc_input: input_batch,
                                                        model.dec_input: output_batch,
                                                        model.targets: target_batch})

                decoded = [data_loader.char_list[i] for i in result[0]]
                try:
                    end = decoded.index('E')
                    translated = ''.join(decoded[:end])
                except ValueError:  # no end-of-sequence symbol 'E' in the output
                    translated = ''.join(decoded)

                print(word, ' -> ', translated)
        else:
            print('No trained model was found.')
Example #7
    def setUp(self):
        # data stuff
        self.batch_size = 32
        self.window_size = 64
        self.hidden_size = 256
        self.overlap_size = 32
        input_file = '../data/songs-utf-8.txt'
        self.assertTrue(os.path.exists(input_file))

        # training stuff
        self.learning_rate = 0.001
        self.decay_rate = 0.999
        self.decay_steps = 100
        self.epochs = 10
        self.skip_steps = 20

        # sampling stuff
        self.temp = 0.7
        self.seed = list(string.ascii_uppercase) + ['Š', 'Đ', 'Č', 'Ć', 'Ž']
        self.seed = list(
            filter(lambda v: v not in ['X', 'Y', 'Q', 'W'], self.seed))
        print('seed', self.seed)

        # clear everything that might be in the default graph from previous tests
        tf.reset_default_graph()
        # construction stuff
        self.seq2seq = Seq2Seq(input_file=input_file,
                               window_size=self.window_size,
                               overlap_size=self.overlap_size,
                               batch_size=self.batch_size,
                               hidden_size=self.hidden_size)
Example #8
def eval():
    model = Seq2Seq().to(config.device)
    model.load_state_dict(torch.load("./models/model.pkl"))

    loss_list = []
    acc_list = []
    data_loader = get_dataloader(train=False)  # load the test set
    with torch.no_grad():
        for idx, (input, target, input_len, target_len) in enumerate(data_loader):
            input = input.to(config.device)
            # target = target #[batch_size,max_len]
            input_len = input_len.to(config.device)
            # decoder_predict:[batch_size,max_len]
            decoder_outputs, decoder_predict = model.evaluate(input, input_len)  # [batch_Size,max_len,vocab_size]
            loss = F.nll_loss(decoder_outputs.view(-1, len(config.ns)), target.to(config.device).view(-1),
                              ignore_index=config.ns.PAD)
            loss_list.append(loss.item())

            # inverse_transform target and decoder_predict back into token sequences
            target_inverse_transformed = [config.ns.inverse_transform(i) for i in target.numpy()]
            predict_inverse_transformed = [config.ns.inverse_transform(i) for i in decoder_predict]
            cur_eq = [1 if target_inverse_transformed[i] == predict_inverse_transformed[i] else 0 for i in
                      range(len(target_inverse_transformed))]
            acc_list.extend(cur_eq)
            # print(np.mean(cur_eq))

    print("mean acc:{} mean loss:{:.6f}".format(np.mean(acc_list), np.mean(loss_list)))
Example #9
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--output_dir', type=str, default="../output/")
    parser.add_argument('--checkpoint_path',
                        type=str,
                        default="../checkpoints/seq2seq.h5")
    parser.add_argument('--ts_path',
                        type=str,
                        default="../processed_data/train/ts.joblib")
    parser.add_argument('--ts_dim', type=int, default=3)
    parser.add_argument('--input_length', type=int, default=64)
    parser.add_argument('--output_length', type=int, default=16)
    parser.add_argument('--hidden_size', type=int, default=128)
    config = parser.parse_args()

    model = Seq2Seq(hidden_size=config.hidden_size,
                    ts_dim=config.ts_dim,
                    input_length=config.input_length,
                    output_length=config.output_length,
                    teacher_forcing=True)

    ts = joblib.load(config.ts_path)
    inferer = Inference(model,
                        ts,
                        output_dir=config.output_dir,
                        checkpoint_path=config.checkpoint_path)
    inferer.infer()
Example #10
def main():
    BATCH_SIZE = 128
    X_indices, Y_indices, X_char2idx, Y_char2idx, X_idx2char, Y_idx2char = preprocess_data(
    )
    X_train = X_indices[BATCH_SIZE:]
    Y_train = Y_indices[BATCH_SIZE:]
    X_test = X_indices[:BATCH_SIZE]
    Y_test = Y_indices[:BATCH_SIZE]

    model = Seq2Seq(
        rnn_size=50,
        n_layers=2,
        X_word2idx=X_char2idx,
        encoder_embedding_dim=15,
        Y_word2idx=Y_char2idx,
        decoder_embedding_dim=15,
    )
    model.fit(X_train,
              Y_train,
              val_data=(X_test, Y_test),
              batch_size=BATCH_SIZE,
              n_epoch=50)
    model.infer('common', X_idx2char, Y_idx2char)
    model.infer('apple', X_idx2char, Y_idx2char)
    model.infer('zhedong', X_idx2char, Y_idx2char)
Example #11
def main(_):
    model_path = os.path.join('model', FLAGS.name)
    if not os.path.exists(model_path):
        os.makedirs(model_path)

    train_data, train_lang = loaddata(path, FLAGS.num_steps)
    vocab_size = train_lang.vocab_size

    converter = TextConverter(lang=train_lang, max_vocab=FLAGS.max_vocab)
    converter.save_lang(filename=FLAGS.name + '_converter.pkl')

    g = batch_generator(train_data, FLAGS.batch_size, FLAGS.max_steps)

    model = Seq2Seq('train',
                    vocab_size,
                    batch_size=FLAGS.batch_size,
                    num_steps=FLAGS.num_steps,
                    max_steps=FLAGS.max_steps,
                    lstm_size=FLAGS.lstm_size,
                    num_layers=FLAGS.num_layers,
                    learning_rate=FLAGS.learning_rate,
                    train_keep_prob=FLAGS.train_keep_prob,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size,
                    max_iters=FLAGS.max_iters,
                    bidirectional=FLAGS.bidirectional,
                    beam_search=False)
    model.train(
        g,
        converter,
        FLAGS.max_steps,
        model_path,
        FLAGS.save_every_n,
        FLAGS.log_every_n,
    )
Example #12
def test_run(data_path, model_path, n_show=10):
    """
    Test function.
    The input is taken from the training data, so the output should reproduce
    the corresponding target sentence from the training phase.
    :return:
    """

    corpus = ConvCorpus(file_path=None)
    corpus.load(load_dir=data_path)

    print('Vocabulary Size (number of words) :', len(corpus.dic.token2id))
    print('')

    # rebuild seq2seq model
    model = Seq2Seq(len(corpus.dic.token2id), feature_num=args.feature_num,
                    hidden_num=args.hidden_num, batch_size=1, gpu_flg=args.gpu)
    serializers.load_hdf5(model_path, model)

    # run an interpreter
    for num, input_sentence in enumerate(corpus.posts):
        id_sequence = input_sentence.copy()
        # input_sentence.reverse()
        # input_sentence.insert(0, corpus.dic.token2id["<eos>"])

        model.initialize()  # initialize cell
        sentence = model.generate(input_sentence, sentence_limit=len(input_sentence) + 30,
                                  word2id=corpus.dic.token2id, id2word=corpus.dic)
        print("teacher : ", " ".join([corpus.dic[w_id] for w_id in id_sequence]))
        print("correct :", " ".join([corpus.dic[w_id] for w_id in corpus.cmnts[num]]))
        print("-> ", sentence)
        print('')

        if num == n_show:
            break
Example #13
    def setUp(self):
        vocab_size = 13
        wordvec_size = 100
        hidden_size = 100
        self.seq2seq = Seq2Seq(vocab_size, wordvec_size, hidden_size)
        self.xs = np.random.randint(0, 13, (13, 100))
        self.ts = np.random.randint(0, 13, (13, 100))
Example #14
def main():
    #Dataset
    dataset = PadDataset(WORKING_DIR, EMBEDDING_SIZE, diff_vocab = DIFF_VOCAB, embedding_path = EMBEDDING_PATH,\
                  limit_encode = LIMIT_ENCODE, limit_decode = LIMIT_DECODE)
    print("112")
    encoder_vocab_size = dataset.length_vocab_encode()
    decoder_vocab_size = dataset.length_vocab_decode()
    print("Steps per epoch %d" %(int(math.ceil(float(dataset.datasets["train"].number_of_samples)\
                                        /float(BATCH_SIZE)))))
    #Initialising Model
    embeddings_encoder = dataset.vocab.embeddings_encoder
    embeddings_encoder = torch.Tensor(embeddings_encoder).cuda()
    embeddings_decoder = dataset.vocab.embeddings_decoder
    embeddings_decoder = torch.Tensor(embeddings_decoder).cuda()
    content_encoder = Encoder(encoder_vocab_size, embeddings_encoder,
                              EMBEDDING_SIZE, HIDDEN_SIZE).cuda()
    print("123")
    query_encoder = Encoder(encoder_vocab_size, embeddings_encoder,
                            EMBEDDING_SIZE, HIDDEN_SIZE).cuda()
    print("ddf")
    decoder = Decoder(EMBEDDING_SIZE, embeddings_decoder, HIDDEN_SIZE,
                      decoder_vocab_size).cuda()
    print("adsf")
    seq2seqwattn = Seq2Seq(content_encoder, query_encoder, decoder).cuda()
    print("adsdf")

    run_this = run_model(dataset, seq2seqwattn)
    print('rehc')
    run_this.run_training()
    print('124124')
Example #15
def lstmmodel(neurons,layer,drop,batch_size,epochs,backday,n_outputs):
  x_train, y_train = [], []
  x_valid, y_valid = [], []
  x_test, y_test = [], []
  
  for i in range(backday,train_size-n_outputs):
      x_train.append(train[i-backday:i,:])
      y_train.append(train[i:i+n_outputs,0])
  x_train, y_train = np.array(x_train), np.array(y_train)
  y_train = y_train.reshape((y_train.shape[0], y_train.shape[1], 1))
  
  for i in range(backday,valid_size-n_outputs):
      x_valid.append(valid[i-backday:i,:])
      y_valid.append(valid[i:i+n_outputs,0])
  x_valid, y_valid = np.array(x_valid), np.array(y_valid)
  y_valid = y_valid.reshape((y_valid.shape[0], y_valid.shape[1], 1))
  
  for i in range(backday,test_size-n_outputs):
      x_test.append(test[i-backday:i,:])
      y_test.append(test[i:i+n_outputs,0])
  x_test, y_test = np.array(x_test), np.array(y_test)
  y_test = y_test.reshape((y_test.shape[0], y_test.shape[1], 1))
  
  print(neurons,layer,drop,batch_size,epochs,backday,n_outputs)
  model = Seq2Seq(output_dim=1, hidden_dim=neurons, output_length=n_outputs, input_shape=(x_train.shape[1], x_train.shape[2]), peek=False, depth=layer,dropout=drop)
  model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mae'])
  early_stopping = EarlyStopping(monitor='mean_absolute_error', patience=10)
  model.fit(x_train, y_train, epochs=epochs, batch_size = len(x_train), validation_data=(x_valid, y_valid), verbose=0, callbacks=[early_stopping])
  
  loss,accuracy = model.evaluate(x_test,y_test)
  print('Test Mean Absolute Error: %f using %f,%f,%f,%f,%f,%f' % (accuracy,neurons,layer,drop,batch_size,epochs,backday))

  return model,accuracy,[neurons,layer,drop,batch_size,epochs,backday]
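The arrays train, valid, test and their train_size/valid_size/test_size counterparts are assumed to be module-level globals. A hypothetical invocation, with hyperparameter values chosen purely for illustration, could look like:

# Hypothetical call; train/valid/test must already be prepared as scaled 2-D
# arrays of shape (timesteps, features) with matching *_size globals.
model, mae, used_params = lstmmodel(neurons=64, layer=2, drop=0.2,
                                    batch_size=32, epochs=50,
                                    backday=20, n_outputs=5)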
Example #16
def main(**args):
    vocab, vocab_rsd = default_build_vocab('./data/vocab.txt')
    vocab_size = len(vocab)
    print('vocabulary size is %d' % vocab_size)
    data = Seq2SeqIter(data_path='./data/data.pickle',
                       source_path='./data/a.txt',
                       target_path='./data/b.txt',
                       vocab=vocab,
                       vocab_rsd=vocab_rsd,
                       batch_size=10,
                       max_len=25,
                       data_name='data',
                       label_name='label',
                       split_char='\n',
                       text2id=None,
                       read_content=None,
                       model_parallel=False)
    print('training data size is %d' % data.size)
    model = Seq2Seq(seq_len=25,
                    batch_size=10,
                    num_layers=1,
                    input_size=vocab_size,
                    embed_size=150,
                    hidden_size=150,
                    output_size=vocab_size,
                    dropout=0.0,
                    mx_ctx=CTX)
    model.train(dataset=data, epoch=5)
Example #17
    def __build_model(self):
        return Seq2Seq(input_size=self.__source_lang.n_words,
                       output_size=self.__target_lang.n_words,
                       hidden_size=constants.HIDDEN_SIZE,
                       learning_rate=constants.LEARNING_RATE,
                       teacher_forcing_ratio=constants.TEACHER_FORCING_RATIO,
                       device=constants.DEVICE)
Example #18
def predict():
    """
    Generate a reply to the chat message entered by the user.
    :return:
    """
    du = data_unit.DataUnit(**data_config)
    save_path = os.path.join(BASE_MODEL_DIR, MODEL_NAME)
    batch_size = 1
    tf.reset_default_graph()
    model = Seq2Seq(batch_size=batch_size,
                    encoder_vocab_size=du.vocab_size,
                    decoder_vocab_size=du.vocab_size,
                    mode='decode',
                    **model_config)
    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        model.load(sess, save_path)
        while True:
            q = input('Enter your message: ')
            if q is None or q.strip() == '':
                print('-----------------------------')
                continue
            if q == r'\b':
                print('Goodbye!')
                exit()
            q = q.strip()
            indexs = du.transform_sentence(q)
            x = np.asarray(indexs).reshape((1, -1))
            xl = np.asarray(len(indexs)).reshape((1, ))
            pred = model.predict(sess, np.array(x), np.array(xl))
            print('Q:   ', du.transform_indexs(x[0]))
            print('A:   ', du.transform_indexs(pred[0]))
            print('-----------------------------')
Example #19
def train(embedded, batch_size=100, epoch=100):
    model = Seq2Seq(embedded.voca_size)

    with tf.Session() as sess:
        checkpoint = tf.train.get_checkpoint_state("./model")

        if checkpoint and tf.train.checkpoint_exists(
                checkpoint.model_checkpoint_path):
            print("모델을 읽는 중", checkpoint.model_checkpoint_path)
            model.saver.restore(sess, checkpoint.model_checkpoint_path)
        else:
            print("새로운 모델을 생성")
            sess.run(tf.global_variables_initializer())

        writer = tf.summary.FileWriter("./logs", sess.graph)
        total_batch = int(math.ceil(len(embedded.test) / float(batch_size)))

        for step in range(total_batch * epoch):
            enc_input, dec_input, targets = embedded.batch(batch_size)
            _, loss = model.train(sess, enc_input, dec_input, targets)

            if (step + 1) % 100 == 0:
                model.write_logs(sess, writer, enc_input, dec_input, targets)
                print('Step:', '%06d' % model.global_step.eval(), 'cost =',
                      '{:.6f}'.format(loss))

        checkpoint_path = os.path.join("./model", "conversation.ckpt")
        model.saver.save(sess, checkpoint_path, global_step=model.global_step)

    print('Done')
Example #20
def get_trained_model(task='autoencoder',
                      data_name='autoencoder',
                      units=256,
                      random_seed=2):
    import evaluator
    from seq2seq import Seq2Seq
    from argparse import ArgumentParser
    parser = ArgumentParser()
    args = parser.parse_args(args=[])
    args.mode = "analysis"
    args.task = task
    args.data_name = data_name
    args.units = units
    args.random_seed = random_seed
    args.model_path = "../saved_model/%s_units=%s_seed=%d" % (
        args.data_name, args.units, args.random_seed)

    seq2seq = Seq2Seq(args)
    seq2seq.load_seq2seq(args.model_path)
    print("\tmode=%s, units=%d, model_path=%s" %
          (seq2seq.mode, seq2seq.units, seq2seq.model_path))
    if task == 'autoencoder' or task == 'autoenc-last':
        whole_accuracy, each_accuracy = evaluator.evaluate_autoencoder(
            seq2seq=seq2seq)
    else:
        raise ValueError('unknown task: %s' % task)
    assert whole_accuracy > 0.93 and each_accuracy > 0.99, "Load model failed."
    return seq2seq
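A hedged usage sketch, assuming a checkpoint matching the ../saved_model/... naming pattern above has already been trained:

# Hypothetical usage: reload a trained autoencoder for later analysis.
seq2seq = get_trained_model(task='autoencoder', data_name='autoencoder',
                            units=256, random_seed=2)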
Example #21
def train():
    with tf.Graph().as_default() as graph:
        model = Seq2Seq(600, [7800, 300])
        queue = InputPipeline([
            '{0}/frames/conversations_new.tfrecords'.format(options.data_dir)
        ],
                              batch_size=64,
                              n_epochs=1000,
                              capacity=1e4)

        input_seq, target_seq, input_seq_len, target_seq_len, history, history_size, history_seq_len = queue.inputs(
        )
        loss = model.graph(input_seq, target_seq, input_seq_len,
                           target_seq_len, history, history_size,
                           history_seq_len)

        global_step = tf.Variable(0, trainable=False, name='global_step')
        train_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(
            model.loss, global_step=global_step)

        sv = tf.train.Supervisor(graph=graph,
                                 logdir='{0}/seq2seq_v7/{1}'.format(
                                     options.output_dir, options.run_name),
                                 saver=tf.train.Saver(max_to_keep=None),
                                 summary_op=tf.summary.merge_all(),
                                 global_step=global_step)

        with sv.managed_session(config=tf.ConfigProto(
                log_device_placement=True)) as sess:
            while not sv.should_stop():
                sess.run(train_op)
Example #22
def main():
    # train data
    train_data, teach_data = ...  # data preparation elided in the original snippet
    
    # train
    auto_encoder = AutoEncoderBase(Seq2Seq(128, 128))
    auto_encoder.model_train()
Example #23
def test_seq2seq():
    x = np.random.random((samples, input_length, input_dim))
    y = np.random.random((samples, output_length, output_dim))

    models = []
    models += [
        Seq2Seq(output_dim=output_dim,
                hidden_dim=hidden_dim,
                output_length=output_length,
                input_shape=(input_length, input_dim))
    ]
    models += [
        Seq2Seq(output_dim=output_dim,
                hidden_dim=hidden_dim,
                output_length=output_length,
                input_shape=(input_length, input_dim),
                peek=True)
    ]
    models += [
        Seq2Seq(output_dim=output_dim,
                hidden_dim=hidden_dim,
                output_length=output_length,
                input_shape=(input_length, input_dim),
                depth=2)
    ]
    models += [
        Seq2Seq(output_dim=output_dim,
                hidden_dim=hidden_dim,
                output_length=output_length,
                input_shape=(input_length, input_dim),
                peek=True,
                depth=2)
    ]

    for model in models:
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, epochs=epoch_num)

    model = Seq2Seq(output_dim=output_dim,
                    hidden_dim=hidden_dim,
                    output_length=output_length,
                    input_shape=(input_length, input_dim),
                    peek=True,
                    depth=2,
                    teacher_force=True)
    model.compile(loss='mse', optimizer='sgd')
    model.fit([x, y], y, epochs=epoch_num)
Example #24
def test_Seq2Seq():
    x = np.random.random((batch, max_encoder_length, input_dim))
    y = np.random.random((batch, max_decoder_length, output_dim))

    models = []
    models += [
        Seq2Seq(output_dim=output_dim,
                hidden_dim=hidden_dim,
                output_length=max_decoder_length,
                input_shape=(max_encoder_length, input_dim))
    ]
    models += [
        Seq2Seq(output_dim=output_dim,
                hidden_dim=hidden_dim,
                output_length=max_decoder_length,
                input_shape=(max_encoder_length, input_dim),
                peek=True)
    ]
    models += [
        Seq2Seq(output_dim=output_dim,
                hidden_dim=hidden_dim,
                output_length=max_decoder_length,
                input_shape=(max_encoder_length, input_dim),
                depth=2)
    ]
    models += [
        Seq2Seq(output_dim=output_dim,
                hidden_dim=hidden_dim,
                output_length=max_decoder_length,
                input_shape=(max_encoder_length, input_dim),
                peek=True,
                depth=2)
    ]

    for model in models:
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, epochs=1)

    model = Seq2Seq(output_dim=output_dim,
                    hidden_dim=hidden_dim,
                    output_length=max_decoder_length,
                    input_shape=(max_encoder_length, input_dim),
                    peek=True,
                    depth=2,
                    teacher_force=True)
    model.compile(loss='mse', optimizer='sgd')
    model.fit([x, y], y, epochs=1)
Example #25
def training():
    params = parameters()

    # mode parameters dict
    model_param_dict = {'num_units': int(params.get('modelparam', 'num_units')),
                        'num_layers': int(params.get('modelparam', 'num_layers')),
                        'vocab_size': int(params.get('modelparam', 'vocab_size')),
                        'embedding_size': int(params.get('modelparam', 'embedding_size')),
                        'beam_size': int(params.get('modelparam', 'beam_size')),
                        # bool() of a non-empty string is always True; assuming params is a
                        # ConfigParser, getboolean() parses the true/false values correctly
                        'use_attention': params.getboolean('modelparam', 'use_attention'),
                        'use_beam_search': params.getboolean('modelparam', 'use_beam_search'),
                        'start_token_idx': int(params.get('modelparam', 'start_token_idx')),
                        'end_token_idx': int(params.get('modelparam', 'end_token_idx')),
                        'max_gradient_norm': float(params.get('modelparam', 'max_gradient_norm'))}

    # as for the comments for below parameters, please find in config.ini
    batch_size = int(params.get('trainparam', 'batch_size'))
    learning_rate = float(params.get('trainparam', 'learning_rate'))
    keep_prob = float(params.get('trainparam', 'keep_prob'))
    epochs = int(params.get('trainparam', 'epochs'))
    modelsaved_dir = params.get('trainparam', 'checkpoint_dir')
    savedname = params.get('trainparam', 'checkpoint_name')
    _, _, questionbatch, answerbatch, qlengthbatch, alengthbatch = data(DIR, epochs, batch_size)

    seq2seq = Seq2Seq(**model_param_dict)
    decode_outputs = seq2seq.model(questionbatch, answerbatch, qlengthbatch, alengthbatch, 'train',
                                   batch_size, keep_prob)
    train_op, loss, summary_merge, predicts = seq2seq.train(decode_outputs, answerbatch, alengthbatch, learning_rate)

    # for save and restore model
    ckpt = tf.train.get_checkpoint_state(modelsaved_dir)
    saver = tf.train.Saver()

    with tf.Session() as sess:
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print('Reloading model parameters..')
            saver.restore(sess, tf.train.latest_checkpoint(modelsaved_dir))
        else:
            print('Create model from scratch..')
            sess.run(tf.global_variables_initializer())
        step = 0
        summary_writer = tf.summary.FileWriter(modelsaved_dir, graph=sess.graph)
        while True:
            try:
                step += 1
                _, temploss, tempsummary = sess.run([train_op, loss, summary_merge])
                # The result must not be bound to the name 'loss': that name already refers to the
                # graph tensor fetched above, and rebinding it to a float would make the next
                # sess.run raise: 'Fetch argument 10.112038 has invalid type <class 'numpy.float32'>,
                # must be a string or Tensor. (Can not convert a float32 into a Tensor or Operation.)'
                print('run step: ', step, end='\r')
                if step % int(params.get('trainparam', 'steps_per_checkpoint')) == 0:
                    perplexity = math.exp(float(temploss)) if temploss < 300 else float('inf')
                    print('save at step: ', step, 'perplexity: ', perplexity)
                    summary_writer.add_summary(tempsummary, step)
                    checkpoint_path = os.path.join(modelsaved_dir, savedname)
                    saver.save(sess, checkpoint_path, global_step=step)
            except:
                print('done')
                break
Example #26
def start_train(args, data):
    if args.gpu == 0:
        use_cuda = False
    else:
        use_cuda = True
    MAX_ITER = 5000
    epochs = 50
    print_every = 5000
    plot_every = 100
    start_time = time.time()
    plot_losses = []  # store losses for plotting
    print_loss_total = 0
    input_dim = args.vocab_size
    output_dim = args.vocab_size
    embedding_dim = args.embedding_size
    hidden_dim = args.hidden_size
    learning_rate = args.learning_rate
    input_word = "怎么获得立减券"
    text = data.text2index(input_word)
    rfile=open('../data/questions_viewer.txt','w',encoding='utf-8')
    if use_cuda:
        seq2seq_model = Seq2Seq(input_dim,embedding_dim,hidden_dim,output_dim,use_cuda=use_cuda,learning_rate=learning_rate).cuda()
    else:
        seq2seq_model = Seq2Seq(input_dim,embedding_dim,hidden_dim,output_dim,use_cuda=use_cuda,learning_rate=learning_rate)
    seq2seq_model.train()
    print(len(data.source_index))
    for epoch in range(epochs):
        #valid_targets, valid_sources, valid_targets_lengths, valid_source_lengths = data.get_valid_batch()
        #valid_loss = seq2seq_model.seq2seq_train(valid_targets, valid_sources)
        for iter,(source, target) in enumerate(zip(data.source_index,data.target_index)):
            #print(source,target)
            loss = seq2seq_model.seq2seq_train(source,target)
            print_loss_total += loss
            if iter % print_every == 0:
                seq2seq_model.encoder_scheduler.step()
                seq2seq_model.decoder_scheduler.step()
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                time_dif = get_time_dif(start_time)
                print('Epoch {:>3}/{} - Training Loss: {:>6.6f}  Time:{}'.format(epoch,epochs,print_loss_avg,time_dif))
        torch.save(seq2seq_model.state_dict(), args.module_path)
        predict_indices = seq2seq_model.seq2seq_predict(text)
        predict_result = data.index2text(predict_indices)
        print(predict_result)
        rfile.write("".join(predict_result))
        rfile.write('\n')
Example #27
def _create_model(conf):
    """
    Creates a simple model using the given configuration
    """
    model = Seq2Seq(**conf['seq2seq'])
    output = _add_layers(model.output, conf['top'])
    model = Model(model.input, output)
    return model
Example #28
    def __init_model(self):
        log('Initializing an empty model')
        self.model = Seq2Seq(
            input_size=len(self.input_vocab),
            output_size=len(self.output_vocab),
            hidden_size=constants.HIDDEN_SIZE,
            learning_rate=constants.LEARNING_RATE,
            teacher_forcing_ratio=constants.TEACHER_FORCING_RATIO,
            device=constants.DEVICE)

        log(str(self.model))
Example #29
def main():
	args = set_arguments()
	seq2seq = Seq2Seq(args)
	#pdb.set_trace()
	if "train" in args.mode:
		trainer = Trainer(args, seq2seq)
		trainer.train()
		print("\ttask =", args.task)
		print("\tunits =", args.units)
		print("\t=" * 50)
	pdb.set_trace()
Example #30
def interpreter(data_path, model_path):
    """
    Run this function if you want to talk to the seq2seq model.
    Type "exit" to end the conversation.
    :param data_path: the path of corpus you made model learn
    :param model_path: the path of model you made learn
    :return:
    """
    # call dictionary class
    corpus = ConvCorpus(file_path=None)
    corpus.load(load_dir=data_path)
    print('Vocabulary Size (number of words) :', len(corpus.dic.token2id))
    print('')

    # rebuild seq2seq model
    model = Seq2Seq(len(corpus.dic.token2id),
                    feature_num=args.feature_num,
                    hidden_num=args.hidden_num,
                    batch_size=1,
                    gpu_flg=args.gpu)
    serializers.load_hdf5(model_path, model)

    # run conversation system
    print('The system is ready to run, please talk to me!')
    print('( If you want to end a talk, please type "exit". )')
    print('')
    while True:
        print('>> ', end='')
        sentence = input()
        if sentence == 'exit':
            print('See you again!')
            break

        input_vocab = [
            unicodedata.normalize('NFKC', word.lower())
            for word in word_tokenize(sentence)
        ]
        input_vocab.reverse()
        input_vocab.insert(0, "<eos>")

        # convert word into ID
        input_sentence = [
            corpus.dic.token2id[word] for word in input_vocab
            if corpus.dic.token2id.get(word) is not None
        ]

        model.initialize()  # initialize cell
        sentence = model.generate(input_sentence,
                                  sentence_limit=len(input_sentence) + 30,
                                  word2id=corpus.dic.token2id,
                                  id2word=corpus.dic)
        print("-> ", sentence)
        print('')