Example #1
def test_Seq2Seq():
    x = np.random.random((samples, input_length, input_dim))
    y = np.random.random((samples, output_length, output_dim))

    models = []
    models += [
        Seq2Seq(output_dim=output_dim,
                hidden_dim=hidden_dim,
                output_length=output_length,
                input_shape=(input_length, input_dim))
    ]
    models += [
        Seq2Seq(output_dim=output_dim,
                hidden_dim=hidden_dim,
                output_length=output_length,
                input_shape=(input_length, input_dim),
                peek=True)
    ]
    models += [
        Seq2Seq(output_dim=output_dim,
                hidden_dim=hidden_dim,
                output_length=output_length,
                input_shape=(input_length, input_dim),
                depth=2)
    ]
    models += [
        Seq2Seq(output_dim=output_dim,
                hidden_dim=hidden_dim,
                output_length=output_length,
                input_shape=(input_length, input_dim),
                peek=True,
                depth=2)
    ]

    for model in models:
        model.compile(loss='mse', optimizer='sgd')
        model.fit(x, y, epochs=1)

    model = Seq2Seq(output_dim=output_dim,
                    hidden_dim=hidden_dim,
                    output_length=output_length,
                    input_shape=(input_length, input_dim),
                    peek=True,
                    depth=2,
                    teacher_force=True)
    model.compile(loss='mse', optimizer='sgd')
    model.fit([x, y], y, epochs=1)
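The test above assumes imports and module-level constants that are not part of the snippet. A minimal sketch of that missing context, with placeholder values (the import path follows the farizrahman4u/seq2seq package layout; the dimensions are arbitrary assumptions):

# Assumed context for test_Seq2Seq(); values are placeholders, not the originals.
import numpy as np
from seq2seq.models import Seq2Seq  # assumption: farizrahman4u/seq2seq layout

samples = 100
input_length = 5
input_dim = 3
output_length = 8
output_dim = 4
hidden_dim = 10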
def training():
    params = parameters()

    # model parameters dict
    model_param_dict = {'num_units': int(params.get('modelparam', 'num_units')),
                        'num_layers': int(params.get('modelparam', 'num_layers')),
                        'vocab_size': int(params.get('modelparam', 'vocab_size')),
                        'embedding_size': int(params.get('modelparam', 'embedding_size')),
                        'beam_size': int(params.get('modelparam', 'beam_size')),
                        'use_attention': bool(params.get('modelparam', 'use_attention')),
                        'use_beam_search': bool(params.get('modelparam', 'use_beam_search')),
                        'start_token_idx': int(params.get('modelparam', 'start_token_idx')),
                        'end_token_idx': int(params.get('modelparam', 'end_token_idx')),
                        'max_gradient_norm': float(params.get('modelparam', 'max_gradient_norm'))}

    # see config.ini for descriptions of the parameters below
    batch_size = int(params.get('trainparam', 'batch_size'))
    learning_rate = float(params.get('trainparam', 'learning_rate'))
    keep_prob = float(params.get('trainparam', 'keep_prob'))
    epochs = int(params.get('trainparam', 'epochs'))
    modelsaved_dir = params.get('trainparam', 'checkpoint_dir')
    savedname = params.get('trainparam', 'checkpoint_name')
    _, _, questionbatch, answerbatch, qlengthbatch, alengthbatch = data(DIR, epochs, batch_size)

    seq2seq = Seq2Seq(**model_param_dict)
    decode_outputs = seq2seq.model(questionbatch, answerbatch, qlengthbatch, alengthbatch, 'train',
                                   batch_size, keep_prob)
    train_op, loss, summary_merge, predicts = seq2seq.train(decode_outputs, answerbatch, alengthbatch, learning_rate)

    # for save and restore model
    ckpt = tf.train.get_checkpoint_state(modelsaved_dir)
    saver = tf.train.Saver()

    with tf.Session() as sess:
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print('Reloading model parameters..')
            saver.restore(sess, tf.train.latest_checkpoint(modelsaved_dir))
        else:
            print('Create model from scratch..')
            sess.run(tf.global_variables_initializer())
        step = 0
        summary_writer = tf.summary.FileWriter(modelsaved_dir, graph=sess.graph)
        while True:
            try:
                step += 1
                _, temploss, tempsummary = sess.run([train_op, loss, summary_merge])
                # do not rebind the name `loss` here: it already refers to the fetched tensor, and
                # reusing it would make the next sess.run() fail with an error like: 'Fetch argument
                # 10.112038 has invalid type <class 'numpy.float32'>, must be a string or Tensor.
                # (Can not convert a float32 into a Tensor or Operation.)'
                print('run step: ', step, end='\r')
                if step % int(params.get('trainparam', 'steps_per_checkpoint')) == 0:
                    perplexity = math.exp(float(temploss)) if temploss < 300 else float('inf')
                    print('save at step: ', step, 'perplexity: ', perplexity)
                    summary_writer.add_summary(tempsummary, step)
                    checkpoint_path = os.path.join(modelsaved_dir, savedname)
                    saver.save(sess, checkpoint_path, global_step=step)
            except tf.errors.OutOfRangeError:  # the input queue raises this once the requested epochs are exhausted
                print('done')
                break
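training() above and chatbot() further below read everything through a parameters() helper with ConfigParser-style .get(section, key) calls. A minimal sketch of such a helper and the keys it must provide; only the section and key names come from the code, the file name and the values shown are assumptions:

# Sketch of the assumed parameters() helper; section/key names are taken from
# the .get() calls above, everything else (file name, values) is illustrative.
import configparser

def parameters(path='config.ini'):
    parser = configparser.ConfigParser()
    parser.read(path)
    return parser

# Illustrative config.ini:
# [modelparam]
# num_units = 256
# num_layers = 2
# vocab_size = 20000
# embedding_size = 128
# beam_size = 5
# use_attention = True
# use_beam_search = True
# start_token_idx = 1
# end_token_idx = 2
# max_gradient_norm = 5.0
#
# [trainparam]
# batch_size = 64
# learning_rate = 0.001
# keep_prob = 0.8
# epochs = 10
# checkpoint_dir = ./checkpoints
# checkpoint_name = seq2seq.ckpt
# steps_per_checkpoint = 100

Note that bool(params.get('modelparam', 'use_attention')) is True for any non-empty string, including 'False'; ConfigParser's getboolean() is the safer way to read the two boolean flags.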
def start_train(args,data):
    if args.gpu == 0:
        use_cuda = False
    else:
        use_cuda = True
    MAX_ITER = 5000
    epochs = 50
    print_every = 5000
    plot_every = 100
    start_time = time.time()
    plot_losses = []  # store losses for plotting
    print_loss_total = 0
    input_dim = args.vocab_size
    output_dim = args.vocab_size
    embedding_dim = args.embedding_size
    hidden_dim = args.hidden_size
    learning_rate = args.learning_rate
    input_word = "怎么获得立减券"
    text = data.text2index(input_word)
    rfile=open('../data/questions_viewer.txt','w',encoding='utf-8')
    if use_cuda:
        seq2seq_model = Seq2Seq(input_dim,embedding_dim,hidden_dim,output_dim,use_cuda=use_cuda,learning_rate=learning_rate).cuda()
    else:
        seq2seq_model = Seq2Seq(input_dim,embedding_dim,hidden_dim,output_dim,use_cuda=use_cuda,learning_rate=learning_rate)
    seq2seq_model.train()
    print(len(data.source_index))
    for epoch in range(epochs):
        #valid_targets, valid_sources, valid_targets_lengths, valid_source_lengths = data.get_valid_batch()
        #valid_loss = seq2seq_model.seq2seq_train(valid_targets, valid_sources)
        for iter,(source, target) in enumerate(zip(data.source_index,data.target_index)):
            #print(source,target)
            loss = seq2seq_model.seq2seq_train(source,target)
            print_loss_total += loss
            if iter % print_every == 0:
                seq2seq_model.encoder_scheduler.step()
                seq2seq_model.decoder_scheduler.step()
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                time_dif = get_time_dif(start_time)
                print('Epoch {:>3}/{} - Training Loss: {:>6.6f}  Time:{}'.format(epoch,epochs,print_loss_avg,time_dif))
        torch.save(seq2seq_model.state_dict(), args.module_path)
        predict_indices = seq2seq_model.seq2seq_predict(text)
        predict_result = data.index2text(predict_indices)
        print(predict_result)
        rfile.write("".join(predict_result))
        rfile.write('\n')
Example #4
def _create_model(conf):
    """
    Creates a simple model using the given configuration
    """
    model = Seq2Seq(**conf['seq2seq'])
    output = _add_layers(model.output, conf['top'])
    model = Model(model.input, output)
    return model
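The _add_layers helper and the layout of conf are not shown. A plausible sketch, assuming Keras layers are stacked on top of the seq2seq output according to a list of layer configs; the key names and layer choices are illustrative, not the original project's:

# Hypothetical _add_layers and conf; for illustration only.
from keras.layers import Dense, Dropout

def _add_layers(tensor, layer_confs):
    # layer_confs: list of dicts such as {'units': 64, 'activation': 'relu', 'dropout': 0.2}
    for lc in layer_confs:
        tensor = Dense(lc['units'], activation=lc.get('activation', 'relu'))(tensor)
        if lc.get('dropout'):
            tensor = Dropout(lc['dropout'])(tensor)
    return tensor

conf = {
    'seq2seq': dict(output_dim=8, hidden_dim=32, output_length=10,
                    input_shape=(20, 8)),
    'top': [{'units': 8, 'activation': 'linear'}],
}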
def start_test(args, data):
    # read one sentence at a time from the user
    if args.gpu == 0:
        use_cuda = False
    else:
        use_cuda = True
    input_dim = args.vocab_size
    output_dim = args.vocab_size
    embedding_dim = args.embedding_size
    hidden_dim = args.hidden_size
    args.mode = 'test'
    while True:
        input_word = input('请输入您的问题:')  # prompt: "Please enter your question:"
        input_word = data.clean_str(input_word)
        word1s = pseg.cut(input_word)
        x = []
        for w in word1s:
            if w.flag == 'n':
                x.append('*')
            else:
                x.append(w.word)
        print(x)
        text = data.text2index(x)
        if use_cuda:
            seq2seq_model = Seq2Seq(input_dim,
                                    embedding_dim,
                                    hidden_dim,
                                    output_dim,
                                    use_cuda=use_cuda).cuda()
        else:
            seq2seq_model = Seq2Seq(input_dim,
                                    embedding_dim,
                                    hidden_dim,
                                    output_dim,
                                    use_cuda=use_cuda)
        seq2seq_model.load_state_dict(torch.load(args.module_path))
        seq2seq_model.eval()
        #predict_indices = seq2seq_model.seq2seq_predict(text)
        #predict_result = data.index2text(predict_indices)
        #print(predict_result)
        predict_sample_indices = seq2seq_model.beamsearch(text)
        for predict_indices in predict_sample_indices:
            predict_result = data.index2text(predict_indices)
            print("".join(predict_result[:-1]))
Example #6
def main():
	args = set_arguments()
	seq2seq = Seq2Seq(args)
	#pdb.set_trace()
	if "train" in args.mode:
		trainer = Trainer(args, seq2seq)
		trainer.train()
		print("\ttask =", args.task)
		print("\tunits =", args.units)
		print("\t=" * 50)
	pdb.set_trace()
    def __init_model(self):
        log('Initializing an empty model')
        self.model = Seq2Seq(
            input_size=len(self.input_vocab),
            output_size=len(self.output_vocab),
            hidden_size=constants.HIDDEN_SIZE,
            learning_rate=constants.LEARNING_RATE,
            teacher_forcing_ratio=constants.TEACHER_FORCING_RATIO,
            device=constants.DEVICE)

        log(str(self.model))
Example #8
def build_model(voca_size=29331,
                hidden_size=512,
                lstm_layers_num=2,
                batch_size=10,
                max_epochs=200000,
                retrain=False):
    """
	train a model with mini-batch gradient descend
	"""
    model = None

    if not retrain:
        try:
            with open(path_pkl, "rb") as f:
                model = pickle.load(f)
            return model
        except Exception:
            print "Model does not pre-exist..."

    print "Will train a new model..."

    encoderInputs, decoderInputs, decoderTarget = load_train_data(
        path_train, 20000)
    #(sent_size, example_num)
    num_batchs = encoderInputs.shape[1] // batch_size
    model = Seq2Seq(voca_size, hidden_size, lstm_layers_num, learning_rate=0.1)

    batch_idx = 0
    for ep in xrange(max_epochs):
        enIpt = encoderInputs[:, batch_idx * batch_size:(batch_idx + 1) *
                              batch_size]
        deIpt = decoderInputs[:, batch_idx * batch_size:(batch_idx + 1) *
                              batch_size]
        deTgt = decoderTarget[:, batch_idx * batch_size:(batch_idx + 1) *
                              batch_size]

        enMsk = get_mask(enIpt)
        deMsk = get_mask(deIpt)
        loss, costs = model.train(enIpt, enMsk, deIpt, deMsk, deTgt)

        if ep % 20 == 0:
            print "in epoch %d/%d..." % (ep, max_epochs)
        if batch_idx == 0:
            ot = "in epoch %d/%d..." % (ep, max_epochs) + "	loss:	" + str(loss)
            print ot
            logging.info(ot)

        batch_idx = (batch_idx + 1) % num_batchs
    """
	with open(path_pkl, "wb") as mf:
		pickle.dump(model, mf)
	"""
    return model
Example #9
def interpreter(data_path, model_path):
    """
    Run this function, if you want to talk to seq2seq model.
    if you type "exit", finish to talk.
    :param data_path: the path of corpus you made model learn
    :param model_path: the path of model you made learn
    :return:
    """
    # call dictionary class
    corpus = ConvCorpus(file_path=None)
    corpus.load(load_dir=data_path)
    print('Vocabulary Size (number of words) :', len(corpus.dic.token2id))
    print('')

    # rebuild seq2seq model
    model = Seq2Seq(len(corpus.dic.token2id),
                    feature_num=args.feature_num,
                    hidden_num=args.hidden_num,
                    batch_size=1,
                    gpu_flg=args.gpu)
    serializers.load_hdf5(model_path, model)

    # run conversation system
    print('The system is ready to run, please talk to me!')
    print('( If you want to end a talk, please type "exit". )')
    print('')
    while True:
        print('>> ', end='')
        sentence = input()
        if sentence == 'exit':
            print('See you again!')
            break

        input_vocab = [
            unicodedata.normalize('NFKC', word.lower())
            for word in word_tokenize(sentence)
        ]
        input_vocab.reverse()
        input_vocab.insert(0, "<eos>")

        # convert word into ID
        input_sentence = [
            corpus.dic.token2id[word] for word in input_vocab
            if not corpus.dic.token2id.get(word) is None
        ]

        model.initialize()  # initialize cell
        sentence = model.generate(input_sentence,
                                  sentence_limit=len(input_sentence) + 30,
                                  word2id=corpus.dic.token2id,
                                  id2word=corpus.dic)
        print("-> ", sentence)
        print('')
Example #10
File: train.py  Project: JJoving/SMLAT
def main(args):
    # Construct Solver
    # data
    tr_dataset = AudioDataset(args.train_json, args.batch_size, args.maxlen_in,
                              args.maxlen_out)
    cv_dataset = AudioDataset(args.valid_json, args.batch_size, args.maxlen_in,
                              args.maxlen_out)
    tr_loader = AudioDataLoader(tr_dataset,
                                batch_size=1,
                                num_workers=args.num_workers)
    cv_loader = AudioDataLoader(cv_dataset,
                                batch_size=1,
                                num_workers=args.num_workers)
    # load dictionary and generate char_list, sos_id, eos_id
    char_list, sos_id, eos_id = process_dict(args.dict)
    vocab_size = len(char_list)
    data = {'tr_loader': tr_loader, 'cv_loader': cv_loader}
    # model
    encoder = Encoder(args.einput,
                      args.ehidden,
                      args.elayer,
                      dropout=args.edropout,
                      bidirectional=args.ebidirectional,
                      rnn_type=args.etype)
    decoder = Decoder(vocab_size,
                      args.dembed,
                      sos_id,
                      eos_id,
                      args.dhidden,
                      args.dlayer,
                      bidirectional_encoder=args.ebidirectional)
    model = Seq2Seq(encoder, decoder)
    print(model)
    model.cuda()
    # optimizer
    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.l2)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     weight_decay=args.l2)
    else:
        print("Unsupported optimizer")
        return

    # solver
    ctc = 0
    solver = Solver(data, model, optimizer, args)
    solver.train()
Example #11
def predict():
    model = Seq2Seq(
        args.size_layer,
        args.num_layers,
        args.learning_rate,
        args.vocab_file,
        args.bert_config,
        args.is_training,
    )
    dictionary_output, rev_dictionary_output = model.tokenizer.vocab, model.tokenizer.inv_vocab
    dictionary_input, rev_dictionary_input = model.tokenizer.vocab, model.tokenizer.inv_vocab

    with tf.Session() as sess:
        with tf.device("/cpu:0"):

            ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)
            if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
                tf.logging.info("restore model from patch: %s",
                                ckpt.model_checkpoint_path)  # 加载预训练模型
                saver = tf.train.Saver(max_to_keep=4)
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                tf.logging.error("model path wrong !!")
                return

            while True:
                text = input("input your dream: ")
                inputs = ['[SEP]'] + list(text) + ['[CLS]']

                inputs_ids = model.tokenizer.convert_tokens_to_ids(inputs)
                segment_ids = [0] * len(inputs_ids)
                input_mask = [1] * len(inputs_ids)

                predicted2 = sess.run(model.predicting_ids,
                                      feed_dict={
                                          model.input_ids: [inputs_ids],
                                          model.input_mask: [input_mask],
                                          model.segment_ids: [segment_ids],
                                          model.dropout: 1.0
                                      })

                print(
                    'dream:', ''.join([
                        rev_dictionary_input[n] for n in inputs_ids
                        if n not in [0, 1, 2, 3]
                    ]))
                print(
                    'dream decoding:', ''.join([
                        rev_dictionary_output[n] for n in predicted2[0]
                        if n not in [0, 1, 2, 3]
                    ]), '')
                print("*" * 20)
Example #12
def predict():
    sep_word = thulac.thulac(seg_only=True)

    model = Seq2Seq(batch_size=1, forward_only=True)

    model_path = './models/0612/'

    vocab_en, _, = utils.read_vocabulary(config.TRAIN_ENC_VOCABULARY)
    _, vocab_de, = utils.read_vocabulary(config.TRAIN_DEC_VOCABULARY)

    with tf.Session() as sess:
        # restore the previous training run, if a checkpoint exists
        ckpt = tf.train.get_checkpoint_state(model_path)
        if ckpt is not None:
            print('found model: ', ckpt.model_checkpoint_path)
            model.saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("没找到模型")

        while True:
            input_string = raw_input('me > ')
            # exit when the user types 'quit'
            if input_string == 'quit':
                exit()
            personal_ans = utils.check_pre_ques(input_string.decode('utf-8'))
            if personal_ans is not None:
                print('AI > ' + personal_ans)
                continue

            input_string_vec = []
            aseq = sep_word.cut(input_string, text=True)
            for words in aseq.split(' '):
                input_string_vec.append(vocab_en.get(words, config.UNK_ID))
            bucket_id = min([
                b for b in range(len(config.BUCKETS))
                if config.BUCKETS[b][0] > len(input_string_vec)
            ])
            encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                {bucket_id: [(input_string_vec, [])]}, bucket_id)
            _, _, output_logits = model.step(sess, encoder_inputs,
                                             decoder_inputs, target_weights,
                                             bucket_id, True)
            outputs = [
                int(np.argmax(logit, axis=1)) for logit in output_logits
            ]
            if config.EOS_ID in outputs:
                outputs = outputs[:outputs.index(config.EOS_ID)]

            response = "".join(
                [tf.compat.as_str(vocab_de[output]) for output in outputs])
            print('AI > ' + response)
Example #13
def chatbot():
    """
    you can use this to chat with your own chat bot
    :return:
    """
    questionholder = tf.placeholder(shape=[None, None], dtype=tf.int32)
    quelengthholder = tf.placeholder(shape=[None], dtype=tf.int32)
    params = parameters()
    model_param_dict = {
        'num_units': int(params.get('modelparam', 'num_units')),
        'num_layers': int(params.get('modelparam', 'num_layers')),
        'vocab_size': int(params.get('modelparam', 'vocab_size')),
        'embedding_size': int(params.get('modelparam', 'embedding_size')),
        'beam_size': int(params.get('modelparam', 'beam_size')),
        'use_attention': bool(params.get('modelparam', 'use_attention')),
        'use_beam_search': bool(params.get('modelparam', 'use_beam_search')),
        'start_token_idx': int(params.get('modelparam', 'start_token_idx')),
        'end_token_idx': int(params.get('modelparam', 'end_token_idx')),
        'max_gradient_norm':
        float(params.get('modelparam', 'max_gradient_norm'))
    }

    modelsaved_dir = params.get('trainparam', 'checkpoint_dir')
    seq2seq = Seq2Seq(**model_param_dict)
    decode_outputs = seq2seq.model(source_input=questionholder,
                                   source_length=quelengthholder,
                                   mode='inference',
                                   batch_size=1,
                                   keep_probs=1.0)
    predict_ids = decode_outputs.predicted_ids

    ckpt = tf.train.get_checkpoint_state(modelsaved_dir)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print('Reloading model parameters..')
            saver.restore(sess, tf.train.latest_checkpoint(modelsaved_dir))
        else:
            raise ValueError(
                'There is no chatbot baby in {}'.format(modelsaved_dir))
        question = 'start'
        print('Hello, I\'m ibot, nice to meet you!')
        while question:
            question = input(':: ')
            questionbatch, question_length = sentence2ids(question)
            answer_ids = sess.run(predict_ids,
                                  feed_dict={
                                      questionholder: questionbatch,
                                      quelengthholder: question_length
                                  })
            answer = ids2sentence(answer_ids, model_param_dict['beam_size'])
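sentence2ids and ids2sentence are project utilities that are not shown. A rough sketch of what they need to return to match the feeds above (a [1, time] id batch plus a length vector, and the [batch, time, beam_width] output of TensorFlow's beam-search decoder); the explicit vocabulary arguments are an assumption, the real helpers presumably look these up internally:

# Hypothetical helpers matching how chatbot() uses them; word2idx/idx2word
# dictionaries are assumed to exist elsewhere in the project.
def sentence2ids(sentence, word2idx, unk_id=0):
    ids = [word2idx.get(w, unk_id) for w in sentence.strip().split()]
    return [ids], [len(ids)]          # shapes [1, time] and [1]

def ids2sentence(answer_ids, beam_size, idx2word, eos_id=2):
    # beam-search predicted_ids has shape [batch, time, beam_width]; keep beam 0
    words = []
    for step in answer_ids[0]:
        idx = int(step[0])
        if idx == eos_id:
            break
        words.append(idx2word.get(idx, '<unk>'))
    return ' '.join(words)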
Example #14
def interface(_input):  # run prediction for a single input string
    model = Seq2Seq().to(config.device)
    model.load_state_dict(torch.load("./models/model.pkl"))
    input = list(str(_input))
    input_len = torch.LongTensor([len(input)])  # [1]
    input = torch.LongTensor([config.ns.transform(input)])  # [1,max_len]

    with torch.no_grad():
        input = input.to(config.device)
        input_len = input_len.to(config.device)
        _, decoder_predict = model.evaluate(input, input_len)  # [batch_Size,max_len,vocab_size]
        # run inverse_transform on decoder_predict to map ids back to tokens
        pred = [config.ns.inverse_transform(i) for i in decoder_predict]
        print(_input, "---->", pred[0])
Example #15
def create_model(session, FLAGS):
    config = OrderedDict(sorted(FLAGS.__flags.items()))
    model = Seq2Seq(config, 'train')

    ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        print("reloading model parameters...")
        model.restore(session, ckpt.model_checkpoint_path)
    else:
        if not os.path.exists(FLAGS.model_dir):
            os.makedirs(FLAGS.model_dir)
        print("create new model parameters...")
        session.run(tf.global_variables_initializer())
    return model
Example #16
def create_model(gen_config):
    encoder = Encoder(gen_config.vocab_size,
                      gen_config.emb_dim,
                      gen_config.hidden_size,
                      n_layers=2,
                      dropout=0.5)
    decoder = Decoder(gen_config.emb_dim,
                      gen_config.hidden_size,
                      gen_config.vocab_size,
                      n_layers=1,
                      dropout=0.5)
    seq2seq = Seq2Seq(encoder, decoder).cuda()
    optimizer = optim.Adam(seq2seq.parameters(), lr=gen_config.lr)
    return seq2seq, optimizer
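A minimal sketch of how the (seq2seq, optimizer) pair returned above might be driven for one batch, assuming the common PyTorch convention that the model's forward is model(src, trg) and returns [trg_len, batch, vocab] logits; that signature is an assumption, not taken from this project:

# Sketch of a single training step under the assumed forward signature.
import torch
import torch.nn as nn

def train_step(seq2seq, optimizer, src, trg, pad_idx=0, clip=5.0):
    # src, trg: LongTensors of token ids, already on the same device as the model
    seq2seq.train()
    optimizer.zero_grad()
    output = seq2seq(src, trg)                     # assumed shape: [trg_len, batch, vocab]
    loss = nn.functional.cross_entropy(
        output[1:].reshape(-1, output.size(-1)),   # drop the <sos> step
        trg[1:].reshape(-1),
        ignore_index=pad_idx)
    loss.backward()
    torch.nn.utils.clip_grad_norm_(seq2seq.parameters(), clip)
    optimizer.step()
    return loss.item()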
Example #17
def main(**kwargs):
    vocab, vocab_rsd = default_build_vocab('./data/vocab.txt')
    vocab_size = len(vocab)
    print 'vocabulary size is %d' % vocab_size
    data = Seq2SeqIter(data_path=None, source_path='./data/a.txt',
                       target_path='./data/b.txt', vocab=vocab,
                       vocab_rsd=vocab_rsd, batch_size=10, max_len=25,
                       data_name='data', label_name='label', split_char='\n',
                       text2id=None, read_content=None, model_parallel=False)
    print 'training data size is %d' % data.size
    model = Seq2Seq(seq_len=25, batch_size=10, num_layers=1,
                    input_size=vocab_size, embed_size=150, hidden_size=150,
                    output_size=vocab_size, dropout=0.0, mx_ctx=CTX)
    model.train(dataset=data, epoch=5)
Example #18
def main():
    X_indices, Y_indices, X_char2idx, Y_char2idx, X_idx2char, Y_idx2char = preprocess_data()

    model = Seq2Seq(rnn_size=50,
                    n_layers=2,
                    encoder_embedding_dim=15,
                    decoder_embedding_dim=15,
                    X_word2idx=X_char2idx,
                    Y_word2idx=Y_char2idx)
    model.fit(X_indices, Y_indices)

    model.infer('common', X_idx2char, Y_idx2char)
    model.infer('apple', X_idx2char, Y_idx2char)
    model.infer('zhedong', X_idx2char, Y_idx2char)
def lstmmodel(neurons, layer, drop, batch_size, epochs, backday, n_outputs):
    x_train, y_train = [], []
    x_valid, y_valid = [], []
    x_test, y_test = [], []

    for i in range(backday, train_size - n_outputs):
        x_train.append(train[i - backday:i, :])
        y_train.append(train[i:i + n_outputs, 0])
    x_train, y_train = np.array(x_train), np.array(y_train)
    y_train = y_train.reshape((y_train.shape[0], y_train.shape[1], 1))

    for i in range(backday, valid_size - n_outputs):
        x_valid.append(valid[i - backday:i, :])
        y_valid.append(valid[i:i + n_outputs, 0])
    x_valid, y_valid = np.array(x_valid), np.array(y_valid)
    y_valid = y_valid.reshape((y_valid.shape[0], y_valid.shape[1], 1))

    for i in range(backday, test_size - n_outputs):
        x_test.append(test[i - backday:i, :])
        y_test.append(test[i:i + n_outputs, 0])
    x_test, y_test = np.array(x_test), np.array(y_test)
    y_test = y_test.reshape((y_test.shape[0], y_test.shape[1], 1))

    print(neurons, layer, drop, batch_size, epochs, backday, n_outputs)
    model = Seq2Seq(output_dim=1,
                    hidden_dim=neurons,
                    output_length=n_outputs,
                    input_shape=(x_train.shape[1], x_train.shape[2]),
                    peek=False,
                    depth=layer,
                    dropout=drop)
    model.compile(loss='mean_absolute_error',
                  optimizer='adam',
                  metrics=['mae'])
    early_stopping = EarlyStopping(monitor='mean_absolute_error', patience=10)
    model.fit(x_train,
              y_train,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(x_valid, y_valid),
              verbose=0,
              callbacks=[early_stopping])

    loss, accuracy = model.evaluate(x_test, y_test)
    print('Test Mean Absolute Error: %f using %f,%f,%f,%f,%f,%f' %
          (accuracy, neurons, layer, drop, batch_size, epochs, backday))

    return model, accuracy, [neurons, layer, drop, batch_size, epochs, backday]
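lstmmodel() relies on module-level train/valid/test arrays and the corresponding *_size variables. A sketch of how those globals might be prepared from a scaled feature matrix; the split ratios, scaler, and data shape are assumptions:

# Hypothetical preparation of the globals consumed by lstmmodel().
import numpy as np
from sklearn.preprocessing import MinMaxScaler

values = np.random.random((1000, 4))      # placeholder (time steps, features), target in column 0
scaled = MinMaxScaler().fit_transform(values)

train_size = int(len(scaled) * 0.7)
valid_size = int(len(scaled) * 0.15)
test_size = len(scaled) - train_size - valid_size

train = scaled[:train_size]
valid = scaled[train_size:train_size + valid_size]
test = scaled[train_size + valid_size:]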
Example #20
def build_model(config, en_vocab_size, cn_vocab_size):
    # build the model
    encoder = Encoder(en_vocab_size, config.emb_dim, config.hid_dim,
                      config.n_layers, config.dropout)
    decoder = Decoder(cn_vocab_size, config.emb_dim, config.hid_dim,
                      config.n_layers, config.dropout, config.attention)
    model = Seq2Seq(encoder, decoder, device)
    print(model)
    # build the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    print(optimizer)
    if config.load_model:
        model = load_model(model, config.load_model_path)
    model = model.to(device)

    return model, optimizer
Example #21
    def __init__(self):
        self.du = data_unit.DataUnit(**data_config)
        self.save_path = os.path.join(BASE_MODEL_DIR, MODEL_NAME)
        self.batch_size = 1
        tf.reset_default_graph()
        self.model = Seq2Seq(batch_size=self.batch_size,
                             encoder_vocab_size=self.du.vocab_size,
                             decoder_vocab_size=self.du.vocab_size,
                             mode='decode',
                             **model_config)

        self.sess = tf.InteractiveSession()
        self.init = tf.global_variables_initializer()

        self.sess.run(self.init)
        self.model.load(self.sess, self.save_path)
Example #22
def eval(args):
    batch_size = 32
    train_on_gpu = torch.cuda.is_available()

    enc = RNNEncoder(300, args.embedding_file)
    dec = RNNDecoder(300, args.embedding_file)

    device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')

    model = Seq2Seq(enc, dec, device).to(device)
    ckpt = torch.load(args.model_path)
    model.load_state_dict(ckpt['state_dict'])

    model.eval()

    embedding_matrix = pickle.load(open(args.embedding_file, 'rb'))
    tokenizer = Tokenizer(lower=True)
    tokenizer.set_vocab(embedding_matrix.vocab)
    eval_data = pickle.load(open(args.test_data_path, 'rb'))
    eval_loader = DataLoader(eval_data,
                             batch_size=batch_size,
                             num_workers=0,
                             shuffle=False,
                             collate_fn=eval_data.collate_fn)

    output_file = open(args.output_path, 'w')
    val_losses = []
    prediction = {}
    for batch in tqdm(eval_loader):
        pred = model(batch, 0)
        pred = torch.argmax(pred, dim=2)
        # batch, seq_len

        for i in range(len(pred)):
            prediction[batch['id'][i]] = tokenizer.decode(
                pred[i]).split('</s>')[0].split(' ', 1)[1]
    pred_output = [
        json.dumps({
            'id': key,
            'predict': value
        })
        for key, value in sorted(prediction.items(), key=lambda item: item[0])
    ]
    output_file.write('\n'.join(pred_output))
    output_file.write('\n')
    output_file.close()
Example #23
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', type=str, required=True)
    parser.add_argument('--train_percent', type=float, default=0.8)
    parser.add_argument('--ts_dim', type=int, default=3)
    parser.add_argument('--input_length', type=int, default=24 * 2 * 6)
    parser.add_argument('--output_length', type=int, default=24 * 1 * 6)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--hidden_size', type=int, default=128)
    parser.add_argument('--num_epochs', type=int, default=30)
    parser.add_argument('--early_stopping', type=int, default=5)
    parser.add_argument('--checkpoint_path',
                        type=str,
                        default="../checkpoints/seq2seq.h5")

    config = parser.parse_args()

    model = Seq2Seq(hidden_size=config.hidden_size,
                    ts_dim=config.ts_dim,
                    input_length=config.input_length,
                    output_length=config.output_length,
                    teacher_forcing=True)

    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                gamma=0.9,
                                                step_size=1)

    time_series_list = joblib.load(os.path.join(config.data_path, 'ts.joblib'))

    trainer = Trainer(model=model,
                      time_series_list=time_series_list,
                      criterion=criterion,
                      optimizer=optimizer,
                      scheduler=scheduler,
                      batch_size=config.batch_size,
                      num_epochs=config.num_epochs,
                      early_stopping=config.early_stopping,
                      train_valid_split=config.train_percent,
                      checkpoint_path=config.checkpoint_path,
                      plot=False,
                      offet_for_plot=50)

    trainer.train()
Example #24
def build_model(options):
    model = Seq2Seq.load(ImageCaptioning,
                         options.model_path,
                         tok_dir=options.tokenizer_path,
                         use_obj=options.obj)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    num_gpu = torch.cuda.device_count()
    generator = BeamDecoder(model,
                            beam_width=options.beam_width,
                            max_len_a=options.max_len_a,
                            max_len_b=options.max_len_b,
                            len_penalty_ratio=options.len_penalty_ratio)
    if options.fp16:
        generator = amp.initialize(generator, opt_level="O2")
    if num_gpu > 1:
        generator = DataParallelModel(generator)
    return generator, model.text_processor
Example #25
    def build_graph(self, name="train"):
        graph = tf.Graph()
        tf_config = tf.ConfigProto()
        tf_config.gpu_options.allow_growth = True

        with graph.as_default(), tf.container(name):
            self.logger.info("Building {} graph...".format(name))
            model = Seq2Seq(self.data, self.config)
            sess = tf.Session(config=tf_config, graph=graph)
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())
            sess.run(tf.tables_initializer())
            sess.run(model.data_iterator.initializer)
            if (self.config.checkpoint_dir) and (name == "train"):
                self.logger.info('Loading checkpoint from {}'.format(
                    self.config.checkpoint_dir))
                model.load(sess)
                self.global_step = model.global_step_tensor.eval(sess)
        return model, sess
Example #26
def main(_):
    converter = TextConverter(filename=FLAGS.converter_name + '_converter.pkl')
    if os.path.isdir(FLAGS.checkpoint_path):
        FLAGS.checkpoint_path = tf.train.latest_checkpoint(
            FLAGS.checkpoint_path)

    model = Seq2Seq('sample',
                    converter.vocab_size,
                    lstm_size=FLAGS.lstm_size,
                    num_steps=FLAGS.num_steps,
                    num_layers=FLAGS.num_layers,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size,
                    bidirectional=FLAGS.bidirectional,
                    beam_search=FLAGS.beam_search,
                    beam_width=FLAGS.beam_width)

    model.load(FLAGS.checkpoint_path)

    max_len = FLAGS.num_steps
    while True:
        inp = input('Input (Q to quit): ')
        if inp == 'Q':
            break
        else:
            inp = converter.sentence_to_idxs(inp)
            if len(inp) > max_len:
                inp = inp[:max_len]
            else:
                inp = inp + [0] * (max_len - len(inp))
            if FLAGS.beam_search:
                decoder_outputs = model.sample(inp)
                predicted_ids = decoder_outputs.predicted_ids[0]
                parent_ids = decoder_outputs.parent_ids[0]
                sentences = converter.beam_to_sentences(
                    predicted_ids, parent_ids)
                for i, s in enumerate(sentences):
                    print('Output %d: %s' % (i, s))
            else:
                sample_id = model.sample(inp)
                output = converter.idxs_to_words(sample_id[0])
                print('Output: %s' % output)
            print('--------------------')
def predict():

    dictionary_input, rev_dictionary_input = read_vocab(args.vocab_file)
    dictionary_output, rev_dictionary_output = dictionary_input, rev_dictionary_input

    model = Seq2Seq(args.size_layer, args.num_layers, args.embedded_size,
                    len(dictionary_input), len(dictionary_output),
                    args.learning_rate, dictionary_input)

    with tf.Session() as sess:
        with tf.device("/cpu:0"):

            ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)
            if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
                tf.logging.info("restore model from patch: %s",
                                ckpt.model_checkpoint_path)  # 加载预训练模型
                saver = tf.train.Saver(max_to_keep=4)
                saver.restore(sess, ckpt.model_checkpoint_path)
            else:
                tf.logging.error("model path wrong !!")
                return

            while True:
                text = input("input your dream: ")
                input_test = [" ".join(list(text))]
                input_test = str_idx(input_test, dictionary_input,
                                     dictionary_input['UNK'])
                batch_x, _ = pad_sentence_batch(input_test,
                                                dictionary_input["PAD"])
                predicted2 = sess.run(model.predicting_ids,
                                      feed_dict={model.X: batch_x})
                for i in range(len(batch_x)):
                    print(
                        'dream:', ''.join([
                            rev_dictionary_input[n] for n in batch_x[i]
                            if n not in [0, 1, 2, 3]
                        ]))
                    print(
                        'dream decoding:', ''.join([
                            rev_dictionary_output[n] for n in predicted2[i]
                            if n not in [0, 1, 2, 3]
                        ]), '\n')
                print("*" * 20)
Example #28
def test_run(data_path, model_path, n_show=10):
    """
    Test function.
    Input is training data.
    Output have to be the sentence which is correct data in training phase.
    :return:
    """

    corpus = ConvCorpus(file_path=None)
    corpus.load(load_dir=data_path)

    print('Vocabulary Size (number of words) :', len(corpus.dic.token2id))
    print('')

    # rebuild seq2seq model
    model = Seq2Seq(len(corpus.dic.token2id),
                    feature_num=args.feature_num,
                    hidden_num=args.hidden_num,
                    batch_size=1,
                    gpu_flg=args.gpu)
    serializers.load_hdf5(model_path, model)

    # run an interpreter
    for num, input_sentence in enumerate(corpus.posts):
        id_sequence = input_sentence.copy()
        input_sentence.reverse()
        input_sentence.insert(0, corpus.dic.token2id["<eos>"])

        model.initialize()  # initialize cell
        sentence = model.generate(input_sentence,
                                  sentence_limit=len(input_sentence) + 30,
                                  word2id=corpus.dic.token2id,
                                  id2word=corpus.dic)
        print("teacher : ",
              " ".join([corpus.dic[w_id] for w_id in id_sequence]))
        print("correct :",
              " ".join([corpus.dic[w_id] for w_id in corpus.cmnts[num]]))
        print("-> ", sentence)
        print('')

        if num == n_show:
            break
Example #29
    def test_albert_seq2seq_init(self):
        path_dir_name = os.path.dirname(os.path.realpath(__file__))
        data_path = os.path.join(path_dir_name, "sample.txt")

        with tempfile.TemporaryDirectory() as tmpdirname:
            processor = TextProcessor()
            processor.train_tokenizer([data_path],
                                      vocab_size=1000,
                                      to_save_dir=tmpdirname,
                                      languages={
                                          "<en>": 0,
                                          "<fa>": 1
                                      })
            seq2seq = Seq2Seq(text_processor=processor)
            src_inputs = torch.tensor([[
                1, 2, 3, 4, 5,
                processor.pad_token_id(),
                processor.pad_token_id()
            ], [1, 2, 3, 4, 5, 6, processor.pad_token_id()]])
            tgt_inputs = torch.tensor(
                [[6, 8, 7,
                  processor.pad_token_id(),
                  processor.pad_token_id()],
                 [6, 8, 7, 8, processor.pad_token_id()]])
            src_mask = (src_inputs != processor.pad_token_id())
            tgt_mask = (tgt_inputs != processor.pad_token_id())
            src_langs = torch.tensor([[0], [0]]).squeeze()
            tgt_langs = torch.tensor([[1], [1]]).squeeze()
            seq_output = seq2seq(src_inputs,
                                 tgt_inputs,
                                 src_mask,
                                 tgt_mask,
                                 src_langs,
                                 tgt_langs,
                                 log_softmax=True)
            assert list(seq_output.size()) == [5, processor.vocab_size()]

            seq_output = seq2seq(src_inputs, tgt_inputs, src_mask, tgt_mask,
                                 src_langs, tgt_langs)
            assert list(seq_output.size()) == [5, processor.vocab_size()]
Example #30
File: train.py  Project: elel12/kmh_project
def test(embedded, batch_size=100):
    print("예측 테스트")
    model = Seq2Seq(embedded.voca_size)

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state("./model")
        model.saver.restore(sess, ckpt.model_checkpoint_path)
        enc_input, dec_input, targets = embedded.batch(batch_size)
        expect, outputs, accuracy = model.test(sess, enc_input, dec_input,
                                               targets)
        expect = embedded.decode(expect)
        outputs = embedded.decode(outputs)
        pick = random.randrange(0, len(expect) // 2)
        input = embedded.decode([embedded.test[pick * 2]], True)
        expect = embedded.decode([embedded.test[pick * 2 + 1]], True)
        outputs = embedded.cut_eos(outputs[pick])

        print("\n정확도:", accuracy)
        print("랜덤 결과\n")
        print("입력값:", input)
        print("실제값:", expect)
        print("예측값:", ' '.join(outputs))