Example #1
 def post(self):
     args = predict_parser.parse_args()
     input_question = args.question
     input_file = "../Data/prediction_api_input"
     file = open(input_file, "w")
     file.write(input_question)
     file.close()
     return s2s.predict_seq2seq(input_file, '../Data/vocab_map',
                                '../model/seq2seq', 'API')
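For context, a minimal hedged sketch of how this Flask-RESTful handler might be wired up; only `predict_parser`, the `question` argument, and `s2s` come from the example above, while the import name, route, and `required` flag are assumptions.

from flask import Flask
from flask_restful import Api, Resource, reqparse

import seq2seq_model as s2s  # hypothetical import; the example only shows `s2s`

predict_parser = reqparse.RequestParser()
predict_parser.add_argument("question", type=str, required=True)

class Predict(Resource):
    def post(self):
        ...  # body as in the example above

app = Flask(__name__)
api = Api(app)
api.add_resource(Predict, "/predict")  # route name is an assumption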
Example #2
def input_process(id):

    while True:
        if states[id]:
            time.sleep(2)
        else:
            in_dict = reply_dict[id]
            in_seq = input_str[id]
            print(time.asctime() + ' thread ' + str(id) + ' is processing a message')
            output = rnn.predict(in_seq)
            data = {"group_id": in_dict['group_id'],
                    "message": "[CQ:at,qq=" + str(in_dict['sender']['user_id']) + "] " + output}
            requests.post(url, data=data)
            states[id] = 1
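A hedged sketch of the shared state this worker polls; the dictionary layout follows the fields used above, but the CQ-HTTP endpoint and the hand-off helper are assumptions.

import threading
import time
import requests

url = 'http://127.0.0.1:5700/send_group_msg'  # assumed CQ-HTTP endpoint
states = {0: 1}          # 1 = idle, 0 = message waiting to be processed
reply_dict = {0: None}   # raw group-message event for each worker slot
input_str = {0: ''}      # extracted message text for each worker slot

def hand_off(event, text, id=0):
    # Queue one incoming message for worker `id`; input_process() picks it up
    # on its next poll and sets states[id] back to 1 when the reply is sent.
    reply_dict[id] = event
    input_str[id] = text
    states[id] = 0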
Example #3
def main():
    print("STARTED \n")

    if len(sys.argv) > 1:
        mode = sys.argv[1]
    else:
        raise ValueError("Main mode option not provided")

    tf.logging.set_verbosity(tf.logging.INFO)
    if mode.upper() == "TRAINING":
        #FOR TRAINING
        print("Training Started")
        s2s.train_seq2seq('Data/final_question_file', 'Data/final_answer_file', 'Data/vocab_map', 'model/seq2seq')
    elif mode.upper() == "INFERENCE":
        if len(sys.argv) > 2:
            pass
        else:
            sys.argv.append("COMMAND")
        #FOR PREDICTION     ---  'file'/'command'
        infer_mode = sys.argv[2]
        if infer_mode.upper() == "FILE":
            print("Entered Inference File Mode")
            s2s.predict_seq2seq('Data/prediction_input','Data/vocab_map', 'model/seq2seq', 'FILE')
        elif infer_mode.upper() == "COMMAND":
            print("Entered Inference Command Mode")
            command_line_input = input("Question: ")
            ans = s2s.predict_seq2seq('Data/prediction_input','Data/vocab_map', 'model/seq2seq', 'COMMAND', None,
                                      command_line_input)
            print("\nQuestion: ", command_line_input)
            print("\nAnswer: ", ans.replace('<EOS>',''))
        else:
            raise ValueError("Correct Inference mode (FILE/COMMAND) was not supplied")
    elif mode.upper() == 'TESTING':
        s2s.predict_seq2seq('Data/testing_input_file', 'Data/vocab_map', 'model/seq2seq', 'TESTING',
                            'Data/testing_ref_file')
    else:
        raise ValueError("Correct Main mode (Training/Inference) was not supplied")

    print("\nFINISHED \n")
Example #4
print("-----------------")
print("本软件完全免费")
print("代码遵循MIT协议")
print("-----------------")
print("")
mode = '0'
while True:
    print("-----------------")
    print("模式1:搭建模型")
    print("模式2:训练模型")
    print('模式3:开启Coolq接口')
    print("模式4:进行对话")
    print("-----------------")
    mode = input('输入工作模式:')
    if mode == '1':
        rnn.pre_precess()
        rnn.setup_model()
    elif mode == '2':
        epo = input('Enter number of epochs: ')
        bat = input('Enter batch size: ')
        rnn.train_model(int(bat), int(epo))  # input() returns strings, so cast before training
    elif mode == '3':
        t0 = threading.Thread(target=input_process, args=(0,), name='http_receive0')
        t0.start()
        http_receive()
    elif mode == '4':
        print('Enter 0 to exit')
        while True:
            str_in = input('You: ')
            if str_in == '0':
                break
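The mode-4 loop is cut off above; a hedged sketch of how such a chat loop could continue, reusing rnn.predict() as seen in Example #2 (the reply formatting is an assumption):

while True:
    str_in = input('You: ')
    if str_in == '0':
        break
    output = rnn.predict(str_in)   # same predict call as in Example #2
    print('Bot: ' + output)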
Example #5
    sort_within_batch=True,
    repeat=False)

### Encoder
### Decoder
### Seq2Seq

INPUT_DIM = len(SRC.vocab)
OUTPUT_DIM = len(TRG.vocab)

enc = Model.Encoder(INPUT_DIM, Paras.ENC_EMB_DIM, Paras.HID_DIM,
                    Paras.N_LAYERS, Paras.ENC_DROPOUT)
dec = Model.Decoder(OUTPUT_DIM, Paras.DEC_EMB_DIM, Paras.HID_DIM,
                    Paras.N_LAYERS, Paras.DEC_DROPOUT)

model = Seq2Seq.Seq2Seq(enc, dec, device).to(device)

### init_weights

model.apply(Model.init_weights)

### count_parameters

print(f'The model has {Model.count_parameters(model):,} trainable parameters')

optimizer = optim.Adam(model.parameters())

# calculating loss,ignoring loss of padding token
PAD_IDX = TRG.vocab.stoi['<pad>']

criterion = nn.CrossEntropyLoss(ignore_index=PAD_IDX)
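The `### init_weights` and `### count_parameters` markers above refer to helpers defined elsewhere; a hedged sketch of what they typically look like in this style of seq2seq code (standard tutorial idiom, assumed here, not taken from the example):

import torch.nn as nn

def init_weights(m):
    # Initialise every parameter uniformly in [-0.08, 0.08].
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)

def count_parameters(model):
    # Count trainable parameters only.
    return sum(p.numel() for p in model.parameters() if p.requires_grad)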
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--resume_ret', action='store_true')
    parser.add_argument('--fourthofdata', action='store_true')
    parser.add_argument('--halfdata', action='store_true')

    opt = parser.parse_args()

    ############################### RETRIEVER #################################

    ret_enc = RNNEncoder(ret_INPUT_DIM, ret_ENC_EMB_DIM, ret_HID_DIM,
                         ret_N_LAYERS, ret_ENC_DROPOUT)
    ret_dec = RNNDecoder(ret_OUTPUT_DIM, ret_DEC_EMB_DIM, ret_HID_DIM,
                         ret_N_LAYERS, ret_DEC_DROPOUT)

    ret_model = Seq2Seq(ret_enc, ret_dec, cuda_device).to(cuda_device)

    print('The model has {0:9d} trainable parameters'.format(
        count_parameters(ret_model)))

    ret_optimizer = optim.Adam(ret_model.parameters())
    ret_criterion = nn.CrossEntropyLoss()

    if not os.path.isdir('models'):
        os.makedirs('models')

    if opt.resume_ret:
        with open("results/" + "ret" + "_data.pickle", "rb") as k:
            data = pickle.load(k)

        train_data = data["train"]
        valid_data = data["valid"]
        test_data = data["test"]
        train_data_adj = data["train_adj"]
        valid_data_adj = data["valid_adj"]
        test_data_adj = data["test_adj"]

        print("valid data", valid_data)

        MODEL_SAVE_PATH = os.path.join(SAVE_DIR, "ret" + '_model.pt')
        ret_model.load_state_dict(torch.load(MODEL_SAVE_PATH))

        with open("results/" + "ret" + "_latent_space_vect.pickle", "rb") as j:
            latent_space_vects = pickle.load(j)
            enc_train_vect = latent_space_vects["train"]
            enc_valid_vect = latent_space_vects["valid"]
    else:
        train_data, valid_data, test_data, train_data_adj, valid_data_adj, test_data_adj = split_data(
            opt)
        data = {}
        data["train"] = train_data
        data["valid"] = valid_data
        data["test"] = test_data
        data["train_adj"] = train_data_adj
        data["valid_adj"] = valid_data_adj
        data["test_adj"] = test_data_adj

        with open("results/" + "ret" + "_data.pickle", "wb") as k:
            pickle.dump(data, k)
        enc_train_vect, enc_valid_vect = train_valid_model(
            filename="ret",
            which_train=ret_train,
            which_evaluate=ret_evaluate,
            model=ret_model,
            train_data=train_data,
            valid_data=valid_data,
            train_data_adj=train_data_adj,
            valid_data_adj=valid_data_adj,
            optimizer=ret_optimizer,
            criterion=ret_criterion)

    enc_test_vect = test_model(filename="ret",
                               which_evaluate=ret_evaluate,
                               model=ret_model,
                               test_data=test_data,
                               test_data_adj=test_data_adj,
                               criterion=ret_criterion)

    ######################## NEAREST NEIGHBOUR #################################

    train_ann = create_annoy_index("AttnEncAttnDecTrain", enc_train_vect)
    valid_ann = create_annoy_index("AttnEncAttnDecValid", enc_valid_vect)
    test_ann = create_annoy_index("AttnEncAttnDecTest", enc_test_vect)

    wordlist2comment_dict = pickle.load(open("wordlist2comment.pickle", "rb"))
    word2idcommentvocab_dict = pickle.load(
        open("word2idcommentvocab.pickle", "rb"))

    sim_train_data = torch.zeros_like(train_data)
    sim_valid_data = torch.zeros_like(valid_data)
    sim_test_data = torch.zeros_like(test_data)

    for training_sample_id in range(train_data.shape[0]):
        training_sample_comment = train_data[
            training_sample_id][:max_comment_len]
        training_sample_code = train_data[training_sample_id][max_comment_len +
                                                              1:]

        annoy_vect = train_ann.get_item_vector(training_sample_id)
        sim_vect_id = train_ann.get_nns_by_vector(annoy_vect, 1)[0]  # get_nns_by_vector returns a list of ids

        if sim_vect_id == training_sample_id:
            print("Same id for training vect and similar vect")
            exit(0)

        sim_train_data[training_sample_id] = train_data[sim_vect_id]

    new_train_data = torch.cat((train_data, sim_train_data), dim=1)
    #print("new_train_data ", new_train_data.shape)

    for valid_sample_id in range(valid_data.shape[0]):
        valid_sample_comment = valid_data[valid_sample_id][:max_comment_len]
        valid_sample_code = valid_data[valid_sample_id][max_comment_len + 1:]

        annoy_vect = valid_ann.get_item_vector(valid_sample_id)
        sim_vect_id = train_ann.get_nns_by_vector(annoy_vect, 1)[0]  # get_nns_by_vector returns a list of ids

        if sim_vect_id == valid_sample_id:
            print("Same id for training vect and similar vect")
            exit(0)

        sim_valid_data[valid_sample_id] = train_data[sim_vect_id]

    new_valid_data = torch.cat((valid_data, sim_valid_data), dim=1)

    for test_sample_id in range(test_data.shape[0]):
        test_sample_comment = test_data[test_sample_id][:max_comment_len]
        test_sample_code = test_data[test_sample_id][max_comment_len + 1:]

        annoy_vect = test_ann.get_item_vector(test_sample_id)
        sim_vect_id = train_ann.get_nns_by_vector(annoy_vect, 1)[0]  # get_nns_by_vector returns a list of ids

        if sim_vect_id == test_sample_id:
            print("Same id for training vect and similar vect")
            exit(0)

        sim_test_data[test_sample_id] = train_data[sim_vect_id]

    new_test_data = torch.cat((test_data, sim_test_data), dim=1)

    ############################### TSNE #################################

    #tsne_test_sample = enc_test_vect[0]
    num_tsne_train_data = 100
    which_tsne_test_sample = random.randint(0, enc_test_vect.shape[0] - 1)  # randint is inclusive at both ends

    annoy_tsne_test_vect = test_ann.get_item_vector(which_tsne_test_sample)
    tsne_data = enc_train_vect[:num_tsne_train_data]
    tsne_data_add = torch.zeros(11, enc_test_vect.shape[1], device=cuda_device)
    tsne_data_add[0] = enc_test_vect[which_tsne_test_sample]

    nr = 1
    for id in train_ann.get_nns_by_vector(annoy_tsne_test_vect, 10):
        tsne_data_add[nr] = enc_train_vect[id]
        nr += 1

    tsne_data = torch.cat((tsne_data, tsne_data_add), dim=0)

    colour_labels = []
    for i in range(num_tsne_train_data):
        colour_labels += ["#0099cc"]  #train
    colour_labels += ["#e60b42"]  #test
    for i in range(10):
        colour_labels += ["#f09a00"]  #nearest neighbours

    vis_tsne(data=tsne_data, labels=colour_labels, name="10nearest")

    ############################### EDITOR #################################

    ed_enc = GraphCondAttnEncoder(src_vocab_size, trg_vocab_size, ed_hid_dim,
                                  ed_n_layers, ed_n_heads, ed_pf_dim,
                                  AttnEncoderLayer, SelfAttention,
                                  PositionwiseFeedforward, ed_dropout,
                                  cuda_device)
    ed_dec = AttnDecoder(ed_output_dim, ed_hid_dim, ed_n_layers, ed_n_heads,
                         ed_pf_dim, AttnDecoderLayer, SelfAttention,
                         PositionwiseFeedforward, ed_dropout, cuda_device)

    ed_pad_idx = 0
    ed_model = Editor(ed_enc, ed_dec, ed_pad_idx, cuda_device).to(cuda_device)

    for p in ed_model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)

    print('The model has {0:9d} trainable parameters'.format(
        count_parameters(ed_model)))

    ed_optimizer = optim.Adam(ed_model.parameters())
    ed_criterion = nn.CrossEntropyLoss()

    output_train_vect, output_valid_vect_candidates = train_valid_model(
        filename="ed",
        which_train=ed_train,
        which_evaluate=ed_evaluate,
        model=ed_model,
        train_data=new_train_data,
        valid_data=new_valid_data,
        train_data_adj=train_data_adj,
        valid_data_adj=valid_data_adj,
        optimizer=ed_optimizer,
        criterion=ed_criterion)
    #print("Test model")
    output_test_vect_candidates = test_model(filename="ed",
                                             which_evaluate=ed_evaluate,
                                             model=ed_model,
                                             test_data=new_test_data,
                                             test_data_adj=test_data_adj,
                                             criterion=ed_criterion)
    output_test_vect_reference = test_data[:, max_comment_len:]

    token_dict = pickle.load(open("codevocab.pickle", "rb"))

    all_refs = []
    all_cands = []
    all_bleu_scores = []
    for j in range(test_data.shape[0]):
        ref = []
        cand = []
        for i in range(max_code_len):
            ref_el = output_test_vect_reference[j][i].item()
            cand_el = output_test_vect_candidates[j][i].item()
            if ref_el > 0:
                if ref_el in token_dict:
                    ref += [token_dict[ref_el]]
                if cand_el in token_dict:
                    cand += [token_dict[cand_el]]
        bleu = sentence_bleu([ref], cand)
        all_bleu_scores += [bleu]
        all_refs += [ref]
        all_cands += [cand]

    bleu_eval = {}
    bleu_eval["scores"] = all_bleu_scores
    bleu_eval["references"] = all_refs
    bleu_eval["candidates"] = all_cands

    print("Average BLEU score is ",
          sum(all_bleu_scores) / len(all_bleu_scores))
    pickle.dump(bleu_eval, open("results/bleu_evaluation_results.pickle",
                                "wb"))
Example #7
def train():
    parser = ArgumentParser()
    parser.add_argument(
        "--dataset_path",
        type=str,
        default="../data/time_transfor/Time Dataset.json",
        help="Path or url of the dataset. If empty download from S3.")
    parser.add_argument("--dataset_cache",
                        type=str,
                        default='../cache/',
                        help="Path or url of the dataset cache")
    parser.add_argument("--batch_size",
                        type=int,
                        default=32,
                        help="Batch size for validation")
    parser.add_argument("--embedding_dim",
                        type=int,
                        default=100,
                        help="Batch size for validation")
    parser.add_argument("--hidden_dim",
                        type=int,
                        default=100,
                        help="Batch size for validation")
    parser.add_argument("--gradient_accumulation_steps",
                        type=int,
                        default=1,
                        help="Accumulate gradients on several steps")
    parser.add_argument("--lr",
                        type=float,
                        default=6.25e-5,
                        help="Learning rate")
    parser.add_argument("--train_precent",
                        type=float,
                        default=0.7,
                        help="Batch size for validation")
    parser.add_argument("--n_epochs",
                        type=int,
                        default=30,
                        help="Number of training epochs")
    parser.add_argument("--device",
                        type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device (cuda or cpu)")
    parser.add_argument("--max_norm",
                        type=float,
                        default=1.0,
                        help="Clipping gradient norm")
    parser.add_argument("--log_step",
                        type=int,
                        default=10,
                        help="Multiple-choice loss coefficient")
    parser.add_argument("--raw_data",
                        action='store_true',
                        default=True,
                        help="If true read data by raw function")
    args = parser.parse_args()
    device = torch.device(args.device)

    train_data_loader, valid_data_loader, input_lengths, target_lengths = get_data_loaders(
        args.dataset_path, args.batch_size, args.train_precent)

    encoder = Encoder(input_lengths + 1, args.embedding_dim, args.hidden_dim)
    decoder = Decoder(target_lengths + 1, args.embedding_dim, args.hidden_dim)
    model = Seq2Seq(encoder, decoder, device).to(device)
    optimizer = optim.Adam(model.parameters())
    criterion = nn.NLLLoss(ignore_index=0).to(device)

    def update(engine, batch):
        model.train()
        src_seqs = batch[0].transpose(0, 1).to(device)
        src_lengths = batch[1].to(device)
        trg_seqs = batch[2].transpose(0, 1).to(device)
        output = model(src_seqs, src_lengths, trg_seqs)
        loss = criterion(output.contiguous().view(-1, output.shape[2]),
                         trg_seqs.contiguous().view(-1))
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_norm)

        if engine.state.iteration % args.gradient_accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
        return loss.item()

    trainer = Engine(update)

    def inference(engine, batch):
        model.eval()
        with torch.no_grad():
            src_seqs = batch[0].transpose(0, 1).to(device)
            src_lengths = batch[1].to(device)
            trg_seqs = batch[2].transpose(0, 1).to(device)
            output = model(src_seqs, src_lengths, trg_seqs)
            return output.contiguous().view(
                -1, output.shape[2]), trg_seqs.contiguous().view(-1)

    evaluator = Engine(inference)
    metrics = {
        "nll": Loss(criterion, output_transform=lambda x: (x[0], x[1])),
        "accuracy": Accuracy(output_transform=lambda x: (x[0], x[1]))
    }
    for name, metric in metrics.items():
        metric.attach(evaluator, name)

    @trainer.on(Events.EPOCH_COMPLETED)
    def log_validation_results(trainer):
        evaluator.run(valid_data_loader)
        ms = evaluator.state.metrics
        logger.info(
            "Validation Results - Epoch: [{}/{}]  Avg accuracy: {:.6f} Avg loss: {:.6f}"
            .format(trainer.state.epoch, trainer.state.max_epochs,
                    ms['accuracy'], ms['nll']))

    '''======================early stopping =========================='''

    def score_function(engine):
        val_loss = engine.state.metrics['nll']
        return -val_loss

    handler = EarlyStopping(patience=5,
                            score_function=score_function,
                            trainer=trainer)
    evaluator.add_event_handler(Events.COMPLETED, handler)
    '''==================print information by iterator========================='''
    steps = len(train_data_loader.dataset) // train_data_loader.batch_size
    steps = steps if steps > 0 else 1
    logger.info('steps:%d' % steps)

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_training_loss(trainer):
        if trainer.state.iteration % args.log_step == 0:
            logger.info("Epoch[{}/{}] Step[{}/{}] Loss: {:.6f}".format(
                trainer.state.epoch, trainer.state.max_epochs,
                trainer.state.iteration % steps, steps,
                trainer.state.output * args.gradient_accumulation_steps))

    '''================add check point========================'''
    checkpoint_handler = ModelCheckpoint(checkpoint_dir,
                                         'checkpoint',
                                         save_interval=1,
                                         n_saved=3)
    trainer.add_event_handler(
        Events.EPOCH_COMPLETED, checkpoint_handler,
        {'mymodel': getattr(model, 'module', model)
         })  # "getattr" take care of distributed encapsulation
    '''==============run trainer============================='''
    trainer.run(train_data_loader, max_epochs=args.n_epochs)
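train() above references `logger` and `checkpoint_dir` without defining them; a hedged sketch of the module-level setup it presumably relies on (the names and timestamp format are assumptions, the format chosen to match the default --check_point path seen in the later test() example):

import logging
import os
from datetime import datetime

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# e.g. ../checkpoint/Oct30_21-01-28/, matching the default --check_point path in test()
checkpoint_dir = os.path.join('../checkpoint',
                              datetime.now().strftime('%b%d_%H-%M-%S'))

if __name__ == '__main__':
    train()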
Example #8
def main(N_EPOCHS, learning_rate, batch_size, device, save_dir):

    dataset = util.load_jsonl(train_path)
    dataset = Dataset.Dataset(dataset)

    train_length = int(len(dataset) * 0.8)
    valid_length = len(dataset) - train_length
    train_set, val_set = torch.utils.data.random_split(
        dataset, (train_length, valid_length))
    # check the case where a remainder smaller than the default batch size is left
    #train_set, val_set,_ = torch.utils.data.random_split(dataset, (32, 32,len(dataset)-64))

    test_set = util.load_jsonl(test_path)
    test_set = Dataset.Dataset(test_set)
    # check the dimensions of the generated sentences
    #test_set,_ = torch.utils.data.random_split(test_set, (2,len(test_set)-2))

    train_dataloader = torch.utils.data.DataLoader(train_set,
                                                   batch_size=batch_size,
                                                   shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(
        val_set, batch_size=batch_size)  # shuffle=True

    test_dataloader = torch.utils.data.DataLoader(test_set,
                                                  batch_size=1)  # generate summaries one line at a time

    # adjust input dim and output dim
    INPUT_DIM = 49990  #80000
    OUTPUT_DIM = 49990  #80000
    ENC_EMB_DIM = 32
    DEC_EMB_DIM = 32
    HID_DIM = 512
    N_LAYERS = 2
    ENC_DROPOUT = 0.5
    DEC_DROPOUT = 0.5

    vocab = Vocab.Vocab()

    enc = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT,
                  device)
    dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)

    model = Seq2Seq(enc, dec, device).to(device)
    model.apply(init_weights)

    torch.autograd.set_detect_anomaly(True)

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss(ignore_index=0).to(device)  # padding

    #N_EPOCHS = 1
    CLIP = 1

    best_valid_loss = 100  #float('inf')
    '''
    for epoch in range(N_EPOCHS):

        #start_time = time.time()

        train_loss = train(model, train_dataloader, optimizer, criterion, CLIP, vocab, device)
        valid_loss = evaluate(model, val_dataloader, criterion, vocab, device) # , valid_sents

        #end_time = time.time()
        #epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(model, f'{save_dir}/seq2seq.pt')

        #print(f'Epoch: {epoch+1:2} | Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
        print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')
    '''
    model = torch.load(f'{save_dir}/seq2seq.pt')
    test_sents = test(model, test_dataloader, criterion, vocab, device)

    save_csv(out_path, test_sents)
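The commented-out training loop above calls an epoch_time() helper; a hedged sketch of the usual definition (an assumption, not taken from the example):

def epoch_time(start_time, end_time):
    elapsed = end_time - start_time
    mins = int(elapsed / 60)
    secs = int(elapsed - mins * 60)
    return mins, secs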
Example #9
NB_HIDDEN = 250
BATCH_SIZE = 50
NB_STEP = 15
NB_FEATURES = 26 # Alphabet
LEARNING_RATE = 1e-2
STOP_TRESHOLD = 1.
LOG_DIR = "logs/" + str(LEARNING_RATE) + "_learning_rate"

# Random one hot batch generator tensor
random_one_hot_batch_generator = tf.one_hot(tf.random_uniform([NB_STEP], minval=0, \
                                                              maxval=NB_FEATURES - 1, \
                                                              dtype=tf.int32), NB_FEATURES)

if __name__ == "__main__":
    seq2seq = Seq2Seq(NB_FEATURES, NB_HIDDEN, NB_HIDDEN, LEARNING_RATE)
    init_global = tf.global_variables_initializer()
    init_local = tf.local_variables_initializer()

    loss_placeholder = tf.placeholder(tf.float32, [])
    loss_summary = tf.summary.scalar("Loss", loss_placeholder)
    accuracy_placeholder = tf.placeholder(tf.float32, [])
    accuracy_summary = tf.summary.scalar("Accuracy", accuracy_placeholder)
    summary_writer = tf.summary.FileWriter(LOG_DIR, tf.get_default_graph())

    with tf.Session() as sess:
        sess.run([init_global, init_local])

        accuracy = 0.0
        epoch = 0
        while accuracy < STOP_TRESHOLD:
Example #10
def test():
    parser = ArgumentParser()
    parser.add_argument(
        "--dataset_path",
        type=str,
        default="../data/Time Dataset.json",
        help="Path or url of the dataset. If empty download from S3.")
    parser.add_argument(
        "--check_point",
        type=str,
        default='../checkpoint/Oct30_21-01-28/checkpoint_mymodel_8.pth',
        help="Path of the model checkpoint to load")
    parser.add_argument("--batch_size",
                        type=int,
                        default=100,
                        help="Batch size for validation")
    parser.add_argument("--embedding_dim",
                        type=int,
                        default=100,
                        help="Batch size for validation")
    parser.add_argument("--hidden_dim",
                        type=int,
                        default=100,
                        help="Batch size for validation")
    parser.add_argument("--device",
                        type=str,
                        default="cuda" if torch.cuda.is_available() else "cpu",
                        help="Device (cuda or cpu)")
    parser.add_argument("--train_precent",
                        type=float,
                        default=0.7,
                        help="Batch size for validation")
    args = parser.parse_args()
    device = torch.device(args.device)

    current_time = datetime.now().strftime('%b%d_%H-%M-%S')
    logdir = os.path.join('../logs', current_time + '_' + socket.gethostname())

    train_data_loader, valid_data_loader, input_lengths, target_lengths = get_data_loaders(
        args.dataset_path, args.batch_size, args.train_precent)

    encoder = Encoder(input_lengths + 1, args.embedding_dim, args.hidden_dim)
    decoder = Decoder(target_lengths + 1, args.embedding_dim, args.hidden_dim)
    model = Seq2Seq(encoder, decoder, device).to(device)

    check_point = torch.load(args.check_point)
    model.load_state_dict(check_point)
    model.eval()

    pairs = json.load(open('../data/Time Dataset.json', 'rt',
                           encoding='utf-8'))
    data = array(pairs)
    src_texts = data[:, 0]
    trg_texts = data[:, 1]
    src_c2ix, src_ix2c = build_vocab(src_texts)
    trg_c2ix, trg_ix2c = build_vocab(trg_texts)

    def get_decode(src):
        result = []
        for t in src:
            result.append(src_ix2c[t])
        sndx = 0
        if '^' in result:
            sndx = result.index('^') + 1
        endx = result.index('$')
        return ''.join(result[sndx:endx])

    def get_decode_target(target):
        result = []
        for t in target:
            result.append(trg_ix2c[int(t)])
        sndx = 0
        if '^' == result[0]:
            sndx = result.index('^') + 1
        endx = result.index('$')
        return ''.join(result[sndx:endx])

    max_src_len = max(list(map(len, src_texts))) + 2
    max_trg_len = max(list(map(len, trg_texts))) + 2
    max_src_len, max_trg_len

    for batch in valid_data_loader:
        src_seqs = batch[0].transpose(0, 1).to(device)
        src_lengths = batch[1].to(device)
        trg_seqs = batch[2].transpose(0, 1).to(device)
        outputs, attn_weights = model.predict(src_seqs=src_seqs,
                                              src_lengths=src_lengths)
        # print(outputs.cpu().detach().numpy())
        outputs_index = torch.argmax(outputs.cpu(), dim=2)
        outputs_index_mat = outputs_index.permute(1, 0)

        for i in range(outputs_index_mat.shape[0]):
            print('src:    \t',
                  get_decode(src_seqs.cpu().permute(1, 0)[i].numpy()))
            print('target :\t',
                  get_decode_target(trg_seqs.cpu().permute(1, 0)[i].numpy()))
            print('predict:\t',
                  get_decode_target(outputs_index_mat[i].detach().numpy()[1:]))
            print('=' * 64)
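A hedged sketch of build_vocab() as used above: a character-level vocabulary whose '^'/'$' start/end markers are inferred from get_decode(); the padding index and exact ordering are assumptions.

def build_vocab(texts):
    c2ix = {'<pad>': 0, '^': 1, '$': 2}   # pad / start / end markers (assumed layout)
    for c in sorted(set(''.join(texts))):
        c2ix.setdefault(c, len(c2ix))
    ix2c = {ix: c for c, ix in c2ix.items()}
    return c2ix, ix2c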
Example #11
def main():
    parser = argparse.ArgumentParser(
        description='process user given parameters')
    parser.register('type', 'bool', str2bool)
    parser.add_argument('--random_seed', type=float, default=42)
    parser.add_argument('--teacher_forcing_ratio', type=float, default=0.5)
    parser.add_argument('--max_decode_len', type=int, default=100)
    parser.add_argument('--att_method', type=str, default='concat')
    parser.add_argument('--seq_model', type=str, default='simple')

    # I/O parameters
    parser.add_argument('--train_dir', type=str, default='./eng-fra.txt')
    parser.add_argument(
        '--word_embed_file',
        type=str,
        default=
        '/home/wang.9215/medical_phrases/pretrain_embeddings/glove.6B.300d.txt'
    )
    parser.add_argument('--save_best',
                        type='bool',
                        default=False,
                        help='save model in the best epoch or not')
    parser.add_argument('--save_dir',
                        type=str,
                        default='./saved_models',
                        help='save model in the best epoch or not')
    parser.add_argument('--save_interval',
                        type=int,
                        default=5,
                        help='intervals for saving models')

    # model parameters
    parser.add_argument('--use_pretrain_embed', type='bool', default=False)
    parser.add_argument('--word_dim', type=int, default=256)
    parser.add_argument('--lstm_hidden_dim', type=int, default=256)

    # optim parameters
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--num_epochs',
                        type=int,
                        default=1000,
                        help="number of epochs for training")
    parser.add_argument('--log_interval',
                        type=int,
                        default=100,
                        help='step interval for log')
    parser.add_argument('--test_interval',
                        type=int,
                        default=10,
                        help='epoch interval for testing')
    parser.add_argument('--early_stop_epochs', type=int, default=10)
    parser.add_argument('--learning_rate', type=float, default=0.0001)
    parser.add_argument('--min_epochs',
                        type=int,
                        default=50,
                        help='minimum number of epochs')
    parser.add_argument('--clip_grad', type=float, default=5.0)
    parser.add_argument('--lr_decay',
                        type=float,
                        default=0.05,
                        help='decay ratio of learning rate')
    parser.add_argument('--metric', type=str, default='map', help='mrr or map')
    parser.add_argument('--dropout', type=float, default=0.5)

    args = parser.parse_args()
    print('args: ', args)

    print('********Key parameters:******')
    print('Use GPU? {0}'.format(torch.cuda.is_available()))
    # print('Model Parameters: ')
    print('*****************************')

    np.random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)

    # load the original data
    data, src_vocab, tgt_vocab = loader.txt_loader(args.train_dir)

    _, src_word2id, src_id2word = loader.raw_word_mapping(src_vocab)
    args.src_vocab_size = len(src_word2id) + 1
    args.src_word2id = src_word2id
    args.src_id2word = src_id2word

    _, tgt_word2id, tgt_id2word = loader.raw_word_mapping(tgt_vocab)
    args.tgt_vocab_size = len(tgt_word2id) + 1
    args.tgt_word2id = tgt_word2id
    args.tgt_id2word = tgt_id2word

    np.random.shuffle(data)
    train_ratio = 0.8
    train_data = data[:int(len(data) * train_ratio)]
    test_data = data[int(len(data) * train_ratio)::]
    # convert original data to digits
    train_idx_data = loader.make_idx_data(train_data, args)
    test_idx_data = loader.make_idx_data(test_data, args)

    print(train_idx_data[0])

    # global parameters
    args.cuda = torch.cuda.is_available()
    args.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # build model, initialize parameters
    # model = Seq2Seq.SimpleSeq2Seq(args).to(args.device)
    # model = Seq2Seq.ContextSeq2Seq(args).to(args.device)
    # model = Seq2Seq.BahdanauAttSeq2Seq(args).to(args.device)
    model = Seq2Seq.LuongAttSeq2Seq(args).to(args.device)
    # if args.cuda:
    #     model = model.cuda()
    print(model)
    # print([name for name, p in model.named_parameters()])

    criterion = nn.CrossEntropyLoss(ignore_index=config.PAD_WORD)
    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)

    train_loss = 0
    train_logits = []
    train_labels = []
    num_batches = len(train_idx_data) // args.batch_size
    print('Begin trainning...')
    for epoch in range(args.num_epochs):
        model.train()
        steps = 0
        np.random.shuffle(train_idx_data)
        for i in range(num_batches):
            train_batch = train_idx_data[i * args.batch_size:(i + 1) *
                                         args.batch_size]
            if i == num_batches - 1:
                train_batch = train_idx_data[i * args.batch_size::]

            list_xs, labels = loader.batch_processing(train_batch, args)

            labels = torch.tensor(labels, dtype=torch.long,
                                  device=args.device)  # (B * L)
            list_xs = [torch.tensor(x, device=args.device) for x in list_xs]
            # if args.cuda:
            #     labels = labels.cuda()
            #     list_xs = [x.cuda() for x in list_xs]

            optimizer.zero_grad()

            logits = model(list_xs, args.teacher_forcing_ratio)  # (B * L) * V
            # loss = utils.masked_cross_entropy(logits, labels)
            loss = criterion(logits.reshape(-1, args.tgt_vocab_size),
                             labels.reshape(-1))
            train_loss += loss

            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), args.clip_grad)
            optimizer.step()

            if args.cuda:
                logits = logits.to('cpu').detach().data.numpy()
                labels = labels.to('cpu').detach().data.numpy()
                # logits = logits.cpu().detach().data.numpy()
                # labels = labels.cpu().detach().data.numpy()
            else:
                logits = logits.detach().data.numpy()
                labels = labels.detach().data.numpy()

            # print(logits.shape)
            train_logits.append(logits)
            train_labels.append(labels)

            # evaluation
            steps += 1
            if steps % args.log_interval == 0:
                # cur_logits = np.concatenate(train_logits, axis=0)
                # cur_labels = np.concatenate(train_labels, axis=0)
                cur_train_bleu = utils.cal_bleu_score(train_logits,
                                                      train_labels,
                                                      config.EOS_WORD)
                print(
                    "Epoch-{0}, steps-{1}: Train Loss - {2:.5}, Train BLEU - {3:.5}"
                    .format(epoch, steps, train_loss / len(train_batch),
                            cur_train_bleu))

                train_loss = 0
                train_logits = []
                train_labels = []

        # utils.adjust_learning_rate(optimizer, args.learning_rate / (1 + (epoch + 1) * args.lr_decay))

        if epoch == 0: continue

        if epoch % args.test_interval == 0:
            test_bleu = train_utils.dataset_eval(model, test_idx_data, args)
            print("Epoch-{0}: Test BLEU: {1:.5}".format(epoch, test_bleu))

            # test_case = np.random.choice(test_idx_data)
            idx = np.random.randint(0, len(test_idx_data))
            print(' '.join(test_data[idx][0]))
            print(' '.join(test_data[idx][1]))
            decode_ids, decode_words = model.seq_decoding(test_data[idx][0])
            # decode_words = train_utils.instance_eval(model, test_data[idx][0], test_data[idx][1], args, seq_decode=True)
            print(decode_words)

        # if epoch % args.test_interval == 0:
        #     # if args.logging:
        #     #     args.log_name = '../logs/deep_predition_val_logs_{0}.txt'.format(epoch)
        #     all_dev = train_utils.evaluation(dev_idx, model, criterion, args)
        #     print("Epoch-{0}: All Dev {1}: {2:.5}".format(epoch, args.metric.upper(), all_dev))
        #
        #     if all_dev > best_on_dev:
        #         print(datetime.now().strftime("%m/%d/%Y %X"))
        #         best_on_dev = all_dev
        #         last_epoch = epoch
        #
        #     all_iv = train_utils.evaluation(iv_test_idx, model, criterion, args)
        #     print("--- Testing: All IV Test {0}: {1:.5}".format(args.metric.upper(), all_iv))
        #
        #     if args.save_best:
        #         utils.save(model, args.save_dir, 'best', epoch)
        #
        #     else:
        #         if epoch - last_epoch > args.early_stop_epochs and epoch > args.min_epochs:
        #             print('Early stop at {0} epoch.'.format(epoch))
        #             break
        #
        # elif epoch % args.save_interval == 0:
        #     utils.save(model, args.save_dir, 'snapshot', epoch)

    return
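main() above registers a 'bool' argparse type via str2bool without showing it; a hedged sketch of the usual helper (an assumption):

def str2bool(v):
    # Accept common truthy spellings on the command line.
    return str(v).lower() in ('yes', 'true', 't', '1')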
Example #12
enc_h_dim = 512
dec_h_dim = 512
e_dim = 256
dropout = 0.5

if att == True:
    attention = Seq2SeqBA.Attention(enc_h_dim, dec_h_dim)
    encoder = Seq2SeqBA.Encoder(i_dim, e_dim, enc_h_dim, dec_h_dim, dropout)
    decoder = Seq2SeqBA.Decoder(o_dim, e_dim, enc_h_dim, dec_h_dim, dropout,
                                attention)
    model = Seq2SeqBA.BahdanauS2S(encoder, decoder, device).to(device)
    model_name = "S2SBA.pt"
else:
    encoder = Seq2Seq.Encoder(i_dim,
                              e_dim,
                              enc_h_dim,
                              n_layers=2,
                              dropout=dropout)
    decoder = Seq2Seq.Decoder(o_dim,
                              e_dim,
                              dec_h_dim,
                              n_layers=2,
                              dropout=dropout)
    model = Seq2Seq.Seq2Seq(encoder, decoder, device).to(device)
    model_name = "S2S.pt"

print("Initialize weights")
model.apply(initialize_weights)

optimizer = optim.Adam(model.parameters(), lr=lr)
target_pad_idx = en_field.vocab.stoi[en_field.pad_token]
abb_losses = []  #keeps track of loss values for ABB words
training_losses = []  #keeps track of loss on training data each run
make_ABB = lambda x: x[:1] + [1.0] + x[1 + 1:]  #Adds semantic information
make_ABA = lambda x: x[:1] + [-1.0] + x[1 + 1:]  #Adds semantic information

for rep in range(REPS):
    print("Rep: " + str(rep))

    #Erase the previous model:
    keras.backend.clear_session()

    #Build the new model:
    model = Seq2Seq.seq2seq(input_dim=FEAT_NUM,
                            hidden_dim=FEAT_NUM,
                            output_length=2,
                            output_dim=FEAT_NUM,
                            batch_size=1,
                            learn_rate=0.001,
                            layer_type="lstm",
                            dropout=DROPOUT)

    #PRETRAINING
    if VOCAB_SIZE > 0:
        print("Simulating real-life experience of infants...Rep=" + str(rep))
        irl_X = []
        irl_Y = []
        for word in range(VOCAB_SIZE):
            ##Putting reduplication in training:
            if np.random.rand() < REDUP_IN_PT:
                syll_alpha = choice(all_sylls)
                template = choice(["ABB", "ABA"])
                if template == "ABB":
Example #14
        #Shuffle training data:
        indexes = list(range(len(ordered_X)))
        shuffle(indexes)
        X = np.array([ordered_X[i] for i in indexes])
        Y = np.array([ordered_Y[i] for i in indexes])

        #Create the model object:
        #model = Seq2Seq(input_dim=FEAT_NUM, hidden_dim=FEAT_NUM*3, output_length=3, output_dim=FEAT_NUM, depth=2)

        #Build the new model:
        model = Seq2Seq.seq2seq(
            input_dim=FEAT_NUM,
            hidden_dim=FEAT_NUM * 3,
            output_length=3,
            output_dim=FEAT_NUM,
            batch_size=1,
            learn_rate=0.005,
            layer_type="lstm",
        )

        this_curve = []
        for ep in range(EPOCHS):
            #Train the model one epoch at a time,
            #so we can give it a forced-choice task at each step:
            print("Epoch: " + str(ep))
            hist = model.train(X, Y, epoch_num=1, print_every=2)
            this_curve.append(hist["Loss"][-1])
            for trial_type in accuracies.keys():
                corr_loss = np.square(
                    np.subtract(
Example #15
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--resume_ret', action='store_true')
    parser.add_argument('--fourthofdata', action='store_true')
    parser.add_argument('--halfdata', action='store_true')
    parser.add_argument('--threefourthsofdata', action='store_true')
    opt = parser.parse_args()

    train_data, valid_data, test_data = split_data(opt)

    ret_enc = RNNEncoder(ret_INPUT_DIM, ret_ENC_EMB_DIM, ret_HID_DIM,
                         ret_N_LAYERS, ret_ENC_DROPOUT)
    ret_dec = RNNDecoder(ret_OUTPUT_DIM, ret_DEC_EMB_DIM, ret_HID_DIM,
                         ret_N_LAYERS, ret_DEC_DROPOUT)

    model = Seq2Seq(ret_enc, ret_dec, cuda_device).to(cuda_device)

    print('The model has {0:9d} trainable parameters'.format(
        count_parameters(model)))

    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()

    if not os.path.isdir('models'):
        os.makedirs('models')

    enc_train_vect, enc_valid_vect = train_valid_model(model=model,
                                                       train_data=train_data,
                                                       valid_data=valid_data,
                                                       optimizer=optimizer,
                                                       criterion=criterion)
    enc_test_vect = test_model(model=model,
                               test_data=test_data,
                               criterion=criterion)

    train_ann = create_annoy_index("AttnEncAttnDecTrain", enc_train_vect)
    valid_ann = create_annoy_index("AttnEncAttnDecValid", enc_valid_vect)
    test_ann = create_annoy_index("AttnEncAttnDecTest", enc_test_vect)

    wordlist2comment_dict = pickle.load(open("wordlist2comment.pickle", "rb"))
    word2idcommentvocab_dict = pickle.load(
        open("word2idcommentvocab.pickle", "rb"))

    sim_train_data = torch.zeros_like(train_data)
    sim_valid_data = torch.zeros_like(valid_data)
    sim_test_data = torch.zeros_like(test_data)

    for training_sample_id in range(train_data.shape[0]):
        training_sample_comment = train_data[
            training_sample_id][:max_comment_len]
        training_sample_code = train_data[training_sample_id][max_comment_len +
                                                              1:]

        annoy_vect = train_ann.get_item_vector(training_sample_id)
        sim_vect_id = train_ann.get_nns_by_vector(annoy_vect, 1)[0]  # get_nns_by_vector returns a list of ids

        if sim_vect_id == training_sample_id:
            print("Same id for training vect and similar vect")
            exit(0)

        sim_train_data[training_sample_id] = train_data[sim_vect_id]

    for valid_sample_id in range(valid_data.shape[0]):
        valid_sample_comment = valid_data[valid_sample_id][:max_comment_len]
        valid_sample_code = valid_data[valid_sample_id][max_comment_len + 1:]

        annoy_vect = valid_ann.get_item_vector(valid_sample_id)
        sim_vect_id = train_ann.get_nns_by_vector(annoy_vect, 1)[0]  # get_nns_by_vector returns a list of ids

        if sim_vect_id == valid_sample_id:
            print("Same id for training vect and similar vect")
            exit(0)

        sim_valid_data[valid_sample_id] = train_data[sim_vect_id]

    for test_sample_id in range(test_data.shape[0]):
        test_sample_comment = test_data[test_sample_id][:max_comment_len]
        test_sample_code = test_data[test_sample_id][max_comment_len + 1:]

        annoy_vect = test_ann.get_item_vector(test_sample_id)
        sim_vect_id = train_ann.get_nns_by_vector(annoy_vect, 1)[0]  # get_nns_by_vector returns a list of ids

        if sim_vect_id == test_sample_id:
            print("Same id for training vect and similar vect")
            exit(0)

        sim_test_data[test_sample_id] = train_data[sim_vect_id]

    output_test_vect_reference = test_data[:, max_comment_len:]
    output_test_vect_candidates = sim_test_data[:, max_comment_len:]

    token_dict = pickle.load(open("codevocab.pickle", "rb"))

    all_refs = []
    all_cands = []
    all_bleu_scores = []
    for j in range(test_data.shape[0]):
        ref = []
        cand = []
        for i in range(max_code_len):
            ref_el = output_test_vect_reference[j][i].item()
            cand_el = output_test_vect_candidates[j][i].item()
            if ref_el > 0:
                if ref_el in token_dict:
                    ref += [token_dict[ref_el]]
                if cand_el in token_dict:
                    cand += [token_dict[cand_el]]
        bleu = sentence_bleu([ref], cand)
        all_bleu_scores += [bleu]
        all_refs += [ref]
        all_cands += [cand]

    bleu_eval = {}
    bleu_eval["scores"] = all_bleu_scores
    bleu_eval["references"] = all_refs
    bleu_eval["candidates"] = all_cands

    print("Average BLEU score is ",
          sum(all_bleu_scores) / len(all_bleu_scores))
    pickle.dump(bleu_eval, open("results/bleu_evaluation_results.pickle",
                                "wb"))
Example #16

def make_distance_matrix(vectors):
    nb_vector = len(vectors)
    out = np.zeros([nb_vector, nb_vector])
    for i, v1 in enumerate(vectors):
        for j, v2 in enumerate(vectors):
            out[i][j] = np.linalg.norm(v1 - v2)

    return out
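A tiny usage sketch for make_distance_matrix() with made-up vectors, just to show the pairwise Euclidean output:

import numpy as np

vecs = [np.array([0.0, 0.0]), np.array([3.0, 4.0])]
print(make_distance_matrix(vecs))
# [[0. 5.]
#  [5. 0.]]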


if __name__ == "__main__":
    batch, vocab_size = convert_to_batch(SENTENCES)

    seq2seq = Seq2Seq(vocab_size, NB_HIDDEN, NB_HIDDEN, LEARNING_RATE)
    init_global = tf.global_variables_initializer()
    init_local = tf.local_variables_initializer()

    loss_placeholder = tf.placeholder(tf.float32, [])
    loss_summary = tf.summary.scalar("Loss", loss_placeholder)
    accuracy_placeholder = tf.placeholder(tf.float32, [])
    accuracy_summary = tf.summary.scalar("Accuracy", accuracy_placeholder)
    summary_writer = tf.summary.FileWriter(LOG_DIR, tf.get_default_graph())

    with tf.Session() as sess:
        sess.run([init_global, init_local])

        accuracy = 0.0
        epoch = 0
        while accuracy < STOP_TRESHOLD:
    elif SCOPE == "segment":
        feat_num, withheld_syll, syllables = novel_seg_data()
    elif SCOPE == "syllable":
        feat_num, withheld_syll, syllables = novel_syll_data()
    else:
        raise Exception(
            "Wrong scope! Must be from the set {feature, segment, syllable}.")

    X = np.array(syllables)
    Y = np.array([syll + syll for syll in syllables])

    #Build the model:
    model = Seq2Seq.seq2seq(input_dim=feat_num,
                            hidden_dim=feat_num * 3,
                            output_length=Y.shape[1],
                            output_dim=Y.shape[2],
                            batch_size=1,
                            learn_rate=0.001,
                            layer_type="lstm",
                            dropout=DROPOUT)

    #Train the model:
    hist = model.train(X, Y, epoch_num=EPOCHS, print_every=10)
    learning_curves.append(hist["Loss"])

    #Test the model on trained data:
    trained_IN = np.tile(X[0], (1, 1, 1))
    trained_OUT = np.tile(Y[0], (1, 1, 1))
    train_pred = model.predict(trained_IN)

    #Test the model on withheld data:
    withheld_IN = np.tile(np.array(withheld_syll), (1, 1, 1))
dim_y = len(w2i)
dim_tag = len(t2i)
num_sents = batch_size

print "#features = ", dim_x, "#labels = ", dim_y
print "#tag len = ", dim_tag


print "load test data..."
test_batch = 1
test_data_x_y = get_data.test_processing_long(r"data/post-test.txt", r"data/post-tag-test.txt", i2w, w2i, i2t, t2i, 100, test_batch)
reference_dic = cPickle.load(open(r'print_bleu_score/reference_dic.pkl', 'rb'))
print "done."

print "compiling..."
model = Seq2Seq(dim_x + dim_tag, dim_y + dim_tag, dim_y, dim_tag, hidden_size_encoder, hidden_size_decoder, cell, optimizer, drop_rate, num_sents)
# # load_error_model("GRU-200_best.model", model)

print "training..."


start = time.time()
g_error = 0.5
for i in xrange(10000):
    error = 0.0
    in_start = time.time()
    for get_num_start in xrange((full_data_len/read_data_batch)+1):
        read_data_batch_error = 0.0
        in_b_start = time.time()
        get_num_end = get_num_start*read_data_batch + read_data_batch
        if get_num_end > full_data_len:
Example #19
def train_model():
    train_noisy_Id, train_noisy_len, train_clean_Id, train_clean_len, train_answer_Id, train_answer_len, test_noisy_Id, test_noisy_len, test_clean_Id, test_clean_len, test_answer_Id, test_answer_len, eval_noisy_Id, eval_noisy_len, eval_clean_Id, eval_clean_len, eval_answer_Id, eval_answer_len, vocab_size = load_data(
    )

    max_answer_length = np.asarray(train_answer_Id).shape[1]
    max_target_length = np.asarray(train_clean_Id).shape[1]
    max_source_length = np.asarray(train_noisy_Id).shape[1]

    print "trian answer Lstm model"
    an_Lstm = Answer_LSTM.answer_lstm(batch_size, max_answer_length,
                                      vocab_size, embedding_size, num_units,
                                      None, None, None, None)
    an_Lstm.build_graph()

    saver = tf.train.Saver(sharded=False)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # saver.restore(sess, "../Seq_ckpt/pretrain-model")
        for batch in range(max_batches):
            answer_shuffle, ans_len = next_batch_Lstm(train_answer_Id,
                                                      train_answer_len,
                                                      batch_size)
            fd = {
                an_Lstm.answer_inputs: answer_shuffle,
                an_Lstm.answer_inputs_length: ans_len
            }
            l, _ = sess.run([an_Lstm.loss_answer, an_Lstm.train_lstm], fd)
            if batch == 0 or batch % batches_in_epoch == 0:
                print('batch {}'.format(batch))
                answer_shuffle, ans_len = next_batch_Lstm(
                    eval_answer_Id, eval_answer_len, batch_size)
                fd_eval = {
                    an_Lstm.answer_inputs: answer_shuffle,
                    an_Lstm.answer_inputs_length: ans_len
                }
                print('  minibatch loss: {}'.format(
                    sess.run(an_Lstm.loss_answer, fd_eval)))
        saver.save(sess, "../Seq_ckpt/pretrain-lstm")

    print "trian Seq2seq model"
    Seq2Seq_model = Seq2Seq.Seq2Seq(batch_size, max_source_length,
                                    max_target_length, vocab_size,
                                    embedding_size, num_units, None, None,
                                    None, None, None, None, None)
    Seq2Seq_model.build_graph()

    saver = tf.train.Saver(sharded=False)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # saver.restore(sess, "../Seq_ckpt/pretrain-model")
        for batch in range(max_batches):
            source_shuffle, source_len, target_shuffle, target_len = next_batch_Seq2seq(
                train_noisy_Id, train_noisy_len, train_clean_Id,
                train_clean_len, batch_size)
            fd = {
                Seq2Seq_model.encoder_inputs: source_shuffle,
                Seq2Seq_model.encoder_inputs_length: source_len,
                Seq2Seq_model.decoder_targets: target_shuffle,
                Seq2Seq_model.decoder_length: target_len
            }
            l, _ = sess.run(
                [Seq2Seq_model.loss_seq2seq, Seq2Seq_model.train_op], fd)
            if batch == 0 or batch % batches_in_epoch == 0:
                print('batch {}'.format(batch))
                source_shuffle, source_len, target_shuffle, target_len = next_batch_Seq2seq(
                    eval_noisy_Id, eval_noisy_len, eval_clean_Id,
                    eval_clean_len, batch_size)
                fd_eval = {
                    Seq2Seq_model.encoder_inputs: source_shuffle,
                    Seq2Seq_model.encoder_inputs_length: source_len,
                    Seq2Seq_model.decoder_targets: target_shuffle,
                    Seq2Seq_model.decoder_length: target_len
                }
                print('  minibatch loss: {}'.format(
                    sess.run(Seq2Seq_model.loss_seq2seq, fd_eval)))
        saver.save(sess, "../Seq_ckpt/pretrain-seq2seq")
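A hedged sketch of the next_batch_Lstm() helper used above: sample a random minibatch of padded answer sequences together with their lengths (this definition is an assumption, not the original helper).

import numpy as np

def next_batch_Lstm(answer_Id, answer_len, batch_size):
    idx = np.random.choice(len(answer_Id), batch_size, replace=False)
    return np.asarray(answer_Id)[idx], np.asarray(answer_len)[idx]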