Code Example #1
File: client.py  Project: OferMania/RAMCloud-1
def main():
    random.seed(0)

    d = Driver()
    t = Transport(d, isServer=False)
    services = Services(t)
    s = services.getService(TEST_ADDRESS)

    for i in itertools.count(1):
        #totalFrags = random.randrange(1, 2**16 - 1)
        totalFrags = random.randrange(1, 500)
        #totalFrags = 1000
        requestBuffer = Buffer(
            ['a' * t.dataPerFragment() for j in range(totalFrags)])
        responseBuffer = Buffer()
        start = gettime()
        r = t.clientSend(s, requestBuffer, responseBuffer)
        r.getReply()
        elapsedNs = gettime() - start
        resp = responseBuffer.getRange(0, responseBuffer.getTotalLength())
        req = requestBuffer.getRange(0, requestBuffer.getTotalLength())
        assert len(req) == len(resp), (len(req), len(resp), req[:10],
                                       resp[:10], req[-10:], resp[-10:])
        assert req == resp, (req, resp)
        print
        print "Message %d with %d frags OK in %dms" % (i, totalFrags,
                                                       elapsedNs / 1000000)
        d.stat()
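The loop above times each round trip with gettime() and divides the elapsed value by 1,000,000 to report milliseconds, so gettime() here is a nanosecond-resolution clock. A minimal sketch of such a helper using only the standard library (an assumption; the RAMCloud project's actual definition may differ):

import time

def gettime():
    # Wall-clock timestamp in nanoseconds (assumed behavior; the project's
    # own gettime() may use a different clock source).
    return int(time.time() * 1e9)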
Code Example #2
File: client.py  Project: 609467829/RAMCloud
def main():
    random.seed(0)

    d = Driver()
    t = Transport(d, isServer=False)
    services = Services(t)
    s = services.getService(TEST_ADDRESS)

    for i in itertools.count(1):
        #totalFrags = random.randrange(1, 2**16 - 1)
        totalFrags = random.randrange(1, 500)
        #totalFrags = 1000
        requestBuffer = Buffer(['a' * t.dataPerFragment() for j in range(totalFrags)])
        responseBuffer = Buffer()
        start = gettime()
        r = t.clientSend(s, requestBuffer, responseBuffer)
        r.getReply()
        elapsedNs = gettime() - start
        resp = responseBuffer.getRange(0, responseBuffer.getTotalLength())
        req = requestBuffer.getRange(0, requestBuffer.getTotalLength())
        assert len(req) == len(resp), (len(req), len(resp), req[:10], resp[:10],
                                       req[-10:], resp[-10:])
        assert req == resp, (req, resp)
        print
        print "Message %d with %d frags OK in %dms" % (i, totalFrags,
                                                       elapsedNs / 1000000)
        d.stat()
Code Example #3
def train(train_set,
          langs,
          embedding_size=600,
          learning_rate=0.01,
          iter_time=10,
          batch_size=32,
          get_loss=GET_LOSS,
          save_model=SAVE_MODEL,
          encoder_style=ENCODER_STYLE,
          use_model=USE_MODEL):
    """The training procedure."""
    # Set the timer
    start = time.time()

    # Initialize the model
    emb = docEmbedding(langs['rt'].n_words, langs['re'].n_words,
                       langs['rm'].n_words, embedding_size)
    emb.init_weights()

    if encoder_style == 'LIN':
        encoder = EncoderLIN(embedding_size, emb)
    elif encoder_style == 'BiLSTM':
        encoder = EncoderBiLSTM(embedding_size, emb)
    else:
        encoder = EncoderRNN(embedding_size, emb)

    decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words)

    if use_cuda:
        emb.cuda()
        encoder.cuda()
        decoder.cuda()

    if use_model is not None:
        encoder = load_model(encoder, use_model[0])
        decoder = load_model(decoder, use_model[1])

    # Choose optimizer
    loss_optimizer = optim.Adagrad(list(encoder.parameters()) +
                                   list(decoder.parameters()),
                                   lr=learning_rate,
                                   lr_decay=0,
                                   weight_decay=0)
    # decoder_optimizer = optim.Adagrad(decoder.parameters(), lr=learning_rate, lr_decay=0, weight_decay=0)

    criterion = nn.NLLLoss()

    total_loss = 0
    iteration = 0
    for epo in range(1, iter_time + 1):
        print("Epoch #%d" % (epo))
        # Get data

        train_iter = data_iter(train_set, batch_size=batch_size)
        for dt in train_iter:
            iteration += 1
            data, idx_data = get_batch(dt)
            rt, re, rm, summary = idx_data

            # Add paddings
            rt = addpaddings(rt)
            re = addpaddings(re)
            rm = addpaddings(rm)
            summary = addpaddings(summary)

            rt = Variable(torch.LongTensor(rt), requires_grad=False)
            re = Variable(torch.LongTensor(re), requires_grad=False)
            rm = Variable(torch.LongTensor(rm), requires_grad=False)

            # For Decoding
            summary = Variable(torch.LongTensor(summary), requires_grad=False)

            if use_cuda:
                rt, re, rm, summary = (rt.cuda(), re.cuda(),
                                       rm.cuda(), summary.cuda())

            # Get the average loss on the sentences
            loss = sentenceloss(rt, re, rm, summary, encoder, decoder,
                                loss_optimizer, criterion, embedding_size,
                                encoder_style)
            total_loss += loss

            # Print the information and save model
            if iteration % get_loss == 0:
                print("Time {}, iter {}, avg loss = {:.4f}".format(
                    gettime(start), iteration, total_loss / get_loss))
                total_loss = 0
        if epo % save_model == 0:
            torch.save(encoder.state_dict(),
                       "{}_encoder_{}".format(OUTPUT_FILE, iteration))
            torch.save(decoder.state_dict(),
                       "{}_decoder_{}".format(OUTPUT_FILE, iteration))
            print("Save the model at iter {}".format(iteration))

    return encoder, decoder
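In the training code above (and in the variants that follow), gettime(start) is interpolated into the progress message as "Time {}", with start taken from time.time(), so here gettime acts as an elapsed-time formatter rather than a raw clock. A plausible sketch under that assumption (the helper's real output format is not shown in these examples):

import time

def gettime(start):
    # Elapsed time since `start`, formatted as minutes and seconds
    # (assumed format; the project's helper may differ).
    minutes, seconds = divmod(int(time.time() - start), 60)
    return '{}m {}s'.format(minutes, seconds)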
Code Example #4
def train(train_set,
          langs,
          embedding_size=EMBEDDING_SIZE,
          learning_rate=LR,
          batch_size=BATCH_SIZE,
          get_loss=GET_LOSS,
          grad_clip=GRAD_CLIP,
          encoder_style=ENCODER_STYLE,
          decoder_style=DECODER_STYLE,
          to_copy=TOCOPY,
          epoch_time=EPOCH_TIME,
          layer_depth=LAYER_DEPTH,
          max_length=MAX_LENGTH,
          max_sentence=MAX_SENTENCES,
          save_model=SAVE_MODEL,
          output_file=OUTPUT_FILE,
          iter_num=iterNum,
          pretrain=PRETRAIN):
    """The training procedure."""
    # # Test arg parser (For Debugging)
    # print("embedding_size={}, learning_rate={}, batch_size={}, get_loss={}, grad_clip={},\
    #         encoder_style={}, decoder_style={}, max_length={},\
    #         max_sentence={}, save_model={}, output_file={}, to_copy={},\
    #         epoch={}, layer_depth={}, iter num={}, pretrain={}".format(
    #         embedding_size, learning_rate, batch_size, get_loss, grad_clip,
    #         encoder_style, decoder_style, max_length, max_sentence, save_model, output_file,
    #         to_copy, epoch_time, layer_depth, iter_num, pretrain))
    # Set the timer
    start = time.time()

    # Initialize the model
    emb = docEmbedding(langs['rt'].n_words, langs['re'].n_words,
                       langs['rm'].n_words, embedding_size)
    emb.init_weights()

    # Choose encoder style
    if encoder_style == 'LIN':
        encoder = EncoderLIN(embedding_size, emb)

    elif encoder_style == 'BiLSTM':
        encoder = EncoderBiLSTM(embedding_size, emb, n_layers=layer_depth)

    elif encoder_style == 'BiLSTMMax':
        encoder = EncoderBiLSTMMaxPool(embedding_size,
                                       emb,
                                       n_layers=layer_depth)

    elif encoder_style == 'HierarchicalBiLSTM':
        encoder_args = {
            "hidden_size": embedding_size,
            "local_embed": emb,
            "n_layers": layer_depth
        }
        encoder = HierarchicalBiLSTM(**encoder_args)

    elif encoder_style == 'HierarchicalLIN':
        encoder_args = {"hidden_size": embedding_size, "local_embed": emb}
        encoder = HierarchicalLIN(**encoder_args)

    else:
        # Initialize the hierarchical encoder RNN (both global and local)
        encoder_args = {
            "hidden_size": embedding_size,
            "local_embed": emb,
            "n_layers": layer_depth
        }
        encoder = HierarchicalRNN(**encoder_args)

    # Choose decoder style and training function
    if decoder_style == 'HierarchicalRNN':
        decoder = HierarchicalDecoder(embedding_size,
                                      langs['summary'].n_words,
                                      n_layers=layer_depth,
                                      copy=to_copy)
        train_func = Hierarchical_seq_train
    else:
        decoder = AttnDecoderRNN(embedding_size,
                                 langs['summary'].n_words,
                                 n_layers=layer_depth,
                                 copy=to_copy)
        train_func = Plain_seq_train

    if use_cuda:
        emb.cuda()
        encoder.cuda()
        decoder.cuda()

    # Choose optimizer
    loss_optimizer = optim.Adagrad(list(encoder.parameters()) +
                                   list(decoder.parameters()),
                                   lr=learning_rate,
                                   lr_decay=0,
                                   weight_decay=0)

    # loss_optimizer = optim.Adam(list(encoder.parameters()) + list(decoder.parameters()),
    #                             lr=learning_rate)

    # Load pre-trained model
    use_model = None
    if pretrain is not None and iter_num is not None:
        use_model = [
            './models/' + pretrain + '_' + s + '_' + str(iter_num)
            for s in ['encoder', 'decoder', 'optim']
        ]

    if use_model is not None:
        encoder = load_model(encoder, use_model[0])
        decoder = load_model(decoder, use_model[1])
        loss_optimizer.load_state_dict(torch.load(use_model[2]))
        print("Load Pretrain Model {}".format(use_model))
    else:
        print("Not use Pretrain Model")

    criterion = nn.NLLLoss()

    # Build up the model
    model = Seq2Seq(encoder, decoder, train_func, criterion, embedding_size,
                    langs)

    # print(encoder)
    # print(decoder)
    # print(loss_optimizer)

    total_loss = 0
    iteration = 0
    for epo in range(1, epoch_time + 1):
        # Start of an epoch
        print("Epoch #%d" % (epo))

        # Get data
        train_iter = data_iter(train_set, batch_size=batch_size)
        for dt in train_iter:
            iteration += 1
            data, idx_data = get_batch(dt)
            rt, re, rm, summary = idx_data

            # Debugging: check the input triplets
            # show_triplets(data[0][0])

            # Add paddings
            rt = addpaddings(rt)
            re = addpaddings(re)
            rm = addpaddings(rm)

            # For summary paddings, if the model is hierarchical then pad between sentences
            # If the batch_size is 1 then we don't need to do sentence padding
            if decoder_style == 'HierarchicalRNN' and batch_size != 1:
                summary = add_sentence_paddings(summary)
            else:
                summary = addpaddings(summary)

            rt = Variable(torch.LongTensor(rt), requires_grad=False)
            re = Variable(torch.LongTensor(re), requires_grad=False)
            rm = Variable(torch.LongTensor(rm), requires_grad=False)

            # For Decoding
            summary = Variable(torch.LongTensor(summary), requires_grad=False)

            if use_cuda:
                rt, re, rm, summary = (rt.cuda(), re.cuda(),
                                       rm.cuda(), summary.cuda())

            # Zero the gradient
            loss_optimizer.zero_grad()
            model.train()
            # Calculate the loss over a batch of input sequences
            loss = sequenceloss(rt, re, rm, summary, model)

            # Backpropagation
            loss.backward()
            torch.nn.utils.clip_grad_norm(
                list(model.encoder.parameters()) +
                list(model.decoder.parameters()), grad_clip)
            loss_optimizer.step()

            # Get the average loss on the sentences
            target_length = summary.size()[1]
            if float(torch.__version__[:3]) > 0.3:
                total_loss += loss.item()
            else:
                total_loss += loss.data[0]

            # Print the information and save model
            if iteration % get_loss == 0:
                print("Time {}, iter {}, Seq_len:{}, avg loss = {:.4f}".format(
                    gettime(start), iteration, target_length,
                    total_loss / get_loss))
                total_loss = 0

        if epo % save_model == 0:
            torch.save(encoder.state_dict(),
                       "models/{}_encoder_{}".format(output_file, iteration))
            torch.save(decoder.state_dict(),
                       "models/{}_decoder_{}".format(output_file, iteration))
            torch.save(loss_optimizer.state_dict(),
                       "models/{}_optim_{}".format(output_file, iteration))
            print("Save the model at iter {}".format(iteration))

    return model.encoder, model.decoder
Code Example #5
File: train.py  Project: rchanda/Data2Doc
def train(train_set,
          langs,
          embedding_size=600,
          learning_rate=0.01,
          iter_time=10,
          batch_size=32,
          get_loss=GET_LOSS,
          save_model=SAVE_MODEL,
          encoder_style=ENCODER_STYLE,
          decoder_style=DECODER_STYLE,
          use_model=USE_MODEL):
    """The training procedure."""
    # Set the timer
    start = time.time()

    encoder, decoder, loss_optimizer, train_func = model_initialization(
        encoder_style, decoder_style, langs, embedding_size, learning_rate,
        use_model)

    criterion = nn.NLLLoss()

    # Build up the model
    model = Seq2Seq(encoder, decoder, train_func, None, criterion,
                    embedding_size, langs)

    # print(encoder)
    # print(decoder)
    # print(loss_optimizer)

    total_loss = 0
    iteration = 0
    for epo in range(1, iter_time + 1):
        # Start of an epoch
        print("Epoch #%d" % (epo))

        # Get data
        train_iter = data_iter(train_set, batch_size=batch_size)
        for dt in train_iter:
            iteration += 1
            data, idx_data = get_batch(dt)
            print(idx_data)
            rt, re, rm, summary = idx_data

            # Debugging: check the input triplets
            # show_triplets(data[0][0])

            # Add paddings
            rt = addpaddings(rt)
            re = addpaddings(re)
            rm = addpaddings(rm)

            # For summary paddings, if the model is hierarchical then pad between sentences
            if decoder_style == 'HierarchicalRNN':
                summary = add_sentence_paddings(summary)
            else:
                summary = addpaddings(summary)

            rt = Variable(torch.LongTensor(rt), requires_grad=False)
            re = Variable(torch.LongTensor(re), requires_grad=False)
            rm = Variable(torch.LongTensor(rm), requires_grad=False)

            # DEBUG:
            if torch.sum(rm == 3).item() == 0:
                print('skip')
                continue

            # For Decoding
            summary = Variable(torch.LongTensor(summary), requires_grad=False)

            if use_cuda:
                rt, re, rm, summary = (rt.cuda(), re.cuda(),
                                       rm.cuda(), summary.cuda())

            # Zero the gradient
            loss_optimizer.zero_grad()
            model.train()
            # Calculate the loss over a batch of input sequences
            loss = sequenceloss(rt, re, rm, summary, model)

            # Backpropagation
            loss.backward()
            torch.nn.utils.clip_grad_norm(
                list(model.encoder.parameters()) +
                list(model.decoder.parameters()), GRAD_CLIP)
            loss_optimizer.step()

            # Get the average loss on the sentences
            target_length = summary.size()[1]
            if float(torch.__version__[:3]) > 0.3:
                total_loss += loss.item()
            else:
                total_loss += loss.data[0]

            # Print the information and save model
            if iteration % get_loss == 0:
                print("Time {}, iter {}, Seq_len:{}, avg loss = {:.4f}".format(
                    gettime(start), iteration, target_length,
                    total_loss / get_loss))
                total_loss = 0

        if epo % save_model == 0:
            torch.save(encoder.state_dict(),
                       "models/{}_encoder_{}".format(OUTPUT_FILE, iteration))
            torch.save(decoder.state_dict(),
                       "models/{}_decoder_{}".format(OUTPUT_FILE, iteration))
            torch.save(loss_optimizer.state_dict(),
                       "models/{}_optim_{}".format(OUTPUT_FILE, iteration))
            print("Save the model at iter {}".format(iteration))

    return model.encoder, model.decoder
Code Example #6
def train(train_set,
          langs,
          embedding_size=600,
          learning_rate=0.01,
          iter_time=10,
          batch_size=32,
          get_loss=GET_LOSS,
          save_model=SAVE_MODEL,
          encoder_style=ENCODER_STYLE,
          decoder_style=DECODER_STYLE,
          use_model=USE_MODEL):
    """The training procedure."""
    # Set the timer
    start = time.time()

    # Initialize the model
    emb = docEmbedding(langs['rt'].n_words, langs['re'].n_words,
                       langs['rm'].n_words, embedding_size)
    emb.init_weights()

    # Choose encoder style
    # TODO: Set up a choice for hierarchical or not
    if encoder_style == 'LIN':
        encoder = EncoderLIN(embedding_size, emb)
    elif encoder_style == 'BiLSTM':
        encoder = EncoderBiLSTM(embedding_size, emb)
    elif encoder_style == 'BiLSTMMax':
        encoder = EncoderBiLSTMMaxPooling(embedding_size, emb)
    elif encoder_style == 'HierarchicalBiLSTM':
        encoder_args = {"hidden_size": embedding_size, "local_embed": emb}
        encoder = HierarchicalBiLSTM(**encoder_args)
    elif encoder_style == 'HierarchicalLIN':
        encoder_args = {"hidden_size": embedding_size, "local_embed": emb}
        encoder = HierarchicalLIN(**encoder_args)
    else:
        # Initialize the hierarchical encoder RNN (both global and local)
        encoder_args = {"hidden_size": embedding_size, "local_embed": emb}
        encoder = HierarchicalEncoderRNN(**encoder_args)

    # Choose decoder style and training function
    if decoder_style == 'HierarchicalRNN':
        decoder = HierarchicalDecoder(embedding_size, langs['summary'].n_words)
        train_func = Hierarchical_seq_train
    else:
        decoder = AttnDecoderRNN(embedding_size, langs['summary'].n_words)
        train_func = Plain_seq_train

    if use_cuda:
        emb.cuda()
        encoder.cuda()
        decoder.cuda()

    # Choose optimizer
    loss_optimizer = optim.Adagrad(list(encoder.parameters()) +
                                   list(decoder.parameters()),
                                   lr=learning_rate,
                                   lr_decay=0,
                                   weight_decay=0)

    # loss_optimizer = optim.Adam(list(encoder.parameters()) + list(decoder.parameters()),
    #                             lr=learning_rate)

    if use_model is not None:
        encoder = load_model(encoder, use_model[0])
        decoder = load_model(decoder, use_model[1])
        loss_optimizer.load_state_dict(torch.load(use_model[2]))

    criterion = nn.NLLLoss()

    # Build up the model
    model = Seq2Seq(encoder, decoder, train_func, criterion, embedding_size,
                    langs)

    # print(encoder)
    # print(decoder)
    # print(loss_optimizer)

    total_loss = 0
    iteration = 0
    for epo in range(1, iter_time + 1):
        # Start of an epoch
        print("Epoch #%d" % (epo))

        # Get data
        train_iter = data_iter(train_set, batch_size=batch_size)
        for dt in train_iter:
            iteration += 1
            data, idx_data = get_batch(dt)
            rt, re, rm, summary = idx_data

            # Debugging: check the input triplets
            # show_triplets(data[0][0])

            # Add paddings
            rt = addpaddings(rt)
            re = addpaddings(re)
            rm = addpaddings(rm)

            # For summary paddings, if the model is hierarchical then pad between sentences
            if decoder_style == 'HierarchicalRNN':
                summary = add_sentence_paddings(summary)
            else:
                summary = addpaddings(summary)

            rt = Variable(torch.LongTensor(rt), requires_grad=False)
            re = Variable(torch.LongTensor(re), requires_grad=False)
            rm = Variable(torch.LongTensor(rm), requires_grad=False)

            # For Decoding
            summary = Variable(torch.LongTensor(summary), requires_grad=False)

            if use_cuda:
                rt, re, rm, summary = (rt.cuda(), re.cuda(),
                                       rm.cuda(), summary.cuda())

            # Zero the gradient
            loss_optimizer.zero_grad()
            model.train()
            # Calculate the loss over a batch of input sequences
            loss = sequenceloss(rt, re, rm, summary, model)

            # Backpropagation
            loss.backward()
            torch.nn.utils.clip_grad_norm(
                list(model.encoder.parameters()) +
                list(model.decoder.parameters()), GRAD_CLIP)
            loss_optimizer.step()

            # Get the average loss on the sentences
            target_length = summary.size()[1]
            if float(torch.__version__[:3]) > 0.3:
                total_loss += loss.item()
            else:
                total_loss += loss.data[0]

            # Print the information and save model
            if iteration % get_loss == 0:
                print("Time {}, iter {}, Seq_len:{}, avg loss = {:.4f}".format(
                    gettime(start), iteration, target_length,
                    total_loss / get_loss))
                total_loss = 0

        if epo % save_model == 0:
            torch.save(encoder.state_dict(),
                       "models/{}_encoder_{}".format(OUTPUT_FILE, iteration))
            torch.save(decoder.state_dict(),
                       "models/{}_decoder_{}".format(OUTPUT_FILE, iteration))
            torch.save(loss_optimizer.state_dict(),
                       "models/{}_optim_{}".format(OUTPUT_FILE, iteration))
            print("Save the model at iter {}".format(iteration))

    return model.encoder, model.decoder
Code Example #7
File: plot.py  Project: xspin/2018GMCM-F
def plot_alg1(Table, gatealloc, puckallock):
    narrow_count = 0
    wide_count = 0
    total_narrow_count = 0
    total_wide_count = 0
    for puck in Table['Puck'].dict:
        tp = util.bt(Table['Puck'][puck]['plane_type'])
        if tp == 'N': total_narrow_count += 1
        else: total_wide_count += 1
    for puck, gate in puckallock.items():
        if not gate: continue
        tp = util.bt(Table['Puck'][puck]['plane_type'])
        if tp == 'N': narrow_count += 1
        else: wide_count += 1

    plt.figure()
    plt.bar([0, 1], [wide_count, narrow_count],
            color='rb',
            tick_label=['Wide', 'Narrow'],
            align='center')
    plt.text(0, wide_count, '{}'.format(wide_count), fontsize=15)
    plt.text(1, narrow_count, '{}'.format(narrow_count), fontsize=15)
    plt.ylabel(u'数量')
    plt.savefig("output/alg1-1.pdf", bbox_inches='tight')

    plt.figure()
    plt.bar([0, 1],
            [wide_count / total_wide_count, narrow_count / total_narrow_count],
            color='rb',
            tick_label=['Wide', 'Narrow'],
            align='center')
    plt.text(0,
             wide_count / total_wide_count * 1.01,
             '{:.3f}'.format(wide_count / total_wide_count),
             fontsize=15)
    plt.text(1,
             narrow_count / total_narrow_count * 1.01,
             '{:.3f}'.format(narrow_count / total_narrow_count),
             fontsize=15)
    plt.ylabel(u'比例')
    plt.savefig("output/alg1-2.pdf", bbox_inches='tight')
    plt.show()

    T_count = 0
    S_count = 0
    T_time = 0
    S_time = 0
    start_date = datetime.datetime(2018, 1, 20)
    end_date = datetime.datetime(2018, 1, 21)
    ztime = datetime.time(0, 0, 0)
    start_time = util.gettime(start_date, ztime)
    end_time = util.gettime(end_date, ztime)
    for gate, pks in gatealloc.items():
        if len(pks) == 0: continue
        gate_time = 0
        for pk in pks:
            sdate = Table['Puck'][pk]['in_date']
            stime = Table['Puck'][pk]['in_time']
            edate = Table['Puck'][pk]['out_date']
            etime = Table['Puck'][pk]['out_time']
            st = util.gettime(sdate, stime)
            et = util.gettime(edate, etime)
            # Clip each puck's occupancy to the one-day window
            if st < start_time: t = et - start_time
            elif et > end_time: t = end_time - st
            else: t = et - st
            # Sum the occupied time of every puck assigned to this gate
            gate_time += t
        h = Table['Gate'][gate]['hall']
        if h == 'T':
            T_count += 1
            T_time += gate_time
        elif h == 'S':
            S_count += 1
            S_time += gate_time
        else:
            print('Error hall:', h)
    T_time = T_time / (24 * 60) / T_count
    S_time = S_time / (24 * 60) / S_count
    plt.figure()
    plt.bar([0, 1], [T_count, S_count],
            color='rb',
            tick_label=['T', 'S'],
            align='center')
    plt.text(0, T_count, '{}'.format(T_count), fontsize=15)
    plt.text(1, S_count, '{}'.format(S_count), fontsize=15)
    plt.ylabel(u'数量')
    plt.savefig("output/alg1-3.pdf", bbox_inches='tight')

    plt.figure()
    plt.bar([0, 1], [T_time, S_time],
            color='rb',
            tick_label=['T', 'S'],
            align='center')
    plt.text(0, T_time * 1.01, '{:.2f}'.format(T_time), fontsize=15)
    plt.text(1, S_time * 1.01, '{:.2f}'.format(S_time), fontsize=15)
    plt.ylabel(u'平均使用率')
    plt.savefig("output/alg1-4.pdf", bbox_inches='tight')
Code Example #8
def train(train_set,
          langs,
          embedding_size=EMBEDDING_SIZE,
          learning_rate=LR,
          batch_size=BATCH_SIZE,
          get_loss=GET_LOSS,
          grad_clip=GRAD_CLIP,
          encoder_style=ENCODER_STYLE,
          decoder_style=DECODER_STYLE,
          to_copy=TOCOPY,
          epoch_time=EPOCH_TIME,
          layer_depth=LAYER_DEPTH,
          max_length=MAX_LENGTH,
          max_sentence=MAX_SENTENCES,
          save_model=SAVE_MODEL,
          output_file=OUTPUT_FILE,
          iter_num=iterNum,
          pretrain=PRETRAIN):

    start = time.time()

    emb = docEmbedding(langs['rt'].n_words, langs['re'].n_words,
                       langs['rm'].n_words, embedding_size)
    emb.init_weights()

    # Initialize the hierarchical encoder RNN (both global and local)
    encoder_args = {
        "hidden_size": embedding_size,
        "local_embed": emb,
        "n_layers": layer_depth
    }
    encoder = HierarchicalRNN(**encoder_args)

    # Choose the decoder style and its training function
    if decoder_style == 'HierarchicalRNN':
        decoder = HierarchicalDecoder(embedding_size,
                                      langs['summary'].n_words,
                                      n_layers=layer_depth,
                                      copy=to_copy)
        train_func = Hierarchical_seq_train
    else:
        decoder = AttnDecoderRNN(embedding_size,
                                 langs['summary'].n_words,
                                 n_layers=layer_depth,
                                 copy=to_copy)
        train_func = Plain_seq_train

    if use_cuda:
        emb.cuda()
        encoder.cuda()
        decoder.cuda()

    # Choose the optimizer
    loss_optimizer = optim.Adagrad(list(encoder.parameters()) +
                                   list(decoder.parameters()),
                                   lr=learning_rate,
                                   lr_decay=0,
                                   weight_decay=0)

    # Load a pre-trained model if one is specified
    use_model = None
    if pretrain is not None and iter_num is not None:
        use_model = [
            './models/' + pretrain + '_' + s + '_' + str(iter_num)
            for s in ['encoder', 'decoder', 'optim']
        ]

    if use_model is not None:
        encoder = load_model(encoder, use_model[0])
        decoder = load_model(decoder, use_model[1])
        loss_optimizer.load_state_dict(torch.load(use_model[2]))
        print("Load Pretrain Model {}".format(use_model))
    else:
        print("Not use Pretrain Model")

    criterion = nn.NLLLoss()

    model = Seq2Seq(encoder, decoder, train_func, criterion, embedding_size,
                    langs)

    total_loss = 0
    iteration = 0
    for epo in range(1, epoch_time + 1):
        print("Epoch #%d" % (epo))

        train_iter = data_iter(train_set, batch_size=batch_size)
        for dt in train_iter:
            iteration += 1
            data, idx_data = get_batch(dt)
            rt, re, rm, summary = idx_data

            rt = addpaddings(rt)
            re = addpaddings(re)
            rm = addpaddings(rm)

            # For summary paddings: if the decoder is hierarchical, pad between
            # sentences (not needed when batch_size is 1)
            if decoder_style == 'HierarchicalRNN' and batch_size != 1:
                summary = add_sentence_paddings(summary)
            else:
                summary = addpaddings(summary)

            rt = Variable(torch.LongTensor(rt), requires_grad=False)
            re = Variable(torch.LongTensor(re), requires_grad=False)
            rm = Variable(torch.LongTensor(rm), requires_grad=False)

            summary = Variable(torch.LongTensor(summary), requires_grad=False)

            if use_cuda:
                rt, re, rm, summary = (rt.cuda(), re.cuda(),
                                       rm.cuda(), summary.cuda())

            loss_optimizer.zero_grad()
            model.train()

            loss = sequenceloss(rt, re, rm, summary, model)

            loss.backward()
            torch.nn.utils.clip_grad_norm(
                list(model.encoder.parameters()) +
                list(model.decoder.parameters()), grad_clip)
            loss_optimizer.step()

            target_length = summary.size()[1]
            if float(torch.__version__[:3]) > 0.3:
                total_loss += loss.item() / target_length
            else:
                total_loss += loss.data[0] / target_length

            if iteration % get_loss == 0:
                print("Time {}, iter {}, Seq_len:{}, avg loss = {:.4f}".format(
                    gettime(start), iteration, target_length,
                    total_loss / get_loss))
                total_loss = 0

        if epo % save_model == 0:
            torch.save(encoder.state_dict(),
                       "models/{}_encoder_{}".format(output_file, iteration))
            torch.save(decoder.state_dict(),
                       "models/{}_decoder_{}".format(output_file, iteration))
            torch.save(loss_optimizer.state_dict(),
                       "models/{}_optim_{}".format(output_file, iteration))
            print("Save the model at iter {}".format(iteration))

    return model.encoder, model.decoder