criterion = LabelSmoothing(size=V, padding_idx=0, smoothing=0.0)
            # The criterion above is built over V classes with padding index 0 and
            # a smoothing value of 0.0. Both the criterion and the model get
            # .cuda() called on them right after construction.
            criterion.cuda()
            # V is passed as both of make_model's first two positional arguments.
            model = make_model(V, V, N=transformer_size)
            model.cuda()
            # Adam is created with lr=0 and handed to NoamOpt together with the
            # model's embedding size and the constants 1 and 400.
            # NOTE(review): presumably NoamOpt sets the learning rate on every step
            # and (1, 400) are (factor, warmup) -- confirm against NoamOpt.
            model_opt = NoamOpt(model.src_embed[0].d_model, 1, 400,
                                torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))

            # Per-epoch schedule for the data_gen_char pipeline: log progress,
            # run one training pass, then run one evaluation pass.
            for epoch in range(epoch_number):
                if epoch % epoches_of_loss_record == 0:
                    # Append a progress marker every `epoches_of_loss_record`
                    # epochs. The context manager closes the file even if the
                    # write raises, which the manual open()/close() pair did not.
                    with open("procedure.txt", "a+") as f:
                        f.write("step:%d \n" % epoch)
                print("step: ", epoch)

                # Training pass: the loss helper receives the optimizer wrapper.
                model.train()
                run_epoch("train", data_gen_char(dataloader, batch, nbatches), model,
                          SimpleLossCompute(model.generator, criterion, model_opt), nbatches, epoch)

                # Evaluation pass: the loss helper receives None instead of an
                # optimizer.
                # NOTE(review): the generator is asked for 1 batch here while
                # run_epoch is still given `nbatches` -- confirm this is intended.
                model.eval()
                run_epoch("test ", data_gen_char(dataloader, batch, 1), model,
                          SimpleLossCompute(model.generator, criterion, None), nbatches, epoch)

        else:

            dataloader = DataLoader_token_kg(filename, ents, chunk_len, device)
            V = dataloader.vocabularyLoader.n_tokens  # vocabolary size
            dataloader_ast = DataLoader_token_ast(filename, ents, "Path.txt", chunk_len, device, 9)
            V_ast = dataloader_ast.vocabularyLoader.n_token_ast

            criterion = LabelSmoothing(size=V, padding_idx=0, smoothing=0.0)
            criterion.to(device)
            #model = make_model_kg(V, V, "kg_embed/embedding.vec.json", N=transformer_size)
            model = make_model_ast(V_ast, V, V, "kg_embed/embedding.vec.json", N=transformer_size)
                torch.optim.Adam(model.parameters(),
                                 lr=0,
                                 betas=(0.9, 0.98),
                                 eps=1e-9))

            # Per-epoch schedule for the data_gen_overlap pipeline: log progress,
            # run one training pass, then run one evaluation pass.
            for epoch in range(epoch_number):
                if epoch % epoches_of_loss_record == 0:
                    # Append a progress marker every `epoches_of_loss_record`
                    # epochs. The context manager closes the file even if the
                    # write raises, which the manual open()/close() pair did not.
                    with open("procedure.txt", "a+") as f:
                        f.write("step:%d \n" % epoch)
                print("step: ", epoch)

                # Training pass: the loss helper receives the optimizer wrapper.
                model.train()
                run_epoch(
                    "train", data_gen_overlap(dataloader, batch, nbatches), model,
                    SimpleLossCompute(model.generator, criterion, model_opt),
                    nbatches, epoch)

                # Evaluation pass: same generator settings as training, but the
                # loss helper receives None instead of an optimizer.
                model.eval()
                run_epoch(
                    "test ", data_gen_overlap(dataloader, batch, nbatches), model,
                    SimpleLossCompute(model.generator, criterion, None),
                    nbatches, epoch)

        else:
            # NOTE(review): this `else:` is at the same indentation as the
            # previous `else:` and no matching `if`/`elif` is visible between
            # them -- confirm the intended branch structure.
            # Fallback pipeline: DataLoader_token with the plain make_model.
            dataloader = DataLoader_token(filename, chunk_len, device)
            V = dataloader.vocabularyLoader.n_tokens  # vocabulary size

            criterion = LabelSmoothing(size=V, padding_idx=0, smoothing=0.0)
            # NOTE(review): the loader is given `device`, yet the criterion and
            # model are moved with .cuda() rather than .to(device) -- confirm the
            # two always refer to the same device.
            criterion.cuda()
            # V is passed as both of make_model's first two positional arguments.
            model = make_model(V, V, N=transformer_size)
            model.cuda()