Example #1
0
def train(reload_dataset=False, pretrain_model_path=None, optim_fu='adam'):
    """Train the LSTM graph-attention compression model.

    Runs a full training loop: loads word/flag embeddings, builds the
    GraphDataset loader, and optimizes an LSTMGraphAttn model with Adam,
    logging per-step loss to TensorBoard and saving a checkpoint per epoch.

    Args:
        reload_dataset: stored into RELOAD_DATASET below but not read again
            in this function — presumably consumed elsewhere; TODO confirm.
        pretrain_model_path: unused in this function body.
        optim_fu: unused; Adam with lr=0.0001 is always used.
    """
    write = SummaryWriter()

    vis = visdom.Visdom(env="Graph_Attention_compression")
    viz = Visdom_line(vis=vis, win="Graph_Attention")

    # Some configuration.  NOTE(review): several constants here (DATA_DIR,
    # DICT_PATH, EMBEDDING_PATH_RANDOM, SAVE_EMBEDDING, SAVE_DATASET_OBJ,
    # PRINT_STEP, SAVE_STEP) are not referenced later in this function.
    DATA_DIR = './data/train_pairs'
    DICT_PATH = './checkpoint/dict_20000.pkl'
    EMBEDDING_PATH_RANDOM = './model/save_embedding_97and3.ckpt'
    SAVE_EMBEDDING = False
    RELOAD_DATASET = reload_dataset

    SAVE_DATASET_OBJ = './data/dataset.pkl'
    SAVE_MODEL_PATH = './checkpoint/Graph_Attn/'

    PRINT_STEP = 10
    SAVE_STEP = 1
    GPU_NUM = 0

    # Fixed seed for reproducibility; pin all CUDA work to one device.
    torch.manual_seed(2)
    torch.cuda.set_device(GPU_NUM)

    config = GraphAttenConfig()

    model = LSTMGraphAttn(config)
    model.cuda()

    if os.path.exists(SAVE_MODEL_PATH) is False:
        os.makedirs(SAVE_MODEL_PATH)

    # Load embeddings: word embedding for tokens, flag embedding for the
    # per-step keep/drop label fed back into the decoder input.
    embed = get_word_embed().cuda()
    embed_flag = get_flag_embed().cuda()
    vocab = get_vocab()

    # ignore_index=2 — index 2 is presumably the padding label; verify
    # against the dataset's label encoding.
    criterion = nn.CrossEntropyLoss(ignore_index=2)
    optimizer = optim.Adam(model.parameters(), lr=0.0001)

    trainset = GraphDataset(vocab=vocab)
    trainloader = DataLoader(dataset=trainset,
                             batch_size=config.batch_size,
                             collate_fn=graph_fn,
                             pin_memory=True,
                             shuffle=True)

    global_step = 0
    for epoch in range(config.epoch):
        epoch_loss = 0
        for index, (src, trg, neighbor,
                    labels) in enumerate(tqdm(trainloader)):
            src = embed(src.cuda())
            trg = embed(trg.cuda())
            neighbor = embed(neighbor.cuda())

            # Pad the encoder input with 3 zero "flag" dims so its feature
            # width matches the decoder input (word embed + 3-dim flag).
            flag4encoder = torch.zeros(src.shape[0], src.shape[1], 3).cuda()
            src = torch.cat([src, flag4encoder], dim=2)

            # Teacher forcing on flags: shift labels right by one, with an
            # all-zeros start flag, then embed them for the decoder input.
            flag4decoder = torch.zeros([labels.shape[0], 1]).long()
            flag4decoder = torch.cat([flag4decoder, labels[:, :-1]],
                                     dim=1).cuda()
            flag4decoder = embed_flag(flag4decoder)

            # Same zero-flag padding for the neighbor tensor (one extra
            # leading dimension than src).
            flag4neighbor = torch.zeros(neighbor.shape[0], neighbor.shape[1],
                                        neighbor.shape[2], 3).cuda()
            neighbor = torch.cat([neighbor, flag4neighbor], dim=-1)

            trg = torch.cat([trg, flag4decoder], dim=2)
            labels = labels.cuda()

            # Binary keep/drop prediction per token: flatten to (N, 2)
            # logits vs (N,) labels for cross-entropy.
            out = model(src, trg, neighbor)
            out = out.view(-1, 2)
            labels = labels.view(-1)
            loss = criterion(out, labels)
            epoch_loss += loss.item()
            print(loss.item())
            optimizer.zero_grad()
            loss.backward()

            optimizer.step()

            write.add_scalar('loss', loss.item(), global_step)
            global_step += 1

        # One checkpoint and one aggregate-loss point per epoch.
        model.save(SAVE_MODEL_PATH + 'model-' + str(epoch) + '.ckpt')
        write.add_scalar('epoch_loss', epoch_loss, epoch)
Example #2
0
def train(reload_dataset=False, pretrain_model_path=None, optim_fu='adam'):
    """Train the syntax-gated LSTM compression model.

    Runs a full training loop: loads word/flag embeddings, builds the
    SyntaxDataset loader, and optimizes a SyntaxLSTM with Adam, logging a
    PRINT_STEP-averaged loss to TensorBoard and saving a checkpoint per
    epoch.

    Args:
        reload_dataset: stored into RELOAD_DATASET below but not read again
            in this function — presumably consumed elsewhere; TODO confirm.
        pretrain_model_path: unused in this function body.
        optim_fu: unused; Adam with lr=0.0001 is always used.
    """
    write = SummaryWriter()

    vis = visdom.Visdom(env="syntax_compression")
    viz = Visdom_line(vis=vis, win="syntax_geted_lstm")

    # Some configuration.  NOTE(review): several constants here (DATA_DIR,
    # DICT_PATH, EMBEDDING_PATH_RANDOM, SAVE_EMBEDDING, SAVE_DATASET_OBJ,
    # SAVE_STEP) are not referenced later in this function.
    DATA_DIR = '../data/train_pairs'
    DICT_PATH = '../checkpoint/dict_20000.pkl'
    EMBEDDING_PATH_RANDOM = '../model/save_embedding_97and3.ckpt'

    SAVE_EMBEDDING = False
    RELOAD_DATASET = reload_dataset

    SAVE_DATASET_OBJ = '../data/dataset.pkl'
    SAVE_MODEL_PATH = './checkpoint/syntax_gate_lstm/'

    PRINT_STEP = 10
    SAVE_STEP = 1
    GPU_NUM = 1

    # Fixed seed for reproducibility; pin all CUDA work to one device.
    torch.manual_seed(2)
    torch.cuda.set_device(GPU_NUM)

    model = SyntaxLSTM(100, 100, 10)
    model.cuda()

    if os.path.exists(SAVE_MODEL_PATH) is False:
        os.makedirs(SAVE_MODEL_PATH)

    # Load embeddings: word embedding for tokens, flag embedding for the
    # per-step keep/drop label fed back into the decoder input.
    embed = get_word_embed().cuda()
    embed_flag = get_flag_embed().cuda()

    vocab = get_vocab()

    # ignore_index=2 — index 2 is presumably the padding label; verify
    # against the dataset's label encoding.
    criterion = nn.CrossEntropyLoss(ignore_index=2)

    # Only optimize parameters that require gradients (e.g. a frozen
    # embedding stays untouched).
    parameters = filter(lambda p: p.requires_grad, model.parameters())
    optimizer = optim.Adam(parameters, lr=0.0001)

    trainset = SyntaxDataset(vocab=vocab, reverse_src=True)
    trainloader = DataLoader(dataset=trainset,
                             batch_size=200,
                             collate_fn=syntax_fn,
                             pin_memory=True,
                             num_workers=5,
                             shuffle=True)

    global_step = 0
    loss_print = 0
    step_print = 0
    # FIX: removed the dead, misspelled `PINRT_STEP = 10` that used to sit
    # here — the logging branch below reads PRINT_STEP, defined above.
    for epoch in range(500):
        epoch_loss = 0
        for index, (src, trg, labels, tags) in enumerate(tqdm(trainloader)):
            src = embed(src.cuda())
            trg = embed(trg.cuda())
            tags = tags.cuda()

            # Pad the encoder input with 3 zero "flag" dims so its feature
            # width matches the decoder input (word embed + 3-dim flag).
            flag4encoder = torch.zeros(src.shape[0], src.shape[1], 3).cuda()
            src = torch.cat([src, flag4encoder], dim=2)

            # Teacher forcing on flags: shift labels right by one, with an
            # all-zeros start flag at the front of the decoder sequence.
            flag4decoder = torch.zeros([labels.shape[0],
                                        1]).long()
            flag4decoder = torch.cat([flag4decoder, labels[:, :-1]],
                                     dim=1).cuda()
            flag4decoder = embed_flag(flag4decoder)

            trg = torch.cat([trg, flag4decoder], dim=2)  # append 3 flag dims
            labels = labels.cuda()

            # Binary keep/drop prediction per token: flatten to (N, 2)
            # logits vs (N,) labels for cross-entropy.
            out = model(src, trg, tags)
            out = out.view(-1, 2)
            labels = labels.view(-1)
            loss = criterion(out, labels)
            epoch_loss += loss.item()
            loss_print += loss.item()
            print(loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            global_step += 1

            # Log the mean loss over the last PRINT_STEP batches.
            if global_step % PRINT_STEP == 0:
                write.add_scalar('loss', loss_print / PRINT_STEP, step_print)
                step_print += 1
                loss_print = 0

        # One checkpoint and one aggregate-loss point per epoch.
        model.save(SAVE_MODEL_PATH + 'model-' + str(epoch) + '.ckpt')
        write.add_scalar('epoch_loss', epoch_loss, epoch)
Example #3
0
def rl_train_3layer():
    """GAN-style training for the 3-layer LSTM compressor.

    Three phases:
      1. Optionally pre-train the generator (LSTM3Layers) with MLE.
      2. Optionally pre-train the semantic discriminator against the
         (pre-trained) generator.
      3. Adversarial training: alternate generator steps (NLL reward
         loss) and discriminator steps.
    """
    vis = visdom.Visdom(env="RL_compression")
    vis_g = Visdom_line(vis=vis, win="generator", name="Generator Loss")
    vis_d = Visdom_line(vis=vis,
                        win="discriminator",
                        name="Discriminator Loss")
    cuda_num = 1
    get_pretrain_generator = True  # True to load the pre train model
    get_pretrain_discriminator = False
    g_save_dir = './checkpoint/GAN_train/generator/'
    d_save_dir = './checkpoint/GAN_train/discriminator/'

    BATCH_SIZE = 100
    torch.cuda.set_device(cuda_num)

    # Build the generator.
    g_config = LSTM3LayersConfig()
    generator = LSTM3Layers(g_config).cuda()

    # Load embeddings: word embedding for tokens, flag embedding for the
    # trailing 3 keep/drop flag dims.
    embed = get_word_embed().cuda()
    embed_flag = get_flag_embed().cuda()
    vocab = get_vocab()

    # ignore_index=2 — index 2 is presumably the padding label; verify
    # against the dataset's label encoding.
    g_criterion = nn.CrossEntropyLoss(
        ignore_index=2)
    # NOTE(review): weight_decay=0.1 is unusually strong for Adam — confirm
    # this is intentional.
    g_optimizer = optim.Adam(generator.parameters(),
                             g_config.lr,
                             weight_decay=0.1)

    g_trainloader = get_gen_trainloader(vocab=vocab,
                                        batch_size=g_config.batch_size,
                                        reload=True)
    d_config = SeDiscriminatorConfig()
    discriminator = SemanticDiscriminator(d_config)
    d_trainloader = get_gen_trainloader(vocab=vocab,
                                        batch_size=d_config.batch_size,
                                        reload=False)
    if torch.cuda.is_available():
        discriminator.cuda()
    d_criterion = nn.CrossEntropyLoss(
        ignore_index=2)  # ignore the index of padding
    d_optimizer = optim.Adam(discriminator.parameters(), lr=d_config.lr)

    # Phase 1: MLE pre-training of the generator (skipped when a
    # pre-trained checkpoint is to be loaded instead).
    if get_pretrain_generator is False:
        print("Pre train the generator...")
        vis.text('Pre train the generator at {}'.format(
            time.strftime('%x %X')),
                 win='log')
        for i in range(g_config.epoch):
            train_generator(model=generator,
                            embed=embed,
                            flag_embed=embed_flag,
                            dataloader=g_trainloader,
                            vis=vis_g,
                            criterion=g_criterion,
                            optimizer=g_optimizer,
                            cuda=True,
                            name="Trainig Generator Epoch {0}".format(i))
            generator.save(g_save_dir + 'pretrain_epoch{}'.format(i))

    # Phase 2: pre-train the discriminator against the generator.
    if get_pretrain_discriminator is False:
        if get_pretrain_generator is True:
            generator.load(g_save_dir + 'pretrain_epoch{}'.format(0))
        print("Pre train the Discriminator...")
        vis.text('Pre train the Discriminator at {}'.format(
            time.strftime('%x %X')),
                 win='log')

        for j in range(d_config.epoch):
            train_discriminator(
                gen=generator,
                dis=discriminator,
                embed=embed,
                flag_embed=embed_flag,
                dataloader=d_trainloader,
                vis=vis_d,
                criterion=d_criterion,
                optimizer=d_optimizer,
                cuda=True,
                name="Training Discriminator Epoch {0}".format(j))
            discriminator.save(d_save_dir + 'pretrain_epoch{}'.format(j))
    else:
        print("Loading the pre train model")
        vis.text("Get the pre train model at {}".format(
            time.strftime('%x %X')),
                 win='log')
        generator.load(g_save_dir + 'pretrain_epoch{}'.format(0))
        discriminator.load(d_save_dir + 'pretrain_epoch{}'.format(0))

    # Phase 3: adversarial training — 1 generator step then 2
    # discriminator steps per adversarial epoch.
    ad_batch_size = 250
    ad_train_loader = get_gen_trainloader(vocab=vocab,
                                          batch_size=ad_batch_size,
                                          reload=False)
    ad_criterion = nn.NLLLoss(ignore_index=2)
    ad_optimizer = optim.Adam(generator.parameters(), lr=0.001)

    for ad_epoch in range(5):
        for g_step in range(1):
            # FIX: pass the adversarial-phase optimizer (lr=0.001, no weight
            # decay) built above; previously g_optimizer was passed, leaving
            # ad_optimizer dead and reusing the pre-train lr/weight_decay.
            train_ad_generator(g_model=generator,
                               d_model=discriminator,
                               embed=embed,
                               flag_embed=embed_flag,
                               dataloader=ad_train_loader,
                               vis=vis_g,
                               name="Ad training Epoch {}".format(g_step),
                               test_criterion=g_criterion,
                               criterion=ad_criterion,
                               optimizer=ad_optimizer,
                               cuda=True)
        for d_step in range(2):
            # NOTE(review): unlike the pre-train call, this omits name= and
            # cuda= — confirm train_discriminator's defaults are acceptable.
            train_discriminator(dis=discriminator,
                                gen=generator,
                                embed=embed,
                                flag_embed=embed_flag,
                                dataloader=d_trainloader,
                                vis=vis_d,
                                criterion=d_criterion,
                                optimizer=d_optimizer)