Example #1
File: main.py  Project: prof-Jian/J_lab
def pretrain_discriminator(model_dict,
                           optimizer_dict,
                           scheduler_dict,
                           dis_dataloader_params,
                           vocab_size,
                           positive_file,
                           negative_file,
                           batch_size,
                           epochs,
                           use_cuda=False,
                           temperature=1.0):
    discriminator = model_dict["discriminator"]

    d_optimizer = optimizer_dict["discriminator"]  # d_optimizer = optim.Adam(discriminator.parameters(), lr=d_lr)
    d_lr_scheduler = scheduler_dict["discriminator"]  # d_scheduler = optim.lr_scheduler.StepLR(d_optimizer, step_size=step_size, gamma=gamma)

    generate_samples(model_dict, negative_file, batch_size, use_cuda,
                     temperature)
    dis_dataloader_params["positive_filepath"] = positive_file
    dis_dataloader_params["negative_filepath"] = negative_file
    #print(dis_dataloader_params)
    dataloader = dis_data_loader(**dis_dataloader_params)  # this is where the data iterator is used

    cross_entropy = nn.CrossEntropyLoss()  # combines log-softmax with NLL (negative log likelihood)
    if use_cuda:
        cross_entropy = cross_entropy.cuda()

    for epoch in range(epochs):
        for i, sample in enumerate(dataloader):
            d_optimizer.zero_grad()
            data, label = sample["data"], sample["label"]  # unpack the sample; question: does this sample hold a full batch_size of examples?
            data = Variable(data)
            label = Variable(label)
            if use_cuda:
                data = data.cuda()
                label = label.cuda()
            outs = discriminator(data)  # from the hints inside Discriminator, data is batch_size * seq_len
            loss = cross_entropy(outs["score"], label.view(-1)) + discriminator.l2_loss()
            # l2_loss guards against overfitting; note it only penalizes the final linear layer's weights, not the CNN part
            loss.backward()
            d_optimizer.step()
            d_lr_scheduler.step()  # stepped after optimizer.step(); its step_size/gamma should be chosen with epochs in mind
            if i == 63:
                print("Pre-Discriminator loss: {:.5f}".format(loss))  # log the loss once per epoch, at batch index 63

    model_dict["discriminator"] = discriminator
    optimizer_dict["discriminator"] = d_optimizer
    scheduler_dict["discriminator"] = d_lr_scheduler
    return model_dict, optimizer_dict, scheduler_dict
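
A minimal sketch, assuming an Adam optimizer and a StepLR schedule, of how the "discriminator" entries of optimizer_dict and scheduler_dict could be built before calling pretrain_discriminator (the comments in Example #1 hint at this setup). The hyperparameter values and the stand-in model are placeholders, not taken from the project, and the generator/manager/worker entries are omitted:

import torch.nn as nn
import torch.optim as optim

discriminator = nn.Linear(64, 2)           # stand-in for the project's CNN discriminator
d_lr, step_size, gamma = 1e-4, 200, 0.5    # placeholder hyperparameters

d_optimizer = optim.Adam(discriminator.parameters(), lr=d_lr)
d_scheduler = optim.lr_scheduler.StepLR(d_optimizer, step_size=step_size, gamma=gamma)

optimizer_dict = {"discriminator": d_optimizer}   # other entries omitted in this sketch
scheduler_dict = {"discriminator": d_scheduler}

# Since PyTorch 1.1 the convention inside the training loop is
#   loss.backward(); d_optimizer.step(); d_scheduler.step()
# StepLR counts scheduler steps, so step_size must be chosen with the number
# of batches per epoch in mind when stepping once per batch as above.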
Example #2
def pretrain_discriminator(model_dict,
                           optimizer_dict,
                           scheduler_dict,
                           dis_dataloader_params,
                           vocab_size,
                           positive_file,
                           negative_file,
                           batch_size,
                           epochs,
                           use_cuda=False,
                           temperature=1.0):
    discriminator = model_dict["discriminator"]

    d_optimizer = optimizer_dict["discriminator"]
    d_lr_scheduler = scheduler_dict["discriminator"]

    generate_samples(model_dict, negative_file, batch_size, use_cuda,
                     temperature)
    dis_dataloader_params["positive_filepath"] = positive_file
    dis_dataloader_params["negative_filepath"] = negative_file
    #print(dis_dataloader_params)
    dataloader = dis_data_loader(**dis_dataloader_params)  # this is where the data iterator is used

    cross_entropy = nn.CrossEntropyLoss()  # combines log-softmax with NLL (negative log likelihood)
    if use_cuda:
        cross_entropy = cross_entropy.cuda()

    for epoch in range(epochs):
        for i, sample in enumerate(dataloader):
            d_optimizer.zero_grad()
            data, label = sample["data"], sample["label"]  # unpack the sample variables
            data = Variable(data)
            label = Variable(label)
            if use_cuda:
                data = data.cuda()
                label = label.cuda()
            outs = discriminator(data)
            loss = cross_entropy(outs["score"],
                                 label.view(-1)) + discriminator.l2_loss()
            loss.backward()
            d_optimizer.step()
            d_lr_scheduler.step()  # stepped after optimizer.step()
            if i == 63:
                print("Pre-Discriminator loss: {:.5f}".format(loss))

    model_dict["discriminator"] = discriminator
    optimizer_dict["discriminator"] = d_optimizer
    scheduler_dict["discriminator"] = d_lr_scheduler
    return model_dict, optimizer_dict, scheduler_dict
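
Both examples add discriminator.l2_loss() to the cross-entropy term, and the comment in Example #1 notes that this penalty covers only the final linear layer. A hedged sketch of what such a method might look like; the class name, feature_dim and l2_reg_lambda are hypothetical and not taken from the project:

import torch
import torch.nn as nn

class CNNDiscriminator(nn.Module):
    """Hypothetical fragment: CNN features followed by a linear scoring layer."""

    def __init__(self, feature_dim=128, num_classes=2, l2_reg_lambda=0.2):
        super().__init__()
        self.scorer = nn.Linear(feature_dim, num_classes)  # the only L2-penalized part
        self.l2_reg_lambda = l2_reg_lambda

    def l2_loss(self):
        # L2 penalty on the final linear layer only; the CNN part is left unregularized
        w, b = self.scorer.weight, self.scorer.bias
        return self.l2_reg_lambda * (torch.sum(w ** 2) + torch.sum(b ** 2))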
Example #3
def adversarial_train(model_dict,
                      optimizer_dict,
                      scheduler_dict,
                      dis_dataloader_params,
                      vocab_size,
                      pos_file,
                      neg_file,
                      batch_size,
                      gen_train_num=1,
                      dis_train_epoch=5,
                      dis_train_num=3,
                      max_norm=5.0,
                      rollout_num=4,
                      use_cuda=False,
                      temperature=1.0,
                      epoch=1,
                      tot_epoch=100):
    """
        Get all the models, optimizer and schedulers
    """
    generator = model_dict["generator"]
    discriminator = model_dict["discriminator"]
    worker = generator.worker
    manager = generator.manager

    m_optimizer = optimizer_dict["manager"]
    w_optimizer = optimizer_dict["worker"]
    d_optimizer = optimizer_dict["discriminator"]

    # zero only the Manager and Worker grads here; the discriminator's grads are zeroed inside its own loop below
    m_optimizer.zero_grad()
    w_optimizer.zero_grad()

    m_lr_scheduler = scheduler_dict["manager"]
    w_lr_scheduler = scheduler_dict["worker"]
    d_lr_scheduler = scheduler_dict["discriminator"]

    #Adversarial training for generator
    for _ in range(gen_train_num):
        m_optimizer.zero_grad()
        w_optimizer.zero_grad()

        #get all the return values
        adv_rets = recurrent_func("adv")(model_dict, use_cuda)
        real_goal = adv_rets["real_goal"]
        all_goal = adv_rets["all_goal"]
        prediction = adv_rets["prediction"]
        delta_feature = adv_rets["delta_feature"]
        delta_feature_for_worker = adv_rets["delta_feature_for_worker"]
        gen_token = adv_rets["gen_token"]

        rewards = get_rewards(model_dict, gen_token, rollout_num, use_cuda)
        m_loss = loss_func("adv_manager")(rewards, real_goal, delta_feature)
        w_loss = loss_func("adv_worker")(all_goal, delta_feature_for_worker,
                                         gen_token, prediction, vocab_size,
                                         use_cuda)

        # torch.autograd.grad computes each loss's gradients w.r.t. its own sub-module,
        # but returns them rather than accumulating into .grad (see the sketch after this example)
        torch.autograd.grad(m_loss, manager.parameters())
        torch.autograd.grad(w_loss, worker.parameters())
        clip_grad_norm_(manager.parameters(), max_norm)
        clip_grad_norm_(worker.parameters(), max_norm)
        m_optimizer.step()
        w_optimizer.step()
        m_lr_scheduler.step()  # schedulers are stepped after the optimizers
        w_lr_scheduler.step()
        print("Adv-Manager loss: {:.5f} Adv-Worker loss: {:.5f}".format(
            m_loss, w_loss))

    del adv_rets
    del real_goal
    del all_goal
    del prediction
    del delta_feature
    del delta_feature_for_worker
    del gen_token
    del rewards

    #Adversarial training for discriminator
    for n in range(dis_train_epoch):
        generate_samples(model_dict, neg_file, batch_size, use_cuda,
                         temperature)
        dis_dataloader_params["positive_filepath"] = pos_file
        dis_dataloader_params["negative_filepath"] = neg_file
        dataloader = dis_data_loader(**dis_dataloader_params)

        cross_entropy = nn.CrossEntropyLoss()
        if use_cuda:
            cross_entropy = cross_entropy.cuda()
        """
        for d-steps do
            Use current G, θm,θw to generate negative examples and combine with given positive examples S 
            Train discriminator Dφ for k epochs by Eq. (2)
        end for
        """
        for _ in range(dis_train_num):
            for i, sample in enumerate(dataloader):
                data, label = sample["data"], sample["label"]
                data = Variable(data)
                label = Variable(label)
                if use_cuda:
                    data = data.cuda(non_blocking=True)
                    label = label.cuda(non_blocking=True)
                outs = discriminator(data)
                loss = cross_entropy(outs["score"],
                                     label.view(-1)) + discriminator.l2_loss()
                d_optimizer.zero_grad()
                loss.backward()
                d_optimizer.step()
                d_lr_scheduler.step()
        print("{}/{} Adv-Discriminator Loss: {:.5f}".format(
            n, range(dis_train_epoch), loss))
    #Save all changes
    model_dict["discriminator"] = discriminator
    generator.worker = worker
    generator.manager = manager
    model_dict["generator"] = generator

    optimizer_dict["manager"] = m_optimizer
    optimizer_dict["worker"] = w_optimizer
    optimizer_dict["discriminator"] = d_optimizer

    scheduler_dict["manager"] = m_lr_scheduler
    scheduler_dict["worker"] = w_lr_scheduler
    scheduler_dict["disciminator"] = d_lr_scheduler

    return model_dict, optimizer_dict, scheduler_dict
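
In Example #3, torch.autograd.grad(m_loss, manager.parameters()) returns the Manager gradients but does not write them into the parameters' .grad fields, so the following m_optimizer.step() has nothing to apply. A hedged sketch of one way to make the update effective while keeping each loss restricted to its own sub-module; the helper name is hypothetical, and retain_graph/allow_unused are assumptions about how the two losses share the generator's graph:

import torch
from torch.nn.utils import clip_grad_norm_

def apply_manager_worker_grads(m_loss, w_loss, manager, worker,
                               m_optimizer, w_optimizer, max_norm=5.0):
    """Hypothetical helper: populate .grad for both sub-modules, then step."""
    m_optimizer.zero_grad()
    w_optimizer.zero_grad()

    # gradients of each loss w.r.t. its own sub-module only
    m_params = list(manager.parameters())
    w_params = list(worker.parameters())
    m_grads = torch.autograd.grad(m_loss, m_params, retain_graph=True, allow_unused=True)
    w_grads = torch.autograd.grad(w_loss, w_params, allow_unused=True)

    # copy the returned gradients into .grad so the optimizers can apply them
    for p, g in zip(m_params, m_grads):
        if g is not None:
            p.grad = g
    for p, g in zip(w_params, w_grads):
        if g is not None:
            p.grad = g

    clip_grad_norm_(manager.parameters(), max_norm)
    clip_grad_norm_(worker.parameters(), max_norm)
    m_optimizer.step()
    w_optimizer.step()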