Example 1
# assumes `import torch` and the qelos library imported as `q`, plus the project
# helpers gen_toy_data, Discriminator, Decoder, SeqGAN_Base and SeqGAN_DCL
def run_toy(lr=0.001,
            seqlen=8,
            batsize=10,
            epochs=1000,
            embdim=32,
            innerdim=64,
            z_dim=32,
            noaccumulate=False,
            usebase=False,
            ):
    # generate some toy data
    N = 1000
    data, vocab = gen_toy_data(N, seqlen=seqlen, mode="copymiddlefixed")
    datasm = q.StringMatrix()
    datasm.set_dictionary(vocab)
    datasm.tokenize = lambda x: list(x)
    for data_e in data:
        datasm.add(data_e)
    datasm.finalize()

    real_data = q.dataset(datasm.matrix)                     # real character sequences
    gen_data_d = q.gan.gauss_dataset(z_dim, len(real_data))  # noise of the same dataset length
    disc_data = q.datacat([real_data, gen_data_d], 1)        # each sample: (real example, noise vector)

    gen_data = q.gan.gauss_dataset(z_dim)                    # noise stream for generator updates

    disc_data = q.dataload(disc_data, batch_size=batsize, shuffle=True)
    gen_data = q.dataload(gen_data, batch_size=batsize, shuffle=True)

    discriminator = Discriminator(datasm.D, embdim, innerdim)
    generator = Decoder(datasm.D, embdim, z_dim, "<START>", innerdim, maxtime=seqlen)

    SeqGAN = SeqGAN_Base if usebase else SeqGAN_DCL

    disc_model = SeqGAN(discriminator, generator, gan_mode=q.gan.GAN.DISC_TRAIN, accumulate=not noaccumulate)
    gen_model = SeqGAN(discriminator, generator, gan_mode=q.gan.GAN.GEN_TRAIN, accumulate=not noaccumulate)

    disc_optim = torch.optim.Adam(q.params_of(discriminator), lr=lr)
    gen_optim = torch.optim.Adam(q.params_of(generator), lr=lr)

    # q.no_losses(2) installs two placeholder loss slots, presumably because the GAN models compute their losses internally
    disc_trainer = q.trainer(disc_model).on(disc_data).optimizer(disc_optim).loss(q.no_losses(2))
    gen_trainer = q.trainer(gen_model).on(gen_data).optimizer(gen_optim).loss(q.no_losses(2))

    gan_trainer = q.gan.GANTrainer(disc_trainer, gen_trainer)

    gan_trainer.run(epochs, disciters=5, geniters=1, burnin=500)

    # print some predictions:
    with torch.no_grad():
        rvocab = {v: k for k, v in vocab.items()}
        q.batch_reset(generator)
        eval_z = torch.randn(50, z_dim)
        eval_y, _ = generator(eval_z)
        for i in range(len(eval_y)):
            prow = "".join([rvocab[mij] for mij in eval_y[i].numpy()])
            print(prow)

    print("done")
Example 2
def run_cond_toy(lr=0.001,
                 seqlen=8,
                 batsize=10,
                 epochs=1000,
                 embdim=5,
                 innerdim=32,
                 z_dim=5,
                 usebase=False,
                 nrexamples=1000):
    data, vocab = gen_toy_data(nrexamples, seqlen=seqlen, mode="twointerleaveboth")
    datasm = q.StringMatrix()
    datasm.set_dictionary(vocab)
    datasm.tokenize = lambda x: list(x)
    for data_e in data:
        datasm.add(data_e)
    datasm.finalize()

    real_data = q.dataset(datasm.matrix)
    shuffled_datasm_matrix = datasm.matrix + 0   # `+ 0` copies the matrix so the real data stays intact
    np.random.shuffle(shuffled_datasm_matrix)    # shuffle rows in place; these serve as the fake examples
    fake_data = q.dataset(shuffled_datasm_matrix)
    disc_data = q.datacat([real_data, fake_data], 1)

    gen_data = q.dataset(datasm.matrix)

    disc_data = q.dataload(disc_data, batch_size=batsize, shuffle=True)
    gen_data = q.dataload(gen_data, batch_size=batsize, shuffle=True)

    discr = Discriminator(datasm.D, embdim, innerdim)
    decoder = Decoder_Cond(datasm.D, embdim, z_dim, "<START>", innerdim)

    disc_model = SeqGAN_Cond(discr, decoder, gan_mode=q.gan.GAN.DISC_TRAIN)
    gen_model = SeqGAN_Cond(discr, decoder, gan_mode=q.gan.GAN.GEN_TRAIN)

    disc_optim = torch.optim.Adam(q.params_of(discr), lr=lr)
    gen_optim = torch.optim.Adam(q.params_of(decoder), lr=lr)

    disc_trainer = q.trainer(disc_model).on(disc_data).optimizer(disc_optim).loss(q.no_losses(2))
    gen_trainer = q.trainer(gen_model).on(gen_data).optimizer(gen_optim).loss(q.no_losses(2))

    gan_trainer = q.gan.GANTrainer(disc_trainer, gen_trainer)

    gan_trainer.run(epochs, disciters=5, geniters=1, burnin=500)

    with torch.no_grad():
        rvocab = {v: k for k, v in vocab.items()}
        q.batch_reset(decoder)
        eval_z = torch.tensor(datasm.matrix[:50])
        eval_y, _, _, _ = decoder(eval_z)
        for i in range(len(eval_y)):
            prow = "".join([rvocab[mij] for mij in eval_y[i].numpy()])
            print(prow)

    print("done")
Example 3
def loss(self, *args):
    # method of the q.trainer builder (cf. the .loss(...) calls in the examples above);
    # losses can be left unspecified
    if len(args) == 1 and isinstance(args[0], int):
        self.losses = q.no_losses(args[0])   # an int n reserves n placeholder loss slots
    elif len(args) == 1 and isinstance(args[0], lossarray):
        self.losses = args[0]                # a ready-made lossarray is stored as-is
    else:
        self.losses = q.lossarray(*args)     # anything else gets wrapped in a lossarray
    return self                              # returns self so calls can be chained
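The three branches map to three call shapes on a trainer. A sketch of each, assuming the q.trainer builder chain from the examples above (model, data, optim and the my_loss_* objects are hypothetical placeholders):

trainer = q.trainer(model).on(data).optimizer(optim)
trainer.loss(2)                      # int: two placeholder slots via q.no_losses
trainer.loss(q.lossarray(my_loss))   # a ready-made lossarray is stored as-is
trainer.loss(my_loss_a, my_loss_b)   # anything else gets wrapped: q.lossarray(...)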
Example 4
def loss(self, *args):  # TODO: should this also accept plain PyTorch losses?
    # losses can be left unspecified
    if len(args) == 1 and isinstance(args[0], int):
        self.losses = q.no_losses(args[0])
    elif len(args) == 1 and isinstance(args[0], lossarray):
        self.losses = args[0]
    else:
        self.losses = q.lossarray(*args)
    return self
Example 5
def run(lr=0.001):
    # data
    x = torch.randn(1000, 5, 5)   # 1000 random 5x5 matrices stand in for real data

    real_data = q.dataset(x)
    gen_data_d = q.gan.gauss_dataset(10, len(real_data))
    disc_data = q.datacat([real_data, gen_data_d], 1)

    gen_data = q.gan.gauss_dataset(10)

    disc_data = q.dataload(disc_data, batch_size=20, shuffle=True)
    gen_data = q.dataload(gen_data, batch_size=20, shuffle=True)

    next(iter(disc_data))   # smoke test: pull one batch to check the data pipeline

    # models
    class Generator(torch.nn.Module):
        def __init__(self):
            super(Generator, self).__init__()
            self.lin1 = torch.nn.Linear(10, 20)
            self.lin2 = torch.nn.Linear(20, 25)

        def forward(self, z):
            ret = self.lin1(z)
            ret = torch.sigmoid(ret)   # torch.sigmoid: F.sigmoid is deprecated
            ret = self.lin2(ret)
            ret = torch.sigmoid(ret)
            ret = ret.view(z.size(0), 5, 5)
            return ret

    class Discriminator(torch.nn.Module):
        def __init__(self):
            super(Discriminator, self).__init__()
            self.lin1 = torch.nn.Linear(25, 20)
            self.lin2 = torch.nn.Linear(20, 10)
            self.lin3 = torch.nn.Linear(10, 1)

        def forward(self, x):
            x = x.view(x.size(0), -1)
            ret = self.lin1(x)
            ret = torch.sigmoid(ret)
            ret = self.lin2(ret)
            ret = torch.sigmoid(ret)
            ret = self.lin3(ret)
            ret = torch.sigmoid(ret)   # final sigmoid: probability that x is real
            ret = ret.squeeze(1)
            return ret

    discriminator = Discriminator()
    generator = Generator()

    disc_model = q.gan.GAN(discriminator,
                           generator,
                           gan_mode=q.gan.GAN.DISC_TRAIN)
    gen_model = q.gan.GAN(discriminator,
                          generator,
                          gan_mode=q.gan.GAN.GEN_TRAIN)

    disc_optim = torch.optim.Adam(q.params_of(discriminator), lr=lr)
    gen_optim = torch.optim.Adam(q.params_of(generator), lr=lr)

    disc_trainer = q.trainer(disc_model).on(disc_data).optimizer(
        disc_optim).loss(q.no_losses(1))
    gen_trainer = q.trainer(gen_model).on(gen_data).optimizer(gen_optim).loss(
        q.no_losses(1))

    gan_trainer = q.gan.GANTrainer(disc_trainer, gen_trainer)

    gan_trainer.run(50, disciters=10, geniters=3)
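GANTrainer.run(50, disciters=10, geniters=3) alternates the two trainers. The sketch below is not the qelos implementation, only the alternation pattern the arguments imply, with hypothetical single-step callables:

def alternate(disc_step, gen_step, epochs=50, disciters=10, geniters=3):
    # disc_step / gen_step stand in for one optimizer update of
    # disc_model on a disc_data batch / gen_model on a gen_data batch
    for _ in range(epochs):
        for _ in range(disciters):   # several discriminator updates first...
            disc_step()
        for _ in range(geniters):    # ...then a few generator updates
            gen_step()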
Example 6
def run_words(lr=0.001,
              seqlen=8,
              batsize=50,
              epochs=1000,
              embdim=64,
              innerdim=128,
              z_dim=64,
              usebase=True,
              noaccumulate=False,
              ):
    # get some words: the N most frequent entries of a 50-dim pretrained embedding
    N = 1000
    glove = q.PretrainedWordEmb(50, vocabsize=N+2)
    words = list(glove.D.keys())[2:]   # skip the first two (special) vocabulary entries
    datasm = q.StringMatrix()
    datasm.tokenize = lambda x: list(x)
    for word in words:
        datasm.add(word)
    datasm.finalize()
    datamat = datasm.matrix[:, :seqlen]   # truncate words to seqlen characters
    # replace <MASK> (padding) ids with <END> ids
    datamat = datamat + (datamat == datasm.D["<MASK>"]) * (datasm.D["<END>"] - datasm.D["<MASK>"])
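    # the arithmetic above works elementwise: (datamat == <MASK> id) is a 0/1 mask,
    # so masked positions are offset by (<END> id - <MASK> id) and all others get +0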


    real_data = q.dataset(datamat)
    gen_data_d = q.gan.gauss_dataset(z_dim, len(real_data))
    disc_data = q.datacat([real_data, gen_data_d], 1)

    gen_data = q.gan.gauss_dataset(z_dim)

    disc_data = q.dataload(disc_data, batch_size=batsize, shuffle=True)
    gen_data = q.dataload(gen_data, batch_size=batsize, shuffle=True)

    discriminator = Discriminator(datasm.D, embdim, innerdim)
    generator = Decoder(datasm.D, embdim, z_dim, "<START>", innerdim, maxtime=seqlen)

    SeqGAN = SeqGAN_Base if usebase else SeqGAN_DCL

    disc_model = SeqGAN(discriminator, generator, gan_mode=q.gan.GAN.DISC_TRAIN, accumulate=not noaccumulate)
    gen_model = SeqGAN(discriminator, generator, gan_mode=q.gan.GAN.GEN_TRAIN, accumulate=not noaccumulate)

    disc_optim = torch.optim.Adam(q.params_of(discriminator), lr=lr)
    gen_optim = torch.optim.Adam(q.params_of(generator), lr=lr)

    disc_trainer = q.trainer(disc_model).on(disc_data).optimizer(disc_optim).loss(q.no_losses(2))
    gen_trainer = q.trainer(gen_model).on(gen_data).optimizer(gen_optim).loss(q.no_losses(2))

    gan_trainer = q.gan.GANTrainer(disc_trainer, gen_trainer)

    gan_trainer.run(epochs, disciters=5, geniters=1, burnin=500)

    # print some predictions:
    with torch.no_grad():
        rvocab = {v: k for k, v in datasm.D.items()}
        q.batch_reset(generator)
        eval_z = torch.randn(50, z_dim)
        eval_y, _ = generator(eval_z)
        for i in range(len(eval_y)):
            prow = "".join([rvocab[mij] for mij in eval_y[i].numpy()])
            print(prow)

    print("done")