def run(lr=0.001):
    # random toy data: 1000 examples, 5 features, 5 classes
    x = np.random.random((1000, 5)).astype("float32")
    y = np.random.randint(0, 5, (1000,)).astype("int64")
    trainloader = q.dataload(x[:800], y[:800], batch_size=100)
    validloader = q.dataload(x[800:], y[800:], batch_size=100)

    m = torch.nn.Sequential(torch.nn.Linear(5, 100), torch.nn.Linear(100, 5))
    m[1].weight.requires_grad = False       # freeze the second layer's weight

    losses = q.lossarray(torch.nn.CrossEntropyLoss())

    params = m.parameters()
    for param in params:
        print(param.requires_grad)

    # keep a copy of the frozen weight (.numpy() alone shares memory with the tensor,
    # so without .copy() the final comparison would always be zero)
    init_val = m[1].weight.detach().numpy().copy()

    optim = torch.optim.Adam(q.params_of(m), lr=lr)

    trainer = q.trainer(m).on(trainloader).loss(losses).optimizer(optim).epochs(100)

    # for b, (i, e) in trainer.inf_batches():
    #     print(i, e)

    validator = q.tester(m).on(validloader).loss(losses)

    q.train(trainer, validator).run()

    # should print 0.0: the frozen weight must not have changed during training
    new_val = m[1].weight.detach().numpy()
    print(np.linalg.norm(new_val - init_val))

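# Note (added sketch, not part of the original script): a minimal plain-PyTorch illustration of
# the same freezing pattern, assuming q.params_of(m) simply keeps the parameters whose
# requires_grad flag is True; the actual qelos implementation may differ.
def _frozen_layer_sketch(lr=0.001):
    m = torch.nn.Sequential(torch.nn.Linear(5, 100), torch.nn.Linear(100, 5))
    m[1].weight.requires_grad = False                        # freeze second layer's weight
    trainable = [p for p in m.parameters() if p.requires_grad]
    optim = torch.optim.Adam(trainable, lr=lr)               # assumed equivalent of q.params_of(m)
    init_val = m[1].weight.detach().numpy().copy()           # .copy(): .numpy() shares memory with the tensor
    # ... run training steps with optim here ...
    print(np.linalg.norm(m[1].weight.detach().numpy() - init_val))   # prints 0.0 while frozen
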
def run(lr=20.,
        dropout=0.2,
        dropconnect=0.2,
        gradnorm=0.25,
        epochs=25,
        embdim=200,
        encdim=200,
        numlayers=2,
        seqlen=35,
        batsize=20,
        eval_batsize=10,
        cuda=False,
        gpu=0,
        test=False):
    tt = q.ticktock("script")
    device = torch.device("cpu")
    if cuda:
        device = torch.device("cuda", gpu)

    tt.tick("loading data")
    train_batches, valid_batches, test_batches, D = \
        load_data(batsize=batsize, eval_batsize=eval_batsize, seqlen=seqlen)
    tt.tock("data loaded")
    print("{} batches in train".format(len(train_batches)))

    tt.tick("creating model")
    dims = [embdim] + ([encdim] * numlayers)
    m = RNNLayer_LM(*dims, worddic=D, dropout=dropout)

    if test:
        for i, batch in enumerate(train_batches):
            y = m(batch[0])
            if i > 5:
                break
            print(y.size())

    loss = q.SeqKLLoss(time_average=True, size_average=True, mode="logits")
    ppl_loss = q.SeqPPL_Loss(time_average=True, size_average=True, mode="logits")

    optim = torch.optim.SGD(q.params_of(m), lr=lr)

    gradclip = q.ClipGradNorm(gradnorm)

    trainer = q.trainer(m).on(train_batches).loss(loss).optimizer(optim)\
        .device(device).hook(m).hook(gradclip)
    tester = q.tester(m).on(valid_batches).loss(loss, ppl_loss).device(device).hook(m)
    tt.tock("created model")

    tt.tick("training")
    q.train(trainer, tester).run(epochs=epochs)
    tt.tock("trained")

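# Note (added sketch, not part of the original script): q.ClipGradNorm(gradnorm) is hooked into the
# trainer above; the assumption is that it rescales gradients much like PyTorch's built-in
# clip_grad_norm_. A hand-rolled training step would do roughly the following:
def _clipped_sgd_step_sketch(m, batch, loss_fn, optim, gradnorm=0.25):
    optim.zero_grad()
    x, y = batch
    loss = loss_fn(m(x), y)
    loss.backward()
    # rescale all gradients if their total L2 norm exceeds gradnorm
    torch.nn.utils.clip_grad_norm_(m.parameters(), gradnorm)
    optim.step()
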
def run_classify(lr=0.001,
                 seqlen=6,
                 numex=500,
                 epochs=25,
                 batsize=10,
                 test=True,
                 cuda=False,
                 gpu=0):
    device = torch.device("cpu")
    if cuda:
        device = torch.device("cuda", gpu)

    # region construct data
    colors = "red blue green magenta cyan orange yellow grey salmon pink purple teal".split()
    D = dict(zip(colors, range(len(colors))))
    inpseqs = []
    targets = []
    for i in range(numex):
        inpseq = list(np.random.choice(colors, seqlen, replace=False))
        target = np.random.choice(range(len(inpseq)), 1)[0]
        target_class = D[inpseq[target]]
        inpseq[target] = "${}$".format(inpseq[target])
        inpseqs.append("".join(inpseq))
        targets.append(target_class)

    sm = q.StringMatrix()
    sm.tokenize = lambda x: list(x)

    for inpseq in inpseqs:
        sm.add(inpseq)

    sm.finalize()
    print(sm[0])
    print(sm.D)
    targets = np.asarray(targets)

    data = q.dataload(sm.matrix[:-100], targets[:-100], batch_size=batsize)
    valid_data = q.dataload(sm.matrix[-100:], targets[-100:], batch_size=batsize)
    # endregion

    # region model
    embdim = 20
    enc2inpdim = 45
    encdim = 20
    outdim = 20
    emb = q.WordEmb(embdim, worddic=sm.D)       # sm dictionary (characters)
    out = q.WordLinout(outdim, worddic=D)       # target dictionary
    # encoders:
    enc1 = q.RNNEncoder(embdim, encdim, bidir=True)
    enc2 = q.RNNCellEncoder(enc2inpdim, outdim // 2, bidir=True)

    # model
    class Model(torch.nn.Module):
        def __init__(self, dim, _emb, _out, _enc1, _enc2, **kw):
            super(Model, self).__init__(**kw)
            self.dim, self.emb, self.out, self.enc1, self.enc2 = dim, _emb, _out, _enc1, _enc2
            self.score = torch.nn.Sequential(
                torch.nn.Linear(dim, 1, bias=False), torch.nn.Sigmoid())
            self.emb_expander = ExpandVecs(embdim, enc2inpdim, 2)
            self.enc_expander = ExpandVecs(encdim * 2, enc2inpdim, 2)

        def forward(self, x, with_att=False):
            # embed and encode
            xemb, xmask = self.emb(x)
            xenc = self.enc1(xemb, mask=xmask)
            # compute attention
            xatt = self.score(xenc).squeeze(2) * xmask.float()[:, :xenc.size(1)]
            # encode again
            _xemb = self.emb_expander(xemb[:, :xenc.size(1)])
            _xenc = self.enc_expander(xenc)
            _, xenc2 = self.enc2(_xemb, gate=xatt, mask=xmask[:, :xenc.size(1)], ret_states=True)
            scores = self.out(xenc2.view(xenc.size(0), -1))
            if with_att:
                return scores, xatt
            else:
                return scores

    model = Model(40, emb, out, enc1, enc2)
    # endregion

    # region test
    if test:
        inps = torch.tensor(sm.matrix[0:2])
        outs = model(inps)
    # endregion

    # region train
    optimizer = torch.optim.Adam(q.params_of(model), lr=lr)
    trainer = q.trainer(model).on(data).loss(torch.nn.CrossEntropyLoss(), q.Accuracy())\
        .optimizer(optimizer).hook(q.ClipGradNorm(5.)).device(device)
    validator = q.tester(model).on(valid_data).loss(q.Accuracy()).device(device)
    q.train(trainer, validator).run(epochs=epochs)
    # endregion

    # region check attention
    # TODO
    # feed a batch
    inpd = torch.tensor(sm.matrix[400:410])
    outd, att = model(inpd, with_att=True)
    outd = torch.max(outd, 1)[1].cpu().detach().numpy()
    inpd = inpd.cpu().detach().numpy()
    att = att.cpu().detach().numpy()
    rD = {v: k for k, v in sm.D.items()}
    roD = {v: k for k, v in D.items()}
    for i in range(len(att)):
        inpdi = " ".join([rD[x] for x in inpd[i]])
        outdi = roD[outd[i]]
        print("input: {}\nattention: {}\nprediction: {}".format(
            inpdi, " ".join(["{:.1f}".format(x) for x in att[i]]), outdi))

def run(lr=0.0001,
        batsize=64,
        epochs=100000,
        lamda=10,
        disciters=5,
        burnin=-1,
        validinter=1000,
        devinter=100,
        cuda=False,
        gpu=0,
        z_dim=128,
        test=False,
        dim_d=128,
        dim_g=128,
        ):
    settings = locals().copy()
    logger = q.log.Logger(prefix="wgan_resnet_cifar")
    logger.save_settings(**settings)
    print("started")

    burnin = disciters if burnin == -1 else burnin

    if test:
        validinter = 10
        burnin = 1
        batsize = 2
        devinter = 1

    tt = q.ticktock("script")

    device = torch.device("cpu") if not cuda else torch.device("cuda", gpu)

    tt.tick("creating networks")
    gen = OldGenerator(z_dim, dim_g).to(device)
    crit = OldDiscriminator(dim_d).to(device)
    tt.tock("created networks")

    # test
    # z = torch.randn(3, z_dim).to(device)
    # x = gen(z)
    # s = crit(x)

    # data
    # load cifar
    tt.tick("loading data")
    traincifar, testcifar = load_cifar_dataset(train=True), load_cifar_dataset(train=False)
    print(len(traincifar))

    gen_data_d = q.gan.gauss_dataset(z_dim, len(traincifar))
    disc_data = q.datacat([traincifar, gen_data_d], 1)

    gen_data = q.gan.gauss_dataset(z_dim)
    gen_data_valid = q.gan.gauss_dataset(z_dim, 50000)

    disc_data = q.dataload(disc_data, batch_size=batsize, shuffle=True)
    gen_data = q.dataload(gen_data, batch_size=batsize, shuffle=True)
    gen_data_valid = q.dataload(gen_data_valid, batch_size=batsize, shuffle=False)
    validcifar_loader = q.dataload(testcifar, batch_size=batsize, shuffle=False)

    dev_data_gauss = q.gan.gauss_dataset(z_dim, len(testcifar))
    dev_disc_data = q.datacat([testcifar, dev_data_gauss], 1)
    dev_disc_data = q.dataload(dev_disc_data, batch_size=batsize, shuffle=False)
    # q.embed()

    tt.tock("loaded data")

    disc_model = q.gan.WGAN(crit, gen, lamda=lamda).disc_train()
    gen_model = q.gan.WGAN(crit, gen, lamda=lamda).gen_train()

    disc_optim = torch.optim.Adam(q.params_of(crit), lr=lr, betas=(0.5, 0.9))
    gen_optim = torch.optim.Adam(q.params_of(gen), lr=lr, betas=(0.5, 0.9))

    disc_bt = UnquantizeTransform()

    disc_trainer = q.trainer(disc_model).on(disc_data).optimizer(disc_optim).loss(3).device(device)\
        .set_batch_transformer(lambda a, b: (disc_bt(a), b))
    gen_trainer = q.trainer(gen_model).on(gen_data).optimizer(gen_optim).loss(1).device(device)

    fidandis = q.gan.FIDandIS(device=device)
    if not test:
        fidandis.set_real_stats_with(validcifar_loader)
    saver = q.gan.GenDataSaver(logger, "saved.npz")
    generator_validator = q.gan.GeneratorValidator(gen, [fidandis, saver],
                                                   gen_data_valid, device=device,
                                                   logger=logger, validinter=validinter)

    train_validator = q.tester(disc_model).on(dev_disc_data).loss(3).device(device)\
        .set_batch_transformer(lambda a, b: (disc_bt(a), b))
    train_validator.validinter = devinter

    tt.tick("training")
    gan_trainer = q.gan.GANTrainer(disc_trainer, gen_trainer,
                                   validators=(generator_validator, train_validator),
                                   lr_decay=True)
    gan_trainer.run(epochs, disciters=disciters, geniters=1, burnin=burnin)
    tt.tock("trained")

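# Note (added sketch, not part of the original script): the lamda argument above weights the critic's
# gradient penalty. Assuming q.gan.WGAN follows the standard WGAN-GP formulation (Gulrajani et al., 2017),
# the penalty it adds to the critic loss looks roughly like this for image batches of shape (B, C, H, W):
def _gradient_penalty_sketch(crit, real, fake, lamda=10.):
    eps = torch.rand(real.size(0), 1, 1, 1, device=real.device)            # per-example mixing weights
    interp = (eps * real.detach() + (1 - eps) * fake.detach()).requires_grad_(True)
    scores = crit(interp)
    grads, = torch.autograd.grad(scores.sum(), interp, create_graph=True)  # d scores / d interpolates
    grad_norm = grads.view(grads.size(0), -1).norm(2, dim=1)
    return lamda * ((grad_norm - 1) ** 2).mean()                           # penalize deviation from unit norm
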
def run(lr=0.01, epochs=10, batsize=64, momentum=0.5, cuda=False, gpu=0, seed=1):
    settings = locals().copy()
    logger = q.Logger(prefix="mnist")
    logger.save_settings(**settings)

    torch.manual_seed(seed)
    if cuda:
        torch.cuda.set_device(gpu)
        torch.cuda.manual_seed(seed)

    kwargs = {}
    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../../datasets/mnist', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=batsize, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../../datasets/mnist', train=False,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=batsize, shuffle=False, **kwargs)

    class Net(nn.Module):
        def __init__(self):
            super(Net, self).__init__()
            self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
            self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
            self.conv2_drop = nn.Dropout2d()
            self.fc1 = nn.Linear(320, 50)
            self.fc2 = nn.Linear(50, 10)

        def forward(self, x):
            x = F.relu(F.max_pool2d(self.conv1(x), 2))
            x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
            x = x.view(-1, 320)
            x = F.relu(self.fc1(x))
            x = F.dropout(x, training=self.training)
            x = self.fc2(x)
            return F.log_softmax(x, dim=1)

    model = Net()

    optim = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    trainer = q.trainer(model).on(train_loader)\
        .loss(torch.nn.NLLLoss(), q.Accuracy())\
        .optimizer(optim).cuda(cuda)
    validator = q.tester(model).on(test_loader)\
        .loss(torch.nn.NLLLoss(), q.Accuracy())\
        .cuda(cuda)

    logger.loglosses(trainer, "train.losses")
    logger.loglosses(validator, "valid.losses")

    q.train(trainer, validator).run(epochs)

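# Note (added, not part of the original script): Net.forward returns log-probabilities via
# F.log_softmax, which is why the trainer pairs it with NLLLoss; on raw logits the same
# quantity comes out of CrossEntropyLoss, e.g.:
#
#   logits = torch.randn(4, 10)
#   labels = torch.randint(0, 10, (4,))
#   a = F.nll_loss(F.log_softmax(logits, dim=1), labels)
#   b = F.cross_entropy(logits, labels)
#   # a and b are equal up to floating-point error
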
def run(lr=0.0001,
        batsize=64,
        epochs=100000,
        lamda=10,
        disciters=5,
        burnin=-1,
        validinter=1000,
        devinter=100,
        cuda=False,
        gpu=0,
        z_dim=128,
        test=False,
        dim_d=128,
        dim_g=128,
        vggversion=13,
        vgglayer=9,
        vggvanilla=False,       # if True, makes trainable feature transform
        extralayers=False,      # adds a couple extra res blocks to generator to match added VGG
        pixelpenalty=False,     # if True, uses penalty based on pixel-wise interpolate
        inceptionpath="/data/lukovnik/",
        normalwgan=False,
        ):
    # vggvanilla=True and pixelpenalty=True makes a normal WGAN
    settings = locals().copy()
    logger = q.log.Logger(prefix="wgan_resnet_cifar_feat")
    logger.save_settings(**settings)

    burnin = disciters if burnin == -1 else burnin

    if test:
        validinter = 10
        burnin = 1
        batsize = 2
        devinter = 1

    tt = q.ticktock("script")

    device = torch.device("cpu") if not cuda else torch.device("cuda", gpu)

    tt.tick("creating networks")
    if not normalwgan:
        print("doing wgan-feat")
        gen = OldGenerator(z_dim, dim_g, extra_layers=extralayers).to(device)
        inpd = get_vgg_outdim(vggversion, vgglayer)
        crit = ReducedDiscriminator(inpd, dim_d).to(device)
        subvgg = SubVGG(vggversion, vgglayer, pretrained=not vggvanilla)
    else:
        print("doing normal wgan")
        gen = OldGenerator(z_dim, dim_g, extra_layers=False).to(device)
        crit = OldDiscriminator(dim_d).to(device)
        subvgg = None
    tt.tock("created networks")

    # test
    # z = torch.randn(3, z_dim).to(device)
    # x = gen(z)
    # s = crit(x)

    # data
    # load cifar
    tt.tick("loading data")
    traincifar, testcifar = load_cifar_dataset(train=True), load_cifar_dataset(train=False)
    print(len(traincifar), len(testcifar))

    gen_data_d = q.gan.gauss_dataset(z_dim, len(traincifar))
    disc_data = q.datacat([traincifar, gen_data_d], 1)

    gen_data = q.gan.gauss_dataset(z_dim)
    gen_data_valid = q.gan.gauss_dataset(z_dim, 50000)

    swd_gen_data = q.gan.gauss_dataset(z_dim, 10000)
    swd_real_data = []
    swd_shape = traincifar[0].size()
    for i in range(10000):
        swd_real_data.append(testcifar[i])
    swd_real_data = torch.stack(swd_real_data, 0)

    disc_data = q.dataload(disc_data, batch_size=batsize, shuffle=True)
    gen_data = q.dataload(gen_data, batch_size=batsize, shuffle=True)
    gen_data_valid = q.dataload(gen_data_valid, batch_size=batsize, shuffle=False)
    validcifar_loader = q.dataload(testcifar, batch_size=batsize, shuffle=False)

    swd_batsize = 64
    swd_gen_data = q.dataload(swd_gen_data, batch_size=swd_batsize, shuffle=False)
    swd_real_data = q.dataload(swd_real_data, batch_size=swd_batsize, shuffle=False)

    dev_data_gauss = q.gan.gauss_dataset(z_dim, len(testcifar))
    dev_disc_data = q.datacat([testcifar, dev_data_gauss], 1)
    dev_disc_data = q.dataload(dev_disc_data, batch_size=batsize, shuffle=False)
    # q.embed()

    tt.tock("loaded data")

    if not normalwgan:
        disc_model = q.gan.WGAN_F(crit, gen, subvgg, lamda=lamda, pixel_penalty=pixelpenalty).disc_train()
        gen_model = q.gan.WGAN_F(crit, gen, subvgg, lamda=lamda, pixel_penalty=pixelpenalty).gen_train()
    else:
        disc_model = q.gan.WGAN(crit, gen, lamda=lamda).disc_train()
        gen_model = q.gan.WGAN(crit, gen, lamda=lamda).gen_train()

    disc_params = q.params_of(crit)
    if vggvanilla and not normalwgan:
        disc_params += q.params_of(subvgg)
    disc_optim = torch.optim.Adam(disc_params, lr=lr, betas=(0.5, 0.9))
    gen_optim = torch.optim.Adam(q.params_of(gen), lr=lr, betas=(0.5, 0.9))

    disc_bt = UnquantizeTransform()

    disc_trainer = q.trainer(disc_model).on(disc_data).optimizer(disc_optim).loss(3).device(device)\
        .set_batch_transformer(lambda a, b: (disc_bt(a), b))
    gen_trainer = q.trainer(gen_model).on(gen_data).optimizer(gen_optim).loss(1).device(device)

    # fidandis = q.gan.FIDandIS(device=device)
    tfis = q.gan.tfIS(inception_path=inceptionpath, gpu=gpu)
    # if not test:
    #     fidandis.set_real_stats_with(validcifar_loader)
    saver = q.gan.GenDataSaver(logger, "saved.npz")
    generator_validator = q.gan.GeneratorValidator(gen, [tfis, saver],
                                                   gen_data_valid, device=device,
                                                   logger=logger, validinter=validinter)

    train_validator = q.tester(disc_model).on(dev_disc_data).loss(3).device(device)\
        .set_batch_transformer(lambda a, b: (disc_bt(a), b))
    train_validator.validinter = devinter

    tt.tick("initializing SWD")
    swd = q.gan.SlicedWassersteinDistance(swd_shape)
    swd.prepare_reals(swd_real_data)
    tt.tock("SWD initialized")

    swd_validator = q.gan.GeneratorValidator(gen, [swd], swd_gen_data, device=device,
                                             logger=logger, validinter=validinter, name="swd")

    tt.tick("training")
    gan_trainer = q.gan.GANTrainer(disc_trainer, gen_trainer,
                                   validators=(generator_validator, train_validator, swd_validator),
                                   lr_decay=True)
    gan_trainer.run(epochs, disciters=disciters, geniters=1, burnin=burnin)
    tt.tock("trained")