def inference(inp, filename):
    """Generates a grid of handwriting samples for `inp` and saves it as a PNG."""
    lm, gen = init_inference()
    checkpoint = torch.load(f"{config.OUT_DIR}/checkpoint.pt")

    with open(f"{config.BASE_DIR}/src/ctoi.txt", "rb") as ctoi_file:
        encoding_dict = pickle.load(ctoi_file)

    # print(
    #     f'Checkpoint Details:\n Trained for: {checkpoint["epoch"]} epochs, '
    #     f'Final Generator loss: {checkpoint["gen_loss"]}, Log File: {checkpoint["log_file"]}'
    # )
    lm.load_state_dict(checkpoint["lm"])
    gen.load_state_dict(checkpoint["gen"])

    test = preprocess_labels([inp] * config.BATCH_SIZE, encoding_dict)
    with torch.no_grad():
        zin = generate_noise(config.Z_LEN, config.BATCH_SIZE, device)
        gin = lm(test.to(device))
        gout = gen(zin, gin)

    tgrid = torchvision.utils.make_grid(gout.detach().cpu(), nrow=4)
    imshow(tgrid, f"{config.OUT_DIR}/inference/{filename}.png")
    print(f'Inference Finished. Check "out" directory for {filename}.png')
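
# Usage sketch (hypothetical helper, not part of the original file): render a
# PNG grid for each word in a list using the latest checkpoint. Assumes a
# trained checkpoint exists at config.OUT_DIR/checkpoint.pt and that the
# config.OUT_DIR/inference/ directory is writable.
def batch_inference(words):
    for word in words:
        inference(word, word)

# batch_inference(["hello", "world"])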
def inference_tb(inp, writer):
    lm, gen = init_inference()
    checkpoint = torch.load(f"{config.OUT_DIR}/checkpoint.pt")

    with open(f"{config.BASE_DIR}/src/ctoi.txt", "rb") as ctoi_file:
        encoding_dict = pickle.load(ctoi_file)

    # print(
    #     f'Checkpoint Details:\n Trained for: {checkpoint["epoch"]} epochs, '
    #     f'Final Generator loss: {checkpoint["gen_loss"]}, Log File: {checkpoint["log_file"]}'
    # )
    lm.load_state_dict(checkpoint["lm"])
    gen.load_state_dict(checkpoint["gen"])

    test = preprocess_labels([inp] * config.BATCH_SIZE, encoding_dict)
    with torch.no_grad():
        # lm.eval()
        # gen.eval()
        zin = generate_noise(config.Z_LEN, config.BATCH_SIZE, device)
        gin = lm(test.to(device))
        gout = gen(zin, gin)

    tgrid = torchvision.utils.make_grid(gout.detach().cpu(), nrow=4)
    writer.add_image(str(checkpoint["epoch"]), tgrid)
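
# Usage sketch (hypothetical, assuming torch.utils.tensorboard is installed):
# log a grid of generated samples for one word to a TensorBoard run directory.
def inference_tb_demo(word="hello"):
    from torch.utils.tensorboard import SummaryWriter

    demo_writer = SummaryWriter(f"{config.OUT_DIR}/tb_inference")
    inference_tb(word, demo_writer)
    demo_writer.close()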
    X = np.concatenate((X, ut.load(objname)))
    labels = np.concatenate((labels, ut.load(labelsname)))
else:
    print("Loading data...")
    X, labels = ut.load_data('data/train.csv', train=True, selected=sort_idx)
    dims = X.shape
    print(dims, 'dims')

########################################################################
print("Preprocessing data")
X, scaler = ut.preprocess_data(X)
print("Preprocessing labels")
y, encoder = ut.preprocess_labels(labels)

X_test, ids = ut.load_data('data/test.csv', train=False, selected=sort_idx)
X_test, _ = ut.preprocess_data(X_test, scaler)

nb_classes = y.shape[1]
print(nb_classes, 'classes')
dims = X.shape[1]
print(dims, 'dims')

# Check if a saved model exists and resume it, otherwise rebuild
if os.path.isfile("./tmp/keras-nn"):
    print("Loading existing neural network...")
    model = ut.load("keras-nn", "./tmp/")
    print("done.")
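
# Sketch of what the ut.preprocess_data / ut.preprocess_labels helpers could
# look like (an assumption for illustration, not the repo's implementation):
# standard scaling of the features and one-hot encoding of the labels, matching
# the (data, fitted transformer) return convention used above.
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler

def preprocess_data_sketch(X, scaler=None):
    if scaler is None:
        scaler = StandardScaler().fit(X)
    return scaler.transform(X), scaler

def preprocess_labels_sketch(labels, encoder=None):
    if encoder is None:
        encoder = LabelEncoder().fit(labels)
    ids = encoder.transform(labels)
    y = np.eye(len(encoder.classes_))[ids]  # one-hot, shape (n_samples, nb_classes)
    return y, encoder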
def train(epochs=1, from_checkpoint=False, checkpoint_interval=1):
    """Runs training for the specified number of epochs."""
    lm, gen, dis, rec = init_models()
    lm_opt, gen_opt, dis_opt, rec_opt = init_optim(lm, gen, dis, rec)

    with open(f"{config.BASE_DIR}/src/ctoi.txt", "rb") as ctoi_file:
        encoding_dict = pickle.load(ctoi_file)

    _, trainloader = get_dataloader()
    stddev = 1
    ctc_criterion = nn.CTCLoss(zero_infinity=True)

    if from_checkpoint:
        point = torch.load(f"{config.OUT_DIR}/checkpoint.pt")
        lm.load_state_dict(point["lm"])
        gen.load_state_dict(point["gen"])
        dis.load_state_dict(point["dis"])
        rec.load_state_dict(point["rec"])
        lm_opt.load_state_dict(point["lm_opt"])
        gen_opt.load_state_dict(point["gen_opt"])
        dis_opt.load_state_dict(point["dis_opt"])
        rec_opt.load_state_dict(point["rec_opt"])
        stddev = point["stddev"]

    gen_losses = []
    dis_losses = []
    rec_losses = []
    count = 1

    for epoch in range(epochs):
        gen_loss_epoch = []
        dis_loss_epoch = []
        rec_loss_epoch = []

        for batch in tqdm(trainloader):
            # Instance noise for the discriminator; its stddev decays every batch
            noise = torch.distributions.normal.Normal(0, stddev)
            stddev -= 0.00001

            imgs, labels, lens = batch
            imgs = imgs.to(device)
            labels = preprocess_labels(labels, encoding_dict).to(device)
            ctc_labels = labels.transpose(0, 1).to(device)
            lens = torch.LongTensor(lens).to(device)

            # ========= Train Discriminator and R =========
            dis_opt.zero_grad()
            rec_opt.zero_grad()

            z_dis = generate_noise(config.Z_LEN, config.BATCH_SIZE, device)
            emb_dis = lm(labels)
            gen_out_dis = gen(z_dis, emb_dis)

            # Adding noise to the discriminator input
            # if epoch >= 100:
            #     dis_out_fake = dis(gen_out_dis)
            #     dis_out_real = dis(imgs)
            # else:
            dis_out_fake = dis(gen_out_dis + noise.sample(gen_out_dis.shape).to(device))
            dis_out_real = dis(imgs + noise.sample(imgs.shape).to(device))
            rec_out_dis = rec(imgs.to(device2))

            dis_loss = dis_criterion(dis_out_fake, dis_out_real)
            rec_loss = compute_ctc_loss(ctc_criterion, rec_out_dis, ctc_labels, lens)
            dis_loss_epoch.append(dis_loss.detach().cpu().numpy())
            rec_loss_epoch.append(rec_loss.detach().cpu().numpy())

            dis_loss.backward()
            rec_loss.backward()
            dis_opt.step()
            rec_opt.step()

            # Log per-parameter gradient norms for the discriminator and recognizer
            try:
                for name, param in dis.named_parameters():
                    writer.add_scalar(f"dis gradnorm {name}", param.grad.norm(), count)
            except Exception:
                print("Histogram error in Discriminator")
            try:
                for name, param in rec.named_parameters():
                    writer.add_scalar(f"rec gradnorm {name}", param.grad.norm(), count)
            except Exception:
                print("Histogram error in Recognizer")

            # ========= Train Generator =========
            # Gradient-balancing hooks: discriminator hooks record per-parameter
            # gradient variances in dis_stats["vars"]; recognizer hooks rescale
            # R's gradients to the discriminator's gradient scale (dis_std
            # handles the reference parameter "rnn.1.out.bias").
            dis_handles = []
            rec_handles = []
            dis_stats["vars"] = []
            for n, p in rec.named_parameters():
                if n == "rnn.1.out.bias":
                    rec_handles.append(p.register_hook(lambda grad: dis_std(grad)))
                else:
                    rec_handles.append(
                        p.register_hook(lambda grad: grad * dis_stats["std"] / grad.std())
                    )
            for n, p in dis.named_parameters():
                dis_handles.append(
                    p.register_hook(lambda grad: dis_stats["vars"].append(grad.var().item()))
                )

            lm_opt.zero_grad()
            gen_opt.zero_grad()

            z = generate_noise(config.Z_LEN, config.BATCH_SIZE, device)
            emb = lm(labels)
            gen_out = gen(z, emb)
            rec_out = rec(gen_out.to(device2)).to(device)
            # dis_out = dis(gen_out)
            dis_out = dis(gen_out + noise.sample(gen_out.shape).to(device))

            ctc = compute_ctc_loss(ctc_criterion, rec_out, ctc_labels, lens)
            gen_loss = gen_criterion(dis_out, ctc)
            gen_loss_epoch.append(gen_loss.detach().cpu().numpy())

            gen_loss.backward()
            gen_opt.step()
            lm_opt.step()

            for handle in dis_handles:
                handle.remove()
            for handle in rec_handles:
                handle.remove()

            # Log per-parameter gradient norms for the generator and language model
            try:
                for name, param in gen.named_parameters():
                    writer.add_scalar(f"gen gradnorm {name}", param.grad.norm(), count)
            except Exception:
                print("Histogram error in Generator")
            try:
                for name, param in lm.named_parameters():
                    writer.add_scalar(f"lm gradnorm {name}", param.grad.norm(), count)
            except Exception:
                print("Histogram error in Language Model")

            writer.add_scalar("noise stddev", stddev, count)
            count += 1

        # Printing epoch details
        gen_epoch = np.mean(gen_loss_epoch)
        dis_epoch = np.mean(dis_loss_epoch)
        rec_epoch = np.mean(rec_loss_epoch)
        gen_losses.append(gen_epoch)
        dis_losses.append(dis_epoch)
        rec_losses.append(rec_epoch)
        print(
            f"Epoch: {epoch}, Discriminator Loss: {dis_epoch}, "
            f"Generator Loss: {gen_epoch}, R loss: {rec_epoch}"
        )
        writer.add_scalar("Discriminator loss", dis_epoch, epoch)
        writer.add_scalar("Generator loss", gen_epoch, epoch)
        writer.add_scalar("R loss", rec_epoch, epoch)

        # Creating model checkpoint
        if (epoch + 1) % checkpoint_interval == 0:
            torch.save(
                {
                    "epoch": epoch + 1,
                    "stddev": stddev,
                    "lm": lm.state_dict(),
                    "gen": gen.state_dict(),
                    "dis": dis.state_dict(),
                    "rec": rec.state_dict(),
                    "lm_opt": lm_opt.state_dict(),
                    "gen_opt": gen_opt.state_dict(),
                    "dis_opt": dis_opt.state_dict(),
                    "rec_opt": rec_opt.state_dict(),
                    "dis_loss": dis_epoch,
                    "gen_loss": gen_epoch,
                    "rec_loss": rec_epoch,
                    "log_file": current_log,
                },
                f"{config.OUT_DIR}/checkpoint.pt",
            )
            inference("amit", str(epoch))

    print("Training Finished")
images, labels = readTrafficSigns.readTrafficSigns(args.data)
vprint('\tdone\nplot exemplary images')
fig = plt.figure(figsize=(20, 5))
for i in range(30):
    ax = plt.subplot(3, 10, i + 1)
    rand_sample = random.randrange(0, len(images))
    plt.imshow(images[rand_sample], cmap='gray')
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()

vprint('preprocess input data')
labels = [int(l) for l in labels]
labels = utils.preprocess_labels(labels, size=max(labels))
images = np.array(images)
# df = pd.DataFrame(images)
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size=0.2)
vprint('\tdone')
# do assert shape here

vprint('begin setting up model')
model = Sequential()
# model.add(Dropout(input_shape=images[0].shape, rate=0.1))
model.add(
    Convolution2D(input_shape=images[0].shape,
                  name='Convolution',
                  strides=(1, 1),
def train(epochs, checkpoint_interval=1):
    rec = models.R().to(device)
    rec_optim = optim.Adam(
        rec.parameters(), lr=config.LEARNING_RATE, betas=config.BETAS
    )

    with open(f"{config.BASE_DIR}/src/ctoi.txt", "rb") as ctoi_file:
        encoding_dict = pickle.load(ctoi_file)
    # print(encoding_dict)

    _, trainloader = get_dataloader()
    ctc_criterion = nn.CTCLoss(zero_infinity=True)

    losses = []
    count = 1
    for epoch in range(epochs):
        epoch_loss = []
        for batch in tqdm(trainloader):
            imgs, labels, lens = batch
            imgs = imgs.to(device)
            labels = preprocess_labels(labels, encoding_dict).transpose(0, 1).to(device)
            lens = torch.LongTensor(lens).to(device)

            rec_optim.zero_grad()
            out = rec(imgs)
            loss = compute_ctc_loss(ctc_criterion, out, labels, lens)
            epoch_loss.append(loss.item())
            loss.backward()

            # Log per-parameter gradient norms for the recognizer
            try:
                for name, param in rec.named_parameters():
                    writer.add_scalar(f"rec gradnorm {name}", param.grad.norm(), count)
            except Exception:
                print("Histogram error in Recognizer")

            rec_optim.step()
            count += 1

        mean_loss = np.mean(epoch_loss)
        losses.append(mean_loss)
        writer.add_scalar("Loss", mean_loss, epoch)
        print(f"Epoch {epoch}, Loss: {mean_loss}")

        if (epoch + 1) % checkpoint_interval == 0:
            torch.save(
                {
                    "epoch": epoch + 1,
                    "model": rec.state_dict(),
                    "opt": rec_optim.state_dict(),
                    "loss": mean_loss,
                },
                f"{config.OUT_DIR}/rec_checkpoint.pt",
            )

    print("Training finished")

    # Quick sanity check on a single training image
    rec.eval()
    ximgs, xlabels, _ = next(iter(trainloader))
    ximgs = ximgs.to(device)
    writer.add_image("test_image", ximgs[1, :, :, :])
    inf_out = rec(ximgs[1, :, :, :].reshape((1, 1, 128, 512)))
    print(f"Network Output: {decode(torch.argmax(inf_out, dim=2).cpu().numpy(), 0)}")
    print(f"Ground Truth: {xlabels[1]}")
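
# Sketch of a greedy CTC decode consistent with how `decode` is called above
# (assumed behaviour, not the repo's actual implementation): take the per-step
# argmax indices for one sample, collapse consecutive repeats, drop the CTC
# blank, and map the remaining indices back to characters via an index-to-char
# dict (the inverse of the ctoi encoding).
def greedy_ctc_decode(argmax_steps, blank=0, itoc=None):
    chars = []
    prev = None
    for idx in argmax_steps:
        if idx != prev and idx != blank:
            chars.append(itoc[idx] if itoc is not None else str(idx))
        prev = idx
    return "".join(chars)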