def train_from_density(model, optimizer, z0_distr, density, name: str, num_layers: int, batch_size: int = 128, num_iter: int = 20000):
    """Train the normalizing flow from density.

    Runs ``num_iter + 1`` optimization steps. Every 200 steps the mean loss
    over the window is printed; after the final step, 500 samples are drawn
    from the trained flow and saved as a scatter plot.
    """
    running_loss = 0.0
    for step in range(num_iter + 1):
        base_samples = z0_distr.sample((batch_size, ))
        x, logdet = model(base_samples)
        loss = torch.mean(loss2(density(x), base_samples, logdet))
        running_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the 200-step running average (skip step 0).
        if step and step % 200 == 0:
            print(f"loss: {running_loss/200}")
            running_loss = 0.0

    # Draw a larger batch from the trained flow and save the 2-D scatter.
    final_z = z0_distr.sample((500, ))
    x, logdet = model(final_z)
    xs = x[:, 0].detach().numpy()
    ys = x[:, 1].detach().numpy()
    plot_samples(xs, ys, f"samples_{name}_l{num_layers}")
def warp_images():
    """Run the warping network on a few fixed image pairs.

    Loads the trained transformer weights and the VGG16 mean pixel value,
    compiles the Theano inference function, and writes one sample plot per
    batch under ``data/inference/``.
    """
    print('building model')
    layers = vgg16.build_model((None, 3, 227, 227))
    batch_size = 32
    infer_dir = join('data', 'inference')

    # Trained transformer parameters.
    weightsfile = join('weights', 'weights.pickle')
    with open(weightsfile, 'rb') as f:
        param_values = pickle.load(f)
    set_all_param_values(layers['trans'], param_values)

    # Pretrained VGG16 checkpoint — only the mean pixel is used here.
    pretrainfile = join('weights', 'vgg16.pkl')
    with open(pretrainfile, 'rb') as f:
        data = pickle.load(f)
    mean = data['mean value']

    image_fpaths = [('Cars_013b.png', 'Cars_009b.png'),
                    ('060_0071.png', '060_0000.png'),
                    ('246_0052.png', '246_0042.png')]

    print('compiling theano functions for inference')
    # BUG fix: was `/`, which is float division on Python 3; tqdm's
    # `total` expects an integer batch count, so use ceil via `//`.
    num_infer_idx = (len(image_fpaths) + batch_size - 1) // batch_size
    infer_func = theano_funcs.create_infer_func(layers)
    infer_iter = utils.get_batch_idx(len(image_fpaths), batch_size)
    for i, idx in tqdm(infer_iter, total=num_infer_idx, leave=False):
        Xa, Xb = utils.prepare_batch(image_fpaths[idx], mean)
        M = infer_func(Xa, Xb)
        utils.plot_samples(Xa, Xb, M, mean,
                           prefix=join(infer_dir, 'infer_%d' % i))
def train(self, batch_size, train_dataset, num_steps, keep_prob=1.0, num_display=64):
    """Train the GAN for `num_steps` steps on `train_dataset`.

    Every ~1/40th of the run, prints the value functions and plots samples
    from a fixed noise batch; every ~1/100th, records V(D)/V(G) history and
    writes a summary. Finishes with a full-dataset evaluation and a V-curve
    plot.
    """
    # BUG fix: for num_steps < 40 (or < 100) these were 0, so the `%`
    # checks below raised ZeroDivisionError. Clamp to at least 1.
    display_step = max(1, num_steps // 40)
    summary_step = max(1, num_steps // 100)
    perm = np.random.permutation(train_dataset.shape[0])
    X = train_dataset[perm, :]
    # Use fixed Z to generate samples
    display_Z = self.sample_noise([num_display, self.noise_dim])
    fig_index = 0
    inner_step = 0
    print('Training GAN for %d steps' % num_steps)
    D_history = []
    G_history = []
    # NOTE(review): `xrange` implies Python 2; kept as-is to match the
    # file's era — confirm before running under Python 3.
    for step in xrange(num_steps):
        for k in range(1):
            # use next different batches
            batch_X = next_batch(X, inner_step, batch_size)
            inner_step += 1
            batch_Z_D = self.sample_noise([batch_size, self.noise_dim])
            _, D_V_neg = self.sess.run(
                [self.D_solver, self.D_V_neg],
                feed_dict={self.X: batch_X,
                           self.Z: batch_Z_D,
                           self.keep_prob: keep_prob})
        # finish k steps for training D
        batch_Z_G = self.sample_noise([batch_size, self.noise_dim])
        _, G_V = self.sess.run([self.G_solver, self.G_V],
                               feed_dict={self.Z: batch_Z_G,
                                          self.keep_prob: keep_prob})
        if step % display_step == 0:
            print('Batch(%d cases) value function at step %d' %
                  (batch_X.shape[0], step))
            print('V(D) = %.6f, V(G) = %.6f' % (-D_V_neg, G_V))
            samples = self.sess.run(self.G_sample,
                                    feed_dict={self.Z: display_Z,
                                               self.keep_prob: 1.0})
            plot_samples(samples, self.dirname, fig_index)
            fig_index += 1
        if step % summary_step == 0:
            D_history.append(-D_V_neg)
            G_history.append(G_V)
            self.make_summary(step, batch_X, batch_Z_G, keep_prob)
    # Final full-dataset evaluation (dropout disabled).
    Z_D = self.sample_noise([X.shape[0], self.noise_dim])
    D_V_neg = self.sess.run(self.D_V_neg,
                            feed_dict={self.X: X,
                                       self.Z: Z_D,
                                       self.keep_prob: 1.0})
    Z_G = self.sample_noise([X.shape[0], self.noise_dim])
    G_V = self.sess.run(self.G_V,
                        feed_dict={self.Z: Z_G, self.keep_prob: 1.0})
    print('Finish training\nV(D) = %.6f, V(G) = %.6f' % (-D_V_neg, G_V))
    self.make_summary(num_steps, X, Z_G, keep_prob=1.0)
    plot_V(self.dirname, D_history, G_history)
def main():
    """Restore a trained WGAN checkpoint and save a 5x5 grid of samples."""
    # Set up the session and load the trained generator.
    session = tf.Session()
    gan = GAN(sess=session, init=False, gf_dim=128)
    gan.restore(model_path='hw3_1/model_file/WGAN_v2')

    # 25 latent vectors drawn uniformly from [-1, 1), one per grid cell.
    latent_batch = np.random.uniform(-1., 1., size=[25, 100])
    generated = gan.generate(latent_batch)
    plot_samples(generated, save=True, h=5, w=5, filename='gan',
                 folder_path='samples/')
def optimize_sigma(model, loader, writer, sigma_0, lr_sigma, flag='train', radius=None, gaussian_num_ds=1, epoch=1):
    """Run one optimization pass of the per-sample smoothing sigmas.

    For each batch, takes a single `get_sigma` step (the caller's outer loop
    provides the iteration count), updates `sigma_0`/`radius` in place at the
    batch indices, accumulates corrupted-input accuracy, and logs a sample
    figure plus scalar summaries to TensorBoard.

    Returns the updated `sigma_0`.
    """
    model = model.eval()
    total = 0
    test_loss, test_loss_corrupted = 0, 0
    correct, correct_corrupted = 0, 0
    # BUG fix: with the default `radius=None`, `radius[idx] = rad` below
    # raised TypeError. Allocate a buffer matching sigma_0 when omitted.
    if radius is None:
        radius = torch.zeros_like(sigma_0)
    for _, (batch, targets, idx) in enumerate(loader):
        batch, targets = batch.to(device), targets.to(device)
        # Here I will put iters to 1 as the outer loop contains the number
        # of iterations
        sigma, batch_corrupted, rad = get_sigma(model,
                                                batch,
                                                lr_sigma,
                                                sigma_0[idx],
                                                1,
                                                device,
                                                ret_radius=True,
                                                gaussian_num=gaussian_num_ds)
        sigma_0[idx], radius[idx] = sigma, rad
        with torch.no_grad():
            outputs_corrputed_softmax = model(batch_corrupted)
            _, predicted_corrupted = outputs_corrputed_softmax.max(1)
            total += targets.size(0)
            correct_corrupted += predicted_corrupted.eq(targets).sum().item()
    # plottings — uses the last batch seen in the loop above.
    n = min(batch.size(0), 8)
    comparison = torch.cat([batch[:n], batch_corrupted[:n]])
    comparison = torch.clamp(comparison, min=0, max=1)
    fig = plot_samples(comparison.detach().cpu().numpy().transpose(
        0, 2, 3, 1).squeeze(),
                       h=2,
                       w=n)
    writer.add_figure('optimizing sigma sample of noisy ' + flag + ' examples',
                      fig, epoch)
    # writer.add_scalar('optimizing_sigma/'+flag+'/loss_clean', test_loss / total, epoch)
    # writer.add_scalar('optimizing_sigma/'+flag+'/accuracy_clean', 100.*correct / total, epoch)
    # writer.add_scalar('optimizing_sigma/'+flag+'/loss_corrupted', test_loss_corrupted / total, epoch)
    writer.add_scalar('optimizing_sigma/' + flag + '/accuracy_corrupted',
                      100. * correct_corrupted / total, epoch)
    writer.add_scalar('optimizing_sigma/' + flag + '/sigma_mean',
                      sigma_0.mean().item(), epoch)
    writer.add_scalar('optimizing_sigma/' + flag + '/sigma_min',
                      sigma_0.min().item(), epoch)
    writer.add_scalar('optimizing_sigma/' + flag + '/sigma_max',
                      sigma_0.max().item(), epoch)
    writer.add_scalar('optimizing_sigma/' + flag + '/radius_for_sample_0',
                      radius[0].item(), epoch)
    # Saving the sigmas
    return sigma_0
def resolve_and_tensorboard_plot(our_model, lr_image_paths, title='', make_input_img_bw=False):
    """Super-resolve each input image and log the comparison to TensorBoard."""

    def _lr_sr_pair(image_path):
        # Load the low-res input, then super-resolve it with the model.
        low_res = load_image(image_path, make_input_img_bw)
        return low_res, resolve_single(our_model, low_res)

    pairs = [_lr_sr_pair(path) for path in lr_image_paths]
    fig = plot_samples(pairs, interpolate_lr=True,
                       input_img_bw=make_input_img_bw)

    # Render the matplotlib figure into an in-memory PNG.
    png_buffer = io.BytesIO()
    plt.savefig(png_buffer, format='png')
    # Closing the figure prevents it from being displayed directly inside
    # the notebook.
    plt.close(fig)
    png_buffer.seek(0)

    # Convert PNG buffer to a TF image tensor and add the batch dimension.
    image = tf.image.decode_png(png_buffer.getvalue(), channels=4)
    image = tf.expand_dims(image, 0)

    with tb_file_writer.as_default():
        tf.summary.image(title, image, step=0)
def test(epoch, model, test_loader, writer, sigma_0, lr_sigma, iters_sig):
    """Evaluate the model on clean and sigma-corrupted test inputs.

    Updates the per-sample sigmas (`sigma_0`) in place via `get_sigma`,
    accumulates clean/corrupted loss and accuracy over the whole loader, and
    logs a sample figure plus scalar summaries to TensorBoard.

    Returns (corrupted accuracy in percent, updated sigma_0).
    """
    model = model.eval()
    test_loss = 0
    test_loss_corrupted = 0
    total = 0
    correct = 0
    correct_corrupted = 0
    for _, (batch, targets, idx) in enumerate(test_loader):
        batch = batch.to(device)
        targets = targets.to(device)
        # One sigma-optimization step per batch; also returns the
        # noise-corrupted version of the batch.
        sigma, batch_corrupted = get_sigma(model, batch, lr_sigma,
                                           sigma_0[idx], iters_sig, device)
        sigma_0[idx] = sigma  # update sigma
        with torch.no_grad():
            # forward pass through the base classifier
            outputs_softmax = model(batch)
            outputs_corrputed_softmax = model(batch_corrupted)
            loss = compute_loss(outputs_softmax, targets)
            loss_corrupted = compute_loss(outputs_corrputed_softmax, targets)
            # Weight per-batch losses by batch size for a dataset-level mean.
            test_loss += loss.item() * len(batch)
            test_loss_corrupted += loss_corrupted.item() * len(batch)
            _, predicted = outputs_softmax.max(1)
            _, predicted_corrupted = outputs_corrputed_softmax.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            correct_corrupted += predicted_corrupted.eq(targets).sum().item()
    print(
        '===> Test Loss: {}. Test Accuracy: {}. Test Loss Corrupted: {}. Test Accuracy Corrupted: {}'
        .format(test_loss / total, 100. * correct / total,
                test_loss_corrupted / total,
                100. * correct_corrupted / total))
    # Figure uses the last batch from the loop above: top row clean,
    # bottom row corrupted.
    n = min(batch.size(0), 8)
    comparison = torch.cat([batch[:n], batch_corrupted[:n]])
    comparison = torch.clamp(comparison, min=0, max=1)
    fig = plot_samples(comparison.detach().cpu().numpy().transpose(
        0, 2, 3, 1).squeeze(),
                       h=2,
                       w=n)
    writer.add_figure('sample of noisy test examples', fig, epoch)
    writer.add_scalar('loss/test_loss', test_loss / total, epoch)
    writer.add_scalar('accuracy/test_accuracy', 100. * correct / total, epoch)
    writer.add_scalar('loss/test_loss_corrupted', test_loss_corrupted / total,
                      epoch)
    writer.add_scalar('accuracy/test_accuracy_corrupted',
                      100. * correct_corrupted / total, epoch)
    writer.add_scalar('sigma/test_sigma_mean', sigma_0.mean().item(), epoch)
    writer.add_scalar('sigma/test_sigma_min', sigma_0.min().item(), epoch)
    writer.add_scalar('sigma/test_sigma_max', sigma_0.max().item(), epoch)
    return 100. * correct_corrupted / total, sigma_0
if is_trainable:
    # Read the pickle file
    Data_A = read_pickle('./Data/Data_Train/Data_Left_train.pkl')
    Data_B = read_pickle('./Data/Data_Train/Data_Right_train.pkl')
    print("Data A/B: ", Data_A.shape, Data_B.shape)

    # Initialize the model
    assert Data_A.shape == Data_B.shape
    if len(Data_A.shape) == 4 and len(Data_B.shape) == 4:
        # (H, W, C) image shape from the 4-D (N, H, W, C) arrays.
        img_shape = (Data_A.shape[1], Data_A.shape[2], Data_A.shape[3])
        banis = BANIS(img_shape)
    else:
        # BUG fix: this branch previously only printed a warning and then
        # fell through to `banis.train(...)` with `banis` undefined
        # (NameError). Fail fast with a clear error instead.
        raise ValueError("The shape of input dataset don't match!!!")

    # Train the model and record the runtime
    timer = ElapsedTimer()
    banis.train(Data_A,
                Data_B,
                EPOCHS=n_epochs,
                BATCH_SIZE=128,
                WARMUP_STEP=n_step,
                NUM_IMG=5)
    timer.elapsed_time()
else:
    # Plotting the sampling images
    A_gen_list = np.load("./A_gen_baait.npy")
    plot_samples(A_gen_list, name='Agen')
    B_gen_list = np.load("./B_gen_baait.npy")
    plot_samples(B_gen_list, name='Bgen')
    AB_rec_list = np.load("./AB_rec_baait.npy")
    plot_samples(AB_rec_list, name='ABrec')
def main(args):
    """Train or evaluate a doctr character-classification model (PyTorch).

    Depending on `args`, this either: evaluates only (`--test-only`), shows a
    sample batch (`--show-samples`), runs the LR finder (`--find-lr`), or runs
    the full training loop with optional W&B logging, checkpointing on best
    validation loss, hub push, and ONNX export.
    """
    print(args)

    if args.push_to_hub:
        login_to_hub()

    if not isinstance(args.workers, int):
        args.workers = min(16, mp.cpu_count())

    torch.backends.cudnn.benchmark = True

    vocab = VOCABS[args.vocab]
    fonts = args.font.split(",")

    # Load val data generator
    st = time.time()
    val_set = CharacterGenerator(
        vocab=vocab,
        num_samples=args.val_samples * len(vocab),
        cache_samples=True,
        img_transforms=Compose(
            [
                T.Resize((args.input_size, args.input_size)),
                # Ensure we have a 90% split of white-background images
                T.RandomApply(T.ColorInversion(), 0.9),
            ]
        ),
        font_family=fonts,
    )
    val_loader = DataLoader(
        val_set,
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        sampler=SequentialSampler(val_set),
        pin_memory=torch.cuda.is_available(),
    )
    print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
          f"{len(val_loader)} batches)")

    batch_transforms = Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301))

    # Load doctr model
    model = classification.__dict__[args.arch](pretrained=args.pretrained,
                                               num_classes=len(vocab),
                                               classes=list(vocab))

    # Resume weights
    if isinstance(args.resume, str):
        print(f"Resuming {args.resume}")
        checkpoint = torch.load(args.resume, map_location="cpu")
        model.load_state_dict(checkpoint)

    # GPU
    if isinstance(args.device, int):
        if not torch.cuda.is_available():
            raise AssertionError("PyTorch cannot access your GPU. Please investigate!")
        if args.device >= torch.cuda.device_count():
            raise ValueError("Invalid device index")
    # Silent default switch to GPU if available
    elif torch.cuda.is_available():
        args.device = 0
    else:
        # NOTE(review): "targe" is a typo for "target" in this log message.
        logging.warning("No accessible GPU, targe device set to CPU.")
    if torch.cuda.is_available():
        torch.cuda.set_device(args.device)
        model = model.cuda()

    if args.test_only:
        print("Running evaluation")
        val_loss, acc = evaluate(model, val_loader, batch_transforms)
        print(f"Validation loss: {val_loss:.6} (Acc: {acc:.2%})")
        return

    st = time.time()

    # Load train data generator
    train_set = CharacterGenerator(
        vocab=vocab,
        num_samples=args.train_samples * len(vocab),
        cache_samples=True,
        img_transforms=Compose(
            [
                T.Resize((args.input_size, args.input_size)),
                # Augmentations
                T.RandomApply(T.ColorInversion(), 0.9),
                # GaussianNoise
                T.RandomApply(Grayscale(3), 0.1),
                ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.02),
                T.RandomApply(GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 3)), 0.3),
                RandomRotation(15, interpolation=InterpolationMode.BILINEAR),
            ]
        ),
        font_family=fonts,
    )

    train_loader = DataLoader(
        train_set,
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.workers,
        sampler=RandomSampler(train_set),
        pin_memory=torch.cuda.is_available(),
    )
    print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
          f"{len(train_loader)} batches)")

    if args.show_samples:
        x, target = next(iter(train_loader))
        plot_samples(x, list(map(vocab.__getitem__, target)))
        return

    # Optimizer
    optimizer = torch.optim.Adam(
        [p for p in model.parameters() if p.requires_grad],
        args.lr,
        betas=(0.95, 0.99),
        eps=1e-6,
        weight_decay=args.weight_decay,
    )

    # LR Finder
    if args.find_lr:
        lrs, losses = record_lr(model, train_loader, batch_transforms, optimizer, amp=args.amp)
        plot_recorder(lrs, losses)
        return

    # Scheduler
    # NOTE(review): if args.sched is neither "cosine" nor "onecycle",
    # `scheduler` is never bound and fit_one_epoch below will raise
    # NameError — presumably argparse restricts the choices; verify.
    if args.sched == "cosine":
        scheduler = CosineAnnealingLR(optimizer, args.epochs * len(train_loader), eta_min=args.lr / 25e4)
    elif args.sched == "onecycle":
        scheduler = OneCycleLR(optimizer, args.lr, args.epochs * len(train_loader))

    # Training monitoring
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    exp_name = f"{args.arch}_{current_time}" if args.name is None else args.name

    # W&B
    if args.wb:
        run = wandb.init(
            name=exp_name,
            project="character-classification",
            config={
                "learning_rate": args.lr,
                "epochs": args.epochs,
                "weight_decay": args.weight_decay,
                "batch_size": args.batch_size,
                "architecture": args.arch,
                "input_size": args.input_size,
                "optimizer": "adam",
                "framework": "pytorch",
                "vocab": args.vocab,
                "scheduler": args.sched,
                "pretrained": args.pretrained,
            },
        )

    # Create loss queue
    min_loss = np.inf

    # Training loop
    mb = master_bar(range(args.epochs))
    for epoch in mb:
        fit_one_epoch(model, train_loader, batch_transforms, optimizer, scheduler, mb)

        # Validation loop at the end of each epoch
        val_loss, acc = evaluate(model, val_loader, batch_transforms)
        # Checkpoint only on improved validation loss.
        if val_loss < min_loss:
            print(f"Validation loss decreased {min_loss:.6} --> {val_loss:.6}: saving state...")
            torch.save(model.state_dict(), f"./{exp_name}.pt")
            min_loss = val_loss
        mb.write(f"Epoch {epoch + 1}/{args.epochs} - Validation loss: {val_loss:.6} (Acc: {acc:.2%})")
        # W&B
        if args.wb:
            wandb.log(
                {
                    "val_loss": val_loss,
                    "acc": acc,
                }
            )

    if args.wb:
        run.finish()

    if args.push_to_hub:
        push_to_hf_hub(model, exp_name, task="classification", run_config=args)

    if args.export_onnx:
        print("Exporting model to ONNX...")
        dummy_batch = next(iter(val_loader))
        dummy_input = dummy_batch[0].cuda() if torch.cuda.is_available() else dummy_batch[0]
        model_path = export_model_to_onnx(model, exp_name, dummy_input)
        print(f"Exported model saved in {model_path}")
def main(args):
    """Train or evaluate a doctr character-classification model (TensorFlow).

    Mirrors the PyTorch variant: supports test-only evaluation, sample
    preview, LR finder, mixed-precision (AMP), W&B logging, best-checkpoint
    saving, hub push, and ONNX export.
    """
    print(args)

    if args.push_to_hub:
        login_to_hub()

    if not isinstance(args.workers, int):
        args.workers = min(16, mp.cpu_count())

    vocab = VOCABS[args.vocab]
    fonts = args.font.split(",")

    # AMP
    if args.amp:
        mixed_precision.set_global_policy("mixed_float16")

    # Load val data generator
    st = time.time()
    val_set = CharacterGenerator(
        vocab=vocab,
        num_samples=args.val_samples * len(vocab),
        cache_samples=True,
        img_transforms=T.Compose(
            [
                T.Resize((args.input_size, args.input_size)),
                # Ensure we have a 90% split of white-background images
                T.RandomApply(T.ColorInversion(), 0.9),
            ]
        ),
        font_family=fonts,
    )
    val_loader = DataLoader(
        val_set,
        batch_size=args.batch_size,
        shuffle=False,
        drop_last=False,
        num_workers=args.workers,
        collate_fn=collate_fn,
    )
    print(
        f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
        f"{val_loader.num_batches} batches)"
    )

    # Load doctr model
    model = classification.__dict__[args.arch](
        pretrained=args.pretrained,
        input_shape=(args.input_size, args.input_size, 3),
        num_classes=len(vocab),
        classes=list(vocab),
        include_top=True,
    )

    # Resume weights
    if isinstance(args.resume, str):
        model.load_weights(args.resume)

    batch_transforms = T.Compose(
        [
            T.Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301)),
        ]
    )

    if args.test_only:
        print("Running evaluation")
        val_loss, acc = evaluate(model, val_loader, batch_transforms)
        print(f"Validation loss: {val_loss:.6} (Acc: {acc:.2%})")
        return

    st = time.time()

    # Load train data generator
    train_set = CharacterGenerator(
        vocab=vocab,
        num_samples=args.train_samples * len(vocab),
        cache_samples=True,
        img_transforms=T.Compose(
            [
                T.Resize((args.input_size, args.input_size)),
                # Augmentations
                T.RandomApply(T.ColorInversion(), 0.9),
                T.RandomApply(T.ToGray(3), 0.1),
                T.RandomJpegQuality(60),
                T.RandomSaturation(0.3),
                T.RandomContrast(0.3),
                T.RandomBrightness(0.3),
                # Blur
                T.RandomApply(T.GaussianBlur(kernel_shape=(3, 3), std=(0.1, 3)), 0.3),
            ]
        ),
        font_family=fonts,
    )
    train_loader = DataLoader(
        train_set,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True,
        num_workers=args.workers,
        collate_fn=collate_fn,
    )
    print(
        f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
        f"{train_loader.num_batches} batches)"
    )

    if args.show_samples:
        x, target = next(iter(train_loader))
        plot_samples(x, list(map(vocab.__getitem__, target)))
        return

    # Optimizer
    scheduler = tf.keras.optimizers.schedules.ExponentialDecay(
        args.lr,
        decay_steps=args.epochs * len(train_loader),
        decay_rate=1 / (1e3),  # final lr as a fraction of initial lr
        staircase=False,
    )
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=scheduler,
        beta_1=0.95,
        beta_2=0.99,
        epsilon=1e-6,
    )
    if args.amp:
        optimizer = mixed_precision.LossScaleOptimizer(optimizer)

    # LR Finder
    if args.find_lr:
        lrs, losses = record_lr(model, train_loader, batch_transforms, optimizer, amp=args.amp)
        plot_recorder(lrs, losses)
        return

    # Tensorboard to monitor training
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    exp_name = f"{args.arch}_{current_time}" if args.name is None else args.name

    # W&B
    if args.wb:
        run = wandb.init(
            name=exp_name,
            project="character-classification",
            config={
                "learning_rate": args.lr,
                "epochs": args.epochs,
                "weight_decay": 0.0,
                "batch_size": args.batch_size,
                "architecture": args.arch,
                "input_size": args.input_size,
                "optimizer": "adam",
                "framework": "tensorflow",
                "vocab": args.vocab,
                "scheduler": "exp_decay",
                "pretrained": args.pretrained,
            },
        )

    # Create loss queue
    min_loss = np.inf

    # Training loop
    mb = master_bar(range(args.epochs))
    for epoch in mb:
        fit_one_epoch(model, train_loader, batch_transforms, optimizer, mb, args.amp)

        # Validation loop at the end of each epoch
        val_loss, acc = evaluate(model, val_loader, batch_transforms)
        # Checkpoint only on improved validation loss.
        if val_loss < min_loss:
            print(f"Validation loss decreased {min_loss:.6} --> {val_loss:.6}: saving state...")
            model.save_weights(f"./{exp_name}/weights")
            min_loss = val_loss
        mb.write(f"Epoch {epoch + 1}/{args.epochs} - Validation loss: {val_loss:.6} (Acc: {acc:.2%})")
        # W&B
        if args.wb:
            wandb.log(
                {
                    "val_loss": val_loss,
                    "acc": acc,
                }
            )

    if args.wb:
        run.finish()

    if args.push_to_hub:
        push_to_hf_hub(model, exp_name, task="classification", run_config=args)

    if args.export_onnx:
        print("Exporting model to ONNX...")
        dummy_input = [tf.TensorSpec([None, args.input_size, args.input_size, 3], tf.float32, name="input")]
        model_path, _ = export_model_to_onnx(model, exp_name, dummy_input)
        print(f"Exported model saved in {model_path}")
def train(epoch, model, train_loader, optimizer, writer, sigma_0, lr_sigma, iters_sig, attacker, num_noise_vec=1):
    """One epoch of adversarial training with per-sample smoothing sigmas.

    Per batch: optimizes sigma via `get_sigma` (updating `sigma_0` in place),
    replicates the batch `num_noise_vec` times for Monte-Carlo noise, crafts
    adversarial inputs with `attacker`, and trains the model on the
    noise-corrupted adversarial batch. Logs a figure and scalar summaries.

    Returns the updated `sigma_0`.
    """
    model = model.train()
    train_loss = 0
    total = 0
    correct = 0
    # CE_loss = nn.CrossEntropyLoss()
    import time
    start_time = time.time()
    for batch_idx, (batch, targets, idx) in enumerate(train_loader):
        start_time = time.time()
        optimizer.zero_grad()
        batch_size = len(idx)
        batch = batch.to(device)
        targets = targets.to(device)
        # model.eval()
        # NOTE(review): `gaussian_num_ds` is not a parameter of this
        # function — it is resolved as a module-level global; confirm it is
        # defined at call time.
        sigma, _ = get_sigma(model,
                             batch,
                             lr_sigma,
                             sigma_0[idx],
                             iters_sig,
                             device,
                             gaussian_num=gaussian_num_ds)
        # model.train()
        sigma_0[idx] = sigma  # updating sigma
        # Replicate each sample num_noise_vec times: (B, C, H, W) ->
        # (B * num_noise_vec, C, H, W).
        new_shape = [batch_size * num_noise_vec]
        new_shape.extend(batch[0].shape)
        batch = batch.repeat((1, num_noise_vec, 1, 1)).view(new_shape)
        # repeating sigmas to do the monte carlo
        sigma_repeated = sigma.repeat(
            (1, num_noise_vec, 1, 1)).view(-1, 1, 1, 1)
        noise = torch.randn_like(batch) * sigma_repeated
        # Repeat targets to match the replicated batch.
        targets = targets.unsqueeze(1).repeat(1, num_noise_vec).reshape(
            -1, 1).squeeze()
        # Getting adversarial instances:
        # Freeze the model while the attacker crafts inputs, then unfreeze.
        model.requires_grad_(False)
        model.eval()
        batch = attacker.attack(model,
                                batch,
                                targets,
                                noise=noise,
                                num_noise_vectors=num_noise_vec,
                                no_grad=False)
        model.train()
        model.requires_grad_(True)
        batch_corrupted = batch + noise
        outputs_softmax = model(batch_corrupted)
        # clean_output = model(batch)
        total_loss = compute_loss(outputs_softmax, targets)
        # clean_loss = compute_loss(clean_output, targets)
        # total_loss += clean_loss
        train_loss += total_loss.item() * len(batch)
        _, predicted = outputs_softmax.max(1)
        total += batch_size * num_noise_vec
        correct += predicted.eq(targets).sum().item()
        # update parameters
        total_loss.backward()
        optimizer.step()
        print('Required time (mins) for a batch is: ',
              (time.time() - start_time) / 60.0)
        if batch_idx % 100 == 0:
            print(
                '+ Epoch: {}. Iter: [{}/{} ({:.0f}%)]. Loss: {}. Accuracy: {}'.
                format(epoch, batch_idx * len(batch),
                       len(train_loader.dataset),
                       100. * batch_idx / len(train_loader),
                       train_loss / total, 100. * correct / total))
    # Figure uses the last batch: top row adversarial, bottom row
    # adversarial + noise.
    n = min(batch.size(0), 8)
    comparison = torch.cat([batch[:n], batch_corrupted[:n]])
    comparison = torch.clamp(comparison, min=0, max=1)
    fig = plot_samples(comparison.detach().cpu().numpy().transpose(
        0, 2, 3, 1).squeeze(),
                       h=2,
                       w=n)
    writer.add_figure('sample of noisy trained examples', fig, epoch)
    writer.add_scalar('loss/train_loss', train_loss / total, epoch)
    writer.add_scalar('accuracy/train_accuracy', 100. * correct / total,
                      epoch)
    writer.add_scalar('sigma/train_sigma_mean', sigma_0.mean().item(), epoch)
    writer.add_scalar('sigma/train_sigma_min', sigma_0.min().item(), epoch)
    writer.add_scalar('sigma/train_sigma_max', sigma_0.max().item(), epoch)
    return sigma_0
def main(args):
    """Train or evaluate a doctr text-recognition model (TensorFlow).

    Uses on-disk RecognitionDataset folders when `--val-path`/`--train-path`
    are given (hashing labels.json for reproducibility tracking), otherwise
    synthetic WordGenerator data. Supports test-only evaluation, sample
    preview, LR finder, AMP, W&B logging, best-checkpoint saving, and hub
    push.
    """
    print(args)

    if args.push_to_hub:
        login_to_hub()

    if not isinstance(args.workers, int):
        args.workers = min(16, mp.cpu_count())

    vocab = VOCABS[args.vocab]
    fonts = args.font.split(",")

    # AMP
    if args.amp:
        mixed_precision.set_global_policy("mixed_float16")

    st = time.time()

    if isinstance(args.val_path, str):
        with open(os.path.join(args.val_path, "labels.json"), "rb") as f:
            val_hash = hashlib.sha256(f.read()).hexdigest()

        # Load val data generator
        val_set = RecognitionDataset(
            img_folder=os.path.join(args.val_path, "images"),
            labels_path=os.path.join(args.val_path, "labels.json"),
            img_transforms=T.Resize((args.input_size, 4 * args.input_size),
                                    preserve_aspect_ratio=True),
        )
    else:
        val_hash = None
        # Load synthetic data generator
        val_set = WordGenerator(
            vocab=vocab,
            min_chars=args.min_chars,
            max_chars=args.max_chars,
            num_samples=args.val_samples * len(vocab),
            font_family=fonts,
            img_transforms=T.Compose([
                T.Resize((args.input_size, 4 * args.input_size),
                         preserve_aspect_ratio=True),
                # Ensure we have a 90% split of white-background images
                T.RandomApply(T.ColorInversion(), 0.9),
            ]),
        )

    val_loader = DataLoader(
        val_set,
        batch_size=args.batch_size,
        shuffle=False,
        drop_last=False,
        num_workers=args.workers,
    )
    print(
        f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
        f"{val_loader.num_batches} batches)")

    # Load doctr model
    model = recognition.__dict__[args.arch](
        pretrained=args.pretrained,
        input_shape=(args.input_size, 4 * args.input_size, 3),
        vocab=vocab,
    )
    # Resume weights
    if isinstance(args.resume, str):
        model.load_weights(args.resume)

    # Metrics
    val_metric = TextMatch()

    batch_transforms = T.Compose([
        T.Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301)),
    ])

    if args.test_only:
        print("Running evaluation")
        val_loss, exact_match, partial_match = evaluate(
            model, val_loader, batch_transforms, val_metric)
        print(
            f"Validation loss: {val_loss:.6} (Exact: {exact_match:.2%} | Partial: {partial_match:.2%})"
        )
        return

    st = time.time()

    if isinstance(args.train_path, str):
        # Load train data generator
        base_path = Path(args.train_path)
        # Either one flat dataset folder, or a folder of sub-datasets.
        parts = ([base_path] if base_path.joinpath("labels.json").is_file()
                 else [base_path.joinpath(sub) for sub in os.listdir(base_path)])
        with open(parts[0].joinpath("labels.json"), "rb") as f:
            train_hash = hashlib.sha256(f.read()).hexdigest()

        train_set = RecognitionDataset(
            parts[0].joinpath("images"),
            parts[0].joinpath("labels.json"),
            img_transforms=T.Compose([
                T.RandomApply(T.ColorInversion(), 0.1),
                T.Resize((args.input_size, 4 * args.input_size),
                         preserve_aspect_ratio=True),
                # Augmentations
                T.RandomJpegQuality(60),
                T.RandomSaturation(0.3),
                T.RandomContrast(0.3),
                T.RandomBrightness(0.3),
            ]),
        )
        if len(parts) > 1:
            # Merge the remaining sub-datasets into the first one.
            for subfolder in parts[1:]:
                train_set.merge_dataset(
                    RecognitionDataset(subfolder.joinpath("images"),
                                       subfolder.joinpath("labels.json")))
    else:
        train_hash = None
        # Load synthetic data generator
        train_set = WordGenerator(
            vocab=vocab,
            min_chars=args.min_chars,
            max_chars=args.max_chars,
            num_samples=args.train_samples * len(vocab),
            font_family=fonts,
            img_transforms=T.Compose([
                T.Resize((args.input_size, 4 * args.input_size),
                         preserve_aspect_ratio=True),
                # Ensure we have a 90% split of white-background images
                T.RandomApply(T.ColorInversion(), 0.9),
                T.RandomJpegQuality(60),
                T.RandomSaturation(0.3),
                T.RandomContrast(0.3),
                T.RandomBrightness(0.3),
            ]),
        )

    train_loader = DataLoader(
        train_set,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True,
        num_workers=args.workers,
    )
    print(
        f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
        f"{train_loader.num_batches} batches)")

    if args.show_samples:
        x, target = next(iter(train_loader))
        plot_samples(x, target)
        return

    # Optimizer
    scheduler = tf.keras.optimizers.schedules.ExponentialDecay(
        args.lr,
        decay_steps=args.epochs * len(train_loader),
        decay_rate=1 / (25e4),  # final lr as a fraction of initial lr
        staircase=False,
    )
    optimizer = tf.keras.optimizers.Adam(learning_rate=scheduler,
                                         beta_1=0.95,
                                         beta_2=0.99,
                                         epsilon=1e-6,
                                         clipnorm=5)
    if args.amp:
        optimizer = mixed_precision.LossScaleOptimizer(optimizer)
    # LR Finder
    if args.find_lr:
        lrs, losses = record_lr(model, train_loader, batch_transforms, optimizer, amp=args.amp)
        plot_recorder(lrs, losses)
        return

    # Tensorboard to monitor training
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    exp_name = f"{args.arch}_{current_time}" if args.name is None else args.name

    # W&B
    if args.wb:
        run = wandb.init(
            name=exp_name,
            project="text-recognition",
            config={
                "learning_rate": args.lr,
                "epochs": args.epochs,
                "weight_decay": 0.0,
                "batch_size": args.batch_size,
                "architecture": args.arch,
                "input_size": args.input_size,
                "optimizer": "adam",
                "framework": "tensorflow",
                "scheduler": "exp_decay",
                "vocab": args.vocab,
                "train_hash": train_hash,
                "val_hash": val_hash,
                "pretrained": args.pretrained,
            },
        )

    min_loss = np.inf

    # Training loop
    mb = master_bar(range(args.epochs))
    for epoch in mb:
        fit_one_epoch(model, train_loader, batch_transforms, optimizer, mb, args.amp)

        # Validation loop at the end of each epoch
        val_loss, exact_match, partial_match = evaluate(
            model, val_loader, batch_transforms, val_metric)
        # Checkpoint only on improved validation loss.
        if val_loss < min_loss:
            print(
                f"Validation loss decreased {min_loss:.6} --> {val_loss:.6}: saving state..."
            )
            model.save_weights(f"./{exp_name}/weights")
            min_loss = val_loss
        mb.write(
            f"Epoch {epoch + 1}/{args.epochs} - Validation loss: {val_loss:.6} "
            f"(Exact: {exact_match:.2%} | Partial: {partial_match:.2%})")
        # W&B
        if args.wb:
            wandb.log({
                "val_loss": val_loss,
                "exact_match": exact_match,
                "partial_match": partial_match,
            })

    if args.wb:
        run.finish()

    if args.push_to_hub:
        push_to_hf_hub(model, exp_name, task="recognition", run_config=args)
def main():
    """Train a WGAN on the anime-face dataset (TensorFlow 1.x).

    Runs an unbounded training loop (stop with Ctrl-C): 5 discriminator
    updates per generator update, periodic sample grids from a fixed latent
    batch, and a checkpoint plus loss printout every 100 iterations.
    """
    # set GPU card
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    # load anime face
    data_dir = '../anime_face/data_64/images/'
    data_extra_dir = '../anime_face/extra_data/images/'
    ds = dataset()
    ds.load_data(data_dir, verbose=0)
    ds.load_data(data_extra_dir, verbose=0)
    ds.shuffle()
    # reset graph
    tf.reset_default_graph()
    # set session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    # build model
    model = GAN(sess, gf_dim=128)
    # training
    # Fixed latent batch so sample grids are comparable across iterations.
    z_plot = sample_z(36, 100)
    # initial fake image
    z = sample_z((bs), 100)
    i = 1
    # NOTE(review): intentionally infinite — there is no stopping
    # condition; training is expected to be interrupted manually.
    while True:
        # Log sample grids on a schedule that gets sparser over time.
        if (i == 1) or (i <= 100 and i % 20 == 0) or (
                i <= 200 and i % 50 == 0) or (i <= 1000 and i % 100 == 0) or (
                    i % 200 == 0):
            g_samples = model.generate(z_plot)
            plot_samples(g_samples,
                         save=True,
                         filename=str(i),
                         folder_path='out2/',
                         h=6,
                         w=6)
        # train discriminator more
        for _ in range(5):
            real_img = ds.next_batch(bs)
            z = sample_z(bs, 100)
            fake_img = model.generate(z)
            # train D
            D_loss = model.train_D(real_img, fake_img)
        G_loss = model.train_G(bs)
        if (i % 100) == 0:
            model.save(model_name='WGAN_v2')
            # Evaluate current G/D losses on fresh noise and real samples.
            z_loss = sample_z(64, 100)
            g_loss = model.generate(sample_z(32, 100))
            g, d = model.sess.run([model.G_loss, model.D_loss],
                                  feed_dict={
                                      model.xs: ds.random_sample(32),
                                      model.gs: g_loss,
                                      model.zs: z_loss
                                  })
            print(str(i) + ' iteration:')
            print('D_loss:', d)
            print('G_loss:', g, '\n')
        i = i + 1
# Unrolled-GAN training loop: alternate d_steps discriminator updates and
# g_steps generator updates, logging a scatter of generated points every
# `log_interval` iterations, then plot all collected samples at the end.
samples = []
for it in tqdm(range(config.num_iterations)):
    # Discriminator updates; d_loop returns (real_loss, fake_loss).
    d_infos = []
    for d_index in range(config.d_steps):
        d_info = d_loop(G, D, d_optimizer, criterion)
        d_infos.append(d_info)
    d_infos = np.mean(d_infos, 0)
    d_real_loss, d_fake_loss = d_infos

    # Generator updates.
    g_infos = []
    for g_index in range(config.g_steps):
        g_info = g_loop(G, D, g_optimizer, d_optimizer, criterion)
        g_infos.append(g_info)
    g_infos = np.mean(g_infos)
    g_loss = g_infos

    if it % config.log_interval == 0:
        g_fake_data = g_sample()
        samples.append(g_fake_data)
        utils.plot_scatter(points=g_fake_data,
                           centers=dset.centers,
                           title='[{}] Iteration {}'.format(prefix, it),
                           path='{}/samples_{}.png'.format(exp_dir, it))
        print(d_real_loss, d_fake_loss, g_loss)
        # BUG fix: a stray bare `raise` here (with no active exception)
        # aborted the run with RuntimeError on the very first logging step
        # and made the final summary plot below unreachable. Removed.

utils.plot_samples(samples, config.log_interval, config.unrolled_steps,
                   path='{}/samples_{}.png'.format(exp_dir, 'final'))
def train_fader_network():
    """Train a Fader-network encoder/decoder against a latent discriminator.

    Alternates an adversarial step (reconstruction MSE plus a BCE term that
    pushes the discriminator off the true attributes, weighted by a ramping
    lambda_e) with a discriminator step. Every `sample_every` epochs, swaps a
    random attribute on the test set and saves the resulting reconstructions.
    Saves both networks' parameters on exit (including Ctrl-C).
    """
    gpu_id = 1
    use_cuda = False  # use true instead
    data_dir = 'data'
    sample_every = 10
    test_dir = join(data_dir, 'test-samples')
    encoder_decoder_fpath = join(data_dir, 'weights', 'adver.params')
    discriminator_fpath = join(data_dir, 'weights', 'discr.params')
    train, valid, test = split_train_val_test(data_dir)
    num_attr = train.attribute_names.shape[0]
    encoder_decoder = EncoderDecoder(num_attr, gpu_id=gpu_id)
    discriminator = Discriminator(num_attr)
    if use_cuda:
        encoder_decoder.cuda(gpu_id)
        discriminator.cuda(gpu_id)
    train_iter = DataLoader(train, batch_size=64, shuffle=True, num_workers=8)
    valid_iter = DataLoader(valid, batch_size=64, shuffle=False, num_workers=8)
    test_iter = DataLoader(test, batch_size=64, shuffle=False, num_workers=8)
    # train_iter = DataLoader(train, batch_size=32, shuffle=True, num_workers=8)
    # valid_iter = DataLoader(valid, batch_size=32, shuffle=False, num_workers=8)
    # test_iter = DataLoader(test, batch_size=32, shuffle=False, num_workers=8)
    max_epochs = 1000
    lr, beta1 = 2e-3, 0.5
    adversarial_optimizer = optim.Adam(encoder_decoder.parameters(), lr=lr,
                                       betas=(beta1, 0.999))
    discriminator_optimizer = optim.Adam(discriminator.parameters(), lr=lr,
                                         betas=(beta1, 0.999))
    # NOTE(review): `size_average` is a deprecated pre-1.0 PyTorch argument;
    # this file uses the legacy Variable API throughout.
    mse_loss = nn.MSELoss(size_average=True)
    bce_loss = nn.BCELoss(size_average=True)
    num_iters = 5
    # lambda_e ramps linearly from 0 to 1e-4 over the first 500k iterations.
    lambda_e = np.linspace(0, 1e-4, 500000)
    attribute_classifier = AttributeClassifier(num_attr, use_cuda=False)  # load classifier instead
    try:
        for epoch in range(1, max_epochs):
            encoder_decoder.train()
            discriminator.train()
            for iteration, (x, yb, yt, _) in enumerate(train_iter, start=1):
                if use_cuda:
                    x = x.cuda(gpu_id)
                    yb, yt = yb.cuda(gpu_id), yt.cuda(gpu_id)
                x, yb, yt = Variable(x), Variable(yb), Variable(yt)
                # changing yb and yt to be the output of the classifier
                yt.data = attribute_classifier(x).data
                yb.data[:, 0] = yt.data
                yb.data[:, 1] = 1 - yt.data
                #print yb.data.cpu().numpy().shape
                #print yt.data.cpu().numpy().shape
                adversarial_optimizer.zero_grad()
                z, x_hat = encoder_decoder(x, yb)
                #if (epoch == 1) or (epoch % sample_every == 0):
                #if (epoch % sample_every == 0):
                #    plot_samples(x, x_hat, prefix='train_%d_%d' % (
                #        epoch, iteration))
                # send the output of the encoder as a new Variable that is not
                # part of the backward pass
                # not sure if this is the correct way to do so
                # https://discuss.pytorch.org/t/how-to-copy-a-variable-in-a-network-graph/1603/9
                z_in = Variable(z.data, requires_grad=False)
                discriminator_optimizer.zero_grad()
                y_hat = discriminator(z_in)
                # adversarial loss
                y_in = Variable(y_hat.data, requires_grad=False)
                le_idx = min(500000 - 1, num_iters)
                le_val = Variable(torch.FloatTensor([lambda_e[le_idx]
                                                     ]).float(),
                                  requires_grad=False)
                if use_cuda:
                    le_val = le_val.cuda(gpu_id)
                advers_loss = mse_loss(x_hat, x) +\
                    le_val * bce_loss(y_in, 1 - yt)
                advers_loss.backward()
                adversarial_optimizer.step()
                # discriminative loss
                discrim_loss = bce_loss(y_hat, yt)
                discrim_loss.backward()
                discriminator_optimizer.step()
                print(' Train epoch %d, iter %d (lambda_e = %.2e)' %
                      (epoch, iteration, le_val.data[0]))
                print(' adv. loss = %.6f' % (advers_loss.data[0]))
                print(' dsc. loss = %.6f' % (discrim_loss.data[0]))
                num_iters += 1
            encoder_decoder.eval()
            discriminator.eval()
            # Validation pass: same losses, no optimizer steps.
            # NOTE(review): `le_val` here is whatever the last training
            # iteration left behind — confirm this is intended.
            for iteration, (x, yb, yt, _) in enumerate(valid_iter, start=1):
                if use_cuda:
                    x = x.cuda(gpu_id)
                    yb, yt = yb.cuda(gpu_id), yt.cuda(gpu_id)
                x, yb, yt = Variable(x), Variable(yb), Variable(yt)
                yt.data = attribute_classifier(x).data
                yb.data[:, 0] = yt.data
                yb.data[:, 1] = 1 - yt.data
                z, x_hat = encoder_decoder(x, yb)
                #plot_samples(x, x_hat, prefix='valid_%d_%d' % (
                #    epoch, iteration))
                z_in = Variable(z.data, requires_grad=False)
                y_hat = discriminator(z_in)
                y_in = Variable(y_hat.data, requires_grad=False)
                valid_advers_loss = mse_loss(x_hat, x) +\
                    le_val * bce_loss(y_in, 1 - yt)
                valid_discrim_loss = bce_loss(y_hat, yt)
                print(' Valid epoch %d, iter %d (lambda_e = %.2e)' %
                      (epoch, iteration, le_val.data[0]))
                print(' adv. loss = %.6f' % (valid_advers_loss.data[0]))
                print(' dsc. loss = %.6f' % (valid_discrim_loss.data[0]))
            if (epoch % sample_every == 0):
                encoder_decoder.eval()
                for iteration, (x, yb, ys, fp) in enumerate(test_iter, 1):
                    # randomly choose an attribute and swap the targets
                    to_swap = np.random.choice(test.attribute_names)
                    swap_idx, = np.where(test.attribute_names == to_swap)[0]
                    # map (0, 1) --> (1, 0), and (1, 0) --> (0, 1)
                    yb[:, 2 * swap_idx] = 1 - yb[:, 2 * swap_idx]
                    yb[:, 2 * swap_idx + 1] = 1 - yb[:, 2 * swap_idx + 1]
                    if use_cuda:
                        x, yb = x.cuda(gpu_id), yb.cuda(gpu_id)
                    x, yb = Variable(x), Variable(yb)
                    yt.data = attribute_classifier(x).data
                    yb.data[:, 0] = yt.data
                    yb.data[:, 1] = 1 - yt.data
                    _, x_hat = encoder_decoder(x, yb)
                    sample_dir = join(test_dir, '%s' % epoch, '%s' % to_swap)
                    if not exists(sample_dir):
                        makedirs(sample_dir)
                    fnames = ['%s.png' % splitext(basename(f))[0] for f in fp]
                    fpaths = [join(sample_dir, f) for f in fnames]
                    plot_samples(x, x_hat, fpaths)
    except KeyboardInterrupt:
        print('Caught Ctrl-C, interrupting training.')
    except RuntimeError:
        # NOTE(review): this swallows all RuntimeErrors (e.g. CUDA OOM)
        # so the parameter save below still runs — consider logging the
        # exception details.
        print('RuntimeError')
    print('Saving encoder/decoder parameters to %s' % (encoder_decoder_fpath))
    torch.save(encoder_decoder.state_dict(), encoder_decoder_fpath)
    print('Saving discriminator parameters to %s' % (discriminator_fpath))
    torch.save(discriminator.state_dict(), discriminator_fpath)
def main(args):
    """Train a doctr text-detection model (PyTorch backend).

    Builds validation/train datasets and loaders, optionally runs
    evaluation only, otherwise trains for ``args.epochs`` epochs with
    per-epoch validation, checkpointing on best validation loss, and
    optional W&B logging / HF Hub upload.

    Args:
        args: parsed CLI namespace (paths, arch, hyper-parameters, flags).
    """
    print(args)

    if args.push_to_hub:
        login_to_hub()

    if not isinstance(args.workers, int):
        args.workers = min(16, mp.cpu_count())

    torch.backends.cudnn.benchmark = True

    st = time.time()
    val_set = DetectionDataset(
        img_folder=os.path.join(args.val_path, "images"),
        label_path=os.path.join(args.val_path, "labels.json"),
        sample_transforms=T.SampleCompose(
            ([T.Resize((args.input_size, args.input_size),
                       preserve_aspect_ratio=True, symmetric_pad=True)]
             if not args.rotation or args.eval_straight else [])
            + ([
                T.Resize(args.input_size, preserve_aspect_ratio=True),  # This does not pad
                T.RandomRotate(90, expand=True),
                T.Resize((args.input_size, args.input_size),
                         preserve_aspect_ratio=True, symmetric_pad=True),
            ] if args.rotation and not args.eval_straight else [])
        ),
        use_polygons=args.rotation and not args.eval_straight,
    )
    val_loader = DataLoader(
        val_set,
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        sampler=SequentialSampler(val_set),
        pin_memory=torch.cuda.is_available(),
        collate_fn=val_set.collate_fn,
    )
    print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
          f"{len(val_loader)} batches)")
    # Hash the label file so the exact dataset version is recorded in W&B.
    with open(os.path.join(args.val_path, "labels.json"), "rb") as f:
        val_hash = hashlib.sha256(f.read()).hexdigest()

    batch_transforms = Normalize(mean=(0.798, 0.785, 0.772),
                                 std=(0.264, 0.2749, 0.287))

    # Load doctr model
    model = detection.__dict__[args.arch](
        pretrained=args.pretrained, assume_straight_pages=not args.rotation)

    # Resume weights
    if isinstance(args.resume, str):
        print(f"Resuming {args.resume}")
        checkpoint = torch.load(args.resume, map_location="cpu")
        model.load_state_dict(checkpoint)

    # GPU
    if isinstance(args.device, int):
        if not torch.cuda.is_available():
            raise AssertionError("PyTorch cannot access your GPU. Please investigate!")
        if args.device >= torch.cuda.device_count():
            raise ValueError("Invalid device index")
    # Silent default switch to GPU if available
    elif torch.cuda.is_available():
        args.device = 0
    else:
        # FIX: "targe" -> "target" typo in the warning message
        logging.warning("No accessible GPU, target device set to CPU.")
    if torch.cuda.is_available():
        torch.cuda.set_device(args.device)
        model = model.cuda()

    # Metrics
    val_metric = LocalizationConfusion(
        use_polygons=args.rotation and not args.eval_straight,
        mask_shape=(args.input_size, args.input_size))

    if args.test_only:
        print("Running evaluation")
        val_loss, recall, precision, mean_iou = evaluate(
            model, val_loader, batch_transforms, val_metric, amp=args.amp)
        print(f"Validation loss: {val_loss:.6} (Recall: {recall:.2%} | Precision: {precision:.2%} | "
              f"Mean IoU: {mean_iou:.2%})")
        return

    st = time.time()
    # Load both train and val data generators
    train_set = DetectionDataset(
        img_folder=os.path.join(args.train_path, "images"),
        label_path=os.path.join(args.train_path, "labels.json"),
        img_transforms=Compose([
            # Augmentations
            T.RandomApply(T.ColorInversion(), 0.1),
            ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.02),
        ]),
        sample_transforms=T.SampleCompose(
            ([T.Resize((args.input_size, args.input_size),
                       preserve_aspect_ratio=True, symmetric_pad=True)]
             if not args.rotation else [])
            + ([
                T.Resize(args.input_size, preserve_aspect_ratio=True),
                T.RandomRotate(90, expand=True),
                T.Resize((args.input_size, args.input_size),
                         preserve_aspect_ratio=True, symmetric_pad=True),
            ] if args.rotation else [])
        ),
        use_polygons=args.rotation,
    )
    train_loader = DataLoader(
        train_set,
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.workers,
        sampler=RandomSampler(train_set),
        pin_memory=torch.cuda.is_available(),
        collate_fn=train_set.collate_fn,
    )
    print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
          f"{len(train_loader)} batches)")
    with open(os.path.join(args.train_path, "labels.json"), "rb") as f:
        train_hash = hashlib.sha256(f.read()).hexdigest()

    if args.show_samples:
        x, target = next(iter(train_loader))
        plot_samples(x, target)
        return

    # Backbone freezing
    if args.freeze_backbone:
        for p in model.feat_extractor.parameters():
            # FIX: was `p.reguires_grad_(False)` (typo) which raises AttributeError
            p.requires_grad_(False)

    # Optimizer (only over trainable parameters, so frozen ones are excluded)
    optimizer = torch.optim.Adam(
        [p for p in model.parameters() if p.requires_grad],
        args.lr,
        betas=(0.95, 0.99),
        eps=1e-6,
        weight_decay=args.weight_decay,
    )

    # LR Finder
    if args.find_lr:
        lrs, losses = record_lr(model, train_loader, batch_transforms,
                                optimizer, amp=args.amp)
        plot_recorder(lrs, losses)
        return

    # Scheduler
    if args.sched == "cosine":
        scheduler = CosineAnnealingLR(optimizer, args.epochs * len(train_loader),
                                      eta_min=args.lr / 25e4)
    elif args.sched == "onecycle":
        scheduler = OneCycleLR(optimizer, args.lr, args.epochs * len(train_loader))
    else:
        # FIX: fail fast instead of hitting UnboundLocalError below
        raise ValueError(f"Unsupported scheduler: {args.sched}")

    # Training monitoring
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    exp_name = f"{args.arch}_{current_time}" if args.name is None else args.name

    # W&B
    if args.wb:
        run = wandb.init(
            name=exp_name,
            project="text-detection",
            config={
                "learning_rate": args.lr,
                "epochs": args.epochs,
                "weight_decay": args.weight_decay,
                "batch_size": args.batch_size,
                "architecture": args.arch,
                "input_size": args.input_size,
                "optimizer": "adam",
                "framework": "pytorch",
                "scheduler": args.sched,
                "train_hash": train_hash,
                "val_hash": val_hash,
                "pretrained": args.pretrained,
                "rotation": args.rotation,
                "amp": args.amp,
            },
        )

    # Create loss queue
    min_loss = np.inf

    # Training loop
    mb = master_bar(range(args.epochs))
    for epoch in mb:
        fit_one_epoch(model, train_loader, batch_transforms, optimizer,
                      scheduler, mb, amp=args.amp)
        # Validation loop at the end of each epoch
        val_loss, recall, precision, mean_iou = evaluate(
            model, val_loader, batch_transforms, val_metric, amp=args.amp)
        if val_loss < min_loss:
            print(f"Validation loss decreased {min_loss:.6} --> {val_loss:.6}: saving state...")
            torch.save(model.state_dict(), f"./{exp_name}.pt")
            min_loss = val_loss
        log_msg = f"Epoch {epoch + 1}/{args.epochs} - Validation loss: {val_loss:.6} "
        if any(val is None for val in (recall, precision, mean_iou)):
            log_msg += "(Undefined metric value, caused by empty GTs or predictions)"
        else:
            log_msg += f"(Recall: {recall:.2%} | Precision: {precision:.2%} | Mean IoU: {mean_iou:.2%})"
        mb.write(log_msg)
        # W&B
        if args.wb:
            wandb.log({
                "val_loss": val_loss,
                "recall": recall,
                "precision": precision,
                "mean_iou": mean_iou,
            })

    if args.wb:
        run.finish()

    if args.push_to_hub:
        push_to_hf_hub(model, exp_name, task="detection", run_config=args)
def main(args):
    """Train a doctr artefact object-detection model (PyTorch backend).

    Loads the DocArtefacts dataset, optionally runs evaluation only,
    otherwise trains with SGD + StepLR, checkpointing on best F1 score
    and optionally logging metrics to W&B.

    Args:
        args: parsed CLI namespace (arch, hyper-parameters, flags).
    """
    print(args)

    if not isinstance(args.workers, int):
        args.workers = min(16, mp.cpu_count())

    torch.backends.cudnn.benchmark = True

    st = time.time()
    val_set = DocArtefacts(
        train=False,
        download=True,
        img_transforms=T.Resize((args.input_size, args.input_size)),
    )
    val_loader = DataLoader(
        val_set,
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        sampler=SequentialSampler(val_set),
        pin_memory=torch.cuda.is_available(),
        collate_fn=val_set.collate_fn,
    )
    print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
          f"{len(val_loader)} batches)")

    # Load doctr model
    model = obj_detection.__dict__[args.arch](pretrained=args.pretrained, num_classes=5)

    # Resume weights
    if isinstance(args.resume, str):
        print(f"Resuming {args.resume}")
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint)

    # GPU
    if isinstance(args.device, int):
        if not torch.cuda.is_available():
            raise AssertionError("PyTorch cannot access your GPU. Please investigate!")
        if args.device >= torch.cuda.device_count():
            raise ValueError("Invalid device index")
    # Silent default switch to GPU if available
    elif torch.cuda.is_available():
        args.device = 0
    else:
        logging.warning("No accessible GPU, target device set to CPU.")
    if torch.cuda.is_available():
        torch.cuda.set_device(args.device)
        model = model.cuda()

    # Metrics
    metric = DetectionMetric(iou_thresh=0.5)

    if args.test_only:
        print("Running evaluation")
        recall, precision, mean_iou = evaluate(model, val_loader, metric, amp=args.amp)
        print(f"Recall: {recall:.2%} | Precision: {precision:.2%} |IoU: {mean_iou:.2%}")
        return

    st = time.time()
    # Load train data generators
    train_set = DocArtefacts(
        train=True,
        download=True,
        img_transforms=Compose([
            T.Resize((args.input_size, args.input_size)),
            T.RandomApply(T.GaussianNoise(0., 0.25), p=0.5),
            ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.02),
            T.RandomApply(GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 3)), .3),
        ]),
        sample_transforms=T.RandomHorizontalFlip(p=0.5),
    )
    train_loader = DataLoader(
        train_set,
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.workers,
        sampler=RandomSampler(train_set),
        pin_memory=torch.cuda.is_available(),
        collate_fn=train_set.collate_fn,
    )
    print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
          f"{len(train_loader)} batches)")

    if args.show_samples:
        images, targets = next(iter(train_loader))
        targets = convert_to_abs_coords(targets, images.shape)
        plot_samples(images, targets, train_set.CLASSES)
        return

    # Backbone freezing
    if args.freeze_backbone:
        for p in model.backbone.parameters():
            # FIX: was `p.reguires_grad_(False)` (typo) which raises AttributeError
            p.requires_grad_(False)

    # Optimizer (only trainable parameters, so frozen ones are excluded)
    optimizer = optim.SGD([p for p in model.parameters() if p.requires_grad],
                          lr=args.lr,
                          weight_decay=args.weight_decay)

    # LR Finder
    if args.find_lr:
        lrs, losses = record_lr(model, train_loader, optimizer, amp=args.amp)
        plot_recorder(lrs, losses)
        return

    # Scheduler
    scheduler = StepLR(optimizer, step_size=8, gamma=0.7)

    # Training monitoring
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    exp_name = f"{args.arch}_{current_time}" if args.name is None else args.name

    # W&B
    if args.wb:
        run = wandb.init(name=exp_name,
                         project="object-detection",
                         config={
                             "learning_rate": args.lr,
                             "epochs": args.epochs,
                             "weight_decay": args.weight_decay,
                             "batch_size": args.batch_size,
                             "architecture": args.arch,
                             "input_size": args.input_size,
                             "optimizer": "sgd",
                             "framework": "pytorch",
                             "scheduler": "step",
                             "pretrained": args.pretrained,
                             "amp": args.amp,
                         })

    mb = master_bar(range(args.epochs))
    max_score = 0.
    for epoch in mb:
        fit_one_epoch(model, train_loader, optimizer, scheduler, mb, amp=args.amp)
        # Validation loop at the end of each epoch
        recall, precision, mean_iou = evaluate(model, val_loader, metric, amp=args.amp)
        # Guard against 0/0 when both precision and recall are zero
        f1_score = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.
        if f1_score > max_score:
            print(f"Validation metric increased {max_score:.6} --> {f1_score:.6}: saving state...")
            torch.save(model.state_dict(), f"./{exp_name}.pt")
            max_score = f1_score
        log_msg = f"Epoch {epoch + 1}/{args.epochs} - "
        if any(val is None for val in (recall, precision, mean_iou)):
            log_msg += "Undefined metric value, caused by empty GTs or predictions"
        else:
            log_msg += f"Recall: {recall:.2%} | Precision: {precision:.2%} | Mean IoU: {mean_iou:.2%}"
        mb.write(log_msg)
        # W&B
        if args.wb:
            wandb.log({
                'recall': recall,
                'precision': precision,
                'mean_iou': mean_iou,
            })

    if args.wb:
        run.finish()
target: _train[i * batch_size: (i + 1) * batch_size] }) test_fn = theano.function( [i], ll, givens={ target: _test[i * batch_size: (i + 1) * batch_size] }) num_train_batches = train.shape[0] / batch_size num_test_batches = test.shape[0] / batch_size for e in xrange(30): train_errs = [] test_errs = [] for idx in xrange(num_train_batches): train_errs.append(train_fn(idx)) for idx in xrange(num_test_batches): test_errs.append(test_fn(idx)) print 'epoch', e, 'train err', np.mean(train_errs), 'test err', np.mean(test_errs) sample_reconstructions(test, recon, target) # construct separate decoder z_input = T.matrix() single_decoder = lasagne.layers.InputLayer((None, latent_size), z_input) single_decoder = lasagne.layers.DenseLayer(single_decoder, num_units=100, nonlinearity=rectify, W=decoder1.W, b=decoder1.b) single_decoder = lasagne.layers.DenseLayer(single_decoder, num_units=100, nonlinearity=rectify, W=decoder2.W, b=decoder2.b) decode = theano.function([z_input], lasagne.layers.get_output(single_decoder)) plot_samples(decode)
def train_geometric_matching():
    """Train the geometric-matching network on synthetic warps of Pascal VOC.

    Initializes a VGG-16-based model, generates synthetically warped image
    pairs, and optimizes the transformation regressor.  Periodically plots
    samples and, on exit (including Ctrl-C), saves the learned weights.
    """
    # Ranges for the random synthetic transformations applied to crops.
    trans_params = {
        'rotation': (0, 0),
        'offset': (0, 0),
        'flip': (False, False),
        'shear': (0., 0.),
        'stretch': (1. / 2, 2),
    }

    print('building model')
    layers = vgg16.build_model((None, 3, 227, 227))

    # file to store the learned weights
    weightsfile = join('weights', 'weights.pickle')

    # initialize the feature extraction layers
    pretrainfile = join('weights', 'vgg16.pkl')
    print('initializing feature extraction layers from %s' % (pretrainfile))
    with open(pretrainfile, 'rb') as f:
        data = pickle.load(f)

    # weights are tied, no need to initialize a and b
    set_all_param_values(layers['pool4a'], data['param values'][0:20])

    # used to initialize from learned weights
    #with open(weightsfile, 'rb') as f:
    #    param_values = pickle.load(f)
    #set_all_param_values(layers['trans'], param_values)

    mean = data['mean value']

    max_epochs = 5000
    batch_size = 16
    sample_every = 25  # visualizes network output every n epochs
    sample_dir = join('data', 'samples')

    # set this to point to the root of Pascal VOC-2011
    voc_fpath = '/media/hdd/hendrik/datasets/pascal-2011'
    train_fpaths, valid_fpaths = utils.train_val_split(voc_fpath)

    print('compiling theano functions for training')
    train_func = theano_funcs.create_train_func(layers)
    print('compiling theano functions for validation')
    valid_func = theano_funcs.create_valid_func(layers)

    try:
        for epoch in range(1, max_epochs + 1):
            print('epoch %d' % (epoch))

            train_losses = []
            # FIX: use floor division for the ceil-style batch count;
            # plain "/" yields a float under Python 3 true division.
            num_train_idx = (len(train_fpaths) + batch_size - 1) // batch_size
            train_iter = utils.get_batch_idx(len(train_fpaths), batch_size)
            for i, idx in tqdm(train_iter, total=num_train_idx, leave=False):
                X_crop_train, X_warp_train, M_train =\
                    utils.prepare_synth_batch(train_fpaths[idx], mean, trans_params)
                M, train_loss = train_func(X_crop_train, X_warp_train, M_train)
                train_losses.append(train_loss)
                if epoch % sample_every == 0:
                    utils.plot_samples(X_crop_train, X_warp_train, M, mean,
                                       prefix=join(sample_dir, 'train_%d' % i))
            print(' train loss = %.6f' % (np.mean(train_losses)))

            valid_losses = []
            # FIX: same floor-division fix as for the training batch count
            num_valid_idx = (len(valid_fpaths) + batch_size - 1) // batch_size
            valid_iter = utils.get_batch_idx(len(valid_fpaths), batch_size)
            for i, idx in tqdm(valid_iter, total=num_valid_idx, leave=False):
                X_crop_valid, X_warp_valid, M_valid =\
                    utils.prepare_synth_batch(valid_fpaths[idx], mean, trans_params)
                M, valid_loss = valid_func(X_crop_valid, X_warp_valid, M_valid)
                valid_losses.append(valid_loss)
                if epoch % sample_every == 0:
                    utils.plot_samples(X_crop_valid, X_warp_valid, M, mean,
                                       prefix=join(sample_dir, 'valid_%d' % i))
            print(' valid loss = %.6f' % (np.mean(valid_losses)))
    except KeyboardInterrupt:
        print('caught ctrl-c, stopped training')

    # Persist whatever was learned, even on interrupt.
    print('saving weights to %s' % (weightsfile))
    weights = get_all_param_values(layers['trans'])
    with open(weightsfile, 'wb') as f:
        pickle.dump(weights, f, protocol=pickle.HIGHEST_PROTOCOL)
def train(epoch, model, train_loader, optimizer, writer, sigma_0, lr_sigma,
          iters_sig, gaussian_num=1, lamda=0.0, gamma=0.0, gaussian_num_ds=1,
          num_classes=1000):
    """Run one training epoch with per-sample smoothing noise.

    For each batch, optimizes the per-example sigma, corrupts the (repeated)
    inputs with Gaussian noise, averages the model outputs over the Monte
    Carlo samples, and optimizes the classification + MACER-style loss.

    Args:
        epoch: current epoch index (for logging).
        model: classifier under training.
        train_loader: loader yielding (batch, targets, idx) triples.
        optimizer: optimizer over the model parameters.
        writer: tensorboard-style summary writer.
        sigma_0: tensor of per-sample noise scales, indexed by dataset idx.
        lr_sigma: learning rate for the sigma optimization.
        iters_sig: number of sigma-optimization iterations per batch.
        gaussian_num: Monte Carlo samples per input for the training loss.
        lamda: weight of the MACER loss term.
        gamma: MACER hinge parameter.
        gaussian_num_ds: Monte Carlo samples used inside get_sigma.
        num_classes: number of output classes (default 1000 = ImageNet);
            generalized from the previously hard-coded constant.

    Returns:
        The updated per-sample sigma tensor.
    """
    model = model.train()
    train_loss = 0
    total = 0
    correct = 0
    # CE_loss = nn.CrossEntropyLoss()
    for batch_idx, (batch, targets, idx) in enumerate(train_loader):
        optimizer.zero_grad()
        batch_size = len(idx)
        batch = batch.to(device)
        targets = targets.to(device)
        # model.eval()
        sigma, _ = get_sigma(model, batch, lr_sigma, sigma_0[idx], iters_sig,
                             device, gaussian_num=gaussian_num_ds)
        # model.train()
        sigma_0[idx] = sigma  # updating sigma

        #repeating the input for computing the macer loss
        new_shape = [batch_size * gaussian_num]
        new_shape.extend(batch[0].shape)
        batch = batch.repeat((1, gaussian_num, 1, 1)).view(new_shape)

        #repeating sigmas to do the monte carlo
        sigma_repeated = sigma.repeat((1, gaussian_num, 1, 1)).view(-1, 1, 1, 1)
        noise = torch.randn_like(batch) * sigma_repeated
        batch_corrupted = batch + noise

        # Average the model outputs over the Monte Carlo noise samples.
        outputs_softmax = model(batch_corrupted).reshape(
            batch_size, gaussian_num, num_classes).mean(1)
        # clean_output = model(batch)

        total_loss = compute_loss(outputs_softmax, targets)
        if torch.isnan(outputs_softmax).any() or torch.isnan(total_loss).any():
            # FIX: replaced uninformative/profane message with a diagnostic one
            print('Warning: NaN detected in model outputs or loss at epoch %d, iter %d'
                  % (epoch, batch_idx))
        total_loss += lamda * macer_loss(outputs_softmax, targets, sigma, gamma)
        # clean_loss = compute_loss(clean_output, targets)
        # total_loss += clean_loss

        # NOTE(review): len(batch) here is batch_size * gaussian_num (batch was
        # repeated above) while `total` accumulates batch_size — verify this
        # weighting is intended before changing it.
        train_loss += total_loss.item() * len(batch)
        _, predicted = outputs_softmax.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        # update parameters
        total_loss.backward()
        optimizer.step()

        if batch_idx % 100 == 0:
            print('+ Epoch: {}. Iter: [{}/{} ({:.0f}%)]. Loss: {}. Accuracy: {}'.
                  format(epoch, batch_idx * len(batch),
                         len(train_loader.dataset),
                         100. * batch_idx / len(train_loader),
                         train_loss / total, 100. * correct / total))

    # Log a small grid of clean vs. corrupted samples plus scalar summaries.
    n = min(batch.size(0), 8)
    comparison = torch.cat([batch[:n], batch_corrupted[:n]])
    comparison = torch.clamp(comparison, min=0, max=1)
    fig = plot_samples(
        comparison.detach().cpu().numpy().transpose(0, 2, 3, 1).squeeze(),
        h=2, w=n)
    writer.add_figure('sample of noisy trained examples', fig, epoch)
    writer.add_scalar('loss/train_loss', train_loss / total, epoch)
    writer.add_scalar('accuracy/train_accuracy', 100. * correct / total, epoch)
    writer.add_scalar('sigma/train_sigma_mean', sigma_0.mean().item(), epoch)
    writer.add_scalar('sigma/train_sigma_min', sigma_0.min().item(), epoch)
    writer.add_scalar('sigma/train_sigma_max', sigma_0.max().item(), epoch)
    return sigma_0
from utils import plot_samples
from train import train_loop
from test import test_loop
import torch.optim as optim
import torch.nn as nn

# Instantiate the network and print its layer summary for a CIFAR-10 input.
#model = Model7()
model = ResNet18()
show_model_summary(model.to(DEVICE), (3, 32, 32))

# Training configuration — candidates for a dedicated config module.
epochs = 50
cuda_batch_size = 128
cpu_batch_size = 4
num_workers = 4

# ToDo: Create separate transforms for train and test...
#transforms = model7_transforms()
train_tfms = model9_resnet_train_transforms()
test_tfms = model9_resnet_test_transforms()
train_loader, test_loader, classes = load_cifar10(
    train_tfms, test_tfms, cuda_batch_size, cpu_batch_size, num_workers)
plot_samples(train_loader)

# Cross-entropy with plain SGD+momentum; then train and evaluate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.009, momentum=0.9)
train_loop(epochs, train_loader, model, DEVICE, optimizer, criterion, None, False)
test_loop(test_loader, model, DEVICE, criterion)
def main(args):
    """Train a doctr text-recognition model (PyTorch backend).

    Uses on-disk datasets when ``args.val_path`` / ``args.train_path`` are
    given, otherwise synthetic word generators built from ``args.vocab``.
    Trains with per-epoch validation, checkpointing on best validation loss,
    and optional W&B logging / HF Hub upload.

    Args:
        args: parsed CLI namespace (paths, vocab, arch, hyper-parameters).
    """
    print(args)

    if args.push_to_hub:
        login_to_hub()

    if not isinstance(args.workers, int):
        args.workers = min(16, mp.cpu_count())

    torch.backends.cudnn.benchmark = True

    vocab = VOCABS[args.vocab]
    fonts = args.font.split(",")

    # Load val data generator
    st = time.time()
    if isinstance(args.val_path, str):
        with open(os.path.join(args.val_path, "labels.json"), "rb") as f:
            val_hash = hashlib.sha256(f.read()).hexdigest()
        val_set = RecognitionDataset(
            img_folder=os.path.join(args.val_path, "images"),
            labels_path=os.path.join(args.val_path, "labels.json"),
            img_transforms=T.Resize((args.input_size, 4 * args.input_size),
                                    preserve_aspect_ratio=True),
        )
    else:
        val_hash = None
        # Load synthetic data generator
        val_set = WordGenerator(
            vocab=vocab,
            min_chars=args.min_chars,
            max_chars=args.max_chars,
            num_samples=args.val_samples * len(vocab),
            font_family=fonts,
            img_transforms=Compose([
                T.Resize((args.input_size, 4 * args.input_size),
                         preserve_aspect_ratio=True),
                # Ensure we have a 90% split of white-background images
                T.RandomApply(T.ColorInversion(), 0.9),
            ]),
        )
    val_loader = DataLoader(
        val_set,
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        sampler=SequentialSampler(val_set),
        pin_memory=torch.cuda.is_available(),
        collate_fn=val_set.collate_fn,
    )
    print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
          f"{len(val_loader)} batches)")

    batch_transforms = Normalize(mean=(0.694, 0.695, 0.693),
                                 std=(0.299, 0.296, 0.301))

    # Load doctr model
    model = recognition.__dict__[args.arch](pretrained=args.pretrained, vocab=vocab)

    # Resume weights
    if isinstance(args.resume, str):
        print(f"Resuming {args.resume}")
        checkpoint = torch.load(args.resume, map_location="cpu")
        model.load_state_dict(checkpoint)

    # GPU
    if isinstance(args.device, int):
        if not torch.cuda.is_available():
            raise AssertionError("PyTorch cannot access your GPU. Please investigate!")
        if args.device >= torch.cuda.device_count():
            raise ValueError("Invalid device index")
    # Silent default switch to GPU if available
    elif torch.cuda.is_available():
        args.device = 0
    else:
        # FIX: "targe" -> "target" typo in the warning message
        logging.warning("No accessible GPU, target device set to CPU.")
    if torch.cuda.is_available():
        torch.cuda.set_device(args.device)
        model = model.cuda()

    # Metrics
    val_metric = TextMatch()

    if args.test_only:
        print("Running evaluation")
        val_loss, exact_match, partial_match = evaluate(
            model, val_loader, batch_transforms, val_metric, amp=args.amp)
        print(f"Validation loss: {val_loss:.6} (Exact: {exact_match:.2%} | Partial: {partial_match:.2%})")
        return

    st = time.time()
    if isinstance(args.train_path, str):
        # Load train data generator: either a single folder with labels.json,
        # or a folder of such sub-folders merged into one dataset.
        base_path = Path(args.train_path)
        parts = ([base_path] if base_path.joinpath("labels.json").is_file()
                 else [base_path.joinpath(sub) for sub in os.listdir(base_path)])
        with open(parts[0].joinpath("labels.json"), "rb") as f:
            train_hash = hashlib.sha256(f.read()).hexdigest()
        train_set = RecognitionDataset(
            parts[0].joinpath("images"),
            parts[0].joinpath("labels.json"),
            img_transforms=Compose([
                T.Resize((args.input_size, 4 * args.input_size),
                         preserve_aspect_ratio=True),
                # Augmentations
                T.RandomApply(T.ColorInversion(), 0.1),
                ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.02),
            ]),
        )
        if len(parts) > 1:
            for subfolder in parts[1:]:
                train_set.merge_dataset(
                    RecognitionDataset(subfolder.joinpath("images"),
                                       subfolder.joinpath("labels.json")))
    else:
        train_hash = None
        # Load synthetic data generator
        train_set = WordGenerator(
            vocab=vocab,
            min_chars=args.min_chars,
            max_chars=args.max_chars,
            num_samples=args.train_samples * len(vocab),
            font_family=fonts,
            img_transforms=Compose([
                T.Resize((args.input_size, 4 * args.input_size),
                         preserve_aspect_ratio=True),
                # Ensure we have a 90% split of white-background images
                T.RandomApply(T.ColorInversion(), 0.9),
                ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.02),
            ]),
        )

    train_loader = DataLoader(
        train_set,
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.workers,
        sampler=RandomSampler(train_set),
        pin_memory=torch.cuda.is_available(),
        collate_fn=train_set.collate_fn,
    )
    print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
          f"{len(train_loader)} batches)")

    if args.show_samples:
        x, target = next(iter(train_loader))
        plot_samples(x, target)
        return

    # Optimizer (only trainable parameters)
    optimizer = torch.optim.Adam(
        [p for p in model.parameters() if p.requires_grad],
        args.lr,
        betas=(0.95, 0.99),
        eps=1e-6,
        weight_decay=args.weight_decay,
    )

    # LR Finder
    if args.find_lr:
        lrs, losses = record_lr(model, train_loader, batch_transforms,
                                optimizer, amp=args.amp)
        plot_recorder(lrs, losses)
        return

    # Scheduler
    if args.sched == "cosine":
        scheduler = CosineAnnealingLR(optimizer, args.epochs * len(train_loader),
                                      eta_min=args.lr / 25e4)
    elif args.sched == "onecycle":
        scheduler = OneCycleLR(optimizer, args.lr, args.epochs * len(train_loader))
    else:
        # FIX: fail fast instead of hitting UnboundLocalError below
        raise ValueError(f"Unsupported scheduler: {args.sched}")

    # Training monitoring
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    exp_name = f"{args.arch}_{current_time}" if args.name is None else args.name

    # W&B
    if args.wb:
        run = wandb.init(
            name=exp_name,
            project="text-recognition",
            config={
                "learning_rate": args.lr,
                "epochs": args.epochs,
                "weight_decay": args.weight_decay,
                "batch_size": args.batch_size,
                "architecture": args.arch,
                "input_size": args.input_size,
                "optimizer": "adam",
                "framework": "pytorch",
                "scheduler": args.sched,
                "vocab": args.vocab,
                "train_hash": train_hash,
                "val_hash": val_hash,
                "pretrained": args.pretrained,
            },
        )

    # Create loss queue
    min_loss = np.inf

    # Training loop
    mb = master_bar(range(args.epochs))
    for epoch in mb:
        fit_one_epoch(model, train_loader, batch_transforms, optimizer,
                      scheduler, mb, amp=args.amp)

        # Validation loop at the end of each epoch
        val_loss, exact_match, partial_match = evaluate(
            model, val_loader, batch_transforms, val_metric, amp=args.amp)
        if val_loss < min_loss:
            print(f"Validation loss decreased {min_loss:.6} --> {val_loss:.6}: saving state...")
            torch.save(model.state_dict(), f"./{exp_name}.pt")
            min_loss = val_loss
        mb.write(f"Epoch {epoch + 1}/{args.epochs} - Validation loss: {val_loss:.6} "
                 f"(Exact: {exact_match:.2%} | Partial: {partial_match:.2%})")
        # W&B
        if args.wb:
            wandb.log({
                "val_loss": val_loss,
                "exact_match": exact_match,
                "partial_match": partial_match,
            })

    if args.wb:
        run.finish()

    if args.push_to_hub:
        push_to_hf_hub(model, exp_name, task="recognition", run_config=args)
def main(args):
    """Train a doctr text-detection model (TensorFlow backend).

    Mirrors the PyTorch detection trainer: builds the val/train datasets,
    optionally evaluates only, otherwise trains with Adam + exponential LR
    decay, checkpointing on best validation loss, with optional mixed
    precision, W&B logging, and HF Hub upload.

    Args:
        args: parsed CLI namespace (paths, arch, hyper-parameters, flags).
    """
    print(args)

    if args.push_to_hub:
        login_to_hub()

    if not isinstance(args.workers, int):
        args.workers = min(16, mp.cpu_count())

    # AMP
    if args.amp:
        mixed_precision.set_global_policy("mixed_float16")

    st = time.time()
    val_set = DetectionDataset(
        img_folder=os.path.join(args.val_path, "images"),
        label_path=os.path.join(args.val_path, "labels.json"),
        # Straight eval resizes with padding; rotated eval rotates first,
        # then pads to the square input size.
        sample_transforms=T.SampleCompose(([
            T.Resize((args.input_size, args.input_size),
                     preserve_aspect_ratio=True,
                     symmetric_pad=True)
        ] if not args.rotation or args.eval_straight else []) + ([
            T.Resize(args.input_size, preserve_aspect_ratio=True
                     ),  # This does not pad
            T.RandomRotate(90, expand=True),
            T.Resize((args.input_size, args.input_size),
                     preserve_aspect_ratio=True,
                     symmetric_pad=True),
        ] if args.rotation and not args.eval_straight else [])),
        use_polygons=args.rotation and not args.eval_straight,
    )
    val_loader = DataLoader(
        val_set,
        batch_size=args.batch_size,
        shuffle=False,
        drop_last=False,
        num_workers=args.workers,
    )
    print(
        f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
        f"{val_loader.num_batches} batches)")
    # Hash the label file so the exact dataset version is recorded in W&B.
    with open(os.path.join(args.val_path, "labels.json"), "rb") as f:
        val_hash = hashlib.sha256(f.read()).hexdigest()

    batch_transforms = T.Compose([
        T.Normalize(mean=(0.798, 0.785, 0.772), std=(0.264, 0.2749, 0.287)),
    ])

    # Load doctr model
    model = detection.__dict__[args.arch](
        pretrained=args.pretrained,
        input_shape=(args.input_size, args.input_size, 3),
        assume_straight_pages=not args.rotation,
    )

    # Resume weights
    if isinstance(args.resume, str):
        model.load_weights(args.resume)

    # Metrics
    val_metric = LocalizationConfusion(use_polygons=args.rotation
                                       and not args.eval_straight,
                                       mask_shape=(args.input_size,
                                                   args.input_size))

    if args.test_only:
        print("Running evaluation")
        val_loss, recall, precision, mean_iou = evaluate(
            model, val_loader, batch_transforms, val_metric)
        print(
            f"Validation loss: {val_loss:.6} (Recall: {recall:.2%} | Precision: {precision:.2%} | "
            f"Mean IoU: {mean_iou:.2%})")
        return

    st = time.time()
    # Load both train and val data generators
    train_set = DetectionDataset(
        img_folder=os.path.join(args.train_path, "images"),
        label_path=os.path.join(args.train_path, "labels.json"),
        img_transforms=T.Compose([
            # Augmentations
            T.RandomApply(T.ColorInversion(), 0.1),
            T.RandomJpegQuality(60),
            T.RandomSaturation(0.3),
            T.RandomContrast(0.3),
            T.RandomBrightness(0.3),
        ]),
        sample_transforms=T.SampleCompose(([
            T.Resize((args.input_size, args.input_size),
                     preserve_aspect_ratio=True,
                     symmetric_pad=True)
        ] if not args.rotation else []) + ([
            T.Resize(args.input_size, preserve_aspect_ratio=True
                     ),  # This does not pad
            T.RandomRotate(90, expand=True),
            T.Resize((args.input_size, args.input_size),
                     preserve_aspect_ratio=True,
                     symmetric_pad=True),
        ] if args.rotation else [])),
        use_polygons=args.rotation,
    )
    train_loader = DataLoader(
        train_set,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True,
        num_workers=args.workers,
    )
    print(
        f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
        f"{train_loader.num_batches} batches)")
    with open(os.path.join(args.train_path, "labels.json"), "rb") as f:
        train_hash = hashlib.sha256(f.read()).hexdigest()

    if args.show_samples:
        x, target = next(iter(train_loader))
        plot_samples(x, target)
        return

    # Optimizer: Adam on an exponentially decaying learning rate
    scheduler = tf.keras.optimizers.schedules.ExponentialDecay(
        args.lr,
        decay_steps=args.epochs * len(train_loader),
        decay_rate=1 / (25e4),  # final lr as a fraction of initial lr
        staircase=False,
    )
    optimizer = tf.keras.optimizers.Adam(learning_rate=scheduler,
                                         beta_1=0.95,
                                         beta_2=0.99,
                                         epsilon=1e-6,
                                         clipnorm=5)
    if args.amp:
        # Wrap the optimizer for loss scaling under mixed precision
        optimizer = mixed_precision.LossScaleOptimizer(optimizer)

    # LR Finder
    if args.find_lr:
        lrs, losses = record_lr(model, train_loader, batch_transforms,
                                optimizer, amp=args.amp)
        plot_recorder(lrs, losses)
        return

    # Tensorboard to monitor training
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    exp_name = f"{args.arch}_{current_time}" if args.name is None else args.name

    # W&B
    if args.wb:
        run = wandb.init(
            name=exp_name,
            project="text-detection",
            config={
                "learning_rate": args.lr,
                "epochs": args.epochs,
                "weight_decay": 0.0,
                "batch_size": args.batch_size,
                "architecture": args.arch,
                "input_size": args.input_size,
                "optimizer": "adam",
                "framework": "tensorflow",
                "scheduler": "exp_decay",
                "train_hash": train_hash,
                "val_hash": val_hash,
                "pretrained": args.pretrained,
                "rotation": args.rotation,
            },
        )

    # Freeze the feature-extractor layers when requested
    if args.freeze_backbone:
        for layer in model.feat_extractor.layers:
            layer.trainable = False

    min_loss = np.inf

    # Training loop
    mb = master_bar(range(args.epochs))
    for epoch in mb:
        fit_one_epoch(model, train_loader, batch_transforms, optimizer, mb,
                      args.amp)
        # Validation loop at the end of each epoch
        val_loss, recall, precision, mean_iou = evaluate(
            model, val_loader, batch_transforms, val_metric)
        if val_loss < min_loss:
            print(
                f"Validation loss decreased {min_loss:.6} --> {val_loss:.6}: saving state..."
            )
            model.save_weights(f"./{exp_name}/weights")
            min_loss = val_loss
        log_msg = f"Epoch {epoch + 1}/{args.epochs} - Validation loss: {val_loss:.6} "
        if any(val is None for val in (recall, precision, mean_iou)):
            log_msg += "(Undefined metric value, caused by empty GTs or predictions)"
        else:
            log_msg += f"(Recall: {recall:.2%} | Precision: {precision:.2%} | Mean IoU: {mean_iou:.2%})"
        mb.write(log_msg)
        # W&B
        if args.wb:
            wandb.log({
                "val_loss": val_loss,
                "recall": recall,
                "precision": precision,
                "mean_iou": mean_iou,
            })

    if args.wb:
        run.finish()

    if args.push_to_hub:
        push_to_hf_hub(model, exp_name, task="detection", run_config=args)