# Esempio n. 1
# 0
def train_from_density(model,
                       optimizer,
                       z0_distr,
                       density,
                       name: str,
                       num_layers: int,
                       batch_size: int = 128,
                       num_iter: int = 20000):
    """Train the normalizing flow `model` to match a target density.

    Samples base-distribution batches from `z0_distr`, pushes them through
    the flow, and minimizes `loss2` of the target density under the
    change-of-variables log-determinant.  Prints a running loss average
    every 200 iterations and saves a scatter plot of 500 samples at the
    very end.
    """
    running_loss = 0.0

    for step in range(num_iter + 1):
        # Draw a fresh base-distribution batch and map it through the flow.
        z0 = z0_distr.sample((batch_size, ))
        x, logdet = model(z0)

        loss = torch.mean(loss2(density(x), z0, logdet))
        running_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Report the mean loss over the last 200 iterations.
        if step % 200 == 0 and step != 0:
            print(f"loss: {running_loss/200}")
            running_loss = 0

        # After the final iteration, sample 500 points and save the plot.
        if step == num_iter:
            z0 = z0_distr.sample((500, ))
            x, logdet = model(z0)
            xs = x[:, 0].detach().numpy()
            ys = x[:, 1].detach().numpy()
            plot_samples(xs, ys, f"samples_{name}_l{num_layers}")
def warp_images():
    """Run the trained warping network on a few image pairs and plot results.

    Loads the transformer weights from ``weights/weights.pickle`` and the
    VGG16 pretraining pickle (for the per-channel mean), compiles the
    Theano inference function, then runs it over a hard-coded list of
    image-pair filenames and saves the sample plots under
    ``data/inference``.
    """
    print('building model')
    layers = vgg16.build_model((None, 3, 227, 227))

    batch_size = 32
    infer_dir = join('data', 'inference')
    weightsfile = join('weights', 'weights.pickle')
    with open(weightsfile, 'rb') as f:
        param_values = pickle.load(f)
    set_all_param_values(layers['trans'], param_values)

    pretrainfile = join('weights', 'vgg16.pkl')
    with open(pretrainfile, 'rb') as f:
        data = pickle.load(f)

    # Per-channel mean stored alongside the pretrained VGG16 weights;
    # used to normalize the input batches.
    mean = data['mean value']

    image_fpaths = [('Cars_013b.png', 'Cars_009b.png'),
                    ('060_0071.png', '060_0000.png'),
                    ('246_0052.png', '246_0042.png')]

    print('compiling theano functions for inference')
    # Ceil-divide with // so tqdm's `total` is an integer batch count;
    # plain `/` yields a float under Python 3.
    num_infer_idx = (len(image_fpaths) + batch_size - 1) // batch_size
    infer_func = theano_funcs.create_infer_func(layers)
    infer_iter = utils.get_batch_idx(len(image_fpaths), batch_size)

    for i, idx in tqdm(infer_iter, total=num_infer_idx, leave=False):
        Xa, Xb = utils.prepare_batch(image_fpaths[idx], mean)
        M = infer_func(Xa, Xb)
        utils.plot_samples(Xa,
                           Xb,
                           M,
                           mean,
                           prefix=join(infer_dir, 'infer_%d' % i))
# Esempio n. 3
# 0
    def train(self, batch_size, train_dataset, num_steps, keep_prob=1.0,
              num_display=64):
        """Train the GAN for ``num_steps`` iterations.

        Shuffles ``train_dataset`` once, alternates k=1 discriminator
        updates with one generator update per step, periodically plots
        samples drawn from a fixed noise batch, records value-function
        summaries, and finally reports V(D)/V(G) over the whole dataset.
        """
        # max(1, ...) guards the `step % ...` checks below against a
        # modulo-by-zero when num_steps < 40 (resp. < 100).
        display_step = max(1, num_steps // 40)
        summary_step = max(1, num_steps // 100)
        perm = np.random.permutation(train_dataset.shape[0])
        X = train_dataset[perm, :]
        # Use fixed Z to generate samples
        display_Z = self.sample_noise([num_display, self.noise_dim])

        fig_index = 0
        inner_step = 0
        print('Training GAN for %d steps' % num_steps)
        D_history = []
        G_history = []
        # `range` replaces the Python-2-only `xrange` (NameError on Py3).
        for step in range(num_steps):
            for k in range(1):
                # use next different batches
                batch_X = next_batch(X, inner_step, batch_size)
                inner_step += 1
                batch_Z_D = self.sample_noise([batch_size, self.noise_dim])
                _, D_V_neg = self.sess.run(
                    [self.D_solver, self.D_V_neg],
                    feed_dict={self.X: batch_X,
                               self.Z: batch_Z_D,
                               self.keep_prob: keep_prob})
            #  finish k steps for training D
            batch_Z_G = self.sample_noise([batch_size, self.noise_dim])
            _, G_V = self.sess.run([self.G_solver, self.G_V],
                                   feed_dict={self.Z: batch_Z_G,
                                              self.keep_prob: keep_prob})

            if step % display_step == 0:
                print('Batch(%d cases) value function at step %d' %
                      (batch_X.shape[0], step))
                print('V(D) = %.6f, V(G) = %.6f' % (-D_V_neg, G_V))
                samples = self.sess.run(self.G_sample,
                                        feed_dict={self.Z: display_Z,
                                                   self.keep_prob: 1.0})
                plot_samples(samples, self.dirname, fig_index)
                fig_index += 1

            if step % summary_step == 0:
                D_history.append(-D_V_neg)
                G_history.append(G_V)
                self.make_summary(step, batch_X, batch_Z_G, keep_prob)

        # Final evaluation of both value functions over the full dataset.
        Z_D = self.sample_noise([X.shape[0], self.noise_dim])
        D_V_neg = self.sess.run(self.D_V_neg,
                                feed_dict={self.X: X,
                                           self.Z: Z_D,
                                           self.keep_prob: 1.0})

        Z_G = self.sample_noise([X.shape[0], self.noise_dim])
        G_V = self.sess.run(self.G_V,
                            feed_dict={self.Z: Z_G,
                                       self.keep_prob: 1.0})
        print('Finish training\nV(D) = %.6f, V(G) = %.6f' % (-D_V_neg, G_V))
        self.make_summary(num_steps, X, Z_G, keep_prob=1.0)
        plot_V(self.dirname, D_history, G_history)
# Esempio n. 4
# 0
def main():
    """Restore a pretrained WGAN checkpoint and save a 5x5 grid of samples."""
    session = tf.Session()
    model = GAN(sess=session, init=False, gf_dim=128)
    model.restore(model_path='hw3_1/model_file/WGAN_v2')

    # 25 latent vectors of dimension 100, uniform in [-1, 1).
    latent = np.random.uniform(-1., 1., size=[25, 100])
    generated = model.generate(latent)
    plot_samples(generated,
                 save=True,
                 h=5,
                 w=5,
                 filename='gan',
                 folder_path='samples/')
def optimize_sigma(model,
                   loader,
                   writer,
                   sigma_0,
                   lr_sigma,
                   flag='train',
                   radius=None,
                   gaussian_num_ds=1,
                   epoch=1):
    """Run one pass of per-sample sigma optimization and log statistics.

    For every batch, updates the per-sample noise level ``sigma_0[idx]``
    and certification radius ``radius[idx]`` in place via ``get_sigma``
    (one inner iteration per batch), measures accuracy on the
    noise-corrupted batch, and writes a sample figure plus scalar
    summaries to the TensorBoard ``writer``.  Returns the updated
    ``sigma_0``.

    NOTE(review): ``radius`` defaults to None but ``radius[idx]`` is
    assigned unconditionally, so callers must pass an indexable tensor.
    Relies on module-level ``device``, ``get_sigma`` and ``plot_samples``;
    assumes ``loader`` is non-empty (the plotting below reuses the last
    batch of the loop).
    """
    model = model.eval()
    total = 0
    test_loss, test_loss_corrupted = 0, 0
    correct, correct_corrupted = 0, 0

    for _, (batch, targets, idx) in enumerate(loader):
        batch, targets = batch.to(device), targets.to(
            device
        )  #Here I will put iters to 1 as the outer loop contains the number of iterations
        sigma, batch_corrupted, rad = get_sigma(model,
                                                batch,
                                                lr_sigma,
                                                sigma_0[idx],
                                                1,
                                                device,
                                                ret_radius=True,
                                                gaussian_num=gaussian_num_ds)
        sigma_0[idx], radius[idx] = sigma, rad

        with torch.no_grad():
            # Only the corrupted batch is evaluated; the clean-loss
            # scalars below are commented out in this revision.
            outputs_corrputed_softmax = model(batch_corrupted)

        _, predicted_corrupted = outputs_corrputed_softmax.max(1)
        total += targets.size(0)
        correct_corrupted += predicted_corrupted.eq(targets).sum().item()
    #plottings
    # Compare up to 8 clean images against their corrupted versions.
    n = min(batch.size(0), 8)
    comparison = torch.cat([batch[:n], batch_corrupted[:n]])
    comparison = torch.clamp(comparison, min=0, max=1)
    fig = plot_samples(comparison.detach().cpu().numpy().transpose(
        0, 2, 3, 1).squeeze(),
                       h=2,
                       w=n)

    writer.add_figure('optimizing sigma sample of noisy ' + flag + ' examples',
                      fig, epoch)
    # writer.add_scalar('optimizing_sigma/'+flag+'/loss_clean', test_loss / total, epoch)
    # writer.add_scalar('optimizing_sigma/'+flag+'/accuracy_clean', 100.*correct / total, epoch)
    # writer.add_scalar('optimizing_sigma/'+flag+'/loss_corrupted', test_loss_corrupted / total, epoch)
    writer.add_scalar('optimizing_sigma/' + flag + '/accuracy_corrupted',
                      100. * correct_corrupted / total, epoch)
    writer.add_scalar('optimizing_sigma/' + flag + '/sigma_mean',
                      sigma_0.mean().item(), epoch)
    writer.add_scalar('optimizing_sigma/' + flag + '/sigma_min',
                      sigma_0.min().item(), epoch)
    writer.add_scalar('optimizing_sigma/' + flag + '/sigma_max',
                      sigma_0.max().item(), epoch)
    writer.add_scalar('optimizing_sigma/' + flag + '/radius_for_sample_0',
                      radius[0].item(), epoch)
    #Saving the sigmas
    return sigma_0
def resolve_and_tensorboard_plot(our_model,
                                 lr_image_paths,
                                 title='',
                                 make_input_img_bw=False):
    """Super-resolve each input image and log a comparison figure to TensorBoard."""
    pairs = []
    for path in lr_image_paths:
        low_res = load_image(path, make_input_img_bw)
        high_res = resolve_single(our_model, low_res)
        pairs.append((low_res, high_res))

    fig = plot_samples(pairs,
                       interpolate_lr=True,
                       input_img_bw=make_input_img_bw)

    # Render the figure into an in-memory PNG buffer.  Closing the figure
    # prevents it from being displayed directly inside the notebook.
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    plt.close(fig)
    buf.seek(0)

    # Decode the PNG buffer into a TF image and add the batch dimension.
    decoded = tf.image.decode_png(buf.getvalue(), channels=4)
    image = tf.expand_dims(decoded, 0)

    with tb_file_writer.as_default():
        tf.summary.image(title, image, step=0)
# Esempio n. 7
# 0
def test(epoch, model, test_loader, writer, sigma_0, lr_sigma, iters_sig):
    """Evaluate the model on clean and noise-corrupted test batches.

    Updates the per-sample noise levels ``sigma_0[idx]`` via ``get_sigma``
    while iterating, accumulates clean/corrupted loss and accuracy,
    logs a sample comparison figure and scalar summaries to the
    TensorBoard ``writer``, and returns
    ``(corrupted accuracy in percent, updated sigma_0)``.

    NOTE(review): relies on module-level ``device``, ``get_sigma``,
    ``compute_loss`` and ``plot_samples``; assumes ``test_loader`` is
    non-empty (the plotting below reuses the last batch of the loop).
    """
    model = model.eval()
    test_loss = 0
    test_loss_corrupted = 0
    total = 0
    correct = 0
    correct_corrupted = 0
    for _, (batch, targets, idx) in enumerate(test_loader):
        batch = batch.to(device)
        targets = targets.to(device)

        # Optimize sigma for this batch and get its corrupted version.
        sigma, batch_corrupted = get_sigma(model, batch, lr_sigma,
                                           sigma_0[idx], iters_sig, device)
        sigma_0[idx] = sigma  # update sigma
        with torch.no_grad():

            # forward pass through the base classifier
            outputs_softmax = model(batch)
            outputs_corrputed_softmax = model(batch_corrupted)

        loss = compute_loss(outputs_softmax, targets)
        loss_corrupted = compute_loss(outputs_corrputed_softmax, targets)

        # Weight per-batch losses by batch size for a dataset-level mean.
        test_loss += loss.item() * len(batch)
        test_loss_corrupted += loss_corrupted.item() * len(batch)

        _, predicted = outputs_softmax.max(1)
        _, predicted_corrupted = outputs_corrputed_softmax.max(1)

        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        correct_corrupted += predicted_corrupted.eq(targets).sum().item()

    print(
        '===> Test Loss: {}. Test Accuracy: {}. Test Loss Corrupted: {}. Test Accuracy Corrupted: {}'
        .format(test_loss / total, 100. * correct / total,
                test_loss_corrupted / total, 100. * correct_corrupted / total))
    # Compare up to 8 clean images against their corrupted versions.
    n = min(batch.size(0), 8)
    comparison = torch.cat([batch[:n], batch_corrupted[:n]])
    comparison = torch.clamp(comparison, min=0, max=1)
    fig = plot_samples(comparison.detach().cpu().numpy().transpose(
        0, 2, 3, 1).squeeze(),
                       h=2,
                       w=n)

    writer.add_figure('sample of noisy test examples', fig, epoch)
    writer.add_scalar('loss/test_loss', test_loss / total, epoch)
    writer.add_scalar('accuracy/test_accuracy', 100. * correct / total, epoch)
    writer.add_scalar('loss/test_loss_corrupted', test_loss_corrupted / total,
                      epoch)
    writer.add_scalar('accuracy/test_accuracy_corrupted',
                      100. * correct_corrupted / total, epoch)
    writer.add_scalar('sigma/test_sigma_mean', sigma_0.mean().item(), epoch)
    writer.add_scalar('sigma/test_sigma_min', sigma_0.min().item(), epoch)
    writer.add_scalar('sigma/test_sigma_max', sigma_0.max().item(), epoch)

    return 100. * correct_corrupted / total, sigma_0
# Esempio n. 8
# 0
if is_trainable:
    # Load the paired training sets (left/right halves) from pickle files.
    Data_A = read_pickle('./Data/Data_Train/Data_Left_train.pkl')
    Data_B = read_pickle('./Data/Data_Train/Data_Right_train.pkl')
    print("Data A/B: ", Data_A.shape, Data_B.shape)
    # Initialize the model: both datasets must be 4-D and identically shaped.
    assert Data_A.shape == Data_B.shape
    if len(Data_A.shape) == 4 and len(Data_B.shape) == 4:
        img_shape = (Data_A.shape[1], Data_A.shape[2], Data_A.shape[3])
        banis = BANIS(img_shape)
    else:
        # Fail fast: previously this branch only printed a message and the
        # script then crashed with a NameError on the undefined `banis`.
        raise ValueError("The shape of input dataset don't match!!!")
    # Train the model and record the runtime
    timer = ElapsedTimer()
    banis.train(Data_A,
                Data_B,
                EPOCHS=n_epochs,
                BATCH_SIZE=128,
                WARMUP_STEP=n_step,
                NUM_IMG=5)
    timer.elapsed_time()
else:
    # Plot previously generated/reconstructed sample batches from disk.
    A_gen_list = np.load("./A_gen_baait.npy")
    plot_samples(A_gen_list, name='Agen')
    B_gen_list = np.load("./B_gen_baait.npy")
    plot_samples(B_gen_list, name='Bgen')
    AB_rec_list = np.load("./AB_rec_baait.npy")
    plot_samples(AB_rec_list, name='ABrec')
# Esempio n. 9
# 0
def main(args):
    """Train or evaluate a PyTorch character-classification model.

    Builds synthetic train/val character datasets, optionally resumes
    weights, and supports: test-only evaluation, sample visualization,
    LR finding, W&B logging, Hugging Face Hub upload, and ONNX export.
    """
    print(args)

    if args.push_to_hub:
        login_to_hub()

    if not isinstance(args.workers, int):
        args.workers = min(16, mp.cpu_count())

    torch.backends.cudnn.benchmark = True

    vocab = VOCABS[args.vocab]

    fonts = args.font.split(",")

    # Load val data generator
    st = time.time()
    val_set = CharacterGenerator(
        vocab=vocab,
        num_samples=args.val_samples * len(vocab),
        cache_samples=True,
        img_transforms=Compose(
            [
                T.Resize((args.input_size, args.input_size)),
                # Ensure we have a 90% split of white-background images
                T.RandomApply(T.ColorInversion(), 0.9),
            ]
        ),
        font_family=fonts,
    )
    val_loader = DataLoader(
        val_set,
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        sampler=SequentialSampler(val_set),
        pin_memory=torch.cuda.is_available(),
    )
    print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " f"{len(val_loader)} batches)")

    batch_transforms = Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301))

    # Load doctr model
    model = classification.__dict__[args.arch](pretrained=args.pretrained, num_classes=len(vocab), classes=list(vocab))

    # Resume weights
    if isinstance(args.resume, str):
        print(f"Resuming {args.resume}")
        checkpoint = torch.load(args.resume, map_location="cpu")
        model.load_state_dict(checkpoint)

    # GPU
    if isinstance(args.device, int):
        if not torch.cuda.is_available():
            raise AssertionError("PyTorch cannot access your GPU. Please investigate!")
        if args.device >= torch.cuda.device_count():
            raise ValueError("Invalid device index")
    # Silent default switch to GPU if available
    elif torch.cuda.is_available():
        args.device = 0
    else:
        logging.warning("No accessible GPU, target device set to CPU.")
    if torch.cuda.is_available():
        torch.cuda.set_device(args.device)
        model = model.cuda()

    if args.test_only:
        print("Running evaluation")
        val_loss, acc = evaluate(model, val_loader, batch_transforms)
        print(f"Validation loss: {val_loss:.6} (Acc: {acc:.2%})")
        return

    st = time.time()

    # Load train data generator
    train_set = CharacterGenerator(
        vocab=vocab,
        num_samples=args.train_samples * len(vocab),
        cache_samples=True,
        img_transforms=Compose(
            [
                T.Resize((args.input_size, args.input_size)),
                # Augmentations
                T.RandomApply(T.ColorInversion(), 0.9),
                # GaussianNoise
                T.RandomApply(Grayscale(3), 0.1),
                ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.02),
                T.RandomApply(GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 3)), 0.3),
                RandomRotation(15, interpolation=InterpolationMode.BILINEAR),
            ]
        ),
        font_family=fonts,
    )

    train_loader = DataLoader(
        train_set,
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.workers,
        sampler=RandomSampler(train_set),
        pin_memory=torch.cuda.is_available(),
    )
    print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " f"{len(train_loader)} batches)")

    if args.show_samples:
        x, target = next(iter(train_loader))
        plot_samples(x, list(map(vocab.__getitem__, target)))
        return

    # Optimizer
    optimizer = torch.optim.Adam(
        [p for p in model.parameters() if p.requires_grad],
        args.lr,
        betas=(0.95, 0.99),
        eps=1e-6,
        weight_decay=args.weight_decay,
    )

    # LR Finder
    if args.find_lr:
        lrs, losses = record_lr(model, train_loader, batch_transforms, optimizer, amp=args.amp)
        plot_recorder(lrs, losses)
        return
    # Scheduler
    if args.sched == "cosine":
        scheduler = CosineAnnealingLR(optimizer, args.epochs * len(train_loader), eta_min=args.lr / 25e4)
    elif args.sched == "onecycle":
        scheduler = OneCycleLR(optimizer, args.lr, args.epochs * len(train_loader))
    else:
        # Fail fast: an unknown scheduler name previously left `scheduler`
        # undefined and crashed later inside the training loop.
        raise ValueError(f"Unknown scheduler: {args.sched}")

    # Training monitoring
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    exp_name = f"{args.arch}_{current_time}" if args.name is None else args.name

    # W&B
    if args.wb:

        run = wandb.init(
            name=exp_name,
            project="character-classification",
            config={
                "learning_rate": args.lr,
                "epochs": args.epochs,
                "weight_decay": args.weight_decay,
                "batch_size": args.batch_size,
                "architecture": args.arch,
                "input_size": args.input_size,
                "optimizer": "adam",
                "framework": "pytorch",
                "vocab": args.vocab,
                "scheduler": args.sched,
                "pretrained": args.pretrained,
            },
        )

    # Create loss queue
    min_loss = np.inf
    # Training loop
    mb = master_bar(range(args.epochs))
    for epoch in mb:
        fit_one_epoch(model, train_loader, batch_transforms, optimizer, scheduler, mb)

        # Validation loop at the end of each epoch
        val_loss, acc = evaluate(model, val_loader, batch_transforms)
        if val_loss < min_loss:
            print(f"Validation loss decreased {min_loss:.6} --> {val_loss:.6}: saving state...")
            torch.save(model.state_dict(), f"./{exp_name}.pt")
            min_loss = val_loss
        mb.write(f"Epoch {epoch + 1}/{args.epochs} - Validation loss: {val_loss:.6} (Acc: {acc:.2%})")
        # W&B
        if args.wb:
            wandb.log(
                {
                    "val_loss": val_loss,
                    "acc": acc,
                }
            )

    if args.wb:
        run.finish()

    if args.push_to_hub:
        push_to_hf_hub(model, exp_name, task="classification", run_config=args)

    if args.export_onnx:
        print("Exporting model to ONNX...")
        dummy_batch = next(iter(val_loader))
        dummy_input = dummy_batch[0].cuda() if torch.cuda.is_available() else dummy_batch[0]
        model_path = export_model_to_onnx(model, exp_name, dummy_input)
        print(f"Exported model saved in {model_path}")
# Esempio n. 10
# 0
def main(args):
    """Train or evaluate a TensorFlow character-classification model.

    Builds synthetic train/val character datasets, optionally resumes
    weights, and supports: mixed precision (AMP), test-only evaluation,
    sample visualization, LR finding, W&B logging, Hugging Face Hub
    upload, and ONNX export.
    """
    print(args)

    if args.push_to_hub:
        login_to_hub()

    if not isinstance(args.workers, int):
        args.workers = min(16, mp.cpu_count())

    vocab = VOCABS[args.vocab]

    fonts = args.font.split(",")

    # AMP
    if args.amp:
        mixed_precision.set_global_policy("mixed_float16")

    # Load val data generator
    st = time.time()
    val_set = CharacterGenerator(
        vocab=vocab,
        num_samples=args.val_samples * len(vocab),
        cache_samples=True,
        img_transforms=T.Compose(
            [
                T.Resize((args.input_size, args.input_size)),
                # Ensure we have a 90% split of white-background images
                T.RandomApply(T.ColorInversion(), 0.9),
            ]
        ),
        font_family=fonts,
    )
    val_loader = DataLoader(
        val_set,
        batch_size=args.batch_size,
        shuffle=False,
        drop_last=False,
        num_workers=args.workers,
        collate_fn=collate_fn,
    )
    print(
        f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
        f"{val_loader.num_batches} batches)"
    )

    # Load doctr model
    model = classification.__dict__[args.arch](
        pretrained=args.pretrained,
        input_shape=(args.input_size, args.input_size, 3),
        num_classes=len(vocab),
        classes=list(vocab),
        include_top=True,
    )

    # Resume weights
    if isinstance(args.resume, str):
        model.load_weights(args.resume)

    batch_transforms = T.Compose(
        [
            T.Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301)),
        ]
    )

    if args.test_only:
        print("Running evaluation")
        val_loss, acc = evaluate(model, val_loader, batch_transforms)
        print(f"Validation loss: {val_loss:.6} (Acc: {acc:.2%})")
        return

    st = time.time()

    # Load train data generator
    train_set = CharacterGenerator(
        vocab=vocab,
        num_samples=args.train_samples * len(vocab),
        cache_samples=True,
        img_transforms=T.Compose(
            [
                T.Resize((args.input_size, args.input_size)),
                # Augmentations
                T.RandomApply(T.ColorInversion(), 0.9),
                T.RandomApply(T.ToGray(3), 0.1),
                T.RandomJpegQuality(60),
                T.RandomSaturation(0.3),
                T.RandomContrast(0.3),
                T.RandomBrightness(0.3),
                # Blur
                T.RandomApply(T.GaussianBlur(kernel_shape=(3, 3), std=(0.1, 3)), 0.3),
            ]
        ),
        font_family=fonts,
    )
    train_loader = DataLoader(
        train_set,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True,
        num_workers=args.workers,
        collate_fn=collate_fn,
    )
    print(
        f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
        f"{train_loader.num_batches} batches)"
    )

    if args.show_samples:
        x, target = next(iter(train_loader))
        plot_samples(x, list(map(vocab.__getitem__, target)))
        return

    # Optimizer: exponential LR decay down to 1/1000 of the initial rate.
    scheduler = tf.keras.optimizers.schedules.ExponentialDecay(
        args.lr,
        decay_steps=args.epochs * len(train_loader),
        decay_rate=1 / (1e3),  # final lr as a fraction of initial lr
        staircase=False,
    )
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=scheduler,
        beta_1=0.95,
        beta_2=0.99,
        epsilon=1e-6,
    )
    if args.amp:
        optimizer = mixed_precision.LossScaleOptimizer(optimizer)

    # LR Finder
    if args.find_lr:
        lrs, losses = record_lr(model, train_loader, batch_transforms, optimizer, amp=args.amp)
        plot_recorder(lrs, losses)
        return

    # Tensorboard to monitor training
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    exp_name = f"{args.arch}_{current_time}" if args.name is None else args.name

    # W&B
    if args.wb:

        run = wandb.init(
            name=exp_name,
            project="character-classification",
            config={
                "learning_rate": args.lr,
                "epochs": args.epochs,
                "weight_decay": 0.0,
                "batch_size": args.batch_size,
                "architecture": args.arch,
                "input_size": args.input_size,
                "optimizer": "adam",
                "framework": "tensorflow",
                "vocab": args.vocab,
                "scheduler": "exp_decay",
                "pretrained": args.pretrained,
            },
        )

    # Create loss queue
    min_loss = np.inf

    # Training loop: checkpoint weights whenever validation loss improves.
    mb = master_bar(range(args.epochs))
    for epoch in mb:
        fit_one_epoch(model, train_loader, batch_transforms, optimizer, mb, args.amp)

        # Validation loop at the end of each epoch
        val_loss, acc = evaluate(model, val_loader, batch_transforms)
        if val_loss < min_loss:
            print(f"Validation loss decreased {min_loss:.6} --> {val_loss:.6}: saving state...")
            model.save_weights(f"./{exp_name}/weights")
            min_loss = val_loss
        mb.write(f"Epoch {epoch + 1}/{args.epochs} - Validation loss: {val_loss:.6} (Acc: {acc:.2%})")
        # W&B
        if args.wb:
            wandb.log(
                {
                    "val_loss": val_loss,
                    "acc": acc,
                }
            )

    if args.wb:
        run.finish()

    if args.push_to_hub:
        push_to_hf_hub(model, exp_name, task="classification", run_config=args)

    if args.export_onnx:
        print("Exporting model to ONNX...")
        dummy_input = [tf.TensorSpec([None, args.input_size, args.input_size, 3], tf.float32, name="input")]
        model_path, _ = export_model_to_onnx(model, exp_name, dummy_input)
        print(f"Exported model saved in {model_path}")
def train(epoch,
          model,
          train_loader,
          optimizer,
          writer,
          sigma_0,
          lr_sigma,
          iters_sig,
          attacker,
          num_noise_vec=1):
    """One epoch of adversarial training with per-sample Gaussian noise.

    For each batch: optimizes the per-sample sigmas via ``get_sigma``,
    replicates the batch ``num_noise_vec`` times for Monte-Carlo noise
    sampling, generates adversarial examples with ``attacker`` (model
    frozen and in eval mode during the attack), then trains on the
    noise-corrupted adversarial batch.  Logs loss/accuracy and sigma
    statistics to TensorBoard and returns the updated ``sigma_0``.

    NOTE(review): relies on module-level ``device``, ``gaussian_num_ds``,
    ``compute_loss`` and ``get_sigma`` — confirm these are defined in
    this module.  Assumes ``train_loader`` is non-empty (the plotting at
    the end reuses the last batch).
    """
    model = model.train()
    train_loss = 0
    total = 0
    correct = 0
    # CE_loss = nn.CrossEntropyLoss()
    import time
    start_time = time.time()
    for batch_idx, (batch, targets, idx) in enumerate(train_loader):
        # NOTE(review): start_time is reset here every iteration, making
        # the assignment above the loop redundant.
        start_time = time.time()
        optimizer.zero_grad()

        batch_size = len(idx)
        batch = batch.to(device)
        targets = targets.to(device)

        # model.eval()
        sigma, _ = get_sigma(model,
                             batch,
                             lr_sigma,
                             sigma_0[idx],
                             iters_sig,
                             device,
                             gaussian_num=gaussian_num_ds)
        # model.train()
        sigma_0[idx] = sigma  # updating sigma

        # Replicate each sample num_noise_vec times for Monte-Carlo noise
        # draws (assumes a 4-D (N, C, H, W) batch — TODO confirm).
        new_shape = [batch_size * num_noise_vec]
        new_shape.extend(batch[0].shape)
        batch = batch.repeat((1, num_noise_vec, 1, 1)).view(new_shape)
        #repeating sigmas to do the monte carlo
        sigma_repeated = sigma.repeat(
            (1, num_noise_vec, 1, 1)).view(-1, 1, 1, 1)
        noise = torch.randn_like(batch) * sigma_repeated
        # Repeat targets so each noise replica keeps its original label.
        targets = targets.unsqueeze(1).repeat(1, num_noise_vec).reshape(
            -1, 1).squeeze()

        #Getting adversarial instances:
        # Freeze the model during the attack so only inputs get gradients.
        model.requires_grad_(False)
        model.eval()
        batch = attacker.attack(model,
                                batch,
                                targets,
                                noise=noise,
                                num_noise_vectors=num_noise_vec,
                                no_grad=False)
        model.train()
        model.requires_grad_(True)

        batch_corrupted = batch + noise

        outputs_softmax = model(batch_corrupted)
        # clean_output = model(batch)

        total_loss = compute_loss(outputs_softmax, targets)
        # clean_loss = compute_loss(clean_output, targets)
        # total_loss += clean_loss

        train_loss += total_loss.item() * len(batch)
        _, predicted = outputs_softmax.max(1)
        total += batch_size * num_noise_vec
        correct += predicted.eq(targets).sum().item()
        # update parameters
        total_loss.backward()
        optimizer.step()
        print('Required time (mins) for a batch is: ',
              (time.time() - start_time) / 60.0)

        if batch_idx % 100 == 0:

            print(
                '+ Epoch: {}. Iter: [{}/{} ({:.0f}%)]. Loss: {}. Accuracy: {}'.
                format(epoch, batch_idx * len(batch),
                       len(train_loader.dataset),
                       100. * batch_idx / len(train_loader),
                       train_loss / total, 100. * correct / total))

    # Compare up to 8 adversarial images against their corrupted versions
    # (uses the last batch of the loop above).
    n = min(batch.size(0), 8)
    comparison = torch.cat([batch[:n], batch_corrupted[:n]])
    comparison = torch.clamp(comparison, min=0, max=1)
    fig = plot_samples(comparison.detach().cpu().numpy().transpose(
        0, 2, 3, 1).squeeze(),
                       h=2,
                       w=n)

    writer.add_figure('sample of noisy trained examples', fig, epoch)
    writer.add_scalar('loss/train_loss', train_loss / total, epoch)
    writer.add_scalar('accuracy/train_accuracy', 100. * correct / total, epoch)
    writer.add_scalar('sigma/train_sigma_mean', sigma_0.mean().item(), epoch)
    writer.add_scalar('sigma/train_sigma_min', sigma_0.min().item(), epoch)
    writer.add_scalar('sigma/train_sigma_max', sigma_0.max().item(), epoch)

    return sigma_0
# Esempio n. 12
# 0
def main(args):
    """Train (or evaluate) a doctr text-recognition model, TensorFlow backend.

    Args:
        args: argparse Namespace carrying dataset paths, model/optimizer
            hyper-parameters and logging flags (W&B, HF hub push, AMP, ...).

    Side effects: reads datasets from disk, writes best weights under
    ./<exp_name>/weights, optionally logs to W&B and pushes to the HF hub.
    """

    print(args)

    # Authenticate early so a failed login aborts before any training work.
    if args.push_to_hub:
        login_to_hub()

    if not isinstance(args.workers, int):
        args.workers = min(16, mp.cpu_count())

    vocab = VOCABS[args.vocab]
    fonts = args.font.split(",")

    # AMP
    if args.amp:
        mixed_precision.set_global_policy("mixed_float16")

    st = time.time()

    if isinstance(args.val_path, str):
        # Hash the label file so the exact dataset version is logged to W&B.
        with open(os.path.join(args.val_path, "labels.json"), "rb") as f:
            val_hash = hashlib.sha256(f.read()).hexdigest()

        # Load val data generator
        val_set = RecognitionDataset(
            img_folder=os.path.join(args.val_path, "images"),
            labels_path=os.path.join(args.val_path, "labels.json"),
            img_transforms=T.Resize((args.input_size, 4 * args.input_size),
                                    preserve_aspect_ratio=True),
        )
    else:
        val_hash = None
        # Load synthetic data generator
        val_set = WordGenerator(
            vocab=vocab,
            min_chars=args.min_chars,
            max_chars=args.max_chars,
            num_samples=args.val_samples * len(vocab),
            font_family=fonts,
            img_transforms=T.Compose([
                T.Resize((args.input_size, 4 * args.input_size),
                         preserve_aspect_ratio=True),
                # Ensure we have a 90% split of white-background images
                T.RandomApply(T.ColorInversion(), 0.9),
            ]),
        )

    val_loader = DataLoader(
        val_set,
        batch_size=args.batch_size,
        shuffle=False,
        drop_last=False,
        num_workers=args.workers,
    )
    print(
        f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
        f"{val_loader.num_batches} batches)")

    # Load doctr model
    model = recognition.__dict__[args.arch](
        pretrained=args.pretrained,
        input_shape=(args.input_size, 4 * args.input_size, 3),
        vocab=vocab,
    )
    # Resume weights
    if isinstance(args.resume, str):
        model.load_weights(args.resume)

    # Metrics
    val_metric = TextMatch()

    batch_transforms = T.Compose([
        T.Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301)),
    ])

    # Evaluation-only mode: score once and exit.
    if args.test_only:
        print("Running evaluation")
        val_loss, exact_match, partial_match = evaluate(
            model, val_loader, batch_transforms, val_metric)
        print(
            f"Validation loss: {val_loss:.6} (Exact: {exact_match:.2%} | Partial: {partial_match:.2%})"
        )
        return

    st = time.time()

    if isinstance(args.train_path, str):
        # Load train data generator
        # A train path either holds labels.json/images directly, or several
        # sub-folders each holding their own labels.json/images.
        base_path = Path(args.train_path)
        parts = ([base_path]
                 if base_path.joinpath("labels.json").is_file() else
                 [base_path.joinpath(sub) for sub in os.listdir(base_path)])
        with open(parts[0].joinpath("labels.json"), "rb") as f:
            train_hash = hashlib.sha256(f.read()).hexdigest()

        train_set = RecognitionDataset(
            parts[0].joinpath("images"),
            parts[0].joinpath("labels.json"),
            img_transforms=T.Compose([
                T.RandomApply(T.ColorInversion(), 0.1),
                T.Resize((args.input_size, 4 * args.input_size),
                         preserve_aspect_ratio=True),
                # Augmentations
                T.RandomJpegQuality(60),
                T.RandomSaturation(0.3),
                T.RandomContrast(0.3),
                T.RandomBrightness(0.3),
            ]),
        )
        if len(parts) > 1:
            # NOTE(review): merged sub-folders get no img_transforms — only
            # the first part is augmented; confirm this is intentional.
            for subfolder in parts[1:]:
                train_set.merge_dataset(
                    RecognitionDataset(subfolder.joinpath("images"),
                                       subfolder.joinpath("labels.json")))
    else:
        train_hash = None
        # Load synthetic data generator
        train_set = WordGenerator(
            vocab=vocab,
            min_chars=args.min_chars,
            max_chars=args.max_chars,
            num_samples=args.train_samples * len(vocab),
            font_family=fonts,
            img_transforms=T.Compose([
                T.Resize((args.input_size, 4 * args.input_size),
                         preserve_aspect_ratio=True),
                # Ensure we have a 90% split of white-background images
                T.RandomApply(T.ColorInversion(), 0.9),
                T.RandomJpegQuality(60),
                T.RandomSaturation(0.3),
                T.RandomContrast(0.3),
                T.RandomBrightness(0.3),
            ]),
        )

    train_loader = DataLoader(
        train_set,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True,
        num_workers=args.workers,
    )
    print(
        f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
        f"{train_loader.num_batches} batches)")

    if args.show_samples:
        x, target = next(iter(train_loader))
        plot_samples(x, target)
        return

    # Optimizer
    scheduler = tf.keras.optimizers.schedules.ExponentialDecay(
        args.lr,
        decay_steps=args.epochs * len(train_loader),
        decay_rate=1 / (25e4),  # final lr as a fraction of initial lr
        staircase=False,
    )
    optimizer = tf.keras.optimizers.Adam(learning_rate=scheduler,
                                         beta_1=0.95,
                                         beta_2=0.99,
                                         epsilon=1e-6,
                                         clipnorm=5)
    if args.amp:
        # Wrap the optimizer for dynamic loss scaling under mixed precision.
        optimizer = mixed_precision.LossScaleOptimizer(optimizer)
    # LR Finder
    if args.find_lr:
        lrs, losses = record_lr(model,
                                train_loader,
                                batch_transforms,
                                optimizer,
                                amp=args.amp)
        plot_recorder(lrs, losses)
        return

    # Tensorboard to monitor training
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    exp_name = f"{args.arch}_{current_time}" if args.name is None else args.name

    # W&B
    if args.wb:

        run = wandb.init(
            name=exp_name,
            project="text-recognition",
            config={
                "learning_rate": args.lr,
                "epochs": args.epochs,
                "weight_decay": 0.0,
                "batch_size": args.batch_size,
                "architecture": args.arch,
                "input_size": args.input_size,
                "optimizer": "adam",
                "framework": "tensorflow",
                "scheduler": "exp_decay",
                "vocab": args.vocab,
                "train_hash": train_hash,
                "val_hash": val_hash,
                "pretrained": args.pretrained,
            },
        )

    # Best validation loss seen so far; checkpoints only improve on it.
    min_loss = np.inf

    # Training loop
    mb = master_bar(range(args.epochs))
    for epoch in mb:
        fit_one_epoch(model, train_loader, batch_transforms, optimizer, mb,
                      args.amp)

        # Validation loop at the end of each epoch
        val_loss, exact_match, partial_match = evaluate(
            model, val_loader, batch_transforms, val_metric)
        if val_loss < min_loss:
            print(
                f"Validation loss decreased {min_loss:.6} --> {val_loss:.6}: saving state..."
            )
            model.save_weights(f"./{exp_name}/weights")
            min_loss = val_loss
        mb.write(
            f"Epoch {epoch + 1}/{args.epochs} - Validation loss: {val_loss:.6} "
            f"(Exact: {exact_match:.2%} | Partial: {partial_match:.2%})")
        # W&B
        if args.wb:
            wandb.log({
                "val_loss": val_loss,
                "exact_match": exact_match,
                "partial_match": partial_match,
            })

    if args.wb:
        run.finish()

    if args.push_to_hub:
        push_to_hf_hub(model, exp_name, task="recognition", run_config=args)
Esempio n. 13
0
def main():
    """Train a WGAN on anime-face images (TensorFlow 1.x graph/session API).

    Runs an endless training loop (stopped externally): 5 discriminator
    steps per generator step, periodic sample plots to out2/ and model
    checkpoints every 100 iterations.

    NOTE(review): `bs` (batch size) is not defined in this function —
    presumably a module-level global; verify before reuse.
    """
    # set GPU card
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    # load anime face
    data_dir = '../anime_face/data_64/images/'
    data_extra_dir = '../anime_face/extra_data/images/'
    ds = dataset()
    ds.load_data(data_dir, verbose=0)
    ds.load_data(data_extra_dir, verbose=0)
    ds.shuffle()

    # reset graph
    tf.reset_default_graph()

    # set session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # build model
    model = GAN(sess, gf_dim=128)

    # training
    # Fixed latent batch so sample plots are comparable across iterations.
    z_plot = sample_z(36, 100)

    # initial fake image
    # NOTE(review): this z is overwritten inside the loop before first use.
    z = sample_z((bs), 100)
    i = 1
    while True:
        # Plot more frequently early on, then back off as training settles.
        if (i == 1) or (i <= 100
                        and i % 20 == 0) or (i <= 200 and i % 50 == 0) or (
                            i <= 1000 and i % 100 == 0) or (i % 200 == 0):
            g_samples = model.generate(z_plot)
            plot_samples(g_samples,
                         save=True,
                         filename=str(i),
                         folder_path='out2/',
                         h=6,
                         w=6)

        # train discriminator more
        for _ in range(5):
            real_img = ds.next_batch(bs)
            z = sample_z(bs, 100)
            fake_img = model.generate(z)
            # train D
            D_loss = model.train_D(real_img, fake_img)

        G_loss = model.train_G(bs)

        if (i % 100) == 0:
            # Checkpoint, then report losses on a fresh evaluation batch.
            model.save(model_name='WGAN_v2')
            z_loss = sample_z(64, 100)
            g_loss = model.generate(sample_z(32, 100))
            g, d = model.sess.run([model.G_loss, model.D_loss],
                                  feed_dict={
                                      model.xs: ds.random_sample(32),
                                      model.gs: g_loss,
                                      model.zs: z_loss
                                  })
            print(str(i) + ' iteration:')
            print('D_loss:', d)
            print('G_loss:', g, '\n')

        i = i + 1
Esempio n. 14
0
    # Alternating GAN training loop: config.d_steps discriminator updates,
    # then config.g_steps generator updates per iteration. (Fragment — the
    # enclosing function's signature is outside this view.)
    samples = []
    for it in tqdm(range(config.num_iterations)):
        d_infos = []
        for d_index in range(config.d_steps):
            d_info = d_loop(G, D, d_optimizer, criterion)
            d_infos.append(d_info)
        # Average (real-loss, fake-loss) pairs across the d_steps updates.
        d_infos = np.mean(d_infos, 0)
        d_real_loss, d_fake_loss = d_infos

        g_infos = []
        for g_index in range(config.g_steps):
            g_info = g_loop(G, D, g_optimizer, d_optimizer, criterion)
            g_infos.append(g_info)
        g_infos = np.mean(g_infos)
        g_loss = g_infos

        if it % config.log_interval == 0:
            g_fake_data = g_sample()
            samples.append(g_fake_data)
            utils.plot_scatter(points=g_fake_data,
                               centers=dset.centers,
                               title='[{}] Iteration {}'.format(prefix, it),
                               path='{}/samples_{}.png'.format(exp_dir, it))
            print(d_real_loss, d_fake_loss, g_loss)

        # NOTE(review): bare `raise` with no active exception raises
        # RuntimeError, aborting after the first iteration — looks like a
        # debugging leftover; confirm before removing.
        raise
    utils.plot_samples(samples,
                       config.log_interval,
                       config.unrolled_steps,
                       path='{}/samples_{}.png'.format(exp_dir, 'final'))
def train_fader_network():
    """Train a Fader-network-style encoder/decoder against a latent-space
    attribute discriminator (pre-0.4 PyTorch Variable API).

    Each epoch: train both networks, run validation, and every
    `sample_every` epochs dump attribute-swapped test reconstructions.
    Weights are saved on exit, including after Ctrl-C.
    """
    gpu_id = 1
    use_cuda = False
    # use true instead
    data_dir = 'data'
    sample_every = 10
    test_dir = join(data_dir, 'test-samples')
    encoder_decoder_fpath = join(data_dir, 'weights', 'adver.params')
    discriminator_fpath = join(data_dir, 'weights', 'discr.params')

    train, valid, test = split_train_val_test(data_dir)

    num_attr = train.attribute_names.shape[0]
    encoder_decoder = EncoderDecoder(num_attr, gpu_id=gpu_id)
    discriminator = Discriminator(num_attr)
    if use_cuda:
        encoder_decoder.cuda(gpu_id)
        discriminator.cuda(gpu_id)

    train_iter = DataLoader(train, batch_size=64, shuffle=True, num_workers=8)
    valid_iter = DataLoader(valid, batch_size=64, shuffle=False, num_workers=8)
    test_iter = DataLoader(test, batch_size=64, shuffle=False, num_workers=8)

    # train_iter = DataLoader(train, batch_size=32, shuffle=True, num_workers=8)
    # valid_iter = DataLoader(valid, batch_size=32, shuffle=False, num_workers=8)
    # test_iter  = DataLoader(test, batch_size=32, shuffle=False, num_workers=8)

    max_epochs = 1000
    lr, beta1 = 2e-3, 0.5

    adversarial_optimizer = optim.Adam(encoder_decoder.parameters(),
                                       lr=lr,
                                       betas=(beta1, 0.999))
    discriminator_optimizer = optim.Adam(discriminator.parameters(),
                                         lr=lr,
                                         betas=(beta1, 0.999))
    # NOTE(review): size_average is deprecated in modern PyTorch
    # (use reduction='mean'); kept for the old API this file targets.
    mse_loss = nn.MSELoss(size_average=True)
    bce_loss = nn.BCELoss(size_average=True)

    # Global iteration counter: indexes into the lambda_e ramp below.
    num_iters = 5
    # Linear ramp of the adversarial loss weight over the first 500k iters.
    lambda_e = np.linspace(0, 1e-4, 500000)

    attribute_classifier = AttributeClassifier(num_attr, use_cuda=False)
    # load classifier instead

    try:
        for epoch in range(1, max_epochs):
            encoder_decoder.train()
            discriminator.train()
            for iteration, (x, yb, yt, _) in enumerate(train_iter, start=1):
                if use_cuda:
                    x = x.cuda(gpu_id)
                    yb, yt = yb.cuda(gpu_id), yt.cuda(gpu_id)
                x, yb, yt = Variable(x), Variable(yb), Variable(yt)

                # changing yb and yt to be the output of the classifier
                yt.data = attribute_classifier(x).data
                yb.data[:, 0] = yt.data
                yb.data[:, 1] = 1 - yt.data

                #print yb.data.cpu().numpy().shape
                #print yt.data.cpu().numpy().shape
                adversarial_optimizer.zero_grad()
                z, x_hat = encoder_decoder(x, yb)

                #if (epoch == 1) or (epoch % sample_every == 0):
                #if (epoch % sample_every == 0):
                #    plot_samples(x, x_hat, prefix='train_%d_%d' % (
                #        epoch, iteration))

                # send the output of the encoder as a new Variable that is not
                # part of the backward pass
                # not sure if this is the correct way to do so
                # https://discuss.pytorch.org/t/how-to-copy-a-variable-in-a-network-graph/1603/9
                z_in = Variable(z.data, requires_grad=False)
                discriminator_optimizer.zero_grad()
                y_hat = discriminator(z_in)

                # adversarial loss
                y_in = Variable(y_hat.data, requires_grad=False)
                le_idx = min(500000 - 1, num_iters)
                le_val = Variable(torch.FloatTensor([lambda_e[le_idx]
                                                     ]).float(),
                                  requires_grad=False)
                if use_cuda:
                    le_val = le_val.cuda(gpu_id)
                advers_loss = mse_loss(x_hat, x) +\
                    le_val * bce_loss(y_in, 1 - yt)
                advers_loss.backward()
                adversarial_optimizer.step()

                # discriminative loss
                discrim_loss = bce_loss(y_hat, yt)
                discrim_loss.backward()
                discriminator_optimizer.step()

                print(' Train epoch %d, iter %d (lambda_e = %.2e)' %
                      (epoch, iteration, le_val.data[0]))
                print('  adv. loss = %.6f' % (advers_loss.data[0]))
                print('  dsc. loss = %.6f' % (discrim_loss.data[0]))

                num_iters += 1

            encoder_decoder.eval()
            discriminator.eval()
            # NOTE(review): le_val below is the value left over from the last
            # training iteration — validation NameErrors if train_iter is empty.
            for iteration, (x, yb, yt, _) in enumerate(valid_iter, start=1):
                if use_cuda:
                    x = x.cuda(gpu_id)
                    yb, yt = yb.cuda(gpu_id), yt.cuda(gpu_id)
                x, yb, yt = Variable(x), Variable(yb), Variable(yt)
                yt.data = attribute_classifier(x).data
                yb.data[:, 0] = yt.data
                yb.data[:, 1] = 1 - yt.data
                z, x_hat = encoder_decoder(x, yb)

                #plot_samples(x, x_hat, prefix='valid_%d_%d' % (
                #    epoch, iteration))

                z_in = Variable(z.data, requires_grad=False)
                y_hat = discriminator(z_in)

                y_in = Variable(y_hat.data, requires_grad=False)
                valid_advers_loss = mse_loss(x_hat, x) +\
                    le_val * bce_loss(y_in, 1 - yt)
                valid_discrim_loss = bce_loss(y_hat, yt)
                print(' Valid epoch %d, iter %d (lambda_e = %.2e)' %
                      (epoch, iteration, le_val.data[0]))
                print('  adv. loss = %.6f' % (valid_advers_loss.data[0]))
                print('  dsc. loss = %.6f' % (valid_discrim_loss.data[0]))

            if (epoch % sample_every == 0):
                encoder_decoder.eval()
                for iteration, (x, yb, ys, fp) in enumerate(test_iter, 1):
                    # randomly choose an attribute and swap the targets
                    to_swap = np.random.choice(test.attribute_names)
                    swap_idx, = np.where(test.attribute_names == to_swap)[0]
                    # map (0, 1) --> (1, 0), and (1, 0) --> (0, 1)
                    yb[:, 2 * swap_idx] = 1 - yb[:, 2 * swap_idx]
                    yb[:, 2 * swap_idx + 1] = 1 - yb[:, 2 * swap_idx + 1]
                    if use_cuda:
                        x, yb = x.cuda(gpu_id), yb.cuda(gpu_id)
                    x, yb = Variable(x), Variable(yb)

                    yt.data = attribute_classifier(x).data
                    yb.data[:, 0] = yt.data
                    yb.data[:, 1] = 1 - yt.data

                    _, x_hat = encoder_decoder(x, yb)
                    sample_dir = join(test_dir, '%s' % epoch, '%s' % to_swap)
                    if not exists(sample_dir):
                        makedirs(sample_dir)
                    fnames = ['%s.png' % splitext(basename(f))[0] for f in fp]
                    fpaths = [join(sample_dir, f) for f in fnames]
                    plot_samples(x, x_hat, fpaths)
    except KeyboardInterrupt:
        print('Caught Ctrl-C, interrupting training.')
    # NOTE(review): this broad catch silently ends training on ANY runtime
    # error (e.g. CUDA OOM) and falls through to saving weights — confirm.
    except RuntimeError:
        print('RuntimeError')
    print('Saving encoder/decoder parameters to %s' % (encoder_decoder_fpath))
    torch.save(encoder_decoder.state_dict(), encoder_decoder_fpath)
    print('Saving discriminator parameters to %s' % (discriminator_fpath))
    torch.save(discriminator.state_dict(), discriminator_fpath)
Esempio n. 16
0
def main(args):
    """Train (or evaluate) a doctr text-detection model, PyTorch backend.

    Args:
        args: argparse Namespace carrying dataset paths, model/optimizer
            hyper-parameters, device selection and logging flags.

    Side effects: reads datasets from disk, writes best weights to
    ./<exp_name>.pt, optionally logs to W&B and pushes to the HF hub.
    """

    print(args)

    # Authenticate early so a failed login aborts before any training work.
    if args.push_to_hub:
        login_to_hub()

    if not isinstance(args.workers, int):
        args.workers = min(16, mp.cpu_count())

    torch.backends.cudnn.benchmark = True

    st = time.time()
    val_set = DetectionDataset(
        img_folder=os.path.join(args.val_path, "images"),
        label_path=os.path.join(args.val_path, "labels.json"),
        sample_transforms=T.SampleCompose(([
            T.Resize((args.input_size, args.input_size),
                     preserve_aspect_ratio=True,
                     symmetric_pad=True)
        ] if not args.rotation or args.eval_straight else []) + ([
            T.Resize(args.input_size, preserve_aspect_ratio=True
                     ),  # This does not pad
            T.RandomRotate(90, expand=True),
            T.Resize((args.input_size, args.input_size),
                     preserve_aspect_ratio=True,
                     symmetric_pad=True),
        ] if args.rotation and not args.eval_straight else [])),
        use_polygons=args.rotation and not args.eval_straight,
    )
    val_loader = DataLoader(
        val_set,
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        sampler=SequentialSampler(val_set),
        pin_memory=torch.cuda.is_available(),
        collate_fn=val_set.collate_fn,
    )
    print(
        f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
        f"{len(val_loader)} batches)")
    # Hash the label file so the exact dataset version is logged to W&B.
    with open(os.path.join(args.val_path, "labels.json"), "rb") as f:
        val_hash = hashlib.sha256(f.read()).hexdigest()

    batch_transforms = Normalize(mean=(0.798, 0.785, 0.772),
                                 std=(0.264, 0.2749, 0.287))

    # Load doctr model
    model = detection.__dict__[args.arch](
        pretrained=args.pretrained, assume_straight_pages=not args.rotation)

    # Resume weights
    if isinstance(args.resume, str):
        print(f"Resuming {args.resume}")
        checkpoint = torch.load(args.resume, map_location="cpu")
        model.load_state_dict(checkpoint)

    # GPU
    if isinstance(args.device, int):
        if not torch.cuda.is_available():
            raise AssertionError(
                "PyTorch cannot access your GPU. Please investigate!")
        if args.device >= torch.cuda.device_count():
            raise ValueError("Invalid device index")
    # Silent default switch to GPU if available
    elif torch.cuda.is_available():
        args.device = 0
    else:
        # FIX: corrected "targe device" typo in the warning message.
        logging.warning("No accessible GPU, target device set to CPU.")
    if torch.cuda.is_available():
        torch.cuda.set_device(args.device)
        model = model.cuda()

    # Metrics
    val_metric = LocalizationConfusion(use_polygons=args.rotation
                                       and not args.eval_straight,
                                       mask_shape=(args.input_size,
                                                   args.input_size))

    # Evaluation-only mode: score once and exit.
    if args.test_only:
        print("Running evaluation")
        val_loss, recall, precision, mean_iou = evaluate(model,
                                                         val_loader,
                                                         batch_transforms,
                                                         val_metric,
                                                         amp=args.amp)
        print(
            f"Validation loss: {val_loss:.6} (Recall: {recall:.2%} | Precision: {precision:.2%} | "
            f"Mean IoU: {mean_iou:.2%})")
        return

    st = time.time()
    # Load both train and val data generators
    train_set = DetectionDataset(
        img_folder=os.path.join(args.train_path, "images"),
        label_path=os.path.join(args.train_path, "labels.json"),
        img_transforms=Compose([
            # Augmentations
            T.RandomApply(T.ColorInversion(), 0.1),
            ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3,
                        hue=0.02),
        ]),
        sample_transforms=T.SampleCompose(([
            T.Resize((args.input_size, args.input_size),
                     preserve_aspect_ratio=True,
                     symmetric_pad=True)
        ] if not args.rotation else []) + ([
            T.Resize(args.input_size, preserve_aspect_ratio=True),
            T.RandomRotate(90, expand=True),
            T.Resize((args.input_size, args.input_size),
                     preserve_aspect_ratio=True,
                     symmetric_pad=True),
        ] if args.rotation else [])),
        use_polygons=args.rotation,
    )

    train_loader = DataLoader(
        train_set,
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.workers,
        sampler=RandomSampler(train_set),
        pin_memory=torch.cuda.is_available(),
        collate_fn=train_set.collate_fn,
    )
    print(
        f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
        f"{len(train_loader)} batches)")
    with open(os.path.join(args.train_path, "labels.json"), "rb") as f:
        train_hash = hashlib.sha256(f.read()).hexdigest()

    if args.show_samples:
        x, target = next(iter(train_loader))
        plot_samples(x, target)
        return

    # Backbone freezing
    if args.freeze_backbone:
        for p in model.feat_extractor.parameters():
            # FIX: was `p.reguires_grad_(False)` (typo) — raised
            # AttributeError instead of freezing the backbone.
            p.requires_grad_(False)

    # Optimizer
    optimizer = torch.optim.Adam(
        [p for p in model.parameters() if p.requires_grad],
        args.lr,
        betas=(0.95, 0.99),
        eps=1e-6,
        weight_decay=args.weight_decay,
    )
    # LR Finder
    if args.find_lr:
        lrs, losses = record_lr(model,
                                train_loader,
                                batch_transforms,
                                optimizer,
                                amp=args.amp)
        plot_recorder(lrs, losses)
        return
    # Scheduler
    if args.sched == "cosine":
        scheduler = CosineAnnealingLR(optimizer,
                                      args.epochs * len(train_loader),
                                      eta_min=args.lr / 25e4)
    elif args.sched == "onecycle":
        scheduler = OneCycleLR(optimizer, args.lr,
                               args.epochs * len(train_loader))
    else:
        # Fail fast with a clear message instead of a NameError when
        # `scheduler` is first used below.
        raise ValueError(f"Unknown scheduler '{args.sched}'")

    # Training monitoring
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    exp_name = f"{args.arch}_{current_time}" if args.name is None else args.name

    # W&B
    if args.wb:

        run = wandb.init(
            name=exp_name,
            project="text-detection",
            config={
                "learning_rate": args.lr,
                "epochs": args.epochs,
                "weight_decay": args.weight_decay,
                "batch_size": args.batch_size,
                "architecture": args.arch,
                "input_size": args.input_size,
                "optimizer": "adam",
                "framework": "pytorch",
                "scheduler": args.sched,
                "train_hash": train_hash,
                "val_hash": val_hash,
                "pretrained": args.pretrained,
                "rotation": args.rotation,
                "amp": args.amp,
            },
        )

    # Best validation loss seen so far; checkpoints only improve on it.
    min_loss = np.inf

    # Training loop
    mb = master_bar(range(args.epochs))
    for epoch in mb:
        fit_one_epoch(model,
                      train_loader,
                      batch_transforms,
                      optimizer,
                      scheduler,
                      mb,
                      amp=args.amp)
        # Validation loop at the end of each epoch
        val_loss, recall, precision, mean_iou = evaluate(model,
                                                         val_loader,
                                                         batch_transforms,
                                                         val_metric,
                                                         amp=args.amp)
        if val_loss < min_loss:
            print(
                f"Validation loss decreased {min_loss:.6} --> {val_loss:.6}: saving state..."
            )
            torch.save(model.state_dict(), f"./{exp_name}.pt")
            min_loss = val_loss
        log_msg = f"Epoch {epoch + 1}/{args.epochs} - Validation loss: {val_loss:.6} "
        if any(val is None for val in (recall, precision, mean_iou)):
            log_msg += "(Undefined metric value, caused by empty GTs or predictions)"
        else:
            log_msg += f"(Recall: {recall:.2%} | Precision: {precision:.2%} | Mean IoU: {mean_iou:.2%})"
        mb.write(log_msg)
        # W&B
        if args.wb:
            wandb.log({
                "val_loss": val_loss,
                "recall": recall,
                "precision": precision,
                "mean_iou": mean_iou,
            })

    if args.wb:
        run.finish()

    if args.push_to_hub:
        push_to_hf_hub(model, exp_name, task="detection", run_config=args)
Esempio n. 17
0
def main(args):
    """Train (or evaluate) a doctr document-artefact object-detection model
    (PyTorch backend) on the DocArtefacts dataset.

    Args:
        args: argparse Namespace carrying model/optimizer hyper-parameters,
            device selection and logging flags.

    Side effects: downloads/reads the dataset, writes best weights to
    ./<exp_name>.pt, optionally logs to W&B.
    """
    print(args)

    if not isinstance(args.workers, int):
        args.workers = min(16, mp.cpu_count())

    torch.backends.cudnn.benchmark = True

    st = time.time()
    val_set = DocArtefacts(
        train=False,
        download=True,
        img_transforms=T.Resize((args.input_size, args.input_size)),
    )
    val_loader = DataLoader(
        val_set,
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        sampler=SequentialSampler(val_set),
        pin_memory=torch.cuda.is_available(),
        collate_fn=val_set.collate_fn,
    )
    print(
        f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
        f"{len(val_loader)} batches)")

    # Load doctr model
    model = obj_detection.__dict__[args.arch](pretrained=args.pretrained,
                                              num_classes=5)

    # Resume weights
    if isinstance(args.resume, str):
        print(f"Resuming {args.resume}")
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint)

    # GPU
    if isinstance(args.device, int):
        if not torch.cuda.is_available():
            raise AssertionError(
                "PyTorch cannot access your GPU. Please investigate!")
        if args.device >= torch.cuda.device_count():
            raise ValueError("Invalid device index")
    # Silent default switch to GPU if available
    elif torch.cuda.is_available():
        args.device = 0
    else:
        logging.warning("No accessible GPU, target device set to CPU.")
    if torch.cuda.is_available():
        torch.cuda.set_device(args.device)
        model = model.cuda()

    # Metrics
    metric = DetectionMetric(iou_thresh=0.5)

    # Evaluation-only mode: score once and exit.
    if args.test_only:
        print("Running evaluation")
        recall, precision, mean_iou = evaluate(model,
                                               val_loader,
                                               metric,
                                               amp=args.amp)
        print(
            f"Recall: {recall:.2%} | Precision: {precision:.2%} |IoU: {mean_iou:.2%}"
        )
        return

    st = time.time()
    # Load train data generators
    train_set = DocArtefacts(
        train=True,
        download=True,
        img_transforms=Compose([
            T.Resize((args.input_size, args.input_size)),
            T.RandomApply(T.GaussianNoise(0., 0.25), p=0.5),
            ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3,
                        hue=0.02),
            T.RandomApply(GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 3)),
                          .3),
        ]),
        sample_transforms=T.RandomHorizontalFlip(p=0.5),
    )
    train_loader = DataLoader(
        train_set,
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.workers,
        sampler=RandomSampler(train_set),
        pin_memory=torch.cuda.is_available(),
        collate_fn=train_set.collate_fn,
    )
    print(
        f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
        f"{len(train_loader)} batches)")

    if args.show_samples:
        images, targets = next(iter(train_loader))
        # Targets come normalized; convert to pixel coordinates for plotting.
        targets = convert_to_abs_coords(targets, images.shape)
        plot_samples(images, targets, train_set.CLASSES)
        return

    # Backbone freezing
    if args.freeze_backbone:
        for p in model.backbone.parameters():
            # FIX: was `p.reguires_grad_(False)` (typo) — raised
            # AttributeError instead of freezing the backbone.
            p.requires_grad_(False)

    # Optimizer
    optimizer = optim.SGD([p for p in model.parameters() if p.requires_grad],
                          lr=args.lr,
                          weight_decay=args.weight_decay)
    # LR Finder
    if args.find_lr:
        lrs, losses = record_lr(model, train_loader, optimizer, amp=args.amp)
        plot_recorder(lrs, losses)
        return
    # Scheduler
    scheduler = StepLR(optimizer, step_size=8, gamma=0.7)

    # Training monitoring
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    exp_name = f"{args.arch}_{current_time}" if args.name is None else args.name

    # W&B
    if args.wb:
        run = wandb.init(name=exp_name,
                         project="object-detection",
                         config={
                             "learning_rate": args.lr,
                             "epochs": args.epochs,
                             "weight_decay": args.weight_decay,
                             "batch_size": args.batch_size,
                             "architecture": args.arch,
                             "input_size": args.input_size,
                             "optimizer": "sgd",
                             "framework": "pytorch",
                             "scheduler": "step",
                             "pretrained": args.pretrained,
                             "amp": args.amp,
                         })

    mb = master_bar(range(args.epochs))
    # Best F1 score seen so far; checkpoints only improve on it.
    max_score = 0.

    for epoch in mb:
        fit_one_epoch(model,
                      train_loader,
                      optimizer,
                      scheduler,
                      mb,
                      amp=args.amp)
        # Validation loop at the end of each epoch
        recall, precision, mean_iou = evaluate(model,
                                               val_loader,
                                               metric,
                                               amp=args.amp)
        # Guard against division by zero when both metrics are 0.
        f1_score = 2 * precision * recall / (precision + recall) if (
            precision + recall) > 0 else 0.

        if f1_score > max_score:
            print(
                f"Validation metric increased {max_score:.6} --> {f1_score:.6}: saving state..."
            )
            torch.save(model.state_dict(), f"./{exp_name}.pt")
            max_score = f1_score
        log_msg = f"Epoch {epoch + 1}/{args.epochs} - "
        if any(val is None for val in (recall, precision, mean_iou)):
            log_msg += "Undefined metric value, caused by empty GTs or predictions"
        else:
            log_msg += f"Recall: {recall:.2%} | Precision: {precision:.2%} | Mean IoU: {mean_iou:.2%}"
        mb.write(log_msg)
        # W&B
        if args.wb:
            wandb.log({
                'recall': recall,
                'precision': precision,
                'mean_iou': mean_iou,
            })

    if args.wb:
        run.finish()
Esempio n. 18
0
            target: _train[i * batch_size: (i + 1) * batch_size]
        })
    test_fn = theano.function(
        [i], ll,
        givens={
            target: _test[i * batch_size: (i + 1) * batch_size]
        })

    num_train_batches = train.shape[0] / batch_size
    num_test_batches = test.shape[0] / batch_size

    for e in xrange(30):
        train_errs = []
        test_errs = []
        for idx in xrange(num_train_batches):
            train_errs.append(train_fn(idx))
        for idx in xrange(num_test_batches):
            test_errs.append(test_fn(idx))
        print 'epoch', e, 'train err', np.mean(train_errs), 'test err', np.mean(test_errs)

    sample_reconstructions(test, recon, target)

    # construct separate decoder
    z_input = T.matrix()
    single_decoder = lasagne.layers.InputLayer((None, latent_size), z_input)
    single_decoder = lasagne.layers.DenseLayer(single_decoder, num_units=100, nonlinearity=rectify, W=decoder1.W, b=decoder1.b)
    single_decoder = lasagne.layers.DenseLayer(single_decoder, num_units=100, nonlinearity=rectify, W=decoder2.W, b=decoder2.b)
    decode = theano.function([z_input], lasagne.layers.get_output(single_decoder))

    plot_samples(decode)
def train_geometric_matching():
    """Train the geometric-matching network on synthetically warped crops.

    Builds a VGG-16 based model, initializes the feature-extraction layers
    from pre-trained VGG-16 weights, then alternates training and validation
    epochs on Pascal VOC image pairs generated with random affine transforms.
    The learned transformer weights are pickled to ``weights/weights.pickle``
    (also when training is interrupted with ctrl-c).
    """
    # Ranges for the random synthetic transformations applied to crops.
    trans_params = {
        'rotation': (0, 0),
        'offset': (0, 0),
        'flip': (False, False),
        'shear': (0., 0.),
        'stretch': (1. / 2, 2),
    }

    print('building model')
    layers = vgg16.build_model((None, 3, 227, 227))

    # file to store the learned weights
    weightsfile = join('weights', 'weights.pickle')

    # initialize the feature extraction layers
    pretrainfile = join('weights', 'vgg16.pkl')
    print('initializing feature extraction layers from %s' % (pretrainfile))
    with open(pretrainfile, 'rb') as f:
        data = pickle.load(f)
    # weights are tied, no need to initialize a and b
    set_all_param_values(layers['pool4a'], data['param values'][0:20])

    # used to initialize from learned weights
    #with open(weightsfile, 'rb') as f:
    #    param_values = pickle.load(f)
    #set_all_param_values(layers['trans'], param_values)

    mean = data['mean value']

    max_epochs = 5000
    batch_size = 16
    sample_every = 25  # visualizes network output every n epochs
    sample_dir = join('data', 'samples')

    # set this to point to the root of Pascal VOC-2011
    voc_fpath = '/media/hdd/hendrik/datasets/pascal-2011'
    train_fpaths, valid_fpaths = utils.train_val_split(voc_fpath)

    print('compiling theano functions for training')
    train_func = theano_funcs.create_train_func(layers)
    print('compiling theano functions for validation')
    valid_func = theano_funcs.create_valid_func(layers)

    try:
        for epoch in range(1, max_epochs + 1):
            print('epoch %d' % (epoch))
            train_losses = []
            # Ceil-divide with // so the batch count stays an int under
            # Python 3 (true division `/` would yield a float here).
            num_train_idx = (len(train_fpaths) + batch_size - 1) // batch_size
            train_iter = utils.get_batch_idx(len(train_fpaths), batch_size)
            for i, idx in tqdm(train_iter, total=num_train_idx, leave=False):
                X_crop_train, X_warp_train, M_train =\
                    utils.prepare_synth_batch(train_fpaths[idx], mean,
                                              trans_params)
                M, train_loss = train_func(X_crop_train, X_warp_train, M_train)
                train_losses.append(train_loss)
                if epoch % sample_every == 0:
                    utils.plot_samples(X_crop_train,
                                       X_warp_train,
                                       M,
                                       mean,
                                       prefix=join(sample_dir, 'train_%d' % i))
            print(' train loss = %.6f' % (np.mean(train_losses)))

            valid_losses = []
            # Same integer ceil-division as above.
            num_valid_idx = (len(valid_fpaths) + batch_size - 1) // batch_size
            valid_iter = utils.get_batch_idx(len(valid_fpaths), batch_size)
            for i, idx in tqdm(valid_iter, total=num_valid_idx, leave=False):
                X_crop_valid, X_warp_valid, M_valid =\
                    utils.prepare_synth_batch(valid_fpaths[idx], mean,
                                              trans_params)
                M, valid_loss = valid_func(X_crop_valid, X_warp_valid, M_valid)
                valid_losses.append(valid_loss)
                if epoch % sample_every == 0:
                    utils.plot_samples(X_crop_valid,
                                       X_warp_valid,
                                       M,
                                       mean,
                                       prefix=join(sample_dir, 'valid_%d' % i))
            print(' valid loss = %.6f' % (np.mean(valid_losses)))
    except KeyboardInterrupt:
        print('caught ctrl-c, stopped training')

    print('saving weights to %s' % (weightsfile))
    weights = get_all_param_values(layers['trans'])
    with open(weightsfile, 'wb') as f:
        pickle.dump(weights, f, protocol=pickle.HIGHEST_PROTOCOL)
Esempio n. 20
0
def train(epoch,
          model,
          train_loader,
          optimizer,
          writer,
          sigma_0,
          lr_sigma,
          iters_sig,
          gaussian_num=1,
          lamda=0.0,
          gamma=0.0,
          gaussian_num_ds=1):
    """Run one training epoch with per-sample learned Gaussian noise.

    For each batch a per-example noise scale ``sigma`` is optimized via
    ``get_sigma`` (with ``gaussian_num_ds`` Monte-Carlo samples) and written
    back into ``sigma_0``; the batch is then replicated ``gaussian_num``
    times, corrupted with Gaussian noise of that scale, and the noisy
    outputs are averaged before computing ``compute_loss`` plus ``lamda``
    times the MACER regularizer (with margin parameter ``gamma``).
    Loss/accuracy/sigma statistics and a figure of the last batch's noisy
    samples are logged to the TensorBoard ``writer``.

    Returns the updated per-sample ``sigma_0`` tensor.

    NOTE(review): relies on a module-level ``device`` and helpers
    ``get_sigma``, ``compute_loss``, ``macer_loss`` and ``plot_samples``
    defined elsewhere; the hard-coded 1000-way reshape assumes ImageNet.
    """
    model = model.train()
    train_loss = 0
    total = 0
    correct = 0
    # CE_loss = nn.CrossEntropyLoss()
    for batch_idx, (batch, targets, idx) in enumerate(train_loader):
        optimizer.zero_grad()

        batch_size = len(idx)
        batch = batch.to(device)
        targets = targets.to(device)

        # Optimize the per-sample noise scale for this batch.
        # model.eval()
        sigma, _ = get_sigma(model,
                             batch,
                             lr_sigma,
                             sigma_0[idx],
                             iters_sig,
                             device,
                             gaussian_num=gaussian_num_ds)
        # model.train()
        sigma_0[idx] = sigma  # updating sigma

        #repeating the input for computing the macer loss
        new_shape = [batch_size * gaussian_num]
        new_shape.extend(batch[0].shape)
        batch = batch.repeat((1, gaussian_num, 1, 1)).view(new_shape)
        #repeating sigmas to do the monte carlo
        sigma_repeated = sigma.repeat(
            (1, gaussian_num, 1, 1)).view(-1, 1, 1, 1)
        noise = torch.randn_like(batch) * sigma_repeated

        batch_corrupted = batch + noise

        # Monte-Carlo estimate: average the model output over the
        # gaussian_num noisy copies of each example.
        outputs_softmax = model(batch_corrupted).reshape(
            batch_size, gaussian_num, 1000).mean(1)  #1000 here is for ImageNet
        # clean_output = model(batch)

        total_loss = compute_loss(outputs_softmax, targets)
        if torch.isnan(outputs_softmax).any() or torch.isnan(total_loss).any():
            print('F**k')
        total_loss += lamda * macer_loss(outputs_softmax, targets, sigma,
                                         gamma)
        # clean_loss = compute_loss(clean_output, targets)
        # total_loss += clean_loss

        # NOTE(review): len(batch) here is batch_size * gaussian_num (the
        # batch was repeated above), while `total` counts original targets —
        # confirm the intended loss normalization.
        train_loss += total_loss.item() * len(batch)
        _, predicted = outputs_softmax.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()

        # update parameters
        total_loss.backward()
        optimizer.step()

        if batch_idx % 100 == 0:
            print(
                '+ Epoch: {}. Iter: [{}/{} ({:.0f}%)]. Loss: {}. Accuracy: {}'.
                format(epoch, batch_idx * len(batch),
                       len(train_loader.dataset),
                       100. * batch_idx / len(train_loader),
                       train_loss / total, 100. * correct / total))

    # Visualize up to 8 clean/noisy pairs from the last batch of the epoch.
    n = min(batch.size(0), 8)
    comparison = torch.cat([batch[:n], batch_corrupted[:n]])
    comparison = torch.clamp(comparison, min=0, max=1)
    fig = plot_samples(comparison.detach().cpu().numpy().transpose(
        0, 2, 3, 1).squeeze(),
                       h=2,
                       w=n)

    writer.add_figure('sample of noisy trained examples', fig, epoch)
    writer.add_scalar('loss/train_loss', train_loss / total, epoch)
    writer.add_scalar('accuracy/train_accuracy', 100. * correct / total, epoch)
    writer.add_scalar('sigma/train_sigma_mean', sigma_0.mean().item(), epoch)
    writer.add_scalar('sigma/train_sigma_min', sigma_0.min().item(), epoch)
    writer.add_scalar('sigma/train_sigma_max', sigma_0.max().item(), epoch)

    return sigma_0
Esempio n. 21
0
from utils import plot_samples
from train import train_loop
from test import test_loop
import torch.optim as optim
import torch.nn as nn



# Training entry script: builds a ResNet-18, loads CIFAR-10 with
# model-specific transforms, previews a batch, then runs the train/test
# loops.  ResNet18, DEVICE, show_model_summary, load_cifar10 and the
# transform factories come from imports above this chunk.
#model = Model7()
model = ResNet18()
show_model_summary(model.to(DEVICE), (3, 32, 32))

# Constants, put in config
epochs = 50
cuda_batch_size=128
cpu_batch_size = 4
num_workers = 4

# ToDo: Create separate transforms for train and test...
#transforms = model7_transforms()
(train_loader, test_loader, classes) = load_cifar10(model9_resnet_train_transforms(), model9_resnet_test_transforms(),
                             cuda_batch_size, cpu_batch_size, num_workers)

# Show a sample batch before training starts.
plot_samples(train_loader)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.009, momentum=0.9)

# No LR scheduler (None) and no L1 regularization (False) in this run.
train_loop(epochs, train_loader, model, DEVICE, optimizer, criterion, None, False)
test_loop(test_loader, model, DEVICE, criterion)
Esempio n. 22
0
def main(args):
    """Train (or evaluate) a PyTorch text-recognition model.

    Loads validation and training data either from labelled image folders
    or a synthetic word generator, optionally resumes from a checkpoint,
    then trains with Adam under a cosine/one-cycle LR schedule,
    checkpointing whenever the validation loss improves.  Also supports
    eval-only mode, LR finding, sample visualization, W&B logging and
    pushing the final model to the Hugging Face hub.

    NOTE(review): relies on module-level imports (recognition, T, wandb,
    master_bar, fit_one_epoch, evaluate, record_lr, ...) defined elsewhere
    in the file.
    """

    print(args)

    if args.push_to_hub:
        login_to_hub()

    if not isinstance(args.workers, int):
        args.workers = min(16, mp.cpu_count())

    torch.backends.cudnn.benchmark = True

    vocab = VOCABS[args.vocab]
    fonts = args.font.split(",")

    # Load val data generator
    st = time.time()
    if isinstance(args.val_path, str):
        # Hash the label file so the exact dataset version is logged to W&B.
        with open(os.path.join(args.val_path, "labels.json"), "rb") as f:
            val_hash = hashlib.sha256(f.read()).hexdigest()

        val_set = RecognitionDataset(
            img_folder=os.path.join(args.val_path, "images"),
            labels_path=os.path.join(args.val_path, "labels.json"),
            img_transforms=T.Resize((args.input_size, 4 * args.input_size),
                                    preserve_aspect_ratio=True),
        )
    else:
        val_hash = None
        # Load synthetic data generator
        val_set = WordGenerator(
            vocab=vocab,
            min_chars=args.min_chars,
            max_chars=args.max_chars,
            num_samples=args.val_samples * len(vocab),
            font_family=fonts,
            img_transforms=Compose([
                T.Resize((args.input_size, 4 * args.input_size),
                         preserve_aspect_ratio=True),
                # Ensure we have a 90% split of white-background images
                T.RandomApply(T.ColorInversion(), 0.9),
            ]),
        )

    val_loader = DataLoader(
        val_set,
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        sampler=SequentialSampler(val_set),
        pin_memory=torch.cuda.is_available(),
        collate_fn=val_set.collate_fn,
    )
    print(
        f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
        f"{len(val_loader)} batches)")

    batch_transforms = Normalize(mean=(0.694, 0.695, 0.693),
                                 std=(0.299, 0.296, 0.301))

    # Load doctr model
    model = recognition.__dict__[args.arch](pretrained=args.pretrained,
                                            vocab=vocab)

    # Resume weights
    if isinstance(args.resume, str):
        print(f"Resuming {args.resume}")
        checkpoint = torch.load(args.resume, map_location="cpu")
        model.load_state_dict(checkpoint)

    # GPU
    if isinstance(args.device, int):
        if not torch.cuda.is_available():
            raise AssertionError(
                "PyTorch cannot access your GPU. Please investigate!")
        if args.device >= torch.cuda.device_count():
            raise ValueError("Invalid device index")
    # Silent default switch to GPU if available
    elif torch.cuda.is_available():
        args.device = 0
    else:
        logging.warning("No accessible GPU, target device set to CPU.")
    if torch.cuda.is_available():
        torch.cuda.set_device(args.device)
        model = model.cuda()

    # Metrics
    val_metric = TextMatch()

    if args.test_only:
        print("Running evaluation")
        val_loss, exact_match, partial_match = evaluate(model,
                                                        val_loader,
                                                        batch_transforms,
                                                        val_metric,
                                                        amp=args.amp)
        print(
            f"Validation loss: {val_loss:.6} (Exact: {exact_match:.2%} | Partial: {partial_match:.2%})"
        )
        return

    st = time.time()

    if isinstance(args.train_path, str):
        # Load train data generator
        base_path = Path(args.train_path)
        # A single labelled folder, or a folder of labelled sub-folders.
        parts = ([base_path]
                 if base_path.joinpath("labels.json").is_file() else
                 [base_path.joinpath(sub) for sub in os.listdir(base_path)])
        with open(parts[0].joinpath("labels.json"), "rb") as f:
            train_hash = hashlib.sha256(f.read()).hexdigest()

        train_set = RecognitionDataset(
            parts[0].joinpath("images"),
            parts[0].joinpath("labels.json"),
            img_transforms=Compose([
                T.Resize((args.input_size, 4 * args.input_size),
                         preserve_aspect_ratio=True),
                # Augmentations
                T.RandomApply(T.ColorInversion(), 0.1),
                ColorJitter(brightness=0.3,
                            contrast=0.3,
                            saturation=0.3,
                            hue=0.02),
            ]),
        )
        if len(parts) > 1:
            for subfolder in parts[1:]:
                train_set.merge_dataset(
                    RecognitionDataset(subfolder.joinpath("images"),
                                       subfolder.joinpath("labels.json")))
    else:
        train_hash = None
        # Load synthetic data generator
        train_set = WordGenerator(
            vocab=vocab,
            min_chars=args.min_chars,
            max_chars=args.max_chars,
            num_samples=args.train_samples * len(vocab),
            font_family=fonts,
            img_transforms=Compose([
                T.Resize((args.input_size, 4 * args.input_size),
                         preserve_aspect_ratio=True),
                # Ensure we have a 90% split of white-background images
                T.RandomApply(T.ColorInversion(), 0.9),
                ColorJitter(brightness=0.3,
                            contrast=0.3,
                            saturation=0.3,
                            hue=0.02),
            ]),
        )

    train_loader = DataLoader(
        train_set,
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.workers,
        sampler=RandomSampler(train_set),
        pin_memory=torch.cuda.is_available(),
        collate_fn=train_set.collate_fn,
    )
    print(
        f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
        f"{len(train_loader)} batches)")

    if args.show_samples:
        x, target = next(iter(train_loader))
        plot_samples(x, target)
        return

    # Optimizer
    optimizer = torch.optim.Adam(
        [p for p in model.parameters() if p.requires_grad],
        args.lr,
        betas=(0.95, 0.99),
        eps=1e-6,
        weight_decay=args.weight_decay,
    )
    # LR Finder
    if args.find_lr:
        lrs, losses = record_lr(model,
                                train_loader,
                                batch_transforms,
                                optimizer,
                                amp=args.amp)
        plot_recorder(lrs, losses)
        return
    # Scheduler
    if args.sched == "cosine":
        scheduler = CosineAnnealingLR(optimizer,
                                      args.epochs * len(train_loader),
                                      eta_min=args.lr / 25e4)
    elif args.sched == "onecycle":
        scheduler = OneCycleLR(optimizer, args.lr,
                               args.epochs * len(train_loader))
    else:
        # Fail fast: an unknown scheduler name previously surfaced only as a
        # late NameError when `scheduler` was first used in the epoch loop.
        raise ValueError(f"Unsupported scheduler: {args.sched}")

    # Training monitoring
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    exp_name = f"{args.arch}_{current_time}" if args.name is None else args.name

    # W&B
    if args.wb:

        run = wandb.init(
            name=exp_name,
            project="text-recognition",
            config={
                "learning_rate": args.lr,
                "epochs": args.epochs,
                "weight_decay": args.weight_decay,
                "batch_size": args.batch_size,
                "architecture": args.arch,
                "input_size": args.input_size,
                "optimizer": "adam",
                "framework": "pytorch",
                "scheduler": args.sched,
                "vocab": args.vocab,
                "train_hash": train_hash,
                "val_hash": val_hash,
                "pretrained": args.pretrained,
            },
        )

    # Create loss queue
    min_loss = np.inf
    # Training loop
    mb = master_bar(range(args.epochs))
    for epoch in mb:
        fit_one_epoch(model,
                      train_loader,
                      batch_transforms,
                      optimizer,
                      scheduler,
                      mb,
                      amp=args.amp)

        # Validation loop at the end of each epoch
        val_loss, exact_match, partial_match = evaluate(model,
                                                        val_loader,
                                                        batch_transforms,
                                                        val_metric,
                                                        amp=args.amp)
        if val_loss < min_loss:
            print(
                f"Validation loss decreased {min_loss:.6} --> {val_loss:.6}: saving state..."
            )
            torch.save(model.state_dict(), f"./{exp_name}.pt")
            min_loss = val_loss
        mb.write(
            f"Epoch {epoch + 1}/{args.epochs} - Validation loss: {val_loss:.6} "
            f"(Exact: {exact_match:.2%} | Partial: {partial_match:.2%})")
        # W&B
        if args.wb:
            wandb.log({
                "val_loss": val_loss,
                "exact_match": exact_match,
                "partial_match": partial_match,
            })

    if args.wb:
        run.finish()

    if args.push_to_hub:
        push_to_hf_hub(model, exp_name, task="recognition", run_config=args)
Esempio n. 23
0
def main(args):
    """Train (or evaluate) a TensorFlow text-detection model.

    Loads validation and training detection datasets (with optional
    rotation augmentation), optionally resumes from weights, then trains
    with Adam under an exponential-decay LR schedule, saving weights
    whenever the validation loss improves.  Also supports eval-only mode,
    mixed-precision (AMP), LR finding, sample visualization, backbone
    freezing, W&B logging and pushing the model to the Hugging Face hub.

    NOTE(review): relies on module-level imports (detection, T, tf, wandb,
    master_bar, fit_one_epoch, evaluate, record_lr, DataLoader, ...)
    defined elsewhere in the file.
    """

    print(args)

    if args.push_to_hub:
        login_to_hub()

    if not isinstance(args.workers, int):
        args.workers = min(16, mp.cpu_count())

    # AMP
    if args.amp:
        mixed_precision.set_global_policy("mixed_float16")

    st = time.time()
    val_set = DetectionDataset(
        img_folder=os.path.join(args.val_path, "images"),
        label_path=os.path.join(args.val_path, "labels.json"),
        # Straight eval: pad-resize only.  Rotated eval: aspect-preserving
        # resize, random 90-degree rotation, then pad-resize.
        sample_transforms=T.SampleCompose(([
            T.Resize((args.input_size, args.input_size),
                     preserve_aspect_ratio=True,
                     symmetric_pad=True)
        ] if not args.rotation or args.eval_straight else []) + ([
            T.Resize(args.input_size, preserve_aspect_ratio=True
                     ),  # This does not pad
            T.RandomRotate(90, expand=True),
            T.Resize((args.input_size, args.input_size),
                     preserve_aspect_ratio=True,
                     symmetric_pad=True),
        ] if args.rotation and not args.eval_straight else [])),
        use_polygons=args.rotation and not args.eval_straight,
    )
    val_loader = DataLoader(
        val_set,
        batch_size=args.batch_size,
        shuffle=False,
        drop_last=False,
        num_workers=args.workers,
    )
    print(
        f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
        f"{val_loader.num_batches} batches)")
    # Hash the label file so the exact dataset version is logged to W&B.
    with open(os.path.join(args.val_path, "labels.json"), "rb") as f:
        val_hash = hashlib.sha256(f.read()).hexdigest()

    batch_transforms = T.Compose([
        T.Normalize(mean=(0.798, 0.785, 0.772), std=(0.264, 0.2749, 0.287)),
    ])

    # Load doctr model
    model = detection.__dict__[args.arch](
        pretrained=args.pretrained,
        input_shape=(args.input_size, args.input_size, 3),
        assume_straight_pages=not args.rotation,
    )

    # Resume weights
    if isinstance(args.resume, str):
        model.load_weights(args.resume)

    # Metrics
    val_metric = LocalizationConfusion(use_polygons=args.rotation
                                       and not args.eval_straight,
                                       mask_shape=(args.input_size,
                                                   args.input_size))
    if args.test_only:
        print("Running evaluation")
        val_loss, recall, precision, mean_iou = evaluate(
            model, val_loader, batch_transforms, val_metric)
        print(
            f"Validation loss: {val_loss:.6} (Recall: {recall:.2%} | Precision: {precision:.2%} | "
            f"Mean IoU: {mean_iou:.2%})")
        return

    st = time.time()
    # Load both train and val data generators
    train_set = DetectionDataset(
        img_folder=os.path.join(args.train_path, "images"),
        label_path=os.path.join(args.train_path, "labels.json"),
        img_transforms=T.Compose([
            # Augmentations
            T.RandomApply(T.ColorInversion(), 0.1),
            T.RandomJpegQuality(60),
            T.RandomSaturation(0.3),
            T.RandomContrast(0.3),
            T.RandomBrightness(0.3),
        ]),
        sample_transforms=T.SampleCompose(([
            T.Resize((args.input_size, args.input_size),
                     preserve_aspect_ratio=True,
                     symmetric_pad=True)
        ] if not args.rotation else []) + ([
            T.Resize(args.input_size, preserve_aspect_ratio=True
                     ),  # This does not pad
            T.RandomRotate(90, expand=True),
            T.Resize((args.input_size, args.input_size),
                     preserve_aspect_ratio=True,
                     symmetric_pad=True),
        ] if args.rotation else [])),
        use_polygons=args.rotation,
    )
    train_loader = DataLoader(
        train_set,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True,
        num_workers=args.workers,
    )
    print(
        f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
        f"{train_loader.num_batches} batches)")
    # Hash the label file so the exact dataset version is logged to W&B.
    with open(os.path.join(args.train_path, "labels.json"), "rb") as f:
        train_hash = hashlib.sha256(f.read()).hexdigest()

    if args.show_samples:
        x, target = next(iter(train_loader))
        plot_samples(x, target)
        return

    # Optimizer
    scheduler = tf.keras.optimizers.schedules.ExponentialDecay(
        args.lr,
        decay_steps=args.epochs * len(train_loader),
        decay_rate=1 / (25e4),  # final lr as a fraction of initial lr
        staircase=False,
    )
    optimizer = tf.keras.optimizers.Adam(learning_rate=scheduler,
                                         beta_1=0.95,
                                         beta_2=0.99,
                                         epsilon=1e-6,
                                         clipnorm=5)
    if args.amp:
        optimizer = mixed_precision.LossScaleOptimizer(optimizer)
    # LR Finder
    if args.find_lr:
        lrs, losses = record_lr(model,
                                train_loader,
                                batch_transforms,
                                optimizer,
                                amp=args.amp)
        plot_recorder(lrs, losses)
        return

    # Tensorboard to monitor training
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    exp_name = f"{args.arch}_{current_time}" if args.name is None else args.name

    # W&B
    if args.wb:

        run = wandb.init(
            name=exp_name,
            project="text-detection",
            config={
                "learning_rate": args.lr,
                "epochs": args.epochs,
                "weight_decay": 0.0,
                "batch_size": args.batch_size,
                "architecture": args.arch,
                "input_size": args.input_size,
                "optimizer": "adam",
                "framework": "tensorflow",
                "scheduler": "exp_decay",
                "train_hash": train_hash,
                "val_hash": val_hash,
                "pretrained": args.pretrained,
                "rotation": args.rotation,
            },
        )

    # Optionally train only the detection head.
    if args.freeze_backbone:
        for layer in model.feat_extractor.layers:
            layer.trainable = False

    min_loss = np.inf

    # Training loop
    mb = master_bar(range(args.epochs))
    for epoch in mb:
        fit_one_epoch(model, train_loader, batch_transforms, optimizer, mb,
                      args.amp)
        # Validation loop at the end of each epoch
        val_loss, recall, precision, mean_iou = evaluate(
            model, val_loader, batch_transforms, val_metric)
        if val_loss < min_loss:
            print(
                f"Validation loss decreased {min_loss:.6} --> {val_loss:.6}: saving state..."
            )
            model.save_weights(f"./{exp_name}/weights")
            min_loss = val_loss
        log_msg = f"Epoch {epoch + 1}/{args.epochs} - Validation loss: {val_loss:.6} "
        if any(val is None for val in (recall, precision, mean_iou)):
            log_msg += "(Undefined metric value, caused by empty GTs or predictions)"
        else:
            log_msg += f"(Recall: {recall:.2%} | Precision: {precision:.2%} | Mean IoU: {mean_iou:.2%})"
        mb.write(log_msg)
        # W&B
        if args.wb:
            wandb.log({
                "val_loss": val_loss,
                "recall": recall,
                "precision": precision,
                "mean_iou": mean_iou,
            })

    if args.wb:
        run.finish()

    if args.push_to_hub:
        push_to_hf_hub(model, exp_name, task="detection", run_config=args)