Example #1
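A Dataset `__getitem__` from what looks like a CenterNet-style detector: it loads an image (a plain OpenCV read, or a PIL path with ColorJitter and GaussianBlur augmentation), normalizes it to CHW layout, and returns it together with heatmap, size-regression, and offset targets on a 128x128 grid.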
    def __getitem__(self, index):
        if not self.augment_data:
            img = cv2.resize(
                cv2.imread(self.train_images +
                           '{}.jpg'.format(self.img_id[index])), (512, 512))
        else:
            img = Image.open(self.train_images +
                             '{}.jpg'.format(self.img_id[index])).resize(
                                 (512, 512))
            color = ColorJitter(brightness=0.4,
                                contrast=0.4,
                                saturation=0.6,
                                hue=0.4)
            blur = GaussianBlur((3, 3), sigma=(0.1, 2))
            img = color(img)
            img = blur(img)
            img = np.array(img)
            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

        img = self.normalize(img)
        img = img.transpose([2, 0, 1])
        heatmap, offset_x, offset_y, object_size_x, object_size_y = generate_heatmap_offset(
            self.img_id[index], self.dictionnary_labels_per_image)
        regr = np.zeros((2, 128, 128))
        offset = np.zeros((2, 128, 128))
        regr[0, :, :] = object_size_x
        regr[1, :, :] = object_size_y

        offset[0, :, :] = offset_x
        offset[1, :, :] = offset_y
        return img, self.img_id[index], heatmap, regr, offset
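Assuming this method lives on a torch.utils.data.Dataset subclass (the class definition is not shown), a minimal usage sketch; the class name and constructor arguments below are hypothetical:

from torch.utils.data import DataLoader

# Hypothetical: the real class name and constructor are not in the snippet.
dataset = HeatmapDataset(train_images='./train/', augment_data=True)
loader = DataLoader(dataset, batch_size=8, shuffle=True)
img, img_id, heatmap, regr, offset = next(iter(loader))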
Example #2
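Saving twenty progressively blurrier copies of one image by growing the Gaussian kernel while keeping sigma fixed.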
def create_blurry_images():
    path = './images/dog2.jpg'
    im = Image.open(path)
    sigma = 2.0
    for i in range(20):
        gb = GaussianBlur(kernel_size=(3 + 2 * i), sigma=sigma)
        im_blur = gb(im)
        im_blur.save(f'./images/dog_blur{i:03d}.png')
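Note that torchvision's GaussianBlur requires a positive odd kernel size, so `3 + 2 * i` steps through 3, 5, ..., 41.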
Example #3
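A low-resolution input transform for super-resolution training: blur the image, then downscale it bicubically by `upscale_factor`.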
def low_res_transform(crop_size, upscale_factor):
    return Compose([
        ToPILImage(),
        # Lambda(randomJPEGCompresss),
        GaussianBlur(kernel_size=5, sigma=2),
        # RandomCrop(crop_size, Image.BICUBIC),
        Resize(crop_size // upscale_factor, interpolation=Image.BICUBIC),
        ToTensor()
    ])
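A minimal usage sketch, assuming the pipeline receives a (C, H, W) float tensor (it opens with ToPILImage, which accepts one); the crop size and scale factor are illustrative:

import torch

lr_transform = low_res_transform(crop_size=96, upscale_factor=4)
hr_crop = torch.rand(3, 96, 96)  # stand-in for a high-resolution crop
lr_crop = lr_transform(hr_crop)  # blurred 3x24x24 low-resolution tensor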
Example #4
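A text-detection dataset constructor that chains ColorJitter, GaussianBlur, resizing, and ImageNet normalization into one Compose pipeline.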
    def __init__(self, images, objects):
        super(TextDetectionDataset, self).__init__()
        self.images = images
        self.objects = objects

        self.transforms = Compose([
            ColorJitter(),
            GaussianBlur(kernel_size=5),
            Resize((300, 300)),
            ToTensor(),
            Normalize(mean=(0.485, 0.456, 0.406),
                      std=(0.229, 0.224, 0.225))
        ])
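Only __init__ is shown; a hedged sketch of the companion methods (hypothetical, not part of the source) might look like:

    def __len__(self):
        return len(self.images)

    def __getitem__(self, index):
        # assumes self.images holds PIL images
        return self.transforms(self.images[index]), self.objects[index]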
Example #5
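A docTR-style character-classification training script; GaussianBlur appears among the training-time augmentations (applied with probability 0.3), alongside color inversion, grayscale, jitter, and rotation.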
def main(args):

    print(args)

    if args.push_to_hub:
        login_to_hub()

    if not isinstance(args.workers, int):
        args.workers = min(16, mp.cpu_count())

    torch.backends.cudnn.benchmark = True

    vocab = VOCABS[args.vocab]

    fonts = args.font.split(",")

    # Load val data generator
    st = time.time()
    val_set = CharacterGenerator(
        vocab=vocab,
        num_samples=args.val_samples * len(vocab),
        cache_samples=True,
        img_transforms=Compose(
            [
                T.Resize((args.input_size, args.input_size)),
                # Ensure we have a 90% split of white-background images
                T.RandomApply(T.ColorInversion(), 0.9),
            ]
        ),
        font_family=fonts,
    )
    val_loader = DataLoader(
        val_set,
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        sampler=SequentialSampler(val_set),
        pin_memory=torch.cuda.is_available(),
    )
    print(f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in " f"{len(val_loader)} batches)")

    batch_transforms = Normalize(mean=(0.694, 0.695, 0.693), std=(0.299, 0.296, 0.301))

    # Load doctr model
    model = classification.__dict__[args.arch](pretrained=args.pretrained, num_classes=len(vocab), classes=list(vocab))

    # Resume weights
    if isinstance(args.resume, str):
        print(f"Resuming {args.resume}")
        checkpoint = torch.load(args.resume, map_location="cpu")
        model.load_state_dict(checkpoint)

    # GPU
    if isinstance(args.device, int):
        if not torch.cuda.is_available():
            raise AssertionError("PyTorch cannot access your GPU. Please investigate!")
        if args.device >= torch.cuda.device_count():
            raise ValueError("Invalid device index")
    # Silent default switch to GPU if available
    elif torch.cuda.is_available():
        args.device = 0
    else:
        logging.warning("No accessible GPU, targe device set to CPU.")
    if torch.cuda.is_available():
        torch.cuda.set_device(args.device)
        model = model.cuda()

    if args.test_only:
        print("Running evaluation")
        val_loss, acc = evaluate(model, val_loader, batch_transforms)
        print(f"Validation loss: {val_loss:.6} (Acc: {acc:.2%})")
        return

    st = time.time()

    # Load train data generator
    train_set = CharacterGenerator(
        vocab=vocab,
        num_samples=args.train_samples * len(vocab),
        cache_samples=True,
        img_transforms=Compose(
            [
                T.Resize((args.input_size, args.input_size)),
                # Augmentations
                T.RandomApply(T.ColorInversion(), 0.9),
                # GaussianNoise
                T.RandomApply(Grayscale(3), 0.1),
                ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3, hue=0.02),
                T.RandomApply(GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 3)), 0.3),
                RandomRotation(15, interpolation=InterpolationMode.BILINEAR),
            ]
        ),
        font_family=fonts,
    )

    train_loader = DataLoader(
        train_set,
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.workers,
        sampler=RandomSampler(train_set),
        pin_memory=torch.cuda.is_available(),
    )
    print(f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in " f"{len(train_loader)} batches)")

    if args.show_samples:
        x, target = next(iter(train_loader))
        plot_samples(x, list(map(vocab.__getitem__, target)))
        return

    # Optimizer
    optimizer = torch.optim.Adam(
        [p for p in model.parameters() if p.requires_grad],
        args.lr,
        betas=(0.95, 0.99),
        eps=1e-6,
        weight_decay=args.weight_decay,
    )

    # LR Finder
    if args.find_lr:
        lrs, losses = record_lr(model, train_loader, batch_transforms, optimizer, amp=args.amp)
        plot_recorder(lrs, losses)
        return
    # Scheduler
    if args.sched == "cosine":
        scheduler = CosineAnnealingLR(optimizer, args.epochs * len(train_loader), eta_min=args.lr / 25e4)
    elif args.sched == "onecycle":
        scheduler = OneCycleLR(optimizer, args.lr, args.epochs * len(train_loader))
    else:
        raise ValueError(f"Unsupported scheduler '{args.sched}'")

    # Training monitoring
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    exp_name = f"{args.arch}_{current_time}" if args.name is None else args.name

    # W&B
    if args.wb:

        run = wandb.init(
            name=exp_name,
            project="character-classification",
            config={
                "learning_rate": args.lr,
                "epochs": args.epochs,
                "weight_decay": args.weight_decay,
                "batch_size": args.batch_size,
                "architecture": args.arch,
                "input_size": args.input_size,
                "optimizer": "adam",
                "framework": "pytorch",
                "vocab": args.vocab,
                "scheduler": args.sched,
                "pretrained": args.pretrained,
            },
        )

    # Track the best validation loss
    min_loss = np.inf
    # Training loop
    mb = master_bar(range(args.epochs))
    for epoch in mb:
        fit_one_epoch(model, train_loader, batch_transforms, optimizer, scheduler, mb)

        # Validation loop at the end of each epoch
        val_loss, acc = evaluate(model, val_loader, batch_transforms)
        if val_loss < min_loss:
            print(f"Validation loss decreased {min_loss:.6} --> {val_loss:.6}: saving state...")
            torch.save(model.state_dict(), f"./{exp_name}.pt")
            min_loss = val_loss
        mb.write(f"Epoch {epoch + 1}/{args.epochs} - Validation loss: {val_loss:.6} (Acc: {acc:.2%})")
        # W&B
        if args.wb:
            wandb.log(
                {
                    "val_loss": val_loss,
                    "acc": acc,
                }
            )

    if args.wb:
        run.finish()

    if args.push_to_hub:
        push_to_hf_hub(model, exp_name, task="classification", run_config=args)

    if args.export_onnx:
        print("Exporting model to ONNX...")
        dummy_batch = next(iter(val_loader))
        dummy_input = dummy_batch[0].cuda() if torch.cuda.is_available() else dummy_batch[0]
        model_path = export_model_to_onnx(model, exp_name, dummy_input)
        print(f"Exported model saved in {model_path}")
Example #6
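A docTR-style document-artefact detection training script; training images get Gaussian noise, jitter, and GaussianBlur (p=0.3), plus random horizontal flips at the sample level.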
def main(args):
    print(args)

    if not isinstance(args.workers, int):
        args.workers = min(16, mp.cpu_count())

    torch.backends.cudnn.benchmark = True

    st = time.time()
    val_set = DocArtefacts(
        train=False,
        download=True,
        img_transforms=T.Resize((args.input_size, args.input_size)),
    )
    val_loader = DataLoader(
        val_set,
        batch_size=args.batch_size,
        drop_last=False,
        num_workers=args.workers,
        sampler=SequentialSampler(val_set),
        pin_memory=torch.cuda.is_available(),
        collate_fn=val_set.collate_fn,
    )
    print(
        f"Validation set loaded in {time.time() - st:.4}s ({len(val_set)} samples in "
        f"{len(val_loader)} batches)")

    # Load doctr model
    model = obj_detection.__dict__[args.arch](pretrained=args.pretrained,
                                              num_classes=5)

    # Resume weights
    if isinstance(args.resume, str):
        print(f"Resuming {args.resume}")
        checkpoint = torch.load(args.resume, map_location='cpu')
        model.load_state_dict(checkpoint)

    # GPU
    if isinstance(args.device, int):
        if not torch.cuda.is_available():
            raise AssertionError(
                "PyTorch cannot access your GPU. Please investigate!")
        if args.device >= torch.cuda.device_count():
            raise ValueError("Invalid device index")
    # Silent default switch to GPU if available
    elif torch.cuda.is_available():
        args.device = 0
    else:
        logging.warning("No accessible GPU, target device set to CPU.")
    if torch.cuda.is_available():
        torch.cuda.set_device(args.device)
        model = model.cuda()

    # Metrics
    metric = DetectionMetric(iou_thresh=0.5)

    if args.test_only:
        print("Running evaluation")
        recall, precision, mean_iou = evaluate(model,
                                               val_loader,
                                               metric,
                                               amp=args.amp)
        print(
            f"Recall: {recall:.2%} | Precision: {precision:.2%} | Mean IoU: {mean_iou:.2%}"
        )
        return

    st = time.time()
    # Load train data generators
    train_set = DocArtefacts(
        train=True,
        download=True,
        img_transforms=Compose([
            T.Resize((args.input_size, args.input_size)),
            T.RandomApply(T.GaussianNoise(0., 0.25), p=0.5),
            ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3,
                        hue=0.02),
            T.RandomApply(GaussianBlur(kernel_size=(3, 3), sigma=(0.1, 3)),
                          p=0.3),
        ]),
        sample_transforms=T.RandomHorizontalFlip(p=0.5),
    )
    train_loader = DataLoader(
        train_set,
        batch_size=args.batch_size,
        drop_last=True,
        num_workers=args.workers,
        sampler=RandomSampler(train_set),
        pin_memory=torch.cuda.is_available(),
        collate_fn=train_set.collate_fn,
    )
    print(
        f"Train set loaded in {time.time() - st:.4}s ({len(train_set)} samples in "
        f"{len(train_loader)} batches)")

    if args.show_samples:
        images, targets = next(iter(train_loader))
        targets = convert_to_abs_coords(targets, images.shape)
        plot_samples(images, targets, train_set.CLASSES)
        return

    # Backbone freezing
    if args.freeze_backbone:
        for p in model.backbone.parameters():
            p.requires_grad_(False)

    # Optimizer
    optimizer = optim.SGD([p for p in model.parameters() if p.requires_grad],
                          lr=args.lr,
                          weight_decay=args.weight_decay)
    # LR Finder
    if args.find_lr:
        lrs, losses = record_lr(model, train_loader, optimizer, amp=args.amp)
        plot_recorder(lrs, losses)
        return
    # Scheduler
    scheduler = StepLR(optimizer, step_size=8, gamma=0.7)

    # Training monitoring
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    exp_name = f"{args.arch}_{current_time}" if args.name is None else args.name

    # W&B
    if args.wb:
        run = wandb.init(name=exp_name,
                         project="object-detection",
                         config={
                             "learning_rate": args.lr,
                             "epochs": args.epochs,
                             "weight_decay": args.weight_decay,
                             "batch_size": args.batch_size,
                             "architecture": args.arch,
                             "input_size": args.input_size,
                             "optimizer": "sgd",
                             "framework": "pytorch",
                             "scheduler": "step",
                             "pretrained": args.pretrained,
                             "amp": args.amp,
                         })

    mb = master_bar(range(args.epochs))
    max_score = 0.

    for epoch in mb:
        fit_one_epoch(model,
                      train_loader,
                      optimizer,
                      scheduler,
                      mb,
                      amp=args.amp)
        # Validation loop at the end of each epoch
        recall, precision, mean_iou = evaluate(model,
                                               val_loader,
                                               metric,
                                               amp=args.amp)
        f1_score = 2 * precision * recall / (precision + recall) if (
            precision + recall) > 0 else 0.

        if f1_score > max_score:
            print(
                f"Validation metric increased {max_score:.6} --> {f1_score:.6}: saving state..."
            )
            torch.save(model.state_dict(), f"./{exp_name}.pt")
            max_score = f1_score
        log_msg = f"Epoch {epoch + 1}/{args.epochs} - "
        if any(val is None for val in (recall, precision, mean_iou)):
            log_msg += "Undefined metric value, caused by empty GTs or predictions"
        else:
            log_msg += f"Recall: {recall:.2%} | Precision: {precision:.2%} | Mean IoU: {mean_iou:.2%}"
        mb.write(log_msg)
        # W&B
        if args.wb:
            wandb.log({
                'recall': recall,
                'precision': precision,
                'mean_iou': mean_iou,
            })

    if args.wb:
        run.finish()
Example #7
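BYOL-style self-supervised pretraining of a Vision Transformer; GaussianBlur enters through the augmentation pipeline with probability 0.1, and two independently augmented views of each image feed the online and target networks.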
def main(args):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True

    # ImageNet stats for now
    mean, std = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]

    transform = Compose([
        ToImageMode("RGB"),
        Resize(1024),
        RandomCrop(224),
        RandomHorizontalFlip(p=0.5),
        RandomApply([
            ColorJitter(brightness=0.8, contrast=0.8, saturation=0.8, hue=0.2)
        ],
                    p=0.8),
        RandomApply([GaussianBlur((3, 3), (1.5, 1.5))], p=0.1),
        RandomGrayscale(p=0.2),
        ToTensor(),
        Normalize(mean=mean, std=std),
        ToPatches(16)
    ])

    # Applying the same transform twice will give us
    # different transformations on the same image,
    # because the transformation's rng state changes.
    dataset = ImageDirectory(args.dataset, transform, transform)

    # TODO: hard coded for now, works on my 2x Titan RTX machine
    loader = DataLoader(dataset,
                        batch_size=144,
                        num_workers=40,
                        shuffle=True,
                        pin_memory=True,
                        drop_last=True)

    # We will chop off the final layer anyway,
    # therefore num_classes doesn't matter here.
    online = VisionTransformer(num_classes=1, C=3, H=224, W=224, P=16)
    target = VisionTransformer(num_classes=1, C=3, H=224, W=224, P=16)

    # Projection heads for both networks (commented out below;
    # nn.Identity keeps the raw 768-d ViT features instead)
    # online.final = mlp(768, 4096, 256)
    # target.final = mlp(768, 4096, 256)
    online.final = nn.Identity()
    target.final = nn.Identity()

    # The target network does not learn on its own;
    # it receives a moving average of the online network's weights.

    online.train()
    target.eval()

    for param in target.parameters():
        param.requires_grad = False

    def update_target():
        update(target, online, 0.99)

    # In addition to the projection head, the online
    # network has a predictor on top.
    # predictor = mlp(256, 4096, 256)
    predictor = mlp(768, 4096, 768)

    # Move everything to devices

    online = online.to(device)
    online = nn.DataParallel(online)

    predictor = predictor.to(device)
    predictor = nn.DataParallel(predictor)

    target = target.to(device)
    target = nn.DataParallel(target)

    def criterion(x, y):
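        # BYOL regression loss: for L2-normalized x and y,
        # ||x - y||^2 = 2 - 2 * <x, y>, so minimizing it
        # maximizes the cosine similarity between the two views.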
        x = nn.functional.normalize(x, dim=-1)
        y = nn.functional.normalize(y, dim=-1)
        return 2 - 2 * (x * y).sum(dim=-1)

    # Online and predictor learns, target gets assigned moving average of online network's weights.

    lr = 0.1
    epochs = 15

    optimizer = torch.optim.SGD(list(online.parameters()) +
                                list(predictor.parameters()),
                                lr=lr,
                                momentum=0.9)
    scheduler = torch.optim.lr_scheduler.OneCycleLR(
        optimizer, max_lr=lr, steps_per_epoch=len(loader), epochs=epochs)

    scaler = torch.cuda.amp.GradScaler()

    step = 0
    running = 0

    for epoch in range(epochs):
        progress = tqdm(loader, desc=f"Epoch {epoch+1}", unit="batch")

        for inputs1, inputs2 in progress:
            assert inputs1.size() == inputs2.size()

            # Overlap data transfers to gpus, pinned memory
            inputs1 = inputs1.to(device, non_blocking=True)
            inputs2 = inputs2.to(device, non_blocking=True)

            optimizer.zero_grad()

            with torch.cuda.amp.autocast():
                # Target network is in eval mode and does not
                # require grads, forward no grad ctx to be sure
                with torch.no_grad():
                    labels1 = target(inputs1).detach()
                    labels2 = target(inputs2).detach()

                outputs1 = predictor(online(inputs1))
                outputs2 = predictor(online(inputs2))

                # Symmetrize the loss, both transformations
                # go through both networks, one at a time
                loss = criterion(outputs1, labels2) + criterion(
                    outputs2, labels1)
                loss = loss.mean()

            scaler.scale(loss).backward()

            # Transformers need their nails clipped
            scaler.unscale_(optimizer)
            nn.utils.clip_grad_norm_(online.parameters(), 1)
            nn.utils.clip_grad_norm_(predictor.parameters(), 1)

            scaler.step(optimizer)
            scaler.update()

            scheduler.step()

            # After training the online network, we transfer
            # a weighted average of the weights to the target
            update_target()

            running += loss.item()

            # Report the mean loss over the last 100 steps
            if (step + 1) % 100 == 0:
                progress.write(f"loss: {running / 100}")
                running = 0

            step += 1

        torch.save(online.state_dict(), f"vt-{epoch + 1:03d}.pth")
Example #8
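A config-driven training-transform builder; `gaussian_blur` is one of many selectable augmentations and is wrapped in RandomApply with probability `cfg.INPUT.GB_P`.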
def _build_transform_train(cfg, choices, expected_size, normalize):
    print('Building transform_train')
    tfm_train = []

    print('+ resize to {}'.format(expected_size))
    tfm_train += [Resize(cfg.INPUT.SIZE)]

    if 'random_flip' in choices:
        print('+ random flip')
        tfm_train += [RandomHorizontalFlip()]

    if 'random_translation' in choices:
        print('+ random translation')
        tfm_train += [
            Random2DTranslation(cfg.INPUT.SIZE[0], cfg.INPUT.SIZE[1])
        ]

    if 'random_crop' in choices:
        crop_padding = cfg.INPUT.CROP_PADDING
        print('+ random crop (padding = {})'.format(crop_padding))
        tfm_train += [RandomCrop(cfg.INPUT.SIZE, padding=crop_padding)]

    if 'random_resized_crop' in choices:
        print('+ random resized crop')
        tfm_train += [RandomResizedCrop(cfg.INPUT.SIZE)]

    if 'center_crop' in choices:
        print('+ center crop (on 1.125x enlarged input)')
        enlarged_size = [int(x * 1.125) for x in cfg.INPUT.SIZE]
        tfm_train += [Resize(enlarged_size)]
        tfm_train += [CenterCrop(cfg.INPUT.SIZE)]

    if 'imagenet_policy' in choices:
        print('+ imagenet policy')
        tfm_train += [ImageNetPolicy()]

    if 'cifar10_policy' in choices:
        print('+ cifar10 policy')
        tfm_train += [CIFAR10Policy()]

    if 'svhn_policy' in choices:
        print('+ svhn policy')
        tfm_train += [SVHNPolicy()]

    if 'randaugment' in choices:
        n_ = cfg.INPUT.RANDAUGMENT_N
        m_ = cfg.INPUT.RANDAUGMENT_M
        print('+ randaugment (n={}, m={})'.format(n_, m_))
        tfm_train += [RandAugment(n_, m_)]

    if 'randaugment_fixmatch' in choices:
        n_ = cfg.INPUT.RANDAUGMENT_N
        print('+ randaugment_fixmatch (n={})'.format(n_))
        tfm_train += [RandAugmentFixMatch(n_)]

    if 'randaugment2' in choices:
        n_ = cfg.INPUT.RANDAUGMENT_N
        print('+ randaugment2 (n={})'.format(n_))
        tfm_train += [RandAugment2(n_)]

    if 'colorjitter' in choices:
        print('+ color jitter')
        tfm_train += [
            ColorJitter(brightness=cfg.INPUT.COLORJITTER_B,
                        contrast=cfg.INPUT.COLORJITTER_C,
                        saturation=cfg.INPUT.COLORJITTER_S,
                        hue=cfg.INPUT.COLORJITTER_H)
        ]

    if 'randomgrayscale' in choices:
        print('+ random gray scale')
        tfm_train += [RandomGrayscale(p=cfg.INPUT.RGS_P)]

    if 'gaussian_blur' in choices:
        print(f'+ gaussian blur (kernel={cfg.INPUT.GB_K})')
        tfm_train += [
            RandomApply([GaussianBlur(cfg.INPUT.GB_K)], p=cfg.INPUT.GB_P)
        ]

    print('+ to torch tensor of range [0, 1]')
    tfm_train += [ToTensor()]

    if 'cutout' in choices:
        cutout_n = cfg.INPUT.CUTOUT_N
        cutout_len = cfg.INPUT.CUTOUT_LEN
        print('+ cutout (n_holes={}, length={})'.format(cutout_n, cutout_len))
        tfm_train += [Cutout(cutout_n, cutout_len)]

    if 'normalize' in choices:
        print('+ normalization (mean={}, '
              'std={})'.format(cfg.INPUT.PIXEL_MEAN, cfg.INPUT.PIXEL_STD))
        tfm_train += [normalize]

    if 'gaussian_noise' in choices:
        print('+ gaussian noise (mean={}, std={})'.format(
            cfg.INPUT.GN_MEAN, cfg.INPUT.GN_STD))
        tfm_train += [GaussianNoise(cfg.INPUT.GN_MEAN, cfg.INPUT.GN_STD)]

    if 'instance_norm' in choices:
        print('+ instance normalization')
        tfm_train += [InstanceNormalization()]

    tfm_train = Compose(tfm_train)

    return tfm_train
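A hedged usage sketch, assuming a yacs-style cfg object and a torchvision Normalize instance (both hypothetical here):

choices = ['random_flip', 'gaussian_blur', 'normalize']
tfm_train = _build_transform_train(cfg, choices, (224, 224), normalize)
tensor = tfm_train(pil_image)  # PIL image in, normalized tensor out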