Example #1
    def test_imdb_dataset(self):
        from src.datasets import ImdbDataset, unpickle_imdb
        from torchvision.transforms import ToTensor, ToPILImage

        df = unpickle_imdb(f"{self.imdb_root}/imdb.pickle")
        ds = ImdbDataset(root=self.imdb_root, df=df, transform=ToTensor())

        # Fetch the last sample and round-trip the tensor back to a PIL image.
        tensor, label = ds[-1]
        pil = ToPILImage()(tensor)
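
The ImdbDataset interface these tests exercise lives in src.datasets and is not shown on this page. Below is a minimal sketch of a map-style dataset matching this constructor, assuming each DataFrame row holds a relative image path and a gender label (the column names 'path' and 'gender' are guesses, not confirmed by the source):

import os
from PIL import Image
from torch.utils.data import Dataset

class ImdbDatasetSketch(Dataset):
    # Sketch only: the real ImdbDataset is in src.datasets and may differ.
    def __init__(self, root, df, transform=None, include_path=False):
        self.root, self.df = root, df
        self.transform, self.include_path = transform, include_path

    def __len__(self):
        return len(self.df)

    def __getitem__(self, ix):
        row = self.df.iloc[ix]
        path = os.path.join(self.root, row['path'])
        img = Image.open(path).convert('RGB')
        if self.transform:
            img = self.transform(img)
        # include_path mirrors the flag used in Example #4 below.
        if self.include_path:
            return img, row['gender'], path
        return img, row['gender']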
Example #2
import torch
from torch import nn
from torch.nn import CrossEntropyLoss
from torch.optim import Adam
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
from torchvision.models import resnet18
from src.datasets import ImdbDataset, unpickle_imdb
from src.convnets.utils import IMAGENET_MEAN, IMAGENET_STD
# GenderTrainer, MyScheduler, and IMDB_ROOT are project-local names whose
# modules are not shown in this snippet.


def train_gender_imdb(weights=''):
    model = resnet18(pretrained=True)
    model.fc = nn.Linear(model.fc.in_features, 2)
    if weights:
        model.load_state_dict(torch.load(weights))

    tr_trans = transforms.Compose([
        transforms.RandomRotation(12),
        transforms.Resize(144),
        transforms.RandomCrop(128),
        transforms.RandomHorizontalFlip(0.5),
        transforms.RandomGrayscale(0.1),
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
    ])
    # Eval-time transform; defined here but not used below (both splits share
    # tr_trans via the single ImdbDataset instance).
    ts_trans = transforms.Compose([
        transforms.Resize(64),
        transforms.CenterCrop(64),
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
    ])
    """
    tr_ds = AppaRealDataset(trans=tr_trans, split='train', target_trans=None, faceonly=True)
    val_ds = AppaRealDataset(trans=tr_trans, split='val', target_trans=None, faceonly=True)
    tr_dl = DataLoader(tr_ds, batch_size=16, shuffle=True, num_workers=8, pin_memory=True)
    val_dl = DataLoader(val_ds, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)

    """
    ds = ImdbDataset(root=IMDB_ROOT, df=unpickle_imdb(f"{IMDB_ROOT}/imdb.pickle"),
                     transform=tr_trans)
    tr_ds, val_ds = random_split(ds, [len(ds) - len(ds) // 10, len(ds) // 10])
    tr_dl = DataLoader(tr_ds, batch_size=16, shuffle=True, num_workers=8, pin_memory=True)
    val_dl = DataLoader(val_ds, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)
    #"""

    loss_fn = CrossEntropyLoss(reduction='mean')
    optim = Adam
    optim_kwargs = {
        'lr': 3e-4,
        'weight_decay': 1e-5,
    }
    # Warm up for two epochs' worth of steps, then hand off to cosine annealing.
    scheduler = MyScheduler
    scheduler_kwargs = {
        'warmup_steps': len(tr_dl) * 2,
        'scheduler_class': CosineAnnealingLR,
        'T_max': len(tr_dl),
        'eta_min': 0,
        'last_epoch': -1,
    }
    trainer = GenderTrainer(model, tr_dl, val_dl, loss_fn,
                            optim=optim, optim_kwargs=optim_kwargs,
                            scheduler=scheduler, scheduler_kwargs=scheduler_kwargs)
    trainer.train(20)
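
MyScheduler is project-local and its source is not shown here. Given that it takes a warmup_steps count plus a scheduler_class to wrap, one plausible reading is linear warmup followed by delegation to the wrapped scheduler; the sketch below is written under that assumption and is not the project's actual implementation.

class MySchedulerSketch:
    # Sketch only: linear LR warmup, then hand off to the wrapped scheduler.
    def __init__(self, optimizer, warmup_steps, scheduler_class, **kwargs):
        self.optimizer = optimizer
        self.warmup_steps = warmup_steps
        self.base_lrs = [g['lr'] for g in optimizer.param_groups]
        self.wrapped = scheduler_class(optimizer, **kwargs)
        self.step_num = 0

    def step(self):
        self.step_num += 1
        if self.step_num <= self.warmup_steps:
            # Scale each group's LR linearly from 0 up to its base value.
            scale = self.step_num / self.warmup_steps
            for group, base in zip(self.optimizer.param_groups, self.base_lrs):
                group['lr'] = base * scale
        else:
            self.wrapped.step()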
Example #3

    # Trainer __init__ (likely the GenderTrainer referenced in Example #2):
    # sets up device, TensorBoard logging, the IMDB train/val split, the
    # model, and the optimizer.
    def __init__(self, print_every_iters=50):
        self.device = torch.device(
            'cuda') if torch.cuda.is_available() else torch.device('cpu')
        self.dtype = torch.float32
        self.savedir = f"{LOG_DIR}"
        self.e = 0  # epoch counter
        self.train_steps = 0
        self.eval_steps = 0
        self.sw = SummaryWriter()  # TensorBoard logger

        self.print_every_iters = print_every_iters
        self.trans = transforms.Compose([
            transforms.Resize(64),
            transforms.CenterCrop(64),
            transforms.RandomHorizontalFlip(0.5),
            transforms.ToTensor(),
            transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
        ])
        ds = ImdbDataset(root=IMDB_ROOT,
                         df=unpickle_imdb(f"{IMDB_ROOT}/imdb.pickle"),
                         transform=self.trans)
        tr_ds, val_ds = random_split(
            ds,
            [len(ds) - len(ds) // 10, len(ds) // 10])
        self.tr_dl = DataLoader(tr_ds,
                                batch_size=16,
                                shuffle=True,
                                num_workers=8,
                                pin_memory=True)
        self.val_dl = DataLoader(val_ds,
                                 batch_size=16,
                                 shuffle=False,
                                 num_workers=4,
                                 pin_memory=True)
        self.loss_fn = CrossEntropyLoss(reduction='mean')

        self.model = resnet18(pretrained=True)
        # Note: unlike Examples #2 and #5, the 1000-way ImageNet head is not
        # replaced with a 2-way classifier here.
        self.model.to(device=self.device, dtype=self.dtype)
        self.optim = Adam(self.model.parameters(), lr=3e-4, weight_decay=1e-6)
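
The rest of this trainer class is not shown. A minimal sketch of a training epoch consistent with the fields initialized above (the method name and logging tags are assumptions, not from the source):

    def train_epoch(self):
        self.model.train()
        for ix, (img, label) in enumerate(self.tr_dl):
            img = img.to(device=self.device, dtype=self.dtype)
            label = label.to(device=self.device, dtype=torch.int64)
            loss = self.loss_fn(self.model(img), label)
            self.optim.zero_grad()
            loss.backward()
            self.optim.step()
            self.sw.add_scalar('train/loss', loss.item(), self.train_steps)
            self.train_steps += 1
            if ix % self.print_every_iters == 0:
                print(f"epoch {self.e} iter {ix}: loss={loss.item():.4f}")
        self.e += 1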
Example #4

    def test_gender_analyze(self):
        import os
        import pickle
        from src.gender_estimation import gender_analyze
        from src.datasets import ImdbDataset, unpickle_imdb
        from src.utils import load_config
        from src.convnets.utils import IMAGENET_MEAN, IMAGENET_STD
        from torchvision import transforms
        load_config()
        trans = transforms.Compose([
            transforms.Resize(64),
            transforms.CenterCrop(64),
            transforms.ToTensor(),
            transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
        ])
        imdb_root = os.environ['IMDB_ROOT']
        df = unpickle_imdb(f"{imdb_root}/imdb.pickle")
        # Analyze only the first 250 rows to keep the test quick.
        ds = ImdbDataset(root=imdb_root,
                         df=df[:250],
                         transform=trans,
                         include_path=True)
        log = gender_analyze(self.weights, ds)
        os.makedirs('tmp', exist_ok=True)  # ensure the output dir exists
        with open('tmp/gender_analyze_log.p', 'wb') as f:
            pickle.dump(log, f)
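
Reading the log back is simply the inverse of the dump; the log's internal structure is defined by gender_analyze and is not shown in these snippets.

import pickle

with open('tmp/gender_analyze_log.p', 'rb') as f:
    log = pickle.load(f)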
Example #5

import datetime
import os

import torch
from torch import nn
from torch.nn import CrossEntropyLoss
from torch.optim import Adam
from torch.utils.data import DataLoader, random_split
from torchvision import transforms
from torchvision.models import resnet18
from src.datasets import ImdbDataset, unpickle_imdb
from src.utils import load_config
from src.convnets.utils import IMAGENET_MEAN, IMAGENET_STD


def gender_estimation(weights=None):
    load_config()
    device = torch.device('cuda')
    imdb_root = os.environ['IMDB_ROOT']
    df = unpickle_imdb(f"{imdb_root}/imdb.pickle")
    savedir = os.environ['LOG_DIR']
    trans = transforms.Compose([
        # transforms.Resize(72),
        # transforms.RandomCrop(64),
        transforms.Resize(64),
        transforms.CenterCrop(64),
        transforms.RandomHorizontalFlip(0.5),
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
    ])
    ds = ImdbDataset(root=imdb_root, df=df, transform=trans)
    print(f"Loaded ds with {len(ds)} items.")
    tr, val = random_split(ds, [len(ds) - len(ds) // 10, len(ds) // 10])
    loss_fn = CrossEntropyLoss()

    # model = resnet50(pretrained=True)
    model = resnet18(pretrained=True)
    model.fc = nn.Linear(model.fc.in_features, 2)
    """
    from src.simclr import ResNetSimCLR
    model = ResNetSimCLR('resnet50', 64)
    #if weights:
    #    model.load_state_dict(torch.load(weights))

    model.projector = nn.Sequential(
        nn.Linear(model.n_features, model.n_features, bias=False),
        nn.ReLU(),
        nn.Linear(model.n_features, 2, bias=False)
    )
    for param in model.encoder.parameters():
        param.requires_grad = False
    """
    model.to(device)
    optim = Adam(model.parameters(), lr=3e-4, weight_decay=1e-6)
    tr_dl = DataLoader(tr,
                       batch_size=16,
                       shuffle=True,
                       num_workers=8,
                       pin_memory=True)
    val_dl = DataLoader(val,
                        batch_size=16,
                        shuffle=False,
                        num_workers=4,
                        pin_memory=True)
    tr_log, val_log = {}, {}

    def untrans_display(im):
        # Debug helper (not called in the training loop): undo the ImageNet
        # normalization in place and display the image.
        std = torch.as_tensor(IMAGENET_STD)
        mean = torch.as_tensor(IMAGENET_MEAN)
        if mean.ndim == 1:
            mean = mean[:, None, None]
        if std.ndim == 1:
            std = std[:, None, None]
        im.mul_(std).add_(mean)
        transforms.ToPILImage()(im).show()

    def log_epoch(preds, labels, loss, log):
        # Accumulate per-batch stats under the current epoch key
        # ('epoch' is read from the enclosing scope).
        _, pred_class = torch.max(preds.data, 1)
        log[epoch].append({
            'loss': loss.item(),
            'count': labels.size(0),
            'correct': (pred_class == labels).sum().item(),
            'tp': ((pred_class == 1) & (labels == 1)).sum().item(),
            'tn': ((pred_class == 0) & (labels == 0)).sum().item(),
            'fp': ((pred_class == 1) & (labels == 0)).sum().item(),
            'fn': ((pred_class == 0) & (labels == 1)).sum().item(),
            'cnt_p': (labels == 1).sum().item(),
            'cnt_n': (labels == 0).sum().item(),
        })

    def print_log_epoch(_e, log, pretext=''):
        epoch_loss = [x['loss'] for x in log[_e]]
        sum_loss = sum(epoch_loss)
        avg_loss = sum_loss / len(epoch_loss)
        print(f"{pretext}Epoch {_e}: Total Loss={sum_loss}\tAvg Loss={avg_loss}\tNum Batches={len(epoch_loss)}")

        e_cnt = sum(x['count'] for x in log[_e])
        e_correct = sum(x['correct'] for x in log[_e])
        print(f"{pretext}Epoch {_e}: Total Cnt={e_cnt}\tTotal Cor={e_correct}\tAcc={e_correct / e_cnt}")

        tp_cnt = sum(x['tp'] for x in log[_e])
        tn_cnt = sum(x['tn'] for x in log[_e])
        fp_cnt = sum(x['fp'] for x in log[_e])
        fn_cnt = sum(x['fn'] for x in log[_e])
        p_cnt = sum(x['cnt_p'] for x in log[_e])
        n_cnt = sum(x['cnt_n'] for x in log[_e])
        print(f"{pretext}Epoch {_e}: TP={tp_cnt}\tTN={tn_cnt}\tFP={fp_cnt}\tFN={fn_cnt}\tP cnt={p_cnt}\tN cnt={n_cnt}")

    def _epoch(train):
        if train:
            dl = tr_dl
            model.train()
        else:
            dl = val_dl
            model.eval()

        # Autograd is only needed while training.
        with torch.set_grad_enabled(train):
            for ix, (img, label) in enumerate(dl):
                img = img.to(device=device)
                labels = label.to(device=device, dtype=torch.int64)
                preds = model(img)
                # _, preds = model(img)  # for the SimCLR variant, which returns a tuple
                loss = loss_fn(preds, labels)
                if train:
                    optim.zero_grad()
                    loss.backward()
                    optim.step()
                    log_epoch(preds, labels, loss, tr_log)
                else:
                    log_epoch(preds, labels, loss, val_log)

    def _save_weights(prefix='', suffix=''):
        time = datetime.datetime.now()
        s = f"{prefix}_{time.month}_{time.day}_{time.hour}_{time.minute}_{time.second}_{suffix}.pth"
        fname = f"{savedir}/{s}"
        print(f"Saving to: {fname}")
        torch.save(model.state_dict(), fname)

    for epoch in range(20):
        tr_log[epoch] = []
        _epoch(True)
        print_log_epoch(epoch, tr_log)

        val_log[epoch] = []
        _epoch(False)
        print_log_epoch(epoch, val_log, pretext='VAL::')
        _save_weights(prefix='long')
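
Because log_epoch records raw confusion counts per batch, epoch-level precision and recall fall out directly. A small helper along these lines (not part of the original snippet) could summarize an epoch:

def precision_recall(log, e):
    # Aggregate the per-batch confusion counts recorded by log_epoch.
    tp = sum(x['tp'] for x in log[e])
    fp = sum(x['fp'] for x in log[e])
    fn = sum(x['fn'] for x in log[e])
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    return precision, recall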
Example #6
    def test_load_pickle(self):
        from src.datasets import unpickle_imdb
        df = unpickle_imdb(f"{self.imdb_root}/imdb.pickle")
        print(df)
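
unpickle_imdb itself is not shown on this page. Given that its return value is sliced like a DataFrame (df[:250] in Example #4), it presumably wraps pandas deserialization; a sketch under that assumption:

import pandas as pd

def unpickle_imdb(path):
    # Assumed behavior: the pickle holds a pandas DataFrame of IMDB face
    # metadata, one row per image.
    return pd.read_pickle(path)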