def test_imdb_dataset(self):
    from src.datasets import ImdbDataset, unpickle_imdb
    from torchvision.transforms import ToTensor, ToPILImage

    df = unpickle_imdb(f"{self.imdb_root}/imdb.pickle")
    ds = ImdbDataset(root=self.imdb_root, df=df, transform=ToTensor())
    pil = ToPILImage()
    # Fetch the last item to confirm indexing and the transform both work;
    # since the only transform is ToTensor(), pil(tensor) round-trips for a spot check.
    tensor, label = ds[-1]
def train_gender_imdb(weights=''):
    model = resnet18(pretrained=True)
    # Replace the 1000-way ImageNet head with a binary gender head.
    model.fc = nn.Linear(model.fc.in_features, 2)
    if weights:
        model.load_state_dict(torch.load(weights))

    tr_trans = transforms.Compose([
        transforms.RandomRotation(12),
        transforms.Resize(144),
        transforms.RandomCrop(128),
        transforms.RandomHorizontalFlip(0.5),
        transforms.RandomGrayscale(0.1),
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
    ])
    ts_trans = transforms.Compose([
        transforms.Resize(64),
        transforms.CenterCrop(64),
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
    ])

    # Alternative data path on APPA-REAL, kept for reference:
    """
    tr_ds = AppaRealDataset(trans=tr_trans, split='train', target_trans=None, faceonly=True)
    val_ds = AppaRealDataset(trans=tr_trans, split='val', target_trans=None, faceonly=True)
    tr_dl = DataLoader(tr_ds, batch_size=16, shuffle=True, num_workers=8, pin_memory=True)
    val_dl = DataLoader(val_ds, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)
    """
    ds = ImdbDataset(root=IMDB_ROOT,
                     df=unpickle_imdb(f"{IMDB_ROOT}/imdb.pickle"),
                     transform=tr_trans)
    # Hold out 10% of the dataset for validation.
    tr_ds, val_ds = random_split(ds, [len(ds) - len(ds) // 10, len(ds) // 10])
    tr_dl = DataLoader(tr_ds, batch_size=16, shuffle=True, num_workers=8, pin_memory=True)
    val_dl = DataLoader(val_ds, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)

    loss_fn = CrossEntropyLoss(reduction='mean')
    optim = Adam
    optim_kwargs = {
        'lr': 3e-4,
        'weight_decay': 1e-5,
    }
    scheduler = MyScheduler
    scheduler_kwargs = {
        'warmup_steps': len(tr_dl) * 2,
        'scheduler_class': CosineAnnealingLR,
        'T_max': len(tr_dl),
        'eta_min': 0,
        'last_epoch': -1,
    }
    trainer = GenderTrainer(model, tr_dl, val_dl, loss_fn,
                            optim=optim, optim_kwargs=optim_kwargs,
                            scheduler=scheduler, scheduler_kwargs=scheduler_kwargs)
    trainer.train(20)
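# `MyScheduler` is referenced above but not defined in this section. A minimal
# sketch of an interface that would accept the kwargs passed in
# `scheduler_kwargs` (warmup_steps, scheduler_class, and the remaining kwargs
# forwarded to that class) could look like this -- an assumption about the
# real implementation, not a copy of it:
class MyScheduler:
    def __init__(self, optimizer, warmup_steps, scheduler_class, **kwargs):
        self.optimizer = optimizer
        self.warmup_steps = warmup_steps
        self.step_num = 0
        self.base_lrs = [g['lr'] for g in optimizer.param_groups]
        # Delegate to the wrapped scheduler (e.g. CosineAnnealingLR) after warmup.
        self.scheduler = scheduler_class(optimizer, **kwargs)

    def step(self):
        self.step_num += 1
        if self.step_num <= self.warmup_steps:
            # Linear warmup from 0 up to each param group's base learning rate.
            scale = self.step_num / self.warmup_steps
            for group, base_lr in zip(self.optimizer.param_groups, self.base_lrs):
                group['lr'] = base_lr * scale
        else:
            self.scheduler.step()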
def __init__(self, print_every_iters=50):
    self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    self.dtype = torch.float32
    self.savedir = f"{LOG_DIR}"
    self.e = 0
    self.train_steps = 0
    self.eval_steps = 0
    self.sw = SummaryWriter()
    self.print_every_iters = print_every_iters
    self.trans = transforms.Compose([
        transforms.Resize(64),
        transforms.CenterCrop(64),
        transforms.RandomHorizontalFlip(0.5),
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
    ])
    ds = ImdbDataset(root=IMDB_ROOT,
                     df=unpickle_imdb(f"{IMDB_ROOT}/imdb.pickle"),
                     transform=self.trans)
    tr_ds, val_ds = random_split(ds, [len(ds) - len(ds) // 10, len(ds) // 10])
    self.tr_dl = DataLoader(tr_ds, batch_size=16, shuffle=True, num_workers=8, pin_memory=True)
    self.val_dl = DataLoader(val_ds, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)
    self.loss_fn = CrossEntropyLoss(reduction='mean')
    self.model = resnet18(pretrained=True)
    # Swap in a binary head, as the other training paths in this file do;
    # without it the loss still runs but trains the 1000-way ImageNet head.
    self.model.fc = nn.Linear(self.model.fc.in_features, 2)
    self.model.to(device=self.device, dtype=self.dtype)
    self.optim = Adam(self.model.parameters(), lr=3e-4, weight_decay=1e-6)
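# The rest of this trainer is not shown in this section. A minimal sketch of a
# train() method consuming the attributes set up above (self.sw,
# self.print_every_iters, self.train_steps, self.e) -- assumed structure, not
# the actual implementation:
def train(self, epochs):
    for _ in range(epochs):
        self.model.train()
        for img, label in self.tr_dl:
            img = img.to(device=self.device, dtype=self.dtype)
            label = label.to(device=self.device, dtype=torch.int64)
            loss = self.loss_fn(self.model(img), label)
            self.optim.zero_grad()
            loss.backward()
            self.optim.step()
            # Log every step to TensorBoard, print at a coarser interval.
            self.sw.add_scalar('train/loss', loss.item(), self.train_steps)
            if self.train_steps % self.print_every_iters == 0:
                print(f"epoch {self.e} step {self.train_steps}: loss={loss.item():.4f}")
            self.train_steps += 1
        self.e += 1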
def test_gender_analyze(self):
    import os
    import pickle

    from src.gender_estimation import gender_analyze
    from src.datasets import ImdbDataset, unpickle_imdb
    from src.utils import load_config
    from src.convnets.utils import IMAGENET_MEAN, IMAGENET_STD
    from torchvision import transforms

    load_config()
    trans = transforms.Compose([
        transforms.Resize(64),
        transforms.CenterCrop(64),
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
    ])
    imdb_root = os.environ['IMDB_ROOT']
    df = unpickle_imdb(f"{imdb_root}/imdb.pickle")
    # Analyze only a 250-row slice so the test stays fast.
    ds = ImdbDataset(root=imdb_root, df=df[:250], transform=trans, include_path=True)
    log = gender_analyze(self.weights, ds)
    with open('tmp/gender_analyze_log.p', 'wb') as f:
        pickle.dump(log, f)
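# The exact structure of the pickled log depends on gender_analyze(), which is
# not shown in this section. Reloading it for offline inspection is just a
# pickle.load; this small helper (the name is ours, purely illustrative)
# mirrors the dump above:
def load_gender_analyze_log(path='tmp/gender_analyze_log.p'):
    import pickle
    with open(path, 'rb') as f:
        return pickle.load(f)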
def gender_estimation(weights=None):
    load_config()
    device = torch.device('cuda')
    imdb_root = os.environ['IMDB_ROOT']
    df = unpickle_imdb(f"{imdb_root}/imdb.pickle")
    savedir = f"{os.environ['LOG_DIR']}"
    trans = transforms.Compose([
        # transforms.Resize(72),
        # transforms.RandomCrop(64),
        transforms.Resize(64),
        transforms.CenterCrop(64),
        transforms.RandomHorizontalFlip(0.5),
        transforms.ToTensor(),
        transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD),
    ])
    ds = ImdbDataset(root=imdb_root, df=df, transform=trans)
    print(f"Loaded ds with {len(ds)} items.")
    # Hold out 10% of the dataset for validation.
    tr, val = random_split(ds, [len(ds) - len(ds) // 10, len(ds) // 10])

    loss_fn = CrossEntropyLoss()
    # model = resnet50(pretrained=True)
    model = resnet18(pretrained=True)
    model.fc = nn.Linear(model.fc.in_features, 2)
    # Alternative: frozen SimCLR encoder with a small trainable projector, kept for reference:
    """
    from src.simclr import ResNetSimCLR
    model = ResNetSimCLR('resnet50', 64)
    # if weights:
    #     model.load_state_dict(torch.load(weights))
    model.projector = nn.Sequential(
        nn.Linear(model.n_features, model.n_features, bias=False),
        nn.ReLU(),
        nn.Linear(model.n_features, 2, bias=False),
    )
    for param in model.encoder.parameters():
        param.requires_grad = False
    """
    model.to(device)
    optim = Adam(model.parameters(), lr=3e-4, weight_decay=1e-6)
    tr_dl = DataLoader(tr, batch_size=16, shuffle=True, num_workers=8, pin_memory=True)
    val_dl = DataLoader(val, batch_size=16, shuffle=False, num_workers=4, pin_memory=True)
    tr_log, val_log = {}, {}

    def untrans_display(im):
        # Undo the ImageNet normalization in place, then show the image.
        std, mean = torch.as_tensor(IMAGENET_STD), torch.as_tensor(IMAGENET_MEAN)
        if mean.ndim == 1:
            mean = mean[:, None, None]
        if std.ndim == 1:
            std = std[:, None, None]
        im.mul_(std).add_(mean)
        to_pil = transforms.ToPILImage()
        to_pil(im).show()

    def log_epoch(preds, labels, loss, log):
        # Record per-batch loss and binary confusion counts for the current epoch.
        _, pred_class = torch.max(preds.data, 1)
        log[epoch].append({
            'loss': loss.item(),
            'count': labels.size(0),
            'correct': (pred_class == labels).sum().item(),
            'tp': ((pred_class == 1) & (labels == 1)).sum().item(),
            'tn': ((pred_class == 0) & (labels == 0)).sum().item(),
            'fp': ((pred_class == 1) & (labels == 0)).sum().item(),
            'fn': ((pred_class == 0) & (labels == 1)).sum().item(),
            'cnt_p': (labels == 1).sum().item(),
            'cnt_n': (labels == 0).sum().item(),
        })

    def print_log_epoch(_e, log, pretext=''):
        epoch_loss = [x['loss'] for x in log[_e]]
        sum_loss = sum(epoch_loss)
        cnt_loss = len(epoch_loss)
        avg_loss = sum_loss / cnt_loss
        print(f"{pretext}Epoch {_e}: Total Loss={sum_loss}\tAvg Loss={avg_loss}\tNum Batches={cnt_loss}")
        e_cnt = [x['count'] for x in log[_e]]
        e_correct = [x['correct'] for x in log[_e]]
        e_acc = sum(e_correct) / sum(e_cnt)
        print(f"{pretext}Epoch {_e}: Total Cnt={sum(e_cnt)}\tTotal Cor={sum(e_correct)}\tAcc={e_acc}")
        tp_cnt = sum(x['tp'] for x in log[_e])
        tn_cnt = sum(x['tn'] for x in log[_e])
        fp_cnt = sum(x['fp'] for x in log[_e])
        fn_cnt = sum(x['fn'] for x in log[_e])
        p_cnt = sum(x['cnt_p'] for x in log[_e])
        n_cnt = sum(x['cnt_n'] for x in log[_e])
        print(f"{pretext}Epoch {_e}: TP={tp_cnt}\tTN={tn_cnt}\tFP={fp_cnt}\tFN={fn_cnt}\tP cnt={p_cnt}\tN cnt={n_cnt}")

    def _epoch(train):
        if train:
            dl = tr_dl
            model.train()
        else:
            dl = val_dl
            model.eval()
        for img, label in dl:
            img = img.to(device=device)
            labels = label.to(device=device, dtype=torch.int64)
            preds = model(img)
            # _, preds = model(img)  # for the SimCLR path, which returns (features, logits)
            loss = loss_fn(preds, labels)
            if train:
                optim.zero_grad()
                loss.backward()
                optim.step()
                log_epoch(preds, labels, loss, tr_log)
            else:
                log_epoch(preds, labels, loss, val_log)

    def _save_weights(prefix='', suffix=''):
        time = datetime.datetime.now()
        s = f"{prefix}_{time.month}_{time.day}_{time.hour}_{time.minute}_{time.second}_{suffix}.pth"
        fname = f"{savedir}/{s}"
        print(f"Saving to: {fname}")
        torch.save(model.state_dict(), fname)

    epoch = 0
    for i in range(20):
        tr_log[epoch] = []
        _epoch(True)
        print_log_epoch(epoch, tr_log)
        val_log[epoch] = []
        _epoch(False)
        print_log_epoch(epoch, val_log, pretext='VAL::')
        epoch += 1
    _save_weights(prefix='long')
def test_load_pickle(self):
    from src.datasets import unpickle_imdb
    df = unpickle_imdb(f"{self.imdb_root}/imdb.pickle")
    print(df)