def main(args):
    """Train the PFLD backbone + auxiliary net on the WLFW dataset.

    Sets up logging, builds the models/criterion/optimizer/scheduler,
    loads the train/val datasets, then runs the epoch loop: train,
    checkpoint, validate, step the LR scheduler, and log to TensorBoard.
    """
    # Step 1: parse args config — log to both a file and the console.
    logging.basicConfig(
        format=
        '[%(asctime)s] [p%(process)s] [%(pathname)s:%(lineno)d] [%(levelname)s] %(message)s',
        level=logging.INFO,
        handlers=[
            logging.FileHandler(args.log_file, mode='w'),
            logging.StreamHandler()
        ])
    print_args(args)

    # Step 2: model, criterion, optimizer, scheduler.
    # Use the module-level `device` (as the other entry points in this file
    # do) instead of hard-coding .cuda(), so CPU-only runs do not crash.
    plfd_backbone = PFLDInference().to(device)
    auxiliarynet = AuxiliaryNet().to(device)
    criterion = PFLDLoss()
    # Both networks are optimized jointly with a single Adam instance.
    optimizer = torch.optim.Adam(
        [{
            'params': plfd_backbone.parameters()
        }, {
            'params': auxiliarynet.parameters()
        }],
        lr=args.base_lr,
        weight_decay=args.weight_decay)
    # Reduce LR when the validation loss plateaus.
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', patience=args.lr_patience, verbose=True)

    # Step 3: data / augmentation.
    transform = transforms.Compose([transforms.ToTensor()])
    wlfwdataset = WLFWDatasets(args.dataroot, transform)
    dataloader = DataLoader(wlfwdataset,
                            batch_size=args.train_batchsize,
                            shuffle=True,
                            num_workers=args.workers,
                            drop_last=False)
    wlfw_val_dataset = WLFWDatasets(args.val_dataroot, transform)
    wlfw_val_dataloader = DataLoader(wlfw_val_dataset,
                                     batch_size=args.val_batchsize,
                                     shuffle=False,
                                     num_workers=args.workers)

    # Step 4: run the train/validate loop.
    writer = SummaryWriter(args.tensorboard)
    try:
        for epoch in range(args.start_epoch, args.end_epoch + 1):
            weighted_train_loss, train_loss = train(dataloader, plfd_backbone,
                                                    auxiliarynet, criterion,
                                                    optimizer, epoch)
            # Snapshot every epoch (model weights only).
            filename = os.path.join(
                str(args.snapshot),
                "checkpoint_epoch_" + str(epoch) + '.pth.tar')
            save_checkpoint(
                {
                    'epoch': epoch,
                    'plfd_backbone': plfd_backbone.state_dict(),
                    'auxiliarynet': auxiliarynet.state_dict()
                }, filename)

            val_loss = validate(wlfw_val_dataloader, plfd_backbone,
                                auxiliarynet, criterion, epoch)
            # Scheduler watches the validation loss.
            scheduler.step(val_loss)

            writer.add_scalar('data/weighted_loss', weighted_train_loss,
                              epoch)
            writer.add_scalars('data/loss', {
                'val loss': val_loss,
                'train loss': train_loss
            }, epoch)
    finally:
        # Always flush/close the TensorBoard writer, even on interrupt.
        writer.close()
def main(args):
    """Train PFLD on the IBUG dataset, with optional checkpoint resume.

    Builds the models/criterion/optimizer/scheduler, optionally restores
    weights (and optimizer state) from ``args.resume_path``, constructs
    augmented train / plain val loaders with random sampling, then runs
    the epoch loop with checkpointing and TensorBoard logging.
    """
    # Step 1: parse args config — log to both a file and the console.
    logging.basicConfig(format='[%(asctime)s] [%(levelname)s] %(message)s',
                        level=logging.INFO,
                        handlers=[
                            logging.FileHandler(args.log_file, mode='w'),
                            logging.StreamHandler()
                        ])
    print_args(args)

    # Step 2: model, criterion, optimizer, scheduler.
    plfd_backbone = PFLDInference().to(device)
    auxiliarynet = AuxiliaryNet().to(device)
    criterion = LandmarkLoss()
    optimizer = torch.optim.Adam(
        [{
            'params': plfd_backbone.parameters()
        }, {
            'params': auxiliarynet.parameters()
        }],
        lr=args.base_lr,
        weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', patience=args.lr_patience, verbose=True)

    # Optionally resume from a previous checkpoint.
    if args.resume_path:
        # Use logging (not print) so the message reaches the log file too.
        logging.info('loading checkpoint %s', args.resume_path)
        # map_location keeps GPU-saved checkpoints loadable on any host.
        checkpoint = torch.load(str(args.resume_path), map_location=device)
        args.start_epoch = checkpoint['epoch']
        plfd_backbone.load_state_dict(checkpoint['plfd_backbone'])
        auxiliarynet.load_state_dict(checkpoint['auxiliarynet'])
        # Older checkpoints may not carry optimizer state.
        if 'optimizer' in checkpoint:
            optimizer.load_state_dict(checkpoint['optimizer'])

    # step 3: data — augmentation for training, only cropping for val.
    train_transform = transforms.Compose([
        AugCrop(output_size=112, is_training=True),
        HorizontalFlip(mirror=args.mirror_file),
        RandomRotate(max_angle=30),
        Affine(max_strength=30, output_size=112),
        ColorDistort()
    ])
    val_transform = transforms.Compose([AugCrop(output_size=112)])

    ibugdataset = IBUGDatasets(args.train_json,
                               transform=train_transform,
                               is_train=True)
    train_dataset_size = ibugdataset.get_dataset_size()
    # Sample with replacement so each "epoch" draws dataset_size samples.
    sampler = RandomSampler(ibugdataset,
                            replacement=True,
                            num_samples=train_dataset_size)
    dataloader = DataLoader(ibugdataset,
                            batch_size=args.train_batchsize,
                            sampler=sampler,
                            num_workers=args.workers,
                            drop_last=False)

    ibug_val_dataset = IBUGDatasets(args.val_json, transform=val_transform)
    val_dataset_size = ibug_val_dataset.get_dataset_size()
    val_sampler = RandomSampler(ibug_val_dataset,
                                replacement=True,
                                num_samples=val_dataset_size)
    ibug_val_dataloader = DataLoader(ibug_val_dataset,
                                     batch_size=args.val_batchsize,
                                     sampler=val_sampler,
                                     num_workers=args.workers)

    # step 4: run the train/validate loop.
    writer = SummaryWriter(args.tensorboard)
    for epoch in range(args.start_epoch, args.end_epoch + 1):
        weighted_train_loss, train_loss = train(dataloader, plfd_backbone,
                                                auxiliarynet, criterion,
                                                optimizer, epoch)
        # Snapshot every epoch, including optimizer state for clean resumes.
        filename = os.path.join(str(args.snapshot),
                                "checkpoint_epoch_" + str(epoch) + '.pth.tar')
        save_checkpoint(
            {
                'epoch': epoch,
                'plfd_backbone': plfd_backbone.state_dict(),
                'auxiliarynet': auxiliarynet.state_dict(),
                'optimizer': optimizer.state_dict()
            }, filename)

        val_loss = validate(ibug_val_dataloader, plfd_backbone, auxiliarynet,
                            criterion, epoch)
        scheduler.step(val_loss)

        writer.add_scalar('data/weighted_loss', weighted_train_loss, epoch)
        writer.add_scalars('data/loss', {
            'val loss': val_loss,
            'train loss': train_loss
        }, epoch)
    writer.close()
def main(args):
    """Train a selectable PFLD backbone and keep the best-NME checkpoint.

    The backbone implementation is chosen at runtime from ``args.backbone``
    (v2 / v3 / ghost / lite). Optionally resumes weights from a ``.pth``
    checkpoint, then trains, snapshotting every ``args.epoch_interval``
    epochs and whenever the validation NME improves.
    """
    # Step 1: parse args config — log to both a file and the console.
    logging.basicConfig(
        format=
        '[%(asctime)s] [p%(process)s] [%(pathname)s:%(lineno)d] [%(levelname)s] %(message)s',
        level=logging.INFO,
        handlers=[
            logging.FileHandler(args.log_file, mode='w'),
            logging.StreamHandler()
        ])
    print_args(args)

    # Select the backbone implementation from the CLI flag.
    if args.backbone == "v2":
        from models.pfld import PFLDInference, AuxiliaryNet
    elif args.backbone == "v3":
        from models.mobilev3_pfld import PFLDInference, AuxiliaryNet
    elif args.backbone == "ghost":
        from models.ghost_pfld import PFLDInference, AuxiliaryNet
    elif args.backbone == "lite":
        from models.lite import PFLDInference, AuxiliaryNet
    else:
        raise ValueError("backbone is not implemented")
    plfd_backbone = PFLDInference()
    auxiliarynet = AuxiliaryNet()

    # Optionally restore weights (and the start epoch) from a checkpoint.
    if os.path.exists(args.resume) and args.resume.endswith('.pth'):
        # Lazy %-args: the message is only formatted if actually emitted.
        logging.info("loading the checkpoint from %s", args.resume)
        check = torch.load(args.resume, map_location=torch.device('cpu'))
        plfd_backbone.load_state_dict(check["plfd_backbone"])
        auxiliarynet.load_state_dict(check["auxiliarynet"])
        args.start_epoch = check["epoch"]

    # Step 2: model, criterion, optimizer, scheduler.
    plfd_backbone = plfd_backbone.to(device)
    auxiliarynet = auxiliarynet.to(device)
    criterion = LandMarkLoss()
    optimizer = torch.optim.Adam(
        [{
            'params': plfd_backbone.parameters()
        }, {
            'params': auxiliarynet.parameters()
        }],
        lr=args.base_lr,
        weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', patience=args.lr_patience, verbose=True)

    # step 3: data / augmentation.
    transform = transforms.Compose([transforms.ToTensor()])
    wlfwdataset = PFLDDatasets(args.dataroot,
                               transform,
                               img_root=os.path.realpath('./data'),
                               img_size=args.img_size)
    dataloader = DataLoader(wlfwdataset,
                            batch_size=args.train_batchsize,
                            shuffle=True,
                            num_workers=args.workers,
                            drop_last=False)
    wlfw_val_dataset = PFLDDatasets(args.val_dataroot,
                                    transform,
                                    img_root=os.path.realpath('./data'),
                                    img_size=args.img_size)
    wlfw_val_dataloader = DataLoader(wlfw_val_dataset,
                                     batch_size=args.val_batchsize,
                                     shuffle=False,
                                     num_workers=args.workers)

    # step 4: run the train/validate loop.
    weighted_losses = []
    train_losses = []
    val_losses = []
    val_nme = float('inf')  # best (lowest) validation NME seen so far
    for epoch in range(args.start_epoch, args.end_epoch + 1):
        weighted_train_loss, train_loss = train(dataloader, plfd_backbone,
                                                auxiliarynet, criterion,
                                                optimizer, epoch)
        # Periodic snapshot.
        if epoch % args.epoch_interval == 0:
            filename = os.path.join(str(args.snapshot),
                                    "checkpoint_epoch_" + str(epoch) + '.pth')
            save_checkpoint(
                {
                    'epoch': epoch,
                    'plfd_backbone': plfd_backbone.state_dict(),
                    'auxiliarynet': auxiliarynet.state_dict()
                }, filename)

        val_loss, cur_val_nme = validate(wlfw_val_dataloader, plfd_backbone,
                                         auxiliarynet, criterion)
        # Keep a dedicated checkpoint for the best validation NME.
        if cur_val_nme < val_nme:
            filename = os.path.join(str(args.snapshot),
                                    "checkpoint_min_nme.pth")
            save_checkpoint(
                {
                    'epoch': epoch,
                    'plfd_backbone': plfd_backbone.state_dict(),
                    'auxiliarynet': auxiliarynet.state_dict()
                }, filename)
            val_nme = cur_val_nme

        scheduler.step(val_loss)
        weighted_losses.append(weighted_train_loss.item())
        train_losses.append(train_loss.item())
        val_losses.append(val_loss.item())
        logging.info(
            "epoch: {}, weighted_train_loss: {:.4f}, trainset loss: {:.4f} "
            "valset loss: {:.4f} best val nme: {:.4f}\n ".format(
                epoch, weighted_train_loss, train_loss, val_loss, val_nme))

    # Log the per-epoch histories; use new names instead of rebinding the
    # list variables to strings (the original shadowed them with str).
    logging.info(" ".join(map(str, weighted_losses)))
    logging.info(" ".join(map(str, train_losses)))
    logging.info(" ".join(map(str, val_losses)))
def main(args):
    """Train PFLD (MobileNetV2 or GhostNet backbone) with wandb tracking.

    Backbone choice and GhostNet hyperparameters come from ``wandb.config``;
    training metrics go to both wandb and TensorBoard.
    """
    # Step 1: parse args config — log to both a file and the console.
    logging.basicConfig(
        format=
        '[%(asctime)s] [p%(process)s] [%(pathname)s:%(lineno)d] [%(levelname)s] %(message)s',
        level=logging.INFO,
        handlers=[
            logging.FileHandler(args.log_file, mode='w'),
            logging.StreamHandler()
        ])
    print_args(args)

    # Step 2: model, criterion, optimizer, scheduler.
    if wandb.config.pfld_backbone == "GhostNet":
        plfd_backbone = CustomizedGhostNet(width=wandb.config.ghostnet_width,
                                           dropout=0.2)
        logger.info(
            f"Using GHOSTNET with width={wandb.config.ghostnet_width} as backbone of PFLD backbone"
        )
        # Optionally warm-start from GhostNet weights trained on ImageNet.
        if wandb.config.ghostnet_with_pretrained_weight_image_net:
            logger.info(
                f"Using pretrained weights of ghostnet model trained on image net data "
            )
            plfd_backbone = load_pretrained_weight_imagenet_for_ghostnet_backbone(
                plfd_backbone, "./checkpoint_imagenet/state_dict_93.98.pth")
    else:
        plfd_backbone = PFLDInference()  # MobileNet2 defaut
        logger.info("Using MobileNet2 as backbone of PFLD backbone")
    # BUG FIX: move the backbone to `device` after the branch so BOTH paths
    # end up on the right device (previously the GhostNet path was left on
    # CPU while auxiliarynet was placed on `device`).
    plfd_backbone = plfd_backbone.to(device)
    auxiliarynet = AuxiliaryNet().to(device)

    # Watch model by wandb (logs gradients/parameters).
    wandb.watch(plfd_backbone)
    wandb.watch(auxiliarynet)

    criterion = PFLDLoss()
    optimizer = torch.optim.Adam(
        [{
            'params': plfd_backbone.parameters()
        }, {
            'params': auxiliarynet.parameters()
        }],
        lr=args.base_lr,
        weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', patience=args.lr_patience, verbose=True)

    # step 3: data / augmentation.
    transform = transforms.Compose([transforms.ToTensor()])
    wlfwdataset = WLFWDatasets(args.dataroot, transform)
    dataloader = DataLoader(wlfwdataset,
                            batch_size=args.train_batchsize,
                            shuffle=True,
                            num_workers=args.workers,
                            drop_last=False)
    wlfw_val_dataset = WLFWDatasets(args.val_dataroot, transform)
    wlfw_val_dataloader = DataLoader(wlfw_val_dataset,
                                     batch_size=args.val_batchsize,
                                     shuffle=False,
                                     num_workers=args.workers)

    # step 4: run the train/validate loop.
    writer = SummaryWriter(args.tensorboard)
    for epoch in range(args.start_epoch, args.end_epoch + 1):
        weighted_train_loss, train_loss = train(dataloader, plfd_backbone,
                                                auxiliarynet, criterion,
                                                optimizer, epoch)
        # Snapshot every epoch (model weights only).
        filename = os.path.join(str(args.snapshot),
                                "checkpoint_epoch_" + str(epoch) + '.pth.tar')
        save_checkpoint(
            {
                'epoch': epoch,
                'plfd_backbone': plfd_backbone.state_dict(),
                'auxiliarynet': auxiliarynet.state_dict()
            }, filename)

        val_loss = validate(wlfw_val_dataloader, plfd_backbone, auxiliarynet,
                            criterion)
        wandb.log({"metric/val_loss": val_loss})
        scheduler.step(val_loss)

        writer.add_scalar('data/weighted_loss', weighted_train_loss, epoch)
        writer.add_scalars('data/loss', {
            'val loss': val_loss,
            'train loss': train_loss
        }, epoch)
    writer.close()