def show_model_tensorboard():
    import torch.utils.tensorboard as tensorboard

    pfld = PFLD()
    auxiliarynet = AuxiliaryNet()  # instantiated for completeness; only pfld is traced below

    dataloader = create_test_loader(batch_size=20, transform=True)
    image, labels = next(iter(dataloader))  # next(iterator), not iterator.next(), on Python 3

    writer = tensorboard.SummaryWriter("checkpoint/ray22")
    writer.add_graph(pfld, image)  # trace the model graph with one sample batch as input
    print("added model to tensorboard")
    time.sleep(4)
    writer.close()
def overfit_one_mini_batch():
    # ========= dataset ===========
    dataloader = create_test_loader(batch_size=20)

    # =========== models =============
    pfld_model = PFLD().to(device)
    auxiliary_model = AuxiliaryNet().to(device)
    pfld_model.train()
    auxiliary_model.train()

    criterion = PFLD_L2Loss().to(device)
    parameters = list(pfld_model.parameters()) + list(auxiliary_model.parameters())
    optimizer = torch.optim.Adam(parameters, lr=0.0001, weight_decay=1e-6)

    # a single fixed mini-batch: if the model/loss/optimizer are wired correctly,
    # the loss should drop close to zero, which makes this a quick sanity check
    image, labels = next(iter(dataloader))
    print(image.shape)
    time.sleep(5)

    # the labels are identical every iteration, so prepare them once outside the loop
    euler_angles = labels['euler_angles'].squeeze().to(device)  # shape (batch, 3)
    attributes = labels['attributes'].squeeze().to(device)      # shape (batch, 6)
    landmarks = labels['landmarks'].squeeze()                   # shape (batch, 98, 2)
    landmarks = landmarks.reshape(
        (landmarks.shape[0], 196)).to(device)  # flatten landmarks to match the loss function
    image = image.to(device)

    for i in range(6000):
        features, pred_landmarks = pfld_model(image)
        pred_angles = auxiliary_model(features)
        weighted_loss, loss = criterion(pred_landmarks, landmarks, pred_angles,
                                        euler_angles, attributes)

        train_w_loss = round(weighted_loss.item(), 3)
        train_loss = round(loss.item(), 3)
        print(f"\t.. weighted_loss= {train_w_loss} ... loss={train_loss}")

        optimizer.zero_grad()
        weighted_loss.backward()
        optimizer.step()
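# PFLD_L2Loss is used above but its definition is not part of this excerpt.
# Below is a minimal sketch of the weighting idea from the PFLD paper: the plain
# L2 landmark error is scaled per sample by an attribute weight and by a
# (1 - cos) penalty on the Euler-angle error. The exact weighting here is an
# assumption for illustration, not the repo's implementation.
import torch
import torch.nn as nn

class PFLD_L2Loss_sketch(nn.Module):
    def forward(self, pred_landmarks, landmarks, pred_angles, euler_angles, attributes):
        # per-sample angle weight: sum over yaw/pitch/roll of (1 - cos(angle error))
        angle_weight = torch.sum(1.0 - torch.cos(pred_angles - euler_angles), dim=1)
        # per-sample attribute weight; a simple count-based stand-in here
        attr_weight = 1.0 + torch.sum(attributes, dim=1)
        l2 = torch.sum((pred_landmarks - landmarks) ** 2, dim=1)  # squared L2 per sample
        weighted_loss = torch.mean(attr_weight * angle_weight * l2)
        loss = torch.mean(l2)
        return weighted_loss, loss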
def main():
    # ========= dataloaders ===========
    train_dataloader = create_train_loader(root=args.datapath,
                                           batch_size=args.batch_size)
    test_dataloader = create_test_loader(root=args.datapath,
                                         batch_size=args.batch_size)
    start_epoch = 0

    # ======== models & loss ==========
    pfld = PFLD().to(device)
    auxiliarynet = AuxiliaryNet().to(device)
    loss = PFLD_L2Loss().to(device)

    # ========= load weights ===========
    if args.resume:
        checkpoint = torch.load(args.pretrained)
        pfld.load_state_dict(checkpoint["pfld"], strict=False)
        auxiliarynet.load_state_dict(checkpoint["auxiliary"])
        start_epoch = checkpoint['epoch'] + 1
        print(f'\tLoaded checkpoint from {args.pretrained}\n')
        # logging.info(f'\tLoaded checkpoint from {args.pretrained}\n')
        time.sleep(1)
    else:
        print("******************* Start training from scratch *******************\n")
        time.sleep(5)

    # =========== optimizer ===========
    # parameters = list(pfld.parameters()) + list(auxiliarynet.parameters())
    parameters = [{'params': pfld.parameters()},
                  {'params': auxiliarynet.parameters()}]
    optimizer = torch.optim.Adam(parameters,
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', patience=args.lr_patience, verbose=True)

    # ========================================================================
    for epoch in range(start_epoch, args.epochs):
        # =========== train / validate ===========
        w_train_loss, train_loss = train_one_epoch(pfld, auxiliarynet, loss,
                                                   optimizer, train_dataloader,
                                                   epoch)
        val_loss = validate(pfld, auxiliarynet, loss, test_dataloader, epoch)
        scheduler.step(val_loss)
        logging.info(
            f"\ttraining epoch={epoch} .. weighted_loss= {w_train_loss} ... loss={train_loss}"
        )

        # ============= tensorboard =============
        # writer.add_scalar('train_weighted_loss', w_train_loss, epoch)
        writer.add_scalar('train_loss', train_loss, epoch)
        writer.add_scalar('val_loss', val_loss, epoch)

        # ============== save model =============
        if epoch % args.savefreq == 0:
            checkpoint_state = {
                "pfld": pfld.state_dict(),
                "auxiliary": auxiliarynet.state_dict(),
                "epoch": epoch
            }
            savepath = os.path.join(args.savepath,
                                    f'weights.pth_epoch_{epoch}.tar')
            torch.save(checkpoint_state, savepath)
            print(f'\n\t*** Saved checkpoint in {savepath} ***\n')
            time.sleep(2)

    writer.close()
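# train_one_epoch and validate are called in main() but defined elsewhere in the
# repo. Below is a minimal sketch of both, inferred from their call sites and
# from the training step in overfit_one_mini_batch above; the bodies are
# assumptions, not the original implementations.
def train_one_epoch(pfld, auxiliarynet, criterion, optimizer, dataloader, epoch):
    pfld.train()
    auxiliarynet.train()
    w_losses, losses = [], []
    for image, labels in dataloader:
        image = image.to(device)
        landmarks = labels['landmarks'].squeeze().reshape(image.shape[0], 196).to(device)
        euler_angles = labels['euler_angles'].squeeze().to(device)
        attributes = labels['attributes'].squeeze().to(device)

        features, pred_landmarks = pfld(image)
        pred_angles = auxiliarynet(features)
        weighted_loss, loss = criterion(pred_landmarks, landmarks, pred_angles,
                                        euler_angles, attributes)

        optimizer.zero_grad()
        weighted_loss.backward()  # optimize the weighted loss; log the plain one too
        optimizer.step()

        w_losses.append(weighted_loss.item())
        losses.append(loss.item())
    return sum(w_losses) / len(w_losses), sum(losses) / len(losses)

def validate(pfld, auxiliarynet, criterion, dataloader, epoch):
    pfld.eval()
    auxiliarynet.eval()
    losses = []
    with torch.no_grad():  # no gradients needed for evaluation
        for image, labels in dataloader:
            image = image.to(device)
            landmarks = labels['landmarks'].squeeze().reshape(image.shape[0], 196).to(device)
            _, pred_landmarks = pfld(image)
            losses.append(torch.mean(
                torch.sum((pred_landmarks - landmarks) ** 2, dim=1)).item())
    return sum(losses) / len(losses)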
    # interactive visualization: step through the dataset example by example
    for i in range(len(dataset)):
        image, labels = dataset[i]
        print('landmarks', labels['landmarks'])
        print("*" * 80, '\n\n\t press n for next example .... ESC to exit')
        print('\tcurrent image: ', labels['image_name'])
        visualizer.visualize(image, labels)
        if visualizer.user_press == 27:  # ESC
            break

# tensorboard visualization on args.stop_batch batches with batch size 64
else:
    batch_size = 64
    dataloader = create_test_loader(batch_size=batch_size, transform=None)
    batch = 0
    for (images, labels) in dataloader:
        batch += 1
        visualizer.visualize_tensorboard(images, labels, batch)
        print("*" * 60,
              f'\n\n\t Saved {batch_size} images at step {batch}. Run tensorboard from the project root')
        if batch == args.stop_batch:
            break
    visualizer.writer.close()
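# visualize_tensorboard belongs to the project's visualizer class, which is not
# shown here. A minimal sketch of the underlying idea, assuming torchvision is
# available: tile each batch into a grid and log it as one tensorboard image.
# Landmark drawing is omitted, and the names here are illustrative, not the
# repo's API.
import torchvision

def visualize_batch_tensorboard(writer, images, step):
    grid = torchvision.utils.make_grid(images, nrow=8)  # tile the batch into a single image
    writer.add_image('batch_visualization', grid, global_step=step)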
import time

import torch

from config import Config
from dataset import create_test_loader
from model_factory import get_model
from meanteacher import Tester
from model_utils import save_checkpoint, load_checkpoint

if __name__ == "__main__":
    cfg = Config()
    cfg.device = torch.device("cuda" if cfg.device_ids != "cpu" else "cpu")

    # dataset
    eval_loader = create_test_loader(cfg.data_dir, cfg)

    # create the student model and its EMA (mean-teacher) counterpart
    model = get_model(cfg.model_arch, pretrained=cfg.pretrained)
    ema_model = get_model(cfg.model_arch, pretrained=cfg.pretrained, ema=True)

    # resume training / load trained weights
    last_epoch = 0
    if cfg.resume:
        model, ema_model, optimizer, last_epoch = load_checkpoint(
            model, ema_model, cfg.resume)

    # create tester
    tester = Tester(cfg, model, ema_model)
    tester._set_device(cfg.device)
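# load_checkpoint is imported from model_utils but not shown in this excerpt.
# A minimal sketch consistent with the call above, assuming the checkpoint is a
# dict produced by torch.save; the key names are assumptions, not the repo's.
def load_checkpoint_sketch(model, ema_model, path):
    checkpoint = torch.load(path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])
    ema_model.load_state_dict(checkpoint['ema_model'])
    optimizer_state = checkpoint.get('optimizer')  # optimizer state dict for the caller to restore
    last_epoch = checkpoint.get('epoch', 0)
    return model, ema_model, optimizer_state, last_epoch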