# Entry point of the SyncNet training script (assumes the imports, the
# train() loop and the load_checkpoint() helper are defined earlier in
# the file).
test_data_loader = data_utils.DataLoader(
    test_dataset, batch_size=hparams.syncnet_batch_size,
    num_workers=8)

device = torch.device("cuda" if use_cuda else "cpu")

# Model
model = SyncNet().to(device)
print('total trainable params {}'.format(
    sum(p.numel() for p in model.parameters() if p.requires_grad)))

optimizer = optim.Adam([p for p in model.parameters() if p.requires_grad],
                       lr=hparams.syncnet_lr)

# Resume from a checkpoint if one was supplied
if checkpoint_path is not None:
    load_checkpoint(checkpoint_path, model, optimizer, reset_optimizer=False)

train(device, model, train_data_loader, test_data_loader, optimizer,
      checkpoint_dir=checkpoint_dir,
      checkpoint_interval=hparams.syncnet_checkpoint_interval,
      nepochs=hparams.nepochs)

# We add a save option: persist the final weights, but never overwrite an
# existing model directory.
if not os.path.exists(model_dir):
    os.mkdir(model_dir)
    model_path = model_dir + "/color_syncnet.pth"
    print('\nModel Path : {}'.format(model_path))
    torch.save(model.state_dict(), model_path)
else:
    print('\nExisting model found at ' + model_dir)
    print('\nDid not overwrite old model. Run the job again with a '
          'different location to store the model')
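# For reference, a minimal sketch of a load_checkpoint helper compatible with
# the call above. This is an illustrative assumption, not the repo's exact
# helper, which may also restore extra state such as global step/epoch
# counters.
def load_checkpoint(path, model, optimizer, reset_optimizer=False):
    print("Loading checkpoint from: {}".format(path))
    # Load onto CPU first so the checkpoint works regardless of where it
    # was saved; the model has already been moved to the right device.
    checkpoint = torch.load(path, map_location=lambda storage, loc: storage)
    model.load_state_dict(checkpoint["state_dict"])
    if not reset_optimizer and checkpoint.get("optimizer") is not None:
        optimizer.load_state_dict(checkpoint["optimizer"])
    return model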
# The same entry point, instrumented with Weights & Biases (wandb) logging:
device = torch.device("cuda" if use_cuda else "cpu")

# Model
model = SyncNet().to(device)
print('total trainable params {}'.format(
    sum(p.numel() for p in model.parameters() if p.requires_grad)))

# Log gradients and parameter histograms to the active wandb run
wandb.watch(model)

optimizer = optim.Adam([p for p in model.parameters() if p.requires_grad],
                       lr=hparams.syncnet_lr)

if checkpoint_path is not None:
    load_checkpoint(checkpoint_path, model, optimizer, reset_optimizer=False)

train(device, model, train_data_loader, test_data_loader, optimizer,
      checkpoint_dir=checkpoint_dir,
      checkpoint_interval=hparams.syncnet_checkpoint_interval,
      nepochs=hparams.nepochs)

# Save the trained weights and upload the file to the wandb run
torch.save(model.state_dict(), "model.h5")
wandb.save('model.h5')
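# The wandb.watch() and wandb.save() calls above assume a run was initialized
# earlier in the script. A minimal sketch of what that could look like; the
# project name and config keys here are illustrative assumptions, not from
# the source:
import wandb

wandb.init(project="wav2lip-syncnet",  # hypothetical project name
           config=dict(batch_size=hparams.syncnet_batch_size,
                       lr=hparams.syncnet_lr,
                       nepochs=hparams.nepochs))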