import math

import torch
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts

logger.info(model)
logger.info('EMA: {}'.format(ema))


# Optimization
def tensor_in(t, a):
    # Membership test by object identity rather than value equality.
    for a_ in a:
        if t is a_:
            return True
    return False


scheduler = None

# Build the optimizer (and optional LR scheduler) selected on the command line.
if args.optimizer == 'adam':
    optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.99), weight_decay=args.wd)
    if args.scheduler:
        scheduler = CosineAnnealingWarmRestarts(optimizer, 20, T_mult=2, last_epoch=args.begin_epoch - 1)
elif args.optimizer == 'adamax':
    optimizer = optim.Adamax(model.parameters(), lr=args.lr, betas=(0.9, 0.99), weight_decay=args.wd)
elif args.optimizer == 'rmsprop':
    optimizer = optim.RMSprop(model.parameters(), lr=args.lr, weight_decay=args.wd)
elif args.optimizer == 'sgd':
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=args.wd)
    if args.scheduler:
        scheduler = torch.optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[60, 120, 160], gamma=0.2, last_epoch=args.begin_epoch - 1
        )
else:
    raise ValueError('Unknown optimizer {}'.format(args.optimizer))

best_test_bpd = math.inf
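
# --- Usage sketch (assumption, not part of the original script) ---
# Illustrates how the optimizer/scheduler built above are typically stepped.
# `train_loader` and `compute_loss` are hypothetical placeholders, and `device`
# is assumed to be defined by the surrounding script; the real training loop
# may differ.
def train_one_epoch(train_loader, compute_loss):
    model.train()
    for x, _ in train_loader:
        optimizer.zero_grad()
        loss = compute_loss(model, x.to(device))
        loss.backward()
        optimizer.step()
    if scheduler is not None:
        # Both CosineAnnealingWarmRestarts and MultiStepLR support per-epoch stepping.
        scheduler.step()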

logger.info(model)
logger.info('EMA: {}'.format(ema))


# Optimization
def tensor_in(t, a):
    for a_ in a:
        if t is a_:
            return True
    return False


scheduler = None

# Optimize the flow model and the GMM prior jointly with a single optimizer.
params = [par for par in model.parameters()] + [par for par in gmm.parameters()]
# params = [par for par in gmm.parameters()]

if args.optimizer == 'adam':
    optimizer = optim.Adam(params, lr=args.lr, betas=(0.9, 0.99), weight_decay=args.wd)
    if args.scheduler:
        scheduler = CosineAnnealingWarmRestarts(optimizer, 20, T_mult=2, last_epoch=args.begin_epoch - 1)
elif args.optimizer == 'adamax':
    optimizer = optim.Adamax(params, lr=args.lr, betas=(0.9, 0.99), weight_decay=args.wd)
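
# --- Sketch (assumption, not in the original script): parameter groups ---
# Instead of concatenating the two parameter lists, torch optimizers also accept
# per-group settings, e.g. a separate learning rate for the GMM prior. The
# function name and `gmm_lr` argument are illustrative only.
def build_joint_optimizer(base_lr, gmm_lr):
    return optim.Adam(
        [
            {'params': model.parameters()},
            {'params': gmm.parameters(), 'lr': gmm_lr},
        ],
        lr=base_lr, betas=(0.9, 0.99), weight_decay=args.wd,
    )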

logger.info('EMA: {}'.format(ema))


# Optimization
def tensor_in(t, a):
    for a_ in a:
        if t is a_:
            return True
    return False


scheduler = None

# from itertools import chain
# The flow (encoder + transforms) and the classification heads get separate optimizers.
params = list(model.encoder.parameters()) + list(model.transforms.parameters())
optimizer = optim.Adam(params, lr=1e-3, betas=(0.9, 0.99), weight_decay=args.wd)

params_classifer = list(model.classification_heads.parameters())
# optimizer_classifier = torch.optim.SGD(params_classifer, lr=1e-1, momentum=0.9, weight_decay=args.wd)
optimizer_classifier = torch.optim.Adam(params_classifer, lr=1e-1, betas=(0.9, 0.99), weight_decay=args.wd)

if args.scheduler:
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[60, 120, 160], gamma=0.2, last_epoch=args.begin_epoch - 1
    )

best_test_bpd = math.inf

if args.resume is not None:
    logger.info('Resuming model from {}'.format(args.resume))
    with torch.no_grad():
        # Dummy forward pass so lazily initialized parameters/buffers exist before loading weights.
        x = torch.rand(1, *input_size[1:]).to(device)
        model(x)
    checkpt = torch.load(args.resume)
    # Skip checkpoint entries whose key contains 'last_n_samples'.
    sd = {k: v for k, v in checkpt['state_dict'].items() if 'last_n_samples' not in k}
    state = model.state_dict()
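    # Sketch (assumption): the original snippet ends here. A common way to finish
    # a filtered partial restore is to merge the kept checkpoint entries into the
    # current state dict and load the result back into the model.
    state.update(sd)
    model.load_state_dict(state)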