def __init__(self, args):
    print("WGAN_GradientPenalty init model.")
    # NOTE: these must be module instances (not classes) for the .parameters()
    # calls below to work; no-argument constructors are assumed here.
    self.G = ResGenerator2D()
    self.D = ResDiscriminator2D()
    self.C = args.channels

    # Check if cuda is available
    self.check_cuda(args.cuda)

    # WGAN values from paper (b1/b2 are the Adam betas used in the WGAN-GP paper;
    # this implementation uses RMSprop instead, so they are kept for reference only)
    self.learning_rate = 1e-4
    self.b1 = 0.5
    self.b2 = 0.999
    self.batch_size = 64

    self.d_optimizer = optim.RMSprop(self.D.parameters(), lr=self.learning_rate, alpha=0.99)
    self.g_optimizer = optim.RMSprop(self.G.parameters(), lr=self.learning_rate, alpha=0.99)

    self.generator_iters = args.generator_iters
    self.critic_iter = 5
    self.lambda_term = 10

    self.start_GPU = args.start_GPU  # attribute access, consistent with the other args lookups
    self.num_GPU = args.num_GPU      # assumed to come from args; it was never set in the original
    self.device = torch.device(
        f"cuda:{self.start_GPU}"
        if (torch.cuda.is_available() and self.num_GPU > 0)
        else "cpu"
    )
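The comment above mentions Adam and defines b1/b2, which are otherwise unused with RMSprop. The following is a minimal, self-contained sketch (not the author's code) of the Adam setup the WGAN-GP paper prescribes; the tiny Linear modules stand in for ResGenerator2D / ResDiscriminator2D, whose constructors are not shown in this excerpt.

# Sketch only: placeholder modules and the same b1/b2/learning_rate values as above.
import torch.nn as nn
import torch.optim as optim

G, D = nn.Linear(128, 64), nn.Linear(64, 1)   # stand-ins for the real generator/discriminator
learning_rate, b1, b2 = 1e-4, 0.5, 0.999
g_optimizer = optim.Adam(G.parameters(), lr=learning_rate, betas=(b1, b2))
d_optimizer = optim.Adam(D.parameters(), lr=learning_rate, betas=(b1, b2))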
def Optimizer(opt, gen, dis):
    if opt.gan_optim == 'Adam':
        g_optimizer = optim.Adam(gen.parameters(), lr=opt.g_learning_rate)
        d_optimizer = optim.Adam(dis.parameters(), lr=opt.d_learning_rate)
    elif opt.gan_optim == 'rmsprop':
        g_optimizer = optim.RMSprop(gen.parameters(), lr=opt.g_learning_rate)
        d_optimizer = optim.RMSprop(dis.parameters(), lr=opt.d_learning_rate)
    else:
        print("GAN OPTIMIZER IS NOT IMPLEMENTED")
        raise NotImplementedError
    return g_optimizer, d_optimizer
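A hypothetical usage sketch of the factory above: `opt` is assumed to be an argparse-style namespace with `gan_optim`, `g_learning_rate`, and `d_learning_rate` fields, and the tiny Linear models are placeholders for the real generator and discriminator.

# Illustrative only; field names and models are assumptions, not from the original project.
from argparse import Namespace
import torch.nn as nn
import torch.optim as optim

opt = Namespace(gan_optim='rmsprop', g_learning_rate=1e-4, d_learning_rate=1e-4)
gen, dis = nn.Linear(100, 784), nn.Linear(784, 1)
g_optimizer, d_optimizer = Optimizer(opt, gen, dis)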
def __init__(self, observation_shape, num_actions, device='cuda:0', gamma=0.99,
             learning_rate=0.001, weight_decay=0.0, update_tar_interval=1000,
             clip_gradient=True, optim_name='Adam'):
    self.num_actions = num_actions
    self.gamma = gamma
    self.device = device
    self.clip_gradient = clip_gradient
    self.optim_name = optim_name
    self.weight_decay = weight_decay
    self.update_tar_interval = update_tar_interval

    self.model = VoxelDQN(observation_shape, num_actions).to(device)
    self.target_model = VoxelDQN(observation_shape, num_actions).to(device)
    self.target_model.load_state_dict(self.model.state_dict())

    if optim_name == "SGD":
        self.optimizer = optim.SGD(self.model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    elif optim_name == "RMSProp":
        self.optimizer = optim.RMSprop(self.model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    elif optim_name == "Adam":
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
def __init__(self, parametersList):
    lstmParams = [parametersList[0]]
    rest = parametersList[1:]  # fixed typo (paramtersList) and removed the extra list nesting
    self.optimizers = [
        optim.SGD(rest, lr=LR, weight_decay=5e-4, momentum=MOMENTUM),
        optim.RMSprop(lstmParams, lr=LR, weight_decay=5e-4),
    ]
    # materialize the chained param_groups so they can be iterated more than once
    self.param_groups = list(itertools.chain(self.optimizers[0].param_groups,
                                             self.optimizers[1].param_groups))
def load_optim(optimizer, model, custom_optim=None, epochs=None):
    run_optim = ""
    optim_name = optimizer['name']
    lr = optimizer['lr']
    if 'lr_min' in optimizer.keys():
        lr_min = optimizer['lr_min']
    else:
        lr_min = lr * 0.01  # Hardcoded for now, but if we don't specify, go from lr to 1/100th of lr

    if optim_name in supported_optims:
        if optim_name == "Adam":
            run_optim = optim.Adam(model.parameters(), lr=lr)
        elif optim_name == "SGD":
            run_optim = optim.SGD(model.parameters(), lr=lr)
        elif optim_name == "Adagrad":
            run_optim = optim.Adagrad(model.parameters(), lr=lr)
        elif optim_name == "RMSProp":
            run_optim = optim.RMSprop(model.parameters(), lr=lr)
        else:
            print("Unknown optim defaulting to...Adam")
            run_optim = optim.Adam(model.parameters(), lr=lr)
    else:
        run_optim = custom_optim

    # Now if we specified a scheduler, use it
    if 'scheduler' in optimizer.keys() and 'scheduler_type' in optimizer.keys():
        use_scheduler = optimizer['scheduler']
        scheduler_type = optimizer['scheduler_type']
        if use_scheduler:
            if scheduler_type == "linear":
                step_size = (lr - lr_min) / epochs
                lr_lambda = lambda epoch: epoch - step_size
            else:
                # "multiplicative" or any unrecognized type falls back to a constant multiplier.
                # torch.optim.lr_scheduler.MultiplicativeLR doesn't seem to be in torch 1.5.0,
                # so make the same function.
                multiplier = optimizer.get('multiplier', 0.95)  # Default to 0.95
                lr_lambda = lambda epoch: epoch * multiplier
            lr_optim = CustomLRScheduler(run_optim, lr_lambda)
            return lr_optim
        else:
            print("Use scheduler is false")
            return run_optim

    # No scheduler keys were given: return the bare optimizer instead of
    # implicitly returning None (the original fell off the end of the function here).
    return run_optim
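An illustrative call to load_optim, assuming keys inferred from how the function reads its `optimizer` dict; the Linear model is a placeholder, and the real config schema, `supported_optims`, and `CustomLRScheduler` come from the surrounding project.

# Sketch only: dict keys and the placeholder model are assumptions.
import torch.nn as nn

model = nn.Linear(32, 4)
optimizer_cfg = {
    'name': 'Adam',
    'lr': 1e-3,
    'lr_min': 1e-5,
    'scheduler': True,
    'scheduler_type': 'multiplicative',
    'multiplier': 0.95,
}
lr_optim = load_optim(optimizer_cfg, model, epochs=100)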
def __init__(self, params, args):
    super().__init__()
    optimizer_type = args.optimizer
    lr = args.learning_rate
    momentum = args.momentum
    weight_decay = args.weight_decay
    # eps = args.eps

    if optimizer_type == "RMSProp":
        self.m_optimizer = optim.RMSprop(params, lr=lr, momentum=momentum)
    elif optimizer_type == "SGD":
        self.m_optimizer = optim.SGD(params, lr=lr, weight_decay=weight_decay)
    elif optimizer_type == "Adam":
        self.m_optimizer = optim.Adam(params, lr=lr, weight_decay=weight_decay)
    elif optimizer_type == "AdamW":
        self.m_optimizer = optim.AdamW(params, lr=lr, weight_decay=weight_decay)
    else:
        raise NotImplementedError
def __init__(self, params, args):
    optimizer_type = args.optimizer_type
    lr = args.learning_rate
    momentum = args.momentum
    weight_decay = args.weight_decay
    eps = args.eps

    if optimizer_type == "RMSProp":
        self.m_optimizer = optim.RMSprop(params, lr=lr, eps=eps, weight_decay=weight_decay, momentum=momentum)
    elif optimizer_type == "Adam":
        self.m_optimizer = optim.Adam(params, lr=lr)
    else:
        raise NotImplementedError
model.to(device)

# Freeze the embeddings for n_freeze epochs
if args.n_freeze > 0:
    if args.model in ['Context_CP', 'Context_CP_v2']:
        model.lhs.weight.requires_grad = False
        model.rel.weight.requires_grad = False
        model.rh.weight.requires_grad = False
    elif args.model in ['ContExt']:
        for i in range(2):
            model.embeddings[i].weight.requires_grad = False

optim_method = {
    'Adagrad': lambda: optim.Adagrad(model.parameters(), lr=args.learning_rate),
    'RMSprop': lambda: optim.RMSprop(model.parameters(), lr=args.learning_rate),
    'SGD': lambda: optim.SGD(model.parameters(), lr=args.learning_rate),
}[args.optimizer]()

# print('Model state:')
# for param_tensor in model.state_dict():
#     print(f'\t{param_tensor}\t{model.state_dict()[param_tensor].size()}')

optimizer = KBCOptimizer(model, regularizer, optim_method, args.batch_size, n_freeze=args.n_freeze)


def avg_both(mrrs: Dict[str, float], hits: Dict[str, torch.FloatTensor]):
    """
    aggregate metrics for missing lhs and rhs
    :param mrrs: d
    :param hits:
    :return:
def optimizer_factory(config, params):
    """
    A convenience function that initializes some of the common optimizers
    supported by PyTorch.

    Supports:
        - adadelta
        - adagrad
        - adam
        - adamax
        - rmsprop
        - sgd

    For more information on these optimizers, see the PyTorch documentation.

    Args:
        config: dict
            Contains the parameters needed to initialize the optimizer,
            such as the learning rate, weight decay, etc.
        params: iterable
            An iterable of parameters to optimize or dicts defining parameter groups.

    Returns:
        optim: optim.Optimizer
            An optimizer object
    """
    if config["type"] == "adadelta":
        return optim.Adadelta(params,
                              lr=config.get("lr", 1.0),
                              rho=config.get("rho", 0.9),
                              eps=config.get("eps", 1e-6),
                              weight_decay=config.get("weight_decay", 0))
    elif config["type"] == "adagrad":
        return optim.Adagrad(params,
                             lr=config.get("lr", 0.01),
                             lr_decay=config.get("lr_decay", 0),
                             weight_decay=config.get("weight_decay", 0),
                             initial_accumulator_value=config.get("initial_accumulator_value", 0))
    elif config["type"] == "adam":
        return optim.Adam(params,
                          lr=config.get("lr", 0.001),
                          betas=config.get("betas", (0.9, 0.999)),
                          eps=config.get("eps", 1e-8),
                          weight_decay=config.get("weight_decay", 0),
                          amsgrad=config.get("amsgrad", False))
    elif config["type"] == "adamax":
        return optim.Adamax(params,
                            lr=config.get("lr", 0.002),
                            betas=config.get("betas", (0.9, 0.999)),
                            eps=config.get("eps", 1e-8),
                            weight_decay=config.get("weight_decay", 0))
    elif config["type"] == "rmsprop":
        return optim.RMSprop(params,
                             lr=config.get("lr", 0.01),
                             alpha=config.get("alpha", 0.99),
                             eps=config.get("eps", 1e-8),
                             weight_decay=config.get("weight_decay", 0),
                             momentum=config.get("momentum", 0),
                             centered=config.get("centered", False))
    elif config["type"] == "sgd":
        return optim.SGD(params,
                         lr=config.get("lr", 0.001),
                         momentum=config.get("momentum", 0),
                         dampening=config.get("dampening", 0),
                         weight_decay=config.get("weight_decay", 0),
                         nesterov=config.get("nesterov", False))
    else:
        raise ValueError("Unrecognized optimizer type.")
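A short usage sketch of optimizer_factory; the small Linear model and the config values are placeholders rather than values taken from the original project.

# Illustrative only: builds a centered RMSprop optimizer from a config dict.
import torch.nn as nn

model = nn.Linear(10, 2)
rmsprop_cfg = {"type": "rmsprop", "lr": 0.005, "momentum": 0.9, "centered": True}
optimizer = optimizer_factory(rmsprop_cfg, model.parameters())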
class SAE(nn.Module):  # class header assumed from the super(SAE, ...) call and nn.Module usage
    def __init__(self):
        super(SAE, self).__init__()
        self.fc1 = nn.Linear(nb_movies, 20)
        self.fc2 = nn.Linear(20, 10)
        self.fc3 = nn.Linear(10, 20)
        self.fc4 = nn.Linear(20, nb_movies)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.activation(self.fc3(x))
        x = self.fc4(x)
        return x


sae = SAE()
criterion = nn.MSELoss()
optimizer = optim.RMSprop(sae.parameters(), lr=0.01, weight_decay=0.5)

# Training the SAE
nb_epoch = 200
for epoch in range(1, nb_epoch + 1):
    train_loss = 0
    s = 0.
    for id_user in range(nb_users):
        input = Variable(training_set[id_user]).unsqueeze(0)
        target = input.clone()
        if torch.sum(target.data > 0) > 0:
            output = sae(input)
            target.requires_grad = False  # was misspelled `require_grad`, which silently did nothing
            output[target == 0] = 0
            loss = criterion(output, target)
            # Parenthesize the sum before adding the epsilon; the original added 1e-10
            # inside the comparison instead of to the count of rated movies.
            mean_corrector = nb_movies / float(torch.sum(target.data > 0) + 1e-10)
def train():
    if args.dataset == 'COCO':
        pass
        # if args.dataset_root == VOC_ROOT:
        #     if not os.path.exists(COCO_ROOT):
        #         parser.error('Must specify dataset_root if specifying dataset')
        #     print("WARNING: Using default COCO dataset_root because " +
        #           "--dataset_root was not specified.")
        #     args.dataset_root = COCO_ROOT
        # cfg = coco
        # dataset = COCODetection(root=args.dataset_root,
        #                         transform=SSDAugmentation(cfg['min_dim'], MEANS))
    elif args.dataset == 'VOC':
        # if args.dataset_root == COCO_ROOT:
        #     parser.error('Must specify dataset if specifying dataset_root')
        cfg = voc
        dataset = VOCDetection(root=args.dataset_root,
                               transform=SSDAugmentation(cfg['min_dim'], MEANS))
    elif args.dataset == 'STANFORD':
        # if args.dataset_root == COCO_ROOT:
        #     parser.error('Must specify dataset if specifying dataset_root')
        cfg = stanford
        dataset = StanfordDetection(root=args.dataset_root,
                                    transform=SSDAugmentation(cfg['min_dim'], MEANS))

    ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
    net = ssd_net

    if args.cuda:
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        vgg_weights = torch.load(args.save_folder + args.basenet)
        print('Loading base network...')
        ssd_net.vgg.load_state_dict(vgg_weights)

    if args.cuda:
        net = net.cuda()

    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.extras.apply(weights_init)
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)

    if args.optimizer == 'Adadelta':
        # Adadelta has no momentum argument; passing one raises a TypeError.
        optimizer = optim.Adadelta(net.parameters(), lr=args.lr,
                                   weight_decay=args.weight_decay)
    elif args.optimizer == 'RMSProp':
        optimizer = optim.RMSprop(net.parameters(), lr=args.lr,
                                  weight_decay=args.weight_decay)
    elif args.optimizer == 'Adam':
        optimizer = optim.Adam(net.parameters(), lr=args.lr,
                               weight_decay=args.weight_decay)
    else:
        optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                              weight_decay=args.weight_decay)

    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5, False,
                             args.cuda)

    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading the dataset...')

    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)

    step_index = 0

    if args.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)
    # create batch iterator
    batch_iterator = iter(data_loader)
    for iteration in range(args.start_iter, cfg['max_iter']):
        if iteration != 0 and (iteration % epoch_size == 0):
            # integer division so the epoch number and checkpoint name are not floats
            print('Saving state, epoch:', iteration // epoch_size)
            torch.save(ssd_net.state_dict(),
                       'weights/ssd300_STANFORD_epoch_' +
                       repr(iteration // epoch_size) + '.pth')
            if args.visdom:
                update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
                                'append', epoch_size)
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1

        if iteration in cfg['lr_steps']:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)

        try:
            # load train data
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader)
            images, targets = next(batch_iterator)

        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(ann.cuda(), volatile=True) for ann in targets]
        else:
            images = Variable(images)
            targets = [Variable(ann, volatile=True) for ann in targets]

        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()

        if iteration % 10 == 0:
            print('timer: %.4f sec.' % (t1 - t0))
            print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.item()), end=' ')

        if args.visdom:
            # use .item() for consistency with the loss accumulation above;
            # indexing a 0-dim tensor with .data[0] fails on recent PyTorch
            update_vis_plot(iteration, loss_l.item(), loss_c.item(),
                            iter_plot, epoch_plot, 'append')

        # if iteration != 0 and iteration % 5000 == 0:
        #     print('Saving state, iter:', iteration)
        #     torch.save(ssd_net.state_dict(), 'weights/ssd300_COCO_' +
        #                repr(iteration) + '.pth')

    torch.save(ssd_net.state_dict(),
               args.save_folder + args.dataset + '.pth')
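train() calls adjust_learning_rate(optimizer, args.gamma, step_index), which is not shown in this excerpt. The sketch below is modeled on the standard ssd.pytorch helper; the use of a global `args.lr` as the base rate is an assumption.

# Sketch only, not part of the excerpt above.
def adjust_learning_rate(optimizer, gamma, step):
    """Decay the learning rate to args.lr * (gamma ** step) at each lr_steps milestone."""
    lr = args.lr * (gamma ** step)
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr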