import torch.optim as optim


def get_optimizer(model, optimizer_type, learning_rate, momentum=0.0):
    """Build an optimizer for `model` by name."""
    if optimizer_type == 'sgd':
        # torch.optim has no lowercase `sgd`; the class is optim.SGD
        return optim.SGD(model.parameters(), lr=learning_rate, momentum=momentum)
    if optimizer_type == 'RMSprop':
        return optim.RMSprop(model.parameters(), lr=learning_rate, momentum=momentum)
    if optimizer_type == 'Adam':
        return optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.0)
    if optimizer_type == 'lbfgs':
        return optim.LBFGS(model.parameters(), lr=learning_rate)
    raise ValueError('Unknown optimizer type: %s' % optimizer_type)
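# Usage sketch (hedged): a minimal call with a toy model. `net` and the
# hyperparameter values are illustrative assumptions, not from the snippet above.
import torch.nn as nn

net = nn.Linear(10, 2)
opt = get_optimizer(net, 'sgd', learning_rate=0.01, momentum=0.9)
opt.zero_grad()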
def get_optimizer(params, lr, config, name='adam'):
    """Build an optimizer from a per-optimizer keyword-argument config dict."""
    name = name.lower()
    if name == 'sgd':
        optimizer = optim.SGD(params, lr=lr, **config[name])
    elif name == 'adam':
        optimizer = optim.Adam(params, lr=lr, **config[name])
    elif name == 'rmsprop':
        optimizer = optim.RMSprop(params, lr=lr, **config[name])
    else:
        raise RuntimeError("%s is not available." % name)
    return optimizer
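# Usage sketch (hedged): the shape of `config` is an assumption inferred from
# the **config[name] expansion above; each key maps to extra optimizer kwargs.
import torch.nn as nn

net = nn.Linear(10, 2)
config = {
    'sgd': {'momentum': 0.9, 'weight_decay': 5e-4},
    'adam': {'betas': (0.9, 0.999)},
    'rmsprop': {'alpha': 0.99},
}
optimizer = get_optimizer(net.parameters(), lr=1e-3, config=config, name='sgd')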
def __init__(self, opt, state_dict=None, num_train_step=-1):
    self.config = opt
    self.updates = state_dict['updates'] if state_dict and 'updates' in state_dict else 0
    self.local_updates = 0
    self.train_loss = AverageMeter()
    self.network = SANBertNetwork(opt)
    if state_dict:
        self.network.load_state_dict(state_dict['state'], strict=False)
    self.mnetwork = nn.DataParallel(self.network) if opt['multi_gpu_on'] else self.network
    self.total_param = sum(p.nelement() for p in self.network.parameters() if p.requires_grad)
    if opt['cuda']:
        self.network.cuda()
    # Exclude biases and LayerNorm weights from weight decay, as in the BERT code.
    no_decay = ['bias', 'gamma', 'beta', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_parameters = [
        {'params': [p for n, p in self.network.named_parameters()
                    if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in self.network.named_parameters()
                    if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0}
    ]
    # Note: the Adamax/Adam classes below are modified versions based on the BERT code.
    if opt['optimizer'] == 'sgd':
        self.optimizer = optim.SGD(optimizer_parameters, opt['learning_rate'],
                                   weight_decay=opt['weight_decay'])
    elif opt['optimizer'] == 'adamax':
        self.optimizer = Adamax(optimizer_parameters, opt['learning_rate'],
                                warmup=opt['warmup'],
                                t_total=num_train_step,
                                max_grad_norm=opt['grad_clipping'],
                                schedule=opt['warmup_schedule'])
        if opt.get('have_lr_scheduler', False):
            opt['have_lr_scheduler'] = False
    elif opt['optimizer'] == 'adadelta':
        self.optimizer = optim.Adadelta(optimizer_parameters, opt['learning_rate'], rho=0.95)
    elif opt['optimizer'] == 'adam':
        self.optimizer = Adam(optimizer_parameters, lr=opt['learning_rate'],
                              warmup=opt['warmup'],
                              t_total=num_train_step,
                              max_grad_norm=opt['grad_clipping'],
                              schedule=opt['warmup_schedule'])
        if opt.get('have_lr_scheduler', False):
            opt['have_lr_scheduler'] = False
    else:
        raise RuntimeError('Unsupported optimizer: %s' % opt['optimizer'])
    if state_dict and 'optimizer' in state_dict:
        self.optimizer.load_state_dict(state_dict['optimizer'])
    if opt['fp16']:
        try:
            # `global amp` must precede the import, which assigns the name in this scope.
            global amp
            from apex import amp
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
        model, optimizer = amp.initialize(self.network, self.optimizer,
                                          opt_level=opt['fp16_opt_level'])
        self.network = model
        self.optimizer = optimizer
    if opt.get('have_lr_scheduler', False):
        if opt.get('scheduler_type', 'rop') == 'rop':
            self.scheduler = ReduceLROnPlateau(self.optimizer, mode='max',
                                               factor=opt['lr_gamma'], patience=3)
        elif opt.get('scheduler_type', 'rop') == 'exp':
            self.scheduler = ExponentialLR(self.optimizer, gamma=opt.get('lr_gamma', 0.95))
        else:
            milestones = [int(step) for step in opt.get('multi_step_lr', '10,20,30').split(',')]
            self.scheduler = MultiStepLR(self.optimizer, milestones=milestones,
                                         gamma=opt.get('lr_gamma'))
    else:
        self.scheduler = None
    self.ema = None
    if opt['ema_opt'] > 0:
        self.ema = EMA(self.config['ema_gamma'], self.network)
        if opt['cuda']:
            self.ema.cuda()
    self.para_swapped = False
    # zero optimizer grad
    self.optimizer.zero_grad()
def __init__(self, opt, state_dict=None, num_train_step=-1):
    self.config = opt
    self.updates = state_dict['updates'] if state_dict and 'updates' in state_dict else 0
    self.train_loss = AverageMeter()
    self.network = SANBertNetwork(opt)
    if state_dict:
        # Drop checkpoint keys absent from the current model, then fill in any
        # missing keys from the freshly initialized network before loading.
        new_state = set(self.network.state_dict().keys())
        for k in list(state_dict['state'].keys()):
            if k not in new_state:
                del state_dict['state'][k]
        for k, v in list(self.network.state_dict().items()):
            if k not in state_dict['state']:
                state_dict['state'][k] = v
        self.network.load_state_dict(state_dict['state'])
    self.mnetwork = nn.DataParallel(self.network) if opt['multi_gpu_on'] else self.network
    self.total_param = sum(p.nelement() for p in self.network.parameters() if p.requires_grad)
    no_decay = ['bias', 'gamma', 'beta', 'LayerNorm.bias', 'LayerNorm.weight']
    # Match by substring: full parameter names (e.g. 'bert...LayerNorm.bias') never
    # equal the bare keywords, so exact membership (`n in no_decay`) would never fire.
    # 'weight_decay_rate' is the group key the BERT-style optimizers below expect.
    optimizer_parameters = [
        {'params': [p for n, p in self.network.named_parameters()
                    if not any(nd in n for nd in no_decay)],
         'weight_decay_rate': 0.01},
        {'params': [p for n, p in self.network.named_parameters()
                    if any(nd in n for nd in no_decay)],
         'weight_decay_rate': 0.0}
    ]
    # Note: the Adamax/Adam classes below are modified versions based on the BERT code.
    if opt['optimizer'] == 'sgd':
        self.optimizer = optim.SGD(optimizer_parameters, opt['learning_rate'],
                                   weight_decay=opt['weight_decay'])
    elif opt['optimizer'] == 'adamax':
        self.optimizer = Adamax(optimizer_parameters, opt['learning_rate'],
                                warmup=opt['warmup'],
                                t_total=num_train_step,
                                max_grad_norm=opt['grad_clipping'],
                                schedule=opt['warmup_schedule'])
        if opt.get('have_lr_scheduler', False):
            opt['have_lr_scheduler'] = False
    elif opt['optimizer'] == 'adadelta':
        self.optimizer = optim.Adadelta(optimizer_parameters, opt['learning_rate'], rho=0.95)
    elif opt['optimizer'] == 'adam':
        self.optimizer = Adam(optimizer_parameters, lr=opt['learning_rate'],
                              warmup=opt['warmup'],
                              t_total=num_train_step,
                              max_grad_norm=opt['grad_clipping'],
                              schedule=opt['warmup_schedule'])
        if opt.get('have_lr_scheduler', False):
            opt['have_lr_scheduler'] = False
    else:
        raise RuntimeError('Unsupported optimizer: %s' % opt['optimizer'])
    if state_dict and 'optimizer' in state_dict:
        self.optimizer.load_state_dict(state_dict['optimizer'])
    if opt.get('have_lr_scheduler', False):
        if opt.get('scheduler_type', 'rop') == 'rop':
            self.scheduler = ReduceLROnPlateau(self.optimizer, mode='max',
                                               factor=opt['lr_gamma'], patience=3)
        elif opt.get('scheduler_type', 'rop') == 'exp':
            self.scheduler = ExponentialLR(self.optimizer, gamma=opt.get('lr_gamma', 0.95))
        else:
            milestones = [int(step) for step in opt.get('multi_step_lr', '10,20,30').split(',')]
            self.scheduler = MultiStepLR(self.optimizer, milestones=milestones,
                                         gamma=opt.get('lr_gamma'))
    else:
        self.scheduler = None
    self.ema = None
    if opt['ema_opt'] > 0:
        self.ema = EMA(self.config['ema_gamma'], self.network)
    self.para_swapped = False
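# Hedged sketch of the no-decay parameter-grouping technique used in the two
# constructors above, on a toy model. The Toy class, optimizer choice (AdamW),
# and hyperparameter values are illustrative assumptions.
import torch.nn as nn
import torch.optim as optim


class Toy(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(8, 8)
        self.LayerNorm = nn.LayerNorm(8)


toy = Toy()
no_decay = ['bias', 'LayerNorm.weight']
grouped_parameters = [
    # decayed group: 'linear.weight'
    {'params': [p for n, p in toy.named_parameters()
                if not any(nd in n for nd in no_decay)], 'weight_decay': 0.01},
    # no-decay group: 'linear.bias', 'LayerNorm.weight', 'LayerNorm.bias'
    {'params': [p for n, p in toy.named_parameters()
                if any(nd in n for nd in no_decay)], 'weight_decay': 0.0},
]
optimizer = optim.AdamW(grouped_parameters, lr=5e-5)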
-- Torch7 (Lua) training loop; here lowercase optim.sgd is the correct name.
self.model:training()
for n, sample in dataloader:run() do
   local dataTime = dataTimer:time().real

   -- Copy input and target to the GPU
   self:copyInputs(sample)

   local output = self.model:forward(self.input):float()
   local batchSize = output:size(1)
   local loss = self.criterion:forward(self.model.output, self.target)

   self.model:zeroGradParameters()
   self.criterion:backward(self.model.output, self.target)
   self.model:backward(self.input, self.criterion.gradInput)

   optim.sgd(feval, self.params, self.optimState)

   local top1, top5 = self:computeScore(output, sample.target, 1)
   top1Sum = top1Sum + top1*batchSize
   top5Sum = top5Sum + top5*batchSize
   lossSum = lossSum + loss*batchSize
   N = N + batchSize

   print((' | Epoch: [%d][%d/%d]  Time %.3f  Data %.3f  Err %1.4f  top1 %7.3f  top5 %7.3f'):format(
      epoch, n, trainSize, timer:time().real, dataTime, loss, top1, top5))

   -- check that the flattened parameter storage was not replaced mid-training
   assert(self.params:storage() == self.model:parameters()[1]:storage())

   timer:reset()
   dataTimer:reset()
end
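# Hedged PyTorch equivalent of one SGD step from the Torch7 loop above, for
# comparison with the Python snippets in this section. The toy model, data,
# and hyperparameters are illustrative assumptions.
import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(4, 3)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)

inputs = torch.randn(2, 4)
targets = torch.tensor([0, 2])

output = model(inputs)
loss = criterion(output, targets)
optimizer.zero_grad()
loss.backward()
optimizer.step()  # plays the role of optim.sgd(feval, params, optimState)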
def __init__(self, observation_space, action_space,
             reduced_action_dim=3,
             parameter_action_dim=4,
             actor_class=QActor,
             actor_kwargs={},
             actor_param_class=ParamActor,
             actor_param_kwargs={},
             epsilon_initial=1.0,
             epsilon_final=0.05,
             epsilon_steps=10000,
             batch_size=64,
             gamma=0.99,
             tau_actor=0.01,  # Polyak averaging factor for copying target weights
             tau_actor_param=0.001,
             replay_memory_size=1000000,
             learning_rate_actor=0.0001,
             learning_rate_actor_param=0.00001,
             initial_memory_threshold=0,
             use_ornstein_noise=False,  # if False, uses epsilon-greedy with uniform-random action-parameter exploration
             loss_func=F.mse_loss,
             clip_grad=10,
             inverting_gradients=False,
             zero_index_gradients=False,
             indexed=False,
             weighted=False,
             average=False,
             random_weighted=False,
             device="cuda" if torch.cuda.is_available() else "cpu",
             initial_phase=True,
             embed_lr=1e-4,
             initial_phase_epochs=1000,
             seed=None):
    super(PDQNAgent, self).__init__(observation_space, action_space)
    self.device = torch.device(device)
    self.num_actions = self.action_space.spaces[0].n
    self.action_parameter_sizes = np.array(
        [self.action_space.spaces[i].shape[0] for i in range(1, self.num_actions + 1)])
    self.action_parameter_size = int(self.action_parameter_sizes.sum())
    self.action_max = torch.from_numpy(np.ones((self.num_actions,))).float().to(device)
    self.action_min = -self.action_max.detach()
    self.action_range = (self.action_max - self.action_min).detach()
    print([self.action_space.spaces[i].high for i in range(1, self.num_actions + 1)])
    self.action_parameter_max_numpy = np.concatenate(
        [self.action_space.spaces[i].high for i in range(1, self.num_actions + 1)]).ravel()
    self.action_parameter_min_numpy = np.concatenate(
        [self.action_space.spaces[i].low for i in range(1, self.num_actions + 1)]).ravel()
    self.action_parameter_range_numpy = (self.action_parameter_max_numpy - self.action_parameter_min_numpy)
    self.action_parameter_max = torch.from_numpy(self.action_parameter_max_numpy).float().to(device)
    self.action_parameter_min = torch.from_numpy(self.action_parameter_min_numpy).float().to(device)
    self.action_parameter_range = torch.from_numpy(self.action_parameter_range_numpy).float().to(device)
    self.epsilon = epsilon_initial
    self.epsilon_initial = epsilon_initial
    self.epsilon_final = epsilon_final
    self.epsilon_steps = epsilon_steps
    self.indexed = indexed
    self.weighted = weighted
    self.average = average
    self.random_weighted = random_weighted
    # At most one of the weighting modes may be enabled at a time.
    assert (weighted ^ average ^ random_weighted) or not (weighted or average or random_weighted)
    self.action_parameter_offsets = self.action_parameter_sizes.cumsum()
    self.action_parameter_offsets = np.insert(self.action_parameter_offsets, 0, 0)
    self.batch_size = batch_size
    self.gamma = gamma
    self.replay_memory_size = replay_memory_size
    self.initial_memory_threshold = initial_memory_threshold
    self.learning_rate_actor = learning_rate_actor
    self.learning_rate_actor_param = learning_rate_actor_param
    self.inverting_gradients = inverting_gradients
    self.tau_actor = tau_actor
    self.tau_actor_param = tau_actor_param
    self._step = 0
    self._episode = 0
    self.updates = 0
    self.clip_grad = clip_grad
    self.zero_index_gradients = zero_index_gradients
    self.np_random = None
    self.seed = seed
    self._seed(seed)
    # embedding initialization part
    self.action_rep = ActionRepresentation.Action_representation(
        state_dim=self.observation_space.shape[0],
        action_dim=self.num_actions,
        reduced_action_dim=self.num_actions,
        parameter_action_dim=self.action_parameter_size)
    self.initial_phase = initial_phase
    self.reduced_action_dim = reduced_action_dim
    self.parameter_action_dim = parameter_action_dim
    self.embed_lr = embed_lr
    self.initial_phase_epochs = initial_phase_epochs
    self.use_ornstein_noise = use_ornstein_noise
    self.noise = OrnsteinUhlenbeckActionNoise(self.action_parameter_size,
                                              random_machine=self.np_random,
                                              mu=0., theta=0.15, sigma=0.0001)  # , theta=0.01, sigma=0.01
    print(self.num_actions + self.action_parameter_size)
    self.replay_memory = Memory(replay_memory_size, observation_space.shape,
                                (1 + 3 + self.action_parameter_size
                                 + self.reduced_action_dim + self.parameter_action_dim,),
                                next_actions=False)  # originally 1 here, not 3
    self.actor = actor_class(self.observation_space.shape[0], self.num_actions,
                             self.action_parameter_size, **actor_kwargs).to(device)
    self.actor_target = actor_class(self.observation_space.shape[0], self.num_actions,
                                    self.action_parameter_size, **actor_kwargs).to(device)
    hard_update_target_network(self.actor, self.actor_target)
    self.actor_target.eval()
    self.actor_param = actor_param_class(self.observation_space.shape[0], self.num_actions,
                                         self.action_parameter_size, **actor_param_kwargs).to(device)
    self.actor_param_target = actor_param_class(self.observation_space.shape[0], self.num_actions,
                                                self.action_parameter_size, **actor_param_kwargs).to(device)
    hard_update_target_network(self.actor_param, self.actor_param_target)
    self.actor_param_target.eval()
    self.loss_func = loss_func  # l1_smooth_loss performs better, but the original paper used MSE
    # The original DDPG paper [Lillicrap et al. 2016] used a weight decay of 0.01 for Q (critic),
    # but setting weight_decay=0.01 on the critic optimiser seems to perform worse...
    # Using AMSGrad (the "fixed" version of Adam, amsgrad=True) doesn't seem to help either...
    self.actor_optimiser = optim.Adam(self.actor.parameters(),
                                      lr=self.learning_rate_actor)  # , betas=(0.95, 0.999)
    self.actor_param_optimiser = optim.Adam(self.actor_param.parameters(),
                                            lr=self.learning_rate_actor_param)  # , betas=(0.95, 0.999), weight_decay=critic_l2_reg
    self.action_rep_optimiser = optim.SGD(self.action_rep.parameters(), lr=self.embed_lr)
    self.cost_his = []
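# Hedged sketch of the target-network update helpers assumed by the constructor
# above. The names match the calls there, but these bodies are illustrative,
# not the original implementations.
def hard_update_target_network(source, target):
    # Copy weights verbatim from the live network into the target network.
    target.load_state_dict(source.state_dict())


def soft_update_target_network(source, target, tau):
    # Polyak averaging with factor tau (e.g. tau_actor / tau_actor_param above):
    # target <- tau * source + (1 - tau) * target
    for t, s in zip(target.parameters(), source.parameters()):
        t.data.copy_(tau * s.data + (1.0 - tau) * t.data)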