def main(args):
    config = load_config(args)
    global_eval_config = config["eval_params"]
    models, model_names = config_modelloader(config, load_pretrain=True)

    robust_errs = []
    errs = []

    for model, model_id, model_config in zip(models, model_names, config["models"]):
        # make a copy of global training config, and update per-model config
        eval_config = copy.deepcopy(global_eval_config)
        if "eval_params" in model_config:
            eval_config.update(model_config["eval_params"])

        model = BoundSequential.convert(model, eval_config["method_params"]["bound_opts"])
        model = model.cuda()

        # read training parameters from config file
        method = eval_config["method"]
        verbose = eval_config["verbose"]
        eps = eval_config["epsilon"]
        # parameters specific to a training method
        method_param = eval_config["method_params"]
        norm = float(eval_config["norm"])
        train_data, test_data = config_dataloader(config, **eval_config["loader_params"])

        model_name = get_path(config, model_id, "model", load=False)
        print(model_name)
        model_log = get_path(config, model_id, "eval_log")
        logger = Logger(open(model_log, "w"))
        logger.log("evaluation configurations:", eval_config)

        logger.log("Evaluating...")
        with torch.no_grad():
            # evaluate
            robust_err, err = Train(model, 0, test_data,
                                    EpsilonScheduler("linear", 0, 0, eps, eps, 1),
                                    eps, norm, logger, verbose, False, None, method, **method_param)

        robust_errs.append(robust_err)
        errs.append(err)

    print('model robust errors (for robustly trained models, not valid for naturally trained models):')
    print(robust_errs)
    robust_errs = np.array(robust_errs)
    print('min: {:.4f}, max: {:.4f}, median: {:.4f}, mean: {:.4f}'.format(
        np.min(robust_errs), np.max(robust_errs), np.median(robust_errs), np.mean(robust_errs)))
    print('clean errors for models with min, max and median robust errors')
    i_min = np.argmin(robust_errs)
    i_max = np.argmax(robust_errs)
    i_median = np.argsort(robust_errs)[len(robust_errs) // 2]
    print('for min: {:.4f}, for max: {:.4f}, for median: {:.4f}'.format(
        errs[i_min], errs[i_max], errs[i_median]))
    print('model clean errors:')
    print(errs)
    print('min: {:.4f}, max: {:.4f}, median: {:.4f}, mean: {:.4f}'.format(
        np.min(errs), np.max(errs), np.median(errs), np.mean(errs)))
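# Illustrative sketch (hypothetical helper, not the repo's EpsilonScheduler):
# the evaluation call above builds EpsilonScheduler("linear", 0, 0, eps, eps, 1),
# i.e. a linear schedule whose start and end steps coincide, which degenerates
# to a constant epsilon. A minimal linear ramp with those semantics:

def linear_eps(step, schedule_start, schedule_end, init_value, final_value):
    """Hypothetical linear ramp; constant when schedule_start == schedule_end."""
    if step <= schedule_start:
        return init_value
    if step >= schedule_end:
        return final_value
    frac = (step - schedule_start) / float(schedule_end - schedule_start)
    return init_value + frac * (final_value - init_value)

# With schedule_start == schedule_end == 0 and init_value == final_value == eps,
# every step evaluates the model at the full perturbation budget eps.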
def main(args):
    config = load_config(args)
    prefix = config['env_id']
    training_config = config['training_config']
    if config['name_suffix']:
        prefix += config['name_suffix']
    if config['path_prefix']:
        prefix = os.path.join(config['path_prefix'], prefix)
    if not os.path.exists(prefix):
        os.makedirs(prefix)

    train_log = os.path.join(prefix, 'train.log')
    logger = Logger(open(train_log, "w"))
    logger.log('Command line:', " ".join(sys.argv[:]))
    logger.log(args)
    logger.log(config)

    env_params = training_config['env_params']
    env_id = config['env_id']
    if "NoFrameskip" not in env_id:
        env = make_atari_cart(env_id)
    else:
        env = make_atari(env_id)
        env = wrap_deepmind(env, **env_params)
        env = wrap_pytorch(env)

    seed = training_config['seed']
    env.seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)

    state = env.reset()
    dtype = state.dtype
    logger.log("env_shape: {}, num of actions: {}".format(
        env.observation_space.shape, env.action_space.n))
    if "NoFrameskip" in env_id:
        logger.log('action meaning:', env.unwrapped.get_action_meanings()[:env.action_space.n])

    robust = training_config.get('robust', False)
    adv_train = training_config.get('adv_train', False)
    bound_solver = training_config.get('bound_solver', 'cov')
    attack_config = {}
    if adv_train or bound_solver == 'pgd':
        test_config = config['test_config']
        attack_config = training_config["attack_config"]
        adv_ratio = training_config.get('adv_ratio', 1)
        if adv_train:
            logger.log('using adversarial examples for training, adv ratio:', adv_ratio)
        else:
            logger.log('using pgd regularization training')
    if robust or adv_train:
        schedule_start = training_config['schedule_start']
        schedule_length = training_config['schedule_length']
        starting_epsilon = training_config['start_epsilon']
        end_epsilon = training_config['epsilon']
        epsilon_scheduler = EpsilonScheduler(
            training_config.get("schedule_type", "linear"),
            schedule_start, schedule_start + schedule_length - 1,
            starting_epsilon, end_epsilon, 1)
        max_eps = end_epsilon

    model_width = training_config['model_width']
    robust_model = robust and bound_solver != 'pgd'
    dueling = training_config.get('dueling', True)

    current_model = model_setup(env_id, env, robust_model, logger, USE_CUDA, dueling, model_width)
    target_model = model_setup(env_id, env, robust_model, logger, USE_CUDA, dueling, model_width)

    load_path = training_config["load_model_path"]
    if load_path != "" and os.path.exists(load_path):
        load_frame = int(re.findall('^.*frame_([0-9]+).pth$', load_path)[0])
        logger.log('\ntrain from model {}, current frame index is {}\n'.format(load_path, load_frame))
        current_model.features.load_state_dict(torch.load(load_path))
        target_model.features.load_state_dict(torch.load(load_path))
    else:
        logger.log('\ntrain from scratch')
        load_frame = 1

    lr = training_config['lr']
    grad_clip = training_config['grad_clip']
    natural_loss_fn = training_config['natural_loss_fn']
    optimizer = optim.Adam(current_model.parameters(), lr=lr, eps=training_config['adam_eps'])

    # Do not evaluate gradients for the target model.
    for param in target_model.features.parameters():
        param.requires_grad = False

    buffer_config = training_config['buffer_params']
    replay_initial = buffer_config['replay_initial']
    buffer_capacity = buffer_config['buffer_capacity']
    use_cpp_buffer = training_config["cpprb"]
    use_async_rb = training_config['use_async_rb']
    num_frames = training_config['num_frames']
    batch_size = training_config['batch_size']
    gamma = training_config['gamma']

    if use_cpp_buffer:
        logger.log('using cpp replay buffer')
        if use_async_rb:
            replay_buffer_ctor = AsyncReplayBuffer(initial_state=state, batch_size=batch_size)
        else:
            replay_buffer_ctor = cpprb.PrioritizedReplayBuffer
    else:
        logger.log('using python replay buffer')

    per = training_config['per']

    if per:
        logger.log('using prioritized experience replay.')
        alpha = buffer_config['alpha']
        buffer_beta_start = buffer_config['buffer_beta_start']
        buffer_beta_frames = buffer_config.get('buffer_beta_frames', -1)
        if buffer_beta_frames < replay_initial:
            buffer_beta_frames = num_frames - replay_initial
            logger.log('buffer_beta_frames reset to', buffer_beta_frames)
        buffer_beta_scheduler = BufferBetaScheduler(buffer_beta_start, buffer_beta_frames,
                                                    start_frame=replay_initial)
        if use_cpp_buffer:
            replay_buffer = replay_buffer_ctor(
                size=buffer_capacity,
                # env_dict={"obs": {"shape": state.shape, "dtype": np.uint8},
                env_dict={
                    "obs": {"shape": state.shape, "dtype": dtype},
                    "act": {"shape": 1, "dtype": np.uint8},
                    "rew": {},
                    # "next_obs": {"shape": state.shape, "dtype": np.uint8},
                    "next_obs": {"shape": state.shape, "dtype": dtype},
                    "done": {}
                },
                alpha=alpha,
                eps=0.0)  # We add eps manually in training loop
        else:
            replay_buffer = PrioritizedReplayBuffer(buffer_capacity, alpha=alpha)
    else:
        logger.log('using regular replay.')
        if use_cpp_buffer:
            replay_buffer = cpprb.ReplayBuffer(
                buffer_capacity,
                # {"obs": {"shape": state.shape, "dtype": np.uint8},
                {
                    "obs": {"shape": state.shape, "dtype": dtype},
                    "act": {"shape": 1, "dtype": np.uint8},
                    "rew": {},
                    # "next_obs": {"shape": state.shape, "dtype": np.uint8},
                    "next_obs": {"shape": state.shape, "dtype": dtype},
                    "done": {}
                })
        else:
            replay_buffer = ReplayBuffer(buffer_capacity)

    update_target(current_model, target_model)

    act_epsilon_start = training_config['act_epsilon_start']
    act_epsilon_final = training_config['act_epsilon_final']
    act_epsilon_decay = training_config['act_epsilon_decay']
    act_epsilon_method = training_config['act_epsilon_method']
    if training_config.get('act_epsilon_decay_zero', True):
        decay_zero = num_frames
    else:
        decay_zero = None
    act_epsilon_scheduler = ActEpsilonScheduler(act_epsilon_start, act_epsilon_final, act_epsilon_decay,
                                                method=act_epsilon_method,
                                                start_frame=replay_initial,
                                                decay_zero=decay_zero)

    # Use optimized cuda memory management
    memory_mgr = CudaTensorManager(state.shape, batch_size, per, USE_CUDA, dtype=dtype)

    losses = []
    td_losses = []
    batch_cur_q = []
    batch_exp_q = []

    sa = None
    kappa = None
    hinge = False
    if robust:
        logger.log('using convex relaxation certified classification loss as a regularization!')
        kappa = training_config['kappa']
        reg_losses = []
        sa = np.zeros((current_model.num_actions, current_model.num_actions - 1), dtype=np.int32)
        for i in range(sa.shape[0]):
            for j in range(sa.shape[1]):
                if j < i:
                    sa[i][j] = j
                else:
                    sa[i][j] = j + 1
        sa = torch.LongTensor(sa)
        hinge = training_config.get('hinge', False)
        logger.log('using hinge loss (default is cross entropy):', hinge)

    if training_config['use_async_env']:
        # Create an environment in a separate process, run asynchronously
        async_env = AsyncEnv(env_id, result_path=prefix, draw=training_config['show_game'],
                             record=training_config['record_game'], env_params=env_params, seed=seed)

    # initialize parameters in logging
    all_rewards = []
    episode_reward = 0
    act_epsilon = np.nan
    grad_norm = np.nan
    weights_norm = np.nan
    best_test_reward = -float('inf')
    buffer_stored_size = 0
    if adv_train:
        attack_count = 0
        suc_count = 0
    if robust and bound_solver == 'pgd':
        ori_margin, adv_margin = np.nan, np.nan

    start_time = time.time()
    period_start_time = time.time()

    # Main Loop
    for frame_idx in range(load_frame, num_frames + 1):
        # Step 1: get current action
        frame_start = time.time()
        t = time.time()

        eps = 0
        if adv_train or robust:
            eps = epsilon_scheduler.get_eps(frame_idx, 0)

        act_epsilon = act_epsilon_scheduler.get(frame_idx)

        if adv_train and eps != np.nan and eps >= np.finfo(np.float32).tiny:
            ori_state_tensor = torch.from_numpy(
                np.ascontiguousarray(state)).unsqueeze(0).cuda().to(torch.float32)
            if dtype in UINTS:
                ori_state_tensor /= 255
            attack_config['params']['epsilon'] = eps
            if random.random() < adv_ratio:
                attack_count += 1
                state_tensor = attack(current_model, ori_state_tensor, attack_config)
                if current_model.act(state_tensor)[0] != current_model.act(ori_state_tensor)[0]:
                    suc_count += 1
            else:
                state_tensor = ori_state_tensor
            action = current_model.act(state_tensor, act_epsilon)[0]
        else:
            with torch.no_grad():
                state_tensor = torch.from_numpy(
                    np.ascontiguousarray(state)).unsqueeze(0).cuda().to(torch.float32)
                if dtype in UINTS:
                    state_tensor /= 255
                ori_state_tensor = torch.clone(state_tensor)
                action = current_model.act(state_tensor, act_epsilon)[0]

        # torch.cuda.synchronize()
        log_time('act_time', time.time() - t)

        # Step 2: run environment
        t = time.time()
        if training_config['use_async_env']:
            async_env.async_step(action)
        else:
            next_state, reward, done, _ = env.step(action)
        log_time('env_time', time.time() - t)

        # Step 3: save to buffer
        # For asynchronous env, defer saving
        if not training_config['use_async_env']:
            t = time.time()
            if use_cpp_buffer:
                replay_buffer.add(obs=state, act=action, rew=reward, next_obs=next_state, done=done)
            else:
                replay_buffer.push(state, action, reward, next_state, done)
            log_time('save_time', time.time() - t)

        if use_cpp_buffer:
            buffer_stored_size = replay_buffer.get_stored_size()
        else:
            buffer_stored_size = len(replay_buffer)

        beta = np.nan
        buffer_beta = np.nan
        t = time.time()

        if buffer_stored_size > replay_initial:
            if training_config['per']:
                buffer_beta = buffer_beta_scheduler.get(frame_idx)
            if robust:
                convex_final_beta = training_config['convex_final_beta']
                convex_start_beta = training_config['convex_start_beta']
                beta = (max_eps - eps * (1.0 - convex_final_beta)) / max_eps * convex_start_beta

            res = compute_td_loss(current_model, target_model, batch_size, replay_buffer, per,
                                  use_cpp_buffer, use_async_rb, optimizer, gamma, memory_mgr, robust,
                                  buffer_beta=buffer_beta, grad_clip=grad_clip,
                                  natural_loss_fn=natural_loss_fn, eps=eps, beta=beta, sa=sa,
                                  kappa=kappa, dtype=dtype, hinge=hinge,
                                  hinge_c=training_config.get('hinge_c', 1), env_id=env_id,
                                  bound_solver=bound_solver, attack_config=attack_config)
            loss, grad_norm, weights_norm, td_loss, batch_cur_q_value, batch_exp_q_value = \
                res[0], res[1], res[2], res[3], res[4], res[5]
            if robust:
                reg_loss = res[-1]
                reg_losses.append(reg_loss.data.item())
                if bound_solver == 'pgd':
                    ori_margin, adv_margin = res[-3].data.item(), res[-2].data.item()

            losses.append(loss.data.item())
            td_losses.append(td_loss.data.item())
            batch_cur_q.append(batch_cur_q_value.data.item())
            batch_exp_q.append(batch_exp_q_value.data.item())

        log_time('loss_time', time.time() - t)

        # Step 2: run environment (async)
        t = time.time()
        if training_config['use_async_env']:
            next_state, reward, done, _ = async_env.wait_step()
        log_time('env_time', time.time() - t)

        # Step 3: save to buffer (async)
        if training_config['use_async_env']:
            t = time.time()
            if use_cpp_buffer:
                replay_buffer.add(obs=state, act=action, rew=reward, next_obs=next_state, done=done)
            else:
                replay_buffer.push(state, action, reward, next_state, done)
            log_time('save_time', time.time() - t)

        # Update states and reward
        t = time.time()
        state = next_state
        episode_reward += reward
        if done:
            if training_config['use_async_env']:
                state = async_env.reset()
            else:
                state = env.reset()
            all_rewards.append(episode_reward)
            episode_reward = 0
        log_time('env_time', time.time() - t)

        # All kinds of result logging
        if frame_idx % training_config['print_frame'] == 0 or frame_idx == num_frames or (
                robust and abs(frame_idx - schedule_start) < 5) or abs(buffer_stored_size - replay_initial) < 5:
            logger.log('\nframe {}/{}, learning rate: {:.6g}, buffer beta: {:.6g}, action epsilon: {:.6g}'.format(
                frame_idx, num_frames, lr, buffer_beta, act_epsilon))
            logger.log('total time: {:.2f}, epoch time: {:.4f}, speed: {:.2f} frames/sec, '
                       'last total loss: {:.6g}, avg total loss: {:.6g}, grad norm: {:.6g}, '
                       'weights_norm: {:.6g}, latest episode reward: {:.6g}, avg 10 episode reward: {:.6g}'.format(
                time.time() - start_time,
                time.time() - period_start_time,
                training_config['print_frame'] / (time.time() - period_start_time),
                losses[-1] if losses else np.nan,
                np.average(losses[:-training_config['print_frame'] - 1:-1]) if losses else np.nan,
                grad_norm, weights_norm,
                all_rewards[-1] if all_rewards else np.nan,
                np.average(all_rewards[:-11:-1]) if all_rewards else np.nan))
            logger.log('last td loss: {:.6g}, avg td loss: {:.6g}'.format(
                td_losses[-1] if td_losses else np.nan,
                np.average(td_losses[:-training_config['print_frame'] - 1:-1]) if td_losses else np.nan))
            logger.log('last batch cur q: {:.6g}, avg batch cur q: {:.6g}'.format(
                batch_cur_q[-1] if batch_cur_q else np.nan,
                np.average(batch_cur_q[:-training_config['print_frame'] - 1:-1]) if batch_cur_q else np.nan))
            logger.log('last batch exp q: {:.6g}, avg batch exp q: {:.6g}'.format(
                batch_exp_q[-1] if batch_exp_q else np.nan,
                np.average(batch_exp_q[:-training_config['print_frame'] - 1:-1]) if batch_exp_q else np.nan))
            if robust:
                logger.log('current input epsilon: {:.6g}'.format(eps))
                if bound_solver == 'pgd':
                    logger.log('last logit margin: ori: {:.6g}, adv: {:.6g}'.format(ori_margin, adv_margin))
                else:
                    logger.log('current bound beta: {:.6g}'.format(beta))
                logger.log('last cert reg loss: {:.6g}, avg cert reg loss: {:.6g}'.format(
                    reg_losses[-1] if reg_losses else np.nan,
                    np.average(reg_losses[:-training_config['print_frame'] - 1:-1]) if reg_losses else np.nan))
                logger.log('current kappa: {:.6g}'.format(kappa))
            if adv_train:
                logger.log('current attack epsilon (same as input epsilon): {:.6g}'.format(eps))
                diff = ori_state_tensor - state_tensor
                diff = np.abs(diff.data.cpu().numpy())
                logger.log('current Linf distortion: {:.6g}'.format(np.max(diff)))
                logger.log('this batch attacked: {}, success: {}, attack success rate: {:.6g}'.format(
                    attack_count, suc_count,
                    suc_count * 1.0 / attack_count if attack_count > 0 else np.nan))
                attack_count = 0
                suc_count = 0
                logger.log('attack stats reset.')

            period_start_time = time.time()
            log_time.print()
            log_time.clear()

        if frame_idx % training_config['save_frame'] == 0 or frame_idx == num_frames:
            plot(frame_idx, all_rewards, losses, prefix)
            torch.save(current_model.features.state_dict(), '{}/frame_{}.pth'.format(prefix, frame_idx))

        if frame_idx % training_config['update_target_frame'] == 0:
            update_target(current_model, target_model)

        if frame_idx % training_config.get('mini_test', 100000) == 0 and (
                (robust and beta == 0) or (not robust and frame_idx * 1.0 / num_frames >= 0.8)):
            test_reward = mini_test(current_model, config, logger, dtype)
            logger.log('this test avg reward: {:6g}'.format(test_reward))
            if test_reward >= best_test_reward:
                best_test_reward = test_reward
                logger.log('new best reward {:6g} achieved, update checkpoint'.format(test_reward))
                torch.save(current_model.features.state_dict(),
                           '{}/best_frame_{}.pth'.format(prefix, frame_idx))

        log_time.log_time('total', time.time() - frame_start)
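# Illustrative sketch (standalone helper, not part of the original repo): the
# training loop above interpolates the certified-regularization mixing
# coefficient `beta` from the current input epsilon as
#     beta = (max_eps - eps * (1.0 - convex_final_beta)) / max_eps * convex_start_beta
# so beta equals convex_start_beta when eps == 0 and decays to
# convex_start_beta * convex_final_beta once eps reaches max_eps.

def convex_beta(eps, max_eps, convex_start_beta, convex_final_beta):
    """Reproduces the beta interpolation used above, for illustration only."""
    return (max_eps - eps * (1.0 - convex_final_beta)) / max_eps * convex_start_beta

# Example with hypothetical values: convex_start_beta=1.0, convex_final_beta=0.0
# moves beta linearly from 1.0 at eps=0 down to 0.0 at eps=max_eps.
assert convex_beta(0.0, 1.0, 1.0, 0.0) == 1.0
assert convex_beta(1.0, 1.0, 1.0, 0.0) == 0.0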
def main(args):
    config = load_config(args)
    global_train_config = config["training_params"]
    models, model_names = config_modelloader(config)

    for model, model_id, model_config in zip(models, model_names, config["models"]):
        # make a copy of global training config, and update per-model config
        train_config = copy.deepcopy(global_train_config)
        if "training_params" in model_config:
            train_config = update_dict(train_config, model_config["training_params"])
        model = BoundSequential.convert(model, train_config["method_params"]["bound_opts"])

        # read training parameters from config file
        epochs = train_config["epochs"]
        lr = train_config["lr"]
        weight_decay = train_config["weight_decay"]
        starting_epsilon = train_config["starting_epsilon"]
        end_epsilon = train_config["epsilon"]
        schedule_length = train_config["schedule_length"]
        schedule_start = train_config["schedule_start"]
        optimizer = train_config["optimizer"]
        method = train_config["method"]
        verbose = train_config["verbose"]
        lr_decay_step = train_config["lr_decay_step"]
        lr_decay_milestones = train_config["lr_decay_milestones"]
        lr_decay_factor = train_config["lr_decay_factor"]
        multi_gpu = train_config["multi_gpu"]
        # parameters specific to a training method
        method_param = train_config["method_params"]
        norm = float(train_config["norm"])
        train_data, test_data = config_dataloader(config, **train_config["loader_params"])

        if optimizer == "adam":
            opt = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
        elif optimizer == "sgd":
            opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9, nesterov=True,
                            weight_decay=weight_decay)
        else:
            raise ValueError("Unknown optimizer")

        batch_multiplier = train_config["method_params"].get("batch_multiplier", 1)
        batch_size = train_data.batch_size * batch_multiplier
        num_steps_per_epoch = int(np.ceil(1.0 * len(train_data.dataset) / batch_size))
        epsilon_scheduler = EpsilonScheduler(
            train_config.get("schedule_type", "linear"),
            schedule_start * num_steps_per_epoch,
            ((schedule_start + schedule_length) - 1) * num_steps_per_epoch,
            starting_epsilon, end_epsilon, num_steps_per_epoch)
        max_eps = end_epsilon

        if lr_decay_step:
            # Use StepLR. Decay by lr_decay_factor every lr_decay_step.
            lr_scheduler = optim.lr_scheduler.StepLR(opt, step_size=lr_decay_step, gamma=lr_decay_factor)
            lr_decay_milestones = None
        elif lr_decay_milestones:
            # Decay learning rate by lr_decay_factor at a few milestones.
            lr_scheduler = optim.lr_scheduler.MultiStepLR(opt, milestones=lr_decay_milestones,
                                                          gamma=lr_decay_factor)
        else:
            raise ValueError("one of lr_decay_step and lr_decay_milestones must not be empty.")

        model_name = get_path(config, model_id, "model", load=False)
        best_model_name = get_path(config, model_id, "best_model", load=False)
        model_log = get_path(config, model_id, "train_log")
        logger = Logger(open(model_log, "w"))
        logger.log(model_name)
        logger.log("Command line:", " ".join(sys.argv[:]))
        logger.log("training configurations:", train_config)
        logger.log("Model structure:")
        logger.log(str(model))
        logger.log("data std:", train_data.std)

        best_err = np.inf
        recorded_clean_err = np.inf
        timer = 0.0

        if multi_gpu:
            logger.log("\nUsing multiple GPUs for computing CROWN-IBP bounds\n")
            model = BoundDataParallel(model)
        model = model.cuda()

        for t in range(epochs):
            epoch_start_eps = epsilon_scheduler.get_eps(t, 0)
            epoch_end_eps = epsilon_scheduler.get_eps(t + 1, 0)
            logger.log("Epoch {}, learning rate {}, epsilon {:.6g} - {:.6g}".format(
                t, lr_scheduler.get_lr(), epoch_start_eps, epoch_end_eps))
            # with torch.autograd.detect_anomaly():
            start_time = time.time()
            Train(model, t, train_data, epsilon_scheduler, max_eps, norm, logger, verbose, True,
                  opt, method, **method_param)
            if lr_decay_step:
                # Use StepLR. Note that we manually set up the epoch number here, hence the +1 offset.
                lr_scheduler.step(epoch=max(t - (schedule_start + schedule_length - 1) + 1, 0))
            elif lr_decay_milestones:
                # Use MultiStepLR with milestones.
                lr_scheduler.step()
            epoch_time = time.time() - start_time
            timer += epoch_time
            logger.log('Epoch time: {:.4f}, Total time: {:.4f}'.format(epoch_time, timer))

            logger.log("Evaluating...")
            with torch.no_grad():
                # evaluate
                err, clean_err = Train(model, t, test_data,
                                       EpsilonScheduler("linear", 0, 0, epoch_end_eps, epoch_end_eps, 1),
                                       max_eps, norm, logger, verbose, False, None, method, **method_param)

            logger.log('saving to', model_name)
            torch.save({
                'state_dict': model.module.state_dict() if multi_gpu else model.state_dict(),
                'epoch': t,
            }, model_name)

            # save the best model after we reached the schedule
            if t >= (schedule_start + schedule_length):
                if err <= best_err:
                    best_err = err
                    recorded_clean_err = clean_err
                    logger.log('Saving best model {} with error {}'.format(best_model_name, best_err))
                    torch.save({
                        'state_dict': model.module.state_dict() if multi_gpu else model.state_dict(),
                        'robust_err': err,
                        'clean_err': clean_err,
                        'epoch': t,
                    }, best_model_name)

        logger.log('Total Time: {:.4f}'.format(timer))
        logger.log('Model {} best err {}, clean err {}'.format(model_id, best_err, recorded_clean_err))
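# Illustrative sketch (hypothetical numbers, not taken from any config file in the
# repo): the epsilon schedule above is expressed in optimizer steps, so the
# epoch-level schedule_start / schedule_length settings are converted into step
# indices via the per-epoch step count, which itself depends on batch_multiplier.

import math

def schedule_steps(num_examples, batch_size, batch_multiplier, schedule_start, schedule_length):
    """Mirrors the step-index computation used above, for illustration only."""
    effective_batch = batch_size * batch_multiplier
    num_steps_per_epoch = int(math.ceil(1.0 * num_examples / effective_batch))
    first_step = schedule_start * num_steps_per_epoch
    last_step = (schedule_start + schedule_length - 1) * num_steps_per_epoch
    return num_steps_per_epoch, first_step, last_step

# Example: 60000 training examples, batch size 256, batch_multiplier 1,
# schedule_start=10, schedule_length=60 gives 235 steps per epoch, so epsilon
# ramps from starting_epsilon to end_epsilon between steps 2350 and 16215.
print(schedule_steps(60000, 256, 1, 10, 60))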