def run(args=None):
    device = 'cuda' if torch.cuda.is_available() and (not args.no_cuda) else 'cpu'
    num_train, train_loader, test_loader, input_size, input_channel, n_class = get_loaders(args)

    lossFn = nn.CrossEntropyLoss(reduction='none')

    def evalFn(x):
        return torch.max(x, dim=1)[1]

    ## initialize SpecNet
    dTNet = MyDeepTrunkNet.get_deepTrunk_net(args, device, lossFn, evalFn,
                                             input_size, input_channel, n_class)

    ## setup logging and checkpointing
    timestamp = int(time.time())
    model_signature = '%s/%s/%d/%s_%.5f/%d' % (args.dataset, args.exp_name, args.exp_id,
                                               args.net, args.train_eps, timestamp)
    model_dir = args.root_dir + 'models_new/%s' % model_signature
    args.model_dir = model_dir
    print("Saving model to: %s" % model_dir)
    count_vars(args, dTNet)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    tb_writer = SummaryWriter(model_dir)
    stats = Statistics(len(train_loader), tb_writer, model_dir)
    args_file = os.path.join(model_dir, 'args.json')
    with open(args_file, 'w') as fou:
        json.dump(vars(args), fou, indent=4)
    write_config(args, os.path.join(model_dir, 'run_config.txt'))

    ## main part depending on training mode
    if 'train' in args.train_mode:
        epoch = train_deepTrunk(dTNet, args, device, stats, train_loader, test_loader)
        if args.cert:
            with torch.no_grad():
                cert_deepTrunk_net(dTNet, args, device,
                                   test_loader if args.test_set == "test" else train_loader,
                                   stats, log_ind=True, break_on_failure=False, epoch=epoch)
    elif args.train_mode == 'test':
        with torch.no_grad():
            test_deepTrunk_net(dTNet, args, device,
                               test_loader if args.test_set == "test" else train_loader,
                               stats, log_ind=True)
    elif args.train_mode == "cert":
        with torch.no_grad():
            cert_deepTrunk_net(dTNet, args, device,
                               test_loader if args.test_set == "test" else train_loader,
                               stats, log_ind=True, break_on_failure=False)
    else:
        assert False, 'Unknown mode: {}!'.format(args.train_mode)

    exit(0)
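# A minimal entry-point sketch for the runner above (get_args is an assumed
# argparse helper, not shown here). Note that run() dereferences args
# immediately, so despite the args=None default a populated namespace must
# always be passed in.
if __name__ == '__main__':
    run(get_args())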
def _build_network(self):
    obs_dim = self.obs_space.shape[0]
    act_dim = self.act_space.shape[0]
    self.state_ph, self.act_ph, self.next_state_ph, self.rew_ph, self.done_ph = placeholders(
        obs_dim, act_dim, obs_dim, None, None)

    # Main outputs
    with tf.variable_scope('main'):
        self.pi, self.q, q_pi = ddpg_mlp_actor_critic(
            self.state_ph, self.act_ph, action_space=self.act_space,
            hidden_sizes=self.hidden_sizes)

    # Target networks
    with tf.variable_scope('target'):
        # We only need q_pi_targ to compute the Bellman backup
        _, _, q_pi_targ = ddpg_mlp_actor_critic(
            self.next_state_ph, self.act_ph, action_space=self.act_space,
            hidden_sizes=self.hidden_sizes)

    var_counts = tuple(count_vars(scope) for scope in ['main/pi', 'main/q', 'main'])
    print('\nNumber of parameters: \t pi: %d, \t q: %d, \t total: %d\n' % var_counts)

    # Bellman target backup
    backup = tf.stop_gradient(self.rew_ph + self.gamma * (1 - self.done_ph) * q_pi_targ)

    # Objectives
    self.q_loss = tf.reduce_mean((self.q - backup) ** 2)
    self.pi_loss = -tf.reduce_mean(q_pi)

    # Optimizers
    self.train_q_opt = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(
        self.q_loss, var_list=get_vars('main/q'))
    self.train_pi_opt = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(
        self.pi_loss, var_list=get_vars('main/pi'))

    # Polyak averaging for target variables
    self.target_update = tf.group([
        tf.assign(v_target, self.polyak * v_target + (1 - self.polyak) * v_main)
        for v_target, v_main in zip(get_vars('target'), get_vars('main'))
    ])

    # Initialize targets to match main networks
    self.target_init = tf.group([
        tf.assign(v_target, v_main)
        for v_target, v_main in zip(get_vars('target'), get_vars('main'))
    ])
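# A sketch of how one gradient step typically drives the graph built above
# (assuming a tf.Session and a replay-buffer batch dict; the method name and
# batch keys are illustrative, not part of the original class):
def _train_step(self, sess, batch):
    feed = {self.state_ph: batch['obs'], self.act_ph: batch['act'],
            self.next_state_ph: batch['obs2'], self.rew_ph: batch['rew'],
            self.done_ph: batch['done']}
    sess.run(self.train_q_opt, feed)                          # critic update
    sess.run([self.train_pi_opt, self.target_update], feed)   # actor + Polyak target update
    # self.target_init is run once, right after tf.global_variables_initializer()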
# Main outputs from computation graph
pi, logp, logp_pi, v = mlp_actor_critic(x_ph, a_ph)

# Need all placeholders in *this* order later (to zip with data from buffer)
all_phs = [x_ph, a_ph, adv_ph, ret_ph, logp_old_ph]

# Every step, get: action, value, and logprob
get_action_ops = [pi, v, logp_pi]

# Experience buffer
local_steps_per_epoch = int(args.steps / num_procs())
memory = ReplayMemory(obs_dim, act_dim, local_steps_per_epoch, args.gamma, args.lam)

# Count variables
var_counts = tuple(count_vars(scope) for scope in ['pi', 'v'])

# Objective functions
ratio = tf.exp(logp - logp_old_ph)  # pi(a|s) / pi_old(a|s)
min_adv = tf.where(adv_ph > 0, (1 + args.clip_ratio) * adv_ph,
                   (1 - args.clip_ratio) * adv_ph)
actor_loss = -tf.reduce_mean(tf.minimum(ratio * adv_ph, min_adv))
critic_loss = tf.reduce_mean((ret_ph - v) ** 2)

# Info (useful to watch during learning)
approx_kl = tf.reduce_mean(logp_old_ph - logp)  # a sample estimate for KL-divergence, easy to compute
approx_ent = tf.reduce_mean(-logp)              # a sample estimate for entropy, also easy to compute
clipped = tf.logical_or(ratio > (1 + args.clip_ratio), ratio < (1 - args.clip_ratio))
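# The boolean mask above is usually reduced to a "clip fraction" diagnostic --
# the share of samples whose ratio left the clipping interval. A sketch of the
# one extra line that typically follows (the snippet ends before it):
clipfrac = tf.reduce_mean(tf.cast(clipped, tf.float32))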
with tf.variable_scope('target'):
    with tf.variable_scope('pi'):
        opt_action_target = create_nn(s_ph, hidden_sizes + [action_dim],
                                      act_limit=max_act)
    with tf.variable_scope('q'):
        opt_action_value2_target = tf.squeeze(create_nn(
            tf.concat([s_ph, opt_action_target], axis=-1),
            hidden_sizes=hidden_sizes + [1]), axis=1)

# Count variables to check that the parameter count is right
var_counts = tuple(count_vars(scope) for scope in ['main/pi', 'main/q', 'main'])
print('\nNumber of parameters: \t pi: %d, \t q: %d, \t total: %d\n' % var_counts)

# Create target value (y)
# y = r + gamma * (1 - d) * Q_targ(s', pi_targ(s'))
y = tf.stop_gradient(r_ph + gamma * (1 - d_ph) * opt_action_value2_target)

# Create loss function for the optimal deterministic policy u(s)
loss_opt_action = -tf.reduce_mean(opt_action_value2)

# Create loss for the optimal action-value function (Q* loss function)
# loss = E[(Q*(s,a) - y)^2] = E[(Q*(s,a) - (r + gamma * Q_targ(s', pi_targ(s'))))^2]
loss_opt_act_val = tf.reduce_mean((opt_action_value - y) ** 2)

# Creating optimizers
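# The snippet cuts off at the optimizer setup. What typically follows mirrors
# the DDPG builder above (a sketch under that assumption -- variable scopes
# 'main/pi' and 'main/q', Adam with a fixed learning rate; the op names here
# are illustrative):
train_pi_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(
    loss_opt_action, var_list=get_vars('main/pi'))
train_q_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(
    loss_opt_act_val, var_list=get_vars('main/q'))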
def main(args):
    # create environment
    env = gym.make(args.env)
    env.seed(args.seed)
    obs_dim = env.observation_space.shape[0]
    if isinstance(env.action_space, Discrete):
        discrete = True
        act_dim = env.action_space.n
    else:
        discrete = False
        act_dim = env.action_space.shape[0]

    # actor critic
    ac = ActorCritic(obs_dim, act_dim, discrete).to(args.device)
    print('Number of parameters', count_vars(ac))

    # Set up experience buffer
    steps_per_epoch = int(args.steps_per_epoch)
    buf = PGBuffer(obs_dim, act_dim, discrete, steps_per_epoch, args)
    logs = defaultdict(lambda: [])
    writer = SummaryWriter(args_to_str(args))
    gif_frames = []

    # Set up function for computing policy loss
    def compute_loss_pi(batch):
        obs, act, psi, logp_old = batch['obs'], batch['act'], batch['psi'], batch['logp']
        pi, logp = ac.pi(obs, act)

        # Policy loss
        if args.loss_mode == 'vpg':
            # TODO (Task 2): implement vanilla policy gradient loss
            # Standard VPG objective, filled in as a reference: maximize
            # E[log pi(a|s) * psi], i.e. minimize its negation.
            loss_pi = -(logp * psi).mean()
        elif args.loss_mode == 'ppo':
            # TODO (Task 4): implement clipped PPO loss
            # Standard PPO-Clip objective, filled in as a reference (assumes a
            # clip_ratio hyperparameter on args, as in the TensorFlow variant above).
            ratio = torch.exp(logp - logp_old)
            clip_adv = torch.clamp(ratio, 1 - args.clip_ratio, 1 + args.clip_ratio) * psi
            loss_pi = -torch.min(ratio * psi, clip_adv).mean()
        else:
            raise Exception('Invalid loss_mode option', args.loss_mode)

        # Useful extra info
        approx_kl = (logp_old - logp).mean().item()
        ent = pi.entropy().mean().item()
        pi_info = dict(kl=approx_kl, ent=ent)

        return loss_pi, pi_info

    # Set up function for computing value loss
    def compute_loss_v(batch):
        obs, ret = batch['obs'], batch['ret']
        v = ac.v(obs)
        # TODO (Task 2): compute value function loss
        # Standard choice, filled in as a reference: mean-squared error between
        # predicted values and empirical returns.
        loss_v = ((v - ret) ** 2).mean()
        return loss_v

    # Set up optimizers for policy and value function
    pi_optimizer = Adam(ac.pi.parameters(), lr=args.pi_lr)
    vf_optimizer = Adam(ac.v.parameters(), lr=args.v_lr)

    # Set up update function
    def update():
        batch = buf.get()

        # Get loss and info values before update
        pi_l_old, pi_info_old = compute_loss_pi(batch)
        pi_l_old = pi_l_old.item()
        v_l_old = compute_loss_v(batch).item()

        # Policy learning
        for i in range(args.train_pi_iters):
            pi_optimizer.zero_grad()
            loss_pi, pi_info = compute_loss_pi(batch)
            loss_pi.backward()
            pi_optimizer.step()

        # Value function learning
        for i in range(args.train_v_iters):
            vf_optimizer.zero_grad()
            loss_v = compute_loss_v(batch)
            loss_v.backward()
            vf_optimizer.step()

        # Log changes from update
        kl, ent = pi_info['kl'], pi_info_old['ent']
        logs['kl'] += [kl]
        logs['ent'] += [ent]
        logs['loss_v'] += [loss_v.item()]
        logs['loss_pi'] += [loss_pi.item()]

    # Prepare for interaction with environment
    start_time = time.time()
    o, ep_ret, ep_len = env.reset(), 0, 0
    ep_count = 0  # just for logging purposes, number of episodes run

    # Main loop: collect experience in env and update/log each epoch
    for epoch in range(args.epochs):
        for t in range(steps_per_epoch):
            a, v, logp = ac.step(torch.as_tensor(o, dtype=torch.float32).to(args.device))

            next_o, r, d, _ = env.step(a)
            ep_ret += r
            ep_len += 1

            # save and log
            buf.store(o, a, r, v, logp)
            if ep_count % 100 == 0:
                frame = env.render(mode='rgb_array')
                # uncomment this line if you want to log to tensorboard (can be memory intensive)
                #gif_frames.append(frame)
                #gif_frames.append(PIL.Image.fromarray(frame).resize([64,64]))  # you can try this downsized version if you are resource constrained
                time.sleep(0.01)

            # Update obs (critical!)
            o = next_o

            timeout = ep_len == args.max_ep_len
            terminal = d or timeout
            epoch_ended = t == steps_per_epoch - 1

            if terminal or epoch_ended:
                # if trajectory didn't reach terminal state, bootstrap value target
                if timeout or epoch_ended:
                    _, v, _ = ac.step(torch.as_tensor(o, dtype=torch.float32).to(args.device))
                else:
                    v = 0
                buf.finish_path(v)
                if terminal:
                    # only save EpRet / EpLen if trajectory finished
                    logs['ep_ret'] += [ep_ret]
                    logs['ep_len'] += [ep_len]
                    ep_count += 1

                o, ep_ret, ep_len = env.reset(), 0, 0

                # save a video to tensorboard so you can view later
                if len(gif_frames) != 0:
                    vid = np.stack(gif_frames)
                    vid_tensor = vid.transpose(0, 3, 1, 2)[None]
                    writer.add_video('rollout', vid_tensor, epoch, fps=50)
                    gif_frames = []
                    writer.flush()
                    print('wrote video')

        # Perform VPG update!
        update()

        if epoch % 10 == 0:
            vals = {key: np.mean(val) for key, val in logs.items()}
            for key in vals:
                writer.add_scalar(key, vals[key], epoch)
            writer.flush()
            print('Epoch', epoch, vals)
            logs = defaultdict(lambda: [])
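# For reference, buf.finish_path(last_val) typically closes a trajectory by
# computing discounted returns and GAE-lambda advantages from the stored
# rewards and value estimates. A self-contained sketch of that computation
# (mirroring the standard Spinning Up buffer; PGBuffer's exact internals may
# differ; uses numpy as np, imported earlier in this script):
def gae_advantages_and_returns(rewards, values, last_val, gamma, lam):
    rews = np.append(rewards, last_val)
    vals = np.append(values, last_val)
    # TD residuals: delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
    deltas = rews[:-1] + gamma * vals[1:] - vals[:-1]
    adv = np.zeros(len(deltas))
    ret = np.zeros(len(rews))
    running_adv, running_ret = 0.0, 0.0
    for t in reversed(range(len(rews))):
        running_ret = rews[t] + gamma * running_ret   # discounted rewards-to-go
        ret[t] = running_ret
    for t in reversed(range(len(deltas))):
        running_adv = deltas[t] + gamma * lam * running_adv  # GAE-lambda
        adv[t] = running_adv
    return adv, ret[:-1]  # drop the bootstrap slot from the returns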
def run(args=None):
    device = 'cuda' if torch.cuda.is_available() and (not args.no_cuda) else 'cpu'
    num_train, train_loader, test_loader, input_size, input_channel, n_class = get_loaders(args)

    lossFn = nn.CrossEntropyLoss(reduction='none')
    evalFn = lambda x: torch.max(x, dim=1)[1]

    net = get_net(device, args.dataset, args.net, input_size, input_channel, n_class,
                  load_model=args.load_model,
                  net_dim=args.cert_net_dim)  # , feature_extract=args.core_feature_extract)

    # setup model directory for checkpoints and logs
    timestamp = int(time.time())
    model_signature = '%s/%s/%d/%s_%.5f/%d' % (args.dataset, args.exp_name, args.exp_id,
                                               args.net, args.train_eps, timestamp)
    model_dir = args.root_dir + 'models_new/%s' % model_signature
    args.model_dir = model_dir
    count_vars(args, net)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    # wrap the network with its relaxed counterpart (used for certified training)
    if isinstance(net, UpscaleNet):
        relaxed_net = None
        relu_ids = None
    else:
        relaxed_net = RelaxedNetwork(net.blocks, args.n_rand_proj).to(device)
        relu_ids = relaxed_net.get_relu_ids()

    if "nat" in args.train_mode:
        cnet = CombinedNetwork(net, relaxed_net, lossFn=lossFn, evalFn=evalFn,
                               device=device, no_r_net=True).to(device)
    else:
        dummy_input = torch.rand((1,) + net.dims[0], device=device, dtype=torch.float32)
        cnet = CombinedNetwork(net, relaxed_net, lossFn=lossFn, evalFn=evalFn,
                               device=device, dummy_input=dummy_input).to(device)

    n_epochs, test_nat_loss, test_nat_acc, test_adv_loss, test_adv_acc = args.n_epochs, None, None, None, None

    if 'train' in args.train_mode:
        tb_writer = SummaryWriter(model_dir)
        stats = Statistics(len(train_loader), tb_writer, model_dir)
        args_file = os.path.join(model_dir, 'args.json')
        with open(args_file, 'w') as fou:
            json.dump(vars(args), fou, indent=4)
        write_config(args, os.path.join(model_dir, 'run_config.txt'))

        eps = 0
        epoch = 0
        lr = args.lr
        n_epochs = args.n_epochs

        # pick the training layers and ReLU-stability setting per train mode
        if "COLT" in args.train_mode:
            relu_stable = args.relu_stable
            # if args.layers is None:
            #     args.layers = [-2, -1] + relu_ids
            layers = get_layers(args.train_mode, cnet,
                                n_attack_layers=args.n_attack_layers,
                                protected_layers=args.protected_layers)
        elif "adv" in args.train_mode:
            relu_stable = None
            layers = [-1, -1]
            args.mix = False
        elif "natural" in args.train_mode:
            relu_stable = None
            layers = [-2, -2]
            args.nat_factor = 1
            args.mix = False
        elif "diffAI" in args.train_mode:
            relu_stable = None
            layers = [-2, -2]
        else:
            assert False, "Unknown train mode %s" % args.train_mode

        print('Saving model to:', model_dir)
        print('Training layers: ', layers)

        # stage-wise training: one phase per consecutive pair of layers
        for j in range(len(layers) - 1):
            opt, lr_scheduler = get_opt(cnet.net, args.opt, lr, args.lr_step,
                                        args.lr_factor, args.n_epochs, train_loader,
                                        args.lr_sched, fixup="fixup" in args.net)

            curr_layer_idx = layers[j + 1]
            eps_old = eps
            eps = get_scaled_eps(args, layers, relu_ids, curr_layer_idx, j)

            # schedules for loss mixing (kappa), beta, and the perturbation radius (eps)
            kappa_sched = Scheduler(0.0 if args.mix else 1.0, 1.0,
                                    num_train * args.mix_epochs, 0)
            beta_sched = Scheduler(args.beta_start if args.mix else args.beta_end,
                                   args.beta_end,
                                   args.train_batch * len(train_loader) * args.mix_epochs, 0)
            eps_sched = Scheduler(eps_old if args.anneal else eps, eps,
                                  num_train * args.anneal_epochs, 0)

            layer_dir = '{}/{}'.format(model_dir, curr_layer_idx)
            if not os.path.exists(layer_dir):
                os.makedirs(layer_dir)

            print('\nnew train phase: eps={:.5f}, lr={:.2e}, curr_layer={}\n'.format(
                eps, lr, curr_layer_idx))

            for curr_epoch in range(n_epochs):
                train(device, epoch, args, j + 1, layers, cnet, eps_sched, kappa_sched,
                      opt, train_loader, lr_scheduler, relu_ids, stats, relu_stable,
                      relu_stable_protected=args.relu_stable_protected,
                      beta_sched=beta_sched)

                if isinstance(lr_scheduler, optim.lr_scheduler.StepLR) and curr_epoch >= args.mix_epochs:
                    lr_scheduler.step()

                if (epoch + 1) % args.test_freq == 0:
                    with torch.no_grad():
                        test_nat_loss, test_nat_acc, test_adv_loss, test_adv_acc = test(
                            device, args, cnet,
                            test_loader if args.test_set == "test" else train_loader,
                            [curr_layer_idx], stats=stats,
                            log_ind=(epoch + 1) % n_epochs == 0)

                if (epoch + 1) % args.test_freq == 0 or (epoch + 1) % n_epochs == 0:
                    torch.save(net.state_dict(),
                               os.path.join(layer_dir, 'net_%d.pt' % (epoch + 1)))
                    torch.save(opt.state_dict(),
                               os.path.join(layer_dir, 'opt_%d.pt' % (epoch + 1)))

                stats.update_tb(epoch)
                epoch += 1

            # decay the ReLU-stability weight and learning rate between phases
            relu_stable = None if relu_stable is None else relu_stable * args.relu_stable_layer_dec
            lr = lr * args.lr_layer_dec

        if args.cert:
            with torch.no_grad():
                diffAI_cert(device, args, cnet,
                            test_loader if args.test_set == "test" else train_loader,
                            stats=stats, log_ind=True, epoch=epoch,
                            domains=args.cert_domain)
    elif args.train_mode == 'print':
        # export the network to ONNX
        print('printing network to:', args.out_net_file)
        dummy_input = torch.randn(1, input_channel, input_size, input_size, device='cuda')
        net.skip_norm = True
        torch.onnx.export(net, dummy_input, args.out_net_file, verbose=True)
    elif args.train_mode == 'test':
        with torch.no_grad():
            test(device, args, cnet,
                 test_loader if args.test_set == "test" else train_loader,
                 [-1], log_ind=True)
    elif args.train_mode == "cert":
        tb_writer = SummaryWriter(model_dir)
        stats = Statistics(len(train_loader), tb_writer, model_dir)
        args_file = os.path.join(model_dir, 'args.json')
        with open(args_file, 'w') as fou:
            json.dump(vars(args), fou, indent=4)
        write_config(args, os.path.join(model_dir, 'run_config.txt'))
        print('Saving results to:', model_dir)
        with torch.no_grad():
            diffAI_cert(device, args, cnet,
                        test_loader if args.test_set == "test" else train_loader,
                        stats=stats, log_ind=True, domains=args.cert_domain)
        exit(0)
    else:
        assert False, 'Unknown mode: {}!'.format(args.train_mode)

    return test_nat_loss, test_nat_acc, test_adv_loss, test_adv_acc
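# The kappa/beta/eps schedules above anneal a scalar from a start to an end
# value over a fixed number of samples. A minimal sketch of such a scheduler,
# inferred from the call sites Scheduler(start, end, n_steps, warmup) -- the
# class and method names here are illustrative, not the repository's actual API:
class LinearScheduler:
    def __init__(self, start, end, n_steps, warmup=0):
        self.start, self.end = start, end
        self.n_steps, self.warmup = n_steps, warmup
        self.t = 0  # number of samples seen so far

    def advance(self, k=1):
        self.t += k

    def value(self):
        if self.n_steps <= 0:
            return self.end  # degenerate schedule: jump straight to the target
        if self.t < self.warmup:
            return self.start
        frac = min((self.t - self.warmup) / self.n_steps, 1.0)
        return self.start + frac * (self.end - self.start)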