Code example #1
def run(args=None):
    device = 'cuda' if torch.cuda.is_available() and (not args.no_cuda) else 'cpu'
    num_train, train_loader, test_loader, input_size, input_channel, n_class = get_loaders(args)

    lossFn = nn.CrossEntropyLoss(reduction='none')
    def evalFn(x): return torch.max(x, dim=1)[1]

    ## initialize SpecNet
    dTNet = MyDeepTrunkNet.get_deepTrunk_net(args, device, lossFn, evalFn, input_size, input_channel, n_class)

    ## setup logging and checkpointing
    timestamp = int(time.time())
    model_signature = '%s/%s/%d/%s_%.5f/%d' % (args.dataset, args.exp_name, args.exp_id, args.net, args.train_eps, timestamp)
    model_dir = args.root_dir + 'models_new/%s' % (model_signature)
    args.model_dir = model_dir


    print("Saving model to: %s" % model_dir)
    count_vars(args, dTNet)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    tb_writer = SummaryWriter(model_dir)
    stats = Statistics(len(train_loader), tb_writer, model_dir)
    args_file = os.path.join(model_dir, 'args.json')
    with open(args_file, 'w') as fou:
        json.dump(vars(args), fou, indent=4)
    write_config(args, os.path.join(model_dir, 'run_config.txt'))


    ## main part depending on training mode
    if 'train' in args.train_mode:
        epoch = train_deepTrunk(dTNet, args, device, stats, train_loader, test_loader)
        if args.cert:
            with torch.no_grad():
                cert_deepTrunk_net(dTNet, args, device, test_loader if args.test_set == "test" else train_loader,
                                   stats, log_ind=True, break_on_failure=False, epoch=epoch)
    elif args.train_mode == 'test':
        with torch.no_grad():
            test_deepTrunk_net(dTNet, args, device, test_loader if args.test_set == "test" else train_loader, stats,
                               log_ind=True)
    elif args.train_mode == "cert":
        with torch.no_grad():
            cert_deepTrunk_net(dTNet, args, device, test_loader if args.test_set == "test" else train_loader, stats,
                               log_ind=True, break_on_failure=False)
    else:
        assert False, 'Unknown mode: {}!'.format(args.train_mode)

    exit(0)
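
A minimal driver for this snippet might look like the following (a sketch; the flag names are inferred from the attributes that run() reads and may differ from the project's real parser):

import argparse

if __name__ == '__main__':
    # Hypothetical parser covering only the attributes run() touches.
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', default='cifar10')
    parser.add_argument('--net', default='cnn')
    parser.add_argument('--exp_name', default='dev')
    parser.add_argument('--exp_id', type=int, default=0)
    parser.add_argument('--train_eps', type=float, default=0.03)
    parser.add_argument('--train_mode', default='train')
    parser.add_argument('--test_set', default='test')
    parser.add_argument('--root_dir', default='./')
    parser.add_argument('--no_cuda', action='store_true')
    parser.add_argument('--cert', action='store_true')
    run(parser.parse_args())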
Code example #2
    def _build_network(self):
        obs_dim = self.obs_space.shape[0]
        act_dim = self.act_space.shape[0]
        self.state_ph, self.act_ph, self.next_state_ph, self.rew_ph, self.done_ph = placeholders(
            obs_dim, act_dim, obs_dim, None, None)

        # Main outputs
        with tf.variable_scope('main'):
            self.pi, self.q, q_pi = ddpg_mlp_actor_critic(
                self.state_ph,
                self.act_ph,
                action_space=self.act_space,
                hidden_sizes=self.hidden_sizes)

        # Target networks
        with tf.variable_scope('target'):
            _, _, q_pi_targ = ddpg_mlp_actor_critic(
                self.next_state_ph,
                self.act_ph,
                action_space=self.act_space,
                hidden_sizes=self.hidden_sizes
            )  # We only need q_pi_targ to compute bellman backup

        var_counts = tuple(
            count_vars(scope) for scope in ['main/pi', 'main/q', 'main'])
        print('\nNumber of parameters: \t pi: %d, \t q: %d, \t total: %d\n' %
              var_counts)

        # Bellman target backup
        backup = tf.stop_gradient(self.rew_ph + self.gamma *
                                  (1 - self.done_ph) * q_pi_targ)

        # Objectives
        self.q_loss = tf.reduce_mean((self.q - backup)**2)
        self.pi_loss = -tf.reduce_mean(q_pi)

        # Optimizers
        self.train_q_opt = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(
            self.q_loss, var_list=get_vars('main/q'))
        self.train_pi_opt = tf.train.AdamOptimizer(
            learning_rate=1e-3).minimize(self.pi_loss,
                                         var_list=get_vars('main/pi'))

        # Polyak averaging for target variables
        self.target_update = tf.group([
            tf.assign(v_target,
                      self.polyak * v_target + (1 - self.polyak) * v_main)
            for v_target, v_main in zip(get_vars('target'), get_vars('main'))
        ])

        # Init
        self.target_init = tf.group([
            tf.assign(v_target, v_main)
            for v_target, v_main in zip(get_vars('target'), get_vars('main'))
        ])
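
Every snippet on this page calls a variable-counting helper. In Spinning Up-style TensorFlow 1 code, get_vars and count_vars are typically defined as below (a sketch of the assumed helpers; code examples #1 and #6 use a PyTorch variant with a different signature, count_vars(args, net)):

import numpy as np
import tensorflow as tf

def get_vars(scope=''):
    # All trainable variables whose name contains the scope prefix.
    return [x for x in tf.trainable_variables() if scope in x.name]

def count_vars(scope=''):
    # Total number of scalar parameters under the scope.
    return sum(int(np.prod(v.shape.as_list())) for v in get_vars(scope))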
Code example #3
File: ppo.py  Project: jinbeizame007/tensorflow-ppo
# Main outputs from computation graph
pi, logp, logp_pi, v = mlp_actor_critic(x_ph, a_ph)

# Need all placeholders in *this* order later (to zip with data from buffer)
all_phs = [x_ph, a_ph, adv_ph, ret_ph, logp_old_ph]

# Every step, get: action, value, and logprob
get_action_ops = [pi, v, logp_pi]

# Experience buffer
local_steps_per_epoch = int(args.steps / num_procs())
memory = ReplayMemory(obs_dim, act_dim, local_steps_per_epoch, args.gamma,
                      args.lam)

# Count variables
var_counts = tuple(count_vars(scope) for scope in ['pi', 'v'])

# Objective functions
ratio = tf.exp(logp - logp_old_ph)  # pi(a|s) / pi_old(a|s)
min_adv = tf.where(adv_ph > 0, (1 + args.clip_ratio) * adv_ph,
                   (1 - args.clip_ratio) * adv_ph)
actor_loss = -tf.reduce_mean(tf.minimum(ratio * adv_ph, min_adv))
critic_loss = tf.reduce_mean((ret_ph - v)**2)

# Info (useful to watch during learning)
approx_kl = tf.reduce_mean(
    logp_old_ph - logp)  # a sample estimate for KL-divergence, easy to compute
approx_ent = tf.reduce_mean(
    -logp)  # a sample estimate for entropy, also easy to compute
clipped = tf.logical_or(ratio > (1 + args.clip_ratio), ratio <
                        (1 - args.clip_ratio))
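
The min_adv construction above is an unrolled form of the usual clipped surrogate; an equivalent formulation with tf.clip_by_value (a sketch using the same placeholder names) is:

# L_clip = -E[min(ratio * A, clip(ratio, 1 - eps, 1 + eps) * A)]
clipped_ratio = tf.clip_by_value(ratio, 1 - args.clip_ratio,
                                 1 + args.clip_ratio)
actor_loss_equiv = -tf.reduce_mean(
    tf.minimum(ratio * adv_ph, clipped_ratio * adv_ph))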
Code example #4
with tf.variable_scope('target'):

    with tf.variable_scope('pi'):
        opt_action_target = create_nn(s_ph,
                                      hidden_sizes + [action_dim],
                                      act_limit=max_act)

    with tf.variable_scope('q'):
        opt_action_value2_target = tf.squeeze(create_nn(
            tf.concat([s_ph, opt_action_target], axis=-1),
            hidden_sizes=hidden_sizes + [1]),
                                              axis=1)

# Count variables to check if the number is right
var_counts = tuple(
    count_vars(scope) for scope in ['main/pi', 'main/q', 'main'])
print('\nNumber of parameters: \t pi: %d, \t q: %d, \t total: %d\n' %
      var_counts)

# Create target value (y)
# y = r + gamma * (1 - d) * Q_targ(s', pi_targ(s'))
y = tf.stop_gradient(r_ph + gamma * (1 - d_ph) * opt_action_value2_target)

# Create loss function for optimal deterministic policy (u(s))
loss_opt_action = -tf.reduce_mean(opt_action_value2)

# Create loss for optimal action-value function (Q* loss function)
# loss = E[(Q*(s,a) - y)^2] = E[(Q*(s,a) - (r + gamma*(1-d)*Q_targ(s', pi_targ(s'))))^2]
loss_opt_act_val = tf.reduce_mean((opt_action_value - y)**2)

# Creating optimizers
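
The snippet is cut off after this comment. A plausible continuation, following the same Adam-plus-get_vars pattern as code example #2 (the optimizer names and learning rates here are hypothetical), would be:

# Hypothetical continuation; the original code past this point is not shown.
train_pi_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(
    loss_opt_action, var_list=get_vars('main/pi'))
train_q_op = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(
    loss_opt_act_val, var_list=get_vars('main/q'))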
Code example #5
def main(args):
    # create environment 
    env = gym.make(args.env)
    env.seed(args.seed)
    obs_dim = env.observation_space.shape[0]
    if isinstance(env.action_space, Discrete):
        discrete = True
        act_dim = env.action_space.n
    else:
        discrete = False
        act_dim = env.action_space.shape[0]

    # actor critic 
    ac = ActorCritic(obs_dim, act_dim, discrete).to(args.device)
    print('Number of parameters', count_vars(ac))

    # Set up experience buffer
    steps_per_epoch = int(args.steps_per_epoch)
    buf = PGBuffer(obs_dim, act_dim, discrete, steps_per_epoch, args)
    logs = defaultdict(lambda: [])
    writer = SummaryWriter(args_to_str(args))
    gif_frames = []

    # Set up function for computing policy loss
    def compute_loss_pi(batch):
        obs, act, psi, logp_old = batch['obs'], batch['act'], batch['psi'], batch['logp']
        pi, logp = ac.pi(obs, act)

        # Policy loss
        if args.loss_mode == 'vpg':
            # TODO (Task 2): implement vanilla policy gradient loss
            raise NotImplementedError('vpg policy loss not implemented yet')
        elif args.loss_mode == 'ppo':
            # TODO (Task 4): implement clipped PPO loss
            raise NotImplementedError('ppo policy loss not implemented yet')
        else:
            raise Exception('Invalid loss_mode option', args.loss_mode)

        # Useful extra info
        approx_kl = (logp_old - logp).mean().item()
        ent = pi.entropy().mean().item()
        pi_info = dict(kl=approx_kl, ent=ent)

        return loss_pi, pi_info

    # Set up function for computing value loss
    def compute_loss_v(batch):
        obs, ret = batch['obs'], batch['ret']
        v = ac.v(obs)
        # TODO: (Task 2): compute value function loss
        raise NotImplementedError('value function loss not implemented yet')

    # Set up optimizers for policy and value function
    pi_optimizer = Adam(ac.pi.parameters(), lr=args.pi_lr)
    vf_optimizer = Adam(ac.v.parameters(), lr=args.v_lr)

    # Set up update function
    def update():
        batch = buf.get()

        # Get loss and info values before update
        pi_l_old, pi_info_old = compute_loss_pi(batch)
        pi_l_old = pi_l_old.item()
        v_l_old = compute_loss_v(batch).item()

        # Policy learning
        for i in range(args.train_pi_iters):
            pi_optimizer.zero_grad()
            loss_pi, pi_info = compute_loss_pi(batch)
            loss_pi.backward()
            pi_optimizer.step()

        # Value function learning
        for i in range(args.train_v_iters):
            vf_optimizer.zero_grad()
            loss_v = compute_loss_v(batch)
            loss_v.backward()
            vf_optimizer.step()

        # Log changes from update
        kl, ent = pi_info['kl'], pi_info_old['ent']
        logs['kl'] += [kl]
        logs['ent'] += [ent]
        logs['loss_v'] += [loss_v.item()]
        logs['loss_pi'] += [loss_pi.item()]

    # Prepare for interaction with environment
    start_time = time.time()
    o, ep_ret, ep_len = env.reset(), 0, 0

    ep_count = 0  # just for logging purpose, number of episodes run
    # Main loop: collect experience in env and update/log each epoch
    for epoch in range(args.epochs):
        for t in range(steps_per_epoch):
            a, v, logp = ac.step(torch.as_tensor(o, dtype=torch.float32).to(args.device))

            next_o, r, d, _ = env.step(a)
            ep_ret += r
            ep_len += 1

            # save and log
            buf.store(o, a, r, v, logp)
            if ep_count % 100 == 0:
                frame = env.render(mode='rgb_array')
                # uncomment this line if you want to log to tensorboard (can be memory intensive)
                #gif_frames.append(frame)
                #gif_frames.append(PIL.Image.fromarray(frame).resize([64,64]))  # you can try this downsize version if you are resource constrained
                time.sleep(0.01)
            
            # Update obs (critical!)
            o = next_o

            timeout = ep_len == args.max_ep_len
            terminal = d or timeout
            epoch_ended = t==steps_per_epoch-1

            if terminal or epoch_ended:
                # if trajectory didn't reach terminal state, bootstrap value target
                if timeout or epoch_ended:
                    _, v, _ = ac.step(torch.as_tensor(o, dtype=torch.float32).to(args.device))
                else:
                    v = 0
                buf.finish_path(v)
                if terminal:
                    # only save EpRet / EpLen if trajectory finished
                    logs['ep_ret'] += [ep_ret]
                    logs['ep_len'] += [ep_len]
                    ep_count += 1

                o, ep_ret, ep_len = env.reset(), 0, 0

                # save a video to tensorboard so you can view later
                if len(gif_frames) != 0:
                    vid = np.stack(gif_frames)
                    vid_tensor = vid.transpose(0,3,1,2)[None]
                    writer.add_video('rollout', vid_tensor, epoch, fps=50)
                    gif_frames = []
                    writer.flush()
                    print('wrote video')

        # Perform VPG update!
        update()

        if epoch % 10 == 0:
            vals = {key: np.mean(val) for key, val in logs.items()}
            for key in vals:
                writer.add_scalar(key, vals[key], epoch)
            writer.flush()
            print('Epoch', epoch, vals)
            logs = defaultdict(lambda: [])
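
For reference, the standard forms of the losses the TODOs ask for look like this in PyTorch (a sketch that assumes psi is the advantage/weight from the buffer and that args.clip_ratio exists; it is not necessarily the assignment's expected solution):

# Vanilla policy gradient: loss_pi = -E[log pi(a|s) * psi]
loss_pi_vpg = -(logp * psi).mean()

# Clipped PPO surrogate: loss_pi = -E[min(r * psi, clip(r, 1-eps, 1+eps) * psi)]
ratio = torch.exp(logp - logp_old)
clipped = torch.clamp(ratio, 1 - args.clip_ratio, 1 + args.clip_ratio) * psi
loss_pi_ppo = -torch.min(ratio * psi, clipped).mean()

# Value function loss: mean squared error against the return target
loss_v = ((v - ret)**2).mean()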
Code example #6
File: main.py  Project: eth-sri/ACE
def run(args=None):
    device = 'cuda' if torch.cuda.is_available() and (
        not args.no_cuda) else 'cpu'
    num_train, train_loader, test_loader, input_size, input_channel, n_class = get_loaders(
        args)

    lossFn = nn.CrossEntropyLoss(reduction='none')
    evalFn = lambda x: torch.max(x, dim=1)[1]

    net = get_net(device,
                  args.dataset,
                  args.net,
                  input_size,
                  input_channel,
                  n_class,
                  load_model=args.load_model,
                  net_dim=args.cert_net_dim
                  )  #, feature_extract=args.core_feature_extract)

    timestamp = int(time.time())
    model_signature = '%s/%s/%d/%s_%.5f/%d' % (args.dataset, args.exp_name,
                                               args.exp_id, args.net,
                                               args.train_eps, timestamp)
    model_dir = args.root_dir + 'models_new/%s' % (model_signature)
    args.model_dir = model_dir
    count_vars(args, net)
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)

    if isinstance(net, UpscaleNet):
        relaxed_net = None
        relu_ids = None
    else:
        relaxed_net = RelaxedNetwork(net.blocks, args.n_rand_proj).to(device)
        relu_ids = relaxed_net.get_relu_ids()

    if "nat" in args.train_mode:
        cnet = CombinedNetwork(net,
                               relaxed_net,
                               lossFn=lossFn,
                               evalFn=evalFn,
                               device=device,
                               no_r_net=True).to(device)
    else:
        dummy_input = torch.rand((1, ) + net.dims[0],
                                 device=device,
                                 dtype=torch.float32)
        cnet = CombinedNetwork(net,
                               relaxed_net,
                               lossFn=lossFn,
                               evalFn=evalFn,
                               device=device,
                               dummy_input=dummy_input).to(device)

    n_epochs, test_nat_loss, test_nat_acc, test_adv_loss, test_adv_acc = args.n_epochs, None, None, None, None

    if 'train' in args.train_mode:
        tb_writer = SummaryWriter(model_dir)
        stats = Statistics(len(train_loader), tb_writer, model_dir)
        args_file = os.path.join(model_dir, 'args.json')
        with open(args_file, 'w') as fou:
            json.dump(vars(args), fou, indent=4)
        write_config(args, os.path.join(model_dir, 'run_config.txt'))

        eps = 0
        epoch = 0
        lr = args.lr
        n_epochs = args.n_epochs

        if "COLT" in args.train_mode:
            relu_stable = args.relu_stable
            # if args.layers is None:
            #     args.layers = [-2, -1] + relu_ids
            layers = get_layers(args.train_mode,
                                cnet,
                                n_attack_layers=args.n_attack_layers,
                                protected_layers=args.protected_layers)
        elif "adv" in args.train_mode:
            relu_stable = None
            layers = [-1, -1]
            args.mix = False
        elif "natural" in args.train_mode:
            relu_stable = None
            layers = [-2, -2]
            args.nat_factor = 1
            args.mix = False
        elif "diffAI" in args.train_mode:
            relu_stable = None
            layers = [-2, -2]
        else:
            assert False, "Unknown train mode %s" % args.train_mode

        print('Saving model to:', model_dir)
        print('Training layers: ', layers)

        for j in range(len(layers) - 1):
            opt, lr_scheduler = get_opt(cnet.net,
                                        args.opt,
                                        lr,
                                        args.lr_step,
                                        args.lr_factor,
                                        args.n_epochs,
                                        train_loader,
                                        args.lr_sched,
                                        fixup="fixup" in args.net)

            curr_layer_idx = layers[j + 1]
            eps_old = eps
            eps = get_scaled_eps(args, layers, relu_ids, curr_layer_idx, j)

            kappa_sched = Scheduler(0.0 if args.mix else 1.0, 1.0,
                                    num_train * args.mix_epochs, 0)
            beta_sched = Scheduler(
                args.beta_start if args.mix else args.beta_end, args.beta_end,
                args.train_batch * len(train_loader) * args.mix_epochs, 0)
            eps_sched = Scheduler(eps_old if args.anneal else eps, eps,
                                  num_train * args.anneal_epochs, 0)

            layer_dir = '{}/{}'.format(model_dir, curr_layer_idx)
            if not os.path.exists(layer_dir):
                os.makedirs(layer_dir)

            print('\nnew train phase: eps={:.5f}, lr={:.2e}, curr_layer={}\n'.
                  format(eps, lr, curr_layer_idx))

            for curr_epoch in range(n_epochs):
                train(device,
                      epoch,
                      args,
                      j + 1,
                      layers,
                      cnet,
                      eps_sched,
                      kappa_sched,
                      opt,
                      train_loader,
                      lr_scheduler,
                      relu_ids,
                      stats,
                      relu_stable,
                      relu_stable_protected=args.relu_stable_protected,
                      beta_sched=beta_sched)

                if isinstance(lr_scheduler, optim.lr_scheduler.StepLR
                              ) and curr_epoch >= args.mix_epochs:
                    lr_scheduler.step()

                if (epoch + 1) % args.test_freq == 0:
                    with torch.no_grad():
                        test_nat_loss, test_nat_acc, test_adv_loss, test_adv_acc = test(
                            device,
                            args,
                            cnet,
                            test_loader if args.test_set == "test" else
                            train_loader, [curr_layer_idx],
                            stats=stats,
                            log_ind=(epoch + 1) % n_epochs == 0)

                if (epoch + 1) % args.test_freq == 0 or (epoch +
                                                         1) % n_epochs == 0:
                    torch.save(
                        net.state_dict(),
                        os.path.join(layer_dir, 'net_%d.pt' % (epoch + 1)))
                    torch.save(
                        opt.state_dict(),
                        os.path.join(layer_dir, 'opt_%d.pt' % (epoch + 1)))

                stats.update_tb(epoch)
                epoch += 1
            relu_stable = None if relu_stable is None else relu_stable * args.relu_stable_layer_dec
            lr = lr * args.lr_layer_dec
        if args.cert:
            with torch.no_grad():
                diffAI_cert(
                    device,
                    args,
                    cnet,
                    test_loader if args.test_set == "test" else train_loader,
                    stats=stats,
                    log_ind=True,
                    epoch=epoch,
                    domains=args.cert_domain)
    elif args.train_mode == 'print':
        print('printing network to:', args.out_net_file)
        dummy_input = torch.randn(1,
                                  input_channel,
                                  input_size,
                                  input_size,
                                  device='cuda')
        net.skip_norm = True
        torch.onnx.export(net, dummy_input, args.out_net_file, verbose=True)
    elif args.train_mode == 'test':
        with torch.no_grad():
            test(device,
                 args,
                 cnet,
                 test_loader if args.test_set == "test" else train_loader,
                 [-1],
                 log_ind=True)
    elif args.train_mode == "cert":
        tb_writer = SummaryWriter(model_dir)
        stats = Statistics(len(train_loader), tb_writer, model_dir)
        args_file = os.path.join(model_dir, 'args.json')
        with open(args_file, 'w') as fou:
            json.dump(vars(args), fou, indent=4)
        write_config(args, os.path.join(model_dir, 'run_config.txt'))
        print('Saving results to:', model_dir)
        with torch.no_grad():
            diffAI_cert(
                device,
                args,
                cnet,
                test_loader if args.test_set == "test" else train_loader,
                stats=stats,
                log_ind=True,
                domains=args.cert_domain)
        exit(0)
    else:
        assert False, 'Unknown mode: {}!'.format(args.train_mode)

    return test_nat_loss, test_nat_acc, test_adv_loss, test_adv_acc
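
As with code example #1, run() expects a fully populated args namespace; a minimal driver (hypothetical, assuming the project exposes an argument parser such as get_args) could be:

if __name__ == '__main__':
    args = get_args()  # hypothetical project-level argument parser
    nat_loss, nat_acc, adv_loss, adv_acc = run(args)
    print('natural acc:', nat_acc, 'adversarial acc:', adv_acc)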