Example 1
# Imports and module-level context assumed by this snippet: `args` (parsed
# command-line flags), `device`, and the `Agent`/`Env` classes are defined
# in the surrounding script.
import numpy as np
import matplotlib.pyplot as plt

def test_attack():
    agent = Agent(args.img_stack, device)
    agent.load_param()
    env = Env(args.seed, args.img_stack, args.action_repeat)

    # Load the adversarial input; defaults to the general attack perturbation
    delta_s = np.load('param/adv_general.npy')
    if args.attack_type != 'general':
        file_path = 'param/adv_' + args.attack_type
        if args.attack_type == 'patch':
            file_path += '_' + args.patch_type
        file_path += '.npy'
        delta_s = np.load(file_path)
    # visualize the adversarial perturbation stack
    fig = plt.figure(figsize=(8, 8))
    plt.title('Stack of ' + str(args.img_stack) +
              ' adversarial signals seen by Agent')
    plt.axis('off')
    # lay the stacked frames out on a grid (2x2 when img_stack == 4)
    columns, rows = args.img_stack // 2, args.img_stack // 2
    for i in range(1, columns * rows + 1):
        # denormalize from [-1, 1) back to [0, 256) for display
        img = (delta_s[i - 1] + 1) * 128
        fig.add_subplot(rows, columns, i)
        plt.imshow(img, cmap='gray')
    plt.show()

    for i_ep in range(10):
        score = 0
        state = env.reset()

        for t in range(1000):
            # apply the attack only between steps 30 and 40 (inclusive),
            # and render the perturbed observation at the two endpoints
            attack_render = [30, 40]
            if attack_render[0] <= t <= attack_render[1]:
                if t in attack_render:
                    s_with_ds = state + delta_s
                    # clip to the valid normalized range [-1, 255/128 - 1]
                    # and denormalize for display
                    s_with_ds = np.clip(s_with_ds, -1, 0.9921875)
                    s_with_ds = (s_with_ds + 1) * 128
                    title = ('Attack started' if t == attack_render[0]
                             else 'Attack ended')
                    title += ' (showing first of the 4 frames visible to policy)'
                    plt.imshow(s_with_ds[0], cmap='gray')
                    plt.axis('off')
                    plt.title(title)
                    plt.show()
                state += delta_s

            action = agent.select_action(state)
            state_, reward, done, die = env.step(action *
                                                 np.array([2., 1., 1.]) +
                                                 np.array([-1., 0., 0.]))
            if args.render:
                env.render()
            score += reward
            state = state_
            if done or die:
                break

        print('Ep {}\tScore: {:.2f}'.format(i_ep, score))
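
A note on the constants above: the clip bound 0.9921875 is exactly 255/128 - 1, which suggests the environment normalizes 8-bit grayscale frames as x / 128 - 1, so valid pixel values lie in [-1, 255/128 - 1], and the (x + 1) * 128 used for display is the inverse map. A minimal sketch of that assumed convention (the helper names here are illustrative, not from the original source):

import numpy as np

def normalize(gray):
    # map an 8-bit grayscale frame in [0, 255] to roughly [-1, 1)
    return gray / 128.0 - 1.0

def denormalize(x):
    # invert normalize(); matches the (x + 1) * 128 used for display
    return (x + 1.0) * 128.0

frame = np.random.randint(0, 256, size=(96, 96)).astype(np.float32)
x = normalize(frame)
assert -1.0 <= x.min() and x.max() <= 0.9921875  # 255/128 - 1
assert np.allclose(denormalize(x), frame)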
Example 2
# Imports and module-level context assumed by this snippet: `args`, `device`,
# the `Agent`/`Env`/`AdvAttack` classes, and the TensorBoard `writer`
# (a SummaryWriter) are defined in the surrounding script.
import numpy as np
import torch
from torchvision.utils import make_grid

def run_agent():
    agent = Agent(args.img_stack, device)
    agent.load_param()
    env = Env(args.seed, args.img_stack, args.action_repeat)

    state = env.reset()

    # Prepare attack
    attack = AdvAttack(args.attack_type)
    attack.initialize_perturbation(state.shape)
    attack.load_networks()

    for i_ep in range(50):
        score = 0
        state = env.reset()

        for t in range(1000):
            action = agent.select_action(state)
            # update buffer for training the attack
            attack.update_buffer(state)

            # write the perturbed input stack and the attack network graph
            # to TensorBoard once, on the very first step
            if i_ep == 0 and t == 0:
                input_imgs_to_net = torch.tensor(
                    attack.buffer['s'] + attack.buffer['d_s'])
                input_imgs_grid = make_grid(
                    input_imgs_to_net[0].reshape(4, 1, 96, 96))
                writer.add_image(
                    'Four-frame stack of input state with adversarial',
                    input_imgs_grid)
                writer.add_graph(attack.net, input_imgs_to_net)

            # train attack
            attack.train()

            state_, reward, done, die = env.step(action *
                                                 np.array([2., 1., 1.]) +
                                                 np.array([-1., 0., 0.]))
            if args.render:
                env.render()
            score += reward
            state = state_
            if done or die:
                break

        print('Ep {}\tScore: {:.2f}'.format(i_ep, score))

    # flush and close the TensorBoard writer once all episodes finish
    writer.close()
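
Both snippets remap the agent's action with action * [2., 1., 1.] + [-1., 0., 0.] before stepping the environment. Assuming the policy outputs (steer, gas, brake) in [0, 1]^3 (e.g., samples from a Beta distribution), this stretches steering to CarRacing's native [-1, 1] range while leaving gas and brake in [0, 1]. A quick self-contained check of that assumed convention:

import numpy as np

def remap_action(a):
    # map a policy sample a in [0, 1]^3 (steer, gas, brake) to the
    # environment's native ranges: steer in [-1, 1], gas/brake in [0, 1]
    return a * np.array([2., 1., 1.]) + np.array([-1., 0., 0.])

assert np.allclose(remap_action(np.array([0.5, 0., 0.])), [0., 0., 0.])
assert np.allclose(remap_action(np.array([0., 1., 1.])), [-1., 1., 1.])
assert np.allclose(remap_action(np.array([1., 0., 0.])), [1., 0., 0.])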