# Assumed module-level imports for this section (normally defined once at the
# top of the file). `args`, `device`, `writer`, `Agent`, `Env`, and `AdvAttack`
# are expected to come from elsewhere in the module.
import numpy as np
import matplotlib.pyplot as plt
import torch
from torchvision.utils import make_grid


def test_attack():
    agent = Agent(args.img_stack, device)
    agent.load_param()
    env = Env(args.seed, args.img_stack, args.action_repeat)

    # Load the adversarial input; by default, the general attack perturbation.
    delta_s = np.load('param/adv_general.npy')
    if args.attack_type != 'general':
        file_path = 'param/adv_' + args.attack_type
        if args.attack_type == 'patch':
            file_path += '_' + args.patch_type
        file_path += '.npy'
        delta_s = np.load(file_path)

    # Visualize the adversarial perturbation, one subplot per stacked frame.
    fig = plt.figure(figsize=(8, 8))
    plt.title('Stack of ' + str(args.img_stack) + ' adversarial signals seen by Agent')
    plt.axis('off')
    columns, rows = args.img_stack // 2, args.img_stack // 2
    for i in range(1, columns * rows + 1):
        # Denormalize from [-1, 1) back to [0, 255] for display.
        img = (delta_s[i - 1] + 1) * 128
        fig.add_subplot(rows, columns, i)
        plt.imshow(img, cmap='gray')
    plt.show()

    for i_ep in range(10):
        score = 0
        state = env.reset()
        for t in range(1000):
            # Step range (within the 1000-step episode) during which the
            # perturbation is applied; rendered only at its endpoints.
            attack_render = [30, 40]
            if attack_render[0] <= t <= attack_render[1]:
                if t in attack_render:
                    s_with_ds = state + delta_s
                    # Clip to the valid image range, then denormalize for
                    # display; 0.9921875 == 127/128, so (x + 1) * 128 lands
                    # exactly in [0, 255].
                    s_with_ds = np.clip(s_with_ds, -1, 0.9921875)
                    s_with_ds = (s_with_ds + 1) * 128
                    title = 'Attack started' if t == attack_render[0] else 'Attack ended'
                    title += ' (showing first frame of 4 frames visible to policy)'
                    plt.imshow(s_with_ds[0], cmap='gray')
                    plt.axis('off')
                    plt.title(title)
                    plt.show()
                # Perturb the observation the agent acts on.
                state += delta_s
            action = agent.select_action(state)
            state_, reward, done, die = env.step(
                action * np.array([2., 1., 1.]) + np.array([-1., 0., 0.]))
            if args.render:
                env.render()
            score += reward
            state = state_
            if done:
                break
        print('Ep {}\tScore: {:.2f}\t'.format(i_ep, score))
def run_agent():
    agent = Agent(args.img_stack, device)
    agent.load_param()
    env = Env(args.seed, args.img_stack, args.action_repeat)
    state = env.reset()

    # Prepare the attack.
    attack = AdvAttack(args.attack_type)
    attack.initialize_perturbation(state.shape)
    attack.load_networks()

    for i_ep in range(50):
        score = 0
        state = env.reset()
        for t in range(1000):
            action = agent.select_action(state)
            # Update the buffer used for training the attack.
            attack.update_buffer(state)

            # Write the perturbed input stack to TensorBoard. The network
            # graph is logged only once: re-adding it (and closing the
            # writer) on every step, as the original code did, is redundant
            # and forces the writer to reopen a new event file each time.
            input_imgs_to_net = torch.tensor(attack.buffer['s'] + attack.buffer['d_s'])
            input_imgs_grid = make_grid(input_imgs_to_net[0].reshape(4, 1, 96, 96))
            writer.add_image('Four stack of input state with adversarial',
                             input_imgs_grid, global_step=t)
            if i_ep == 0 and t == 0:
                writer.add_graph(attack.net, input_imgs_to_net)

            # Train the attack on the buffered states.
            attack.train()

            state_, reward, done, die = env.step(
                action * np.array([2., 1., 1.]) + np.array([-1., 0., 0.]))
            if args.render:
                env.render()
            score += reward
            state = state_
            if done or die:
                break
        print('Ep {}\tScore: {:.2f}\t'.format(i_ep, score))
    writer.close()
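
# A minimal entry-point sketch, assuming `args` is built by an argparse parser
# at module scope. The `--mode` flag is hypothetical (not part of the original
# script); it simply dispatches between training the attack with run_agent()
# and evaluating a saved perturbation with test_attack().
if __name__ == '__main__':
    if args.mode == 'train':  # hypothetical flag, e.g. --mode train|test
        run_agent()
    else:
        test_attack()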