def collect_data(adapter_name):
    """Roll out the trained weight adapter on Newenv and record one row of
    [x0, x1, x2, blended control action] per step."""
    env = Newenv()
    model = Weight_adapter(3, 2).to(device)
    model.load_state_dict(torch.load(adapter_name))
    EP_NUM = 1500
    data_set = []
    for ep in range(EP_NUM):
        ep_loss = 0  # kept only for logging; never accumulated here
        state = env.reset()
        for t in range(env.max_iteration):
            state = torch.from_numpy(state).float().to(device)
            action = model(state).cpu().data.numpy()
            with torch.no_grad():
                ca1 = model_1(state)
                ca2 = model_2(state)
            # Blend the two base controllers with the adapter's weights.
            control_action = ca1 * action[0] + ca2 * action[1]
            next_state, reward, done = env.step(control_action.cpu().data.numpy()[0], smoothness=1)
            control_action = np.clip(control_action.cpu().data.numpy()[0], -1, 1)
            data_set.append([state.cpu().data.numpy()[0],
                             state.cpu().data.numpy()[1],
                             state.cpu().data.numpy()[2],
                             control_action])
            state = next_state
            if done:
                break
        print(ep_loss, t)
    return np.array(data_set)
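# A minimal usage sketch (assumed workflow, not part of the original scripts):
# collect_data returns an array with one row per step, [x0, x1, x2, u], which can
# be saved as a supervised state -> action dataset. The checkpoint and output
# paths below are placeholders.
def example_collect_dataset(adapter_ckpt='weight_adapter.pth', out_path='adapter_dataset.npy'):
    data = collect_data(adapter_ckpt)
    print('collected', data.shape[0], 'samples with', data.shape[1], 'columns')
    np.save(out_path, data)
    return data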
def test(adapter_name=None, state_list=None, renew=False, mode='switch', INDI_NAME=None):
    """Evaluate one controller-composition strategy on Newenv for EP_NUM episodes.
    Returns per-episode reward, fuel for episodes that survive past step 195,
    the initial states, and the control actions recorded during episode 0."""
    print(mode)
    env = Newenv()
    EP_NUM = 500
    if mode == 'switch':
        model = DQN(3, 2).to(device)
        model.load_state_dict(torch.load(adapter_name))
    if mode == 'weight':
        model = Weight_adapter(3, 2).to(device)
        model.load_state_dict(torch.load(adapter_name))
    if mode == 'individual':
        Individual.load_state_dict(torch.load(INDI_NAME))
    if renew:
        state_list = []
    fuel_list = []
    ep_reward = []
    trajectory = []
    safe = []
    unsafe = []
    control_action_list = []
    for ep in range(EP_NUM):
        if renew:
            state = env.reset()
            state_list.append(state)
        else:
            # Reuse a previously sampled initial state for a fair comparison.
            assert len(state_list) == EP_NUM
            state = env.reset(state_list[ep][0], state_list[ep][1], state_list[ep][2])
        ep_r = 0
        fuel = 0
        if ep == 0:
            trajectory.append(state)
        for t in range(env.max_iteration):
            state = torch.from_numpy(state).float().to(device)
            if mode == 'switch':
                # DQN adapter picks which base controller acts at this step.
                action = model.act(state, epsilon=0)
                with torch.no_grad():
                    if action == 0:
                        control_action = model_1(state).cpu().data.numpy()[0]
                    elif action == 1:
                        control_action = MController(state).cpu().data.numpy()
                    else:
                        assert False
                        control_action = 0
            elif mode == 'ppo':
                action = ppo.choose_action(state.cpu().data.numpy(), True)
                ca1 = model_1(state).cpu().data.numpy()[0]
                ca2 = MController(state).cpu().data.numpy()
                control_action = action[0] * ca1 + action[1] * ca2
            elif mode == 'weight':
                # Weight adapter outputs blending coefficients for the two controllers.
                action = model(state).cpu().data.numpy()
                ca1 = model_1(state).cpu().data.numpy()[0]
                # ca2 = model_2(state).cpu().data.numpy()[0]
                ca2 = MController(state).cpu().data.numpy()
                control_action = action[0] * ca1 + action[1] * ca2
                if ep == 0:
                    print(t, state.cpu().data.numpy(), control_action, action)
            elif mode == 'd1':
                control_action = model_1(state).cpu().data.numpy()[0]
            elif mode == 'd2':
                # control_action = model_2(state).cpu().data.numpy()[0]
                control_action = (0.634 * state[0] - 0.296 * state[1] - 0.153 * state[2]
                                  + 0.053 * state[0]**2 - 1.215 * state[0]**3).cpu().data.numpy()
                if ep == 0:
                    print(state, control_action)
            elif mode == 'individual':
                if ATTACK:
                    # Perturb the observation with an FGSM adversarial attack.
                    delta, original = fgsm(Individual, state)
                    # delta = torch.from_numpy(np.random.uniform(low=-0.04, high=0.04, size=state.shape)).float().to(device)
                    control_action = Individual(state + delta).cpu().data.numpy()[0]
                else:
                    control_action = Individual(state).cpu().data.numpy()[0]
            next_state, reward, done = env.step(control_action, smoothness=0.5)
            control_action = np.clip(control_action, -1, 1)
            fuel += abs(control_action) * 10
            state = next_state
            if ep == 0:
                trajectory.append(state)
                control_action_list.append(control_action)
            ep_r += reward
            if done:
                break
        ep_reward.append(ep_r)
        if t >= 195:
            # Episode survived (nearly) the full horizon: count it as safe.
            fuel_list.append(fuel)
            safe.append(state_list[ep])
        else:
            print(ep, state_list[ep])
            unsafe.append(state_list[ep])
    # safe = np.array(safe)
    # unsafe = np.array(unsafe)
    # np.save('./plot/'+mode+'_safe.npy', safe)
    # np.save('./plot/'+mode+'_unsafe.npy', unsafe)
    return ep_reward, np.array(fuel_list), state_list, control_action_list
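# A sketch of how the evaluation above can be reused across modes (assumed
# workflow, not from the original scripts): call once with renew=True to sample
# EP_NUM initial states, then pass the same state_list back in so every
# controller is scored on identical starts. The checkpoint path is a placeholder.
def example_compare_modes(dqn_ckpt='dqn_adapter.pth'):
    ep_reward, fuel, state_list, _ = test(adapter_name=dqn_ckpt, renew=True, mode='switch')
    # fuel is only recorded for episodes that stay safe, so it may be empty.
    results = {'switch': (np.mean(ep_reward), np.mean(fuel) if len(fuel) else float('nan'))}
    for m in ['d1', 'd2']:
        ep_reward, fuel, _, _ = test(state_list=state_list, renew=False, mode=m)
        results[m] = (np.mean(ep_reward), np.mean(fuel) if len(fuel) else float('nan'))
    return results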
def test(adapter_name=None, state_list=None, renew=False, mode='switch', INDI_NAME=None):
    """Evaluate one controller-composition strategy on the Osillator env.
    The first episode's trajectory is plotted and saved to trajectory.png."""
    print(mode)
    env = Osillator()
    EP_NUM = 1
    if mode == 'switch':
        model = DQN(2, 2).to(device)
        model.load_state_dict(torch.load(adapter_name))
    if mode == 'weight':
        model = Weight_adapter(2, 2).to(device)
        model.load_state_dict(torch.load(adapter_name))
    if mode == 'individual':
        Individual.load_state_dict(torch.load(INDI_NAME))
    if renew:
        state_list = []
    fuel_list = []
    ep_reward = []
    trajectory = []
    safe = []
    unsafe = []
    control_action_list = []
    for ep in range(EP_NUM):
        if renew:
            state = env.reset()
            state_list.append(state)
        else:
            # Reuse a previously sampled initial state for a fair comparison.
            assert len(state_list) == EP_NUM
            state = env.reset(state_list[ep][0], state_list[ep][1])
        ep_r = 0
        fuel = 0
        if ep == 0:
            trajectory.append(state)
        for t in range(env.max_iteration):
            # attack happens here
            # state += np.random.uniform(low=-0.35, high=0.35, size=state.shape)
            state = torch.from_numpy(state).float().to(device)
            if mode == 'switch':
                # DQN adapter picks which base controller acts at this step.
                action = model.act(state, epsilon=0)
                with torch.no_grad():
                    if action == 0:
                        control_action = model_1(state).cpu().data.numpy()[0]
                    elif action == 1:
                        control_action = model_2(state).cpu().data.numpy()[0]
                    else:
                        assert False
                        control_action = 0
            elif mode == 'weight':
                # Weight adapter outputs blending coefficients for the two controllers.
                action = model(state).cpu().data.numpy()
                ca1 = model_1(state).cpu().data.numpy()[0]
                ca2 = model_2(state).cpu().data.numpy()[0]
                control_action = action[0] * ca1 + action[1] * ca2
                if ep == 0:
                    print(t, state, control_action, action, ca1, ca2)
            elif mode == 'average':
                ca1 = model_1(state).cpu().data.numpy()[0]
                ca2 = model_2(state).cpu().data.numpy()[0]
                control_action = (ca1 + ca2) / 2
            elif mode == 'planning':
                ca1 = model_1(state).cpu().data.numpy()[0]
                ca2 = model_2(state).cpu().data.numpy()[0]
                control_action = plan(state, ca1, ca2)
            elif mode == 'd1':
                control_action = model_1(state).cpu().data.numpy()[0]
            elif mode == 'd2':
                control_action = model_2(state).cpu().data.numpy()[0]
            elif mode == 'individual':
                # delta, original = fgsm(Individual, state)
                # if ep == 0:
                #     print(delta, original)
                # control_action = Individual(state+delta).cpu().data.numpy()[0]
                control_action = Individual(state).cpu().data.numpy()[0]
            next_state, reward, done = env.step(control_action)
            control_action = np.clip(control_action, -1, 1)
            fuel += abs(control_action) * 20
            state = next_state
            if ep == 0:
                trajectory.append(state)
                control_action_list.append(control_action)
            ep_r += reward
            if done:
                break
        ep_reward.append(ep_r)
        if t >= 95:
            # Episode survived (nearly) the full horizon: count it as safe.
            fuel_list.append(fuel)
            safe.append(state_list[ep])
        else:
            print(ep, state_list[ep])
            unsafe.append(state_list[ep])
        if ep == 0:
            trajectory = np.array(trajectory)
            # plt.figure()
            plt.plot(trajectory[:, 0], trajectory[:, 1], label=mode)
            plt.legend()
            plt.savefig('trajectory.png')
    # safe = np.array(safe)
    # unsafe = np.array(unsafe)
    # plt.figure()
    # plt.scatter(safe[:, 0], safe[:, 1], c='green')
    # plt.scatter(unsafe[:, 0], unsafe[:, 1], c='red')
    # plt.savefig('./safe_sample_plot/'+ mode +'.png')
    return ep_reward, np.array(fuel_list), state_list, np.array(control_action_list)
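# A sketch for overlaying single-episode trajectories (assumed workflow, not from
# the original scripts): with EP_NUM = 1 and plt.figure() left commented out above,
# successive calls draw onto the same axes, so trajectory.png accumulates one
# labelled curve per mode. The checkpoint path is a placeholder.
def example_plot_trajectories(weight_ckpt='weight_adapter.pth'):
    # Sample the single initial state with the weight adapter, then replay the
    # same start under the other composition strategies.
    _, _, state_list, _ = test(adapter_name=weight_ckpt, renew=True, mode='weight')
    for m in ['average', 'd1', 'd2']:
        test(state_list=state_list, renew=False, mode=m)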