Example 1
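# collect_data rolls out a trained weight adapter in Newenv: at every step the
# adapter's two outputs weight the actions of the base controllers model_1 and
# model_2 (module-level globals in the original file), and each row of the
# returned array stores the three state components plus the clipped blended
# control action.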
def collect_data(adapter_name):
	env = Newenv()
	model = Weight_adapter(3, 2).to(device)
	model.load_state_dict(torch.load(adapter_name))
	EP_NUM = 1500
	data_set = []
	for ep in range(EP_NUM):
		ep_loss = 0
		state = env.reset()
		for t in range(env.max_iteration):
			state = torch.from_numpy(state).float().to(device)
			action = model(state).cpu().data.numpy()
			with torch.no_grad():
				ca1 = model_1(state)
				ca2 = model_2(state)
			control_action = ca1*action[0] + ca2*action[1]

			next_state, reward, done = env.step(control_action.cpu().data.numpy()[0], smoothness=1)
			control_action = np.clip(control_action.cpu().data.numpy()[0], -1, 1)
			data_set.append([state.cpu().data.numpy()[0], state.cpu().data.numpy()[1], state.cpu().data.numpy()[2], control_action])
			state = next_state
			if done:
				break
		print(ep_loss, t)
	return np.array(data_set)
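A minimal usage sketch, assuming the surrounding module (device, Newenv, Weight_adapter, model_1, model_2) is already defined as in the original file; the checkpoint and output filenames below are hypothetical:

# Hypothetical filenames; collect_data returns an (N, 4) array of
# [state[0], state[1], state[2], blended action] rows.
data = collect_data('weight_adapter.pth')
np.save('adapter_dataset.npy', data)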
Example 2
def test(adapter_name=None,
         state_list=None,
         renew=False,
         mode='switch',
         INDI_NAME=None):
    print(mode)
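    # Evaluate one controller configuration on Newenv for EP_NUM episodes.
    # `mode` selects how the control action is produced: 'switch' (a DQN picks
    # model_1 or MController), 'ppo' / 'weight' (the two base actions are
    # weighted and summed), 'd1' (model_1 alone), 'd2' (a hand-coded
    # polynomial), or 'individual' (a standalone network, optionally perturbed
    # by FGSM). model_1, MController, Individual, ppo, fgsm and ATTACK are
    # module-level globals in the original file; an episode counts as safe if
    # it survives at least 195 steps.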
    env = Newenv()
    EP_NUM = 500
    if mode == 'switch':
        model = DQN(3, 2).to(device)
        model.load_state_dict(torch.load(adapter_name))
    if mode == 'weight':
        model = Weight_adapter(3, 2).to(device)
        model.load_state_dict(torch.load(adapter_name))
    if mode == 'individual':
        Individual.load_state_dict(torch.load(INDI_NAME))
    if renew:
        state_list = []
    fuel_list = []
    ep_reward = []
    trajectory = []
    safe = []
    unsafe = []
    control_action_list = []
    for ep in range(EP_NUM):
        if renew:
            state = env.reset()
            state_list.append(state)
        else:
            assert len(state_list) == EP_NUM
            state = env.reset(state_list[ep][0], state_list[ep][1],
                              state_list[ep][2])
        ep_r = 0
        fuel = 0
        if ep == 0:
            trajectory.append(state)
        for t in range(env.max_iteration):
            state = torch.from_numpy(state).float().to(device)
            if mode == 'switch':
                action = model.act(state, epsilon=0)
                with torch.no_grad():
                    if action == 0:
                        control_action = model_1(state).cpu().data.numpy()[0]
                    elif action == 1:
                        control_action = MController(state).cpu().data.numpy()
                    else:
                        assert False
                        control_action = 0
            elif mode == 'ppo':
                action = ppo.choose_action(state.cpu().data.numpy(), True)
                ca1 = model_1(state).cpu().data.numpy()[0]
                ca2 = MController(state).cpu().data.numpy()
                control_action = action[0] * ca1 + action[1] * ca2

            elif mode == 'weight':
                action = model(state).cpu().data.numpy()
                ca1 = model_1(state).cpu().data.numpy()[0]
                # ca2 = model_2(state).cpu().data.numpy()[0]
                ca2 = MController(state).cpu().data.numpy()
                control_action = action[0] * ca1 + action[1] * ca2
                if ep == 0:
                    print(t, state.cpu().data.numpy(), control_action, action)

            elif mode == 'd1':
                control_action = model_1(state).cpu().data.numpy()[0]
            elif mode == 'd2':
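                # Hand-coded cubic polynomial controller, used here instead of
                # the commented-out model_2 network.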
                # control_action = model_2(state).cpu().data.numpy()[0]
                control_action = (0.634 * state[0] - 0.296 * state[1] -
                                  0.153 * state[2] + 0.053 * state[0]**2 -
                                  1.215 * state[0]**3).cpu().data.numpy()
                if ep == 0:
                    print(state, control_action)
            elif mode == 'individual':
                if ATTACK:
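                    # fgsm (module-level in the original file) returns an
                    # adversarial perturbation delta; the controller then acts
                    # on the perturbed state + delta.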
                    delta, original = fgsm(Individual, state)
                    # delta = torch.from_numpy(np.random.uniform(low=-0.04, high=0.04, size=state.shape)).float().to(device)
                    control_action = Individual(state +
                                                delta).cpu().data.numpy()[0]
                else:
                    control_action = Individual(state).cpu().data.numpy()[0]

            next_state, reward, done = env.step(control_action, smoothness=0.5)
            control_action = np.clip(control_action, -1, 1)
            fuel += abs(control_action) * 10
            state = next_state
            if ep == 0:
                trajectory.append(state)
                control_action_list.append(control_action)
            ep_r += reward
            if done:
                break

        ep_reward.append(ep_r)
        if t >= 195:
            fuel_list.append(fuel)
            safe.append(state_list[ep])
        else:
            print(ep, state_list[ep])
            unsafe.append(state_list[ep])
    # safe = np.array(safe)
    # unsafe = np.array(unsafe)
    # np.save('./plot/'+mode+'_safe.npy', safe)
    # np.save('./plot/'+mode+'_unsafe.npy', unsafe)
    return ep_reward, np.array(fuel_list), state_list, control_action_list
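A sketch of how this harness might be driven for a fair comparison across modes, assuming the module context above; the checkpoint filenames are hypothetical. Fresh initial states are generated once and then reused, so safety counts and fuel totals are directly comparable:

# Hypothetical checkpoint names; fuel arrays contain only the safe episodes.
rewards, fuel, states, _ = test(adapter_name='dqn_switch.pth', renew=True, mode='switch')
print('switch', 'safe episodes:', len(fuel), 'mean fuel:', fuel.mean() if len(fuel) else float('nan'))
for m, ckpt in [('weight', 'weight_adapter.pth'), ('d1', None), ('d2', None)]:
    r, f, _, _ = test(adapter_name=ckpt, state_list=states, renew=False, mode=m)
    print(m, 'safe episodes:', len(f), 'mean fuel:', f.mean() if len(f) else float('nan'))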
Example 3
def test(adapter_name=None,
         state_list=None,
         renew=False,
         mode='switch',
         INDI_NAME=None):
    print(mode)
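    # Variant of the harness above for the 2-dimensional Osillator environment,
    # run for a single episode (EP_NUM = 1). Besides 'switch', 'weight', 'd1',
    # 'd2' and 'individual', it adds 'average' (mean of the two base actions)
    # and 'planning' (a module-level plan() routine), blends model_1 with
    # model_2 rather than MController, and plots the first episode's trajectory
    # (state[0] vs state[1]) to trajectory.png.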
    env = Osillator()
    EP_NUM = 1
    if mode == 'switch':
        model = DQN(2, 2).to(device)
        model.load_state_dict(torch.load(adapter_name))
    if mode == 'weight':
        model = Weight_adapter(2, 2).to(device)
        model.load_state_dict(torch.load(adapter_name))
    if mode == 'individual':
        Individual.load_state_dict(torch.load(INDI_NAME))
    if renew:
        state_list = []
    fuel_list = []
    ep_reward = []
    trajectory = []
    safe = []
    unsafe = []
    control_action_list = []
    for ep in range(EP_NUM):
        if renew:
            state = env.reset()
            state_list.append(state)
        else:
            assert len(state_list) == EP_NUM
            state = env.reset(state_list[ep][0], state_list[ep][1])
        ep_r = 0
        fuel = 0
        if ep == 0:
            trajectory.append(state)
        for t in range(env.max_iteration):
            # attack happens here
            # state += np.random.uniform(low=-0.35, high=0.35, size=state.shape)
            state = torch.from_numpy(state).float().to(device)
            if mode == 'switch':
                action = model.act(state, epsilon=0)
                with torch.no_grad():
                    if action == 0:
                        control_action = model_1(state).cpu().data.numpy()[0]
                    elif action == 1:
                        control_action = model_2(state).cpu().data.numpy()[0]
                    else:
                        assert False
                        control_action = 0
            elif mode == 'weight':
                action = model(state).cpu().data.numpy()
                ca1 = model_1(state).cpu().data.numpy()[0]
                ca2 = model_2(state).cpu().data.numpy()[0]
                control_action = action[0] * ca1 + action[1] * ca2
                if ep == 0:
                    print(t, state, control_action, action, ca1, ca2)
            elif mode == 'average':
                ca1 = model_1(state).cpu().data.numpy()[0]
                ca2 = model_2(state).cpu().data.numpy()[0]
                control_action = (ca1 + ca2) / 2
            elif mode == 'planning':
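                # plan() is a module-level routine (not shown here) that
                # computes the control action from the state and the two
                # candidate actions ca1 and ca2.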
                ca1 = model_1(state).cpu().data.numpy()[0]
                ca2 = model_2(state).cpu().data.numpy()[0]
                control_action = plan(state, ca1, ca2)

            elif mode == 'd1':
                control_action = model_1(state).cpu().data.numpy()[0]

            elif mode == 'd2':
                control_action = model_2(state).cpu().data.numpy()[0]

            elif mode == 'individual':
                # delta, original = fgsm(Individual, state)
                # if ep == 0:
                # 	print(delta, original)
                # control_action = Individual(state+delta).cpu().data.numpy()[0]
                control_action = Individual(state).cpu().data.numpy()[0]

            next_state, reward, done = env.step(control_action)
            control_action = np.clip(control_action, -1, 1)
            fuel += abs(control_action) * 20
            state = next_state
            if ep == 0:
                trajectory.append(state)
                control_action_list.append(control_action)
            ep_r += reward
            if done:
                break

        ep_reward.append(ep_r)
        if t >= 95:
            fuel_list.append(fuel)
            safe.append(state_list[ep])
        else:
            print(ep, state_list[ep])
            unsafe.append(state_list[ep])
        if ep == 0:
            trajectory = np.array(trajectory)
            # plt.figure()
            plt.plot(trajectory[:, 0], trajectory[:, 1], label=mode)
            plt.legend()
            plt.savefig('trajectory.png')
    # safe = np.array(safe)
    # unsafe = np.array(unsafe)
    # plt.figure()
    # plt.scatter(safe[:, 0], safe[:, 1], c='green')
    # plt.scatter(unsafe[:, 0], unsafe[:, 1], c='red')
    # plt.savefig('./safe_sample_plot/'+ mode +'.png')
    return ep_reward, np.array(fuel_list), state_list, np.array(
        control_action_list)
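A sketch of one way to overlay trajectories from several modes, assuming the module context above; because plt.figure() is commented out inside test, successive calls keep drawing onto the same figure. The checkpoint filename is hypothetical:

# Run 'weight' once with a fresh initial state, then replay the same state with
# the single controllers; each call adds a labelled curve to trajectory.png.
_, _, states, _ = test(adapter_name='weight_adapter.pth', renew=True, mode='weight')
test(state_list=states, renew=False, mode='d1')
test(state_list=states, renew=False, mode='d2')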