Example #1
    def state_to_tensor(self, state):
        key = str(state)
        tensor = self.tensor_cache.get(key)
        if tensor is None:
            tensor = SIMULATOR.state_to_tensor(state).to(Device.get_device())
            self.tensor_cache[key] = tensor

        return tensor
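# The method above memoizes state-to-tensor conversions keyed by str(state).
# A minimal self-contained sketch of the same caching pattern follows; the
# TensorCache class and the lambda converter are illustrative assumptions that
# stand in for the project's SIMULATOR / Device helpers.
import numpy as np


class TensorCache:
    def __init__(self, convert):
        self.convert = convert        # callable mapping a state to an array
        self.tensor_cache = {}        # str(state) -> cached array

    def state_to_tensor(self, state):
        key = str(state)
        tensor = self.tensor_cache.get(key)
        if tensor is None:            # convert only on a cache miss
            tensor = self.convert(state)
            self.tensor_cache[key] = tensor
        return tensor


# usage sketch: identical states return the cached object instead of reconverting
cache = TensorCache(lambda s: np.asarray(s, dtype=np.float32))
a = cache.state_to_tensor([[0, 7], [0, 0]])
b = cache.state_to_tensor([[0, 7], [0, 0]])
assert a is b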
def run_exper(model, steps, get_features, pre_proc_features):
    r_tup, e_tup = [], []
    rover_poss = []
    total_stats = {'total': 0, 'good': 0}

    from environment import SIMULATOR

    # initializing our environment
    my_sim = SIMULATOR()

    # beginning of an episode
    state_temp = my_sim.reset()
    observation = my_sim.state_to_tensor(state_temp)
    state_obs = observation
    total_moves = 0

    # main loop
    prev_input = None
    for i in range(steps):
        # preprocess the observation, set input as difference between images
        cur_input = observation

        # flatten the current observation (np.float was removed from NumPy; use float)
        x = cur_input.astype(float).ravel() if prev_input is not None else np.zeros(70)
        x = x[10:80] if prev_input is not None else x
        # drop every 10th element, then every 9th of the remainder
        x = np.array([x[i] for i in range(len(x)) if i % 10 != 0])
        x = np.array([x[i] for i in range(len(x)) if (i - 8) % 9 != 0])

        prev_input = cur_input

        x, rover_pos = get_rover_pos(x, r_tup, e_tup, rover_poss)
        rover_poss.append(rover_pos)
        x = np.array(x)
        """
        x = x[x != 0]
        if(len(x) == 1):
            x = np.zeros(4)
            x = x.tolist()
            x.append(-7.)
            x = np.array(x)
        """

        x_t = pre_proc_features.fit_transform(x.reshape(-1, 1))
        x_t = x_t.reshape(1, INPUT_SIZE)[0]
        # forward the policy network and sample action according to the proba distribution
        proba = model.predict(np.expand_dims(x_t, axis=1).T)
        action = proba.argmax()

        #run one step
        state_temp, reward, done, r_tup, e_tup = my_sim.step(action)
        observation = my_sim.state_to_tensor(state_temp)
        #my_sim.render()
        total_moves += 1
        if (total_moves == MAX_STEPS):
            done = True
            total_moves = 0

        # if episode is over, reset to beginning
        if done:
            total_stats['total'] += 1
            so = np.asarray(state_obs).ravel().tolist()
            o = np.asarray(observation).ravel().tolist()
            #print("state obs ===============")
            #print(state_obs)
            #print("obs ===============")
            #print(observation)
            try:
                index_obs = so.index(7.0)
            except ValueError:
                index_obs = -1
            try:
                index_curr = o.index(7.0)
            except ValueError:
                index_curr = -1

            if (index_obs != -1 and index_curr == -1):
                #print("Good Game")
                #print(so)
                #print(o)
                total_stats['good'] += 1
            state_temp = my_sim.reset()
            observation = my_sim.state_to_tensor(state_temp)
            state_obs = observation
            rover_poss = []
            #my_sim.render()

    return total_stats
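# Hedged usage sketch for run_exper: the model comes from the project's
# get_model helper (used later in this file), while the MinMaxScaler below is
# only an assumed stand-in for pre_proc_features -- any transformer exposing
# fit_transform works, since that is the only method run_exper calls on it.
from sklearn.preprocessing import MinMaxScaler

model = get_model('sparse')                    # project helper, defined elsewhere
scaler = MinMaxScaler(feature_range=(-1, 1))   # assumed preprocessor
stats = run_exper(model, steps=1000,
                  get_features=None,           # not referenced inside run_exper
                  pre_proc_features=scaler)
print("episodes:", stats['total'], "successful:", stats['good'])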
def run_exper(model, steps, get_features, pre_proc_features):
    import time

    from environment import SIMULATOR

    # initializing our environment
    my_sim = SIMULATOR()

    # beginning of an episode
    state_temp = my_sim.reset()
    observation = my_sim.state_to_tensor(state_temp)
    r_tup, e_tup, rover_poss = [], [], []
    # main loop
    prev_input = None
    total_moves = 0
    MAX_MOVES = 25
    for i in range(steps):
        total_moves += 1
        start = time.perf_counter()
        cur_input = observation
        # flatten the current observation (np.float was removed from NumPy; use float)
        x = cur_input.astype(float).ravel() if prev_input is not None else np.zeros(70)
        x = x[10:80] if prev_input is not None else x
        # drop every 10th element, then every 9th of the remainder
        x = np.array([x[i] for i in range(len(x)) if i % 10 != 0])
        x = np.array([x[i] for i in range(len(x)) if (i - 8) % 9 != 0])

        x, rov_pos = get_rover_pos(x, r_tup, e_tup, rover_poss)
        x = np.array(x)
        rover_poss.append(rov_pos)
        """
        x = x[x != 0]
        if(len(x) == 1):
            x = np.zeros(4)
            x = x.tolist()
            x.append(-7.)
            x = np.array(x)
        """
        #print_map(x)
        x_t = pre_proc_features.fit_transform(x.reshape(-1, 1))
        x_t = x_t.reshape(1, INPUT_SIZE)[0]
        print("Shape = ", x_t.shape)
        prev_input = cur_input

        # forward the policy network and sample action according to the proba distribution
        #print_map(x)
        proba = model.predict(np.expand_dims(x_t, axis=1).T)
        end = time.perf_counter()
        action = proba[0].argmax()
        print("Time taken = ", end - start)

        #run one step
        state_temp, reward, done, r_tup, e_tup = my_sim.step(action)
        observation = my_sim.state_to_tensor(state_temp)
        my_sim.render()
        time.sleep(1)

        if total_moves == MAX_MOVES:
            total_moves = 0
            done = True
        # if episode is over, reset to beginning
        if done:
            state_temp = my_sim.reset()
            observation = my_sim.state_to_tensor(state_temp)
            my_sim.render()
            rover_poss = []
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--data_type',
                    default='sparse',
                    type=str,
                    help='Choose between encoded or sparse')
args = parser.parse_args()
data_type = args.data_type

model = get_model(data_type)

import numpy as np
import gym

# environment initialization (the custom SIMULATOR stands in for a gym env)
from environment import SIMULATOR
my_sim = SIMULATOR()
state_temp = my_sim.reset()
observation = my_sim.state_to_tensor(state_temp)
prev_input = None

# Hyperparameters to calculate discount rewards
gamma = 0.99

# initialization of variables used in the main loop
x_train, y_train, y_pred, rewards, r_tup, e_tup, rover_poss = [], [], [], [], [], [], []
reward_sum = 0
episode_nb = 0
resume = True
running_reward = None
EPOCHS_BEFORE_SAVING = 50
moves_count = 0
MAX_NEG_REWARD = -100
get_features, pre_proc_features = get_pre_proc_info(data_type)
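# gamma above is only declared in this excerpt; the discounting itself happens
# in the training loop that follows, which is cut off here. A minimal sketch of
# the standard discounted-and-normalized return computation it presumably
# feeds (the name discount_rewards is an assumption) looks like this:
def discount_rewards(episode_rewards, gamma=0.99):
    discounted = np.zeros(len(episode_rewards), dtype=np.float64)
    running = 0.0
    for t in reversed(range(len(episode_rewards))):
        running = episode_rewards[t] + gamma * running   # G_t = r_t + gamma * G_{t+1}
        discounted[t] = running
    # normalize so the policy-gradient update is better conditioned
    discounted -= discounted.mean()
    if discounted.std() > 0:
        discounted /= discounted.std()
    return discounted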