Example #1
policy_freq = 2  # Number of iterations to wait before the policy network (Actor model) is updated
episode_reward = 0
max_episode_timesteps = 500

torch.manual_seed(seed)
np.random.seed(seed)
state_dim = 5
action_dim = 1
max_action = 5
min_action = -5

# Getting our AI, which we call "brain"; it contains the neural network that represents our Q-function
#brain = Dqn(5,3,0.9)
action2rotation = [0, 5, -5]
#spacenetwork = ObsSpaceNetwork()
policy = TD3(state_dim, action_dim, max_action)
replay_buffer = ReplayBuffer()
last_reward = 0
scores = []
im = CoreImage("./images/MASK1.png")
total_timesteps = 0
timesteps_since_eval = 0
episode_num = 0
episode_timesteps = 0
done = True
t0 = time.time()

# textureMask = CoreImage(source="./kivytest/simplemask1.png")

# Initializing the map
first_update = True
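
These examples construct a ReplayBuffer without showing its definition. Below is a minimal sketch of an experience-replay memory with the interface the snippets assume (add a transition tuple, sample a training batch), following the standard TD3 reference implementation; the course's own class may differ in detail.

import numpy as np


class ReplayBuffer(object):

    def __init__(self, max_size=int(1e6)):
        self.storage = []
        self.max_size = max_size
        self.ptr = 0

    def add(self, transition):
        # transition = (obs, new_obs, action, reward, done)
        if len(self.storage) == self.max_size:
            self.storage[self.ptr] = transition
            self.ptr = (self.ptr + 1) % self.max_size
        else:
            self.storage.append(transition)

    def sample(self, batch_size):
        # Sample a random batch of transitions and return it as stacked numpy arrays
        ind = np.random.randint(0, len(self.storage), size=batch_size)
        states, next_states, actions, rewards, dones = [], [], [], [], []
        for i in ind:
            s, s2, a, r, d = self.storage[i]
            states.append(np.array(s, copy=False))
            next_states.append(np.array(s2, copy=False))
            actions.append(np.array(a, copy=False))
            rewards.append(r)
            dones.append(d)
        return (np.array(states), np.array(next_states), np.array(actions),
                np.array(rewards).reshape(-1, 1), np.array(dones).reshape(-1, 1))
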
Example #2
# Initializing the last distance
last_distance = 0
orientation = 0

obs_img = np.zeros((50, 50))
obs_dis = last_distance
obs_ori = orientation

new_obs_img = np.zeros((50, 50))
new_obs_dis = last_distance
new_obs_ori = orientation

##################################################
''' We create the policy network (the Actor model) '''
policy = TD3(action_dim, max_action)

##################################################
''' We create the Experience Replay memory'''
replay_buffer = ReplayBuffer(sample_size=sample_size)

##################################################
im = CoreImage("./images/MASK1.png")

imgCV2 = cv2.imread('./images/MASK1.png')
rows, cols, dims = imgCV2.shape

# Initializing the map
first_update = True

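MASK1.png is loaded both as a Kivy CoreImage (for display) and with OpenCV (for its pixel data). Here is a hedged sketch of how the mask could be turned into a binary "sand" occupancy array; the threshold and the convention that white pixels are drivable road are assumptions for illustration, not necessarily what the original project does.

import cv2
import numpy as np

# Load the mask in grayscale and threshold it into a binary map:
# 1 = sand/obstacle, 0 = drivable road (assumed convention).
mask_gray = cv2.imread('./images/MASK1.png', cv2.IMREAD_GRAYSCALE)
sand = (mask_gray < 250).astype(np.uint8)
rows, cols = sand.shape
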
Example #3
from ai import TD3

# Adding this line if we don't want the right click to put a red point
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')
Config.set('graphics', 'resizable', False)
Config.set('graphics', 'width', '1429')
Config.set('graphics', 'height', '660')

# Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map
last_x = 0
last_y = 0
n_points = 0
length = 0

# Getting our AI, which we call "brain"; it contains the neural network that represents our Q-function
brain = TD3((1, 40, 40), 1, 5)
# action2rotation = [0,5,-5]
last_reward = 0
scores = []
im = CoreImage("./images/MASK1.png")

# textureMask = CoreImage(source="./kivytest/simplemask1.png")

# Initializing the map
first_update = True


def init():
    global sand
    global goal_x
    global goal_y
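
The init() above is truncated in this listing. For orientation, here is a minimal sketch of what such a map initializer typically does in these car-on-a-map examples; the window size and the goal coordinates are assumptions for illustration only.

import numpy as np

# Assumed window dimensions; in the Kivy app these come from the widget size.
longueur = 1429
largeur = 660


def init():
    global sand, goal_x, goal_y, first_update
    # An empty "sand" map the size of the window, and a goal near one corner.
    sand = np.zeros((longueur, largeur))
    goal_x = 20
    goal_y = largeur - 20
    first_update = False
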
Example #4
    def update(self, dt):

        global longueur
        global largeur

        longueur = self.width
        largeur = self.height
        if first_update:
            init()

        def evaluate_policy(policy, eval_episodes=10):
            avg_reward = 0.
            for _ in range(eval_episodes):
                obs = reset(self)
                done = False
                while not done:
                    action = policy.select_action(obs)
                    obs, reward, done, _ = Car.move(action)
                    avg_reward += reward
            avg_reward /= eval_episodes
            print("---------------------------------------")
            print("Average Reward over the Evaluation Step: %f" % (avg_reward))
            print("---------------------------------------")
            return avg_reward

        file_name = "%s_%s_%s" % ("TD3", env_name, str(seed))
        print("---------------------------------------")
        print("Settings: %s" % (file_name))
        print("--------------------------------------  -")

        if not os.path.exists("./results"):
            os.makedirs("./results")
        if save_models and not os.path.exists("./pytorch_models"):
            os.makedirs("./pytorch_models")

        torch.manual_seed(seed)
        np.random.seed(seed)

        state_dim = [32, 32, 1]
        action_dim = 1
        max_action = 5

        policy = TD3(state_dim, action_dim, max_action)

        replay_buffer = ReplayBuffer()

        evaluations = [evaluate_policy(policy)]

        def mkdir(base, name):
            path = os.path.join(base, name)
            if not os.path.exists(path):
                os.makedirs(path)
            return path

        work_dir = mkdir('exp', 'brs')
        monitor_dir = mkdir(work_dir, 'monitor')
        max_episode_steps = 400

        total_timesteps = 0
        timesteps_since_eval = 0
        episode_num = 0
        done = True
        t0 = time.time()

        # We start the main training loop, which runs for max_timesteps timesteps
        while total_timesteps < max_timesteps:

            # If the episode is done
            if done:

                # If we are not at the very beginning, we start the training process of the model
                if total_timesteps != 0 and total_timesteps > batch_size:
                    print("Total Timesteps: {} Episode Num: {} Reward: {}".
                          format(total_timesteps, episode_num, episode_reward))
                    policy.train(replay_buffer, episode_timesteps, batch_size,
                                 discount, tau, policy_noise, noise_clip,
                                 policy_freq)

                # We evaluate the episode and we save the policy
                if timesteps_since_eval >= eval_freq:
                    timesteps_since_eval %= eval_freq
                    evaluations.append(evaluate_policy(policy))
                    policy.save(file_name, directory="./pytorch_models")
                    np.save("./results/%s" % (file_name), evaluations)

                # When the training step is done, we reset the state of the environment
                obs = reset()

                # Set the Done to False
                done = False

                # Set rewards and episode timesteps to zero
                episode_reward = 0
                episode_timesteps = 0
                episode_num += 1

            # Before start_timesteps timesteps, we play random actions
            if total_timesteps < start_timesteps:
                action = np.random.normal(0, 1,
                                          size=1).clip(-1,
                                                       1).astype(np.float32)
            else:  # After start_timesteps timesteps, we switch to the model
                action = policy.select_action(obs)
                # If the explore_noise parameter is not 0, we add noise to the action and we clip it
                if expl_noise != 0:
                    action = (action +
                              np.random.normal(0, expl_noise, size=1)).clip(
                                  -1, 1)

            # The agent performs the action in the environment, then reaches the next state and receives the reward
            new_obs, reward, done, _ = move(action)

            # We check if the episode is done, either by the environment or by hitting the time limit
            if episode_timesteps + 1 == max_episode_steps:
                done = True
            done_bool = float(done)
            # We increase the total reward
            episode_reward += reward

            # We store the new transition into the Experience Replay memory (ReplayBuffer)
            replay_buffer.add((obs, new_obs, action, reward, done_bool))

            # We update the state, the episode timestep, the total timesteps, and the timesteps since the evaluation of the policy
            obs = new_obs
            episode_timesteps += 1
            total_timesteps += 1
            timesteps_since_eval += 1

        t1 = time.time()
        print("Total time  taken: {}".format(t1 - t0))
        evaluations.append(evaluate_policy(policy))
        if save_models:
            policy.save("%s" % (file_name), directory="./pytorch_models")
        np.save("./results/%s" % (file_name), evaluations)
        CarApp().stop()
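
The training loop above relies on hyperparameters (env_name, seed, save_models, start_timesteps, eval_freq, max_timesteps, expl_noise, batch_size, discount, tau, policy_noise, noise_clip, policy_freq) that are defined elsewhere in the file. As a reference, here is a sketch using the defaults from the original TD3 implementation; the values actually used in this project may differ.

env_name = "self_driving_car"  # hypothetical name, only used to build file_name
seed = 0                # random seed for reproducibility
save_models = True      # whether to save the trained weights
start_timesteps = 1e4   # number of warm-up timesteps with purely random actions
eval_freq = 5e3         # how often (in timesteps) the policy is evaluated
max_timesteps = 5e5     # total number of training timesteps
expl_noise = 0.1        # std of the Gaussian exploration noise added to actions
batch_size = 100        # size of each batch sampled from the replay buffer
discount = 0.99         # discount factor gamma
tau = 0.005             # soft-update rate of the target networks
policy_noise = 0.2      # noise added to target-policy actions in the critic update
noise_clip = 0.5        # clipping range of that target-policy noise
policy_freq = 2         # delayed actor updates: one actor update per two critic updates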
Example #5
# Adding this line if we don't want the right click to put a red point
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')
Config.set('graphics', 'resizable', False)
Config.set('graphics', 'width', '1429')
Config.set('graphics', 'height', '660')

# Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map
last_x = 0
last_y = 0
n_points = 0
length = 0
counter = 0

# Getting our AI, which we call "brain"; it contains the neural network that represents our Q-function
policy = TD3(state_dim=22, action_dim=2, max_action=np.asarray([10., 2.]))
replay_buffer = ReplayBuffer()
last_reward = 0
scores = []
im = CoreImage("./images/MASK1.png")
max_velocity = 6.
min_velocity = 0
max_angle = +5.
# min_angle = -5
max_stuck = 100
stuck_count = 0

# Initializing the map
first_update = True

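With a 2-dimensional action (action_dim=2, max_action=[10., 2.]), the raw policy output has to be mapped onto the car's velocity and steering limits. Here is a possible mapping using the max_velocity/min_velocity/max_angle bounds declared above; the split into a speed component and a steering component is an assumption for illustration.

import numpy as np


def scale_action(action, max_velocity=6.0, min_velocity=0.0, max_angle=5.0):
    # Assumed convention: action[0] controls speed, action[1] controls steering.
    velocity = float(np.clip(action[0], min_velocity, max_velocity))
    rotation = float(np.clip(action[1], -max_angle, max_angle))
    return velocity, rotation


# Example: a raw action from the policy is clipped to the car's physical bounds
velocity, rotation = scale_action(np.array([7.3, -1.2]))  # -> (6.0, -1.2)
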
Example #6
state_dim = 5  # position, velocity, orientation
action_dim = 1  # moving
max_action = 5
min_action = -5
total_timesteps = 0
timesteps_since_eval = 0
episode_num = 0
episode_timesteps = 0
done = True
t0 = time.time()

# Getting our AI, which we call "brain"; it contains the neural network that represents our Q-function
#brain = Dqn(5,3,0.9) # CHANGE
#brain = TD3(6,3,5)  # states, action, max_Action
action2rotation = [0, 5, -5]  # angle of rotation
brain = TD3(state_dim, action_dim, max_action)
replay_buffer = ReplayBuffer()
last_reward = 0
scores = []
im = CoreImage("./images/MASK1.png")

# Initializing the map; keep i at 0
first_update = True
i = 0

def init():
    global sand
    global goal_x
    global goal_y
    global first_update
    global img
Example #7
from ai import TD3

# Adding this line if we don't want the right click to put a red point
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')
Config.set('graphics', 'resizable', False)
Config.set('graphics', 'width', '1429')
Config.set('graphics', 'height', '660')

# Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map
last_x = 0
last_y = 0
n_points = 0
length = 0

# Getting our AI, which we call "brain"; it contains the neural network that represents our Q-function
brain = TD3((1, 40, 40), 1, 10)
# action2rotation = [0,5,-5]
last_reward = 0
reward = 0
# scores = []
im = CoreImage("./images/MASK1.png")

# textureMask = CoreImage(source="./kivytest/simplemask1.png")

# Initializing the map
first_update = True


def init():
    global sand
    global img

Example #8

import cv2

# Adding this line if we don't want the right click to put a red point
Config.set('input', 'mouse', 'mouse,multitouch_on_demand')
Config.set('graphics', 'resizable', False)
Config.set('graphics', 'width', '1429')
Config.set('graphics', 'height', '660')

# Introducing last_x and last_y, used to keep the last point in memory when we draw the sand on the map
last_x = 0
last_y = 0
n_points = 0
length = 0

# Getting our AI, which we call "brain"; it contains the neural network that represents our Q-function
brain = TD3(3, 1, 5)
last_reward = 0
scores = []

crop_size = 80
border_size = 5

# Initializing the map
first_update = True


def init():
    global sand
    global img
    global goal_x
    global goal_y
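
The crop_size and border_size constants in the last snippet suggest that the state fed to TD3 includes a patch of the sand map cropped around the car. Below is an illustrative helper for such a crop with padding at the map borders; the function name and padding convention are assumptions, not taken from the original code.

import numpy as np


def crop_around(sand, x, y, crop_size=80, pad_value=1.0):
    # Return a crop_size x crop_size patch of the map centred on (x, y).
    # The map is padded with pad_value (treated as sand) so that crops near
    # the borders keep the same size.
    half = crop_size // 2
    padded = np.pad(sand, half, mode='constant', constant_values=pad_value)
    cx, cy = int(x) + half, int(y) + half
    return padded[cx - half:cx + half, cy - half:cy + half]


# Example: an 80x80 patch around position (100, 200) of a 1429x660 map
patch = crop_around(np.zeros((1429, 660)), 100, 200)
print(patch.shape)  # (80, 80)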