def test_training():
    config = Config()
    n = 1
    env = make_parallel_env(n, 100000)
    update_config(env, config)
    model_path = "/home/liub/Desktop/mount/teamstrategy/oldmodels/mpe/aqmix+coach+vi2+ctr8+l10.0001+l20.0001/run0"
    #model_path = "/home/liub/Desktop/mount/teamstrategy/models/mpe/aqmix+ctr8+l10.0001+l20.0001/run0"

    # setup modules
    mac = Agent(config)  # policy
    qlearner = QLearner(mac, config)
    qlearner.load_models(model_path)
    qlearner.cuda()

    all_rewards = []
    #orders = tt_orders = 0
    orders = 0
    tt_orders = 1e-12
    for it in tqdm(range(100)):
        o, e, c, m, ms = reset_wrapper(env)
        rnn_hidden = mac.init_hidden(o.shape[0], o.shape[1])
        episode_reward = 0
        prev_z = None
        for t in range(config.max_steps):
            o_, e_, c_, m_, ms_ = mac.tensorize(o, e, c, m, ms)
            if config.has_coach and t % config.centralized_every == 0:
                _, z_team, logvar = qlearner.coach(o_, e_, c_, ms_)
                if prev_z is None:
                    mac.set_team_strategy(z_team)
                    prev_z = z_team
                else:
                    bs, n = z_team.shape[:2]
                    mask = ms_.sum(-1).gt(0).float()
                    #normal = D.Normal(z_team, (0.5*logvar).exp())
                    #logprob = normal.log_prob(prev_z).sum(-1)
                    #prob = logprob.exp()
                    #broadcast = (prob > 0.001).float()
                    l2 = (z_team - prev_z).pow(2).sum(-1).sqrt()
                    broadcast = (l2 > 5).float()
                    mac.set_part_team_strategy(z_team, broadcast)
                    orders += (broadcast * mask).sum()
                    tt_orders += mask.sum()
                    prev_z = mac.z_team.clone()
            actions, rnn_hidden = mac.step(o_, e_, c_, m_, ms_, rnn_hidden, epsilon=0.)
            o, e, m, ms, r, d = step_wrapper(env, actions)
            episode_reward += r.sum()
        all_rewards.append(episode_reward)

    all_rewards = np.array(all_rewards)
    print(f"broadcast rate {orders/tt_orders}")
    print(f"mean reward {all_rewards.mean()} | std reward {all_rewards.std()}")
    return all_rewards.mean()

def render_episodes():
    from PIL import Image
    config = Config()
    n = 1
    env = make_parallel_env(n, 9999)
    update_config(env, config)
    model_path = "/home/liub/Desktop/mount/teamstrategy/coach1/mpe/aqmix+coach+vi2+ctr4+l20.001/run0"
    #save_path = f"imgs/{config.method}/"

    # setup modules
    mac = Agent(config)  # policy
    qlearner = QLearner(mac, config)
    qlearner.load_models(model_path)
    qlearner.cuda()

    all_rewards = []
    for it in range(20):
        save_path = f"imgs/{config.method}/it{it}/"
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        #fourcc = VideoWriter_fourcc(*'MP4V')
        #video = VideoWriter(f"{save_path}/epi{it+1}.mp4", fourcc, float(12), (700,700))

        o, e, c, m, ms = reset_wrapper(env)
        prev_a = torch.zeros(o.shape[0], o.shape[1]).long().to(config.device)
        rnn_hidden = mac.init_hidden(o.shape[0], o.shape[1])
        prev_z = torch.zeros(o.shape[0], o.shape[1], config.coach_hidden_dim).to(config.device)
        print(c[0, :4])

        episode_reward = 0
        for t in range(config.max_steps):
            if "full" in config.method:
                m = ms
            o_, e_, c_, m_, ms_ = mac.tensorize(o, e, c, m, ms)
            if config.has_coach and t % config.centralized_every == 0:
                z_team, _, _ = qlearner.coach(o_, e_, c_, ms_)
                mac.set_team_strategy(z_team)

            frame = env.envs[0].render(mode="rgb_array")[0]
            #video.write(np.uint8(frame))
            #if t == 10:
            #    print(o[0,:4])
            im = Image.fromarray(frame)
            im.save(f"{save_path}t{t}.jpg")

            actions, rnn_hidden = mac.step(o_, e_, c_, m_, ms_, rnn_hidden, prev_a, epsilon=0.)
            prev_a = torch.LongTensor(actions).to(config.device)
            o, e, m, ms, r, d = step_wrapper(env, actions)
            episode_reward += r.sum()
            #if (t+1) % config.centralized_every == 0 and config.has_coach:
            #    prev_z = z

        all_rewards.append(episode_reward)
        #video.release()

    all_rewards = np.array(all_rewards)
    print(f"mean reward {all_rewards.mean()} | std reward {all_rewards.std()}")
    return all_rewards.mean()

def train():
    env = make_atari(conf.env_name)
    env = bench.Monitor(env, os.path.join(conf.path_game_scan, conf.env_name))
    env = wrap_deepmind(env, episode_life=True, clip_rewards=True, frame_stack=False, scale=True)
    env = WrapPyTorch(env)

    agent = Agent(conf=conf, env=env, test=False)

    episode_reward = 0
    losses = []
    all_rewards = []
    state = env.reset()  # (1, 84, 84)

    for frame_idx in range(1, conf.max_train_steps + 1):
        epsilon = conf.epsilon_by_frame(frame_idx)
        action = agent.act(state, epsilon, test=False)
        # agent.save_action(action, frame_idx)

        next_state, reward, done, _ = env.step(action)
        next_state = None if done else next_state

        loss = agent.update(state, action, reward, next_state, done, test=False, frame=frame_idx)
        state = next_state  # advance to the next observation
        episode_reward += reward

        if done:
            agent.finish_nstep()
            state = env.reset()
            agent.save_reward(episode_reward)
            episode_reward = 0

        if loss is not None:
            losses.append(loss.item())

        if frame_idx % conf.log_freq == 0 and loss:
            print("frame: {}, loss: {}, reward: {}.".format(frame_idx, loss.item(), episode_reward))
            if conf.save_curve:
                curve_plot(conf.path_plot, frame_idx, agent.all_rewards, losses)

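# A minimal sketch of the kind of schedule `conf.epsilon_by_frame` above is assumed to
# provide: an exponential decay from a starting epsilon toward a floor value. The actual
# schedule and its parameter names live in the project's config; the names and constants
# below (epsilon_by_frame_example, eps_start, eps_final, eps_decay) are illustrative only.
import math

def epsilon_by_frame_example(frame_idx, eps_start=1.0, eps_final=0.01, eps_decay=30000):
    # Smoothly decays exploration as training progresses.
    return eps_final + (eps_start - eps_final) * math.exp(-frame_idx / eps_decay)

# e.g. epsilon_by_frame_example(1) is ~1.0, epsilon_by_frame_example(30000) is ~0.37
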
def run():
    # uncomment these if you want
    #memory_fix()
    #memory_hard_fix()

    # setup data feed
    dm = DataManager()
    # for _ in range(10):
    #     print(dm.renderer_stream.next())

    # setup exchange. Needs raw data
    binance_exchange = BinanceExchange(data=dm.data)

    # setup portfolio
    binance_portfolio = BinancePortfolio(exchange=binance_exchange)

    # setup environment. Needs data feed stream
    env = Environment(portfolio=binance_portfolio,
                      data_stream=dm.stream,
                      renderer_stream=dm.renderer_stream)
    # for _ in range(10):
    #     print(env.observer.feed.next())

    # setup agent
    agent = Agent(environment=env)

    # train agent
    print(agent.train(steps=100, episodes=4, render_interval=10))

    # show plots of performance
    a = binance_portfolio.performance.plot()
    plt.show()
    b = binance_portfolio.performance.net_worth.plot()
    plt.show()

def play():
    client = carla.Client(settings.CONNECTION_IP, settings.CONNECTION_PORT)
    client.set_timeout(20.0)

    # Create controllers
    trafic_control = TraficControlThread(client)
    weather_control = WeatherControlThread(client)
    trafic_control.start()
    weather_control.start()
    logger.info("Controllers started")

    predicter = ModelHandler(settings.MODEL_NAME, target_weights_path=MODEL_WEIGHTS, train=False)
    agent = Agent(999999, client, False)

    try:
        while True:
            step = 1
            state = agent.spawn()
            while True:
                start_step_time = time.time()

                action = int(np.argmax(predicter.get_qs(state)))
                new_state, _, done = agent.step(action)
                state = new_state

                if done:
                    agent.clear_agent()
                    break

                time_diff1 = agent.episode_start + step / settings.FPS_COMPENSATION - time.time()
                time_diff2 = start_step_time + 1 / settings.FPS_COMPENSATION - time.time()
                if time_diff1 > 0:
                    time.sleep(min(0.125, time_diff1))
                elif time_diff2 > 0:
                    time.sleep(min(0.125, time_diff2))

                step += 1  # advance the step counter used for frame pacing
    except KeyboardInterrupt:
        logger.info("Exiting playing - Keyboard interrupt")
    except:
        logger.error("Playing failed")
    finally:
        trafic_control.terminate = True
        weather_control.terminate = True

def test_exp(config, fn, exp, threshold=0.):
    env = make_parallel_env(1, 9999, fn)
    update_config(env, config)
    config.method = exp
    k = exp.find("ctr")
    config.centralized_every = int(exp[k + 3:k + 4])
    if "165" in exp:
        config.agent_hidden_dim = 165
    else:
        config.agent_hidden_dim = 128
    if "coach" in exp:
        config.has_coach = True

    # setup modules
    mac = Agent(config)  # policy
    qlearner = QLearner(mac, config)

    R = []
    OR = []
    for run_num in tqdm([0, 1, 2, 3, 4]):
        model_path = f"/home/liub/Desktop/mount/teamstrategy/coach1/mpe/{exp}/run{run_num}"
        qlearner.load_models(model_path)
        qlearner.cuda()

        reward = 0
        n_orders = 0
        n_total_orders = 1e-12
        for n_ep in range(n_eval):
            o, e, c, m, ms = reset_wrapper(env)
            prev_a = torch.zeros(o.shape[0], o.shape[1]).long().to(config.device)
            rnn_hidden = mac.init_hidden(o.shape[0], o.shape[1])
            prev_z = None
            for t in range(145):
                if "full" in exp:
                    m = ms
                if "interval" in exp and t % config.centralized_every == 0:
                    m = ms
                o_, e_, c_, m_, ms_ = mac.tensorize(o, e, c, m, ms)

                if config.has_coach and t % config.centralized_every == 0:
                    ma = ms_.sum(-1).gt(0).float()
                    with torch.no_grad():
                        _, z_team, _ = qlearner.coach(o_, e_, c_, ms_)
                    if prev_z is None:
                        mac.set_team_strategy(z_team * ma.unsqueeze(-1))
                        prev_z = z_team
                        n_orders += ma.sum().item()
                        n_total_orders += ma.sum().item()
                    else:
                        bs, n = z_team.shape[:2]
                        #normal = D.Normal(z_team, (0.5*logvar).exp())
                        #logprob = normal.log_prob(prev_z).sum(-1)
                        #prob = logprob.exp()
                        #broadcast = (prob > 0.001).float()
                        l2 = (z_team * ma.unsqueeze(-1) - prev_z * ma.unsqueeze(-1)).pow(2).sum(-1).sqrt()
                        broadcast = (l2 > threshold).float()
                        mac.set_part_team_strategy(z_team, broadcast)
                        n_orders += broadcast.sum().item()
                        n_total_orders += ma.sum().item()
                        prev_z = mac.z_team.clone()

                actions, rnn_hidden = mac.step(o_, e_, c_, m_, ms_, rnn_hidden, prev_a, 0)
                prev_a = torch.LongTensor(actions).to(config.device)
                o, e, m, ms, r, d = step_wrapper(env, actions)
                reward += r.sum()

        reward = reward / n_eval
        rate = n_orders / n_total_orders
        R.append(reward)
        OR.append(rate)

    R = np.array(R)
    OR = np.array(OR)
    print(
        f"{exp:30s}[{threshold:3d}] | muR: {R.mean():.4f} stdR: {R.std()/np.sqrt(5):.4f} | muC: {OR.mean():.4f} stdC: {OR.std()/np.sqrt(5):.4f}"
    )
    return R.mean(), R.std(), OR.mean(), OR.std()

def run():
    config = Config()
    run_dir, log_dir = prerun(config)
    env = make_parallel_env(config.n_rollout_threads, config.seed)
    update_config(env, config)
    config.pprint()

    # setup modules
    mac = Agent(config)  # policy
    qlearner = QLearner(mac, config)
    if config.device == "cuda":
        qlearner.cuda()

    train_stats = {
        "reward": [],
    }

    step = 0
    reward_buffer = collections.deque(maxlen=100)
    use_tqdm = True
    n_iters = config.total_steps // config.max_steps // config.n_rollout_threads
    if use_tqdm:
        pbar = tqdm(total=n_iters)

    prev_update_step = 0
    start_epsilon = 1.0
    end_epsilon = 0.05
    delta = -np.log(end_epsilon) / n_iters
    logger = SummaryWriter(log_dir)

    for it in range(n_iters):
        o, e, c, m, ms = reset_wrapper(env)
        prev_a = torch.zeros(o.shape[0], o.shape[1]).long().to(config.device)
        temporal_buffer = collections.deque(maxlen=config.centralized_every + 1)  # record t=0,1,...T
        episode_reward = 0.
        epsilon = min(start_epsilon, max(end_epsilon, np.exp(-it * delta)))
        rnn_hidden = mac.init_hidden(o.shape[0], o.shape[1])

        for t in range(config.max_steps):
            step += config.n_rollout_threads
            if "full" in config.method:
                m = ms
            if "interval" in config.method and t % config.centralized_every == 0:
                m = ms
            o_, e_, c_, m_, ms_ = mac.tensorize(o, e, c, m, ms)

            if config.has_coach and t % config.centralized_every == 0:
                with torch.no_grad():
                    z_team, _, _ = qlearner.coach(o_, e_, c_, ms_)
                mac.set_team_strategy(z_team)

            actions, rnn_hidden = mac.step(o_, e_, c_, m_, ms_, rnn_hidden, prev_a, epsilon)  # [n_agents,]
            prev_a = torch.LongTensor(actions).to(config.device)
            no, ne, nm, nms, r, d = step_wrapper(env, actions)
            temporal_buffer.append((o, e, c, m, ms, actions, r))
            episode_reward += r

            if t % config.centralized_every == 0 and t > 0:
                O, E, C, M, MS, A, R = map(np.stack, zip(*temporal_buffer))
                for j in range(config.n_rollout_threads):
                    qlearner.buffer.push(O[:, j], E[:, j], C[:, j], M[:, j], MS[:, j], A[:, j], R[:, j])

            if (step - prev_update_step) >= config.update_every:
                prev_update_step = step
                qlearner.update(logger, step)

            o = no; e = ne; m = nm; ms = nms

        reward_buffer.extend(episode_reward)
        pbar.update(1)
        running_reward_mean = np.array(reward_buffer).mean()
        train_stats["reward"].append((step, running_reward_mean))
        logger.add_scalar("reward", running_reward_mean, step)
        pbar.set_description(f"ep {it:10d} | {running_reward_mean:8.4f} |")

        if (it + 1) % 100 == 0 or (it + 1 == n_iters):
            with open(f"{log_dir}/stats.npy", 'wb') as f:
                np.save(f, train_stats)
            qlearner.save_models(f"{run_dir}")

    if use_tqdm:
        pbar.close()
    env.close()

def qLearning(learning_rate, discount_factor, epsilon, reward_map, state_grid, max_steps, epochs):
    agent = Agent(learning_rate, discount_factor, reward_map, state_grid, max_steps)
    stateDic = {}
    # all_epochs = []
    # epoch_rewards = []
    # every_5 = []
    # mean_every_5 = []
    # epochs_mean = []

    for e in range(epochs):
        current_state = Agent.choose_start(reward_map, state_grid, max_steps)
        # epoch_reward = 0
        # all_epochs.append(e)
        # current_epoch_rewards = []

        for _ in range(0, len(reward_map) * len(reward_map[0]) * 2):
            action = Agent.epsilon_greedy_policy(epsilon, current_state)
            next_state, reward = agent.take_action(current_state, action, stateDic)

            # update
            current_state.update_qvalue(learning_rate, reward, discount_factor, next_state, action)
            stateDic[(current_state.posX, current_state.posY, current_state.steps)] = current_state
            # epoch_reward += reward
            # current_epoch_rewards.append(reward)

            if next_state.is_terminal:
                # epoch_rewards.append(epoch_reward)
                # every_5.append(epoch_reward)
                break
            current_state = next_state

        # epochs_mean.append(sum(current_epoch_rewards) / len(current_epoch_rewards))
        # if (e % 5 == 0):
        #     mean_every_5.append(sum(every_5) / len(every_5))
        #     every_5 = []

    # plt.style.use(['dark_background'])
    # plt.figure(figsize=(18, 12))
    # plt.plot(epochs_mean)
    # plt.xlabel("Episodes")
    # plt.ylabel("Mean reward per episode", size=10)
    # plt.title("Mean reward per episode, lambda 0.9")
    # plt.savefig('epochs_mean.png')
    # plt.show()

    # plt.style.use(['dark_background'])
    # plt.figure(figsize=(20, 10))
    # plt.plot(epoch_rewards)
    # plt.xlabel("Episodes")
    # plt.ylabel("Cumulative reward", size=10)
    # plt.title("Cumulative reward per episode")
    # plt.savefig('reward_cumulative.png')
    # plt.show()

    # plt.figure(figsize=(20, 10))
    # plt.plot(mean_every_5)
    # plt.xlabel("Episodes")
    # plt.ylabel("Moving average over 5 episodes", size=10)
    # plt.title("Moving average")
    # plt.savefig('mean_every5.png')
    # plt.show()

    # plt.figure(figsize=(20, 10))
    # plt.plot(epochs_mean)
    # plt.xlabel("Episodes")
    # plt.ylabel("Mean reward per episode", size=10)
    # plt.title("Mean reward per episode")
    # plt.savefig('epochs_mean.png')
    # plt.show()

    return stateDic

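# For reference, a minimal sketch of the update that `update_qvalue` above is assumed to
# perform: the standard tabular Q-learning rule
#     Q(s, a) <- Q(s, a) + lr * (reward + gamma * max_a' Q(s', a') - Q(s, a))
# The dictionary-based q_table used here is illustrative, not the project's State class.
def q_learning_update(q_table, s, a, reward, s_next, actions, lr=0.1, gamma=0.9):
    # q_table maps (state, action) -> value; missing entries default to 0.0
    best_next = max(q_table.get((s_next, a2), 0.0) for a2 in actions)
    td_error = reward + gamma * best_next - q_table.get((s, a), 0.0)
    q_table[(s, a)] = q_table.get((s, a), 0.0) + lr * td_error
    return q_table[(s, a)]
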
class ATC(core.Entity):
    ''' Example new entity object for BlueSky. '''

    def __init__(self):
        super().__init__()

        self.super_start = time.perf_counter()
        self.initilized = False
        self.epoch_counter = 0

        # [Success, Fail]
        self.results = np.zeros(2)
        self.all_success = []
        self.all_fail = []
        self.mean_success = 0
        self.all_mean_success, self.best = 0, 0
        self.mean_rewards = []

        self.epoch_actions = np.zeros(ACTION_SHAPE)

        self.start = None
        self.stop = None

        self.dist = [0, -1]
        self.spd = [0, -1]
        self.trk = [0, 360]
        self.vs = [0, -1]

        self.last_observation = {}
        self.last_reward_observation = {}
        self.previous_action = {}
        self.observation = {}

    def on_load(self):
        self.sector_manager = Sector_Manager(SECTORS)
        self.route_manager = Route_Manager(ROUTES,
                                           test_routes=VISUALIZE,
                                           draw_paths=VISUALIZE)
        self.traffic_manager = Traffic_Manager(max_ac=MAX_AC,
                                               times=TIME_SEP,
                                               max_spd=CONSTRAINTS["cas"]["max"],
                                               min_spd=CONSTRAINTS["cas"]["min"],
                                               max_alt=32000,
                                               min_alt=32000,
                                               network=self.route_manager)

        self.memory = Memory()
        self.agent = Agent(state_size=STATE_SHAPE,
                           action_size=ACTION_SHAPE,
                           value_size=VALUE_SHAPE)

        try:
            self.agent.load(path=FILE + "best.h5")
        except:
            try:
                self.agent.load(path=FILE + ".h5")
            except:
                pass

        self.initilized = True
        print("ATC: READY")

        string = "=================================\n UPDATE: RUNNING EPOCH {}\n=================================\n".format(
            self.format_epoch())
        self.print_all(string)

    # Functions that need to be called periodically can be indicated to BlueSky
    # with the timed_function decorator
    @core.timed_function(name='example', dt=12)
    def update(self):
        # Initialise the system
        if not self.initilized:
            self.on_load()

        # Start epoch timer
        if not self.start:
            self.start = time.perf_counter()

        # Create aircraft
        self.traffic_manager.spawn()

        # Update aircraft active sectors
        self.traffic_manager.update_active(self.sector_manager.system_sectors)

        # Generate a full distance matrix between each aircraft
        full_dist_matrix = self.get_dist_martix()

        # Get nearest ac in a matrix
        nearest_ac = self.get_nearest_ac(dist_matrix=full_dist_matrix)

        # Get goal distances for each aircraft
        g_distance = self.get_goal_distances()

        # Get an array of terminal aircraft
        terminal_ac, terminal_id = self.get_terminal(nearest_ac, g_distance)

        self.handle_terminal(terminal_id)

        if self.traffic_manager.check_done():
            self.epoch_reset()
            return

        if not TRAIN and (self.traffic_manager.total % 50 == 0):
            string = "Success: {} | Fail: {} | Mean Success: {:.3f}%".format(
                int(self.results[0]), int(self.results[1]),
                (self.results[0] / MAX_AC) * 100)
            self.print_all(string)

        if len(traf.id) <= 0:
            return

        if not len(traf.id) == 0:
            policy, normal_state, normal_context = self.get_actions(
                terminal_ac, g_distance, full_dist_matrix)

            if len(policy) > 0:
                idx = 0
                new_actions = {}
                for i in range(len(traf.id)):
                    if terminal_ac[i] == 0 and len(
                            self.traffic_manager.active_sectors[i]) > 0:
                        if not np.any(np.isnan(policy[idx])):
                            _id = traf.id[i]

                            if not _id in self.last_observation.keys():
                                self.last_observation[_id] = [
                                    normal_state[idx], normal_context[idx]
                                ]

                            action = np.random.choice(
                                ACTION_SHAPE, 1, p=policy[idx].flatten())[0]
                            # print(policy[idx], action)
                            self.epoch_actions[action] += 1

                            if not _id in self.observation.keys(
                            ) and _id in self.previous_action.keys():
                                self.observation[_id] = [
                                    normal_state[idx], normal_context[idx]
                                ]

                                self.memory.store(_id,
                                                  self.last_observation[_id],
                                                  self.previous_action[_id],
                                                  nearest_ac[idx])

                                self.last_observation[_id] = self.observation[_id]
                                del self.observation[_id]

                            self.perform_action(i, action)
                            new_actions[_id] = action

                        self.previous_action = new_actions
                        idx += 1

    # Act
    def get_actions(self, terminal_ac, g_dists, dist_matrix):
        ids = []
        new_actions = {}

        state = self.get_state()
        normal_state, normal_context = self.normalise_all(
            state, terminal_ac, g_dists, dist_matrix)

        policy = []
        if not len(normal_state) == 0:
            policy = self.agent.act(normal_state, normal_context)

        return policy, normal_state, normal_context

    # For an aircraft perform an action
    def perform_action(self, i, action):
        if action < 3:
            traf_alt = int(traf.alt[i] / ft)
            new_alt = int(round((traf_alt + ACTIONS[action])))
            alt = max(CONSTRAINTS["alt"]["min"],
                      min(CONSTRAINTS["alt"]["max"], new_alt))
            # print(traf_alt, alt)
            stack.stack("{} alt {}".format(traf.id[i], alt))
        elif action == 4:
            traf_alt = traf.alt[i] / ft
            new_alt = int(round((traf_alt)))

    # Get the current state
    def get_state(self):
        state = np.zeros((len(traf.id), 6))
        start_ids, end_ids = self.get_all_nodes()

        state[:, 0] = traf.lat
        state[:, 1] = traf.lon
        state[:, 2] = traf.trk
        state[:, 3] = traf.alt
        state[:, 4] = traf.tas
        state[:, 5] = traf.vs

        return state

    # Get all nodes for each aircraft
    def get_all_nodes(self):
        start_ids = np.zeros(len(traf.id), dtype=int)
        end_ids = np.zeros(len(traf.id), dtype=int)

        for i in range(len(traf.id)):
            _id = traf.id[i]
            route = self.traffic_manager.routes[_id]
            start_ids[i] = np.argwhere(self.route_manager.idx_array == route[0])
            end_ids[i] = np.argwhere(self.route_manager.idx_array == route[-1])

        return start_ids, end_ids

    # Normalise the state and context
    def normalise_all(self, state, terminal_ac, g_dists, dist_matrix):
        normal_states = self.normalise_state(state, terminal_ac, g_dists)
        normal_context = []

        start_ids, end_ids = self.get_all_nodes()

        max_agents = 0
        for _id in traf.id:
            if terminal_ac[traf.id2idx(_id)] > 0 or len(
                    self.traffic_manager.active_sectors[traf.id2idx(_id)]) <= 0:
                continue

            new_context = self.normalise_context(_id, terminal_ac, dist_matrix,
                                                 start_ids, end_ids)
            max_agents = max(max_agents, len(new_context))

            if len(normal_context) == 0:
                normal_context = new_context
            else:
                normal_context = np.append(
                    keras.preprocessing.sequence.pad_sequences(
                        normal_context, max_agents, dtype='float32'),
                    keras.preprocessing.sequence.pad_sequences(
                        new_context, max_agents, dtype='float32'),
                    axis=0)

        if len(normal_context) == 0:
            normal_context = np.array([0, 0, 0, 0, 0, 0, 0]).reshape(1, 1, 7)

        # print(normal_states.shape, normal_context.shape)
        return normal_states, normal_context

    # Normalise the agent state only
    def normalise_state(self, state, terminal_ac, g_dists):
        total_active = 0
        for i in range(len(terminal_ac)):
            if terminal_ac[i] == 0 and len(
                    self.traffic_manager.active_sectors[i]) > 0:
                total_active += 1

        normalised_state = np.zeros((total_active, STATE_SHAPE))

        count = 0
        for i in range(len(traf.id)):
            if terminal_ac[i] > 0 or len(
                    self.traffic_manager.active_sectors[i]) <= 0:
                continue

            normalised_state[count, :] = self.normalise(state[i],
                                                        'state',
                                                        traf.id[i],
                                                        g_dist=g_dists[i])
            count += 1

        return normalised_state

    # Get and normalise context
    def normalise_context(self, _id, terminal_ac, dist_matrix, start_ids, end_ids):
        context = []

        idx = traf.id2idx(_id)
        distances = dist_matrix[:, idx]

        this_sectors = self.traffic_manager.active_sectors[idx]
        this_lat, this_lon = traf.lat[idx], traf.lon[idx]

        for i in range(len(distances)):
            # Ignore current aircraft
            if i == idx:
                continue

            if terminal_ac[i] > 0 or len(
                    self.traffic_manager.active_sectors[i]) <= 0:
                continue

            sectors = self.traffic_manager.active_sectors[i]

            # Only care if the ac is in a matching sector
            flag = False
            for x in sectors:
                if x in this_sectors:
                    flag = True

            if not flag:
                continue

            dist = get_dist([this_lat, this_lon], [traf.lat[i], traf.lon[i]])

            # Only care about aircraft within visible distance
            if dist > 40:
                continue

            spd = traf.tas[i]
            alt = traf.alt[i]
            trk = traf.trk[i]
            vs = traf.vs[i]
            start_id = start_ids[i]
            end_id = end_ids[i]

            self.dist[1] = max(self.dist[1], dist)
            self.spd[1] = max(self.spd[1], spd)
            self.vs[1] = max(self.vs[1], vs)

            dist = dist / self.dist[1]
            spd = spd / self.spd[1]
            trk = trk / self.trk[1]
            alt = ((alt/ft)-CONSTRAINTS["alt"]["min"]) / \
                (CONSTRAINTS["alt"]["max"]-CONSTRAINTS["alt"]["min"])

            vs = 0
            if not traf.vs[i] == 0:
                vs = traf.vs[i] / self.vs[1]

            n_nodes, dist2next = get_n_nodes(traf.id[i], self.traffic_manager,
                                             self.route_manager)

            self.dist[1] = max(self.dist[1], dist2next)
            dist2next = dist2next / self.dist[1]

            if len(context) == 0:
                context = np.array([
                    spd, alt, trk, vs, dist, dist2next, n_nodes[0], n_nodes[1],
                    n_nodes[2]
                ]).reshape(1, 1, 9)
            else:
                context = np.append(context,
                                    np.array([
                                        spd, alt, trk, vs, dist, dist2next,
                                        n_nodes[0], n_nodes[1], n_nodes[2]
                                    ]).reshape(1, 1, 9),
                                    axis=1)

        if len(context) == 0:
            context = np.zeros(9).reshape(1, 1, 9)

        return context

    # Perform normalisation
    def normalise(self, state, what, _id, g_dist=None):
        # Normalise the entire state
        if what == 'state':
            if not g_dist:
                raise Exception(
                    "For normalising a state please pass the distance to the goal."
                )

            self.dist[1] = max(self.dist[1], g_dist)
            self.spd[1] = max(self.spd[1], state[4])
            self.vs[1] = max(self.vs[1], state[5])

            dist = g_dist / self.dist[1]
            spd = state[4] / self.spd[1]
            trk = state[2] / self.trk[1]
            alt = ((state[3]/ft)-CONSTRAINTS["alt"]["min"]) / \
                (CONSTRAINTS["alt"]["max"]-CONSTRAINTS["alt"]["min"])

            vs = 0
            if not state[5] == 0:
                vs = state[5] / self.vs[1]

            n_nodes, dist2next = get_n_nodes(_id, self.traffic_manager,
                                             self.route_manager)

            self.dist[1] = max(self.dist[1], dist2next)
            dist2next = dist2next / self.dist[1]

            return np.array([
                spd, alt, trk, vs, dist, dist2next, n_nodes[0], n_nodes[1],
                n_nodes[2]
            ])

    # Get the terminal aircraft
    def get_terminal(self, nearest_ac, g_dists):
        terminal_ac = np.zeros(len(traf.id), dtype=int)
        terminal_id = []

        # Loop through all aircraft
        for i in range(len(traf.id)):
            # Terminal state 0 = not terminal, 1 = collision, 2 = success
            T = 0

            # Only care about aircraft in a sector
            if len(self.traffic_manager.active_sectors[i]) > 0:
                close_ac = nearest_ac[i]
                n_ac_data = (close_ac[0], close_ac[1])

                # Get the terminal state
                T = self.agent.terminal(i, n_ac_data, g_dists[i])

                # Only care about terminal aircraft
                if not T == 0:
                    # Update collision aircraft
                    if T == 1:
                        terminal_ac[i] = 1
                        terminal_ac[traf.id2idx(close_ac[2])] = 1
                    elif not terminal_ac[i] == 1:
                        terminal_ac[i] = 2

                    _id = traf.id[i]
                    self.memory.store(_id, self.last_observation[_id],
                                      self.previous_action[_id],
                                      nearest_ac[i], T)

        for i in range(len(terminal_ac)):
            if terminal_ac[i] > 0:
                terminal_id.append([traf.id[i], terminal_ac[i]])

        return terminal_ac, terminal_id

    # Handle terminal aircraft
    def handle_terminal(self, terminal_id):
        for ac in terminal_id:
            stack.stack('DEL {}'.format(ac[0]))

            self.traffic_manager.active -= 1

            if ac[1] == 1:
                self.results[1] += 1
            elif ac[1] == 2:
                self.results[0] += 1

    # Generates a distance matrix of all aircraft in the system
    def get_dist_martix(self):
        size = traf.lat.shape[0]
        return geo.latlondist_matrix(np.repeat(traf.lat, size),
                                     np.repeat(traf.lon, size),
                                     np.tile(traf.lat, size),
                                     np.tile(traf.lon, size)).reshape(size, size)

    # Get the nearest aircraft to agents
    def get_nearest_ac(self, dist_matrix):
        nearest = []

        # Loop through all aircraft
        for i in range(len(traf.id)):
            a_alt = traf.alt[i] / ft
            ac_dists = dist_matrix[:, i]

            close = 10e+25
            alt_sep = 10e+25
            nearest_id = None

            # Loop through the row on the dist matrix
            for x in range(len(ac_dists)):
                # Ensure the aircraft is in controlled airspace and not the current aircraft
                if not x == i and len(
                        self.traffic_manager.active_sectors[x]) > 0:
                    # See if it is closest and update
                    if ac_dists[x] < close:
                        close = float(ac_dists[x])
                        i_alt = traf.alt[x] / ft
                        alt_sep = abs(a_alt - i_alt)
                        nearest_id = traf.id[x]

            nearest.append([close, alt_sep, nearest_id])

        return np.array(nearest)

    # Returns a matrix of distances to the goal
    def get_goal_distances(self):
        goal_ds = np.zeros(len(traf.id), dtype=float)

        for i in range(len(traf.id)):
            goal_ds[i] = get_goal_dist(traf.id[i], self.traffic_manager,
                                       self.route_manager)

        return goal_ds

    # Reset the environment for the next epoch
    def epoch_reset(self):
        # Reset the traffic creation
        self.traffic_manager.reset()

        # Keep track of all successes and failures
        self.all_success.append(self.results[0])
        self.all_fail.append(self.results[1])

        # Calculate total mean success
        self.all_mean_success = np.mean(self.all_success)

        # Calculate rolling mean success
        if (self.epoch_counter + 1) >= 50:
            self.mean_success = np.mean(self.all_success[-50:])

        if (self.epoch_counter + 1) % 5 == 0:
            if self.mean_success > self.best:
                if TRAIN:
                    print('::::::: Saving Best ::::::')
                    self.agent.save(path=NEW_FILE + "best.h5")
                self.best = self.mean_success

        if TRAIN:
            print(':::::: Saving Model ::::::')
            self.agent.save(path=NEW_FILE + ".h5")
            print(":::::::: Training ::::::::")
            self.agent.train(self.memory)
            print(":::::::: Complete ::::::::")

        temp = np.array([np.array(self.all_success), np.array(self.all_fail)])
        np.savetxt("Files/" + NEW_FILE + "_numpy.csv", temp, delimiter=',')

        # Stop the timer
        self.stop = time.perf_counter()

        # -------- Printing Outputs --------
        string = "Epoch run in {:.2f} seconds".format(self.stop - self.start)
        self.print_all(string)

        string = "Success: {} | Fail: {} | Mean Success: {:.3f}% | (50) Mean Success Rolling {:.3f}% | Best {:.3f}%".format(
            int(self.results[0]), int(self.results[1]),
            (self.all_mean_success / MAX_AC) * 100,
            (self.mean_success / MAX_AC) * 100, (self.best / MAX_AC) * 100)
        self.print_all(string)

        string = "Actions -> Descend: {}, Hold Current: {}, Climb: {}, Maintain Climb: {}".format(
            self.epoch_actions[0], self.epoch_actions[1],
            self.epoch_actions[2], self.epoch_actions[3])
        # string = "Actions -> Descend: {}, Climb: {}".format(
        #     self.epoch_actions[1], self.epoch_actions[0])
        self.print_all(string)

        if self.epoch_counter + 1 >= EPOCHS:
            super_stop = time.perf_counter()
            stack.stack("STOP")
            string = "::END:: Training {} episodes took {:.2f} hours".format(
                EPOCHS, ((super_stop - self.super_start) / 60) / 60)
            self.print_all(string)
            return

        self.epoch_counter += 1

        string = "=================================\n UPDATE: RUNNING EPOCH {}\n=================================\n".format(
            self.format_epoch())
        self.print_all(string)

        # Reset values
        self.results = np.zeros(2)
        self.stop = None
        self.start = None
        self.mean_rewards = []
        self.epoch_actions = []
        self.epoch_actions = np.zeros(ACTION_SHAPE)
        self.previous_action = {}
        self.last_observation = {}
        self.observation = {}

    # Scripts for printing values
    def print_all(self, string):
        stack.stack(f'ECHO {string}')
        print(string)

    def format_epoch(self):
        epoch_string = ""

        if self.epoch_counter + 1 < 10:
            epoch_string += "0"
        if self.epoch_counter + 1 < 100:
            epoch_string += "0"
        if self.epoch_counter + 1 < 1000:
            epoch_string += "0"
        if self.epoch_counter + 1 < 10000:
            epoch_string += "0"

        epoch_string += str(self.epoch_counter + 1)

        return epoch_string

class GridWorld:
    def __init__(self, world):
        self.world = world.split('\n ')[1:-1]
        self.action_map = {0: 'right', 1: 'down', 2: 'left', 3: 'up'}
        self.action_space = [0, 1, 2, 3]
        self.slip = 0.2  # 20% chance of taking wrong action
        self.col = len(self.world[0])  # 10 - num of columns in the above string
        self.row = len(self.world)  # 5 - num of rows in the above string
        self.state_color = (50, 100, 10)
        self.renderfirst = True
        self.policy = {}
        self.episode_step = 0
        self._max_epi_step = 1000
        self.wall_group = pg.sprite.Group()
        self.state_group = pg.sprite.Group()
        self.state_dict = defaultdict(lambda: 0)

        i = 0
        for y, et_row in enumerate(self.world):
            for x, block_type in enumerate(et_row):
                if block_type == 'w':
                    self.wall_group.add(Wall(col=x, row=y))
                elif block_type == 'a':
                    self.agent = Agent(col=x, row=y)
                    self.state_group.add(State(col=x, row=y))
                    self.state_dict[(x, y)] = {
                        'state': i,
                        'reward': -1,
                        'done': False
                    }
                    i += 1
                elif block_type == 'g':
                    self.goal = Goal(col=x, row=y)
                    self.state_dict[(x, y)] = {
                        'state': i,
                        'reward': 10,
                        'done': True
                    }
                    i += 1
                elif block_type == ' ':
                    self.state_group.add(State(col=x, row=y))
                    self.state_dict[(x, y)] = {
                        'state': i,
                        'reward': -1,
                        'done': False
                    }
                    i += 1

        self.state_dict = dict(self.state_dict)
        self.state_count = len(self.state_dict)

    def reset(self):
        self.episode_step = 0
        self.agent.reInitilizeAgent()
        return self.state_dict[(self.agent.initial_position.x,
                                self.agent.initial_position.y)]['state']

    def get_action_with_probof_slip(self, action):
        # slip property of env
        individual_slip = self.slip / 3
        prob = [individual_slip for a in self.action_space]
        prob[action] = 1 - self.slip
        act = np.random.choice(self.action_space, p=prob)
        return act

    def step(self, action, testing=False):
        if not testing:
            action = self.get_action_with_probof_slip(action)
        action = self.action_map[action]
        response = self.agent.move(action, self.wall_group, self.state_dict)
        self.episode_step += 1
        if self.episode_step <= self._max_epi_step:
            return response['state'], response['reward'], response['done'], {}  # info
        else:
            return response['state'], response['reward'], True, {'TimeLimit': True}

    def render(self):
        if self.renderfirst:
            pg.init()
            self.screen = pg.display.set_mode((self.col * 50, self.row * 50))
        self.screen.fill(self.state_color)
        self.wall_group.draw(self.screen)
        self.goal.draw(self.screen)
        self.agent.draw(self.screen)
        pg.display.update()
        pg.display.flip()

    def close(self):
        self.renderfirst = True
        pg.quit()

    def setPolicy(self, policy):
        for i, act in enumerate(policy):
            self.policy[i] = self.action_map[act]
        for s in self.state_group:
            s.change_with_policy(self.state_dict, self.policy)

    def play_as_human(self, show_policy=False):
        # policy = {state_no: action('left')}
        if show_policy and len(self.policy) == 0:
            raise Exception(
                "Sorry, no policy found setPolicy first...use world.setPolicy([list of action for states])"
            )
        pg.init()
        screen = pg.display.set_mode((self.col * 50, self.row * 50))
        clock = pg.time.Clock()
        done = False
        while not done:
            for event in pg.event.get():
                if event.type == pg.QUIT:
                    done = True
                elif event.type == pg.KEYDOWN:
                    if event.key == pg.K_LEFT:
                        response = self.agent.move('left', self.wall_group, self.state_dict)
                        #print(response)
                    elif event.key == pg.K_RIGHT:
                        response = self.agent.move('right', self.wall_group, self.state_dict)
                        #print(response)
                    elif event.key == pg.K_UP:
                        response = self.agent.move('up', self.wall_group, self.state_dict)
                        #print(response)
                    elif event.key == pg.K_DOWN:
                        response = self.agent.move('down', self.wall_group, self.state_dict)
                        #print(response)

            screen.fill(self.state_color)
            self.wall_group.draw(screen)
            if show_policy:
                self.state_group.draw(screen)
            self.goal.draw(screen)
            self.agent.draw(screen)
            pg.display.update()
            pg.display.flip()
            clock.tick(60)
        pg.quit()

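# A minimal usage sketch for GridWorld, assuming a layout string in the format the parser
# above expects ('w' = wall, 'a' = agent start, 'g' = goal, ' ' = free cell; rows separated
# by "\n " as consumed by split('\n ')). The world_string below and the random policy are
# illustrative only; the project's own world definitions and sprite classes are assumed to
# be importable in this module.
if __name__ == "__main__":
    world_string = "\n wwwwwwwwww\n wa       w\n w   ww   w\n w      g w\n wwwwwwwwww\n "
    world = GridWorld(world_string)
    state = world.reset()
    for _ in range(20):
        action = np.random.choice(world.action_space)  # random policy
        state, reward, done, info = world.step(action)
        world.render()
        if done:
            break
    world.close()
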
class Trainer(Thread):
    def __init__(self, client, identifier, epsilon, get_qs_callbatch, update_replay_memory_callback):
        super().__init__()
        self.daemon = True
        self.client = client
        self.terminate = False
        self.fail_flag = False
        self.halt = False
        self.get_qs = get_qs_callbatch
        self.update_replay_memory = update_replay_memory_callback
        self.identifier = identifier
        self.agent = Agent(identifier, self.client, True)
        self.action = None
        self.episode = 0
        self.epsilon = epsilon
        self.scores_history = deque(maxlen=settings.LOG_EVERY)
        self.score_record = None
        self.steps_per_second = deque(maxlen=settings.LOG_EVERY)
        self.actions_statistic = deque(
            maxlen=int(settings.LOG_EVERY * settings.SECONDS_PER_EXPISODE * settings.FPS_COMPENSATION))

    def get_action(self, action: int):
        num_of_logged_actions = len(self.actions_statistic)
        if num_of_logged_actions <= 0:
            return 0
        return self.actions_statistic.count(action) / num_of_logged_actions

    def get_steps_per_second(self):
        if len(self.steps_per_second) > 0:
            return sum(self.steps_per_second) / len(self.steps_per_second)
        return 0

    def get_preview_data(self):
        if self.agent.prev_camera is not None and self.agent.initialized:
            return cv2.cvtColor(self.agent.prev_camera, cv2.COLOR_RGB2BGR)
        return np.zeros((settings.PREVIEW_CAMERA_IMAGE_DIMENSIONS[1],
                         settings.PREVIEW_CAMERA_IMAGE_DIMENSIONS[0],
                         settings.PREVIEW_CAMERA_IMAGE_DIMENSIONS[2]))

    def get_mean_score(self):
        if len(self.scores_history) > 0:
            return sum(self.scores_history) / len(self.scores_history)
        return 0

    def get_episode(self):
        return self.episode

    def run(self) -> None:
        logger.info(f"Trainer {self.identifier} started")
        while not self.terminate:
            if self.halt:
                time.sleep(0.1)
                continue

            reward = None
            episode_reward = 0
            step = 1

            try:
                state = self.agent.spawn()
                self.fail_flag = False
            except:
                self.fail_flag = True
                break

            episode_data_memory = deque()
            while not self.fail_flag:
                start_step_time = time.time()

                if self.epsilon is None or np.random.random() > self.epsilon:
                    self.action = int(np.argmax(self.get_qs(state)))
                    self.actions_statistic.append(self.action)
                else:
                    self.action = random.choice(list(settings.ACTIONS.keys()))

                try:
                    new_state, reward, done = self.agent.step(self.action)
                except:
                    logger.error(f"Trainer {self.identifier} - Failed to make step")
                    self.fail_flag = True
                    break

                episode_data_memory.append((state, self.action, reward, new_state, done))
                state = new_state
                episode_reward += reward

                if done:
                    self.agent.clear_agent()
                    self.action = None
                    break

                time_diff1 = self.agent.episode_start + step / settings.FPS_COMPENSATION - time.time()
                time_diff2 = start_step_time + 1 / settings.FPS_COMPENSATION - time.time()
                if time_diff1 > 0:
                    time.sleep(min(0.125, time_diff1))
                elif time_diff2 > 0:
                    time.sleep(min(0.125, time_diff2))

                step += 1

            if not reward or not self.agent.episode_start:
                continue

            episode_time = time.time() - self.agent.episode_start
            if episode_time == 0:
                episode_time = 1e-9
            average_steps_per_second = step / episode_time
            self.steps_per_second.append(average_steps_per_second)

            reward_factor = settings.FPS_COMPENSATION / average_steps_per_second
            episode_reward_weighted = (
                (episode_reward - reward) * reward_factor + reward) * settings.EPISODE_REWARD_MULTIPLIER

            if episode_time > settings.MINIMUM_EPISODE_LENGTH:
                self.update_replay_memory(episode_data_memory)
                self.scores_history.append(episode_reward_weighted)
                self.episode += 1

            del episode_data_memory

        logger.info(f"Trainer {self.identifier} stopped")