Example #1
    def create_controls(self):

        src_add = self.srcTxt.GetValue()
        obj = stat.Stats()

        #print self.qlist,self.rlist
        qsent, qrecv, rsent, rrecv, avgRtt = obj.data_collector(
            src_add, self.qlist, self.rlist, self.qrlist)
        #print qsent,qrecv

        wx.StaticLine(self, -1, (25, 50), (300, 1))
        wx.StaticText(self,
                      -1,
                      'Queries Sent', (25, 100),
                      style=wx.ALIGN_RIGHT)
        wx.StaticText(self,
                      -1,
                      'Responses Received', (25, 120),
                      style=wx.ALIGN_RIGHT)
        wx.StaticText(self, -1, 'Queries Received', (25, 140))
        wx.StaticText(self, -1, 'Responses Sent', (25, 160))
        #wx.StaticText(self, -1, '', (25, 180))
        wx.StaticText(self, -1, 'Average RTT', (25, 180))
        wx.StaticText(self, -1, 'Generated Msg/Received Msg', (25, 220))
        wx.StaticText(self, -1, str(qsent), (250, 100))
        wx.StaticText(self, -1, str(rrecv), (250, 120))
        wx.StaticText(self, -1, str(qrecv), (250, 140))
        wx.StaticText(self, -1, str(rsent), (250, 160))
        wx.StaticText(self, -1, str(avgRtt), (250, 180))
        wx.StaticText(self, -1,
                      str(qsent + rsent) + '/' + str(qrecv + rrecv),
                      (250, 220))

        wx.StaticLine(self, -1, (25, 260), (300, 1))
        self.Centre()
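
The method above assumes it lives in a wx window that already defines self.srcTxt and the query/response lists consumed by stat.Stats().data_collector(). A minimal host frame might look like the sketch below; StatsFrame, the default address, and the constructor arguments are assumptions for illustration, only the attribute names come from the example.

import wx
import stat  # the example's own stats module, not the standard-library stat

class StatsFrame(wx.Frame):
    """Hypothetical host window for create_controls() above."""

    def __init__(self, qlist, rlist, qrlist):
        super().__init__(None, title='Statistics', size=(360, 320))
        self.qlist, self.rlist, self.qrlist = qlist, rlist, qrlist
        # text control holding the source address read by create_controls()
        self.srcTxt = wx.TextCtrl(self, -1, '127.0.0.1', pos=(25, 20))
        self.create_controls()

    # create_controls() from Example #1 would be pasted here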
Example #2
def stats(albumdir, options):
    """
    Prints out statistics.
    """
    import statistics

    stats = statistics.Stats()

    if options.recursive:
        for root, dirs, files in albumdir.walk():
            if len(dirs) > 0:
                for dir in dirs:
                    fp = FilePath(root, dir)
                    if isIgnored(options, fp):
                        continue
                    try:
                        stats = statistics.dirstat(fp, stats, options.verbose)
                    except NamingMuseException as strerr:
                        print(strerr)
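
The loop above treats statistics.dirstat as an accumulator: it takes a directory path and the running Stats object and hands the updated Stats back. A hypothetical stub with that shape (field names assumed, not taken from the real namingmuse module):

class Stats:
    """Hypothetical stand-in for the accumulator used above."""
    def __init__(self):
        self.albums = 0

def dirstat(fp, stats, verbose=False):
    """Fold one album directory into the running Stats and return it."""
    stats.albums += 1
    if verbose:
        print('scanned %s' % fp)
    return stats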
Example #3
def train(environment,
          agent,
          n_episodes=1000000,
          max_t=1000,
          solve_score=100.0):
    """ Run training loop.

    Params
    ======
        environment: environment object
        agent: agent object
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        solve_score (float): criteria for considering the environment solved
    """

    stats = statistics.Stats()
    stats_format = 'Buffer: {:6}   NoiseW: {:.4}'

    for i_episode in range(1, n_episodes + 1):
        rewards = []
        state = environment.reset()
        # loop over steps
        for t in range(max_t):
            # select an action
            if agent.evaluation_only:  # disable noise on evaluation
                action = agent.act(state, add_noise=False)
            else:
                action = agent.act(state)
            # take action in environment

            # map continuous actions to discrete actions: for each agent, the
            # component with the largest absolute value determines the action
            action = action.squeeze(0)
            action_tmp = [action[0:2], action[2:4], action[4:7], action[7:10]]
            env_action = []
            for a in action_tmp:
                idx = np.argmax(np.abs(a))  # largest absolute value per agent
                is_negative = a[idx] < 0  # True if that action value is negative
                env_action.append((idx * 2) + is_negative)  # discrete action id
            env_action = np.array(env_action)
            #print('env_action: {}'.format(env_action))

            # DEBUG action mapping
            #print('is_negative: {}'.format(is_negative))
            #print('idx: {}'.format(idx))
            #print('env_action: {}'.format(env_action))

            # DEBUG playing against random agents
            #random_actions = np.random.random_sample(4) * 5
            #env_action = random_actions  # both teams take random actions
            #env_action = np.array((random_actions[0], env_action[1], random_actions[2], env_action[3]))  # +1B only red team takes random actions
            #env_action = np.array((env_action[0], env_action[1], random_actions[2], env_action[3]))      # +2B only red striker takes random actions
            #env_action = np.array((env_action[0], random_actions[1], random_actions[2], env_action[3]))  # +3B red striker and blue goalie take random actions
            #env_action = np.array((env_action[0], random_actions[1], env_action[2], random_actions[3]))  # +1R only blue team takes random actions
            #env_action = np.array((env_action[0], env_action[1], env_action[2], random_actions[3]))      # +2R only blue striker takes random actions
            #env_action = np.array((random_actions[0], env_action[1], env_action[2], random_actions[3]))  # +3R blue striker and red goalie take random actions
            #print('env_action: {}'.format(env_action))

            next_state, reward, done = environment.step(env_action)
            # update agent with returned information
            agent.step(state, action, reward, next_state, done)
            state = next_state
            rewards.append(reward)
            if all(done):
                break

        # every episode
        buffer_len = len(agent.memory)
        per_agent_rewards = []  # calculate per agent rewards
        for i in range(agent.n_agents):
            per_agent_reward = 0
            for step in rewards:
                per_agent_reward += step[i]
            per_agent_rewards.append(per_agent_reward)
        stats.update(t, [np.sum((per_agent_rewards[0], per_agent_rewards[2]))],
                     [np.sum((per_agent_rewards[1], per_agent_rewards[3]))],
                     i_episode)  # track sum rewards across each team
        stats.print_episode(
            i_episode, t, stats_format, buffer_len, agent.noise_weight,
            agent.agents[0].critic_loss, agent.agents[1].critic_loss,
            agent.agents[2].critic_loss, agent.agents[3].critic_loss,
            agent.agents[0].actor_loss, agent.agents[1].actor_loss,
            agent.agents[2].actor_loss, agent.agents[3].actor_loss,
            agent.agents[0].noise_val, agent.agents[1].noise_val,
            agent.agents[2].noise_val, agent.agents[3].noise_val,
            per_agent_rewards[0], per_agent_rewards[1], per_agent_rewards[2],
            per_agent_rewards[3])

        # every epoch (100 episodes)
        if i_episode % 100 == 0:
            stats.print_epoch(i_episode, stats_format, buffer_len,
                              agent.noise_weight)
            save_name = 'checkpoints/episode.{}.'.format(i_episode)
            for i, save_agent in enumerate(agent.agents):
                torch.save(save_agent.actor_local.state_dict(),
                           save_name + str(i) + '.actor.pth')
                torch.save(save_agent.critic_local.state_dict(),
                           save_name + str(i) + '.critic.pth')

        # if solved
        if stats.is_solved(i_episode, solve_score):
            stats.print_solve(i_episode, stats_format, buffer_len,
                              agent.noise_weight)
            save_name = 'checkpoints/solved.'
            for i, save_agent in enumerate(agent.agents):
                torch.save(save_agent.actor_local.state_dict(),
                           save_name + str(i) + '.actor.pth')
                torch.save(save_agent.critic_local.state_dict(),
                           save_name + str(i) + '.critic.pth')
            break
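
The continuous-to-discrete mapping inside the loop above is easier to see in isolation. The helper below is a sketch that mirrors the same per-agent segments and index arithmetic; the segment boundaries are taken from the example, the function itself is assumed.

import numpy as np

def to_discrete_actions(action):
    """Map a flat continuous action vector to one discrete action per agent.

    Within each agent's segment the component with the largest absolute
    value wins; a negative winner selects the odd action id (idx * 2 + 1).
    """
    action = np.asarray(action).squeeze()
    segments = [action[0:2], action[2:4], action[4:7], action[7:10]]
    env_action = []
    for a in segments:
        idx = int(np.argmax(np.abs(a)))
        env_action.append(idx * 2 + int(a[idx] < 0))
    return np.array(env_action)

# e.g. to_discrete_actions(np.random.randn(1, 10)) -> four discrete action ids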
Example #4
def train(environment, agent, n_episodes=10000, max_t=1000, solve_score=0.5):
    """ Run training loop.

    Params
    ======
        environment: environment object
        agent: agent object
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        solve_score (float): criteria for considering the environment solved
    """

    stats = statistics.Stats()
    stats_format = 'Buffer: {:6}   NoiseW: {:.4}'

    for i_episode in range(1, n_episodes + 1):
        rewards = []
        state = environment.reset()
        # loop over steps
        for t in range(max_t):
            # select an action
            if agent.evaluation_only:  # disable noise on evaluation
                action = agent.act(state, add_noise=False)
            else:
                action = agent.act(state)
            # take action in environment
            next_state, reward, done = environment.step(action)
            # update agent with returned information
            agent.step(state, action, reward, next_state, done)
            state = next_state
            rewards.append(reward)
            if any(done):
                break

        # every episode
        buffer_len = len(agent.memory)
        per_agent_rewards = []  # calculate per agent rewards
        for i in range(agent.n_agents):
            per_agent_reward = 0
            for step in rewards:
                per_agent_reward += step[i]
            per_agent_rewards.append(per_agent_reward)
        stats.update(t, [np.max(per_agent_rewards)],
                     i_episode)  # use max over all agents as episode reward
        stats.print_episode(
            i_episode, t, stats_format, buffer_len, agent.noise_weight,
            agent.agents[0].critic_loss, agent.agents[1].critic_loss,
            agent.agents[0].actor_loss, agent.agents[1].actor_loss,
            agent.agents[0].noise_val, agent.agents[1].noise_val,
            per_agent_rewards[0], per_agent_rewards[1])

        # every epoch (100 episodes)
        if i_episode % 100 == 0:
            stats.print_epoch(i_episode, stats_format, buffer_len,
                              agent.noise_weight)
            save_name = 'checkpoints/episode.{}.'.format(i_episode)
            for i, save_agent in enumerate(agent.agents):
                torch.save(save_agent.actor_local.state_dict(),
                           save_name + str(i) + '.actor.pth')
                torch.save(save_agent.critic_local.state_dict(),
                           save_name + str(i) + '.critic.pth')

        # if solved
        if stats.is_solved(i_episode, solve_score):
            stats.print_solve(i_episode, stats_format, buffer_len,
                              agent.noise_weight)
            save_name = 'checkpoints/solved.'
            for i, save_agent in enumerate(agent.agents):
                torch.save(save_agent.actor_local.state_dict(),
                           save_name + str(i) + '.actor.pth')
                torch.save(save_agent.critic_local.state_dict(),
                           save_name + str(i) + '.critic.pth')
            break
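
All of these training loops rely on the same small statistics.Stats surface: update(), print_episode(), print_epoch(), is_solved(), print_solve() (and in one case plot()). The class below is a minimal stand-in sketching that surface so the call sites are easier to follow; the internals are assumptions and the real module in these projects is richer.

from collections import deque
import numpy as np

class Stats:
    """Hypothetical minimal version of the Stats object used above."""

    def __init__(self, window=100):
        self.scores = deque(maxlen=window)  # rolling window of episode scores

    def update(self, t, *args):
        # the last positional argument is the episode number,
        # everything before it is a list of rewards to track
        *reward_lists, i_episode = args
        self.scores.append(max(np.max(r) for r in reward_lists))

    def is_solved(self, i_episode, solve_score):
        return (len(self.scores) == self.scores.maxlen
                and np.mean(self.scores) >= solve_score)

    def print_episode(self, i_episode, t, fmt, *extra):
        print('Episode {:5d}  Steps: {:4d}  Mean(100): {:.3f}'.format(
            i_episode, t, float(np.mean(self.scores))))

    def print_epoch(self, i_episode, fmt, *extra):
        print('--- epoch at episode {} ---'.format(i_episode))

    def print_solve(self, i_episode, fmt, *extra):
        print('Solved at episode {} with mean score {:.3f}'.format(
            i_episode, float(np.mean(self.scores))))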
Example #5
def train(environment,
          agent,
          n_episodes=1000,
          max_t=1000,
          solve_score=30.0,
          graph_when_done=True):
    """ Run training loop for DQN.

    Params
    ======
        environment: environment object
        agent: agent object
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        solve_score (float): criteria for considering the environment solved
        graph_when_done (bool): whether to show matplotlib graphs of the training run
    """

    stats = statistics.Stats()

    for i_episode in range(1, n_episodes + 1):
        rewards = []
        state = environment.reset()
        # loop over steps
        for t in range(max_t):
            # select an action
            action = agent.act(state)
            # take action in environment
            next_state, reward, done = environment.step(action)
            # update agent with returned information
            agent.step(state, action, reward, next_state, done)
            state = next_state
            rewards.append(reward)
            if any(done):
                break

        # every episode
        buffer_len = len(agent.memory)
        per_agent_rewards = []  # calculate per agent rewards
        for i in range(agent.n_agents):
            per_agent_reward = 0
            for step in rewards:
                per_agent_reward += step[i]
            per_agent_rewards.append(per_agent_reward)
        stats.update(t, [np.mean(per_agent_rewards)], i_episode)
        stats.print_episode(i_episode, agent.alpha, buffer_len, t)

        # every epoch (100 episodes)
        if i_episode % 100 == 0:
            stats.print_epoch(i_episode, agent.alpha, buffer_len)
            save_name = 'checkpoints/episode.{}'.format(i_episode)
            torch.save(agent.actor_local.state_dict(),
                       save_name + '.actor.pth')
            torch.save(agent.critic_local.state_dict(),
                       save_name + '.critic.pth')

        # if solved
        if stats.is_solved(i_episode, solve_score):
            stats.print_solve(i_episode, agent.alpha, buffer_len)
            torch.save(agent.actor_local.state_dict(),
                       'checkpoints/solved.actor.pth')
            torch.save(agent.critic_local.state_dict(),
                       'checkpoints/solved.critic.pth')
            break

    # training finished
    if graph_when_done:
        stats.plot(agent.loss_list)
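
The per-agent reward bookkeeping repeated in every loop above (summing each step's reward vector per agent) is equivalent to a single numpy reduction over the step axis. A self-contained check of that equivalence, with made-up reward values:

import numpy as np

# rewards as collected in the loops above: one per-agent reward list per step
rewards = [[0.1, 0.0], [0.0, 0.2], [0.05, 0.0]]

# explicit accumulation, as written in the examples
per_agent_rewards = []
for i in range(len(rewards[0])):
    total = 0
    for step in rewards:
        total += step[i]
    per_agent_rewards.append(total)

# equivalent one-liner
assert np.allclose(per_agent_rewards, np.sum(rewards, axis=0))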
Example #6
def train(PATH,
          environment,
          agent,
          timestamp,
          n_episodes=10000,
          max_t=1000,
          score_threshold=0.5):
    """Train with MADDPG."""
    start = time.time()
    total_scores = deque(maxlen=100)
    stats = statistics.Stats()
    stats_format = "Buffer: {:6} NoiseW: {:.4}"

    for i_episode in range(1, n_episodes + 1):
        scores = []
        states = environment.reset()
        for t in range(max_t):
            if agent.evaluation_only:  # disable noise on evaluation
                actions = agent.act(states, add_noise=False)
            else:
                actions = agent.act(states)
            next_states, rewards, dones = environment.step(actions)
            agent.step(states, actions, rewards, next_states, dones)
            states = next_states
            scores.append(rewards)
            if np.any(dones):
                break
        buffer_len = len(agent.memory)
        per_agent_rewards = []
        for i in range(agent.num_agents):
            per_agent_reward = 0
            for step in scores:
                per_agent_reward += step[i]
            per_agent_rewards.append(per_agent_reward)
        stats.update(t, [np.max(per_agent_rewards)],
                     i_episode)  # use max over all agents as episode reward
        stats.print_episode(
            i_episode, t, stats_format, buffer_len, agent.noise_weight,
            agent.agents[0].critic_loss, agent.agents[1].critic_loss,
            agent.agents[0].actor_loss, agent.agents[1].actor_loss,
            agent.agents[0].noise_val, agent.agents[1].noise_val,
            per_agent_rewards[0], per_agent_rewards[1])

        if i_episode % 500 == 0:
            stats.print_epoch(i_episode, stats_format, buffer_len,
                              agent.noise_weight)
            save_name = f"../results/{timestamp}_episode_{i_episode}"
            for i, save_agent in enumerate(agent.agents):
                torch.save(save_agent.actor.state_dict(),
                           save_name + f"A{i}_actor.pth")
                torch.save(save_agent.critic.state_dict(),
                           save_name + f"A{i}_critic.pth")

        # if total_average_score>score_threshold:
        if stats.is_solved(i_episode, score_threshold):
            stats.print_solve(i_episode, stats_format, buffer_len,
                              agent.noise_weight)
            save_name = f"../results/{timestamp}_solved_episode_{i_episode}"
            for i, save_agent in enumerate(agent.agents):
                torch.save(save_agent.actor.state_dict(),
                           save_name + f"A{i}_actor.pth")
                torch.save(save_agent.critic.state_dict(),
                           save_name + f"A{i}_critic.pth")
            break
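
The per-agent checkpoints written above can be restored later for evaluation. The loader below is a sketch that assumes the same file-naming pattern and that each sub-agent exposes actor and critic modules, as in Example #6:

import torch

def load_checkpoints(agent, save_name):
    """Restore per-agent actor/critic weights saved with the naming above."""
    for i, sub_agent in enumerate(agent.agents):
        sub_agent.actor.load_state_dict(
            torch.load(save_name + f"A{i}_actor.pth", map_location='cpu'))
        sub_agent.critic.load_state_dict(
            torch.load(save_name + f"A{i}_critic.pth", map_location='cpu'))

# e.g. load_checkpoints(agent, f"../results/{timestamp}_episode_{i_episode}")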
Example #7
def train(environment,
          agent,
          n_episodes=2500,
          max_t=1000,
          solve_score=0.5,
          best_margin=0.35,
          notebook=False):
    """ Run training loop.
    Params
    ======
        environment: environment object
        agent: agent object
        n_episodes (int): maximum number of training episodes
        max_t (int): maximum number of timesteps per episode
        solve_score (float): criteria for considering the environment solved
        best_margin (float): criteria for stopping after best result (above solve_score)
        notebook (boolean): flag for returning scores for plotting in notebook
    """

    stat = statistics.Stats()
    stats_format = 'Buffer: {:6}   NoiseW: {:.4}'
    solved_flag = False

    for i_episode in range(1, n_episodes + 1):
        rewards = []
        state = environment.reset()
        # loop over steps
        for t in range(max_t):
            # select an action
            action = agent.act(state)  # when evaluating, the agent does not add noise
            # take action in environment
            next_state, reward, done = environment.step(action)
            # update agent with returned information
            agent.step(state, action, reward, next_state, done)
            state = next_state
            rewards.append(reward)
            if any(done):
                break

        # every episode
        buffer_len = len(agent.memory)
        per_agent_rewards = []  # calculate per agent rewards
        for i in range(agent.n_agents):
            per_agent_reward = 0
            for step in rewards:
                per_agent_reward += step[i]
            per_agent_rewards.append(per_agent_reward)
        stat.update(t, [np.max(per_agent_rewards)],
                    i_episode)  # use max over all agents as episode reward
        stat.print_episode(
            i_episode, t, stats_format, buffer_len, agent.noise_weight,
            agent.agents[0].critic_loss, agent.agents[1].critic_loss,
            agent.agents[0].actor_loss, agent.agents[1].actor_loss,
            agent.agents[0].noise_val, agent.agents[1].noise_val,
            per_agent_rewards[0], per_agent_rewards[1])

        # every epoch (100 episodes)
        if i_episode % 100 == 0:
            stat.print_epoch(i_episode, stats_format, buffer_len,
                             agent.noise_weight)
            save_name = 'checkpoints/episode.{}.'.format(i_episode)
            for i, save_agent in enumerate(agent.agents):
                torch.save(save_agent.actor_local.state_dict(),
                           save_name + str(i) + '.actor.pth')
                torch.save(save_agent.critic_local.state_dict(),
                           save_name + str(i) + '.critic.pth')

        # if solved
        if stat.is_solved(i_episode, solve_score) and not solved_flag:
            solved_flag = True
            best = False
            stat.print_solve(i_episode, stats_format, best, buffer_len,
                             agent.noise_weight)
            save_name = 'checkpoints/solved.'
            for i, save_agent in enumerate(agent.agents):
                torch.save(save_agent.actor_local.state_dict(),
                           save_name + str(i) + '.actor.pth')
                torch.save(save_agent.critic_local.state_dict(),
                           save_name + str(i) + '.critic.pth')

        elif stat.is_solved(i_episode, solve_score + best_margin):
            best = True
            stat.print_solve(i_episode, stats_format, best, buffer_len,
                             agent.noise_weight)
            save_name = 'checkpoints/solved_best.'
            for i, save_agent in enumerate(agent.agents):
                torch.save(save_agent.actor_local.state_dict(),
                           save_name + str(i) + '.actor.pth')
                torch.save(save_agent.critic_local.state_dict(),
                           save_name + str(i) + '.critic.pth')
            if notebook:
                return stat
            else:
                break
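
Example #7 writes two tiers of checkpoints ('solved.' at the first solve, 'solved_best.' once the score clears solve_score + best_margin). A small helper for picking the better tier when it exists; the directory layout is taken from the example, the function itself is assumed:

import os

def best_checkpoint_prefix(checkpoint_dir='checkpoints'):
    """Return the 'solved_best.' prefix if those files exist, else 'solved.'."""
    best_probe = os.path.join(checkpoint_dir, 'solved_best.0.actor.pth')
    prefix = 'solved_best.' if os.path.exists(best_probe) else 'solved.'
    return os.path.join(checkpoint_dir, prefix)

# e.g. torch.load(best_checkpoint_prefix() + '0.actor.pth') restores agent 0's actor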