Example #1
    def test_checkpoint_creation(self):
        self.loss = 1

        def train_one_epoch():
            self.loss -= 0.1
            return self.loss

        def measure_performance():
            return (5.2, {'a': 1}), '5|3|1|4'

        network = SimpleFC(name='simple_fc', directory=self.test_dir)
        cch = coach.Coach(network=network)

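        # Train until the epoch_reached(3) target is hit; the settings_notes
        # entries are stored in the checkpoint notes alongside the measured
        # performance (asserted below).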
        cch.train(target=coach.targets.epoch_reached(3),
                  train_one_epoch=train_one_epoch,
                  measure_performance=measure_performance,
                  settings_notes={'learning_rate': 5},
                  checkpoint_frequency=10,
                  checkpoint=-1)

        created_checkpoint = network.latest_checkpoint()
        self.assertEqual(created_checkpoint.id, 0)
        self.assertEqual(
            created_checkpoint.notes, {
                'train_set_performance': (5.2, {'a': 1}),
                'dev_set_performance': '5|3|1|4',
                'learning_rate': 5
            })
Example #2
    def __init__(self, nom, vision, grSim, com, disp=0):
        self.nom = nom
        Y0 = rbt.Robot('Y', 0, self, grSim, com)
        Y1 = rbt.Robot('Y', 1, self, grSim, com)
        B0 = rbt.Robot('B', 0, self, grSim, com)
        B1 = rbt.Robot('B', 1, self, grSim, com)
        self.n = 9
        self.m = 9
        self.disp = disp
        self.vision = vision
        self.joueurs = [Y0, Y1, B0, B1]
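        # One Coach per team: yellow gets the first two robots and the right
        # side ('R'), blue gets the last two and the left side ('L').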
        Yellow = coach.Coach([self.joueurs[0], self.joueurs[1]], 'Y', 'R')
        Blue = coach.Coach([self.joueurs[2], self.joueurs[3]], 'B', 'L')
        self.balle = Balle(0, 0)
        self.blue = Blue
        self.yellow = Yellow

        self.stop = False
        self.go = False
        self.score_jaune = 0
        self.score_bleu = 0
Example #3
    def __init__(self,
                 nom,
                 vision,
                 grSim,
                 com,
                 blueSide='L',
                 start='B',
                 disp=0):
        self.nom = nom
        Y0 = rbt.Robot('Y', 0, self, grSim, com)
        Y1 = rbt.Robot('Y', 1, self, grSim, com)
        B0 = rbt.Robot('B', 0, self, grSim, com)
        B1 = rbt.Robot('B', 1, self, grSim, com)
        self.n = 9
        self.m = 9
        self.disp = disp
        self.vision = vision
        self.joueurs = [Y0, Y1, B0, B1]
        self.blueSide = blueSide
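        # Yellow always plays the side opposite blueSide.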
        if blueSide == 'L':
            yellowSide = 'R'
        else:
            yellowSide = 'L'
        Yellow = coach.Coach([self.joueurs[0], self.joueurs[1]], 'Y',
                             yellowSide)
        Blue = coach.Coach([self.joueurs[2], self.joueurs[3]], 'B', blueSide)
        self.balle = Balle(0, 0)
        self.blue = Blue
        self.yellow = Yellow

        self.stop = False
        self.go = False
        self.score_jaune = 0
        self.score_bleu = 0
        self.team_engagement = start
        self.engagement = True
Example #4
def main():
    parser = argparse.ArgumentParser(
        description=
        'Train a ddpg agent to play the Unity Environment Tennis app')
    parser.add_argument("--episodes",
                        type=int,
                        help="Number of training episodes to run",
                        default=5000)
    parser.add_argument("--max_steps",
                        type=int,
                        help="Maximum steps per episode",
                        default=1000)
    parser.add_argument(
        "--saveto",
        help=
        "Save agent after training.  agent- and critic- are prepended to the specified name.",
        default='checkpoint.pth')
    parser.add_argument("--loadfrom",
                        help="Load previously saved model before training")
    parser.add_argument(
        "--min_score",
        type=float,
        help="Only save the model if the it achieves this score",
        default=0.5)
    parser.add_argument("--saveplot", help="Location to save plot of scores")
    parser.add_argument(
        "--environment",
        help="Path to Unity environment for game (i.e. ./Tennis.App)",
        default="./Tennis.app")
    parser.add_argument(
        "--eval",
        action='store_true',
        help=
        "Turns on eval mode, which affects the Unity environment and removes the random noise from the predicted agent actions")
    args = parser.parse_args()

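    # Connect to the Unity environment and use its first (default) brain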
    env = UnityEnvironment(file_name=args.environment)
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]

    # number of actions
    action_size = brain.vector_action_space_size
    print("Action size: " + str(action_size))

    # examine the state space
    state = env_info.vector_observations[0]
    state_size = len(state)

    num_agents = len(env_info.agents)
    print('Number of agents:', num_agents)

    # Create agent and start training
    _agent = maddpg_agent.MADDPGAgent(state_size, action_size, num_agents)
    if args.loadfrom:
        _agent.load(args.loadfrom)
    _coach = coach.Coach(_agent, env)

    # Callback function which will save the agent if it exceeds the
    # minimum score
    max_score = 0

    def save_fn(agent, episode, avg_score):
        nonlocal max_score
        if avg_score > max_score and avg_score > args.min_score and args.saveto:
            agent.save(args.saveto)
            print("Training succeeded.  New max score %s at step %s" %
                  (avg_score, episode))
            max_score = avg_score

    # Train (or eval) the agent
    scores = _coach.run_episodes(args.episodes,
                                 args.max_steps,
                                 train=not args.eval,
                                 callback=save_fn)

    # Plot scores
    plt.plot(scores)
    plt.plot(moving_average(scores, 100), color='red')
    plt.ylabel('Episode scores')
    if args.saveplot:
        plt.savefig(args.saveplot, bbox_inches='tight')
Example #5
parser.add_argument("loadfrom", help="Model checkpoint used for eval")

args = parser.parse_args()

env = UnityEnvironment(file_name="./Banana.app")
brain_name = env.brain_names[0]
brain = env.brains[brain_name]

# reset the environment
env_info = env.reset(train_mode=True)[brain_name]

# number of actions
action_size = brain.vector_action_space_size

# examine the state space
state = env_info.vector_observations[0]
state_size = len(state)


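# Build the agent, restore the saved checkpoint, and evaluate it without
# training (train=False below).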
_agent = agent.Agent(state_size, action_size, seed=time.time())
if args.loadfrom:
    _agent.load(args.loadfrom)
else:
    parser.print_usage()
    exit()
_coach = coach.Coach(_agent, env)

scores = _coach.run_episodes(episodes=args.episodes, train=False)
mean_score = np.mean(scores[-100:])
print("Your model achieved a final mean score of {}".format(mean_score))
Example #6
def main():
    parser = argparse.ArgumentParser(
        description=
        'Train a ddpg agent to play the Unity Environment Reacher app')
    parser.add_argument("--episodes",
                        type=int,
                        help="Number of training episodes to run",
                        default=200)
    parser.add_argument("--max_steps",
                        type=int,
                        help="Maximum steps per episode",
                        default=1000)
    parser.add_argument(
        "--saveto",
        help=
        "Save agent after training.  agent- and critic- are prepended to the specified name.",
        default='checkpoint.pth')
    parser.add_argument("--loadfrom",
                        help="Load previously saved model before training")
    parser.add_argument(
        "--min_score",
        type=float,
        help="Only save the model if the it achieves this score",
        default=30.)
    parser.add_argument("--saveplot", help="Location to save plot of scores")
    parser.add_argument(
        "--environment",
        help="Path to Unity environment for game (i.e. ./Reacher.App)",
        default="./Reacher.app")
    parser.add_argument(
        "--eval",
        action='store_true',
        help=
        "Turns on eval mode, which affects the Unity environment and removes the random noise from the predicted agent actions")
    args = parser.parse_args()

    env = UnityEnvironment(file_name=args.environment)
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]

    # number of actions
    action_size = brain.vector_action_space_size

    # examine the state space
    state = env_info.vector_observations[0]
    state_size = len(state)

    num_agents = len(env_info.agents)
    print('Number of agents:', num_agents)

    # Create agent and start training
    _agent = ddpg_agent.DDPGAgent(state_size, action_size, num_agents)
    if args.loadfrom:
        _agent.load(args.loadfrom)
    _coach = coach.Coach(_agent, env)
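    # Run the episodes; passing --eval runs them with training disabled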
    scores = _coach.run_episodes(args.episodes,
                                 args.max_steps,
                                 train=not args.eval)
    mean_score = np.mean(scores[-100:])

    # Save the network if successful
    if mean_score > args.min_score and args.saveto:
        _agent.save(args.saveto)
        print("Training succeeded!")

    # Plot scores
    plt.plot(scores)
    plt.plot(moving_average(scores, 100), color='red')
    plt.ylabel('Episode scores')
    if args.saveplot:
        plt.savefig(args.saveplot, bbox_inches='tight')

    print("Your agent received a final mean score of {}".format(mean_score))
Example #7
import torch
import math
import coach
import coach.targets
from sequences_with_gaps_10_100_datasets import train_set, dev_set, test_set, set_size, amount_of_points
from network_interface import NetworkInterface
from networks.gabbi import Gabbi
from networks.gabbi2x import Gabbi2x


network = Gabbi2x()
shuffle = True
batch_size = 4096  # 8192
learning_rate = 0.1
cch = coach.Coach(network=network)

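# One call runs a single pass over train_set through the NetworkInterface
# wrapper and returns the epoch loss.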
def train_one_epoch():
    network_interface = NetworkInterface(
        network=network,
        loss_class=torch.nn.MSELoss,
        batch_size=batch_size
    )
    epoch_loss = network_interface.train_one_epoch(
        train_set,
        shuffle=shuffle,
        learning_rate=learning_rate
    )

    return epoch_loss

def measure_performance():
Example #8
File: soul.py Project: Haya1/Master
# Robots own variables
robot = gsc.getRobotNumber()
memProxy.insertListData([['dntBallDist', '', 0], ['dntPhase', 0, 0],
                         ['dntNaoNum', robot, 0]])
teamColor = None
kickOff = None
penalty = None
(teamColor, kickOff, penalty) = gsc.getMatchInfo()

# If keeper -> different style of play, coaches the other naos

playerType = (robot == 1)
if playerType:
    try:
        # specify all playing naos here! TODO find them automatically
        coachThread = coach.Coach('coach', ['192.168.1.14',
                                            '192.168.1.13'])
        coachThread.start()
        print('Coaching started')
    except Exception:
        print('Could not coach, wrong ip?')

## STATES (Gamestates)
# Initial()
# Ready()
# Set()
# Playing()
# Penalized()
# Finished()


# Initial state: do nothing, stand ready for match
Example #9
def main():
    parser = argparse.ArgumentParser(
        description='Train a dqn agent to play the Unity Environment Banana app'
    )
    parser.add_argument("--episodes",
                        type=int,
                        help="Number of training episodes to run",
                        default=2000)
    parser.add_argument("--saveto",
                        help="Save agent to this file after training",
                        default='checkpoint.pth')
    parser.add_argument("--loadfrom",
                        help="Load previously saved model before training")
    parser.add_argument(
        "--min_score",
        type=float,
        help="Only save the model if the it achieves this score",
        default=13.)
    parser.add_argument("--epsilon",
                        type=float,
                        help="Starting epsilon",
                        default=1.)
    parser.add_argument("--saveplot", help="Location to save plot of scores")
    parser.add_argument(
        "--environment",
        help="Path to Unity environment for game (i.e. Banana.App)",
        default="./Banana.app")

    args = parser.parse_args()

    env = UnityEnvironment(file_name=args.environment)
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]

    # number of actions
    action_size = brain.vector_action_space_size

    # examine the state space
    state = env_info.vector_observations[0]
    state_size = len(state)

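    # Build the agent, optionally restoring a saved checkpoint before training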
    _agent = agent.Agent(state_size,
                         action_size,
                         seed=time.time(),
                         epsilon_start=args.epsilon)
    if args.loadfrom:
        _agent.load(args.loadfrom)
        print("Loaded checkpoint: %s" % (args.loadfrom, ))
    _coach = coach.Coach(_agent, env)

    scores = _coach.run_episodes(episodes=args.episodes, train=True)
    mean_score = np.mean(scores[-100:])
    if mean_score > args.min_score:
        if args.saveto:
            _agent.save(args.saveto)
        print("The training succeeded!")
    plt.plot(scores)
    plt.plot(moving_average(scores, 100), color='red')
    plt.ylabel('Episode scores')
    if args.saveplot:
        plt.savefig(args.saveplot, bbox_inches='tight')
    print("Your model achieved a final mean score of {}".format(mean_score))