def test_checkpoint_creation(self):
    self.loss = 1

    def train_one_epoch():
        self.loss -= 0.1
        return self.loss

    def measure_performance():
        return (5.2, {'a': 1}), '5|3|1|4'

    network = SimpleFC(name='simple_fc', directory=self.test_dir)
    cch = coach.Coach(network=network)
    cch.train(target=coach.targets.epoch_reached(3),
              train_one_epoch=train_one_epoch,
              measure_performance=measure_performance,
              settings_notes={'learning_rate': 5},
              checkpoint_frequency=10,
              checkpoint=-1)

    created_checkpoint = network.latest_checkpoint()
    self.assertEqual(created_checkpoint.id, 0)
    self.assertEqual(
        created_checkpoint.notes, {
            'train_set_performance': (5.2, {'a': 1}),
            'dev_set_performance': '5|3|1|4',
            'learning_rate': 5
        })
def __init__(self, nom, vision, grSim, com, disp=0):
    self.nom = nom
    # Two yellow and two blue robots
    Y0 = rbt.Robot('Y', 0, self, grSim, com)
    Y1 = rbt.Robot('Y', 1, self, grSim, com)
    B0 = rbt.Robot('B', 0, self, grSim, com)
    B1 = rbt.Robot('B', 1, self, grSim, com)
    self.n = 9
    self.m = 9
    self.disp = disp
    self.vision = vision
    self.joueurs = [Y0, Y1, B0, B1]
    # Yellow plays the right side, blue the left
    Yellow = coach.Coach([self.joueurs[0], self.joueurs[1]], 'Y', 'R')
    Blue = coach.Coach([self.joueurs[2], self.joueurs[3]], 'B', 'L')
    self.balle = Balle(0, 0)
    self.blue = Blue
    self.yellow = Yellow
    self.stop = False
    self.go = False
    self.score_jaune = 0
    self.score_bleu = 0
def __init__(self, nom, vision, grSim, com, blueSide='L', start='B', disp=0):
    self.nom = nom
    Y0 = rbt.Robot('Y', 0, self, grSim, com)
    Y1 = rbt.Robot('Y', 1, self, grSim, com)
    B0 = rbt.Robot('B', 0, self, grSim, com)
    B1 = rbt.Robot('B', 1, self, grSim, com)
    self.n = 9
    self.m = 9
    self.disp = disp
    self.vision = vision
    self.joueurs = [Y0, Y1, B0, B1]
    # The yellow team takes whichever side the blue team does not
    self.blueSide = blueSide
    if blueSide == 'L':
        yellowSide = 'R'
    else:
        yellowSide = 'L'
    Yellow = coach.Coach([self.joueurs[0], self.joueurs[1]], 'Y', yellowSide)
    Blue = coach.Coach([self.joueurs[2], self.joueurs[3]], 'B', blueSide)
    self.balle = Balle(0, 0)
    self.blue = Blue
    self.yellow = Yellow
    self.stop = False
    self.go = False
    self.score_jaune = 0
    self.score_bleu = 0
    # 'start' selects the team taking the kick-off (engagement)
    self.team_engagement = start
    self.engagement = True
def main():
    parser = argparse.ArgumentParser(
        description='Train a DDPG agent to play the Unity Environment Tennis app')
    parser.add_argument("--episodes",
                        type=int,
                        help="Number of training episodes to run",
                        default=5000)
    parser.add_argument("--max_steps",
                        type=int,
                        help="Maximum steps per episode",
                        default=1000)
    parser.add_argument(
        "--saveto",
        help="Save the agent after training. agent- and critic- are prepended to the specified name.",
        default='checkpoint.pth')
    parser.add_argument("--loadfrom",
                        help="Load previously saved model before training")
    parser.add_argument(
        "--min_score",
        type=float,
        help="Only save the model if it achieves this score",
        default=0.5)
    parser.add_argument("--saveplot", help="Location to save plot of scores")
    parser.add_argument(
        "--environment",
        help="Path to Unity environment for game (e.g. ./Tennis.app)",
        default="./Tennis.app")
    parser.add_argument(
        "--eval",
        action='store_true',
        help="Eval mode, which affects the Unity environment and removes "
        "the random noise from the predicted agent actions")
    args = parser.parse_args()

    env = UnityEnvironment(file_name=args.environment)
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]

    # number of actions
    action_size = brain.vector_action_space_size
    print("Action size: " + str(action_size))

    # examine the state space
    state = env_info.vector_observations[0]
    state_size = len(state)
    num_agents = len(env_info.agents)
    print('Number of agents:', num_agents)

    # Create agent and start training
    _agent = maddpg_agent.MADDPGAgent(state_size, action_size, num_agents)
    if args.loadfrom:
        _agent.load(args.loadfrom)
    _coach = coach.Coach(_agent, env)

    # Callback which saves the agent whenever it reaches a new best score
    # above the required minimum
    max_score = 0

    def save_fn(agent, episode, avg_score):
        nonlocal max_score
        if avg_score > max_score and avg_score > args.min_score and args.saveto:
            agent.save(args.saveto)
            print("Training succeeded. New max score %s at step %s" %
                  (avg_score, episode))
            max_score = avg_score

    # Train (or eval) the agent
    scores = _coach.run_episodes(args.episodes,
                                 args.max_steps,
                                 train=not args.eval,
                                 callback=save_fn)

    # Plot scores
    plt.plot(scores)
    plt.plot(moving_average(scores, 100), color='red')
    plt.ylabel('Episode scores')
    if args.saveplot:
        plt.savefig(args.saveplot, bbox_inches='tight')
parser.add_argument("loadfrom", help="Model checkpoint used for eval") args = parser.parse_args() env = UnityEnvironment(file_name="./Banana.app") brain_name = env.brain_names[0] brain = env.brains[brain_name] # reset the environment env_info = env.reset(train_mode=True)[brain_name] # number of actions action_size = brain.vector_action_space_size # examine the state space state = env_info.vector_observations[0] state_size = len(state) _agent = agent.Agent(state_size, action_size, seed=time.time()) if args.loadfrom: _agent.load(args.loadfrom) else: print(args.usage()) exit() _coach = coach.Coach(_agent, env) scores = _coach.run_episodes(episodes=args.episodes, train=False) mean_score = np.mean(scores[-100:]) print("Your model achieved a final mean score of {}".format(mean_score))
def main():
    parser = argparse.ArgumentParser(
        description='Train a DDPG agent to play the Unity Environment Reacher app')
    parser.add_argument("--episodes",
                        type=int,
                        help="Number of training episodes to run",
                        default=200)
    parser.add_argument("--max_steps",
                        type=int,
                        help="Maximum steps per episode",
                        default=1000)
    parser.add_argument(
        "--saveto",
        help="Save the agent after training. agent- and critic- are prepended to the specified name.",
        default='checkpoint.pth')
    parser.add_argument("--loadfrom",
                        help="Load previously saved model before training")
    parser.add_argument(
        "--min_score",
        type=float,
        help="Only save the model if it achieves this score",
        default=30.)
    parser.add_argument("--saveplot", help="Location to save plot of scores")
    parser.add_argument(
        "--environment",
        help="Path to Unity environment for game (e.g. ./Reacher.app)",
        default="./Reacher.app")
    parser.add_argument(
        "--eval",
        action='store_true',
        help="Eval mode, which affects the Unity environment and removes "
        "the random noise from the predicted agent actions")
    args = parser.parse_args()

    env = UnityEnvironment(file_name=args.environment)
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]

    # number of actions
    action_size = brain.vector_action_space_size

    # examine the state space
    state = env_info.vector_observations[0]
    state_size = len(state)
    num_agents = len(env_info.agents)
    print('Number of agents:', num_agents)

    # Create agent and start training
    _agent = ddpg_agent.DDPGAgent(state_size, action_size, num_agents)
    if args.loadfrom:
        _agent.load(args.loadfrom)
    _coach = coach.Coach(_agent, env)
    scores = _coach.run_episodes(args.episodes,
                                 args.max_steps,
                                 train=not args.eval)

    mean_score = np.mean(scores[-100:])

    # Save the network if successful
    if mean_score > args.min_score and args.saveto:
        _agent.save(args.saveto)
        print("Training succeeded!")

    # Plot scores
    plt.plot(scores)
    plt.plot(moving_average(scores, 100), color='red')
    plt.ylabel('Episode scores')
    if args.saveplot:
        plt.savefig(args.saveplot, bbox_inches='tight')

    print("Your agent received a final mean score of {}".format(mean_score))
import torch
import math

import coach
import coach.targets
from sequences_with_gaps_10_100_datasets import train_set, dev_set, test_set, set_size, amount_of_points
from network_interface import NetworkInterface
from networks.gabbi import Gabbi
from networks.gabbi2x import Gabbi2x

network = Gabbi2x()
shuffle = True
batch_size = 4096  # 8192
learning_rate = 0.1

cch = coach.Coach(network=network)


def train_one_epoch():
    network_interface = NetworkInterface(
        network=network,
        loss_class=torch.nn.MSELoss,
        batch_size=batch_size
    )
    epoch_loss = network_interface.train_one_epoch(
        train_set,
        shuffle=shuffle,
        learning_rate=learning_rate
    )
    return epoch_loss


def measure_performance():
# Robot's own variables
robot = gsc.getRobotNumber()
memProxy.insertListData([['dntBallDist', '', 0], ['dntPhase', 0, 0],
                         ['dntNaoNum', robot, 0]])
teamColor = None
kickOff = None
penalty = None
(teamColor, kickOff, penalty) = gsc.getMatchInfo()

# If keeper -> different style of play, coaches the other naos
playerType = (robot == 1)
if playerType == 1:
    try:
        # specify all playing naos here! TODO find them automatically
        coachThread = coach.Coach('coach', ['192.168.1.14',
                                            '192.168.1.13'])
        coachThread.start()
        print 'Coaching started'
    except:
        print 'Could not coach, wrong ip?'

## STATES (Gamestates)
# Initial()
# Ready()
# Set()
# Playing()
# Penalized()
# Finished()

# Initial state: do nothing, stand ready for match
def main():
    parser = argparse.ArgumentParser(
        description='Train a DQN agent to play the Unity Environment Banana app')
    parser.add_argument("--episodes",
                        type=int,
                        help="Number of training episodes to run",
                        default=2000)
    parser.add_argument("--saveto",
                        help="Save agent to this file after training",
                        default='checkpoint.pth')
    parser.add_argument("--loadfrom",
                        help="Load previously saved model before training")
    parser.add_argument(
        "--min_score",
        type=float,
        help="Only save the model if it achieves this score",
        default=13.)
    parser.add_argument("--epsilon",
                        type=float,
                        help="Starting epsilon",
                        default=1.)
    parser.add_argument("--saveplot", help="Location to save plot of scores")
    parser.add_argument(
        "--environment",
        help="Path to Unity environment for game (e.g. ./Banana.app)",
        default="./Banana.app")
    args = parser.parse_args()

    env = UnityEnvironment(file_name=args.environment)
    brain_name = env.brain_names[0]
    brain = env.brains[brain_name]

    # reset the environment
    env_info = env.reset(train_mode=True)[brain_name]

    # number of actions
    action_size = brain.vector_action_space_size

    # examine the state space
    state = env_info.vector_observations[0]
    state_size = len(state)

    _agent = agent.Agent(state_size,
                         action_size,
                         seed=time.time(),
                         epsilon_start=args.epsilon)
    if args.loadfrom:
        _agent.load(args.loadfrom)
        print("Loaded checkpoint: %s" % (args.loadfrom, ))
    _coach = coach.Coach(_agent, env)
    scores = _coach.run_episodes(episodes=args.episodes, train=True)

    mean_score = np.mean(scores[-100:])
    # Save the network only if it clears the configured minimum score
    if mean_score > args.min_score:
        if args.saveto:
            _agent.save(args.saveto)
            print("The training succeeded!")

    plt.plot(scores)
    plt.plot(moving_average(scores, 100), color='red')
    plt.ylabel('Episode scores')
    if args.saveplot:
        plt.savefig(args.saveplot, bbox_inches='tight')

    print("Your model achieved a final mean score of {}".format(mean_score))