Example #1
    def __init__(self):
        """Do any initial setup here."""
        # Alternative: train a fresh model instead of loading one, e.g.:
        # self.model = PPO1(CnnPolicy, Env(), timesteps_per_actorbatch=128,
        #                   clip_param=0.2, entcoeff=0.01, optim_epochs=4,
        #                   optim_stepsize=1e-3, optim_batchsize=64,
        #                   gamma=0.99, lam=0.95, schedule='linear', verbose=1)

        # Load the pretrained agent saved next to this file.
        path = pathlib.Path(__file__).resolve().parent
        print("Loading ...", str(path) + '/ppo_save')
        self.model = PPO1.load(str(path) + '/ppo_save')
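These listings are method- and function-level fragments, so their module-level imports are omitted. A plausible header for Example #1, assuming stable-baselines 2.x, would be:

import pathlib

# PPO1 is the MPI-backed PPO implementation in stable-baselines 2.x.
from stable_baselines import PPO1
from stable_baselines.common.policies import CnnPolicy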
Example #2
def load_model(env, name=None):
    if name:
        filename = os.path.join(config.MODELDIR, env.name, name)
        if os.path.exists(filename):
            logger.info(f'Loading {name}')
            # The checkpoint may still be mid-write by another worker;
            # retry until it loads cleanly.
            cont = True
            while cont:
                try:
                    ppo_model = PPO1.load(filename, env=env)
                    cont = False
                except Exception as e:
                    time.sleep(5)
                    print(e)
        else:
            raise Exception(f'\n{filename} not found')
    else:
        logger.info('Loading base PPO model')
        cont = True
        while cont:
            try:
                ppo_model = PPO1(get_network_arch(env.name), env=env)
                cont = False
            except Exception as e:
                time.sleep(5)
                print(e)

    return ppo_model
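A hypothetical call site for this helper, assuming config.MODELDIR and the wrapped environment are set up as in the surrounding project:

# Retry-load the named checkpoint (another worker may still be writing it),
# or fall back to a freshly initialised base PPO1 model when name is None.
best = load_model(env, name='best_model.zip')
base = load_model(env)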
Example #3
  def __init__(self):
    super(TankSelfPlayEnv, self).__init__()
    self.policy = self
    self.best_model = None
    self.best_model_filename = None
    self.oppo_model = None
    if USE_STRONG_OPP:
      print("Using strong opp as a start point")
      self.oppo_model = PPO1.load("ppo_small_stepsize/best_model.zip")
Example #4
def load_model(env, name):

    filename = os.path.join(config.MODELDIR, env.name, name)
    if os.path.exists(filename):
        logger.info(f'Loading {name}')
        # The checkpoint may still be mid-write by another worker;
        # retry until it loads cleanly.
        cont = True
        while cont:
            try:
                ppo_model = PPO1.load(filename, env=env)
                cont = False
            except Exception as e:
                time.sleep(5)
                print(e)

    elif name == 'base.zip':
        cont = True
        while cont:
            try:
                # Only rank 0 creates and saves the base model; every other
                # rank waits for the file to appear and then loads it.
                rank = MPI.COMM_WORLD.Get_rank()
                if rank == 0:
                    ppo_model = PPO1(get_network_arch(env.name), env=env)
                    logger.info('Saving base.zip PPO model...')
                    ppo_model.save(
                        os.path.join(config.MODELDIR, env.name, 'base.zip'))
                else:
                    ppo_model = PPO1.load(os.path.join(config.MODELDIR,
                                                       env.name, 'base.zip'),
                                          env=env)
                cont = False
            except IOError:
                sys.exit(f'Permissions not granted on zoo/{env.name}/...')
            except Exception as e:
                print('Waiting for base.zip to be created...', e)
                time.sleep(2)

    else:
        raise Exception(f'\n{filename} not found')

    return ppo_model
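The rank gating in Example #4 follows a common mpi4py pattern: rank 0 writes the shared artifact while every other rank polls for it. A minimal standalone sketch of the same idea (ensure_shared_file is a hypothetical helper, not part of the project above):

import os
import time

from mpi4py import MPI

def ensure_shared_file(path, create_fn):
    """Rank 0 creates `path`; every other rank waits until it exists."""
    if MPI.COMM_WORLD.Get_rank() == 0:
        create_fn(path)
    else:
        while not os.path.exists(path):
            time.sleep(2)  # poll until rank 0 has finished writing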
Example #5
  def reset(self):
    # Load the latest "history" checkpoint as the opponent, if one exists.
    modellist = [f for f in os.listdir(LOGDIR) if f.startswith("history")]
    modellist.sort()
    if len(modellist) > 0:
      filename = os.path.join(LOGDIR, modellist[-1])  # the latest best model
      if filename != self.best_model_filename:
        print("loading model: ", filename)
        self.best_model_filename = filename
        if self.best_model is not None:
          del self.best_model
        self.best_model = PPO1.load(filename, env=self)
    return super(SlimeVolleySelfPlayEnv, self).reset()
Example #6
    def reset(self):
        # Load model if it's there, else wait (meant to be used in parallel with opponent training)
        # reset() is run multiple times throughout the experiment, not just during callbacks.

        while True:
            opp_modellist = [
                f for f in os.listdir(OPP_LOGDIR) if f.startswith("history")
            ]
            opp_modellist.sort()

            self_modellist = [
                f for f in os.listdir(SELF_LOGDIR) if f.startswith("history")
            ]
            self_modellist.sort()

            # Experiment just started, so no history files
            if len(self_modellist) == 0:
                return super(SlimeVolleyMultiAgentEnv, self).reset()

            # Middle of experiment
            if len(self_modellist) > 0:
                # If num of history files is the same, check opponent's last gen.
                if len(self_modellist) - len(opp_modellist) == 0:
                    opp_filename = opp_modellist[-1]
                    # Opponent's last gen has no change -> Both models still training the same gen
                    if opp_filename == self.opp_model_filename:
                        return super(SlimeVolleyMultiAgentEnv, self).reset()
                    # Opponent's last gen changed -> Opponent model has been waiting -> Load new opp.
                    elif opp_filename != self.opp_model_filename:
                        print("Loading model:", opp_filename)
                        self.opp_model_filename = opp_filename
                        if self.opp_model is not None:
                            del self.opp_model
                        self.opp_model = PPO1.load(
                            os.path.join(OPP_LOGDIR, opp_filename), env=self)
                        return super(SlimeVolleyMultiAgentEnv, self).reset()
                # Opponent has finished its current gen; self should continue training.
                elif len(opp_modellist) - len(self_modellist) == 1:
                    print(
                        f"Self: Gen {len(self_modellist)}, Opp: Gen {len(opp_modellist)}. Opponent waiting for self training to complete."
                    )
                    return super(SlimeVolleyMultiAgentEnv, self).reset()
            print(
                f"Self: Gen {len(self_modellist)}, Opp: Gen {len(opp_modellist)}. Waiting for opponent training to complete."
            )
            time.sleep(5)
Example #7
def main():
    """
    Trains PPO1 on a MuJoCo environment, saves the model, reloads it,
    and runs rendered evaluation episodes
    """
    args = mujoco_arg_parser().parse_args()
    logger.configure()
    train(args.env, num_timesteps=args.num_timesteps, seed=args.seed)

    env = make_mujoco_env(args.env, args.seed)
    model = PPO1(MlpPolicy,
                 env,
                 timesteps_per_actorbatch=2048,
                 clip_param=0.2,
                 entcoeff=0.0,
                 optim_epochs=10,
                 optim_stepsize=3e-4,
                 optim_batchsize=64,
                 gamma=0.99,
                 lam=0.95,
                 schedule='linear')
    model.learn(total_timesteps=args.num_timesteps)

    model.save("ppo1")
    # env.close()

    del model  # remove to demonstrate saving and loading
    # env = make_mujoco_env(args.env, args.seed)

    model = PPO1.load("ppo1")
    logger.log("Model reloaded - running evaluation episodes")
    episode_rew = 0
    obs = env.reset()

    while True:
        action, _states = model.predict(obs)
        obs, reward, done, info = env.step(action)
        episode_rew += reward
        env.render()
        if done:
            print(f'episode_rew={episode_rew}')
            episode_rew = 0
            obs = env.reset()
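The manual rollout loop above can also be replaced with the evaluation helper bundled with stable-baselines (a sketch, assuming stable-baselines >= 2.10):

from stable_baselines.common.evaluation import evaluate_policy

# Average return over 10 episodes; deterministic=True disables action sampling.
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10,
                                          deterministic=True)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")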
Example #8
def train():
  # train selfplay agent
  logger.configure(folder=LOGDIR)

  env = SlimeVolleySelfPlayEnv()
  env.seed(SEED)

  model = PPO1.load(BEST_MODEL_PATH, env=env)

  eval_callback = SelfPlayCallback(env,
    best_model_save_path=LOGDIR,
    log_path=LOGDIR,
    eval_freq=EVAL_FREQ,
    n_eval_episodes=EVAL_EPISODES,
    deterministic=False)

  model.learn(total_timesteps=NUM_TIMESTEPS, callback=eval_callback)

  model.save(os.path.join(LOGDIR, "final_model")) # probably never get to this point.

  env.close()
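Example #8 relies on a SelfPlayCallback defined elsewhere in the project. A minimal sketch of what such a callback might look like (BEST_THRESHOLD, LOGDIR, and the zero-padded history naming are assumptions, chosen to mirror the checkpoints that Example #5's reset() scans for):

import os
from shutil import copyfile

from stable_baselines.common.callbacks import EvalCallback

class SelfPlayCallback(EvalCallback):
  """Sketch: after each evaluation, if the agent beats BEST_THRESHOLD,
  snapshot best_model.zip as a new "history_*" generation so the
  self-play env can load it as the next opponent."""
  def __init__(self, *args, **kwargs):
    super(SelfPlayCallback, self).__init__(*args, **kwargs)
    self.generation = 0

  def _on_step(self):
    result = super(SelfPlayCallback, self)._on_step()
    if result and self.best_mean_reward > BEST_THRESHOLD:
      self.generation += 1
      copyfile(os.path.join(LOGDIR, "best_model.zip"),
               os.path.join(LOGDIR,
                            "history_" + str(self.generation).zfill(8) + ".zip"))
      self.best_mean_reward = BEST_THRESHOLD  # reset the bar for the next gen
    return result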
Example #9
def main(args):

    rank = MPI.COMM_WORLD.Get_rank()

    model_dir = os.path.join(config.MODELDIR, args.env_name)

    if rank == 0:
        os.makedirs(model_dir, exist_ok=True)
        if args.reset:
            reset_files(model_dir)
        logger.configure(config.LOGDIR)
    else:
        logger.configure(format_strs=[])

    if args.debug:
        logger.set_level(config.DEBUG)
    else:
        time.sleep(5)
        logger.set_level(config.INFO)

    workerseed = args.seed + 10000 * rank
    set_global_seeds(workerseed)

    logger.info('\nSetting up the selfplay training environment opponents...')
    base_env = get_environment(args.env_name)
    env = selfplay_wrapper(base_env)(opponent_type=args.opponent_type,
                                     verbose=args.verbose)
    env.seed(workerseed)

    CustomPolicy = get_network_arch(args.env_name)

    params = {
        'gamma': args.gamma,
        'timesteps_per_actorbatch': args.timesteps_per_actorbatch,
        'clip_param': args.clip_param,
        'entcoeff': args.entcoeff,
        'optim_epochs': args.optim_epochs,
        'optim_stepsize': args.optim_stepsize,
        'optim_batchsize': args.optim_batchsize,
        'lam': args.lam,
        'adam_epsilon': args.adam_epsilon,
        'schedule': 'linear',
        'verbose': 1,
        'tensorboard_log': config.LOGDIR
    }

    # Allow time for the base model to be saved out when the environment is created.
    time.sleep(5)

    if args.reset or not os.path.exists(
            os.path.join(model_dir, 'best_model.zip')):
        logger.info('\nLoading the base PPO agent to train...')
        model = PPO1.load(os.path.join(model_dir, 'base.zip'), env, **params)
    else:
        logger.info(
            '\nLoading the best_model.zip PPO agent to continue training...')
        model = PPO1.load(os.path.join(model_dir, 'best_model.zip'), env,
                          **params)

    # Callbacks
    logger.info(
        '\nSetting up the selfplay evaluation environment opponents...')
    callback_args = {
        'eval_env': selfplay_wrapper(base_env)(opponent_type=args.opponent_type,
                                               verbose=args.verbose),
        'best_model_save_path': config.TMPMODELDIR,
        'log_path': config.LOGDIR,
        'eval_freq': args.eval_freq,
        'n_eval_episodes': args.n_eval_episodes,
        'deterministic': False,
        'render': True,
        'verbose': 0
    }

    if args.rules:
        logger.info(
            '\nSetting up the evaluation environment against the rules-based agent...'
        )
        # Evaluate against a 'rules' agent as well
        eval_actual_callback = EvalCallback(
            eval_env=selfplay_wrapper(base_env)(opponent_type='rules',
                                                verbose=args.verbose),
            eval_freq=1,
            n_eval_episodes=args.n_eval_episodes,
            deterministic=args.best,
            render=True,
            verbose=0)
        callback_args['callback_on_new_best'] = eval_actual_callback

    # Evaluate the agent against previous versions
    eval_callback = SelfPlayCallback(args.opponent_type, args.threshold,
                                     args.env_name, **callback_args)

    logger.info('\nSetup complete - commencing learning...\n')

    model.learn(total_timesteps=int(1e9),
                callback=[eval_callback],
                reset_num_timesteps=False,
                tb_log_name="tb")

    env.close()
    del env
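Because PPO1 averages gradients across MPI workers, Example #9 is normally launched with several processes; a typical invocation (the script name and flag values here are hypothetical) looks like:

# Launch 4 MPI workers; each gets a distinct workerseed via its rank.
#   mpirun -np 4 python train.py --env_name <env_name> --opponent_type <opponent_type>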