Example #1
0
def main():
    """Train a baseline PPO agent on CoinRun and log training to wandb."""
    args = setup_utils.setup_and_load()
    setup_utils.load_for_setup_if_necessary()

    # One MPI process per worker; rank decides this worker's seed/env.
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    n_workers = comm.Get_size()
    print('size', n_workers)

    # For wandb package to visualize results curves
    run_args = Config.get_args_dict()
    wandb.init(project="coinrun",
               notes=" baseline train",
               tags=["baseline", Config.RUN_ID.split('-')[0]],
               config=run_args)

    # Wall-clock-derived base seed, offset per rank so workers differ.
    base_seed = int(time.time()) % 10000
    set_global_seeds(base_seed * 100 + rank)

    utils.setup_mpi_gpus()
    utils.mpi_print('Set up gpu')
    utils.mpi_print(args)

    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True  # pylint: disable=E1101

    # NUM_ENVS environments are rolled out in parallel on each worker.
    n_envs = Config.NUM_ENVS
    total_timesteps = int(256 * 10**6)

    env = utils.make_general_env(n_envs, seed=rank)
    utils.mpi_print('Set up env')

    with tf.Session(config=tf_config):
        env = wrappers.add_final_wrappers(env)

        policy = policies_back.get_policy()
        utils.mpi_print('Set up policy')

        learn_func(policy=policy,
                   env=env,
                   log_interval=args.log_interval,
                   save_interval=args.save_interval,
                   nsteps=Config.NUM_STEPS,
                   nminibatches=Config.NUM_MINIBATCHES,
                   lam=Config.GAE_LAMBDA,
                   gamma=Config.GAMMA,
                   noptepochs=Config.PPO_EPOCHS,
                   ent_coef=Config.ENTROPY_COEFF,
                   vf_coef=Config.VF_COEFF,
                   max_grad_norm=Config.MAX_GRAD_NORM,
                   lr=lambda f: f * Config.LEARNING_RATE,
                   cliprange=lambda f: f * Config.CLIP_RANGE,
                   total_timesteps=total_timesteps)
Example #2
0
def main():
    """Train a network-randomization PPO agent on CoinRun, logging to wandb."""
    args = setup_utils.setup_and_load()

    # One MPI process per worker; rank seeds this worker's env.
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    n_workers = comm.Get_size()
    print('size', n_workers)

    # For wandb package to visualize results curves
    run_args = Config.get_args_dict()
    wandb.init(project="coinrun",
               notes="network randomization",
               tags=["baseline"],
               config=run_args)

    # Wall-clock-derived base seed, offset per rank so workers differ.
    base_seed = int(time.time()) % 10000
    set_global_seeds(base_seed * 100 + rank)

    utils.setup_mpi_gpus()

    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True  # pylint: disable=E1101

    n_envs = Config.NUM_ENVS
    total_timesteps = int(256e6)

    env = utils.make_general_env(n_envs, seed=rank)

    with tf.Session(config=tf_config):
        env = wrappers.add_final_wrappers(env)

        policy = nr_policies.get_policy()

        nr_ppo2.learn(policy=policy,
                      env=env,
                      save_interval=args.save_interval,
                      nsteps=Config.NUM_STEPS,
                      nminibatches=Config.NUM_MINIBATCHES,
                      lam=0.95,
                      gamma=Config.GAMMA,
                      noptepochs=Config.PPO_EPOCHS,
                      log_interval=1,
                      ent_coef=Config.ENTROPY_COEFF,
                      lr=lambda f: f * Config.LEARNING_RATE,
                      cliprange=lambda f: f * 0.2,
                      total_timesteps=total_timesteps)
Example #3
0
    def dump_model(self):
        """Dump this model's parameters plus the run config to the save file."""
        save_path = utils.file_to_path(Config.get_save_file())

        run_args = Config.get_args_dict()
        run_args['use_minimum_model'] = True

        param_dict = {}
        if len(self.params) > 0:
            # Fetch all parameter values from the TF session in a single run.
            param_dict["model"] = self.sess.run(self.params)

        joblib.dump({'args': run_args, 'params': param_dict}, save_path)
Example #4
0
def main():
    """Evaluate checkpoints of a restored CoinRun run and log scores to wandb.

    Loads the saved training args for Config.RESTORE_ID, applies run-variant
    overrides inferred from the restore id, then evaluates checkpoints 1..32
    each in a fresh TF session via enjoy_env_sess.
    """
    # load from restore file
    args_dict = utils.load_args()
    # train args of restore id
    test_args = setup_utils.setup_and_load()
    # Run-variant overrides inferred from the restore id string.
    if 'NR' in Config.RESTORE_ID:
        Config.USE_LSTM = 2
    if 'dropout' in Config.RESTORE_ID:
        Config.DROPOUT = 0
        Config.USE_BATCH_NORM = 0

    wandb.init(project="coinrun",
               notes="test",
               tags=["baseline", "test"],
               config=Config.get_args_dict())

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    seed = np.random.randint(100000)
    Config.SET_SEED = seed

    # Test-time settings that must override the restored training args.
    overlap = {
        'set_seed': Config.SET_SEED,
        'rep': Config.REP,
        'highd': Config.HIGH_DIFFICULTY,
        'num_levels': Config.NUM_LEVELS,
        'use_lstm': Config.USE_LSTM,
        'dropout': Config.DROPOUT,
        'use_batch_norm': Config.USE_BATCH_NORM
    }

    load_file = Config.get_load_filename(restore_id=Config.RESTORE_ID)
    mpi_print('load file name', load_file)
    mpi_print('seed', seed)
    mpi_print("---------------------------------------")
    for checkpoint in range(1, 33):
        # BUG FIX: the ConfigProto above was built but never passed to the
        # session, so allow_growth silently had no effect.
        with tf.Session(config=config) as sess:
            steps_elapsed = checkpoint * 8000000
            mpi_print('steps_elapsed:', steps_elapsed)
            enjoy_env_sess(sess, checkpoint, overlap)
Example #5
0
def save_params_in_scopes(sess, scopes, filename, base_dict=None):
    """Fetch the trainable variables of each scope and joblib-dump them.

    The written pickle holds {'args': <run config>, 'params': {scope: values}}
    on top of any entries supplied via base_dict.
    """
    data = {}
    if base_dict is not None:
        data.update(base_dict)

    out_path = file_to_path(filename)

    data['args'] = Config.get_args_dict()

    saved = {}
    for scope in scopes:
        scope_vars = tf.trainable_variables(scope)
        if len(scope_vars) > 0:
            print('saving scope', scope, filename)
            # One session.run fetches every variable in the scope at once.
            saved[scope] = sess.run(scope_vars)

    data['params'] = saved
    joblib.dump(data, out_path)
Example #6
0
def enjoy_env_sess(sess, checkpoint, overlap):
    """Evaluate one saved checkpoint and return (mean_score, succ_rate).

    Restores the checkpoint's parameters into `sess`, rolls the frozen policy
    out for rep_count episodes per environment, then logs and returns the mean
    episode score and the success rate (fraction of episodes scoring 10.0).
    Results are MPI-averaged when running with more than one worker.
    """
    should_eval = True
    mpi_print('test levels seed', Config.SET_SEED)
    mpi_print('test levels ', Config.NUM_LEVELS)
    rep_count = 50  # episodes evaluated per environment

    env = utils.make_general_env(20)
    env = wrappers.add_final_wrappers(env)
    nenvs = env.num_envs

    sess.run(tf.global_variables_initializer())
    args_now = Config.get_args_dict()
    agent = create_act_model(sess, env, nenvs)

    # Defaults returned when nothing is evaluated (checkpoint 0, load failure).
    mean_score = 0.0
    succ_rate = 0.0

    # load name is specified by config.RESTORE_ID and returns True/False
    # BUG FIX: `checkpoint == 0` was originally an `elif` after
    # `checkpoint != 32`, which made it unreachable (0 != 32 matched first),
    # and that branch referenced an undefined `steps_elapsed`.
    if checkpoint == 0:
        wandb.log({
            'Rew_mean': mean_score,
            'Succ_rate': succ_rate,
            'Step_elapsed': checkpoint * 8000000
        })
        return mean_score, succ_rate
    elif checkpoint != 32:
        base_name = str(8 * checkpoint) + 'M'
    else:
        base_name = None

    sess.run(tf.global_variables_initializer())
    # env init here
    load_file = setup_utils.restore_file(Config.RESTORE_ID,
                                         overlap_config=overlap,
                                         base_name=base_name)

    is_loaded = utils.load_params_for_scope(sess, 'model')
    if not is_loaded:
        # BUG FIX: this path previously returned names that were never
        # assigned (NameError); return the zero defaults instead.
        mpi_print('NO SAVED PARAMS LOADED')
        return mean_score, succ_rate

    obs = env.reset()
    t_step = 0

    scores = np.zeros((nenvs, rep_count))
    eplens = np.zeros((nenvs, rep_count))
    score_counts = np.array([0] * nenvs)

    def should_continue():
        # Keep rolling out until every env has finished rep_count episodes.
        if should_eval:
            return np.sum(score_counts) < rep_count * nenvs
        return True

    state = agent.initial_state
    done = np.zeros(nenvs)

    def rollout(obs, state, done):
        """rollout for rep * nenv times and return scores"""
        t = 0
        count = 0
        rews = np.zeros((nenvs, rep_count))
        while should_continue():
            action, values, state, _ = agent.step(obs, state, done)
            obs, rew, done, info = env.step(action)
            rews[:, count] += rew
            t += 1

            for i, d in enumerate(done):
                if d:
                    # NOTE(review): `t` is the global step counter shared by
                    # all envs, and `count` here is still the previous episode
                    # index — so eplens records cumulative steps at the old
                    # slot, not per-episode lengths. Preserved as-is; confirm
                    # the intended semantics before relying on eplens.
                    eplens[i][count] = t
                    if score_counts[i] < rep_count:
                        score_counts[i] += 1
                        count = score_counts[i] - 1
                        # aux score
                        if 'episode' in info[i]:
                            scores[i][count] = info[i].get('episode')['r']

        return scores, rews, eplens

    if is_loaded:
        mpi_print(load_file)
        scores, rews, eplens = rollout(obs, state, done)

    size = MPI.COMM_WORLD.Get_size()
    rank = MPI.COMM_WORLD.Get_rank()
    if size == 1:
        if rank == 0:
            testset_size = rep_count * nenvs
            utils.save_pickle(scores, Config.LOGDIR + 'scores')
            mean_score = np.sum(scores) / testset_size
            succ_rate = np.sum(scores == 10.0) / testset_size
            mpi_print('cpus ', size)
            mpi_print('testset size', testset_size)
            # NUM_LEVELS = 0 means unbounded set so the set size is rep_counts * nenvs
            # each one has a new seed(maybe counted)
            mpi_print('succ_rate', succ_rate)
            steps_elapsed = checkpoint * 8000000
            mpi_print('steps_elapsed:', steps_elapsed)
            mpi_print('mean score', mean_score)
            wandb.log({
                'Rew_mean': mean_score,
                'Succ_rate': succ_rate,
                'Step_elapsed': steps_elapsed
            })
    else:
        # Multi-worker run: average per-worker statistics over MPI.
        testset_size = rep_count * nenvs
        # BUG FIX: was `np.sum(scores=10.0)`, which passes a bogus keyword
        # argument to np.sum (TypeError) instead of counting episodes that
        # reached the max score of 10.0.
        succ = np.sum(scores == 10.0) / testset_size
        succ_rate = utils.mpi_average([succ])
        mean_score_tmp = np.sum(scores) / testset_size
        mean_score = utils.mpi_average([mean_score_tmp])
        if rank == 0:
            mpi_print('testset size', rep_count * nenvs * size)
            mpi_print('load file name', load_file)
            mpi_print('testset size', testset_size)
            # NUM_LEVELS = 0 means unbounded set so the set size is rep_counts * nenvs
            # each one has a new seed(maybe counted)
            mpi_print('succ_rate', succ_rate)
            mpi_print('mean score', mean_score)
            wandb.log({'Rew_mean': mean_score, 'Succ_rate': succ_rate})

    return mean_score, succ_rate