Example #1
    def dump_model(self):
        # bundle the current Config args and parameter values into one dict
        # and write it to disk with joblib
        #utils.save_params_in_scopes(self.sess, [self.scope_dir + "model"], Config.get_save_file())
        data_dict = {}

        save_path = utils.file_to_path(Config.get_save_file())

        data_dict['args'] = Config.get_args_dict()
        data_dict['args']['use_minimum_model'] = True
        param_dict = {}

        if len(self.params) > 0:
            # fetch the current parameter values from the session
            ps = self.sess.run(self.params)

            param_dict["model"] = ps

        data_dict['params'] = param_dict
        joblib.dump(data_dict, save_path)
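
As a minimal sketch of the inverse operation, assuming the dict layout written above (the file path here is hypothetical):

import joblib

data_dict = joblib.load("saved_models/my_run")    # hypothetical path
args = data_dict['args']                          # Config args, with use_minimum_model=True
params = data_dict['params'].get('model', [])     # parameter arrays, in self.params order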
Example #2
def save_model(base_name=None):
    base_dict = {'datapoints': datapoints}
    utils.save_params_in_scopes(sess, ['model'],
                                Config.get_save_file(base_name=base_name),
                                base_dict)
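
For context, a hypothetical call site for the wrapper above (datapoints is a closure variable from the surrounding training loop; its contents here are assumptions):

datapoints = [[0, 0.5], [1000, 1.2]]  # hypothetical (timestep, mean reward) log
save_model(base_name="checkpoint")    # saves the 'model' scope params plus the datapoints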
Example #3
def main(sess):

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    seed = int(time.time()) % 10000

    if Config.EXTRACT_SEED != -1:
        seed = Config.EXTRACT_SEED
    if Config.EXTRACT_RANK != -1:
        rank = Config.EXTRACT_RANK

    set_global_seeds(seed * 100 + rank)

    utils.setup_mpi_gpus()

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True # pylint: disable=E1101

    use_policy = (Config.RESTORE_ID != '')

    nenvs = Config.NUM_ENVS
    total_timesteps = int(502e6)
    env = utils.make_general_env(nenvs, seed=rank)

    if use_policy:
        agent = create_act_model(sess, env, nenvs)
        sess.run(tf.compat.v1.global_variables_initializer())
        loaded_params = utils.load_params_for_scope(sess, 'model')
        if not loaded_params:
            print('NO SAVED PARAMS LOADED')

    # make output directory
    DIR_NAME = './VAE/records'
    os.makedirs(DIR_NAME, exist_ok=True)

    # set file name
    filename = DIR_NAME + "/" + Config.get_save_file() + "_" + str(seed * 100 + rank) + ".npz"
    
    with tf.compat.v1.Session(config=config):
        env = wrappers.add_final_wrappers(env)
        nenv = env.num_envs if hasattr(env, 'num_envs') else 1
        obs = np.zeros((nenv,) + env.observation_space.shape, dtype=env.observation_space.dtype.name)
        obs[:] = env.reset()
        dones = [False for _ in range(nenv)]
        
        # take one random step first to skip noisy initial observations
        actions = [env.action_space.sample() for _ in range(nenv)]
        actions = np.array(actions)
        obs[:], rewards, dones, _ = env.step(actions)
        state = agent.initial_state if use_policy else None
        
        mb_obs, mb_rewards, mb_actions, mb_next_obs, mb_dones = [],[],[],[],[]
        # For n in range number of steps
        for _ in range(400):
            # Given observations, get actions (values and neglogpacs are discarded here)
            # obs is already populated because env.reset() was called above
            if use_policy:
                actions, _, _, _ = agent.step(obs, state, dones)
            else:
                actions = [env.action_space.sample() for _ in range(nenv)]
            actions = np.array(actions)
            mb_obs.append(obs.copy())
            mb_actions.append(actions)
            mb_dones.append(dones)
            
            # Take actions in the env and observe the results
            # (the info dicts are discarded here)
            obs[:], rewards, dones, _ = env.step(actions)
            mb_next_obs.append(obs.copy())
            mb_rewards.append(rewards)
        # batch of steps to batch of rollouts
        mb_obs = np.asarray(mb_obs, dtype=obs.dtype)
        mb_next_obs = np.asarray(mb_next_obs, dtype=obs.dtype)
        mb_rewards = np.asarray(mb_rewards, dtype=np.float32)
        mb_actions = np.asarray(mb_actions)
        mb_dones = np.asarray(mb_dones, dtype=bool)
        
        #np.savez_compressed(filename, obs=mb_obs, action=mb_actions, next_obs=mb_next_obs, reward=mb_rewards, dones=mb_dones)
        np.savez_compressed(filename, obs=mb_obs)
        return filename
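
The rollout file written above stores only the observation batch; a minimal sketch of reading it back, e.g. to feed VAE training (the file name is hypothetical):

import numpy as np

data = np.load("./VAE/records/run_1234.npz")    # hypothetical file name
obs = data["obs"]                               # shape: (400, nenv) + observation_space.shape
frames = obs.reshape((-1,) + obs.shape[2:])     # merge step and env axes into one batch
print(frames.shape, frames.dtype)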
Example #4
def main():
    # general setup

    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

    args = setup_utils.setup_and_load()

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    seed = int(time.time()) % 10000
    set_global_seeds(seed * 100 + rank)

    utils.setup_mpi_gpus()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # pylint: disable=E1101

    # prepare directory
    sub_dir = utils.file_to_path(Config.get_save_file(base_name="tmp"))
    if os.path.isdir(sub_dir):
        shutil.rmtree(path=sub_dir)
    os.mkdir(sub_dir)

    # hyperparams
    nenvs = Config.NUM_ENVS
    total_timesteps = Config.TIMESTEPS
    population_size = Config.POPULATION_SIZE
    timesteps_per_agent = Config.TIMESTEPS_AGENT
    worker_count = Config.WORKER_COUNT
    passthrough_perc = Config.PASSTHROUGH_PERC
    mutating_perc = Config.MUTATING_PERC

    # create environment
    def make_env():
        env = utils.make_general_env(nenvs, seed=rank)
        env = wrappers.add_final_wrappers(env)
        return env

    # set up the session and workers (this also builds the tensorflow ops)
    graph = tf.get_default_graph()
    sess = tf.Session(graph=graph)

    policy = policies.get_policy()

    workers = [
        Worker(sess, i, nenvs, make_env, policy, sub_dir)
        for i in range(worker_count)
    ]

    tb_writer = TB_Writer(sess)

    def clean_exit():

        for worker in workers:
            worker.thread.join()

        utils.mpi_print("")
        utils.mpi_print("== total duration",
                        "{:.1f}".format(time.time() - t_first_start), " s ==")
        utils.mpi_print(" exit...")

        # save best performing agent
        population.sort(key=lambda k: k['fit'], reverse=True)
        workers[0].restore_model(name=population[0]["name"])
        workers[0].dump_model()

        # cleanup
        sess.close()
        shutil.rmtree(path=sub_dir)

    # load data from restore point and seed the whole population
    loaded_name = None
    if workers[0].try_load_model():
        loaded_name = str(uuid.uuid1())
        workers[0].save_model(name=loaded_name)

    # initialise population
    # either all random and no mutations pending
    # or all from restore point with all but one to be mutated
    population = [{
        "name": loaded_name or str(uuid.uuid1()),
        "fit": -1,
        "need_mut": loaded_name != None and i != 0,
        "age": -1,
        "mean_ep_len": -1
    } for i in range(population_size)]

    utils.mpi_print("== population size", population_size, ", t_agent ",
                    timesteps_per_agent, " ==")

    t_first_start = time.time()
    try:
        # main loop
        generation = 0
        timesteps_done = 0
        while timesteps_done < total_timesteps:
            t_generation_start = time.time()

            utils.mpi_print("")
            utils.mpi_print("__ Generation", generation, " __")

            # initialise and evaluate all new agents
            for agent in population:
                #if agent["fit"] < 0:  # evaluate only new agents
                if True:  # constant reevaluation, to dismiss "lucky runs" -> seems good

                    # pick worker from pool and let it work on the agent
                    not_in_work = True
                    while not_in_work:
                        for worker in workers:
                            if worker.can_take_work():
                                worker.work(agent, timesteps_per_agent)
                                not_in_work = False
                                break

                    timesteps_done += timesteps_per_agent * nenvs

            for worker in workers:
                worker.thread.join()

            # sort by fitness
            population.sort(key=lambda k: k["fit"], reverse=True)

            # print stuff
            fitnesses = [agent["fit"] for agent in population]
            ages = [agent["age"] for agent in population]
            ep_lens = [agent["mean_ep_len"] for agent in population]

            utils.mpi_print(*["{:5.3f}".format(f) for f in fitnesses])
            utils.mpi_print(*["{:5}".format(a) for a in ages])
            utils.mpi_print("__ average fit", "{:.1f}".format(
                np.mean(fitnesses)), ", t_done", timesteps_done, ", took",
                            "{:.1f}".format(time.time() - t_generation_start),
                            "s", ", total",
                            "{:.1f}".format(time.time() - t_first_start),
                            "s __")

            # log stuff
            tb_writer.log_scalar(np.mean(fitnesses), "mean_fit",
                                 timesteps_done)
            tb_writer.log_scalar(np.median(fitnesses), "median_fit",
                                 timesteps_done)
            tb_writer.log_scalar(np.max(fitnesses), "max_fit", timesteps_done)
            tb_writer.log_scalar(np.mean(ages), "mean_age", timesteps_done)
            ep_lens_mean = np.nanmean(ep_lens)
            if ep_lens_mean and not np.isnan(ep_lens_mean):
                tb_writer.log_scalar(ep_lens_mean, "mean_ep_lens",
                                     timesteps_done)

            # cleanup to prevent disk clutter
            to_be_removed = set(
                re.sub(r'\..*$', '', f) for f in os.listdir(sub_dir)) - set(
                    [agent["name"] for agent in population])
            for filename in to_be_removed:
                os.remove(sub_dir + "/" + filename + ".index")
                os.remove(sub_dir + "/" + filename + ".data-00000-of-00001")

            # break when time is up
            if timesteps_done >= total_timesteps:
                break

            # mark weak agents for replacement
            cutoff_passthrough = math.floor(population_size * passthrough_perc)
            cutoff_mutating = math.floor(population_size * mutating_perc)
            source_agents = population[:cutoff_mutating]

            new_population = population[:cutoff_passthrough]

            k = 0
            while len(new_population) < population_size:
                new_agent = {
                    # take the name from the source agent so mutation knows the parent
                    "name": source_agents[k]["name"],
                    "fit": -1,
                    "need_mut": True,
                    "age": 0,
                    "mean_ep_len": -1
                }
                new_population.append(new_agent)
                k = (k + 1) % len(source_agents)

            population = new_population
            generation += 1

        clean_exit()
    except KeyboardInterrupt:
        clean_exit()

    return 0
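
To make the selection arithmetic concrete, here is a standalone sketch of one replacement step; the population size and both percentages are assumed values, not ones taken from Config:

import math

population_size = 20
passthrough_perc = 0.1  # assumed: top 10% survive unchanged
mutating_perc = 0.3     # assumed: top 30% serve as mutation parents

cutoff_passthrough = math.floor(population_size * passthrough_perc)  # 2 elites
cutoff_mutating = math.floor(population_size * mutating_perc)        # 6 parents

# The 2 elites pass through unchanged; the other 18 slots are filled by
# cycling through the 6 parents, and every copy is flagged need_mut=True
# so a worker mutates it before its next evaluation.
slots_to_fill = population_size - cutoff_passthrough
parent_indices = [k % cutoff_mutating for k in range(slots_to_fill)]
print(parent_indices)  # [0, 1, 2, 3, 4, 5, 0, 1, ...] -> each parent spawns 3 children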