def dump_model(self):
    # utils.save_params_in_scopes(self.sess, [self.scope_dir + "model"], Config.get_save_file())
    data_dict = {}
    save_path = utils.file_to_path(Config.get_save_file())

    data_dict['args'] = Config.get_args_dict()
    data_dict['args']['use_minimum_model'] = True

    param_dict = {}
    if len(self.params) > 0:
        # print('saving scope', scope, filename)
        ps = self.sess.run(self.params)
        param_dict["model"] = ps

    data_dict['params'] = param_dict
    joblib.dump(data_dict, save_path)
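# Hedged sketch (not part of the original source): judging from the code above, the
# dumped file is a joblib pickle of the form {'args': ..., 'params': {'model': [...]}},
# where 'model' holds the numpy arrays returned by sess.run(self.params).
# It could be inspected roughly like this:
#
#   import joblib
#   data = joblib.load(utils.file_to_path(Config.get_save_file()))
#   print(data['args']['use_minimum_model'])   # True
#   model_weights = data['params']['model']    # list of numpy parameter arrays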
def save_model(base_name=None):
    # `sess` and `datapoints` are assumed to be captured from the enclosing training scope.
    base_dict = {'datapoints': datapoints}
    utils.save_params_in_scopes(sess, ['model'], Config.get_save_file(base_name=base_name), base_dict)
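# Hedged usage sketch (illustrative only): checkpointing during training might look like
#
#   save_model()                      # save under the default file name
#   save_model(base_name="backup")    # save under an alternative base name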
def main(sess):
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    seed = int(time.time()) % 10000
    if Config.EXTRACT_SEED != -1:
        seed = Config.EXTRACT_SEED
    if Config.EXTRACT_RANK != -1:
        rank = Config.EXTRACT_RANK

    set_global_seeds(seed * 100 + rank)

    utils.setup_mpi_gpus()
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True  # pylint: disable=E1101

    use_policy = (Config.RESTORE_ID != '')

    nenvs = Config.NUM_ENVS
    total_timesteps = int(502e6)

    env = utils.make_general_env(nenvs, seed=rank)

    if use_policy:
        agent = create_act_model(sess, env, nenvs)
        sess.run(tf.compat.v1.global_variables_initializer())
        loaded_params = utils.load_params_for_scope(sess, 'model')
        if not loaded_params:
            print('NO SAVED PARAMS LOADED')

    # make directory
    DIR_NAME = './VAE/records/'
    if not os.path.exists(DIR_NAME):
        os.makedirs(DIR_NAME, exist_ok=True)

    # set file name
    filename = DIR_NAME + "/" + Config.get_save_file() + "_" + str(seed * 100 + rank) + ".npz"

    with tf.compat.v1.Session(config=config):
        env = wrappers.add_final_wrappers(env)

        nenv = env.num_envs if hasattr(env, 'num_envs') else 1
        obs = np.zeros((nenv,) + env.observation_space.shape, dtype=env.observation_space.dtype.name)
        obs[:] = env.reset()
        dones = [False for _ in range(nenv)]

        # remove noisy inputs: take one random step before recording
        actions = np.array([env.action_space.sample() for _ in range(nenv)])
        obs[:], rewards, dones, _ = env.step(actions)

        state = agent.initial_state if use_policy else None

        mb_obs, mb_rewards, mb_actions, mb_next_obs, mb_dones = [], [], [], [], []

        # for n in range number of steps
        for _ in range(400):
            # Given observations, get action value and neglogpacs.
            # We already have self.obs because the Runner superclass runs self.obs[:] = env.reset() on init.
            if use_policy:
                actions, _, _, _ = agent.step(obs, state, dones)
            else:
                actions = np.array([env.action_space.sample() for _ in range(nenv)])

            mb_obs.append(obs.copy())
            mb_actions.append(actions)
            mb_dones.append(dones)

            # Take actions in env and look at the results.
            # Infos contains a ton of useful information.
            obs[:], rewards, dones, _ = env.step(actions)
            mb_next_obs.append(obs.copy())
            mb_rewards.append(rewards)

        # batch of steps to batch of rollouts
        mb_obs = np.asarray(mb_obs, dtype=obs.dtype)
        mb_next_obs = np.asarray(mb_next_obs, dtype=obs.dtype)
        mb_rewards = np.asarray(mb_rewards, dtype=np.float32)
        mb_actions = np.asarray(mb_actions)
        mb_dones = np.asarray(mb_dones, dtype=bool)

        # np.savez_compressed(filename, obs=mb_obs, action=mb_actions, next_obs=mb_next_obs, reward=mb_rewards, dones=mb_dones)
        np.savez_compressed(filename, obs=mb_obs)

    return filename
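# Hedged sketch (the helper below is illustrative, not from the original repo): the .npz
# written above stores only the observation batch, shaped (400, nenv, *obs_shape). A
# downstream VAE data loader might flatten the step and env dimensions like this:
#
#   import numpy as np
#
#   def load_record(path):
#       obs = np.load(path)["obs"]                 # (400, nenv, H, W, C)
#       return obs.reshape((-1,) + obs.shape[2:])  # (400 * nenv, H, W, C)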
def main():
    # general setup
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
    args = setup_utils.setup_and_load()

    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    seed = int(time.time()) % 10000
    set_global_seeds(seed * 100 + rank)

    utils.setup_mpi_gpus()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # pylint: disable=E1101

    # prepare directory
    sub_dir = utils.file_to_path(Config.get_save_file(base_name="tmp"))
    if os.path.isdir(sub_dir):
        shutil.rmtree(path=sub_dir)
    os.mkdir(sub_dir)

    # hyperparams
    nenvs = Config.NUM_ENVS
    total_timesteps = Config.TIMESTEPS
    population_size = Config.POPULATION_SIZE
    timesteps_per_agent = Config.TIMESTEPS_AGENT
    worker_count = Config.WORKER_COUNT
    passthrough_perc = Config.PASSTHROUGH_PERC
    mutating_perc = Config.MUTATING_PERC

    # create environment
    def make_env():
        env = utils.make_general_env(nenvs, seed=rank)
        env = wrappers.add_final_wrappers(env)
        return env

    # setup session and workers, and therefore tensorflow ops
    graph = tf.get_default_graph()
    sess = tf.Session(graph=graph)
    policy = policies.get_policy()
    workers = [
        Worker(sess, i, nenvs, make_env, policy, sub_dir)
        for i in range(worker_count)
    ]
    tb_writer = TB_Writer(sess)

    def clean_exit():
        for worker in workers:
            Thread.join(worker.thread)

        utils.mpi_print("")
        utils.mpi_print("== total duration", "{:.1f}".format(time.time() - t_first_start), "s ==")
        utils.mpi_print(" exit...")

        # save best performing agent
        population.sort(key=lambda k: k['fit'], reverse=True)
        workers[0].restore_model(name=population[0]["name"])
        workers[0].dump_model()

        # cleanup
        sess.close()
        shutil.rmtree(path=sub_dir)

    # load data from restore point and seed the whole population
    loaded_name = None
    if workers[0].try_load_model():
        loaded_name = str(uuid.uuid1())
        workers[0].save_model(name=loaded_name)

    # initialise population:
    # either all random and no mutations pending,
    # or all from the restore point with all but one marked for mutation
    population = [{
        "name": loaded_name or str(uuid.uuid1()),
        "fit": -1,
        "need_mut": loaded_name is not None and i != 0,
        "age": -1,
        "mean_ep_len": -1
    } for i in range(population_size)]

    utils.mpi_print("== population size", population_size, ", t_agent", timesteps_per_agent, " ==")

    t_first_start = time.time()
    try:
        # main loop
        generation = 0
        timesteps_done = 0
        while timesteps_done < total_timesteps:
            t_generation_start = time.time()
            utils.mpi_print("")
            utils.mpi_print("__ Generation", generation, " __")

            # initialise and evaluate all new agents
            for agent in population:
                # if agent["fit"] < 0:  # test
                if True:  # test constant reevaluation, to dismiss "lucky runs" -> seems good
                    # pick a worker from the pool and let it work on the agent
                    not_in_work = True
                    while not_in_work:
                        for worker in workers:
                            if worker.can_take_work():
                                worker.work(agent, timesteps_per_agent)
                                not_in_work = False
                                break

                    timesteps_done += timesteps_per_agent * nenvs

            for worker in workers:
                Thread.join(worker.thread)

            # sort by fitness
            population.sort(key=lambda k: k["fit"], reverse=True)

            # print stuff
            fitnesses = [agent["fit"] for agent in population]
            ages = [agent["age"] for agent in population]
            ep_lens = [agent["mean_ep_len"] for agent in population]
            utils.mpi_print(*["{:5.3f}".format(f) for f in fitnesses])
            utils.mpi_print(*["{:5}".format(a) for a in ages])
            utils.mpi_print("__ average fit", "{:.1f}".format(np.mean(fitnesses)),
                            ", t_done", timesteps_done,
                            ", took", "{:.1f}".format(time.time() - t_generation_start), "s",
                            ", total", "{:.1f}".format(time.time() - t_first_start), "s __")

            # log stuff
            tb_writer.log_scalar(np.mean(fitnesses), "mean_fit", timesteps_done)
            tb_writer.log_scalar(np.median(fitnesses), "median_fit", timesteps_done)
            tb_writer.log_scalar(np.max(fitnesses), "max_fit", timesteps_done)
            tb_writer.log_scalar(np.mean(ages), "mean_age", timesteps_done)
            ep_lens_mean = np.nanmean(ep_lens)
            if ep_lens_mean:
                tb_writer.log_scalar(ep_lens_mean, "mean_ep_lens", timesteps_done)

            # cleanup to prevent disk clutter
            to_be_removed = set(
                re.sub(r'\..*$', '', f) for f in os.listdir(sub_dir)) - set(
                    [agent["name"] for agent in population])
            for filename in to_be_removed:
                os.remove(sub_dir + "/" + filename + ".index")
                os.remove(sub_dir + "/" + filename + ".data-00000-of-00001")

            # break when time is up
            if not timesteps_done < total_timesteps:
                break

            # mark weak agents for replacement
            cutoff_passthrough = math.floor(population_size * passthrough_perc)
            cutoff_mutating = math.floor(population_size * mutating_perc)
            source_agents = population[:cutoff_mutating]
            new_population = population[:cutoff_passthrough]
            k = 0
            while len(new_population) < population_size:
                new_agent = {
                    "name": source_agents[k]["name"],  # take name from source agent, so mutation knows the parent
                    "fit": -1,
                    "need_mut": True,
                    "age": 0,
                    "mean_ep_len": -1  # keep keys consistent with the initial population
                }
                new_population.append(new_agent)
                k = (k + 1) % len(source_agents)
            population = new_population

            generation += 1

        clean_exit()
    except KeyboardInterrupt:
        clean_exit()

    return 0
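# Hedged worked example of the replacement step above (numbers are illustrative):
# with population_size = 10, passthrough_perc = 0.2 and mutating_perc = 0.4,
#   cutoff_passthrough = math.floor(10 * 0.2) = 2  -> top 2 agents survive unchanged
#   cutoff_mutating    = math.floor(10 * 0.4) = 4  -> top 4 agents serve as parents
# The remaining 8 slots are filled round-robin with children named after parents
# 0, 1, 2, 3, 0, 1, 2, 3, each flagged "need_mut": True so a worker mutates it
# before the next evaluation.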