def test_logger(test_multiline_str):
    '''Smoke-test the logger by emitting the same multi-line string at every level.'''
    # levels are hit in the same order as before: critical, debug, error, exception, info, warn
    level_names = ('critical', 'debug', 'error', 'exception', 'info', 'warn')
    for level_name in level_names:
        emit = getattr(logger, level_name)
        emit(test_multiline_str)
def body_done_log(self, body):
    '''Log a one-line summary (lab indices, clock, loss and rewards) for a body that is done.'''
    env = body.env
    clock = env.clock
    memory = body.memory
    info_space = self.info_space
    # the message is assembled from adjacent f-strings; the resulting text is unchanged
    summary = (
        f'Trial {info_space.get("trial")} session {info_space.get("session")} '
        f'env {env.e}, body {body.aeb}, '
        f'epi {clock.get("epi")}, t {clock.get("t")}, '
        f'loss: {body.loss:.4f}, total_reward: {memory.total_reward:.2f}, '
        f'last-{memory.avg_window}-epi avg: {memory.avg_total_reward:.2f}'
    )
    logger.info(summary)
def post_body_init(self):
    '''
    Finish initialization that requires bodies to exist:
    flatten self.body_e, tag each body with its flat index, and record the body count.
    '''
    flat_bodies = util.nanflatten(self.body_e)
    self.nanflat_body_e = flat_bodies
    for position, env_body in enumerate(flat_bodies):
        env_body.nanflat_e_idx = position
    self.body_num = len(flat_bodies)
    description = util.self_desc(self)
    logger.info(description)
def close(self):
    '''
    Close the session: close the agent space first, then the env space, then log completion.
    '''
    # attribute lookup and close() stay interleaved, agent_space before env_space
    for space_name in ('agent_space', 'env_space'):
        getattr(self, space_name).close()
    done_msg = 'Session done, closing.'
    logger.info(done_msg)
def update_lr(self):
    '''
    Ask self.lr_decay for the next learning rate; when it differs from the current one,
    store it in self.optim_spec, log the change and rebuild self.optim.
    '''
    assert 'lr' in self.optim_spec
    previous_lr = self.optim_spec['lr']
    decayed_lr = self.lr_decay(self)
    if decayed_lr != previous_lr:
        self.optim_spec['lr'] = decayed_lr
        logger.info(f'Learning rate decayed from {previous_lr:.6f} to {self.optim_spec["lr"]:.6f}')
        # the optimizer is re-created from the updated spec
        self.optim = net_util.get_optim(self, self.optim_spec)
def __init__(self, experiment):
    '''
    Register pickle-based ray serializers for the lab/pandas classes, keep a reference to the
    experiment and build its config space.
    '''
    # imported here rather than at module level - presumably to avoid a circular import; TODO confirm
    from slm_lab.experiment.control import Experiment
    for picklable_cls in (Experiment, InfoSpace, pd.DataFrame, pd.Series):
        ray.register_custom_serializer(picklable_cls, use_pickle=True)
    self.experiment = experiment
    self.config_space = build_config_space(experiment)
    class_name = util.get_class_name(self)
    logger.info(f'Running {class_name}, with meta spec:\n{self.experiment.spec["meta"]}')
def post_body_init(self):
    '''Finish the algorithm setup that can only run once a body exists.'''
    self.body = self.agent.nanflat_body_a[0]  # single-body algo
    # create the extra replay memory for SIL
    replay_cls = getattr(memory, self.memory_spec['sil_replay_name'])
    self.body.replay_memory = replay_cls(self.memory_spec, self, self.body)
    self.init_algorithm_params()
    self.init_nets()
    description = util.self_desc(self)
    logger.info(description)
def __init__(self, spec, info_space=None):
    '''
    Set up a trial: init thread vars, store spec/info_space, resolve this unit's
    coordinate and index, and save the spec.
    '''
    if not info_space:
        # a falsy info_space (e.g. None) is replaced by a fresh one
        info_space = InfoSpace()
    init_thread_vars(spec, info_space, unit='trial')
    self.spec = spec
    self.info_space = info_space
    coor_idx = self.info_space.get_coor_idx(self)
    self.coor, self.index = coor_idx
    self.session_data_dict = {}
    self.data = None
    analysis.save_spec(spec, info_space, unit='trial')
    logger.info(f'Initialized trial {self.index}')
def retro_analyze_experiment(predir):
    '''Rebuild a mock Experiment from the files under predir and run the experiment-level analysis.'''
    logger.info('Retro-analyzing experiment from file')
    from slm_lab.experiment.control import Experiment
    # mock experiment
    mock_spec, mock_info_space = mock_info_space_spec(predir)
    mock_experiment = Experiment(mock_spec, mock_info_space)
    mock_experiment.trial_data_dict = trial_data_dict_from_file(predir)
    return analyze_experiment(mock_experiment)
def analyze_trial(trial):
    '''
    Compute the trial fitness, plot the trial, save both, and hand the fitness df back
    for higher-level aggregation.
    @returns {DataFrame} trial_fitness_df Single-row df of trial fitness vector (avg over aeb, sessions), indexed with trial index.
    '''
    logger.info('Analyzing trial')
    fitness_df = calc_trial_fitness_df(trial)
    figure = plot_trial(trial.spec, trial.info_space)
    save_trial_data(trial.spec, trial.info_space, fitness_df, figure)
    return fitness_df
def load_algorithm(algorithm):
    '''Load every net named in algorithm.net_names from its session-level model file.'''
    agent = algorithm.agent
    names = algorithm.net_names
    path_prefix = util.get_prepath(agent.spec, agent.info_space, unit='session')
    logger.info(f'Loading algorithm {util.get_class_name(algorithm)} nets {names}')
    for name in names:
        # each net is read from '<prepath>_model_<net_name>.pth'
        load(getattr(algorithm, name), f'{path_prefix}_model_{name}.pth')
def get_grad_norms(net):
    '''
    Return one entry per parameter of net, in net.parameters() order:
    the norm of its gradient, or None (also logged) when the parameter has no gradient.
    '''
    grad_norms = []
    for layer_idx, param in enumerate(net.parameters()):
        grad = param.grad
        if grad is not None:
            grad_norms.append(torch.norm(grad))
            continue
        logger.info(f'Param with None grad: {param.shape}, layer: {layer_idx}')
        grad_norms.append(None)
    return grad_norms
def post_body_init(self):
    '''
    Finish the algorithm setup that can only run once a body exists.
    A body is a part of an Agent. Agents may have 1 to k bodies. Bodies do the acting in environments, and contain:
        - Memory (holding experiences obtained by acting in the environment)
        - State and action dimensions for an environment
        - Boolean var for if the action space is discrete
    '''
    first_body = self.agent.nanflat_body_a[0]  # single-body algo
    self.body = first_body
    self.init_algorithm_params()
    self.init_nets()
    description = util.self_desc(self)
    logger.info(description)
def post_init_nets(self):
    '''
    Conditionally load saved models once the nets are initialized.
    Call at the end of init_net() after setting self.net_names
    '''
    assert hasattr(self, 'net_names')
    lab_mode = util.get_lab_mode()
    if lab_mode != 'enjoy':
        logger.info(f'Initialized algorithm models for lab_mode: {lab_mode}')
        return
    # only 'enjoy' mode loads models; the message is logged before the load runs
    logger.info('Loaded algorithm models for lab_mode: enjoy')
    self.load()
def save_algorithm(algorithm, epi=None):
    '''
    Save every net named in algorithm.net_names to its session-level model file.
    When epi is given, '_epi_<epi>' is appended to the path prefix.
    '''
    agent = algorithm.agent
    names = algorithm.net_names
    base_prepath = util.get_prepath(agent.spec, agent.info_space, unit='session')
    path_prefix = base_prepath if epi is None else f'{base_prepath}_epi_{epi}'
    logger.info(f'Saving algorithm {util.get_class_name(algorithm)} nets {names}')
    for name in names:
        # each net is written to '<prepath>_model_<net_name>.pth'
        save(getattr(algorithm, name), f'{path_prefix}_model_{name}.pth')
def __init__(self, spec, info_space=None):
    '''
    Set up an experiment: init thread vars, store spec/info_space, resolve this unit's
    coordinate and index, instantiate the search class named in spec['meta'], and save the spec.
    '''
    if not info_space:
        # a falsy info_space (e.g. None) is replaced by a fresh one
        info_space = InfoSpace()
    init_thread_vars(spec, info_space, unit='experiment')
    self.spec = spec
    self.info_space = info_space
    coor_idx = self.info_space.get_coor_idx(self)
    self.coor, self.index = coor_idx
    self.trial_data_dict = {}
    self.data = None
    search_name = spec['meta'].get('search')
    self.search = getattr(search, search_name)(self)
    analysis.save_spec(spec, info_space, unit='experiment')
    logger.info(f'Initialized experiment {self.index}')
def retro_analyze_sessions(predir):
    '''Re-run the session-level analysis for every '*_session_df.csv' file found in predir.'''
    logger.info('Retro-analyzing sessions from file')
    from slm_lab.experiment.control import Session
    suffix = '_session_df.csv'
    for filename in os.listdir(predir):
        if not filename.endswith(suffix):
            continue
        # the last two '_'-separated tokens carry the indices, each after a one-character prefix
        trial_tag, session_tag = filename.replace(suffix, '').split('_')[-2:]
        trial_index = int(trial_tag[1:])
        session_index = int(session_tag[1:])
        # mock session
        spec, info_space = mock_info_space_spec(predir, trial_index, session_index)
        mock_session = Session(spec, info_space)
        data_from_file = session_data_from_file(predir, trial_index, session_index)
        analyze_session(mock_session, data_from_file)
def analyze_session(session, session_data=None):
    '''
    Collect the session data (unless supplied), compute fitness, plot and save.
    @returns {DataFrame} session_fitness_df Single-row df of session fitness vector (avg over aeb), indexed with session index.
    '''
    logger.info('Analyzing session')
    # stays None when session_data was supplied, i.e. from retro analysis
    session_mdp_data = None
    if session_data is None:
        session_mdp_data, session_data = get_session_data(session)
    fitness_df = calc_session_fitness_df(session, session_data)
    figure = plot_session(session.spec, session.info_space, session_data)
    save_session_data(session.spec, session.info_space, session_mdp_data, session_data, fitness_df, figure)
    return fitness_df
def check_all():
    '''
    Check all spec files, all specs.
    Reads every '.json' file in SPEC_DIR whose name does not start with '_', stamps each spec
    with its name and the current git SHA, and runs check() on it.
    @returns {bool} True when every spec passed
    @raises Exception The original error of the first failing spec, after it has been logged.
    '''
    spec_files = ps.filter_(os.listdir(SPEC_DIR), lambda f: f.endswith('.json') and not f.startswith('_'))
    git_sha = None  # resolved once, on the first spec, instead of spawning git for every spec
    for spec_file in spec_files:
        spec_dict = util.read(f'{SPEC_DIR}/{spec_file}')
        for spec_name, spec in spec_dict.items():
            try:
                spec['name'] = spec_name
                if git_sha is None:
                    # kept inside the try so a git failure is still logged with the spec file name
                    git_sha = subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode().strip()
                spec['git_SHA'] = git_sha
                check(spec)
            except Exception:
                logger.exception(f'spec_file {spec_file} fails spec check')
                raise  # bare re-raise keeps the original traceback untouched
    logger.info(f'Checked all specs from: {ps.join(spec_files, ",")}')
    return True
def retro_analyze(predir):
    '''
    Analyze an experiment from its files after it ran: sessions first, then trials, then the experiment.
    Sets the PREPATH environment variable to '<predir>/retro_analyze' before running.
    @example

    from slm_lab.experiment import analysis
    predir = 'data/reinforce_cartpole_2018_01_22_211751'
    analysis.retro_analyze(predir)
    '''
    os.environ['PREPATH'] = f'{predir}/retro_analyze'  # to prevent overwriting log file
    logger.info(f'Retro-analyzing {predir}')
    analysis_levels = (retro_analyze_sessions, retro_analyze_trials, retro_analyze_experiment)
    for analyze_level in analysis_levels:
        analyze_level(predir)
def save_image(figure, filepath=None):
    '''
    Export a plotly figure to an image by spawning the `orca` CLI in the target directory.
    Does nothing when the PY_ENV environment variable is 'test'.
    @param {dict} figure JSON-serializable plotly figure
    @param {str|None} filepath Output path; defaults to '<PLOT_FILEDIR>/<layout.title>.png'
    @returns {None}
    '''
    import shlex  # local import: only this function needs shell quoting

    # .get() so an unset PY_ENV no longer raises KeyError
    if os.environ.get('PY_ENV') == 'test':
        return
    if filepath is None:
        filepath = f'{PLOT_FILEDIR}/{ps.get(figure, "layout.title")}.png'
    filepath = util.smart_path(filepath)
    dirname, filename = os.path.split(filepath)
    try:
        # quote both arguments: a filename with spaces or figure JSON containing a single quote
        # previously produced a broken (or unintended) shell command
        cmd = f'orca graph -o {shlex.quote(filename)} {shlex.quote(json.dumps(figure))}'
        if 'linux' in sys.platform:
            cmd = 'xvfb-run -a -s "-screen 0 1400x900x24" -- ' + cmd
        # NOTE(review): the process is not waited on, so the message below is logged as soon as
        # the command is spawned, and a missing orca binary does not reach the except branch
        Popen(cmd, cwd=dirname, shell=True, stderr=DEVNULL, stdout=DEVNULL)
        logger.info(f'Graph saved to {dirname}/{filename}')
    except Exception:
        logger.exception('Please install orca for plotly and run retro-analysis to generate graphs.')
def calc_session_fitness_df(session, session_data):
    '''
    Build the session fitness df: one single-row fitness df per aeb, concatenated column-wise.
    The mean across bodies is only computed for logging; the per-aeb df is what gets returned.
    '''
    per_aeb_fitness = {}
    for aeb, aeb_df in session_data.items():
        util.downcast_float32(aeb_df)
        body = session.aeb_space.body_space.data[aeb]
        fitness_sr = calc_aeb_fitness_sr(aeb_df, body.env.name)
        fitness_row = pd.DataFrame([fitness_sr], index=[session.index])
        # keep only the first three fitness columns at session level
        per_aeb_fitness[aeb] = fitness_row.reindex(FITNESS_COLS[:3], axis=1)
    # form multiindex df, then take mean across all bodies
    session_fitness_df = pd.concat(per_aeb_fitness, axis=1)
    mean_fitness_df = session_fitness_df.mean(axis=1, level=3)
    session_fitness = calc_fitness(mean_fitness_df)
    logger.info(f'Session mean fitness: {session_fitness}\n{mean_fitness_df}')
    return session_fitness_df
def __init__(self, spec, info_space=None):
    '''
    Set up a session: init thread vars, copy the spec, seed torch/numpy from the trial and
    session indices, build the AEB/env/agent spaces and run their body initialization.
    '''
    if not info_space:
        # a falsy info_space (e.g. None) is replaced by a fresh one
        info_space = InfoSpace()
    init_thread_vars(spec, info_space, unit='session')
    self.spec = deepcopy(spec)
    self.info_space = info_space
    coor_idx = self.info_space.get_coor_idx(self)
    self.coor, self.index = coor_idx

    # seed = 100 * trial index (0 when unset) + session index
    trial_index = info_space.get('trial') or 0
    self.random_seed = 100 * trial_index + self.index
    for seed_fn in (torch.cuda.manual_seed_all, torch.manual_seed, np.random.seed):
        seed_fn(self.random_seed)

    self.data = None
    self.aeb_space = AEBSpace(self.spec, self.info_space)
    self.env_space = EnvSpace(self.spec, self.aeb_space)
    self.agent_space = AgentSpace(self.spec, self.aeb_space)
    description = util.self_desc(self)
    logger.info(description)
    self.aeb_space.init_body_space()
    self.aeb_space.post_body_init()
    logger.info(f'Initialized session {self.index}')
def analyze_experiment(experiment):
    '''
    Turn experiment.trial_data_dict into experiment_df, then plot and save it.
    Search module must return best_spec and experiment_data with format {trial_index: exp_trial_data},
    where trial_data = {**var_spec, **fitness_vec, fitness}.
    Columns are ordered as sorted config columns followed by the fitness columns; rows are
    sorted by fitness, best first.
    @returns {DataFrame} experiment_df Of var_specs, fitness_vec, fitness for all trials.
    '''
    logger.info('Analyzing experiment')
    raw_df = pd.DataFrame(experiment.trial_data_dict).T
    fitness_cols = FITNESS_COLS + ['fitness']
    config_cols = sorted(ps.difference(raw_df.columns.tolist(), fitness_cols))
    ordered_df = raw_df.reindex(config_cols + fitness_cols, axis=1)
    experiment_df = ordered_df.sort_values(by=['fitness'], ascending=False)
    logger.info(f'Experiment data:\n{experiment_df}')
    figure = plot_experiment(experiment.spec, experiment_df)
    save_experiment_data(experiment.spec, experiment.info_space, experiment_df, figure)
    return experiment_df
def save_session_data(spec, info_space, session_mdp_data, session_data, session_fitness_df, session_fig):
    '''
    Save the session data: session_mdp_df, session_df, session_fitness_df, session_graph.
    session_data is saved as session_df; multi-indexed with (a,e,b), 3 extra levels
    to read, use:
    session_df = util.read(filepath, header=[0, 1, 2, 3])
    session_data = util.session_df_to_data(session_df)
    Likewise for session_mdp_df
    '''
    path_prefix = util.get_prepath(spec, info_space, unit='session')
    logger.info(f'Saving session data to {path_prefix}')
    # NOTE(review): nesting reconstructed from a whitespace-collapsed source; session_df is assumed
    # to be written only when the data does not come from retro analysis - confirm against history
    if session_mdp_data is not None:  # not from retro analysis
        session_mdp_df = pd.concat(session_mdp_data, axis=1)
        session_df = pd.concat(session_data, axis=1)
        # TODO reactivate saving when get to the transition matrix research
        # util.write(session_mdp_df, f'{prepath}_session_mdp_df.csv')
        util.write(session_df, f'{path_prefix}_session_df.csv')
    util.write(session_fitness_df, f'{path_prefix}_session_fitness_df.csv')
    viz.save_image(session_fig, f'{path_prefix}_session_graph.png')
def calc_trial_fitness_df(trial):
    '''
    Aggregate the session fitness dfs collected in trial.session_data_dict into the trial fitness df.
    Adds a consistency dimension to the fitness vector, then averages across all bodies.
    '''
    per_aeb_fitness = {}
    stacked_session_df = pd.concat(list(trial.session_data_dict.values()))
    for aeb in util.get_df_aeb_list(stacked_session_df):
        aeb_sessions_df = stacked_session_df.loc[:, aeb]
        mean_sr = aeb_sessions_df.mean()
        consistency = calc_consistency(aeb_sessions_df)
        # NOTE(review): Series.append is kept as-is; it is unavailable in pandas >= 2.0
        fitness_sr = mean_sr.append(pd.Series({'consistency': consistency}))
        fitness_row = pd.DataFrame([fitness_sr], index=[trial.index])
        per_aeb_fitness[aeb] = fitness_row.reindex(FITNESS_COLS, axis=1)
    # form multiindex df, then take mean across all bodies
    multiindex_df = pd.concat(per_aeb_fitness, axis=1)
    mean_fitness_df = multiindex_df.mean(axis=1, level=3)
    trial_fitness = calc_fitness(mean_fitness_df)
    logger.info(f'Trial mean fitness: {trial_fitness}\n{mean_fitness_df}')
    # the body-averaged df is what callers receive
    return mean_fitness_df
def retro_analyze_trials(predir):
    '''
    Re-run the trial-level analysis for every '*_trial_data.json' file in predir and
    write the recomputed fitness back into that file.
    '''
    logger.info('Retro-analyzing trials from file')
    from slm_lab.experiment.control import Trial
    suffix = '_trial_data.json'
    for filename in os.listdir(predir):
        if not filename.endswith(suffix):
            continue
        filepath = f'{predir}/{filename}'
        # the last '_'-separated token carries the trial index after a one-character prefix
        trial_tag = filename.replace(suffix, '').split('_')[-1]
        trial_index = int(trial_tag[1:])
        # mock trial
        spec, info_space = mock_info_space_spec(predir, trial_index)
        mock_trial = Trial(spec, info_space)
        mock_trial.session_data_dict = session_data_dict_from_file(predir, trial_index)
        trial_fitness_df = analyze_trial(mock_trial)
        # write trial_data that was written from ray search
        fitness_vec = trial_fitness_df.iloc[0].to_dict()
        fitness = calc_fitness(trial_fitness_df)
        trial_data = util.read(filepath)
        trial_data.update(fitness_vec)
        trial_data['fitness'] = fitness
        trial_data['trial_index'] = trial_index
        util.write(trial_data, filepath)
def run(self):
    '''
    Run the evolutionary search: evaluate a population of configs as ray trials for
    max_generation generations, and return the collected trial data.
    Settings are read from self.experiment.spec['meta'] ('resources', 'max_generation', 'max_trial').
    @returns {dict} trial_data_dict Accumulated results from get_ray_results, looked up by trial_index.
    '''
    meta_spec = self.experiment.spec['meta']
    ray.init(**meta_spec.get('resources', {}))
    max_generation = meta_spec['max_generation']
    # a falsy 'max_trial' falls back to the computed population size
    pop_size = meta_spec['max_trial'] or calc_population_size(self.experiment)
    logger.info(f'EvolutionarySearch max_generation: {max_generation}, population size: {pop_size}')
    trial_data_dict = {}
    config_hash = {}  # config hash_str to trial_index

    toolbox = self.init_deap()
    population = toolbox.population(n=pop_size)
    for gen in range(1, max_generation + 1):
        logger.info(f'Running generation: {gen}/{max_generation}')
        ray_id_to_config = {}
        pending_ids = []
        for individual in population:
            config = dict(individual.items())
            hash_str = util.to_json(config, indent=0)
            # only configs not seen before (by their JSON string) are launched as new trials
            if hash_str not in config_hash:
                trial_index = self.experiment.info_space.tick('trial')['trial']
                config_hash[hash_str] = config['trial_index'] = trial_index
                ray_id = run_trial.remote(self.experiment, config)
                ray_id_to_config[ray_id] = config
                pending_ids.append(ray_id)
            # every individual, new or repeated, is tagged with the trial index of its config
            individual['trial_index'] = config_hash[hash_str]
        trial_data_dict.update(get_ray_results(pending_ids, ray_id_to_config))

        for individual in population:
            # the temporary tag is removed again before the individual is bred
            trial_index = individual.pop('trial_index')
            trial_data = trial_data_dict.get(trial_index, {'fitness': 0})  # if trial errored
            # trailing comma: fitness.values is assigned a 1-tuple
            individual.fitness.values = trial_data['fitness'],

        preview = 'Fittest of population preview:'
        for individual in tools.selBest(population, k=min(10, pop_size)):
            preview += f'\nfitness: {individual.fitness.values[0]}, {individual}'
        logger.info(preview)

        # prepare offspring for next generation
        if gen < max_generation:
            population = toolbox.select(population, len(population))
            # Vary the pool of individuals
            population = algorithms.varAnd(population, toolbox, cxpb=0.5, mutpb=0.5)

    ray.worker.cleanup()
    return trial_data_dict
def generate_specs(spec, const='agent'):
    '''
    Generate benchmark specs with compatible discrete/continuous/both types:
    - take a spec
    - for each in benchmark envs
        - use the template env spec to update spec
        - append to benchmark specs
    Interchange agent and env for the reversed benchmark.
    An existing benchmark file is read back and returned instead of being overwritten.
    '''
    if const == 'agent':
        const_name, variant = ps.get(spec, 'agent.0.algorithm.name'), 'env'
    else:
        const_name, variant = ps.get(spec, 'env.0.name'), 'agent'

    filepath = f'{spec_util.SPEC_DIR}/benchmark_{const_name}.json'
    if os.path.exists(filepath):
        logger.info(f'Benchmark for {const_name} exists at {filepath} already, not overwriting.')
        return util.read(filepath)

    logger.info(f'Generating benchmark for {const_name}')
    # collect the variant names from every benchmark group that lists const_name
    variant_names = []
    for dist_cont, const_names in BENCHMARK[const].items():
        if const_name in const_names:
            variant_names.extend(BENCHMARK[variant][dist_cont])

    benchmark_specs = {}
    for vary_name in variant_names:
        # NOTE(review): templates are always taken from ENV_TEMPLATES, also when variant == 'agent' - confirm
        vary_spec = ENV_TEMPLATES[vary_name]
        spec_name = f'{const_name}_{vary_name}'
        benchmark_spec = spec.copy()
        benchmark_spec.update({'name': spec_name, variant: [vary_spec]})
        benchmark_specs[spec_name] = benchmark_spec

    util.write(benchmark_specs, filepath)
    logger.info(f'Benchmark for {const_name} written to {filepath}.')
    return benchmark_specs
def close(self):
    '''Log that this trial, identified by self.index, is done.'''
    done_msg = f'Trial {self.index} done'
    logger.info(done_msg)
def init_nets(self):
    '''
    Initialize the neural networks used to learn the actor and critic from the spec
    Sets self.share_architecture and self.net, plus self.critic when the nets are separate.
    Also rewrites self.net_spec['type'] in place.

    Below we automatically select an appropriate net based on two different conditions
    1. If the action space is discrete or continuous action
        - Networks for continuous action spaces have two heads and return two values, the first is a tensor containing the mean of the action policy, the second is a tensor containing the std deviation of the action policy. The distribution is assumed to be a Gaussian (Normal) distribution.
        - Networks for discrete action spaces have a single head and return the logits for a categorical probability distribution over the discrete actions
    2. If the actor and critic are separate or share weights
        - If the networks share weights then the single network returns a list.
        - Continuous action spaces: The return list contains 3 elements: The first element contains the mean output for the actor (policy), the second element the std dev of the policy, and the third element is the state-value estimated by the network.
        - Discrete action spaces: The return list contains 2 elements. The first element is a tensor containing the logits for a categorical probability distribution over the actions. The second element contains the state-value estimated by the network.
    3. If the network type is feedforward, convolutional, or recurrent
        - Feedforward and convolutional networks take a single state as input and require an OnPolicyReplay or OnPolicyBatchReplay memory
        - Recurrent networks take n states as input and require an OnPolicySeqReplay or OnPolicySeqBatchReplay memory
    '''
    net_type = self.net_spec['type']
    # options of net_type are {MLPNet, ConvNet, RecurrentNet} x {Shared, Separate}
    in_dim = self.body.state_dim
    # critic_out_dim is only defined in the 'Separate' branches, which are the only ones that build a critic
    if self.body.is_discrete:
        if 'Shared' in net_type:
            self.share_architecture = True
            out_dim = [self.body.action_dim, 1]
        else:
            assert 'Separate' in net_type
            self.share_architecture = False
            out_dim = self.body.action_dim
            critic_out_dim = 1
    else:
        if 'Shared' in net_type:
            self.share_architecture = True
            out_dim = [self.body.action_dim, self.body.action_dim, 1]
        else:
            assert 'Separate' in net_type
            self.share_architecture = False
            out_dim = [self.body.action_dim, self.body.action_dim]
            critic_out_dim = 1
    # strip the sharing suffix so the remaining string names a class in the net module
    self.net_spec['type'] = net_type = net_type.replace('Shared', '').replace('Separate', '')
    # an MLP with more than one output dimension is switched to the multi-tail class
    if 'MLP' in net_type and ps.is_list(out_dim) and len(out_dim) > 1:
        self.net_spec['type'] = 'MLPHeterogenousTails'

    actor_net_spec = self.net_spec.copy()
    critic_net_spec = self.net_spec.copy()
    # iterate the original spec while editing the copies:
    # 'actor_*' keys are un-prefixed in the actor spec and dropped from the critic spec, and vice versa
    for k in self.net_spec:
        if 'actor_' in k:
            actor_net_spec[k.replace('actor_', '')] = actor_net_spec.pop(k)
            critic_net_spec.pop(k)
        if 'critic_' in k:
            critic_net_spec[k.replace('critic_', '')] = critic_net_spec.pop(k)
            actor_net_spec.pop(k)

    NetClass = getattr(net, self.net_spec['type'])
    # properly set net_spec and action_dim for actor, critic nets
    if self.share_architecture:
        # net = actor_critic as one
        self.net = NetClass(actor_net_spec, self, in_dim, out_dim)
    else:
        # main net = actor
        self.net = NetClass(actor_net_spec, self, in_dim, out_dim)
        # with 'use_same_optim' the critic is built from the actor's spec
        if critic_net_spec['use_same_optim']:
            critic_net_spec = actor_net_spec
        self.critic = NetClass(critic_net_spec, self, in_dim, critic_out_dim)
    logger.info(f'Training on gpu: {self.net.gpu}')
def close(self):
    '''Log that the experiment is done and closing.'''
    done_msg = 'Experiment done, closing.'
    logger.info(done_msg)
def post_body_init(self):
    '''
    Finish initialization that requires bodies to exist:
    flatten self.body_a, then let the algorithm run its own post-body init.
    '''
    flat_bodies = util.flatten_nonan(self.body_a)
    self.flat_nonan_body_a = flat_bodies
    self.algorithm.post_body_init()
    description = util.self_desc(self)
    logger.info(description)
def close(self):
    '''Log the call, then close every env in self.envs in order.'''
    tag = 'EnvSpace.close'
    logger.info(tag)
    for member_env in self.envs:
        member_env.close()
def close(self):
    '''Reload the search module, then log that the experiment is closed.'''
    # fixes ray consecutive run crashing due to bad cleanup
    reload(search)
    closed_msg = 'Experiment done and closed.'
    logger.info(closed_msg)
def print_memory_info(self):
    '''Log shape, dtype and size (via util.sizeof) of each array named in self.data_keys.'''
    for data_key in self.data_keys:
        data = getattr(self, data_key)
        report = (
            f'Memory for body {self.body.aeb}: {data_key} '
            f':shape: {data.shape}, dtype: {data.dtype}, size: {util.sizeof(data)}MB'
        )
        logger.info(report)
def save_trial_data(trial_spec, trial_df):
    '''
    Write the trial spec to 'data/<name>/<name>_<timestamp>_spec.json'.
    trial_df is accepted but not written by this function.
    '''
    spec_name = trial_spec['name']
    data_dir = f'data/{spec_name}'
    path_prefix = f'{data_dir}/{spec_name}_{util.get_timestamp()}'
    logger.info(f'Saving trial data to {path_prefix}_*')
    util.write(trial_spec, f'{path_prefix}_spec.json')
def retro_analyze_sessions(predir):
    '''Retro-analyze every session whose spec file matches '<predir>/*_s*_spec.json', in parallel.'''
    logger.info('Running retro_analyze_sessions')
    spec_paths = glob(f'{predir}/*_s*_spec.json')
    # one single-element argument tuple per session spec path
    worker_args = [(spec_path,) for spec_path in spec_paths]
    util.parallelize(_retro_analyze_session, worker_args, num_cpus=util.NUM_CPUS)
def save_trial_data(spec, info_space, trial_fitness_df):
    '''Write trial_fitness_df to '<trial prepath>_trial_fitness_df.csv'.'''
    path_prefix = get_prepath(spec, info_space, unit='trial')
    logger.info(f'Saving trial data to {path_prefix}')
    fitness_path = f'{path_prefix}_trial_fitness_df.csv'
    util.write(trial_fitness_df, fitness_path)
def save(self, epi=None):
    '''
    Save the algorithm's net models, which requires the property self.net_names.
    Without it, only a message is logged.
    '''
    if hasattr(self, 'net_names'):
        net_util.save_algorithm(self, epi=epi)
        return
    logger.info('No net declared in self.net_names in init_nets(); no models to save.')
def post_body_init(self):
    '''
    Finish initialization that requires bodies to exist:
    take the base clock from the env space, then run post-body init on the agent space and the env space.
    '''
    self.clock = self.env_space.get_base_clock()
    description = util.self_desc(self)
    logger.info(description)
    # agent space first, env space second
    for space_name in ('agent_space', 'env_space'):
        getattr(self, space_name).post_body_init()
def update_lr(self):
    '''Multiply the learning rate in self.optim_param by 0.9, log it, and rebuild self.optim.'''
    assert 'lr' in self.optim_param
    previous_lr = self.optim_param['lr']
    decay_factor = 0.9
    self.optim_param['lr'] = previous_lr * decay_factor
    logger.info(f'Learning rate decayed from {previous_lr} to {self.optim_param["lr"]}')
    # the optimizer is re-created from the updated params
    self.optim = net_util.get_optim_multinet(self.params, self.optim_param)
def close(self):
    '''Log that the trial is done and closed.'''
    closed_msg = 'Trial done and closed.'
    logger.info(closed_msg)
def close(self):
    '''Log that the experiment is done.'''
    done_msg = 'Experiment done'
    logger.info(done_msg)
def close(self):
    '''Log the call, then close every agent in self.agents in order.'''
    tag = 'AgentSpace.close'
    logger.info(tag)
    for member_agent in self.agents:
        member_agent.close()
def post_body_init(self):
    '''Finish the algorithm setup that can only run once a body exists.'''
    first_body = self.agent.nanflat_body_a[0]  # single-body algo
    self.body = first_body
    self.init_algorithm_params()
    self.init_nets()
    description = util.self_desc(self)
    logger.info(description)
def post_body_init(self):
    '''Finish initialization that requires bodies to exist: run post-body init on every env in self.envs.'''
    for member_env in self.envs:
        member_env.post_body_init()
    description = util.self_desc(self)
    logger.info(description)
def init_tensorboard(self):
    '''Create the TensorBoard writer and the action buffer once; later calls are no-ops.'''
    if hasattr(self, 'tb_writer'):
        return
    log_prepath = self.spec['meta']['log_prepath']
    # the directory part is the log dir, the file-name part becomes the event-file suffix
    log_dir = os.path.dirname(log_prepath)
    suffix = os.path.basename(log_prepath)
    self.tb_writer = SummaryWriter(log_dir, filename_suffix=suffix)
    self.tb_actions = []  # store actions for tensorboard
    logger.info(f'Using TensorBoard logging for dev mode. Run `tensorboard --logdir={log_prepath}` to start TensorBoard.')
def __init__(self, DistSessionClass, spec, info_space, global_nets):
    '''
    Wrap a distributed session: name this worker 'w<session index>' and build the inner
    session from DistSessionClass with the shared global_nets.
    '''
    super(DistSession, self).__init__()
    session_index = info_space.get("session")
    self.name = f'w{session_index}'
    self.session = DistSessionClass(spec, info_space, global_nets)
    logger.info(f'Initialized DistSession {self.session.index}')
def log_summary(self):
    '''Log a one-line summary (last loss, total reward, windowed average reward) for this body.'''
    prefix = self.get_log_prefix()
    memory = self.memory
    # the message is assembled from adjacent f-strings; the resulting text is unchanged
    summary = (
        f'{prefix}, loss: {self.last_loss:.8f}, '
        f'total_reward: {memory.total_reward:.4f}, '
        f'last-{memory.avg_window}-epi avg: {memory.avg_total_reward:.4f}'
    )
    logger.info(summary)
def init_nets(self):
    '''
    Initialize the neural networks used to learn the actor and critic from the spec.
    Sets self.is_discrete, self.is_shared_architecture, self.is_recurrent and either
    self.actorcritic (shared) or self.actor plus self.critic (separate).

    The net is selected automatically from net_spec['type'], which must be one of
    MLPshared, MLPseparate, Convshared, Convseparate, Recurrentshared, Recurrentseparate:
    1. If the action space is discrete or continuous action
        - Networks for continuous action spaces have two heads and return two values, the first is a tensor containing the mean of the action policy, the second is a tensor containing the std deviation of the action policy. The distribution is assumed to be a Gaussian (Normal) distribution.
        - Networks for discrete action spaces have a single head and return the logits for a categorical probability distribution over the discrete actions
    2. If the actor and critic are separate or share weights
        - If the networks share weights then the single network returns a list.
        - Continuous action spaces: The return list contains 3 elements: The first element contains the mean output for the actor (policy), the second element the std dev of the policy, and the third element is the state-value estimated by the network.
        - Discrete action spaces: The return list contains 2 elements. The first element is a tensor containing the logits for a categorical probability distribution over the actions. The second element contains the state-value estimated by the network.
    3. If the network type is feedforward, convolutional, or recurrent
        - Feedforward and convolutional networks take a single state as input and require an OnPolicyReplay or OnPolicyBatchReplay memory
        - Recurrent networks take n states as input and require an OnPolicyNStepReplay or OnPolicyNStepBatchReplay memory

    @raises NotImplementedError When net_spec['type'] is not one of the six supported types.
    '''
    body = self.agent.nanflat_body_a[0]  # singleton algo
    state_dim = body.state_dim
    action_dim = body.action_dim
    self.is_discrete = body.is_discrete
    net_spec = self.agent.spec['net']
    mem_spec = self.agent.spec['memory']
    net_type = net_spec['type']

    def build_net_kwargs(optim_key):
        '''Collect the optional net kwargs, taking the optimizer params from net_spec[optim_key].'''
        return util.compact_dict(dict(
            hid_layers_activation=_.get(net_spec, 'hid_layers_activation'),
            optim_param=_.get(net_spec, optim_key),
            loss_param=_.get(net_spec, 'loss'),  # Note: Not used for training actor
            clamp_grad=_.get(net_spec, 'clamp_grad'),
            clamp_grad_val=_.get(net_spec, 'clamp_grad_val'),
            gpu=_.get(net_spec, 'gpu'),
        ))

    actor_kwargs = build_net_kwargs('optim_actor')
    if net_spec['use_same_optim']:
        logger.info('Using same optimizer for actor and critic')
        critic_kwargs = actor_kwargs
    else:
        logger.info('Using different optimizer for actor and critic')
        critic_kwargs = build_net_kwargs('optim_critic')

    # family prefix -> (log label, single-output net class, multi-output net class, is_recurrent)
    net_families = {
        'MLP': ('Feedforward', 'MLPNet', 'MLPHeterogenousHeads', False),
        'Conv': ('Convolutional', 'ConvNet', 'ConvNet', False),
        'Recurrent': ('Recurrent', 'RecurrentNet', 'RecurrentNet', True),
    }
    valid_net_types = [f'{family}{sharing}' for family in net_families for sharing in ('shared', 'separate')]
    if net_type not in valid_net_types:
        # the message now lists every supported type and is attached to the exception too
        msg = f'Incorrect network type {net_type!r}. Please use one of: {", ".join(valid_net_types)}.'
        logger.warn(msg)
        raise NotImplementedError(msg)

    is_shared = net_type.endswith('shared')
    family = net_type[:-len('shared' if is_shared else 'separate')]
    label, single_out_cls, multi_out_cls, is_recurrent = net_families[family]
    self.is_shared_architecture = is_shared
    self.is_recurrent = is_recurrent

    # recurrent nets take the history length as an extra positional argument after the output dim
    extra_args = (mem_spec['length_history'],) if is_recurrent else ()
    action_space = 'discrete' if self.is_discrete else 'continuous'
    # one policy output for discrete actions (logits), two for continuous ones (mean and std dev)
    policy_out_dim = [action_dim] if self.is_discrete else [action_dim, action_dim]

    def make_net(class_name, out_dim, kwargs):
        '''Instantiate the named class from the net module for the given output dim(s).'''
        return getattr(net, class_name)(state_dim, net_spec['hid_layers'], out_dim, *extra_args, **kwargs)

    if is_shared:
        # a single net carries the policy output(s) plus one state-value output
        self.actorcritic = make_net(multi_out_cls, policy_out_dim + [1], actor_kwargs)
        logger.info(f'{label} net, {action_space} action space, actor and critic combined into single network, sharing params')
    else:
        if self.is_discrete:
            self.actor = make_net(single_out_cls, action_dim, actor_kwargs)
        else:
            self.actor = make_net(multi_out_cls, policy_out_dim, actor_kwargs)
        logger.info(f'{label} net, {action_space} action space, actor and critic are separate networks')
        self.critic = make_net(single_out_cls, 1, critic_kwargs)
def log_metrics(self, metrics, df_mode):
    '''Log the session metrics as space-separated 'name: value' pairs, values in general format.'''
    prefix = self.get_log_prefix()
    formatted_pairs = (f'{name}: {value:g}' for name, value in metrics.items())
    row_str = ' '.join(formatted_pairs)
    logger.info(f'{prefix} [{df_mode}_df metrics] {row_str}')
def close(self):
    '''Log that the trial is done and closing.'''
    done_msg = 'Trial done, closing.'
    logger.info(done_msg)
def save(net, model_path):
    '''Write the net's state dict to model_path (resolved through util.smart_path).'''
    weights = net.state_dict()
    resolved_path = util.smart_path(model_path)
    torch.save(weights, resolved_path)
    logger.info(f'Saved model to {model_path}')
def load(self):
    '''
    Load the algorithm's net models, which requires the property self.net_names.
    Without it, only a message is logged.
    '''
    if hasattr(self, 'net_names'):
        net_util.load_algorithm(self)
        return
    logger.info('No net declared in self.net_names in init_nets(); no models to load.')
def load(net, model_path):
    '''Load model weights from model_path (resolved through util.smart_path) into a net module.'''
    resolved_path = util.smart_path(model_path)
    weights = torch.load(resolved_path)
    net.load_state_dict(weights)
    logger.info(f'Loaded model from {model_path}')
def save_trial_data(spec, info_space, trial_fitness_df, trial_fig):
    '''Save the trial outputs: trial_fitness_df as csv and trial_fig as the trial graph image.'''
    path_prefix = util.get_prepath(spec, info_space, unit='trial')
    logger.info(f'Saving trial data to {path_prefix}')
    fitness_path = f'{path_prefix}_trial_fitness_df.csv'
    util.write(trial_fitness_df, fitness_path)
    graph_path = f'{path_prefix}_trial_graph.png'
    viz.save_image(trial_fig, graph_path)
def run_distributed_sessions(self):
    '''Initialize the global nets, run the sessions in parallel against them, and return their data.'''
    logger.info('Running distributed sessions')
    shared_nets = self.init_global_nets()
    return self.parallelize_sessions(shared_nets)
def post_body_init(self):
    '''Finish the algorithm setup that can only run once a body exists: nets first, then algo params.'''
    self.init_nets()
    self.init_algo_params()
    description = util.self_desc(self)
    logger.info(description)