Example #1
    def __init__(self, **kwargs):
        global default_kwargs
        self.agent = CatMouseAgent(**kwargs)

        self.N_state_terms = len(self.agent.getStateVec())
        self.N_actions = self.agent.N_actions

        for k, v in kwargs.items():
            if k in default_kwargs:
                default_kwargs[k] = v
            else:
                print(
                    f'Passed parameter {k} not in default_kwargs dict! Check')

        self.fname_base = 'CatMouse_DDPG_' + path_utils.get_date_str()

        self.dir = kwargs.get('dir')
        if self.dir is None:
            self.dir = os.path.join(default_kwargs['base_dir'],
                                    self.fname_base)
            os.mkdir(self.dir)
            print(f'\n\nMade dir {self.dir} for run...\n\n')

        self.save_params_json(kwargs, 'params_passed')
        self.save_params_json(default_kwargs, 'params_all')

        self.gamma = default_kwargs['gamma']
        self.optim = default_kwargs['optim']
        self.LR_actor = default_kwargs['LR_actor']
        self.LR_critic = default_kwargs['LR_critic']
        self.hidden_size = default_kwargs['hidden_size']
        self.init_weights = default_kwargs['init_weights']
        self.clamp_grad = default_kwargs['clamp_grad']
        self.noise_sigma = default_kwargs['noise_sigma']
        self.noise_theta = default_kwargs['noise_theta']
        self.noise_dt = default_kwargs['noise_dt']
        self.noise_mu = default_kwargs['noise_mu']
        self.noise_decay_limit = default_kwargs['noise_decay_limit']
        self.max_buffer_size = default_kwargs['max_buffer_size']
        self.ER_batch_size = default_kwargs['ER_batch_size']
        self.tau = default_kwargs['tau']
        self.decay_noise = default_kwargs['decay_noise']
        self.noise_method = default_kwargs['noise_method']

        self.setup_NN()
        self.dat_fname_file = None

        # Exclude base_dir from the plot title; pop() avoids a NameError
        # when base_dir isn't present.
        no_base_dir_kwargs = deepcopy(default_kwargs)
        no_base_dir_kwargs.pop('base_dir', None)
        self.train_plot_title = path_utils.linebreak_every_n_spaces(
            path_utils.param_dict_to_label_str(no_base_dir_kwargs))

        self.ER = ExpReplay(self.max_buffer_size, self.ER_batch_size)
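For context, a hedged construction sketch: the enclosing class name isn't shown in this snippet, so CatMouseDDPG below is a stand-in, and the values are illustrative; the keyword names themselves (gamma, LR_actor, LR_critic, noise_sigma) are keys that appear in default_kwargs above.

# CatMouseDDPG is a hypothetical name for the enclosing class;
# unknown keys would trigger the warning print in __init__.
cm = CatMouseDDPG(gamma=0.99, LR_actor=1e-4, LR_critic=1e-3,
                  noise_sigma=0.2)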
Example #2
def save_title_db_dict(d):

    # The root entry is the single node with depth 0.
    root_id = next(v['id'] for v in d.values() if v['depth'] == 0)
    date_str = path_utils.get_date_str()

    fname = 'title_dict_id_{}_runtime_{}.json'.format(root_id, date_str)

    with open(fname, 'w') as f:
        json.dump(d, f, indent=4)
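A minimal usage sketch, assuming only what the function actually reads: each value needs an 'id' and a 'depth' field, with exactly one entry at depth 0. The keys and values here are illustrative.

title_dict = {
    'root_page': {'id': 42, 'depth': 0},    # the single depth-0 root
    'child_page': {'id': 97, 'depth': 1},
}
save_title_db_dict(title_dict)  # writes title_dict_id_42_runtime_<date>.json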
Example #3
def benchmark_param_dicts(params_dict_list, **kwargs):

    '''
    Pass this a list of dicts, where each has the different parameters you want
    to benchmark.

    It then iterates through this list, doing a benchmark for each dict.
    '''

    # Create dir for the results of this benchmark if one isn't provided.
    benchmark_dir = kwargs.get('benchmark_dir', None)
    if benchmark_dir is None:
        benchmark_dir = os.path.join(path_utils.get_output_dir(), 'Benchmark_{}'.format(path_utils.get_date_str()))
        os.mkdir(benchmark_dir)

    for d in params_dict_list:

        # If a run_fname_label is provided, use it to create a more
        # informative dir name; otherwise fall back to 'vary_params'.
        run_fname_label = d.get('run_fname_label', 'vary_params')

        # Base dir for this specific benchmark
        params_dir = os.path.join(benchmark_dir, '{}_{}'.format(run_fname_label, path_utils.get_date_str()))
        os.mkdir(params_dir)

        # To hold the actual runs (FF)
        runs_dir = os.path.join(params_dir, 'runs')
        os.mkdir(runs_dir)

        print('\n\nNow benchmarking params:')
        pp.pprint(d, width=1)
        print('\n\n')
        benchmark_dict = benchmark_param_dict(d, kwargs.get('N_dist', 10), kwargs.get('N_gen', 100), runs_dir)

        # Add to dict
        d['benchmark_dict'] = deepcopy(benchmark_dict)

        # Label to use on this benchmark's plots.
        run_plot_label = d.get('run_plot_label', run_fname_label)

        # Plots for benchmark
        fname = os.path.join(params_dir, f'{run_fname_label}_solve_gens_dist.png')
        plot_benchmark_dist(run_plot_label, benchmark_dict['solve_gens'], 'Solve generation', fname)
        fname = os.path.join(params_dir, f'{run_fname_label}_best_scores_dist.png')
        plot_benchmark_dist(run_plot_label, benchmark_dict['best_scores'], 'Best score', fname)

    # Return passed list, which should have dicts
    # modified with the results
    return params_dict_list
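A minimal usage sketch; the parameter keys inside each dict are illustrative and are simply forwarded to benchmark_param_dict:

params_dict_list = [
    {'env_name': 'CartPole-v0', 'run_fname_label': 'small_net'},
    {'env_name': 'CartPole-v0', 'run_fname_label': 'big_net'},
]
# N_dist and N_gen default to 10 and 100 if not passed.
results = benchmark_param_dicts(params_dict_list, N_dist=10, N_gen=100)
# Each returned dict now carries a 'benchmark_dict' entry with its results.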
Example #4
def benchmark_envs(env_list, **kwargs):

    '''
    Iterates over a list of env names you give it,
    benchmarking each one and recording info.

    For each env, it creates a run dir, benchmarks the param dict,
    saves the distribution stats to a json file, and plots the
    distributions.
    '''

    N_dist = kwargs.get('N_dist', 10) # How many evolutions to run, to form a distribution
    N_gen = kwargs.get('N_gen', 1000)

    # Create dir for the results of this benchmark.
    benchmark_dir = os.path.join(path_utils.get_output_dir(), 'Benchmark_{}'.format(path_utils.get_date_str()))
    os.mkdir(benchmark_dir)

    # Dict to hold results on timing, etc.
    benchmark_dict = {}

    for env_name in env_list:

        print(f'\nBenchmarking env {env_name} now...\n')
        # Create a dir for this env.
        env_dir = os.path.join(benchmark_dir, env_name)
        os.mkdir(env_dir)

        env_runs_dir = os.path.join(env_dir, 'runs')
        os.mkdir(env_runs_dir)

        param_dict = deepcopy(kwargs)
        param_dict['env_name'] = env_name

        benchmark_dict[env_name] = benchmark_param_dict(param_dict, N_dist, N_gen, env_runs_dir)
        benchmark_dict[env_name]['env_dir'] = env_dir


    # Save distributions to file
    with open(os.path.join(benchmark_dir, 'benchmark_stats.json'), 'w+') as f:
        json.dump(benchmark_dict, f, indent=4)

    # Plot each env dist.
    for k, v in benchmark_dict.items():

        # Make sure the run finished before plotting its distributions.
        if 'solve_gens' in v.keys():
            fname = os.path.join(v['env_dir'], f'{k}_solve_gens_dist.png')
            plot_benchmark_dist(k, v['solve_gens'], 'Solve generation', fname)

            fname = os.path.join(v['env_dir'], f'{k}_best_scores_dist.png')
            plot_benchmark_dist(k, v['best_scores'], 'Best score', fname)

            fname = os.path.join(v['env_dir'], f'{k}_all_scores_dist.png')
            plot_benchmark_dist(k, v['all_scores'], 'All scores', fname, N_bins=20, plot_log=True)
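A hedged usage sketch; the env names must be ones the downstream benchmark_param_dict accepts, and any extra kwargs are copied into each env's param dict:

benchmark_envs(
    ['CartPole-v0', 'Pendulum-v0'],
    N_dist=10,             # evolutions per env, to form a distribution
    N_gen=1000,            # generations per evolution
    NN='FFNN_multilayer',  # forwarded into each env's param_dict
)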
Example #5
def ramp_difficulty(train_class, **kwargs):

	'''
	"Ramps up" the difficulty: the agent first solves the easy case,
	then adapts to progressively harder ones.
	'''

	base_dir = os.path.join(path_utils.get_output_dir(), 'ramp_difficulty_{}'.format(path_utils.get_date_str()))
	os.mkdir(base_dir)

	kwargs['base_dir'] = base_dir

	cm = train_class(**kwargs)

	N_eps_chunk = kwargs.get('N_eps', 5000)
	cat_speed_rels = {
						3.1 : 1*N_eps_chunk,
						3.2 : 1*N_eps_chunk,
						3.3 : 1*N_eps_chunk,
						3.4 : 2*N_eps_chunk,
						3.5 : 2*N_eps_chunk,
						3.6 : 2*N_eps_chunk,
						3.65 : 2*N_eps_chunk,
						3.7 : 2*N_eps_chunk,
						3.75 : 2*N_eps_chunk,
						3.8 : 2*N_eps_chunk,
					}


	c_s_r_fname = os.path.join(cm.dir, 'c_s_r_list.json')
	with open(c_s_r_fname, 'w+') as f:
		json.dump({'cat_speed_rels' : cat_speed_rels}, f, indent=4)

	for i, (c_s, N_eps) in enumerate(cat_speed_rels.items()):

		print(f'\nRunning with cat_speed_rel = {c_s} now!\n')

		if kwargs.get('reset_buffer', False):
			cm.ER.reset_buffer()

		cm.agent.set_cat_speed(c_s)
		# Only reset the training history on the first difficulty chunk.
		cm.train(N_eps, kwargs.get('N_steps', 500), reset_hist=(i == 0))

	cm.train_epochs.append(cm.total_step)
	cm.plot_train(save_plot=True, show_plot=False)

	cm.save_model()
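A hedged usage sketch; train_class is assumed to expose the attributes used above (.dir, .ER, .agent.set_cat_speed(), .train(), .plot_train(), .save_model()), and CatMouseDDPG is a hypothetical class name:

ramp_difficulty(
	CatMouseDDPG,        # hypothetical trainer class
	N_eps=5000,          # episodes per difficulty chunk
	N_steps=500,         # max steps per episode
	reset_buffer=False,  # keep the replay buffer across difficulty steps
)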
Example #6
    def __init__(self, **kwargs):
        global default_kwargs

        self.agent = CatMouseAgent(**kwargs)

        self.N_state_terms = len(self.agent.getStateVec())
        self.N_actions = self.agent.N_actions

        for k, v in kwargs.items():
            if k in default_kwargs:
                default_kwargs[k] = v
            elif k != 'fname_note':
                # fname_note is an allowed extra key; warn about anything else.
                print(
                    f'Passed parameter {k} not in default_kwargs dict! Check')

        self.fname_base = 'CatMouse_A2C_' + path_utils.get_date_str()

        self.dir = os.path.join(default_kwargs['base_dir'], self.fname_base)
        os.mkdir(self.dir)
        print(f'\n\nMade dir {self.dir} for run...\n\n')

        self.save_params_json(kwargs, 'params_passed')
        self.save_params_json(default_kwargs, 'params_all')

        self.N_batch = default_kwargs['N_batch']
        self.gamma = default_kwargs['gamma']
        self.beta_entropy = default_kwargs['beta_entropy']
        self.optim = default_kwargs['optim']
        self.LR = default_kwargs['LR']
        self.hidden_size = default_kwargs['hidden_size']
        self.clamp_grad = default_kwargs['clamp_grad']
        self.noise_sigma = default_kwargs['noise_sigma']
        self.noise_theta = default_kwargs['noise_theta']
        self.noise_dt = default_kwargs['noise_dt']
        self.sigma_min = default_kwargs['sigma_min']
        self.decay_noise = default_kwargs['decay_noise']

        self.setup_NN()
        self.dat_fname_file = None

        # Exclude base_dir from the plot title; pop() avoids a NameError
        # when base_dir isn't present.
        no_base_dir_kwargs = deepcopy(default_kwargs)
        no_base_dir_kwargs.pop('base_dir', None)
        self.train_plot_title = path_utils.linebreak_every_n_spaces(
            path_utils.param_dict_to_label_str(no_base_dir_kwargs))
Example #7
    def __init__(self, env_name, **kwargs):

        # Create env, create agent
        self.setup_env(env_name)
        self.agent = Agent.Agent(self.env, **kwargs)

        self.noise_sd = 1.0
        self.max_episode_steps = kwargs.get("max_episode_steps", 500)

        # Get the base dir, which is where runs will be saved to. Default
        # is /output/
        base_dir = kwargs.get("base_dir", path_utils.get_output_dir())

        # Datetime string for labeling the run
        self.dt_str = path_utils.get_date_str()

        # If you don't pass anything, it will create a dir in base_dir to
        # hold the results of this run, but you can supply your own externally.
        self.run_dir = kwargs.get("run_dir", None)
        if self.run_dir is None:
            self.run_dir = os.path.join(
                base_dir, f"{self.env_name}_sample_{self.dt_str}")
            os.mkdir(self.run_dir)

        # For saving the parameters used for the run. Run last in __init__().
        if kwargs.get("load_params_from_dir", False):
            self.load_params_dict()
        else:
            self.run_params = kwargs.copy()
            self.save_params_dict()

        #### Plot params
        self.plot_pt_alpha = 0.2
        self.plot_label_params = {"fontsize": 18}
        self.plot_tick_params = {"fontsize": 13}
        self.plot_title_params = {"fontsize": 18}

        self.plot_params = {
            "plot_pt_alpha": self.plot_pt_alpha,
            "plot_label_params": self.plot_label_params,
            "plot_tick_params": self.plot_tick_params,
            "plot_title_params": self.plot_title_params,
        }
Example #8
    def __init__(self, env_name, **kwargs):

        # The search method used. Default is Random Weight Guessing (RWG).
        self.search_method = kwargs.get('search_method', 'RWG')
        assert self.search_method in [
            'RWG', 'gaussian_noise_hill_climb', 'grid_search',
            'bin_grid_search', 'sparse_bin_grid_search'
        ], 'Must supply valid search_method!'

        # Create env, create agent
        self.setup_env(env_name)
        self.agent = Agent.Agent(self.env, **kwargs)

        self.noise_sd = 1.0
        self.max_episode_steps = kwargs.get('max_episode_steps', 500)

        # Get the base dir, which is where runs will be saved to. Default
        # is /output/
        base_dir = kwargs.get('base_dir', path_utils.get_output_dir())

        # Datetime string for labeling the run
        self.dt_str = path_utils.get_date_str()

        # If you don't pass anything, it will create a dir in base_dir to
        # hold the results of this run, but you can supply your own externally.
        self.run_dir = kwargs.get('run_dir', None)
        if self.run_dir is None:
            self.run_dir = os.path.join(base_dir,
                                        f'{self.env_name}_evo_{self.dt_str}')
            os.mkdir(self.run_dir)

        # For saving the parameters used for the run. Run last in __init__().
        if kwargs.get('load_params_from_dir', False):
            self.load_params_dict()
        else:
            self.run_params = kwargs.copy()
            self.save_params_dict()

        #### Plot params
        self.plot_pt_alpha = 0.2
        self.plot_label_params = {'fontsize': 14}
        self.plot_tick_params = {'fontsize': 11}
        self.plot_title_params = {'fontsize': 16}
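A hedged construction sketch; the enclosing class name isn't shown ('Evolve' is a stand-in suggested by the '_evo_' run-dir label), and the kwargs are ones this __init__ actually reads:

e = Evolve(
    'CartPole-v0',
    search_method='RWG',    # must be one of the asserted options
    max_episode_steps=500,
    base_dir=path_utils.get_output_dir(),
)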
Example #9
    def no_train_episode(self, N_steps):

        self.agent.initEpisode()
        '''
        The GD is done with:
            -v_buffer (batch of V for each state)
            -sigma_buffer (batch of sd for each state)
            -r_buffer (batch of returned r for each action taken in each state)
            -pi_a_buffer (batch of pi_a for each action taken in each state)

        plot_episode() plots:
            -self.last_ep_*_hist (history of * over course of episode)

        other buffers saved but not used (for debugging):
            -s_buffer
            -a_buffer
            -mu_buffer
        '''

        s_ep_hist = []
        R_ep_hist = []
        episode_ending = None
        tot_steps = 0

        for t in range(N_steps):

            # DDPG stuff
            s = self.agent.getStateVec()

            # Get action from actor NN
            a = self.NN_actor(torch.tensor(
                s, dtype=torch.float)).detach().numpy()

            # Iterate, get r, s_next
            r, s_next, done = self.agent.iterate(a)

            s_ep_hist.append(s)
            R_ep_hist.append(r)

            tot_steps = t
            if done:
                s_ep_hist.append(s_next)
                break

        # Classify the episode by its final reward; episode_ending stays
        # None if the mouse neither escaped nor was caught.
        last_r = R_ep_hist[-1]
        if last_r > 0.5:
            episode_ending = 'escaped'
        elif last_r < -0.2:
            episode_ending = 'caught'

        traj_fname = plot_tools.save_traj_to_file(
            np.array(s_ep_hist),
            self.dir,
            iter='eval_ep_{}'.format(path_utils.get_date_str()),
            cat_speed_rel=self.agent.cat_speed_rel,
            mouse_caught=episode_ending)

        return {
            'traj_fname': traj_fname,
            'mouse_caught': episode_ending,
            'tot_steps': tot_steps
        }
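A hedged usage sketch, assuming an instance (cm, a hypothetical name) of the class this method belongs to; the returned keys are exactly the ones built above:

ep_info = cm.no_train_episode(500)  # evaluation rollout, no learning
print(ep_info['mouse_caught'])      # 'escaped', 'caught', or None
print(ep_info['tot_steps'], ep_info['traj_fname'])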
Example #10
def benchmark_vary_params(constant_params_dict, vary_params_dict, **kwargs):

    '''
    This is a convenience function to easily vary parameters for benchmarking.
    You pass it constant_params_dict, which is a dict with the values that
    you want to remain constant between runs. Then, pass it vary_params_dict,
    which should have each parameter that you want to vary as a list of the values
    it should take.

    Example:

    constant_params_dict = {
        'env_name' : 'CartPole-v0',
        'N_gen' : 1000,
        'N_dist' : 100,
        'NN' : 'FFNN_multilayer'
    }

    vary_params_dict = {
        'N_hidden_units' : [2, 4, 8],
        'act_fn' : ['tanh', 'relu']
    }

    This will do 3*2 = 6 runs, for each of the combinations of varying parameters.
    '''

    # Create informative dir name
    vary_params = list(vary_params_dict.keys())
    benchmark_dir = os.path.join(
                        path_utils.get_output_dir(),
                        'Benchmark_vary_{}_{}'.format('_'.join(vary_params), path_utils.get_date_str()))
    print(f'\nSaving benchmark run to {benchmark_dir}')
    os.mkdir(benchmark_dir)

    combined_params = {**constant_params_dict, **vary_params_dict}
    # Save params to file
    with open(os.path.join(benchmark_dir, 'run_params.json'), 'w+') as f:
        json.dump(combined_params, f, indent=4)


    # Flatten list, pass to other function
    flat_param_list = vary_params_cross_products(constant_params_dict, vary_params_dict)
    flat_param_list = benchmark_param_dicts(flat_param_list, benchmark_dir=benchmark_dir, **kwargs)

    # Parse results
    for d in flat_param_list:
        benchmark_dict = d['benchmark_dict']

        best_scores = benchmark_dict['best_scores']
        d['mu_best'] = np.mean(best_scores)
        d['sigma_best'] = np.std(best_scores)

        solve_gens = benchmark_dict['solve_gens']
        d['mu_solve_gens'] = np.mean(solve_gens)
        d['sigma_solve_gens'] = np.std(solve_gens)

        # Drop the bulky per-run dict now that the stats are extracted.
        d.pop('benchmark_dict')

    # Save results to csv for later parsing/plotting
    df = pd.DataFrame(flat_param_list)
    print(tabulate(df, headers=df.columns.values, tablefmt='psql'))
    df_fname = os.path.join(benchmark_dir, 'vary_benchmark_results.csv')
    df.to_csv(df_fname, index=False)

    # Heatmaps are only meaningful if at least 2 params were varied.
    if len(vary_params) >= 2:

        # Create heatmap plots dir
        heatmap_dir = os.path.join(benchmark_dir, 'heatmap_plots')
        print(f'\nSaving heatmap plots to {heatmap_dir}')
        os.mkdir(heatmap_dir)

        # Iterate over all unique pairs of vary params, plot heatmaps of them
        for pair in itertools.combinations(vary_params, 2):

            print(f'Making heatmaps for {pair}')

            other_params_flat = [(k, v) for k, v in vary_params_dict.items() if k not in pair]
            other_params = [x[0] for x in other_params_flat]
            other_vals = [x[1] for x in other_params_flat]
            print(f'other params: {other_params}')

            # Create dir for specific pivot
            pivot_name = 'vary_{}_{}'.format(*pair)
            pivot_dir = os.path.join(heatmap_dir, pivot_name)
            os.mkdir(pivot_dir)

            # Select for each of the combos of the other params.
            for other_params_set in itertools.product(*other_vals):
                other_sel_dict = dict(zip(other_params, other_params_set))
                fname_label = path_utils.param_dict_to_fname_str(other_sel_dict)
                df_sel = df.loc[(df[list(other_sel_dict)] == pd.Series(other_sel_dict)).all(axis=1)]

                heatmap_plot(df_sel, *pair, 'mu_best', pivot_dir, label=fname_label)
                heatmap_plot(df_sel, *pair, 'mu_solve_gens', pivot_dir, label=fname_label)
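Putting the docstring's example together, the call itself would look like this (parameter values copied from the docstring above):

constant_params_dict = {
    'env_name': 'CartPole-v0',
    'N_gen': 1000,
    'N_dist': 100,
    'NN': 'FFNN_multilayer',
}
vary_params_dict = {
    'N_hidden_units': [2, 4, 8],
    'act_fn': ['tanh', 'relu'],
}
# Runs all 3*2 = 6 combinations, then writes the results csv and heatmaps.
benchmark_vary_params(constant_params_dict, vary_params_dict)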
Example #11
import path_utils
from Sample import Sample
import os

# env_name = 'CartPole-v0'
# env_name = 'MountainCarContinuous-v0'
# env_name = 'Acrobot-v1'
env_name = "Pendulum-v0"

base_dir = os.path.join(
    path_utils.get_output_dir(),
    "make_gifs_" + env_name + "_" + path_utils.get_date_str(),
)
os.mkdir(base_dir)

e = Sample(
    env_name,
    NN="FFNN",
    N_hidden_layers=1,
    N_hidden_units=4,
    use_bias=True,
    base_dir=base_dir,
    random_dist="uniform",
    random_dist_scaling=10.0,
)

sample_dict = e.sample(10000, N_episodes=10, print_samp_num=True)
e.save_all_sample_stats(sample_dict)


print(sample_dict["best_weights"])
Example #12
def hyperparam_search_const(run_fn, center_dict, vary_param_dict, output_dir, **kwargs):

	'''
	This is when you have a "center" dict of kwargs (that could be seen
	as the "control"), and you want to vary other parameters one at a time,
	while keeping the others constant.

	So pass it something like: center_dict = {'param1' : 5, 'param2' : 'g', 'param3' : 492},
	and vary_param_dict = {'param1' : [2, 10], 'param2' : ['h', 'q', 'z']}, and it will
	try the center value, and then 5 other variants.

	The function that will be run, run_fn, has to take only kwargs. You'll
	probably have to write an external wrapper for it if you want to do
	something like create a class instance and call a few of its methods
	(see the sketch after this example).

	output_dir is the path you want this function to build the whole tree structure
	in (it will create run_dir for this).

	This isn't fully general at the moment because of save_param_R_curves;
	it should be expanded so that fn is passed as an arg.
	'''

	# How many runs of each set of params it will do
	N_runs = kwargs.get('N_runs', 1)
	save_R = kwargs.get('save_R', False)

	# Create the run_dir for all the vary_params
	run_dir = os.path.join(output_dir, 'vary_param_{}'.format(path_utils.get_date_str()))
	os.mkdir(run_dir)

	# Whether to do the center run or not
	center_run = kwargs.get('center_run', True)
	if center_run:
		print('\n\nRunning with center...')
		pp.pprint(center_dict, width=1)

		# Create dir for center run
		center_dir = os.path.join(run_dir, 'center_runs')
		os.mkdir(center_dir)

		param_R_curves = []

		# Deepcopy the center_dict, so varying the copy doesn't mess up anything.
		d = deepcopy(center_dict)
		d['base_dir'] = center_dir
		for i in range(N_runs):
			R_curve = run_fn(**d)
			param_R_curves.append(R_curve)

		if save_R:
			save_param_R_curves(param_R_curves, d, center_dir)


	for param, param_list in vary_param_dict.items():
		print('\n\nNow varying parameter: ', param)
		vary_dir = os.path.join(run_dir, 'vary_{}'.format(param))
		os.mkdir(vary_dir)

		for v in param_list:

			param_R_curves = []

			d = deepcopy(center_dict)
			d[param] = v
			d['base_dir'] = vary_dir
			d['fname_note'] = '{}={}'.format(param, v)

			print('\n\nNow running with parameter {} = {}\n'.format(param, v))
			pp.pprint(d, width=1)

			for i in range(N_runs):
				R_curve = run_fn(**d)
				param_R_curves.append(R_curve)

			if save_R:
				save_param_R_curves(param_R_curves, {param : v}, vary_dir)
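As the docstring notes, run_fn must take only kwargs, so a wrapper is usually needed. A hedged sketch of such a wrapper; the trainer class, its train() signature, and the returned reward curve are assumptions for illustration, not part of hyperparam_search_const itself:

def run_fn(**kwargs):
	# Hypothetical trainer; only the kwargs-only interface is required.
	cm = CatMouseDDPG(**kwargs)
	R_curve = cm.train(kwargs.get('N_eps', 5000), kwargs.get('N_steps', 500))
	return R_curve  # collected into param_R_curves when save_R=True

hyperparam_search_const(
	run_fn,
	center_dict={'gamma': 0.99, 'noise_sigma': 0.2},  # illustrative params
	vary_param_dict={'gamma': [0.95, 0.999]},
	output_dir=path_utils.get_output_dir(),
	N_runs=3,
	save_R=True,
)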