def plot_fishers(fishers_data, title=''):
    """Plot Fisher's exact correlation scores with a heatmap."""

    # try to infer the necessary figure size and set up axis
    nrows, ncols = fishers_data.shape
    figsize = (ncols, nrows)
    fig, ax = plt.subplots(figsize=figsize)

    # check for inf values for plotting, since these 
    # cannot be plotted otherwise
    if np.inf in fishers_data.values or -np.inf in fishers_data.values:
        fishers_data = fishers_data.replace(np.inf, 300)
        fishers_data = fishers_data.replace(-np.inf, -300)
        title = title + "\nNB: Fisher's test ±np.inf replaced with ±300 for plotting"

    # plot with specifications
    heatmap(
        fishers_data.round().astype(int), 
        ax=ax,
        #robust=True,
        fmt="d",
    )
    ax.set_title(title)
    ax.set_xlabel('')
    ax.set_ylabel('')
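
# A minimal usage sketch (not part of the original snippet), assuming the
# module-level imports this function relies on: numpy as np, matplotlib.pyplot
# as plt, pandas, and a seaborn-style `heatmap`. It exercises the +/-inf
# substitution branch on a tiny hypothetical DataFrame.
if __name__ == '__main__':
    import numpy as np
    import pandas as pd
    demo = pd.DataFrame([[1.2, np.inf], [-np.inf, 0.4]],
                        index=['term_a', 'term_b'],
                        columns=['set_1', 'set_2'])
    plot_fishers(demo, title='demo Fisher scores')
    plt.show()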
Example #2
    def final_plots_and_error(self, idx, target, err_func):
        self.final_err = err_func(self.final_y, self.final_yhat).mean().mean()

        fig = plt.figure()
        plot_ts(pd.concat([self.final_X, self.final_y], axis=1), idx=idx, c='steelblue',lw=2, label='actual')
        label_suffix = 'mean across obs'
        if isinstance(idx, str): label_suffix = idx
        plot_ts(self.final_yhat, idx=idx, c='indianred', lw=3.5, label=f'forecast for {label_suffix}')
        plt.title(f'actual and predicted {target}; err: {self.final_err:.4f}')

        fig = plt.figure()
        heatmap(df=pd.concat([self.final_X, self.final_yhat], axis=1), target=target, sort_col=self.final_X.columns[-1], forecast_line=self.n_forecast)
        return
Example #3
def main():
    # Read in the data
    sample = pd.read_csv(READ_PATH)
    sample.info()
    print(sample.describe())
    print(sample.describe(include=['O']))
   
    # Create summary plots
    plotting.violin(sample)
    plotting.pairplot(sample)
    plotting.pairplot_kde(sample)
    plotting.heatmap(sample)
    plotting.swarmplot(sample)
   
    # Standardize variables
    X = standardize(sample)
    
    # Build models with k=2 through k=10
    models = []
    for k in range(2, 11, 1):
        y_pred, km = build_cluster_model(X, k)
        models.append(('k%d_class' % k, km))
        sample['k%d_class' % k] = y_pred        
        
    # Inertia Analysis
    inertia = np.array([m[1].inertia_ for m in models])
    k_value = np.arange(2, 11, 1)
    plotting.inertia(k_value, inertia)
    plotting.d_inertia(k_value, inertia)
     
    # Pair plots with new color coding
    for k in range(2, 11, 1):
        plotting.pairplot(sample, group='k%d_class' % k)
   
    # Comparison of k2 model with original groupings
    k2_confusion_matrix(sample)
    # Comparison of k3 model with original groupings
    k3_confusion_matrix(sample)
    # Comparison of k5 model with original groupings
    k5_confusion_matrix(sample)

    # Feature-Feature plots comparing pred and truth
    plotting.compare_model(df=sample, model='k5_class', x='heartrate', y='height')
    plotting.compare_model(df=sample, model='k5_class', x='weight', y='height')
plt.ioff()

#######################################
# cost function 1 , gamma=0.99
#######################################
gamma = .99
#Initialize the MarkovDecisionProcess object for method 1 of the reward
mdp1_a = MarkovDecisionProcess(transition=Transitions,
                               reward=Reward_1,
                               method=1,
                               gamma=gamma,
                               epsilon=epsilon)
""" value iteration with method 1"""
V1_a, error_v1_a = mdp1_a.value_iteration(maze.maze)
pi_v1_a = mdp1_a.best_policy(V1_a)
pl.heatmap(V1_a, pi_v1_a, maze.height, maze.width, 'VI', gamma, 1)
pl.plot_error(error_v1_a, 'VI', gamma, 1)
""" policy iteration with method 1"""
error_p1_a, pi_p1_a, U1_a = mdp1_a.policy_iteration(maze.maze)
pl.heatmap(U1_a, pi_p1_a, maze.height, maze.width, 'PI', gamma, 1)
pl.plot_error(error_p1_a, 'PI', gamma, 1)

#######################################
# cost function 2 , gamma=0.99
#######################################
gamma = .99
#Initialize the MarkovDecisionProcess object for method 2 of the reward
mdp2_a = MarkovDecisionProcess(transition=Transitions,
                               reward=Reward_2,
                               method=2,
                               gamma=gamma,
                               epsilon=epsilon)

Example #5
def collect_entropy_policies(env, epochs, T, MODEL_DIR=''):

    video_dir = 'videos/' + args.exp_name

    direct = os.getcwd() + '/data/'
    experiment_directory = direct + args.exp_name
    print(experiment_directory)

    print(sys.argv)
    if not os.path.exists(experiment_directory):
        os.makedirs(experiment_directory)
        with open(experiment_directory + '/args', 'w') as f:
            f.write(' '.join(sys.argv))

    indexes = [1, 5, 10, 15]
    states_visited_indexes = [0, 5, 10, 15]

    states_visited_cumulative = []
    states_visited_cumulative_baseline = []

    running_avg_p = np.zeros(shape=(tuple(ant_utils.num_states)))
    running_avg_p_xy = np.zeros(shape=(tuple(ant_utils.num_states_2d)))
    running_avg_ent = 0
    running_avg_ent_xy = 0

    running_avg_p_baseline = np.zeros(shape=(tuple(ant_utils.num_states)))
    running_avg_p_baseline_xy = np.zeros(
        shape=(tuple(ant_utils.num_states_2d)))
    running_avg_ent_baseline = 0
    running_avg_ent_baseline_xy = 0

    pct_visited = []
    pct_visited_baseline = []
    pct_visited_xy = []
    pct_visited_xy_baseline = []

    running_avg_entropies = []
    running_avg_entropies_xy = []
    running_avg_ps_xy = []
    avg_ps_xy = []

    running_avg_entropies_baseline = []
    running_avg_entropies_baseline_xy = []
    running_avg_ps_baseline_xy = []
    avg_ps_baseline_xy = []

    policies = []
    distributions = []
    initial_state = init_state(env)

    prebuf = ExperienceBuffer()
    env.reset()
    for t in range(10000):
        action = env.action_space.sample()
        obs, reward, done, _ = env.step(action)
        prebuf.store(get_state(env, obs))
        if done:
            env.reset()
            done = False

    prebuf.normalize()
    normalization_factors = prebuf.normalization_factors
    utils.log_statement(normalization_factors)
    prebuf = None
    if not args.gaussian:
        normalization_factors = []

    reward_fn = np.zeros(shape=(tuple(ant_utils.num_states)))

    for i in range(epochs):
        utils.log_statement("*** ------- EPOCH %d ------- ***" % i)

        # clear initial state if applicable.
        if not args.initial_state:
            initial_state = []
        else:
            utils.log_statement(initial_state)
        utils.log_statement("max reward: " + str(np.max(reward_fn)))

        logger_kwargs = setup_logger_kwargs("model%02d" % i,
                                            data_dir=experiment_directory)

        # Learn policy that maximizes current reward function.
        print("Learning new oracle...")
        seed = random.randint(1, 100000)
        sac = AntSoftActorCritic(lambda: gym.make(args.env),
                                 reward_fn=reward_fn,
                                 xid=i + 1,
                                 seed=seed,
                                 gamma=args.gamma,
                                 ac_kwargs=dict(hidden_sizes=[args.hid] *
                                                args.l),
                                 logger_kwargs=logger_kwargs,
                                 normalization_factors=normalization_factors)

        # The first policy is random
        if i == 0:
            sac.soft_actor_critic(epochs=0)
        else:
            sac.soft_actor_critic(epochs=args.episodes,
                                  initial_state=initial_state,
                                  start_steps=args.start_steps)
        policies.append(sac)

        p, _ = sac.test_agent(T, normalization_factors=normalization_factors)
        distributions.append(p)
        weights = utils.get_weights(distributions)

        epoch = 'epoch_%02d' % (i)
        if args.render:
            if i < 10:
                sac.record(T=args.record_steps,
                           n=1,
                           video_dir=video_dir + '/baseline/' + epoch,
                           on_policy=False)
            sac.record(T=args.record_steps,
                       n=1,
                       video_dir=video_dir + '/entropy/' + epoch,
                       on_policy=True)

        # Execute the cumulative average policy thus far.
        # Estimate distribution and entropy.
        print("Executing mixed policy...")
        average_p, average_p_xy, initial_state, states_visited, states_visited_xy = \
            execute_average_policy(env, policies, T, weights,
                                   reward_fn=reward_fn, norm=normalization_factors,
                                   initial_state=initial_state, n=args.n,
                                   render=args.render, video_dir=video_dir+'/mixed/'+epoch, epoch=i,
                                   record_steps=args.record_steps)

        print("Calculating maxEnt entropy...")
        round_entropy = entropy(average_p.ravel())
        round_entropy_xy = entropy(average_p_xy.ravel())

        # Update running averages for maxEnt.
        print("Updating maxEnt running averages...")
        running_avg_ent = running_avg_ent * (
            i) / float(i + 1) + round_entropy / float(i + 1)
        running_avg_ent_xy = running_avg_ent_xy * (
            i) / float(i + 1) + round_entropy_xy / float(i + 1)
        running_avg_p *= (i) / float(i + 1)
        running_avg_p += average_p / float(i + 1)
        running_avg_p_xy *= (i) / float(i + 1)
        running_avg_p_xy += average_p_xy / float(i + 1)

        # update reward function
        print("Update reward function")
        eps = 1 / np.sqrt(ant_utils.total_state_space)
        if args.cumulative:
            reward_fn = grad_ent(running_avg_p)
        else:
            reward_fn = 1.
            average_p += eps
            reward_fn /= average_p
        average_p = None  # delete big array

        # (save for plotting)
        running_avg_entropies.append(running_avg_ent)
        running_avg_entropies_xy.append(running_avg_ent_xy)
        if i in indexes:
            running_avg_ps_xy.append(np.copy(running_avg_p_xy))
            avg_ps_xy.append(np.copy(average_p_xy))

        print("Collecting baseline experience....")
        p_baseline, p_baseline_xy, states_visited_baseline, states_visited_xy_baseline = sac.test_agent_random(
            T, normalization_factors=normalization_factors, n=args.n)

        plotting.states_visited_over_time(states_visited,
                                          states_visited_baseline, i)
        plotting.states_visited_over_time(states_visited_xy,
                                          states_visited_xy_baseline,
                                          i,
                                          ext='_xy')

        # save for cumulative plot.
        if i in states_visited_indexes:
            # average over a whole bunch of rollouts
            # slow: so only do this when needed.
            print("Averaging unique xy states visited....")
            states_visited_xy = compute_states_visited_xy(
                env,
                policies,
                norm=normalization_factors,
                T=T,
                n=args.n,
                N=args.avg_N)
            states_visited_xy_baseline = compute_states_visited_xy(
                env,
                policies,
                norm=normalization_factors,
                T=T,
                n=args.n,
                N=args.avg_N,
                initial_state=initial_state,
                baseline=True)
            states_visited_cumulative.append(states_visited_xy)
            states_visited_cumulative_baseline.append(
                states_visited_xy_baseline)

        print("Compute baseline entropy....")
        round_entropy_baseline = entropy(p_baseline.ravel())
        round_entropy_baseline_xy = entropy(p_baseline_xy.ravel())

        # Update baseline running averages.
        print("Updating baseline running averages...")
        running_avg_ent_baseline = running_avg_ent_baseline * (
            i) / float(i + 1) + round_entropy_baseline / float(i + 1)
        running_avg_ent_baseline_xy = running_avg_ent_baseline_xy * (
            i) / float(i + 1) + round_entropy_baseline_xy / float(i + 1)

        running_avg_p_baseline *= (i) / float(i + 1)
        running_avg_p_baseline += p_baseline / float(i + 1)
        running_avg_p_baseline_xy *= (i) / float(i + 1)
        running_avg_p_baseline_xy += p_baseline_xy / float(i + 1)

        p_baseline = None

        # (save for plotting)
        running_avg_entropies_baseline.append(running_avg_ent_baseline)
        running_avg_entropies_baseline_xy.append(running_avg_ent_baseline_xy)
        if i in indexes:
            running_avg_ps_baseline_xy.append(
                np.copy(running_avg_p_baseline_xy))
            avg_ps_baseline_xy.append(np.copy(p_baseline_xy))

        utils.log_statement(average_p_xy)
        utils.log_statement(p_baseline_xy)

        # Calculate percent of state space visited.
        pct = np.count_nonzero(running_avg_p) / float(running_avg_p.size)
        pct_visited.append(pct)
        pct_xy = np.count_nonzero(running_avg_p_xy) / float(
            running_avg_p_xy.size)
        pct_visited_xy.append(pct_xy)

        pct_baseline = np.count_nonzero(running_avg_p_baseline) / float(
            running_avg_p_baseline.size)
        pct_visited_baseline.append(pct_baseline)
        pct_xy_baseline = np.count_nonzero(running_avg_p_baseline_xy) / float(
            running_avg_p_baseline_xy.size)
        pct_visited_xy_baseline.append(pct_xy_baseline)

        # Print round summary.
        col_headers = ["", "baseline", "maxEnt"]
        col1 = [
            "round_entropy_xy", "running_avg_ent_xy", "round_entropy",
            "running_avg_ent", "% state space xy", "% total state space"
        ]
        col2 = [
            round_entropy_baseline_xy, running_avg_ent_baseline_xy,
            round_entropy_baseline, running_avg_ent_baseline, pct_xy_baseline,
            pct_baseline
        ]
        col3 = [
            round_entropy_xy, running_avg_ent_xy, round_entropy,
            running_avg_ent, pct_xy, pct
        ]
        table = tabulate(np.transpose([col1, col2, col3]),
                         col_headers,
                         tablefmt="fancy_grid",
                         floatfmt=".4f")
        utils.log_statement(table)

        # Plot from round.
        plotting.heatmap(running_avg_p_xy, average_p_xy, i)
        plotting.heatmap1(running_avg_p_baseline_xy, i)

        if i == states_visited_indexes[3]:
            plotting.states_visited_over_time_multi(
                states_visited_cumulative, states_visited_cumulative_baseline,
                states_visited_indexes)

    # save final expert weights to use with the trained oracles.
    weights_file = experiment_directory + '/policy_weights'
    np.save(weights_file, weights)

    # cumulative plots.
    plotting.running_average_entropy(running_avg_entropies,
                                     running_avg_entropies_baseline)
    plotting.running_average_entropy(running_avg_entropies_xy,
                                     running_avg_entropies_baseline_xy,
                                     ext='_xy')

    plotting.heatmap4(running_avg_ps_xy,
                      running_avg_ps_baseline_xy,
                      indexes,
                      ext="cumulative")
    plotting.heatmap4(avg_ps_xy, avg_ps_baseline_xy, indexes, ext="epoch")

    plotting.percent_state_space_reached(pct_visited,
                                         pct_visited_baseline,
                                         ext='_total')
    plotting.percent_state_space_reached(pct_visited_xy,
                                         pct_visited_xy_baseline,
                                         ext="_xy")

    return policies
Example #6
import numpy as np
from matplotlib import pyplot as plt
from generate_wavepacket import wavepacket
from plotting import heatmap
from potential import x, y
from config import GRID_SIZE, k, WAVELENGTH, k_step

NORM = 'ortho'

print(np.sum(np.absolute(wavepacket)**2))
fourier = np.fft.fftshift(np.fft.fft2(wavepacket, norm=NORM))
print(np.sum(np.absolute(fourier)**2))
inversed = np.fft.ifft2(fourier, norm=NORM).real
print(np.sum(np.absolute(inversed)**2))
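# The printed sums track the squared L2 norm of the wavepacket: with
# norm='ortho', np.fft.fft2 is unitary, so the first two sums should agree up
# to floating-point error; the third is computed from only the real part of
# the inverse transform, so it matches only when the wavepacket is real.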

k_step = 2 * np.pi / GRID_SIZE

fig, ax = plt.subplots()
heatmap(inversed, x * k_step, y * k_step, ax=ax, cbarlabel="s")

plt.show()
# directory_to_save = "{}{}_potential_{}_x_{}_y_{}_{}_n_{}_cutoff_{}_grid_{}_wavelength_{}_timestep_{}_lasernum_{}_repeat_{}_retroreflective_{}/".format(
#     PLOT_SAVE_DIR_BASE, PATH, V_0_REL / NUMBER_OF_LASERS, WAVEPACKET_CENTER_X, WAVEPACKET_CENTER_Y, METHOD,
#     POTENTIAL_CHANGE_SPEED, CUTOFF, GRID_SIZE, WAVELENGTH, TIME_STEP_REL, NUMBER_OF_LASERS, REPEATS,
#     not NON_RETROREFLECTIVE)
#
# p = Path("{}otwell".format(directory_to_save))
#
# with p.open('rb') as f:
#     fsz = os.fstat(f.fileno()).st_size
#     out = np.load(f)
#     while f.tell() < fsz:
#         out = np.vstack((out, np.load(f)))
# print(out.reshape(out.shape[0] // 5, 5, 5))

wavef = np.load(
    "{}move_square_potential_0.1_x_929_y_1093_ssf_n_100_cutoff_800_grid_800_wavelength_80_timestep_0.2_lasernum_5_repeat_1_retroreflective_False/Modulation finished_wavefunction.npy"
    .format(PLOT_SAVE_DIR_BASE),
    allow_pickle=True)
fig, ax = plt.subplots()
# im_pot = heatmap(generate_potential(0) / v_rec, x / WAVELENGTH, y / WAVELENGTH,
#                           ax, cbarlabel="Potential / Recoil Energy", cmap=plt.cm.gray)

heatmap(np.abs(wavef)**2,
        x / WAVELENGTH,
        y / WAVELENGTH,
        cbarlabel="Probability Distribution")
annotate(fig, ax, "Probability distribution at the finish of modulation",
         r"$x/\lambda$", r"$y/\lambda$")

# heatmap(np.abs(np.fft.fftshift(np.fft.fft2(wavef, norm=NORM))) ** 2, x, y)
Example #8
# Put it all together and produce the final figure

# In[6]:

variables = ['cconstitutive',  'q', 'p', 'pup']
fig, axesgrid = plt.subplots(nrows=2, ncols=2, figsize=(7, 5.0), sharey=True, sharex=True)
ymin, ymax = 0.09, 20.0
axes = axesgrid.flatten()
boundarykwargs = dict(ylimmax=ymax, ylimmin=ymin, lw=7.5, color='w')
for counter, var in enumerate(variables):
    ax = axes[counter]
    cmap = cm.viridis if var != 'cconstitutive' else cm.viridis_r
    cmap.set_bad('darkmagenta', 1.)
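    # pivot() reshapes the long-format results into a tauenv-by-pienv grid so
    # that each heatmap cell corresponds to one (tauenv, pienv) parameter pair.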
    im, cbar = plotting.heatmap(dft.pivot(index='tauenv', columns='pienv', values=var),
                     imshow=True, zlabel=evolimmune.varname_to_tex[var], cmap=cmap, ax=ax,
                     interpolation='bilinear')
    cbar.outline.set_linewidth(0.0)
    if var == 'cconstitutive':
        analysis.plot_interior_boundary(ax, phases['p'], **boundarykwargs)
        analysis.plot_interior_boundary(ax, phases['a'], **boundarykwargs)
    elif var in ['q', 'p']:
        analysis.plot_interior_boundary(ax, qpos, **boundarykwargs)
        if var == 'p':
            analysis.plot_interior_boundary(ax, phases['c'], **boundarykwargs)
    elif var == 'pup':
        analysis.plot_interior_boundary(ax, puppos, **boundarykwargs)
    ax.set_ylabel('')
    ax.set_xlabel('')
    ax.set_xlim(0.0, 1.0)
    ax.set_ylim(ymin, ymax)
Example #9
def collect_entropy_policies(env, epochs, T, MODEL_DIR):

    video_dir = 'videos/' + args.exp_name

    reward_fn = np.zeros(shape=(tuple(base_utils.num_states)))
    online_reward_fn = np.zeros(shape=(tuple(base_utils.num_states)))

    # set initial state to base, motionless state.
    seed = []
    if args.env == "Pendulum-v0":
        env.env.state = [np.pi, 0]
        seed = env.env._get_obs()
    elif args.env == "MountainCarContinuous-v0":
        env.env.state = [-0.50, 0]
        seed = env.env.state

    running_avg_p = np.zeros(shape=(tuple(base_utils.num_states)))
    running_avg_ent = 0
    running_avg_entropies = []
    running_avg_ps = []

    running_avg_p_online = np.zeros(shape=(tuple(base_utils.num_states)))
    running_avg_ent_online = 0
    running_avg_entropies_online = []
    running_avg_ps_online = []

    running_avg_p_baseline = np.zeros(shape=(tuple(base_utils.num_states)))
    running_avg_ent_baseline = 0
    running_avg_entropies_baseline = []
    running_avg_ps_baseline = []

    online_average_ps = []
    
    policies = []
    initial_state = init_state(args.env)

    online_policies = []
    online_initial_state = init_state(args.env)

    for i in range(epochs):

        # Learn policy that maximizes current reward function.
        policy = Policy(env, args.gamma, args.lr, base_utils.obs_dim, base_utils.action_dim)
        online_policy = Policy(env, args.gamma, args.lr, base_utils.obs_dim, base_utils.action_dim) 

        if i == 0:
            policy.learn_policy(reward_fn, 
                episodes=0, 
                train_steps=0)
            online_policy.learn_policy(online_reward_fn, 
                episodes=0, 
                train_steps=0)
        else:
            policy.learn_policy(reward_fn, 
                initial_state=initial_state, 
                episodes=args.episodes, 
                train_steps=args.train_steps)
            online_policy.learn_policy(online_reward_fn, 
                initial_state=online_initial_state, 
                episodes=args.episodes, 
                train_steps=args.train_steps)

        policies.append(policy)
        online_policies.append(online_policy)

        epoch = 'epoch_%02d/' % (i) 
        
        a = 10 # average over this many rounds
        p_baseline = policy.execute_random(T,
            render=args.render, video_dir=video_dir+'/baseline/'+epoch)
       
        round_entropy_baseline = scipy.stats.entropy(p_baseline.flatten())
        for av in range(a - 1):
            next_p_baseline = policy.execute_random(T)
            p_baseline += next_p_baseline
            round_entropy_baseline += scipy.stats.entropy(next_p_baseline.flatten())
        p_baseline /= float(a)
        round_entropy_baseline /= float(a) # running average of the entropy

        # Execute the cumulative average policy thus far.
        # Estimate distribution and entropy.
        average_p, round_avg_ent, initial_state = \
            curiosity.execute_average_policy(env, policies, T, 
                initial_state=initial_state, 
                avg_runs=a, 
                render=False)
        online_average_p, online_round_avg_ent, online_initial_state = \
            curiosity.execute_average_policy(env, online_policies, T, 
                initial_state=online_initial_state, 
                avg_runs=a, 
                render=False)

        # Get next distribution p by executing pi for T steps.
        # ALSO: Collect video of each policy
        p = policy.execute(T, initial_state=initial_state, 
            render=args.render, video_dir=video_dir+'/normal/'+epoch)
        p_online = online_policy.execute(T, initial_state=initial_state, 
            render=args.render, video_dir=video_dir+'/online/'+epoch)
        
        # Force first round to be equal
        if i == 0:
            average_p = p_baseline
            round_avg_ent = round_entropy_baseline
            online_average_p = p_baseline
            online_round_avg_ent = round_entropy_baseline

        # If in pendulum, set velocity to 0 with some probability
        if args.env == "Pendulum-v0" and random.random() < 0.3:
            initial_state[1] = 0

        # goal: try online reward structure
        online_reward_fn = online_rewards(online_average_p, online_average_ps, epochs)
        online_average_ps.append(online_average_p)

        reward_fn = grad_ent(average_p)

        # Update experimental running averages.
        running_avg_ent = running_avg_ent * (i)/float(i+1) + round_avg_ent/float(i+1)
        running_avg_p = running_avg_p * (i)/float(i+1) + average_p/float(i+1)
        running_avg_entropies.append(running_avg_ent)
        running_avg_ps.append(running_avg_p)  

        # Update online running averages.
        running_avg_ent_online = running_avg_ent_online * (i)/float(i+1) + online_round_avg_ent/float(i+1)
        running_avg_p_online = running_avg_p_online * (i)/float(i+1) + online_average_p/float(i+1)
        running_avg_entropies_online.append(running_avg_ent_online)
        running_avg_ps_online.append(running_avg_p_online)     

        # Update baseline running averages.
        running_avg_ent_baseline = running_avg_ent_baseline * (i)/float(i+1) + round_entropy_baseline/float(i+1)
        running_avg_p_baseline = running_avg_p_baseline * (i)/float(i+1) + p_baseline/float(i+1)
        running_avg_entropies_baseline.append(running_avg_ent_baseline)
        running_avg_ps_baseline.append(running_avg_p_baseline) 

        print("--------------------------------")
        print("p=")
        print(p)

        print("average_p =") 
        print(average_p)

        print("online_average_p")
        print(online_average_p)

        print("---------------------")

        print("round_avg_ent[%d] = %f" % (i, round_avg_ent))
        print("running_avg_ent = %s" % running_avg_ent)

        print("..........")

        print("online_round_avg_ent[%d] = %f" % (i, online_round_avg_ent))
        print("running_avg_ent_online = %s" % running_avg_ent_online)

        print("..........")

        print("round_entropy_baseline[%d] = %f" % (i, round_entropy_baseline))
        print("running_avg_ent_baseline = %s" % running_avg_ent_baseline)

        print("--------------------------------")

        plotting.heatmap(running_avg_p, average_p, i, args.env)

    plotting.running_average_entropy(running_avg_entropies, running_avg_entropies_baseline)
    plotting.running_average_entropy3(running_avg_entropies, running_avg_entropies_baseline, running_avg_entropies_online)

    indexes = [1,2,5,10]
    plotting.heatmap4(running_avg_ps, running_avg_ps_baseline, indexes)
    plotting.heatmap3x4(running_avg_ps, running_avg_ps_online, running_avg_ps_baseline, indexes)

    return policies
Example #10
def collect_entropy_policies(env, epochs, T, MODEL_DIR):

    reward_fn = np.zeros(shape=(tuple(utils.num_states)))

    # set initial state to base, motionless state.
    seed = []
    if args.env == "Pendulum-v0":
        env.env.state = [np.pi, 0]
        seed = env.env._get_obs()
    elif args.env == "MountainCarContinuous-v0":
        env.env.state = [-0.50, 0]
        seed = env.env.state

    reward_fn[tuple(utils.discretize_state(seed))] = 1

    running_avg_p = np.zeros(shape=(tuple(utils.num_states)))
    running_avg_ent = 0
    window_running_avg_p = np.zeros(shape=(tuple(utils.num_states)))
    window_running_avg_ent = 0

    running_avg_p_baseline = np.zeros(shape=(tuple(utils.num_states)))
    running_avg_ent_baseline = 0
    window_running_avg_p_baseline = np.zeros(shape=(tuple(utils.num_states)))
    window_running_avg_ent_baseline = 0

    baseline_entropies = []
    baseline_ps = []
    entropies = []
    ps = []

    average_entropies = []
    average_ps = []

    running_avg_entropies = []
    running_avg_ps = []

    running_avg_entropies_baseline = []
    running_avg_ps_baseline = []

    window_running_avg_ents = []
    window_running_avg_ps = []
    window_running_avg_ents_baseline = []
    window_running_avg_ps_baseline = []

    policies = []
    initial_state = init_state(args.env)

    for i in range(epochs):

        # Learn policy that maximizes current reward function.
        policy = Policy(env, args.gamma, args.lr, utils.obs_dim,
                        utils.action_dim)
        policy.learn_policy(reward_fn, initial_state, args.episodes,
                            args.train_steps)
        policies.append(policy)

        if args.save_models:
            policy.save(MODEL_DIR + 'model_' + str(i) + '.pt')

        # Get next distribution p by executing pi for T steps.
        p_videos = 'cmp_videos/%sp_%d/' % (MODEL_DIR, i)
        p = policy.execute(T,
                           initial_state,
                           render=args.record,
                           video_dir=p_videos)

        a = 10  # average over this many rounds
        baseline_videos = 'cmp_videos/%sbaseline_%d/' % (
            MODEL_DIR, i)  # note that MODEL_DIR has trailing slash
        entropy_videos = 'cmp_videos/%sentropy_%d/' % (MODEL_DIR, i)
        p_baseline = policy.execute_random(
            T, render=False, video_dir=baseline_videos)  # args.episodes?
        round_entropy_baseline = scipy.stats.entropy(p_baseline.flatten())
        for av in range(a - 1):
            next_p_baseline = policy.execute_random(T)
            p_baseline += next_p_baseline
            # print(scipy.stats.entropy(next_p_baseline.flatten()))
            round_entropy_baseline += scipy.stats.entropy(
                next_p_baseline.flatten())
        p_baseline /= float(a)
        round_entropy_baseline /= float(a)  # running average of the entropy

        # note: the entropy of p_baseline is not the same as the computed avg entropy
        # print("baseline compare:")
        # print(round_entropy_baseline) # running average
        # print(scipy.stats.entropy(p_baseline.flatten())) # entropy of final

        # reward_fn = grad_ent(p)

        round_entropy = scipy.stats.entropy(p.flatten())
        entropies.append(round_entropy)
        baseline_entropies.append(round_entropy_baseline)
        ps.append(p)
        baseline_ps.append(p_baseline)

        # Execute the cumulative average policy thus far.
        # Estimate distribution and entropy.
        average_p, round_avg_ent, initial_state = \
            curiosity.execute_average_policy(env, policies, T, initial_state=initial_state, avg_runs=a, render=False, video_dir=entropy_videos)

        # If in pendulum, set velocity to 0 with some probability
        if args.env == "Pendulum-v0" and random.random() < 0.3:
            initial_state[1] = 0

        reward_fn = grad_ent(average_p)

        print(average_p)
        print("!  --------  !")
        print(reward_fn)

        average_ps.append(average_p)
        average_entropies.append(round_avg_ent)

        # Update running average.
        window = 5
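        # Windowed running average: for the first `window` rounds this reduces
        # to the cumulative mean; afterwards each new value enters at weight
        # 1/window while the value from `window` rounds earlier is subtracted.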
        if (i < window):  # add normally
            window_running_avg_ent = window_running_avg_ent * (
                i) / float(i + 1) + round_avg_ent / float(i + 1)
            window_running_avg_p = window_running_avg_p * (
                i) / float(i + 1) + average_p / float(i + 1)
            window_running_avg_ent_baseline = window_running_avg_ent_baseline * (
                i) / float(i + 1) + round_entropy_baseline / float(i + 1)
            window_running_avg_p_baseline = window_running_avg_p_baseline * (
                i) / float(i + 1) + p_baseline / float(i + 1)

        else:
            window_running_avg_ent = window_running_avg_ent + round_avg_ent / float(
                window) - average_entropies[i - 5] / float(window)
            window_running_avg_p = window_running_avg_p + average_p / float(
                window) - average_ps[i - 5] / float(window)

            window_running_avg_ent_baseline = window_running_avg_ent_baseline + round_entropy_baseline / float(
                window) - baseline_entropies[i - 5] / float(window)
            window_running_avg_p_baseline = window_running_avg_p_baseline + p_baseline / float(
                window) - baseline_ps[i - 5] / float(window)

        running_avg_ent = running_avg_ent * (
            i) / float(i + 1) + round_avg_ent / float(i + 1)
        running_avg_p = running_avg_p * (
            i) / float(i + 1) + average_p / float(i + 1)
        running_avg_entropies.append(running_avg_ent)
        running_avg_ps.append(running_avg_p)

        # Update baseline running averages.
        running_avg_ent_baseline = running_avg_ent_baseline * (
            i) / float(i + 1) + round_entropy_baseline / float(i + 1)
        running_avg_p_baseline = running_avg_p_baseline * (
            i) / float(i + 1) + p_baseline / float(i + 1)
        running_avg_entropies_baseline.append(running_avg_ent_baseline)
        running_avg_ps_baseline.append(running_avg_p_baseline)

        window_running_avg_ents.append(window_running_avg_ent)
        window_running_avg_ps.append(window_running_avg_p)
        window_running_avg_ents_baseline.append(
            window_running_avg_ent_baseline)
        window_running_avg_ps_baseline.append(window_running_avg_p_baseline)

        print("p=")
        print(p)
        print("..........")
        print("round_entropy = %f" % (round_entropy))

        print("---------------------")

        print("average_p =")
        print(average_p)

        print("..........")

        print("round_avg_ent[%d] = %f" % (i, round_avg_ent))
        print("running_avg_ent = %s" % running_avg_ent)
        print("window_running_avg_ent = %s" % window_running_avg_ent)

        print("..........")

        print("round_entropy_baseline[%d] = %f" % (i, round_entropy_baseline))
        print("running_avg_ent_baseline = %s" % running_avg_ent_baseline)
        print("window_running_avg_ent_baseline = %s" %
              window_running_avg_ent_baseline)
        # print("running_avg_p_baseline =")
        # print(running_avg_p_baseline)

        print("----------------------")

        plotting.heatmap(running_avg_p, average_p, i)

    # plotting.smear_lines(running_avg_ps, running_avg_ps_baseline)
    plotting.running_average_entropy(running_avg_entropies,
                                     running_avg_entropies_baseline)
    plotting.running_average_entropy_window(window_running_avg_ents,
                                            window_running_avg_ents_baseline,
                                            window)
    # plotting.difference_heatmap(running_avg_ps, running_avg_ps_baseline)

    indexes = []
    print('which indexes?')
    for i in range(4):
        idx = input("index :")
        indexes.append(int(idx))
    plotting.heatmap4(running_avg_ps, running_avg_ps_baseline, indexes)

    return policies
Example #11
fig, axesgrid = plt.subplots(nrows=2,
                             ncols=2,
                             figsize=(7, 5.0),
                             sharey=True,
                             sharex=True)
ymin, ymax = 0.09, 20.0
axes = axesgrid.flatten()
boundarykwargs = dict(ylimmax=ymax, ylimmin=ymin, lw=7.5, color='w')
for counter, var in enumerate(variables):
    ax = axes[counter]
    cmap = cm.viridis if var != 'cconstitutive' else cm.viridis_r
    cmap.set_bad('darkmagenta', 1.)
    im, cbar = plotting.heatmap(dft.pivot(index='tauenv',
                                          columns='pienv',
                                          values=var),
                                imshow=True,
                                zlabel=evolimmune.varname_to_tex[var],
                                cmap=cmap,
                                ax=ax,
                                interpolation='bilinear')
    cbar.outline.set_linewidth(0.0)
    if var == 'cconstitutive':
        analysis.plot_interior_boundary(ax, phases['p'], **boundarykwargs)
        analysis.plot_interior_boundary(ax, phases['a'], **boundarykwargs)
    elif var in ['q', 'p']:
        analysis.plot_interior_boundary(ax, qpos, **boundarykwargs)
        if var == 'p':
            analysis.plot_interior_boundary(ax, phases['c'], **boundarykwargs)
    elif var == 'pup':
        analysis.plot_interior_boundary(ax, puppos, **boundarykwargs)
    ax.set_ylabel('')
    ax.set_xlabel('')