def __init__(self, des, A, E):
    '''Initializer'''

    # Initialize the evolution API
    self.e = evolution.evolution()

    # Calculate estimated alpha using ratio of H to E for each row
    ## Solve for alpha as follows
    ## Use: r = H/E = alpha/(1-alpha)
    ## (based on alpha = probability of H, (1-alpha) = probability of E)
    ## Then: solve for alpha --> alpha = H/E / (1+H/E)
    H = np.sum(A, axis=0)
    with np.errstate(divide='ignore', invalid='ignore'):
        r = H.sum(axis=1) / E.sum(axis=1)
    r = np.nan_to_num(r)  # Remove NaN, just in case
    self.alpha = r / (1 + r)

    # Set the row normalized set of A matrices for each action
    self.A = np.copy(A)  # copy to avoid rewriting A due to shallow copy
    for i in range(0, A.shape[0]):
        self.A[i] = matop.normalize_rows(self.A[i])

    # Set row normalized E matrix
    self.E = matop.normalize_rows(E)

    # Set desired states
    self.des = des
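# Worked numeric sketch for the alpha estimate in __init__ above (illustrative,
# made-up numbers; assumes A has shape (n_actions, n_states, n_states) and
# E has shape (n_states, n_states)):
#
#   A = np.array([[[2., 1.], [0., 3.]],
#                 [[1., 1.], [2., 0.]]])
#   E = np.array([[1., 2.], [1., 1.]])
#   H = A.sum(axis=0)                     # [[3., 2.], [2., 3.]]
#   r = H.sum(axis=1) / E.sum(axis=1)     # [5/3, 5/2]
#   alpha = r / (1 + r)                   # [0.625, 0.714...]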
def extract_states(self, file, load_pkl=False, store_pkl=True):
    '''Extract the inputs needed to maximize output'''

    # If we are not asked to load a pkl file, or no pkl file exists yet,
    # we still need to do some dirty work and load everything from the
    # log files. We also store a pkl version to save time in future runs.
    if load_pkl is False or os.path.exists(file + ".pkl") is False:
        # Pre-process data
        sim = simulator.simulator()  # Environment
        sim.load(file, verbose=False)  # Load npz log file
        time, local_states, fitness = sim.extract()

        # Pre-process data
        s = matop.normalize_rows(local_states)  # Normalize rows

        # Save a pkl file with the pre-processed data
        # so that we can be faster later
        # if we want to reuse the same logfile
        if store_pkl:
            fh.save_pkl([time, s, fitness], file + ".pkl")

    # If the pkl file exists, we are in luck: we can just
    # use the processed log files directly.
    else:
        data = fh.load_pkl(file + ".pkl")
        time = data[0]
        s = data[1]
        fitness = data[2]

    # Set dimensions of state vector
    self.dim = s.shape[1]

    # Return tuple with data
    return time, s, fitness
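# Illustrative usage sketch for extract_states above (hypothetical log path;
# assumes an instance `opt` of this class and a Swarmulator npz log there):
#
#   t, s, fitness = opt.extract_states("data/sample_log", load_pkl=True)
#   # The first call parses the npz log and caches "data/sample_log.pkl";
#   # later calls with load_pkl=True reuse the cached pkl directly.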
def update_H(self, A, policy):
    '''
    Update the H matrix for the chosen policy.

    The updated matrix H holds the cumulative probability of each state
    transition happening under the current policy. This is the probability
    of an action being taken times the probability of the state transition
    caused by that action, summed over all actions.

    For example:

    H[0,0] = P((e00 and a0) or (e00 and a1) or ... or (e00 and aN))
           = P(e00|a0)*P(a0) + P(e00|a1)*P(a1) + ... + P(e00|aN)*P(aN)

    where e00 is a state transition from state 0 to state 0
    and a0...aN are the actions 0 to N.

    In essence H[0,0] = P(e00), given that the actions are
    independent at the local level.
    '''

    # Ensure policy has the correct dimensions
    policy = self.reshape_policy(A, policy)

    # Iterate over each action (columns of policy) and accumulate the
    # action-weighted transition probabilities
    H = np.zeros(A[0].shape)
    for i, p in enumerate(policy.T):
        H += A[i] * p[:, np.newaxis]  # [:, np.newaxis] makes p a column vector

    # Normalize for multiple actions
    if A.shape[0] > 1:
        H = matop.normalize_rows(H)

    # Return updated H
    return H
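# Worked example for update_H above (illustrative, made-up numbers; assumes
# two states and two actions, i.e. A has shape (2, 2, 2), and policy rows are
# the per-state action probabilities):
#
#   A = np.array([[[0.5, 0.5], [1.0, 0.0]],    # transitions under action 0
#                 [[0.0, 1.0], [0.5, 0.5]]])   # transitions under action 1
#   policy = np.array([[0.8, 0.2],             # state 0: P(a0)=0.8, P(a1)=0.2
#                      [0.3, 0.7]])            # state 1: P(a0)=0.3, P(a1)=0.7
#   H = A[0] * policy[:, [0]] + A[1] * policy[:, [1]]
#   # H = [[0.40, 0.60],
#   #      [0.65, 0.35]]  (rows already sum to 1, so normalization is a no-op)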
def reshape_policy(self, A, policy):
    '''Reshape the stochastic policy to the correct dimensions'''

    # Get the number of columns (one per action) and the policy as a numpy array
    cols = A.shape[0]
    policy = np.array(policy)

    # Resize policy
    policy = np.reshape(policy, (policy.size // cols, cols))

    # If more than 1 column, normalize rows
    if cols > 1:
        policy = matop.normalize_rows(policy)

    return policy
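# Illustrative sketch for reshape_policy above (made-up numbers; assumes
# A.shape[0] == 2 actions, so a flat vector of 6 values becomes a 3x2 policy):
#
#   flat = np.array([4., 1., 1., 1., 3., 2.])
#   # reshape_policy(A, flat) returns the row-normalized policy
#   #   [[0.8, 0.2],
#   #    [0.5, 0.5],
#   #    [0.6, 0.4]]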
def evaluate_model_values(f, a=0):
    '''Accumulate the transition model for action a over all log files in
    folder f and return the row-normalized estimate after each file.'''

    # Get all the files
    filelist = load_filelist(f)

    # Load a transition model, accumulating the A matrix file by file
    v = []
    for j, filename in enumerate(filelist):
        sim.load(f + filename, verbose=False)
        if j == 0:
            m = sim.A[a]
        else:
            m += sim.A[a]
        v.append(matop.normalize_rows(m).flatten())

    # One column per log file, each holding the flattened model estimate
    data = np.array(v).T
    return data
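# Illustrative note for evaluate_model_values above (assumes `sim` is a
# module-level simulator instance and load_filelist returns the logs in
# chronological order; the folder name below is hypothetical):
#
#   data = evaluate_model_values("data/sample_run/", a=0)
#   # Column j of `data` is the flattened, row-normalized estimate of A[a]
#   # after accumulating the first j+1 log files; data[:, -1] uses all files.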
def save_policy(self, policy, pr_actions=None, name="temp"):
    '''Save the policy in the correct format for use in Swarmulator'''

    # Resize the policy to the correct dimensions and normalize,
    # else assume it's already correct.
    if pr_actions is not None:
        policy = np.reshape(policy, (policy.size // pr_actions, pr_actions))
        # Normalize rows if needed
        if pr_actions > 1:
            policy = matop.normalize_rows(policy)

    # Save the policy so it can be used by the simulator
    policy_filename = "conf/policies/%s.txt" % name
    policy_file = self.sim.path + "/" + policy_filename

    # Write in the correct format for reading
    if policy.shape[1] == 1:
        fh.save_to_txt(policy.T, policy_file)
    else:
        fh.save_to_txt(policy, policy_file)

    # Return the filename
    return policy_filename
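# Illustrative usage sketch for save_policy above (made-up values and name;
# assumes an instance `opt` of this class with a valid self.sim.path):
#
#   flat_policy = np.array([0.8, 0.2, 0.3, 0.7, 0.5, 0.5])
#   filename = opt.save_policy(flat_policy, pr_actions=2, name="sample")
#   # Writes a 3x2 row-stochastic policy for Swarmulator and returns
#   # "conf/policies/sample.txt"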
def main(args):
    ####################################################################
    # Initialize

    # Argument parser
    parser = argparse.ArgumentParser(
        description='Simulate a task to gather the data for optimization')
    parser.add_argument('controller', type=str,
        help="(str) Controller to use")
    parser.add_argument('folder', type=str,
        help="(str) Folder to use")
    parser.add_argument('-format', type=str, default="pdf",
        help="(str) Save figure format")
    parser.add_argument('-plot', action='store_true',
        help="(bool) Plot flag, set to true to plot the results")
    parser.add_argument('-verbose', action='store_true',
        help="(bool) Verbose flag, set to true to print results to the terminal")
    args = parser.parse_args(args)

    # Load parameters
    fitness, controller, agent, pr_states, pr_actions = \
        parameters.get(args.controller)
    ####################################################################

    ####################################################################
    # Load optimization files
    files_train = [f for f in os.listdir(args.folder)
        if f.startswith("optimization") and f.endswith('.npz')]

    # Unpack last file
    data = np.load(args.folder + files_train[-1])
    H0 = data["H0"].astype(float)
    H1 = data["H1"].astype(float)

    # Fix rounding errors
    H0[H0 < 0.01] = 0.00000
    H1[H1 < 0.01] = 0.00000

    E = matop.normalize_rows(data["E"])
    policy = data["policy"]
    des = data["des"]
    alpha = data["alpha"]
    ####################################################################

    ####################################################################
    # if -plot
    # Plot and display relevant results
    if args.plot:

        # Calculate parameters
        ## Calculate Google matrices (a small numeric sketch follows after this function)
        G0 = np.diag(alpha).dot(H0) + np.diag(1 - alpha).dot(E)
        G1 = np.diag(alpha).dot(H1) + np.diag(1 - alpha).dot(E)

        ## PageRank scores
        prH0 = matop.pagerank(H0)
        prE = matop.pagerank(E)
        pr0 = matop.pagerank(G0)
        pr1 = matop.pagerank(G1)

        ## Initialize pagerank optimizer for evaluation
        ## Using dummy inputs, since init not needed
        p = propt.pagerank_evolve(des, np.array([H0, H1]), E)

        ## Get original fitness and new fitness
        f0 = p.pagerank_fitness(pr0, des)
        f1 = p.pagerank_fitness(pr1, des)

        # Make a folder to store the figures
        folder = "figures/pagerank"
        if not os.path.exists(folder):
            os.makedirs(folder)

        # Now let's plot some figures
        import math
        xint = range(0, math.ceil(pr1[0].size), 2)

        # Figure: Plot pagerank H and E
        plt = pp.setup()
        plt.bar(np.array(range(prH0[0].size)), prH0[0],
            alpha=0.5, label="$PR^\pi$, $\mathbf{H^\pi}$ only")
        plt.bar(np.array(range(prE[0].size)), prE[0],
            alpha=0.5, label="$PR^\pi$, $\mathbf{E}$ only")
        plt = pp.adjust(plt)
        plt.xlabel("State")
        plt.ylabel("PageRank [-]")
        matplotlib.pyplot.xticks(xint)
        plt.legend()
        plt.savefig("%s/pagerank_original_%s.%s"
            % (folder, controller, args.format))
        plt.close()

        # Figure: Diff plot of pagerank values
        plt = pp.setup()
        c = ["blue", "green"]
        color_list = list(map(lambda x: c[1] if x > 0.01 else c[0], des))
        if controller == "forage":
            plt.bar(range(pr1[0].size), (pr1[0] - pr0[0]) * 1000,
                label="$PR^\pi-PR^{\pi^\star}$", color=color_list)
            plt.ylabel("$\Delta$ PageRank (" r"$\times$" r"1000) [-]")
        else:
            plt.bar(range(pr1[0].size), (pr1[0] - pr0[0]),
                label="$PR^\pi-PR^{\pi^\star}$", color=color_list)
            plt.ylabel("$\Delta$ PageRank [-]")
        plt = pp.adjust(plt)
        plt.xlabel("State [-]")
        matplotlib.pyplot.xticks(xint)

        # Custom legend
        custom_lines = [
            matplotlib.lines.Line2D([0], [0], color="blue", lw=20),
            matplotlib.lines.Line2D([0], [0], color="green", lw=20)
        ]
        plt.legend(custom_lines, ['Transitional', 'Desired'])
        plt.savefig("%s/pagerank_diff_%s.%s"
            % (folder, controller, args.format))
        plt.close()
        return
    ####################################################################

    ####################################################################
    # if -verbose
    # Display relevant results to the terminal
    if args.verbose:
        print("\n------- MODEL -------\n")
        print("\nH0 matrix:\n", H0)
        print("\nH1 matrix:\n", H1)
        print("\nE matrix:\n", E)
        print("\nalpha vector:\n", alpha)
        print("\n------- POLICY -------\n", policy)
        # print("\n------- STATS -------\n")
        # print("Original fitness =", f0[0])
        # print("New fitness =", f1[0])

    # Check conditions on last file
    e = 0.00000001
    H0[H0 > e] = 1
    H1[H1 > e] = 1
    E[E > e] = 1
    H0 = H0.astype(int)
    H1 = H1.astype(int)
    E = E.astype(int)
    c = verification.verification(H0, H1, E, policy, des)
    c.verify()
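# Numeric sketch of the Google matrix construction used in main() above
# (illustrative, made-up numbers): each row i mixes the policy-driven
# transitions H with the environment transitions E using the weight alpha[i].
#
#   alpha = np.array([0.75, 0.5])
#   H = np.array([[0.4, 0.6], [0.65, 0.35]])
#   E = np.array([[0.5, 0.5], [0.0, 1.0]])
#   G = np.diag(alpha).dot(H) + np.diag(1 - alpha).dot(E)
#   # G[0] = 0.75*[0.40, 0.60] + 0.25*[0.5, 0.5] = [0.425, 0.575]
#   # G[1] = 0.50*[0.65, 0.35] + 0.50*[0.0, 1.0] = [0.325, 0.675]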
sim.make(controller, agent, clean=True, animation=False, logger=False, verbose=False)

# Run it
f = []
for j in range(args.iterations):
    print("----------------------- %i ----------------------" % j)

    # Generate a random policy
    policy = np.random.rand(pr_states, pr_actions)
    policy = np.reshape(policy, (policy.size // pr_actions, pr_actions))  # Resize policy
    if pr_actions > 1:
        policy = matop.normalize_rows(policy)  # Normalize rows

    # Benchmark its performance
    f.append(
        sim.benchmark(controller, agent, policy, fitness,
                      robots=args.n, runs=args.runs, time_limit=args.t, make=False))

fh.save_pkl(
    f, "data/%s/benchmark_random_%s_t%i_r%i_runs%i.pkl"
    % (controller, controller, args.t, args.n, args.runs))