Example #1
    def __init__(self, des, A, E):
        '''Initializer'''

        # Initialize the evolution API
        self.e = evolution.evolution()

        # Estimate alpha for each state (row) from the ratio of H to E
        ## Let r = H/E = alpha/(1-alpha),
        ## where alpha is the probability of following H
        ## and (1-alpha) the probability of following E.
        ## Solving for alpha gives alpha = r/(1+r).
        H = np.sum(A, axis=0)
        with np.errstate(divide='ignore', invalid='ignore'):
            r = H.sum(axis=1) / E.sum(axis=1)
        r = np.nan_to_num(r)  # Replace NaN from 0/0 divisions with 0, just in case
        self.alpha = r / (1 + r)

        # Store a row-normalized copy of the A matrix for each action
        self.A = np.copy(A)  # copy so the caller's A is not modified in place
        for i in range(A.shape[0]):
            self.A[i] = matop.normalize_rows(self.A[i])

        # Set row normalized E matrix
        self.E = matop.normalize_rows(E)

        # Set desired states
        self.des = des

    def extract_states(self, file, load_pkl=False, store_pkl=True):
        '''Extract the inputs needed to maximize output'''

        # If a pkl file does not exist, then we still need to do some dirty
        # work and load everything from the log files.
        # We will also store the pkl version to save time in future runs.
        if not load_pkl or not os.path.exists(file + ".pkl"):
            # Pre-process data
            sim = simulator.simulator()  # Environment
            sim.load(file, verbose=False)  # Load npz log file
            time, local_states, fitness = sim.extract()  # Pre-process data
            s = matop.normalize_rows(local_states)  # Normalize rows

            # Save a pkl file with the pre-processed data
            # so that we can be faster later
            # if we want to reuse the same logfile
            if store_pkl:
                fh.save_pkl([time, s, fitness], file + ".pkl")

        # If the pkl file exists, we are in luck, we can just
        # use the processed log files directly.
        else:
            data = fh.load_pkl(file + ".pkl")
            time = data[0]
            s = data[1]
            fitness = data[2]

        # Set dimensions of state vector
        self.dim = s.shape[1]

        # Return tuple with data
        return time, s, fitness
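
As a sanity check on the alpha derivation in __init__, here is a minimal standalone sketch (the toy A and E count matrices below are made up for illustration):

import numpy as np

# Two actions, two states: A[k][i, j] = transition count i -> j under action k
A = np.array([[[2., 1.], [0., 3.]],
              [[1., 1.], [2., 0.]]])
E = np.array([[1., 3.], [1., 0.]])

H = A.sum(axis=0)                  # aggregate transitions over all actions
r = H.sum(axis=1) / E.sum(axis=1)  # per-state ratio r = H/E
alpha = r / (1 + r)                # from r = alpha/(1-alpha)
print(alpha)                       # approximately [0.5556 0.8333]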
Example #3
    def update_H(self, A, policy):
        '''
        Update the H matrix for the chosen policy.

        The updated H holds the cumulative probability of each state
        transition happening under the current policy.
        This is the probability of an action being taken times the
        probability of the state transition caused by that action,
        summed over all actions.
        For example:
        H[0,0] = P((e00 and a0) or (e00 and a1) or ... or (e00 and aN))
               = P(e00|a0)*P(a0) + P(e00|a1)*P(a1) + ... + P(e00|aN)*P(aN)
        where e00 is a state transition from state 0 to state 0
        and a0...aN are the actions 0 to N.
        In essence, H[0,0] = P(e00), given that the actions
        are independent at the local level.
        '''
        # Ensure policy has the correct dimensions
        policy = self.reshape_policy(A, policy)

        ## In this routine, we will iterate over each action (columns of policy)
        H = np.zeros(A[0].shape)
        for i, p in enumerate(policy.T):
            H += A[i] * p[:, np.newaxis]  # [:,np.newaxis] makes p vertical

        # Normalize for multiple actions
        if A.shape[0] > 1:
            H = matop.normalize_rows(H)

        # Return updated H
        return H
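
The weighted sum above can be checked on a toy case; a minimal standalone sketch with made-up numbers (assuming A is stacked as (n_actions, n_states, n_states) and policy as (n_states, n_actions)):

import numpy as np

A = np.array([[[1., 0.], [0.5, 0.5]],   # transitions under action 0
              [[0., 1.], [1., 0.]]])    # transitions under action 1
policy = np.array([[0.7, 0.3],          # state 0: P(a0)=0.7, P(a1)=0.3
                   [0.2, 0.8]])         # state 1: P(a0)=0.2, P(a1)=0.8

H = np.zeros(A[0].shape)
for i, p in enumerate(policy.T):
    H += A[i] * p[:, np.newaxis]  # weight each row of A[i] by that state's action probability
print(H)  # [[0.7 0.3], [0.9 0.1]] -- rows remain stochastic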
Example #4
    def reshape_policy(self, A, policy):
        '''Reshape the stochastic policy to the correct dimensions'''

        # Get the number of columns and the policy as a numpy array
        cols = A.shape[0]
        policy = np.array(policy)

        # Resize policy
        policy = np.reshape(policy, (policy.size // cols, cols))

        # If more than 1 column, normalize rows
        if cols > 1:
            policy = matop.normalize_rows(policy)

        return policy
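
Since np.reshape uses row-major (C) order, the flat policy is grouped per state; a quick illustration with arbitrary values:

import numpy as np

flat = np.array([0.9, 0.1, 0.4, 0.6, 0.5, 0.5])
cols = 2  # number of actions, i.e. A.shape[0]
policy = np.reshape(flat, (flat.size // cols, cols))
print(policy)  # [[0.9 0.1], [0.4 0.6], [0.5 0.5]]: one row per state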
Example #5
def evaluate_model_values(f, a=0):
    '''Track how the estimated transition model for action a evolves as log files are accumulated'''
    # Get all the files
    filelist = load_filelist(f)

    # Accumulate the transition model for action a over all log files,
    # storing the row-normalized running estimate after each file
    v = []
    for j, filename in enumerate(filelist):
        sim.load(f + filename, verbose=False)
        if j == 0:
            m = sim.A[a]
        else:
            m += sim.A[a]
        v.append(matop.normalize_rows(m).flatten())

    data = np.array(v).T  # one column per processed log file

    return data
Example #6
	def save_policy(self, policy, pr_actions=None, name="temp"):
		'''Save the policy in the correct format for use in Swarmulator'''

		# If pr_actions is given, reshape the policy to the correct
		# dimensions and normalize; otherwise assume it is already correct.
		if pr_actions is not None:
			policy = np.reshape(policy,(policy.size//pr_actions,pr_actions))
			# Normalize rows if needed
			if pr_actions > 1:
				policy = matop.normalize_rows(policy)

		# Save the policy so it can be used by the simulator
		policy_filename = "conf/policies/%s.txt"%name
		policy_file = self.sim.path + "/" + policy_filename
		
		# Write in the format expected by the simulator
		# (a one-column policy is transposed so it is stored on a single line)
		if policy.shape[1] == 1:
			fh.save_to_txt(policy.T, policy_file)
		else:
			fh.save_to_txt(policy, policy_file)

		# Return the filename
		return policy_filename
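
The transpose in the one-column case keeps the whole policy on a single line of the text file. A rough stand-in using np.savetxt (assuming fh.save_to_txt writes arrays with similar row-per-line semantics; that is an assumption, not confirmed by the source):

import numpy as np

policy = np.array([[0.2], [0.8], [0.5]])  # one action: one probability per state
np.savetxt("policy_col.txt", policy)      # three lines, one value per line
np.savetxt("policy_row.txt", policy.T)    # a single line: 0.2 0.8 0.5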
Example #7
def main(args):
    ####################################################################
    # Initialize

    # Argument parser
    parser = argparse.ArgumentParser(
        description='Simulate a task to gather the data for optimization')
    parser.add_argument('controller', type=str, help="(str) Controller to use")
    parser.add_argument('folder', type=str, help="(str) Folder to use")
    parser.add_argument('-format',
                        type=str,
                        default="pdf",
                        help="(str) Save figure format")
    parser.add_argument('-plot',
                        action='store_true',
                        help="(bool) If true, plot the results")
    parser.add_argument('-verbose',
                        action='store_true',
                        help="(bool) If true, print the results to the terminal")
    args = parser.parse_args(args)

    # Load parameters
    fitness, controller, agent, pr_states, pr_actions = \
        parameters.get(args.controller)
    ####################################################################

    ####################################################################
    # Load optimization files
    files_train = sorted([f for f in os.listdir(args.folder)
                          if f.startswith("optimization") and f.endswith('.npz')])

    # Unpack the last file; sorting gives a deterministic order,
    # since os.listdir returns files in arbitrary order
    data = np.load(args.folder + files_train[-1])
    H0 = data["H0"].astype(float)
    H1 = data["H1"].astype(float)
    # Zero out spurious small entries left over from rounding
    H0[H0 < 0.01] = 0.0
    H1[H1 < 0.01] = 0.0
    E = matop.normalize_rows(data["E"])
    policy = data["policy"]
    des = data["des"]
    alpha = data["alpha"]
    ####################################################################

    ####################################################################
    # if -plot
    # Plot and display relevant results

    if args.plot:

        # Calculate parameters
        ## Calculate Google matrices
        G0 = np.diag(alpha).dot(H0) + np.diag(1 - alpha).dot(E)
        G1 = np.diag(alpha).dot(H1) + np.diag(1 - alpha).dot(E)
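        ## (Row i of G blends row i of H with row i of E, weighted by alpha[i]
        ##  and 1-alpha[i]; if H and E are row-stochastic, G stays row-stochastic.)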

        ## PageRank scores
        prH0 = matop.pagerank(H0)
        prE = matop.pagerank(E)
        pr0 = matop.pagerank(G0)
        pr1 = matop.pagerank(G1)

        ## Initialize pagerank optimizer for evaluation
        ## Using dummy inputs, since init not needed
        p = propt.pagerank_evolve(des, np.array([H0, H1]), E)

        ## Get original fitness and new fitness
        f0 = p.pagerank_fitness(pr0, des)
        f1 = p.pagerank_fitness(pr1, des)

        # Make a folder to store the figures
        folder = "figures/pagerank"
        # (os.path.dirname(folder) would only create "figures",
        # never the "figures/pagerank" subfolder we save into)
        if not os.path.exists(folder):
            os.makedirs(folder)

        # Now let's plot some figures
        xint = range(0, pr1[0].size, 2)  # x-axis ticks at every other state

        # Figure: Plot pagerank H and E
        plt = pp.setup()
        plt.bar(np.array(range(prH0[0].size)),
                prH0[0],
                alpha=0.5,
                label=r"$PR^\pi$, $\mathbf{H^\pi}$ only")
        plt.bar(np.array(range(prE[0].size)),
                prE[0],
                alpha=0.5,
                label=r"$PR^\pi$, $\mathbf{E}$ only")
        plt = pp.adjust(plt)
        plt.xlabel("State")
        plt.ylabel("PageRank [-]")
        matplotlib.pyplot.xticks(xint)
        plt.legend()
        plt.savefig("%s/pagerank_original_%s.%s" \
         %(folder,controller,args.format))
        plt.close()

        # Figure: Diff plot of pagerank values
        plt = pp.setup()
        c = ["blue", "green"]
        color_list = list(map(lambda x: c[1] if x > 0.01 else c[0], des))
        if controller == "forage":
            plt.bar(range(pr1[0].size), (pr1[0] - pr0[0]) * 1000,
                    label="$PR^\pi-PR^{\pi^\star}$",
                    color=color_list)
            plt.ylabel("$\Delta$ PageRank (" r"$\times$" r"1000) [-]")
        else:
            plt.bar(range(pr1[0].size), (pr1[0] - pr0[0]),
                    label="$PR^\pi-PR^{\pi^\star}$",
                    color=color_list)
            plt.ylabel("$\Delta$ PageRank [-]")
        plt = pp.adjust(plt)
        plt.xlabel("State [-]")
        matplotlib.pyplot.xticks(xint)

        # Custom legend
        custom_lines = [
            matplotlib.lines.Line2D([0], [0], color="blue", lw=20),
            matplotlib.lines.Line2D([0], [0], color="green", lw=20)
        ]
        plt.legend(custom_lines, ['Transitional', 'Desired'])
        plt.savefig("%s/pagerank_diff_%s.%s" %
                    (folder, controller, args.format))
        plt.close()
        return
    ####################################################################

    ####################################################################
    # if -verbose
    # Display relevant results to terminal
    if args.verbose:
        print("\n------- MODEL -------\n")
        print("\nH0 matrix:\n", H0)
        print("\nH1 matrix:\n", H1)
        print("\nE matrix:\n", E)
        print("\nalpha vector:\n", alpha)
        print("\n------- POLICY -------\n", policy)
        # print("\n------- STATS -------\n")
        # print("Original fitness =", f0[0])
        # print("New fitness =", f1[0])

    # Check the verification conditions on the last file:
    # binarize the matrices into 0/1 adjacency masks first
    e = 1e-8
    H0[H0 > e] = 1
    H1[H1 > e] = 1
    E[E > e] = 1
    H0 = H0.astype(int)
    H1 = H1.astype(int)
    E = E.astype(int)
    c = verification.verification(H0, H1, E, policy, des)
    c.verify()
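
For completeness, a hypothetical entry point for this main(args) (the script name in the comment is illustrative only):

import sys

if __name__ == "__main__":
    # e.g.: python3 verify.py <controller> <folder> -plot -verbose
    main(sys.argv[1:])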
Example #8
sim.make(controller,
         agent,
         clean=True,
         animation=False,
         logger=False,
         verbose=False)

# Run it
f = []
for j in range(args.iterations):
    print("----------------------- %i ----------------------" % j)
    # Generate a random policy
    policy = np.random.rand(pr_states, pr_actions)
    policy = np.reshape(policy,
                        (policy.size // pr_actions, pr_actions))  # Resize policy
    if pr_actions > 1:
        policy = matop.normalize_rows(policy)  # Normalize rows

    # Benchmark its performance
    f.append(
        sim.benchmark(controller,
                      agent,
                      policy,
                      fitness,
                      robots=args.n,
                      runs=args.runs,
                      time_limit=args.t,
                      make=False))

fh.save_pkl(
    f, "data/%s/benchmark_random_%s_t%i_r%i_runs%i.pkl" %
    (controller, controller, args.t, args.n, args.runs))