Beispiel #1
0
def main():

	# random seed
	np.random.seed(88)

	# init
	util.get_x0()
	util.get_xd()

	# preallocate
	X = np.zeros( (param.get('n'), param.get('nt')))

	# go sim 
	param['ta_on'] = False	
	x_curr = param.get('x0')
	for k,t in enumerate( param.get('T')):

		u_curr = controller.get_dynamics_u(x_curr,t)
		x_next = x_curr + dynamics.get_dxdt( x_curr, u_curr, t) * param.get('dt')		
		X[:,k] = np.squeeze(x_curr)

		x_curr = x_next
		print('t/T: ' + str(t/param.get('T')[-1]))

	plotter.plot_SS( X, param.get('T'), title = 'No Task Assignment')

	# go sim 
	param['ta_on'] = True
	x_curr = param.get('x0')
	for k,t in enumerate( param.get('T')):

		u_curr = controller.get_dynamics_u(x_curr,t)
		x_next = x_curr + dynamics.get_dxdt( x_curr, u_curr, t) * param.get('dt')		
		X[:,k] = np.squeeze(x_curr)

		x_curr = x_next
		print('t/T: ' + str(t/param.get('T')[-1]))

	plotter.plot_SS( X, param.get('T'), title = 'With Task Assignment')

	# plotter.plot_ss( X[:,-1], param.get('T')[-1])
	plotter.save_figs()
Beispiel #2
0
def fn4():

    pdf_path = os.path.join(os.getcwd(), param.get('fn_plots'))

    # remove if exists
    if os.path.exists(pdf_path):
        os.remove(pdf_path)

    np.random.seed(88)

    util.get_x0()
    util.get_xd()

    X = []
    U = []

    param['controller'] = 'mpc'
    x_curr = param.get('x0')
    for t in param.get('T'):
        try:
            u_curr = controller.get_u(x_curr, t)
            x_next = x_curr + dynamics.get_dxdt(x_curr, u_curr,
                                                t) * param.get('dt')

            x_curr = x_next
            print('t/T = ' + str(t) + '/' + str(param.get('T')[-1]))
        except:
            pass

        X.append(x_curr)
        U.append(u_curr)

    X = np.squeeze(np.asarray(X))
    plotter.plot_SS(X, param.get('T'))
    plotter.save_figs()
    subprocess.call(["xdg-open", pdf_path])
Beispiel #3
0
def main():

    np.random.seed(88)

    util.get_x0()
    util.get_xd()
    dynamics.compute_jacobians()

    C = []
    print('Sim...')
    for u_type in param.get('controllers'):

        param['controller'] = u_type

        print('Controller: ' + str(u_type))

        X = []
        U = []
        V = []
        pbar = []
        vbar = []
        abar = []
        x_curr = param.get('x0')
        count = 0
        for k, t in enumerate(param.get('T')):

            # try:

            if param.get('controller') is 'scp':
                if count == 0:
                    U_scp = controller.get_u(x_curr, t)
                try:
                    u_curr = U_scp[count]
                except:
                    u_curr = np.zeros((param.get('m'), 1))

            elif param.get('controller') is 'mpc':
                if np.mod(count, param.get('mpc_update')) == 0:
                    count = 0
                    U_mpc = controller.get_u(x_curr, t)
                try:
                    u_curr = U_mpc[count]
                except:
                    u_curr = np.zeros((param.get('m'), 1))

            else:
                u_curr = controller.get_u(x_curr, t)

            # except:
            # 	break

            x_next = x_curr + dynamics.get_dxdt(x_curr, u_curr,
                                                t) * param.get('dt')

            X.append(x_curr)
            U.append(u_curr)
            V.append(dynamics.get_V(x_curr, t))

            # for error plot
            pbar.append( np.dot( \
             np.transpose( util.get_my_1()), np.dot( util.get_p_a(), x_curr)))
            vbar.append( np.dot( \
             np.transpose( util.get_my_1()), np.dot( util.get_v_a(), x_curr)))
            abar.append( np.dot( \
             np.transpose( util.get_my_1()), dynamics.get_vdot_a(x_curr)))

            x_curr = x_next
            count += 1
            print('\t' + str(t) + '/' + str(param.get('T')[-1]))

            # if k > 2:
            # 	break

        X = np.squeeze(np.asarray(X))
        U = np.squeeze(np.asarray(U))
        V = np.asarray(V)
        pbar = np.asarray(pbar)
        vbar = np.asarray(vbar)
        abar = np.asarray(abar)

        C.append(controller.calc_cost(U))

        print('Plots...')
        plotter.plot_SS(X, param.get('T'), title=str(u_type) + ' State Space')
        plotter.plot_V(V, param.get('T'), title=str(u_type) + ' Lyapunov')
        plotter.plot_U(U, param.get('T'), title=str(u_type) + ' Controller')
        plotter.plot_test2( \
         np.squeeze(pbar) - np.squeeze(param.get('pd')), \
         np.squeeze(vbar) - np.squeeze(param.get('vd')), \
         np.squeeze(abar) - np.squeeze(param.get('ad')), \
         param.get('T'), title = str(u_type) + ' Errors')
        print('Plots Complete')

    # plotter.plot_cost( C)
    plotter.save_figs()

    if param.get('gif_on'):
        print('Gif...')
        plotter.make_gif(X, param.get('T'))
        print('Gif Complete')
		# # plotting
		if True:
			env.render()

			for i_config in range(1): #range(env.state_dim_per_agent):
				fig,ax = plotter.make_fig()
				# ax.set_title(env.states_name[i_config])			
				for agent in env.agents:
					if env.good_nodes[agent.i]:
						color = 'blue'
					else:
						color = 'red'
					ax.plot(
						times[0:steps],
						states[0:steps,env.agent_idx_to_state_idx(agent.i)+i_config],
						color=color)
				ax.axhline(
					env.desired_ave,
					label='desired',
					color='green')
		
		print('Number of Trajectories:', trajectory_rollout_count)
		print('Data Shape: ', data.shape)

	file = os.path.join(param.il_load_dataset,'rl_data.npy')
	np.save(file, data)

	plotter.save_figs(param.plots_fn)
	plotter.open_figs(param.plots_fn)
def main():

    param = Param()
    env = CityMap(param)

    if param.make_dataset_on:
        print('   making dataset...')
        train_dataset, test_dataset = datahandler.make_dataset(env)
        datahandler.write_dataset(env, train_dataset, test_dataset)

    datahandler.load_dataset(env)

    train_dataset = env.train_dataset
    test_dataset = env.test_dataset

    train_w = env.get_customer_demand(train_dataset, train_dataset[-1, 0])
    test_w = env.get_customer_demand(test_dataset, test_dataset[-1, 0])

    print(train_dataset[0, :])
    print(
        np.linalg.norm([
            train_dataset[0, 4] - train_dataset[0, 2],
            train_dataset[0, 5] - train_dataset[0, 3]
        ]))
    print(test_dataset[0, :])
    print(
        np.linalg.norm([
            test_dataset[0, 4] - test_dataset[0, 2],
            test_dataset[0, 5] - test_dataset[0, 3]
        ]))

    # dataset = [\
    # starttime [s],
    # duration [s],
    # x_p (longitude) [degrees],
    # y_p (latitute) [degrees],
    # x_d (longitude) [degrees],
    # x_d (latitute) [degrees],
    #]

    print('train_dataset.shape', train_dataset.shape)
    print('test_dataset.shape', test_dataset.shape)

    print('train ave_length: ', ave_length(train_dataset))
    print('test ave_length: ', ave_length(test_dataset))

    print('train ave_duration: ', ave_duration(train_dataset))
    print('test ave_duration: ', ave_duration(test_dataset))

    print('train ave_customer_per_time: ',
          ave_customer_per_time(train_dataset))
    print('test ave_customer_per_time: ', ave_customer_per_time(test_dataset))

    print('train ave taxi speed: ', ave_taxi_speed(train_dataset))
    print('test ave taxi speed: ', ave_taxi_speed(test_dataset))

    print('train_dataset[0:1,:]:', train_dataset[0:1, :])
    print('test_dataset[0:1,:]:', test_dataset[0:1, :])

    fig, ax = plt.subplots()
    ax.plot(train_dataset[:, 0])
    ax.set_ylabel('pickup time')
    ax.set_xlabel('customer request #')

    fig, ax = plt.subplots()
    ax.plot(test_dataset[:, 0])
    ax.set_ylabel('pickup time')
    ax.set_xlabel('customer request #')

    fig, ax = plt.subplots()
    ax.plot(train_dataset[:, 1])
    ax.set_ylabel('train duration')
    ax.set_xlabel('customer request #')

    fig, ax = plt.subplots()
    ax.plot(test_dataset[:, 1])
    ax.set_ylabel('test duration')
    ax.set_xlabel('customer request #')

    fig, ax = plt.subplots()
    ax.plot(
        np.linalg.norm([
            train_dataset[:, 4] - train_dataset[:, 2],
            train_dataset[:, 5] - train_dataset[:, 3]
        ],
                       axis=0))
    ax.set_ylabel('train length')
    ax.set_xlabel('customer request #')

    fig, ax = plt.subplots()
    ax.plot(
        np.linalg.norm([
            test_dataset[:, 4] - test_dataset[:, 2],
            test_dataset[:, 5] - test_dataset[:, 3]
        ],
                       axis=0))
    ax.set_ylabel('test length')
    ax.set_xlabel('customer request #')

    print('saving and opening figs...')
    plotter.save_figs('dataset_plots.pdf')
    plotter.open_figs('dataset_plots.pdf')
Beispiel #6
0
def sim(param, env, controllers, initial_state, visualize):
    def run_sim(controller, initial_state):
        states = np.zeros((len(times), env.n))
        actions = np.zeros((len(times) - 1, env.m))
        states[0] = env.reset(initial_state)
        reward = 0
        for step, time in enumerate(times[:-1]):
            state = states[step]
            if param.pomdp_on:
                observation = env.observe()
            else:
                observation = state

            action = controller.policy(observation)
            states[step + 1], r, done, _ = env.step(action)
            reward += r
            actions[step] = action.flatten()
            if done:
                break

        print('reward: ', reward)
        env.close()
        return states, actions, step

    # -------------------------------------------

    # environment
    times = param.sim_times
    device = "cpu"

    if initial_state is None:
        initial_state = env.reset()

    print("Initial State: ", initial_state)

    # run sim
    SimResult = namedtuple('SimResult', ['states', 'actions', 'steps', 'name'])

    for name, controller in controllers.items():
        print("Running simulation with " + name)
        if hasattr(controller, 'policy'):
            result = SimResult._make(
                run_sim(controller, initial_state) + (name, ))
        else:
            result = SimResult._make((controller.states, controller.actions,
                                      controller.steps, name))
        sim_results = []
        sim_results.append(result)

        if param.sim_render_on:
            env.render()

        # plot time varying states
        if param.single_agent_sim:
            for i in range(env.n):
                fig, ax = plotter.subplots()
                ax.set_title(env.states_name[i])
                for result in sim_results:
                    ax.plot(times[0:result.steps],
                            result.states[0:result.steps, i],
                            label=result.name)
                ax.legend()

            for i, name in enumerate(env.deduced_state_names):
                fig, ax = plotter.subplots()
                ax.set_title(name)
                for result in sim_results:
                    deduce_states = np.empty(
                        (result.steps, len(env.deduced_state_names)))
                    for j in range(result.steps):
                        deduce_states[j] = env.deduce_state(result.states[j])

                    ax.plot(times[0:result.steps],
                            deduce_states[:, i],
                            label=result.name)
                ax.legend()

            for i in range(env.m):
                fig, ax = plotter.subplots()
                ax.set_title(env.actions_name[i])
                for result in sim_results:
                    ax.plot(times[0:result.steps],
                            result.actions[0:result.steps, i],
                            label=result.name)
                ax.legend()

        elif param.multi_agent_sim:
            for i_config in range(1):  #range(env.state_dim_per_agent):
                fig, ax = plotter.make_fig()
                ax.set_title(env.states_name[i_config])
                for agent in env.agents:
                    if env.good_nodes[agent.i]:
                        color = 'blue'
                    else:
                        color = 'red'
                    for result in sim_results:
                        ax.plot(
                            times[0:result.steps],
                            result.states[0:result.steps,
                                          env.agent_idx_to_state_idx(agent.i) +
                                          i_config],
                            label=result.name,
                            color=color)
                ax.axhline(env.desired_ave, label='desired', color='green')

        # # plot state space
        # if param.multi_agent_sim:
        # 	fig,ax = plotter.make_fig()
        # 	for agent in env.agents:
        # 		ax.set_title('State Space')
        # 		for result in sim_results:
        # 			ax.plot(
        # 				result.states[0:result.steps,env.agent_idx_to_state_idx(agent.i)],
        # 				result.states[0:result.steps,env.agent_idx_to_state_idx(agent.i)+1],
        # 				label=result.name)

        # extract gains
        if param.il_controller_class in ['PID_wRef', 'PID']:
            controller = controllers['IL']
            for result in sim_results:
                if result.name == 'IL':
                    break
            kp, kd = util.extract_gains(controller,
                                        result.states[0:result.steps])
            fig, ax = plotter.plot(times[1:result.steps],
                                   kp[0:result.steps, 0],
                                   title='Kp pos')
            fig, ax = plotter.plot(times[1:result.steps],
                                   kp[0:result.steps, 1],
                                   title='Kp theta')
            fig, ax = plotter.plot(times[1:result.steps],
                                   kd[0:result.steps, 0],
                                   title='Kd pos')
            fig, ax = plotter.plot(times[1:result.steps],
                                   kd[0:result.steps, 1],
                                   title='Kd theta')

        # extract reference trajectory
        if param.il_controller_class in ['PID_wRef', 'Ref']:
            controller = controllers['IL']
            for result in sim_results:
                if result.name == 'IL':
                    break
            ref_state = util.extract_ref_state(controller, result.states)
            for i in range(env.n):
                fig, ax = plotter.plot(times[1:result.steps + 1],
                                       ref_state[0:result.steps, i],
                                       title="ref " + env.states_name[i])

    plotter.save_figs(param.plots_fn)
    plotter.open_figs(param.plots_fn)

    # visualize
    if visualize:
        # plotter.visualize(param, env, states_deeprl)
        env.visualize(sim_results[0].states[0:result.steps], 0.1)
Beispiel #7
0
def main():

	param = Param()

	param.desired_env_ncell = 2000
	param.xmin_thresh = None
	param.xmax_thresh = None
	param.ymin_thresh = None
	param.ymax_thresh = None

	param.update()

	env = CityMap(param)

	# init datasets
	if param.make_dataset_on:
		print('   making dataset...')
		train_dataset, test_dataset = datahandler.make_dataset(env)
		datahandler.write_dataset(env, train_dataset, test_dataset)
	print('   loading dataset...')
	datahandler.load_dataset(env)

	# env.init_map()

	# fig,ax=plotter.make_fig()
	# env.print_map(fig=fig,ax=ax)

	train_heatmap = env.heatmap(env.train_dataset)
	test_heatmap = env.heatmap(env.test_dataset)

	fig = plt.figure()
	gs = fig.add_gridspec(2,3)
	ax1 = fig.add_subplot(gs[0, 0])
	ax2 = fig.add_subplot(gs[0, 1])
	ax3 = fig.add_subplot(gs[0, 2])
	ax4 = fig.add_subplot(gs[1, :])

	plotter.plot_heatmap(env,train_heatmap,ax1)
	plotter.plot_heatmap(env,train_heatmap,ax2)
	plotter.plot_heatmap(env,test_heatmap,ax3)
	plotter.plot_cell_demand_over_time(env, ax4)
	
	ax2.set_ylim(bottom=41.85)
	ax2.set_ylim(top=42.)
	ax2.set_xlim(left=-87.7) 
	ax2.set_xlim(right=-87.6)

	ax3.set_ylim(bottom=41.85)
	ax3.set_ylim(top=42.)
	ax3.set_xlim(left=-87.7)
	ax3.set_xlim(right=-87.6)

	ax1.axis("off") 
	ax2.axis("off")
	ax3.axis("off")

	ax1.set_title('Chicago',fontsize=10)
	ax2.set_title('October 29, 2016',fontsize=7.5)
	ax3.set_title('October 30, 2016',fontsize=7.5)

	# ax1.get_yaxis().set_visible(False)
	# ax2.get_xaxis().set_visible(False)
	# ax2.get_yaxis().set_visible(False)


	print('saving and opening figs...')
	plotter.save_figs(param.plot_fn)
	plotter.open_figs(param.plot_fn)
Beispiel #8
0
def sim(param, env, controllers, initial_state, visualize):

    # environment
    times = param.sim_times
    device = "cpu"

    if initial_state is None:
        initial_state = env.reset()

    # run sim
    SimResult = namedtuple(
        'SimResult', ['states', 'observations', 'actions', 'steps', 'name'])

    for name, controller in controllers.items():
        print("Running simulation with " + name)
        print("Initial State: ", initial_state)
        if hasattr(controller, 'policy'):
            result = SimResult._make(
                run_sim(param, env, controller, initial_state) + (name, ))
        else:
            observations = []
            result = SimResult._make(
                (controller.states, observations, controller.actions,
                 controller.steps, name))
        sim_results = []
        sim_results.append(result)

        # plot state space
        if param.env_name in [
                'SingleIntegrator', 'SingleIntegratorVelSensing',
                'DoubleIntegrator'
        ]:
            fig, ax = plotter.make_fig()
            ax.set_title('State Space')
            ax.set_aspect('equal')

            for o in env.obstacles:
                ax.add_patch(
                    Rectangle(o, 1.0, 1.0, facecolor='gray', alpha=0.5))

            for agent in env.agents:

                line = ax.plot(
                    result.states[0:result.steps,
                                  env.agent_idx_to_state_idx(agent.i)],
                    result.states[0:result.steps,
                                  env.agent_idx_to_state_idx(agent.i) + 1],
                    alpha=0.5)
                color = line[0].get_color()

                # plot velocity vectors:
                X = []
                Y = []
                U = []
                V = []
                for k in np.arange(0, result.steps, 100):
                    X.append(
                        result.states[k,
                                      env.agent_idx_to_state_idx(agent.i)])
                    Y.append(
                        result.states[k,
                                      env.agent_idx_to_state_idx(agent.i) + 1])
                    if param.env_name in [
                            'SingleIntegrator', 'SingleIntegratorVelSensing'
                    ]:
                        # Singleintegrator: plot actions
                        U.append(result.actions[k, 2 * agent.i + 0])
                        V.append(result.actions[k, 2 * agent.i + 1])
                    elif param.env_name in ['DoubleIntegrator']:
                        # doubleintegrator: plot velocities
                        U.append(
                            result.states[k,
                                          env.agent_idx_to_state_idx(agent.i) +
                                          2])
                        V.append(
                            result.states[k,
                                          env.agent_idx_to_state_idx(agent.i) +
                                          3])

                ax.quiver(X,
                          Y,
                          U,
                          V,
                          angles='xy',
                          scale_units='xy',
                          scale=0.5,
                          color=color,
                          width=0.005)

                # num_obstacles = (result.observations.shape[1] - result.observations[0][0] - 5) / 2
                # print(num_obstacles)
                #
                # print(result.observations)

                plotter.plot_circle(
                    result.states[1, env.agent_idx_to_state_idx(agent.i)],
                    result.states[1,
                                  env.agent_idx_to_state_idx(agent.i) + 1],
                    param.r_agent,
                    fig=fig,
                    ax=ax,
                    color=color)
                # plotter.plot_square(result.states[-1,env.agent_idx_to_state_idx(agent.i)],
                # 	result.states[-1,env.agent_idx_to_state_idx(agent.i)+1],param.r_agent,fig=fig,ax=ax,color=color)
                plotter.plot_square(agent.s_g[0],
                                    agent.s_g[1],
                                    param.r_agent,
                                    angle=45,
                                    fig=fig,
                                    ax=ax,
                                    color=color)

            # draw state for each time step
            robot = 1
            if param.env_name in [
                    'SingleIntegrator', 'SingleIntegratorVelSensing'
            ]:
                for step in np.arange(0, result.steps, 1000):
                    fig, ax = plotter.make_fig()
                    ax.set_title('State at t={} for robot={}'.format(
                        times[step], robot))
                    ax.set_aspect('equal')

                    # plot all obstacles
                    for o in env.obstacles:
                        ax.add_patch(
                            Rectangle(o, 1.0, 1.0, facecolor='gray',
                                      alpha=0.5))

                    # plot overall trajectory
                    line = ax.plot(
                        result.states[0:result.steps,
                                      env.agent_idx_to_state_idx(robot)],
                        result.states[0:result.steps,
                                      env.agent_idx_to_state_idx(robot) + 1],
                        "--")
                    color = line[0].get_color()

                    # plot current position
                    plotter.plot_circle(
                        result.states[step,
                                      env.agent_idx_to_state_idx(robot)],
                        result.states[step,
                                      env.agent_idx_to_state_idx(robot) + 1],
                        param.r_agent,
                        fig=fig,
                        ax=ax,
                        color=color)

                    # plot current observation
                    observation = result.observations[step][robot][0]
                    num_neighbors = int(observation[0])
                    num_obstacles = int(
                        (observation.shape[0] - 3 - 2 * num_neighbors) / 2)

                    robot_pos = result.states[
                        step,
                        env.agent_idx_to_state_idx(robot):env.
                        agent_idx_to_state_idx(robot) + 2]

                    idx = 3
                    for i in range(num_neighbors):
                        pos = observation[idx:idx + 2] + robot_pos
                        ax.add_patch(
                            Circle(pos,
                                   0.25,
                                   facecolor='gray',
                                   edgecolor='red',
                                   alpha=0.5))
                        idx += 2

                    for i in range(num_obstacles):
                        # pos = observation[idx : idx+2] + robot_pos - np.array([0.5,0.5])
                        # ax.add_patch(Rectangle(pos, 1.0, 1.0, facecolor='gray', edgecolor='red', alpha=0.5))
                        pos = observation[idx:idx + 2] + robot_pos
                        ax.add_patch(
                            Circle(pos,
                                   0.5,
                                   facecolor='gray',
                                   edgecolor='red',
                                   alpha=0.5))
                        idx += 2

                    # plot goal
                    goal = observation[1:3] + robot_pos
                    ax.add_patch(
                        Rectangle(goal - np.array([0.2, 0.2]),
                                  0.4,
                                  0.4,
                                  alpha=0.5,
                                  color=color))

                # 	# import matplotlib.pyplot as plt
                # 	# plt.savefig("test.svg")
                # 	# exit()
            elif param.env_name in ['DoubleIntegrator']:
                for step in np.arange(0, result.steps, 1000):
                    fig, ax = plotter.make_fig()
                    ax.set_title('State at t={} for robot={}'.format(
                        times[step], robot))
                    ax.set_aspect('equal')

                    # plot all obstacles
                    for o in env.obstacles:
                        ax.add_patch(
                            Rectangle(o, 1.0, 1.0, facecolor='gray',
                                      alpha=0.5))

                    # plot overall trajectory
                    line = ax.plot(
                        result.states[0:result.steps,
                                      env.agent_idx_to_state_idx(robot)],
                        result.states[0:result.steps,
                                      env.agent_idx_to_state_idx(robot) + 1],
                        "--")
                    color = line[0].get_color()

                    # plot current position
                    plotter.plot_circle(
                        result.states[step,
                                      env.agent_idx_to_state_idx(robot)],
                        result.states[step,
                                      env.agent_idx_to_state_idx(robot) + 1],
                        param.r_agent,
                        fig=fig,
                        ax=ax,
                        color=color)

                    # plot current observation
                    observation = result.observations[step][robot][0]
                    num_neighbors = int(observation[0])
                    num_obstacles = int(
                        (observation.shape[0] - 5 - 4 * num_neighbors) / 2)

                    robot_pos = result.states[
                        step,
                        env.agent_idx_to_state_idx(robot):env.
                        agent_idx_to_state_idx(robot) + 2]

                    X = []
                    Y = []
                    U = []
                    V = []

                    idx = 5
                    for i in range(num_neighbors):
                        pos = observation[idx:idx + 2] + robot_pos
                        X.append(pos[0])
                        Y.append(pos[1])
                        U.append(observation[idx + 2])
                        V.append(observation[idx + 3])
                        # print(np.linalg.norm(observation[idx+2:idx+4]))
                        ax.add_patch(
                            Circle(pos,
                                   param.r_agent,
                                   facecolor='gray',
                                   edgecolor='red',
                                   alpha=0.5))
                        idx += 4

                    for i in range(num_obstacles):
                        pos = observation[idx:idx + 2] + robot_pos - np.array(
                            [0.5, 0.5])
                        ax.add_patch(
                            Rectangle(pos,
                                      1.0,
                                      1.0,
                                      facecolor='gray',
                                      edgecolor='red',
                                      alpha=0.5))
                        # pos = observation[idx : idx+2] + robot_pos
                        # ax.add_patch(Circle(pos, 0.5, facecolor='gray', edgecolor='red', alpha=0.5))
                        idx += 2

                    # plot goal
                    goal = observation[1:3] + robot_pos
                    ax.add_patch(
                        Rectangle(goal - np.array([0.2, 0.2]),
                                  0.4,
                                  0.4,
                                  alpha=0.5,
                                  color=color))
                    X.append(robot_pos[0])
                    Y.append(robot_pos[1])
                    U.append(observation[3])
                    V.append(observation[4])

                    # plot velocity vectors
                    ax.quiver(X,
                              Y,
                              U,
                              V,
                              angles='xy',
                              scale_units='xy',
                              scale=0.5,
                              color='red',
                              width=0.005)

        elif param.env_name == 'Consensus' and param.sim_render_on:
            env.render()
        elif param.sim_render_on:
            env.render()

        # plot time varying states
        if param.single_agent_sim:
            for i in range(env.n):
                fig, ax = plotter.subplots()
                ax.set_title(env.states_name[i])
                for result in sim_results:
                    ax.plot(times[0:result.steps],
                            result.states[0:result.steps, i],
                            label=result.name)
                ax.legend()

            for i, name in enumerate(env.deduced_state_names):
                fig, ax = plotter.subplots()
                ax.set_title(name)
                for result in sim_results:
                    deduce_states = np.empty(
                        (result.steps, len(env.deduced_state_names)))
                    for j in range(result.steps):
                        deduce_states[j] = env.deduce_state(result.states[j])

                    ax.plot(times[0:result.steps],
                            deduce_states[:, i],
                            label=result.name)
                ax.legend()

            for i in range(env.m):
                fig, ax = plotter.subplots()
                ax.set_title(env.actions_name[i])
                for result in sim_results:
                    ax.plot(times[0:result.steps],
                            result.actions[0:result.steps, i],
                            label=result.name)
                ax.legend()

        elif param.env_name == 'Consensus':
            for i_config in range(1):  #range(env.state_dim_per_agent):
                fig, ax = plotter.make_fig()
                # ax.set_title(env.states_name[i_config])
                for agent in env.agents:
                    if env.good_nodes[agent.i]:
                        color = 'blue'
                    else:
                        color = 'red'
                    for result in sim_results:
                        ax.plot(
                            times[0:result.steps],
                            result.states[0:result.steps,
                                          env.agent_idx_to_state_idx(agent.i) +
                                          i_config],
                            label=result.name,
                            color=color)
                ax.axhline(env.desired_ave, label='desired', color='green')

        elif param.env_name in ['SingleIntegrator', 'DoubleIntegrator']:
            for i_config in range(env.state_dim_per_agent):
                fig, ax = plotter.make_fig()
                ax.set_title(env.states_name[i_config])
                for agent in env.agents:
                    for result in sim_results:
                        ax.plot(
                            times[1:result.steps],
                            result.states[1:result.steps,
                                          env.agent_idx_to_state_idx(agent.i) +
                                          i_config],
                            label=result.name)

        # plot time varying actions
        if param.env_name in ['SingleIntegrator', 'DoubleIntegrator']:
            for i_config in range(env.action_dim_per_agent):
                fig, ax = plotter.make_fig()
                ax.set_title(env.actions_name[i_config])
                for agent in env.agents:
                    for result in sim_results:
                        ax.plot(
                            times[1:result.steps],
                            result.actions[1:result.steps,
                                           agent.i * env.action_dim_per_agent +
                                           i_config],
                            label=result.name)

        # extract gains
        if name in ['PID_wRef', 'PID']:
            controller = controllers['IL']
            for result in sim_results:
                if result.name == 'IL':
                    break
            kp, kd = util.extract_gains(controller,
                                        result.states[0:result.steps])
            fig, ax = plotter.plot(times[1:result.steps],
                                   kp[0:result.steps, 0],
                                   title='Kp pos')
            fig, ax = plotter.plot(times[1:result.steps],
                                   kp[0:result.steps, 1],
                                   title='Kp theta')
            fig, ax = plotter.plot(times[1:result.steps],
                                   kd[0:result.steps, 0],
                                   title='Kd pos')
            fig, ax = plotter.plot(times[1:result.steps],
                                   kd[0:result.steps, 1],
                                   title='Kd theta')

        # extract reference trajectory
        if name in ['PID_wRef', 'Ref']:
            controller = controllers['IL']
            for result in sim_results:
                if result.name == 'IL':
                    break
            ref_state = util.extract_ref_state(controller, result.states)
            for i in range(env.n):
                fig, ax = plotter.plot(times[1:result.steps + 1],
                                       ref_state[0:result.steps, i],
                                       title="ref " + env.states_name[i])

        # extract belief topology
        if name in ['IL'] and param.env_name is 'Consensus':
            for result in sim_results:
                if result.name == 'IL':
                    break

            fig, ax = plotter.make_fig()
            belief_topology = util.extract_belief_topology(
                controller, result.observations)
            label_on = True
            if param.n_agents * param.n_agents > 10:
                label_on = False
            for i_agent in range(param.n_agents):
                for j_agent in range(param.n_agents):
                    if not i_agent == j_agent and env.good_nodes[i_agent]:
                        if label_on:
                            plotter.plot(times[0:result.steps + 1],
                                         belief_topology[:, i_agent, j_agent],
                                         fig=fig,
                                         ax=ax,
                                         label="K:{}{}".format(
                                             i_agent, j_agent))
                        else:
                            plotter.plot(times[0:result.steps + 1],
                                         belief_topology[:, i_agent, j_agent],
                                         fig=fig,
                                         ax=ax)

    plotter.save_figs(param.plots_fn)
    plotter.open_figs(param.plots_fn)

    # visualize
    if visualize:
        # plotter.visualize(param, env, states_deeprl)
        env.visualize(sim_results[0].states[0:result.steps], 0.1)
Beispiel #9
0
def fn3():

    pdf_path = os.path.join(os.getcwd(), param.get('fn_plots'))

    # remove if exists
    if os.path.exists(pdf_path):
        os.remove(pdf_path)

    np.random.seed(88)

    util.get_x0()
    util.get_xd()

    # baseline trajectory
    bX = []
    bU = np.zeros((len(param.get('T')), param.get('m')))

    # true perturbed trajectory
    X = []
    Xdot = []
    V = []
    Vdot = []

    # linearized perturbed trajectory
    tX = []
    tXdot = []
    tV = []
    tVdot = []

    # collect baseline
    x_curr = param.get('x0')
    for k, t in enumerate(param.get('T')):
        u_curr = util.list_to_vec(bU[k, :])
        x_next = x_curr + param.get('dt') * dynamics.get_dxdt(
            x_curr, u_curr, t)

        bX.append(x_curr)
        x_curr = x_next
    bX = np.squeeze(np.asarray(bX))

    # now run two trajectories and look at divergence
    eps_x0 = 0.0 * np.random.uniform(size=(param.get('n'), 1))
    eps_u = 0.0 * np.random.uniform(size=(param.get('nt'), param.get('m')))
    x_curr = param.get('x0') + eps_x0
    tx_curr = param.get('x0') + eps_x0
    scpU = bU + eps_u
    for k, t in enumerate(param.get('T')):

        # current control
        u_curr = np.reshape(np.transpose(scpU[k, :]), (param.get('m'), 1))

        # base
        ub = util.list_to_vec(bU[k, :])
        xb = util.list_to_vec(bX[k, :])

        # true
        dxdt = dynamics.get_dxdt(x_curr, u_curr, t)
        x_next = x_curr + dxdt * param.get('dt')
        v = dynamics.get_V(x_curr, t)
        vdot = dynamics.get_LfV(x_curr, t) + np.matmul(
            dynamics.get_LgV(x_curr, t), u_curr)

        # approximate
        F_k, B_k, d_k = dynamics.get_linear_dynamics( xb, \
         ub, t)
        R_k, w_k = dynamics.get_linear_lyapunov(xb, ub, t)

        tx_next = np.matmul( F_k, tx_curr) + \
         np.matmul( B_k, u_curr) + d_k
        tv = np.matmul(R_k, tx_curr) + w_k

        X.append(x_curr)
        Xdot.append(dxdt)
        V.append(v)
        Vdot.append(vdot)

        tX.append(tx_curr)
        tV.append(tv)

        x_curr = x_next
        tx_curr = tx_next

        print('Timestep:' + str(k) + '/' + str(len(param.get('T'))))

    X = np.squeeze(np.asarray(X))
    V = np.reshape(np.asarray(V), (len(V), 1))
    Xdot = np.squeeze(np.asarray(Xdot))

    tX = np.squeeze(np.asarray(tX))
    tV = np.reshape(np.asarray(tV), (len(tV), 1))
    tXdot = np.gradient(tX, param.get('dt'), axis=0)
    tVdot = np.gradient(tV, param.get('dt'), axis=0)

    print(np.shape(X))
    print(np.shape(V))
    print(np.shape(Xdot))
    print(np.shape(tX))
    print(np.shape(tV))
    print(np.shape(tXdot))
    print(np.shape(tVdot))

    epa1 = np.linalg.norm(X[:, 0:2] - tX[:, 0:2], axis=1)
    eva1 = np.linalg.norm(X[:, 2:4] - tX[:, 2:4], axis=1)
    epa2 = np.linalg.norm(X[:, 4:6] - tX[:, 4:6], axis=1)
    eva2 = np.linalg.norm(X[:, 6:8] - tX[:, 6:8], axis=1)
    epb = np.linalg.norm(X[:, 8:10] - tX[:, 8:10], axis=1)
    evb = np.linalg.norm(X[:, 10:12] - tX[:, 10:12], axis=1)

    eXdot = np.linalg.norm(Xdot - tXdot, axis=1)
    eV = np.linalg.norm(V - tV, axis=1)
    eVdot = np.linalg.norm(Vdot - tVdot, axis=1)

    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    plt.plot(epa1, eXdot, label='eXdot')
    plt.plot(epa1, eV, label='eV')
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_xlabel('epa')
    plt.legend()

    fig, ax = plt.subplots()
    plt.plot(eva1, eXdot, label='eXdot')
    plt.plot(eva1, eV, label='eV')
    ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_xlabel('eva')
    plt.legend()

    fig, ax = plt.subplots()
    plt.plot(epb, eXdot, label='eXdot')
    plt.plot(epb, eV, label='eV')
    # ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_xlabel('epb')
    plt.legend()

    fig, ax = plt.subplots()
    plt.plot(evb, eXdot, label='eXdot')
    plt.plot(evb, eV, label='eV')
    # ax.set_xscale('log')
    ax.set_yscale('log')
    ax.set_xlabel('evb')
    plt.legend()

    # plt.plot( eX, eVdot, label = 'eVdot')

    plotter.save_figs()
    subprocess.call(["xdg-open", pdf_path])
Beispiel #10
0
def fn2():

    pdf_path = os.path.join(os.getcwd(), param.get('fn_plots'))

    # remove if exists
    if os.path.exists(pdf_path):
        os.remove(pdf_path)

    np.random.seed(88)

    util.get_x0()
    util.get_xd()

    # trjaectory to linearize about
    fX = []
    fU = []
    fV = []
    # scp trajectory
    tX = []
    tU = []
    tV = []
    # integrated trajectory with scp control
    scpV = []
    # linearize about trajectory
    bV = []

    # feedback linearizing baseline
    x_curr = param.get('x0')
    for k, t in enumerate(param.get('T')):
        u_curr = controller.get_fdbk_controller(x_curr, t)
        x_next = x_curr + param.get('dt') * dynamics.get_dxdt(
            x_curr, u_curr, t)
        v = dynamics.get_V(x_curr, t)

        fX.append(x_curr)
        fU.append(u_curr)
        fV.append(v)
        x_curr = x_next
    fX = np.squeeze(np.asarray(fX))
    fU = np.asarray(fU)

    # scp
    scpX, scpU, bX, bU = controller.get_scp_clf_controller()

    # integrate
    tx_curr = param.get('x0')
    x_curr = param.get('x0')
    for k, t in enumerate(param.get('T')):

        ub = util.list_to_vec(bU[k, :])
        xb = util.list_to_vec(bX[k, :])

        u_curr = np.transpose(util.list_to_vec(scpU[k, :]))

        F_k, B_k, d_k = dynamics.get_linear_dynamics(xb, ub, t)

        R_k, w_k = dynamics.get_linear_lyapunov(xb, ub, t)

        tx_next = np.matmul(F_k, tx_curr) + np.matmul(B_k, u_curr) + d_k
        x_next = x_curr + param.get('dt') * dynamics.get_dxdt(
            x_curr, u_curr, t)

        tv = np.matmul(R_k, tx_curr) + w_k
        scpv = dynamics.get_V(x_curr, t)

        tX.append(tx_curr)
        tV.append(tv)
        scpV.append(scpv)
        bV.append(dynamics.get_V(xb, t))

        tx_curr = tx_next
        x_curr = x_next

    tX = np.squeeze(np.asarray(tX))
    bV = np.asarray(bV)

    plotter.plot_SS(fX, param.get('T'), title='Fdbk Linearize SS')
    plotter.plot_V(fV, param.get('T'), title='Fdbk Linearize V')
    plotter.plot_U(fU, param.get('T'), title='Fdbk Linearize U')

    plotter.plot_SS(bX,
                    param.get('T'),
                    title='What SCP is linearizing about: SS')
    plotter.plot_V(bV,
                   param.get('T'),
                   title='What SCP is linearizing about: V')
    # plotter.plot_U(bU, param.get('T'), title = 'What SCP is linearizing about: U')

    plotter.plot_SS(tX, param.get('T'), title='What SCP thinks its doing: SS')
    plotter.plot_V(tV, param.get('T'), title='What SCP thinks its doing: V')
    # plotter.plot_U(tU, param.get('T'), title = 'What SCP thinks its doing: U')

    plotter.plot_SS(scpX,
                    param.get('T'),
                    title='What SCP is actually doing: SS')
    plotter.plot_V(scpV, param.get('T'), title='What SCP is actually doing: V')
    # plotter.plot_U(scpU, param.get('T'), title = 'What SCP is actually doing: U')

    plotter.save_figs()
    subprocess.call(["xdg-open", pdf_path])
Beispiel #11
0
			plotter.plot_totalreward_vs_number_of_agents(sim_results)

	else:
		# micro sim 
		# 	- plot sim 
		if default_param.plot_sim_over_time:
			for sim_result in sim_results:
				controller_name = sim_result["controller_name"]
				times = sim_result["times"]
				plotter.sim_plot_over_time(controller_name,sim_result,times)
		else:
			for sim_result in sim_results:
				for timestep,time in enumerate(sim_result["times"]):
					controller_name = sim_result["controller_name"]
					plotter.render(controller_name,sim_result,timestep)
					# break
				
		# 	- plot reward
		plotter.plot_cumulative_reward(sim_results)

		# 	- plot q value estimation 
		for controller_name in default_param.controller_names:
			if 'bellman' in controller_name:
				plotter.plot_q_error(sim_results)
				break 

		

	print('saving and opening figs...')
	plotter.save_figs(default_param.plot_fn)
	plotter.open_figs(default_param.plot_fn)
Beispiel #12
0
def sim(param, env, controllers, initial_state, visualize):

    # environment
    times = param.sim_times
    device = "cpu"

    if initial_state is None:
        initial_state = env.reset()

    # run sim
    SimResult = namedtuple(
        'SimResult', ['states', 'observations', 'actions', 'steps', 'name'])

    for name, controller in controllers.items():
        print("Running simulation with " + name)
        print("Initial State: ", initial_state)
        if hasattr(controller, 'policy'):
            result = SimResult._make(
                run_sim(param, env, controller, initial_state) + (name, ))
        else:
            observations = []
            result = SimResult._make(
                (controller.states, observations, controller.actions,
                 controller.steps, name))
        sim_results = []
        sim_results.append(result)

        # plot state space
        if param.env_name in [
                'SingleIntegrator', 'SingleIntegratorVelSensing',
                'DoubleIntegrator'
        ]:
            fig, ax = plotter.make_fig()
            ax.set_title('State Space')
            ax.set_aspect('equal')

            for o in env.obstacles:
                ax.add_patch(
                    Rectangle(o, 1.0, 1.0, facecolor='gray', alpha=0.5))

            for agent in env.agents:

                line = ax.plot(
                    result.states[0:result.steps,
                                  env.agent_idx_to_state_idx(agent.i)],
                    result.states[0:result.steps,
                                  env.agent_idx_to_state_idx(agent.i) + 1],
                    alpha=0.5)
                color = line[0].get_color()

                # plot velocity vectors:
                X = []
                Y = []
                U = []
                V = []
                for k in np.arange(0, result.steps, 100):
                    X.append(
                        result.states[k,
                                      env.agent_idx_to_state_idx(agent.i)])
                    Y.append(
                        result.states[k,
                                      env.agent_idx_to_state_idx(agent.i) + 1])
                    if param.env_name in [
                            'SingleIntegrator', 'SingleIntegratorVelSensing'
                    ]:
                        # Singleintegrator: plot actions
                        U.append(result.actions[k, 2 * agent.i + 0])
                        V.append(result.actions[k, 2 * agent.i + 1])
                    elif param.env_name in ['DoubleIntegrator']:
                        # doubleintegrator: plot velocities
                        U.append(
                            result.states[k,
                                          env.agent_idx_to_state_idx(agent.i) +
                                          2])
                        V.append(
                            result.states[k,
                                          env.agent_idx_to_state_idx(agent.i) +
                                          3])

                ax.quiver(X,
                          Y,
                          U,
                          V,
                          angles='xy',
                          scale_units='xy',
                          scale=0.5,
                          color=color,
                          width=0.005)
                plotter.plot_circle(
                    result.states[1, env.agent_idx_to_state_idx(agent.i)],
                    result.states[1,
                                  env.agent_idx_to_state_idx(agent.i) + 1],
                    param.r_agent,
                    fig=fig,
                    ax=ax,
                    color=color)
                plotter.plot_square(agent.s_g[0],
                                    agent.s_g[1],
                                    param.r_agent,
                                    angle=45,
                                    fig=fig,
                                    ax=ax,
                                    color=color)

            # draw state for each time step
            robot = 0
            if param.env_name in ['SingleIntegrator']:
                for step in np.arange(0, result.steps, 1000):
                    fig, ax = plotter.make_fig()
                    ax.set_title('State at t={} for robot={}'.format(
                        times[step], robot))
                    ax.set_aspect('equal')

                    # plot all obstacles
                    for o in env.obstacles:
                        ax.add_patch(
                            Rectangle(o, 1.0, 1.0, facecolor='gray',
                                      alpha=0.5))

                    # plot overall trajectory
                    line = ax.plot(
                        result.states[0:result.steps,
                                      env.agent_idx_to_state_idx(robot)],
                        result.states[0:result.steps,
                                      env.agent_idx_to_state_idx(robot) + 1],
                        "--")
                    color = line[0].get_color()

                    # plot current position
                    plotter.plot_circle(
                        result.states[step,
                                      env.agent_idx_to_state_idx(robot)],
                        result.states[step,
                                      env.agent_idx_to_state_idx(robot) + 1],
                        param.r_agent,
                        fig=fig,
                        ax=ax,
                        color=color)

                    # plot current observation
                    observation = result.observations[step][robot][0]
                    num_neighbors = int(observation[0])
                    num_obstacles = int(
                        (observation.shape[0] - 3 - 2 * num_neighbors) / 2)

                    robot_pos = result.states[
                        step,
                        env.agent_idx_to_state_idx(robot):env.
                        agent_idx_to_state_idx(robot) + 2]

                    idx = 3
                    for i in range(num_neighbors):
                        pos = observation[idx:idx + 2] + robot_pos
                        ax.add_patch(
                            Circle(pos,
                                   0.25,
                                   facecolor='gray',
                                   edgecolor='red',
                                   alpha=0.5))
                        idx += 2

                    for i in range(num_obstacles):
                        # pos = observation[idx : idx+2] + robot_pos - np.array([0.5,0.5])
                        # ax.add_patch(Rectangle(pos, 1.0, 1.0, facecolor='gray', edgecolor='red', alpha=0.5))
                        pos = observation[idx:idx + 2] + robot_pos
                        ax.add_patch(
                            Circle(pos,
                                   0.5,
                                   facecolor='gray',
                                   edgecolor='red',
                                   alpha=0.5))
                        idx += 2

                    # plot goal
                    goal = observation[1:3] + robot_pos
                    ax.add_patch(
                        Rectangle(goal - np.array([0.2, 0.2]),
                                  0.4,
                                  0.4,
                                  alpha=0.5,
                                  color=color))

                # 	# import matplotlib.pyplot as plt
                # 	# plt.savefig("test.svg")
                # 	# exit()
            elif param.env_name in ['DoubleIntegrator']:
                for step in np.arange(0, result.steps, 1000):
                    fig, ax = plotter.make_fig()
                    ax.set_title('State at t={} for robot={}'.format(
                        times[step], robot))
                    ax.set_aspect('equal')

                    # plot all obstacles
                    for o in env.obstacles:
                        ax.add_patch(
                            Rectangle(o, 1.0, 1.0, facecolor='gray',
                                      alpha=0.5))

                    # plot overall trajectory
                    line = ax.plot(
                        result.states[0:result.steps,
                                      env.agent_idx_to_state_idx(robot)],
                        result.states[0:result.steps,
                                      env.agent_idx_to_state_idx(robot) + 1],
                        "--")
                    color = line[0].get_color()

                    # plot current position
                    plotter.plot_circle(
                        result.states[step,
                                      env.agent_idx_to_state_idx(robot)],
                        result.states[step,
                                      env.agent_idx_to_state_idx(robot) + 1],
                        param.r_agent,
                        fig=fig,
                        ax=ax,
                        color=color)

                    # plot current observation
                    observation = result.observations[step][robot][0]
                    num_neighbors = int(observation[0])
                    num_obstacles = int(
                        (observation.shape[0] - 5 - 4 * num_neighbors) / 2)

                    robot_pos = result.states[
                        step,
                        env.agent_idx_to_state_idx(robot):env.
                        agent_idx_to_state_idx(robot) + 2]

                    X = []
                    Y = []
                    U = []
                    V = []

                    idx = 5
                    for i in range(num_neighbors):
                        pos = observation[idx:idx + 2] + robot_pos
                        X.append(pos[0])
                        Y.append(pos[1])
                        U.append(observation[idx + 2])
                        V.append(observation[idx + 3])
                        # print(np.linalg.norm(observation[idx+2:idx+4]))
                        ax.add_patch(
                            Circle(pos,
                                   param.r_agent,
                                   facecolor='gray',
                                   edgecolor='red',
                                   alpha=0.5))
                        idx += 4

                    for i in range(num_obstacles):
                        pos = observation[idx:idx + 2] + robot_pos - np.array(
                            [0.5, 0.5])
                        ax.add_patch(
                            Rectangle(pos,
                                      1.0,
                                      1.0,
                                      facecolor='gray',
                                      edgecolor='red',
                                      alpha=0.5))
                        # pos = observation[idx : idx+2] + robot_pos
                        # ax.add_patch(Circle(pos, 0.5, facecolor='gray', edgecolor='red', alpha=0.5))
                        idx += 2

                    # plot goal
                    goal = observation[1:3] + robot_pos
                    ax.add_patch(
                        Rectangle(goal - np.array([0.2, 0.2]),
                                  0.4,
                                  0.4,
                                  alpha=0.5,
                                  color=color))
                    X.append(robot_pos[0])
                    Y.append(robot_pos[1])
                    U.append(observation[3])
                    V.append(observation[4])

                    # plot velocity vectors
                    ax.quiver(X,
                              Y,
                              U,
                              V,
                              angles='xy',
                              scale_units='xy',
                              scale=0.5,
                              color='red',
                              width=0.005)

        # plot time varying states
        if param.env_name in ['SingleIntegrator', 'DoubleIntegrator']:
            for i_config in range(env.state_dim_per_agent):
                fig, ax = plotter.make_fig()
                ax.set_title(env.states_name[i_config])
                for agent in env.agents:
                    for result in sim_results:
                        ax.plot(
                            times[1:result.steps],
                            result.states[1:result.steps,
                                          env.agent_idx_to_state_idx(agent.i) +
                                          i_config],
                            label=result.name)

        # plot time varying actions
        if param.env_name in ['SingleIntegrator', 'DoubleIntegrator']:
            for i_config in range(env.action_dim_per_agent):
                fig, ax = plotter.make_fig()
                ax.set_title(env.actions_name[i_config])
                for agent in env.agents:
                    for result in sim_results:
                        ax.plot(
                            times[1:result.steps],
                            result.actions[1:result.steps,
                                           agent.i * env.action_dim_per_agent +
                                           i_config],
                            label=result.name)

                        #
                        if i_config == 5:
                            ax.set_yscale('log')

    plotter.save_figs(param.plots_fn)
    plotter.open_figs(param.plots_fn)

    # visualize
    if visualize:
        # plotter.visualize(param, env, states_deeprl)
        env.visualize(sim_results[0].states[0:result.steps], 0.1)