Example #1
    def __init__(self, columns, rows):
        tk.Tk.__init__(self)
        self.columns = columns
        self.rows = rows
        # Build the constraint network and solve it with A*-GAC
        self.gac = GAC(columns, rows)
        self.search = Search(self.gac)
        solution = self.search.a_star()
        self.canvas = tk.Canvas(self, width=800, height=800, borderwidth=0)
        self.canvas.pack(side="top", fill="both", expand="true")

        # Menu bar for switching between scenario files
        menubar = tk.Menu(self)

        mapMenu = tk.Menu(menubar)
        mapMenu.add_command(label="Scenario 0",
                            command=lambda: self.changeMap('scenario0.txt'))
        mapMenu.add_command(label="Scenario 1",
                            command=lambda: self.changeMap('scenario1.txt'))
        mapMenu.add_command(label="Scenario 2",
                            command=lambda: self.changeMap('scenario2.txt'))
        mapMenu.add_command(label="Scenario 3",
                            command=lambda: self.changeMap('scenario3.txt'))
        mapMenu.add_command(label="Scenario 4",
                            command=lambda: self.changeMap('scenario4.txt'))
        mapMenu.add_command(label="Scenario 5",
                            command=lambda: self.changeMap('scenario5.txt'))
        mapMenu.add_command(label="Scenario 6",
                            command=lambda: self.changeMap('scenario6.txt'))
        menubar.add_cascade(label="Maps", menu=mapMenu)
        self.config(menu=menubar)
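
Every menu entry above routes through self.changeMap, which is not part of this excerpt. A minimal sketch of what such a handler could look like, assuming the constructor's GAC/Search/a_star pattern is simply rebuilt for the new scenario, and using a hypothetical helper load_scenario(path) (not in the original code) that returns the new column and row clues:

    # Hypothetical sketch -- changeMap is not shown in this example.
    # load_scenario(path) is an assumed helper returning (columns, rows).
    def changeMap(self, path):
        self.columns, self.rows = load_scenario(path)
        # rebuild the constraint network and re-run the A* search
        self.gac = GAC(self.columns, self.rows)
        self.search = Search(self.gac)
        self.solution = self.search.a_star()
        # clear the canvas so the new scenario can be redrawn
        self.canvas.delete("all")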
Example #2
def make(path):
    f = open(path, "r")
    count = 0
    size = 0
    colors = 0
    map = []

    #generate map
    for line in f:
        l = parse_line(line)
        if count == 0:
            # first line: board size and number of colors
            size = int(l[0])
            colors = int(l[1])
            for i in range(size):
                map.append([0] * size)
        else:
            # remaining lines: the two endpoints of flow number l[0]
            map[int(l[2])][int(l[1])] = int(l[0]) + 1
            map[int(l[4])][int(l[3])] = int(l[0]) + 1

        count += 1

    f.close()

    for row in map:
        print(row)
    print()
    FFNode.size = size
    gac = GAC()
    gen_variables(gac, map, colors)
    gen_constraints(gac, map)

    return gac, size
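
Examples #2-#4 all depend on a parse_line helper that is not reproduced here. Judging from how its return value is indexed, it most likely just tokenises one whitespace-separated line of the input file; a minimal sketch under that assumption:

# Hypothetical sketch -- the real parse_line is not shown in these examples.
# Assumes each input line is a whitespace-separated list of numeric tokens.
def parse_line(line):
    return line.strip().split()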
Example #3
def make_csp(path, colors):
    gac = GAC()

    f = open(path, "r")

    NV = 0
    NE = 0

    count = 1
    for line in f:
        l = parse_line(line)
        print(l)
        if count == 1:
            # first line: number of vertices (NV) and number of edges (NE)
            NV = int(l[0])
            NE = int(l[1])

        elif count <= NV + 1:
            # next NV lines: vertex id and its x/y coordinates
            print("Making var...")
            name = "v{}".format(l[0])
            print(name)
            print("")
            domain = list(range(colors))
            gac.add_variable(Vc_var(name, domain, float(l[1]), float(l[2])))

        else:
            # remaining NE lines: the two vertex ids joined by an edge
            print("Making constraint...\n")
            gac.add_constraint(gen_constraint(l[0], l[1]))

        count += 1

    f.close()
    return gac
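
The gen_constraint helper used above is not shown either. Since Example #5 builds its constraint from the expression "x!=y", the natural reading is a graph-colouring inequality between the two vertex variables; a sketch under that assumption, reusing the v{} naming scheme and the Constraints signature from Example #5:

# Hypothetical sketch -- assumes the Constraints("x!=y", ...) signature
# seen in Example #5; the real gen_constraint may differ.
def gen_constraint(a, b):
    edge = ("v{}".format(a), "v{}".format(b))
    return Constraints("x!=y", ["x", "y"], [edge])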
Example #4
def make(path):
    f = open(path, "r")
    width = 0
    height = 0
    map = []

    #generate map
    count = 0
    rows = []
    columns = []

    for line in f:
        l = util.parse_line(line)
        if count == 0:
            # first line: board width (columns) and height (rows)
            width = int(l[0])
            height = int(l[1])
            print(l)

        elif count <= height:
            # next height lines: the row segment specifications
            row = []
            for segment in l:
                row.append(int(segment))
            rows.append(row)
            print(row)

        else:
            # remaining lines: the column segment specifications
            col = []
            for segment in l:
                col.append(int(segment))
            columns.append(col)
            print(col)
        count += 1
    f.close()

    bitmap = util.get_bitmap_vector(max(width, height))
    print()

    gac = GAC()
    #gen variables
    vars = gen_variables(rows, columns, width, height, bitmap)
    for var in vars:
        gac.add_variable(var)

    #gen constraints
    constraints = gen_constraints(width, height)
    gac.constraints = constraints

    return gac, width, height
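
How the result of make() is consumed is not shown in this example. Combining it with the Search/a_star pattern from Example #1 gives a plausible driver; the filename below is only a placeholder:

# Hypothetical usage sketch -- assumes the Search/a_star API from Example #1.
gac, width, height = make("scenario0.txt")
search = Search(gac)
solution = search.a_star()
print(solution)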
Example #5
	def __init__(self, domains, constraint_list):
		'''
		Initializes the values used by astar and gac.
		'''
		#A* INFO
		self.h = 0
		self.g = 0
		self.f = 0
		self.predecessor = None
		self.neighbours = []	

		#GAC INFO
		self.constraints = Constraints("x!=y", ["x", "y"], constraint_list)
		self.domains = domains

		#init gac
		self.gac = GAC()
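
The Constraints("x!=y", ["x", "y"], constraint_list) object wraps a textual expression over named variables. One common way to make such an expression cheap to evaluate is to compile it into a function once and call it with candidate assignments; the sketch below shows that idea, which may or may not be how this Constraints class is implemented:

# Hypothetical sketch -- one way to turn the expression string into a
# reusable predicate; the actual Constraints class may work differently.
def make_predicate(expression, variable_names):
    source = "lambda {}: {}".format(", ".join(variable_names), expression)
    return eval(source)

not_equal = make_predicate("x!=y", ["x", "y"])
print(not_equal(1, 2))  # True
print(not_equal(3, 3))  # False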
Example #6
    def __init__(self, world_size, args):
        if args.env_name == 'L2M2019Env':
            env = L2M2019Env(visualize=False, difficulty=args.difficulty)
            obs_dim = 99
        else:
            env = gym.make(args.env_name)
            obs_dim = env.observation_space.shape[0]

        act_dim = env.action_space.shape[0]

        self.device = torch.device(args.device)

        self.args = args
        self.world_size = world_size

        self.actor_critic = MLPActorCritic(obs_dim,
                                           act_dim,
                                           hidden_sizes=args.hidden_sizes).to(
                                               self.device)
        # one replay buffer per observer worker (ranks 1..world_size-1)
        self.replay_buffer = [
            ReplayBuffer(obs_dim, act_dim, args.buffer_size)
            for _ in range(1, world_size)
        ]

        self.gac = GAC(self.actor_critic,
                       self.replay_buffer,
                       device=self.device,
                       gamma=args.gamma,
                       alpha_start=args.alpha_start,
                       alpha_min=args.alpha_min,
                       alpha_max=args.alpha_max)

        self.test_len = 0.0
        self.test_ret = 0.0

        # create a remote Observer on every worker and keep an RRef to each
        self.ob_rrefs = []
        for ob_rank in range(1, world_size):
            ob_info = rpc.get_worker_info(OBSERVER_NAME.format(ob_rank))
            self.ob_rrefs.append(remote(ob_info, Observer, args=(args, )))

        self.agent_rref = RRef(self)
Example #7
    def __init__(self, graph):
        tk.Tk.__init__(self)
        self.graph = Graph(graph[0], graph[1], graph[2], graph[3])
        self.gac = GAC(self.graph)
        self.search = Search(self.gac)
        self.graph_size = 800.0
        self.vertex_size = 10.0

        self.ixy, self.x_size, self.y_size = self.getIXY()

        self.canvas = tk.Canvas(self,
                                width=self.graph_size + 50,
                                height=self.graph_size + 50,
                                borderwidth=0)
        self.canvas.pack(side="top", fill="both", expand="true")

        menubar = tk.Menu(self)

        commandmenu = tk.Menu(menubar)
        commandmenu.add_command(label="solve", command=self.drawSolution)
        commandmenu.add_command(label="start animation",
                                command=self.startAnimation)
        commandmenu.add_command(label="increment",
                                command=self.incrementSolution)
        commandmenu.add_command(label="reset", command=self.resetGraph)
        menubar.add_cascade(label="Commands", menu=commandmenu)

        execmenu = tk.Menu(menubar)
        execmenu.add_command(label="2 Colors",
                             command=lambda: self.changeColors('2'))
        execmenu.add_command(label="3 Colors",
                             command=lambda: self.changeColors('3'))
        execmenu.add_command(label="4 Colors",
                             command=lambda: self.changeColors('4'))
        execmenu.add_command(label="5 Colors",
                             command=lambda: self.changeColors('5'))
        execmenu.add_command(label="6 Colors",
                             command=lambda: self.changeColors('6'))
        execmenu.add_command(label="7 Colors",
                             command=lambda: self.changeColors('7'))
        execmenu.add_command(label="8 Colors",
                             command=lambda: self.changeColors('8'))
        execmenu.add_command(label="9 Colors",
                             command=lambda: self.changeColors('9'))
        execmenu.add_command(label="10 Colors",
                             command=lambda: self.changeColors('10'))
        menubar.add_cascade(label="Colors", menu=execmenu)

        mapMenu = tk.Menu(menubar)
        mapMenu.add_command(
            label="graph-color-2",
            command=lambda: self.changeMap('graph-color-2.txt'))
        mapMenu.add_command(
            label="rand-50",
            command=lambda: self.changeMap('rand-50-4-color1.txt'))
        mapMenu.add_command(label="test",
                            command=lambda: self.changeMap('test.txt'))
        mapMenu.add_command(
            label="spiral-500",
            command=lambda: self.changeMap('spiral-500-4-color1.txt'))
        mapMenu.add_command(
            label="graph-color-1",
            command=lambda: self.changeMap('graph-color-1.txt'))
        mapMenu.add_command(
            label="rand-100-6",
            command=lambda: self.changeMap('rand-100-6-color1.txt'))
        mapMenu.add_command(
            label="rand-100-4",
            command=lambda: self.changeMap('rand-100-4-color1.txt'))
        menubar.add_cascade(label="Maps", menu=mapMenu)
        self.config(menu=menubar)

        self.oval = {}

        # draw every edge as a line between its two scaled endpoints
        for edge in self.graph.edges:

            x1 = (self.ixy[edge[0]][1] *
                  (self.graph_size / self.x_size)) + (self.vertex_size / 2)
            y1 = (self.ixy[edge[0]][2] *
                  (self.graph_size / self.y_size)) + (self.vertex_size / 2)
            x2 = (self.ixy[edge[1]][1] *
                  (self.graph_size / self.x_size)) + (self.vertex_size / 2)
            y2 = (self.ixy[edge[1]][2] *
                  (self.graph_size / self.y_size)) + (self.vertex_size / 2)

            self.canvas.create_line(x1, y1, x2, y2)

        # draw every vertex as an oval, keyed by its (x, y) coordinates
        for vertex in self.ixy:
            x1 = vertex[1] * (self.graph_size / self.x_size)
            y1 = vertex[2] * (self.graph_size / self.y_size)

            x2 = x1 + self.vertex_size
            y2 = y1 + self.vertex_size

            self.oval[vertex[1],
                      vertex[2]] = self.canvas.create_oval(x1,
                                                           y1,
                                                           x2,
                                                           y2,
                                                           outline="black",
                                                           fill="gray80",
                                                           tag="oval")
Example #8
def main(args):

    if 'L2M2019Env' in args.env_name:
        env = L2M2019Env(visualize=False, difficulty=args.difficulty)
        test_env = L2M2019Env(visualize=False, difficulty=args.difficulty)
    else:
        env = gym.make(args.env_name)
        test_env = gym.make(args.env_name)
    device = torch.device(args.device)

    data = np.load('./official_obs_scaler.npz')
    obs_mean, obs_std = data['mean'], data['std']

    # 1.Set some necessary seed.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)
    env.seed(args.seed)
    test_env.seed(args.seed + 999)

    # 2.Create actor, critic, EnvSampler() and PPO.
    if 'L2M2019Env' in args.env_name:
        obs_dim = 99
    else:
        obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]

    act_high = env.action_space.high
    act_low = env.action_space.low

    actor_critic = MLPActorCritic(obs_dim,
                                  act_dim,
                                  hidden_sizes=args.hidden_sizes).to(device)

    replay_buffer = ReplayBuffer(obs_dim, act_dim, args.buffer_size)

    gac = GAC(actor_critic,
              replay_buffer,
              device=device,
              gamma=args.gamma,
              alpha_start=args.alpha_start,
              alpha_min=args.alpha_min,
              alpha_max=args.alpha_max)

    def act_encoder(y):
        # y = [min, max] ==> x = [-1, 1]
        # if args.env_name == 'L2M2019Env':
        #     return y
        return (y - act_low) / (act_high - act_low) * 2.0 - 1.0

    def act_decoder(x):
        # x = [-1, 1] ==> y = [min, max]
        # if args.env_name == 'L2M2019Env':
        #     return np.abs(x)
        return (x + 1.0) / 2.0 * (act_high - act_low) + act_low

    def get_observation(env):
        obs = np.array(env.get_observation()[242:])

        obs = (obs - obs_mean) / obs_std

        state_desc = env.get_state_desc()
        p_body = [
            state_desc['body_pos']['pelvis'][0],
            -state_desc['body_pos']['pelvis'][2]
        ]
        v_body = [
            state_desc['body_vel']['pelvis'][0],
            -state_desc['body_vel']['pelvis'][2]
        ]
        v_tgt = env.vtgt.get_vtgt(p_body).T

        return np.append(obs, v_tgt)

    def get_reward(env):
        reward = 10.0

        # Reward for not falling down
        state_desc = env.get_state_desc()
        p_body = [
            state_desc['body_pos']['pelvis'][0],
            -state_desc['body_pos']['pelvis'][2]
        ]
        v_body = [
            state_desc['body_vel']['pelvis'][0],
            -state_desc['body_vel']['pelvis'][2]
        ]
        v_tgt = env.vtgt.get_vtgt(p_body).T

        vel_penalty = np.linalg.norm(v_body - v_tgt)

        muscle_penalty = 0
        for muscle in sorted(state_desc['muscles'].keys()):
            muscle_penalty += np.square(
                state_desc['muscles'][muscle]['activation'])

        ret_r = reward - (vel_penalty * 3 + muscle_penalty * 1)

        if vel_penalty < 0.3:
            ret_r += 10

        return ret_r

    # 3.Start training.
    def get_action(o, deterministic=False):
        o = torch.FloatTensor(o.reshape(1, -1)).to(device)
        a = actor_critic.act(o, deterministic)
        return a

    def test_agent():
        test_ret, test_len = 0, 0
        for j in range(args.epoch_per_test):
            _, d, ep_ret, ep_len = test_env.reset(), False, 0, 0
            o = get_observation(test_env)
            while not (d or (ep_len == args.max_ep_len)):
                # Take deterministic actions at test time
                a = get_action(o, True)
                a = act_decoder(a)

                for _ in range(args.frame_skip):
                    _, r, d, _ = test_env.step(a)
                    ep_ret += r
                    ep_len += 1
                    if d: break

                o = get_observation(test_env)

            test_ret += ep_ret
            test_len += ep_len
        return test_ret / args.epoch_per_test, test_len / args.epoch_per_test

    total_step = args.total_epoch * args.steps_per_epoch
    _, d, ep_len = env.reset(), False, 0
    o = get_observation(env)
    for t in range(1, total_step + 1):
        if t <= args.start_steps:
            a = act_encoder(env.action_space.sample())
        else:
            a = get_action(o, deterministic=False)

        a = act_decoder(a)

        r = 0.0
        for _ in range(args.frame_skip):
            _, _, d, _ = env.step(a)
            r += get_reward(env)
            ep_len += 1
            if d: break

        o2 = get_observation(env)

        # Ignore the "done" signal if it comes from hitting the time
        # horizon (that is, when it's an artificial terminal signal
        # that isn't based on the agent's state)

        d = False if ep_len == args.max_ep_len else d

        # if not d:
        #     new_o, new_r, new_o2 = generate_success(o, o2)
        #     replay_buffer.store(new_o, a, new_r * args.reward_scale, new_o2, d)

        # Store experience to replay buffer
        replay_buffer.store(o, a, r * args.reward_scale, o2, d)

        o = o2
        if d or (ep_len == args.max_ep_len):
            _, ep_len = env.reset(obs_as_dict=False), 0
            o = get_observation(env)

        if t >= args.update_after and t % args.steps_per_update == 0:
            for _ in range(args.steps_per_update):
                loss_a, loss_c, alpha = gac.update(args.batch_size)
            gac.update_beta()
            print(
                "loss_actor = {:<22}, loss_critic = {:<22}, alpha = {:<20}, beta = {:<20}"
                .format(loss_a, loss_c, alpha, gac.beta))

        # End of epoch handling
        if t >= args.update_after and t % args.steps_per_epoch == 0:
            test_ret, test_len = test_agent()
            print("Step {:>10}: test_ret = {:<20}, test_len = {:<20}".format(
                t, test_ret, test_len))
            print(
                "-----------------------------------------------------------")
            yield t, test_ret, test_len, actor_critic
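
Note that main() in Example #8 is a generator: it yields the step count, the test statistics, and the current actor_critic at the end of every epoch. The caller is therefore expected to drive it in a loop, for example to log results and checkpoint the model; a sketch under that assumption (args is assumed to come from the surrounding argparse setup, and the checkpoint path is only a placeholder):

# Hypothetical usage sketch -- main() yields once per epoch, so the caller
# decides what to do with each intermediate result.
import torch

for step, test_ret, test_len, actor_critic in main(args):
    print("epoch done at step", step, "test return", test_ret)
    torch.save(actor_critic.state_dict(),
               "checkpoint_step_{}.pt".format(step))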