def __init__(self, columns, rows):
    tk.Tk.__init__(self)
    self.columns = columns
    self.rows = rows
    self.gac = GAC(columns, rows)
    self.search = Search(self.gac)
    # run the solver up front so the result is ready when the window opens
    solution = self.search.a_star()

    self.canvas = tk.Canvas(self, width=800, height=800, borderwidth=0)
    self.canvas.pack(side="top", fill="both", expand=True)

    menubar = tk.Menu(self)
    mapMenu = tk.Menu(menubar)
    for i in range(7):
        # bind the file name at definition time to avoid the late-binding
        # lambda pitfall inside a loop
        mapMenu.add_command(
            label="Scenario {}".format(i),
            command=lambda name="scenario{}.txt".format(i): self.changeMap(name))
    menubar.add_cascade(label="Maps", menu=mapMenu)
    self.config(menu=menubar)
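# Usage sketch, not from the source: "FlowGUI" stands in for whatever tk.Tk
# subclass owns the __init__ above, and the 10x10 board size is an assumption.
# app = FlowGUI(10, 10)
# app.mainloop()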
def make(path):
    """Parse a Flow Free style scenario file and build a GAC instance.

    The first line holds the board size and the number of colors; every
    following line places the two endpoints of one color on the board.
    """
    count = 0
    size = 0
    colors = 0
    board = []
    with open(path, "r") as f:
        for line in f:
            l = parse_line(line)
            if count == 0:
                size = int(l[0])
                colors = int(l[1])
                for i in range(size):
                    board.append([0] * size)
            else:
                # endpoints for color l[0] at (x, y) = (l[1], l[2]) and
                # (l[3], l[4]); the board is indexed [row][column]
                board[int(l[2])][int(l[1])] = int(l[0]) + 1
                board[int(l[4])][int(l[3])] = int(l[0]) + 1
            count += 1
    for row in board:
        print(row)
    print()
    FFNode.size = size
    gac = GAC()
    gen_variables(gac, board, colors)
    gen_constraints(gac, board)
    return gac, size
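# Usage sketch, not from the source: the scenario file name is borrowed from the
# menu in the first snippet, and Search/a_star are the solver pair the GUIs use.
# gac, size = make("scenario0.txt")
# solution = Search(gac).a_star()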
def make_csp(path, colors):
    """Parse a graph-coloring file and build a GAC instance.

    The first line holds the vertex and edge counts; the next NV lines
    describe vertices (index, x, y) and the rest describe edges.
    """
    gac = GAC()
    NV = 0  # number of vertices
    NE = 0  # number of edges (read for completeness; the edge lines
            # themselves are consumed below)
    count = 1
    with open(path, "r") as f:
        for line in f:
            l = parse_line(line)
            print(l)
            if count == 1:
                NV = int(l[0])
                NE = int(l[1])
            elif 1 < count <= NV + 1:
                print("Making var...")
                name = "v{}".format(l[0])
                print(name)
                print("")
                # one variable per vertex, with every color in its domain
                domain = list(range(colors))
                gac.add_variable(Vc_var(name, domain, float(l[1]), float(l[2])))
            else:
                print("Making constraint...\n")
                gac.add_constraint(gen_constraint(l[0], l[1]))
            count += 1
    return gac
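# Usage sketch, not from the source: the file name is borrowed from the map menu
# further down; the color count 4 is an assumption.
# gac = make_csp("graph-color-1.txt", 4)
# solution = Search(gac).a_star()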
def make(path):
    """Parse a nonogram file: the first line holds "width height", followed
    by one clue line per row and then one clue line per column."""
    width = 0
    height = 0
    count = 0
    rows = []
    columns = []
    with open(path, "r") as f:
        for line in f:
            l = util.parse_line(line)
            if count == 0:
                width = int(l[0])   # number of columns
                height = int(l[1])  # number of rows
                print(l)
            elif count <= height:
                row = [int(segment) for segment in l]
                rows.append(row)
                print(row)
            else:
                col = [int(segment) for segment in l]
                columns.append(col)
                print(col)
            count += 1
    bitmap = util.get_bitmap_vector(max(width, height))
    print()
    gac = GAC()
    # generate variables, one per row clue and one per column clue
    for var in gen_variables(rows, columns, width, height, bitmap):
        gac.add_variable(var)
    # generate constraints between intersecting rows and columns
    gac.constraints = gen_constraints(width, height)
    return gac, width, height
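# Usage sketch, not from the source: "nonogram.txt" is a hypothetical input file
# laid out as the docstring above describes.
# gac, width, height = make("nonogram.txt")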
def __init__(self, domains, constraint_list):
    '''
    Initializes the values used by astar and gac.
    '''
    # A* info
    self.h = 0  # heuristic estimate to the goal
    self.g = 0  # cost from the start node
    self.f = 0  # total estimate, f = g + h
    self.predecessor = None
    self.neighbours = []
    # GAC info: every constraint is the inequality x != y over variable pairs
    self.constraints = Constraints("x!=y", ["x", "y"], constraint_list)
    self.domains = domains
    # init gac
    self.gac = GAC()
def __init__(self, world_size, args):
    if args.env_name == 'L2M2019Env':
        env = L2M2019Env(visualize=False, difficulty=args.difficulty)
        obs_dim = 99
    else:
        env = gym.make(args.env_name)
        obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]

    self.device = torch.device(args.device)
    self.args = args
    self.world_size = world_size
    self.actor_critic = MLPActorCritic(
        obs_dim, act_dim, hidden_sizes=args.hidden_sizes).to(self.device)
    # one replay buffer per observer worker (ranks 1 .. world_size - 1)
    self.replay_buffer = [
        ReplayBuffer(obs_dim, act_dim, args.buffer_size)
        for _ in range(1, world_size)
    ]
    self.gac = GAC(self.actor_critic,
                   self.replay_buffer,
                   device=self.device,
                   gamma=args.gamma,
                   alpha_start=args.alpha_start,
                   alpha_min=args.alpha_min,
                   alpha_max=args.alpha_max)
    self.test_len = 0.0
    self.test_ret = 0.0

    # hold a remote reference to one Observer per worker, plus a reference
    # to this agent that the observers can call back through
    self.ob_rrefs = []
    for ob_rank in range(1, world_size):
        ob_info = rpc.get_worker_info(OBSERVER_NAME.format(ob_rank))
        self.ob_rrefs.append(remote(ob_info, Observer, args=(args, )))
    self.agent_rref = RRef(self)
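# Launch sketch, not from the source: the trainer above expects every worker to
# have joined the same RPC group first. "AgentClass" stands in for whichever
# class owns the __init__ above, and the rank-0 worker name "agent" is an
# assumption; OBSERVER_NAME.format(rank) mirrors the lookup used above.
import torch.distributed.rpc as rpc

def run_worker(rank, world_size, args):
    if rank == 0:
        rpc.init_rpc("agent", rank=rank, world_size=world_size)
        trainer = AgentClass(world_size, args)  # hypothetical class name
    else:
        rpc.init_rpc(OBSERVER_NAME.format(rank), rank=rank, world_size=world_size)
    rpc.shutdown()  # blocks until all outstanding RPC work has finished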
def __init__(self, graph):
    tk.Tk.__init__(self)
    self.graph = Graph(graph[0], graph[1], graph[2], graph[3])
    self.gac = GAC(self.graph)
    self.search = Search(self.gac)
    self.graph_size = 800.0
    self.vertex_size = 10.0
    self.ixy, self.x_size, self.y_size = self.getIXY()

    self.canvas = tk.Canvas(self, width=self.graph_size + 50,
                            height=self.graph_size + 50, borderwidth=0)
    self.canvas.pack(side="top", fill="both", expand=True)

    menubar = tk.Menu(self)

    commandmenu = tk.Menu(menubar)
    commandmenu.add_command(label="solve", command=self.drawSolution)
    commandmenu.add_command(label="start animation", command=self.startAnimation)
    commandmenu.add_command(label="increment", command=self.incrementSolution)
    commandmenu.add_command(label="reset", command=self.resetGraph)
    menubar.add_cascade(label="Commands", menu=commandmenu)

    execmenu = tk.Menu(menubar)
    for n in range(2, 11):
        # bind n at definition time to avoid the late-binding lambda pitfall
        execmenu.add_command(label="{} Colors".format(n),
                             command=lambda n=str(n): self.changeColors(n))
    menubar.add_cascade(label="Colors", menu=execmenu)

    mapMenu = tk.Menu(menubar)
    for label, filename in [
            ("graph-color-2", "graph-color-2.txt"),
            ("rand-50", "rand-50-4-color1.txt"),
            ("test", "test.txt"),
            ("spiral-500", "spiral-500-4-color1.txt"),
            ("graph-color-1", "graph-color-1.txt"),
            ("rand-100-6", "rand-100-6-color1.txt"),
            ("rand-100-4", "rand-100-4-color1.txt"),
    ]:
        mapMenu.add_command(label=label,
                            command=lambda f=filename: self.changeMap(f))
    menubar.add_cascade(label="Maps", menu=mapMenu)

    self.config(menu=menubar)

    # draw edges first so the vertex ovals are rendered on top of them
    scale_x = self.graph_size / self.x_size
    scale_y = self.graph_size / self.y_size
    self.oval = {}
    for edge in self.graph.edges:
        x1 = self.ixy[edge[0]][1] * scale_x + self.vertex_size / 2
        y1 = self.ixy[edge[0]][2] * scale_y + self.vertex_size / 2
        x2 = self.ixy[edge[1]][1] * scale_x + self.vertex_size / 2
        y2 = self.ixy[edge[1]][2] * scale_y + self.vertex_size / 2
        self.canvas.create_line(x1, y1, x2, y2)
    for vertex in self.ixy:
        x1 = vertex[1] * scale_x
        y1 = vertex[2] * scale_y
        self.oval[vertex[1], vertex[2]] = self.canvas.create_oval(
            x1, y1, x1 + self.vertex_size, y1 + self.vertex_size,
            outline="black", fill="gray80", tag="oval")
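# Usage sketch, not from the source: "GraphGUI" stands in for the tk.Tk subclass
# owning the __init__ above, and load_graph() for whatever parser produces the
# 4-tuple that Graph(...) unpacks; both names are assumptions.
# gui = GraphGUI(load_graph("graph-color-1.txt"))
# gui.mainloop()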
def main(args):
    if 'L2M2019Env' in args.env_name:
        env = L2M2019Env(visualize=False, difficulty=args.difficulty)
        test_env = L2M2019Env(visualize=False, difficulty=args.difficulty)
    else:
        env = gym.make(args.env_name)
        test_env = gym.make(args.env_name)
    device = torch.device(args.device)

    data = np.load('./official_obs_scaler.npz')
    obs_mean, obs_std = data['mean'], data['std']

    # 1. Seed everything for reproducibility.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)
    env.seed(args.seed)
    test_env.seed(args.seed + 999)

    # 2. Create the actor-critic, replay buffer and GAC trainer.
    if 'L2M2019Env' in args.env_name:
        obs_dim = 99
    else:
        obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]
    act_high = env.action_space.high
    act_low = env.action_space.low
    actor_critic = MLPActorCritic(
        obs_dim, act_dim, hidden_sizes=args.hidden_sizes).to(device)
    replay_buffer = ReplayBuffer(obs_dim, act_dim, args.buffer_size)
    gac = GAC(actor_critic,
              replay_buffer,
              device=device,
              gamma=args.gamma,
              alpha_start=args.alpha_start,
              alpha_min=args.alpha_min,
              alpha_max=args.alpha_max)

    def act_encoder(y):
        # y in [act_low, act_high] ==> x in [-1, 1]
        return (y - act_low) / (act_high - act_low) * 2.0 - 1.0

    def act_decoder(x):
        # x in [-1, 1] ==> y in [act_low, act_high], the inverse of act_encoder
        return (x + 1.0) / 2.0 * (act_high - act_low) + act_low

    def get_observation(env):
        # scale the raw muscle/body observation and append the velocity target
        obs = np.array(env.get_observation()[242:])
        obs = (obs - obs_mean) / obs_std
        state_desc = env.get_state_desc()
        p_body = [state_desc['body_pos']['pelvis'][0],
                  -state_desc['body_pos']['pelvis'][2]]
        v_tgt = env.vtgt.get_vtgt(p_body).T
        return np.append(obs, v_tgt)

    def get_reward(env):
        reward = 10.0  # alive bonus for not falling down
        state_desc = env.get_state_desc()
        p_body = [state_desc['body_pos']['pelvis'][0],
                  -state_desc['body_pos']['pelvis'][2]]
        v_body = [state_desc['body_vel']['pelvis'][0],
                  -state_desc['body_vel']['pelvis'][2]]
        v_tgt = env.vtgt.get_vtgt(p_body).T
        vel_penalty = np.linalg.norm(v_body - v_tgt)
        muscle_penalty = 0
        for muscle in sorted(state_desc['muscles'].keys()):
            muscle_penalty += np.square(
                state_desc['muscles'][muscle]['activation'])
        ret_r = reward - (vel_penalty * 3 + muscle_penalty * 1)
        if vel_penalty < 0.3:
            ret_r += 10  # bonus for tracking the target velocity closely
        return ret_r

    # 3. Start training.
    def get_action(o, deterministic=False):
        o = torch.FloatTensor(o.reshape(1, -1)).to(device)
        return actor_critic.act(o, deterministic)

    def test_agent():
        test_ret, test_len = 0, 0
        for j in range(args.epoch_per_test):
            _, d, ep_ret, ep_len = test_env.reset(), False, 0, 0
            o = get_observation(test_env)
            while not (d or (ep_len == args.max_ep_len)):
                # take deterministic actions at test time
                a = act_decoder(get_action(o, True))
                for _ in range(args.frame_skip):
                    _, r, d, _ = test_env.step(a)
                    ep_ret += r
                    ep_len += 1
                    if d:
                        break
                o = get_observation(test_env)
            test_ret += ep_ret
            test_len += ep_len
        return test_ret / args.epoch_per_test, test_len / args.epoch_per_test

    total_step = args.total_epoch * args.steps_per_epoch
    _, d, ep_len = env.reset(), False, 0
    o = get_observation(env)
    for t in range(1, total_step + 1):
        if t <= args.start_steps:
            # warm-up: sample uniformly from the action space
            a = act_encoder(env.action_space.sample())
        else:
            a = get_action(o, deterministic=False)
        a = act_decoder(a)

        r = 0.0
        for _ in range(args.frame_skip):
            _, _, d, _ = env.step(a)
            r += get_reward(env)
            ep_len += 1
            if d:
                break
        o2 = get_observation(env)

        # Ignore the "done" signal if it comes from hitting the time horizon,
        # i.e. an artificial terminal that is not based on the agent's state.
        d = False if ep_len == args.max_ep_len else d

        # store the experience in the replay buffer
        replay_buffer.store(o, a, r * args.reward_scale, o2, d)
        o = o2

        if d or (ep_len == args.max_ep_len):
            _, ep_len = env.reset(obs_as_dict=False), 0
            o = get_observation(env)

        if t >= args.update_after and t % args.steps_per_update == 0:
            for _ in range(args.steps_per_update):
                loss_a, loss_c, alpha = gac.update(args.batch_size)
                gac.update_beta()
                print("loss_actor = {:<22}, loss_critic = {:<22}, "
                      "alpha = {:<20}, beta = {:<20}".format(
                          loss_a, loss_c, alpha, gac.beta))

        # end-of-epoch handling: evaluate, report, and yield the policy
        if t >= args.update_after and t % args.steps_per_epoch == 0:
            test_ret, test_len = test_agent()
            print("Step {:>10}: test_ret = {:<20}, test_len = {:<20}".format(
                t, test_ret, test_len))
            print("-----------------------------------------------------------")
            yield t, test_ret, test_len, actor_critic
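# Usage sketch, not from the source: main(args) is a generator that yields after
# every evaluated epoch, so a driver can checkpoint the policy as training runs.
# parse_args() and the checkpoint file name are hypothetical; the args fields
# mirror the args.* attributes used above.
if __name__ == '__main__':
    args = parse_args()  # hypothetical argparse helper
    for t, test_ret, test_len, actor_critic in main(args):
        # save a checkpoint after each evaluated epoch (assumed file name)
        torch.save(actor_critic.state_dict(), 'model_step_{}.pt'.format(t))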