def create_csp(self, graph_file, domain_size):
    self.CNET = constraintnet.ConstraintNet()
    gac = GAC(self.CNET)
    with open(graph_file, 'r') as f:
        number_of_vertices, number_of_edges = [int(x) for x in f.readline().strip().split(' ')]
        # One line per vertex: "index x y"
        for _ in range(number_of_vertices):
            index, x, y = f.readline().strip().split(' ')
            vertex = cspvariable.Variable(int(index), float(x), float(y))
            gac.variables.append(vertex)
        # One line per edge: "i1 i2"; each edge yields one binary constraint
        # registered on both endpoints.
        for _ in range(number_of_edges):
            i1, i2 = [int(i) for i in f.readline().strip().split(' ')]
            this_vertex = gac.variables[i1]
            other_vertex = gac.variables[i2]
            constraint = cspconstraint.Constraint([this_vertex, other_vertex], "x!=y")
            self.CNET.add_constraint(this_vertex, constraint)
            self.CNET.add_constraint(other_vertex, constraint)
    # Every variable starts with the full set of available colors.
    for k in gac.variables:
        gac.domains[k] = [self.colors[x] for x in range(domain_size)]
    return gac

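# --- Input sketch (an assumption inferred from the reads in create_csp above,
# not a file shipped with the code): a "N_VERTICES N_EDGES" header, one
# "index x y" line per vertex, then one "i1 i2" line per edge, e.g.
#
#   4 4
#   0 0.0 0.0
#   1 1.0 0.0
#   2 1.0 1.0
#   3 0.0 1.0
#   0 1
#   1 2
#   2 3
#   3 0
#
# gac = solver.create_csp('square.txt', domain_size=2)  # 'solver' is whatever
#                                                       # object owns create_csp
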
def make_csp(path, colors):
    gac = GAC()
    NV = 0  # number of vertices
    NE = 0  # number of edges
    count = 1
    with open(path, "r") as f:  # 'with' added: the original never closed the file
        for line in f:
            l = parse_line(line)
            print(l)
            if count == 1:
                NV = int(l[0])
                NE = int(l[1])
            elif 1 < count <= NV + 1:
                print("Making var...")
                name = "v{}".format(l[0])
                print(name)
                print("")
                domain = [i for i in range(colors)]
                gac.add_variable(Vc_var(name, domain, float(l[1]), float(l[2])))
            else:
                print("Making constraint...\n")
                gac.add_constraint(gen_constraint(l[0], l[1]))
            count += 1
    return gac

def __init__(self, columns, rows):
    tk.Tk.__init__(self)
    self.columns = columns
    self.rows = rows
    self.gac = GAC(columns, rows)
    self.search = Search(self.gac)
    solution = self.search.a_star()
    self.canvas = tk.Canvas(self, width=800, height=800, borderwidth=0)
    self.canvas.pack(side="top", fill="both", expand="true")
    menubar = tk.Menu(self)
    mapMenu = tk.Menu(menubar)
    # One menu entry per scenario file; the default argument binds the
    # current value of n into each lambda.
    for n in range(7):
        mapMenu.add_command(label="Scenario {}".format(n),
                            command=lambda n=n: self.changeMap('scenario{}.txt'.format(n)))
    menubar.add_cascade(label="Maps", menu=mapMenu)
    self.config(menu=menubar)

def make(path):
    count = 0
    size = 0
    colors = 0
    grid = []  # renamed from `map` to avoid shadowing the builtin
    # generate map
    with open(path, "r") as f:  # 'with' added: the original never closed the file
        for line in f:
            l = parse_line(line)
            if count == 0:
                size = int(l[0])
                colors = int(l[1])
                for i in range(size):
                    grid.append([0] * size)
            else:
                # Each subsequent line gives a color index and its two
                # endpoints: "color x1 y1 x2 y2". Cells are stored as
                # color + 1 so that 0 can mean "empty".
                grid[int(l[2])][int(l[1])] = int(l[0]) + 1
                grid[int(l[4])][int(l[3])] = int(l[0]) + 1
            count += 1
    for row in grid:
        print(row)
    print()
    FFNode.size = size
    gac = GAC()
    gen_variables(gac, grid, colors)
    gen_constraints(gac, grid)
    return gac, size

def make(path):
    width = 0
    height = 0
    count = 0
    rows = []
    columns = []
    # read the row and column specifications
    with open(path, "r") as f:  # 'with' added: the original never closed the file
        for line in f:
            l = util.parse_line(line)
            if count == 0:
                width = int(l[0])   # columns
                height = int(l[1])  # rows
                print(l)
            elif count <= height:
                row = [int(segment) for segment in l]
                rows.append(row)
                print(row)
            else:
                col = [int(segment) for segment in l]
                columns.append(col)
                print(col)
            count += 1
    bitmap = util.get_bitmap_vector(max(width, height))
    print()
    gac = GAC()
    # generate variables
    for var in gen_variables(rows, columns, width, height, bitmap):
        gac.add_variable(var)
    # generate constraints
    gac.constraints = gen_constraints(width, height)
    return gac, width, height

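# --- Input sketch for make() above (an assumption inferred from the parsing
# order, not a bundled file): a "WIDTH HEIGHT" header, then HEIGHT lines of
# row segment lengths, then WIDTH lines of column segment lengths, e.g.
#
#   3 2
#   1 1     <- row 0: two segments of length 1
#   3       <- row 1: one segment of length 3
#   2       <- column 0
#   1       <- column 1
#   2       <- column 2
#
# gac, width, height = make('nonogram-example.txt')  # hypothetical file name
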
class Probleminstance():
    '''
    A search-graph node wrapping a set of variable domains and the
    constraints between them.
    '''

    def __init__(self, domains, constraint_list):
        '''
        Initializes the values used by astar and gac.
        '''
        # A* info
        self.h = 0
        self.g = 0
        self.f = 0
        self.predecessor = None
        self.neighbours = []
        # GAC info
        self.constraints = Constraints("x!=y", ["x", "y"], constraint_list)
        self.domains = domains
        # init gac
        self.gac = GAC()

    def initialize(self):
        '''
        Initializes by running the first domain-filtering loop.
        '''
        self.gac.initialize(self.domains, self.constraints)
        self.domains = self.gac.domain_filtering_loop()
        self.astar = Astar(self)

    def solve(self):
        '''
        Runs one iteration of the astar algorithm.
        '''
        self.current = self.astar.solve("A*")
        return [self, self.current, self.astar.prev_current]

    def is_solution(self):
        '''
        Returns True if this is a solution state, False if not.
        '''
        for domain in self.domains:
            if len(self.domains[domain]) != 1:
                return False
        return True

    def get_neighbours(self):
        '''
        Returns the neighbours of this node, generated by assuming one value
        at a time for the undecided variable with the smallest domain.
        '''
        minlen = float("inf")
        current_domain = None
        neighbours = []
        for domain in range(len(self.domains)):
            if len(self.domains[domain]) == 1:
                continue
            if len(self.domains[domain]) < minlen:
                minlen = len(self.domains[domain])
                current_domain = domain
        for color in self.domains[current_domain]:
            copy_domains = copy.deepcopy(self.domains)
            copy_domains[current_domain] = [color]
            copy_domains = self.gac.rerun(copy_domains, current_domain, self.constraints)
            pi = Probleminstance(copy_domains, self.constraints.involved)
            neighbours.append(pi)
        self.neighbours = neighbours
        return neighbours

    def get_arc_cost(self):
        '''
        Returns the cost of moving from this node.
        '''
        return 1

    def get_h(self):
        '''
        Sets and returns the h value: the number of domain values that still
        have to be eliminated before every variable is decided.
        '''
        h = 0
        for domain in self.domains:
            h += len(self.domains[domain]) - 1
        self.h = h
        return self.h

    def is_illegal(self):
        '''
        Returns True if any constraint is broken in this state, False otherwise.
        '''
        for node in self.constraints.involved:
            for edge in self.constraints.involved[node]:
                if (len(self.domains[node]) == 1) and (len(self.domains[edge]) == 1) \
                        and (self.domains[node][0] == self.domains[edge][0]):
                    return True
        return False

    def __lt__(self, other):
        '''
        Less-than comparison: compares on f value primarily, on h value if the
        f values are equal.
        '''
        if self.f == other.f:
            return self.h < other.h
        return self.f < other.f

    def __eq__(self, other):
        '''
        Equality comparison: compares on the equality of domains.
        '''
        return self.domains == other.domains

    def __str__(self):
        '''
        Print method for this object; returns its domains.
        '''
        return str(self.domains)

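# --- Usage sketch (illustrative; the triangle instance is an assumption about
# the expected input shape: domains map each vertex to its candidate colors,
# and the constraint list is the adjacency structure handed to Constraints).
#
# triangle_domains = {0: [0, 1, 2], 1: [0, 1, 2], 2: [0, 1, 2]}
# triangle_edges = {0: [1, 2], 1: [0, 2], 2: [0, 1]}
# pi = Probleminstance(triangle_domains, triangle_edges)
# pi.initialize()
# current = pi
# while not current.is_solution():
#     _, current, _ = pi.solve()
# print(current)  # every domain reduced to a single color
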
class Astar_GAC(Graph):
    """Astar_GAC integrates A* search and GAC domain filtering."""

    def __init__(self, variables, domains, expressions):
        super(Astar_GAC, self).__init__()
        self.cnet = CNET(variables, domains, expressions)
        self.currentState = self.initializeState(self.cnet)
        self.gac = GAC(self.currentState)
        self.Astar = AStar(self)

    @abstractmethod
    def createNewState(self, variables, constraints):
        pass

    def initializeState(self, cnet):
        """In the initial state each variable has its full domain. It is set
        as the root node and initializes the cnet."""
        s = self.createNewState(cnet.variables, cnet.constraints)
        s.update('start')
        self.startNode = s
        self.stateCounter = 0
        self.nofAssumption = 0
        self.nofExpanded = 0
        return s

    def search(self):
        self.currentState = self.gac.domainFiltering(self.currentState)
        self.stateCounter += 1
        if self.currentState.isSolution():
            self.printStatistics(self.currentState)
            return self.currentState
        return self.iterateSearch()

    def iterateSearch(self):
        prev = self.currentState
        if prev.isSolution():
            return prev
        self.currentState = self.Astar.iterateAStar()
        # self.currentState.updateColors()
        self.stateCounter += 1
        # The parent pointer is used for backtracking to find the 'shortest
        # path' for the statistics.
        self.currentState.parent = prev
        self.nofExpanded = self.Astar.nofExpandedNodes
        if self.currentState.isSolution():
            self.printStatistics(self.currentState)
            return self.currentState
        self.currentState = self.gac.domainFiltering(self.currentState)
        return self.currentState

    def makeAssumption(self, newVI, parentState):
        """Generate one successor, and make sure all pointers are correct."""
        newVertices = {}
        newVIList = []
        # Deep-copy the vertex instances, then rewire neighbor pointers to
        # the copies.
        for vi in parentState.viList:
            viID = vi.getID()
            tmpVI = VI(viID, vi.domain.copy())
            newVertices[viID] = tmpVI
            newVIList.append(tmpVI)
        for vi in parentState.viList:
            viID = vi.getID()
            for neighbor in vi.neighbors:
                n = newVertices[neighbor.getID()]
                newVertices[viID].add_neighbor(n)
        # Swap in the assumed vertex instance, both in the list and in every
        # copy's neighbor list.
        for vi in newVIList:
            if vi.getID() == newVI.getID():
                newVIList.remove(vi)
                newVIList.append(newVI)
            else:
                for vi_n in vi.neighbors:
                    if vi_n.getID() == newVI.getID():
                        vi.neighbors.remove(vi_n)
                        vi.neighbors.append(newVI)
        succ = self.createNewState(newVIList, parentState.constraintList)
        succ.parent = parentState
        succ.updateUndecided()  # maybe not needed
        succ.ciList = []
        constraints = self.cnet.getConstraints()
        for v in succ.undecidedVariables:
            for n in v.neighbors:
                for c in constraints:
                    succ.ciList.append(CI(c, [v, n]))
        return succ

    def generateSucc(self, state):
        """Make a guess: start guessing values for the variable with the
        smallest domain."""
        succStates = []
        varsCopy = state.undecidedVariables.copy()
        if not len(varsCopy):
            return []
        otherVIs = sorted(varsCopy, key=lambda v: len(v.domain), reverse=True)
        betterVI = otherVIs.pop()
        if betterVI.domain:
            initID = betterVI.getID()
            for d in betterVI.domain:
                newVI = VI(initID, [d])
                newVI.neighbors = betterVI.neighbors.copy()
                successor = self.makeAssumption(newVI, state)
                succStates.append(self.gac.rerun(successor))
            return succStates
        else:
            return []

    # Not complete: TODO
    def printStatistics(self, state):
        print('The number of unsatisfied constraints =',
              self.countUnsatisfiedConstraints(state), '\n')
        print('The total number of vertices without color assignment =',
              self.countColorLess(state), '\n')
        print('The total number of nodes in the search tree =',
              self.stateCounter, '\n')
        print('The total number of nodes popped from the agenda and expanded =',
              self.nofExpanded, '\n')
        print('The length of the path =', self.nofAssumption, '\n')

    def countColorLess(self, state):
        nofColorLess = 0
        for vi in state.viList:
            if len(vi.domain) != 1:
                nofColorLess += 1
        return nofColorLess

    # def countUnsatisfiedConstraints(self, state):
    #     unsatisfied = 0
    #     varList = state.viList
    #     for c in state.ciList:
    #         for var in varList:
    #             if var in c.variables:
    #                 if self.countInconsistentDomainValues(var, c) or not len(var.domain):
    #                     unsatisfied += 1
    #     return unsatisfied

    def countUnsatisfiedConstraints(self, state):
        unsatisfied = 0
        for var in state.viList:
            if len(var.domain) != 1:
                unsatisfied += 1
        return unsatisfied

    # Needed for Graph
    def getGoal(self):
        return None

    def countInconsistentDomainValues(self, x, c):
        pairs = []
        nofInconsistency = 0
        for k in c.variables:
            for value in x.domain:
                pairs.extend(list(itertools.product([value], k.domain)))
        for p in pairs:
            if not c.constraint(p[0], p[1]):
                nofInconsistency += 1
        return nofInconsistency

class Agent:
    def __init__(self, world_size, args):
        if args.env_name == 'L2M2019Env':
            env = L2M2019Env(visualize=False, difficulty=args.difficulty)
            obs_dim = 99
        else:
            env = gym.make(args.env_name)
            obs_dim = env.observation_space.shape[0]
        act_dim = env.action_space.shape[0]

        self.device = torch.device(args.device)
        self.args = args
        self.world_size = world_size
        self.actor_critic = MLPActorCritic(obs_dim, act_dim,
                                           hidden_sizes=args.hidden_sizes).to(self.device)
        # One replay buffer per observer (ranks 1 .. world_size - 1).
        self.replay_buffer = [
            ReplayBuffer(obs_dim, act_dim, args.buffer_size)
            for _ in range(1, world_size)
        ]
        self.gac = GAC(self.actor_critic,
                       self.replay_buffer,
                       device=self.device,
                       gamma=args.gamma,
                       alpha_start=args.alpha_start,
                       alpha_min=args.alpha_min,
                       alpha_max=args.alpha_max)
        self.test_len = 0.0
        self.test_ret = 0.0

        self.ob_rrefs = []
        for ob_rank in range(1, world_size):
            ob_info = rpc.get_worker_info(OBSERVER_NAME.format(ob_rank))
            self.ob_rrefs.append(remote(ob_info, Observer, args=(args, )))
        self.agent_rref = RRef(self)

    def select_action(self, obs, deterministic=False):
        obs = torch.FloatTensor(obs.reshape(1, -1)).to(self.device)
        return self.actor_critic.act(obs, deterministic)

    def add_memory(self, ob_id, o, a, r, o2, d):
        self.replay_buffer[ob_id - 1].store(o, a, r, o2, d)

    def run_episode(self, n_steps=0, random=False):
        futs = []
        for ob_rref in self.ob_rrefs:
            # make async RPC to kick off an episode on all observers
            futs.append(
                rpc_async(ob_rref.owner(),
                          _call_method,
                          args=(Observer.run_episode, ob_rref, self.agent_rref,
                                n_steps, random)))
        # wait until all observers have finished this episode
        for fut in futs:
            fut.wait()

    def add_test_data(self, ret, length):
        self.test_ret += ret
        self.test_len += length

    def test_episode(self):
        futs, self.test_ret, self.test_len = [], 0.0, 0.0
        for ob_rref in self.ob_rrefs:
            # make async RPC to kick off an episode on all observers
            futs.append(
                rpc_async(ob_rref.owner(),
                          _call_method,
                          args=(Observer.test_episode, ob_rref, self.agent_rref)))
        # wait until all observers have finished this episode
        for fut in futs:
            fut.wait()
        self.test_ret /= (self.world_size - 1)
        self.test_len /= (self.world_size - 1)
        return self.test_ret, self.test_len

    def update(self):
        for _ in range(self.args.steps_per_update):
            loss_a, loss_c, alpha = self.gac.update(self.args.batch_size)
            self.gac.update_beta()
            print("loss_actor = {:<22}, loss_critic = {:<22}, alpha = {:<20}, beta = {:<20}"
                  .format(loss_a, loss_c, alpha, self.gac.beta))

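# --- Launch sketch (an assumption about the surrounding driver, not code from
# the original repo): rank 0 hosts the Agent, ranks 1..world_size-1 host the
# Observers reached through the RRefs built in __init__. "agent" is a
# hypothetical worker name; OBSERVER_NAME is the template this module already
# uses.
#
# import torch.distributed.rpc as rpc
#
# def run_worker(rank, world_size, args):
#     if rank == 0:
#         rpc.init_rpc("agent", rank=rank, world_size=world_size)
#         agent = Agent(world_size, args)
#         agent.run_episode(n_steps=args.start_steps, random=True)
#         agent.update()
#     else:
#         rpc.init_rpc(OBSERVER_NAME.format(rank), rank=rank, world_size=world_size)
#     rpc.shutdown()  # blocks until every RPC worker is done
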
def __init__(self, graph):
    tk.Tk.__init__(self)
    self.graph = Graph(graph[0], graph[1], graph[2], graph[3])
    self.gac = GAC(self.graph)
    self.search = Search(self.gac)
    self.graph_size = 800.0
    self.vertex_size = 10.0
    self.ixy, self.x_size, self.y_size = self.getIXY()

    self.canvas = tk.Canvas(self, width=self.graph_size + 50,
                            height=self.graph_size + 50, borderwidth=0)
    self.canvas.pack(side="top", fill="both", expand="true")

    menubar = tk.Menu(self)
    commandmenu = tk.Menu(menubar)
    commandmenu.add_command(label="solve", command=self.drawSolution)
    commandmenu.add_command(label="start animation", command=self.startAnimation)
    commandmenu.add_command(label="increment", command=self.incrementSolution)
    commandmenu.add_command(label="reset", command=self.resetGraph)
    menubar.add_cascade(label="Commands", menu=commandmenu)

    # One entry per allowed number of colors (2-10). The default argument
    # binds the current value of n into each lambda.
    execmenu = tk.Menu(menubar)
    for n in range(2, 11):
        execmenu.add_command(label="{} Colors".format(n),
                             command=lambda n=n: self.changeColors(str(n)))
    menubar.add_cascade(label="Colors", menu=execmenu)

    mapMenu = tk.Menu(menubar)
    maps = [("graph-color-2", 'graph-color-2.txt'),
            ("rand-50", 'rand-50-4-color1.txt'),
            ("test", 'test.txt'),
            ("spiral-500", 'spiral-500-4-color1.txt'),
            ("graph-color-1", 'graph-color-1.txt'),
            ("rand-100-6", 'rand-100-6-color1.txt'),
            ("rand-100-4", 'rand-100-4-color1.txt')]
    for label, filename in maps:
        mapMenu.add_command(label=label,
                            command=lambda filename=filename: self.changeMap(filename))
    menubar.add_cascade(label="Maps", menu=mapMenu)
    self.config(menu=menubar)

    # Draw the edges first so the vertex ovals sit on top of them.
    self.oval = {}
    for edge in self.graph.edges:
        x1 = (self.ixy[edge[0]][1] * (self.graph_size / self.x_size)) + (self.vertex_size / 2)
        y1 = (self.ixy[edge[0]][2] * (self.graph_size / self.y_size)) + (self.vertex_size / 2)
        x2 = (self.ixy[edge[1]][1] * (self.graph_size / self.x_size)) + (self.vertex_size / 2)
        y2 = (self.ixy[edge[1]][2] * (self.graph_size / self.y_size)) + (self.vertex_size / 2)
        self.canvas.create_line(x1, y1, x2, y2)
    for vertex in self.ixy:
        x1 = vertex[1] * (self.graph_size / self.x_size)
        y1 = vertex[2] * (self.graph_size / self.y_size)
        x2 = x1 + self.vertex_size
        y2 = y1 + self.vertex_size
        self.oval[vertex[1], vertex[2]] = self.canvas.create_oval(
            x1, y1, x2, y2, outline="black", fill="gray80", tag="oval")

def __init__(self):
    '''
    Initializes the general GAC.
    '''
    GAC.__init__(self)

def main(args):
    if 'L2M2019Env' in args.env_name:
        env = L2M2019Env(visualize=False, difficulty=args.difficulty)
        test_env = L2M2019Env(visualize=False, difficulty=args.difficulty)
    else:
        env = gym.make(args.env_name)
        test_env = gym.make(args.env_name)
    device = torch.device(args.device)

    data = np.load('./official_obs_scaler.npz')
    obs_mean, obs_std = data['mean'], data['std']

    # 1. Set the necessary seeds.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    np.random.seed(args.seed)
    env.seed(args.seed)
    test_env.seed(args.seed + 999)

    # 2. Create the actor-critic, replay buffer and GAC.
    if 'L2M2019Env' in args.env_name:
        obs_dim = 99
    else:
        obs_dim = env.observation_space.shape[0]
    act_dim = env.action_space.shape[0]
    act_high = env.action_space.high
    act_low = env.action_space.low

    actor_critic = MLPActorCritic(obs_dim, act_dim,
                                  hidden_sizes=args.hidden_sizes).to(device)
    replay_buffer = ReplayBuffer(obs_dim, act_dim, args.buffer_size)
    gac = GAC(actor_critic,
              replay_buffer,
              device=device,
              gamma=args.gamma,
              alpha_start=args.alpha_start,
              alpha_min=args.alpha_min,
              alpha_max=args.alpha_max)

    def act_encoder(y):
        # y in [act_low, act_high]  ==>  x in [-1, 1]
        # if args.env_name == 'L2M2019Env':
        #     return y
        return (y - act_low) / (act_high - act_low) * 2.0 - 1.0

    def act_decoder(x):
        # x in [-1, 1]  ==>  y in [act_low, act_high]
        # Inverse of act_encoder. The original subtracted act_low here, which
        # does not invert the encoding; it must be added.
        # if args.env_name == 'L2M2019Env':
        #     return np.abs(x)
        return (x + 1.0) / 2.0 * (act_high - act_low) + act_low

    def get_observation(env):
        obs = np.array(env.get_observation()[242:])
        obs = (obs - obs_mean) / obs_std
        state_desc = env.get_state_desc()
        p_body = [state_desc['body_pos']['pelvis'][0],
                  -state_desc['body_pos']['pelvis'][2]]
        v_body = [state_desc['body_vel']['pelvis'][0],
                  -state_desc['body_vel']['pelvis'][2]]
        v_tgt = env.vtgt.get_vtgt(p_body).T
        return np.append(obs, v_tgt)

    def get_reward(env):
        reward = 10.0  # reward for not falling down
        state_desc = env.get_state_desc()
        p_body = [state_desc['body_pos']['pelvis'][0],
                  -state_desc['body_pos']['pelvis'][2]]
        v_body = [state_desc['body_vel']['pelvis'][0],
                  -state_desc['body_vel']['pelvis'][2]]
        v_tgt = env.vtgt.get_vtgt(p_body).T
        vel_penalty = np.linalg.norm(v_body - v_tgt)
        muscle_penalty = 0
        for muscle in sorted(state_desc['muscles'].keys()):
            muscle_penalty += np.square(state_desc['muscles'][muscle]['activation'])
        ret_r = reward - (vel_penalty * 3 + muscle_penalty * 1)
        if vel_penalty < 0.3:
            ret_r += 10
        return ret_r

    # 3. Start training.
    def get_action(o, deterministic=False):
        o = torch.FloatTensor(o.reshape(1, -1)).to(device)
        return actor_critic.act(o, deterministic)

    def test_agent():
        test_ret, test_len = 0, 0
        for _ in range(args.epoch_per_test):
            _, d, ep_ret, ep_len = test_env.reset(), False, 0, 0
            o = get_observation(test_env)
            while not (d or (ep_len == args.max_ep_len)):
                # Take deterministic actions at test time
                a = get_action(o, True)
                a = act_decoder(a)
                for _ in range(args.frame_skip):
                    _, r, d, _ = test_env.step(a)
                    ep_ret += r
                    ep_len += 1
                    if d:
                        break
                o = get_observation(test_env)
            test_ret += ep_ret
            test_len += ep_len
        return test_ret / args.epoch_per_test, test_len / args.epoch_per_test

    total_step = args.total_epoch * args.steps_per_epoch
    _, d, ep_len = env.reset(), False, 0
    o = get_observation(env)
    for t in range(1, total_step + 1):
        if t <= args.start_steps:
            a = act_encoder(env.action_space.sample())
        else:
            a = get_action(o, deterministic=False)
        a = act_decoder(a)

        r = 0.0
        for _ in range(args.frame_skip):
            _, _, d, _ = env.step(a)
            r += get_reward(env)
            ep_len += 1
            if d:
                break
        o2 = get_observation(env)

        # Ignore the "done" signal if it comes from hitting the time
        # horizon (that is, when it's an artificial terminal signal
        # that isn't based on the agent's state).
        d = False if ep_len == args.max_ep_len else d

        # if not d:
        #     new_o, new_r, new_o2 = generate_success(o, o2)
        #     replay_buffer.store(new_o, a, new_r * args.reward_scale, new_o2, d)

        # Store experience to replay buffer
        replay_buffer.store(o, a, r * args.reward_scale, o2, d)
        o = o2

        if d or (ep_len == args.max_ep_len):
            _, ep_len = env.reset(obs_as_dict=False), 0
            o = get_observation(env)

        if t >= args.update_after and t % args.steps_per_update == 0:
            for _ in range(args.steps_per_update):
                loss_a, loss_c, alpha = gac.update(args.batch_size)
                gac.update_beta()
                print("loss_actor = {:<22}, loss_critic = {:<22}, alpha = {:<20}, beta = {:<20}"
                      .format(loss_a, loss_c, alpha, gac.beta))

        # End-of-epoch handling
        if t >= args.update_after and t % args.steps_per_epoch == 0:
            test_ret, test_len = test_agent()
            print("Step {:>10}: test_ret = {:<20}, test_len = {:<20}".format(
                t, test_ret, test_len))
            print("-----------------------------------------------------------")
            yield t, test_ret, test_len, actor_critic

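# --- Usage sketch (illustrative; the argparse helper and checkpoint path are
# assumptions). main() is a generator that yields once per test epoch, so the
# caller can checkpoint the current actor-critic while training runs.
#
# if __name__ == '__main__':
#     args = get_args()  # hypothetical helper returning the parsed arguments
#     for t, test_ret, test_len, actor_critic in main(args):
#         torch.save(actor_critic.state_dict(), 'gac_step_{}.pt'.format(t))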