def set_heat(self, f):
    x = utils.vector(4)
    u = utils.vector(2)
    func = th.function([], f(0, x, u))

    def val(p):
        # Evaluate the compiled feature at position p, zeroing the rest of the state.
        x.set_value(np.asarray([p[0], p[1], 0., 0.]))
        return func()

    self.heat = val
def reset(self):
    self.num_iters = 0
    self.players_head = [
        vector(2 * i, 0) for i in range(self.num_players)
    ]  # TODO: player init
    self.players_body = [{self.players_head[i]} for i in range(self.num_players)]
    self.players_dir = [vector(0, 1) for i in range(self.num_players)]
    self.observation = np.zeros(self.board_shape, dtype=np.int16)
    self.update_observation()
def __init__(self, T, dyn):
    self.dyn = dyn
    self.T = T
    self.x0 = utils.vector(dyn.nx)
    self.u = [utils.vector(dyn.nu) for t in range(self.T)]
    self.x = []
    z = self.x0
    for t in range(T):
        z = dyn(z, self.u[t])
        self.x.append(z)
    self.next_x = th.function([], self.x[0])
def __init__(self, T, dyn):
    self.dyn = dyn  # the system dynamics
    self.T = T  # how many steps ahead it should look
    self.x0 = utils.vector(dyn.nx)  # the state vector
    self.u = [utils.vector(dyn.nu) for t in range(self.T)]  # controls for the upcoming T steps
    self.x = []  # list of all states
    z = self.x0
    for t in range(T):  # roll out the dynamics for every time step it plans over
        z = dyn(z, self.u[t])
        self.x.append(z)
    self.next_x = th.function([], self.x[0])  # compiles the state graph into a callable object
def dir4_to_a(self, dirr):
    assert self.n == 4
    a = None
    if dirr == vector(1, 0):
        a = 0
    elif dirr == vector(0, -1):
        a = 1
    elif dirr == vector(-1, 0):
        a = 2
    elif dirr == vector(0, 1):
        a = 3
    # Fail loudly on a non-unit direction instead of int(None)'s TypeError.
    assert a is not None, "dir4_to_a: unknown direction %r" % (dirr,)
    return int(a)
def a_to_4dir(self, a):
    assert self.n == 4
    v = None
    if a == 0:
        v = vector(1, 0)
    elif a == 1:
        v = vector(0, -1)
    elif a == 2:
        v = vector(-1, 0)
    elif a == 3:
        v = vector(0, 1)
    return v
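A quick round-trip check of the two mappings above (a sketch; the instance name `A_space` is hypothetical, assuming an object of this class with `n == 4`):

# Sketch, not from the source: the two mappings invert each other.
for a in range(4):
    d = A_space.a_to_4dir(a)          # action index -> unit direction vector
    assert A_space.dir4_to_a(d) == a  # direction vector -> same action index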
def constrain(self, other_item, center):
    AB = utils.vector(self.point, other_item.line.point)
    n = self.normal
    # Orient the normal so it points toward the cell center.
    if utils.dot(utils.vector(self.point, center), n) < 0:
        n = (-n[0], -n[1])
    v_n = utils.dot(other_item.line.direction, n)
    AB_n = utils.dot(AB, n)
    # Clip the other line's parameter k to the half-plane on the
    # center side of this line.
    if v_n > 0:
        other_item.k.set_min(-AB_n / v_n)
    if v_n < 0:
        other_item.k.set_max(-AB_n / v_n)
def __init__(self, Tu, dyn, step_per_u=2):
    self.dyn = dyn  # the system dynamics
    self.Tu = Tu
    self.step_per_u = step_per_u
    self.Tx = step_per_u * Tu  # how many steps ahead it should look
    self.x0 = utils.vector(dyn.nx)  # the state vector
    self.u = [utils.vector(dyn.nu) for t in range(self.Tu)]  # controls for the upcoming Tu steps
    self.x = []  # list of all states
    z = self.x0
    for idx in range(Tu):  # roll out the dynamics for every time step it plans over
        for idx_u in range(step_per_u):  # each control is held for step_per_u steps
            z = dyn(z, self.u[idx])
            self.x.append(z)
    self.next_x = th.function([], self.x[0])  # compiles the state graph into a callable object
def move(): "Move pacman and all ghosts." writer.undo() writer.write(state['score']) clear() if valid(pacman + aim): pacman.move(aim) index = offset(pacman) if tiles[index] == 1: tiles[index] = 2 state['score'] += 1 x = (index % 20) * 20 - 200 y = 180 - (index // 20) * 20 square(x, y) up() goto(pacman.x + 10, pacman.y + 10) dot(20, 'yellow') for point, course in ghosts: if valid(point + course): point.move(course) else: options = [ vector(5, 0), vector(-5, 0), vector(0, 5), vector(0, -5), ] plan = choice(options) course.x = plan.x course.y = plan.y up() goto(point.x + 10, point.y + 10) dot(20, 'red') update() for point, course in ghosts: if abs(pacman - point) < 20: return ontimer(move, 100)
def move(): "Update object positions." bird.y -= 5 for ball in balls: ball.x -= 3 if randrange(10) == 0: y = randrange(-199, 199) ball = vector(199, y) balls.append(ball) while len(balls) > 0 and not inside(balls[0]): balls.pop(0) if not inside(bird): draw(False) return for ball in balls: if abs(ball - bird) < 15: draw(False) return draw(True) ontimer(move, 50)
def add_item(self, item):
    u = utils.vector(self.point, item.point)
    direction = utils.orthogonal(u)
    # The new edge lies on the perpendicular bisector of the segment
    # between the two points.
    center = (.5 * (self.point[0] + item.point[0]),
              .5 * (self.point[1] + item.point[1]))
    line = shapes.InfiniteLine(center, direction)
    new_item = OtherItem(item, line)
    keep_new_item = True
    others_to_remove = set()
    for other in self.others:
        other.line.constrain(new_item, self.point)
        new_item.line.constrain(other, self.point)
        if not other.is_valid():
            others_to_remove |= {other}
        if not new_item.is_valid():
            keep_new_item = False
            break
    self.others -= others_to_remove
    if keep_new_item:
        self.others |= {new_item}
        return new_item
    return None
def __init__(self, id, tam_agente, cor=None):
    # add a unique id for the agent
    self._id = id
    self._tam_agente = tam_agente
    # add a dedicated turtle for the agent
    self._turtle = Turtle()
    self._turtle.hideturtle()
    # set the agent's color
    self._cor = cor
    # REQ
    # must pick the agent's color at random (green, red, pink, orange, or brown)
    # when none is passed to the constructor
    # a route generator
    self._waze = None
    # add the following commands
    # TODO: double-check these directions
    # vector(1, 0)  => right
    # vector(-1, 0) => left
    # vector(0, 1)  => up
    # vector(0, -1) => down
    self.direcao = vector(1, 0)  # this vector means right
def prox_passo(self):
    """Return the agent's next step in its current direction."""
    dir_x = self.direcao[0] * self.tam_passo
    dir_y = self.direcao[1] * self.tam_passo
    passo = vector(dir_x, dir_y)
    return passo
def run_irl(world, car, reward, theta, data):
    def gen():
        for point in data:
            for c, x0, u in zip(world.cars, point['x0'], point['u']):
                c.traj.x0.set_value(x0)
                for cu, uu in zip(c.traj.u, u):
                    cu.set_value(uu)
            yield
    r = car.traj.reward(reward)
    g = utils.grad(r, car.traj.u)
    H = utils.hessian(r, car.traj.u)
    I = tt.eye(utils.shape(H)[0])
    reg = utils.vector(1)
    reg.set_value([1e-1])
    H = H - reg[0] * I
    L = tt.dot(g, tt.dot(tn.MatrixInverse()(H), g)) + tt.log(tn.Det()(-H))
    for _ in gen():
        pass
    optimizer = utils.Maximizer(L, [theta], gen=gen, method='gd', eps=0.1,
                                debug=True, iters=1000, inf_ignore=10)
    optimizer.maximize()
    print(theta.get_value())
def item_that_contains(self, point):
    for point_item in self.point_items:
        if not point_item.is_bounded():
            continue
        skip = False
        for other in point_item.others:
            edge_normal = utils.orthogonal(other.line.direction)
            A, B = other.vertices()
            AO = utils.vector(A, point_item.point)
            AP = utils.vector(A, point)
            # The query point must lie on the same side of every edge
            # as the cell's own site point.
            if utils.dot(AO, edge_normal) * utils.dot(AP, edge_normal) < 0:
                skip = True
                break
        if not skip:
            return point_item
    raise Exception("PointSet.item_that_contains: not found")
def __init__(self, *args, **vargs):
    SimpleOptimizerCar.__init__(self, *args, **vargs)
    self.social_u = utils.vector(2)
    self.l = utils.scalar()
    self.watching = []
    self.copyx = None
    self.copyu = None
    self.l_default = 3
def mudar_direcao_aleatoriamente(self):
    """Pick a random direction that is not the current one."""
    # REQ: implement the method
    direcoes = [
        vector(1, 0),   # this vector means right
        vector(-1, 0),  # left
        vector(0, 1),   # up
        vector(0, -1),  # down
    ]
    # Exclude the current direction, as the docstring requires.
    passo = [d for d in direcoes if d != self.direcao]
    return choice(passo)
def move(LatLon):
    global old_pos, init, old_bearing, mesh, bearings, tractor
    utm_pos = Coordinate(x=LatLon.x, y=LatLon.y)
    if init is False:
        tractor.m_p_s = 2
        tractor.run()
        init = True
        old_pos = utm_pos
        scene.camera.pos = vec(utm_pos.x, 4, utm_pos.y - 50)
        scene.camera.axis = vec(0, -4, -50)
        # Build a 100 x 100 grid of ground quads around the start position.
        obj = []
        for x in range(-50, 50):
            for y in range(-50, 50):
                a = Coordinate(x=utm_pos.x + x * 500, y=utm_pos.y + y * 500 + 500)
                b = Coordinate(x=utm_pos.x + x * 500 + 500, y=utm_pos.y + y * 500 + 500)
                c = Coordinate(x=utm_pos.x + x * 500 + 500, y=utm_pos.y + y * 500)
                d = Coordinate(x=utm_pos.x + x * 500, y=utm_pos.y + y * 500)
                Q = quad(
                    canvas=None,
                    v0=vertex(pos=vec(a.x, -1, a.y)),
                    v1=vertex(pos=vec(b.x, -1, b.y)),
                    v2=vertex(pos=vec(c.x, -1, c.y)),
                    v3=vertex(pos=vec(d.x, -1, d.y)),
                )
                obj.append(Q)
        compound(obj, canvas=scene, color=vec(0, .60, 0))
    else:
        # Lay a coverage quad between the previous and current positions.
        curso = utils.bearing(utm_pos, old_pos)
        a = utils.offset(utm_pos, curso - 90, distance / 2)
        b = utils.offset(utm_pos, curso + 90, distance / 2)
        c = utils.offset(old_pos, old_bearing + 90, distance / 2)
        d = utils.offset(old_pos, old_bearing - 90, distance / 2)
        Q = quad(
            v0=vertex(pos=vec(a.x, .5, a.y), color=color.yellow),
            v1=vertex(pos=vec(b.x, .5, b.y), color=color.yellow),
            v2=vertex(pos=vec(c.x, .5, c.y), color=color.yellow),
            v3=vertex(pos=vec(d.x, .5, d.y), color=color.yellow),
        )
        qobj.append(Q)
        left.append(vec(a.x, .5, a.y))
        right.append(vec(b.x, .5, b.y))
        old_bearing = curso
        old_pos = utm_pos
        # Smooth the camera bearing with a sliding-window average.
        bearings.pop()
        bearings.insert(0, curso)
        curso = sum(bearings) / len(bearings)
        camera = utils.offset(utm_pos, curso, 300)
        x, z = utils.vector(curso + 180)
        #scene.camera.pos = vec(camera.x, 100, camera.y)
        #scene.camera.axis = 200 * vec(x, -0.3, z)
        tractor_3d.pos = tractor.get_vec(1.5)
def hard_coded_policy(ob, head, a, board_shape, A_space, eps=0.5):
    """head = np.array [y, x]"""
    def valid(pos):
        if pos.y >= board_shape[0] or pos.y < 0:
            return False
        if pos.x >= board_shape[1] or pos.x < 0:
            return False
        if ob[pos.y, pos.x] != 0:
            return False
        return True

    head = vector(head[1], head[0])
    forward = head + A_space.a_to_4dir(a)
    sample = np.random.random()
    if valid(forward) and sample > eps:
        selected = forward
    else:
        possible = []
        right = head + A_space.a_to_4dir(0)
        down = head + A_space.a_to_4dir(1)
        left = head + A_space.a_to_4dir(2)
        up = head + A_space.a_to_4dir(3)
        if valid(up):
            possible.append(up)
        if valid(down):
            possible.append(down)
        if valid(left):
            possible.append(left)
        if valid(right):
            possible.append(right)
        possible = np.array(possible)
        try:
            selected = possible[np.random.randint(0, high=possible.shape[0]), :]
        except ValueError:
            selected = forward
    return A_space.dir4_to_a(vector(selected[0], selected[1]) - head)
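A minimal smoke test for hard_coded_policy (a sketch; the `A_space` instance and board size are assumptions, not from the source). With a free forward cell and eps=0.0, the policy almost surely keeps the requested action:

# Hypothetical smoke test; A_space is assumed to be a 4-action space object.
import numpy as np

ob = np.zeros((10, 10), dtype=np.int16)  # empty board: every cell is free
head = np.array([5, 5])                  # snake head as [y, x]
act = hard_coded_policy(ob, head, 0, (10, 10), A_space, eps=0.0)
assert act in (0, 1, 2, 3)               # always returns one of the four actions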
def _k_intersect(self, other):
    u2 = utils.dot(self.direction, self.direction)
    v2 = utils.dot(other.direction, other.direction)
    uv = utils.dot(self.direction, other.direction)
    AC = utils.vector(self.point, other.point)
    AC_u = utils.dot(AC, self.direction)
    AC_v = utils.dot(AC, other.direction)
    lower = u2 * v2 - uv * uv
    if lower == 0:
        # Parallel lines: no single intersection parameter.
        return None
    return (AC_u * v2 - AC_v * uv) / lower
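For reference, the expression returned above solves the two-line intersection A + k*u = C + m*v; dotting that equation with u and with v gives a 2x2 linear system in k and m (a derivation sketch, notation chosen here):

# Derivation sketch for _k_intersect (notation chosen here, not from the source).
# Lines: P(k) = A + k*u and Q(m) = C + m*v, with AC = C - A.
# Dotting A + k*u = C + m*v with u and with v:
#     k*(u.u) - m*(u.v) = AC.u
#     k*(u.v) - m*(v.v) = AC.v
# Eliminating m gives the value returned above:
#     k = (AC.u * v.v - AC.v * u.v) / (u.u * v.v - (u.v)**2)
# The denominator vanishes exactly when u and v are parallel
# (equality in Cauchy-Schwarz), which is the `lower == 0` case.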
class Properties:
    width = 600
    height = 600
    time = 0
    last_clear_time = 0
    running = True
    bullets = []
    enemies = []
    timeline = []
    me = vector(300, 500)
    meObj = pygame.Surface((10, 10))
    meObj.fill((255, 255, 255))
    pygame.draw.rect(meObj, (0, 255, 0), (0, 0, 10, 10), 10)
def init_board(self):
    ob = np.zeros(self.board_shape, dtype=np.int16)
    head_board = np.zeros(self.board_shape, dtype=np.int16)
    snakes = []
    mid_height = int(self.board_shape[0] / 2)
    for i in range(self.num_players):
        # init each snake with length init_len
        x = int((i + 1) * self.board_shape[1] / (self.num_players + 1))
        init_vecs = [
            vector(x, y)
            for y in range(mid_height - self.init_len + 1, mid_height + 1)
        ]
        snakes.append(deque(init_vecs))
        for vec in init_vecs:
            ob[vec.y, vec.x] = i + 1
        head_board[init_vecs[-1].y, init_vecs[-1].x] = i + 1
    return ob, head_board, snakes
def get_moves_old(game_map, turns, pid, training=False, graph=False):
    w = game_map.width
    h = game_map.height
    me = game_map.get_me()
    out = np.zeros((w, h))
    graph_o, all_axes, F, Z = get_gx(game_map, training)
    gradU, gradV = np.gradient(Z, axis=(0, 1))  # gradient of func
    for idx, ship in enumerate(me.all_ships()):
        sx, sy = int(ship.x), int(ship.y)
        sv = vector(ship)
        # Distance/Magnitude/Norm/Length = np.sqrt(x**2+y**2) = np.sqrt([x,y].dot([x,y]))
        dx, dy = gradU[sx][sy], gradV[sx][sy]  # gradient components at sx, sy
        gm = F.norm(dx, dy)
        u, v = -gm * dx, -gm * dy
        angle = degrees(np.arctan2(v, u)) % 360
        out[sx][sy] = angle
        is_last_ship = idx == len(me.all_ships()) - 1
        is_my_pid = pid == 0
        if is_last_ship and is_my_pid and graph:
            plotter(Z, sv, graph_o, turns, pid, w, h)
            graph_o.clear()
    return out
def get_moves(game_map, turns, pid, training=False, graph=False):
    w = game_map.width
    h = game_map.height
    me = game_map.get_me()
    ships = me.all_ships()
    out = {}
    if graph:
        grad_u, grad_v, graph_objs, Func, gridZ = get_gradient(
            ships, game_map, graph)
    else:
        grad_u, grad_v = get_gradient(ships, game_map, graph)
    for idx, ship in enumerate(ships):
        sx, sy = int(ship.x), int(ship.y)
        # Distance/Magnitude/Norm/Length = np.sqrt(x**2+y**2) = np.sqrt([x,y].dot([x,y]))
        sv = vector(ship)
        u, v = grad_u[idx], grad_v[idx]
        angle = degrees(np.arctan2(v, u)) % 360
        out[sx, sy] = angle
        is_last_ship = idx == len(ships) - 1
        is_my_pid = pid == 0
        if graph and is_last_ship and is_my_pid:
            plotter(gridZ, sv, graph_objs, turns, pid, w, h)
            graph_objs.clear()
    return out
def __init__(self, T, dyn, x0=None):
    self.x = [vector(4) if x0 is None else x0]
    self.u = [vector(2) for _ in range(T)]
    self.dyn = dyn
    for t in range(T):
        self.x.append(dyn(self.x[t], self.u[t]))
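A minimal usage sketch for this rollout constructor, with a toy Theano dynamics standing in for the real model (the class name `Trajectory` and the dynamics are assumptions, not from the source):

# Hypothetical usage; `Trajectory` names the class that owns __init__ above.
import theano.tensor as tt

# Toy dynamics: add the 2-d control into the first two entries of the 4-d state.
toy_dyn = lambda x, u: x + tt.concatenate([u, tt.zeros(2)])
traj = Trajectory(T=5, dyn=toy_dyn)
assert len(traj.x) == 6  # x0 plus one symbolic state per control step
assert len(traj.u) == 5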
else:
    the_car = None
    for c in the_world.cars:
        if isinstance(c, car.UserControlledCar):
            the_car = c
    T = the_car.traj.T
    train = []
    for fname in files:
        with open(fname) as f:
            us, xs = pickle.load(f)
        for t in range(T, len(xs[0]) - T, T):
            point = {
                'x0': [xseq[t - 1] for xseq in xs],
                'u': [useq[t:t + T] for useq in us]
            }
            train.append(point)
    theta = utils.vector(5)
    theta.set_value(np.array([1., -50., 10., 10., -60.]))
    r = 0.1 * feature.control()
    for lane in the_world.lanes:
        r = r + theta[0] * lane.gaussian()
    for fence in the_world.fences:
        r = r + theta[1] * fence.gaussian()
    for road in the_world.roads:
        r = r + theta[2] * road.gaussian(10.)
    r = r + theta[3] * feature.speed(1.)
    for other in the_world.cars:
        if other != the_car:
            r = r + theta[4] * other.traj.gaussian()
    run_irl(the_world, the_car, r, theta, train)
the_car = None
for c in the_world.cars:
    if isinstance(c, car.UserControlledCar):
        the_car = c
T = the_car.traj.T
train = []
for fname in files:
    with open(fname) as f:
        us, xs = pickle.load(f)
    for t in range(T, len(xs[0]) - T, T):
        point = {
            'x0': [xseq[t - 1] for xseq in xs],
            'u': [useq[t:t + T] for useq in us]
        }
        train.append(point)
theta = utils.vector(5)
theta.set_value(np.array([1., -50., 10., 10., -60.]))
r = 0.1 * feature.control()  # features; thetas are weights
for lane in the_world.lanes:
    r = r + theta[0] * lane.gaussian()
for fence in the_world.fences:
    r = r + theta[1] * fence.gaussian()
for road in the_world.roads:
    r = r + theta[2] * road.gaussian(10.)
r = r + theta[3] * feature.speed(1.)
for other in the_world.cars:
    if other != the_car:
        r = r + theta[4] * other.traj.gaussian()
run_irl(the_world, the_car, r, theta, train)
def __init__(
    self,
    dom: domain.Domain,
    num_queries: int,
    query_length: int,
    num_expectation_samples: int,
    include_previous_query: bool,
    generate_scenario: bool,
    objective_fn: ObjectiveFunctionType,
    beta_pref: float,
) -> None:
    assert num_queries >= 1, \
        "QueryGenerator.__init__: num_queries must be at least 1"
    assert query_length >= 1, \
        "QueryGenerator.__init__: query_length must be at least 1"
    assert num_expectation_samples >= 1, \
        "QueryGenerator.__init__: num_expectation_samples must be " \
        "at least 1"
    self.domain = dom
    self.num_queries = num_queries
    self.query_length = query_length
    self.num_expectation_samples = num_expectation_samples
    self.include_previous_query = include_previous_query
    self.generate_scenario = generate_scenario
    self.objective_fn = objective_fn
    self.beta_pref = beta_pref

    # Variable to store the built computation graph. Set in self.optimizer.
    self._optimizer = None
    # List of variables to optimize.
    self._variables: typing.List[tt.TensorVariable] = []
    # List of bounds for variables.
    self._bounds: typing.Dict[tt.TensorVariable, domain.BoundsType] = {}

    self.num_generated_queries = self.num_queries
    if self.include_previous_query:
        self.num_generated_queries = self.num_queries - 1

    # xs[<query>][<time>][<agent>]
    self.xs: typing.List[typing.List[typing.List[tt.TensorVariable]]] = []
    # us[<query>][<time>][<agent>]
    self.us: typing.List[typing.List[typing.List[tt.TensorVariable]]] = []

    if self.include_previous_query:
        # previous_x0s[<agent>]
        self.previous_x0s: typing.List[tt.TensorVariable] = \
            [utils.vector(self.domain.state_size,
                          name="previous_x0s[%d]" % (i))
             for i in range(self.domain.num_agents)]
        # previous_us[<time>][<agent>]
        self.previous_us: typing.List[typing.List[tt.TensorVariable]] = \
            [[utils.vector(self.domain.control_size,
                           name="previous_us[%d][%d]" % (t, i))
              for i in range(self.domain.num_agents)]
             for t in range(self.query_length)]
        # previous_xs[<time>][<agent>]
        self.previous_xs: typing.List[tt.TensorVariable] = \
            [self.previous_x0s]
        for t in range(1, self.query_length):
            xs = self.previous_xs[t - 1]
            us = self.previous_us[t - 1]
            f = self.domain.dynamics_function
            self.previous_xs.append(
                [f(xs[i], us[i]) for i in range(self.domain.num_agents)])
        self.us.append(self.previous_us)
        self.xs.append(self.previous_xs)

    # x0s[<agent>]
    self.x0s = [
        utils.vector(self.domain.state_size, name="x0s[%d]" % (i))
        for i in range(self.domain.num_agents)
    ]
    # other_us[<time>][<agent>]
    self.other_us = [[
        utils.vector(self.domain.control_size,
                     name="other_us[t=%d][agent=%d]" % (t, i))
        for i in range(self.domain.num_others)
    ] for t in range(self.query_length)]
    # query_us[<query>][<time>]
    self.query_us = [[
        utils.vector(self.domain.control_size,
                     name="query_us[query=%d][t=%d]" % (i, t))
        for t in range(self.query_length)
    ] for i in range(self.num_generated_queries)]

    if self.generate_scenario:
        for i in range(self.domain.num_agents):
            v = self.x0s[i]
            self._variables.append(v)
            self._bounds[v] = self.domain.state_bounds
        for t in range(self.query_length):
            for i in range(self.domain.num_others):
                v = self.other_us[t][i]
                self._variables.append(v)
                self._bounds[v] = self.domain.control_bounds
    for i in range(self.num_generated_queries):
        for t in range(self.query_length):
            v = self.query_us[i][t]
            self._variables.append(v)
            self._bounds[v] = self.domain.control_bounds

    for i in range(self.num_generated_queries):
        # merged_us[<time>][<agent>]
        merged_us = []
        for t in range(self.query_length):
            us_t = [self.query_us[i][t]]
            for j in range(self.domain.num_others):
                us_t.append(self.other_us[t][j])
            merged_us.append(us_t)
        self.us.append(merged_us)

        query_xs = [self.x0s]
        for t in range(1, self.query_length):
            xs = query_xs[t - 1]
            us = merged_us[t - 1]
            f = self.domain.dynamics_function
            query_xs.append(
                [f(xs[i], us[i]) for i in range(self.domain.num_agents)])
        self.xs.append(query_xs)

    # The features summed over the trajectory.
    self.traj_features_list = [
        sum_trajectory_features(
            self.domain,
            self.query_length,
            [self.xs[i][t][0] for t in range(self.query_length)],
            [self.xs[i][t][1:] for t in range(self.query_length)])
        for i in range(self.num_queries)
    ]
    # traj_features has dimension num_queries by num_features.
    self.traj_features = tt.stack(self.traj_features_list)
    # The samples of the weight vector, used to approximate
    # the expectation in our objective.
    self.w_samples = utils.matrix(self.num_expectation_samples,
                                  self.domain.feature_size,
                                  name="w_samples")
    self._objective = self.objective_fn(self.num_queries,
                                        self.num_expectation_samples,
                                        self.w_samples,
                                        self.traj_features,
                                        self.beta_pref)
    print("Compiling Optimizer")
    self.optimizer()
    print("Finished Compiling Optimizer")
"""Flappy, game inspired by Flappy Bird. Exercises 1. Keep score. 2. Vary the speed. 3. Vary the size of the balls. 4. Allow the bird to move forward and back. """ from random import * from turtle import * from utils import vector bird = vector(0, 0) balls = [] def tap(x, y): "Move bird up in response to screen tap." up = vector(0, 30) bird.move(up) def inside(point): "Return True if point on screen." return -200 < point.x < 200 and -200 < point.y < 200 def draw(alive):
opts = dict(optlist)
#db = shelve.open(args[1] if args[1].endswith('.db') else args[1]+'.db', writeback=True)
N = int(opts.get('-n', 200))
S = int(opts.get('-s', 2000))
P = float(opts.get('-p', 1.))
method = int(opts.get('-m', 0))
if method == 4:
    world.avg_case = True
phis = []
'''db['W'] = W
db['N'] = N
db['S'] = S
db['P'] = P
db['method'] = method'''
if method == 2:
    f = vector(len(W))
    phi = f / tt.maximum(1., f.norm(2))
    A = matrix(0, len(W))
    y = tt.dot(A, phi)
    p = tt.sum(tt.switch(y < 0, 1., 0.))
    q = tt.sum(tt.switch(y > 0, 1., 0.))
    #obj = tt.minimum(tt.sum(1.-tt.exp(-tn.relu(y))), tt.sum(1.-tt.exp(-tn.relu(-y))))
    obj = p * tt.sum(1. - tt.exp(-tn.relu(y))) + q * tt.sum(
        1. - tt.exp(-tn.relu(-y)))
    optimizer = Maximizer(obj, [f])
if method == 5:
    cand_phis = []
    for i in range(50):
        x = np.random.normal(size=len(W))
        cand_phis.append(x / np.linalg.norm(x))
if method == 6: