def test_mul_inplace(self):
    # In-place ``*=`` with a Vector operand: both components must end up 6.
    vec = Vector(2, 2)
    vec *= Vector(3, 3)
    expected = 6
    self.assertEqual(vec.x, expected)
    self.assertEqual(vec.y, expected)
def move(self):
    """Advance the object one step by adding its velocity to its position."""
    step = Vector(*self.velocity)
    self.pos = step + self.pos
def turn_right(obj):
    """Rotate the velocity of ``game.snake1`` by 270 degrees.

    ``obj`` is accepted but never used by this function.
    """
    snake = game.snake1
    heading = Vector(*snake.velocity)
    snake.velocity = heading.rotate(270)
def mov(self):
    """Shift ``self.pos`` by the displacement stored in ``self.v``."""
    displacement = Vector(*self.v)
    self.pos = displacement + self.pos
def can_eat(self, food):
    """Return True when ``food`` is close enough to be eaten.

    The test is whether the distance between ``food.pos`` and ``self.pos``
    does not exceed the sum of the two radii.
    """
    gap = (Vector(food.pos) - self.pos).length()
    reach = self.radius + food.radius
    return gap <= reach
def serve_car(self):
    """Put the car at this object's center and give it velocity (6, 0)."""
    car = self.car
    car.center = self.center
    car.velocity = Vector(6, 0)
def serve_ball(self):
    """Center the ball and launch it at speed 4 in a random direction.

    The direction is an integer angle drawn from ``randint(0, 360)``.
    """
    ball = self.ball
    ball.center = self.center
    launch_angle = randint(0, 360)
    ball.velocity = Vector(4, 0).rotate(launch_angle)
def step(self, action, last_distance):
    """Apply one action to the car and score the resulting state.

    The car is moved with ``action``, an observation is built with
    ``self.calc_obs`` from the goal offset, and a reward is chosen from the
    terrain under the car, the change in distance to the goal, the map
    borders and goal proximity.

    Args:
        action: value passed straight to ``self.car.move``.
        last_distance: distance to the goal before this step; used to decide
            whether the car got closer.

    Returns:
        A tuple ``(obsv, reward, done, distance)``.

    Side effects:
        Writes the module-level ``done`` flag, updates the car's velocity and
        may clamp its position; prints debug information.
    """
    print("Step")
    global goal_x
    global goal_y
    global done
    self.car.move(action)
    # Offset from the car to the goal.
    xx = goal_x - self.car.x
    yy = goal_y - self.car.y
    done = False
    obsv = self.calc_obs(xx, yy)
    print(last_distance)
    # Euclidean distance between the car and the goal.
    distance = np.sqrt((self.car.x - goal_x) ** 2 + (self.car.y - goal_y) ** 2)
    print("distance" + str(distance))
    # A positive value in ``sand`` at the car's pixel means the car is on sand.
    if sand[int(self.car.x), int(self.car.y)] > 0:
        # Slow the car down on sand.
        self.car.velocity = Vector(0.5, 0).rotate(self.car.angle)
        # Debug log.
        print(1, goal_x, goal_y, distance, int(self.car.x), int(self.car.y), im.read_pixel(int(self.car.x), int(self.car.y)))
        # Penalty for driving on sand.
        reward = -1
    else:
        # On the road: use the higher speed.
        self.car.velocity = Vector(2, 0).rotate(self.car.angle)
        # Living penalty.
        reward = -0.2
        print(0, goal_x, goal_y, distance, int(self.car.x), int(self.car.y), im.read_pixel(int(self.car.x), int(self.car.y)))
        # Small positive reward when the car got closer to the goal.
        # NOTE(review): the source was whitespace-collapsed; this check is
        # assumed to live inside the on-road branch - confirm.
        if distance < last_distance:
            reward = 0.2
    # Within 5 pixels of a border: clamp the position, penalize, and end the
    # episode.
    if self.car.x < 5:
        self.car.x = 5
        reward = -1
        done = True
    if self.car.x > self.width - 5:
        self.car.x = self.width - 5
        reward = -1
        done = True
    if self.car.y < 5:
        self.car.y = 5
        reward = -1
        done = True
    if self.car.y > self.height - 5:
        self.car.y = self.height - 5
        reward = -1
        done = True
    # Within 25 pixels of the goal: large reward and end of episode. This
    # check runs last, so it overrides any penalty assigned above.
    if distance < 25:
        reward = 25
        done = True
    return obsv, reward, done, distance
def move(self, rotation):
    """Advance by the current velocity, then turn by ``rotation``.

    The position is shifted by ``self.velocity``; ``rotation`` is stored on
    the instance and added to ``self.angle``. Progress is printed.
    """
    print("Inside Move......")
    displacement = Vector(*self.velocity)
    self.pos = displacement + self.pos
    print("Rotation is", rotation)
    self.rotation = rotation
    turned = self.angle + self.rotation
    self.angle = turned
def test_rmul_scalar(self):
    # A scalar on the left of ``*`` must scale both components to 6.
    scaled = 3 * Vector(2, 2)
    expected = 6
    self.assertEqual(scaled.x, expected)
    self.assertEqual(scaled.y, expected)
def move(self, rotation):
    """Move the car by its velocity and apply ``rotation`` to its angle.

    ``rotation`` is stored on the instance before being added to
    ``self.angle``. Debug output is printed along the way.
    """
    print("car is moving")
    # Update the stored position by one velocity step.
    displacement = Vector(*self.velocity)
    self.pos = displacement + self.pos
    print(rotation, type(rotation))
    self.rotation = rotation
    turned = self.angle + self.rotation
    self.angle = turned
def test_rmul_list(self):
    # A plain 2-tuple on the left of ``*`` must give components of 6.
    factors = (3, 3)
    product = factors * Vector(2, 2)
    expected = 6
    self.assertEqual(product.x, expected)
    self.assertEqual(product.y, expected)
def test_mul_scalar(self):
    # A scalar on the right of ``*`` must scale both components to 6.
    scaled = Vector(2, 2) * 3
    expected = 6
    self.assertEqual(scaled.x, expected)
    self.assertEqual(scaled.y, expected)
def test_mul_inplace_scalar(self):
    # In-place ``*=`` with a scalar operand: both components must end up 6.
    vec = Vector(2, 2)
    vec *= 3
    expected = 6
    self.assertEqual(vec.x, expected)
    self.assertEqual(vec.y, expected)
def get_xy_from_latlon(self, lat, lon):
    '''Return the x/y location for a latitude/longitude pair.

    The pair is converted with ``latlon_to_unit``, each coordinate is offset
    by 1, and the result is multiplied by ``(TILE_W, TILE_H)``.
    '''
    unit_x, unit_y = latlon_to_unit(lat, lon)
    # FIXME: grok + document
    shifted = Vector(unit_x + 1, unit_y + 1)
    tile_size = (TILE_W, TILE_H)
    return shifted * tile_size
def moveandtrain(self, dt):
    """Take one random-rotation step, record the transition, and maybe train.

    A rotation is drawn uniformly from the integers -5..5, the car is moved,
    a reward is derived from terrain, progress toward the goal and the map
    borders, and the transition is appended to ``replay_buffer``. When the
    iteration counter reaches exactly 10000, ``policy.train`` is called.

    Args:
        dt: not used by this function.
    """
    global last_reward
    global scores
    global last_distance
    global goal_x
    global goal_y
    global longueur
    global largeur
    global swap
    global random_itr
    # Publish the current map size through module-level globals.
    longueur = self.width
    largeur = self.height
    print("Width", longueur)
    print("Height", largeur)
    if first_update:
        init()
    print("Calling Self Update................", random_itr)
    # Offset from the car to the goal.
    xx = goal_x - self.car.x
    yy = goal_y - self.car.y
    print("XX", xx)
    print("YY", yy)
    print("Self X", self.car.x)
    print("Self Y", self.car.y)
    # Angle between the car's velocity and the goal direction, divided by 180.
    # Only printed; it is not part of the stored transition.
    orientation = Vector(*self.car.velocity).angle((xx, yy)) / 180.
    print("Orientation", orientation)
    # Random action instead of a policy decision.
    rotation = random.randint(-5, 5)
    print("Rotation", rotation)
    # 40x40 window of ``sand`` around the car before the move.
    # NOTE(review): near the border ``int(x) - 20`` can be negative; if
    # ``sand`` is a NumPy array a negative slice start counts from the end,
    # so the window may come out empty or misplaced - confirm.
    obs = sand[int(self.car.x) - 20:int(self.car.x) + 20, int(self.car.y) - 20:int(self.car.y) + 20]
    self.car.move(rotation)
    # Same window around the car after the move.
    new_obs = sand[int(self.car.x) - 20:int(self.car.x) + 20, int(self.car.y) - 20:int(self.car.y) + 20]
    distance = np.sqrt((self.car.x - goal_x)**2 + (self.car.y - goal_y)**2)
    if sand[int(self.car.x), int(self.car.y)] > 0:
        # On sand: slow down and penalize.
        self.car.velocity = Vector(0.5, 0).rotate(self.car.angle)
        print(1, goal_x, goal_y, distance, int(self.car.x), int(self.car.y), im.read_pixel(int(self.car.x), int(self.car.y)))
        last_reward = -1
    else:
        # Off sand: normal speed with a small living penalty.
        self.car.velocity = Vector(2, 0).rotate(self.car.angle)
        last_reward = -0.2
        print(0, goal_x, goal_y, distance, int(self.car.x), int(self.car.y), im.read_pixel(int(self.car.x), int(self.car.y)))
        # NOTE(review): the source was whitespace-collapsed; this check is
        # assumed to live inside the off-sand branch - confirm.
        if distance < last_distance:
            last_reward = 0.1
    # Keep the car at least 5 pixels inside the map and penalize border hits.
    if self.car.x < 5:
        self.car.x = 5
        last_reward = -1
    if self.car.x > self.width - 5:
        self.car.x = self.width - 5
        last_reward = -1
    if self.car.y < 5:
        self.car.y = 5
        last_reward = -1
    if self.car.y > self.height - 5:
        self.car.y = self.height - 5
        last_reward = -1
    # Goal reached: alternate between the two hard-coded goal positions.
    if distance < 25:
        if swap == 1:
            goal_x = 1420
            goal_y = 622
            swap = 0
        else:
            goal_x = 9
            goal_y = 85
            swap = 1
    last_distance = distance
    # Store the transition; the trailing 0 is passed as a constant.
    replay_buffer.add((obs, new_obs, rotation, last_reward, 0))
    random_itr = random_itr + 1
    # Training is triggered once, when the counter hits exactly 10000.
    if random_itr == 10000:
        policy.train(replay_buffer, 1000)
def update(self, dt):
    """Advance the simulation one tick using two DQNs (rotation and velocity).

    Both networks are queried with the previous reward and the current
    signals, the car is moved, and a reward for this tick is computed from
    progress toward the goal, rotation cost, sand and border hits.

    The computed reward is stored in the module-level ``last_reward`` so the
    next tick's ``dqn_*.update`` calls learn from it. Previously the reward
    was computed and then discarded, so ``last_reward`` never changed here.

    Args:
        dt: not used by this function.
    """
    global dqn_rotation
    global dqn_velocity
    global last_reward
    global last_onsand
    global last_distance
    global goal_x
    global goal_y
    global RIGHT
    global TOP
    global GOAL
    global start
    global reached

    # Publish the current map size through module-level globals.
    RIGHT = self.width
    TOP = self.height

    # Keep the sensor markers on the car's sensors.
    self.ball1.pos = self.car.sensor1
    self.ball2.pos = self.car.sensor2
    self.ball3.pos = self.car.sensor3

    if first_update:
        init()

    # Nothing to do until a goal has been selected.
    if GOAL == 'none':
        return

    xx = goal_x - self.car.x
    yy = goal_y - self.car.y
    # Angle to the goal, offset by the last random rotation applied on sand.
    orientation = (Vector(*self.car.velocity).angle((xx, yy)) + last_onsand) / 180.

    # The car uses 4 signals: 3 from the sensors (balls) + 1 orientation.
    last_signal = [self.car.signal1, self.car.signal2, self.car.signal3, orientation]

    # Choose actions from the previous reward and the current signals.
    action_rotation = dqn_rotation.update(last_reward, last_signal)
    rotation = action2rotation[action_rotation]
    action_velocity = dqn_velocity.update(last_reward, last_signal)
    velocity = action2velocity[action_velocity]

    # Apply the actions.
    self.car.move(rotation)
    self.car.velocity = Vector(velocity, 0).rotate(self.car.angle)

    # ------------------------------------------------------------------
    # Exercise 5: design an alternative reward scheme.
    # How is the car movement different?
    # ------------------------------------------------------------------
    reward = 0

    # Reward for approaching the goal (negative when moving away).
    distance = np.sqrt((self.car.x - goal_x) ** 2 + (self.car.y - goal_y) ** 2)
    reward += (last_distance - distance) / 10

    # Penalize rotating; going straight is preferred unless necessary.
    if action_rotation == 1 or action_rotation == 2:
        reward += -0.1
    if action_rotation == 3 or action_rotation == 4:
        reward += -0.2

    # Penalize hitting sand; on sand the car moves quite randomly.
    if sand[int(self.car.x), int(self.car.y)] > 0:
        reward += -10
        random_rotation = random.randint(-100, 100)
        # NOTE(review): randint raises ValueError if velocity < 1 - confirm
        # that every entry of action2velocity is an integer >= 1.
        random_velocity = random.randint(1, velocity)
        self.car.move(random_rotation)
        self.car.velocity = Vector(random_velocity, 0).rotate(self.car.angle)
        last_onsand = random_rotation
    else:
        last_onsand = 0

    # Penalize hitting the map boundary and clamp the car inside it.
    if self.car.x < BOUNDARY:
        self.car.x = BOUNDARY
        reward += -1
    if self.car.x > self.width - BOUNDARY:
        self.car.x = self.width - BOUNDARY
        reward += -1
    if self.car.y < BOUNDARY:
        self.car.y = BOUNDARY
        reward += -1
    if self.car.y > self.height - BOUNDARY:
        self.car.y = self.height - BOUNDARY
        reward += -1

    # Reaching the destination: report the elapsed time once per goal.
    if distance < 100:
        if reached == 1:
            pass
        elif GOAL == 'airport':
            now = time.time()
            reached = 1
            print('Reach Airport!')
            print('It takes %i seconds.' % (now-start))
        elif GOAL == 'downtown':
            now = time.time()
            reached = 1
            print('Reach Downtown!')
            print('It takes %i seconds.'% (now-start))
        elif GOAL == 'home':
            now = time.time()
            reached = 1
            print('Reach Home!')
            print('It takes %i seconds.'% (now-start))

    last_distance = distance
    # Feed this tick's reward to the networks on the next tick.
    last_reward = reward
def serve_car(self):
    """Start the car: place it at this object's center with velocity (6, 0)."""
    car = self.car
    # Starting position is this object's center.
    car.center = self.center
    # Initial velocity: 6 along x, 0 along y.
    car.velocity = Vector(6, 0)
def update(self, dt):
    """Advance the simulation one tick: pick an action, move, and score it.

    ``brain.update`` is given the previous reward and the current signals and
    returns an action, which is mapped to a rotation and applied to the car.
    The new ``last_reward`` depends on terrain, progress toward the goal and
    border hits. When the car comes within 100 of the goal, the goal is
    mirrored to ``(width - goal_x, height - goal_y)``.

    Args:
        dt: not used by this function.
    """
    global brain
    global last_reward
    global scores
    global last_distance
    global goal_x
    global goal_y
    global longueur
    global largeur
    # Publish the current map size through module-level globals.
    longueur = self.width
    largeur = self.height
    if first_update:
        init()
    # Offset from the car to the goal.
    xx = goal_x - self.car.x
    yy = goal_y - self.car.y
    # Angle between the car's velocity and the goal direction, divided by 180.
    orientation = Vector(*self.car.velocity).angle((xx, yy)) / 180.
    # State: three sensor signals plus the orientation and its negation.
    last_signal = [self.car.signal1, self.car.signal2, self.car.signal3, orientation, -orientation]
    # ``brain`` chooses the action from the last reward and the current state.
    action = brain.update(last_reward, last_signal)
    scores.append(brain.score())
    rotation = action2rotation[action]
    self.car.move(rotation)
    distance = np.sqrt((self.car.x - goal_x)**2 + (self.car.y - goal_y)**2)
    # Keep the sensor markers on the car's sensors after the move.
    self.ball1.pos = self.car.sensor1
    self.ball2.pos = self.car.sensor2
    self.ball3.pos = self.car.sensor3
    if sand[int(self.car.x), int(self.car.y)] > 0:
        # Slow down on sand and penalize.
        self.car.velocity = Vector(1, 0).rotate(self.car.angle)
        last_reward = -1
    else:
        # Usual speed with a small living penalty.
        self.car.velocity = Vector(6, 0).rotate(self.car.angle)
        last_reward = -0.1
        # Slightly positive reward when approaching the goal.
        # NOTE(review): the source was whitespace-collapsed; this check is
        # assumed to live inside the off-sand branch - confirm.
        if distance < last_distance:
            last_reward = 0.5
    # Too close to an edge (left, right, bottom, top): clamp and penalize.
    if self.car.x < 10:
        self.car.x = 10
        last_reward = -1
    if self.car.x > self.width - 10:
        self.car.x = self.width - 10
        last_reward = -1
    if self.car.y < 10:
        self.car.y = 10
        last_reward = -1
    if self.car.y > self.height - 10:
        self.car.y = self.height - 10
        last_reward = -1
    # Goal reached: mirror the goal through the map center.
    if distance < 100:
        goal_x = self.width - goal_x
        goal_y = self.height - goal_y
    last_distance = distance
def update(self, dt):
    """Update everything that changes at each discrete time step.

    The AI (``brain``) receives the last reward and the current signals,
    returns an action that is mapped to a rotation, and the car is moved.
    ``last_reward`` is then recomputed from terrain, progress toward the goal
    and border hits, and the goal is mirrored once the car comes within 100
    of it.

    Args:
        dt: not used by this function.
    """
    global brain  # the AI driving the car
    global last_reward  # the last reward
    global scores  # history of brain.score() values
    global last_distance  # the last distance from the car to the goal
    global goal_x  # x-coordinate of the goal
    global goal_y  # y-coordinate of the goal
    global longueur  # width of the map
    global largeur  # height of the map
    longueur = self.width  # width of the map (horizontal edge)
    largeur = self.height  # height of the map (vertical edge)
    print(self.width, self.height)
    if first_update:  # initialize the map only once
        init()
    xx = goal_x - self.car.x  # x offset from the car to the goal
    yy = goal_y - self.car.y  # y offset from the car to the goal
    # Angle between the car's velocity and the goal direction, divided by 180
    # (0 when the car heads straight at the goal).
    orientation = Vector(*self.car.velocity).angle((xx, yy)) / 180.
    # Input state: the three sensor signals plus orientation and -orientation.
    last_signal = [self.car.signal1, self.car.signal2, self.car.signal3, orientation, -orientation]
    # Play the action chosen by the AI.
    action = brain.update(last_reward, last_signal)
    # Record the score reported by the brain.
    scores.append(brain.score())
    # Convert the action index into a rotation angle.
    rotation = action2rotation[action]
    # Move the car according to this rotation.
    self.car.move(rotation)
    # New distance between the car and the goal right after the move.
    distance = np.sqrt((self.car.x - goal_x)**2 + (self.car.y - goal_y)**2)
    # Keep the sensor markers on the car's sensors after the move.
    self.ball1.pos = self.car.sensor1
    self.ball2.pos = self.car.sensor2
    self.ball3.pos = self.car.sensor3
    if sand[int(self.car.x), int(self.car.y)] > 0:  # the car is on sand
        # Slowed down (speed = 1) and penalized.
        self.car.velocity = Vector(1, 0).rotate(self.car.angle)
        last_reward = -1
    else:
        # Normal speed (speed = 6) with a small negative reward.
        self.car.velocity = Vector(6, 0).rotate(self.car.angle)
        last_reward = -0.2
        # Getting closer to the goal still earns a slightly positive reward.
        # NOTE(review): the source was whitespace-collapsed; this check is
        # assumed to live inside the off-sand branch - confirm.
        if distance < last_distance:
            last_reward = 0.1
    # Edge handling: the position is clamped 10 pixels inside the frame and
    # the reward is set to -1; the velocity is not changed here.
    if self.car.x < 10:  # left edge
        self.car.x = 10
        last_reward = -1
    if self.car.x > self.width - 10:  # right edge
        self.car.x = self.width - 10
        last_reward = -1
    if self.car.y < 10:  # bottom edge
        self.car.y = 10
        last_reward = -1
    if self.car.y > self.height - 10:  # upper edge
        self.car.y = self.height - 10
        last_reward = -1
    if distance < 100:  # the car reached its goal
        # Mirror the goal through the map center, so the car shuttles
        # between two opposite points.
        goal_x = self.width - goal_x
        goal_y = self.height - goal_y
    # Remember the distance for the next step's comparison.
    last_distance = distance
def move(self):
    """Translate ``self.pos`` by one step of ``self.velocity``."""
    step = Vector(*self.velocity)
    self.pos = step + self.pos
def collide_point(self, x, y):
    """Return True when (x, y) is no farther than ``width / 2`` from center."""
    radius = self.width / 2
    offset = Vector(x, y).distance(self.center)
    return offset <= radius
def serve_ball(self):
    """Give the ball a speed-4 velocity ``v`` in a random direction.

    The direction is an integer angle drawn from ``randint(0, 360)``.
    """
    launch_angle = randint(0, 360)
    self.ball.v = Vector(4, 0).rotate(launch_angle)
def update(self, bt):
    """Advance one tick: choose an action, log it, move the car and score it.

    The brain is fed the three sensor signals plus the previous distance and
    previous action. Each tick appends one line to ``data_record.txt``.
    The reward combines terrain, lack of movement, border hits and the
    change in ``distance`` (a measure built from the car's offsets to the
    nearer vertical and horizontal map edges). Every 1000 ticks the brain is
    saved and the score plot is refreshed and written to
    ``VirtualScores.png``.

    The log file is written inside a ``with`` block so the handle is closed
    even if formatting or writing the record raises.

    NOTE(review): the source was whitespace-collapsed; the reward checks
    after the sand test are assumed to be at function level - confirm.

    Args:
        bt: not used by this function.
    """
    global brain
    global last_reward
    global scores
    global last_distance
    global lastCar_x
    global lastCar_y
    global longueur
    global largeur
    global TIMES
    global lines
    global ITE
    global SCO
    global ax
    global last_action

    # Publish the current map size through module-level globals.
    longueur = self.width
    largeur = self.height
    if first_update:
        init()

    # Input state: sensor feedback plus the previous distance and action.
    last_signal = [self.car.signal1, self.car.signal2, self.car.signal3, last_distance, last_action]
    # Choose an action from the previous reward and the current state.
    action = brain.update(last_reward, last_signal)

    # Append one tab-separated record: signals, position, chosen action.
    with open('data_record.txt', 'a') as record:
        record.write(
            '%.2f' % self.car.signal1 + '\t' + '%.2f' % self.car.signal2
            + '\t' + '%.2f' % self.car.signal3
            + '\t' + '%.2f' % self.car.x + '\t\t' + '%.2f' % self.car.y
            + '\t\t' + str(action.numpy()) + '\n'
        )

    last_action = action
    # Record the score after each learning step.
    scores.append(brain.score())
    # Convert the network's action into the rotation to apply.
    rotation = action2rotation[action]
    self.car.move(rotation)

    # Offsets to the nearer vertical and nearer horizontal map edge.
    if self.car.x >= self.width/2:
        dis_x = self.width - self.car.x
    else:
        dis_x = self.car.x
    if self.car.y >= self.height/2:
        dis_y = self.height - self.car.y
    else:
        dis_y = self.car.y
    # Length of the (dis_x, dis_y) vector; rotating it first should not
    # change its length beyond floating-point rounding.
    distance = Vector(dis_x, dis_y).rotate(self.car.angle).length()

    # Keep the sensor markers on the car's sensors after the move.
    self.ball1.pos = self.car.sensor1
    self.ball2.pos = self.car.sensor2
    self.ball3.pos = self.car.sensor3
    self.car.velocity = Vector(6, 0).rotate(self.car.angle)

    # Base reward from the terrain under the car.
    if sand[int(self.car.x), int(self.car.y)] > 0:
        last_reward = -1
    else:
        last_reward = 0.5

    # Penalize each axis on which the car moved less than 10 since last tick.
    if abs(lastCar_x - self.car.x) < 10:
        last_reward -= 0.5
    if abs(lastCar_y - self.car.y) < 10:
        last_reward -= 0.5
    lastCar_x = self.car.x
    lastCar_y = self.car.y

    # Clamp the car 10 pixels inside the map and penalize border hits.
    if self.car.x < 10:
        self.car.x = 10
        last_reward -= 1
    if self.car.x > self.width - 10:
        self.car.x = self.width - 10
        last_reward -= 1
    if self.car.y < 10:
        self.car.y = 10
        last_reward -= 1
    if self.car.y > self.height - 10:
        self.car.y = self.height - 10
        last_reward -= 1

    # Reward a shrinking distance, penalize otherwise.
    if distance - last_distance < 0:
        last_reward += 0.1
    else:
        last_reward -= 0.5
    # Penalize a distance of 50 or less; reward the 100..150 band.
    if distance <= 50:
        last_reward -= 0.5
    if distance > 100 and distance < 150:
        last_reward += 1

    last_distance = distance
    TIMES += 1

    # Periodic checkpoint: save the brain and refresh the score plot.
    if TIMES % 1000 == 0:
        ITE.append(TIMES)
        SCO.append(scores[-1])
        Time = time.strftime('%H:%M:%S', time.localtime(time.time()))
        print("%s saving brain..." % Time)
        brain.save()
        lines = ax.plot(ITE, SCO, 'r-', lw=3)
        plt.pause(0.05)
        plt.savefig('VirtualScores.png')
def eject_blob(self, x, y):
    """Spend 10 mass to add a new ``Blob`` to the parent widget.

    Nothing happens unless ``self.mass`` is at least 50. The new blob's
    position is ``(x, y)`` minus ``self.offset``.
    """
    if not (self.mass >= 50):
        return
    self.mass -= 10
    spawn_at = Vector(x, y) - self.offset
    ejected = Blob(self.parent, pos=spawn_at)
    self.parent.add_widget(ejected)
def _get_rotation(self):
    """Return an angle in [0, 360) derived from the parent-space transform.

    A reference vector (0, 10) is compared with the parent-space vector
    between this object's ``pos`` and the point 10 units above ``(x, y)``;
    the result is ``-(angle + 180)`` taken modulo 360.
    """
    to_parent = self.to_parent
    reference = Vector(0, 10)
    origin = Vector(*to_parent(*self.pos))
    transformed = origin - to_parent(self.x, self.y + 10)
    negated = -1.0 * (reference.angle(transformed) + 180)
    return negated % 360
def distance(self, other_touch):
    '''Return the distance from this touch's position to ``other_touch.pos``.
    '''
    own_position = Vector(self.pos)
    return own_position.distance(other_touch.pos)
def _set_center(self, center):
    """Translate the object so that its center becomes ``center``.

    Returns False without doing anything when ``center`` already equals the
    current center; otherwise applies a translation transform and returns
    None.
    """
    if center == self.center:
        return False
    shift = Vector(*center) - self.center
    dx, dy = shift.x, shift.y
    translation = Matrix().translate(dx, dy, 0)
    self.apply_transform(translation)
def turn_left2(obj):
    """Rotate the velocity of ``game.snake2`` by 90 degrees.

    ``obj`` is accepted but never used by this function.
    """
    snake = game.snake2
    heading = Vector(*snake.velocity)
    snake.velocity = heading.rotate(90)
def test_mul_twovectors(self):
    # ``Vector * Vector`` must multiply component by component, giving 6, 6.
    left = Vector(2, 2)
    right = Vector(3, 3)
    product = left * right
    expected = 6
    self.assertEqual(product.x, expected)
    self.assertEqual(product.y, expected)