def random_orientation(self, t): while True: stop = False c = np.matrix([random.normalvariate(t.item(0), 10), random.normalvariate(t.item(1), 10), t.item(2)]) d = np.linalg.norm(t - c) if d > 10 and d < 12 and c.item(2) < -2: #o = get_o_from_pts(t, c) dx = c.item(0) - t.item(0) dy = c.item(1) - t.item(1) yaw = math.degrees(math.asin(-dy/d)) yaw2 = math.degrees(math.acos(-dx/d)) if yaw != yaw2: stop = True o = np.matrix([0.0,0.0,yaw]) (x, y), target_in_front = projection(t, c, o, w=float(self.width), h=float(self.height)) if x <= float(self.width) * 0.95 and x >= float(self.width) * 0.05 and y <= float( self.height) * 0.95 and y >= float(self.height) * 0.05 and target_in_front\ and not stop: while True: newC = np.matrix([random.normalvariate(c.item(0), d/2), c.item(1), random.normalvariate(c.item(2), d/2)]) (x, y), target_in_front = projection(t, newC, o, w=float(self.width), h=float(self.height)) if x <= float(self.width) * 0.95 and x >= float(self.width) * 0.05 and \ y <= float(self.height) * 0.95 and y >= float(self.height) * 0.05 and \ newC.item(2) < -2 and target_in_front and d > 10 and d < 15 and c.item(2) < -2: self.c = newC self.o = o (x, y), target_in_front = projection(self.t, self.c, self.o, w=float(self.width), h=float(self.height)) #print((x, y)) return (self.c, self.o)
def _reset(self): self.iteration = 0 self.t = np.matrix([-10.0, 10.0, -10.0]) self.o = np.matrix([0.0, 0.0, 0.0]) self.c = np.matrix([-20.0, 10.0, -10.0]) self.v = 0.0 self.r = np.matrix([0.0, 0.0, 0.0]) self.last_d = 1000 self.nb_correct = 0 #self.client.simSetPose(Vector3r(self.c.item(0), self.c.item(1), self.c.item(2)), # self.client.toQuaternion(math.radians(self.o.item(1)), math.radians(self.o.item(0)), # math.radians(self.o.item(2)))) self.image = None self.fw = None # response = self.client.simGetImages([ImageRequest(0, AirSimImageType.Scene)])[0] # self.image = self.get_rbg(response) self.c, self.o = self.random_orientation(t) self._render() #self.client.simSetPose(Vector3r(self.c.item(0), self.c.item(1), self.c.item(2)), # self.client.toQuaternion(math.radians(self.o.item(1)), math.radians(self.o.item(0)), # math.radians(self.o.item(2)))) (x, y), _ = projection(self.t, self.c, self.o, w=float(self.width), h=float(self.height)) x = 3 * x / float(self.width) y = 3 * y / float(self.height) self.observation = self.get_obs() return self.observation
def _reset(self): self.iteration = 0 self.t = np.matrix([-10.0, 10.0, -10.0]) self.o = np.matrix([0.0, 0.0, 0.0]) self.c = np.matrix([-20.0, 10.0, -10.0]) self.v = np.matrix([0.0, 0.0, 0.0]) self.r = np.matrix([0.0, 0.0, 0.0]) self.fps = 30.0 self.client.simSetPose( Vector3r(self.c.item(0), self.c.item(1), self.c.item(2)), self.client.toQuaternion(math.radians(self.o.item(1)), math.radians(self.o.item(0)), math.radians(self.o.item(2)))) self.image = None self.fw = None #response = self.client.simGetImages([ImageRequest(0, AirSimImageType.Scene)])[0] #self.image = self.get_rbg(response) self._render() self.observation = self.get_obs(np.matrix([0.5, 0.5])) (x, y) = projection(self.t, self.c, self.o, w=float(self.width), h=float(self.height)) x = x / float(self.width) y = y / float(self.height) self.last_loc = np.matrix([x, y]) return self.observation
def _reset(self): #time.sleep(1) self.iteration = 0 self.t = np.matrix([-20.0, -5.0, -10.0]) self.o = np.matrix([0.0, 0.0, 0.0]) self.c = np.matrix([-30.0, -5.0, -10.0]) self.vC = np.matrix([0.0, 0.0, 0.0]) self.vT = np.matrix([0.0, 0.0, 0.0]) self.aC = np.matrix([0.0, 0.0, 0.0]) self.aT = np.matrix([0.0, 0.0, 0.0]) self.r = np.matrix([0.0, 0.0, 0.0]) self.fps = 60.0 self.nb_correct = 0 self.done = False self.reward = 0.0 #self.hunter.simSetPose(Vector3r(self.c.item(0), self.c.item(1), self.c.item(2)), # self.hunter.toQuaternion(math.radians(self.o.item(1)), math.radians(self.o.item(0)), # math.radians(self.o.item(2)))) self.image = None self.fw = None # response = self.hunter.simGetImages([ImageRequest(0, AirSimImageType.Scene)])[0] # self.image = self.get_rbg(response) t = np.matrix([random.normalvariate(self.t.item(0), 5), random.normalvariate(self.t.item(1), 5), random.normalvariate(self.t.item(0), 5)]) while t.item(2) > -5: t = np.matrix([random.normalvariate(self.t.item(0), 5), random.normalvariate(self.t.item(1), 5), random.normalvariate(self.t.item(0), 5)]) self.t = t self.c, self.o = self.random_orientation(self.t) self.hunter.simSetPose(Vector3r(self.c.item(0), self.c.item(1), self.c.item(2)), self.hunter.toQuaternion(math.radians(self.o.item(1)), math.radians(self.o.item(0)), math.radians(self.o.item(2)))) self.hunter.moveByVelocity(0, 0, 1, 0.1) self.target.simSetPose(Vector3r(self.t.item(0), self.t.item(1), self.t.item(2)), self.target.toQuaternion(0,0,0)) self.target.moveByVelocity(0, 0, 1, 0.1) self._render() #self.hunter.simSetPose(Vector3r(self.c.item(0), self.c.item(1), self.c.item(2)), # self.hunter.toQuaternion(math.radians(self.o.item(1)), math.radians(self.o.item(0)), # math.radians(self.o.item(2)))) (x, y), _ = projection(self.t, self.c, self.o, w=float(self.width), h=float(self.height)) #print((x, y, _)) x = 3 * x / float(self.width) y = 3 * y / float(self.height) self.observation = self.get_obs() return self.observation
def _get_obs(self): if self.image is None: return None #self.current_state = np.concatenate(list(self.image_queue)) #self.current_state = (self.current_state.flatten()) (x, y), target_in_front = projection(self.t, self.c, self.o, w=float(self.width), h=float(self.height)) pix = np.array((x/255.0,y/143.0)).flatten() self.current_state = np.concatenate([np.array(self.v).flatten()/(10.0/self.fps), np.array(self.o).flatten()/360.0, np.array(self.r).flatten()/360.0, np.array(self.t).flatten()/30.0, np.array(self.c).flatten()/30.0, np.array(pix), self.current_state], 0) return self.current_state
def _random_orientation(self, t): i = 51 while i > 50: while True: c = np.matrix([random.normalvariate(t.item(0), 10), random.normalvariate(t.item(1), 10), random.normalvariate(t.item(2), 5)]) d = np.linalg.norm(t - c) if d > 10 and d < 15 and c.item(2) < -5: # break # while True: # o = np.matrix([random.uniform(-180,180), # random.uniform(-180,180), # random.uniform(-180,180)]) o = get_o_from_pts(t, c) (x, y), target_in_front = projection(t, c, o, w=float(self.width), h=float(self.height)) if x == self.width / 2 and y == self.height / 2 and target_in_front: break r = np.matrix([0.0, 0.0, 0.0]) v = np.matrix([0.0, 0.0, 0.0]) #print((x, y)) for i in range(50): j = 0 while True: rot_inc = 5.0 + float(j) / 10.0 vel_inc = 10.0 + float(j) / 10.0 if j > 50: break # print(rot_inc) dC = np.matrix([random.normalvariate(v.item(0), vel_inc / self.fps), random.normalvariate(v.item(1), vel_inc / self.fps), random.normalvariate(v.item(2), vel_inc / self.fps)] ) dO = np.matrix([random.normalvariate(r.item(0), vel_inc / self.fps), random.normalvariate(r.item(1), rot_inc / self.fps), random.normalvariate(r.item(2), rot_inc / self.fps)] ) newC = np.add(c, dC) newO = np.add(o, dO) d = np.linalg.norm(self.t - newC) (x, y), target_in_front = projection(self.t, newC, newO, w=float(self.width), h=float(self.height)) total_v = np.linalg.norm(dC) if x <= float(self.width) * 0.95 and x >= float(self.width) * 0.05 and y <= float( self.height) * 0.95 and y >= float(self.height) * 0.05 \ and d > 10 and d < 15 and newC.item(2) < -5 \ and total_v * self.fps <= 30 \ and target_in_front: break j += 1 c = newC v = dC o = newO r = dO if x <= float(self.width) * 0.95 and x >= float(self.width) * 0.05 and y <= float( self.height) * 0.95 and y >= float(self.height) * 0.05 \ and d > 10 and d < 15 and c.item(2) < -5 \ and target_in_front: break self.last_d = np.linalg.norm(self.t - c) (x, y), target_in_front = projection(self.t, c, o, w=float(self.width), h=float(self.height)) #print((x, y)) return (c, o)
def step(self, raw_action): # action = np.matrix([raw_action.item(0)*float(self.width),raw_action.item(1)*float(self.height)]) # x = self.c.item(0)/self.width # y = self.c.item(1)/self.height # self.reward = 1-((np.linalg.norm(action-self.last_loc))/self.rt2) if self.discrete: # action = [int(raw_action % 3), int(raw_action / 3)] action = raw_action yaw = 1 - action % 3 action = int(action / 3) pitch = 1 - action % 3 action = int(action / 3) roll = 1 - action % 3 action = int(action / 3) acc = 1 - action % 3 else: action = raw_action roll = action.item(0) pitch = action.item(1) yaw = action.item(2) acc = action.item(3) # print(self.iteration) if self.episodes % 100 == 0: if self.fw is None: self.fw = open('./images/episode_' + str(self.episodes) + '/actions.txt', 'w') self.fw.write('(' + str(action) + ')\n') # An action of 0 is the NOOP j = 0 max_v = 10.0 max_r = 360.0 dR = np.matrix([roll, pitch, yaw]) self.v = self.v + acc / self.fps if self.v*self.fps > max_v: self.v = max_v/self.fps self.r = self.r + dR / self.fps if self.r.item(0) > max_r: self.r = np.matrix([self.r.item(0)-max_r, self.r.item(1), self.r.item(2)]) if self.r.item(1) > max_r: self.r = np.matrix([self.r.item(0), self.r.item(1)-max_r, self.r.item(2)]) if self.r.item(2) > max_r: self.r = np.matrix([self.r.item(0), self.r.item(1), self.r.item(2)-max_r]) direction = np.dot(rot_mat(self.r.item(0), self.r.item(1), self.r.item(2)), np.transpose(self.c)) direction = np.transpose(direction) / np.linalg.norm(direction) self.o = self.o + self.r if self.o.item(0) > max_r: self.o = np.matrix([self.o.item(0)-max_r, self.o.item(1), self.o.item(2)]) if self.o.item(1) > max_r: self.o = np.matrix([self.o.item(0), self.o.item(1)-max_r, self.o.item(2)]) if self.o.item(2) > max_r: self.o = np.matrix([self.o.item(0), self.o.item(1), self.o.item(2)-max_r]) self.c = self.c + direction * self.v if self.c.item(2) > -5: self.c = np.matrix([self.c.item(0), self.c.item(1), -5]) self.state = self._render() self.previous_state = self.current_state self.current_state = self._get_obs() (x, y), target_in_front = projection(self.t, self.c, self.o, w=float(self.width), h=float(self.height)) pix_d = ((x - self.width / 2) ** 2 + (y - self.height / 2) ** 2) / ( (self.width / 2) ** 2 + (self.height / 2) ** 2) d = np.linalg.norm(self.t - self.c) d_max = 0 # d_norm = d/d_max center_reward = 1 - pix_d dist_reward = 1 / d # self.reward = dist_reward # if d < self.last_d: # self.reward = 1 # elif d > self.last_d: # self.reward = -1 # else: # self.reward = 0 #self.reward = 0 #self.reward = (30.0/d - 30.0/self.last_d) #print(self.reward) self.reward = (self.last_d - d) self.last_d = d #self.last_d = min(d, self.last_d) self.done = (self.iteration > self.max_iter) if x > self.width or x < 0 or y > self.height or y < 0 or d > 30: print((x,y)) self.done = True self.reward = 0 if d < 1: self.done = True self.reward = 0 # self.reward -= 0.1 self.total_reward += self.reward self.iteration += 1 if self.done: # print(self.c) if self.episodes % 100 == 0: self.fw.close() self.fw = None self.episodes += 1 print(str(self.episodes) + ': ' + str(self.total_reward)+ ' Ended At: ' + str(self.reward) + ' @ ' + str(float(self.iteration)) + ' with d: '+str(d) ) self.log_file.write(str(self.episodes) + ': ' + str(self.total_reward)+ ' Ended At: ' + str(self.reward) + ' @ ' + str(float(self.iteration)) + ' with d: '+str(d) + '\n') self.total_reward = 0 return self.current_state, self.reward, self.done, {}
def _step(self, raw_action): # action = np.matrix([raw_action.item(0)*float(self.width),raw_action.item(1)*float(self.height)]) # x = self.c.item(0)/self.width # y = self.c.item(1)/self.height # self.reward = 1-((np.linalg.norm(action-self.last_loc))/self.rt2) #action = [int(raw_action % 3), int(raw_action / 3)] #self.reward = 1 - ((action[0] - self.last_loc[0]) ** 2 + (action[1] - self.last_loc[1]) ** 2) action = raw_action yaw = 1 - action % 3 action /= 3 pitch = 1 - action % 3 action /= 3 roll = 1 - action % 3 action /= 3 acc = 1 - action % 3 # print(self.iteration) if self.episodes % 500 == 0: if self.fw is None: self.fw = open( './images/episode_' + str(self.episodes) + '/actions.txt', 'w') self.fw.write('(' + str(raw_action) + ')\n') # An action of 0 is the NOOP j = 0 max_v = 10.0 max_r = 360.0 dR = np.matrix([roll, pitch, yaw]) self.v = self.v + acc / self.fps if self.v > max_v: v = max_v self.r = self.r + dR / self.fps if self.r.item(0) > max_r: self.r = np.matrix([max_r, self.r.item(1), self.r.item(2)]) if self.r.item(1) > max_r: self.r = np.matrix([self.r.item(0), max_r, self.r.item(2)]) if self.r.item(2) > max_r: self.r = np.matrix([self.r.item(0), self.r.item(1), max_r]) direction = np.dot( rot_mat(self.r.item(0), self.r.item(1), self.r.item(2)), np.transpose(self.c)) direction = np.transpose(direction) / np.linalg.norm(direction) self.o = self.o + self.r self.c = self.c + direction * self.v if self.c.item(2) > -5: self.c = np.matrix([self.c.item(0), self.c.item(1), -5]) self.state = self._render() self.observation = self.get_obs() (x, y), target_in_front = projection(self.t, self.c, self.o, w=float(self.width), h=float(self.height)) pix_d = ((x - self.width / 2)**2 + (y - self.height / 2)**2) / ((self.width / 2)**2 + (self.height / 2)**2) d = np.linalg.norm(self.t - self.c) d_max = 0 d_norm = d / d_max center_reward = 1 - pix_d dist_reward = 1 / d #self.reward = dist_reward if d < self.last_d: self.reward = 1 elif d > self.last_d: self.reward = -1 else: self.reward = 0 self.done = (self.iteration > self.max_iter) if x > self.width or x < 0 or y > self.height or y < 0 or d > 30: self.done = True self.reward = -1 if d < 1: self.done = True #self.reward = self.max_iter self.cumulative += self.reward self.iteration += 1 if self.done: #print(self.c) if self.episodes % 500 == 0: self.fw.close() self.fw = None self.episodes += 1 acc = float(self.nb_correct) / float(self.iteration) print( str(self.episodes) + ': ' + str(self.cumulative) + ' Ended At: ' + str(self.reward) + ' @ ' + str(float(self.iteration)) + ' with d: ' + str(d)) self.log_file.write( str(self.episodes) + ': ' + str(self.cumulative) + ' Ended At: ' + str(self.reward) + ' @ ' + str(float(self.iteration)) + ' with d: ' + str(d) + '\n') #self.log_file.write(str(self.episodes) + ': ' + str(self.cumulative) + ' @ ' + str(float(self.iteration)) + '\n') #print('Accuracy: ' + str(acc)) self.acc_file.write(str(acc) + '\n') self.nb_correct = 0 self.cumulative = 0 return self.observation, self.reward, self.done, {}
def _step(self, raw_action): #action = np.matrix([raw_action.item(0)*float(self.width),raw_action.item(1)*float(self.height)]) action = np.matrix( [raw_action.item(0) + 0.5, raw_action.item(1) + 0.5]) #x = self.c.item(0)/self.width #y = self.c.item(1)/self.height self.reward = 1 - ((np.linalg.norm(action - self.last_loc)) / self.rt2) self.cumulative += self.reward self.iteration += 1 #print(self.iteration) if self.episodes % 500 == 0: if self.fw is None: self.fw = open( './images/episode_' + str(self.episodes) + '/actions.txt', 'w') self.fw.write('(' + str(action.item(0)) + ',' + str(action.item(1)) + ')\n') # An action of 0 is the NOOP j = 0 while True: if j > 50: self.done = True if self.episodes % 500 == 0: self.fw.close() self.fw = None self.episodes += 1 print( str(self.episodes) + ': ' + str(self.cumulative / float(self.iteration)) + ' *' + str(self.iteration)) self.log_file.write( str(self.episodes) + ': ' + str(self.cumulative / float(self.iteration)) + ' *' + str(self.iteration) + '\n') self.cumulative = 0 return self.observation, self.reward, self.done, self.info rot_inc = 5.0 + float(j) / 10.0 vel_inc = 1.0 + float(j) / 10.0 #print(rot_inc) dC = np.matrix([ random.normalvariate(self.v.item(0), vel_inc / self.fps), random.normalvariate(self.v.item(1), vel_inc / self.fps), random.normalvariate(self.v.item(2), vel_inc / self.fps) ]) dO = np.matrix([ random.normalvariate(self.r.item(0), vel_inc / self.fps), random.normalvariate(self.r.item(1), rot_inc / self.fps), random.normalvariate(self.r.item(2), rot_inc / self.fps) ]) newC = np.add(self.c, dC) newO = np.add(self.o, dO) d = np.linalg.norm(self.t - newC) (x, y) = projection(self.t, newC, newO, w=float(self.width), h=float(self.height)) total_v = np.linalg.norm(dC) if x <= float(self.width)*0.95 and x >= float(self.width)*0.05 and y <= float(self.height)*0.95 and y >= float(self.height)*0.05 \ and d > 3 and d < 30 and newC.item(2) < -2 \ and total_v*self.fps <= 30: break j += 1 self.c = newC self.v = dC self.o = newO self.r = dO x = x / float(self.width) y = y / float(self.height) self.last_loc = np.matrix([x, y]) self.state = self._render() self.observation = self.get_obs(self.last_loc) self.done = (self.iteration > 100) info = (self.c, self.v, self.o, self.r) self.info = {} #print(action) #print(np.matrix([x,y])) #print(self.reward) if self.done: if self.episodes % 500 == 0: self.fw.close() self.fw = None self.episodes += 1 print( str(self.episodes) + ': ' + str(self.cumulative / float(self.iteration))) self.log_file.write( str(self.episodes) + ': ' + str(self.cumulative / float(self.iteration)) + '\n') self.cumulative = 0 return self.observation, self.reward, self.done, self.info
def _step(self, raw_action): # action = np.matrix([raw_action.item(0)*float(self.width),raw_action.item(1)*float(self.height)]) # x = self.c.item(0)/self.width # y = self.c.item(1)/self.height # self.reward = 1-((np.linalg.norm(action-self.last_loc))/self.rt2) raw_action = raw_action - 1 action = [int(raw_action % 3), int(raw_action / 3)] (x0, y0), target_in_front = projection(self.t, self.c, self.o, w=float(self.width), h=float(self.height)) self.c = self.hunter.getPosition() self.c = np.matrix([self.c.x_val, self.c.y_val, self.c.z_val]) #self.vC = self.hunter.getVelocity() #self.vC = np.matrix([self.vC.x_val, self.vC.y_val, self.vC.z_val]) self.o = self.hunter.getOrientation() self.o = np.matrix([math.degrees(self.o.x_val), math.degrees(self.o.y_val), 2*math.degrees(self.o.z_val)]) self.t = self.target.getPosition() self.t = np.matrix([self.t.x_val, self.t.y_val, self.t.z_val]) #self.vT = self.target.getVelocity() #self.vT = np.matrix([self.vT.x_val, self.vT.y_val, self.vT.z_val]) (x, y), target_in_front = projection(self.t, self.c, self.o, w=float(self.width), h=float(self.height)) d = np.linalg.norm(self.t - self.c) #print((x,y)) #print((x0,y0)) #print() self.iteration += 1 if x < 0 or y < 0 or x > self.width or y > self.height or \ self.t.item(2) > -2 or self.c.item(2) > -2 or \ d < 5 or d > 25 or not target_in_front: self.done = True if not self.done: x = 3 * x / float(self.width) y = 3 * y / float(self.height) loc = [int(x),int(y)] #print(action) #print(loc) self.reward = 1 - ((action[0] - loc[0]) ** 2 + (action[1] - loc[1]) ** 2) if self.reward == 1: self.nb_correct += 1 self.cumulative += self.reward # print(self.iteration) if self.episodes % self.log_int == 0: if self.fw is None: self.fw = open('./images/episode_' + str(self.episodes) + '/actions.txt', 'w') self.fw.write('(' + str(raw_action) + ')\n') # An action of 0 is the NOOP self.aC = np.matrix([random.normalvariate(mu=self.aC.item(0), sigma=2/self.fps), random.normalvariate(mu=self.aC.item(0), sigma=2/self.fps), random.normalvariate(mu=self.aC.item(0), sigma=2/self.fps)] ) self.aT= np.matrix([random.normalvariate(mu=self.aT.item(0), sigma=2/self.fps), random.normalvariate(mu=self.aT.item(0), sigma=2/self.fps), random.normalvariate(mu=self.aT.item(0), sigma=2/self.fps)] ) self.vC = self.vC+self.aC self.vT = self.vT+self.aT newC = self.c + self.vC newT = self.t + self.vT #self.hunter.moveToPosition(newC.item(0), newC.item(1), newC.item(2), 1) #self.target.moveToPosition(newT.item(0), newT.item(1), newT.item(2), np.linalg.norm(self.vT)) #self.hunter.moveByVelocity(self.vC.item(0), # self.vC.item(1), # self.vC.item(2), # 10) #self.hunter.simSetPose(newC, self.hunter.getOrientation()) #v_temp = self.hunter.getVelocity() #v_temp = np.matrix([v_temp.x_val, v_temp.y_val, v_temp.z_val]) #v_magnitude = np.linalg.norm(v_temp) #if v_magnitude == 0: # print(self.iteration) #self.target.moveByVelocity(self.vT.item(0), # self.vT.item(1), # self.vT.item(2), # 10) #print(self.vT) self.t = newT self.target.simSetPose(Vector3r(newT.item(0), newT.item(1), newT.item(2)), self.target.toQuaternion(0,0,0)) #time.sleep(1) self.state = self._render() self.observation = self.get_obs() self.done = (self.iteration > self.max_iter) # Proper Termination if self.done: if self.episodes % self.log_int == 0 and self.fw is not None: self.fw.close() self.fw = None self.episodes += 1 acc = float(self.nb_correct) / float(self.iteration) print(str(self.episodes) + ': ' + str(self.cumulative / float(self.iteration)) + ' len: '+str(self.iteration)) self.log_file.write(str(self.episodes) + ': ' + str(self.cumulative / float(self.iteration)) + ' len: '+str(self.iteration) + '\n') print('Accuracy: ' + str(acc)) self.acc_file.write(str(acc) + '\n') self.nb_correct = 0 self.cumulative = 0 return self.observation, self.reward, self.done, {}
def random_orientation(self, t): while True: c = np.matrix([ random.normalvariate(t.item(0), 10), random.normalvariate(t.item(1), 10), random.normalvariate(t.item(2), 5) ]) d = np.linalg.norm(t - c) if d > 5 and d < 15 and c.item(2) < -2: # break # while True: # o = np.matrix([random.uniform(-180,180), # random.uniform(-180,180), # random.uniform(-180,180)]) o = get_o_from_pts(t, c) (x, y), target_in_front = projection(t, c, o, w=float(self.width), h=float(self.height)) if x == self.width / 2 and y == self.height / 2: break #print((x, y)) for i in range(50): j = 0 while True: rot_inc = 5.0 + float(j) / 10.0 vel_inc = 10.0 + float(j) / 10.0 if j > 50: break # print(rot_inc) dC = np.matrix([ random.normalvariate(self.v.item(0), vel_inc / self.fps), random.normalvariate(self.v.item(1), vel_inc / self.fps), random.normalvariate(self.v.item(2), vel_inc / self.fps) ]) dO = np.matrix([ 0, 0, random.normalvariate(self.r.item(2), rot_inc / self.fps) ]) newC = np.add(self.c, dC) newO = np.add(self.o, dO) d = np.linalg.norm(self.t - newC) (x, y), target_in_front = projection(self.t, newC, newO, w=float(self.width), h=float(self.height)) total_v = np.linalg.norm(dC) if x <= float(self.width) * 0.9 and x >= float(self.width) * 0.1 and y <= float( self.height) * 0.9 and y >= float(self.height) * 0.1 \ and d > 3 and d < 15 and newC.item(2) < -2 \ and total_v * self.fps <= 10 \ and target_in_front: break j += 1 self.c = newC self.v = dC self.o = newO self.r = dO (x, y), target_in_front = projection(self.t, self.c, self.o, w=float(self.width), h=float(self.height)) print((x, y)) self.v = np.matrix([0.0, 0.0, 0.0]) self.r = np.matrix([0.0, 0.0, 0.0]) return (self.c, self.o)
def _step(self, raw_action): # action = np.matrix([raw_action.item(0)*float(self.width),raw_action.item(1)*float(self.height)]) # x = self.c.item(0)/self.width # y = self.c.item(1)/self.height # self.reward = 1-((np.linalg.norm(action-self.last_loc))/self.rt2) action = [int(raw_action % 3), int(raw_action / 3)] #if self.last_loc[0] > 2 or self.last_loc[0] < 0 or self.last_loc[1] < 0 or self.last_loc[1] > 2: # if raw_action == 9: # self.reward = 1 # else: # self.reward = 0 #else: self.reward = 1 - ((action[0] - self.last_loc[0])**2 + (action[1] - self.last_loc[1])**2) if self.reward == 1: self.nb_correct += 1 self.cumulative += self.reward self.iteration += 1 # print(self.iteration) if self.episodes % self.log_freq == 0: if self.act_log is None: self.act_log = open( './images/episode_' + str(self.episodes) + '/actions.txt', 'w') self.act_log.write('(' + str(raw_action) + ')\n') if self.obs_log is None: self.obs_log = open( './images/episode_' + str(self.episodes) + '/actions.txt', 'w') self.obs_log.write('(' + str(raw_action) + ')\n') self.aT = np.matrix([ random.normalvariate(mu=self.aT.item(0), sigma=2 / self.fps), random.normalvariate(mu=self.aT.item(0), sigma=2 / self.fps), random.normalvariate(mu=self.aT.item(0), sigma=2 / self.fps) ]) self.vT = self.vT + self.aT norm_vT = np.linalg.norm(self.vT) if norm_vT * self.fps > 9: self.vT = 9.0 * self.vT / (norm_vT * self.fps) self.t = self.t + self.vT # An action of 0 is the NOOP j = 0 while True: if j > 100: self.done = True if self.episodes % self.log_freq == 0: self.act_log.close() self.act_log = None self.obs_log.close() self.obs_log = None self.episodes += 1 acc = float(self.nb_correct) / float(self.iteration) print( str(self.episodes) + ': ' + str(self.cumulative / float(self.iteration)) + ' *' + str(self.iteration)) self.log_file.write( str(self.episodes) + ': ' + str(self.cumulative / float(self.iteration)) + ' *' + str(self.iteration) + '\n') print('Accuracy: ' + str(acc)) self.acc_file.write(str(acc) + '\n') self.nb_correct = 0 self.cumulative = 0 return self.observation, self.reward, self.done, None rot_inc = 5.0 + float(j) / 10.0 vel_inc = 1.0 + float(j) / 10.0 # print(rot_inc) dC = np.matrix([ random.normalvariate(self.v.item(0), vel_inc / self.fps), random.normalvariate(self.v.item(1), vel_inc / self.fps), random.normalvariate(self.v.item(2), vel_inc / self.fps) ]) dO = np.matrix([ random.normalvariate(self.r.item(0), vel_inc / self.fps), random.normalvariate(self.r.item(1), rot_inc / self.fps), random.normalvariate(self.r.item(2), rot_inc / self.fps) ]) newC = np.add(self.c, dC) newO = np.add(self.o, dO) d = np.linalg.norm(self.t - newC) (x, y), target_in_front = projection(self.t, newC, newO, w=float(self.width), h=float(self.height)) total_v = np.linalg.norm(dC) if x <= float(self.width) * 0.95 and x >= float(self.width) * 0.05 and y <= float( self.height) * 0.95 and y >= float(self.height) * 0.05 \ and d > 3 and d < 15 and newC.item(2) < -2 \ and total_v * self.fps <= 30 and target_in_front: break j += 1 self.c = newC self.v = dC self.o = newO self.r = dO x = 3 * x / float(self.width) y = 3 * y / float(self.height) self.last_loc = [int(x), int(y)] self.state = self._render() self.observation = self.get_obs(self.last_loc) self.done = (self.iteration > self.max_iter) info = (self.c, self.v, self.o, self.r) self.info = {} # print(action) # print(np.matrix([x,y])) # print(self.reward) if self.done: if self.episodes % self.log_freq == 0: self.act_log.close() self.act_log = None self.obs_log.close() self.obs_log = None self.episodes += 1 acc = float(self.nb_correct) / float(self.iteration) print( str(self.episodes) + ': ' + str(self.cumulative / float(self.iteration))) self.log_file.write( str(self.episodes) + ': ' + str(self.cumulative / float(self.iteration)) + '\n') print('Accuracy: ' + str(acc)) self.acc_file.write(str(acc) + '\n') self.nb_correct = 0 self.cumulative = 0 return self.observation, self.reward, self.done, self.info