Ejemplo n.º 1
0
 def random_orientation(self, t):
     while True:
         stop = False
         c = np.matrix([random.normalvariate(t.item(0), 10),
                        random.normalvariate(t.item(1), 10),
                        t.item(2)])
         d = np.linalg.norm(t - c)
         if d > 10 and d < 12 and c.item(2) < -2:
             #o = get_o_from_pts(t, c)
             dx = c.item(0) - t.item(0)
             dy = c.item(1) - t.item(1)
             yaw = math.degrees(math.asin(-dy/d))
             yaw2 = math.degrees(math.acos(-dx/d))
             if yaw != yaw2: stop = True
             o = np.matrix([0.0,0.0,yaw])
             (x, y), target_in_front = projection(t, c, o, w=float(self.width), h=float(self.height))
             if x <= float(self.width) * 0.95 and x >= float(self.width) * 0.05 and y <= float(
                     self.height) * 0.95 and y >= float(self.height) * 0.05 and target_in_front\
                     and not stop:
                 while True:
                     newC = np.matrix([random.normalvariate(c.item(0), d/2),
                                       c.item(1),
                                       random.normalvariate(c.item(2), d/2)])
                     (x, y), target_in_front = projection(t, newC, o, w=float(self.width), h=float(self.height))
                     if x <= float(self.width) * 0.95 and x >= float(self.width) * 0.05 and \
                         y <= float(self.height) * 0.95 and y >= float(self.height) * 0.05 and \
                             newC.item(2) < -2 and target_in_front and d > 10 and d < 15 and c.item(2) < -2:
                         self.c = newC
                         self.o = o
                         (x, y), target_in_front = projection(self.t, self.c, self.o,
                                                              w=float(self.width),
                                                              h=float(self.height))
                         #print((x, y))
                         return (self.c, self.o)
Ejemplo n.º 2
0
    def _reset(self):
        self.iteration = 0
        self.t = np.matrix([-10.0, 10.0, -10.0])
        self.o = np.matrix([0.0, 0.0, 0.0])
        self.c = np.matrix([-20.0, 10.0, -10.0])
        self.v = 0.0
        self.r = np.matrix([0.0, 0.0, 0.0])
        self.last_d = 1000
        self.nb_correct = 0
        #self.client.simSetPose(Vector3r(self.c.item(0), self.c.item(1), self.c.item(2)),
        #                       self.client.toQuaternion(math.radians(self.o.item(1)), math.radians(self.o.item(0)),
        #                                                math.radians(self.o.item(2))))
        self.image = None
        self.fw = None
        # response = self.client.simGetImages([ImageRequest(0, AirSimImageType.Scene)])[0]
        # self.image = self.get_rbg(response)

        self.c, self.o = self.random_orientation(t)
        self._render()
        #self.client.simSetPose(Vector3r(self.c.item(0), self.c.item(1), self.c.item(2)),
        #                       self.client.toQuaternion(math.radians(self.o.item(1)), math.radians(self.o.item(0)),
        #                                                math.radians(self.o.item(2))))

        (x, y), _ = projection(self.t,
                               self.c,
                               self.o,
                               w=float(self.width),
                               h=float(self.height))
        x = 3 * x / float(self.width)
        y = 3 * y / float(self.height)
        self.observation = self.get_obs()
        return self.observation
    def _reset(self):
        self.iteration = 0
        self.t = np.matrix([-10.0, 10.0, -10.0])
        self.o = np.matrix([0.0, 0.0, 0.0])
        self.c = np.matrix([-20.0, 10.0, -10.0])
        self.v = np.matrix([0.0, 0.0, 0.0])
        self.r = np.matrix([0.0, 0.0, 0.0])
        self.fps = 30.0
        self.client.simSetPose(
            Vector3r(self.c.item(0), self.c.item(1), self.c.item(2)),
            self.client.toQuaternion(math.radians(self.o.item(1)),
                                     math.radians(self.o.item(0)),
                                     math.radians(self.o.item(2))))
        self.image = None
        self.fw = None
        #response = self.client.simGetImages([ImageRequest(0, AirSimImageType.Scene)])[0]
        #self.image = self.get_rbg(response)

        self._render()

        self.observation = self.get_obs(np.matrix([0.5, 0.5]))

        (x, y) = projection(self.t,
                            self.c,
                            self.o,
                            w=float(self.width),
                            h=float(self.height))
        x = x / float(self.width)
        y = y / float(self.height)
        self.last_loc = np.matrix([x, y])
        return self.observation
Ejemplo n.º 4
0
    def _reset(self):
        #time.sleep(1)
        self.iteration = 0
        self.t = np.matrix([-20.0, -5.0, -10.0])
        self.o = np.matrix([0.0, 0.0, 0.0])
        self.c = np.matrix([-30.0, -5.0, -10.0])
        self.vC = np.matrix([0.0, 0.0, 0.0])
        self.vT = np.matrix([0.0, 0.0, 0.0])
        self.aC = np.matrix([0.0, 0.0, 0.0])
        self.aT = np.matrix([0.0, 0.0, 0.0])
        self.r = np.matrix([0.0, 0.0, 0.0])
        self.fps = 60.0
        self.nb_correct = 0
        self.done = False
        self.reward = 0.0
        #self.hunter.simSetPose(Vector3r(self.c.item(0), self.c.item(1), self.c.item(2)),
        #                       self.hunter.toQuaternion(math.radians(self.o.item(1)), math.radians(self.o.item(0)),
        #                                                math.radians(self.o.item(2))))
        self.image = None
        self.fw = None
        # response = self.hunter.simGetImages([ImageRequest(0, AirSimImageType.Scene)])[0]
        # self.image = self.get_rbg(response)

        t = np.matrix([random.normalvariate(self.t.item(0), 5),
                            random.normalvariate(self.t.item(1), 5),
                            random.normalvariate(self.t.item(0), 5)])
        while t.item(2) > -5:
            t = np.matrix([random.normalvariate(self.t.item(0), 5),
                                random.normalvariate(self.t.item(1), 5),
                                random.normalvariate(self.t.item(0), 5)])
        self.t = t
        self.c, self.o = self.random_orientation(self.t)
        self.hunter.simSetPose(Vector3r(self.c.item(0), self.c.item(1), self.c.item(2)),
                               self.hunter.toQuaternion(math.radians(self.o.item(1)), math.radians(self.o.item(0)),
                                                        math.radians(self.o.item(2))))
        self.hunter.moveByVelocity(0, 0, 1, 0.1)
        self.target.simSetPose(Vector3r(self.t.item(0), self.t.item(1), self.t.item(2)),
                               self.target.toQuaternion(0,0,0))
        self.target.moveByVelocity(0, 0, 1, 0.1)
        self._render()
        #self.hunter.simSetPose(Vector3r(self.c.item(0), self.c.item(1), self.c.item(2)),
        #                       self.hunter.toQuaternion(math.radians(self.o.item(1)), math.radians(self.o.item(0)),
        #                                                math.radians(self.o.item(2))))

        (x, y), _ = projection(self.t, self.c, self.o, w=float(self.width), h=float(self.height))
        #print((x, y, _))
        x = 3 * x / float(self.width)
        y = 3 * y / float(self.height)
        self.observation = self.get_obs()
        return self.observation
Ejemplo n.º 5
0
    def _get_obs(self):
        if self.image is None:
            return None

        #self.current_state = np.concatenate(list(self.image_queue))
        #self.current_state = (self.current_state.flatten())
        (x, y), target_in_front = projection(self.t, self.c, self.o, w=float(self.width), h=float(self.height))
        pix = np.array((x/255.0,y/143.0)).flatten()
        self.current_state = np.concatenate([np.array(self.v).flatten()/(10.0/self.fps),
                                             np.array(self.o).flatten()/360.0,
                                             np.array(self.r).flatten()/360.0,
                                             np.array(self.t).flatten()/30.0,
                                             np.array(self.c).flatten()/30.0,
                                             np.array(pix),
                                             self.current_state], 0)

        return self.current_state
Ejemplo n.º 6
0
    def _random_orientation(self, t):
        i = 51
        while i > 50:
            while True:
                c = np.matrix([random.normalvariate(t.item(0), 10),
                               random.normalvariate(t.item(1), 10),
                               random.normalvariate(t.item(2), 5)])
                d = np.linalg.norm(t - c)
                if d > 10 and d < 15 and c.item(2) < -5:
                    #    break
                    # while True:
                    # o = np.matrix([random.uniform(-180,180),
                    #               random.uniform(-180,180),
                    #               random.uniform(-180,180)])
                    o = get_o_from_pts(t, c)
                    (x, y), target_in_front = projection(t, c, o, w=float(self.width), h=float(self.height))
                    if x == self.width / 2 and y == self.height / 2 and target_in_front:
                        break

            r = np.matrix([0.0, 0.0, 0.0])
            v = np.matrix([0.0, 0.0, 0.0])
            #print((x, y))

            for i in range(50):
                j = 0
                while True:
                    rot_inc = 5.0 + float(j) / 10.0
                    vel_inc = 10.0 + float(j) / 10.0
                    if j > 50:
                        break
                    # print(rot_inc)
                    dC = np.matrix([random.normalvariate(v.item(0), vel_inc / self.fps),
                                    random.normalvariate(v.item(1), vel_inc / self.fps),
                                    random.normalvariate(v.item(2), vel_inc / self.fps)]
                                   )
                    dO = np.matrix([random.normalvariate(r.item(0), vel_inc / self.fps),
                                    random.normalvariate(r.item(1), rot_inc / self.fps),
                                    random.normalvariate(r.item(2), rot_inc / self.fps)]
                                   )
                    newC = np.add(c, dC)
                    newO = np.add(o, dO)
                    d = np.linalg.norm(self.t - newC)
                    (x, y), target_in_front = projection(self.t, newC, newO, w=float(self.width),
                                                         h=float(self.height))
                    total_v = np.linalg.norm(dC)
                    if x <= float(self.width) * 0.95 and x >= float(self.width) * 0.05 and y <= float(
                            self.height) * 0.95 and y >= float(self.height) * 0.05 \
                            and d > 10 and d < 15 and newC.item(2) < -5 \
                            and total_v * self.fps <= 30 \
                            and target_in_front:
                        break
                    j += 1
                c = newC
                v = dC
                o = newO
                r = dO
            if x <= float(self.width) * 0.95 and x >= float(self.width) * 0.05 and y <= float(
                    self.height) * 0.95 and y >= float(self.height) * 0.05 \
                    and d > 10 and d < 15 and c.item(2) < -5 \
                    and target_in_front:
                    break
        self.last_d = np.linalg.norm(self.t - c)

        (x, y), target_in_front = projection(self.t, c, o, w=float(self.width), h=float(self.height))
        #print((x, y))
        return (c, o)
Ejemplo n.º 7
0
    def step(self, raw_action):
        # action = np.matrix([raw_action.item(0)*float(self.width),raw_action.item(1)*float(self.height)])
        # x = self.c.item(0)/self.width
        # y = self.c.item(1)/self.height
        # self.reward = 1-((np.linalg.norm(action-self.last_loc))/self.rt2)
        if self.discrete:
            # action = [int(raw_action % 3), int(raw_action / 3)]
            action = raw_action
            yaw = 1 - action % 3
            action = int(action / 3)
            pitch = 1 - action % 3
            action = int(action / 3)
            roll = 1 - action % 3
            action = int(action / 3)
            acc = 1 - action % 3
        else:
            action = raw_action
            roll = action.item(0)
            pitch = action.item(1)
            yaw = action.item(2)
            acc = action.item(3)

        # print(self.iteration)

        if self.episodes % 100 == 0:
            if self.fw is None:
                self.fw = open('./images/episode_' + str(self.episodes) + '/actions.txt', 'w')
            self.fw.write('(' + str(action) + ')\n')

        # An action of 0 is the NOOP
        j = 0

        max_v = 10.0
        max_r = 360.0
        dR = np.matrix([roll, pitch, yaw])
        self.v = self.v + acc / self.fps
        if self.v*self.fps > max_v: self.v = max_v/self.fps
        self.r = self.r + dR / self.fps

        if self.r.item(0) > max_r: self.r = np.matrix([self.r.item(0)-max_r, self.r.item(1), self.r.item(2)])
        if self.r.item(1) > max_r: self.r = np.matrix([self.r.item(0), self.r.item(1)-max_r, self.r.item(2)])
        if self.r.item(2) > max_r: self.r = np.matrix([self.r.item(0), self.r.item(1), self.r.item(2)-max_r])

        direction = np.dot(rot_mat(self.r.item(0), self.r.item(1), self.r.item(2)), np.transpose(self.c))
        direction = np.transpose(direction) / np.linalg.norm(direction)
        self.o = self.o + self.r
        if self.o.item(0) > max_r: self.o = np.matrix([self.o.item(0)-max_r, self.o.item(1), self.o.item(2)])
        if self.o.item(1) > max_r: self.o = np.matrix([self.o.item(0), self.o.item(1)-max_r, self.o.item(2)])
        if self.o.item(2) > max_r: self.o = np.matrix([self.o.item(0), self.o.item(1), self.o.item(2)-max_r])
        self.c = self.c + direction * self.v
        if self.c.item(2) > -5:
            self.c = np.matrix([self.c.item(0), self.c.item(1), -5])

        self.state = self._render()

        self.previous_state = self.current_state
        self.current_state = self._get_obs()

        (x, y), target_in_front = projection(self.t, self.c, self.o, w=float(self.width), h=float(self.height))
        pix_d = ((x - self.width / 2) ** 2 + (y - self.height / 2) ** 2) / (
        (self.width / 2) ** 2 + (self.height / 2) ** 2)

        d = np.linalg.norm(self.t - self.c)
        d_max = 0

        # d_norm = d/d_max

        center_reward = 1 - pix_d
        dist_reward = 1 / d

        # self.reward = dist_reward
        # if d < self.last_d:
        #    self.reward = 1
        # elif d > self.last_d:
        #    self.reward = -1
        # else:
        #    self.reward = 0
        #self.reward = 0
        #self.reward = (30.0/d - 30.0/self.last_d)
        #print(self.reward)
        self.reward = (self.last_d - d)
        self.last_d = d
        #self.last_d = min(d, self.last_d)
        self.done = (self.iteration > self.max_iter)

        if x > self.width or x < 0 or y > self.height or y < 0 or d > 30:
            print((x,y))
            self.done = True
            self.reward = 0

        if d < 1:
            self.done = True
            self.reward = 0

        # self.reward -= 0.1

        self.total_reward += self.reward
        self.iteration += 1

        if self.done:
            # print(self.c)
            if self.episodes % 100 == 0:
                self.fw.close()
                self.fw = None
            self.episodes += 1
            print(str(self.episodes) + ': ' + str(self.total_reward)+ ' Ended At: ' + str(self.reward) + ' @ ' + str(float(self.iteration)) + ' with d: '+str(d) )
            self.log_file.write(str(self.episodes) + ': ' + str(self.total_reward)+ ' Ended At: ' + str(self.reward) + ' @ ' + str(float(self.iteration))  + ' with d: '+str(d) + '\n')

            self.total_reward = 0
        return self.current_state, self.reward, self.done, {}
Ejemplo n.º 8
0
    def _step(self, raw_action):
        # action = np.matrix([raw_action.item(0)*float(self.width),raw_action.item(1)*float(self.height)])
        # x = self.c.item(0)/self.width
        # y = self.c.item(1)/self.height
        # self.reward = 1-((np.linalg.norm(action-self.last_loc))/self.rt2)
        #action = [int(raw_action % 3), int(raw_action / 3)]
        #self.reward = 1 - ((action[0] - self.last_loc[0]) ** 2 + (action[1] - self.last_loc[1]) ** 2)
        action = raw_action
        yaw = 1 - action % 3
        action /= 3
        pitch = 1 - action % 3
        action /= 3
        roll = 1 - action % 3
        action /= 3
        acc = 1 - action % 3

        # print(self.iteration)

        if self.episodes % 500 == 0:
            if self.fw is None:
                self.fw = open(
                    './images/episode_' + str(self.episodes) + '/actions.txt',
                    'w')
            self.fw.write('(' + str(raw_action) + ')\n')

        # An action of 0 is the NOOP
        j = 0

        max_v = 10.0
        max_r = 360.0
        dR = np.matrix([roll, pitch, yaw])
        self.v = self.v + acc / self.fps
        if self.v > max_v: v = max_v
        self.r = self.r + dR / self.fps

        if self.r.item(0) > max_r:
            self.r = np.matrix([max_r, self.r.item(1), self.r.item(2)])
        if self.r.item(1) > max_r:
            self.r = np.matrix([self.r.item(0), max_r, self.r.item(2)])
        if self.r.item(2) > max_r:
            self.r = np.matrix([self.r.item(0), self.r.item(1), max_r])

        direction = np.dot(
            rot_mat(self.r.item(0), self.r.item(1), self.r.item(2)),
            np.transpose(self.c))
        direction = np.transpose(direction) / np.linalg.norm(direction)
        self.o = self.o + self.r
        self.c = self.c + direction * self.v
        if self.c.item(2) > -5:
            self.c = np.matrix([self.c.item(0), self.c.item(1), -5])

        self.state = self._render()
        self.observation = self.get_obs()

        (x, y), target_in_front = projection(self.t,
                                             self.c,
                                             self.o,
                                             w=float(self.width),
                                             h=float(self.height))
        pix_d = ((x - self.width / 2)**2 +
                 (y - self.height / 2)**2) / ((self.width / 2)**2 +
                                              (self.height / 2)**2)

        d = np.linalg.norm(self.t - self.c)
        d_max = 0

        d_norm = d / d_max

        center_reward = 1 - pix_d
        dist_reward = 1 / d

        #self.reward = dist_reward
        if d < self.last_d:
            self.reward = 1
        elif d > self.last_d:
            self.reward = -1
        else:
            self.reward = 0

        self.done = (self.iteration > self.max_iter)
        if x > self.width or x < 0 or y > self.height or y < 0 or d > 30:
            self.done = True
            self.reward = -1

        if d < 1:
            self.done = True
            #self.reward = self.max_iter

        self.cumulative += self.reward
        self.iteration += 1

        if self.done:
            #print(self.c)
            if self.episodes % 500 == 0:
                self.fw.close()
                self.fw = None
            self.episodes += 1
            acc = float(self.nb_correct) / float(self.iteration)
            print(
                str(self.episodes) + ': ' + str(self.cumulative) +
                ' Ended At: ' + str(self.reward) + ' @ ' +
                str(float(self.iteration)) + ' with d: ' + str(d))
            self.log_file.write(
                str(self.episodes) + ': ' + str(self.cumulative) +
                ' Ended At: ' + str(self.reward) + ' @ ' +
                str(float(self.iteration)) + ' with d: ' + str(d) + '\n')
            #self.log_file.write(str(self.episodes) + ': ' + str(self.cumulative) + ' @ ' + str(float(self.iteration)) + '\n')
            #print('Accuracy: ' + str(acc))
            self.acc_file.write(str(acc) + '\n')
            self.nb_correct = 0
            self.cumulative = 0
        return self.observation, self.reward, self.done, {}
    def _step(self, raw_action):
        #action = np.matrix([raw_action.item(0)*float(self.width),raw_action.item(1)*float(self.height)])
        action = np.matrix(
            [raw_action.item(0) + 0.5,
             raw_action.item(1) + 0.5])
        #x = self.c.item(0)/self.width
        #y = self.c.item(1)/self.height
        self.reward = 1 - ((np.linalg.norm(action - self.last_loc)) / self.rt2)
        self.cumulative += self.reward
        self.iteration += 1
        #print(self.iteration)

        if self.episodes % 500 == 0:
            if self.fw is None:
                self.fw = open(
                    './images/episode_' + str(self.episodes) + '/actions.txt',
                    'w')
            self.fw.write('(' + str(action.item(0)) + ',' +
                          str(action.item(1)) + ')\n')

        # An action of 0 is the NOOP
        j = 0
        while True:
            if j > 50:
                self.done = True

                if self.episodes % 500 == 0:
                    self.fw.close()
                    self.fw = None
                self.episodes += 1
                print(
                    str(self.episodes) + ': ' +
                    str(self.cumulative / float(self.iteration)) + ' *' +
                    str(self.iteration))
                self.log_file.write(
                    str(self.episodes) + ': ' +
                    str(self.cumulative / float(self.iteration)) + ' *' +
                    str(self.iteration) + '\n')
                self.cumulative = 0
                return self.observation, self.reward, self.done, self.info
            rot_inc = 5.0 + float(j) / 10.0
            vel_inc = 1.0 + float(j) / 10.0
            #print(rot_inc)
            dC = np.matrix([
                random.normalvariate(self.v.item(0), vel_inc / self.fps),
                random.normalvariate(self.v.item(1), vel_inc / self.fps),
                random.normalvariate(self.v.item(2), vel_inc / self.fps)
            ])
            dO = np.matrix([
                random.normalvariate(self.r.item(0), vel_inc / self.fps),
                random.normalvariate(self.r.item(1), rot_inc / self.fps),
                random.normalvariate(self.r.item(2), rot_inc / self.fps)
            ])
            newC = np.add(self.c, dC)
            newO = np.add(self.o, dO)
            d = np.linalg.norm(self.t - newC)
            (x, y) = projection(self.t,
                                newC,
                                newO,
                                w=float(self.width),
                                h=float(self.height))
            total_v = np.linalg.norm(dC)
            if x <= float(self.width)*0.95 and x >= float(self.width)*0.05 and y <= float(self.height)*0.95 and y >= float(self.height)*0.05 \
                    and d > 3 and d < 30 and newC.item(2) < -2 \
                    and total_v*self.fps <= 30:
                break
            j += 1
        self.c = newC
        self.v = dC
        self.o = newO
        self.r = dO
        x = x / float(self.width)
        y = y / float(self.height)
        self.last_loc = np.matrix([x, y])
        self.state = self._render()
        self.observation = self.get_obs(self.last_loc)
        self.done = (self.iteration > 100)
        info = (self.c, self.v, self.o, self.r)
        self.info = {}
        #print(action)
        #print(np.matrix([x,y]))
        #print(self.reward)
        if self.done:
            if self.episodes % 500 == 0:
                self.fw.close()
                self.fw = None
            self.episodes += 1
            print(
                str(self.episodes) + ': ' +
                str(self.cumulative / float(self.iteration)))
            self.log_file.write(
                str(self.episodes) + ': ' +
                str(self.cumulative / float(self.iteration)) + '\n')
            self.cumulative = 0
        return self.observation, self.reward, self.done, self.info
Ejemplo n.º 10
0
    def _step(self, raw_action):
        # action = np.matrix([raw_action.item(0)*float(self.width),raw_action.item(1)*float(self.height)])
        # x = self.c.item(0)/self.width
        # y = self.c.item(1)/self.height
        # self.reward = 1-((np.linalg.norm(action-self.last_loc))/self.rt2)
        raw_action = raw_action - 1
        action = [int(raw_action % 3), int(raw_action / 3)]
        (x0, y0), target_in_front = projection(self.t, self.c, self.o, w=float(self.width), h=float(self.height))
        self.c = self.hunter.getPosition()
        self.c = np.matrix([self.c.x_val, self.c.y_val, self.c.z_val])
        #self.vC = self.hunter.getVelocity()
        #self.vC = np.matrix([self.vC.x_val, self.vC.y_val, self.vC.z_val])
        self.o = self.hunter.getOrientation()
        self.o = np.matrix([math.degrees(self.o.x_val),
                            math.degrees(self.o.y_val),
                            2*math.degrees(self.o.z_val)])
        self.t = self.target.getPosition()
        self.t = np.matrix([self.t.x_val, self.t.y_val, self.t.z_val])
        #self.vT = self.target.getVelocity()
        #self.vT = np.matrix([self.vT.x_val, self.vT.y_val, self.vT.z_val])
        (x, y), target_in_front = projection(self.t, self.c, self.o, w=float(self.width), h=float(self.height))
        d = np.linalg.norm(self.t - self.c)
        #print((x,y))
        #print((x0,y0))
        #print()
        self.iteration += 1
        if x < 0 or y < 0 or x > self.width or y > self.height or \
                self.t.item(2) > -2 or self.c.item(2) > -2 or \
                d < 5 or d > 25 or not target_in_front:
            self.done = True
        if not self.done:
            x = 3 * x / float(self.width)
            y = 3 * y / float(self.height)
            loc = [int(x),int(y)]
            #print(action)
            #print(loc)

            self.reward = 1 - ((action[0] - loc[0]) ** 2 + (action[1] - loc[1]) ** 2)
            if self.reward == 1:
                self.nb_correct += 1
            self.cumulative += self.reward
            # print(self.iteration)

            if self.episodes % self.log_int == 0:
                if self.fw is None:
                    self.fw = open('./images/episode_' + str(self.episodes) + '/actions.txt', 'w')
                self.fw.write('(' + str(raw_action) + ')\n')

            # An action of 0 is the NOOP

            self.aC = np.matrix([random.normalvariate(mu=self.aC.item(0), sigma=2/self.fps),
                            random.normalvariate(mu=self.aC.item(0), sigma=2/self.fps),
                           random.normalvariate(mu=self.aC.item(0), sigma=2/self.fps)]
                          )
            self.aT= np.matrix([random.normalvariate(mu=self.aT.item(0), sigma=2/self.fps),
                           random.normalvariate(mu=self.aT.item(0), sigma=2/self.fps),
                           random.normalvariate(mu=self.aT.item(0), sigma=2/self.fps)]
                          )
            self.vC = self.vC+self.aC
            self.vT = self.vT+self.aT
            newC = self.c + self.vC
            newT = self.t + self.vT
            #self.hunter.moveToPosition(newC.item(0), newC.item(1), newC.item(2), 1)
            #self.target.moveToPosition(newT.item(0), newT.item(1), newT.item(2), np.linalg.norm(self.vT))
            #self.hunter.moveByVelocity(self.vC.item(0),
            #                           self.vC.item(1),
            #                           self.vC.item(2),
            #                           10)
            #self.hunter.simSetPose(newC, self.hunter.getOrientation())
            #v_temp = self.hunter.getVelocity()
            #v_temp = np.matrix([v_temp.x_val, v_temp.y_val, v_temp.z_val])
            #v_magnitude = np.linalg.norm(v_temp)
            #if v_magnitude == 0:
            #    print(self.iteration)
            #self.target.moveByVelocity(self.vT.item(0),
            #                           self.vT.item(1),
            #                           self.vT.item(2),
            #                           10)
            #print(self.vT)
            self.t = newT
            self.target.simSetPose(Vector3r(newT.item(0), newT.item(1), newT.item(2)),
                               self.target.toQuaternion(0,0,0))
            #time.sleep(1)

            self.state = self._render()
            self.observation = self.get_obs()
            self.done = (self.iteration > self.max_iter)

        # Proper Termination
        if self.done:
            if self.episodes % self.log_int == 0 and self.fw is not None:
                self.fw.close()
                self.fw = None
            self.episodes += 1
            acc = float(self.nb_correct) / float(self.iteration)
            print(str(self.episodes) + ': ' + str(self.cumulative / float(self.iteration)) + ' len: '+str(self.iteration))
            self.log_file.write(str(self.episodes) + ': ' + str(self.cumulative / float(self.iteration)) + ' len: '+str(self.iteration) + '\n')
            print('Accuracy: ' + str(acc))
            self.acc_file.write(str(acc) + '\n')
            self.nb_correct = 0
            self.cumulative = 0
        return self.observation, self.reward, self.done, {}
    def random_orientation(self, t):
        while True:
            c = np.matrix([
                random.normalvariate(t.item(0), 10),
                random.normalvariate(t.item(1), 10),
                random.normalvariate(t.item(2), 5)
            ])
            d = np.linalg.norm(t - c)
            if d > 5 and d < 15 and c.item(2) < -2:
                #    break
                # while True:
                # o = np.matrix([random.uniform(-180,180),
                #               random.uniform(-180,180),
                #               random.uniform(-180,180)])
                o = get_o_from_pts(t, c)
                (x, y), target_in_front = projection(t,
                                                     c,
                                                     o,
                                                     w=float(self.width),
                                                     h=float(self.height))
                if x == self.width / 2 and y == self.height / 2:
                    break

        #print((x, y))
        for i in range(50):
            j = 0
            while True:
                rot_inc = 5.0 + float(j) / 10.0
                vel_inc = 10.0 + float(j) / 10.0
                if j > 50:
                    break
                # print(rot_inc)
                dC = np.matrix([
                    random.normalvariate(self.v.item(0), vel_inc / self.fps),
                    random.normalvariate(self.v.item(1), vel_inc / self.fps),
                    random.normalvariate(self.v.item(2), vel_inc / self.fps)
                ])
                dO = np.matrix([
                    0, 0,
                    random.normalvariate(self.r.item(2), rot_inc / self.fps)
                ])
                newC = np.add(self.c, dC)
                newO = np.add(self.o, dO)
                d = np.linalg.norm(self.t - newC)
                (x, y), target_in_front = projection(self.t,
                                                     newC,
                                                     newO,
                                                     w=float(self.width),
                                                     h=float(self.height))
                total_v = np.linalg.norm(dC)
                if x <= float(self.width) * 0.9 and x >= float(self.width) * 0.1 and y <= float(
                        self.height) * 0.9 and y >= float(self.height) * 0.1 \
                        and d > 3 and d < 15 and newC.item(2) < -2 \
                        and total_v * self.fps <= 10 \
                        and target_in_front:
                    break
                j += 1
            self.c = newC
            self.v = dC
            self.o = newO
            self.r = dO
        (x, y), target_in_front = projection(self.t,
                                             self.c,
                                             self.o,
                                             w=float(self.width),
                                             h=float(self.height))
        print((x, y))
        self.v = np.matrix([0.0, 0.0, 0.0])
        self.r = np.matrix([0.0, 0.0, 0.0])
        return (self.c, self.o)
    def _step(self, raw_action):
        # action = np.matrix([raw_action.item(0)*float(self.width),raw_action.item(1)*float(self.height)])
        # x = self.c.item(0)/self.width
        # y = self.c.item(1)/self.height
        # self.reward = 1-((np.linalg.norm(action-self.last_loc))/self.rt2)
        action = [int(raw_action % 3), int(raw_action / 3)]
        #if self.last_loc[0] > 2 or self.last_loc[0] < 0 or self.last_loc[1] < 0 or self.last_loc[1] > 2:
        #    if raw_action == 9:
        #        self.reward = 1
        #    else:
        #        self.reward = 0
        #else:
        self.reward = 1 - ((action[0] - self.last_loc[0])**2 +
                           (action[1] - self.last_loc[1])**2)
        if self.reward == 1:
            self.nb_correct += 1
        self.cumulative += self.reward
        self.iteration += 1
        # print(self.iteration)

        if self.episodes % self.log_freq == 0:
            if self.act_log is None:
                self.act_log = open(
                    './images/episode_' + str(self.episodes) + '/actions.txt',
                    'w')
            self.act_log.write('(' + str(raw_action) + ')\n')
            if self.obs_log is None:
                self.obs_log = open(
                    './images/episode_' + str(self.episodes) + '/actions.txt',
                    'w')
            self.obs_log.write('(' + str(raw_action) + ')\n')

        self.aT = np.matrix([
            random.normalvariate(mu=self.aT.item(0), sigma=2 / self.fps),
            random.normalvariate(mu=self.aT.item(0), sigma=2 / self.fps),
            random.normalvariate(mu=self.aT.item(0), sigma=2 / self.fps)
        ])
        self.vT = self.vT + self.aT
        norm_vT = np.linalg.norm(self.vT)
        if norm_vT * self.fps > 9:
            self.vT = 9.0 * self.vT / (norm_vT * self.fps)
        self.t = self.t + self.vT

        # An action of 0 is the NOOP
        j = 0
        while True:
            if j > 100:
                self.done = True

                if self.episodes % self.log_freq == 0:
                    self.act_log.close()
                    self.act_log = None
                    self.obs_log.close()
                    self.obs_log = None
                self.episodes += 1
                acc = float(self.nb_correct) / float(self.iteration)
                print(
                    str(self.episodes) + ': ' +
                    str(self.cumulative / float(self.iteration)) + ' *' +
                    str(self.iteration))
                self.log_file.write(
                    str(self.episodes) + ': ' +
                    str(self.cumulative / float(self.iteration)) + ' *' +
                    str(self.iteration) + '\n')
                print('Accuracy: ' + str(acc))
                self.acc_file.write(str(acc) + '\n')
                self.nb_correct = 0
                self.cumulative = 0
                return self.observation, self.reward, self.done, None
            rot_inc = 5.0 + float(j) / 10.0
            vel_inc = 1.0 + float(j) / 10.0
            # print(rot_inc)
            dC = np.matrix([
                random.normalvariate(self.v.item(0), vel_inc / self.fps),
                random.normalvariate(self.v.item(1), vel_inc / self.fps),
                random.normalvariate(self.v.item(2), vel_inc / self.fps)
            ])
            dO = np.matrix([
                random.normalvariate(self.r.item(0), vel_inc / self.fps),
                random.normalvariate(self.r.item(1), rot_inc / self.fps),
                random.normalvariate(self.r.item(2), rot_inc / self.fps)
            ])
            newC = np.add(self.c, dC)
            newO = np.add(self.o, dO)
            d = np.linalg.norm(self.t - newC)
            (x, y), target_in_front = projection(self.t,
                                                 newC,
                                                 newO,
                                                 w=float(self.width),
                                                 h=float(self.height))
            total_v = np.linalg.norm(dC)
            if x <= float(self.width) * 0.95 and x >= float(self.width) * 0.05 and y <= float(
                    self.height) * 0.95 and y >= float(self.height) * 0.05 \
                    and d > 3 and d < 15 and newC.item(2) < -2 \
                    and total_v * self.fps <= 30 and target_in_front:
                break
            j += 1
        self.c = newC
        self.v = dC
        self.o = newO
        self.r = dO

        x = 3 * x / float(self.width)
        y = 3 * y / float(self.height)
        self.last_loc = [int(x), int(y)]
        self.state = self._render()
        self.observation = self.get_obs(self.last_loc)
        self.done = (self.iteration > self.max_iter)
        info = (self.c, self.v, self.o, self.r)
        self.info = {}
        # print(action)
        # print(np.matrix([x,y]))
        # print(self.reward)
        if self.done:
            if self.episodes % self.log_freq == 0:
                self.act_log.close()
                self.act_log = None
                self.obs_log.close()
                self.obs_log = None
            self.episodes += 1
            acc = float(self.nb_correct) / float(self.iteration)
            print(
                str(self.episodes) + ': ' +
                str(self.cumulative / float(self.iteration)))
            self.log_file.write(
                str(self.episodes) + ': ' +
                str(self.cumulative / float(self.iteration)) + '\n')
            print('Accuracy: ' + str(acc))
            self.acc_file.write(str(acc) + '\n')
            self.nb_correct = 0
            self.cumulative = 0
        return self.observation, self.reward, self.done, self.info