Code Example #1
def main(args):
    with tf.Session() as sess:
        oe_insp_0 = np.array([6771.0, 0.0, 0.001, 0.0, math.radians(5.0), 0.0])
        oe_targ_0 = np.array([6771.0, 0.0, 0.0, 0.0, math.radians(5.0), 0.0])
        roe_0 = util.ROEfromOEs(oe_insp_0, oe_targ_0)
        RTN_0 = util.ROE2HILL(roe_0, oe_targ_0[0], oe_targ_0[1])
        w = 0.2
        w_0 = np.array([0, 0, w])
        Pg_0 = np.array([np.linalg.norm(RTN_0), 0, 0])
        # Array to store "feature points" by theta, phi pair in Geometric frame.
        map_0 = np.zeros((360, 360))
        fuel_0 = np.array((50, ))
        state_0 = np.concatenate((roe_0, Pg_0, w_0, fuel_0), axis=0)
        dt = 100
        kwargs = {
            'init_state': state_0,
            'map': map_0,
            'inspOE_0': oe_insp_0,
            'targOE_0': oe_targ_0,
            'RTN_0': RTN_0,
            'dt': dt
        }
        env = gym.make(args['env'], **kwargs)
        np.random.seed(int(args['random_seed']))
        tf.set_random_seed(int(args['random_seed']))
        env.seed(int(args['random_seed']))

        state_dim = len(state_0)
        action_dim = env.action_space.shape[0]
        action_bound = env.action_space.high[0]
        assert (env.action_space.high[0] == -env.action_space.low[0])
        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             float(args['actor_lr']), float(args['tau']),
                             int(args['minibatch_size']))

        critic = CriticNetwork(sess, state_dim, action_dim,
                               float(args['critic_lr']), float(args['tau']),
                               float(args['gamma']),
                               actor.get_num_trainable_vars())

        actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

        if args['use_gym_monitor']:
            if not args['render_env']:
                env = wrappers.Monitor(env,
                                       args['monitor_dir'],
                                       video_callable=False,
                                       force=True)
            else:
                env = wrappers.Monitor(env, args['monitor_dir'], force=True)
        print('Beginning Training')
        train(sess, env, args, actor, critic, actor_noise)
        save_path = actor.saver.save(sess, "./results/model.ckpt")
        print("Model saved in path: %s" % save_path)
        if args['use_gym_monitor']:
            env.monitor.close()
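
main() reads all of its hyperparameters out of an args dictionary; the keys it indexes (env, random_seed, actor_lr, critic_lr, gamma, tau, minibatch_size, use_gym_monitor, render_env, monitor_dir) suggest an argparse-style command-line entry point. The sketch below shows one way such a dictionary could be assembled; the default values and the environment id are illustrative assumptions, not the project's actual settings.

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='DDPG training entry point (sketch)')
    # The environment id and all defaults below are placeholders, not the project's real values.
    parser.add_argument('--env', default='InspectionEnv-v0')
    parser.add_argument('--random-seed', default=1234)
    parser.add_argument('--actor-lr', default=0.0001)
    parser.add_argument('--critic-lr', default=0.001)
    parser.add_argument('--gamma', default=0.99)
    parser.add_argument('--tau', default=0.001)
    parser.add_argument('--minibatch-size', default=64)
    parser.add_argument('--monitor-dir', default='./results/gym_ddpg')
    parser.add_argument('--use-gym-monitor', action='store_true')
    parser.add_argument('--render-env', action='store_true')
    # main() indexes args like a dict, so convert the parsed namespace with vars().
    args = vars(parser.parse_args())
    main(args)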
Code Example #2
def genOnboardPose(ROEpred, depthEst, targetA, targetU):
    def genInertialPositionfromGPS(inertialPosGuess):
        """Generate true inertial position by sampling distribution from guess of inertial position. Guess obtained by
        using predicted ROEs of next state and depth estimate at next state."""
        posGuessMean = inertialPosGuess[0]
        posGuessCov = inertialPosGuess[1]
        truePos = np.random.multivariate_normal(posGuessMean, posGuessCov)
        return truePos

    RTNpred = util.ROE2HILL(ROEpred, targetA, targetU)
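
The nested helper genInertialPositionfromGPS expects its argument as a (mean, covariance) pair and draws a noisy "true" inertial position from that distribution. Below is a minimal standalone sketch of the same sampling step; the position and covariance values are illustrative assumptions.

import numpy as np

# Illustrative guess: a mean inertial position with a small diagonal covariance.
pos_guess_mean = np.array([6771.0, 0.0, 0.0])
pos_guess_cov = np.diag([0.01, 0.01, 0.01])

# The same draw performed inside genInertialPositionfromGPS.
true_pos = np.random.multivariate_normal(pos_guess_mean, pos_guess_cov)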
Code Example #3
    def succAndProbReward(self, state, action):
        # Apply the maneuver as a change in relative orbital elements (ROEs).
        ROE = state[0]
        dROE = util.mapActiontoROE(action, self.inspOE[1], self.targOE[0])
        ROE = ROE + dROE
        # Propagate the relative state over one time step and express it in the Hill (RTN) frame.
        newROE = util.propagateROE(ROE, self.dt, self.inspOE[0],
                                   self.targOE[0])
        newRTN = util.ROE2HILL(newROE, self.targOE[0], self.targOE[1])
        # Advance both absolute orbits with Gauss's variational equations.
        self.inspOE += util.GVEs(self.inspOE, action, self.dt)
        self.targOE += util.GVEs(self.targOE, np.array([0, 0, 0]), self.dt)
        # Fuel use scales with the applied delta-v magnitude.
        fuel_consumed = np.linalg.norm(action)
        newFuel = state[4] - fuel_consumed
        # Rotate the RTN position into the target's geometric frame using its angular rate.
        w = state[2]
        R = util.axisAngleRates(w, self.dt)
        self.Rot = np.dot(R, self.Rot)
        newPg = np.dot(self.Rot, np.transpose(newRTN))
        # Update the feature-coverage map from the current geometric-frame position.
        newMap = util.populateMapFeatures(state[3], state[1])
        return [newROE, newPg, w, newMap, newFuel]
Code Example #4
    def step(self, action):
        state = self.state
        ind = action[0]
        """
        if ind<0:
            dROE = np.array([0, 0, 0, 0, 0, 0])
            dROE[1]= math.sqrt(mu)*(math.pow(deputyA,-1.5)-math.pow(chiefA,-1.5))
        else:
            dROE = action[1:5]
        """
        # A negative first action component suppresses the maneuver; otherwise the
        # remaining three components are scaled by it and then divided by 1000.
        if ind < 0:
            true_action = 0.0 * action[1:4]
        else:
            true_action = ind * action[1:4]
        true_action = [a / float(1000) for a in true_action]
        img_count = 0
        ROE = state[0:6]
        w = state[9:12]
        oldWhereCovered = self.map[np.where(self.map >= 2)]
        oldRTN = util.ROE2HILL(ROE, self.targOE[0], self.targOE[1])
        dROE = util.mapActiontoROE(true_action, self.inspOE[1], self.targOE[0])
        ROE = ROE + dROE
        newMap = np.copy(self.map)
        # Propagate in sub-steps of tau seconds; the maneuver is applied only on the
        # first sub-step, and an image is registered whenever the RTN range drops to 0.1 or less.
        for i in range(0, self.dt, self.tau):
            if i == 0:
                self.inspOE = util.GVEs(self.inspOE, true_action, 1)
                self.targOE = util.GVEs(self.targOE, np.array([0, 0, 0]), 1)
                newRTN = util.ROE2HILL(ROE, self.targOE[0], self.targOE[1])
                if np.linalg.norm(newRTN) <= 0.1:
                    newMap = util.populateMapFeatures(self.map, state[6:9])
                    img_count += 1
            else:
                self.inspOE = util.GVEs(self.inspOE, np.array([0, 0, 0]), self.tau)
                self.targOE = util.GVEs(self.targOE, np.array([0, 0, 0]), self.tau)
                newROE = util.propagateROE(ROE, self.tau, self.inspOE[0], self.targOE[0])
                newRTN = util.ROE2HILL(newROE, self.targOE[0], self.targOE[1])
                R = util.axisAngleRates(w, self.tau)
                self.Rot = np.dot(R, self.Rot)
                newPg = np.dot(self.Rot, np.transpose(newRTN))
                if np.linalg.norm(newRTN) <= 0.1:
                    newMap = util.populateMapFeatures(newMap, newPg)
                    img_count += 1

        fuel_consumed = np.linalg.norm(true_action)
        newFuel = state[12] - fuel_consumed
        newFuelvec = np.array((newFuel,))
        if img_count == 0:
            self.no_image_count += 1
        else:
            self.no_image_count = 0
        self.state = np.concatenate((newROE, newPg, w, newFuelvec), axis=0)
        self.photos_left += -1
        whereCovered = newMap[np.where(newMap >= 2)]
        self.percent_coverage = float(whereCovered.size) / self.map.size
        newFeatures = whereCovered.size - oldWhereCovered.size
        self.map = np.copy(newMap)
        if np.linalg.norm(newRTN) < np.linalg.norm(oldRTN):
            self.reward += 1
        if self.percent_coverage > 0.70:
            self.done = 1
            self.reward += 10000
            print('Done because finished map')
        else:
            if img_count > 0 and newFeatures <= 200:
                print('Done because not enough new features seen')
                self.reward += -2000
                self.done = 1
        if newFuel <= 0:
            self.done = 1
            print('Done because no more fuel')

        if self.photos_left == 1:
            self.done = 1
            print('Done because no photos left')
        if self.no_image_count >= 10:
            self.done = 1
            self.reward += -500
            print('Done because did not see target for a long time')
        if np.linalg.norm(newRTN) <= 0.001:
            self.done = 1
            self.reward += -10000
            print('Done because hit target')
        if self.done == 1:
            if self.percent_coverage > 0.50:
                self.reward += self.percent_coverage * 100000
            elif self.percent_coverage < 0.1:
                self.reward += -self.percent_coverage * 10000
            else:
                self.reward += self.percent_coverage * 10000
            print('Final Percent Coverage: ', self.percent_coverage)
        print('Actions Left', self.photos_left, 'RTN: ', newRTN, 'Fuel Consumed', fuel_consumed,
              'New Features', newFeatures)
        self.RTNlist.append(newRTN)
        return [self.state, self.reward, self.done, self.map]
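
step() treats its input as [ind, ax, ay, az]: a negative first component suppresses the maneuver, otherwise the last three components are scaled by ind and divided by 1000 before being applied, and the method returns [state, reward, done, map] in place of gym's usual (observation, reward, done, info) tuple. Below is a minimal sketch of a single call, assuming env was built with gym.make(...) and the kwargs shown in the surrounding examples; the action values are illustrative.

import numpy as np

# Hypothetical single interaction with an already-constructed env instance.
action = np.array([1.0, 0.5, 0.0, -0.2])       # [ind, ax, ay, az]
next_state, reward, done, coverage_map = env.step(action)
print(done, reward, coverage_map.shape)        # coverage_map has shape (360, 360)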
Code Example #5
import mdp
import util
import math
import numpy as np
import matplotlib.pyplot as plt

# OEs are given in the form [a, u, ex, ey, i, RAAN]; angles are in radians.
oe_insp_0 = np.array([6771.0, 0.0, 0.03, 0.0, math.radians(5.0), 0.0])
oe_targ_0 = np.array([6771.0, 0.0, 0.0, 0.0, math.radians(5.0), 0.0])
roe_0 = util.ROEfromOEs(oe_insp_0, oe_targ_0)
RTN_0 = util.ROE2HILL(roe_0, oe_targ_0[0], oe_targ_0[1])
w = 0.0
w_0 = np.array([0, 0, w])
Pg_0 = np.array([np.linalg.norm(RTN_0), 0, 0])
# Array to store "feature points" by theta, phi pair in Geometric frame.
map_0 = np.zeros((360, 360))
fuel_0 = 50
state_0 = [roe_0, Pg_0, w_0, map_0, fuel_0]
dt = 100
iterations = 100000
"""Baseline"""
Insp_wcalc = mdp.InspectionMDP(state_0, oe_insp_0, oe_targ_0, RTN_0, dt)
state = Insp_wcalc.startState()
action = np.array([0, 0, 0])
w_track = []
Pg_baseline_hist = []
Pg_baseline_hist.append(tuple(state[1]))
for i in range(iterations):
    state = Insp_wcalc.succAndProbReward(state, action)
    Pg_baseline_hist.append(tuple(state[1]))
    vec2 = state[1]
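
matplotlib is imported above but the plotting code is not part of this excerpt. A minimal sketch follows, assuming the goal is to visualize the baseline (zero-action) relative-position history collected in Pg_baseline_hist; the figure layout and axis labels are assumptions.

# Sketch only: plot the inspector's range to the target over the baseline run,
# assuming each iteration advances the dynamics by dt seconds.
Pg_hist = np.array(Pg_baseline_hist)           # shape (iterations + 1, 3)
range_hist = np.linalg.norm(Pg_hist, axis=1)

plt.figure()
plt.plot(np.arange(range_hist.size) * dt, range_hist)
plt.xlabel('Time [s]')
plt.ylabel('Range to target')
plt.title('Baseline inspector range history')
plt.show()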
Code Example #6
def main(args):
    oe_insp_0 = np.array([6771., 0.0, 0.0, 0.000001, math.radians(5.0), 0.0])
    oe_targ_0 = np.array([6771., 0.0, 0.0, 0.0, math.radians(5.0), 0.0])
    roe_0 = util.ROEfromOEs(oe_insp_0, oe_targ_0)
    dt = 500
    RTN_0 = util.ROE2HILL(roe_0, oe_targ_0[0], oe_targ_0[1])
    # Target angular rate magnitude set to the orbital mean motion n = sqrt(mu / a^3),
    # with mu = 398600 km^3/s^2 and a in km, so w is in rad/s.
    w = math.sqrt(398600 / math.pow(oe_targ_0[0], 3))
    print("W = ", w)
    w_0 = np.array([0, 0, -w])
    Pg_0 = np.array([np.linalg.norm(RTN_0), 0, 0])
    # Array to store "feature points" by theta, phi pair in Geometric frame.
    map_0 = np.zeros((360, 360))
    num_photos = args['max_episode_len']
    tau = 50
    fuel_0 = np.array((0.1,))
    state_0 = np.concatenate((roe_0, Pg_0, w_0, fuel_0), axis=0)
    kwargs = {'init_state': state_0, 'map': map_0, 'inspOE_0': oe_insp_0, 'targOE_0': oe_targ_0, 'RTN_0': RTN_0,
              'dt': dt, 'num_photos': num_photos, 'tau': tau}
    with tf.Session() as sess:

        env = gym.make(args['env'], **kwargs)
        np.random.seed(int(args['random_seed']))
        tf.set_random_seed(int(args['random_seed']))
        env.seed(int(args['random_seed']))

        state_dim = len(state_0)
        action_dim = env.action_space.shape[0]
        action_bound = env.action_space.high[0]
        assert (env.action_space.high[0] == -env.action_space.low[0])
        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             float(args['actor_lr']), float(args['tau']),
                             int(args['minibatch_size']))

        critic = CriticNetwork(sess, state_dim, action_dim,
                               float(args['critic_lr']), float(args['tau']),
                               float(args['gamma']),
                               actor.get_num_trainable_vars())

        actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

        if args['use_gym_monitor']:
            if not args['render_env']:
                env = wrappers.Monitor(
                    env, args['monitor_dir'], video_callable=False, force=True)
            else:
                env = wrappers.Monitor(env, args['monitor_dir'], force=True)
        print('Beginning Training')
        train(sess, env, args, actor, critic, actor_noise)
        save_path = actor.saver.save(sess, "./results/model.ckpt")
        if args['use_gym_monitor']:
            env.monitor.close()
        print('Beginning Testing')
        """
            new_saver = tf.train.import_meta_graph('./results/model.ckpt.meta')
            new_saver.restore(sess, tf.train.latest_checkpoint('./results/'))
        """
        reward_list, action_list, pct_list = test(sess, env, args, actor)
        print('Reward List', reward_list)
        print('Actions List', action_list)
        print('Average Reward', np.average(reward_list))
        print('Average Pct Coverage', np.average(pct_list))

        print('Testing a Random Policy')
        rand_list, rand_actions, pct_list_rand = test_rand_policy(env, args)
        print('Reward List for Random Policy', rand_list)
        print('Actions List for Random Policy ', rand_actions)
        print('Average Reward for Random Policy ', np.average(rand_list))
        print('Average Pct Coverage for Random Policy ', np.average(pct_list_rand))

        print('Testing a Zero Policy')
        zero_list, zero_actions, pct_list_zero = test_zero_policy(env, args)
        print('Reward List for Zero Policy', zero_list)
        print('Actions List for Zero Policy ', zero_actions)
        print('Average Reward for Zero Policy ', np.average(zero_list))
        print('Average Pct Coverage for Zero Policy ', np.average(pct_list_zero))
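
test_rand_policy and test_zero_policy are not shown in this excerpt. As a point of reference, a generic random-policy rollout under the standard gym interface could look like the sketch below; this is an assumption about what such a baseline does, not the project's actual implementation, and it relies on step() returning the running cumulative episode reward as in Code Example #4.

def rollout_random(env, num_episodes=10):
    # Generic random-policy baseline sketch (assumed, not the project's helper).
    episode_rewards = []
    for _ in range(num_episodes):
        env.reset()
        done = False
        ep_reward = 0.0
        while not done:
            action = env.action_space.sample()
            _, reward, done, _ = env.step(action)
            ep_reward = reward      # step() returns the running cumulative reward
        episode_rewards.append(ep_reward)
    return episode_rewards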