def main(args):
    with tf.Session() as sess:
        # Initial orbital elements (OEs) of inspector and target: [a, u, ex, ey, i, RAAN].
        oe_insp_0 = np.array([6771.0, 0.0, 0.001, 0.0, math.radians(5.0), 0.0])
        oe_targ_0 = np.array([6771.0, 0.0, 0.0, 0.0, math.radians(5.0), 0.0])
        roe_0 = util.ROEfromOEs(oe_insp_0, oe_targ_0)
        RTN_0 = util.ROE2HILL(roe_0, oe_targ_0[0], oe_targ_0[1])

        w = 0.2
        w_0 = np.array([0, 0, w])
        Pg_0 = np.array([np.linalg.norm(RTN_0), 0, 0])

        # Array to store "feature points" by (theta, phi) pair in the Geometric frame.
        map_0 = np.zeros((360, 360))
        fuel_0 = np.array((50,))
        state_0 = np.concatenate((roe_0, Pg_0, w_0, fuel_0), axis=0)
        dt = 100

        kwargs = {
            'init_state': state_0,
            'map': map_0,
            'inspOE_0': oe_insp_0,
            'targOE_0': oe_targ_0,
            'RTN_0': RTN_0,
            'dt': dt
        }
        env = gym.make(args['env'], **kwargs)

        np.random.seed(int(args['random_seed']))
        tf.set_random_seed(int(args['random_seed']))
        env.seed(int(args['random_seed']))

        state_dim = len(state_0)
        action_dim = env.action_space.shape[0]
        action_bound = env.action_space.high[0]
        # The action space is assumed symmetric about zero.
        assert (env.action_space.high[0] == -env.action_space.low[0])

        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             float(args['actor_lr']), float(args['tau']),
                             int(args['minibatch_size']))

        critic = CriticNetwork(sess, state_dim, action_dim,
                               float(args['critic_lr']), float(args['tau']),
                               float(args['gamma']),
                               actor.get_num_trainable_vars())

        actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

        if args['use_gym_monitor']:
            if not args['render_env']:
                env = wrappers.Monitor(env, args['monitor_dir'],
                                       video_callable=False, force=True)
            else:
                env = wrappers.Monitor(env, args['monitor_dir'], force=True)

        print('Beginning Training')
        train(sess, env, args, actor, critic, actor_noise)

        save_path = actor.saver.save(sess, "./results/model.ckpt")
        print("Model saved in path: %s" % save_path)

        if args['use_gym_monitor']:
            env.monitor.close()
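# Hedged usage sketch (not part of the original source): one way main() above might be
# invoked. Only the dictionary keys read inside main() are shown; the environment id
# 'Inspection-v0' and all hyperparameter values are placeholders, and train() may read
# additional keys (e.g. episode limits) that are not listed here.
if __name__ == '__main__':
    example_args = {
        'env': 'Inspection-v0',        # hypothetical registered gym environment id
        'random_seed': 1234,
        'actor_lr': 0.0001,            # actor learning rate
        'critic_lr': 0.001,            # critic learning rate
        'tau': 0.001,                  # soft target-update rate
        'gamma': 0.99,                 # discount factor
        'minibatch_size': 64,
        'use_gym_monitor': False,
        'render_env': False,
        'monitor_dir': './results/gym_monitor'
    }
    main(example_args)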
def genOnboardPose(ROEpred, depthEst, targetA, targetU):

    def genInertialPositionfromGPS(inertialPosGuess):
        """Generate the true inertial position by sampling a distribution around the
        inertial-position guess. The guess is obtained from the predicted ROEs of the
        next state and the depth estimate at the next state."""
        posGuessMean = inertialPosGuess[0]
        posGuessCov = inertialPosGuess[1]
        truePos = np.random.multivariate_normal(posGuessMean, posGuessCov)
        return truePos

    # Predicted relative position in the Hill (RTN) frame from the predicted ROEs.
    RTNpred = util.ROE2HILL(ROEpred, targetA, targetU)
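# Hedged sketch (not part of the original source): a standalone illustration of the GPS
# sampling step used in genInertialPositionfromGPS above. The mean and covariance below
# are made-up placeholder numbers; only the np.random.multivariate_normal call mirrors
# the function body.
import numpy as np

pos_guess_mean = np.array([6771.0, 10.0, 0.5])   # illustrative inertial position guess
pos_guess_cov = np.diag([1e-4, 1e-4, 1e-4])      # illustrative GPS covariance
true_pos = np.random.multivariate_normal(pos_guess_mean, pos_guess_cov)
print('Sampled "true" inertial position:', true_pos)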
def succAndProbReward(self, state, action):
    # State layout: [ROE, Pg, w, map, fuel].
    ROE = state[0]

    # Apply the control action as an instantaneous change in the ROEs.
    dROE = util.mapActiontoROE(action, self.inspOE[1], self.targOE[0])
    ROE = ROE + dROE

    # Propagate the relative motion and both absolute orbits over one time step.
    newROE = util.propagateROE(ROE, self.dt, self.inspOE[0], self.targOE[0])
    newRTN = util.ROE2HILL(newROE, self.targOE[0], self.targOE[1])
    self.inspOE += util.GVEs(self.inspOE, action, self.dt)
    self.targOE += util.GVEs(self.targOE, np.array([0, 0, 0]), self.dt)

    # Fuel consumed is the magnitude of the applied action.
    fuel_consumed = np.linalg.norm(action)
    newFuel = state[4] - fuel_consumed

    # Rotate the target's geometric frame by its angular rate over the time step
    # and express the inspector position in that frame.
    w = state[2]
    R = util.axisAngleRates(w, self.dt)
    self.Rot = np.dot(R, self.Rot)
    newPg = np.dot(self.Rot, np.transpose(newRTN))

    # Update the feature map using the current geometric-frame position.
    newMap = util.populateMapFeatures(state[3], state[1])

    return [newROE, newPg, w, newMap, newFuel]
def step(self, action):
    state = self.state
    ind = action[0]
    """
    if ind < 0:
        dROE = np.array([0, 0, 0, 0, 0, 0])
        dROE[1] = math.sqrt(mu) * (math.pow(deputyA, -1.5) - math.pow(chiefA, -1.5))
    else:
        dROE = action[1:5]
    """
    # The first action element acts as a gate: if negative, no maneuver is applied;
    # otherwise it scales the commanded delta-v in action[1:4].
    if ind < 0:
        true_action = 0.0 * action[1:4]
    else:
        true_action = ind * action[1:4]
    # Scale the commanded delta-v components down by a factor of 1000.
    true_action = [a / float(1000) for a in true_action]

    img_count = 0
    ROE = state[0:6]
    w = state[9:12]
    oldWhereCovered = self.map[np.where(self.map >= 2)]
    oldRTN = util.ROE2HILL(ROE, self.targOE[0], self.targOE[1])

    # Apply the maneuver as an instantaneous change in the ROEs.
    dROE = util.mapActiontoROE(true_action, self.inspOE[1], self.targOE[0])
    ROE = ROE + dROE

    newMap = np.copy(self.map)
    # Propagate in sub-steps of length tau; the maneuver is applied in the first
    # sub-step, and an image is taken whenever the RTN range is at or below 0.1.
    for i in range(0, self.dt, self.tau):
        if i == 0:
            self.inspOE = util.GVEs(self.inspOE, true_action, 1)
            self.targOE = util.GVEs(self.targOE, np.array([0, 0, 0]), 1)
            newRTN = util.ROE2HILL(ROE, self.targOE[0], self.targOE[1])
            if np.linalg.norm(newRTN) <= 0.1:
                newMap = util.populateMapFeatures(self.map, state[6:9])
                img_count += 1
        else:
            self.inspOE = util.GVEs(self.inspOE, np.array([0, 0, 0]), self.tau)
            self.targOE = util.GVEs(self.targOE, np.array([0, 0, 0]), self.tau)
            newROE = util.propagateROE(ROE, self.tau, self.inspOE[0], self.targOE[0])
            newRTN = util.ROE2HILL(newROE, self.targOE[0], self.targOE[1])
            R = util.axisAngleRates(w, self.tau)
            self.Rot = np.dot(R, self.Rot)
            newPg = np.dot(self.Rot, np.transpose(newRTN))
            if np.linalg.norm(newRTN) <= 0.1:
                newMap = util.populateMapFeatures(newMap, newPg)
                img_count += 1

    fuel_consumed = np.linalg.norm(true_action)
    newFuel = state[12] - fuel_consumed
    newFuelvec = np.array((newFuel,))

    if img_count == 0:
        self.no_image_count += 1
    else:
        self.no_image_count = 0

    self.state = np.concatenate((newROE, newPg, w, newFuelvec), axis=0)
    self.photos_left -= 1

    # Coverage bookkeeping: map cells with a value of at least 2 count as covered.
    whereCovered = newMap[np.where(newMap >= 2)]
    self.percent_coverage = float(whereCovered.size) / self.map.size
    newFeatures = whereCovered.size - oldWhereCovered.size
    self.map = np.copy(newMap)

    # Small shaping reward for closing the range to the target.
    if np.linalg.norm(newRTN) < np.linalg.norm(oldRTN):
        self.reward += 1

    if self.percent_coverage > 0.70:
        self.done = 1
        self.reward += 10000
        print('Done because finished map')
    else:
        if img_count > 0 and newFeatures <= 200:
            print('Done because not enough new features seen')
            self.reward += -2000
            self.done = 1
    if newFuel <= 0:
        self.done = 1
        print('Done because no more fuel')
    if self.photos_left == 1:
        self.done = 1
        print('Done because no photos left')
    if self.no_image_count >= 10:
        self.done = 1
        self.reward += -500
        print('Done because did not see target for a long time')
    if np.linalg.norm(newRTN) <= 0.001:
        self.done = 1
        self.reward += -10000
        print('Done because hit target')

    if self.done == 1:
        # Terminal reward scales with the final fraction of the map covered.
        if self.percent_coverage > 0.50:
            self.reward += self.percent_coverage * 100000
        elif self.percent_coverage < 0.1:
            self.reward += -self.percent_coverage * 10000
        else:
            self.reward += self.percent_coverage * 10000
        print('Final Percent Coverage: ', self.percent_coverage)

    print('Actions Left', self.photos_left, 'RTN: ', newRTN,
          'Fuel Consumed', fuel_consumed, 'New Features', newFeatures)
    self.RTNlist.append(newRTN)
    return [self.state, self.reward, self.done, self.map]
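# Hedged usage sketch (not part of the original source): driving the step() method above
# by hand with random actions. The function name run_random_episode is hypothetical; env
# is assumed to be the inspection environment constructed via gym.make(args['env'], **kwargs)
# as in main(). Note that step() returns the cumulative episode reward and the coverage map
# in place of the usual gym info dict.
def run_random_episode(env):
    """Roll out one episode with uniformly random actions and return its total reward."""
    obs = env.reset()
    done = False
    ep_reward = 0.0
    while not done:
        action = env.action_space.sample()            # random [gate, dv_x, dv_y, dv_z]
        obs, ep_reward, done, coverage_map = env.step(action)
    return ep_reward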
import mdp
import util
import math
import numpy as np
import matplotlib.pyplot as plt

# OEs in the form [a, u, ex, ey, i, RAAN]; angles in radians.
oe_insp_0 = np.array([6771.0, 0.0, 0.03, 0.0, math.radians(5.0), 0.0])
oe_targ_0 = np.array([6771.0, 0.0, 0.0, 0.0, math.radians(5.0), 0.0])
roe_0 = util.ROEfromOEs(oe_insp_0, oe_targ_0)
RTN_0 = util.ROE2HILL(roe_0, oe_targ_0[0], oe_targ_0[1])

w = 0.0
w_0 = np.array([0, 0, w])
Pg_0 = np.array([np.linalg.norm(RTN_0), 0, 0])

# Array to store "feature points" by (theta, phi) pair in the Geometric frame.
map_0 = np.zeros((360, 360))
fuel_0 = 50
state_0 = [roe_0, Pg_0, w_0, map_0, fuel_0]
dt = 100
iterations = 100000

"""Baseline"""
Insp_wcalc = mdp.InspectionMDP(state_0, oe_insp_0, oe_targ_0, RTN_0, dt)
state = Insp_wcalc.startState()
action = np.array([0, 0, 0])

w_track = []
Pg_baseline_hist = []
Pg_baseline_hist.append(tuple(state[1]))

# Propagate the zero-action baseline and record the geometric-frame position history.
for i in range(iterations):
    state = Insp_wcalc.succAndProbReward(state, action)
    Pg_baseline_hist.append(tuple(state[1]))
    vec2 = state[1]
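# Hedged sketch (not part of the original source): one way the baseline trajectory
# recorded in Pg_baseline_hist above could be visualized, continuing the script. Axis
# labels assume the geometric-frame position components; no units are asserted.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401, enables the 3-D projection

xs, ys, zs = zip(*Pg_baseline_hist)
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.plot(xs, ys, zs)
ax.set_xlabel('Pg x')
ax.set_ylabel('Pg y')
ax.set_zlabel('Pg z')
ax.set_title('Baseline geometric-frame trajectory')
plt.show()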
def main(args):
    # Initial orbital elements (OEs) of inspector and target: [a, u, ex, ey, i, RAAN].
    oe_insp_0 = np.array([6771., 0.0, 0.0, 0.000001, math.radians(5.0), 0.0])
    oe_targ_0 = np.array([6771., 0.0, 0.0, 0.0, math.radians(5.0), 0.0])
    roe_0 = util.ROEfromOEs(oe_insp_0, oe_targ_0)
    dt = 500
    RTN_0 = util.ROE2HILL(roe_0, oe_targ_0[0], oe_targ_0[1])

    # Target angular rate set to the orbital mean motion (mu = 398600 km^3/s^2), spin about -z.
    w = math.sqrt(398600 / math.pow(oe_targ_0[0], 3))
    print("W = ", w)
    w_0 = np.array([0, 0, -w])
    Pg_0 = np.array([np.linalg.norm(RTN_0), 0, 0])

    # Array to store "feature points" by (theta, phi) pair in the Geometric frame.
    map_0 = np.zeros((360, 360))
    num_photos = args['max_episode_len']
    tau = 50
    fuel_0 = np.array((0.1,))
    state_0 = np.concatenate((roe_0, Pg_0, w_0, fuel_0), axis=0)

    kwargs = {'init_state': state_0, 'map': map_0, 'inspOE_0': oe_insp_0,
              'targOE_0': oe_targ_0, 'RTN_0': RTN_0, 'dt': dt,
              'num_photos': num_photos, 'tau': tau}

    with tf.Session() as sess:
        env = gym.make(args['env'], **kwargs)
        np.random.seed(int(args['random_seed']))
        tf.set_random_seed(int(args['random_seed']))
        env.seed(int(args['random_seed']))

        state_dim = len(state_0)
        action_dim = env.action_space.shape[0]
        action_bound = env.action_space.high[0]
        # The action space is assumed symmetric about zero.
        assert (env.action_space.high[0] == -env.action_space.low[0])

        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             float(args['actor_lr']), float(args['tau']),
                             int(args['minibatch_size']))

        critic = CriticNetwork(sess, state_dim, action_dim,
                               float(args['critic_lr']), float(args['tau']),
                               float(args['gamma']),
                               actor.get_num_trainable_vars())

        actor_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

        if args['use_gym_monitor']:
            if not args['render_env']:
                env = wrappers.Monitor(
                    env, args['monitor_dir'], video_callable=False, force=True)
            else:
                env = wrappers.Monitor(env, args['monitor_dir'], force=True)

        print('Beginning Training')
        train(sess, env, args, actor, critic, actor_noise)
        save_path = actor.saver.save(sess, "./results/model.ckpt")

        if args['use_gym_monitor']:
            env.monitor.close()

        print('Beginning Testing')
        """
        new_saver = tf.train.import_meta_graph('./results/model.ckpt.meta')
        new_saver.restore(sess, tf.train.latest_checkpoint('./results/'))
        """
        reward_list, action_list, pct_list = test(sess, env, args, actor)
        print('Reward List', reward_list)
        print('Actions List', action_list)
        print('Average Reward', np.average(reward_list))
        print('Average Pct Coverage', np.average(pct_list))

        print('Testing a Random Policy')
        rand_list, rand_actions, pct_list_rand = test_rand_policy(env, args)
        print('Reward List for Random Policy', rand_list)
        print('Actions List for Random Policy ', rand_actions)
        print('Average Reward for Random Policy ', np.average(rand_list))
        print('Average Pct Coverage for Random Policy ', np.average(pct_list_rand))

        print('Testing a Zero Policy')
        zero_list, zero_actions, pct_list_zero = test_zero_policy(env, args)
        print('Reward List for Zero Policy', zero_list)
        print('Actions List for Zero Policy ', zero_actions)
        print('Average Reward for Zero Policy ', np.average(zero_list))
        print('Average Pct Coverage for Zero Policy ', np.average(pct_list_zero))
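# Hedged sketch (not part of the original source): test_rand_policy() is called above but
# its implementation is not shown here. This is one plausible version, assuming it rolls
# out episodes with uniformly random actions and reports per-episode rewards, the actions
# taken, and the final percent coverage exposed by the environment (env.percent_coverage,
# as set in step()). The signature matches the call test_rand_policy(env, args); the
# episodes default of 10 is a placeholder, and the real function may differ.
def test_rand_policy(env, args, episodes=10):
    reward_list, action_list, pct_list = [], [], []
    for _ in range(episodes):
        env.reset()
        done = False
        ep_reward = 0.0
        ep_actions = []
        while not done:
            action = env.action_space.sample()
            ep_actions.append(action)
            _, ep_reward, done, _ = env.step(action)
        reward_list.append(ep_reward)
        action_list.append(ep_actions)
        pct_list.append(env.percent_coverage)
    return reward_list, action_list, pct_list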