def stabSep(dyn):
    """
    Split a linear system into a stable and an unstable subsystem.

    ``dyn.A`` is brought to ordered Schur form with ``sort='lhp'`` and
    ``dyn.B`` / ``dyn.C`` are rotated into the same basis.  The leading ``l``
    states make up the stable subsystem, the remaining ones the unstable
    subsystem.  See [1] for implementation details.

    Returns:
        dyn_stable: Dynamics(A_n, [B_n, A_c], C_n); the input matrix of the
            stable block is augmented with the cross-term block A_c
        dyn_unstable: Dynamics(A_p, B_p, C_p) built from the trailing block
        V: transformation matrix returned by ``spl.schur``
        l: number of states assigned to the stable subsystem

    References:
        [1] : https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=78129&tag=1
    """
    # The sorted-eigenvalue count returned by schur is not used; the split
    # index is derived from the eigenvalue threshold below instead.
    schur_A, V, _ = spl.schur(dyn.A, sort='lhp')
    schur_B = V.T @ dyn.B
    schur_C = dyn.C @ V

    # Only eigenvalues below min_eig / 1e5 count as stable
    # (remove close to unstable elements).
    eigenvalues = npl.eigvals(schur_A)
    threshold = np.min(eigenvalues) / 1e5
    l = sum(eigenvalues < threshold)

    # stable components, with the cross terms appended as extra inputs
    B_tilde = np.hstack([schur_B[:l, :], schur_A[:l, l:]])
    dyn_stable = Dynamics(schur_A[:l, :l], B_tilde, schur_C[:, :l])

    # unstable components
    dyn_unstable = Dynamics(schur_A[l:, l:], schur_B[l:, :], schur_C[:, l:])

    return dyn_stable, dyn_unstable, V, l
def simulate(arm, traj, tf, dt_dyn, dt_control=None, u_inj=None):
    """
    Simulate the arm in closed loop, optionally overriding the controller.

    Inputs:
        arm: arm object
        traj: desired trajectory; column j is the waypoint of control step j
        tf: desired final simulation time
        dt_dyn: time step of the dynamics simulation
        dt_control: desired dt for control; floored to a whole multiple of
            dt_dyn (at least dt_dyn).  Defaults to dt_dyn.
        u_inj: optional control injection, indexed by control step j.  Each
            entry provides 'rot' and 'lat' values that replace the controller
            output for that step.  None (default) injects nothing.

    Returns:
        state_list: copy of arm.state after every dynamics step
        control_list: control applied at every dynamics step
        t_arr: array of simulation times
    """
    dynamics = Dynamics(arm, dt_dyn)
    # dynamics = Dynamics(arm, dt_dyn, noise_torsion=[1e-3,0], noise_bending=[1e-3,0,0,0])

    # T = number of dynamics steps per control step.  dt_control is rebuilt
    # from T so the two can never disagree because of float rounding.
    if dt_control is None:
        T = 1
        dt_control = dt_dyn
    else:
        T = max(int(np.floor(dt_control / dt_dyn)), 1)
        dt_control = T * dt_dyn

    dyn_reduced = Dynamics(arm, dt_control, n=arm.state.n)
    controller = Controller(dyn_reduced)

    state_list = []
    control_list = []

    t_steps = int(np.floor(tf / dt_dyn))
    t_arr = np.linspace(0, tf, t_steps + 1)
    # A run with a single sample (tf < dt_dyn) must not divide by zero below.
    n_intervals = max(len(t_arr) - 1, 1)

    for i, t in enumerate(t_arr):
        print('Progress: {:.2f}%'.format(float(i) / n_intervals * 100),
              end='\r')

        # mode = finiteStateMachine(y,wp)
        # mode = 'none'
        # mode = 'damping'
        mode = 'mpc'

        # A new control is computed only every T dynamics steps; in between
        # the previous control is held.
        if i % T == 0:
            j = i // T
            if u_inj is None or j not in u_inj:
                wp = traj[:, j]
                y = simulateMeasurements(arm)
                u = controller.controlStep(y, wp, mode)
            else:
                u = {}
                u['rot'] = u_inj[j]['rot']
                u['lat'] = u_inj[j]['lat']

        arm = dynamics.dynamicsStep(arm, u)
        state_list.append(copy.copy(arm.state))
        control_list.append(u)

    return state_list, control_list, t_arr
def main():
    """
    Build a Leg with random boundary conditions, wrap it in a pagmo problem
    and evolve a single-individual population with SNOPT7.
    """
    # instantiate dynamics and the leg that uses them
    leg = Leg(Dynamics(), alpha=0, bound=True)

    # set boundaries (the random draws happen in this order: s0, then l0)
    t0, tf = 0, 1000
    s0 = np.array([1000, 1000, *np.random.randn(4)], dtype=float)
    n_state = len(s0)
    l0 = np.random.randn(n_state)
    sf = np.zeros(n_state)
    leg.set(t0, s0, l0, tf, sf)

    # define problem
    prob = pg.problem(Problem(leg, atol=1e-8, rtol=1e-8))

    # instantiate algorithm
    uda = pg7.snopt7(True, "/usr/lib/libsnopt7_c.so")
    integer_options = (
        ("Major iterations limit", 4000),
        ("Iterations limit", 40000),
    )
    numeric_options = (
        ("Major optimality tolerance", 1e-2),
        ("Major feasibility tolerance", 1e-4),
    )
    for option, value in integer_options:
        uda.set_integer_option(option, value)
    for option, value in numeric_options:
        uda.set_numeric_option(option, value)
    algo = pg.algorithm(uda)

    # instantiate population with one chromosome
    pop = pg.population(prob, 1)
    #pop = pg.population(prob, 0)
    #pop.push_back([1000, *l0])

    # optimise
    pop = algo.evolve(pop)
def test_trajectory(traj):
    """
    Re-integrate a stored trajectory from its control inputs and plot the
    result (solid) over the stored x/y path (dashed).

    ``traj`` is a mapping with the keys 'ntrailers', 't', 'x', 'y', 'phi',
    'theta', 'u1' and 'u2'.
    """
    ntrailers = traj['ntrailers']
    t = traj['t']
    x = traj['x']
    y = traj['y']
    phi = traj['phi']
    theta = np.array(traj['theta'])
    controls = np.array([traj['u1'], traj['u2']]).T

    # cubic spline through the sampled controls
    fu = make_interp_spline(t, controls, k=3)
    dynamics = Dynamics(ntrailers)

    def rhs(t, state):
        # flatten whatever the model returns into a 1-D derivative vector
        return np.reshape(dynamics.rhs(*state, *fu(t)), (-1,))

    state0 = [x[0], y[0], phi[0], *theta[:, 0]]
    ans = solve_ivp(rhs, [t[0], t[-1]], state0, t_eval=t, max_step=1e-3)

    axes = plt.gca()
    axes.set_prop_cycle(None)
    plt.plot(ans.y[0], ans.y[1])
    # reset the colour cycle so both curves share the same colour
    plt.gca().set_prop_cycle(None)
    plt.plot(x, y, '--')
    plt.show()
def body(a, beta):
    """
    Run one transition at interpolation parameter ``beta``.

    ``a`` is indexed as (_, x, w, v): ``a[1]`` is the current position,
    ``a[2]`` the running weight and ``a[3]`` the current velocity.  Returns
    the tuple ``(px, updated_x, w, updated_v)``.

    The names ``refresh``, ``refreshment``, ``beta_diff``, ``init_energy``,
    ``final_energy``, ``aux``, ``x_dim``, ``step_size`` and ``leapfrogs`` are
    not defined here; they are expected from the enclosing scope.
    """
    def curr_energy(z, aux=None):
        # energy interpolated between the initial and final energy by beta
        return (1 - beta) * init_energy(z) + (beta) * final_energy(z, aux=aux)

    last_x = a[1]
    w = a[2]
    v = a[3]
    if refresh:
        # partial refresh: mix the old velocity with fresh Gaussian noise
        refreshed_v = v * tf.sqrt(1 - refreshment) + tf.random_normal(
            tf.shape(v)) * tf.sqrt(refreshment)
    else:
        # full refresh: draw a new velocity
        refreshed_v = tf.random_normal(tf.shape(v))
    # accumulate the weight with the energy difference at the current position
    w = w + beta_diff * (- final_energy(last_x, aux=aux) \
                         + init_energy(last_x, aux=aux))
    dynamics = Dynamics(x_dim, energy_function=curr_energy, eps=step_size, hmc=True, T=leapfrogs)
    Lx, Lv, px = dynamics.forward(last_x, aux=aux, init_v=refreshed_v)
    # accept where px exceeds a uniform draw
    mask = (px - tf.random_uniform(tf.shape(px)) >= 0.)
    updated_x = tf.where(mask, Lx, last_x)
    # the proposed velocity is negated where the proposal is rejected
    updated_v = tf.where(mask, Lv, -Lv)
    return (px, updated_x, w, updated_v)
def __init__(self,
             sess=None,
             env_name=None,
             feature_dim=None,
             encoder_gamma=None,
             encoder_hidden_size=None,
             dynamics_hidden_size=None,
             invdyn_hidden_size=None,
             encoder_lr=None,
             dynamics_lr=None,
             invdyn_lr=None):
    """
    Wrap the gym environment ``env_name`` and build the encoder, dynamics
    and inverse-dynamics models that operate on ``feature_dim`` features.

    The exposed observation space is an unbounded Box of size
    ``feature_dim``; the action space is the wrapped environment's.
    """
    super(WrappedEnv, self).__init__()

    self._sess = sess
    self._env = gym.make(env_name)
    self._state_dim = self._env.observation_space.shape[0]
    self._action_dim = self._env.action_space.shape[0]
    self._feature_dim = feature_dim
    self._encoder_gamma = encoder_gamma
    self._experience_buffer_size = 50000

    # unbounded feature space: [-inf, inf] in every dimension
    upper = np.array([np.inf] * self._feature_dim)
    self.observation_space = spaces.Box(-upper, upper)
    self.action_space = self._env.action_space

    # every network uses the same number of equally sized hidden layers
    self._num_hidden_layers = 2
    depth = self._num_hidden_layers
    self._encoder_hidden_sizes = [encoder_hidden_size] * depth
    self._dynamics_hidden_sizes = [dynamics_hidden_size] * depth
    self._invdyn_hidden_sizes = [invdyn_hidden_size] * depth

    self._encoder = Encoder(
        sess=self._sess,
        input_dim=self._state_dim,
        output_dim=feature_dim,
        hidden_sizes=self._encoder_hidden_sizes,
        learning_rate=encoder_lr)
    self._dynamics = Dynamics(
        sess=self._sess,
        state_dim=feature_dim,
        action_dim=self._action_dim,
        hidden_sizes=self._dynamics_hidden_sizes,
        learning_rate=dynamics_lr)
    self._inv_dynamics = InverseDynamics(
        sess=self._sess,
        state_dim=feature_dim,
        action_dim=self._action_dim,
        hidden_sizes=self._invdyn_hidden_sizes,
        learning_rate=invdyn_lr)

    self._state = self._env.reset()
def __init__(self, use_penalty=False, return_states=False):
    """
    Set up the cost parameters.

    Args:
        use_penalty: stored as ``self.use_penalty``
        return_states: stored as ``self.return_states``
    """
    self.dyn = Dynamics(dt=0.02)

    # state weights; the terminal weight Qf is the very same array as Q
    self.Q = np.array([10, 10, 100, 20.5, 20.5, 20, 9.8, 9.8, 10])
    self.Qf = self.Q

    # initial state and desired state (the latter starts as a copy)
    self.x0 = np.array([0, 0, -5., 0, 0, 0, 0, 0, 0])
    self.x_des = self.x0.copy()

    self.use_penalty = use_penalty
    self.initialized = False
    self.return_states = return_states

    # input limits: first input in [0, 1], the other three in +/- w_max
    w_max = np.pi / 2
    self.u_max = np.array([1] + [w_max] * 3)
    self.u_min = np.array([0] + [-w_max] * 3)
    self.mu = 10.
def get_hmc_samples(x_dim, eps, energy_function, sess, T=10, steps=200, samples=None):
    """
    Run ``steps`` HMC transitions and return the chain of sample batches.

    Args:
        x_dim: dimension of a sample
        eps: passed to Dynamics as ``eps``
        energy_function: energy function handed to Dynamics
        sess: session used to run the graph
        T: passed to Dynamics as ``T``
        steps: number of transitions to run
        samples: initial batch; when None, ``gaussian.get_samples(n=200)``
            is used

    Returns:
        Array stacking the batch held *before* each of the ``steps``
        transitions.
    """
    sampler = Dynamics(x_dim, energy_function, T=T, eps=eps, hmc=True)
    x_in = tf.placeholder(tf.float32, shape=(None, x_dim))
    proposed_x, _, accept_prob, mh_result = propose(x_in, sampler, do_mh_step=True)

    if samples is None:
        samples = gaussian.get_samples(n=200)

    # the proposal and its acceptance probability are fetched but not used
    fetches = [proposed_x, accept_prob, mh_result[0]]
    chain = []
    for _step in range(steps):
        chain.append(np.copy(samples))
        _proposal, _prob, samples = sess.run(fetches, {x_in: samples})

    return np.array(chain)
def __init__(self, make_env, num_timesteps, envs_per_process):
    """
    Build the policy, feature extractor, dynamics model and PPO agent, and
    add the auxiliary and dynamics losses to the agent's total loss.

    Args:
        make_env: stored as ``self.make_env``
        num_timesteps: stored as ``self.num_timesteps``
        envs_per_process: stored as ``self.envs_per_process``
    """
    self.make_env = make_env
    self.envs_per_process = envs_per_process
    self.num_timesteps = num_timesteps

    self._set_env_vars()

    self.policy = CnnPolicy(
        scope='cnn_pol',
        ob_space=self.ob_space,
        ac_space=self.ac_space,
        hidsize=512,
        feat_dim=512,
        ob_mean=self.ob_mean,
        ob_std=self.ob_std,
        layernormalize=False,
        nl=tf.nn.leaky_relu)

    self.feature_extractor = InverseDynamics(
        policy=self.policy, feat_dim=512, layernormalize=0)

    self.dynamics = Dynamics(
        auxiliary_task=self.feature_extractor, mode=MODE, feat_dim=512)

    ppo_settings = dict(
        scope='ppo',
        ob_space=self.ob_space,
        ac_space=self.ac_space,
        policy=self.policy,
        use_news=0,
        gamma=.99,
        lam=.98,  #TODO Change this for potentially vastly different results
        nepochs=3,
        nminibatches=16,
        lr=1e-4,
        cliprange=.1,  #TODO Change this as well
        nsteps_per_seg=256,
        nsegs_per_env=1,
        ent_coeff=.001,
        normrew=1,
        normadv=1,
        ext_coeff=0.,
        int_coeff=1.,
        dynamics=self.dynamics)
    self.agent = PpoOptimizer(**ppo_settings)

    # auxiliary (feature extractor) loss
    self.agent.to_report['aux'] = tf.reduce_mean(self.feature_extractor.loss)
    self.agent.total_loss += self.agent.to_report['aux']

    # dynamics loss
    self.agent.to_report['dyn_loss'] = tf.reduce_mean(self.dynamics.loss)
    self.agent.total_loss += self.agent.to_report['dyn_loss']

    # variance of the features over axes 0 and 1, reported only
    _, feature_variance = tf.nn.moments(self.feature_extractor.features, [0, 1])
    self.agent.to_report['feat_var'] = tf.reduce_mean(feature_variance)
def main():
    """
    Create a fresh output directory, switch into it and run every episode of
    the simulation.
    """
    population = 6400         # Number of agents
    average_degree = 4        # Average degree of network
    num_episode = 1           # Number of episodes for taking ensemble average
    network_type = 'lattice'  # Topology of network

    # Creating new directory for data files
    name_parts = [
        "dyn_time_evolution_n", str(population),
        "e", str(num_episode),
        "_", network_type,
    ]
    new_dirname = "".join(name_parts)
    os.mkdir(new_dirname)
    os.chdir(new_dirname)

    simulation = Dynamics(population, average_degree, network_type)

    episode = 0
    while episode < num_episode:
        # re-seed the generator before each episode
        random.seed()
        simulation.one_episode(episode)
        episode += 1
def __init__(self, SCALE):
    """
    Create the note, rhythm and dynamics helpers and the equalizer table.

    Args:
        SCALE: pair ``(NOTES, FREQ_DICT)`` handed on to ``Note``
    """
    NOTES, FREQ_DICT = SCALE
    self.note = Note(NOTES, FREQ_DICT)
    self.rhythm = Rhythm()
    self.dynamics = Dynamics()

    # equalizer factor per break point
    self.equalizerFactor = {
        -0.1: 1.0,
        220: 1.0,
        440: 0.7,
        880: 0.35,
        1760: 0.15,
        3520: 0.15,
        7040: 0.15,
        14080: 0.15,
        28160: 0.15,
        56320: 0.15,
    }
    # break points in ascending order
    self.equalizerBreakPoints = sorted(self.equalizerFactor)
def __init__(self, config):
    """
    Copy the problem settings out of ``config`` and build the Dynamics object.

    ``config`` must provide every key listed below; ``physical_params`` must
    expose the attributes ``lf``, ``lr``, ``m`` and ``Iz``.
    """
    # (attribute name, config key), read in exactly this order
    leading = (
        ("physical_params", "physical_params"),
        ("T", "T"),
        ("dt", "dt"),
        ("initial_state", "xinit"),
        ("goal_state", "xgoal"),
        ("ellipse_arc", "ellipse_arc"),
        ("deviation_cost", "deviation_cost"),
        ("Qf", "Qf"),
        ("limits", "limits"),
    )
    trailing = (
        "tire_model",
        "initial_guess_config",
        "puddle_model",
        "force_constraint",
        "visualize_initial_guess",
    )

    for attribute, key in leading:
        setattr(self, attribute, config[key])

    # fixed problem dimensions
    self.n_state = 6
    self.n_nominal_forces = 4

    for key in trailing:
        setattr(self, key, config[key])

    params = self.physical_params
    self.dynamics = Dynamics(params.lf, params.lr, params.m, params.Iz,
                             self.dt)
for i in range(self.dynamics.adim): if time: ax[i].plot(self.times, self.actions[:, i], "k.-") else: ax[i].plot(self.actions[:, i], "k.-") return ax if __name__ == "__main__": from dynamics import Dynamics # instantiate AUV sys = Dynamics() # instantiate leg leg = Leg(sys) # arbitrary boundaries t0 = 0 s0 = np.array([50, 50, 1, 1, 1, 1]) l0 = np.random.randn(len(s0)) tf = 10000 sf = np.random.randn(len(s0)) # set TPBVP problem boundires leg.set(t0, s0, l0, tf, sf) print(leg.mismatch(atol=1e-10, rtol=1e-10))
def getDynamics(cls, n, L, C_ratio=0, bc_start=2, bc_end=0):
    """
    Build the Dynamics of a lateral finite-element model.

    Args:
        n, L: positional arguments forwarded to ``LateralFEModel``
        C_ratio: forwarded to ``LateralFEModel`` as ``C_ratio``
        bc_start: forwarded to ``LateralFEModel`` as ``bc_start``
        bc_end: forwarded to ``LateralFEModel`` as ``bc_end``

    Returns:
        ``Dynamics`` built from the model's ``A`` and ``B`` matrices.
    """
    # Forward the caller's boundary conditions; they used to be ignored in
    # favour of the hard-coded values 2 and 0 (which remain the defaults).
    mdl = LateralFEModel(n, L, C_ratio=C_ratio, bc_start=bc_start,
                         bc_end=bc_end)
    dyn = Dynamics(mdl.A, mdl.B)
    return dyn
def calculation(self):
    """
    Search a path from ``self.start`` towards ``self.goal``.

    Each iteration expands the cheapest pending state into candidate moves,
    discards candidates that leave the map or hit visited / dangerous /
    dead-end cells, and costs the rest by their distance to
    ``self.bezier_spline``.  When no candidate survives, the current cell is
    recorded as a dead end and the search restarts from the start.  After the
    loop a final Bezier segment to the goal is appended.

    Writes the module-level globals ``path`` and ``path_tree``.

    Returns:
        (path, path_tree): the last explored path and the list of paths
        abandoned on restarts.

    NOTE(review): the nesting below was reconstructed from a source whose
    line breaks were lost — confirm against the original file.
    """
    start = self.start
    goal = self.goal
    theta, theta_future, displacement_rear, displacement_rear_future, steering_step = \
        Dynamics(self.wheelbase, search_length=2.5, speed=5, dt=1)
    # bz = Bezier(start, goal, self.start_heading, self.goal_heading, self.start_steering,
    # self.mapsize, self.car_length, self.car_width, self.wheelbase, self.mapsize * 3, "NoFound")
    # bezier_spline = bz.calculation()
    x = start[0]
    y = start[1]
    x_future = x
    y_future = y
    x_prev = x
    y_prev = y
    heading_state = self.start_heading
    rotate_angle = heading_state
    steering_state = self.start_steering
    found = 0
    # each entry: [cost, [x, y], heading, steering]
    cost_list = [[0, [x, y], heading_state, steering_state]]
    next_state = cost_list
    path_discrete = list([])  # Initialize discrete path
    global path
    global path_tree
    path = []  # Initialize continuous path
    tree_leaf = [[x, y]]  # Initialize search tree leaf (search failed)
    search = 1
    step = 1
    path_tree = []  # Initialize continuous path for search trees
    while found != 1:
        # give up once the number of restarts reaches the free-cell count
        if search >= self.freeGrid_num:
            break
        # expand the cheapest pending state
        cost_list.sort(key=lambda x: x[0])
        next_state = cost_list.pop(0)
        path_discrete.append(np.round(next_state[1]))
        path.append(next_state[1])
        [x, y] = next_state[1]
        [x_future, y_future] = [x, y]
        heading_state = next_state[2]
        steering_state = next_state[3]
        if step > 1:
            # NOTE(review): path[step - 1] looks like the entry appended just
            # above rather than the one before it — confirm the intended index
            [x_prev, y_prev] = path[step - 1]
        step += 1
        rotate_angle = heading_state
        # goal test: Euclidean distance to the goal within tolerance
        if sqrt(
                np.dot(np.subtract([x, y], goal), np.subtract(
                    [x, y], goal))) <= self.tolerance:
            found = 1
        rotate_matrix = [[np.cos(rotate_angle), -np.sin(rotate_angle)],
                         [np.sin(rotate_angle), np.cos(rotate_angle)]]
        # rotate the displacements by the current heading
        action = (np.dot(displacement_rear, rotate_matrix)).tolist()
        action_future = np.dot(displacement_rear_future, rotate_matrix)
        candidates = np.add([x, y], action)
        candidates_future = np.add([x_future, y_future], action_future)
        candidates_round = np.round(candidates).astype(int)
        heading_state = np.add(heading_state, theta)
        # candidates whose rounded cell is already on the path, in the danger
        # zone, or a recorded dead end
        invalid_ID = [
            ((candidates_round[i] == path).all(1).any() |
             (candidates_round[i] == self.danger_zone).all(1).any() |
             (candidates_round[i] == tree_leaf).all(1).any())
            for i in range(len(candidates_round))
        ]
        # candidates outside [0, mapsize]
        remove_ID = np.unique(
            np.where((candidates < 0) | (candidates > self.mapsize))[0])
        candidates = np.delete(candidates, remove_ID, axis=0)
        candidates_future = np.delete(candidates_future, remove_ID, axis=0)
        heading_state = np.delete(heading_state, remove_ID, axis=0)
        candidates = np.delete(candidates, np.where(invalid_ID), axis=0)
        candidates_future = np.delete(candidates_future, np.where(invalid_ID), axis=0)
        heading_state = np.delete(heading_state, np.where(invalid_ID), axis=0)
        if len(candidates) > 0:
            cost_list = []
            for i in range(len(candidates)):
                # cost = distance of the candidate and of its look-ahead
                # point to the nearest point of the reference spline
                diff = np.square(candidates[i] - self.bezier_spline)
                min_dis = min(np.sqrt(np.sum(diff, axis=1)))
                diff_future = np.square(candidates_future[i] - self.bezier_spline)
                min_dis_future = min(np.sqrt(np.sum(diff_future, axis=1)))
                total_cost = min_dis + min_dis_future
                cost_list.append([
                    total_cost, candidates[i], heading_state[i], steering_step[i]
                ])
        else:
            # dead end: remember it and restart the search from the start
            search += 1
            if (next_state[1] == tree_leaf).all(1).any():
                tree_leaf.append(np.round([x_prev, y_prev]))
            else:
                tree_leaf.append((np.round([x, y])).tolist())
            x = start[0]
            y = start[1]
            x_future = x
            y_future = y
            x_prev = x
            y_prev = y
            heading_state = self.start_heading
            rotate_angle = heading_state
            steering_state = self.start_steering
            found = 0
            cost_list = [[0, [x, y], heading_state, steering_state]]
            next_state = cost_list
            path_discrete = list([])  # Initialize discrete path
            path_tree.append(path)
            path = []  # Initialize continuous path
            step = 1
    # close the remaining gap to the goal with a Bezier segment
    bz_last = Bezier(next_state[1], goal, next_state[2], self.goal_heading, self.start_steering,
                     self.mapsize, self.car_length, self.car_width, self.wheelbase, self.tolerance * 3, "Found")
    bz_last = bz_last.calculation()
    path.append(bz_last)
    return path, path_tree
def train():
    """
    Train the dynamics model and the policy on ``ENV_NAME``.

    Steps: estimate observation statistics from ``RANDOM_STEP`` random
    actions, build the Dynamics and Policy models, then repeat rollouts of
    ``ROLLOUT_STEP`` steps over ``NUM_ENV`` parallel environments until
    ``MAX_FRAME`` frames are collected.  After each rollout both networks are
    updated for ``EPOCH`` epochs.  Parameters and the reward plot are saved
    every ``AUTOSAVE_STEP`` rollouts and once more at the end.

    NOTE(review): the nesting below was reconstructed from a source whose
    line breaks were lost — confirm against the original file.
    """
    save_path = SAVE_DIR + ENV_NAME + "/" + AUXILIARY_TASK + "/"
    figure_path = FIGURE_DIR + ENV_NAME + "/" + AUXILIARY_TASK + "/"
    # Create folders.
    if not os.path.isdir(save_path):
        os.makedirs(save_path)
    if not os.path.isdir(figure_path):
        os.makedirs(figure_path)

    # Get observation space and action space.
    env = make_atari(ENV_NAME)
    obs_space = env.observation_space
    action_space = env.action_space

    # Estimate the mean and standard deviation of observations.
    env.reset()
    list_obs = []
    for _ in range(RANDOM_STEP):
        action = action_space.sample()
        obs, _, done, _ = env.step(action)
        if done:
            obs = env.reset()
        list_obs.append(obs)
    obs_mean = np.mean(list_obs, 0)
    # a single scalar: the mean of the element-wise standard deviations
    obs_std = np.mean(np.std(list_obs, 0))
    np.savez_compressed(save_path + "obs_mean_std", obs_mean=obs_mean, obs_std=obs_std)
    env.close()
    del env

    # Build models.
    dynamics = Dynamics(obs_space, action_space, auxiliary_task=AUXILIARY_TASK, is_training=True)
    policy = Policy(obs_space, action_space, is_training=True)

    variables_initializer = tf.global_variables_initializer()

    # Create environments.
    par_env = ParallelEnvironment(
        [make_atari(ENV_NAME) for _ in range(NUM_ENV)])

    with tf.Session() as sess:
        # Initialize variables.
        sess.run(variables_initializer)

        saver_dynamics = tf.train.Saver(dynamics.trainable_variables)
        saver_policy = tf.train.Saver(policy.trainable_variables)

        # Initialize the running estimate of rewards.
        sum_reward = np.zeros(NUM_ENV)
        reward_mean = 0.0
        reward_std = 1.0
        reward_count = 0

        # Initialize the counters.
        total_rollout_step = 0
        total_update_step = 0
        total_frame = 0

        # Initialize the recording of highest rewards.
        done_first = np.zeros(NUM_ENV)
        sum_ext_reward = np.zeros((NUM_ENV, ROLLOUT_STEP))
        list_highest_reward = []

        num_batch = int(np.ceil(NUM_ENV / BATCH_SIZE))

        # Each while loop performs a rollout, which first interacts with the environment and then updates the network.
        while total_frame < MAX_FRAME:
            # Initialize buffers.
            buffer_obs = np.zeros(
                (NUM_ENV, ROLLOUT_STEP + 1, *obs_space.shape))
            buffer_action = np.zeros((NUM_ENV, ROLLOUT_STEP))
            buffer_ext_reward = np.zeros((NUM_ENV, ROLLOUT_STEP))
            buffer_done = np.zeros((NUM_ENV, ROLLOUT_STEP))
            buffer_log_prob = np.zeros((NUM_ENV, ROLLOUT_STEP))
            buffer_v = np.zeros((NUM_ENV, ROLLOUT_STEP + 1))
            buffer_int_reward = np.zeros((NUM_ENV, ROLLOUT_STEP))
            buffer_reward = np.zeros((NUM_ENV, ROLLOUT_STEP))
            buffer_sum_reward = np.zeros((NUM_ENV, ROLLOUT_STEP))
            buffer_adv = np.zeros((NUM_ENV, ROLLOUT_STEP))
            buffer_v_target = np.zeros((NUM_ENV, ROLLOUT_STEP))

            # Interact with the environment for ROLLOUT_STEP steps.
            for step in range(ROLLOUT_STEP):
                # Get observation.
                if total_frame == 0:
                    obs = par_env.reset()
                else:
                    obs, _, _, _ = par_env.get_last_response()
                obs = (obs - obs_mean) / obs_std

                # Sample action.
                action, log_prob, v = sess.run(
                    [policy.sampled_action, policy.sampled_log_prob, policy.v],
                    feed_dict={policy.Obs: np.expand_dims(obs, 1)})
                action = np.squeeze(action, 1)
                log_prob = np.squeeze(log_prob, 1)
                v = np.squeeze(v, 1)

                # Interact with the environment.
                obs_next, extrinsic_reward, done, _ = par_env.step(action)

                # Update buffers.
                buffer_obs[:, step] = obs
                buffer_action[:, step] = action
                buffer_ext_reward[:, step] = extrinsic_reward
                buffer_done[:, step] = done
                buffer_log_prob[:, step] = log_prob
                buffer_v[:, step] = v

                if step == ROLLOUT_STEP - 1:
                    # Extra operations for the last time step.
                    obs_next = (obs_next - obs_mean) / obs_std
                    v_next = sess.run(
                        policy.v,
                        feed_dict={policy.Obs: np.expand_dims(obs_next, 1)})
                    v_next = np.squeeze(v_next, 1)
                    buffer_obs[:, step + 1] = obs_next
                    buffer_v[:, step + 1] = v_next

                # Update frame counter.
                total_frame += NUM_ENV

            # Get the highest reward.
            for step in range(ROLLOUT_STEP):
                # at step 0 the done flags and the running sum (column -1)
                # are carried over from the previous rollout
                done_prev = done_first if step == 0 else buffer_done[:, step - 1]
                sum_ext_reward[:, step] = buffer_ext_reward[:, step] + (
                    1 - done_prev) * sum_ext_reward[:, step - 1]
            done_first[:] = buffer_done[:, ROLLOUT_STEP - 1]
            highest_reward = np.amax(sum_ext_reward)
            list_highest_reward.append(highest_reward)

            # Compute the intrinsic reward.
            buffer_int_reward[:] = sess.run(dynamics.intrinsic_reward,
                                            feed_dict={
                                                dynamics.Obs: buffer_obs[:, :-1],
                                                dynamics.ObsNext: buffer_obs[:, 1:],
                                                dynamics.Action: buffer_action
                                            })

            # The total reward is a mixture of extrinsic reward and intrinsic reward.
            buffer_reward[:] = COEF_EXT_REWARD * np.clip(
                buffer_ext_reward, -1.0, 1.0) + COEF_INT_REWARD * buffer_int_reward

            # Normalize reward by dividing it by a running estimate of the standard deviation of the sum of discounted rewards.
            # 1. Compute the sum of discounted rewards.
            for step in range(ROLLOUT_STEP):
                sum_reward = buffer_reward[:, step] + GAMMA * sum_reward
                buffer_sum_reward[:, step] = sum_reward
            # 2. Compute mean and standard deviation of the sum of discounted rewards.
            reward_batch_mean = np.mean(buffer_sum_reward)
            reward_batch_std = np.std(buffer_sum_reward)
            reward_batch_count = np.size(buffer_sum_reward)
            # 3. Update the running estimate of standard deviation.
            reward_mean, reward_std, reward_count = average_mean_std(
                reward_mean, reward_std, reward_count, reward_batch_mean,
                reward_batch_std, reward_batch_count)
            # 4. Normalize reward.
            buffer_reward = buffer_reward / reward_std

            # Compute advantage.
            # - gae_adv_t = sum((gamma * lambda)^i * adv_(t+l)) over i in [0, inf)
            # - adv_t = r_t + gamma * v_(t+1) - v_t
            adv = buffer_reward + GAMMA * buffer_v[:, 1:] - buffer_v[:, :-1]
            sum_adv = np.zeros(NUM_ENV)
            for step in range(ROLLOUT_STEP - 1, -1, -1):
                sum_adv = adv[:, step] + GAMMA * LAMBDA * sum_adv
                buffer_adv[:, step] = sum_adv

            # Compute target value.
            buffer_v_target[:] = buffer_adv + buffer_v[:, :-1]

            # Normalize advantage with zero mean and unit variance.
            adv_mean = np.mean(buffer_adv)
            adv_std = np.std(buffer_adv)
            buffer_adv = (buffer_adv - adv_mean) / adv_std

            # Update networks.
            for epoch in range(EPOCH):
                # visit the environments in a fresh random order each epoch
                random_id = np.arange(NUM_ENV)
                np.random.shuffle(random_id)

                for i in range(num_batch):
                    batch_id = random_id[i * BATCH_SIZE:np.minimum(NUM_ENV, (i + 1) * BATCH_SIZE)]
                    _, auxiliary_loss, dyna_loss = sess.run(
                        [
                            dynamics.train_op, dynamics.auxiliary_loss,
                            dynamics.dyna_loss
                        ],
                        feed_dict={
                            dynamics.Obs: buffer_obs[batch_id, :-1],
                            dynamics.ObsNext: buffer_obs[batch_id, 1:],
                            dynamics.Action: buffer_action[batch_id]
                        })
                    _, value_loss, pg_loss, entropy_loss = sess.run(
                        [
                            policy.train_op, policy.value_loss, policy.pg_loss,
                            policy.entropy_loss
                        ],
                        feed_dict={
                            policy.Obs: buffer_obs[batch_id, :-1],
                            policy.Action: buffer_action[batch_id],
                            policy.Adv: buffer_adv[batch_id],
                            policy.VTarget: buffer_v_target[batch_id],
                            policy.LogProbOld: buffer_log_prob[batch_id]
                        })
                    total_update_step += 1

            # Update rollout step.
            total_rollout_step += 1

            # Only print the last update step.
            print("Rollout Step ", total_rollout_step, ", Total Frame ", total_frame, ", Update Step ", total_update_step, ":", sep="")
            print(" Auxiliary Loss = ", format(auxiliary_loss, ".6f"), ", Dynamics Loss = ", format(dyna_loss, ".6f"), ", Value Loss = ", format(value_loss, ".6f"), ", Policy Loss = ", format(pg_loss, ".6f"), sep="")
            print(" Highest Reward = ", highest_reward, sep="")

            if total_rollout_step % AUTOSAVE_STEP == 0:
                # Save network parameters.
                saver_dynamics.save(sess, save_path + "dynamics")
                saver_policy.save(sess, save_path + "policy")

                # Plot reward.
                interval = NUM_ENV * ROLLOUT_STEP
                list_frame = list(
                    range(interval, (total_rollout_step + 1) * interval, interval))
                plot_reward(list_frame, list_highest_reward, figure_path)

        # Save network parameters.
        saver_dynamics.save(sess, save_path + "dynamics")
        saver_policy.save(sess, save_path + "policy")

        # Plot reward.
        interval = NUM_ENV * ROLLOUT_STEP
        list_frame = list(range(interval, total_frame + interval, interval))
        plot_reward(list_frame, list_highest_reward, figure_path)

    par_env.close()
from IPython.core.debugger import set_trace from importlib import reload # import sys # sys.path.append("../") import params as P from dynamics import Dynamics from animation import Animation from plotData import plotData from ekf_slam import EKF_SLAM from copy import deepcopy dynamics = Dynamics() dataPlot = plotData() ekf_slam = EKF_SLAM() animation = Animation(ekf_slam) estimate = np.array([]) actual = np.array([]) t = P.t_start while t < P.t_end: t_next_plot = t + P.t_plot while t < t_next_plot: t = t + P.Ts vc = 1.25 + 0.5 * np.cos(2 * np.pi * (0.2) * t) omegac = -0.5 + 0.2 * np.cos(2 * np.pi * (0.6) * t) noise_v = vc + np.random.normal(
np.random.seed(1) dataFile = './data/' + pm.dtFile data = Dataset(dataFile, batchsize=hp.batchsize, train_ratio=.9, dev_ratio=.1) print('Dataset "{}" loaded.'.format(dataFile)) print('featsize: {:d}, trainsize: {:d}, testsize: {:d}'.format( data.featsize, data.trainsize, data.testsize)) model = BayesNN(featsize=data.featsize, M=hp.M, n_hidden=hp.n_hidden, Y_std=data.Y_std) ## op_samples, dninfo = Dynamics(pm.dnType, pm).evolve( model.latvar, get_logp=partial(model.get_logp, fullsize=data.trainsize)) T_rmse = np.zeros([hp.n_repeat, hp.n_round]) T_llh = np.zeros([hp.n_repeat, hp.n_round]) L_time = np.zeros([hp.n_repeat]) for i in range(hp.n_repeat): if i != 0: data.reset() print('Repeat-trial {:d}:'.format(i)) with tf.Session() as sess: tf.global_variables_initializer().run() X_n, Y_n = data.get_batch_for_init_loggamma() sess.run( model.init_loggamma, { model.X_train: X_n * data.X_std + data.X_mean, model.Y_train: Y_n * data.Y_std + data.Y_mean
import hb_msgs.msg
import numpy as np
from hb_common.datatypes import Params, Command, ROSParameterException
from hb_common.helpers import saturate
from dynamics import Dynamics

# Smoke test: evaluate the dynamics once for a fixed state and command and
# print the resulting state derivative.

# this is just the dynamic parameters from the ROS param server
params = Params._from_rosparam()

# this is artificially setting all our states and inputs to 0.5 to test our dynamics function
state = np.ones((6,1))*0.5
command = Command(left=0.5, right=0.5)

# making a dynamics object
d = Dynamics()
deriv_of_state = d.dynamics(params, state, command)

print("\n\n\n\nThis is the derivative of our state!!!")
print(deriv_of_state)
print("This is the derivative of our state!!!\n\n\n\n")

## for this input:
#state = np.ones((6,1))*0.5
#command = Command(left=0.5, right=0.5)
# Last link definition; dh0..dh8 and the q symbols come from earlier in the script.
dh9 = DHDef(9, 'R', 0.1, 0, 0, q9 - sympy.pi / 2, 'mdh', dh8, [])

# Wire up the successors: dh1 branches into dh2 and dh8.
dh0.set_succ([dh1])
dh1.set_succ([dh2, dh8])
dh2.set_succ([dh3])
dh3.set_succ([dh4])
dh4.set_succ([dh5])
dh5.set_succ([dh6])
dh6.set_succ([dh7])
dh8.set_succ([dh9])

# Time the kinematics and dynamics derivation.
start_time = time.time()

kin = Kinematics(dh0)
kin.cal_transfmats()
print(kin._coordinates)
print(kin._coordinates_t)
print(kin._d_coordinates)
print(kin._d_coordinates_t)
print(kin._dd_coordinates)
print(kin._dd_coordinates_t)
#kin.draw_frames()

dyn = Dynamics(kin)
print(dyn._ml2r(dh1._m, dh1._l))
print(dyn._Lmr2I(dh1._L_mat, dh1._m, dh1._r))
dyn.cal_dynamics()
print(dyn._tau)

# elapsed wall-clock time in seconds
print("duration: {}".format(-start_time + time.time()))
# File that stores the pickled dyn.H_b of the two-link robot.
two_link_robot_model_file = model_folder + 'two_link_robot_model.pkl'

# Create joint variables and define their relations
q0, q1, q2, q3, q4, q5, q6, q7, q8, q9 = new_sym('q:10')
# q3 = -q2 + q8
# q9 = -q8 + q2

robot_def = RobotDef([(0, -1, [1], 0, 0, 0, 0),
                      (1, 0, [2], 0, 0, -0.21537, q1),
                      (2, 1, [3], 0, -sympy.pi / 2, 0, q2 + sympy.pi / 2)],
                     dh_convention='mdh',
                     friction_type=['Coulomb', 'viscous', 'offset'])

geom = Geometry(robot_def)

dyn = Dynamics(robot_def, geom)

#if not os.path.exists(two_link_robot_model_file):
# pickle needs a binary stream; the former mode 'wr' is rejected by open()
# in Python 3 (exactly one of create/read/write/append is allowed).
with open(two_link_robot_model_file, 'wb') as f:
    pickle.dump(dyn.H_b, f)

# Read the file back and print the difference to the in-memory value as a
# round-trip check.
H_b = None
if os.path.exists(two_link_robot_model_file):
    with open(two_link_robot_model_file, 'rb') as f:
        H_b = pickle.load(f)

print(H_b - dyn.H_b)

# import io, json
# with io.open('two_link_robot_model_file', 'w', encoding='utf-8') as f:
# f.write(json.dumps(data, ensure_ascii=False))
def sample_dist(state, actions):
    """
    Fit trajectory distributions per trajectory and write sampled commands.

    For every trajectory the dynamics are fitted, the trajectory distribution
    is refined for a fixed number of iterations (collecting 10 samples before
    each refit), and one sample per time step is drawn from the negated
    forward-pass Gaussians.  Columns 28 onwards of those samples are written
    to ``trajectories/target/Traj<k>pred.txt`` (k is 1-based).

    Args:
        state: array indexed as [trajectory, time step, state dimension]
        actions: array indexed as [trajectory, time step, action dimension]

    Returns:
        The samples drawn for the last trajectory, or an empty list when
        ``state`` contains no trajectory.

    Side effects:
        Rebinds the module-level global ``hyperparameters``.

    NOTE(review): the nesting below was reconstructed from a source whose
    line breaks were lost — in particular confirm that sampling and the file
    write happen once per trajectory, after the iteration loop.
    """
    global hyperparameters
    samples = []
    dynamics = Dynamics()
    # dynamics.fit(state,actions)
    for traj_no in range(state.shape[0]):
        # trajectory = 6
        dynamics = Dynamics()
        traj_states = state[traj_no, :, :]
        traj_actions = actions[traj_no, :, :]
        vals, acts = getPreviousSA(traj_no, traj_states, traj_actions)
        T, dx = traj_states.shape
        du = traj_actions.shape[1]
        # uniform weights over the state and action dimensions
        hyperparameters = {
            'wx': [1 / float(dx) for i in range(dx)],
            'wu': [1 / float(du) for i in range(du)]
        }
        eta = 1e-16
        dynamics.fit(vals, acts)
        prev_traj_dist = init_traj_dist(traj_states, traj_actions, dynamics,
                                        hyperparameters)
        traj_dist = prev_traj_dist
        prev_mu, prev_sigma = forward(prev_traj_dist, dynamics)
        # np.inf instead of the alias np.Inf, which NumPy 2.0 removed
        prev_eta = -np.inf
        min_eta = prev_eta
        _MAX_ITER = 5
        for iteration in range(_MAX_ITER):
            # Collect samples in simulation
            for _ in range(10):
                s, a = get_sample(traj_dist, traj_no)
                push_sample(traj_no, s, a)
            vals, acts = getPreviousSA(traj_no, traj_states, traj_actions)
            dynamics.fit(vals, acts, .01)
            traj_dist, new_eta = backward(traj_states, traj_actions, dynamics,
                                          eta, hyperparameters)
            print(new_eta)
            print('try again')
            mu, sigma = forward(traj_dist, dynamics)
            if new_eta > prev_eta:
                min_eta = new_eta
            # dynamics.fit(new_mu,new_sigma)
            # # TODO: calculate KL divergence between new traj_dist and prev_traj_dist
            # # check constraint, that kl_div <= _THRESHOLD
            # kl_div = calculate_KL_div(mu, prev_mu, traj_dist, prev_traj_dist)
            # print(kl_div)
            # if kl_div <= _THRESHOLD:
            # break
            prev_traj_dist = traj_dist
        #Take initial sample
        samples = np.array([
            -np.random.multivariate_normal(mu[t], sigma[t], 1).flatten()
            for t in range(T)
        ])
        commands = samples[:, 28:]
        # raw_input()
        # the context manager closes the file even if a write fails
        with open('trajectories/target/Traj{}pred.txt'.format(traj_no + 1),
                  'w') as f:
            print('here')
            for act in commands:
                f.write("{}\n".format(" ".join(str(x) for x in act.flatten())))
    return samples
return int(np.linalg.norm(self.states[-1, :2] - self.O) <= self.eps) def to_origin(self): self.target = self.O return 2 def done(self): self.gamma = 1 return 2 if __name__ == "__main__": # dynamics from dynamics import Dynamics sys = Dynamics(thrust=10, area=1) # controller #from controller import PID #con = PID(1, 1, 1) # origin & waypoints from farm import Farm env = Farm(5, 10, 10, 20, 5, 40, 50) wps = env.simple_coverage() org = np.array([env.dsx, env.dsy]) # mission mis = Mission(org, wps, sys) mis.simulate(50000)
exec('from ' + sys.argv[1][:-3] + ' import HP, PM') args = HP() pm = PM() print(vars_stat(pm)) data = Dataset(args.dtFilename, args.batchsize) data_W = sum(1 for line in open(args.dtVocname) if line.rstrip()) model = LDA(data.n_tr, data_W, args.K, args.alpha, args.beta, args.sigma, args.n_gsamp) model.set_holdout_logperp(args.perpType, data.ho_train_cts, data.ho_test_cts, args.n_window) theta = args.beta + args.sigma * np.random.normal(size=(args.M, args.K, data_W)) theta_tf = tf.Variable(theta) grads_tf = tf.placeholder(dtype=theta.dtype, shape=theta.shape) op_samples, dninfo = Dynamics(pm.dnType, pm).evolve(theta_tf, L_grad_logp=grads_tf) tr_times = [] with tf.Session() as sess: tf.global_variables_initializer().run() theta_smp, theta_par = zip( *sess.run([dninfo.L_samples, dninfo.L_particles]))[0] for i in range(args.n_round): t_start = time.time() for j in range(args.n_iter): tr_train_cts, tr_test_cts = data.get_batch() grads = model.get_grad_logp(tr_train_cts, theta=theta_par) if j == args.n_iter - 1: break theta_par = sess.run([op_samples, dninfo.L_particles], {grads_tf: grads})[1][0] theta_smp, theta_par = zip(
# arguments n = 10 n_unstable = 3 n_r = 8 # Generate Dynamics Matrices while True: A = np.random.randn(n, n) lmbda = npl.eig(A)[0] if sum(lmbda >= 0) == n_unstable: break B = np.vstack([[1, 0], np.zeros([n - 2, 2]), [0, 1]]) dyn_f = Dynamics(A, B) dyn_r = reduceDynamics(dyn_f, n_r, debug=0) # Setup Simulation parameters x0 = np.zeros(n) x0[-1] = 5 tf = 2 dt = 0.01 t_arr = np.arange(0, tf, dt) # simulate full dynamics x_arr = np.zeros([n, len(t_arr)]) x = x0 for i, t in enumerate(t_arr): dx = dyn_f.A @ x
import sys sys.path.append('..') from cost import CostFunctor from nempc import NEMPC from dynamics import Dynamics import numpy as np import matplotlib.pyplot as plt cost_fn = CostFunctor(use_penalty=False) cost_fn.Q = np.array([10,10,100,2.5,2.5,2,1,1,1]) u_eq = np.array([0.5,0,0,0]) ctrl = NEMPC(cost_fn, 9, 4, cost_fn.u_min, cost_fn.u_max, u_eq, horizon=10, population_size=500, num_parents=10, num_gens=200, mode='tournament', warm_start=True) dyn = Dynamics() t = 0.0 tf = 10.0 ts = 0.02 x = np.array([0,0,-5.0,0,0,0,0,0,0]) x_des = np.array([0,1,-6.0,0,0,0,0,0,0]) state_hist = [] input_hist = [] time_hist = [] while t < tf: print(t, end='\r') time_hist.append(t) state_hist.append(x)
def modelFunc(R, ModVar, UseOp, PlotDetails, tdata, FdataInput, errorbarInput,
              freq, iterationLength, numberOfEmpties, numberOfPoints, ndims,
              Plot_Exceptions, plot_SED=False):
    """Evaluate the model fluxes and return either a chi-square or light curves.

    The function runs the ``Dynamics`` solver once and then, for every entry
    of ``freq``, integrates the emitted power over the equal arrival time
    surface (EATS) at each observer time.  Depending on ``UseOp.runOption``
    the fluxes are compared with the input data (chi-square accumulation) or
    stored as light curves on a logarithmic time grid.

    NOTE(review): this is Python 2 code (``print`` statements, ``raw_input``).

    Parameters (meanings below are read off the usage in this function;
    confirm against the callers):
        R: only ``R[0]`` is read; it is passed to ``Dynamics``.
        ModVar: model variables.  Several log-valued attributes are
            exponentiated and written back onto this object (``eB``,
            ``epsilone``, ``A0``, ``Gamma0``, ``E0``, ``M0``, ...), so the
            argument is mutated.
        UseOp: option flags (``runOption``, ``reverseShock``, ``createMock``,
            ``opticalDepth``, ``thermalComp``, ``chi2_type``, ...).
        PlotDetails: supplies ``scalePlotTime``, ``scaleFluxAxis`` and
            ``colourCycle`` for the optional component plots.
        tdata, FdataInput, errorbarInput: 2-D inputs sliced as
            ``[nuIte, :numberOfEmpties[nuIte]]``.
        freq: one entry per outer iteration; multiplied by ``1 + ModVar.z``.
        iterationLength: number of outer-loop iterations.
        numberOfEmpties: per-row number of leading entries that are used.
        numberOfPoints, ndims: only used to print the reduced chi-square.
        Plot_Exceptions: provides the ``RS_only`` / ``FS_only`` switches.
        plot_SED: when true, no chi-square is computed.

    Returns:
        ``(lightcurve, startJetBreak, endJetBreak, tobsGrid, Flux)`` when
        ``UseOp.runOption`` is ``'LC'`` or ``'one-sigma'``; otherwise
        ``(chi2, None, None, None)``.  An early exit returns the 3-tuple
        ``(float('inf'), None, None)`` when the accumulated chi-square is not
        positive.
        NOTE(review): the tuples have different lengths -- confirm that
        callers cope with this.

    Side effects visible here: writes ``Flux/*.txt`` files, prints progress
    information, clamps ``Dyn.gamma_min`` (and ``Dyn.gamma_min_RS``) to >= 1.
    """
    import numpy as np
    from dynamics import Dynamics
    from cosmocalc import cosmocalc
    # NOTE(review): `time` is imported only for runOption 'LC', but
    # time.time() is also called in the `'LC' or 'one-sigma'` block near the
    # end of the function.  Unless `time` is available at module level, the
    # 'one-sigma' option raises NameError there -- confirm.
    if UseOp.runOption == 'LC':
        import time
    from EATS_func import eats_function
    from EATS_func import BL_constants
    from EATS_func import alphanu_func
    from EATS_func import kappa_constants
    import warnings
    import os
    from radiation_modules import radiation_function
    from useful_modules import cgs_constants
    from radiation_modules import rad_var
    from radiation_modules import weights
    from radiation_modules import flux_allocation
    from radiation_modules import self_absorption
    if UseOp.runOption != 'fit':
        from matplotlib import pyplot as plt

    #Natural constants
    NatCon = cgs_constants()
    Kappas = kappa_constants(ModVar.p)

    # Radiation constants
    RadCon = BL_constants(ModVar.p)
    # NOTE(review): these are built from ModVar.pRS *before* ModVar.pRS is
    # overwritten with ModVar.p in the fixedRSFSratio branch below, whereas
    # selfAbsRS is built afterwards -- confirm the ordering is intended.
    if UseOp.reverseShock:
        RadConRS = BL_constants(ModVar.pRS)
        Kappas_RS = kappa_constants(ModVar.pRS)

    D = cosmocalc(ModVar.z, H0=67.8, WM=0.308)['DL_cm']  #Distance to burst from observer in cm. Values gathered from http://adsabs.harvard.edu/abs/2015arXiv150201589P

    # Convert the log10-valued model parameters to linear values (stored back
    # on ModVar).
    ModVar.eB = 10**ModVar.eBlog
    ModVar.epsilone = 10**ModVar.epsiloneLog
    ModVar.epsilon = 10**ModVar.epsilonLog
    ModVar.R_ISM = 10**ModVar.R_ISM_log
    if ModVar.s == 0:
        ModVar.A0 = 10**ModVar.nCMLog
    else:
        ModVar.A0 = 10**ModVar.A0Log
    #ModVar.t0 = 10**ModVar.logt0
    ModVar.tprompt = 10**ModVar.tprompt_log
    ModVar.Gamma0 = 10**ModVar.Gamma0log
    if UseOp.fixedRSFSratio:
        # Reverse-shock parameters are copied from the forward-shock ones.
        ModVar.eB3 = np.copy(ModVar.eB)
        ModVar.epsilone3 = np.copy(ModVar.epsilone)
        ModVar.pRS = np.copy(ModVar.p)
    else:
        ModVar.eB3 = 10**ModVar.eB3log
        ModVar.epsilone3 = 10**ModVar.epsilone3Log

    selfAbs = BL_constants(ModVar.p)
    if UseOp.reverseShock:
        selfAbsRS = BL_constants(ModVar.pRS)

    ModVar.E0 = 10**ModVar.E0log * (1 - np.cos(ModVar.theta0))  #E0log is the isotropic energy, here it is corrected for the opening angle ModVar.theta0
    ModVar.M0 = ModVar.E0 * NatCon.c**-2 / ModVar.Gamma0
    ModVar.M03 = 0.  #2*pi*R[0]**3*(1-np.cos(ModVar.theta0)) * n * NatCon.mp /3 #M0/1000
    twoPi = 2 * np.pi
    #ModVar.A0 = n * (NatCon.mp + NatCon.me) * 10**(profile_cutoff*s)
    mmean = (NatCon.me + NatCon.mp) / 2.  #Constant

    #Get lightcurve from model. Extract points corresponding to data. Compare (get chi**2) and return the log-likelihood

    #Find the last time that we want to calculate model for.
    tobsEnd = np.max(tdata)  # + t0)
    if UseOp.runOption == 'fit':
        tobsRedUpper = tobsEnd
    else:
        tobsRedLower = .01
        # NOTE(review): 250 days is 2.16e7 s; the value below is 2.16e12 --
        # confirm which of the number and the comment is intended.
        tobsRedUpper = 2.16e12  #250 days
        tobsEnd = np.copy(tobsRedUpper)

    if (UseOp.runOption == 'LC'):
        dynStartTime = time.time()

    ###############################
    ### Running dynamics module ###
    ###############################

    ### Tolerance in adaptive stepsize routine
    tol = 1e-3

    sensible_value = False
    while not sensible_value:
        # The try/except that used to retry with a lower tolerance has been
        # replaced by `if True:` / `else:`, so the loop body runs exactly once
        # and the else branch (which references the undefined name
        # `in_error`) is never executed.
        if True:  #try:
            #if UseOp.reverseShock:
            Dyn = Dynamics(R[0], ModVar, UseOp, NatCon, tobsRedUpper * 2, tol)
            sensible_value = True
        else:  #except NameError as in_error:
            if str(in_error) in ['stepsize', 'bisect', 'gamma_min > gamma_max']:
                print in_error
                return 1e20, None, None
            else:
                raise NameError(in_error)
        """
        print tol
        print 'lowering tol'
        if tol < 1e-8:
            print '\n\n----------------------------\n\nOoops it crashed!\n\n'
            print ModVar.tprompt
            return float('-inf'),None,None
        tol /= 10
        """

    if (UseOp.runOption == 'LC'):
        print "Dynamics module time use: %f" % (time.time() - dynStartTime)

    cosTheta = np.cos(Dyn.theta)
    chi2 = 0.
    timeGrid = 100  #How tight the lightcurve grid should be when runOption=='LC'
    timeGridSigma = 400

    # Allocate the output arrays for the selected run option.
    if (UseOp.runOption == 'LC'):
        if UseOp.createMock:
            lightcurve = np.zeros(np.shape(tdata))
            tobsGrid = []
        elif plot_SED:
            lightcurve = np.zeros(np.shape(tdata))
            tobsGrid = np.copy(tdata)
        else:
            lightcurve = np.zeros([iterationLength, timeGrid])
            tobsGrid = np.zeros([iterationLength, timeGrid])
    if UseOp.runOption == 'one-sigma':  #Evaluating lightcurves to plot one-sigma
        lightcurve = np.zeros([iterationLength, timeGridSigma])
        tobsGrid = np.zeros([iterationLength, timeGridSigma])

    if (UseOp.runOption == 'LC'):
        dynTimeStart = time.time()

    #############################
    #### Spectrum generation ####
    #############################

    ## Setting spectrum generation constants and vectors
    distance_factor = (1 + ModVar.z) / (2 * D**2)

    #gamma_min = (p-2)/(p-1)*(1+NatCon.mp/NatCon.me*ModVar.epsilone*(Dyn.Gamma-1))
    # Clamp gamma_min to at least 1 (modifies the Dyn object in place).
    Dyn.gamma_min[np.where(Dyn.gamma_min < 1)] = 1.

    if UseOp.reverseShock:
        tobs_RS_cutoff = Dyn.tobs[Dyn.RS_elements_upper - 1]
        Rad = rad_var(Dyn, ModVar, UseOp, NatCon, RadCon, RadConRS)
    else:
        Rad = rad_var(Dyn, ModVar, UseOp, NatCon, RadCon)

    if UseOp.reverseShock:
        #gamma_min_RS_out = (ModVar.pRS-2)/(ModVar.pRS-1)*(1+NatCon.mp/NatCon.me*ModVar.epsilone3*Dyn.gamma43_minus_one)
        Dyn.gamma_min_RS[np.where(Dyn.gamma_min_RS < 1.)] = 1.

    #upperRimLim = Dyn.theta + ModVar.alpha
    #upperRimLim[np.where((Dyn.theta+ModVar.alpha)>np.pi/2)] = np.pi/2
    #tobsRim = (1+z) * (Dyn.tburst - Dyn.R * np.cos(upperRimLim) / c) # Observing time at the rim for each radial point. Will use this in setting EATS grid. Note from 9/12 -13

    # Flux is only allocated for 'LC' runs without createMock.
    # NOTE(review): Flux is also read in the flux-saving block and in both
    # light-curve return statements, which are reachable for 'one-sigma' and
    # for 'LC' with createMock -- that looks like a NameError; confirm.
    if UseOp.runOption == 'LC' and not UseOp.createMock:
        Flux = flux_allocation(UseOp, iterationLength, Plot_Exceptions,
                               timeGrid, freq, timeGrid)

    for nuIte in range(iterationLength):  #Loop over all input frequencies or time steps
        freqArr = np.array([freq[nuIte]])
        onePzFreq = (1 + ModVar.z) * freqArr
        tobsRed = tdata[nuIte, :numberOfEmpties[nuIte]]  #+ t0

        noChi2 = ((UseOp.runOption == 'LC') & (UseOp.createMock)) | (UseOp.runOption == 'one-sigma') | plot_SED  #This is true if we want to produce mock observations, and don't want to read in data
        if not noChi2:
            if (UseOp.runOption == 'LC') and (not UseOp.createMock):  #Creating an equally spaced temporal grid to make smoother plots
                # Keep the data times; the model is evaluated on a log grid
                # and interpolated back onto them further down.
                tdataLC = tobsRed
                tobsRed = np.logspace(np.log10(tobsRedLower),
                                      np.log10(tobsRedUpper), timeGrid)
                tobsGrid[nuIte] = tobsRed
            Fdata = FdataInput[nuIte, :numberOfEmpties[nuIte]]
            errorbar = errorbarInput[nuIte, :numberOfEmpties[nuIte]]
        elif UseOp.runOption == 'one-sigma':  #If plotting the one-sigma range
            tobsRed = np.logspace(np.log10(tobsRedLower),
                                  np.log10(tobsRedUpper), timeGridSigma)
            tobsGrid[nuIte] = tobsRed

        #if useEATS:
        if True:  #Equal Arrival Time Surface (EATS) integrator
            #Allocating space
            # if mockDim == 'T':
            EATSsteps = len(tobsRed)
            F = np.zeros(EATSsteps)
            if UseOp.reverseShock:
                PRSprim = np.zeros(EATSsteps)
                # NOTE(review): this flag is set True once per frequency; the
                # time loop below can set it False but never sets it back to
                # True -- confirm that is intended.
                Rad.RS_in_EATS = True
            if UseOp.thermalComp:
                thermal_component = np.zeros(EATSsteps)

            for rimI in range(EATSsteps):  #If we want a frequency resolution, len(tobsRed) = 1 . Note that len(tobsRed) must be a vector still, but with one element. Test: Run a file with only one point!
                #Finding index for the nearest point behind seaked rim radius.
                #tobsRimNow = tobsRed[rimI] #Tells the program what observer's time we want to find emission for
                #indRim = np.argmin(np.abs(tobsRimNow-tobsRim)) #Find what index the point at the rim has with the observer's time we are looking for.
                #indRim -= (tobsRim[indRim] > tobsRimNow) * (indRim != 0) #Making sure found index is behind seaked point
                """
                #Weights for the rim interpolation
                weightRim,weight1Rim,weight2Rim = np.log10(Dyn.tobs[indRim+1] / Dyn.tobs[indRim]) , np.log10(Dyn.tobs[indRim+1] / tobsRimNow) , np.log10(tobsRimNow / Dyn.tobs[indRim])
                thetaRimPre = np.copy(Dyn.theta[indRim])
                lowerLimit_low = Dyn.theta[indRim] - ModVar.alpha
                upperLimit_low = Dyn.theta[indRim] + ModVar.alpha
                if upperLimit_low > np.pi/2: upperLimit_low = np.pi/2
                thetaRimPre2 = np.copy(Dyn.theta[indRim+1])
                lowerLimit_high = Dyn.theta[indRim+1] - ModVar.alpha
                upperLimit_high = Dyn.theta[indRim+1] + ModVar.alpha
                if upperLimit_high > np.pi/2: upperLimit_high = np.pi/2
                if weightRim < 1e-2 or indRim == 0:
                    #thetaRim = np.copy(thetaRimPre)
                    lowerLimit = np.copy(lowerLimit_low)
                    upperLimit = np.copy(upperLimit_low)
                else:
                    #thetaRim = (thetaRimPre * weight1Rim + thetaRimPre2 * weight2Rim) / weightRim
                    lowerLimit = (lowerLimit_low * weight1Rim + lowerLimit_high * weight2Rim) / weightRim
                    upperLimit = (upperLimit_low * weight1Rim + upperLimit_high * weight2Rim) / weightRim
                """

                ### New approach. Instead of creating a grid that we interpolate the dynamical values on, we base the EATSurface on the dynamical points.

                ### Finding first index behind the centre of the EATS
                first_index = np.argmin(np.abs(Dyn.tobs - tobsRed[rimI]))  ### Index to the point right behind the foremost point on the EATS
                if Dyn.tobs[first_index] > tobsRed[rimI]:  ### If this point is in fact ahead of the time point we are on, we step back one step in order to be behind it
                    if first_index != 0:
                        first_index -= 1
                    else:
                        print 'Something wrong in EATS calculation!'

                ### Finding the index just behind the point at the very rim of the jet
                total_angle = Dyn.theta + ModVar.alpha
                # Cap the total angle at pi/2.
                total_angle[np.where(total_angle > np.pi / 2)] = np.pi / 2
                last_index = np.argmin(
                    np.abs((1 + ModVar.z) *
                           (Dyn.tburst - Dyn.R * np.cos(total_angle) / NatCon.c)
                           - tobsRed[rimI]))
                if ((1 + ModVar.z) * (Dyn.tburst[last_index] - Dyn.R[last_index] * np.cos(total_angle[last_index]) / NatCon.c)) > tobsRed[rimI]:
                    last_index -= 1
                # NOTE(review): the nesting of this check relative to the
                # decrement above should be confirmed against the original
                # layout.
                if last_index <= 0:
                    raise NameError(
                        "Data point is too early! Please decrease lower limit of radius R array in options.py and run again!"
                    )

                # Observer times of the two dynamical points bracketing the
                # rim of the EATS.
                tobs_behind = (1 + ModVar.z) * (
                    Dyn.tburst[last_index] - Dyn.R[last_index] *
                    np.cos(total_angle[last_index]) / NatCon.c)
                tobs_before = (1 + ModVar.z) * (
                    Dyn.tburst[last_index + 1] - Dyn.R[last_index + 1] *
                    np.cos(total_angle[last_index + 1]) / NatCon.c)

                ### Now we want to create an array with obs times of all points on the EATS. Then we integrate using trapezoidal rule
                intermid_ind = np.arange(last_index + 1, first_index + 1)  ### intermediate indices, ranging from last_index+1 to first_index (all indices inside the EATS)

                ### Angle Phi is defined from setting
                ### tobs = (1+z)*(tburst-R*cos(Phi)/c)
                ### This yields Phi = arccos(c/R*(tburst-tobs/(1+z)))
                Phi_factor = NatCon.c / Dyn.R[intermid_ind] * (
                    Dyn.tburst[intermid_ind] - tobsRed[rimI] / (1 + ModVar.z))

                # Number of points whose arccos argument is outside [-1, 1].
                nonzero_Phi_factor = np.sum((Phi_factor > 1) | (Phi_factor < -1))
                if nonzero_Phi_factor != 0:  ### Jet has expanded to 90 degrees
                    print 'EATS crossed pi/2!'
                    intermid_ind = intermid_ind[np.where(Phi_factor < 1)]

                # +2 for the interpolated edge and front points.
                EATSrings = len(intermid_ind) + 2  ### Number of points in Dyn class inside EATSurface

                ### Same for RS
                if UseOp.reverseShock:
                    where_RS = np.where(Dyn.tobs[intermid_ind] <= tobs_RS_cutoff)[0]  ### Finds what rings on the EATS has an RS counter part.
                    intermid_ind_RS = intermid_ind[where_RS]  ### Finds what indices has an RS counter part.

                    ### Check if we hit RS cutoff
                    # NOTE(review): the bare except is meant for the empty
                    # intermid_ind_RS case but also hides any other error
                    # raised in this block.
                    try:
                        if np.max(intermid_ind_RS) == (Dyn.RS_elements_upper - 1):
                            intermid_ind_RS = intermid_ind_RS[:-1]
                            where_RS = where_RS[:-1]
                        EATSrings_RS = len(intermid_ind_RS) + 2  ### innermost and edge elements of RS
                        first_index_RS = intermid_ind_RS[-1]
                        last_index_RS = intermid_ind_RS[0] - 1
                    except:  ### len(intermid_ind_RS) = 0
                        Rad.RS_in_EATS = False

                ### Weights for interpolating the front point and the edge point
                if UseOp.reverseShock and Rad.RS_in_EATS:
                    InterWeights = weights(Dyn, UseOp, Rad, ModVar, NatCon,
                                           tobsRed[rimI], tobs_behind,
                                           tobs_before, first_index,
                                           last_index, onePzFreq,
                                           first_index_RS, last_index_RS)
                else:
                    InterWeights = weights(Dyn, UseOp, Rad, ModVar, NatCon,
                                           tobsRed[rimI], tobs_behind,
                                           tobs_before, first_index,
                                           last_index, onePzFreq)

                ### Setting array containing angle from LoS to EATS rings
                # Layout: [0] = interpolated edge, [1:-1] = dynamical points,
                # [-1] = front point on the line of sight.
                Phi = np.zeros(EATSrings)
                #if len(Phi) == 2: ### If there are no Dyn points inside the EATS, we have to interpolate the edges
                Phi[1:-1] = np.arccos(
                    NatCon.c / Dyn.R[intermid_ind] *
                    (Dyn.tburst[intermid_ind] - tobsRed[rimI] / (1 + ModVar.z)))
                Phi[0] = InterWeights.Phi_edge
                #Phi[-1] = #NatCon.c/InterWeights.R_front*(InterWeights.tburst_front - tobsRed[rimI] / (1+ModVar.z))
                Phi[-1] = 0.  ### Per definition
                """
                if np.sum(Phi < 0) != 0:
                    print Phi
                if len(intermid_ind) == 0:
                    print len(intermid_ind)
                    print nuIte
                    raw_input(Phi)
                """

                # Azimuthal extent of each ring; a full 2*pi unless modified
                # in the off-axis block below.
                phiInter = np.ones(EATSrings) * 2 * np.pi
                if ModVar.alpha != 0:  #Off-axis
                    partialRingsInd = np.where(
                        (Phi[1:-1] > Dyn.theta[intermid_ind] - ModVar.alpha) &
                        (Phi[1:-1] < Dyn.theta[intermid_ind] + ModVar.alpha)
                    )  #Rings crossing the rim. Happens only when ModVar.alpha != 0
                    partialRingsInd_edge = (
                        Phi[0] > InterWeights.theta_edge - ModVar.alpha) & (
                            Phi[0] < InterWeights.theta_edge + ModVar.alpha)
                    offAxisFoV = (Dyn.theta[intermid_ind[partialRingsInd]]**2 - ModVar.alpha**2 - Phi[1:-1][partialRingsInd]**2) / (2 * ModVar.alpha * Phi[1:-1][partialRingsInd])
                    if partialRingsInd_edge:
                        offAxisFoV_edge = (InterWeights.theta_edge**2 - ModVar.alpha**2 - Phi[0]**2) / (2 * ModVar.alpha * Phi[0])
                        phiInter[0] = 2 * np.pi - 2 * np.arccos(offAxisFoV_edge)
                    ### phiInter at the centre is always 2*pi if theta is larger than alpha, and vice versa if alpha is larger than theta (orphan burst)
                    if InterWeights.theta_edge < ModVar.alpha:  ### Orphan
                        phiInter[-1] = 0.
                    # Clamp the arccos argument from below.
                    offAxisFoV[np.where(offAxisFoV < -1)] = -1.
                    # NOTE(review): partialRingsInd holds positions within
                    # Phi[1:-1] but is applied here to the full-length
                    # phiInter -- check for an off-by-one.
                    phiInter[partialRingsInd] = 2 * np.pi - 2 * np.arccos(offAxisFoV)

                if np.isnan(Phi[-1]):
                    print len(intermid_ind)
                """
                print '---'
                argmin = np.argmin(np.abs(tobsRed[rimI] - Dyn.tobs))
                print tobsRed[rimI]
                print Dyn.tobs[argmin]
                print Dyn.tobs[argmin-1]
                print Dyn.tobs[argmin+1]
                print (1+ModVar.z)*(Dyn.tburst[argmin]-Dyn.R[argmin]*np.cos(Phi[0])/NatCon.c)
                print intermid_ind
                print NatCon.c/R_front*(tburst_front - tobsRed[rimI]/(1+ModVar.z))
                print 'tobsEnd =',Dyn.tobs[-1]/86400
                """

                # Frequency array nuPrim for every ring (same layout as Phi).
                nuPrim = np.zeros(EATSrings)
                nuPrim[1:-1] = onePzFreq * Dyn.Gamma[intermid_ind] * (
                    1 - Dyn.beta[intermid_ind] * np.cos(Phi[1:-1]))
                nuPrim[0] = InterWeights.nuPrim_edge
                nuPrim[-1] = InterWeights.nuPrim_front

                # Forward-shock emitted power for every ring.
                PprimTemp = np.zeros(EATSrings)
                PprimTemp[1:-1] = radiation_function(Dyn, Rad, UseOp, ModVar,
                                                     nuPrim, Phi[1:-1],
                                                     intermid_ind, Kappas,
                                                     False, True)
                PprimTemp[0], PprimTemp[-1] = radiation_function(
                    Dyn, Rad, UseOp, ModVar, nuPrim, Phi, intermid_ind, Kappas,
                    False, False, InterWeights, last_index,
                    first_index)  ### Interpolating edge points
                """
                if (tobsRed[rimI] > 86400*84) and (freqArr > 1e13):
                #if np.count_nonzero(PprimTemp) != EATSrings:
                    print 'tobs =',tobsRed[rimI]/86400
                    print Phi
                    print PprimTemp
                    print freqArr
                    file_name = 'plot_dir/%s.txt'%(tobsRed[rimI]/86400)
                    #np.savetxt(file_name , [Phi , PprimTemp])
                    plt.plot(Phi,PprimTemp)
                    plt.yscale('log')
                    plt.show()
                if len(np.where(PprimTemp<=0)[0]) > 0:
                    plt.plot(Phi,PprimTemp)
                    plt.show()
                """

                if UseOp.opticalDepth:
                    tauFS = self_absorption(Dyn, ModVar, selfAbs, Rad, NatCon,
                                            InterWeights, nuPrim, intermid_ind,
                                            False)
                    # Attenuation factor (1 - exp(-tau)) / tau, evaluated
                    # directly for tau > 1e-2 and by a series for smaller tau.
                    tau_factor = np.ones(EATSrings)
                    high_tauFS = np.where(tauFS > 1e-2)
                    medium_tauFS = np.where((tauFS <= 1e-2) & (tauFS > 1e-8))
                    tau_factor[high_tauFS] = (
                        1 - np.exp(-tauFS[high_tauFS])) / tauFS[high_tauFS]
                    # NOTE(review): the Taylor series of 1 - exp(-x) is
                    # x - x**2/2 + x**3/6 - x**4/24 + ...; the higher-order
                    # terms used here differ from that -- confirm.
                    tau_factor[medium_tauFS] = (
                        tauFS[medium_tauFS] - tauFS[medium_tauFS]**2 / 2 +
                        tauFS[medium_tauFS]**4 / 4 -
                        tauFS[medium_tauFS]**6 / 6) / tauFS[medium_tauFS]
                    """
                    if (tobsRed[rimI] > 80*86400) and (freqArr < 1e10):
                        print tauFS
                        print high_tauFS
                        print medium_tauFS
                        print len(high_tauFS[0]) + len(medium_tauFS[0])
                        print EATSrings
                        plt.plot(Phi , tau_factor)
                        plt.yscale('log')
                        plt.show()
                    """
                    # Debug stop: blocks on keyboard input if any factor is 0.
                    if np.count_nonzero(tau_factor) != EATSrings:
                        raw_input('hold it!')
                    ### any lower tau will give tau factor 1
                    PprimTemp *= tau_factor

                if UseOp.reverseShock and Rad.RS_in_EATS:
                    # Reverse-shock emitted power on the rings that have an
                    # RS counterpart.
                    PRSprimTemp = np.zeros(EATSrings_RS)
                    PRSprimTemp[1:-1] = radiation_function(
                        Dyn, Rad, UseOp, ModVar, nuPrim[where_RS],
                        Phi[where_RS], intermid_ind_RS, Kappas_RS, True, True)
                    PRSprimTemp[0], PRSprimTemp[-1] = radiation_function(
                        Dyn, Rad, UseOp, ModVar, nuPrim[where_RS],
                        Phi[where_RS], intermid_ind_RS, Kappas_RS, True, False,
                        InterWeights, last_index_RS, first_index_RS)
                    if UseOp.opticalDepth:
                        tauRS_component = self_absorption(
                            Dyn, ModVar, selfAbsRS, Rad, NatCon, InterWeights,
                            nuPrim[where_RS], intermid_ind_RS, True)
                        tauRS = tauFS[:where_RS[-1] + 3] + tauRS_component
                        PRSprimTemp *= (1 - np.exp(-tauRS)) / tauRS
                    #PRSprimTemp[where_RS] = radiation_function(Dyn , numRS , nucRS , nuPrim , beta , Phi , pRS , PRSmaxF , PRSmaxS)
                    #PRSprimTemp[where_angleInd_RS] , rho3primBehind , rho3primForward , thicknessRS_behind , thicknessRS_forward = interpolation_function(Dyn.R , Dyn.Gamma , Dyn.rho4 , Dyn.M3 , numRS , nucRS , nuPrimBehind[where_angleInd_RS] , nuPrimForward[where_angleInd_RS] , Dyn.theta , beta , angleInd_RS , cosAng[where_angleInd_RS] , pRS , PmaxF_RS , PmaxS_RS , weight[where_angleInd_RS] , weight1[where_angleInd_RS] , weight2[where_angleInd_RS])

                ### Now we
                """
                #Angular grid
                xAngInt = np.linspace(0,upperLimit,surfaceRings+1) #The shell is divided into surfaceRing number of rings, with surfaceRings+1 number of borders. xAnd has the central angle of each ring segment, while xAngInt has the border angle.
                xAng = (xAngInt[:-1] + xAngInt[1:]) / 2
                ### Use interpolating when calculating forward shock?
                F_interpolation = True
                if xAng[-1] > 0.5:
                    cosAngm1 = np.cos(xAng) - 1
                else:
                    cosAngm1 = -xAng**2/2 + xAng**4/24 - xAng**6/720 ### Taylor expansion
                cosAng = np.cos(xAng)
                ### Looping along the Equal Arrival Time Surface
                # angleInd[0] = np.argmin(np.abs((1+z)*(tburst - Dyn.R*cosAngm1[0]/c) - tobsRed[rimI])) + 1 #Index of the time at the surface point in the observer's LoS
                #if tobs_to_EATS_diff
                while tobs[angleInd[0]] == tobs[angleInd[0]+1]: ### In case of duplicate entries
                    angleInd[0] += 1
                if angleInd[0] == 0:
                    PprimTemp = 0.
                    if UseOp.reverseShock: PRSprimTemp = 0.
                    continue
                where_angleInd = np.ones(surfaceRings,dtype=bool) ### If this point is before t=0
                ### Finds the elements to the EATS
                for indAng in range(0,surfaceRings):
                    if indAng > 0: angleInd[indAng] = np.copy(angleInd[indAng-1])
                    if useEATS:
                        EATS_time_shift = (Dyn.R[angleInd[indAng]] * cosAngm1[indAng])/c
                    else:
                        EATS_time_shift = Dyn.R[angleInd[indAng]] / c
                    while tobsRed[rimI] < ((1+z) * (tburst[angleInd[indAng]] - EATS_time_shift)):
                        if angleInd[indAng] == 0: ### If a too early point is sought
                            where_angleInd[indAng] = False
                            break
                        angleInd[indAng] -= 1 #Finding the index corresponding to the shell behind the currently evaluated point on the EATS
                    if tobsRed[rimI] > ((1+z) * (tburst[angleInd[indAng]+1] - (Dyn.R[angleInd[indAng]+1] * cosAngm1[indAng])/c)):
                        print 'tobsFront behind!!!!'
                    if angleInd[indAng] < 0:
                        print 'angleInd < 0'
                        raise SystemExit(0)
                intermid_ind = angleInd[where_angleInd]
                ### Finds what elements on the EATsurface has a counterpart in the RS
                if UseOp.reverseShock:
                    where_angleInd_RS = np.where((intermid_ind >= RS_elements_lower) & ( intermid_ind < (RS_elements_upper-1)))
                    angleInd_RS = np.copy(intermid_ind[where_angleInd_RS]) ### These elements should be used when calculating radiation from RS
                """

                #angle_integ = (np.cos(xAngInt[:-1]) - np.cos(xAngInt[1:])) * phiInter #Angular integration segments
                # Select which shock components enter the total power.
                if not Plot_Exceptions.RS_only and not Plot_Exceptions.FS_only:
                    if UseOp.reverseShock and Rad.RS_in_EATS:
                        PprimTot = np.copy(PprimTemp)
                        PprimTot[:where_RS[-1] + 3] += PRSprimTemp
                    else:
                        PprimTot = PprimTemp
                elif Plot_Exceptions.FS_only or not UseOp.reverseShock:
                    PprimTot = PprimTemp
                elif Plot_Exceptions.RS_only:
                    PprimTot = np.zeros(EATSrings)
                    if Rad.RS_in_EATS:
                        PprimTot[:where_RS[-1] + 3] = PRSprimTemp
                """
                if (tobsRed[rimI] > 84*86400 ) and (freqArr < 1e10):
                    print tobsRed[rimI] / 86400
                    print freqArr
                    print phiInter[0]
                    plt.plot(Phi,phiInter)
                    plt.show()
                """

                # Trapezoidal integration over cos(Phi), scaled to a flux.
                F[rimI] = np.trapz(PprimTot * phiInter, np.cos(Phi)) * distance_factor  ### Negative sign is because integration is reversed on the x-axis (angle axis)

                # Store the separate components for plotting / saving.
                if UseOp.runOption == 'LC' and not UseOp.createMock:
                    if not Plot_Exceptions.RS_only:
                        Flux.FFS[nuIte, rimI] = np.trapz(
                            PprimTemp * phiInter, np.cos(Phi)) * distance_factor
                    if UseOp.reverseShock and not Plot_Exceptions.FS_only and Rad.RS_in_EATS:
                        Flux.FRS[nuIte, rimI] = np.trapz(
                            PRSprimTemp * phiInter[:where_RS[-1] + 3],
                            np.cos(Phi[:where_RS[-1] + 3])) * distance_factor
                    Flux.Ftotal[nuIte, rimI] = np.copy(F[rimI])

        if UseOp.plotComponents and UseOp.runOption == 'LC':
            if not Plot_Exceptions.RS_only:
                plt.plot(
                    tobsRed / PlotDetails.scalePlotTime[UseOp.daysOrSec],
                    Flux.FFS[nuIte] *
                    PlotDetails.scaleFluxAxis[UseOp.fluxAxis],
                    '%s--' % PlotDetails.colourCycle[nuIte])
            if UseOp.reverseShock:
                if not Plot_Exceptions.FS_only:
                    plt.plot(
                        tobsRed / PlotDetails.scalePlotTime[UseOp.daysOrSec],
                        Flux.FRS[nuIte] *
                        PlotDetails.scaleFluxAxis[UseOp.fluxAxis],
                        '%s:' % PlotDetails.colourCycle[nuIte])

        if UseOp.thermalComp:
            # NOTE(review): `z` and `thermal_only` are not defined anywhere in
            # this function (the redshift is otherwise read as ModVar.z), and
            # thermal_component is never filled after being zero-initialised.
            # Unless both names exist at module level this branch raises
            # NameError -- confirm.
            Fthermal = (1 + z) / (2 * D**2) * thermal_component
            if UseOp.plotComponents and (UseOp.runOption == 'LC'):
                colourCycle = ['b', 'g', 'r', 'c', 'm', 'y', 'k']  #Cycle of colours in plotting. Matplotlib standard cycle
                scalePlotTime = {'d': 86400., 'h': 3600., 'm': 60., 's': 1.}
                scaleFluxAxis = {'mJy': 1.e3, 'Jy': 1.}
                if not UseOp.reverseShock and not thermal_only:
                    plt.plot(tobsRed / scalePlotTime[UseOp.daysOrSec],
                             F * scaleFluxAxis[UseOp.fluxAxis],
                             '%s--' % colourCycle[nuIte % len(colourCycle)])
                plt.plot(tobsRed / scalePlotTime[UseOp.daysOrSec],
                         Fthermal * scaleFluxAxis[UseOp.fluxAxis],
                         '%s:' % colourCycle[nuIte % len(colourCycle)])
            if (not Plot_Exceptions.RS_only) and (not Plot_Exceptions.FS_only):
                if thermal_only:
                    F = Fthermal
                else:
                    F += Fthermal

        if noChi2 == False:  ### noChi2 is true if data is not loaded
            if UseOp.runOption == 'LC':
                for fInd2 in range(numberOfEmpties[nuIte]):
                    #testF[fInd2] = F[np.argmin(np.abs(tdataLC[fInd2]-tobsRed))]
                    ### Interpolating produced points onto data points
                    middleInd = np.argmin(np.abs(tdataLC[fInd2] - tobsRed))
                    behindInd = middleInd - (tdataLC[fInd2] < tobsRed[middleInd])  ### Index directly behind the model point
                    # Linear interpolation weights in log10(time).
                    Fweight1, Fweight2, Fweight = np.log10(tdataLC[fInd2]) - np.log10(tobsRed[behindInd]), np.log10(tobsRed[behindInd + 1]) - np.log10(tdataLC[fInd2]), np.log10(tobsRed[behindInd + 1]) - np.log10(tobsRed[behindInd])
                    Finter = (F[behindInd] * Fweight2 + F[behindInd + 1] * Fweight1) / Fweight
                    if UseOp.chi2_type == 'lin':
                        chi2 += ((Fdata[fInd2] - Finter) / errorbar[fInd2])**2
                    elif UseOp.chi2_type == 'log':
                        chi2 += (np.log10(Fdata[fInd2] / Finter))**2 / np.log10((Fdata[fInd2] + errorbar[fInd2]) / Fdata[fInd2])**2
                    else:
                        print 'Bad chi2 type %s. Now exiting' % UseOp.chi2_type
                        raise SystemExit(0)
            else:
                # Model times coincide with the data times here, so the
                # fluxes are compared element-wise.
                #if (np.sum(F < 0) == 0): # Avoiding negative fluxes
                if UseOp.chi2_type == 'lin':
                    F[np.where(F < 0)] = 0.
                    chi2 += np.sum(((Fdata - F) / errorbar)**2)
                elif UseOp.chi2_type == 'log':
                    # Replace non-positive fluxes so the logarithm is defined.
                    F[np.where(F <= 0)] = 1e-30
                    chi2 += np.sum(
                        np.log10(Fdata / F)**2 / np.log10(
                            (Fdata + errorbar) / Fdata)**2)
                    # Debug stops: these block on keyboard input.
                    if np.count_nonzero(F) != len(F):
                        raw_input('F has %d nonzeros but length %d' %
                                  (np.count_nonzero(F), len(F)))
                    if np.isnan(chi2) or np.isinf(chi2):
                        raw_input(F)
                else:
                    print 'Bad chi2 type %s. Now exiting' % UseOp.chi2_type
                    raise SystemExit(0)
                # NOTE(review): returns a 3-tuple, unlike the 4-tuple returned
                # at the end of the function.
                if chi2 <= 0:
                    return float('inf'), None, None
                #else:
                #    print 'Bad flux output. Returning chi2 = \'inf\''
                #    return float('inf'),float('nan'),float('nan') #If we get a negative flux, return 'NaN'

        if (UseOp.runOption == 'LC') or (UseOp.runOption == 'one-sigma'):
            # print "Loop time = %f"%loopTimerTotal
            lightcurve[nuIte] = np.copy(F)  #np.concatenate([F , [-1]*(len(lightcurve[nuIte]) - len(F))])

    if (UseOp.runOption == 'LC') or (UseOp.runOption == 'one-sigma'):
        # NOTE(review): `time`, `dynTimeStart` and `Flux` are only set up for
        # runOption 'LC' (Flux additionally requires createMock to be false),
        # so other combinations reaching this block look like a NameError --
        # confirm.
        print "Synchrotron time use: %f s" % (time.time() - dynTimeStart)

        ### Saving flux
        if not os.path.isdir('Flux'):
            os.mkdir('Flux/')
            print 'Created directory Flux'
        if not Plot_Exceptions.RS_only:
            if os.path.isfile('Flux/FFS.txt'):
                os.system('rm Flux/FFS.txt')
            np.savetxt('Flux/FFS.txt', Flux.FFS)
        if UseOp.reverseShock and not Plot_Exceptions.FS_only:
            if os.path.isfile('Flux/FRS.txt'):
                os.system('rm Flux/FRS.txt')
            np.savetxt('Flux/FRS.txt', Flux.FRS)
        if os.path.isfile('Flux/Ftotal.txt'):
            os.system('rm Flux/Ftotal.txt')
        np.savetxt('Flux/Ftotal.txt', Flux.Ftotal)
        # tobsRed is the time grid of the last loop iteration.
        np.savetxt('Flux/tobs.txt', tobsRed)

        if UseOp.allowPrint & (noChi2 == False):
            print "chi2 = %s\nReduced chi2 = %s" % (chi2, chi2 / (numberOfPoints - ndims))
        #print lightcurve
        #outputCounter = 0
        #else: outputCounter += 1

        # Jet-break estimate: find where the field-of-view angle 1/Gamma
        # equals theta - alpha (start) and theta + alpha (end), and
        # interpolate the observer time between the neighbouring points.
        fovAngle = 1 / Dyn.Gamma  ### Field of view angle. Not used in EATS integration

        startJB_index = np.argmin(np.abs(Dyn.theta - ModVar.alpha - fovAngle))
        if startJB_index >= len(Dyn.tobs) - 1:
            # A negative value marks a lower limit; it is printed with '>'
            # below.
            startJetBreak = -Dyn.tobs[startJB_index]  ### If jetbreak starts at really late times
        else:
            startJB_index -= ((Dyn.theta[startJB_index] - ModVar.alpha - fovAngle[startJB_index]) < 0)  ### Making sure the field of view is still just a little bit smaller than the rim
            startJB_weight1 = Dyn.theta[startJB_index] - ModVar.alpha - fovAngle[startJB_index]
            startJB_weight2 = fovAngle[startJB_index + 1] - Dyn.theta[startJB_index + 1] + ModVar.alpha
            print startJB_weight1
            print startJB_weight2

        endJB_index = np.argmin(np.abs(Dyn.theta + ModVar.alpha - fovAngle))
        if endJB_index >= len(Dyn.tobs) - 1:
            endJetBreak = -Dyn.tobs[endJB_index]  ### if jetbreak starts at really late times
        else:
            endJB_index -= ((Dyn.theta[endJB_index] + ModVar.alpha) < fovAngle[endJB_index])  ### Making sure the field of view is still just a little bit smaller than the last crossing of the rim
            endJB_weight1 = Dyn.theta[endJB_index] + ModVar.alpha - fovAngle[endJB_index]
            endJB_weight2 = fovAngle[endJB_index + 1] - Dyn.theta[endJB_index + 1] - ModVar.alpha

        if startJB_index < len(Dyn.tobs) - 1:
            startJetBreak = (Dyn.tobs[startJB_index] * startJB_weight2 + Dyn.tobs[startJB_index + 1] * startJB_weight1) / (startJB_weight1 + startJB_weight2)
        if endJB_index < len(Dyn.tobs) - 1:
            endJetBreak = (Dyn.tobs[endJB_index] * endJB_weight2 + Dyn.tobs[endJB_index + 1] * endJB_weight1) / (endJB_weight1 + endJB_weight2)

        # '=' prefix for an interpolated value, '>' for a lower limit; the
        # magnitude is converted from seconds to days.
        startJB_text = '%s %f' % ('=' * (startJetBreak > 0) + '>' * (startJetBreak < 0), startJetBreak * (((startJetBreak > 0) * 2) - 1) / 86400)
        endJB_text = '%s %f' % ('=' * (endJetBreak > 0) + '>' * (endJetBreak < 0), endJetBreak * (((endJetBreak > 0) * 2) - 1) / 86400)
        print "Field of view started crossing the rim at tobs %s days and covered the entire rim at tobs %s days." % (startJB_text, endJB_text)

        return lightcurve, startJetBreak, endJetBreak, tobsGrid, Flux
    # NOTE(review): 'one-sigma' is already handled by the branch above, so
    # this elif cannot be reached as written.
    elif UseOp.runOption == 'one-sigma':
        return lightcurve, 0., 0., tobsGrid, Flux
    else:
        return chi2, None, None, None
# Script fragment: builds the dynamics model, a data viewer and an LNMPC
# controller, then defines the equilibrium input and a reference state.
import numpy as np
import params
from dynamics import Dynamics
from quadrotor_viewer import QuadRotor_Viewer
from scipy.spatial.transform import Rotation
from mpc import MPC, NMPC, LNMPC
from data_viewer import data_viewer
import time

if __name__ == "__main__":
    dynamics = Dynamics(params.dt)
    # viewer = QuadRotor_Viewer()
    data_view = data_viewer()

    # State-space matrices evaluated at the current state of the model.
    A, B = dynamics.get_SS(dynamics.state)
    #LNMPC doesn't seem to work any better
    # controller = MPC(A, B, params.u_max, params.u_min, T=params.T) #Typical MPC
    controller = LNMPC(
        A, B, params.u_max, params.u_min, T=params.T
    )  #Non-linear model predictive control relinearizing about the current state
    # controller = NMPC(params.nu_max, params.nu_min, T = params.T) #Way too slow

    t0 = params.t0
    # Equilibrium input: a force equal to the weight (mass * 9.81) and three
    # zero entries.
    F_eq = params.mass * 9.81
    T_eq = 0.0
    u_eq = np.array([F_eq, T_eq, T_eq, T_eq])
    # Twelve-element vector; presumably the reference state handed to the
    # controller later in the script -- confirm against the code that follows.
    xr = np.array([
        5.0, -5.0, -5.0, 0.0, 0.0, 0.0, 0.0, 0.0, np.deg2rad(0), 0.0, 0.0, 0.0
    ])
# The Animation.py file is kept in the parent directory, # so the parent directory path needs to be added. sys.path.append('..') from dynamics import Dynamics from animation import Animation t_start = 0.0 # Start time of simulation t_end = 50.0 # End time of simulation t_Ts = P.Ts # Simulation time step t_elapse = 0.1 # Simulation time elapsed between each iteration t_pause = 0.01 # Pause between each iteration user_input = Sliders() # Instantiate Sliders class simAnimation = Animation() # Instantiate Animate class dynam = Dynamics() # Instantiate Dynamics class t = t_start # Declare time variable to keep track of simulation time elapsed while t < t_end: plt.ion() # Make plots interactive plt.figure( user_input.fig.number) # Switch current figure to user_input figure plt.pause(0.001) # Pause the simulation to detect user input # The dynamics of the model will be propagated in time by t_elapse # at intervals of t_Ts. t_temp = t + t_elapse while t < t_temp: dynam.propagateDynamics( # Propagate the dynamics of the model in time
# Script fragment: builds the Encoder and Dynamics networks in a fresh
# TensorFlow graph and starts a smoke test of the encoder on random inputs.
dynamics_lr = 0.01  # learning rate passed to the Dynamics model

# Start from an empty default graph before the models are constructed.
tf.reset_default_graph()
with tf.Session() as sess:
    encoder = Encoder(sess=sess,
                      input_dim=state_dim,
                      output_dim=feature_dim,
                      gamma=0.98,
                      hidden_sizes=encoder_hidden_sizes,
                      learning_rate=encoder_lr)
    # The Dynamics model is sized by the encoder's output dimension
    # (state_dim=feature_dim).
    dynamics = Dynamics(sess=sess,
                        state_dim=feature_dim,
                        action_dim=action_dim,
                        hidden_sizes=dynamics_hidden_sizes,
                        learning_rate=dynamics_lr)
    # Initialise variables only after both models have been constructed.
    sess.run(tf.global_variables_initializer())
    # Write the graph to ./output (e.g. for inspection in TensorBoard).
    writer = tf.summary.FileWriter("./output", sess.graph)

    ############################ Test Encoder ######################################
    # Random inputs drawn uniformly from [0, 1).
    state_1 = np.random.rand(state_dim)
    state_2 = np.random.rand(state_dim)
    reward_1 = np.random.rand(1)
    reward_2 = np.random.rand(1)
    wasserstein = np.random.rand(1)
    encoded_1 = encoder.predict(state=state_1)