def __init__(self, sim = None, memory = None): '''init memory space and sim space''' # sim space self.sim = sim if sim else Sim() # memory space with vars and funs if not memory: self.memory = {**sys_funs} else: self.memory = memory
def simulate(starting_temp, ambient_temp, controller, renderer, t=.036): sim = Sim(starting_temp, ambient_temp) for i in range(NUM_TICS): renderer.render(sim, controller) controller.before_tick(sim, tictime=t) sim.tick(tictime=t) controller.after_tick(sim, tictime=t) sim.cleanup()
class Trainer(object): def __init__(self): """ Initializing DDPG """ self.sim = Sim() self.ddpg = DDPG(a_dim=A_DIM, s_dim=S_DIM, batch_size=64, memory_capacity=100000, gamma=GAMMA, lr_a=0.001) #gamma=0.98 self.ep_reward = 0.0 self.current_ep = 0 self.current_step = 0 self.current_action = np.array([.0, .0, .0]) self.done = True # if the episode is done self.var = VAR_INIT self.reward_record = list() self.ep_record = list() self.fig = plt.gcf() self.fig.show() self.fig.canvas.draw() print("Initialized DDPG") """ Setting communication""" self.pc = PC() self.pc.header.frame_id = 'world' self.pub = rospy.Publisher('normalized_state', PC, queue_size=10) self.pub1 = rospy.Publisher('state', PC, queue_size=10) """ self.sub = rospy.Subscriber('robot_pose', PA, self.callback, queue_size=1) self.pub = rospy.Publisher('normalized_state', PC, queue_size=10) rospy.wait_for_service('airpress_control', timeout=5) self.target_PS = PS() self.action_V3 = Vector3() self.state = PA() self.updated = False # if s is updated self.got_callback1 = False self.got_callback2 = False print("Initialized communication") """ """ Reading targets """ """ The data should be w.r.t origin by base position """ self.ends = pickle.load(open('./data/targets.p', 'rb')) self.x_offset = X_OFFSET self.y_offset = Y_OFFSET self.z_offset = Z_OFFSET self.scaler = 1/0.3 self.sample_target() print("Read target data") #self.ddpg.restore_momery() #self.ddpg.restore_model() while not (rospy.is_shutdown()): if self.current_ep < MAX_EPISODES: if self.current_step < MAX_EP_STEPS: #rospy.sleep(0.5) p = self.sim.current_pose s = np.vstack((p, self.target)) s = s[3:,:] s = self.normalize_state(s) norm_a = self.ddpg.choose_action(s.reshape(6)[-S_DIM:]) noise_a = np.random.normal(norm_a, self.var) action = np.clip(noise_a, -1.0, 1.0) self.current_action = action*A_BOUND + A_BOUND try: p_ = self.sim.update_pose(self.current_action) except: print self.ddpg.choose_action(s.reshape(6)[-S_DIM:]) print s.reshape(6)[-S_DIM:] s_ = np.vstack((p_, self.target)) s_ = s_[3:,:] s_ = self.normalize_state(s_) self.compute_reward(s[0,:], s_[1,:]) self.current_step += 1 if self.reward > GOT_GOAL: #self.reward += 1.0 self.ep_reward += self.reward if self.current_ep% 10 ==0: self.reward_record.append(self.ep_reward / self.current_step) self.ep_record.append(self.current_ep) plt.plot(self.ep_record, self.reward_record) plt.ylim([-1.5,0.0]) self.fig.canvas.draw() self.fig.savefig('learning.png') print('\n') print "Target Reached" print("Normalized action:") print norm_a print("Noise action:") print action print("Output action:") print self.current_action print("Reward: %f" % self.reward) print('Episode:', self.current_ep, ' Reward: %f' % self.ep_reward, 'Explore: %.3f' % self.var,) print('*' * 40) self.ddpg.save_model() self.ddpg.save_memory() self.done = True self.current_step = 0 self.current_ep += 1 self.sample_target() self.ep_reward = 0 #self.ddpg.save_memory() #self.ddpg.save_model() """ self.current_action = np.array([.0, .0, .0]) self.action_V3.x, self.action_V3.y, self.action_V3.z \ = self.current_action[0], self.current_action[1], self.current_action[2] self.run_action(self.action_V3) """ else: self.ep_reward += self.reward if self.current_step == MAX_EP_STEPS: if self.current_ep % 10 ==0: self.reward_record.append(self.ep_reward / self.current_step) self.ep_record.append(self.current_ep) plt.plot(self.ep_record, self.reward_record) plt.ylim([-1.5, 0.0]) self.fig.canvas.draw() self.fig.savefig('learning.png') print('\n') print "Target Failed" print("Normalized action:") print norm_a print("Noise action:") print action print("Output action:") print self.current_action print("Reward: %f" % self.reward) print('Episode:', self.current_ep, ' Reward: %f' % self.ep_reward, 'Explore: %.3f' % self.var,) print('*' * 40) self.ddpg.save_model() self.ddpg.save_memory() self.done = False self.current_step = 0 self.current_ep += 1 self.sample_target() self.ep_reward = 0 #self.ddpg.save_memory() #self.ddpg.save_model() """ self.current_action = np.array([.0, .0, .0]) self.action_V3.x, self.action_V3.y, self.action_V3.z \ = self.current_action[0], self.current_action[1], self.current_action[2] self.run_action(self.action_V3) """ self.ddpg.store_transition(s.reshape(6)[-S_DIM:], action, self.reward, s_.reshape(6)[-S_DIM:]) if self.ddpg.pointer > TRAIN_POINT: #if (self.current_step % 10 == 0): #self.var *= VAR_DECAY self.var = max(0.0,self.var-1.02/(MAX_EP_STEPS*MAX_EPISODES)) self.ddpg.learn() self.pub_state(s_) else: p = self.sim.current_pose s = np.vstack((p, self.target)) s = s[3:,:] s = self.normalize_state(s) norm_a = self.ddpg.choose_action(s.reshape(6)[-S_DIM:]) self.current_action = norm_a * A_BOUND + A_BOUND print("Normalized action:") print norm_a print("Current action:") print self.current_action p_ = self.sim.update_pose(self.current_action) s_ = np.vstack((p_, self.target)) s_ = s_[3:,:] print("Distance: %f" % np.linalg.norm(s_[0,:]-s_[1,:])) s_ = self.normalize_state(s_) self.compute_reward(s_[0, :], s_[1, :]) print('Explore: %.2f' % self.var,) print("Reward: %f" % self.reward) rospy.sleep(1) #print print self.ddpg.get_value(s.reshape(6)[-S_DIM:],norm_a,self.reward.reshape((-1,1)),s_.reshape(6)[-S_DIM:]) print '\n' self.pub_state(s_) if self.reward > GOT_GOAL: self.done = True self.current_step = 0 self.current_ep += 1 self.sample_target() print "Target Reached" print("Episode %i Ended" % self.current_ep) print("Reward: %f" % self.reward) print('Episode:', self.current_ep, ' Reward: %d' % self.ep_reward, 'Explore: %.2f' % self.var,) print('*' * 40) self.ep_reward = 0 # self.ddpg.save_memory() # self.ddpg.save_model() """ self.current_action = np.array([.0, .0, .0]) self.action_V3.x, self.action_V3.y, self.action_V3.z \ = self.current_action[0], self.current_action[1], self.current_action[2] self.run_action(self.action_V3) """ else: self.done = False self.current_step += 1 if self.current_step == 2: print "Target Failed" print("Reward: %f" % self.reward) print("Episode %i Ends" % self.current_ep) print( 'Episode:', self.current_ep, ' Reward: %d' % self.ep_reward, 'Explore: %.2f' % self.var,) print('*' * 40) self.current_step = 0 self.current_ep += 1 self.sample_target() self.ep_reward = 0 # self.ddpg.save_memory() # self.ddpg.save_model() """ self.current_action = np.array([.0, .0, .0]) self.action_V3.x, self.action_V3.y, self.action_V3.z \ = self.current_action[0], self.current_action[1], self.current_action[2] self.run_action(self.action_V3) """ """ def callback(self, pa): n_px = (np.array([pa.poses[i].position.x for i in range(4)]) - self.x_offset)*self.scaler n_py = (np.array([pa.poses[i].position.y for i in range(4)]) - self.y_offset)*self.scaler n_pz = (np.array([pa.poses[i].position.z for i in range(4)]) - self.z_offset)*self.scaler self.end = np.array((n_px[3], n_py[3], n_pz[3])) self.n_t = (self.target - np.array([0,0,Z_OFFSET]))*self.scaler self.s = np.concatenate((n_px, n_py, n_pz, self.n_t)) self.pub_state(n_px, n_py, n_pz, self.n_t) self.updated = True """ def sample_target(self): self.target = self.ends[np.random.randint(self.ends.shape[0])] """ def run_action(self,control): try: client = rospy.ServiceProxy('airpress_control', OneSeg) resp = client(control) return resp.status except rospy.ServiceException, e: print "Service call failed: %s"%e """ def compute_reward(self,end,target): error = target - end self.reward = -np.linalg.norm(error) #print np.linalg.norm(error) """ def pub_state(self, n_px, n_py, n_pz, n_t): pts = list() for i in range(4): pt = Point() pt.x = n_px[i] pt.y = n_py[i] pt.z = n_pz[i] pts.append(pt) pt = Point() pt.x, pt.y, pt.z = n_t[0], n_t[1], n_t[2] pts.append(pt) self.pc.points = pts self.pub.publish(self.pc) """ def pub_state(self, state): pts = list() for i in range(state.shape[0]): pt = Point() pt.x = state[i,0] pt.y = state[i,1] pt.z = state[i,2] pts.append(pt) self.pc.points = pts self.pub.publish(self.pc) pts = list() for i in range(state.shape[0]): pt = Point() pt.x = state[i, 0] / 10.0 pt.y = state[i, 1] / 10.0 pt.z = state[i, 2] / 35.0 + 0.42 pts.append(pt) self.pc.points = pts self.pub1.publish(self.pc) def normalize_state(self,state): offset = np.array([0,0,0.42]) scaler = np.array([10,10,35]) s = state - offset s = np.multiply(s, scaler) return s def calculate_dist(self, state): offset = np.array([0, 0, 0.42]) scaler = np.array([10, 10, 35]) s = np.multiply(state,1.0/scaler) s += offset return np.linalg.norm(s)
def __init__(self): """ Initializing DDPG """ self.sim = Sim() self.ddpg = DDPG(a_dim=A_DIM, s_dim=S_DIM, batch_size=64, memory_capacity=100000, gamma=GAMMA, lr_a=0.001) #gamma=0.98 self.ep_reward = 0.0 self.current_ep = 0 self.current_step = 0 self.current_action = np.array([.0, .0, .0]) self.done = True # if the episode is done self.var = VAR_INIT self.reward_record = list() self.ep_record = list() self.fig = plt.gcf() self.fig.show() self.fig.canvas.draw() print("Initialized DDPG") """ Setting communication""" self.pc = PC() self.pc.header.frame_id = 'world' self.pub = rospy.Publisher('normalized_state', PC, queue_size=10) self.pub1 = rospy.Publisher('state', PC, queue_size=10) """ self.sub = rospy.Subscriber('robot_pose', PA, self.callback, queue_size=1) self.pub = rospy.Publisher('normalized_state', PC, queue_size=10) rospy.wait_for_service('airpress_control', timeout=5) self.target_PS = PS() self.action_V3 = Vector3() self.state = PA() self.updated = False # if s is updated self.got_callback1 = False self.got_callback2 = False print("Initialized communication") """ """ Reading targets """ """ The data should be w.r.t origin by base position """ self.ends = pickle.load(open('./data/targets.p', 'rb')) self.x_offset = X_OFFSET self.y_offset = Y_OFFSET self.z_offset = Z_OFFSET self.scaler = 1/0.3 self.sample_target() print("Read target data") #self.ddpg.restore_momery() #self.ddpg.restore_model() while not (rospy.is_shutdown()): if self.current_ep < MAX_EPISODES: if self.current_step < MAX_EP_STEPS: #rospy.sleep(0.5) p = self.sim.current_pose s = np.vstack((p, self.target)) s = s[3:,:] s = self.normalize_state(s) norm_a = self.ddpg.choose_action(s.reshape(6)[-S_DIM:]) noise_a = np.random.normal(norm_a, self.var) action = np.clip(noise_a, -1.0, 1.0) self.current_action = action*A_BOUND + A_BOUND try: p_ = self.sim.update_pose(self.current_action) except: print self.ddpg.choose_action(s.reshape(6)[-S_DIM:]) print s.reshape(6)[-S_DIM:] s_ = np.vstack((p_, self.target)) s_ = s_[3:,:] s_ = self.normalize_state(s_) self.compute_reward(s[0,:], s_[1,:]) self.current_step += 1 if self.reward > GOT_GOAL: #self.reward += 1.0 self.ep_reward += self.reward if self.current_ep% 10 ==0: self.reward_record.append(self.ep_reward / self.current_step) self.ep_record.append(self.current_ep) plt.plot(self.ep_record, self.reward_record) plt.ylim([-1.5,0.0]) self.fig.canvas.draw() self.fig.savefig('learning.png') print('\n') print "Target Reached" print("Normalized action:") print norm_a print("Noise action:") print action print("Output action:") print self.current_action print("Reward: %f" % self.reward) print('Episode:', self.current_ep, ' Reward: %f' % self.ep_reward, 'Explore: %.3f' % self.var,) print('*' * 40) self.ddpg.save_model() self.ddpg.save_memory() self.done = True self.current_step = 0 self.current_ep += 1 self.sample_target() self.ep_reward = 0 #self.ddpg.save_memory() #self.ddpg.save_model() """ self.current_action = np.array([.0, .0, .0]) self.action_V3.x, self.action_V3.y, self.action_V3.z \ = self.current_action[0], self.current_action[1], self.current_action[2] self.run_action(self.action_V3) """ else: self.ep_reward += self.reward if self.current_step == MAX_EP_STEPS: if self.current_ep % 10 ==0: self.reward_record.append(self.ep_reward / self.current_step) self.ep_record.append(self.current_ep) plt.plot(self.ep_record, self.reward_record) plt.ylim([-1.5, 0.0]) self.fig.canvas.draw() self.fig.savefig('learning.png') print('\n') print "Target Failed" print("Normalized action:") print norm_a print("Noise action:") print action print("Output action:") print self.current_action print("Reward: %f" % self.reward) print('Episode:', self.current_ep, ' Reward: %f' % self.ep_reward, 'Explore: %.3f' % self.var,) print('*' * 40) self.ddpg.save_model() self.ddpg.save_memory() self.done = False self.current_step = 0 self.current_ep += 1 self.sample_target() self.ep_reward = 0 #self.ddpg.save_memory() #self.ddpg.save_model() """ self.current_action = np.array([.0, .0, .0]) self.action_V3.x, self.action_V3.y, self.action_V3.z \ = self.current_action[0], self.current_action[1], self.current_action[2] self.run_action(self.action_V3) """ self.ddpg.store_transition(s.reshape(6)[-S_DIM:], action, self.reward, s_.reshape(6)[-S_DIM:]) if self.ddpg.pointer > TRAIN_POINT: #if (self.current_step % 10 == 0): #self.var *= VAR_DECAY self.var = max(0.0,self.var-1.02/(MAX_EP_STEPS*MAX_EPISODES)) self.ddpg.learn() self.pub_state(s_) else: p = self.sim.current_pose s = np.vstack((p, self.target)) s = s[3:,:] s = self.normalize_state(s) norm_a = self.ddpg.choose_action(s.reshape(6)[-S_DIM:]) self.current_action = norm_a * A_BOUND + A_BOUND print("Normalized action:") print norm_a print("Current action:") print self.current_action p_ = self.sim.update_pose(self.current_action) s_ = np.vstack((p_, self.target)) s_ = s_[3:,:] print("Distance: %f" % np.linalg.norm(s_[0,:]-s_[1,:])) s_ = self.normalize_state(s_) self.compute_reward(s_[0, :], s_[1, :]) print('Explore: %.2f' % self.var,) print("Reward: %f" % self.reward) rospy.sleep(1) #print print self.ddpg.get_value(s.reshape(6)[-S_DIM:],norm_a,self.reward.reshape((-1,1)),s_.reshape(6)[-S_DIM:]) print '\n' self.pub_state(s_) if self.reward > GOT_GOAL: self.done = True self.current_step = 0 self.current_ep += 1 self.sample_target() print "Target Reached" print("Episode %i Ended" % self.current_ep) print("Reward: %f" % self.reward) print('Episode:', self.current_ep, ' Reward: %d' % self.ep_reward, 'Explore: %.2f' % self.var,) print('*' * 40) self.ep_reward = 0 # self.ddpg.save_memory() # self.ddpg.save_model() """ self.current_action = np.array([.0, .0, .0]) self.action_V3.x, self.action_V3.y, self.action_V3.z \ = self.current_action[0], self.current_action[1], self.current_action[2] self.run_action(self.action_V3) """ else: self.done = False self.current_step += 1 if self.current_step == 2: print "Target Failed" print("Reward: %f" % self.reward) print("Episode %i Ends" % self.current_ep) print( 'Episode:', self.current_ep, ' Reward: %d' % self.ep_reward, 'Explore: %.2f' % self.var,) print('*' * 40) self.current_step = 0 self.current_ep += 1 self.sample_target() self.ep_reward = 0 # self.ddpg.save_memory() # self.ddpg.save_model() """
def train(): print('dqn_setting') sess = tf.Session() game = Sim(SCREEN_WIDTH, SCREEN_HEIGHT, show_game=False) brain = DQN(sess, SCREEN_WIDTH, SCREEN_HEIGHT, NUM_ACTION) rewards = tf.placeholder(tf.float32, [None]) tf.summary.scalar('avg.reward/ep.', tf.reduce_mean(rewards)) saver = tf.train.Saver() sess.run(tf.global_variables_initializer()) writer = tf.summary.FileWriter('logs', sess.graph) summary_merged = tf.summary.merge_all() # initialize target network brain.update_target_network() # decide action decision time using dqn after this time epsilon = 1.0 # number of frames time_step = 0 total_reward_list = [] # Starting for episode in range(MAX_EPISODE): terminal = False total_reward = 0 # gmae reset and get state (=poisition data) state = game.reset() brain.init_state(state) # set state of dqn while not terminal: # choice random action when epsilon < random var, else ( :epilon > random var), choice with dqn # Later time, unless use random action if np.random.rand() < epsilon: action = random.randrange(NUM_ACTION) else: action = brain.get_action() # epsion decrease if episode > OBSERVE: epsilon -= 1 / 1000 # game updates, get data (state / reward / is_gameover) state, reward, terminal = game.step(action) total_reward += reward # brain save this state brain.remember(state, action, reward, terminal) # After little time, In interval, do training if time_step > OBSERVE and time_step % TRAIN_INTERVAL == 0: brain.train() # In interval, do training if time_step % TARGET_UPDATE_INTERVAL == 0: brain.update_target_network() time_step += 1 print('Number of game: %d Score: %d' % (episode + 1, total_reward)) total_reward_list.append(total_reward) if episode % 10 == 0: summary = sess.run(summary_merged, feed_dict={rewards: total_reward_list}) writer.add_summary(summary, time_step) total_reward_list = [] if episode % 100 == 0: saver.save(sess, 'model/dqn.ckpt', global_step=time_step)
def main(): parser = argparse.ArgumentParser() parser.add_argument("n", type=int, help="Samples per task") parser.add_argument("db_name", help="Name of the database to use") parser.add_argument("bot_name", help="Name of the bot folder") parser.add_argument("features", help="Path to json file with bot features") parser.add_argument("-db_ip", default="localhost", help="Database address") parser.add_argument("-db_pwd", default="root", help="User's password") parser.add_argument("-db_user", default="root", help="Database user") parser.add_argument("-db_port", default="3306", help="Database port") parser.add_argument("-log_file", default="/var/www/vui/state.json", help="Log file to save the state") parser.add_argument('-e', '--epochs', default=180, type=int, help="Epochs for training") parser.add_argument('-m', '--max_history', default=5, type=int, help="History size feature") parser.add_argument('-b', '--batch_size', default=32, type=int, help="Batch size for training") parser.add_argument('-v', '--validation_split', default=0.3, type=float, help="Validation splits for training") args = parser.parse_args() database = "mysql://{}:{}@{}:{}/{}".format(args.db_user, args.db_pwd, args.db_ip, args.db_port, args.db_name) db = Database.get_instance(database=database) #logger = Logger.get_instance(args.log_file) frame_factory = FrameFactory() goal_factory = GoalFactory() with open(args.features, "r") as f: tasks = json.load(f)['tasks'] frames, goals = [], [] for dial in range(int(args.n)): for task in tasks: frames.append(frame_factory.build_frames(task)) goals.append(goal_factory.build_goals(task)) #trainer = Seq2ActionAgentTrainer(sum(frames, []), database, RuleBasedEngine()) trainer = FullAgentTrainer(sum(frames, []), database, RuleBasedEngine()) #trainer = DialogManagerTrainer(sum(frames, []), database) #dumper = #dumper(args.bot_name, trainer, domain_file_path="/var/www/vui/", # virtual_env="/var/www/vui/python_virtualenv/", # base_path="/var/www/vui/python/BOTs/", # python_path="/var/www/vui/python/") im = InteractionManager(trainer) dm, sim = DM(im), Sim(im) # generate validation dialogs n_valid_samples = round(len(frames) * args.validation_split) generate_dialogs(n_valid_samples, im, dm, sim, trainer, frames[:n_valid_samples], goals[:n_valid_samples], story_file_name="valid") # generate training dialogs n_train_samples = len(frames) - n_valid_samples generate_dialogs(n_train_samples, im, dm, sim, trainer, frames[n_valid_samples:], goals[n_valid_samples:], story_file_name="story") print("[...] building domain.yml...") trainer.build_domain_yml() if isinstance(trainer, FullAgentTrainer): print("[...] building nlu training file...") trainer.build_phrases_file() print("[...] building dialogs.json file...") trainer.build_file_for_hit() print("[...] cleaning transcript file...") trainer.build_raw_transcript() if isinstance(trainer, Seq2ActionAgentTrainer): print("[...] building user lexicon file...") trainer.build_user_lexicon() print("[...] computing sentence embeddings...") trainer.compute_sentence_embeddings()
#!/usr/bin/env python """ generate data by simulation Zhiang Chen, Oct 30, 2017 MIT License """ import pickle import numpy as np import rospy from simulator import Sim rospy.init_node('simulator', anonymous=True) sim = Sim() actions = list() poses = list() for x in range(21): for y in range(21): for z in range(21): a = np.array([x,y,z]).astype(np.float32) p = sim.update_pose(a) actions.append(a) poses.append(p) sim_data = {'actions':actions,'poses':poses} pickle.dump( sim_data, open( "./data/sim_data.p", "wb" ) )
def __init__(self): """ Initializing DDPG """ self.sim = Sim() self.pfn = PFN(a_dim=A_DIM, s_dim=S_DIM, batch_size=8, memory_capacity=MEMORY_NUM, lr=0.001, bound=A_BOUND) self.ep_reward = 0.0 self.current_action = np.array([.0, .0, .0]) self.done = True # if the episode is done self.reward_record = list() self.ep_record = list() self.fig = plt.gcf() self.fig.show() self.fig.canvas.draw() print("Initialized PFN") """ Setting communication""" self.pc = PC() self.pc.header.frame_id = 'world' self.pub = rospy.Publisher('normalized_state', PC, queue_size=10) self.pub1 = rospy.Publisher('state', PC, queue_size=10) self.sub = rospy.Subscriber('Robot_1/pose', PS, self.callback, queue_size=1) rospy.wait_for_service('airpress_control', timeout=5) self.target_PS = PS() self.action_V3 = Vector3() self.updated = False # if s is updated self.got_target = False print("Initialized communication") """ Reading targets """ """ The data should be w.r.t origin by base position """ self.ends = pickle.load(open('./data/targets.p', 'rb')) self.x_offset = X_OFFSET self.y_offset = Y_OFFSET self.z_offset = Z_OFFSET self.sample_target() print("Read target data") #self.pfn.restore_momery() self.pfn.restore_model('model_pfn') memory_ep = np.ones((MAX_EP_STEPS, 3 + 3 + 1 + 1)) * -100 self.current_ep = 0 self.current_step = 0 while not (rospy.is_shutdown()): self.updated = False while (not self.updated) & (not rospy.is_shutdown()): rospy.sleep(0.1) real_target = self.real_target.copy() s = self.normalize_state(real_target) action, act_var = self.pfn.choose_action2(s) self.action_V3.x, self.action_V3.y, self.action_V3.z \ = action[0], action[1], action[2] self.run_action(self.action_V3) print '\n' #rospy.sleep(1.0) '''
def __init__(self, *args, **kwds): # begin wxGlade: MyFrame.__init__ kwds["style"] = kwds.get("style", 0) | wx.DEFAULT_FRAME_STYLE wx.Frame.__init__(self, *args, **kwds) self.SetSize((400, 300)) # Menu Bar self.frame_menubar = wx.MenuBar() wxglade_tmp_menu = wx.Menu() wxglade_tmp_menu.Append(wx.ID_SAVE, "Screenshot", "") self.Bind(wx.EVT_MENU, self.OnSave, id=wx.ID_SAVE) wxglade_tmp_menu.Append(wx.ID_EXIT, "Quit", "") self.Bind(wx.EVT_MENU, self.OnQuit, id=wx.ID_EXIT) self.frame_menubar.Append(wxglade_tmp_menu, "File") self.SetMenuBar(self.frame_menubar) # Menu Bar end self.sim_image = ImagePanel(self, wx.ID_ANY) self.sim = Sim() self.single_click = wx.Button(self, wx.ID_ANY, "Single") self.double_click = wx.Button(self, wx.ID_ANY, "Double") self.single_click_copy_1 = wx.Button(self, wx.ID_ANY, "Long") self.down_flip = wx.Button(self, wx.ID_ANY, "Down") self.up_flip = wx.Button(self, wx.ID_ANY, "Up") self.left_flip = wx.Button(self, wx.ID_ANY, "Left") self.right_flip = wx.Button(self, wx.ID_ANY, "Right") self.azimuth = wx.Slider(self, wx.ID_ANY, 0, 0, 359, style=wx.SL_HORIZONTAL | wx.SL_LABELS) self.inclination = wx.Slider(self, wx.ID_ANY, 0, -90, 90, style=wx.SL_HORIZONTAL | wx.SL_LABELS) self.roll = wx.Slider(self, wx.ID_ANY, 0, -180, 180, style=wx.SL_HORIZONTAL | wx.SL_LABELS) self.__set_properties() self.__do_layout() self.Bind(wx.EVT_BUTTON, lambda evt: self.sim.set_input(Input.SINGLE_CLICK), self.single_click) self.Bind(wx.EVT_BUTTON, lambda evt: self.sim.set_input(Input.DOUBLE_CLICK), self.double_click) self.Bind(wx.EVT_BUTTON, lambda evt: self.sim.set_input(Input.LONG_CLICK), self.single_click_copy_1) self.Bind(wx.EVT_BUTTON, lambda evt: self.sim.set_input(Input.FLIP_DOWN), self.down_flip) self.Bind(wx.EVT_BUTTON, lambda evt: self.sim.set_input(Input.FLIP_UP), self.up_flip) self.Bind(wx.EVT_BUTTON, lambda evt: self.sim.set_input(Input.FLIP_LEFT), self.left_flip) self.Bind(wx.EVT_BUTTON, lambda evt: self.sim.set_input(Input.FLIP_RIGHT), self.right_flip) self.Bind(wx.EVT_COMMAND_SCROLL, self.orientation_changed, self.azimuth) self.Bind(wx.EVT_COMMAND_SCROLL, self.orientation_changed, self.inclination) self.Bind(wx.EVT_COMMAND_SCROLL, self.orientation_changed, self.roll)
class MyFrame(wx.Frame): def __init__(self, *args, **kwds): # begin wxGlade: MyFrame.__init__ kwds["style"] = kwds.get("style", 0) | wx.DEFAULT_FRAME_STYLE wx.Frame.__init__(self, *args, **kwds) self.SetSize((400, 300)) # Menu Bar self.frame_menubar = wx.MenuBar() wxglade_tmp_menu = wx.Menu() wxglade_tmp_menu.Append(wx.ID_SAVE, "Screenshot", "") self.Bind(wx.EVT_MENU, self.OnSave, id=wx.ID_SAVE) wxglade_tmp_menu.Append(wx.ID_EXIT, "Quit", "") self.Bind(wx.EVT_MENU, self.OnQuit, id=wx.ID_EXIT) self.frame_menubar.Append(wxglade_tmp_menu, "File") self.SetMenuBar(self.frame_menubar) # Menu Bar end self.sim_image = ImagePanel(self, wx.ID_ANY) self.sim = Sim() self.single_click = wx.Button(self, wx.ID_ANY, "Single") self.double_click = wx.Button(self, wx.ID_ANY, "Double") self.single_click_copy_1 = wx.Button(self, wx.ID_ANY, "Long") self.down_flip = wx.Button(self, wx.ID_ANY, "Down") self.up_flip = wx.Button(self, wx.ID_ANY, "Up") self.left_flip = wx.Button(self, wx.ID_ANY, "Left") self.right_flip = wx.Button(self, wx.ID_ANY, "Right") self.azimuth = wx.Slider(self, wx.ID_ANY, 0, 0, 359, style=wx.SL_HORIZONTAL | wx.SL_LABELS) self.inclination = wx.Slider(self, wx.ID_ANY, 0, -90, 90, style=wx.SL_HORIZONTAL | wx.SL_LABELS) self.roll = wx.Slider(self, wx.ID_ANY, 0, -180, 180, style=wx.SL_HORIZONTAL | wx.SL_LABELS) self.__set_properties() self.__do_layout() self.Bind(wx.EVT_BUTTON, lambda evt: self.sim.set_input(Input.SINGLE_CLICK), self.single_click) self.Bind(wx.EVT_BUTTON, lambda evt: self.sim.set_input(Input.DOUBLE_CLICK), self.double_click) self.Bind(wx.EVT_BUTTON, lambda evt: self.sim.set_input(Input.LONG_CLICK), self.single_click_copy_1) self.Bind(wx.EVT_BUTTON, lambda evt: self.sim.set_input(Input.FLIP_DOWN), self.down_flip) self.Bind(wx.EVT_BUTTON, lambda evt: self.sim.set_input(Input.FLIP_UP), self.up_flip) self.Bind(wx.EVT_BUTTON, lambda evt: self.sim.set_input(Input.FLIP_LEFT), self.left_flip) self.Bind(wx.EVT_BUTTON, lambda evt: self.sim.set_input(Input.FLIP_RIGHT), self.right_flip) self.Bind(wx.EVT_COMMAND_SCROLL, self.orientation_changed, self.azimuth) self.Bind(wx.EVT_COMMAND_SCROLL, self.orientation_changed, self.inclination) self.Bind(wx.EVT_COMMAND_SCROLL, self.orientation_changed, self.roll) # end wxGlade def __set_properties(self): # begin wxGlade: MyFrame.__set_properties self.SetTitle("frame") # end wxGlade def __do_layout(self): # begin wxGlade: MyFrame.__do_layout sizer_2 = wx.BoxSizer(wx.VERTICAL) sizer_3 = wx.BoxSizer(wx.HORIZONTAL) sizer_6 = wx.StaticBoxSizer( wx.StaticBox(self, wx.ID_ANY, "Orientation"), wx.HORIZONTAL) grid_sizer_1 = wx.FlexGridSizer(3, 2, 0, 4) sizer_5 = wx.StaticBoxSizer(wx.StaticBox(self, wx.ID_ANY, "Flip"), wx.VERTICAL) sizer_4 = wx.StaticBoxSizer(wx.StaticBox(self, wx.ID_ANY, "Button"), wx.VERTICAL) sizer_2.Add(self.sim_image, 1, wx.EXPAND, 0) sizer_4.Add(self.single_click, 0, wx.ALIGN_CENTER | wx.ALL, 2) sizer_4.Add(self.double_click, 0, wx.ALIGN_CENTER | wx.ALL, 2) sizer_4.Add(self.single_click_copy_1, 0, wx.ALIGN_CENTER | wx.ALL, 2) sizer_3.Add(sizer_4, 0, wx.ALL | wx.EXPAND, 2) sizer_5.Add(self.down_flip, 0, wx.ALIGN_CENTER | wx.ALL, 2) sizer_5.Add(self.up_flip, 0, wx.ALIGN_CENTER | wx.ALL, 2) sizer_5.Add(self.left_flip, 0, wx.ALIGN_CENTER | wx.ALL, 2) sizer_5.Add(self.right_flip, 0, wx.ALIGN_CENTER | wx.ALL, 2) sizer_3.Add(sizer_5, 0, wx.ALL | wx.EXPAND, 2) label_1 = wx.StaticText(self, wx.ID_ANY, "Azimuth:", style=wx.ALIGN_RIGHT) grid_sizer_1.Add(label_1, 0, wx.ALIGN_CENTER_VERTICAL | wx.ALIGN_RIGHT, 0) grid_sizer_1.Add(self.azimuth, 0, wx.ALL | wx.EXPAND, 0) label_2 = wx.StaticText(self, wx.ID_ANY, "Inclination:", style=wx.ALIGN_RIGHT) grid_sizer_1.Add(label_2, 0, wx.ALIGN_CENTER_VERTICAL | wx.ALIGN_RIGHT, 0) grid_sizer_1.Add(self.inclination, 0, wx.ALL | wx.EXPAND, 0) label_3 = wx.StaticText(self, wx.ID_ANY, "Roll:", style=wx.ALIGN_RIGHT) grid_sizer_1.Add(label_3, 0, wx.ALIGN_CENTER_VERTICAL | wx.ALIGN_RIGHT, 0) grid_sizer_1.Add(self.roll, 0, wx.ALL | wx.EXPAND, 0) grid_sizer_1.AddGrowableCol(1) sizer_6.Add(grid_sizer_1, 1, wx.ALL | wx.EXPAND, 3) sizer_3.Add(sizer_6, 4, wx.ALL, 2) sizer_2.Add(sizer_3, 0, wx.EXPAND, 0) self.SetSizer(sizer_2) self.Layout() # end wxGlade def orientation_changed(self, event): # wxGlade: MyFrame.<event_handler> self.sim.sensor.set_orientation(self.azimuth.GetValue(), self.inclination.GetValue(), self.roll.GetValue()) def message_handler(self, message): result = self.sim.handleMessage(message) if message[0:2] == [0x01, 0x78]: self.sim_image.paintNow(self.sim.get_image()) return result def OnSave(self, event): # wxGlade: MyFrame.<event_handler> fname = "screenshot.png" self.sim.get_image().save(fname, "PNG") def OnQuit(self, event): # wxGlade: MyFrame.<event_handler> self.Close()
def __init__(self): """ Initializing DDPG """ self.sim = Sim() self.pfn = PFN(a_dim=A_DIM, s_dim=S_DIM, batch_size=8, memory_capacity=MEMORY_NUM, lr=0.001, bound=A_BOUND) self.ep_reward = 0.0 self.current_action = np.array([.0, .0, .0]) self.done = True # if the episode is done self.reward_record = list() self.ep_record = list() self.fig = plt.gcf() self.fig.show() self.fig.canvas.draw() print("Initialized PFN") """ Setting communication""" self.pc = PC() self.pc.header.frame_id = 'world' self.pub = rospy.Publisher('normalized_state', PC, queue_size=10) self.pub1 = rospy.Publisher('state', PC, queue_size=10) """ Reading targets """ """ The data should be w.r.t origin by base position """ self.ends = pickle.load(open('./data/targets.p', 'rb')) self.x_offset = X_OFFSET self.y_offset = Y_OFFSET self.z_offset = Z_OFFSET self.sample_target() print("Read target data") self.pfn.restore_momery() self.pfn.restore_model() memory_ep = np.ones((MAX_EP_STEPS, 3 + 3 + 1 + 1)) * -100 self.current_ep = 0 self.current_step = 0 while not (rospy.is_shutdown()): if self.current_ep < MAX_EPISODES: if self.current_step < MAX_EP_STEPS: #rospy.sleep(0.5) s = self.normalize_state(self.target) #print 'x' #print s action, prob = self.pfn.choose_action(s) s_ = self.sim.update_pose(action)[-1, :] self.compute_reward(self.target, s_) #print action #print self.pfn.raw_action(s) #print self.target #print s_ #print np.linalg.norm(self.target - s_) #print self.reward transition = np.hstack( (self.target, action, self.reward, prob)) memory_ep[self.current_step] = transition self.current_step += 1 state = np.array([s_, self.target]) self.pub_state(state) if self.pfn.pointer > TRAIN_POINT: self.pfn.learn() else: best_i = np.argsort(memory_ep[:, -2])[-1] best_action = memory_ep[best_i, 3:6] best_s_ = self.sim.update_pose(best_action)[-1, :] best_distance = np.linalg.norm(self.target - best_s_) """ #best action print '*' j = np.argsort(memory_ep[:,-2])[0] print memory_ep[best_i, :] print memory_ep[j, :] print best_s_ print self.target print best_action print best_distance """ self.current_step = 0 mean_action, act_var = self.pfn.choose_action2(s) mean_s_ = self.sim.update_pose(mean_action)[-1, :] mean_distance = np.linalg.norm(mean_s_ - self.target) target_action, w = self.compute_weighted_action(memory_ep) s_ = self.sim.update_pose(target_action)[-1, :] target_distance = np.linalg.norm(s_ - self.target) self.compute_reward(self.target, s_) self.pfn.store_transition(s, target_action, mean_distance, np.var(w)) self.pfn.store_transition(s, best_action, best_distance, w[best_i]) self.current_ep += 1 self.sample_target() memory_ep = np.ones((MAX_EP_STEPS, 3 + 3 + 1 + 1)) * -100 if self.current_ep % 10 == 0: self.reward_record.append(mean_distance) self.ep_record.append(self.current_ep) plt.plot(self.ep_record, self.reward_record) plt.ylim([0.0, 0.1]) self.fig.canvas.draw() self.fig.savefig('learning.png') print('\n') #print s print('Episode:', self.current_ep) print("Mean Action:") print mean_action print("Mean Distance:") print mean_distance print("Action Variance:") print act_var print("Target Action:") print target_action print("Target Distance:") print target_distance print("Weights Variance:") print np.var(w) print('*' * 40) self.pfn.save_model() self.pfn.save_memory() else: s = self.normalize_state(self.target) action, act_var = self.pfn.choose_action2(s) s_ = self.sim.update_pose(action)[-1, :] self.compute_reward(self.target, s_) print("Mean Action:") print action print("Action Variance:") print act_var print("Distance: %f" % np.linalg.norm(self.target - s_)) print("Reward: %f" % self.reward) print '\n' state = np.array([s_, self.target]) self.pub_state(state) rospy.sleep(1) self.sample_target()
class Trainer(object): def __init__(self): """ Initializing DDPG """ self.sim = Sim() self.pfn = PFN(a_dim=A_DIM, s_dim=S_DIM, batch_size=8, memory_capacity=MEMORY_NUM, lr=0.001, bound=A_BOUND) self.ep_reward = 0.0 self.current_action = np.array([.0, .0, .0]) self.done = True # if the episode is done self.reward_record = list() self.ep_record = list() self.fig = plt.gcf() self.fig.show() self.fig.canvas.draw() print("Initialized PFN") """ Setting communication""" self.pc = PC() self.pc.header.frame_id = 'world' self.pub = rospy.Publisher('normalized_state', PC, queue_size=10) self.pub1 = rospy.Publisher('state', PC, queue_size=10) """ Reading targets """ """ The data should be w.r.t origin by base position """ self.ends = pickle.load(open('./data/targets.p', 'rb')) self.x_offset = X_OFFSET self.y_offset = Y_OFFSET self.z_offset = Z_OFFSET self.sample_target() print("Read target data") self.pfn.restore_momery() self.pfn.restore_model() memory_ep = np.ones((MAX_EP_STEPS, 3 + 3 + 1 + 1)) * -100 self.current_ep = 0 self.current_step = 0 while not (rospy.is_shutdown()): if self.current_ep < MAX_EPISODES: if self.current_step < MAX_EP_STEPS: #rospy.sleep(0.5) s = self.normalize_state(self.target) #print 'x' #print s action, prob = self.pfn.choose_action(s) s_ = self.sim.update_pose(action)[-1, :] self.compute_reward(self.target, s_) #print action #print self.pfn.raw_action(s) #print self.target #print s_ #print np.linalg.norm(self.target - s_) #print self.reward transition = np.hstack( (self.target, action, self.reward, prob)) memory_ep[self.current_step] = transition self.current_step += 1 state = np.array([s_, self.target]) self.pub_state(state) if self.pfn.pointer > TRAIN_POINT: self.pfn.learn() else: best_i = np.argsort(memory_ep[:, -2])[-1] best_action = memory_ep[best_i, 3:6] best_s_ = self.sim.update_pose(best_action)[-1, :] best_distance = np.linalg.norm(self.target - best_s_) """ #best action print '*' j = np.argsort(memory_ep[:,-2])[0] print memory_ep[best_i, :] print memory_ep[j, :] print best_s_ print self.target print best_action print best_distance """ self.current_step = 0 mean_action, act_var = self.pfn.choose_action2(s) mean_s_ = self.sim.update_pose(mean_action)[-1, :] mean_distance = np.linalg.norm(mean_s_ - self.target) target_action, w = self.compute_weighted_action(memory_ep) s_ = self.sim.update_pose(target_action)[-1, :] target_distance = np.linalg.norm(s_ - self.target) self.compute_reward(self.target, s_) self.pfn.store_transition(s, target_action, mean_distance, np.var(w)) self.pfn.store_transition(s, best_action, best_distance, w[best_i]) self.current_ep += 1 self.sample_target() memory_ep = np.ones((MAX_EP_STEPS, 3 + 3 + 1 + 1)) * -100 if self.current_ep % 10 == 0: self.reward_record.append(mean_distance) self.ep_record.append(self.current_ep) plt.plot(self.ep_record, self.reward_record) plt.ylim([0.0, 0.1]) self.fig.canvas.draw() self.fig.savefig('learning.png') print('\n') #print s print('Episode:', self.current_ep) print("Mean Action:") print mean_action print("Mean Distance:") print mean_distance print("Action Variance:") print act_var print("Target Action:") print target_action print("Target Distance:") print target_distance print("Weights Variance:") print np.var(w) print('*' * 40) self.pfn.save_model() self.pfn.save_memory() else: s = self.normalize_state(self.target) action, act_var = self.pfn.choose_action2(s) s_ = self.sim.update_pose(action)[-1, :] self.compute_reward(self.target, s_) print("Mean Action:") print action print("Action Variance:") print act_var print("Distance: %f" % np.linalg.norm(self.target - s_)) print("Reward: %f" % self.reward) print '\n' state = np.array([s_, self.target]) self.pub_state(state) rospy.sleep(1) self.sample_target() def compute_weighted_action(self, memory_ep): sum = np.sum(memory_ep[:, -2]) + 1e-6 try: w = memory_ep[:, -2] / sum except: print("Sum %f" % sum) print w target_action = np.average(memory_ep[:, S_DIM:S_DIM + A_DIM], axis=0, weights=w) return target_action, w def sample_target(self): self.target = self.ends[np.random.randint(self.ends.shape[0])] def compute_reward(self, end, target): error = target - end self.reward = 20**(-np.log2(2.5 * np.linalg.norm(error))) #print np.linalg.norm(error) def pub_state(self, state): pts = list() for i in range(state.shape[0]): pt = Point() pt.x = state[i, 0] pt.y = state[i, 1] pt.z = state[i, 2] pts.append(pt) self.pc.points = pts self.pub.publish(self.pc) pts = list() for i in range(state.shape[0]): pt = Point() pt.x = state[i, 0] / 10.0 pt.y = state[i, 1] / 10.0 pt.z = state[i, 2] / 35.0 + 0.42 pts.append(pt) self.pc.points = pts self.pub1.publish(self.pc) def normalize_state(self, state): offset = np.array([0, 0, 0.42]) scaler = np.array([10, 10, 35]) s = state - offset s = np.multiply(s, scaler) return s def calculate_dist(self, state): offset = np.array([0, 0, 0.42]) scaler = np.array([10, 10, 35]) s = np.multiply(state, 1.0 / scaler) s += offset return np.linalg.norm(s)
for i in range(len(self.world)): for j in range(i + 1, len(self.world)): # stop collision with itself if i == j: continue if check_circle_collision(self.world[i], self.world[j]): handle_circle_collision(self.world[i], self.world[j]) # window edge collision for obj in self.world: if obj.min_x() <= 0: obj.vx = positive(obj.vx) elif obj.max_x() >= Sim.WIDTH: obj.vx = negative(obj.vx) if obj.min_y() <= 0: obj.vy = positive(obj.vy) elif obj.max_y() >= Sim.HEIGHT: obj.vy = negative(obj.vy) # rendering Sim.DRAW.rect(Sim.SURFACE, Sim.COLOURS["BLACK"], Sim.SCREEN_RECT) for obj in self.world: pos = (obj.x, obj.y) Sim.DRAW.circle(Sim.SURFACE, obj.colour, pos, obj.r) Sim.DISPLAY.flip() if __name__ == "__main__": rgs = Rigid_Physics_Simulation() sim = Sim(rgs) sim.main()