TIMESTAMP = datetime.now().strftime("%Y%m%d-%H%M%S")
SUMMARY_DIR = os.path.join(OUTPUT_RESULTS_DIR, "PPO_LSTM", ENVIRONMENT, TIMESTAMP)

env = gym.make(ENVIRONMENT)
env = wrappers.Monitor(env, os.path.join(SUMMARY_DIR, ENVIRONMENT), video_callable=None)
ppo = PPO(env, SUMMARY_DIR, gpu=False)

if MODEL_RESTORE_PATH is not None:
    ppo.restore_model(MODEL_RESTORE_PATH)

t, terminal = 0, False
buffer_s, buffer_a, buffer_r, buffer_v, buffer_terminal = [], [], [], [], []
rolling_r = RunningStats()
experience, batch_rewards = [], []

for episode in range(EP_MAX):
    lstm_state = ppo.sess.run(ppo.eval_i_state)  # Zero LSTM state at beginning
    s = env.reset()
    ep_r, ep_t, ep_a = 0, 0, []

    while True:
        a, v, lstm_state = ppo.eval_state(s, lstm_state)

        if terminal:
            # Normalise rewards
class Client:
    """ Base Client class """

    MULTI_THREADED = False
    # Initialize "Grand total" class variables.
    stats_latency_full_process = RunningStats()
    stats_latency_network_only = RunningStats()
    stats_server_processing_time = RunningStats()

    def __init__(self, host, port):
        # Initialize instance variables.
        self.host = host
        self.port = port
        self.do_server_stats = False
        self.show_responses = False
        self.stats_latency_full_process = RunningStats()
        self.stats_latency_network_only = RunningStats()
        self.stats_server_processing_time = RunningStats()
        self.image_file_name = None
        self.latency_start_time = 0
        self.loop_count = 0
        self.num_repeat = 0
        self.filename_list = []
        self.filename_list_index = 0
        self.json_params = None
        self.base64 = False
        logger.debug("host:port = %s:%d" % (self.host, self.port))

    def start(self):
        logger.debug("image file(s) %s %s" % (self.image_file_name, self.filename_list))

    def next_file_name(self):
        """ If filename_list has more than one entry, advance to the next one. """
        if len(self.filename_list) > 1:
            self.filename_list_index += 1
            if self.filename_list_index >= len(self.filename_list):
                self.filename_list_index = 0
            self.image_file_name = self.filename_list[self.filename_list_index]

    def time_open_socket(self):
        now = time.time()
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(2)
        result = sock.connect_ex((self.host, self.port))
        if result != 0:
            logger.error("Could not connect to %s on port %d" % (self.host, self.port))
            return
        millis = (time.time() - now) * 1000
        elapsed = "%.3f" % millis
        if self.show_responses:
            logger.info("%s ms to open socket" % (elapsed))
        self.stats_latency_network_only.push(millis)
        Client.stats_latency_network_only.push(millis)

    def icmp_ping(self):
        args = [PING, '-c', '1', '-W', '1', self.host]
        p_ping = subprocess.Popen(args, shell=False, stdout=subprocess.PIPE)
        # save ping stdout
        p_ping_out = str(p_ping.communicate()[0])
        if p_ping.wait() == 0:
            logger.info(p_ping_out)
            # rtt min/avg/max/mdev = 61.994/61.994/61.994/0.000 ms
            search = re.search(PING_REGEX, p_ping_out, re.M | re.I)
            ping_rtt = float(search.group(2))  # push a number, not the matched string
            if self.show_responses:
                logger.info("%s ms ICMP ping" % (ping_rtt))
            self.stats_latency_network_only.push(ping_rtt)
            Client.stats_latency_network_only.push(ping_rtt)
        else:
            logger.error("ICMP ping failed")

    def process_result(self, result):
        global TEST_PASS
        decoded_json = json.loads(result)
        if 'success' in decoded_json:
            if decoded_json['success'] == "true":
                TEST_PASS = True
            else:
                TEST_PASS = False
        if 'latency_start' in decoded_json:
            millis = (time.time() - decoded_json['latency_start']) * 1000
            self.stats_latency_network_only.push(millis)
            Client.stats_latency_network_only.push(millis)
        else:
            millis = (time.time() - self.latency_start_time) * 1000
            self.stats_latency_full_process.push(millis)
            Client.stats_latency_full_process.push(millis)
        if 'server_processing_time' in decoded_json:
            server_processing_time = decoded_json['server_processing_time']
            self.stats_server_processing_time.push(float(server_processing_time))
            Client.stats_server_processing_time.push(float(server_processing_time))
        if self.show_responses:
            elapsed = "%.3f" % millis
            logger.info("%s ms to send and receive: %s" % (elapsed, result))

    def display_results(self):
        if not self.show_responses or not Client.MULTI_THREADED:
            return
        if self.stats_latency_full_process.n > 0:
            logger.info("====> Average Latency Full Process=%.3f ms (stddev=%.3f)"
                        % (self.stats_latency_full_process.mean(),
                           self.stats_latency_full_process.stddev()))
        if self.stats_latency_network_only.n > 0:
            logger.info("====> Average Latency Network Only=%.3f ms (stddev=%.3f)"
                        % (self.stats_latency_network_only.mean(),
                           self.stats_latency_network_only.stddev()))
        if self.stats_server_processing_time.n > 0:
            logger.info("====> Average Server Processing Time=%.3f ms (stddev=%.3f)"
                        % (self.stats_server_processing_time.mean(),
                           self.stats_server_processing_time.stddev()))
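# The Client statistics above assume a RunningStats object that exposes push(), n,
# mean() and stddev(). A minimal sketch using Welford's online algorithm, offered only
# as a plausible stand-in for the RunningStats implementation these snippets rely on:
import math

class RunningStats:
    def __init__(self):
        self.n = 0          # number of samples pushed so far
        self._mean = 0.0    # running mean
        self._m2 = 0.0      # running sum of squared deviations from the mean

    def push(self, x):
        # Welford's single-pass update: numerically stable mean and variance.
        self.n += 1
        delta = x - self._mean
        self._mean += delta / self.n
        self._m2 += delta * (x - self._mean)

    def mean(self):
        return self._mean if self.n > 0 else 0.0

    def variance(self):
        return self._m2 / (self.n - 1) if self.n > 1 else 0.0

    def stddev(self):
        return math.sqrt(self.variance())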
TIMESTAMP = datetime.now().strftime("%Y%m%d-%H%M%S")
SUMMARY_DIR = os.path.join(OUTPUT_RESULTS_DIR, "PPO", ENVIRONMENT, TIMESTAMP)

# env = gym.make(ENVIRONMENT)
env = allCars()
# env = wrappers.Monitor(env, os.path.join(SUMMARY_DIR, ENVIRONMENT), video_callable=None)
ppo = PPO(env, SUMMARY_DIR, gpu=True)

if MODEL_RESTORE_PATH is not None:
    ppo.restore_model(MODEL_RESTORE_PATH)

t, terminal = 0, False
buffer_s, buffer_a, buffer_r, buffer_v, buffer_terminal = [], [], [], [], []
rolling_r = RunningStats()

# Get prior and set tuning parameters for adaptive regularization weight
prior = BasePrior()
lambda_store = np.zeros(BATCH + 1)
lambda_all = np.zeros(EP_MAX + 1)
lambda_max = 8
factor = 0.2
reward_total, reward_diff = [], []

for episode in range(EP_MAX + 1):
    # Baseline reward using only control prior
    sp = env.reset_inc()
    reward_prior = 0.
data_dir = data_dir + "/*/"
folders = glob.glob(data_dir)
# print(folders[0].split('\\')[-2])

tracker = Tracking(config='tracking/experiments/siamrpn_r50_l234_dwxcorr/config.yaml',
                   snapshot='tracking/experiments/siamrpn_r50_l234_dwxcorr/model.pth')
detector = Detection(config="./detectron2/configs/COCO-InstanceSegmentation/small.yaml",
                     model="detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl")
reid_module = REID(model=REID_BACKBONE)
tracklet = Tracklet(TRACKLET_SIZE)
running_stats = RunningStats()


def reid_rescore(reid_module, frame, template_features, bboxes, scores):
    # Rescore detection and tracking results with the REID module and sort the results.
    batch = []
    for bbox in bboxes:
        target = frame[bbox[1]:bbox[3], bbox[0]:bbox[2], :]
        # print(target.shape)
        target = cv2.resize(target, (128, 128))
        batch.append(target)
    batch = np.array(batch).astype(np.float32)
    if len(batch.shape) == 3:
        batch = batch[None, :, :, :]
def work(self):
    hooks = [self.ppo.sync_replicas_hook]
    sess = tf.train.MonitoredTrainingSession(master=self.server.target,
                                             is_chief=(self.wid == 0),
                                             checkpoint_dir=SUMMARY_DIR,
                                             save_summaries_steps=None,
                                             save_summaries_secs=None,
                                             hooks=hooks)
    if self.wid == 0:
        writer = SummaryWriterCache.get(SUMMARY_DIR)

    t, episode, terminal = 0, 0, False
    buffer_s, buffer_a, buffer_r, buffer_v, buffer_terminal = [], [], [], [], []
    rolling_r = RunningStats()

    while not sess.should_stop() and not (episode > EP_MAX and self.wid == 0):
        s = self.env.reset()
        ep_r, ep_t, ep_a = 0, 0, []

        while True:
            a, v = self.ppo.evaluate_state(s, sess)

            # Update ppo
            if t == BATCH:  # or (terminal and t < BATCH):
                # Normalise rewards
                rewards = np.array(buffer_r)
                rolling_r.update(rewards)
                rewards = np.clip(rewards / rolling_r.std, -10, 10)

                v_final = [v * (1 - terminal)]  # v = 0 if terminal, otherwise use the predicted v
                values = np.array(buffer_v + v_final)
                terminals = np.array(buffer_terminal + [terminal])

                # Generalized Advantage Estimation - https://arxiv.org/abs/1506.02438
                delta = rewards + GAMMA * values[1:] * (1 - terminals[1:]) - values[:-1]
                advantage = discount(delta, GAMMA * LAMBDA, terminals)
                returns = advantage + np.array(buffer_v)
                advantage = (advantage - advantage.mean()) / np.maximum(advantage.std(), 1e-6)

                bs, ba, br, badv = np.reshape(buffer_s, (t,) + self.ppo.s_dim), np.vstack(buffer_a), \
                                   np.vstack(returns), np.vstack(advantage)

                graph_summary = self.ppo.update(bs, ba, br, badv, sess)
                buffer_s, buffer_a, buffer_r, buffer_v, buffer_terminal = [], [], [], [], []
                t = 0

            buffer_s.append(s)
            buffer_a.append(a)
            buffer_v.append(v)
            buffer_terminal.append(terminal)
            ep_a.append(a)

            if not self.ppo.discrete:
                a = np.clip(a, self.env.action_space.low, self.env.action_space.high)
            s, r, terminal, _ = self.env.step(a)
            buffer_r.append(r)
            ep_r += r
            ep_t += 1
            t += 1

            if terminal:
                # End of episode summary
                print('Worker_%i' % self.wid, '| Episode: %i' % episode,
                      "| Reward: %.2f" % ep_r, '| Steps: %i' % ep_t)

                if self.wid == 0:
                    worker_summary = tf.Summary()
                    worker_summary.value.add(tag="Reward", simple_value=ep_r)

                    # Create Action histograms for each dimension
                    actions = np.array(ep_a)
                    if self.ppo.discrete:
                        add_histogram(writer, "Action", actions, episode, bins=self.ppo.a_dim)
                    else:
                        for a in range(self.ppo.a_dim):
                            add_histogram(writer, "Action/Dim" + str(a), actions[:, a], episode)

                    try:
                        writer.add_summary(graph_summary, episode)
                    except NameError:
                        pass
                    writer.add_summary(worker_summary, episode)
                    writer.flush()

                episode += 1
                break

    self.env.close()
    print("Worker_%i finished" % self.wid)
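# The GAE block above calls a discount() helper that is not included in this snippet.
# A minimal sketch, assuming it computes a reversed discounted cumulative sum that is
# cut off at terminal transitions (the standard GAE recursion); the original helper may
# be implemented differently (e.g. with scipy.signal.lfilter).
import numpy as np

def discount(x, gamma, terminals):
    # out[t] = x[t] + gamma * out[t + 1], reset whenever terminals[t + 1] marks the end
    # of an episode. terminals has one more element than x, matching the
    # buffer_terminal + [terminal] array built in work() above.
    out = np.zeros_like(x, dtype=np.float64)
    running = 0.0
    for t in reversed(range(len(x))):
        running = x[t] + gamma * running * (1 - terminals[t + 1])
        out[t] = running
    return out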
class Agent:
    def __init__(self, n_actions, n_states, obs_shape, gamma=0.99, lr=0.0003,
                 gae_lambda=0.95, entropy_coeff=0.0005, ppo_clip=0.2,
                 mini_batch_size=64, n_epochs=10, clip_value_loss=True,
                 normalize_observation=False, stop_normalize_obs_after_timesteps=50000,
                 fc1=64, fc2=64, environment='None', run=0):
        self.entropy_coeff = entropy_coeff
        self.clip_value_loss = clip_value_loss
        self.gamma = gamma
        self.ppo_clip = ppo_clip
        self.n_epochs = n_epochs
        self.gae_lambda = gae_lambda
        self.normalize_observation = normalize_observation
        self.stop_obs_timesteps = stop_normalize_obs_after_timesteps
        self.timestep = 0

        self.actor = ActorNetwork(n_states=n_states, n_actions=n_actions, lr=lr,
                                  fc1_dims=fc1, fc2_dims=fc2, chkpt_dir=environment, run=run)
        self.critic = CriticNetwork(n_states=n_states, lr=lr, fc1_dims=fc1, fc2_dims=fc2,
                                    chkpt_dir=environment, run=run)
        self.memory = PPOMemory(mini_batch_size, gamma, gae_lambda)
        self.running_stats = RunningStats(shape_states=obs_shape, chkpt_dir=environment, run=run)
        # self.optimizer = optim.Adam(list(self.actor.parameters()) + list(self.critic.parameters()), lr=lr, eps=1e-5)

    def remember(self, state, action, log_probs, value, reward, done):
        self.memory.store_memory(state, action, log_probs, value, reward, done)

    def remember_adv(self, advantage_list):
        self.memory.store_advantage(advantage_list)

    def save_networks(self):
        print('--saving networks--')
        self.actor.save_actor()
        self.critic.save_critic()
        if self.normalize_observation:
            self.running_stats.save_stats()

    def load_networks(self):
        print('--loading networks--')
        self.actor.load_actor()
        self.critic.load_critic()
        if self.normalize_observation:
            self.running_stats.load_stats()

    def normalize_obs(self, obs):
        mean, std = self.running_stats()
        obs_norm = (obs - mean) / (std + 1e-6)
        return obs_norm

    def choose_action(self, observation):
        if self.normalize_observation:
            if self.timestep < self.stop_obs_timesteps:
                self.running_stats.online_update(observation)
            elif self.timestep == self.stop_obs_timesteps:
                print('No online update for obs normalization anymore')
            observation = self.normalize_obs(observation)  # Normalize observations

        state = T.tensor([observation], dtype=T.float).to(self.actor.device)
        dist, _ = self.actor(state)
        value = self.critic(state)
        action = dist.sample()

        log_probs = dist.log_prob(action)
        log_probs = T.sum(log_probs, dim=1, keepdim=True).squeeze().detach().cpu().numpy()
        value = T.squeeze(value).item()
        # action = T.squeeze(action).detach().numpy()
        if action.shape[0] == 1 and action.shape[1] == 1:
            action = action.detach().cpu().numpy()[0].reshape(1, )
        else:
            action = T.squeeze(action).detach().cpu().numpy()

        self.timestep += 1
        return action, log_probs, value

    def choose_deterministic_action(self, observation):
        if self.normalize_observation:
            observation = self.normalize_obs(observation)  # Normalize observations
        state = T.tensor([observation], dtype=T.float).to(self.actor.device)
        _, mean = self.actor(state)
        action = T.squeeze(mean).detach().cpu().numpy()  # .reshape(1, )
        return action

    def learn(self):
        for _ in range(self.n_epochs):
            state_arr, action_arr, old_prob_arr, vals_arr, \
                reward_arr, dones_arr, advantage_arr, batches = \
                self.memory.generate_batches()

            if self.normalize_observation:
                # print(state_arr[0:5, :])
                state_arr = self.normalize_obs(state_arr)
                # print(state_arr[0:5, :])

            for batch in batches:
                states = T.tensor(state_arr[batch], dtype=T.float).to(self.actor.device)
                old_log_probs = T.tensor(old_prob_arr[batch]).to(self.actor.device).detach()
                actions = T.tensor(action_arr[batch]).to(self.actor.device).detach()
                critic_value_old = T.tensor(vals_arr[batch]).to(self.actor.device).detach()
                advantage = T.tensor(advantage_arr[batch]).to(self.actor.device)
                # returns = T.tensor(reward_arr[batch]).to(self.actor.device)
                # advantage = returns - critic_value_old

                # Advantage normalization per mini-batch
                advantage = (advantage - advantage.mean()) / (advantage.std() + 1e-8)
                advantage = advantage.detach()

                ## Actor loss
                dist, _ = self.actor(states)
                critic_value_new = self.critic(states)
                critic_value_new = T.squeeze(critic_value_new)

                new_log_probs = dist.log_prob(actions)
                new_log_probs = T.sum(new_log_probs, dim=1, keepdim=True).squeeze()
                prob_ratio = (new_log_probs - old_log_probs).exp()

                weighted_probs = advantage * prob_ratio
                weighted_clipped_probs = T.clamp(prob_ratio, 1 - self.ppo_clip,
                                                 1 + self.ppo_clip) * advantage
                ppo_surr_loss = -T.min(weighted_probs, weighted_clipped_probs).mean()
                entropy_loss = -self.entropy_coeff * dist.entropy().mean()
                actor_loss = ppo_surr_loss + entropy_loss

                ## Critic loss
                returns = advantage + critic_value_old
                # Clipped value loss
                if self.clip_value_loss:
                    v_loss_unclipped = (critic_value_new - returns) ** 2
                    v_clipped = critic_value_old + T.clamp(critic_value_new - critic_value_old,
                                                           -self.ppo_clip, self.ppo_clip)
                    v_loss_clipped = (v_clipped - returns) ** 2
                    v_loss_max = T.max(v_loss_unclipped, v_loss_clipped)
                    critic_loss = 0.5 * v_loss_max.mean()
                else:
                    critic_loss = 0.5 * ((critic_value_new - returns) ** 2).mean()

                ## Backprop actor
                self.actor.optimizer.zero_grad()
                actor_loss.backward()
                nn.utils.clip_grad_norm_(parameters=self.actor.parameters(),
                                         max_norm=0.5, norm_type=2)
                self.actor.optimizer.step()

                ## Backprop critic
                self.critic.optimizer.zero_grad()
                critic_loss.backward()
                nn.utils.clip_grad_norm_(parameters=self.critic.parameters(),
                                         max_norm=0.5, norm_type=2)
                self.critic.optimizer.step()

                # loss = critic_loss + actor_loss
                # self.optimizer.zero_grad()
                # loss.backward()
                # nn.utils.clip_grad_norm_(parameters=list(self.actor.parameters()) + list(self.critic.parameters()),
                #                          max_norm=0.8, norm_type=2)
                # self.optimizer.step()

        self.memory.clear_memory()  # Clear memory to save new samples for the next iteration
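# A minimal usage sketch for the Agent class above. The environment name, rollout
# length, and the externally computed GAE advantages handed to remember_adv() are
# assumptions; the original training script is not part of this snippet.
import gym

env = gym.make('Pendulum-v1')  # hypothetical environment choice
agent = Agent(n_actions=env.action_space.shape[0],
              n_states=env.observation_space.shape[0],
              obs_shape=env.observation_space.shape)

horizon = 2048  # assumed rollout length between learn() calls
obs = env.reset()
rewards, values, dones = [], [], []

for step in range(1, 100001):
    action, log_prob, value = agent.choose_action(obs)
    next_obs, reward, done, _ = env.step(action)
    agent.remember(obs, action, log_prob, value, reward, done)
    rewards.append(reward)
    values.append(value)
    dones.append(done)
    obs = env.reset() if done else next_obs

    if step % horizon == 0:
        # Compute GAE advantages over the stored rollout (bootstrap value of 0 for
        # simplicity) and store them before updating.
        advantages, gae, next_value = [], 0.0, 0.0
        for r, v, d in zip(reversed(rewards), reversed(values), reversed(dones)):
            delta = r + agent.gamma * next_value * (1 - d) - v
            gae = delta + agent.gamma * agent.gae_lambda * (1 - d) * gae
            advantages.insert(0, gae)
            next_value = v
        agent.remember_adv(advantages)
        agent.learn()
        rewards, values, dones = [], [], []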
class Client:
    """ Base Client class """

    MULTI_THREADED = False
    # Initialize "Grand total" class variables.
    stats_latency_full_process = RunningStats()
    stats_latency_network_only = RunningStats()
    stats_server_processing_time = RunningStats()

    def __init__(self, host, port):
        # Initialize instance variables.
        self.host = host
        self.port = port
        self.do_server_stats = False
        self.show_responses = False
        self.stats_latency_full_process = RunningStats()
        self.stats_latency_network_only = RunningStats()
        self.stats_server_processing_time = RunningStats()
        self.media_file_name = None
        self.latency_start_time = 0
        self.loop_count = 0
        self.num_repeat = 0
        self.filename_list = []
        self.filename_list_index = 0
        self.json_params = None
        self.base64 = False
        self.video = None
        self.resize = True
        self.resize_long = 240
        self.resize_short = 180
        self.skip_frames = 1
        logger.debug("host:port = %s:%d" % (self.host, self.port))

    def start(self):
        logger.debug("media file(s) %s" % (self.filename_list))
        video_extensions = ('mp4', 'avi', 'mov')
        if self.filename_list[0].endswith(video_extensions):
            logger.debug("It's a video")
            self.media_file_name = self.filename_list[0]
            self.video = cv2.VideoCapture(self.media_file_name)

    def get_next_image(self):
        if self.video is not None:
            for x in range(self.skip_frames):
                ret, image = self.video.read()
            if not ret:
                logger.debug("End of video")
                return None
            vw = image.shape[1]
            vh = image.shape[0]
            logger.debug("Video size: %dx%d" % (vw, vh))
            if self.resize:
                if vw > vh:
                    resize_w = self.resize_long
                    resize_h = self.resize_short
                else:
                    resize_w = self.resize_short
                    resize_h = self.resize_long
                image = cv2.resize(image, (resize_w, resize_h))
                logger.debug("Resized image to: %dx%d" % (resize_w, resize_h))
            res, image = cv2.imencode('.JPEG', image)
            image = image.tobytes()
        else:
            # If the filename_list array has more than one entry, get the next value.
            if len(self.filename_list) > 1:
                self.filename_list_index += 1
                if self.filename_list_index >= len(self.filename_list):
                    self.filename_list_index = 0
            else:
                self.filename_list_index = 0

            if self.stats_latency_full_process.n >= self.num_repeat:
                return None

            self.media_file_name = self.filename_list[self.filename_list_index]
            f = open(self.media_file_name, "rb")
            image = f.read()

        logger.debug("Image data (first 32 bytes logged): %s" % image[:32])
        return image

    def get_server_stats(self):
        url = "http://%s:%d%s" % (self.host, self.port, "/server/usage/")
        if self.tls:
            url = url.replace("http", "https", 1)
        logger.info(requests.get(url).content)

    def time_open_socket(self):
        now = time.time()
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.settimeout(2)
        result = sock.connect_ex((self.host, self.port))
        if result != 0:
            logger.error("Could not connect to %s on port %d" % (self.host, self.port))
            return
        millis = (time.time() - now) * 1000
        elapsed = "%.3f" % millis
        if self.show_responses:
            logger.info("%s ms to open socket" % (elapsed))
        self.stats_latency_network_only.push(millis)
        Client.stats_latency_network_only.push(millis)

    def icmp_ping(self):
        args = [PING, '-c', '1', '-W', '1', self.host]
        p_ping = subprocess.Popen(args, shell=False, stdout=subprocess.PIPE)
        # save ping stdout
        p_ping_out = str(p_ping.communicate()[0])
        if p_ping.wait() == 0:
            logger.info(p_ping_out)
            # rtt min/avg/max/mdev = 61.994/61.994/61.994/0.000 ms
            search = re.search(PING_REGEX, p_ping_out, re.M | re.I)
            ping_rtt = float(search.group(2))
            if self.show_responses:
                logger.info("%s ms ICMP ping" % (ping_rtt))
            self.stats_latency_network_only.push(ping_rtt)
            Client.stats_latency_network_only.push(ping_rtt)
        else:
            logger.error("ICMP ping failed")

    def process_result(self, result):
        global TEST_PASS
        try:
            decoded_json = json.loads(result)
        except Exception as e:
            logger.error("Could not decode result. Exception: %s. Result: %s" % (e, result))
            TEST_PASS = False
            return
        if 'success' in decoded_json:
            if decoded_json['success'] == "true":
                TEST_PASS = True
            else:
                TEST_PASS = False
        if 'latency_start' in decoded_json:
            millis = (time.time() - decoded_json['latency_start']) * 1000
            self.stats_latency_network_only.push(millis)
            Client.stats_latency_network_only.push(millis)
        else:
            millis = (time.time() - self.latency_start_time) * 1000
            self.stats_latency_full_process.push(millis)
            Client.stats_latency_full_process.push(millis)
        if 'server_processing_time' in decoded_json:
            server_processing_time = decoded_json['server_processing_time']
            self.stats_server_processing_time.push(float(server_processing_time))
            Client.stats_server_processing_time.push(float(server_processing_time))
        if self.show_responses:
            elapsed = "%.3f" % millis
            logger.info("%s ms to send and receive: %s" % (elapsed, result))

    def display_results(self):
        if not self.show_responses or not Client.MULTI_THREADED:
            return
        if self.stats_latency_full_process.n > 0:
            logger.info("====> Average Latency Full Process=%.3f ms (stddev=%.3f)"
                        % (self.stats_latency_full_process.mean(),
                           self.stats_latency_full_process.stddev()))
        if self.stats_latency_network_only.n > 0:
            logger.info("====> Average Latency Network Only=%.3f ms (stddev=%.3f)"
                        % (self.stats_latency_network_only.mean(),
                           self.stats_latency_network_only.stddev()))
        if self.stats_server_processing_time.n > 0:
            logger.info("====> Average Server Processing Time=%.3f ms (stddev=%.3f)"
                        % (self.stats_server_processing_time.mean(),
                           self.stats_server_processing_time.stddev()))