def reset(person_waiter: WaitingForPerson, person_checker: CheckingPerson,
          tracker: Tracker, temp_checker: TemperatureChecker, looker: Looker):
    """Resets the instances to their initial state."""
    person_waiter.reset()
    person_checker.reset()
    temp_checker.reset()
    tracker.reset()
    looker.stop()
def train():
    processes = []
    if os.path.isdir(args.log_dir):
        ans = input('{} exists\ncontinue and overwrite? y/n: '.format(args.log_dir))
        if ans == 'n':
            return

    logger.configure(dir=args.log_dir, format_strs=['stdout', 'log', 'csv'])
    logger.log(args)
    with open(os.path.join(args.log_dir, 'params.json'), 'w') as f:
        json.dump(vars(args), f)

    torch.set_num_threads(2)

    start = time.time()
    policy_update_time, policy_forward_time = 0, 0
    step_time_env, step_time_total, step_time_rewarder = 0, 0, 0
    visualize_time = 0
    rewarder_fit_time = 0

    envs = ContextualEnvInterface(args)
    if args.look:
        looker = Looker(args.log_dir)

    actor_critic, agent = initialize_policy(envs)

    rollouts = RolloutStorage(args.num_steps, args.num_processes,
                              envs.obs_shape, envs.action_space,
                              actor_critic.recurrent_hidden_state_size)
    rollouts.to(args.device)

    def copy_obs_into_beginning_of_storage(obs):
        rollouts.obs[0].copy_(obs)

    for j in range(args.num_updates):
        # Have to reset here so the updated rewarder is used to sample tasks
        obs = envs.reset()
        copy_obs_into_beginning_of_storage(obs)

        if args.use_linear_lr_decay:
            update_linear_schedule(agent.optimizer, j, args.num_updates, args.lr)

        if args.algo == 'ppo' and args.use_linear_clip_decay:
            agent.clip_param = args.clip_param * (1 - j / float(args.num_updates))

        log_marginal = 0
        lambda_log_s_given_z = 0

        for step in range(args.num_steps):
            # Sample actions
            policy_forward_start = time.time()
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = actor_critic.act(
                    rollouts.obs[step],
                    rollouts.recurrent_hidden_states[step],
                    rollouts.masks[step])
            policy_forward_time += time.time() - policy_forward_start

            # Observe reward and next obs
            step_total_start = time.time()
            obs, reward, done, info = envs.step(action)
            step_time_total += time.time() - step_total_start
            step_time_env += info['step_time_env']
            step_time_rewarder += info['reward_time']
            if args.rewarder == 'unsupervised' and args.clusterer == 'vae':
                log_marginal += info['log_marginal'].sum().item()
                lambda_log_s_given_z += info['lambda_log_s_given_z'].sum().item()

            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                       for done_ in done])
            rollouts.insert(obs, recurrent_hidden_states, action,
                            action_log_prob, value, reward, masks)
        assert all(done)

        # policy update
        with torch.no_grad():
            next_value = actor_critic.get_value(
                rollouts.obs[-1],
                rollouts.recurrent_hidden_states[-1],
                rollouts.masks[-1]).detach()
        rollouts.compute_returns(next_value, args.use_gae, args.gamma, args.tau)

        policy_update_start = time.time()
        if args.rewarder != 'supervised' and envs.rewarder.fit_counter == 0:
            value_loss, action_loss, dist_entropy = 0, 0, 0
        else:
            value_loss, action_loss, dist_entropy = agent.update(rollouts)
        policy_update_time += time.time() - policy_update_start
        rollouts.after_update()

        # metrics
        trajectories = envs.trajectories_current_update
        state_entropy = calculate_state_entropy(args, trajectories)

        return_avg = rollouts.rewards.sum() / args.trials_per_update
        reward_avg = return_avg / (args.trial_length * args.episode_length)
        log_marginal_avg = log_marginal / args.trials_per_update / (
            args.trial_length * args.episode_length)
        lambda_log_s_given_z_avg = lambda_log_s_given_z / args.trials_per_update / (
            args.trial_length * args.episode_length)

        num_steps = (j + 1) * args.num_steps * args.num_processes
        num_episodes = num_steps // args.episode_length
        num_trials = num_episodes // args.trial_length
        logger.logkv('state_entropy', state_entropy)
        logger.logkv('value_loss', value_loss)
        logger.logkv('action_loss', action_loss)
        logger.logkv('dist_entropy', dist_entropy)
        logger.logkv('return_avg', return_avg.item())
        logger.logkv('reward_avg', reward_avg.item())
        logger.logkv('steps', num_steps)
        logger.logkv('episodes', num_episodes)
        logger.logkv('trials', num_trials)
        logger.logkv('policy_updates', (j + 1))
        logger.logkv('time', time.time() - start)
        logger.logkv('policy_forward_time', policy_forward_time)
        logger.logkv('policy_update_time', policy_update_time)
        logger.logkv('step_time_rewarder', step_time_rewarder)
        logger.logkv('step_time_env', step_time_env)
        logger.logkv('step_time_total', step_time_total)
        logger.logkv('visualize_time', visualize_time)
        logger.logkv('rewarder_fit_time', rewarder_fit_time)
        if args.rewarder == 'unsupervised' and args.clusterer == 'vae':
            logger.logkv('log_marginal_avg', log_marginal_avg)
            logger.logkv('lambda_log_s_given_z_avg', lambda_log_s_given_z_avg)
        logger.dumpkvs()

        if (j % args.save_period == 0 or j == args.num_updates - 1) and args.log_dir != '':
            save_model(args, actor_critic, envs, iteration=j)

        if j % args.rewarder_fit_period == 0:
            rewarder_fit_start = time.time()
            envs.fit_rewarder()
            rewarder_fit_time += time.time() - rewarder_fit_start

        if (j % args.vis_period == 0 or j == args.num_updates - 1) and args.log_dir != '':
            visualize_start = time.time()
            if args.look:
                looker.look(iteration=j)
            if args.plot:
                p = Popen('python visualize.py --log-dir {}'.format(args.log_dir),
                          shell=True)
                processes.append(p)
            visualize_time += time.time() - visualize_start
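# Both training loops call update_linear_schedule to anneal the learning rate,
# but its definition is not shown in this section. A minimal sketch matching
# the call signature above; the body is an assumption based on the standard
# linear-decay pattern:
def update_linear_schedule(optimizer, epoch, total_num_epochs, initial_lr):
    """Linearly decay the learning rate from initial_lr to 0 over training."""
    lr = initial_lr - (initial_lr * (epoch / float(total_num_epochs)))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr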
def video():
    """
    Main method of the program: reads the data streams and displays the video
    streams and status messages to the user.
    """
    global image_timestamp
    global thermal
    global normal
    global temp

    # Tracks the current state: 'waiting', which awaits a person entering the
    # frame, and 'person_detected', which continuously checks whether the
    # person's mask is worn correctly.
    current_state = 'waiting'

    looker = Looker()
    talker = Talker()
    tracker = Tracker(args['tracker'])
    detector = FaceAndMaskDetector(args['confidence'])
    temp_checker = TemperatureChecker()
    person_waiter = WaitingForPerson(tracker, detector, args['wait'])
    person_checker = CheckingPerson(tracker, talker, detector, temp_checker,
                                    args['value'], args['wait'],
                                    args['threshold'], args['state'],
                                    args['move'])

    while True:
        # Get current frames
        normal_wrapper.set(normal)
        curr_normal = normal_wrapper.get()
        temp_wrapper.set(temp)
        curr_temp = temp_wrapper.get()
        thermal_wrapper.set(thermal)
        curr_thermal = thermal_wrapper.get()

        # While in the 'waiting' state, check if a person is in the frame
        if current_state == 'waiting':
            person_waiter.run_prediction(curr_normal)

        # If a person entered the frame, change the current state
        if person_waiter.person_in_frame():
            current_state = 'person_detected'

        # While in the 'person_detected' state, check if the person is wearing
        # the mask properly
        if current_state == 'person_detected':
            person_checker.check_person(curr_normal, curr_temp, looker,
                                        image_timestamp)
            if person_checker.mask_ok:
                print(f'{person_checker.temp_checker.get_temp()} C')
                sleep(3)
                person_checker.speak_temperature()
                reset(person_waiter, person_checker, tracker, temp_checker,
                      looker)
                looker = Looker()
                current_state = 'waiting'
            elif person_checker.lost_tracking:
                reset(person_waiter, person_checker, tracker, temp_checker,
                      looker)
                looker = Looker()
                current_state = 'waiting'

        frame = vstack((curr_normal, curr_thermal))
        # Display the concatenated current frame
        cv.imshow('Video stream', frame)

        # Exit if Q pressed
        if cv.waitKey(1) & 0xFF == ord('q'):
            break

    # Close the video stream, stop the thread that centers the camera on the
    # face, and exit the program
    cv.destroyAllWindows()
    looker.stop()
    sys.exit(0)
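# The normal_wrapper / temp_wrapper / thermal_wrapper objects used above are
# not defined in this section; they appear to act as thread-safe holders for
# the latest frame of each stream, shared between the capture threads and the
# display loop. A minimal sketch of what such a wrapper could look like; the
# class name and locking strategy are assumptions, not the project's actual
# implementation:
from threading import Lock

class FrameWrapper:
    """Thread-safe holder for the most recent frame of a stream."""

    def __init__(self):
        self._lock = Lock()
        self._frame = None

    def set(self, frame):
        # Store a copy so later in-place drawing doesn't mutate the shared frame
        with self._lock:
            self._frame = None if frame is None else frame.copy()

    def get(self):
        with self._lock:
            return self._frame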
def train():
    processes = []
    if os.path.isdir(args.log_dir):
        ans = input('{} exists\ncontinue and overwrite? y/n: '.format(args.log_dir))
        if ans == 'n':
            return

    logger.configure(dir=args.log_dir, format_strs=['stdout', 'log', 'csv'])
    logger.log(args)
    with open(os.path.join(args.log_dir, 'params.json'), 'w') as f:
        json.dump(vars(args), f)

    torch.set_num_threads(2)

    start = time.time()
    policy_update_time, policy_forward_time = 0, 0
    step_time_env, step_time_total, step_time_rewarder = 0, 0, 0
    visualize_time = 0
    rewarder_fit_time = 0

    envs = RL2EnvInterface(args)
    if args.look:
        looker = Looker(args.log_dir)

    actor_critic = Policy(envs.obs_shape, envs.action_space,
                          base=RL2Base,
                          base_kwargs={'recurrent': True,
                                       'num_act_dim': envs.action_space.shape[0]})
    actor_critic.to(args.device)
    agent = algo.PPO(actor_critic, args.clip_param, args.ppo_epoch,
                     args.num_mini_batch, args.value_loss_coef,
                     args.entropy_coef, lr=args.lr, eps=args.eps,
                     max_grad_norm=args.max_grad_norm)

    rollouts = RolloutStorage(args.num_steps, args.num_processes,
                              envs.obs_shape, envs.action_space,
                              actor_critic.recurrent_hidden_state_size)
    rollouts.to(args.device)

    def copy_obs_into_beginning_of_storage(obs):
        obs_raw, obs_act, obs_rew, obs_flag = obs
        rollouts.obs[0].copy_(obs_raw)
        rollouts.obs_act[0].copy_(obs_act)
        rollouts.obs_rew[0].copy_(obs_rew)
        rollouts.obs_flag[0].copy_(obs_flag)

    for j in range(args.num_updates):
        obs = envs.reset()
        copy_obs_into_beginning_of_storage(obs)

        if args.use_linear_lr_decay:
            update_linear_schedule(agent.optimizer, j, args.num_updates, args.lr)

        if args.algo == 'ppo' and args.use_linear_clip_decay:
            agent.clip_param = args.clip_param * (1 - j / float(args.num_updates))

        episode_returns = [0 for i in range(args.trial_length)]
        episode_final_reward = [0 for i in range(args.trial_length)]
        i_episode = 0
        log_marginal = 0
        lambda_log_s_given_z = 0

        for step in range(args.num_steps):
            # Sample actions
            policy_forward_start = time.time()
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = actor_critic.act(
                    rollouts.get_obs(step),
                    rollouts.recurrent_hidden_states[step],
                    rollouts.masks[step])
            policy_forward_time += time.time() - policy_forward_start

            # Observe reward and next obs
            step_total_start = time.time()
            obs, reward, done, info = envs.step(action)
            step_time_total += time.time() - step_total_start
            step_time_env += info['step_time_env']
            step_time_rewarder += info['reward_time']
            log_marginal += info['log_marginal'].sum().item()
            lambda_log_s_given_z += info['lambda_log_s_given_z'].sum().item()

            episode_returns[i_episode] += reward.sum().item()
            if all(done['episode']):
                episode_final_reward[i_episode] += reward.sum().item()
                i_episode = (i_episode + 1) % args.trial_length

            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                       for done_ in done['trial']])
            rollouts.insert(obs, recurrent_hidden_states, action,
                            action_log_prob, value, reward, masks)
        assert all(done['trial'])

        # policy update
        with torch.no_grad():
            next_value = actor_critic.get_value(
                rollouts.get_obs(-1),
                rollouts.recurrent_hidden_states[-1],
                rollouts.masks[-1]).detach()
        rollouts.compute_returns(next_value, args.use_gae, args.gamma, args.tau)

        policy_update_start = time.time()
        if args.rewarder != 'supervised' and envs.rewarder.fit_counter == 0 and not args.vae_load:
            value_loss, action_loss, dist_entropy = 0, 0, 0
        else:
            value_loss, action_loss, dist_entropy = agent.update(rollouts)
        policy_update_time += time.time() - policy_update_start
        rollouts.after_update()

        # metrics
        trajectories_pre = envs.trajectories_pre_current_update
        state_entropy_pre = calculate_state_entropy(args, trajectories_pre)
        trajectories_post = envs.trajectories_post_current_update
        state_entropy_post = calculate_state_entropy(args, trajectories_post)

        return_avg = rollouts.rewards.sum() / args.trials_per_update
        reward_avg = return_avg / (args.trial_length * args.episode_length)
        log_marginal_avg = log_marginal / args.trials_per_update / (
            args.trial_length * args.episode_length)
        lambda_log_s_given_z_avg = lambda_log_s_given_z / args.trials_per_update / (
            args.trial_length * args.episode_length)

        num_steps = (j + 1) * args.num_steps * args.num_processes
        num_episodes = num_steps // args.episode_length
        num_trials = num_episodes // args.trial_length
        logger.logkv('state_entropy_pre', state_entropy_pre)
        logger.logkv('state_entropy_post', state_entropy_post)
        logger.logkv('value_loss', value_loss)
        logger.logkv('action_loss', action_loss)
        logger.logkv('dist_entropy', dist_entropy)
        logger.logkv('return_avg', return_avg.item())
        logger.logkv('reward_avg', reward_avg.item())
        logger.logkv('steps', num_steps)
        logger.logkv('episodes', num_episodes)
        logger.logkv('trials', num_trials)
        logger.logkv('policy_updates', (j + 1))
        logger.logkv('time', time.time() - start)
        logger.logkv('policy_forward_time', policy_forward_time)
        logger.logkv('policy_update_time', policy_update_time)
        logger.logkv('step_time_rewarder', step_time_rewarder)
        logger.logkv('step_time_env', step_time_env)
        logger.logkv('step_time_total', step_time_total)
        logger.logkv('visualize_time', visualize_time)
        logger.logkv('rewarder_fit_time', rewarder_fit_time)
        logger.logkv('log_marginal_avg', log_marginal_avg)
        logger.logkv('lambda_log_s_given_z_avg', lambda_log_s_given_z_avg)
        for i_episode in range(args.trial_length):
            logger.logkv('episode_return_avg_{}'.format(i_episode),
                         episode_returns[i_episode] / args.trials_per_update)
            logger.logkv('episode_final_reward_{}'.format(i_episode),
                         episode_final_reward[i_episode] / args.trials_per_update)

        if (j % args.save_period == 0 or j == args.num_updates - 1) and args.log_dir != '':
            save_model(args, actor_critic, envs, iteration=j)

        if not args.vae_freeze and j % args.rewarder_fit_period == 0:
            rewarder_fit_start = time.time()
            envs.fit_rewarder()
            rewarder_fit_time += time.time() - rewarder_fit_start

        if (j % args.vis_period == 0 or j == args.num_updates - 1) and args.log_dir != '':
            visualize_start = time.time()
            if args.look:
                eval_return_avg, eval_episode_returns, eval_episode_final_reward = \
                    looker.look(iteration=j)
                logger.logkv('eval_return_avg', eval_return_avg)
                for i_episode in range(args.trial_length):
                    logger.logkv('eval_episode_return_avg_{}'.format(i_episode),
                                 eval_episode_returns[i_episode] / args.trials_per_update)
                    logger.logkv('eval_episode_final_reward_{}'.format(i_episode),
                                 eval_episode_final_reward[i_episode] / args.trials_per_update)
            if args.plot:
                p = Popen('python visualize.py --log-dir {}'.format(args.log_dir),
                          shell=True)
                processes.append(p)
            visualize_time += time.time() - visualize_start

        logger.dumpkvs()
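# rollouts.compute_returns(next_value, args.use_gae, args.gamma, args.tau) is
# what turns the stored rewards and value predictions into targets for the PPO
# update. A sketch of such a method, under the assumption that RolloutStorage
# follows the common pytorch-a2c-ppo layout (rewards, value_preds, masks, and
# returns tensors of shape [num_steps(+1), num_processes, 1]):
def compute_returns(self, next_value, use_gae, gamma, tau):
    if use_gae:
        # Generalized Advantage Estimation: returns[t] = A_t + V(s_t), where
        # A_t is the exponentially weighted sum of TD errors delta_t and
        # masks zero out bootstrapping across episode boundaries.
        self.value_preds[-1] = next_value
        gae = 0
        for step in reversed(range(self.rewards.size(0))):
            delta = (self.rewards[step]
                     + gamma * self.value_preds[step + 1] * self.masks[step + 1]
                     - self.value_preds[step])
            gae = delta + gamma * tau * self.masks[step + 1] * gae
            self.returns[step] = gae + self.value_preds[step]
    else:
        # Plain discounted returns, bootstrapped from next_value
        self.returns[-1] = next_value
        for step in reversed(range(self.rewards.size(0))):
            self.returns[step] = (self.returns[step + 1] * gamma
                                  * self.masks[step + 1] + self.rewards[step])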
def check_person(self, image: ndarray, temp: ndarray, looker: Looker,
                 image_timestamp: Time):
    """Checks the tracked person's mask in the current frame."""
    locations, predictions = self.detector.detect_and_predict(image)
    # Draw the detector bounding boxes
    self.draw_detector(locations, predictions, image)

    # Decrement the wait counter for every frame where a face is detected
    if len(predictions) != 0 and self.wait_counter != 0:
        self.wait_counter -= 1

    # Update tracker
    self.tracker.track_ok, self.bounding_box = self.tracker.update(image)

    # Check if the tracking was lost
    if self.last_track_time is not None and time() - self.last_track_time >= 4:
        self.lost_tracking = True
        return

    # For every frame decrease the counter
    if self.counter != 0:
        self.counter -= 1

    # Every set number of frames, reposition the tracker on the detected face
    # if the distance between their centers is under the threshold value
    if self.counter == 0 and self.tracker.track_ok:
        # Reset the counter to the default value
        self.counter = self.default_counter_init

        # Get the tracker bounding box center
        tracker_center = get_center(point_and_dims_to_points(self.bounding_box))
        for box, prediction in zip(locations, predictions):
            start_x, start_y, end_x, end_y = box
            detector_center = get_center(((start_x, start_y), (end_x, end_y)))
            # Check if the threshold value is met
            if dist(tracker_center, detector_center) <= self.distance_threshold:
                self.last_track_time = time()
                if self.move_time != 0:
                    self.move_time -= 1
                self.bounding_box = points_to_point_and_dims(
                    (start_x, start_y, end_x, end_y))

                # Reinitialize the tracker and make the robot look at the
                # person if the set number of frames has passed
                self.tracker.create_tracker()
                self.tracker.track_ok = self.tracker.init(image, self.bounding_box)
                if self.move_time == 0:
                    looker.point_head(detector_center, image_timestamp)
                    self.move_time = self.default_move_time

                # Get the temperature for the current frame
                self.temp_checker.add_data(temp, start_x, start_y, end_x, end_y)

                # Add the prediction type
                prediction_type, _ = self.prediction_type(prediction)
                self.add_prediction(prediction_type)
                max_state = self.get_max_prediction()

                # Print the message
                if not self.action_said and self.predictions[max_state] >= self.state_time:
                    self.speak_message(max_state)
                    self.action_said = True
                    self.last_said = time()
                    if max_state == 'with_mask':
                        self.mask_ok = True
                # If the message was already printed, check again whether the
                # mask is worn correctly
                elif self.action_said and self.predictions[max_state] >= self.state_time:
                    if max_state == 'with_mask':
                        self.mask_ok = True
                    else:
                        self.reset_predictions()
                if self.last_said is not None and time() - self.last_said >= 6:
                    self.action_said = False
                break

    # Draw the tracker bounding box
    self.draw_tracker(self.tracker.track_ok, image, self.bounding_box,
                      self.tracker.name)
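# check_person relies on a few small geometry helpers (get_center,
# point_and_dims_to_points, points_to_point_and_dims, dist) that are defined
# elsewhere in the project. A sketch of what they likely compute, inferred
# from how they are called above; the exact return conventions are
# assumptions:
from math import hypot

def point_and_dims_to_points(box):
    # (x, y, w, h) -> ((x1, y1), (x2, y2))
    x, y, w, h = box
    return (x, y), (x + w, y + h)

def points_to_point_and_dims(box):
    # (x1, y1, x2, y2) -> (x, y, w, h), the format OpenCV trackers expect
    start_x, start_y, end_x, end_y = box
    return start_x, start_y, end_x - start_x, end_y - start_y

def get_center(points):
    # ((x1, y1), (x2, y2)) -> center of the box
    (x1, y1), (x2, y2) = points
    return (x1 + x2) // 2, (y1 + y2) // 2

def dist(p, q):
    # Euclidean distance between two points
    return hypot(p[0] - q[0], p[1] - q[1])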