def reset(person_waiter: WaitingForPerson, person_checker: CheckingPerson, tracker: Tracker,
          temp_checker: TemperatureChecker, looker: Looker):
    """
    Resets the instances to their initial state.
    """
    person_waiter.reset()
    person_checker.reset()
    temp_checker.reset()
    tracker.reset()
    looker.stop()
Example 2
def train():
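    """
    Main training loop: collects rollouts, updates the policy, and periodically
    fits the rewarder, saves checkpoints, and visualizes progress.
    """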
    processes = []
    if os.path.isdir(args.log_dir):
        ans = input('{} exists\ncontinue and overwrite? y/n: '.format(
            args.log_dir))
        if ans == 'n':
            return

    logger.configure(dir=args.log_dir, format_strs=['stdout', 'log', 'csv'])
    logger.log(args)
    json.dump(vars(args), open(os.path.join(args.log_dir, 'params.json'), 'w'))

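    # Limit PyTorch intra-op parallelism to two CPU threads.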
    torch.set_num_threads(2)

    start = time.time()
    policy_update_time, policy_forward_time = 0, 0
    step_time_env, step_time_total, step_time_rewarder = 0, 0, 0
    visualize_time = 0
    rewarder_fit_time = 0

    envs = ContextualEnvInterface(args)
    if args.look:
        looker = Looker(args.log_dir)

    actor_critic, agent = initialize_policy(envs)

    rollouts = RolloutStorage(args.num_steps, args.num_processes,
                              envs.obs_shape, envs.action_space,
                              actor_critic.recurrent_hidden_state_size)
    rollouts.to(args.device)

    def copy_obs_into_beginning_of_storage(obs):
        rollouts.obs[0].copy_(obs)

    for j in range(args.num_updates):

        obs = envs.reset()  # have to reset here to use the updated rewarder to sample tasks
        copy_obs_into_beginning_of_storage(obs)

        if args.use_linear_lr_decay:
            update_linear_schedule(agent.optimizer, j, args.num_updates,
                                   args.lr)

        if args.algo == 'ppo' and args.use_linear_clip_decay:
            agent.clip_param = args.clip_param * (1 - j / float(args.num_updates))

        log_marginal = 0
        lambda_log_s_given_z = 0

        for step in range(args.num_steps):
            # Sample actions
            policy_forward_start = time.time()
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = actor_critic.act(
                    rollouts.obs[step], rollouts.recurrent_hidden_states[step],
                    rollouts.masks[step])
            policy_forward_time += time.time() - policy_forward_start

            # Observe reward and next obs
            step_total_start = time.time()
            obs, reward, done, info = envs.step(action)
            step_time_total += time.time() - step_total_start
            step_time_env += info['step_time_env']
            step_time_rewarder += info['reward_time']
            if args.rewarder == 'unsupervised' and args.clusterer == 'vae':
                log_marginal += info['log_marginal'].sum().item()
                lambda_log_s_given_z += info['lambda_log_s_given_z'].sum().item()

            # If done, zero the mask to clear the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                       for done_ in done])
            rollouts.insert(obs, recurrent_hidden_states, action,
                            action_log_prob, value, reward, masks)

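        # Each environment is expected to finish its trial exactly at the end of
        # the rollout (num_steps is assumed to be a whole number of trials).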
        assert all(done)

        # policy update
        with torch.no_grad():
            next_value = actor_critic.get_value(
                rollouts.obs[-1], rollouts.recurrent_hidden_states[-1],
                rollouts.masks[-1]).detach()

        rollouts.compute_returns(next_value, args.use_gae, args.gamma,
                                 args.tau)

        policy_update_start = time.time()
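        # If the rewarder is not supervised, skip the policy update until the
        # rewarder has been fit at least once.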
        if args.rewarder != 'supervised' and envs.rewarder.fit_counter == 0:
            value_loss, action_loss, dist_entropy = 0, 0, 0
        else:
            value_loss, action_loss, dist_entropy = agent.update(rollouts)
        policy_update_time += time.time() - policy_update_start
        rollouts.after_update()

        # metrics
        trajectories = envs.trajectories_current_update
        state_entropy = calculate_state_entropy(args, trajectories)

        return_avg = rollouts.rewards.sum() / args.trials_per_update
        reward_avg = return_avg / (args.trial_length * args.episode_length)
        log_marginal_avg = log_marginal / args.trials_per_update / (
            args.trial_length * args.episode_length)
        lambda_log_s_given_z_avg = lambda_log_s_given_z / args.trials_per_update / (
            args.trial_length * args.episode_length)

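        # Cumulative counts of environment steps, episodes, and trials so far.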
        num_steps = (j + 1) * args.num_steps * args.num_processes
        num_episodes = num_steps // args.episode_length
        num_trials = num_episodes // args.trial_length

        logger.logkv('state_entropy', state_entropy)
        logger.logkv('value_loss', value_loss)
        logger.logkv('action_loss', action_loss)
        logger.logkv('dist_entropy', dist_entropy)
        logger.logkv('return_avg', return_avg.item())
        logger.logkv('reward_avg', reward_avg.item())
        logger.logkv('steps', num_steps)
        logger.logkv('episodes', num_episodes)
        logger.logkv('trials', num_trials)
        logger.logkv('policy_updates', (j + 1))
        logger.logkv('time', time.time() - start)
        logger.logkv('policy_forward_time', policy_forward_time)
        logger.logkv('policy_update_time', policy_update_time)
        logger.logkv('step_time_rewarder', step_time_rewarder)
        logger.logkv('step_time_env', step_time_env)
        logger.logkv('step_time_total', step_time_total)
        logger.logkv('visualize_time', visualize_time)
        logger.logkv('rewarder_fit_time', rewarder_fit_time)
        if args.rewarder == 'unsupervised' and args.clusterer == 'vae':
            logger.logkv('log_marginal_avg', log_marginal_avg)
            logger.logkv('lambda_log_s_given_z_avg', lambda_log_s_given_z_avg)
        logger.dumpkvs()

        if (j % args.save_period == 0
                or j == args.num_updates - 1) and args.log_dir != '':
            save_model(args, actor_critic, envs, iteration=j)

        if j % args.rewarder_fit_period == 0:
            rewarder_fit_start = time.time()
            envs.fit_rewarder()
            rewarder_fit_time += time.time() - rewarder_fit_start

        if (j % args.vis_period == 0
                or j == args.num_updates - 1) and args.log_dir != '':
            visualize_start = time.time()
            if args.look:
                looker.look(iteration=j)
            if args.plot:
                p = Popen('python visualize.py --log-dir {}'.format(args.log_dir), shell=True)
                processes.append(p)
            visualize_time += time.time() - visualize_start
Example 3
def video():
    """
    Principal method of the program that reads the data streams, displays
    the video streams to the user and other messages.
    """
    global image_timestamp
    global thermal
    global normal
    global temp

    # Variable that remembers the current state: 'waiting' waits for a person
    # to enter the frame, and 'person_detected' continuously checks whether
    # the person's mask is worn correctly.
    current_state = 'waiting'

    looker = Looker()
    talker = Talker()
    tracker = Tracker(args['tracker'])
    detector = FaceAndMaskDetector(args['confidence'])
    temp_checker = TemperatureChecker()
    person_waiter = WaitingForPerson(tracker, detector, args['wait'])
    person_checker = CheckingPerson(tracker, talker, detector, temp_checker, args['value'], args['wait'],
                                    args['threshold'], args['state'], args['move'])

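    # Main loop: grab the latest frames, run the state machine, and display the
    # stacked normal/thermal video stream.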
    while True:
        # Get current frames
        normal_wrapper.set(normal)
        curr_normal = normal_wrapper.get()
        temp_wrapper.set(temp)
        curr_temp = temp_wrapper.get()
        thermal_wrapper.set(thermal)
        curr_thermal = thermal_wrapper.get()

        # While in the 'waiting' state check if a person is in the frame
        if current_state == 'waiting':
            person_waiter.run_prediction(curr_normal)

        # If a person entered the frame, change the current state
        if person_waiter.person_in_frame():
            current_state = 'person_detected'

        # While in the 'person_detected' state check if the person is wearing
        # the mask properly.
        if current_state == 'person_detected':
            person_checker.check_person(curr_normal, curr_temp, looker, image_timestamp)
            if person_checker.mask_ok:
                print(f'{person_checker.temp_checker.get_temp()} C')
                sleep(3)
                person_checker.speak_temperature()
                reset(person_waiter, person_checker, tracker, temp_checker, looker)
                looker = Looker()
                current_state = 'waiting'
            elif person_checker.lost_tracking:
                reset(person_waiter, person_checker, tracker, temp_checker, looker)
                looker = Looker()
                current_state = 'waiting'

        frame = vstack((curr_normal, curr_thermal))

        # Display the concatenated current frame
        cv.imshow('Video stream', frame)

        # Exit if Q pressed
        if cv.waitKey(1) & 0xFF == ord('q'):
            break

    # Close the video stream, stop the thread that centers the camera on the
    # face, and exit the program.
    cv.destroyAllWindows()
    looker.stop()
    sys.exit(0)
Example 4
def train():
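    """
    RL^2 training loop: collects trial rollouts with a recurrent PPO policy and
    periodically fits the rewarder, saves checkpoints, and visualizes progress.
    """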
    processes = []
    if os.path.isdir(args.log_dir):
        ans = input('{} exists\ncontinue and overwrite? y/n: '.format(args.log_dir))
        if ans == 'n':
            return

    logger.configure(dir=args.log_dir, format_strs=['stdout', 'log', 'csv'])
    logger.log(args)
    json.dump(vars(args), open(os.path.join(args.log_dir, 'params.json'), 'w'))

    torch.set_num_threads(2)

    start = time.time()
    policy_update_time, policy_forward_time = 0, 0
    step_time_env, step_time_total, step_time_rewarder = 0, 0, 0
    visualize_time = 0
    rewarder_fit_time = 0

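    # RL^2-style environment interface: observations carry an action, reward,
    # and flag component alongside the raw observation (see
    # copy_obs_into_beginning_of_storage below).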
    envs = RL2EnvInterface(args)
    if args.look:
        looker = Looker(args.log_dir)

    actor_critic = Policy(envs.obs_shape, envs.action_space,
                          base=RL2Base, base_kwargs={'recurrent': True,
                                                     'num_act_dim': envs.action_space.shape[0]})
    actor_critic.to(args.device)
    agent = algo.PPO(actor_critic, args.clip_param, args.ppo_epoch, args.num_mini_batch,
                     args.value_loss_coef, args.entropy_coef, lr=args.lr,
                     eps=args.eps,
                     max_grad_norm=args.max_grad_norm)

    rollouts = RolloutStorage(args.num_steps, args.num_processes,
                              envs.obs_shape, envs.action_space,
                              actor_critic.recurrent_hidden_state_size)
    rollouts.to(args.device)

    def copy_obs_into_beginning_of_storage(obs):
        obs_raw, obs_act, obs_rew, obs_flag = obs
        rollouts.obs[0].copy_(obs_raw)
        rollouts.obs_act[0].copy_(obs_act)
        rollouts.obs_rew[0].copy_(obs_rew)
        rollouts.obs_flag[0].copy_(obs_flag)

    for j in range(args.num_updates):
        obs = envs.reset()
        copy_obs_into_beginning_of_storage(obs)

        if args.use_linear_lr_decay:
            update_linear_schedule(agent.optimizer, j, args.num_updates, args.lr)

        if args.algo == 'ppo' and args.use_linear_clip_decay:
            agent.clip_param = args.clip_param * (1 - j / float(args.num_updates))

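        # Per-episode bookkeeping within each trial: accumulated return and the
        # reward received on the episode's final step.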
        episode_returns = [0 for i in range(args.trial_length)]
        episode_final_reward = [0 for i in range(args.trial_length)]
        i_episode = 0

        log_marginal = 0
        lambda_log_s_given_z = 0

        for step in range(args.num_steps):
            # Sample actions
            policy_forward_start = time.time()
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = actor_critic.act(
                        rollouts.get_obs(step),
                        rollouts.recurrent_hidden_states[step],
                        rollouts.masks[step])
            policy_forward_time += time.time() - policy_forward_start

            # Observe reward and next obs
            step_total_start = time.time()
            obs, reward, done, info = envs.step(action)
            step_time_total += time.time() - step_total_start
            step_time_env += info['step_time_env']
            step_time_rewarder += info['reward_time']
            log_marginal += info['log_marginal'].sum().item()
            lambda_log_s_given_z += info['lambda_log_s_given_z'].sum().item()

            episode_returns[i_episode] += reward.sum().item()
            if all(done['episode']):
                episode_final_reward[i_episode] += reward.sum().item()
                i_episode = (i_episode + 1) % args.trial_length

            # If the trial is done, zero the mask to clear the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0] for done_ in done['trial']])
            rollouts.insert(obs, recurrent_hidden_states, action, action_log_prob, value, reward, masks)

        assert all(done['trial'])

        with torch.no_grad():
            next_value = actor_critic.get_value(rollouts.get_obs(-1),
                                                rollouts.recurrent_hidden_states[-1],
                                                rollouts.masks[-1]).detach()

        rollouts.compute_returns(next_value, args.use_gae, args.gamma, args.tau)

        policy_update_start = time.time()
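        # If the rewarder is not supervised, skip the policy update until it has
        # been fit at least once, unless a pre-trained VAE was loaded.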
        if args.rewarder != 'supervised' and envs.rewarder.fit_counter == 0 and not args.vae_load:
            value_loss, action_loss, dist_entropy = 0, 0, 0
        else:
            value_loss, action_loss, dist_entropy = agent.update(rollouts)
        policy_update_time += time.time() - policy_update_start
        rollouts.after_update()

        # metrics
        trajectories_pre = envs.trajectories_pre_current_update
        state_entropy_pre = calculate_state_entropy(args, trajectories_pre)

        trajectories_post = envs.trajectories_post_current_update
        state_entropy_post = calculate_state_entropy(args, trajectories_post)

        return_avg = rollouts.rewards.sum() / args.trials_per_update
        reward_avg = return_avg / (args.trial_length * args.episode_length)
        log_marginal_avg = log_marginal / args.trials_per_update / (args.trial_length * args.episode_length)
        lambda_log_s_given_z_avg = lambda_log_s_given_z / args.trials_per_update / (args.trial_length * args.episode_length)

        num_steps = (j + 1) * args.num_steps * args.num_processes
        num_episodes = num_steps // args.episode_length
        num_trials = num_episodes // args.trial_length

        logger.logkv('state_entropy_pre', state_entropy_pre)
        logger.logkv('state_entropy_post', state_entropy_post)
        logger.logkv('value_loss', value_loss)
        logger.logkv('action_loss', action_loss)
        logger.logkv('dist_entropy', dist_entropy)
        logger.logkv('return_avg', return_avg.item())
        logger.logkv('reward_avg', reward_avg.item())
        logger.logkv('steps', num_steps)
        logger.logkv('episodes', num_episodes)
        logger.logkv('trials', num_trials)
        logger.logkv('policy_updates', (j + 1))
        logger.logkv('time', time.time() - start)
        logger.logkv('policy_forward_time', policy_forward_time)
        logger.logkv('policy_update_time', policy_update_time)
        logger.logkv('step_time_rewarder', step_time_rewarder)
        logger.logkv('step_time_env', step_time_env)
        logger.logkv('step_time_total', step_time_total)
        logger.logkv('visualize_time', visualize_time)
        logger.logkv('rewarder_fit_time', rewarder_fit_time)
        logger.logkv('log_marginal_avg', log_marginal_avg)
        logger.logkv('lambda_log_s_given_z_avg', lambda_log_s_given_z_avg)
        for i_episode in range(args.trial_length):
            logger.logkv('episode_return_avg_{}'.format(i_episode),
                         episode_returns[i_episode] / args.trials_per_update)
            logger.logkv('episode_final_reward_{}'.format(i_episode),
                         episode_final_reward[i_episode] / args.trials_per_update)

        if (j % args.save_period == 0 or j == args.num_updates - 1) and args.log_dir != '':
            save_model(args, actor_critic, envs, iteration=j)

        if not args.vae_freeze and j % args.rewarder_fit_period == 0:
            rewarder_fit_start = time.time()
            envs.fit_rewarder()
            rewarder_fit_time += time.time() - rewarder_fit_start

        if (j % args.vis_period == 0 or j == args.num_updates - 1) and args.log_dir != '':
            visualize_start = time.time()
            if args.look:
                eval_return_avg, eval_episode_returns, eval_episode_final_reward = looker.look(iteration=j)
                logger.logkv('eval_return_avg', eval_return_avg)
                for i_episode in range(args.trial_length):
                    logger.logkv('eval_episode_return_avg_{}'.format(i_episode),
                                 eval_episode_returns[i_episode] / args.trials_per_update)
                    logger.logkv('eval_episode_final_reward_{}'.format(i_episode),
                                 eval_episode_final_reward[i_episode] / args.trials_per_update)

            if args.plot:
                p = Popen('python visualize.py --log-dir {}'.format(args.log_dir), shell=True)
                processes.append(p)
            visualize_time += time.time() - visualize_start

        logger.dumpkvs()
Example 5
    def check_person(self, image: ndarray, temp: ndarray, looker: Looker,
                     image_timestamp: Time):
        """
        Checks the tracked person's mask in the current frame.
        """
        locations, predictions = self.detector.detect_and_predict(image)

        # Draw the detector bounding boxes
        self.draw_detector(locations, predictions, image)

        # Decrement the wait counter for every frame where a face is detected
        if len(predictions) != 0 and self.wait_counter != 0:
            self.wait_counter -= 1

        # Update tracker
        self.tracker.track_ok, self.bounding_box = self.tracker.update(image)

        # Consider the tracking lost if no detection has matched the tracker
        # for at least 4 seconds.
        if self.last_track_time is not None and time() - self.last_track_time >= 4:
            self.lost_tracking = True
            return

        # For every frame decrease the counter
        if self.counter != 0:
            self.counter -= 1

        # Every set number of frames, reposition the tracker on the detected face
        # if the distance between their centers is under the threshold value
        if self.counter == 0 and self.tracker.track_ok:
            # Reset the counter to the default value
            self.counter = self.default_counter_init
            # Get the tracker bounding box center
            tracker_center = get_center(point_and_dims_to_points(self.bounding_box))
            for box, prediction in zip(locations, predictions):
                start_x, start_y, end_x, end_y = box
                detector_center = get_center(
                    ((start_x, start_y), (end_x, end_y)))
                # Check if the threshold value is met
                if dist(tracker_center,
                        detector_center) <= self.distance_threshold:
                    self.last_track_time = time()
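                    # Count down until the robot's head should be re-pointed at
                    # the detected face.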
                    if self.move_time != 0:
                        self.move_time -= 1
                    self.bounding_box = points_to_point_and_dims(
                        (start_x, start_y, end_x, end_y))
                    # Reinitialize the tracker and, if the set number of frames
                    # has passed, make the robot look at the person.
                    self.tracker.create_tracker()
                    self.tracker.track_ok = self.tracker.init(
                        image, self.bounding_box)
                    if self.move_time == 0:
                        looker.point_head(detector_center, image_timestamp)
                        self.move_time = self.default_move_time
                    # Get the temperature for the current frame
                    self.temp_checker.add_data(temp, start_x, start_y, end_x,
                                               end_y)
                    # Add the prediction type
                    prediction_type, _ = self.prediction_type(prediction)
                    self.add_prediction(prediction_type)
                    max_state = self.get_max_prediction()
                    # Print the message
                    if (not self.action_said
                            and self.predictions[max_state] >= self.state_time):
                        self.speak_message(max_state)
                        self.action_said = True
                        self.last_said = time()
                        if max_state == 'with_mask':
                            self.mask_ok = True
                    # If the message was already printed, check again if the
                    # mask is worn correctly
                    elif (self.action_said
                          and self.predictions[max_state] >= self.state_time):
                        if max_state == 'with_mask':
                            self.mask_ok = True
                        else:
                            self.reset_predictions()

                    if (self.last_said is not None
                            and time() - self.last_said >= 6):
                        self.action_said = False

                    break
        # Draw the tracker bounding box
        self.draw_tracker(self.tracker.track_ok, image, self.bounding_box,
                          self.tracker.name)