Example #1
def main(argv):
    LoggingConfig.setup()

    input_files_path = "/Users/allarviinamae/EduWorkspace/master-thesis-training-videos/backflips"
    op_models_path = "/Users/allarviinamae/EduWorkspace/openpose/models"
    show_video = False

    try:
        opts, args = getopt.getopt(argv, "hi:m:s", ["inputFilesPath=", "opModelsPath=", "showVideo"])
    except getopt.GetoptError:
        logging.info('main.py -i <inputFilesPath> -m <opModelsPath> -s')
        sys.exit(2)

    for opt, arg in opts:
        if opt == '-h':
            logging.info('main.py -i <inputFilesPath> -m <opModelsPath> -s')
            sys.exit()
        elif opt in ("-i", "--inputFilesPath"):
            input_files_path = arg
        elif opt in ("-m", "--opModelsPath"):
            op_models_path = arg
        elif opt in ("-s", "--showVideo"):
            show_video = True

    logging.info(f'Input files path is {input_files_path}')
    logging.info(f'OpenPose models path is {op_models_path}')
    logging.info(f'Show video is {show_video}')

    try:
        # Change these variables to point to the correct folder (Release/x64 etc.)
        # sys.path.append('../../python')
        # If you run `make install` (default path is `/usr/local/python` for Ubuntu), you can also access the
        # OpenPose/python module from there. This will install OpenPose and the Python library at your desired
        # installation path. Ensure that this is in your Python path in order to use it:
        # sys.path.append('/usr/local/python')
        from openpose import pyopenpose as op
    except ImportError as e:
        logging.warning(
            'Error: OpenPose library could not be found. Did you enable `BUILD_PYTHON` in CMake and have this Python '
            'script in the right folder?')
        raise e

    # Initializing Python OpenPose wrapper. Constructing OpenPose object allocates GPU memory
    logging.info("Starting OpenPose Python Wrapper...")
    op_wrapper = op.WrapperPython()
    openpose_params = get_openpose_params(op_models_path)
    op_wrapper.configure(openpose_params)
    op_wrapper.start()
    logging.info("OpenPose Python Wrapper started")

    video_processor = VideoProcessor(op_wrapper, show_video)

    input_files = InputFileService.get_input_files(input_files_path)
    input_files.sort()

    for video_to_process in input_files:
        video_processor.process(video_to_process)
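
The snippet above assumes a command-line entry point and a handful of imports that the example does not show. A minimal sketch of that glue follows; the project-specific helpers (LoggingConfig, VideoProcessor, InputFileService, get_openpose_params) are assumed to live elsewhere in the repository, so their import paths are not shown.

# Sketch of the surrounding module, under the assumptions stated above.
import getopt
import logging
import sys

# LoggingConfig, VideoProcessor, InputFileService and get_openpose_params are
# project-specific helpers; their import paths are not part of this example.

if __name__ == '__main__':
    # getopt expects the argument list without the script name itself.
    main(sys.argv[1:])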
Example #2
def handler(event, context):
    for record in event['Records']:
        bucket = record['s3']['bucket']['name']
        key = record['s3']['object']['key']

        try:
            obj = s3_client.get_object(Bucket=bucket, Key=key)
            if obj["Metadata"].get("process", 0):
                # Temporary path where we'll save original object
                original_obj_path = '/tmp/{}{}'.format(uuid.uuid4(),
                                                       key.replace("/", "-"))
                s3_client.download_file(bucket, key, original_obj_path)

                print('Processing object {}...'.format(key))

                # Videos are all stored under the 'vid/' prefix in S3, so if that prefix appears in the key
                # (the object path) the object is treated as a video; otherwise it is treated as an image.
                # Documents are not processed because they don't carry the 'process' metadata (yet?).
                if "vid/" in key:
                    processor = VideoProcessor(
                        s3_client,
                        bucket,
                    )
                else:
                    processor = ImageProcessor(s3_client, bucket)

                processor.process(original_obj_path, obj["Metadata"], key,
                                  obj["Metadata"].get("dest_ext", None))
                return {
                    'statusCode': 200,
                    'body': "Process executed successfully"
                }
        except Exception as e:
            print(e)
            print(
                'Error getting object {} from bucket {}. Make sure they exist and your bucket is in the same region as this function.'
                .format(key, bucket))
            raise e
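
The handler references uuid and a module-level s3_client that the snippet does not define. A minimal sketch of that setup, assuming a standard boto3 client, is shown below.

# Module-level setup assumed by the handler above (a sketch, not the original file).
import uuid

import boto3

# Created once per Lambda container and reused across invocations.
s3_client = boto3.client('s3')

# VideoProcessor and ImageProcessor are project-specific classes; their import
# paths are not shown in the example.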
Example #3
class DeepFacialEmotionInference(object):
    def __init__(
            self,
            # parameters for testing
            model_path,
            batch_size,
            device=None,
            workers=0,
            # parameters for the video processing
            save_size=112,
            nomask=True,
            grey=False,
            quiet=True,
            tracked_vid=False,
            noface_save=False,
            openface_exe='OpenFace/build/bin/FeatureExtraction',
            # parameters for deep feature extraction (Resnet50)
            benchmark_dir='pytorch-benchmarks',
            model_name='resnet50_ferplus_dag',
            feature_layer='pool5_7x7_s1',
            # parameters for snippet sampler
            num_phase=12,
            phase_size=48,
            length=64,
            stride=64,
            # parameters for phase difference extractor
            height=4,
            nbands=2,
            scale_factor=2,
            extract_level=[1, 2]):

        assert os.path.exists(model_path), \
            "Please download the model checkpoint first."

        self.batch_size = batch_size
        self.workers = workers
        self.num_phase = num_phase
        self.phase_size = phase_size
        self.length = length
        self.stride = stride

        self.device = get_device(device)
        # Face detection and face alignment
        self.video_processor = VideoProcessor(save_size, nomask, grey, quiet,
                                              tracked_vid, noface_save,
                                              openface_exe)
        # From snippets to deep facial features
        self.resnet50_extractor = Resnet50Extractor(benchmark_dir, self.device,
                                                    model_name, feature_layer)
        # Phase and phase differences over time on faces
        self.pd_extractor = Phase_Difference_Extractor(height, nbands,
                                                       scale_factor,
                                                       extract_level,
                                                       self.device, not quiet)

        self.model = Two_Stream_RNN()  # model for FER
        checkpoint = torch.load(model_path, map_location=self.device)
        self.model.load_state_dict(checkpoint['state_dict'])
        self.model = self.model.eval()
        self.model.to(self.device)

        logger.info(f"Loaded checkpoint from {model_path},"
                    f"Epoch:{checkpoint['epoch']}")

        self.label_name = ['valence', 'arousal']  # model output format

    def run_inference_from_video(self, input_video, keep_tmp=True):
        """
        Perform Video-Facial-Emotion recognition on the provided video.
        
        Args:
            input_video (str): path to the video stream to process.
        
        Returns:
            emotions_dict: a dict of dataframes containing the emotion 
                prediction of each video (key by name) per frame.

        Notes:
            - The user can provide a dir for temporary files (snippets, features).
        """
        video_name = os.path.splitext(os.path.basename(input_video))[0]
        tmp_dir = create_dir(
            os.path.join(os.path.dirname(input_video), video_name + "-tmp"))

        # first, the input video is processed using OpenFace
        opface_output_dir = os.path.join(tmp_dir, video_name + "_opface")
        self.video_processor.process(input_video, opface_output_dir)
        logger.info(f"{video_name} processed with OpenFace.")

        # the cropped and aligned faces are then fed to resnet50 for deep feature ext
        feature_dir = os.path.join(tmp_dir, video_name + "_pool5")
        self.resnet50_extractor.run(opface_output_dir,
                                    feature_dir,
                                    video_name=video_name)
        logger.info(f"Deep facial features extracted with pre-trained ResNet.")

        # creating a sequence of inputs for the NN (sampling images)
        dataset = Snippet_Sampler(video_name,
                                  opface_output_dir,
                                  feature_dir,
                                  annot_dir=None,
                                  label_name='valence_arousal',
                                  test_mode=True,
                                  num_phase=self.num_phase,
                                  phase_size=self.phase_size,
                                  length=self.length,
                                  stride=self.stride)

        data_loader = torch.utils.data.DataLoader(dataset,
                                                  batch_size=self.batch_size,
                                                  num_workers=self.workers,
                                                  pin_memory=False)

        av_dict = self.run_inference_from_dataloader(data_loader)
        logger.info(f"{len(data_loader)} batches for {video_name}.")

        if not keep_tmp:  # tmp folders need to be removed
            logger.info(f"Removing {tmp_dir} as requested.")
            rmtree(tmp_dir)  # this removes nested folders too

        assert len(av_dict) == 1, "This function processes one video only."
        return list(av_dict.values())[0]  # the first and only item is returned

    def run_inference_from_dataloader(self,
                                      dataloader,
                                      train_mean=None,
                                      train_std=None):
        """
        Perform inference on a sequence of (already pre-processed) samples,
        provided as a torch dataloader to simplify processing.
        
        Args:
            dataloader (data.DataLoader): a dataloader containing video features.
            train_mean (list): mean per video, or None
            train_std (list): std per video, or None
        
        Returns:
            video_dict (dict): valence-arousal predictions per video, keyed by
                video name, each provided as a pandas DataFrame with one row
                per frame.
        """
        sample_names = []
        sample_preds = []
        sample_ranges = []

        for i, data_batch in enumerate(dataloader):

            phase_f, rgb_f, label, ranges, names = data_batch
            with torch.no_grad():  # no gradient tracking needed at inference time
                phase_f = phase_f.type('torch.FloatTensor').to(self.device)
                phase_0, phase_1 = self.phase_diff_output(
                    phase_f, self.pd_extractor)
                rgb_f = rgb_f.type('torch.FloatTensor').to(self.device)
                phase_0 = phase_0.type('torch.FloatTensor').to(self.device)
                phase_1 = phase_1.type('torch.FloatTensor').to(self.device)

                # Forward pass of the two-stream RNN on the current batch
                output = self.model([phase_0, phase_1], rgb_f)

            sample_names.append(names)
            sample_ranges.append(ranges)
            sample_preds.append(output.cpu().data.numpy())

        sample_names = np.concatenate(sample_names, axis=0)
        sample_preds = np.concatenate(sample_preds, axis=0)
        n_sample, n_length, n_labels = sample_preds.shape

        if train_mean is not None and train_std is not None:
            # standardise output features if required (mean and std provided)
            trans_sample_preds = sample_preds.reshape(-1, n_labels)
            trans_sample_preds = np.array([
                correct(trans_sample_preds[:, i], train_mean[i], train_std[i])
                for i in range(n_labels)
            ])  # scaling of predictions
            sample_preds = trans_sample_preds.reshape(n_sample, n_length,
                                                      n_labels)

        sample_ranges = np.concatenate(sample_ranges, axis=0)
        # One entry per video, based on the dataloader provided
        video_dict = {}

        for video in np.unique(sample_names):  # one pass per distinct video
            mask = sample_names == video
            video_ranges = sample_ranges[mask]
            max_len = max([ranges[-1] for ranges in video_ranges])
            video_dict[video] = np.zeros((max_len, n_labels))
            video_preds = sample_preds[mask]

            min_f, max_f = 0, 0  # make sure to return full range of video frames
            for rg, pred in zip(video_ranges, video_preds):
                start, end = rg
                video_dict[video][start:end, :] = pred
                min_f = min(min_f, start)
                max_f = max(max_f, end)
            assert (min_f == 0) and (max_f == max_len)

        for video in video_dict.keys():  # creating a dataframe per video
            video_dict[video] = pd.DataFrame(data=video_dict[video],
                                             columns=self.label_name)

        return video_dict

    def phase_diff_output(self, phase_batch, steerable_pyramid):
        """
        Extract the first level and the second level phase difference images.
        """
        sp = steerable_pyramid
        bs, num_frames, num_phases, W, H = phase_batch.size()

        coeff_batch = sp.build_pyramid(
            phase_batch.view(bs * num_frames, num_phases, W, H))
        assert isinstance(coeff_batch, list)

        phase_batch_0 = sp.extract(coeff_batch[0])
        N, n_ch, n_ph, W, H = phase_batch_0.size()

        phase_batch_0 = phase_batch_0.view(N, -1, W, H)
        phase_batch_0 = phase_batch_0.view(bs, num_frames, -1, W, H)
        phase_batch_1 = sp.extract(coeff_batch[1])

        N, n_ch, n_ph, W, H = phase_batch_1.size()
        phase_batch_1 = phase_batch_1.view(N, -1, W, H)
        phase_batch_1 = phase_batch_1.view(bs, num_frames, -1, W, H)

        return phase_batch_0, phase_batch_1
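
A usage sketch for the class above; the checkpoint and video paths are placeholders, and only constructor arguments from the signature shown are used.

# Example usage (a sketch; the paths below are placeholders).
if __name__ == '__main__':
    inference = DeepFacialEmotionInference(
        model_path='checkpoints/two_stream_rnn.pth',  # placeholder checkpoint path
        batch_size=4,
        device=None,  # resolved by get_device(), e.g. to CUDA if available
        workers=2)

    # Returns a pandas DataFrame with one row per frame and
    # columns ['valence', 'arousal'].
    predictions = inference.run_inference_from_video('videos/sample.mp4',
                                                     keep_tmp=False)
    print(predictions.head())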