def __load_features(self, partition):
        """Load the freeform and northwind feature tables of one partition.

        Both tables are read with ``load_from_file`` from ``self.feature_dir``
        (``<partition>_freeform.pkl`` and ``<partition>_northwind.pkl``); they
        are presumably pandas DataFrames, since they are filtered, re-indexed
        and concatenated with pandas calls below.

        Processing steps:
          1. If ``self.mfcc_enabled`` is set, keep only the columns whose
             name contains ``'mfcc'``.
          2. Shorten every row label to its first ``\\d{3}_\\d+`` match.
          3. For the "training" and "development" partitions, move the
             northwind row stored under the wrong ``205_*`` key to the
             other key.
          4. Append ``_Freeform`` / ``_Northwin`` to the row labels.

        Parameters
        ----------
        partition : str
            Partition name used as the file-name prefix.

        Returns
        -------
        The freeform rows followed by the northwind rows
        (``pd.concat(..., sort=False)``).
        """
        freeform = load_from_file(self.feature_dir / f'{partition}_freeform.pkl')
        northwind = load_from_file(self.feature_dir /
                                   f'{partition}_northwind.pkl')

        if self.mfcc_enabled:
            freeform, northwind = (table.filter(like='mfcc', axis=1)
                                   for table in (freeform, northwind))

        # Reduce the raw row labels to their "<3 digits>_<digits>" part.
        for table in (freeform, northwind):
            table.index = [
                re.search(r"\d{3}_\d+", raw_label).group()
                for raw_label in table.index
            ]

        # partition -> (key the row is stored under, key it is moved to)
        broken_keys = {
            "training": ("205_1", "205_2"),
            "development": ("205_2", "205_1"),
        }
        if partition in broken_keys:
            stored_key, moved_key = broken_keys[partition]
            northwind.loc[moved_key] = northwind.loc[stored_key]
            northwind = northwind.drop(stored_key)

        freeform.index = [f'{label}_Freeform' for label in freeform.index]
        northwind.index = [f'{label}_Northwin' for label in northwind.index]
        return pd.concat([freeform, northwind], sort=False)
    def get_video_data(self):
        """
        Return the video features selected by the configuration.

        The feature type is 'fdhh' when ``self.fdhh`` is set and 'pca'
        otherwise. Features cached under ``<self.feature_folder>_FD`` are
        reloaded when the file exists and ``self.options.save_features`` is
        not set. Otherwise the data from ``self.get_train_test()`` is
        min-max scaled and then either passed through ``self.FDHH`` per
        index group or reduced with ``self.video_pca``. The original author
        warns that the non-FDHH path may overflow RAM.

        When ``self.options.save_features`` is set, the result is written to
        the cache and the flag is reset to False. Without FDHH, both sets are
        finally passed through ``self.split_videos``.

        Returns
        -------
        list
            ``[X_train, X_test]``
        """
        feature_str = 'fdhh' if self.fdhh else 'pca'
        cache_file = (f'train_test_{feature_str}.pic'
                      if self.options.mode == 'test' else
                      f'train_dev_{feature_str}.pic')
        cache_dir = f'{self.feature_folder}_FD'
        cache_path = f'{cache_dir}/{cache_file}'

        # Recompute when a fresh save was requested or nothing is cached yet.
        if self.options.save_features or not os.path.exists(cache_path):
            X_train, X_test = self.get_train_test()
            X_train, X_test = scale(X_train,
                                    X_test,
                                    scale_type='minmax',
                                    axis=0,
                                    use_pandas=True,
                                    verbose=self.options.verbose)
            if not self.fdhh:
                X_train, X_test = self.video_pca(X_train, X_test)
            else:
                if self.options.verbose:
                    print('Performing FDHH over train and test set...')
                X_train = X_train.groupby(level=0).apply(self.FDHH)
                X_test = X_test.groupby(level=0).apply(self.FDHH)
                if self.options.verbose:
                    # Fraction of entries that are exactly zero.
                    print(
                        f'Sparsity in Train fdhh = {np.sum(X_train.values == 0) / X_train.size}'
                    )
                    print(
                        f'Sparsity in Test fdhh = {np.sum(X_test.values == 0) / X_test.size}'
                    )
        else:
            X_train, X_test = load_from_file(cache_path)

        if self.options.save_features:
            save_to_file(cache_dir, cache_file, (X_train, X_test))
            # Save only once per run.
            self.options.save_features = False

        if self.fdhh:
            return [X_train, X_test]
        return [self.split_videos(X_train), self.split_videos(X_test)]
    def get_train_test(self):
        """
        Assemble the per-video feature files into a train and a test table.

        The 'Training' and 'Development' folders under
        ``self.feature_folder`` are always read; 'Testing' is added when
        ``self.options.mode == 'test'``. If a folder is missing,
        ``self.encode_videos()`` is called first. Every file in a folder
        contributes its rows to one DataFrame whose row labels are the file
        name without its last four characters.

        Returns
        -------
        X_train, X_test
            In 'test' mode: Training + Development concatenated, and Testing.
            Otherwise: Training, and Development.
        """
        if self.options.verbose:
            print('Putting together video data...')

        part_names = ['Training', 'Development']
        if self.options.mode == 'test':
            part_names.append('Testing')

        partitions = []
        for part_name in part_names:
            part_dir = f'{self.feature_folder}/{part_name}'
            if not os.path.exists(part_dir):
                self.encode_videos()

            file_names = os.listdir(part_dir)

            # First pass: read only the shapes so the whole table can be
            # allocated in one go.
            row_labels = []
            for file_name in file_names:
                n_rows, n_features = load_from_file(
                    f'{part_dir}/{file_name}').shape
                row_labels.extend(np.repeat(file_name[:-4], n_rows))
            table = pd.DataFrame(
                data=np.empty((len(row_labels), n_features)),
                index=row_labels)

            # Second pass: copy each video's features into its rows.
            for file_name in file_names:
                table.loc[file_name[:-4]] = load_from_file(
                    f'{part_dir}/{file_name}')
            partitions.append(table)

        if self.options.mode == 'test':
            X_train = pd.concat([partitions[0], partitions[1]])
            X_test = partitions[2]
        else:
            X_train, X_test = partitions

        return X_train, X_test
Exemple #4
0
def viz_experiments(experiments_info, title, file_name,
                    bar=True, curve=True,
                    window=(None,), loc='upper left', save=False):
    """
    Takes list of experiment tuples, plots curves/bar charts for each output type (in case multiple were used)

    The results of every experiment are loaded from
    ``$modNN_DIR/results/<name>/results.log``. For each output type found in
    any experiment, only the experiments that actually report that output
    type are plotted, together with their own labels.

    :param experiments_info: iterable of (label, file_name) tuples
    :param title: heading at the top of the figure
    :param file_name: prefix used when saving barchart figure
    :param bar: boolean (default: True) plot the bar charts
    :param curve: boolean (default: True) plot the learning curves
    :param window: tuple that represents a valid slice, allowing for selective plotting along x-axis for learning curves
    :param loc: position of the legend in the figure, allows repositioning to avoid occlusion in bar charts
    :param save: boolean (default: False)
    :raises KeyError: if the ``modNN_DIR`` environment variable is not set
        (only looked up when there is at least one experiment)
    """
    # Materialise the input once so one-shot iterables can be walked twice.
    experiments = list(experiments_info)

    # Real lists, not `map` objects: tuple-unpacking lambdas are a syntax
    # error on Python 3, and a lazy `map` would be exhausted after the first
    # of the several passes made over the stats below.
    experiment_labels = [label for label, _ in experiments]
    experiment_stats = [
        load_from_file(os.path.join(os.environ['modNN_DIR'], 'results', name, 'results.log'))
        for _, name in experiments
    ]

    # collect all output types used by these experiments
    output_names = set()
    for results in experiment_stats:
        output_names.update(results.keys())

    # for all output types create different plots
    for output_name in list(output_names):
        # Filter labels together with stats so each curve/bar keeps the
        # label of the experiment it came from, even when some experiments
        # do not report this output type.
        reporting = [(label, results[output_name])
                     for label, results in zip(experiment_labels, experiment_stats)
                     if output_name in results]
        output_type_labels = [label for label, _ in reporting]
        output_type_stats = [stats for _, stats in reporting]

        output_type_title = '{} - {}'.format(title, output_name)
        output_type_file_name = '{}-{}'.format(file_name, output_name)

        if curve:
            plot_learning_curves(output_type_labels, output_type_stats, output_type_title, output_type_file_name, window, save)

        if bar:
            plot_performance(output_type_labels, output_type_stats, output_type_title, output_type_file_name, loc, save)
    def video_pca(self, X_train, X_test):
        """
        Reduce the dimensionality of the train and test features with PCA.

        The number of components and whether a stored model may be reused
        come from ``self.pars['PCA']``. A stored model is looked up at
        ``<models_folder>/<self.vgg_v>_pca.pic``; when it is not used,
        ``None`` is handed to ``pca_transform`` (which presumably fits a new
        model in that case).

        Parameters
        ----------
        X_train, X_test
            Feature tables forwarded to ``pca_transform`` with
            ``use_pandas=True``.

        Returns
        -------
        X_train, X_test
            The transformed tables returned by ``pca_transform``.
        """
        if self.options.verbose:
            print('Reducing dimensionality of each frame using PCA...')
        use_saved = self.pars['PCA']['per_frame_use_saved']
        n_components = self.pars['PCA']['per_frame_components']

        # The file name must be a plain string: wrapping it in a list made
        # the formatted path "<folder>/['<name>']", which never exists, so
        # the stored model was silently ignored.
        models_folder = self.folders['models_folder']
        pca_file = f'{models_folder}/{self.vgg_v}_pca.pic'

        if use_saved and os.path.exists(pca_file):
            pca = load_from_file(pca_file)
        else:
            pca = None

        # NOTE(review): the model returned here is not written back to
        # pca_file in this method - confirm it is saved elsewhere.
        X_train, X_test, pca = pca_transform(X_train,
                                             X_test,
                                             n_components,
                                             pca=pca,
                                             use_pandas=True)

        if self.options.verbose:
            print(
                f'Explained variance = {np.sum(pca.explained_variance_ratio_):.2f}'
            )

        return X_train, X_test
    def video_faces(self, video_path, coord_path):
        """
        Extract the face crop from every sampled frame of a video.

        Frames are sampled at a whole-number step derived from the video
        frame rate and a target rate of 30 fps (step 1 for <= 30 fps input).
        For each sampled frame the face box is read from the coordinate file
        at ``coord_path`` when that file exists; otherwise it is computed
        with ``self.get_face_coords``. A box whose four values are all -1
        marks a missing face and the frame is skipped.

        Parameters
        ----------
        video_path : str
            A path to the video file
        coord_path : str
            Folder location and file name of the file with face coordinates

        Returns
        -------
        faces : ndarray
            float32 RGB crops, one per sampled frame with a face, each
            resized with ``self.input_size[0]`` and ``self.input_size[1]``
            as the target size.
        all_coords : ndarray (? * 4)
            One row of crop bounds per frame index, used as
            ``frame[c[0]:c[1], c[2]:c[3]]``. When computed here, rows of
            frames that were not sampled are -1.

        ``None`` (not a tuple) is returned when the video cannot be opened.
        """

        cap = cv2.VideoCapture(video_path)

        # Check if camera opened successfully
        if not cap.isOpened():
            print("Error opening video stream or file")
            cap.release()
            return None

        faces = []
        try:
            video_fps = cap.get(cv2.CAP_PROP_FPS)
            read_fps = 30
            # Whole-number sampling step. A float step made `i % step`
            # unreliable for non-integer ratios (e.g. 29.97 fps) and raised
            # ZeroDivisionError when the container reports 0 fps.
            step = max(1, int(round(video_fps / read_fps)))

            video_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

            coords_present = os.path.exists(coord_path)

            if coords_present:
                all_coords = load_from_file(coord_path)
            else:
                # Start from the "no face" marker instead of uninitialised
                # memory so rows of unsampled frames are well defined.
                all_coords = np.full((video_frames, 4), -1, dtype=np.int64)

            # NOTE(review): video_frames comes from container metadata; if
            # more frames decode than reported, all_coords[i] below raises
            # IndexError.
            i = -1
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break
                i += 1
                if i % step:
                    continue
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                if not coords_present:
                    all_coords[i] = self.get_face_coords(frame)
                c = all_coords[i]
                if (c == -1).all():
                    # No face in this frame.
                    continue
                face = frame[c[0]:c[1], c[2]:c[3]]
                face = cv2.resize(
                    face, (self.input_size[0],
                           self.input_size[1])).astype('float32')
                faces.append(face)
        finally:
            # Release the capture even if face detection or resizing fails.
            cap.release()

        faces = np.array(faces)

        return faces, all_coords