def __load_features(self, partition):
    """
    Load the freeform and northwind feature tables of one partition and
    stack them into a single frame.

    Parameters
    ----------
    partition : str
        Partition name used as the file-name prefix
        (``{partition}_freeform.pkl`` / ``{partition}_northwind.pkl``
        inside ``self.feature_dir``).

    Returns
    -------
    The row-wise concatenation (``pd.concat(..., sort=False)``) of the
    freeform table followed by the northwind table. Row labels have the
    form ``<id>_Freeform`` / ``<id>_Northwin`` where ``<id>`` is the
    first ``\\d{3}_\\d+`` match found in the original label.
    """
    freeform = load_from_file(self.feature_dir / f'{partition}_freeform.pkl')
    northwind = load_from_file(self.feature_dir / f'{partition}_northwind.pkl')

    # Optionally keep only the columns whose name contains 'mfcc'.
    if self.mfcc_enabled:
        freeform, northwind = (
            table.filter(like='mfcc', axis=1)
            for table in (freeform, northwind)
        )

    # Reduce every row label to its "<3 digits>_<digits>" identifier.
    # NOTE(review): a label without such a match makes re.search return
    # None, so .group() raises AttributeError.
    for table in (freeform, northwind):
        table.index = [
            re.search(r"\d{3}_\d+", label).group() for label in table.index
        ]

    # fix broken key: the northwind row of recording 205 is stored under
    # the wrong suffix; which suffix is wrong depends on the partition.
    if partition == "training":
        stale_key, fixed_key = "205_1", "205_2"
    elif partition == "development":
        stale_key, fixed_key = "205_2", "205_1"
    else:
        stale_key = fixed_key = None

    if stale_key is not None:
        northwind.loc[fixed_key] = northwind.loc[stale_key]
        northwind = northwind.drop(stale_key)

    # Tag each row with the task it came from so labels stay distinct
    # after concatenation.
    freeform.index = [f'{label}_Freeform' for label in freeform.index]
    northwind.index = [f'{label}_Northwin' for label in northwind.index]

    return pd.concat([freeform, northwind], sort=False)
def get_video_data(self):
    """
    Return the video features selected by the configuration.

    Previously saved features are loaded when they exist and
    ``self.options.save_features`` is not set. Otherwise the raw
    per-frame data is assembled, min-max scaled and then either passed
    through ``self.FDHH`` (grouped by the first index level) or reduced
    with ``self.video_pca``. (WARNING: Potential RAM overflow when the
    raw video data is assembled.)

    Side effects: when ``self.options.save_features`` is set, the
    computed features are written with ``save_to_file`` and the flag is
    reset to ``False``.

    Returns
    -------
    [X_train, X_test]
        A two-element list. When FDHH is disabled both elements are
        additionally passed through ``self.split_videos``.
    """
    feature_str = 'fdhh' if self.fdhh else 'pca'
    split_tag = 'test' if self.options.mode == 'test' else 'dev'
    cache_dir = f'{self.feature_folder}_FD'
    cache_name = f'train_{split_tag}_{feature_str}.pic'
    cache_file = f'{cache_dir}/{cache_name}'

    # Recompute when a save was requested or nothing has been cached yet.
    if self.options.save_features or not os.path.exists(cache_file):
        X_train, X_test = self.get_train_test()
        # Min-max scaling is the active choice; the original source also
        # carried a disabled variant using scale_type='standard' with
        # Box-Cox.
        X_train, X_test = scale(X_train,
                                X_test,
                                scale_type='minmax',
                                axis=0,
                                use_pandas=True,
                                verbose=self.options.verbose)

        if not self.fdhh:
            X_train, X_test = self.video_pca(X_train, X_test)
        else:
            if self.options.verbose:
                print('Performing FDHH over train and test set...')
            X_train = X_train.groupby(level=0).apply(self.FDHH)
            X_test = X_test.groupby(level=0).apply(self.FDHH)
            if self.options.verbose:
                print(
                    f'Sparsity in Train fdhh = {np.sum(X_train.values == 0) / X_train.size}'
                )
                print(
                    f'Sparsity in Test fdhh = {np.sum(X_test.values == 0) / X_test.size}'
                )

        if self.options.save_features:
            save_to_file(cache_dir, cache_name, (X_train, X_test))
            # Saving is a one-shot request: later calls reuse the cache.
            self.options.save_features = False
    else:
        # Return saved features if exist:
        X_train, X_test = load_from_file(cache_file)

    # PCA features are stored unsplit, so both the freshly computed and
    # the cached variant are split here.
    if not self.fdhh:
        X_train = self.split_videos(X_train)
        X_test = self.split_videos(X_test)

    return [X_train, X_test]
def get_train_test(self):
    """
    Assemble the per-frame video features of every partition.

    For each of the folders ``Training`` and ``Development`` (plus
    ``Testing`` when ``self.options.mode == 'test'``) under
    ``self.feature_folder``, all files are stacked into one DataFrame
    whose row label is the file name without its last four characters.
    ``self.encode_videos()`` is called first when a partition folder does
    not exist.

    Returns
    -------
    X_train, X_test
        In ``'test'`` mode: Training and Development concatenated, and
        Testing. Otherwise: Training, and Development.
    """
    if self.options.verbose:
        print(f'Putting together video data...')

    partitions = ['Training', 'Development']
    if self.options.mode == 'test':
        partitions.append('Testing')

    partition_frames = []
    for partition in partitions:
        folder = f'{self.feature_folder}/{partition}'
        if not os.path.exists(folder):
            self.encode_videos()
        file_names = os.listdir(folder)

        # First pass: read only the shapes so the full frame can be
        # allocated once instead of growing it file by file.
        # NOTE(review): an empty folder leaves n_features unbound and the
        # DataFrame construction below fails with a NameError.
        row_labels = []
        for file_name in file_names:
            n_rows, n_features = load_from_file(
                f'{folder}/{file_name}').shape
            row_labels.extend(np.repeat(file_name[:-4], n_rows))

        stacked = pd.DataFrame(
            data=np.empty((len(row_labels), n_features)),
            index=row_labels)

        # Second pass: copy every file into the rows carrying its label.
        for file_name in file_names:
            stacked.loc[file_name[:-4]] = load_from_file(
                f'{folder}/{file_name}')

        partition_frames.append(stacked)

    if self.options.mode == 'test':
        X_train = pd.concat([partition_frames[0], partition_frames[1]])
        X_test = partition_frames[2]
    else:
        X_train, X_test = partition_frames

    return X_train, X_test
def viz_experiments(experiments_info, title, file_name, bar=True, curve=True,
                    window=(None,), loc='upper left', save=False):
    """
    Takes list of experiment tuples, plots curves/bar charts for each
    output type (in case multiple were used)

    The results of every experiment are read from
    ``$modNN_DIR/results/<name>/results.log`` via ``load_from_file``.
    For each output type, only the experiments that actually report it
    are plotted, and the labels are filtered together with the statistics
    so both sequences stay aligned.

    :param experiments_info: iterable of (label, file_name) tuples
    :param title: heading at the top of the figure
    :param file_name: prefix used when saving barchart figure
    :param bar: boolean (default: True) plot the bar charts
    :param curve: boolean (default: True) plot the learning curves
    :param window: tuple that represents a valid slice, allowing for
        selective plotting along x-axis for learning curves
    :param loc: position of the legend in the figure, allows
        repositioning to avoid occlusion in bar charts
    :param save: boolean (default: False)
    :raises KeyError: if the ``modNN_DIR`` environment variable is unset
        and at least one experiment is given
    """
    # Materialise everything in lists: tuple-unpacking lambdas are not
    # valid Python 3 syntax, and map() objects could only be consumed once
    # although labels and results are needed for every output type.
    experiments = [
        (label,
         load_from_file(os.path.join(os.environ['modNN_DIR'], 'results',
                                     name, 'results.log')))
        for label, name in experiments_info
    ]

    # collect all output types used by these experiments; dict.fromkeys
    # de-duplicates while keeping first-seen order, so plots are produced
    # in a reproducible order.
    output_names = list(dict.fromkeys(
        output_name
        for _, results in experiments
        for output_name in results.keys()
    ))

    # for all output types create different plots
    for output_name in output_names:
        reporting = [(label, results[output_name])
                     for label, results in experiments
                     if output_name in results]
        output_type_labels = [label for label, _ in reporting]
        output_type_stats = [stats for _, stats in reporting]

        output_type_title = '{} - {}'.format(title, output_name)
        output_type_file_name = '{}-{}'.format(file_name, output_name)

        if curve:
            plot_learning_curves(output_type_labels, output_type_stats,
                                 output_type_title, output_type_file_name,
                                 window, save)
        if bar:
            plot_performance(output_type_labels, output_type_stats,
                             output_type_title, output_type_file_name,
                             loc, save)
def video_pca(self, X_train, X_test):
    """
    Reduce the dimensionality of the per-frame features with PCA.

    A previously saved PCA model is reused when
    ``self.pars['PCA']['per_frame_use_saved']`` is set and the file
    ``<models_folder>/<vgg_v>_pca.pic`` exists; otherwise ``None`` is
    handed to ``pca_transform``.

    Parameters
    ----------
    X_train, X_test
        Feature tables forwarded unchanged to ``pca_transform``
        (called with ``use_pandas=True``).

    Returns
    -------
    X_train, X_test
        The two transformed tables returned by ``pca_transform``.
    """
    if self.options.verbose:
        print('Reducing dimensionality of each frame using PCA...')

    pca_pars = self.pars['PCA']
    use_saved = pca_pars['per_frame_use_saved']
    n_components = pca_pars['per_frame_components']

    # The file name used to be wrapped in a list, so the formatted path
    # contained "['..._pca.pic']" and a saved model was never found.
    pca_file = f"{self.folders['models_folder']}/{self.vgg_v}_pca.pic"

    if use_saved and os.path.exists(pca_file):
        pca = load_from_file(pca_file)
    else:
        pca = None

    # NOTE(review): the model returned here is not written to pca_file in
    # this method; presumably it is saved elsewhere - confirm.
    X_train, X_test, pca = pca_transform(X_train,
                                         X_test,
                                         n_components,
                                         pca=pca,
                                         use_pandas=True)

    if self.options.verbose:
        print(
            f'Explained variance = {np.sum(pca.explained_variance_ratio_):.2f}'
        )
    return X_train, X_test
def video_faces(self, video_path, coord_path):
    """
    Extracts faces from a video returning array of rgb images of faces

    Frames are sampled so that roughly 30 frames per second are kept
    (every frame when the video runs at 30 fps or less, or when the
    frame rate is not reported). Sampled frames whose four coordinates
    are all ``-1`` are skipped.

    Parameters
    ----------
    video_path : str
        A path to the video file
    coord_path : str
        Folder location and file name of the file with face coordinates.
        When it exists the coordinates are loaded from it, otherwise they
        are computed with ``self.get_face_coords`` for every sampled frame.

    Returns
    -------
    faces : ndarray
        The face crops of the sampled frames, each resized to
        ``self.input_size[:2]`` and cast to float32. Frames without a
        face are omitted, not padded.
    all_coords : array-like (frames * 4)
        One coordinate row per frame, used as
        ``frame[c[0]:c[1], c[2]:c[3]]``. Rows of frames that were not
        sampled stay at ``-1`` when the coordinates are computed here.

    ``None`` (not a tuple) is returned when the video cannot be opened.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        # Check if camera opened successfully
        if not cap.isOpened():
            print("Error opening video stream or file")
            return None

        video_fps = cap.get(cv2.CAP_PROP_FPS)
        read_fps = 30
        # Integer sampling step. The float ratio used before raised
        # ZeroDivisionError for a reported fps of 0 and matched almost no
        # frame index when fps was not an exact multiple of read_fps.
        if video_fps > 0:
            step = max(1, int(round(video_fps / read_fps)))
        else:
            step = 1

        video_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        coords_present = os.path.exists(coord_path)
        if coords_present:
            all_coords = load_from_file(coord_path)
        else:
            # -1 is the "no face" marker tested below; pre-filling with it
            # keeps rows of unsampled frames well defined instead of
            # leaving uninitialised memory in the returned array.
            all_coords = np.full((video_frames, 4), -1, dtype=np.int64)

        faces = []
        frame_idx = -1
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame_idx += 1
            if frame_idx % step:
                continue

            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # NOTE(review): if more frames are decoded than all_coords has
            # rows (the reported frame count is used for its size), the
            # indexing below raises IndexError, as it did before.
            if not coords_present:
                all_coords[frame_idx] = self.get_face_coords(frame)
            c = all_coords[frame_idx]
            if (c == -1).all():
                continue

            face = frame[c[0]:c[1], c[2]:c[3]]
            face = cv2.resize(
                face,
                (self.input_size[0], self.input_size[1])).astype('float32')
            faces.append(face)
    finally:
        # Release the capture on every exit path, including exceptions
        # raised while detecting or resizing faces.
        cap.release()

    faces = np.array(faces)
    return faces, all_coords