def annotations(config_file, data_path):
    """ Generate training meta data from the annotations in preparation for
    training.

        CONFIG_FILE: the config file with settings for the experiment.

        DATA_PATH: the path to the folder with the data files.
    """
    config = read_config(config_file.name)
    extracted_path = os.path.join(data_path, config['extracted_path'])
    train_ann_dir = os.path.join(data_path, config['train_ann_dir'])
    test_ann_dir = os.path.join(data_path, config['test_ann_dir'])
    train_annotations_file = os.path.join(data_path,
                                          config['train_annotations_full'])
    test_annotations_file = os.path.join(data_path,
                                         config['test_annotations_full'])
    vid_frame_size = config['vid_frame_size']
    sequence_size = config['sequence_size']
    stride = config['ann_stride']
    fps = config['fps']

    print('Generating training annotations file.')
    process_anns(train_ann_dir, train_annotations_file, extracted_path,
                 vid_frame_size, sequence_size, stride, fps)

    print('Generating test annotations file.')
    process_anns(test_ann_dir, test_annotations_file, extracted_path,
                 vid_frame_size, sequence_size, stride, fps)
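# process_anns is defined elsewhere in the repo; the sketch below shows the
# sliding-window grouping it is presumably doing with `sequence_size` and
# `stride`: per-frame annotation rows grouped into overlapping sequences.
# The flat `rows` input format is a hypothetical stand-in, not the repo's
# actual annotation schema.
def window_annotations(rows, sequence_size, stride):
    """Yield overlapping windows of `sequence_size` consecutive rows,
    advancing the window start by `stride` rows each step."""
    for start in range(0, len(rows) - sequence_size + 1, stride):
        yield rows[start:start + sequence_size]

# e.g. 10 frames, sequence_size=4, stride=2 -> windows at frames 0, 2, 4, 6:
# list(window_annotations(list(range(10)), 4, 2))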
def videos(config_file, data_path):
    """ Extract audio and video frames.

        CONFIG_FILE: the config file with settings for the experiment.

        DATA_PATH: the path to the folder with the data files.
    """
    config = read_config(config_file.name)
    test_ann_dir = os.path.join(data_path, config['test_ann_dir'])
    train_ann_dir = os.path.join(data_path, config['train_ann_dir'])
    vid_save_path = os.path.join(data_path, config['vid_save_path'])
    extracted_path = os.path.join(data_path, config['extracted_path'])
    fps = config['fps']
    eps = config['eps']
    start_ts_filename = config['start_ts']

    if not os.path.exists(vid_save_path):
        raise Exception(f'video directory {vid_save_path} does not exist')
    create_dir(extracted_path)

    vids = os.listdir(vid_save_path)
    nvids = len(vids)
    for i, vid in enumerate(vids):
        print(f'[{i + 1}/{nvids} @ {fps}fps] extracting {vid}')
        extract_av(vid, test_ann_dir, train_ann_dir, fps, eps,
                   start_ts_filename, vid_save_path, extracted_path)
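# extract_av is defined elsewhere in the repo; below is a minimal sketch of
# the kind of ffmpeg invocations it likely wraps: dump video frames at a
# fixed fps and the audio track as WAV. The output file names and the mono
# 16 kHz audio settings are assumptions, not taken from this codebase.
import os
import subprocess

def extract_av_sketch(video_path, out_dir, fps):
    # Video frames at the requested rate, numbered sequentially.
    subprocess.run(['ffmpeg', '-i', video_path, '-vf', f'fps={fps}',
                    os.path.join(out_dir, 'frame_%06d.jpg')], check=True)
    # Audio as mono WAV (the 16 kHz sample rate is an assumption).
    subprocess.run(['ffmpeg', '-i', video_path, '-vn', '-ac', '1',
                    '-ar', '16000', os.path.join(out_dir, 'audio.wav')],
                   check=True)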
def videos(config_file, data_path):
    """ Download the videos used in the annotations.

        CONFIG_FILE: the config file with settings for the experiment.

        DATA_PATH: the path to the root folder where the videos will be
        saved.
    """
    # Read configuration
    config = read_config(config_file.name)
    vid_save_path = os.path.join(data_path, config['vid_save_path'])
    download_path = os.path.join(data_path, config['download_path'])
    train_ann_dir = os.path.join(data_path, config['train_ann_dir'])
    test_ann_dir = os.path.join(data_path, config['test_ann_dir'])

    create_dir(vid_save_path)
    create_dir(download_path)

    vid_urls = get_vid_urls(download_path)
    annotated_vids = get_annotated_vids(train_ann_dir, test_ann_dir)
    for vid_id in annotated_vids:
        if vid_id in vid_urls:
            vid = vid_urls[vid_id]
            url = vid['url']
            file_name = vid['file_name']
            file_hash = vid['file_hash']
            print(f'Downloading: {url}')
            download_file(url, file_name, vid_save_path, file_hash)
        else:
            print(f'Warning: video url list does not contain annotated '
                  f'video with id: {vid_id}. Skipping.')
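# download_file is defined elsewhere; a minimal sketch of a download with
# checksum verification using only the standard library. Treating
# file_hash as a SHA-256 hex digest is an assumption; the repo may use a
# different algorithm.
import hashlib
import os
import urllib.request

def download_file_sketch(url, file_name, save_path, file_hash):
    dest = os.path.join(save_path, file_name)
    urllib.request.urlretrieve(url, dest)
    digest = hashlib.sha256()
    with open(dest, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 16), b''):
            digest.update(chunk)
    if digest.hexdigest() != file_hash:
        raise ValueError(f'hash mismatch for {file_name}')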
def mfccs(config_file, data_path):
    """ Extract MFCCs from the dataset.

        CONFIG_FILE: the config file with settings for the experiment.

        DATA_PATH: the path to the folder with the data files.
    """
    config = read_config(config_file.name)
    extracted_path = os.path.join(data_path, config['extracted_path'])
    train_ann_dir = os.path.join(data_path, config['train_ann_dir'])
    test_ann_dir = os.path.join(data_path, config['test_ann_dir'])
    stride = config['stride']
    window_size = config['mfcc_window_size']
    nmfcc = config['nmfcc']
    eps = config['eps']
    apply_mean = config['apply_mean']
    apply_stddev = config['apply_stddev']

    dirs = os.listdir(extracted_path)
    ndirs = len(dirs)
    for i, vid_id in enumerate(dirs):
        print(f'[{i + 1}/{ndirs}] generating MFCCs for {vid_id}')
        mfcc_data = gen_mfcc(vid_id, extracted_path, train_ann_dir,
                             test_ann_dir, stride, window_size, nmfcc, eps,
                             apply_mean, apply_stddev)
        output_path = os.path.join(extracted_path, vid_id, 'mfcc.pkl')
        # Use a context manager so the file handle is closed after writing.
        with open(output_path, 'wb') as f:
            pickle.dump(mfcc_data, f)
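# gen_mfcc is defined elsewhere; a minimal sketch with librosa of how the
# config values could map onto MFCC extraction. The per-video wav path and
# treating stride/window_size as sample counts are assumptions.
import librosa

def gen_mfcc_sketch(wav_path, stride, window_size, nmfcc, eps,
                    apply_mean, apply_stddev):
    y, sr = librosa.load(wav_path, sr=None)
    m = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=nmfcc,
                             n_fft=window_size, hop_length=stride)
    if apply_mean:  # zero-centre each coefficient over time
        m = m - m.mean(axis=1, keepdims=True)
    if apply_stddev:  # scale to unit variance; eps guards division by zero
        m = m / (m.std(axis=1, keepdims=True) + eps)
    return m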
def annotations(config_file, data_path):
    """ Download and extract the annotations.

        CONFIG_FILE: the config file with settings for the experiment.

        DATA_PATH: the path to the root folder where the annotations will
        be saved.
    """
    # Read configuration
    config = read_config(config_file.name)
    download_path = os.path.join(data_path, config['download_path'])
    create_dir(download_path)

    print('Fetching training annotations')
    download_file(ava_asd_train_ann_url, 'train.tar.bz2', download_path,
                  ava_asd_train_ann_hash, extract=True)

    print('Fetching testing annotations')
    download_file(ava_asd_test_ann_url, 'test.tar.bz2', download_path,
                  ava_asd_test_ann_hash, extract=True)
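# The extract=True flag implies download_file also unpacks the archive
# after downloading; a minimal sketch of that step for these .tar.bz2
# bundles, using only the standard library.
import tarfile

def extract_archive_sketch(archive_path, dest_dir):
    with tarfile.open(archive_path, 'r:bz2') as tar:
        tar.extractall(dest_dir)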
def main(config_file, data_path, bot_config):
    """ Train the audio visual model.

        CONFIG_FILE: the config file with settings for the experiment.

        DATA_PATH: the path to the folder with the data files.
    """
    # Start time for measuring experiment
    start = timer()

    # Enable memory growth on GPU
    set_gpu_memory_growth(True)

    # Read configs
    config = read_config(config_file.name)

    # Load model
    model, loss = get_model(config)

    # Load data generators
    train_gen = AvGenerator.from_dict(data_path, DatasetSubset.train, config)
    test_gen = AvGenerator.from_dict(data_path, DatasetSubset.valid, config)
    print(train_gen)
    print(test_gen)

    # Create list of callbacks to use for training
    sess_id = secrets.token_urlsafe(5)  # Create session id
    callbacks = get_callbacks(data_path, sess_id, config, bot_config)
    callbacks.append(train_gen)
    callbacks.append(test_gen)

    # Make optimiser and get loss weights
    optimiser = get_optimiser(config)
    loss_weights = get_loss_weights(config)

    # Compile model
    model.compile(loss=loss, optimizer=optimiser, metrics=['accuracy'],
                  loss_weights=loss_weights)

    # Dump a summary
    model.summary()

    # Run training
    epochs = config['epochs']
    model.fit(train_gen.dataset, epochs=epochs,
              validation_data=test_gen.dataset, callbacks=callbacks)

    # Print duration
    end = timer()
    duration = end - start
    print(f"Duration: {datetime.timedelta(seconds=duration)}")
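# set_gpu_memory_growth is defined elsewhere; it presumably wraps the
# standard TensorFlow 2 idiom sketched below, which stops TensorFlow from
# reserving all GPU memory up front.
import tensorflow as tf

def set_gpu_memory_growth_sketch(enabled):
    for gpu in tf.config.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(gpu, enabled)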
def get_callbacks(data_path, sess_id, config, bot_config_file):
    """ Get a list of callbacks to use for training. """
    # Get config values
    mode = config['mode']
    tb_logdir = config['tb_logdir']
    save_best_only = config['save_best_only']
    use_earlystopping = config['use_earlystopping']
    es_patience = config['es_patience']

    callbacks = []

    # Model checkpoint
    model_file_pattern = (
        sess_id + '-' + mode +
        '-weights-{epoch:02d}-{val_main_out_accuracy:.4f}.hdf5')
    experiment_path = os.path.join(data_path, 'experiments', sess_id)
    pathlib.Path(experiment_path).mkdir(parents=True, exist_ok=True)
    model_path = os.path.join(experiment_path, model_file_pattern)
    callbacks.append(ModelCheckpoint(model_path,
                                     monitor='val_main_out_accuracy',
                                     verbose=1,
                                     save_best_only=save_best_only,
                                     mode='max'))

    # Tensorboard
    # Puts the results in a unique TensorBoard session
    tb_session_dir = os.path.join(tb_logdir, sess_id)
    pathlib.Path(tb_session_dir).mkdir(parents=True, exist_ok=True)
    callbacks.append(TensorBoard(log_dir=tb_session_dir, update_freq='batch'))

    # Early stopping
    if use_earlystopping:
        callbacks.append(EarlyStopping(monitor='val_main_out_loss',
                                       patience=es_patience))

    # Telegram reporting bot
    if bot_config_file is not None:
        bot_config = read_config(bot_config_file.name)
        callbacks.append(UpdateBot.from_dict(bot_config, sess_id=sess_id))

    return callbacks
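# UpdateBot is defined elsewhere; a minimal sketch of a Keras callback that
# reports epoch results through the Telegram Bot API. The 'token' and
# 'chat_id' fields are assumptions about the bot config layout.
import requests
from tensorflow.keras.callbacks import Callback

class UpdateBotSketch(Callback):
    def __init__(self, token, chat_id, sess_id):
        super().__init__()
        self.url = f'https://api.telegram.org/bot{token}/sendMessage'
        self.chat_id = chat_id
        self.sess_id = sess_id

    def on_epoch_end(self, epoch, logs=None):
        metrics = ', '.join(f'{k}={v:.4f}' for k, v in (logs or {}).items())
        text = f'[{self.sess_id}] epoch {epoch + 1}: {metrics}'
        requests.post(self.url, data={'chat_id': self.chat_id, 'text': text})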
def main(config_file, data_path, weights_file, weights_path, legacy):
    """ Evaluate a model based on the test set.

        CONFIG_FILE: the config file with settings for the experiment.

        DATA_PATH: the path to the folder with the data files.

        WEIGHTS: the weights to load into the model.
    """
    # Start time for measuring experiment
    start = timer()

    if weights_path is not None and legacy:
        print('Error: --legacy can only be used with --weights-file, '
              'not --weights-path')
    else:
        # Enable memory growth on GPU
        set_gpu_memory_growth(True)

        # Read config
        config = read_config(config_file.name)

        # Get test annotations directory
        test_ann_dir = os.path.join(data_path, config['test_ann_dir'])

        # Get loss weights
        optimiser = get_optimiser(config)
        loss_weights = get_loss_weights(config)

        if weights_file is not None:
            # Load model
            model, loss = get_model(config, weights_file=weights_file)

            # Compile model
            model.compile(loss=loss, optimizer=optimiser,
                          metrics=['accuracy'], loss_weights=loss_weights)

            # Data generator
            test_gen = AvGenerator.from_dict(data_path, DatasetSubset.test,
                                             config)

            if not legacy:
                result = evaluate(model, weights_file, test_gen, test_ann_dir)
                display_evaluation(result)
            else:
                evaluate_legacy(model, weights_file, test_gen, loss,
                                optimiser, loss_weights)
        elif weights_path is not None:
            # Load model
            model, loss = get_model(config, weights_file=None)

            # Compile model
            model.compile(loss=loss, optimizer=optimiser,
                          metrics=['accuracy'], loss_weights=loss_weights)

            # List all weights in directory
            weights_files = glob.glob(f"{weights_path}/*.hdf5")
            weights_files = natsorted(weights_files)

            # Data generator
            test_gen = AvGenerator.from_dict(data_path, DatasetSubset.test,
                                             config)

            # Evaluate each weights file
            columns = [
                'weights', 'audio_accuracy', 'video_accuracy',
                'main_accuracy', 'audio_map', 'video_map', 'main_map',
                'audio_ap_sp', 'video_ap_sp', 'main_ap_sp', 'audio_ap_ns',
                'video_ap_ns', 'main_ap_ns', 'audio_auc', 'video_auc',
                'main_auc', 'orig_main_map', 'orig_video_map',
                'orig_audio_map'
            ]
            keys_remove = [
                'y_true', 'y_audio_class_ids', 'y_video_class_ids',
                'y_main_class_ids'
            ]
            results = []
            for weights_file in weights_files:
                # Set weights
                model.load_weights(weights_file)

                # Get results and append
                result = evaluate(model, weights_file, test_gen, test_ann_dir)

                # Remove unnecessary pairs
                for k in keys_remove:
                    del result[k]

                # Add weights name to results and move to the start of the
                # OrderedDict
                result['weights'] = weights_file
                result.move_to_end('weights', last=False)
                results.append(result)

            file_name = 'evaluation-results.csv'
            save_csv(results, columns, file_name)
            print(f"Saved evaluation results to: {file_name}")

    # Print duration
    end = timer()
    duration = end - start
    print(f"Duration: {datetime.timedelta(seconds=duration)}")
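# evaluate is defined elsewhere; a minimal sketch of how the per-class
# average-precision columns (ap_sp = speaking, ap_ns = not speaking) and
# their mean (the *_map columns) could be computed with scikit-learn. The
# label convention (1 = speaking) and two-column softmax output are
# assumptions.
import numpy as np
from sklearn.metrics import average_precision_score

def ap_metrics_sketch(y_true, y_scores):
    """y_true: (N,) binary labels; y_scores: (N, 2) class probabilities."""
    y_true = np.asarray(y_true)
    ap_sp = average_precision_score(y_true == 1, y_scores[:, 1])
    ap_ns = average_precision_score(y_true == 0, y_scores[:, 0])
    return {'ap_sp': ap_sp, 'ap_ns': ap_ns, 'map': (ap_sp + ap_ns) / 2}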
    len_n = len(rnspeak)
    print('Generated subset of speaking: {} ({:.1%} of original), '
          'nonspeaking: {} ({:.1%} of original)'.format(
              len_s, len_s / n_filtered, len_n, len_n / n_filtered))
    return rspeak, rnspeak


def generate_and_save_subset(in_file, out_file, config):
    """ Generate and save annotation subsets for the given annotation
    according to the filtering rules. """
    speak, nspeak = filter_and_sample(in_file, config)
    merged = speak + nspeak
    random.shuffle(merged)
    save_to_file(merged, out_file)


if __name__ == '__main__':
    config = read_config('config.yaml')
    # Set seed for debug. Remove later. Seed both RNGs used here: numpy for
    # the sampling and random for the shuffle above.
    np.random.seed(0)
    random.seed(0)
    generate_and_save_subset(config['train_annotations_full'],
                             config['train_annotations_subset'], config)
    generate_and_save_subset(config['test_annotations_full'],
                             config['test_annotations_subset'], config)
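# filter_and_sample appears above only from its final lines; a minimal
# sketch of the class subsampling those lines suggest, assuming the
# speaking/non-speaking rows have already been filtered apart. The input
# format and the decision to balance to the smaller class are assumptions.
import numpy as np

def balance_classes_sketch(speak, nspeak, rng=np.random):
    """Randomly subsample both lists down to the size of the smaller one."""
    n = min(len(speak), len(nspeak))
    rspeak = [speak[i] for i in rng.choice(len(speak), n, replace=False)]
    rnspeak = [nspeak[i] for i in rng.choice(len(nspeak), n, replace=False)]
    return rspeak, rnspeak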