import logging
import os

import numpy as np

# Project-local modules; import paths are assumed from how they are used
# below. vad_evaluate_darpa is expected to be defined elsewhere in this
# module or package.
import lists
import vad_gmm
import vad_gmm_evaluate


def test(in_audio_list=None, audio_dir=None, ldc_annotations_list=None,
         lab_dir=None, results_dir=None, model_list=None, model_file=None,
         feature_type='PLP_0', n_coeffs_per_frame=13, acc_frames=31,
         samp_period=0.01, window_length=0.025, eval_script=None):
    '''
    Test Speech Activity Detection for a list of files given a specific model.
    Ideally, many of the input arguments, e.g., samp_period, window_length,
    n_coeffs_per_frame and acc_frames, should be read from the model file.

    Input:
        in_audio_list        : list of audio files (absolute paths)
        audio_dir            : directory where the audio files lie
        ldc_annotations_list : list of the LDC annotation files
        lab_dir              : directory where the .lab transcription files lie
        results_dir          : directory where the results will be stored
        model_list           : file containing a list of the class names
        model_file           : an HTK-formatted mmf file (containing the GMM
                               models for the different classes)
        feature_type         : an HTK-formatted string describing the feature
                               type, e.g., MFCC_0
        n_coeffs_per_frame   : number of features per frame
        acc_frames           : number of frames to accumulate features over
        samp_period          : the frame period (in seconds)
        window_length        : the frame duration (in seconds)
        eval_script          : the java DARPA evaluation script
    Output:
        conf_matrix          : the confusion matrix
    '''
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    # Run the VAD to get the results.
    vad_gmm.vad_gmm_list(in_audio_list, model_list, model_file, feature_type,
                         n_coeffs_per_frame, acc_frames, results_dir,
                         results_dir, samp_period, window_length)

    # The annotations are in .lab format, i.e., one "start_time end_time label"
    # triplet per line.
    ref_annotations_list = os.path.join(results_dir, 'ref_test_annotations.list')
    hyp_annotations_list = os.path.join(results_dir, 'hyp_test_annotations.list')
    lists.create_corresponding_list_assert(in_audio_list, lab_dir,
                                           ref_annotations_list, 'lab')
    lists.create_corresponding_list_assert(in_audio_list, results_dir,
                                           hyp_annotations_list, 'rec')

    # Given the results and the reference annotations, evaluate by estimating
    # a confusion matrix.
    conf_matrix = vad_gmm_evaluate.vad_gmm_evaluate_frames(
        ref_annotations_list, hyp_annotations_list, samp_period, model_list)
    msg = "{} \n {}".format(model_file, conf_matrix)
    logging.info(msg)

    # Estimate frame-level accuracy: correct decisions lie on the diagonal
    # of the confusion matrix.
    n_instances = np.sum(conf_matrix)
    n_correct = np.sum(conf_matrix.diagonal())
    msg = "Accuracy: {} / {} = {}".format(n_correct, n_instances,
                                          float(n_correct) / n_instances)
    logging.info(msg)

    # Optionally run the (java) DARPA evaluation script on frame-level outputs.
    if eval_script is not None and os.path.exists(eval_script):
        lists.create_corresponding_list_assert(in_audio_list, lab_dir,
                                               ref_annotations_list,
                                               'lab.frames.txt')
        lists.create_corresponding_list_assert(in_audio_list, results_dir,
                                               hyp_annotations_list,
                                               'rec.frames.txt')
        vad_evaluate_darpa(testing_list=in_audio_list,
                           ref_annotations_list=ldc_annotations_list,
                           hyp_annotations_list=hyp_annotations_list,
                           eval_script=eval_script, audio_dir=audio_dir,
                           smp_period=samp_period, window_length=window_length,
                           results_dir=results_dir,
                           task_id='{}_{}_{}'.format(feature_type,
                                                     n_coeffs_per_frame,
                                                     acc_frames))
    return conf_matrix
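# The accuracy computation above generalizes to any K-class confusion matrix:
# correct frames sit on the diagonal, errors off it. A minimal, self-contained
# sketch with made-up 2x2 counts (rows are taken as reference labels and
# columns as hypothesized labels; that convention is assumed for the output of
# vad_gmm_evaluate_frames):
def _frame_accuracy_sketch():
    conf_matrix = np.array([[900, 100],   # speech:     900 correct, 100 missed
                            [50,  950]])  # non-speech: 950 correct,  50 false alarms
    n_instances = np.sum(conf_matrix)           # 2000 frames in total
    n_correct = np.sum(conf_matrix.diagonal())  # 1850 correctly labeled frames
    return float(n_correct) / n_instances       # 0.925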
def test(in_audio_list=None, lab_dir=None, results_dir=None, model_list=None,
         model_file=None, feature_type='PLP_0', n_coeffs_per_frame=13,
         acc_frames=31, samp_period=0.01, window_length=0.02):
    '''
    Test Speech Activity Detection for a list of files given a specific model.
    Ideally, many of the input arguments, e.g., samp_period, window_length,
    n_coeffs_per_frame and acc_frames, should be read from the model file.

    Input:
        in_audio_list      : list of audio files (absolute paths)
        lab_dir            : directory where the .lab transcription files lie
        results_dir        : directory where the results will be stored
        model_list         : file containing a list of the class names
        model_file         : an HTK-formatted mmf file (containing the GMM
                             models for the different classes)
        feature_type       : an HTK-formatted string describing the feature
                             type, e.g., MFCC_0
        n_coeffs_per_frame : number of features per frame
        acc_frames         : number of frames to accumulate features over
        samp_period        : the frame period (in seconds)
        window_length      : the frame duration (in seconds)
    Output:
        conf_matrix        : the confusion matrix
    '''
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    # Classify using the channel GMM classifier.
    estimate_channel_gmm(in_audio_list, model_list, model_file, feature_type,
                         n_coeffs_per_frame, acc_frames, results_dir,
                         results_dir)

    # The annotations are in .lab format, i.e., one "start_time end_time label"
    # triplet per line.
    ref_annotations_list = os.path.join(results_dir, 'ref_test_annotations.list')
    hyp_annotations_list = os.path.join(results_dir, 'hyp_test_annotations.list')
    lists.create_corresponding_list_assert(in_audio_list, lab_dir,
                                           ref_annotations_list, 'lab')
    lists.create_corresponding_list_assert(in_audio_list, results_dir,
                                           hyp_annotations_list, 'rec')

    # Given the results and the reference annotations, evaluate by estimating
    # a confusion matrix.
    conf_matrix = vad_gmm_evaluate.vad_gmm_evaluate_frames(
        ref_annotations_list, hyp_annotations_list, samp_period, model_list,
        mode='single')
    msg = "{} \n {}".format(model_file, conf_matrix)
    logging.info(msg)
    return conf_matrix
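# Usage sketch. All paths are hypothetical placeholders (the .list files and
# the lab/, results/ and models/ directories are not shipped with this code);
# only the keyword names come from the signatures above. Since both variants
# are named test(), the later definition applies when they share a module;
# the keywords used here are common to both.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    conf_matrix = test(in_audio_list='lists/test_audio.list',
                       lab_dir='lab',
                       results_dir='results',
                       model_list='models/classes.list',
                       model_file='models/gmm.mmf')
    logging.info("Final confusion matrix:\n%s", conf_matrix)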