import logging
import os

import numpy as np

# Project-local modules assumed importable here: lists, vad_gmm, vad_gmm_evaluate.
# The helpers vad_evaluate_darpa and estimate_channel_gmm are assumed to be
# defined elsewhere in the same module.
import lists
import vad_gmm
import vad_gmm_evaluate


def test(in_audio_list=None, audio_dir=None, ldc_annotations_list=None, lab_dir=None, results_dir=None, model_list=None,
         model_file=None, feature_type='PLP_0', n_coeffs_per_frame=13, acc_frames=31,
         samp_period=0.01, window_length=0.025, eval_script=None):
    '''
    Test Speech Activity Detection for a list of files given a specific model. Ideally, many
    of the input arguments like samp_period, window_length, n_coeffs_per_frame, acc_frames
    should be read from the model file.

    Input:
    in_audio_list : list of audio files (absolute paths)
    audio_dir : directory where the audio files lie
    ldc_annotations_list : list with the LDC annotation files
    lab_dir : directory where the .lab transcription files lie
    results_dir : directory where the results will be stored
    model_list : file containing a list of the class names
    model_file : an HTK-formatted MMF file (containing the GMM models for the different classes)
    feature_type : an HTK-formatted string describing the feature_type, e.g., MFCC_0
    n_coeffs_per_frame : number of features per frame
    acc_frames : number of frames to accumulate features over
    samp_period : the frame period (in seconds)
    window_length : the frame duration (in seconds)
    eval_script : the java DARPA evaluation script

    Output:
    conf_matrix : the estimated confusion matrix
    '''
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    # Run the VAD to get the results
    vad_gmm.vad_gmm_list(in_audio_list, model_list, model_file, feature_type,
                         n_coeffs_per_frame, acc_frames, results_dir, results_dir, samp_period, window_length)

    # The annotations are in .lab format, i.e., start_time end_time label per line
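    # Hypothetical example of .lab content (the class names come from model_list):
    #   0.00 2.35 speech
    #   2.35 3.10 nonspeech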
    ref_annotations_list = os.path.join(results_dir,'ref_test_annotations.list')
    hyp_annotations_list = os.path.join(results_dir,'hyp_test_annotations.list')
    lists.create_corresponding_list_assert(in_audio_list, lab_dir, ref_annotations_list,'lab')
    lists.create_corresponding_list_assert(in_audio_list, results_dir, hyp_annotations_list,'rec')

    # Given the results and the reference annotations, evaluate by estimating a confusion matrix
    conf_matrix = vad_gmm_evaluate.vad_gmm_evaluate_frames(ref_annotations_list, hyp_annotations_list,
                                                           samp_period, model_list)
    msg = "{0} \n {1}".format(model_file, conf_matrix)
    logging.info(msg)

    # Estimate accuracy
    n_instances = np.sum(conf_matrix)
    n_correct = np.sum(conf_matrix.diagonal())
    msg = "Accuracy: {0} / {1} = {2}".format(n_correct, n_instances, float(n_correct) / n_instances )
    logging.info(msg)

    if eval_script is not None and os.path.exists(eval_script):
        lists.create_corresponding_list_assert(in_audio_list, lab_dir, ref_annotations_list,'lab.frames.txt')
        lists.create_corresponding_list_assert(in_audio_list, results_dir, hyp_annotations_list,'rec.frames.txt')
        vad_evaluate_darpa(testing_list=in_audio_list, ref_annotations_list=ldc_annotations_list,
                           hyp_annotations_list=hyp_annotations_list, eval_script=eval_script,
                           audio_dir=audio_dir, smp_period=samp_period, window_length=window_length,
                           results_dir=results_dir, task_id='{0}_{1}_{2}'.format(feature_type, str(n_coeffs_per_frame),
                                                                              str(acc_frames)))
    return conf_matrix
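
A minimal usage sketch for the variant above; every path below is a hypothetical placeholder, and the class-list and model files are assumed to exist:

conf_matrix = test(in_audio_list='/data/sad/test_audio.list',            # hypothetical path
                   audio_dir='/data/sad/audio',                          # hypothetical path
                   ldc_annotations_list='/data/sad/ldc_annotations.list',  # hypothetical path
                   lab_dir='/data/sad/labs',                             # hypothetical path
                   results_dir='/data/sad/results',                      # hypothetical path
                   model_list='/data/sad/models/classes.list',           # hypothetical path
                   model_file='/data/sad/models/gmm.mmf',                # hypothetical path
                   feature_type='PLP_0',
                   n_coeffs_per_frame=13,
                   acc_frames=31,
                   samp_period=0.01,
                   window_length=0.025,
                   eval_script=None)  # None skips the DARPA evaluation branch
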
def test(in_audio_list=None,
         lab_dir=None,
         results_dir=None,
         model_list=None,
         model_file=None,
         feature_type='PLP_0',
         n_coeffs_per_frame=13,
         acc_frames=31,
         samp_period=0.01,
         window_length=0.02):
    '''
    Test Speech Activity Detection for a list of files given a specific model. Ideally, many 
    of the input arguments like samp_period, window_length, n_coeffs_per_frame, acc_frames
    should be read from the model file. 
    
    Input:
    in_audio_list : list of audio files (absolute paths)
    lab_dir : directory where the .lab transcription files lie
    results_dir : directory where the results will be stored
    model_list : file containing a list of the class names
    model_file : an HTK-formatted MMF file (containing the GMM models for the different classes)
    feature_type : an HTK-formatted string describing the feature_type, e.g., MFCC_0
    n_coeffs_per_frame : number of features per frame
    acc_frames : number of frames to accumulate features over
    samp_period : the frame period (in seconds) 
    window_length : the frame duration (in seconds)
    
    Output:
    conf_matrix : the estimated confusion matrix
    '''
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)

    # Classify each file using the trained GMM models
    estimate_channel_gmm(in_audio_list, model_list, model_file, feature_type,
                         n_coeffs_per_frame, acc_frames, results_dir,
                         results_dir)

    # The annotations are in .lab format, i.e., start_time end_time label per line
    ref_annotations_list = os.path.join(results_dir,
                                        'ref_test_annotations.list')
    hyp_annotations_list = os.path.join(results_dir,
                                        'hyp_test_annotations.list')
    lists.create_corresponding_list_assert(in_audio_list, lab_dir,
                                           ref_annotations_list, 'lab')
    lists.create_corresponding_list_assert(in_audio_list, results_dir,
                                           hyp_annotations_list, 'rec')

    # Given the results and the reference annotations, evaluate by estimating a confusion matrix
    conf_matrix = vad_gmm_evaluate.vad_gmm_evaluate_frames(
        ref_annotations_list,
        hyp_annotations_list,
        samp_period,
        model_list,
        mode='single')
    msg = "{} \n {}".format(model_file, conf_matrix)
    logging.info(msg)

    return conf_matrix
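
Unlike the first variant, this one does not log an accuracy figure; a sketch of deriving it from the returned matrix, assuming the paths below are hypothetical placeholders and the confusion matrix is a square NumPy array with reference classes along the diagonal:

cm = test(in_audio_list='/data/sad/test_audio.list',    # hypothetical path
          lab_dir='/data/sad/labs',                     # hypothetical path
          results_dir='/data/sad/results',              # hypothetical path
          model_list='/data/sad/models/classes.list',   # hypothetical path
          model_file='/data/sad/models/gmm.mmf')        # hypothetical path
# Overall frame accuracy: correctly classified (diagonal) frames over all frames.
accuracy = float(np.sum(cm.diagonal())) / np.sum(cm)
logging.info("Accuracy: %.4f", accuracy)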