Beispiel #1
0
def score_folder_SD(fileList, fileDict, diarcollar, write_msg, out_path):
    """Score each listed file for the SD task and gather the per-file DER.

    Args:
        fileList: iterable of file names to score.
        fileDict: mapping with 'ref' and 'hyp' sub-mappings, each indexed by
            file name and yielding the corresponding RTTM path.
        diarcollar: collar value forwarded unchanged to the per-file scorer.
        write_msg: list of report strings; summary lines are appended to it
            in place.
        out_path: result file path; its base name (via ``util.get_bname``)
            names the log file written under ``util.get_logs_path()``.

    Returns:
        tuple: ``(derDict, write_msg)`` where ``derDict`` maps each file name
        whose score is not the string 'NaN' to that score.
    """
    toolkit_root = util.get_fs02sctk_path()
    validator_script = toolkit_root + 'scutils/dscore/validate_rttm.py'
    scorer_script = toolkit_root + 'scutils/dscore/score.py'
    log_file_path = (util.get_logs_path() + util.get_bname(out_path) +
                     '.log')

    derDict, failed_files, log_lines = {}, [], []

    for name in fileList:
        ref_rttm = fileDict['ref'][name]
        hyp_rttm = fileDict['hyp'][name]
        # The UEM path is derived from the reference RTTM path by swapping
        # the folder component and the file extension.
        ref_uem = ref_rttm.replace('/RTTM/', '/UEM/').replace('.rttm', '.uem')

        der, file_log = score_file_SAD(validator_script, scorer_script, name,
                                       ref_rttm, hyp_rttm, ref_uem,
                                       diarcollar)
        # The per-file scorer signals failure with the literal string 'NaN'.
        if der != 'NaN':
            derDict[name] = der
        else:
            failed_files.append(name)
        log_lines.extend(file_log)

    with open(log_file_path, 'w') as log_handle:
        log_handle.write('\n'.join(log_lines))
        print('\n\nLog File for SD Task - DER evaluation',
              'written to path:\n\t', log_file_path, '\n\n')

    write_msg.extend([
        '\n\n',
        'Number of Files to be Evaluated:' + str(len(fileList)) + '\n\n',
        'Number of Files Successfully Evaluated:' + str(len(derDict)) +
        '\n\n',
    ])
    if failed_files:
        write_msg.append('Files that could not be evaluated:\n\t' +
                         ' '.join(failed_files))
    return derDict, write_msg
def parse_arguments():
    """Parse and validate command-line arguments for the SID Top-N scorer.

    The defaults for ``--ref`` and ``--hyp`` point at the example files
    under the FS02 scoring-toolkit path; the default ``--out`` is a
    time-stamped text file under the results directory.

    Returns:
        tuple: ``(ref_path, hyp_path, out_path, topN_num)``. The three paths
        are the values returned by ``util.processInpPath``; ``topN_num`` is
        the value returned by ``proc_topN_inp`` for the requested ``--topN``
        and the value ``validate_hyp_file`` reports for the hypothesis file.
    """
    sctk_path = util.get_fs02sctk_path()
    def_out_path = (util.get_results_path() + 'SID_TopN_Result_' +
                    util.getDateTimeStrStamp() + '.txt')

    # Each sentence fragment ends with a space so the concatenated help text
    # does not run sentences together (previously rendered "Task.For more").
    desc = ('Wrapper File to generate Top-N Accuracy Scores for FS02 '
            'Challenge SID Task. '
            'For more information regarding scoring input and hypothesis '
            'files, refer below arguments description.')

    ref_mp = 'egs/ref_gt/SID/FS01_SID_uttID2spkID_Dev.txt'
    hyp_mp = 'egs/sys_results/SID/FS01_SID_uttID2spkID_Dev.txt'

    ref_def = sctk_path + ref_mp
    hyp_def = sctk_path + hyp_mp

    ref_str = ('Reference (ground truth) File Path. '
               'This file must be the SID ground truth file. '
               'Please refer ./' + ref_mp + ' file for example.')
    # The hypothesis is a single SID file (it is validated below with
    # inpType='file'); the old text described a directory of SAD outputs.
    hyp_str = ('Hypothesis (system output) File Path. '
               'This file must be the SID system output file. '
               'Please refer ./' + hyp_mp +
               ' file for example and file format.')
    out_str = ('Output (overall system score) File Path. '
               'Default: Result file will stored in ' +
               util.get_results_path() + ' directory. '
               'Additional log files will be stored in ' +
               util.get_logs_path())
    clr_str = ('Desired Top-N Accuracy for SID evaluation. '
               'Default: Top-5 Accuracy.')

    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-ref',
                        '--ref',
                        type=str,
                        default=ref_def,
                        help=ref_str)
    parser.add_argument('-hyp',
                        '--hyp',
                        type=str,
                        default=hyp_def,
                        help=hyp_str)
    parser.add_argument('-out',
                        '--out',
                        type=str,
                        default=def_out_path,
                        help=out_str)
    parser.add_argument('-topN', '--topN', type=int, default=5, help=clr_str)

    args = parser.parse_args()
    # Reference and hypothesis must be existing files; the output file is
    # only normalised, not required to exist.
    ref_path = util.processInpPath(args.ref, inpType='file', checkExists=True)
    hyp_path = util.processInpPath(args.hyp, inpType='file', checkExists=True)
    out_path = util.processInpPath(args.out, inpType='file')
    max_TopN = validate_hyp_file(hyp_path)
    topN_num = proc_topN_inp(args.topN, max_TopN)

    return ref_path, hyp_path, out_path, topN_num
def score_SID(fileList, fileDict, topN_num, write_msg, out_path):
    """Compute Top-1 .. Top-N accuracy for the SID task and report it.

    For every N in ``1..topN_num`` a file counts as correct when its
    reference label appears among the first N hypothesis entries. One
    per-file result listing per N is written under ``util.get_logs_path()``
    and a summary is appended to ``write_msg``.

    Args:
        fileList: sequence of file/utterance names to evaluate.
        fileDict: mapping with 'ref' (name -> reference label) and 'hyp'
            (name -> ordered sequence of hypothesised labels).
        topN_num: largest N to evaluate; values below 1 evaluate nothing.
        write_msg: list of report strings, extended in place.
        out_path: result file path; its base name (via ``util.get_bname``)
            prefixes the per-N log files.

    Returns:
        tuple: ``(topNDict, write_msg)`` where ``topNDict`` maps N to the
        accuracy in percent rounded to 3 decimals. With an empty
        ``fileList`` every accuracy is reported as 0.0 instead of raising
        ``ZeroDivisionError``.
    """
    clsfd_Dict = {
        n: {
            'corr': [],
            'incorr': []
        }
        for n in range(1, topN_num + 1)
    }

    for fn in fileList:
        ref_str = fileDict['ref'][fn]
        hyp_str_list = fileDict['hyp'][fn]

        for topn, buckets in clsfd_Dict.items():
            # Only the first `topn` hypotheses count for the Top-`topn` score.
            verdict = 'corr' if ref_str in hyp_str_list[:topn] else 'incorr'
            buckets[verdict].append(fn)

    num_files = len(fileList)
    # Guard the division: with nothing to score the accuracy is 0.0.
    topNDict = {
        n: (round((100.0 * len(buckets['corr'])) / num_files, 3)
            if num_files else 0.0)
        for n, buckets in clsfd_Dict.items()
    }

    write_msg.append(
        'Individual Results (per file) written to following paths:\n')
    strz = '\t' + '*' * 40 + '\n'
    for n, buckets in clsfd_Dict.items():
        write_path = util.get_logs_path() + util.get_bname(
            out_path) + '.Top-' + str(n)
        write_msg.append(write_path)
        write_list = [
            strz + '\tPer File SID Top-' + str(n) + ' Accuracy Results\n' +
            strz,
            '\n\nCorrect Predictions:\n' + ' '.join(buckets['corr']),
            '\n\nIncorrect Predictions:\n' + ' '.join(buckets['incorr']),
            '\n',
        ]
        util.writeList(write_list, write_path, isOverWrite=True)
    write_msg.append('\n\n\n')
    write_msg.append(strz + '\tTop-N Acurracy System Evaluation Results:\n' +
                     strz)
    for n, accuracy in topNDict.items():
        write_msg.append('\tTop-' + str(n) + ' Accuracy : ' + str(accuracy) +
                         ' %')
    write_msg.append(strz + '\n')
    return topNDict, write_msg
Beispiel #4
0
def parse_arguments():
    """Parse and validate command-line arguments for the SD DER scorer.

    The defaults for ``--ref`` and ``--hyp`` point at the example
    directories under the FS02 scoring-toolkit path; the default ``--out``
    is a time-stamped text file under the results directory.

    Returns:
        tuple: ``(ref_path, hyp_path, out_path, diarcollar)``. ``ref_path``
        is the result of ``proc_sd_ref_files`` applied to the processed
        reference path, ``hyp_path`` and ``out_path`` come from
        ``util.processInpPath``, and ``diarcollar`` is the value returned by
        ``proc_sd_collar``.
    """
    sctk_path = util.get_fs02sctk_path()
    def_out_path = (util.get_results_path() + 'SD_DER_Result_' +
                    util.getDateTimeStrStamp() + '.txt')
    coll_inps_str = 'Allowed Inputs: 0, 0.25, 0.5, 1, 2'

    # NOTE(review): the first two fragments are joined without a separating
    # space, so the help output reads "Tasks.Scoring" -- kept as-is here.
    desc = (
        'Wrapper File to generate DER Scores for FS02 Challenge SD (track1 and track2) Tasks.'
        'Scoring mechanism for both tracks will be the same. For more '
        'information regarding scoring input and hypothesis files, refer '
        'below arguments description. Open-Source Software Credits: '
        'This script uses dscore toolkit developed by Neville Ryant for '
        'generating DER scores. for more info, refer: (https://github.com/nryant/dscore)'
    )

    ref_mp = 'egs/ref_gt/SD/'
    hyp_mp = 'egs/sys_results/SD/'

    ref_def = sctk_path + ref_mp
    hyp_def = sctk_path + hyp_mp

    ref_str = (
        'Reference (ground truth) Directory Path. '
        'This directory must include only SD ground truth RTTM '
        'and UEM folders. Please refer ./{} directory for examples.'
    ).format(ref_mp)
    hyp_str = (
        'Hypothesis (system output) Directory Path. '
        'This directory must include only diarization system output RTTM files. '
        'Please refer ./{} directory for examples and file format.'
    ).format(hyp_mp)
    out_str = ('Output (per file and overall system score) File Path. '
               'Default: Result file will stored in ' +
               util.get_results_path() + ' directory. '
               'Additional log files if generated will be stored in ' +
               util.get_logs_path())
    clr_str = ('Desired forgiveness Collar for SD evaluation. {} '
               'Default collar length: 0.25 secs.').format(coll_inps_str)

    parser = argparse.ArgumentParser(description=desc)
    # The three path options share the same shape, so register them from a
    # small table; the collar option differs in type and is added separately.
    path_options = (
        ('ref', ref_def, ref_str),
        ('hyp', hyp_def, hyp_str),
        ('out', def_out_path, out_str),
    )
    for opt_name, opt_default, opt_help in path_options:
        parser.add_argument('-' + opt_name,
                            '--' + opt_name,
                            type=str,
                            default=opt_default,
                            help=opt_help)
    parser.add_argument('-diarcollar',
                        '--diarcollar',
                        type=float,
                        default=0.25,
                        help=clr_str)
    parsed = parser.parse_args()

    # Tuple elements are evaluated left to right, matching the original
    # processing order: ref, hyp, out, collar.
    return (
        proc_sd_ref_files(util.processInpPath(parsed.ref)),
        util.processInpPath(parsed.hyp),
        util.processInpPath(parsed.out, inpType='file'),
        proc_sd_collar(parsed.diarcollar),
    )
Beispiel #5
0
def parse_arguments():
    """Parse and validate command-line arguments for the ASR WER scorer.

    ``--kaldi`` is mandatory. When either ``--ref`` or ``--hyp`` is left at
    (or equals) its default, BOTH paths are replaced by the bundled example
    for the selected track. For track 2 the reference and hypothesis must be
    existing files; for any other track number they are processed as
    directory paths.

    Returns:
        tuple: ``(ref_path, hyp_path, out_path, track_num, kaldi_path)``
        where the paths come from ``util.processInpPath`` and ``track_num``
        is the value returned by ``proc_track_num``.
    """
    sctk_path = util.get_fs02sctk_path()
    def_out_path = (util.get_results_path() + 'ASR_WER_Result_' +
                    util.getDateTimeStrStamp() + '.txt')

    # Each sentence fragment ends with a space so that the concatenated help
    # text does not run sentences together.
    desc = (
        'Wrapper File to generate WER Scores for FS02 Challenge ASR (track1 and track2) Task. '
        'Scoring mechanism for both tracks is the same, but the system output '
        'hypothesis file/folder expected from the user will be different. '
        '(folder with json files for track-1, and a plain text file for track-2) '
        'For more information regarding scoring input and hypothesis files, '
        'refer below arguments description. Open-Source Software Credits: '
        'This script uses compute-wer tool from the Kaldi Speech Recognition '
        'Toolkit. for more info, refer: (http://kaldi-asr.org/doc/tools.html)'
    )

    # Path stems: the track number ("1/" or "2/") is appended later.
    ref_mp = 'egs/ref_gt/ASR/ASR_track'
    hyp_mp = 'egs/sys_results/ASR/ASR_track'

    ref_def = sctk_path + ref_mp
    hyp_def = sctk_path + hyp_mp

    ref_str = (
        'Reference (ground truth) Path. '
        '(Directory Path for Track-1, and File Path for Track-2) '
        'Directory Path for Track-1 must include only ASR ground truth files. '
        'For ASR_track1: directory containing json format ground truth files required. '
        'Please refer ./' + ref_mp + '1/ directory for examples. '
        'For ASR_track2: kaldi "text" file. Refer: { https://kaldi-asr.org/doc/data_prep.html#data_prep_data }. '
        '   file contents of File Path for Track-2 must include only FS02_ASR_track2 '
        'file-names followed by associated transcripts (like in Kaldi "text" format) '
        'Please refer ./' + ref_mp + '2/ directory for examples.'
    )
    # The hypothesis help must point at the system-output examples (hyp_mp);
    # it previously pointed at the reference examples (ref_mp).
    hyp_str = (
        'Hypothesis (system output) Directory/File Path. '
        '(Directory Path for Track-1, and File Path for Track-2) '
        'Directory Path for Track-1 must include only FS02-ASR system output files. '
        'For ASR_track1: directory containing json format system output files required. '
        'Please refer ./' + hyp_mp + '1/ directory for examples and file format. '
        'For ASR_track2: kaldi "text" file. Refer: { https://kaldi-asr.org/doc/data_prep.html#data_prep_data }. '
        'file contents of File Path for Track-2 must include only FS02_ASR_track2 '
        'file-names followed by associated transcripts (like in Kaldi "text" format) '
        'Please refer ./' + hyp_mp + '2/ directory for examples and file format.'
    )
    out_str = ('Output (overall system score) File Path. '
               'Default: Result file will stored in ' +
               util.get_results_path() + ' directory. '
               'Additional log files if generated will be stored in ' +
               util.get_logs_path())
    trk_str = ('Track number of the ASR Task to be evaluated. '
               'Input Options: (as string) "1" or "2"')
    # The original sentence was cut off after "required for"; completed from
    # the tool credit in the description above.
    kld_str = ('base path to the locally installed kaldi directory. '
               'e.g. /home/crss/kaldi. This argument is required because '
               'scoring uses the compute-wer tool from Kaldi.')

    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-track', '--track', type=str, default='1',
                        help=trk_str)
    parser.add_argument('-kaldi', '--kaldi', type=str, required=True,
                        help=kld_str)
    parser.add_argument('-ref', '--ref', type=str, default=ref_def,
                        help=ref_str)
    parser.add_argument('-hyp', '--hyp', type=str, default=hyp_def,
                        help=hyp_str)
    parser.add_argument('-out', '--out', type=str, default=def_out_path,
                        help=out_str)

    args = parser.parse_args()

    track_num = proc_track_num(args.track)
    kaldi_path = util.processInpPath(args.kaldi)
    if args.ref == ref_def or args.hyp == hyp_def:
        # The defaults are only stems, so the track-specific suffix has to be
        # appended. Both paths are switched to the bundled example together.
        ad = str(track_num) + '/'
        t2_name = 'FS01_ASR_track2_transcriptions_Dev'
        print('ref or hyp paths are either not provided, or match default paths.')
        print('Running Script on default example for Track-', str(track_num))
        if track_num == 2:
            args.ref = ref_def + ad + t2_name
            args.hyp = hyp_def + ad + t2_name
        else:
            args.ref = ref_def + ad
            args.hyp = hyp_def + ad
    if track_num == 2:
        # Track 2 takes single text files, which must already exist.
        ref_path = util.processInpPath(args.ref, inpType='file',
                                       checkExists=True)
        hyp_path = util.processInpPath(args.hyp, inpType='file',
                                       checkExists=True)
    else:
        ref_path = util.processInpPath(args.ref)
        hyp_path = util.processInpPath(args.hyp)
    out_path = util.processInpPath(args.out, inpType='file')

    return ref_path, hyp_path, out_path, track_num, kaldi_path
def parse_arguments():
    """Parse and validate command-line arguments for the SAD DCF scorer.

    The defaults for ``--ref`` and ``--hyp`` point at the example
    directories under the FS02 scoring-toolkit path; the default ``--out``
    is a time-stamped text file under the results directory.

    Returns:
        tuple: ``(ref_path, hyp_path, out_path, sadcollar)`` where the paths
        come from ``util.processInpPath`` and ``sadcollar`` is the value
        returned by ``proc_sad_collar``.
    """
    sctk_path = util.get_fs02sctk_path()
    def_out_path = (util.get_results_path() + 'SAD_DCF_Result_' +
                    util.getDateTimeStrStamp() + '.txt')
    coll_inps_str = 'Allowed Inputs: 0, 0.25, 0.5, 1, 2'

    # NOTE(review): the first two fragments are joined without a separating
    # space, so the help output reads "Task.For more" -- kept as-is here.
    desc = ''.join([
        'Wrapper File to generate DCF Scores for FS02 Challenge SAD Task.',
        'For more information regarding scoring input and hypothesis files, ',
        'refer below arguments description. ',
        'Open-Source Software Credits: This script uses scoreFile_SAD.pl ',
        'developed by NIST. for more info, refer: (https://www.nist.gov/',
        'itl/iad/mig/nist-open-speech-activity-detection-evaluation)',
    ])

    ref_mp = 'egs/ref_gt/SAD/'
    hyp_mp = 'egs/sys_results/SAD/'

    ref_def = sctk_path + ref_mp
    hyp_def = sctk_path + hyp_mp

    ref_str = ('Reference (ground truth) Directory Path. '
               'This directory must include only SAD ground truth files. '
               'Please refer ./%s directory for examples.') % ref_mp
    hyp_str = ('Hypothesis (system output) Directory Path. '
               'This directory must include only SAD system output files. '
               'Please refer ./%s directory for examples and file format.'
               ) % hyp_mp
    out_str = ('Output (per file and overall system score) File Path. '
               'Default: Result file will stored in ' +
               util.get_results_path() + ' directory. '
               'Additional log files if generated will be stored in ' +
               util.get_logs_path())
    clr_str = ('Desired forgiveness Collar for SAD evaluation. %s '
               'Default collar length: 0.5 secs.') % coll_inps_str

    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-ref', '--ref', type=str, default=ref_def,
                        help=ref_str)
    parser.add_argument('-hyp', '--hyp', type=str, default=hyp_def,
                        help=hyp_str)
    parser.add_argument('-out', '--out', type=str, default=def_out_path,
                        help=out_str)
    parser.add_argument('-sadcollar', '--sadcollar', type=float, default=0.5,
                        help=clr_str)
    parsed = parser.parse_args()

    # Tuple elements are evaluated left to right, matching the original
    # processing order: ref, hyp, out, collar.
    return (
        util.processInpPath(parsed.ref),
        util.processInpPath(parsed.hyp),
        util.processInpPath(parsed.out, inpType='file'),
        proc_sad_collar(parsed.sadcollar),
    )