def score_folder_SD(fileList, fileDict, diarcollar, write_msg, out_path):
    """Score every file in ``fileList`` for the SD task and collect DER values.

    For each file name, the reference and hypothesis RTTM paths are looked up
    in ``fileDict['ref']`` / ``fileDict['hyp']``, the UEM path is derived from
    the reference RTTM path by string substitution, and ``score_file_SAD`` is
    called with the dscore validation/scoring script paths. The per-file logs
    it returns are concatenated and written to a ``.log`` file in the logs
    directory.

    Args:
        fileList: Sequence of file names to evaluate.
        fileDict: Mapping with 'ref' and 'hyp' entries, each mapping a file
            name to its RTTM path.
        diarcollar: Collar value forwarded unchanged to ``score_file_SAD``.
        write_msg: List of report lines; summary lines are appended in place.
        out_path: Result file path; ``util.get_bname(out_path)`` names the log
            file.

    Returns:
        tuple: ``(derDict, write_msg)``. ``derDict`` maps each file name to
        the DER returned by ``score_file_SAD``, excluding files whose DER was
        the string 'NaN'.
    """
    toolkit_root = util.get_fs02sctk_path()
    validator_script = toolkit_root + 'scutils/dscore/validate_rttm.py'
    scorer_script = toolkit_root + 'scutils/dscore/score.py'
    log_write_path = util.get_logs_path() + util.get_bname(out_path) + '.log'

    derDict = {}
    log_lines = []
    failed = []
    for name in fileList:
        reference_rttm = fileDict['ref'][name]
        hypothesis_rttm = fileDict['hyp'][name]
        # The UEM file mirrors the reference RTTM path: folder and extension
        # are swapped by plain string replacement.
        reference_uem = reference_rttm.replace('/RTTM/', '/UEM/')
        reference_uem = reference_uem.replace('.rttm', '.uem')
        der, file_log = score_file_SAD(validator_script, scorer_script, name,
                                       reference_rttm, hypothesis_rttm,
                                       reference_uem, diarcollar)
        # A DER equal to the string 'NaN' marks a file that was not scored.
        if der == 'NaN':
            failed.append(name)
        else:
            derDict[name] = der
        log_lines.extend(file_log)

    with open(log_write_path, 'w') as log_file:
        log_file.write('\n'.join(log_lines))
    print('\n\nLog File for SD Task - DER evaluation',
          'written to path:\n\t', log_write_path, '\n\n')

    summary = (
        '\n\n',
        'Number of Files to be Evaluated:' + str(len(fileList)) + '\n\n',
        'Number of Files Successfully Evaluated:' + str(len(derDict)) + '\n\n',
    )
    for line in summary:
        write_msg.append(line)
    if failed:
        write_msg.append('Files that could not be evaluated:\n\t' +
                         ' '.join(failed))
    return derDict, write_msg
def parse_arguments():
    """Parse the command-line arguments of the SID Top-N accuracy scorer.

    Defines the ``-ref``, ``-hyp``, ``-out`` and ``-topN`` options, passes the
    three paths through ``util.processInpPath`` and the requested Top-N value
    through ``proc_topN_inp`` together with the value returned by
    ``validate_hyp_file`` for the hypothesis file.

    Returns:
        tuple: ``(ref_path, hyp_path, out_path, topN_num)``.
    """
    sctk_path = util.get_fs02sctk_path()
    results_dir = util.get_results_path()
    def_out_path = (results_dir + 'SID_TopN_Result_' +
                    util.getDateTimeStrStamp() + '.txt')

    # Adjacent sentences need an explicit trailing space, otherwise the help
    # output runs them together ("Task.For more ...").
    desc = ('Wrapper File to generate Top-N Accuracy Scores for FS02 '
            'Challenge SID Task. '
            'For more information regarding scoring input and hypothesis '
            'files, refer below arguments description.')

    ref_mp = 'egs/ref_gt/SID/FS01_SID_uttID2spkID_Dev.txt'
    hyp_mp = 'egs/sys_results/SID/FS01_SID_uttID2spkID_Dev.txt'
    ref_def = sctk_path + ref_mp
    hyp_def = sctk_path + hyp_mp

    ref_str = ('Reference (ground truth) File Path. '
               'This file must be the SID ground truth file. '
               'Please refer ./' + ref_mp + ' file for example.')
    # The hypothesis is a single SID file (it is processed with
    # inpType='file' below), not a directory of SAD outputs.
    hyp_str = ('Hypothesis (system output) File Path. '
               'This file must be the SID system output file. '
               'Please refer ./' + hyp_mp + ' file for example and file format.')
    out_str = ('Output (overall system score) File Path. '
               'Default: Result file will be stored in ' + results_dir +
               ' directory. '
               'Additional log files will be stored in ' + util.get_logs_path())
    topn_str = ('Desired Top-N Accuracy for SID evaluation. '
                'Default: Top-5 Accuracy.')

    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-ref', '--ref', type=str, default=ref_def,
                        help=ref_str)
    parser.add_argument('-hyp', '--hyp', type=str, default=hyp_def,
                        help=hyp_str)
    parser.add_argument('-out', '--out', type=str, default=def_out_path,
                        help=out_str)
    parser.add_argument('-topN', '--topN', type=int, default=5, help=topn_str)
    args = parser.parse_args()

    ref_path = util.processInpPath(args.ref, inpType='file', checkExists=True)
    hyp_path = util.processInpPath(args.hyp, inpType='file', checkExists=True)
    out_path = util.processInpPath(args.out, inpType='file')

    # The requested N is processed against the value validate_hyp_file
    # returns for the hypothesis file (named max_TopN here).
    max_TopN = validate_hyp_file(hyp_path)
    topN_num = proc_topN_inp(args.topN, max_TopN)
    return ref_path, hyp_path, out_path, topN_num
def score_SID(fileList, fileDict, topN_num, write_msg, out_path):
    """Compute Top-1 .. Top-N SID accuracy and write per-file result logs.

    A file counts as correct at rank ``n`` when its reference label
    (``fileDict['ref'][fn]``) appears among the first ``n`` entries of its
    hypothesis list (``fileDict['hyp'][fn]``). For every rank a log listing
    the correct and incorrect files is written via ``util.writeList`` to
    ``<logs path><basename of out_path>.Top-<n>``.

    Args:
        fileList: Sequence of file names to evaluate.
        fileDict: Mapping with 'ref' (file name -> label) and 'hyp'
            (file name -> ranked list of labels) entries.
        topN_num: Highest rank to evaluate; ranks 1..topN_num are scored.
        write_msg: List of report lines; result lines are appended in place.
        out_path: Result file path; its base name prefixes the log files.

    Returns:
        tuple: ``(topNDict, write_msg)`` where ``topNDict`` maps each rank to
        the accuracy in percent, rounded to 3 decimals. With an empty
        ``fileList`` every accuracy is reported as 0.0 instead of raising
        ``ZeroDivisionError``.
    """
    clsfd_Dict = {
        n: {'corr': [], 'incorr': []}
        for n in range(1, topN_num + 1)
    }
    for fn in fileList:
        ref_str = fileDict['ref'][fn]
        hyp_str_list = fileDict['hyp'][fn]
        for topn, buckets in clsfd_Dict.items():
            outcome = 'corr' if ref_str in hyp_str_list[:topn] else 'incorr'
            buckets[outcome].append(fn)

    # Guard the division: an empty file list would otherwise crash here.
    num_files = len(fileList)
    topNDict = {
        n: (round((100.0 * len(buckets['corr'])) / num_files, 3)
            if num_files else 0.0)
        for n, buckets in clsfd_Dict.items()
    }

    write_msg.append(
        'Individual Results (per file) written to following paths:\n')
    strz = '\t' + '*' * 40 + '\n'
    # The log path prefix does not depend on the rank, so build it once.
    log_prefix = util.get_logs_path() + util.get_bname(out_path)
    for n, buckets in clsfd_Dict.items():
        write_path = log_prefix + '.Top-' + str(n)
        write_msg.append(write_path)
        write_list = [
            strz + '\tPer File SID Top-' + str(n) + ' Accuracy Results\n' +
            strz,
            '\n\nCorrect Predictions:\n' + ' '.join(buckets['corr']),
            '\n\nIncorrect Predictions:\n' + ' '.join(buckets['incorr']),
            '\n',
        ]
        util.writeList(write_list, write_path, isOverWrite=True)

    write_msg.append('\n\n\n')
    # NOTE(review): 'Acurracy' is misspelled in the emitted header; it is left
    # unchanged here in case existing result files are matched on this text.
    write_msg.append(strz + '\tTop-N Acurracy System Evaluation Results:\n' +
                     strz)
    for n, accuracy in topNDict.items():
        write_msg.append('\tTop-' + str(n) + ' Accuracy : ' + str(accuracy) +
                         ' %')
    write_msg.append(strz + '\n')
    return topNDict, write_msg
def parse_arguments():
    """Parse the command-line arguments of the SD DER scorer.

    Defines the ``-ref``, ``-hyp``, ``-out`` and ``-diarcollar`` options. The
    paths are passed through ``util.processInpPath`` (the reference path
    additionally through ``proc_sd_ref_files``) and the collar through
    ``proc_sd_collar``.

    Returns:
        tuple: ``(ref_path, hyp_path, out_path, diarcollar)``.
    """
    sctk_path = util.get_fs02sctk_path()
    results_dir = util.get_results_path()
    def_out_path = (results_dir + 'SD_DER_Result_' +
                    util.getDateTimeStrStamp() + '.txt')
    coll_inps_str = 'Allowed Inputs: 0, 0.25, 0.5, 1, 2'

    # Adjacent sentences need an explicit trailing space, otherwise the help
    # output runs them together ("Tasks.Scoring ...").
    desc = ('Wrapper File to generate DER Scores for FS02 Challenge SD '
            '(track1 and track2) Tasks. '
            'Scoring mechanism for both tracks will be the same. For more '
            'information regarding scoring input and hypothesis files, refer '
            'below arguments description. Open-Source Software Credits: '
            'This script uses dscore toolkit developed by Neville Ryant for '
            'generating DER scores. for more info, refer: '
            '(https://github.com/nryant/dscore)')

    ref_mp = 'egs/ref_gt/SD/'
    hyp_mp = 'egs/sys_results/SD/'
    ref_def = sctk_path + ref_mp
    hyp_def = sctk_path + hyp_mp

    ref_str = ('Reference (ground truth) Directory Path. '
               'This directory must include only SD ground truth RTTM '
               'and UEM folders. Please refer ./' + ref_mp +
               ' directory for examples.')
    hyp_str = ('Hypothesis (system output) Directory Path. '
               'This directory must include only diarization system output '
               'RTTM files. '
               'Please refer ./' + hyp_mp +
               ' directory for examples and file format.')
    out_str = ('Output (per file and overall system score) File Path. '
               'Default: Result file will be stored in ' + results_dir +
               ' directory. '
               'Additional log files if generated will be stored in ' +
               util.get_logs_path())
    clr_str = ('Desired forgiveness Collar for SD evaluation. ' +
               coll_inps_str + '. Default collar length: 0.25 secs.')

    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-ref', '--ref', type=str, default=ref_def,
                        help=ref_str)
    parser.add_argument('-hyp', '--hyp', type=str, default=hyp_def,
                        help=hyp_str)
    parser.add_argument('-out', '--out', type=str, default=def_out_path,
                        help=out_str)
    parser.add_argument('-diarcollar', '--diarcollar', type=float,
                        default=0.25, help=clr_str)
    args = parser.parse_args()

    # The processed reference directory is further handled by
    # proc_sd_ref_files, whose return value is used as the reference path.
    ref_path = proc_sd_ref_files(util.processInpPath(args.ref))
    hyp_path = util.processInpPath(args.hyp)
    out_path = util.processInpPath(args.out, inpType='file')
    diarcollar = proc_sd_collar(args.diarcollar)
    return ref_path, hyp_path, out_path, diarcollar
def parse_arguments():
    """Parse the command-line arguments of the ASR WER scorer.

    Defines the ``-track``, ``-kaldi`` (required), ``-ref``, ``-hyp`` and
    ``-out`` options. The track is passed through ``proc_track_num`` and the
    paths through ``util.processInpPath``. For track 2 the reference and
    hypothesis are processed as files with ``checkExists=True``; otherwise
    they are processed with the helper's default arguments.

    If either ``-ref`` or ``-hyp`` equals its default, BOTH are replaced by
    the bundled example for the selected track, and a notice is printed.

    Returns:
        tuple: ``(ref_path, hyp_path, out_path, track_num, kaldi_path)``.
    """
    sctk_path = util.get_fs02sctk_path()
    results_dir = util.get_results_path()
    def_out_path = (results_dir + 'ASR_WER_Result_' +
                    util.getDateTimeStrStamp() + '.txt')

    # Adjacent sentences need an explicit trailing space, otherwise the help
    # output runs them together ("Task.Scoring ...", "track-2)For ...").
    desc = ('Wrapper File to generate WER Scores for FS02 Challenge ASR '
            '(track1 and track2) Task. '
            'Scoring mechanism for both tracks is the same, but the system '
            'output hypothesis file/folder expected from the user will be '
            'different. '
            '(folder with json files for track-1, and a plain text file for '
            'track-2) '
            'For more information regarding scoring input and hypothesis '
            'files, refer below arguments description. Open-Source Software '
            'Credits: This script uses compute-wer tool from the Kaldi Speech '
            'Recognition Toolkit. for more info, refer: '
            '(http://kaldi-asr.org/doc/tools.html)')

    # Path stems: the track number plus '/' is appended where they are used.
    ref_mp = 'egs/ref_gt/ASR/ASR_track'
    hyp_mp = 'egs/sys_results/ASR/ASR_track'
    ref_def = sctk_path + ref_mp
    hyp_def = sctk_path + hyp_mp

    ref_str = ('Reference (ground truth) Path. '
               '(Directory Path for Track-1, and File Path for Track-2) '
               'Directory Path for Track-1 must include only ASR ground truth '
               'files. '
               'For ASR_track1: directory containing json format ground truth '
               'files required. '
               'Please refer ./' + ref_mp + '1/ directory for examples. '
               'For ASR_track2: kaldi "text" file. Refer: '
               '{ https://kaldi-asr.org/doc/data_prep.html#data_prep_data }. '
               'file contents of File Path for Track-2 must include only '
               'FS02_ASR_track2 file-names followed by associated transcripts '
               '(like in Kaldi "text" format). '
               'Please refer ./' + ref_mp + '2/ directory for examples.')
    # The hypothesis examples live under hyp_mp, not ref_mp.
    hyp_str = ('Hypothesis (system output) Directory/File Path. '
               '(Directory Path for Track-1, and File Path for Track-2) '
               'Directory Path for Track-1 must include only FS02-ASR system '
               'output files. '
               'For ASR_track1: directory containing json format system '
               'output files required. '
               'Please refer ./' + hyp_mp +
               '1/ directory for examples and file format. '
               'For ASR_track2: kaldi "text" file. Refer: '
               '{ https://kaldi-asr.org/doc/data_prep.html#data_prep_data }. '
               'file contents of File Path for Track-2 must include only '
               'FS02_ASR_track2 file-names followed by associated transcripts '
               '(like in Kaldi "text" format). '
               'Please refer ./' + hyp_mp +
               '2/ directory for examples and file format.')
    out_str = ('Output (overall system score) File Path. '
               'Default: Result file will be stored in ' + results_dir +
               ' directory. '
               'Additional log files if generated will be stored in ' +
               util.get_logs_path())
    trk_str = ('Track number of the ASR Task to be evaluated. '
               'Input Options: (as string) "1" or "2"')
    kld_str = ('base path to the locally installed kaldi directory. '
               'e.g. /home/crss/kaldi. This argument is required '
               '(the script uses the Kaldi compute-wer tool).')

    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-track', '--track', type=str, default='1',
                        help=trk_str)
    parser.add_argument('-kaldi', '--kaldi', type=str, required=True,
                        help=kld_str)
    parser.add_argument('-ref', '--ref', type=str, default=ref_def,
                        help=ref_str)
    parser.add_argument('-hyp', '--hyp', type=str, default=hyp_def,
                        help=hyp_str)
    parser.add_argument('-out', '--out', type=str, default=def_out_path,
                        help=out_str)
    args = parser.parse_args()

    track_num = proc_track_num(args.track)
    kaldi_path = util.processInpPath(args.kaldi)

    # NOTE(review): a single defaulted path is enough to replace BOTH paths
    # with the bundled example; the printed notice describes this as intended.
    if args.ref == ref_def or args.hyp == hyp_def:
        ad = str(track_num) + '/'
        t2_name = 'FS01_ASR_track2_transcriptions_Dev'
        print('ref or hyp paths are either not provided, or match default paths.')
        print('Running Script on default example for Track-', str(track_num))
        if track_num == 2:
            args.ref = ref_def + ad + t2_name
            args.hyp = hyp_def + ad + t2_name
        else:
            args.ref = ref_def + ad
            args.hyp = hyp_def + ad

    if track_num == 2:
        ref_path = util.processInpPath(args.ref, inpType='file',
                                       checkExists=True)
        hyp_path = util.processInpPath(args.hyp, inpType='file',
                                       checkExists=True)
    else:
        ref_path = util.processInpPath(args.ref)
        hyp_path = util.processInpPath(args.hyp)
    out_path = util.processInpPath(args.out, inpType='file')
    return ref_path, hyp_path, out_path, track_num, kaldi_path
def parse_arguments():
    """Parse the command-line arguments of the SAD DCF scorer.

    Defines the ``-ref``, ``-hyp``, ``-out`` and ``-sadcollar`` options. The
    paths are passed through ``util.processInpPath`` and the collar through
    ``proc_sad_collar``.

    Returns:
        tuple: ``(ref_path, hyp_path, out_path, sadcollar)``.
    """
    sctk_path = util.get_fs02sctk_path()
    results_dir = util.get_results_path()
    def_out_path = (results_dir + 'SAD_DCF_Result_' +
                    util.getDateTimeStrStamp() + '.txt')
    coll_inps_str = 'Allowed Inputs: 0, 0.25, 0.5, 1, 2'

    # Adjacent sentences need an explicit trailing space, otherwise the help
    # output runs them together ("Task.For more ...").
    desc = ('Wrapper File to generate DCF Scores for FS02 Challenge SAD Task. '
            'For more information regarding scoring input and hypothesis '
            'files, refer below arguments description. '
            'Open-Source Software Credits: This script uses scoreFile_SAD.pl '
            'developed by NIST. for more info, refer: (https://www.nist.gov/'
            'itl/iad/mig/nist-open-speech-activity-detection-evaluation)')

    ref_mp = 'egs/ref_gt/SAD/'
    hyp_mp = 'egs/sys_results/SAD/'
    ref_def = sctk_path + ref_mp
    hyp_def = sctk_path + hyp_mp

    ref_str = ('Reference (ground truth) Directory Path. '
               'This directory must include only SAD ground truth files. '
               'Please refer ./' + ref_mp + ' directory for examples.')
    hyp_str = ('Hypothesis (system output) Directory Path. '
               'This directory must include only SAD system output files. '
               'Please refer ./' + hyp_mp +
               ' directory for examples and file format.')
    out_str = ('Output (per file and overall system score) File Path. '
               'Default: Result file will be stored in ' + results_dir +
               ' directory. '
               'Additional log files if generated will be stored in ' +
               util.get_logs_path())
    clr_str = ('Desired forgiveness Collar for SAD evaluation. ' +
               coll_inps_str + '. Default collar length: 0.5 secs.')

    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('-ref', '--ref', type=str, default=ref_def,
                        help=ref_str)
    parser.add_argument('-hyp', '--hyp', type=str, default=hyp_def,
                        help=hyp_str)
    parser.add_argument('-out', '--out', type=str, default=def_out_path,
                        help=out_str)
    parser.add_argument('-sadcollar', '--sadcollar', type=float, default=0.5,
                        help=clr_str)
    args = parser.parse_args()

    ref_path = util.processInpPath(args.ref)
    hyp_path = util.processInpPath(args.hyp)
    out_path = util.processInpPath(args.out, inpType='file')
    sadcollar = proc_sad_collar(args.sadcollar)
    return ref_path, hyp_path, out_path, sadcollar