def init_directory(self):
    # init directory for saving the audio
    make_dir(os.path.join(self.audio_dir, self.tts.getName()))
    # init directories for saving the transcriptions
    for asr in self.asrs:
        make_dir(
            os.path.join(self.transcription_dir, self.tts.getName(), asr.getName()))
def get_outputfile_for_failed_test_case(self):
    asrs_dir = "_".join([asr.getName() for asr in self.asrs])
    result_dir = os.path.join(
        self.output_dir, "result", self.tts.getName(), asrs_dir,
        f"num_iteration_{self.num_iteration}",
        f"text_batch_size_{self.text_batch_size if self.text_batch_size else 'global'}")
    make_dir(result_dir)
    experiment_name = (
        f"with-estimator-{self.estimator.getName().replace('/', '-')}"
        if self.estimator else "without-estimator")
    return os.path.join(result_dir, experiment_name + ".json")
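# Illustration only (the TTS and ASR names below are assumptions, not from the original
# code): with a TTS named "google", ASRs named "deepspeech" and "wav2vec2",
# num_iteration=5, no text_batch_size, and no estimator, the method above returns
#   <output_dir>/result/google/deepspeech_wav2vec2/num_iteration_5/text_batch_size_global/without-estimator.json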
def saveFailedTestCases(self, processed_texts, cases):
    failed_test_case_dir = os.path.join(
        self.output_dir, "failed_test_cases", self.tts.getName(), self.target_asr)
    make_dir(failed_test_case_dir)
    ids = self.get_id_only(processed_texts)
    input_texts = self.get_text_only(processed_texts)
    source_audio_dir = os.path.join(self.audio_dir, self.tts.getName())
    for input_text, filename, case in zip(input_texts, ids, cases):
        if case[self.target_asr] == FAILED_TEST_CASE:
            # copy the failing audio next to its ground-truth text
            src_audio_fpath = os.path.join(source_audio_dir, f"{filename}.wav")
            trgt_audio_fpath = os.path.join(failed_test_case_dir, f"{filename}.wav")
            os.system(f"cp {src_audio_fpath} {trgt_audio_fpath}")
            ground_truth_file = os.path.join(failed_test_case_dir, f"{filename}.txt")
            with open(ground_truth_file, "w+") as f:
                f.write(input_text)
def generate(tts_name: str, corpus_path: str, data_dir: str, execution_time_dir: str):
    tts = create_tts_by_name(tts_name)
    audio_dir = os.path.join(data_dir, AUDIO_DIR)
    execution_time_dir = os.path.join(execution_time_dir, AUDIO_DIR, tts_name)
    make_dir(execution_time_dir)
    corpus = read_corpus(corpus_path)
    for i in range(0, 3):
        c = corpus[i]
        text = c.getText()
        filename = c.getId()
        start = time.time()
        tts.generateAudio(text=text, audio_dir=audio_dir, filename=filename)
        end = time.time()
        execution_time = end - start
        fpath = os.path.join(execution_time_dir, filename + ".txt")
        save_execution_time(fpath=fpath, execution_time=execution_time)
        print(f"Generate {i}")
        if tts_name in ["google"]:
            # pause between requests to the remote google TTS to avoid rate limiting
            random_number = float(random.randint(15, 40)) / 10.
            time.sleep(random_number)
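# A call sketch (assumed values, not from the original file): "google" must be a name
# known to create_tts_by_name, and every path below is a placeholder.
generate(
    tts_name="google",                           # placeholder TTS backend name
    corpus_path="corpus/corpus.txt",             # placeholder corpus file
    data_dir="output/data",                      # audio is written under <data_dir>/<AUDIO_DIR>
    execution_time_dir="output/execution_time")  # per-file timing goes under <execution_time_dir>/<AUDIO_DIR>/<tts_name>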
df = df.reset_index(drop=True)

print("get sample: " + str(datetime.now()))
# get sample data
df = df.sample(frac=1, random_state=seed).reset_index(drop=True)
N = 20000
sample_df = get_sample_data(df, int(2 * N))

print("preprocess data: " + str(datetime.now()))
# text preprocessing
data = preprocess_data(sample_df, N)

print("write data: " + str(datetime.now()))
# prepare folder to save the data
directory = config["output_dir"]
if len(config["corpus_fpath"].split("/")) > 1:
    directory = os.path.join(
        directory, "/".join(config["corpus_fpath"].split("/")[:-1]))
make_dir(directory)

outfile = os.path.join(config["output_dir"], config["corpus_fpath"])
with open(outfile, "w+") as file:
    for s in data:
        file.write("%s\n" % s)
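# For context (an assumed shape, not taken from the original file): `config` only needs
# the two keys read above, e.g.
#   config = {"output_dir": "output", "corpus_fpath": "corpus/europarl-20k.txt"}
# so the shuffled, preprocessed sentences end up in output/corpus/europarl-20k.txt.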
def saveTranscription(self, transcription_dir: str, filename: str):
    transcription_dir = os.path.join(transcription_dir, self.getName())
    make_dir(transcription_dir)
    transcription_path = os.path.join(transcription_dir, filename + ".txt")
    with open(transcription_path, "w+") as f:
        f.write(self.getTranscription())
def processText(self, text: str, filename: str):
    """
    Run CrossASR on a single text.

    Description: Given a sentence as input, the program will generate a test case.
    The program needs some parameters, i.e. a TTS and the ASRs under test.

    :param text: input sentence used as the ground truth
    :param filename: base name (without extension) for the audio, transcription,
        and execution-time files
    :return cases: dict mapping each ASR name to its test case outcome
    :return execution_time: total time spent generating and recognizing the audio
    """
    execution_time = 0.

    directory = os.path.join(self.execution_time_dir, AUDIO_DIR, self.getTTS().getName())
    make_dir(directory)
    time_for_generating_audio_fpath = os.path.join(directory, filename + ".txt")

    audio_fpath = self.getTTS().getAudioPath(
        text=text, audio_dir=self.audio_dir, filename=filename)

    if self.recompute or not os.path.exists(audio_fpath):
        # print(audio_fpath)
        start_time = time.time()
        self.getTTS().generateAudio(text=text, audio_fpath=audio_fpath)
        save_execution_time(
            fpath=time_for_generating_audio_fpath,
            execution_time=time.time() - start_time)

    ## add execution time for generating the audio
    execution_time += get_execution_time(fpath=time_for_generating_audio_fpath)

    transcription_dir = os.path.join(self.transcription_dir, self.getTTS().getName())

    transcriptions = {}
    for asr in self.asrs:
        directory = os.path.join(
            self.execution_time_dir, TRANSCRIPTION_DIR,
            self.getTTS().getName(), asr.getName())
        make_dir(directory)
        time_for_recognizing_audio_fpath = os.path.join(directory, filename + ".txt")

        if self.recompute:
            start_time = time.time()
            # TODO: change recognizeAudio to accept the audio itself instead of a file path
            # audio = asr.loadAudio(audio_fpath=audio_fpath)
            # transcription = asr.recognizeAudio(audio=audio)
            # asr.saveTranscription(transcription_fpath, transcription)
            transcription = asr.recognizeAudio(audio_fpath=audio_fpath)
            asr.setTranscription(transcription)
            asr.saveTranscription(transcription_dir=transcription_dir, filename=filename)
            save_execution_time(
                fpath=time_for_recognizing_audio_fpath,
                execution_time=time.time() - start_time)

        transcription = asr.loadTranscription(
            transcription_dir=transcription_dir, filename=filename)

        num_retry = 0
        while transcription == "" and num_retry < self.max_num_retry:
            start_time = time.time()
            transcription = asr.recognizeAudio(audio_fpath=audio_fpath)
            asr.setTranscription(transcription)
            asr.saveTranscription(transcription_dir=transcription_dir, filename=filename)
            save_execution_time(
                fpath=time_for_recognizing_audio_fpath,
                execution_time=time.time() - start_time)
            transcription = asr.loadTranscription(
                transcription_dir=transcription_dir, filename=filename)

            if asr.getName() == "wit":
                # pause between retries when using the remote wit ASR to avoid rate limiting
                random_number = float(random.randint(9, 47)) / 10.
                time.sleep(random_number)

            num_retry += 1

        transcriptions[asr.getName()] = preprocess_text(transcription)

        ## add execution time for recognizing the audio
        execution_time += get_execution_time(fpath=time_for_recognizing_audio_fpath)

    cases = self.caseDeterminer(text, transcriptions)
    # if sum(cases.values()) == 0:
    #     print(text)
    #     print(transcriptions["wav2vec2"])
    #     print(cases)
    #     print()

    for asr_name, case in cases.items():
        self.saveCase(
            self.case_dir, self.getTTS().getName(), asr_name, filename, str(case))

    # print(f"Execution time: {execution_time}")
    return cases, execution_time
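# A usage sketch under stated assumptions: `crossasr_instance` is an already-constructed
# CrossASR object, and this helper itself is hypothetical (not part of the original code).
def run_single_text(crossasr_instance, text: str, filename: str):
    # processText returns one case per ASR plus the accumulated TTS/ASR time in seconds
    cases, elapsed = crossasr_instance.processText(text=text, filename=filename)
    for asr_name, case in cases.items():
        print(f"{filename}: {asr_name} -> case {case} (total {elapsed:.2f}s)")
    return cases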
class CrossASR:
    def __init__(self, tts: TTS, asrs: [ASR], output_dir: str, target_asr=None,
                 recompute=False, num_iteration=5, time_budget=3600, max_num_retry=0,
                 text_batch_size=None, seed=None, estimator=None):
        self.tts = tts
        self.asrs = asrs
        self.target_asr = target_asr

        self.output_dir = output_dir
        self.audio_dir = os.path.join(output_dir, DATA_DIR, AUDIO_DIR)
        self.transcription_dir = os.path.join(output_dir, DATA_DIR, TRANSCRIPTION_DIR)
        self.init_directory()

        ## TODO: make init directory for execution time and case
        self.execution_time_dir = os.path.join(output_dir, EXECUTION_TIME_DIR)
        self.case_dir = os.path.join(output_dir, CASE_DIR)
        self.recompute = recompute
        self.num_iteration = num_iteration
        self.time_budget = time_budget
        self.max_num_retry = max_num_retry
        self.text_batch_size = text_batch_size
        self.estimator = estimator
        self.outputfile_failed_test_case = self.get_outputfile_for_failed_test_case()

        if seed:
            crossasr.utils.set_seed(seed)

        ## TODO: convert print into global logging

    def init_directory(self):
        # init directory for saving the audio
        make_dir(os.path.join(self.audio_dir, self.tts.getName()))
        # init directories for saving the transcriptions
        for asr in self.asrs:
            make_dir(
                os.path.join(self.transcription_dir, self.tts.getName(), asr.getName()))

    def get_outputfile_for_failed_test_case(self):
        asrs_dir = "_".join([asr.getName() for asr in self.asrs])
        result_dir = os.path.join(
            self.output_dir, "result", self.tts.getName(), asrs_dir,
            f"num_iteration_{self.num_iteration}",
            f"text_batch_size_{self.text_batch_size if self.text_batch_size else 'global'}")
        make_dir(result_dir)
        experiment_name = (
            f"with-estimator-{self.estimator.getName().replace('/', '-')}"
            if self.estimator else "without-estimator")
        return os.path.join(result_dir, experiment_name + ".json")

    def getTTS(self):
        return self.tts

    def setTTS(self, tts: TTS):
        self.tts = tts

    def getASRS(self):
        return self.asrs

    def addASR(self, asr: ASR):
        for curr_asr in self.asrs:
            if asr.getName() == curr_asr.getName():
                # asr is already on the list of asrs
                return
        self.asrs.append(asr)

    def removeASR(self, asr_name: str):
        for i, asr in enumerate(self.asrs):
            if asr_name == asr.getName():
                del self.asrs[i]
                return

    def getOutputDir(self):
        return self.output_dir

    def setOutputDir(self, output_dir: str):
        self.output_dir = output_dir

        self.audio_dir = os.path.join(output_dir, DATA_DIR, AUDIO_DIR)
        self.transcription_dir = os.path.join(output_dir, DATA_DIR, TRANSCRIPTION_DIR)
        self.execution_time_dir = os.path.join(output_dir, EXECUTION_TIME_DIR)
        self.case_dir = os.path.join(output_dir, CASE_DIR)

    def caseDeterminer(self, text: str, transcriptions: dict):
        # compute the word error rate of each transcription against the input text
        wers = {}

        is_determinable = False

        for k, transcription in transcriptions.items():
            word_error_rate = wer(text, transcription)
            wers[k] = word_error_rate
            if word_error_rate == 0:
                is_determinable = True

        case = {}
        if is_determinable:
            for k in transcriptions.keys():
                if wers[k] == 0:
                    case[k] = SUCCESSFUL_TEST_CASE
                else:
                    case[k] = FAILED_TEST_CASE
        else:
            for k in transcriptions.keys():
                case[k] = INDETERMINABLE_TEST_CASE

        return case

    def saveCase(self, case_dir: str, tts_name: str, asr_name: str, filename: str, case: str):
        case_dir = os.path.join(case_dir, tts_name, asr_name)
        make_dir(case_dir)
        fpath = os.path.join(case_dir, filename + ".txt")
        with open(fpath, "w+") as file:
            file.write(case)
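# A minimal wiring sketch, assuming hypothetical ExampleTTS and ExampleASR classes that
# implement the crossasr TTS / ASR interfaces (getName, generateAudio, recognizeAudio, ...).
# None of the concrete names or values below come from the original code.
if __name__ == "__main__":
    tts = ExampleTTS()                                  # hypothetical TTS implementation
    asrs = [ExampleASR("asr_a"), ExampleASR("asr_b")]   # hypothetical ASR implementations
    runner = CrossASR(tts=tts, asrs=asrs, output_dir="output", target_asr="asr_a")

    # generate one test case: audio from the TTS, then one transcription and case per ASR
    cases, elapsed = runner.processText(text="hello world", filename="example_0001")
    print(cases, elapsed)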