def align_json(ref_txt, json_file, filename=None):
    """
    Time-align a reference transcript against a hypothesis and write an STM.

    The reference text is parsed with ``preprocess_txt.parse_transcript``,
    the hypothesis JSON with ``preprocess_gk_json.preprocess_transcript``,
    and ``align`` combines both into segments, which are written out through
    a ``time_aligned_text`` object.

    Input
      ref_txt, str - reference text file containing ground truth
      json_file, str - hypothesis gk JSON file
      filename, str - output STM filename; when None, it is built from
        ref_txt as basename(sanitize(strip_extension(ref_txt))) + ".stm"
    """
    reference_tokens = preprocess_txt.parse_transcript(ref_txt)
    hypothesis = preprocess_gk_json.preprocess_transcript(json_file)
    aligned_segments = align(hypothesis, reference_tokens)

    # Fall back to a name derived from the reference file
    stm_path = (
        filename
        if filename is not None
        else basename(sanitize(strip_extension(ref_txt))) + ".stm"
    )

    # Stamp every segment with the output name (extension removed) as both
    # its filename and the prefix of a placeholder speaker label
    for segment in aligned_segments:
        segment.filename = strip_extension(stm_path)
        segment.speaker = strip_extension(stm_path) + "UnknownSpeaker"

    aligned_transcript = time_aligned_text()
    aligned_transcript.segments = aligned_segments
    aligned_transcript.write(stm_path)
def validate(self):
    """
    Validate this exemplar's audio/transcript pairing.

    The exemplar is valid when all of the following hold:
      - the audio and transcript base names (extension removed) are equal
      - the audio file has a non-zero size
      - the transcript file has a non-zero size

    Returns
      bool - True when every condition holds, otherwise False

    Note: ``os.path.getsize`` raises ``OSError`` if a file is missing or
    inaccessible; that exception is not handled here.
    """
    audio_filename = basename(strip_extension(self.audio_file.location))
    transcript_filename = basename(
        strip_extension(self.transcript_file.location))

    valid = (audio_filename == transcript_filename
             and os.path.getsize(self.audio_file.location)
             and os.path.getsize(self.transcript_file.location))

    # An `and` chain evaluates to its last evaluated operand, which here
    # would be a file size (int) rather than True/False; coerce it so that
    # callers always receive a real boolean.
    return bool(valid)
def combine_transcripts(transcripts, output_file_name):
    """
    Merge a sequence of transcripts with ``+`` and write the result.

    The output path is output_file_name with its extension replaced by the
    merged transcript's own ``file_extension``.

    Input
      transcripts - non-empty iterable of transcript objects supporting ``+``
      output_file_name, str - desired output name; its extension is ignored

    NOTE(review): with a single-element input, ``reduce`` returns that
    element itself, so its ``location`` attribute is overwritten below.
    An empty input makes ``reduce`` raise ``TypeError``.
    """
    # Fold all transcripts into one object
    merged = reduce(operator.add, transcripts)

    stem = strip_extension(output_file_name)
    merged.location = os.path.join(stem + "." + merged.file_extension)

    merged.write(merged.location)
def __init__(self, *args, **kwargs):
    """
    Build the object from attribute dictionaries and/or keyword arguments,
    then, if no exemplars were supplied, discover them under self.location.

    Every dict in args and every keyword argument is copied onto the
    instance as attributes (non-dict positional arguments are ignored).

    Discovery pairs each MP3, WAV, or SPH audio file with an STM file of the
    same base name, looking in two layouts:
      - audio and STM side by side under self.location
      - audio under <location>/sph/ and STM under <location>/stm/
    Audio files without an existing STM counterpart are skipped.
    """
    for positional in args:
        if isinstance(positional, dict):
            for name, value in positional.items():
                setattr(self, name, value)
    for name, value in kwargs.items():
        setattr(self, name, value)

    # Exemplars were provided explicitly; nothing to search for
    if self.exemplars:
        return

    # Give this instance its own list rather than sharing the class-level one
    self.exemplars = []

    # Same order as reversing ["sph", "wav", "mp3"]
    audio_extensions_to_try = ["mp3", "wav", "sph"]

    # Layout 1: STM sits next to the audio file
    side_by_side = []
    for audio_extension in audio_extensions_to_try:
        if self.location:
            audio_paths = get_files(self.location, audio_extension)
        else:
            audio_paths = []
        for fl in audio_paths:
            if not os.path.exists(strip_extension(fl) + ".stm"):
                continue
            side_by_side.append(
                exemplar({
                    "audio_file": audio_file(fl),
                    "transcript_file":
                    time_aligned_text(strip_extension(fl) + ".stm"),
                }))
    self.exemplars += side_by_side

    # Layout 2: gather exemplars from /sph and /stm subdirectories if present
    from_subdirectories = []
    for audio_extension in audio_extensions_to_try:
        if self.location:
            audio_paths = get_files(self.location + "/sph/", audio_extension)
        else:
            audio_paths = []
        for fl in audio_paths:
            if not os.path.exists(self.location + "/stm/" +
                                  basename(strip_extension(fl)) + ".stm"):
                continue
            from_subdirectories.append(
                exemplar({
                    "audio_file": audio_file(fl),
                    "transcript_file":
                    time_aligned_text(self.location + "/stm/" +
                                      basename(strip_extension(fl)) + ".stm"),
                }))
    self.exemplars += from_subdirectories
def validate(self):
    """
    Check that this exemplar's audio and transcript files belong together.

    Returns True only when the two files share the same base name (ignoring
    extension) and both have a non-zero size; otherwise returns False.
    """
    audio_path = self.audio_file.location
    transcript_path = self.transcript_file.location

    audio_stem = basename(strip_extension(audio_path))
    transcript_stem = basename(strip_extension(transcript_path))

    # Audio and transcript filename must match
    if audio_stem != transcript_stem:
        return False

    # Audio file must not be empty
    if not os.path.getsize(audio_path):
        return False

    # Transcript file must not be empty; getsize returns an int, so convert
    # it to a proper boolean for the caller
    return bool(os.path.getsize(transcript_path))
def extract_xlsx(filename, target_folder):
    """
    Extract every sheet of an Excel spreadsheet into one text file.

    The output file is <target_folder>/<name>.txt, where <name> is
    sanitize(strip_extension(basename(filename))). Each sheet's values are
    passed, in workbook order, to ``dump_sheet`` together with the open
    output file.

    Input
      filename, str - path to the Excel workbook
      target_folder, str - directory in which the text file is written
    """
    # ExcelFile keeps the workbook open; the context manager guarantees it
    # is closed even if parsing or writing fails part-way through.
    with pd.ExcelFile(filename) as workbook:
        raw_name = sanitize(strip_extension(basename(filename)))
        output_path = ''.join([target_folder, '/', raw_name, ".txt"])

        # 'a+' appends: re-running on the same workbook adds to any
        # existing output rather than replacing it.
        with open(output_path, 'a+') as output_file:
            for sheet in workbook.sheet_names:
                dump_sheet(output_file, workbook.parse(sheet).values)
def prepare_for_training(self, file_name, sample_rate=16000):
    """
    Convert this audio file to a resampled SPH file using sox.

    Input
      file_name, str - output path; if it does not end in ".sph" the
        extension is replaced with ".sph". The name is then passed through
        ``sanitize_hyphens``.
      sample_rate, int - target sample rate given to sox's ``rate`` effect

    Returns
      audio_file - object wrapping the output path. The sox exit status is
        not checked, so this is returned even if the conversion failed.

    NOTE(review): the SoX manual describes ``remix -`` as a mix-down of all
    input channels to mono - confirm that matches the intended channel
    handling.
    """
    if file_name.split(".")[-1] != 'sph':
        print("Forcing training data to use SPH file format")
        file_name = strip_extension(file_name) + ".sph"

    file_name = sanitize_hyphens(file_name)

    # Pass arguments as a list without a shell so that paths containing
    # spaces or shell metacharacters reach sox verbatim instead of being
    # split or interpreted.
    subprocess.call([
        "sox",
        str(self.location),
        file_name,
        "rate",
        str(sample_rate),
        "remix",
        "-",
    ])

    # return new object
    return audio_file(file_name)
def prepare_for_training(self, file_name, sample_rate=16000):
    """
    Convert this audio file to a resampled SPH file using sox.

    Input
      file_name, str - output path; if it does not end in ".sph" the
        extension is replaced with ".sph" (a warning is logged). The name is
        then passed through ``sanitize_hyphens``.
      sample_rate, int - target sample rate given to sox's ``rate`` effect

    Returns
      audio_file object for the output path when sox exits with status 0,
      else None

    NOTE(review): the SoX manual describes ``remix -`` as a mix-down of all
    input channels to mono - confirm that matches the intended channel
    handling.
    """
    if file_name.split(".")[-1] != "sph":
        LOGGER.warning(
            "Forcing training data to use SPH file format for %s", file_name)
        file_name = strip_extension(file_name) + ".sph"

    file_name = sanitize_hyphens(file_name)

    # Pass arguments as a list without a shell so that paths containing
    # spaces or shell metacharacters reach sox verbatim instead of being
    # split or interpreted.
    exit_status = subprocess.call([
        "sox",
        "-V1",
        str(self.location),
        file_name,
        "rate",
        str(sample_rate),
        "remix",
        "-",
    ])

    # A non-zero exit status means sox failed; signal that with None
    if exit_status:
        return None
    return audio_file(file_name)