def _loadpath_word(self, path):
    """ Load word-level transcriptions from multiple files...

        Each file in `path` selected by type_files(..., LAB_EXT) is read
        as UTF-8 and split on whitespace; the words are re-joined with
        single spaces and stored under the file's basename
        (parse_path(filename)[2]).

        Returns a dict mapping basename -> transcription string.

        Raises Exception when a file contains no words, when two files
        share a basename, or when no transcription files are found.
    """
    log.debug(unicode(self) + " loading transcriptions from multiple files.")
    transcriptions = {}
    for labname in type_files(os.listdir(path), LAB_EXT):
        labpath = os.path.join(path, labname)
        with codecs.open(labpath, encoding="utf-8") as labfile:
            contents = labfile.read()
        # Words are assumed to be whitespace delimited.
        tokens = contents.split()
        if not tokens:
            raise Exception("File '%s' is empty..." % (labpath))
        # Basenames are assumed to be unique (a reasonable assumption).
        basename = parse_path(labname)[2]
        if basename in transcriptions:
            raise Exception("basename '%s' is not unique..." % (basename))
        transcriptions[basename] = " ".join(tokens)
    if not transcriptions:
        raise Exception("No transcriptions found in '%s'..." % (path))
    return transcriptions
def _loadpath_phone(self, path):
    """ Load phone-level transcriptions from multiple files...

        Every file in `path` matching one of Utterance.SUPPORTED_EXTS is
        parsed with Utterance and stored under its basename key
        (parse_path(filename)[2]).

        Returns a tuple (phonelevel, boundaries):
          phonelevel -- dict: key -> entry[1] of every utterance entry,
                        joined with single spaces.
          boundaries -- dict: key -> list of float_to_htk_int(entry[0])
                        for every entry, or None when any converted
                        value is falsy.

        Raises Exception when two files map to the same key. An OSError
        from os.listdir propagates to the caller unchanged.
    """
    log.debug(unicode(self) + " loading transcriptions from multiple files.")
    # One directory listing is enough for all supported extensions.
    dirlisting = os.listdir(path)
    filenames = []
    for ext in Utterance.SUPPORTED_EXTS:
        filenames.extend(type_files(dirlisting, ext))
    filenames.sort()
    # Uniqueness has to be checked on the keys, not on the file names:
    # names within one directory are always distinct, but two files that
    # differ only in extension share a key and would silently overwrite
    # each other in the dictionaries below.
    keys = [parse_path(filename)[2] for filename in filenames]
    if len(set(keys)) != len(keys):
        raise Exception("Non unique basenames exist....")
    phonelevel = {}
    boundaries = {}
    for key, filename in zip(keys, filenames):
        utt = Utterance(os.path.join(path, filename))
        phonelevel[key] = " ".join([entry[1] for entry in utt.entries])
        times = [float_to_htk_int(entry[0]) for entry in utt.entries]
        # A single falsy value invalidates the whole boundary list
        # (presumably it marks a missing time -- confirm against
        # float_to_htk_int).
        boundaries[key] = times if all(times) else None
    return phonelevel, boundaries
def _loadpath_word(self, path):
    """ Load word-level transcriptions from multiple files...

        Each file in `path` selected by type_files(..., LAB_EXT) is read
        as UTF-8 and split on whitespace; the words are re-joined with
        single spaces and stored under the file's basename
        (parse_path(filename)[2]).

        Returns a dict mapping basename -> transcription string.

        Raises Exception when a file contains no words, when two files
        share a basename, or when no transcription files are found.
    """
    log.debug(unicode(self) + " loading transcriptions from multiple files.")
    transcriptions = {}
    for labname in type_files(os.listdir(path), LAB_EXT):
        labpath = os.path.join(path, labname)
        with codecs.open(labpath, encoding="utf-8") as labfile:
            contents = labfile.read()
        # Words are assumed to be whitespace delimited.
        tokens = contents.split()
        if not tokens:
            raise Exception("File '%s' is empty..." % (labpath))
        # Basenames are assumed to be unique (a reasonable assumption).
        basename = parse_path(labname)[2]
        if basename in transcriptions:
            raise Exception("basename '%s' is not unique..." % (basename))
        transcriptions[basename] = " ".join(tokens)
    if not transcriptions:
        raise Exception("No transcriptions found in '%s'..." % (path))
    return transcriptions
def _loadpath_phone(self, path):
    """ Load phone-level transcriptions from multiple files...

        Every file in `path` matching one of Utterance.SUPPORTED_EXTS is
        parsed with Utterance and stored under its basename key
        (parse_path(filename)[2]).

        Returns a tuple (phonelevel, boundaries):
          phonelevel -- dict: key -> entry[1] of every utterance entry,
                        joined with single spaces.
          boundaries -- dict: key -> list of float_to_htk_int(entry[0])
                        for every entry, or None when any converted
                        value is falsy.

        Raises Exception when two files map to the same key. An OSError
        from os.listdir propagates to the caller unchanged.
    """
    log.debug(unicode(self) + " loading transcriptions from multiple files.")
    # One directory listing is enough for all supported extensions.
    dirlisting = os.listdir(path)
    filenames = []
    for ext in Utterance.SUPPORTED_EXTS:
        filenames.extend(type_files(dirlisting, ext))
    filenames.sort()
    # Uniqueness has to be checked on the keys, not on the file names:
    # names within one directory are always distinct, but two files that
    # differ only in extension share a key and would silently overwrite
    # each other in the dictionaries below.
    keys = [parse_path(filename)[2] for filename in filenames]
    if len(set(keys)) != len(keys):
        raise Exception("Non unique basenames exist....")
    phonelevel = {}
    boundaries = {}
    for key, filename in zip(keys, filenames):
        utt = Utterance(os.path.join(path, filename))
        phonelevel[key] = " ".join([entry[1] for entry in utt.entries])
        times = [float_to_htk_int(entry[0]) for entry in utt.entries]
        # A single falsy value invalidates the whole boundary list
        # (presumably it marks a missing time -- confirm against
        # float_to_htk_int).
        boundaries[key] = times if all(times) else None
    return phonelevel, boundaries
def __init__(self, wavlocation, featsconflocation):
    """ Initialise from a directory of audio files...

        wavlocation       -- existing directory holding the audio files.
        featsconflocation -- passed on to self._loadFeatConf; its result
                             is kept in self.hcopy_parms.

        Sets self.wavlocation, self.wavfilelist (the sorted result of
        type_files(..., WAVE_EXT) on the directory listing) and
        self.hcopy_parms.

        Raises Exception when `wavlocation` is not an existing directory.
    """
    location_ok = os.path.isdir(wavlocation)
    if not location_ok:
        raise Exception("'%s' is not an existing directory..." % wavlocation)
    whoami = unicode(self)
    log.debug(whoami + " loading audio files at '%s'." % (wavlocation))
    self.wavlocation = wavlocation
    wavnames = type_files(os.listdir(self.wavlocation), WAVE_EXT)
    wavnames.sort()
    self.wavfilelist = wavnames
    self.hcopy_parms = self._loadFeatConf(featsconflocation)
def __init__(self, wavlocation, featsconflocation):
    """ Initialise from a directory of audio files...

        wavlocation       -- existing directory holding the audio files.
        featsconflocation -- passed on to self._loadFeatConf; its result
                             is kept in self.hcopy_parms.

        Sets self.wavlocation, self.wavfilelist (the sorted result of
        type_files(..., WAVE_EXT) on the directory listing) and
        self.hcopy_parms.

        Raises Exception when `wavlocation` is not an existing directory.
    """
    location_ok = os.path.isdir(wavlocation)
    if not location_ok:
        raise Exception("'%s' is not an existing directory..." % wavlocation)
    whoami = unicode(self)
    log.debug(whoami + " loading audio files at '%s'." % (wavlocation))
    self.wavlocation = wavlocation
    wavnames = type_files(os.listdir(self.wavlocation), WAVE_EXT)
    wavnames.sort()
    self.wavfilelist = wavnames
    self.hcopy_parms = self._loadFeatConf(featsconflocation)