Exemple #1
0
    def _loadpath_word(self, path):
        """ Read one word-level transcription per label file found in `path`.

            Each file selected by `type_files` for `LAB_EXT` is decoded as
            UTF-8 and split on whitespace. The words, re-joined with single
            spaces, are stored under `parse_path(filename)[2]` (used here as
            the file's basename).

            Returns the dict mapping basename to transcription string.

            Raises Exception when a file holds no words, when two files yield
            the same basename, or when no transcription was found at all.
        """
        log.debug(
            unicode(self) + " loading transcriptions from multiple files.")

        transcriptions = {}
        for labname in type_files(os.listdir(path), LAB_EXT):
            labpath = os.path.join(path, labname)

            with codecs.open(labpath, encoding="utf-8") as labfile:
                contents = labfile.read()

            # Words are taken to be whitespace delimited.
            words = contents.split()
            if not words:
                raise Exception("File '%s' is empty..." % (labpath))

            # Basenames are expected to be unique; refuse to overwrite.
            basename = parse_path(labname)[2]
            if basename in transcriptions:
                raise Exception(
                    "basename '%s' is not unique..." % (basename))
            transcriptions[basename] = " ".join(words)

        if not transcriptions:
            raise Exception("No transcriptions found in '%s'..." % (path))
        return transcriptions
Exemple #2
0
    def _loadpath_phone(self, path):
        """ Load phone-level transcriptions from multiple files...

            Every file in `path` that `type_files` selects for one of
            `Utterance.SUPPORTED_EXTS` is loaded as an `Utterance`. Results
            are keyed by `parse_path(filename)[2]` (used as the basename).

            Returns a tuple `(phonelevel, boundaries)` of two dicts:
              - phonelevel[key]: the second field of every utterance entry,
                joined with single spaces.
              - boundaries[key]: the list of `float_to_htk_int(entry[0])`
                values, or None when any of those values is falsy.

            Raises Exception when two files map to the same basename (for
            example the same name with two different supported extensions),
            because one would otherwise silently overwrite the other.
            An OSError raised by `os.listdir` propagates unchanged.
        """
        log.debug(
            unicode(self) + " loading transcriptions from multiple files.")

        filenames = []
        for ext in Utterance.SUPPORTED_EXTS:
            filenames.extend(type_files(os.listdir(path), ext))
        filenames.sort()

        # The dicts below are keyed by basename, so uniqueness has to be
        # checked on the keys; comparing full filenames would let
        # "a.<ext1>" and "a.<ext2>" collide unnoticed.
        keys = [parse_path(filename)[2] for filename in filenames]
        if len(set(keys)) != len(keys):
            raise Exception("Non unique basenames exist....")

        phonelevel = {}
        boundaries = {}
        for key, filename in zip(keys, filenames):
            utt = Utterance(os.path.join(path, filename))
            phonelevel[key] = " ".join([entry[1] for entry in utt.entries])
            b = [float_to_htk_int(entry[0]) for entry in utt.entries]
            # NOTE(review): any falsy boundary (None, but also 0) discards
            # the whole boundary list for this utterance -- confirm that a
            # value of 0 is really meant to count as "missing".
            boundaries[key] = b if all(b) else None

        return phonelevel, boundaries
Exemple #3
0
    def _loadpath_word(self, path):
        """ Build the word-level transcriptions from the label files in `path`.

            Files are picked with `type_files(os.listdir(path), LAB_EXT)`,
            read as UTF-8 text and tokenised on whitespace. The tokens are
            joined back with single spaces and stored under
            `parse_path(filename)[2]` (used here as the file's basename).

            Returns a dict of basename -> transcription string.

            Raises Exception for a file without any words, for a repeated
            basename, and when the directory yields no transcriptions.
        """
        log.debug(unicode(self) + " loading transcriptions from multiple files.")

        by_basename = {}
        labfiles = type_files(os.listdir(path), LAB_EXT)
        for labfile in labfiles:
            fullpath = os.path.join(path, labfile)
            with codecs.open(fullpath, encoding="utf-8") as handle:
                raw = handle.read()

            # Whitespace separates words.
            tokens = raw.split()
            if not tokens:
                raise Exception("File '%s' is empty..." % (fullpath))

            # Each basename may only occur once.
            name = parse_path(labfile)[2]
            if name in by_basename:
                raise Exception("basename '%s' is not unique..." % (name))
            by_basename[name] = " ".join(tokens)

        if not by_basename:
            raise Exception("No transcriptions found in '%s'..." % (path))
        return by_basename
Exemple #4
0
    def _loadpath_phone(self, path):
        """ Load phone-level transcriptions from multiple files...

            Every file in `path` that `type_files` selects for one of
            `Utterance.SUPPORTED_EXTS` is loaded as an `Utterance`. Results
            are keyed by `parse_path(filename)[2]` (used as the basename).

            Returns a tuple `(phonelevel, boundaries)` of two dicts:
              - phonelevel[key]: the second field of every utterance entry,
                joined with single spaces.
              - boundaries[key]: the list of `float_to_htk_int(entry[0])`
                values, or None when any of those values is falsy.

            Raises Exception when two files map to the same basename (for
            example the same name with two different supported extensions),
            because one would otherwise silently overwrite the other.
            An OSError raised by `os.listdir` propagates unchanged.
        """
        log.debug(unicode(self) + " loading transcriptions from multiple files.")

        filenames = []
        for ext in Utterance.SUPPORTED_EXTS:
            filenames.extend(type_files(os.listdir(path), ext))
        filenames.sort()

        # Uniqueness must hold for the dict keys (basenames), not merely for
        # the full filenames: "a.<ext1>" and "a.<ext2>" are distinct files
        # that would still overwrite each other below.
        keys = [parse_path(filename)[2] for filename in filenames]
        if len(set(keys)) != len(keys):
            raise Exception("Non unique basenames exist....")

        phonelevel = {}
        boundaries = {}
        for key, filename in zip(keys, filenames):
            utt = Utterance(os.path.join(path, filename))
            phonelevel[key] = " ".join([entry[1] for entry in utt.entries])
            b = [float_to_htk_int(entry[0]) for entry in utt.entries]
            # NOTE(review): any falsy boundary (None, but also 0) discards
            # the whole boundary list for this utterance -- confirm that a
            # value of 0 is really meant to count as "missing".
            boundaries[key] = b if all(b) else None

        return phonelevel, boundaries
 def __init__(self, wavlocation, featsconflocation):
     """ Set up the instance from an audio directory and a feature config.

         wavlocation       -- directory holding the audio files; must exist.
         featsconflocation -- passed to `self._loadFeatConf`, whose result
                              is kept in `self.hcopy_parms`.

         Stores `wavlocation`, and the sorted result of
         `type_files(os.listdir(wavlocation), WAVE_EXT)` as
         `self.wavfilelist`.

         Raises Exception if `wavlocation` is not an existing directory.
     """
     location_is_dir = os.path.isdir(wavlocation)
     if not location_is_dir:
         raise Exception("'%s' is not an existing directory..." % wavlocation)

     label = unicode(self)
     log.debug(label + " loading audio files at '%s'." % (wavlocation))

     self.wavlocation = wavlocation
     wavfiles = type_files(os.listdir(self.wavlocation), WAVE_EXT)
     wavfiles.sort()
     self.wavfilelist = wavfiles

     self.hcopy_parms = self._loadFeatConf(featsconflocation)
    def __init__(self, wavlocation, featsconflocation):
        """ Set up the instance from an audio directory and a feature config.

            wavlocation       -- directory holding the audio files; must
                                 exist.
            featsconflocation -- passed to `self._loadFeatConf`, whose result
                                 is kept in `self.hcopy_parms`.

            Stores `wavlocation`, and the sorted result of
            `type_files(os.listdir(wavlocation), WAVE_EXT)` as
            `self.wavfilelist`.

            Raises Exception if `wavlocation` is not an existing directory.
        """
        location_is_dir = os.path.isdir(wavlocation)
        if not location_is_dir:
            raise Exception(
                "'%s' is not an existing directory..." % wavlocation)

        label = unicode(self)
        log.debug(label + " loading audio files at '%s'." % (wavlocation))

        self.wavlocation = wavlocation
        wavfiles = type_files(os.listdir(self.wavlocation), WAVE_EXT)
        wavfiles.sort()
        self.wavfilelist = wavfiles

        self.hcopy_parms = self._loadFeatConf(featsconflocation)