def __init__(self, dictfilename, unkstamp=u"UNK", nodump=False):
    """
    Build a pronunciation dictionary, optionally filled from a file.

    @param dictfilename is the dictionary file name (HTK-ASCII format);
           with None the dictionary is left empty
    @param unkstamp is the string used to represent a missing pronunciation
    @param nodump (Boolean) unless it is exactly False, no dump file is
           read nor written

    """
    # Stamp standing for entries that are missing from the dictionary
    # (the so-called unknown entries).
    self._unkstamp = unkstamp
    self._filename = dictfilename

    # The pronunciation dictionary itself.
    self._dict = {}

    # Nothing to load: keep the empty dictionary.
    if dictfilename is None:
        return

    # Dump files are used only when nodump is exactly False.
    use_dump = nodump is False

    # Try the dump first (the original author reports it is at least
    # 2 times faster than the ASCII file).
    cached = rutils.load_from_dump(dictfilename) if use_dump else None

    if cached is not None:
        self._dict = cached
        logging.info('Get dictionary from dumped file.')
        return

    # No usable dump (per the original note: 1st load, dump load error,
    # or dump older than the ASCII file): read the ASCII file, then
    # refresh the dump when dumps are enabled.
    self.load_from_ascii(dictfilename)
    if use_dump:
        rutils.save_as_dump(self._dict, dictfilename)
    logging.info('Get dictionary from ASCII file.')
def __init__(self, filename=None, nodump=False, casesensitive=False):
    """
    Create a WordsList instance.

    @param filename (str) is the word list file name, i.e. a file with
           1 column; with None the list is left empty.
    @param nodump (Boolean) allows to disable the dump file: unless it is
           exactly False, no dump file is read nor created.
    @param casesensitive (Boolean) the list of word is case-sensitive or not

    """
    # A list would be enough, but a dictionary is used because it is
    # faster to read tokens from a file and to find a token in it.
    self._stw = {}

    # Set the list of word to be case-sensitive or not.
    self.casesensitive = casesensitive

    if filename is not None:

        data = None
        # The nodump flag must guard the dump *read* as well as the dump
        # write; otherwise a dump is loaded even when the caller asked
        # for dumps to be disabled.
        if nodump is False:
            # Try first to get the dict from a dump file
            # (at least 2 times faster than the ascii one)
            data = rutils.load_from_dump(filename)

        # Load from ascii if: 1st load, or, dump load error,
        # or dump older than ascii
        if data is None:
            self.load_from_ascii(filename)
            if nodump is False:
                rutils.save_as_dump(self._stw, filename)
            logging.info('Got word list from ASCII file.')
        else:
            self._stw = data
            logging.info('Got word list from dumped file.')
def __init__(self, dictfilename=None, nodump=False):
    """
    Build the dictionary, optionally filled from a file.

    @param dictfilename is the dictionary file name (2 columns);
           with None the dictionary is left empty
    @param nodump (Boolean) unless it is exactly False, no dump file is
           read nor written

    """
    self._dict = {}

    # Without a file name there is nothing to load.
    if dictfilename is None:
        return

    dump_enabled = nodump is False

    # The dump is tried first (reported by the original author to be at
    # least 2 times faster than the ASCII file).
    loaded = None
    if dump_enabled:
        loaded = rutils.load_from_dump(dictfilename)

    if loaded is not None:
        self._dict = loaded
    else:
        # No usable dump (per the original note: 1st load, dump load
        # error, or dump older than the ASCII file).
        self.load_from_ascii(dictfilename)
        if dump_enabled:
            rutils.save_as_dump(self._dict, dictfilename)
def __init__(self, filename=None, nodump=False):
    """
    Create a new WordsList instance, optionally filled from a file.

    @param filename is the word list file name (1 column);
           with None the list is left empty
    @param nodump (Boolean) unless it is exactly False, no dump file is
           read nor written

    """
    # A list would be enough, but a dictionary is used because it is
    # faster (to read tokens from a file / to find a token in this list).
    self._stw = {}

    if filename is None:
        return

    with_dump = nodump is False

    # Try the dump first (at least 2 times faster according to the
    # original author); stay at None when dumps are disabled.
    from_dump = rutils.load_from_dump(filename) if with_dump else None

    if from_dump is None:
        # No usable dump (per the original note: 1st load, dump load
        # error, or dump older than the ASCII file).
        self.load_from_ascii(filename)
        if with_dump:
            rutils.save_as_dump(self._stw, filename)
        message = 'Get word list from ASCII file.'
    else:
        self._stw = from_dump
        message = 'Get word list from dumped file.'

    logging.info(message)
def __init__(self, dictfilename=None, nodump=False):
    """
    Build a replacements dictionary, optionally filled from a file.

    @param dictfilename is the dictionary file name (2 columns);
           with None the dictionary is left empty
    @param nodump (Boolean) unless it is exactly False, no dump file is
           read nor written

    """
    # Remember which file (if any) this dictionary was built from.
    self._filename = dictfilename

    # The replacements dictionary
    self._dict = {}

    if dictfilename is None:
        return

    allow_dump = nodump is False

    # Try first the dump file (at least 2 times faster according to the
    # original author).
    dumped = None
    if allow_dump:
        dumped = rutils.load_from_dump(dictfilename)

    if dumped is not None:
        self._dict = dumped
        logging.info('Get dictionary from dumped file.')
        return

    # No usable dump (per the original note: 1st load, dump load error,
    # or dump older than the ASCII file): fall back on the ASCII file.
    self.load_from_ascii(dictfilename)
    if allow_dump:
        rutils.save_as_dump(self._dict, dictfilename)
    logging.info('Get dictionary from ASCII file.')
def __init__(self, filename=None, nodump=True):
    """
    Constructor: start with an empty dictionary, optionally filled from a file.

    @param filename is the file name to load from; with None nothing is
           loaded
    @param nodump (Boolean) unless it is exactly False, no dump file is
           read nor written (dumps are disabled by default here)

    """
    self._sum = 0
    self._dict = {}

    if filename is None:
        return

    dump_on = nodump is False

    # Try the dump first (at least 2 times faster according to the
    # original author).
    restored = None
    if dump_on:
        restored = rutils.load_from_dump(filename)

    if restored is not None:
        # NOTE(review): on this path _sum stays at 0 -- confirm whether
        # it should be recomputed from the restored entries.
        self._dict = restored
        return

    # No usable dump (per the original note: 1st load, dump load error,
    # or dump older than the ASCII file).
    self.load_from_ascii(filename)
    if dump_on:
        rutils.save_as_dump(self._dict, filename)