def verifysettings(self):
    """Apply default settings and locate the model files for this module.

    Settings that are absent receive a default value, the configured
    suffixes are stored longest-first on ``self.suffixes``, and
    ``self.models`` is scanned for an ``.ibase`` file (kept as
    ``self.modelfile``) and an ``.lst`` file (kept as ``self.confusiblefile``).

    Raises:
        Exception: if the ``suffixes`` setting is absent, or if no ``.ibase``
            or no ``.lst`` file is found in ``self.models``.
    """
    if 'class' not in self.settings:
        self.settings['class'] = 'confusion'

    super().verifysettings()

    # Defaults that must be in place before the hapaxer is created.
    for option, fallback in (
        ('algorithm', 1),
        ('leftcontext', 3),
        ('rightcontext', 3),
        ('threshold', 0.8),
        ('minocc', 5),
    ):
        if option not in self.settings:
            self.settings[option] = fallback

    self.hapaxer = gethapaxer(self, self.settings)

    if 'suffixes' not in self.settings:
        raise Exception("No suffixes specified for " + self.id + "!")
    # Longest suffixes first; equal-length suffixes keep their given order.
    self.suffixes = sorted(self.settings['suffixes'], key=len, reverse=True)

    # Settings used when computing the confusible list.
    for option, fallback in (
        ('freqthreshold', 20),
        ('maxlength', 25),  # longer words will be ignored
        ('minlength', 3),   # shorter words will be ignored
        ('maxratio', 0),    # no limit
    ):
        if option not in self.settings:
            self.settings[option] = fallback

    self.debug = bool(self.settings['debug']) if 'debug' in self.settings else False

    has_ibase = False
    has_lst = False
    for path in self.models:
        if path.endswith('.ibase'):
            has_ibase = True
            self.modelfile = path
        elif path.endswith('.lst'):
            has_lst = True
            self.confusiblefile = path

    if not has_ibase:
        raise Exception("TIMBL models must have the extension ibase, not model file was supplies with that extension")
    if not has_lst:
        raise Exception("Specify a model file with extension lst that will store all confusibles found")
def verifysettings(self):
    """Fill in default settings and validate the configured model files.

    Defaults are written into ``self.settings`` for ``class``, ``algorithm``,
    ``leftcontext``, ``rightcontext`` and ``maxdistance``. The attributes
    ``threshold``, ``freqthreshold``, ``minlength`` and ``probfactor`` are
    taken from the settings when present and fall back to a default
    otherwise. The hapaxer and cache are then created and the entries of
    ``self.models`` are checked.

    Raises:
        Exception: if ``self.models`` is empty, if the first model does not
            end in ``.ibase``, or if a second model is given that does not
            end in ``colibri.patternmodel``.
    """
    if 'class' not in self.settings:
        self.settings['class'] = 'confusion'

    super().verifysettings()

    if 'algorithm' not in self.settings:
        self.settings['algorithm'] = 1
    if 'leftcontext' not in self.settings:
        self.settings['leftcontext'] = 3
    if 'rightcontext' not in self.settings:
        self.settings['rightcontext'] = 3

    self.threshold = self.settings['threshold'] if 'threshold' in self.settings else 0.9
    self.freqthreshold = self.settings['freqthreshold'] if 'freqthreshold' in self.settings else 2
    self.minlength = self.settings['minlength'] if 'minlength' in self.settings else 5
    self.probfactor = self.settings['probfactor'] if 'probfactor' in self.settings else 10

    if 'maxdistance' not in self.settings:
        self.settings['maxdistance'] = 2

    self.debug = bool(self.settings['debug']) if 'debug' in self.settings else False

    self.hapaxer = gethapaxer(self, self.settings)
    self.cache = getcache(self.settings, 1000)

    # Guard only the lookup of the first model. Previously a bare ``except``
    # also caught the extension errors raised below and replaced their
    # specific messages with this generic one.
    try:
        modelfile = self.models[0]
    except IndexError as err:
        raise Exception("Expected one or two models, the first a TIMBL instance base, and the optional second a colibri patternmodel, got " + str(len(self.models))) from err

    if not modelfile.endswith(".ibase"):
        raise Exception("First model must be a TIMBL instance base model, which must have the extension '.ibase', got " + modelfile + " instead")

    if len(self.models) > 1:
        lexiconfile = self.models[1]
        if not lexiconfile.endswith("colibri.patternmodel"):
            # Report the offending second model (the old message printed the first one).
            raise Exception("Second model must be a Colibri pattern model, which must have the extensions '.colibri.patternmodel', got " + lexiconfile + " instead")
def verifysettings(self):
    """Fill in default settings and validate the confusibles and model file.

    Defaults are written into ``self.settings`` for ``class``, ``algorithm``,
    ``leftcontext`` and ``rightcontext``; the hapaxer is created; the
    ``confusibles`` setting is copied to ``self.confusibles``; and the first
    entry of ``self.models`` is checked for the ``.ibase`` extension.

    Raises:
        Exception: if the ``confusibles`` setting is absent, if no model is
            configured, or if the first model does not end in ``.ibase``.
    """
    if "class" not in self.settings:
        self.settings["class"] = "confusible"

    super().verifysettings()

    if "algorithm" not in self.settings:
        self.settings["algorithm"] = 1
    if "leftcontext" not in self.settings:
        self.settings["leftcontext"] = 3
    if "rightcontext" not in self.settings:
        self.settings["rightcontext"] = 3

    self.hapaxer = gethapaxer(self.settings)

    if "confusibles" not in self.settings:
        raise Exception("No confusibles specified for " + self.id + "!")
    self.confusibles = self.settings["confusibles"]

    # Guard only the lookup. Previously a bare ``except`` also swallowed the
    # extension error below (replacing its message with the generic one) and
    # would have trapped KeyboardInterrupt/SystemExit as well.
    try:
        modelfile = self.models[0]
    except (IndexError, TypeError) as err:
        raise Exception("Expected one model, got 0 or more") from err

    if not modelfile.endswith(".ibase"):
        raise Exception("TIMBL models must have the extension ibase, got " + modelfile + " instead")
def verifysettings(self):
    """Fill in default settings and validate the model file.

    Defaults are written into ``self.settings`` for ``class``, ``algorithm``,
    ``leftcontext``, ``rightcontext``, ``deletionthreshold`` and
    ``insertionthreshold``; the hapaxer is created; and the first entry of
    ``self.models`` is checked for the ``.ibase`` extension.

    Raises:
        Exception: if no model is configured, or if the first model does not
            end in ``.ibase``.
    """
    if 'class' not in self.settings:
        self.settings['class'] = 'missingpunctuation'  # will be overridden later again

    super().verifysettings()

    if 'algorithm' not in self.settings:
        self.settings['algorithm'] = 1
    if 'leftcontext' not in self.settings:
        self.settings['leftcontext'] = 3
    if 'rightcontext' not in self.settings:
        self.settings['rightcontext'] = 2

    if 'deletionthreshold' not in self.settings:
        self.settings['deletionthreshold'] = 0.95
    if 'insertionthreshold' not in self.settings:
        self.settings['insertionthreshold'] = 0.5

    self.hapaxer = gethapaxer(self.settings)

    # Guard only the lookup. Previously a bare ``except`` also swallowed the
    # extension error below (replacing its message with the generic one) and
    # would have trapped KeyboardInterrupt/SystemExit as well.
    try:
        modelfile = self.models[0]
    except (IndexError, TypeError) as err:
        raise Exception("Expected one model, got 0 or more") from err

    if not modelfile.endswith(".ibase"):
        raise Exception("TIMBL models must have the extension ibase, got " + modelfile + " instead")
def verifysettings(self):
    """Complete the settings with defaults and pick out the model files.

    After defaults are filled in, ``self.suffixes`` holds the configured
    suffixes ordered from longest to shortest, ``self.debug`` reflects the
    ``debug`` setting, and ``self.modelfile`` / ``self.confusiblefile`` point
    at the ``.ibase`` and ``.lst`` entries of ``self.models``.

    Raises:
        Exception: if the ``suffixes`` setting is absent, or if
            ``self.models`` contains no ``.ibase`` file or no ``.lst`` file.
    """

    def ensure(name, value):
        # Assign ``value`` only when the setting has not been supplied.
        if name not in self.settings:
            self.settings[name] = value

    ensure('class', 'confusion')

    super().verifysettings()

    ensure('algorithm', 1)
    ensure('leftcontext', 3)
    ensure('rightcontext', 3)
    ensure('threshold', 0.8)
    ensure('minocc', 5)

    self.hapaxer = gethapaxer(self, self.settings)

    if 'suffixes' not in self.settings:
        raise Exception("No suffixes specified for " + self.id + "!")
    # Order from long to short (ties keep their configured order).
    self.suffixes = sorted(self.settings['suffixes'], key=len, reverse=True)

    # Parameters for computing the confusible list.
    ensure('freqthreshold', 20)
    ensure('maxlength', 25)  # longer words will be ignored
    ensure('minlength', 3)   # shorter words will be ignored
    ensure('maxratio', 0)    # no limit

    if 'debug' not in self.settings:
        self.debug = False
    else:
        self.debug = bool(self.settings['debug'])

    seen_ibase = seen_lst = False
    for candidate in self.models:
        if candidate.endswith('.ibase'):
            self.modelfile = candidate
            seen_ibase = True
        elif candidate.endswith('.lst'):
            self.confusiblefile = candidate
            seen_lst = True

    if not seen_ibase:
        raise Exception("TIMBL models must have the extension ibase, not model file was supplies with that extension")
    if not seen_lst:
        raise Exception("Specify a model file with extension lst that will store all confusibles found")
def verifysettings(self):
    """Apply default settings and locate the ``.ibase`` and ``.lst`` models.

    Absent settings get default values, the configured suffixes are stored
    longest-first on ``self.suffixes``, and ``self.models`` is scanned for an
    ``.ibase`` file (``self.modelfile``) and an ``.lst`` file
    (``self.confusiblefile``).

    Raises:
        Exception: if the ``suffixes`` setting is absent, or if no ``.ibase``
            or no ``.lst`` file is found in ``self.models``.
    """
    if "class" not in self.settings:
        self.settings["class"] = "confusible"

    super().verifysettings()

    for option, fallback in (("algorithm", 1), ("leftcontext", 3), ("rightcontext", 3)):
        if option not in self.settings:
            self.settings[option] = fallback

    self.hapaxer = gethapaxer(self.settings)

    if "suffixes" not in self.settings:
        raise Exception("No suffixes specified for " + self.id + "!")
    # Longest suffixes first; equal-length suffixes keep their given order.
    self.suffixes = sorted(self.settings["suffixes"], key=len, reverse=True)

    # Settings used when computing the confusible list.
    for option, fallback in (
        ("freqthreshold", 20),
        ("maxlength", 25),  # longer words will be ignored
        ("minlength", 3),   # shorter words will be ignored
        ("maxratio", 0),    # no limit
    ):
        if option not in self.settings:
            self.settings[option] = fallback

    found_ibase = False
    found_lst = False
    for path in self.models:
        if path.endswith(".ibase"):
            found_ibase = True
            self.modelfile = path
        elif path.endswith(".lst"):
            found_lst = True
            self.confusiblefile = path

    if not found_ibase:
        raise Exception(
            "TIMBL models must have the extension ibase, not model file was supplies with that extension"
        )
    if not found_lst:
        raise Exception("Specify a model file with extension lst that will store all confusibles found")
def verifysettings(self):
    """Fill in default settings and validate the model file.

    Defaults are written into ``self.settings`` for ``class``, ``algorithm``,
    ``leftcontext``, ``rightcontext`` and ``maxdistance``. ``self.threshold``
    is taken from the ``threshold`` setting when present and is 0.9
    otherwise. The hapaxer and cache are created and the first entry of
    ``self.models`` is checked for the ``.ibase`` extension.

    Raises:
        Exception: if no model is configured, or if the first model does not
            end in ``.ibase``.
    """
    if 'class' not in self.settings:
        self.settings['class'] = 'contexterror'

    super().verifysettings()

    if 'algorithm' not in self.settings:
        self.settings['algorithm'] = 1
    if 'leftcontext' not in self.settings:
        self.settings['leftcontext'] = 3
    if 'rightcontext' not in self.settings:
        self.settings['rightcontext'] = 3

    # The original test was inverted: it read the missing key (KeyError) when
    # the setting was absent and discarded the configured value when present.
    if 'threshold' in self.settings:
        self.threshold = self.settings['threshold']
    else:
        self.threshold = 0.9

    if 'maxdistance' not in self.settings:
        self.settings['maxdistance'] = 2

    self.debug = bool(self.settings['debug']) if 'debug' in self.settings else False

    self.hapaxer = gethapaxer(self.settings)
    self.cache = getcache(self.settings, 1000)

    # Guard only the lookup, so the specific extension error below is not
    # replaced by the generic message (as the former bare ``except`` did).
    try:
        modelfile = self.models[0]
    except (IndexError, TypeError) as err:
        raise Exception("Expected one model, got 0 or more") from err

    if not modelfile.endswith(".ibase"):
        raise Exception("TIMBL models must have the extension ibase, got " + modelfile + " instead")
def verifysettings(self):
    """Fill in default settings and check that a model is configured.

    Defaults are written into ``self.settings`` for ``class``,
    ``leftcontext``, ``rightcontext`` and ``maxdistance``.
    ``self.maxcontext`` is the larger of the two context sizes.
    ``self.freqthreshold`` and ``self.threshold`` are taken from the settings
    when present and default to 25 and 0.9 respectively. The hapaxer and
    cache are created afterwards.

    Raises:
        Exception: if ``self.models`` has no first entry.
    """
    if 'class' not in self.settings:
        self.settings['class'] = 'contexterror'

    super().verifysettings()

    if 'leftcontext' not in self.settings:
        self.settings['leftcontext'] = 3
    if 'rightcontext' not in self.settings:
        self.settings['rightcontext'] = 3

    self.maxcontext = max(self.settings['leftcontext'], self.settings['rightcontext'])

    # The frequency threshold default used to be stored in ``self.threshold``,
    # where the next assignment immediately overwrote it.
    if 'freqthreshold' in self.settings:
        self.freqthreshold = self.settings['freqthreshold']
    else:
        self.freqthreshold = 25

    # The original test was inverted: it read the missing key (KeyError) when
    # the setting was absent and discarded the configured value when present.
    if 'threshold' in self.settings:
        self.threshold = self.settings['threshold']
    else:
        self.threshold = 0.9

    if 'maxdistance' not in self.settings:
        self.settings['maxdistance'] = 2

    self.debug = bool(self.settings['debug']) if 'debug' in self.settings else False

    self.hapaxer = gethapaxer(self.settings)
    self.cache = getcache(self.settings, 1000)

    # Only verify that a first model exists; catch just the lookup failures
    # rather than everything (a bare ``except`` also traps KeyboardInterrupt).
    try:
        self.models[0]
    except (IndexError, TypeError) as err:
        raise Exception("Expected one model, got 0 or more") from err
def verifysettings(self):
    """Fill in default settings and validate the confusibles and model file.

    Defaults are written into ``self.settings`` for ``class``, ``algorithm``,
    ``leftcontext``, ``rightcontext``, ``threshold`` and ``minocc``; the
    hapaxer is created; the ``confusibles`` setting is copied to
    ``self.confusibles``; ``self.debug`` reflects the ``debug`` setting; and
    the first entry of ``self.models`` is checked for the ``.ibase``
    extension.

    Raises:
        Exception: if the ``confusibles`` setting is absent, if no model is
            configured, or if the first model does not end in ``.ibase``.
    """
    if 'class' not in self.settings:
        self.settings['class'] = 'confusion'

    super().verifysettings()

    if 'algorithm' not in self.settings:
        self.settings['algorithm'] = 1
    if 'leftcontext' not in self.settings:
        self.settings['leftcontext'] = 3
    if 'rightcontext' not in self.settings:
        self.settings['rightcontext'] = 3

    if 'threshold' not in self.settings:
        self.settings['threshold'] = 0.8
    if 'minocc' not in self.settings:
        self.settings['minocc'] = 5

    self.hapaxer = gethapaxer(self, self.settings)

    if 'confusibles' not in self.settings:
        raise Exception("No confusibles specified for " + self.id + "!")
    self.confusibles = self.settings['confusibles']

    self.debug = bool(self.settings['debug']) if 'debug' in self.settings else False

    # Guard only the lookup. Previously a bare ``except`` also swallowed the
    # extension error below (replacing its message with the generic one) and
    # would have trapped KeyboardInterrupt/SystemExit as well.
    try:
        modelfile = self.models[0]
    except (IndexError, TypeError) as err:
        raise Exception("Expected one model, got 0 or more") from err

    if not modelfile.endswith(".ibase"):
        raise Exception("TIMBL models must have the extension ibase, got " + modelfile + " instead")
def verifysettings(self):
    """Fill in default settings and validate the model file.

    Defaults are written into ``self.settings`` for ``class``, ``algorithm``,
    ``leftcontext``, ``rightcontext``, ``deletionthreshold``,
    ``insertionthreshold`` and ``capitalizationthreshold``; ``self.debug``
    reflects the ``debug`` setting; the hapaxer is created; and the first
    entry of ``self.models`` is checked for the ``.ibase`` extension.

    Raises:
        Exception: if no model is configured, or if the first model does not
            end in ``.ibase``.
    """
    if 'class' not in self.settings:
        self.settings['class'] = 'missingpunctuation'  # will be overridden later again

    super().verifysettings()

    if 'algorithm' not in self.settings:
        self.settings['algorithm'] = 1
    if 'leftcontext' not in self.settings:
        self.settings['leftcontext'] = 3
    if 'rightcontext' not in self.settings:
        self.settings['rightcontext'] = 2

    if 'deletionthreshold' not in self.settings:
        self.settings['deletionthreshold'] = 0.95
    if 'insertionthreshold' not in self.settings:
        self.settings['insertionthreshold'] = 0.5
    if 'capitalizationthreshold' not in self.settings:
        self.settings['capitalizationthreshold'] = 0.5

    self.debug = bool(self.settings['debug']) if 'debug' in self.settings else False

    self.hapaxer = gethapaxer(self, self.settings)

    # Guard only the lookup. Previously a bare ``except`` also swallowed the
    # extension error below (replacing its message with the generic one) and
    # would have trapped KeyboardInterrupt/SystemExit as well.
    try:
        modelfile = self.models[0]
    except (IndexError, TypeError) as err:
        raise Exception("Expected one model, got 0 or more") from err

    if not modelfile.endswith(".ibase"):
        raise Exception("TIMBL models must have the extension ibase, got " + modelfile + " instead")
def verifysettings(self):
    """Fill in default settings and check that a model is configured.

    Defaults are written into ``self.settings`` for ``class``,
    ``leftcontext``, ``rightcontext`` and ``maxdistance``.
    ``self.maxcontext`` is the larger of the two context sizes.
    ``self.freqthreshold`` and ``self.threshold`` are taken from the settings
    when present and default to 25 and 0.9 respectively. The hapaxer and
    cache are created afterwards.

    Raises:
        Exception: if ``self.models`` has no first entry.
    """
    if 'class' not in self.settings:
        self.settings['class'] = 'confusion'

    super().verifysettings()

    if 'leftcontext' not in self.settings:
        self.settings['leftcontext'] = 3
    if 'rightcontext' not in self.settings:
        self.settings['rightcontext'] = 3

    self.maxcontext = max(self.settings['leftcontext'], self.settings['rightcontext'])

    # Previously the attribute was only assigned when the setting was absent,
    # leaving ``self.freqthreshold`` undefined for a configured value.
    if 'freqthreshold' in self.settings:
        self.freqthreshold = self.settings['freqthreshold']
    else:
        self.freqthreshold = 25

    # The original test was inverted: it read the missing key (KeyError) when
    # the setting was absent and discarded the configured value when present.
    if 'threshold' in self.settings:
        self.threshold = self.settings['threshold']
    else:
        self.threshold = 0.9

    if 'maxdistance' not in self.settings:
        self.settings['maxdistance'] = 2

    self.debug = bool(self.settings['debug']) if 'debug' in self.settings else False

    self.hapaxer = gethapaxer(self, self.settings)
    self.cache = getcache(self.settings, 1000)

    # Only verify that a first model exists; catch just the lookup failures
    # rather than everything (a bare ``except`` also traps KeyboardInterrupt).
    try:
        self.models[0]
    except (IndexError, TypeError) as err:
        raise Exception("Expected one model, got 0 or more") from err
def verifysettings(self):
    """Fill in default settings and validate the confusibles and model file.

    Defaults are written into ``self.settings`` for ``class``, ``algorithm``,
    ``leftcontext``, ``rightcontext``, ``threshold`` and ``minocc``; the
    hapaxer is created; the ``confusibles`` setting is copied to
    ``self.confusibles``; ``self.debug`` reflects the ``debug`` setting; and
    the first entry of ``self.models`` is checked for the ``.ibase``
    extension.

    Raises:
        Exception: if the ``confusibles`` setting is absent, if no model is
            configured, or if the first model does not end in ``.ibase``.
    """
    if 'class' not in self.settings:
        self.settings['class'] = 'confusion'

    super().verifysettings()

    if 'algorithm' not in self.settings:
        self.settings['algorithm'] = 1
    if 'leftcontext' not in self.settings:
        self.settings['leftcontext'] = 3
    if 'rightcontext' not in self.settings:
        self.settings['rightcontext'] = 3

    if 'threshold' not in self.settings:
        self.settings['threshold'] = 0.8
    if 'minocc' not in self.settings:
        self.settings['minocc'] = 5

    self.hapaxer = gethapaxer(self, self.settings)

    if 'confusibles' not in self.settings:
        raise Exception("No confusibles specified for " + self.id + "!")
    self.confusibles = self.settings['confusibles']

    self.debug = bool(self.settings['debug']) if 'debug' in self.settings else False

    # Guard only the lookup. Previously a bare ``except`` also swallowed the
    # extension error below (replacing its message with the generic one) and
    # would have trapped KeyboardInterrupt/SystemExit as well.
    try:
        modelfile = self.models[0]
    except (IndexError, TypeError) as err:
        raise Exception("Expected one model, got 0 or more") from err

    if not modelfile.endswith(".ibase"):
        raise Exception("TIMBL models must have the extension ibase, got " + modelfile + " instead")