def load_lexemes(self, fname):
    """
    Load lexeme descriptions from the file fname (read through
    yamlReader) and append a Lexeme object for each of them to
    self.lexemes.

    Return the total number of lexemes stored in self.lexemes, or 0
    if loading was aborted because LEX_MEMORY_LIMIT was exceeded.
    Lexemes appended before the abort stay in self.lexemes.
    """
    lexDescrs = yamlReader.read_file(fname, self.errorHandler)
    for dictDescr in lexDescrs:
        # NOTE: sys.getsizeof is shallow -- it measures only the list
        # object itself, not the Lexeme objects it refers to, so this
        # limit effectively bounds the number of lexemes.
        if sys.getsizeof(self.lexemes) > self.LEX_MEMORY_LIMIT:
            self.raise_error(u'Not enough memory for the lexemes.')
            # Return an integer rather than None so that the error path
            # agrees with the other loaders, which return 0 on failure.
            return 0
        self.lexemes.append(Lexeme(dictDescr, self.errorHandler))
    return len(self.lexemes)
def load_yaml_descrs(self, fnames):
    """
    Load raw descriptions of lexemes, paradigms or derivations
    from the file or files specified by fnames.

    fnames may be a single file name (byte string or unicode string)
    or an iterable of file names.
    Return a list of descriptions, concatenated in the order of fnames.
    An empty iterable yields an empty list.
    """
    # type(u'') is unicode on Python 2 and str on Python 3, so this also
    # catches unicode file names, which `type(fnames) == str` missed
    # (a unicode path would have been iterated character by character).
    if isinstance(fnames, (str, type(u''))):
        fnames = [fnames]
    descrs = []
    for fname in fnames:
        descrs.extend(yamlReader.read_file(fname, self.errorHandler))
    return descrs
def load_stem_conversions(self, fname):
    """
    Read stem conversion rules from the file fname and store them in
    self.stemConversions as {conversion name -> StemConversion},
    replacing whatever was stored there before.

    Must be called before any lexemes are loaded; otherwise an error
    is reported through self.raise_error and 0 is returned.
    Return the number of conversion rules stored.
    """
    lexemesAlreadyLoaded = len(self.lexemes) > 0
    if lexemesAlreadyLoaded:
        self.raise_error(u'Loading stem conversions should occur before '
                         u'loading stems.')
        return 0

    ruleDescrs = yamlReader.read_file(fname, self.errorHandler)
    # Reset the mapping only after the file has been read, then fill it
    # in place through a local alias.
    conversions = self.stemConversions = {}
    for ruleDescr in ruleDescrs:
        rule = StemConversion(ruleDescr, self.errorHandler)
        conversions[rule.name] = rule
    return len(conversions)
def load_derivations(self, fname): """Load derivations from a file. Return the number of derivations loaded.""" derivDescrs = yamlReader.read_file(fname, self.errorHandler) for dictDescr in derivDescrs: ## self.derivations[u'#deriv#' + dictDescr[u'value']] =\ ## Paradigm(dictDescr, self.errorHandler) dictDescr[u'value'] = u'#deriv#' + dictDescr[u'value'] self.derivations[dictDescr[u'value']] =\ Derivation(dictDescr, self.errorHandler) for paradigm in self.paradigms.values(): derivations.deriv_for_paradigm(paradigm) for derivName, deriv in self.derivations.iteritems(): if derivName.startswith(u'#deriv#paradigm#'): deriv.build_links() print derivName + u': build complete.' #print unicode(self.derivations[u'#deriv#paradigm#Nctt']) deriv.extend_leaves() print derivName + u': leaves extended.' #print unicode(deriv) #print unicode(self.derivations[u'#deriv#N-fӕ#paradigm#Nct']) print u'Leaves extended.' #print unicode(self.derivations[u'#deriv#paradigm#Nct']) for derivName, deriv in self.derivations.iteritems(): p = deriv.to_paradigm() self.paradigms[derivName] = p for derivName in self.derivations: print u'Compiling ' + derivName + u'... ', self.paradigms[derivName].compile_paradigm() print u'compiled.' gc.collect() if derivName == u'#deriv#paradigm#Nctt': fPara = codecs.open(u'test-ossetic/deriv-Nctt-test.txt', 'w', 'utf-8-sig') for f in self.paradigms[derivName].flex: fPara.write(unicode(f)) fPara.close() print u'Derivations compiled.' for lex in self.lexemes: lex.add_derivations() return len(self.derivations)
def load_paradigms(self, fname, pLst=None):
    """
    Load paradigms from a file and compile them.

    fname -- file with paradigm descriptions (read through yamlReader).
    pLst  -- optional collection of paradigm names; if given, only the
             paradigms whose names are in it are compiled and kept.
             With the default None all paradigms are kept.

    Must be called before any lexemes are loaded.
    Return the number of paradigms in self.paradigms after loading,
    or 0 if an error was reported (lexemes already loaded, or
    PARADIGM_MEMORY_LIMIT exceeded). After a memory-limit abort,
    self.paradigms keeps the uncompiled paradigms loaded so far.
    """
    if len(self.lexemes) > 0:
        self.raise_error(u'Loading paradigms should occur before ' +
                         u'loading stems.')
        return 0
    paraDescrs = yamlReader.read_file(fname, self.errorHandler)
    for dictDescr in paraDescrs:
        # NOTE: sys.getsizeof is shallow -- it measures only the dict
        # itself, not the Paradigm objects stored in it.
        if sys.getsizeof(self.paradigms) > self.PARADIGM_MEMORY_LIMIT:
            self.raise_error(u'Not enough memory for the paradigms.')
            # Return 0 (was None) to match the error path above.
            return 0
        self.paradigms[dictDescr[u'value']] = \
            Paradigm(dictDescr, self.errorHandler)

    # Compile copies rather than the stored objects, so the paradigms in
    # self.paradigms stay unmodified until every selected one is compiled.
    newParadigms = {}
    for pName, p in self.paradigms.iteritems():
        if pLst is None or pName in pLst:
            p = copy.deepcopy(p)
            p.compile_paradigm()
            newParadigms[pName] = p
    self.paradigms = newParadigms
    return len(self.paradigms)