Output format is: (Number indicating word-count padded with at least a single zero) (word or rafsi) [optional note re. rafsi source]""" import gc import sys import pickle if "--help" in sys.argv: print(__doc__, file=sys.stderr) raise SystemExit import zirsam import zirsam.config as config import zirsam.morphology as morphology import zirsam.tokens as tokens rafsi_pick = zirsam.resource('r2g.pyk3') rafsi = pickle.load(open(rafsi_pick, 'rb')) words = {} top = 0 def add(word): if word in words: words[word] += 1 else: words[word] = 1 global top top = max(words[word], top) def run_bunch(stdin=None): c = config.Configuration(args=[], stdin=stdin) for word in morphology.Stream(conf=c):
def html(self):
    """Render this token as an HTML ``<span>`` with its definition as a tooltip.

    The lower-cased class name of ``self`` selects how the tooltip text is
    found:

    * ``brivla`` -- the first line of ``gismu.txt`` containing the value
      preceded by a newline and a space; failing that, the first line of
      ``lujvo.txt`` containing the value anywhere; failing that, for each
      entry of ``self.ve_lujvo_rafsi`` the first ``gismu.txt`` line whose
      first 20 characters contain that rafsi surrounded by spaces.
    * ``cmavo`` -- the first line of ``cmavo.txt`` that starts with the
      value (prefixed by ``.`` when it begins with a vowel, otherwise by a
      space) followed by a space.
    * ``cmene`` -- the fixed text ``name-word``; the CSS class becomes
      ``cmevla``.
    * anything else -- a note that this type is not known.

    The span body is ``self.value`` followed by the escaped ``start`` value,
    the content (escaped when it is a ``str``, otherwise rendered through its
    own ``html()``), the escaped ``end`` value and every modifier's
    ``html()``.

    Returns:
        str: ``<span class="KIND" title="DEFINITION">BODY</span>``.
    """
    # Imported here rather than at file level: cgi.escape() no longer exists
    # in current Python, and a local import keeps this block self-contained.
    from html import escape

    def resource_lines(name):
        """Return all lines (line endings kept) of the named zirsam resource."""
        with open(zirsam.resource(name)) as handle:
            return handle.readlines()

    def first_match(lines, needle):
        """Return the first line in which *needle* occurs, or ``""``.

        Each line is searched with a newline prepended so that a needle
        starting with a newline only matches at the beginning of a line.
        """
        return next((line for line in lines if needle in "\n" + line), "")

    def text(raw):
        """Escape ``&``, ``<`` and ``>`` for use as element content."""
        return escape(raw, quote=False)

    def attribute(raw):
        """Escape text for use inside a double-quoted attribute value."""
        # Apostrophes are harmless inside double quotes, so leave them alone.
        return text(raw).replace('"', "&quot;")

    taip = type(self).__name__.lower()
    defin = ""

    if taip == "brivla":
        gismu_lines = resource_lines("gismu.txt")
        defin = first_match(gismu_lines, "\n {0}".format(self.value))
        if not defin:
            defin = first_match(resource_lines("lujvo.txt"), self.value)
        if not defin and self.ve_lujvo_rafsi:
            # Fall back to describing the word by the entries of its rafsi.
            for raf in self.ve_lujvo_rafsi:
                padded = " {0} ".format(raf)
                for line in gismu_lines:
                    if padded in line[:20] and line not in defin:
                        defin += line + "\n"
                        break
        defin = defin.strip() or "unknown brivla"
    elif taip == "cmavo":
        word = self.value
        lead = "." if word.startswith(tuple("aeiou")) else " "
        found = first_match(
            resource_lines("cmavo.txt"), "\n{0}{1} ".format(lead, word)
        )
        defin = found.strip() or "unknown cmavo"
    elif taip == "cmene":
        defin = "name-word"
        taip = "cmevla"
    else:
        defin = "this doesn't know about " + taip

    parts = [self.value]
    if self.start:
        parts.append(" " + text(self.start.value))
    if isinstance(self.content, str):
        parts.append(" " + text(self.content))
    elif self.content:
        # Nested content renders itself and already yields markup.
        parts.append(self.content.html())
    if self.end:
        parts.append(" " + text(self.end.value))
    for modifier in self.modifiers:
        parts.append(" " + modifier.html())

    return '<span class="{0}" title="{1}">{2}</span>'.format(
        taip, attribute(defin), "".join(parts)
    )
{Number indicating word-count padded with at least a single zero} {word or rafsi} (optional note re. rafsi source)""" import gc import sys import pickle if "--help" in sys.argv: print(__doc__, file=sys.stderr) raise SystemExit import zirsam import zirsam.config as config import zirsam.morphology as morphology import zirsam.tokens as tokens rafsi_pick = zirsam.resource("r2g.pyk3") rafsi = pickle.load(open(rafsi_pick, "rb")) words = {} top = 0 def add(word): if word in words: words[word] += 1 else: words[word] = 1 global top top = max(words[word], top) def run_bunch(stdin=None):