コード例 #1
0
ファイル: vocab.py プロジェクト: purpleposeidon/zirsam
Output format is:
  (Number indicating word-count padded with at least a single zero) (word or rafsi) [optional note re. rafsi source]"""

import gc
import sys
import pickle
# Handle --help before the zirsam imports below: print the module docstring
# to stderr and exit (presumably so usage can be shown without loading the
# package -- confirm).
if "--help" in sys.argv:
  print(__doc__, file=sys.stderr)
  raise SystemExit

import zirsam
import zirsam.config as config
import zirsam.morphology as morphology
import zirsam.tokens as tokens

rafsi_pick = zirsam.resource('r2g.pyk3')
# Load the pickled rafsi table inside a context manager so the file handle is
# closed deterministically instead of being left to the garbage collector.
# NOTE(review): pickle.load can execute arbitrary code; this is only safe as
# long as r2g.pyk3 is a trusted resource shipped with zirsam.
with open(rafsi_pick, 'rb') as _rafsi_file:
  rafsi = pickle.load(_rafsi_file)
# word -> number of times add() has been called with it.
words = {}
# Highest single count currently stored in `words` (maintained by add()).
top = 0

def add(word):
  """Record one more occurrence of *word*.

  Increments the tally kept in the module-level ``words`` dict (starting at
  1 for a word not seen before) and raises the module-level ``top`` when the
  new tally exceeds it.
  """
  global top
  tally = words.get(word, 0) + 1
  words[word] = tally
  if tally > top:
    top = tally
def run_bunch(stdin=None):
  c = config.Configuration(args=[], stdin=stdin)
  for word in morphology.Stream(conf=c):
コード例 #2
0
ファイル: tokens.py プロジェクト: pookleblinky/zirsam
    def html(self):
        """Render this token and everything nested in it as an HTML ``<span>``.

        The span's ``class`` is the lower-cased class name of the token (with
        "cmene" reported as "cmevla"); its ``title`` is a definition looked up
        in the word lists obtained through ``zirsam.resource``:

        * brivla -- the first ``gismu.txt`` line that starts with a space
          followed by the word; failing that, the first ``lujvo.txt`` line
          containing the word; failing that, for each entry of
          ``self.ve_lujvo_rafsi`` the first ``gismu.txt`` line mentioning that
          rafsi (space-delimited) within its first 20 characters.
        * cmavo -- the first ``cmavo.txt`` line that starts with the word
          (prefixed by "." when it begins with a vowel, by a space otherwise)
          followed by a space.
        * cmene -- the fixed text "name-word".
        * anything else -- a "this doesn't know about <type>" notice.

        The span's content is ``self.value`` followed by the escaped start
        value, the content (escaped if it is a string, otherwise rendered
        through its own ``html()``), the escaped end value and the rendered
        modifiers.

        Returns:
            The HTML fragment as a string.
        """
        # Imported locally: cgi.escape was removed in Python 3.8 (and the cgi
        # module itself in 3.13); html.escape is the supported replacement.
        from html import escape

        def first_match(resource_name, needle):
            """Return the first line (newline-prefixed) containing needle, or ""."""
            with open(zirsam.resource(resource_name)) as handle:
                for line in handle:
                    # The leading newline lets a needle anchor at line start.
                    line = "\n" + line
                    if needle in line:
                        return line
            return ""

        taip = type(self).__name__.lower()
        defin = ""
        if taip == "brivla":
            defin = first_match("gismu.txt", "\n {0}".format(self.value))
            if not defin:
                defin = first_match("lujvo.txt", self.value)
            if not defin and self.ve_lujvo_rafsi:
                # Read the gismu list once rather than once per rafsi.
                with open(zirsam.resource("gismu.txt")) as handle:
                    gismu_lines = handle.readlines()
                for raf in self.ve_lujvo_rafsi:
                    raf = " {0} ".format(raf)
                    for line in gismu_lines:
                        # Only the head of the line is searched; a line that
                        # was already collected is not added twice.
                        if raf in line[:20] and line not in defin:
                            defin += line + "\n"
                            break
            defin = defin.strip() or "unknown brivla"
        elif taip == "cmavo":
            v = self.value
            if v[0] in "aeiou":
                v = "." + v
            else:
                v = " " + v
            defin = first_match("cmavo.txt", "\n{0} ".format(v)).strip()
            if not defin:
                defin = "unknown cmavo"
        elif taip == "cmene":
            defin = "name-word"
            taip = "cmevla"
        else:
            defin = "this doesn't know about " + taip

        # quote=False reproduces what cgi.escape did for element text.
        value = self.value
        if self.start:
            value += " " + escape(self.start.value, quote=False)
        if isinstance(self.content, str):
            value += " " + escape(self.content, quote=False)
        elif self.content:
            value += self.content.html()
        if self.end:
            value += " " + escape(self.end.value, quote=False)
        for modifier in self.modifiers:
            value += " " + modifier.html()

        # The definition lands inside a double-quoted attribute, so quotes,
        # "&" and "<" in a word-list line must be escaped to keep the markup
        # well formed.
        return """<span class="{0}" title="{1}">{2}</span>""".format(
            taip, escape(defin, quote=True), value
        )
コード例 #3
0
ファイル: vocab.py プロジェクト: pookleblinky/zirsam
  {Number indicating word-count padded with at least a single zero} {word or rafsi} (optional note re. rafsi source)"""

import gc
import sys
import pickle

# Handle --help before the zirsam imports below: print the module docstring
# to stderr and exit (presumably so usage can be shown without loading the
# package -- confirm).
if "--help" in sys.argv:
    print(__doc__, file=sys.stderr)
    raise SystemExit

import zirsam
import zirsam.config as config
import zirsam.morphology as morphology
import zirsam.tokens as tokens

rafsi_pick = zirsam.resource("r2g.pyk3")
# Use a context manager so the pickle file is closed as soon as it has been
# read, rather than whenever the anonymous file object gets collected.
# NOTE(review): unpickling runs arbitrary code from the file; keep r2g.pyk3 a
# trusted, package-supplied resource.
with open(rafsi_pick, "rb") as _rafsi_file:
    rafsi = pickle.load(_rafsi_file)
# Per-word occurrence counts, filled in by add().
words = {}
# Largest count stored in `words` so far, kept up to date by add().
top = 0


def add(word):
    """Bump the occurrence count of *word* in the module-level ``words`` dict.

    A word seen for the first time starts at 1.  The module-level ``top`` is
    updated so that it never falls below the count just written.
    """
    global top
    try:
        words[word] += 1
    except KeyError:
        words[word] = 1
    current = words[word]
    top = current if current > top else top


def run_bunch(stdin=None):