예제 #1
0
파일: splicer.py 프로젝트: alvations/DLTK
def bananasplit(text):
  """ Dictionary + string search splitter. Only two element splits.

  Stages `text` with txt2tmp() (defined elsewhere; presumably it writes
  /tmp/tmp.in -- confirm), runs the banana-split jar through the shell with
  /tmp/tmp.in as stdin and /tmp/tmp.out as stdout, then post-processes the
  first line of the output.

  Returns the first output line with every whitespace-separated token that
  contains '[' or ']' dropped and the remaining tokens joined by single
  spaces, or None when the output file is empty.
  """
  txt2tmp(text)
  command = "java -jar banana-split-standalone-0.4.0.jar "+ \
            "igerman98_all.xml < /tmp/tmp.in > /tmp/tmp.out"
  # NOTE: the exit status of the external tool is not checked.
  os.system(command)
  # The context manager guarantees the output file is closed even though we
  # return from inside the loop (the handle used to be leaked).
  with codecs.open("/tmp/tmp.out","r","utf8") as outfile:
    for line in outfile:
      # Only the first line is of interest; bracketed tokens are discarded.
      return " ".join([token for token in line.split()
                       if u']' not in token and u'[' not in token])
  # Empty output: keep the historical "no result" value explicit.
  return None
예제 #2
0
파일: splicer.py 프로젝트: ikonov/DLTK
def bananasplit(text):
    """ Dictionary + string search splitter. Only two element splits.

    Stages `text` with txt2tmp() (defined elsewhere; presumably it writes
    /tmp/tmp.in -- confirm), runs the banana-split jar through the shell
    with /tmp/tmp.in as stdin and /tmp/tmp.out as stdout, then
    post-processes the first line of the output.

    Returns the first output line with every whitespace-separated token
    that contains '[' or ']' dropped and the remaining tokens joined by
    single spaces, or None when the output file is empty.
    """
    txt2tmp(text)
    command = "java -jar banana-split-standalone-0.4.0.jar "+ \
              "igerman98_all.xml < /tmp/tmp.in > /tmp/tmp.out"
    # NOTE: the exit status of the external tool is not checked.
    os.system(command)
    # The context manager guarantees the output file is closed even though
    # we return from inside the loop (the handle used to be leaked).
    with codecs.open("/tmp/tmp.out", "r", "utf8") as outfile:
        for line in outfile:
            # Only the first line matters; bracketed tokens are discarded.
            return " ".join(
                [token for token in line.split()
                 if u']' not in token and u'[' not in token])
    # Empty output: keep the historical "no result" value explicit.
    return None
예제 #3
0
파일: splicer.py 프로젝트: alvations/DLTK
def jwordsplitter(text): # Source: http://www.danielnaber.de/jwordsplitter/
  """ Dictionary based compound splitter. Supports multiple splits.

  Stages `text` with txt2tmp() (defined elsewhere; presumably it writes
  /tmp/tmp.in -- confirm), runs the jwordsplitter jar on /tmp/tmp.in with
  stdout redirected to /tmp/tmp.out, and reads back the first output line.

  Returns that line stripped of surrounding whitespace with the comma
  separators removed (the pieces between commas are concatenated
  unchanged), or None when the output file is empty.
  """
  txt2tmp(text)
  # NOTE: the exit status of the external tool is not checked.
  os.system("java -jar jwordsplitter-3.4.jar /tmp/tmp.in > /tmp/tmp.out")
  # The context manager guarantees the output file is closed even though we
  # return from inside the loop (the handle used to be leaked).
  with codecs.open("/tmp/tmp.out","r","utf8") as outfile:
    for line in outfile:
      # Only the first line is used; joining the comma-split pieces with
      # the empty string simply removes the commas.
      return "".join(line.strip().split(","))
  # Empty output: keep the historical "no result" value explicit.
  return None
예제 #4
0
파일: splicer.py 프로젝트: alvations/DLTK
def smor(text):
  """ Morphological analysis with SMOR. You need SMOR in /usr/bin/

  Stages `text` with txt2tmp() (defined elsewhere; presumably it writes
  /tmp/tmp.in -- confirm), runs the `smor` executable through the shell
  with /tmp/tmp.in as stdin and /tmp/tmp.out as stdout, and reads the
  result back.

  Returns a list with one whitespace-stripped string per output line,
  skipping the first three lines of the output (presumably a banner
  printed by the tool -- confirm). The list is empty when the output has
  three lines or fewer.
  """
  txt2tmp(text)
  # NOTE: the exit status of the external tool is not checked.
  os.system("smor < /tmp/tmp.in > /tmp/tmp.out")
  # Read inside a context manager so the file handle is closed promptly
  # (it used to be left for the garbage collector).
  with codecs.open("/tmp/tmp.out","r","utf8") as outfile:
    lines = outfile.readlines()
  return [line.strip() for line in lines[3:]]
예제 #5
0
파일: splicer.py 프로젝트: ikonov/DLTK
def smor(text):
    """ Morphological analysis with SMOR. You need SMOR in /usr/bin/

    Stages `text` with txt2tmp() (defined elsewhere; presumably it writes
    /tmp/tmp.in -- confirm), runs the `smor` executable through the shell
    with /tmp/tmp.in as stdin and /tmp/tmp.out as stdout, and reads the
    result back.

    Returns a list with one whitespace-stripped string per output line,
    skipping the first three lines of the output (presumably a banner
    printed by the tool -- confirm). The list is empty when the output has
    three lines or fewer.
    """
    txt2tmp(text)
    # NOTE: the exit status of the external tool is not checked.
    os.system("smor < /tmp/tmp.in > /tmp/tmp.out")
    # Read inside a context manager so the file handle is closed promptly
    # (it used to be left for the garbage collector).
    with codecs.open("/tmp/tmp.out","r","utf8") as outfile:
        lines = outfile.readlines()
    return [line.strip() for line in lines[3:]]
예제 #6
0
파일: splicer.py 프로젝트: ikonov/DLTK
def jwordsplitter(text):  # Source: http://www.danielnaber.de/jwordsplitter/
    """ Dictionary based compound splitter. Supports multiple splits.

    Stages `text` with txt2tmp() (defined elsewhere; presumably it writes
    /tmp/tmp.in -- confirm), runs the jwordsplitter jar on /tmp/tmp.in with
    stdout redirected to /tmp/tmp.out, and reads back the first output line.

    Returns that line stripped of surrounding whitespace with the comma
    separators removed (the pieces between commas are concatenated
    unchanged), or None when the output file is empty.
    """
    txt2tmp(text)
    # NOTE: the exit status of the external tool is not checked.
    os.system("java -jar jwordsplitter-3.4.jar /tmp/tmp.in > /tmp/tmp.out")
    # The context manager guarantees the output file is closed even though
    # we return from inside the loop (the handle used to be leaked).
    with codecs.open("/tmp/tmp.out", "r", "utf8") as outfile:
        for line in outfile:
            # Only the first line is used; joining the comma-split pieces
            # with the empty string simply removes the commas.
            return "".join(line.strip().split(","))
    # Empty output: keep the historical "no result" value explicit.
    return None