Beispiel #1
0
def bananasplit(text):
  """Split compounds in *text* with the banana-split command line tool.

  Dictionary + string search splitter. Only two element splits.

  The text is passed to ``txt2tmp`` (presumably this writes it to
  /tmp/tmp.in -- confirm against that helper). The banana-split jar is then
  run through the shell with the igerman98_all.xml dictionary, reading
  /tmp/tmp.in and writing /tmp/tmp.out. The exit status of the command is
  not checked.

  Returns:
    The whitespace-separated tokens of the first output line that contain
    neither '[' nor ']', joined by single spaces, or None when the output
    file has no lines.
  """
  txt2tmp(text)
  command = "java -jar banana-split-standalone-0.4.0.jar "+ \
            "igerman98_all.xml < /tmp/tmp.in > /tmp/tmp.out"
  os.system(command)
  # The context manager closes the output file even though we return from
  # inside the loop.
  with codecs.open("/tmp/tmp.out", "r", "utf8") as result:
    for line in result:
      # Only the first line is used; bracketed tokens are dropped.
      kept = [token for token in line.split()
              if u']' not in token and u'[' not in token]
      return " ".join(kept)
  return None
Beispiel #2
0
def bananasplit(text):
    """Split compounds in *text* with the banana-split command line tool.

    Dictionary + string search splitter. Only two element splits.

    The text is passed to ``txt2tmp`` (presumably this writes it to
    /tmp/tmp.in -- confirm against that helper). The banana-split jar is
    then run through the shell with the igerman98_all.xml dictionary,
    reading /tmp/tmp.in and writing /tmp/tmp.out. The exit status of the
    command is not checked.

    Returns:
        The whitespace-separated tokens of the first output line that
        contain neither '[' nor ']', joined by single spaces, or None when
        the output file has no lines.
    """
    txt2tmp(text)
    command = "java -jar banana-split-standalone-0.4.0.jar "+ \
              "igerman98_all.xml < /tmp/tmp.in > /tmp/tmp.out"
    os.system(command)
    # The context manager closes the output file even though we return from
    # inside the loop.
    with codecs.open("/tmp/tmp.out", "r", "utf8") as result:
        for line in result:
            # Only the first line is used; bracketed tokens are dropped.
            kept = [token for token in line.split()
                    if u']' not in token and u'[' not in token]
            return " ".join(kept)
    return None
Beispiel #3
0
def jwordsplitter(text): # Source: http://www.danielnaber.de/jwordsplitter/
  """Split compounds in *text* with the jwordsplitter command line tool.

  Dictionary based compound splitter. Supports multiple splits.

  The text is passed to ``txt2tmp`` (presumably this writes it to
  /tmp/tmp.in -- confirm against that helper). The jwordsplitter jar is then
  run through the shell on /tmp/tmp.in with its output redirected to
  /tmp/tmp.out. The exit status of the command is not checked.

  Returns:
    The first output line, stripped, with its comma-separated parts
    concatenated without any separator, or None when the output file has
    no lines.
  """
  txt2tmp(text)
  os.system("java -jar jwordsplitter-3.4.jar /tmp/tmp.in > /tmp/tmp.out")
  # The context manager closes the output file even though we return from
  # inside the loop.
  with codecs.open("/tmp/tmp.out", "r", "utf8") as result:
    for line in result:
      # NOTE(review): joining on "" just removes the commas; confirm that no
      # separator between the parts is intended.
      return "".join(line.strip().split(","))
  return None
Beispiel #4
0
def smor(text):
  """Morphological analysis with SMOR. You need SMOR in /usr/bin/.

  The text is passed to ``txt2tmp`` (presumably this writes it to
  /tmp/tmp.in -- confirm against that helper). ``smor`` is then run through
  the shell, reading /tmp/tmp.in and writing /tmp/tmp.out. The exit status
  of the command is not checked.

  Returns:
    A list with every output line after the first three, each stripped of
    surrounding whitespace. The list is empty when the output has three
    lines or fewer.
  """
  txt2tmp(text)
  os.system("smor < /tmp/tmp.in > /tmp/tmp.out")
  # Use a context manager so the output file is closed after reading.
  with codecs.open("/tmp/tmp.out", "r", "utf8") as result:
    # The first three lines are skipped -- presumably a banner printed by
    # SMOR; TODO confirm.
    return [line.strip() for line in result.readlines()[3:]]
Beispiel #5
0
def smor(text):
    """Morphological analysis with SMOR. You need SMOR in /usr/bin/.

    The text is passed to ``txt2tmp`` (presumably this writes it to
    /tmp/tmp.in -- confirm against that helper). ``smor`` is then run
    through the shell, reading /tmp/tmp.in and writing /tmp/tmp.out. The
    exit status of the command is not checked.

    Returns:
        A list with every output line after the first three, each stripped
        of surrounding whitespace. The list is empty when the output has
        three lines or fewer.
    """
    txt2tmp(text)
    os.system("smor < /tmp/tmp.in > /tmp/tmp.out")
    # Use a context manager so the output file is closed after reading.
    with codecs.open("/tmp/tmp.out", "r", "utf8") as result:
        # The first three lines are skipped -- presumably a banner printed
        # by SMOR; TODO confirm.
        return [line.strip() for line in result.readlines()[3:]]
Beispiel #6
0
def jwordsplitter(text):  # Source: http://www.danielnaber.de/jwordsplitter/
    """Split compounds in *text* with the jwordsplitter command line tool.

    Dictionary based compound splitter. Supports multiple splits.

    The text is passed to ``txt2tmp`` (presumably this writes it to
    /tmp/tmp.in -- confirm against that helper). The jwordsplitter jar is
    then run through the shell on /tmp/tmp.in with its output redirected to
    /tmp/tmp.out. The exit status of the command is not checked.

    Returns:
        The first output line, stripped, with its comma-separated parts
        concatenated without any separator, or None when the output file
        has no lines.
    """
    txt2tmp(text)
    os.system("java -jar jwordsplitter-3.4.jar /tmp/tmp.in > /tmp/tmp.out")
    # The context manager closes the output file even though we return from
    # inside the loop.
    with codecs.open("/tmp/tmp.out", "r", "utf8") as result:
        for line in result:
            # NOTE(review): joining on "" just removes the commas; confirm
            # that no separator between the parts is intended.
            return "".join(line.strip().split(","))
    return None