예제 #1
0
파일: stem.py 프로젝트: sipi/HamSpamGram
def stemFile(path, st):

    tokens = util.tokenizeFile(path)

    for i in range(0, len(tokens)):
        if tokens[i] == "\n":
            print
        else:
            print st.stem(tokens[i].lower()),
예제 #2
0
파일: wesb.py 프로젝트: sipi/wesb
def readConf(path):
  """Parse a configuration file into a list of Screen objects.

  The file is tokenized with ``util.tokenizeFile`` and walked with a small
  state machine:

  * outside any section, a ``[screen]`` token creates a new Screen; every
    other token is ignored;
  * inside a screen, ``[/screen]`` closes it, any other token starting with
    ``[`` names an applet (``[name]`` imports ``applets.name`` and
    instantiates the attribute ``name`` of that module), and anything else
    is an attribute name whose value is the token two positions later
    (the token in between is skipped -- presumably a separator such as
    ``=``; confirm against the tokenizer);
  * inside an applet, ``[/name]`` closes it and anything else is an
    attribute of the applet, read the same way.

  Attribute values are stored as the raw tokens, without conversion.

  path -- passed unchanged to ``util.tokenizeFile``.

  Returns the list of screens in the order they appear in the file.
  Raises IndexError when the tokens end before an attribute's value.
  """
  OUTSIDE, IN_SCREEN, IN_APPLET = 0, 1, 2
  package_name = "applets."

  tokens = util.tokenizeFile(path)

  screens = []
  state = OUTSIDE
  i = 0
  while i < len(tokens):
    token = tokens[i]
    step = 1  # an attribute consumes three tokens: name, skipped token, value

    if state == OUTSIDE:
      if token == '[screen]':
        screen = Screen()
        screens.append(screen)
        state = IN_SCREEN

    elif state == IN_SCREEN:
      if token == '[/screen]':
        state = OUTSIDE
      elif token[0] == "[":
        # Add an applet to the current screen.
        # NOTE(review): the module that gets imported and instantiated is
        # chosen by the configuration file, so that file must be trusted.
        module_name = token[1:-1]
        full_name = package_name + module_name
        # __import__ returns the top-level package for a dotted name, so
        # the submodule itself is fetched from sys.modules.
        __import__(full_name)
        applet = getattr(sys.modules[full_name], module_name)()
        screen.addApplet(applet)
        state = IN_APPLET
      else:
        # Set an attribute of the current screen.  setattr() also works on
        # old-style instances, which have no __setattr__ to call directly.
        setattr(screen, token, tokens[i + 2])
        step = 3

    elif state == IN_APPLET:
      if token == '[/' + module_name + ']':
        state = IN_SCREEN
      else:
        # Set an attribute of the current applet.
        setattr(applet, token, tokens[i + 2])
        step = 3

    i += step

  return screens
예제 #3
0
파일: stem.py 프로젝트: sipi/HamSpamGram
elif stemmer == "porter":
    print >> sys.stderr, "porter"
    st = PorterStemmer()
elif stemmer == "snowball":
    print >> sys.stderr, "snowball"
    st = SnowballStemmer("english")
elif stemmer == "wordnet":
    print >> sys.stderr, "wordnet"
    st = WordNetLemmatizer()

while args:
    conf_path = args.pop()

if len(conf_path) == 0:
    print "not file specified"
# END ARGUMENT PROCESSING

# si le stemmer n'est pas defini on affiche
# la liste des tokens
if len(stemmer) == 0:
    tokens = util.tokenizeFile(conf_path)

    for i in range(0, len(tokens)):
        if tokens[i] == "\n":
            print
        else:
            print tokens[i],
else:
    # lemmatisation
    stemFile(conf_path, st)