Ejemplo n.º 1
0
 def train(self, file_name):
   """Count the words of a training file and store the counts in DWords.

   The whole file is read and handed to ``self._extract_words``. One
   ``DWord`` per distinct feature is then passed to ``DWords.insert_word``.
   Every counter starts at 1 before occurrences are added, so a feature
   seen ``n`` times is stored with ``occurrences=n + 1``.

   Args:
     file_name: path of the text file to learn from.
   """
   # The context manager closes the handle even if reading fails.
   with open(file_name) as training_file:
     features = self._extract_words(training_file.read())

   model = collections.defaultdict(lambda: 1)
   for feature in features:
     model[feature] += 1

   for word, occurrences in model.items():
     DWords.insert_word(DWord(word, occurrences=occurrences))
Ejemplo n.º 2
0
 def train_with_occurrences(self, file_name):
   """Add the occurrence counts listed in a file to the words in DWords.

   Each line is split on single spaces and the resulting list is checked
   with ``self.valid_training_group``. For accepted lines the first field
   is the word and the second is converted with ``int`` and added to the
   word's ``occurrences``. Words not found through ``DWords.find_word``
   start from ``occurrences=0``.

   Args:
     file_name: path of the file to read, one entry per line.
   """
   # The context manager closes the handle even if a line fails to parse.
   with open(file_name) as training_file:
     for word_oc in training_file:
       splitted = word_oc.split(" ")
       if not self.valid_training_group(splitted):
         continue
       word, count = splitted[0], splitted[1]
       d_word = DWords.find_word(word) or DWord(word, occurrences=0)
       d_word.occurrences += int(count)
       DWords.insert_word(d_word)
def load_polarity_or_modifier(file_name, attrname):
  """Load "<word> <value>" lines from a file into an attribute of each word.

  The file content is split on newlines. A line that does not consist of
  exactly two whitespace-separated fields is reported on stdout and
  skipped. For every other line the word is looked up with
  ``DWords.find_word`` (a new ``DWord`` is created when the lookup is
  falsy), ``attrname`` is set to ``float(value)`` and the word is passed to
  ``DWords.insert_word``.

  Args:
    file_name: path of the file to read.
    attrname: name of the attribute to set on each word.

  Raises:
    ValueError: if the second field of a line is not a valid float.
  """
  # The context manager closes the handle even if reading fails.
  with open(file_name) as source:
    lines = source.read().split("\n")

  for word_value in lines:
    fields = word_value.split()
    if len(fields) != 2:
      # Single-argument print(...) emits the same text on Python 2 and 3.
      print("Invalid line: \"" + word_value + "\"")
      continue
    word, value = fields
    print(word_value)
    d_word = DWords.find_word(word) or DWord(word)
    setattr(d_word, attrname, float(value))
    DWords.insert_word(d_word)
def interactive_loader(file_name):
  """Interactively label the not-yet-labelled words of a text file.

  The text is tokenised with ``parsetree`` and walked in groups of
  ``GROUP_SIZE`` words. Words for which ``has_polarity()`` or
  ``is_modifier()`` is already true are left out. For the remaining words
  of a group the user is asked for one option character per word, and the
  prompt repeats until the answer has exactly that many characters (extra
  characters are cut off).

  Options "0", "1" and "2" set ``polarity``; "3", "5" and "6" set
  ``modifier``. "d" discards the word. Any other character changes
  nothing on the word. Every word that was not discarded is passed to
  ``DWords.insert_word`` and ``save_in_file``.

  Args:
    file_name: path of the text file whose words are to be labelled.
  """
  GROUP_SIZE = 3
  PROMPT = "0:neutral\t1:positive\t2: negative\t3:inversor\t5:minimizer\t6:maximizer\td:descartar\n"
  # Option character -> (attribute to set, value to assign).
  LABELS = {
    "0": ("polarity", 0),
    "1": ("polarity", 1),
    "2": ("polarity", -1),
    "3": ("modifier", -1),
    "5": ("modifier", 0.5),
    "6": ("modifier", 2),
  }

  # The context manager closes the handle even if reading fails.
  with open(file_name) as source:
    text = source.read()

  words = list(parsetree(text, tags=False, chunks=False).words)

  # Slicing keeps the final, shorter group; zip(*[iter(words)]*GROUP_SIZE)
  # would silently drop the last len(words) % GROUP_SIZE words.
  for start in range(0, len(words), GROUP_SIZE):
    word_group = words[start:start + GROUP_SIZE]

    d_words = []
    for word in word_group:
      lowered = word.string.lower()
      d_word = DWords.find_word(lowered) or DWord(lowered)
      if not (d_word.has_polarity() or d_word.is_modifier()):
        d_words.append(d_word)

    if not d_words:
      continue

    options = ""
    while len(options) != len(d_words):
      print("\t".join(pending.word for pending in d_words))
      options = list(raw_input(PROMPT)[:len(d_words)])

    for d_word, option in zip(d_words, options):
      if option in LABELS:
        attribute, value = LABELS[option]
        setattr(d_word, attribute, value)

      if option != "d":
        DWords.insert_word(d_word)
        save_in_file(d_word)
    print("\n\n\n")
 def find_word(self, word_string):
   """Return whatever ``DWords.find_word`` yields for ``word_string``."""
   match = DWords.find_word(word_string)
   return match
Ejemplo n.º 6
0
  def known(self, words):
    """Return the set of truthy ``DWords.find_word`` results for ``words``.

    Each element of ``words`` is looked up once; falsy lookups are dropped.
    """
    found = set()
    for candidate in words:
      entry = DWords.find_word(candidate)
      if entry:
        found.add(entry)
    return found

  def correct(self, word, edits2=False):
Ejemplo n.º 7
0
 def known_edits2(self, word):
   """Return the strings two ``edits1`` steps from ``word`` found in DWords.

   ``self.edits1`` is applied to ``word`` and then to each of its results.
   A second-step result is kept when ``DWords.find_word`` returns a truthy
   value for it.
   """
   matches = set()
   for first_edit in self.edits1(word):
     for second_edit in self.edits1(first_edit):
       if DWords.find_word(second_edit):
         matches.add(second_edit)
   return matches