def train(self, file_name):
    """Count the words of a text file and store the counts via ``DWords``.

    The file is read in full, passed to ``self._extract_words`` and the
    resulting words are tallied. One ``DWord`` per distinct word is then
    handed to ``DWords.insert_word``.

    :param file_name: path of the training text file.
    """
    # Read inside a context manager so the handle is closed even when
    # reading fails (the previous code never closed it).
    with open(file_name) as handle:
        text = handle.read()

    # Counts start at 1, so a word seen k times is stored with k + 1
    # occurrences.
    model = collections.defaultdict(lambda: 1)
    for feature in self._extract_words(text):
        model[feature] += 1

    for word, occurrences in model.items():
        DWords.insert_word(DWord(word, occurrences=occurrences))
def train_with_occurrences(self, file_name):
    """Add pre-computed occurrence counts from a file to the word store.

    Every line is split on single spaces; lines rejected by
    ``self.valid_training_group`` are skipped. For accepted lines the first
    field is the word and the second its count, which is added to the
    existing ``DWord`` (or to a new one starting at 0) before the word is
    written back with ``DWords.insert_word``.

    :param file_name: path of the "word count" training file.
    :raises ValueError: if an accepted line's second field is not an integer.
    """
    # The context manager guarantees the handle is released; the previous
    # code left the file open.
    with open(file_name) as handle:
        for line in handle:
            fields = line.split(" ")
            if not self.valid_training_group(fields):
                continue
            word = fields[0]
            # int() tolerates the trailing newline still attached to the
            # last field.
            count = int(fields[1])
            d_word = DWords.find_word(word) or DWord(word, occurrences=0)
            d_word.occurrences += count
            DWords.insert_word(d_word)
def load_polarity_or_modifier(file_name, attrname):
    """Load a numeric attribute for words listed in a file.

    Each line must consist of exactly two whitespace-separated fields: a
    word and a number. The number is converted with ``float`` and assigned
    to attribute ``attrname`` of the matching ``DWord`` (looked up with
    ``DWords.find_word``, created when missing), which is then stored with
    ``DWords.insert_word``. Lines with any other number of fields are
    reported on stdout and skipped; accepted lines are echoed to stdout.

    :param file_name: path of the "word value" file.
    :param attrname: name of the ``DWord`` attribute to set.
    :raises ValueError: if the second field of a line is not a number.
    """
    # Iterating the handle (instead of read().split("\n")) avoids the
    # phantom empty entry after a final newline, which used to be reported
    # as 'Invalid line: ""' for every well-formed file. The context manager
    # also closes the handle, which was previously leaked.
    with open(file_name) as handle:
        for raw_line in handle:
            word_value = raw_line.rstrip("\n")
            fields = word_value.split()  # split once, reuse below
            if len(fields) != 2:
                print("Invalid line: \"" + word_value + "\"")
                continue
            word, value = fields
            print(word_value)
            d_word = DWords.find_word(word) or DWord(word)
            setattr(d_word, attrname, float(value))
            DWords.insert_word(d_word)
def interactive_loader(file_name):
    """Interactively label the still-unclassified words of a text file.

    The file is parsed with ``parsetree(text, tags=False, chunks=False)``
    and its ``.words`` are walked in groups of ``GROUP_SIZE``. Words whose
    ``DWord`` already reports ``has_polarity()`` or ``is_modifier()`` are
    left alone. The remaining words of a group are shown together and the
    user types one option character per word:

    ``0`` neutral, ``1`` positive, ``2`` negative (polarity 0 / 1 / -1),
    ``3`` inversor, ``5`` minimizer, ``6`` maximizer (modifier -1 / 0.5 / 2),
    ``d`` discard the word.

    Every word not discarded is stored with ``DWords.insert_word`` and
    passed to ``save_in_file``; an unrecognised option character stores the
    word without changing it.

    :param file_name: path of the text file to walk through.
    """
    GROUP_SIZE = 3
    PROMPT = ("0:neutral\t1:positive\t2: negative\t3:inversor"
              "\t5:minimizer\t6:maximizer\td:descartar\n")
    # Option character -> (attribute to set, value to assign).
    LABELS = {
        "0": ("polarity", 0),
        "1": ("polarity", 1),
        "2": ("polarity", -1),
        "3": ("modifier", -1),
        "5": ("modifier", 0.5),
        "6": ("modifier", 2),
    }

    # Close the handle deterministically; it used to be leaked.
    with open(file_name) as handle:
        text = handle.read()
    words = list(parsetree(text, tags=False, chunks=False).words)

    # Slice-based grouping keeps the final, possibly shorter, group. The
    # former zip(*[iter(words)] * GROUP_SIZE) silently dropped the last
    # one or two words whenever len(words) was not a multiple of GROUP_SIZE.
    for start in range(0, len(words), GROUP_SIZE):
        pending = []
        for word in words[start:start + GROUP_SIZE]:
            token = word.string.lower()
            d_word = DWords.find_word(token) or DWord(token)
            if not (d_word.has_polarity() or d_word.is_modifier()):
                pending.append(d_word)
        if not pending:
            continue

        # Ask again until one option character per pending word is given;
        # surplus characters are cut off.
        options = []
        while len(options) != len(pending):
            print("\t".join(d_word.word for d_word in pending))
            options = list(raw_input(PROMPT)[:len(pending)])

        for d_word, option in zip(pending, options):
            if option in LABELS:
                attribute, value = LABELS[option]
                setattr(d_word, attribute, value)
            if option != "d":
                DWords.insert_word(d_word)
                save_in_file(d_word)
        print("\n\n\n")
def find_word(self, word_string):
    """Look up ``word_string`` through ``DWords.find_word``.

    :param word_string: the word to search for.
    :return: whatever ``DWords.find_word`` returns for that word.
    """
    found = DWords.find_word(word_string)
    return found
# known(words): looks up every entry of `words` with DWords.find_word and
# returns the set of truthy lookup results (the objects returned by the
# lookup, not the input entries); entries whose lookup is falsy are dropped.
# NOTE(review): the `def correct(self, word, edits2=False):` header that
# follows on this same line has no body visible here -- its implementation
# appears to be missing or outside this view; confirm against the full file.
def known(self, words): return set(w for w1 in words for w in [DWords.find_word(w1)] if w) def correct(self, word, edits2=False):
def known_edits2(self, word):
    """Collect the known strings reachable by applying ``edits1`` twice.

    ``self.edits1`` is applied to ``word`` and then again to each of its
    results; every second-level candidate for which ``DWords.find_word``
    returns a truthy value is kept.

    :param word: the string to start from.
    :return: a set of the candidate strings that were found.
    """
    matches = set()
    for first_edit in self.edits1(word):
        for second_edit in self.edits1(first_edit):
            if DWords.find_word(second_edit):
                matches.add(second_edit)
    return matches