def __init__(self, database_path, language_path, json_output_path=None, thesaurus_path=None, stopwords_path=None, color=False):
    """Wire up the parser from a database file and a language configuration.

    Args:
        database_path: Path handed to ``Database.load``.
        language_path: Path handed to ``LangConfig.load``.
        json_output_path: Stored on the instance unchanged; not used here.
        thesaurus_path: When truthy, a ``Thesaurus`` is loaded from it and
            attached to the database before the database itself is loaded.
        stopwords_path: When truthy, a ``StopwordFilter`` is loaded from it
            and kept in ``self.stopwordsFilter`` (otherwise ``None``).
        color: ``without_color()`` is called when this compares equal to
            ``False``.
    """
    # Equality (not truthiness) is kept on purpose: e.g. None does not
    # compare equal to False and therefore does not switch colours off.
    if color == False:
        without_color()

    db = Database()
    self.stopwordsFilter = None

    if thesaurus_path:
        synonyms = Thesaurus()
        synonyms.load(thesaurus_path)
        db.set_thesaurus(synonyms)

    if stopwords_path:
        self.stopwordsFilter = StopwordFilter()
        self.stopwordsFilter.load(stopwords_path)

    # The thesaurus (if any) is attached before the database is loaded.
    db.load(database_path)

    lang_config = LangConfig()
    lang_config.load(language_path)

    self.parser = Parser(db, lang_config)
    self.json_output_path = json_output_path
def make_thesaurus():
    """Build the Jaccard word-pair table and print how long it took.

    The duration is the difference between two ``timer()`` readings taken
    before the first and after the last progress message.
    """
    began = timer()
    print("setting up thesaurus")

    builder = Thesaurus.Thesaurus()
    builder.build_jaccard_word_pair_table()

    print("finished setting up thesaurus")
    finished = timer()
    print(f"{finished - began} seconds")
def make_doc_term():
    """Build the document-term table and print how long it took.

    The duration is the difference between two ``timer()`` readings taken
    before the first and after the last progress message.
    """
    began = timer()
    print("setting up doc_term_table")

    builder = Thesaurus.Thesaurus()
    builder.build_doc_term_table()

    print("finished setting up doc_term_table")
    finished = timer()
    print(f"{finished - began} seconds")
def __init__(self, textIn, thesaurus=None):
    """Store the input text and the thesaurus used to process it.

    Args:
        textIn: The text to work on; stored as ``self.textIn``.
        thesaurus: Thesaurus instance to use. When omitted (``None``) the
            default ``./thesauruses/thesaurusA.pickle`` is loaded.

    The default used to be constructed in the signature, which loaded the
    pickle when the function was defined (i.e. on import, even if every
    caller passes its own thesaurus) and shared that single instance across
    all objects. It is now created lazily, once per call that needs it.
    """
    if thesaurus is None:
        thesaurus = Thesaurus(filename="./thesauruses/thesaurusA.pickle")

    self.textOut = None
    self.thesaurus = thesaurus
    # Punctuation marks recognised so far; extend as needed.
    self.punctuation = [",", ".", "?", "!", ":", ";", "-"]
    self.textIn = textIn
def set_method_type(self, method_type):
    """Select the lookup strategy identified by *method_type*.

    '1' -> LocalMethod, '2' -> Thesaurus, '3' -> Wordnet,
    '4' -> MySpellCheker. Each is constructed with this object as its only
    argument and stored in ``self._method_class``. Any other value leaves
    ``self._method_class`` untouched.
    """
    # Constructors are wrapped in lambdas so that only the selected class
    # name is looked up, exactly as in an if/elif chain.
    choices = (
        ('1', lambda: LocalMethod(self)),
        ('2', lambda: Thesaurus(self)),
        ('3', lambda: Wordnet(self)),
        ('4', lambda: MySpellCheker(self)),
    )
    for code, build in choices:
        if method_type == code:
            self._method_class = build()
            break
class Article:
    """An HTML document reduced to its noun tokens.

    On construction the file at *path* is decoded, stripped of markup,
    normalised and tokenised with the class-level ``tokenizer``; the
    surfaces of tokens whose part of speech matches the noun pattern are
    kept in ``self.tokens``.
    """

    # Encodings tried, in order, when decoding a document.
    encodings = ["utf-8", "cp932", "euc-jp", "iso-2022-jp", "latin_1"]
    # Shared by all instances; built once when the class is defined.
    tokenizer = Thesaurus('thesaurus.csv')

    def __init__(self, path):
        """Read, clean and tokenise the document stored at *path*."""
        print(path)
        self.path = path
        self.contents = self.preprocess(self.get_contents(path))
        self.tokens = [
            token.surface
            for token in self.tokenizer.tokenize(self.contents)
            if re.match("カスタム名詞|名詞,(固有|一般|サ変)", token.part_of_speech)
        ]

    def get_contents(self, path):
        """Return the tag-free text of the document body.

        Each encoding in ``self.encodings`` is tried in turn. The text after
        the first ``<body>``/``<frame>`` tag is used (the whole file if no
        such tag exists); script/style/select/noscript elements and all
        remaining tags are removed.

        Returns ``None`` (after printing the collected decode errors) when
        no encoding can decode the file.
        """
        exceptions = []
        for encoding in self.encodings:
            try:
                # `with` closes the handle even when decoding fails.
                with codecs.open(path, 'r', encoding) as stream:
                    raw = stream.read()
            except UnicodeDecodeError as error:
                # Remember why this encoding was rejected for the report below.
                exceptions.append(error)
                continue

            # One capturing group + maxsplit=1 -> [head, tag name, body].
            parts = re.split("(?i)<(body|frame)[^>]*>", raw, maxsplit=1)
            if len(parts) == 3:
                body = parts[2]
            else:
                print('Cannot split ' + path)
                body = raw

            without_blocks = re.sub(
                r"(?is)<(script|style|select|noscript)[^>]*>.*?</\1\s*>",
                "", body)
            return re.sub("<[^>]+?>", "", without_blocks)

        print('Cannot detect encoding of ' + path)
        print(exceptions)
        return None

    def get_title(self, path):
        """Return the last '/'-separated component of *path*."""
        return path.split('/')[-1]

    def preprocess(self, text):
        """Replace HTML entities by spaces and widen half-width characters.

        Digits are left as they are (``digit=False``).
        """
        text = re.sub("&[^;]+;", " ", text)
        text = mojimoji.han_to_zen(text, digit=False)
        return text

    def dense(self, dictionary):
        """Return this article as a dense bag-of-words vector.

        Only tokens that occur among ``dictionary.values()`` are counted;
        the vector has ``len(dictionary)`` entries.
        """
        known = set(dictionary.values())
        text = [token for token in self.tokens if token in known]
        corpus = dictionary.doc2bow(text)
        return matutils.corpus2dense([corpus], len(dictionary)).T[0]
class MyApp():
    """Tkinter front end for viewing and editing a thesaurus.

    The window holds two list boxes - descriptors on the left, the
    "relation term" lines of one descriptor on the right - each with
    add/edit/delete buttons, plus a "Datei" menu for new, connect, import,
    export and close.
    """

    def __init__(self, parent, thes=""):
        """Initialize the GUI with all visible elements and menus.

        parent -- the Tk window that hosts the widgets
        thes   -- thesaurus to edit; the default "" creates an empty one
        """
        self.MyParent = parent

        # Use the given thesaurus or create an empty one. Compared with
        # `==`: identity of string literals is an implementation detail.
        if thes == "":
            self.t1 = Thesaurus("Neuer Thesaurus")
        else:
            self.t1 = thes

        self.MyParent.title("uberthesaurus - %s" % self.t1.name)

        # scrollbars for the listboxes
        self.scrollbar1 = Scrollbar(self.MyParent, orient=VERTICAL)
        self.scrollbar2 = Scrollbar(self.MyParent, orient=VERTICAL)

        # 2 listboxes for descriptors and terms
        self.deslistbox = Listbox(self.MyParent, yscrollcommand=self.scrollbar1.set, exportselection=0)
        self.termlistbox = Listbox(self.MyParent, yscrollcommand=self.scrollbar2.set, exportselection=0)
        self.deslistbox.bind("<<ListboxSelect>>", lambda event: self.update_tlist())

        # a frame for changing elements
        self.myContainer1 = Frame(self.MyParent)

        # connect the scrollbars to the listboxes
        self.scrollbar1.config(command=self.deslistbox.yview)
        self.scrollbar2.config(command=self.termlistbox.yview)

        # buttons for interaction with descriptors
        self.add1_button = Button(self.MyParent, text='Hinzufuegen', command=self.add_des, width=10)
        self.edit1_button = Button(self.MyParent, text='Bearbeiten', command=self.edit_des, width=10)
        self.del1_button = Button(self.MyParent, text="Loeschen", command=self.del_des, width=10)

        # buttons for interaction with terms
        self.add2_button = Button(self.MyParent, text='Hinzufuegen', command=self.add_term, width=10)
        self.edit2_button = Button(self.MyParent, text='Bearbeiten', command=self.edit_term, width=10)
        self.del2_button = Button(self.MyParent, text="Loeschen", command=self.del_term, width=10)

        # configure the spacing
        self.MyParent.columnconfigure(1, weight=0)
        self.MyParent.columnconfigure(1, pad=0)
        self.MyParent.columnconfigure(2, pad=7)
        self.MyParent.columnconfigure(3, pad=7)
        self.MyParent.rowconfigure(1, weight=0)
        self.MyParent.rowconfigure(2, weight=0)
        self.MyParent.rowconfigure(3, weight=1)

        # place all GUI elements
        self.add1_button.grid(row=1, column=0, pady=2, sticky=NW)
        self.edit1_button.grid(row=2, column=0, pady=2, sticky=NW)
        self.del1_button.grid(row=3, column=0, pady=2, sticky=NW)
        self.deslistbox.grid(row=1, column=1, rowspan=3, pady=5, sticky=NS)
        self.scrollbar1.grid(row=1, column=2, rowspan=3, pady=5, sticky=NS)
        self.termlistbox.grid(row=1, column=3, rowspan=3, pady=5, sticky=NS)
        self.scrollbar2.grid(row=1, column=4, rowspan=3, pady=5, sticky=NS)
        self.add2_button.grid(row=1, column=5, pady=2, sticky=NW)
        self.edit2_button.grid(row=2, column=5, pady=2, sticky=NW)
        self.del2_button.grid(row=3, column=5, pady=2, sticky=NW)

        # menu bar with a single "Datei" cascade
        self.menu = Menu(self.MyParent)
        self.MyParent.config(menu=self.menu)
        self.filemenu = Menu(self.menu)
        self.menu.add_cascade(label="Datei", menu=self.filemenu)

        self.filemenu.add_command(label="Neu", command=self.new_thes)
        self.filemenu.add_command(label="Verbinden", command=self.t1.connect)
        self.filemenu.add_command(label="Import", command=self.importdatei)
        self.filemenu.add_command(label="Export", command=self.export)
        self.filemenu.add_command(label="Schliessen", command=self.exit_prog)

    def _selected_des(self):
        """Return the descriptor selected in the left listbox, or None."""
        selection = self.deslistbox.curselection()
        if not selection:
            return None
        return self.deslistbox.get(selection)

    def update_dlist(self):
        """Refill the descriptor listbox with the sorted entry names."""
        self.deslistbox.delete(0, END)
        for elem in sorted(self.t1.entries.keys()):
            self.deslistbox.insert(END, elem)

    def update_tlist(self):
        """Refill the term listbox for the selected descriptor.

        Without a selection the first descriptor in the listbox is shown.
        Each line has the form "<relation> <term>".
        """
        if not self.t1.entries:
            self.termlistbox.delete(0, END)
            return

        des = self._selected_des()
        if des is None:
            des = self.deslistbox.get(0)
        tlist = self.t1.entries[des].get_terms()

        self.termlistbox.delete(0, END)
        for key, value in sorted(tlist.iteritems()):
            for elem in value:
                self.termlistbox.insert(END, key + " " + elem)

    def del_des(self):
        """Delete the descriptor selected in the descriptor listbox."""
        des = self._selected_des()
        if des is None:
            return
        self.t1.delete_entries(des)
        self.update_dlist()
        self.update_tlist()

    def add_des(self):
        """Ask for a new descriptor name and add it to the thesaurus."""
        self.des = tkSimpleDialog.askstring("Deskriptor hinzufuegen", "Deskriptor:")
        if self.des is not None:
            self.t1.create_entries(self.des)
            self.update_dlist()
            self.update_tlist()

    def edit_des(self):
        """Ask for a new name and rename the selected descriptor.

        Does nothing when no descriptor is selected (previously this made
        the listbox lookup fail after the dialog had been answered).
        """
        current = self._selected_des()
        if current is None:
            return
        self.des = tkSimpleDialog.askstring("Deskriptor bearbeiten", "Bearbeiten:")
        if self.des is not None:
            self.t1.edit_entries(current, self.des)
            self.update_dlist()
            self.update_tlist()

    def del_term(self):
        """Delete the term selected in the term listbox.

        Requires a selection in both listboxes; otherwise nothing happens.
        """
        selection = self.termlistbox.curselection()
        des = self._selected_des()
        if not selection or des is None:
            return
        # Lines are "<relation> <term>"; split once so that terms
        # containing spaces stay intact.
        self.term = self.termlistbox.get(selection).split(" ", 1)
        self.t1.entries[des].remove_term(self.term[0], self.term[1])
        self.update_tlist()

    def add_term(self):
        """Ask for "<relation> <term>" and add it to the selected descriptor.

        Nothing is added when no descriptor is selected, the dialog is
        cancelled, or the input does not contain both parts.
        """
        des = self._selected_des()
        if des is None:
            return
        self.term = tkSimpleDialog.askstring("Term hinzufuegen", "Rel Term:")
        if self.term is None:
            return
        # First word is the relation, the rest is the term.
        self.term = self.term.strip().split(None, 1)
        if len(self.term) != 2:
            return
        self.t1.add(des, self.term[1], self.term[0])
        self.update_dlist()
        self.update_tlist()

    def edit_term(self):
        """Ask for "<relation> <term>" and apply it to the selected term.

        Nothing is changed when either listbox has no selection, the dialog
        is cancelled, or the input does not contain both parts.
        """
        des = self._selected_des()
        selection = self.termlistbox.curselection()
        if des is None or not selection:
            return
        self.rel_term = tkSimpleDialog.askstring("Term bearbeiten", "Rel Term")
        if self.rel_term is None:
            return
        self.rel_term = self.rel_term.strip().split(None, 1)
        if len(self.rel_term) != 2:
            return
        self.rel_term_old = self.termlistbox.get(selection).split(" ", 1)

        if self.rel_term[0] != self.rel_term_old[0]:
            self.t1.entries[des].edit_rel(str(self.rel_term_old[0]), str(self.rel_term_old[1]), str(self.rel_term[0]))
        # NOTE(review): when the relation was changed just above, this call
        # still passes the OLD relation - confirm edit_term copes with that.
        if self.rel_term[1] != self.rel_term_old[1]:
            self.t1.entries[des].edit_term(str(self.rel_term_old[0]), str(self.rel_term_old[1]), str(self.rel_term[1]))
        self.update_dlist()
        self.update_tlist()

    def exit_prog(self):
        """Close the application window."""
        self.MyParent.destroy()

    def new_thes(self):
        """Clear all entries and reset the thesaurus name and window title."""
        self.t1.entries = {}
        self.update_dlist()
        self.update_tlist()
        self.t1.name = "Neuer Thesaurus"
        self.MyParent.title("uberthesaurus - %s" % self.t1.name)

    def export(self):
        """Ask for a target file and export the thesaurus to it."""
        self.formats = [
            ('Comma-separated values', '*.csv'),
            ('JavaScript Object Notation', '*.json'),
            ('Extensible Markup Language', '*.xml'),
        ]
        self.filename = asksaveasfilename(filetypes=self.formats, title="Den Thesaurus exportieren", defaultextension=".xml")
        if self.filename:
            self.t1.export_thesaurus(self.filename)
            self.MyParent.title("uberthesaurus - %s" % self.t1.name)
        else:
            # Single-argument call form: same output on Python 2 and 3.
            print("Keine Datei angegeben.")

    def importdatei(self):
        """Ask for a source file and import the thesaurus from it."""
        self.filename = askopenfilename()
        if self.filename:
            self.t1.import_thesaurus(self.filename)
            self.update_dlist()
            self.MyParent.title("uberthesaurus - %s" % self.t1.name)
        else:
            print("Keine Datei angegeben.")
def __init__(self, parent, thes=""):
    """Initialize the GUI with all visible elements and menus.

    parent -- the Tk window that hosts the widgets
    thes   -- thesaurus to edit; the default "" creates an empty one
    """
    self.MyParent = parent

    # Use the given thesaurus or create an empty one. Compared with `==`:
    # identity of string literals is an implementation detail.
    if thes == "":
        self.t1 = Thesaurus("Neuer Thesaurus")
    else:
        self.t1 = thes

    self.MyParent.title("uberthesaurus - %s" % self.t1.name)

    # scrollbars for the listboxes
    self.scrollbar1 = Scrollbar(self.MyParent, orient=VERTICAL)
    self.scrollbar2 = Scrollbar(self.MyParent, orient=VERTICAL)

    # 2 listboxes for descriptors and terms
    self.deslistbox = Listbox(self.MyParent, yscrollcommand=self.scrollbar1.set, exportselection=0)
    self.termlistbox = Listbox(self.MyParent, yscrollcommand=self.scrollbar2.set, exportselection=0)
    self.deslistbox.bind("<<ListboxSelect>>", lambda event: self.update_tlist())

    # a frame for changing elements
    self.myContainer1 = Frame(self.MyParent)

    # connect the scrollbars to the listboxes
    self.scrollbar1.config(command=self.deslistbox.yview)
    self.scrollbar2.config(command=self.termlistbox.yview)

    # buttons for interaction with descriptors
    self.add1_button = Button(self.MyParent, text='Hinzufuegen', command=self.add_des, width=10)
    self.edit1_button = Button(self.MyParent, text='Bearbeiten', command=self.edit_des, width=10)
    self.del1_button = Button(self.MyParent, text="Loeschen", command=self.del_des, width=10)

    # buttons for interaction with terms
    self.add2_button = Button(self.MyParent, text='Hinzufuegen', command=self.add_term, width=10)
    self.edit2_button = Button(self.MyParent, text='Bearbeiten', command=self.edit_term, width=10)
    self.del2_button = Button(self.MyParent, text="Loeschen", command=self.del_term, width=10)

    # configure the spacing
    self.MyParent.columnconfigure(1, weight=0)
    self.MyParent.columnconfigure(1, pad=0)
    self.MyParent.columnconfigure(2, pad=7)
    self.MyParent.columnconfigure(3, pad=7)
    self.MyParent.rowconfigure(1, weight=0)
    self.MyParent.rowconfigure(2, weight=0)
    self.MyParent.rowconfigure(3, weight=1)

    # place all GUI elements
    self.add1_button.grid(row=1, column=0, pady=2, sticky=NW)
    self.edit1_button.grid(row=2, column=0, pady=2, sticky=NW)
    self.del1_button.grid(row=3, column=0, pady=2, sticky=NW)
    self.deslistbox.grid(row=1, column=1, rowspan=3, pady=5, sticky=NS)
    self.scrollbar1.grid(row=1, column=2, rowspan=3, pady=5, sticky=NS)
    self.termlistbox.grid(row=1, column=3, rowspan=3, pady=5, sticky=NS)
    self.scrollbar2.grid(row=1, column=4, rowspan=3, pady=5, sticky=NS)
    self.add2_button.grid(row=1, column=5, pady=2, sticky=NW)
    self.edit2_button.grid(row=2, column=5, pady=2, sticky=NW)
    self.del2_button.grid(row=3, column=5, pady=2, sticky=NW)

    # menu bar with a single "Datei" cascade
    self.menu = Menu(self.MyParent)
    self.MyParent.config(menu=self.menu)
    self.filemenu = Menu(self.menu)
    self.menu.add_cascade(label="Datei", menu=self.filemenu)

    self.filemenu.add_command(label="Neu", command=self.new_thes)
    self.filemenu.add_command(label="Verbinden", command=self.t1.connect)
    self.filemenu.add_command(label="Import", command=self.importdatei)
    self.filemenu.add_command(label="Export", command=self.export)
    self.filemenu.add_command(label="Schliessen", command=self.exit_prog)
textSubstitutions.append(sentenceSubstitutions) return textSubstitutions def obfuscate(self, text): """Apply the functions in sequence to obfuscate the text""" functions = [ self._tokenise, self._substituteSynonyms, self._untokenise ] temp = text for function in functions: temp = function(temp) return temp def __repr__(self): return "Text obfuscator on thesaurus: '{}', with replace factor: {}".format( self._thesaurus.filename, self._replaceFactor) if __name__ == "__main__": seed(0) thesaurus = Thesaurus(filename="./thesauruses/thesaurusA.pickle") obfuscator = Obfuscator(thesaurus, replaceFactor=1) print(obfuscator) text = "The quick brown fox jumped over the lazy dog!" obfuscatedText = obfuscator.obfuscate(text) print(text) print(obfuscatedText)
("jacoby", "N"), ] testpair = (("kiwis", "N"), ("zealanders", "N")) # simcache=False #whether file currently contains valid sims k = 1000 kdisplay = 10 print(sys.argv) Thesaurus.byblo = byblo # take command line argument as to whether this is a byblo file or not if metric == "cosine": compress = True else: compress = False mythes = Thesaurus(vectorfilename, simcachefile, simcache, windows, k, adja, adjb, compress) mythes.readvectors() # if simcache: # check=True # else: # for wordA in words: # for wordB in words: # mythes.outputsim(wordA,wordB,metric) (word1, word2) = testpair if simcache == False: mythes.outputsim(word1, word2, metric) mythes.allpairssims(metric)
def __init__(self, config=None):
    """Create the collaborators used by this object.

    Args:
        config: Optional configuration; kept in ``self._config`` and passed
            unchanged to ``Parse`` and ``Indexer``.
    """
    self._config = config

    # Components that take the configuration.
    self._parser = Parse(config)
    self._indexer = Indexer(config)

    # Components built with their defaults.
    self._model = Word2Vec()
    self._model_1 = Thesaurus()
from thesaurus import Thesaurus from sys import exit thes = Thesaurus() print('Type HELP to see a list of commands and instructions.\n') while True: query = input('Enter a word to look it up in the thesaurus:\n') if query.strip() == 'HELP': print('QUIT'.ljust(25), 'Exits the program.') print('REQUIRE <word list(s)>'.ljust(25), 'Shows only results from these specific word list(s).') print('REQUIRE ANY'.ljust(25), 'Does not show words that are not in a word list.') print('RESET SETTINGS'.ljust(25), 'Resets to default settings.') print() print('To add a new word list, create a txt file in the filters directory with each word on a new line. Then restart the program.') elif query.strip() == 'QUIT': exit() elif query.strip().lower() == 'reset settings': thes = Thesaurus() print('Settings have been reset.') elif query.strip() == 'require any': thes = Thesaurus(must_match=True) elif len(query.strip().split()) > 1: # change required word list(s)
def init_logic_translator(self, logic_thesaurus_path):
    """Load the logic-word thesaurus and register it with jieba.

    The thesaurus is built from *logic_thesaurus_path* with the single
    attribute column "Logic"; the same file is then loaded as a jieba user
    dictionary.
    """
    attribute_names = ["Logic"]
    self._logic_thesaurus = Thesaurus(logic_thesaurus_path, attribute_names)
    jieba.load_userdict(logic_thesaurus_path)
def init_classification(self, customized_thesaurus_path):
    """Load the customized thesaurus and register it with jieba.

    The thesaurus is built from *customized_thesaurus_path* with the
    attribute columns "Frequency" and "Property"; the same file is then
    loaded as a jieba user dictionary.
    """
    attribute_names = ["Frequency", "Property"]
    self._customized_thesaurus = Thesaurus(customized_thesaurus_path,
                                           attribute_names)
    jieba.load_userdict(customized_thesaurus_path)
class TokenUtil(object):
    """Combine jieba tokenisation with project Thesaurus lookups.

    Offers keyword extraction against a customized thesaurus
    (``get_keyword``) and translation of NOT/AND/OR logic words into a
    search expression (``logic_translate``). The matching ``init_*`` method
    must be called before either of them, because it creates the thesaurus
    attribute the method reads.
    """
    def __init__(self, general_thesaurus_path):
        """Read the general thesaurus"""
        jieba.initialize(general_thesaurus_path)

    def init_classification(self, customized_thesaurus_path):
        """Load the customized thesaurus (attributes "Frequency" and
        "Property") and register the same file as a jieba user dictionary."""
        self._customized_thesaurus = Thesaurus(customized_thesaurus_path, ["Frequency", "Property"])
        jieba.load_userdict(customized_thesaurus_path)

    def get_keyword(self, content):
        """Return the customized-thesaurus words found in *content*.

        Returns ``{'Type': 'customized', 'Token': [words...]}`` when at
        least one search-mode token is in the customized thesaurus,
        otherwise ``{'Type': 'general', 'Token': content}``.
        """
        seg_list = jieba.cut_for_search(content)
        customized_words = []
        for atoken in seg_list:
            if atoken in self._customized_thesaurus:
                customized_words.append(atoken)
        if len(customized_words) > 0:
            return {'Type': 'customized', 'Token': customized_words}
        else:
            return {'Type': 'general', 'Token': content}

    def init_logic_translator(self, logic_thesaurus_path):
        """Initialize logic translator"""
        self._logic_thesaurus = Thesaurus(logic_thesaurus_path, ["Logic"])
        jieba.load_userdict(logic_thesaurus_path)

    def logic_translate(self, content):
        """Translate the content to logic expression for Baidu Search Engine

        Tokens found in the logic thesaurus are typed by its "Logic"
        attribute; everything else is typed "Common". The token list is
        then rewritten: NOT followed by a Common token becomes ``-(x)``,
        AND between two Common tokens becomes ``(a b)``, OR between two
        Common tokens becomes ``(a | b)``. A logic word that cannot be
        merged is downgraded to Common.

        Returns ``{"Type": "general", "Content": content}`` when no logic
        word occurs, otherwise ``{"Type": "logic", "Content": expression}``.
        """
        content = strdecode(content)
        result_list = []
        token_list = jieba.tokenize(content)
        has_logic = False
        # token[0] is the word text of each tokenize() result.
        for token in token_list:
            if token[0] in self._logic_thesaurus:
                has_logic = True
                result_list.append({"Type": self._logic_thesaurus.get_attr(token[0])["Logic"], "Content": token[0]})
            else:
                result_list.append({"Type": "Common", "Content": token[0]})
        if not has_logic:
            return {"Type": "general", "Content": content}
        translate_finish = False
        or_list = []
        not_list = []
        and_list = []
        # Each merge deletes items from result_list and restarts the scan
        # (break); the loop ends once a full pass reaches the last index.
        while(not translate_finish and len(result_list) > 0):
            for index in range(len(result_list)):
                if result_list[index]["Type"] == "NOT":  # the token is a NOT logic word
                    if index < len(result_list) - 1 and result_list[index + 1]["Type"] == "Common":
                        # the statement can be merged, so merge it
                        not_list.append("-(" + result_list[index + 1]["Content"] + ")")
                        del result_list[index + 1]
                        del result_list[index]
                        break
                    else:
                        # cannot be merged: treat the logic word as a common word
                        result_list[index]["Type"] = "Common"
                if result_list[index]["Type"] == "AND":  # the token is an AND logic word
                    if 0 < index < len(result_list) - 1 and result_list[index + 1]["Type"] == "Common" and\
                            result_list[index - 1]["Type"] == "Common":
                        and_list.append("(" + result_list[index - 1]["Content"] + " " + result_list[index + 1]["Content"] + ")")
                        del result_list[index + 1]
                        del result_list[index]
                        del result_list[index - 1]
                        break
                    else:
                        # cannot be merged: treat the logic word as a common word
                        result_list[index]["Type"] = "Common"
                if result_list[index]["Type"] == "OR":  # the token is an OR connective
                    if 0 < index < len(result_list) - 1 and result_list[index + 1]["Type"] == "Common" and\
                            result_list[index - 1]["Type"] == "Common":
                        or_list.append("(" + result_list[index - 1]["Content"] + " | " + result_list[index + 1]["Content"] + ")")
                        del result_list[index + 1]
                        del result_list[index]
                        del result_list[index - 1]
                        break
                    else:
                        # cannot be merged: treat the logic word as a common word
                        result_list[index]["Type"] = "Common"
                if index >= len(result_list) - 1:
                    # all logic words have been processed
                    translate_finish = True
        # Remaining common words first, then AND, OR and NOT groups.
        result_content = " ".join([item["Content"] for item in result_list]) + " " + " ".join(and_list) + " " +\
            " ".join(or_list) + " " + " ".join(not_list)
        # NOTE(review): result_content always contains the three separator
        # spaces added above, so this comparison can never be true.
        if(result_content == ""):
            return {"Type": "general", "Content": content}
        return {"Type": "logic", "Content": result_content}
parameters=conf.configure(sys.argv) inputfile="allBLESS-dependencies.json" inputpath=os.path.join(parameters['datadir'],inputfile) print inputpath with open(inputpath,'r') as instream: for line in instream: print line pairs = json.loads(inputpath) cluster0=[] cluster1=[] for (w1,w2,target) in pairs: if target==1: cluster1.append(w2) else: cluster0.append(w2) print len(cluster0), cluster0 print len(cluster1),cluster1 exit() words=["chicken","cricket","jaguar"] pos="N" mythes = Thesaurus("",parameters["simfile"],True,False,parameters["k"],1,1,False) mythes.readsomesims(words) for word in words: mythes.displayneighs((word,pos),100)
''' Created on Dec 4, 2012 @author: juliewe ''' #test thesaurus class from thesaurus import Thesaurus from thesaurus import Entry from thesaurus import Neighbours filename='/Volumes/research/calps/data3/mlcl/DisCo/thesauri/exp4-11c.strings' mythesaurus = Thesaurus("test") # e1 = Entry("cat/N","dog/N",0.8) e2 = Entry("cat/N","ostrich/N",0.5) e3 = Entry("dog/N","ostrich/N",0.6) mythesaurus.addEntry(e1) mythesaurus.addEntry(e2) mythesaurus.addEntry(e3) print "Similarity between cat and ostrich is ", mythesaurus.lookupSim("cat","ostrich") print "Similarity between cat and bird is ", mythesaurus.lookupSim("cat","bird") print "Number of entries:", Entry.entrycount print "Number of neighbour sets:", Neighbours.entrycount
def __init__(self, synonym_file, input_file_1, input_file_2, tuple_size):
    """Remember the two inputs and the tuple size, and load the synonyms.

    Args:
        synonym_file: Passed to ``Thesaurus``; the result is kept in
            ``self.thesaurus``.
        input_file_1: Stored unchanged as ``self.input_file_1``.
        input_file_2: Stored unchanged as ``self.input_file_2``.
        tuple_size: Stored unchanged as ``self.tuple_size``.
    """
    synonyms = Thesaurus(synonym_file)
    self.thesaurus = synonyms

    self.input_file_1, self.input_file_2 = input_file_1, input_file_2
    self.tuple_size = tuple_size
class Data:
    """Static knowledge base: canned answers, synonym groups and stop words.

    All three attributes are class-level constants built when the class is
    defined.
    """

    # One AnswerStructure per canned reply. The first argument looks like
    # the answer text and the second like the question phrasings that
    # should trigger it - confirm against AnswerStructure.
    structured_answers = [
        AnswerStructure('Ola, tudo bem?', ['Oi tudo bem', 'Oi', 'Tudo bem']),
        AnswerStructure(
            'Um ser humano adulto possui entre 4 a 6 litros de sangue.', [
                'quantos litros de sangue uma pessoa tem ?',
                'qual a quantidade de sangue de uma pessoa adulta ?'
            ]),
        AnswerStructure('São retirados 450 mililitros numa doação de sangue.',
                        ['quantos litros de sangue doação ?']),
        AnswerStructure('Celebre frase de Renè Descartes.', [
            ' De quem e a famosa frase “ Penso , logo existo ” ?',
            'famosa frase “ Penso , logo existo ” ?',
            'frase “ Penso , logo existo ” ?', 'Penso , logo existo',
            'Penso logo existo'
        ]),
        AnswerStructure('O chuveiro elétrico foi inventado no Brasil.', [
            'De onde é a invenção do chuveiro elétrico ?',
            'invenção do chuveiro elétrico ?',
            'onde foi inventado o chuveiro elétrico ?',
            'que país inventou o chuveiro elétrico ?'
        ]),
        AnswerStructure(
            'Quem inventou o chuveiro elétrico foi o brasileiro Francisco Canho.',
            [
                'Quem inventou o chuveiro elétrico ?',
                'Que pessoa inventou o chuveiro elétrico ?'
            ]),
        AnswerStructure(
            'Vaticano e Russia são o menor e o maior país do mundo, respectivamente.',
            [
                'Qual o menor e o maior país do mundo ?',
                'menor e o maior país do mundo ?', 'menor e o maior país',
                'qual menor e o maior país ?', 'país', 'países'
            ]),
        AnswerStructure('Vaticano é o menor país do mundo.', [
            'Qual o menor país do mundo ?', 'menor país do mundo',
            'menor país', 'Qual o menor país ?'
        ]),
        AnswerStructure('Russia é o maior país do mundo.', [
            'Qual o maior país do mundo ?', 'maior país do mundo ?',
            'maior país', 'Qual o maior país ?'
        ]),
        AnswerStructure('João Goulart.', [
            'Qual o nome do presidente do Brasil que ficou conhecido como Jango ?',
            'Qual o nome do presidente Jango ?', 'nome do presidente Jango ?',
            'Jango ?', 'presidente conhecido como Jango'
        ]),
        AnswerStructure(
            'A velocidade da luz é de 299 792 458 metros por segundo.',
            ['velocidade da luz em m/s', 'qual a velocidade da luz em m/s ?']),
        AnswerStructure('A velocidade da luz é de 300.000 Km/s.', [
            'velocidade da luz em km/s', 'qual a velocidade da luz',
            'velocidade da luz', 'qual a velocidade da luz em km/s ?'
        ]),
        AnswerStructure('42.', [
            '6x9', '6 x 9',
            'qual a resposta para a vida , o universo e tudo mais ?',
            'vida universo e tudo mais', 'resposta para a vida',
            'resposta para o universo'
        ]),
    ]

    # Groups of interchangeable words/phrases, including spellings with and
    # without accents and chat abbreviations.
    thesaurus = Thesaurus([
        Synonym(['voce', 'vc', 'oce', 'ce', 'vs', 'vossa senhoria']),
        Synonym([
            'tudo bem', 'Tudo bem com voce', 'Tudo bom', 'Sussa', 'De boas',
            'De boa', 'Suave', 'Beleza', 'Blz', 'Firmeza', 'Fmz', 'Como vai',
            'Como está', 'Na boa'
        ]),
        Synonym(['Ola', 'Oi', 'E ai', 'Dae']),
        Synonym(['sair', 'fim', 'terminar', 'tchau', 'xau', 'vlw flw', 'flw']),
        Synonym(['maior', 'mais grande', 'gigante', 'grande']),
        Synonym(['menor', 'mais pequeno', 'pequeno']),
        Synonym(['doacao', 'doar', 'doação', 'doaçao', 'doacão']),
        Synonym([
            'de onde é', 'de onde e', 'onde', 'em que país', 'em que pais',
            'em qual pais', 'em qual país', 'em que lugar'
        ]),
        Synonym(['a gente', 'nós']),
        Synonym(['pais', 'país']),
        Synonym(['paises', 'países']),
        Synonym([
            'invencao', 'invenção', 'invencão', 'invençao', 'criação',
            'criacao'
        ]),
        Synonym(['inventor', 'criador']),
        Synonym(['é', 'e']),
        Synonym(['qual a', 'qual é a', 'qual e a']),
        Synonym(['tem', 'possui']),
        Synonym(['ter', 'possuir']),
        Synonym([
            'quantidade', 'quantos', 'qual a quantidade', 'qual quantidade',
            'quanto'
        ]),
        Synonym(['elétrico', 'eletrico']),
        Synonym(['pessoa', 'ser humano adulto', 'pessoa adulta', 'adulto']),
        Synonym([
            'que pessoa', 'qual pessoa', 'quem', 'qual foi a pessoa',
            'quem foi que'
        ]),
        Synonym([
            'metros por segundo', 'm/s', 'metros/s', 'm/sec', 'metros/sec',
            'mt/s'
        ]),
        Synonym(['quilometros por segundo', 'km/s', 'kilometros por segundo']),
    ])

    # Articles, prepositions and punctuation marks, held in an immutable set.
    # Presumably skipped when matching questions - verify against the caller.
    words_to_be_ignored = frozenset([
        'da', 'de', 'do', 'a', 'o', 'um', 'uma', 'é', 'para', 'pra', 'com',
        'sem', '?', '.', ','
    ])