def run_news_through_filter(news):
    print('Filtering news...')
    filtered = []
    news = [random.choice(news)]
    for item in news:
        headline = item['title'].split()
        altered_sentence = []
        for word in headline:
            w = Word(word)
            synonym = w.synonyms(relevance=1)
            if len(synonym) == 0:
                word_to_use = word
            else:
                word_to_use = random.choice(synonym)
            altered_sentence.append(word_to_use)
        altered_headline = ' '.join(altered_sentence)
        tmp = {'altered_title': altered_headline, 'url': item['url']}
        filtered.append(tmp)
    return filtered[0]
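# A usage sketch for the filter above. The input shape (a list of dicts with
# 'title' and 'url' keys) is inferred from the function body; the sample
# headlines are made up:
import random
from thesaurus import Word

sample_news = [
    {'title': 'Scientists discover distant planet', 'url': 'https://example.com/planet'},
    {'title': 'Markets rally after strong report', 'url': 'https://example.com/markets'},
]
print(run_news_through_filter(sample_news))  # one randomly chosen, reworded headline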
def get_word_to_synonyms_dict(self, n, text, tokenized_text):
    word_to_syns_dict = {}
    word_to_pos = self.get_important_words(tokenized_text)
    for w in tokenized_text:
        if w in word_to_pos:
            list_of_syns_for_w = []
            original_synset = lesk(text, w)
            if original_synset:
                word = Word(w)
                p_o_s = pos_dict_thesaurus[word_to_pos[w]]
                syns = word.synonyms('all', partOfSpeech=p_o_s)
                flat_list = [item for sublist in syns for item in sublist]
                for candidate_syn in flat_list:
                    candidate_synsets = wordnet.synsets(candidate_syn, pos=pos_dict[word_to_pos[w]])
                    if len(candidate_synsets) > 0:
                        list_sims = [original_synset.wup_similarity(x)
                                     for x in candidate_synsets
                                     if original_synset.wup_similarity(x)]
                        if len(list_sims) > 0:
                            maxSim = max(list_sims)
                            list_of_syns_for_w.append((candidate_syn, maxSim))
            if list_of_syns_for_w:
                list_of_syns_for_w.sort(key=lambda x: x[1], reverse=True)
                n_truncate = min(n, len(list_of_syns_for_w))
                word_to_syns_dict[(w, word_to_pos[w])] = list_of_syns_for_w[:n_truncate]
    return word_to_syns_dict
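# The function above relies on two POS lookup tables defined elsewhere. A
# minimal sketch of what they plausibly contain (the names are kept, the
# contents are an assumption): Penn Treebank tag prefixes mapped to
# thesaurus.com POS strings (as listed in the Tkinter snippet further down)
# and to WordNet POS constants.
from nltk.corpus import wordnet

pos_dict_thesaurus = {'NN': 'noun', 'VB': 'verb', 'JJ': 'adj', 'RB': 'adv'}
pos_dict = {'NN': wordnet.NOUN, 'VB': wordnet.VERB, 'JJ': wordnet.ADJ, 'RB': wordnet.ADV}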
def parallel(inputs):
    (word, word_Dictionary, count, chosen_index) = inputs
    new_instance = Word(word)
    # try the most relevant synonyms first, then fall back to lower relevance
    synonyms = new_instance.synonyms('all', relevance=[3], partOfSpeech=part)
    response = findWordInDataset(word_Dictionary, synonyms, word, count)
    if response == False:
        synonyms = new_instance.synonyms('all', relevance=[2], partOfSpeech=part)
        response = findWordInDataset(word_Dictionary, synonyms, word, count)
    if response == False:
        synonyms = new_instance.synonyms('all', relevance=[1], partOfSpeech=part)
        response = findWordInDataset(word_Dictionary, synonyms, word, count)
    return (chosen_index, response)
def cli(word):
    rWord = Word(word)
    synonyms = rWord.synonyms()
    if not synonyms:
        click.echo("No results.")
        return
    for idx, synonym in enumerate(synonyms):
        click.echo("{0}. {1}".format(idx + 1, synonym))
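# The command above is missing its click wiring; a hedged sketch of how it is
# presumably registered (the decorator placement is an assumption):
import click
from thesaurus import Word

@click.command()
@click.argument('word')
def cli(word):
    """Print numbered synonyms for WORD."""
    synonyms = Word(word).synonyms()
    if not synonyms:
        click.echo("No results.")
        return
    for idx, synonym in enumerate(synonyms):
        click.echo("{0}. {1}".format(idx + 1, synonym))

if __name__ == '__main__':
    cli()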
def find_synonym(word):
    if word in common:
        return common[word]
    w = Word(word)
    syns = w.synonyms()
    for syn in syns:
        if not has5(syn):
            return syn
    return word.replace('e', '-')
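# `common` and `has5` are defined elsewhere. Given the fallback that strikes out
# every 'e', has5 plausibly tests for the fifth letter of the alphabet; a purely
# hypothetical sketch under that assumption:
def has5(word):
    # Assumption: True when the word contains the letter 'e'.
    return 'e' in word.lower()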
def get_synonyms(words_dict):
    word_syns = {}
    for w in tqdm.tqdm(words_dict):
        try:
            # Word() itself performs the lookup and can raise (see the Flask
            # snippet below, which catches MisspellingError/WordNotFoundError)
            word = Word(w)
            syns = word.synonyms(relevance=3)
        except Exception:
            syns = None
        if syns is not None:
            word_syns[w] = syns
    return word_syns
def _read(filename):
    with open('datsets/categories/%s' % (filename,), 'r') as fp:
        data = fp.read()
    words = [s.strip() for s in data.splitlines()]
    categories[filename] = set(words)
    for word in words:
        if word in ["what", "why", "analyse"]:
            continue
        w = Word(word)
        categories[filename].update(w.synonyms())
    categories[filename] = list(categories[filename])
def get_syn_classes(words):
    syns = []
    for i in range(len(words)):
        try:
            w = Word(words[i])
            syn = w.synonyms('all')  # one sublist per definition
            for j in range(len(syn)):
                syns = syns + syn[j]
        except Exception:
            pass
    words = words + syns
    return words
def get_syn(words):
    syns = []
    for i in range(len(words)):
        try:
            w = Word(words[i])
            syn = w.synonyms('all')
            for j in range(len(syn)):
                syns = syns + syn[j]
        except Exception:
            # print(words[i], ' not found')
            pass
    words = words + syns
    return words
def generate_synonyms_dict(words_list):
    synonyms_dict = dict()
    for chars in words_list:
        try:
            w = Word(chars)
            syns = w.synonyms(
                relevance=3,
                form='common',
            )
            synonyms_dict[chars] = syns
        except Exception:
            synonyms_dict[chars] = [chars]
    return synonyms_dict
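# Usage sketch for the function above. form='common' restricts results to
# common-usage synonyms (the same keyword appears in the Tkinter snippet
# further down); the sample words are made up:
print(generate_synonyms_dict(['happy', 'quick']))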
def new_headline(headline):
    head_list = headline.split()
    new_head = []
    for word in head_list:
        if word in STOP:
            new_head.append(word.upper())
            continue
        w = Word(word)
        syn = w.synonyms()
        if syn:
            new = syn[0]
            new_head.append(new.upper())
        else:
            new_head.append(word.upper())
    new_headline_string = ' '.join(new_head)
    return new_headline_string
def get_synonyms(word, src="wordnet"):
    synonyms = set()
    if src == "wordnet":
        for ss in wn.synsets(word):
            synonyms |= set(ss.lemma_names())
    elif src == "thesaurus":
        try:
            w = Word(word)
        except Exception:
            return synonyms
        try:
            syn = w.synonyms(relevance=[2, 3])
        except Exception:
            return synonyms
        for s in syn:
            if len(s.split(' ')) == 1:  # keep single-word synonyms only
                synonyms.add(s.lower())
    return synonyms
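# A quick usage sketch for the dual-source lookup above (assumes nltk's wordnet
# corpus is downloaded and the thesaurus package is installed):
wordnet_syns = get_synonyms("happy", src="wordnet")
thesaurus_syns = get_synonyms("happy", src="thesaurus")
print(len(wordnet_syns), len(thesaurus_syns))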
def listConcept(word):
    w = Word(word)
    iE = inflect.engine()
    pS = PorterStemmer()
    wordSyns = w.synonyms()
    myList1 = []
    myList1.append(word)
    for w in wordSyns:
        myList1.append(w)
    myList2 = myList1.copy()
    for w in myList2:
        pW = iE.plural(w)
        myList1.append(pW)
    myList3 = myList1.copy()
    for w in myList3:
        myList1.append(pS.stem(w))
    myList1 = list(dict.fromkeys(myList1))
    return myList1
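# Usage sketch: the returned list is the word itself, its synonyms, inflect's
# plural of each, and Porter stems of everything, de-duplicated in order:
print(listConcept('car')[:10])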
def anotherword(response):
    # Some rudimentary synonyms and antonyms.
    choice = input('Enter your choice:\n 1. Synonyms\n 2. Antonyms\n')
    if int(choice) > 2:
        print("Invalid Choice")
        exit()
    word = input("Enter the word: ")
    w = Word(word)
    what = ''
    if '1' in choice:
        temp = w.synonyms()
        what = 'Synonyms'
    elif '2' in choice:
        temp = w.antonyms()
        what = 'Antonyms'
    print('Showing %s of %s' % (what, word))
    for t in temp:
        print(t)
def redact_concept(data, concept):
    from thesaurus import Word
    w = Word(concept)
    w1 = []
    for i in w.synonyms():
        w1.append(i.lower())
    w1.append(concept)
    concept1 = ''
    # sent = data.split('\n')
    for i in nltk.word_tokenize(data):
        if i.lower() in w1:
            stats.append([i, len(i), 'Concept'])
            concept1 += '█' * len(i)
            concept1 += ' '
        elif i == '.':
            concept1 += i
        else:
            concept1 += i
            concept1 += ' '
    return concept1
def reda_concept(data3, filename, con):
    tokens = nltk.word_tokenize(data3)
    w = Word(con)
    concept = w.synonyms()
    concept.append(con)
    for i in concept:
        for j in range(len(tokens)):
            if i.lower() == tokens[j].lower():
                stats.append([tokens[j], len(tokens[j]), filename, 'type:Concept'])
                tokens[j] = '█' * len(i)
    reda = ''
    for i in tokens:
        if i in ['.', ',', ':', ';', '"', '?', '!', '(', ')']:
            reda = reda + i
        else:
            reda = reda + i + ' '
    return reda
def getSynonyms(word):
    syns = set()
    result = vb.synonym(word)
    if isinstance(result, list):
        result = json.loads(result)
        syns.update([synonym['text'] for synonym in result])
    result = []
    for syn in wordnet.synsets(word):
        for l in syn.lemmas():
            syns.add(l.name())
    w = Word(word)
    syns.update(w.synonyms())
    if not syns:
        apiKey = os.getenv('BIG_HUGE')
        result = pyhugeconnector.thesaurus_entry(word=word, api_key=apiKey,
                                                 pos_tag='n', ngram=2,
                                                 relationship_type='syn')
        if isinstance(result, list):
            syns.update(result)
    return syns
def process_words(self):
    """
    Sort words, look for duplicates, then get synonyms and write to output
    files. Looking for duplicates here because that is an indication that
    multiple sources think the term is appropriate for the given grade level.
    """
    # print(self.words)
    # get duplicate words - the words we want to use
    words = self.words
    duplicates = list(set([x for x in words if words.count(x) > 1]))
    duplicates.sort()
    # try a different way
    for word in duplicates:
        try:
            w = Word(word)
        except MisspellingError:
            continue
        else:
            # There are 3 relevance levels: 1 gives the set with the most
            # words, possibly including irrelevant ones. We use 3 to make
            # sure everything stays on topic.
            syns = w.synonyms(relevance=3)
            if syns:
                for s in syns:
                    self.outputFiles[1].write(word + " " + s + "\n")
            ants = w.antonyms(relevance=3)
            if ants:
                for a in ants:
                    self.outputFiles[2].write(word + " " + a + "\n")
            if syns or ants:
                self.outputFiles[0].write(word + " " + self.gradeLevel + "\n")
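# A small illustration of the relevance filter described in the comment above
# (a sketch only; actual counts depend on thesaurus.com's live data, and Word
# is assumed imported from the thesaurus package):
w = Word("cold")
broad = w.synonyms(relevance=1)   # per the comment above, the largest, loosest set
strict = w.synonyms(relevance=3)  # the set that stays on topic
print(len(broad), len(strict))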
def get_synonyms(self, input_word):
    w = Word(input_word)
    print(wordnet.synsets('dog'))   # leftover debug output
    print(wordnet.synsets('test'))  # leftover debug output
    return w.synonyms()
# from PyDictionary import PyDictionary
# dictionary = PyDictionary()
# print(dictionary.synonym("Life"))

from nltk.corpus import wordnet

synonyms = []
for syn in wordnet.synsets("goal"):
    for lm in syn.lemmas():
        synonyms.append(lm.name())
print(set(synonyms))

from thesaurus import Word

w = Word('purpose')
print(w.synonyms())
'''
28 - Get synonyms from Thesaurus
You may need to `pip install thesaurus`
'''
from thesaurus import Word

USERS_WORD = Word('humbug')
print(USERS_WORD.synonyms(1))
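# Note: other snippets in this collection pass 'all' (or nothing) as the first
# positional argument, which selects which definition's synonyms to return;
# synonyms(1) presumably returns synonyms for the word's second definition
# (an assumption based on the synonyms('all', ...) calls elsewhere here):
print(USERS_WORD.synonyms('all'))  # nested lists, one per definition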
def synonyms():
    """QUERY"""
    word = request.args.get("word").lower()
    if word not in wordPath_list:
        wordPath_list.append(word)  # To create the word path
    original_word = wordPath_list[0]

    """GET SYNONYMS"""
    # Get result
    try:
        results = Word(word)
    except exceptions.MisspellingError as msplt:
        session.clear()
        # return render_template("errorRedirect.html", error=msplt)
        session['mspltError'] = str(msplt)
        return redirect(f'/related?word={word}')
    except exceptions.WordNotFoundError as wnf:
        session.clear()
        session['wnf'] = str(wnf)
        return redirect(f'/related?word={word}')
    resultData = results.data
    # session['word'] = word  # To reuse in "/result"

    # Constructing parameters: part of speech and definitions
    numberOfOptions = len(results)
    # .strip('.') because, for example, an html div id of "adj.1" will not
    # function; it has to be "adj1"
    partsOfSpeech = [item['partOfSpeech'].strip('.') for item in resultData]
    definitions = [item['meaning'] for item in resultData]

    # Constructing parameters: synonyms for each part of speech and definition.
    # The flattened list is only needed by the next part, GET RELATED WORDS.
    synList = results.synonyms('all')
    flat_synList = [item for sublist in synList for item in sublist]

    """GET RELATED WORDS"""
    # Result from helpers
    Related_words_data = RelatedWords(word)
    # Filter results: no overlap with the synonyms, at most 10 words, scores rounded
    Related_words_data = [item for item in Related_words_data
                          if item['word'] not in flat_synList]
    Related_words_data = Related_words_data[:10]  # Trim to avoid irrelevant results
    for item in Related_words_data:
        item['score'] = round(item['score'], 2)

    """GET RELATED CONCEPTS"""
    concepts = Relations("_".join(word.split()))  # For example, "eat mice" becomes "eat_mice"

    # Process data: get the lists of relation ids and relation labels
    relations_list_id = []
    for item in concepts:
        if item['rel']['@id'] not in relations_list_id:
            relations_list_id.append(item['rel']['@id'])
    relations_list_label = []
    for item in concepts:
        if item['rel']['label'] not in relations_list_label:
            relations_list_label.append(item['rel']['label'])

    # Round the 'weight' values to display better in html
    for item in concepts:
        item['weight'] = round(item['weight'], 2)

    # Build a dictionary to turn relation labels into readable text
    # (e.g. RelatedTo -> "is related to") for easier printing in html
    label_dict = {
        'RelatedTo': 'is related to',
        'ExternalURL': 'can be linked to other sites',
        'FormOf': 'is a form of',
        'IsA': 'is a',
        'PartOf': 'is a part of',
        'HasA': 'has',
        'UsedFor': 'is used for',
        'CapableOf': 'is capable of',
        'AtLocation': 'can be found in',
        'Causes': 'can lead to',
        'HasSubevent': 'is followed by',
        'HasFirstSubevent': 'starts with',
        'HasLastSubevent': 'ends with',
        'HasPrerequisite': 'requires',
        'HasProperty': 'has property or can be described as',
        'MotivatedByGoal': 'in order to',
        'ObstructedBy': 'is prevented by',
        'Desires': 'typically wants',
        'CreatedBy': 'is created by',
        'Synonym': 'has similar meaning with',
        'Antonym': 'has opposite meaning with',
        'DistinctFrom': 'is distinct from',
        'SymbolOf': 'is a symbol of',
        'DefinedAs': 'can be defined or explained as',
        'Entails': 'entails',
        'MannerOf': 'is a way of',
        'LocatedNear': 'can be found near',
        'HasContext': 'is often used in',
        'SimilarTo': 'is similar to',
        'EtymologicallyRelatedTo': 'has common origin with',
        'EtymologicallyDerivedFrom': 'is derived from',
        'CausesDesire': 'makes someone want',
        'MadeOf': 'is made of',
        'ReceivesAction': 'can be',
        'InstanceOf': 'is an example of',
        'NotDesires': 'typically not want',
        'DerivedFrom': 'is derived from',
    }

    # Copy the label list and swap in the readable names, e.g. 'RelatedTo' to 'is related to'
    real_relations_list_label_names = relations_list_label.copy()
    for i in range(len(relations_list_label)):
        if relations_list_label[i] in label_dict.keys():
            real_relations_list_label_names[i] = label_dict[relations_list_label[i]]

    # List of start nodes
    start_node_list = []
    for item in concepts:
        if item['start']['label'] not in start_node_list:
            start_node_list.append(item['start']['label'])

    # One edge list per relation
    concept_network = {}
    for item in relations_list_label:
        concept_network[item] = []  # Initiate a list as the value for each key/relation
    for item in relations_list_label:
        for edge in concepts:
            if item == edge['rel']['label']:
                # Only keep the important fields
                obj_to_append = dict((i, edge[i]) for i in ('start', 'end', 'rel', 'weight'))
                concept_network[item].append(obj_to_append)

    # Problem: sometimes the word queried is the 'start' node, other times the 'end' node
    word_start_or_end = {}
    for i in range(len(relations_list_label)):
        for j in range(len(concept_network[relations_list_label[i]])):
            if word in concept_network[relations_list_label[i]][j]['end']['label']:
                word_start_or_end[relations_list_label[i]] = 'end'
            else:
                word_start_or_end[relations_list_label[i]] = 'start'

    # Return
    return render_template("results.html", partsOfSpeech=partsOfSpeech,
                           definitions=definitions, synList=synList,
                           numberOfOptions=numberOfOptions, resultData=resultData,
                           Related_words_data=Related_words_data,
                           relations_list_label=relations_list_label,
                           concept_network=concept_network,
                           real_relations_list_label_names=real_relations_list_label_names,
                           word_start_or_end=word_start_or_end,
                           original_word=original_word, word=word,
                           wordPath_list=wordPath_list)
def generate_output_text(self):
    text = self.text
    output_text = text
    # count word frequencies to find words that appear exactly once
    words = text.split()
    counts = {}
    for word in words:
        if word not in counts:
            counts[word] = 0
        counts[word] += 1
    one_word = []
    for key, value in counts.items():
        if value == 1 and key.isalpha() and len(key) > 2:
            one_word.append(key)
    # POS-tag those words and keep the nouns and verbs
    noun = []
    verb = []
    nlp = spacy.load('en_core_web_sm')
    doc = nlp(u'' + ' '.join(one_word))
    for token in doc:
        if token.pos_ == 'VERB':
            verb.append(token.text)
        if token.pos_ == 'NOUN':
            noun.append(token.text)
    all_main = verb + noun
    len_all = len(noun) + len(verb)
    final_value = int(len_all * self.percent / 100)
    random.seed(4)
    temp = random.sample(range(0, len_all), final_value)
    for i in temp:
        try:
            word_str = all_main[i]
            w = Word(word_str)
            a1 = list(w.synonyms())
            # indices below len(verb) are verbs, the rest are nouns
            if i < len(verb):
                change_word = self.synalter_Noun_Verb(word_str, a1, 'v')
            else:
                change_word = self.synalter_Noun_Verb(word_str, a1, 'n')
            try:
                search_word = re.search(r'\b(' + word_str + r')\b', output_text)
                Loc = search_word.start()
                output_text = (output_text[:Loc] + change_word
                               + output_text[Loc + len(word_str):])
            except Exception:
                pass
        except Exception:
            pass
    return output_text
# (truncated: only the tail of merge_list_of_records_by's list comprehension survives)
        for _, records in groupby(sorted(lst, key=keyprop), keyprop)
    ]

a = [{'time': '25 APR', 'total': 10, 'high': 10},
     {'time': '26 APR', 'total': 5, 'high': 5}]
b = [{'time': '24 APR', 'total': 10, 'high': 10},
     {'time': '26 APR', 'total': 15, 'high': 5}]
merger = merge_list_of_records_by('time', add)
hasil_merge = merger(a + b)
print(hasil_merge)

print("synonyms with thesaurus==================================================================")
# from PyDictionary import PyDictionary
# dictionary = PyDictionary()
# print(dictionary.synonym("good"))
from thesaurus import Word

w = Word('suicidal')
syn = w.synonyms()
print(syn)

sn = SenticNet()
try:
    concept_info_sinonim = sn.concept("suicidal")
    print(concept_info_sinonim)
except Exception as e:
    print(e)
class Synonym(Text):
    # Class for displaying a word's synonyms
    part_of_speech = ("adj", "adv", "contradiction", "conj", "determiner",
                      "interj", "noun", "prefix", "prep", "pron", "verb",
                      "abb", "phrase", "article")

    def __init__(self, master, width, height, word, column, columnspan, row, partspeech):
        # Initialize the class
        self.base_word = Word(word)
        self.master = master
        self.width = width
        self.height = height
        self.column = column
        self.row = row
        self.columnspan = columnspan
        self.partspeech = partspeech

    def Search(self):
        # Look up the word's synonyms
        if check_internet("http://google.com"):  # check the internet connection
            temp_str = ""
            if self.partspeech != 14:  # anything other than 14 means not "all" parts of speech
                if FrontApp.condition_synonym != "all":
                    list_synonym = self.base_word.synonyms(
                        'all', form=FrontApp.condition_synonym,
                        partOfSpeech=Synonym.part_of_speech[self.partspeech],
                        allowEmpty=False)
                else:
                    list_synonym = self.base_word.synonyms(
                        'all', partOfSpeech=Synonym.part_of_speech[self.partspeech],
                        allowEmpty=False)
                if list_synonym != []:  # check whether any results came back
                    Text.__init__(self, self.master, width=self.width, height=self.height)
                    self.grid(row=self.row, column=self.column, columnspan=self.columnspan)
                    self.config(state=NORMAL)
                    self.delete("1.0", END)
                    for k in list_synonym:
                        for j in k:
                            temp_str += j + "\n"
                        temp_str += "\n"
                    self.insert("1.0", temp_str)
                    self.config(state=DISABLED)
                    temp_str = ""
                else:
                    Text.__init__(self, self.master, width=self.width, height=self.height)
                    self.grid(row=self.row, column=self.column, columnspan=self.columnspan)
                    self.config(state=NORMAL)
                    self.delete("1.0", END)
                    self.insert("1.0", "None")
                    self.config(state=DISABLED)
            else:
                if FrontApp.condition_synonym == "all":
                    list_synonym = self.base_word.synonyms('all', allowEmpty=False)
                else:
                    list_synonym = self.base_word.synonyms(
                        'all', form=FrontApp.condition_synonym, allowEmpty=False)
                if list_synonym != []:
                    Text.__init__(self, self.master, width=self.width, height=self.height)
                    self.grid(row=self.row, column=self.column, columnspan=self.columnspan)
                    self.config(state=NORMAL)
                    self.delete("1.0", END)
                    for k in list_synonym:
                        for j in k:
                            temp_str += j + "\n"
                        temp_str += "\n"
                    self.insert("1.0", temp_str)
                    self.config(state=DISABLED)
                    temp_str = ""
                else:
                    Text.__init__(self, self.master, width=self.width, height=self.height)
                    self.grid(row=self.row, column=self.column, columnspan=self.columnspan)
                    self.config(state=NORMAL)
                    self.delete("1.0", END)
                    self.insert("1.0", "None")
                    self.config(state=DISABLED)
        else:
            showerror(message="No Connection", title="Connection Error")
def get_synonyms(word):
    my_word = Word(word)
    return my_word.synonyms()
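# Usage sketch for the thin wrapper above (live results depend on thesaurus.com):
for s in get_synonyms("quick")[:5]:
    print(s)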
def find_similar_relations_using_thesaurus(word):
    w = Word(word)
    return w.synonyms()
def tf(self, p):
    ca = TrueFalsePopup()
    ca.open()
    # SYSTEM WILL DECIDE RANDOMLY TO PRINT FALSE OR TRUE
    m = random.randint(0, 1)
    print(m)
    global TFAnswer
    base_path = os.path.dirname(os.path.realpath(__file__))
    xml_file = os.path.join(base_path, "definition.xml")
    tree = ET.parse(xml_file)
    root = tree.getroot()
    for Def in root.findall("Def"):
        module = Def.find('tags').text
        if module == p:
            j = Def.find('description').text
            print(j)
            definition = j
    # PRINT QUESTION
    if m == 1:
        ca.ids.L.text = definition
        TFAnswer = 0
    elif m == 0:
        # BREAK THE DEFINITION INTO INDIVIDUAL WORDS
        BreakDefinition = word_tokenize(definition)
        # LABEL EACH WORD WITH ITS CORRESPONDING PART OF SPEECH TAG
        grammar = nltk.pos_tag(BreakDefinition)
        # CREATE A LIST OF WORDS THAT HAVE THE NN TAG
        is_noun = lambda pos: pos[:2] == 'NN'
        list_of_nn = [word for (word, pos) in grammar if is_noun(pos)]
        # IF THE LIST HAS MORE THAN 3 WORDS, TAKE ONLY 3
        if len(list_of_nn) >= 3:
            choice = random.sample(list_of_nn, 3)
        else:
            choice = list_of_nn
        print(choice)
        # FOR EACH WORD, GET AN ALTERNATIVE WORD
        z = []
        for i in choice:
            print(i)
            thesaurus = Word(i)
            syn = thesaurus.synonyms()
            print(syn)
            if len(syn) < 1:
                syn = [i]  # fall back to the word itself, kept as a list so random.choice works
            c = random.choice(syn)
            z.append(c)
        print(z)
        # REWRITE THE DEFINITION WITH THE ALTERNATIVE WORDS
        a = 0
        for i in choice:
            j = BreakDefinition.index(i)
            BreakDefinition[j] = z[a]
            a = a + 1
        FalseStatement = " ".join(BreakDefinition)
        print(FalseStatement)
        la = FalseStatement
        ca.ids.L.text = la
        TFAnswer = 1
arg_index += 1

# clean query
if parameters.case_folding:
    query = query.lower()
query = re.sub(r'[^ a-zA-Z0-9]', ' ', query)
query = re.sub(r'\s+', ' ', query)
query_words = query.split(' ')
print "before", query_words, "\n"

# expand the query with thesaurus synonyms
if parameters.thesaurus:
    synonyms = []
    for term in query_words:
        if term != "":
            word = Word(term)
            synonyms += word.synonyms()
            if term in synonyms:
                synonyms.remove(term)
    query_words += synonyms
print "After", query_words, "\n"

# create accumulators and other data structures
accum = {}
filenames = []
p = porter.PorterStemmer()

# get N
f = open(collection + "_index_N", "r")
N = eval(f.read())
f.close()

# get document lengths/titles
# reading csv file
with open(filename, 'r') as csvfile:
    # creating a csv reader object
    csvreader = csv.reader(csvfile)
    # extracting field names through first row
    # extracting each data row one by one
    for row in csvreader:
        row = row[4:len(row)]
        for ele in row:
            for x in ele.split():
                if x not in words.keys():
                    words[x] = [x]
                    w = Word(x)
                    syno = w.synonyms()
                    print(syno)
                    if syno is not None:
                        for s in syno:
                            words[x].append(s)
print(words)

with open(newfile, "w") as f:
    for values in words.values():
        st = ''
        if len(values) < 8:
            for ele in values:
                st += ele + ','
        else:
            for ele in values[0:9]:
                # (snippet ends here, mid-loop)