def assembleEntry(y):
    """Render one dictionary entry dict `y` into a single pipe/markup string.

    `y` is expected to be a dict with a 'title' and a list under 'entries',
    where each entry may carry partsOfSpeech, wordForms, synonyms,
    pronunciations, senses and etymology fields — TODO confirm schema against
    the producer of `y`.

    Returns a string of the form "title|form1|form2...\\n<article body>", where
    the body uses <B>/<BR> markup (dictionary-app format, presumably).
    Relies on module-level helpers: clean_synonyms, stripHtml,
    replace_newlines, prep_string, escape_characters, unique, and the
    `roman` and pattern `en` modules.
    """
    # Accumulators, one slot per entry in y['entries'].
    glosses = []
    examples = []
    etymologies = []
    quotations = []
    pronunciations = []
    pronunciation_entries = set()  # dedup across entries
    partsOfSpeech = []
    partsOfSpeechHeads = []
    etymology_entries = set()  # dedup across entries
    synonyms = []
    word_forms = []
    # Preprocessing
    for entry in y.get('entries', []):
        # Parts of speech
        psos = entry.get('partsOfSpeech') or []
        try:
            # NOTE(review): under Python 3 this produces a map object, which is
            # always truthy and not subscriptable — psos[0] below would raise.
            # Presumably this code runs under Python 2; confirm.
            psos = map(lambda x: x.replace('proper_noun', 'proper noun'), psos)
        except:
            # Debug aid before re-raising: show offending input and the title.
            print(repr(psos))
            print(y['title'])
            raise
        if psos:
            partsOfSpeech.append(u"<B>" + u" ,".join(psos) + u"</B>")
            partsOfSpeechHeads.append(psos[0])
        else:
            partsOfSpeech.append("")
            partsOfSpeechHeads.append("")
        # Word forms
        elems = []
        for wf in entry.get('wordForms') or []:
            form = wf.get('form')
            if form:
                elems.append(form)
        word_forms.append(elems)
        # Synonyms
        synonyms.append(clean_synonyms(entry.get('synonyms', [])))
        # Pronunciations
        elems = []
        elem = ""
        # print(entry.get('pronunciations', []))
        for pronunciation in entry.get('pronunciations', []):
            text = pronunciation.get('text')
            if text:
                # Only the first occurrence of a pronunciation text is kept.
                if text not in pronunciation_entries:
                    pronunciation_entries.add(text)
                    elem += text
                    note = pronunciation.get('note')
                    if note:
                        elem += " (" + note + ")"
                    elems.append(elem)
                    elem = ""
        pronunciations.append(", ".join(elems))
        # print(repr(pronunciations[-1]))
        # Senses
        gloss_entry = []
        example_entry = []
        quote_entry = []
        for sense in entry.get('senses') or []:
            gloss_entry.append(stripHtml(sense.get('gloss', '')))
            example_entry.append([
                replace_newlines(stripHtml(example.get('example', '')))
                for example in sense.get('examples', [])])
            quote_entry.append([
                replace_newlines(stripHtml(quote.get('quote', '')))
                for quote in sense.get('quotations', [])])
        glosses.append(gloss_entry)
        examples.append(example_entry)
        quotations.append(quote_entry)
        # Duplicate etymologies collapse to '' so they render only once.
        etymology_text = stripHtml(entry.get('etymology', ''))
        if etymology_text not in etymology_entries:
            etymology_entries.add(etymology_text)
            etymologies.append(etymology_text)
        else:
            etymologies.append('')
    # Assemble string
    # Title
    s = u""
    # s += y['title'] + "\t"
    # Pronunciations: if there is exactly one distinct pronunciation it is
    # shown once up front; otherwise each entry shows its own.
    entry_pronuncs = False
    # pronunciations_filtered = [text for entry in pronunciations for text in entry]
    pronunciations_filtered = list(filter(None, pronunciations))
    if len(pronunciations_filtered) == 1:
        s += u" " + pronunciations_filtered[0] + "<BR>"
    else:
        entry_pronuncs = True
    # Entries & glosses
    single_entry = len(glosses) == 1
    for (entry_num, entry_glosses) in enumerate(glosses, 1):
        if entry_num >= 2:
            s += "<BR>"
        if not single_entry:
            # Multiple entries are numbered with roman numerals (I., II., ...).
            s += u"{0}. ".format(roman.int_to_roman(entry_num))
        if entry_pronuncs:
            s += prep_string(pronunciations[entry_num - 1])
        s += partsOfSpeech[entry_num - 1]
        # Handle word forms: only show irregular inflections.
        pos = partsOfSpeechHeads[entry_num - 1]
        word = y['title']
        if pos == "verb":
            p = en.conjugate(word, 'p')
            pp = en.conjugate(word, 'ppart')
            if p != word + 'ed' or pp != word + 'ed':
                s += u" (p. " + p + u", pp. " + pp + u")"
        elif pos == "noun":
            pl = en.pluralize(word)
            if pl != word + u's':
                s += u" (pl. " + pl + ")"
        elif pos == "adjective":
            pass
        # Glosses
        single_gloss = len(entry_glosses) == 1
        for (gloss_num, gloss) in enumerate(entry_glosses, 1):
            if not single_gloss:
                s += u" {0:d}.".format(gloss_num)
            # else:
            #     s += u":"
            s += u" {0}".format(gloss)
        s += prep_string(
            ", ".join(synonyms[entry_num - 1]) + u"."
            if synonyms[entry_num - 1] else "", " Synonyms: ")
        # s += prep_string(etymologies[entry_num - 1], u" Etymology: ")
    # Etymologies
    etymologies_filtered = [etym for etym in etymologies if etym]
    if etymologies_filtered:
        s += '<BR><BR><B>Etymology:</B>'
        if len(etymologies_filtered) == 1:
            s += etymologies_filtered[0]
        else:
            for i in range(0, len(glosses)):
                if etymologies[i]:
                    s += u" {0}. {1}".format(roman.int_to_roman(i + 1), etymologies[i])
    # Examples and Quotes
    examples_flat = [example for entry in examples for examples in entry
                     for example in examples if example]
    if examples_flat:
        s += u"<BR><BR><B>Examples:</B>"
        for (num_example, example) in enumerate(examples_flat, 1):
            if len(examples_flat) == 1:
                s += " " + example
            else:
                s += u" {0:d}. {1}".format(num_example, example)
    quotes_flat = [quote for entry in quotations for quotes in entry
                   for quote in quotes if quote]
    if quotes_flat:
        s += u"<BR><BR><B>Quotations:</B>"
        for (num_quote, quote) in enumerate(quotes_flat, 1):
            if len(quotes_flat) == 1:
                s += u" " + quote
            else:
                s += u" {0:d}. {1}".format(num_quote, quote)
    s = escape_characters(s)
    # Headwords: the title plus every listed word form plus generated
    # inflections for any verb/noun/adjective entries.
    word_forms_flat = [form for entry in word_forms for form in entry if form]
    titles = [y['title']]
    titles.extend(word_forms_flat)
    if 'verb' in partsOfSpeechHeads:
        titles.extend(en.lexeme(y['title']))
    if 'noun' in partsOfSpeechHeads:
        titles.append(en.pluralize(y['title']))
    if 'adjective' in partsOfSpeechHeads:
        adj_forms = [en.comparative(y['title']), en.superlative(y['title'])]
        # Drop multi-word forms like "more important".
        adj_forms = [form for form in adj_forms if len(form.split(' ')) == 1]
        titles.extend(adj_forms)
    titles = unique(titles)
    if s.strip() == "":
        s = "Empty article."
    s = u'|'.join(titles) + u"\n" + s.strip()
    # return escape_characters(contract_tabs(s))
    return s
def article_checker(l, index):
    """Suggest article corrections for the word trigram starting at `index`.

    `l` is a list of tokens; the trigram (l[index], l[index+1], l[index+2])
    is inspected.  Returns a list of suggested replacement trigram strings,
    or [] when fewer than three tokens remain.

    Relies on module-level names: Articles (set of articles), superlative,
    query, processArt — semantics assumed from usage, TODO confirm.
    """
    if index + 2 < len(l):
        t = [l[index].lower(), l[index + 1].lower(), l[index + 2].lower()]
    else:
        # Not enough tokens left for a trigram.
        return []
    suggestions = []
    # trigrams=ngrams(var,3)
    wnl = WordNetLemmatizer()
    # print(index,t)
    if (t[0] in Articles or t[1] in Articles):
        # Superlatives take "the": force it when the following word is
        # already in superlative form.
        # NOTE(review): `l` (the token-list parameter) is shadowed by a string
        # here; harmless since the original list is no longer used after `t`.
        if t[0] in Articles and superlative(wnl.lemmatize(t[1])) == t[1]:
            l = "the " + t[1] + " " + t[2]
            suggestions = suggestions + [l]
        elif t[1] in Articles and superlative(wnl.lemmatize(t[2])) == t[2]:
            l = t[0] + " the " + t[2]
            suggestions = suggestions + [l]
        else:
            st = " ".join(t)
            # NOTE(review): result `s` is never used — presumably leftover
            # from the commented-out len(s)==0 check below.
            s = query(st, 1)
            # if(len(s)==0):
            suggest = processArt(t)
            # print(suggest)
            # print(suggest)
            final = query(suggest, 6)
            # Keep only candidates scoring >= 0.2.
            suggestions = suggestions + list(
                dict(filter(lambda elem: elem[1] >= 0.2, final.items())).keys())
    # Drop suggestions identical to the original trigram.
    suggestions = list(filter(lambda elem: elem.split(" ") != t, suggestions))
    return suggestions
def superlative(self, word):
    '''
    Given a base-form word (Adj), return back a superlative form

    Args:
        word (str): base-form adj

    Raises:
        ValueError: when no base-form can be found for `word`, or when the
            base-form exists but is not in the vocabulary either

    Returns:
        str: superlative form
    '''
    # NOTE: `superlative(...)`/`lemma(...)` below resolve to the module-level
    # inflection helpers (presumably pattern.en), not to this method.
    if word in self._word2index:
        return superlative(word)
    # Keep the try narrow: only the lemmatization is expected to fail for
    # unknown words.  The previous bare `except:` also caught the
    # "base-form not in vocabulary" ValueError raised below and re-labelled
    # it with the misleading "Can not found base-form" message (and would
    # even swallow KeyboardInterrupt/SystemExit).
    try:
        base_form_word = lemma(word)
    except Exception as exc:
        raise ValueError(
            "Can not found base-form for '{}'".format(word)) from exc
    if base_form_word in self._word2index:
        return superlative(base_form_word)
    raise ValueError(
        "Found the base-form for '{}': '{}'. But even the base-form not in vocabulary"
        .format(word, base_form_word))
def find_adjective_form(original_form, original_lemma, new_lemma):
    """
    Figure out whether adjective was in basic, comparative, or superlative
    form, then apply that to new_lemma
    """
    # Try comparative first, then superlative; fall back to the base form.
    for inflect in (comparative, superlative):
        if inflect(original_lemma) == original_form:
            return inflect(new_lemma)
    return new_lemma
def pos_all(word):
    """Print every conjugated form of `word` plus its plural/comparative/
    superlative variants (Python 2; uses pattern.en helpers)."""
    rlist = []
    # Every tense/person/number/mood/aspect combination to try.
    # NOTE(review): 'imperitive'/'subjuntive' are misspelled; presumably
    # conjugate() falls back or ignores unknown values — confirm.
    _rtense = ('infinitive', 'present', 'past', 'future')
    _rperson = (1, 2, 3)
    _rnumber = ('singular', 'plural')
    _rmood = ('indicative', 'imperitive', 'conditional', 'subjuntive')
    _raspect = ('imperfective', 'perfective', 'progressive')
    for rtense in _rtense:
        for rperson in _rperson:
            for rnumber in _rnumber:
                for rmood in _rmood:
                    for raspect in _raspect:
                        item = conjugate(word,
                                         tense=rtense,
                                         person=rperson,
                                         number=rnumber,
                                         mood=rmood,
                                         aspect=raspect,
                                         negated=False)
                        # Collect unique forms only.
                        if item not in rlist:
                            rlist.append(item)
    print bcolors.Magenta + "All pos of " + word
    print_list(rlist, 4)
    print "Singluar : " + singularize(
        word) + " Plural : " + pluralize(word)
    print "Comparative : " + comparative(
        word) + " Superlative : " + superlative(word)
def acceptPatterns():
    """Flask-style handler: parse the posted 'drunk_text' with pattern.en's
    parsetree and return its repr."""
    original_content = request.form['drunk_text']
    text_content_array = original_content.split(' ')
    # NOTE(review): `text_content` is built here but never used — dead
    # computation (the superlative() calls are presumably side-effect free).
    text_content = ''
    for s in text_content_array:
        text_content += superlative(s) + ' '
    # `s` (the loop variable above) is reused for the parse tree here.
    s = parsetree(original_content, relations=True, lemmata=True)
    return repr(s)
def check_pos(pos_tag, word):
    """Register inflected variants of `word` via add(), based on its POS tag."""
    if pos_tag == 'NN':
        # Nouns: only the plural form.
        add(pluralize(word), word, False, "plural")
        return
    if pos_tag == 'VB':
        # Verbs: every conjugated form.
        for inflection in lexeme(word):
            add(inflection, word, False, "conjugation")
        return
    if pos_tag == 'JJ':
        # Adjectives: comparative first, then superlative.
        add(comparative(word), word, False, "comparative")
        add(superlative(word), word, False, "superlative")
def reconjugate(syn, tok):
    """Inflect the synonym `syn` to match the Penn tag of token `tok`."""
    tag = tok.tag_
    if tag in tag_map:
        # Tag handled by the shared tag -> tense mapping.
        return conjugate(syn, tag_map[tag], parse=True)
    if tag in ('JJR', 'RBR'):
        return comparative(syn)
    if tag in ('JJS', 'RBS'):
        return superlative(syn)
    if tag.startswith('N') and tag.endswith('S'):
        # Plural noun tags (NNS, NNPS).
        return pluralize(syn)
    return syn  # no inflection applies
def morphological_error(target, response):
    """True when `response` is a morphological variant of `target`:
    its plural, comparative, or superlative form, or shares its lemma."""
    # Surface-form checks: plural / comparative / superlative of the target.
    if response in (pluralize(target), comparative(target), superlative(target)):
        return True
    # Lemma check: both words reduce to the same base form.
    return lemma(target) == lemma(response)
def adjectify(self, bottish, english, dictionary):
    """Translate an English adjective to bottish via the dictionary.

    If `english` is the comparative/superlative of a known word, the bottish
    suffix is appended; if it is the base of a known inflected bottish word,
    the suffix is removed.  Falls back to the given `bottish` unchanged.
    """
    comp = "foo"  # bottish comparative suffix
    sup = "far"   # bottish superlative suffix

    def strip_suffix(text, suffix):
        # BUG FIX: the original used text.rstrip(suffix), which strips any
        # trailing characters drawn from the suffix's character set (e.g.
        # "soofoo".rstrip("foo") -> "s"), not the suffix itself.
        return text[:-len(suffix)] if text.endswith(suffix) else text

    for definition in dictionary:
        # if english word is the comparative form of an existing word
        if english == comparative(definition["english"]):
            return definition["bottish"] + comp
        # or is the superlative form of an existing word
        elif english == superlative(definition["english"]):
            return definition["bottish"] + sup
        # or is the normal form of an existing comparative form
        elif comparative(english) == definition["english"]:
            return strip_suffix(definition["bottish"], comp)
        # or is the normal form of an existing superlative form
        elif superlative(english) == definition["english"]:
            return strip_suffix(definition["bottish"], sup)
    return bottish
def tryPOS(word, p, target):
    """Try to inflect `word` (current tag `p`) into the target tag `target`.

    Returns the inflected word, or None when no (single-word) form exists.
    Relies on module-level maps PRPD/WPD (pronoun dictionaries, presumably
    mapping forms to forms — confirm direction) and pattern.en constants
    PAST/PRESENT/INFINITIVE/PARTICIPLE.
    """
    if target in p and target not in ['RB', 'DT', 'RP']:
        # Target tag is a prefix/substring of the current tag: mostly a
        # no-op case, except pronouns which need a reverse dictionary lookup.
        if target == 'PRP' or target == 'WP':
            d = WPD
            if target == 'PRP':
                d = PRPD
            # Reverse lookup: find the key whose value is `word`.
            for k in d:
                if d[k] == word:
                    return k
            return None
        # Reduce to the base form for same-family tags.
        return wn.morphy(word)
    #else
    if target == 'PRP$' and p == 'PRP':
        return PRPD.get(word)
    if target == 'WP$':
        return WPD.get(word)
    if p == 'NN':
        if target == 'NNP':
            return word
        else:
            return pluralize(word)
    if p == 'NNP':
        return pluralize(word)
    if 'VB' in p:
        # Map target verb tags onto pattern.en tense specifiers.
        t = ''
        if target == 'VBD':
            t = PAST
        if target == 'VBP':
            t = INFINITIVE
        if target == 'VBZ':
            t = PRESENT
        if target == 'VBN':
            # NOTE(review): string concatenation of two pattern constants —
            # assumes PAST + PARTICIPLE forms a valid tense spec; confirm.
            t = PAST + PARTICIPLE
        if target == 'VBG':
            t = PARTICIPLE
        if t:
            return conjugate(word, tense=t)
    ret = ''
    if target == 'JJR' or target == 'RBR':
        ret = comparative(word)
    if target == 'JJS' or target == 'RBS':
        ret = superlative(word)
    # Multi-word results like "more useful" are rejected.
    if not ret or ' ' in ret:
        return None
    #default
    else:
        return ret
def inflected_forms(self, syn, desc):
    """
    List the inflected forms of the word described by `desc` (word, pos, _):
    verb conjugations, the noun plural, or the adjective comparative and
    superlative.  Raises MessageException when pattern.en is unavailable.
    """
    try:
        word, pos, _ = desc
        if pos == 'Verb':
            # Conjugations, most-inflected first, excluding the word itself.
            from pattern.en import lexeme
            forms = list(reversed(lexeme(word)))
            return [form for form in forms if form != word]
        if pos == 'Noun':
            from pattern.en import pluralize
            return [pluralize(word)]
        if pos == 'Adjective':
            from pattern.en import comparative, superlative
            return [comparative(word), superlative(word)]
        return []
    except ImportError:
        # pattern is an optional dependency; report a structured error.
        raise MessageException('General', 'unavailable',
                               'WordData[_, "InflectedForms"]', 'pattern')
def generate_ace_adj_rules(self):
    """Yield ACE lexicon statements for every known adjective.

    For each adjective this yields an adj_itr fact, plus adj_itr_comp /
    adj_itr_sup facts when single-token comparative/superlative forms exist,
    plus adv facts for derived adverbs.  Side effect: records each derived
    form in self._inverse_map so it can be mapped back to its base adjective.
    """
    # - adj_statement_list = []
    # - adj_comp_statement_list = []
    # - adj_sup_statement_list = []
    # - adv_statement_list = []
    for adj in self.adjectives():
        # NOTE(review): `bare_word` is assigned but never used.
        bare_word = adj
        # Multi-token forms ("more beautiful") are rejected.
        comparative_word = comparative(adj)
        if len(word_tokenize(comparative_word)) > 1:
            comparative_word = None
        superlative_word = superlative(adj)
        if len(word_tokenize(superlative_word)) > 1:
            superlative_word = None
        # "r" = adverb forms per get_word_forms' POS keys.
        adverb = get_word_forms(adj)["r"]
        if len(adverb) == 0:
            adverb = None
        adj_statement = "adj_itr({}, {}).".format(adj, adj)
        yield adj_statement
        # - adj_statement_list.append(adj_statement)
        if comparative_word is not None:
            adj_comp_statement = "adj_itr_comp({}, {}).".format(
                comparative_word, adj)
            self._inverse_map[comparative_word] = adj
            yield adj_comp_statement
            # - adj_comp_statement_list.append(adj_comp_statement)
        if superlative_word is not None:
            adj_sup_statement = "adj_itr_sup({}, {}).".format(
                superlative_word, adj)
            self._inverse_map[superlative_word] = adj
            yield adj_sup_statement
            # - adj_sup_statement_list.append(adj_sup_statement)
        if adverb is not None:
            for adv in adverb:
                adv_statement = "adv({}, {}).".format(adv, adv)
                self._inverse_map[adv] = adj
                yield adv_statement
def annotate_sentence(sentence, index):
    """
    Returns a list of recommendations based on an input sentence.

    Scans for "more <adj/adv>" / "most <adj/adv>" and suggests the
    single-word comparative/superlative when pattern.en produces one.
    `index` is the paragraph index recorded in each Recommendation.
    """
    improvements: List[Recommendation] = []

    def _suggest(marker, head, inflect, rec_type):
        # Build a Recommendation replacing "<marker> <head>" with the
        # single-word inflection; pattern returns "more X"/"most X" when no
        # such form exists, in which case we return None (no suggestion).
        new_form: str = inflect(head.text.lower())
        if marker in new_form:
            return None
        return Recommendation(
            rec_type,
            sentence.text,
            sentence.start,
            sentence.end,
            index,  # paragraph index
            [sentence.text.replace(marker + " " + head.text, new_form)],
            rec_type + head.text,
            1  # Confidence
        )

    for word in sentence:
        lowered = word.text.lower()
        if lowered == "more" and word.head.pos_ in POS_WITH_COMP_SUPER:
            rec = _suggest("more", word.head, comparative,
                           RecommendationType.COMPARATIVE)
            if rec is not None:
                improvements.append(rec)
        if lowered == "most" and word.head.pos_ in POS_WITH_COMP_SUPER:
            rec = _suggest("most", word.head, superlative,
                           RecommendationType.SUPERLATIVE)
            if rec is not None:
                improvements.append(rec)
    return improvements
def find_roots(word):
    """Scrape related word roots for `word` from an etymology site.

    Returns a de-duplicated list containing `word` plus every related root
    extracted from the fetched page(s).  Uses module-level config:
    abbreviations, special_words, replace_dict, base_url, and helpers
    get_content / find_top_search_result / find_all_occurances /
    extract_roots — semantics assumed from usage.
    """
    all_roots = [word]
    # Abbreviations and special words are returned as-is.
    if word in abbreviations + special_words:
        return all_roots
    # Apply manual replacements; a replacement may be a list of words.
    if word in replace_dict.keys():
        word = replace_dict[word]
    words = word if isinstance(word, list) else [word]
    roots_lst = []
    for word in words:
        word_url = base_url + "word/" + word
        content = get_content(word_url)
        if content is None or "Error 404 (Not Found)" in content:
            # if we haven't found the word, retry with the lemma...
            url = base_url + "word/" + lemma(word)
            content = get_content(url)
            if content is None or "Error 404 (Not Found)" in content:
                # ...then with the site's own top search result.
                url = find_top_search_result(word)
                if url is None:
                    if (word[(-3):] == "est"
                            and superlative(word[:(-3)]) == word):
                        # if it is in superlative form, search the base form
                        url = find_top_search_result(word[:(-3)])
                    elif (word[(-2):] == "er"
                          and comparative(word[:(-2)]) == word):
                        url = find_top_search_result(word[:(-2)])
                    else:
                        print("Warning: Didn't get top search result for",
                              word)
                content = get_content(url)
                if content is None or "Error 404 (Not Found)" in content:
                    # Give up on this word.
                    continue
        # Truncate content at the "Related Entries" section.
        related_entries_index = content.find("Related Entries")
        content_truncate = content[:related_entries_index]
        matches = find_all_occurances(
            match_reg_exp, content_truncate)  # here content is a byte string
        roots = extract_roots(matches)
        roots.append(word)  # the word itself is obviously a related word
        roots_lst.append(roots)
    all_roots += list(set(itertools.chain(*roots_lst)))
    return list(set(all_roots))
def match_morphology(self, word, syn):
    """Inflect synonym `syn` to match the morphology of token `word`:
    tense/person, number, degree, and leading capitalization."""
    # NOTE(review): the `is not None` checks below treat a present-but-False
    # morph flag as True; the superlative/comparative branches use plain
    # truthiness instead.  Inconsistent — confirm the morph API's convention.
    person = 1
    if word.morph.is_third_person is not None:
        person = 3
    if word.morph.tense is not None:
        syn = conjugate(syn, tense=word.morph.tense, person=person)
    if word.morph.is_plural is not None and word.pos == NOUN:
        syn = pluralize(syn)
    if word.morph.is_singular is not None and word.pos == NOUN:
        syn = singularize(syn)
    if word.morph.is_superlative and (word.pos == ADV or word.pos == ADJ):
        syn = superlative(syn)
    if word.morph.is_comparative and (word.pos == ADV or word.pos == ADJ):
        syn = comparative(syn)
    # Mirror an initial capital (shape 'X...' means capitalized).
    if word.shape[0] == 'X':
        syn = syn.capitalize()
    return syn
def transform_word(word, pos, word_original):
    """Inflect the first token of (possibly multi-word) `word` to match the
    Penn tag `pos`; `word_original` supplies the tense for the VBP case.

    Later tokens are passed through unchanged.  Falls back to the
    uninflected token when pattern.en raises KeyError.
    """
    words = word.split(' ')
    result = list()
    for i, word in enumerate(words):
        if i == 0:
            try:
                if pos == 'JJR' or pos == 'RBR':
                    # Skip inflection when the token is already comparative.
                    pos_again = nltk.pos_tag([word])[0][1]
                    if pos_again == 'JJR' or pos_again == 'RBR':
                        result.append(word)
                    else:
                        result.append(comparative(word))
                elif pos == 'JJS' or pos == 'RBS':
                    # Already superlative?
                    pos_again = nltk.pos_tag([word])[0][1]
                    if pos_again == 'JJS' or pos_again == 'RBS':
                        result.append(word)
                    else:
                        result.append(superlative(word))
                elif pos == 'NNS' or pos == 'NNPS':
                    # Already plural?
                    pos_again = nltk.pos_tag([word])[0][1]
                    if pos_again == 'NNS' or pos_again == 'NNPS':
                        result.append(word)
                    else:
                        result.append(pluralize(word))
                elif pos == 'VBD':
                    result.append(conjugate(word, 'p'))
                elif pos == 'VBG':
                    result.append(conjugate(word, 'part'))
                elif pos == 'VBN':
                    result.append(conjugate(word, 'ppart'))
                elif pos == 'VBP':
                    # Choose 1st vs 2nd person present from the original
                    # word's attested tenses.
                    if (PRESENT, 1, SG) in tenses(word_original):
                        result.append(conjugate(word, '1sg'))
                    else:
                        result.append(conjugate(word, '2sg'))
                elif pos == 'VBZ':
                    result.append(conjugate(word, '3sg'))
                else:
                    result.append(word)
            except KeyError:
                # Unknown to pattern.en: keep the token as-is.
                result.append(word)
        else:
            result.append(word)
    return ' '.join(result)
def pos_all(word):
    """Print every conjugated form of `word` plus plural/comparative/
    superlative variants (Python 2).  Duplicate of the pos_all defined
    earlier in this file — NOTE(review): consider removing one copy."""
    rlist =[]
    # All tense/person/number/mood/aspect combinations ('imperitive' and
    # 'subjuntive' are misspelled in the original data).
    _rtense =('infinitive', 'present', 'past', 'future')
    _rperson =(1,2,3)
    _rnumber=('singular', 'plural')
    _rmood=('indicative','imperitive','conditional','subjuntive')
    _raspect=('imperfective','perfective','progressive')
    for rtense in _rtense:
        for rperson in _rperson:
            for rnumber in _rnumber:
                for rmood in _rmood:
                    for raspect in _raspect:
                        item = conjugate(word, tense = rtense, person = rperson,number = rnumber,mood = rmood,aspect = raspect,negated = False)
                        # Keep unique forms only.
                        if item not in rlist:
                            rlist.append(item)
    print bcolors.Magenta + "All pos of "+word
    print_list(rlist,4)
    print "Singluar : " +singularize(word)+" Plural : " +pluralize(word)
    print "Comparative : " +comparative(word)+" Superlative : " +superlative(word)
def cooccur_targets(self):
    """Words expected to co-occur with this entity: tokens from the
    processed name and alternatives, WordNet-related words, and every
    inflection (plural/singular, degrees, all tenses) of each."""
    base_words = self.process_description(self.name).split()
    for alt in self.processed_alternatives:
        tokens = alt.split()
        base_words.extend(tokens)
        # Augment with related words, drawn from WordNet
        for token in tokens:
            base_words.extend(
                rel for rel, score in morphify(token) if score > 0.5)
    inflected = set()
    # Add all the inflections!!!
    for word in base_words:
        inflected.add(word)
        # Plural + singular.
        inflected.add(pattern.pluralize(word))
        inflected.add(pattern.singularize(word))
        # Single-word comparative/superlative only ("more X" is skipped).
        comp = pattern.comparative(word)
        if "more" not in comp:
            inflected.add(comp)
        sup = pattern.superlative(word)
        if "most" not in sup:
            inflected.add(sup)
        # Every known tense.
        for tense_id, tense in TENSES.items():
            if tense_id is None:
                continue
            inflected.add(pattern.conjugate(word, tense))
    return set(inflected) - set([None])
def make_morph_set(lemma, pos):
    """Surface forms of `lemma` for a WordNet-style POS ('n'/'v'/'a');
    other POS values yield just the lemma itself."""
    forms = set([lemma])
    if pos == 'n':
        forms.add(en.pluralize(lemma))
    elif pos == 'v':
        # All verb conjugations.
        forms.update(en.lexeme(lemma))
    elif pos == 'a':
        # Only single-word degree forms ("more ..."/"most ..." are skipped).
        comp = en.comparative(lemma)
        if comp and not comp.startswith('more '):
            forms.add(comp)
        sup = en.superlative(lemma)
        if sup and not sup.startswith('most '):
            forms.add(sup)
    return forms
# It is slightly less robust than the pluralize() function. for word in [ "parts-of-speech", "children", "dogs'", "wolves", "bears", "kitchen knives", "octopodes", "matrices", "matrixes" ]: print singularize(word) print singularize("our", pos=ADJECTIVE) print print # COMPARATIVE & SUPERLATIVE ADJECTIVES # ------------------------------------ # The comparative() and superlative() functions give the comparative/superlative form of an adjective. # Words with three or more syllables are simply preceded by "more" or "most". for word in ["gentle", "big", "pretty", "hurt", "important", "bad"]: print word, "=>", comparative(word), "=>", superlative(word) print print # VERB CONJUGATION # ---------------- # The lexeme() function returns a list of all possible verb inflections. # The lemma() function returns the base form (infinitive) of a verb. print "lexeme:", lexeme("be") print "lemma:", lemma("was") print # The conjugate() function inflects a verb to another tense. # You can supply: # - tense : INFINITIVE, PRESENT, PAST, # - person: 1, 2, 3 or None,
try: uid = line.split('svg')[1][1:-1] if "-" in uid or hasNumbers(emoji): continue # print(emoji,uid) emojis[emoji] = {"category":"oneemoji","char":uid} searchWord = emoji if "_" in searchWord: searchWord = searchWord.split("_")[0] possibleWords = [] + dictionary.synonym(singularize(searchWord)) + dictionary.synonym(pluralize(searchWord)) goodWords = set() for word in possibleWords: goodWords.add(singularize(word)) goodWords.add(pluralize(word)) goodWords.add(comparative(word)) goodWords.add(superlative(word)) goodWords = goodWords | set(lexeme(word)) actualGoodWords = [] for word in goodWords: if " " not in word: actualGoodWords.append(word) emojis[emoji]["keywords"] = actualGoodWords except: pass pbar.update(1) i = i + 1 if i > 10000000: break pbar.close() with open("emojis3.json","w") as f:
def applyPosTag(atom, pos):
    """Inflect base form `atom` to match the Penn Treebank tag `pos`.

    Tag reference:
    https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
    https://web.archive.org/web/20140519174100/https://www.clips.uantwerpen.be/pages/pattern-en

    Raises Exception for tags with no inflection rule (CC, CD, DT, EX, FW,
    IN, JJ, LS, MD, NN, NNP, PDT, POS, PRP, RP, SYM, TO, UH, WDT, WP, WP$,
    WRB, ...).
    """
    if pos == 'JJR':
        return comparative(atom)
    if pos == 'JJS':
        return superlative(atom)
    if pos == 'NNS':
        return pluralize(atom)
    if pos == 'RB':
        return getAdverb(atom)
    if pos == 'RBR':
        return comparative(atom)
    if pos == 'RBS':
        return superlative(atom)
    if pos == 'VB':
        return conjugate(atom, 'inf')
    if pos == 'VBD':
        # Hard-coded past forms where pattern.en's choice is not the one
        # wanted here.
        if atom == "smell":
            return "smelled"
        if atom == "dream":
            return "dreamed"
        if atom == "grip":
            return "gripped"
        return conjugate(atom, 'p')
    if pos == 'VBG':
        # BUG FIX: the original tested `pos == "pip"`, which can never be
        # true inside the VBG branch; the special case is for the atom "pip".
        if atom == "pip":
            return "piping"
        return conjugate(atom, 'part')
    if pos == 'VBN':
        return conjugate(atom, 'ppart')
    if pos == 'VBP':
        return conjugate(atom, '1sg')
    if pos == 'VBZ':
        return conjugate(atom, '3sg')
    raise Exception("failed to untokenize: " + atom + ", " + pos)
    # (Removed the unreachable `return prevAtoms + pos` that followed the
    # raise and referenced an undefined name.)
def get_theme_words(self, theme, k=1, verbose=True, max_val=20,
                    theme_file="saved_objects/theme_words.p",
                    extras_file='saved_objects/extra_adjs.p'):
    """Return a {word: score} dict of words associated with `theme`.

    Results are cached in `theme_file` (and derived inflections in
    `extras_file`) via pickle; a missing theme is generated by scanning
    self.poems for lines containing WordNet synonyms of the theme and
    harvesting nearby words within a window of `k` tokens.  Side effect:
    every cached "extra" inflection is registered into self.words_to_pos /
    self.pos_to_words.  NOTE(review): `verbose` is accepted but unused.
    """
    try:
        # Load both caches; any failure (missing file, bad pickle) falls
        # through to re-initialise them as empty dicts.
        with open(theme_file, "rb") as pickle_in:
            print("loading from file")
            theme_word_dict = pickle.load(pickle_in)
        with open(extras_file, "rb") as p_in:
            extras = pickle.load(p_in)
    except:
        print("either file not found")
        with open(theme_file, "wb") as pickle_in:
            theme_word_dict = {}
            pickle.dump(theme_word_dict, pickle_in)
        with open(extras_file, "wb") as p_in:
            extras = {}
            pickle.dump(extras, p_in)
    if theme not in theme_word_dict:
        print(theme, "not in file. Generating...")
        # WordNet lemmas of the theme that we have meter data for.
        syn = wn.synsets(theme)
        theme_syns = [
            l.name() for s in syn for l in s.lemmas()
            if l.name() in self.dict_meters
        ]
        cases = []
        for poem in self.poems:  #find poems which have theme syns
            if any(word in poem for word in theme_syns):
                for line in poem.split(
                        "\n"):  #find lines which have theme syns
                    if any(word in line for word in theme_syns):
                        cases.append(line)
        print("theme_syns", theme_syns)
        print(cases)
        theme_words = {}
        for case in cases:
            words = case.split()
            for i in range(len(words)):
                if words[i] in theme_syns:
                    good_pos = [
                        'JJ', 'JJS', 'RB', 'VB', 'VBP', 'VBD', 'VBZ',
                        'VBG', 'NN', 'NNS'
                    ]
                    punct = [".", ",", "?", "-", "!"]
                    new_words = [words[i]]
                    # Collect known, non-common words with useful POS within
                    # k tokens to the left of the synonym hit...
                    left = i - 1
                    while left >= max(0, i - k):
                        if words[left] in punct:
                            left = max(0, left - 1)
                        if words[left] in self.words_to_pos and words[
                                left] in self.dict_meters and words[
                                    left] not in self.top_common_words and any(
                                        pos in good_pos
                                        for pos in self.get_word_pos(
                                            words[left])):
                            new_words.append(words[left])
                        left -= 1
                    # ...and to the right.
                    right = i + 1
                    while right <= min(len(words) - 1, i + k):
                        if words[right] in punct:
                            right = min(len(words) - 1, right + 1)
                        if words[right] in self.words_to_pos and words[
                                right] in self.dict_meters and words[
                                    right] not in self.top_common_words and any(
                                        pos in good_pos
                                        for pos in self.get_word_pos(
                                            words[right])):
                            new_words.append(words[right])
                        right += 1
                    # Score each harvested word and derive extra inflections
                    # (NOTE: appending to new_words while iterating it —
                    # derived forms are themselves re-processed).
                    for w in new_words:
                        if not self.get_word_pos(
                                w) or w not in self.dict_meters:
                            continue
                        if w not in theme_words:
                            theme_words[w] = 0
                        theme_words[w] = min(theme_words[w] + 1, max_val)
                        if "JJ" in self.get_word_pos(w):
                            new_words.append(comparative(w))
                            #self.words_to_pos[comparative(w)] = ["JJR"]
                            #self.pos_to_words["JJR"].append(comparative(w))
                            extras[comparative(w)] = ["JJR"]
                            new_words.append(superlative(w))
                            #self.words_to_pos[superlative(w)] = ["JJS"]
                            #self.pos_to_words["JJS"].append(superlative(w))
                            extras[superlative(w)] = ["JJS"]
                            #print("adding ", new_words[-2:])
                        elif "NN" in self.get_word_pos(w):
                            #if "valley" in w: print(w, pluralize(w), w[-1] == "s", self.get_word_pos(w))
                            if pluralize(w) != w and w[-1] != 's':
                                new_words.append(pluralize(w))
                                extras[pluralize(w)] = ["NNS"]
                                #print("adding ", new_words[-1])
                        else:
                            st = self.stemmer.stem(w)
                            if st not in new_words:
                                new_words.append(st)
                                #print("adding ", new_words[-1])
        #keep only the ones that come up as synonyms for at least two?
        theme_words["purple"] = 0  # comes up weirdly often
        theme_word_dict[theme] = theme_words
        # Weight raw counts by spaCy similarity to the theme.
        for w in theme_word_dict[theme]:
            theme_word_dict[theme][w] *= abs(
                helper.get_spacy_similarity(theme, w))  #/max_val
        # Persist both caches.
        with open(extras_file, 'wb') as f:
            pickle.dump(extras, f)
        with open(theme_file, "wb") as pickle_in:
            pickle.dump(theme_word_dict, pickle_in)
    # Register all cached derived inflections into the POS tables.
    for extra in extras:
        self.words_to_pos[extra] = extras[extra]
        self.pos_to_words[extras[extra][0]].append(extra)
    return theme_word_dict[theme]
def test_superlative(self):
    """Check en.superlative on a short adjective (suffix form) and a long
    one (periphrastic "most ..." form).  Python 2 unittest method."""
    # Assert "nice" => "nicest"
    self.assertEqual(en.superlative("nice"), "nicest")
    # Assert "important" => "most important"
    self.assertEqual(en.superlative("important"), "most important")
    print "pattern.en.superlative()"
relations=True, lemmata=True).split()) # ### Pluralizing and Singularizing the Tokens from pattern.en import pluralize, singularize print(pluralize('leaf')) print(singularize('theives')) # ### Converting Adjective to Comparative and Superlative Degrees from pattern.en import comparative, superlative print(comparative('good')) print(superlative('good')) # ### Finding N-Grams from pattern.en import ngrams print(ngrams("He goes to hospital", n=2)) # ### Finding Sentiments from pattern.en import sentiment print(sentiment("This is an excellent movie to watch. I really love it")) # Explanation: #
# SINGULARIZATION # --------------- # The singularize() function returns the singular form of a plural noun (or adjective). # It is slightly less robust than the pluralize() function. for word in ["parts-of-speech", "children", "dogs'", "wolves", "bears", "kitchen knives", "octopodes", "matrices", "matrixes"]: print(singularize(word)) print(singularize("our", pos=ADJECTIVE)) print("") # COMPARATIVE & SUPERLATIVE ADJECTIVES # ------------------------------------ # The comparative() and superlative() functions give the comparative/superlative form of an adjective. # Words with three or more syllables are simply preceded by "more" or "most". for word in ["gentle", "big", "pretty", "hurt", "important", "bad"]: print("%s => %s => %s" % (word, comparative(word), superlative(word))) print("") # VERB CONJUGATION # ---------------- # The lexeme() function returns a list of all possible verb inflections. # The lemma() function returns the base form (infinitive) of a verb. print("lexeme: %s" % lexeme("be")) print("lemma: %s" % lemma("was")) print("") # The conjugate() function inflects a verb to another tense. # You can supply: # - tense : INFINITIVE, PRESENT, PAST, # - person: 1, 2, 3 or None, # - number: SINGULAR, PLURAL,
# Python 2 tour of the pattern.en API (inflection, conjugation,
# quantification).  NOTE(review): `conjugate` is imported twice below.
from pattern.en import referenced, pluralize, singularize, comparative, superlative, conjugate, number, numerals, lemma, lexeme, tenses,\
    PAST, PL, verbs, conjugate, PARTICIPLE, quantify, suggest, ngrams, parse, tag, tokenize, pprint, parsetree, tree, Text, WORD, POS, CHUNK, PNP, REL, LEMMA, sentiment, \
    Sentence, Word, Chunk, PNPChunk, modality, wordnet, ADJECTIVE

#indefinite article
print referenced('university')
print referenced('hour')

# pluralization and singularization
print pluralize('child')
print singularize('wolves')

# comparative and superlative
print comparative('bad')
print superlative('bad')

# verb conjugation
print lexeme('purr')
print lemma('purring')
print conjugate('purred', '3sg')  # he / she / it

# Tense membership queries.
print 'p' in tenses('purred')  # By alias.
print PAST in tenses('purred')
print(PAST, 1, PL) in tenses('purred')

# rule-based conjugation
print 'google' in verbs.infinitives
print 'googled' in verbs.inflections
print conjugate('googled', tense=PARTICIPLE, parse=False)
print conjugate('googled', tense=PARTICIPLE, parse=True)

# quantification
print number("seventy-five point two")  # "seventy-five point two" => 75.2
print numerals(2.245, round=2)  # 2.245 => "two point twenty-five"
print quantify(['goose', 'goose', 'duck', 'chicken', 'chicken', 'chicken'])
print quantify({'carrot': 100, 'parrot': 20})
print quantify('carrot', amount=1000)
def calc_main():
    """Streamlit entry point for the Nimbus Words UI.

    Renders a sidebar activity selector and dispatches to one of six tools:
    Summary, Tokenizer, Synonyms, Translator, Search, Spell Correction.
    Relies on module-level globals: st, spacy_streamlit, nlp, wordnet,
    TextBlob, Google, summary/summarize, pluralize/singularize/
    comparative/superlative.
    """
    st.title("Nimbus Words")
    st.sidebar.header("Input Options")
    # Collapsible help text shown above the tools.
    expander_bar = st.beta_expander("How To Use This App")
    expander_bar.markdown("""
    **Use the Dropdown Box located within the sidebar on the left to choose 1 of the 6 AI text editing features offered by Nimbus Words.** 

    1) **Summarizer:** Paste in text that will be summarized by our AI model. The first text box will do an automated summary of our program's recommended word count, and the second box beneath that will provide a summary of the exact word count you choose using the slider located within the sidebar. 

    2) **Tokenizer:** Paste in text that will be analyzed by our AI model. The **Tokenizer** button will provide a breakdown on each word within the phrase, for example 'Google' is an organization, or 'Jeff Bezos' is a proper noun. The **NER** button will display all named entities, for example 'Steve Jobs' is a person. The **Text Relationship** button will display a visual graph of the dependency each word has within a sentence or phrase. 

    3) **Synonyms:** Paste in text that will be analyzed by our AI model. The **Synonyms** button will provide you with synonyms to the inputted attribute. The **Definition** checkbox will provide definitions for the attribute. The **Example** checkbox will provide examples of the given attribute in a sentence. 

    4) **Translator:** Paste in text that will be translated by our AI model. The **Translate** button will translate the inputted text into one of the many languages that we have provided, and we will automatically detect which language the inputted text is written in. 

    5) **Search:** Paste in text that will be preprcoessed by our AI model. The **Search** button will do a filtered search for your input. 

    6) **Spell Correction:** Paste in text that will be spell-checked by our AI model. The **Correct** button will offer a correct spelling for any grammatical error that are detected. The **Pluralize**, **Singularize**, **Comparative** and **Superlative** checkboxes do exactly as they say, and ouput those options for the input you provided. 
    """)
    activites = [
        "Summary", "Tokenizer", "Synonyms", "Translator", "Search",
        "Spell Correction"
    ]
    choice = st.sidebar.selectbox("Select Activity", activites)
    if choice == "Summary":
        st.title('AI Text Summarizer')
        text = st.text_area("Input Text For Summary", height=300)
        if st.button("Summarize"):
            st.success(summary(text))
        # Second summarizer with a user-chosen word count.
        text_range = st.sidebar.slider("Summarize words Range", 25, 500)
        text = st.text_area("Input Text For Summary", height=250)
        if st.button("Summarize with Custom Word Count"):
            st.warning(summarize(text, word_count=text_range))
    # Tokenizer
    elif choice == "Tokenizer":
        st.title('Text Tokenizer')
        row_data = st.text_area("write Text For Tokenizer")
        docx = nlp(row_data)
        if st.button("Tokenizer"):
            spacy_streamlit.visualize_tokens(
                docx, attrs=['text', 'pos_', 'dep_', 'ent_type_'])
        if st.button("NER"):
            spacy_streamlit.visualize_ner(docx,
                                          labels=nlp.get_pipe('ner').labels)
        if st.button("Text Relationship"):
            spacy_streamlit.visualize_parser(docx)
    # synonyms
    elif choice == "Synonyms":
        st.title('Synonym Generator')
        text = st.text_area("Enter Text")
        if st.button("Synonyms"):
            for syn in wordnet.synsets(text):
                for i in syn.lemmas():
                    st.success(i.name())
        if st.checkbox("Definition"):
            for syn in wordnet.synsets(text):
                st.warning(syn.definition())
        if st.checkbox("Example"):
            for syn in wordnet.synsets(text):
                st.success(syn.examples())
    # Translator
    elif choice == "Translator":
        st.title('Speech Tranlation')
        row_text = st.text_area("Enter Your Text For Translation", height=300)
        translation_text = TextBlob(row_text)
        list1 = ["en", "ta", "pa", "gu", "hi", "ur", "kn", "bn", "te"]
        a = st.selectbox("select", list1)
        if st.button("search"):
            #input1 = TextBlob("Simple is better than complex")
            st.success(translation_text.translate(to=a))
    #Search Bar
    elif choice == "Search":
        st.title('Web Search')
        row_text = st.text_input("Search Anything")
        google = Google(license=None)
        if st.button("search"):
            for search_result in google.search(row_text):
                st.write(search_result.text)
                st.warning(search_result.url)
    elif choice == "Spell Correction":
        st.title('AI Spell Correction')
        text_data = st.text_area("Enter Text Here")
        a = TextBlob(text_data)
        if st.button("Correct"):
            st.success(a.correct())
        st.title('Pluralize & Singularize')
        text_data1 = st.text_input("Enter a word For pluralize / singularize")
        if st.checkbox("Pluralize"):
            st.warning(pluralize(text_data1))
        if st.checkbox("Singularize"):
            st.warning(singularize(text_data1))
        st.title('Compartitive & Superlative')
        text2 = st.text_input("Enter Text For comparative & superlative")
        if st.checkbox("Comparative"):
            st.success(comparative(text2))
        if st.checkbox("Superlative"):
            st.success(superlative(text2))
def calc_main():
    """Render the "Nimbus Words" Streamlit page.

    Shows a sidebar activity selector and, depending on the choice, one of
    six NLP tools: text summarization, tokenization/NER visualization,
    synonym lookup, translation, web search, or spell correction plus
    pluralize/singularize and comparative/superlative helpers.

    Relies on module-level objects defined elsewhere in this file:
    ``st``, ``summary``, ``summarize``, ``nlp``, ``spacy_streamlit``,
    ``wordnet``, ``TextBlob``, ``Google``, ``pluralize``, ``singularize``,
    ``comparative``, ``superlative``.
    """
    st.write("Nimbus Words")
    st.sidebar.header("Input Options")
    activites = ["Summary", "Tokenizer", "Synonyms", "Translator", "Search", "Spell Correction"]
    choice = st.sidebar.selectbox("Select Activity", activites)

    if choice == "Summary":
        st.title('AI Text Summarizer')
        text = st.text_area("Input Text For Summary", height=300)
        if st.button("summarize"):
            st.success(summary(text))
        # Second summarizer variant with a user-chosen target word count.
        text_range = st.sidebar.slider("Summarize words Range", 25, 500)
        text = st.text_area("Input Text For Summary", height=250)
        if st.button("custom summarization"):
            st.warning(summarize(text, word_count=text_range))

    # Tokenizer
    elif choice == "Tokenizer":
        st.title('Text Tokenizer')
        row_data = st.text_area("write Text For Tokenizer")
        docx = nlp(row_data)
        if st.button("Tokenizer"):
            spacy_streamlit.visualize_tokens(docx, attrs=['text', 'pos_', 'dep_', 'ent_type_'])
        if st.button("NER"):
            spacy_streamlit.visualize_ner(docx, labels=nlp.get_pipe('ner').labels)
        if st.button("Text Relationship"):
            spacy_streamlit.visualize_parser(docx)

    # Synonyms
    elif choice == "Synonyms":
        st.title('Synonym Generator')
        text = st.text_area("Enter Text")
        if st.button("Find"):
            for syn in wordnet.synsets(text):
                for i in syn.lemmas():
                    st.success(i.name())
        # BUG FIX: checkbox label typo "Defination" -> "Definition".
        if st.checkbox("Definition"):
            for syn in wordnet.synsets(text):
                st.warning(syn.definition())
        if st.checkbox("Example"):
            for syn in wordnet.synsets(text):
                st.success(syn.examples())

    # Translator
    elif choice == "Translator":
        # BUG FIX: title typo "Speech Tranlation" -> "Speech Translation".
        st.title('Speech Translation')
        row_text = st.text_area("Enter Your Text For Translation", height=300)
        translation_text = TextBlob(row_text)
        list1 = ["en", "ta", "pa", "gu", "hi", "ur", "kn", "bn", "te"]
        a = st.selectbox("select", list1)
        if st.button("search"):
            st.success(translation_text.translate(to=a))

    # Search Bar
    elif choice == "Search":
        st.title('Web Search')
        row_text = st.text_input("Search Anything")
        google = Google(license=None)
        if st.button("search"):
            for search_result in google.search(row_text):
                st.write(search_result.text)
                st.warning(search_result.url)

    elif choice == "Spell Correction":
        st.title('AI Spell Correction')
        text_data = st.text_area("Enter Text Here")
        a = TextBlob(text_data)
        if st.button("Correct"):
            st.success(a.correct())
        st.title('Pluralize & Singularize')
        text_data1 = st.text_input("Enter a word For pluralize / singularize")
        if st.checkbox("pluralize"):
            st.warning(pluralize(text_data1))
        if st.checkbox("singularize"):
            st.warning(singularize(text_data1))
        # BUG FIX: title typo "Compartitive" -> "Comparative".
        st.title('Comparative & Superlative')
        text2 = st.text_input("Enter Text For comparative & superlative")
        if st.checkbox("comparative"):
            st.success(comparative(text2))
        if st.checkbox("superlative"):
            st.success(superlative(text2))
# POS tags for which a re-inflected candidate must also tag back to the same
# POS before it is accepted as a replacement.
_CHECKED_TAGS = {'NNPS', 'NNS', 'NNP', 'RBR', 'RBS', 'VBG', 'VBN', 'VBP', 'VBZ'}
# POS tags where any non-None inflection result is accepted as-is.
_UNCHECKED_TAGS = {'VB', 'VBD', 'NN', 'RB'}

# POS tags that should never be replaced (pronouns, determiners, numbers,
# prepositions, etc. — no synonym lookup is attempted for these).
_SKIP_TAGS = ('DP', 'CD', 'TO', 'PRP$', 'IN', 'PRP', 'DT', 'WRB', 'WR')

# Literal tokens (punctuation, contraction fragments, stray letters) that are
# copied through unchanged.
_SKIP_TOKENS = {
    '.', ',', '(', ')', '', ' (', '( ', ' )', ') ', ' .', '. ', '!',
    'doesn', 't', 'don', "'", 'i', 'l', ' t', 't ', "'t",
    'wasn', 'didn', 'couldn', 'wouldn', 'weren',
    'I', 'L', '1', '|', ';', ':', 's', ' s', 'ain', 'll', '-', '__',
}

# Case-insensitive word blacklist (time units etc.) never replaced.
_SKIP_WORDS_LOWER = {
    'time', 'second', 'seconds', 'month', 'months', 'year', 'years',
    'minute', 'minutes', 'indian', 'countries', 'let',
}

# Punctuation-like tokens for which no synonym lookup is performed.
_PUNCT = {'.', ',', "'", '"', ' "', '" ', '?'}


def _inflect_like(candidate, target_tag):
    """Re-inflect *candidate* to *target_tag* via spaCy/pyinflect.

    Returns the inflected string, or None when no inflection exists or (for
    tags in _CHECKED_TAGS) when the result does not tag back to target_tag.

    BUG FIX: the original compared ``pos_tag([w]) == a`` — a list against a
    string — which was always False, so the checked branches never appended.
    It also referenced ``tokens`` where ``token`` was assigned (NameError in
    the NNPS branch) and passed a misspelled ``from_num`` keyword in the NNP
    branch; all branches are unified here with consistent arguments.
    """
    token = nlp(candidate)
    w = token[0]._.inflect(target_tag, inflect_oov=True, form_num=0)
    if w is None:
        return None
    if target_tag in _CHECKED_TAGS and pos_tag([w])[0][1] != target_tag:
        return None
    return w


def start(position=None, *args):
    """Rewrite the text in ``input_box`` by swapping eligible words for
    similar words, writing two variants into ``result_box1``/``result_box2``.

    position:
        None (default)  -> pick a candidate automatically (best, or random
                           among the top few);
        int or digit-str -> pick the candidate at that similarity rank,
                            falling back to the last candidate.

    Side effects: sets globals ``output``, ``output2``, ``total_length``,
    ``words``; inserts into the tkinter result boxes; prints progress.
    Relies on module-level ``nlp``, ``fun``, ``progressbar``, ``word_tokenize``,
    ``pos_tag``, ``comparative``, ``superlative`` and the tkinter widgets.
    """
    global output, output2, total_length, words

    source_text = input_box.get('1.0', 'end-1c')
    if source_text == '' or source_text == ' ':
        result_box2.insert(END, 'NOTHING TO PROCESS IN INPUTBOX !!!')
        result_box1.insert(END, 'NOTHING TO PROCESS IN INPUTBOX !!!')
        # BUG FIX: originally fell through and kept processing the empty text.
        return

    words = word_tokenize(source_text)
    total_length = len(words)
    taggs = pos_tag(words)
    output = ''
    output2 = ''
    real_words = []

    for i in range(len(words)):
        a = taggs[i][1]
        print('word: ', words[i], 'tag: ', a)

        # Look up raw synonym candidates only for replaceable words.
        r = []
        if (a not in _SKIP_TAGS
                and words[i] not in _SKIP_TOKENS
                and words[i].lower() not in _SKIP_WORDS_LOWER):
            if words[i] not in _PUNCT:
                r = fun(words[i])
                progressbar(i)

        # Normalize candidates to the source word's part of speech.
        if r != []:
            print('list of words: ', r)
            real_words = []
            for j in r:
                tag = pos_tag([j])[0][1]
                if tag == a or a == 'JJ':
                    # Already the right POS (adjectives accepted as-is).
                    real_words.append(j)
                elif a == 'JJR':
                    real_words.append(comparative(j))
                elif a == 'JJS':
                    real_words.append(superlative(j))
                elif a in _CHECKED_TAGS or a in _UNCHECKED_TAGS:
                    # BUG FIX: 'VBN ' (trailing space) never matched 'VBN'
                    # in the original elif chain; handled uniformly here.
                    w = _inflect_like(j, a)
                    if w is not None:
                        real_words.append(w)

        print('real words: ', real_words)
        if real_words == [] or r == []:
            # No usable candidate: keep the original word in both outputs.
            output = output + ' ' + words[i]
            output2 = output2 + ' ' + words[i]
            continue

        # Score candidates by embedding similarity to the source word.
        output_words = []
        max_sim = []
        token1 = nlp(words[i])
        for h in real_words:
            if h is None or h == '':
                continue
            token2 = nlp(h)
            f = h.replace(' ', '').replace('_', ' ')
            sim = token1.similarity(token2)
            if (h not in output_words and f not in output_words
                    and words[i].lower() != h.lower()
                    and words[i].lower() != f.lower()):
                output_words.append(f)
                max_sim.append(sim)

        # Order candidates best-first by repeatedly extracting the max.
        final_listwords = []
        for _ in range(len(max_sim)):
            best = max_sim.index(max(max_sim))
            final_listwords.append(output_words[best])
            max_sim[best] = -1

        if output_words == []:
            output = output + ' ' + words[i]
            output2 = output2 + ' ' + words[i]
        elif isinstance(position, str):
            # BUG FIX: this branch was unreachable — any str position matched
            # the earlier `type(position) != int` catch-all. A digit string now
            # selects that similarity rank, with a last-candidate fallback
            # (mirroring the int branch, which previously dropped the word).
            idx = int(position)
            if len(output_words) > idx:
                output = output + ' ' + final_listwords[idx]
            else:
                output = output + ' ' + final_listwords[-1]
        elif isinstance(position, int):
            if len(output_words) > position:
                output = output + ' ' + final_listwords[position]
            else:
                output = output + ' ' + final_listwords[-1]
        else:
            # position is None (or an unsupported type): automatic choice.
            print('final_listwords: ', final_listwords)
            if len(output_words) > 3:
                output = output + ' ' + final_listwords[0]
            else:
                output = output + ' ' + final_listwords[random.randint(0, len(final_listwords) - 1)]

        # Second output: the word alongside its top candidates (at most 8).
        if len(output_words) > 7:
            output2 = output2 + ' ' + str([words[i]] + final_listwords[0:8])
        else:
            output2 = output2 + ' ' + str([words[i]] + final_listwords[0:])

    result_box1.insert(END, output)  # display the rewritten text
    result_box2.insert(END, output2)
from pattern.en import pluralize from pprint import pprint from pattern.en import superlative, comparative from pattern.en import sentiment from pattern.en import parse, Sentence, modality from pattern.en import suggest words = ["boy", "boys", "knives", "knife", "drove", "drive"] dic = set() print(dic) w1 = "boy" w2 = "boys" for word in words: if word == singularize(word): print(f"{word} is singular word") dic.add((word, pluralize(word))) else: print(f"{word} is plural word") dic.add((singularize(word), word)) pprint(list(dic)) print(superlative("good")) print(comparative("good")) ##print(suggest("whitle"))
# Boston, MA 02110-1301 USA, from pattern.en import referenced print(referenced('university')) print(referenced('hour')) from pattern.en import pluralize, singularize print(pluralize('child')) print(singularize('wolves')) from pattern.en import comparative, superlative print(comparative('bad')) print(superlative('bad')) from pattern.en import conjugate, lemma, lexeme print(lexeme('purr')) print(lemma('purring')) print(conjugate('purred', '3sg')) # he / she / it from pattern.en import conjugate, lemma, lexeme print(lexeme('purr')) print(lemma('purring')) print(conjugate('purred', '3sg')) # he / she / it from pattern.de import gender, MALE, FEMALE, NEUTRAL print(gender('Katze'))
# --------------- # The singularize() command returns the singular form of a plural noun (or adjective). # It is slightly less robust than the pluralize() command. for word in ["parts-of-speech", "children", "dogs'", "wolves", "bears", "kitchen knives", "octopodes", "matrices", "matrixes"]: print singularize(word) print singularize("our", pos=ADJECTIVE) print print # COMPARATIVE & SUPERLATIVE ADJECTIVES # ------------------------------------ # The comparative() and superlative() commands give the comparative/superlative form of an adjective. # Words with three or more syllables are simply preceded by "more" or "most". for word in ["gentle", "big", "pretty", "hurt", "important", "bad"]: print word, "=>", comparative(word), "=>", superlative(word) print print # VERB CONJUGATION # ---------------- # The lexeme() command returns a list of all possible verb inflections. # The lemma() command returns the base form (infinitive) of a verb. print "lexeme:", lexeme("be") print "lemma:", lemma("was") # The conjugate() command inflects a verb to another tense. # The tense can be given as a constant, e.g. # INFINITIVE, PRESENT_1ST_PERSON_SINGULAR PRESENT_PLURAL, PAST_PARTICIPLE, ... # or as an abbreviated alias: inf, 1sg, 2sg, 3sg, pl, part, 1sgp, 2sgp, 3sgp, ppl, ppart. print conjugate("being", tense="1sg", negated=False)