Example #1
def assembleEntry(y):
    glosses = []
    examples = []
    etymologies = []
    quotations = []
    pronunciations = []
    pronunciation_entries = set()
    partsOfSpeech = []
    partsOfSpeechHeads = []
    etymology_entries = set()
    synonyms = []
    word_forms = []

    # Preprocessing
    for entry in y.get('entries', []):
        # Parts of speech
        psos = entry.get('partsOfSpeech') or []
        try:
            # use a list comprehension so psos stays indexable below
            # (a bare map object is always truthy and has no psos[0] on Python 3)
            psos = [x.replace('proper_noun', 'proper noun') for x in psos]
        except Exception:
            print(repr(psos))
            print(y['title'])
            raise
        if psos:
            partsOfSpeech.append(u"<B>" + u", ".join(psos) + u"</B>")
            partsOfSpeechHeads.append(psos[0])
        else:
            partsOfSpeech.append("")
            partsOfSpeechHeads.append("")

        # Word forms
        elems = []
        for wf in entry.get('wordForms') or []:
            form = wf.get('form')
            if form:
                elems.append(form)
        word_forms.append(elems)

        # Synonyms
        synonyms.append(clean_synonyms(entry.get('synonyms', [])))

        # Pronunciations
        elems = []
        elem = ""
        # print(entry.get('pronunciations', []))
        for pronunciation in entry.get('pronunciations', []):
            text = pronunciation.get('text')
            if text:
                if text not in pronunciation_entries:
                    pronunciation_entries.add(text)
                    elem += text
                    note = pronunciation.get('note')
                    if note:
                        elem += " (" + note + ")"
                    elems.append(elem)
                    elem = ""
        pronunciations.append(", ".join(elems))
        # print(repr(pronunciations[-1]))

        # Senses
        gloss_entry = []
        example_entry = []
        quote_entry = []
        for sense in entry.get('senses') or []:
            gloss_entry.append(stripHtml(sense.get('gloss', '')))
            example_entry.append([ replace_newlines(stripHtml(example.get('example', ''))) for example in sense.get('examples', [])])
            quote_entry.append([ replace_newlines(stripHtml(quote.get('quote', ''))) for quote in sense.get('quotations', [])])
        glosses.append(gloss_entry)
        examples.append(example_entry)
        quotations.append(quote_entry)

        etymology_text = stripHtml(entry.get('etymology', ''))
        if etymology_text not in etymology_entries:
            etymology_entries.add(etymology_text)
            etymologies.append(etymology_text)
        else:
            etymologies.append('')

    # Assemble string

    # Title
    s = u""
    # s += y['title'] + "\t"

    # Pronunciations
    entry_pronuncs = False
    # pronunciations_filtered = [text for entry in pronunciations for text in entry]
    pronunciations_filtered = list(filter(None, pronunciations))
    if len(pronunciations_filtered) == 1:
        s += u" " + pronunciations_filtered[0] + "<BR>"
    else:
        entry_pronuncs = True

    # Entries & glosses
    single_entry = len(glosses) == 1
    for (entry_num, entry_glosses) in enumerate(glosses, 1):
        if entry_num >= 2:
            s += "<BR>"
        if not single_entry:
            s += u"{0}. ".format(roman.int_to_roman(entry_num))
        if entry_pronuncs:
            s += prep_string(pronunciations[entry_num - 1])
        s += partsOfSpeech[entry_num - 1]

        # Handle word forms
        pos = partsOfSpeechHeads[entry_num - 1]
        word = y['title']
        if pos == "verb":
            p = en.conjugate(word, 'p')
            pp = en.conjugate(word, 'ppart')
            if p != word + 'ed' or pp != word + 'ed':
                s += u" (p. " + p + u", pp. " + pp + u")"
        elif pos == "noun":
            pl = en.pluralize(word)
            if pl != word + u's':
                s += u" (pl. " + pl + ")"
        elif pos == "adjective":
            pass

        # Glosses
        single_gloss = len(entry_glosses) == 1
        for (gloss_num, gloss) in enumerate(entry_glosses, 1):
            if not single_gloss:
                s += u" {0:d}.".format(gloss_num)
            # else:
            #     s += u":"
            s += u" {0}".format(gloss)
        s += prep_string(", ".join(synonyms[entry_num - 1]) + u"." if synonyms[entry_num - 1] else "", " Synonyms: ")
        # s += prep_string(etymologies[entry_num - 1], u" Etymology: ")

    # Etymologies
    etymologies_filtered = [etym for etym in etymologies if etym]
    if etymologies_filtered:
        s += '<BR><BR><B>Etymology:</B>'
        if len(etymologies_filtered) == 1:
            s += etymologies_filtered[0]
        else:
            for i in range(0, len(glosses)):
                if etymologies[i]:
                    s += u" {0}. {1}".format(roman.int_to_roman(i + 1), etymologies[i])

    # Examples and Quotes
    examples_flat = [example for entry in examples for sense_examples in entry for example in sense_examples if example]
    if examples_flat:
        s += u"<BR><BR><B>Examples:</B>"
        for (num_example, example) in enumerate(examples_flat, 1):
            if len(examples_flat) == 1:
                s += " " + example
            else:
                s += u" {0:d}. {1}".format(num_example, example)

    quotes_flat = [quote for entry in quotations for quotes in entry for quote in quotes if quote]
    if quotes_flat:
        s += u"<BR><BR><B>Quotations:</B>"
        for (num_quote, quote) in enumerate(quotes_flat, 1):
            if len(quotes_flat) == 1:
                s += u" " + quote
            else:
                s += u" {0:d}. {1}".format(num_quote, quote)

    s = escape_characters(s)

    word_forms_flat = [form for entry in word_forms for form in entry if form]
    titles = [y['title']]
    titles.extend(word_forms_flat)
    if 'verb' in partsOfSpeechHeads:
        titles.extend(en.lexeme(y['title']))
    if 'noun' in partsOfSpeechHeads:
        titles.append(en.pluralize(y['title']))
    if 'adjective' in partsOfSpeechHeads:
        adj_forms = [en.comparative(y['title']), en.superlative(y['title'])]
        adj_forms = [form for form in adj_forms if len(form.split(' ')) == 1]
        titles.extend(adj_forms)
    titles = unique(titles)

    if s.strip() == "":
        s = "Empty article."
    s = u'|'.join(titles) + u"\n" + s.strip()

    # return escape_characters(contract_tabs(s))
    return s
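
# A hypothetical input for assembleEntry(), inferred from the keys the function
# reads above (title, entries, partsOfSpeech, pronunciations, senses, wordForms,
# synonyms, etymology); purely illustrative, not the project's real schema:
sample_entry = {
    "title": "big",
    "entries": [{
        "partsOfSpeech": ["adjective"],
        "pronunciations": [{"text": "/bɪɡ/"}],
        "senses": [{"gloss": "Of great size.", "examples": [], "quotations": []}],
        "wordForms": [],
        "synonyms": [],
        "etymology": "",
    }],
}
# print(assembleEntry(sample_entry))  # requires the module's helpers (stripHtml, prep_string, en, roman, ...)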
Example #2
def article_checker(l, index):
    if index + 2 < len(l):
        t = [l[index].lower(), l[index + 1].lower(), l[index + 2].lower()]
    else:
        return []
    suggestions = []
    # trigrams=ngrams(var,3)
    wnl = WordNetLemmatizer()
    # print(index,t)
    if (t[0] in Articles or t[1] in Articles):
        if t[0] in Articles and superlative(wnl.lemmatize(t[1])) == t[1]:
            suggestion = "the " + t[1] + " " + t[2]
            suggestions = suggestions + [suggestion]
        elif t[1] in Articles and superlative(wnl.lemmatize(t[2])) == t[2]:
            suggestion = t[0] + " the " + t[2]
            suggestions = suggestions + [suggestion]
        else:
            st = " ".join(t)
            s = query(st, 1)
            # if(len(s)==0):
            suggest = processArt(t)
            # print(suggest)
            final = query(suggest, 6)

            suggestions = suggestions + list(
                dict(filter(lambda elem: elem[1] >= 0.2,
                            final.items())).keys())
    suggestions = list(filter(lambda elem: elem.split(" ") != t, suggestions))
    return suggestions
Example #3
    def superlative(self, word):
        '''
        Given a base-form word (adjective), return its superlative form.

        Args:
            word (str): base-form adjective

        Raises:
            ValueError: if neither the word nor its base form is in the vocabulary

        Returns:
            str: superlative form
        '''
        if word in self._word2index:
            return superlative(word)
        try:
            base_form_word = lemma(word)
        except Exception:
            raise ValueError(
                "Cannot find a base form for '{}'".format(word))
        if base_form_word in self._word2index:
            return superlative(base_form_word)
        raise ValueError(
            "Found the base form for '{}': '{}', but the base form is not in the vocabulary either"
            .format(word, base_form_word))
Example #4
def find_adjective_form(original_form, original_lemma, new_lemma):
    """
    Figure out whether adjective was in basic, comparative, or superlative form,
    then apply that to new_lemma
    """
    if original_form == comparative(original_lemma):
        return comparative(new_lemma)
    elif original_form == superlative(original_lemma):
        return superlative(new_lemma)
    else:
        return new_lemma
Example #5
def pos_all(word):
    rlist = []
    _rtense = ('infinitive', 'present', 'past', 'future')
    _rperson = (1, 2, 3)
    _rnumber = ('singular', 'plural')
    _rmood = ('indicative', 'imperative', 'conditional', 'subjunctive')
    _raspect = ('imperfective', 'perfective', 'progressive')
    for rtense in _rtense:
        for rperson in _rperson:
            for rnumber in _rnumber:
                for rmood in _rmood:
                    for raspect in _raspect:
                        item = conjugate(word,
                                         tense=rtense,
                                         person=rperson,
                                         number=rnumber,
                                         mood=rmood,
                                         aspect=raspect,
                                         negated=False)
                        if item not in rlist:
                            rlist.append(item)

    print bcolors.Magenta + "All pos of " + word
    print_list(rlist, 4)
    print "Singluar    : " + singularize(
        word) + "			Plural      : " + pluralize(word)
    print "Comparative : " + comparative(
        word) + " 			Superlative : " + superlative(word)
Example #6
def acceptPatterns():
    original_content = request.form['drunk_text']
    text_content_array = original_content.split(' ')
    text_content = ''
    for s in text_content_array:
        text_content += superlative(s) + ' '
    s = parsetree(original_content, relations=True, lemmata=True)
    return repr(s)
Example #7
def acceptPatterns():
    original_content = request.form['drunk_text']
    text_content_array = original_content.split(' ')
    text_content = ''
    for s in text_content_array:
        text_content += superlative(s) + ' '
    s = parsetree(original_content, relations=True, lemmata=True)
    return repr(s)
Example #8
def check_pos(pos_tag, word):
    if pos_tag == 'NN':
        add(pluralize(word), word, False, "plural")
    elif pos_tag == 'VB':
        for lex in lexeme(word):
            add(lex, word, False, "conjugation")
    elif pos_tag == 'JJ':
        comp = comparative(word)
        add(comp, word, False, "comparative")
        sup = superlative(word)
        add(sup, word, False, "superlative")
Example #9
def reconjugate(syn, tok):
    tag = tok.tag_
    if tag in tag_map:
        return conjugate(syn, tag_map[tag], parse=True)
    if tag == 'JJR' or tag == 'RBR':
        return comparative(syn)
    if tag == 'JJS' or tag == 'RBS':
        return superlative(syn)
    if tag.startswith('N') and tag.endswith('S'):
        return pluralize(syn)
    # do nothing
    return syn
Example #10
def morphological_error(target, response):
	if response == pluralize(target): 
		#Checks for the plural form of the target.
		return True
	if response == comparative(target) :
		#Checks for the comparative form of the target. 
		return True
	if response == superlative(target): 
		#Checks for the superlative form of the target.
		return True
	if lemma(target) == lemma(response): 
		#Check to see if the target and response share a lemma.
		return True
	return False
Example #11
    def adjectify(self, bottish, english, dictionary):

        comp = "foo"
        sup = "far"

        for definition in dictionary:
            # if english word is the comparative form of an existing word
            if english == comparative(definition["english"]):
                bottish = definition["bottish"] + comp
                return bottish
            # or is the superlative form of an existing word
            elif english == superlative(definition["english"]):
                bottish = definition["bottish"] + sup
                return bottish
            # or is the normal form of an existing comparative form
            elif comparative(english) == definition["english"]:
                # remove the comparative suffix; rstrip() strips characters, not a suffix
                if definition["bottish"].endswith(comp):
                    bottish = definition["bottish"][:-len(comp)]
                return bottish
            # or is the normal form of an existing superlative form
            elif superlative(english) == definition["english"]:
                # likewise remove the superlative suffix
                if definition["bottish"].endswith(sup):
                    bottish = definition["bottish"][:-len(sup)]
                return bottish

        return bottish
Example #12
def tryPOS(word, p, target):
    if target in p and target not in ['RB', 'DT', 'RP']:
        if target == 'PRP' or target == 'WP':
            d = WPD
            if target == 'PRP':
                d = PRPD
            for k in d:
                if d[k] == word:
                    return k
            return None
        return wn.morphy(word)

    #else
    if target == 'PRP$' and p == 'PRP':
        return PRPD.get(word)
    if target == 'WP$':
        return WPD.get(word)
    if p == 'NN':
        if target == 'NNP':
            return word
        else:
            return pluralize(word)
    if p == 'NNP':
        return pluralize(word)
    if 'VB' in p:
        t = ''
        if target == 'VBD':
            t = PAST
        if target == 'VBP':
            t = INFINITIVE
        if target == 'VBZ':
            t = PRESENT
        if target == 'VBN':
            t = PAST + PARTICIPLE
        if target == 'VBG':
            t = PARTICIPLE
        if t:
            return conjugate(word, tense=t)

    ret = ''
    if target == 'JJR' or target == 'RBR':
        ret = comparative(word)
    if target == 'JJS' or target == 'RBS':
        ret = superlative(word)
    if not ret or ' ' in ret:
        return None  #default
    else:
        return ret
Example #13
 def inflected_forms(self, syn, desc):
     try:
         word, pos, _ = desc
         if pos == 'Verb':
             from pattern.en import lexeme
             return [w for w in reversed(lexeme(word)) if w != word]
         elif pos == 'Noun':
             from pattern.en import pluralize
             return [pluralize(word)]
         elif pos == 'Adjective':
             from pattern.en import comparative, superlative
             return [comparative(word), superlative(word)]
         else:
             return []
     except ImportError:
         raise MessageException('General', 'unavailable', 'WordData[_, "InflectedForms"]', 'pattern')
Example #14
 def inflected_forms(self, syn, desc):
     try:
         word, pos, _ = desc
         if pos == 'Verb':
             from pattern.en import lexeme
             return [w for w in reversed(lexeme(word)) if w != word]
         elif pos == 'Noun':
             from pattern.en import pluralize
             return [pluralize(word)]
         elif pos == 'Adjective':
             from pattern.en import comparative, superlative
             return [comparative(word), superlative(word)]
         else:
             return []
     except ImportError:
         raise MessageException('General', 'unavailable', 'WordData[_, "InflectedForms"]', 'pattern')
Example #15
    def generate_ace_adj_rules(self):
        # - adj_statement_list = []
        # - adj_comp_statement_list = []
        # - adj_sup_statement_list = []
        # - adv_statement_list = []
        for adj in self.adjectives():
            bare_word = adj
            comparative_word = comparative(adj)
            if len(word_tokenize(comparative_word)) > 1:
                comparative_word = None

            superlative_word = superlative(adj)
            if len(word_tokenize(superlative_word)) > 1:
                superlative_word = None

            adverb = get_word_forms(adj)["r"]
            if len(adverb) == 0:
                adverb = None

            adj_statement = "adj_itr({}, {}).".format(adj, adj)
            yield adj_statement
            # - adj_statement_list.append(adj_statement)

            if comparative_word is not None:
                adj_comp_statement = "adj_itr_comp({}, {}).".format(
                    comparative_word, adj)

                self._inverse_map[comparative_word] = adj
                yield adj_comp_statement

                # - adj_comp_statement_list.append(adj_comp_statement)

            if superlative_word is not None:
                adj_sup_statement = "adj_itr_sup({}, {}).".format(
                    superlative_word, adj)

                self._inverse_map[superlative_word] = adj
                yield adj_sup_statement

                # - adj_sup_statement_list.append(adj_sup_statement)

            if adverb is not None:
                for adv in adverb:
                    adv_statement = "adv({}, {}).".format(adv, adv)

                    self._inverse_map[adv] = adj
                    yield adv_statement
Example #16
def annotate_sentence(sentence, index):
    """
    Returns a list of recommendations based on an input sentence
    """
    improvements: List[Recommendation] = []

    for word in sentence:
        if word.text.lower() == "more":
            head = word.head
            if head.pos_ in POS_WITH_COMP_SUPER:
                new_comparative: str = comparative(head.text.lower())
                if "more" not in new_comparative:
                    improvements.append(
                        Recommendation(
                            RecommendationType.COMPARATIVE,
                            sentence.text,
                            sentence.start,
                            sentence.end,
                            index,  # paragraph index
                            [
                                sentence.text.replace("more " + head.text,
                                                      new_comparative)
                            ],
                            RecommendationType.COMPARATIVE + head.text,
                            1  # Confidence
                        ))

        if word.text.lower() == "most":
            head = word.head
            if head.pos_ in POS_WITH_COMP_SUPER:
                new_superlative: str = superlative(head.text.lower())
                if "most" not in new_superlative:
                    improvements.append(
                        Recommendation(
                            RecommendationType.SUPERLATIVE,
                            sentence.text,
                            sentence.start,
                            sentence.end,
                            index,  # paragraph index
                            [
                                sentence.text.replace("most " + head.text,
                                                      new_superlative)
                            ],
                            RecommendationType.SUPERLATIVE + head.text,
                            1  # Confidence
                        ))
    return improvements
Example #17
def find_roots(word):
    all_roots = [word]
    if word in abbreviations + special_words:
        return all_roots

    if word in replace_dict.keys():
        word = replace_dict[word]

    words = word if isinstance(word, list) else [word]

    roots_lst = []
    for word in words:
        word_url = base_url + "word/" + word
        content = get_content(word_url)
        if content is None or "Error 404 (Not Found)" in content:  # if we haven't found the word
            url = base_url + "word/" + lemma(word)
            content = get_content(url)
            if content is None or "Error 404 (Not Found)" in content:
                url = find_top_search_result(word)
                if url is None:
                    if (word[(-3):] == "est" and superlative(word[:(-3)])
                            == word):  # if it is in superlative form
                        url = find_top_search_result(word[:(-3)])
                    elif (word[(-2):] == "er"
                          and comparative(word[:(-2)]) == word):
                        url = find_top_search_result(word[:(-2)])
                    else:
                        print("Warning: Didn't get top search result for",
                              word)
                content = get_content(url)
                if content is None or "Error 404 (Not Found)" in content:
                    continue

        # Truncate content
        related_entries_index = content.find("Related Entries")
        content_truncate = content[:related_entries_index]

        matches = find_all_occurances(
            match_reg_exp, content_truncate)  # here content is a byte string

        roots = extract_roots(matches)
        roots.append(word)  # the word itself is obviously a related word
        roots_lst.append(roots)

    all_roots += list(set(itertools.chain(*roots_lst)))
    return list(set(all_roots))
Example #18
 def match_morphology(self, word, syn):
     person = 1
     if word.morph.is_third_person is not None:
         person = 3
     if word.morph.tense is not None:
         syn = conjugate(syn, tense=word.morph.tense, person=person)
     if word.morph.is_plural is not None and word.pos == NOUN:
         syn = pluralize(syn)
     if word.morph.is_singular is not None and word.pos == NOUN:
         syn = singularize(syn)
     if word.morph.is_superlative and (word.pos == ADV or word.pos == ADJ):
         syn = superlative(syn)
     if word.morph.is_comparative and (word.pos == ADV or word.pos == ADJ):
         syn = comparative(syn)
     if word.shape[0] == 'X':
         syn = syn.capitalize()
     return syn
Example #19
def transform_word(word, pos, word_original):
	words = word.split(' ')
	result = list()
	for i, word in enumerate(words):
		if i == 0:
			try:
				if pos == 'JJR' or pos == 'RBR':
					pos_again = nltk.pos_tag([word])[0][1]
					if pos_again == 'JJR' or pos_again == 'RBR':
						result.append(word)
					else:
						result.append(comparative(word))
				elif pos == 'JJS' or pos == 'RBS':
					pos_again = nltk.pos_tag([word])[0][1]
					if pos_again == 'JJS' or pos_again == 'RBS':
						result.append(word)
					else:
						result.append(superlative(word))
				elif pos == 'NNS' or pos == 'NNPS':
					pos_again = nltk.pos_tag([word])[0][1]
					if pos_again == 'NNS' or pos_again == 'NNPS':
						result.append(word)
					else:
						result.append(pluralize(word))
				elif pos == 'VBD':
					result.append(conjugate(word, 'p'))
				elif pos == 'VBG':
					result.append(conjugate(word, 'part'))
				elif pos == 'VBN':
					result.append(conjugate(word, 'ppart'))
				elif pos == 'VBP':
					if (PRESENT, 1, SG) in tenses(word_original):
						result.append(conjugate(word, '1sg'))
					else:
						result.append(conjugate(word, '2sg'))
				elif pos == 'VBZ':
					result.append(conjugate(word, '3sg'))
				else:
					result.append(word)
			except KeyError:
				result.append(word)
		else:
			result.append(word)
	return ' '.join(result)
Example #20
def pos_all(word):
	rlist =[]
	_rtense =('infinitive', 'present', 'past', 'future')
	_rperson =(1,2,3)
	_rnumber=('singular', 'plural')
	_rmood=('indicative','imperative','conditional','subjunctive')
	_raspect=('imperfective','perfective','progressive')
	for rtense in _rtense:
		for rperson in _rperson:
			for rnumber in _rnumber:
				for rmood in _rmood:
					for raspect in _raspect:
						item = conjugate(word, tense = rtense, person = rperson,number = rnumber,mood = rmood,aspect = raspect,negated = False)
						if item not in rlist:
							rlist.append(item)

	print bcolors.Magenta + "All pos of "+word
	print_list(rlist,4)
	print "Singluar    : " +singularize(word)+"			Plural      : " +pluralize(word)
	print "Comparative : " +comparative(word)+" 			Superlative : " +superlative(word)
Example #21
    def cooccur_targets(self):
        main_str = self.process_description(self.name)
        alt_strs = self.processed_alternatives

        ret = main_str.split()
        for alt_str in alt_strs:
            words = alt_str.split()
            ret.extend(words)

            # Augment with related words, drawn from WordNet
            for word in words:
                related = [
                    related_word for related_word, p in morphify(word)
                    if p > 0.5
                ]
                ret.extend(related)

        new_ret = set()
        # Add all the inflections!!!
        for word in ret:
            new_ret.add(word)

            # Plural+singular
            new_ret.add(pattern.pluralize(word))
            new_ret.add(pattern.singularize(word))

            # comparatives
            comparative = pattern.comparative(word)
            if "more" not in comparative:
                new_ret.add(comparative)
            superlative = pattern.superlative(word)
            if "most" not in superlative:
                new_ret.add(superlative)

            for id, tense in TENSES.items():
                if id is None: continue
                new_ret.add(pattern.conjugate(word, tense))

        return set(new_ret) - set([None])
Example #22
def make_morph_set(lemma, pos):
    if pos == 'n':
      return set([lemma, en.pluralize(lemma)])
    elif pos == 'v':
      m = set(en.lexeme(lemma))
      m.add(lemma)
      return m
    elif pos == 'a':
      m = set([lemma])

      c = en.comparative(lemma)

      if c and not c.startswith('more '):
          m.add(c)

      s = en.superlative(lemma)

      if s and not s.startswith('most '):
          m.add(s)

      return m
    else:
      return set([lemma])
Example #23
# It is slightly less robust than the pluralize() function.
for word in [
        "parts-of-speech", "children", "dogs'", "wolves", "bears",
        "kitchen knives", "octopodes", "matrices", "matrixes"
]:
    print singularize(word)
print singularize("our", pos=ADJECTIVE)
print
print

# COMPARATIVE & SUPERLATIVE ADJECTIVES
# ------------------------------------
# The comparative() and superlative() functions give the comparative/superlative form of an adjective.
# Words with three or more syllables are simply preceded by "more" or "most".
for word in ["gentle", "big", "pretty", "hurt", "important", "bad"]:
    print word, "=>", comparative(word), "=>", superlative(word)
print
print

# VERB CONJUGATION
# ----------------
# The lexeme() function returns a list of all possible verb inflections.
# The lemma() function returns the base form (infinitive) of a verb.
print "lexeme:", lexeme("be")
print "lemma:", lemma("was")
print

# The conjugate() function inflects a verb to another tense.
# You can supply:
# - tense : INFINITIVE, PRESENT, PAST,
# - person: 1, 2, 3 or None,
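# (The original snippet is cut off here.) A minimal, hedged sketch of a conjugate()
# call using the keyword and alias forms shown elsewhere in these examples; the exact
# constants available depend on the omitted `from pattern.en import *` line:
print conjugate("be", tense="past", person=1, number="singular")  # "was"
print conjugate("be", "3sg")                                      # "is"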
Example #24
			try:
				uid = line.split('svg')[1][1:-1]
				if "-" in uid or hasNumbers(emoji):
					continue
				# print(emoji,uid)
				emojis[emoji] = {"category":"oneemoji","char":uid}
				searchWord = emoji
				if "_" in searchWord:
					searchWord = searchWord.split("_")[0]
				possibleWords = [] + dictionary.synonym(singularize(searchWord)) + dictionary.synonym(pluralize(searchWord))
				goodWords = set()
				for word in possibleWords:
					goodWords.add(singularize(word))
					goodWords.add(pluralize(word))
					goodWords.add(comparative(word))
					goodWords.add(superlative(word))
					goodWords = goodWords | set(lexeme(word))
				actualGoodWords = []
				for word in goodWords:
					if " " not in word:
						actualGoodWords.append(word)
				emojis[emoji]["keywords"] = actualGoodWords
			except:
				pass
			pbar.update(1)
			i = i + 1
			if i > 10000000:
				break
pbar.close()

with open("emojis3.json","w") as f:
Example #25
def applyPosTag(atom, pos):
    #     print('untokeninzing', atom, pos)
    '''
    https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
    https://web.archive.org/web/20140519174100/https://www.clips.uantwerpen.be/pages/pattern-en
   '''
    #     if pos == 'CC':
    #     if pos == 'CD':
    #     if pos == 'DT':
    #     if pos == 'EX':
    #     if pos == 'FW':
    #     if pos == 'IN':
    #     if pos == 'JJ':
    if pos == 'JJR':
        return comparative(atom)
    if pos == 'JJS':
        return superlative(atom)
#     if pos == 'LS':
#     if pos == 'MD':
#     if pos == 'NN':
    if pos == 'NNS':
        return pluralize(atom)
#     if pos == 'NNP':
#     if pos == 'NNP':
#     if pos == 'PDT':
#     if pos == 'POS':
#     if pos == 'PRP':
#     if pos == 'PRP':
    if pos == 'RB':
        return getAdverb(atom)
    if pos == 'RBR':
        return comparative(atom)
    if pos == 'RBS':
        return superlative(atom)
#     if pos == 'RP':
#     if pos == 'SYM':
#     if pos == 'TO':
#     if pos == 'UH':
    if pos == 'VB':
        return conjugate(atom, 'inf')
    if pos == 'VBD':
        if atom == "smell":
            return "smelled"
        if atom == "dream":
            return "dreamed"
        if atom == "grip":
            return "gripped"
        return conjugate(atom, 'p')
    if pos == 'VBG':
        if pos == "pip":
            return "piping"
        return conjugate(atom, 'part')
    if pos == 'VBN':
        return conjugate(atom, 'ppart')
    if pos == 'VBP':
        return conjugate(atom, '1sg')
    if pos == 'VBZ':
        return conjugate(atom, '3sg')


#     if pos == 'WDT':
#     if pos == 'WP':
#     if pos == 'WP$':
#     if pos == 'WRB':
    raise Exception("failed to untokenize: " + atom + ", " + pos)
    return prevAtoms + pos
Example #26
    def get_theme_words(self,
                        theme,
                        k=1,
                        verbose=True,
                        max_val=20,
                        theme_file="saved_objects/theme_words.p",
                        extras_file='saved_objects/extra_adjs.p'):
        try:
            with open(theme_file, "rb") as pickle_in:
                print("loading from file")
                theme_word_dict = pickle.load(pickle_in)
            with open(extras_file, "rb") as p_in:
                extras = pickle.load(p_in)

        except Exception:
            print("theme or extras file not found; creating empty ones")
            with open(theme_file, "wb") as pickle_in:
                theme_word_dict = {}
                pickle.dump(theme_word_dict, pickle_in)
            with open(extras_file, "wb") as p_in:
                extras = {}
                pickle.dump(extras, p_in)

        if theme not in theme_word_dict:
            print(theme, "not in file. Generating...")

            syn = wn.synsets(theme)
            theme_syns = [
                l.name() for s in syn for l in s.lemmas()
                if l.name() in self.dict_meters
            ]
            cases = []
            for poem in self.poems:  #find poems which have theme syns
                if any(word in poem for word in theme_syns):
                    for line in poem.split(
                            "\n"):  #find lines which have theme syns
                        if any(word in line for word in theme_syns):
                            cases.append(line)
            print("theme_syns", theme_syns)
            print(cases)
            theme_words = {}
            for case in cases:
                words = case.split()
                for i in range(len(words)):
                    if words[i] in theme_syns:
                        good_pos = [
                            'JJ', 'JJS', 'RB', 'VB', 'VBP', 'VBD', 'VBZ',
                            'VBG', 'NN', 'NNS'
                        ]
                        punct = [".", ",", "?", "-", "!"]
                        new_words = [words[i]]
                        left = i - 1
                        while left >= max(0, i - k):
                            if words[left] in punct: left = max(0, left - 1)
                            if words[left] in self.words_to_pos and words[
                                    left] in self.dict_meters and words[
                                        left] not in self.top_common_words and any(
                                            pos in good_pos
                                            for pos in self.get_word_pos(
                                                words[left])):
                                new_words.append(words[left])
                            left -= 1
                        right = i + 1
                        while right <= min(len(words) - 1, i + k):
                            if words[right] in punct:
                                right = min(len(words) - 1, right + 1)
                            if words[right] in self.words_to_pos and words[
                                    right] in self.dict_meters and words[
                                        right] not in self.top_common_words and any(
                                            pos in good_pos
                                            for pos in self.get_word_pos(
                                                words[right])):
                                new_words.append(words[right])
                            right += 1
                        for w in new_words:
                            if not self.get_word_pos(
                                    w) or w not in self.dict_meters:
                                continue
                            if w not in theme_words: theme_words[w] = 0
                            theme_words[w] = min(theme_words[w] + 1, max_val)
                            if "JJ" in self.get_word_pos(w):
                                new_words.append(comparative(w))
                                #self.words_to_pos[comparative(w)] = ["JJR"]
                                #self.pos_to_words["JJR"].append(comparative(w))
                                extras[comparative(w)] = ["JJR"]

                                new_words.append(superlative(w))
                                #self.words_to_pos[superlative(w)] = ["JJS"]
                                #self.pos_to_words["JJS"].append(superlative(w))
                                extras[superlative(w)] = ["JJS"]

                                #print("adding ", new_words[-2:])
                            elif "NN" in self.get_word_pos(w):
                                #if "valley" in w: print(w, pluralize(w), w[-1] == "s", self.get_word_pos(w))
                                if pluralize(w) != w and w[-1] != 's':
                                    new_words.append(pluralize(w))
                                    extras[pluralize(w)] = ["NNS"]
                                    #print("adding ", new_words[-1])
                            else:
                                st = self.stemmer.stem(w)
                                if st not in new_words:
                                    new_words.append(st)
                                    #print("adding ", new_words[-1])

            #keep only the ones that come up as synonyms for at least two?
            theme_words["purple"] = 0  # comes up weirdly often
            theme_word_dict[theme] = theme_words
            for w in theme_word_dict[theme]:
                theme_word_dict[theme][w] *= abs(
                    helper.get_spacy_similarity(theme, w))  #/max_val
            with open(extras_file, 'wb') as f:
                pickle.dump(extras, f)
        with open(theme_file, "wb") as pickle_in:
            pickle.dump(theme_word_dict, pickle_in)

        for extra in extras:
            self.words_to_pos[extra] = extras[extra]
            self.pos_to_words[extras[extra][0]].append(extra)
        return theme_word_dict[theme]
Example #27
 def test_superlative(self):
     # Assert "nice" => "nicest"
     self.assertEqual(en.superlative("nice"), "nicest")
     # Assert "important" => "most important"
     self.assertEqual(en.superlative("important"), "most important")
     print "pattern.en.superlative()"
Example #28
          relations=True,
          lemmata=True).split())

# ### Pluralizing and Singularizing the Tokens

from pattern.en import pluralize, singularize

print(pluralize('leaf'))
print(singularize('thieves'))

# ### Converting Adjective to Comparative and Superlative Degrees

from pattern.en import comparative, superlative

print(comparative('good'))
print(superlative('good'))

# ### Finding N-Grams

from pattern.en import ngrams

print(ngrams("He goes to hospital", n=2))

# ### Finding Sentiments

from pattern.en import sentiment

print(sentiment("This is an excellent movie to watch. I really love it"))

# Explanation:
#
Example #29
# SINGULARIZATION
# ---------------
# The singularize() function returns the singular form of a plural noun (or adjective).
# It is slightly less robust than the pluralize() function.
for word in ["parts-of-speech", "children", "dogs'", "wolves", "bears", "kitchen knives",
             "octopodes", "matrices", "matrixes"]:
    print(singularize(word))
print(singularize("our", pos=ADJECTIVE))
print("")

# COMPARATIVE & SUPERLATIVE ADJECTIVES
# ------------------------------------
# The comparative() and superlative() functions give the comparative/superlative form of an adjective.
# Words with three or more syllables are simply preceded by "more" or "most".
for word in ["gentle", "big", "pretty", "hurt", "important", "bad"]:
    print("%s => %s => %s" % (word, comparative(word), superlative(word)))
print("")

# VERB CONJUGATION
# ----------------
# The lexeme() function returns a list of all possible verb inflections.
# The lemma() function returns the base form (infinitive) of a verb.
print("lexeme: %s" % lexeme("be"))
print("lemma: %s" % lemma("was"))
print("")

# The conjugate() function inflects a verb to another tense.
# You can supply:
# - tense : INFINITIVE, PRESENT, PAST,
# - person: 1, 2, 3 or None,
# - number: SINGULAR, PLURAL,
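# (The listing stops before its own conjugate() demo.) A small illustrative call,
# assuming the same `from pattern.en import *` setup as the rest of this script:
print(conjugate("be", tense="past", person=1, number="singular"))  # "was"
print(conjugate("be", "3sg"))  # "is"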
Example #30
from pattern.en import referenced, pluralize, singularize, comparative, superlative, conjugate, number, numerals, lemma, lexeme, tenses,\
    PAST, PL, verbs, conjugate, PARTICIPLE, quantify, suggest, ngrams, parse, tag, tokenize, pprint, parsetree, tree, Text, WORD, POS, CHUNK, PNP, REL, LEMMA, sentiment, \
    Sentence, Word, Chunk, PNPChunk, modality, wordnet, ADJECTIVE

#indefinite article
print referenced('university')
print referenced('hour')
# pluralization and singularization
print pluralize('child')
print singularize('wolves')
# comparative and superlative
print comparative('bad')
print superlative('bad')
# verb conjugation
print lexeme('purr')
print lemma('purring')
print conjugate('purred', '3sg')  # he / she / it
print 'p' in tenses('purred')  # By alias.
print PAST in tenses('purred')
print(PAST, 1, PL) in tenses('purred')
# rule-based conjugation
print 'google' in verbs.infinitives
print 'googled' in verbs.inflections
print conjugate('googled', tense=PARTICIPLE, parse=False)
print conjugate('googled', tense=PARTICIPLE, parse=True)
# quantification
print number("seventy-five point two")  # "seventy-five point two" => 75.2
print numerals(2.245, round=2)  # 2.245 => "two point twenty-five"
print quantify(['goose', 'goose', 'duck', 'chicken', 'chicken', 'chicken'])
print quantify({'carrot': 100, 'parrot': 20})
print quantify('carrot', amount=1000)
Example #31
def calc_main():
    st.title("Nimbus Words")
    st.sidebar.header("Input Options")
    expander_bar = st.beta_expander("How To Use This App")
    expander_bar.markdown("""

    **Use the Dropdown Box located within the sidebar on the left to choose 1 of the 6 AI text editing features offered by Nimbus Words.** 

    1) **Summarizer:** Paste in text that will be summarized by our AI model. The first text box will do an automated summary of our program's recommended word count, and the second box beneath that will provide a summary of the exact word count you choose using the slider located within the sidebar.  

    2) **Tokenizer:** Paste in text that will be analyzed by our AI model. The **Tokenizer** button will provide a breakdown on each word within the phrase, for example 'Google' is an organization, or 'Jeff Bezos' is a proper noun. The **NER** button will display all named entities, for example 'Steve Jobs' is a person. The **Text Relationship** button will display a visual graph of the dependency each word has within a sentence or phrase. 

    3) **Synonyms:** Paste in text that will be analyzed by our AI model. The **Synonyms** button will provide you with synonyms to the inputted attribute. The **Definition** checkbox will provide definitions for the attribute. The **Example** checkbox will provide examples of the given attribute in a sentence.

    4) **Translator:** Paste in text that will be translated by our AI model. The **Translate** button will translate the inputted text into one of the many languages that we have provided, and we will automatically detect which language the inputted text is written in.

    5) **Search:** Paste in text that will be preprocessed by our AI model. The **Search** button will do a filtered search for your input.

    6) **Spell Correction:** Paste in text that will be spell-checked by our AI model. The **Correct** button will offer a corrected spelling for any errors that are detected. The **Pluralize**, **Singularize**, **Comparative** and **Superlative** checkboxes do exactly as they say, and output those forms for the word you provide. 

    """)

    activites = [
        "Summary", "Tokenizer", "Synonyms", "Translator", "Search",
        "Spell Correction"
    ]
    choice = st.sidebar.selectbox("Select Activity", activites)
    if choice == "Summary":
        st.title('AI Text Summarizer')
        text = st.text_area("Input Text For Summary", height=300)
        if st.button("Summarize"):
            st.success(summary(text))
        text_range = st.sidebar.slider("Summarize words Range", 25, 500)
        text = st.text_area("Input Text For Summary", height=250)
        if st.button("Summarize with Custom Word Count"):
            st.warning(summarize(text, word_count=text_range))
    # Tokenizer
    elif choice == "Tokenizer":
        st.title('Text Tokenizer')
        row_data = st.text_area("write Text For Tokenizer")
        docx = nlp(row_data)
        if st.button("Tokenizer"):
            spacy_streamlit.visualize_tokens(
                docx, attrs=['text', 'pos_', 'dep_', 'ent_type_'])
        if st.button("NER"):
            spacy_streamlit.visualize_ner(docx,
                                          labels=nlp.get_pipe('ner').labels)
        if st.button("Text Relationship"):
            spacy_streamlit.visualize_parser(docx)
    # synonyms
    elif choice == "Synonyms":
        st.title('Synonym Generator')
        text = st.text_area("Enter Text")
        if st.button("Synonyms"):
            for syn in wordnet.synsets(text):
                for i in syn.lemmas():
                    st.success(i.name())
        if st.checkbox("Definition"):
            for syn in wordnet.synsets(text):
                st.warning(syn.definition())
        if st.checkbox("Example"):
            for syn in wordnet.synsets(text):
                st.success(syn.examples())
    # Translator
    elif choice == "Translator":
        st.title('Speech Translation')
        row_text = st.text_area("Enter Your Text For Translation", height=300)
        translation_text = TextBlob(row_text)
        list1 = ["en", "ta", "pa", "gu", "hi", "ur", "kn", "bn", "te"]
        a = st.selectbox("select", list1)
        if st.button("search"):
            #input1 = TextBlob("Simple is better than complex")
            st.success(translation_text.translate(to=a))
    #Search Bar
    elif choice == "Search":
        st.title('Web Search')
        row_text = st.text_input("Search Anything")
        google = Google(license=None)
        if st.button("search"):
            for search_result in google.search(row_text):
                st.write(search_result.text)
                st.warning(search_result.url)
    elif choice == "Spell Correction":
        st.title('AI Spell Correction')
        text_data = st.text_area("Enter Text Here")
        a = TextBlob(text_data)
        if st.button("Correct"):
            st.success(a.correct())
        st.title('Pluralize & Singularize')
        text_data1 = st.text_input("Enter a word For pluralize / singularize")
        if st.checkbox("Pluralize"):
            st.warning(pluralize(text_data1))
        if st.checkbox("Singularize"):
            st.warning(singularize(text_data1))

        st.title('Comparative & Superlative')
        text2 = st.text_input("Enter Text For comparative & superlative")
        if st.checkbox("Comparative"):
            st.success(comparative(text2))
        if st.checkbox("Superlative"):
            st.success(superlative(text2))
Example #32
def calc_main():
    st.write("Nimbus Words")   
    st.sidebar.header("Input Options") 

    activites = ["Summary", "Tokenizer","Synonyms","Translator","Search","Spell Correction"]
    choice = st.sidebar.selectbox("Select Activity",activites)
    if choice == "Summary":
        st.title('AI Text Summarizer')
        text = st.text_area("Input Text For Summary",height=300)
        if st.button("summarize"):
            st.success(summary(text))
        text_range= st.sidebar.slider("Summarize words Range",25,500)
        text = st.text_area("Input Text For Summary",height=250)
        if st.button("custom summarization"):
           st.warning(summarize(text,word_count=text_range))
    # Tokenizer
    elif choice == "Tokenizer":
        st.title('Text Tokenizer')
        row_data = st.text_area("write Text For Tokenizer")
        docx= nlp(row_data)
        if st.button("Tokenizer"):
            spacy_streamlit.visualize_tokens(docx,attrs=['text','pos_','dep_','ent_type_'])
        if st.button("NER"):
            spacy_streamlit.visualize_ner(docx,labels=nlp.get_pipe('ner').labels)
        if st.button("Text Relationship"):
            spacy_streamlit.visualize_parser(docx)
       # synonyms      
    elif choice == "Synonyms":
        st.title('Synonym Generator')
        text = st.text_area("Enter Text")
        if st.button("Find"):
            for syn in wordnet.synsets(text):
                for i in syn.lemmas():
                    st.success(i.name())
        if st.checkbox("Defination"):
            for syn in wordnet.synsets(text):
                st.warning(syn.definition()) 
        if st.checkbox("Example"):
            for syn in wordnet.synsets(text):
                st.success(syn.examples())
      # Translator          
    elif choice == "Translator":
        st.title('Speech Translation')
        row_text = st.text_area("Enter Your Text For Translation",height=300)
        translation_text = TextBlob(row_text)
        list1 = ["en","ta","pa","gu","hi","ur","kn","bn","te"]
        a= st.selectbox("select",list1)
        if st.button("search"):
            #input1 = TextBlob("Simple is better than complex")
            st.success(translation_text.translate(to=a))
    #Search Bar
    elif choice == "Search":
        st.title('Web Search')
        row_text= st.text_input("Search Anything")
        google = Google(license=None)
        if st.button("search"):
            for search_result in google.search(row_text):
                st.write(search_result.text)
                st.warning(search_result.url)
    elif choice == "Spell Correction":
        st.title('AI Spell Correction')
        text_data = st.text_area("Enter Text Here")
        a = TextBlob(text_data)
        if st.button("Correct"):
            st.success(a.correct())
        st.title('Pluralize & Singularize')
        text_data1 = st.text_input("Enter a word For pluralize / singularize")
        if st.checkbox("pluralize"):
            st.warning(pluralize(text_data1))
        if st.checkbox("singularize"):
            st.warning(singularize(text_data1))
        
        st.title('Comparative & Superlative')
        text2 = st.text_input("Enter Text For comparative & superlative")
        if st.checkbox("comparative"):
            st.success(comparative(text2))
        if st.checkbox("superlative"):
            st.success(superlative(text2))
def start(position=None, *args):

    global output, output2, total_length, words

    if input_box.get('1.0', 'end-1c') == '' or input_box.get('1.0',
                                                             'end-1c') == ' ':
        result_box2.insert(END, 'NOTHING TO PROCESS IN INPUTBOX !!!')
        result_box1.insert(END, 'NOTHING TO PROCESS IN INPUTBOX !!!')

    words = word_tokenize(input_box.get('1.0', 'end-1c'))
    total_length = len(words)
    taggs = pos_tag(words)
    output = ''
    output2 = ''

    real_words = []
    for i in range(len(words)):
        a = taggs[i][1]
        print('word: ', words[i], 'tag: ', a)

        r = []
        # not to find synonyms for the name of a person, a decorator etc.
        if (taggs[i][1] != 'DP' and taggs[i][1] != 'CD' and taggs[i][1] != 'TO'
                and taggs[i][1] != 'PRP$' and taggs[i][1] != 'IN'
                and taggs[i][1] != 'PRP' and taggs[i][1] != 'DT'
                and taggs[i][1] != 'WRB'
                and taggs[i][1] != 'WR') and (words[i] not in [
                    '.', ',', '(', ')', '', ' (', '( ', ' )', ') ', ' .', '. ',
                    '!', 'doesn', 't', 'don', '\'', 'i', 'l', ' t', 't ',
                    '\'t', "'", 'wasn', 'didn', 'couldn', 'wouldn', 'weren',
                    'I', 'L', '1', '|', ';', ':', 's', ' s', 'ain', 'll', '-',
                    '__'
                ]) and words[i].lower() not in [
                    'time', 'second', 'seconds', 'month', 'months', 'year',
                    'years', 'minute', 'minutes', 'indian', 'countries', 'let'
                ]:
            if words[i] != '.' and words[i] != ',' and words[
                    i] != "'" and words[i] != "\"" and words[
                        i] != "\"" and words[i] != '"' and words[
                            i] != ' "' and words[i] != '" ' and words[i] != '?':

                r = fun(words[i])

        progressbar(i)

        if r != []:

            print('list of words: ', r)

            real_words = []
            # to make the similar words more similar by changing their tense etc.
            for j in r:

                tag = pos_tag([j])[0][1]

                if tag == a or a == 'JJ':
                    # if already similar
                    real_words.append(j)

                elif tag != a:  #check part-of speech tags and change accordingly
                    if a == 'NNPS':
                        token = nlp(j)

                        w = token[0]._.inflect('NNPS', form_num=0)
                        if w != None and pos_tag([w]) == a:
                            real_words.append(token[0]._.inflect(
                                'NNPS', inflect_oov=True, form_num=0))

                    elif a == 'NNS':
                        token = nlp(j)
                        w = token[0]._.inflect('NNS',
                                               inflect_oov=True,
                                               form_num=0)
                        if w != None and pos_tag([w]) == a:

                            real_words.append(token[0]._.inflect(
                                'NNS', inflect_oov=True, form_num=0))
                    elif a == 'NNP':
                        token = nlp(j)
                        w = token[0]._.inflect('NNP',
                                               inflect_oov=True,
                                               form_num=0)
                        if w != None and pos_tag([w]) == a:
                            real_words.append(token[0]._.inflect(
                                'NNP', inflect_oov=True, form_num=0))
                    elif a == 'NN':
                        token = nlp(j)
                        w = token[0]._.inflect('NN', form_num=0)
                        real_words.append(w)
                    elif a == 'RB':
                        token = nlp(j)
                        w = token[0]._.inflect('RB',
                                               inflect_oov=True,
                                               form_num=0)
                        real_words.append(token[0]._.inflect("RB",
                                                             inflect_oov=True,
                                                             form_num=0))
                    elif a == 'RBR':
                        token = nlp(j)
                        w = token[0]._.inflect('RBR',
                                               inflect_oov=True,
                                               form_num=0)
                        if w != None and pos_tag([w]) == a:
                            real_words.append(token[0]._.inflect(
                                'RBR', inflect_oov=True, form_num=0))
                    elif a == 'RBS':
                        token = nlp(j)
                        w = token[0]._.inflect('RBS',
                                               inflect_oov=True,
                                               form_num=0)
                        if w != None and pos_tag([w]) == a:
                            real_words.append(token[0]._.inflect(
                                'RBS', inflect_oov=True, form_num=0))

                    elif a == 'VB':
                        tokens = nlp(j)
                        w = tokens[0]._.inflect('VB',
                                                inflect_oov=True,
                                                form_num=0)
                        if w != None:
                            real_words.append(tokens[0]._.inflect(
                                'VB', inflect_oov=True, form_num=0))
                    elif a == 'VBD':
                        tokens = nlp(j)
                        w = tokens[0]._.inflect('VBD', form_num=1)

                        if w != None:
                            real_words.append(tokens[0]._.inflect(
                                'VBD', inflect_oov=True, form_num=0))
                    elif a == 'VBG':
                        tokens = nlp(j)
                        w = tokens[0]._.inflect('VBG',
                                                inflect_oov=True,
                                                form_num=0)
                        if w != None and pos_tag([w]) == a:
                            real_words.append(tokens[0]._.inflect(
                                'VBG', inflect_oov=True, form_num=0))
                    elif a == 'VBN':
                        tokens = nlp(j)
                        w = tokens[0]._.inflect('VBN',
                                                inflect_oov=True,
                                                form_num=0)
                        if w != None and pos_tag([w]) == a:
                            real_words.append(tokens[0]._.inflect(
                                'VBN', inflect_oov=True, form_num=0))
                    elif a == 'VBP':
                        tokens = nlp(j)
                        w = tokens[0]._.inflect('VBP',
                                                inflect_oov=True,
                                                form_num=0)
                        if w != None and pos_tag([w]) == a:
                            real_words.append(tokens[0]._.inflect(
                                'VBP', inflect_oov=True, form_num=0))
                    elif a == 'VBZ':
                        tokens = nlp(j)
                        w = tokens[0]._.inflect('VBZ',
                                                inflect_oov=True,
                                                form_num=0)
                        if w != None and pos_tag([w]) == a:
                            real_words.append(tokens[0]._.inflect(
                                'VBZ', inflect_oov=True, form_num=0))
                    elif a == 'JJR':
                        real_words.append(comparative(j))
                    elif a == 'JJS':
                        real_words.append(superlative(j))
        print('real words: ', real_words)
        if real_words == [] or r == []:  #if no similar word is found
            output = output + ' ' + words[i]
            output2 = output2 + ' ' + words[i]
        else:

            output_words = []
            max_sim = []
            token1 = nlp(words[i])

            for h in real_words:
                if h != None and h != '':
                    token2 = nlp(h)

                    f1 = h.replace(' ', '')
                    f = f1.replace('_', ' ')
                    sim = token1.similarity(token2)
                    if h not in output_words and (
                            f not in output_words
                    ) and words[i].lower() != h.lower() and words[i].lower(
                    ) != f.lower():  #adding appropriate word

                        output_words.append(f)
                        max_sim.append(sim)

                else:
                    sim = 0

            final_listwords = []
            for jj in max_sim:
                final_listwords.append(output_words[max_sim.index(
                    max(max_sim))])
                max_sim[max_sim.index(max(max_sim))] = -1
            if output_words == []:
                output = output + ' ' + words[i]
                output2 = output2 + ' ' + words[i]

            elif position == None or type(position) != int:

                print('final_listwords: ', final_listwords)
                if len(output_words) > 3:

                    output = output + ' ' + final_listwords[0]
                else:
                    output = output + ' ' + final_listwords[random.randint(
                        0,
                        len(final_listwords) - 1)]

            elif position != None and (type(position) == int
                                       or type(position) == str):
                if type(
                        position
                ) == str:  #choose a specific position of word from list of similar words
                    if len(output_words) > int(position):
                        output = output + ' ' + final_listwords[int(position)]
                elif type(position) == int:
                    if len(output_words) > (position):
                        output = output + ' ' + final_listwords[position]

                else:
                    output = output + ' ' + final_listwords[len(output_words) -
                                                            1]
            if len(output_words) > 7:
                output2 = output2 + ' ' + str([words[i]] +
                                              final_listwords[0:8])
                # if number of similar words is less than 4 than choose randomly
            elif len(output_words) <= 7:
                output2 = output2 + ' ' + str([words[i]] + final_listwords[0:])
    result_box1.insert(END, output)  #to display result
    result_box2.insert(END, output2)
Example #34
from pattern.en import pluralize, singularize
from pprint import pprint
from pattern.en import superlative, comparative
from pattern.en import sentiment
from pattern.en import parse, Sentence, modality
from pattern.en import suggest

words = ["boy", "boys", "knives", "knife", "drove", "drive"]

dic = set()

print(dic)

w1 = "boy"
w2 = "boys"

for word in words:
    if word == singularize(word):
        print(f"{word} is singular word")
        dic.add((word, pluralize(word)))
    else:
        print(f"{word} is plural word")
        dic.add((singularize(word), word))

pprint(list(dic))

print(superlative("good"))
print(comparative("good"))

##print(suggest("whitle"))
Example #35
# Boston, MA 02110-1301 USA,

from pattern.en import referenced

print(referenced('university'))
print(referenced('hour'))

from pattern.en import pluralize, singularize

print(pluralize('child'))
print(singularize('wolves'))

from pattern.en import comparative, superlative

print(comparative('bad'))
print(superlative('bad'))

from pattern.en import conjugate, lemma, lexeme

print(lexeme('purr'))
print(lemma('purring'))
print(conjugate('purred', '3sg'))  # he / she / it

from pattern.de import gender, MALE, FEMALE, NEUTRAL
print(gender('Katze'))
Example #36
 def test_superlative(self):
     # Assert "nice" => "nicest"
     self.assertEqual(en.superlative("nice"), "nicest")
     # Assert "important" => "most important"
     self.assertEqual(en.superlative("important"), "most important")
     print "pattern.en.superlative()"
Example #37
# ---------------
# The singularize() command returns the singular form of a plural noun (or adjective).
# It is slightly less robust than the pluralize() command.
for word in ["parts-of-speech", "children", "dogs'", "wolves", "bears", "kitchen knives", 
             "octopodes", "matrices", "matrixes"]:
    print singularize(word)
print singularize("our", pos=ADJECTIVE)
print
print

# COMPARATIVE & SUPERLATIVE ADJECTIVES
# ------------------------------------
# The comparative() and superlative() commands give the comparative/superlative form of an adjective.
# Words with three or more syllables are simply preceded by "more" or "most".
for word in ["gentle", "big", "pretty", "hurt", "important", "bad"]:
    print word, "=>", comparative(word), "=>", superlative(word)
print
print

# VERB CONJUGATION
# ----------------
# The lexeme() command returns a list of all possible verb inflections.
# The lemma() command returns the base form (infinitive) of a verb.
print "lexeme:", lexeme("be")
print "lemma:", lemma("was")

# The conjugate() command inflects a verb to another tense.
# The tense can be given as a constant, e.g. 
# INFINITIVE, PRESENT_1ST_PERSON_SINGULAR PRESENT_PLURAL, PAST_PARTICIPLE, ...
# or as an abbreviated alias: inf, 1sg, 2sg, 3sg, pl, part, 1sgp, 2sgp, 3sgp, ppl, ppart.
print conjugate("being", tense="1sg", negated=False)