Esempio n. 1
0
def simplify_word(a):
    """Reduce word *a* to a base form.

    Checks in order: a verb is returned in present tense, a noun in its
    singular form; a word already recognisable as a part of speech is
    returned unchanged.  Returns '' when nothing matches.
    """

    # print "[{0}],正在分析词汇: {1}".format(time.ctime().split()[3], a),

    try:  # is it a verb?  if so, return its present tense
        try_present_verb = en.verb.present(a)  #try
        if en.is_verb(try_present_verb):
            # if try_present_verb != a:
            #     print " 动词现在时化:{0} -> {1}".format(a,try_present_verb)
            # else:
            #     print ""
            return try_present_verb
    except:  # otherwise continue checking
        pass

    # is it a noun?
    try_singular_noun = en.noun.singular(a)
    if en.is_noun(try_singular_noun):
        # if try_singular_noun != a:
        #     print " 名词单数化:{0} -> {1}".format(a,try_singular_noun)
        # else:
        #     print ""
        return try_singular_noun

    # already identifiable as a noun, verb, adjective, adverb or connective
    if en.is_noun(a) or en.is_verb(a) or en.is_adjective(a) or en.is_adverb(
            a) or en.is_connective(a):
        # print ""
        return a

    return ''
def simplify_word(a):
    """Simplify *a*: verb -> present tense, noun -> singular, word that
    is already a known part of speech -> unchanged; otherwise ''."""

    # print "[{0}],正在分析词汇: {1}".format(time.ctime().split()[3], a),

    try:# is it a verb?  if so, return its present tense
        try_present_verb = en.verb.present(a)#try
        if en.is_verb(try_present_verb):
            # if try_present_verb != a:
            #     print " 动词现在时化:{0} -> {1}".format(a,try_present_verb)
            # else:
            #     print ""
            return try_present_verb
    except:# otherwise continue checking
        pass

    # is it a noun?
    try_singular_noun = en.noun.singular(a)
    if en.is_noun(try_singular_noun):
        # if try_singular_noun != a:
        #     print " 名词单数化:{0} -> {1}".format(a,try_singular_noun)
        # else:
        #     print ""
        return try_singular_noun

    # already identifiable as a noun, verb, adjective, adverb or connective
    if en.is_noun(a) or en.is_verb(a) or en.is_adjective(a) or en.is_adverb(a) or en.is_connective(a):
        # print ""
        return a

    return ''
Esempio n. 3
0
def verse(word):

    """Creates a small rhyme for a given word.

    The rhyme is based on WordNet's description for the word.
    This description is eloquated (alliterated or antonated), incorporated.

    """

    g = en.noun.gloss(word)
    words = g.split(" ")

    for i in range(len(words)):

        w = words[i]
        w = w.replace("\"", "")

        if en.is_noun(w):
            w = eloquate(w)

        if random(100) > 60:

            if en.is_noun(w): w = incorporate(w).upper()
            if en.is_verb(w): w = incorporate(w, VERB)
            if en.is_adjective(w): w = incorporate(w, ADJECTIVE)

        # Break the verse every third word.  The newline must be appended
        # to w: the original wrote it into words[i] and then immediately
        # overwrote words[i] with w, discarding the line break.
        if i > 0 and i % 3 == 0:
            w = w + "\n"

        words[i] = w

    g = " ".join(words)
    g = g.replace("type A ", "!")
    g = g.replace("group A ", "!")
    return g
Esempio n. 4
0
def verse(word):

    """Creates a small rhyme for a given word.

    The rhyme is based on WordNet's description for the word.
    This description is eloquated (alliterated or antonated), incorporated.

    """

    g = en.noun.gloss(word)
    words = g.split(" ")

    for i in range(len(words)):

        w = words[i]
        w = w.replace("\"", "")

        if en.is_noun(w):
            w = eloquate(w)

        if random(100) > 60:

            if en.is_noun(w): w = incorporate(w).upper()
            if en.is_verb(w): w = incorporate(w, VERB)
            if en.is_adjective(w): w = incorporate(w, ADJECTIVE)

        # Append the line break to w itself; writing it into words[i]
        # (as before) was useless because words[i] = w below replaced it.
        if i > 0 and i % 3 == 0:
            w = w + "\n"

        words[i] = w

    g = " ".join(words)
    g = g.replace("type A ", "!")
    g = g.replace("group A ", "!")
    return g
Esempio n. 5
0
File: views.py Progetto: mitnk/mc
def normalize(word):
    """Reduce *word* toward a base form.

    Tries, in order: verb -> present tense, plural noun -> singular,
    then derivational suffix stripping (-er, -ment, -ness, -ly, -ory,
    -ive) validated against the en library.  Returns *word* unchanged
    when no reduction validates.
    """
    ## TODO: make this function nicer (UT, shorter).

    ## all verb to present
    try:
        new_word = en.verb.present(word)
        if new_word != word and en.is_verb(new_word):
            return new_word
    except KeyError:
        pass

    new_word = en.noun.singular(word)
    if new_word != word and en.is_noun(new_word):
        return new_word

    if en.is_noun(word):
        new_word = re.sub(r'er$', '', word)
        if new_word != word and en.is_verb(new_word):
            return new_word
        new_word = re.sub(r'r$', '', word)
        if new_word != word and en.is_verb(new_word):
            return new_word
        new_word = re.sub(r'ment$', '', word)
        if new_word != word and en.is_verb(new_word):
            return new_word
        ## anchored with '$' like every other suffix pattern here; the
        ## unanchored r'ness' also removed "ness" from inside a word.
        new_word = re.sub(r'ness$', '', word)
        if new_word != word and en.is_adjective(new_word):
            return new_word

    ## adv to adj
    ## TODO: is there a quick way to do this in "en" libs
    new_word = re.sub(r'ly$', '', word)
    if new_word != word and en.is_adjective(new_word):
        return new_word

    if word.endswith('ly'):
        new_word = re.sub(r'ly$', '', word) + 'e'
        if new_word != word and en.is_adjective(new_word):
            return new_word

    if en.is_adjective(word):
        new_word = re.sub(r'ory$', '', word) + 'e'
        if new_word != word and en.is_verb(new_word):
            return new_word
        new_word = re.sub(r'ive$', '', word) + 'e'
        if new_word != word and en.is_verb(new_word):
            return new_word
        new_word = re.sub(r'ive$', '', word)
        if new_word != word and en.is_verb(new_word):
            return new_word
        new_word = re.sub(r'er$', '', word)
        if new_word != word and en.is_adjective(new_word):
            return new_word
        new_word = re.sub(r'r$', '', word)
        if new_word != word and en.is_adjective(new_word):
            return new_word

    return word
Esempio n. 6
0
def list_concepts_naturally(brain, arr):
    """Render the concepts in *arr* as a natural-language list.

    Each concept is replaced by its "has_name" assertion from *brain*
    when one exists; otherwise an article ("the ") is prepended when the
    concept looks like a noun phrase.  Items are joined with commas and
    a final "and".
    """
    finalString = ""
    for i in range(0, len(arr)):
        concept = str(arr[i])
        # Remove keyword specifiers, if any.
        concept = concept.lstrip("$(").rstrip(")")

        # Refer to arr[i] by actual name.
        name = brain.get_assertions_with({
            "l": [concept],
            "relation": "has_name"
        })
        if name:
            # The "r" side may be a single string or a list of strings.
            if isinstance(name[0]["r"], basestring):
                concept = name[0]["r"].title()
            else:
                concept = name[0]["r"][0].title()
        else:
            # TODO: Retrieve the article for concept ("the", "a", "an", or "").
            # article = get_article_for(concept, brain)
            # Due to the number of rules about article omission, we're going to use "the" for now as a simple solution.
            article = "the"
            if article != "":
                article = article + " "
            # Remove possessiveness if necessary for parsing.
            main_concept = concept
            if concept.endswith("'s"):
                main_concept = concept[:-2]
            main_concept = main_concept.rstrip("?:!.,;'")
            # Determine if we should use an article, either definite or indefinite.
            if main_concept != "his":
                if en.is_noun(en.noun.singular(main_concept)):
                    concept = article + concept

                else:  # e.g. gingerbread house >> a gingerbread house
                    words = main_concept.split(" ")
                    allNouns = True
                    for w in words:
                        if not en.is_noun(en.noun.singular(w)):
                            allNouns = False
                    if allNouns:
                        concept = article + concept
        # Join: "a, b, and c" (Oxford comma), "a and b" for pairs.
        if (i < len(arr) - 2):
            finalString += concept + ", "
        elif (i == len(arr) - 2):
            if len(arr) == 2:
                finalString += concept + " and "
            else:
                finalString += concept + ", and "
        else:
            finalString += concept
    return finalString
def get_tense(word, pos=None):
    """Classify *word*: return its verb tense when it has an infinitive,
    otherwise 'plural noun', 'singular noun', or the string 'None'.

    *pos* may be 'Noun' to force the noun reading.
    """
    base_form = apply_tense(word, 'infinitive')
    if is_verb(base_form) and base_form != '':
        return verb.tense(word)

    singular_form = to_singular(word)
    looks_like_noun = pos == 'Noun'
    if singular_form != word and (is_noun(singular_form) or looks_like_noun):
        return 'plural noun'
    if is_noun(word) or looks_like_noun:
        return 'singular noun'
    return 'None'
Esempio n. 8
0
def list_concepts_naturally(brain,arr):
    """Join the concepts in *arr* into an English list, substituting
    each concept's "has_name" value from *brain* when available and
    otherwise prepending an article for noun-like concepts."""
    finalString=""
    for i in range(0, len(arr)):
        concept = str(arr[i])
        # Remove keyword specifiers, if any.
        concept = concept.lstrip("$(").rstrip(")")

        # Refer to arr[i] by actual name.
        name = brain.get_assertions_with({"l":[concept],"relation":"has_name"})
        if name:
            # "r" may be a bare string or a list of strings.
            if isinstance(name[0]["r"],basestring):
                concept = name[0]["r"].title()
            else:
                concept = name[0]["r"][0].title()
        else:
            # TODO: Retrieve the article for concept ("the", "a", "an", or "").
            # article = get_article_for(concept, brain)
            # Due to the number of rules about article omission, we're going to use "the" for now as a simple solution.
            article = "the"
            if article!="":
                article = article + " "
            # Remove possessiveness if necessary for parsing.
            main_concept = concept
            if concept.endswith("'s"):
                main_concept=concept[:-2]
            main_concept = main_concept.rstrip("?:!.,;'")
            # Determine if we should use an article, either definite or indefinite.
            if main_concept != "his":
                if en.is_noun(en.noun.singular(main_concept)):
                    concept = article + concept

                else: # e.g. gingerbread house >> a gingerbread house
                    words = main_concept.split(" ")
                    allNouns = True
                    for w in words:
                        if not en.is_noun(en.noun.singular(w)):
                            allNouns = False
                    if allNouns:
                        concept = article + concept
        # Comma-join with a final "and" ("a and b" for exactly two items).
        if (i<len(arr)-2):
            finalString += concept + ", "
        elif (i==len(arr)-2):
            if len(arr)==2:
                finalString+= concept + " and "
            else:
                finalString += concept + ", and "
        else:
            finalString += concept
    return finalString
Esempio n. 9
0
def get_nouns(tokens_tagged):
    """Collect noun occurrences from POS-tagged sentences.

    tokens_tagged: list of sentences, each a list of (word, tag) pairs.
    Returns [[lemma, frequency, occurrences], ...] sorted by descending
    frequency, keeping only lemmas WordNet recognises as nouns.
    """

    # TODO initialise this up front
    # Drop punctuation etc. -- it is sometimes tagged incorrectly.
    r = re.compile(r'[^a-zA-Z]')
    nouns = []
    for i in range(len(tokens_tagged)):
        # new sentence
        s = tokens_tagged[i]
        # for every token in the sentence
        for j in range(len(s)):
            (w, t) = s[j]
            if t == 'NN' and not r.match(w):
                nouns.append([w, t, unicode(w.lower()), i, j])
            elif t == 'NNS'and not r.match(w):
                # plural noun: store the singular lemma
                nouns.append([w, t, unicode(en.noun.singular(w.lower())), i, j])

    # determine frequencies
    count = defaultdict(int)
    for liste in nouns:
        count[liste[2]] += 1
    # sort by frequency, descending
    sorted_counts = sorted(count.items(), key=operator.itemgetter(1), reverse=True)
    # remove all lemmas that are not in WordNet
    sorted_counts = [(n, c) for (n, c) in sorted_counts if en.is_noun(n)]
    # attach to every lemma the list of its occurrences
    nouns_all = []
    for (n, c) in sorted_counts:
        liste = [l for l in nouns if l[2]== n]
        nouns_all.append([n, c, liste])

    return nouns_all
    def singular_to_plural(self):
        """Return the sentence with every known word pluralised.

        The sentence is split on commas, each chunk is pluralised word
        by word (noun/adjective/connective/persuasive forms via the en
        library), and the chunks are re-joined with ", ".
        """

        final_list = []
        st = self.get_sentence()

        list_seperate_by_comma = st.split(",")  # divide the sentence into chunks at every ','
        for each in list_seperate_by_comma:

            # Drop the single leading space left over after ", ".
            # startswith() is safe on an empty chunk, where the original
            # each[0] raised an IndexError (e.g. a trailing comma).
            if each.startswith(" "):
                each = each[1:]
            m = each.split(" ")  # split the chunk into a list of words

            plural_list = []

            for each in m:
                if en.is_noun(each):
                    each = en.noun.plural(each)
                elif en.is_adjective(each):
                    each = en.adjective.plural(each)
                elif en.is_connective(each):
                    each = self.my_inflect.plural(each)
                elif en.is_persuasive(each):
                    each = en.persuasive.plural(each)
                plural_list.append(each)

            plural_list = " ".join(plural_list)  # convert the word list back to a string
            final_list.append(plural_list)

        final_list = ", ".join(final_list)
        return final_list
    def find_grammatical_kind(self):
        """Label every word of the sentence with its grammatical kind:
        "emotion", "connective", "verb", "adjective", "noun",
        "persuasive", "number", or "unclear"."""

        text = re.sub(",", "", self.get_sentence())  # strip all commas

        # Ordered checks: the first predicate that matches wins.
        checks = (
            ("emotion", en.noun.is_emotion),
            ("connective", en.is_connective),
            ("verb", en.is_verb),
            ("adjective", en.is_adjective),
            ("noun", en.is_noun),
            ("persuasive", en.is_persuasive),
            ("number", en.is_number),
        )

        result = []
        for token in text.split(" "):
            for label, predicate in checks:
                if predicate(token):
                    result.append(label)
                    break
            else:
                result.append("unclear")

        return result
Esempio n. 12
0
def simplify_word(a):
    """Simplify *a*: a recognised part of speech is returned unchanged,
    a verb is returned in present tense, a noun in singular form;
    anything else is recorded in the global otherwordlist and returned
    as-is."""
    # already identifiable as a noun, verb, adjective, adverb or connective
    if en.is_noun(a) or en.is_verb(a) or en.is_adjective(a) or en.is_adverb(a) or en.is_connective(a):
        return a
    try:# is it a verb?  if so, return its present tense
        en.is_verb(en.verb.present(a))
        return en.verb.present(a)
    except:# otherwise continue checking
        pass
    
    # is it a noun?
    if en.is_noun(en.noun.singular(a)):
        return en.noun.singular(a)
    otherwordlist.append(a)
    #print a
    return a
Esempio n. 13
0
def translate_x_of_assertion(brain, a):
    """Render a "<prefix>_of" assertion *a* as an English sentence.

    The part of speech of the relation prefix decides the phrasing:
    noun -> "X was a <prefix> of Y", verb -> "X <prefix> <owner> of Y",
    adjective -> "X was <prefix> of Y".  End marks are appended before
    returning.
    """
    # Strip the trailing 3 characters -- assumed to be "_of" given the
    # relation family this handles (TODO confirm).
    prefix = a.relation[:-3]
    prefix_article = en.noun.article(prefix)
    # prefix_article_only = prefix_article.split(" ")[0]
    verb = get_tense(a, "was", a.l, brain)

    toReturn = ""
    if en.is_noun(en.noun.singular(prefix)):
        if is_plural(a.l, brain):
            prefix_article = en.noun.plural(prefix)
        toReturn = list_concepts_naturally(
            brain, a.l
        ) + " " + verb + " " + prefix_article + " of " + list_words_naturally(
            a.r)
    elif en.is_verb(
            en.verb.infinitive(prefix)) and en.verb.infinitive(prefix) != "":
        if hasattr(a, "owner") and len(a.owner) > 0:
            owner = list_concepts_naturally(brain, a.owner)
        else:
            owner = "everyone"
        toReturn = list_concepts_naturally(
            brain, a.l
        ) + " " + prefix + " " + owner + " of " + list_concepts_naturally(
            brain, a.r)
    elif en.is_adjective(prefix):
        # TODO for capable_of >> deal with action, action_object, action_recipient...
        # Similar for used_for >> when used_for is action / verbs
        toReturn = list_concepts_naturally(
            brain,
            a.l) + " " + verb + " " + prefix + " of " + list_words_naturally(
                a.r)
    toReturn = add_end_marks(a, toReturn)
    return toReturn
Esempio n. 14
0
def dict_ingest(path_to_dict):
    noun = []
    verb = []
    adjective = []
    adverb = []
    miscel = []
    f = open(path_to_dict, 'r')
    for l in f:
        word = l.strip()
        if en.is_noun(word):
            noun.append(word)
        elif en.is_verb(word):
            verb.append(word)
        elif en.is_adjective(word):
            adjective.append(word)
        elif en.is_adverb(word):
            adverb.append(word)
        else:
            miscel.append(word)
    print noun[:5]
    print verb[:5]
    print adjective[:5]
    print adverb[:5]
    print miscel[:5]
    return noun, verb, adjective, adverb, miscel
Esempio n. 15
0
def dict_ingest(path_to_dict):
    noun = []
    verb = []
    adjective = []
    adverb = []
    miscel = []
    f = open(path_to_dict,'r')
    for l in f:
        word = l.strip()
        if en.is_noun(word):
            noun.append(word)
        elif en.is_verb(word):
            verb.append(word)
        elif en.is_adjective(word):
            adjective.append(word)
        elif en.is_adverb(word):
            adverb.append(word)
        else:
            miscel.append(word)
    print noun[:5]
    print verb[:5]
    print adjective[:5]
    print adverb[:5]
    print miscel[:5]
    return noun, verb, adjective, adverb, miscel
Esempio n. 16
0
 def is_a_expression(self, word):
     """True when *word* counts as an expression token: a hash tag, a
     negation, a noun/adjective/verb/adverb (per the en library), or an
     orality word."""
     return self.is_a_hash_tag(word)\
            or self.is_negation(word) \
            or en.is_noun(word) \
            or en.is_adjective(word) \
            or en.is_verb(word) \
            or en.is_adverb(word) \
            or self.is_orality(word)
Esempio n. 17
0
 def is_a_expression(self, word):
     """Whether *word* is an "expression": hash tag, negation, any of
     the four major parts of speech, or an orality word."""
     return self.is_a_hash_tag(word)\
            or self.is_negation(word) \
            or en.is_noun(word) \
            or en.is_adjective(word) \
            or en.is_verb(word) \
            or en.is_adverb(word) \
            or self.is_orality(word)
Esempio n. 18
0
def stem(word):
    """Reduce *word* to a base form: the verb infinitive when valid,
    else the noun singular when valid, else *word* unchanged."""
    for to_base, is_kind in ((en.verb.infinitive, en.is_verb),
                             (en.noun.singular, en.is_noun)):
        candidate = to_base(word)
        if len(candidate) != 0 and is_kind(candidate):
            return candidate
    return word
    def is_clickable(self, node):
        """ Every node that is a noun is clickable (except the root).
        """

        # 'and' binds tighter than 'or', so the original exempted the
        # root only on the is_expandable() branch -- a noun root was
        # still clickable, contradicting the docstring.  Parenthesise so
        # the root is never clickable.
        return (en.is_noun(str(node.id.lower()))
                or self.is_expandable(node.id)) and node != self.root
def cosine_similarity(word1, word2, model):
    """Cosine similarity between two words under *model*.

    A word missing from model.dictionary is approximated from the nouns
    among its '_'-separated parts: with two or more in-vocabulary parts
    the vector is a 0.7/0.3 weighted sum of the last two; with exactly
    one, that part's vector; with none, the similarity is 0.  Returns
    0.0 if the final cosine computation itself fails.
    """

    def vector_for(word):
        # Resolve a word to a vector, or None when unresolvable.
        # (Shared helper replacing two duplicated copies of this logic.)
        if word in model.dictionary:
            return model.word_vectors[model.dictionary[word]]
        parts = [
            w for w in word.split(u'_')
            if w in model.dictionary and en.is_noun(w)
        ]
        if not parts:
            return None
        if len(parts) > 1:
            # Weighted composition of the last two in-vocabulary parts.
            return (model.word_vectors[model.dictionary[parts[-1]]] * 0.7 +
                    model.word_vectors[model.dictionary[parts[-2]]] * 0.3)
        return model.word_vectors[model.dictionary[parts[0]]]

    v1 = vector_for(word1)
    if v1 is None:
        return 0
    v2 = vector_for(word2)
    if v2 is None:
        return 0

    try:
        return 1 - cosine(v1, v2)
    except Exception:  # cosine may fail (e.g. zero vector); treat as no similarity
        return 0.0
Esempio n. 21
0
 def is_clickable(self, node):
     
     """ Every node that is a noun is clickable (except the root).
     """
     
     # NOTE(review): 'and' binds tighter than 'or', so 'node != self.root'
     # only guards the is_expandable() branch -- a noun root would still be
     # reported clickable, which contradicts the docstring.  Confirm the
     # intended precedence before changing it.
     if en.is_noun(str(node.id.lower())) \
     or self.is_expandable(node.id) and node != self.root: 
         return True
     else:
         return False
Esempio n. 22
0
def nouns(list):

    """Parses nouns from a list of words.
    """

    stripped = (entry.strip() for entry in list)
    return [word for word in stripped if en.is_noun(word)]
Esempio n. 23
0
File: views.py Progetto: mitnk/mc
def get_gloss(word):
    """Return the gloss (definition) of *word*, preferring the verb
    sense, then adjective, adverb and noun; falls back to the generic
    WordNet gloss when no part of speech matches."""
    lookups = (
        (en.is_verb, en.verb.gloss),
        (en.is_adjective, en.adjective.gloss),
        (en.is_adverb, en.adverb.gloss),
        (en.is_noun, en.noun.gloss),
    )
    for matches, gloss in lookups:
        if matches(word):
            return gloss(word)
    return en.wordnet.gloss(word)
Esempio n. 24
0
def nouns(list):

    """Parses nouns from a list of words.
    """

    found = []
    for raw in list:
        candidate = raw.strip()
        if en.is_noun(candidate):
            found.append(candidate)

    return found
Esempio n. 25
0
def simplify_word(a):
    """Simplify *a*: verb -> present tense, noun -> singular; a word
    that is already a known part of speech is returned unchanged.
    Unrecognised words are appended to the global otherwordlist and
    returned as-is."""
    
    try:# is it a verb?  if so, return its present tense
        en.is_verb(en.verb.present(a))
        return en.verb.present(a)
    except:# otherwise continue checking
        pass
    
    # is it a noun?
    if en.is_noun(en.noun.singular(a)):
        return en.noun.singular(a)
    
    # already identifiable as a noun, verb, adjective, adverb or connective
    if en.is_noun(a) or en.is_verb(a) or en.is_adjective(a) or en.is_adverb(a) or en.is_connective(a):
        return a
        
    
    
    
    otherwordlist.append(a)
    return a
Esempio n. 26
0
    def webpages(self, q):
        """Return the union of webpages matching query *q*, after
        expanding every non-ignored keyword with WordNet synonyms and
        en-library inflections (plural, past, past participle,
        present)."""
        new_mean = []
        # keyword -> chosen alternative ('' means the keyword itself).
        # Renamed from 'dict', which shadowed the builtin.
        synonym_map = {}
        webpages = []
        temp = []
        keywords = q.split(' ')
        keywords = filter(None, keywords)
        length = len(keywords)
        global ignorewords
        for i in range(0, len(keywords)):
            if keywords[i] not in ignorewords:
                syn = wn.synsets(keywords[i])
                synonym_map[keywords[i]] = ''
                meanings = [s.lemmas[0].name for s in syn]
                meanings = [str(item).lower() for item in meanings]
                if en.is_noun(keywords[i]):
                    meanings.append(en.noun.plural(keywords[i]))
                if en.is_verb(keywords[i]):
                    meanings.append(en.verb.past(keywords[i]))
                    meanings.append(en.verb.past_participle(keywords[i]))
                    meanings.append(en.verb.present(keywords[i]))
                for item in meanings:
                    if self.similar(item, keywords[i]) != 1:
                        new_mean.append(item)
                if new_mean == []:
                    synonym_map[keywords[i]] = ''
                    poss = [(k, v) if v else (k, ) for k, v in synonym_map.items()]
                    temp.append(list(product(*poss)))
                else:
                    new_mean = list(set(new_mean))
                    for j in range(0, len(new_mean)):
                        synonym_map[keywords[i]] = new_mean[j]
                        poss = [(k, v) if v else (k, )
                                for k, v in synonym_map.items()]
                        temp.append(list(product(*poss)))
                new_mean = []
            else:
                length = length - 1
        temp = [item for sublist in temp for item in sublist]
        for item in temp:
            if len(item) == length:
                new_mean.append(item)
        new_mean = list(set(new_mean))
        for i in range(0, len(new_mean)):
            if i == 0:
                webpages = self.set_of_webpages(new_mean[i])
                #print i,' ',webpages,new_mean[i]
            else:
                temp = self.set_of_webpages(new_mean[i])
                #print i,' ',temp,new_mean[i]
                webpages = list(set(webpages) | set(temp))

        return webpages
Esempio n. 27
0
def generate_word(list, pos):
    """Return a word with WordNet part of speech *pos*.

    With probability `percentage_chance` a new related word is derived
    from a random member of *list* via WordNet hypernyms/hyponyms (and
    appended to *list* as a side effect); otherwise a random existing
    member is returned.  NOTE(review): the parameter named 'list'
    shadows the builtin, but renaming it would change the keyword
    interface for callers.
    """
    #% chance to generate new word
    if random.random() < percentage_chance:
        #repeat until word = pos
        while True:
            #get all synsets of random word in list
            synsets = wn.synsets(list[random.randint(0, len(list) - 1)], pos=pos)
            #get random synset
            synset = synsets[random.randint(0, len(synsets) - 1)]
            ran = random.randint(0,3)
            if ran == 0 and synset.hypernyms():
                synset = synset.hypernyms()[random.randint(0, len(synset.hypernyms()) - 1)]
            elif ran == 1 and synset.hyponyms():
                synset = synset.hyponyms()[random.randint(0, len(synset.hyponyms()) - 1)]
            #get random name from synset that does not contain an _ or - (these make the lib go insane)
            #words = the names of the synset
            words = synset.lemma_names()
            #this loop is to make sure an infinite loop does not occur
            #where you are picking from all invalid choices
            while len(words) > 0:
                word = words[random.randint(0, len(words) - 1)]
                if "_" not in word and "-" not in word:
                    break
                else:
                    words.remove(word)
                    continue
            #if words doesn't have words in it, pick a new word from beginning
            if(len(words) == 0):
                continue
            if ((pos == wn.NOUN and en.is_noun(word)) or 
                (pos == wn.VERB and en.is_verb(word)) or
                (pos == wn.ADJ and en.is_adjective(word))):
                
                #fix word based on pos
                #if verb, make sure the verb has a conjugation,
                #if it does, or is not a verb, the word gets appended to the word array,
                #and a word is returned 
                if pos == wn.VERB:
                    try:
                        en.verb.present(word, person=3, negate=False)
                    except KeyError:
                        continue
                    else:
                        if word not in list:
                            list.append(word)
                        return word
                else:
                    if word not in list:
                        list.append(word)
                    return word
    else:
        #just select a random word from the existing ones
        return list[random.randint(0, len(list) - 1)]
Esempio n. 28
0
   def __init__(self, tableName):
        """Wrap a table name: classify it as a noun or a verb and
        precompute the alternative word forms it may map to."""
        self.name = tableName
        tableName = tableName.lower()

        # Nouns map to their plural/singular; anything else is assumed
        # to be a verb and maps to its main conjugations.
        if(en.is_noun(tableName)):
          self.wordType = "noun"
          self.mapsTo = [en.noun.plural(tableName), en.noun.singular(tableName)]
        else:
          self.wordType = "verb"
          self.mapsTo = [en.verb.infinitive(tableName), en.verb.present(tableName, person=3, negate=False), en.verb.past(tableName), en.verb.present_participle(tableName)]

        self.columns = []
        self.expose = True;
Esempio n. 29
0
def correct_form(ans, word):
    """Inflect *ans* to agree with *word*: match the verb tense when
    *word* conjugates, singularise when *word* is a noun; multi-word
    answers (containing '_') and everything else pass through."""
    if "_" in ans:
        return ans
    # print "correcting", ans, "to match", word
    if en.verb.infinitive(word):
        return en.verb.conjugate(ans, en.verb.tense(word)) if en.verb.infinitive(ans) else ans
    return en.noun.singular(ans) if en.is_noun(word) else ans
Esempio n. 30
0
def correct_form(ans, word):
    """Adjust candidate *ans* so its form matches *word* (verb tense or
    noun number).  Underscore-joined answers are returned untouched."""
    if "_" in ans:
        return ans
    # print "correcting", ans, "to match", word
    word_conjugates = en.verb.infinitive(word)
    if word_conjugates:
        if not en.verb.infinitive(ans):
            return ans
        return en.verb.conjugate(ans, en.verb.tense(word))
    if en.is_noun(word):
        return en.noun.singular(ans)
    return ans
Esempio n. 31
0
def get_article(word, tokens, index):
    """Return the token preceding *word* at *index*, correcting "a"/"an"
    to the proper indefinite article when *word* is a noun, adjective,
    or adverb."""
    if index <= 0:
        return tokens[0]

    preceding = tokens[index - 1]
    if not (is_noun(word) or is_adjective(word) or is_adverb(word)):
        return preceding

    if preceding in ('a', 'an'):
        # en's noun.article() returns "a word"/"an word"; keep the article only.
        return noun.article(word).split()[0]

    return preceding
Esempio n. 32
0
 def getcategory(self,word):
     """Return a one-letter part-of-speech tag for *word*: "v" (verb),
     "n" (noun), "a" (adjective), or None."""
     #Higher priority for verbs
     try:
         if(en.verb.present(word)):
             return("v")
     except:
         pass
     
     #Check if it is a noun
     if(en.is_noun(word)):
         return("n")
     
     #Check if it is an adjective
     elif(en.is_adjective(word)):
         return("a")
         
     else:
         return(None)
Esempio n. 33
0
    def getcategory(self, word):
        """Classify *word* as "v" (verb), "n" (noun), "a" (adjective),
        or None, preferring the verb reading."""
        # Verbs take priority: if the word conjugates, call it a verb.
        try:
            if en.verb.present(word):
                return "v"
        except:
            pass

        # Noun next, then adjective; otherwise unknown.
        if en.is_noun(word):
            return "n"
        if en.is_adjective(word):
            return "a"
        return None
Esempio n. 34
0
def convertVerb(srclst):
    """Return a copy of *srclst* with every unambiguous verb replaced
    by its present-tense form.

    Words that also read as nouns/adjectives/adverbs, or that appear in
    WIERDWORDS, are left alone; words the en library cannot conjugate
    are passed through with a warning printed.
    """
    dstlst = []
    itemnew = ""
    for item in srclst:
        #print(item)  ############################when nos lib give error
        #if (item.endswith("ed") or item.endswith("ing")) \
        if en.is_verb(item) \
            and (not en.is_noun(item)) \
            and (not en.is_adjective(item)) \
            and (not en.is_adverb(item)) \
            and (item not in WIERDWORDS):
            try:
                itemnew = en.verb.present(item)
            except:
                print "unrecognized word:", item
                itemnew = item
        else:
            itemnew = item
        dstlst.append(itemnew)
    return dstlst
Esempio n. 35
0
    def _lazy_singularize(self, str):
        
        """ Attempts to singularize the given string.
        
        Does some straightforward inflections and checks
        with the en library if the result's plural
        is the same as the given string.
        
        """ 
        
        inflections = [
            ("ves" , "f"),
            ("ies" , "y"),
            ("es"  , ""),
            ("s"   , "")
        ]
        for pl, sg in inflections:
            # Remove the suffix by slicing.  str.strip(pl) was wrong: it
            # strips any of the characters in pl from BOTH ends, not the
            # suffix (e.g. "sees".strip("es") -> "").
            if not str.endswith(pl):
                continue
            singular = str[:-len(pl)] + sg
            if str == en.noun.plural(singular) \
            and en.is_noun(singular):
                return singular

        return str
    def add_forms(self):
        """Return extra Word objects derived from self.phrases: the main
        conjugations for every verb and the plural for every noun."""
        forms = []
        for w in self.phrases:
            if en.is_verb(w.name):
                try:
                    vb = en.verb.infinitive(w.name)
                    vbd = en.verb.past(w.name)
                    vbp1 = en.verb.present(w.name, person=1)
                    # NOTE(review): vbp2 is computed but never appended below.
                    vbp2 = en.verb.present(w.name, person=2)
                    vbz = en.verb.present(w.name, person=3)
                    vbg = en.verb.present_participle(w.name)
                    forms.append(Word(vb, "VB"))
                    forms.append(Word(vbd, "VBD"))
                    forms.append(Word(vbp1, "VBP"))
                    forms.append(Word(vbz, "VBZ"))
                    forms.append(Word(vbg, "VBG"))
                except:
                    print "Error in conjugation for verb:" + w.name
            elif en.is_noun(w.name):
                nns = en.noun.plural(w.name)
                forms.append(Word(nns, "NNS"))

        return forms
Esempio n. 37
0
def translate_x_of_assertion(brain, a):
    """Render an "<prefix>_of" assertion as an English sentence.

    The wording depends on how the relation's prefix reads: as a noun
    ("... was a part of ..."), as a verb ("... reminds <owner> of ..."),
    or as an adjective ("... was capable of ...").  An unrecognized
    prefix yields an empty sentence, which is still passed through
    add_end_marks().
    """
    prefix = a.relation[:-3]  # strip the trailing "_of"
    article = en.noun.article(prefix)
    # prefix_article_only = prefix_article.split(" ")[0]
    verb = get_tense(a, "was", a.l, brain)

    sentence = ""
    if en.is_noun(en.noun.singular(prefix)):
        # Noun prefix: pluralize the article phrase for plural subjects.
        if is_plural(a.l, brain):
            article = en.noun.plural(prefix)
        sentence = "%s %s %s of %s" % (
            list_concepts_naturally(brain, a.l), verb, article,
            list_words_naturally(a.r))
    elif en.is_verb(en.verb.infinitive(prefix)) and en.verb.infinitive(prefix) != "":
        # Verb prefix: "<l> <prefix> <owner> of <r>".
        if hasattr(a, "owner") and len(a.owner) > 0:
            owner = list_concepts_naturally(brain, a.owner)
        else:
            owner = "everyone"
        sentence = "%s %s %s of %s" % (
            list_concepts_naturally(brain, a.l), prefix, owner,
            list_concepts_naturally(brain, a.r))
    elif en.is_adjective(prefix):
        # TODO for capable_of >> deal with action, action_object, action_recipient...
        # Similar for used_for >> when used_for is action / verbs
        sentence = "%s %s %s of %s" % (
            list_concepts_naturally(brain, a.l), verb, prefix,
            list_words_naturally(a.r))
    return add_end_marks(a, sentence)
    def add_forms(self):
        forms = []
        for w in self.phrases:
            if en.is_verb(w.name):
                try:
                    vb = en.verb.infinitive(w.name)
                    vbd = en.verb.past(w.name)
                    vbp1 = en.verb.present(w.name, person = 1)
                    vbp2 = en.verb.present(w.name, person = 2)
                    vbz = en.verb.present(w.name, person = 3)
                    vbg = en.verb.present_participle(w.name)
                    forms.append(Word(vb,"VB"))
                    forms.append(Word(vbd,"VBD"))
                    forms.append(Word(vbp1,"VBP"))
                    forms.append(Word(vbz,"VBZ"))
                    forms.append(Word(vbg,"VBG"))
                except:
                    print "Error in conjugation for verb:" + w.name
            elif en.is_noun(w.name):
                nns = en.noun.plural(w.name)
                forms.append(Word(nns, "NNS"))

        return forms
Esempio n. 39
0
    def resolvePlural(self, sentence):
        """Pluralize tagged words in *sentence* (a word list, edited
        in place).

        A word carrying a <PLURAL> tag is replaced by its plural form
        plus its remaining tags.  After a quantifier ("few", "many",
        "several", or a digit string other than '1') the nearest
        following noun that is not already marked plural is also
        pluralized.
        """
        for i in range(len(sentence)):
            word = sentence[i]
            if '<PLURAL>' in word:
                stem = word[:word.find('<')]
                plural_form = en.noun.plural(stem)
                word = word.replace(stem, "")  # keep only the tag part
                sentence[i] = plural_form + word.replace('<PLURAL>', "")

            # Pluralize the nearest following noun after a quantifier.
            if word in ["few", "many", "several"] or (word.isdigit() and word != '1'):
                for j in range(i + 1, len(sentence)):
                    candidate = sentence[j]
                    pieces = candidate.split('<')
                    base = pieces[0]
                    if ('<NOUN>' in candidate or en.is_noun(base)) and '<PLURAL>' not in candidate:
                        tag = ""
                        if len(pieces) == 2:
                            tag = '<' + pieces[1]
                        sentence[j] = en.noun.plural(base) + tag
                        break
Esempio n. 40
0
 def _parse(chunk_):
     """Pick the head noun of a comparison chunk, singularized when it
     follows "the" (e.g. "as deep as the oceans" -> "ocean")."""
     noun = clean(chunk_[-1][0])
     singular = en.noun.singular(noun)
     if chunk_[-2][0] == "the" and en.is_noun(singular):
         return singular
     return noun
def get_frequncy_dist(dir_path):
    """Build a word-frequency lexicon from the .srt subtitle files in
    *dir_path*.

    Pipeline: count words -> lemmatize -> drop stop words / names /
    Swadesh vocabulary -> drop words unknown to WordNet -> drop
    unwanted POS tags (folding simple inflections back onto the base
    form) -> drop Basic English vocabulary -> drop a personal word
    list.  Every discarded word is appended to wfd.csv with the stage
    that filtered it; the surviving {word: frequency} dict is both
    written to wfd.csv and returned.
    """
    files = os.listdir(dir_path)

    all_words = 0
    words_wt_freq = {}   
    '''get words'''
    # Count every purely-alphabetic token in every .srt file,
    # case-folded and stripped of surrounding punctuation.
    for filename in files:
        if (filename.endswith('.srt')):
            # NOTE(review): the '\\' separator assumes Windows paths.
            file_handler = open(dir_path + '\\' + filename, 'r')
            for line in file_handler :
                for word in line.strip().split():
                    sword = word.strip(punctuation)
                    if (sword.isalpha()):
                        lword = sword.lower()
                        words_wt_freq[lword] = words_wt_freq.get(lword, 0) + 1
                        all_words += 1
            file_handler.close()
    logger.debug('# all words: ' + str (all_words - 1))
    logger.debug('# unique words: ' + str (len(words_wt_freq.keys())))
    lexical_diversity_for_freq(words_wt_freq.values())
    
    # Merge the counts of inflected forms into their WordNet lemma.
    lemmatized_words_wt_freq = {}
    for word in words_wt_freq.keys():
        lemmatized_word = nltk.WordNetLemmatizer().lemmatize(word)
        if (word != lemmatized_word and lemmatized_word != None):
            lemmatized_words_wt_freq[lemmatized_word] = lemmatized_words_wt_freq.get(lemmatized_word, 0) + words_wt_freq.get(word)
            #print(lemmatized_word, word)
        else:
            lemmatized_words_wt_freq[word] = words_wt_freq.get(word)
    lemmatized_size = len(lemmatized_words_wt_freq.keys())            
    logger.debug ('# words after lemmatized: ' + str (lemmatized_size) + " diff: " + str (len(words_wt_freq.keys()) - lemmatized_size))
    lexical_diversity_for_freq(lemmatized_words_wt_freq.values())
    words_wt_freq = {} # Save memory

    
    # Ignore stop words, first names (both corpora), and the Swadesh
    # core-vocabulary list.
    stopwords_en = stopwords.words('english')
    male_names = names.words('male.txt')
    female_names = names.words('female.txt')
    comparative = swadesh.words('en')
    ignore_list = [] ;
    ignore_list.extend(stopwords_en)
    ignore_list.extend(male_names)
    ignore_list.extend(female_names)
    ignore_list.extend(comparative)            
    filtered_words = []

    # Every filtered-out word goes to wfd.csv with the reason.
    out_file = open(dir_path + '\\wfd.csv', 'w')
    out_file.write ('Word, Type, Frequency \n')
        
    # Keep words longer than 2 chars that are not on the ignore list.
    for word in lemmatized_words_wt_freq.keys():
        if len(word) > 2 and word not in ignore_list:
            filtered_words.append(word)   
        else:
            out_file.write(word + ',stop words,' + str(lemmatized_words_wt_freq.get(word)) + '\n')
    logger.debug ('# words after filtering stop words: ' + str (len(filtered_words)) + " diff: " + str (len(lemmatized_words_wt_freq.keys()) - len(filtered_words)))
    ignore_list = [] #save memory

    '''wordnet has 155k'''                                 
    # Keep only words WordNet has at least one synset for.
    usual_words = []
    for word in  filtered_words:
        if (len(wordnet.synsets(word)) != 0):
            usual_words.append(word)
        else:
            out_file.write(word + ',not in wordnet,' + str(lemmatized_words_wt_freq.get(word)) + '\n')
    logger.debug ('# words after filtering unused words: ' + str (len(usual_words)) + " diff: " + str (lemmatized_size - len(usual_words)))
    filtered_words = [] # save memory 

    # Keep content words only.  Words the en library recognizes as
    # adverb/adjective/verb/noun pass through unchanged; otherwise a
    # few simple inflections (plural / 3rd-person -s, -ing,
    # superlative -est) are folded back onto the base form.
    tag_filtered_words_wt_freq = {}
    words_wt_tags = nltk.pos_tag(usual_words)
    for (word, tag) in words_wt_tags:
        if (tag not in ['EX', 'DET', 'CNJ', 'FW', 'MD', 'NP', 'NUM', 'PRO', 'P', 'TO', 'UH', 'WH', 'WP', 'NNP', 'MOD']):
            if(en.is_adverb(word)):
                tag_filtered_words_wt_freq[word] = lemmatized_words_wt_freq[word]  
                #print ('ADV,' + word)
            elif (en.is_adjective(word)):
                tag_filtered_words_wt_freq[word] = lemmatized_words_wt_freq[word]  
                #print ('ADJ,' + word)
            elif (en.is_verb(word)):
                tag_filtered_words_wt_freq[word] = lemmatized_words_wt_freq[word]  
                #print ('VB,' + word)
            elif (en.is_noun(word)):
                tag_filtered_words_wt_freq[word] = lemmatized_words_wt_freq[word]  
                #print ('N,' + word) 
            else:
                # Unknown to en: strip common suffixes and merge the
                # count onto the shortened form.
                if (tag in ['VBZ', 'NNS']):
                    if word.endswith('s'):
                        new_word = word[:-1]
                        tag_filtered_words_wt_freq[new_word] = lemmatized_words_wt_freq[word] + tag_filtered_words_wt_freq.get(new_word, 0)
                        #print (word , new_word,tag)    
                elif (tag == 'VBG'):
                    new_word = en.verb.infinitive(word)
                    if new_word != None and word != new_word:
                        tag_filtered_words_wt_freq[new_word] = lemmatized_words_wt_freq[word] + tag_filtered_words_wt_freq.get(new_word, 0)
                elif (tag == 'JJS'):
                    if word.endswith('est'):
                        new_word = word[:-3]
                        tag_filtered_words_wt_freq[new_word] = lemmatized_words_wt_freq[word] + tag_filtered_words_wt_freq.get(new_word, 0)     
                else:
                    tag_filtered_words_wt_freq[word] = lemmatized_words_wt_freq[word]        
                    #print (word,tag)   
        else:
            out_file.write(word + ',unwanted pos,' + str(lemmatized_words_wt_freq.get(word)) + '\n')
    logger.debug ('# words after filtering unwanted pos:' + str (len(tag_filtered_words_wt_freq.keys())) + " diff: " + str (len(usual_words) - len(tag_filtered_words_wt_freq.keys())))
    lexical_diversity_for_freq(tag_filtered_words_wt_freq.values())
    lemmatized_words_wt_freq = {} # save memory
    usual_words = [] #save memory

    # Drop Ogden Basic English vocabulary (already-known words).
    basic_english_vocab = en.basic.words
    non_basic_words = set(tag_filtered_words_wt_freq.keys()).difference(basic_english_vocab)
    non_basic_words_wt_freq = {}
    for non_basic_word in non_basic_words:
        non_basic_words_wt_freq[non_basic_word] = tag_filtered_words_wt_freq[non_basic_word] 
    words_in_both = set(tag_filtered_words_wt_freq.keys()).intersection(basic_english_vocab)
    for word in words_in_both:
        out_file.write(word + ',en.basic.words,' + str(tag_filtered_words_wt_freq.get(word)) + '\n')
    logger.debug ('# words after filtering basic words: ' + str (len(non_basic_words_wt_freq.keys())) + " diff: " + str (len(tag_filtered_words_wt_freq.keys()) - len(non_basic_words_wt_freq.keys())))
    lexical_diversity_for_freq(non_basic_words_wt_freq.values())
    tag_filtered_words_wt_freq = {} #save memory


    # Finally drop words from the personal etc\basic_words.csv list.
    fh = open(os.path.join(base.app_root(), 'etc\\basic_words.csv'), 'r')
    my_words = [word.lower() for line in fh for word in line.strip().split()]
    fh.close()
    new_words = set(non_basic_words).difference(my_words)
    words_in_both = set(non_basic_words).intersection(my_words)
    for word in words_in_both:
        out_file.write(word + ',en.basic.words.mine,' + str(non_basic_words_wt_freq.get(word)) + '\n')    
    new_words_wt_freq = {}
    for new_word in new_words:
        new_words_wt_freq[new_word] = non_basic_words_wt_freq[new_word] 
    logger.debug ('# words after filtering my words: ' + str (len(new_words_wt_freq.keys())) + " diff: " + str (len(non_basic_words_wt_freq.keys()) - len(new_words_wt_freq.keys())))
    lexical_diversity_for_freq(new_words_wt_freq.values())
    
    # Emit the surviving lexicon sorted by (frequency, word).
    sorted_words = sorted(new_words_wt_freq.items(), key=itemgetter(1, 0))
    for (word, frequency) in sorted_words:
        out_file.write (word + ',lexicon,' + str(frequency) + '\n')
    out_file.close()
    
    return new_words_wt_freq
Esempio n. 42
0
 def _parse(chunk_):
     """Return the chunk's head noun; when preceded by "the" and the
     singular form is a known noun, return the singular
     (e.g. "as deep as the oceans" -> "ocean")."""
     head = clean(chunk_[-1][0])
     if chunk_[-2][0] == "the":
         sg = en.noun.singular(head)
         if en.is_noun(sg):
             return sg
     return head
Esempio n. 43
0
import re
import en

if __name__ == "__main__":
    # Smoke-test the en library's part-of-speech predicates.
    print(en.is_adjective("accomplished"))
    print(en.is_noun("wizard"))
    print(en.is_verb("accomplish"))
    # Tag a sample sentence with POS labels.
    print(
        en.parser.sentence_tag(
            "The day after today, before yesterday. And in pase years, later"))
    # Look for adjective-noun (JJ NN) pairs; the result is discarded,
    # so this line only checks that the call runs.
    en.parser.matches(
        "The day after today, before yesterday. And in pase years, later",
        "JJ NN")
Esempio n. 44
0
import en

# This file just runs some tests to see if en is working.
# To run it, cd to the directory just above en, then
# python2 < _en-test.py
# Each print is numbered so the output can be compared by eye.

# LEXICAL CATEGORIZATION ############################################################

# Returns True when the given value is a number.
# (Accepts both numeric values and spelled-out numbers.)
print(1, en.is_number(12))
print(2, en.is_number("twelve"))

# Returns True when the given string is a noun.
# You can also check for is_verb(), is_adjective() and is_adverb().
print(3, en.is_noun("banana"))

# Returns True when the given string is a tag,
# for example HTML or XML.
print(4, en.is_tag("</a>"))

# Return True when the string is a HTML tag,
# for example <a> or <body>.
# ("</person>" is well-formed tag syntax but not a HTML element.)
print(5, en.is_html_tag("</person>"))

# COMMONSENSE #######################################################################

# Returns True if the given word expresses a basic emotion:
# anger, disgust, fear, joy, sadness, surprise.
print(6, en.is_basic_emotion("cheerful"))
Esempio n. 45
0
def pluralize(term):
    """Record the plural of *term* (when it is a noun) in the global
    `variations` map, keyed by the original term."""
    if en.is_noun(term):
        pterm = en.noun.plural(term)
        # BUG FIX: the original compared with "is not", which checks
        # object identity and is True for any freshly-built string even
        # when it is equal to term; compare by value instead.
        if pterm != term:
            variations[term].add(pterm)
Esempio n. 46
0
def valid_pos(word):
    """Return True for words with a recognized part of speech (noun,
    verb, adjective or adverb) and for any word of 7+ characters;
    short unrecognized words are rejected."""
    if (is_noun(word) or is_verb(word) or is_adjective(word)
            or is_adverb(word) or len(word) >= 7):
        return True
    return False
Esempio n. 47
0
def singularize(term):
    """Record the singular of *term* (when it is a noun) in the global
    `variations` map, keyed by the original term."""
    if en.is_noun(term):
        sterm = en.noun.singular(term)
        # BUG FIX: "is not" tests identity, not equality, so an equal
        # but distinct string object still passed; use != instead.
        if sterm != term:
            variations[term].add(sterm)
Esempio n. 48
0
def pluralize(term):
    """Record the plural of *term* (when it is a noun) in the global
    `variations` map, keyed by the original term."""
    if en.is_noun(term):
        pterm = en.noun.plural(term)
        # BUG FIX: "is not" tests identity, not equality, so an equal
        # but distinct string object still passed; use != instead.
        if pterm != term:
            variations[term].add(pterm)
Esempio n. 49
0
def toverb(term):
    """Record the verb lemma of *term* (when it is a noun) in the
    global `variations` map, using the shared WordNet lemmatizer."""
    if en.is_noun(term):
        vterm = lmtzr.lemmatize(term, 'v')
        # BUG FIX: "is not" tests identity, not equality; an equal but
        # distinct string object still passed the guard. Use != so only
        # genuinely different lemmas are recorded.
        if vterm != term:
            variations[term].add(vterm)
Esempio n. 50
0
def singularize(term):
    """Record the singular of *term* (when it is a noun) in the global
    `variations` map, keyed by the original term."""
    if en.is_noun(term):
        sterm = en.noun.singular(term)
        # BUG FIX: "is not" tests identity, not equality, so an equal
        # but distinct string object still passed; use != instead.
        if sterm != term:
            variations[term].add(sterm)
Esempio n. 51
0
import en

# Emotion and persuasion predicates.
print en.is_basic_emotion("anxious")
print en.is_persuasive("money")
print en.noun.is_emotion("anger")


# boolean=False asks for a non-boolean result (presumably the matched
# emotion category) — verify against the en library docs.
print en.adjective.is_emotion("anxious", boolean=False)

# NOTE(review): "comptuer" looks misspelled; possibly intentional so
# is_noun fails, with spelling.suggest demonstrated on the next line.
print en.is_noun("comptuer")
print en.spelling.suggest("computer")[0]
print en.verb.is_emotion("love", boolean=False)
# Reduce conjugated/contracted forms to their infinitive.
print en.verb.infinitive("announced")
print en.verb.infinitive("dont")
print en.is_verb("went")
a=en.verb.infinitive("dont")
print en.verb.is_emotion(a, boolean=False)
print en.is_noun("people")

print en.is_noun(en.noun.singular("adore"))
# lexname gives the WordNet lexicographer category of a word.
print en.noun.lexname("book")
print en.noun.lexname("music")
print en.noun.lexname("water")
print en.noun.lexname("fear")
print en.noun.lexname("love")
print en.noun.lexname("like")
print en.noun.lexname("hate")
print en.noun.lexname("overcome")
print en.adverb.lexname("actually")

Esempio n. 52
0
def toverb(term):
    """Record the verb lemma of *term* (when it is a noun) in the
    global `variations` map, using the shared WordNet lemmatizer."""
    if en.is_noun(term):
        vterm = lmtzr.lemmatize(term, 'v')
        # BUG FIX: "is not" tests identity, not equality; an equal but
        # distinct string object still passed the guard. Use != so only
        # genuinely different lemmas are recorded.
        if vterm != term:
            variations[term].add(vterm)
    def __init__(self, w, isTop): #maybe add time of post, what subreddit it came from?
        """Tally part-of-speech counts for the word list *w*.

        Each word is classified (verb, noun, adjective, connective, or
        other) with the en library; both this instance's counters and
        the module-level top/bottom totals are updated.  isTop selects
        whether the top* or bot* globals receive the counts.
        """
        self.words = w
        self.verbCount = 0
        self.nounCount = 0
        self.adjCount = 0
        self.connectiveCount = 0
        self.other = 0

        # BUG FIX: the original declared "global topVerb" twice;
        # the duplicate has been removed.
        global topVerb
        global topNoun
        global topAdj
        global topCon
        global topOther
        global topCount

        global botVerb
        global botNoun
        global botAdj
        global botCon
        global botOther
        global botCount

        self.count = 0
        for word in self.words:
            self.count += 1
            fixedWord = unicode(word).lower()
            # First matching category wins; bump the per-instance
            # counter and the corresponding top/bottom global.
            if en.is_verb(fixedWord):
                if(isTop):
                    topVerb += 1
                else:
                    botVerb += 1

                self.verbCount += 1
            elif en.is_noun(fixedWord):
                if(isTop):
                    topNoun += 1
                else:
                    botNoun += 1

                self.nounCount += 1
            elif en.is_adjective(fixedWord):
                if(isTop):
                    topAdj += 1
                else:
                    botAdj += 1

                self.adjCount += 1
            elif en.is_connective(fixedWord):
                if(isTop):
                    topCon += 1
                else:
                    botCon += 1

                self.connectiveCount += 1
            else:
                if(isTop):
                    topOther += 1
                else:
                    botOther += 1

                self.other += 1
        if isTop:
            topCount += self.count
        else:
            botCount += self.count