import time
import nltk
from word_forms.word_forms import get_word_forms
# `api`, `options`, `wn` and the slash-separated word lists (articles,
# preposition, ...) are module-level helpers defined elsewhere in the source file.

def checkgrammar(inp):
    sent = nltk.word_tokenize(inp.lower())
    suggestions = [[word] for word in sent]
    postag = nltk.pos_tag(nltk.word_tokenize(inp))
    # print(postag)
    start = time.time()
    for i in range(len(sent)):
        if(sent[i] in articles.split('/')):
            suggestions[i] = api(options(sent,i,articles)) or [sent[i]]
        elif(sent[i] in demonstrative_pronouns.split('/')):
            suggestions[i] = api(options(sent,i,demonstrative_pronouns)) or [sent[i]]
        elif(sent[i] in preposition.split('/')):
            x = options(sent,i,preposition)
            if(i<len(sent)-2):
                x = x + ' '+sent[i+2]
            suggestions[i] = api(x) or [sent[i]]
        elif(sent[i] in possesives_1.split('/')):
            suggestions[i] = api(options(sent,i,possesives_1),2) or [sent[i]]
        elif(sent[i] in possesives_2.split('/')):
            suggestions[i] = api(options(sent,i,possesives_2),2) or [sent[i]]
        elif(sent[i] in possesives_3.split('/')):
            suggestions[i] = api(options(sent,i,possesives_3),2) or [sent[i]]
        elif(sent[i] in quantifiers.split('/')):
            suggestions[i] = api(options(sent,i,quantifiers),2) or [sent[i]]
        elif(sent[i] in interrogative_pronouns.split('/')):
            suggestions[i] = api(options(sent,i,interrogative_pronouns)) or [sent[i]]
        elif(sent[i] in auxillary_verbs):
            l1 = list(get_word_forms(wn().lemmatize(sent[i],'v'))['v'])
            verbs_combined = '"'+'"/"'.join(l1)+'"'
            if(l1):
                suggestions[i] = api(options(sent,i,verbs_combined),2,sent[i]) or [sent[i]]
        elif(postag[i][1].startswith('VB')):
            l1 = list(get_word_forms(wn().lemmatize(sent[i],'v'))['v'])
            verbs_combined = '"'+'"/"'.join(l1)+'"'
            if(l1):
                suggestions[i] = api(options(sent,i,verbs_combined),1,sent[i]) or [sent[i]]
        elif(postag[i][1].startswith('NN') or (i<len(sent)-1 and postag[i][1].startswith('JJ') and postag[i+1][1].startswith('NN'))):
            if(i==0):
                suggestions[i] = api(articles+' '+options(sent,i,sent[i])) or [sent[i]]
            elif(postag[i-1][1].startswith('VB')):
                # guard against api() returning None for the "verb ? noun" query
                suggestions[i] = [x+' '+sent[i] for x in (api(sent[i-1]+' ? '+sent[i],2) or [])] or [sent[i]]
        sent[i] = suggestions[i][0]
    end = time.time()
    # re-tokenize (lowercased, to match the suggestion lists) and drop the
    # original token from its own suggestion list
    newsent = nltk.word_tokenize(inp.lower())
    for i in range(len(sent)):
        if newsent[i] in suggestions[i]:
            suggestions[i].remove(newsent[i])
    #print(end-start)
    return suggestions
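A hypothetical usage sketch (the input sentence is illustrative, and the helpers `api`, `options`, `wn` and the word lists must already be defined in the module):

suggestions = checkgrammar("she go to a school")
# one list of alternatives per token, with the original token removed,
# e.g. [[], ['goes', 'went'], [], ['the'], []]
print(suggestions)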
Example #2
def preprocess_word_forms():
    start = time.time()
    dictionary = {}
    eqclasses = {}
    for word in wordlist:
        wordset = set()
        forms = get_word_forms(word)
        for pos in forms:
            wordset |= forms[pos]
        for wordform in wordset: 
            try:
                dictionary[wordform] |= wordset
            except KeyError:
                dictionary[wordform] = wordset
    for word in dictionary:
        frzset = frozenset(dictionary[word])
        wordhash = hash(frzset)
        goodkey = False
        while not goodkey:
            try:
                if frzset.issubset(eqclasses[wordhash]) or frzset.issuperset(eqclasses[wordhash]):
                    eqclasses[wordhash] |= frzset
                    goodkey = True
                else:
                    wordhash += 1
            except KeyError:
                eqclasses[wordhash] = frzset
                goodkey = True
        dictionary[word] = wordhash
    print "TIME: "+ str(time.time()-start)
    return (dictionary, eqclasses)
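A minimal usage sketch, assuming `wordlist`, `time` and `get_word_forms` are already in scope: `dictionary` maps each word form to the hash key of its equivalence class, and `eqclasses` maps that key to the full set of related forms.

dictionary, eqclasses = preprocess_word_forms()
# all forms related to "running", e.g. {'run', 'runs', 'ran', 'running', ...}
print(eqclasses[dictionary["running"]])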
Example #3
def get_wordforms(word):
    from word_forms.word_forms import get_word_forms
    try:
        allwords = []
        temp = get_word_forms(word)
        if not (temp['n'] or temp['v'] or temp['a'] or temp['r']):
            raise Exception
        # rename the part-of-speech keys to readable labels
        for short, full in (('n', 'Noun'), ('a', 'Adjective'), ('r', 'Adverb'), ('v', 'Verb')):
            temp[full] = temp.pop(short)
        in_str = ""
        for key in temp:
            if len(temp[key]) > 0:
                # print("Temporary Information: ", list(temp[key]))
                for i in list(temp[key]):
                    if i not in allwords:
                        allwords.append(i)
                in_str = in_str + key + " Forms: " + " ".join(temp[key]) + "<br>"
        # print(in_str)
        # print("All words: ", allwords)
        return [in_str, allwords]
    except Exception:
        return [" No word forms", " "]
Example #4
def listIntersection(lst0, lst1, expand=False):
    '''
    Find the intersection of lists lst0 and lst1.
    -------
    params:
    -------
    lst0:    list of key strings.
    lst1:    list (or other container) of strings to match against.
    expand:  if True, a value in lst0 also matches when any of its
             word forms appears in lst1.
    -------
    return:
    -------
    list of values contained in both lists.
    '''
    if expand:
        values = []
        for value in lst0:
            sub_values = forms2list(get_word_forms(value))

            for sub_value in sub_values:
                if sub_value in lst1:
                    values.append(value)
                    break
        return values
    else:
        return [value for value in lst0 if value in lst1]
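The helper `forms2list` is not shown in this example; a minimal stand-in (its name and behavior are assumptions based on how it is called above) flattens the `get_word_forms` dict into one list:

def forms2list(forms_dict):
    # flatten {'n': {...}, 'v': {...}, 'a': {...}, 'r': {...}} into a flat list
    return [form for forms in forms_dict.values() for form in forms]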
def find_word_forms(word):
    dic = get_word_forms(word)
    words = ""
    for form in dic:
        for w in dic[form]:
            words += w + " "
    return words
Example #6
from word_forms.word_forms import get_word_forms

def generate_inflections(word):
    # gather every form of ``word`` across all parts of speech, lowercased
    inflections_dict = get_word_forms(word)
    inflections = set()
    for forms in inflections_dict.values():
        for form in forms:
            inflections.add(form.lower())
    return list(inflections)
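For example, assuming the import above:

print(generate_inflections("run"))
# order varies, e.g. ['run', 'runs', 'ran', 'running', 'runner', 'runners']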
Example #7
from itertools import chain

def get_lemma(word):
    # ``word`` is expected to expose .text and .lemma_ (e.g. a spaCy token)
    word_forms = get_word_forms(word.text).values()
    flat_list_of_forms = list(set(chain.from_iterable(word_forms)))
    if len(flat_list_of_forms) > 3:
        # with enough related forms, take the shortest one as the lemma
        return sorted(flat_list_of_forms, key=len)[0]
    else:
        return word.lemma_
Example #8
def token_lemma_mapping(word_dict):
    token_lemma_map = {}
    for k in word_dict:
        external_forms = get_word_forms(k)
        all_lemmas = set()
        for forms in external_forms.values():
            all_lemmas |= forms
        token_lemma_map[k] = all_lemmas
    return token_lemma_map
Example #9
def get_wordforms(word):
    MINIMAL_WORDFORMS_REQ = 6
    regex = re.compile('[^a-zA-Z]')
    clean_word = regex.sub('', word)
    #print(clean_word)
    if len(clean_word) > 0:
        pos_tagged_ngramm = nltk.pos_tag([clean_word])
        for word_el in pos_tagged_ngramm:
            #print(word_el)
            pos = get_wordnet_pos(word_el[1])
            if pos:
                lemma = lemmatizer.lemmatize(word_el[0], pos=pos)
            else:
                lemma = word_el[0]
            break
        wordforms_dict = get_word_forms(clean_word)
        wordform_set = extract_variants_from_dict(wordforms_dict)
        if clean_word != lemma:
            # also include the forms derived from the lemma
            wordforms_lemma_dict = get_word_forms(lemma)
            wordform_set_from_lemma = extract_variants_from_dict(
                wordforms_lemma_dict)
            wordform_set = wordform_set.union(wordform_set_from_lemma)

        if len(wordform_set) < MINIMAL_WORDFORMS_REQ:
            if lemma in dct_mixed_words:
                more_words_number = MINIMAL_WORDFORMS_REQ - len(wordform_set)
                # the slice already caps the list at more_words_number items
                add_words_list = set(dct_mixed_words[lemma][:more_words_number])
                #print("additional words", add_words_list)
                wordform_set = wordform_set.union(add_words_list)
            #else:
            #print("NOT_ENOUGH_WORDS_AND_NOT_ENOUGH_MIXED_WORDS",word)
        wordform_list = list(wordform_set)
        if word in wordform_list:
            wordform_list.remove(word)
        return wordform_list
    else:
        return []
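`extract_variants_from_dict` is not shown in this example; a plausible minimal version (an assumption based on how it is used) unions the four part-of-speech sets returned by `get_word_forms`:

def extract_variants_from_dict(wordforms_dict):
    # union the 'n', 'v', 'a' and 'r' sets into one set of variants
    return set().union(*wordforms_dict.values())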
Example #10
from word_forms.word_forms import get_word_forms

def get_lemmas(word):
    # every related form of ``word``, excluding the word itself
    other_forms = list(
        set(
            item
            for sublist in get_word_forms(word).values()
            for item in sublist if item != word
        ))
    # keep forms that are all lowercase and share a prefix with ``word``
    min_matching_len = min(len(word), 3)
    return [
        x for x in other_forms
        if not any(s.isupper() for s in x)
        and x[:min_matching_len] == word[:min_matching_len]
    ]
Example #11
    def lemmatize(self, word):
        # try the configured lemmatizers first; fall back to word_forms
        for lemmatizer in self.lemmatizers:
            base_form = lemmatizer.lemmatize(word)
            if base_form is not None and base_form != word:
                return {base_form}
        word_forms = get_word_forms(word)
        all_forms = set()
        for base_forms_pos in word_forms:
            for base_form in word_forms[base_forms_pos]:
                if base_form != word:
                    all_forms.add(base_form)
        return all_forms
def find_lemma_map(name, word_set, dictionary, bypass_lemma=False):
    '''
    This function generates a csv file that maps each word in SEND to the
    lemmas that exist in our dictionary. If no lemma exists in the dictionary,
    we return an empty string. If multiple lemmas match, we return a joined
    string separated by commas.
    '''
    from nltk.stem.wordnet import WordNetLemmatizer
    lemmatizer = WordNetLemmatizer()
    from word_forms.word_forms import get_word_forms

    lemma_map = dict()
    for word in word_set:
        # every word will have an empty matching first
        lemma_map[word] = ''
        # first priority is given to those words appear in both sets
        if word in dictionary:
            lemma_map[word] = word
        else:
            if bypass_lemma:
                continue
            # if it does not appear in the dictionary, we transform it in
            # different ways and see if we can find a close match
            lemma_word = lemmatizer.lemmatize(word)
            if lemma_word in dictionary:
                lemma_map[word] = lemma_word
            else:
                # as a last resort, we use the external word_forms library. The
                # output will then be manually examined, since we only have a
                # couple thousand words here.
                external_forms = get_word_forms(lemma_word)
                for form in external_forms.keys():
                    words = external_forms[form]
                    for w in words:
                        if w in dictionary:
                            if lemma_map[word] == '':
                                lemma_map[word] = w
                            else:
                                lemma_map[word] = lemma_map[word] + "," + w
    # write the dict to a csv file for manual examination
    missing_match = 0
    for word in lemma_map.keys():
        if lemma_map[word] == '':
            missing_match += 1
    print('Missing Count: %s, Total Count: %s' %
          (missing_match, len(lemma_map.keys())))
    output_file = "../warriner_valence/word_lemma_" + name + ".csv"
    with open(output_file, mode='w') as csv_file:
        file_writer = csv.writer(csv_file, delimiter=',')
        for w in lemma_map.keys():
            row = [w, lemma_map[w]]
            file_writer.writerow(row)
Example #13
    def getOptionlist(self, checkWord, optionPass):
        checkWord = checkWord.lower()
        outdic = get_word_forms(checkWord)

        if optionPass in ['NNS', 'NN', 'NNP', 'NNPS']:
            return list(outdic.get('n'))
        elif optionPass in ['JJ', 'JJR', 'JJS']:
            return list(outdic.get('a'))
        elif optionPass in ['RB', 'RBR', 'RBS']:
            return list(outdic.get('r'))
        elif optionPass in ['VB', 'VBD', 'VBG', 'VBN', 'VBZ']:
            return list(outdic.get('v'))
Example #14
def get_all_word_forms(word):
    """Takes a word as input and returns a collection of variants of the word.

	Parameters:
	word (string): a specific word

	Returns:
	words (list): the different variants of the input word

	"""
    forms_dict = get_word_forms(word)

    return list(dict.fromkeys(flatten(forms_dict.values())))
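The `flatten` helper is not included in this snippet; a minimal stand-in (assumed from the call above) chains the part-of-speech sets into one iterable:

from itertools import chain

def flatten(iterable_of_sets):
    # chain the sets in forms_dict.values() into a single iterable
    return chain.from_iterable(iterable_of_sets)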
def lemmatize(word):
    """
    Out of all the related word forms of ``word``, return the smallest form that appears first in the dictionary
    """
    forms = [
        word for pos_form in get_word_forms(word).values() for word in pos_form
    ]
    forms.sort()
    forms.sort(key=len)
    try:
        return forms[0]
    except IndexError:
        raise ValueError("{} is not a real word".format(word))
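For example, following the library's README, the shortest related form is returned:

print(lemmatize("operations"))  # -> 'operate'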
Example #16
def extract_word(url):
    # parsing html and getting clear text from it
    words_set = set()
    req = Request(str(url), headers={'User-Agent': 'Mozilla/5.0'})
    try:
        html = urlopen(req).read()
        #html = urllib.request.urlopen(str(url))
        soup = BeautifulSoup(html, features="lxml")
        data = soup.findAll(text=True)

        def visible(element):
            if element.parent.name in [
                    'style', 'script', '[document]', 'head', 'title'
            ]:
                return False
            elif re.match('<!--.*-->', str(element.encode('utf-8'))):
                return False
            return True

        result = filter(visible, data)
        list_to_str = ' '.join([str(element) for element in list(result)])

        # sentence tokenizing the clear text
        sent = nltk.sent_tokenize(list_to_str)
        # operations to extract words
        for item in sent:
            tokens = nltk.word_tokenize(item)
            # removing punctuation
            table = str.maketrans('', '', string.punctuation)
            stripped = [word.translate(table) for word in tokens]
            # taking only alphabet
            words = [word for word in stripped if word.isalpha()]
            for word in words:
                word = ''.join([char for char in word if not char.isdigit()])
                # removing hexadecimal
                word = re.sub(r'[^\x00-\x7f]', r'', word)
                if len(word) >= 1:
                    words_set.add(str(word.casefold()))
                    # to get different form of a word
                    word_form = get_word_forms(word)
                    for item in word_form.values():
                        for inner_item in item:
                            words_set.add(str(inner_item.casefold()))
        return words_set

    except Exception:
        with open('unavailable_url.csv', 'a') as file:
            writer = csv.writer(file)
            writer.writerow([count])
            writer.writerow([url])
        # wrap the message in a one-element set; set("...") would split the
        # string into individual characters
        return {"page " + str(count) + " not available"}
def main():
    # Command line arguments
    ap = argparse.ArgumentParser()
    ap.add_argument('corpus', help='The corpus file')
    ap.add_argument('out_file', help='The output file')
    ap.add_argument('nc_vocab', help='The vocabulary file')
    args = ap.parse_args()

    logger.info('Reading the vocabulary')
    with codecs.open(args.nc_vocab, 'r', 'utf-8') as f_in:
        nc_vocab = frozenset([line.strip() for line in f_in])

    logger.info('Computing variations...')
    variations = {}
    for nc in tqdm.tqdm(nc_vocab):
        w1, w2 = nc.split('\t')
        curr_nc = nc.replace('\t', '_')
        for w1_form, w2_form in itertools.product(
                get_word_forms(w1)['n'],
                get_word_forms(w2)['n']):
            curr_variation = '_'.join((w1_form, w2_form)).lower()
            variations[curr_variation] = curr_nc

    logger.info('Counting the number of sentences in the corpus')
    num_instances = corpus_size(args.corpus)

    logger.info('Processing...')
    with codecs.open(args.corpus, 'r', 'utf-8') as f_in:
        with codecs.open(args.out_file, 'w', 'utf-8') as f_out:
            try:
                for line in tqdm.tqdm(f_in, total=num_instances):
                    for sentence in get_sentences_with_bigrams(
                            line.strip().lower(), variations):
                        f_out.write(sentence + '\n')

            except Exception as err:
                logger.error(err)
def nltk_word_forms_dictionary_refiner(input_dictionary):
    secondary_input_dictionary = dict(input_dictionary)
    for i in input_dictionary.keys():
        current_word_forms = get_word_forms(i)
        for parts_of_speech in current_word_forms.keys():
            for word_form in current_word_forms[parts_of_speech]:
                if word_form not in input_dictionary.keys():
                    secondary_input_dictionary[word_form] = {}
    update_input_dictionary = dict(secondary_input_dictionary)
    for i in update_input_dictionary.keys():
        current_word_forms = get_word_forms(i)
        for parts_of_speech in current_word_forms.keys():
            for word_form in current_word_forms[parts_of_speech]:
                # this is a for loop through all the related words
                if word_form in secondary_input_dictionary.keys():
                    for k in secondary_input_dictionary[word_form].keys():
                        # this forces the keys into a list each time
                        if k in list(update_input_dictionary[i]):
                            update_input_dictionary[i][k] += secondary_input_dictionary[word_form][k]
                        else:
                            update_input_dictionary[i][k] = secondary_input_dictionary[word_form][k]
    return update_input_dictionary
Example #19
    def generate_ace_adj_rules(self):
        # - adj_statement_list = []
        # - adj_comp_statement_list = []
        # - adj_sup_statement_list = []
        # - adv_statement_list = []
        for adj in self.adjectives():
            bare_word = adj
            comparative_word = comparative(adj)
            if len(word_tokenize(comparative_word)) > 1:
                comparative_word = None

            superlative_word = superlative(adj)
            if len(word_tokenize(superlative_word)) > 1:
                superlative_word = None

            adverb = get_word_forms(adj)["r"]
            if len(adverb) == 0:
                adverb = None

            adj_statement = "adj_itr({}, {}).".format(adj, adj)
            yield adj_statement
            # - adj_statement_list.append(adj_statement)

            if comparative_word is not None:
                adj_comp_statement = "adj_itr_comp({}, {}).".format(
                    comparative_word, adj)

                self._inverse_map[comparative_word] = adj
                yield adj_comp_statement

                # - adj_comp_statement_list.append(adj_comp_statement)

            if superlative_word is not None:
                adj_sup_statement = "adj_itr_sup({}, {}).".format(
                    superlative_word, adj)

                self._inverse_map[superlative_word] = adj
                yield adj_sup_statement

                # - adj_sup_statement_list.append(adj_sup_statement)

            if adverb is not None:
                for adv in adverb:
                    adv_statement = "adv({}, {}).".format(adv, adv)

                    self._inverse_map[adv] = adj
                    yield adv_statement
Example #20
def lemma_to_forms(lemma):
    """Return all forms of a lemma.

    Args:
        lemma (unicode)

    Returns:
        forms (set[unicode]): unique list of forms
    """
    from word_forms.word_forms import get_word_forms  # this import is slow, so do it lazily

    forms = set()
    forms.add(lemma)  # original word always counts as a form

    # include forms of all POS
    for pos, pos_forms in get_word_forms(lemma).items():
        forms.update(pos_forms)
    return forms
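A quick usage sketch:

print(lemma_to_forms("run"))
# e.g. {'run', 'runs', 'ran', 'running', 'runner', 'runners', ...}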
Example #21
def get_related_forms(ws, should_ban=False):

    banned_words = set()

    nouns = set()
    adjs = set()
    advs = set()
    verbs = set()

    for w in ws:
        if should_ban and w in banned_words:
            continue

        # all word_forms-derived forms of the word, grouped by part of speech
        wn_ws = get_word_forms(w)

        nns = wn_ws['n']
        adj = wn_ws['a']
        adv = wn_ws['r']
        vrb = wn_ws['v']

        candidates = nns.union(adv, adj, vrb)

        nouns = nouns.union(nns)
        adjs = adjs.union(adj)
        advs = advs.union(adv)
        verbs = verbs.union(vrb)

        if w in candidates:
            candidates.remove(w)

        for cand in candidates:
            if cand in ws:
                banned_words.add(cand)

    return nouns, adjs, advs, verbs
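For example:

nouns, adjs, advs, verbs = get_related_forms({"decide"})
print(sorted(nouns))  # e.g. ['decision', 'decisions']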
Example #22
def word_forms(string):
    proc_word = get_words(string)
    word_dict = {}
    for wd in proc_word:
        word_dict[wd] = get_word_forms(wd)
    return word_dict
analyzer = SentimentIntensityAnalyzer()

tags = list(Tag.objects.all())
tags_synonyms = {
    tag: set(
        chain.from_iterable(
            [word.lemma_names() for word in wn.synsets(tag.tag_title)]))
    for tag in tags
}
optimised_tags = {}
stop = set(stopwords.words('english'))

for tag, syns in tags_synonyms.items():
    optimised_tags[tag] = set()
    for syn in syns:
        words = get_word_forms(syn)

        for key, values in words.items():
            for word in values:
                optimised_tags[tag].add(word)


def get_sentiments(response):
    sentiment_dict = {'pos': 0, 'neg': 0, 'neu': 0}
    # split the response into sentences and tally the sentiment of each;
    # the original nested loop counted every sentence once per character
    for sentence in response.split("."):
        ret = get_sentiment(sentence)
        sentiment_dict[ret] += 1
    if sentiment_dict['pos'] + sentiment_dict['neg'] == 0:
        return 50.0
    return round(
Example #24
def get_flattened_word_forms(word):
    wfs = get_word_forms(word)
    merged = list()
    for k, s in wfs.items():
        merged.extend(s)
    return merged
def getSynonyms(p):
    regex = re.compile('[@_!#$%^&*()<>?/}{~:]')
    j = 0
    corrections = {}
    wordsLists = nltk.word_tokenize(p)
    for word in wordsLists:
        if (word in iitblingo.keys()):
            corrections[word] = [iitblingo[word]]
        else:
            wordsList = nltk.word_tokenize(word)
            wordsList = [w for w in wordsList if not w in stop_words]
            tagged = nltk.pos_tag(wordsList)
            if (len(tagged) != 0):
                if tagged[0][1] in ('JJ', 'JJR', 'JJS', 'RB', 'RBR', 'RBS',
                                    'VB', 'VBD', 'VBG', 'VBN', 'VBZ'):
                    string2 = ' '
                    if (j - 1 >= 0 and j + 1 < len(wordsLists)):
                        string2 = wordsLists[j - 1] + ' ' + wordsLists[j] + ' ' + wordsLists[j + 1]
                    num2 = 0
                    syns = wordnet.synsets(WordNetLemmatizer().lemmatize(
                        tagged[0][0], 'v'))
                    ans = {}
                    x = 0
                    for syn in syns:
                        for w in syn.lemmas():
                            if (w.name().lower() != WordNetLemmatizer().lemmatize(tagged[0][0], 'v')
                                    and regex.search(w.name()) is None):
                                verbs_combined = w.name().lower()
                                if tagged[0][1] in ('VB', 'VBZ', 'VBD', 'VBG', 'VBN'):
                                    l1 = list(get_word_forms(WordNetLemmatizer().lemmatize(
                                        w.name().lower(), 'v'))['v'])
                                    if (len(l1) == 0):
                                        x += 1
                                        continue
                                    else:
                                        verbs_combined = '"' + '"/"'.join(l1) + '"'
                                if (string2 != ' ' and string2 != ""):
                                    string2 = wordsLists[j - 1] + ' ' + verbs_combined + ' ' + wordsLists[j + 1]
                                k = w.name()
                                if (num2 == 0 and string2 != ' ' and string2 != ""):
                                    encoded_query = urllib.parse.quote(string2)
                                    params = {
                                        'corpus': 'eng-us',
                                        'query': encoded_query,
                                        'topk': 3
                                    }
                                    params = '&'.join('{}={}'.format(name, value)
                                                      for name, value in params.items())
                                    response = requests.get(
                                        'https://api.phrasefinder.io/search?' + params)
                                    if (len(response.json()['phrases']) != 0):
                                        # print(response.json())
                                        num2 = response.json()['phrases'][0]['mc']
                                        l = [i['tt'] for i in response.json()['phrases'][0]['tks']
                                             if i['tg'] == 2]
                                        # print(num2)
                                        if (len(l) != 0):
                                            k = l[0]
                                ans[k] = 10 * num2
                                num2 = 0
                                x += 1
                    out = [x[0] for x in sorted(ans.items(), key=lambda x: -x[1])[:3]]
                    corrections[tagged[0][0]] = out

            j = j + 1
    return (corrections)
Example #26
    def runTest(self):
        self.assertEqual(get_word_forms(self.text_input), self.expected_output,
                         self.description)
Example #27
def get_variants_and_derivatives(word):
    return get_word_forms(word)
Example #28
from word_forms.word_forms import get_word_forms

print(get_word_forms("president"))
Example #30
from word_forms.word_forms import get_word_forms
from word_forms.lemmatizer import lemmatize

print(lemmatize("help"))
print(get_word_forms(lemmatize("help"))['n'])

for word in get_word_forms(lemmatize("help"))['n']:
    print(word)
Example #31
from word_forms.word_forms import get_word_forms
# get_word_forms returns a dict of sets keyed by POS: 'n', 'a', 'r', 'v'
outdic = get_word_forms("secret")
print(outdic)
print("noun", list(outdic.get('n')))
print("adj", list(outdic.get('a')))
print("adv", list(outdic.get('r')))
print("verb", list(outdic.get('v')))