def convert_postag(complex_word, candidates):
    """Inflect each candidate so that it matches the form of ``complex_word``.

    The part-of-speech tag of ``complex_word`` is looked up with
    ``NLP.pos_tag`` and reduced to a generic class with ``get_type``:

    * ``"NN"``  -- candidates are pluralised / singularised to agree in number.
    * ``"ADJ"`` -- candidates are turned into comparatives / superlatives.
    * ``"VB"``  -- candidates are conjugated to the first tense reported for
      ``complex_word`` by ``tenses``.

    Args:
        complex_word: The word whose grammatical form is the target.
        candidates: Iterable of replacement words.

    Returns:
        A ``set`` of adjusted candidates for nouns, adjectives and verbs.
        The ``candidates`` object itself is returned unchanged when the
        generic class is none of the above, or when ``tenses`` reports
        nothing for a verb ``complex_word``.
    """
    specific_tag = NLP.pos_tag(complex_word)[0][1]
    generic_tag = get_type(specific_tag)
    final_candidates = set()

    if generic_tag == "NN":
        for candidate in candidates:
            candidate_tag = NLP.pos_tag(candidate)[0][1]
            if specific_tag == "NNS" and candidate_tag != "NNS":
                candidate = pluralize(candidate)
            elif specific_tag == "NN" and candidate_tag == "NNS":
                candidate = singularize(candidate)
            final_candidates.add(candidate)

    elif generic_tag == "ADJ":
        for candidate in candidates:
            candidate_tag = NLP.pos_tag(candidate)[0][1]
            if specific_tag == "JJR" and candidate_tag != "JJR":
                candidate = comparative(candidate)
            elif specific_tag == "JJS" and candidate_tag != "JJS":
                candidate = superlative(candidate)
            final_candidates.add(candidate)

    elif generic_tag == "VB":
        complex_tenses = tenses(complex_word)
        if not complex_tenses:
            return candidates
        # Compare tense *names*. Previously the whole list returned by
        # tenses() was compared with "past"/"present"/..., which never
        # matched, so no candidate was ever conjugated.
        complex_tense = complex_tenses[0][0]
        for candidate in candidates:
            candidate_tenses = tenses(candidate)
            if candidate_tenses and candidate_tenses[0][0] != complex_tense:
                # The tense constants are referenced lazily, branch by
                # branch, exactly as before.
                if complex_tense == "past":
                    candidate = conjugate(candidate, tense=PAST)
                elif complex_tense == "present":
                    candidate = conjugate(candidate, tense=PRESENT)
                elif complex_tense == "future":
                    candidate = conjugate(candidate, tense=FUTURE)
                elif complex_tense == "infinitive":
                    candidate = conjugate(candidate, tense=INFINITIVE)
            final_candidates.add(candidate)

    else:
        final_candidates = candidates

    return final_candidates
def detect_line_tense(poem):
    """Return one overall tense per line of ``poem`` that contains a verb.

    For every line the last word whose tag starts with ``"V"`` is kept
    (contractions are expanded first when the line contains an apostrophe).
    Lines without such a word contribute nothing to the result, so the
    returned list can be shorter than ``poem``.

    Args:
        poem: Iterable of text lines.

    Returns:
        A list with the value of ``detect_overall_tense`` for each retained
        verb, or ``''`` where that call raised ``IndexError``.
    """
    # Pass 1: pick the last verb-tagged word of every line ('' when none).
    last_verbs = []
    for text in poem:
        if "'" in text:
            text = replace_contractions(text)
        verbs_in_line = [
            str(token)
            for token, pos in tag(text, tokenize=True)
            if pos.startswith("V")
        ]
        last_verbs.append(verbs_in_line[-1] if verbs_in_line else "")

    # Pass 2: reduce the possible tenses of each verb to a single label.
    results = []
    for verb in filter(None, last_verbs):
        tense_names = [form[0] for form in tenses(verb)]
        try:
            overall = detect_overall_tense(tense_names)
        except IndexError:
            overall = ''
        results.append(overall)
    return results
def _relation_label(parent, neighbor, synset):
    """Return the label of the first relation linking ``neighbor`` to ``parent``.

    Checks run in a fixed priority order and stop at the first hit, so later
    (more expensive) lookups such as ``suggest`` only run when needed.
    Returns ``None`` when no relation matches.
    """
    # NOTE(review): the four synset-relation checks compare ``parent`` with
    # whatever objects hypernyms()/hyponyms()/... return; whether a plain
    # string can ever match depends on those objects' equality -- confirm.
    if parent in synset.synonyms:
        return "[SYNO]"
    if parent in synset.hypernyms():
        return "[HYPER]"
    if parent in synset.hyponyms():
        return "[HYPO]"
    if parent in synset.holonyms():
        return "[HOLO]"
    if parent in synset.meronyms():
        return "[MERO]"
    # The neighbour is a plural of the parent when its singular IS the parent
    # (and vice versa), hence the seemingly swapped labels.
    if parent == singularize(neighbor):
        return "[PLURAL]"
    if parent == pluralize(neighbor):
        return "[SINGULAR]"
    if parent == comparative(neighbor):
        return "[COMPA]"
    if parent == superlative(neighbor):
        return "[SUPERLA]"
    if parent == lemma(neighbor):
        return "[LEMMA]"
    # lexeme() already yields a list of word forms; wrapping it in another
    # list (as before) made this membership test always false.
    if parent in lexeme(neighbor):
        return "[LEXEME]"
    # NOTE(review): kept as in the original -- this only matches when
    # ``parent`` equals the whole tenses() result, which looks unintended.
    if parent == tenses(neighbor):
        return "[TENSE]"
    # suggest() is assumed to yield (word, confidence) pairs as in pattern.en;
    # compare against the suggested words themselves.
    if parent in [suggestion for suggestion, _ in suggest(neighbor)]:
        return "[MISPELL]"
    return None


def write_hypo(parent, count, list_of_neighbors):
    """Label every neighbour by its lexical relation to ``parent``.

    For each neighbour that has at least one WordNet synset, the first synset
    is used to look for a relation to ``parent`` (synonym, hypernym, plural,
    lemma, ...). Neighbours with a relation are recorded as
    ``"<neighbour>[LABEL]"``.

    Args:
        parent: The word the neighbours are compared against.
        count: Value stored alongside the labelled neighbours.
        list_of_neighbors: Iterable of candidate neighbour words.

    Returns:
        ``{}`` when nothing matched, otherwise
        ``{parent: (count, [labelled neighbours...])}``.
    """
    return_dict = {}
    for neighbor in list_of_neighbors:
        synsets = wordnet.synsets(neighbor)
        if not synsets:
            continue
        label = _relation_label(parent, neighbor, synsets[0])
        if label is None:
            continue
        # The original keyed the dict by an undefined name ``word`` behind a
        # bare ``except``; the entry belongs to ``parent``.
        _, links = return_dict.setdefault(parent, (count, []))
        links.append(str(neighbor) + label)
    return return_dict
def post_process(self, tokens: List[str]) -> List[str]:
    """Apply surface-level English fix-ups to a tokenised sentence.

    The rules, applied token by token using the *original* neighbours:

    * capitalise the first token;
    * blank out the ``1 th to`` part of ``1 th to last``;
    * ``a`` -> ``an`` when the next token starts with a character found in
      ``self.vocals``;
    * ``into between`` -> ``in between``;
    * ordinal suffix ``th`` -> ``st`` / ``nd`` / ``rd`` after a number ending
      in 1 / 2 / 3 (but not 11 / 12 / 13);
    * verb after ``to`` -> base form;
    * verb after ``refrained from``-style constructions or ``succeed in``
      -> gerund;
    * verb after ``could/would/did not`` -> base form.

    Args:
        tokens: The sentence as a list of string tokens.

    Returns:
        A new list with one (possibly empty) string per input token.
    """
    if not tokens:
        # zip_longest below would still yield one padded triple for an empty
        # list, and capitalising it used to raise IndexError.
        return []

    lemmatizer_holder = []

    def lemmatize(word, *pos):
        # Built on first use so sentences that need no lemmatisation never
        # construct the WordNet lemmatizer.
        if not lemmatizer_holder:
            lemmatizer_holder.append(nltk.stem.wordnet.WordNetLemmatizer())
        return lemmatizer_holder[0].lemmatize(word, *pos)

    def first_tense_is_past(word):
        forms = tenses(word)
        return bool(forms) and forms[0][0] == 'past'

    first_to_last = ['1', 'th', 'to', 'last']
    ordinal_suffix = {'1': 'st', '2': 'nd', '3': 'rd'}
    gerund_triggers = (
        'refrained', "refused", "prohibited", "prevented", "hindered"
    )

    result = []
    for i, (prev_token, token, next_token) in enumerate(
            zip_longest([""] + tokens[:-1], tokens, tokens[1:],
                        fillvalue="")):
        prev_prev_token = tokens[i - 2] if i >= 2 else ""

        # Capitalise; slicing keeps an empty first token from crashing.
        if i == 0:
            token = token[:1].upper() + token[1:]

        # "1 th to last" collapses to "last": drop the leading '1' ...
        if tokens[i:i + 4] == first_to_last:
            token = ''
        # ... and the 'to' (the 'th' is handled in the ordinal branch).
        if i >= 2 and tokens[i - 2:i + 2] == first_to_last:
            token = ''

        if (token in ('a', 'A') and next_token
                and next_token[0] in self.vocals):
            token = token + "n"
        elif token == 'into' and next_token == 'between':
            token = 'in'
        elif (token == 'th' and prev_token.endswith(('1', '2', '3'))
              and not prev_token.endswith(('11', '12', '13'))):
            # 11th/12th/13th keep 'th'; everything else follows its last digit.
            token = ordinal_suffix[prev_token[-1]]
            # Only a literal '1' takes part in "1 th to last"; previously any
            # number ending in 1 (e.g. 21) lost its suffix here.
            if tokens[i - 1:i + 3] == first_to_last:
                token = ''
        elif prev_token == 'to' and (token.endswith(("ed", "ing"))
                                     or first_tense_is_past(token)):
            token = lemmatize(token, 'v')
        elif prev_token == 'in' or (
                prev_token == 'from'
                and any(t[0] == 'past' for t in tenses(token))):
            # NOTE(review): parentheses only make the original operator
            # precedence explicit -- 'in' enters this branch regardless of
            # tense. Confirm that is intended.
            # VERY HACKY
            if (prev_prev_token in gerund_triggers
                    or (prev_token == "in"
                        and prev_prev_token == 'succeed')):
                token = lemmatize(token, 'v')
                try:
                    # presumably index 5 is the present participle in
                    # pattern's verb table -- confirm
                    token = pattern_en.verbs[token][5]
                except Exception:
                    # Best-effort fallback when the lookup fails for any
                    # reason: build the gerund by hand.
                    if not token.endswith("ing"):
                        base = lemmatize(token)
                        # Drop only a final silent 'e' ("make" -> "making").
                        # The old rsplit("e", 1) cut at the last 'e' anywhere
                        # in the word ("open" -> "oping").
                        if (len(base) > 2 and base.endswith("e")
                                and not base.endswith("ee")):
                            base = base[:-1]
                        token = base + "ing"
        elif (prev_token in ('not', "n't")
              and prev_prev_token in ("could", "would", "did")):
            token = lemmatize(token, 'v')

        result.append(token)
    return result
def _cased_text(token):
    """Return ``token.text_with_ws``, lower-cased unless tagged NNP/NNPS."""
    if token.tag_ in ('NNP', 'NNPS'):
        return token.text_with_ws
    return token.text_with_ws.lower()


def _subtree_text(word):
    """Join the cased text of every token in ``word.subtree``, stripped."""
    return ''.join(_cased_text(w) for w in word.subtree).strip()


def pass2act(doc, rec=False):
    """Rewrite the passive-voice sentences of ``doc`` in the active voice.

    Each sentence of the parsed text is scanned once to collect the passive
    subject, the agent (object of a ``by``-style ``agent`` dependency), the
    root verb with its auxiliaries, adverbs, particle, preposition phrase,
    adverbial clause and clausal complement. If both a passive subject and an
    agent were found the sentence is reassembled as
    ``agent aux adverb verb particle subject adverb advcl prep xcomp``;
    otherwise it is copied unchanged.

    Args:
        doc: Text to convert.
        rec: ``True`` for the recursive call made on a clausal complement;
            punctuation is then not captured and the result is not
            capitalised.

    Returns:
        The converted text; every sentence is followed by a single space.
    """
    parse = nlp(doc)
    # Two placeholder '.' tokens pad the auxiliary list so the sliding window
    # below always has a "previous" element. The parse is constant, so it is
    # done once and copied per sentence.
    null_tokens = list(list(nlp('. .').sents)[0])
    pieces = []

    for sent in parse.sents:
        # Parts of the sentence to capture:
        subjpass = ''
        subj = ''
        verb = ''
        verbtense = ''
        adverb = {'bef': '', 'aft': ''}
        part = ''
        prep = ''
        agent = ''
        aplural = False
        advcltree = None
        aux = list(null_tokens)
        xcomp = ''
        punc = '.'

        # Analyse the dependency tree:
        for word in sent:
            dep = word.dep_
            head_dep = word.head.dep_
            if dep == 'advcl' and head_dep in ('ROOT', 'auxpass'):
                advcltree = word.subtree
            if dep == 'nsubjpass' and head_dep == 'ROOT':
                subjpass = _subtree_text(word)
            if dep == 'nsubj':
                subj = _subtree_text(word)
                if head_dep == 'auxpass' and word.head.head.dep_ == 'ROOT':
                    subjpass = subj
            if dep in ('advmod', 'npadvmod', 'oprd') and head_dep == 'ROOT':
                # Adverbs seen before the root verb go in front of it.
                adverb['bef' if verb == '' else 'aft'] = _subtree_text(word)
            if dep == 'auxpass' and head_dep == 'ROOT' and not subjpass:
                subjpass = subj
            if dep in ('aux', 'auxpass', 'neg') and head_dep == 'ROOT':
                aux.append(word)
            if dep == 'ROOT':
                verb = word.text
                # (The original also set verbaspect for VBG here, but that
                # value was always reset before use.)
                if word.tag_ == 'VB':
                    verbtense = en.INFINITIVE
                elif word.tag_ in ('VBD', 'VBN'):
                    verbtense = en.PAST
                elif word.tag_ == 'VBG':
                    verbtense = en.PRESENT
                else:
                    try:
                        verbtense = en.tenses(word.text)[0][0]
                    except IndexError:
                        pass
            if dep == 'prt' and head_dep == 'ROOT':
                part = _subtree_text(word)
            if dep == 'prep' and head_dep == 'ROOT':
                prep = _subtree_text(word)
            if (dep.endswith('obj') and head_dep == 'agent'
                    and word.head.head.dep_ == 'ROOT'):
                # Appositions inside the agent are set off with a comma.
                agent = ''.join(
                    w.text + ', ' if w.dep_ == 'appos' else _cased_text(w)
                    for w in word.subtree).strip()
                aplural = word.tag_ in ('NNS', 'NNPS')
            if dep in ('xcomp', 'ccomp', 'conj') and head_dep == 'ROOT':
                xcomp = _subtree_text(word)
                that = xcomp.startswith('that')
                # The complement may itself be passive: convert recursively.
                xcomp = pass2act(xcomp, True).strip(' .')
                if that and not xcomp.startswith('that'):
                    xcomp = 'that ' + xcomp
            if dep == 'punct' and not rec and word.text != '"':
                punc = word.text

        # Not passive, or no agent to promote to subject: keep as is.
        if subjpass == '' or agent == '':
            pieces.append(str(sent))
            continue

        # Invert nouns:
        agent = nouninv(agent)
        subjpass = nouninv(subjpass)

        # Conjugation. Slide a (prev-prev, prev, current, next) window over
        # the auxiliaries; the extra trailing placeholder supplies "next".
        auxstr = ''
        num = (en.SINGULAR if not aplural or agent in ('he', 'she')
               else en.PLURAL)
        aux.append(aux[0])
        verbaspect = None
        for (pp, p, a, n) in zip(aux, aux[1:], aux[2:], aux[3:]):
            if a.lemma_ == '.':
                continue

            if a.lemma_ == 'not':
                if p.lemma_ == 'be':
                    if n.lemma_ == 'be':
                        # NOTE(review): this reads the tense of the negation
                        # token itself; p.text or n.text may have been
                        # intended -- confirm.
                        verbtense = en.tenses(a.text)[0][0]
                        auxstr += en.conjugate(
                            'be', tense=en.tenses(p.text)[0][0],
                            number=num) + ' '
                        verbaspect = en.PROGRESSIVE
                    else:
                        # "was not eaten" -> "did not eat"
                        auxstr += en.conjugate(
                            'do', tense=en.tenses(p.text)[0][0],
                            number=num) + ' '
                        verbtense = en.INFINITIVE
                auxstr += 'not '
            elif a.lemma_ == 'be':
                if p.lemma_ == 'be':
                    verbtense = en.tenses(a.text)[0][0]
                    auxstr += en.conjugate(
                        'be', tense=en.tenses(a.text)[0][0],
                        number=num) + ' '
                    verbaspect = en.PROGRESSIVE
                elif p.tag_ == 'MD':
                    verbtense = en.INFINITIVE
            elif a.lemma_ == 'have':
                # After a modal "have" stays bare ("could have"), so it must
                # not agree with a singular agent. The original wrote `==`
                # here, which compared and discarded the result.
                have_num = en.PLURAL if p.tag_ == 'MD' else num
                auxstr += en.conjugate(
                    'have', tense=en.tenses(a.text)[0][0],
                    number=have_num) + ' '
                if n.lemma_ == 'be':
                    verbaspect = en.PROGRESSIVE
                    verbtense = en.tenses(n.text)[0][0]
            else:
                auxstr += a.text_with_ws
        auxstr = auxstr.lower().strip()

        if verbaspect:
            verb = en.conjugate(verb, tense=verbtense, aspect=verbaspect)
        else:
            verb = en.conjugate(verb, tense=verbtense)

        advcl = ''
        if advcltree:
            for w in advcltree:
                # A verb with no known tenses is copied through instead of
                # raising IndexError.
                forms = en.tenses(w.text) if w.pos_ == 'VERB' else []
                if forms and forms[0][4] == en.PROGRESSIVE:
                    advcl += 'which ' + en.conjugate(
                        w.text, tense=en.tenses(verb)[0][0]) + ' '
                else:
                    advcl += w.text_with_ws

        newsent = ' '.join(filter(None, [
            agent, auxstr, adverb['bef'], verb, part, subjpass,
            adverb['aft'], advcl, prep, xcomp
        ])) + punc
        if not rec:
            newsent = newsent[0].upper() + newsent[1:]
        pieces.append(newsent)

    return ''.join(piece + ' ' for piece in pieces)