import random
from pattern.text.en import lexeme


def inject_sva_error(example):
    """Inject a subject-verb agreement error by replacing a random verb
    with another inflected form from its lexeme."""
    tok_sentence, features_seq = example[0].split(), example[1]
    # Collect the positions of all verbs in the feature sequence.
    verb_indices = [i for i, feats in enumerate(features_seq)
                    if feats['universal_postag'] == 'VERB']
    if verb_indices:
        j = random.choice(verb_indices)
        forms = lexeme(tok_sentence[j])  # all inflections of the chosen verb
        if forms:
            tok_sentence[j] = random.choice(forms)
    return [' '.join(tok_sentence), features_seq]
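A quick usage sketch. The input below is made up: the feature dicts only need the 'universal_postag' key this function reads, and the replacement verb form is chosen at random.

# Hypothetical example: one token-aligned feature dict per word.
example = [
    "she gives him a book",
    [{'universal_postag': t} for t in ('PRON', 'VERB', 'PRON', 'DET', 'NOUN')],
]
corrupted = inject_sva_error(example)
print(corrupted[0])  # e.g. "she give him a book" (form is picked at random)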
from pattern.text.en import (singularize, pluralize, comparative, superlative,
                             lemma, lexeme, tenses, suggest)
from pattern.text.en import wordnet


def _relation_words(synsets):
    # Helper added here: pattern's relation methods return Synset objects,
    # so pull out their member words before testing membership.
    return [w for syn in synsets for w in syn.synonyms]


def write_hypo(parent, count, list_of_neighbors):
    """Tag each neighbor with its lexical relation to `parent` and collect
    the tagged neighbors under `parent` in the returned dict."""
    return_dict = {}
    for neighbor in list_of_neighbors:
        synsets = wordnet.synsets(neighbor)
        if not synsets:
            continue  # neighbors without a synset are skipped, as in the original
        s = synsets[0]
        # Candidate relation sets, checked in priority order. Note the
        # deliberate inversion: if `parent` is the singular of `neighbor`,
        # then `neighbor` is the plural of `parent`, hence the [PLURAL] tag.
        relations = [
            (s.synonyms, "[SYNO]"),
            (_relation_words(s.hypernyms()), "[HYPER]"),
            (_relation_words(s.hyponyms()), "[HYPO]"),
            (_relation_words(s.holonyms()), "[HOLO]"),
            (_relation_words(s.meronyms()), "[MERO]"),
            ([singularize(neighbor)], "[PLURAL]"),
            ([pluralize(neighbor)], "[SINGULAR]"),
            ([comparative(neighbor)], "[COMPA]"),
            ([superlative(neighbor)], "[SUPERLA]"),
            ([lemma(neighbor)], "[LEMMA]"),
            (lexeme(neighbor), "[LEXEME]"),  # was wrapped in an extra list
            (tenses(neighbor), "[TENSE]"),   # tense tuples, kept from the original logic
            ([w for w, _ in suggest(neighbor)], "[MISPELL]"),  # suggest() yields (word, confidence) pairs
        ]
        tagged = None
        for candidates, tag in relations:
            if parent in candidates:
                tagged = str(neighbor) + tag
                break
        if tagged:
            # The original keyed this dict on an undefined `word`; the parent
            # is the intended key, accumulating (count, [tagged neighbors]).
            try:
                return_dict[parent][1].append(tagged)
            except KeyError:
                return_dict[parent] = (count, [tagged])
    return return_dict
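A small usage sketch. The inputs are made up, and the exact tags depend on the installed WordNet data, but the expected results under WordNet 3.x are noted in the comments:

# 'car' is a synonym of 'automobile' (car.n.01), so [SYNO] wins first;
# pluralize('dog') == 'dogs', so 'dog' is the singular of the parent.
print(write_hypo('car', 1, ['automobile']))  # expected: {'car': (1, ['automobile[SYNO]'])}
print(write_hypo('dogs', 2, ['dog']))        # expected: {'dogs': (2, ['dog[SINGULAR]'])}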
import io
import math
import re

from google.cloud import vision
from nltk import sent_tokenize, word_tokenize
from nltk.corpus import wordnet as wn
from pattern.text.en import lexeme

client = vision.ImageAnnotatorClient()  # Google Cloud Vision OCR client


def get_marks(data, image_file):
    """Score a handwritten answer: OCR the image, then grade on keyword
    coverage (60%), word count (30%) and sentence count (10%).
    data = [maximum_marks, expected_no_of_words, expected_no_of_sentences, keywords]."""
    keywords_matched = 0
    maximum_marks = data[0]
    expected_no_of_words = data[1]
    expected_no_of_sentences = data[2]
    keywords = data[3].copy()
    expected_keywords = len(keywords)

    print()
    print("----------------------------------------------")
    print()

    # Multi-word keywords are matched against whole sentences later,
    # so separate them from the single-word keywords.
    spaced_keywords = [word for word in keywords if ' ' in word]
    for word in spaced_keywords:
        keywords.remove(word)
    print(spaced_keywords)
    print(keywords)

    # Expand single-word keywords with their derivational forms from WordNet...
    forms = []
    for word in keywords:
        for alemma in wn.lemmas(word):
            forms.append(alemma.name())  # the lemma itself
            for related_lemma in alemma.derivationally_related_forms():
                forms.append(related_lemma.name())

    # ...and with their inflected verb forms from pattern.
    verb_forms = []
    for word in keywords:
        verb_forms.extend(lexeme(word))

    keywords.extend(forms)
    keywords.extend(verb_forms)
    keywords = list(set(x.lower() for x in keywords))
    print()
    print("----------------------------------------------")
    print()
    print(keywords)
    print()

    # OCR the answer sheet with Google Cloud Vision.
    with io.open(image_file, 'rb') as fh:  # don't shadow the image_file argument
        content = fh.read()
    image = vision.types.Image(content=content)
    response = client.text_detection(image=image)
    texts = response.text_annotations
    string = texts[0].description.replace('\n', ' ').lower()
    string = re.sub('[^A-Za-z0-9.]+', ' ', string)  # drop special characters

    print("-----------------------------------------------")
    print()
    print(string)
    print()

    word_list = word_tokenize(string)
    no_of_words = len(word_list)
    word_list = word_enchant(word_list)  # spell-correct via pyenchant (helper defined elsewhere)
    no_of_words = min(no_of_words, expected_no_of_words)

    sent_list = sent_tokenize(string)
    print(sent_list)
    print()
    print("------------------------------------------------")
    print()
    no_of_sentences = min(len(sent_list), expected_no_of_sentences)
    print('no_of_words:', no_of_words)
    print('no_of_sentences:', no_of_sentences)

    # Exact matches for single-word keywords.
    list_of_matched_keywords = []
    for keyword in keywords:
        if keyword in word_list:
            keywords_matched += 1
            list_of_matched_keywords.append(keyword)

    # Substring or fuzzy matches for multi-word keywords; break at the first
    # matching sentence so a phrase is not counted once per sentence.
    for phrase in spaced_keywords:
        for sentence in sent_list:
            if phrase in sentence or similar(sentence, phrase) > 0.4:
                keywords_matched += 1
                list_of_matched_keywords.append(phrase)
                break

    keywords_matched = min(keywords_matched, expected_keywords)
    print('no of keywords matched:', keywords_matched)
    print('keywords matched: ', list_of_matched_keywords)

    keywords_percentage = 0.60 * (keywords_matched / expected_keywords)
    word_percentage = 0.30 * (no_of_words / expected_no_of_words)
    sentence_percentage = 0.10 * (no_of_sentences / expected_no_of_sentences)
    print('keywords_percentage:', keywords_percentage)
    print('word_percentage:', word_percentage)
    print('sentence_percentage:', sentence_percentage)

    total_marks = maximum_marks * (keywords_percentage + word_percentage +
                                   sentence_percentage)
    # Round to the nearest integer, but keep an exact .5 as a half mark
    # (neither branch fires when the first decimal digit is exactly 5).
    total_marks = round(total_marks, 1)
    digit = total_marks * 10
    if digit % 10 < 5:
        total_marks = math.floor(total_marks)
    if digit % 10 > 5:
        total_marks = math.ceil(total_marks)
    print('total_marks:', total_marks)
    print()
    print("(^-^)(^-^)(^-^)(^-^)(^-^)(^-^)(^-^)(^-^)(^-^)(^-^)(^-^)")
    print()
    return total_marks
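get_marks() leans on two helpers that are not shown here, word_enchant() and similar(). Minimal sketches follow, assuming difflib for the fuzzy match and pyenchant for spell correction (the original comment mentions pyenchant); the project's real implementations may differ:

import difflib
import enchant

_dictionary = enchant.Dict("en_US")  # assumed locale


def similar(a, b):
    # Fuzzy string similarity in [0, 1]; get_marks() treats > 0.4 as a hit.
    return difflib.SequenceMatcher(None, a, b).ratio()


def word_enchant(words):
    # Replace words the dictionary rejects with the top suggestion, if any.
    corrected = []
    for w in words:
        if _dictionary.check(w):
            corrected.append(w)
        else:
            suggestions = _dictionary.suggest(w)
            corrected.append(suggestions[0] if suggestions else w)
    return corrected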
from pattern.text.en import conjugate, lemma, lexeme

print(lemma('gave'))
print(lexeme('gave'))
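Note: with some pattern releases on Python 3.7+, the very first call into these functions raises RuntimeError ("generator raised StopIteration", a PEP 479 side effect). A commonly used workaround is to absorb that first failing call; the expected outputs below assume pattern's standard verb tables:

from pattern.text.en import lemma, lexeme

# Retry-once workaround for the PEP 479 RuntimeError on first use.
try:
    lemma('gave')
except RuntimeError:
    pass

print(lemma('gave'))   # expected: 'give'
print(lexeme('gave'))  # expected: ['give', 'gives', 'giving', 'gave', 'given']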