Ejemplo n.º 1
0
def inject_sva_error(example):
    """Replace one randomly chosen verb token with a random entry of its lexeme.

    ``example`` is indexed as ``(sentence, features_seq)``. The sentence is
    split on whitespace, and every entry of ``features_seq`` is inspected
    through its ``'universal_postag'`` key to collect the positions tagged
    ``'VERB'``. One of those positions is picked at random and the token at
    that position is swapped for a random element of ``pattern.lexeme(token)``
    (left untouched when the lexeme is empty).

    Returns a two-element list: the tokens re-joined with single spaces and
    the ``features_seq`` object that was passed in.
    """
    sentence, features_seq = example[0], example[1]
    tokens = sentence.split()

    # NOTE(review): assumes features_seq positions line up with the
    # whitespace tokens of the sentence -- confirm against the caller.
    verb_positions = [
        position
        for position, features in enumerate(features_seq)
        if features['universal_postag'] == 'VERB'
    ]

    if verb_positions:
        target = random.choice(verb_positions)
        candidates = pattern.lexeme(tokens[target])
        if candidates:
            tokens[target] = random.choice(candidates)

    return [' '.join(tokens), features_seq]
        def write_hypo(parent, count, list_of_neighbors):
            """Tag each neighbor with the kind of link that connects it to ``parent``.

            For every neighbor that has at least one WordNet synset, the first
            synset's relations and the neighbor's inflected forms are checked,
            in a fixed order, for membership of ``parent``. The first relation
            that contains ``parent`` decides the tag appended to the neighbor
            (for example ``"dogs[PLURAL]"``). Neighbors without a synset, or
            with no matching relation, are skipped.

            Args:
                parent: Value looked up in each relation collection.
                count: Stored as the first tuple element when the result entry
                    is created.
                list_of_neighbors: Iterable of candidate neighbor words.

            Returns:
                A dict that is empty when nothing matched; otherwise it holds a
                single key ``word`` mapped to ``(count, [tagged neighbors])``.
            """
            # NOTE(review): ``word`` is not defined in this function; it is
            # presumably a variable of the enclosing scope -- confirm.
            return_dict = {}

            for neighbor in list_of_neighbors:
                synsets = wordnet.synsets(neighbor)
                if not synsets:
                    continue
                s = synsets[0]

                # Ordered (collection, tag) pairs; order matters because the
                # first collection containing ``parent`` wins.
                # NOTE(review): lexeme/tenses/suggest results are wrapped in a
                # one-element list, so ``parent`` only matches when it equals
                # the whole returned value -- kept as in the original; confirm
                # whether that is intended.
                relations = (
                    (s.synonyms, "[SYNO]"),
                    (s.hypernyms(), "[HYPER]"),
                    (s.hyponyms(), "[HYPO]"),
                    (s.holonyms(), "[HOLO]"),
                    (s.meronyms(), "[MERO]"),
                    # parent is the singular of neighbor => neighbor is a plural
                    ([singularize(neighbor)], "[PLURAL]"),
                    # parent is the plural of neighbor => neighbor is a singular
                    ([pluralize(neighbor)], "[SINGULAR]"),
                    ([comparative(neighbor)], "[COMPA]"),
                    ([superlative(neighbor)], "[SUPERLA]"),
                    ([lemma(neighbor)], "[LEMMA]"),
                    ([lexeme(neighbor)], "[LEXEME]"),
                    ([tenses(neighbor)], "[TENSE]"),
                    ([suggest(neighbor)], "[MISPELL]"),
                )

                tag = next(
                    (label for related, label in relations
                     if parent in related),
                    None,
                )
                if tag is None:
                    continue

                # setdefault replaces the former bare ``except:`` that doubled
                # as "create the entry if missing" and hid unrelated errors.
                entry = return_dict.setdefault(word, (count, []))
                entry[1].append(str(neighbor) + tag)

            return return_dict
Ejemplo n.º 3
0
def get_marks(data, image_file):
    """Score a scanned answer from its OCR text and return the marks awarded.

    The text is extracted from the image with the Vision ``client`` and is
    scored on three weighted components: keyword coverage (60%), word count
    (30%) and sentence count (10%). Each component is capped at its expected
    value before the ratio is taken. Progress is printed along the way.

    Args:
        data: Indexable grading spec: ``data[0]`` maximum marks, ``data[1]``
            expected number of words, ``data[2]`` expected number of
            sentences, ``data[3]`` the keywords. Keywords containing a space
            are matched against whole sentences; the others are expanded with
            WordNet lemma forms and verb lexemes and matched against tokens.
        image_file: Passed to ``io.open`` in binary mode to read the image.

    Returns:
        The marks rounded to one decimal and then to the nearest integer,
        except that a value whose first decimal is exactly 5 is returned
        as is (e.g. ``3.5``).

    Raises:
        ZeroDivisionError: if ``data[3]`` is empty or an expected count is 0.
    """
    keywords_matched = 0
    maximum_marks = data[0]
    expected_no_of_words = data[1]
    expected_no_of_sentences = data[2]

    # Work on a copy so the caller's keyword list is never modified.
    all_keywords = list(data[3])
    expected_keywords = len(all_keywords)

    print()
    print("----------------------------------------------")
    print()

    # Multi-word keywords cannot match a single token; they are matched
    # against sentences further down.
    spaced_keywords = [word for word in all_keywords if ' ' in word]
    keywords = [word for word in all_keywords if ' ' not in word]

    print(spaced_keywords)
    print(keywords)

    # Lemma names and derivationally related forms of every single-word
    # keyword (duplicates are removed when the set is built below).
    forms = []
    for word in keywords:
        for alemma in wn.lemmas(word):
            forms.append(alemma.name())
            forms.extend(related_lemma.name() for related_lemma in
                         alemma.derivationally_related_forms())

    verb = []
    for word in keywords:
        verb.extend(lexeme(word))

    keywords = list({x.lower() for x in keywords + forms + verb})

    print()
    print("----------------------------------------------")
    print()
    print(keywords)
    print()

    # The ``with`` block closes the file; a separate name avoids shadowing
    # the ``image_file`` parameter.
    with io.open(image_file, 'rb') as image_handle:
        content = image_handle.read()
    image = vision.types.Image(content=content)

    response = client.text_detection(image=image)
    texts = response.text_annotations
    # NOTE(review): presumably no annotations are returned when the image
    # holds no readable text; score that as empty text rather than failing
    # on texts[0] -- confirm against the Vision API documentation.
    detected_text = texts[0].description if len(texts) > 0 else ''
    string = detected_text.replace('\n', ' ').lower()
    # Keep only letters, digits and full stops (the latter so sentences can
    # still be split); everything else becomes a single space.
    string = re.sub('[^A-Za-z0-9.]+', ' ', string)

    print("-----------------------------------------------")
    print()
    print(string)
    print()

    word_list = word_tokenize(string)
    # The word count is taken before word_enchant rewrites the token list.
    no_of_words = len(word_list)
    # NOTE(review): word_enchant is defined elsewhere; presumably a
    # pyenchant-based spelling correction of the tokens -- confirm.
    word_list = word_enchant(word_list)

    if no_of_words > expected_no_of_words:
        no_of_words = expected_no_of_words

    sent_list = sent_tokenize(string)

    print(sent_list)
    print()
    print("------------------------------------------------")
    print()

    no_of_sentences = len(sent_list)
    if no_of_sentences > expected_no_of_sentences:
        no_of_sentences = expected_no_of_sentences

    print('no_of_words:', no_of_words)
    print('no_of_sentences:', no_of_sentences)

    list_of_matched_keywords = []
    for keyword in keywords:
        if keyword in word_list:
            keywords_matched += 1
            list_of_matched_keywords.append(keyword)

    # NOTE(review): a multi-word keyword is counted once per sentence that
    # contains it or resembles it (similar() ratio above 0.4), so one phrase
    # can add several matches; the total is capped just below.
    for phrase in spaced_keywords:
        for sentence in sent_list:
            if phrase in sentence or similar(sentence, phrase) > 0.4:
                keywords_matched += 1
                list_of_matched_keywords.append(phrase)

    if keywords_matched > expected_keywords:
        keywords_matched = expected_keywords
    print('no of keywords matched:', keywords_matched)
    print('keywords matched: ', list_of_matched_keywords)

    keywords_percentage = 0.60 * (keywords_matched / expected_keywords)
    word_percentage = 0.30 * (no_of_words / expected_no_of_words)
    sentence_percentage = 0.10 * (no_of_sentences / expected_no_of_sentences)

    print('keywords_percentage:', keywords_percentage)
    print('word_percentage:', word_percentage)
    print('sentence_percentage:', sentence_percentage)

    total_marks = maximum_marks * (keywords_percentage + word_percentage +
                                   sentence_percentage)
    total_marks = round(total_marks, 1)
    # Round to a whole mark unless the first decimal is exactly 5, in which
    # case the half mark is kept.
    first_decimal = (total_marks * 10) % 10
    if first_decimal < 5:
        total_marks = math.floor(total_marks)
    elif first_decimal > 5:
        total_marks = math.ceil(total_marks)
    print('total_marks:', total_marks)

    print()
    print("(^-^)(^-^)(^-^)(^-^)(^-^)(^-^)(^-^)(^-^)(^-^)(^-^)(^-^)")
    print()
    return total_marks
Ejemplo n.º 4
0
from pattern.text.en import conjugate, lemma, lexeme
# Print what pattern's lemma() and then lexeme() return for the word 'gave'.
for inflect in (lemma, lexeme):
    print(inflect('gave'))