Example #1
def pronounResolution(doc):
    # Note: the doc argument is unused; coreference runs on the strings below.
    coref = Coref()
    clusters = coref.one_shot_coref(
        utterances=u"Their parents love them very much.",
        context=u"I know twins.")

    resolved_utterance_text = coref.get_resolved_utterances()
    print(resolved_utterance_text)
Example #2
def rnn(filename):
    # Requires boto3, os, neuralcoref's Coref, and the project's Relationship class.
    bucket_name = "novelnet"
    coref = Coref()
    print("job " + filename + " is started")
    s3 = boto3.resource('s3')
    s3.Object(bucket_name, filename).download_file("test")
    with open("test", 'r') as f:
        text = f.read()
    #print(text)
    relationship = Relationship(id=filename,
                                pipeline=coref,
                                text=text,
                                threshold=20,
                                verbose=False)
    relationship.report()
    doc_name = 'doc' + filename + '.pkl'
    clusters_name = 'clusters' + filename + '.pkl'
    mentions_name = 'mentions' + filename + '.pkl'
    ner_name = 'ner' + filename + '.txt'
    s3.Object(bucket_name, "results/" + doc_name).upload_file(doc_name)
    s3.Object(bucket_name,
              "results/" + clusters_name).upload_file(clusters_name)
    s3.Object(bucket_name,
              "results/" + mentions_name).upload_file(mentions_name)
    s3.Object(bucket_name, "results/" + ner_name).upload_file(ner_name)
    os.remove("status")
    os.remove("test")
    print("job " + filename + " is finished")
Example #3
def resolve_corefs(text):
    print("resolving pronoun co-refs...")
    document = nlp(text)
    with HiddenPrints():
        coref = Coref()
    context = ""
    for sentence in document.sents:
        # print(str(sentence))
        # Sentences containing "they " are added to the context without resolving.
        if "they " in str(sentence):
            context += " " + str(sentence).strip()
            continue
        clusters = coref.one_shot_coref(utterances=str(sentence).strip(),
                                        context=context)
        resolved_utterance_text = coref.get_resolved_utterances()
        # print(resolved_utterance_text)
        context += " ".join(resolved_utterance_text).strip()
    return context
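The HiddenPrints helper used above is not defined in this excerpt. A minimal sketch of such a context manager, assuming it only needs to silence standard output while the Coref model loads:

import os
import sys

class HiddenPrints:
    # Assumed helper: temporarily redirect stdout to os.devnull.
    def __enter__(self):
        self._original_stdout = sys.stdout
        sys.stdout = open(os.devnull, 'w')

    def __exit__(self, exc_type, exc_value, traceback):
        sys.stdout.close()
        sys.stdout = self._original_stdout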
Example #4
def main():
    with open('data/pre-nlp/reviews.json') as fr:
        reviews = json.load(fr)

    coref = Coref()
    progress = 1
    coref_reviews = []
    for review in reviews:
        text = review['text']
        try:
            coref.one_shot_coref(utterances=text)
        except Exception as e:
            print(e)
            continue
        review['text'] = coref.get_resolved_utterances()[0]
        coref_reviews.append(review.copy())
        print(progress)
        progress += 1

    with open('data/pre-nlp/reviews-after-coreference.json', 'w') as fw:
        fw.write(json.dumps(coref_reviews))
Example #5
def extractRel(raw_text, endpoint):
    temppath = 'StanfordOpenIE/'
    tempfile = 'input.txt'

    text = raw_text.lower()
    logger.info(text)
    coref = Coref()
    clusters = coref.one_shot_coref(utterances=text)
    resolved_coref = coref.get_most_representative()
    mentions = coref.get_clusters()
    resolved_coref = {str(k): str(v) for k, v in resolved_coref.items()}

    # Replace each mention in the text with its most representative form.
    for key, val in resolved_coref.items():
        text = re.sub(' ' + re.escape(key) + ' ',
                      ' ' + str(val) + ' ', text)

    relation_tuples = list()
    concept_pairs = conceptsimilarity.filter_pairs(text, endpoint)
    for concept1, concept2 in concept_pairs:
        sentences = text.split('.')
        for sentence in sentences:
            if concept1 in sentence and concept2 in sentence:
                with open(temppath + tempfile, 'w') as f:
                    f.write(sentence)
                for sub, rel, pred in conceptmap.relations(tempfile):
                    logger.info("concept1 %s", concept1)
                    logger.info("concept2 %s", concept2)
                    logger.info("sub %s", sub)
                    logger.info("rel %s", rel)
                    logger.info("pred %s", pred)
                    # Select relations that appear near each other in the paragraph.
                    # For now, concepts that lie in the same sentence and are related by OpenIE are selected.
                    if ((concept1 in sub) and
                        (concept2 in pred)) or ((concept1 in pred) and
                                                (concept2 in sub)):
                        relation_tuples.append((sub, rel, pred))
    return relation_tuples
Example #6
def doCoref(text):

    # Initialize the Coref object
    coref = Coref()

    results = resolve(text, coref)

    # fd = open("corefoutput.txt", "w")
    # for y in results:
    #     fd.write(str(y[0]) + "\n")
    # fd.close()

    return results
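The resolve helper called above is not shown. A plausible sketch based on the Coref API used throughout these examples (hypothetical; the project's actual helper may differ):

def resolve(text, coref):
    # Hypothetical stand-in: run one-shot coreference over the raw text
    # and return neuralcoref's resolved utterances.
    coref.one_shot_coref(utterances=text)
    return coref.get_resolved_utterances()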
Example #7
def process_text(path='Harry_Potter_and_the_Sorcerers_Stone.txt'):
    coref = Coref()

    with open("Harry_Potter_and_the_Sorcerers_Stone.txt", 'r') as f:
        text = f.read()

    relationship = RelationshipGolden(0,
                                      pipeline=coref,
                                      text=text,
                                      threshold=25,
                                      verbose=False)
    relationship.build_relationship()
    relationship.report()
    relationship.export_graph()
Example #8
def resolve_pronouns(doc):
    coref = Coref()
    coref.one_shot_coref(utterances=doc.text)
    mentions = coref.get_mentions()
    #print(mentions, coref.get_scores())
    clusters = coref.get_clusters(remove_singletons=True)
    alias_groups = []
    for cluster in clusters[0].values():
        # cluster here is a list of mention indices
        aliases = []
        indices = []
        for mention_index in cluster:
            mention = mentions[mention_index]
            aliases.append(mention.text)
            indices.append((mention.start, mention.end))
        alias_groups.append((aliases, indices))
    return alias_groups
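A possible invocation of resolve_pronouns, assuming an installed spaCy English model (the function only needs an object with a .text attribute):

import spacy

nlp = spacy.load('en')
doc = nlp(u"Alice met Bob. She smiled at him.")
for aliases, indices in resolve_pronouns(doc):
    print(aliases, indices)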
Example #9
from neuralcoref import Coref

coref = Coref()
clusters = coref.continuous_coref(utterances=u"John wanted to marry Mary. He was a doctor and she was a nurse. She went to the market one day when Henry met her and proposed to her. He was a local don.")
print(clusters)

mentions = coref.get_mentions()
#print(mentions)

utterances = coref.get_utterances()
print(utterances)

resolved_utterance_text = coref.get_resolved_utterances()
print(resolved_utterance_text)

print(coref.get_scores())
Example #10
sentences = paragraph.split('.')

for item in sentences[:1]:
    print(item)

'''
COREFERENCE RESOLUTION
'''
con = "lamp on table"
utt = "chair near to table"

paragraph = "there is a table.there is a lamp on it.it has white colour"
sentences = paragraph.split('.')

coref = Coref()
clusters = coref.one_shot_coref(utterances=utt, context=con)
a = coref.get_mentions()  # all detected mentions
first = str(a[0])  # text of the first mention
one = []
for item in a:
    one.append(str(item))  # texts of all mentions
two = first.split()  # tokens of the first mention

utterances = coref.get_utterances()

resolved_utterance_text = coref.get_resolved_utterances()
print(resolved_utterance_text)

diff = list(set(two) - set(one))  # tokens of the first mention that are not full mentions
Example #11
from neuralcoref import Coref

coref = Coref()
clusters = coref.one_shot_coref(utterances=u"She loves him.",
                                context=u"My sister has a dog.")
mentions = coref.get_mentions()
utterances = coref.get_utterances()
resolved_utterance_text = coref.get_resolved_utterances()
most_representative = coref.get_most_representative()

print(clusters)
print(mentions)
print(utterances)
print(resolved_utterance_text)
print(most_representative)
Example #12
# -*- coding: utf-8 -*-

from nltk.corpus import wordnet as wn
import numpy as np
import data_helpers
import auxilary_data_helper
from tokenizeData import Lemmatizer
import itertools
import pickle
from keras.models import load_model
from neuralcoref import Coref

import sent2vec
sent2vec_model = sent2vec.Sent2vecModel()
sent2vec_model.load_model('torontobooks_unigrams.bin')
coref = Coref()


def evaluate_on_test(x_test, y_test, model):
    evaluate = model.evaluate(x_test, y_test)
    print("Evaluated against test dataset: " + evaluate)


def predict_movie(testStrs,
                  model,
                  labels_dict,
                  vocabulary,
                  event_voc,
                  ners_voc,
                  multiple=False):
    #testStrs = ["A young girl rises to fame in Broadway while many other theatre figures are jealous and disgusted. She starts as a general assistant to another bright star of Broadway and slowly she becomes the new star in the town. Another highschool girl offers to help and becomes a maid to Eve."]
Example #13
def extract_triplets(sentence):
    nlp = spacy.load('en')
    
    # tok = nlp("John is a doctor and Mary is a nurse")
    # svos = find_triplets(tok)
    # #print(svos)

    # make all occupation words to lower
    # for higher accuracy
    words = sentence.split()
    cleaned_words = []
    for word in words:
        cleaned_word = word
        check_word = word.strip(",").strip(";").strip(".")
        if is_occupation(check_word.lower()):
            cleaned_word = cleaned_word.lower()
        cleaned_words.append(cleaned_word)

    sentence = " ".join(cleaned_words)

    #print(sentence)

    ne_tree = nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(sentence)))
    iob_tagged = nltk.tree2conlltags(ne_tree)
    #print(iob_tagged)
    male_counter = 0
    female_counter = 0
    mappings = {}

    for name in ENGLISH_MALE:
        mappings[name] = []
    for name in ENGLISH_FEMALE:
        mappings[name] = []

    mappings[DEFAULT] = []

    b_persons = []

    for ent in iob_tagged:
        if ent[1] == 'NNP':
            #print(is_male(ent[0].lower()))
            if is_male(ent[0].lower()) and male_counter < len(ENGLISH_MALE):
                sentence = sentence.replace(ent[0], ENGLISH_MALE[male_counter].capitalize())
                mappings[ENGLISH_MALE[male_counter]].append(ent[0])
                male_counter += 1
            elif is_female(ent[0].lower()) and female_counter < len(ENGLISH_FEMALE):
                sentence = sentence.replace(ent[0], ENGLISH_FEMALE[female_counter].capitalize())
                mappings[ENGLISH_FEMALE[female_counter]].append(ent[0])
                female_counter += 1
            else:
                sentence = sentence.replace(ent[0], DEFAULT)
                mappings[DEFAULT].append(ent[0])

    coref = Coref()
    clusters = coref.continuous_coref(utterances=sentence)

    sentence = coref.get_resolved_utterances()[0]
    print(sentence)

    tok = nlp(sentence)


    svos = find_triplets(tok)
   
    count = 0

    new_svos = []

    for triplet in svos:
        if triplet[0].capitalize() not in b_persons:
            if triplet[0] != DEFAULT:
                if triplet[0] in mappings.keys():
                    popped = triplet
                    addition = (mappings[popped[0]][0], popped[1], popped[2])
                    new_svos.append(addition)
            else:
                popped = triplet
                addition = (mappings[popped[0]][count], popped[1], popped[2])
                new_svos.append(addition)
                count += 1
        else:
            new_svos.append(triplet)
    
    return new_svos
Example #14
                            prev_noun = resolve_noun
                            break

    return doc_list


sentence = "Louie is a quite fellow." \
           " But that doesn't mean he will endure anything." \
           " Samantha loves this about him." \
           " Why wouldn't she?" \
           " Her whole childhood was under his shadow." \
           " John admired him, but he didn't know him, like she knew him." \
           " Louie used to say 'I know life'." \
           " He did."

coref = Coref()
cluster = coref.continuous_coref(utterances=sentence)
resolved_sentence = coref.get_resolved_utterances()
print(resolved_sentence)
"""en_nlp = spacy.load('en_core_web_md')
# sentence = sentence.lower()
print(sentence)
en_doc = en_nlp(u'' + sentence)
prop_noun = ""
anaphora_mappings = {}

prop_noun_entities, prop_noun_entities_pos = get_named_entities(en_doc)
# prop_noun_entities = get_noun_chunks(en_doc, prop_noun_entities)

for entity in prop_noun_entities.keys():
    anaphora_mappings = map_entity_pronoun(prop_noun_entities, entity, anaphora_mappings)
Example #15
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.pipeline import Pipeline
from sklearn.naive_bayes import MultinomialNB
from skmultilearn.problem_transform import LabelPowerset

import pickle

# nlp libraries/api
import en_core_web_lg
from spacy import displacy
import gensim
from neuralcoref import Coref

spacy = en_core_web_lg.load()  # note: this shadows the spacy module name
coref = Coref(nlp=spacy)

# Load opinion lexicon
with open("../neg_words.txt", encoding="ISO-8859-1") as neg_file:
    neg = [line.strip() for line in neg_file]
with open("../pos_words.txt", encoding="ISO-8859-1") as pos_file:
    pos = [line.strip() for line in pos_file]
opinion_words = neg + pos

# Setup nltk corpora path and Google Word2Vec location
# google_vec_file = 'GoogleNews-vectors-negative300.bin'
# word2vec = gensim.models.KeyedVectors.load_word2vec_format(google_vec_file, binary=True)
# pickle.dump(word2vec, open("word2vec_google.pkl", 'wb'))
word2vec = pickle.load(open("../word2vec_google.pkl", 'rb'))

# load the Multi-label binarizer