Esempio n. 1
0
def number_match(fileparse):
    """Link "number"-like noun phrases to the longest number in the document.

    Every entry of ``fileparse.nps`` that has no ``'ref'`` yet and whose
    lower-cased text scores above 0.9 with ``ratio`` against one of a fixed
    list of synonyms for "number" is pointed at the longest digit sequence
    found in the ``text`` of any element of ``fileparse.parses``.

    ``fileparse.nps`` is modified in place: the matching entry's ``'ref'`` is
    set, and a new entry ``{'text': <number>, 'ref': None}`` is added when
    ``_get_cid`` returns nothing for that number.  Returns ``None``.

    NOTE(review): ``ratio``, ``findall``, ``_get_cid`` and ``_mk_coref_id``
    are defined outside this block; presumably ``ratio`` is a string
    similarity in [0, 1] and ``findall`` is ``re.findall`` -- confirm.
    """
    synonyms = (
        u'number', u'integer', u'figure', u'digit', u'character', u'symbol',
        u'cardinal', u'ordinal', u'amount', u'quantity', u'total',
        u'aggregate', u'tally', u'quota', u'limit',
    )
    # Digits, optionally with whitespace *between* them ("1 000 000").
    # Requiring a digit at both ends keeps whitespace-only runs from being
    # reported as numbers and keeps surrounding blanks out of the match.
    pattern = r'\d(?:[\d\s]*\d)?'

    # Snapshot the unresolved ids up front: entries are added to
    # fileparse.nps inside the loop.
    unresolved = [cid for cid, entry in fileparse.nps.items()
                  if not entry.get('ref')]

    # The longest number depends only on the parses, so it is computed once,
    # the first time a noun phrase actually needs it.
    longest = None
    for cid in unresolved:
        text = fileparse.nps[cid]['text'].lower()
        if not any(ratio(text, syn) > .9 for syn in synonyms):
            continue

        if longest is None:
            numbers = []
            for parse in fileparse.parses:
                numbers.extend(findall(pattern, parse.text))
            # max() keeps the first of several equally long candidates.
            longest = max(numbers, key=len) if numbers else ''
        if not longest:
            # No number anywhere in the document: nothing can be linked.
            return

        aid = _get_cid(fileparse.nps, longest, cid)
        if not aid:
            aid = _mk_coref_id()
            fileparse.nps[aid] = {'text': longest, 'ref': None}
        fileparse.nps[cid]['ref'] = aid
Esempio n. 2
0
def word_inclusion(fileparse):
    """Link unresolved noun phrases to a text span containing their words.

    For each entry of ``fileparse.nps`` without a ``'ref'``, the entry's text
    is tokenised with ``word_tokenize`` and lower-cased.  The parses are then
    scanned in order; in the first parse where at least half of the words
    occur followed by a space, the span running from the earliest such
    occurrence to the end of the right-most word occurrence is taken as the
    antecedent.

    ``fileparse.nps`` is modified in place: the entry's ``'ref'`` is set, and
    a new entry ``{'text': <span>, 'ref': None}`` is added when ``_get_cid``
    returns nothing for the span.  Returns ``None``.

    NOTE(review): ``word_tokenize``, ``_get_cid`` and ``_mk_coref_id`` are
    defined outside this block.
    """
    # Snapshot the unresolved ids up front: entries are added to
    # fileparse.nps inside the loop.
    unresolved = [cid for cid, entry in fileparse.nps.items()
                  if not entry.get('ref')]

    for cid in unresolved:
        # Tokenise once per noun phrase; the result does not depend on the
        # parse being scanned.
        words = [w.lower() for w in word_tokenize(fileparse.nps[cid]['text'])]
        # NOTE(review): under Python 2 this is floor division, so e.g. one
        # word out of three is enough.  Expression kept as in the original.
        majority = len(words) / 2

        for parse in fileparse.parses:
            text = parse.text.lower()
            num_found = 0
            first_index = len(text)
            last_index = 0
            for word in words:
                hit = text.find(word + ' ')
                if hit != -1:
                    # Only occurrences followed by a space count towards the
                    # majority and may start the span; the start is the very
                    # position that was tested.
                    num_found += 1
                    first_index = min(first_index, hit)
                else:
                    # A word not followed by a space (end of text, before
                    # punctuation) may still extend the end of the span.
                    hit = text.find(word)
                if hit != -1:
                    last_index = max(last_index, hit + len(word))

            if num_found < majority or first_index >= last_index:
                continue

            anaphor = text[first_index:last_index]
            aid = _get_cid(fileparse.nps, anaphor, cid)
            if not aid:
                aid = _mk_coref_id()
                fileparse.nps[aid] = {'text': anaphor, 'ref': None}
            fileparse.nps[cid]['ref'] = aid
            # The first parse that yields a span wins.
            break
Esempio n. 3
0
def pronouns(fileparse):
    """Resolve unresolved noun phrases using the proposal from ``hobbs``.

    For each entry of ``fileparse.nps`` whose ``'ref'`` is missing or falsy,
    ``hobbs(fileparse, cid)`` is asked for a proposal.  A truthy proposal is
    joined with single spaces and recorded as the entry's referent, adding a
    new ``{'text': ..., 'ref': None}`` entry when ``_get_cid`` returns nothing
    for that text.  ``fileparse.nps`` is modified in place; returns ``None``.

    NOTE(review): ``hobbs``, ``_get_cid`` and ``_mk_coref_id`` are defined
    outside this block.
    """
    # Take the list of unresolved ids before the loop starts adding entries.
    pending = [key for key, entry in fileparse.nps.items()
               if not entry.get('ref')]

    for cid in pending:
        candidate = hobbs(fileparse, cid)
        if not candidate:
            continue

        joined = ' '.join(candidate)
        aid = _get_cid(fileparse.nps, joined, cid)
        if not aid:
            aid = _mk_coref_id()
            fileparse.nps[aid] = {'text': joined, 'ref': None}
        fileparse.nps[cid]['ref'] = aid
Esempio n. 4
0
def exact_match(fileparse):
    """Link noun phrases whose exact text occurs more than once in the parses.

    For each entry of ``fileparse.nps`` whose ``'ref'`` is missing or falsy,
    the occurrences of the entry's text are counted (``str.count``, so
    case-sensitive and non-overlapping) across the ``text`` of every element
    of ``fileparse.parses``.  When the total exceeds one, the entry's
    ``'ref'`` is set to the id returned by ``_get_cid`` for that text, or to a
    newly created ``{'text': ..., 'ref': None}`` entry when ``_get_cid``
    returns nothing.  ``fileparse.nps`` is modified in place; returns
    ``None``.

    NOTE(review): ``_get_cid`` and ``_mk_coref_id`` are defined outside this
    block.
    """
    # Take the list of unresolved ids before the loop starts adding entries.
    pending = [key for key, entry in fileparse.nps.items()
               if not entry.get('ref')]

    for cid in pending:
        phrase = fileparse.nps[cid]['text']
        occurrences = sum(parse.text.count(phrase)
                          for parse in fileparse.parses)
        if occurrences <= 1:
            continue

        aid = _get_cid(fileparse.nps, phrase, cid)
        if not aid:
            aid = _mk_coref_id()
            fileparse.nps[aid] = {'text': phrase, 'ref': None}
        fileparse.nps[cid]['ref'] = aid
Esempio n. 5
0
def levenshtein_inclusion(fileparse):
    """Link unresolved noun phrases to their closest approximate match.

    For each entry of ``fileparse.nps`` whose ``'ref'`` is missing or falsy:

    1. The other entries of ``fileparse.nps`` are compared with it
       (lower-cased, via ``ratio``); the best-scoring one above 0.6, if any,
       becomes its ``'ref'``.
    2. For every parse, the lower-cased parse text is shrunk one character at
       a time (front, then back, alternately) and the shrunk string with the
       smallest ``distance`` to the entry's text is kept as the proposal.  If
       ``ratio`` of the proposal and the entry's text exceeds 0.3, the
       proposal becomes the entry's ``'ref'`` (an entry
       ``{'text': <proposal>, 'ref': None}`` is added when ``_get_cid``
       returns nothing for it).

    As in the original control flow, step 2 runs even when step 1 found a
    match and overrides it, and the last qualifying parse wins.

    ``fileparse.nps`` is modified in place; returns ``None``.

    NOTE(review): ``ratio``, ``distance``, ``_get_cid`` and ``_mk_coref_id``
    are defined outside this block; ``ratio``/``distance`` are presumably the
    Levenshtein similarity and edit distance -- confirm.
    """
    # Snapshot the unresolved ids up front: entries are added to
    # fileparse.nps inside the loop.
    unresolved = [cid for cid, entry in fileparse.nps.items()
                  if not entry.get('ref')]

    for cid in unresolved:
        referent = fileparse.nps[cid]['text'].lower()

        # Search tagged corefs.  The threshold is initialised once so that
        # the best candidate wins, and the entry is never matched against
        # itself (which would always score 1.0).
        best_ratio = 0.6
        for aid in fileparse.nps:
            if aid == cid:
                continue
            score = ratio(referent, fileparse.nps[aid]['text'].lower())
            if score > best_ratio:
                best_ratio = score
                fileparse.nps[cid]['ref'] = aid

        # Search the raw text of each parse.
        for parse in fileparse.parses:
            window = parse.text.lower()
            if len(window) <= 2:
                continue

            # Same candidates, in the same order, as the original shrinking
            # loop -- each one is now scored once instead of twice.
            candidates = [window]
            while len(window) > 2:
                candidates.append(window[1:])
                window = window[1:-1]
                candidates.append(window)

            # min() keeps the first of several equally close candidates.
            proposal = min(candidates,
                           key=lambda cand: distance(cand, referent))

            # Judge and store the best candidate itself, not the 1-2
            # character remnant left over after shrinking.
            if ratio(proposal, referent) > 0.3:
                aid = _get_cid(fileparse.nps, proposal, cid)
                if not aid:
                    aid = _mk_coref_id()
                    fileparse.nps[aid] = {'text': proposal, 'ref': None}
                fileparse.nps[cid]['ref'] = aid