Exemple #1
0
def match(mention, dependencies, stem_pos=False, corelex=False):
    """Build event strings for dependencies attached to a mention's last token.

    The mention is split on whitespace and its last token is used as the
    anchor.  Every dependency whose ``dependent`` or ``governor`` equals that
    anchor, and whose other end carries a POS tag starting with ``JJ`` or
    ``VB`` (presumably Penn Treebank adjective/verb tags -- confirm against
    the parser in use), yields one event string:

    * anchor is the dependent: ``"(<dependent> <governor_pos> _)"``
    * anchor is the governor:  ``"(_ <dependent_pos> <governor>)"``

    Events whose other end was collected from a ``neg`` relation are prefixed
    with ``"NEGATION "`` (dependent case) or ``"NEG "`` (governor case).

    Args:
        mention: Whitespace-separated mention text.
        dependencies: Iterable of dicts with the keys ``relation``,
            ``governor``, ``governor_pos``, ``dependent`` and
            ``dependent_pos``.  The dicts are only read, never modified.
        stem_pos: When true, POS tags in the events are cut to their first
            two characters.
        corelex: When true, the anchor word in the event is replaced by the
            result of the module-level ``lookup`` helper (defined outside
            this block) unless that result is the empty string.

    Returns:
        List of event strings in dependency order; empty when the mention
        contains no tokens.
    """
    tokens = mention.split()
    if not tokens:
        # No anchor token to compare against; indexing tokens[-1] would raise.
        return []
    head = tokens[-1]
    content_pos = ('JJ', 'VB')

    # Words that are the target of a negation.  For a 'neg' relation the
    # governor is taken when it has a JJ*/VB* tag, otherwise the dependent.
    negated = set()
    for dependency in dependencies:
        if dependency['relation'] == 'neg':
            if dependency['governor_pos'].startswith(content_pos):
                negated.add(dependency['governor'])
            else:
                negated.add(dependency['dependent'])

    events = []
    for dependency in dependencies:
        if dependency['dependent'] == head:
            governor_pos = dependency['governor_pos']
            if governor_pos.startswith(content_pos):
                # Work on local copies so the caller's parse is left intact
                # and later calls on the same dependencies see original data.
                if stem_pos:
                    governor_pos = governor_pos[:2]
                dependent = dependency['dependent']
                if corelex:
                    generalized = lookup(dependent)
                    if generalized != '':
                        dependent = generalized
                event = "({} {} _)".format(dependent, governor_pos)
                if dependency['governor'] in negated:
                    event = "NEGATION {}".format(event)
                events.append(event)
            # A dependency anchored as dependent is never also tried as
            # governor, even when it produced no event.
            continue
        if dependency['governor'] == head:
            # Skip relations internal to the mention itself.
            if dependency['dependent'] not in tokens:
                dependent_pos = dependency['dependent_pos']
                if dependent_pos.startswith(content_pos):
                    if stem_pos:
                        dependent_pos = dependent_pos[:2]
                    governor = dependency['governor']
                    if corelex:
                        generalized = lookup(governor)
                        if generalized != '':
                            governor = generalized
                    event = "(_ {} {})".format(dependent_pos, governor)
                    if dependency['dependent'] in negated:
                        # NOTE(review): this branch emits "NEG " while the
                        # dependent branch emits "NEGATION "; kept as is
                        # because consumers may rely on the exact strings.
                        event = "NEG {}".format(event)
                    events.append(event)
    return events
Exemple #2
0
def get_ngrams(feature_vector,
               tokens,
               prefix=None,
               n=1,
               add_null_tokens=False,
               binary_output=False,
               stem=False,
               use_lowercase=True,
               corelex=True,
               generalize=True):
    """Add n-gram features computed from ``tokens`` to ``feature_vector``.

    Tokens are normalised in this order: lowercasing, stemming, CoreLex
    lookup, numeric generalisation.  The resulting n-grams are written into
    ``feature_vector`` under keys of the form ``"<prefix>:<n-gram>"``, where
    the words of an n-gram are joined by single spaces.

    Args:
        feature_vector: Mutable mapping that is updated in place.
        tokens: Sequence of token strings.
        prefix: Key prefix.  When ``None`` it is obtained from the
            module-level ``__get_measure(n)`` helper (defined outside this
            block), with ``'stem'`` prepended if ``stem`` is true.
        n: Size of the n-grams.
        add_null_tokens: For ``n != 1``, pad both ends of the token list with
            ``n - 1`` ``'-nil-'`` tokens before building the n-grams.
        binary_output: Store ``True`` per n-gram instead of its relative
            frequency (count divided by the total number of n-grams).
        stem: Stem tokens with an ``nltk`` ``PorterStemmer`` that is created
            on first use and cached in the module-level ``stemmer`` global.
        use_lowercase: Lowercase every token first.
        corelex: Replace each token by ``corelex.lookup(token)`` unless the
            lookup returns the empty string.
        generalize: Replace every token containing a digit by
            ``"_NUMERIC_"``.
    """
    global stemmer
    if prefix is None:
        prefix = __get_measure(n)
        if stem:
            prefix = 'stem' + prefix
    if use_lowercase:
        tokens = [token.lower() for token in tokens]
    if stem:
        if stemmer is None:
            # Imported lazily so nltk is only required when stemming is used.
            import nltk.stem
            stemmer = nltk.stem.PorterStemmer()
        tokens = [stemmer.stem(word) for word in tokens]

    if corelex:
        # Aliased so the module does not rebind the boolean ``corelex``
        # parameter of this function.
        import corelex as corelex_module
        generalized_tokens = []
        for word in tokens:
            # Look each word up once; an empty result means "keep the word".
            replacement = corelex_module.lookup(word)
            generalized_tokens.append(replacement if replacement != '' else word)
        tokens = generalized_tokens

    if generalize:
        tokens = ["_NUMERIC_" if re.search(r'\d', word) else word for word in tokens]

    if n == 1:
        n_grams = tokens
    else:
        if add_null_tokens:
            padding = ['-nil-'] * (n - 1)
            unigrams = padding + list(tokens) + padding
        else:
            unigrams = tokens
        n_grams = [' '.join(unigrams[start:start + n])
                   for start in range(len(unigrams) - n + 1)]

    total_words = float(len(n_grams))
    # With no n-grams the loop body never runs, so there is no division by 0.
    for word, count in Counter(n_grams).items():
        key = prefix + ':' + word
        if binary_output:
            feature_vector[key] = True
        else:
            feature_vector[key] = count / total_words