コード例 #1
0
def genVerbnetFeatures(word, pos, features):
    """Append VerbNet selectional-restriction features for a verb.

    For each VerbNet class of *word*, every SELRESTR ``type`` found under
    the class's thematic roles is appended to *features* as
    ``"THEMETYPE_<type>"``.  Non-verb tokens are ignored.

    Args:
        word: lemma to look up in VerbNet.
        pos: coarse POS tag; only 'V' is processed.
        features: list mutated in place with the new feature strings.

    Returns:
        None; *features* is extended as a side effect.
    """
    if pos != 'V':
        return
    for class_id in vn.classids(word):
        vnclass = vn.vnclass(class_id)
        # `selrestr` instead of the original loop variable `type`, which
        # shadowed the builtin.  Duplicates are kept on purpose (one feature
        # per occurrence across classes/roles).
        for selrestr in vnclass.findall('THEMROLES/THEMROLE/SELRESTRS/SELRESTR'):
            features.append("THEMETYPE_" + selrestr.attrib['type'])
コード例 #2
0
    def getThematicRoles(self, verb):
        """Collect thematic-role descriptions for *verb* from VerbNet.

        Each returned entry is the role type followed by one bracketed
        "[<Value><type>]" tag per selectional restriction on that role,
        across every VerbNet class of the verb.
        """
        roles = []
        for class_id in self.getClasses(verb):
            themroles = vn.vnclass(class_id).findall('THEMROLES/THEMROLE')
            for themrole in themroles:
                restr_tags = [
                    '[%(Value)s%(type)s]' % restr.attrib
                    for restr in themrole.findall('SELRESTRS/SELRESTR')
                ]
                roles.append(themrole.attrib['type'] + ''.join(restr_tags))
        return roles
コード例 #3
0
ファイル: qa_utils.py プロジェクト: mjhosseini/entgraph_eval
def is_transitive(lemma):
    """Best-effort check whether *lemma* has a transitive VerbNet frame.

    Only the first class id of the lemma and that class's first frame are
    inspected (an upstream loop over all frames was deliberately disabled).
    Any lookup failure — unknown lemma, empty class list, missing frame
    data — yields False rather than raising.
    """
    try:
        cids = verbnet.classids(lemma)
        frames = verbnet.frames(verbnet.vnclass(cids[0]))
        return "Transitive" in frames[0]['description']['primary']
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; anything else counts as "not transitive".
        return False
コード例 #4
0
    def test_remove_before_verb(self):
        """
        Whenever we detect that the sentence starts with a verb, we'll remove it from
        the VerbNet syntax
        """
        from nltk.corpus import verbnet

        first_classid = verbnet.classids('buy')[0]
        original_syntax = verbnet.vnclass(first_classid).find('FRAMES/FRAME/SYNTAX')

        # Stripping everything before the VERB node should leave VERB + Theme.
        result = remove_before_v(original_syntax)
        expected = ET.fromstring("""<SYNTAX><VERB /><NP value="Theme" /></SYNTAX>""")

        self.assertEqual(syntax_to_str(result), syntax_to_str(expected))
コード例 #5
0
ファイル: getframes.py プロジェクト: AllanRamsay/COMP34411
def getFrames(verb, frames):
    """Accumulate normalized VerbNet frames for *verb* into *frames*.

    For every VerbNet class of *verb* the class is pretty-printed, its
    member verbs extracted, and each frame string normalized (NP merging,
    SCOMP/VERB substitutions, trailing-PP removal) before being used as a
    key in *frames*, which maps frame -> list of member verbs.

    Relies on module-level regexes (membersPattern, framePattern, scomp,
    german, shifted, finalPPs) and helpers (mergeintrans, mergeNPs).

    Returns:
        The same *frames* dict, mutated in place.
    """
    for classid in verbnet.classids(verb):
        vnclass = verbnet.pprint(verbnet.vnclass(classid))
        # r"\s+": raw string avoids the invalid-escape-sequence warning the
        # original "\s+" literal triggers on modern Python.
        members = re.compile(r"\s+").split(membersPattern.search(vnclass).group("members"))
        for match in framePattern.finditer(vnclass):
            # group() already returns str, so the original "%s" % wrapper
            # was redundant and has been dropped.
            frame = mergeintrans(mergeNPs(match.group("frame")))
            frame = scomp.sub("SCOMP", frame)
            frame = german.sub("VERB", frame)
            frame = shifted.sub("NP VERB NP", frame)
            frame = finalPPs.sub("", frame)
            if frame in frames:
                frames[frame] += members
            else:
                frames[frame] = members
    return frames
コード例 #6
0
def GetVerbnetRestrictions(vnclass):
  """Collect selectional restrictions per thematic role for a VerbNet class.

  Walks up the subclass chain (stripping the last '-' segment of the ID)
  until a top-level VNCLASS element is reached, gathering for each THEMROLE
  its selectional restrictions.

  Args:
    vnclass: an ElementTree element (VNCLASS or VNSUBCLASS).

  Returns:
    dict mapping role type -> (logic, pairs) where logic is the SELRESTRS
    'logic' attribute (default 'and') and pairs is a list of
    (Value, type) tuples.
  """
  role_restrictions = {}

  while True:
    for role in vnclass.findall('THEMROLES/THEMROLE'):
      restrictions = role.find('SELRESTRS')
      # Explicit check: truth-testing an Element is deprecated, and an
      # empty SELRESTRS (no children) must be skipped like a missing one,
      # which is exactly what the old `if restrictions:` relied on.
      if restrictions is not None and len(restrictions) > 0:
        restriction_set = set()
        for restriction in restrictions.findall('SELRESTR'):
          predicate = restriction.attrib
          restriction_set.add((predicate['Value'], predicate['type']))

        total = (restrictions.get('logic', 'and'), list(restriction_set))
        role_restrictions[role.attrib['type']] = total

    if vnclass.tag == 'VNCLASS':
      break
    else:
      # Subclass: derive the parent id by dropping the final '-' segment
      # and continue collecting inherited roles.
      parent_class = vnclass.attrib['ID'].rsplit('-', 1)[0]
      vnclass = verbnet.vnclass(parent_class)

  return role_restrictions
コード例 #7
0
ファイル: nlputils.py プロジェクト: ping543f/KGen
    def get_verbnet_args(verb, verbose=False):
        """Return the VerbNet thematic-role names for *verb*.

        The verb is lowercased and lemmatized, then its VerbNet class ids
        are looked up.  If none are found, the first lemma of each WordNet
        synset is tried; failing that, the first hypernym's first lemma is
        tried recursively.  For each class id, roles are read from the
        class; when a class has no roles, progressively shorter class
        numbers (prefix before the last '-') are tried.

        Args:
            verb: surface verb form (any case).
            verbose: when True, print progress information.

        Returns:
            list of role-type strings, [] when all fallbacks are
            exhausted, or None when class ids exist but none yields roles.
        """
        lemmatizer = WordNetLemmatizer()
        lemmatized_verb = lemmatizer.lemmatize(verb.lower(), 'v')

        classids = verbnet.classids(lemma=lemmatized_verb)
        if verbose:
            print('Class IDs for "{}": {}'.format(lemmatized_verb, classids))

        if len(classids) < 1:
            if verbose:
                print(
                    'No entry found on verbnet for "{}". Attempting WordNet synsets!'
                    .format(lemmatized_verb))

            # Fallback 1: first lemma of each WordNet synset.
            wn_synsets = wordnet.synsets(lemmatized_verb)
            for synset in wn_synsets:
                if len(synset.lemmas()) < 1:
                    continue

                candidate = str(synset.lemmas()[0].name())
                classids = verbnet.classids(lemma=candidate)
                if verbose:
                    print('Class IDs for "{}": {}'.format(candidate, classids))

                if len(classids) > 0:
                    break

            if len(classids) < 1:
                if verbose:
                    print(
                        'Unable to find entries on verbnet for neither of the synsets... Will go recursive now (which is not a good thing!)'
                    )

                # Fallback 2: recurse on the first usable synset's first
                # hypernym lemma.  NOTE: returns on the first such synset,
                # so later synsets are never tried (preserved behavior).
                for synset in wn_synsets:
                    if len(synset.lemmas()) < 1:
                        continue

                    candidate = str(synset.hypernyms()[0].lemmas()[0].name())
                    return NLPUtils.get_verbnet_args(candidate,
                                                     verbose=verbose)

                if verbose:
                    print('Exhausted attempts... returning an empty list.')
                return []

        # `classid` instead of the original `id`, which shadowed the builtin.
        for classid in classids:
            # Everything after the first '-' is the numeric class number,
            # e.g. 'admire-31.2' -> '31.2'.
            class_number = classid[classid.find('-') + 1:]
            try:
                v = verbnet.vnclass(class_number)
                roles = [
                    t.attrib['type'] for t in v.findall('THEMROLES/THEMROLE')
                ]
            except ValueError:
                print('VN class number not found: {}'.format(class_number))

                # Sentinel values so the fallback loop below still runs.
                v = [None]
                roles = []

            while len(roles) < 1 and len(v) > 0:
                if '-' not in class_number:
                    # No shorter prefix to fall back to.  Without this
                    # guard the original could loop forever, shaving one
                    # character at a time down to '' and never exiting.
                    break
                fallback_class_number = class_number[:class_number.rfind('-')]
                if verbose:
                    print('No roles found for class {}, falling back to {}.'.
                          format(class_number, fallback_class_number))
                class_number = fallback_class_number

                try:
                    v = verbnet.vnclass(class_number)
                    roles = [
                        t.attrib['type']
                        for t in v.findall('THEMROLES/THEMROLE')
                    ]
                except ValueError:
                    # Keep looping with the next shorter prefix.
                    v = [None]
                    roles = []

            if len(roles) > 0:
                if verbose:
                    print('Roles found: {}'.format(roles))

                return roles

        return None
コード例 #8
0
# #     print(i)
# random.shuffle(featuresset)
# classifier = nltk.NaiveBayesClassifier.train(featuresset)
# save_classifier_NBC(classifier)

#-----------------------------------------testing---------------------------------------------------
# Demo: run the frame/feature pipeline on a single example sentence.
# NOTE(review): `input` shadows the builtin of the same name.
input = "He need a ride from his home."
verb_list, frames_list = prim_fram(input)
print(frames_list)
print(nltk.pos_tag(nltk.word_tokenize(input)))
print(verb_list)
# For every detected verb, gather VerbNet-derived feature keys:
# selectional-restriction types, thematic-role types, and semantic
# predicate values, across all classes of that verb.
for r in range(len(verb_list)):
    keys = []
    ids = vb.classids(verb_list[r])
    for i in ids:
        u = vb.vnclass(i)
        for j in [l.attrib['type'] for l in u.findall('THEMROLES/THEMROLE/SELRESTRS/SELRESTR')]:
            keys.append(j)
        for j in [l.attrib['type'] for l in u.findall('THEMROLES/THEMROLE')]:
            keys.append(j)
        for j in [l.attrib['value'] for l in u.findall('FRAMES/FRAME/SEMANTICS/PRED')]:
            keys.append(j)
    # NOTE(review): the feature vocabulary file is re-read on every loop
    # iteration; hoisting this out of the loop would avoid repeated I/O.
    f = open("tmp/features_verbs.txt","r")
    word_features = []

    # Each line of the file (newline included) becomes one feature entry.
    for l,i in enumerate(f):
        word_features.append(i)
    f.close()
    def find_features(document, input):
        words = set(document)
コード例 #9
0
def GetVerbnetRestrictions(vnclass):
    """Collect selectional restrictions per thematic role for a VerbNet class.

    Walks up the subclass chain (stripping the last '-' segment of the ID)
    until a top-level VNCLASS element is reached, gathering for each
    THEMROLE its selectional restrictions.

    Args:
        vnclass: an ElementTree element (VNCLASS or VNSUBCLASS).

    Returns:
        dict mapping role type -> (logic, pairs) where logic is the
        SELRESTRS 'logic' attribute (default 'and') and pairs is a list of
        (Value, type) tuples.
    """
    role_restrictions = {}

    while True:
        for role in vnclass.findall('THEMROLES/THEMROLE'):
            restrictions = role.find('SELRESTRS')
            # Explicit check: truth-testing an Element is deprecated, and
            # an empty SELRESTRS (no children) must be skipped like a
            # missing one — exactly what `if restrictions:` relied on.
            if restrictions is not None and len(restrictions) > 0:
                restriction_set = set()
                for restriction in restrictions.findall('SELRESTR'):
                    predicate = restriction.attrib
                    restriction_set.add(
                        (predicate['Value'], predicate['type']))

                total = (restrictions.get('logic',
                                          'and'), list(restriction_set))
                role_restrictions[role.attrib['type']] = total

        if vnclass.tag == 'VNCLASS':
            break
        else:
            # Subclass: derive the parent id by dropping the final '-'
            # segment and continue collecting inherited roles.
            parent_class = vnclass.attrib['ID'].rsplit('-', 1)[0]
            vnclass = verbnet.vnclass(parent_class)

    return role_restrictions


# Example usage: list the classes of 'drink', then collect restrictions
# for class 39.1-2.  The return value is discarded here.
vnclasses = verbnet.classids('drink')
v = verbnet.vnclass('39.1-2')
GetVerbnetRestrictions(v)
コード例 #10
0
from nltk.corpus import verbnet

# Quick tour of the VerbNet corpus reader: look up the classes of 'take'
# and pretty-print information about each one.
my_classids = verbnet.classids(lemma='take')
print(my_classids)
# my_lemmas = verbnet.lemmas(my_classids)
# my_longid = longid(my_shortid)
# my_shortid = shortid(my_longid)
for i in my_classids:
    my_vnclass = verbnet.vnclass(i)
    # my_wordnetids = verbnet.wordnetids(mi)
    # Human-friendly methods
    # NOTE(review): pprint returns a string; its result is discarded here
    # (unlike the print(...) calls below).
    verbnet.pprint(my_vnclass)
    # vnframe = my_vnclass.findall('FRAMES/FRAME')
    # print(verbnet.pprint_description(vnframe))
    # print(verbnet.pprint_frames(vnframe))
    print(verbnet.pprint_members(my_vnclass))
    # print(verbnet.pprint_semantics(vnframe))
    print(verbnet.pprint_subclasses(my_vnclass))
    # print(verbnet.pprint_syntax(vnframe))
    # x = verbnet.pprint_themroles(my_vnclass)
    print(verbnet.pprint_themroles(my_vnclass))
    '''for j in x.split("]"):
        print(j)'''
コード例 #11
0
def process_srl(srl_output, actual_data, just_phrases):
    """Score corpus lines against VerbNet selectional restrictions.

    NOTE(review): this is Python 2 code (print statement at the end,
    str.decode on byte strings, filter returning a list); it will not run
    under Python 3 as written.

    srl_output   -- path to a file of SRL results, records separated by a
                    '================' line
    actual_data  -- path to the corpus (tab-separated; column 1 is the
                    metaphor, column 2 the figurative/literal sense)
    just_phrases -- path to the bare phrases, aligned line-by-line with
                    the corpus

    For each corpus line, every verb token of the phrase is mapped to its
    VerbNet classes; each class's restrictions are checked against the SRL
    record and the averaged score for the line is printed.
    """
    porter_stemmer = PorterStemmer()
    wn_lem = WordNetLemmatizer()
    file_open = open (srl_output, "r")
    output    = file_open.read()
    # Reuses the parameter name: from here on srl_output is a list of
    # per-sentence SRL records.
    srl_output = output.split("\n================\n")
    srl_list = []
    # NOTE(review): a list comprehension used purely for its side effect.
    [srl_list.append(line.strip()) for line in srl_output]

    phrase_sentence = create_vector(just_phrases)

    corpus_data = create_vector(actual_data)
    number = 0
    for line in corpus_data:
        sline       = line.split("\t")
        sense       = sline[2] # figurative or literal
        metaphor    = sline[1] # along the line <- the metaphor itself
        try:
            current_srl = srl_list[number].split("\n") # semantic role labeling of give sentece
        except:
            # NOTE(review): debugger left in as the "handler" for an SRL /
            # corpus misalignment; bare except hides the real error.
            import pdb; pdb.set_trace()

        #mtokens = metaphor.split(" ")
        # Tokenize the phrase, drop stopwords and stray punctuation, then
        # keep only tokens POS-tagged as verbs (VB*).
        mtokens_t = word_tokenize(phrase_sentence[number])
        mtokens_t = [w for w in mtokens_t if not w.decode('utf8') in nlcor.stopwords.words('english')]
        mtokens   = filter(lambda word: word not in ",-'", mtokens_t)
        sane_mt = [mt.decode('utf8') for mt in mtokens]
        pos_mtokens = nltk.pos_tag(sane_mt)
        only_verbs = [tkn[0] for tkn in pos_mtokens if 'VB' in tkn[1]]
        #print "==============================================="
        line_score = 0
        token_count = 1
        number += 1
        #print "phrase tokens: %s" % mtokens_t
        #print "only verbs: %s" % only_verbs

        for mtoken in only_verbs:
            # Try the raw token first, then its lemma; skip the token if
            # neither is known to VerbNet.
            vnclasses = verbnet.classids(mtoken)
            if not vnclasses:
                vnclasses = verbnet.classids(wn_lem.lemmatize(mtoken))
                if not vnclasses:
                    continue
            #print "vnclasses: %s" % vnclasses

            # Index of the first SRL line mentioning the stemmed token.
            mindex = [index for index, sl in enumerate(current_srl) if porter_stemmer.stem(mtoken) in sl.decode('utf8')]
            if not mindex:
         #       print 0
                continue
            token_count += 1

            class_score = 0
            class_count = 1
            #print '----- %s -----' % mtoken
            for vn in vnclasses:
                v=verbnet.vnclass(vn)
                try:
                    restrictions = GetVerbnetRestrictions(v)
                except:
                    # NOTE(review): bare except — any failure in the
                    # restriction walk silently skips the class.
                    continue

             #   print restrictions
                if restrictions:
                    # NOTE(review): class_score is overwritten, not
                    # accumulated, so only the last scored class counts.
                    class_score = check_validity(current_srl, mindex[0], restrictions)
                    class_count += 1
                    #print class_score
                else:
                    #print "No restrictions for %s" % vn
                    pass
            # Average over scored classes (class_count starts at 1, hence
            # the -1 when at least one class contributed).
            if class_count < 2:
                avg_class_score = class_score / class_count
            else:
                avg_class_score = class_score / (class_count - 1)
            #print '---------------'

            line_score += avg_class_score
            token_count += 1
        # Same averaging scheme per line, over the matched verb tokens.
        if token_count < 2:
            avg_line_score = line_score / token_count
        else:
            avg_line_score = line_score / (token_count - 1)

#        print "%s - %s - %s" % (sline[1], sline[2], line_score)
        print avg_line_score
コード例 #12
0
from nltk.corpus import wordnet as wn
from itertools import product
from nltk.stem.wordnet import WordNetLemmatizer
from pathlib import Path
import pandas as pd
import os
import nltk
import re
from nltk.corpus import verbnet as vn
from xml.etree import ElementTree

from stanfordcorenlp import StanfordCoreNLP

# Project root: two directories above this file.
datapath = Path(__file__).resolve().parents[2]
# NOTE(review): hard-coded local CoreNLP install path; the server wrapper
# is created at import time as a module-level side effect.
nlp = StanfordCoreNLP(
    '/home/ruta/master-thesis/tools/stanford-corenlp-full-2018-10-05')

# Serialized XML of the VerbNet class 'escape-51.1'.
# NOTE(review): the variable name says 31_2 but the class fetched is 51.1.
vn_31_2 = ElementTree.tostring(vn.vnclass('escape-51.1'))
コード例 #13
0
#!/usr/bin/python
# -*- coding: utf-8 -*-

from nltk.corpus import verbnet
# Doctest-style walk through the VerbNet reader API.  The first five
# expressions evaluate and discard their results (they only print in a
# REPL/doctest context).
verbnet.lemmas()[20:25]
verbnet.classids()[:5]
verbnet.classids('accept')
verbnet.vnclass('remove-10.1')  # doctest: +ELLIPSIS
verbnet.vnclass('10.1')  # doctest: +ELLIPSIS
# Print each thematic role of admire-31.2 with its selectional
# restrictions formatted as [<Value><type>].
vn_31_2 = verbnet.vnclass('admire-31.2')
for themrole in vn_31_2.findall('THEMROLES/THEMROLE'):
    print(themrole.attrib['type'])
    for selrestr in themrole.findall('SELRESTRS/SELRESTR'):
        print('[%(Value)s%(type)s]' % selrestr.attrib)
    print()

print(verbnet.pprint('57'))
コード例 #14
0
# --- RTE (Recognizing Textual Entailment) corpus demo -----------------------
print(rte.fileids())  # doctest: +ELLIPSIS
rtepairs = rte.pairs(['rte2_test.xml', 'rte3_test.xml'])
print(rtepairs)  # doctest: +ELLIPSIS
print(rtepairs[5])
print(rtepairs[5].text)  # doctest: +NORMALIZE_WHITESPACE
print(rtepairs[5].hyp)
print(rtepairs[5].value)
xmltree = rte.xml('rte3_dev.xml')
print(xmltree)  # doctest: +SKIP
print(xmltree[7].findtext('t'))  # doctest: +NORMALIZE_WHITESPACE
# verbnet
# nltk.download('verbnet')
# --- VerbNet corpus demo ----------------------------------------------------
print(verbnet.lemmas()[20:25])
print(verbnet.classids()[:5])
print(verbnet.classids('accept'))
print(verbnet.vnclass('remove-10.1'))  # doctest: +ELLIPSIS
print(verbnet.vnclass('10.1'))  # doctest: +ELLIPSIS
# Thematic roles of admire-31.2 with their selectional restrictions
# formatted as [<Value><type>].
vn_31_2 = verbnet.vnclass('admire-31.2')
for themrole in vn_31_2.findall('THEMROLES/THEMROLE'):
    print(themrole.attrib['type'])
    for selrestr in themrole.findall('SELRESTRS/SELRESTR'):
        print('[%(Value)s%(type)s]' % selrestr.attrib)
    print()
print(verbnet.pprint('57'))
# nps_chat
# nltk.download('nps_chat')
# --- NPS Chat corpus demo ---------------------------------------------------
print(nltk.corpus.nps_chat.words())
print(nltk.corpus.nps_chat.tagged_words())
print(nltk.corpus.nps_chat.tagged_posts())  # doctest: +NORMALIZE_WHITESPACE
print(nltk.corpus.nps_chat.xml_posts())  # doctest: +ELLIPSIS
posts = nltk.corpus.nps_chat.xml_posts()
コード例 #15
0
from nltk.corpus import verbnet

def GetVerbnetRestrictions(vnclass):
  """Collect selectional restrictions per thematic role for a VerbNet class.

  Walks up the subclass chain (stripping the last '-' segment of the ID)
  until a top-level VNCLASS element is reached, gathering for each THEMROLE
  its selectional restrictions.

  Args:
    vnclass: an ElementTree element (VNCLASS or VNSUBCLASS).

  Returns:
    dict mapping role type -> (logic, pairs) where logic is the SELRESTRS
    'logic' attribute (default 'and') and pairs is a list of
    (Value, type) tuples.
  """
  role_restrictions = {}

  while True:
    for role in vnclass.findall('THEMROLES/THEMROLE'):
      restrictions = role.find('SELRESTRS')
      # Explicit check: truth-testing an Element is deprecated, and an
      # empty SELRESTRS (no children) must be skipped like a missing one,
      # which is what the old `if restrictions:` relied on.
      if restrictions is not None and len(restrictions) > 0:
        restriction_set = set()
        for restriction in restrictions.findall('SELRESTR'):
          predicate = restriction.attrib
          restriction_set.add((predicate['Value'], predicate['type']))

        total = (restrictions.get('logic', 'and'), list(restriction_set))
        role_restrictions[role.attrib['type']] = total

    if vnclass.tag == 'VNCLASS':
      break
    else:
      # Subclass: derive the parent id by dropping the final '-' segment
      # and continue collecting inherited roles.
      parent_class = vnclass.attrib['ID'].rsplit('-', 1)[0]
      vnclass = verbnet.vnclass(parent_class)

  return role_restrictions

# Example usage: list the classes of 'drink', then collect restrictions
# for class 39.1-2.  The return value is discarded here.
vnclasses = verbnet.classids('drink')
v=verbnet.vnclass('39.1-2')
GetVerbnetRestrictions(v)