Example 1
 def initialize_lookup(self):
     if self.initialized:
         return
     try:
         fn.frames_by_lemma("dog")
     except LookupError:
         nltk_download(self.config, "framenet_v17")
     self.initialized = True
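The method above defers the corpus download until the first lookup fails. A minimal standalone sketch of the same lazy-download pattern, assuming only that nltk is installed ("framenet_v17" is the NLTK data package id):

import nltk
from nltk.corpus import framenet as fn

def ensure_framenet():
    # any cheap lookup forces the lazy corpus loader to resolve the data
    try:
        fn.frames_by_lemma("dog")
    except LookupError:
        nltk.download("framenet_v17")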
Example 2
def get_lemma_frames(lemm):
    print(lemm)
    fs = fn.frames_by_lemma(lemm)
    for f in fs:
        # strip the POS suffix from each lexical-unit name, e.g. 'dog.n' -> 'dog'
        lunits = [lunit.split('.')[0] for lunit in f['lexUnit']]
        print(lemm in lunits)

    return fs  # reuse the result instead of scanning the corpus again
Example 3
 def get_frames_by_trans(self, trans):
     if nltk:
         lemma = trans
         frames = fn.frames_by_lemma(lemma)
         if len(frames) == 0:
             # retry with a case-insensitive regex
             lemma = r'(?i)' + str(trans)
             frames = fn.frames_by_lemma(lemma)
         return [i.name for i in frames]
     else:
         print('please install the NLTK FrameNet corpus first; '
               'see: http://www.nltk.org/howto/framenet.html')
         return False
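Note that frames_by_lemma treats its argument as a regular expression, which is what makes the (?i) fallback above work. An illustrative one-line retry under the same assumption:

# exact lookup first, then a case-insensitive regex as a fallback
frames = fn.frames_by_lemma('Walk') or fn.frames_by_lemma(r'(?i)walk')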
Example 4
 def framenet_frames_all(self, tweet):
     """
     count the total number of invoked frames in FrameNet:
     for every token and every possible tag according to Wiktionary, look up frames and count them
     
     >>> f=Featurizer()
     >>> f.init_wiktionary("wiktionary/en.tags.li")
     >>> f.framenet_frames_all("the little guy")
     >>> f.printFeatures()
     """
     from nltk.corpus import framenet as fn
     if not self.wiktionary:
         print("call init_wiktionary before using framenet", file=sys.stderr)
         raise RuntimeError("init_wiktionary needed before calling this method")
     words = tweet
     if self.lowercase:
         words = words.lower()
     if self.remove_stopwords:
         words = self.DELIM.join([
             w for w in words.split(self.DELIM)
             if w not in ENGLISH_STOP_WORDS
         ])
     for token in words.split(self.DELIM):
         for tag in self.wiktionary[token]:
             token_tag = token + "." + tag[0].lower()  # initial letter of the UPOS tag
             print(token_tag)
             frames = fn.frames_by_lemma(r'(?i)\b{}\b'.format(token_tag))
             for frame in frames:
                 f = self.PREFIX_FRAMENET + frame['name']
                 self.d[f] = self.d.get(f, 0) + 1
Example 5
def parGetFrame(lemma):
    # look up the lemma that was passed in, not a hard-coded word,
    # and return a real result instead of a placeholder string
    frames = fn.frames_by_lemma(lemma)
    if frames:
        return frames.pop()
    return None
Example 6
 def lookup_with_POS(self, candidate):
     word, word_pos = candidate
     if word_pos in self.pos_tag_mapping:
         word += "." + self.pos_tag_mapping[word_pos]
     frames = fn.frames_by_lemma(word)
     if not frames:
         return None
     return self.disambiguate(frames, candidate, override=defs.disam.first)
Example 7
def find_frames(word):
    frames_list = []
    fn_results = fn.frames_by_lemma(word)
    for item in fn_results:
        name = item.name
        #print(name)
        frames_list.append(name)
    return frames_list
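An illustrative call to the helper above; the exact frame names depend on the installed FrameNet release:

# e.g. 'Self_motion' is typically among the frames whose LUs match 'run'
print(find_frames('run'))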
Example 8
def word_similarity(word):
    match_found = False
    fn_results = fn.frames_by_lemma(word)
    if fn_results:
        for item in fn_results:
            frame_id = item.ID  # avoid shadowing the built-in id()
            if frame_id in (54, 61, 57, 690, 7):
                # alternative ID set: (43, 523)
                match_found = True

    return match_found
Example 9
def verb_to_frames(verb):
    syn_frame_dict = {}
    wnsynsets = wn.synsets(verb, wn.VERB)
    for syn in wnsynsets:
        lemmas = syn.lemma_names()
        frames = []
        for lem in lemmas:
            # just collect frame ID
            fids = [frame.ID for frame in fn.frames_by_lemma(r'(?i)' + lem)]
            frames.extend(fids)
        syn_frame_dict[syn] = frames
    return syn_frame_dict
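Illustrative usage of the mapping above; keys are WordNet Synset objects and values are lists of FrameNet frame IDs (assuming wn is nltk.corpus.wordnet and fn is the FrameNet reader, as in the snippet):

mapping = verb_to_frames('walk')
for syn, frame_ids in mapping.items():
    # deduplicate IDs collected from multiple lemmas of the same synset
    print(syn.name(), sorted(set(frame_ids)))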
Example 10
def invoke_frame(token: str):
    word = token.lower()
    lu_list = [(i.name, i.definition) for i in fn.lus()]
    lu_temp = set(i for i in lu_list if word == i[0].split('.')[0])
    frames = []

    for lu, def_ in lu_temp:
        fr = fn.frames_by_lemma(r'(?i)' + lu)
        # guard against empty lookups and avoid duplicates
        if fr and fr[0] not in frames:
            frames.append(fr[0])

    return frames
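fn.lus() enumerates every lexical unit on each call, so invoke_frame is expensive when called once per token. A hedged optimization sketch that builds the (name, definition) list only once:

_LU_CACHE = None

def cached_lus():
    # build the LU list on the first call; reuse it afterwards
    global _LU_CACHE
    if _LU_CACHE is None:
        _LU_CACHE = [(i.name, i.definition) for i in fn.lus()]
    return _LU_CACHE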
Example 11
 def lookup_(self, candidate):
     # http://www.nltk.org/howto/framenet.html
     word = candidate
     # in framenet, pos-disambiguation is done via the lookup
     if self.disambiguation == defs.disam.pos:
         frames = self.lookup_with_POS(candidate)
     else:
         frames = fn.frames_by_lemma(word)
         if not frames:
             return None
         frames = self.disambiguate(frames, candidate)
     if not frames:
         return None
     activations = {x.name: 1 for x in frames}
     if self.do_spread_activation:
         parent_activations = self.spread_activation(frames, self.spread_steps, 1)
         activations = {**activations, **parent_activations}
     return activations
Example 12
def find_frames(lemma, pos_tag):
    simple_tag = None
    if pos_tag in SIMPLE_TAGS:  # dict.has_key() was removed in Python 3
        simple_tag = SIMPLE_TAGS[pos_tag]

    if simple_tag:
        key = lemma + '.' + simple_tag
    else:
        key = lemma

    frames = []

    if key in FRAME_CACHE:
        frames = FRAME_CACHE[key]
    else:
        try:
            frames = fn.frames_by_lemma(key)
            if frames:
                # cache the whole result list, not just the last frame
                FRAME_CACHE[key] = frames
        except Exception:
            pass
    return frames
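A possible alternative to the hand-rolled FRAME_CACHE above, assuming lookups repeat with the same key: functools.lru_cache memoizes the whole result per key (a tuple is returned so the cached value is immutable):

from functools import lru_cache

@lru_cache(maxsize=None)
def frames_for_key(key):
    try:
        return tuple(fn.frames_by_lemma(key))
    except Exception:
        return ()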
Example 13
def demo():
    from pprint import pprint
    from nltk.corpus import framenet as fn

    #
    # It is not necessary to explicitly build the indexes by calling
    # buildindexes(). We do this here just for demo purposes. If the
    # indexes are not built explicitly, they will be built as needed.
    #
    print('Building the indexes...')
    fn.buildindexes()

    #
    # Get some statistics about the corpus
    #
    print('Number of Frames:', len(fn.frames()))
    print('Number of Lexical Units:', len(fn.lexical_units()))
    print('Number of annotated documents:', len(fn.documents()))
    print()

    #
    # Frames
    #
    print(
        'getting frames whose name matches the (case insensitive) regex: "(?i)medical"'
    )
    medframes = fn.frames(r'(?i)medical')
    print('Found {0} Frames whose name matches "(?i)medical":'.format(
        len(medframes)))
    print([(f.name, f.ID) for f in medframes])

    #
    # store the first frame in the list of frames
    #
    tmp_id = medframes[0].ID
    m_frame = fn.frame(tmp_id)  # reads all info for the frame

    #
    # get the frame relations
    #
    print(
        '\nNumber of frame relations for the "{0}" ({1}) frame:'.format(
            m_frame.name, m_frame.ID), len(m_frame.frameRelation))
    for fr in m_frame.frameRelation:
        print('   ', fr.type + ":", fr.relatedFrame)

    #
    # get the names of the Frame Elements
    #
    print(
        '\nNumber of Frame Elements in the "{0}" frame:'.format(m_frame.name),
        len(m_frame.FE))
    print('   ', [x.name for x in m_frame.FE])

    #
    # get the names of the "Core" Frame Elements
    #
    print('\nThe "core" Frame Elements in the "{0}" frame:'.format(
        m_frame.name))
    print('   ', [x.name for x in m_frame.FE if x.coreType == "Core"])

    #
    # get all of the Lexical Units that are incorporated in the
    # 'Ailment' FE of the 'Medical_conditions' frame (id=239)
    #
    print('\nAll Lexical Units that are incorporated in the "Ailment" FE:')
    m_frame = fn.frame(239)
    ailment_lus = [x for x in m_frame.lexUnit if x.incorporatedFE == 'Ailment']
    print([x.name for x in ailment_lus])

    #
    # get all of the Lexical Units for the frame
    #
    print('\nNumber of Lexical Units in the "{0}" frame:'.format(m_frame.name),
          len(m_frame.lexUnit))
    print('  ', [x.name for x in m_frame.lexUnit[:5]], '...')

    #
    # get basic info on the second LU in the frame
    #
    tmp_id = m_frame.lexUnit[1].ID  # grab the id of the second LU
    luinfo = fn.lu_basic(tmp_id)  # get basic info on the LU
    print('\nInformation on the LU: {0}'.format(luinfo.name))
    pprint(luinfo)

    #
    # Get a list of all of the corpora used for fulltext annotation
    #
    print('\nNames of all of the corpora used for fulltext annotation:')
    allcorpora = set([x.corpname for x in fn.documents()])
    pprint(list(allcorpora))

    #
    # Get the names of the annotated documents in the first corpus
    #
    firstcorp = list(allcorpora)[0]
    firstcorp_docs = fn.documents(firstcorp)
    print('\nNames of the annotated documents in the "{0}" corpus:'.format(
        firstcorp))
    pprint([x.filename for x in firstcorp_docs])

    #
    # Search for frames containing LUs whose name attribute matches a
    # regexp pattern.
    #
    # Note: if you were going to be doing a lot of this type of
    #       searching, you'd want to build an index that maps from
    #       lemmas to frames because each time frames_by_lemma() is
    #       called, it has to search through ALL of the frame XML files
    #       in the db.
    print(
        '\nSearching for all Frames that have a lemma that matches the regexp: "^run.v$":'
    )
    pprint(fn.frames_by_lemma(r'^run.v$'))
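The note above suggests building a lemma-to-frames index instead of rescanning the corpus on every frames_by_lemma() call. A minimal sketch of such an index, assuming the NLTK FrameNet corpus is installed (each lexical unit exposes the frame it belongs to via lu.frame):

from collections import defaultdict
from nltk.corpus import framenet as fn

def build_lemma_index():
    index = defaultdict(set)
    for lu in fn.lus():
        # 'run.v' -> lemma 'run'; record the name of the frame the LU evokes
        lemma = lu.name.rsplit('.', 1)[0]
        index[lemma].add(lu.frame.name)
    return index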
Example 14
def frame_idname_list(lemma):
    frames = fn.frames_by_lemma(lemma)
    retq = []
    for f in frames:
        retq.append((f["ID"], f["name"]))
    return tuple(retq)
Example 15
            try:
                sentences = str(st.tag_text(sentence)).replace(
                    "</sentence>\n", "</sentence>|")
                #         print sentences
                for sentence in sentences.split("|"):
                    if sentence.strip() == '': continue
                    #             print sentence
                    for tree in fromstring(sentence):
                        lemma = tree.items()[2][1].lower()
                        #                 print lemma

                        # Lemmas to skip (use re.match)
                        if lemma == "that" or lemma == "this": continue
                        try:
                            # Query FrameNet -- frame names
                            frames = fn.frames_by_lemma(lemma)
                            framenames = ""
                            for frame in frames:
                                #                           print frame.name,
                                #                           print len(frames)
                                #                           continue
                                # Cutoff point
                                if len(frames) > numframes: continue
                                framenames = "".join(
                                    [framenames, "|", frame.name])
                            if framenames != "":
                                print "".join([
                                    field[0], "|", field[1], "|FRM_01|", lemma,
                                    framenames
                                ])
Example 17
def travel_verb_instances(data):

    travel = data["Travel verbs"].str.cat(sep=',')
    travel_verbs = travel.split(",")
    travel_verbs = convert_to_base_verb(travel_verbs)
    counter1 = Counter(travel_verbs)
    travel_verb_dict = dict(counter1)

    verbs = []
    counts = []
    is_verbs_in_vn = []
    is_verbs_in_fn = []
    is_verbs_in_both = []
    is_verbs_not_in_both = []
    fn_frames = []
    is_synonyms_available = []
    synonyms = []
    is_any_syns_in_net = []
    syns_in_vn = []
    syns_in_fn = []

    for verb in travel_verb_dict:

        verbs.append(verb)
        counts.append(travel_verb_dict[verb])
        verb = verb.lower()
        fn_frame = []
        fn_results = []
        #print(fn_frame)
        syn_list = []
        syn_in_vn = []
        syn_not_in_vn = []
        syn_in_fn = []
        syn_not_in_fn = []

        is_any_syn_in_net = 0
        is_synonyms = 0

        is_in_vn = verbs_in_verbnet(verb)
        is_in_fn = verbs_in_framenet(verb)

        # print("Verb is in verbnet %s",is_in_vn)
        # print("Verb is in framenet %s", is_in_fn)

        is_not_in_both = 1 if not is_in_vn and not is_in_fn else 0
        is_in_both = 1 if is_in_vn and is_in_fn else 0

        # print("Verb is in both %s", is_in_both)
        # print("Verb is not in both %s", is_not_in_both)

        if is_in_fn:
            fn_results = fn.frames_by_lemma(verb)
            if fn_results:
                for item in fn_results:
                    name = item.name
                    fn_frame.append(name)

        if is_not_in_both:
            syn_list = find_synonyms(verb)

            is_synonyms = 1 if len(syn_list) > 0 else 0

            for syn in syn_list:
                is_syn_in_vn = verbs_in_verbnet(syn)
                is_syn_in_fn = verbs_in_framenet(syn)

                # plain if/else statements instead of conditional expressions
                # used only for their side effects
                if is_syn_in_vn:
                    syn_in_vn.append(syn)
                else:
                    syn_not_in_vn.append(syn)
                if is_syn_in_fn:
                    syn_in_fn.append(syn)
                else:
                    syn_not_in_fn.append(syn)

                if is_syn_in_vn or is_syn_in_fn:
                    is_any_syn_in_net += 1

        is_verbs_in_vn.append(is_in_vn)
        is_verbs_in_fn.append(is_in_fn)
        is_verbs_in_both.append(is_in_both)
        is_verbs_not_in_both.append(is_not_in_both)
        fn_frames.append((",".join(fn_frame)))
        is_synonyms_available.append(is_synonyms)
        synonyms.append((",").join(syn_list))
        is_any_syns_in_net.append(is_any_syn_in_net)
        syns_in_vn.append((",".join(syn_in_vn)))
        syns_in_fn.append((",".join(syn_in_fn)))

        # print("==========Syn list============")
        # print(syn_list)
        #
        # print("========Syn in VN========")
        # print(syn_in_vn)
        #
        # print("========Syn in FN========")
        # print(syn_in_fn)
        #
        # print(is_any_syn_in_net)
        #
        # print(fn_frames)
        # print("================================================================================")

    # print(verbs)
    # print(counts)
    # print(is_verbs_in_vn)
    # print(is_verbs_in_fn)
    # print(is_verbs_in_both)
    # print(is_verbs_not_in_both)
    # print(fn_frames)
    # print(is_synonyms_available)
    # print(synonyms)
    # print(is_any_syns_in_net)
    # print(syns_in_vn)
    # print(syns_in_fn)

    #final_list = list(zip(verbs,counts,is_verbs_in_vn,is_verbs_in_fn,is_verbs_in_both,is_verbs_not_in_both,fn_frames,is_synonyms_available,synonyms,is_any_syns_in_net,syn_in_vn,syn_in_fn))

    #print(data,final_list)

    df = pd.DataFrame(np.column_stack([
        verbs, counts, is_verbs_in_vn, is_verbs_in_fn, is_verbs_in_both,
        is_verbs_not_in_both, fn_frames, is_synonyms_available, synonyms,
        is_any_syns_in_net, syns_in_vn, syns_in_fn
    ]),
                      columns=[
                          'Verb', 'Count', 'Is in VerbNet', 'Is in FrameNet',
                          'Is in both', 'is not in both', 'FN Frames',
                          'Is Synonyms', 'Synonyms', 'Is any syn in Net',
                          'Synonyms in VN', 'Synonyms in FN'
                      ])

    return df
Example 18
def verbs_in_framenet(verb):
    fn_results = fn.frames_by_lemma(verb)
    return 1 if fn_results else 0
Example 19
        #sense2freq = {}
        max_count = 0
        max_synset = 0
        for i, s in enumerate(synsets):
            freq = 0
            for lemma in s.lemmas():
                freq += lemma.count()
            if freq > max_count:
                max_count = freq
                max_synset = i
            #sense2freq[i] = freq
        synset_dict[word] = [max_synset]

if first_fn_match and use_framenet:
    for word in synset_dict.keys():
        frames = fn.frames_by_lemma(word)
        if frames:
            # keep the name of the first matching frame only
            frame_dict[word] = [frames[0].name]

###############################

wordcount_filename = 'WordNet-InfoContent-3.0/ic-brown-add1.dat'
lines = []

with open(wordcount_filename) as f:
    lines = f.readlines()

word_id_dict = {}
for line in lines[1:]:
    pass  # loop body truncated in the original snippet
Example 20
def nvConflict(path, content):

    all_words = []
    model_2 = Word2Vec([content], size=100, window=5, min_count=1, workers=4)

    # read in every file from the full text annotation folder(or other annotation data folder)
    # read in as a tree structure using xml.etree.ElementTree
    for filename in os.listdir(path):
        if not filename.endswith('.xml'):
            continue
        fullname = os.path.join(path, filename)
        tree = ET.parse(fullname)
        tree = tree.getroot()
        t = tostring(tree)
        t = t.lower()
        tree = ET.fromstring(t)
        full_sentence = []

        new_list = []

        final_result_list = []


        # read each sentence of each annotation file
        for sentence in tree:
            each_sentence = []
            each_nvpair = []
            each_sentence.append(filename)
            each_sentence.append("This following is a new sentence: ")
            each_sentence.append(sentence[0].text)
            result_list = []

            for annot in sentence.iter():  # text, annotationSet

                # annot.attrib, e.g. <annotationSet cDate="10/26/2009 04:28:59 PDT Mon" luID="5511" luName="people.n" frameID="304" frameName="People" status="MANUAL" ID="6558815">
                for x, y in annot.attrib.items():
                    # when the phrase type is noun; it's different kind of noun;
                    # please see http://www.surdeanu.info/mihai/teaching/ista555-fall13/readings/PennTreebankConstituents.html for details
                    if y == 'nn' or y == 'nns' or y == 'nnp' or y == 'nnps':
                        print('--------------')
                        print('This sentence has the following noun:')
                        start = annot.attrib.get('start')
                        start = int(start)
                        end = annot.attrib.get('end')
                        end = int(end)
                        end = end + 1
                        clause_1 = sentence[0].text[start:end]
                        print(clause_1)
                        each_sentence.append(clause_1)
                        each_sentence.append('This is the type of noun:')
                        each_sentence.append(y)
                        print(y)
                        # slice_list.append(fn.annotations(clause_1))
                        fm1 = fn.frames_by_lemma(clause_1)
                        print(fm1)
                        each_sentence.append('This lemma evoked the following frame:')
                        each_sentence.append(fm1)
                        new_list.append(clause_1)
                        each_nvpair.append(clause_1)
                        all_words.append(clause_1)
                        result_list.append(clause_1)

                    # when the phrase type is verb; it's different kind of verb
                    elif y == 'vvd' or y == 'vb' or y == 'vbd' or y == 'vbg' or y == 'vbn' or y == 'vbp' or y == 'vbz':
                        print('--------------')
                        print(sentence[0].text)
                        print('This sentence has the following verb:')
                        start = annot.attrib.get('start')
                        start = int(start)
                        print(start)
                        end = annot.attrib.get('end')
                        end = int(end)
                        end = end + 1
                        print(sentence[0].text[start:end])
                        clause_2 = sentence[0].text[start:end]
                        each_sentence.append(clause_2)
                        each_sentence.append('This is the type of verb:')
                        each_sentence.append(y)
                        print(y)
                        # slice_list.append(sentence[0].text[start:end])
                        each_sentence.append('This lemma evoked the following frame:')
                        fm2 = fn.frames_by_lemma(clause_2)
                        print(fm2)
                        each_sentence.append(fm2)
                        new_list.append(clause_2)
                        each_nvpair.append(clause_2)
                        all_words.append(clause_2)
                        result_list.append(clause_2)


            full_sentence.append(each_sentence)
            full_sentence.append("\n")


            try:
                sim = []
                avg = 0
                n_pairs = 0
                for i in range(len(result_list)):
                    for j in range(i + 1, len(result_list)):
                        print(result_list[i])
                        print(result_list[j])
                        print(model_2.similarity(result_list[i], result_list[j]))
                        sim.append(result_list[i])
                        sim.append(result_list[j])
                        sim.append(model_2.similarity(result_list[i], result_list[j]))
                        avg += model_2.similarity(result_list[i], result_list[j])
                        n_pairs += 1

                # average over the number of pairs, not the final loop indices
                if n_pairs:
                    avg = avg / n_pairs
                print(avg)


                result_list.append(sim)
                result_list.append(sentence[0].text)
                result_list.append(avg)


                final_result_list.append(result_list)

                final_result_list.append("\n")

            except Exception:
                pass


    with open('/Users/mac/Desktop/final_testing_3', 'w') as file_handler:
        try:
            for item in final_result_list:
                file_handler.write("{}\n".format(item))
        except Exception:
            pass
Example 21
     # May erratically truncate extremely long sentence strings!
        try:
          sentences = str(st.tag_text(sentence)).replace("</sentence>\n","</sentence>|")
#         print sentences
          for sentence in sentences.split("|"):
              if sentence.strip() == '' : continue
#             print sentence
              for tree in fromstring(sentence):
                  lemma = tree.items()[2][1].lower()
#                 print lemma

                  # Lemmas to skip (use re.match)
                  if lemma == "that" or lemma == "this" : continue
                  try:
                        # Query FrameNet -- frame names
                        frames = fn.frames_by_lemma(lemma)
                        framenames = ""
                        for frame in frames:
                            # Cutoff point
                            if len(frames) > numframes: continue
                            framenames = "".join([framenames, "|", frame.name])
                        if framenames != "":
                            print("".join([field[0], "|", field[1], "|FRM_01|", lemma, framenames]))

                        # Core Frame Elements
                        for frame in frames:
                            if len(frames) > numframes: continue
                            ID = frame.ID
                            framecores = ""
                            cores = [(fename, fe.ID) for fename, fe in fn.frame(ID).FE.items() if fe.coreType == 'Core']
                            for core in cores:
                                framecores = "".join([framecores, "|", core[0]])
word1 = "melt"
word2 = "oxidize"

input = word1

vn_results = vn.classids(lemma=input)

if not vn_results:
    print(input + ' not in verbnet.')
else:
    print('verbnet:')
    for ele in vn_results:
        print(ele)
    print("")

fn_results = fn.frames_by_lemma(input)

if not fn_results:
    print(input + ' not in framenet.')
else:
    print('framenet:')
    for ele in fn_results:
        print(ele)
    print("")

pb_results = []
try:
    pb_results = pb.rolesets(input)
except ValueError:
    print(input + ' not in propbank.')
Example 23
def nvConflict(path):

    # read in every file from the full text annotation folder(or other annotation data folder)
    # read in as a tree structure using xml.etree.ElementTree
    for filename in os.listdir(path):
        if not filename.endswith('.xml'):
            continue
        fullname = os.path.join(path, filename)
        tree = ET.parse(fullname)
        tree = tree.getroot()
        t = tostring(tree)
        t = t.lower()
        tree = ET.fromstring(t)
        full_sentence = []
        # slice_list = []

        # read each sentence of each annotation file
        for sentence in tree:
            each_sentence = []
            each_sentence.append(filename)
            each_sentence.append("This following is a new sentence: ")
            each_sentence.append(sentence[0].text)

            for annot in sentence.iter():  # text, annotationSet

                # annot.attrib, e.g. <annotationSet cDate="10/26/2009 04:28:59 PDT Mon" luID="5511" luName="people.n" frameID="304" frameName="People" status="MANUAL" ID="6558815">
                for x, y in annot.attrib.items():
                    # when the phrase type is noun; it's different kind of noun;
                    # please see http://www.surdeanu.info/mihai/teaching/ista555-fall13/readings/PennTreebankConstituents.html for details
                    if y == 'nn' or y == 'nns' or y == 'nnp' or y == 'nnps':
                        print('--------------')
                        print('This sentence has the following noun:')
                        start = annot.attrib.get('start')
                        start = int(start)
                        end = annot.attrib.get('end')
                        end = int(end)
                        end = end + 1
                        clause_1 = sentence[0].text[start:end]
                        print(clause_1)
                        # slice_list.append(clause_1)
                        each_sentence.append(clause_1)
                        each_sentence.append('This is the type of noun:')
                        each_sentence.append(y)
                        print(y)
                        # slice_list.append(fn.annotations(clause_1))
                        fm1 = fn.frames_by_lemma(clause_1)
                        print(fm1)
                        each_sentence.append(
                            'This lemma evoked the following frame:')
                        each_sentence.append(fm1)

                    # when the phrase type is verb; it's different kind of verb
                    elif y == 'vvd' or y == 'vb' or y == 'vbd' or y == 'vbg' or y == 'vbn' or y == 'vbp' or y == 'vbz':
                        print('--------------')
                        print(sentence[0].text)
                        print('This sentence has the following verb:')
                        start = annot.attrib.get('start')
                        start = int(start)
                        print(start)
                        end = annot.attrib.get('end')
                        end = int(end)
                        end = end + 1
                        print(sentence[0].text[start:end])
                        clause_2 = sentence[0].text[start:end]
                        each_sentence.append(clause_2)
                        each_sentence.append('This is the type of verb:')
                        each_sentence.append(y)
                        print(y)
                        # slice_list.append(sentence[0].text[start:end])
                        each_sentence.append(
                            'This lemma evoked the following frame:')
                        fm2 = fn.frames_by_lemma(clause_2)
                        print(fm2)
                        each_sentence.append(fm2)

            full_sentence.append(each_sentence)
            full_sentence.append("\n")

        print(full_sentence)

        # use a context manager so the file is closed, and don't reuse
        # the loop variable 'filename' as the file handle
        with open('/Users/mac/Desktop/find_nv_conflict/' + filename, "w") as out_file:
            out_file.write(str(full_sentence))
Example 24
 def getFrameLemma(self, lemma):
     frame = fn.frames_by_lemma(lemma + '.v')
     if len(frame) > 0:
         return frame[0]
     return None
Example 25
 def lookup(self, word):
     frames = fn.frames_by_lemma(word)
     return [f['name'] for f in frames]
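Illustrative behavior of the method above: frames_by_lemma treats the word as a regex over lexical-unit names, so the result may include frames for lemmas that merely contain the word.

# equivalent standalone call, assuming the corpus is installed:
# [f['name'] for f in fn.frames_by_lemma('bake')]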
Example 26

from nltk.corpus import framenet as fn

f = fn.frames(r'(?i)perception')
len(fn.frames())
f = fn.frame(66)

f.ID
f.definition
set(f.lexUnit.keys())

[x.name for x in f.FE]

f.frameRelations

fn.frames_by_lemma(r'(?i)a little')

fn.lu(256).name
fn.lu(256).definition
fn.lu(256).frame
fn.lu(256).lexeme

docs = fn.documents()
len(docs)
docs[0].keys()
docs[0].filename
Example 27
    def process(text='',
                lang='en',
                coreferences=False,
                constituents=False,
                dependencies=False,
                expressions=False,
                **kwargs) -> OrderedDict:
        # build nlp-json
        j: OrderedDict = get_base()
        j['meta']['DC.language'] = lang
        d: OrderedDict = get_base_document(1)
        #j['documents'][d['id']] = d
        j['documents'].append(d)
        d['meta']['DC.source'] = 'NLTK {}'.format(nltk_version)
        d['text'] = text

        # collect parsers
        lemmatizer = get_lemmatizer()
        stemmer = get_stemmer()

        # tokenization and pos
        words = []
        for sent in segment(text):
            for token in sent:
                words.append(token.value)

        # create the token list
        t_id = 1
        for word, xpos in pos_tag(words):
            wordnet_pos = get_wordnet_pos(xpos)
            lemma = lemmatizer(word, pos=wordnet_pos)

            # start the token
            t = {'id': t_id, 'text': word, 'stem': stemmer(word)}
            #d['tokenList'][t['id']] = t
            d['tokenList'].append(t)
            t_id += 1

            # wordnet
            try:
                synsets = wordnet.synsets(lemma, pos=wordnet_pos)
                senses = {}
                for s in synsets:
                    hyponyms = [
                        y for x in s.hyponyms() for y in x.lemma_names()
                    ]
                    hypernyms = [
                        y for x in s.hypernyms() for y in x.lemma_names()
                    ]
                    synonyms = s.lemma_names()[1:]
                    examples = s.examples()
                    sense = {
                        'wordnetId': s.name(),
                        'definition': s.definition()
                    }
                    if synonyms:
                        sense['synonyms'] = synonyms
                    if hypernyms:
                        sense['hypernyms'] = hypernyms
                    if hyponyms:
                        sense['hyponyms'] = hyponyms
                    if examples:
                        sense['examples'] = examples

                    antonyms = []
                    for l in s.lemmas():
                        if l.antonyms():
                            for a in l.antonyms():
                                antonyms.append(a.name())
                    if antonyms:
                        sense['antonyms'] = antonyms

                    senses[sense['wordnetId']] = sense

                if senses:
                    t['synsets'] = senses
            except Exception:
                pass

            # verbnet
            try:
                verbs = dict((class_id, {
                    'classId': class_id,
                    'frames': vn.frames(class_id)
                }) for class_id in vn.classids(word))

                if verbs:
                    t['verbFrames'] = verbs
            except Exception:
                pass

            # framenet
            try:
                frame_net = {}
                frames = invoke_frame(word)
                if frames is not None:
                    for fr in frames:
                        lu_temp = []
                        for lu in fn.lus(r'(?i)' + word.lower()):
                            fr_ = fn.frames_by_lemma(r'(?i)' + lu.name)
                            if len(fr_):
                                if fr_[0] == fr:
                                    lu_temp.append({
                                        'name': lu.name,
                                        'definition': lu.definition,
                                        'pos': lu.name.split('.')[1]
                                    })
                        frame_net[fr.ID] = {
                            'name': fr.name,
                            'frameId': fr.ID,
                            'definition': fr.definition,
                            # 'relations':fr.frameRelations,
                            'lu': lu_temp
                        }
                if frame_net:
                    t['frames'] = frame_net
            except Exception:
                pass

        return remove_empty_fields(j)
Example 28
def getFrame(lex):
    frames = fn.frames_by_lemma(lex)
    for frame in frames:
        print(frame.name + " " + str(frame.ID))
Example 29
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import framenet as fn

tokens = word_tokenize('add boiling water into the cup.')  # avoid shadowing list()
postagged = nltk.pos_tag(tokens)

print('frames:')
for x in tokens:
    print("('" + x + "','" + str(fn.frames_by_lemma(x)) + "')")

print('postags:')
for i in range(len(postagged)):
    print(postagged[i])

print(nltk.pos_tag(tokens))
print(postagged[1])

print(tokens)
Example 30
import nltk
from string import punctuation as punct
from nltk.tokenize import word_tokenize
from nltk.corpus import framenet as fn
from xml.etree import ElementTree

sent = input("Enter the sentence: ")

no_punct = ""
for char in sent:
    if char not in punct:
        no_punct = no_punct + char

tokens = word_tokenize(no_punct.lower())  # avoid shadowing the built-in list

postagged = nltk.pos_tag(tokens)

print('words:' + str(tokens))

for x in tokens:
    print('frame' + '(' + x + ',' + str(fn.frames_by_lemma(x)) + ')')

for i in range(len(postagged)):
    print('nltk_pos' + str(postagged[i]))

with open('morph.xml', 'rt') as f:
    tree = ElementTree.parse(f)

with open('types.xml', 'rt') as f2:
    tree2 = ElementTree.parse(f2)

for node in tree.iter('entry'):
    for x in tokens:
        name = node.attrib.get('word')
        pos = node.attrib.get('pos')
Example 31

from nltk.corpus import framenet as fn

from nltk.stem.wordnet import WordNetLemmatizer
lmtzr = WordNetLemmatizer()
lmtzr.lemmatize('humidity')


from nltk.stem.lancaster import LancasterStemmer
st = LancasterStemmer()
st.stem('luminous')


lemma('humidity')  # lemma() presumably comes from the pattern.en library

frames = fn.frames_by_lemma(r'skin')
for f in frames:
    print('%s - %s\n' % (f.name, f.definition))

fn.lexical_units(r'')

fn.frames_by_lemma(r'(?i)a little')


# taxonomy presumably comes from pattern.search
for f in ('reflect', 'bank'):
    taxonomy.append(f, type='angle')

for f in ('bank', 'financial-institution'):