Esempio n. 1
0
def frame_chose():
    """Return all frame pairs sharing at least 10 lexical-unit names.

    Scans every FrameNet frame, collects its lexical-unit names, then
    intersects the name sets of every unordered pair of frames.

    Returns:
        list[tuple]: (frame1_name, frame2_name, shared_lu_names, count)
        tuples sorted by count in descending order, count >= 10 only.
    """
    fs = fn.frames()
    fs_dic = {}
    fs_names = []
    for f in fs:
        # The frame object already carries its name; no need to re-fetch
        # the frame by ID later just to recover the name.
        fs_names.append(f.name)
        fs_dic[f.name] = [f.lexUnit[lex].name for lex in f.lexUnit]

    result = []
    # BUG FIX: the original did `fs_ID_copy = fs_ID` (an alias, not a copy)
    # and removed the current element while iterating the very same list,
    # which silently skipped every other frame. Iterating index pairs
    # visits each unordered pair exactly once without mutating anything.
    for i, f1_name in enumerate(fs_names):
        set1 = set(fs_dic[f1_name])
        for f2_name in fs_names[i + 1:]:
            shared = list(set1 & set(fs_dic[f2_name]))
            result.append((f1_name, f2_name, shared, len(shared)))

    result.sort(key=lambda x: x[3], reverse=True)

    # Keep only pairs with a substantial overlap (>= 10 shared LUs).
    return [r for r in result if r[3] >= 10]
def map_terms_to_senses(frames):
    """Disambiguate every term in every frame section via the Lesk algorithm.

    Each frame is a list of sections (dicts keyed by term); the best WordNet
    sense string is written back in place of each term's value.
    """
    for frame in frames:
        # The frame name is the first key of the first section.
        f = fn.frame(list(frame[0].keys())[0])
        # Walk every section of this frame.
        for index, section in enumerate(frame):
            # Walk every term in the section.
            for value in section.keys():
                # Strip pos tags / dots before disambiguating, if needed.
                term = value if is_normalidez(value) else normalizes_string(value)

                # FRAME NAME: context Ctx(w) is the frame's definition.
                if index == FRAME_NAME:
                    sense = wsd.lesk_algorithm(term, f.definition)
                    section[value] = str(sense).strip()

                # FRAME ELEMENT: context is the FE's definition.
                if index == FRAME_ELEMENT:
                    sense = wsd.lesk_algorithm(term, f.FE[value].definition)
                    section[value] = str(sense).strip()

                # LEXICAL UNIT: context is the LU's definition.
                if index == LEXICAL_UNIT:
                    sense = wsd.lesk_algorithm(term, f.lexUnit[value].definition)
                    section[value] = str(sense).strip()

    return frames
 def getFrameSynset(self, synset):
     """Return the FrameNet frame mapped to *synset*, or None.

     Looks *synset* up in self.frame_dict and resolves the stored frame
     name (capitalized) through the FrameNet corpus. Any lookup failure
     yields None.
     """
     try:
         if synset in self.frame_dict:
             frame = fn.frame(self.frame_dict[synset].capitalize())
             return frame
     # BUG FIX: a bare `except:` also swallowed KeyboardInterrupt and
     # SystemExit; catch Exception so only real errors are suppressed.
     except Exception:
         pass
     return None
Esempio n. 4
0
def main():
    """Score every chosen frame pair with Lesk and dump the results to CSV."""
    # Accumulate one column per output field.
    rows = {"Frame1": [], "Frame2": [], "Accuracy": []}

    for name1, name2, lex_list, _count in frame_chose():
        f1 = fn.frame(name1)
        f2 = fn.frame(name2)
        acc = sent_chosen(f1, f2, lex_list)

        rows["Frame1"].append(name1)
        rows["Frame2"].append(name2)
        rows["Accuracy"].append(acc)

    df_ja = pd.DataFrame(data=rows)
    df_ja.to_csv('lesk_output/frame_random.csv')
Esempio n. 5
0
    def findCoreType(self, wordList):
        """Return the first 'Core' FE name for each word with a FrameNet LU.

        Args:
            wordList: iterable of lemma strings (matched exactly, anchored).

        Returns:
            list[str]: the first Core frame-element name of the first LU's
            frame, for every word that has at least one LU and one Core FE.
        """
        core_names = []
        for word in wordList:
            pattern = '^{}$'.format(word)
            # PERF FIX: the original called fn.lus(pattern) twice per word;
            # each call scans the LU index, so compute it once.
            lus = fn.lus(pattern)
            if len(lus) > 0:
                frame_id = lus[0].frame.ID
                cores = [fename for fename, fe in fn.frame(frame_id).FE.items()
                         if fe.coreType == 'Core']
                if len(cores) > 0:
                    core_names.append(cores[0])
        return core_names
 def get_frame_definition(self, frame):
     """Return the definition string of *frame*, or False if NLTK is absent.

     Args:
         frame: key into self.fn17_idx mapping a frame to its FrameNet index.
     """
     # IDIOM FIX: the original tested `nltk == True`, which is False when
     # `nltk` is anything but the literal True (e.g. the imported module),
     # making the success branch unreachable. Plain truthiness expresses
     # the intended "is NLTK available" check.
     if nltk:
         frame_idx = self.fn17_idx[frame]
         f = fn.frame(frame_idx)
         definition = f.definition
         return definition
     else:
         print(
             'please install nltk FrameNet first. refer: http://www.nltk.org/howto/framenet.html'
         )
         return False
Esempio n. 7
0
def compare_frames(frame1, frame2):
    """Score how related two frames are.

    Returns:
        int: 2 if the names are identical, 1 if frame2 appears among
        frame1's direct super/sub frame relations, 0 otherwise.
    """
    if frame1 == frame2:
        return 2
    frame_relations = []
    for relation in fn.frame(frame1).frameRelations:
        frame_relations.extend(
            [relation.superFrameName.lower(),
             relation.subFrameName.lower()])
    # BUG FIX: removed a leftover debug print of the relation list.
    # NOTE(review): the collected names are lowercased but frame2 is
    # compared as-is — callers appear to pass lowercase names; confirm.
    if frame2 in frame_relations:
        return 1
    else:
        return 0
Esempio n. 8
0
    def map(self, frame_name, slot_value, slot_type):
        """ Map a FrameNet frame to a wordnet sense.

        Args:
            frame_name (str): exact name of the frame (no regex expression)
            slot_value (str): value of the slot to map.
            slot_type (FrameNetSlotType): type of the slot to map

        Returns:
            nltk.corpus.wordnet.Synset: best wordnet sense for the given frame slot value.
        """
        # Drop the POS suffix (e.g. "existence.n" -> "existence").
        lemma = slot_value.split('.')[0]
        target_frame = fn.frame(frame_name)
        return self._best_sense(target_frame, slot_value, slot_type, lemma)
Esempio n. 9
0
def getFrameSetForStudent(surname, list_len=5):
    """Pick a deterministic, surname-seeded list of FrameNet frame IDs.

    The surname is hashed into a starting index; subsequent picks hop by
    pseudo-random offsets from a fixed seed, so the same surname always
    yields the same frames. Each pick is printed as it is made.
    """
    frameList = []
    nof_frames = len(fn.frames())
    # Fold the surname hash into a valid frame-list index.
    digest = hashlib.sha512(surname.encode('utf-8')).hexdigest()
    base_idx = abs(int(digest, 16)) % nof_frames
    print('\nstudent: ' + surname)
    framenet_IDs = get_frams_IDs()
    offset = 0
    seed(1)
    for _ in range(list_len):
        fID = framenet_IDs[(base_idx + offset) % nof_frames]
        f = fn.frame(fID)
        fNAME = f.name
        print('\tID: {a:4d}\tframe: {framename}'.format(a=fID,
                                                        framename=fNAME))
        offset = randint(0, nof_frames)
        frameList.append(fID)
    return frameList
Esempio n. 10
0
# NYT/APW document collections from the NLTK IEER corpus to scan.
doccollections = ['NYT_19980407','NYT_19980403','NYT_19980315','APW_19980429','APW_19980424','APW_19980314']

# Relation pattern: "in" not followed by a gerund (skips e.g. "in winning"),
# used to link PERson and LOCation entities.
IN = re.compile(r'.*\bin\b(?!\b.+ing)')

# Extract and print PER-in-LOC relation tuples from each document.
# NOTE(review): Python 2 print statement — this snippet is Python 2 only.
for doccol in doccollections:
    for doc in nltk.corpus.ieer.parsed_docs(doccol):
        relations = nltk.sem.extract_rels('PER', 'LOC', doc, corpus='ieer', pattern = IN)
        for relation in relations:
            print nltk.sem.relextract.rtuple(relation)
            


# FrameNet exploration: frames matching "perception" (case-insensitive),
# then detailed data for frame ID 66.
f = fn.frames(r'(?i)perception')
len(fn.frames())
f = fn.frame(66)

f.ID
f.definition
set(f.lexUnit.keys())

# Names of the frame's frame elements.
[x.name for x in f.FE]

f.frameRelations


# Frames containing an LU whose lemma matches "a little" (case-insensitive).
fn.frames_by_lemma(r'(?i)a little')
Esempio n. 11
0
def demo():
    """Walk through the NLTK FrameNet API: corpus statistics, frame lookup,
    frame relations, frame elements, lexical units, and the corpora used
    for fulltext annotation.

    NOTE(review): uses older NLTK FrameNet accessors (lexical_units(),
    documents(), m_frame.frameRelation) that differ from the current API —
    confirm against the installed NLTK version.
    """
    from pprint import pprint
    from nltk.corpus import framenet as fn

    #
    # It is not necessary to explicitly build the indexes by calling
    # buildindexes(). We do this here just for demo purposes. If the
    # indexes are not built explicitly, they will be built as needed.
    #
    print('Building the indexes...')
    fn.buildindexes()

    #
    # Get some statistics about the corpus
    #
    print('Number of Frames:', len(fn.frames()))
    print('Number of Lexical Units:', len(fn.lexical_units()))
    print('Number of annotated documents:', len(fn.documents()))
    print()

    #
    # Frames
    #
    print(
        'getting frames whose name matches the (case insensitive) regex: "(?i)medical"'
    )
    medframes = fn.frames(r'(?i)medical')
    print('Found {0} Frames whose name matches "(?i)medical":'.format(
        len(medframes)))
    print([(f.name, f.ID) for f in medframes])

    #
    # store the first frame in the list of frames
    #
    tmp_id = medframes[0].ID
    m_frame = fn.frame(tmp_id)  # reads all info for the frame

    #
    # get the frame relations
    #
    print(
        '\nNumber of frame relations for the "{0}" ({1}) frame:'.format(
            m_frame.name, m_frame.ID), len(m_frame.frameRelation))
    for fr in m_frame.frameRelation:
        print('   ', fr.type + ":", fr.relatedFrame)

    #
    # get the names of the Frame Elements
    #
    print(
        '\nNumber of Frame Elements in the "{0}" frame:'.format(m_frame.name),
        len(m_frame.FE))
    print('   ', [x.name for x in m_frame.FE])

    #
    # get the names of the "Core" Frame Elements
    #
    print('\nThe "core" Frame Elements in the "{0}" frame:'.format(
        m_frame.name))
    print('   ', [x.name for x in m_frame.FE if x.coreType == "Core"])

    #
    # get all of the Lexical Units that are incorporated in the
    # 'Ailment' FE of the 'Medical_conditions' frame (id=239)
    #
    print('\nAll Lexical Units that are incorporated in the "Ailment" FE:')
    m_frame = fn.frame(239)
    ailment_lus = [x for x in m_frame.lexUnit if x.incorporatedFE == 'Ailment']
    print([x.name for x in ailment_lus])

    #
    # get all of the Lexical Units for the frame
    #
    print('\nNumber of Lexical Units in the "{0}" frame:'.format(m_frame.name),
          len(m_frame.lexUnit))
    print('  ', [x.name for x in m_frame.lexUnit[:5]], '...')

    #
    # get basic info on the second LU in the frame
    #
    tmp_id = m_frame.lexUnit[1].ID  # grab the id of the second LU
    luinfo = fn.lu_basic(tmp_id)  # get basic info on the LU
    print('\nInformation on the LU: {0}'.format(luinfo.name))
    pprint(luinfo)

    #
    # Get a list of all of the corpora used for fulltext annotation
    #
    print('\nNames of all of the corpora used for fulltext annotation:')
    allcorpora = set([x.corpname for x in fn.documents()])
    pprint(list(allcorpora))

    #
    # Get the names of the annotated documents in the first corpus
    #
    firstcorp = list(allcorpora)[0]
    firstcorp_docs = fn.documents(firstcorp)
    print('\nNames of the annotated documents in the "{0}" corpus:'.format(
        firstcorp))
    pprint([x.filename for x in firstcorp_docs])

    #
    # Search for frames containing LUs whose name attribute matches a
    # regexp pattern.
    #
    # Note: if you were going to be doing a lot of this type of
    #       searching, you'd want to build an index that maps from
    #       lemmas to frames because each time frames_by_lemma() is
    #       called, it has to search through ALL of the frame XML files
    #       in the db.
    print(
        '\nSearching for all Frames that have a lemma that matches the regexp: "^run.v$":'
    )
    pprint(fn.frames_by_lemma(r'^run.v$'))
Esempio n. 12
0
                    list(range(len(lu_nameID_dict.keys()))),
                    key=i)
                for LU in LU_choice_number:
                    selectedLU = lu_list[LU]
                    if selectedLU.split(
                    )[3][0:2] == "ID":  #To account for more than one word LU
                        lu_ID = int(selectedLU.split()[3][:-1].replace(
                            "ID=", ""))
                    else:
                        lu_ID = int(selectedLU.split()[4][:-1].replace(
                            "ID=", ""))
                    lu_name = lu_nameID_dict[lu_ID]
                    st.write("You selected LU: ", LU, ".", lu_name)

                    associatedFrame = fn.lu(lu_ID).frame.name
                    lu_frame = fn.frame(associatedFrame)
                    st.write("Frame: ", associatedFrame)
                    st.write("Reference: ", lu_frame.URL)
                    FE_list = []
                    for element in lu_frame.FE:
                        FE_list.append(element)
                    st.write("Frame Elemenet(s): ")
                    st.write(FE_list)
                    st.write("Annotator Summary:")
                    st.write("File Annotated:", filename.name)
                    st.write("Selected Key:", key_list[q])
                    st.write("JSON PATH: ", jpath[q - 1][0])
                    st.write("Selected LU:", lu_name)
                    st.write("LU's Frame:", associatedFrame, "(", lu_frame.URL,
                             ")")
Esempio n. 13
0
# Demo of the Korean FrameNet (kfn) helper alongside NLTK's English FrameNet.
import json
import kfn
import pprint
from nltk.corpus import framenet as fn

# Get every Korean lexical unit and report how many there are.
lus = kfn.lus()
print(len(lus))

# Get the lexical units for one Korean lemma.
lus = kfn.lus_by_lemma('나누다')
print(lus)

# Get a full lexical-unit record by its id.
lu = kfn.lu(lus[0]['lu_id'])
pprint.pprint(lu)

# Resolve the Korean LU's frame id against the English FrameNet corpus.
frame_id = lu['fid']
f = fn.frame(frame_id)
print(f.name)
print(f.definition)

# Get the annotations attached to the same lexical unit.
annotations = kfn.annotation(lus[0]['lu_id'])
print(annotations)
Esempio n. 14
0
            result.append((f1_name, f2_name, r, len(r)))

    result = sorted(result, key=lambda x: (x[3]), reverse=True)

    frame_chose = []
    for r in result:
        if r[3] >= 10:
            frame_chose.append(r)

    return frame_chose


if __name__ == '__main__':
    fs = frame_chose()
    for r in fs:
        f1 = fn.frame(r[0])
        f2 = fn.frame(r[1])
        lex_list = r[2]
        ls1 = f1.lexUnit
        ls2 = f2.lexUnit

        sents = []
        frames = []
        for l in lex_list:
            l_list = l.split('.')
            ss1 = ls1[l].exemplars
            ss2 = ls2[l].exemplars

            for s in ss1:
                sents.append(s.text)
            for s in ss2:
Esempio n. 15
0
                  try:
                        # Query FrameNet -- frame names
                        frames = fn.frames_by_lemma(lemma) ; framenames = ""
                        for frame in frames:
#                           print frame.name,
#                           print len(frames)
#                           continue
                            # Cutoff point
                            if len(frames) > numframes : continue
                            framenames = "".join([framenames,"|",frame.name])
                        if framenames != "": print "".join([field[0],"|",field[1],"|FRM_01|",lemma,framenames])

                        # Core Frame Elements
                        for frame in frames:
                            if len(frames) > numframes : continue
                            ID = frame.ID ; framecores = ""
                            cores = [(fename,fe.ID) for fename,fe in fn.frame(ID).FE.items() if fe.coreType=='Core']
                            for core in cores: framecores = "".join([framecores,"|",core[0]])
                            if framecores != "": print "".join([field[0],"|",field[1],"|FRM_02|",frame.name,framecores])
                  except (AttributeError, nltk.corpus.reader.framenet.FramenetError):
                        continue
        except (UnicodeDecodeError, UnicodeEncodeError, IndexError, AssertionError):
          # Tag failed UTF-8 lines NA to enable repair
          print "".join([field[0],"|",field[1],"|FRM_01","|NA"])
          continue

# Clean up
fp.close()

# EOF
Esempio n. 16
0

def addframeRelations(frame):
	"""Link each Parent/Child frame relation of *frame* into the katum graph.

	NOTE(review): `frameRelations` used inside `_is(...)` is not defined in
	this snippet (only the loop variable `frameRelation` is) — likely a typo
	or a module-level name defined elsewhere; confirm before relying on it.
	"""
	for frameRelation in frame.frameRelations:
		if 'Parent' in frameRelation:
			# Resolve both ends of the relation in the katum frame index.
			parentKatum=frame_.get(frameRelation.Parent.name)
			childKatum=frame_.get(frameRelation.Child.name)
			childKatum._is(frameRelations,False)
			childKatum._is(parentKatum,False)
			parentKatum._is(frameRelations,False)


# Load the combined wordnet/verbnet/framenet datum (built without frame
# relations), add Parent->Child frame-relation links, and save the result.
katum.load('wordnet-verbnet-framenet-noframerelations.datum', atum())
generalThing = datum.thing
framenetRoot=generalThing.find("framenet")
frame_=framenetRoot.find("frame")
# For every known frame, mirror its FrameNet Parent/Child relations
# as is-a links between the corresponding katum nodes.
for frame in frame_.I:
	framenetFrame=fn.frame(frame.O)
	for frameRelation in framenetFrame.frameRelations:
		if 'Parent' in frameRelation:
			parentKatum=frame_.find(frameRelation.Parent.name)
			if 'Child' in frameRelation:
				childKatum=frame_.find(frameRelation.Child.name)
				# Only link when both endpoints exist in the graph.
				if(parentKatum!=None and childKatum!=None):
					childKatum._is(parentKatum,False)




generalThing.save('wordnet-verbnet-framenet-fr.datum')
Esempio n. 17
0
                                framenames = "".join(
                                    [framenames, "|", frame.name])
                            if framenames != "":
                                print "".join([
                                    field[0], "|", field[1], "|FRM_01|", lemma,
                                    framenames
                                ])

                            # Core Frame Elements
                            for frame in frames:
                                if len(frames) > numframes: continue
                                ID = frame.ID
                                framecores = ""
                                cores = [
                                    (fename, fe.ID)
                                    for fename, fe in fn.frame(ID).FE.items()
                                    if fe.coreType == 'Core'
                                ]
                                for core in cores:
                                    framecores = "".join(
                                        [framecores, "|", core[0]])
                                if framecores != "":
                                    print "".join([
                                        field[0], "|", field[1], "|FRM_02|",
                                        frame.name, framecores
                                    ])
                        except (AttributeError,
                                nltk.corpus.reader.framenet.FramenetError):
                            continue
            except (UnicodeDecodeError, UnicodeEncodeError, IndexError,
                    AssertionError):
Esempio n. 18
0
# Sanity check: an LU's frame relations equal frame_relations() by name.
fn.lus('prejudice.n')[0].frame.frameRelations == fn.frame_relations('Partiality')


# "look.n" belongs to more than one frame; inspect the first two.
fn.lus('look.n')[0].frame
fn.lus('look.n')[1].frame


# NOTE(review): Python 2 print statements below — snippet is Python 2 only.
for f in fn.lus('look.n'):
    print f.frame.name


# Frames whose name matches "erception" (case-insensitive).
result = fn.frames(r'(?i)erception')

print result
f = fn.frame(1301)

f.ID
f.definition
# List the frame's lexical-unit names.
for u in f.lexUnit:
    print u

fn.lexical_units('r(?i)look')


from pattern.en import wordnet


# Frame elements and relations of the last loaded frame.
[x for x in f.FE]
f.frameRelations
Esempio n. 19
0
def get_coretype(frame_name, fe):
    """Return the core type of the given FE from the set of string types
    {'Core', 'Peripheral', 'Extra-Thematic'}
    """
    frame = framenet.frame(frame_name)
    element = frame.FE[fe]
    return element.coreType
Esempio n. 20
0
def demo():
    """Walk through the NLTK FrameNet API: corpus statistics, frame lookup,
    frame relations, frame elements, lexical units, and the corpora used
    for fulltext annotation.

    NOTE(review): uses older NLTK FrameNet accessors (lexical_units(),
    documents(), m_frame.frameRelation) that differ from the current API —
    confirm against the installed NLTK version.
    """
    from pprint import pprint
    from nltk.corpus import framenet as fn

    #
    # It is not necessary to explicitly build the indexes by calling
    # buildindexes(). We do this here just for demo purposes. If the
    # indexes are not built explicitly, they will be built as needed.
    #
    print('Building the indexes...')
    fn.buildindexes()

    #
    # Get some statistics about the corpus
    #
    print('Number of Frames:', len(fn.frames()))
    print('Number of Lexical Units:', len(fn.lexical_units()))
    print('Number of annotated documents:', len(fn.documents()))
    print()

    #
    # Frames
    #
    print('getting frames whose name matches the (case insensitive) regex: "(?i)medical"')
    medframes = fn.frames(r'(?i)medical')
    print(
        'Found {0} Frames whose name matches "(?i)medical":'.format(len(medframes)))
    print([(f.name, f.ID) for f in medframes])

    #
    # store the first frame in the list of frames
    #
    tmp_id = medframes[0].ID
    m_frame = fn.frame(tmp_id)  # reads all info for the frame

    #
    # get the frame relations
    #
    print(
        '\nNumber of frame relations for the "{0}" ({1}) frame:'.format(m_frame.name,
                                                                        m_frame.ID),
        len(m_frame.frameRelation))
    for fr in m_frame.frameRelation:
        print('   ', fr.type + ":", fr.relatedFrame)

    #
    # get the names of the Frame Elements
    #
    print(
        '\nNumber of Frame Elements in the "{0}" frame:'.format(m_frame.name),
        len(m_frame.FE))
    print('   ', [x.name for x in m_frame.FE])

    #
    # get the names of the "Core" Frame Elements
    #
    print(
        '\nThe "core" Frame Elements in the "{0}" frame:'.format(m_frame.name))
    print('   ', [x.name for x in m_frame.FE if x.coreType == "Core"])

    #
    # get all of the Lexical Units that are incorporated in the
    # 'Ailment' FE of the 'Medical_conditions' frame (id=239)
    #
    print('\nAll Lexical Units that are incorporated in the "Ailment" FE:')
    m_frame = fn.frame(239)
    ailment_lus = [x for x in m_frame.lexUnit if x.incorporatedFE == 'Ailment']
    print([x.name for x in ailment_lus])

    #
    # get all of the Lexical Units for the frame
    #
    print('\nNumber of Lexical Units in the "{0}" frame:'.format(m_frame.name),
          len(m_frame.lexUnit))
    print('  ', [x.name for x in m_frame.lexUnit[:5]], '...')

    #
    # get basic info on the second LU in the frame
    #
    tmp_id = m_frame.lexUnit[1].ID  # grab the id of the second LU
    luinfo = fn.lu_basic(tmp_id)  # get basic info on the LU
    print('\nInformation on the LU: {0}'.format(luinfo.name))
    pprint(luinfo)

    #
    # Get a list of all of the corpora used for fulltext annotation
    #
    print('\nNames of all of the corpora used for fulltext annotation:')
    allcorpora = set([x.corpname for x in fn.documents()])
    pprint(list(allcorpora))

    #
    # Get the names of the annotated documents in the first corpus
    #
    firstcorp = list(allcorpora)[0]
    firstcorp_docs = fn.documents(firstcorp)
    print(
        '\nNames of the annotated documents in the "{0}" corpus:'.format(firstcorp))
    pprint([x.filename for x in firstcorp_docs])

    #
    # Search for frames containing LUs whose name attribute matches a
    # regexp pattern.
    #
    # Note: if you were going to be doing a lot of this type of
    #       searching, you'd want to build an index that maps from
    #       lemmas to frames because each time frames_by_lemma() is
    #       called, it has to search through ALL of the frame XML files
    #       in the db.
    print('\nSearching for all Frames that have a lemma that matches the regexp: "^run.v$":')
    pprint(fn.frames_by_lemma(r'^run.v$'))
Esempio n. 21
0
def get_coretype(frame_name, fe):
    """Return the core type of the given FE from the set of string types
    {'Core', 'Peripheral', 'Extra-Thematic'}
    """
    all_fes = framenet.frame(frame_name).FE
    return all_fes[fe].coreType