Example #1
def check(a, b):
    """Check whether two words share at least one frame in FrameNet.

    Assumes `a` and `b` are (lemma, pos) tuples (so both %s placeholders in the
    LU-name regex below can be filled) and that `dictionary` is a module-level cache."""
    if a not in dictionary:
        dictionary[a] = set([lu.frame.name for lu in fn.lus(name=r'(^|\s)%s(\s.+)?\.%s' % a)])
    if b not in dictionary:
        dictionary[b] = set([lu.frame.name for lu in fn.lus(name=r'(^|\s)%s(\s.+)?\.%s' % b)])

    return len(dictionary[a].intersection(dictionary[b])) > 0
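A minimal usage sketch for the function above, assuming `dictionary` is a module-level cache and that `a` and `b` are (lemma, pos) tuples so both %s placeholders in the LU-name regex are filled; the call below is purely illustrative:

from nltk.corpus import framenet as fn

dictionary = {}  # (lemma, pos) -> set of frame names evoked by matching lexical units

# Do "buy" and "sell" (as verbs) evoke at least one common frame?
print(check(('buy', 'v'), ('sell', 'v')))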
Example #2
def check(a, b):
    """Check whether two words share at least one frame in FrameNet.

    Assumes `a` and `b` are (lemma, pos) tuples (so both %s placeholders in the
    LU-name regex below can be filled) and that `dictionary` is a module-level cache."""
    if a not in dictionary:
        dictionary[a] = set(
            [lu.frame.name for lu in fn.lus(name=r'(^|\s)%s(\s.+)?\.%s' % a)])
    if b not in dictionary:
        dictionary[b] = set(
            [lu.frame.name for lu in fn.lus(name=r'(^|\s)%s(\s.+)?\.%s' % b)])

    return len(dictionary[a].intersection(dictionary[b])) > 0
Example #3
    def findCoreType(self, wordList):
        dictim = []
        for word in wordList:
            word_ = '^{}$'.format(word)

            if len(fn.lus(word_)) > 0:
                ID = fn.lus(word_)[0].frame.ID
                dicti = [fename for fename, fe in fn.frame(ID).FE.items() if fe.coreType == 'Core']
                if len(dicti) > 0:
                    dictim.append(dicti[0])
        return dictim
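The method above combines two NLTK FrameNet lookups: `fn.lus()` with an anchored regex and `fn.frame(ID).FE` for the frame elements. A stand-alone sketch of the same lookup for a single word (the word and expected output are illustrative):

from nltk.corpus import framenet as fn

def first_core_fe(word):
    # Anchor the regex so only an exact LU-name match is accepted, as in findCoreType
    lus = fn.lus('^{}$'.format(word))
    if not lus:
        return None
    frame = fn.frame(lus[0].frame.ID)
    core = [name for name, fe in frame.FE.items() if fe.coreType == 'Core']
    return core[0] if core else None

print(first_core_fe('give.v'))  # a core FE of the Giving frame, e.g. 'Donor'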
Example #4
def get_frames():
    lus = framenet.lus()
    print('num lus', len(lus))

    some_lu = random.choice(lus)
    print('Some LU:', some_lu.name, some_lu.POS, some_lu.frame.name)

    lus2frames = defaultdict(set)

    for lu in lus:
        lus2frames[lu.name].add(lu.frame.name)

    frames = chain.from_iterable(lus2frames.values())
    frames = sorted(set(frames))
    print('num frames', len(frames))


    mlb = MultiLabelBinarizer()
    mlb.fit(lus2frames.values())
    # lb = LabelBinarizer()
    # lb.fit(frames)

    return lus, lus2frames, frames, mlb
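The snippet above omits its imports; a hedged guess at the header it needs, followed by one way the returned `MultiLabelBinarizer` might be used (the LU name is illustrative):

import random
from collections import defaultdict
from itertools import chain

from nltk.corpus import framenet
from sklearn.preprocessing import MultiLabelBinarizer

lus, lus2frames, frames, mlb = get_frames()
y = mlb.transform([lus2frames['run.v']])  # multi-hot vector over all frame labels
print(y.shape)                            # (1, number of distinct frames)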
Example #5
 def hieve_nltk_verbs(self, wildcard='<UNK>'):
     import nltk
     nltk.download('propbank')
     nltk.download('framenet_v17')
     from nltk.corpus import propbank
     from nltk.corpus import framenet as fn
     verbs = [x.lower() for x in propbank.verbs()]
     # Collect verb lemmas from FrameNet LUs by stripping the POS suffix (e.g. "roll out.v" -> "roll out")
     for lu in fn.lus():
         try:
             x = lu.name[:lu.name.rindex('.')].lower()
             verbs.append(x)
         except ValueError:
             pass
     verbs = set(verbs)
     verbs |= self.event_vocab
     for tt in self.text:
         rb = []
         for x in tt:
             if x not in verbs:
                 rb.append(wildcard)
             else:
                 rb.append(x)
         self.text_verbs.append(rb)
     print("Tokenized hieve text with FrameNet and PropBank verbs.")
Example #6
def frames(words):
	"""Checks whether the noun reading of the given word is listed as a FrameNet lexical unit."""
	f = fn.lus()
	word = words + ".n"
	names = set([w.name for w in f])
	found_in_framenet = word in names

	if found_in_framenet:
		print "found"
		print found_in_framenet
	else:
		print "not found"
Example #7
def frame_count(verb: str) -> int:
    """ Counts the amount of evoked frames in FrameNet per verb.

    :param verb: String. Input verb for which the amount of evoked frames will be counted
    :return: Integer. Amount of evoked frames
    """
    verb_regex = regex(verb)
    frames = fn.lus(verb_regex)  # Returns a list with all frames evoked by the verb.
    return len(frames)
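The `regex` helper used above is not part of the snippet; a plausible stand-in (an assumption, not the original implementation) that restricts the match to a verbal lexical unit, plus a usage line:

from nltk.corpus import framenet as fn

def regex(verb):
    # Hypothetical stand-in: case-insensitively match the exact LU name "<verb>.v"
    return r'(?i)^%s\.v$' % verb

print(frame_count('run'))  # number of lexical units matching run.v, used as a proxy for evoked frames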
Example #8
def getLU(phrase, tag):
    lus = fn.lus(r'(?i)%s'%phrase)
    if len(lus) == 0:
        return None
    #print "Lus: ",
    #print [x['name'] for x in lus][:min(20, len(lus))]
    exactlu = exactLU(phrase, lus)
    if exactlu:
        return exactlu
    else:
        #print "mulLus: ",
        #print [x['name'] for x in lus][:min(10, len(lus))]
        return rndLU(phrase, tag, lus)
Example #9
def getLU(phrase, tag):
    lus = fn.lus(r'(?i)%s' % phrase)
    if len(lus) == 0:
        return None
    #print "Lus: ",
    #print [x['name'] for x in lus][:min(20, len(lus))]
    exactlu = exactLU(phrase, lus)
    if exactlu:
        return exactlu
    else:
        #print "mulLus: ",
        #print [x['name'] for x in lus][:min(10, len(lus))]
        return rndLU(phrase, tag, lus)
Example #10
def getluHash():
    luHash = {} #luname:luids
    for x in fn.lus():
        #name = x['name'][:x['name'].rfind(".")]
        name = x['name']
        id = x['ID']
        #id = x['frame']['ID']
        if name not in luHash:
            luHash[name] = [id]
        else:
            if id not in luHash[name]:
                luHash[name].append(id)
    print "..All LUs in FrameNet have been loaded into dict. #Item: " + str(len(luHash))
    return luHash
Example #11
def getluHash():
    luHash = {}  #luname:luids
    for x in fn.lus():
        #name = x['name'][:x['name'].rfind(".")]
        name = x['name']
        id = x['ID']
        #id = x['frame']['ID']
        if name not in luHash:
            luHash[name] = [id]
        else:
            if id not in luHash[name]:
                luHash[name].append(id)
    print "..All LUs in FrameNet have been loaded into dict. #Item: " + str(
        len(luHash))
    return luHash
Example #12
def invoke_frame(token: str):
    word = token.lower()
    lu_list = [(i.name, i.definition) for i in fn.lus()]
    lu_temp = set([i for i in lu_list if word == i[0].split('.')[0]])
    frames = []

    for lu, def_ in lu_temp:
        fr = fn.frames_by_lemma(r'(?i)' + lu)
        # print(len(fr), fr[0].ID)
        if len(frames) == 0:
            frames.append(fr[0])
        else:
            if fr[0] not in frames:
                frames.append(fr[0])

    return frames
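A short usage sketch for `invoke_frame`, assuming `from nltk.corpus import framenet as fn` at module level; note that the function scans every lexical unit, so the first call is slow:

from nltk.corpus import framenet as fn

# Illustrative call: frames whose lexical units include the bare lemma "walk"
for frame in invoke_frame('walk'):
    print(frame.name, frame.ID)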
Example #13
def get_lu_instance(verb: str, rand=False) -> object:
    """ Retrieves a Lexical Unit Object from FrameNet given a verb.

    Note: If several Lexical Units exist for the given verb and rand is set to True, a random Lexical Unit
    will be retrieved. If rand is set to False (the default), the first entry of the Lexical Units will be retrieved.

    :param verb: String. A verb for which the Lexical Unit shall be retrieved
    :param rand: Boolean. If True, returned object will be chosen pseudo randomly. Else, first element will be returned
    :return: Object: Lexical Unit Object which can be processed within the FrameNet API
    """
    lu = regex(verb)
    lus_list = fn.lus(lu)
    if rand is False:
        return lus_list[0]
    amount_lus = len(lus_list)
    random_index = random.randint(0, amount_lus-1)
    return lus_list[random_index]
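A usage sketch, assuming `import random` and a `regex` helper such as the hypothetical stand-in shown under Example #7:

import random
from nltk.corpus import framenet as fn

first_lu = get_lu_instance('run')              # first matching lexical unit
random_lu = get_lu_instance('run', rand=True)  # pseudo-randomly chosen lexical unit
print(first_lu.name, first_lu.frame.name)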
Example #14
def map_cfs_lus(verbs: list, cfs: dict) -> dict:
    """Maps the Connotation Frames to the Lexical Units in FrameNet.

    Note: The distinction between ambiguous verbs and unambiguous verbs has to be made beforehand, so the input
    list 'verbs' should already be filtered.

    The return dict looks like this:
    all verbs: { ( "verb", (Lexical Unit IDs) ) : {Connotation Frame} }
    unambiguous verbs: { ( "verb", Lexical Unit ID ) : {Connotation Frame} }

    :param verbs: List. Common words that occur both in the Connotation Frame Lexicon and in FrameNet
    :param cfs: Dictionary. Keys are verbs as strings, values are the Connotation Frames as nested dictionaries
    :return: Dictionary. Keys are (verb, Lexical Unit ID(s)) tuples, values are the Connotation Frames
    """
    mapping = {}

    for verb in verbs:
        connotation_frame = cfs[verb]

        key_information = []
        key_information.append(verb)

        verb_regex = fn_pre.regex(verb)
        lus = fn.lus(verb_regex)

        if len(lus) == 1:
            lu = lus[0].ID
            key_information.append(
                lu
            )  # Distinction between single occurrences and multiple occurrences is crucial,
            # otherwise one will get an exception
        else:
            int_lus = []
            for lu in lus:
                int_lus.append(lu.ID)

            key_information.append(tuple(int_lus))

        information = tuple(key_information)

        mapping[information] = connotation_frame

    return mapping
Example #15
    def get_frames(pos_tags: Iterable, frame_cache: dict, verbose: bool = False) -> set:
        results = set()

        # iterate through each token, and create a dict of token -> words
        for token, pos in pos_tags:

            search_word = token.lower()

            # Ignore nouns, single-letter words and stopwords
            if pos[0] == 'N' or len(search_word) < 2 or search_word in STOPWORDS:
                continue

            # Get wordnet-pos. Ignore words with no wordnet pos tag
            pos = nlp.get_wordnet_pos(pos)
            if pos == '':
                # add search_word to results set if eligible
                normalized_token = nlp.normalize_text(search_word, lemmatize=False, ignore_num=True)
                if len(normalized_token) > 1:
                    results.add(normalized_token)
                continue

            # If lemma is not a stop-word, use that instead of lowercase token
            lemma = LEMMATIZER.lemmatize(search_word, pos)
            if lemma not in STOPWORDS:
                search_word = lemma

            # Get lexical units matching the search word and pos
            search_word = nlp.normalize_text(search_word, lemmatize=False, ignore_num=True).replace('.', '')

            # Load frames for missing tokens from FrameNet if it does not exist in cache
            key = '%s__%s' % (search_word, pos)
            if key not in frame_cache:
                frame_cache[key] = sorted(
                    set(lu.frame.name for lu in fn.lus(r'(?i)(^|\s)(%s)(\s.+)?\.%s' % (search_word, pos))))

            # add the frames from current key to the results set
            results.update(frame_cache[key])
        if verbose:
            print('Frames: %d' % len(results))
        else:
            print('.', end='', flush=True)
        return results
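The core of the method above is the cached FrameNet lookup; a stand-alone sketch of just that lookup, assuming WordNet-style POS letters ('v', 'n', 'a', 'r') as produced by `get_wordnet_pos`:

from nltk.corpus import framenet as fn

def frames_for(word, pos):
    # Same pattern as the cache fill above: the word as (the first part of) an LU name with the given POS
    pattern = r'(?i)(^|\s)(%s)(\s.+)?\.%s' % (word, pos)
    return sorted(set(lu.frame.name for lu in fn.lus(pattern)))

print(frames_for('give', 'v'))  # e.g. ['Giving', ...] (illustrative)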
Example #16
def intersect_lemmas_with_framenet(corpus_lemmas, wikidata_properties):
    """
     Intersect verb lemmas extracted from the input corpus with FrameNet Lexical Units (LUs).

     :param dict corpus_lemmas: dict of verb lemmas with their ranking scores
     :param dict wikidata_properties: dict with all Wikidata properties
     :return: a dictionary of corpus lemmas enriched with FrameNet LUs data (dicts)
     :rtype: dict
    """
    # Each FrameNet LU triggers one frame, so assign them to the same corpus lemma
    enriched = defaultdict(list)
    for corpus_lemma, score in corpus_lemmas.iteritems():
        # Look up the FrameNet LUs given the corpus lemma
        # Ensure exact match, as the lookup can be done only via regex
        lus = framenet.lus(r'^%s\.' % corpus_lemma)
        if lus:
            logger.debug("Found %d FrameNet Lexical Units (LUs) that match the corpus lemma '%s': %s" % (
                len(lus), corpus_lemma, lus))
            for lu in lus:
                lu_label = lu['name']
                # Skip non-verbal LUs
                if lu['POS'] != 'V':
                    logger.debug("Skipping non-verbal LU '%s' ..." % lu_label)
                    continue
                logger.debug("Processing FrameNet LU '%s' ..." % lu_label)
                frame = lu['frame']
                frame_label = frame['name']
                core_fes = []
                extra_fes = []
                logger.debug("Processing Frame Elements (FEs) ...")
                fes = frame['FE']
                for fe_label, fe_data in fes.iteritems():
                    # Skip numerical FEs
                    if fe_label in NUMERICAL_FES:
                        logger.debug("Skipping numerical FE '%s', frame '%s' ..." % (fe_label, frame_label))
                        continue
                    mapping = defaultdict(list)
                    # Compute exact matches between FEs and Wikidata properties labels and aliases
                    for pid, p_label_and_aliases in wikidata_properties.iteritems():
                        # Lowercase for better matching
                        p_label = p_label_and_aliases['label'].lower()
                        p_aliases = [p_alias.lower() for p_alias in p_label_and_aliases.get('aliases', [])]
                        fe = fe_label.lower()
                        if fe == p_label:
                            logger.debug("FE '%s' maps to '%s' label '%s'" % (fe_label, pid, p_label))
                            mapping[pid].append(p_label_and_aliases)
                        elif p_aliases and fe in p_aliases:
                            logger.debug("FE '%s' maps to one of '%s' aliases: %s" % (fe_label, pid, p_aliases))
                            mapping[pid].append(p_label_and_aliases)
                    fe_type = fe_data['coreType']
                    semantic_type_object = fe_data['semType']
                    semantic_type = semantic_type_object['name'] if semantic_type_object else None
                    to_be_added = {
                        'fe': fe_label,
                        'type': fe_type,
                        'semantic_type': semantic_type,
                        'mapping': mapping
                    }
                    if fe_type == 'Core':
                        core_fes.append(to_be_added)
                    else:
                        extra_fes.append(to_be_added)
                # Skip frames with no mapping to Wikidata
                if not core_fes and not extra_fes:
                    logger.debug("No '%s' FEs could be mapped to Wikidata. Skipping ..." % frame_label)
                    continue
                logger.debug("Core FEs: %s" % core_fes)
                logger.debug("Extra FEs: %s" % extra_fes)
                intersected_lu = {
                    'lu': lu_label,
                    'frame': frame_label,
                    'pos': lu['POS']
                }
                if core_fes:
                    intersected_lu['core_fes'] = core_fes
                if extra_fes:
                    intersected_lu['extra_fes'] = extra_fes
                enriched[score].append(intersected_lu)
                logger.debug("Corpus lemma '%s' enriched with frame data: %s" %
                             (corpus_lemma, json.dumps(intersected_lu, indent=2)))
    # Order by decreasing score
    return OrderedDict(sorted(enriched.items(), key=lambda x: x[0], reverse=True))
Example #17
def hand_engineering(prot, batch_size, data, data_dev):
    '''
        Hand engineered feature extraction. Supports the following - UD,
        Verbnet classids, Wordnet supersenses, concreteness ratings, LCS
        eventivity scores
    '''
    home = expanduser("~")
    framnet_posdict = {
        'V': 'VERB',
        'N': 'NOUN',
        'A': 'ADJ',
        'ADV': 'ADV',
        'PREP': 'ADP',
        'NUM': 'NUM',
        'INTJ': 'INTJ',
        'ART': 'DET',
        'C': 'CCONJ',
        'SCON': 'SCONJ',
        'PRON': 'PRON',
        'IDIO': 'X',
        'AVP': 'ADV'
    }
    # Load the features
    features = {}
    with open(home + '/Desktop/protocols/data/features-2.tsv', 'r') as f:
        for line in f.readlines():
            feats = line.split('\t')
            features[feats[0]] = (feats[1].split(), feats[2].split())

    # Load the predpatt objects for creating features
    files = [
        '/Downloads/UD_English-r1.2/en-ud-train.conllu',
        '/Downloads/UD_English-r1.2/en-ud-dev.conllu',
        '/Downloads/UD_English-r1.2/en-ud-test.conllu'
    ]
    home = expanduser("~")
    options = PredPattOpts(resolve_relcl=True,
                           borrow_arg_for_relcl=True,
                           resolve_conj=False,
                           cut=True)  # Resolve relative clause
    patt = {}

    for file in files:
        path = home + file
        with open(path, 'r') as infile:
            for sent_id, ud_parse in load_conllu(infile.read()):
                patt[file[33:][:-7] + " " + sent_id] = PredPatt(ud_parse,
                                                                opts=options)

    data['Structure'] = data['Split.Sentence.ID'].map(lambda x:
                                                      (patt[x], features[x]))
    data_dev['Structure'] = data_dev['Split.Sentence.ID'].map(
        lambda x: (patt[x], features[x]))

    raw_x = data['Structure'].tolist()
    raw_dev_x = data_dev['Structure'].tolist()

    all_x = raw_x + raw_dev_x
    all_feats = '|'.join(['|'.join(all_x[i][1][0]) for i in range(len(all_x))])
    feature_cols = Counter(all_feats.split('|'))

    # All UD dataset features
    all_ud_feature_cols = list(
        feature_cols.keys()) + [(a + "_dep") for a in feature_cols.keys()]

    # Concreteness
    f = open(home + '/Desktop/protocols/data/concrete.pkl', 'rb')
    concreteness = pickle.load(f)
    if prot == 'arg':
        conc_cols = ['concreteness']
    else:
        conc_cols = ['concreteness', 'max_conc', 'min_conc']
    f.close()

    # LCS eventivity
    from lcsreader import LexicalConceptualStructureLexicon
    lcs = LexicalConceptualStructureLexicon(
        home + '/Desktop/protocols/data/verbs-English.lcs')
    lcs_feats = ['lcs_eventive', 'lcs_stative']

    # Wordnet supersenses(lexicographer names)
    supersenses = list(
        set(['supersense=' + x.lexname() for x in wordnet.all_synsets()]))

    # Framenet
    lem2frame = {}
    for lm in framenet.lus():
        for lemma in lm['lexemes']:
            lem2frame[lemma['name'] + '.' +
                      framnet_posdict[lemma['POS']]] = lm['frame']['name']
    frame_names = ['frame=' + x.name for x in framenet.frames()]

    # Verbnet classids
    verbnet_classids = ['classid=' + vcid for vcid in verbnet.classids()]

    # Lexical features
    lexical_feats = [
        'can', 'could', 'should', 'would', 'will', 'may', 'might', 'must',
        'ought', 'dare', 'need'
    ] + [
        'the', 'an', 'a', 'few', 'another', 'some', 'many', 'each', 'every',
        'this', 'that', 'any', 'most', 'all', 'both', 'these'
    ]

    dict_feats = {}
    for f in verbnet_classids + lexical_feats + supersenses + frame_names + lcs_feats + all_ud_feature_cols + conc_cols:
        dict_feats[f] = 0

    x_pd = pd.DataFrame([
        features_func(sent_feat=sent,
                      token=token,
                      lemma=lemma,
                      dict_feats=dict_feats.copy(),
                      prot=prot,
                      concreteness=concreteness,
                      lcs=lcs,
                      l2f=lem2frame) for sent, token, lemma in
        zip(raw_x, data['Root.Token'].tolist(), data['Lemma'].tolist())
    ])

    dev_x_pd = pd.DataFrame([
        features_func(sent_feat=sent,
                      token=token,
                      lemma=lemma,
                      dict_feats=dict_feats.copy(),
                      prot=prot,
                      concreteness=concreteness,
                      lcs=lcs,
                      l2f=lem2frame)
        for sent, token, lemma in zip(raw_dev_x, data_dev['Root.Token'].tolist(
        ), data_dev['Lemma'].tolist())
    ])

    # Figure out which columns to drop(they're always zero)
    todrop1 = dev_x_pd.columns[(dev_x_pd == 0).all()].values.tolist()
    todrop = x_pd.columns[(x_pd == 0).all()].values.tolist()
    intdrop = [a for a in todrop if a not in todrop1]
    cols_to_drop = list(set(todrop) - set(intdrop))

    x = x_pd.drop(cols_to_drop, axis=1).values.tolist()
    dev_x = dev_x_pd.drop(cols_to_drop, axis=1).values.tolist()

    x = [[a[:] for a in x[i:i + batch_size]]
         for i in range(0, len(data), batch_size)]
    dev_x = [[a[:] for a in dev_x[i:i + batch_size]]
             for i in range(0, len(data_dev), batch_size)]
    return x, dev_x
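For reference, the `lem2frame` mapping built above is keyed by 'lexeme.UPOS'; a small stand-alone sketch of the same construction with an abbreviated POS table (keys and output are illustrative):

from nltk.corpus import framenet

framnet_posdict = {'V': 'VERB', 'N': 'NOUN', 'A': 'ADJ', 'ADV': 'ADV'}  # abbreviated

lem2frame = {}
for lu in framenet.lus():
    for lexeme in lu['lexemes']:
        pos = framnet_posdict.get(lexeme['POS'])
        if pos:
            lem2frame[lexeme['name'] + '.' + pos] = lu['frame']['name']

print(lem2frame.get('give.VERB'))  # one of the frames evoked by a give.v lexical unit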
Example #18
generalThing = datum.thing
framenetRoot=generalThing.find("framenet")
frameElement=framenetRoot.find("frame element")
lexicalUnit=framenetRoot.find("lexical unit")
semType=framenetRoot.find("semantic type")
id_=framenetRoot.find("id")
frames=framenetRoot.find("frame")

for fE in fn.fes():
	if fE.semType!=None:
		semanticTypeKatum=exactSemType(fE.semType)
		frameElementkatum=exactFE(fE)
		if(semanticTypeKatum!=None and frameElementkatum!=None):
			frameElementkatum._is(semanticTypeKatum,False)

for lU in fn.lus():
	if len(lU.semTypes)!=0:
		for semTypeInstance in lU.semTypes:
			semanticTypeKatum=exactSemType(semTypeInstance)
			lUkatum=exactlU(lU)
			if(semanticTypeKatum!=None and lUkatum!=None):
				lUkatum._is(semanticTypeKatum,False)

for frame in fn.frames():
	if len(frame.semTypes)!=0:
		for semTypeInstance in frame.semTypes:
			semanticTypeKatum=exactSemType(semTypeInstance)
			frameKatum=exactFrame(frame)
			if(semanticTypeKatum!=None and frameKatum!=None):
				frameKatum._is(semanticTypeKatum,False)
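The three loops above walk the NLTK FrameNet inventories of frame elements, lexical units and frames and read their semantic types; a stand-alone sketch of just the FrameNet side, without the katum objects:

from nltk.corpus import framenet as fn

# Count how many FEs, LUs and frames carry at least one semantic type (illustrative)
fes_with_semtype = sum(1 for fe in fn.fes() if fe.semType is not None)
lus_with_semtype = sum(1 for lu in fn.lus() if lu.semTypes)
frames_with_semtype = sum(1 for frame in fn.frames() if frame.semTypes)
print(fes_with_semtype, lus_with_semtype, frames_with_semtype)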
Example #19
def intersect_lemmas_with_framenet(corpus_lemmas, wikidata_properties):
    """
     Intersect verb lemmas extracted from the input corpus with FrameNet Lexical Units (LUs).

     :param dict corpus_lemmas: dict of verb lemmas with their ranking scores
     :param dict wikidata_properties: dict with all Wikidata properties
     :return: a dictionary of corpus lemmas enriched with FrameNet LUs data (dicts)
     :rtype: dict
    """
    # Each FrameNet LU triggers one frame, so assign them to the same corpus lemma
    enriched = defaultdict(list)
    for corpus_lemma, score in corpus_lemmas.iteritems():
        # Look up the FrameNet LUs given the corpus lemma
        # Ensure exact match, as the lookup can be done only via regex
        lus = framenet.lus(r'^%s\.' % corpus_lemma)
        if lus:
            logger.debug(
                "Found %d FrameNet Lexical Units (LUs) that match the corpus lemma '%s': %s"
                % (len(lus), corpus_lemma, lus))
            for lu in lus:
                lu_label = lu['name']
                # Skip non-verbal LUs
                if lu['POS'] != 'V':
                    logger.debug("Skipping non-verbal LU '%s' ..." % lu_label)
                    continue
                logger.debug("Processing FrameNet LU '%s' ..." % lu_label)
                frame = lu['frame']
                frame_label = frame['name']
                core_fes = []
                extra_fes = []
                logger.debug("Processing Frame Elements (FEs) ...")
                fes = frame['FE']
                for fe_label, fe_data in fes.iteritems():
                    # Skip numerical FEs
                    if fe_label in NUMERICAL_FES:
                        logger.debug(
                            "Skipping numerical FE '%s', frame '%s' ..." %
                            (fe_label, frame_label))
                        continue
                    mapping = defaultdict(list)
                    # Compute exact matches between FEs and Wikidata properties labels and aliases
                    for pid, p_label_and_aliases in wikidata_properties.iteritems(
                    ):
                        # Lowercase for better matching
                        p_label = p_label_and_aliases['label'].lower()
                        p_aliases = [
                            p_alias.lower()
                            for p_alias in p_label_and_aliases.get(
                                'aliases', [])
                        ]
                        fe = fe_label.lower()
                        if fe == p_label:
                            logger.debug("FE '%s' maps to '%s' label '%s'" %
                                         (fe_label, pid, p_label))
                            mapping[pid].append(p_label_and_aliases)
                        elif p_aliases and fe in p_aliases:
                            logger.debug(
                                "FE '%s' maps to one of '%s' aliases: %s" %
                                (fe_label, pid, p_aliases))
                            mapping[pid].append(p_label_and_aliases)
                    fe_type = fe_data['coreType']
                    semantic_type_object = fe_data['semType']
                    semantic_type = semantic_type_object[
                        'name'] if semantic_type_object else None
                    to_be_added = {
                        'fe': fe_label,
                        'type': fe_type,
                        'semantic_type': semantic_type,
                        'mapping': mapping
                    }
                    if fe_type == 'Core':
                        core_fes.append(to_be_added)
                    else:
                        extra_fes.append(to_be_added)
                # Skip frames with no mapping to Wikidata
                if not core_fes and not extra_fes:
                    logger.debug(
                        "No '%s' FEs could be mapped to Wikidata. Skipping ..."
                        % frame_label)
                    continue
                logger.debug("Core FEs: %s" % core_fes)
                logger.debug("Extra FEs: %s" % extra_fes)
                intersected_lu = {
                    'lu': lu_label,
                    'frame': frame_label,
                    'pos': lu['POS']
                }
                if core_fes:
                    intersected_lu['core_fes'] = core_fes
                if extra_fes:
                    intersected_lu['extra_fes'] = extra_fes
                enriched[score].append(intersected_lu)
                logger.debug(
                    "Corpus lemma '%s' enriched with frame data: %s" %
                    (corpus_lemma, json.dumps(intersected_lu, indent=2)))
    # Order by decreasing score
    return OrderedDict(
        sorted(enriched.items(), key=lambda x: x[0], reverse=True))
Example #20
local_verbnet_api_path = "C:/Users/Kevin/PycharmProjects/verbnet/api/"
sys.path.append(local_verbnet_api_path)
import verbnet

VN_LOC = "C:/Users/Kevin/PycharmProjects/lexical_resources/verbnet3.3/"

vn = verbnet.VerbNetParser(directory=VN_LOC)
possible_classes = {
    "-".join(c.split("-")[1:]):
    [m.name for m in vn.verb_classes_dict[c].members]
    for c in vn.verb_classes_dict
}

possible_frames = {}
for lu in framenet.lus():
    if lu.frame.name not in possible_frames:
        possible_frames[lu.frame.name] = [lu.lexemes[0].name]
    else:
        possible_frames[lu.frame.name].append(lu.lexemes[0].name)


class Mapping():
    def __init__(self, member, vn_class, fn_frame):
        self.member = member
        self.vn_class = vn_class
        self.fn_frame = fn_frame
        self.errors = self.verify()

    def __str__(self):
        return self.member + " " + self.vn_class + " " + self.fn_frame
Example #21
        key_list = key_list + temp_key
        value_list = value_list + temp_value

    st.write(key_list)

    #Shortcircuit Option
    shortcircuit_option = st.checkbox(
        "For a given key would you like to see its lexical unit? (shortcircuit to FrameNet option)"
    )
    if shortcircuit_option == True:
        option_of_number = st.multiselect(
            "For which key(s) would you like to see its lexical unit?",
            list(range(len(key_list))))
        for q in option_of_number:
            st.write("You selected key: ", q, ".", key_list[q])
            lu = fn.lus(r'%s' % key_list[q])
            lu_list = []  #List for constructing the string
            lu_nameID_dict = {
            }  #Dictionary for mapping LU_ID w/ LU Name (TODO: Replace the list w/ this dictionary for efficiency)
            for lexical_unit in lu:
                lexical_unit_LU_Name = lexical_unit['name']
                lexical_unit_LU_ID = lexical_unit['ID']
                lu_nameID_dict[lexical_unit_LU_ID] = lexical_unit_LU_Name
                input_str = "LU Name=" + str(
                    lexical_unit_LU_Name) + " (LU ID=" + str(
                        lexical_unit_LU_ID) + ")"
                lu_list.append(input_str)
            if len(lu_list) == 0:
                st.write("No Lexical Units for the selected key")
                st.write("JSON PATH: ", jpath[q - 1][0])
            else:
Example #22
))

for r in DBPedia().search(sparql, start=1, count=1000):
    print '%s (%s)' % (r.person.name, r.place.name)






##______________________________Framenet______________________________

from nltk.corpus import framenet as fn
fn.lu(3238).frame.lexUnit['glint.v'] is fn.lu(3238)

fn.frame_by_name('Replacing') is fn.lus('replace.v')[0].frame

fn.lus('prejudice.n')[0].frame.frameRelations == fn.frame_relations('Partiality')


fn.lus('look.n')[0].frame
fn.lus('look.n')[1].frame


for f in fn.lus('look.n'):
    print f.frame.name


result = fn.frames(r'(?i)erception')

print result
Example #23
    def process(text='',
                lang='en',
                coreferences=False,
                constituents=False,
                dependencies=False,
                expressions=False,
                **kwargs) -> OrderedDict:
        # build nlp-json
        j: OrderedDict = get_base()
        j['meta']['DC.language'] = lang
        d: OrderedDict = get_base_document(1)
        #j['documents'][d['id']] = d
        j['documents'].append(d)
        d['meta']['DC.source'] = 'NLTK {}'.format(nltk_version)
        j['meta']['DC.language'] = lang
        d['text'] = text

        # collect parsers
        lemmatizer = get_lemmatizer()
        stemmer = get_stemmer()

        # tokenization and pos
        words = []
        for sent in segment(text):
            for token in sent:
                words.append(token.value)

        # create the token list
        t_id = 1
        for word, xpos in pos_tag(words):
            wordnet_pos = get_wordnet_pos(xpos)
            lemma = lemmatizer(word, pos=wordnet_pos)

            # start the token
            t = {'id': t_id, 'text': word, 'stem': stemmer(word)}
            #d['tokenList'][t['id']] = t
            d['tokenList'].append(t)
            t_id += 1

            # wordnet
            try:
                synsets = wordnet.synsets(lemma, pos=wordnet_pos)
                senses = {}
                for s in synsets:
                    hyponyms = [
                        y for x in s.hyponyms() for y in x.lemma_names()
                    ]
                    hypernyms = [
                        y for x in s.hypernyms() for y in x.lemma_names()
                    ]
                    synonyms = s.lemma_names()[1:]
                    examples = s.examples()
                    sense = {
                        'wordnetId': s.name(),
                        'definition': s.definition()
                    }
                    if synonyms:
                        sense['synonyms'] = synonyms
                    if hypernyms:
                        sense['hypernyms'] = hypernyms
                    if hyponyms:
                        sense['hyponyms'] = hyponyms
                    if examples:
                        sense['examples'] = examples

                    antonyms = []
                    for l in s.lemmas():
                        if l.antonyms():
                            for a in l.antonyms():
                                antonyms.append(a.name())
                    if antonyms:
                        sense['antonyms'] = antonyms

                    senses[sense['wordnetId']] = sense

                if senses:
                    t['synsets'] = senses
            except:
                pass

            # verbnet
            try:
                verbs = dict((class_id, {
                    'classId': class_id,
                    'frames': vn.frames(class_id)
                }) for class_id in vn.classids(word))

                if verbs:
                    t['verbFrames'] = verbs
            except:
                pass

            # framenet
            try:
                frame_net = {}
                frames = invoke_frame(word)
                if frames is not None:
                    for fr in frames:
                        lu_temp = []
                        for lu in fn.lus(r'(?i)' + word.lower()):
                            fr_ = fn.frames_by_lemma(r'(?i)' + lu.name)
                            if len(fr_):
                                if fr_[0] == fr:
                                    lu_temp.append({
                                        'name': lu.name,
                                        'definition': lu.definition,
                                        'pos': lu.name.split('.')[1]
                                    })
                        frame_net[fr.ID] = {
                            'name': fr.name,
                            'frameId': fr.ID,
                            'definition': fr.definition,
                            # 'relations':fr.frameRelations,
                            'lu': lu_temp
                        }
                if frame_net:
                    t['frames'] = frame_net
            except:
                pass

        return remove_empty_fields(j)
Example #24
        else:
            lcs = \
             LexicalConceptualStructureLexicon(path2lcs + '/verbs-English.lcs')
            with open(path2lcs + '/' + pickled_filename, 'wb') as f:
                pickle.dump(lcs, f)
        lcs_feats = ['lcs_eventive', 'lcs_stative']
        type_embedder['lcs'] = lcs

        # Wordnet supersenses(lexicographer names)
        synsets = wordnet.all_synsets()
        supersenses = \
              sorted(list(set(['supersense=' + x.lexname() for x in synsets])))

        # Framenet
        lem2frame = {}
        for lm in framenet.lus():
            for lemma in lm['lexemes']:
                (lem2frame[lemma['name'] + '.' + \
                        framenet_posdict[lemma['POS']]]) = lm['frame']['name']
        frame_names = sorted(['frame=' + x.name for x in framenet.frames()])
        type_embedder['lem2frame'] = lem2frame

        # Verbnet classids
        verbnet_classids = \
                     sorted(['classid=' + vcid for vcid in verbnet.classids()])

        type_hand_features = (verbnet_classids + supersenses + frame_names +
                              lcs_feats + conc_cols)
        input_size += len(type_hand_features)
        for f in type_hand_features:
            type_embedder['embedder'][f] = 0