Example #1
0
def apply_custom_classification_rules(classified, language, overwrite=False):
    """ Implements simple custom, classifier-agnostic rules for
        recognizing some frame elements

        Two rules are applied, in this order:

        1. if no FE of the item has role `Time` or `Duration`, the FEs
           returned by `normalize_numerical_fes` for the item text are added;
           one of them replaces an FE already assigned to the same chunk only
           when `overwrite` is set
        2. every linked entity whose types include the DBpedia `Place` class
           becomes a `Place` FE; it replaces an FE already assigned to the
           same chunk when `overwrite` is set, or when that FE does not carry
           a link typed as a place

        :param dict classified: an item produced by the classifier; it is
         updated in place
        :param str language: Language of the sentence
        :param bool overwrite: Tells the priority in case the rules assign a
         role to the same chunk recognized by the classifier
        :return: The same item with augmented FEs; `fes` is always a list
    """
    typeof_place = 'http://dbpedia.org/ontology/Place'

    # indexing by chunk guarantees at most one role per chunk
    chunk_to_fe = {fe['chunk']: fe for fe in classified['fes']}

    # if not already done, normalize numerical FEs
    if not any(fe['fe'] in ['Time', 'Duration'] for fe in classified['fes']):
        numerical = normalize_numerical_fes(language, classified['text'])
        for each in numerical:
            old = chunk_to_fe.get(each['chunk'])

            if old is None:
                chunk_to_fe[each['chunk']] = each
            elif overwrite:
                chunk_to_fe[each['chunk']] = each
                logger.debug(
                    'chunk "%s" was assigned role %s, assigning %s instead',
                    old['chunk'], old['fe'], each['fe'])

    # all places recognized by the entity linker are FEs
    for entity in classified.get('linked_entities', []):
        if typeof_place not in entity['types']:
            continue

        fe = {
            'fe': 'Place',
            'chunk': entity['chunk'],
            'score': entity['confidence'],
            'link': entity,
        }

        old = chunk_to_fe.get(entity['chunk'])
        if old is None:
            chunk_to_fe[fe['chunk']] = fe
        elif overwrite or typeof_place not in old.get('link', {}).get(
                'types', []):
            chunk_to_fe[fe['chunk']] = fe
            logger.debug(
                'chunk "%s" was assigned role %s, assigning %s instead',
                old['chunk'], old['fe'], 'Place')

    # materialize the values: on Python 3 `dict.values()` is a view, which is
    # neither indexable nor JSON-serializable like the list callers passed in
    classified['fes'] = list(chunk_to_fe.values())

    # check that no chunk is assigned more than one role
    assert len(set(fe['chunk']
                   for fe in classified['fes'])) == len(classified['fes'])

    return classified
Example #2
0
def apply_custom_classification_rules(classified, language, overwrite=False):
    """ Implements simple custom, classifier-agnostic rules for
        recognizing some frame elements

        Numerical FEs (from `normalize_numerical_fes`) are added only when
        the item has no `Time` or `Duration` FE yet. Linked entities whose
        types include the DBpedia `Place` class are then added as `Place`
        FEs. When a rule targets a chunk that already has an FE, the existing
        one is kept unless `overwrite` is set; a `Place` FE additionally
        replaces any existing FE that has no place-typed link.

        :param dict classified: an item produced by the classifier; it is
         updated in place
        :param str language: Language of the sentence
        :param bool overwrite: Tells the priority in case the rules assign a
         role to the same chunk recognized by the classifier
        :return: The same item with augmented FEs; `fes` is always a list
    """
    typeof_place = 'http://dbpedia.org/ontology/Place'

    # one entry per chunk, so a chunk can never end up with two roles
    chunk_to_fe = {fe['chunk']: fe for fe in classified['fes']}

    # if not already done, normalize numerical FEs
    if not any(fe['fe'] in ['Time', 'Duration'] for fe in classified['fes']):
        numerical = normalize_numerical_fes(language, classified['text'])
        for each in numerical:
            old = chunk_to_fe.get(each['chunk'])

            if old is None:
                chunk_to_fe[each['chunk']] = each
            elif overwrite:
                chunk_to_fe[each['chunk']] = each
                logger.debug('chunk "%s" was assigned role %s, assigning %s instead',
                             old['chunk'], old['fe'], each['fe'])

    # all places recognized by the entity linker are FEs
    for entity in classified.get('linked_entities', []):
        if typeof_place not in entity['types']:
            continue

        fe = {
            'fe': 'Place',
            'chunk': entity['chunk'],
            'score': entity['confidence'],
            'link': entity,
        }

        old = chunk_to_fe.get(entity['chunk'])
        if old is None:
            chunk_to_fe[fe['chunk']] = fe
        elif overwrite or typeof_place not in old.get('link', {}).get('types', []):
            chunk_to_fe[fe['chunk']] = fe
            logger.debug('chunk "%s" was assigned role %s, assigning %s instead',
                         old['chunk'], old['fe'], 'Place')

    # store a real list: a Python 3 `dict.values()` view cannot be indexed or
    # serialized to JSON, unlike the list the item originally carried
    classified['fes'] = list(chunk_to_fe.values())

    # check that no chunk is assigned more than one role
    assert len(set(fe['chunk'] for fe in classified['fes'])) == len(classified['fes'])

    return classified
Example #3
0
    def label_sentence(self, sentence, normalize_numerical, score_type, core_weight):
        """ Labels a single sentence

            The tagged tokens are scanned in order; the first token whose
            lemma is a known frame (with a matching POS prefix and at least
            one FE type) and for which some FE is available, either assigned
            from the linked entities or numerical, determines the frame.

            :param sentence: Sentence data to label
            :param normalize_numerical: Automatically normalize numerical FEs
            :param score_type: Which type of score (if any) to use to
             compute the classification confidence
            :param core_weight: Weight of the core FEs (used in the scoring)
            :return: Labeled data, or None if the sentence has no url, has no
             text, or no frame could be assigned to it
        """
        # read the text defensively: the guard below tolerates a missing text,
        # so the log call must not fail on it first
        text = sentence.get('text') or ''
        logger.debug('processing sentence "%s"', text)
        if not sentence.get('url'):
            logger.warning('a sentence is missing the url, skipping it')
            return None
        elif not text.strip():
            return None

        # reuse the tagging shipped with the sentence, if any
        tagged = sentence['tagged'] if 'tagged' in sentence else self.tagger.tag_one(sentence['text'])

        # Normalize + annotate numerical FEs
        numerical_fes = []
        if normalize_numerical:
            numerical_fes.extend(normalize_numerical_fes(self.language, sentence['text']))

        for token, pos, lemma in tagged:
            if lemma not in self.frame_data or not pos.startswith(self.frame_data[lemma]['pos']):
                continue

            frame = self.frame_data[lemma]
            if not frame['ontology_to_fe'].keys():
                logger.debug('missing FE types for frame %s, skipping',
                             frame['frame'])
                continue

            logger.debug('trying frame %s with FE of types %s', frame['frame'],
                         frame['ontology_to_fe'].keys())

            assigned_fes = self.assign_frame_elements(sentence['linked_entities'], frame)
            all_fes = numerical_fes + assigned_fes
            if assigned_fes or numerical_fes:
                logger.debug('assigning frame: %s and FEs %s', frame['frame'], all_fes)
                labeled = {
                    'name': sentence['name'],
                    'url': sentence['url'],
                    'text': sentence['text'],
                    'linked_entities': sentence['linked_entities'],
                    'frame': frame['frame'],
                    'fes': all_fes,
                    'lu': lemma,
                }
                break
            else:
                logger.debug('no FEs assigned for frame %s, trying another one', frame['frame'])
        else:
            # the loop ran to completion without a `break`: nothing matched
            logger.debug('did not assign any frame to sentence "%s"', sentence['text'])
            return None

        # the score is computed on the FEs found so far, before the custom
        # rules below get a chance to add more
        if score_type:
            labeled['score'] = scoring.compute_score(labeled,
                                                     score_type,
                                                     core_weight)

        assert 'lu' in labeled and labeled['fes']

        final = apply_custom_classification_rules(labeled, self.language)
        return final