Python EntityTag Examples

Programming Language: Python

Namespace/Package Name: aleph.model

Class/Type: EntityTag

Examples at hotexamples.com: 3

Python EntityTag - 3 examples found. These are the top-rated real-world Python examples of aleph.model.EntityTag, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

EntityTag(1)

by_package(1)

collection(1)

delete_set(1)

entity_id(1)

package_id(1)

Example #1

Show file

File: indexer.py Project: vied12/aleph

def index_package(package, plain_text, normalized_text):
    """Build the search document for ``package`` and send it to the index.

    The document is assembled from ``package.source.meta``, the contents of
    the text artifacts (when they exist), the result of
    ``EntityTag.by_package`` and the result of ``generate_attributes``.

    A package with no source, or with neither a ``title`` nor a ``name`` in
    its metadata, is logged as an error and skipped.

    :param package: package to index; its ``id``, ``collection`` and
        ``source`` attributes are read.
    :param plain_text: object exposing ``exists()`` and ``fh()``; when it
        exists, its content is stored under ``text`` and serves as the
        fallback input for the summary.
    :param normalized_text: object exposing ``exists()`` and ``fh()``; when
        it exists, its content is stored under ``normalized``.
    :returns: ``None``.
    """
    es.json_encoder = JSONEncoder
    body = {
        'id': package.id,
        'collection': package.collection
    }
    source = package.source
    if source is None:
        log.error("No source for package %r, skipping", package)
        return

    meta = source.meta
    body['name'] = meta.get('name')
    body['slug'] = meta.get('slug')
    body['title'] = meta.get('title') or body['name']

    # Reject untitled packages before reading the text artifacts: the title
    # depends only on the metadata, so there is no point loading the
    # (potentially large) text of a package that will be skipped anyway.
    if not body['title']:
        log.error("No title for package %r, skipping", package)
        return

    # Metadata fields that are copied into the document unchanged.
    for key in ('source_url', 'created_at', 'updated_at', 'filed_at',
                'extension', 'mime_type'):
        body[key] = meta.get(key)

    if plain_text.exists():
        body['text'] = plain_text.fh().read()
        # Prefer an explicit summary from the metadata, else use the text.
        summary = meta.get('summary') or body['text']
        body['summary'] = html_summary(summary)

    if normalized_text.exists():
        body['normalized'] = normalized_text.fh().read()

    body['entities'] = EntityTag.by_package(package.collection, package.id)
    body['attributes'] = generate_attributes(meta)

    log.info("Indexing: %r", body['title'])
    es.index(es_index, DOC_TYPE, body, package.id)

Example #2

Show file

File: indexer.py Project: OpenOil-UG/aleph

def index_package(package, plain_text, normalized_text):
    """Build the search document for ``package`` and send it to the index.

    The document is assembled from ``package.source.meta``, the contents of
    the text artifacts (when they exist), the result of
    ``EntityTag.by_package`` and the result of ``generate_attributes``.

    A package with no source, or with neither a ``title`` nor a ``name`` in
    its metadata, is logged as an error and skipped.

    :param package: package to index; its ``id``, ``collection`` and
        ``source`` attributes are read.
    :param plain_text: object exposing ``exists()`` and ``fh()``; when it
        exists, its content is stored under ``text`` and serves as the
        fallback input for the summary.
    :param normalized_text: object exposing ``exists()`` and ``fh()``; when
        it exists, its content is stored under ``normalized``.
    :returns: ``None``.
    """
    es.json_encoder = JSONEncoder
    body = {
        'id': package.id,
        'collection': package.collection
    }
    source = package.source
    if source is None:
        log.error("No source for package %r, skipping", package)
        return

    meta = source.meta
    body['name'] = meta.get('name')
    body['slug'] = meta.get('slug')
    body['title'] = meta.get('title') or body['name']

    # Reject untitled packages before reading the text artifacts: the title
    # depends only on the metadata, so there is no point loading the
    # (potentially large) text of a package that will be skipped anyway.
    if not body['title']:
        log.error("No title for package %r, skipping", package)
        return

    # Metadata fields that are copied into the document unchanged.
    for key in ('source_url', 'created_at', 'updated_at', 'filed_at',
                'extension', 'mime_type'):
        body[key] = meta.get(key)

    if plain_text.exists():
        body['text'] = plain_text.fh().read()
        # Prefer an explicit summary from the metadata, else use the text.
        summary = meta.get('summary') or body['text']
        body['summary'] = html_summary(summary)

    if normalized_text.exists():
        body['normalized'] = normalized_text.fh().read()

    body['entities'] = EntityTag.by_package(package.collection, package.id)
    body['attributes'] = generate_attributes(meta)

    log.info("Indexing: %r", body['title'])
    es.index(es_index, DOC_TYPE, body, package.id)

Example #3

Show file

    def analyze(self, normalized):
        """Tag the package behind ``normalized`` with the entities found in its text.

        Calls ``EntityTag.delete_set`` for the package first (presumably to
        drop tags left by an earlier run -- confirm against the model), then
        scans the text with every ``(regex, lookup)`` pair yielded by
        ``self.expressions()``. Each regex match must expose exactly three
        groups; the middle one is the key into ``lookup``, whose value is
        an iterable of entity ids. One ``EntityTag`` row is added per
        distinct entity id and the session is committed.
        """
        text = normalized.data()
        package = normalized.package
        EntityTag.delete_set(package.collection, package.id)

        # Collect into a set so an entity matched several times is tagged once.
        found = set()
        for pattern, lookup in self.expressions():
            for hit in pattern.finditer(text):
                _before, key, _after = hit.groups()
                found.update(lookup[key])

        for entity_id in found:
            tag = EntityTag()
            tag.collection = package.collection
            tag.package_id = package.id
            tag.entity_id = entity_id
            db.session.add(tag)

        # Commit unconditionally, even when no new tags were added.
        db.session.commit()

        if not found:
            return
        log.info("Tagged %r with %d entities", package.id,
                 len(found))