def all():
    """Paginated listing of all active entities visible to the current user.

    Restricts results to entities belonging to at least one collection the
    user may read, ordered by entity id.
    """
    # NOTE: this endpoint shadows the builtin `all`; kept as-is since the
    # route registration elsewhere relies on the name.
    query = Entity.all()
    query = query.filter(Entity.state == Entity.STATE_ACTIVE)
    readable = Collection.id.in_(authz.collections(authz.READ))
    query = query.filter(Entity.collections.any(readable))
    query = query.order_by(Entity.id.asc())
    return jsonify(Pager(query, limit=100))
def _generate(self): latest = Entity.latest() if self.latest is not None and self.latest >= latest: return self.latest = latest self.matches = defaultdict(set) q = Entity.all() q = q.options(joinedload('other_names')) q = q.filter(Entity.state == Entity.STATE_ACTIVE) for entity in q: for term in entity.regex_terms: self.matches[normalize_strong(term)].add(entity.id) self.regexes = [] terms = self.matches.keys() terms = [t for t in terms if len(t) > 2] for i in count(0): terms_slice = terms[i * BATCH_SIZE:(i + 1) * BATCH_SIZE] if not len(terms_slice): break body = '|'.join(terms_slice) rex = re.compile('( |^)(%s)( |$)' % body) # rex = re.compile('(%s)' % body) self.regexes.append(rex) log.info('Generating entity tagger: %r (%s terms)', latest, len(terms))
def _generate(self): latest = Entity.latest() if latest is None: return if self.latest is not None and self.latest >= latest: return self.latest = latest matches = {} q = Entity.all() q = q.filter(Entity.state == Entity.STATE_ACTIVE) for entity in q: for term in entity.regex_terms: if term in matches: matches[term].append(entity.id) else: matches[term] = [entity.id] if not len(matches): self.automaton = None return self.automaton = Automaton() for term, entities in matches.iteritems(): self.automaton.add_word(term.encode('utf-8'), entities) self.automaton.make_automaton() log.info('Generated automaton with %s terms', len(matches))
def build_automaton(self): q = Entity.all() q = q.filter(Entity.schema.in_(self.TYPES.keys())) matches = {} for entity in q: tag = self.TYPES.get(entity.schema) if tag is None: continue for name in entity.names: if name is None or len(name) > 120: continue match = self.match_form(name) if match is None: continue if match in matches: matches[match].append((name, tag)) else: matches[match] = [(name, tag)] if not len(matches): return automaton = Automaton() for term, entities in matches.iteritems(): automaton.add_word(term, entities) automaton.make_automaton() return automaton
def _generate(self): latest = Entity.latest() if latest is None: return if self.latest is not None and self.latest >= latest: return self.latest = latest matches = {} q = Entity.all() for entity in q: tag = self.TYPES.get(entity.schema) if tag is None: continue for name in entity.names: if name is None or len(name) > 120: continue match = match_form(name) # TODO: this is a weird heuristic, but to avoid overly # aggressive matching it may make sense: if match is None or ' ' not in match: continue if match in matches: matches[match].append((name, tag)) else: matches[match] = [(name, tag)] if not len(matches): return for term, entities in matches.iteritems(): self.automaton.add_word(term.encode('utf-8'), entities) self.automaton.make_automaton() log.info('Generated automaton with %s terms', len(matches))
def _generate(self): latest = Entity.latest() if latest is None: return if self.latest is not None and self.latest >= latest: return self.latest = latest matches = {} q = Entity.all() for entity in q: tag = self.TYPES.get(entity.schema) if tag is None: continue for term in entity.regex_terms: if term in matches: matches[term].append((entity.name, tag)) else: matches[term] = [(entity.name, tag)] if not len(matches): return for term, entities in matches.iteritems(): self.automaton.add_word(term.encode('utf-8'), entities) self.automaton.make_automaton() log.info('Generated automaton with %s terms', len(matches))
def load_entities():
    """Load every active entity into the graph within a single transaction."""
    tx = get_graph().begin()
    active = Entity.all().filter(Entity.state == Entity.STATE_ACTIVE)
    for entity in active:
        load_entity(tx, entity)
    tx.commit()
def load_entities():
    """Load all active entities into the graph, committing in 10k batches."""
    graph = get_graph()
    tx = graph.begin()
    active = Entity.all().filter(Entity.state == Entity.STATE_ACTIVE)
    for idx, entity in enumerate(active):
        load_entity(tx, entity)
        # Commit periodically so a large load doesn't accumulate in one
        # giant transaction; open a fresh one for the next chunk.
        if idx and idx % 10000 == 0:
            tx.commit()
            tx = graph.begin()
    tx.commit()
def _generate(self): latest = Entity.latest() if self.latest is not None and self.latest >= latest: return self.latest = latest matches = defaultdict(set) q = Entity.all() q = q.options(joinedload('other_names')) q = q.filter(Entity.state == Entity.STATE_ACTIVE) for entity in q: for term in entity.regex_terms: matches[term].add(entity.id) if not len(matches): self.automaton = None return self.automaton = Automaton() for term, entities in matches.items(): self.automaton.add_word(term.encode('utf-8'), entities) self.automaton.make_automaton() log.info('Generated automaton with %s terms', len(matches))
def index():
    """Paginated listing of entities in collections the user may read."""
    readable = match_ids('collection', authz.collections(authz.READ))
    query = Entity.all().filter(Entity.collection_id.in_(readable))
    return jsonify(Pager(query))