def index_matches(collection, matches, sync=False):
    """Index cross-referencing matches.

    Builds one bulk action per item of ``matches`` and hands the whole batch
    to ``bulk_actions``. When ``matches`` yields nothing, no log line is
    written and nothing is submitted.

    Args:
        collection: object whose ``id`` is stored as ``collection_id`` and
            takes part in the hashed document id.
        matches: iterable of ``(score, entity, match_collection_id, match)``
            tuples.
        sync: forwarded unchanged to ``bulk_actions``.
    """
    actions = []
    for score, entity, match_collection_id, match in matches:
        # Document id is hashed from the (entity, collection, match) ids.
        doc_id = hash_data((entity.id, collection.id, match.id))

        # Text field: the entity's name values followed by the match's.
        names = ensure_list(entity.get_type_values(registry.name))
        names.extend(match.get_type_values(registry.name))

        action = {"_id": doc_id, "_index": xref_index()}
        action["_source"] = {
            "score": score,
            "entity_id": entity.id,
            "collection_id": collection.id,
            "match_id": match.id,
            "match_collection_id": match_collection_id,
            "countries": match.get_type_values(registry.country),
            "schema": match.schema.name,
            "text": names,
            # NOTE(review): utcnow() yields a naive datetime - confirm the
            # index mapping expects that.
            "created_at": datetime.utcnow(),
        }
        actions.append(action)

    if not actions:
        return
    log.info("Indexing %d xref matches...", len(actions))
    bulk_actions(actions, sync=sync)
def index_matches(collection, matches, sync=False):
    """Index cross-referencing matches.

    Each ``(score, entity, match_collection_id, match)`` tuple in ``matches``
    becomes one bulk action; the resulting list is passed to
    ``bulk_actions`` together with ``sync``. An empty result is neither
    logged nor submitted.
    """

    def _as_action(score, entity, match_collection_id, match):
        # One xref document, keyed by a hash of the three ids involved.
        key = hash_data((entity.id, collection.id, match.id))
        # Name values of the entity first, then those of the match.
        text = ensure_list(entity.get_type_values(registry.name))
        text.extend(match.get_type_values(registry.name))
        return {
            '_id': key,
            '_index': xref_index(),
            '_source': {
                'score': score,
                'entity_id': entity.id,
                'collection_id': collection.id,
                'match_id': match.id,
                'match_collection_id': match_collection_id,
                'countries': match.get_type_values(registry.country),
                'schema': match.schema.name,
                'text': text,
                'created_at': datetime.utcnow(),
            },
        }

    actions = [
        _as_action(score, entity, match_collection_id, match)
        for (score, entity, match_collection_id, match) in matches
    ]
    if actions:
        log.info("Indexing %d xref matches...", len(actions))
        bulk_actions(actions, sync=sync)
def index_collection_entities(collection, sync=False):
    """Re-index all documents in a collection in one go.

    Streams one bulk action per entity returned by
    ``Entity.by_collection(collection.id)``, followed by everything yielded
    by ``generate_collection_docs(collection)``, into ``bulk_actions``.

    Args:
        collection: object whose ``id`` selects the entities to index.
        sync: forwarded unchanged to ``bulk_actions``.
    """
    # Imported inside the function, as in the original code.
    from aleph.index.documents import generate_collection_docs

    def _actions():
        # Entities first, produced lazily one action at a time.
        for record in Entity.by_collection(collection.id):
            doc_id, index_name, source = index_operation(record.to_dict())
            action = {'_id': doc_id, '_index': index_name, '_source': source}
            yield action
        # Then the collection's document actions.
        yield from generate_collection_docs(collection)

    bulk_actions(_actions(), sync=sync)
def index_collection_entities(collection, sync=False):
    """Re-index all documents in a collection in one go.

    The actions handed to ``bulk_actions`` are generated lazily: first one
    per entity of the collection, then whatever
    ``generate_collection_docs(collection)`` yields.
    """
    from aleph.index.documents import generate_collection_docs

    def _entity_actions():
        # One action per entity found for this collection id.
        for entity in Entity.by_collection(collection.id):
            payload = entity.to_dict()
            entity_id, index, body = index_operation(payload)
            yield {
                '_id': entity_id,
                '_index': index,
                '_source': body,
            }

    def _all_actions():
        # Entities are exhausted before the document generator is started.
        yield from _entity_actions()
        yield from generate_collection_docs(collection)

    bulk_actions(_all_actions(), sync=sync)
def index_bulk(collection, entities, sync=False):
    """Index a set of entities.

    Each item of ``entities`` is passed through
    ``format_proxy(item, collection)`` lazily, and the resulting stream is
    handed to ``bulk_actions`` along with ``sync``.
    """
    # Generator expression: items are formatted only as they are consumed.
    actions = (format_proxy(proxy, collection) for proxy in entities)
    bulk_actions(actions, sync=sync)
def index_matches(collection, matches, sync=False):
    """Index cross-referencing matches.

    Delegates construction of the actions to
    ``_index_form(collection, matches)`` and submits the result through
    ``bulk_actions`` with the given ``sync`` flag.
    """
    actions = _index_form(collection, matches)
    bulk_actions(actions, sync=sync)
def index_bulk(collection, entities, job_id=None, sync=False):
    """Index a set of entities.

    Every entity is converted with
    ``format_proxy(entity, collection, job_id=job_id)``; the complete list of
    results is built before being passed to ``bulk_actions``.

    Args:
        collection: passed through to ``format_proxy``.
        entities: iterable of entities to index.
        job_id: passed through to ``format_proxy`` as a keyword argument.
        sync: forwarded unchanged to ``bulk_actions``.
    """
    actions = [
        format_proxy(proxy, collection, job_id=job_id)
        for proxy in entities
    ]
    bulk_actions(actions, sync=sync)
def index_document(document, shallow=False, sync=False):
    """Log and index a single document.

    Logs the document's ``id`` and ``name``, then passes the operations from
    ``generate_document(document, shallow=shallow)`` to ``bulk_actions``.

    Args:
        document: object exposing ``id`` and ``name``.
        shallow: forwarded unchanged to ``generate_document``.
        sync: forwarded unchanged to ``bulk_actions``.
    """
    log.info("Index document [%s]: %s", document.id, document.name)
    bulk_actions(generate_document(document, shallow=shallow), sync=sync)
def index_bulk(collection_id, entities, merge=True):
    """Index a set of entities.

    The actions come from
    ``_index_updates(collection_id, entities, merge=merge)`` and are
    submitted through ``bulk_actions``.

    Args:
        collection_id: passed through to ``_index_updates``.
        entities: passed through to ``_index_updates``.
        merge: forwarded to ``_index_updates`` and also used as the ``sync``
            argument of ``bulk_actions``.
    """
    # NOTE(review): ``sync`` is driven by ``merge`` here - confirm that the
    # coupling is intentional.
    bulk_actions(
        _index_updates(collection_id, entities, merge=merge),
        sync=merge,
    )
def index_bulk(collection, entities, sync=False):
    """Index a set of entities.

    Builds the full list of ``index_proxy(entity, collection)`` results and
    submits it through ``bulk_actions`` with the given ``sync`` flag.
    """
    operations = [index_proxy(proxy, collection) for proxy in entities]
    bulk_actions(operations, sync=sync)