예제 #1
0
 def get_index(self):
     # schema = self.parser.getlist('filter:schema')
     # if len(schema):
     #     return entities_read_index(schema=schema, descendants=False)
     schemata = self.parser.getlist('filter:schemata')
     if len(schemata):
         return entities_read_index(schema=schemata)
     return entities_read_index()
예제 #2
0
def iter_entities(authz=None, collection_id=None, schemata=None,
                  includes=None, excludes=None):
    """Scan all entities matching the given criteria."""
    filters = []
    if authz is not None:
        filters.append(authz_query(authz))
    if collection_id is not None:
        filters.append({'term': {'collection_id': collection_id}})
    if ensure_list(schemata):
        filters.append({'terms': {'schemata': ensure_list(schemata)}})
    source = {}
    if ensure_list(includes):
        source['includes'] = ensure_list(includes)
    if ensure_list(excludes):
        source['excludes'] = ensure_list(excludes)
    query = {
        'query': {'bool': {'filter': filters}},
        'sort': ['_doc'],
        '_source': source
    }
    index = entities_read_index(schema=schemata)
    for res in scan(es, index=index, query=query, scroll='1410m'):
        entity = unpack_result(res)
        if entity is not None:
            yield entity
예제 #3
0
def delete_entity(entity_id, exclude=None, sync=False):
    """Delete an entity from the index."""
    query = {'query': {'ids': {'values': str(entity_id)}}}
    es.delete_by_query(index=entities_read_index(exclude=exclude),
                       body=query,
                       wait_for_completion=sync,
                       refresh=refresh_sync(sync))
예제 #4
0
def expand_group(node):
    if node.type.group is None or node.value is None:
        return
    value = str(node.value)
    query = {
        'query': {
            'term': {
                node.type.group: value
            }
        },
        '_source': {
            'includes': ['schema', 'properties']
        }
    }
    for res in scan(es, index=entities_read_index(), query=query):
        entity_id = res.get('_id')
        source = res.get('_source')
        properties = source.get('properties')
        schema = model.get(source.get('schema'))
        for prop in schema.properties.values():
            if prop.type != node.type:
                continue
            values = properties.get(prop.name)
            values = node.type.normalize_set(values)
            if value not in values:
                continue
            if prop.reverse:
                yield Link(node, prop.reverse, entity_id)
            else:
                yield Link(node, prop, entity_id, inverted=True)
예제 #5
0
def delete_entities(collection_id, schema=None, bulk_only=False):
    """Delete entities from a collection."""
    filters = [{'term': {'collection_id': collection_id}}]
    if bulk_only:
        filters.append({'term': {'bulk': True}})
    if schema is not None:
        filters.append({'term': {'schemata': schema.name}})
    query = {'bool': {'filter': filters}}
    query_delete(entities_read_index(schema), query)
예제 #6
0
def get_collection_stats(collection_id):
    """Compute some statistics on the content of a collection."""
    key = cache.key('cstats', collection_id)
    data = cache.get_complex(key)
    if data is not None:
        return data

    log.info("Generating collection stats: %s", collection_id)
    query = {
        'size': 0,
        'query': {
            'bool': {
                'filter': [{
                    'term': {
                        'collection_id': collection_id
                    }
                }]
            }
        },
        'aggs': {
            'schemata': {
                'terms': {
                    'field': 'schema',
                    'size': 1000
                }
            },
            'countries': {
                'terms': {
                    'field': 'countries',
                    'size': 500
                }
            },
            'languages': {
                'terms': {
                    'field': 'languages',
                    'size': 10
                }
            },
        }
    }
    result = search_safe(index=entities_read_index(), body=query)
    aggregations = result.get('aggregations', {})
    data = {'count': result['hits']['total']}

    for facet in ['schemata', 'countries', 'languages']:
        data[facet] = {}
        for bucket in aggregations[facet]['buckets']:
            data[facet][bucket['key']] = bucket['doc_count']
    expire = randint(3600 * 3, 3600 * 12)
    cache.set_complex(key, data, expire=expire)
    return data
예제 #7
0
def entity_tags(entity, authz):
    """Do a search on tags of an entity."""
    # NOTE: This must also work for documents.
    FIELDS = [
        'names',
        'emails',
        'phones',
        'addresses',
        'identifiers'
    ]
    pivots = []
    queries = []
    # Go through all the tags which apply to this entity, and find how
    # often they've been mentioned in other entities.
    for field in FIELDS:
        for value in entity.get(field, []):
            if value is None or not len(value):
                continue
            queries.append({})
            queries.append({
                'size': 0,
                'query': {
                    'bool': {
                        'filter': [
                            authz_query(authz),
                            field_filter_query(field, value)
                        ],
                        'must_not': [
                            {'ids': {'values': [entity.get('id')]}},
                        ]
                    }
                }
            })
            pivots.append((field, value))

    if not len(queries):
        return

    res = es.msearch(index=entities_read_index(), body=queries)
    for (field, value), resp in zip(pivots, res.get('responses', [])):
        total = resp.get('hits', {}).get('total')
        if total is not None and total > 0:
            yield (field, value, total)
예제 #8
0
def entity_references(entity, authz):
    """Given a particular entity, find all the references to it from other
    entities, grouped by the property where they are used."""
    schema = model[entity.get('schema')]

    # Generate all the possible mention locations.
    properties = []
    queries = []
    for prop in model.properties:
        if prop.type != registry.entity:
            continue
        if not schema.is_a(prop.range):
            continue

        field = 'properties.%s' % prop.name
        queries.append({})
        queries.append({
            'size': 0,
            'query': {
                'bool': {
                    'filter': [
                        authz_query(authz),
                        {'term': {'schemata': prop.schema.name}},
                        {'term': {field: entity.get('id')}},
                    ]
                }
            }
        })
        properties.append(prop)

    if not len(queries):
        return

    # Run a count search (with schema facet?)
    res = es.msearch(index=entities_read_index(), body=queries)
    for prop, resp in zip(properties, res.get('responses', [])):
        total = resp.get('hits', {}).get('total')
        if total is not None and total > 0:
            yield (prop, total)
예제 #9
0
파일: alerts.py 프로젝트: nt0z/aleph
def check_alert(alert_id):
    alert = Alert.by_id(alert_id)
    if alert is None or alert.role is None:
        return
    if not alert.role.is_alertable:
        return
    authz = Authz.from_role(alert.role)
    query = alert_query(alert, authz)
    result = search_safe(index=entities_read_index(), body=query)
    for result in result.get('hits').get('hits', []):
        entity = unpack_result(result)
        if entity is None:
            continue
        log.info('Alert [%s]: %s', alert.query, entity.get('name'))
        params = {'alert': alert, 'role': alert.role, 'entity': entity}
        publish(Events.MATCH_ALERT,
                actor_id=entity.get('uploader_id'),
                params=params)

    alert.update()
    db.session.commit()
    db.session.close()
예제 #10
0
파일: xref.py 프로젝트: nt0z/aleph
def xref_item(proxy):
    """Cross-reference an entity or document, given as an indexed document."""
    query = match_query(proxy)
    if query == none_query():
        return

    query = {
        'query': query,
        'size': 100,
        '_source': {
            'includes': ['schema', 'properties', 'collection_id']
        }
    }
    matchable = list(proxy.schema.matchable_schemata)
    index = entities_read_index(schema=matchable)
    result = search_safe(index=index, body=query)
    results = result.get('hits').get('hits')
    for result in results:
        result = unpack_result(result)
        if result is not None:
            other = model.get_proxy(result)
            score = compare(model, proxy, other)
            yield score, result.get('collection_id'), other
예제 #11
0
def entities_by_ids(ids, authz=None, cached=True):
    """Iterate over unpacked entities based on a search for the given
    entity IDs."""
    for i in range(0, len(ids), MAX_PAGE):
        chunk = ids[i:i + MAX_PAGE]
        if not len(chunk):
            return
        query = bool_query()
        query['bool']['filter'].append({'ids': {'values': chunk}})
        if authz is not None:
            query['bool']['filter'].append(authz_query(authz))
        query = {
            'query': query,
            '_source': {'excludes': ['text']},
            'size': min(MAX_PAGE, len(chunk))
        }
        result = search_safe(index=entities_read_index(),
                             body=query,
                             ignore=[404],
                             request_cache=cached)
        for doc in result.get('hits', {}).get('hits', []):
            entity = unpack_result(doc)
            if entity is not None:
                yield entity