Ejemplo n.º 1
0
def entity_tags(entity, authz=None):
    """Yield tag values of an entity together with their mention counts.

    Every value of the entity's name, email, identifier, IBAN, phone and
    address types becomes one facet of a single faceted query. Types
    without a group and values with a specificity below 0.1 are skipped.

    Yields ``(field, value, total)`` tuples, where ``field`` is the group
    of the value's type. Only totals greater than one are reported
    (presumably because the entity itself accounts for one mention --
    confirm against ``_filters_faceted_query``).
    """
    proxy = model.get_proxy(entity)
    thing = model.get(Entity.THING)
    tag_types = (
        registry.name,
        registry.email,
        registry.identifier,
        registry.iban,
        registry.phone,
        registry.address,
    )

    # One facet per (type, value) pair; the alias is what the query
    # result is keyed by, so it must be unique per value of a type.
    facets = []
    for tag_type in tag_types:
        if tag_type.group is None:
            continue
        for position, value in enumerate(proxy.get_type_values(tag_type)):
            if tag_type.specificity(value) < 0.1:
                continue
            # Restrict the lookup to schemata derived from Thing.
            thing_schemata = [
                schema
                for schema in model.get_type_schemata(tag_type)
                if schema.is_a(thing)
            ]
            index = entities_read_index(thing_schemata)
            alias = '%s_%s' % (tag_type.name, position)
            facets.append(
                (index, alias, tag_type.group, tag_type.group, value)
            )

    counts = _filters_faceted_query(facets, authz=authz)
    for _index, alias, field, _group, value in facets:
        total = counts.get(alias, 0)
        if total > 1:
            yield (field, value, total)
Ejemplo n.º 2
0
def entity_tags(entity, authz):
    """Yield ``(field, value, total)`` for tags shared with other entities.

    The entity's name, email, identifier, IBAN, phone and address values
    are collected into facets and counted with one faceted query. Types
    that have no group and values whose specificity is below 0.1 are
    ignored, and only totals above one are yielded.
    """
    proxy = model.get_proxy(entity)
    thing_schema = model.get(Entity.THING)
    facets = []
    for tag_type in [registry.name, registry.email, registry.identifier,
                     registry.iban, registry.phone, registry.address]:
        if tag_type.group is None:
            continue
        for idx, value in enumerate(proxy.get_type_values(tag_type)):
            if tag_type.specificity(value) < 0.1:
                continue
            # Only schemata that are Things are searched for mentions.
            candidates = [schema
                          for schema in model.get_type_schemata(tag_type)
                          if schema.is_a(thing_schema)]
            facet = (
                entities_read_index(candidates),
                '%s_%s' % (tag_type.name, idx),  # alias keyed in the result
                tag_type.group,
                tag_type.group,
                value,
            )
            facets.append(facet)

    totals = _filters_faceted_query(authz, facets)
    for facet in facets:
        alias, field, value = facet[1], facet[2], facet[4]
        total = totals.get(alias, 0)
        if total > 1:
            yield (field, value, total)
Ejemplo n.º 3
0
def get_collection_facet(collection_id, facet, refresh=False):
    """Compute value statistics for one facet field of a collection.

    The cached result is returned when one exists and ``refresh`` is
    false. Otherwise a terms aggregation (up to 300 buckets) and a
    cardinality aggregation are run over the collection's documents, and
    the outcome is cached before it is returned.

    Returns a dict with ``values`` (bucket key -> document count) and
    ``total`` (the cardinality value, 0 when it is absent).
    """
    key = cache.object_key(Collection, collection_id, facet)
    cached = cache.get_complex(key)
    if not refresh and cached is not None:
        return cached

    collection_filter = {'term': {'collection_id': collection_id}}
    body = {
        'size': 0,
        'query': {'bool': {'filter': [collection_filter]}},
        'aggs': {
            'values': {'terms': {'field': facet, 'size': 300}},
            'total': {'cardinality': {'field': facet}}
        }
    }

    # When the facet names a registered type group, limit the search to
    # the schemata of that type; otherwise pass an empty set.
    facet_type = registry.groups.get(facet)
    if facet_type is None:
        schemata = set()
    else:
        schemata = model.get_type_schemata(facet_type)

    index = entities_read_index(schema=schemata)
    result = es.search(index=index,
                       body=body,
                       request_timeout=3600,
                       timeout='20m')

    aggregations = result.get('aggregations')
    buckets = aggregations.get('values').get('buckets', [])
    values = {}
    for bucket in buckets:
        values[bucket['key']] = bucket['doc_count']
    total = aggregations.get('total').get('value', 0)

    data = {'values': values, 'total': total}
    cache.set_complex(key, data, expires=cache.EXPIRE)
    return data
Ejemplo n.º 4
0
def checksums_count(checksums):
    """Query how many documents mention each of the given checksums.

    Args:
        checksums: any iterable of checksum values, including one-shot
            iterators and generators.

    Yields:
        ``(checksum, total)`` pairs in input order, where ``total`` is the
        ``hits.total.value`` of the matching response (0 when missing).
        Nothing is yielded, and no request is sent, for empty input.
    """
    # The checksums are walked twice (once to build the request, once to
    # pair them with the responses), so a one-shot iterable has to be
    # materialised or the second pass would silently see nothing.
    checksums = list(checksums)
    if not checksums:
        # An msearch request without any searches is rejected by
        # Elasticsearch; there is nothing to count anyway.
        return

    schemata = model.get_type_schemata(registry.checksum)
    index = entities_read_index(schemata)
    body = []
    for checksum in checksums:
        # msearch bodies alternate a header line and a search line.
        body.append({"index": index})
        body.append({"size": 0, "query": {"term": {"checksums": checksum}}})

    results = es.msearch(body=body)
    for checksum, result in zip(checksums, results.get("responses", [])):
        total = result.get("hits", {}).get("total", {}).get("value", 0)
        yield checksum, total
Ejemplo n.º 5
0
 def __init__(self, query, node, prop=None, limit=0, count=False):
     """Attach to the query's graph and prepare the index and filter.

     The node's proxy is added to the query graph. With ``prop`` given,
     the search targets the index of the property's schema and filters
     on ``properties.<prop.name>``, and the object is identified by the
     property's qualified name. Without it, the search spans every
     schema of the node's type, filters on the type's group field, and
     the node's own id is kept.
     """
     self.graph = query.graph
     self.graph.add(node.proxy)
     self.node = node
     self.id = node.id
     # A falsy limit (None, 0) is normalised to 0.
     self.limit = limit if limit else 0
     self.count = count
     self.entities = []
     self.prop = prop
     if prop is None:
         node_type = self.node.type
         self.index = entities_read_index(model.get_type_schemata(node_type))
         self.filter = field_filter_query(node_type.group, node.value)
     else:
         self.index = entities_read_index(prop.schema)
         self.filter = field_filter_query('properties.%s' % prop.name,
                                          node.value)
         # Property-bound instances are identified by the property.
         self.id = prop.qname
Ejemplo n.º 6
0
def entity_tags(entity, authz):
    """Yield tag values of an entity that other entities also mention.

    For each sufficiently specific (specificity >= 0.1) name, email,
    identifier, IBAN, phone and address value of the entity, one search
    is added to a single msearch request. Each search is restricted by
    ``authz`` and excludes the entity itself.

    Args:
        entity: the entity mapping; its ``id`` is excluded from matches.
        authz: authorization object handed to ``authz_query``.

    Yields:
        ``(field, value, total)`` for every value with at least one other
        mention, where ``field`` is the group of the value's type.
    """
    proxy = model.get_proxy(entity)
    Thing = model.get(Entity.THING)
    types = [registry.name, registry.email, registry.identifier,
             registry.iban, registry.phone, registry.address]
    entity_id = entity.get('id')
    pivots = []
    queries = []
    # Go through all the tags which apply to this entity, and find how
    # often they've been mentioned in other entities.
    for type_ in types:
        if type_.group is None:
            continue
        for value in proxy.get_type_values(type_):
            if type_.specificity(value) < 0.1:
                continue
            schemata = model.get_type_schemata(type_)
            schemata = [s for s in schemata if s.is_a(Thing)]
            index = entities_read_index(schemata)
            # msearch bodies alternate a header line and a search line.
            queries.append({'index': index})
            queries.append({
                'size': 0,
                'query': {
                    'bool': {
                        'filter': [
                            authz_query(authz),
                            field_filter_query(type_.group, value)
                        ],
                        'must_not': [
                            {'ids': {'values': [entity_id]}},
                        ]
                    }
                }
            })
            pivots.append((type_.group, value))

    if not queries:
        return

    res = es.msearch(body=queries)
    for (field, value), resp in zip(pivots, res.get('responses', [])):
        total = resp.get('hits', {}).get('total')
        # Elasticsearch 7+ reports hits.total as an object of the form
        # {'value': N, 'relation': ...} instead of a bare integer;
        # accept both so the comparison below never sees a dict.
        if isinstance(total, dict):
            total = total.get('value')
        if total is not None and total > 0:
            yield (field, value, total)
Ejemplo n.º 7
0
def entity_tags(proxy, authz, prop_types=DEFAULT_TAGS):
    """Count mentions of a proxy's property values of the given types.

    Values whose property type is in ``prop_types`` and whose specificity
    exceeds 0.1 are looked up with one counted multi-search.

    Returns a list of dicts with the keys ``id``, ``field``, ``value``
    and ``count``, sorted by ``count`` in descending order. Only values
    counted more than once are included, and the reported count is one
    less than the raw count (presumably to discount the proxy itself --
    confirm against ``_counted_msearch``).
    """
    values = {
        (prop.type, value)
        for prop, value in proxy.itervalues()
        if prop.type in prop_types and prop.specificity(value) > 0.1
    }

    type_names = [t.name for t in prop_types]
    log.debug("Tags[%s]: %s values", type_names, len(values))

    queries = {}
    lookup = {}
    for tag_type, value in values:
        key = tag_type.node_id(value)
        lookup[key] = (tag_type, value)
        # Determine which indexes may contain further mentions (only things).
        thing_schemata = [
            schema
            for schema in model.get_type_schemata(tag_type)
            if schema.is_a(Entity.THING)
        ]
        index = entities_read_index(thing_schemata)
        queries[(index, key)] = field_filter_query(tag_type.group, value)

    _, counts = _counted_msearch(queries, authz)

    tags = []
    for key, count in counts.items():
        if count > 1:
            tag_type, value = lookup[key]
            tags.append({
                "id": key,
                "field": tag_type.group,
                "value": value,
                "count": count - 1,
            })
    return sorted(tags, key=lambda tag: tag["count"], reverse=True)
Ejemplo n.º 8
0
 def test_model_type_schemata(self):
     """The IBAN type's schemata include BankAccount but not CourtCase."""
     iban_schemata = model.get_type_schemata(registry.iban)
     bank_account = model.get('BankAccount')
     assert bank_account in iban_schemata, iban_schemata
     court_case = model.get('CourtCase')
     assert court_case not in iban_schemata, iban_schemata