コード例 #1
0
def index_collection(collection, sync=False):
    """Write a collection to the collections index, or drop it if deleted.

    A collection whose ``deleted_at`` is set is handed to
    ``delete_collection`` and that call's result is returned. If
    ``get_collection`` yields nothing, the function returns ``None``
    without indexing. Otherwise the result of ``index_safe`` is returned.
    """
    if collection.deleted_at is not None:
        return delete_collection(collection.id)

    data = get_collection(collection.id)
    if data is None:
        return

    label = data.get("label")
    log.info(
        "[%s] Index: %s (%s things)...",
        collection,
        label,
        data.get("count"),
    )
    # Full-text field: the raw label followed by the normalized label,
    # foreign id and summary (in that order).
    text = [label, normalize(label)]
    for key in ("foreign_id", "summary"):
        text.append(normalize(data.get(key)))
    data["text"] = text
    # collection.id is passed to index_safe as its own argument, so the
    # "id" key is removed from the body.
    data.pop("id", None)
    return index_safe(
        collections_index(),
        collection.id,
        data,
        refresh=refresh_sync(sync),
    )
コード例 #2
0
ファイル: collections.py プロジェクト: pudo/aleph
def index_collection(collection, sync=False):
    """Index a collection.

    A collection whose ``deleted_at`` is set is passed to
    ``delete_collection`` instead of being indexed.

    Returns the result of ``delete_collection`` or ``index_safe``, or
    ``None`` when ``get_collection`` has no data for the collection.
    """
    if collection.deleted_at is not None:
        return delete_collection(collection.id)

    data = get_collection(collection.id)
    if data is None:
        # Nothing to index; bail out instead of failing on ``None.pop``.
        return None
    # collection.id is handed to index_safe separately, so the "id" key is
    # removed from the body.
    data.pop('id', None)
    return index_safe(collections_index(), collection.id, data,
                      refresh=refresh_sync(sync))
コード例 #3
0
def index_single(obj, data, texts):
    """Index one record; shared by entities and documents.

    ``data`` is stamped in place with ``bulk=False``, the roles and id of
    ``obj.collection`` and the object's timestamps. It is then passed
    through ``finalize_index`` and ``clean_dict`` and written to the entity
    index under ``obj.id``. Returns the result of ``index_safe``.
    """
    data['bulk'] = False
    data['roles'] = obj.collection.roles
    data['collection_id'] = obj.collection.id
    for stamp in ('created_at', 'updated_at'):
        data[stamp] = getattr(obj, stamp)
    document = clean_dict(finalize_index(data, obj.model, texts))
    return index_safe(entity_index(), obj.id, document)
コード例 #4
0
ファイル: entities.py プロジェクト: pudo/aleph
def index_entity(entity, sync=False):
    """Write an entity to its index, or delete it when marked deleted.

    Returns the result of ``delete_entity`` for entities with
    ``deleted_at`` set, otherwise the result of ``index_safe``.
    """
    if entity.deleted_at is not None:
        return delete_entity(entity.id)

    entity_id, index, body = index_operation(entity.to_dict())
    # NOTE: a type change of the entity would additionally require
    # delete_entity(entity_id, exclude=proxy.schema, sync=False);
    # that call is currently disabled.
    return index_safe(index, entity_id, body, refresh=refresh_sync(sync))
コード例 #5
0
ファイル: entities.py プロジェクト: fork-for-review/aleph
def index_entity(entity, sync=False):
    """Index an entity, or remove it if it carries a deletion timestamp.

    The target index, document id and body all come from
    ``index_operation`` applied to ``entity.to_dict()``.
    """
    is_deleted = entity.deleted_at is not None
    if is_deleted:
        return delete_entity(entity.id)

    operation = index_operation(entity.to_dict())
    entity_id, index, document = operation
    refresh_mode = refresh_sync(sync)
    # NOTE: if an entity changes its type, the stale copy would have to be
    # removed with delete_entity(entity_id, exclude=proxy.schema, sync=False);
    # this step is currently switched off.
    return index_safe(index, entity_id, document, refresh=refresh_mode)
コード例 #6
0
ファイル: entities.py プロジェクト: mustafaascha/aleph
def index_single(obj, proxy, data, texts, sync=False):
    """Index one record; shared by entities and documents.

    The document returned by ``finalize_index`` is stamped with
    ``bulk=False``, the id of ``obj.collection`` and the object's
    timestamps, then written to the entity index under ``obj.id``.
    With ``sync`` set, the write is issued with ``refresh='wait_for'``;
    otherwise with ``refresh=False``.
    """
    document = finalize_index(proxy, data, texts)
    document['bulk'] = False
    document['collection_id'] = obj.collection.id
    for stamp in ('created_at', 'updated_at'):
        document[stamp] = getattr(obj, stamp)

    if sync:
        refresh = 'wait_for'
    else:
        refresh = False
    return index_safe(entity_index(), obj.id, document, refresh=refresh)
コード例 #7
0
def index_collection(collection, sync=False):
    """Index a collection.

    A collection whose ``deleted_at`` is set is passed to
    ``delete_collection`` instead of being indexed.

    Returns the result of ``delete_collection`` or ``index_safe``, or
    ``None`` when ``get_collection`` returns no data for the collection.
    """
    if collection.deleted_at is not None:
        return delete_collection(collection.id)

    data = get_collection(collection.id)
    if data is None:
        # No data to index; return early rather than raising
        # AttributeError on ``None.pop``.
        return None
    # collection.id is supplied to index_safe as its own argument, so the
    # "id" key is dropped from the body.
    data.pop('id', None)
    return index_safe(collections_index(),
                      collection.id,
                      data,
                      refresh=refresh_sync(sync))
コード例 #8
0
def index_single(obj, proxy, data, texts, sync=False):
    """Index one record; shared by entities and documents.

    The document returned by ``finalize_index`` is stamped with
    ``bulk=False``, ``obj.collection_id`` and the object's timestamps and
    written to the write index selected for ``proxy.schema``. When
    ``settings.ENTITIES_INDEX_SPLIT`` is enabled, ``delete_entity`` is
    first called for ``obj.id`` with ``exclude=proxy.schema``.
    """
    document = finalize_index(proxy, data, texts)
    document['bulk'] = False
    document['collection_id'] = obj.collection_id
    for stamp in ('created_at', 'updated_at'):
        document[stamp] = getattr(obj, stamp)

    target_index = entities_write_index(proxy.schema)
    refresh_mode = refresh_sync(sync)
    if settings.ENTITIES_INDEX_SPLIT:
        delete_entity(obj.id, exclude=proxy.schema, sync=False)
    return index_safe(target_index, obj.id, document, refresh=refresh_mode)
コード例 #9
0
def index_notification(event, actor_id, params, channels, sync=False):
    """Write a notification document to the notifications index.

    Only the parameter names declared in ``event.params`` are kept; each
    value is passed through ``get_entity_id``. ``None`` channels are
    discarded and duplicates removed. The document id is ``hash_data`` over
    the actor, event name, channels and resolved params.
    """
    supplied = params or {}
    params = {}
    for name in event.params.keys():
        params[name] = get_entity_id(supplied.get(name))

    channels = list(set(c for c in channels if c is not None))
    data = dict(
        actor_id=actor_id,
        params=params,
        event=event.name,
        channels=channels,
        created_at=datetime.utcnow(),
    )
    index = notifications_index()
    doc_id = hash_data((actor_id, event.name, channels, params))
    return index_safe(index, doc_id, data, refresh=refresh_sync(sync))
コード例 #10
0
ファイル: collections.py プロジェクト: we1l1n/aleph
def index_collection(collection, sync=False):
    """Index a collection.

    A collection whose ``deleted_at`` is set is passed to
    ``delete_collection`` instead of being indexed.

    Returns the result of ``delete_collection`` or ``index_safe``, or
    ``None`` when ``get_collection`` returns no data for the collection.
    """
    if collection.deleted_at is not None:
        return delete_collection(collection.id)

    log.info("Index [%s]: %s", collection.id, collection.label)
    data = get_collection(collection.id)
    if data is None:
        # No data to index; return early rather than raising
        # AttributeError on ``None.get``.
        return None
    # Full-text field: the raw label plus the normalized label, foreign id
    # and summary.
    text = [data.get('label')]
    text.append(normalize(data.get('label')))
    text.append(normalize(data.get('foreign_id')))
    text.append(normalize(data.get('summary')))
    data['text'] = text
    # collection.id is supplied to index_safe as its own argument, so the
    # "id" key is dropped from the body.
    data.pop('id', None)
    return index_safe(collections_index(),
                      collection.id,
                      data,
                      refresh=refresh_sync(sync))
コード例 #11
0
ファイル: notifications.py プロジェクト: sunu/aleph
def index_notification(event, actor_id, params, channels, sync=False):
    """Write a notification document to the notifications index.

    Every supplied parameter is passed through ``get_entity_id``; values
    that resolve to ``None`` are dropped and the rest are stored as strings.
    ``None`` channels are discarded and duplicates removed. The document id
    is ``hash_data`` over the actor, event name, channels and the params as
    supplied by the caller (not the stringified copies).
    """
    params = params or {}
    resolved = (
        (name, get_entity_id(value)) for name, value in params.items()
    )
    clean_params = {
        name: str(entity_id)
        for name, entity_id in resolved
        if entity_id is not None
    }
    channels = list(set(c for c in channels if c is not None))
    data = dict(
        actor_id=actor_id,
        params=clean_params,
        event=event.name,
        channels=channels,
        created_at=datetime.utcnow(),
    )
    index = notifications_index()
    doc_id = hash_data((actor_id, event.name, channels, params))
    return index_safe(index, doc_id, data, sync=sync)
コード例 #12
0
ファイル: collections.py プロジェクト: jbaehne/aleph
def index_collection(collection, sync=False):
    """Build the index document for a collection and write it.

    A collection whose ``deleted_at`` is set is passed to
    ``delete_collection`` instead. Otherwise the document is assembled from
    the collection's own fields, its creator and team, and the statistics
    returned by ``get_collection_stats``. It is then written to the
    collections index with ``refresh=sync``. Returns the result of
    ``index_safe``.
    """
    if collection.deleted_at is not None:
        return delete_collection(collection.id)

    def _describe(role):
        # Compact reference stored for the creator and each team member.
        return {'id': role.id, 'type': role.type, 'name': role.name}

    data = {
        'foreign_id': collection.foreign_id,
        'created_at': collection.created_at,
        'updated_at': collection.updated_at,
        'label': collection.label,
        'kind': collection.kind,
        'summary': collection.summary,
        'category': Collection.DEFAULT,
        'publisher': collection.publisher,
        'publisher_url': collection.publisher_url,
        'info_url': collection.info_url,
        'data_url': collection.data_url,
        'casefile': collection.casefile,
        'secret': collection.secret,
        'collection_id': collection.id,
        'schemata': {},
        'team': [],
    }
    # Snapshot of the string-valued fields; taken before the category
    # override below, so it reflects the values set in the literal above.
    texts = [value for value in data.values() if isinstance(value, str)]

    if collection.category in Collection.CATEGORIES:
        data['category'] = collection.category

    creator = collection.creator
    if creator is not None:
        data['creator'] = _describe(creator)
        texts.append(creator.name)

    for member in collection.team:
        data['team'].append(_describe(member))
        texts.append(member.name)

    stats = get_collection_stats(collection.id)
    data['count'] = stats['count']

    # Per-schema entity counts, limited to schemata descending from Thing.
    thing = model.get(Entity.THING)
    for schema_name, total in stats['schemata'].items():
        schema = model.get(schema_name)
        if schema is None or not schema.is_a(thing):
            continue
        data['schemata'][schema.name] = total

    # Fall back to the values found in the stats when the collection
    # itself declares no countries or languages.
    countries = ensure_list(collection.countries) or stats['countries'].keys()
    data['countries'] = registry.country.normalize_set(countries)

    languages = ensure_list(collection.languages) or stats['languages'].keys()
    data['languages'] = registry.language.normalize_set(languages)

    # The normalized copies are built as a complete list first, so the
    # list is not extended while it is being iterated.
    texts += [normalize(text, ascii=True) for text in texts]
    data['text'] = index_form(texts)
    return index_safe(collections_index(), collection.id, data, refresh=sync)
コード例 #13
0
ファイル: collections.py プロジェクト: gavinrozzi/aleph
def index_collection(collection):
    """Build the index document for a collection, write it and refresh.

    A collection whose ``deleted_at`` is set is passed to
    ``delete_collection`` instead. Otherwise the document is assembled from
    the collection's own fields, its creator and team, and an aggregation
    query against the entities index. It is written with ``index_safe``,
    after which the collections index is refreshed.

    Returns the result of ``delete_collection`` for deleted collections,
    otherwise the value returned by ``index_safe``.
    """
    if collection.deleted_at is not None:
        return delete_collection(collection.id)

    data = {
        'foreign_id': collection.foreign_id,
        'created_at': collection.created_at,
        'updated_at': collection.updated_at,
        'label': collection.label,
        'kind': collection.kind,
        'summary': collection.summary,
        'category': Collection.DEFAULT,
        'publisher': collection.publisher,
        'publisher_url': collection.publisher_url,
        'info_url': collection.info_url,
        'data_url': collection.data_url,
        'casefile': collection.casefile,
        'roles': collection.roles,
        'schemata': {},
        'team': []
    }
    # Snapshot of the string-valued fields for the full-text field. It is
    # taken before the category override below.
    texts = [v for v in data.values() if isinstance(v, str)]

    # Only accept the collection's category if it is a known one;
    # otherwise the default set above stays in place.
    if collection.category in Collection.CATEGORIES:
        data['category'] = collection.category

    if collection.creator is not None:
        data['creator'] = {
            'id': collection.creator.id,
            'type': collection.creator.type,
            'name': collection.creator.name
        }
        texts.append(collection.creator.name)

    for role in collection.team:
        data['team'].append({
            'id': role.id,
            'type': role.type,
            'name': role.name
        })
        texts.append(role.name)

    # Compute some statistics on the content of a collection.
    # The query is filtered to this collection and to Entity.THING; only
    # the hit total and the three aggregations are read from the result.
    query = {
        'size': 0,
        'query': {
            'bool': {
                'filter': [{
                    'term': {
                        'collection_id': collection.id
                    }
                }, {
                    'term': {
                        'schemata': Entity.THING
                    }
                }]
            }
        },
        'aggs': {
            'schema': {
                'terms': {
                    'field': 'schema',
                    'size': 1000
                }
            },
            'countries': {
                'terms': {
                    'field': 'countries',
                    'size': 500
                }
            },
            'languages': {
                'terms': {
                    'field': 'languages',
                    'size': 100
                }
            },
        }
    }
    result = search_safe(index=entities_index(), body=query)
    aggregations = result.get('aggregations')
    data['count'] = result['hits']['total']

    # Expose entity counts per schema.
    for schema in aggregations['schema']['buckets']:
        data['schemata'][schema['key']] = schema['doc_count']

    # If no countries or languages are given, fall back to the keys of the
    # corresponding aggregation buckets.
    countries = collection.countries
    if countries is None or not len(countries):
        countries = aggregations['countries']['buckets']
        countries = [c['key'] for c in countries]
    data['countries'] = exactitude.countries.normalize_set(countries)

    languages = collection.languages
    if languages is None or not len(languages):
        languages = aggregations['languages']['buckets']
        languages = [c['key'] for c in languages]
    data['languages'] = exactitude.languages.normalize_set(languages)

    # Append a normalized copy of every collected text. The copies are
    # built as a complete list before the extend, so the list is not
    # mutated while it is iterated.
    texts.extend([normalize(t, ascii=True) for t in texts])
    data['text'] = index_form(texts)
    # NOTE: ``data`` is rebound to whatever index_safe returns, so the
    # function returns that value rather than the dict built above.
    data = index_safe(collections_index(), collection.id, data)
    refresh_index(index=collections_index())
    return data