コード例 #1
0
def create():
    """Create a collection for the logged-in user and return its view.

    The request must come from a logged-in session. The body is parsed
    with ``CollectionSchema``, the collection is created with the role
    looked up from ``request.authz.id``, the collections index is
    refreshed, and the result of ``view`` for the new id is returned.
    """
    require(request.authz.logged_in)
    payload = parse_request(CollectionSchema)
    creator = Role.by_id(request.authz.id)
    created = create_collection(payload, role=creator)
    index_name = collections_index()
    refresh_index(index_name)
    return view(created.id)
コード例 #2
0
def delete_collection(collection_id):
    """Delete the document with the given id from the collections index.

    The delete is issued with ``refresh=True``; a 404 response is ignored.
    """
    params = {
        'index': collections_index(),
        'doc_type': 'doc',
        'refresh': True,
        'id': collection_id,
        'ignore': [404],
    }
    es.delete(**params)
コード例 #3
0
def get_instance_stats(authz):
    """Gather entity and collection counts filtered by ``authz``.

    Returns a dict with three keys: ``count`` (total hits of the entity
    search), ``schemata`` (doc count per ``schema`` bucket) and
    ``collections`` (total hits of the collection search).
    """
    # Entity statistics: restrict to Entity.THING and bucket by schema.
    entity_filters = [authz_query(authz), {'term': {'schemata': Entity.THING}}]
    entity_query = {
        'size': 0,
        'query': {'bool': {'filter': entity_filters}},
        'aggs': {'schema': {'terms': {'field': 'schema', 'size': 1000}}},
    }
    response = es.search(index=entities_index(), body=entity_query)
    aggregations = response.get('aggregations')
    stats = {
        'count': response.get('hits').get('total'),
        'schemata': {},
    }
    for bucket in aggregations.get('schema').get('buckets'):
        stats['schemata'][bucket.get('key')] = bucket.get('doc_count')

    # Collection statistics (should we return categories?)
    collection_query = {
        'size': 0,
        'query': {'bool': {'filter': [authz_query(authz)]}},
    }
    response = es.search(index=collections_index(), body=collection_query)
    stats['collections'] = response.get('hits').get('total')
    return stats
コード例 #4
0
def delete_collection(collection_id, sync=False):
    """Delete a collection's document from the collections index.

    The id is converted with ``str`` before use, ``sync`` is forwarded as
    the ``refresh`` argument, and a 404 response is ignored.
    """
    index_name = collections_index()
    doc_id = str(collection_id)
    es.delete(index_name,
              doc_type='doc',
              id=doc_id,
              refresh=sync,
              ignore=[404])
コード例 #5
0
def delete_collection(collection_id, wait=True):
    """Delete a collection's entities, documents and its own index entry.

    ``wait`` is forwarded to ``delete_entities`` and ``delete_documents``;
    the final delete on the collections index ignores a 404 response.
    """
    # Content first, then the collection document itself.
    delete_entities(collection_id, wait=wait)
    delete_documents(collection_id, wait=wait)
    params = {
        'index': collections_index(),
        'doc_type': 'doc',
        'id': collection_id,
        'ignore': [404],
    }
    es.delete(**params)
コード例 #6
0
ファイル: collections.py プロジェクト: gavinrozzi/aleph
def get_collection(collection_id):
    """Look up one collection document in the collections index.

    The ``text`` field is excluded from the source and a 404 response is
    ignored; the raw response is passed through ``unpack_result``.
    """
    params = {
        'index': collections_index(),
        'doc_type': 'doc',
        'id': collection_id,
        'ignore': [404],
        '_source_exclude': ['text'],
    }
    raw = es.get(**params)
    return unpack_result(raw)
コード例 #7
0
ファイル: collections.py プロジェクト: GelLiNN/aleph
def delete_collection(collection_id, wait=True):
    """Delete a collection's records, entities and its own index entry.

    A ``collection_id`` term query is passed to ``query_delete`` for the
    records index and then the entities index (``wait`` is forwarded to
    both). Finally the collection document is deleted from the
    collections index, ignoring a 404 response.
    """
    by_collection = {'term': {'collection_id': collection_id}}
    query_delete(records_index(), by_collection, wait=wait)
    query_delete(entities_index(), by_collection, wait=wait)
    params = {
        'index': collections_index(),
        'doc_type': collection_type(),
        'id': collection_id,
        'ignore': [404],
    }
    es.delete(**params)
コード例 #8
0
ファイル: mapping.py プロジェクト: nt0z/aleph
def configure_collections():
    """Configure the collections index with its field mapping.

    Builds the mapping (one dynamic template plus explicit properties)
    and passes it to ``configure_index`` together with the collections
    index and ``index_settings()``.
    """
    # Any field matching ``schemata.*`` is mapped as a long.
    schemata_template = {
        "fields": {
            "match": "schemata.*",
            "mapping": {"type": "long"}
        }
    }
    creator = {
        "type": "object",
        "properties": {
            "id": KEYWORD,
            "type": KEYWORD,
            "name": {"type": "text", "fields": {"kw": KEYWORD}}
        }
    }
    team = {
        "type": "object",
        "properties": {
            "id": KEYWORD,
            "type": KEYWORD,
            "name": KEYWORD
        }
    }
    properties = {
        "label": {
            "type": "text",
            "analyzer": "icu_latin",
            "fields": {"kw": KEYWORD}
        },
        "collection_id": KEYWORD,
        "foreign_id": KEYWORD,
        "languages": KEYWORD,
        "countries": KEYWORD,
        "category": KEYWORD,
        "summary": RAW_TEXT,
        "publisher": KEYWORD,
        "publisher_url": KEYWORD,
        "data_url": KEYWORD,
        "info_url": KEYWORD,
        "kind": KEYWORD,
        "text": LATIN_TEXT,
        "casefile": {"type": "boolean"},
        "secret": {"type": "boolean"},
        "created_at": {"type": "date"},
        "updated_at": {"type": "date"},
        "count": {"type": "long"},
        "schemata": {"type": "object"},
        "creator": creator,
        "team": team,
    }
    mapping = {
        "dynamic_templates": [schemata_template],
        "properties": properties
    }
    configure_index(collections_index(), mapping, index_settings())
コード例 #9
0
ファイル: expand.py プロジェクト: renesugar/aleph
    def _resolve_index(self, cache):
        queries = OrderedDict()
        for (type_, id_) in cache.keys():
            if type_ in [Collection]:
                index = collections_index()
                queries[(type_, id_)] = {'_index': index, '_id': id_}
            elif type_ in [Document, Entity]:
                index = entities_index()
                queries[(type_, id_)] = {'_index': index, '_id': id_}

        if not len(queries):
            return

        results = es.mget(body={'docs': queries.values()},
                          _source_exclude=['text'])
        for key, doc in zip(queries.keys(), results['docs']):
            cache[key] = unpack_result(doc)
コード例 #10
0
ファイル: admin.py プロジェクト: atom-cmd/eskom-enquiry
def upgrade_search():
    """Create each search index if needed and apply its mapping.

    For the collections, entity and record indexes: create the index
    from a copy of ``INDEX_SETTINGS`` (400/404 responses ignored), put
    the ``doc`` mapping, then open, refresh and clear the cache of the
    index (again ignoring 400/404).
    """
    targets = [
        (collections_index(), COLLECTION_MAPPING),
        (entity_index(), ENTITY_MAPPING),
        (record_index(), RECORD_MAPPING),
    ]
    for index, mapping in targets:
        log.info("Creating index: %s", index)
        # Work on a copy so the shared settings are never modified.
        index_settings = deepcopy(INDEX_SETTINGS)
        if index == record_index():
            # optimise records for bulk write
            index_settings['index']['refresh_interval'] = '-1'
        es.indices.create(index, body=index_settings, ignore=[404, 400])
        es.indices.put_mapping(index=index, doc_type='doc', body=mapping)
        maintenance = (
            es.indices.open,
            es.indices.refresh,
            es.indices.clear_cache,
        )
        for step in maintenance:
            step(index=index, ignore=[400, 404])
コード例 #11
0
    def _resolve_index(self, cache):
        queries = []
        for (type_, id_) in cache.keys():
            if type_ in [Collection]:
                index = collections_index()
                query = {'_index': index, '_id': id_}
                queries.append(((type_, id_), query))
            elif type_ in [Document, Entity]:
                for index in entities_index_list():
                    query = {'_index': index, '_id': id_}
                    queries.append(((type_, id_), query))

        if not len(queries):
            return

        results = es.mget(body={'docs': [q[1] for q in queries]},
                          _source_exclude=['text'])
        for (key, _), doc in zip(queries, results['docs']):
            if cache.get(key) is None:
                cache[key] = unpack_result(doc)
コード例 #12
0
def get_instance_stats(authz):
    """Count the entities and collections matched by the authz filter.

    Two zero-size searches are run, one per index, each filtered only by
    ``authz_query(authz)``. Returns a dict with the hit totals under
    ``entities`` and ``collections``.
    """
    def count_query():
        # Each search gets its own query dict and its own authz filter.
        return {
            'size': 0,
            'query': {'bool': {'filter': [authz_query(authz)]}},
        }

    # Entity stats (no schemata term filter is applied here).
    entity_query = count_query()
    entity_hits = es.search(index=entities_index(), body=entity_query)

    # Collection stats (should we return categories?)
    collection_query = count_query()
    collection_hits = es.search(index=collections_index(),
                                body=collection_query)

    totals = {}
    totals['entities'] = entity_hits.get('hits').get('total')
    totals['collections'] = collection_hits.get('hits').get('total')
    return totals
コード例 #13
0
ファイル: __init__.py プロジェクト: mustafaascha/aleph
 def get_index(self):
     """Return the value of ``collections_index()``."""
     index_name = collections_index()
     return index_name
コード例 #14
0
ファイル: collections.py プロジェクト: gavinrozzi/aleph
def index_collection(collection):
    """Write a collection's metadata and content statistics to the index.

    A collection with ``deleted_at`` set is removed from the index via
    ``delete_collection`` instead. Otherwise the metadata is combined
    with entity counts aggregated from the entities index, stored with
    ``index_safe``, and the collections index is refreshed. Returns the
    value returned by ``index_safe`` (or by ``delete_collection``).
    """
    if collection.deleted_at is not None:
        return delete_collection(collection.id)

    data = {
        'foreign_id': collection.foreign_id,
        'created_at': collection.created_at,
        'updated_at': collection.updated_at,
        'label': collection.label,
        'kind': collection.kind,
        'summary': collection.summary,
        'category': Collection.DEFAULT,
        'publisher': collection.publisher,
        'publisher_url': collection.publisher_url,
        'info_url': collection.info_url,
        'data_url': collection.data_url,
        'casefile': collection.casefile,
        'roles': collection.roles,
        'schemata': {},
        'team': []
    }
    # Every string-valued field collected so far feeds the text field.
    texts = [value for value in data.values() if isinstance(value, str)]

    if collection.category in Collection.CATEGORIES:
        data['category'] = collection.category

    creator = collection.creator
    if creator is not None:
        data['creator'] = {
            'id': creator.id,
            'type': creator.type,
            'name': creator.name
        }
        texts.append(creator.name)

    team = data['team']
    for member in collection.team:
        team.append({
            'id': member.id,
            'type': member.type,
            'name': member.name
        })
        texts.append(member.name)

    def terms(field, size):
        # Terms aggregation over ``field`` limited to ``size`` buckets.
        return {'terms': {'field': field, 'size': size}}

    # Compute some statistics on the content of a collection.
    stats_query = {
        'size': 0,
        'query': {
            'bool': {
                'filter': [
                    {'term': {'collection_id': collection.id}},
                    {'term': {'schemata': Entity.THING}},
                ]
            }
        },
        'aggs': {
            'schema': terms('schema', 1000),
            'countries': terms('countries', 500),
            'languages': terms('languages', 100),
        }
    }
    result = search_safe(index=entities_index(), body=stats_query)
    aggregations = result.get('aggregations')
    data['count'] = result['hits']['total']

    # expose entities by schema count.
    for bucket in aggregations['schema']['buckets']:
        data['schemata'][bucket['key']] = bucket['doc_count']

    def bucket_keys(name):
        # Keys of the buckets returned for the named aggregation.
        return [bucket['key'] for bucket in aggregations[name]['buckets']]

    # if no countries or langs are given, take the most common from the data.
    countries = collection.countries
    if countries is None or len(countries) == 0:
        countries = bucket_keys('countries')
    data['countries'] = exactitude.countries.normalize_set(countries)

    languages = collection.languages
    if languages is None or len(languages) == 0:
        languages = bucket_keys('languages')
    data['languages'] = exactitude.languages.normalize_set(languages)

    # Add an ASCII-normalised copy of every text next to the original.
    texts += [normalize(text, ascii=True) for text in texts]
    data['text'] = index_form(texts)
    indexed = index_safe(collections_index(), collection.id, data)
    refresh_index(index=collections_index())
    return indexed
コード例 #15
0
ファイル: collections.py プロジェクト: gavinrozzi/aleph
def delete_collection(collection_id):
    """Remove a collection's document from the collections index.

    Passes an ``ids`` query for the stringified ``collection_id`` to
    ``query_delete`` on the collections index, then refreshes that index.

    :param collection_id: identifier of the collection; converted with
        ``str`` before it is placed in the query.
    """
    # The ``ids`` query documents ``values`` as an array of ids, so wrap
    # the single id in a list instead of sending a bare string.
    q = {'ids': {'values': [str(collection_id)]}}
    query_delete(collections_index(), q)
    refresh_index(index=collections_index())
コード例 #16
0
def delete(id):
    """Delete a collection and answer with an empty 204 response.

    The collection is fetched with ``get_db_collection`` using
    ``request.authz.WRITE``, handed to ``delete_collection``, and the
    collections index is refreshed afterwards.
    """
    target = get_db_collection(id, request.authz.WRITE)
    delete_collection(target)
    index_name = collections_index()
    refresh_index(index_name)
    no_content = ('', 204)
    return no_content
コード例 #17
0
ファイル: expand.py プロジェクト: public-people/aleph
 def _type_dispatch(self, type_):
     """Map a model type to the index used for it.

     ``Collection`` maps to ``collections_index()``, ``Document`` and
     ``Entity`` map to ``entities_index()``; any other value is returned
     unchanged.
     """
     resolved = type_
     if type_ in (Collection,):
         resolved = collections_index()
     elif type_ in (Document, Entity):
         resolved = entities_index()
     return resolved
コード例 #18
0
ファイル: triples.py プロジェクト: Ro9ueAdmin/aleph
def query_collections():
    """Search the collections index with a ``match_all`` query.

    The request asks for up to 9999 hits and the raw search response is
    returned as is.
    """
    match_everything = {'match_all': {}}
    body = {'query': match_everything, 'size': 9999}
    return es.search(index=collections_index(), body=body)
コード例 #19
0
def index_collection(collection, sync=False):
    """Write a collection's metadata and content statistics to the index.

    A collection with ``deleted_at`` set is removed via
    ``delete_collection`` instead. Otherwise the metadata is combined
    with the output of ``get_collection_stats`` and stored through
    ``index_safe``, with ``sync`` forwarded as its ``refresh`` argument.
    Returns whatever ``index_safe`` (or ``delete_collection``) returns.
    """
    if collection.deleted_at is not None:
        return delete_collection(collection.id)

    data = {
        'foreign_id': collection.foreign_id,
        'created_at': collection.created_at,
        'updated_at': collection.updated_at,
        'label': collection.label,
        'kind': collection.kind,
        'summary': collection.summary,
        'category': Collection.DEFAULT,
        'publisher': collection.publisher,
        'publisher_url': collection.publisher_url,
        'info_url': collection.info_url,
        'data_url': collection.data_url,
        'casefile': collection.casefile,
        'secret': collection.secret,
        'collection_id': collection.id,
        'schemata': {},
        'team': []
    }
    # Every string-valued field collected so far feeds the text field.
    texts = [value for value in data.values() if isinstance(value, str)]

    if collection.category in Collection.CATEGORIES:
        data['category'] = collection.category

    creator = collection.creator
    if creator is not None:
        data['creator'] = {
            'id': creator.id,
            'type': creator.type,
            'name': creator.name
        }
        texts.append(creator.name)

    team = data['team']
    for member in collection.team:
        team.append({
            'id': member.id,
            'type': member.type,
            'name': member.name
        })
        texts.append(member.name)

    stats = get_collection_stats(collection.id)
    data['count'] = stats['count']

    # expose entities by schema count.
    thing = model.get(Entity.THING)
    for schema_name, total in stats['schemata'].items():
        schema = model.get(schema_name)
        # Only schemata known to the model that descend from THING count.
        if schema is None or not schema.is_a(thing):
            continue
        data['schemata'][schema.name] = total

    # if no countries or langs are given, take the most common from the data.
    countries = ensure_list(collection.countries) or stats['countries'].keys()
    data['countries'] = registry.country.normalize_set(countries)

    languages = ensure_list(collection.languages) or stats['languages'].keys()
    data['languages'] = registry.language.normalize_set(languages)

    # Add an ASCII-normalised copy of every text next to the original.
    texts += [normalize(text, ascii=True) for text in texts]
    data['text'] = index_form(texts)
    return index_safe(collections_index(), collection.id, data, refresh=sync)