Esempio n. 1
0
def ensure_collection(foreign_id, label):
    """Run ``create_collection`` for ``foreign_id`` and return the collection.

    The call is authorised as the CLI user role; afterwards the collection
    is looked up again by its foreign ID and that result is returned.
    """
    cli_authz = Authz.from_role(Role.load_cli_user())
    create_collection(dict(foreign_id=foreign_id, label=label), cli_authz)
    return Collection.by_foreign_id(foreign_id)
Esempio n. 2
0
def bulkload(file_name):
    """Queue a bulk-load task for every collection in a mapping file.

    Each top-level key of the loaded mapping is used as the collection's
    foreign ID (and as its label when none is given). The per-collection
    mapping is updated in place and passed on as the task payload.
    """
    log.info("Loading bulk data from: %s", file_name)
    mapping = load_mapping_file(file_name)
    for collection_key, spec in mapping.items():
        # Fall back to the foreign ID when the mapping carries no label.
        label = spec.get('label', collection_key)
        spec['foreign_id'] = collection_key
        spec['label'] = label
        create_collection(spec)
        queue_task(
            Collection.by_foreign_id(collection_key),
            OP_BULKLOAD,
            payload=spec,
        )
Esempio n. 3
0
def crawldir(path, language=None, foreign_id=None):
    """Crawl a directory into a collection.

    When no ``foreign_id`` is given, one is derived from the slugified
    path. The collection label is the final path component.
    NOTE(review): ``language`` is accepted but not used in this function.
    """
    directory = Path(path)
    if foreign_id is None:
        foreign_id = 'directory:%s' % slugify(directory)
    metadata = {'foreign_id': foreign_id, 'label': directory.name}
    create_collection(metadata)
    target = Collection.by_foreign_id(foreign_id)
    log.info('Crawling %s to %s (%s)...', directory, foreign_id, target.id)
    crawl_directory(target, directory)
    log.info('Complete. Make sure a worker is running :)')
Esempio n. 4
0
File: manage.py Progetto: wdsn/aleph
def crawldir(path, language=None, foreign_id=None):
    """Crawl a directory into a non-casefile collection as the CLI user.

    When no ``foreign_id`` is given, one is derived from the slugified
    path. The collection label is the final path component.
    NOTE(review): ``language`` is accepted but not used in this function.
    """
    directory = Path(path)
    if foreign_id is None:
        foreign_id = 'directory:%s' % slugify(directory)
    cli_authz = Authz.from_role(Role.load_cli_user())
    create_collection(
        {'foreign_id': foreign_id, 'label': directory.name, 'casefile': False},
        cli_authz,
    )
    target = Collection.by_foreign_id(foreign_id)
    log.info('Crawling %s to %s (%s)...', directory, foreign_id, target.id)
    crawl_directory(target, directory)
    log.info('Complete. Make sure a worker is running :)')
Esempio n. 5
0
def create():
    """Create a collection from the request body for the logged-in role.

    Requires an authenticated request. The body is parsed against
    ``CollectionSchema`` and the result is serialized with the same schema.
    """
    require(request.authz.logged_in)
    payload = parse_request(CollectionSchema)
    creator = Role.by_id(request.authz.id)
    created = create_collection(payload, role=creator, sync=get_flag('sync'))
    return serialize_data(created, CollectionSchema)
Esempio n. 6
0
def create():
    """
    ---
    post:
      summary: Create a collection
      description: Create a collection with the given metadata
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CollectionCreate'
      tags:
        - Collection
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Collection'
    """
    # NOTE(review): the docstring above looks like an OpenAPI YAML fragment,
    # presumably read by a spec generator -- keep it machine-parseable.
    require(request.authz.logged_in)
    payload = parse_request("CollectionCreate")
    # The "sync" flag is read with a default of True.
    created = create_collection(
        payload,
        request.authz,
        sync=get_flag("sync", True),
    )
    collection_id = created.get("id")
    return view(collection_id)
Esempio n. 7
0
def create():
    """Create a collection from the request body and return its view.

    Requires an authenticated request. The collections index is refreshed
    before the view for the new collection's ID is returned.
    """
    require(request.authz.logged_in)
    payload = parse_request(CollectionSchema)
    created = create_collection(payload, role=Role.by_id(request.authz.id))
    refresh_index(collections_index())
    return view(created.id)
Esempio n. 8
0
def create():
    """
    ---
    post:
      summary: Create a collection
      description: Create a collection with the given metadata
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CollectionCreate'
      tags:
        - Collection
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Collection'
    """
    # NOTE(review): the docstring above looks like an OpenAPI YAML fragment,
    # presumably read by a spec generator -- keep it machine-parseable.
    require(request.authz.logged_in)
    payload = parse_request('CollectionCreate')
    created = create_collection(
        payload,
        request.authz,
        sync=get_flag('sync'),
    )
    return CollectionSerializer.jsonify(created)
Esempio n. 9
0
def create():
    """Create a collection from the request body for the logged-in role.

    Requires an authenticated request. The body is parsed against
    ``CollectionCreateSchema`` and the created collection is returned
    through ``CollectionSerializer.jsonify``.
    """
    require(request.authz.logged_in)
    payload = parse_request(CollectionCreateSchema)
    creator = Role.by_id(request.authz.id)
    created = create_collection(payload, role=creator, sync=get_flag('sync'))
    return CollectionSerializer.jsonify(created)
Esempio n. 10
0
def bulk_load(config):
    """Bulk load entities from a CSV file or SQL database.

    ``config`` maps foreign IDs to per-collection settings. For each entry
    a collection is created (labelled with the foreign ID when no label is
    set), and one ``bulk_load_query`` task is dispatched per query listed
    under ``queries``/``query``. The settings mappings are updated in place.
    """
    # Function-scope import, kept as in the original
    # (presumably to avoid a circular import -- TODO confirm).
    from aleph.logic.collections import create_collection
    for collection_key, spec in config.items():
        label = spec.get('label', collection_key)
        spec['foreign_id'] = collection_key
        spec['label'] = label
        collection = create_collection(spec)
        for query in dict_list(spec, 'queries', 'query'):
            task_args = [collection.id, query]
            bulk_load_query.apply_async(task_args, priority=6)
Esempio n. 11
0
File: bulk.py Progetto: pudo/aleph
def bulk_load(config):
    """Bulk load entities from a CSV file or SQL database.

    ``config`` maps foreign IDs to per-collection settings. For each entry
    a collection is created (labelled with the foreign ID when no label is
    set), and one ``bulk_load_query`` task is dispatched per query listed
    under ``queries``/``query``. The settings mappings are updated in place.
    """
    # Function-scope import, kept as in the original
    # (presumably to avoid a circular import -- TODO confirm).
    from aleph.logic.collections import create_collection
    for collection_key, spec in config.items():
        label = spec.get('label', collection_key)
        spec['foreign_id'] = collection_key
        spec['label'] = label
        # FIXME: this does not perform collection metadata validation.
        collection_id = create_collection(spec).get('id')
        for query in keys_values(spec, 'queries', 'query'):
            task_args = [collection_id, query]
            bulk_load_query.apply_async(task_args, priority=6)
Esempio n. 12
0
def crawldir(path, language=None, foreign_id=None):
    """Ingest the given path as a document in a directory collection.

    The path is normalised, made absolute and decoded. If the result is
    ``None`` or does not exist, an error is logged and nothing is done.
    When no ``foreign_id`` is given, one is derived from the slugified
    path. ``language`` is stored under the collection's ``languages`` key.
    """
    normalized = os.path.normpath(path)
    path = decode_path(os.path.abspath(normalized))
    if not (path is not None and os.path.exists(path)):
        log.error("Invalid path: %r", path)
        return

    base_name = os.path.basename(path)
    if foreign_id is None:
        foreign_id = 'directory:%s' % slugify(path)

    metadata = {
        'label': base_name,
        'languages': language
    }
    collection = create_collection(foreign_id, metadata)
    log.info('Crawling %s to %s...', path, foreign_id)

    # The document is keyed on the path itself, within the collection.
    root_document = Document.by_keys(collection=collection, foreign_id=path)
    root_document.file_name = base_name
    db.session.commit()
    ingest_document(root_document, path)
Esempio n. 13
0
def create():
    """Create a collection from the request body using the request's authz.

    Requires an authenticated request. The body is parsed against
    ``CollectionCreateSchema`` and the created collection is returned
    through ``CollectionSerializer.jsonify``.
    """
    require(request.authz.logged_in)
    payload = parse_request(CollectionCreateSchema)
    created = create_collection(
        payload,
        request.authz,
        sync=get_flag('sync'),
    )
    return CollectionSerializer.jsonify(created)