def ensure_collection(foreign_id, label):
    """Register a collection on behalf of the CLI user and look it up again.

    An authorization object is derived from the role returned by
    ``Role.load_cli_user()``; it is passed to ``create_collection`` together
    with a config holding ``foreign_id`` and ``label``.

    Returns whatever ``Collection.by_foreign_id(foreign_id)`` yields after
    that call.
    """
    cli_authz = Authz.from_role(Role.load_cli_user())
    create_collection({'foreign_id': foreign_id, 'label': label}, cli_authz)
    return Collection.by_foreign_id(foreign_id)
def bulkload(file_name):
    """Load entities from the specified mapping file.

    Each top-level key of the loaded mapping is used as a collection foreign
    ID. The entry is stamped with that foreign ID (and with a ``label`` that
    falls back to the foreign ID), passed to ``create_collection``, and then
    queued as an ``OP_BULKLOAD`` task against the collection looked up by
    foreign ID.
    """
    log.info("Loading bulk data from: %s", file_name)
    mapping = load_mapping_file(file_name)
    for foreign_id, spec in mapping.items():
        spec['foreign_id'] = foreign_id
        # Keep an explicit label if the mapping has one; otherwise reuse the ID.
        spec.setdefault('label', foreign_id)
        create_collection(spec)
        target = Collection.by_foreign_id(foreign_id)
        queue_task(target, OP_BULKLOAD, payload=spec)
def crawldir(path, language=None, foreign_id=None):
    """Crawl the given directory.

    If ``foreign_id`` is not supplied it is derived as
    ``'directory:<slugified path>'``. The collection is registered through
    ``create_collection`` using the directory's name as label, fetched back
    by foreign ID, and handed to ``crawl_directory``.

    ``language`` is accepted but not used anywhere in this body.
    """
    directory = Path(path)
    if foreign_id is None:
        foreign_id = 'directory:%s' % slugify(directory)
    collection_data = {'foreign_id': foreign_id, 'label': directory.name}
    create_collection(collection_data)
    collection = Collection.by_foreign_id(foreign_id)
    log.info('Crawling %s to %s (%s)...', directory, foreign_id, collection.id)
    crawl_directory(collection, directory)
    log.info('Complete. Make sure a worker is running :)')
def crawldir(path, language=None, foreign_id=None):
    """Crawl the given directory.

    If ``foreign_id`` is not supplied it is derived as
    ``'directory:<slugified path>'``. The collection is registered through
    ``create_collection`` with the CLI user's authorization, the directory's
    name as label and ``casefile`` set to ``False``; it is then fetched back
    by foreign ID and handed to ``crawl_directory``.

    ``language`` is accepted but not used anywhere in this body.
    """
    directory = Path(path)
    if foreign_id is None:
        foreign_id = 'directory:%s' % slugify(directory)
    cli_authz = Authz.from_role(Role.load_cli_user())
    collection_data = {
        'foreign_id': foreign_id,
        'label': directory.name,
        'casefile': False,
    }
    create_collection(collection_data, cli_authz)
    collection = Collection.by_foreign_id(foreign_id)
    log.info('Crawling %s to %s (%s)...', directory, foreign_id, collection.id)
    crawl_directory(collection, directory)
    log.info('Complete. Make sure a worker is running :)')
def create():
    """Create a collection from the request body and return it serialized.

    The request is gated on ``request.authz.logged_in``. The body is parsed
    with ``CollectionSchema``, the acting role is loaded from
    ``request.authz.id``, and the ``sync`` request flag is forwarded to
    ``create_collection``.
    """
    require(request.authz.logged_in)
    payload = parse_request(CollectionSchema)
    creator = Role.by_id(request.authz.id)
    collection = create_collection(
        payload,
        role=creator,
        sync=get_flag('sync'),
    )
    return serialize_data(collection, CollectionSchema)
def create():
    """
    ---
    post:
      summary: Create a collection
      description: Create a collection with the given metadata
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CollectionCreate'
      tags:
        - Collection
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Collection'
    """
    # Gate the request on the caller being logged in.
    require(request.authz.logged_in)
    payload = parse_request("CollectionCreate")
    # NOTE(review): the second get_flag argument is presumably the default
    # used when the request carries no "sync" flag -- confirm against get_flag.
    run_sync = get_flag("sync", True)
    created = create_collection(payload, request.authz, sync=run_sync)
    # Respond through the regular `view` handler for the new collection's id.
    collection_id = created.get("id")
    return view(collection_id)
def create():
    """Create a collection from the request body and return its view.

    The request is gated on ``request.authz.logged_in``. The body is parsed
    with ``CollectionSchema`` and passed to ``create_collection`` together
    with the role loaded from ``request.authz.id``. ``refresh_index`` is then
    called on the collections index before the response is produced by
    ``view`` for the new collection's id.
    """
    require(request.authz.logged_in)
    payload = parse_request(CollectionSchema)
    created = create_collection(payload, role=Role.by_id(request.authz.id))
    refresh_index(collections_index())
    return view(created.id)
def create():
    """
    ---
    post:
      summary: Create a collection
      description: Create a collection with the given metadata
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CollectionCreate'
      tags:
        - Collection
      responses:
        '200':
          description: OK
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Collection'
    """
    # Gate the request on the caller being logged in.
    require(request.authz.logged_in)
    payload = parse_request('CollectionCreate')
    # The "sync" request flag is forwarded unchanged to create_collection.
    run_sync = get_flag('sync')
    created = create_collection(payload, request.authz, sync=run_sync)
    return CollectionSerializer.jsonify(created)
def create():
    """Create a collection from the request body and return it as JSON.

    The request is gated on ``request.authz.logged_in``. The body is parsed
    with ``CollectionCreateSchema``, the acting role is loaded from
    ``request.authz.id``, and the ``sync`` request flag is forwarded to
    ``create_collection``. The result is rendered by
    ``CollectionSerializer.jsonify``.
    """
    require(request.authz.logged_in)
    payload = parse_request(CollectionCreateSchema)
    creator = Role.by_id(request.authz.id)
    created = create_collection(
        payload,
        role=creator,
        sync=get_flag('sync'),
    )
    return CollectionSerializer.jsonify(created)
def bulk_load(config):
    """Bulk load entities from a CSV file or SQL database.

    This is done by mapping the rows in the source data to entities and links
    which can be understood by the entity index.

    ``config`` maps collection foreign IDs to their settings. Each settings
    dict is updated in place with its ``foreign_id`` and a ``label`` that
    falls back to the foreign ID, then passed to ``create_collection``. One
    ``bulk_load_query`` task is dispatched per query found under the
    ``queries``/``query`` keys.
    """
    # Function-scope import, as in the original; presumably avoids a circular
    # import at module load time.
    from aleph.logic.collections import create_collection

    for foreign_id, spec in config.items():
        spec['foreign_id'] = foreign_id
        spec.setdefault('label', foreign_id)
        collection = create_collection(spec)
        queries = dict_list(spec, 'queries', 'query')
        for query in queries:
            task_args = [collection.id, query]
            bulk_load_query.apply_async(task_args, priority=6)
def bulk_load(config):
    """Bulk load entities from a CSV file or SQL database.

    This is done by mapping the rows in the source data to entities and links
    which can be understood by the entity index.

    ``config`` maps collection foreign IDs to their settings. Each settings
    dict is updated in place with its ``foreign_id`` and a ``label`` that
    falls back to the foreign ID, then passed to ``create_collection``. One
    ``bulk_load_query`` task is dispatched per query found under the
    ``queries``/``query`` keys, addressed by the ``id`` of the result.
    """
    # Function-scope import, as in the original; presumably avoids a circular
    # import at module load time.
    from aleph.logic.collections import create_collection

    for foreign_id, spec in config.items():
        spec['foreign_id'] = foreign_id
        spec.setdefault('label', foreign_id)
        # FIXME: this does not perform collection metadata validation.
        collection_id = create_collection(spec).get('id')
        for query in keys_values(spec, 'queries', 'query'):
            task_args = [collection_id, query]
            bulk_load_query.apply_async(task_args, priority=6)
def crawldir(path, language=None, foreign_id=None):
    """Crawl the given directory.

    The path is normalised, made absolute and passed through ``decode_path``.
    If the result is ``None`` or does not exist on disk, an error is logged
    and the function returns without doing anything else.

    If ``foreign_id`` is not supplied it is derived as
    ``'directory:<slugified path>'``. ``create_collection`` receives the
    base name as label and ``language`` under the ``languages`` key. A
    document keyed by collection and path is then fetched via
    ``Document.by_keys``, given the base name as file name, committed, and
    handed to ``ingest_document``.
    """
    normalized = os.path.normpath(path)
    absolute = os.path.abspath(normalized)
    path = decode_path(absolute)
    if path is None or not os.path.exists(path):
        log.error("Invalid path: %r", path)
        return

    base_name = os.path.basename(path)
    if foreign_id is None:
        foreign_id = 'directory:%s' % slugify(path)
    metadata = {'label': base_name, 'languages': language}
    collection = create_collection(foreign_id, metadata)
    log.info('Crawling %s to %s...', path, foreign_id)

    # The directory itself is represented by a document keyed on its path.
    root_document = Document.by_keys(collection=collection, foreign_id=path)
    root_document.file_name = base_name
    db.session.commit()
    ingest_document(root_document, path)
def create():
    """Create a collection from the request body and return it as JSON.

    The request is gated on ``request.authz.logged_in``. The body is parsed
    with ``CollectionCreateSchema`` and passed to ``create_collection`` along
    with the request's authorization object and the ``sync`` request flag.
    The result is rendered by ``CollectionSerializer.jsonify``.
    """
    require(request.authz.logged_in)
    payload = parse_request(CollectionCreateSchema)
    run_sync = get_flag('sync')
    created = create_collection(payload, request.authz, sync=run_sync)
    return CollectionSerializer.jsonify(created)