Example #1
def crawl_dir(api: AlephAPI, path: str, foreign_id: str, config: Dict):
    """Crawl a directory and upload its content to a collection

    params
    ------
    path: path of the directory
    foreign_id: foreign_id of the collection to use.
    config: metadata for the collection if it has to be created
    """
    _path = Path(path).resolve()
    collection = api.load_collection_by_foreign_id(foreign_id, config)
    collection_id = collection.get('id')
    _queue: Queue = Queue()
    _queue.put((_path, None, 1))
    threads = []
    for i in range(settings.THREADS):
        args = (_queue, api, collection_id, _path)
        thread = threading.Thread(target=_upload, args=args)
        thread.daemon = True
        thread.start()
        threads.append(thread)

    # block until all tasks are done
    _queue.join()
    for thread in threads:
        thread.join()
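For orientation, a call to this function might look like the following sketch. The host, API key, directory path, and collection metadata are placeholders, not values from the original; the config is assumed to be a plain dict of collection metadata such as a label.

from alephclient.api import AlephAPI

api = AlephAPI(host="https://aleph.example.org", api_key="...")  # placeholder credentials
config = {"label": "Test Leak"}  # assumed minimal collection metadata
crawl_dir(api, "/data/test_leak", "test_leak", config)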
Example #2
def crawl_dir(api: AlephAPI,
              path: str,
              foreign_id: str,
              config: Dict,
              index: bool = True):
    """Crawl a directory and upload its content to a collection

    params
    ------
    path: path of the directory
    foreign_id: foreign_id of the collection to use.
    config: metadata for the collection if it has to be created
    index: index the documents after ingest (default True)
    """
    root = Path(path).resolve()
    collection = api.load_collection_by_foreign_id(foreign_id, config)
    crawler = CrawlDirectory(api, collection, root, index=index)
    threads = []
    for i in range(settings.THREADS):
        thread = threading.Thread(target=crawler.execute)
        thread.daemon = True
        thread.start()
        threads.append(thread)

    # block until all tasks are done
    crawler.queue.join()
    for thread in threads:
        thread.join()
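This variant adds an index flag. Assuming index=False defers indexing of the uploaded documents, a call that only ingests might look like this (reusing the api object from the sketch above):

crawl_dir(api, "/data/test_leak", "test_leak", {}, index=False)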
Example #3
def bulk_load(api: AlephAPI, mapping_file: str):
    data = load_config_file(mapping_file)
    if not isinstance(data, dict):
        raise AlephException('mapping_file has to be a json dictionary')
    # Each top-level key of the mapping file is a collection foreign_id.
    for foreign_id, config in data.items():
        collection = api.load_collection_by_foreign_id(foreign_id, config)
        collection_id = collection['id']
        log.info(f"Bulk mapping collection ID: {collection_id}")
        api.map_collection(collection_id, data)
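A hedged usage sketch: the file name is a placeholder, and the only structural requirement visible in the code above is that the file parses to a dictionary keyed by collection foreign_id.

from alephclient.api import AlephAPI

api = AlephAPI()  # host and API key fall back to ALEPHCLIENT_HOST / ALEPHCLIENT_API_KEY
bulk_load(api, "mappings.yml")  # placeholder file name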
Example #4
def crawl_dir(
    api: AlephAPI,
    path: str,
    foreign_id: str,
    config: Dict,
    index: bool = True,
    nojunk: bool = False,
    parallel: int = 1,
):
    """Crawl a directory and upload its content to a collection

    params
    ------
    path: path of the directory
    foreign_id: foreign_id of the collection to use.
    config: metadata for the collection if it has to be created
    index: index the documents after ingest (default True)
    nojunk: skip known junk/system files while crawling
    parallel: number of parallel upload threads
    """
    root = Path(path).resolve()
    collection = api.load_collection_by_foreign_id(foreign_id, config)
    crawler = CrawlDirectory(api, collection, root, index=index, nojunk=nojunk)
    consumers = []

    # Use one thread to produce using scandir and at least one to consume
    # files for upload.
    producer = threading.Thread(target=crawler.crawl, daemon=True)
    producer.start()
    for i in range(max(1, parallel)):
        consumer = threading.Thread(target=crawler.consume, daemon=True)
        consumer.start()
        consumers.append(consumer)

    # Block until the producer is done with queueing the tree.
    producer.join()

    # Block until the file upload queue is drained.
    crawler.queue.join()

    # Poison the queue to signal end to each consumer.
    for consumer in consumers:
        crawler.queue.put((None, None))

    # Block until all file upload queue consumers are done.
    for consumer in consumers:
        consumer.join()
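The shutdown sequence above (join the producer, drain the queue, then enqueue one sentinel per consumer) is the standard poison-pill pattern. A minimal, self-contained sketch of the same idea, independent of CrawlDirectory:

import queue
import threading

def consume(q: queue.Queue) -> None:
    while True:
        item = q.get()
        try:
            if item is None:  # sentinel ("poison pill"): stop this consumer
                return
            print("processing", item)
        finally:
            q.task_done()  # required so q.join() can unblock

q: queue.Queue = queue.Queue()
workers = [threading.Thread(target=consume, args=(q,), daemon=True) for _ in range(2)]
for w in workers:
    w.start()
for item in range(5):
    q.put(item)
q.join()  # all real work items have been processed
for _ in workers:
    q.put(None)  # one pill per consumer
for w in workers:
    w.join()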
Example #5
def load_entities(json_file, root_path):
    api = AlephAPI()
    collection = api.load_collection_by_foreign_id('zz_occrp_pdi')
    cid = collection.get('id')
    api.write_entities(cid, generate_entities(json_file, root_path, api, cid))
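Assuming generate_entities (not shown here) yields entity data built from the JSON file, with root_path used to resolve referenced files, an invocation could be as simple as:

load_entities("entities.json", "/data/source_files")  # hypothetical paths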