Example #1
0
def download_db(url: str = CONCEPTNET_DB_URL,
                db_path: PathOrStr = CONCEPTNET_DB_NAME,
                delete_compressed_db: bool = True) -> None:
    """Fetch the compressed ConceptNet database and unpack it.

    The archive is downloaded into the directory that contains the target
    database file and extracted there. If the target file name differs from
    `CONCEPTNET_DB_NAME`, the extracted file is renamed to the target name.

    Args:
        url: Link to compressed ConceptNet database.
        db_path: Path to resulting database. When it points to an existing
            directory, the database file path is obtained from
            `_generate_db_path`.
        delete_compressed_db: Delete compressed database after extraction.

    Raises:
        FileExistsError: If `db_path` is a directory and the database path
            generated inside it already exists, or if the download
            destination for the archive already exists.
    """

    print("Download compressed database")
    target = Path(db_path).expanduser().resolve()
    if target.is_dir():
        target = _generate_db_path(target)
        if target.is_file():
            raise FileExistsError(17, "File already exists", str(target))

    out_dir = target.parent
    archive = _get_download_destination_path(out_dir, url)
    if archive.is_file():
        raise FileExistsError(17, "File already exists", str(archive))

    SmartDL(url, str(archive)).start()
    try:
        with zipfile.ZipFile(str(archive), 'r') as zip_f:
            print("Extract compressed database (this can take a few minutes)")
            zip_f.extractall(out_dir)
        # The extracted file is looked up under the default name; move it to
        # the requested name when they differ.
        if target.name != CONCEPTNET_DB_NAME:
            (out_dir / CONCEPTNET_DB_NAME).rename(target)
    finally:
        # Runs even when extraction or renaming fails, so the archive is not
        # left behind unless the caller asked to keep it.
        if delete_compressed_db and archive.is_file():
            archive.unlink()
Example #2
0
def connect(
    db_path: PathOrStr = CONCEPTNET_DB_NAME,
    db_download_url: Optional[str] = CONCEPTNET_DB_URL,
    delete_compressed_db: bool = True,
    dump_download_url: str = CONCEPTNET_DUMP_DOWNLOAD_URL,
    load_dump_edge_count: int = CONCEPTNET_EDGE_COUNT,
    delete_compressed_dump: bool = True,
    delete_dump: bool = True,
) -> None:
    """Connect to the ConceptNet database, obtaining it first if necessary.

    If the database file exists, it is opened. If it does not exist, there
    are two options: download a ready-made database, or build one from the
    compressed ConceptNet dump via `prepare_db` (pass `db_download_url=None`
    for this option).

    NOTE(review): in the dump branch this function does not call `_open_db`
    itself; presumably `prepare_db` leaves the database opened - confirm.

    Args:
        db_path: Path to the database. When it points to an existing
            directory, the database file path is obtained from
            `_generate_db_path`.
        db_download_url: Link to compressed ConceptNet database. Pass `None` to build the db from dump.
        delete_compressed_db: Delete compressed database after extraction.
        dump_download_url: Link to compressed ConceptNet dump.
        load_dump_edge_count: Number of edges to load from the beginning of the dump file. Can be useful for testing.
        delete_compressed_dump: Delete compressed dump after unpacking.
        delete_dump: Delete dump after loading into database.
    """
    resolved = Path(db_path).expanduser().resolve()
    if resolved.is_dir():
        resolved = _generate_db_path(resolved)

    try:
        # A missing file is reported through the same exception type that
        # `_open_db` could raise, so both cases share one recovery path.
        if not resolved.is_file():
            raise FileNotFoundError(2, "No such file", str(resolved))
        _open_db(path=resolved)
    except FileNotFoundError:
        print(f"File not found: {resolved}")
        if db_download_url is None:
            prepare_db(
                db_path=resolved,
                dump_download_url=dump_download_url,
                load_dump_edge_count=load_dump_edge_count,
                delete_compressed_dump=delete_compressed_dump,
                delete_dump=delete_dump,
            )
            return
        download_db(
            url=db_download_url,
            db_path=resolved,
            delete_compressed_db=delete_compressed_db,
        )
        _open_db(resolved)
Example #3
0
def prepare_db(
    db_path: PathOrStr,
    dump_download_url: str = CONCEPTNET_DUMP_DOWNLOAD_URL,
    load_dump_edge_count: int = CONCEPTNET_EDGE_COUNT,
    delete_compressed_dump: bool = True,
    delete_dump: bool = True,
) -> None:
    """Prepare ConceptNet database.

    This function downloads the compressed ConceptNet dump, extracts it, and loads it into database. First two steps
    are optional, and are executed only if needed: loading is attempted first, extraction is attempted only if loading
    raises `FileNotFoundError`, and the download happens only if that second attempt raises `FileNotFoundError` too.

    Args:
        db_path: Path to the resulting database. When it points to an existing directory, the database file path is
            obtained from `_generate_db_path`.
        dump_download_url: Link to compressed ConceptNet dump. The local names of the compressed and the extracted
            dump are derived from this link.
        load_dump_edge_count: Number of edges to load from the beginning of the dump file. Can be useful for testing.
        delete_compressed_dump: Delete compressed dump after extraction.
        delete_dump: Delete dump after loading into database.

    Raises:
        FileExistsError: If `db_path` is a directory and the database path generated inside it already exists.
    """

    db_path = Path(db_path).expanduser().resolve()
    if db_path.is_dir():
        db_path = _generate_db_path(db_path)
        if db_path.is_file():
            raise FileExistsError(
                17, "File already exists and it is not a valid database",
                str(db_path))

    print("Prepare database")
    # Derive the local file names from the URL that is actually downloaded,
    # not from the default constant; otherwise a custom `dump_download_url`
    # could make the extract/load steps look for a file that was never
    # fetched.
    compressed_dump_path = _get_download_destination_path(
        db_path.parent, dump_download_url)
    # The extracted dump sits next to the archive, minus its last suffix.
    dump_path = compressed_dump_path.with_suffix('')

    db_path.parent.mkdir(parents=True, exist_ok=True)

    load_dump_to_db_ = partial(
        load_dump_to_db,
        dump_path=dump_path,
        db_path=db_path,
        edge_count=load_dump_edge_count,
        delete_dump=delete_dump,
    )
    extract_compressed_dump_ = partial(
        extract_compressed_dump,
        compressed_dump_path=compressed_dump_path,
        delete_compressed_dump=delete_compressed_dump,
    )
    download_dump_ = partial(
        download_dump,
        url=dump_download_url,
        out_dir_path=db_path.parent,
    )

    # Cheapest step first; fall back one stage at a time. This relies on the
    # helpers raising FileNotFoundError when the file they need is absent.
    try:
        load_dump_to_db_()
    except FileNotFoundError:
        try:
            extract_compressed_dump_()
            load_dump_to_db_()
        except FileNotFoundError:
            download_dump_()
            extract_compressed_dump_()
            load_dump_to_db_()
    finally:
        # Enforce the delete flags even when a step above failed midway.
        if delete_compressed_dump and compressed_dump_path.is_file():
            compressed_dump_path.unlink()
        if delete_dump and dump_path.is_file():
            dump_path.unlink()