def download_db(
        url: str = CONCEPTNET_DB_URL,
        db_path: PathOrStr = CONCEPTNET_DB_NAME,
        delete_compressed_db: bool = True,
) -> None:
    """Fetch the zipped ConceptNet database and unpack it.

    The archive is downloaded into the directory that will hold the database
    and extracted there. If the requested database file name differs from
    `CONCEPTNET_DB_NAME`, the file at that default name is renamed to the
    requested one after extraction.

    Args:
        url: Link to compressed ConceptNet database.
        db_path: Path to resulting database. If it is an existing directory,
            the database path inside it is produced by `_generate_db_path`.
        delete_compressed_db: Delete compressed database after extraction.

    Raises:
        FileExistsError: If the database file or the compressed archive is
            already present at its destination.
    """
    print("Download compressed database")

    target = Path(db_path).expanduser().resolve()
    if target.is_dir():
        target = _generate_db_path(target)
    if target.is_file():
        raise FileExistsError(17, "File already exists", str(target))

    out_dir = target.parent
    archive_path = _get_download_destination_path(out_dir, url)
    if archive_path.is_file():
        raise FileExistsError(17, "File already exists", str(archive_path))

    SmartDL(url, str(archive_path)).start()
    try:
        with zipfile.ZipFile(str(archive_path), 'r') as archive:
            print("Extract compressed database (this can take a few minutes)")
            archive.extractall(out_dir)
        # Presumably the archive holds a file named CONCEPTNET_DB_NAME; move
        # it to the name the caller asked for when that differs.
        if target.name != CONCEPTNET_DB_NAME:
            (out_dir / CONCEPTNET_DB_NAME).rename(target)
    finally:
        # Clean up the archive whether or not extraction succeeded.
        if delete_compressed_db and archive_path.is_file():
            archive_path.unlink()
def connect(
        db_path: PathOrStr = CONCEPTNET_DB_NAME,
        db_download_url: Optional[str] = CONCEPTNET_DB_URL,
        delete_compressed_db: bool = True,
        dump_download_url: str = CONCEPTNET_DUMP_DOWNLOAD_URL,
        load_dump_edge_count: int = CONCEPTNET_EDGE_COUNT,
        delete_compressed_dump: bool = True,
        delete_dump: bool = True,
) -> None:
    """Connect to ConceptNet database, obtaining it first if necessary.

    If the database file exists it is opened directly. If it does not exist,
    there are two options: download a ready-made compressed database, or
    download the compressed ConceptNet dump, extract it, and load it into a
    new database (pass `db_download_url=None` for this option).

    Args:
        db_path: Path to the database. If it is an existing directory, the
            database path inside it is produced by `_generate_db_path`.
        db_download_url: Link to compressed ConceptNet database. Pass `None`
            to build the db from dump.
        delete_compressed_db: Delete compressed database after extraction.
        dump_download_url: Link to compressed ConceptNet dump.
        load_dump_edge_count: Number of edges to load from the beginning of
            the dump file. Can be useful for testing.
        delete_compressed_dump: Delete compressed dump after unpacking.
        delete_dump: Delete dump after loading into database.
    """
    location = Path(db_path).expanduser().resolve()
    if location.is_dir():
        location = _generate_db_path(location)

    try:
        if not location.is_file():
            raise FileNotFoundError(2, "No such file", str(location))
        _open_db(path=location)
    except FileNotFoundError:
        # Reached both when the file is absent and when opening it raises
        # FileNotFoundError.
        print(f"File not found: {location}")
        if db_download_url is None:
            prepare_db(
                db_path=location,
                dump_download_url=dump_download_url,
                load_dump_edge_count=load_dump_edge_count,
                delete_compressed_dump=delete_compressed_dump,
                delete_dump=delete_dump,
            )
        else:
            download_db(
                url=db_download_url,
                db_path=location,
                delete_compressed_db=delete_compressed_db,
            )
            _open_db(location)
def prepare_db(
        db_path: PathOrStr,
        dump_download_url: str = CONCEPTNET_DUMP_DOWNLOAD_URL,
        load_dump_edge_count: int = CONCEPTNET_EDGE_COUNT,
        delete_compressed_dump: bool = True,
        delete_dump: bool = True,
) -> None:
    """Prepare ConceptNet database.

    This function downloads the compressed ConceptNet dump, extracts it, and
    loads it into database. First two steps are optional, and are executed
    only if needed.

    Args:
        db_path: Path to the resulting database. If it is an existing
            directory, the database path inside it is produced by
            `_generate_db_path`.
        dump_download_url: Link to compressed ConceptNet dump. The local
            compressed-dump path is derived from this URL.
        load_dump_edge_count: Number of edges to load from the beginning of
            the dump file. Can be useful for testing.
        delete_compressed_dump: Delete compressed dump after extraction.
        delete_dump: Delete dump after loading into database.

    Raises:
        FileExistsError: If a file already exists at the database path.
    """
    db_path = Path(db_path).expanduser().resolve()
    if db_path.is_dir():
        db_path = _generate_db_path(db_path)
    if db_path.is_file():
        raise FileExistsError(
            17, "File already exists and it is not a valid database", str(db_path))

    print("Prepare database")
    # Derive the local archive path from the URL actually being downloaded
    # (not the module default), so a custom `dump_download_url` is extracted
    # and cleaned up under the same path.
    compressed_dump_path = _get_download_destination_path(
        db_path.parent, dump_download_url)
    # Dropping the last suffix of the archive name gives the dump path.
    dump_path = compressed_dump_path.with_suffix('')
    db_path.parent.mkdir(parents=True, exist_ok=True)

    load_dump_to_db_ = partial(
        load_dump_to_db,
        dump_path=dump_path,
        db_path=db_path,
        edge_count=load_dump_edge_count,
        delete_dump=delete_dump,
    )
    extract_compressed_dump_ = partial(
        extract_compressed_dump,
        compressed_dump_path=compressed_dump_path,
        delete_compressed_dump=delete_compressed_dump,
    )
    download_dump_ = partial(
        download_dump,
        url=dump_download_url,
        out_dir_path=db_path.parent,
    )

    # Try the cheapest step first and fall back on FileNotFoundError, which is
    # presumably raised when the input file of a step is missing:
    # load -> (extract, load) -> (download, extract, load).
    try:
        load_dump_to_db_()
    except FileNotFoundError:
        try:
            extract_compressed_dump_()
            load_dump_to_db_()
        except FileNotFoundError:
            download_dump_()
            extract_compressed_dump_()
            load_dump_to_db_()
    finally:
        # Remove intermediate files as requested, even if a step failed.
        if delete_compressed_dump and compressed_dump_path.is_file():
            compressed_dump_path.unlink()
        if delete_dump and dump_path.is_file():
            dump_path.unlink()