def download_from_nexus(uri, config_file_path, output_path,
                        nexus_endpoint, nexus_bucket, unzip=False):
    """Download the file attached to a Nexus resource, optionally unzipping it.

    A forge session is opened against the given endpoint and bucket, the
    resource identified by ``uri`` is retrieved, and the file referenced by
    its ``distribution.contentUrl`` is downloaded into ``output_path``
    (with ``overwrite=True``).

    Args:
        uri: Identifier of the resource to retrieve.
        config_file_path: Configuration passed to ``KnowledgeGraphForge``.
        output_path: Directory the file is downloaded (and extracted) into.
        nexus_endpoint: Endpoint passed to the forge session.
        nexus_bucket: Bucket passed to the forge session.
        unzip: When true, the downloaded file is opened as a zip archive
            and extracted into ``output_path``.

    Returns:
        The resource object returned by ``forge.retrieve``.
    """
    session = KnowledgeGraphForge(
        config_file_path,
        endpoint=nexus_endpoint,
        bucket=nexus_bucket,
    )
    resource = session.retrieve(id=uri)

    # The local file name is taken from the resource's distribution.
    target = os.path.join(output_path, resource.distribution.name)
    print(f"Downloading the file to '{target}'")
    session.download(
        resource,
        follow="distribution.contentUrl",
        path=output_path,
        overwrite=True,
    )

    if not unzip:
        return resource

    print("Decompressing ...")
    with zipfile.ZipFile(target, 'r') as archive:
        archive.extractall(output_path)
    return resource
def _preprocess_data(data, data_type, auth=None): """Preprocess input data according to the specified type. Possoble data types are: - "raw" use data as is provided in the request - "json_pgframe" create a PandasPGFrame from the provided JSON repr - "nexus_dataset" download a JSON dataset from Nexus and create a PandasPGFrame from this representation # - collection of Nexus resources to build a PG from # - (then i guess we need a bucket/org/project/token) """ if data_type == "raw": # Use passed data as is return data elif data_type == "json_pgframe": return PandasPGFrame.from_json(data) elif data_type == "nexus_dataset": if auth is None: raise ValueError( "To use Nexus-hosted property graph as the dataset " "authentication token should be provided in the " "request header") forge = KnowledgeGraphForge(app.config["FORGE_CONFIG"], endpoint=data["endpoint"], bucket=data["bucket"], token=auth) resource = forge.retrieve(data["resource_id"]) forge.download(resource, "distribution.contentUrl", app.config["DOWNLOAD_DIR"]) downloaded_file = os.path.join(app.config["DOWNLOAD_DIR"], resource.distribution.name) graph = PandasPGFrame.load_json(downloaded_file) os.remove(downloaded_file) return graph else: raise ValueError("Unknown data type")