コード例 #1
0
 def download_raw_dataset(self):
     """Fetch every raw dataset file and store it in the cache location.

     Each URL in ``self.download_url`` is copied into the working directory
     yielded by ``upload_output_directory(self.raw_dataset_path)``, under the
     last ``/``-separated component of the URL.
     """
     with upload_output_directory(self.raw_dataset_path) as (staging_dir, _):
         for source_url in self.download_url:
             basename = source_url.split("/")[-1]
             remote_fs, _ = get_fs_and_path(source_url)
             destination = os.path.join(staging_dir, basename)
             # recursive=True lets a URL that names a directory be copied whole.
             remote_fs.get(source_url, destination, recursive=True)
コード例 #2
0
ファイル: summarize.py プロジェクト: ludwig-ai/ludwig
def download_artifacts(
        bench_config: Dict[str, Any], base_experiment: str,
        experimental_experiment: str,
        download_base_path: str) -> List[Union[Tuple[str, str], Any]]:
    """Download benchmarking artifacts for two experiments.

    One `download_one` call is scheduled per (dataset, experiment) pair and
    all of them are awaited together on a private event loop.

    Args:
        bench_config: bench config file. Can be the same one that was used to
            run these experiments. Its "datasets" entry is iterated and each
            item's "dataset_name" is read.
        base_experiment: name of the experiment we're comparing against.
        experimental_experiment: name of the experiment we're comparing.
        download_base_path: base path under which live the stored artifacts
            of the benchmarking experiments.

    Returns:
        One entry per scheduled download, in scheduling order: the value
        `download_one` produced, or the exception it raised (exceptions are
        collected instead of propagated).
    """
    fs, _ = get_fs_and_path(download_base_path)
    local_dir = os.path.join(os.getcwd(), "visualize-temp")
    os.makedirs(local_dir, exist_ok=True)

    coroutines = [
        download_one(fs, download_base_path, experiment["dataset_name"],
                     experiment_name, local_dir)
        for experiment in bench_config["datasets"]
        for experiment_name in (base_experiment, experimental_experiment)
    ]

    async def _gather_downloads():
        # gather() must be called with a running loop on modern Python.
        return await asyncio.gather(*coroutines, return_exceptions=True)

    # Use a dedicated loop: closing the process-wide default loop would make
    # any later call to this function (or other asyncio users) fail with
    # "Event loop is closed".
    loop = asyncio.new_event_loop()
    try:
        return loop.run_until_complete(_gather_downloads())
    finally:
        loop.close()
コード例 #3
0
def create_file(url):
    """Create an empty file at the path derived from `url` and return that path.

    The path component returned by `get_fs_and_path(url)` is joined onto a
    temporary directory and an empty file is opened there for writing. The
    temporary directory is deleted when the function returns.

    Args:
        url: location string handed to `get_fs_and_path`.

    Returns:
        The path component that `get_fs_and_path` produced for `url`.
    """
    _, path = get_fs_and_path(url)
    logging.info(f"saving url '{url}' to path '{path}'")
    with tempfile.TemporaryDirectory() as tmpdir:
        file_path = os.path.join(tmpdir, path)
        # exist_ok: when `path` has no directory part the parent is `tmpdir`
        # itself, which already exists and would otherwise raise
        # FileExistsError.
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, "w"):
            return path
コード例 #4
0
ファイル: utils.py プロジェクト: ludwig-ai/ludwig
def export_artifacts(experiment: Dict[str, str], report_path: str,
                     experiment_output_directory: str,
                     export_base_path: str) -> None:
    """Save the experiment artifacts under `export_base_path`.

    Uploads the metrics report, the experiment config and the model
    hyperparameters file, then a zip of the experiment output directory.
    Failure to zip or upload the output directory is logged and does not
    abort the export; a boto `ClientError` from the other uploads is logged
    via `translate_boto_error`.

    Args:
        experiment: experiment dict that contains "dataset_name" (e.g.
            ames_housing), "experiment_name" (specified by user), and
            "config_path" (path to experiment config, relative to
            ludwig/benchmarks/configs).
        report_path: path where the experiment metrics report is saved.
        experiment_output_directory: path where the model, data, and logs of
            the experiment are saved.
        export_base_path: remote or local path (directory) where artifacts
            are exported (e.g. s3://benchmarking.us-west-2.ludwig.com/bench/
            or your/local/bench/).
    """
    fs, _ = get_fs_and_path(export_base_path)
    try:
        export_full_path = os.path.join(export_base_path,
                                        experiment["dataset_name"],
                                        experiment["experiment_name"])
        fs.put(report_path,
               os.path.join(export_full_path, REPORT_JSON),
               recursive=True)
        fs.put(
            os.path.join("configs", experiment["config_path"]),
            os.path.join(export_full_path, CONFIG_YAML),
            recursive=True,
        )
        fs.put(
            os.path.join(experiment["dataset_name"], EXPERIMENT_RUN, "model",
                         MODEL_HYPERPARAMETERS_FILE_NAME),
            os.path.join(export_full_path, MODEL_HYPERPARAMETERS_FILE_NAME),
            recursive=True,
        )

        # zip experiment directory to export
        try:
            try:
                shutil.make_archive("artifacts", "zip",
                                    experiment_output_directory)
                fs.put("artifacts.zip",
                       os.path.join(export_full_path, "artifacts.zip"),
                       recursive=True)
            finally:
                # Remove the local archive even when the upload fails, so a
                # failed export does not leave a stale zip in the working
                # directory.
                if os.path.exists("artifacts.zip"):
                    os.remove("artifacts.zip")
        except Exception as e:
            # Best effort: the report and config uploads above already
            # succeeded, so only log the failure.
            logging.error(
                f"Couldn't export '{experiment_output_directory}' to bucket")
            logging.error(e)

        print("Uploaded metrics report and experiment config to\n\t",
              export_full_path)
    except ClientError as e:
        logging.error(translate_boto_error(e))
コード例 #5
0
def read_remote_parquet(path: str):
    """Read a parquet dataset from `path` through its fsspec filesystem.

    `get_fs_and_path` supplies the filesystem object and the path to read;
    the filesystem is wrapped in `PyFileSystem(FSSpecHandler(...))` and
    passed to `read_parquet`, whose result is returned unchanged.
    """
    remote_fs, resolved_path = get_fs_and_path(path)
    arrow_fs = PyFileSystem(FSSpecHandler(remote_fs))
    return read_parquet(resolved_path, filesystem=arrow_fs)