コード例 #1
0
ファイル: test_datasets.py プロジェクト: pyronear/pyro-risks
    def test_download(self, mock_url_retrieve, mock_fname):
        """Check ``utils.download`` against several mocked payload formats.

        For each scenario the test sets the return values of the two mocks,
        calls ``utils.download`` into a temporary ``client/`` folder and
        asserts that the expected file exists afterwards. A final call checks
        that a ``ValueError`` is raised when unzipping is requested while the
        mocked file name carries a ``csv`` extension.

        Args:
            mock_url_retrieve: Mock whose return value is set to the raw
                payload bytes (presumably injected by a ``mock.patch``
                decorator that is not visible here).
            mock_fname: Mock whose return value is set to the fake file name
                (presumably injected the same way).
        """
        with tempfile.TemporaryDirectory() as destination:
            full_path = os.path.join(destination, "client/")

            # Each row: (mocked extension, payload factory, extra keyword
            # arguments for utils.download, file expected in full_path).
            scenarios = [
                ("tar.gz", self._make_tarfile, {}, "test_tar.csv"),
                ("zip", self._make_zipfile, {}, "test_zip.csv"),
                ("gz", self._make_gzipfile, {}, "test_gz.csv"),
                ("csv", self._make_csv, {"unzip": False}, "test_csv.csv"),
                ("gz", self._make_gzipfile, {"unzip": False}, "test_gz.gz"),
            ]

            for extension, build_payload, extra_kwargs, expected_name in scenarios:
                # The mocks must be primed before the call under test runs.
                mock_fname.return_value = self._mock_fname(extension)
                mock_url_retrieve.return_value = build_payload(destination).read()

                utils.download(
                    url="url",
                    default_extension="csv",
                    destination=full_path,
                    **extra_kwargs,
                )

                self.assertTrue(Path(full_path, expected_name).is_file())

            # Positional call with unzip=True on a "csv" file name must fail.
            mock_fname.return_value = self._mock_fname("csv")
            with self.assertRaises(ValueError):
                utils.download("url", "csv", True, full_path)
コード例 #2
0
ファイル: main.py プロジェクト: dataJSA/pyro-risks
def _download(url: str, default_extension: str, unzip: bool, destination: str):
    """Print a progress message, then forward the arguments to ``download``.

    Args:
        url: Passed through to ``download`` as ``url``.
        default_extension: Passed through as ``default_extension``.
        unzip: Passed through as ``unzip``.
        destination: Passed through as ``destination``; also shown in the
            console message.
    """
    message = f"Download {cfg.DATASET} dataset in {destination}"
    click.echo(message)

    download_options = {
        "url": url,
        "default_extension": default_extension,
        "unzip": unzip,
        "destination": destination,
    }
    download(**download_options)
コード例 #3
0
ファイル: load.py プロジェクト: pyronear/pyro-risks
def load_dataset(
    url: Optional[str] = None,
    path: Optional[str] = None,
    usecols: Optional[List[str]] = None,
    pipeline_cols: Optional[List[str]] = None,
    destination: str = None,
) -> Tuple[pd.DataFrame, pd.Series]:
    """
    Load Pyro Risks training datasets.

    Fill every omitted argument from ``cfg``, download the dataset when no
    file exists at ``path``, read it as CSV, parse the ``day`` column and
    split the frame into features and target.

    Args:
        url: Training dataset URL. Defaults to ``cfg.ERA5T_VIIRS_PIPELINE``.
        path: Dataset full path. Defaults to ``cfg.DATASET`` inside
            ``cfg.DATA_REGISTRY``.
        usecols: Columns read from the CSV. Defaults to the date, zone and
            target columns followed by ``cfg.PIPELINE_ERA5T_VARS``.
        pipeline_cols: Columns returned as features. Defaults to the date and
            zone columns followed by ``cfg.PIPELINE_ERA5T_VARS``.
        destination: Folder where the dataset should be saved. Defaults to
            ``cfg.DATA_REGISTRY``.

    Returns:
        Tuple[pd.DataFrame, pd.Series]: the ``pipeline_cols`` subset of the
        dataset and its ``cfg.TARGET`` column.
    """
    if url is None:
        url = cfg.ERA5T_VIIRS_PIPELINE
    if path is None:
        path = os.path.join(cfg.DATA_REGISTRY, cfg.DATASET)
    if usecols is None:
        usecols = [cfg.DATE_VAR, cfg.ZONE_VAR, cfg.TARGET] + cfg.PIPELINE_ERA5T_VARS
    if pipeline_cols is None:
        pipeline_cols = [cfg.DATE_VAR, cfg.ZONE_VAR] + cfg.PIPELINE_ERA5T_VARS
    if destination is None:
        destination = cfg.DATA_REGISTRY

    # Only fetch the file when nothing is present at the expected path.
    dataset_missing = not os.path.isfile(path)
    if dataset_missing:
        download(
            url=url,
            default_extension="csv",
            unzip=False,
            destination=destination,
        )

    def _parse_day(value):
        """Turn a ``YYYY-MM-DD`` value into a datetime; keep nulls as-is."""
        if pd.isnull(value):
            return value
        return datetime.strptime(str(value), "%Y-%m-%d")

    frame = pd.read_csv(path, usecols=usecols)
    # NOTE(review): the column name "day" is hard-coded here while the column
    # defaults above use cfg.DATE_VAR — confirm both always agree.
    frame["day"] = frame["day"].apply(_parse_day)

    features = frame[pipeline_cols]
    target = frame[cfg.TARGET]
    return features, target