def _verify_tag_path_exist(adls_file_system_client: core.AzureDLFileSystem, path: str):
    """
    Verify that the tag path exists; if it does not,
    ``adls_file_system_client.info`` raises ``FileNotFoundError``.

    Parameters
    ----------
    adls_file_system_client: core.AzureDLFileSystem
        the AzureDLFileSystem client to use
    path: str
        Path of the tag to check for existence.
    """
    adls_file_system_client.info(path)
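
# A minimal, hedged usage sketch of _verify_tag_path_exist: fail fast before
# starting any downloads. The path below is a hypothetical placeholder, not a
# value from this codebase.
def _example_tag_path_exists(adls_file_system_client: core.AzureDLFileSystem) -> bool:
    try:
        _verify_tag_path_exist(adls_file_system_client, "raw/asset-a/tag-1/")
    except FileNotFoundError:
        return False
    return True
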
def read_tag_files(
    adls_file_system_client: core.AzureDLFileSystem, tag: SensorTag, years: range
) -> pd.Series:
    """
    Download tag files for the given years into dataframes,
    and return as one dataframe.

    Parameters
    ----------
    adls_file_system_client: core.AzureDLFileSystem
        the AzureDLFileSystem client to use
    tag: SensorTag
        the tag to download data for
    years: range
        range object providing years to include

    Returns
    -------
    pd.Series:
        Series with all years for one tag.
    """
    tag_base_path = NcsReader.base_path_from_asset(tag.asset)

    if not tag_base_path:
        raise ValueError(f"Unable to find base path from tag {tag}")

    all_years = []
    for year in years:
        file_path = tag_base_path + f"/{tag.name}/{tag.name}_{year}.csv"
        logger.info(f"Parsing file {file_path}")

        info = adls_file_system_client.info(file_path)
        file_size = info.get("length") / (1024 ** 2)
        logger.info(f"File size: {file_size:.2f}MB")

        with adls_file_system_client.open(file_path, "rb") as f:
            df = pd.read_csv(
                f,
                sep=";",
                header=None,
                names=["Sensor", tag.name, "Timestamp", "Status"],
                usecols=[tag.name, "Timestamp"],
                dtype={tag.name: np.float32},
                parse_dates=["Timestamp"],
                date_parser=lambda col: pd.to_datetime(col, utc=True),
                index_col="Timestamp",
            )
            all_years.append(df)
            logger.info(f"Done parsing file {file_path}")

    combined = pd.concat(all_years)

    # Duplicated timestamps are common; keep the last occurrence.
    if combined.index.duplicated().any():
        combined = combined[~combined.index.duplicated(keep="last")]

    return combined[tag.name]
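
# Hedged usage sketch for read_tag_files. SensorTag is assumed to be the
# name/asset pair consumed by NcsReader.base_path_from_asset; "TAG-1",
# "asset-a", and the resample frequency are hypothetical values.
def _example_read_tag(adls_file_system_client: core.AzureDLFileSystem) -> pd.Series:
    tag = SensorTag(name="TAG-1", asset="asset-a")
    # range(2017, 2020) covers 2017-2019; the stop value is excluded.
    series = read_tag_files(adls_file_system_client, tag, years=range(2017, 2020))
    # Consumers typically resample the raw, irregular samples onto a fixed grid.
    return series.resample("10T").mean()
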
def read_tag_files(
    self,
    adls_file_system_client: core.AzureDLFileSystem,
    tag: SensorTag,
    years: range,
    dry_run: Optional[bool] = False,
    remove_status_codes: Optional[list] = [0],
    dl_base_path: Optional[str] = None,
) -> pd.Series:
    """
    Download tag files for the given years into dataframes,
    and return as one dataframe.

    Parameters
    ----------
    adls_file_system_client: core.AzureDLFileSystem
        the AzureDLFileSystem client to use
    tag: SensorTag
        the tag to download data for
    years: range
        range object providing years to include
    dry_run: Optional[bool]
        if True, don't download data; just check info, log, and return
    remove_status_codes: Optional[list]
        Removes data with Status code(s) in the list. By default it removes
        data with Status code 0.
    dl_base_path: Optional[str]
        Base path used to override the asset-to-path dictionary.
        Useful for demos and other non-production settings.

    Returns
    -------
    pd.Series:
        Series with all years for one tag.
    """
    tag_base_path = (
        dl_base_path if dl_base_path else NcsReader.base_path_from_asset(tag.asset)
    )

    if not tag_base_path:
        raise ValueError(f"Unable to find base path from tag {tag}")

    all_years = []
    logger.info(f"Downloading tag: {tag} for years: {years}")
    tag_name_encoded = quote(tag.name, safe=" ")

    NcsReader._verify_tag_path_exist(
        adls_file_system_client, f"{tag_base_path}/{tag_name_encoded}/"
    )
    dir_path = f"{tag_base_path}/{tag_name_encoded}"

    for year in years:
        file_path = None
        file_lookup = None
        # Try each configured lookup until one finds a file for this year.
        for v in self.file_lookups:
            file_path = v.lookup(
                adls_file_system_client, dir_path, tag_name_encoded, year
            )
            if file_path is not None:
                file_lookup = v
                break
        if file_lookup is None:
            continue
        file_type = file_lookup.file_type

        logger.info(f"Parsing file {file_path}")
        try:
            info = adls_file_system_client.info(file_path)
            file_size = info.get("length") / (1024 ** 2)
            logger.debug(f"File size for file {file_path}: {file_size:.2f}MB")

            if dry_run:
                logger.info("Dry run only, returning empty frame early")
                return pd.Series()

            before_downloading = timeit.default_timer()
            with adls_file_system_client.open(file_path, "rb") as f:
                df = file_type.read_df(f)
                df = df.rename(columns={"Value": tag.name})
                df = df[~df["Status"].isin(remove_status_codes)]
                df.sort_index(inplace=True)
                all_years.append(df)
                logger.info(
                    f"Done in {(timeit.default_timer() - before_downloading):.2f} sec {file_path}"
                )
        except FileNotFoundError as e:
            logger.debug(f"{file_path} not found, skipping it: {e}")

    try:
        combined = pd.concat(all_years)
    except Exception as e:
        logger.debug(f"Not able to concatenate all years: {e}.")
        return pd.Series(name=tag.name, data=None)

    # Duplicated timestamps are common; keep the last occurrence.
    if combined.index.duplicated().any():
        combined = combined[~combined.index.duplicated(keep="last")]

    return combined[tag.name]
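
# Hedged sketch of the duck-typed interface that self.file_lookups relies on:
# each lookup needs a .lookup(...) returning a full file path (or None) and a
# .file_type exposing read_df(). The class below is hypothetical; only the
# method names and call signatures are taken from read_tag_files above, and
# the CSV layout mirrors the plain-CSV reader elsewhere in this module.
class _ExampleCsvLookup:
    class _ExampleCsvFileType:
        def read_df(self, f) -> pd.DataFrame:
            # Same ";"-separated layout; "Value" is renamed to the tag name
            # by the caller, and "Status" is used for quality filtering.
            return pd.read_csv(
                f,
                sep=";",
                header=None,
                names=["Sensor", "Value", "Timestamp", "Status"],
                parse_dates=["Timestamp"],
                date_parser=lambda col: pd.to_datetime(col, utc=True),
                index_col="Timestamp",
            )

    file_type = _ExampleCsvFileType()

    def lookup(self, client, dir_path, tag_name, year):
        # Return the path only when a file for this year actually exists.
        path = f"{dir_path}/{tag_name}_{year}.csv"
        return path if is_file(client, path) else None
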
def is_file(client: core.AzureDLFileSystem, path: str) -> bool:
    """Return True if ``path`` exists in the data lake and is a file."""
    try:
        info = client.info(path)
    except FileNotFoundError:
        return False
    return info["type"] == "FILE"
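
# Hedged usage sketch: azure-datalake-store's info() reports "type" as either
# "FILE" or "DIRECTORY", so is_file filters out directory entries that a
# plain existence check would let through. The paths are hypothetical.
def _example_only_files(client: core.AzureDLFileSystem, paths: list) -> list:
    return [p for p in paths if is_file(client, p)]
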
def read_tag_files(
    adls_file_system_client: core.AzureDLFileSystem,
    tag: SensorTag,
    years: range,
    dry_run: Optional[bool] = False,
    remove_status_codes: Optional[list] = [0],
    dl_base_path: Optional[str] = None,
) -> pd.Series:
    """
    Download tag files for the given years into dataframes,
    and return as one dataframe.

    Parameters
    ----------
    adls_file_system_client: core.AzureDLFileSystem
        the AzureDLFileSystem client to use
    tag: SensorTag
        the tag to download data for
    years: range
        range object providing years to include
    dry_run: Optional[bool]
        if True, don't download data; just check info, log, and return
    remove_status_codes: Optional[list]
        Removes data with Status code(s) in the list. By default it removes
        data with Status code 0.
    dl_base_path: Optional[str]
        Base path used to override the asset-to-path dictionary.
        Useful for demos and other non-production settings.

    Returns
    -------
    pd.Series:
        Series with all years for one tag.
    """
    tag_base_path = (
        dl_base_path if dl_base_path else NcsReader.base_path_from_asset(tag.asset)
    )

    if not tag_base_path:
        raise ValueError(f"Unable to find base path from tag {tag}")

    all_years = []
    logger.info(f"Downloading tag: {tag} for years: {years}")
    tag_name_encoded = quote(tag.name, safe=" ")

    NcsReader._verify_tag_path_exist(
        adls_file_system_client, f"{tag_base_path}/{tag_name_encoded}/"
    )

    for year in years:
        file_path = (
            f"{tag_base_path}/{tag_name_encoded}/{tag_name_encoded}_{year}.csv"
        )
        logger.info(f"Parsing file {file_path}")

        try:
            info = adls_file_system_client.info(file_path)
            file_size = info.get("length") / (1024 ** 2)
            logger.debug(f"File size for file {file_path}: {file_size:.2f}MB")

            if dry_run:
                logger.info("Dry run only, returning empty frame early")
                return pd.Series()

            with adls_file_system_client.open(file_path, "rb") as f:
                df = pd.read_csv(
                    f,
                    sep=";",
                    header=None,
                    names=["Sensor", tag.name, "Timestamp", "Status"],
                    usecols=[tag.name, "Timestamp", "Status"],
                    dtype={tag.name: np.float32},
                    parse_dates=["Timestamp"],
                    date_parser=lambda col: pd.to_datetime(col, utc=True),
                    index_col="Timestamp",
                )
                df = df[~df["Status"].isin(remove_status_codes)]
                all_years.append(df)
                logger.info(f"Done parsing file {file_path}")
        except FileNotFoundError as e:
            logger.debug(f"{file_path} not found, skipping it: {e}")

    try:
        combined = pd.concat(all_years)
    except Exception as e:
        logger.debug(f"Not able to concatenate all years: {e}.")
        return pd.Series(name=tag.name, data=None)

    # Duplicated timestamps are common; keep the last occurrence.
    if combined.index.duplicated().any():
        combined = combined[~combined.index.duplicated(keep="last")]

    return combined[tag.name]
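
# Hedged sketch of the dry_run and remove_status_codes parameters; the tag
# name, asset, and status codes below are hypothetical examples.
def _example_dry_run_then_fetch(client: core.AzureDLFileSystem) -> pd.Series:
    tag = SensorTag(name="TAG-1", asset="asset-a")
    years = range(2018, 2020)
    # dry_run logs the size of the first file found, downloads nothing,
    # and returns an empty series early.
    read_tag_files(client, tag, years, dry_run=True)
    # Real fetch, dropping rows whose Status is 0 or 1.
    return read_tag_files(client, tag, years, remove_status_codes=[0, 1])
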