예제 #1
0
 def _save_df(cls, df: pd.DataFrame) -> DPTmpFile:
     """
     Write a dataframe out to a new `DPTmpFile` using `ArrowFormat`.

     Args:
         df: The dataframe to export

     Returns:
         The `DPTmpFile` object the dataframe was saved to
     """
     fn = DPTmpFile(ArrowFormat.ext)
     # to_df's result is what gets processed and saved, not the caller's argument directly
     frame = to_df(df)
     # return value is ignored - presumably process_df works on the frame in place (TODO confirm)
     process_df(frame)
     ArrowFormat.save_file(fn.name, frame)
     log.debug(f"Saved df to {fn} ({os.path.getsize(fn.file)} bytes)")
     return fn
예제 #2
0
def save_df(df: pd.DataFrame) -> DPTmpFile:
    """
    Export a dataframe to a new `DPTmpFile`, ready for uploading.

    Args:
        df: The dataframe to export

    Returns:
        The `DPTmpFile` object the dataframe was saved to
    """
    fn = DPTmpFile(ArrowFormat.ext)
    # work on the result of to_df (the original note describes it as a copy of the df)
    frame = to_df(df)
    # NOTE: process_df is deliberately not called here - per the original note,
    # it is invoked inside ArrowFormat.save_file
    ArrowFormat.save_file(fn.name, frame)
    log.debug(f"Saved df to {fn} ({os.path.getsize(fn.file)} bytes)")
    return fn
예제 #3
0
    def download_df(self) -> pd.DataFrame:
        """
        Fetch the blob's data and load it as a dataframe.

        The content at `self.data_url` is downloaded into a `DPTmpFile`
        and read back with `ArrowFormat.load_file` while that file's
        context is still open.

        Returns:
            The dataframe loaded from the downloaded file
        """
        with DPTmpFile(ArrowFormat.ext) as tmp:
            do_download_file(self.data_url, tmp.name)
            # load before leaving the context, as the file is only known to be valid inside it
            loaded = ArrowFormat.load_file(tmp.name)
            return loaded
예제 #4
0
def save_load_arrow(tmp_path: Path, df: pd.DataFrame) -> pd.DataFrame:
    """
    Roundtrip a dataframe via an arrow file.

    A deep copy of `df` is saved with `ArrowFormat.save_file` to a fresh
    ".arrow" file inside `tmp_path`, then read back with
    `ArrowFormat.load_file`.

    Args:
        tmp_path: Existing directory in which the intermediate file is created
        df: The dataframe to roundtrip; the caller's object is not passed to the writer

    Returns:
        The dataframe loaded back from the arrow file
    """
    import os
    import tempfile

    # mkstemp atomically creates the file, unlike the deprecated, race-prone mktemp
    # which only returns a name; the path is returned as a str
    fd, fn = tempfile.mkstemp(suffix=".arrow", dir=tmp_path)
    # only the path is needed - close the descriptor so the writer can reopen the file
    os.close(fd)
    # save a deep copy so nothing done while saving touches the caller's frame
    ArrowFormat.save_file(fn, df.copy(deep=True))
    return ArrowFormat.load_file(fn)