def select(self, dfs: DataFrames, statement: str) -> DataFrame:
    """Run a SQL ``statement`` against the given dataframes using the
    pandas-based SQL runner.

    :param dfs: the named dataframes referenced by the statement
    :param statement: the SQL statement to execute
    :return: the query result converted back through the execution engine
    """
    pandas_dfs = {}
    for name, df in dfs.items():
        # normalize each input through the engine, then drop to pandas
        pandas_dfs[name] = self.execution_engine.to_df(df).as_pandas()  # type: ignore
    result = run_sql_on_pandas(statement, pandas_dfs)
    return self.execution_engine.to_df(result)
def select(self, dfs: DataFrames, statement: str) -> DataFrame:
    """Run a SQL ``statement`` against the given dataframes using the
    Dask SQL runner.

    :param dfs: the named dataframes referenced by the statement
    :param statement: the SQL statement to execute
    :return: the query result wrapped in a :class:`DaskDataFrame`
    """
    native_dfs = {}
    for name, df in dfs.items():
        # unwrap to the underlying dask dataframe the runner expects
        native_dfs[name] = self.execution_engine.to_df(df).native  # type: ignore
    result = run_sql_on_dask(statement, native_dfs)
    return DaskDataFrame(result)
def select(self, dfs: DataFrames, statement: str) -> DataFrame:
    """Run a SQL ``statement`` against the given dataframes using an
    in-memory SQLite database.

    Each dataframe is materialized as a SQLite table named after its key,
    then the statement is executed with :func:`pandas.read_sql_query`.

    :param dfs: the named dataframes referenced by the statement
    :param statement: the SQL statement to execute
    :return: the query result wrapped in a :class:`PandasDataFrame`
    """
    sql_engine = create_engine("sqlite:///:memory:")
    try:
        for k, v in dfs.items():
            v.as_pandas().to_sql(k, sql_engine, if_exists="replace", index=False)
        df = pd.read_sql_query(statement, sql_engine)
    finally:
        # The original code never released the engine, leaking its
        # connection pool on every call; dispose it deterministically.
        sql_engine.dispose()
    return PandasDataFrame(df)
def select(self, dfs: DataFrames, statement: str) -> DataFrame:
    """Run a SQL ``statement`` against the given dataframes using the
    pandas-based SQL runner, honoring the engine's case-sensitivity
    configuration.

    :param dfs: the named dataframes referenced by the statement
    :param statement: the SQL statement to execute
    :return: the query result converted back through the execution engine
    """
    engine = self.execution_engine
    pandas_dfs = {}
    for name, df in dfs.items():
        pandas_dfs[name] = engine.to_df(df).as_pandas()  # type: ignore
    # respect the configured SQL case sensitivity (defaults to case sensitive)
    ignore_case = engine.compile_conf.get(FUGUE_CONF_SQL_IGNORE_CASE, False)
    result = run_sql_on_pandas(statement, pandas_dfs, ignore_case=ignore_case)
    return engine.to_df(result)
def zip_all(
    self,
    dfs: DataFrames,
    how: str = "inner",
    partition_spec: PartitionSpec = EMPTY_PARTITION_SPEC,
    temp_path: Optional[str] = None,
    to_file_threshold: Any = -1,
) -> DataFrame:
    """Zip multiple dataframes together with given partition specifications.

    :param dfs: |DataFramesLikeObject|
    :param how: can accept ``inner``, ``left_outer``, ``right_outer``,
      ``full_outer``, ``cross``, defaults to ``inner``
    :param partition_spec: |PartitionLikeObject|, defaults to empty.
    :param temp_path: file path to store the data (used only if the serialized data
      is larger than ``to_file_threshold``), defaults to None
    :param to_file_threshold: file byte size threshold, defaults to -1
    :return: a zipped dataframe, the metadata of the dataframe will indicated it's
      zipped

    :Notice:

    * Please also read :meth:`~.zip`
    * If ``dfs`` is dict like, the zipped dataframe will be dict like,
      If ``dfs`` is list like, the zipped dataframe will be list like
    * It's fine to contain only one dataframe in ``dfs``

    For more details and examples, read
    :ref:`Zip & Comap <tutorial:/tutorials/execution_engine.ipynb#zip-&-comap>`.
    """
    assert_or_throw(len(dfs) > 0, "can't zip 0 dataframes")
    named = dfs.has_key
    items = list(dfs.items())
    first_name, first_df = items[0]
    if len(items) == 1:
        # a single dataframe is simply serialized by partition
        return self._serialize_by_partition(
            first_df,
            partition_spec,
            first_name,
            temp_path,
            to_file_threshold,
            has_name=named,
        )
    # seed the fold with the first pair, then fold in the rest one by one
    second_name, second_df = items[1]
    zipped = self.zip(
        first_df,
        second_df,
        how=how,
        partition_spec=partition_spec,
        temp_path=temp_path,
        to_file_threshold=to_file_threshold,
        df1_name=first_name if named else None,
        df2_name=second_name if named else None,
    )
    for name, other in items[2:]:
        zipped = self.zip(
            zipped,
            other,
            how=how,
            partition_spec=partition_spec,
            temp_path=temp_path,
            to_file_threshold=to_file_threshold,
            df2_name=name if named else None,
        )
    return zipped
def select(self, dfs: DataFrames, statement: str) -> DataFrame:
    """Run a SQL ``statement`` against the given dataframes using Spark SQL.

    Each dataframe is registered with the execution engine under its key so
    the statement can reference it by name.

    :param dfs: the named dataframes referenced by the statement
    :param statement: the SQL statement to execute
    :return: the query result wrapped in a :class:`SparkDataFrame`
    """
    for name, df in dfs.items():
        self.execution_engine.register(df, name)  # type: ignore
    spark_result = self.execution_engine.spark_session.sql(statement)  # type: ignore
    return SparkDataFrame(spark_result)