def get_pandas_df(self, bql, parameters=None):
    """
    Returns a Pandas DataFrame for the results produced by a BigQuery
    query. The DbApiHook method must be overridden because Pandas
    doesn't support PEP 249 connections, except for SQLite. See:

    https://github.com/pydata/pandas/blob/master/pandas/io/sql.py#L447
    https://github.com/pydata/pandas/issues/6900

    Result pages are parsed and concatenated in the order in which
    ``run_query`` returned them, so the row order of the query result
    is kept.

    :param bql: The BigQuery SQL to execute.
    :type bql: string
    :param parameters: Not used by this method; accepted so the
        signature stays compatible with the overridden method.
    :type parameters: mapping or iterable
    :return: A single DataFrame holding the rows of every result page,
        re-indexed from zero. When the query yields no pages, the
        result of parsing an empty page against the schema.
    """
    service = self.get_service()
    project = self._get_field('project')
    connector = BigQueryPandasConnector(project, service)
    schema, pages = connector.run_query(bql)

    # Reverse once so that popping from the tail (O(1)) hands the pages
    # back in their original order. Draining the list this way also
    # drops the reference to each raw page as soon as it has been
    # parsed, instead of holding all raw pages next to all DataFrames.
    pages.reverse()
    dataframe_list = []
    while pages:
        dataframe_list.append(gbq_parse_data(schema, pages.pop()))

    if dataframe_list:
        return concat(dataframe_list, ignore_index=True)
    # No pages at all: parse an empty page against the schema.
    return gbq_parse_data(schema, [])
def get_pandas_df(self, bql, parameters=None, dialect='legacy'):
    """
    Returns a Pandas DataFrame for the results produced by a BigQuery
    query. The DbApiHook method must be overridden because Pandas
    doesn't support PEP 249 connections, except for SQLite. See:

    https://github.com/pydata/pandas/blob/master/pandas/io/sql.py#L447
    https://github.com/pydata/pandas/issues/6900

    Result pages are parsed and concatenated in the order in which
    ``run_query`` returned them, so the row order of the query result
    is kept.

    :param bql: The BigQuery SQL to execute.
    :type bql: string
    :param parameters: The parameters to render the SQL query with
        (not used, leave to override superclass method)
    :type parameters: mapping or iterable
    :param dialect: Dialect of BigQuery SQL - legacy SQL or standard SQL.
        Passed through unchanged to ``BigQueryPandasConnector``.
    :type dialect: string in {'legacy', 'standard'}, default 'legacy'
    :return: A single DataFrame holding the rows of every result page,
        re-indexed from zero. When the query yields no pages, the
        result of parsing an empty page against the schema.
    """
    service = self.get_service()
    project = self._get_field('project')
    connector = BigQueryPandasConnector(project, service, dialect=dialect)
    schema, pages = connector.run_query(bql)

    # Reverse once so that popping from the tail (O(1)) hands the pages
    # back in their original order. Draining the list this way also
    # drops the reference to each raw page as soon as it has been
    # parsed, instead of holding all raw pages next to all DataFrames.
    pages.reverse()
    dataframe_list = []
    while pages:
        dataframe_list.append(gbq_parse_data(schema, pages.pop()))

    if dataframe_list:
        return concat(dataframe_list, ignore_index=True)
    # No pages at all: parse an empty page against the schema.
    return gbq_parse_data(schema, [])