Example #1
0
    def get_pandas_df(self, bql, parameters=None):
        """
        Returns a Pandas DataFrame for the results produced by a BigQuery
        query. The DbApiHook method must be overridden because Pandas
        doesn't support PEP 249 connections, except for SQLite. See:

        https://github.com/pydata/pandas/blob/master/pandas/io/sql.py#L447
        https://github.com/pydata/pandas/issues/6900

        Result pages are parsed in the order ``run_query`` hands them back, so
        the row order of the query result is kept across page boundaries
        (assuming the connector returns pages in the order it fetched them).

        :param bql: The BigQuery SQL to execute.
        :type bql: string
        :param parameters: Not used by this method; accepted only so the
            signature stays compatible with the overridden method.
        :type parameters: mapping or iterable
        :return: One DataFrame holding the rows of every result page, with a
            fresh 0..n-1 index; when the query produced no pages, whatever
            ``gbq_parse_data`` builds from the schema and an empty row list.
        """
        service = self.get_service()
        project = self._get_field('project')
        connector = BigQueryPandasConnector(project, service)
        schema, pages = connector.run_query(bql)

        # list.pop() removes from the END of the list, so popping the pages as
        # delivered would concatenate them last-to-first. Reverse once up
        # front so the pops walk the pages first-to-last, while still dropping
        # the list's reference to each page as soon as it has been parsed.
        pages.reverse()
        dataframe_list = []
        while pages:
            dataframe_list.append(gbq_parse_data(schema, pages.pop()))

        if dataframe_list:
            return concat(dataframe_list, ignore_index=True)

        # No pages came back: parse an empty row list against the schema.
        return gbq_parse_data(schema, [])
Example #2
0
    def get_pandas_df(self, bql, parameters=None, dialect='legacy'):
        """
        Returns a Pandas DataFrame for the results produced by a BigQuery
        query. The DbApiHook method must be overridden because Pandas
        doesn't support PEP 249 connections, except for SQLite. See:

        https://github.com/pydata/pandas/blob/master/pandas/io/sql.py#L447
        https://github.com/pydata/pandas/issues/6900

        Result pages are parsed in the order ``run_query`` hands them back, so
        the row order of the query result is kept across page boundaries
        (assuming the connector returns pages in the order it fetched them).

        :param bql: The BigQuery SQL to execute.
        :type bql: string
        :param parameters: The parameters to render the SQL query with (not
            used, leave to override superclass method)
        :type parameters: mapping or iterable
        :param dialect: Dialect of BigQuery SQL – legacy SQL or standard SQL
        :type dialect: string in {'legacy', 'standard'}, default 'legacy'
        :return: One DataFrame holding the rows of every result page, with a
            fresh 0..n-1 index; when the query produced no pages, whatever
            ``gbq_parse_data`` builds from the schema and an empty row list.
        """
        service = self.get_service()
        project = self._get_field('project')
        connector = BigQueryPandasConnector(project, service, dialect=dialect)
        schema, pages = connector.run_query(bql)

        # list.pop() removes from the END of the list, so popping the pages as
        # delivered would concatenate them last-to-first. Reverse once up
        # front so the pops walk the pages first-to-last, while still dropping
        # the list's reference to each page as soon as it has been parsed.
        pages.reverse()
        dataframe_list = []
        while pages:
            dataframe_list.append(gbq_parse_data(schema, pages.pop()))

        if dataframe_list:
            return concat(dataframe_list, ignore_index=True)

        # No pages came back: parse an empty row list against the schema.
        return gbq_parse_data(schema, [])
    def get_pandas_df(self, bql, parameters=None, dialect='legacy'):
        """
        Returns a Pandas DataFrame for the results produced by a BigQuery
        query. The DbApiHook method must be overridden because Pandas
        doesn't support PEP 249 connections, except for SQLite. See:

        https://github.com/pydata/pandas/blob/master/pandas/io/sql.py#L447
        https://github.com/pydata/pandas/issues/6900

        Result pages are parsed in the order ``run_query`` hands them back, so
        the row order of the query result is kept across page boundaries
        (assuming the connector returns pages in the order it fetched them).

        :param bql: The BigQuery SQL to execute.
        :type bql: string
        :param parameters: The parameters to render the SQL query with (not
            used, leave to override superclass method)
        :type parameters: mapping or iterable
        :param dialect: Dialect of BigQuery SQL – legacy SQL or standard SQL
        :type dialect: string in {'legacy', 'standard'}, default 'legacy'
        :return: One DataFrame holding the rows of every result page, with a
            fresh 0..n-1 index; when the query produced no pages, whatever
            ``gbq_parse_data`` builds from the schema and an empty row list.
        """
        service = self.get_service()
        project = self._get_field('project')
        connector = BigQueryPandasConnector(project, service, dialect=dialect)
        schema, pages = connector.run_query(bql)

        # list.pop() removes from the END of the list, so popping the pages as
        # delivered would concatenate them last-to-first. Reverse once up
        # front so the pops walk the pages first-to-last, while still dropping
        # the list's reference to each page as soon as it has been parsed.
        pages.reverse()
        dataframe_list = []
        while pages:
            dataframe_list.append(gbq_parse_data(schema, pages.pop()))

        if dataframe_list:
            return concat(dataframe_list, ignore_index=True)

        # No pages came back: parse an empty row list against the schema.
        return gbq_parse_data(schema, [])