def main():
    noaa_goes = BigQueryHelper(active_project="bigquery-public-data",
                               dataset_name="noaa_goes16")
    noaa_goes.list_tables()

    noaa_goes.table_schema('abi_l1b_radiance')
    print(noaa_goes.head("abi_l1b_radiance", num_rows=10))

    query = """
    SELECT dataset_name, platform_id, scene_id FROM `bigquery-public-data.noaa_goes16.abi_l1b_radiance` WHERE geospatial_westbound_longitude<120 and geospatial_eastbound_longitude>75 and geospatial_northbound_latitude<50 and geospatial_southbound_latitude>30
    """

    print("Query size in GB is %f " % noaa_goes.estimate_query_size(query))
Exemple #2
0
class TestBQHelper(unittest.TestCase):
    def setUp(self):
        self.my_bq = BigQueryHelper("bigquery-public-data", "openaq")
        self.query = "SELECT location FROM `bigquery-public-data.openaq.global_air_quality`"
        # Query randomized so it won't hit the cache across multiple test runs
        self.randomizable_query = """
            SELECT value FROM `bigquery-public-data.openaq.global_air_quality`
            WHERE value = {0}"""

    def test_list_tables(self):
        self.assertEqual(self.my_bq.list_tables(), ['global_air_quality'])

    def test_list_schema(self):
        self.assertEqual(len(self.my_bq.table_schema('global_air_quality')),
                         11)

    def test_estimate_query_size(self):
        self.assertIsInstance(self.my_bq.estimate_query_size(self.query),
                              float)

    def test_query_to_pandas(self):
        self.assertIsInstance(self.my_bq.query_to_pandas(self.query),
                              DataFrame)

    def test_query_safe_passes(self):
        self.assertIsInstance(self.my_bq.query_to_pandas_safe(self.query),
                              DataFrame)

    def test_query_safe_fails(self):
        # Different query must be used for this test to ensure we don't hit the
        # cache and end up passing by testing a query that would use zero bytes.
        fail_query = self.randomizable_query.format(random())
        self.assertIsNone(self.my_bq.query_to_pandas_safe(fail_query, 10**-10))

    def test_head(self):
        self.assertIsInstance(self.my_bq.head('global_air_quality'), DataFrame)

    def test_useage_tracker(self):
        self.my_bq.query_to_pandas(self.randomizable_query.format(random()))
        self.assertNotEqual(self.my_bq.total_gb_used_net_cache, 0)

    def test_bad_query_raises_right_error(self):
        with self.assertRaises(BadRequest):
            self.my_bq.query_to_pandas("Not a valid query")

    def test_list_nested_schema(self):
        nested_helper = BigQueryHelper("bigquery-public-data", "github_repos")
        self.assertEqual(len(nested_helper.table_schema('commits')), 33)
Exemple #3
0
 def test_list_nested_schema(self):
     nested_helper = BigQueryHelper("bigquery-public-data", "github_repos")
     self.assertEqual(len(nested_helper.table_schema('commits')), 33)
Exemple #4
0
GROUP BY
  Day_of_Week
ORDER BY
  Day_of_Week;
        """
response2 = stackOverflow.query_to_pandas_safe(query2, max_gb_scanned=10)
response2.head(10)

query3 = """SELECT
  EXTRACT(YEAR FROM creation_date) AS Year,
  COUNT(*) AS Number_of_Questions,
  ROUND(100 * SUM(IF(answer_count > 0, 1, 0)) / COUNT(*), 1) AS Percent_Questions_with_Answers
FROM
  `bigquery-public-data.stackoverflow.posts_questions`
GROUP BY
  Year
HAVING
  Year > 2008 AND Year < 2016
ORDER BY
  Year;
        """
response3 = stackOverflow.query_to_pandas_safe(query3)
response3.head(10)

for t in tabelas:
	esquemas_tabelas[t] = bq_assistant.table_schema(t)

obj = esquemas_tabelas["tags"].head()
print(obj)
print(obj.values)
Exemple #5
0
from bq_helper import BigQueryHelper
from google.cloud import bigquery

#before executing this code you have to export your key : export GOOGLE_APPLICATION_CREDENTIALS="path to your key.json"

client = bigquery.Client(project='Oceans')

bq_assistant = BigQueryHelper(active_project="bigquery-public-data",
                              dataset_name="noaa_icoads")

bq_assistant.list_tables()
bq_assistant.table_schema('icoads_core_2005')

query = """SELECT latitude, longitude, wind_direction_true, wind_speed, present_weather, sea_level_pressure, air_temperature,  wetbulb_temperature, sea_surface_temp,
            total_cloud_amount, cloud_height, wave_direction, wave_period, wave_height, swell_direction, swell_period, swell_height, timestamp
            FROM `bigquery-public-data.noaa_icoads.icoads_core_2005`
                        """

#execute the query.
res = bq_assistant.query_to_pandas_safe(query)

#save the result.
res.to_csv("resultat_total_20052.csv", sep=',')