def main():
    """Explore the NOAA GOES-16 public dataset and estimate a query's scan cost.

    Lists the dataset's tables, shows the ``abi_l1b_radiance`` schema and a
    sample of rows, then prints the estimated size (GB) of a bounding-box
    query without actually running it.
    """
    goes16 = BigQueryHelper(active_project="bigquery-public-data",
                            dataset_name="noaa_goes16")
    goes16.list_tables()
    goes16.table_schema('abi_l1b_radiance')

    # Peek at a few rows of the radiance table.
    print(goes16.head("abi_l1b_radiance", num_rows=10))

    # Scenes whose geospatial bounding box lies roughly within
    # 75-120 degrees longitude and 30-50 degrees latitude.
    query = """
        SELECT dataset_name, platform_id, scene_id
        FROM `bigquery-public-data.noaa_goes16.abi_l1b_radiance`
        WHERE geospatial_westbound_longitude<120
          and geospatial_eastbound_longitude>75
          and geospatial_northbound_latitude<50
          and geospatial_southbound_latitude>30
        """

    # Dry-run estimate only -- no bytes are billed here.
    print("Query size in GB is %f " % goes16.estimate_query_size(query))
class TestBQHelper(unittest.TestCase):
    """Integration tests for BigQueryHelper against public BigQuery datasets."""

    def setUp(self):
        self.my_bq = BigQueryHelper("bigquery-public-data", "openaq")
        self.query = "SELECT location FROM `bigquery-public-data.openaq.global_air_quality`"
        # Query randomized so it won't hit the cache across multiple test runs
        self.randomizable_query = """ SELECT value FROM `bigquery-public-data.openaq.global_air_quality` WHERE value = {0}"""

    def test_list_tables(self):
        """The openaq dataset exposes exactly one table."""
        self.assertEqual(self.my_bq.list_tables(), ['global_air_quality'])

    def test_list_schema(self):
        """global_air_quality has 11 columns."""
        schema = self.my_bq.table_schema('global_air_quality')
        self.assertEqual(len(schema), 11)

    def test_estimate_query_size(self):
        """Size estimates come back as a float (GB)."""
        estimate = self.my_bq.estimate_query_size(self.query)
        self.assertIsInstance(estimate, float)

    def test_query_to_pandas(self):
        """Unsafe query path returns a DataFrame."""
        self.assertIsInstance(self.my_bq.query_to_pandas(self.query), DataFrame)

    def test_query_safe_passes(self):
        """Safe query path returns a DataFrame when under the scan limit."""
        result = self.my_bq.query_to_pandas_safe(self.query)
        self.assertIsInstance(result, DataFrame)

    def test_query_safe_fails(self):
        # Different query must be used for this test to ensure we don't hit the
        # cache and end up passing by testing a query that would use zero bytes.
        fail_query = self.randomizable_query.format(random())
        self.assertIsNone(self.my_bq.query_to_pandas_safe(fail_query, 10**-10))

    def test_head(self):
        """head() samples rows into a DataFrame."""
        self.assertIsInstance(self.my_bq.head('global_air_quality'), DataFrame)

    # NOTE(review): method name spelled "useage" in the original; kept as-is
    # because unittest discovers tests by name.
    def test_useage_tracker(self):
        """Running an uncached query must increment the usage counter."""
        self.my_bq.query_to_pandas(self.randomizable_query.format(random()))
        self.assertNotEqual(self.my_bq.total_gb_used_net_cache, 0)

    def test_bad_query_raises_right_error(self):
        """Invalid SQL surfaces as google.api_core BadRequest."""
        with self.assertRaises(BadRequest):
            self.my_bq.query_to_pandas("Not a valid query")

    def test_list_nested_schema(self):
        """Nested/repeated fields are flattened: github commits has 33 columns."""
        nested_helper = BigQueryHelper("bigquery-public-data", "github_repos")
        self.assertEqual(len(nested_helper.table_schema('commits')), 33)
def test_list_nested_schema(self):
    """Nested/repeated fields are flattened: the github_repos `commits`
    table's schema reports 33 columns."""
    helper = BigQueryHelper("bigquery-public-data", "github_repos")
    schema = helper.table_schema('commits')
    self.assertEqual(len(schema), 33)
GROUP BY Day_of_Week ORDER BY Day_of_Week; """ response2 = stackOverflow.query_to_pandas_safe(query2, max_gb_scanned=10) response2.head(10) query3 = """SELECT EXTRACT(YEAR FROM creation_date) AS Year, COUNT(*) AS Number_of_Questions, ROUND(100 * SUM(IF(answer_count > 0, 1, 0)) / COUNT(*), 1) AS Percent_Questions_with_Answers FROM `bigquery-public-data.stackoverflow.posts_questions` GROUP BY Year HAVING Year > 2008 AND Year < 2016 ORDER BY Year; """ response3 = stackOverflow.query_to_pandas_safe(query3) response3.head(10) for t in tabelas: esquemas_tabelas[t] = bq_assistant.table_schema(t) obj = esquemas_tabelas["tags"].head() print(obj) print(obj.values)
from bq_helper import BigQueryHelper
from google.cloud import bigquery

# Before executing this code you have to export your key:
#   export GOOGLE_APPLICATION_CREDENTIALS="path to your key.json"
client = bigquery.Client(project='Oceans')

bq_assistant = BigQueryHelper(active_project="bigquery-public-data",
                              dataset_name="noaa_icoads")
bq_assistant.list_tables()
bq_assistant.table_schema('icoads_core_2005')

# Marine surface observations for 2005: position, wind, weather, pressure,
# temperatures, cloud, wave and swell measurements.
query = """SELECT latitude, longitude, wind_direction_true, wind_speed,
       present_weather, sea_level_pressure, air_temperature,
       wetbulb_temperature, sea_surface_temp, total_cloud_amount,
       cloud_height, wave_direction, wave_period, wave_height,
       swell_direction, swell_period, swell_height, timestamp
FROM `bigquery-public-data.noaa_icoads.icoads_core_2005`
"""

# Execute the query (the "safe" variant aborts if the scan exceeds the
# helper's default byte limit).
res = bq_assistant.query_to_pandas_safe(query)

# Save the result.
res.to_csv("resultat_total_20052.csv", sep=',')