def test_upload_data_dataset_not_found(self):
    # Creating a table under the 'pydata_pandas_bq_testing2' dataset must
    # raise GenericGBQException (presumably because that dataset does not
    # exist, as the test name suggests -- confirm against the test project).
    row_count = 10
    frame = make_mixed_dataframe_v2(row_count)

    with tm.assertRaises(gbq.GenericGBQException):
        # The schema is generated inside the assertion scope on purpose, so
        # the set of calls covered by assertRaises stays the same.
        table_schema = gbq.generate_bq_schema(frame)
        gbq.create_table('pydata_pandas_bq_testing2.new_test',
                         table_schema,
                         PROJECT_ID)
def test_generate_bq_schema(self):
    # The schema generated for the mixed test frame must list columns
    # A, B, C, D with types FLOAT, FLOAT, STRING, TIMESTAMP, in that order.
    expected_columns = (
        ('A', 'FLOAT'),
        ('B', 'FLOAT'),
        ('C', 'STRING'),
        ('D', 'TIMESTAMP'),
    )
    test_schema = {
        'fields': [{'name': column, 'type': bq_type}
                   for column, bq_type in expected_columns],
    }

    schema = gbq.generate_bq_schema(tm.makeMixedDataFrame())

    self.assertEqual(schema, test_schema)
def test_upload_data_if_table_exists_fail(self):
    # Uploading to a table that already exists must raise
    # TableCreationError, both with the default if_exists behaviour and
    # when if_exists='fail' is passed explicitly.
    row_count = 10
    frame = make_mixed_dataframe_v2(row_count)
    destination = 'pydata_pandas_bq_testing.' + 'new_test2'

    # Create the table up front so that every upload below collides with it.
    gbq.create_table(destination, gbq.generate_bq_schema(frame), PROJECT_ID)

    upload_variants = (
        {},                     # Test the default value of if_exists is 'fail'
        {'if_exists': 'fail'},  # Test the if_exists parameter with value 'fail'
    )
    for extra_kwargs in upload_variants:
        with tm.assertRaises(gbq.TableCreationError):
            gbq.to_gbq(frame, destination, PROJECT_ID, **extra_kwargs)
def test_generate_bq_schema_deprecated():
    # 11121 Deprecation of generate_bq_schema
    # Calling gbq.generate_bq_schema is expected to emit a FutureWarning.
    row_count = 10
    with tm.assert_produces_warning(FutureWarning):
        # The frame is built inside the warning-capturing context, so the
        # context covers the same calls as before.
        frame = make_mixed_dataframe_v2(row_count)
        gbq.generate_bq_schema(frame)
# limitations under the License.
import sys
import pandas as pd
import numpy as np
import json
from pandas.io import gbq
from gcloud import storage
from cStringIO import StringIO

#------------------------------------------
# This script reads a newline-delimited JSON file
# and tries to generate the schema based on the column values
#------------------------------------------

# Path of the newline-delimited JSON file, given as the first CLI argument.
filename = sys.argv[1]

# Convert the newline-delimited JSON into one JSON array string.
# The context manager closes the file even if reading fails, and blank
# lines are skipped so that a stray empty line (e.g. a trailing one) does
# not turn into an empty array element and make the JSON invalid.
with open(filename, "r") as filehandle:
    json_string = '[%s]' % ','.join(
        line for line in filehandle if line.strip())

# a little time consuming, but is worth converting into a dataframe
# (this can be replaced by read_csv for tab-delimited files)
df = pd.read_json(json_string)

# use gbq to generate the BigQuery schema; only the list stored under the
# "fields" key is kept
schema = gbq.generate_bq_schema(df, default_type='STRING')["fields"]

# Parenthesised single-argument form prints the same text under the
# Python 2 print statement and the Python 3 print function.
print(json.dumps(schema, indent=4))