def test_to_api_repr_csv(self):
    """Check the API representation of a CSV config with every option set.

    ``skip_leading_rows`` is assigned the integer 123 and is expected back
    as the string "123" under the ``skipLeadingRows`` key.
    """
    config = external_config.ExternalConfig("CSV")

    csv_opts = external_config.CSVOptions()
    csv_opts.allow_quoted_newlines = True
    csv_opts.encoding = "encoding"
    csv_opts.field_delimiter = "fieldDelimiter"
    csv_opts.quote_character = "quote"
    csv_opts.skip_leading_rows = 123
    csv_opts.allow_jagged_rows = False
    config._options = csv_opts

    expected_csv_options = {
        "fieldDelimiter": "fieldDelimiter",
        "skipLeadingRows": "123",
        "quote": "quote",
        "allowQuotedNewlines": True,
        "allowJaggedRows": False,
        "encoding": "encoding",
    }
    expected = {"sourceFormat": "CSV", "csvOptions": expected_csv_options}

    self.assertEqual(config.to_api_repr(), expected)
def test_to_api_repr_csv(self):
    """Serialize a fully populated CSV external config and compare it.

    The expected resource carries ``sourceFormat`` plus a ``csvOptions``
    mapping; the integer row count 123 is expected as the string "123".
    """
    config = external_config.ExternalConfig("CSV")
    csv_options = external_config.CSVOptions()

    # Applied in the same order as plain attribute assignments would be.
    option_values = (
        ("allow_quoted_newlines", True),
        ("encoding", "encoding"),
        ("field_delimiter", "fieldDelimiter"),
        ("quote_character", "quote"),
        ("skip_leading_rows", 123),
        ("allow_jagged_rows", False),
    )
    for attribute, value in option_values:
        setattr(csv_options, attribute, value)

    config._options = csv_options

    expected = {
        "sourceFormat": "CSV",
        "csvOptions": {
            "fieldDelimiter": "fieldDelimiter",
            "skipLeadingRows": "123",
            "quote": "quote",
            "allowQuotedNewlines": True,
            "allowJaggedRows": False,
            "encoding": "encoding",
        },
    }
    actual = config.to_api_repr()
    self.assertEqual(actual, expected)
def test_to_api_repr_base(self):
    """Check serialization of the format-independent config properties.

    Uses an empty source format and a one-field schema; the expected schema
    entry includes ``description: None`` for the field.
    """
    config = external_config.ExternalConfig("")
    config.source_uris = self.SOURCE_URIS
    config.max_bad_records = 17
    config.autodetect = True
    config.ignore_unknown_values = False
    config.compression = "compression"

    name_field = schema.SchemaField("full_name", "STRING", mode="REQUIRED")
    config.schema = [name_field]

    actual = config.to_api_repr()

    expected_field = {
        "name": "full_name",
        "type": "STRING",
        "mode": "REQUIRED",
        "description": None,
    }
    expected = {
        "sourceFormat": "",
        "sourceUris": self.SOURCE_URIS,
        "maxBadRecords": 17,
        "autodetect": True,
        "ignoreUnknownValues": False,
        "compression": "compression",
        "schema": {"fields": [expected_field]},
    }
    self.assertEqual(actual, expected)
def test_to_api_repr_base(self):
    """Serialize a config with only base properties set and compare it.

    The config has an empty source format, the class-level ``SOURCE_URIS``,
    and a schema with a single required STRING field.
    """
    config = external_config.ExternalConfig("")

    # Applied in the same order as plain attribute assignments would be.
    base_settings = (
        ("source_uris", self.SOURCE_URIS),
        ("max_bad_records", 17),
        ("autodetect", True),
        ("ignore_unknown_values", False),
        ("compression", "compression"),
    )
    for attribute, value in base_settings:
        setattr(config, attribute, value)
    config.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")]

    serialized = config.to_api_repr()

    expected = {
        "sourceFormat": "",
        "sourceUris": self.SOURCE_URIS,
        "maxBadRecords": 17,
        "autodetect": True,
        "ignoreUnknownValues": False,
        "compression": "compression",
        "schema": {
            "fields": [
                {
                    "name": "full_name",
                    "type": "STRING",
                    "mode": "REQUIRED",
                    "description": None,
                }
            ]
        },
    }
    self.assertEqual(serialized, expected)
def test_to_api_repr_sheets(self):
    """Check serialization of a Google Sheets config with skipped rows.

    The integer 123 assigned to ``skip_leading_rows`` is expected as the
    string "123" under ``googleSheetsOptions``.
    """
    sheets_options = external_config.GoogleSheetsOptions()
    sheets_options.skip_leading_rows = 123

    config = external_config.ExternalConfig("GOOGLE_SHEETS")
    config._options = sheets_options

    expected = {
        "sourceFormat": "GOOGLE_SHEETS",
        "googleSheetsOptions": {"skipLeadingRows": "123"},
    }
    self.assertEqual(config.to_api_repr(), expected)
def test_parquet_options_setter_non_parquet_format(self):
    """Expect a TypeError when Parquet options are set on a CSV config."""
    from google.cloud.bigquery.format_options import ParquetOptions

    options_resource = {"enumAsString": False, "enableListInference": True}
    parquet_options = ParquetOptions.from_api_repr(options_resource)

    csv_format = external_config.ExternalSourceFormat.CSV
    config = external_config.ExternalConfig(csv_format)

    expected_message = "Cannot set.*source format is CSV"
    with self.assertRaisesRegex(TypeError, expected_message):
        config.parquet_options = parquet_options
def test_to_api_repr_sheets(self):
    """Check serialization of Google Sheets options including a cell range.

    Both ``skipLeadingRows`` (as the string "123") and ``range`` are
    expected under ``googleSheetsOptions``.
    """
    config = external_config.ExternalConfig("GOOGLE_SHEETS")

    sheets_options = external_config.GoogleSheetsOptions()
    sheets_options.skip_leading_rows = 123
    sheets_options.range = "Sheet1!A5:B10"
    config._options = sheets_options

    expected_sheets_options = {
        "skipLeadingRows": "123",
        "range": "Sheet1!A5:B10",
    }
    expected = {
        "sourceFormat": "GOOGLE_SHEETS",
        "googleSheetsOptions": expected_sheets_options,
    }

    actual = config.to_api_repr()
    self.assertEqual(actual, expected)
def test_parquet_options_setter(self):
    """Check that assigning ``parquet_options`` shows up through ``options``."""
    from google.cloud.bigquery.format_options import ParquetOptions

    options_resource = {"enumAsString": False, "enableListInference": True}
    parquet_options = ParquetOptions.from_api_repr(options_resource)

    parquet_format = external_config.ExternalSourceFormat.PARQUET
    config = external_config.ExternalConfig(parquet_format)

    config.parquet_options = parquet_options

    # The generic ``options`` attribute should expose the values just set.
    generic_options = config.options
    self.assertFalse(generic_options.enum_as_string)
    self.assertTrue(config.options.enable_list_inference)
def test_to_api_repr_hive_partitioning(self):
    """Check that hive partitioning options appear in the API representation."""
    partitioning = external_config.HivePartitioningOptions()
    partitioning.source_uri_prefix = "http://foo/bar"
    partitioning.mode = "STRINGS"

    config = external_config.ExternalConfig("FORMAT_FOO")
    config.hive_partitioning = partitioning

    actual = config.to_api_repr()

    expected_partitioning = {
        "sourceUriPrefix": "http://foo/bar",
        "mode": "STRINGS",
    }
    expected = {
        "sourceFormat": "FORMAT_FOO",
        "hivePartitioningOptions": expected_partitioning,
    }
    self.assertEqual(actual, expected)
def test_to_api_repr_parquet(self):
    """Check serialization of a Parquet config with both options set."""
    from google.cloud.bigquery.format_options import ParquetOptions

    parquet_format = external_config.ExternalSourceFormat.PARQUET
    config = external_config.ExternalConfig(parquet_format)

    parquet_options = ParquetOptions.from_api_repr(
        {"enumAsString": False, "enableListInference": True}
    )
    config._options = parquet_options

    expected = {
        "sourceFormat": external_config.ExternalSourceFormat.PARQUET,
        "parquetOptions": {
            "enumAsString": False,
            "enableListInference": True,
        },
    }
    self.assertEqual(config.to_api_repr(), expected)
def test_parquet_options_getter(self):
    """Check ``parquet_options`` before and after options are installed.

    Before ``_options`` is replaced, both option values are expected to be
    None; afterwards the getter should return the same object as ``options``.
    """
    from google.cloud.bigquery.format_options import ParquetOptions

    parquet_format = external_config.ExternalSourceFormat.PARQUET
    config = external_config.ExternalConfig(parquet_format)

    # Nothing has been configured yet.
    self.assertIsNone(config.parquet_options.enum_as_string)
    self.assertIsNone(config.parquet_options.enable_list_inference)

    options_resource = {"enumAsString": True, "enableListInference": False}
    config._options = ParquetOptions.from_api_repr(options_resource)

    self.assertTrue(config.parquet_options.enum_as_string)
    self.assertFalse(config.parquet_options.enable_list_inference)
    self.assertIs(config.parquet_options, config.options)
def test_to_api_repr_bigtable(self):
    """Check serialization of Bigtable options with one family of two columns.

    The second column uses ``qualifier_encoded`` with raw bytes; the expected
    ``qualifierEncoded`` value is the base64 text of those bytes.
    """
    config = external_config.ExternalConfig("BIGTABLE")
    bigtable_options = external_config.BigtableOptions()
    bigtable_options.ignore_unspecified_column_families = True
    bigtable_options.read_rowkey_as_string = False
    config._options = bigtable_options

    family = external_config.BigtableColumnFamily()
    family.family_id = "familyId"
    family.type_ = "type"
    family.encoding = "encoding"
    family.only_read_latest = False

    string_column = external_config.BigtableColumn()
    string_column.qualifier_string = "q"
    string_column.field_name = "fieldName1"
    string_column.type_ = "type1"
    string_column.encoding = "encoding1"
    string_column.only_read_latest = True

    bytes_column = external_config.BigtableColumn()
    bytes_column.qualifier_encoded = b"q"
    bytes_column.field_name = "fieldName2"
    bytes_column.type_ = "type2"
    bytes_column.encoding = "encoding2"

    # Attach children only once they are fully configured.
    family.columns = [string_column, bytes_column]
    bigtable_options.column_families = [family]

    encoded_qualifier = base64.standard_b64encode(b"q").decode("ascii")
    expected_columns = [
        {
            "qualifierString": "q",
            "fieldName": "fieldName1",
            "type": "type1",
            "encoding": "encoding1",
            "onlyReadLatest": True,
        },
        {
            "qualifierEncoded": encoded_qualifier,
            "fieldName": "fieldName2",
            "type": "type2",
            "encoding": "encoding2",
        },
    ]
    expected_family = {
        "familyId": "familyId",
        "type": "type",
        "encoding": "encoding",
        "columns": expected_columns,
        "onlyReadLatest": False,
    }
    expected = {
        "sourceFormat": "BIGTABLE",
        "bigtableOptions": {
            "ignoreUnspecifiedColumnFamilies": True,
            "readRowkeyAsString": False,
            "columnFamilies": [expected_family],
        },
    }

    self.assertEqual(config.to_api_repr(), expected)
def test_schema_empty(self):
    """Check that an empty schema list serializes to an empty ``fields`` list."""
    config = external_config.ExternalConfig("")
    config.schema = []

    expected = {"sourceFormat": "", "schema": {"fields": []}}
    self.assertEqual(config.to_api_repr(), expected)
def test_to_api_repr_source_format(self):
    """Check that a bare CSV config serializes to only its ``sourceFormat``."""
    config = external_config.ExternalConfig("CSV")
    self.assertEqual(config.to_api_repr(), {"sourceFormat": "CSV"})
def test_to_api_repr_source_format(self):
    """Serialize a config with nothing but a source format and compare it."""
    expected = {"sourceFormat": "CSV"}

    config = external_config.ExternalConfig("CSV")
    actual = config.to_api_repr()

    self.assertEqual(actual, expected)
def test_schema_empty(self):
    """Assign an empty schema and expect ``{"fields": []}`` in the resource."""
    expected = {"sourceFormat": "", "schema": {"fields": []}}

    config = external_config.ExternalConfig("")
    config.schema = []
    actual = config.to_api_repr()

    self.assertEqual(actual, expected)
def test_schema_None(self):
    """Assign ``None`` as the schema and expect ``schema: None`` in the resource."""
    expected = {"sourceFormat": "", "schema": None}

    config = external_config.ExternalConfig("")
    config.schema = None
    actual = config.to_api_repr()

    self.assertEqual(actual, expected)
def test_connection_id(self):
    """Check that ``connection_id`` starts as None and returns what was set."""
    connection_path = "path/to/connection"
    config = external_config.ExternalConfig("")

    # No connection has been assigned yet.
    self.assertIsNone(config.connection_id)

    config.connection_id = connection_path
    self.assertEqual(config.connection_id, "path/to/connection")
def test_to_api_repr_bigtable(self):
    """Serialize a Bigtable config with a single column family and compare it.

    One column is addressed by ``qualifier_string`` and the other by the
    bytes ``qualifier_encoded``, which is expected back as base64 ASCII text.
    """
    config = external_config.ExternalConfig("BIGTABLE")
    table_options = external_config.BigtableOptions()
    table_options.ignore_unspecified_column_families = True
    table_options.read_rowkey_as_string = False
    config._options = table_options

    column_family = external_config.BigtableColumnFamily()
    column_family.family_id = "familyId"
    column_family.type_ = "type"
    column_family.encoding = "encoding"
    column_family.only_read_latest = False

    first_column = external_config.BigtableColumn()
    first_column.qualifier_string = "q"
    first_column.field_name = "fieldName1"
    first_column.type_ = "type1"
    first_column.encoding = "encoding1"
    first_column.only_read_latest = True

    second_column = external_config.BigtableColumn()
    second_column.qualifier_encoded = b"q"
    second_column.field_name = "fieldName2"
    second_column.type_ = "type2"
    second_column.encoding = "encoding2"

    # Columns go onto the family, and the family onto the options, last.
    column_family.columns = [first_column, second_column]
    table_options.column_families = [column_family]

    raw_qualifier = base64.standard_b64encode(b"q")
    expected_first_column = {
        "qualifierString": "q",
        "fieldName": "fieldName1",
        "type": "type1",
        "encoding": "encoding1",
        "onlyReadLatest": True,
    }
    expected_second_column = {
        "qualifierEncoded": raw_qualifier.decode("ascii"),
        "fieldName": "fieldName2",
        "type": "type2",
        "encoding": "encoding2",
    }
    expected_options = {
        "ignoreUnspecifiedColumnFamilies": True,
        "readRowkeyAsString": False,
        "columnFamilies": [
            {
                "familyId": "familyId",
                "type": "type",
                "encoding": "encoding",
                "columns": [expected_first_column, expected_second_column],
                "onlyReadLatest": False,
            }
        ],
    }
    expected = {"sourceFormat": "BIGTABLE", "bigtableOptions": expected_options}

    actual = config.to_api_repr()
    self.assertEqual(actual, expected)
def test_schema_None(self):
    """Check that a schema of ``None`` is serialized as ``schema: None``."""
    config = external_config.ExternalConfig("")
    config.schema = None

    self.assertEqual(
        config.to_api_repr(),
        {"sourceFormat": "", "schema": None},
    )
def test_parquet_options_getter_non_parquet_format(self):
    """Expect ``parquet_options`` to be None on a config created as CSV."""
    csv_format = external_config.ExternalSourceFormat.CSV
    config = external_config.ExternalConfig(csv_format)

    self.assertIsNone(config.parquet_options)