def test_merge_schemas_records(self):
    # Two documents carry the same record under names that differ only in
    # letter case ('recordField' vs 'recordfield'). The assertion expects a
    # single 'recordField' record whose nested fields are the union of both
    # inputs.
    first = bigquery_schema.translate_json_to_schema(
        {'recordField': {'field1': 'string'}})
    second = bigquery_schema.translate_json_to_schema(
        {'recordfield': {'field1': 'string', 'field2': [2]}})

    merged_schema = bigquery_schema.merge_schemas([first, second])

    expected_nested = [
        {'name': 'field1', 'field_type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'field2', 'field_type': 'NUMERIC', 'mode': 'REPEATED'},
    ]
    expected = [{
        'name': 'recordField',
        'field_type': 'RECORD',
        'mode': 'NULLABLE',
        'fields': expected_nested,
    }]
    self.assertEqual(merged_schema, expected)
def test_merge_schemas_records(self):
    # Merges two schemas describing the same 'record_field' record and
    # inspects the result through field attributes instead of comparing the
    # whole structure at once.
    documents = (
        {'record_field': {'field1': 'string'}},
        {'record_field': {'field1': 'string', 'field2': [2]}},
    )
    schemas = [
        bigquery_schema.translate_json_to_schema(doc) for doc in documents
    ]
    merged = bigquery_schema.merge_schemas(schemas)

    # Both inputs must collapse into exactly one RECORD with two children.
    self.assertEqual(len(merged), 1)
    record = merged[0]
    self.assertEqual(record.field_type, 'RECORD')
    self.assertEqual(len(record.fields), 2)

    # (nested field name, expected type, expected mode)
    expectations = (
        ('field1', 'STRING', 'NULLABLE'),
        ('field2', 'NUMERIC', 'REPEATED'),
    )
    for name, expected_type, expected_mode in expectations:
        _, nested = bigquery_schema._get_field_by_name(record.fields, name)
        self.assertEqual(nested.field_type, expected_type)
        self.assertEqual(nested.mode, expected_mode)
def test_merge_array_schemas_records(self):
    # 'field1' is a list of objects in both documents, but each document
    # uses a different nested key. The assertion expects one REPEATED RECORD
    # containing both nested string fields.
    with_nested1 = bigquery_schema.translate_json_to_schema(
        {'field1': [{'nested1': 'value1'}]})
    with_nested2 = bigquery_schema.translate_json_to_schema(
        {'field1': [{'nested2': 'value1'}]})

    merged_schema = bigquery_schema.merge_schemas(
        [with_nested1, with_nested2])

    expected_nested = [
        {'name': 'nested1', 'field_type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'nested2', 'field_type': 'STRING', 'mode': 'NULLABLE'},
    ]
    expected = [{
        'name': 'field1',
        'field_type': 'RECORD',
        'mode': 'REPEATED',
        'fields': expected_nested,
    }]
    self.assertEqual(merged_schema, expected)
def bigquery_schema_for_asset_type(cls, asset_type, include_resource,
                                   include_iam_policy):
    """Returns the BigQuery schema for the asset type.

    The result is stored in ``cls._schema_cache`` under a key built from all
    three arguments; a later call with the same arguments returns the stored
    schema without rebuilding it.

    Args:
        asset_type: CAI asset type.
        include_resource: if resource schema should be included.
        include_iam_policy: if IAM policy schema should be included.

    Returns:
        The schema produced by ``cls._convert_to_asset_schema`` for the
        merged per-document resource schemas.
    """
    key = '{}.{}.{}'.format(
        asset_type, include_resource, include_iam_policy)
    if key in cls._schema_cache:
        return cls._schema_cache[key]

    api_name = cls.get_api_name_for_asset_type(asset_type)
    documents = cls._get_discovery_documents_map()[api_name]
    resource_name = resource_name_for_asset_type(asset_type)

    # Merge all asset versions into a single schema.
    merged = bigquery_schema.merge_schemas([
        cls._translate_resource_to_schema(resource_name, document)
        for document in documents
    ])

    schema = cls._convert_to_asset_schema(
        merged, include_resource, include_iam_policy)
    cls._schema_cache[key] = schema
    return schema
def _get_schema_for_resource(cls, discovery_documents, resource_name):
    """Translate API discovery documents to a BigQuery schema.

    Args:
        discovery_documents: iterable of discovery documents; each one is
            passed to ``cls._translate_resource_to_schema``.
        resource_name: resource name passed alongside every document.

    Returns:
        The result of merging the per-document schemas with
        ``bigquery_schema.merge_schemas``.
    """
    per_document = [
        cls._translate_resource_to_schema(resource_name, doc)
        for doc in discovery_documents
    ]
    return bigquery_schema.merge_schemas(per_document)
def test_merge_schemas_basic(self):
    # Two single-field documents with different field names should merge
    # into a schema holding both fields, each keeping its own type.
    string_schema = bigquery_schema.translate_json_to_schema(
        {'field1': 'string'})
    number_schema = bigquery_schema.translate_json_to_schema({'field2': 3})

    merged = bigquery_schema.merge_schemas([string_schema, number_schema])

    self.assertEqual(len(merged), 2)
    # (field name, expected type)
    for name, expected_type in (('field1', 'STRING'), ('field2', 'NUMERIC')):
        _, field = bigquery_schema._get_field_by_name(merged, name)
        self.assertEqual(field.field_type, expected_type)
def test_addtional_properties_merge_schema_simple(self):
    # The first schema models 'property_2' as a REPEATED RECORD of
    # name/value string pairs, while the document supplies 'property_2' as a
    # plain object. The assertion expects the first schema's entries to
    # come back unchanged, followed by the new 'property_3' string field.
    name_field = {
        'name': 'name',
        'field_type': 'STRING',
        'description': 'additionalProperties name',
        'mode': 'NULLABLE',
    }
    value_field = {
        'name': 'value',
        'field_type': 'STRING',
        'description': 'description-2.',
        'mode': 'NULLABLE',
    }
    rest_schema = [
        {
            'name': 'property_1',
            'field_type': 'STRING',
            'description': 'description-1',
            'mode': 'NULLABLE',
        },
        {
            'name': 'property_2',
            'field_type': 'RECORD',
            'description': 'description-2',
            'mode': 'REPEATED',
            'fields': [name_field, value_field],
        },
    ]
    document = {
        'property_1': 'value_1',
        'property_2': {
            'add_prop_1': 'add_value_1',
            'add_prop_2': 'add_value_2',
        },
        'property_3': 'value_3',
    }
    document_schema = bigquery_schema.translate_json_to_schema(document)

    merged = bigquery_schema.merge_schemas([rest_schema, document_schema])

    # Built after the merge call on purpose: the expected value is derived
    # from rest_schema as it stands once merge_schemas has run.
    expected = rest_schema + [{
        'name': 'property_3',
        'field_type': 'STRING',
        'mode': 'NULLABLE',
    }]
    self.assertEqual(merged, expected)
def test_merge_schemas_basic(self):
    # Two single-field documents with different field names should merge
    # into a two-entry schema, compared here as a list of plain dicts.
    string_schema = bigquery_schema.translate_json_to_schema(
        {'field1': 'string'})
    number_schema = bigquery_schema.translate_json_to_schema({'field2': 3})

    merged_schema = bigquery_schema.merge_schemas(
        [string_schema, number_schema])

    expected = [
        {'name': 'field1', 'field_type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'field2', 'field_type': 'NUMERIC', 'mode': 'NULLABLE'},
    ]
    self.assertEqual(merged_schema, expected)
def test_addtional_properties_merge_schema_object(self):
    # The first schema models 'property_2' as a REPEATED RECORD of
    # name/value pairs whose 'value' is itself a RECORD with no children.
    # The document supplies objects as the values, and the assertion expects
    # the merged 'value' RECORD to gain the nested 'key_1' numeric field.
    self.maxDiff = None  # show the full diff if the comparison fails

    rest_schema = [
        {
            'name': 'property_1',
            'field_type': 'STRING',
            'description': 'description-1',
            'mode': 'NULLABLE',
        },
        {
            'name': 'property_2',
            'field_type': 'RECORD',
            'description': 'description-2',
            'mode': 'REPEATED',
            'fields': [
                {
                    'name': 'name',
                    'field_type': 'STRING',
                    'description': 'additionalProperties name',
                    'mode': 'NULLABLE',
                },
                {
                    'name': 'value',
                    'field_type': 'RECORD',
                    'mode': 'NULLABLE',
                },
            ],
        },
    ]
    document = {
        'property_1': 'value_1',
        'property_2': {
            'add_prop_1': {'key_1': 1},
            'add_prop_2': {'key_1': 2},
        },
        'property_3': 'value_3',
    }
    document_schema = bigquery_schema.translate_json_to_schema(document)

    merged = bigquery_schema.merge_schemas([rest_schema, document_schema])

    expected = [
        {
            'name': 'property_1',
            'field_type': 'STRING',
            'description': 'description-1',
            'mode': 'NULLABLE',
        },
        {
            'name': 'property_2',
            'field_type': 'RECORD',
            'description': 'description-2',
            'mode': 'REPEATED',
            'fields': [
                {
                    'name': 'name',
                    'field_type': 'STRING',
                    'description': 'additionalProperties name',
                    'mode': 'NULLABLE',
                },
                {
                    'name': 'value',
                    'field_type': 'RECORD',
                    'mode': 'NULLABLE',
                    'fields': [{
                        'name': 'key_1',
                        'field_type': 'NUMERIC',
                        'mode': 'NULLABLE',
                    }],
                },
            ],
        },
        {
            'name': 'property_3',
            'field_type': 'STRING',
            'mode': 'NULLABLE',
        },
    ]
    self.assertEqual(merged, expected)
def add_input(self, schema, element):
    """Merge the schemas derived from `element` into `schema`.

    Two schemas are derived from the element: one from
    ``self.element_to_schema`` and one from
    ``bigquery_schema.translate_json_to_schema``.

    Args:
        schema: the schema accumulated so far.
        element: the input element to derive schemas from.

    Returns:
        The result of ``bigquery_schema.merge_schemas`` over the accumulated
        schema and the two derived schemas, in that order.
    """
    to_merge = [
        schema,
        self.element_to_schema(element),
        bigquery_schema.translate_json_to_schema(element),
    ]
    return bigquery_schema.merge_schemas(to_merge)
def merge_accumulators(self, accumulators):
    """Merge several accumulated schemas into one.

    Args:
        accumulators: the schemas to combine; passed unchanged to
            ``bigquery_schema.merge_schemas``.

    Returns:
        The merged schema.
    """
    merged = bigquery_schema.merge_schemas(accumulators)
    return merged
def add_input(self, schema, element):
    """Merge the schema derived from `element` into `schema`.

    Args:
        schema: the schema accumulated so far.
        element: the input element, translated with
            ``bigquery_schema.translate_json_to_schema``.

    Returns:
        The result of ``bigquery_schema.merge_schemas`` over the accumulated
        schema followed by the element's schema.
    """
    to_merge = [schema, bigquery_schema.translate_json_to_schema(element)]
    return bigquery_schema.merge_schemas(to_merge)