def test_get_conversion_type_linked_identity(self):
    # given: a "someschema" field looked up with "someotherschema" as third argument
    # NOTE(review): the test name says "linked identity" but the expectation is
    # LINKED_EXTERNAL_REFERENCE - confirm which of the two is intended.
    spec = ColumnSpecification(
        self.schema_template,
        "someschema.uuid",
        "someotherschema",
    )

    # expect:
    self.assertEqual(
        ConversionType.LINKED_EXTERNAL_REFERENCE,
        spec.get_conversion_type(),
    )
def test_get_conversion_type_linked_external_reference_identity(self):
    # given: a "someschema" field looked up with "someotherschema" as third argument
    # NOTE(review): the test name mentions an external reference identity while
    # the expectation is LINKING_DETAIL - confirm against the schema fixture.
    spec = ColumnSpecification(
        self.schema_template,
        "someschema.external_reference_property",
        "someotherschema",
    )

    # expect:
    self.assertEqual(
        ConversionType.LINKING_DETAIL,
        spec.get_conversion_type(),
    )
def test_build_raw_external_reference(self):
    # given: a raw spec flagged as external reference; 'value_type' is left out
    # on purpose, so data_type is not asserted further down
    flagged_raw = {
        'identifiable': True,
        'external_reference': True
    }

    # and: an otherwise identical raw spec with the flag switched off
    unflagged_raw = copy.deepcopy(flagged_raw)
    unflagged_raw['external_reference'] = False

    # when:
    external_spec = ColumnSpecification.build_raw(
        'profile.uuid', 'profile', 'personal_info', flagged_raw)
    non_external_spec = ColumnSpecification.build_raw(
        'user.id', 'user', 'personal_info', unflagged_raw)

    # then: the flagged spec keeps its coordinates and reports the reference
    self.assertEqual('profile.uuid', external_spec.field_name)
    self.assertEqual('profile', external_spec.object_type)
    self.assertEqual('personal_info', external_spec.main_category)
    self.assertTrue(external_spec.is_external_reference())

    # then: the unflagged spec does not
    self.assertEqual('user.id', non_external_spec.field_name)
    self.assertFalse(non_external_spec.is_external_reference())
def test_get_conversion_type_member_field(self):
    # given: a string field whose root ('user') equals the object type
    spec = ColumnSpecification(
        'user.name',
        'user',
        'user_data',
        DataType.STRING,
    )

    # expect:
    self.assertEqual(ConversionType.MEMBER_FIELD, spec.get_conversion_type())
def test_get_conversion_type_linking_detail(self):
    # given: a string field whose root ('item') differs from the object type ('record')
    spec = ColumnSpecification(
        'item.description',
        'record',
        'invoice_detail',
        DataType.STRING,
    )

    # expect:
    self.assertEqual(ConversionType.LINKING_DETAIL, spec.get_conversion_type())
def test_look_up_linked_object_field(self):
    # given:
    linked_context = "some_linked_field"

    # when:
    spec = ColumnSpecification(self.schema_template, "someschema.value", linked_context)

    # then: domain and context are exposed as attributes
    self.assertEqual('somedomain', spec.domain_type)
    self.assertEqual('some_linked_field', spec.context_concrete_type)

    # and: the column is converted as a linking detail
    self.assertEqual(ConversionType.LINKING_DETAIL, spec.get_conversion_type())
def test_get_conversion_type_field_of_list_element(self):
    # given: a string field flagged as having a multivalue parent
    spec = ColumnSpecification(
        'product.product_name',
        'product',
        'store_entity',
        DataType.STRING,
        multivalue_parent=True,
    )

    # expect:
    self.assertEqual(
        ConversionType.FIELD_OF_LIST_ELEMENT,
        spec.get_conversion_type(),
    )
def _assert_correct_converter_single_value(self, data_type: DataType, expected_converter_type):
    """Assert that a column of ``data_type`` resolves to ``expected_converter_type``.

    The column is built with fixed placeholder names and no extra flags.
    """
    spec = ColumnSpecification('field', 'object_type', 'main_category', data_type)
    self.assertIsInstance(spec.determine_converter(), expected_converter_type)
def test_get_conversion_type_linked_identity(self):
    # given: an identity field whose root ('account') differs from the object type ('user')
    spec = ColumnSpecification(
        'account.number',
        'user',
        'profile_type',
        DataType.STRING,
        identity=True,
    )

    # expect:
    self.assertEqual(ConversionType.LINKED_IDENTITY, spec.get_conversion_type())
def test_get_conversion_type_identity(self):
    # given: an identity field whose root ('product') equals the object type
    spec = ColumnSpecification(
        'product.product_id',
        'product',
        'store_entry',
        DataType.STRING,
        identity=True,
    )

    # expect:
    self.assertEqual(ConversionType.IDENTITY, spec.get_conversion_type())
def test_get_conversion_type_external_reference_identity(self):
    # given: an identity field that is also flagged as an external reference
    spec = ColumnSpecification(
        'account.uuid',
        'user',
        'profile_type',
        DataType.STRING,
        identity=True,
        external_reference=True,
    )

    # expect:
    self.assertEqual(
        ConversionType.EXTERNAL_REFERENCE,
        spec.get_conversion_type(),
    )
def test__column_specification_creation_identifiable__succeeds(self):
    # when:
    spec = ColumnSpecification(self.schema_template, "someschema.protocol_id", "someschema")

    # then: the column is recognised as an identity
    self.assertTrue(spec.is_identity())

    # and: its descriptive attributes are populated
    self.assertEqual('someschema', spec.context_concrete_type)
    self.assertEqual('somedomain', spec.domain_type)
    self.assertEqual('someschema.protocol_id', spec.field_name)
    self.assertEqual(DataType.INTEGER, spec.data_type)

    # and: it converts as an identity
    self.assertEqual(ConversionType.IDENTITY, spec.get_conversion_type())
def _assert_correct_converter_multivalue(self, data_type):
    """Assert that a multivalue column of ``data_type`` yields a matching ListConverter.

    The converter must be a ``ListConverter`` whose ``base_type`` equals
    ``data_type``.
    """
    spec = ColumnSpecification(
        'field',
        'object_type',
        'main_category',
        data_type,
        multivalue=True,
    )

    list_converter = spec.determine_converter()

    self.assertIsInstance(list_converter, ListConverter)
    self.assertEqual(data_type, list_converter.base_type)
def test_build_raw_spec_with_parent_spec(self):
    # given: one child spec, paired with a single-value and a multivalue parent
    child_raw = {'value_type': 'boolean', 'multivalue': True}
    single_value_parent_raw = {'multivalue': False}
    multi_value_parent_raw = {'multivalue': True}

    # when:
    under_single_parent = ColumnSpecification.build_raw(
        '', '', '', child_raw, parent=single_value_parent_raw)
    under_multi_parent = ColumnSpecification.build_raw(
        '', '', '', child_raw, parent=multi_value_parent_raw)

    # then: only the multivalue parent makes the child a field of a list element
    self.assertFalse(under_single_parent.is_field_of_list_element())
    self.assertTrue(under_multi_parent.is_field_of_list_element())
def determine_strategy(column_spec: ColumnSpecification):
    """Choose the cell conversion strategy for ``column_spec``.

    Returns ``DO_NOTHING`` when ``column_spec`` is ``None`` or when its
    conversion type matches none of the branches below. Otherwise returns a
    conversion object built from the column's field name plus either its
    converter or its main category, depending on the conversion type.
    """
    if column_spec is None:
        return DO_NOTHING

    field_name = column_spec.field_name
    # The converter is resolved up front even though the last two branches
    # do not use it.
    converter = column_spec.determine_converter()
    conversion_type = column_spec.get_conversion_type()

    if ConversionType.MEMBER_FIELD == conversion_type:
        return DirectCellConversion(field_name, converter)
    if ConversionType.FIELD_OF_LIST_ELEMENT == conversion_type:
        return ListElementCellConversion(field_name, converter)
    if ConversionType.LINKING_DETAIL == conversion_type:
        return LinkingDetailCellConversion(field_name, converter)
    if ConversionType.IDENTITY == conversion_type:
        return IdentityCellConversion(field_name, converter)
    if ConversionType.LINKED_IDENTITY == conversion_type:
        return LinkedIdentityCellConversion(field_name, column_spec.main_category)
    if ConversionType.EXTERNAL_REFERENCE == conversion_type:
        return ExternalReferenceCellConversion(field_name, column_spec.main_category)

    return DO_NOTHING
def test__column_specification_creation_string_type__succeeds(self):
    # when: the column is created as the 7th occurrence of its header
    spec = ColumnSpecification(
        self.schema_template,
        "someschema.value",
        "someschema",
        order_of_occurrence=7,
    )

    # then: it is a single-value string column
    self.assertFalse(spec.multivalue)
    self.assertEqual('someschema', spec.context_concrete_type)
    self.assertEqual('somedomain', spec.domain_type)
    self.assertEqual('someschema.value', spec.field_name)
    self.assertEqual(DataType.STRING, spec.data_type)

    # and: the occurrence index is kept as given
    self.assertEqual(7, spec.order_of_occurrence)
def test_determine_converter_for_multivalue_type(self):
    # Every listed base type, declared as the item type of an array property,
    # must resolve to a ListConverter that carries that base type.
    data_types_to_test = (
        DataType.BOOLEAN,
        DataType.INTEGER,
        DataType.STRING,
        DataType.UNDEFINED,
    )

    for data_type in data_types_to_test:
        # subTest names the failing data type in the report and lets the
        # remaining data types run instead of stopping at the first failure.
        with self.subTest(data_type=data_type):
            # given: a schema with a single array property of this item type
            sample_metadata_schema_json = {
                "$schema": "http://json-schema.org/draft-07/schema#",
                "$id": "https://schema.humancellatlas.org/module/protocol/2.0.2/someschema",
                "description": "Just a plain old test schema",
                "type": "object",
                "properties": {
                    "multivalue_property": {
                        "description": "Some generic multivalue property",
                        "type": "array",
                        "multivalue": True,
                        "items": {
                            "type": data_type.value
                        }
                    }
                }
            }
            schema_template = SchemaTemplate(
                json_schema_docs=[sample_metadata_schema_json])

            # when:
            column_specification = ColumnSpecification(
                schema_template, "someschema.multivalue_property", "someschema")

            # then:
            self.assertEqual(column_specification.field_name,
                             "someschema.multivalue_property")
            self.assertTrue(column_specification.is_multivalue())

            # and: resolve the converter once and check both of its aspects
            converter = column_specification.determine_converter()
            self.assertIsInstance(converter, ListConverter)
            self.assertEqual(converter.base_type, data_type)
def test_build_raw_multivalue(self):
    # given: a raw spec describing a multivalue integer
    raw_spec = {'value_type': 'integer', 'multivalue': True}

    # when:
    spec = ColumnSpecification.build_raw(
        'sample.numbers',
        'user',
        'profile_entry',
        raw_spec,
    )

    # then: the positional arguments are carried over
    self.assertEqual('sample.numbers', spec.field_name)
    self.assertEqual('profile_entry', spec.main_category)

    # and: the raw spec is translated into type and flags
    self.assertEqual(DataType.INTEGER, spec.data_type)
    self.assertTrue(spec.is_multivalue())
    self.assertFalse(spec.is_identity())
def test_look_up_nested_object_field(self):
    # given: a string child property nested under a multivalue parent property
    child_properties = {
        "some_child_property": {
            "description": "A child property",
            "type": "string",
        }
    }
    parent_property = {
        "description": "A parent property",
        "type": "array",
        "multivalue": True,
        "items": {
            "type": "integer"
        },
        "properties": child_properties
    }
    nested_schema_doc = {
        "$schema": "http://json-schema.org/draft-07/schema#",
        "$id": "https://schema.humancellatlas.org/module/somedomain/2.0.2/someschema",
        "description": "Just a plain old test schema",
        "required": [],
        "type": "object",
        "properties": {
            "some_parent_property": parent_property
        }
    }
    template = SchemaTemplate(json_schema_docs=[nested_schema_doc])

    # when:
    spec = ColumnSpecification(
        template,
        "someschema.some_parent_property.some_child_property",
        "someschema",
    )

    # then: the child itself is single-value but sits inside a list element
    self.assertFalse(spec.multivalue)
    self.assertTrue(spec.is_field_of_list_element())
    self.assertEqual(ConversionType.FIELD_OF_LIST_ELEMENT,
                     spec.get_conversion_type())
def test_determine_converter_for_single_value(self):
    # Each data type is listed next to the converter class it must resolve
    # to, so the pairing cannot drift the way two parallel lists can.
    expected_converter_by_type = (
        (DataType.BOOLEAN, BooleanConverter),
        (DataType.INTEGER, IntegerConverter),
        (DataType.STRING, StringConverter),
        (DataType.UNDEFINED, DefaultConverter),
    )

    for data_type, expected_converter in expected_converter_by_type:
        # subTest names the failing data type in the report and lets the
        # remaining data types run instead of stopping at the first failure.
        with self.subTest(data_type=data_type):
            # given: a schema with one single-value property of this type
            sample_metadata_schema_json = {
                "$schema": "http://json-schema.org/draft-07/schema#",
                "$id": "https://schema.humancellatlas.org/module/protocol/2.0.2/someschema",
                "description": "Just a plain old test schema",
                "type": "object",
                "properties": {
                    "some_property": {
                        "description": "Some generic property",
                        "type": data_type.value,
                        "multivalue": False,
                    }
                }
            }
            schema_template = SchemaTemplate(
                json_schema_docs=[sample_metadata_schema_json])

            # when:
            column_specification = ColumnSpecification(
                schema_template, "someschema.some_property", "someschema")

            # then:
            self.assertIsInstance(column_specification.determine_converter(),
                                  expected_converter)
            self.assertEqual(column_specification.get_conversion_type(),
                             ConversionType.MEMBER_FIELD)
def _define_column_spec(self, header, object_type, order_of_occurence=1): if header is not None: parent_path, __ = utils.split_field_chain(header) raw_spec = self.lookup(header) raw_parent_spec = self.lookup(parent_path) concrete_type = utils.extract_root_field(header) main_category = self.get_domain_entity(concrete_type) column_spec = ColumnSpecification.build_raw( header, object_type, main_category, raw_spec, parent=raw_parent_spec, order_of_occurence=order_of_occurence) else: column_spec = None return column_spec
def test_build_raw_single_type(self):
    # given: a raw spec describing a single-value, identifiable string
    raw_spec = {
        'value_type': 'string',
        'multivalue': False,
        'identifiable': True
    }

    # when:
    spec = ColumnSpecification.build_raw(
        'user.name',
        'user',
        'profile_entry',
        raw_spec,
    )

    # then: the positional arguments are carried over
    self.assertEqual('user.name', spec.field_name)
    self.assertEqual('profile_entry', spec.main_category)

    # and: the raw spec is translated into type and flags
    self.assertEqual(DataType.STRING, spec.data_type)
    self.assertFalse(spec.is_multivalue())
    self.assertTrue(spec.is_identity())
def create_row_template(self, ingest_worksheet: IngestWorksheet):
    """Build a RowTemplate for ``ingest_worksheet``.

    One conversion strategy is produced per column header, in header order.
    A header that appears more than once gets an increasing
    ``order_of_occurrence`` (1 for its first appearance). Default values are
    defined only when the worksheet is not a module tab.
    """
    concrete_type = self.get_concrete_type(ingest_worksheet.title)
    domain_type = self.get_domain_type(concrete_type)
    column_headers = ingest_worksheet.get_column_headers()
    context = self._determine_context(concrete_type, ingest_worksheet)

    occurrences = {}
    cell_conversions = []
    for header in column_headers:
        # Running count of how often this header has been seen so far.
        occurrences[header] = occurrences.get(header, 0) + 1
        column_spec = ColumnSpecification(
            self.template,
            header,
            concrete_type,
            context=context,
            order_of_occurrence=occurrences[header],
        )
        cell_conversions.append(
            conversion_strategy.determine_strategy(column_spec))

    if ingest_worksheet.is_module_tab():
        default_values = None
    else:
        default_values = self._define_default_values(concrete_type)

    return RowTemplate(domain_type, concrete_type, cell_conversions,
                       default_values=default_values)