Example #1
0
    def test_is_project__returns_false(self):
        """is_project() is false for a worksheet titled 'not a Project'."""
        # given: an empty sheet titled 'not a Project'
        sheet = spreadsheet_utils.create_worksheet('not a Project', [])

        # when:
        outcome = IngestWorksheet(sheet).is_project()

        # then:
        self.assertFalse(outcome)
Example #2
0
    def test_is_project__returns_true__when_case_is_lower(self):
        """is_project() is true for a worksheet titled 'project' in lower case."""
        # given: an empty sheet with an all-lowercase title
        sheet = spreadsheet_utils.create_worksheet('project', [])

        # when:
        outcome = IngestWorksheet(sheet).is_project()

        # then:
        self.assertTrue(outcome)
    def test_get_data_rows(self):
        """get_data_rows(start_row=6) returns the single populated data row."""
        # given: six rows, with headers placed on row 4 and data on row 6
        header_row_idx = 4
        start_row_idx = 6
        headers = ['name', 'address', 'mobile', 'email']
        expected_data_row = ['Jane Doe', 'Cambridge', '12-345-67', '*****@*****.**']

        sheet_rows = [[] for _ in range(6)]
        # list position is the row number minus one
        sheet_rows[header_row_idx - 1] = headers
        sheet_rows[start_row_idx - 1] = expected_data_row

        worksheet = spreadsheet_utils.create_worksheet('person', sheet_rows)

        # when: the rows are read and flattened to plain cell values
        ingest_worksheet = IngestWorksheet(worksheet, header_row_idx=header_row_idx)
        data_rows = ingest_worksheet.get_data_rows(start_row=start_row_idx)
        data_row_values = [[cell.value for cell in row] for row in data_rows]

        # then:
        self.assertEqual(len(data_row_values), 1)
        self.assertEqual(data_row_values, [expected_data_row])
Example #4
0
    def test_is_project_module__returns_true(self):
        """is_project_module() is true for a sheet titled 'Project - Contributors'."""
        # given: an empty sheet titled 'Project - Contributors'
        sheet = spreadsheet_utils.create_worksheet('Project - Contributors', [])

        # when:
        outcome = IngestWorksheet(sheet).is_project_module()

        # then:
        self.assertTrue(outcome)
    def test_get_title(self):
        """IngestWorksheet.title equals the title of the sheet it wraps."""
        # given:
        workbook = create_test_workbook('User', 'User - SN Profiles')
        # Subscript lookup is used instead of the deprecated
        # Workbook.get_sheet_by_name().
        user_sheet = workbook['User']
        sn_profiles_sheet = workbook['User - SN Profiles']

        # and:
        user = IngestWorksheet(user_sheet)
        sn_profiles = IngestWorksheet(sn_profiles_sheet)

        # expect:
        self.assertEqual('User', user.title)
        self.assertEqual('User - SN Profiles', sn_profiles.title)
    def test_get_column_headers_includes_blank_cells(self):
        """get_column_headers() keeps an empty header cell in its position."""
        # given: a header row starting at row 4 whose third cell is blank
        header_row = 4
        expected_headers = ['name', 'address', '', 'email']
        worksheet = spreadsheet_utils.create_worksheet(
            'person', [list(expected_headers)], start_row=header_row)

        # when:
        wrapped = IngestWorksheet(worksheet, header_row_idx=header_row)
        column_headers = wrapped.get_column_headers()

        # then: all four headers come back, blank included
        self.assertEqual(len(column_headers), 4)
        self.assertEqual(column_headers, expected_headers)
Example #7
0
    def test__has_column__returns_false__when_column_is_empty(self):
        """has_column('') is false for a sheet whose four headers are all named."""
        # given: a header row on row 1 followed by one data row
        header_cells = ['name', 'address', 'mobile', 'email']
        data_cells = ['Jane Doe', 'Cambridge', '12-345-67', '*****@*****.**']
        worksheet = spreadsheet_utils.create_worksheet(
            'person', [header_cells, data_cells])
        ingest_worksheet = IngestWorksheet(worksheet, header_row_idx=1)

        # when: an empty column name is looked up
        found = ingest_worksheet.has_column('')

        # then:
        self.assertFalse(found)
    def test_do_import(self):
        """do_import() returns both entities from the row template and no errors."""
        # given:
        # Mock's first positional parameter is ``spec``; the label must be
        # passed as ``name=`` so the mock is not spec'ed against a str.
        row_template = MagicMock(name='row_template')
        no_errors = []

        # and: the row template yields one (entity, errors) pair per call
        john_doe = MetadataEntity(object_id='profile_1')
        emma_jackson = MetadataEntity(object_id='profile_2')
        row_template.do_import = MagicMock(
            name='import_row',
            side_effect=[(john_doe, no_errors), (emma_jackson, no_errors)])

        # and:
        mock_template_manager = MagicMock(name='template_manager')
        mock_template_manager.create_row_template = MagicMock(return_value=row_template)
        mock_template_manager.get_header_row = MagicMock(return_value=['header1', 'header2'])
        mock_template_manager.get_concrete_type = MagicMock(return_value='concrete_entity')

        # and:
        workbook = Workbook()
        worksheet = workbook.create_sheet('user_profile')
        worksheet['A4'] = 'header'
        worksheet['A6'] = 'john'
        worksheet['A7'] = 'emma'

        # when:
        worksheet_importer = WorksheetImporter(mock_template_manager)
        profiles, errors = worksheet_importer.do_import(IngestWorksheet(worksheet))

        # then:
        self.assertEqual(2, len(profiles))
        self.assertIn(john_doe, profiles)
        self.assertIn(emma_jackson, profiles)
        self.assertEqual(errors, [])

        # TODO: assert that domain and concrete type are set on the imported
        # entities; that check was never implemented in this test.
Example #9
0
    def test_create_row_template_with_default_values(self, determine_strategy,
                                                     look_up):
        """create_row_template() fills default_values with schema URL and type.

        ``determine_strategy`` and ``look_up`` are presumably injected by patch
        decorators that sit outside this view -- confirm against the class.
        """
        # given:
        # Mock's first positional parameter is ``spec``; labels go in ``name=``.
        schema_template = MagicMock(name='schema_template')
        ingest_api = MagicMock(name='ingest_api')

        # and:
        schema_url = 'http://schema.sample.com/profile'
        self._mock_schema_lookup(schema_template,
                                 schema_url=schema_url,
                                 main_category='profile',
                                 object_type='profile_type')

        # and:
        look_up.return_value = MagicMock(name='column_spec')
        determine_strategy.return_value = FakeConversion('')

        # and: a sheet with a single header in A4
        workbook = Workbook()
        worksheet = workbook.create_sheet('profile')
        worksheet['A4'] = 'profile.name'
        ingest_worksheet = IngestWorksheet(worksheet)

        # when:
        template_manager = TemplateManager(schema_template, ingest_api)
        template_manager.get_schema_url = MagicMock(return_value=schema_url)
        row_template = template_manager.create_row_template(ingest_worksheet)

        # then:
        content_defaults = row_template.default_values
        self.assertIsNotNone(content_defaults)
        self.assertEqual(schema_url, content_defaults.get('describedBy'))
        self.assertEqual('profile', content_defaults.get('schema_type'))
Example #10
0
 def do_import(self, ingest_worksheet: IngestWorksheet):
     """Import every data row of the worksheet and return the results.

     A row template is created for the worksheet and applied to each row
     returned by ``get_data_rows()``. Any result whose ``object_id`` is falsy
     is given one from ``self._generate_id()``.

     :param ingest_worksheet: worksheet wrapper supplying the data rows
     :return: list of imported results, in row order
     """
     template_for_rows = self.template.create_row_template(ingest_worksheet)
     imported = []
     for data_row in ingest_worksheet.get_data_rows():
         entity = template_for_rows.do_import(data_row)
         if not entity.object_id:
             entity.object_id = self._generate_id()
         imported.append(entity)
     return imported
Example #11
0
 def select_importable_worksheets(self, worksheet_titles: List[str]):
     """Wrap the workbook sheets whose titles contain a requested title.

     For each requested title not listed in ``SPECIAL_TABS``, every workbook
     sheet whose lower-cased title contains the lower-cased requested title is
     wrapped in an ``IngestWorksheet``. A sheet matching several requested
     titles appears once per match.

     :param worksheet_titles: titles (or title fragments) to select
     :return: list of ``IngestWorksheet`` wrappers
     """
     selected = []
     for title in worksheet_titles:
         for sheet in self.workbook.worksheets:
             if title in SPECIAL_TABS:
                 continue
             if title.lower() in sheet.title.lower():
                 selected.append(IngestWorksheet(sheet))
     return selected
Example #12
0
    def add_entity_uuids(self, submission: Submission):
        """Write each entity's UUID into its originating worksheet row.

        Only entities with a truthy ``spreadsheet_location`` are processed.
        For each one, the worksheet named by ``worksheet_title`` is wrapped in
        an ``IngestWorksheet`` (cached per title). If the sheet has no
        ``'<concrete_type>.uuid'`` header, ``insert_column_with_header`` is
        called with column index 1. The entity's ``uuid`` is then written to
        column 1 of the row given by ``row_index``.

        :param submission: source of the entities via ``get_entities()``
        """
        worksheets = {}
        col_idx = 1
        entities = [
            entity for entity in submission.get_entities()
            if entity.spreadsheet_location
        ]
        for entity in entities:
            location = entity.spreadsheet_location
            worksheet_title = location.get('worksheet_title')
            row_index = location.get('row_index')

            # Resolve this entity's worksheet BEFORE inspecting headers.
            # Previously a cached title left ``ingest_worksheet`` bound to the
            # previous entity's sheet during the header check.
            ingest_worksheet = worksheets.get(worksheet_title)
            if ingest_worksheet is None:
                ingest_worksheet = IngestWorksheet(
                    worksheet=self.workbook[worksheet_title])
                worksheets[worksheet_title] = ingest_worksheet

            column_header = f'{entity.concrete_type}.uuid'

            # NOTE(review): the uuid is always written to column 1, even when
            # the header already exists -- presumably it is then in column 1;
            # confirm.
            if column_header not in ingest_worksheet.get_column_headers():
                ingest_worksheet.insert_column_with_header(
                    column_header, col_idx)

            ingest_worksheet.cell(row=row_index,
                                  column=col_idx).value = entity.uuid
Example #13
0
 def do_import(self, ingest_worksheet: IngestWorksheet):
     """Import the worksheet's data rows, collecting errors instead of raising.

     Each row is imported through a row template created for the worksheet.
     Every error dict reported for a row has its ``"location"`` prefixed with
     the sheet title and the row's position in the iteration (or set to that
     prefix when it had none). Results with a falsy ``object_id`` get one from
     ``self._generate_id()``. Any exception raised along the way is recorded
     as a sheet-level error and ends the import of this worksheet.

     :param ingest_worksheet: worksheet wrapper supplying title and data rows
     :return: tuple ``(records, worksheet_errors)``
     """
     imported = []
     collected_errors = []
     try:
         template_for_rows = self.template.create_row_template(ingest_worksheet)
         for position, data_row in enumerate(ingest_worksheet.get_data_rows()):
             entity, problems = template_for_rows.do_import(
                 data_row, ingest_worksheet.is_module_tab())
             for problem in problems:
                 where = f'sheet={ingest_worksheet.title} row={position}'
                 # keep any finer-grained location the row import reported
                 problem["location"] = (
                     f'{where}, {problem["location"]}'
                     if 'location' in problem else where)
                 collected_errors.append(problem)
             if not entity.object_id:
                 entity.object_id = self._generate_id()
             imported.append(entity)
     except Exception as e:
         # deliberately broad: a failing sheet is reported, not propagated
         collected_errors.append({
             "location": f'sheet={ingest_worksheet.title}',
             "type": e.__class__.__name__,
             "detail": str(e)
         })
     return imported, collected_errors
Example #14
0
    def create_row_template(self, ingest_worksheet: IngestWorksheet):
        """Build the ``RowTemplate`` used to import rows of the given worksheet.

        One conversion strategy is determined per column header, in header
        order. Repeated headers are numbered by ``order_of_occurrence``
        (1 for the first, 2 for the second, ...). Default values are defined
        only when the worksheet is not a module tab.

        :param ingest_worksheet: worksheet wrapper supplying title and headers
        :return: a ``RowTemplate`` for the worksheet's concrete type
        """
        concrete_type = self.get_concrete_type(ingest_worksheet.title)
        domain_type = self.get_domain_type(concrete_type)
        headers = ingest_worksheet.get_column_headers()
        context = self._determine_context(concrete_type, ingest_worksheet)

        strategies = []
        occurrences = {}
        for header in headers:
            # running count of how many times this header has been seen
            seen = occurrences.get(header, 0) + 1
            occurrences[header] = seen

            spec = ColumnSpecification(self.template, header, concrete_type,
                                       context=context,
                                       order_of_occurrence=seen)
            strategies.append(conversion_strategy.determine_strategy(spec))

        if ingest_worksheet.is_module_tab():
            default_values = None
        else:
            default_values = self._define_default_values(concrete_type)

        return RowTemplate(domain_type, concrete_type, strategies,
                           default_values=default_values)
    def test_create_row_template_with_default_values(self, determine_strategy):
        """create_row_template() fills default_values with schema URL and type.

        ``determine_strategy`` is presumably injected by a patch decorator
        outside this view -- confirm against the class.
        """
        # given:
        # Mock's first positional parameter is ``spec``; the label must be
        # passed as ``name=`` so the mock is not spec'ed against a str.
        schema_template = MagicMock(name='schema_template')
        ingest_api = MagicMock(name='mock_ingest_api')

        # and: schema lookups answered from an in-memory map
        domain_entity = "profile/profile_type"
        schema_url = "http://schema.sample.com/profile"
        schema_template.get_tabs_config = MagicMock()
        schema_template.lookup_metadata_schema_name_given_title = MagicMock(
            return_value="profile_type")
        schema_template.get_latest_schema = MagicMock(return_value=schema_url)
        schema = {
            "schema": {
                "domain_entity": domain_entity,
                "url": schema_url
            }
        }
        property_schema = {
            "description": "Property description",
            "value_type": "string"
        }
        spec_map = {"profile_type": schema, "profile.name": property_schema}
        schema_template.lookup_property_from_template = lambda key: spec_map.get(
            key)

        # and:
        determine_strategy.return_value = FakeConversion('')

        # and: a sheet with a single header in A4
        workbook = Workbook()
        worksheet = workbook.create_sheet('profile')
        worksheet['A4'] = 'profile.name'
        ingest_worksheet = IngestWorksheet(worksheet)

        # when:
        template_manager = TemplateManager(schema_template, ingest_api)
        template_manager.get_schema_url = MagicMock(return_value=schema_url)
        row_template = template_manager.create_row_template(ingest_worksheet)

        # then:
        content_defaults = row_template.default_values
        self.assertIsNotNone(content_defaults)
        self.assertEqual(schema_url, content_defaults.get('describedBy'))
        self.assertEqual('profile', content_defaults.get('schema_type'))
    def test_is_module_tab(self):
        """is_module_tab() is false for 'Product' and true for 'Product - History'."""
        # given:
        workbook = create_test_workbook('Product', 'Product - History')
        # Subscript lookup is used instead of the deprecated
        # Workbook.get_sheet_by_name().
        product_sheet = workbook['Product']
        history_sheet = workbook['Product - History']

        # and:
        product = IngestWorksheet(product_sheet)
        history = IngestWorksheet(history_sheet)

        # expect:
        self.assertFalse(product.is_module_tab())
        self.assertTrue(history.is_module_tab())
    def test_do_import_no_id_metadata(self):
        """do_import() assigns distinct object ids to entities created without one."""
        # given:
        # Mock's first positional parameter is ``spec``; the label must be
        # passed as ``name=`` so the mock is not spec'ed against a str.
        row_template = MagicMock(name='row_template')
        no_errors = []
        # and: two entities constructed without an object_id
        paper_metadata = MetadataEntity(content={'product_name': 'paper'},
                                        links={'delivery': ['123', '456']})
        pen_metadata = MetadataEntity(content={'product_name': 'pen'},
                                      links={'delivery': ['789']})
        row_template.do_import = MagicMock(
            side_effect=[(paper_metadata, no_errors), (pen_metadata, no_errors)])

        # and:
        mock_template_manager = MagicMock(name='template_manager')
        mock_template_manager.create_row_template = MagicMock(return_value=row_template)
        mock_template_manager.get_header_row = MagicMock(return_value=['header1', 'header2'])
        mock_template_manager.get_concrete_type = MagicMock(return_value='concrete_entity')

        # and:
        workbook = Workbook()
        worksheet = workbook.create_sheet('product')
        worksheet['A6'] = 'paper'
        worksheet['A7'] = 'pen'

        # when:
        worksheet_importer = WorksheetImporter(mock_template_manager)
        results, errors = worksheet_importer.do_import(IngestWorksheet(worksheet))

        # then:
        self.assertEqual(2, len(results))
        self.assertIn(paper_metadata, results)
        self.assertIn(pen_metadata, results)
        self.assertEqual(errors, [])

        # and: object id should be assigned
        paper_id = paper_metadata.object_id
        self.assertIsNotNone(paper_id)
        pen_id = pen_metadata.object_id
        self.assertIsNotNone(pen_id)
        self.assertNotEqual(paper_id, pen_id)
Example #18
0
    def test_create_row_template_with_none_header(self, determine_strategy):
        """create_row_template() yields no cell conversions when A4 holds None.

        ``determine_strategy`` is presumably injected by a patch decorator
        outside this view -- confirm against the class.
        """
        # given:
        # Mock's first positional parameter is ``spec``; the label must be
        # passed as ``name=`` so the mock is not spec'ed against a str.
        schema_template = MagicMock(name='schema_template')
        ingest_api = MagicMock(name='ingest_api')

        # and:
        do_nothing_strategy = FakeConversion('')
        determine_strategy.return_value = do_nothing_strategy

        # and:
        self._mock_schema_lookup(schema_template)

        # and: the only header cell is explicitly empty
        workbook = Workbook()
        worksheet = workbook.create_sheet('sample')
        worksheet['A4'] = None
        ingest_worksheet = IngestWorksheet(worksheet)

        # when:
        template_manager = TemplateManager(schema_template, ingest_api)
        row_template = template_manager.create_row_template(ingest_worksheet)

        # then:
        self.assertEqual(0, len(row_template.cell_conversions))
Example #19
0
    def test_create_row_template(self, determine_strategy, look_up):
        """create_row_template() looks up a spec and a strategy for each header.

        ``determine_strategy`` and ``look_up`` are presumably injected by patch
        decorators that sit outside this view -- confirm against the class.
        """
        # given:
        template = MagicMock(name='schema_template')
        ingest_api = MagicMock(name='ingest_api')

        # and:
        concrete_type = 'user'
        template.get_tab_key = MagicMock(return_value=concrete_type)

        # and:
        spec_map = {
            'user': {
                'schema': {
                    'domain_entity': 'main_category/subdomain'
                }
            }
        }
        template.lookup = lambda key: spec_map.get(key, None)

        # and: set up column spec
        name_column_spec = MagicMock(name='name_column_spec')
        numbers_column_spec = MagicMock(name='numbers_column_spec')
        look_up.side_effect = [name_column_spec, numbers_column_spec]

        # and:
        # Mock's first positional parameter is ``spec``; the label must be
        # passed as ``name=`` so the mock is not spec'ed against a str.
        name_strategy = MagicMock(name='name_strategy')
        numbers_strategy = MagicMock(name='numbers_strategy')
        determine_strategy.side_effect = [name_strategy, numbers_strategy]

        # and: prepare worksheet
        header_row_idx = 4
        workbook = Workbook()
        worksheet = workbook.create_sheet('sample')
        worksheet[f'A{header_row_idx}'] = 'user.profile.first_name'
        worksheet[f'B{header_row_idx}'] = 'numbers'

        ingest_worksheet = IngestWorksheet(worksheet,
                                           header_row_idx=header_row_idx)

        # when:
        template_manager = TemplateManager(template, ingest_api)
        row_template: RowTemplate = template_manager.create_row_template(
            ingest_worksheet)

        # then: one look-up per header, each the first occurrence of its name
        expected_calls = [
            call(template,
                 'user.profile.first_name',
                 concrete_type,
                 context=concrete_type,
                 order_of_occurrence=1),
            call(template,
                 'numbers',
                 concrete_type,
                 context=concrete_type,
                 order_of_occurrence=1)
        ]
        look_up.assert_has_calls(expected_calls)
        determine_strategy.assert_has_calls(
            [call(name_column_spec),
             call(numbers_column_spec)])

        # and:
        self.assertIsNotNone(row_template)
        self.assertEqual('main_category', row_template.domain_type)
        self.assertEqual(concrete_type, row_template.concrete_type)
        self.assertEqual(2, len(row_template.cell_conversions))
        # assertIn reports the container contents when the check fails
        self.assertIn(name_strategy, row_template.cell_conversions)
        self.assertIn(numbers_strategy, row_template.cell_conversions)
Example #20
0
    def test_create_row_template_for_module_worksheet(self, determine_strategy,
                                                      look_up):
        """create_row_template() uses the module context for a module worksheet.

        ``determine_strategy`` and ``look_up`` are presumably injected by patch
        decorators that sit outside this view -- confirm against the class.
        """
        # given:
        template = MagicMock(name='schema_template')
        ingest_api = MagicMock(name='ingest_api')

        # TODO define method in SchemaTemplate that returns domain and concrete types #module-tabs
        # and:
        concrete_type = 'product'
        template.get_tab_key = MagicMock(return_value=concrete_type)

        # and:
        spec_map = {
            'product': {
                'schema': {
                    'domain_entity': 'merchandise/product'
                }
            }
        }
        template.lookup = lambda key: spec_map.get(key, None)

        # and:
        template_mgr = TemplateManager(template, ingest_api)

        # and:
        workbook = create_test_workbook('Product - Reviews')
        # Subscript lookup is used instead of the deprecated
        # Workbook.get_sheet_by_name().
        reviews_worksheet = workbook['Product - Reviews']
        reviews_worksheet['A4'] = 'product.info.id'
        reviews_worksheet['B4'] = 'product.reviews.rating'

        # and: set up dummy look up results
        id_spec = MagicMock(name='id_spec')
        rating_spec = MagicMock(name='rating_spec')
        look_up.side_effect = [id_spec, rating_spec]

        # and: set up strategies, keyed by the spec each one is derived from
        id_strategy = MagicMock(name='id_strategy')
        rating_strategy = MagicMock(name='rating_strategy')
        determine_strategy.side_effect = {
            id_spec: id_strategy,
            rating_spec: rating_strategy
        }.get

        # when:
        row_template = template_mgr.create_row_template(
            IngestWorksheet(reviews_worksheet))

        # then:
        expected_calls = [
            call(template,
                 'product.info.id',
                 concrete_type,
                 order_of_occurrence=1,
                 context='product.reviews'),
            call(template,
                 'product.reviews.rating',
                 concrete_type,
                 order_of_occurrence=1,
                 context='product.reviews')
        ]
        look_up.assert_has_calls(expected_calls)

        # and:
        self.assertIsNotNone(row_template)
        self.assertIn(id_strategy, row_template.cell_conversions)
        self.assertIn(rating_strategy, row_template.cell_conversions)
 def importable_worksheets(self):
     """Wrap every workbook sheet whose title is not listed in SPECIAL_TABS.

     :return: list of ``IngestWorksheet`` wrappers, in workbook order
     """
     wrapped = []
     for sheet in self.workbook.worksheets:
         if sheet.title in SPECIAL_TABS:
             continue
         wrapped.append(IngestWorksheet(sheet))
     return wrapped
    def test_create_row_template_for_module_worksheet(self,
                                                      determine_strategy):
        """create_row_template() returns both strategies for a module worksheet.

        ``determine_strategy`` is presumably injected by a patch decorator
        outside this view -- confirm against the class.
        """
        # given:
        template = MagicMock(name='schema_template')
        ingest_api = MagicMock(name='mock_ingest_api')

        # and:
        domain_entity = "merchandise/product"
        schema_url = "http://schema.sample.com/product"
        template.get_tabs_config = MagicMock()
        template.lookup_metadata_schema_name_given_title = MagicMock(
            return_value="product_type")
        template.get_latest_schema = MagicMock(return_value=schema_url)
        schema = {
            "schema": {
                "domain_entity": domain_entity,
                "url": schema_url
            }
        }
        property_one_schema = {
            "description": "Property one description",
            "value_type": "string"
        }
        property_two_schema = {
            "description": "Property two description",
            "value_type": "string"
        }
        # A single map backs both look-up stubs. An earlier version assigned
        # ``spec_map`` twice; because the lambdas bind the name late, only the
        # last map was ever consulted, so the first one was dead code.
        spec_map = {
            "product_type": schema,
            "product.info.id": property_one_schema,
            "product.reviews.rating": property_two_schema
        }
        template.lookup_property_from_template = lambda key: spec_map.get(
            key, None)
        template.lookup_property_attributes_in_metadata = lambda key: spec_map.get(
            key)

        # and:
        template_mgr = TemplateManager(template, ingest_api)

        # and:
        workbook = create_test_workbook('Product - Reviews')
        reviews_worksheet = workbook['Product - Reviews']
        reviews_worksheet['A4'] = 'product.info.id'
        reviews_worksheet['B4'] = 'product.reviews.rating'

        # and: set up strategies
        id_strategy = MagicMock(name='id_strategy')
        rating_strategy = MagicMock(name='rating_strategy')
        determine_strategy.side_effect = [id_strategy, rating_strategy]

        # when:
        row_template = template_mgr.create_row_template(
            IngestWorksheet(reviews_worksheet))

        # then:
        self.assertIsNotNone(row_template)
        self.assertIn(id_strategy, row_template.cell_conversions)
        self.assertIn(rating_strategy, row_template.cell_conversions)
    def test_get_module_field_name(self):
        """get_module_field_name() derives a snake_case name from module sheet titles."""
        # given:
        workbook = create_test_workbook('Product - Reviews', 'User - SN Profiles',
                                        'Log - file-names', 'Account')

        # Sheets are fetched by subscript instead of the deprecated
        # Workbook.get_sheet_by_name().
        # and: simple
        reviews = IngestWorksheet(workbook['Product - Reviews'])

        # and: with space in between
        sn_profiles = IngestWorksheet(workbook['User - SN Profiles'])

        # and: with hyphen
        file_names = IngestWorksheet(workbook['Log - file-names'])

        # and: not module worksheet
        account = IngestWorksheet(workbook['Account'])

        # expect:
        self.assertEqual('reviews', reviews.get_module_field_name())
        self.assertEqual('sn_profiles', sn_profiles.get_module_field_name())
        self.assertEqual('file_names', file_names.get_module_field_name())
        self.assertIsNone(account.get_module_field_name())
    def test_create_row_template(self, determine_strategy):
        """create_row_template() determines one strategy per header column.

        ``determine_strategy`` is presumably injected by a patch decorator
        outside this view -- confirm against the class.
        """
        # given:
        template = MagicMock(name='schema_template')
        ingest_api = MagicMock(name='mock_ingest_api')

        # and: schema lookups answered from an in-memory map
        concrete_type = "user"
        domain_entity = "main_category/subdomain"
        schema_url = "http://schema.sample.com/main_category"
        template.get_tabs_config = MagicMock()
        template.lookup_metadata_schema_name_given_title = MagicMock(
            return_value=concrete_type)
        template.get_latest_schema = MagicMock(return_value=schema_url)
        schema = {
            "schema": {
                "domain_entity": domain_entity,
                "url": schema_url
            }
        }
        property_one_schema = {
            "description": "Property one description",
            "value_type": "string"
        }
        property_two_schema = {
            "description": "Property two description",
            "value_type": "string"
        }
        spec_map = {
            concrete_type: schema,
            "user.profile.first_name": property_one_schema,
            "user.numbers": property_two_schema
        }
        template.lookup_property_from_template = lambda key: spec_map.get(key)

        # and:
        # Mock's first positional parameter is ``spec``; the label must be
        # passed as ``name=`` so the mock is not spec'ed against a str.
        name_strategy = MagicMock(name='name_strategy')
        numbers_strategy = MagicMock(name='numbers_strategy')
        determine_strategy.side_effect = [name_strategy, numbers_strategy]

        # and: prepare worksheet
        header_row_idx = 4
        workbook = Workbook()
        worksheet = workbook.create_sheet('sample')
        worksheet[f'A{header_row_idx}'] = 'user.profile.first_name'
        worksheet[f'B{header_row_idx}'] = 'user.numbers'

        ingest_worksheet = IngestWorksheet(worksheet,
                                           header_row_idx=header_row_idx)

        # when:
        template_manager = TemplateManager(template, ingest_api)
        row_template: RowTemplate = template_manager.create_row_template(
            ingest_worksheet)

        # then:
        self.assertEqual(determine_strategy.call_count, 2)

        # and:
        self.assertIsNotNone(row_template)
        self.assertEqual('main_category', row_template.domain_type)
        self.assertEqual(concrete_type, row_template.concrete_type)
        self.assertEqual(2, len(row_template.cell_conversions))
        # assertIn reports the container contents when the check fails
        self.assertIn(name_strategy, row_template.cell_conversions)
        self.assertIn(numbers_strategy, row_template.cell_conversions)