Esempio n. 1
0
class AggregatorDetailViewTest(TestCase):
    """Exercise the aggregator detail page and its export/workflow URLs.

    ``setUp`` logs in as 'admin' and creates one aggregator through
    ``AggregatorFactory``; every test requests pages for that aggregator.
    """

    def setUp(self):
        self.client_login('admin')

        self.aggregator = AggregatorFactory()
        # URLs of the export and workflow views for this aggregator.
        self.export_url = reverse('aggregator_export',
                                  args=(self.aggregator.pk, ))
        self.workflow_url = reverse('aggregator_workflow',
                                    args=[
                                        self.aggregator.pk,
                                    ])

    # The detail page renders and shows the aggregator name.
    def test_get(self):
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, self.aggregator.name)

    # The detail page links to '<detail url>edit/'.
    def test_contains_edit_link(self):
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response,
                            self.aggregator.get_absolute_url() + 'edit/')

    # Archive items attached to the aggregator are linked from the page.
    def test_contains_archiveitems(self):
        item1 = ArchiveItemFactory()
        item2 = ArchiveItemFactory()
        for item in (item1, item2):
            AggregatorArchiveItem.objects.create(aggregator=self.aggregator,
                                                 archiveitem=item)

        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, item1.get_absolute_url())
        self.assertContains(response, item2.get_absolute_url())

    # The saved silk rule text is shown on the page.
    def test_contains_rule(self):
        self.aggregator.silk_rule = 'tanto gentile e tanto onesta pare'
        self.aggregator.save()

        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, self.aggregator.silk_rule)

    # The placeholder text is shown until a scheduler whose object_id is the
    # aggregator pk exists; afterwards that scheduler is linked instead.
    def test_contains_schedulers(self):
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, 'No scheduler found')

        scheduler = AggregatorSchedulerFactory(object_id=self.aggregator.pk)
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertNotContains(response, 'No scheduler found')
        self.assertContains(response, scheduler.get_absolute_url())

    # The page links to the Silk workbench built from the SILK_EXTERNAL_*
    # settings.
    def test_contains_silk_link(self):
        response = self.client.get(self.aggregator.get_absolute_url())
        silk_url = 'http://{}:{}/workbench/'.format(
            settings.SILK_EXTERNAL_HOST, settings.SILK_EXTERNAL_PORT)
        self.assertContains(response, silk_url)

    # With '?download' the export view answers with XML and a
    # Content-Disposition header.
    def test_can_download_silk_project_file(self):
        self.client_login('admin')

        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, self.export_url + '?download')

        response = self.client.get(self.export_url + '?download')
        self.assertEqual(response['Content-Type'], 'text/xml; charset=utf-8')
        self.assertTrue(response.has_header('Content-Disposition'))
        # response.content is a byte string on Python 3, so comparing it with
        # '' could never fail; assert on a non-empty body instead.
        self.assertTrue(response.content.strip())

    # POSTing to the workflow URL redirects to a task page and leaves exactly
    # one Scheduler row behind.
    def test_can_run_workflow(self):
        self.client_login('admin')
        Scheduler.objects.all().delete()
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, self.workflow_url)

        response = self.client.post(self.workflow_url)
        self.assertEqual(response.status_code, 302)
        self.assertTrue(
            response['Location'].startswith('http://testserver/s/task/'))
        self.assertEqual(Scheduler.objects.count(), 1)

    # Without '?download' the same XML is served with no Content-Disposition
    # header.
    def test_can_view_silk_project_file_without_downloading(self):
        self.client_login('admin')
        response = self.client.get(self.export_url)
        self.assertEqual(response['Content-Type'], 'text/xml; charset=utf-8')
        self.assertFalse(response.has_header('Content-Disposition'))
        # Truthiness check works for both byte and text bodies (see above
        # test for the same reasoning: bytes never compare equal to '').
        self.assertTrue(response.content.strip())

    # Parse the exported project file and check its prefixes, data sources
    # and linking tasks for an aggregator with two archive items.
    def test_silk_project_file_is_valid(self):
        import xml.etree.ElementTree as ET

        self.client_login('admin')

        item1 = ArchiveItemFactory()
        item2 = ArchiveItemFactory()
        for item in (item1, item2):
            AggregatorArchiveItem.objects.create(aggregator=self.aggregator,
                                                 archiveitem=item)

        response = self.client.get(self.export_url)
        tree = ET.fromstring(response.content)

        self.assertIn((settings.TRIPLE_DATABASE['PREFIXES']['sdv1'], 'sd'),
                      [(x.get('namespace'), x.get('id'))
                       for x in tree.findall('.//Prefix')])

        # check datasources: the master graph first, then one per archive item
        datasources = tree.findall('.//DataSource')
        self.assertEqual(len(datasources), 3)
        self.assertEqual(datasources[0].get('id'), 'master-graph')

        mastergraph = datasources[0]
        datasources = datasources[1:]

        # check datasources endpoints
        self.assertEqual(
            mastergraph.find('Param[@name="host"]').get('value'),
            settings.TRIPLE_DATABASE_MASTER['HOST'])
        self.assertEqual([get_virtuoso_endpoint()] * 2, [
            x.find('Param[@name="endpointURI"]').get("value")
            for x in datasources
        ])

        # check datasources graph names
        self.assertEqual(
            mastergraph.find('Param[@name="graph"]').get('value'),
            settings.TRIPLE_DATABASE_MASTER["KWARGS"]["graph"])
        self.assertEqual(
            [item1.datagraph_mapped_name, item2.datagraph_mapped_name],
            [x.find('Param[@name="graph"]').get("value") for x in datasources])

        # check tasks: one linking task per archive-item data source, whose
        # Interlink id equals the data source id
        datasource_ids = [x.get('id') for x in datasources]
        tasks = tree.findall('.//LinkingTask')
        self.assertEqual(len(tasks), 2)
        self.assertEqual(datasource_ids,
                         [x.find('.//Interlink').get('id') for x in tasks])

        # namespaced tag of the alignment element nested inside <Alignment>
        alignment_tag = '{}Alignment'.format('{http://knowledgeweb.'
                                             'semanticweb.org'
                                             '/heterogeneity/alignment#}')

        # check task parameters
        for datasource_id, task in zip(datasource_ids, tasks):
            self.assertEqual(
                task.find('.//SourceDataset').get('dataSource'), datasource_id)
            self.assertEqual(
                task.find('.//TargetDataset').get('dataSource'),
                'master-graph')
            self.assertEqual(
                task.find('.//SourceDataset').find('RestrictTo').text.strip(),
                '?a rdf:type <{}> .'.format(self.aggregator.entity_type))
            self.assertEqual(
                task.find('.//TargetDataset').find('RestrictTo').text.strip(),
                'b -> {}'.format(self.aggregator.vertex_selector))
            # these elements must be present but carry no text
            self.assertIsNone(task.find('.//LinkageRule').text)
            self.assertIsNone(task.find('.//Filter').text)
            self.assertIsNone(task.find('.//Outputs').text)
            self.assertIsNone(task.find('.//PositiveEntities').text)
            self.assertIsNone(task.find('.//NegativeEntities').text)
            # '/*' picks the first child of <Alignment>; ElementPath treats a
            # bare trailing '/' as this same pattern, it is just spelled out.
            self.assertIsNone(
                task.find('.//Alignment/*').find(alignment_tag).text)
Esempio n. 2
0
class AggregatorDetailViewTest(TestCase):
    """Tests for the aggregator detail view plus its export/workflow views.

    Each test runs after ``setUp`` has logged in as 'admin' and built a
    single aggregator with ``AggregatorFactory``.
    """

    def setUp(self):
        self.client_login('admin')

        self.aggregator = AggregatorFactory()
        # Reverse the export and workflow URLs once for all tests.
        self.export_url = reverse(
            'aggregator_export', args=(self.aggregator.pk, )
        )
        self.workflow_url = reverse(
            'aggregator_workflow', args=[self.aggregator.pk, ])

    # Detail page responds and contains the aggregator name.
    def test_get(self):
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, self.aggregator.name)

    # Detail page contains a link to '<detail url>edit/'.
    def test_contains_edit_link(self):
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(
            response, self.aggregator.get_absolute_url() + 'edit/'
        )

    # Both archive items bound to the aggregator are linked from the page.
    def test_contains_archiveitems(self):
        item1 = ArchiveItemFactory()
        item2 = ArchiveItemFactory()
        for item in (item1, item2):
            AggregatorArchiveItem.objects.create(
                aggregator=self.aggregator,
                archiveitem=item
            )

        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, item1.get_absolute_url())
        self.assertContains(response, item2.get_absolute_url())

    # The silk rule stored on the aggregator is rendered on the page.
    def test_contains_rule(self):
        self.aggregator.silk_rule = 'tanto gentile e tanto onesta pare'
        self.aggregator.save()

        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, self.aggregator.silk_rule)

    # 'No scheduler found' is displayed only while no scheduler with
    # object_id == aggregator.pk exists; then the scheduler URL appears.
    def test_contains_schedulers(self):
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, 'No scheduler found')

        scheduler = AggregatorSchedulerFactory(object_id=self.aggregator.pk)
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertNotContains(response, 'No scheduler found')
        self.assertContains(response, scheduler.get_absolute_url())

    # The Silk workbench URL (from SILK_EXTERNAL_HOST/PORT) is on the page.
    def test_contains_silk_link(self):
        response = self.client.get(self.aggregator.get_absolute_url())
        silk_url = 'http://{}:{}/workbench/'.format(
            settings.SILK_EXTERNAL_HOST, settings.SILK_EXTERNAL_PORT
        )
        self.assertContains(response, silk_url)

    # The '?download' variant of the export view returns XML together with
    # a Content-Disposition header.
    def test_can_download_silk_project_file(self):
        self.client_login('admin')

        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, self.export_url + '?download')

        response = self.client.get(self.export_url + '?download')
        self.assertEqual(response['Content-Type'], 'text/xml; charset=utf-8')
        self.assertTrue(response.has_header('Content-Disposition'))
        # On Python 3 the body is bytes and bytes != '' is always true, so
        # the emptiness check is done through truthiness instead.
        self.assertTrue(response.content.strip())

    # A POST to the workflow URL answers with a redirect to a task URL and
    # results in exactly one Scheduler object.
    def test_can_run_workflow(self):
        self.client_login('admin')
        Scheduler.objects.all().delete()
        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, self.workflow_url)

        response = self.client.post(self.workflow_url)
        self.assertEqual(response.status_code, 302)
        self.assertTrue(
            response['Location'].startswith('http://testserver/s/task/')
        )
        self.assertEqual(Scheduler.objects.count(), 1)

    # The plain export URL serves the XML inline (no Content-Disposition).
    def test_can_view_silk_project_file_without_downloading(self):
        self.client_login('admin')
        response = self.client.get(self.export_url)
        self.assertEqual(response['Content-Type'], 'text/xml; charset=utf-8')
        self.assertFalse(response.has_header('Content-Disposition'))
        # Truthiness works for byte and text bodies alike; comparing a bytes
        # body with '' would pass even for an empty response.
        self.assertTrue(response.content.strip())

    # Validate the structure of the exported project XML for an aggregator
    # that has two archive items.
    def test_silk_project_file_is_valid(self):
        import xml.etree.ElementTree as ET

        self.client_login('admin')

        item1 = ArchiveItemFactory()
        item2 = ArchiveItemFactory()
        for item in (item1, item2):
            AggregatorArchiveItem.objects.create(
                aggregator=self.aggregator,
                archiveitem=item
            )

        response = self.client.get(self.export_url)
        tree = ET.fromstring(response.content)

        self.assertIn(
            (settings.TRIPLE_DATABASE['PREFIXES']['sdv1'], 'sd'),
            [(x.get('namespace'), x.get('id'))
             for x in tree.findall('.//Prefix')]
        )

        # check datasources: master graph first, then one per archive item
        datasources = tree.findall('.//DataSource')
        self.assertEqual(len(datasources), 3)
        self.assertEqual(datasources[0].get('id'), 'master-graph')

        mastergraph = datasources[0]
        datasources = datasources[1:]

        # check datasources endpoints
        self.assertEqual(
            mastergraph.find('Param[@name="host"]').get('value'),
            settings.TRIPLE_DATABASE_MASTER['HOST']
        )
        self.assertEqual(
            [get_virtuoso_endpoint()] * 2,
            [x.find('Param[@name="endpointURI"]').get("value")
             for x in datasources]
        )

        # check datasources graph names
        self.assertEqual(
            mastergraph.find('Param[@name="graph"]').get('value'),
            settings.TRIPLE_DATABASE_MASTER["KWARGS"]["graph"]
        )
        self.assertEqual(
            [item1.datagraph_mapped_name, item2.datagraph_mapped_name],
            [x.find('Param[@name="graph"]').get("value")
             for x in datasources]
        )

        # check tasks: each archive-item data source has a linking task
        # whose Interlink id matches the data source id
        datasource_ids = [x.get('id') for x in datasources]
        tasks = tree.findall('.//LinkingTask')
        self.assertEqual(len(tasks), 2)
        self.assertEqual(
            datasource_ids,
            [x.find('.//Interlink').get('id') for x in tasks]
        )

        # fully-qualified tag of the alignment element inside <Alignment>
        alignment_tag = '{}Alignment'.format(
            '{http://knowledgeweb.'
            'semanticweb.org'
            '/heterogeneity/alignment#}'
        )

        # check task parameters
        for datasource_id, task in zip(datasource_ids, tasks):
            self.assertEqual(
                task.find('.//SourceDataset').get('dataSource'),
                datasource_id
            )
            self.assertEqual(
                task.find('.//TargetDataset').get('dataSource'),
                'master-graph'
            )
            self.assertEqual(
                task.find('.//SourceDataset').find('RestrictTo').text.strip(),
                '?a rdf:type <{}> .'.format(self.aggregator.entity_type)
            )
            self.assertEqual(
                task.find('.//TargetDataset').find('RestrictTo').text.strip(),
                'b -> {}'.format(self.aggregator.vertex_selector)
            )
            # the following elements exist but have no text content
            self.assertIsNone(task.find('.//LinkageRule').text)
            self.assertIsNone(task.find('.//Filter').text)
            self.assertIsNone(task.find('.//Outputs').text)
            self.assertIsNone(task.find('.//PositiveEntities').text)
            self.assertIsNone(task.find('.//NegativeEntities').text)
            # './/Alignment/*' is the explicit form of the trailing-slash
            # shorthand: it selects the first child of <Alignment>.
            self.assertIsNone(
                task.find('.//Alignment/*').find(alignment_tag).text
            )
Esempio n. 3
0
class SilkRuleXMLTestCase(TestCase):
    """Check the XML produced by the ``silk_rules.xml`` template.

    The template is rendered directly with an explicit context for one
    aggregator and one archive item, then parsed and inspected.
    """

    def setUp(self):
        self.aggregator = AggregatorFactory()

    # Render 'controller/aggregator/silk_rules.xml' and verify prefixes,
    # data sources, the interlink rule and its file output.
    def test_can_silk_rules_file_is_valid(self):
        import xml.etree.ElementTree as ET
        from django.template.loader import render_to_string

        archive_item = ArchiveItemFactory()
        AggregatorArchiveItem.objects.create(
            aggregator=self.aggregator,
            archiveitem=archive_item
        )

        self.aggregator.silk_rule = \
            '<LinkageRule><smart data="now" /></LinkageRule>'
        self.aggregator.save()
        output_filename = 'a_really_cool_filename.thm'

        context = {
            'aggregator': self.aggregator,
            'sd_prefix': settings.TRIPLE_DATABASE['PREFIXES']['sdv1'],
            'sparql_endpoint': get_virtuoso_endpoint(),
            'archive_item': archive_item,
            'output_filename': output_filename,
            'mastergraph_host': settings.TRIPLE_DATABASE_MASTER['HOST'],
            'mastergraph_port':
            settings.TRIPLE_DATABASE_MASTER['KWARGS']['rexpro_port'],
            'mastergraph_graphname':
            settings.TRIPLE_DATABASE_MASTER['KWARGS']['graph'],
            'resource_namespace':
            settings.TRIPLE_DATABASE_MASTER['PREFIXES']['sdres'],
        }

        tree = ET.fromstring(render_to_string(
            'controller/aggregator/silk_rules.xml', context
        ))

        self.assertIn(
            (settings.TRIPLE_DATABASE['PREFIXES']['sdv1'], 'sd'),
            [(x.get('namespace'), x.get('id'))
             for x in tree.findall('.//Prefix')]
        )

        # check datasources: the master graph followed by the archive item
        datasources_dom = tree.findall('.//DataSource')
        self.assertEqual(len(datasources_dom), 2)
        self.assertEqual(datasources_dom[0].get('id'), 'master-graph')

        mastergraph, datasource = datasources_dom

        # check datasource endpoints
        self.assertEqual(
            get_virtuoso_endpoint(),
            datasource.find('Param[@name="endpointURI"]').get("value"),
        )

        # check datasources graph names
        self.assertEqual(
            mastergraph.find('Param[@name="graph"]').get('value'),
            settings.TRIPLE_DATABASE_MASTER["KWARGS"]["graph"]
        )
        self.assertEqual(
            archive_item.datagraph_mapped_name,
            datasource.find('Param[@name="graph"]').get("value")
        )

        # check tasks: a single Interlink whose id is the data source id
        datasource_id = datasource.get('id')
        rules = tree.findall('.//Interlink')
        self.assertEqual(len(rules), 1)
        self.assertEqual(datasource_id, rules[0].get('id'))

        # check rules parameters
        rule = rules[0]
        self.assertEqual(
            rule.find('.//SourceDataset').get('dataSource'),
            datasource_id
        )
        self.assertEqual(
            rule.find('.//TargetDataset').get('dataSource'),
            'master-graph'
        )
        # ET.tostring() serialises to an ASCII byte string by default; decode
        # it so the comparison with the text rule also holds on Python 3,
        # where bytes never compare equal to str.
        linkage_rule_xml = ET.tostring(rule.find('.//LinkageRule'))
        self.assertEqual(
            linkage_rule_xml.decode('ascii').strip(),
            self.aggregator.silk_rule
        )
        self.assertEqual(
            rule.find('.//SourceDataset').find('RestrictTo').text.strip(),
            '?a rdf:type <{}> .'.format(self.aggregator.entity_type)
        )
        self.assertEqual(
            rule.find('.//TargetDataset').find('RestrictTo').text.strip(),
            'b -> {}'.format(self.aggregator.vertex_selector)
        )
        self.assertIsNone(rule.find('.//Filter').text)

        # check the output: a 'file' output whose first Param is the file
        # name and whose second Param is the serialisation format
        output = rule.find('.//Outputs').find('Output')
        self.assertEqual(output.get('type'), 'file')
        output_params = output.findall('Param')
        self.assertEqual(output_params[0].get('name'), 'file')
        self.assertEqual(output_params[0].get('value'), output_filename)
        self.assertEqual(output_params[1].get('name'), 'format')
        self.assertEqual(output_params[1].get('value'), 'ntriples')