Esempio n. 1
0
class AggregatorImportViewTest(TestCase):
    """Tests for posting a Silk rule file to an aggregator's 'post/' URL."""

    def setUp(self):
        """Create an aggregator and the POST payload holding the rule file.

        The fixture file object is registered with ``addCleanup`` so that it
        is closed after each test instead of leaking an open handle.
        """
        self.aggregator = AggregatorFactory()
        self.update_url = self.aggregator.get_absolute_url() + 'post/'
        self.post_data = dict()

        rule_file = open(self._get_test_file('config.xml', 'controller'))
        # Runs after the test whether it passes or fails.
        self.addCleanup(rule_file.close)
        self.post_data['silk_rule_file'] = rule_file

    def test_post(self):
        """Posting the file redirects and stores the linkage rule."""
        self.client_login('admin')

        archiveitem = ArchiveItemFactory()
        self.post_data['archiveitems'] = [archiveitem.pk]
        response = self.client.post(self.update_url, data=self.post_data)
        self.assertRedirects(response, self.aggregator.get_absolute_url())

        # The rule is compared with newlines removed, hence the runs of
        # spaces kept inside the expected text.
        expected_rule = (
            '<LinkageRule>        '
            '<Compare id="unnamed_5" metric="levenshtein" required="false" '
            'threshold="0.0" weight="1">          '
            '<TransformInput function="lowerCase" id="unnamed_3">            '
            '<Input id="unnamed_1" path="?a/sd:Event#name" />          '
            '</TransformInput>          <TransformInput function="lowerCase" '
            'id="unnamed_4">            '
            '<Input id="unnamed_2" path="?b/sd:Event#name" />          '
            '</TransformInput>          <Param name="minChar" value="0" />'
            '          <Param name="maxChar" value="z" />        '
            '</Compare>      </LinkageRule>'
        )

        # Reload from the database to see what the view actually saved.
        aggregator = Aggregator.objects.get(pk=self.aggregator.pk)
        self.assertEqual(
            aggregator.silk_rule.strip().replace("\n", ""),
            expected_rule
        )
Esempio n. 2
0
class AggregatorImportViewTest(TestCase):
    """Tests for posting a Silk rule file to an aggregator's 'post/' URL."""

    def setUp(self):
        """Create an aggregator and the POST payload holding the rule file.

        The fixture file object is registered with ``addCleanup`` so that it
        is closed after each test instead of leaking an open handle.
        """
        self.aggregator = AggregatorFactory()
        self.update_url = self.aggregator.get_absolute_url() + 'post/'
        self.post_data = dict()

        rule_file = open(self._get_test_file('config.xml', 'controller'))
        # Runs after the test whether it passes or fails.
        self.addCleanup(rule_file.close)
        self.post_data['silk_rule_file'] = rule_file

    def test_post(self):
        """Posting the file redirects and stores the linkage rule."""
        self.client_login('admin')

        archiveitem = ArchiveItemFactory()
        self.post_data['archiveitems'] = [archiveitem.pk]
        response = self.client.post(self.update_url, data=self.post_data)
        self.assertRedirects(response, self.aggregator.get_absolute_url())

        # The rule is compared with newlines removed, hence the runs of
        # spaces kept inside the expected text.
        expected_rule = (
            '<LinkageRule>        '
            '<Compare id="unnamed_5" metric="levenshtein" required="false" '
            'threshold="0.0" weight="1">          '
            '<TransformInput function="lowerCase" id="unnamed_3">            '
            '<Input id="unnamed_1" path="?a/sd:Event#name" />          '
            '</TransformInput>          <TransformInput function="lowerCase" '
            'id="unnamed_4">            '
            '<Input id="unnamed_2" path="?b/sd:Event#name" />          '
            '</TransformInput>          <Param name="minChar" value="0" />'
            '          <Param name="maxChar" value="z" />        '
            '</Compare>      </LinkageRule>'
        )

        # Reload from the database to see what the view actually saved.
        aggregator = Aggregator.objects.get(pk=self.aggregator.pk)
        self.assertEqual(
            aggregator.silk_rule.strip().replace("\n", ""),
            expected_rule
        )
Esempio n. 3
0
    def setUp(self):
        """Log in via ``client_login('admin')`` and prepare the test URLs.

        Stores a new aggregator plus the reversed 'aggregator_export' and
        'aggregator_workflow' URLs for it.
        """
        self.client_login('admin')

        self.aggregator = AggregatorFactory()
        agg_pk = self.aggregator.pk
        self.export_url = reverse('aggregator_export', args=(agg_pk, ))
        self.workflow_url = reverse('aggregator_workflow', args=[agg_pk])
Esempio n. 4
0
 def setUp(self):
     """Create an aggregator and the POST payload holding the rule file.

     The fixture file is registered with ``addCleanup`` so the handle is
     closed after the test (assumes the enclosing class is a
     unittest-based TestCase -- TODO confirm).
     """
     self.aggregator = AggregatorFactory()
     self.update_url = self.aggregator.get_absolute_url() + 'post/'
     self.post_data = dict()

     rule_file = open(self._get_test_file('config.xml', 'controller'))
     # Runs after the test whether it passes or fails.
     self.addCleanup(rule_file.close)
     self.post_data['silk_rule_file'] = rule_file
Esempio n. 5
0
    def setUp(self):
        """Log in via ``client_login('admin')`` and prepare the test URLs.

        Stores a new aggregator plus the reversed 'aggregator_export' and
        'aggregator_workflow' URLs for it.
        """
        self.client_login('admin')

        self.aggregator = AggregatorFactory()
        agg_pk = self.aggregator.pk
        self.export_url = reverse('aggregator_export', args=(agg_pk, ))
        self.workflow_url = reverse('aggregator_workflow', args=[agg_pk])
Esempio n. 6
0
 def setUp(self):
     """Link a new archive item to a new aggregator and build the URL.

     The URL is the reversed 'archiveitem_aggregator_del' route for the
     archive item.
     """
     self.archiveitem = ArchiveItemFactory()
     self.aggregator = AggregatorFactory()

     link_fields = {
         'aggregator': self.aggregator,
         'archiveitem': self.archiveitem,
     }
     AggregatorArchiveItem.objects.create(**link_fields)

     item_pk = self.archiveitem.pk
     self.url = reverse('archiveitem_aggregator_del', args=(item_pk, ))
Esempio n. 7
0
class AggregatorUpdateViewTest(TestCase):
    """Tests for the aggregator 'edit/' view (GET and POST)."""

    def setUp(self):
        """Create the aggregator to edit and the data posted to the view."""
        self.aggregator = AggregatorFactory()
        self.update_url = self.aggregator.get_absolute_url() + 'edit/'
        self.post_data = dict(AggregatorFactory.attributes())

    def test_get(self):
        """The edit page contains the aggregator name and description."""
        self.client_login('admin')
        response = self.client.get(self.update_url)
        self.assertContains(response, self.aggregator.name)
        self.assertContains(response, self.aggregator.description)

    def test_post(self):
        """Posting redirects to the detail page and saves the new values."""
        self.client_login('admin')
        archiveitem = ArchiveItemFactory()
        self.post_data['archiveitems'] = [archiveitem.pk]
        response = self.client.post(self.update_url, data=self.post_data)
        self.assertRedirects(response, self.aggregator.get_absolute_url())

        # Reload from the database to see what the view actually saved.
        aggregator = Aggregator.objects.get(pk=self.aggregator.pk)
        self.assertEqual(aggregator.name, self.post_data['name'])
        self.assertEqual(aggregator.description, self.post_data['description'])
        self.assertIn(archiveitem, aggregator.archiveitems.all())
Esempio n. 8
0
class AggregatorUpdateViewTest(TestCase):
    """Tests for the aggregator 'edit/' view (GET and POST)."""

    def setUp(self):
        """Create the aggregator to edit and the data posted to the view."""
        self.aggregator = AggregatorFactory()
        self.update_url = self.aggregator.get_absolute_url() + 'edit/'
        self.post_data = dict(AggregatorFactory.attributes())

    def test_get(self):
        """The edit page contains the aggregator name and description."""
        self.client_login('admin')
        response = self.client.get(self.update_url)
        self.assertContains(response, self.aggregator.name)
        self.assertContains(response, self.aggregator.description)

    def test_post(self):
        """Posting redirects to the detail page and saves the new values."""
        self.client_login('admin')
        archiveitem = ArchiveItemFactory()
        self.post_data['archiveitems'] = [archiveitem.pk]
        response = self.client.post(self.update_url, data=self.post_data)
        self.assertRedirects(response, self.aggregator.get_absolute_url())

        # Reload from the database to see what the view actually saved.
        aggregator = Aggregator.objects.get(pk=self.aggregator.pk)
        self.assertEqual(aggregator.name, self.post_data['name'])
        self.assertEqual(aggregator.description, self.post_data['description'])
        self.assertIn(archiveitem, aggregator.archiveitems.all())
Esempio n. 9
0
 def setUp(self):
     """Create an aggregator, its 'edit/' URL and the data to post."""
     aggregator = AggregatorFactory()
     self.aggregator = aggregator
     self.update_url = aggregator.get_absolute_url() + 'edit/'

     factory_attributes = AggregatorFactory.attributes()
     self.post_data = dict(factory_attributes)
Esempio n. 10
0
 def setUp(self):
     """Create an aggregator, its 'edit/' URL and the data to post."""
     aggregator = AggregatorFactory()
     self.aggregator = aggregator
     self.update_url = aggregator.get_absolute_url() + 'edit/'

     factory_attributes = AggregatorFactory.attributes()
     self.post_data = dict(factory_attributes)
Esempio n. 11
0
 def setUp(self):
     """Store the create URL and POST data copied from the factory."""
     self.create_url = '/c/aggregator/create/'

     factory_attributes = AggregatorFactory.attributes()
     self.post_data = dict(factory_attributes)
Esempio n. 12
0
class AggregatorDetailViewTest(TestCase):
    """Tests for the aggregator detail page, export and workflow URLs."""

    def setUp(self):
        """Log in and create the aggregator and URLs used by the tests."""
        self.client_login('admin')

        self.aggregator = AggregatorFactory()
        agg_pk = self.aggregator.pk
        self.export_url = reverse('aggregator_export', args=(agg_pk, ))
        self.workflow_url = reverse('aggregator_workflow', args=[agg_pk])

    def test_get(self):
        """The detail page contains the aggregator name."""
        detail_url = self.aggregator.get_absolute_url()
        response = self.client.get(detail_url)
        self.assertContains(response, self.aggregator.name)

    def test_contains_edit_link(self):
        """The detail page contains the 'edit/' URL of the aggregator."""
        detail_url = self.aggregator.get_absolute_url()
        response = self.client.get(detail_url)
        self.assertContains(response, detail_url + 'edit/')

    def test_contains_archiveitems(self):
        """The detail page contains the URL of each linked archive item."""
        items = [ArchiveItemFactory(), ArchiveItemFactory()]
        for archive_item in items:
            AggregatorArchiveItem.objects.create(
                aggregator=self.aggregator, archiveitem=archive_item)

        response = self.client.get(self.aggregator.get_absolute_url())
        for archive_item in items:
            self.assertContains(response, archive_item.get_absolute_url())

    def test_contains_rule(self):
        """The detail page contains the saved silk rule text."""
        rule_text = 'tanto gentile e tanto onesta pare'
        self.aggregator.silk_rule = rule_text
        self.aggregator.save()

        detail_url = self.aggregator.get_absolute_url()
        response = self.client.get(detail_url)
        self.assertContains(response, self.aggregator.silk_rule)

    def test_contains_schedulers(self):
        """The empty-list message is replaced by the scheduler URL."""
        detail_url = self.aggregator.get_absolute_url()
        empty_message = 'No scheduler found'

        response = self.client.get(detail_url)
        self.assertContains(response, empty_message)

        scheduler = AggregatorSchedulerFactory(object_id=self.aggregator.pk)
        response = self.client.get(detail_url)
        self.assertNotContains(response, empty_message)
        self.assertContains(response, scheduler.get_absolute_url())

    def test_contains_silk_link(self):
        """The detail page contains the Silk workbench URL from settings."""
        response = self.client.get(self.aggregator.get_absolute_url())
        host = settings.SILK_EXTERNAL_HOST
        port = settings.SILK_EXTERNAL_PORT
        silk_url = 'http://{}:{}/workbench/'.format(host, port)
        self.assertContains(response, silk_url)

    def test_can_download_silk_project_file(self):
        """The '?download' export URL is linked and served as attachment."""
        self.client_login('admin')
        download_url = self.export_url + '?download'

        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, download_url)

        response = self.client.get(download_url)
        self.assertEqual(response['Content-Type'], 'text/xml; charset=utf-8')
        self.assertTrue(response.has_header('Content-Disposition'))
        self.assertNotEqual(response.content.strip(), '')

    def test_can_run_workflow(self):
        """Posting to the workflow URL redirects and adds one Scheduler."""
        self.client_login('admin')
        # Start from an empty table so the final count is unambiguous.
        Scheduler.objects.all().delete()

        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, self.workflow_url)

        response = self.client.post(self.workflow_url)
        self.assertEqual(response.status_code, 302)
        location = response['Location']
        self.assertTrue(location.startswith('http://testserver/s/task/'))
        self.assertEqual(Scheduler.objects.count(), 1)

    def test_can_view_silk_project_file_without_downloading(self):
        """The plain export URL serves XML without Content-Disposition."""
        self.client_login('admin')
        response = self.client.get(self.export_url)
        self.assertEqual(response['Content-Type'], 'text/xml; charset=utf-8')
        self.assertFalse(response.has_header('Content-Disposition'))
        self.assertNotEqual(response.content.strip(), '')

    def test_silk_project_file_is_valid(self):
        """Parse the exported XML and check prefixes, sources and tasks."""
        import xml.etree.ElementTree as ET

        self.client_login('admin')

        item1 = ArchiveItemFactory()
        item2 = ArchiveItemFactory()
        for archive_item in (item1, item2):
            AggregatorArchiveItem.objects.create(
                aggregator=self.aggregator, archiveitem=archive_item)

        response = self.client.get(self.export_url)
        tree = ET.fromstring(response.content)

        expected_prefix = (settings.TRIPLE_DATABASE['PREFIXES']['sdv1'], 'sd')
        found_prefixes = [
            (node.get('namespace'), node.get('id'))
            for node in tree.findall('.//Prefix')
        ]
        self.assertIn(expected_prefix, found_prefixes)

        # check datasources
        all_sources = tree.findall('.//DataSource')
        self.assertEqual(len(all_sources), 3)
        self.assertEqual(all_sources[0].get('id'), 'master-graph')

        mastergraph = all_sources[0]
        datasources = all_sources[1:]

        # check datasources endpoints
        self.assertEqual(
            mastergraph.find('Param[@name="host"]').get('value'),
            settings.TRIPLE_DATABASE_MASTER['HOST'])
        expected_endpoints = [get_virtuoso_endpoint()] * 2
        found_endpoints = [
            source.find('Param[@name="endpointURI"]').get("value")
            for source in datasources
        ]
        self.assertEqual(expected_endpoints, found_endpoints)

        # check datasources graph names
        self.assertEqual(
            mastergraph.find('Param[@name="graph"]').get('value'),
            settings.TRIPLE_DATABASE_MASTER["KWARGS"]["graph"])
        expected_graphs = [
            item1.datagraph_mapped_name, item2.datagraph_mapped_name]
        found_graphs = [
            source.find('Param[@name="graph"]').get("value")
            for source in datasources
        ]
        self.assertEqual(expected_graphs, found_graphs)

        # check tasks
        datasource_ids = [source.get('id') for source in datasources]
        tasks = tree.findall('.//LinkingTask')
        self.assertEqual(len(tasks), 2)
        interlink_ids = [
            linking.find('.//Interlink').get('id') for linking in tasks]
        self.assertEqual(datasource_ids, interlink_ids)

        # check task parameters
        alignment_tag = '{}Alignment'.format('{http://knowledgeweb.'
                                             'semanticweb.org'
                                             '/heterogeneity/alignment#}')
        empty_paths = (
            './/LinkageRule',
            './/Filter',
            './/Outputs',
            './/PositiveEntities',
            './/NegativeEntities',
        )
        for datasource_id, task in zip(datasource_ids, tasks):
            source_dataset = task.find('.//SourceDataset')
            target_dataset = task.find('.//TargetDataset')

            self.assertEqual(source_dataset.get('dataSource'), datasource_id)
            self.assertEqual(target_dataset.get('dataSource'), 'master-graph')
            self.assertEqual(
                source_dataset.find('RestrictTo').text.strip(),
                '?a rdf:type <{}> .'.format(self.aggregator.entity_type))
            self.assertEqual(
                target_dataset.find('RestrictTo').text.strip(),
                'b -> {}'.format(self.aggregator.vertex_selector))

            # These elements must carry no text.
            for path in empty_paths:
                self.assertIsNone(task.find(path).text)

            alignment = task.find('.//Alignment/').find(alignment_tag)
            self.assertIsNone(alignment.text)
Esempio n. 13
0
 def setUp(self):
     """Create an aggregator and the POST payload holding the rule file.

     The fixture file is registered with ``addCleanup`` so the handle is
     closed after the test (assumes the enclosing class is a
     unittest-based TestCase -- TODO confirm).
     """
     self.aggregator = AggregatorFactory()
     self.update_url = self.aggregator.get_absolute_url() + 'post/'
     self.post_data = dict()

     rule_file = open(self._get_test_file('config.xml', 'controller'))
     # Runs after the test whether it passes or fails.
     self.addCleanup(rule_file.close)
     self.post_data['silk_rule_file'] = rule_file
Esempio n. 14
0
class SilkRuleXMLTestCase(TestCase):
    """Checks on the XML rendered from the ``silk_rules.xml`` template."""

    def setUp(self):
        """Create the aggregator passed to the template."""
        self.aggregator = AggregatorFactory()

    def test_can_silk_rules_file_is_valid(self):
        """Render the template and check sources, rule and output."""
        import xml.etree.ElementTree as ET
        from django.template.loader import render_to_string

        archive_item = ArchiveItemFactory()
        AggregatorArchiveItem.objects.create(
            aggregator=self.aggregator, archiveitem=archive_item)

        self.aggregator.silk_rule = (
            '<LinkageRule><smart data="now" /></LinkageRule>')
        self.aggregator.save()
        output_filename = 'a_really_cool_filename.thm'

        master = settings.TRIPLE_DATABASE_MASTER
        context = {
            'aggregator': self.aggregator,
            'sd_prefix': settings.TRIPLE_DATABASE['PREFIXES']['sdv1'],
            'sparql_endpoint': get_virtuoso_endpoint(),
            'archive_item': archive_item,
            'output_filename': output_filename,
            'mastergraph_host': master['HOST'],
            'mastergraph_port': master['KWARGS']['rexpro_port'],
            'mastergraph_graphname': master['KWARGS']['graph'],
            'resource_namespace': master['PREFIXES']['sdres'],
        }

        rendered = render_to_string(
            'controller/aggregator/silk_rules.xml', context)
        tree = ET.fromstring(rendered)

        expected_prefix = (settings.TRIPLE_DATABASE['PREFIXES']['sdv1'], 'sd')
        found_prefixes = [
            (node.get('namespace'), node.get('id'))
            for node in tree.findall('.//Prefix')
        ]
        self.assertIn(expected_prefix, found_prefixes)

        # check datasources
        datasources_dom = tree.findall('.//DataSource')
        self.assertEqual(len(datasources_dom), 2)
        self.assertEqual(datasources_dom[0].get('id'), 'master-graph')

        mastergraph, datasource = datasources_dom

        # check datasource endpoints
        self.assertEqual(
            get_virtuoso_endpoint(),
            datasource.find('Param[@name="endpointURI"]').get("value"))

        # check datasources graph names
        self.assertEqual(
            mastergraph.find('Param[@name="graph"]').get('value'),
            settings.TRIPLE_DATABASE_MASTER["KWARGS"]["graph"])
        self.assertEqual(
            archive_item.datagraph_mapped_name,
            datasource.find('Param[@name="graph"]').get("value"))

        # check tasks
        datasource_id = datasource.get('id')
        rules = tree.findall('.//Interlink')
        self.assertEqual(len(rules), 1)
        rule = rules[0]
        self.assertEqual(datasource_id, rule.get('id'))

        # check rules parameters
        source_dataset = rule.find('.//SourceDataset')
        target_dataset = rule.find('.//TargetDataset')
        self.assertEqual(source_dataset.get('dataSource'), datasource_id)
        self.assertEqual(target_dataset.get('dataSource'), 'master-graph')
        self.assertEqual(
            ET.tostring(rule.find('.//LinkageRule')).strip(),
            self.aggregator.silk_rule)
        self.assertEqual(
            source_dataset.find('RestrictTo').text.strip(),
            '?a rdf:type <{}> .'.format(self.aggregator.entity_type))
        self.assertEqual(
            target_dataset.find('RestrictTo').text.strip(),
            'b -> {}'.format(self.aggregator.vertex_selector))
        self.assertIsNone(rule.find('.//Filter').text)

        # check the output: a 'file' Param followed by a 'format' Param
        output = rule.find('.//Outputs').find('Output')
        self.assertEqual(output.get('type'), 'file')
        params = output.findall('Param')
        self.assertEqual(params[0].get('name'), 'file')
        self.assertEqual(params[0].get('value'), output_filename)
        self.assertEqual(params[1].get('name'), 'format')
        self.assertEqual(params[1].get('value'), 'ntriples')
Esempio n. 15
0
 def setUp(self):
     """Store the create URL and POST data copied from the factory."""
     self.create_url = '/c/aggregator/create/'

     factory_attributes = AggregatorFactory.attributes()
     self.post_data = dict(factory_attributes)
Esempio n. 16
0
class AggregatorDetailViewTest(TestCase):
    """Tests for the aggregator detail page, export and workflow URLs."""

    def setUp(self):
        """Log in and create the aggregator and URLs used by the tests."""
        self.client_login('admin')

        self.aggregator = AggregatorFactory()
        agg_pk = self.aggregator.pk
        self.export_url = reverse('aggregator_export', args=(agg_pk, ))
        self.workflow_url = reverse('aggregator_workflow', args=[agg_pk])

    def test_get(self):
        """The detail page contains the aggregator name."""
        detail_url = self.aggregator.get_absolute_url()
        response = self.client.get(detail_url)
        self.assertContains(response, self.aggregator.name)

    def test_contains_edit_link(self):
        """The detail page contains the 'edit/' URL of the aggregator."""
        detail_url = self.aggregator.get_absolute_url()
        response = self.client.get(detail_url)
        self.assertContains(response, detail_url + 'edit/')

    def test_contains_archiveitems(self):
        """The detail page contains the URL of each linked archive item."""
        items = [ArchiveItemFactory(), ArchiveItemFactory()]
        for archive_item in items:
            AggregatorArchiveItem.objects.create(
                aggregator=self.aggregator, archiveitem=archive_item)

        response = self.client.get(self.aggregator.get_absolute_url())
        for archive_item in items:
            self.assertContains(response, archive_item.get_absolute_url())

    def test_contains_rule(self):
        """The detail page contains the saved silk rule text."""
        rule_text = 'tanto gentile e tanto onesta pare'
        self.aggregator.silk_rule = rule_text
        self.aggregator.save()

        detail_url = self.aggregator.get_absolute_url()
        response = self.client.get(detail_url)
        self.assertContains(response, self.aggregator.silk_rule)

    def test_contains_schedulers(self):
        """The empty-list message is replaced by the scheduler URL."""
        detail_url = self.aggregator.get_absolute_url()
        empty_message = 'No scheduler found'

        response = self.client.get(detail_url)
        self.assertContains(response, empty_message)

        scheduler = AggregatorSchedulerFactory(object_id=self.aggregator.pk)
        response = self.client.get(detail_url)
        self.assertNotContains(response, empty_message)
        self.assertContains(response, scheduler.get_absolute_url())

    def test_contains_silk_link(self):
        """The detail page contains the Silk workbench URL from settings."""
        response = self.client.get(self.aggregator.get_absolute_url())
        host = settings.SILK_EXTERNAL_HOST
        port = settings.SILK_EXTERNAL_PORT
        silk_url = 'http://{}:{}/workbench/'.format(host, port)
        self.assertContains(response, silk_url)

    def test_can_download_silk_project_file(self):
        """The '?download' export URL is linked and served as attachment."""
        self.client_login('admin')
        download_url = self.export_url + '?download'

        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, download_url)

        response = self.client.get(download_url)
        self.assertEqual(response['Content-Type'], 'text/xml; charset=utf-8')
        self.assertTrue(response.has_header('Content-Disposition'))
        self.assertNotEqual(response.content.strip(), '')

    def test_can_run_workflow(self):
        """Posting to the workflow URL redirects and adds one Scheduler."""
        self.client_login('admin')
        # Start from an empty table so the final count is unambiguous.
        Scheduler.objects.all().delete()

        response = self.client.get(self.aggregator.get_absolute_url())
        self.assertContains(response, self.workflow_url)

        response = self.client.post(self.workflow_url)
        self.assertEqual(response.status_code, 302)
        location = response['Location']
        self.assertTrue(location.startswith('http://testserver/s/task/'))
        self.assertEqual(Scheduler.objects.count(), 1)

    def test_can_view_silk_project_file_without_downloading(self):
        """The plain export URL serves XML without Content-Disposition."""
        self.client_login('admin')
        response = self.client.get(self.export_url)
        self.assertEqual(response['Content-Type'], 'text/xml; charset=utf-8')
        self.assertFalse(response.has_header('Content-Disposition'))
        self.assertNotEqual(response.content.strip(), '')

    def test_silk_project_file_is_valid(self):
        """Parse the exported XML and check prefixes, sources and tasks."""
        import xml.etree.ElementTree as ET

        self.client_login('admin')

        item1 = ArchiveItemFactory()
        item2 = ArchiveItemFactory()
        for archive_item in (item1, item2):
            AggregatorArchiveItem.objects.create(
                aggregator=self.aggregator, archiveitem=archive_item)

        response = self.client.get(self.export_url)
        tree = ET.fromstring(response.content)

        expected_prefix = (settings.TRIPLE_DATABASE['PREFIXES']['sdv1'], 'sd')
        found_prefixes = [
            (node.get('namespace'), node.get('id'))
            for node in tree.findall('.//Prefix')
        ]
        self.assertIn(expected_prefix, found_prefixes)

        # check datasources
        all_sources = tree.findall('.//DataSource')
        self.assertEqual(len(all_sources), 3)
        self.assertEqual(all_sources[0].get('id'), 'master-graph')

        mastergraph = all_sources[0]
        datasources = all_sources[1:]

        # check datasources endpoints
        self.assertEqual(
            mastergraph.find('Param[@name="host"]').get('value'),
            settings.TRIPLE_DATABASE_MASTER['HOST'])
        expected_endpoints = [get_virtuoso_endpoint()] * 2
        found_endpoints = [
            source.find('Param[@name="endpointURI"]').get("value")
            for source in datasources
        ]
        self.assertEqual(expected_endpoints, found_endpoints)

        # check datasources graph names
        self.assertEqual(
            mastergraph.find('Param[@name="graph"]').get('value'),
            settings.TRIPLE_DATABASE_MASTER["KWARGS"]["graph"])
        expected_graphs = [
            item1.datagraph_mapped_name, item2.datagraph_mapped_name]
        found_graphs = [
            source.find('Param[@name="graph"]').get("value")
            for source in datasources
        ]
        self.assertEqual(expected_graphs, found_graphs)

        # check tasks
        datasource_ids = [source.get('id') for source in datasources]
        tasks = tree.findall('.//LinkingTask')
        self.assertEqual(len(tasks), 2)
        interlink_ids = [
            linking.find('.//Interlink').get('id') for linking in tasks]
        self.assertEqual(datasource_ids, interlink_ids)

        # check task parameters
        alignment_tag = '{}Alignment'.format('{http://knowledgeweb.'
                                             'semanticweb.org'
                                             '/heterogeneity/alignment#}')
        empty_paths = (
            './/LinkageRule',
            './/Filter',
            './/Outputs',
            './/PositiveEntities',
            './/NegativeEntities',
        )
        for datasource_id, task in zip(datasource_ids, tasks):
            source_dataset = task.find('.//SourceDataset')
            target_dataset = task.find('.//TargetDataset')

            self.assertEqual(source_dataset.get('dataSource'), datasource_id)
            self.assertEqual(target_dataset.get('dataSource'), 'master-graph')
            self.assertEqual(
                source_dataset.find('RestrictTo').text.strip(),
                '?a rdf:type <{}> .'.format(self.aggregator.entity_type))
            self.assertEqual(
                target_dataset.find('RestrictTo').text.strip(),
                'b -> {}'.format(self.aggregator.vertex_selector))

            # These elements must carry no text.
            for path in empty_paths:
                self.assertIsNone(task.find(path).text)

            alignment = task.find('.//Alignment/').find(alignment_tag)
            self.assertIsNone(alignment.text)
Esempio n. 17
0
 def setUp(self):
     """Create an archive item and an aggregator and build the URL.

     The URL is the reversed 'archiveitem_aggregator_add' route for the
     archive item.
     """
     self.archiveitem = ArchiveItemFactory()
     self.aggregator = AggregatorFactory()

     item_pk = self.archiveitem.pk
     self.url = reverse('archiveitem_aggregator_add', args=(item_pk, ))
Esempio n. 18
0
 def setUp(self):
     """Create an aggregator and keep it on the test instance."""
     aggregator = AggregatorFactory()
     self.aggregator = aggregator
Esempio n. 19
0
class AggregatorSchedulerFactory(SchedulerFactory):
    """Factory for a Scheduler that is executed on an Aggregator.

    Both attributes are lazy: unless ``object_id`` is passed explicitly, a
    new aggregator is created through ``AggregatorFactory`` and its pk used.
    """
    object_id = factory.LazyAttribute(
        lambda _stub: AggregatorFactory().pk
    )
    # Content type of the Aggregator model.
    content_type = factory.LazyAttribute(
        lambda _stub: ContentType.objects.get_for_model(Aggregator)
    )
Esempio n. 20
0
    def handle(self, *args, **options):
        """Entry point: populate the database with sample controller data.

        Updates the name and domain of the ``Site`` row, then uses the
        project factories to create an admin, sources, datasets, archive
        items with their refine rules, and two aggregators linked to some
        of those archive items.

        ``args`` and ``options`` are accepted but not used.
        """
        from django.contrib.sites.models import Site

        def read_fixture(name):
            """Return the whole text of the test file called ``name``."""
            with open(get_test_file(name)) as fin:
                return fin.read()

        # ``get()`` without filters raises unless exactly one Site exists.
        site = Site.objects.get()
        site.name = 'controller'
        site.domain = 'localhost:8001'
        site.save()

        AdminFactory()

        trentinocultura = SourceFactory(
            name='trentinocultura',
            description='description description',
            scraper_name='trentinocultura',
            scraper_api_key='',
        )

        DatasetFactory(
            source=trentinocultura,
            name='Trentinocultura agenda',
            url='http://www.trentinocultura.net/asp_cat/main.asp?IDProspettiva'
                '=35&SearchType=AGENDA_SEARCH&Pag=%d&TipoVista=AGENDA&cmd=new',
            description='Eventi in Trentino',
            download='scraper:trentinocultura:trentinocultura',
            curator='Federico Scrinzi',
            license='All rights reserved',
            other_meta='{}',
            bounding_box='10.3817591116112,45.6730626059259,'
                         '12.4775685651704,47.0917759206089',
        )

        # The literals below are joined by implicit concatenation, so each
        # piece must carry its own separating space.
        ingiro_local = SourceFactory(
            name='in-giro (locale)',
            description='i dati dello scraper di in-giro, ma in locale (file '
                        'webui.scheduler.tests.data/in-giro.zip) utile per '
                        'test, ma anche per boh, altro? =)',
        )

        DatasetFactory(
            source=ingiro_local,
            name='eventi-e-poi-ingiro',
            url='http://in-giro.net',
            description='Eventi e POI presi da in-giro',
            download='http://testserver/in-giro.zip',
            curator='Ciccio Pasticcio',
            license='All rights reserved',
            other_meta='{}',
        )

        bgg_source = SourceFactory(
            name='BoardGameGeek (test)',
            description='pochi dati per testare il matching su silk'
        )

        bgt_source = SourceFactory(
            name='BoardGameTournament (test)',
            description='pochi dati per testare il matching su silk',
        )

        bgg_dataset = DatasetFactory(
            source=bgg_source,
            name='boardgamegeek-games',
            url='http://boardgamegeek.com',
            description='Lista di boardgames presi da boardgamegeek',
            download='https://dl.dropbox.com/u/3435878/boardgamegeek.csv',
            curator='Stefano Parmesan',
            license='All rights reserved',
            other_meta='{}',
        )

        bgt_dataset = DatasetFactory(
            source=bgt_source,
            name='boardgametournament-games',
            url='http://boardgametournament.com',
            description='Lista di boardgames presi da boardgametournament',
            download='https://dl.dropbox.com/u/3435878/'
                     'boardgametournament.csv',
            curator='Stefano Parmesan',
            license='All rights reserved',
            other_meta='{}',
        )

        DatasetFactory(
            source=bgt_source,
            name='boardgametournament-games-xls',
            url='http://boardgametournament.com',
            description='Lista di boardgames presi da boardgametournament',
            download='https://dl.dropbox.com/u/3435878/'
                     'boardgametournament.xls',
            curator='Stefano Parmesan',
            license='All rights reserved',
            encoding="utf8",
            other_meta='{}',
        )

        DatasetFactory(
            source=bgt_source,
            name='boardgametournament-games-xlsx',
            url='http://boardgametournament.com',
            description='Lista di boardgames presi da boardgametournament',
            download='https://dl.dropbox.com/u/3435878/'
                     'boardgametournament.xlsx',
            curator='Stefano Parmesan',
            license='All rights reserved',
            encoding="utf8",
            other_meta='{}',
        )

        bgg_archiveitem = ArchiveItemFactory(
            dataset=bgg_dataset,
            file_target='boardgamegeek.csv',
            file_hash='ea6ee15e9b052171db4f96743aa11425',
            rule=RuleFactory(
                hash="ea6ee15e9b052171db4f96743aa11425",
                rule=read_fixture('boardgamegeek_refine_rules.json'),
            )
        )

        bgt_archiveitem = ArchiveItemFactory(
            dataset=bgt_dataset,
            file_target='boardgametournament.csv',
            file_hash='be864f716b6a7716f3b1c2254f4f5eea',
            rule=RuleFactory(
                hash="be864f716b6a7716f3b1c2254f4f5eea",
                rule=read_fixture('boardgametournament_refine_rules.json'),
            )
        )

        aggregator = AggregatorFactory(
            name='BoardGames',
            description='Un dataset di giochi da tavolo',
            silk_rule=read_fixture('boardgames_aggregator_silk_rules.xml'),
            entity_type='{}BoardGame'.format(
                settings.TRIPLE_DATABASE['PREFIXES']['sdv1']
            ),
            vertex_selector="g.V('type', 'sd$BoardGame')%limit.id.fill(m)",
        )

        # Attach both board-game archive items to the aggregator.
        for archiveitem in (bgg_archiveitem, bgt_archiveitem):
            AggregatorArchiveItem.objects.create(
                aggregator=aggregator,
                archiveitem=archiveitem,
            )

        osm_source = SourceFactory(
            name='OSM (test)',
            description='pochi dati per testare lo slicer'
        )

        osm_dataset = DatasetFactory(
            source=osm_source,
            name='osm-dataset',
            url='http://openstreetmap.org',
            download='https://dl.dropbox.com/u/781790/osm-10nodes.csv',
            curator='Davide setti',
            license='CC PUCCI',
        )

        osm_archiveitem = ArchiveItemFactory(
            dataset=osm_dataset,
            file_target='osm-10nodes.csv',
            file_hash='e6f4a5c5f5fe12765f7b3ca04ab7a82d',
            rule=RuleFactory(
                hash="e6f4a5c5f5fe12765f7b3ca04ab7a82d",
                rule=read_fixture('osm-refine-rules.json'),
            )
        )

        poi_aggregator = AggregatorFactory(
            name='POI',
            description='POI aggregator',
            entity_type=settings.TRIPLE_DATABASE['PREFIXES']['sdv1'] + 'POI',
            vertex_selector="g.V('type', 'sd$POI')%limit.id.fill(m)"
        )
        AggregatorArchiveItem.objects.create(
            aggregator=poi_aggregator,
            archiveitem=osm_archiveitem,
        )

        DatasetFactory(
            source=osm_source,
            name='Aeroporti',
            url='http://dati.trentino.it',
            description='Aeroporti del trentino, file SHP',
            download='http://testserver/aeroporti_tn.zip',
            curator='Federico Scrinzi',
            license='Open Data',
            other_meta='{}',
        )

        strange_source = SourceFactory(
            name='Strange or malformed (test)',
            description='pochi dati con valori strani tipo None',
        )

        DatasetFactory(
            source=strange_source,
            name='strange symbols',
            url='http://testserver/',
            description='Some strange symbols',
            download='http://testserver/strangesymbols.csv',
            curator='Federico Scrinzi',
            license='Open Data',
            other_meta='{}',
        )

        DatasetFactory(
            source=strange_source,
            name='looks like ascii',
            url='http://testserver/',
            description="file that looks like ascii but it's UTF8",
            download='http://testserver/lookslikeascii.csv',
            curator='Federico Scrinzi',
            license='Open Data',
            other_meta='{}',
        )