Example #1
    def test_read_metadata(self):
        importer = MetadataImporter()
        source = SourceFactory(
            scraper_name='fake',
            scraperwiki_url='http://www.google.com/',
        )

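        # Fake metadata payload standing in for a real scraper response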
        def side_metadata(*args):
            return [{
                'description': 'dati emergenza estratti da www.intoscana.it',
                'license': 'others',
                'tags': 'emergenza, carabinieri',
                'url': 'http://www.example.com/index.html',
                'curator': 'fondazione sistema toscana',
                'bounding_box': '42.24, 9.69, 44.47, 12.37',
                'other': None,
                'download': 'table://emergencies',
                'name': 'numeri di emergenza in toscana'
            }]

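        # Stub out Dataset.save and the scraper call so the test touches
        # neither the database nor the network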
        with patch.object(Dataset, 'save'):
            with patch.object(MetadataImporter, 'get_metadata_of_scraper',
                              side_effect=side_metadata):
                self.assertEqual(
                    {'total': 1, 'errors': 0, 'report': []},
                    importer.read_metadata(source)
                )
Example #2
    def test_it_works_for_sources(self):
        obj = SourceFactory()

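        # delay() is expected to run the task synchronously here
        # (eager Celery mode in the test settings is assumed)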
        task = process_source.delay(obj)

        response = self.client.get('/s/task/{}/'.format(task.id))
        self.assertContains(response, 'Evaluating Init Handler')
Example #3
    def test_it_logs_using_redis_logger(self):
        obj = SourceFactory()

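        # Replace the Redis-backed logger with a mock so its calls can be
        # inspected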
        loggy = MagicMock()
        with patch('webui.scheduler.tasks.get_redis_logger',
                   return_value=loggy):
            process_source.delay(obj)

            self.assertTrue(loggy.info.called)
Example #4
    def test_dataset_subtasks_log_using_the_same_redis_key(self):
        source = SourceFactory(name='boo name')
        source.datasets.add(DatasetFactory(
            name='wow name'
        ))

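        # The source task and its dataset subtask should request the
        # logger under the same key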
        loggy = MagicMock()
        with patch('webui.scheduler.tasks.get_redis_logger',
                   return_value=loggy) as get_redis_logger:
            task = process_source.delay(source)

            # assert_calls is not a real MagicMock method; use
            # assert_has_calls with mock.call objects instead
            # (`call` comes from the mock library)
            get_redis_logger.assert_has_calls([call(task.id), call(task.id)])
Example #5
    def test_source_fetch_metadata_fail(self):
        request = MagicMock()
        request.method = 'POST'
        source = SourceFactory(
            name='test-source',
            scraper_name='',
            scraperwiki_url=settings.SCRAPERWIKI_APP,
            scraper_api_key='61f623f3-04ba-4c71-ba8e-acc5e88b8202',
        )

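        # Simulate read_metadata failing: the view should emit a generic
        # error message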
        # pylint: disable=W0613
        def side_fun(*args, **kwargs):
            raise Exception('A scraper name must be specified.')

        with patch.object(MetadataImporter,
                          'read_metadata',
                          side_effect=side_fun):
            with patch.object(messages, 'error') as messages_error:
                source_fetch_metadata(request, source.pk)
                messages_error.assert_called_once_with(
                    request, 'Error while updating metadata')
Example #6
    def test_source_fetch_metadata_success(self):
        request = MagicMock()
        request.method = 'POST'
        source = SourceFactory(
            name='test-source',
            scraper_name='prodottiprotettitrentino',
            scraperwiki_url=settings.SCRAPERWIKI_APP,
            scraper_api_key='61f623f3-04ba-4c71-ba8e-acc5e88b8202',
        )

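        # read_metadata is stubbed to report one imported record and no errors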
        with patch.object(MetadataImporter,
                          'read_metadata',
                          return_value={
                              'total': 1,
                              'errors': 0,
                              'report': []
                          }):
            with patch.object(messages, 'info') as messages_info:
                source_fetch_metadata(request, source.pk)
                MetadataImporter.read_metadata.assert_called_once_with(source)
                messages_info.assert_called_once_with(
                    request, '1 metadata imported, 0 errors')
Example #7
    def setUp(self):
        self.source = Source.objects.all()[0]
        self.update_url = self.source.get_absolute_url() + 'edit/'
        self.post_data = dict(SourceFactory.attributes(), user=1)
Example #8
    def setUp(self):
        self.create_url = '/c/source/create/'
        self.post_data = dict(SourceFactory.attributes(), user=1)
Example #9
    def handle(self, *args, **options):
        """
        entry point
        """
        from django.contrib.sites.models import Site

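        # Point the default Django site at the local controller instance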
        site = Site.objects.get()
        site.name = 'controller'
        site.domain = 'localhost:8001'
        site.save()

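        # Default admin account for the demo instance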
        AdminFactory()

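        # A source backed by a ScraperWiki scraper, with one dataset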
        trentinocultura = SourceFactory(
            name='trentinocultura',
            description='description description',
            scraper_name='trentinocultura',
            scraper_api_key='',
        )

        DatasetFactory(
            source=trentinocultura,
            name='Trentinocultura agenda',
            url='http://www.trentinocultura.net/asp_cat/main.asp?IDProspettiva'
                '=35&SearchType=AGENDA_SEARCH&Pag=%d&TipoVista=AGENDA&cmd=new',
            description='Eventi in Trentino',
            download='scraper:trentinocultura:trentinocultura',
            curator='Federico Scrinzi',
            license='All rights reserved',
            other_meta='{}',
            bounding_box='10.3817591116112,45.6730626059259,'
                         '12.4775685651704,47.0917759206089',
        )

        ingiro_local = SourceFactory(
            name='in-giro (locale)',
            description='i dati dello scraper di in-giro, ma in locale (file '
                        'webui.scheduler.tests.data/in-giro.zip) utile per '
                        'test, ma anche per boh, altro? =)',
        )

        DatasetFactory(
            source=ingiro_local,
            name='eventi-e-poi-ingiro',
            url='http://in-giro.net',
            description='Eventi e POI presi da in-giro',
            download='http://testserver/in-giro.zip',
            curator='Ciccio Pasticcio',
            license='All rights reserved',
            other_meta='{}',
        )

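        # Two small board-game sources used to exercise Silk matching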
        bgg_source = SourceFactory(
            name='BoardGameGeek (test)',
            description='pochi dati per testare il matching su silk'
        )

        bgt_source = SourceFactory(
            name='BoardGameTournament (test)',
            description='pochi dati per testare il matching su silk',
        )

        bgg_dataset = DatasetFactory(
            source=bgg_source,
            name='boardgamegeek-games',
            url='http://boardgamegeek.com',
            description='Lista di boardgames presi da boardgamegeek',
            download='https://dl.dropbox.com/u/3435878/boardgamegeek.csv',
            curator='Stefano Parmesan',
            license='All rights reserved',
            other_meta='{}',
        )

        bgt_dataset = DatasetFactory(
            source=bgt_source,
            name='boardgametournament-games',
            url='http://boardgametournament.com',
            description='Lista di boardgames presi da boardgametournament',
            download='https://dl.dropbox.com/u/3435878/'
                     'boardgametournament.csv',
            curator='Stefano Parmesan',
            license='All rights reserved',
            other_meta='{}',
        )

        DatasetFactory(
            source=bgt_source,
            name='boardgametournament-games-xls',
            url='http://boardgametournament.com',
            description='Lista di boardgames presi da boardgametournament',
            download='https://dl.dropbox.com/u/3435878/'
                     'boardgametournament.xls',
            curator='Stefano Parmesan',
            license='All rights reserved',
            encoding="utf8",
            other_meta='{}',
        )

        DatasetFactory(
            source=bgt_source,
            name='boardgametournament-games-xlsx',
            url='http://boardgametournament.com',
            description='Lista di boardgames presi da boardgametournament',
            download='https://dl.dropbox.com/u/3435878/'
                     'boardgametournament.xlsx',
            curator='Stefano Parmesan',
            license='All rights reserved',
            encoding="utf8",
            other_meta='{}',
        )

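        # Refine rules are keyed by the hash of the file they apply to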
        with open(get_test_file('boardgamegeek_refine_rules.json')) as fin:
            rule = fin.read()
        bgg_archiveitem = ArchiveItemFactory(
            dataset=bgg_dataset,
            file_target='boardgamegeek.csv',
            file_hash='ea6ee15e9b052171db4f96743aa11425',
            rule=RuleFactory(
                hash="ea6ee15e9b052171db4f96743aa11425",
                rule=rule,
            )
        )

        with open(get_test_file('boardgametournament_refine_rules.json')) \
                as fin:
            rule = fin.read()

        bgt_archiveitem = ArchiveItemFactory(
            dataset=bgt_dataset,
            file_target='boardgametournament.csv',
            file_hash='be864f716b6a7716f3b1c2254f4f5eea',
            rule=RuleFactory(
                hash="be864f716b6a7716f3b1c2254f4f5eea",
                rule=rule,
            )
        )

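        # Silk linkage rule that matches board games across the two datasets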
        with open(get_test_file('boardgames_aggregator_silk_rules.xml')) \
                as fin:
            rule = fin.read()
        aggregator = AggregatorFactory(
            name='BoardGames',
            description='Un dataset di giochi da tavolo',
            silk_rule=rule,
            entity_type='{}BoardGame'.format(
                settings.TRIPLE_DATABASE['PREFIXES']['sdv1']
            ),
            vertex_selector="g.V('type', 'sd$BoardGame')%limit.id.fill(m)",
        )

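        # Feed both archive items into the aggregator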
        for archiveitem in (bgg_archiveitem, bgt_archiveitem):
            AggregatorArchiveItem.objects.create(
                aggregator=aggregator,
                archiveitem=archiveitem,
            )

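        # A small OpenStreetMap sample used to test the slicer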
        osm_source = SourceFactory(
            name='OSM (test)',
            description='pochi dati per testare lo slicer'
        )

        osm_dataset = DatasetFactory(
            source=osm_source,
            name='osm-dataset',
            url='http://openstreetmap.org',
            download='https://dl.dropbox.com/u/781790/osm-10nodes.csv',
            curator='Davide setti',
            license='CC PUCCI',
        )

        with open(get_test_file('osm-refine-rules.json')) as fin:
            rule = fin.read()
        osm_archiveitem = ArchiveItemFactory(
            dataset=osm_dataset,
            file_target='osm-10nodes.csv',
            file_hash='e6f4a5c5f5fe12765f7b3ca04ab7a82d',
            rule=RuleFactory(
                hash="e6f4a5c5f5fe12765f7b3ca04ab7a82d",
                rule=rule,
            )
        )

        poi_aggregator = AggregatorFactory(
            name='POI',
            description='POI aggregator',
            entity_type=settings.TRIPLE_DATABASE['PREFIXES']['sdv1'] + 'POI',
            vertex_selector="g.V('type', 'sd$POI')%limit.id.fill(m)"
        )
        AggregatorArchiveItem.objects.create(
            aggregator=poi_aggregator,
            archiveitem=osm_archiveitem,
        )

        DatasetFactory(
            source=osm_source,
            name='Aeroporti',
            url='http://dati.trentino.it',
            description='Aeroporti del trentino, file SHP',
            download='http://testserver/aeroporti_tn.zip',
            curator='Federico Scrinzi',
            license='Open Data',
            other_meta='{}',
        )

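        # Datasets with odd values and encodings for robustness tests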
        strange_source = SourceFactory(
            name='Strange or malformed (test)',
            description='pochi dati con valori strani tipo None',
        )

        DatasetFactory(
            source=strange_source,
            name='strange symbols',
            url='http://testserver/',
            description='Some strange symbols',
            download='http://testserver/strangesymbols.csv',
            curator='Federico Scrinzi',
            license='Open Data',
            other_meta='{}',
        )

        DatasetFactory(
            source=strange_source,
            name='looks like ascii',
            url='http://testserver/',
            description="file that looks like ascii but it's UTF8",
            download='http://testserver/lookslikeascii.csv',
            curator='Federico Scrinzi',
            license='Open Data',
            other_meta='{}',
        )