Beispiel #1
0
    def test_source_scraperwiki(self):
        """Process the 'trentinocultura' scraperwiki source, then verify
        both the archived tabular data and the source/dataset metadata
        triples stored in Virtuoso."""
        # Clean slate so the .get() calls below are unambiguous.
        Scheduler.objects.all().delete()
        ArchiveItem.objects.all().delete()
        source = Source.objects.get(name='trentinocultura')
        process_source.delay(source)

        dataset = source.datasets.get()
        # Reuse the dataset fetched above instead of re-querying the source.
        archive_item = dataset.archive_items.get()
        self._assert_archive_item(
            archive_item,
            (u'category', u'city', u'title', u'url', u'price',
             u'hours', u'website', u'phone', u'location', u'address', u'date',
             u'notes', u'email', u'organizer', u'other_info', u'fax'),
            49
        )

        from webui.cnmain.utils import get_virtuoso
        virtuoso = get_virtuoso()
        source_meta_id = source.metagraph_resource_id
        dataset_meta_id = dataset.metagraph_resource_id

        from rdflib import Namespace
        METAPROP = Namespace(settings.TRIPLE_DATABASE['PREFIXES']['meta'])
        SDOWL = Namespace(settings.TRIPLE_DATABASE['PREFIXES']['sdowl'])
        RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'

        # The source must be described as an sdowl:Source ...
        self._assert_description(virtuoso, source_meta_id, [
            (METAPROP['description'], source.description),
            (RDF_TYPE, SDOWL['Source'], 'iri'),
        ])
        # ... and the dataset as an sdowl:Dataset linked back to it.
        self._assert_description(virtuoso, dataset_meta_id, [
            (METAPROP['download'], dataset.download),
            (RDF_TYPE, SDOWL['Dataset'], 'iri'),
            (SDOWL['belongs_to_source'], source_meta_id, 'iri'),
        ])
Beispiel #2
0
    def test_source_with_refine_rdf_rule(self):
        """Attach a Refine RDF rule (loaded from a fixture file) to an
        archive item, reprocess the source, and check that the mapped
        rows show up in Virtuoso with the expected names."""
        source = Source.objects.get(name='BoardGameTournament (test)')
        process_source.delay(source)

        rules_path = self._get_test_file(
            "boardgametournament_refine_rules.json", "cnmain"
        )
        with open(rules_path) as rules_file:
            rule_text = rules_file.read()

        dataset = source.datasets.get(name="boardgametournament-games")
        archive_item = dataset.archive_items.get()
        archive_item.rule = RuleFactory(
            hash=archive_item.file_hash,
            rule=rule_text,
        )
        archive_item.save(force_update=True)

        # Second run: this time the RDF rule is applied.
        process_source.delay(source)

        from webui.cnmain.utils import get_virtuoso
        virtuoso = get_virtuoso()

        # Each mapped row must carry the game name from the fixture data.
        for row_key, game_name in (("0", "Dominion"), ("1", "Carcassonne")):
            row_id = archive_item.datagraph_mapped_row_id(row_key)
            self._assert_description(virtuoso, row_id, [
                ("http://ontologies.venturi.eu/v1#name",
                 game_name),
            ])
Beispiel #3
0
    def test_it_logs_using_redis_logger(self):
        """Processing a source must emit info-level output through the
        logger obtained from ``get_redis_logger``."""
        obj = SourceFactory()

        loggy = MagicMock()
        with patch('webui.scheduler.tasks.get_redis_logger',
                   return_value=loggy):
            process_source.delay(obj)

            # ``self.assert_`` is a deprecated alias of ``assertTrue``;
            # use the canonical name.
            self.assertTrue(loggy.info.called)
Beispiel #4
0
    def test_source_csv_with_different_delimiter(self):
        """A dataset whose CSV uses '$' as delimiter is still parsed
        into the expected columns and row count."""
        Scheduler.objects.all().delete()
        ArchiveItem.objects.all().delete()

        source = Source.objects.get(name='in-giro (locale)')
        dataset = source.datasets.get()
        # Point the dataset at a dollar-delimited fixture before processing.
        dataset.csv_delimiter = '$'
        dataset.download = 'http://testserver/csv_with_different_delimiter.csv'
        dataset.save()

        process_source.delay(source)

        self._assert_archive_item(
            dataset.archive_items.get(),
            (u'col1', u'col2', u'col3'),
            4,
        )
Beispiel #5
0
    def test_it_works_for_sources(self):
        """The task status page of a processed source shows the
        init-handler log line."""
        source = SourceFactory()
        task = process_source.delay(source)

        status_url = '/s/task/{}/'.format(task.id)
        self.assertContains(self.client.get(status_url),
                            'Evaluating Init Handler')
Beispiel #6
0
    def test_it_works_for_sources(self):
        """Processing a source yields a task whose status page contains
        the init-handler log message."""
        obj = SourceFactory()

        task_id = process_source.delay(obj).id
        response = self.client.get('/s/task/{}/'.format(task_id))

        self.assertContains(response, 'Evaluating Init Handler')
Beispiel #7
0
    def test_source_csv_with_weird_quotes(self):
        """CSV parsing honours a custom quote character ('&'), allowing
        fields to contain both single and double quotes."""
        Scheduler.objects.all().delete()
        ArchiveItem.objects.all().delete()

        source = Source.objects.get(name='in-giro (locale)')
        dataset = source.datasets.get()
        dataset.csv_quotechar = "&"
        dataset.download = 'http://testserver/csv_with_weird_quotes.csv'
        dataset.save()

        process_source.delay(source)

        archive_item = dataset.archive_items.get()
        self._assert_archive_item(archive_item, (u'col1', u'col2', u'col3'), 3)

        # Fields quoted with '&' may legally contain " and ' characters.
        rows = list(archive_item.data())
        second_row = rows[1]
        self.assertEqual(second_row[0], 'testo lungo e bello')
        self.assertEqual(second_row[1], """guarda posso mettere sia " che '""")
Beispiel #8
0
    def test_does_not_crash(self):
        """The slicer dump endpoint serves 10 GeoJSON features after the
        OSM source and POI aggregator have been processed."""
        from webui.controller.models import Aggregator, Source
        from webui.scheduler.tasks import process_aggregator, process_source

        process_source.delay(Source.objects.get(name='OSM (test)'))
        process_aggregator.delay(Aggregator.objects.get(name='POI'))

        dump_url = '/l/slicer/{}/dump/'.format(self.slicer.pk)
        response = self.client.get(dump_url)
        self.assertEqual(response.status_code, 200)

        # The dump is streamed; join the chunks before decoding.
        payload = json.loads(''.join(response.streaming_content))

        self.assertEqual(len(payload['features']), 10)
Beispiel #9
0
    def test_does_not_crash(self):
        """End-to-end: process the OSM source, run the POI aggregator and
        verify the slicer dump responds with 10 features."""
        from webui.controller.models import Aggregator, Source
        from webui.scheduler.tasks import process_aggregator, process_source

        osm = Source.objects.get(name='OSM (test)')
        process_source.delay(osm)

        aggregator = Aggregator.objects.get(name='POI')
        process_aggregator.delay(aggregator)

        response = self.client.get(
            '/l/slicer/{}/dump/'.format(self.slicer.pk)
        )
        self.assertEqual(response.status_code, 200)

        # Streamed response: assemble the body before JSON-decoding it.
        body = ''.join(response.streaming_content)
        self.assertEqual(len(json.loads(body)['features']), 10)
Beispiel #10
0
    def test_source_with_refine_rule(self):
        """Process the 'trentinocultura' source, attach a Refine
        text-transform rule that replaces every '0' with 'x' in the
        ``phone`` column, reprocess, and verify the transformed data.
        """
        # Clean slate so the .get() calls below are unambiguous.
        Scheduler.objects.all().delete()
        ArchiveItem.objects.all().delete()
        source = Source.objects.get(name='trentinocultura')
        process_source.delay(source)

        # Inline Refine operation history (JSON).  The jython expression
        # rewrites '0' -> 'x' in the "phone" column; the literal is split
        # with a line continuation, so its exact whitespace is part of
        # the rule text.
        rule = r"""[
          {
            "operation": {
              "repeat": false,
              "description": "Text transform on cells in column phone",
              "onError": "keep-original",
              "repeatCount": 10,
              "columnName": "phone",
              "engineConfig": {
                "facets": [],
                "mode": "row-based"
              },
              "expression": "jython:return value.replace(\"0\", \"x\") """ + \
            """if value else None",
                  "op": "core/text-transform"
                }
              }
            ]"""

        archive_item = source.datasets.get().archive_items.get()
        # Bind the rule to this archive item via its file hash.
        archive_item.rule = RuleFactory(
            rule=rule,
            hash=archive_item.file_hash
        )
        archive_item.save(force_update=True)

        # Second run applies the rule during processing.
        process_source.delay(source)

        # After the transform no 'phone' value may still contain a '0';
        # note the extra '__sd_hash__' column added by the pipeline.
        self._assert_archive_item(
            archive_item,
            (u'__sd_hash__', u'category', u'city', u'title', u'url', u'price',
             u'hours', u'website', u'phone', u'location', u'address', u'date',
             u'notes', u'email', u'organizer', u'other_info', u'fax'),
            49,
            {
                'phone': lambda x: not x or x.find('0') == -1
            }
        )
Beispiel #11
0
    def test_source_archive(self):
        """Processing 'in-giro (locale)' archives two items (events and
        POIs) and records source/dataset metadata triples in Virtuoso."""
        Scheduler.objects.all().delete()
        ArchiveItem.objects.all().delete()

        source = Source.objects.get(name='in-giro (locale)')
        dataset = source.datasets.get()
        process_source.delay(source)

        # Two archive items, deterministically ordered by file hash.
        items = dataset.archive_items.all().order_by("file_hash")
        events_item, poi_event = items

        self._assert_archive_item(
            poi_event,
            (u'website', u'city', u'name', u'url', u'phone', u'address',
             u'location_type', u'description', u'province'),
            158
        )
        self._assert_archive_item(
            events_item,
            (u'city', u'description', u'url', u'date', u'location',
             u'genre', u'location_url'),
            497
        )

        from webui.cnmain.utils import get_virtuoso
        from rdflib import Namespace

        virtuoso = get_virtuoso()
        METAPROP = Namespace(settings.TRIPLE_DATABASE['PREFIXES']['meta'])
        SDOWL = Namespace(settings.TRIPLE_DATABASE['PREFIXES']['sdowl'])
        RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'

        source_meta_id = source.metagraph_resource_id
        dataset_meta_id = dataset.metagraph_resource_id

        # The source is typed sdowl:Source with its description ...
        self._assert_description(virtuoso, source_meta_id, [
            (METAPROP['description'], source.description),
            (RDF_TYPE, SDOWL['Source'], 'iri'),
        ])
        # ... and the dataset is typed sdowl:Dataset, linked to the source.
        self._assert_description(virtuoso, dataset_meta_id, [
            (METAPROP['download'], dataset.download),
            (RDF_TYPE, SDOWL['Dataset'], 'iri'),
            (SDOWL['belongs_to_source'], source_meta_id, 'iri'),
        ])
Beispiel #12
0
    def test_dataset_subtasks_log_using_the_same_redis_key(self):
        """Every dataset subtask must obtain its redis logger with the
        parent task id, so all log lines share a single redis key."""
        source = SourceFactory(name='boo name')
        source.datasets.add(DatasetFactory(
            name='wow name'
        ))

        loggy = MagicMock()
        with patch('webui.scheduler.tasks.get_redis_logger',
                   return_value=loggy) as get_redis_logger:
            task = process_source.delay(source)

            # The previous check used ``assert_calls``, which is not a
            # real Mock assertion method: attribute access on a MagicMock
            # just creates a child mock, so the test always passed.
            # Assert the actual calls instead: the logger factory is
            # invoked twice (source + dataset), both times with the
            # parent task id.
            positional_args = [
                args for args, kwargs in get_redis_logger.call_args_list
            ]
            self.assertEqual(positional_args, [(task.id,), (task.id,)])