def test_source_scraperwiki(self):
    """Process the trentinocultura source end-to-end and check both the
    tabular archive item and the metadata triples stored in virtuoso."""
    Scheduler.objects.all().delete()
    ArchiveItem.objects.all().delete()

    source = Source.objects.get(name='trentinocultura')
    process_source.delay(source)

    dataset = source.datasets.get()
    archive_item = dataset.archive_items.get()
    expected_columns = (
        u'category', u'city', u'title', u'url', u'price', u'hours',
        u'website', u'phone', u'location', u'address', u'date', u'notes',
        u'email', u'organizer', u'other_info', u'fax',
    )
    self._assert_archive_item(archive_item, expected_columns, 49)

    from webui.cnmain.utils import get_virtuoso
    from rdflib import Namespace

    virtuoso = get_virtuoso()
    METAPROP = Namespace(settings.TRIPLE_DATABASE['PREFIXES']['meta'])
    SDOWL = Namespace(settings.TRIPLE_DATABASE['PREFIXES']['sdowl'])
    RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'

    source_meta_id = source.metagraph_resource_id
    dataset_meta_id = dataset.metagraph_resource_id

    # Source-level metadata description.
    self._assert_description(virtuoso, source_meta_id, [
        (METAPROP['description'], source.description),
        (RDF_TYPE, SDOWL['Source'], 'iri'),
    ])
    # Dataset-level metadata description, linked back to the source.
    self._assert_description(virtuoso, dataset_meta_id, [
        (METAPROP['download'], dataset.download),
        (RDF_TYPE, SDOWL['Dataset'], 'iri'),
        (SDOWL['belongs_to_source'], source_meta_id, 'iri'),
    ])
def test_source_with_refine_rdf_rule(self):
    """Attach a refine RDF rule to an archive item, reprocess the source,
    and verify the mapped rows appear in the data graph."""
    source = Source.objects.get(name='BoardGameTournament (test)')
    process_source.delay(source)

    rule_path = self._get_test_file(
        "boardgametournament_refine_rules.json", "cnmain"
    )
    with open(rule_path) as rule_file:
        rule_text = rule_file.read()

    dataset = source.datasets.get(name="boardgametournament-games")
    archive_item = dataset.archive_items.get()
    archive_item.rule = RuleFactory(
        rule=rule_text,
        hash=archive_item.file_hash,
    )
    archive_item.save(force_update=True)

    # Second run applies the rule just attached.
    process_source.delay(source)

    from webui.cnmain.utils import get_virtuoso
    virtuoso = get_virtuoso()

    for row, expected_name in (("0", "Dominion"), ("1", "Carcassonne")):
        row_id = archive_item.datagraph_mapped_row_id(row)
        self._assert_description(virtuoso, row_id, [
            ("http://ontologies.venturi.eu/v1#name", expected_name),
        ])
def test_it_logs_using_redis_logger(self):
    """process_source must emit its log records through the logger
    returned by get_redis_logger."""
    obj = SourceFactory()
    loggy = MagicMock()
    with patch('webui.scheduler.tasks.get_redis_logger',
               return_value=loggy):
        process_source.delay(obj)
    # assertTrue replaces the deprecated unittest assert_ alias.
    self.assertTrue(loggy.info.called)
def test_source_csv_with_different_delimiter(self):
    """A dataset configured with a '$' delimiter is parsed correctly."""
    Scheduler.objects.all().delete()
    ArchiveItem.objects.all().delete()

    source = Source.objects.get(name='in-giro (locale)')
    dataset = source.datasets.get()
    dataset.csv_delimiter = '$'
    dataset.download = \
        'http://testserver/csv_with_different_delimiter.csv'
    dataset.save()

    process_source.delay(source)

    item = dataset.archive_items.get()
    self._assert_archive_item(item, (u'col1', u'col2', u'col3'), 4)
def test_it_works_for_sources(self):
    """The task-status page for a source-processing task renders the
    expected progress message."""
    source = SourceFactory()
    task = process_source.delay(source)
    url = '/s/task/{}/'.format(task.id)
    response = self.client.get(url)
    self.assertContains(response, 'Evaluating Init Handler')
def test_source_csv_with_weird_quotes(self):
    """A dataset configured with '&' as quote character is parsed
    correctly, preserving embedded quote characters in the data."""
    Scheduler.objects.all().delete()
    ArchiveItem.objects.all().delete()

    source = Source.objects.get(name='in-giro (locale)')
    dataset = source.datasets.get()
    dataset.csv_quotechar = "&"
    dataset.download = 'http://testserver/csv_with_weird_quotes.csv'
    dataset.save()

    process_source.delay(source)

    item = dataset.archive_items.get()
    self._assert_archive_item(item, (u'col1', u'col2', u'col3'), 3)

    rows = list(item.data())
    self.assertEqual(rows[1][0], 'testo lungo e bello')
    self.assertEqual(rows[1][1], """guarda posso mettere sia " che '""")
def test_does_not_crash(self):
    """Processing a source and an aggregator then dumping the slicer
    returns a valid GeoJSON payload with the expected feature count."""
    from webui.controller.models import Aggregator, Source
    from webui.scheduler.tasks import process_aggregator, process_source

    osm_source = Source.objects.get(name='OSM (test)')
    process_source.delay(osm_source)

    poi_aggregator = Aggregator.objects.get(name='POI')
    process_aggregator.delay(poi_aggregator)

    dump_url = '/l/slicer/{}/dump/'.format(self.slicer.pk)
    response = self.client.get(dump_url)
    self.assertEqual(response.status_code, 200)

    payload = json.loads(''.join(response.streaming_content))
    self.assertEqual(len(payload['features']), 10)
def test_source_with_refine_rule(self):
    """Attach a refine text-transform rule (replace "0" with "x" in the
    phone column), reprocess, and verify the transformed archive item."""
    Scheduler.objects.all().delete()
    ArchiveItem.objects.all().delete()

    source = Source.objects.get(name='trentinocultura')
    process_source.delay(source)

    rule = r"""[ { "operation": { "repeat": false, "description": "Text transform on cells in column phone", "onError": "keep-original", "repeatCount": 10, "columnName": "phone", "engineConfig": { "facets": [], "mode": "row-based" }, "expression": "jython:return value.replace(\"0\", \"x\") """ + \
        """if value else None", "op": "core/text-transform" } } ]"""

    archive_item = source.datasets.get().archive_items.get()
    archive_item.rule = RuleFactory(
        rule=rule,
        hash=archive_item.file_hash,
    )
    archive_item.save(force_update=True)

    # Second run applies the rule just attached.
    process_source.delay(source)

    expected_columns = (
        u'__sd_hash__', u'category', u'city', u'title', u'url', u'price',
        u'hours', u'website', u'phone', u'location', u'address', u'date',
        u'notes', u'email', u'organizer', u'other_info', u'fax',
    )
    self._assert_archive_item(
        archive_item,
        expected_columns,
        49,
        # After the transform no phone value may contain a "0".
        {'phone': lambda value: not value or value.find('0') == -1},
    )
def test_source_archive(self):
    """Process a source with two archive items and verify both the
    extracted tables and the metadata triples stored in virtuoso."""
    Scheduler.objects.all().delete()
    ArchiveItem.objects.all().delete()

    source = Source.objects.get(name='in-giro (locale)')
    dataset = source.datasets.get()
    process_source.delay(source)

    # Two archive items, deterministically ordered by file hash.
    events_item, poi_event = \
        dataset.archive_items.all().order_by("file_hash")

    self._assert_archive_item(
        poi_event,
        (u'website', u'city', u'name', u'url', u'phone', u'address',
         u'location_type', u'description', u'province'),
        158,
    )
    self._assert_archive_item(
        events_item,
        (u'city', u'description', u'url', u'date', u'location',
         u'genre', u'location_url'),
        497,
    )

    from webui.cnmain.utils import get_virtuoso
    from rdflib import Namespace

    virtuoso = get_virtuoso()
    METAPROP = Namespace(settings.TRIPLE_DATABASE['PREFIXES']['meta'])
    SDOWL = Namespace(settings.TRIPLE_DATABASE['PREFIXES']['sdowl'])
    RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'

    source_meta_id = source.metagraph_resource_id
    dataset_meta_id = dataset.metagraph_resource_id

    # Source-level metadata description.
    self._assert_description(virtuoso, source_meta_id, [
        (METAPROP['description'], source.description),
        (RDF_TYPE, SDOWL['Source'], 'iri'),
    ])
    # Dataset-level metadata description, linked back to the source.
    self._assert_description(virtuoso, dataset_meta_id, [
        (METAPROP['download'], dataset.download),
        (RDF_TYPE, SDOWL['Dataset'], 'iri'),
        (SDOWL['belongs_to_source'], source_meta_id, 'iri'),
    ])
def test_dataset_subtasks_log_using_the_same_redis_key(self):
    """Every dataset subtask must request the redis logger with the
    parent task's id, so all of their logs share a single redis key."""
    source = SourceFactory(name='boo name')
    source.datasets.add(DatasetFactory(
        name='wow name'
    ))
    loggy = MagicMock()
    with patch('webui.scheduler.tasks.get_redis_logger',
               return_value=loggy) as get_redis_logger:
        task = process_source.delay(source)

    # The original assertion used get_redis_logger.assert_calls(...),
    # which is not a real Mock method -- Mock auto-creates it, so the
    # check passed vacuously. Inspect the recorded calls explicitly:
    # the logger must be requested twice (source task + dataset subtask),
    # both times keyed on the parent task id.
    calls = get_redis_logger.call_args_list
    self.assertEqual(len(calls), 2)
    for args, _kwargs in calls:
        self.assertEqual(args[0], task.id)