def test_read_metadata(self):
    """read_metadata() imports one well-formed metadata entry with no errors."""
    importer = MetadataImporter()
    source = SourceFactory(
        scraper_name='fake',
        scraperwiki_url='http://www.google.com/',
    )

    def fake_metadata(*unused_args):
        # Canned metadata record shaped like a real scraper response.
        return [{
            'description': 'dati emergenza estratti da www.intoscana.it',
            'license': 'others',
            'tags': 'emergenza, carabinieri',
            'url': 'http://www.example.com/index.html',
            'curator': 'fondazione sistema toscana',
            'bounding_box': '42.24, 9.69, 44.47, 12.37',
            'other': None,
            'download': 'table://emergencies',
            'name': 'numeri di emergenza in toscana',
        }]

    # Stub out persistence and the remote scraper lookup.
    with patch.object(Dataset, 'save'):
        with patch.object(MetadataImporter, 'get_metadata_of_scraper',
                          side_effect=fake_metadata):
            result = importer.read_metadata(source)

    self.assertEqual({'total': 1, 'errors': 0, 'report': []}, result)
def test_it_works_for_sources(self):
    """Processing a new source makes its task status page show the init handler."""
    source = SourceFactory()
    task = process_source.delay(source)
    status_url = '/s/task/{}/'.format(task.id)
    response = self.client.get(status_url)
    self.assertContains(response, 'Evaluating Init Handler')
def test_it_logs_using_redis_logger(self):
    """process_source() emits log records through the redis logger."""
    obj = SourceFactory()
    loggy = MagicMock()
    with patch('webui.scheduler.tasks.get_redis_logger', return_value=loggy):
        process_source.delay(obj)
    # assertTrue replaces the deprecated TestCase.assert_ alias
    # (the assert_/failUnless aliases were removed in Python 3.12).
    self.assertTrue(loggy.info.called)
def test_dataset_subtasks_log_using_the_same_redis_key(self):
    """The source task and its dataset subtask log under the same redis key."""
    source = SourceFactory(name='boo name')
    source.datasets.add(DatasetFactory(
        name='wow name'
    ))
    loggy = MagicMock()
    with patch('webui.scheduler.tasks.get_redis_logger',
               return_value=loggy) as get_redis_logger:
        task = process_source.delay(source)
    # The original called get_redis_logger.assert_calls(...), which is NOT a
    # real Mock assertion: on a MagicMock it just creates a child mock and
    # silently passes (the TODO in the old code flagged exactly this).
    # Compare call_args_list instead: one logger lookup for the source task
    # and one for the dataset subtask, both keyed by the same task id.
    # (Mock call objects compare equal to (args, kwargs) tuples.)
    self.assertEqual(
        get_redis_logger.call_args_list,
        [((task.id,), {}), ((task.id,), {})],
    )
def test_source_fetch_metadata_fail(self):
    """A failing metadata import surfaces a user-facing error message."""
    request = MagicMock()
    request.method = 'POST'
    source = SourceFactory(
        name='test-source',
        scraper_name='',
        scraperwiki_url=settings.SCRAPERWIKI_APP,
        scraper_api_key='61f623f3-04ba-4c71-ba8e-acc5e88b8202',
    )

    # pylint: disable=W0613
    def failing_import(*args, **kwargs):
        raise Exception('A scraper name must be specified.')

    # Force read_metadata to blow up, then verify the view reports it
    # through the messages framework.
    with patch.object(MetadataImporter, 'read_metadata',
                      side_effect=failing_import):
        with patch.object(messages, 'error') as messages_error:
            source_fetch_metadata(request, source.pk)
            messages_error.assert_called_once_with(
                request, 'Error while updating metadata')
def test_source_fetch_metadata_success(self):
    """A successful metadata import reports its totals via messages.info()."""
    request = MagicMock()
    request.method = 'POST'
    source = SourceFactory(
        name='test-source',
        scraper_name='prodottiprotettitrentino',
        scraperwiki_url=settings.SCRAPERWIKI_APP,
        scraper_api_key='61f623f3-04ba-4c71-ba8e-acc5e88b8202',
    )
    fake_report = {'total': 1, 'errors': 0, 'report': []}
    with patch.object(MetadataImporter, 'read_metadata',
                      return_value=fake_report):
        with patch.object(messages, 'info') as messages_info:
            source_fetch_metadata(request, source.pk)
            # Still inside the patch: read_metadata is the installed mock here.
            MetadataImporter.read_metadata.assert_called_once_with(source)
            messages_info.assert_called_once_with(
                request, '1 metadata imported, 0 errors')
def setUp(self):
    """Pick an existing source and prepare its edit URL plus valid POST data."""
    self.source = Source.objects.all()[0]
    self.update_url = '{}edit/'.format(self.source.get_absolute_url())
    post_data = dict(SourceFactory.attributes())
    post_data['user'] = 1
    self.post_data = post_data
def setUp(self):
    """Point at the source-creation view and prepare valid POST data."""
    self.create_url = '/c/source/create/'
    post_data = dict(SourceFactory.attributes())
    post_data['user'] = 1
    self.post_data = post_data
def handle(self, *args, **options):
    """Entry point: seed the database with demo/test fixture data.

    Creates the admin user, several scraper sources with their datasets,
    refine/silk rules loaded from test files, and two aggregators wired
    to the archive items they should consume.
    """
    # Configure the single Site object so absolute URLs point at the
    # local controller instance.
    from django.contrib.sites.models import Site
    site = Site.objects.get()
    site.name = 'controller'
    site.domain = 'localhost:8001'
    site.save()

    AdminFactory()

    # --- trentinocultura: source scraped remotely via ScraperWiki ---
    trentinocultura = SourceFactory(
        name='trentinocultura',
        description='description description',
        scraper_name='trentinocultura',
        scraper_api_key='',
    )
    DatasetFactory(
        source=trentinocultura,
        name='Trentinocultura agenda',
        url='http://www.trentinocultura.net/asp_cat/main.asp?IDProspettiva'
            '=35&SearchType=AGENDA_SEARCH&Pag=%d&TipoVista=AGENDA&cmd=new',
        description='Eventi in Trentino',
        download='scraper:trentinocultura:trentinocultura',
        curator='Federico Scrinzi',
        license='All rights reserved',
        other_meta='{}',
        bounding_box='10.3817591116112,45.6730626059259,'
                     '12.4775685651704,47.0917759206089',
    )

    # --- in-giro: same data as the in-giro scraper but served locally ---
    ingiro_local = SourceFactory(
        name='in-giro (locale)',
        description='i dati dello scraper di in-giro, ma in locale (file '
                    'webui.scheduler.tests.data/in-giro.zip) utile per'
                    'test, ma anche per boh, altro? =)',
    )
    DatasetFactory(
        source=ingiro_local,
        name='eventi-e-poi-ingiro',
        url='http://in-giro.net',
        description='Eventi e POI presi da in-giro',
        download='http://testserver/in-giro.zip',
        curator='Ciccio Pasticcio',
        license='All rights reserved',
        other_meta='{}',
    )

    # --- boardgame sources: small datasets for silk matching tests ---
    bgg_source = SourceFactory(
        name='BoardGameGeek (test)',
        description='pochi dati per testare il matching su silk'
    )
    bgt_source = SourceFactory(
        name='BoardGameTournament (test)',
        description='pochi dati per testare il matching su silk',
    )
    bgg_dataset = DatasetFactory(
        source=bgg_source,
        name='boardgamegeek-games',
        url='http://boardgamegeek.com',
        description='Lista di boardgames presi da boardgamegeek',
        download='https://dl.dropbox.com/u/3435878/boardgamegeek.csv',
        curator='Stefano Parmesan',
        license='All rights reserved',
        other_meta='{}',
    )
    bgt_dataset = DatasetFactory(
        source=bgt_source,
        name='boardgametournament-games',
        url='http://boardgametournament.com',
        description='Lista di boardgames presi da boardgametournament',
        download='https://dl.dropbox.com/u/3435878/'
                 'boardgametournament.csv',
        curator='Stefano Parmesan',
        license='All rights reserved',
        other_meta='{}',
    )
    # Same boardgametournament data in xls/xlsx, to exercise the
    # spreadsheet import paths (explicit encoding required there).
    DatasetFactory(
        source=bgt_source,
        name='boardgametournament-games-xls',
        url='http://boardgametournament.com',
        description='Lista di boardgames presi da boardgametournament',
        download='https://dl.dropbox.com/u/3435878/'
                 'boardgametournament.xls',
        curator='Stefano Parmesan',
        license='All rights reserved',
        encoding="utf8",
        other_meta='{}',
    )
    DatasetFactory(
        source=bgt_source,
        name='boardgametournament-games-xlsx',
        url='http://boardgametournament.com',
        description='Lista di boardgames presi da boardgametournament',
        download='https://dl.dropbox.com/u/3435878/'
                 'boardgametournament.xlsx',
        curator='Stefano Parmesan',
        license='All rights reserved',
        encoding="utf8",
        other_meta='{}',
    )

    # Attach the refine rules (loaded from test fixture files) to an
    # archive item per CSV dataset; file_hash doubles as the rule key.
    with open(get_test_file('boardgamegeek_refine_rules.json')) as fin:
        rule = ''.join(fin.readlines())
    bgg_archiveitem = ArchiveItemFactory(
        dataset=bgg_dataset,
        file_target='boardgamegeek.csv',
        file_hash='ea6ee15e9b052171db4f96743aa11425',
        rule=RuleFactory(
            hash="ea6ee15e9b052171db4f96743aa11425",
            rule=rule,
        )
    )
    with open(get_test_file('boardgametournament_refine_rules.json')) \
            as fin:
        rule = ''.join(fin.readlines())
    bgt_archiveitem = ArchiveItemFactory(
        dataset=bgt_dataset,
        file_target='boardgametournament.csv',
        file_hash='be864f716b6a7716f3b1c2254f4f5eea',
        rule=RuleFactory(
            hash="be864f716b6a7716f3b1c2254f4f5eea",
            rule=rule,
        )
    )

    # --- BoardGames aggregator: silk rule + both archive items ---
    with open(get_test_file('boardgames_aggregator_silk_rules.xml')) \
            as fin:
        rule = ''.join(fin.readlines())
    aggregator = AggregatorFactory(
        name='BoardGames',
        description='Un dataset di giochi da tavolo',
        silk_rule=rule,
        entity_type='{}BoardGame'.format(
            settings.TRIPLE_DATABASE['PREFIXES']['sdv1']
        ),
        vertex_selector="g.V('type', 'sd$BoardGame')%limit.id.fill(m)",
    )
    for archiveitem in (bgg_archiveitem, bgt_archiveitem):
        AggregatorArchiveItem.objects.create(
            aggregator=aggregator,
            archiveitem=archiveitem,
        )

    # --- OSM: small dataset for slicer tests, plus the POI aggregator ---
    osm_source = SourceFactory(
        name='OSM (test)',
        description='pochi dati per testare lo slicer'
    )
    osm_dataset = DatasetFactory(
        source=osm_source,
        name='osm-dataset',
        url='http://openstreetmap.org',
        download='https://dl.dropbox.com/u/781790/osm-10nodes.csv',
        curator='Davide setti',
        license='CC PUCCI',
    )
    with open(get_test_file('osm-refine-rules.json')) as fin:
        rule = ''.join(fin.readlines())
    osm_archiveitem = ArchiveItemFactory(
        dataset=osm_dataset,
        file_target='osm-10nodes.csv',
        file_hash='e6f4a5c5f5fe12765f7b3ca04ab7a82d',
        rule=RuleFactory(
            hash="e6f4a5c5f5fe12765f7b3ca04ab7a82d",
            rule=rule,
        )
    )
    poi_aggregator = AggregatorFactory(
        name='POI',
        description='POI aggregator',
        entity_type=settings.TRIPLE_DATABASE['PREFIXES']['sdv1'] + 'POI',
        vertex_selector="g.V('type', 'sd$POI')%limit.id.fill(m)"
    )
    AggregatorArchiveItem.objects.create(
        aggregator=poi_aggregator,
        archiveitem=osm_archiveitem,
    )
    DatasetFactory(
        source=osm_source,
        name='Aeroporti',
        url='http://dati.trentino.it',
        description='Aeroporti del trentino, file SHP',
        download='http://testserver/aeroporti_tn.zip',
        curator='Federico Scrinzi',
        license='Open Data',
        other_meta='{}',
    )

    # --- malformed/strange data: encoding and odd-value edge cases ---
    strange_source = SourceFactory(
        name='Strange or malformed (test)',
        description='pochi dati con valori strani tipo None',
    )
    DatasetFactory(
        source=strange_source,
        name='strange symbols',
        url='http://testserver/',
        description='Some strange symbols',
        download='http://testserver/strangesymbols.csv',
        curator='Federico Scrinzi',
        license='Open Data',
        other_meta='{}',
    )
    DatasetFactory(
        source=strange_source,
        name='looks like ascii',
        url='http://testserver/',
        description="file that looks like ascii but it's UTF8",
        download='http://testserver/lookslikeascii.csv',
        curator='Federico Scrinzi',
        license='Open Data',
        other_meta='{}',
    )