def test_read_metadata(self):
    """read_metadata reports one imported dataset and zero errors when
    the scraper yields a single well-formed metadata record."""
    importer = MetadataImporter()
    source = SourceFactory(
        scraper_name='fake',
        scraperwiki_url='http://www.google.com/',
    )

    def fake_metadata(*unused_args):
        # One record shaped like the scraper's real output.
        record = {
            'description': 'dati emergenza estratti da www.intoscana.it',
            'license': 'others',
            'tags': 'emergenza, carabinieri',
            'url': 'http://www.example.com/index.html',
            'curator': 'fondazione sistema toscana',
            'bounding_box': '42.24, 9.69, 44.47, 12.37',
            'other': None,
            'download': 'table://emergencies',
            'name': 'numeri di emergenza in toscana',
        }
        return [record]

    # Stub out persistence and the remote scraper call; only the
    # importer's bookkeeping is under test here.
    with patch.object(Dataset, 'save'), \
            patch.object(MetadataImporter, 'get_metadata_of_scraper',
                         side_effect=fake_metadata):
        expected = {'total': 1, 'errors': 0, 'report': []}
        self.assertEqual(expected, importer.read_metadata(source))
def source_upload_metadata(request, pk):
    """Import dataset metadata for a Source from an uploaded CSV file.

    Expects a multipart POST carrying an ``upload_csv_file`` field.
    Returns 400 when the field is missing, rejects filenames that do not
    end in ``.csv``, and otherwise delegates parsing to
    ``MetadataImporter.read_csv``, reporting the outcome to the user via
    the django messages framework before redirecting back to the source.
    """
    # Idiom fix: `x not in d` instead of `not x in d`.
    if 'upload_csv_file' not in request.FILES:
        return HttpResponseBadRequest()
    source = get_object_or_404(Source, pk=pk)
    filedata = request.FILES['upload_csv_file']
    # Cheap extension check only; content validation happens in read_csv.
    if not filedata.name.endswith('.csv'):
        messages.error(request, 'The uploaded file is of the wrong type')
        return redirect(source)
    report = MetadataImporter.read_csv(source, filedata)
    if report['errors'] != 0:
        messages.error(
            request,
            "Created {0} out of {1} datasets, some errors "
            "occurred: {2}".format(report['total'] - report['errors'],
                                   report['total'],
                                   report['report'])
        )
    else:
        # Typo fix in the user-facing message: was "Succesfully".
        messages.success(
            request,
            "Successfully added {0} datasets".format(report['total'])
        )
    return redirect(source)
def source_fetch_metadata(request, pk):
    """Update metadata action.

    Re-imports all dataset metadata for a Source from its configured
    scraper, reporting counts or errors to the user via the django
    messages framework, then redirects back to the source.
    """
    # Fix: use get_object_or_404 so an unknown pk yields a 404 instead
    # of an unhandled DoesNotExist (HTTP 500). This also matches
    # source_upload_metadata's lookup style.
    source = get_object_or_404(Source, pk=pk)
    # NOTE(review): existing datasets are deleted *before* the import is
    # known to succeed — a failing scraper leaves the source empty.
    # Preserved as-is; confirm this is the intended semantics.
    source.datasets.all().delete()
    try:
        if not source.scraper_name:
            raise Exception('A scraper name must be specified.')
        report = MetadataImporter.read_metadata(source)
    except URLError:
        # Scraper endpoint unreachable / timed out.
        logger.exception('Timeout while accessing scraper data')
        messages.error(request, "Timeout while accessing scraper data")
    except Exception:
        # Boundary handler: log full traceback, show a generic message.
        logger.exception('Error while updating metadata')
        messages.error(request, "Error while updating metadata")
    else:
        messages.info(
            request,
            "{} metadata imported, {} errors".format(
                report['total'], report['errors']
            )
        )
    return redirect(source)
def source_fetch_metadata(request, pk):
    """Update metadata action.

    Re-imports all dataset metadata for a Source from its configured
    scraper, reporting counts or errors to the user via the django
    messages framework, then redirects back to the source.
    """
    # Fix: use get_object_or_404 so an unknown pk yields a 404 instead
    # of an unhandled DoesNotExist (HTTP 500). This also matches
    # source_upload_metadata's lookup style.
    source = get_object_or_404(Source, pk=pk)
    # NOTE(review): existing datasets are deleted *before* the import is
    # known to succeed — a failing scraper leaves the source empty.
    # Preserved as-is; confirm this is the intended semantics.
    source.datasets.all().delete()
    try:
        if not source.scraper_name:
            raise Exception('A scraper name must be specified.')
        report = MetadataImporter.read_metadata(source)
    except URLError:
        # Scraper endpoint unreachable / timed out.
        logger.exception('Timeout while accessing scraper data')
        messages.error(request, "Timeout while accessing scraper data")
    except Exception:
        # Boundary handler: log full traceback, show a generic message.
        logger.exception('Error while updating metadata')
        messages.error(request, "Error while updating metadata")
    else:
        messages.info(
            request,
            "{} metadata imported, {} errors".format(report['total'],
                                                     report['errors']))
    return redirect(source)
def test_read_csv(self):
    """Two well-formed tab-separated rows import with no errors."""
    source = Source.objects.all()[0]
    # Header plus two data rows, joined into one TSV payload.
    rows = [
        "url\tdownload\tname\tdescription\ttags\tcurator\tlicense\t"
        "bounding_box\tother_meta\n",
        "http://path.to/url/1\thttp://path.to/download/1\tName 1\t"
        "Desc 1\tt11,t12\tCurator Name 1\tLic 1\t1,2,3,4\t"
        "{\"k11\":\"v11\"}\n",
        "http://path.to/url/2\thttp://path.to/download/2\tName 2\t"
        "Desc 2\tt21,t22\tCurator Name 2\tLic 2\t5,6,7,8\t"
        "{\"k12\":\"v12\"}\n",
    ]
    csv_stream = cStringIO.StringIO("".join(rows))
    expected = {'total': 2, 'errors': 0, 'report': []}
    self.assertEqual(expected,
                     MetadataImporter.read_csv(source, csv_stream))
def source_upload_metadata(request, pk):
    """Import dataset metadata for a Source from an uploaded CSV file.

    Expects a multipart POST carrying an ``upload_csv_file`` field.
    Returns 400 when the field is missing, rejects filenames that do not
    end in ``.csv``, and otherwise delegates parsing to
    ``MetadataImporter.read_csv``, reporting the outcome to the user via
    the django messages framework before redirecting back to the source.
    """
    # Idiom fix: `x not in d` instead of `not x in d`.
    if 'upload_csv_file' not in request.FILES:
        return HttpResponseBadRequest()
    source = get_object_or_404(Source, pk=pk)
    filedata = request.FILES['upload_csv_file']
    # Cheap extension check only; content validation happens in read_csv.
    if not filedata.name.endswith('.csv'):
        messages.error(request, 'The uploaded file is of the wrong type')
        return redirect(source)
    report = MetadataImporter.read_csv(source, filedata)
    if report['errors'] != 0:
        messages.error(
            request,
            "Created {0} out of {1} datasets, some errors "
            "occurred: {2}".format(report['total'] - report['errors'],
                                   report['total'],
                                   report['report']))
    else:
        # Typo fix in the user-facing message: was "Succesfully".
        messages.success(
            request,
            "Successfully added {0} datasets".format(report['total']))
    return redirect(source)