def from_userinput(
    self,
    userinput: str,
    mirror: bool,
    ags: Optional[str],
    skip_body_extra: bool = False,
    skip_files: bool = False,
) -> None:
    """Run a complete import for the body that `userinput` resolves to.

    The entrypoint and body id are resolved from `userinput` (and `mirror`),
    the body and its metadata are imported, the lists are fetched from the
    API, the objects are written to the database and, unless `skip_files`
    is set, the files are loaded with `userinput` as the fallback city.

    `ags` and `skip_body_extra` are forwarded unchanged to
    `import_body_and_metadata`. If that call yields a truthy dotenv value,
    it is logged at the end so it can be copied into the dotenv file.
    """
    body_id, entrypoint = self.get_entrypoint_and_body(userinput, mirror)

    loader = get_loader_from_system(entrypoint)
    importer = Importer(loader)

    imported = self.import_body_and_metadata(
        body_id, importer, userinput, ags, skip_body_extra
    )
    body_data, dotenv = imported

    logger.info("Loading the bulk data from the oparl api")
    importer.fetch_lists_initial([body_data])

    # Drop stale connections before the database-heavy part; this also avoids
    # "MySQL server has gone away" errors caused by timeouts, see
    # https://stackoverflow.com/a/32720475/3549270
    db.close_old_connections()

    logger.info("Loading the data into the database")
    importer.import_objects()

    if not skip_files:
        logger.info("Loading the files")
        importer.load_files(fallback_city=userinput)

    # Nothing to tell the user when no dotenv line was produced
    if not dotenv:
        return

    logger.info(
        f"Done! Please add the following line to your dotenv file: \n\n{dotenv}\n"
    )
def test_manual_deletion(pytestconfig):
    """A manually deleted file must vanish from minio and must not come back on re-import"""
    file_id = 1
    url = "https://example.org/file/1"

    # Build a one-file data set and import it into a freshly saved body
    sample_file = File(
        name="Bad File",
        original_id=file_id,
        url=url,
        claimed_size=None,
        paper_original_id=sample_paper.original_id,
    )
    data = RisData(
        sample_city, None, [], [], [sample_paper], [sample_file], [], [], [], 2
    )
    meta = data.meta
    body = Body(name=meta.name, short_name=meta.name, ags=meta.ags)
    body.save()
    import_data(body, data)

    # First pass: the download is answered by the mock, so exactly one file loads
    fixture = Path(pytestconfig.rootdir).joinpath("testdata/media/file.txt")
    payload = fixture.read_bytes()
    with responses.RequestsMock() as requests_mock:
        requests_mock.add(
            responses.GET,
            url,
            body=payload,
            status=200,
            content_type="text/plain",
        )
        importer = Importer(BaseLoader({}), force_singlethread=True)
        successful, failed = importer.load_files(sample_city.name)
        assert (successful, failed) == (1, 0)

    # The file must now exist both in the object store and in the database
    assert minio_client().get_object(minio_file_bucket, str(file_id))
    assert models.File.objects.filter(pk=file_id).first()

    # The operation under test
    models.File.objects.get(pk=file_id).manually_delete()

    with pytest.raises(MinioException):
        minio_client().get_object(minio_file_bucket, str(file_id))

    # Importing the same data again must respect the manual deletion
    import_data(body, data)
    assert not models.File.objects.filter(pk=file_id).first()

    # No responses are registered here, so any download attempt would fail
    with responses.RequestsMock():
        importer = Importer(BaseLoader({}), force_singlethread=True)
        successful, failed = importer.load_files(sample_city.name)
        assert (successful, failed) == (0, 0)

    with pytest.raises(MinioException):
        minio_client().get_object(minio_file_bucket, str(file_id))
def import_update(body_id: Optional[str] = None, ignore_modified: bool = False) -> None:
    """Update bodies from their OParl source and then load their files.

    When `body_id` is truthy only bodies whose `oparl_id` equals it are
    processed; otherwise every body with a non-null `oparl_id` is.
    `ignore_modified` is passed through to the `Importer` constructor.
    """
    # NOTE(review): function-scope import, presumably to dodge a circular
    # import - confirm before moving it to module level
    from importer.importer import Importer

    if body_id:
        lookup = {"oparl_id": body_id}
    else:
        lookup = {"oparl_id__isnull": False}

    for body in Body.objects.filter(**lookup).all():
        oparl_id = body.oparl_id
        logger.info("Updating body {}: {}".format(body, oparl_id))

        importer = Importer(
            get_loader_from_body(oparl_id), body, ignore_modified=ignore_modified
        )
        importer.update(oparl_id)

        # Single-threading is switched on only after the update, for the file loading
        importer.force_singlethread = True
        importer.load_files(body.short_name)
def test_manual_deletion(pytestconfig, caplog):
    """A manually deleted file must vanish from minio and must not come back on re-import"""
    file_id = 1
    url = "https://example.org/file/1"
    body, data = make_sample_file(file_id, url)

    # First pass: the download is answered by the mock, so exactly one file loads
    fixture = pytestconfig.rootpath.joinpath("testdata/media/file.txt")
    payload = fixture.read_bytes()
    with responses.RequestsMock() as requests_mock:
        requests_mock.add(
            responses.GET,
            url,
            body=payload,
            status=200,
            content_type="text/plain",
        )
        importer = Importer(BaseLoader({}), force_singlethread=True)
        successful, failed = importer.load_files(sample_city.name)
        assert (successful, failed) == (1, 0)

    # The file must now exist both in the object store and in the database
    assert minio_client().get_object(minio_file_bucket, str(file_id))
    assert models.File.objects.filter(pk=file_id).first()

    # The operation under test
    models.File.objects.get(pk=file_id).manually_delete()

    with pytest.raises(MinioException):
        minio_client().get_object(minio_file_bucket, str(file_id))

    # Importing the same data again must respect the manual deletion
    import_data(body, data)
    assert not models.File.objects.filter(pk=file_id).first()

    # No responses are registered here, so any download attempt would fail
    with responses.RequestsMock():
        importer = Importer(BaseLoader({}), force_singlethread=True)
        successful, failed = importer.load_files(sample_city.name)
        assert (successful, failed) == (0, 0)

    with pytest.raises(MinioException):
        minio_client().get_object(minio_file_bucket, str(file_id))

    # Exactly these two records are expected over the whole test
    expected_messages = [
        "File 1 has an unknown mime type: 'text/plain'",
        "File 1: Couldn't get any text",
    ]
    assert caplog.messages == expected_messages
def test_file_analysis(self):
    """Load one PDF through a mock loader and check what was stored on the resulting File"""
    loader = MockLoader()
    with open(filename, "rb") as fp:
        pdf_bytes = fp.read()
    # The mock loader serves this content and mime type for the download url
    loader.files[download_url] = (pdf_bytes, "application/pdf")

    importer = Importer(loader, force_singlethread=True)
    # Single-element unpacking doubles as a check that exactly one body exists
    (body,) = Body.objects.all()
    importer.load_files(fallback_city=body.short_name)

    # Likewise, exactly one file is expected
    (analyzed,) = File.objects.all()
    self.assertEqual(analyzed.mime_type, "application/pdf")
    self.assertEqual(analyzed.page_count, 3)
    self.assertEqual(len(analyzed.parsed_text), 10019)
    self.assertEqual(analyzed.coordinates(), [{"lat": 11.35, "lon": 142.2}])
    self.assertEqual(analyzed.person_ids(), [1])
def test_file_404(pytestconfig, caplog):
    """Check that a file whose download is answered with a 404 counts as failed and that the failure is logged"""
    file_id = 1
    url = "https://example.org/file/1"
    make_sample_file(file_id, url)

    with responses.RequestsMock() as requests_mock:
        # The only registered response for the file url is a 404
        requests_mock.add(
            responses.GET,
            url,
            status=404,
            content_type="text/plain",
        )
        importer = Importer(BaseLoader({}), force_singlethread=True)
        successful, failed = importer.load_files(sample_city.name, update=True)
        assert (successful, failed) == (0, 1)

    expected_messages = [
        f"File 1: Failed to download {url}",
        "1 files failed to download",
    ]
    assert caplog.messages == expected_messages