コード例 #1
0
    def from_userinput(
        self,
        userinput: str,
        mirror: bool,
        ags: Optional[str],
        skip_body_extra: bool = False,
        skip_files: bool = False,
    ) -> None:
        body_id, entrypoint = self.get_entrypoint_and_body(userinput, mirror)
        importer = Importer(get_loader_from_system(entrypoint))
        body_data, dotenv = self.import_body_and_metadata(
            body_id, importer, userinput, ags, skip_body_extra)

        logger.info("Loading the bulk data from the oparl api")
        importer.fetch_lists_initial([body_data])

        # Also avoid "MySQL server has gone away" errors due to timeouts
        # https://stackoverflow.com/a/32720475/3549270
        db.close_old_connections()

        logger.info("Loading the data into the database")
        importer.import_objects()

        if not skip_files:
            logger.info("Loading the files")
            importer.load_files(fallback_city=userinput)

        if dotenv:
            logger.info(
                f"Done! Please add the following line to your dotenv file: \n\n{dotenv}\n"
            )
コード例 #2
0
def test_manual_deletion(pytestconfig):
    """Check that after a file has been manually deleted, it can't get re-imported and it's gone from minio"""
    url = "https://example.org/file/1"
    file_id = 1
    sample_file = File(
        name="Bad File",
        original_id=file_id,
        url=url,
        claimed_size=None,
        paper_original_id=sample_paper.original_id,
    )
    data = RisData(sample_city, None, [], [], [sample_paper], [sample_file],
                   [], [], [], 2)
    body = Body(name=data.meta.name,
                short_name=data.meta.name,
                ags=data.meta.ags)
    body.save()
    import_data(body, data)

    with responses.RequestsMock() as requests_mock:
        requests_mock.add(
            responses.GET,
            url,
            body=Path(pytestconfig.rootdir).joinpath(
                "testdata/media/file.txt").read_bytes(),
            status=200,
            content_type="text/plain",
        )
        importer = Importer(BaseLoader({}), force_singlethread=True)
        [successful, failed] = importer.load_files(sample_city.name)
        assert successful == 1 and failed == 0

    # Ensure that the file is there
    assert minio_client().get_object(minio_file_bucket, str(file_id))
    assert models.File.objects.filter(pk=file_id).first()

    # This is what we test
    models.File.objects.get(pk=file_id).manually_delete()

    with pytest.raises(MinioException):
        minio_client().get_object(minio_file_bucket, str(file_id))

    # Another import, to ensure that manually delete is respected
    import_data(body, data)

    assert not models.File.objects.filter(pk=file_id).first()
    with responses.RequestsMock():
        importer = Importer(BaseLoader({}), force_singlethread=True)
        [successful, failed] = importer.load_files(sample_city.name)
        assert successful == 0 and failed == 0

    with pytest.raises(MinioException):
        minio_client().get_object(minio_file_bucket, str(file_id))
コード例 #3
0
def import_update(body_id: Optional[str] = None,
                  ignore_modified: bool = False) -> None:
    from importer.importer import Importer

    if body_id:
        bodies = Body.objects.filter(oparl_id=body_id).all()
    else:
        bodies = Body.objects.filter(oparl_id__isnull=False).all()
    for body in bodies:
        logger.info("Updating body {}: {}".format(body, body.oparl_id))
        loader = get_loader_from_body(body.oparl_id)
        importer = Importer(loader, body, ignore_modified=ignore_modified)
        importer.update(body.oparl_id)
        importer.force_singlethread = True
        importer.load_files(body.short_name)
def test_manual_deletion(pytestconfig, caplog):
    """Check that after a file has been manually deleted, it can't get re-imported and it's gone from minio"""
    url = "https://example.org/file/1"
    file_id = 1
    body, data = make_sample_file(file_id, url)

    with responses.RequestsMock() as requests_mock:
        requests_mock.add(
            responses.GET,
            url,
            body=pytestconfig.rootpath.joinpath(
                "testdata/media/file.txt").read_bytes(),
            status=200,
            content_type="text/plain",
        )
        importer = Importer(BaseLoader({}), force_singlethread=True)
        [successful, failed] = importer.load_files(sample_city.name)
        assert successful == 1 and failed == 0

    # Ensure that the file is there
    assert minio_client().get_object(minio_file_bucket, str(file_id))
    assert models.File.objects.filter(pk=file_id).first()

    # This is what we test
    models.File.objects.get(pk=file_id).manually_delete()

    with pytest.raises(MinioException):
        minio_client().get_object(minio_file_bucket, str(file_id))

    # Another import, to ensure that manually delete is respected
    import_data(body, data)

    assert not models.File.objects.filter(pk=file_id).first()
    with responses.RequestsMock():
        importer = Importer(BaseLoader({}), force_singlethread=True)
        [successful, failed] = importer.load_files(sample_city.name)
        assert successful == 0 and failed == 0

    with pytest.raises(MinioException):
        minio_client().get_object(minio_file_bucket, str(file_id))

    assert caplog.messages == [
        "File 1 has an unknown mime type: 'text/plain'",
        "File 1: Couldn't get any text",
    ]
コード例 #5
0
    def test_file_analysis(self):
        loader = MockLoader()
        with open(filename, "rb") as fp:
            loader.files[download_url] = (fp.read(), "application/pdf")

        importer = Importer(loader, force_singlethread=True)

        [body] = Body.objects.all()

        importer.load_files(fallback_city=body.short_name)

        [file] = File.objects.all()

        self.assertEqual(file.mime_type, "application/pdf")
        self.assertEqual(file.page_count, 3)
        self.assertEqual(len(file.parsed_text), 10019)
        self.assertEqual(file.coordinates(), [{"lat": 11.35, "lon": 142.2}])
        self.assertEqual(file.person_ids(), [1])
def test_file_404(pytestconfig, caplog):
    """Check that after a file has been manually deleted, it can't get re-imported and it's gone from minio"""
    url = "https://example.org/file/1"
    file_id = 1
    make_sample_file(file_id, url)

    with responses.RequestsMock() as requests_mock:
        requests_mock.add(responses.GET,
                          url,
                          status=404,
                          content_type="text/plain")
        importer = Importer(BaseLoader({}), force_singlethread=True)
        [successful, failed] = importer.load_files(sample_city.name,
                                                   update=True)
        assert successful == 0 and failed == 1

    assert caplog.messages == [
        f"File 1: Failed to download {url}",
        "1 files failed to download",
    ]