def test_meeting_start_change():
    """Meetings without an associated id can't use oparl_id as unique_id.

    Since name+start are unique in the db, and the start of a meeting can be
    updated to the actual start after the meeting happened, old meetings must
    be hard-deleted or the import will crash with a failed unique constraint.
    """
    organizations = [Organization("City Council", 1, True)]

    def council_meeting(name, original_id, start_iso):
        # Builds a "City Council" meeting at the given ISO start time.
        return Meeting(
            "City Council",
            name,
            None,
            None,
            original_id,
            start=datetime.fromisoformat(start_iso),
        )

    meetings_before = [
        council_meeting("City Council Meeting 1", None, "2020-01-01T09:00:00+01:00"),
        council_meeting("City Council Meeting 2", 2, "2020-02-01T09:00:00+01:00"),
    ]
    # Same meetings, but with the start shifted to the actual begin time.
    meetings_after = [
        council_meeting("City Council Meeting 1", None, "2020-01-01T09:00:10+01:00"),
        council_meeting("City Council Meeting 2", 2, "2020-02-01T09:00:05+01:00"),
    ]

    old = RisData(sample_city, None, [], organizations, [], [], meetings_before, [], [], 2)
    new = RisData(sample_city, None, [], organizations, [], [], meetings_after, [], [], 2)
    body = Body(name=old.meta.name, short_name=old.meta.name, ags=old.meta.ags)
    body.save()

    import_data(body, old)
    import_data(body, new)

    assert models.Meeting.objects.count() == 2
    # The old meeting without id should have been deleted
    assert models.Meeting.objects_with_deleted.count() == 3
def test_agenda_item_with_id_name_changed():
    """Renaming an agenda item that keeps its original id updates it in place.

    The second import must not leave a soft-deleted duplicate behind.
    """
    organizations = [Organization("City Council", 1, True)]
    meetings = [
        Meeting(
            "City Council",
            "City Council Meeting 1",
            None,
            None,
            1,
            start=datetime.fromisoformat("2020-01-01T09:00:00+01:00"),
        )
    ]

    def item_named(item_name):
        # Identical agenda item except for the display name.
        return AgendaItem(
            key="1",
            position=0,
            name=item_name,
            meeting_id=1,
            paper_reference=None,
            paper_original_id=None,
            original_id=1,
            result=None,
            voting=None,
            note=None,
        )

    old = RisData(sample_city, None, [], organizations, [], [], meetings, [], [item_named("Old name")], 2)
    new = RisData(sample_city, None, [], organizations, [], [], meetings, [], [item_named("New name")], 2)
    body = Body(name=old.meta.name, short_name=old.meta.name, ags=old.meta.ags)
    body.save()

    import_data(body, old)
    import_data(body, new)

    assert models.AgendaItem.objects_with_deleted.count() == 1
    assert models.AgendaItem.objects.count() == 1
def handle_counts(ris_data: RisData, allow_shrinkage: bool):
    """Prints the old and new counts and makes sure we don't accidentally delete entries.

    Args:
        ris_data: The freshly scraped dataset that is about to be imported.
        allow_shrinkage: If False, abort when the new dataset is noticeably
            smaller than what is already in the database.

    Raises:
        RuntimeError: If some category shrank by more than the allowed margin
            while allow_shrinkage is False, which indicates a scraper failure.
    """
    existing_counts = {
        "Paper": models.Paper.objects.count(),
        "File": models.File.objects.count(),
        "Person": models.Person.objects.count(),
        "Meeting": models.Meeting.objects.count(),
        "Organization": models.Organization.objects.count(),
        "Membership": models.Membership.objects.count(),
        "Agenda Item": models.AgendaItem.objects.count(),
    }
    new_counts = ris_data.get_counts()

    # PEP 8 (E731): use a named function instead of assigning a lambda.
    def format_counts(counts: dict) -> str:
        # Renders e.g. "Paper 10 | File 20 | ..."
        return " | ".join(f"{key_} {value_}" for key_, value_ in counts.items())

    logger.info(f"Existing: {format_counts(existing_counts)}")
    logger.info(f"New: {format_counts(new_counts)}")

    if allow_shrinkage:
        return

    for key, value in existing_counts.items():
        # TODO: This check currently doesn't work because there's a fixup
        # creating persons in the membership part
        if key == "Person":
            continue
        # The -3 is to allow some deletion or some failed page
        if new_counts[key] < value - 3:
            raise RuntimeError(
                f"There are {value} {key} in the database, but only {new_counts[key]} in "
                f"the imported dataset. This indicates a scraper failure. "
                f"Use `--allow-shrinkage` to override."
            )
def test_undelete():
    """A paper gets created, (spuriously?) deleted, and then undeleted."""
    with_paper = RisData(sample_city, None, [], [], [sample_paper], [], [], [], [], 2)
    without_paper = RisData(sample_city, None, [], [], [], [], [], [], [], 2)

    body = Body(
        name=with_paper.meta.name,
        short_name=with_paper.meta.name,
        ags=with_paper.meta.ags,
    )
    body.save()

    # Create, soft-delete, then re-import the same paper.
    for dataset in (with_paper, without_paper, with_paper):
        import_data(body, dataset)

    # Exactly one paper exists in total, and it is no longer marked deleted.
    [paper] = models.Paper.objects_with_deleted.all()
    assert not paper.deleted
def handle(self, *args, **options):
    """Measures how long parsing the scraped Karlsruhe json into RisData takes.

    NOTE(review): the input path is hard-coded to a local scrape-session
    checkout — this looks like a developer benchmark command, not something
    meant for production use.
    """
    input_file: Path = Path("../scrape-session/out/json/Karlsruhe.json")
    with input_file.open() as fp:
        raw = json.load(fp)

    started = time.time()
    ris_data: RisData = RisData.from_dict(raw)
    elapsed = time.time() - started

    print(elapsed, type(ris_data))
def test_manual_deletion(pytestconfig):
    """Check that after a file has been manually deleted, it can't get
    re-imported and it's gone from minio"""
    url = "https://example.org/file/1"
    file_id = 1
    bad_file = File(
        name="Bad File",
        original_id=file_id,
        url=url,
        claimed_size=None,
        paper_original_id=sample_paper.original_id,
    )
    dataset = RisData(sample_city, None, [], [], [sample_paper], [bad_file], [], [], [], 2)
    body = Body(name=dataset.meta.name, short_name=dataset.meta.name, ags=dataset.meta.ags)
    body.save()
    import_data(body, dataset)

    # First download: the file content is served from a mocked endpoint.
    with responses.RequestsMock() as mock:
        mock.add(
            responses.GET,
            url,
            body=Path(pytestconfig.rootdir)
            .joinpath("testdata/media/file.txt")
            .read_bytes(),
            status=200,
            content_type="text/plain",
        )
        importer = Importer(BaseLoader({}), force_singlethread=True)
        [successful, failed] = importer.load_files(sample_city.name)
        assert successful == 1 and failed == 0

    # Ensure that the file is there
    assert minio_client().get_object(minio_file_bucket, str(file_id))
    assert models.File.objects.filter(pk=file_id).first()

    # This is what we test
    models.File.objects.get(pk=file_id).manually_delete()

    with pytest.raises(MinioException):
        minio_client().get_object(minio_file_bucket, str(file_id))

    # Another import, to ensure that manually delete is respected
    import_data(body, dataset)
    assert not models.File.objects.filter(pk=file_id).first()

    # No request is registered: the deleted file must not be fetched again.
    with responses.RequestsMock():
        importer = Importer(BaseLoader({}), force_singlethread=True)
        [successful, failed] = importer.load_files(sample_city.name)
        assert successful == 0 and failed == 0

    with pytest.raises(MinioException):
        minio_client().get_object(minio_file_bucket, str(file_id))
def test_index_deletion():
    """Check that deleted persons get deleted from the elasticsearch index"""
    # Recreate all indices from scratch so earlier tests can't leak documents.
    for index in registry.get_indices(registry.get_models()):
        index.delete(ignore=404)
    for index in registry.get_indices(registry.get_models()):
        index.create()

    persons_before = [
        Person(name="Frank Underwood", party="Democrats"),
        Person(name="Claire Underwood", party="Democrats"),
    ]
    persons_after = [Person(name="Claire Underwood", party="Democrats")]

    old = RisData(sample_city, None, persons_before, [], [], [], [], [], [], 2)
    new = RisData(sample_city, None, persons_after, [], [], [], [], [], [], 2)
    body = Body(name=old.meta.name, short_name=old.meta.name, ags=old.meta.ags)
    body.save()

    def underwood_hits():
        # Number of search hits for the shared family name.
        return len(MainappSearch({"query": "Underwood"}).execute().hits)

    import_data(body, old)
    assert underwood_hits() == 2

    import_data(body, new)
    assert underwood_hits() == 1
def make_sample_file(file_id, url):
    """Imports a single bad file attached to the sample paper.

    Returns the saved body together with the imported dataset so callers can
    re-run the import against the same data.
    """
    bad_file = File(
        name="Bad File",
        original_id=file_id,
        url=url,
        claimed_size=None,
        paper_original_id=sample_paper.original_id,
    )
    dataset = RisData(sample_city, None, [], [], [sample_paper], [bad_file], [], [], [], 2)
    body = Body(name=dataset.meta.name, short_name=dataset.meta.name, ags=dataset.meta.ags)
    body.save()
    import_data(body, dataset)
    return body, dataset
def test_duplicate_meetings_with_id(fixture, target_number, target_number_with_deleted):
    """
    There are two meetings with the same name/start, and
    a) different ids, b) with and without id, c) without ids.
    Inspired by https://ris.wuppertal.de/si0057.php?__ksinr=18329
    and https://ris.wuppertal.de/si0057.php?__ksinr=18837
    """
    # Preload the conflicting meetings from the fixture file.
    for deserialized in serializers.deserialize("json", Path(fixture).read_text()):
        deserialized.save()

    incoming_meeting = converter.structure(
        {
            "organization_name": "BV Uellendahl-Katernberg",
            "name": "BV Uellendahl-Katernberg",
            "location": "Rathaus Barmen, Ratssaal, Johannes-Rau-Platz 1, 42275 Wuppertal",
            "note": None,
            "original_id": 18329,
            "start": "2020-04-23T18:30:00+02:00",
            "end": "2020-04-23T19:20:00+02:00",
            "cancelled": False,
        },
        Meeting,
    )

    dataset = RisData(sample_city, None, [], [], [], [], [incoming_meeting], [], [], 2)
    body = Body(
        name=dataset.meta.name,
        short_name=dataset.meta.name,
        ags=dataset.meta.ags,
    )
    body.save()

    import_data(body, dataset)

    # On failure, dump the surviving meetings to make the mismatch debuggable.
    assert models.Meeting.objects.count() == target_number, list(
        models.Meeting.objects.values_list("oparl_id", "name", "start")
    )
    assert models.Meeting.objects_with_deleted.count() == target_number_with_deleted