def test_meeting_start_change():
    """
    Not every meeting has an id, so oparl_id can't be used as the unique id.
    name+start is unique in the db, but the start of a meeting can be updated to
    the actual start after the meeting happened. Old meetings therefore need to be
    hard deleted, or the import will crash with a failed unique constraint.
    """

    def council_meeting(name, original_id, start):
        # Only name, id and start differ between the meetings of this test
        return Meeting(
            "City Council",
            name,
            None,
            None,
            original_id,
            start=datetime.fromisoformat(start),
        )

    def ris_data_with(meetings):
        return RisData(
            sample_city, None, [], organizations, [], [], meetings, [], [], 2
        )

    organizations = [Organization("City Council", 1, True)]

    # First import: one meeting without and one meeting with an id
    scheduled = ris_data_with(
        [
            council_meeting(
                "City Council Meeting 1", None, "2020-01-01T09:00:00+01:00"
            ),
            council_meeting(
                "City Council Meeting 2", 2, "2020-02-01T09:00:00+01:00"
            ),
        ]
    )
    # Second import: the same meetings, each starting a few seconds later
    rescheduled = ris_data_with(
        [
            council_meeting(
                "City Council Meeting 1", None, "2020-01-01T09:00:10+01:00"
            ),
            council_meeting(
                "City Council Meeting 2", 2, "2020-02-01T09:00:05+01:00"
            ),
        ]
    )

    body = Body(
        name=scheduled.meta.name,
        short_name=scheduled.meta.name,
        ags=scheduled.meta.ags,
    )
    body.save()

    for ris_data in (scheduled, rescheduled):
        import_data(body, ris_data)

    assert models.Meeting.objects.count() == 2
    # The old meeting without id should have been deleted
    assert models.Meeting.objects_with_deleted.count() == 3
 def test_body(self):
     """Convert the sample body and check its location, its AGS and its legislative terms"""
     body_json = self.api_data["https://oparl.example.org/body/1"]
     converted = Body()

     self.converter.body(body_json, converted)
     # The sample body is expected to end up with a center but without an outline
     self.assertEqual(converted.outline, None)
     self.assertNotEqual(converted.center, None)
     self.assertEqual(converted.ags, "05315000")

     # The body is saved before the related objects are converted
     converted.save()
     self.converter.body_related(body_json, converted)
     self.assertEqual(converted.legislative_terms.count(), 1)
def test_manual_deletion(pytestconfig):
    """A manually deleted file has to be gone from minio and must not get re-imported"""
    file_id = 1
    url = "https://example.org/file/1"

    def load_files():
        # Run the file loader for the sample city, returning both counters
        [successful, failed] = Importer(
            BaseLoader({}), force_singlethread=True
        ).load_files(sample_city.name)
        return successful, failed

    def assert_missing_in_minio():
        with pytest.raises(MinioException):
            minio_client().get_object(minio_file_bucket, str(file_id))

    bad_file = File(
        name="Bad File",
        original_id=file_id,
        url=url,
        claimed_size=None,
        paper_original_id=sample_paper.original_id,
    )
    data = RisData(
        sample_city, None, [], [], [sample_paper], [bad_file], [], [], [], 2
    )
    body = Body(name=data.meta.name, short_name=data.meta.name, ags=data.meta.ags)
    body.save()
    import_data(body, data)

    with responses.RequestsMock() as requests_mock:
        file_content = Path(pytestconfig.rootdir).joinpath(
            "testdata/media/file.txt"
        ).read_bytes()
        requests_mock.add(
            responses.GET,
            url,
            body=file_content,
            status=200,
            content_type="text/plain",
        )
        assert load_files() == (1, 0)

    # Ensure that the file is there
    assert minio_client().get_object(minio_file_bucket, str(file_id))
    assert models.File.objects.filter(pk=file_id).first()

    # This is what we test
    models.File.objects.get(pk=file_id).manually_delete()

    assert_missing_in_minio()

    # Another import, to ensure that manually delete is respected
    import_data(body, data)

    assert not models.File.objects.filter(pk=file_id).first()
    # No response is registered this time, and no file is expected to be loaded
    with responses.RequestsMock():
        assert load_files() == (0, 0)

    assert_missing_in_minio()
def test_agenda_item_with_id_name_changed():
    """Renaming an agenda item that has an id must leave exactly one agenda item, deleted ones included"""

    def agenda_item(name):
        # Both imports contain the same agenda item, only the name differs
        return AgendaItem(
            key="1",
            position=0,
            name=name,
            meeting_id=1,
            paper_reference=None,
            paper_original_id=None,
            original_id=1,
            result=None,
            voting=None,
            note=None,
        )

    def ris_data_with(agenda_items):
        return RisData(
            sample_city,
            None,
            [],
            organizations,
            [],
            [],
            meetings,
            [],
            agenda_items,
            2,
        )

    organizations = [Organization("City Council", 1, True)]
    meetings = [
        Meeting(
            "City Council",
            "City Council Meeting 1",
            None,
            None,
            1,
            start=datetime.fromisoformat("2020-01-01T09:00:00+01:00"),
        )
    ]

    before = ris_data_with([agenda_item("Old name")])
    after = ris_data_with([agenda_item("New name")])

    body = Body(
        name=before.meta.name, short_name=before.meta.name, ags=before.meta.ags
    )
    body.save()
    for ris_data in (before, after):
        import_data(body, ris_data)

    assert models.AgendaItem.objects_with_deleted.count() == 1
    assert models.AgendaItem.objects.count() == 1
    def body(self, lib_object: JSON, body: Body) -> Body:
        """Fill ``body`` with the AGS and the location of the api's body object.

        The short name is normalized, spaces are removed from the "ags" value
        and an AGS longer than 8 characters is cut to 8 if the rest consists
        only of zeros. A location with a geometry becomes the center (for a
        ``Point``) or the outline (for a ``Polygon``) of the body; any other
        geometry type is skipped with a warning.

        :param lib_object: The body object from the api
        :param body: The database object to fill; it is not saved here
        :return: The ``body`` that was passed in
        :raises RuntimeError: If the AGS is longer than 8 characters and the
            excess is not only zeros
        """
        body.short_name = self.utils.normalize_body_name(body.short_name)

        body.ags = lib_object.get("ags")
        if body.ags:
            body.ags = body.ags.replace(" ", "")
        if len(body.ags or "") > 8:
            # Special case for https://ris.krefeld.de/webservice/oparl/v1/body/1
            if body.ags[8:] == "0" * len(body.ags[8:]):
                body.ags = body.ags[:8]
            else:
                raise RuntimeError(
                    "The Amtliche Gemeindeschlüssel of {} is longer than 8 characters: '{}'".format(
                        body, body.ags
                    )
                )

        # We don't really need the location because we have our own outline
        # importing logic and don't need the city, but we import it for comprehensiveness
        location = self.retrieve(Location, lib_object.get("location"), body.oparl_id)
        if location and location.geometry:
            if location.geometry["type"] == "Point":
                body.center = location
                body.outline = None
            elif location.geometry["type"] == "Polygon":
                logger.warning("Overriding outline of Body with api version")
                body.center = None
                body.outline = location
            else:
                # The two literals are concatenated implicitly, so the first one
                # has to end with a space to keep the sentences apart
                logger.warning(
                    "Location object is of type {}, which is neither 'Point' nor 'Polygon'. "
                    "Skipping this location.".format(location.geometry["type"])
                )

        return body
def make_sample_file(file_id, url):
    """Import the sample city with the sample paper and one file, and return the body and the imported data"""
    ris_data = RisData(
        sample_city,
        None,
        [],
        [],
        [sample_paper],
        [
            File(
                name="Bad File",
                original_id=file_id,
                url=url,
                claimed_size=None,
                paper_original_id=sample_paper.original_id,
            )
        ],
        [],
        [],
        [],
        2,
    )
    city_name = ris_data.meta.name
    body = Body(name=city_name, short_name=city_name, ags=ris_data.meta.ags)
    body.save()
    import_data(body, ris_data)
    return body, ris_data
# Example #7
# 0
    def organization(self, libobject: OParl.Organization):
        """Import an organization together with its memberships.

        :param libobject: The organization from the api. If it is falsy,
            nothing is imported.
        :return: The saved organization, or ``None`` if there was no
            ``libobject`` or ``check_existing`` did not return an object
        """
        # This guard has to come before the log line, which calls a method of
        # libobject and would otherwise fail on a missing object
        if not libobject:
            return

        logging.info("Processing Organization {}".format(libobject.get_id()))

        organization = self.check_existing(libobject, Organization)
        if not organization:
            return

        # Known types are looked up by id, unknown ones are created by name
        type_id = self.organization_classification.get(
            libobject.get_organization_type())
        if type_id:
            orgtype = OrganizationType.objects.get(id=type_id)
        else:
            orgtype, _ = OrganizationType.objects.get_or_create(
                name=libobject.get_organization_type())

        organization.organization_type = orgtype
        organization.body = Body.by_oparl_id(libobject.get_body().get_id())
        organization.start = self.glib_datetime_or_date_to_python(
            libobject.get_start_date())
        organization.end = self.glib_datetime_or_date_to_python(
            libobject.get_end_date())

        # Saved before the memberships are imported - presumably they need the
        # organization to exist in the database (TODO confirm)
        organization.save()

        for membership in libobject.get_membership():
            self.membership(organization, membership)

        organization.save()

        return organization
    def organization(self, libobject: JSON, organization: Organization) -> Organization:
        """Set type, body, dates, location and a cleaned up short name on ``organization``.

        The organization is returned without being saved here.
        """
        known_types = self.utils.organization_classification

        # E.g. Leipzig sets organizationType: "Gremium" and classification: "Fraktion" for factions,
        # so the classification wins whenever it is one of the known types
        if libobject.get("classification") in known_types:
            type_name = libobject["classification"]
        else:
            type_name = libobject.get("organizationType")

        type_id = known_types.get(type_name)
        if not type_id:
            orgtype, _ = OrganizationType.objects.get_or_create(
                name=libobject.get("organizationType")
            )
        else:
            orgtype = OrganizationType.objects.get(id=type_id)
        organization.organization_type = orgtype

        body_id = libobject.get("body")
        if not body_id:
            organization.body = self.default_body
        else:
            # If we really have a case with an extra body then this should error because then we need some extra handling
            organization.body = Body.by_oparl_id(body_id)

        organization.start = self.utils.parse_date(libobject.get("startDate"))
        organization.end = self.utils.parse_date(libobject.get("endDate"))

        organization.location = self.retrieve(Location, libobject.get("location"))

        if organization.name == organization.short_name and type_name:
            # Remove the type name, including an optional leading dash or space
            # and an optional trailing space, from the short name
            type_pattern = "[- ]?" + re.escape(type_name) + "[ ]?"
            organization.short_name = re.sub(
                type_pattern, "", organization.short_name, flags=re.IGNORECASE
            )

        return organization
# Example #9
# 0
def test_incremental_agenda_items():
    """Agenda items missing from a later incremental import must only remain in objects_with_deleted"""
    old = load_ris_data("importer/test-data/amtzell_old.json")
    new = load_ris_data("importer/test-data/amtzell_new.json")

    body = Body(name=old.meta.name, short_name=old.meta.name, ags=old.meta.ags)
    body.save()

    # Import everything, then drop the agenda items again so that they can be
    # re-imported through incremental_import below
    import_data(body, old)
    models.AgendaItem.objects_with_deleted.all().delete()

    # We don't have original ids for all agenda items (yet?),
    # so we just assume meeting x paper is unique
    consultation_map = {}
    for meeting_id, paper_id, consultation_id in models.Consultation.objects.values_list(
        "meeting_id", "paper_id", "id"
    ):
        consultation_map[(meeting_id, paper_id)] = consultation_id

    meeting_id_map = make_id_map(models.Meeting.objects.filter(oparl_id__isnull=False))
    paper_id_map = make_id_map(models.Paper.objects)

    def import_agenda_items(ris_data):
        converted = [
            convert_agenda_item(item, consultation_map, meeting_id_map, paper_id_map)
            for item in ris_data.agenda_items
        ]
        incremental_import(models.AgendaItem, converted)

    def sorted_oparl_ids(manager):
        return sorted(manager.values_list("oparl_id", flat=True))

    import_agenda_items(old)
    assert sorted_oparl_ids(models.AgendaItem.objects) == ["1302", "1880"]
    assert sorted_oparl_ids(models.AgendaItem.objects_with_deleted) == [
        "1302",
        "1880",
    ]

    import_agenda_items(new)
    assert sorted_oparl_ids(models.AgendaItem.objects) == ["1267", "1302"]
    assert sorted_oparl_ids(models.AgendaItem.objects_with_deleted) == [
        "1267",
        "1302",
        "1880",
    ]
# Example #10
# 0
def test_import_json():
    """Import the old and then the new amtzell data and compare the db with the stored snapshot after each import"""
    old = load_ris_data("importer/test-data/amtzell_old.json")
    new = load_ris_data("importer/test-data/amtzell_new.json")

    body = Body(name=old.meta.name, short_name=old.meta.name, ags=old.meta.ags)
    body.save()

    steps = (
        (old, "importer/test-data/amtzell_old_db.json"),
        (new, "importer/test-data/amtzell_new_db.json"),
    )
    for ris_data, snapshot_path in steps:
        import_data(body, ris_data)

        actual = make_db_snapshot()
        expected = json.loads(Path(snapshot_path).read_text())
        assert expected == actual
def test_undelete():
    """A paper gets created, (spuriously?) deleted, and then undeleted"""

    def ris_data_with(papers):
        return RisData(sample_city, None, [], [], papers, [], [], [], [], 2)

    with_paper = ris_data_with([sample_paper])
    without_paper = ris_data_with([])

    meta = with_paper.meta
    body = Body(name=meta.name, short_name=meta.name, ags=meta.ags)
    body.save()

    # Present, gone, present again
    for ris_data in (with_paper, without_paper, with_paper):
        import_data(body, ris_data)

    # There must be exactly one paper, and it must not be marked as deleted
    [paper] = models.Paper.objects_with_deleted.all()
    assert not paper.deleted
def test_duplicate_meetings_with_id(fixture, target_number, target_number_with_deleted):
    """
    There are two meetings with the same name/start, and
        a) different ids,
        b) with and without id,
        c) without ids.
    Inspired by https://ris.wuppertal.de/si0057.php?__ksinr=18329 and
    https://ris.wuppertal.de/si0057.php?__ksinr=18837
    """

    # Bring the db into the state described by the fixture
    for existing in serializers.deserialize("json", Path(fixture).read_text()):
        existing.save()

    meeting_json = {
        "organization_name": "BV Uellendahl-Katernberg",
        "name": "BV Uellendahl-Katernberg",
        "location": "Rathaus Barmen, Ratssaal, Johannes-Rau-Platz 1, 42275 Wuppertal",
        "note": None,
        "original_id": 18329,
        "start": "2020-04-23T18:30:00+02:00",
        "end": "2020-04-23T19:20:00+02:00",
        "cancelled": False,
    }
    new_meeting = converter.structure(meeting_json, Meeting)

    ris_data = RisData(sample_city, None, [], [], [], [], [new_meeting], [], [], 2)
    meta = ris_data.meta
    body = Body(name=meta.name, short_name=meta.name, ags=meta.ags)
    body.save()

    import_data(body, ris_data)

    active = models.Meeting.objects
    # The remaining meetings are listed in the message if the count is off
    assert active.count() == target_number, list(
        active.values_list("oparl_id", "name", "start")
    )
    assert models.Meeting.objects_with_deleted.count() == target_number_with_deleted
# Example #13
# 0
def import_outline(body: Body, gemeindeschluessel: str):
    """Query the overpass api for the given Gemeindeschlüssel and store the result as outline of the body.

    The existing outline location of the body is reused; if there is none, a
    new location that is not marked as official is created. Both the outline
    and the body are saved.
    """
    outline = body.outline
    if not outline:
        outline = Location()
        outline.name = "Outline of " + body.name
        outline.short_name = body.short_name
        outline.is_official = False

    logger.info("Importing outline from {}".format(gemeindeschluessel))

    response = requests.post(
        overpass_api,
        data={"data": query_template_outline.format(gemeindeschluessel)},
    )
    # Don't store anything if the request failed
    response.raise_for_status()
    outline.geometry = convert_to_geojson(response.text)
    outline.save()

    body.outline = outline
    body.save()
def test_import_outline(pytestconfig, ags):
    """This test exists mostly for the handling of the AGS with 5 vs. 8 digits"""
    # This currently assumes that we don't want to do any transformations with the ags before assigning it to the body
    body = Body(name=f"Place with AGS {ags}", short_name=f"AGS{ags}", ags=ags)
    with responses.RequestsMock() as requests_mock:
        # The saved request url and response for this ags
        fixture_path = (
            pytestconfig.rootpath / f"testdata/outline_query_responses/{ags}.json"
        )
        saved = json.loads(fixture_path.read_text())
        requests_mock.add(
            method=responses.POST, url=saved["url"], body=saved["response"]
        )
        import_outline(body, ags)
def test_index_deletion():
    """Check that deleted persons get deleted from the elasticsearch index"""

    def ris_data_with(persons):
        return RisData(sample_city, None, persons, [], [], [], [], [], [], 2)

    def underwood_hits():
        return len(MainappSearch({"query": "Underwood"}).execute().hits)

    # Recreate all indices: first delete every one of them (a missing index is ignored) ...
    for index in registry.get_indices(registry.get_models()):
        index.delete(ignore=404)
    # ... and then create them again
    for index in registry.get_indices(registry.get_models()):
        index.create()

    old = ris_data_with(
        [
            Person(name="Frank Underwood", party="Democrats"),
            Person(name="Claire Underwood", party="Democrats"),
        ]
    )
    new = ris_data_with([Person(name="Claire Underwood", party="Democrats")])

    body = Body(name=old.meta.name, short_name=old.meta.name, ags=old.meta.ags)
    body.save()

    import_data(body, old)
    assert underwood_hits() == 2
    import_data(body, new)
    assert underwood_hits() == 1
    @classmethod
    def setUpClass(cls):
        """Load the json fixtures from ``cls.dummy_data`` into a mock loader and create the converter.

        Every ``.json`` file is registered in ``cls.api_data`` under its
        "id". The entries that ``externalize`` yields for it are registered
        too, unless their id is already known. unittest calls this hook on the
        class, so it has to be a classmethod.
        """
        super().setUpClass()
        cls.api_data = {}
        cls.loader = MockLoader()
        cls.loader.api_data = cls.api_data
        for file in os.listdir(cls.dummy_data):
            if not file.endswith(".json"):
                continue

            # JSON is UTF-8 by specification, so don't rely on the locale's
            # default encoding
            with open(os.path.join(cls.dummy_data, file), encoding="utf-8") as fp:
                data = json.load(fp)

            cls.api_data[data["id"]] = data
            for entry in externalize(data):
                # Entries that are already registered are kept as they are
                if entry.data["id"] not in cls.api_data:
                    cls.api_data[entry.data["id"]] = entry.data

        # Used by test_location_default_body
        body = Body()
        body.short_name = "München"
        cls.converter = JsonToDb(cls.loader, default_body=body)
        cls.converter.warn_missing = False
        cls.utils = Utils()
def import_outline(body: Body, ags: Optional[str] = None):
    """Query the overpass api for the outline belonging to an AGS and store it on the body.

    If no ``ags`` is passed, the one of the body is used. The existing outline
    location of the body is reused; if there is none, a new location that is
    not marked as official is created. Both the outline and the body are saved.
    """
    if not ags:
        ags = body.ags
    assert ags is not None

    logger.info("Importing outline from {}".format(ags))

    outline = body.outline
    if not outline:
        outline = Location()
        outline.name = "Outline of " + body.name
        outline.short_name = body.short_name
        outline.is_official = False

    response = requests.post(
        overpass_api, data={"data": format_template(query_template_outline, ags)}
    )
    # Don't store anything if the request failed
    response.raise_for_status()
    outline.geometry = osm2geojson.json2geojson(response.text)
    outline.save()

    body.outline = outline
    body.save()
# Example #18
# 0
def test_json_to_db_empty_object(caplog):
    """An empty object can only be imported with an explicit type; type and id are then filled in with a log message each"""
    url = "https://lahr.ratsinfomanagement.net/webservice/oparl/v1.1/body/1/consultation/5999"
    converter = JsonToDb(
        MockLoader(api_data={url: {}}),
        default_body=Body(),
        ensure_organization_type=False,
    )

    # Without a type there is nothing the object could be imported as
    expected_error = (
        f"The object {url} has not type field and object_type wasn't given"
    )
    with pytest.raises(RuntimeError, match=expected_error):
        converter.import_anything(url)

    converter.import_anything(url, Consultation)
    assert Consultation.objects.filter(oparl_id=url).count() == 1

    expected_messages = [
        f"Object loaded from {url} has no type field, inferred to https://schema.oparl.org/1.0/Consultation",
        f"Object loaded from {url} has no id field, setting id to url",
    ]
    assert caplog.messages == expected_messages
    def organization_core(self, libobject, organization):
        """Set type, body, start and end of the organization from the api object and call the sanitize hook."""
        self.logger.info("Processing Organization {}".format(libobject.get_id()))

        # Known types are looked up by id, unknown ones are created by name
        type_name = libobject.get_organization_type()
        type_id = self.organization_classification.get(type_name)
        if not type_id:
            orgtype, _ = OrganizationType.objects.get_or_create(name=type_name)
        else:
            orgtype = OrganizationType.objects.get(id=type_id)
        organization.organization_type = orgtype

        body_id = libobject.get_body().get_id()
        organization.body = Body.by_oparl_id(body_id)

        to_python = self.glib_datetime_or_date_to_python
        organization.start = to_python(libobject.get_start_date())
        organization.end = to_python(libobject.get_end_date())

        self.call_custom_hook("sanitize_organization", organization)
# Example #20
# 0
def test_json_to_db_missing_object(caplog):
    """A missing (None) object can only be imported with an explicit type; a dummy is used and logged"""
    url = "https://lahr.ratsinfomanagement.net/webservice/oparl/v1.1/body/1/consultation/5999"
    converter = JsonToDb(
        MockLoader(api_data={url: None}),
        default_body=Body(),
        ensure_organization_type=False,
    )

    expected_error = (
        rf"The object {url} is missing and the object type was not specified"
    )
    with pytest.raises(RuntimeError, match=expected_error):
        converter.import_anything(url)

    converter.import_anything(url, Consultation)
    assert Consultation.objects.filter(oparl_id=url).count() == 1

    # The same message is expected twice, once for each import_anything call above
    dummy_message = f"JSON loaded from {url} is not a dict/object. Using a dummy instead. THIS IS BAD"
    assert caplog.messages == [dummy_message] * 2
def test_import_json(send_mail_function):
    """This test runs with elasticsearch if available and otherwise uses saved responses"""

    def assert_db_matches(snapshot_path):
        actual = make_db_snapshot()
        expected = json.loads(Path(snapshot_path).read_text())
        assert expected == actual

    # Create the base state
    old = load_ris_data("importer/test-data/amtzell_old.json")
    body = Body(name=old.meta.name, short_name=old.meta.name, ags=old.meta.ags)
    body.save()

    import_data(body, old)
    assert_db_matches("importer/test-data/amtzell_old_db.json")

    last_notification = timezone.now()

    # Create notification
    user = User(username="******", email="*****@*****.**")
    user.save()
    UserProfile.objects.create(user=user)

    UserAlert(
        user=user,
        search_string="Digitalisierungsstrategie",
        created=datetime.fromisoformat("2008-01-01T12:00:00+01:00"),
    ).save()

    # Import the new data
    new = load_ris_data("importer/test-data/amtzell_new.json")
    import_data(body, new)
    assert_db_matches("importer/test-data/amtzell_new_db.json")

    # Check that the notification was sent
    elasticsearch_mock = ElasticsearchMock(
        {
            "importer/test-data/notification_request.json": "importer/test-data/notification_response.json"
        }
    )
    # Without elasticsearch, the connection is replaced by the mock with the saved response
    context = (
        contextlib.nullcontext()
        if is_es_online()
        else mock.patch(
            "elasticsearch_dsl.search.get_connection",
            new=lambda _alias: elasticsearch_mock,
        )
    )
    with context:
        # Offline, a fixed timestamp is used instead of the one taken above
        notify_since = (
            last_notification
            if is_es_online()
            else datetime.fromisoformat("2020-05-17T12:07:37.887853+00:00")
        )
        NotifyUsers(notify_since).notify_all()

        assert send_mail_function.call_count == 1
        mail_args = send_mail_function.call_args[0]
        assert mail_args[0] == "*****@*****.**"
        assert "Digitalisierungsstrategie" in mail_args[2]
        assert "Digitalisierungsstrategie" in mail_args[3]