def body(self, lib_object: JSON, body: Body) -> Body:
        """Fills the body-specific fields of ``body`` from its OParl json.

        The short name is normalized, spaces are removed from the ags and the
        location of the body is stored either as center (for a point) or as
        outline (for a polygon).

        :param lib_object: The json object of the body
        :param body: The body to update
        :return: The updated ``body``
        :raises RuntimeError: If the ags has more than 8 characters and the
            surplus characters are not all zeros
        """
        body.short_name = self.utils.normalize_body_name(body.short_name)

        body.ags = lib_object.get("ags")
        if body.ags:
            body.ags = body.ags.replace(" ", "")
        if len(body.ags or "") > 8:
            # Special case for https://ris.krefeld.de/webservice/oparl/v1/body/1
            # where the ags is padded with additional zeros
            surplus = body.ags[8:]
            if surplus == "0" * len(surplus):
                body.ags = body.ags[:8]
            else:
                raise RuntimeError(
                    "The Amtliche Gemeindeschlüssel of {} is longer than 8 characters: '{}'".format(
                        body, body.ags
                    )
                )

        # We don't really need the location because we have our own outline
        # importing logic and don't need the city, but we import it for comprehensiveness
        location = self.retrieve(Location, lib_object.get("location"), body.oparl_id)
        if location and location.geometry:
            geometry_type = location.geometry["type"]
            if geometry_type == "Point":
                body.center = location
                body.outline = None
            elif geometry_type == "Polygon":
                logger.warning("Overriding outline of Body with api version")
                body.center = None
                body.outline = location
            else:
                # Adjacent string literals are joined without a separator,
                # so the space between the two sentences must be explicit
                logger.warning(
                    "Location object is of type {}, which is neither 'Point' nor 'Polygon'. "
                    "Skipping this location.".format(geometry_type)
                )

        return body
    def person(self, lib_object: JSON, person: Person) -> Person:
        """Sets name, given name, family name and location of ``person``.

        Missing parts are derived where possible: the full name is joined from
        given and family name, given and family name are taken from the last
        two space-separated parts of the full name. Whatever is still missing
        afterwards is replaced by the translated "Unknown".

        :param lib_object: The json object of the person
        :param person: The person to update
        :return: The updated ``person``
        """
        full_name = lib_object.get("name")
        first = lib_object.get("givenName")
        last = lib_object.get("familyName")

        if not full_name:
            if not (first and last):
                logger.warning("Person without name: {}".format(lib_object["id"]))
                full_name = _("Unknown")
            else:
                full_name = first + " " + last

        if not (first or last) and " " in full_name:
            # Everything before the last two parts (e.g. titles) is ignored
            parts = full_name.split(" ")
            first, last = parts[-2], parts[-1]
            logger.warning("Inferring given and family name from compound name")

        if not first:
            logger.warning("Person without given name: {}".format(lib_object["id"]))
            first = _("Unknown")

        if not last:
            logger.warning("Person without family name: {}".format(lib_object["id"]))
            last = _("Unknown")

        person.name = full_name
        person.given_name = first
        person.family_name = last

        location_id = lib_object.get("location")
        person.location = self.retrieve(Location, location_id, person.oparl_id)

        return person
Exemple #3
0
 def meeting_related(self, libobject: JSON, meeting: Meeting) -> None:
     """Replaces the auxiliary files, persons and organizations of ``meeting``
     with the objects referenced in the json of the meeting."""
     auxiliary_files = self.retrieve_many(File, libobject.get("auxiliaryFile"))
     meeting.auxiliary_files.set(auxiliary_files)

     participants = self.retrieve_many(Person, libobject.get("participant"))
     meeting.persons.set(participants)

     organizations = self.retrieve_many(
         Organization, libobject.get("organization"))
     meeting.organizations.set(organizations)
    def paper(self, lib_object: JSON, paper: Paper) -> Paper:
        """Sets paper type, reference number, main file and the dates of ``paper``.

        The paper type is looked up by the value of ``paperType`` and created
        if it doesn't exist yet. All dates are derived from ``date``.

        :param lib_object: The json object of the paper
        :param paper: The paper to update
        :return: The updated ``paper``
        """
        type_name = lib_object.get("paperType")
        if type_name:
            paper_type, created = PaperType.objects.get_or_create(paper_type=type_name)
            paper.paper_type = paper_type
            if created:
                message = "Created new paper type {} through {}".format(
                    paper_type, lib_object["id"]
                )
                logging.info(message)

        paper.reference_number = lib_object.get("reference")
        main_file_id = lib_object.get("mainFile")
        paper.main_file = self.retrieve(File, main_file_id, paper.oparl_id)

        paper.legal_date = self.utils.parse_date(lib_object.get("date"))
        # The agenda items aren't available yet at this point, so papers with
        # consultations but without a legal date get fixed up later
        paper.display_date = paper.legal_date
        # Papers without a good date are sorted behind those with a good date
        sort_date = self.utils.date_to_datetime(paper.legal_date)
        paper.sort_date = sort_date or fallback_date

        return paper
Exemple #5
0
 def paper_related(self, libobject: JSON, paper: Paper) -> None:
     """Replaces the files, organizations and persons of ``paper`` with the
     objects referenced in the json of the paper."""
     auxiliary_files = self.retrieve_many(File, libobject.get("auxiliaryFile"))
     paper.files.set(auxiliary_files)

     directing_organizations = self.retrieve_many(
         Organization, libobject.get("underDirectionOf"))
     paper.organizations.set(directing_organizations)

     originators = self.retrieve_many(Person, libobject.get("originatorPerson"))
     paper.persons.set(originators)
Exemple #6
0
    def legislative_term(self, libobject: JSON,
                         term: LegislativeTerm) -> Optional[LegislativeTerm]:
        """Sets start and end of ``term`` from ``startDate`` and ``endDate``.

        :param libobject: The json object of the legislative term
        :param term: The legislative term to update
        :return: The updated ``term``, or None if one of the dates is missing
        """
        raw_start = libobject.get("startDate")
        raw_end = libobject.get("endDate")

        if not (raw_start and raw_end):
            logger.error("Term has no start or end date - skipping")
            return None

        term.start = self.utils.parse_date(raw_start)
        term.end = self.utils.parse_date(raw_end)

        return term
    def init_base(
        self, lib_object: JSON, base: E, name_fixup: Optional[str] = None
    ) -> E:
        """Sets the fields that are common to all objects.

        :param lib_object: The json object; must contain a non-empty ``id``
        :param base: The object to update
        :param name_fixup: Used as name if the json doesn't have one
        :return: The updated ``base``
        :raises RuntimeError: If the ``id`` is empty
        """
        oparl_id = lib_object["id"]
        if not oparl_id:
            raise RuntimeError("id is none: " + str(lib_object))

        base.oparl_id = oparl_id
        base.deleted = bool(lib_object.get("deleted", False))

        # Only objects with name and short name need the remaining fields
        if not isinstance(base, ShortableNameFields):
            return base

        base.name = lib_object.get("name") or name_fixup
        base.set_short_name(lib_object.get("shortName") or base.name)
        return base
    def visit_object(self, response: JSON):
        """Fixes known quirks of File and Body objects in place.

        For files, `files//rim` is replaced by `files/rim` in the access and
        the download url. For bodies, an ags with 7 characters gets a leading zero.
        """
        oparl_type = response.get("type")

        if oparl_type == "https://schema.oparl.org/1.0/File":
            for url_key in ("accessUrl", "downloadUrl"):
                if url_key in response:
                    fixed_url = response[url_key].replace(r"files//rim", r"files/rim")
                    response[url_key] = fixed_url
        elif oparl_type == "https://schema.oparl.org/1.0/Body":
            # Check for a missing leading zero
            ags = response.get("ags")
            if ags and len(ags) == 7:
                # noinspection PyTypeChecker
                response["ags"] = "0" + ags
 def visit(self, data: JSON):
     """Removes quirks like `"streetAddress": " "` in Location

     Every string value that is "N/A", empty or consists only of whitespace
     is deleted from ``data`` in place. Nested objects are cleaned
     recursively, including objects that are embedded in lists.
     """
     # Iterate over a copy because keys are deleted from `data` in the loop
     for key, value in data.copy().items():
         if isinstance(value, dict):
             self.visit(value)
         elif isinstance(value, list):
             # Embedded objects in lists need the same cleanup;
             # other list elements are left as they are
             for element in value:
                 if isinstance(element, dict):
                     self.visit(element)
         elif isinstance(value, str):
             if value == "N/A" or not value.strip():
                 del data[key]
    def organization(self, libobject: JSON, organization: Organization) -> Organization:
        """Sets type, body, start, end and location of ``organization`` and
        strips the type name from the short name.

        :param libobject: The json object of the organization
        :param organization: The organization to update
        :return: The updated ``organization``
        """
        known_classifications = self.utils.organization_classification

        # E.g. Leipzig sets organizationType: "Gremium" and classification: "Fraktion" for factions,
        # so we give priority to classification
        if libobject.get("classification") in known_classifications:
            type_name = libobject["classification"]
        else:
            type_name = libobject.get("organizationType")

        type_id = known_classifications.get(type_name)
        if not type_id:
            orgtype, _created = OrganizationType.objects.get_or_create(
                name=libobject.get("organizationType")
            )
        else:
            orgtype = OrganizationType.objects.get(id=type_id)
        organization.organization_type = orgtype

        body_id = libobject.get("body")
        if body_id:
            # If we really have a case with an extra body then this should error because then we need some extra handling
            organization.body = Body.by_oparl_id(body_id)
        else:
            organization.body = self.default_body

        organization.start = self.utils.parse_date(libobject.get("startDate"))
        organization.end = self.utils.parse_date(libobject.get("endDate"))

        organization.location = self.retrieve(Location, libobject.get("location"))

        # The short name defaulted to the name, so we shorten it ourselves by
        # removing the type name (case-insensitive)
        if type_name and organization.name == organization.short_name:
            type_pattern = "[- ]?" + re.escape(type_name) + "[ ]?"
            organization.short_name = re.sub(
                type_pattern, "", organization.short_name, flags=re.I
            )

        return organization
Exemple #11
0
def externalize(libobject: JSON,
                key_callback: Optional[Set[str]] = None) -> List[CachedObject]:
    """Converts an oparl object with embedded objects to multiple flat json objects

    Embedded objects (single or in a list) are replaced by their id in
    ``libobject`` and get a `mst:backref` to ``libobject``; list members
    additionally get their index as `mst:backrefPosition`. Embedded objects
    without an id are removed with a warning.

    :param libobject: The json object, which is modified in place
    :param key_callback: If this is a set, the keys that contained embedded
        objects are added to it
    :return: The embedded objects (recursively flattened), followed by
        ``libobject`` itself
    """

    flat = []

    # Iterating over a sorted copy of the keys allows deleting keys in the loop
    for key in sorted(libobject):
        # Skip the geojson object
        if key == "geojson":
            continue

        value = libobject[key]

        if isinstance(value, dict):
            if "id" not in value:
                logger.warning(
                    f"Embedded object '{key}' in {libobject['id']} does not have an id, skipping: {value}"
                )
                del libobject[key]
                continue

            if isinstance(key_callback, set):
                key_callback.add(key)
            value["mst:backref"] = libobject["id"]

            flat.extend(externalize(value))
            libobject[key] = value["id"]
        elif isinstance(value, list) and value and isinstance(value[0], dict):
            if isinstance(key_callback, set):
                key_callback.add(key)
            for position, child in enumerate(value):
                if "id" not in child:
                    logger.warning(
                        f"Embedded object '{key}' in {libobject['id']} does not have an id, skipping: {child}"
                    )
                    # The whole list is dropped, not only this entry
                    del libobject[key]
                    break

                child["mst:backref"] = libobject["id"]
                # We need this for agenda items
                child["mst:backrefPosition"] = position

                flat.extend(externalize(child))
                value[position] = child["id"]

    own_type = libobject["type"].split("/")[-1]
    flat.append(
        CachedObject(url=libobject["id"], data=libobject, oparl_type=own_type))

    return flat
Exemple #12
0
    def location(self, libobject: JSON, location: Location) -> Location:
        """Sets the address fields, the description and the geometry of ``location``.

        If the json has no description, one is built from room, street address
        and locality. If there is a street address but no geometry, the
        geometry is looked up by geocoding the address.

        :param libobject: The json object of the location
        :param location: The location to update
        :return: The updated ``location``
        """
        location.description = libobject.get("description")
        location.is_official = self.utils.official_geojson
        # `or {}` also covers an explicit `"geojson": null`
        location.geometry = (libobject.get("geojson") or {}).get("geometry")

        location.street_address = libobject.get("streetAddress")
        location.room = libobject.get("room")
        location.postal_code = libobject.get("postalCode")
        location.locality = libobject.get("locality")

        # The postal code is only used together with a locality
        city = None
        if location.locality:
            city = location.locality
            if location.postal_code:
                city = location.postal_code + " " + city

        if not location.description:
            # Joining only the existing parts avoids a dangling ", " when
            # there is no locality
            parts = [location.room, location.street_address, city]
            location.description = ", ".join(part for part in parts if part)

        # If a street_address is present, we try to find the exact location on the map
        if location.street_address and not location.geometry:
            search_str = location.street_address + ", "
            if city:
                # A locality without a postal code is used, too
                search_str += city
            elif self.default_body:
                search_str += self.default_body.short_name
            search_str += " " + settings.GEOEXTRACT_SEARCH_COUNTRY

            location.geometry = geocode(search_str)

        return location
Exemple #13
0
    def meeting(self, libobject: JSON, meeting: Meeting) -> Meeting:
        """Sets start, end, location, the invitation and protocol files and
        the cancelled flag of ``meeting``.

        :param libobject: The json object of the meeting
        :param meeting: The meeting to update
        :return: The updated ``meeting``
        """
        parse_datetime = self.utils.parse_datetime
        meeting.start = parse_datetime(libobject.get("start"))
        meeting.end = parse_datetime(libobject.get("end"))

        meeting.location = self.retrieve(Location, libobject.get("location"))

        invitation_id = libobject.get("invitation")
        meeting.invitation = self.retrieve(File, invitation_id)
        verbatim_protocol_id = libobject.get("verbatimProtocol")
        meeting.verbatim_protocol = self.retrieve(File, verbatim_protocol_id)
        results_protocol_id = libobject.get("resultsProtocol")
        meeting.results_protocol = self.retrieve(File, results_protocol_id)

        # A meeting is assumed to take place unless stated otherwise
        meeting.cancelled = libobject.get("cancelled", False)

        return meeting
Exemple #14
0
    def get_ags(self, body: Body, system: JSON,
                userinput: str) -> Tuple[str, str]:
        """
        Determines the ags of a body. The ags field of the body is used if it
        has 5 or 8 characters. Otherwise, the following names are passed to
        `city_to_ags` in this order until one yields a result:
         a) the body's short name
         b) the user's input
         c) the body's full name
         d) the system's name (normalized), if any
         e) the locality of the body's center, if any

        Returns the ags and the name that did match

        Raises a RuntimeError if none of the names yields an ags
        """
        known_ags = body.ags
        if known_ags:
            if len(known_ags) in (5, 8):
                return known_ags, body.short_name
            logger.error("Ignoring ags '{}' with invalid legth {}".format(
                known_ags, len(known_ags)))

        is_district = re.match(
            settings.DISTRICT_REGEX, body.name, re.IGNORECASE) is not None

        to_check = [
            ("body short name", body.short_name),
            ("user input", userinput),
            ("body name", body.name),
        ]

        system_name = system.get("name")
        if system_name:
            to_check.append(
                ("system name", self.utils.normalize_body_name(system_name)))

        center = body.center
        if center and center.locality:
            to_check.append(("body location locality", center.locality))

        for source, candidate in to_check:
            found_ags = city_to_ags(candidate, is_district)
            if not found_ags:
                continue
            logger.debug("Found ags using the {}: '{}'".format(
                source, candidate))
            return found_ags, candidate

        raise RuntimeError(
            "Could not determine the Amtliche Gemeindeschlüssel using {}".
            format(to_check))
    def consultation(self, libobject: JSON, consultation: Consultation) -> Consultation:
        """Sets authoritative, role, paper and meeting of ``consultation``.

        :param libobject: The json object of the consultation
        :param consultation: The consultation to update
        :return: The updated ``consultation``
        """
        # `authoritative` used to be assigned a second time at the end of the
        # method with the same value; once is enough
        consultation.authoritative = libobject.get("authoritative")
        consultation.role = libobject.get("role")

        # Fall back to the `mst:backref` entry if the json doesn't name the paper
        paper_backref = libobject.get("paper") or libobject.get("mst:backref")
        consultation.paper = self.retrieve(Paper, paper_backref)
        consultation.meeting = self.retrieve(Meeting, libobject.get("meeting"))

        return consultation
 def visit(self, data: JSON):
     """Removes quirks like `"streetAddress": " "` in Location

     Works in place: a single object in `auxiliaryFile` is wrapped in a list,
     and every string value that is "N/A", empty or whitespace-only is
     deleted. Nested objects, also those in lists, are cleaned recursively.
     """
     # `"auxiliaryFile": { ... }` -> `"auxiliaryFile": [{ ... }]`
     if isinstance(data.get("auxiliaryFile"), dict):
         logger.warning(
             f"auxiliaryFile is supposed to be an array of objects, "
             f"but is an object (in {data.get('id')})")
         data["auxiliaryFile"] = [data["auxiliaryFile"]]

     # Iterate over a snapshot because keys are deleted from `data` in the loop
     for key, value in list(data.items()):
         if isinstance(value, dict):
             self.visit(value)
         elif isinstance(value, list):
             for element in value:
                 if isinstance(element, dict):
                     self.visit(element)
         elif isinstance(value, str) and (value == "N/A" or not value.strip()):
             del data[key]
    def membership(self, lib_object: JSON, membership: Membership) -> Membership:
        """Sets start, end, role, person and organization of ``membership``.

        A missing role is replaced by the translated "Unknown".

        :param lib_object: The json object of the membership
        :param membership: The membership to update
        :return: The updated ``membership``
        """
        role = lib_object.get("role") or _("Unknown")

        parse_date = self.utils.parse_date
        membership.start = parse_date(lib_object.get("startDate"))
        membership.end = parse_date(lib_object.get("endDate"))
        membership.role = role

        # Fall back to the `mst:backref` entry if the json doesn't name the person
        person_id = lib_object.get("person") or lib_object.get("mst:backref")
        membership.person = self.retrieve(Person, person_id, membership.oparl_id)

        organization_id = lib_object.get("organization")
        membership.organization = self.retrieve(
            Organization, organization_id, membership.oparl_id
        )

        return membership
def externalize(libobject: JSON,
                key_callback: Optional[Set[str]] = None) -> List[CachedObject]:
    """Converts an oparl object with embedded objects to multiple flat json objects

    Embedded objects (single or in a list) are replaced by their id in
    ``libobject`` and get a `mst:backref` to ``libobject``; list members
    additionally get their index as `mst:backrefPosition`.

    :param libobject: The json object, which is modified in place
    :param key_callback: If this is a set, the keys that contained embedded
        objects are added to it
    :return: The embedded objects (recursively flattened), followed by
        ``libobject`` itself
    """

    flat = []

    # Only values of existing keys are replaced below, so iterating over the
    # object itself is fine
    for key in libobject:
        # Skip the geojson object
        if key == "geojson":
            continue

        value = libobject[key]

        if isinstance(value, dict):
            if isinstance(key_callback, set):
                key_callback.add(key)
            value["mst:backref"] = libobject["id"]

            flat.extend(externalize(value))
            libobject[key] = value["id"]
        elif isinstance(value, list) and value and isinstance(value[0], dict):
            if isinstance(key_callback, set):
                key_callback.add(key)
            for position, child in enumerate(value):
                child["mst:backref"] = libobject["id"]
                # We need this for agenda items
                child["mst:backrefPosition"] = position

                flat.extend(externalize(child))
                value[position] = child["id"]

    own_type = libobject["type"].split("/")[-1]
    flat.append(
        CachedObject(url=libobject["id"], data=libobject, oparl_type=own_type))

    return flat
Exemple #19
0
    def paper(self, libobject: JSON, paper: Paper) -> Paper:
        """Sets paper type, dates, reference number and main file of ``paper``.

        The paper type is looked up by the value of ``paperType`` and created
        if it doesn't exist yet. The sort date is the legal date, with the
        creation date and then the current time as fallbacks.

        :param libobject: The json object of the paper
        :param paper: The paper to update
        :return: The updated ``paper``
        """
        type_name = libobject.get("paperType")
        if type_name:
            # The name must be a lookup argument: with only `defaults` given,
            # get_or_create() does an unfiltered get(), which returns the same
            # type for every paper or fails once more than one type exists
            paper_type, created = PaperType.objects.get_or_create(
                paper_type=type_name)
            paper.paper_type = paper_type
            if created:
                logging.info("Created new paper type {} through {}".format(
                    paper_type, libobject["id"]))

        paper.legal_date = self.utils.parse_date(libobject.get("date"))
        paper.sort_date = (self.utils.date_to_datetime(paper.legal_date)
                           or self.utils.parse_datetime(
                               libobject.get("created")) or timezone.now())
        paper.reference_number = libobject.get("reference")
        paper.main_file = self.retrieve(File, libobject.get("mainFile"))

        return paper
 def body_related(self, lib_object: JSON, body: Body) -> None:
     """Replaces the legislative terms of ``body`` with the ones referenced
     in the json of the body."""
     terms = self.retrieve_many(
         LegislativeTerm, lib_object.get("legislativeTerm"), lib_object["id"]
     )
     body.legislative_terms.set(terms)
Exemple #21
0
    def agenda_item(self, libobject: JSON, item: AgendaItem) -> AgendaItem:
        """Sets the fields of ``item`` from the json of an agenda item.

        :param libobject: The json object of the agenda item
        :param item: The agenda item to update
        :return: The updated ``item``
        """
        # "-" is the placeholder for agenda items without a number
        item.key = libobject.get("number") or "-"

        # Fields that are taken over without any conversion
        plain_fields = (
            ("name", "name"),
            ("public", "public"),
            ("result", "result"),
            ("resolution_text", "resolutionText"),
        )
        for attribute, json_key in plain_fields:
            setattr(item, attribute, libobject.get(json_key))

        parse_datetime = self.utils.parse_datetime
        item.start = parse_datetime(libobject.get("start"))
        item.end = parse_datetime(libobject.get("end"))

        # Fall back to the `mst:backref` entry if the json doesn't name the meeting
        meeting_id = libobject.get("meeting") or libobject.get("mst:backref")
        item.meeting = self.retrieve(Meeting, meeting_id)
        item.position = libobject.get("mst:backrefPosition")

        consultation_id = libobject.get("consultation")
        item.consultation = self.retrieve(Consultation, consultation_id)
        resolution_file_id = libobject.get("resolutionFile")
        item.resolution_file = self.retrieve(File, resolution_file_id)

        return item
 def agenda_item_related(self, lib_object: JSON, item: AgendaItem) -> None:
     """Replaces the auxiliary files of ``item`` with the ones referenced in
     the json of the agenda item."""
     auxiliary_files = self.retrieve_many(
         File, lib_object.get("auxiliaryFile"), lib_object["id"]
     )
     item.auxiliary_file.set(auxiliary_files)
    def agenda_item(self, lib_object: JSON, item: AgendaItem) -> AgendaItem:
        """Sets the fields of ``item`` from the json of an agenda item.

        A key with more than 20 characters is cut off with a warning.

        :param lib_object: The json object of the agenda item
        :param item: The agenda item to update
        :return: The updated ``item``
        """
        # "-" is the placeholder for agenda items without a number
        item.key = lib_object.get("number") or "-"
        if len(item.key) > 20:
            logger.warning(
                f"Overly long AgendaItem key, limiting to 20 character: {item.key}"
            )
            item.key = item.key[:20]

        item.name = lib_object.get("name")
        item.public = lib_object.get("public")
        item.result = lib_object.get("result")
        item.resolution_text = lib_object.get("resolutionText")

        parse_datetime = self.utils.parse_datetime
        item.start = parse_datetime(lib_object.get("start"))
        item.end = parse_datetime(lib_object.get("end"))

        # Fall back to the `mst:backref` entry if the json doesn't name the meeting
        meeting_id = lib_object.get("meeting") or lib_object.get("mst:backref")
        item.meeting = self.retrieve(Meeting, meeting_id, item.oparl_id)
        item.position = lib_object.get("mst:backrefPosition")

        consultation_id = lib_object.get("consultation")
        item.consultation = self.retrieve(Consultation, consultation_id, item.oparl_id)
        resolution_file_id = lib_object.get("resolutionFile")
        item.resolution_file = self.retrieve(File, resolution_file_id, item.oparl_id)

        return item
    def file(self, lib_object: JSON, file: File) -> File:
        """Sets the fields of ``file`` from the json of a file.

        The filename is taken from ``fileName``; if that is missing, it is
        built from the slugified name or, as last resort, from the last
        segment of the access url. A name with more than 200 characters is
        shortened and marked with an ellipsis.

        :param lib_object: The json object of the file
        :param file: The file to update
        :return: The updated ``file``
        :raises KeyError: If the json has neither ``fileName`` nor ``name``
            nor ``accessUrl``
        """
        cutoff = self.utils.filename_length_cutoff
        # `or ""` also covers an explicit `"name": null`, which would
        # otherwise fail in the length check below
        name = lib_object.get("name") or ""

        if lib_object.get("fileName"):
            filename = lib_object.get("fileName")
        elif name:
            # NOTE(review): the extension is always the one of "application/pdf",
            # independent of the mimeType of the file - confirm this is intended
            extension = mimetypes.guess_extension("application/pdf") or ""
            length = cutoff - len(extension)
            filename = slugify(name)[:length] + extension
        else:
            access_url = lib_object["accessUrl"]
            filename = slugify(access_url.split("/")[-1])[-cutoff:]

        if len(name) > 200:
            # wrap() returns no lines at all for whitespace-only text
            lines = textwrap.wrap(name, 199)
            name = (lines[0] if lines else "") + "\u2026"
        file.name = name

        file.filename = filename
        file.mime_type = lib_object.get("mimeType") or "application/octet-stream"
        file.legal_date = self.utils.parse_date(lib_object.get("date"))
        file.sort_date = (
            self.utils.date_to_datetime(file.legal_date)
            or self.utils.parse_datetime(lib_object.get("created"))
            or timezone.now()
        )
        file.oparl_access_url = lib_object.get("accessUrl")
        file.oparl_download_url = lib_object.get("downloadUrl")
        file.filesize = None
        file.parsed_text = lib_object.get("text")
        file.license = lib_object.get("fileLicense")

        # We current do not handle locations attached to files due
        # to the lack of data and our own location extraction

        return file