コード例 #1
0
    def body(self, lib_object: JSON, body: Body) -> Body:
        """Fill a Body from its OParl JSON representation.

        Normalizes the short name, cleans up the Amtliche Gemeindeschlüssel
        (ags) and stores the optional location either as center (Point) or
        as outline (Polygon).

        Raises:
            RuntimeError: If the ags has more than 8 characters and the
                surplus characters are not all zeros.
        """
        body.short_name = self.utils.normalize_body_name(body.short_name)

        raw_ags = lib_object.get("ags")
        body.ags = raw_ags.replace(" ", "") if raw_ags else raw_ags
        if len(body.ags or "") > 8:
            surplus = body.ags[8:]
            # Special case for https://ris.krefeld.de/webservice/oparl/v1/body/1:
            # an ags that is only padded with zeros gets cut back to 8 characters
            if surplus != "0" * len(surplus):
                raise RuntimeError(
                    "The Amtliche Gemeindeschlüssel of {} is longer than 8 characters: '{}'".format(
                        body, body.ags
                    )
                )
            body.ags = body.ags[:8]

        # The location isn't strictly required (there is separate outline
        # importing logic and the city isn't needed), but it is imported
        # for comprehensiveness
        location = self.retrieve(Location, lib_object.get("location"), body.oparl_id)
        if not (location and location.geometry):
            return body

        geometry_type = location.geometry["type"]
        if geometry_type == "Point":
            body.center, body.outline = location, None
        elif geometry_type == "Polygon":
            logger.warning("Overriding outline of Body with api version")
            body.center, body.outline = None, location
        else:
            logger.warning(
                "Location object is of type {}, which is neither 'Point' nor 'Polygon'."
                "Skipping this location.".format(geometry_type)
            )

        return body
コード例 #2
0
    def person(self, lib_object: JSON, person: Person) -> Person:
        """Fill a Person from its OParl JSON representation.

        Missing name parts are derived from the other name fields where
        possible; whatever is still missing afterwards is replaced by the
        translated placeholder "Unknown" and logged.
        """
        full_name = lib_object.get("name")
        given = lib_object.get("givenName")
        family = lib_object.get("familyName")

        if not full_name and given and family:
            # Compose the full name from its parts
            full_name = given + " " + family
        elif not full_name:
            logger.warning("Person without name: {}".format(lib_object["id"]))
            full_name = _("Unknown")

        if not (given or family) and " " in full_name:
            # Use the last two space-separated tokens of the full name
            tokens = full_name.split(" ")
            given, family = tokens[-2], tokens[-1]
            logger.warning("Inferring given and family name from compound name")

        if not given:
            logger.warning("Person without given name: {}".format(lib_object["id"]))
            given = _("Unknown")

        if not family:
            logger.warning("Person without family name: {}".format(lib_object["id"]))
            family = _("Unknown")

        person.name = full_name
        person.given_name = given
        person.family_name = family
        person.location = self.retrieve(
            Location, lib_object.get("location"), person.oparl_id
        )

        return person
コード例 #3
0
 def meeting_related(self, libobject: JSON, meeting: Meeting) -> None:
     """Set the many-to-many relations of a meeting from its OParl JSON.

     The auxiliary files, participants and organizations referenced in
     ``libobject`` are resolved with ``retrieve_many`` and handed to the
     ``set()`` of the matching relation on ``meeting``.
     """
     relations = (
         ("auxiliary_files", File, "auxiliaryFile"),
         ("persons", Person, "participant"),
         ("organizations", Organization, "organization"),
     )
     for attribute, model, json_key in relations:
         getattr(meeting, attribute).set(
             self.retrieve_many(model, libobject.get(json_key)))
コード例 #4
0
    def paper(self, lib_object: JSON, paper: Paper) -> Paper:
        """Fill a Paper from its OParl JSON representation.

        Sets the paper type (creating it when it does not exist yet), the
        reference number, the main file and the date fields.
        """
        type_name = lib_object.get("paperType")
        if type_name:
            paper_type, created = PaperType.objects.get_or_create(paper_type=type_name)
            paper.paper_type = paper_type
            if created:
                logging.info(
                    "Created new paper type {} through {}".format(
                        paper_type, lib_object["id"]
                    )
                )

        paper.reference_number = lib_object.get("reference")
        paper.main_file = self.retrieve(
            File, lib_object.get("mainFile"), paper.oparl_id
        )

        legal_date = self.utils.parse_date(lib_object.get("date"))
        paper.legal_date = legal_date
        # The agenda items aren't available at this point. Papers that have
        # consultations but no legal date get fixed up later
        paper.display_date = legal_date
        # Without a good date, the paper is sorted behind those with a good date
        paper.sort_date = self.utils.date_to_datetime(legal_date) or fallback_date

        return paper
コード例 #5
0
 def paper_related(self, libobject: JSON, paper: Paper) -> None:
     """Set the many-to-many relations of a paper from its OParl JSON.

     The auxiliary files, directing organizations and originator persons
     referenced in ``libobject`` are resolved with ``retrieve_many`` and
     handed to the ``set()`` of the matching relation on ``paper``.
     """
     relations = (
         ("files", File, "auxiliaryFile"),
         ("organizations", Organization, "underDirectionOf"),
         ("persons", Person, "originatorPerson"),
     )
     for attribute, model, json_key in relations:
         getattr(paper, attribute).set(
             self.retrieve_many(model, libobject.get(json_key)))
コード例 #6
0
    def legislative_term(self, libobject: JSON,
                         term: LegislativeTerm) -> Optional[LegislativeTerm]:
        """Fill a LegislativeTerm from its OParl JSON representation.

        Returns:
            The term with parsed start and end dates, or None (after logging
            an error) when either date is missing or empty.
        """
        raw_start = libobject.get("startDate")
        raw_end = libobject.get("endDate")

        if not (raw_start and raw_end):
            logger.error("Term has no start or end date - skipping")
            return None

        term.start = self.utils.parse_date(raw_start)
        term.end = self.utils.parse_date(raw_end)

        return term
コード例 #7
0
    def init_base(
        self, lib_object: JSON, base: E, name_fixup: Optional[str] = None
    ) -> E:
        """Set the fields shared by all imported objects.

        Copies the OParl id and the deleted flag. Objects that are
        ``ShortableNameFields`` also get their name (falling back to
        ``name_fixup``) and short name (falling back to the name).

        Raises:
            RuntimeError: If the id of ``lib_object`` is empty.
        """
        oparl_id = lib_object["id"]
        if not oparl_id:
            raise RuntimeError("id is none: " + str(lib_object))

        base.oparl_id = oparl_id
        base.deleted = bool(lib_object.get("deleted", False))

        if not isinstance(base, ShortableNameFields):
            return base

        base.name = lib_object.get("name") or name_fixup
        base.set_short_name(lib_object.get("shortName") or base.name)
        return base
コード例 #8
0
    def visit_object(self, response: JSON):
        """Repair known quirks of a single OParl object in place.

        * File: a doubled slash in ``files//rim`` inside ``accessUrl`` and
          ``downloadUrl`` is collapsed to ``files/rim``.
        * Body: a 7 character ``ags`` gets its missing leading zero back.
        """
        oparl_type = response.get("type")

        if oparl_type == "https://schema.oparl.org/1.0/File":
            for url_key in ("accessUrl", "downloadUrl"):
                if url_key in response:
                    response[url_key] = response[url_key].replace(
                        r"files//rim", r"files/rim")
        elif oparl_type == "https://schema.oparl.org/1.0/Body":
            # Check for a missing leading zero
            ags = response.get("ags")
            if ags and len(ags) == 7:
                # noinspection PyTypeChecker
                response["ags"] = "0" + ags
コード例 #9
0
 def visit(self, data: JSON):
     """Remove quirks like `"streetAddress": " "` in Location.

     Walks ``data`` recursively through nested dicts and deletes, in place,
     every string value that is "N/A", empty or only whitespace.
     """
     # Iterate over a snapshot because entries are deleted on the way
     for key, value in tuple(data.items()):
         if isinstance(value, dict):
             self.visit(value)
             continue
         if isinstance(value, str) and (value == "N/A" or not value.strip()):
             del data[key]
コード例 #10
0
    def organization(self, libobject: JSON, organization: Organization) -> Organization:
        """Fill an Organization from its OParl JSON representation.

        Determines the organization type, the body, the start and end dates
        and the location, and strips the type name from the short name when
        the short name is just a copy of the name.
        """
        known_types = self.utils.organization_classification
        type_name = libobject.get("organizationType")

        # E.g. Leipzig sets organizationType: "Gremium" and classification: "Fraktion"
        # for factions, so a known classification takes priority
        classification = libobject.get("classification")
        if classification in known_types:
            type_name = classification

        type_id = known_types.get(type_name)
        if type_id:
            orgtype = OrganizationType.objects.get(id=type_id)
        else:
            orgtype, _created = OrganizationType.objects.get_or_create(
                name=libobject.get("organizationType")
            )
        organization.organization_type = orgtype

        body_id = libobject.get("body")
        if body_id:
            # A case with an extra body should error here, because it would
            # need some extra handling
            organization.body = Body.by_oparl_id(body_id)
        else:
            organization.body = self.default_body

        organization.start = self.utils.parse_date(libobject.get("startDate"))
        organization.end = self.utils.parse_date(libobject.get("endDate"))

        organization.location = self.retrieve(Location, libobject.get("location"))

        if type_name and organization.name == organization.short_name:
            # Remove the type name (case-insensitively), together with an
            # adjacent dash or space, from the short name
            pattern = "[- ]?" + re.escape(type_name) + "[ ]?"
            organization.short_name = re.sub(
                pattern, "", organization.short_name, flags=re.I
            )

        return organization
コード例 #11
0
def externalize(libobject: JSON,
                key_callback: Optional[Set[str]] = None) -> List[CachedObject]:
    """Converts an oparl object with embedded objects to multiple flat json objects

    Embedded objects (a dict, or a non-empty list starting with a dict) are
    replaced in ``libobject`` by their id and are externalized recursively.
    Each one gets a ``mst:backref`` to its parent, list members also a
    ``mst:backrefPosition``. Embedded objects without an id are logged and the
    whole key is removed. The ``geojson`` key is never touched.

    :param libobject: The oparl object; it is modified in place
    :param key_callback: If this is a set, the keys that held embedded objects
        in ``libobject`` itself are added to it
    :return: The flattened embedded objects, followed by ``libobject`` itself
    """

    flattened = []

    # sorted() works on a copy of the keys, so keys can be deleted while iterating
    for key in sorted(libobject):
        # Skip the geojson object
        if key == "geojson":
            continue

        value = libobject[key]

        if isinstance(value, dict):
            if "id" not in value:
                logger.warning(
                    f"Embedded object '{key}' in {libobject['id']} does not have an id, skipping: {value}"
                )
                del libobject[key]
                continue

            if isinstance(key_callback, set):
                key_callback.add(key)
            value["mst:backref"] = libobject["id"]

            flattened.extend(externalize(value))
            libobject[key] = value["id"]
        elif isinstance(value, list) and value and isinstance(value[0], dict):
            if isinstance(key_callback, set):
                key_callback.add(key)
            for pos, child in enumerate(value):
                if "id" not in child:
                    logger.warning(
                        f"Embedded object '{key}' in {libobject['id']} does not have an id, skipping: {child}"
                    )
                    del libobject[key]
                    break

                child["mst:backref"] = libobject["id"]
                # The position is needed for agenda items
                child["mst:backrefPosition"] = pos

                flattened.extend(externalize(child))
                value[pos] = child["id"]

    flattened.append(
        CachedObject(
            url=libobject["id"],
            data=libobject,
            oparl_type=libobject["type"].split("/")[-1],
        ))

    return flattened
コード例 #12
0
    def location(self, libobject: JSON, location: Location) -> Location:
        """Fill a Location from its OParl JSON representation.

        Copies description, geometry and the address fields. A missing
        description is assembled from room, street address, postal code and
        locality. If there is a street address but no geometry, the address
        is geocoded to get one.
        """
        location.description = libobject.get("description")
        location.is_official = self.utils.official_geojson
        # "geojson" may be missing or explicitly null
        location.geometry = (libobject.get("geojson") or {}).get("geometry")

        location.street_address = libobject.get("streetAddress")
        location.room = libobject.get("room")
        location.postal_code = libobject.get("postalCode")
        location.locality = libobject.get("locality")

        if not location.description:
            description = ""
            if location.room:
                description += location.room + ", "
            if location.street_address:
                description += location.street_address + ", "
            if location.locality:
                if location.postal_code:
                    description += location.postal_code + " "
                description += location.locality
            location.description = description

        # If a street_address is present, we try to find the exact location on the map
        if location.street_address and not location.geometry:
            search_str = location.street_address + ", "
            if location.locality:
                # The locality is always part of the query, the postal code
                # only if there is one
                if location.postal_code:
                    search_str += location.postal_code + " "
                search_str += location.locality
            elif self.default_body:
                search_str += self.default_body.short_name
            search_str += " " + settings.GEOEXTRACT_SEARCH_COUNTRY

            location.geometry = geocode(search_str)

        return location
コード例 #13
0
    def meeting(self, libobject: JSON, meeting: Meeting) -> Meeting:
        """Fill a Meeting from its OParl JSON representation.

        Sets start and end, the location, the invitation and protocol files
        and the cancelled flag (False when the key is absent).
        """
        parse_datetime = self.utils.parse_datetime
        meeting.start = parse_datetime(libobject.get("start"))
        meeting.end = parse_datetime(libobject.get("end"))
        meeting.location = self.retrieve(Location, libobject.get("location"))

        file_fields = (
            ("invitation", "invitation"),
            ("verbatim_protocol", "verbatimProtocol"),
            ("results_protocol", "resultsProtocol"),
        )
        for attribute, json_key in file_fields:
            setattr(meeting, attribute,
                    self.retrieve(File, libobject.get(json_key)))

        meeting.cancelled = libobject.get("cancelled", False)

        return meeting
コード例 #14
0
ファイル: cli.py プロジェクト: cyroxx/meine-stadt-transparent
    def get_ags(self, body: Body, system: JSON,
                userinput: str) -> Tuple[str, str]:
        """
        Determines the Amtliche Gemeindeschlüssel (ags) of a body.

        This function tries:
         1. The ags field in the oparl body, which is only accepted with a
            length of 5 or 8 characters
         2. Querying wikidata with
            a) the body's short name
            b) the user's input
            c) the body's full name
            d) the system's name
            e) locality in the location

        Empty candidates (e.g. no user input) are not queried.

        Returns the ags and the name that did match

        Raises a RuntimeError if none of the candidates yields an ags
        """
        ags = body.ags
        if ags:
            if len(ags) in (5, 8):
                return ags, body.short_name
            logger.error("Ignoring ags '{}' with invalid length {}".format(
                ags, len(ags)))

        district = bool(
            re.match(settings.DISTRICT_REGEX, body.name, re.IGNORECASE))

        to_check = [
            ("body short name", body.short_name),
            ("user input", userinput),
            ("body name", body.name),
        ]

        if system.get("name"):
            short_system_name = self.utils.normalize_body_name(system["name"])
            to_check.append(("system name", short_system_name))

        if body.center and body.center.locality:
            to_check.append(("body location locality", body.center.locality))

        for source, value in to_check:
            if not value:
                # There is nothing to look up for an empty candidate
                continue
            ags = city_to_ags(value, district)
            if ags:
                logger.debug("Found ags using the {}: '{}'".format(
                    source, value))
                return ags, value

        raise RuntimeError(
            "Could not determine the Amtliche Gemeindeschlüssel using {}".
            format(to_check))
コード例 #15
0
    def consultation(self, libobject: JSON, consultation: Consultation) -> Consultation:
        """Fill a Consultation from its OParl JSON representation.

        The paper is taken from the "paper" key, falling back to the
        "mst:backref" key when "paper" is missing or empty.
        """
        consultation.authoritative = libobject.get("authoritative")
        consultation.role = libobject.get("role")

        paper_backref = libobject.get("paper") or libobject.get("mst:backref")
        consultation.paper = self.retrieve(Paper, paper_backref)
        consultation.meeting = self.retrieve(Meeting, libobject.get("meeting"))

        return consultation
コード例 #16
0
 def visit(self, data: JSON):
     """Removes quirks like `"streetAddress": " "` in Location

     Works in place and recursively (through nested dicts and dicts inside
     lists): an ``auxiliaryFile`` that is a single object is wrapped into a
     list, and string values that are "N/A", empty or only whitespace are
     deleted.
     """
     # `"auxiliaryFile": { ... }` -> `"auxiliaryFile": [{ ... }]`
     auxiliary_file = data.get("auxiliaryFile")
     if isinstance(auxiliary_file, dict):
         logger.warning(
             f"auxiliaryFile is supposed to be an array of objects, "
             f"but is an object (in {data.get('id')})")
         data["auxiliaryFile"] = [auxiliary_file]

     # Iterate over a snapshot because entries are deleted on the way
     for key, value in tuple(data.items()):
         if isinstance(value, str):
             if value == "N/A" or not value.strip():
                 del data[key]
         elif isinstance(value, dict):
             self.visit(value)
         elif isinstance(value, list):
             for member in value:
                 if isinstance(member, dict):
                     self.visit(member)
コード例 #17
0
    def membership(self, lib_object: JSON, membership: Membership) -> Membership:
        """Fill a Membership from its OParl JSON representation.

        A missing role becomes the translated placeholder "Unknown". The
        person is taken from the "person" key, falling back to the
        "mst:backref" key.
        """
        role = lib_object.get("role") or _("Unknown")
        parse_date = self.utils.parse_date

        membership.start = parse_date(lib_object.get("startDate"))
        membership.end = parse_date(lib_object.get("endDate"))
        membership.role = role

        person_id = lib_object.get("person") or lib_object.get("mst:backref")
        membership.person = self.retrieve(Person, person_id, membership.oparl_id)

        organization_id = lib_object.get("organization")
        membership.organization = self.retrieve(
            Organization, organization_id, membership.oparl_id
        )

        return membership
コード例 #18
0
def externalize(libobject: JSON,
                key_callback: Optional[Set[str]] = None) -> List[CachedObject]:
    """ Converts an oparl object with embedded objects to multiple flat json objects

    Embedded objects (a dict, or a non-empty list starting with a dict) are
    replaced in ``libobject`` by their id and are externalized recursively.
    Each one gets a ``mst:backref`` to its parent, list members also a
    ``mst:backrefPosition``. The ``geojson`` key is never touched.

    :param libobject: The oparl object; it is modified in place
    :param key_callback: If this is a set, the keys that held embedded objects
        in ``libobject`` itself are added to it
    :return: The flattened embedded objects, followed by ``libobject`` itself
    """

    flattened = []

    for key in libobject:
        # Skip the geojson object
        if key == "geojson":
            continue

        value = libobject[key]

        if isinstance(value, dict):
            if isinstance(key_callback, set):
                key_callback.add(key)
            value["mst:backref"] = libobject["id"]

            flattened.extend(externalize(value))
            libobject[key] = value["id"]
        elif isinstance(value, list) and value and isinstance(value[0], dict):
            if isinstance(key_callback, set):
                key_callback.add(key)
            for pos, child in enumerate(value):
                child["mst:backref"] = libobject["id"]
                # The position is needed for agenda items
                child["mst:backrefPosition"] = pos

                flattened.extend(externalize(child))
                value[pos] = child["id"]

    flattened.append(
        CachedObject(
            url=libobject["id"],
            data=libobject,
            oparl_type=libobject["type"].split("/")[-1],
        ))

    return flattened
コード例 #19
0
    def paper(self, libobject: JSON, paper: Paper) -> Paper:
        """Fill a Paper from its OParl JSON representation.

        Sets the paper type (creating it when it does not exist yet), the
        dates, the reference number and the main file. The sort date falls
        back from the legal date to the creation date to the current time.
        """
        type_name = libobject.get("paperType")
        if type_name:
            # The name has to be a lookup argument. When it is only passed
            # through `defaults`, get_or_create() does an unfiltered lookup
            # and hands back an arbitrary existing paper type.
            paper_type, created = PaperType.objects.get_or_create(
                paper_type=type_name)
            paper.paper_type = paper_type
            if created:
                logging.info("Created new paper type {} through {}".format(
                    paper_type, libobject["id"]))

        paper.legal_date = self.utils.parse_date(libobject.get("date"))
        paper.sort_date = (self.utils.date_to_datetime(paper.legal_date)
                           or self.utils.parse_datetime(
                               libobject.get("created")) or timezone.now())
        paper.reference_number = libobject.get("reference")
        paper.main_file = self.retrieve(File, libobject.get("mainFile"))

        return paper
コード例 #20
0
 def body_related(self, lib_object: JSON, body: Body) -> None:
     """Set the legislative terms of a body from its OParl JSON.

     The terms referenced under "legislativeTerm" are resolved with
     ``retrieve_many`` and handed to ``body.legislative_terms.set()``.
     """
     relation = body.legislative_terms
     terms = self.retrieve_many(
         LegislativeTerm, lib_object.get("legislativeTerm"), lib_object["id"]
     )
     relation.set(terms)
コード例 #21
0
    def agenda_item(self, libobject: JSON, item: AgendaItem) -> AgendaItem:
        """Fill an AgendaItem from its OParl JSON representation.

        A missing number becomes the key "-". The meeting is taken from the
        "meeting" key, falling back to the "mst:backref" key.
        """
        item.key = libobject.get("number") or "-"

        # Fields that are copied over unchanged
        plain_fields = (
            ("name", "name"),
            ("public", "public"),
            ("result", "result"),
            ("resolution_text", "resolutionText"),
        )
        for attribute, json_key in plain_fields:
            setattr(item, attribute, libobject.get(json_key))

        parse_datetime = self.utils.parse_datetime
        item.start = parse_datetime(libobject.get("start"))
        item.end = parse_datetime(libobject.get("end"))

        meeting_id = libobject.get("meeting") or libobject.get("mst:backref")
        item.meeting = self.retrieve(Meeting, meeting_id)
        item.position = libobject.get("mst:backrefPosition")

        consultation_id = libobject.get("consultation")
        item.consultation = self.retrieve(Consultation, consultation_id)
        resolution_file_id = libobject.get("resolutionFile")
        item.resolution_file = self.retrieve(File, resolution_file_id)

        return item
コード例 #22
0
 def agenda_item_related(self, lib_object: JSON, item: AgendaItem) -> None:
     """Set the auxiliary files of an agenda item from its OParl JSON.

     The files referenced under "auxiliaryFile" are resolved with
     ``retrieve_many`` and handed to ``item.auxiliary_file.set()``.
     """
     relation = item.auxiliary_file
     files = self.retrieve_many(
         File, lib_object.get("auxiliaryFile"), lib_object["id"]
     )
     relation.set(files)
コード例 #23
0
    def agenda_item(self, lib_object: JSON, item: AgendaItem) -> AgendaItem:
        """Fill an AgendaItem from its OParl JSON representation.

        A missing number becomes the key "-"; keys with more than 20
        characters are cut off (with a warning). The meeting is taken from
        the "meeting" key, falling back to the "mst:backref" key.
        """
        max_key_length = 20
        item.key = lib_object.get("number") or "-"
        if len(item.key) > max_key_length:
            logger.warning(
                f"Overly long AgendaItem key, limiting to 20 character: {item.key}"
            )
            item.key = item.key[:max_key_length]

        # Fields that are copied over unchanged
        plain_fields = (
            ("name", "name"),
            ("public", "public"),
            ("result", "result"),
            ("resolution_text", "resolutionText"),
        )
        for attribute, json_key in plain_fields:
            setattr(item, attribute, lib_object.get(json_key))

        parse_datetime = self.utils.parse_datetime
        item.start = parse_datetime(lib_object.get("start"))
        item.end = parse_datetime(lib_object.get("end"))

        meeting_id = lib_object.get("meeting") or lib_object.get("mst:backref")
        item.meeting = self.retrieve(Meeting, meeting_id, item.oparl_id)
        item.position = lib_object.get("mst:backrefPosition")

        consultation_id = lib_object.get("consultation")
        item.consultation = self.retrieve(Consultation, consultation_id, item.oparl_id)
        resolution_file_id = lib_object.get("resolutionFile")
        item.resolution_file = self.retrieve(File, resolution_file_id, item.oparl_id)

        return item
コード例 #24
0
    def file(self, lib_object: JSON, file: File) -> File:
        """Fill a File from its OParl JSON representation.

        The filename is the given "fileName", else the slugified "name" with
        the pdf extension, else the slugified last segment of "accessUrl";
        the two derived variants are limited to
        ``self.utils.filename_length_cutoff`` characters. Names longer than
        200 characters are shortened and end with an ellipsis.
        """
        cutoff = self.utils.filename_length_cutoff
        given_filename = lib_object.get("fileName")
        title = lib_object.get("name")

        if given_filename:
            filename = given_filename
        elif title:
            # The extension is always the one for pdf; the slug is shortened
            # so that slug and extension together fit into the cutoff
            extension = mimetypes.guess_extension("application/pdf") or ""
            filename = slugify(title)[: cutoff - len(extension)] + extension
        else:
            last_url_segment = lib_object["accessUrl"].split("/")[-1]
            filename = slugify(last_url_segment)[-cutoff:]

        display_name = lib_object.get("name", "")
        if len(display_name) > 200:
            display_name = textwrap.wrap(display_name, 199)[0] + "\u2026"
        file.name = display_name

        file.filename = filename
        file.mime_type = lib_object.get("mimeType") or "application/octet-stream"

        legal_date = self.utils.parse_date(lib_object.get("date"))
        file.legal_date = legal_date
        # Fall back from the legal date to the creation date to the current time
        file.sort_date = (
            self.utils.date_to_datetime(legal_date)
            or self.utils.parse_datetime(lib_object.get("created"))
            or timezone.now()
        )

        file.oparl_access_url = lib_object.get("accessUrl")
        file.oparl_download_url = lib_object.get("downloadUrl")
        file.filesize = None
        file.parsed_text = lib_object.get("text")
        file.license = lib_object.get("fileLicense")

        # Locations attached to files are currently not handled due to
        # the lack of data and the separate location extraction

        return file