Exemple #1
0
 def test_normalize_body_name(self):
     """Check ``Utils.normalize_body_name`` against a few known body names.

     The inputs cover a name with a "Stadt" prefix and doubled whitespace,
     a name that is expected to come back unchanged, and a name with
     doubled inner whitespace plus a trailing space.
     """
     normalizer = Utils()
     # (expected short name, raw body name), asserted in this order
     cases = [
         ("Bedburg", "Stadt  Bedburg"),
         ("Leipzig", "Leipzig"),
         ("Bad Münstereifel", "Stadt Bad  Münstereifel "),
     ]
     for expected, raw_name in cases:
         self.assertEqual(expected, normalizer.normalize_body_name(raw_name))
Exemple #2
0
    def __init__(
        self,
        loader: BaseLoader,
        utils: Optional[Utils] = None,
        default_body: Optional[Body] = None,
    ):
        """Store the collaborators and make sure the organization types exist.

        :param loader: Loader instance kept on ``self.loader``.
        :param utils: Helper object to reuse; a fresh ``Utils()`` is created
            when none (or a falsy value) is passed.
        :param default_body: Optional body kept on ``self.default_body``.
        """
        self.loader = loader
        if utils:
            self.utils = utils
        else:
            self.utils = Utils()
        self.default_body = default_body
        # Defaults to True; some tests set this to False
        self.warn_missing = True

        self.ensure_organization_type()
    def setUpClass(cls):
        """Load the dummy JSON fixtures and build the shared loader and converter.

        Every ``*.json`` file in the ``cls.dummy_data`` directory is parsed and
        stored in ``cls.api_data`` under its ``id``. The entries yielded by
        ``externalize(data)`` are stored as well, but only if their id has not
        been registered yet. ``cls.loader`` shares this dict through its
        ``api_data`` attribute.
        """
        super().setUpClass()
        cls.api_data = {}
        cls.loader = MockLoader()
        cls.loader.api_data = cls.api_data
        for file in os.listdir(cls.dummy_data):
            if not file.endswith(".json"):
                continue

            # JSON is UTF-8; don't rely on the platform's locale encoding,
            # which would mis-decode non-ASCII fixture content on some systems
            path = os.path.join(cls.dummy_data, file)
            with open(path, encoding="utf-8") as fp:
                data = json.load(fp)

            # The top-level object always (over)writes the entry for its id ...
            cls.api_data[data["id"]] = data
            # ... while externalized entries never replace an existing entry
            for entry in externalize(data):
                cls.api_data.setdefault(entry.data["id"], entry.data)

        # Used by test_location_default_body
        body = Body()
        body.short_name = "München"
        cls.converter = JsonToDb(cls.loader, default_body=body)
        cls.converter.warn_missing = False
        cls.utils = Utils()
Exemple #4
0
 def __init__(self):
     """Start with an empty index and a fresh ``Utils`` helper."""
     # Entries are 3-tuples of strings
     self.index: List[Tuple[str, str, str]] = []
     self.utils = Utils()
Exemple #5
0
class Cli:
    """Tries to import all required data (oparl system id, ags and city name for osm) from a user input
    that might be the name of a city or the url of a body.

    A url that doesn't point to an oparl body (e.g. the url of a system) is rejected by
    :meth:`get_endpoint_from_body_url`.

    It uses the information from wikidata, open street map and the oparl mirror.
    """

    def __init__(self):
        # (name, original body id, mirror body id) for each known body, filled lazily by load_index()
        self.index: List[Tuple[str, str, str]] = []
        self.utils = Utils()

    def load_index(self) -> List[Tuple[str, str, str]]:
        """Loads the list of known endpoints from the oparl mirror if it has not been loaded yet.

        Starting at ``settings.OPARL_INDEX``, every page is fetched and the ``next`` link is followed
        until there is none. Bodies without an ``oparl-mirror:originalId`` are skipped.

        Returns the cached list of ``(name, original id, mirror id)`` tuples, where the name falls back
        to the original id if the body has no name.
        """
        if self.index:
            return self.index

        next_page = settings.OPARL_INDEX
        while next_page:
            data = requests_get(next_page).json()
            next_page = data["links"].get("next")
            for body in data["data"]:
                if "oparl-mirror:originalId" not in body:
                    continue
                original_id = body["oparl-mirror:originalId"]
                self.index.append(
                    (body.get("name") or original_id, original_id, body["id"])
                )

        return self.index

    def from_userinput(
        self,
        userinput: str,
        mirror: bool,
        ags: Optional[str],
        skip_body_extra: bool = False,
        skip_files: bool = False,
    ) -> None:
        """Runs the whole import for the body described by ``userinput``.

        :param userinput: Name of a city or url of a body
        :param mirror: Passed on to :meth:`get_entrypoint_and_body`
        :param ags: Amtlicher Gemeindeschlüssel to use instead of looking it up
        :param skip_body_extra: Skip importing the outline and the streets of the body
        :param skip_files: Skip loading the files
        """
        body_id, entrypoint = self.get_entrypoint_and_body(userinput, mirror)
        importer = Importer(get_loader_from_system(entrypoint))
        body_data, dotenv = self.import_body_and_metadata(
            body_id, importer, userinput, ags, skip_body_extra
        )

        logger.info("Loading the bulk data from the oparl api")
        importer.fetch_lists_initial([body_data])

        # Also avoid "MySQL server has gone away" errors due to timeouts
        # https://stackoverflow.com/a/32720475/3549270
        db.close_old_connections()

        logger.info("Loading the data into the database")
        importer.import_objects()

        if not skip_files:
            logger.info("Loading the files")
            importer.load_files(fallback_city=userinput)

        if dotenv:
            logger.info(
                f"Done! Please add the following line to your dotenv file: \n\n{dotenv}\n"
            )

    def import_body_and_metadata(
        self,
        body_id: str,
        importer: Importer,
        userinput: str,
        ags: Optional[str],
        skip_body_extra: bool = False,
    ) -> Tuple[JSON, str]:
        """Imports the body, sets its ags and, unless skipped, its outline and streets.

        If ``ags`` is given it is used as is (with a warning if it doesn't have 5 or 8 characters),
        otherwise it is determined with :meth:`get_ags`, which also replaces the body's short name
        by the name that matched.

        Returns the data of the loaded body and the dotenv line(s) the user should add, which is an
        empty string if the body already is ``settings.SITE_DEFAULT_BODY``.
        """
        logger.info(f"Fetching the body {body_id}")
        [body_data] = importer.load_bodies(body_id)
        logger.info("Importing the body")
        [body] = importer.import_bodies()
        importer.converter.default_body = body

        logger.info("Looking up the Amtliche Gemeindeschlüssel")
        if ags:
            if len(ags) not in (5, 8):
                logger.warning(
                    f"Your Amtlicher Gemeindeschlüssel has {len(ags)} digits instead of 5 or 8"
                )
            body.ags = ags
        else:
            ags, match_name = self.get_ags(body, importer.loader.system, userinput)
            body.ags = ags
            # Sometimes there's a bad short name (e.g. "Rat" for Erkelenz),
            # so we use the name that's in wikidata instead
            body.short_name = match_name
        body.save()
        logger.info(
            f"Using {body.ags} as Amtliche Gemeindeschlüssel for '{body.short_name}'"
        )

        dotenv = ""
        if body.id != settings.SITE_DEFAULT_BODY:
            dotenv += f"SITE_DEFAULT_BODY={body.id}\n"
        if dotenv:
            logger.info(
                "Found the oparl endpoint. Please add the following line to your dotenv file "
                "(you'll be reminded again after the import finished): \n\n" + dotenv
            )

        if not skip_body_extra:
            logger.info("Importing the shape of the city")
            import_outline(body)
            logger.info("Importing the streets")
            import_streets(body)
            logger.info(f"Body {body.short_name} import with geo data successful.")
        else:
            logger.info(
                f"Body {body.short_name} import successful. "
                f"Don't forget to run import_streets, import_amenities and import_outline"
            )
        return body_data.data, dotenv

    def get_entrypoint_and_body(
        self, userinput: str, mirror: bool = False
    ) -> Tuple[str, str]:
        """Resolves the user input to a body id and a system id.

        If the input validates as url it is treated as the url of a body, otherwise as city name.

        Note that, despite the method's name, the body id is returned first:
        ``(body_id, entrypoint)``.
        """
        try:
            URLValidator()(userinput)
        except ValidationError:
            entrypoint, body_id = self.get_endpoint_from_cityname(userinput, mirror)
        else:
            entrypoint, body_id = self.get_endpoint_from_body_url(userinput)

        logger.info(f"Your body id is {body_id} and your system id is {entrypoint}")

        return body_id, entrypoint

    def get_endpoint_from_body_url(self, userinput: str) -> Tuple[str, str]:
        """Fetches the given url and returns ``(system id, body id)`` from the response.

        Raises a RuntimeError if the response isn't an oparl 1.0 or 1.1 body. A mismatch between
        the url and the body's id is only logged as warning.
        """
        # We can't use the resolver here as we don't know the system url yet, which the resolver needs for determining
        # the cache folder
        logger.info(f"Using {userinput} as url")
        data = requests_get(userinput).json()
        if data.get("type") not in [
            "https://schema.oparl.org/1.0/Body",
            "https://schema.oparl.org/1.1/Body",
        ]:
            raise RuntimeError("The url you provided didn't point to an oparl body")
        endpoint_system = data["system"]
        endpoint_id = data["id"]
        if userinput != endpoint_id:
            logger.warning(
                f"The body's url '{userinput}' doesn't match the body's id '{endpoint_id}'"
            )
        return endpoint_system, endpoint_id

    def get_ags(self, body: Body, system: JSON, userinput: str) -> Tuple[str, str]:
        """
        This function tries:
         1. The ags field in the oparl body
         2. Querying wikidata with
            a) the body's short name
            b) the user's input
            c) the body's full name
            d) the system's name
            e) locality in the location

        An ags in the body that doesn't have 5 or 8 characters is ignored with an error message.

        Returns the ags and the name that did match. Raises a RuntimeError if none of the
        candidates yields an ags.
        """
        ags = body.ags
        if ags:
            if len(ags) in (5, 8):
                return ags, body.short_name
            logger.error(f"Ignoring ags '{ags}' with invalid length {len(ags)}")

        district = bool(re.match(settings.DISTRICT_REGEX, body.name, re.IGNORECASE))

        # (description for the log, name to look up), tried in this order
        to_check = [
            ("body short name", body.short_name),
            ("user input", userinput),
            ("body name", body.name),
        ]

        if system.get("name"):
            short_system_name = self.utils.normalize_body_name(system["name"])
            to_check.append(("system name", short_system_name))

        if body.center and body.center.locality:
            to_check.append(("body location locality", body.center.locality))

        for source, value in to_check:
            ags = city_to_ags(value, district)
            if ags:
                logger.debug(f"Found ags using the {source}: '{value}'")
                return ags, value

        raise RuntimeError(
            f"Could not determine the Amtliche Gemeindeschlüssel using {to_check}.\n"
            f"Set it manually using `--ags`"
        )

    def get_endpoint_from_cityname(
        self, userinput: str, mirror: bool
    ) -> Tuple[str, str]:
        """Searches the index for bodies whose name contains ``userinput`` (case-insensitive).

        With more than one hit, a single exact (case-insensitive) name match wins.

        Returns ``(system id, body id)``, where the body id is the mirror's id if ``mirror`` is set
        and the original id otherwise. Raises a RuntimeError if there is no match or if the
        matches are ambiguous.
        """
        needle = userinput.casefold()
        matching: List[Tuple[str, str, str]] = []
        for (name, original_id, mirror_id) in self.load_index():
            if needle not in name.casefold():
                continue
            # The oparl mirror doesn't give us the system id we need
            system_id = requests_get(original_id).json()["system"]
            matching.append((name, system_id, mirror_id if mirror else original_id))

        if not matching:
            raise RuntimeError(
                f"Could not find anything for '{userinput}'. "
                f"Please check that it is in the list under {settings.OPARL_INDEX} or provide the body id."
            )
        if len(matching) > 1:
            exact_matches = [i for i in matching if i[0].casefold() == needle]
            if len(exact_matches) == 1:
                matching = exact_matches
            else:
                logger.warning(f"Found those entries: {json.dumps(matching, indent=4)}")
                raise RuntimeError(
                    f"There are {len(matching)} matches and {len(exact_matches)} exact matches for '{userinput}' and "
                    "I can't decide which one to use. Please provide a body url yourself."
                )
        _, system_id, body_id = matching[0]
        return system_id, body_id