Exemple #1
0
 def get_endpoint_from_cityname(self, userinput: str,
                                mirror: bool) -> Tuple[str, str]:
     """Resolve a city name to the system id and body id of an index entry.

     Every index entry whose name contains ``userinput`` (compared with
     ``casefold()``) is a candidate. For each candidate the original body is
     requested to read its ``system`` field. If more than one candidate is
     found, a single exact (casefolded) name match decides.

     :param userinput: The (partial) name to look for
     :param mirror: If true, the mirror id is returned as body id, otherwise the original id
     :return: The system id and the body id of the selected entry
     :raises RuntimeError: If nothing matches or the selection is ambiguous
     """
     needle = userinput.casefold()
     candidates = []  # type: List[Tuple[str, str, str]]
     for name, original_id, mirror_id in self.load_index():
         if needle not in name.casefold():
             continue
         # The oparl mirror doesn't give us the system id we need
         system_id = requests_get(original_id).json()["system"]
         body_id = mirror_id if mirror else original_id
         candidates.append((name, system_id, body_id))

     if not candidates:
         raise RuntimeError(
             f"Could not find anything for '{userinput}'. "
             f"Please check that it is in the list under {settings.OPARL_INDEX} or provide the body id."
         )

     if len(candidates) == 1:
         chosen = candidates[0]
     else:
         # Several substring matches: only a unique exact match is unambiguous
         exact_matches = [
             entry for entry in candidates if entry[0].casefold() == needle
         ]
         if len(exact_matches) != 1:
             logger.warning(
                 f"Found those entries: {json.dumps(candidates, indent=4)}")
             raise RuntimeError(
                 f"There are {len(candidates)} matches and {len(exact_matches)} exact matches for '{userinput}' and "
                 "I can't decide which one to use. Please provide a body url yourself."
             )
         chosen = exact_matches[0]

     # Drop the name, keep (system id, body id)
     return chosen[1:3]
 def get_endpoint_from_cityname(self, userinput: str,
                                mirror: bool) -> Tuple[str, str]:
     """Look up the system id and body id for a city name in the index.

     Index entries whose casefolded name contains the casefolded ``userinput``
     are collected; the original body of each one is requested to read its
     ``system`` field. With several hits, exactly one exact name match is
     required to pick a winner.

     :param userinput: The (partial) name to look for
     :param mirror: If true, the mirror id is returned as body id, otherwise the original id
     :return: The system id and the body id of the selected entry
     :raises RuntimeError: If there is no hit or the hits are ambiguous
     """
     hits = []  # type: List[Tuple[str, str, str]]
     for name, original_id, mirror_id in self.load_index():
         if userinput.casefold() in name.casefold():
             # The oparl mirror doesn't give us the system id we need
             body = requests_get(original_id).json()
             endpoint_id = mirror_id if mirror else original_id
             hits.append((name, body["system"], endpoint_id))

     if not hits:
         raise RuntimeError(
             "Could not find anything for '{}'".format(userinput))

     if len(hits) > 1:
         exact_hits = [
             hit for hit in hits if hit[0].casefold() == userinput.casefold()
         ]
         if len(exact_hits) != 1:
             logger.warning("Found those entries: {}".format(
                 json.dumps(hits, indent=4)))
             message = (
                 "There are {} matches and {} exact matchs for '{}' and I can't decide which one to use. "
                 + "Please provide a url yourself.")
             raise RuntimeError(
                 message.format(len(hits), len(exact_hits), userinput))
         hits = exact_hits

     _, system_id, endpoint_id = hits[0]
     return system_id, endpoint_id
def file_serve_proxy(request: HttpRequest,
                     original_file_id: int) -> StreamingHttpResponse:
    """Util to proxy back to the original RIS in case we don't want to download all the files.

    The file is first looked up through ``get_object_or_404``. Its content is then
    streamed from the url built from ``settings.PROXY_ONLY_TEMPLATE``, passing on
    the upstream status code and content type.

    :param request: The incoming request (not used by this view)
    :param original_file_id: Id of the ``File`` to look up, also inserted into the url template
    :return: A streaming response relaying the upstream body
    """
    # Ensure that the file is not deleted in the database
    get_object_or_404(File, id=original_file_id)

    url = settings.PROXY_ONLY_TEMPLATE.format(original_file_id)
    response = requests_get(url, stream=True)
    return StreamingHttpResponse(
        response.iter_content(chunk_size=None),
        status=response.status_code,
        # Forward the upstream content type; otherwise Django falls back to its
        # default content type for every proxied file. A missing header yields
        # None, which keeps that default.
        content_type=response.headers.get("Content-Type"),
    )
 def load(self, url: str, query: Optional[Dict[str, str]] = None) -> JSON:
     """Request ``url`` and return the decoded json body.

     A warning is logged if the body has an ``id`` that differs from ``url``.

     :param url: The url to request
     :param query: Optional query parameters passed to the request
     :return: The decoded json, or an empty dict if the body decoded to ``None``
     """
     logger.debug("Loader is loading {}".format(url))
     if query is None:
         query = dict()
     response = requests_get(url, params=query)
     data = response.json()
     if data is None:
         # json() can actually return None; without this guard the membership
         # test below raises a TypeError
         data = dict()
     if "id" in data and data["id"] != url:
         logger.warning(
             "Mismatch between url and id. url: {} id: {}".format(
                 url, data["id"]))
     return data
Exemple #5
0
 def load(self, url: str, query: Optional[Dict[str, str]] = None) -> JSON:
     """Request ``url`` and return the decoded json body.

     A warning is logged if the body has an ``id`` that differs from ``url``.

     :param url: The url to request
     :param query: Optional query parameters passed to the request
     :return: The decoded json, or an empty dict if the body decoded to ``None``
     """
     logger.debug(f"Loader is loading {url}")
     params = dict() if query is None else query
     data = requests_get(url, params=params).json()
     # json() can actually return None
     if data is None:
         data = dict()
     if "id" in data:
         reported_id = data["id"]
         if reported_id != url:
             logger.warning(f"Mismatch between url and id. url: {url} id: {reported_id}")
     return data
Exemple #6
0
    def load(self, url: str, query: Optional[dict] = None) -> JSON:
        """Request ``url``, tolerate one 500 and badly escaped json, then visit the result.

        A request failing with an ``HTTPError`` of status 500 is repeated once.
        If the body is not valid json, unescaped control characters are
        replaced by their ``\\uXXXX`` escapes and parsing is tried again.

        :param url: The url to request
        :param query: Optional query parameters passed to the request
        :return: The decoded json (an empty dict if it decoded to ``None``), after ``self.visit`` was called on it
        """
        logger.debug(f"Loader is loading {url}")
        params = dict() if query is None else query

        try:
            response = requests_get(url, params=params)
        except HTTPError as e:
            if e.response.status_code != 500:
                raise
            logger.error(f"Got an 500 for a CC e-gov request, retrying: {e}")
            response = requests_get(url, params=params)

        text = response.text
        try:
            data = json.loads(text)
        except JSONDecodeError:
            logger.error(
                f"The server returned invalid json. This is a bug in the OParl implementation: {url}"
            )
            # Hack based on the std json code to load broken json where the control characters (U+0000 through
            # U+001F except \n) weren't properly escaped
            escapes = {chr(i): "\\u{0:04x}".format(i) for i in range(0x20)}
            control_chars = re.compile(r"[\x00-\x09\x0B-\x1f]")
            text = control_chars.sub(lambda match: escapes[match.group(0)], text)
            data = json.loads(text)

        # json.loads returns None for a literal null
        if data is None:
            data = dict()
        if "id" in data:
            reported_id = data["id"]
            if reported_id != url:
                logger.warning(f"Mismatch between url and id. url: {url} id: {reported_id}")

        self.visit(data)
        return data
Exemple #7
0
def get_loader_from_system(entrypoint: str) -> BaseLoader:
    """Request the system object at ``entrypoint`` and pick the loader for it.

    The ``contactName`` and ``vendor`` fields select a vendor specific loader;
    without a match the plain ``BaseLoader`` is used.

    :param entrypoint: Url of the system object
    :return: A loader constructed with the decoded system object
    """
    system = requests_get(entrypoint).json()

    if system.get("contactName") == "STERNBERG Software GmbH & Co. KG":
        logger.info("Using Sternberg patches")
        return SternbergLoader(system)

    if system.get("vendor") in ("http://cc-egov.de/", "https://www.cc-egov.de"):
        logger.info("Using CC e-gov patches")
        return CCEgovLoader(system)

    logger.info("Using no vendor specific patches")
    return BaseLoader(system)
    def load(self, url: str, query: Optional[Dict[str, str]] = None) -> JSON:
        """Request ``url`` with the query appended unencoded, retrying once on a 500.

        :param url: The url to request
        :param query: Optional query parameters, joined onto the url as plain ``key=value`` pairs
        :return: The decoded json body
        """
        if query:
            # Somacos doesn't like encoded urls
            pairs = [key + "=" + value for key, value in query.items()]
            url = url + "?" + "&".join(pairs)
        logger.debug("Loader is loading {}".format(url))
        try:
            response = requests_get(url)
        except HTTPError as e:
            if e.response.status_code != 500:
                raise
            logger.error(
                f"Got an 500 for a Somacos request, retrying: {e}")
            response = requests_get(url)

        data = response.json()
        # Note that `url` already includes the query string at this point
        if "id" in data:
            reported_id = data["id"]
            if reported_id != url:
                logger.warning(
                    "Mismatch between url and id. url: {} id: {}".format(
                        url, reported_id))
        return data
def get_loader_from_system(entrypoint: str) -> BaseLoader:
    """Request the system object at ``entrypoint`` and pick the loader for it.

    The ``contactName``, ``vendor`` and ``product`` fields select a vendor
    specific loader; without a match the plain ``BaseLoader`` is used.

    :param entrypoint: Url of the system object
    :return: A loader constructed with the decoded system object
    """
    system = requests_get(entrypoint).json()
    vendor = system.get("vendor")

    if system.get("contactName") == "STERNBERG Software GmbH & Co. KG":
        loader_class, message = SternbergLoader, "Using Sternberg patches"
    elif vendor in ("http://cc-egov.de/", "https://www.cc-egov.de"):
        loader_class, message = CCEgovLoader, "Using CC e-gov patches"
    elif (vendor == "http://www.somacos.de" or system.get("product")
          == "Sitzungsmanagementsystem Session  Copyright SOMACOS GmbH & Co. KG"):
        loader_class, message = SomacosLoader, "Using Somacos patches "
    else:
        loader_class, message = BaseLoader, "Using no vendor specific patches"

    logger.info(message)
    return loader_class(system)
Exemple #10
0
    def load_index(self) -> List[Tuple[str, str, str]]:
        """Loads the list of known endpoints from the oparl mirror if it has not been loaded yet.

        Pages are followed through ``links.next`` starting at ``settings.OPARL_INDEX``.
        Bodies without an ``oparl-mirror:originalId`` are skipped.

        :return: ``self.index``, a list of (name or original id, original id, mirror id) tuples
        """
        if self.index:
            return self.index

        page_url = settings.OPARL_INDEX
        while page_url:
            page = requests_get(page_url).json()
            page_url = page["links"].get("next")
            for body in page["data"]:
                if "oparl-mirror:originalId" in body:
                    original_id = body["oparl-mirror:originalId"]
                    # Fall back to the original id for bodies without a name
                    label = body.get("name") or original_id
                    self.index.append((label, original_id, body["id"]))

        return self.index
Exemple #11
0
 def get_endpoint_from_body_url(self, userinput: str) -> Tuple[str, str]:
     """Request ``userinput`` as a body url and return its system id and body id.

     :param userinput: The url that should point to an oparl body
     :return: The ``system`` and the ``id`` field of the fetched body
     :raises RuntimeError: If the fetched object's ``type`` is not an OParl 1.0 or 1.1 body
     """
     # We can't use the resolver here as we don't know the system url yet, which the resolver needs for determining
     # the cache folder
     # Use the module logger like the rest of this code, not the root `logging` module
     logger.info(f"Using {userinput} as url")
     response = requests_get(userinput)
     data = response.json()
     if data.get("type") not in [
             "https://schema.oparl.org/1.0/Body",
             "https://schema.oparl.org/1.1/Body",
     ]:
         raise RuntimeError(
             "The url you provided didn't point to an oparl body")
     endpoint_system = data["system"]
     endpoint_id = data["id"]
     if userinput != endpoint_id:
         logger.warning(
             f"The body's url '{userinput}' doesn't match the body's id '{endpoint_id}'"
         )
     return endpoint_system, endpoint_id
Exemple #12
0
def get_loader_from_body(body_id: str) -> BaseLoader:
    """
    Pick the loader for the system that the body `body_id` links to.

    The body is taken from a `CachedObject` with that url if one exists;
    otherwise it is requested and stored as a new `CachedObject`.

    Assumptions:
     * The body->system link hasn't changed
     * The system might have, e.g. to a newer version where we don't need workarounds anymore
    """
    cached_body = CachedObject.objects.filter(url=body_id).first()
    if not cached_body:
        logger.info(f"Fetching the body {body_id}")
        body = requests_get(body_id).json()
        CachedObject.objects.create(
            url=body["id"], oparl_type=body["type"], data=body, to_import=False
        )
        system_id = body["system"]
    else:
        logger.info(f"The body {body_id} is cached")
        system_id = cached_body.data["system"]

    return get_loader_from_system(system_id)
 def get_with_retry_on_500(self, url: str) -> Response:
     """Custom retry logic with logging and backoff.

     Only an ``HTTPError`` whose response has status 500 is retried, after
     sleeping ``self.error_sleep_seconds``. Any other error propagates
     immediately.

     :param url: The url to request
     :return: The response of the first request that did not raise
     :raises HTTPError: For a non-500 error, or once ``self.max_retries`` attempts got a 500
     """
     current_try = 1
     while True:
         try:
             return requests_get(url)
         except HTTPError as e:
             if e.response.status_code != 500:
                 raise
             # `>=` rather than `==` so that a `max_retries` below 1 aborts on
             # the first 500 instead of retrying forever
             if current_try >= self.max_retries:
                 logger.error(
                     f"Request failed {self.max_retries} times with an Error 500, aborting: {e}"
                 )
                 raise
             logger.error(
                 f"Got an 500 for a Somacos request, retrying after sleeping {self.error_sleep_seconds}s: {e}"
             )
             time.sleep(self.error_sleep_seconds)
             current_try += 1
 def load_file(self, url: str) -> Tuple[bytes, Optional[str]]:
     """Request ``url`` and return the content and the content type.

     :param url: The url of the file to request
     :return: The response content and the value of its ``Content-Type`` header
     """
     file_response = requests_get(url)
     return file_response.content, file_response.headers.get("Content-Type")