def fetch_senateurs() -> bytes:
    """
    Download the resource at ``URL`` and return its raw body.

    Returns:
        The undecoded response body.

    Raises:
        RuntimeError: if the response status is anything other than 200 OK.
    """
    response = get_http_session().get(URL)
    if response.status_code == HTTPStatus.OK:
        payload: bytes = response.content
        return payload
    raise RuntimeError("Failed to download senateurs CSV file")
def _retrieve_content(
    url: str, force_list: Optional[Tuple[str, ...]] = None
) -> Dict[str, OrderedDict]:
    """
    Fetch an XML document over HTTP and parse it with ``xmltodict``.

    Args:
        url: address of the document to download.
        force_list: forwarded unchanged to ``xmltodict.parse`` as its
            ``force_list`` argument.

    Returns:
        The mapping produced by ``xmltodict.parse`` for the response body.

    Raises:
        NotFound: if the connection fails, the server answers 404 or 500,
            or the response body is empty.
        FetchError: for any other response with a status code >= 400.
    """
    logger.info("Récupération de %r", url)

    http_session = get_http_session()

    try:
        resp = http_session.get(url)
    except RequestsConnectionError as exc:
        # Keep the original network error attached as the explicit cause so
        # it is not lost when the NotFound is logged or inspected upstream.
        raise NotFound(url) from exc

    # Due to a configuration change on the AN web server, we now get a 500 error
    # for abandoned or non-existing amendements, so we'll consider this a 404 too :(
    if resp.status_code in (
        HTTPStatus.NOT_FOUND,
        HTTPStatus.INTERNAL_SERVER_ERROR,
    ):
        raise NotFound(url)

    # Sometimes the URL returns a 200 but the content is empty which leads to
    # a parsing error from xmltodict if not handled manually before.
    # NOTE: this check deliberately stays ahead of the generic error check,
    # so an empty body is reported as NotFound whatever the status code.
    if not resp.content:
        raise NotFound(url)

    # Other errors
    if resp.status_code >= 400:
        raise FetchError(url, resp)

    result: OrderedDict = xmltodict.parse(resp.content, force_list=force_list)
    return result
def download_textes_recents() -> str:
    """
    Download the resource at ``TEXTES_RECENTS_URL`` and return it as text.

    Returns:
        The decoded response body.

    Raises:
        RuntimeError: if the response status is anything other than 200 OK.
    """
    response = get_http_session().get(TEXTES_RECENTS_URL)
    if response.status_code == HTTPStatus.OK:
        body: str = response.text
        return body
    raise RuntimeError("Failed to download textes recents from senat.fr")
def download_rss(url: str) -> str:
    """
    Download the document at ``url`` and return it as text.

    Args:
        url: address of the feed to download.

    Returns:
        The decoded response body.

    Raises:
        RuntimeError: if the response status is anything other than 200 OK.
    """
    response = get_http_session().get(url)
    if response.status_code == HTTPStatus.OK:
        feed: str = response.text
        return feed
    raise RuntimeError(f"Failed to download RSS url: {url}")
def _fetch_discussion_details(
    lecture: Lecture,
) -> Iterator[Tuple[Any, MissionRef]]:
    """
    Fetch the amendements to be discussed, in order of passage.

    NB: amendements ruled inadmissible are not included.

    Yields one ``(decoded JSON body, mission_ref)`` pair per URL produced by
    ``derouleur_urls_and_mission_refs(lecture)``. URLs answering 404 or
    returning an empty body are logged as warnings and skipped.
    """
    session = get_http_session()
    for url, mission_ref in derouleur_urls_and_mission_refs(lecture):
        response = session.get(url)
        if response.status_code == HTTPStatus.NOT_FOUND:  # 404
            logger.warning(f"Could not fetch {url}")
        elif response.text == "":
            logger.warning(f"Empty response for {url}")
        else:
            yield response.json(), mission_ref
def extract_from_remote_zip(
    url: str,
) -> Generator[Tuple[str, IO[str]], None, None]:
    """
    Download a zip archive and yield what ``extract_from_zip`` extracts.

    The whole response body is wrapped in an in-memory ``BytesIO`` buffer
    and handed to ``extract_from_zip``.

    Args:
        url: address of the zip archive.

    Raises:
        RuntimeError: if the status code is neither 200 nor 304, or if the
            ``content-type`` header is missing or not ``application/zip``.
            The message is logged as an error before being raised.
    """
    http_session = get_http_session()
    response = http_session.get(url)

    if response.status_code not in (HTTPStatus.OK, HTTPStatus.NOT_MODIFIED):
        message = f"Unexpected status code {response.status_code} while fetching {url}"
        logger.error(message)
        raise RuntimeError(message)

    # Use .get() so that a response without any Content-Type header takes the
    # same logged RuntimeError path as a wrong one, rather than escaping as a
    # bare KeyError.
    content_type = response.headers.get("content-type")
    if content_type != "application/zip":
        message = (
            f"Unexpected content type {content_type} while fetching {url} "
            "(expected application/zip)"
        )
        logger.error(message)
        raise RuntimeError(message)

    yield from extract_from_zip(BytesIO(response.content))
def _fetch_all(lecture: Lecture, dry_run: bool = False) -> List[OrderedDict]:
    """
    Fetch all amendements, in order of filing.

    The response body is decoded as cp1252, its first line is dropped, each
    remaining line goes through ``_filter_line``, and the result is parsed
    as tab-separated values with ``csv.DictReader``.

    Args:
        lecture: used to build the URL via ``_build_amendements_url``.
        dry_run: when true, the request is still made (so a 404 still
            raises) but an empty list is returned without parsing.

    Raises:
        NotFound: if the server answers 404.
    """
    session = get_http_session()
    url = _build_amendements_url(lecture)
    response = session.get(url)

    if response.status_code == HTTPStatus.NOT_FOUND:
        raise NotFound(url)
    if dry_run:
        return []

    decoded = response.content.decode("cp1252")
    # Drop the first line; the DictReader header is the line after it.
    data_lines = decoded.splitlines()[1:]
    cleaned = list(map(_filter_line, data_lines))
    return list(csv.DictReader(cleaned, delimiter="\t"))