Example #1
def fetch_senateurs() -> bytes:
    http_session = get_http_session()
    resp = http_session.get(URL)
    if resp.status_code != HTTPStatus.OK:  # 200
        raise RuntimeError("Failed to download senateurs CSV file")
    content: bytes = resp.content
    return content
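
Every example below relies on a shared get_http_session() helper whose definition is not shown. A minimal sketch of what such a helper could look like, assuming it simply wraps a requests.Session with retry-enabled adapters (the real project may configure it differently):

# Sketch only: assumes the helper returns a requests.Session with retries.
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

def get_http_session() -> requests.Session:
    session = requests.Session()
    # Retry transient failures a few times before giving up.
    retries = Retry(total=3, backoff_factor=0.5, status_forcelist=(502, 503, 504))
    adapter = HTTPAdapter(max_retries=retries)
    session.mount("https://", adapter)
    session.mount("http://", adapter)
    return session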
Example #2
def _retrieve_content(
        url: str,
        force_list: Optional[Tuple[str]] = None) -> Dict[str, OrderedDict]:
    logger.info("Récupération de %r", url)
    http_session = get_http_session()
    try:
        resp = http_session.get(url)
    except RequestsConnectionError:
        raise NotFound(url)

    if resp.status_code == HTTPStatus.NOT_FOUND:
        raise NotFound(url)

    # Due to a configuration change on the AN web server, we now get a 500 error
    # for abandoned or non-existing amendements, so we'll consider this a 404 too :(
    if resp.status_code == HTTPStatus.INTERNAL_SERVER_ERROR:
        raise NotFound(url)

    # Sometimes the URL returns a 200 but the content is empty which leads to
    # a parsing error from xmltodict if not handled manually before.
    if not resp.content:
        raise NotFound(url)

    # Other errors
    if resp.status_code >= 400:
        raise FetchError(url, resp)

    result: OrderedDict = xmltodict.parse(resp.content, force_list=force_list)
    return result
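
The force_list argument is forwarded to xmltodict.parse. By default xmltodict turns an element into a list only when it occurs more than once, so a feed containing a single entry comes back as a plain dict and iteration code breaks; force_list makes the shape predictable. A self-contained illustration (the "amendement" tag is only an assumed example name, not necessarily what the real feeds use):

import xmltodict

xml = "<amendements><amendement>1</amendement></amendements>"

# A single occurrence is parsed as a plain value, not a list.
parsed = xmltodict.parse(xml)
assert parsed["amendements"]["amendement"] == "1"

# force_list yields the same shape no matter how many elements occur.
parsed = xmltodict.parse(xml, force_list=("amendement",))
assert parsed["amendements"]["amendement"] == ["1"]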
Example #3
def download_textes_recents() -> str:
    http_session = get_http_session()
    resp = http_session.get(TEXTES_RECENTS_URL)
    if resp.status_code != HTTPStatus.OK:
        raise RuntimeError("Failed to download textes recents from senat.fr")

    content: str = resp.text
    return content
Example #4
def download_rss(url: str) -> str:
    http_session = get_http_session()
    resp = http_session.get(url)
    if resp.status_code != HTTPStatus.OK:
        raise RuntimeError(f"Failed to download RSS url: {url}")

    content: str = resp.text
    return content
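
download_rss returns the raw feed body and leaves parsing to the caller. A hedged usage sketch using only the standard library, with a placeholder URL and generic RSS item/title tags rather than anything taken from the project:

import xml.etree.ElementTree as ElementTree

feed_url = "https://example.org/feed.rss"  # placeholder URL, for illustration only
raw = download_rss(feed_url)
root = ElementTree.fromstring(raw)
for item in root.iter("item"):
    # Print each entry's title, if present.
    print(item.findtext("title", default=""))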
Example #5
def _fetch_discussion_details(
        lecture: Lecture) -> Iterator[Tuple[Any, MissionRef]]:
    """
    Récupère les amendements à discuter, dans l'ordre de passage

    NB : les amendements jugés irrecevables ne sont pas inclus.
    """
    http_session = get_http_session()
    for url, mission_ref in derouleur_urls_and_mission_refs(lecture):
        resp = http_session.get(url)
        if resp.status_code == HTTPStatus.NOT_FOUND:  # 404
            logger.warning(f"Could not fetch {url}")
            continue
        if resp.text == "":
            logger.warning(f"Empty response for {url}")
            continue
        yield resp.json(), mission_ref
Example #6
def extract_from_remote_zip(
        url: str) -> Generator[Tuple[str, IO[str]], None, None]:
    http_session = get_http_session()
    response = http_session.get(url)

    if response.status_code not in (HTTPStatus.OK, HTTPStatus.NOT_MODIFIED):
        message = f"Unexpected status code {response.status_code} while fetching {url}"
        logger.error(message)
        raise RuntimeError(message)

    content_type = response.headers["content-type"]
    if content_type != "application/zip":
        message = (
            f"Unexpected content type {content_type} while fetching {url} "
            "(expected application/zip)")
        logger.error(message)
        raise RuntimeError(message)

    yield from extract_from_zip(BytesIO(response.content))
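
extract_from_zip is a project-internal helper that is not shown here. A plausible sketch matching the declared Generator[Tuple[str, IO[str]], None, None] return type, assuming the archive members are UTF-8 text files (an assumption, not confirmed by the source):

import io
import zipfile
from typing import IO, Generator, Tuple

def extract_from_zip(fileobj: IO[bytes]) -> Generator[Tuple[str, IO[str]], None, None]:
    # Open the in-memory archive and yield (member name, text stream) pairs.
    with zipfile.ZipFile(fileobj) as archive:
        for name in archive.namelist():
            with archive.open(name) as binary_stream:
                # The encoding is assumed; the real helper may differ.
                yield name, io.TextIOWrapper(binary_stream, encoding="utf-8")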
Example #7
def _fetch_all(lecture: Lecture, dry_run: bool = False) -> List[OrderedDict]:
    """
    Récupère tous les amendements, dans l'ordre de dépôt
    """

    http_session = get_http_session()
    url = _build_amendements_url(lecture)
    resp = http_session.get(url)
    if resp.status_code == HTTPStatus.NOT_FOUND:
        raise NotFound(url)

    if dry_run:
        return []

    text = resp.content.decode("cp1252")
    lines = [_filter_line(line) for line in text.splitlines()[1:]]
    reader = csv.DictReader(lines, delimiter="\t")
    items = list(reader)
    return items
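
The response body is a cp1252-encoded, tab-separated export whose first line is dropped before the header row reaches csv.DictReader; _filter_line is project-specific and not shown. A self-contained sketch of the same decode-and-parse step, with made-up column names and without that helper:

import csv

# Illustrative payload only; the real export has different columns.
payload = "Export du 01/01/2024\nNuméro\tAuteur\n42\tDupont\n".encode("cp1252")

text = payload.decode("cp1252")
lines = text.splitlines()[1:]  # drop the title line, keep the header row
reader = csv.DictReader(lines, delimiter="\t")
items = list(reader)
assert items == [{"Numéro": "42", "Auteur": "Dupont"}]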