Ejemplo n.º 1
0
        'https://www.policianacional.gov.py/nomina-de-salarios-de-personal-de-la-policia-nacional/',
        '.column-2 > a')
    to_download.extend(
        _get_links(
            'https://www.policianacional.gov.py/convenio-y-contratos-celebrados-objeto-monto-total-de-la-contratacion-plazos-de-ejecucion-mecanismos-de-control-y-rendicion-de-cuentas/',
            '.entry-content a'))
    to_download.extend(
        _get_links('https://www.policianacional.gov.py/viaticos/', 'td > a'))
    to_download.extend(
        only_files(
            _get_links(
                'https://www.policianacional.gov.py/ley-n-518914-de-libre-acceso-ciudadano-a-la-informacion-publica-y-transparencia-gubernamental-articulo-n-8/',
                '.column-2 > a')))
    to_download.extend(
        only_files(
            _get_links(
                'https://www.policianacional.gov.py/ley-n-5282-de-libre-acceso-ciudadano-a-la-informacion-publica-y-transparencia-gubernamental/',
                '.column-2 > a')))
    to_download.extend(
        _get_links(
            'https://www.policianacional.gov.py/informe-anual-sobre-derechos-humanos-y-situacion-carcelaria-con-especial-enfasis-en-los-derechos-sociales-a-la-salud-y-a-la-educacion/',
            'td a'))
    return to_download


if __name__ == "__main__":
    # Manual smoke run: gather the links, download them under /tmp/poli and
    # print the target path computed for every downloaded file.
    for local_file in download_links(get_links(), "/tmp/poli"):
        print(get_target_path(local_file, "2020-12-12"))
def retrieve_links_and_download(target: str, **context):
    """
    Download the links previously published by the ``fetch_links`` task.

    :param target: forwarded to ``download_links`` as its second argument
    :param context: keyword context; must contain a ``'ti'`` entry exposing
        ``xcom_pull`` (presumably the Airflow task instance - confirm)
    :return: whatever ``download_links`` returns for the pulled links
    :raises KeyError: if ``context`` has no ``'ti'`` entry
    """
    task_instance = context['ti']
    pulled_links = task_instance.xcom_pull(task_ids="fetch_links")
    # Downloads are requested with verify=False, as in the original call.
    return download_links(pulled_links, target, verify=False)
Ejemplo n.º 3
0
import ssl
from typing import List

from _muni_operators import get_target_path
from _policia_operators import _get_links
from network_operators import download_links

# Swap the default HTTPS context factory for the unverified one, so HTTPS
# connections that rely on the stdlib default context skip certificate checks.
# NOTE(review): this is a process-wide override of a private ssl attribute;
# presumably needed because the target site's certificate fails validation -
# confirm before reusing this module elsewhere.
ssl._create_default_https_context = ssl._create_unverified_context


def get_links() -> List[str]:
    """
    Collect the links found on the MSPBS "ley 5282/14 funcionarios" page.

    Delegates to ``_get_links`` with the ``.item-mes a`` selector and
    ``verify=False``.

    :return: the result of ``_get_links`` for that page, unchanged
    """
    page_url = 'https://www.mspbs.gov.py/ley-5282-14-funcionarios.html'
    link_selector = '.item-mes a'
    return _get_links(page_url, link_selector, verify=False)


if __name__ == "__main__":
    # Manual smoke run: gather the links, download them under /tmp/mspbs
    # (with verify=False) and print the target path of every downloaded file.
    for local_file in download_links(get_links(), "/tmp/mspbs", verify=False):
        print(get_target_path(local_file, "2020-12-12"))
Ejemplo n.º 4
0
from typing import List

import requests
from bs4 import BeautifulSoup

from _muni_operators import get_target_path
from _policia_operators import _get_links, only_files
from network_operators import download_links


def get_links() -> List[str]:
    """
    Collect the links found on the PJ magistrates-and-staff roster page.

    The links matched by the ``.mainContent a`` selector are passed through
    ``only_files`` before being returned.

    :return: the result of ``only_files`` applied to the scraped links
    """
    page_url = 'https://www.pj.gov.py/contenido/943-nomina-de-magistrados-y-funcionarios/943'
    scraped_links = _get_links(page_url, '.mainContent a')
    return only_files(scraped_links)


if __name__ == "__main__":
    # Manual smoke run: gather the links, download them under /tmp/csj and
    # print the target path computed for every downloaded file.
    for local_file in download_links(get_links(), "/tmp/csj"):
        print(get_target_path(local_file, "2020-12-12"))
Ejemplo n.º 5
0
    print(html)
    soup = BeautifulSoup(html, features="html.parser")
    to_ret = []

    for link in soup.select("a.ubermenu-target"):
        href: str = link.get("href")

        if href is not None and 'wp-content' in href:
            print("Link: " + href)
            to_ret.append(href)

    return to_ret


def get_target_path(local_path: str, prefix: str) -> str:
    """
    Build the target name for a local file.

    :param local_path: path of the local file; it is passed to
        ``calculate_hash_of_file`` and only its basename appears in the result
    :param prefix: text placed at the start of the target name
    :return: the target path, with the format "prefix_hash_basename"
    """
    digest = calculate_hash_of_file(local_path)
    return f"{prefix}_{digest}_{os.path.basename(local_path)}"


if __name__ == "__main__":
    # Manual smoke run: gather the links, download them under /tmp/muni and
    # print the target path computed for every downloaded file.
    for local_file in download_links(get_links(), "/tmp/muni"):
        print(get_target_path(local_file, "2020-12-09"))