def retrieve_all():
    """Rapatrie l'ensemble des ressources."""

    taskname = 'retrieving'
    log_task(taskname, 'start')

    some_action_performed = False
    no_exception = True

    with open(DYNAMIC_CONFIG_DATA_FILE_PATH, 'r') as f:

        j = json.load(f)
        try:
            for ressource in j['res']:
                params = ressource['params']
                uri = params['uri']

                # each URI is handled according to its scheme
                if uri.startswith(('http://', 'https://')):
                    if get_data_from_url(params):
                        some_action_performed = True

                elif uri.startswith('file:///'):
                    if get_data_from_filepath(params):
                        some_action_performed = True
                else:
                    log('unimplemented processing for uri "{}"'.format(uri), 'warn')
                else:
                    log('unimplemented processing for uri "{}"'.format(uri), 'warn')

        except KeyError as e:
            no_exception = False
            printex(e, 'incorrect or missing node in "{}" (bad file structure)'.format(DYNAMIC_CONFIG_DATA_FILE_PATH))
        except Exception as e:
            no_exception = False
            printex(e)

        if no_exception:
            if some_action_performed:
                log('all data downloaded successfully')
            else:
                log('nothing was done, because all data had already been retrieved')
        else:
            log('sorry, some data could not be downloaded', 'warn')

        if some_action_performed:
            # cleanup: recursively remove any empty directories left in the data download directory
            log('removing potentially useless directories in "{}"'.format(DOWNLOAD_ROOT))
            remove_empty_dirs_in_dir(DOWNLOAD_ROOT)

        log_task(taskname, 'end')
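
# A minimal sketch of the structure retrieve_all() expects to find in
# DYNAMIC_CONFIG_DATA_FILE_PATH. The key set below is an assumption inferred from
# the accesses made throughout this file ('res', 'params', 'uri', 'shortname',
# 'data_name', 'import_mode', ...); the real file may carry more keys.
_EXAMPLE_DATA_CONFIG = {
    "res": [
        {
            "import_mode": "controlee_nouvelle_table",
            "params": {
                "uri": "https://example.org/some/archive.7z",  # or "file:///..."
                "shortname": "my_data",
                "data_name": "some dataset",
                "schema": "my_schema",
                "table": "my_table"
            }
        }
    ]
}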


def create_dir_if_not_exists(path):
    """
    Create a directory if it does not exist.

    :param path: string, the absolute path of the directory to create
    """
    if not exists(path):
        try:
            makedirs(path)
        except PermissionError as e:
            # keep only the top-level directory of the path (everything up to the
            # second separator) so the user knows where to check the rights
            path_top_level = sub(r"^({0}?[^{0}]*){0}.*$".format(sep), r"\1",
                                 path)
            printex(
                e, 'cannot create the directory "{}". check rights on "{}"'.
                format(path, path_top_level))
            exit(1)
        except Exception as e:
            printex(e,
                    'while attempting to create directory "{}".'.format(path))
            log('some directory could not be created. crashing.', 'error')
            exit(1)
        else:
            log('directory "{}" created'.format(path))
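
# Note: the exists()/makedirs() pair above is racy (the directory can appear
# between the two calls). A sketch of the race-free Python 3 equivalent, under
# the same `from os import makedirs` import assumed by the code above:
#
#   makedirs(path, exist_ok=True)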


def convert_then_import_all():
    """Convert/prepare all resources, then import them into the database."""

    taskname = 'converting and importing'
    log_task(taskname, 'start')

    # logging counters
    amount_of_atomic_data_processed = 0
    amount_of_data_files_processed = 0
    things_were_done = False
    no_problem = True

    # iterate over the resources of the configuration file
    with open(DYNAMIC_CONFIG_DATA_FILE_PATH, 'r') as f:
        j = json.load(f)

        # connect to the database
        conn = connect_to_database()
        try:

            for ressource in j['res']:

                # look for 'THE' atomic piece of data
                to_process, params = search_for_files_to_process(ressource)

                # hand the found data over for processing
                amount_of_data_files = len(to_process)
                if amount_of_data_files > 0:
                    log('{} data file(s) found'.format(amount_of_data_files))

                    # process the group
                    if process_group(to_process, ressource, conn):
                        things_were_done = True
                        amount_of_data_files_processed += amount_of_data_files
                        amount_of_atomic_data_processed += 1
                    else:
                        no_problem = False
                        log('encountered some problem(s) with this data',
                            'error')

        except KeyError as e:
            printex(
                e, 'incorrect or missing node in "{}" (bad file structure)'.
                format(DYNAMIC_CONFIG_DATA_FILE_PATH))
        except Exception as e:
            printex(e)
        finally:
            delete_empty_schemes(conn)
            conn.close()

    # log the outcome
    if things_were_done:
        log('{} atomic data successfully processed ({} files)'.format(
            amount_of_atomic_data_processed, amount_of_data_files_processed))
    elif no_problem:
        log('nothing was done, because all found data had already been prepared')
    else:
        log('nothing was done, because all found & processed data encountered errors',
            'warn')

    log_task(taskname, 'end')
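
# connect_to_database() is defined elsewhere in the project; a plausible minimal
# sketch (an assumption, not the source implementation) relying on psycopg2 and
# the DB_* globals loaded by load_general_config() below:
def _connect_to_database_sketch():
    import psycopg2
    return psycopg2.connect(host=DB_HOST, port=DB_PORT, dbname=DB_NAME,
                            user=DB_USER_NAME, password=DB_USER_PASSWORD)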


def extract_all():
    """
    Extract every available archive.

    Approaches explored:
    - "lzma" does not handle archives, only the (de)compression of a single file compressed as ".7z"
    - the C library "libarchive" (installed via conda) through the python wrapper "libarchive-c" (installed via pip)

    The final choice is "patoolib", because it handles many formats.
    It is actually a wrapper: after detecting the MIME types, it calls the appropriate executables/libraries ("7zr" for example).
    """

    taskname = 'extracting'
    log_task(taskname, 'start')

    # full paths of the archives to extract
    log('searching for archives to extract in "{}". this may take a while'.format(DOWNLOAD_ROOT))
    archives_to_extract = search_with_criteria(DOWNLOAD_ROOT, is_archive, search_depth=2)
    archives_to_extract += search_with_criteria(join(DOWNLOAD_ROOT, COPIED_DATA_DIRNAME), is_archive, search_depth=2)
    total = len(archives_to_extract)
    log('{} archive(s) found in "{}"'.format(total, DOWNLOAD_ROOT))

    # extract the archives one by one
    no_exception = True
    some_action_performed = False
    done = 0
    for archive_path in archives_to_extract:

        # track overall progress
        done += 1

        # check whether the archive has already been extracted
        archive_dir = dirname(archive_path)
        if len(listdir(archive_dir)) > 1:
            # if the archive is not alone in its directory, consider it already extracted
            log('archive "{}" ignored because previously extracted ({}/{})'.format(archive_path, done, total))

        else:

            log('extracting archive "{}" ({}/{})'.format(archive_path, done, total))

            try:
                extract_archive(archive_path, verbosity=-1, outdir=archive_dir, interactive=False)
            except PatoolError as e:
                no_exception = False
                printex(e, 'the file extension probably does not match the real data type')
            except Exception as e:
                no_exception = False
                printex(e)
            finally:
                some_action_performed = True

    if no_exception:
        if some_action_performed:
            log('all retrieved archives extracted successfully')
        else:
            log('nothing was done, because all archives had already been extracted')
    else:
        log('sorry, some retrieved archives could not be extracted', 'warn')

    log_task(taskname, 'end')


def xls_handler(ressource, conn):
    """Parse a non-geographic piece of data and import it directly into the database."""

    # declarations
    okko = True
    params = ressource['params']
    import_mode = ressource['import_mode']
    shortname, data_name = params['shortname'], params['data_name']
    schema, table = params['schema'], params['table']

    # has this data already been imported?
    already_done = False
    if exists(REMEMBER_IMPORT_FILE_PATH):
        already_done = check_for_line_in_file(remember_line_builder(params),
                                              REMEMBER_IMPORT_FILE_PATH)

    # skip the data if it has already been imported
    if already_done:
        log('ignoring data "{} / {}" because previously imported into database'
            .format(shortname, data_name))

    else:

        # input file
        in_f = find_key_file_from_params(params, DOWNLOAD_ROOT)
        if in_f == '':
            okko = False
            log(
                'ignoring data about "{} / {}" because a crucial file is missing'
                .format(shortname, data_name), 'warn')
        else:
            log('importing data about "{} / {}" in mode "{}" in schema.table "{}.{}"'
                .format(shortname, data_name, import_mode, schema, table))
            try:
                cur = conn.cursor()
                # parse and import
                with open_workbook(in_f) as f:
                    for sn in f.sheet_names():
                        s = f.sheet_by_name(sn)

                        # -------- preliminary processing

                        # intermediate structure: row 0 holds "name:type" header cells
                        attrs_and_their_types = [
                            (x[0], TO_POSTGRESQL_TYPE[x[1]]) for x in [
                                s.cell(0, ci).value.rsplit(':', 1)
                                for ci in range(s.ncols)
                            ]
                        ]

                        # fields for the queries
                        scheme_dot_table = '"{}"."{}"'.format(schema, table)
                        fields_list = '({})'.format(', '.join([
                            '{} {}'.format(the_attr, the_type)
                            for the_attr, the_type in attrs_and_their_types
                        ]))
                        columns = [
                            the_attr
                            for the_attr, the_type in attrs_and_their_types
                        ]

                        # query
                        query = 'CREATE TABLE IF NOT EXISTS {} {};'.format(
                            scheme_dot_table, fields_list)

                        # -------- table creation
                        if import_mode == 'controlee_nouvelle_table':
                            create_schema_if_not_exists(schema, conn)
                            execute_query(query, conn)

                        # -------- table population
                        for ri in range(1, s.nrows):

                            values = [
                                s.cell(ri, ci).value for ci in range(s.ncols)
                            ]
                            fionalike_struct = OrderedDict()

                            for index, v in enumerate(values):
                                # xlrd yields every number as a float: cast the
                                # "fake floats" (e.g. 3.0) back to int, without
                                # truncating genuine floats such as 3.5
                                if isinstance(v, float) and v.is_integer():
                                    v = int(v)
                                fionalike_struct[columns[index]] = v

                            query = build_insert_query(fionalike_struct,
                                                       params,
                                                       georef=False)
                            cur.execute(query)

            except Exception as e:
                conn.rollback()
                okko = False
                printex(e, 'insert query failed into table "{}"'.format(table))
            else:
                conn.commit()
                remember_this_import(params)

    return okko
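
# A self-contained sketch (illustrative values, not from the source) of the
# header convention xls_handler() relies on: row 0 of each sheet carries
# "name:type" cells, the type part being a key of the TO_POSTGRESQL_TYPE map.
def _demo_header_parsing():
    to_postgresql_type = {'str': 'varchar', 'int': 'integer'}  # illustrative mapping
    header_row = ['city:str', 'population:int']
    # -> [('city', 'varchar'), ('population', 'integer')]
    return [(name, to_postgresql_type[type_name])
            for name, type_name in (cell.rsplit(':', 1) for cell in header_row)]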


def apply_params_to_properties(params, properties, from_where='insert'):
    """
    Apply the modifications described by the parameters to the properties.

    These modifications affect the attributes, hence the structure,
    which would be the equivalent of an "ALTER".
    :param params: the parameters
    :param properties: the properties
    :param from_where: where this function was called from (to avoid logging an identical message for n tuples)
    :return: the modified properties, and True if everything went well else False
    """
    okko = False
    pp = OrderedDict()

    try:

        # short-name declarations
        mode = ''
        if check_for_node_in_parent('mode', params):
            mode = params['mode']
        bb = {}
        some_b_is_waiting_for_each_one = []
        if check_for_node_in_parent('bindings', params):
            bb = params['bindings']
            some_b_is_waiting_for_each_one = [b['from'] for b in bb]

        # apply the modifications specified in the bindings
        for prop_key in properties.keys():
            need_to_copy_the_attribute = mode != 'keep_only'  # MODIFIED & KEEP_ONLY

            for b in bb:
                b_from = b['from']
                b_to = b['to']

                if prop_key == b_from:

                    # -------- DROP
                    if b_to == '':
                        if mode == 'keep_only':
                            # a DROP combined with KEEP_ONLY is not valid:
                            # warn and ignore the DROP clause
                            # (copy is already False anyway)
                            if from_where != 'insert':
                                log(
                                    'ignoring non-sense DROP of attribute "{}" because of KEEP_ONLY mode'
                                    .format(b_from), 'warn')
                        else:
                            # the attribute will not be kept, i.e. not copied
                            need_to_copy_the_attribute = False

                    # field rename
                    else:
                        pp[b_to] = properties[prop_key]
                        # just copied under its new name, no need to copy it again "as if it were unchanged"
                        need_to_copy_the_attribute = False

                # unchanged field
                already_copied = prop_key in pp.keys()
                some_b_is_waiting_for_it = prop_key in some_b_is_waiting_for_each_one
                if need_to_copy_the_attribute and not already_copied and not some_b_is_waiting_for_it:
                    pp[prop_key] = properties[prop_key]

            # no bindings, so copy everything as-is
            if not check_for_node_in_parent('bindings', params):
                pp[prop_key] = properties[prop_key]

        # add the fields common to all import modes
        pp[YEAR_NAME] = '{}:{}'.format(YEAR_TYPE, YEAR_LENGTH)
        pp[VERSION_NAME] = '{}:{}'.format(VERSION_TYPE, VERSION_LENGTH)
        pp[SRID_NAME] = '{}:{}'.format(SRID_TYPE, SRID_LENGTH)
        pp[GEOMETRY_NAME] = '{}:'.format(GEOMETRY_TYPE)

    except KeyError as e:
        printex(e, 'incorrect or missing node')
    except Exception as e:
        printex(e)
    else:
        okko = True

    return pp, okko
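
# A hedged usage sketch for apply_params_to_properties(). The params shape is an
# assumption matching the accesses above ('mode', 'bindings' entries with
# 'from'/'to', an empty 'to' meaning DROP):
#
#   properties = OrderedDict([('OLD_NAME', 'str:80'), ('KEEP_ME', 'int:10')])
#   params = {'bindings': [{'from': 'OLD_NAME', 'to': 'new_name'}]}
#   pp, ok = apply_params_to_properties(params, properties)
#   # pp maps 'new_name' -> 'str:80', keeps 'KEEP_ME', and carries the extra
#   # YEAR/VERSION/SRID/GEOMETRY fields appended for every import mode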


def to_database(ressource, conn):
    """
    Walk through the data and insert it into the database, building the queries dynamically.

    Works on the data as a stream only: python generators are used, and the queries
    are built on the fly for each tuple (in the database sense) of the input data.
    """

    # declarations
    okko = True
    cur = conn.cursor()
    import_mode = ressource['import_mode']
    params = ressource['params']
    mode = ''
    if check_for_node_in_parent('mode', params):
        mode = params['mode']
    data_name = params['data_name']
    shortname = simplify(params['shortname'])

    # find the key file for this processing
    input_file = find_key_file_from_params(params, DOWNLOAD_ROOT)
    if input_file == '':
        log(
            'ignoring data about "{} / {}" because a crucial file is missing'.
            format(shortname, data_name), 'warn')
        okko = False
    else:

        # has this data already been imported?
        already_done = False
        if exists(REMEMBER_IMPORT_FILE_PATH):
            already_done = check_for_line_in_file(
                remember_line_builder(params), REMEMBER_IMPORT_FILE_PATH)

        # skip the data if it has already been imported
        if already_done:
            log('ignoring data "{} / {}" because previously imported into database'
                .format(shortname, data_name))
        else:
            mode_msg = 'automatic' if mode == '' else mode
            log('importing data about "{} / {}" in mode "{} / {}" in schema.table "{}.{}"'
                .format(shortname, data_name, import_mode, mode_msg,
                        params['schema'], params['table']))

            with fiona.drivers():

                # open the input file,
                # add, keep and modify the desired columns,
                # and write everything to an output file;
                # all processing is done as a stream

                with fiona.open(input_file, 'r') as in_data:

                    # start from the initial schema
                    in_schema = in_data.schema.copy()

                    # apply the modifications to the fields
                    properties, prop_changes_ok = apply_params_to_properties(
                        params, in_schema['properties'], from_where='create')
                    okko = prop_changes_ok and okko

                    if okko:
                        try:

                            # -------- schema and table

                            # create the schema (in the database namespace sense) if it does not exist
                            create_schema_if_not_exists(params['schema'], conn)

                            # create the table if needed
                            if import_mode == 'controlee_nouvelle_table':
                                # make sure the table exists by always attempting to create it
                                create_table_if_not_exists(
                                    properties, params, conn)

                            # -------- I/O projection detection

                            srid_src_user, srid_dst_user = get_user_srids(
                                params)
                            srid_src_detected, srid_dst_detected = get_detected_srids(
                                in_data.crs_wkt, params, conn)
                            srid_src, srid_dst = None, None

                            # -------- handling of the available projections (source)

                            if srid_src_user is not None:
                                if srid_src_user != srid_src_detected and srid_src_detected is not None:
                                    log(
                                        'detected source SRID ({}, ignored) is different from the enforced one that you gave ({}, picked)'
                                        .format(srid_src_detected,
                                                srid_src_user), 'warn')
                                srid_src = srid_src_user
                            else:
                                if srid_src_detected is not None:
                                    srid_src = srid_src_detected
                                else:
                                    okko = False
                                    log(
                                        'sorry, you will need to define the source SRID manually',
                                        'error')

                            # -------- handling of the available projections (destination)
                            if okko:
                                if import_mode == 'controlee_nouvelle_table':
                                    srid_dst = srid_src
                                    if srid_dst_user is not None:
                                        srid_dst = srid_dst_user
                                    params[SRID_NAME] = srid_dst
                                else:
                                    if srid_dst_user is not None:
                                        log(
                                            'ignoring the given destination SRID, which is useless regarding the import mode',
                                            'warn')
                                    if srid_dst_detected is not None:
                                        srid_dst = srid_dst_detected
                                        params[SRID_NAME] = srid_dst
                                    else:
                                        okko = False
                                        msg_beginning = 'weird, cannot find destination SRID.'
                                        if table_empty(params, conn):
                                            msg_ending = 'table exists but is empty. drop it manually, change the import mode, then retry'
                                        else:
                                            msg_ending = 'check in database that the SRID column is named "{}"'.format(
                                                SRID_NAME)
                                        log(
                                            '{} {}'.format(
                                                msg_beginning, msg_ending),
                                            'error')

                            # -------- structural modifications and import

                            if okko:

                                reprojection_needed = okko and (srid_src !=
                                                                srid_dst)
                                proj_src, proj_dst = None, None
                                if reprojection_needed:
                                    prefix = 'epsg:'
                                    proj_src, proj_dst = '{}{}'.format(
                                        prefix, srid_src), '{}{}'.format(
                                            prefix, srid_dst)
                                    log('data will be reprojected from SRID {} to {}'
                                        .format(srid_src, srid_dst))

                                # if there are new fields, add them to the database table
                                if check_for_node_in_parent(
                                        'new_fields', params):
                                    log('adding new fields to table in database')
                                    for nf in params['new_fields']:
                                        the_name = nf['name']
                                        the_type_name, the_type_length = nf[
                                            'type'].split(':')
                                        query = 'ALTER TABLE "{}"."{}" ADD COLUMN {} {}({});'.format(
                                            params['schema'], params['table'],
                                            the_name, the_type_name,
                                            the_type_length)
                                        execute_query(query, conn)

                                # process the data as a stream: each feature gets its own "INSERT"
                                for feature in in_data:

                                    # copy the attribute values of each feature
                                    prop, okko = apply_params_to_properties(
                                        params, feature['properties'])
                                    prop[SRID_NAME] = srid_dst
                                    prop[YEAR_NAME] = params['year']
                                    prop[VERSION_NAME] = params['version']

                                    # -------- reprojection
                                    geom = feature['geometry']
                                    prop[GEOMETRY_NAME] = reproject(
                                        geom, proj_src, proj_dst
                                    ) if reprojection_needed else geom

                                    # build the query dynamically
                                    insert_query = build_insert_query(
                                        prop, params)
                                    cur.execute(insert_query)

                        except Exception as e:
                            conn.rollback()
                            okko = False
                            printex(
                                e,
                                'insert query failed into table "{}"'.format(
                                    params['table']))
                        else:
                            conn.commit()
                            remember_this_import(params)

    return okko
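
# A minimal, self-contained sketch (an assumption, not part of the source) of the
# source-SRID resolution rule implemented in to_database(): a user-enforced SRID
# always wins, a detected SRID is the fallback, and having neither is an error.
def _resolve_source_srid(srid_user, srid_detected):
    if srid_user is not None:
        # the enforced value wins, even when it contradicts the detected one
        return srid_user
    if srid_detected is not None:
        return srid_detected
    raise ValueError('the source SRID must be defined manually')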


def load_general_config(filename):
    """
    Load a configuration file.

    :param filename: string, the path of the configuration file to load
    """

    with open(filename, 'r') as f:

        all_is_ok = False
        j = json.load(f)
        try:

            # root location to use
            global PLACE_TO_GO

            # working directory
            global WORK_ROOT

            # root of the configuration files storage
            global CONFIG_ROOT

            # root of the downloaded documents storage
            global DOWNLOAD_ROOT

            # root of the converted data storage
            global CONVERSION_ROOT

            # name of the directory for copied data
            global COPIED_DATA_DIRNAME

            # file remembering the imports already performed
            global REMEMBER_IMPORT_FILE_NAME
            global REMEMBER_IMPORT_FILE_EXT
            global REMEMBER_IMPORT_FILENAME
            global REMEMBER_IMPORT_FILE_PATH

            # severity levels
            global SEVERITY_LEVELS

            # verbosity level
            global LOG_LEVEl

            # length of the decoration placed left and right of the logs delimiting the main tasks
            global VISUAL_SECTION_DECORATOR_LENGTH

            # visual separator character used for exceptions in the logs
            global VISUAL_SEPARATOR_CARACTER

            # minimum and maximum length of a shortname
            global SHORTNAME_MIN_LENGTH
            global SHORTNAME_MAX_LENGTH

            # extension groups
            global EXTENSION_GROUPS
            global KEY_EXTS
            global MANDATORY_EXT_GROUPS
            global OPTIONAL_EXT_GROUPS
            global INTERESTING_EXTS

            # characteristics of the added fields
            global YEAR_NAME
            global YEAR_TYPE
            global YEAR_LENGTH
            global VERSION_NAME
            global VERSION_TYPE
            global VERSION_LENGTH
            global SRID_NAME
            global SRID_TYPE
            global SRID_LENGTH
            global GEOMETRY_NAME
            global GEOMETRY_TYPE

            # mapping between the types as represented in the python structures generated by fiona and the types within postgresql
            global TO_POSTGRESQL_TYPE

            # database connection parameters
            global DB_HOST
            global DB_PORT
            global DB_NAME
            global DB_USER_NAME
            global DB_USER_PASSWORD

            # path of the dynamic "general" configuration file
            global DYNAMIC_CONFIG_GENERAL_FILE_PATH

            # path of the dynamic "data" configuration file
            global DYNAMIC_CONFIG_DATA_FILE_PATH

            # defines the valid import modes
            global IMPORT_MODES

            # proxy settings
            global PROXIES

            # assign the values
            PLACE_TO_GO = j['place_to_go']
            WORK_ROOT = join(PLACE_TO_GO, j['work_root_name'])
            CONFIG_ROOT = join(WORK_ROOT, j['config_root_name'])
            DOWNLOAD_ROOT = join(WORK_ROOT, j['download_root_name'])
            CONVERSION_ROOT = join(WORK_ROOT, j['conversion_root_name'])
            COPIED_DATA_DIRNAME = j['copied_data_dirname']
            REMEMBER_IMPORT_FILE_NAME = j['remember_import_file_name']
            REMEMBER_IMPORT_FILE_EXT = j['remember_import_file_ext']
            REMEMBER_IMPORT_FILENAME = '{}{}{}'.format(
                REMEMBER_IMPORT_FILE_NAME, extsep, REMEMBER_IMPORT_FILE_EXT)
            REMEMBER_IMPORT_FILE_PATH = join(CONFIG_ROOT,
                                             REMEMBER_IMPORT_FILENAME)
            SEVERITY_LEVELS = j['severity_levels']
            LOG_LEVEl = j['log_level']
            VISUAL_SECTION_DECORATOR_LENGTH = j[
                'visual_section_decorator_length']
            VISUAL_SEPARATOR_CARACTER = j['visual_separator_caracter']
            SHORTNAME_MIN_LENGTH = j['shortname_min_length']
            SHORTNAME_MAX_LENGTH = j['shortname_max_length']
            EXTENSION_GROUPS = j['extension_groups']
            KEY_EXTS = [gg[0][0] for gg in EXTENSION_GROUPS]
            MANDATORY_EXT_GROUPS = [
                g for gg in EXTENSION_GROUPS for g in gg[0]
            ]
            OPTIONAL_EXT_GROUPS = [g for gg in EXTENSION_GROUPS for g in gg[1]]
            INTERESTING_EXTS = MANDATORY_EXT_GROUPS + OPTIONAL_EXT_GROUPS
            YEAR_NAME = j['year_name']
            YEAR_TYPE = j['year_type']
            YEAR_LENGTH = j['year_length']
            VERSION_NAME = j['version_name']
            VERSION_TYPE = j['version_type']
            VERSION_LENGTH = j['version_length']
            SRID_NAME = j['srid_name']
            SRID_TYPE = j['srid_type']
            SRID_LENGTH = j['srid_length']
            GEOMETRY_NAME = j['geometry_name']
            GEOMETRY_TYPE = j['geometry_type']
            TO_POSTGRESQL_TYPE = j['types_map']
            DB_HOST = j['database_host']
            DB_PORT = j['database_port']
            DB_NAME = j['database_name']
            DB_USER_NAME = j['database_user_name']
            DB_USER_PASSWORD = j['database_user_password']
            IMPORT_MODES = j['import_modes']
            PROXIES = j['proxies']

            DYNAMIC_CONFIG_GENERAL_FILE_PATH = join(CONFIG_ROOT,
                                                    CONFIG_GENERAL_FILENAME)
            DYNAMIC_CONFIG_DATA_FILE_PATH = join(CONFIG_ROOT,
                                                 CONFIG_DATA_FILENAME)

        except KeyError as e:
            printex(
                e, 'incorrect or missing node in "{}" (bad file structure)'.
                format(filename))
        except Exception as e:
            printex(e)
        else:
            all_is_ok = True
            log('general purpose configuration file "{}" correctly loaded'.
                format(filename))

        # any unknown problem: crash, since the configs are needed anyway to work correctly
        if not all_is_ok:
            log(
                'cannot correctly load the permanent general configuration file (see the reason above). exiting',
                'error')
            exit(1)
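
# A trimmed sketch (values are assumptions, key names are those read above) of
# the JSON structure load_general_config() expects:
_EXAMPLE_GENERAL_CONFIG = {
    "place_to_go": "/data",
    "work_root_name": "work",
    "config_root_name": "config",
    "download_root_name": "download",
    "conversion_root_name": "conversion",
    "copied_data_dirname": "copied",
    "remember_import_file_name": "imported",
    "remember_import_file_ext": "log",
    "severity_levels": ["info", "warn", "error"],
    "log_level": "info",
    # ... the remaining keys ('shortname_min_length', 'types_map',
    # 'database_host', 'import_modes', 'proxies', ...) follow the same pattern
}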


def my_json_res_file_checker(path):
    """
    Check the validity of the JSON file containing the download links and display information accordingly.

    :param path: string, the path of the data configuration file to check
    """

    with open(path, 'r') as f:
        j = json.load(f)

        # note: this flag also gates the one-time display of the shortname criteria below
        everything_is_fine = False
        try:
            for ressource in j['res']:

                s = ressource['params']

                # -------- import mode check

                import_mode = ressource['import_mode']
                if import_mode not in IMPORT_MODES:
                    everything_is_fine = False
                    log('unrecognized import mode "{}"'.format(import_mode),
                        'warn')

                # -------- shortname formatting check

                # a warning is generated for each incorrect shortname,
                # together with a possible correction;
                # before the first warning, the required shortname length criteria are displayed

                shortname_length = len(s['shortname'])

                # too short
                if shortname_length < SHORTNAME_MIN_LENGTH:
                    if not everything_is_fine:
                        give_shortname_info()
                        everything_is_fine = True
                    log(
                        'shortname "{}" is too short (length is {}, minimum is {})'
                        .format(s['shortname'], shortname_length,
                                SHORTNAME_MIN_LENGTH), 'warn')

                # too long
                if shortname_length > SHORTNAME_MAX_LENGTH:
                    if not everything_is_fine:
                        give_shortname_info()
                        everything_is_fine = True
                    log(
                        'shortname "{}" is too long (length is {}, maximum is {})'
                        .format(s['shortname'], shortname_length,
                                SHORTNAME_MAX_LENGTH), 'warn')

                # badly formatted
                if s['shortname'] != simplify(s['shortname']):
                    if not everything_is_fine:
                        give_shortname_info()
                        everything_is_fine = True
                    log(
                        'shortname "{}" is poorly formatted. you should consider changing it manually to something like "{}" for example'
                        .format(s['shortname'],
                                simplify(s['shortname'])), 'warn')

        except KeyError as e:
            printex(
                e, 'incorrect or missing node in "{}" (bad file structure)'.
                format(path))

        log('filecheck "{}" complete'.format(path))


def get_data_from_url(params):
    """Fetch a remote web resource."""

    # get the path where the resource should be downloaded
    save_to = get_path_from_params(params, DOWNLOAD_ROOT)

    # for logging
    about = simplify(params['shortname'])
    done_something = False

    # if a directory with this name already exists, skip
    if exists(save_to):
        log('ignored "{}" because data has already been downloaded (directory "{}" exists)'.format(about, save_to))

    else:

        # declarations
        uri = params['uri']
        site = get_website_name_from_url(uri)

        # create the destination
        create_dir_if_not_exists(save_to)
        save_as = join(save_to, about)

        # download the resource
        log('fetching data from the "{}" website about "{}"'.format(site.upper(), about))

        # the try/except local to the loop makes it possible to skip invalid URLs and continue
        try:
            # @deprecated: cannot handle SSL certificates
            # urlretrieve(uri, save_as)

            with open(save_as, 'wb') as output_file:
                response = get(uri, verify=False, proxies=PROXIES)
                output_file.write(response.content)

        # exception handling
        except URLError as e:  # leftover from the urlretrieve() era; requests errors fall through below
            printex(e, 'problem while downloading the resource at "{}"'.format(uri))
        except Exception as e:
            printex(e)  # any unknown problem

        # no problem occurred
        else:

            done_something = True
            # rename: add the extension to the downloaded archive's name
            try:
                extension = get_archive_format(save_as)[0]
                file_with_extension = '{}{}{}'.format(save_as, extsep, extension)
                rename(save_as, file_with_extension)

            except PatoolError as e:
                printex(e, 'the resource located at "{}" is probably not an archive, the download may have failed somehow. just copying it'.format(uri))

                # remote single-file resources
                filename = '{}{}{}'.format(params['data_name'], extsep, params['extension'])
                filepath = join(save_to, filename)
                rename(save_as, filepath)

            except Exception as e:
                printex(e)

            log('successfully downloaded data about "{}"'.format(about))

    return done_something
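
# get_data_from_url() disables certificate checking (verify=False) and buffers the
# whole body in memory. A hedged alternative sketch, assuming the same `requests`
# dependency, that verifies certificates and streams large downloads to disk:
def _download_streaming(uri, save_as, proxies=None):
    import requests
    with requests.get(uri, stream=True, proxies=proxies, timeout=60) as response:
        response.raise_for_status()  # fail early on HTTP errors
        with open(save_as, 'wb') as output_file:
            # write the body chunk by chunk instead of holding it all in memory
            for chunk in response.iter_content(chunk_size=64 * 1024):
                output_file.write(chunk)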