コード例 #1
0
def ranger_articles_xml(chemin_base, coll_articles, entree_version_section,
                        entree_texte, entree_version_texte, dates_changement,
                        ensemble_articles, cidTexte):
    """Record the articles linked at one level and collect their dates.

    For every element of ``coll_articles`` the ``id``, ``etat``, ``num``,
    ``debut`` and ``fin`` attributes and the element text are read, then the
    matching ``Article`` row is fetched by id, or created when it does not
    exist yet. A ``(current/total)`` counter is written to stdout while an
    article is processed and erased afterwards with ANSI cursor moves.

    Args:
        chemin_base: Not used by this function.
        coll_articles: Sequence of elements supporting ``element['attr']``
            and ``element.text``, or ``None`` when there is nothing to do.
        entree_version_section: Stored as ``version_section`` on new rows.
        entree_texte: Stored as ``texte`` on new rows.
        entree_version_texte: Stored as ``version_texte`` on new rows.
        dates_changement: Set receiving each article's start and end dates.
        ensemble_articles: Set receiving each ``Article`` row.
        cidTexte: Not used by this function.

    Returns:
        The tuple ``(dates_changement, ensemble_articles)``; both sets are
        also updated in place.
    """
    if coll_articles is None:
        return dates_changement, ensemble_articles

    total = len(coll_articles)
    for rang, lien_article in enumerate(coll_articles, 1):

        compteur = '({}/{})'.format(rang, total)
        print(compteur, end='')
        sys.stdout.flush()

        # Raw read of the XML attributes
        id_article = lien_article['id']
        nom = lien_article.text
        etat_juridique = lien_article['etat']
        num = lien_article['num']
        debut = normalise_date(lien_article['debut'])
        fin = normalise_date(lien_article['fin'])

        # Record the article
        # TODO: handle updates
        try:
            entree_article = Article.get(Article.id == id_article)
        except Article.DoesNotExist:
            # Only "no such row" leads to a creation; any other failure
            # (database error, Ctrl-C) now propagates instead of being
            # mistaken for a missing article.
            # NOTE(review): assumes a peewee-style model exposing
            # ``DoesNotExist`` - confirm against the model definitions.
            entree_article = Article.create(
                id=id_article,
                nom=nom,
                etat_juridique=etat_juridique,
                num=num,
                debut=debut,
                fin=fin,
                texte=entree_texte,
                version_section=entree_version_section,
                version_texte=entree_version_texte)

        # Register the dates and the article
        dates_changement |= {debut, fin}
        ensemble_articles |= {entree_article}

        # Erase the counter: move the cursor back over it, overwrite it
        # with spaces, then move back again.
        recul = '\033[' + str(len(compteur)) + 'D'
        print(recul + ' ' * len(compteur) + recul, end='')
        sys.stdout.flush()

    return dates_changement, ensemble_articles
コード例 #2
0
ファイル: ranger.py プロジェクト: Dollab/Archeo-Lex
def ranger_articles_xml(chemin_base, coll_articles, entree_version_section, entree_texte, entree_version_texte, dates_changement, ensemble_articles, cidTexte):
    """Record the articles linked at one level and collect their dates.

    For every element of ``coll_articles`` the ``id``, ``etat``, ``num``,
    ``debut`` and ``fin`` attributes and the element text are read, then the
    matching ``Article`` row is fetched by id, or created when it does not
    exist yet. A ``(current/total)`` counter is written to stdout while an
    article is processed and erased afterwards with ANSI cursor moves.

    Args:
        chemin_base: Not used by this function.
        coll_articles: Sequence of elements supporting ``element['attr']``
            and ``element.text``, or ``None`` when there is nothing to do.
        entree_version_section: Stored as ``version_section`` on new rows.
        entree_texte: Stored as ``texte`` on new rows.
        entree_version_texte: Stored as ``version_texte`` on new rows.
        dates_changement: Set receiving each article's start and end dates.
        ensemble_articles: Set receiving each ``Article`` row.
        cidTexte: Not used by this function.

    Returns:
        The tuple ``(dates_changement, ensemble_articles)``; both sets are
        also updated in place.
    """
    if coll_articles is None:
        return dates_changement, ensemble_articles

    total = len(coll_articles)
    for rang, lien_article in enumerate(coll_articles, 1):

        compteur = '({}/{})'.format(rang, total)
        print(compteur, end='')
        sys.stdout.flush()

        # Raw read of the XML attributes
        id_article = lien_article['id']
        nom = lien_article.text
        etat_juridique = lien_article['etat']
        num = lien_article['num']
        debut = normalise_date(lien_article['debut'])
        fin = normalise_date(lien_article['fin'])

        # Record the article
        # TODO: handle updates
        try:
            entree_article = Article.get(Article.id == id_article)
        except Article.DoesNotExist:
            # Only "no such row" leads to a creation; any other failure
            # (database error, Ctrl-C) now propagates instead of being
            # mistaken for a missing article.
            # NOTE(review): assumes a peewee-style model exposing
            # ``DoesNotExist`` - confirm against the model definitions.
            entree_article = Article.create(
                id=id_article,
                nom=nom,
                etat_juridique=etat_juridique,
                num=num,
                debut=debut,
                fin=fin,
                texte=entree_texte,
                version_section=entree_version_section,
                version_texte=entree_version_texte
            )

        # Register the dates and the article
        dates_changement |= {debut, fin}
        ensemble_articles |= {entree_article}

        # Erase the counter: move the cursor back over it, overwrite it
        # with spaces, then move back again.
        recul = '\033[' + str(len(compteur)) + 'D'
        print(recul + ' ' * len(compteur) + recul, end='')
        sys.stdout.flush()

    return dates_changement, ensemble_articles
コード例 #3
0
def ranger_texte_xml(chemin_base, cidTexte, nature_attendue=None):
    """Load one text from its XML files and record it with its versions.

    Reads ``texte/version/<cidTexte>.xml`` and ``texte/struct/<cidTexte>.xml``
    under ``chemin_base``, cross-checks their metadata, fetches or creates
    the ``Texte`` row and its authoritative ``Version_texte``, walks the
    sections and articles through ``ranger_sections_xml`` and finally
    creates one ``Version_texte`` per pair of consecutive change dates.

    Args:
        chemin_base: Directory containing the ``texte`` tree.
        cidTexte: Identifier of the text; also the base name of both files.
        nature_attendue: Optional expected nature, compared upper-cased with
            the NATURE element of both files. The check is skipped when this
            is ``None`` or empty.

    Raises:
        Exception: One of the two files is missing or a consistency check
            between the files fails.
    """
    import functools  # function-scope import: only the final sort needs it

    # Raw read of the texte/version XML file
    chemin_texte_version = os.path.join(chemin_base, 'texte', 'version',
                                        cidTexte + '.xml')
    if not os.path.exists(chemin_texte_version):
        raise Exception(
            'missing texte/version file: {!r}'.format(chemin_texte_version))
    # The context manager closes the handle as soon as parsing is done.
    with open(chemin_texte_version, 'r') as f_version:
        soup_version = BeautifulSoup(f_version.read(), 'xml')
    version_META = soup_version.find('META')
    version_META_COMMUN = version_META.find('META_COMMUN')
    version_META_SPEC = version_META.find('META_SPEC')
    version_META_TEXTE_CHRONICLE = version_META_SPEC.find(
        'META_TEXTE_CHRONICLE')
    version_META_TEXTE_VERSION = version_META_SPEC.find('META_TEXTE_VERSION')

    version_NATURE = version_META_COMMUN.find('NATURE').text
    version_CID = version_META_TEXTE_CHRONICLE.find('CID').text
    version_NOR = version_META_TEXTE_CHRONICLE.find('NOR').text
    version_DATE_TEXTE = version_META_TEXTE_CHRONICLE.find('DATE_TEXTE').text
    version_DATE_PUBLI = version_META_TEXTE_CHRONICLE.find('DATE_PUBLI').text
    version_TITRE = version_META_TEXTE_VERSION.find('TITRE').text
    version_TITREFULL = version_META_TEXTE_VERSION.find('TITREFULL').text
    version_DATE_DEBUT = version_META_TEXTE_VERSION.find('DATE_DEBUT').text
    version_DATE_FIN = version_META_TEXTE_VERSION.find('DATE_FIN').text
    version_ETAT = version_META_TEXTE_VERSION.find('ETAT').text

    # Raw read of the texte/struct XML file
    chemin_texte_struct = os.path.join(chemin_base, 'texte', 'struct',
                                       cidTexte + '.xml')
    if not os.path.exists(chemin_texte_struct):
        raise Exception(
            'missing texte/struct file: {!r}'.format(chemin_texte_struct))
    with open(chemin_texte_struct, 'r') as f_struct:
        soup_struct = BeautifulSoup(f_struct.read(), 'xml')
    struct_META = soup_struct.find('META')
    struct_META_COMMUN = struct_META.find('META_COMMUN')
    struct_META_SPEC = struct_META.find('META_SPEC')
    struct_META_TEXTE_CHRONICLE = struct_META_SPEC.find('META_TEXTE_CHRONICLE')
    struct_VERSIONS = soup_struct.find('VERSIONS')
    struct_STRUCT = soup_struct.find('STRUCT')

    struct_NATURE = struct_META_COMMUN.find('NATURE').text
    struct_DATE_TEXTE = struct_META_TEXTE_CHRONICLE.find('DATE_TEXTE').text
    struct_DATE_PUBLI = struct_META_TEXTE_CHRONICLE.find('DATE_PUBLI').text
    struct_VERSION = struct_VERSIONS.find_all('VERSION')
    # texte/version can only hold a single version, so texte/struct must too
    # and both must match. Checked before indexing so that an empty list is
    # reported as such instead of surfacing as an IndexError.
    if len(struct_VERSION) != 1:
        raise Exception(
            'texte/struct must contain exactly one VERSION, found {}'.format(
                len(struct_VERSION)))
    struct_LIEN_TXT = struct_VERSION[0].find('LIEN_TXT')
    struct_LIEN_ART = struct_STRUCT.find_all('LIEN_ART')
    struct_LIEN_SECTION_TA = struct_STRUCT.find_all('LIEN_SECTION_TA')

    # The values below are not used further down; the lookups are kept
    # because they fail immediately on a file lacking these elements or
    # attributes.
    struct_CID = struct_META_TEXTE_CHRONICLE.find('CID').text
    struct_NOR = struct_META_TEXTE_CHRONICLE.find('NOR').text
    struct_VERSION_etat = struct_VERSION[0]['etat']
    struct_LIEN_TXT_id = struct_LIEN_TXT['id']
    struct_LIEN_TXT_debut = struct_LIEN_TXT['debut']
    struct_LIEN_TXT_fin = struct_LIEN_TXT['fin']

    # Basic processing
    version_DATE_TEXTE = normalise_date(version_DATE_TEXTE)
    version_DATE_PUBLI = normalise_date(version_DATE_PUBLI)
    version_DATE_DEBUT = normalise_date(version_DATE_DEBUT)
    version_DATE_FIN = normalise_date(version_DATE_FIN)
    struct_DATE_TEXTE = normalise_date(struct_DATE_TEXTE)
    struct_DATE_PUBLI = normalise_date(struct_DATE_PUBLI)

    # Consistency checks
    if cidTexte != version_CID:
        raise Exception('CID mismatch: expected {!r}, found {!r}'.format(
            cidTexte, version_CID))
    # The nature comparison is nested under the ``nature_attendue`` test:
    # written as a single ``a and not b or not c`` expression, the last
    # operand was evaluated even for ``None`` and called ``None.upper()``.
    if nature_attendue:
        nature = nature_attendue.upper()
        if version_NATURE != nature or struct_NATURE != nature:
            raise Exception(
                'unexpected nature: expected {!r}, found {!r} (version) '
                'and {!r} (struct)'.format(nature, version_NATURE,
                                           struct_NATURE))
    if version_DATE_TEXTE != struct_DATE_TEXTE:
        raise Exception('DATE_TEXTE differs between version and struct')
    if version_DATE_PUBLI != struct_DATE_PUBLI:
        raise Exception('DATE_PUBLI differs between version and struct')

    # Record the Texte
    # TODO: handle updates
    # NOTE(review): the ``DoesNotExist`` handlers below assume peewee-style
    # models - confirm against the model definitions.
    try:
        entree_texte = Texte.get(Texte.cid == version_CID)
    except Texte.DoesNotExist:
        entree_texte = Texte.create(
            cid=version_CID.upper(),
            nor=version_NOR.upper(),
            nature=version_NATURE.lower(),
            date_publi=version_DATE_PUBLI,
            date_texte=version_DATE_TEXTE,
        )

    # Record the authoritative Version_texte
    # TODO: handle updates
    try:
        entree_version_texte = Version_texte.get(
            Version_texte.texte == entree_texte)
    except Version_texte.DoesNotExist:
        entree_version_texte = Version_texte.create(
            texte=entree_texte,
            titre=version_TITRE,
            titre_long=version_TITREFULL,
            etat_juridique=version_ETAT.lower(),
            debut=version_DATE_DEBUT,
            fin=version_DATE_FIN,
            base=None)

    # Inventory of the change dates
    dates_changement = {version_DATE_DEBUT, version_DATE_FIN}
    ensemble_versions_sections = set()
    ensemble_articles = set()

    # Recursively add the sections and articles
    (dates_changement, ensemble_versions_sections,
     ensemble_articles) = ranger_sections_xml(
         chemin_base, struct_LIEN_SECTION_TA, struct_LIEN_ART, entree_texte,
         entree_version_texte, None, None, dates_changement,
         ensemble_versions_sections, ensemble_articles, cidTexte, 1)
    print('')

    # Create the text versions, one per pair of consecutive change dates.
    # ``cmp_to_key`` keeps the comparison function usable where
    # ``list.sort(cmp=...)`` no longer exists.
    dates_triees = sorted(dates_changement,
                          key=functools.cmp_to_key(comp_infini))
    for debut, fin in zip(dates_triees, dates_triees[1:]):
        # TODO: handle updates
        Version_texte.create(texte=entree_texte,
                             titre=version_TITRE,
                             titre_long=version_TITREFULL,
                             etat_juridique=version_ETAT.lower(),
                             debut=debut,
                             fin=fin,
                             base=entree_version_texte)
コード例 #4
0
def ranger_sections_xml(chemin_base, coll_sections, coll_articles,
                        entree_texte, entree_version_texte, section_parente,
                        version_section_parente, dates_changement,
                        ensemble_versions_sections, ensemble_articles,
                        cidTexte, niv):
    """Record the sections of one level, then recurse into each of them.

    The articles attached at this level are handled first through
    ``ranger_articles_xml``. Then, for each section link, the ``Section``
    and ``Version_section`` rows are fetched or created, the section's own
    XML file under ``<chemin_base>/section_ta`` is parsed and the function
    calls itself on the links found there with ``niv + 1``. An
    ``index/total`` counter followed by an arrow is written to stdout
    during the descent and erased afterwards with ANSI cursor moves.

    Args:
        chemin_base: Directory containing the ``section_ta`` tree.
        coll_sections: Sequence of section link elements supporting
            ``element['attr']`` and ``element.text``.
        coll_articles: Article links of this level, forwarded to
            ``ranger_articles_xml``.
        entree_texte: Stored as ``texte`` on new rows.
        entree_version_texte: Stored as ``version_texte`` on new rows.
        section_parente: Stored as ``cid_parent`` on new ``Section`` rows.
        version_section_parente: Stored as ``id_parent`` on new
            ``Version_section`` rows and passed on for this level's articles.
        dates_changement: Set receiving every start and end date seen.
        ensemble_versions_sections: Set receiving each ``Version_section``.
        ensemble_articles: Set receiving each ``Article`` row.
        cidTexte: Forwarded unchanged to the nested calls.
        niv: Current recursion level.

    Returns:
        The tuple ``(dates_changement, ensemble_versions_sections,
        ensemble_articles)``.

    Raises:
        Exception: ``niv`` has reached 10.
    """
    # Guard against infinite recursion - the specs state a maximum of 10
    if niv == 10:
        raise Exception('section nesting too deep: level 10 reached')

    # Handle the articles at this level
    dates_changement, ensemble_articles = ranger_articles_xml(
        chemin_base, coll_articles, version_section_parente, entree_texte,
        entree_version_texte, dates_changement, ensemble_articles, cidTexte)

    total = len(coll_sections)
    for numero, lien_section in enumerate(coll_sections, 1):

        compteur = '{}/{}'.format(numero, total)
        print(compteur, end='')
        sys.stdout.flush()

        cid = lien_section['cid']
        id_version = lien_section['id']
        nom = lien_section.text
        etat_juridique = lien_section['etat']
        niveau = lien_section['niv']
        debut = normalise_date(lien_section['debut'])
        fin = normalise_date(lien_section['fin'])
        # Drop the first character - presumably a leading '/', which would
        # make os.path.join discard chemin_base; verify against the data.
        url = lien_section['url'][1:]

        # Record the section
        # NOTE(review): the ``DoesNotExist`` handlers assume peewee-style
        # models - confirm against the model definitions.
        try:
            entree_section = Section.get(Section.cid == cid)
        except Section.DoesNotExist:
            entree_section = Section.create(cid=cid,
                                            cid_parent=section_parente,
                                            niveau=niveau,
                                            texte=entree_texte)

        # Add the boundary dates to prepare the link editing
        dates_changement |= {debut, fin}

        # Record the section version
        # TODO: handle updates
        try:
            entree_version_section = Version_section.get(
                Version_section.id == id_version)
        except Version_section.DoesNotExist:
            entree_version_section = Version_section.create(
                cid=cid,
                id=id_version,
                id_parent=version_section_parente,
                nom=nom,
                etat_juridique=etat_juridique,
                niveau=niveau,
                numero=numero,
                debut=debut,
                fin=fin,
                texte=entree_texte,
                version_texte=entree_version_texte)

        # Add this section version
        ensemble_versions_sections |= {entree_version_section}

        print(' → ', end='')
        sys.stdout.flush()

        # Continue recursively. The file is closed before descending so
        # that handles do not pile up along the recursion.
        chemin_section_ta = os.path.join(chemin_base, 'section_ta', url)
        with open(chemin_section_ta, 'r') as f_section_ta:
            soup = BeautifulSoup(f_section_ta.read(), 'xml')
        section_ta_STRUCTURE_TA = soup.find('STRUCTURE_TA')
        section_ta_LIEN_SECTION_TA = section_ta_STRUCTURE_TA.find_all(
            'LIEN_SECTION_TA')
        section_ta_LIEN_ART = section_ta_STRUCTURE_TA.find_all('LIEN_ART')

        (dates_changement, ensemble_versions_sections,
         ensemble_articles) = ranger_sections_xml(
             chemin_base, section_ta_LIEN_SECTION_TA, section_ta_LIEN_ART,
             entree_texte, entree_version_texte, entree_section,
             entree_version_section, dates_changement,
             ensemble_versions_sections, ensemble_articles, cidTexte,
             niv + 1)

        # Erase the arrow, then the counter: move back, blank, move back.
        print('\033[3D   \033[3D', end='')
        recul = '\033[' + str(len(compteur)) + 'D'
        print(recul + ' ' * len(compteur) + recul, end='')

    return dates_changement, ensemble_versions_sections, ensemble_articles
コード例 #5
0
ファイル: ranger.py プロジェクト: Dollab/Archeo-Lex
def ranger_texte_xml(chemin_base, cidTexte, nature_attendue=None):
    """Load one text from its XML files and record it with its versions.

    Reads ``texte/version/<cidTexte>.xml`` and ``texte/struct/<cidTexte>.xml``
    under ``chemin_base``, cross-checks their metadata, fetches or creates
    the ``Texte`` row and its authoritative ``Version_texte``, walks the
    sections and articles through ``ranger_sections_xml`` and finally
    creates one ``Version_texte`` per pair of consecutive change dates.

    Args:
        chemin_base: Directory containing the ``texte`` tree.
        cidTexte: Identifier of the text; also the base name of both files.
        nature_attendue: Optional expected nature, compared upper-cased with
            the NATURE element of both files. The check is skipped when this
            is ``None`` or empty.

    Raises:
        Exception: One of the two files is missing or a consistency check
            between the files fails.
    """
    import functools  # function-scope import: only the final sort needs it

    # Raw read of the texte/version XML file
    chemin_texte_version = os.path.join(chemin_base, 'texte', 'version', cidTexte + '.xml')
    if not os.path.exists(chemin_texte_version):
        raise Exception('missing texte/version file: {!r}'.format(chemin_texte_version))
    # The context manager closes the handle as soon as parsing is done.
    with open(chemin_texte_version, 'r') as f_version:
        soup_version = BeautifulSoup(f_version.read(), 'xml')
    version_META = soup_version.find('META')
    version_META_COMMUN = version_META.find('META_COMMUN')
    version_META_SPEC = version_META.find('META_SPEC')
    version_META_TEXTE_CHRONICLE = version_META_SPEC.find('META_TEXTE_CHRONICLE')
    version_META_TEXTE_VERSION = version_META_SPEC.find('META_TEXTE_VERSION')

    version_NATURE = version_META_COMMUN.find('NATURE').text
    version_CID = version_META_TEXTE_CHRONICLE.find('CID').text
    version_NOR = version_META_TEXTE_CHRONICLE.find('NOR').text
    version_DATE_TEXTE = version_META_TEXTE_CHRONICLE.find('DATE_TEXTE').text
    version_DATE_PUBLI = version_META_TEXTE_CHRONICLE.find('DATE_PUBLI').text
    version_TITRE = version_META_TEXTE_VERSION.find('TITRE').text
    version_TITREFULL = version_META_TEXTE_VERSION.find('TITREFULL').text
    version_DATE_DEBUT = version_META_TEXTE_VERSION.find('DATE_DEBUT').text
    version_DATE_FIN = version_META_TEXTE_VERSION.find('DATE_FIN').text
    version_ETAT = version_META_TEXTE_VERSION.find('ETAT').text

    # Raw read of the texte/struct XML file
    chemin_texte_struct = os.path.join(chemin_base, 'texte', 'struct', cidTexte + '.xml')
    if not os.path.exists(chemin_texte_struct):
        raise Exception('missing texte/struct file: {!r}'.format(chemin_texte_struct))
    with open(chemin_texte_struct, 'r') as f_struct:
        soup_struct = BeautifulSoup(f_struct.read(), 'xml')
    struct_META = soup_struct.find('META')
    struct_META_COMMUN = struct_META.find('META_COMMUN')
    struct_META_SPEC = struct_META.find('META_SPEC')
    struct_META_TEXTE_CHRONICLE = struct_META_SPEC.find('META_TEXTE_CHRONICLE')
    struct_VERSIONS = soup_struct.find('VERSIONS')
    struct_STRUCT = soup_struct.find('STRUCT')

    struct_NATURE = struct_META_COMMUN.find('NATURE').text
    struct_DATE_TEXTE = struct_META_TEXTE_CHRONICLE.find('DATE_TEXTE').text
    struct_DATE_PUBLI = struct_META_TEXTE_CHRONICLE.find('DATE_PUBLI').text
    struct_VERSION = struct_VERSIONS.find_all('VERSION')
    # texte/version can only hold a single version, so texte/struct must too
    # and both must match. Checked before indexing so that an empty list is
    # reported as such instead of surfacing as an IndexError.
    if len(struct_VERSION) != 1:
        raise Exception('texte/struct must contain exactly one VERSION, found {}'.format(len(struct_VERSION)))
    struct_LIEN_TXT = struct_VERSION[0].find('LIEN_TXT')
    struct_LIEN_ART = struct_STRUCT.find_all('LIEN_ART')
    struct_LIEN_SECTION_TA = struct_STRUCT.find_all('LIEN_SECTION_TA')

    # The values below are not used further down; the lookups are kept
    # because they fail immediately on a file lacking these elements or
    # attributes.
    struct_CID = struct_META_TEXTE_CHRONICLE.find('CID').text
    struct_NOR = struct_META_TEXTE_CHRONICLE.find('NOR').text
    struct_VERSION_etat = struct_VERSION[0]['etat']
    struct_LIEN_TXT_id = struct_LIEN_TXT['id']
    struct_LIEN_TXT_debut = struct_LIEN_TXT['debut']
    struct_LIEN_TXT_fin = struct_LIEN_TXT['fin']

    # Basic processing
    version_DATE_TEXTE = normalise_date(version_DATE_TEXTE)
    version_DATE_PUBLI = normalise_date(version_DATE_PUBLI)
    version_DATE_DEBUT = normalise_date(version_DATE_DEBUT)
    version_DATE_FIN = normalise_date(version_DATE_FIN)
    struct_DATE_TEXTE = normalise_date(struct_DATE_TEXTE)
    struct_DATE_PUBLI = normalise_date(struct_DATE_PUBLI)

    # Consistency checks
    if cidTexte != version_CID:
        raise Exception('CID mismatch: expected {!r}, found {!r}'.format(cidTexte, version_CID))
    # The nature comparison is nested under the ``nature_attendue`` test:
    # written as a single ``a and not b or not c`` expression, the last
    # operand was evaluated even for ``None`` and called ``None.upper()``.
    if nature_attendue:
        nature = nature_attendue.upper()
        if version_NATURE != nature or struct_NATURE != nature:
            raise Exception('unexpected nature: expected {!r}, found {!r} (version) and {!r} (struct)'.format(nature, version_NATURE, struct_NATURE))
    if version_DATE_TEXTE != struct_DATE_TEXTE:
        raise Exception('DATE_TEXTE differs between version and struct')
    if version_DATE_PUBLI != struct_DATE_PUBLI:
        raise Exception('DATE_PUBLI differs between version and struct')

    # Record the Texte
    # TODO: handle updates
    # NOTE(review): the ``DoesNotExist`` handlers below assume peewee-style
    # models - confirm against the model definitions.
    try:
        entree_texte = Texte.get(Texte.cid == version_CID)
    except Texte.DoesNotExist:
        entree_texte = Texte.create(
            cid=version_CID.upper(),
            nor=version_NOR.upper(),
            nature=version_NATURE.lower(),
            date_publi=version_DATE_PUBLI,
            date_texte=version_DATE_TEXTE,
        )

    # Record the authoritative Version_texte
    # TODO: handle updates
    try:
        entree_version_texte = Version_texte.get(Version_texte.texte == entree_texte)
    except Version_texte.DoesNotExist:
        entree_version_texte = Version_texte.create(
            texte=entree_texte,
            titre=version_TITRE,
            titre_long=version_TITREFULL,
            etat_juridique=version_ETAT.lower(),
            debut=version_DATE_DEBUT,
            fin=version_DATE_FIN,
            base=None
        )

    # Inventory of the change dates
    dates_changement = {version_DATE_DEBUT, version_DATE_FIN}
    ensemble_versions_sections = set()
    ensemble_articles = set()

    # Recursively add the sections and articles
    dates_changement, ensemble_versions_sections, ensemble_articles = ranger_sections_xml(chemin_base, struct_LIEN_SECTION_TA, struct_LIEN_ART, entree_texte, entree_version_texte, None, None, dates_changement, ensemble_versions_sections, ensemble_articles, cidTexte, 1)
    print('')

    # Create the text versions, one per pair of consecutive change dates.
    # ``cmp_to_key`` keeps the comparison function usable where
    # ``list.sort(cmp=...)`` no longer exists.
    dates_triees = sorted(dates_changement, key=functools.cmp_to_key(comp_infini))
    for debut, fin in zip(dates_triees, dates_triees[1:]):
        # TODO: handle updates
        Version_texte.create(
            texte=entree_texte,
            titre=version_TITRE,
            titre_long=version_TITREFULL,
            etat_juridique=version_ETAT.lower(),
            debut=debut,
            fin=fin,
            base=entree_version_texte
        )
コード例 #6
0
ファイル: ranger.py プロジェクト: Dollab/Archeo-Lex
def ranger_sections_xml(chemin_base, coll_sections, coll_articles, entree_texte, entree_version_texte, section_parente, version_section_parente, dates_changement, ensemble_versions_sections, ensemble_articles, cidTexte, niv):
    """Record the sections of one level, then recurse into each of them.

    The articles attached at this level are handled first through
    ``ranger_articles_xml``. Then, for each section link, the ``Section``
    and ``Version_section`` rows are fetched or created, the section's own
    XML file under ``<chemin_base>/section_ta`` is parsed and the function
    calls itself on the links found there with ``niv + 1``. An
    ``index/total`` counter followed by an arrow is written to stdout
    during the descent and erased afterwards with ANSI cursor moves.

    Args:
        chemin_base: Directory containing the ``section_ta`` tree.
        coll_sections: Sequence of section link elements supporting
            ``element['attr']`` and ``element.text``.
        coll_articles: Article links of this level, forwarded to
            ``ranger_articles_xml``.
        entree_texte: Stored as ``texte`` on new rows.
        entree_version_texte: Stored as ``version_texte`` on new rows.
        section_parente: Stored as ``cid_parent`` on new ``Section`` rows.
        version_section_parente: Stored as ``id_parent`` on new
            ``Version_section`` rows and passed on for this level's articles.
        dates_changement: Set receiving every start and end date seen.
        ensemble_versions_sections: Set receiving each ``Version_section``.
        ensemble_articles: Set receiving each ``Article`` row.
        cidTexte: Forwarded unchanged to the nested calls.
        niv: Current recursion level.

    Returns:
        The tuple ``(dates_changement, ensemble_versions_sections,
        ensemble_articles)``.

    Raises:
        Exception: ``niv`` has reached 10.
    """
    # Guard against infinite recursion - the specs state a maximum of 10
    if niv == 10:
        raise Exception('section nesting too deep: level 10 reached')

    # Handle the articles at this level
    dates_changement, ensemble_articles = ranger_articles_xml(chemin_base, coll_articles, version_section_parente, entree_texte, entree_version_texte, dates_changement, ensemble_articles, cidTexte)

    total = len(coll_sections)
    for numero, lien_section in enumerate(coll_sections, 1):

        compteur = '{}/{}'.format(numero, total)
        print(compteur, end='')
        sys.stdout.flush()

        cid = lien_section['cid']
        id_version = lien_section['id']
        nom = lien_section.text
        etat_juridique = lien_section['etat']
        niveau = lien_section['niv']
        debut = normalise_date(lien_section['debut'])
        fin = normalise_date(lien_section['fin'])
        # Drop the first character - presumably a leading '/', which would
        # make os.path.join discard chemin_base; verify against the data.
        url = lien_section['url'][1:]

        # Record the section
        # NOTE(review): the ``DoesNotExist`` handlers assume peewee-style
        # models - confirm against the model definitions.
        try:
            entree_section = Section.get(Section.cid == cid)
        except Section.DoesNotExist:
            entree_section = Section.create(
                cid=cid,
                cid_parent=section_parente,
                niveau=niveau,
                texte=entree_texte
            )

        # Add the boundary dates to prepare the link editing
        dates_changement |= {debut, fin}

        # Record the section version
        # TODO: handle updates
        try:
            entree_version_section = Version_section.get(Version_section.id == id_version)
        except Version_section.DoesNotExist:
            entree_version_section = Version_section.create(
                cid=cid,
                id=id_version,
                id_parent=version_section_parente,
                nom=nom,
                etat_juridique=etat_juridique,
                niveau=niveau,
                numero=numero,
                debut=debut,
                fin=fin,
                texte=entree_texte,
                version_texte=entree_version_texte
            )

        # Add this section version
        ensemble_versions_sections |= {entree_version_section}

        print(' → ', end='')
        sys.stdout.flush()

        # Continue recursively. The file is closed before descending so
        # that handles do not pile up along the recursion.
        chemin_section_ta = os.path.join(chemin_base, 'section_ta', url)
        with open(chemin_section_ta, 'r') as f_section_ta:
            soup = BeautifulSoup(f_section_ta.read(), 'xml')
        section_ta_STRUCTURE_TA = soup.find('STRUCTURE_TA')
        section_ta_LIEN_SECTION_TA = section_ta_STRUCTURE_TA.find_all('LIEN_SECTION_TA')
        section_ta_LIEN_ART = section_ta_STRUCTURE_TA.find_all('LIEN_ART')

        dates_changement, ensemble_versions_sections, ensemble_articles = ranger_sections_xml(chemin_base, section_ta_LIEN_SECTION_TA, section_ta_LIEN_ART, entree_texte, entree_version_texte, entree_section, entree_version_section, dates_changement, ensemble_versions_sections, ensemble_articles, cidTexte, niv + 1)

        # Erase the arrow, then the counter: move back, blank, move back.
        print('\033[3D   \033[3D', end='')
        recul = '\033[' + str(len(compteur)) + 'D'
        print(recul + ' ' * len(compteur) + recul, end='')

    return dates_changement, ensemble_versions_sections, ensemble_articles