예제 #1
0
def GetFeries(win):
    """Scrape the school calendar published on education.gouv.fr.

    The calendar landing page is downloaded first to discover the available
    school years (the ``<option>`` elements of the ``annee`` selector). Each
    year's page is then downloaded and parsed while a progress dialog
    (child of ``win``) reports the current year.

    Args:
        win: parent window handed to ``myProgressDialog``.

    Returns:
        * ``{year: [list_zones, list_crenaux]}`` on success, where
          ``list_zones`` maps ``"A"``/``"B"``/``"C"`` to indices into the
          sorted academy-name list, and ``list_crenaux`` maps the same keys
          to lists of date entries, each date being ``[year, month, day]``;
        * ``None`` when a page cannot be downloaded;
        * ``[]`` when the dialog's ``stop`` flag is found set after a year
          has been processed.
    """
    print("GetFeries")
    from bs4 import BeautifulSoup
    import urllib.request
    import urllib.error
    import urllib.parse
    from objects_wx import myProgressDialog

    MOIS = [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin', 'juillet',
        'août', 'septembre', 'octobre', 'novembre', 'décembre'
    ]

    def lire_date(texte):
        """Convert '<weekday> <day> <month> <year>' to [year, month, day].

        An empty string yields ``[]`` (date missing from the page).
        """
        if texte == "":
            return []
        _jour_semaine, j, m, a = texte.split()
        # The first day of a month is written "1er" on the page.
        return [int(a), MOIS.index(m) + 1, 1 if j == "1er" else int(j)]

    message = "Recherche des jours fériés\n\n"

    # NOTE(review): ouvrir() is defined elsewhere; its values are assumed to
    # be sequences whose first item is the academy name - confirm.
    ETABLISSEMENTS = ouvrir()
    lstAcad = sorted(a[0] for a in ETABLISSEMENTS.values())

    urlCal = 'http://www.education.gouv.fr/pid25058/le-calendrier-scolaire.html'  #?annee=160&search_input=cancale'
    try:
        # The context manager closes the HTTP response once it is parsed.
        with urllib.request.urlopen(urlCal, timeout=10) as reponse:
            downloadPage = BeautifulSoup(reponse, "html5lib")
    except IOError:
        print("pas d'accès Internet")
        return

    list_feries = {}

    # Starting year taken from each option label (last word before the
    # "-") -> option value used as the "annee" query parameter.
    annees = {}
    tag_annee = downloadPage.find(id="annee")
    for a in tag_annee.find_all('option'):
        annees[int(a['label'].split("-")[0].split()[-1])] = a['value']

    print("  annees:", annees)

    dlg = myProgressDialog("Recherche des jours fériés",
                           message,
                           len(annees),
                           parent=win)
    dlg.Show()
    count = 1

    for annee, code in annees.items():
        count += 1
        message += "Année : " + str(annee) + "\n"
        dlg.update(count, message)
        list_crenaux = {"A": [], "B": [], "C": []}  # date entries per zone
        list_zones = {"A": [], "B": [], "C": []}  # academy indices per zone

        url = urlCal + '?annee=%s' % code

        # Same policy as for the landing page: a network failure ends the
        # scan with None. The dialog is destroyed so it is not left open.
        try:
            with urllib.request.urlopen(url, timeout=10) as reponse:
                page = BeautifulSoup(reponse, "html5lib")
        except IOError:
            dlg.Destroy()
            print("pas d'accès Internet")
            return

        tag_cal = page.find(id="calendrier-v2-detail")

        # The z-th "academie" cell lists the academies of zone A, B, C...
        for z, tag_acad in enumerate(tag_cal.find_all(headers="academie")):
            for i, acad in enumerate(lstAcad):
                if acad in tag_acad.text:
                    list_zones[chr(65 + z)].append(i)
        print("  zones:", list_zones)

        for tr in tag_cal.find_all('tr'):
            creneaux = tr.find_all('td', headers="creneau")
            for z, creneau in enumerate(creneaux):
                for p in creneau.find_all('p'):
                    lignes = [t.strip() for t in p.text.split("\n")]
                    lignes = [t for t in lignes if t]

                    if not lignes:
                        # Empty paragraph: nothing to parse (indexing the
                        # empty result used to raise IndexError).
                        continue

                    if len(lignes) == 1:
                        # A single line is either the end of the period
                        # (text starting with "R" - presumably "Reprise…",
                        # TODO confirm) or its beginning.
                        if lignes[0][0] == "R":
                            lignes = ["", lignes[0]]
                        else:
                            lignes = [lignes[0], ""]

                    # Keep only what follows the last ":" of each line.
                    v = [lire_date(t.split(":")[-1].strip()) for t in lignes]

                    # A missing bound is replaced by July 31st of the other
                    # bound's year.
                    if v[0] == []:
                        v[0] = [v[1][0], 7, 31]
                    elif v[1] == []:
                        v[1] = [v[0][0], 7, 31]

                    list_crenaux[chr(65 + z)].append(v)
                    if len(creneaux) == 1:
                        # A row with a single cell applies to every zone.
                        list_crenaux["B"].append(v)
                        list_crenaux["C"].append(v)

        print("   crenaux:", list_crenaux)
        list_feries[annee] = [list_zones, list_crenaux]

        # Let the GUI process pending events so the stop flag can change.
        wx.Yield()
        if dlg.stop:
            dlg.Destroy()
            print("STOP")
            return []

    print(list_feries)
    return list_feries
예제 #2
0
def GetEtablissements(win):
    """Scrape the education.gouv.fr directory for the schools of each academy.

    The directory's search form is downloaded to get the list of academies
    (the ``<option>`` elements of ``acad_select``). For every academy the
    collèges and then the lycées are requested and parsed, while a progress
    dialog (child of ``win``) shows what is being retrieved.

    Args:
        win: parent window handed to ``myProgressDialog``.

    Returns:
        * ``{code: [academy name, collèges, lycées]}`` on completion, both
          lists holding ``[school name, town]`` pairs;
        * ``None`` when the academy list cannot be downloaded;
        * ``[]`` when the dialog's ``stop`` flag is found set after an
          academy has been processed.
    """
    from bs4 import BeautifulSoup
    import time  # local import: the retry loop must not rely on a module-level one
    import urllib.request
    import urllib.error
    import urllib.parse
    from objects_wx import myProgressDialog

    message = "Recherche des établissements\n\n"
    errmsg = ""
    tentatives = 0  # consecutive download failures, shared by all requests

    def getEtabVille(page, message):
        """Extract ``[school name, town]`` pairs from a result page.

        The ``<div>`` elements are visited in document order: a div whose
        first class is ``annuaire-resultats-entete`` sets the current town,
        a div whose first class is ``annuaire-etablissement-label`` adds a
        school in that town. Each town found is appended to ``message`` and
        shown in the dialog.

        Returns:
            ``(pairs, message)``.
        """
        lst = []
        # A school label met before any town header gets an empty town
        # instead of raising UnboundLocalError.
        ville = ""
        for v in page.find_all('div'):
            # Guards against a missing or empty class attribute.
            classes = v.get('class') or [None]
            if classes[0] == "annuaire-resultats-entete":
                ville = v.contents[0].split(',')[1].lstrip('\n').lstrip()
                message += "     ville : " + ville + "\n"
                dlg.update(count, message)
            elif classes[0] == "annuaire-etablissement-label":
                lst.append([str(v.a.string), ville])
        return lst, message

    def getNbrEtab(page):
        """Return, as a string, the number of schools announced by the page.

        Expected markup::

            <div class="annuaire-nb-results">
            Résultats <b>1 à 43</b> sur <b>43</b>
            </div>

        The second-to-last child of the div is rendered as text and the
        characters of the ``<b>`` tags are stripped. ``"0"`` is returned
        when the div (or that child) is absent.
        """
        try:
            return str(
                page.find_all('div', attrs={'class': "annuaire-nb-results"
                                            })[0].contents[-2]).strip("<>b/")
        except IndexError:
            return "0"

    def telecharger(url, num, message, tentatives):
        """Download ``url`` until the page really concerns academy ``num``.

        A page is accepted when the href of its "annuaire-modif-recherche"
        link contains ``select[]=<num>``, or after more than 10 refused
        pages (the last one is then used as is). Each failed download adds
        a "+" to ``message``, waits one second and is retried, until more
        than 10 consecutive failures have been counted.

        Returns:
            ``(page, message, tentatives)``; ``page`` is ``None`` when the
            failure limit was reached, so the caller never parses a page
            left over from a previous request.
        """
        page = None
        n = 0
        while True:
            try:
                # The context manager closes the HTTP response once parsed.
                with urllib.request.urlopen(url, timeout=5) as reponse:
                    page = BeautifulSoup(reponse, "html5lib")
                tentatives = 0
            except OSError:
                # OSError covers HTTPError, URLError and socket timeouts:
                # all are transient network failures worth a retry.
                page = None
                time.sleep(1)
                tentatives += 1
                message += "+"
                dlg.update(count, message)
                if tentatives > 10:
                    break
                continue

            lien = page.find('a', attrs={'class': "annuaire-modif-recherche"})
            href = lien.get('href', '') if lien is not None else ''
            if "select[]=" + str(num) in href or n > 10:
                break
            n += 1
        return page, message, tentatives

    urlEtab = 'http://www.education.gouv.fr/pid24302/annuaire-resultat-recherche.html'
    urlAcad = 'http://www.education.gouv.fr/pid24301/annuaire-accueil-recherche.html'

    # (slot in liste_etab[num], query prefix, heading, name, lowercase name)
    CATEGORIES = (
        (1, '?college=2&localisation=3&nbPage=1000&acad_select[]=',
         "  Collèges :\n  ----------\n", "Collèges", "collèges"),
        (2, '?lycee=3&localisation=3&nbPage=1000&acad_select[]=',
         "  Lycées :\n  --------\n", "Lycées", "lycées"),
    )

    try:
        with urllib.request.urlopen(urlAcad, timeout=10) as reponse:
            downloadPage = BeautifulSoup(reponse, "html5lib")
    except IOError:
        print("pas d'accès Internet")
        return

    acad_select = downloadPage.find(id="acad_select")
    liste_acad = [[o['label'], o['value']]
                  for o in acad_select.find_all('option')]

    liste_etab = {}

    # Two progress steps per academy: collèges, then lycées.
    dlg = myProgressDialog("Recherche des établissements",
                           message,
                           len(liste_acad) * 2,
                           parent=win)
    dlg.Show()
    count = 1

    for acad, num in liste_acad:
        message += "Académie : " + acad + "\t" + str(num) + "\n"
        dlg.update(count, message)

        liste_etab[num] = [acad, [], []]

        for rang, requete, titre, nom, nom_min in CATEGORIES:
            url = urlEtab + requete + num
            message += titre
            dlg.update(count, message)

            page, message, tentatives = telecharger(url, num, message,
                                                    tentatives)

            if page is None:
                # Download failed for good: report it rather than parsing a
                # page belonging to another request.
                errmsg += ("Académie " + acad +
                           " : échec du téléchargement des " + nom + " !\n")
                count += 1
                message += "\n  échec du téléchargement\n\n"
                dlg.update(count, message)
                continue

            l, message = getEtabVille(page, message)
            liste_etab[num][rang].extend(l)

            r = len(liste_etab[num][rang])  # retrieved
            t = int(getNbrEtab(page))  # announced by the page
            if r < t:
                errmsg += "Académie " + acad + " : manque " + str(
                    t - r) + " " + nom + " !\n"

            count += 1
            message += ("  " + str(r) + " / " + str(t) + " " + nom_min +
                        " récupérés\n\n")
            dlg.update(count, message)

        # Let the GUI process pending events so the stop flag can change.
        wx.Yield()
        if dlg.stop:
            dlg.Destroy()
            print("STOP")
            return []

    message += "\nOpération Terminée !\n"
    if errmsg != "":
        message += "ERREURS de récupération :\n"
        message += errmsg
    dlg.update(count, message)
    return liste_etab