def parseDesc():
    """Parse every tournament description page under webContent/desc/.

    Each file is named "<prefix>_<tournamentId>.<ext>"; its <span> elements
    carry ids of the form "..._Label<Field>".  The "Label" prefix is stripped
    and the span text stored under <Field>.  The resulting mapping
    {tournament_id: {field: text}} is persisted via includes.saveInJSON.
    """
    tournois = {}
    directory = os.listdir("webContent/desc/")
    for fileIn in directory:
        # tournament id is the token after "_" in the stem of the filename
        id_tournois = fileIn.split(".")[0].split("_")[1]
        tournoi = {}
        with open("webContent/desc/" + fileIn, mode='r', encoding='utf-8') as fp:
            soup = BeautifulSoup(
                fp,
                'html.parser',
            )
        for child in soup.find_all("span"):
            # fix: .get avoids a KeyError on <span> tags with no id attribute
            id_element = child.attrs.get('id')
            if id_element:
                # last "_"-separated token carries the label name
                tag = id_element.split("_")[-1]
                if tag != "LabelTitre":
                    # drop the "Label" prefix (5 chars) to get the field name
                    tournoi[tag[5:]] = child.text
        tournois[id_tournois] = tournoi
    includes.saveInJSON(tournois, "tournois")
def parseGrille():
    """Parse the pairing/grid tables of each result file.

    Relies on module-scope `files` and `directory` for input discovery
    (NOTE(review): defined elsewhere in this module — confirm).  Builds
    {tournament_id: [row_dict, ...]} where row keys come from the
    "papi_small_t" header row, then persists it as "result".
    """
    res = {}
    for filename in files:
        id_tournois = filename.split("_")[1].split(".")[0]
        titre = []
        # fix: with-block closes the file handle the original leaked
        with open(directory + filename, mode="r", encoding="utf-8") as file:
            soup = BeautifulSoup(file, 'html.parser')
        Titrelist = soup.table.find(name="tr", class_="papi_small_t")
        if Titrelist is None:
            # no header row -> nothing parsable for this tournament
            continue
        # collect column titles from the header row's cells
        for subBalise in Titrelist:
            if subBalise != "\n":
                for element in subBalise.contents:
                    titre.append(element)
        voisin = Titrelist.find_next_sibling()
        res[id_tournois] = []
        # walk sibling rows while they still look like grid lines
        while isinstance(voisin, Tag) and isLine(voisin):
            grille = {}
            i = 0
            for element in voisin.contents:
                if element != "\n":
                    grille[titre[i]] = element.text
                    i += 1
            res[id_tournois].append(grille)
            voisin = voisin.find_next_sibling()
    includes.saveInJSON(res, "result")
def parsePerson():
    """Parse the participant list of each tournament file.

    Relies on module-scope `filesName` and `directory` for input discovery
    (NOTE(review): defined elsewhere in this module — confirm).  Header
    cells ("papi_liste_t") name the columns; data rows come in two CSS
    classes ("papi_liste_f" then "papi_liste_c") that share the same cell
    layout.  Result: {tournament_id: {row_index: {column: text}}},
    persisted as "participants".
    """
    participants = {}
    for fileIn in filesName:
        id_tournois = fileIn.split(".")[0].split("_")[1]
        # fix: with-block closes the file handle the original leaked
        with open(directory + fileIn, mode="r", encoding="utf-8") as fichierIn:
            soup = BeautifulSoup(fichierIn, 'html.parser')
        Titrelist = soup.table.find(name="tr", class_="papi_liste_t")
        if Titrelist is None:
            continue
        # collect column titles from the header row's cells
        titre = []
        for subBalise in Titrelist:
            if subBalise != "\n":
                for element in subBalise.contents:
                    titre.append(element)
        personns = {}
        i = 0
        # the original duplicated this loop for each class; both row kinds
        # have identical cell structure, so one loop over both suffices
        for css_class in ("papi_liste_f", "papi_liste_c"):
            for tr in soup.table.findAll(name="tr", class_=css_class):
                person = {}
                j = 0
                for td in tr.contents:
                    if td != "\n":
                        person[titre[j]] = td.text
                        j += 1
                personns[i] = person
                i += 1
        participants[id_tournois] = personns
    includes.saveInJSON(participants, "participants")
def parseStats():
    """Parse the statistics tables of every file in the stats directory.

    For each file, finds the inner table of the element with id
    "TablePage", walks its category header rows ("papi_liste_t") and the
    sub-category/value rows that follow, and persists
    {tournament_id: {category: {sub_category: value}}} as "stats".
    """
    files = os.listdir(includes.getDir(includes.stat))
    if not files:
        print(
            "Il n'y a pas de fichiers dans le dossier 'webContent/stats'.\nVeuillez le peupler avec le script 'extractFromWeb'"
        )
        return
    stats = {}
    for filename in files:
        # fix: with-block closes the file handle the original leaked
        with open(includes.getDir(includes.stat) + filename,
                  mode="r", encoding="utf-8", errors="replace") as file:
            soup = BeautifulSoup(file, 'html.parser')
        # renamed from `id` to avoid shadowing the builtin
        tournament_id = getId(filename)
        outer = soup.find("table", {"id": "TablePage"})
        if outer is None:
            # fix: original chained .find() on None and raised AttributeError
            continue
        table = outer.find("table")
        if table is None:
            continue
        flag_two_tr = False
        current_stat = {}
        for tr in table.find_all("tr", {"class": "papi_liste_t"}):
            category = getCategory(tr.find("td").text)
            voisin = tr.find_next_sibling()
            if flag_two_tr:
                # second header row of a two-row category already consumed
                flag_two_tr = False
                continue
            if isACategory(voisin):
                # category header spans two rows: skip the second one
                flag_two_tr = True
                voisin = voisin.find_next_sibling()
            current_stat[category] = {}
            # consume sub-category rows until the next category header
            while isinstance(voisin, Tag) and not isACategory(voisin):
                td_sous_cat = voisin.find("td", {"class": "papi_liste_c"})
                if td_sous_cat is None:
                    break
                td_value = td_sous_cat.find_next_sibling()
                # cell text looks like "<name> : ..." / "<value> ..."
                sous_cat = td_sous_cat.text.split(" :")[0]
                value = td_value.text.split(" ")[0]
                current_stat[category][sous_cat] = value
                voisin = voisin.find_next_sibling()
        stats[tournament_id] = current_stat
    includes.saveInJSON(stats, "stats")
def programme(annee, moisDeb=1, moisFin=12):
    """Compute and persist the ranking for year `annee`, restricted to the
    month window [moisDeb, moisFin] (defaults: the whole year).

    The ranking is serialized with json.dumps before being handed to
    includes.saveInJSON, matching the original behavior.
    """
    tournois_annee = getTournoisAn(annee)
    tournois_periode = getTournoisMois(tournois_annee, moisDeb, moisFin)
    includes.saveInJSON(json.dumps(classement(tournois_periode)), "classement")