Example #1
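All of these snippets assume the same module-level setup. A minimal sketch of it follows; sanitize.s, meteo_dico and find_index are project-specific helpers that are only assumed here, with their roles inferred from how the functions call them.

# Shared setup assumed by the examples below (a sketch, not the original module header).
import urllib2                  # Python 2 HTTP client used by every function
from bs4 import BeautifulSoup   # HTML parsing, always with the "lxml" backend

# Assumed project helpers (roles inferred from the call sites):
#   sanitize.s(text)        -> cleaned-up string
#   meteo_dico              -> dict mapping a weather-icon file name to a label
#   find_index(labels, s)   -> index of the first header cell containing s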
def parse_infos_course(urlsource):

  req = urllib2.Request(urlsource)
  html = urllib2.urlopen(req)
  pronos_page = BeautifulSoup(html,"lxml")

  boite_entete = pronos_page.find_all("div","boite4rond")[0]

  num_course = urlsource[urlsource.rfind("/")+1:]
  num_course = num_course[:num_course.find("_")]

  nom_course = boite_entete.find_all("h1")[0].contents[0]
  nom_course = sanitize.s(nom_course[:nom_course.find(' - ')])

  reunion_course = boite_entete.find_all("h3")[0].contents[0]
  reunion_course = sanitize.s(reunion_course[reunion_course.rfind('union')+6:])

  lieu_course = boite_entete.find_all("h3")[0].contents[0]
  lieu_course = lieu_course[:lieu_course.find(' | ')]
  lieu_course = sanitize.s(lieu_course[lieu_course.find(' - ')+3:])

  meteo_course = boite_entete.find_all("div","boite4rond")[0].find_all("img")
  if not meteo_course:
    meteo_course = ''
  else:
    # keep only the icon file name and map it to a weather label via meteo_dico
    meteo_course = meteo_course[0].get("src")
    meteo_course = meteo_course[meteo_course.rfind('/')+1:]
    meteo_course = meteo_dico[meteo_course]

  temp_course = str(boite_entete.find("h4"))
  if temp_course == 'None':
    temp_course = ''
  else:
    # '\xc2' is the lead byte of the UTF-8 degree sign; keep the text before it
    temp_course = temp_course[temp_course.find(">")+1:temp_course.find("\xc2")] + ' - ' + temp_course[temp_course.rfind("/>")+2:temp_course.rfind("\xc2")]

  type_course = boite_entete.find_all("p")[0].contents[0]
  type_course = type_course[:type_course.find(' - ')]

  distance_course = boite_entete.find_all("p")[0].contents[0]
  distance_course = distance_course[:distance_course.find('m - ')]
  distance_course = distance_course[distance_course.find(' - ')+3:]

  prix_course = boite_entete.find_all("p")[0].contents[0]
  prix_course = prix_course[prix_course.rfind(' - ')+3:]
  prix_course = prix_course[:prix_course.find('&')]

  date_course = urlsource[urlsource.find("prono")+15:urlsource.find("prono")+25]

  heure_course = boite_entete.find_all("h3")[0].contents[0]
  heure_course = heure_course[:heure_course.find(' - ')]

  return [num_course,nom_course, reunion_course,lieu_course, meteo_course, temp_course,type_course, distance_course,prix_course, date_course,heure_course]
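A usage sketch for parse_infos_course; the URL is hypothetical, and only the shape of the eleven-element list returned above is taken from the code.

# Hypothetical page address -- the real prognosis pages live on canalturf.com.
infos = parse_infos_course("http://www.canalturf.com/.../2015-03-01/1234_exemple.html")
(num_course, nom_course, reunion, lieu, meteo, temperature,
 type_course, distance, prix, date_course, heure) = infos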
Example #2
def find_nom_cheval(urlsource):

  req = urllib2.Request(urlsource)
  html = urllib2.urlopen(req)
  page = BeautifulSoup(html,"lxml")
  
  try:
    nom_cheval = page.find("div","ficheinfo").find("h1").contents[0].encode("ascii","ignore")
  except:
    nom_cheval = ''

  return sanitize.s(nom_cheval)
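A usage sketch for find_nom_cheval; the query-string root is the one used by get_id_chevaux further down, and the id value is arbitrary.

# Returns the sanitized horse name, or '' when the "ficheinfo" block is missing.
nom = find_nom_cheval("http://www.canalturf.com/courses_fiche_cheval.php?idcheval=1234")
print nom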
Example #3
def find_nom_cheval(urlsource):

    req = urllib2.Request(urlsource)
    html = urllib2.urlopen(req)
    page = BeautifulSoup(html, "lxml")

    try:
        nom_cheval = page.find("div",
                               "ficheinfo").find("h1").contents[0].encode(
                                   "ascii", "ignore")
    except:
        nom_cheval = ''

    return sanitize.s(nom_cheval)
def parse_infos_course(urlsource):

    req = urllib2.Request(urlsource)
    html = urllib2.urlopen(req)
    pronos_page = BeautifulSoup(html, "lxml")

    boite_entete = pronos_page.find_all("div", "boite4rond")[0]

    num_course = urlsource[urlsource.rfind("/") + 1:]
    num_course = num_course[:num_course.find("_")]

    nom_course = boite_entete.find_all("h1")[0].contents[0]
    nom_course = sanitize.s(nom_course[:nom_course.find(' - ')])

    reunion_course = boite_entete.find_all("h3")[0].contents[0]
    reunion_course = sanitize.s(reunion_course[reunion_course.rfind('union') +
                                               6:])

    lieu_course = boite_entete.find_all("h3")[0].contents[0]
    lieu_course = lieu_course[:lieu_course.find(' | ')]
    lieu_course = sanitize.s(lieu_course[lieu_course.find(' - ') + 3:])

    meteo_course = boite_entete.find_all("div",
                                         "boite4rond")[0].find_all("img")
    if not meteo_course:
        meteo_course = ''
    else:
        meteo_course = meteo_course[0].get("src")
        meteo_course = meteo_course[meteo_course.rfind('/') + 1:]
        meteo_course = meteo_dico[meteo_course]

    temp_course = str(boite_entete.find("h4"))
    if temp_course == 'None':
        temp_course = ''
    else:
        temp_course = temp_course[temp_course.find(">") + 1:temp_course.find(
            "\xc2")] + ' - ' + temp_course[temp_course.rfind("/>") +
                                           2:temp_course.rfind("\xc2")]

    type_course = boite_entete.find_all("p")[0].contents[0]
    type_course = type_course[:type_course.find(' - ')]

    distance_course = boite_entete.find_all("p")[0].contents[0]
    distance_course = distance_course[:distance_course.find('m - ')]
    distance_course = distance_course[distance_course.find(' - ') + 3:]

    prix_course = boite_entete.find_all("p")[0].contents[0]
    prix_course = prix_course[prix_course.rfind(' - ') + 3:]
    prix_course = prix_course[:prix_course.find('&')]

    date_course = urlsource[urlsource.find("prono") +
                            15:urlsource.find("prono") + 25]

    heure_course = boite_entete.find_all("h3")[0].contents[0]
    heure_course = heure_course[:heure_course.find(' - ')]

    return [
        num_course, nom_course, reunion_course, lieu_course, meteo_course,
        temp_course, type_course, distance_course, prix_course, date_course,
        heure_course
    ]
Example #5
def get_id_chevaux(mini,maxi):

  nb_errors = 0
  url_root = "http://www.canalturf.com/courses_fiche_cheval.php?idcheval="
  cheval_id_table = []

  for i in range(mini,maxi+1):
   urlsource = url_root+str(i)
   req = urllib2.Request(urlsource)
   html = urllib2.urlopen(req)
   page = BeautifulSoup(html,"lxml").find("div","ficheinfo")
   
   try:
     cartouche = page.find_all("div")[0]
     cart_infos = cartouche.find_all("div")[1]
     palmares = page.find("div","fiche_bloc")
     palm_infos = palmares.find("p")
   except:
     nb_errors = nb_errors +1
     continue

   num = str(i)
   
   try:
     nom = cartouche.find("h1").contents[0].encode("ascii","ignore")
     nom = sanitize.s(nom[nom.find("chev")+7:])
   except:
     nom = ''
     nb_errors = nb_errors+1

   try:
     sex = cart_infos.contents[0].encode("ascii","ignore")	
     sexe = sex[11:12]
     age = sex[12:]
   except:
     sexe = ''
     age = ''
     nb_errors = nb_errors+1

   try:
     robe = cart_infos.contents[2].encode("ascii","ignore")[7:]
   except:
     robe = ''
     nb_errors = nb_errors+1

   try:
     pere = sanitize.s(cart_infos.contents[4].encode("ascii","ignore")[6:])
   except:
     pere = ''
     nb_errors = nb_errors+1

   try:
     mere = sanitize.s(cart_infos.contents[6].encode("ascii","ignore")[6:])
   except:
     mere = ''
     nb_errors = nb_errors+1

   try:
     pere_mere = sanitize.s(cart_infos.contents[8].encode("ascii","ignore")[13:])
   except:
     pere_mere = ''
     nb_errors = nb_errors+1

   try:
     proprio = sanitize.s(cart_infos.contents[11].encode("ascii","ignore")[14:])
   except:
     proprio = ''
     nb_errors = nb_errors+1

   try:
     entraineur = sanitize.s(cart_infos.contents[13].encode("ascii","ignore")[13:])
   except:
     entraineur = ''
     nb_errors = nb_errors+1

   try:
     eleveur = sanitize.s(cart_infos.contents[15].encode("ascii","ignore")[10:])
   except:
     eleveur = ''
     nb_errors = nb_errors+1

   try:
     gain = palm_infos.contents[0].encode("ascii","ignore")[7:]
   except:
     gain = ''
     nb_errors = nb_errors+1

   try:
     perfs = palm_infos.contents[2].encode("ascii","ignore")[8:]
   except:
     perfs = ''
     nb_errors = nb_errors+1

   try:
     courus = palm_infos.contents[4].encode("ascii","ignore")[12:]
   except:
     courus = ''
     nb_errors = nb_errors+1

   try:
     victoires = palm_infos.contents[6].encode("ascii","ignore")[14:]
   except:
     victoires = ''
     nb_errors = nb_errors+1

   try:
     places = palm_infos.contents[8].encode("ascii","ignore")[10:]
   except:
     places = ''
     nb_errors = nb_errors+1

   cheval_id = [num, nom, sexe, age, robe, pere, mere, pere_mere, proprio,
                entraineur, eleveur, gain, perfs, courus, victoires, places]

   cheval_id_table.append(cheval_id)

  
  return (cheval_id_table,nb_errors)
Example #6
def get_id_chevaux(mini, maxi):

    nb_errors = 0
    url_root = "http://www.canalturf.com/courses_fiche_cheval.php?idcheval="
    cheval_id_table = []

    for i in range(mini, maxi + 1):
        urlsource = url_root + str(i)
        req = urllib2.Request(urlsource)
        html = urllib2.urlopen(req)
        page = BeautifulSoup(html, "lxml").find("div", "ficheinfo")

        try:
            cartouche = page.find_all("div")[0]
            cart_infos = cartouche.find_all("div")[1]
            palmares = page.find("div", "fiche_bloc")
            palm_infos = palmares.find("p")
        except:
            nb_errors = nb_errors + 1
            continue

        num = str(i)

        try:
            nom = cartouche.find("h1").contents[0].encode("ascii", "ignore")
            nom = sanitize.s(nom[nom.find("chev") + 7:])
        except:
            nom = ''
            nb_errors = nb_errors + 1

        try:
            sex = cart_infos.contents[0].encode("ascii", "ignore")
            sexe = sex[11:12]
            age = sex[12:]
        except:
            sexe = ''
            age = ''
            nb_errors = nb_errors + 1

        try:
            robe = cart_infos.contents[2].encode("ascii", "ignore")[7:]
        except:
            robe = ''
            nb_errors = nb_errors + 1

        try:
            pere = sanitize.s(cart_infos.contents[4].encode("ascii",
                                                            "ignore")[6:])
        except:
            pere = ''
            nb_errors = nb_errors + 1

        try:
            mere = sanitize.s(cart_infos.contents[6].encode("ascii",
                                                            "ignore")[6:])
        except:
            mere = ''
            nb_errors = nb_errors + 1

        try:
            pere_mere = sanitize.s(cart_infos.contents[8].encode(
                "ascii", "ignore")[13:])
        except:
            pere_mere = ''
            nb_errors = nb_errors + 1

        try:
            proprio = sanitize.s(cart_infos.contents[11].encode(
                "ascii", "ignore")[14:])
        except:
            proprio = ''
            nb_errors = nb_errors + 1

        try:
            entraineur = sanitize.s(cart_infos.contents[13].encode(
                "ascii", "ignore")[13:])
        except:
            entraineur = ''
            nb_errors = nb_errors + 1

        try:
            eleveur = sanitize.s(cart_infos.contents[15].encode(
                "ascii", "ignore")[10:])
        except:
            eleveur = ''
            nb_errors = nb_errors + 1

        try:
            gain = palm_infos.contents[0].encode("ascii", "ignore")[7:]
        except:
            gain = ''
            nb_errors = nb_errors + 1

        try:
            perfs = palm_infos.contents[2].encode("ascii", "ignore")[8:]
        except:
            perfs = ''
            nb_errors = nb_errors + 1

        try:
            courus = palm_infos.contents[4].encode("ascii", "ignore")[12:]
        except:
            courus = ''
            nb_errors = nb_errors + 1

        try:
            victoires = palm_infos.contents[6].encode("ascii", "ignore")[14:]
        except:
            victoires = ''
            nb_errors = nb_errors + 1

        try:
            places = palm_infos.contents[8].encode("ascii", "ignore")[10:]
        except:
            places = ''
            nb_errors = nb_errors + 1

        cheval_id = [num, nom, sexe, age, robe, pere, mere, pere_mere,
                     proprio, entraineur, eleveur, gain, perfs, courus,
                     victoires, places]

        cheval_id_table.append(cheval_id)

    return (cheval_id_table, nb_errors)
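A usage sketch for get_id_chevaux; the id range is arbitrary, and nb_errors counts parse failures (one per missing field, or one per page whose fiche block cannot be found).

# Scrape horse ids 1..50 (arbitrary range for illustration).
cheval_table, nb_errors = get_id_chevaux(1, 50)
print "%d fiches parsed, %d parse errors" % (len(cheval_table), nb_errors)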
Example #7
def parse_infos_chevaux(urlsource):

    nb_errors = 0
    req = urllib2.Request(urlsource)
    html = urllib2.urlopen(req)
    pronos_page = BeautifulSoup(html, "lxml")

    table_chevaux = pronos_page.find(
        "table", "course").find_all("tbody")[1].find_all("tr")

    label = pronos_page.find("table",
                             "course").find_all("tbody")[0].find_all("td")
    for i in range(0, len(label)):
        label[i] = str(label[i].contents)

    result_cheval = []

    for cheval in table_chevaux:

        sstable = cheval.find_all("td")

        num_course = urlsource[urlsource.rfind("/") + 1:]
        num_course = num_course[:num_course.find("_")]

        if sstable[0].string == "NP":
            result_cheval.append([num_course, sstable[1].string, '', "NP", '',
                                  '', '', '', '', '', '', '', '', '', '', ''])
            continue

        nom_cheval = sanitize.s(
            cheval.find("strong").contents[0].encode("ascii", "ignore"))

        id_cheval = cheval.find("a", "fiche").get("href")
        id_cheval = id_cheval[id_cheval.find("idcheval") + 9:]
        id_cheval = id_cheval[:id_cheval.find("&")]

        num_cheval = sstable[0].string.encode("ascii", "ignore")

        ind = find_index(label, "Def")
        try:
            def_cheval = sstable[ind].contents[0].encode("ascii", "ignore")
        except:
            nb_errors = nb_errors + 1
            def_cheval = ''

        ind = find_index(label, "Ec")
        try:
            ecurie_cheval = sstable[ind].contents[0].encode("ascii", "ignore")
        except:
            nb_errors = nb_errors + 1
            ecurie_cheval = ''

        ind = find_index(label, "Corde")
        try:
            corde_cheval = sstable[ind].contents[0].encode("ascii", "ignore")
        except:
            nb_errors = nb_errors + 1
            corde_cheval = ''

        ind = find_index(label, "Oeil")
        try:
            oeil_cheval = sstable[ind].contents[0].encode("ascii", "ignore")
        except:
            nb_errors = nb_errors + 1
            oeil_cheval = ''

        ind = find_index(label, "Entrai")
        try:
            jockey_cheval = sanitize.s(
                sstable[ind].find_all("a")[0].contents[1].encode(
                    "ascii", "ignore"))
        except:
            nb_errors = nb_errors + 1
            jockey_cheval = ''

        ind = find_index(label, "Poids")
        try:
            poids_cheval = sstable[ind].contents[0].encode("ascii", "ignore")
        except:
            nb_errors = nb_errors + 1
            poids_cheval = ''

        ind = find_index(label, "Dist")
        try:
            dist_cheval = sstable[ind].contents[0].encode("ascii", "ignore")
        except:
            nb_errors = nb_errors + 1
            dist_cheval = ''

        ind = find_index(label, "10h")
        try:
            cote10h_cheval = sstable[ind].contents[0].string.encode(
                "ascii", "ignore")
        except:
            nb_errors = nb_errors + 1
            cote10h_cheval = ''
        try:
            cotepmu_cheval = sstable[ind + 1].find(
                "strong").contents[0].string.encode("ascii", "ignore")
        except:
            nb_errors = nb_errors + 1
            cotepmu_cheval = ''
        try:
            variation = sstable[ind + 2].contents[0].string.encode(
                "ascii", "ignore")
        except:
            nb_errors = nb_errors + 1
            variation = ''
        try:
            cotezeturf_cheval = sstable[ind + 3].find(
                "strong").contents[0].string.encode("ascii", "ignore")
        except:
            nb_errors = nb_errors + 1
            cotezeturf_cheval = ''
        try:
            cotebetclic_cheval = sstable[ind + 4].find(
                "strong").contents[0].string.encode("ascii", "ignore")
        except:
            nb_errors = nb_errors + 1
            cotebetclic_cheval = ''


        cheval_infos = [num_course, nom_cheval, id_cheval, num_cheval,
                        def_cheval, ecurie_cheval, corde_cheval, oeil_cheval,
                        jockey_cheval, poids_cheval, dist_cheval,
                        cote10h_cheval, cotepmu_cheval, variation,
                        cotezeturf_cheval, cotebetclic_cheval]

        result_cheval.append(cheval_infos)

    return (result_cheval, nb_errors)
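A usage sketch for parse_infos_chevaux; it takes the same kind of prognosis-page URL as parse_infos_course (hypothetical here) and returns one 16-field row per horse plus a count of unreadable fields.

# Hypothetical URL, as above; row[1] is the horse name, row[11] the 10h odds,
# following the cheval_infos field order defined in the function.
rows, nb_errors = parse_infos_chevaux("http://www.canalturf.com/.../2015-03-01/1234_exemple.html")
for row in rows:
    print row[1], row[11]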
Example #8
def parse_infos_chevaux(urlsource):

  nb_errors = 0
  req = urllib2.Request(urlsource)
  html = urllib2.urlopen(req)
  pronos_page = BeautifulSoup(html,"lxml")

  table_chevaux = pronos_page.find("table", "course").find_all("tbody")[1].find_all("tr")
  
  label = pronos_page.find("table", "course").find_all("tbody")[0].find_all("td")
  for i in range(0,len(label)):
    label[i] = str(label[i].contents)

  result_cheval = []
  

  for cheval in table_chevaux:
   
     sstable = cheval.find_all("td")

     num_course = urlsource[urlsource.rfind("/")+1:]
     num_course = num_course[:num_course.find("_")]

     if sstable[0].string == "NP":
	result_cheval.append([num_course,sstable[1].string,\
	'',"NP",'','','','','','','','','','','',''])
	continue

     nom_cheval = sanitize.s(cheval.find("strong").contents[0].encode("ascii","ignore"))

     id_cheval = cheval.find("a","fiche").get("href")
     id_cheval = id_cheval[id_cheval.find("idcheval")+9:]
     id_cheval = id_cheval[:id_cheval.find("&")]     

     num_cheval = sstable[0].string.encode("ascii","ignore")
      
     ind = find_index(label,"Def")
     try:
	def_cheval = sstable[ind].contents[0].encode("ascii","ignore")
     except:
	nb_errors = nb_errors+1
	def_cheval = ''

     ind = find_index(label,"Ec")
     try:
	ecurie_cheval = sstable[ind].contents[0].encode("ascii","ignore")
     except:
	nb_errors = nb_errors+1
	ecurie_cheval = ''

     ind = find_index(label,"Corde")
     try:
	corde_cheval = sstable[ind].contents[0].encode("ascii","ignore")
     except:
	nb_errors = nb_errors+1
	corde_cheval = ''

     ind = find_index(label,"Oeil")
     try:
	oeil_cheval = sstable[ind].contents[0].encode("ascii","ignore")
     except:
	nb_errors = nb_errors+1
	oeil_cheval = ''

     ind = find_index(label,"Entrai")
     try:
    	jockey_cheval = sanitize.s(sstable[ind].find_all("a")[0].contents[1].encode("ascii","ignore"))
     except:
	nb_errors = nb_errors+1
	jockey_cheval = ''
    
     ind = find_index(label,"Poids")
     try:
	poids_cheval = sstable[ind].contents[0].encode("ascii","ignore")
     except:
	nb_errors = nb_errors+1
	poids_cheval = ''

     ind = find_index(label,"Dist")
     try:
	dist_cheval = sstable[ind].contents[0].encode("ascii","ignore")
     except:
	nb_errors = nb_errors+1
	dist_cheval = ''

     ind = find_index(label,"10h")
     try:
	cote10h_cheval = sstable[ind].contents[0].string.encode("ascii","ignore")
     except:
	nb_errors = nb_errors+1
	cote10h_cheval = ''
     try:
     	cotepmu_cheval = sstable[ind+1].find("strong").contents[0].string.encode("ascii","ignore")
     except:
	nb_errors = nb_errors+1
	cotepmu_cheval = ''
     try:
	variation = sstable[ind+2].contents[0].string.encode("ascii","ignore")
     except:
	nb_errors = nb_errors+1
	variation = ''
     try:
    	cotezeturf_cheval = sstable[ind+3].find("strong").contents[0].string.encode("ascii","ignore")
     except:
	nb_errors = nb_errors+1
	cotezeturf_cheval = ''
     try:
      	cotebetclic_cheval = sstable[ind+4].find("strong").contents[0].string.encode("ascii","ignore")
     except:
	nb_errors = nb_errors+1
	cotebetclic_cheval = ''


     cheval_infos = [num_course, nom_cheval, id_cheval, num_cheval,\
	            def_cheval,ecurie_cheval, corde_cheval,oeil_cheval,jockey_cheval,\
                    poids_cheval,dist_cheval,cote10h_cheval,cotepmu_cheval,variation,\
                    cotezeturf_cheval,cotebetclic_cheval]


     result_cheval.append(cheval_infos)

  
  return (result_cheval,nb_errors)