Ejemplo n.º 1
0
import fr

if __name__ == "__main__":
    # Script entry point: build the list of country pages, then walk the
    # paginated competition listing of every country.
    debug = True
    # The returned opener is not used in the visible code; the call is kept in
    # case fr.getOpener() has side effects -- TODO confirm.
    opener = fr.getOpener()

    # Collect all the countries using module_choixcompet: every iframe "src"
    # found on a listing page is stored as one entry.
    countries = []
    for i in range(0, 10):
        for q in range(0, 57, 7):
            url = "http://www.footballdatabase.eu/module_choixcompet.php?cont="+str(i)+"&paysaff="+str(q)

            if debug:
                print(url)

            s = fr.read(url)
            a = s.find_all("iframe")
            # .get() rather than ['src'] so an iframe without a src attribute
            # is skipped instead of raising KeyError; empty values are dropped
            # exactly as before.
            sources = (frame.get('src') for frame in a)
            countries += [src for src in sources if src]

    # For each country, collect all the competitions.
    competitions = []
    for c in countries:
        # They've got French chars: transliterate once per country instead of
        # on every page of the inner loop.
        # NOTE(review): unidecode is not imported in the visible part of the
        # file -- confirm it is in scope.
        c = unidecode(c)
        for r in range(0, 42, 7):
            url = "http://www.footballdatabase.eu/"+c+"&compaff="+str(r)
            url = url.replace(" ", "%20")

            if debug:
                print(url)
Ejemplo n.º 2
0
def getGamesForRound(s):
    """Scrape the games listed on one round page into the global ``games`` table.

    ``s`` is the page path appended to ``http://www.footballdatabase.eu/``.
    Segments 2-5 of its dot-separated name supply the competition, country,
    year and round name; a ``ValueError`` is raised when that slice does not
    hold exactly four values.

    The page's ``fondsoustitrembleu488`` table is walked row by row.  A row
    containing a ``styledatebleu`` cell sets the current date; a row with more
    than ten cells is read as a game played on that date and written to
    ``games.loc[gameId]``, after which the global ``gameId`` is incremented.
    When no game is found, the ASCII-encoded ``s`` is appended to the global
    ``missedRounds``.

    Returns the global ``games`` object (presumably a pandas DataFrame --
    TODO confirm against its definition).
    """
    global gameId
    global missedRounds
    # The year segment is unpacked only to keep the other positions aligned.
    comp, country, _year, rnd = s.split(".")[2:6]
    soup = fr.read("http://www.footballdatabase.eu/"+s)

    gamesThisRound = 0
    # From here on s is only used for missedRounds; on Python 3 this is bytes.
    s = s.encode('ascii','ignore')
    gameTable = soup.find("table", class_="fondsoustitrembleu488")
    if gameTable is not None:
        rows = gameTable.find_all("tr")
        for i, g in enumerate(rows):

            # While looping through rows keep track of the current date.
            dateFind = g.find("td", class_="styledatebleu")
            if dateFind is not None:
                d = _parseRoundDate(dateFind.text)
                continue

            # This isn't a date row; only rows with more than 10 cells are games.
            tds = g.find_all("td")
            if len(tds) <= 10:
                continue

            refT = tds[0]
            homeT = tds[5]
            homeS = tds[6]
            awayS = tds[7]
            awayT = tds[8]
            home = homeT.a['href'].split(".")[2]
            homeScore = int(homeS.getText())
            awayScore = int(awayS.getText())
            away = awayT.a['href'].split(".")[2]

            # Remove the fixed prefix/suffix around the referee part of the
            # link.  str.strip() would remove *characters* from both ends and
            # eat into the referee name itself.
            ref = ""
            refHref = refT.a["href"]
            if refHref != "":
                ref = _removeSuffix(_removePrefix(refHref, "football.arbitres."), ".en.html")

            # Check the next row: it may carry game details (penalties or
            # extra time) for the current game.
            ot = pks = False
            homePks = awayPks = None
            if i < len(rows)-1:
                nextGame = rows[i+1]
                det = nextGame.find("span", class_="detailsr")
                if det is not None:
                    det = det.getText()
                    if "on penalties" in det:
                        ot = True
                        pks = True
                        tds2 = nextGame.find_all("td")
                        homePks = int(tds2[2].getText())
                        awayPks = int(tds2[3].getText())
                    elif "After Extra Time" in det:
                        ot = True

            # TODO this check could be much more thorough: for now only rounds
            # whose name starts with 'fina' are flagged as neutral ground.
            neutral = rnd.startswith('fina')

            # Get the url and the footballdatabase match id (third segment
            # from the end of the dotted link).
            url = _removePrefix(homeS["onclick"], "window.location=")
            mid = int(url.split(".")[-3])
            # NOTE(review): d is only bound once a date row has been seen; a
            # game row appearing before any date row fails here, as before.
            # The two literal zeros fill columns whose meaning is not visible
            # in this function.
            row = [mid, d.strftime("%d/%m/%Y"), ref, home, homeScore, awayScore, away, comp, country, rnd,  ot, pks, homePks, awayPks, 0, 0, neutral, url]
            games.loc[gameId] = row
            gameId += 1
            gamesThisRound += 1
    if gamesThisRound == 0:
        missedRounds += [s]
    if debug:
        print("number of games ", gamesThisRound)
    return games


def _parseRoundDate(dfTxt):
    """Turn the text of a date row into a ``datetime``."""
    # Repair month names that arrive without their leading "N".
    if " ovember" in dfTxt:
        dfTxt = dfTxt.replace(" ovember", "November")
    if "In " in dfTxt:
        # Sometimes the date just says "In June 2015" or "In 1995": use the
        # first day of that month/year.  strip() drops the characters
        # 'I', 'n' and ' ' from both ends, which leaves the month name and
        # year intact for the English texts handled here.
        trimmed = dfTxt.strip("In ")
        if len(trimmed) == 4:
            # Just a year, "In 1995".
            return datetime.strptime(trimmed, "%Y")
        return datetime.strptime("01 "+dfTxt.strip("In").strip(), "%d %B %Y")
    if any(str(y) in dfTxt for y in range(1860, 1900)):
        # Dates mentioning 1860-1899 are pinned to 1900-01-01 (presumably
        # because strftime on Python 2 rejects years before 1900 -- TODO
        # confirm).  Written without leading zeros so it also parses on
        # Python 3.
        return datetime(1900, 1, 1)
    return datetime.strptime(dfTxt, "%A %d %B %Y")


def _removePrefix(text, prefix):
    """Return ``text`` without a leading ``prefix`` (unchanged if absent)."""
    if text.startswith(prefix):
        return text[len(prefix):]
    return text


def _removeSuffix(text, suffix):
    """Return ``text`` without a trailing ``suffix`` (unchanged if absent)."""
    if suffix and text.endswith(suffix):
        return text[:-len(suffix)]
    return text