Code example #1
from functions import html_parse_tree, xpath_parse, regex_strip_array, array2csv

# Scrape the list of ranking dates from the rankDate selector on the rankings page
weeks_url = "http://www.atpworldtour.com/en/rankings/singles"
weeks_tree = html_parse_tree(weeks_url)
weeks_xpath = "//ul[@data-value = 'rankDate']/li/@data-value"
weeks_parsed = xpath_parse(weeks_tree, weeks_xpath)
weeks_cleaned = regex_strip_array(weeks_parsed)
# Wrap each date in its own list so each becomes one CSV row
weeks_list = [[week] for week in weeks_cleaned]
# Output to CSV
filename = 'weeks'
array2csv(weeks_list, filename)
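The helper functions imported from functions are not shown in these fragments. The sketch below is one plausible implementation, assuming they are thin wrappers around requests, lxml and the csv module; the actual functions.py may differ.

# Hypothetical implementation of the functions.py helpers (assumed, not the original)
import csv
import re

import requests
from lxml import html


def html_parse_tree(url):
    # Download the page and parse it into an lxml element tree
    response = requests.get(url)
    return html.fromstring(response.content)


def xpath_parse(tree, xpath):
    # Evaluate an XPath expression against the parsed tree
    return tree.xpath(xpath)


def regex_strip_array(array):
    # Collapse internal whitespace and strip each extracted string
    return [re.sub(r"\s+", " ", element).strip() for element in array]


def array2csv(array, filename):
    # Write a list of rows to <filename>.csv
    with open(filename + ".csv", "w", newline="") as csv_file:
        csv.writer(csv_file).writerows(array)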
Code example #2
print("")
print("Index    Week")
print("-----    ----")

#for h in xrange(index, 1):
#for h in xrange(index, len(weeks_list)):
for h in range(start_index, end_index + 1):
    week = weeks_list[h][0]
    week_url = "http://www.atpworldtour.com/en/rankings/singles?rankDate=" + week + "&rankRange=1-3000"

    week_tree = html_parse_tree(week_url)

    # Count the ranked players for this week (one rank cell per player row)
    player_count_xpath = "//table[@class='mega-table']/tbody/tr/td[@class='rank-cell']/text()"
    player_count_parsed = xpath_parse(week_tree, player_count_xpath)
    player_count_cleaned = regex_strip_array(player_count_parsed)
    player_count = len(player_count_cleaned)

    rank_xpath = "//table[@class='mega-table']/tbody/tr/td[@class='rank-cell']/text()"
    rank_parsed = xpath_parse(week_tree, rank_xpath)
    rank_cleaned = regex_strip_array(rank_parsed)

    player_name_xpath = "//table[@class='mega-table']/tbody/tr/td[@class='player-cell']/a/text()"
    player_name_parsed = xpath_parse(week_tree, player_name_xpath)
    player_name_cleaned = regex_strip_array(player_name_parsed)

    # Country code comes from the alt text of the flag image
    country_xpath = "//table[@class='mega-table']/tbody/tr/td[@class='country-cell']/div/div/img/@alt"
    country_parsed = xpath_parse(week_tree, country_xpath)
    country_cleaned = regex_strip_array(country_parsed)

    # Relative URL of each player's profile page
    player_url_xpath = "//table[@class='mega-table']/tbody/tr/td[@class='player-cell']/a/@href"
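This fragment and the ones below iterate from start_index to end_index without defining them. One plausible way to obtain them, assuming the Index/Week table printed above is meant for interactive selection, is to list the scraped weeks and read the range from standard input:

# Hypothetical selection step (not shown in the original fragments)
for i in range(len(weeks_list)):
    print(str(i) + "        " + weeks_list[i][0])

start_index = int(input("Start index: "))
end_index = int(input("End index: "))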
Code example #3
print("-----    ----")

for h in range(start_index, end_index + 1):
    week = weeks_list[h][0]
    # Ranking dates are formatted YYYY-MM-DD
    dateList = week.split("-")
    yearRanking = dateList[0]
    monthRanking = dateList[1]
    dayRanking = dateList[2]

    week_url = "http://www.atpworldtour.com/en/rankings/singles?rankDate=" + week + "&rankRange=1-3000"

    week_tree = html_parse_tree(week_url)

    rank_xpath = "//table[@class='mega-table']/tbody/tr/td[@class='rank-cell']/text()"
    rank_parsed = xpath_parse(week_tree, rank_xpath)
    rank_cleaned = regex_strip_array(rank_parsed)

    # Player name is stored in the link's data-ga-label attribute
    player_name_xpath = "//table[@class='mega-table']/tbody/tr/td[@class='player-cell']/a/@data-ga-label"
    player_name_parsed = xpath_parse(week_tree, player_name_xpath)
    player_name_cleaned = regex_strip_array(player_name_parsed)

    move_xpath = "//table[@class='mega-table']/tbody/tr/td[@class='move-cell']/div[@class='move-text']/text()"
    move_parsed = xpath_parse(week_tree, move_xpath)
    move_cleaned = regex_strip_array(move_parsed)

    age_xpath = "//table[@class='mega-table']/tbody/tr/td[@class='age-cell']/text()"
    age_parsed = xpath_parse(week_tree, age_xpath)
    age_cleaned = regex_strip_array(age_parsed)

    points_xpath = "//table[@class='mega-table']/tbody/tr/td[@class='points-cell']/a/text()"
    points_parsed = xpath_parse(week_tree, points_xpath)
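    # Hypothetical continuation (not part of the original fragment): clean the
    # points column like the others, combine the cleaned columns into one row
    # per player, and write them out with the array2csv helper from functions.
    # The column order and output filename are assumptions.
    points_cleaned = regex_strip_array(points_parsed)

    rankings = []
    for i in range(len(rank_cleaned)):
        rankings.append([
            week,                    # ranking date, YYYY-MM-DD
            rank_cleaned[i],         # rank
            player_name_cleaned[i],  # player name
            move_cleaned[i],         # movement vs. the previous week
            age_cleaned[i],          # age
            points_cleaned[i],       # ranking points
        ])
    array2csv(rankings, 'rankings_' + week)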
Code example #4
    # Append a new 'profile_picture' column to the header row
    new_row = row1
    new_row.append('profile_picture')
    new_rows.append(new_row)
    for row in readCSV:
        # Progress output for the current row
        print(row[5])

        new_row = row
        # Build the absolute profile URL from the relative path stored in the row
        profile_url = "http://www.atptour.com" + row[11]
        profile_tree = html_parse_tree(profile_url)

        # The profile picture URL is exposed in the page's thumbnail meta tag
        player_thumbnail_xpath = "//meta[@name='thumbnail']/@content"
        player_thumbnail_parsed = xpath_parse(profile_tree,
                                              player_thumbnail_xpath)
        if not player_thumbnail_parsed:
            # Fall back to an empty value when no thumbnail is present
            player_thumbnail_parsed = ['']
        player_thumbnail_cleaned = regex_strip_array(player_thumbnail_parsed)
        new_row.append(player_thumbnail_cleaned[0])
        new_rows.append(new_row)

        # Get profile data
        player_id = row[15]

        player_slug = row[12]

        first_name_xpath = "//div[@id='playerProfileHero']/div[@class='player-profile-hero-overflow']/div/div/div/div[@class='first-name']/text()"
        first_name_parsed = xpath_parse(profile_tree, first_name_xpath)
        first_name_cleaned = regex_strip_array(first_name_parsed)

        last_name_xpath = "//div[@id='playerProfileHero']/div[@class='player-profile-hero-overflow']/div/div/div/div[@class='last-name']/text()"
        last_name_parsed = xpath_parse(profile_tree, last_name_xpath)
        last_name_cleaned = regex_strip_array(last_name_parsed)
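    # Hypothetical write-out (not part of the original fragment): after the
    # readCSV loop finishes, new_rows could be saved with the array2csv helper
    # used in the weeks scrape; the filename here is illustrative only.
    array2csv(new_rows, 'player_profiles_updated')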
Code example #5
print ""
print "Index    Week"
print "-----    ----"

#for h in xrange(index, 1):
#for h in xrange(index, len(weeks_list)):
for h in xrange(start_index, end_index + 1):
    week = weeks_list[h][0]
    week_url = "http://www.atpworldtour.com/en/rankings/singles?rankDate=" + week + "&rankRange=1-3000"

    week_tree = html_parse_tree(week_url)

    player_count_xpath = "//table[@class='mega-table']/tbody/tr/td[@class='rank-cell']/text()"
    player_count_parsed = xpath_parse(week_tree, player_count_xpath)
    player_count_cleaned = regex_strip_array(player_count_parsed)
    player_count = len(player_count_cleaned)

    rank_xpath = "//table[@class='mega-table']/tbody/tr/td[@class='rank-cell']/text()"
    rank_parsed = xpath_parse(week_tree, rank_xpath)
    rank_cleaned = regex_strip_array(rank_parsed)

    player_url_xpath = "//table[@class='mega-table']/tbody/tr/td[@class='player-cell']/a/@href"
    player_url_parsed = xpath_parse(week_tree, player_url_xpath)
    player_url_cleaned = regex_strip_array(player_url_parsed)

    move_xpath = "//table[@class='mega-table']/tbody/tr/td[@class='move-cell']/div[@class='move-text']/text()"
    move_parsed = xpath_parse(week_tree, move_xpath)
    move_cleaned = regex_strip_array(move_parsed)

    age_xpath = "//table[@class='mega-table']/tbody/tr/td[@class='age-cell']/text()"
Code example #6
from functions import html_parse_tree, xpath_parse, regex_strip_array

weeks_url = "http://www.atpworldtour.com/en/rankings/singles"
weeks_tree = html_parse_tree(weeks_url)
weeks_xpath = "//ul[@data-value = 'rankDate']/li/@data-value"
weeks_parsed = xpath_parse(weeks_tree, weeks_xpath)
weeks_cleaned = regex_strip_array(weeks_parsed)
# Each entry is a ranking date in YYYY-MM-DD format
for row in weeks_cleaned:
    print(row)