-
Notifications
You must be signed in to change notification settings - Fork 3
/
get_athlete_info.py
121 lines (96 loc) · 3.08 KB
/
get_athlete_info.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import cookielib
import time
import urllib
import urllib2
from BeautifulSoup import BeautifulSoup
from credentials import *
# Pause, in seconds, passed to time.sleep() after each response is read,
# so that consecutive requests to Strava are spaced out.
TIME_BT_REQUESTS = 0.5
# authentication code by: https://github.com/loisaidasam/stravalib
def log_in():
    """Sign in to Strava and return an opener carrying the session cookies.

    Fetches the login page, copies the values of its hidden ``utf8`` and
    ``authenticity_token`` inputs, and posts them together with EMAIL and
    PASSWORD to the session endpoint. EMAIL and PASSWORD are not defined in
    this function; presumably they come from ``from credentials import *``.

    Returns:
        The ``urllib2`` opener used for both requests. Its cookie jar holds
        whatever cookies the server set, so later requests made through it
        reuse the same session.

    Raises:
        IndexError: if the login page has no ``utf8`` or
            ``authenticity_token`` input.

    NOTE(review): the result of the login POST is not inspected, so a
    rejected login is not detected here.
    """
    print("Logging in...")
    cookie_jar = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie_jar))

    login_page = opener.open('https://www.strava.com/login')
    soup = BeautifulSoup(login_page.read())
    time.sleep(TIME_BT_REQUESTS)

    # The form's hidden fields must be echoed back with the credentials.
    # The utf8 value is encoded to bytes before being URL-encoded.
    utf8 = (soup.findAll('input', {'name': 'utf8'})[0]
            .get('value').encode('utf-8'))
    token = soup.findAll('input', {'name': 'authenticity_token'})[0].get('value')
    form_data = urllib.urlencode({
        'utf8': utf8,
        'authenticity_token': token,
        'email': EMAIL,
        'password': PASSWORD,
    })

    response = opener.open('https://www.strava.com/session', form_data)
    # Read the body to completion; its content is not needed, only the
    # cookies the opener collected along the way.
    response.read()
    time.sleep(TIME_BT_REQUESTS)
    return opener
def _descend(node, path):
    """Return the element reached from node by following child indices."""
    for index in path:
        node = list(node.childGenerator())[index]
    return node


def _parse_duration_seconds(text):
    """Convert '1:02:23' or '45:54' to seconds; return None if malformed."""
    parts = text.split(':')
    try:
        seconds = int(parts[-1]) + int(parts[-2]) * 60
        if len(parts) == 3:
            seconds += int(parts[0]) * 3600
    except (IndexError, ValueError):
        return None
    return seconds


def get_athlete_info(opener, athlete_id):
    """Scrape an athlete's 10k record and this-year figure from Strava.

    Requests the athlete's ``profile_sidebar_comparison`` fragment through
    ``opener`` and picks values out of the returned markup by fixed child
    positions, so any change in the page layout breaks the lookup.

    Args:
        opener: object with a settable ``addheaders`` attribute and an
            ``open(url)`` method, such as the opener returned by log_in().
            Its ``addheaders`` list is overwritten by this call.
        athlete_id: identifier interpolated into the request URL.

    Returns:
        On success ``{'10k': seconds, 'year': value}``, where ``seconds`` is
        the 10k record converted to seconds and ``value`` is the this-year
        figure as a float with its two trailing characters and any commas
        removed (the text must contain 'km', so presumably kilometres).
        Otherwise ``{'error': code}`` with code one of:
            'fail5' - the request raised an exception (it is printed);
            'fail1' - the records section was not at the expected position;
            'fail4' - the 10k time could not be parsed;
            'fail2' - no 10k record was found (or it parsed to zero);
            'fail3' - the this-year and all-time figures are identical.

    Raises:
        Exception: if the response status is not 200, or if the this-year
            figure does not contain 'km'.
        IndexError, AttributeError: if the response or its year statistics
            do not have the expected structure.
    """
    # without these headers, the request doesn't return anything
    opener.addheaders = [
        ('X-Requested-With', 'XMLHttpRequest'),
        ('Accept',
         ('text/javascript, application/javascript, application/ecmascript,'
          ' application/x-ecmascript')),
    ]
    # https, matching the login requests, so the authenticated request is
    # not made in cleartext.
    url = ('https://www.strava.com/athletes/%s/profile_sidebar_comparison'
           % athlete_id)
    try:
        response = opener.open(url)
    except Exception as e:
        # Deliberate best-effort: report the failure and let the caller
        # carry on with the next athlete.
        print('%s - %s' % (e, athlete_id))
        return {'error': 'fail5'}
    if response.getcode() != 200:
        raise Exception('Athlete info: %s - %s' %
                        (response.getcode(), response.msg))
    soup = BeautifulSoup(response.read())
    time.sleep(TIME_BT_REQUESTS)

    # find the data we're interested in
    run_info = _descend(soup, (4,))

    # first get the 10k record
    try:
        records = _descend(run_info, (7, 3))
    except (AttributeError, IndexError):
        return {'error': 'fail1'}

    ten_k = None
    for item in records.childGenerator():
        if '10k' not in str(item):
            continue
        # time is in the format 1:02:23 or 45:54; convert it to seconds
        ten_k = _parse_duration_seconds(_descend(item, (3,)).text)
        if ten_k is None:
            return {'error': 'fail4'}
        break
    if not ten_k:
        return {'error': 'fail2'}

    # then see if the user was active all year (don't want to include
    # people that just started using strava, since they may have training
    # outside of strava that would mess up the results)
    this_year = _descend(run_info, (9, 3, 1, 3)).text
    all_time = _descend(run_info, (11, 3, 1, 3)).text
    if this_year == all_time:
        return {'error': 'fail3'}
    if 'km' not in this_year:
        raise Exception('weird')

    # Drop the two-character unit suffix and thousands separators.
    this_year = float(this_year[:-2].replace(',', ''))
    return {
        '10k': ten_k,
        'year': this_year,
    }