def FetchGPSData(tokensFile, CPCdate, CPClen, startTolerance=60):
    """Return the GPS track of the Strava activity that best matches a CPC sample.

    Every access token listed in ``tokensFile`` is tried in turn. All
    activities whose local start time lies within ``startTolerance`` seconds
    of ``CPCdate`` are candidates; the candidate whose elapsed time is closest
    to ``CPClen`` is chosen. Its end time must then fall within one minute of
    the CPC end time.

    Parameters
    ----------
    tokensFile : str
        Path of a text file holding one Strava access token per line.
    CPCdate : datetime.datetime
        Start date/time of the CPC data file (compared against
        ``activity.start_date_local``).
    CPClen : int or float
        Length of the CPC sample in seconds.
    startTolerance : int or float, optional
        Maximum allowed difference, in seconds, between the activity start
        and ``CPCdate``. Defaults to 60, which is the value the code has
        always applied (pass 300 for a five-minute window).

    Returns
    -------
    pandas.DataFrame
        Columns ``lon``, ``lat`` and ``dateTime``, one row per GPS sample.

    Raises
    ------
    AssertionError
        If no activity starts close enough to ``CPCdate``, or if the chosen
        activity does not end within one minute of the CPC end time.
    """
    client = Client()

    # How the saved access tokens were obtained (done once, by hand):
    #   1. Build and print the authorisation URL:
    #        authorize_url = client.authorization_url(
    #            client_id=22380,
    #            redirect_uri='http://sustainability.leeds.ac.uk',
    #            approval_prompt='force')
    #        print(authorize_url)
    #   2. Paste that URL into a browser, accept the request and copy the
    #      'code' from the resulting URL into the call below, along with the
    #      client_secret found under the "air pollution9" account on Strava:
    #        access_token = client.exchange_code_for_token(
    #            client_id=22380, client_secret='***', code='***')
    #   3. Print the token and save it in the tokens file:
    #        print(access_token)
    with open(tokensFile, 'r') as tokenHandle:
        myTokens = tokenHandle.read().splitlines()

    # Collect every (token index, activity id) pair that starts close enough.
    # A list is used so that several matching activities belonging to the
    # same athlete are all kept as candidates.
    candidates = []
    for tokenIndex, token in enumerate(myTokens):
        if not token.strip():
            continue  # tolerate blank lines in the tokens file
        client.access_token = token
        for activity in client.get_activities():
            startOffset = (CPCdate - activity.start_date_local).total_seconds()
            if abs(startOffset) < startTolerance:
                candidates.append((tokenIndex, activity.id))

    # Raised explicitly (rather than via ``assert``) so that the check is
    # still performed when Python runs with -O.
    if not candidates:
        raise AssertionError(
            "No GPS activities with a start time within {} seconds of the "
            "CPC data file start time".format(startTolerance))

    # Pick the candidate whose elapsed time is closest to the CPC length;
    # on a tie the first candidate wins.
    best = None
    bestDelta = None
    for tokenIndex, activityId in candidates:
        client.access_token = myTokens[tokenIndex]
        activity = client.get_activity(activityId)
        delta = abs(CPClen - activity.elapsed_time.seconds)
        if bestDelta is None or delta < bestDelta:
            bestDelta = delta
            best = (tokenIndex, activityId, activity)

    chosenAth, chosenAct, activity = best
    client.access_token = myTokens[chosenAth]

    startDate = activity.start_date_local
    endDate = startDate + dt.timedelta(seconds=activity.elapsed_time.seconds)
    endDateCPC = CPCdate + dt.timedelta(seconds=CPClen)
    if not abs((endDateCPC - endDate).total_seconds()) < 60:
        raise AssertionError(
            "No valid GPS activities with an end time within 1 minute of the CPC data file end time")

    # Extract the position and time streams of the chosen activity.
    myStream = client.get_activity_streams(chosenAct, types=['time', 'latlng'])
    latlon = myStream['latlng'].data
    lat = [point[0] for point in latlon]
    lon = [point[1] for point in latlon]
    # The time stream holds offsets in seconds from the activity start.
    dateTime = [startDate + dt.timedelta(seconds=offset)
                for offset in myStream['time'].data]

    return pd.DataFrame(data={'lon': lon, 'lat': lat, 'dateTime': dateTime})
class StravaData:
    """Thin wrapper around a stravalib ``Client`` bound to one runner's token."""

    # Class-level defaults; each instance overwrites them in __init__.
    RUNNER_ID = None
    ACCESS_TOKEN = None
    client = None

    def __init__(self, runner_id, access_token):
        """Remember the runner id and token and build an authenticated client."""
        strava_client = Client()
        strava_client.access_token = access_token

        self.RUNNER_ID = runner_id
        self.ACCESS_TOKEN = access_token
        self.client = strava_client

    def getAthlete(self):
        """Return the result of ``client.get_athlete()``."""
        return self.client.get_athlete()

    def getActivities(self):
        """Return the result of ``client.get_activities()``."""
        return self.client.get_activities()

    def getActivity(self, id):
        """Return the activity with the given id."""
        return self.client.get_activity(id)

    def getActivityStreams(self, id, types):
        """Return the requested stream types of an activity at 'medium' resolution."""
        return self.client.get_activity_streams(
            id, types=types, resolution='medium')
def rides(request):
    """Return the logged-in user's Strava activities since 2018-04-01 as JSON.

    An unauthenticated request gets ``{"user_auth": False}``. Otherwise every
    activity between 1 April 2018 and now is fetched in detail (one API call
    per activity, to obtain ``map.polyline``) and serialised through
    ``JsonActivity.toJson()``.
    """
    if not request.user.is_authenticated:
        return JsonResponse({"user_auth": False})

    client = Client()
    client.access_token = AuthUser.objects.get(user_id=request.user.id).auth_code

    window_end = datetime.now()
    window_start = datetime(2018, 4, 1)
    summaries = list(client.get_activities(before=window_end, after=window_start))

    serialised = []
    for summary in summaries:
        # The id and polyline come from the detailed activity; distance and
        # start date are taken from the summary.
        detail = client.get_activity(summary.id)
        entry = JsonActivity(detail.id,
                             detail.map.polyline,
                             summary.distance,
                             summary.start_date)
        serialised.append(entry.toJson())

    return JsonResponse({"user_auth": True, "activities": serialised})
def get_polyline(self):
    """Return this ride's map polyline, fetching it from Strava on first use.

    The polyline is cached on ``self.polyline``; the Strava API is only
    contacted while that attribute is still ``None``.
    """
    # The cache lives on the instance; the bare name ``polyline`` is not
    # defined in this scope and raised NameError.
    if self.polyline is None:
        # NOTE(review): filter() returns a QuerySet, not a token string.
        # The field holding the token is not visible from here, so the
        # lookup is left unchanged - confirm which UserInfo field should be
        # passed to Client as access_token.
        strava_code = UserInfo.objects.filter(athlete_id=self.athlete_id)
        client = Client(access_token=strava_code)
        activity = client.get_activity(self.guid)
        self.polyline = activity.map.polyline
    return self.polyline
def index(request):
    """Render the landing page, completing the Strava OAuth hand-shake if needed.

    When Strava redirects back with a ``code`` query parameter and the user
    is logged in, the code is exchanged for an access token which is stored
    on the user's ``AuthUser`` row. If the user then has a token, a fixed
    demo activity is fetched and its polyline and start/end positions are
    passed to the template.
    """
    strava_client_id = 24429
    demo_activity_id = 1486441471

    is_authenticated = request.user.is_authenticated
    user = None
    if is_authenticated:
        user = AuthUser.objects.get(user_id=request.user.id)

    client = Client()
    authorize_url = client.authorization_url(
        client_id=strava_client_id,
        redirect_uri="http://localhost:8000/main")

    # Code appended by Strava to the redirect URL ('' when absent).
    code = request.GET.get('code', '')

    start_pos, end_pos, a_polyline = '', '', ''

    if code != '' and is_authenticated:
        access_token = client.exchange_code_for_token(
            client_id=strava_client_id,
            client_secret=config_vars.STRAVA_CLIENT_SECRET,
            code=code)
        # Store the token on the instance that is used below. Saving it on a
        # second, separately fetched AuthUser left ``user.auth_code`` stale
        # for the rest of this request.
        # NOTE(review): depending on the stravalib version this may be a
        # token string or a dict of token fields - confirm what auth_code
        # is expected to hold.
        user.auth_code = access_token
        user.save()

    # Use the stored token to fetch the demo activity.
    if is_authenticated and user.auth_code:
        pprint("User Logged in and has an auth code")
        client.access_token = user.auth_code
        full_activity = client.get_activity(demo_activity_id, True)
        a_polyline = full_activity.map.polyline
        start_pos = full_activity.start_latlng
        end_pos = full_activity.end_latlng

    return render(request, "index.html", {
        "auth_url": authorize_url,
        "start_pos": start_pos,
        "end_pos": end_pos,
        "polyline": a_polyline,
        "user_is_authenticated": is_authenticated,
    })
def strava_last_activity(request):
    """Render the average cadence of the current user's most recent activity.

    The value is passed to the template under the key ``activity_name``.
    When the athlete has no activities an empty string is passed instead.
    """
    currentUser = User.objects.get(pk=request.user.id)

    client = Client()
    client.access_token = currentUser.profile.stravaAccessCode

    activities = client.get_activities(before=datetime.datetime.now(), limit=1)

    # Default for athletes without any activity; previously the name was
    # unbound in that case and the view crashed.
    averageCadence = ''
    for activity in activities:
        # The detailed activity is fetched because average_cadence is read
        # from it.
        averageCadence = client.get_activity(activity.id).average_cadence

    return render(request, 'strava_last_activity.html',
                  {'activity_name': averageCadence})
# Gather the effort streams of every leaderboard entry on every segment
# ridden between ``start`` and ``end``.
start = datetime.datetime(2016, 1, 1, 0, 0)
end = datetime.datetime(2016, 1, 3, 0, 0)

# get_activities(before, after): activities before ``end`` and after ``start``.
activities = client.get_activities(end, start)
STRONZO = list(activities)

types = ['time', 'latlng', 'altitude', 'heartrate', 'temp', 'segments',
         'segment']

# get all streams and push efforts into array
segment_array = []
stream_types = ['time', 'latlng', 'distance', 'altitude', 'velocity_smooth',
                'heartrate', 'cadence', 'watts', 'temp', 'moving',
                'grade_smooth']

iterator = 0
for entry in STRONZO:
    # Fraction of activities handled so far, printed as a crude progress bar.
    print(float(iterator) / float(len(STRONZO)))
    iterator += 1

    activity = client.get_activity(entry.id, True)
    if not activity.segment_efforts:
        continue

    for efforts in activity.segment_efforts:
        for leaderboard_entry in efforts.segment.leaderboard.entries:
            segment_activity = client.get_effort_streams(
                leaderboard_entry.effort.id, stream_types)
            segment_array.append({
                'id': leaderboard_entry.effort.id,
                'name': leaderboard_entry.athlete_name,
                'streams': segment_activity
            })
class Strava_scraper(object):
    '''
    A strava scraper class.

    Combines the Strava API (through a stravalib ``Client``) with a selenium
    driven browser session to collect activity ids and activities of the
    authenticated athlete, the athletes they follow and their clubs.
    '''

    def __init__(self, client_secret, access_token, strava_email,
                 strava_password):
        self.client_secret = client_secret
        self.access_token = access_token
        self.strava_email = strava_email
        self.strava_password = strava_password
        self.client = None
        self.athlete = None
        self.friends = None  # list of my friends, dtype = stravalib object
        self.activity_ids = []  # list of activity ids scraped from strava
        self.friend_ids = []
        self.activities = []  # list of activities
        self.clubs = []  # list of athlete clubs
        self.other_athletes = []  # other athlete objects unfollowed by client

    def get_client(self):
        """
        Create the client object used for making requests to the strava API.

        The Client is built from the stored access_token and a
        DefaultRateLimiter. The method also loads the authenticated athlete
        and populates the friends list and the list of friend ids.

        Inputs: None
        Outputs: None
        """
        self.client = Client(access_token=self.access_token,
                             rate_limiter=DefaultRateLimiter())

        self.athlete = self.client.get_athlete()  # full athlete record
        print("Client setup complete!")
        print("")
        self.friends = list(self.client.get_athlete_friends())
        print("Authenticated user's friends list complete!")
        print("")
        self.friend_ids.extend(friend.id for friend in self.friends)

    def log_in_strava(self):
        """
        Log in to Strava with a selenium Chrome webdriver.

        Image loading is disabled in the browser profile. The caller owns
        the returned driver and is responsible for quitting it.

        Input: None
        Output: webdriver object
        """
        chromeOptions = webdriver.ChromeOptions()
        prefs = {"profile.managed_default_content_settings.images": 2}
        chromeOptions.add_experimental_option("prefs", prefs)

        print("logging in...")
        print("")
        driver = webdriver.Chrome(chrome_options=chromeOptions)
        driver.get("https://www.strava.com/login")

        user = driver.find_element_by_name('email')
        user.click()
        user.send_keys(self.strava_email)

        pwrd = driver.find_element_by_name('password')
        pwrd.click()
        pwrd.send_keys(self.strava_password)

        driver.find_element_by_id('login-button').click()
        sleep(10)  # give the login time to complete
        print("complete!")
        return driver

    def _get_activity_by_id(self, act_id):
        """Return the activity with id ``act_id``, or None on an HTTPError."""
        try:
            return self.client.get_activity(act_id)
        except HTTPError:
            return None

    def get_soup(self, driver, url):
        '''
        Helper function to get soup from a live url, as opposed to a local copy
        INPUT:
        -url: str
        OUTPUT: soup object
        '''
        driver.get(url)
        return BeautifulSoup(driver.page_source, 'html.parser')

    def _make_interval_list(self):
        """
        Return the list of year/week integers (e.g. 201631) to scrape.

        The list is static: it covers 2014, 2015 and 2016 completely and
        2017 up to the current ISO week number, in chronological order.
        """
        now = datetime.datetime.now()  # current date
        week_num = now.date().isocalendar()[1]  # current week number
        # num of weeks each year, only going back to 2014
        yr_wk = {2014: 52, 2015: 53, 2016: 52, 2017: week_num}

        new_week_ints = []
        # sorted() makes the order chronological regardless of dict ordering
        # and works on both Python 2 and 3 (iteritems() is Python 2 only).
        for year, weeks in sorted(yr_wk.items()):
            new_week_ints.extend(range(year * 100 + 1, year * 100 + weeks + 1))
        return new_week_ints

    def _get_activities_from_page(self, soup):
        """Return the activity ids found in the '/activities/<id>' links of a page."""
        regex = re.compile(r'/activities/([0-9]*)')
        temp_act_id_list = []
        for link in soup.find_all('a'):
            try:
                # digits following '/activities/'; only the first hit is used
                act_id = regex.findall(link.get('href'))
            except TypeError:
                continue  # anchor without an href
            try:
                temp_act_id_list.append(int(act_id[0]))
            except (IndexError, ValueError):
                continue  # no activity link, or no digits after the prefix
        return temp_act_id_list

    def web_scrape_activities(self, start_n=0, sleep=False, sleep_time=2):
        """
        Scrape the activity ids of the athletes in ``friend_ids``.

        For every athlete from index ``start_n`` onwards, each weekly
        interval page is loaded and the activity ids found on it are written
        to ``activity_files/<athlete id>_act_ids.csv``.

        Example url:
        https://www.strava.com/athletes/2304253#interval?interval=201631&interval_type=week&chart_type=miles&year_offset=0
        where 2304253 is the athlete id and 201631 is the year and week num.

        Example tag:
        <a href="/activities/666921221">And the winning number is 400</a>

        Inputs:
        - start_n: index into friend_ids to start from
        - sleep: when True, pause after loading each page
        - sleep_time: scale of the exponentially distributed pause, seconds
        """
        # Imported locally: the ``sleep`` parameter shadows the module-level
        # sleep() function, so calling ``sleep(...)`` here called a bool.
        import time

        driver = self.log_in_strava()
        try:
            week_ints = self._make_interval_list()
            print("scraping athletes")
            for ath_id in self.friend_ids[start_n:]:
                athlete_act_id_list = []
                for yearweek_int in week_ints:
                    url = "https://www.strava.com/athletes/{}#interval?interval={}&interval_type=week&chart_type=miles&year_offset=0".format(
                        str(ath_id), str(yearweek_int))
                    soup = self.get_soup(driver, url)
                    if sleep:
                        # random pause before completing each loop
                        time.sleep(np.random.exponential(1.0) * sleep_time)
                    athlete_act_id_list.extend(
                        self._get_activities_from_page(soup))
                filename = "{}_act_ids.csv".format(ath_id)
                filepath = os.path.join('activity_files', filename)
                write_list_to_csv(athlete_act_id_list, filepath)
        finally:
            # The driver is local to this method; close the browser even if
            # scraping fails part-way through.
            driver.quit()

        self.activity_ids = set(self.activity_ids)
        print("All done!")

    def get_other_athletes(self, list_ath_ids):
        """
        Populate ``other_athletes`` from a list of predefined athlete ids.

        Ids that are already in ``friend_ids`` are skipped.

        Input: list_ath_ids as list
        Output: None
        """
        print("Getting other athletes...")
        print("")
        known_friends = set(self.friend_ids)
        for ath_id in list_ath_ids:
            if ath_id in known_friends:
                continue
            self.other_athletes.append(self.client.get_athlete(ath_id))
        print("All done!")

    def load_activity_ids(self, act_id_csv_filename):
        """
        Populate ``activity_ids`` from the first row of a csv file.

        Should only be called when a new scraper has been instantiated.
        """
        with open(act_id_csv_filename) as f:
            reader = csv.reader(f)
            self.activity_ids = np.array(next(reader), dtype='int')

    def get_activities_main(self):
        """
        Populate ``activities`` and ``clubs``; call after get_client().

        Collects the athlete's own activities, the friend feed and the
        activities of each of the athlete's clubs.

        Input: None
        Output: None
        """
        print("Getting client activities...")
        print("")
        self.activities.extend(list(self.client.get_activities()))  # gets all
        print("Getting friend activities...")
        print("")
        # only gets last 200 activities from users feed
        self.activities.extend(list(self.client.get_friend_activities()))
        print("Getting athlete clubs...")
        print("")
        self.clubs.extend(self.client.get_athlete_clubs())  # gets all
        club_ids = [club.id for club in self.clubs]
        print("Getting club activities...")
        print("")
        for club in club_ids:
            # gets last 200 activities per club
            self.activities.extend(list(self.client.get_club_activities(club)))
        print("All done!")

    def get_activities_from_ids(self):
        """
        Fetch every activity listed in ``activity_ids`` into ``activities``.

        Ids are consumed as they are processed, so after an interruption
        ``activity_ids`` holds exactly the ids that are still outstanding.
        Works for lists, numpy arrays and sets of ids.
        """
        pending = list(self.activity_ids)
        processed = 0
        try:
            for act_id in pending:
                requested_activity = self._get_activity_by_id(act_id)
                if requested_activity:
                    self.activities.append(requested_activity)
                processed += 1
        finally:
            self.activity_ids = pending[processed:]

    def __repr__(self):
        if self.athlete is None:
            # get_client() has not been called yet
            return "This is an unauthenticated strava scraper class"
        return "This is {} {}'s strava scraper class".format(
            self.athlete.firstname, self.athlete.lastname)
activity_ids = [396462307] #391255712 - Hertford #394259308 - Hills #396462307 - Epping Club Ride client = Client(access_token='250d33ceabfbe833376eb18885e797af14888512') athlete = client.get_athlete() # Get John's full athlete record print("Hello, {}. I know your email is {}".format(athlete.firstname, athlete.email)) # "Hello, John. I know your email is [email protected]" activity_id = activity_ids[0] # The first ID is the root which we find the others activity = client.get_activity(activity_id) print (activity) related_activities = client.get_related_activities(activity_id, limit=None) print (related_activities) for ra in related_activities: print ra activity_ids.append(ra.id) print activity_ids for activity_id in activity_ids: act = client.get_activity(activity_id) print act.name, act.type, act.athlete.firstname, act.athlete.lastname if (act.gear == None and act.calories == None):
if (act.distance > units.unit("km")(5)): # Longer than usual. Where was I going? pass elif (1594398 in segments): ret["name"] = "Morning commute" ret["commute"] = True elif (1547949 in segments): ret["name"] = "Evening commute" ret["commute"] = True if "name" in ret and not (act.name.endswith("rit") or act.name.endswith(" Ride")): # May already not be the default name anymore. del ret["name"] return ret seen = dbm.open("seen", "c") after = datetime.datetime.now() - datetime.timedelta(days=2) for act in act for act in client.get_activities(after=after, limit=5): if str(act.id) in seen: continue full = client.get_activity(act.id) print full updates = build_updates(full) print updates seen[str(act.id)] = "1" if updates: updates["activity_id"] = act.id print client.update_activity(**updates)
def read_strava(activity_id, access_token, refresh_token=None, client_id=None,
                client_secret=None, to_df=False, **kwargs):
    """
    Load the activity data from Strava into a runpandas Activity or a
    pandas DataFrame.

    Column names are translated to runpandas terminology
    (e.g. "heartrate" > "heart_rate"). The datetime index is replaced by
    time offsets from the first sample. Columns that contain only NaN
    values are removed.

    Attention: Two API requests are made to the Strava webservice: 1 to
    retrieve activity metadata, 1 to retrieve the raw data ("streams").

    Parameters
    ----------
    activity_id : str, The id of the activity
    access_token: str, The Strava access token
    refresh_token: str, The Strava refresh token, optional
        Stored on the client.
    client_id: int, The Strava client id, optional
        Accepted for token refresh but not used by this function.
    client_secret: str, The strava client secret, optional
        Accepted for token refresh but not used by this function.
    to_df : bool, optional
        Return a :obj:`pandas.DataFrame` if `to_df=True`, otherwise a
        obj:`runpandas.Activity` will be returned. Defaults to False.
    **kwargs :
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    Return a :obj:`pandas.DataFrame` if `to_df=True`, otherwise a
    obj:`runpandas.Activity`.
    """
    client = Client()
    client.access_token = access_token
    client.refresh_token = refresh_token

    activity = client.get_activity(activity_id)
    start_datetime = activity.start_date_local

    streams = client.get_activity_streams(
        activity_id=activity_id, types=STREAM_TYPES, series_type="time")

    data = pd.DataFrame(gen_records(streams))
    times = data.pop("time")
    # A concrete list, not a lazy map object, is assigned as the new labels.
    data.columns = [utils.camelcase_to_snakecase(name) for name in data.columns]

    def time_to_datetime(time):
        # stream times are offsets in seconds from the activity start
        return start_datetime + timedelta(seconds=time)

    timestamps = times.apply(time_to_datetime)
    timeoffsets = timestamps - timestamps[0]
    # ``timeoffsets`` already holds timedeltas, so no ``unit`` is needed; the
    # keyword is deprecated for TimedeltaIndex in recent pandas releases.
    data.index = TimedeltaIndex(timeoffsets, name="time")
    data.dropna(axis=1, how="all", inplace=True)

    if to_df:
        return data

    return Activity(data, cspecs=COLUMNS_SCHEMA, start=timestamps[0])
"created_at": "2017-10-31T20:53:13Z", "updated_at": "2019-06-22T02:28:48Z", "badge_type_id": 0, "profile_medium": "https://dgalywyr863hv.cloudfront.net/pictures/athletes/25997915/12282949/1/medium.jpg", "profile": "https://dgalywyr863hv.cloudfront.net/pictures/athletes/25997915/12282949/1/large.jpg", "friend": null, "follower": null } }""" from stravalib.client import Client client = Client(access_token="ff95a3c12476ca93a68825a38688dc72fb164e62 ") activities = client.get_activities(limit=10) run = client.get_activity(list(activities)[0].id) # Activities can have many streams, you can request n desired stream types types = ['time', 'latlng', 'altitude', 'distance'] streams = client.get_activity_streams(list(activities)[0].id, types=types, resolution='high') # Result is a dictionary object. The dict's key are the stream type. heights = streams['altitude'].data cords = streams['latlng'].data times = streams['time'].data distances = streams['distance'].data
class StravaAthlete(object):
    '''
    Gathers data for a strava athlete given possession of the API token.

    Data is stored as csv files below ``data/<firstname>_<lastname>/``,
    relative to the current working directory, which is expected to be a
    directory called ``strava``. The download methods build on each other:
    add_athlete() -> get_rides() -> get_segments() -> get_efforts() /
    get_seg_details().
    '''

    def __init__(self, token):
        self.token = token
        self.client = Client(access_token=self.token)
        self._athlete_name()

    # ------------------------------------------------------------------
    # helpers
    # ------------------------------------------------------------------
    def _athlete_name(self):
        '''
        Return "<firstname>_<lastname>" of the authenticated athlete.

        The name is fetched once and cached, so that building file paths
        does not spend an API request on every use.
        '''
        cached = getattr(self, '_athlete_name_cache', None)
        if cached is None:
            this_athlete = self.client.get_athlete()
            cached = this_athlete.firstname + '_' + this_athlete.lastname
            self._athlete_name_cache = cached
        return cached

    def _athlete_path(self, filename=None):
        '''Return the athlete's data directory, or the path of a file in it.'''
        parts = ['data', self._athlete_name()]
        if filename is not None:
            parts.append(filename)
        return os.path.join(*parts)

    @staticmethod
    def _append_rows(frame, rows):
        '''
        Return ``frame`` extended by ``rows`` (a list of dicts).

        Replacement for the removed ``DataFrame.append``. The columns of
        ``frame`` come first, followed by any new keys found in ``rows``.
        '''
        if not rows:
            return frame
        new_rows = pd.DataFrame(rows)
        if frame.empty:
            columns = list(frame.columns)
            columns.extend(c for c in new_rows.columns if c not in columns)
            return new_rows.reindex(columns=columns)
        return pd.concat([frame, new_rows], ignore_index=True, sort=False)

    @staticmethod
    def _segment_row(activity, effort):
        '''Return the segments_df row for one segment effort of a ride.'''
        row = {'ride_name': activity.name,
               'ride_id': activity.id,
               'ride_distance': activity.distance,
               'ride_moving_time': activity.moving_time,
               'ride_elapsed_time': activity.elapsed_time,
               'ride_elevation_gain': activity.total_elevation_gain,
               'ride_start_time': activity.start_date_local,
               'segment_id': effort.segment.id}
        try:
            row['bike'] = activity.gear.name
        except AttributeError:
            pass  # if activity.gear.name is missing, skip it
        return row

    # ------------------------------------------------------------------
    # directory checks
    # ------------------------------------------------------------------
    def check_strava_dir(self):
        '''Report whether the current working directory is called "strava".'''
        if os.path.basename(os.getcwd()) != 'strava':
            print('Please navigate to strava directory before proceeding.')
        else:
            print('In strava directory.')

    def check_athlete_folder(self):
        '''Report whether the athlete's data directory exists.'''
        name = self._athlete_name()
        if os.path.isdir(self._athlete_path()):
            print('Data for {0} to be stored in {1}'.format(
                name, 'data/' + name))
        else:
            print('Need to create directory first. use .add_athlete()')

    def check_directories(self):
        '''Run both directory checks; they only print, they do not abort.'''
        self.check_strava_dir()
        self.check_athlete_folder()

    # ------------------------------------------------------------------
    # downloads
    # ------------------------------------------------------------------
    def add_athlete(self):
        '''
        Create the data directories and record the athlete in athletes.csv.

        ``data/athletes.csv`` is loaded when present; otherwise an ath_df
        already held in memory is reused, or an empty one is created. The
        athlete's firstname, lastname, city and state are appended,
        duplicates are dropped and the file is written back.
        '''
        self.check_strava_dir()
        name = self._athlete_name()

        if not os.path.isdir('data'):
            os.mkdir('data')
            print('data directory created')

        if not os.path.isdir(self._athlete_path()):
            print('creating data/{} directory...'.format(name))
            os.mkdir('data/' + name)
            print('data/{} directory created'.format(name))
        else:
            print('data/{} directory already exists'.format(name))

        ath_cols = ['firstname', 'lastname', 'city', 'state']
        try:
            self.ath_df = pd.read_csv('data/athletes.csv')
            print('ath_df successfully loaded')
        except (OSError, ValueError):
            if hasattr(self, 'ath_df'):
                print('ath_df already in memory')
            else:
                # Must be stored on the instance: it is read from
                # self.ath_df right below.
                self.ath_df = pd.DataFrame(columns=ath_cols)
                print('empty ath_df intialized')

        athlete_dict = self.client.get_athlete().to_dict()
        new_row = {k: athlete_dict[k] for k in ath_cols if k in athlete_dict}
        self.ath_df = self._append_rows(self.ath_df, [new_row])
        # in case athlete was already added
        self.ath_df = self.ath_df.drop_duplicates()
        print('{0} {1} added to ath_df'.format(self.ath_df.firstname.iloc[-1],
                                               self.ath_df.lastname.iloc[-1]))
        self.ath_df.to_csv('data/athletes.csv', index=False)
        print('ath_df saved to data/athletes.csv')

    def get_rides(self):
        '''
        Downloads a csv of all activity ids associated with rides.

        An existing ride_ids.csv is loaded instead of downloading again.
        The result is stored in ``self.ride_ids`` (column ``ride_id``).
        '''
        self.check_directories()
        name = self._athlete_name()
        ride_path = self._athlete_path('ride_ids.csv')

        try:
            self.ride_ids = pd.read_csv(ride_path)
        except (OSError, ValueError):
            print('Need to gather ride IDs...')
        else:
            print('ride_ids.csv successfully loaded.')
            return

        if not os.path.isdir(self._athlete_path()):
            print('Need to create directory first. Use .add_athlete()')
            return
        print('data for {0} to be stored in {1}'.format(name, 'data/' + name))

        act_list = list(self.client.get_activities())
        print('Ride IDs successfully downloaded')

        # Built from a dict so that an athlete without rides yields an empty
        # frame that still has the ride_id column.
        ride_ids = pd.DataFrame(
            {'ride_id': [act.id for act in act_list if act.type == 'Ride']})
        ride_ids.name = 'ride_ids'
        ride_ids.to_csv(ride_path, index=False)
        print('Ride IDs saved to data/{}/ride_ids.csv'.format(name))
        self.ride_ids = ride_ids

    def check_dependency(self, df):
        '''
        Placeholder for a shared "is this data frame available" check.

        It cannot work as intended: passing a data frame that is not
        defined fails at the call site before this method runs, so each
        download method performs its own check instead. Always returns None.
        '''
        return None

    def get_segments(self):
        '''
        Downloads .csv of rides and segment ids.
        Require ride_ids dataframe

        An existing segments_df.csv is loaded instead of downloading again.
        Any error during the download is treated as the API rate limit:
        the method waits 16 minutes and resumes with the ride that failed.
        '''
        self.check_directories()

        if not hasattr(self, 'ride_ids'):
            print('No ride_ids data frame. Must run get_rides() method first')
            return

        segment_cols = ['ride_name', 'ride_id', 'bike', 'ride_distance',
                        'ride_moving_time', 'ride_elapsed_time',
                        'ride_elevation_gain', 'ride_start_time',
                        'segment_id']
        segments_path = self._athlete_path('segments_df.csv')
        try:
            self.segments_df = pd.read_csv(segments_path)
        except (OSError, ValueError):
            self.segments_df = pd.DataFrame(columns=segment_cols)
            print('Empty segments_df created.')
        else:
            print('segments_df successfully loaded')
            return

        ride_id_values = list(self.ride_ids.ride_id)
        rides_left = len(ride_id_values)
        ride_index = 0
        rows = []
        # Strictly less-than: once every ride is done the loop must end.
        while ride_index < rides_left:
            try:
                while ride_index < rides_left:
                    activity = self.client.get_activity(
                        ride_id_values[ride_index], include_all_efforts=True)
                    # Rows of one ride are added together, so a retry after
                    # a failure cannot duplicate segments of that ride.
                    ride_rows = [self._segment_row(activity, effort)
                                 for effort in (activity.segment_efforts or [])]
                    rows.extend(ride_rows)
                    ride_index += 1
                    if ride_index % 50 == 0:
                        print('Last ride downloaded: id:{} {}'.format(
                            activity.id, activity.name))
                        print('It is {0}. Segments for {1} rides downloaded. {2} rides to go'
                              .format(datetime.datetime.now().strftime("%H:%M"),
                                      ride_index, rides_left - ride_index))
            except Exception as err:
                print('download interrupted: {}'.format(err))
                print('rate limit exceeded, need to wait 15')
                print('it is now {} minutes after the hour'.format(
                    datetime.datetime.now().minute))
                time.sleep(60 * 16)
                print('trying again...')

        self.segments_df = pd.DataFrame(rows, columns=segment_cols)
        print('segments for all {} rides downloaded'.format(ride_index))
        self.segments_df.name = 'segments_df'
        self.segments_df.to_csv(segments_path, index=False)
        print('segments_df saved to data/{}/segments_df.csv'.format(
            self._athlete_name()))

    def get_efforts(self):
        '''
        Downloads the individual efforts on segments into efforts_df.
        Requires segments_df df.

        An existing efforts_df.csv is loaded instead of downloading again.
        Segments already present in an in-memory efforts_df are skipped.
        Any error during the download is treated as the API rate limit:
        progress is saved, the method waits 16 minutes and resumes with the
        segment that failed.
        '''
        self.check_directories()

        if not hasattr(self, 'segments_df'):
            print('No segments_df data frame. Must run get_segments() method first')
            return

        efforts_path = self._athlete_path('efforts_df.csv')
        try:
            self.efforts_df = pd.read_csv(efforts_path)
        except FileNotFoundError:
            pass
        else:
            print('efforts_df successfully loaded.')
            return

        efforts_columns = ['segment_id', 'effort_id', 'name', 'start_date',
                           'moving_time', 'elapsed_time', 'average_heartrate',
                           'max_heartrate']
        try:
            effort_segs = self.efforts_df.segment_id.unique()
            print('Appending to existing efforts df.')
        except AttributeError:
            self.efforts_df = pd.DataFrame(columns=efforts_columns)
            effort_segs = np.array([])
            print('Empty efforts_df created.')

        existing = self.efforts_df
        remaining = list(np.setdiff1d(self.segments_df.segment_id, effort_segs))
        total = len(remaining)
        done = 0
        new_rows = []
        print('remaining segments to get efforts for: {}'.format(total))
        while done < total:
            try:
                while done < total:
                    segment = remaining[done]
                    segment_rows = []
                    for this_effort in list(
                            self.client.get_segment_efforts(segment)):
                        this_effort_dict = this_effort.to_dict()
                        this_effort_dict['segment_id'] = segment
                        this_effort_dict['effort_id'] = this_effort.id
                        segment_rows.append(
                            {k: this_effort_dict[k] for k in efforts_columns
                             if k in this_effort_dict})
                    # Added per segment, so a retry cannot duplicate efforts.
                    new_rows.extend(segment_rows)
                    done += 1
                    if done % 50 == 0:
                        last_name = segment_rows[-1].get('name', '') if segment_rows else ''
                        print('Last segment downloaded: {0} {1}'.format(
                            segment, last_name))
                        print('It is {0}. Efforts for {1} segments downloaded. {2} segments to go...'
                              .format(datetime.datetime.now().strftime("%H:%M"),
                                      done, total - done))
            except Exception as err:
                self.efforts_df = self._append_rows(existing, new_rows)
                self.efforts_df.to_csv(efforts_path, index=False)
                print('efforts_df.csv successfully saved.')
                print('download interrupted: {}'.format(err))
                print('Rate limit exceeded, need to wait 15 minutes')
                print('It is now {} minutes after the hour'.format(
                    datetime.datetime.now().minute))
                time.sleep(60 * 16)
                print('Trying again...')
                print('remaining segments to get efforts for: {}'.format(
                    total - done))

        print('Efforts for all {} segments downloaded'.format(done))
        self.efforts_df = self._append_rows(existing, new_rows)
        self.efforts_df.to_csv(efforts_path, index=False)
        print('efforts_df.csv successfully saved.')

    def get_seg_details(self):
        '''
        Downloads .csv of segment details.
        Require segments_df dataframe

        An existing seg_details_df.csv is loaded instead of downloading
        again. Segments already present in an in-memory seg_details_df are
        skipped. Progress is saved every 50 segments. Any error during the
        download is treated as the API rate limit: the method waits 16
        minutes and resumes with the segment that failed.
        '''
        self.check_directories()

        if not hasattr(self, 'segments_df'):
            print('No segments_df data frame. Must run get_segments() method first')
            return

        details_path = self._athlete_path('seg_details_df.csv')
        try:
            self.seg_details_df = pd.read_csv(details_path)
        except FileNotFoundError:
            pass
        else:
            print('seg_details_df successfully loaded.')
            return

        seg_details_cols = ['segment_id', 'name', 'distance', 'average_grade',
                            'maximum_grade', 'elevation_high', 'elevation_low',
                            'total_elevation_gain', 'start_latitude',
                            'end_latitude', 'start_longitude', 'end_longitude',
                            'climb_category', 'city', 'state', 'country',
                            'effort_count', 'athlete_count',
                            'athlete_segment_stats', 'map']
        try:
            downloaded_ids = self.seg_details_df.segment_id.unique()
            print('Appending to existing seg_details df.')
        except AttributeError:
            self.seg_details_df = pd.DataFrame(columns=seg_details_cols)
            downloaded_ids = np.array([])
            print('Empty seg_details_df created.')

        existing = self.seg_details_df
        # setdiff1d yields each outstanding segment id once, so the loop is
        # bounded by the number of unique segments, not by the row count.
        remaining = list(np.setdiff1d(self.segments_df.segment_id,
                                      downloaded_ids))
        segments_remaining = len(remaining)
        seg_details_index = 0
        new_rows = []
        print('remaining segments to get details for: {}'.format(
            segments_remaining))
        while seg_details_index < segments_remaining:
            try:
                while seg_details_index < segments_remaining:
                    segment = remaining[seg_details_index]
                    this_segment_dict = self.client.get_segment(segment).to_dict()
                    this_segment_dict['segment_id'] = segment
                    new_rows.append(
                        {k: this_segment_dict[k] for k in seg_details_cols
                         if k in this_segment_dict})
                    seg_details_index += 1
                    if seg_details_index % 50 == 0:
                        self.seg_details_df = self._append_rows(existing,
                                                                new_rows)
                        self.seg_details_df.to_csv(details_path, index=False)
                        print('seg_details_df.csv successfully saved.')
                        print('Last segment downloaded: {0} {1}'.format(
                            segment, new_rows[-1].get('name', '')))
                        print('It is {0}. {1} segments downloaded. {2} segments to go...'
                              .format(datetime.datetime.now().strftime("%H:%M"),
                                      seg_details_index,
                                      segments_remaining - seg_details_index))
            except Exception as err:
                print('download interrupted: {}'.format(err))
                print('rate limit exceeded, need to wait 15 minutes')
                print('it is now {} minutes after the hour'.format(
                    datetime.datetime.now().minute))
                time.sleep(60 * 16)
                print('trying again...')
                print('remaining segments to get details for: {}'.format(
                    segments_remaining - seg_details_index))

        print('all {} segments downloaded'.format(seg_details_index))
        self.seg_details_df = self._append_rows(existing, new_rows)
        self.seg_details_df.to_csv(details_path, index=False)
        print('seg_details_df.csv successfully saved.')
# Demo script: read the Strava client id from token_id.ini, build an OAuth
# authorization URL, then use a ready-made access token to print one specific
# activity and the ten most recent activities since 2010.
from data_from_http import token_entry
from data_from_lib import cfg

# NOTE(review): `fg` is never used in this script; it looks like a typo for
# `cfg`, but `cfg` is imported above, so the assignment is kept -- confirm.
fg = configparser.ConfigParser()
cfg._interpolation = configparser.ExtendedInterpolation()
cfg.read("token_id.ini")
clientid = cfg.get('strava', 'clientid')
print(clientid)

client = Client()
# The redirect host used to be "127.0.0.0.0", which has five octets and is
# not a valid IPv4 address; the loopback address is 127.0.0.1.
authorize_url = client.authorization_url(
    clientid, redirect_uri='http://127.0.0.1:8100/authorized')
# Have the user click the authorization URL, a 'code' param will be added to
# the redirect_uri

client = Client(token_entry)  # inserting the token

# Currently-authenticated (based on provided token) athlete
# setting the athlete specific activity
curr_athlete = client.get_athlete()
activity = client.get_activity(1726741296)

# method to take specific activity
print("type={0.type} distance={1} km".format(
    activity, unithelper.kilometers(activity.distance)))

# method to take more activities and its informations
for activity in client.get_activities(after="2010-01-01T00:00:00Z", limit=10):
    print(
        "{0.name} {0.moving_time}".format(activity),
        "type={0.type} distance={1} ".format(
            activity, unithelper.kilometers(activity.distance)))
if __name__ == '__main__': mysegments = {} # get athlete client = Client(access_token) # 1 -- possibly not counted athlete = client.get_athlete() # 2 # get athlete activities activities = client.get_activities(limit=200) # 3 print("number of activities returned", str(len(list(activities)))) # per activity, get segment efforts for activity in activities: segment_efforts = client.get_activity(activity.id).segment_efforts # 4 # per segment effort for segment in segment_efforts: mysegments[segment.segment.id] = segment.segment # save to db # check if segment leaderboard contains any friends for key, segment in mysegments.iteritems(): leaderboard = client.get_segment_leaderboard(key, following=True).entries # 12 # get friend with time < athlete time for person in leaderboard: if person.athlete_id == 1869056: me = person index = leaderboard.index(me) if index > 0:
from stravalib import unithelper import settings at = airtable.Airtable(settings.AIRTABLE_BASE, settings.AIRTABLE_API_KEY) client = Client(access_token=settings.STRAVA_ACCESS_TOKEN) athlete = client.get_athlete() print athlete.firstname activities= client.get_activities() activity_list = [] for activity in activities: activity = client.get_activity(activity.id) if activity.photos.primary: primary_pic = activity.photos.primary.urls['600'] else: primary_pic = [] data = { "Name" : activity.name, "Created At" : activity.start_date.isoformat(), "Distance Meters" : float(unithelper.meters(activity.distance)), "Elapsed Time in Seconds" : activity.elapsed_time.seconds, "Link to Activity" : "https://www.strava.com/activities/" + str(activity.id), "Notes" : activity.description, "Photos" : [{'url':primary_pic}] if primary_pic else [], "Strava Activity ID" : activity.id,
class Strava_scraper(object):
    '''
    A strava scraper class.

    Wraps a stravalib client for one authenticated athlete and collects the
    activities of that athlete's friends. Call get_client() before any method
    that talks to the API.
    '''

    # Upper bound on attempts when fetching a friends list over a flaky
    # connection (see get_n_athletes).
    _MAX_FETCH_ATTEMPTS = 100

    def __init__(self, client_secret, access_token):
        """
        Store the credentials and create empty result containers.
        Inputs: client_secret and access_token, as issued by Strava
        Outputs: None
        """
        self.client_secret = client_secret
        self.access_token = access_token
        self.client = None
        self.athlete = None
        self.friends = None  # list of my friends, dtype = stravalib object
        self.friend_ids = []
        self.friend_activities = []
        self.athlete_ids = []  # not used
        self.list_of_athletes = []  # not used

    def get_client(self):
        """
        The get_client method create a client object for making requests to
        the strava API. The Client class accepts an access_token and a
        rate_limiter object. The method also populates a friends list
        Inputs: None
        Outputs: None
        """
        self.client = Client(access_token=self.access_token,
                             rate_limiter=DefaultRateLimiter())
        self.athlete = self.client.get_athlete()  # full authenticated athlete
        # print("") emits the same blank line as a bare Python 2 `print`.
        print("Client setup complete!")
        print("")
        self.friends = list(
            self.client.get_athlete_friends())  # the athlete's friends
        print("Authenticated user's friends list complete!")
        print("")
        self.friend_ids.extend(friend.id for friend in self.friends)

    def _check_for_id(self, id):
        """
        The _check_for_id method checks both the friends_ids and athlete_ids
        class attributes for the input id number.
        Inputs: id as integer
        Outputs: True if the id is already known, False otherwise
        """
        return (id in self.friend_ids) or (id in self.athlete_ids)

    def get_n_athletes(self, n):
        """
        The get_n_athletes method is deprecated because Strava no longer
        allows authenticated users to indiscriminantly pull activities for a
        particular user if they are not friends with the authenticated user.
        This means that by creating a large list of athletes without
        following them does not help get activities for those atheletes.

        It walks the friends-of-friends graph breadth first until n athletes
        (friends included) are known. Each friends request is attempted at
        most _MAX_FETCH_ATTEMPTS times; the last ConnectionError is re-raised
        if every attempt fails.
        Input: n as integer
        Output: None
        """
        athlete_deq = deque(self.friend_ids, maxlen=n)
        id_deq = deque(self.friend_ids, maxlen=n)
        num = 0
        while len(self.athlete_ids) + len(self.friend_ids) < n:
            athlete_id = id_deq.popleft()
            athlete = athlete_deq.popleft()

            # Bounded retry: the previous loop wrapped an unbounded
            # `while True` in the "try one hundred times" loop, so it could
            # spin forever on a dead connection.
            for attempt in range(self._MAX_FETCH_ATTEMPTS):
                try:
                    athletes_friends = self.client.get_athlete_friends(
                        athlete_id)
                except ConnectionError:
                    if attempt == self._MAX_FETCH_ATTEMPTS - 1:
                        raise
                    continue
                break

            for friend in athletes_friends:
                athlete_deq.append(friend)
                id_deq.append(friend.id)

            if not self._check_for_id(athlete_id):
                self.athlete_ids.append(athlete_id)
                self.list_of_athletes.append(athlete)
                # Strip non-ASCII characters so the names print safely.
                firstname = re.sub(r'[^\x00-\x7F]+', '', athlete.firstname)
                lastname = re.sub(r'[^\x00-\x7F]+', '', athlete.lastname)
                print("athlete '{} {}' added to list position {}".format(
                    firstname, lastname, num))
                num += 1

    def _get_state(self, latlng):
        """
        Reverse-geocode latlng with Nominatim and return the 'state' entry of
        the resulting address.
        Input: latlng as a coordinate pair (may be empty/None)
        Output: state as string, or None when latlng is empty, nothing is
        found, or the address has no 'state' key
        """
        if not latlng:
            return None
        geoc = Nominatim()
        location = geoc.reverse(latlng)
        if location is None:  # the geocoder found nothing for this point
            return None
        try:
            return location.raw['address']['state']
        except KeyError:
            return None

    def _get_activity(self, act_id, state):
        """
        Fetch one activity and keep it only if it belongs to a friend and
        starts in the requested state.
        Input: act_id as integer, state as string
        Output: the activity, or None when it cannot be fetched, has no start
        coordinates, is not a friend's, or is in another state
        """
        try:
            activity = self.client.get_activity(
                act_id)  # get id with id = act_id from strava client
        except HTTPError:
            print("id:{}; client HTTP error when getting activity!!!".format(
                act_id))
            return None

        latlng = activity.start_latlng
        if not latlng:
            return None

        act_ath_id = activity.athlete.id
        firstname = re.sub(r'[^\x00-\x7F]+', '', activity.athlete.firstname)
        lastname = re.sub(r'[^\x00-\x7F]+', '', activity.athlete.lastname)
        act_state = self._get_state(list(latlng))
        if act_ath_id in self.friend_ids and act_state == state:
            print("activity id: {} belonging to {} {}, added to list".format(
                act_id, firstname, lastname))
            return activity

        print("activity {} not a gps coordinated activity or not in state."
              .format(act_id))
        return None

    def get_n_activities(self, start_id, end_id, state='Colorado', n=30000):
        """
        The get_n_activities method walks activity ids downwards from
        start_id to end_id (inclusive) and appends every friend activity that
        starts in `state` to self.friend_activities. It stops once the list
        holds n entries.
        The default state is 'Colorado'.
        Input: start_id and end_id as int, state as string, n as int
        Output: None
        """
        print("Getting activities starting with id: {}".format(start_id))
        print("_" * 50)
        act_id = start_id
        # `< n` (not `<= n`): n is the maximum number of entries to hold.
        while len(self.friend_activities) < n and act_id >= end_id:
            activity = self._get_activity(act_id, state)
            if activity:
                self.friend_activities.append(activity)
            act_id -= 1

    def __repr__(self):
        # The athlete is stored as self.athlete (there is no my_athlete
        # attribute), and it is None until get_client() has run.
        if self.athlete is None:
            return "This is an unauthenticated strava scraper class"
        return "This is {} {}'s strava scraper class".format(
            self.athlete.firstname, self.athlete.lastname)
def read_strava(
    activity_id: int,
    access_token: str,
    refresh_token: str = None,
    client_id: int = None,
    client_secret: str = None,
    resample: bool = False,
    interpolate: bool = False,
) -> pd.DataFrame:
    """Load one Strava activity into a data frame indexed by local datetime.

    The activity metadata is requested first (for its local start time),
    then the raw streams. Stream names found in COLUMN_TRANSLATIONS are
    renamed to sweat terminology; the "latlng" stream is split into
    "latitude" and "longitude" columns.

    Args:
        activity_id: The id of the activity
        access_token: The Strava access token
        refresh_token: The Strava refresh token. Optional; it is stored on
            the client.
        client_id: The Strava client id. Optional; not read by this function.
        client_secret: The Strava client secret. Optional; not read by this
            function.
        resample: forwarded to resample_data
        interpolate: forwarded to resample_data

    Returns:
        The data frame returned by resample_data.
    """

    def translate(stream_name):
        # Unknown stream names keep their original name.
        try:
            return COLUMN_TRANSLATIONS[stream_name]
        except KeyError:
            return stream_name

    client = Client()
    client.access_token = access_token
    client.refresh_token = refresh_token

    start_datetime = client.get_activity(activity_id).start_date_local
    streams = client.get_activity_streams(
        activity_id=activity_id,
        types=STREAM_TYPES,
        series_type="time",
    )

    columns = {}
    for stream_name, stream in streams.items():
        if stream_name != "latlng":
            columns[translate(stream_name)] = stream.data
            continue
        # Transpose the (lat, lng) pairs into two parallel columns.
        latitude, longitude = list(zip(*stream.data))
        columns["latitude"] = latitude
        columns["longitude"] = longitude

    frame = pd.DataFrame(columns)
    # "time" holds offsets in seconds from the activity start.
    frame["datetime"] = frame["time"].apply(
        lambda offset: start_datetime + timedelta(seconds=offset)
    )
    frame = frame.drop(["time"], axis="columns")
    frame = frame.set_index("datetime")

    return resample_data(frame, resample, interpolate)
return str(s) print 'Opening file to write...' outputfile = open('runLogsSorted.csv', 'a') # Append the entry for each run to this file schema = '"ID","Name","Distance (mi)","Moving time (s)","Elapsed time (s)","Elevation gain (ft)","Avg speed (mph)","Max speed (mph)","Avg cadence","Avg temp (C)","Avg HR","Max HR","Calories","Shoes","Start timestamp (local)","Start Lat","Start Lng","End Lat","End Lng","City","State","Country","Achievements","Kudos","Workout type"\n' print 'Writing schema...' outputfile.write(schema) runs = 0 print 'Writing activities...' for x in range(total_activities-1,-1,-1): curr_activity = activity_list[x] if curr_activity.type == 'Run': print("Writing activity {i} (Run): {act_id}".format(i=x, act_id=curr_activity.id)) curr_activity_full = client.get_activity(curr_activity.id) record = '' record = record + '"' + xstr(curr_activity_full.id) + '",' record = record + '"' + xstr(curr_activity_full.name) + '",' record = record + '"' + xstr(unithelper.miles(curr_activity_full.distance).num) + '",' record = record + '"' + xstr(curr_activity_full.moving_time.seconds) + '",' record = record + '"' + xstr(curr_activity_full.elapsed_time.seconds) + '",' record = record + '"' + xstr(unithelper.feet(curr_activity_full.total_elevation_gain).num) + '",' record = record + '"' + xstr(unithelper.miles_per_hour(curr_activity_full.average_speed).num) + '",' record = record + '"' + xstr(unithelper.miles_per_hour(curr_activity_full.max_speed).num) + '",' record = record + '"' + xstr(curr_activity_full.average_cadence) + '",' record = record + '"' + xstr(curr_activity_full.average_temp) + '",' record = record + '"' + xstr(curr_activity_full.average_heartrate) + '",' record = record + '"' + xstr(curr_activity_full.max_heartrate) + '",' record = record + '"' + xstr(curr_activity_full.calories) + '",'
# Sync script: append every Strava run that is not yet in data.json to the
# in-memory `data` list.
client = Client(access_token=AccessToken)
athlete = client.get_athlete()

# The context manager closes the file even when json.load raises.
with open('data.json', 'r') as r:
    data = json.load(r)
activities = list(d["id"] for d in data)
known_ids = set(activities)  # O(1) membership test inside the loop below

stravaActivities = client.get_activities()
for activity in stravaActivities:
    if activity.id in known_ids:
        print("Already have this activity!")
        continue

    # Fetch the detailed activity; the fields below are read from it.
    a = client.get_activity(activity.id)
    if a.type != "Run":
        # This branch skips non-runs; the message used to claim the opposite.
        print("Activity was not a run")
        continue

    print("Found a new activity!", activity.id)
    # NOTE(review): "speed" uses average_speed.num while "pace" passes the
    # average_speed object itself into the arithmetic -- confirm both work.
    act = {
        "date": a.start_date_local.strftime("%y-%m-%d"),
        "id": a.id,
        "distance": unithelper.miles(a.distance).num,
        "duration": a.moving_time.seconds,
        "speed": a.average_speed.num * meterToMile * 3600,
        "pace": float(1 / (meterToMile * a.average_speed)),
        "name": a.name,
        "splits": [
            {
                "time": split.elapsed_time.total_seconds(),
                "distance": unithelper.miles(split.distance).num,
            }
            for split in a.splits_standard
        ],
    }
    data.append(act)
mydb = mysql.connector.connect(host='localhost', user='', passwd='', database='', auth_plugin='mysql_native_password') mycursor = mydb.cursor() list_of_id = [] for activity in activities: list_of_id.append(activity.id) for _id_ in list_of_id: activity_current = client.get_activity(_id_) info = {} info['Activity_ID'] = _id_ t = activity_current.start_date_local info['Date'] = t.strftime('%m/%d/%Y') info['Start_time'] = t.strftime('%H:%M') td = activity_current.elapsed_time.total_seconds() info['Elapsed_time'] = td info['Distance'] = activity_current.distance.__str__() info['Cadence'] = activity_current.average_cadence.__str__() info['Actvity'] = activity_current.type.__str__() info['Average_speed'] = activity_current.average_speed.__str__() #Enter the table name in the place of TableName sql = "INSERT INTO TableName (Activity_ID, Date, Start_time, Elapsed_time, Distance, Cadence, Activity, Average_speed) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)" val = (info['Activity_ID'], info['Date'], info['Start_time'], info['Elapsed_time'], info['Distance'], info['Cadence'],
# Demo script: greet the authenticated athlete, list their clubs, fetch the
# activity configured in the ini file and print the most recent activity.
activity = cfg.get('strava', 'activity')

client = Client()
# The redirect host used to be "127.0.0.0.0", which has five octets and is
# not a valid IPv4 address; the loopback address is 127.0.0.1.
authorize_url = client.authorization_url(
    clientid, redirect_uri='http://127.0.0.1:8100/authorized')
# Have the user click the authorization URL, a 'code' param will be added to
# the redirect_uri

client = Client(access_token=token_entry)

# Currently-authenticated (based on provided token) athlete
curr_athlete = client.get_athlete()  # This is now me

# Saying hello (reuses the athlete fetched above instead of a second request)
athlete = curr_athlete
print("Hello, {}".format(athlete.firstname))

# Showing the clubs
# NOTE: `athlete` is rebound to the list of clubs here.
athlete = client.get_athlete_clubs()
for a in athlete:
    print("{} is your club.".format(a))

# Testing the activities
# setting the athlete specific activity
activity_1 = client.get_activity(activity)

# method to take more activities and its informations
for activity in client.get_activities(after="2010-01-01T00:00:00Z", limit=1):
    print(
        "{0.name} {0.moving_time}".format(activity),
        "type={0.type} distance={1} ".format(
            activity, unithelper.kilometers(activity.distance)))
# Smoke test: fetch activities, clubs, club members and club activities, check
# the requested limits are honoured, then print what came back.
#
# Each result is materialised into a list exactly once. Previously the result
# iterators were consumed by `list(...)` inside the asserts and then iterated
# again, so the loops further down had nothing left to print (or, at best,
# triggered a second round of API requests).
activities = list(client.get_activities(limit=10))
assert len(activities) == 10

clubs = client.get_athlete_clubs()

icc_members = list(client.get_club_members(club_id, limit=20))
assert len(icc_members) == 20

club_activities = list(client.get_club_activities(club_id, limit=20))
assert len(club_activities) == 20

#View activities
#for x in activities:
#    print (x)

for x in clubs:
    print(x)

for x in icc_members:
    print(x)

for x in club_activities:
    # Fetch the detailed activity behind each club summary entry.
    cm_activity = client.get_activity(x.id)
    print(x)
    print(cm_activity)

#<Activity id=270828720 name='Evening Ride' resource_state=2>
#<Activity id=270590277 name='Morning Ride' resource_state=2>
#<Activity id=270577804 name='Evening Ride' resource_state=2>
#<Activity id=270137878 name='Morning Ride' resource_state=2>
#a = client.get_activity(270137878)
class StravaBot:
    """Poll a Strava club and post each new activity to a Mattermost webhook.

    Settings are read from "configuration/config": sections [Strava]
    (ClientId, ClientSecret, ClientAccessToken, ClubId), [Mattermost] (URL)
    and [Bot] (Delay, the polling interval in seconds).
    """

    def __init__(self):
        """Load the configuration, authenticate and look up the club."""
        config = ConfigParser.ConfigParser()
        config.read("configuration/config")

        self.clientId = config.get('Strava', 'ClientId')
        self.clientSecret = config.get('Strava', 'ClientSecret')
        self.clientAccessToken = config.get('Strava', 'ClientAccessToken')
        self.clubId = config.get('Strava', 'ClubId')
        self.mattermostUrl = config.get('Mattermost', 'URL')
        self.delay = config.get('Bot', 'Delay')  # kept as text; see run()

        self.client = Client()
        self.client.access_token = self.clientAccessToken
        self.club = self.client.get_club(self.clubId)

        # Verify TLS certificates against the certifi CA bundle.
        self.http = urllib3.PoolManager(
            cert_reqs='CERT_REQUIRED',
            ca_certs=certifi.where())
        print('Bot for club {name} with id {id} is here :^)\n'.format(name=self.club.name, id=self.clubId))

    def post_activity(self, activity):
        """Send one activity to the Mattermost webhook and log the outcome.

        Args:
            activity: the activity to announce. If its athlete has no first
                name, the athlete is fetched by id and stored back on the
                activity first.
        """
        if activity.athlete.firstname is None:
            activity.athlete = self.client.get_athlete(activity.athlete.id)

        first_name = activity.athlete.firstname
        last_name = activity.athlete.lastname
        distance = kilometers(activity.distance)
        activity_duration = activity.moving_time
        speed = kilometers_per_hour(activity.average_speed)
        climbing = meters(activity.total_elevation_gain)
        activity_id = activity.id

        # Cap the link text at 100 characters.
        description = activity.name
        if len(description) > 100:
            description = description[:97] + "..."

        payload = {'username': '******',
                   'icon_url': 'https://raw.githubusercontent.com/patoupatou/pymatterstrava/master/icon-strava.png',
                   'text': u':bicyclist: *{} {} : distance: {}, moving time duration: {}, speed: {}, climbing: {}* [{}](http://strava.com/activities/{}) :bicyclist:'.format(first_name, last_name, distance, activity_duration, speed, climbing, description, activity_id)}
        response = self.http.request('POST', self.mattermostUrl,
                                     headers={'Content-Type': 'application/json'},
                                     body=json.dumps(payload))

        # Only claim success when the webhook accepted the request; the
        # status used to be ignored.
        if 200 <= response.status < 300:
            print(time.ctime() + ': New activity posted')
        else:
            print(time.ctime() + ': Posting activity failed with HTTP status ' + str(response.status))
        print('payload: ' + str(payload) + '\n')

    def get_activity_details(self, activity):
        """Return the detailed activity for the given activity's id."""
        return self.client.get_activity(activity.id)

    def get_new_activities(self, old_activities, new_activities):
        """Return the activities in new_activities whose id is not in old_activities.

        The order of new_activities is preserved and it is iterated only
        once.
        """
        known_ids = {old_activity.id for old_activity in old_activities}
        return [act for act in new_activities if act.id not in known_ids]

    def run(self):
        """Poll the club's five latest activities forever and post new ones.

        The first poll only establishes the baseline; nothing is posted for
        it.
        """
        activities = set(self.client.get_club_activities(self.clubId, limit=5))

        while True:
            new_activities = set(self.client.get_club_activities(self.clubId, limit=5))
            diff_activities = self.get_new_activities(activities, new_activities)
            if diff_activities:
                print(time.ctime() + ': New activities!\n')
                print(diff_activities)
                for new_activity in diff_activities:
                    details = self.get_activity_details(new_activity)
                    self.post_activity(details)
            else:
                print(time.ctime() + ': No new activities\n')

            activities = new_activities
            time.sleep(float(self.delay))
url = client.authorization_url(client_id=MY_STRAVA_CLIENT_ID, redirect_uri='http://127.0.0.1:5000/authorization') # follow this url and grab the code it gives you print(url) # new cell code = "GRAB THE CODE FROM THE URL REDIRECT ABOVE" access_token = client.exchange_code_for_token(client_id=MY_STRAVA_CLIENT_ID, client_secret=MY_STRAVA_CLIENT_SECRET, code=code) activity_id = 123 # any activity (you can easily get a list of recents from client) activity = client.get_activity(activity_id).to_dict() types = ['time', 'latlng'] stream = client.get_activity_streams(activity_id, types=types) print(activity["type"]) shoes = activity["gear"]["name"] elapsed_time = activity["elapsed_time"] moving_time = activity["moving_time"] distance_miles = activity["distance"] * 0.000621371 # meters to miles average_heartrate = activity["average_heartrate"] max_heartrate = activity["max_heartrate"] start_date_local = datetime.strptime(activity["start_date_local"], "%Y-%m-%dT%H:%M:%S") name = activity["name"] description = activity["description"] calories = activity["calories"]