def get_strava_api(secret, ID):
    """Download every activity for an athlete and dump the streams to disk.

    Parameters
    ----------
    secret : str
        Strava API access token.
    ID :
        Athlete id passed to ``client.get_athlete`` (NOTE(review): stravalib's
        ``get_athlete`` normally takes no argument for the authenticated
        athlete -- confirm against the stravalib version in use).

    Returns
    -------
    list of pandas.DataFrame, one frame per activity.  As side effects,
    writes ``save_file + '.pkl'`` (pickled list of frames) and
    ``save_file + '.csv'`` (all frames concatenated).

    Relies on module-level names defined elsewhere in the file:
    ``Client``, ``total_num``, ``types``, ``save_file``, ``split_lat``,
    ``split_long`` and ``trange``.
    """
    all_act = []
    client = Client(access_token=secret)
    tot = total_num(client)
    # Fetched mainly to validate the token/athlete; result is unused.
    me = client.get_athlete(ID)
    activities = client.get_activities()
    for i in trange(tot):
        df = pd.DataFrame()
        # FIX: iterator.next() is Python 2 only; use the builtin next().
        _a = next(activities)
        _streams = client.get_activity_streams(_a.id, types=types)
        for item in types:
            if item in _streams.keys():
                df[item] = pd.Series(_streams[item].data, index=None)
        df['act_id'] = _a.id
        df['act_name'] = _a.name
        df['act_type'] = _a.type
        # FIX: assigning a bare map() object to a column is a Python 2 idiom
        # that breaks under Python 3; Series.map applies the splitter
        # element-wise and keeps the index aligned.
        df['lat'] = df['latlng'].map(split_lat)
        df['lon'] = df['latlng'].map(split_long)
        df['time'] = df['distance'] / (df['velocity_smooth'])
        # FIX: fillna() is not in-place by default -- the original call's
        # result was silently discarded.
        df = df.fillna(0)
        all_act.append(df)
        del df
    with open(save_file + '.pkl', 'wb') as fp:
        pickle.dump(all_act, fp)
    pd.concat(all_act, ignore_index=True).to_csv(save_file + '.csv')
    return all_act
def home(request):
    """Django view: sync the logged-in athlete's new Strava activities into
    the local DB and render the home page.

    Redirects to the 'auth' view when no Strava access token is in the
    session.  Otherwise fetches only activities newer than the most recent
    one already stored for this athlete (or all activities on first visit).
    """
    if 'access_token' not in request.session:
        return redirect('auth')
    # Retrieving athlete's data from API
    client = Client(access_token=request.session.get('access_token'))
    athlete = client.get_athlete()
    # Checking if the athlete has already visited the site before
    try:
        db_athlete = Athlete.objects.get(id=athlete.id)
    except Athlete.DoesNotExist:
        db_athlete = Athlete.create(athlete)
        db_athlete.save()
    # Getting the latest activity from the current athlete
    try:
        latest_activity = Activity.objects.filter(athlete=db_athlete).latest('date')
        activities = client.get_activities(after=latest_activity.date)
    except Activity.DoesNotExist:
        # First visit: no stored activities yet, so fetch everything.
        activities = client.get_activities()
    # Adding new activities to the database.  Manual (GPS-less) activities
    # are skipped because they have no latlng stream.
    for activity in activities:
        if not activity.manual:
            # NOTE(review): get_activity_streams may return a dict without
            # 'latlng' (then .get returns None and .data raises) -- confirm
            # all non-manual activities carry GPS data.
            stream = client.get_activity_streams(activity.id, types=['latlng'], resolution='medium').get('latlng')
            db_activity = Activity.create(activity, db_athlete, stream.data)
            db_activity.save()
    # NOTE(review): len(list(activities)) re-consumes the iterator after the
    # loop above; depending on stravalib's iterator caching this may trigger
    # extra API calls or return 0 -- verify.
    return render(request, 'home.html', {'athlete': athlete, 'activities': activities, 'act_length': len(list(activities))})
class StravaData: RUNNER_ID = None ACCESS_TOKEN = None client = None def __init__(self, runner_id, access_token): self.RUNNER_ID = runner_id self.ACCESS_TOKEN = access_token self.client = Client() self.client.access_token = access_token def getAthlete(self): athlete = self.client.get_athlete() return athlete def getActivities(self): activities = self.client.get_activities() return activities def getActivity(self, id): activity = self.client.get_activity(id) return activity def getActivityStreams(self, id, types): streams = self.client.get_activity_streams(id, types=types, resolution='medium') return streams
def FetchGPSData(tokensFile, CPCdate, CPClen):
    """Find the Strava activity best matching a CPC sample and return its track.

    Parameters
    ----------
    tokensFile : str
        Path to a text file holding one saved Strava access token per line
        (one token per athlete account).
    CPCdate : datetime.datetime
        Start date/time of the CPC data file (local time).
    CPClen : int
        Length of the CPC sample in seconds.

    Returns
    -------
    pandas.DataFrame with columns 'lon', 'lat' and 'dateTime'.

    To regenerate the saved tokens:
      1. authorize_url = client.authorization_url(client_id=22380,
             redirect_uri='http://sustainability.leeds.ac.uk',
             approval_prompt='force'); print(authorize_url)
      2. Paste the url into a browser, accept the request, and copy the
         'code' from the resulting url into:
         access_token = client.exchange_code_for_token(client_id=22380,
             client_secret='***', code='***')
      3. Save the printed token into the tokens file.
    """
    client = Client()
    # FIX: close the tokens file deterministically with a context manager.
    with open(tokensFile, 'r') as f:
        myTokens = f.read().splitlines()
    # Find the activity that most closely matches the CPC start time and
    # sample length: activities starting within 1 minute of the CPC start
    # are candidates; the one with the elapsed time closest to CPClen wins.
    # NOTE(review): validActs is keyed by athlete index, so only the LAST
    # matching activity per athlete survives -- confirm that is intended.
    validActs = {}
    for i, token in enumerate(myTokens):
        client.access_token = token
        for activity in client.get_activities():
            startDate = activity.start_date_local
            if abs((CPCdate - startDate).total_seconds()) < 60:
                validActs[i] = activity.id
    # FIX: message said "5 minutes" while the check above is 60 seconds;
    # the message now matches the code.
    assert len(validActs) > 0, "No GPS activities with a start time within 1 minute of the CPC data file start time"
    DeltaT = 1e10  # sentinel: larger than any plausible time difference
    for key, value in validActs.items():
        client.access_token = myTokens[key]
        activity = client.get_activity(value)
        elap = activity.elapsed_time.seconds
        thisDT = abs(CPClen - elap)
        if thisDT < DeltaT:
            DeltaT = thisDT
            chosenAth = key
            chosenAct = value
    # Extract required data from the chosen activity.
    client.access_token = myTokens[chosenAth]
    activity = client.get_activity(chosenAct)
    startDate = activity.start_date_local
    endDate = startDate + dt.timedelta(seconds=activity.elapsed_time.seconds)
    endDateCPC = CPCdate + dt.timedelta(seconds=CPClen)
    assert abs((endDateCPC - endDate).total_seconds()) < 60, "No valid GPS activities with an end time within 1 minute of the CPC data file end time"
    myTypes = ['time', 'latlng']
    myStream = client.get_activity_streams(chosenAct, types=myTypes)
    latlon = myStream['latlng'].data
    # FIX: unpack the (lat, lon) pairs directly instead of indexing by range.
    lat = [pt[0] for pt in latlon]
    lon = [pt[1] for pt in latlon]
    time = myStream['time'].data
    dateTime = [startDate + dt.timedelta(seconds=s) for s in time]
    return pd.DataFrame(data={'lon': lon, 'lat': lat, 'dateTime': dateTime})
def main():
    """this is the main function for the cycle mapping program. It calls everything else

    Fetches the single most recent activity for the token's athlete, records
    its name -> id mapping, and prints the stream types available for it.
    """
    act_dict = {}
    # NOTE(review): access token is a hard-coded placeholder (with a trailing
    # space) -- must be replaced with a real token before this runs.
    client = Client(access_token="ACCESS_TOKEN_HERE ")
    activities = client.get_activities(limit=1)
    for activity in activities:
        act_dict[activity.name] = activity.id
        streams = client.get_activity_streams(activity.id,
                                              types=['time', 'latlng', 'altitude',
                                                     'heartrate', 'temp', ],
                                              resolution='medium')
        # Show which of the requested streams the activity actually has.
        print(streams.keys())
    print(act_dict)
def strava_last_activity(request):
    """Django view: render data from the user's most recent Strava activity.

    Looks up the logged-in user's stored Strava access code, fetches the
    single latest activity, and renders its average cadence.

    NOTE(review): the template context key is 'activity_name' but the value
    passed is the average cadence -- kept as-is because the template depends
    on that key.
    """
    currentUser = User.objects.get(pk=request.user.id)
    client = Client()
    client.access_token = currentUser.profile.stravaAccessCode
    activities = client.get_activities(before=datetime.datetime.now(), limit=1)
    activityName = ''
    # FIX: averageCadence was unbound (NameError at render) when the athlete
    # has no activities; default to None so the template gets an empty value.
    averageCadence = None
    for activity in activities:
        activityName = activity.name
        downloadedActivity = client.get_activity(activity.id)
        # Stream request kept for parity with the original code even though
        # its result is currently unused.
        activityStream = client.get_activity_streams(activity.id, types=["time", "watts"])
        averageCadence = downloadedActivity.average_cadence
    return render(request, 'strava_last_activity.html', {'activity_name': averageCadence})
def get_data(): load_dotenv(find_dotenv()) # authorize_url = client.authorization_url(client_id=os.getenv("client_id"), # redirect_uri='http://localhost:8282/authorized') # Have the user click the authorization URL, a 'code' param will be added to the # redirect_uri # ..... # Extract the code from your webapp response # code = request.get('code') # or whatever your framework does # access_token = client.exchange_code_for_token(client_id=22120, # client_secret='<client_secret>', code=code) client = Client(access_token=os.getenv("access_token")) # client.access_token = os.getenv("access_token") activities = client.get_activities() types = ['time', 'latlng', 'altitude', 'heartrate', 'temp'] headers_written = False #stream_types = ['time', 'latlng', 'altitude', 'heartrate', 'temp'] stream_types = ['heartrate'] with open( os.path.join("/home/greg/repos/commute_analysis", "data", "raw", 'raw_strava_data.csv'), 'w') as f: for activity in activities: streams = client.get_activity_streams(activity.id, types=stream_types, resolution='medium') temp = activity.to_dict() for k in types: if k in streams: temp[k] = streams[k].data else: temp[k] = None if not headers_written: w = csv.DictWriter(f, temp.keys()) w.writeheader() headers_written = True w.writerow(temp)
# follow this url and grab the code it gives you print(url) # new cell code = "GRAB THE CODE FROM THE URL REDIRECT ABOVE" access_token = client.exchange_code_for_token(client_id=MY_STRAVA_CLIENT_ID, client_secret=MY_STRAVA_CLIENT_SECRET, code=code) activity_id = 123 # any activity (you can easily get a list of recents from client) activity = client.get_activity(activity_id).to_dict() types = ['time', 'latlng'] stream = client.get_activity_streams(activity_id, types=types) print(activity["type"]) shoes = activity["gear"]["name"] elapsed_time = activity["elapsed_time"] moving_time = activity["moving_time"] distance_miles = activity["distance"] * 0.000621371 # meters to miles average_heartrate = activity["average_heartrate"] max_heartrate = activity["max_heartrate"] start_date_local = datetime.strptime(activity["start_date_local"], "%Y-%m-%dT%H:%M:%S") name = activity["name"] description = activity["description"] calories = activity["calories"] print(f""" shoes:\t\t\t{shoes}
''' Script to grab activities from strava '''
import sys
import stravalib
from stravalib.client import Client
# FIX: SafeConfigParser has been a deprecated alias since Python 3.2 and was
# removed in 3.12; ConfigParser is the drop-in replacement.
from configparser import ConfigParser

# NOTE(review): the file is named config.yaml but is parsed with configparser,
# so its contents must actually be INI-formatted -- confirm.
config = ConfigParser()
config.read('config.yaml')
client = Client()
authorize_url = client.authorization_url(
    client_id=config.get('strava', 'client_id'),
    redirect_uri='http://localhost:8282/authorized')
# Extract the code from your webapp response
# access_token = client.exchange_code_for_token(client_id=config.get('strava', 'client_id'), client_secret=config.get('strava', 'client_secret'), code=config.get('strava', 'code'))
# Now store that access token somewhere (a database?)
client.access_token = config.get('strava', 'Bearer')
athlete = client.get_athlete()
activities = client.get_activities(after="2017-11-17T00:00:00Z", limit=15)
activity_data = []
for activity in activities:
    activity_stream = client.get_activity_streams(activity.id,
                                                  types=['latlng', 'distance'])
    # FIX: activities without GPS (manual/trainer rides) have no 'latlng'
    # stream; skip them instead of raising KeyError.
    if activity_stream and 'latlng' in activity_stream:
        activity_data.append(activity_stream['latlng'].data)
def read_strava(activity_id, access_token, refresh_token=None, client_id=None, client_secret=None, to_df=False, **kwargs):
    """Load a Strava activity into a runpandas Activity or pandas DataFrame.

    Column names are translated to runpandas terminology (e.g. "heartrate"
    becomes "heart_rate"), the datetime index is replaced by time offsets,
    and all-NaN columns are dropped.  Two requests hit the Strava API: one
    for the activity metadata, one for the raw streams.

    Parameters
    ----------
    activity_id : str,
        The id of the activity
    access_token: str,
        The Strava access token
    refresh_token: str,
        The Strava refresh token, optional
    client_id: int,
        The Strava client id used for token refresh, optional
    client_secret: str,
        The strava client secret used for token refresh, optional
    to_df : bool, optional
        Return a obj:`runpandas.Activity` if `to_df=True`, otherwise
        a :obj:`pandas.DataFrame` will be returned. Defaults to False.
    **kwargs :
        Keyword args to be passed to the `read_strava`

    Returns
    -------
    Return a obj:`runpandas.Activity` if `to_df=True`, otherwise
    a :obj:`pandas.DataFrame` will be returned.
    """
    client = Client()
    client.access_token = access_token
    client.refresh_token = refresh_token

    started = client.get_activity(activity_id).start_date_local
    streams = client.get_activity_streams(
        activity_id=activity_id, types=STREAM_TYPES, series_type="time"
    )

    frame = pd.DataFrame(gen_records(streams))
    elapsed = frame.pop("time")
    frame.columns = map(utils.camelcase_to_snakecase, frame.columns)

    # Absolute wall-clock stamps, then offsets relative to the first sample.
    stamps = elapsed.apply(lambda secs: started + timedelta(seconds=secs))
    offsets = stamps - stamps[0]
    frame.index = TimedeltaIndex(offsets, unit="s", name="time")
    frame.dropna(axis=1, how="all", inplace=True)

    if to_df:
        return frame
    return Activity(frame, cspecs=COLUMNS_SCHEMA, start=stamps[0])
# Download some activities print "Downloading activities from {0:%d %b %Y}".format(from_date) acts = client.get_activities(after=from_date) for act in acts: total += 1 if act.type != "Run" or act.average_heartrate is None: continue count += 1 # Get the full data streams streams = client.get_activity_streams(act.id, types=stream_filter) sdf = pd.DataFrame(dict((stype, stream.data) for (stype, stream) in streams.iteritems())) if "latlng" in stream_filter: sdf["lat"] = [a[0] for a in sdf.latlng] sdf["lng"] = [a[1] for a in sdf.latlng] del sdf["latlng"] detail_fname = join(output_detail_dir, "{0}.json".format(act.id)) sdf.to_json(detail_fname) # with open(join(output_detail_dir, "{0}.p".format(act.id)), "wb") as f: # pickle.dump(sdf, f, 2) print "{0} on {1:%d %b %Y} [kudos {2}]".format(act.name, act.start_date, act.kudos_count) print "\tHR: {0}".format(act.average_heartrate)
#print(ali) acts = client.get_activities(before=None, after=None, limit=10) for act in acts: print(act.id, act.name, unithelper.miles(act.distance), act.description) #testing #ryan = client.get_athlete(6777976) #print ryan.city # Activities can have many streams, you can request n desired stream types types = [ 'time', 'latlng', 'altitude', 'heartrate', 'temp', ] streams = client.get_activity_streams(1179030205, types=types) # This returns initial data for start of activity via streams # print('streams:') # print(streams) # print('kvs:') # for key, value in streams.items(): # print(key + ' has ' + str(value.original_size) + ' original data points') # points = value.data # print(points[0])
# Fragment of a larger Python 2 script: `ra`, `activity_ids`, `client` and
# `percentile` are defined earlier in the file.
activity_ids.append(ra.id)
print activity_ids
for activity_id in activity_ids:
    act = client.get_activity(activity_id)
    print act.name, act.type, act.athlete.firstname, act.athlete.lastname
    # Heuristic: a detailed activity has gear/calories populated; a summary
    # activity does not.
    if (act.gear == None and act.calories == None):
        print 'Summary Activity'
    else:
        print 'Detailed Activity'
    # Dead code: the stream/percentile analysis below is disabled.
    if False:
        #act_streams = client.get_activity_streams(activity_id, types=['velocity_smooth','grade_smooth'], resolution='high')
        act_streams = client.get_activity_streams(activity_id, types=['velocity_smooth','grade_smooth'])
        print (act_streams)
        vs = act_streams['velocity_smooth']
        gs = act_streams['grade_smooth']
        dst = act_streams['distance']
        #print (vs.data)
        # NOTE(review): sort() mutates the stream's underlying data list.
        vs_sorted = vs.data
        vs_sorted.sort()
        #print vs_sorted
        p_25th = percentile(vs_sorted, 0.25)
        p_50th = percentile(vs_sorted, 0.50)
        p_75th = percentile(vs_sorted, 0.75)
        p_90th = percentile(vs_sorted, 0.9)
        print 'Activity ID: {}'.format(activity_id)
for x in ('Pedalada', 'Ciclismo', 'Vuelta ciclista', 'Ride', 'almuerzo')): client.update_activity(activity.id, private=d['private'], commute=d['commute'], name=d['name_back']) prev_activity = activity current_activity_processed = True # Paso por lugar configurado for p in k['pass_through_locations']: # Solo act de mas de 20k if activity.distance and int( activity.distance) > 20000: streams = client.get_activity_streams( activity.id, ['latlng'], 'medium') if 'latlng' in streams.keys(): if pass_through_zone(p['latlng'], streams['latlng'].data, precision=0.3): print(p['name'], end="") print( u": [{0.suffer_score}] \"{0.name}\" {0.moving_time} {0.distance} {0.start_date_local} https://www.strava.com/activities/{0.id}" .format(activity)) if write and any( x in activity.name for x in ('Pedalada', 'Ciclismo', 'Vuelta ciclista', 'Ride', 'almuerzo')): client.update_activity(activity.id, name=p['name'])
resolution = 'high' types = ['time', 'altitude', 'heartrate', 'temp', 'distance', 'watts'] access_token = client.exchange_code_for_token(client_id=client_id, client_secret=client_secret, code=code) client = Client(access_token=access_token) df_overview = pd.DataFrame() activities = dict() for activity in client.get_activities( after='{}-01-01T00:00:00Z'.format(str(year)), before='{}-01-01T00:00:00Z'.format(str(year + 1))): streams = client.get_activity_streams(activity.id, types=types, series_type='time', resolution=resolution) for key, value in streams.items(): streams[key] = value.data df_overview = df_overview.append( pd.DataFrame([{ 'Name': activity.name, 'Date': activity.start_date_local, 'Durasi [min]': int(activity.moving_time.seconds / 60), 'Jarak [m]': round(activity.distance.num, 1), 'Measurements': list(streams.keys()) }], index=[activity.id])) activities[activity.id] = pd.DataFrame(streams) writer = pd.ExcelWriter('strava_export_{}.xlsx'.format(str(year)),
"follower": null } }""" from stravalib.client import Client client = Client(access_token="ff95a3c12476ca93a68825a38688dc72fb164e62 ") activities = client.get_activities(limit=10) run = client.get_activity(list(activities)[0].id) # Activities can have many streams, you can request n desired stream types types = ['time', 'latlng', 'altitude', 'distance'] streams = client.get_activity_streams(list(activities)[0].id, types=types, resolution='high') # Result is a dictionary object. The dict's key are the stream type. heights = streams['altitude'].data cords = streams['latlng'].data times = streams['time'].data distances = streams['distance'].data import csv from datetime import datetime pacePoints = 0 paceSum = 0 paces = [[]]
def read_strava(
    activity_id: int,
    access_token: str,
    refresh_token: str = None,
    client_id: int = None,
    client_secret: str = None,
    resample: bool = False,
    interpolate: bool = False,
) -> pd.DataFrame:
    """Retrieve one activity's data from Strava as a pandas DataFrame.

    Column names are translated to sweat terminology (e.g. "heart_rate"
    becomes "heartrate") and a "latlng" stream is split into "latitude" and
    "longitude" columns.  Two Strava API calls are made: one for the
    activity metadata and one for the raw streams.

    Args:
        activity_id: The id of the activity
        access_token: The Strava access token
        refresh_token: The Strava refresh token. Optional.
        client_id: The Strava client id. Optional. Used for token refresh.
        client_secret: The Strava client secret. Optional. Used for token refresh.
        resample: whether or not the data frame needs to be resampled to 1Hz
        interpolate: whether or not missing data in the data frame needs to be interpolated

    Returns:
        A pandas data frame with all the data, indexed by datetime.
    """
    client = Client()
    client.access_token = access_token
    client.refresh_token = refresh_token

    start = client.get_activity(activity_id).start_date_local
    streams = client.get_activity_streams(
        activity_id=activity_id,
        types=STREAM_TYPES,
        series_type="time",
    )

    columns = dict()
    for name, stream in streams.items():
        if name == "latlng":
            # Split the (lat, lon) pairs into two separate columns.
            lat, lon = zip(*stream.data)
            columns["latitude"] = lat
            columns["longitude"] = lon
        else:
            columns[COLUMN_TRANSLATIONS.get(name, name)] = stream.data

    frame = pd.DataFrame(columns)

    # Convert the elapsed-seconds stream into absolute timestamps and make
    # that the index.
    frame["datetime"] = frame["time"].apply(lambda secs: start + timedelta(seconds=secs))
    frame = frame.drop(["time"], axis="columns")
    frame = frame.set_index("datetime")

    return resample_data(frame, resample, interpolate)
strava_data['start_date_local'] = pd.to_datetime( strava_data['start_date_local']) activities = client.get_activities(strava_data['start_date_local'].max()) for activity in activities: strava_data.append(pd.from_dict(activity.to_dict()), ignore_index=True) stava_data.to_csv('raw_strava_data.csv') else: activities = client.get_activities() types = ['time', 'latlng', 'altitude', 'heartrate', 'temp'] headers_written = False #stream_types = ['time', 'latlng', 'altitude', 'heartrate', 'temp'] stream_types = ['heartrate'] with open('raw_strava_data.csv', 'w') as f: for activity in activities: streams = client.get_activity_streams(activity.id, types=stream_types, resolution='medium') temp = activity.to_dict() for k in types: if k in streams: temp[k] = streams[k].data else: temp[k] = None if not headers_written: w = csv.DictWriter(f, temp.keys()) w.writeheader() headers_written = True w.writerow(temp) # Get datetime from string. This is for later for updating the data # t = datetime.strptime(str, '%Y-%m-%dT%H:%M:%S')
subplt = plt.subplot(111)
fig.subplots_adjust(left=0.03, bottom=0.03, right=0.97, top=0.97)
# Lambert conformal map centred on Chicago; dimensions come from the
# module-level basemap_width / basemap_height.
m = Basemap(projection='lcc', resolution='h',
            width=basemap_width, height=basemap_height,
            lat_0=chicago_lat, lon_0=chicago_lon,
            ax=subplt, fix_aspect=False)
run_name = 'Chicago Marathon'
run = [i for i in runs if i.name == run_name][0]
r = client.get_activity_streams(run.id, types=['latlng'], resolution='medium')
# FIX: the original guarded only the stream lookup and then used
# plot_data['x'] unconditionally -- a missing stream crashed later with an
# opaque TypeError on the empty-list placeholder.  Fail fast instead.
if r is None:
    raise ValueError("No latlng stream available for activity {0}".format(run.id))
lats, lngs = zip(*r['latlng'].data)
x, y = m(lngs, lats)
plot_data = {'name': run.name, 'x': x, 'y': y}
# Interpolate the X/Y data to achieve the desired number of points
# necessary for framerate
xdata = np.interp(np.linspace(0, 999, frames),
                  *list(zip(*enumerate(plot_data['x']))))
ydata = np.interp(np.linspace(0, 999, frames),
                  *list(zip(*enumerate(plot_data['y']))))
frame_size = 750
subplt.set_xlim(xdata[0] - frame_size, xdata[0] + frame_size)
subplt.set_ylim(ydata[0] - frame_size, ydata[0] + frame_size)
class strava_data_cache:
    """Local cache of Strava data: activity metadata in a CSV file and
    per-sample streams in an HDF5 store, fetched incrementally."""

    # Some Constants: numeric codes used to encode the activity type when
    # streams are written to the HDF5 store (see type_map in get_streams).
    Run = 1
    Walk = 2
    Hike = 3
    Ride = 4
    Swim = 5
    Workout = 6
    VirtualRide = 7
    AlpineSki = 8
    # Stream types requested from the Strava API.
    stream_types = ['time', 'latlng', 'distance', 'altitude', 'velocity_smooth', \
        'heartrate', 'cadence', 'watts', 'temp', 'moving', 'grade_smooth']
    # Columns every stored stream frame is padded to ('latlng' is split
    # into 'lat'/'lon' before writing).
    stream_cols = ['time', 'distance', 'altitude', 'velocity_smooth', \
        'heartrate', 'cadence', 'watts', 'temp', 'moving', 'grade_smooth', 'lat', 'lon']

    def __init__(self):
        # NOTE(review): 'activites' is a typo for 'activities' but renaming
        # the attribute would break external users -- kept as-is.
        self.token_file = 'token.txt'
        self.activities_file = 'activities.csv'
        self.streams_file = 'streams.h5'
        self.activites_df = pd.DataFrame()
        self.client = None

    # Setup Strava Client
    def setup_client(self):
        """Create the stravalib client from a saved or interactively
        entered access token and verify it with a get_athlete call."""
        self.client = Client()
        if os.path.exists(self.token_file):
            # NOTE(review): readline() keeps a trailing newline if the file
            # ends with one -- confirm the token file has no newline.
            f = open(self.token_file, "r")
            token = f.readline()
            f.close()
        else:
            token = input('Enter access token: ')
        self.client = Client(access_token=token)
        self.client.get_athlete() # Get current athlete details

    # Pull Activities into specified csv file
    def get_activities(self):
        """Fetch activities newer than the last cached one and append them
        to the activities CSV (all activities on first run)."""
        if not os.path.exists(self.activities_file):
            # No cache yet: fetch everything since this sentinel date.
            date = "1990-01-01T00:00:00Z"
            old_df = pd.DataFrame()
        else:
            old_df = pd.read_csv(self.activities_file)
            # Resume from the start date of the most recently cached row.
            date = pd.to_datetime(old_df.iloc[-1]['start_date']).strftime('%Y-%m-%dT%H:%M:%SZ')
        activities = []
        for act in self.client.get_activities(after = date):
            activities.append(act)
        if len(activities) > 0:
            # package up these activites into a Dataframe from list of dictionary entries
            attributes = ['achievement_count', 'athlete_count', 'average_cadence', 'average_heartrate',
                          'average_speed', 'average_temp', 'average_watts', 'best_efforts', 'calories',
                          'comment_count', 'commute', 'description', 'device_name', 'device_watts',
                          'distance', 'elapsed_time', 'elev_high', 'elev_low', 'embed_token',
                          'external_id', 'flagged', 'gear', 'gear_id', 'guid', 'has_heartrate',
                          'has_kudoed', 'id', 'instagram_primary_photo', 'kilojoules', 'kudos_count',
                          'location_city', 'location_country', 'location_state', 'manual',
                          'max_heartrate', 'max_speed', 'max_watts', 'moving_time', 'name',
                          'partner_brand_tag', 'partner_logo_url', 'photo_count', 'photos', 'pr_count',
                          'private', 'resource_state', 'segment_efforts', 'splits_metric',
                          'splits_standard', 'start_date', 'start_date_local', 'start_latitude',
                          'start_longitude', 'suffer_score', 'timezone', 'total_elevation_gain',
                          'total_photo_count', 'trainer', 'type', 'upload_id',
                          'weighted_average_watts', 'workout_type']
            new_df = pd.DataFrame([{fn: getattr(act, fn) for fn in attributes} for act in activities])
            new_df.distance = new_df.distance/1609 #convert distances from meters to miles
            if len(old_df) == 0:
                self.activites_df = new_df
            else:
                self.activites_df = pd.concat([old_df,new_df])
            self.activites_df.to_csv(self.activities_file,encoding ='utf-8')
        else:
            self.activites_df = old_df
        print("Number of activities: ", len(self.activites_df))

    # Pull new streams into h5 file
    def get_streams(self,maxNum = float("inf")):
        """Fetch streams for activities not yet in the HDF5 store and append
        them, stopping after maxNum rows of the activities frame.

        API/parse failures are deliberately best-effort: errors are printed
        and the activity is skipped.
        """
        # Check if we have the activities if not then try and grab them
        if len(self.activites_df) == 0:
            self.get_activities()
        # Map the textual activity type to the numeric class constants.
        type_map = pd.Series([self.Run,self.Ride,self.Swim,self.Walk,self.Workout,self.VirtualRide,self.Hike,self.AlpineSki],
                             index = ['Run', 'Ride', 'Swim', 'Walk', 'Workout', 'VirtualRide', 'Hike', 'AlpineSki'])
        with pd.HDFStore(self.streams_file,'a') as store:
            try:
                # Ids already stored, so we only fetch what is missing.
                stream_ids = store['streams']['id'].unique()
            except:
                # Store is empty/new -- nothing cached yet.
                stream_ids = []
            for idx,row in self.activites_df.iterrows():
                if idx > maxNum:
                    break;
                if row.id in stream_ids:
                    continue
                else:
                    print("Processing ID: {} Name: {}".format(row.id,row.name))
                    try:
                        stream = self.client.get_activity_streams(row.id, types=self.stream_types)
                    except:
                        print ("Unexpected error getting:", sys.exc_info()[0])
                        continue
                    try:
                        # One column per returned stream, indexed by the
                        # 'time' stream samples.
                        temp_df = pd.DataFrame({i : stream[i].data for i in stream}, index=stream['time'].data)
                        if len(temp_df) == 0:
                            print ("Skipping empty stream: %s"%row.name)
                            continue
                        if 'latlng' in temp_df.keys():
                            # Split (lat, lon) pairs into two columns.
                            temp_df['lat'], temp_df['lon'] = list(zip(*temp_df["latlng"]))
                            temp_df = temp_df.drop('latlng',axis=1)
                        if 'distance' in temp_df.keys():
                            temp_df['distance'] = temp_df['distance']/1609
                        temp_df['type'] = row.type
                        temp_df['id'] = row.id
                        # common formatting to get it in the same format for uniform writes
                        # NOTE(review): pd.np was removed in pandas >= 2.0;
                        # this needs np.nan with modern pandas.
                        for col in find_setdiff(self.stream_cols,temp_df.keys()):
                            temp_df[col] = pd.np.nan
                        temp_df['heartrate'] = temp_df['heartrate'].astype('float')
                        temp_df['cadence'] = temp_df['cadence'].astype('float')
                        temp_df['velocity_smooth'] = temp_df['velocity_smooth'].astype('float')
                        temp_df['watts'] = temp_df['watts'].astype('float')
                        temp_df['type'] = temp_df['type'].map(type_map)
                        store.append('streams', temp_df, data_columns=True, format='table',complib='blosc')
                    except:
                        print ("Unexpected error post get:", sys.exc_info()[0])

    def simple_stream_test(self):
        """Smoke test: fetch and print the streams of the first cached activity."""
        print(self.client)
        print(self.activites_df.iloc[0].id)
        stream = self.client.get_activity_streams(self.activites_df.iloc[0].id, types=self.stream_types)
        print(stream)
class stravaImporter(object):
    """Imports Strava activities and their data streams into a SQL database
    (tables: users, activities, streams, gear) via SQLAlchemy/pandas."""

    def __init__(self):
        self.client = Client()
        self.API_CALL_PAUSE_SECONDS = 1.5  # 40 requests per minute
        # the self-authorization is NOT working right now -- using hard-coded URL / ACCESS_CODE right now
        #url = self.client.authorization_url(client_id=CLIENT_ID,
        #    redirect_uri='http://localhost:5000/authorization')
        #code = request.args.get('code') # or whatever flask does
        #url = 'http://www.strava.com/oauth/authorize?client_id=16424&response_type=code&redirect_uri=http://localhost/5001&approval_prompt=force&scope=write'
        #print(url)
        access_token = self.client.exchange_code_for_token(
            client_id=CLIENT_ID, client_secret=CLIENT_SECRET, code=ACCESS_CODE)
        # Now store that access token somewhere (a database?)
        self.client.access_token = access_token
        # retrieve the athlete
        self.athlete = self.client.get_athlete()
        print("For {}, I now have an access token".format(self.athlete.id))
        # name of tables in model
        self.user_TBL = 'users'
        self.activity_TBL = 'activities'
        self.streams_TBL = 'streams'
        self.gear_TBL = 'gear'
        # streams to extract from strava
        self.streams = [
            'time', 'latlng', 'distance', 'altitude', 'velocity_smooth',
            'heartrate', 'cadence', 'temp', 'moving', 'grade_smooth'
        ]

    def get_activities(self, before=None, after=None, limit=None):
        """ Get activities and the related metadata from strava """
        return list(
            self.client.get_activities(before=before, after=after,
                                       limit=limit))

    def get_streams(self, activity_id):
        """Fetch all configured stream types for one activity; returns None
        (after printing) when the API call fails, e.g. for manual uploads."""
        # download the entire stream: `resolution` = `all` (default)
        # download all stream_types except `power`
        try:
            s = self.client.get_activity_streams(activity_id,
                                                 types=self.streams)
            return s
        except:
            print('Could not get streams for activity {0}. Manual upload?'.
                  format(activity_id))
            return

    def stream_to_DF(self, s):
        """ Convert a Strava Stream to a pandas DF """
        return pd.DataFrame.from_dict({k: s[k].data for k in s.keys()})

    def add_user(self, username, email=None, fname=None, lname=None,
                 password=None):
        """ Add a user to the DB"""
        s = Session()
        try:
            u = User(username=username, email=email, fname=fname,
                     lname=lname, password=password,
                     strava_id=self.athlete.id)
            s.add(u)
            s.commit()
            return int(u.id)
        except SQLAlchemyError as err:
            s.rollback()
            print('Error: \n', err)
            raise
        # NOTE(review): unreachable -- both branches above return or raise,
        # so the session is never closed here.
        s.close()
        return

    def add_streams(self, user_id, s_id):
        """ Add Strava data streams for a given user_id and activity_id """
        # get the strava streams for that activity
        stream = self.get_streams(s_id)
        # convert the streams to a DF
        if stream is not None:
            s = Session()
            df = self.stream_to_DF(stream)
            # add `user_id` to the DF
            df['user_id'] = user_id
            # add `activity_id` to the DF
            # NOTE(review): s_id.astype(str) implies s_id is a numpy scalar,
            # not a plain int -- confirm callers always pass one.
            df['activity_id'] = s.query(
                Activity.id).filter_by(strava_id=s_id.astype(str)).one()[0]
            try:
                df.to_sql(self.streams_TBL, engine, if_exists='append',
                          index=False)
                s.commit()
            except:
                s.rollback()
                print('Error: `add_streams` cannot write event to DB. \n')
                raise
            s.close()
        else:
            print('Error: Stream is empty for User {0}, Activity {1}'.format(
                user_id, s_id))
        return

    def add_activity(self, user_id, before=None, after=None, limit=None,
                     add_streams=True):
        """ Get & add a list of activities from strava """
        # get the list of activities from strava
        activities = self.get_activities(before=before, after=after,
                                         limit=limit)
        activities = activities
        # transform activities to a DF ready for Postgres
        df = self.munge_activity(activities)
        df['user_id'] = user_id
        s = Session()
        try:
            df.to_sql(self.activity_TBL, engine, if_exists='append',
                      index=False)
            s.commit()
            print('Added {0} activities from Strava.\n'.format(
                len(df.strava_id)))
        except:
            s.rollback()
            print('Error: `add_activity` cannot write event to DB. \n')
            raise
        s.close()
        # if needed, add the streams as well
        if add_streams is True:
            size = len(df.strava_id)
            for (i, strava_id) in enumerate(df.strava_id):
                print('Fetching data streams for {0}: {1} of {2}'.format(
                    strava_id, i, size), end='\r')
                time.sleep(
                    self.API_CALL_PAUSE_SECONDS)  # limit API call to 40 / min
                self.add_streams(user_id, strava_id)
            print('Added `Streams` for {0} activities from Strava.'.format(
                len(df.strava_id)))
        return

    def strip_units(self, df, cols):
        """ strip units from columns -- changes dtype from `object` to `float`"""
        # Suffix each unit-carrying column has when stringified by stravalib.
        d = {
            'average_speed': ' m / s',
            'max_speed': ' m / s',
            'distance': ' m',
            'total_elevation_gain': ' m'
        }
        for col in cols:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col].astype('str').str.strip(
                    d[col]), errors='coerce')
        return df

    def munge_activity(self, activities):
        """ Get `activities` ready for Postgres DB """
        # `stravalib`.`activity` attributes to import
        fields = [
            'athlete_count', 'average_cadence', 'average_heartrate',
            'average_speed', 'distance', 'elapsed_time', 'elev_high',
            'elev_low', 'end_latlng', 'external_id', 'gear_id',
            'has_heartrate', 'id', 'location_city', 'location_country',
            'location_state', 'manual', 'max_heartrate', 'max_speed',
            'moving_time', 'name', 'pr_count', 'start_date',
            'start_date_local', 'start_latitude', 'start_latlng',
            'start_longitude', 'suffer_score', 'timezone',
            'total_elevation_gain', 'type', 'upload_id', 'workout_type'
        ]
        # convert `activities` into a df with `fields` as columns
        df = pd.DataFrame([[getattr(i, j) for j in fields]
                           for i in activities], columns=fields)
        # rename since `id` will be an internal DB `id`
        df.rename(columns={'id': 'strava_id'}, inplace=True)
        # pandas or SQL does not support units, so strip the units from the fields
        # could be worth adding units when the tables are imported
        # I want GIS support, so can't really move to JSONB or BSON
        cols_to_strip = ('average_speed', 'max_speed', 'distance',
                         'total_elevation_gain')
        df = self.strip_units(df, cols_to_strip)
        # pd.to_sql does not yet support TZ, so strip it. for now.
        # also does not suport timedelta ... maybe I have to ditch `pd.to_sql`
        # https://github.com/pandas-dev/pandas/issues/9086
        df.start_date = pd.DatetimeIndex(df.start_date).tz_convert(None)
        df.timezone = df.timezone.astype(str)
        return df