client.access_token = 'd47099a29b2f3539e1c21af6d820e33a109a079e' ## CartoDB connection API_KEY ='cad54ea0c580a0c554b9e9562157e7c9bd9f37b0' cartodb_domain = 'geodarcy' cl = CartoDBAPIKey(API_KEY, cartodb_domain) ## remove duplicate rows cl.sql('DELETE FROM wbstrava WHERE cartodb_id IN (SELECT cartodb_id FROM (SELECT cartodb_id, ROW_NUMBER() OVER (partition BY segment_id ORDER BY cartodb_id) AS rnum FROM wbstrava) t WHERE t.rnum > 1);') ## find segments with no geometry queryResult = cl.sql('select segment_id from wbstrava where the_geom is null') currentSegments = [x['segment_id'] for x in queryResult['rows']] for id in currentSegments: try: segmentGeojson = PolylineCodec().decode(client.get_segment(id).map.polyline) flippedGeojson = [[x[1], x[0]] for x in segmentGeojson] inputGeojson = '{"coordinates": ' + str(flippedGeojson) + ', "type": "LineString", "crs":{"type":"name","properties":{"name":"EPSG:4326"}}}' cl.sql('UPDATE wbstrava SET the_geom = ST_GeomFromGeoJSON(\'' + inputGeojson + '\') WHERE segment_id=' + str(id)) except Exception as e: if "Not Found" in e.message: cl.sql('DELETE FROM wbstrava WHERE segment_id= ' + str(id)) print("Deleted Strava ID: {}".format(id)) else: print("Something's wrong with Strava ID: {}".format(id)) ## get list of segments already in CartoDB queryResult = cl.sql('select segment_id from wbstrava') currentSegments = [x['segment_id'] for x in queryResult['rows']] now = datetime.datetime.now()
class StravaAthlete(object): ''' Gathers data for a strava athelete given possession of the API token ''' def __init__(self, token): self.token = token self.client = Client(access_token=self.token) self._athlete_name() def _athlete_name(self): this_athlete = self.client.get_athlete() first = this_athlete.firstname last = this_athlete.lastname return first + '_' + last def check_strava_dir(self): if os.path.basename(os.getcwd()) != 'strava': return print('Please navigate to strava directory before proceeding.') else: print('In strava directory.') pass def check_athlete_folder(self): try: os.chdir('data/'+self._athlete_name()) print('Data for {0} to be stored in {1}'.format( self._athlete_name(),'data/'+self._athlete_name())) os.chdir(Path(os.getcwd()).parents[1]) except: return print('Need to create directory first. use .add_athlete()') def check_directories(self): self.check_strava_dir() self.check_athlete_folder() def add_athlete(self): self.check_strava_dir() if os.path.isdir('data') == False: os.mkdir('data') print('data directory created') else: pass if os.path.isdir(os.path.join('data/',self._athlete_name())) == False: print('creating data/{} directory...'.format(self._athlete_name())) os.mkdir('data/' + self._athlete_name()) print('data/{} directory created'.format(self._athlete_name())) else: print('data/{} directory already exists'.format(self._athlete_name())) try: #try to load df self.ath_df = pd.read_csv('data/athletes.csv') print('ath_df successfully loaded') except: #check that df isn't in memory # if ath_df in globals(): # print('ath_df already in memory') # else: # ath_cols = ['firstname','lastname','city','state'] # ath_df = pd.DataFrame(columns = ath_cols) # print('empty ath_df intialized') try: self.ath_df = self.ath_df print('ath_df already in memory') except: #create empty df ath_cols = ['firstname','lastname','city','state'] ath_df = pd.DataFrame(columns = ath_cols) print('empty ath_df intialized') ath_cols = ['firstname','lastname','city','state'] athlete_dict = self.client.get_athlete().to_dict() self.ath_df = self.ath_df.append( {k:athlete_dict[k] for k in ath_cols if k in athlete_dict},ignore_index=True) self.ath_df = self.ath_df.drop_duplicates() #in case athlete was already added print('{0} {1} added to ath_df'.format(self.ath_df.firstname.iloc[-1], self.ath_df.lastname.iloc[-1])) self.ath_df.to_csv('data/athletes.csv', index=False) print('ath_df saved to data/athletes.csv') def get_rides(self): ''' Downloads a csv of all activity ids associated with rides ''' self.check_directories() try: self.ride_ids = pd.read_csv('data/'+self._athlete_name()+'/ride_ids.csv') print('ride_ids.csv successfully loaded.') except: print('Need to gather ride IDs...') try: os.chdir('data/'+self._athlete_name()) print('data for {0} to be stored in {1}'.format( self._athlete_name(),'data/'+self._athlete_name())) os.chdir(Path(os.getcwd()).parents[1]) except: return print('Need to create directory first. Use .add_athlete()') activities = self.client.get_activities() act_list = list(activities) print('Ride IDs successfully downloaded') ride_id_list = [] for act in act_list: if act.type != 'Ride': continue else: ride_id_list.append(act.id) ride_ids = pd.DataFrame(ride_id_list) ride_ids.columns = ['ride_id'] ride_ids.name = ride_ids ride_ids.to_csv('data/'+self._athlete_name()+'/ride_ids.csv', index=False) print('Ride IDs saved to data/{}/ride_ids.csv'.format(self._athlete_name())) self.ride_ids = ride_ids def check_dependency(self, df): ''' This has a problem with passing a df that isn't defined. In that case it will get an error before it even runs. Leaving in for now in case I can fix it but will just code it individually. (It's not too redundant. ''' try: df = df except NameError: return print('No'+df+'. Run appropriate method') def get_segments(self): ''' Downloads .csv of rides and segment ids. Require ride_ids dataframe ''' self.check_directories() ### replace with check_dependency(ride_ids) if I get that working try: self.ride_ids = self.ride_ids except: return print('No ride_ids data frame. Must run get_rides() method first') ### try: self.segments_df = pd.read_csv('data/'+self._athlete_name()+'/segments_df.csv') return print('segments_df successfully loaded') except: segment_cols = ['ride_name', 'ride_id', 'bike', 'ride_distance', 'ride_moving_time', 'ride_elapsed_time', 'ride_elevation_gain', 'ride_start_time', 'segment_id'] self.segments_df = pd.DataFrame(columns = segment_cols) print('Empty segments_df created.') ride_index = 0 remaining_rides = self.ride_ids.ride_id rides_left = len(remaining_rides) while ride_index <= rides_left: try: for ride in range(ride_index, rides_left): activity = self.client.get_activity(self.ride_ids.ride_id[ride], include_all_efforts=True) for segment in activity.segment_efforts: try: activity.gear.name self.segments_df = self.segments_df.append( {'ride_name' : activity.name, 'ride_id' : activity.id, 'bike' : activity.gear.name, 'ride_distance' : activity.distance, 'ride_moving_time' : activity.moving_time, 'ride_elapsed_time' : activity.elapsed_time, 'ride_elevation_gain' : activity.total_elevation_gain, 'ride_start_time' : activity.start_date_local, 'segment_id' : segment.segment.id}, ignore_index=True) except AttributeError: #if activity.gear.name is missing, skip it self.segments_df = self.segments_df.append( {'ride_name' : activity.name, 'ride_id' : activity.id, 'ride_distance' : activity.distance, 'ride_moving_time' : activity.moving_time, 'ride_elapsed_time' : activity.elapsed_time, 'ride_elevation_gain' : activity.total_elevation_gain, 'ride_start_time' : activity.start_date_local, 'segment_id' : segment.segment.id}, ignore_index=True) ride_index += 1 if ride_index % 50 == 0: print('Last ride downloaded: id:{} {}'.format(self.segments_df.tail(1).iloc[0,1], self.segments_df.tail(1).iloc[0,0])) print('It is {0}. Segments for {1} rides downloaded. {2} rides to go' .format(datetime.datetime.now().strftime("%H:%M"), ride_index, rides_left-ride_index)) except: wait = 0 print('rate limit exceeded, need to wait 15') print('it is now {} minutes after the hour'.format(datetime.datetime.now().minute)) time.sleep(60*16) print('trying again...') print('segments for all {} rides downloaded'.format(ride_index)) self.segments_df.name = 'segments_df' self.segments_df.to_csv('data/'+self._athlete_name()+'/segments_df.csv', index=False) print('segments_df saved to data/{}/segments_df.csv'.format(self._athlete_name())) def get_efforts(self): ''' Returns df of individual efforts on segements. Requires segments_df df. ''' self.check_directories() ### replace with check_dependency(ride_ids) if I get that working try: self.segments_df = self.segments_df except: return print('No segments_df data frame. Must run get_segments() method first') ### try: self.efforts_df = pd.read_csv('data/'+self._athlete_name()+'/efforts_df.csv') return print('efforts_df successfully loaded.') except FileNotFoundError: efforts_columns = ['segment_id', 'effort_id', 'name', 'start_date', 'moving_time', 'elapsed_time', 'average_heartrate', 'max_heartrate'] try: self.efforts_df = self.efforts_df effort_segs = self.efforts_df.segment_id.unique() print('Appending to existing efforts df.') except: self.efforts_df = pd.DataFrame(columns = efforts_columns) effort_segs = np.array([]) print('Empty efforts_df created.') effort_segment_index = 0 effort_segs_to_download = 1 while effort_segment_index < effort_segs_to_download: effort_segment_index = 0 remaining_effort_segs = list(np.setdiff1d(self.segments_df.segment_id,effort_segs)) effort_segs_to_download = len(remaining_effort_segs) print('remaining segments to get efforts for: {}'.format(effort_segs_to_download)) try: for segment in remaining_effort_segs: this_segment = list(self.client.get_segment_efforts(segment)) for this_effort in this_segment: this_effort_dict = this_effort.to_dict() this_effort_dict['segment_id'] = segment this_effort_dict['effort_id'] = this_effort.id self.efforts_df = self.efforts_df.append( {k:this_effort_dict[k] for k in efforts_columns if k in this_effort_dict}, ignore_index=True) effort_segment_index += 1 if effort_segment_index % 50 == 0: print('Last segment downloaded: {0} {1}'.format(self.efforts_df.tail(1).iloc[0,0], self.efforts_df.tail(1).iloc[0,2])) print('It is {0}. Efforts for {1} segments downloaded. {2} segments to go...' .format(datetime.datetime.now().strftime("%H:%M"), effort_segment_index, effort_segs_to_download - effort_segment_index)) except: self.efforts_df.to_csv('data/'+self._athlete_name()+'/efforts_df.csv', index=False) print('efforts_df.csv successfully saved.') wait = 0 print('Rate limit exceeded, need to wait 15 minutes') print('It is now {} minutes after the hour'.format(datetime.datetime.now().minute)) time.sleep(60*16) print('Trying again...') print('Efforts for all {} segments downloaded'.format(segment_index)) self.efforts_df.to_csv('data/'+self._athlete_name()+'/efforts_df.csv', index=False) print('efforts_df.csv successfully saved.') def get_seg_details(self): ''' Downloads .csv of segment details. Require segments_df dataframe ''' self.check_directories() ### replace with check_dependency(ride_ids) if I get that working try: self.segments_df = self.segments_df except: return print('No ride_ids data frame. Must run get_rides() method first') ### try: self.seg_details_df = pd.read_csv('data/'+self._athlete_name()+'/seg_details_df.csv') return print('seg_details_df successfully loaded.') except FileNotFoundError: try: self.seg_details_df = self.seg_details_df print('Appending to existing seg_details df.') except AttributeError: seg_details_cols = ['segment_id', 'name', 'distance', 'average_grade', 'maximum_grade', 'elevation_high', 'elevation_low', 'total_elevation_gain', 'start_latitude', 'end_latitude', 'start_longitude', 'end_longitude', 'climb_category', 'city', 'state', 'country', 'effort_count', 'athlete_count', 'athlete_segment_stats', 'map'] self.seg_details_df = pd.DataFrame(columns = seg_details_cols) seg_details_to_download = np.array([]) print('Empty seg_details_df created.') segments_remaining = len(self.segments_df) seg_details_index = 0 while seg_details_index < segments_remaining: seg_details_index = 0 seg_details_remaining = list(np.setdiff1d(self.segments_df.segment_id,seg_details_to_download)) seg_details_to_download = len(seg_details_remaining) print('remaining segments to get details for: {}'.format(seg_details_to_download)) try: for segment in seg_details_remaining: this_segment_dict = self.client.get_segment(segment).to_dict() this_segment_dict['segment_id'] = segment self.seg_details_df = self.seg_details_df.append( {k:this_segment_dict[k] for k in seg_details_cols if k in this_segment_dict}, ignore_index=True) seg_details_index += 1 if seg_details_index % 50 == 0: self.seg_details_df.to_csv('data/'+self._athlete_name()+'/seg_details_df.csv', index=False) print('efforseg_details_dfts_df.csv successfully saved.') print('Last segment downloaded: {0} {1}'.format(self.seg_details_df.tail(1).iloc[0,0], self.seg_details_df.tail(1).iloc[0,1])) print('It is {0}. {1} segments downloaded. {2} segments to go...' .format(datetime.datetime.now().strftime("%H:%M"), seg_details_index, segments_remaining - seg_details_index)) except: wait = 0 print('rate limit exceeded, need to wait 15 minutes') print('it is now {} minutes after the hour'.format(datetime.datetime.now().minute)) time.sleep(60*16) print('trying again...') print('all {} segments downloaded'.format(seg_details_index)) self.seg_details_df.to_csv('data/'+self._athlete_name()+'/seg_details_df.csv', index=False) print('seg_details_df.csv successfully saved.')
from polyline.codec import PolylineCodec from cartodb import CartoDBAPIKey, CartoDBException from geojson import Feature, LineString ## Strava connection client = Client() client.access_token = 'd47099a29b2f3539e1c21af6d820e33a109a079e' ## CartoDB connection API_KEY = 'cad54ea0c580a0c554b9e9562157e7c9bd9f37b0' cartodb_domain = 'geodarcy' cl = CartoDBAPIKey(API_KEY, cartodb_domain) ## get list of segments already in CartoDB queryResult = cl.sql('select segment_id from wbstrava') currentSegments = [x['segment_id'] for x in queryResult['rows']] ## get started segments from a user segmentIDs = [x.id for x in client.get_starred_segment()] for id in segmentIDs: if id not in currentSegments: segmentGeojson = PolylineCodec().decode( client.get_segment(id).map.polyline) flippedGeojson = [[x[1], x[0]] for x in segmentGeojson] inputGeojson = '{"coordinates": ' + str( flippedGeojson ) + ', "type": "LineString", "crs":{"type":"name","properties":{"name":"EPSG:4326"}}}' cl.sql( 'INSERT INTO wbstrava (the_geom, segment_id) VALUES (ST_GeomFromGeoJSON(\'' + inputGeojson + '\'), ' + str(id) + ')')
API_KEY = 'cad54ea0c580a0c554b9e9562157e7c9bd9f37b0' cartodb_domain = 'geodarcy' cl = CartoDBAPIKey(API_KEY, cartodb_domain) ## remove duplicate rows cl.sql( 'DELETE FROM wbstrava WHERE cartodb_id IN (SELECT cartodb_id FROM (SELECT cartodb_id, ROW_NUMBER() OVER (partition BY segment_id ORDER BY cartodb_id) AS rnum FROM wbstrava) t WHERE t.rnum > 1);' ) ## find segments with no geometry queryResult = cl.sql('select segment_id from wbstrava where the_geom is null') currentSegments = [x['segment_id'] for x in queryResult['rows']] for id in currentSegments: try: segmentGeojson = PolylineCodec().decode( client.get_segment(id).map.polyline) flippedGeojson = [[x[1], x[0]] for x in segmentGeojson] inputGeojson = '{"coordinates": ' + str( flippedGeojson ) + ', "type": "LineString", "crs":{"type":"name","properties":{"name":"EPSG:4326"}}}' cl.sql('UPDATE wbstrava SET the_geom = ST_GeomFromGeoJSON(\'' + inputGeojson + '\') WHERE segment_id=' + str(id)) except Exception as e: if "Not Found" in e.message: cl.sql('DELETE FROM wbstrava WHERE segment_id= ' + str(id)) print("Deleted Strava ID: {}".format(id)) else: print("Something's wrong with Strava ID: {}".format(id)) ## get list of segments already in CartoDB queryResult = cl.sql('select segment_id from wbstrava')
## import libraries from stravalib.client import Client from polyline.codec import PolylineCodec from cartodb import CartoDBAPIKey, CartoDBException from geojson import Feature, LineString ## Strava connection client = Client() client.access_token = 'd47099a29b2f3539e1c21af6d820e33a109a079e' ## CartoDB connection API_KEY ='cad54ea0c580a0c554b9e9562157e7c9bd9f37b0' cartodb_domain = 'geodarcy' cl = CartoDBAPIKey(API_KEY, cartodb_domain) ## get list of segments already in CartoDB queryResult = cl.sql('select segment_id from wbstrava') currentSegments = [x['segment_id'] for x in queryResult['rows']] ## get started segments from a user segmentIDs = [x.id for x in client.get_starred_segment()] for id in segmentIDs: if id not in currentSegments: segmentGeojson = PolylineCodec().decode(client.get_segment(id).map.polyline) flippedGeojson = [[x[1], x[0]] for x in segmentGeojson] inputGeojson = '{"coordinates": ' + str(flippedGeojson) + ', "type": "LineString", "crs":{"type":"name","properties":{"name":"EPSG:4326"}}}' cl.sql('INSERT INTO wbstrava (the_geom, segment_id) VALUES (ST_GeomFromGeoJSON(\'' + inputGeojson + '\'), ' + str(id) + ')')