# Example #1
# 0
def get_followers(request, id):
    """Return a JSON array of Strava athlete ids followed by the current user.

    The array contains the user's own athlete id first, followed by the ids
    of every athlete returned by the Strava "friends" endpoint.  All ids are
    serialized as strings.  If the user has no ``userinfo``, an empty JSON
    array is returned.

    Args:
        request: Django request; ``request.user.userinfo`` is expected to
            carry ``athlete_id`` and a Strava access token in ``strava_code``.
        id: unused -- kept for URL-route signature compatibility.

    Returns:
        HttpResponse with ``application/json`` content type.
    """
    followers = []
    # NOTE(review): assumes request.user always has a ``userinfo`` attribute
    # (possibly None) -- confirm against the user model.
    if request.user.userinfo is not None:
        followers.append(str(request.user.userinfo.athlete_id))
        client = Client(access_token=request.user.userinfo.strava_code)
        followers.extend(str(friend.id) for friend in client.get_athlete_friends())
    return HttpResponse(json.dumps(followers), content_type="application/json")
# Example #2
# 0
def main():
    """Demo: authenticate with the Strava API, print athlete details, fetch a
    segment leaderboard, and load the friend -> colour mapping from CSV.

    Ported to Python 3: the ``reload(sys)`` / ``sys.setdefaultencoding('utf8')``
    hack is a Python-2-only workaround (and a documented antipattern) and was
    removed, along with the manual ``.encode("utf8")`` before printing.
    """
    # SECURITY: hard-coded OAuth access token committed to source control --
    # rotate it and load it from the environment or a config file instead.
    client = Client(access_token='76824abf6abf903eb3d8b0bde83625135c0be0ec')
    athlete = client.get_athlete()
    print("Hello, {}. I know your email is {}".format(athlete.firstname, athlete.email))
    josh_friends = client.get_athlete_friends(5991862)  # kept: exercises the API connection
    print("Starting....")
    leaderboard = client.get_segment_leaderboard(2658830, following=True)
    print(leaderboard[0].athlete_name)

    # Build the friend name -> display colour mapping from CSV.
    friend_colour_dict = {}
    with open('friend_colour.csv') as friend_colour_file:
        for line in UnicodeDictReader(friend_colour_file):
            friend_colour_dict[line["name"]] = line["colour"]

    for name in friend_colour_dict:
        print(name)
# Example #3
# 0
def main():
    """Nightly Strava segment crawl.

    Loads the previous run's segment output, fetches every segment listed in
    segments.csv (skipping known-bad ones), records each segment's current
    leaderboard leader plus a per-leader colour, writes CSV/JSON summaries,
    and finally pings dead-man's-snitch heartbeat URLs.

    Ported to Python 3: the ``reload(sys)`` / ``sys.setdefaultencoding``
    hack (a Python-2-only workaround) was removed; files are now closed via
    context managers.
    """
    # Previous run's per-segment output; diffed against the fresh crawl at
    # the end (main_logger creates the "warlog").
    df1 = pd.read_csv('segoutput.csv', index_col=False)
    df1 = df1.set_index(['segment_id'])

    segmentlist = _read_segment_ids('segments.csv')

    # Drop segments known to fail.
    badsegments = _read_segment_ids('bad_segments.csv')
    print('Bad Segments: ' + str(badsegments))
    for bad_id in badsegments:
        if bad_id in segmentlist:
            segmentlist.remove(bad_id)

    # SECURITY: hard-coded OAuth access token committed to source control --
    # rotate it and load it from the environment or a config file instead.
    client = Client(access_token='76824abf6abf903eb3d8b0bde83625135c0be0ec')
    athlete = client.get_athlete()
    print("Hello, {}. I know your email is {}".format(athlete.firstname, athlete.email))
    josh_friends = client.get_athlete_friends(5991862)  # kept: exercises the API connection
    print("Starting....")

    # Colour pool; a colour is popped the first time a new leader is seen.
    colours = ['575757', 'FFCDF3', 'FFEE33', 'FF9233', '29D0D0', '8126C0',
               '814A19', '1D6914', '2A4BD7', 'AD2323', '000000', '88C6ED',
               'C7258E']

    # Known leader -> colour assignments carried over from previous runs.
    friend_colour_dict = {}
    with open('friend_colour.csv') as friend_colour_file:
        for line in csv.DictReader(friend_colour_file):
            friend_colour_dict[line["name"]] = line["colour"]

    friend_count_dict = {}  # leader name -> number of segments currently held

    with open('segoutput.csv', 'w') as segoutfile:
        segoutfile.write('id,latitude,longitude,name,type,color,segment_name,segment_id,url' + '\n')

        for num, seg_id in enumerate(segmentlist):
            time.sleep(3)  # stay under Strava's API rate limit
            segment = retry_get_segment(client, seg_id)
            try:
                leaderboard = retry_get_leaderboard(client, seg_id)
                topguy = leaderboard[0].athlete_name if leaderboard else 'UNCLAIMED'

                if topguy not in friend_colour_dict:
                    friend_colour_dict[topguy] = colours.pop()
                    print(str(topguy) + ' not in friend_colour_dict, popping colour: ' + str(friend_colour_dict[topguy]))

                friend_count_dict[topguy] = friend_count_dict.get(topguy, 0) + 1

                # Row layout matches the header above; the trailing comma per
                # row is intentional (preserves the existing file format).
                for field in segment_details(num, segment, topguy, friend_colour_dict):
                    segoutfile.write(str(field) + ',')
                segoutfile.write('\n')
            except Exception as exc:
                # Best-effort: a failing segment is skipped, but report it
                # instead of swallowing silently (the bad_segments.csv
                # append lives in version-control history).
                print('Skipping segment ' + str(seg_id) + ': ' + str(exc))

    # Per-leader segment counts.
    with open('segmentcount.csv', 'w') as segcountoutfile:
        segcountoutfile.write('name,colour,count' + '\n')
        for name in friend_count_dict:
            if name != 'UNCLAIMED':
                print(str(name) + ': ' + str(friend_count_dict[name]))
                segcountoutfile.write(str(name) + ',' + str(friend_colour_dict[name]) + ',' + str(friend_count_dict[name]) + '\n')
        segcountoutfile.write('\n')
    json_convert_segmentcount()

    # Per-leader counts over time (appended each run, then trimmed).
    nowdate = datetime.datetime.now().strftime('%Y-%m-%d')
    with open('segmentcountovertime.csv', 'a+') as segcountovertimefile:
        for name in friend_count_dict:
            if name != 'UNCLAIMED':
                segcountovertimefile.write(str(nowdate) + ',' + str(name) + ',' + str(friend_colour_dict[name]) + ',' + str(friend_count_dict[name]) + '\n')
    trim_count_overtime()
    json_convert_trim_count_overtime()

    time.sleep(5)
    # Compare the freshly written segoutput.csv (df2) to the original (df1).
    df2 = pd.read_csv('segoutput.csv', index_col=False)
    df2 = df2.set_index(['segment_id'])
    try:
        main_logger(df2, df1)
        # strava1 main_logger (warlog creation) heartbeat
        res = requests.get("https://nosnch.in/ae58837141")
    except Exception as e:
        print('Error: ' + str(e))

    # strava1_segment main heartbeat
    res = requests.get("https://nosnch.in/26ba53ff3d")


def _read_segment_ids(path):
    """Return the "Segment Id" column of the CSV at *path* as a list of strings."""
    with open(path) as infile:
        return [row["Segment Id"] for row in csv.DictReader(infile)]
# Example #4
# 0
class Strava_scraper(object):
    """Scrape Strava for athlete activity ids and activities.

    Combines the official Strava API (via stravalib's ``Client``) with a
    logged-in selenium browser session, since the API does not expose other
    athletes' weekly activity feeds.

    Ported to Python 3 (print function, ``dict.items``); also fixes a bug in
    ``web_scrape_activities`` where the boolean ``sleep`` parameter shadowed
    the ``sleep()`` function.
    """

    def __init__(self, client_secret, access_token, strava_email,
                 strava_password):
        self.client_secret = client_secret
        self.access_token = access_token
        self.strava_email = strava_email
        self.strava_password = strava_password
        self.client = None        # stravalib Client, set by get_client()
        self.athlete = None       # authenticated athlete, set by get_client()
        self.friends = None       # list of my friends, dtype = stravalib object
        self.activity_ids = []    # list of activity ids scraped from strava
        self.friend_ids = []      # ids of athletes the client follows
        self.activities = []      # list of activities
        self.clubs = []           # list of athlete clubs
        self.other_athletes = []  # other athlete objects unfollowed by client

    def get_client(self):
        """Create the API client and populate ``athlete``, ``friends`` and
        ``friend_ids``.

        The Client accepts an access token and a rate limiter.
        Inputs: None.  Outputs: None.
        """
        self.client = Client(access_token=self.access_token,
                             rate_limiter=DefaultRateLimiter())

        self.athlete = self.client.get_athlete()  # full authenticated athlete

        print("Client setup complete!")
        print()
        self.friends = list(self.client.get_athlete_friends())
        print("Authenticated user's friends list complete!")
        print()
        for friend in self.friends:
            self.friend_ids.append(friend.id)

    def log_in_strava(self):
        """Open a selenium Chrome session logged in to strava.com.

        Input: None.  Output: the webdriver object (image loading disabled
        for speed -- only the HTML is scraped).
        """
        chromeOptions = webdriver.ChromeOptions()
        prefs = {"profile.managed_default_content_settings.images": 2}
        chromeOptions.add_experimental_option("prefs", prefs)

        print("logging in...")
        print()
        # NOTE(review): ``chrome_options=`` is deprecated in newer selenium
        # in favour of ``options=`` -- confirm against the pinned version.
        driver = webdriver.Chrome(chrome_options=chromeOptions)
        url = "https://www.strava.com/login"
        driver.get(url)
        user = driver.find_element_by_name('email')
        user.click()
        user.send_keys(self.strava_email)
        pwrd = driver.find_element_by_name('password')
        pwrd.click()
        pwrd.send_keys(self.strava_password)
        driver.find_element_by_id('login-button').click()
        sleep(10)  # allow the login redirect to complete
        print("complete!")
        return driver

    def _get_activity_by_id(self, act_id):
        """Fetch one activity from the API; None on HTTP error (e.g. private)."""
        try:
            return self.client.get_activity(act_id)
        except HTTPError:
            return None

    def get_soup(self, driver, url):
        """Fetch *url* in the live browser session and parse it.

        INPUT: driver (selenium webdriver), url (str).
        OUTPUT: BeautifulSoup object.
        """
        driver.get(url)
        return BeautifulSoup(driver.page_source, 'html.parser')

    def _make_interval_list(self):
        """Return year-week interval ints (e.g. 201631) matching Strava's
        interval URL parameter, from week 1 of 2014 through the current week
        of 2017."""
        now = datetime.datetime.now()
        week_num = now.date().isocalendar()[1]  # current ISO week number
        # Number of ISO weeks in each year, going back to 2014 only.
        yr_wk = {2014: 52, 2015: 53, 2016: 52, 2017: week_num}
        return [year * 100 + week
                for year, weeks in yr_wk.items()
                for week in range(1, weeks + 1)]

    def _get_activities_from_page(self, soup):
        """Extract integer activity ids from '/activities/<id>' hrefs in *soup*."""
        temp_act_id_list = []
        regex = re.compile('/activities/([0-9]*)')
        for link in soup.find_all('a'):
            text = link.get('href')
            try:
                # Digits following '/activities/'; findall returns strings.
                act_id = regex.findall(text)
                try:
                    temp_act_id_list.append(int(act_id[0]))
                except (IndexError, ValueError):
                    continue  # no match, or empty capture
            except TypeError:
                continue  # link has no href -> text is None
        return temp_act_id_list

    def web_scrape_activities(self, start_n=0, sleep=False, sleep_time=2):
        """Scrape activity ids for every followed athlete, week by week, and
        write one CSV per athlete under activity_files/.

        Example url:
        https://www.strava.com/athletes/2304253#interval?interval=201631&interval_type=week&chart_type=miles&year_offset=0
        where 2304253 is the athlete id and 201631 the year+week number.

        Args:
            start_n: index into ``friend_ids`` to resume from.
            sleep: when truthy, pause between page loads (random exponential
                multiple of ``sleep_time`` seconds) to look less bot-like.
            sleep_time: mean pause scale in seconds.
        """
        import time  # local import: the ``sleep`` parameter shadows sleep()

        driver = self.log_in_strava()
        week_ints = self._make_interval_list()

        print("scraping athletes")
        for ath_id in self.friend_ids[start_n:]:
            athlete_act_id_list = []
            for yearweek_int in week_ints:
                url = ("https://www.strava.com/athletes/{}#interval?interval={}"
                       "&interval_type=week&chart_type=miles&year_offset=0").format(
                           str(ath_id), str(yearweek_int))
                soup = self.get_soup(driver, url)
                if sleep:
                    # BUG FIX: the boolean parameter shadowed the imported
                    # sleep(); calling it raised TypeError when enabled.
                    time.sleep(np.random.exponential(1.0) * sleep_time)
                athlete_act_id_list.extend(self._get_activities_from_page(soup))
            filename = "{}_act_ids.csv".format(ath_id)
            filepath = os.path.join('activity_files', filename)
            write_list_to_csv(athlete_act_id_list, filepath)

        # De-duplicate while keeping an indexable sequence --
        # get_activities_from_ids() indexes activity_ids (a bare set is not
        # indexable, which the original code produced).
        self.activity_ids = sorted(set(self.activity_ids))

        print("All done!")

    def get_other_athletes(self, list_ath_ids):
        """Populate ``other_athletes`` from predefined athlete ids, skipping
        athletes already in ``friend_ids``.

        Input: list_ath_ids as list.  Output: None.
        """
        print("Getting other athletes...")
        print()
        for ath_id in list_ath_ids:
            if ath_id not in self.friend_ids:
                self.other_athletes.append(self.client.get_athlete(ath_id))
        print("All done!")

    def load_activity_ids(self, act_id_csv_filename):
        """Populate ``activity_ids`` from the first row of a CSV; intended for
        freshly instantiated scrapers."""
        with open(act_id_csv_filename) as f:
            reader = csv.reader(f)
            self.activity_ids = np.array(next(reader), dtype='int')

    def get_activities_main(self):
        """Populate ``activities`` and ``clubs`` with everything visible to the
        authenticated athlete (feeds are capped at the last 200 entries).

        Input: None.  Output: None.  Call after get_client().
        """
        print("Getting client activities...")
        print()
        self.activities.extend(list(self.client.get_activities()))  # gets all
        print("Getting friend activities...")
        print()
        # Only the last 200 activities from the user's feed.
        self.activities.extend(list(self.client.get_friend_activities()))
        print("Getting athlete clubs...")
        print()
        self.clubs.extend(self.client.get_athlete_clubs())  # gets all
        club_ids = [club.id for club in self.clubs]
        print("Getting club activities...")
        print()
        for club in club_ids:
            # Last 200 activities per club.
            self.activities.extend(list(self.client.get_club_activities(club)))

        print("All done!")

    def get_activities_from_ids(self):
        """Fetch every activity in ``activity_ids`` via the API, consuming the
        sequence as it goes; unreachable activities are skipped."""
        while len(self.activity_ids) > 0:
            requested_activity = self._get_activity_by_id(self.activity_ids[0])
            if requested_activity:
                self.activities.append(requested_activity)
            self.activity_ids = self.activity_ids[1:]

    def __repr__(self):
        return "This is {} {}'s strava scraper class".format(
            self.athlete.firstname, self.athlete.lastname)
# Example #5
# 0
class Strava_scraper(object):
    """API-only Strava scraper.

    Walks activity ids sequentially, keeping activities that have GPS data,
    belong to a followed athlete, and start in a given state.

    Ported to Python 3 (print function); fixes ``__repr__`` (it referenced
    the nonexistent ``self.my_athlete``) and the retry loop in
    ``get_n_athletes`` (it re-issued a *successful* request up to 100 times).
    """

    def __init__(self, client_secret, access_token):
        self.client_secret = client_secret
        self.access_token = access_token
        self.client = None       # stravalib Client, set by get_client()
        self.athlete = None      # authenticated athlete, set by get_client()
        self.friends = None      # list of my friends, dtype = stravalib object
        self.friend_ids = []     # ids of athletes the client follows
        self.friend_activities = []  # matching activities collected
        self.athlete_ids = []    # not used
        self.list_of_athletes = []   # not used

    def get_client(self):
        """Create the API client and populate ``athlete``, ``friends`` and
        ``friend_ids``.

        The Client accepts an access token and a rate limiter.
        Inputs: None.  Outputs: None.
        """
        self.client = Client(access_token=self.access_token,
                             rate_limiter=DefaultRateLimiter())

        self.athlete = self.client.get_athlete()  # full authenticated athlete

        print("Client setup complete!")
        print()
        self.friends = list(self.client.get_athlete_friends())
        print("Authenticated user's friends list complete!")
        print()
        for friend in self.friends:
            self.friend_ids.append(friend.id)

    def _check_for_id(self, id):
        """Return True if *id* appears in ``friend_ids`` or ``athlete_ids``.

        Inputs: id as integer.  Outputs: bool.
        (Parameter name shadows the builtin but is kept for compatibility.)
        """
        return (id in self.friend_ids) or (id in self.athlete_ids)

    def get_n_athletes(self, n):
        """Deprecated friend-of-friend crawl, capped at *n* athletes.

        Strava no longer allows pulling activities for athletes the
        authenticated user does not follow, so collecting athletes this way
        no longer helps; kept for reference.
        Input: n as integer.  Output: None.
        """
        athlete_deq = deque(self.friend_ids, maxlen=n)
        id_deq = deque(self.friend_ids, maxlen=n)
        num = 0
        while len(self.athlete_ids) + len(self.friend_ids) < n:
            athlete_id = id_deq.popleft()
            athlete = athlete_deq.popleft()
            # BUG FIX: the original nested a while-True inside the for loop,
            # so a *successful* request was repeated 100 times.  Retry only
            # on ConnectionError, up to 100 attempts.
            for _attempt in range(100):
                try:
                    athletes_friends = self.client.get_athlete_friends(
                        athlete_id)
                except ConnectionError:
                    continue
                break
            for friend in athletes_friends:
                athlete_deq.append(friend)
                id_deq.append(friend.id)

            if not self._check_for_id(athlete_id):
                self.athlete_ids.append(athlete_id)
                self.list_of_athletes.append(athlete)
                # Strip non-ASCII characters so the console print never chokes.
                firstname = re.sub(r'[^\x00-\x7F]+', '', athlete.firstname)
                lastname = re.sub(r'[^\x00-\x7F]+', '', athlete.lastname)
                print("athlete '{} {}' added to list position {}".format(
                    firstname, lastname, num))
                num += 1

    def _get_state(self, latlng):
        """Reverse-geocode *latlng* to a state name; None when *latlng* is
        empty or the geocoder result has no state."""
        if latlng:
            # NOTE(review): recent geopy versions require a ``user_agent``
            # argument for Nominatim -- confirm against the pinned version.
            geoc = Nominatim()
            location = geoc.reverse(latlng)
            try:
                return location.raw['address']['state']
            except KeyError:
                return None

    def _get_activity(self, act_id, state):
        """Fetch activity *act_id* and return it only when it has GPS data,
        belongs to a followed athlete, and starts in *state*; else None."""
        try:
            activity = self.client.get_activity(act_id)
        except HTTPError:
            print("id:{}; client HTTP error when getting activity!!!".format(
                act_id))
            return None
        latlng = activity.start_latlng
        if not latlng:
            return None
        act_ath_id = activity.athlete.id
        firstname = re.sub(r'[^\x00-\x7F]+', '', activity.athlete.firstname)
        lastname = re.sub(r'[^\x00-\x7F]+', '', activity.athlete.lastname)
        act_state = self._get_state(list(latlng))
        if act_ath_id in self.friend_ids and act_state == state:
            print("activity id: {} belonging to {} {}, added to list".format(
                act_id, firstname, lastname))
            return activity
        print("activity {} not a gps coordinated activity or not in state.".format(
            act_id))
        return None

    def get_n_activities(self, start_id, end_id, state='Colorado', n=30000):
        """Walk activity ids from *start_id* down to *end_id*, appending up to
        *n* matching friend activities to ``friend_activities``.

        Input: start_id/end_id/n as int, state as string.  Output: None.
        """
        print("Getting activities starting with id: {}".format(start_id))
        print("_" * 50)
        act_id = start_id
        while len(self.friend_activities) <= n and act_id >= end_id:
            activity = self._get_activity(act_id, state)
            if activity:
                self.friend_activities.append(activity)
            act_id -= 1

    def __repr__(self):
        # BUG FIX: previously referenced self.my_athlete, which is never set.
        return "This is {} {}'s strava scraper class".format(
            self.athlete.firstname, self.athlete.lastname)