def sync_rides_distributed(
    self,
    total_segments: int,
    segment: int,
    start_date: datetime = None,
    end_date: datetime = None,
):
    """
    Sync rides for one modulo-based slice of the token-holding athletes.

    Athletes are selected when ``Athlete.id mod total_segments == segment``
    and their access token is not NULL; their ids are then handed to
    ``sync_rides``.

    :param total_segments: The number of segments to divide athletes into
                           (e.g. 24 if this is being run hourly)
    :param segment: Which segment (0-based) to select.
    :param start_date: Forwarded unchanged to ``sync_rides``.
    :param end_date: Forwarded unchanged to ``sync_rides``.
    :return: The result of ``sync_rides``, or ``None`` when the selected
             segment contains no athletes.
    """
    with meta.transaction_context() as sess:
        # `!= None` is deliberate: SQLAlchemy turns it into "IS NOT NULL".
        selected: List[Athlete] = (
            sess.query(Athlete)
            .filter(Athlete.access_token != None)  # noqa: E711
            .filter(func.mod(Athlete.id, total_segments) == segment)
            .all()
        )

        self.logger.info(
            "Selecting segment {} / {}, found {} athletes".format(
                segment, total_segments, len(selected)
            )
        )

        athlete_ids = [athlete.id for athlete in selected]
        if not athlete_ids:
            # Nothing to do for this segment.
            return None

        return self.sync_rides(
            start_date=start_date,
            end_date=end_date,
            athlete_ids=athlete_ids,
        )
def load_board_and_data(leaderboard) -> Tuple[GenericBoard, List[Dict[str, Any]]]:
    """
    Load a YAML leaderboard definition, run its query and format the rows.

    :param leaderboard: Name of the board. Only its basename is used, so any
                        directory components are discarded before the
                        ``<name>.yml`` file is looked up in
                        ``config.LEADERBOARDS_DIR``.
    :return: A tuple of the loaded board and a list of dicts, one per result
             row, keyed by field name with values passed through each
             field's ``format_value``.
    :raises ObjectNotFound: If the YAML definition file does not exist.
    :raises RuntimeError: If a configured field is missing from a result row.
    """
    path = os.path.join(
        config.LEADERBOARDS_DIR,
        '{}.yml'.format(os.path.basename(leaderboard)),
    )
    if not os.path.exists(path):
        raise ObjectNotFound(
            "Could not find yaml board definition {}".format(path))

    with open(path, 'rt', encoding='utf-8') as fp:
        # safe_load instead of a Loader-less yaml.load(): the latter can
        # construct arbitrary Python objects on older PyYAML and raises
        # TypeError (missing Loader) on PyYAML >= 6.
        # NOTE(review): assumes board definitions use only plain YAML types
        # (no python-specific tags) -- confirm against existing boards.
        doc = yaml.safe_load(fp)

    schema = GenericBoardSchema()
    board: GenericBoard = schema.load(doc).data

    with meta.transaction_context(read_only=True) as session:
        rs = session.execute(board.query)

        # When the definition lists no fields, expose every result column
        # as-is, using the column name for both name and label.
        if not board.fields:
            board.fields = [
                GenericBoardField(name=k, label=k) for k in rs.keys()
            ]

        try:
            rows = [
                {f.name: f.format_value(row[f.name], row)
                 for f in board.fields}
                for row in rs.fetchall()
            ]
        except KeyError as ke:
            # Chain the original KeyError so the traceback keeps its origin.
            raise RuntimeError(
                "Field not found in result row: {}".format(ke)) from ke

        return board, rows
def sync_athletes(self, max_records: int = None):
    """
    Re-register every athlete that has an access token.

    For each selected athlete the current Strava athlete is fetched through
    ``StravaClientForAthlete`` and passed to ``register_athlete``; unless
    ``self.all_done()`` is truthy, ``register_athlete_team`` is called too.
    A failure for one athlete is logged as a warning and does not stop the
    remaining athletes from being processed.

    :param max_records: Optional cap on the number of athletes queried;
                        a falsy value means no limit.
    """
    with meta.transaction_context() as sess:
        # We iterate over all of our athletes that have access tokens.
        # (We can't fetch anything for those that don't.)
        q = sess.query(Athlete)
        # Must be `!= None` (SQLAlchemy renders it as IS NOT NULL). The
        # Python expression `Athlete.access_token is not None` is simply
        # `True`, which would filter nothing.
        q = q.filter(Athlete.access_token != None)  # noqa: E711
        if max_records:
            self.logger.info("Limiting to {} records.".format(max_records))
            q = q.limit(max_records)

        for athlete in q.all():
            self.logger.info("Updating athlete: {0}".format(athlete))
            try:
                client = StravaClientForAthlete(athlete)
                strava_athlete = client.get_athlete()
                self.register_athlete(strava_athlete, athlete.access_token)
                if not self.all_done():
                    self.register_athlete_team(strava_athlete, athlete)
            except Exception:
                # Best effort per athlete, but let KeyboardInterrupt and
                # SystemExit propagate so the sync can still be stopped.
                self.logger.warning(
                    "Error registering athlete {0}".format(athlete),
                    exc_info=True)
def fetch_and_store_activity_detail(self,
                                    *,
                                    athlete_id: int,
                                    activity_id: int,
                                    use_cache: bool = False):
    """
    Fetch one detailed activity for an athlete and persist it as a ride.

    The activity is fetched through ``CachingActivityFetcher``, checked with
    ``check_activity`` against the configured date window and excluded
    keywords, written with ``write_ride`` and finalised with
    ``update_ride_complete``. If the athlete is not in the database a
    warning is logged and nothing else happens.

    :param athlete_id: Id of the athlete owning the activity.
    :param activity_id: Id of the activity to fetch.
    :param use_cache: Forwarded to the fetcher's ``fetch`` call.
    :raises ActivityNotFound: When an ``ObjectNotFound`` is raised while
                              fetching or writing the activity.
    :raises IneligibleActivity: Propagated unchanged.
    :raises Fault: Logged, then propagated unchanged.
    """
    with meta.transaction_context() as session:
        self.logger.info(
            "Fetching detailed activity athlete_id={}, activity_id={}".
            format(athlete_id, activity_id))

        try:
            athlete = session.query(Athlete).get(athlete_id)
            if not athlete:
                self.logger.warning(
                    "Athlete {} not found in database, ignoring activity {}"
                    .format(athlete_id, activity_id))
                return

            strava_client = StravaClientForAthlete(athlete)
            fetcher = CachingActivityFetcher(
                cache_basedir=config.STRAVA_ACTIVITY_CACHE_DIR,
                client=strava_client,
            )
            detailed_activity = fetcher.fetch(
                athlete_id=athlete_id,
                object_id=activity_id,
                use_cache=use_cache,
            )

            self.check_activity(
                detailed_activity,
                start_date=config.START_DATE,
                end_date=config.END_DATE,
                exclude_keywords=config.EXCLUDE_KEYWORDS,
            )

            stored_ride = self.write_ride(detailed_activity)
            self.update_ride_complete(strava_activity=detailed_activity,
                                      ride=stored_ride)
        except ObjectNotFound:
            raise ActivityNotFound(
                "Activity {} not found, ignoring.".format(activity_id))
        except IneligibleActivity:
            # Callers handle ineligibility themselves; no logging here.
            raise
        except Fault as fault:
            self.logger.exception(
                "Stravalib fault: "
                "detail {}, athlete {}, exception {}".format(
                    activity_id, athlete_id, str(fault)))
            raise
        except:
            # Log anything else with a traceback, then let it propagate.
            self.logger.exception("Error fetching/writing activity "
                                  "detail {}, athlete {}".format(
                                      activity_id, athlete_id))
            raise
def fetch_and_store_activity_streams(self,
                                     *,
                                     athlete_id: int,
                                     activity_id: int,
                                     use_cache: bool = False):
    """
    Fetch the streams of an already-stored ride and write them out.

    The ride is loaded by ``activity_id`` (with its athlete joined in); its
    streams are fetched through ``CachingStreamFetcher`` and, when any are
    returned, written with ``write_ride_streams`` and committed.

    :param athlete_id: Athlete id passed to the stream fetcher.
    :param activity_id: Id of the ride / activity whose streams are wanted.
    :param use_cache: Forwarded to the fetcher's ``fetch`` call.
    :raises RuntimeError: If no ride with ``activity_id`` exists yet.
    :raises ActivityNotFound: When an ``ObjectNotFound`` is raised while
                              fetching or writing the streams.
    """
    with meta.transaction_context() as session:
        self.logger.info(
            "Fetching activity streams for athlete_id={}, activity_id={}".
            format(athlete_id, activity_id))

        ride_query = session.query(Ride).options(joinedload(Ride.athlete))
        ride = ride_query.get(activity_id)
        if not ride:
            raise RuntimeError(
                "Cannot load streams before fetching activity.")

        try:
            strava_client = StravaClientForAthlete(ride.athlete)
            fetcher = CachingStreamFetcher(
                cache_basedir=config.STRAVA_ACTIVITY_CACHE_DIR,
                client=strava_client,
            )
            streams = fetcher.fetch(
                athlete_id=athlete_id,
                object_id=activity_id,
                use_cache=use_cache,
                only_cache=False,
            )

            if not streams:
                self.logger.debug(
                    "No streams for {!r} (skipping)".format(ride))
            else:
                self.write_ride_streams(streams, ride)
                session.commit()
        except ObjectNotFound:
            raise ActivityNotFound(
                "Streams not found for {}, athlete {}".format(
                    ride, ride.athlete))
        except:
            # Log with traceback, then let the error propagate.
            self.logger.exception(
                "Error fetching/writing activity streams for "
                "{}, athlete {}".format(ride, ride.athlete),
                exc_info=True,
            )
            raise
def load_board_and_data(
        leaderboard) -> Tuple[GenericBoard, List[Dict[str, Any]]]:
    """
    Load a board via ``load_board``, run its query and format the result.

    :param leaderboard: Board identifier, passed straight to ``load_board``.
    :return: A tuple of the board and the output of ``format_rows`` for the
             fetched rows.
    """
    board = load_board(leaderboard)

    with meta.transaction_context(read_only=True) as session:
        result = session.execute(board.query)

        # Without configured fields, fall back to one field per result
        # column, using the column name for both name and label.
        if not board.fields:
            board.fields = [
                GenericBoardField(name=column, label=column)
                for column in result.keys()
            ]

        fetched = result.fetchall()
        formatted = format_rows(fetched, board)
        return board, formatted
def handle_message(self, message: ActivityUpdate):
    """
    Apply one activity-update message to the local data.

    Depending on ``message.operation``:

    * ``AspectType.delete`` -- the activity is deleted via ``activity_sync``.
    * ``AspectType.update`` -- the activity detail is re-fetched and stored
      (streams are not re-fetched).
    * ``AspectType.create`` -- the activity detail and then its streams are
      fetched and stored.

    A statsd counter tagged with the athlete's team is incremented for each
    handled operation. Messages for athletes that are not in the database
    are logged and ignored, as are ``ActivityNotFound`` /
    ``IneligibleActivity`` errors raised while processing.

    :param message: The update message; its ``athlete_id``, ``activity_id``
                    and ``operation`` attributes are read.
    """
    self.logger.info("Processing activity update {}".format(message))

    with meta.transaction_context() as session:
        athlete: Athlete = session.query(Athlete).get(message.athlete_id)
        if not athlete:
            self.logger.warning(
                "Athlete {} not found in database, "
                "ignoring activity update message {}".format(
                    message.athlete_id, message))
            return

        try:
            # Same tag list for every metric emitted below.
            team_tags = ["team:{}".format(athlete.team_id)]

            if message.operation is AspectType.delete:
                statsd.increment("strava.activity.delete", tags=team_tags)
                self.activity_sync.delete_activity(
                    athlete_id=message.athlete_id,
                    activity_id=message.activity_id)

            elif message.operation is AspectType.update:
                statsd.increment("strava.activity.update", tags=team_tags)
                self.activity_sync.fetch_and_store_activity_detail(
                    athlete_id=message.athlete_id,
                    activity_id=message.activity_id)
                # (We'll assume the stream doesn't need re-fetching.)

            elif message.operation is AspectType.create:
                statsd.increment("strava.activity.create", tags=team_tags)
                self.activity_sync.fetch_and_store_activity_detail(
                    athlete_id=message.athlete_id,
                    activity_id=message.activity_id)
                self.streams_sync.fetch_and_store_activity_streams(
                    athlete_id=message.athlete_id,
                    activity_id=message.activity_id)
        except (ActivityNotFound, IneligibleActivity) as x:
            # Expected outcomes, not errors: log at info level through the
            # same logger the rest of this method uses.
            self.logger.info(str(x))
def sync_photos(self):
    """
    Fetch and store non-primary photos for rides that still need them.

    Every ride with ``photos_fetched=False`` and ``private=False`` is
    processed: its activity photos are requested with
    ``only_instagram=True`` and handed to ``write_ride_photos_nonprimary``.
    A failure for one ride is logged with its traceback and does not stop
    the remaining rides from being processed.
    """
    with meta.transaction_context() as sess:
        q = sess.query(orm.Ride)
        q = q.filter_by(photos_fetched=False, private=False)

        for ride in q:
            self.logger.info("Writing out photos for {0!r}".format(ride))
            try:
                client = StravaClientForAthlete(ride.athlete)
                # Per the original type note:
                # list[stravalib.orm.ActivityPhoto]
                activity_photos = client.get_activity_photos(
                    ride.id, only_instagram=True)
                self.write_ride_photos_nonprimary(activity_photos, ride)
            except Exception:
                # Best effort per ride, but let KeyboardInterrupt and
                # SystemExit propagate so the sync can still be stopped.
                # logger.exception already attaches the traceback.
                self.logger.exception("Error fetching/writing "
                                      "non-primary photos activity "
                                      "{0}, athlete {1}".format(
                                          ride.id, ride.athlete))
def sync_rides(
    self,
    start_date: datetime = None,
    end_date: datetime = None,
    rewrite: bool = False,
    force: bool = False,
    athlete_ids: List[int] = None,
):
    """
    Sync rides for every athlete that has an access token and a team.

    Each athlete is synced through ``_sync_rides`` and committed on its
    own. An athlete whose token is rejected (``AccessUnauthorized``) has the
    token cleared; any other error is logged, rolled back, and the loop
    moves on to the next athlete.

    :param start_date: Start of the sync window; ``None`` means
                       ``config.START_DATE``.
    :param end_date: End of the sync window; ``None`` means
                     ``config.END_DATE``.
    :param rewrite: Forwarded to ``_sync_rides``.
    :param force: Sync even if the current time is past ``end_date`` plus
                  ``config.UPLOAD_GRACE_PERIOD``.
    :param athlete_ids: Optional list restricting the sync to these athlete
                        ids; ``None`` means no restriction.
    :raises CommandError: If the competition end date plus grace period has
                          passed and ``force`` is not set.
    """
    with meta.transaction_context() as sess:
        if start_date is None:
            start_date = config.START_DATE

        if end_date is None:
            end_date = config.END_DATE

        self.logger.debug(
            "Fetching rides newer than {} and older than {}".format(
                start_date, end_date))

        if (arrow.now() >
                (end_date + config.UPLOAD_GRACE_PERIOD)) and not force:
            raise CommandError(
                "Current time is after competition end date + grace "
                "period, not syncing rides. (Use `force` to override.)")

        if rewrite:
            self.logger.info("Rewriting existing ride data.")

        # We iterate over all of our athletes that have access tokens.
        # (We can't fetch anything for those that don't.)
        # `!= None` is deliberate: SQLAlchemy renders it as IS NOT NULL.
        q = sess.query(Athlete)
        q = q.filter(Athlete.access_token != None)  # noqa: E711

        if athlete_ids is not None:
            q = q.filter(Athlete.id.in_(athlete_ids))

        # Also only fetch athletes that have teams configured. This may not
        # be strictly necessary but this is a team competition, so not a lot
        # of value in pulling in data for those without teams.
        # (The way the athlete sync works, athletes will only be configured
        # for a single team that is one of the configured competition teams.)
        q = q.filter(Athlete.team_id != None)  # noqa: E711

        for athlete in q.all():
            assert isinstance(athlete, Athlete)
            self.logger.info(
                "Fetching rides for athlete: {0}".format(athlete))
            try:
                self._sync_rides(
                    start_date=start_date,
                    end_date=end_date,
                    athlete=athlete,
                    rewrite=rewrite,
                )
            except AccessUnauthorized:
                # Clearing the token drops the athlete from later runs.
                self.logger.error(
                    "Invalid authorization token for {} (removing)".format(
                        athlete))
                athlete.access_token = None
                sess.add(athlete)
                sess.commit()
            except Exception:
                # Best effort per athlete, but let KeyboardInterrupt and
                # SystemExit propagate so the sync can still be stopped.
                self.logger.exception(
                    "Error syncing rides for athlete {}".format(athlete))
                sess.rollback()
            else:
                sess.commit()