def sync_ride_weather(): """ Synchronize rides from strava with the database. """ parser = optparse.OptionParser() parser.add_option("--clear", action="store_true", dest="clear", default=False, help="Whether to clear data before fetching.") parser.add_option("--cache-only", action="store_true", dest="cache_only", default=False, help="Whether to only use existing cache.") parser.add_option("--limit", type="int", dest="limit", default=0, help="Limit how many rides are processed (e.g. during development)") parser.add_option("--debug", action="store_true", dest="debug", default=False, help="Whether to log at debug level.") parser.add_option("--quiet", action="store_true", dest="quiet", default=False, help="Whether to suppress non-error log output.") (options, args) = parser.parse_args() if options.quiet: loglevel = logging.ERROR elif options.debug: loglevel = logging.DEBUG else: loglevel = logging.INFO logging.basicConfig(level=loglevel) logger = logging.getLogger('sync') sess = db.session if options.clear: logger.info("Clearing all weather data!") sess.query(model.RideWeather).delete() if options.limit: logger.info("Fetching weather for first {0} rides".format(options.limit)) else: logger.info("Fetching weather for all rides") # Find rides that have geo, but no weather sess.query(model.RideWeather) q = text(""" select R.id from rides R join ride_geo G on G.ride_id = R.id left join ride_weather W on W.ride_id = R.id where W.ride_id is null and date(R.start_date) < CURDATE() and time(R.start_date) != '00:00:00' -- Exclude bad entries. ; """) c = wu_api.Client(api_key=app.config['WUNDERGROUND_API_KEY'], cache_dir=app.config['WUNDERGROUND_CACHE_DIR'], pause=7.0, # Max requests 10/minute for developer license cache_only=options.cache_only) rx = re.compile('^POINT\((.+)\)$') rows = db.engine.execute(q).fetchall() # @UndefinedVariable num_rides = len(rows) for i,r in enumerate(rows): if options.limit and i > options.limit: logging.info("Limit ({0}) reached".format(options.limit)) break ride = sess.query(model.Ride).get(r['id']) logger.info("Processing ride: {0} ({1}/{2})".format(ride.id, i, num_rides)) try: start_geo_wkt = db.session.scalar(ride.geo.start_geo.wkt) # @UndefinedVariable (lat,lon) = rx.match(start_geo_wkt).group(1).split(' ') hist = c.history(ride.start_date, us_city=ride.location, lat=lat, lon=lon) ride_start = ride.start_date.replace(tzinfo=hist.date.tzinfo) ride_end = ride_start + timedelta(seconds=ride.elapsed_time) # NOTE: if elapsed_time is significantly more than moving_time then we need to assume # that the rider wasn't actually riding for this entire time (and maybe just grab temps closest to start of # ride as opposed to averaging observations during ride. ride_observations = hist.find_observations_within(ride_start, ride_end) start_obs = hist.find_nearest_observation(ride_start) end_obs = hist.find_nearest_observation(ride_end) def avg(l): no_nulls = [e for e in l if e is not None] if not no_nulls: return None return sum(no_nulls) / len(no_nulls) * 1.0 # to force float rw = model.RideWeather() rw.ride_id = ride.id rw.ride_temp_start = start_obs.temp rw.ride_temp_end = end_obs.temp if len(ride_observations) <= 2: # if we dont' have many observations, bookend the list with the start/end observations ride_observations = [start_obs] + ride_observations + [end_obs] rw.ride_temp_avg = avg([o.temp for o in ride_observations]) rw.ride_windchill_start = start_obs.windchill rw.ride_windchill_end = end_obs.windchill rw.ride_windchill_avg = avg([o.windchill for o in ride_observations]) rw.ride_precip = sum([o.precip for o in ride_observations if o.precip is not None]) rw.ride_rain = any([o.rain for o in ride_observations]) rw.ride_snow = any([o.snow for o in ride_observations]) rw.day_temp_min = hist.min_temp rw.day_temp_max = hist.max_temp ride.weather_fetched = True ride.timezone = hist.date.tzinfo.zone sess.add(rw) sess.flush() if lat and lon: try: sun = Sun(lat=lat, lon=lon) rw.sunrise = sun.sunrise(ride_start) rw.sunset = sun.sunset(ride_start) except: logger.exception("Error getting sunrise/sunset for ride {0}".format(ride)) # But soldier on ... except: logger.exception("Error getting weather data for ride: {0}".format(ride)) # But soldier on ... sess.commit()
def execute(self, options, args): sess = db.session if options.clear: self.logger.info("Clearing all weather data!") sess.query(model.RideWeather).delete() if options.limit: self.logger.info("Fetching weather for first {0} rides".format(options.limit)) else: self.logger.info("Fetching weather for all rides") # Find rides that have geo, but no weather sess.query(model.RideWeather) q = text(""" select R.id from rides R join ride_geo G on G.ride_id = R.id left join ride_weather W on W.ride_id = R.id where W.ride_id is null and date(R.start_date) < CURDATE() and time(R.start_date) != '00:00:00' -- Exclude bad entries. ; """) c = wu_api.Client(api_key=app.config['WUNDERGROUND_API_KEY'], cache_dir=app.config['WUNDERGROUND_CACHE_DIR'], pause=7.0, # Max requests 10/minute for developer license cache_only=options.cache_only) rows = db.engine.execute(q).fetchall() # @UndefinedVariable num_rides = len(rows) for i, r in enumerate(rows): if options.limit and i > options.limit: logging.info("Limit ({0}) reached".format(options.limit)) break ride = sess.query(model.Ride).get(r['id']) self.logger.info("Processing ride: {0} ({1}/{2})".format(ride.id, i, num_rides)) try: start_geo_wkt = db.session.scalar(ride.geo.start_geo.wkt) # @UndefinedVariable point = parse_point_wkt(start_geo_wkt) lon = point.lon lat = point.lat # We are doing only lat/lon now instead of us_city, since us_city seems to resolve to regional weather stations # rather than the closest weather stations ... # hist = c.history(ride.start_date, us_city=ride.location, lat=lat, lon=lon) hist = c.history(ride.start_date, lat=lat, lon=lon) ride_start = ride.start_date.replace(tzinfo=hist.date.tzinfo) ride_end = ride_start + timedelta(seconds=ride.elapsed_time) # NOTE: if elapsed_time is significantly more than moving_time then we need to assume # that the rider wasn't actually riding for this entire time (and maybe just grab temps closest to start of # ride as opposed to averaging observations during ride. ride_observations = hist.find_observations_within(ride_start, ride_end) start_obs = hist.find_nearest_observation(ride_start) end_obs = hist.find_nearest_observation(ride_end) def avg(l): no_nulls = [e for e in l if e is not None] if not no_nulls: return None return sum(no_nulls) / len(no_nulls) * 1.0 # to force float rw = model.RideWeather() rw.ride_id = ride.id rw.ride_temp_start = start_obs.temp rw.ride_temp_end = end_obs.temp if len(ride_observations) <= 2: # if we dont' have many observations, bookend the list with the start/end observations ride_observations = [start_obs] + ride_observations + [end_obs] rw.ride_temp_avg = avg([o.temp for o in ride_observations]) rw.ride_windchill_start = start_obs.windchill rw.ride_windchill_end = end_obs.windchill rw.ride_windchill_avg = avg([o.windchill for o in ride_observations]) rw.ride_precip = sum([o.precip for o in ride_observations if o.precip is not None]) rw.ride_rain = any([o.rain for o in ride_observations]) rw.ride_snow = any([o.snow for o in ride_observations]) rw.day_temp_min = hist.min_temp rw.day_temp_max = hist.max_temp # ride.weather_fetched = True # (We don't have such an attribute, actually.) # (We get this from the activity now.) # ride.timezone = hist.date.tzinfo.zone sess.add(rw) sess.flush() if lat and lon: try: sun = Sun(lat=lat, lon=lon) rw.sunrise = sun.sunrise(ride_start) rw.sunset = sun.sunset(ride_start) except: self.logger.exception("Error getting sunrise/sunset for ride {0}".format(ride)) # But soldier on ... except: self.logger.exception("Error getting weather data for ride: {0}".format(ride)) # But soldier on ... sess.commit()
def execute(self, options, args): sess = db.session if options.clear: self.logger.info("Clearing all weather data!") sess.query(model.RideWeather).delete() if options.limit: self.logger.info("Fetching weather for first {0} rides".format( options.limit)) else: self.logger.info("Fetching weather for all rides") # Find rides that have geo, but no weather sess.query(model.RideWeather) q = text(""" select R.id from rides R join ride_geo G on G.ride_id = R.id left join ride_weather W on W.ride_id = R.id where W.ride_id is null and date(R.start_date) < CURDATE() and time(R.start_date) != '00:00:00' -- Exclude bad entries. ; """) c = wu_api.Client( api_key=app.config['WUNDERGROUND_API_KEY'], cache_dir=app.config['WUNDERGROUND_CACHE_DIR'], pause=7.0, # Max requests 10/minute for developer license cache_only=options.cache_only) rows = db.engine.execute(q).fetchall() # @UndefinedVariable num_rides = len(rows) for i, r in enumerate(rows): if options.limit and i > options.limit: logging.info("Limit ({0}) reached".format(options.limit)) break ride = sess.query(model.Ride).get(r['id']) self.logger.info("Processing ride: {0} ({1}/{2})".format( ride.id, i, num_rides)) try: start_geo_wkt = db.session.scalar( ride.geo.start_geo.wkt) # @UndefinedVariable point = parse_point_wkt(start_geo_wkt) lon = point.lon lat = point.lat # We are doing only lat/lon now instead of us_city, since us_city seems to resolve to regional weather stations # rather than the closest weather stations ... # hist = c.history(ride.start_date, us_city=ride.location, lat=lat, lon=lon) hist = c.history(ride.start_date, lat=lat, lon=lon) ride_start = ride.start_date.replace(tzinfo=hist.date.tzinfo) ride_end = ride_start + timedelta(seconds=ride.elapsed_time) # NOTE: if elapsed_time is significantly more than moving_time then we need to assume # that the rider wasn't actually riding for this entire time (and maybe just grab temps closest to start of # ride as opposed to averaging observations during ride. ride_observations = hist.find_observations_within( ride_start, ride_end) start_obs = hist.find_nearest_observation(ride_start) end_obs = hist.find_nearest_observation(ride_end) def avg(l): no_nulls = [e for e in l if e is not None] if not no_nulls: return None return sum(no_nulls) / len( no_nulls) * 1.0 # to force float rw = model.RideWeather() rw.ride_id = ride.id rw.ride_temp_start = start_obs.temp rw.ride_temp_end = end_obs.temp if len(ride_observations) <= 2: # if we dont' have many observations, bookend the list with the start/end observations ride_observations = [start_obs ] + ride_observations + [end_obs] rw.ride_temp_avg = avg([o.temp for o in ride_observations]) rw.ride_windchill_start = start_obs.windchill rw.ride_windchill_end = end_obs.windchill rw.ride_windchill_avg = avg( [o.windchill for o in ride_observations]) rw.ride_precip = sum([ o.precip for o in ride_observations if o.precip is not None ]) rw.ride_rain = any([o.rain for o in ride_observations]) rw.ride_snow = any([o.snow for o in ride_observations]) rw.day_temp_min = hist.min_temp rw.day_temp_max = hist.max_temp # ride.weather_fetched = True # (We don't have such an attribute, actually.) # (We get this from the activity now.) # ride.timezone = hist.date.tzinfo.zone sess.add(rw) sess.flush() if lat and lon: try: sun = Sun(lat=lat, lon=lon) rw.sunrise = sun.sunrise(ride_start) rw.sunset = sun.sunset(ride_start) except: self.logger.exception( "Error getting sunrise/sunset for ride {0}".format( ride)) # But soldier on ... except: self.logger.exception( "Error getting weather data for ride: {0}".format(ride)) # But soldier on ... sess.commit()