def get_incidents(self):
	"""Return CHP incidents given various request args.

	Reads the "id", "center", "dispatch", "area", "city" and "since"
	request arguments.  With an "id" a single incident is looked up by
	key name; otherwise a filtered query is run.  Results are cached in
	memcache for 60 seconds.  Sets self.incidents and
	self.incidents_last_mod as side effects.
	"""
	# NOTE: renamed from 'id' — the original shadowed the builtin id().
	incident_id = self.request.get("id")
	center = self.request.get("center")
	dispatch = self.request.get("dispatch")
	area = self.request.get("area")
	city = self.request.get("city")
	since = self.request.get("since")

	memcache_key = "incidents-%s-%s-%s-%s-%s-%s" % (incident_id, center, dispatch, area, city, since)
	memcache_expiry_time = 60

	incidents = memcache.get(memcache_key)
	if incidents is None:
		if incident_id == "":
			query = CHPIncident.all()
			query.order('-LogTime')
			if center != "":
				query.filter('CenterID =', center)
			if dispatch != "":
				query.filter('DispatchID =', dispatch)
			if area != "":
				query.filter('Area =', area)
			if city != "":
				query.filter('city =', city)
			if since != "":
				# "since" is a unix timestamp (seconds, possibly fractional).
				query.filter('LogTime >', datetime.fromtimestamp(float(since)))
			incidents = query.fetch(10000)
		else:
			# Handle single incident requests, slightly different approach
			# We want to use get_by_key_name() instead of filtering.
			incidents = []
			incident = CHPIncident.get_by_key_name(incident_id)
			if incident is not None:
				incidents.append(incident)

		try:
			memcache.add(memcache_key, incidents, memcache_expiry_time)
		except ValueError:
			# Best effort: an uncacheable value should not break the request.
			pass

	if len(incidents) > 0:
		self.incidents_last_mod = max(incidents, key=lambda incident: incident.updated).updated
	else:
		self.incidents_last_mod = CHPData.last_updated()

	self.incidents = incidents
def get(self):
	"""Purge CHPIncident records that have not been updated recently.

	Deletes (keys-only) every incident whose 'updated' time is more than
	an hour older than the last CHPData refresh, then reports how many
	records were removed.  Does nothing when CHPData has never updated.
	"""
	purged = 0
	last_update = CHPData.last_updated()
	if last_update is not None:
		cutoff = last_update - timedelta(hours=1)
		stale = CHPIncident.all(keys_only=True).filter('updated <', cutoff)
		purged = stale.count()
		db.delete(stale)
	self.response.write("Purged %d records." % purged)
def get(self):
	"""Purge CHPIncident records left over from before the last update.

	Fetches the keys of incidents older than the last CHPData update
	(minus padding), deletes them in a batch, and reports the count.
	"""
	# We need to be careful here. It's possible this handler could run in the
	# window between when the CHPData is updated and when the incidents get
	# updated. So throw in 5 minutes of padding.
	#
	# See also: https://developers.google.com/appengine/docs/python/ndb/#writes
	query = CHPIncident.query(CHPIncident.updated < CHPData.last_updated() - timedelta(minutes=5))
	# Fetch the keys once and count those, instead of issuing a separate
	# count() query: the old two-query approach cost an extra RPC and could
	# report a count that disagreed with what was actually deleted.
	keys = query.fetch(keys_only=True)
	ndb.delete_multi(keys)
	self.response.write("Purged %d records." % len(keys))
def get_incidents(self):
	"""Return CHP incidents given various request args.

	Reads the "center", "dispatch", "area" and "city" request arguments
	(most-specific one wins), serves from memcache when possible, and
	sets self.incidents and self.incidents_last_mod as side effects.
	"""
	center = self.request.get("center")
	dispatch = self.request.get("dispatch")
	area = self.request.get("area")
	city = self.request.get("city")

	# The User-Agent header may be absent entirely; the old code indexed
	# headers directly and could raise KeyError.
	user_agent = self.request.headers.get('User-Agent', '')

	# Change this for the West Sac News-Leger since no one appears home there
	if "http://www.westsac.com/news-ledger" in user_agent:
		if dispatch == "":
			dispatch = 'SACC'

	memcache_key = "incidents-%s-%s-%s-%s" % (center, dispatch, area, city)

	# Don't even try the cache if this looks like a PubSubHubBub request.
	if "pubsubhubbub" in user_agent:
		incidents = None
	else:
		incidents = memcache.get(memcache_key)

	if incidents is None:
		query = CHPIncident.query().order(-CHPIncident.LogTime)
		# Only the most specific filter is applied.
		if city != "":
			query = query.filter(CHPIncident.city == city)
		elif area != "":
			query = query.filter(CHPIncident.Area == area)
		elif dispatch != "":
			query = query.filter(CHPIncident.DispatchID == dispatch)
		elif center != "":
			query = query.filter(CHPIncident.CenterID == center)
		incidents = query.fetch(10000)

		try:
			memcache.set(memcache_key, incidents, 300)
		except Exception:
			# Best effort: a value too large to cache (or any other
			# memcache failure) should not break the request.  The old
			# bare except also swallowed KeyboardInterrupt/SystemExit.
			pass

	if len(incidents) > 0:
		self.incidents_last_mod = max(incidents, key=lambda incident: incident.updated).updated
	else:
		self.incidents_last_mod = CHPData.last_updated()

	self.incidents = incidents
def process_chp_center(chpCenter):
	"""Process a CHP Center.

	Creates or updates a CHPIncident entity for every log entry under the
	center element, batches the datastore writes, queues PubSubHubbub
	pings for the affected feeds, and (for the SAHB center only) queues
	reverse geocoding for incidents that still lack a city.
	"""
	incident_list = []
	psh_pings = []
	dash_re = re.compile(r'\s*-\s*')

	center_id = chpCenter.attrib['ID']
	for chpDispatch in chpCenter:
		dispatch_id = chpDispatch.attrib['ID']

		# For some reason, sometimes the Dispatch ID is blank so skip these.
		if dispatch_id == "":
			continue

		for chpLog in chpDispatch:
			log_id = chpLog.attrib['ID']

			# There are two different time formats in the CHP feed. Try the
			# "standard" format first, then fall back to the new SAHB format.
			try:
				log_time = datetime.strptime(chpLog.find('LogTime').text, '"%m/%d/%Y %I:%M:%S %p"')
			except ValueError:
				log_time = datetime.strptime(chpLog.find('LogTime').text, '"%b %d %Y %I:%M%p"')

			# Duct tape to get the timezone right
			pacific_tz = tzinfo.Pacific()
			log_time = log_time - pacific_tz.utcoffset(log_time)

			incident_key_name = "%s.%s.%s" % (center_id, dispatch_id, log_id)
			incident_key = ndb.Key(CHPIncident, incident_key_name)

			incident = incident_key.get()
			if incident is None:
				incident = CHPIncident(key = incident_key,
					CenterID = center_id,
					DispatchID = dispatch_id,
					LogID = log_id,
					LogTime = log_time,
					Area = chpLog.find('Area').text.strip('"'))

			#
			# The following are attributes that can change between updates so
			# reset them from the data given.
			#

			# LogType and LogTypeID
			(logtypeid, dash, logtype) = chpLog.find('LogType').text.strip('"').partition("-")
			if dash == '':
				# If 'dash' is missing then the hyphen was missing, if so we
				# use the whole thing as the LogType and forget the LogTypeID
				incident.LogType = deCopIfy(re.sub(dash_re, ' - ', logtypeid.strip()))
			else:
				incident.LogType = deCopIfy(re.sub(dash_re, ' - ', logtype.strip()))
				incident.LogTypeID = logtypeid.strip()

			# Location
			incident.Location = deCopIfy(chpLog.find('Location').text.strip('"'))

			# LocationDesc
			# Make sure the location and the locationDesc aren't dupes
			locationDesc = deCopIfy(chpLog.find('LocationDesc').text.strip('"'))
			if incident.Location.upper() != locationDesc.upper():
				incident.LocationDesc = locationDesc

			# geolocation
			# BUG FIX: the old code called .strip('"') before the None check,
			# so the guard was dead and a missing/empty LATLON raised
			# AttributeError instead of being skipped.
			latlon_element = chpLog.find('LATLON')
			if latlon_element is not None and latlon_element.text is not None:
				latlon = latlon_element.text.strip('"')
				if latlon != "0:0":
					incident.geolocation = ndb.GeoPt(
						lat = float(latlon.partition(":")[0]) / 1000000,
						lon = float(latlon.partition(":")[2]) / 1000000 * -1
					)

			# LogDetails
			LogDetails = {
				'details': []
			}
			logdetails_element = chpLog.find('LogDetails')
			for element in logdetails_element:
				try:
					detail_dict = {
						'DetailTime': element.find('DetailTime').text.strip('"'),
						'IncidentDetail': deCopIfy(element.find('IncidentDetail').text.strip('"^')).capitalize()
					}
					LogDetails[element.tag].append(detail_dict)
				except AttributeError:
					# Skip detail rows with missing sub-elements or text.
					pass
			incident.LogDetails = LogDetails

			# Set up the PSH pings. Note, we are NOT checking for actual
			# changes in the data, we are just assuming that the existance of
			# an incident in the CHP feed declares it as "updated" so we ping.
			psh_pings.append('http://www.sactraffic.org/atom?center=%s' % urllib.quote(incident.CenterID))
			psh_pings.append('http://www.sactraffic.org/atom?dispatch=%s' % urllib.quote(incident.DispatchID))
			if incident.Area is not None and incident.Area != "":
				psh_pings.append('http://www.sactraffic.org/atom?area=%s' % urllib.quote(incident.Area))
			if incident.city is not None and incident.city != "":
				psh_pings.append('http://www.sactraffic.org/atom?city=%s' % urllib.quote(incident.city.encode('utf8')))

			# Save this incident
			incident_list.append(incident)

	# Store the incidents in a batch
	ndb.put_multi(incident_list)

	# Ping the PSH hub, use a set so we don't ping duplicates.
	ping_set = set(psh_pings)
	if not debug:
		deferred.defer(pubsubhubbub_publish.publish, 'http://pubsubhubbub.appspot.com', ping_set, _queue="pshPingQueue")
	else:
		# BUG FIX: log center_id here; the old code referenced the loop-local
		# 'incident', which is undefined when the feed contains no incidents.
		logging.info("Skipping PSH pings for %s on the development server. %s" % (center_id, ping_set))

	if center_id == 'SAHB':
		# Limit reverse geocoding to only the SAHB center to limit the number of
		# requests to the geocoder, we are _Sac_ Traffic after all.

		# Reverse geocode the incidents if we haven't already
		for incident in incident_list:
			if incident.city is None and incident.geolocation is not None:
				if not debug:
					deferred.defer(reverse_geocode.load_city, incident, _queue="reverseGeocodeQueue")
				else:
					logging.info("Skipping reverse geocode for %s on the development server." % (incident.Area))

	logging.info("Processed %d incidents in %s." % (len(incident_list), chpCenter.attrib['ID']))
def process_chp_center(chpCenter):
	"""Process a CHP Center.

	Legacy db-API version: creates or updates a CHPIncident entity for
	every log entry under the center element, batches the datastore
	writes, queues PubSubHubbub pings for the affected feeds, and queues
	reverse geocoding for incidents that still lack a city.
	"""
	incident_list = []
	psh_pings = []
	dash_re = re.compile(r'\s*-\s*')

	for chpDispatch in chpCenter:
		# For some reason, sometimes the Dispatch ID is blank
		# so skip these.
		if chpDispatch.attrib['ID'] == "":
			continue

		for chpLog in chpDispatch:
			# There are two different time formats in the CHP feed. Try the
			# "standard" format first, then fall back to the new SAHB format.
			try:
				log_time = datetime.strptime(chpLog.find('LogTime').text, '"%m/%d/%Y %I:%M:%S %p"').replace(tzinfo=tzinfo.Pacific())
			except ValueError:
				log_time = datetime.strptime(chpLog.find('LogTime').text, '"%b %d %Y %I:%M%p"').replace(tzinfo=tzinfo.Pacific())

			key_name = "%s.%s.%s.%d" % (chpCenter.attrib['ID'], chpDispatch.attrib['ID'], chpLog.attrib['ID'], time.mktime(log_time.timetuple()))

			incident = CHPIncident.get_by_key_name(key_name)
			if incident is None:
				incident = CHPIncident(key_name = key_name,
					CenterID = chpCenter.attrib['ID'],
					DispatchID = chpDispatch.attrib['ID'],
					LogID = chpLog.attrib['ID'])

			incident.LogTime = log_time

			(logtypeid, dash, logtype) = chpLog.find('LogType').text.strip('"').partition("-")
			if dash == '':
				# If 'dash' is missing then the hyphen was missing, if so we
				# use the whole thing as the LogType and forget the LogTypeID
				incident.LogType = deCopIfy(re.sub(dash_re, ' - ', logtypeid.strip()))
			else:
				incident.LogType = deCopIfy(re.sub(dash_re, ' - ', logtype.strip()))
				incident.LogTypeID = logtypeid.strip()

			incident.Location = deCopIfy(chpLog.find('Location').text.strip('"'))
			incident.Area = chpLog.find('Area').text.strip('"')
			incident.ThomasBrothers = chpLog.find('ThomasBrothers').text.strip('"')

			# Like the LogTime above, there are now two location formats.
			# This time we try the SAHB LATLON first, then fall back to TBXY.
			try:
				latlon = chpLog.find('LATLON').text.strip('"')
				if latlon != "0:0":
					incident.geolocation = db.GeoPt(
						lat = float(latlon.partition(":")[0]) / 1000000,
						lon = float(latlon.partition(":")[2]) / 1000000 * -1
					)
			except AttributeError:
				incident.TBXY = chpLog.find('TBXY').text.strip('"')
				incident.geolocation = geoConvertTBXY(incident.CenterID, incident.TBXY)

			# Special handling for the LogDetails
			LogDetails = {
				'details': [],
				'units': []
			}
			logdetails_element = chpLog.find('LogDetails')
			for element in logdetails_element:
				try:
					detail_dict = {
						'DetailTime': element.find('DetailTime').text.strip('"'),
						'IncidentDetail': deCopIfy(element.find('IncidentDetail').text.strip('"^')).capitalize()
					}
					LogDetails[element.tag].append(detail_dict)
				except AttributeError:
					# Skip detail rows with missing sub-elements or text.
					pass
			incident.LogDetails = pickle.dumps(LogDetails)

			# Set up the PSH pings. Note, we are NOT checking for actual
			# changes in the data, we are just assuming that the existance of
			# an incident in the CHP feed declares it as "updated" so we ping.
			psh_pings.append('http://www.sactraffic.org/atom?center=%s' % urllib.quote(incident.CenterID))
			psh_pings.append('http://www.sactraffic.org/atom?dispatch=%s' % urllib.quote(incident.DispatchID))
			psh_pings.append('http://www.sactraffic.org/atom?area=%s' % urllib.quote(incident.Area))
			if incident.city is not None and incident.city != "":
				# BUG FIX: encode to UTF-8 before quoting — urllib.quote
				# raises KeyError on non-ASCII unicode input in Python 2.
				# This matches the newer ndb version of this function.
				psh_pings.append('http://www.sactraffic.org/atom?city=%s' % urllib.quote(incident.city.encode('utf8')))

			# Save this incident
			incident_list.append(incident)

	# Store the incidents in a batch
	db.put(incident_list)

	# Ping the PSH hub, use a set so we don't ping duplicates.
	ping_set = set(psh_pings)
	if not debug:
		deferred.defer(pubsubhubbub_publish.publish, 'http://pubsubhubbub.appspot.com', ping_set, _queue="pshPingQueue")
	else:
		# BUG FIX: log the center's ID; the old code referenced the loop-local
		# 'incident', which is undefined when the feed contains no incidents.
		logging.info("Skipping PSH pings for %s on the development server. %s" % (chpCenter.attrib['ID'], ping_set))

	# Reverse geocode the incidents if we haven't already
	for incident in incident_list:
		if incident.city is None and incident.geolocation is not None:
			deferred.defer(reverse_geocode.load_city, incident, _queue="reverseGeocodeQueue")

	logging.info("Processed %d incidents in %s." % (len(incident_list), chpCenter.attrib['ID']))