def process_chp_center(chpCenter):
    """Process a CHP Center.

    Walks every Dispatch/Log element under ``chpCenter``, creates or
    refreshes the matching ``CHPIncident`` entity, stores all of them in a
    single batch, queues the PubSubHubbub pings and, for the SAHB center
    only, queues reverse geocoding for incidents that have a geolocation
    but no city yet.

    Args:
        chpCenter: XML element for one CHP center. Its ``ID`` attribute is
            used as the center ID and its children are iterated as
            Dispatch elements, whose children are in turn Log elements.

    Returns:
        None. The results are the datastore writes, deferred tasks and log
        messages.
    """
    incident_list = []
    psh_pings = []
    dash_re = re.compile(r'\s*-\s*')
    center_id = chpCenter.attrib['ID']
    # Duct tape to get the timezone right: the parsed times are naive, so
    # the Pacific UTC offset is subtracted by hand further down.
    pacific_tz = tzinfo.Pacific()

    for chpDispatch in chpCenter:
        dispatch_id = chpDispatch.attrib['ID']

        # For some reason, sometimes the Dispatch ID is blank so skip these.
        if dispatch_id == "":
            continue

        for chpLog in chpDispatch:
            log_id = chpLog.attrib['ID']

            # There are two different time formats in the CHP feed. Try the
            # "standard" format first, then fall back to the new SAHB format.
            log_time_text = chpLog.find('LogTime').text
            try:
                log_time = datetime.strptime(
                    log_time_text, '"%m/%d/%Y %I:%M:%S %p"')
            except ValueError:
                log_time = datetime.strptime(
                    log_time_text, '"%b %d %Y %I:%M%p"')

            # NOTE(review): presumably this turns Pacific local time into
            # naive UTC -- confirm against tzinfo.Pacific.
            log_time = log_time - pacific_tz.utcoffset(log_time)

            incident_key_name = "%s.%s.%s" % (center_id, dispatch_id, log_id)
            incident_key = ndb.Key(CHPIncident, incident_key_name)

            incident = incident_key.get()
            if incident is None:
                # First sighting: these attributes are only set on creation.
                incident = CHPIncident(
                    key=incident_key,
                    CenterID=center_id,
                    DispatchID=dispatch_id,
                    LogID=log_id,
                    LogTime=log_time,
                    Area=chpLog.find('Area').text.strip('"'))

            #
            # The following are attributes that can change between updates
            # so reset them from the data given.
            #

            # LogType and LogTypeID
            (logtypeid, dash, logtype) = (
                chpLog.find('LogType').text.strip('"').partition("-"))
            if dash == '':
                # If 'dash' is missing then the hyphen was missing, if so we
                # use the whole thing as the LogType and forget the LogTypeID
                incident.LogType = deCopIfy(
                    dash_re.sub(' - ', logtypeid.strip()))
            else:
                incident.LogType = deCopIfy(
                    dash_re.sub(' - ', logtype.strip()))
                incident.LogTypeID = logtypeid.strip()

            # Location
            incident.Location = deCopIfy(
                chpLog.find('Location').text.strip('"'))

            # LocationDesc
            # Make sure the location and the locationDesc aren't dupes
            locationDesc = deCopIfy(
                chpLog.find('LocationDesc').text.strip('"'))
            if incident.Location.upper() != locationDesc.upper():
                incident.LocationDesc = locationDesc

            # geolocation
            # Check for a missing element / empty text *before* stripping;
            # otherwise the None test can never be reached.
            latlon_element = chpLog.find('LATLON')
            latlon = latlon_element.text if latlon_element is not None else None
            if latlon is not None:
                latlon = latlon.strip('"')
                if latlon != "0:0":
                    # The feed value is "<lat>:<lon>" scaled by 1,000,000;
                    # the longitude sign is flipped when stored.
                    (lat_text, _colon, lon_text) = latlon.partition(":")
                    incident.geolocation = ndb.GeoPt(
                        lat=float(lat_text) / 1000000,
                        lon=float(lon_text) / 1000000 * -1)

            # LogDetails
            # NOTE(review): only the 'details' tag has a bucket here; a child
            # with any other tag would raise KeyError -- confirm the feed
            # never sends one.
            LogDetails = {'details': []}
            for element in chpLog.find('LogDetails'):
                try:
                    detail_dict = {
                        'DetailTime':
                            element.find('DetailTime').text.strip('"'),
                        'IncidentDetail': deCopIfy(
                            element.find('IncidentDetail').text.strip('"^')
                        ).capitalize(),
                    }
                    LogDetails[element.tag].append(detail_dict)
                except AttributeError:
                    # Deliberate best effort: a detail without a time or a
                    # description is simply left out.
                    continue
            incident.LogDetails = LogDetails

            # Set up the PSH pings. Note, we are NOT checking for actual
            # changes in the data, we are just assuming that the existence
            # of an incident in the CHP feed declares it as "updated" so we
            # ping.
            psh_pings.append('http://www.sactraffic.org/atom?center=%s'
                             % urllib.quote(incident.CenterID))
            psh_pings.append('http://www.sactraffic.org/atom?dispatch=%s'
                             % urllib.quote(incident.DispatchID))
            if incident.Area is not None and incident.Area != "":
                psh_pings.append('http://www.sactraffic.org/atom?area=%s'
                                 % urllib.quote(incident.Area))
            if incident.city is not None and incident.city != "":
                psh_pings.append('http://www.sactraffic.org/atom?city=%s'
                                 % urllib.quote(incident.city.encode('utf8')))

            # Save this incident
            incident_list.append(incident)

    # Store the incidents in a batch
    ndb.put_multi(incident_list)

    # Ping the PSH hub, use a set so we don't ping duplicates.
    ping_set = set(psh_pings)
    if not debug:
        deferred.defer(pubsubhubbub_publish.publish,
                       'http://pubsubhubbub.appspot.com', ping_set,
                       _queue="pshPingQueue")
    else:
        # Use center_id rather than the loop variable: 'incident' is unbound
        # when the center produced no incidents at all.
        logging.info("Skipping PSH pings for %s on the development server. %s"
                     % (center_id, ping_set))

    if center_id == 'SAHB':
        # Limit reverse geocoding to only the SAHB center to limit the number
        # of requests to the geocoder, we are _Sac_ Traffic after all.

        # Reverse geocode the incidents if we haven't already
        for incident in incident_list:
            if incident.city is None and incident.geolocation is not None:
                if not debug:
                    deferred.defer(reverse_geocode.load_city, incident,
                                   _queue="reverseGeocodeQueue")
                else:
                    logging.info("Skipping reverse geocode for %s on the development server."
                                 % (incident.Area))

    logging.info("Processed %d incidents in %s."
                 % (len(incident_list), center_id))
def process_chp_center(chpCenter):
    """Process a CHP Center.

    Walks every Dispatch/Log element under ``chpCenter``, creates or
    refreshes the matching ``CHPIncident`` entity, stores all of them in a
    single batch, queues the PubSubHubbub pings and queues reverse
    geocoding for every incident that has a geolocation but no city yet.

    Args:
        chpCenter: XML element for one CHP center. Its ``ID`` attribute is
            used as the center ID and its children are iterated as
            Dispatch elements, whose children are in turn Log elements.

    Returns:
        None. The results are the datastore writes, deferred tasks and log
        messages.
    """
    incident_list = []
    psh_pings = []
    dash_re = re.compile(r'\s*-\s*')
    center_id = chpCenter.attrib['ID']

    for chpDispatch in chpCenter:
        dispatch_id = chpDispatch.attrib['ID']

        # For some reason, sometimes the Dispatch ID is blank
        # so skip these.
        if dispatch_id == "":
            continue

        for chpLog in chpDispatch:
            log_id = chpLog.attrib['ID']

            # There are two different time formats in the CHP feed. Try the
            # "standard" format first, then fall back to the new SAHB format.
            log_time_text = chpLog.find('LogTime').text
            try:
                naive_log_time = datetime.strptime(
                    log_time_text, '"%m/%d/%Y %I:%M:%S %p"')
            except ValueError:
                naive_log_time = datetime.strptime(
                    log_time_text, '"%b %d %Y %I:%M%p"')
            log_time = naive_log_time.replace(tzinfo=tzinfo.Pacific())

            # The key embeds the log time (as a timestamp) in addition to
            # the center, dispatch and log IDs.
            key_name = "%s.%s.%s.%d" % (
                center_id, dispatch_id, log_id,
                time.mktime(log_time.timetuple()))

            incident = CHPIncident.get_by_key_name(key_name)
            if incident is None:
                incident = CHPIncident(
                    key_name=key_name,
                    CenterID=center_id,
                    DispatchID=dispatch_id,
                    LogID=log_id)

            incident.LogTime = log_time

            (logtypeid, dash, logtype) = (
                chpLog.find('LogType').text.strip('"').partition("-"))
            if dash == '':
                # If 'dash' is missing then the hyphen was missing, if so we
                # use the whole thing as the LogType and forget the LogTypeID
                incident.LogType = deCopIfy(
                    dash_re.sub(' - ', logtypeid.strip()))
            else:
                incident.LogType = deCopIfy(
                    dash_re.sub(' - ', logtype.strip()))
                incident.LogTypeID = logtypeid.strip()

            incident.Location = deCopIfy(
                chpLog.find('Location').text.strip('"'))
            incident.Area = chpLog.find('Area').text.strip('"')
            incident.ThomasBrothers = (
                chpLog.find('ThomasBrothers').text.strip('"'))

            # Like the LogTime above, there are now two location formats.
            # This time we try the SAHB LATLON first, then fall back to TBXY.
            # Only the lookup sits inside the try: AttributeError here means
            # the LATLON element (or its text) is absent.
            try:
                latlon = chpLog.find('LATLON').text.strip('"')
            except AttributeError:
                incident.TBXY = chpLog.find('TBXY').text.strip('"')
                incident.geolocation = geoConvertTBXY(
                    incident.CenterID, incident.TBXY)
            else:
                if latlon != "0:0":
                    # The feed value is "<lat>:<lon>" scaled by 1,000,000;
                    # the longitude sign is flipped when stored.
                    (lat_text, _colon, lon_text) = latlon.partition(":")
                    incident.geolocation = db.GeoPt(
                        lat=float(lat_text) / 1000000,
                        lon=float(lon_text) / 1000000 * -1)

            # Special handling for the LogDetails: children are bucketed by
            # their tag name and the result is stored pickled.
            LogDetails = {'details': [], 'units': []}
            for element in chpLog.find('LogDetails'):
                try:
                    detail_dict = {
                        'DetailTime':
                            element.find('DetailTime').text.strip('"'),
                        'IncidentDetail': deCopIfy(
                            element.find('IncidentDetail').text.strip('"^')
                        ).capitalize(),
                    }
                    LogDetails[element.tag].append(detail_dict)
                except AttributeError:
                    # Deliberate best effort: a detail without a time or a
                    # description is simply left out.
                    continue
            incident.LogDetails = pickle.dumps(LogDetails)

            # Set up the PSH pings. Note, we are NOT checking for actual
            # changes in the data, we are just assuming that the existence
            # of an incident in the CHP feed declares it as "updated" so we
            # ping.
            psh_pings.append('http://www.sactraffic.org/atom?center=%s'
                             % urllib.quote(incident.CenterID))
            psh_pings.append('http://www.sactraffic.org/atom?dispatch=%s'
                             % urllib.quote(incident.DispatchID))
            psh_pings.append('http://www.sactraffic.org/atom?area=%s'
                             % urllib.quote(incident.Area))
            if incident.city is not None and incident.city != "":
                psh_pings.append('http://www.sactraffic.org/atom?city=%s'
                                 % urllib.quote(incident.city))

            # Save this incident
            incident_list.append(incident)

    # Store the incidents in a batch
    db.put(incident_list)

    # Ping the PSH hub, use a set so we don't ping duplicates.
    ping_set = set(psh_pings)
    if not debug:
        deferred.defer(pubsubhubbub_publish.publish,
                       'http://pubsubhubbub.appspot.com', ping_set,
                       _queue="pshPingQueue")
    else:
        # Use center_id rather than the loop variable: 'incident' is unbound
        # when the center produced no incidents at all.
        logging.info("Skipping PSH pings for %s on the development server. %s"
                     % (center_id, ping_set))

    # Reverse geocode the incidents if we haven't already
    for incident in incident_list:
        if incident.city is None and incident.geolocation is not None:
            deferred.defer(reverse_geocode.load_city, incident,
                           _queue="reverseGeocodeQueue")

    logging.info("Processed %d incidents in %s."
                 % (len(incident_list), center_id))