def post(self):
    """Store one crawled stop location, or queue an error task if the data is bad.

    Reads intersection/latitude/longitude/direction/routeID/stopID from the
    request, persists a StopLocation (unless already stored) and links it to
    the matching RouteListing.
    """
    intersection = self.request.get('intersection')
    latitude = self.request.get('latitude')
    longitude = self.request.get('longitude')
    direction = self.request.get('direction')
    routeID = self.request.get('routeID')
    stopID = self.request.get('stopID')
    logging.info("storing route %s intersection %s at lat/lon %s,%s toward %s"
                 % (routeID, intersection, latitude, longitude, direction))

    # Truncate over-long intersection strings. The original used
    # ljust(400), which pads SHORT strings and leaves long ones
    # untouched -- the opposite of the intent here.
    if len(intersection) > 400:
        intersection = intersection[:400]

    # request.get() returns '' (not None) for a missing parameter, so
    # test for emptiness; the original `is None` checks could never fire.
    if stopID == '00' or not latitude or not longitude:
        # create a task event to process the error
        task = Task(url='/crawl/errortask',
                    params={'intersection': intersection,
                            'location': (latitude + "," + longitude),
                            'direction': direction,
                            'metaStringOne': self.request.get('crawlLine'),
                            'metaStringTwo': 'from geotask crawler',
                            'routeID': routeID,
                            'stopID': stopID,
                            })
        task.add('crawlerrors')
    else:
        # ignore this stop if we've already stored it (keyed on stopID + routeID)
        stop = db.GqlQuery(
            "SELECT * FROM StopLocation WHERE stopID = :1 and routeID = :2",
            stopID, routeID).get()
        if stop is None:
            stop = StopLocation()
            stop.stopID = stopID
            stop.routeID = routeID
            stop.intersection = intersection.upper()
            stop.direction = direction.upper()
            stop.location = GeoPt(latitude, longitude)
            stop.update_location()
            stop.put()

        # update the route table to include a reference to the new geo data
        if stopID != '00':
            route = db.GqlQuery(
                "SELECT * FROM RouteListing WHERE stopID = :1 and route = :2",
                stopID, routeID).get()
            if route is None:
                logging.error("IMPOSSIBLE... no stop on record?!? stop %s, route %s" % (stopID, routeID))
                # create a task event to process the error
                task = Task(url='/crawl/errortask',
                            params={'intersection': intersection,
                                    'location': (latitude + "," + longitude),
                                    'direction': direction,
                                    'metaStringOne': self.request.get('crawlLine'),
                                    'metaStringTwo': 'routelisting update',
                                    'routeID': routeID,
                                    'stopID': stopID,
                                    })
                task.add('crawlerrors')
            else:
                route.stopLocation = stop
                route.put()
    return
def post(self):
    """Create a StopLocation from the posted form fields and store it."""
    stop_list = []

    # Normalize the stop ID to four digits (e.g. '12' -> '0012').
    # zfill(4) reproduces the original chain of length-1/2/3 padding
    # checks; the guard keeps an empty ID empty, as before.
    stopID = self.request.get('stopID')
    if stopID:
        stopID = stopID.zfill(4)

    name = self.request.get('name')
    description = self.request.get('description')
    lat = self.request.get('lat')
    lon = self.request.get('lon')
    direction = self.request.get('direction')

    # create a new stop entity
    s = StopLocation()
    s.stopID = stopID
    # the intersection is the name up to the first '(' suffix
    s.intersection = name.split('(')[0].rstrip()
    s.direction = direction
    s.description = description
    s.location = GeoPt(lat, lon)
    s.update_location()
    stop_list.append(s)

    # put the new stop in the datastore
    db.put(stop_list)
    logging.info('done updating stop locations for stopID %s' % stopID)
    self.response.set_status(200)
def post(self):
    """Build a StopLocation from the request arguments and save it."""
    raw_id = self.request.get('stopID')
    # left-pad the stop ID with zeros out to four characters; IDs that
    # are empty or already four-plus characters pass through untouched
    missing = 4 - len(raw_id)
    stopID = ('0' * missing) + raw_id if 0 < missing < 4 else raw_id

    name = self.request.get('name')
    description = self.request.get('description')
    direction = self.request.get('direction')
    point = GeoPt(self.request.get('lat'), self.request.get('lon'))

    fresh = StopLocation()
    fresh.stopID = stopID
    fresh.intersection = name.split('(')[0].rstrip()
    fresh.direction = direction
    fresh.description = description
    fresh.location = point
    fresh.update_location()

    # single-element batch put, matching the original's db.put usage
    db.put([fresh])
    logging.info('done updating stop locations for stopID %s' % stopID)
    self.response.set_status(200)
def nearbyStops(lat, lon, radius, routeID):
    """Return a response dict of stops within `radius` of (lat, lon).

    If routeID is non-empty, only stops on that route are considered.
    Results are de-duplicated per stopID because the proximity query
    returns one row per (stop, route) pair.
    """
    # clamp the search radius to 1000 (the original comment said 500,
    # but the code has always enforced 1000)
    if radius > 1000:
        radius = 1000

    # build the base query, optionally narrowed to a single route,
    # then run one proximity fetch instead of duplicating the call
    query = StopLocation.all()
    if routeID is not None and routeID != "":
        query.filter('routeID =', routeID)

    results = StopLocation.proximity_fetch(
        query,
        geotypes.Point(lat, lon),  # Or db.GeoPt
        max_results=100,
        max_distance=radius)

    if results is None:
        return {'status': '0',
                'info': 'No stops found',
                }

    response_dict = {'status': '0', }
    stop_results = []
    stop_tracking = []
    for stop in results:
        # kind of a hack, but limit the results to one per route --
        # the query will return multiple results for each stop
        if stop.stopID not in stop_tracking:
            stop_results.append({'stopID': stop.stopID,
                                 'intersection': stop.intersection,
                                 'latitude': stop.location.lat,
                                 'longitude': stop.location.lon,
                                 })
            stop_tracking.append(stop.stopID)

    response_dict.update({'stop': stop_results})
    return response_dict
def post(self):
    """Create a placeholder StopLocation and point all matching routes at it."""
    stopID = self.request.get('stopID')
    lat = self.request.get('lat')
    lon = self.request.get('lon')

    # build the new stop; routeID and direction are '00' placeholders
    stop = StopLocation()
    stop.stopID = stopID
    stop.routeID = '00'
    stop.intersection = self.request.get('intersection').upper()
    stop.location = GeoPt(lat, lon)
    stop.update_location()
    stop.direction = '00'
    logging.debug('created new stoplocation for %s' % stopID)
    stop.put()

    # re-point every RouteListing for this stop at the fresh location
    listings = db.GqlQuery(
        "SELECT * FROM RouteListing WHERE stopID = :1", stopID).fetch(100)
    for listing in listings:
        logging.debug('updating route %s with new location' % listing.route)
        listing.stopLocation = stop
        listing.put()

    self.redirect('http://smsmybus.com/labs/displaystops')
def post(self):
    """Create or update a StopLocation and re-link RouteListings to it.

    If stops with this ID already exist, their descriptions are updated;
    otherwise a new StopLocation is created. All RouteListings with the
    same stopID are then pointed at the surviving entity.
    """
    stop_list = []
    route_list = []

    # normalize the stop ID to four digits ('12' -> '0012'); zfill(4)
    # matches the original length-1/2/3 padding chain, and the guard
    # leaves an empty ID empty, as before
    stopID = self.request.get('stopID')
    if stopID:
        stopID = stopID.zfill(4)

    name = self.request.get('name')
    description = self.request.get('description')
    lat = self.request.get('lat')
    lon = self.request.get('lon')
    direction = self.request.get('direction')

    # check to see if the stop exists already
    stops = db.GqlQuery("select * from StopLocation where stopID = :1", stopID).fetch(50)

    # fetch() always returns a list, so a plain truthiness test replaces
    # the original `is not None and len(stops) > 0`
    if stops:
        # if it does, append the stop description
        for s in stops:
            stop_template = s
            s.description = description
            stop_list.append(s)
    else:
        # if it doesn't, create a new one
        s = StopLocation()
        stop_template = s
        s.stopID = stopID
        s.intersection = name.split('(')[0].rstrip()
        s.direction = direction
        s.description = description
        s.location = GeoPt(lat, lon)
        stop_list.append(s)

    # put the new stop in the datastore
    db.put(stop_list)
    logging.info('done updating stop locations for stopID %s' % stopID)

    # find all of the RouteListings with this stopID, and
    # loop through them to update the StopLocation references
    routes = db.GqlQuery("select * from RouteListing where stopID = :1", stopID).fetch(50)
    for r in routes:
        r.stopLocation = stop_template
        route_list.append(r)

    # save the route updates
    db.put(route_list)
    logging.info('done updating %s route listings for stopID %s' % (str(len(routes)), stopID))

    self.response.set_status(200)
def get(self):
    """Warm the memcache with locations for recently requested stops."""
    # do some analysis on the request history...
    reqs = getRequestedStops()

    # find the lat/longs for all the stops
    validStops = reqs.keys()
    stopLocs = memcache.get_multi(validStops)

    # memcache.get_multi() returns a (possibly empty) dict, never None,
    # so test for emptiness. On an explicit 'clean' request, drop the
    # cached entries AND reset the local copy -- otherwise the stale
    # values just fetched would be written straight back by the
    # set_multi() call at the bottom.
    if self.request.get('clean') or not stopLocs:
        memcache.delete_multi(validStops)
        stopLocs = {}

    logging.debug("logging stop locations!")
    cursor = None

    # Walk the StopLocation table in 1000-entity chunks using a cursor.
    q = StopLocation.all()
    while q is not None:
        # If the app stored a cursor during a previous request, use it.
        if cursor:
            q.with_cursor(cursor)

        # Perform the query to get results.
        locationQuery = q.fetch(1000)
        cursor = q.cursor()
        if len(locationQuery) > 0:
            logging.debug('just read in another chunk of stop locations...')
            for l in locationQuery:
                location = l.location
                stopKey = l.stopID + ':loc'
                # only cache stops that were actually requested and
                # are not already present
                if l.stopID in validStops and stopKey not in stopLocs:
                    logging.debug('adding location %s for stopID %s' % (location, l.stopID))
                    stopLocs[stopKey] = location
        else:
            logging.debug('No more stop locations left in the query!')
            break

    memcache.set_multi(stopLocs)
    return
def get(self):
    """Populate memcache with stop locations for the requested-stop history."""
    # do some analysis on the request history...
    reqs = getRequestedStops()

    # find the lat/longs for all the stops
    validStops = reqs.keys()
    stopLocs = memcache.get_multi(validStops)

    # get_multi() never returns None -- an empty dict means nothing is
    # cached -- so use a truthiness check. When 'clean' is requested,
    # clear BOTH the cache and the local dict; leaving the local dict
    # populated would re-store the stale entries via set_multi() below.
    if self.request.get('clean') or not stopLocs:
        memcache.delete_multi(validStops)
        stopLocs = {}

    logging.debug("logging stop locations!")
    cursor = None

    # Start a cursor-driven query for all stop locations, 1000 at a time.
    q = StopLocation.all()
    while q is not None:
        # Resume from the cursor stored on the previous iteration.
        if cursor:
            q.with_cursor(cursor)

        locationQuery = q.fetch(1000)
        cursor = q.cursor()
        if len(locationQuery) > 0:
            logging.debug('just read in another chunk of stop locations...')
            for l in locationQuery:
                location = l.location
                stopKey = l.stopID + ':loc'
                if l.stopID in validStops and stopKey not in stopLocs:
                    stopLocs[stopKey] = location
        else:
            logging.debug('No more stop locations left in the query!')
            break

    memcache.set_multi(stopLocs)
    return
def nearbyStops(lat, lon, radius, routeID, destination):
    """Return a response dict of stops within `radius` of (lat, lon).

    Optionally restricted to a route and, within that route, to a
    destination (direction). Destination filtering is applied manually
    after the proximity fetch because the proximity query cannot express
    it.
    """
    route_stops = None

    # limit results to 200
    max_results = 200

    # clamp the search radius to a maximum of 1000
    if radius > 1000:
        radius = 1000

    logging.debug('nearbyStops (%s,%s,%s,%s,%s)' % (lat, lon, radius, routeID, destination))
    if routeID is None or routeID == "":
        results = StopLocation.proximity_fetch(
            StopLocation.all(),
            geotypes.Point(lat, lon),  # Or db.GeoPt
            max_results=max_results,
            max_distance=radius)
    else:
        # NOTE: the original tested `destination is not ""` -- an identity
        # comparison against a string literal, which is implementation
        # dependent. Use equality instead.
        if destination is not None and destination != "":
            destination_code = getDestinationCode(destination)
            if destination_code == -1:
                return {'status': '-1',
                        'info': ('Unknown destination %s' % destination)
                        }
            else:
                # first filter stops by route and destination...
                logging.debug('... filter by destination for route %s and destination %s'
                              % (routeID, destination_code))
                q = db.GqlQuery('select stopID from RouteListing where route = :1 and direction = :2 order by route',
                                routeID, destination_code)
                routes = q.fetch(1000)
                route_stops = [route.stopID for route in routes]
                results = StopLocation.proximity_fetch(
                    StopLocation.all(),
                    geotypes.Point(lat, lon),  # Or db.GeoPt
                    max_results=max_results,
                    max_distance=radius)
        else:
            # first filter stops by route...
            logging.debug('... filter by destination for route %s' % routeID)
            q = db.GqlQuery('select * from RouteListing where route = :1', routeID)
            routes = q.fetch(1000)
            route_stops = [route.stopID for route in routes]
            results = StopLocation.proximity_fetch(
                StopLocation.all(),
                geotypes.Point(lat, lon),  # Or db.GeoPt
                max_results=max_results,
                max_distance=radius)

    if results is None:
        return {'status': '0',
                'timestamp': api_utils.getLocalTimestamp(),
                'info': 'No stops found',
                'stops': []
                }

    response_dict = {'status': '0',
                     'timestamp': api_utils.getLocalTimestamp(),
                     }
    stop_results = []
    stop_tracking = []
    logging.info('loop through %s results' % len(results))
    for stop in results:
        # manually apply the destination filter here because the GQL
        # query limits our ability to apply it in the proximity query
        if route_stops is not None and stop.stopID not in route_stops:
            continue

        # kind of a hack, but limit the results to one per route --
        # the query will return multiple results for each stop
        if stop.stopID not in stop_tracking:
            stop_results.append({'stopID': stop.stopID,
                                 'intersection': stop.intersection,
                                 'latitude': stop.location.lat,
                                 'longitude': stop.location.lon,
                                 })
            logging.debug('appending %s to route tracking list' % stop.stopID)
            stop_tracking.append(stop.stopID)

    response_dict.update({'stop': stop_results})
    return response_dict
def post(self):
    """Register a fresh StopLocation and attach it to every route serving the stop."""
    stop_id = self.request.get('stopID')
    lat = self.request.get('lat')
    lon = self.request.get('lon')

    new_stop = StopLocation()
    new_stop.stopID = stop_id
    new_stop.routeID = '00'  # placeholder route
    new_stop.intersection = self.request.get('intersection').upper()
    new_stop.location = GeoPt(lat, lon)
    new_stop.update_location()
    new_stop.direction = '00'  # placeholder direction
    logging.debug('created new stoplocation for %s' % stop_id)
    new_stop.put()

    # every RouteListing with this stopID gets the new location reference
    route_query = db.GqlQuery("SELECT * FROM RouteListing WHERE stopID = :1", stop_id)
    matches = route_query.fetch(100)
    if len(matches) > 0:
        for match in matches:
            logging.debug('updating route %s with new location' % match.route)
            match.stopLocation = new_stop
            match.put()

    self.redirect('http://smsmybus.com/labs/displaystops')
def post(self):
    """Crawl one route page, persisting StopLocations and RouteListings.

    Retries the page fetch up to three times, then scrapes every
    'ada'-classed anchor: titles containing '#' describe a concrete stop
    (stopID + intersection), while hrefs containing '?r=' are deeper
    route pages that get re-queued as new crawler tasks.
    """
    try:
        scrapeURL = self.request.get('crawl')
        direction = self.request.get('direction')
        routeID = self.request.get('routeID')
        logging.debug("task scraping for %s, direction %s, route %s" % (scrapeURL, direction, routeID))

        # fetch the page, retrying up to three times on download errors
        loop = 0
        done = False
        result = None
        start = quota.get_request_cpu_usage()
        while not done and loop < 3:
            try:
                result = urlfetch.fetch(scrapeURL)
                done = True
            except urlfetch.DownloadError:
                logging.info("Error loading page (%s)... sleeping" % loop)
                if result:
                    logging.debug("Error status: %s" % result.status_code)
                    logging.debug("Error header: %s" % result.headers)
                    logging.debug("Error content: %s" % result.content)
                time.sleep(4)
                loop += 1
        end = quota.get_request_cpu_usage()
        #logging.info("scraping took %s cycles" % (end-start))

        # bail out if every fetch attempt failed; without this guard the
        # original code crashed dereferencing result.content below
        if result is None:
            logging.error("unable to fetch %s after %s attempts" % (scrapeURL, loop))
            return

        # start to interrogate the results
        soup = BeautifulSoup(result.content)
        stopUpdates = []
        for slot in soup.html.body.findAll("a", "ada"):
            logging.info("pulling out data from page... %s" % slot)
            if slot.has_key('href'):
                href = slot['href']
                title = slot['title']
                logging.info("FOUND A TITLE ----> %s" % title)
                # route crawler looks for titles with an ID# string
                if title.find("#") > 0:
                    # we finally got down to the page we're looking for

                    # pull the stopID from the page content...
                    stopID = title.split("#")[1].split("]")[0]
                    # pull the intersection from the page content...
                    intersection = title.split("[")[0].strip()
                    logging.info("found stop %s, %s" % (stopID, intersection))

                    # check for conflicts...
                    stop = db.GqlQuery("SELECT * FROM StopLocation WHERE stopID = :1", stopID).get()
                    if stop is None:
                        # add the new stop (batch put at the end)
                        stop = StopLocation()
                        stop.stopID = stopID
                        stop.routeID = routeID
                        stop.intersection = intersection.upper()
                        stop.direction = direction.upper()
                        stopUpdates.append(stop)
                        logging.info("ADDED StopLocation (%s) - MINUS geo location" % stopID)
                    else:
                        logging.info("StopLoation entity already exists for %s..." % stopID)
                        stop.routeID = routeID
                        stopUpdates.append(stop)

                    # pull the route and direction data from the URL
                    routeData = scrapeURL.split('?')[1]
                    logging.info("FOUND THE PAGE ---> arguments: %s stopID: %s" % (routeData, stopID))
                    routeArgs = routeData.split('&')
                    routeID = routeArgs[0].split('=')[1]
                    directionID = routeArgs[1].split('=')[1]
                    timeEstimatesURL = CRAWL_URLBASE + href

                    # check for conflicts...
                    r = db.GqlQuery("SELECT * FROM RouteListing WHERE route = :1 AND direction = :2 AND stopID = :3",
                                    routeID, directionID, stopID).get()
                    if r is None:
                        # add the new route to the DB
                        route = RouteListing()
                        route.route = routeID
                        route.direction = directionID
                        route.stopID = stopID
                        route.scheduleURL = timeEstimatesURL
                        route.put()
                        logging.info("added new route listing entry to the database!")
                    else:
                        logging.error("we found a duplicate entry!?! %s", r.scheduleURL)
                elif href.find("?r=") > -1:
                    # create a new task with this link
                    crawlURL = CRAWL_URLBASE + href
                    if routeID == '00':
                        routeID = href.split('r=')[1]
                    elif href.find("&") > -1:
                        routeID = href.split('&')[0].split('r=')[1]
                    task = Task(url='/routelist/crawlingtask',
                                params={'crawl': crawlURL, 'direction': title, 'routeID': routeID})
                    task.add('crawler')
                    logging.info("Added new task for %s, direction %s, route %s" % (title.split(",")[0], title, routeID))

        # push the stop updates to the datastore in one batch
        db.put(stopUpdates)

    except apiproxy_errors.DeadlineExceededError:
        logging.error("DeadlineExceededError exception!?")
        return
    return
def nearbyStops(lat, lon, radius, routeID, destination):
    """Return stops near (lat, lon), optionally filtered by route/destination.

    The destination filter is applied in Python after the proximity fetch
    because the proximity query cannot express it.
    """
    route_stops = None

    # limit results to 200
    max_results = 200

    # clamp the search radius to a maximum of 1000
    if radius > 1000:
        radius = 1000

    logging.debug('nearbyStops (%s,%s,%s,%s,%s)' % (lat, lon, radius, routeID, destination))
    if routeID is None or routeID == "":
        results = StopLocation.proximity_fetch(
            StopLocation.all(),
            geotypes.Point(lat, lon),  # Or db.GeoPt
            max_results=max_results,
            max_distance=radius)
    else:
        # NOTE: the original tested `destination is not ""`, an identity
        # comparison with a string literal (implementation dependent);
        # equality is the correct test.
        if destination is not None and destination != "":
            destination_code = getDestinationCode(destination)
            if destination_code == -1:
                return {'status': '-1',
                        'info': ('Unknown destination %s' % destination)
                        }
            else:
                # first filter stops by route and destination...
                logging.debug('... filter by destination for route %s and destination %s'
                              % (routeID, destination_code))
                q = db.GqlQuery('select stopID from RouteListing where route = :1 and direction = :2 order by route',
                                routeID, destination_code)
                routes = q.fetch(1000)
                route_stops = [route.stopID for route in routes]
                results = StopLocation.proximity_fetch(
                    StopLocation.all(),
                    geotypes.Point(lat, lon),  # Or db.GeoPt
                    max_results=max_results,
                    max_distance=radius)
        else:
            # first filter stops by route...
            logging.debug('... filter by destination for route %s' % routeID)
            q = db.GqlQuery('select * from RouteListing where route = :1', routeID)
            routes = q.fetch(1000)
            route_stops = [route.stopID for route in routes]
            results = StopLocation.proximity_fetch(
                StopLocation.all(),
                geotypes.Point(lat, lon),  # Or db.GeoPt
                max_results=max_results,
                max_distance=radius)

    if results is None:
        return {'status': '0',
                'timestamp': api_utils.getLocalTimestamp(),
                'info': 'No stops found',
                'stops': []
                }

    response_dict = {'status': '0',
                     'timestamp': api_utils.getLocalTimestamp(),
                     }
    stop_results = []
    stop_tracking = []
    logging.info('loop through %s results' % len(results))
    for stop in results:
        # manually apply the destination filter here because the GQL
        # query limits our ability to apply it in the proximity query
        if route_stops is not None and stop.stopID not in route_stops:
            continue

        # kind of a hack, but limit the results to one per route --
        # the query will return multiple results for each stop
        if stop.stopID not in stop_tracking:
            stop_results.append({'stopID': stop.stopID,
                                 'intersection': stop.intersection,
                                 'latitude': stop.location.lat,
                                 'longitude': stop.location.lon,
                                 })
            logging.debug('appending %s to route tracking list' % stop.stopID)
            stop_tracking.append(stop.stopID)

    response_dict.update({'stop': stop_results})
    return response_dict
def post(self):
    """Scrape one crawled page for stop references and follow-on route links."""
    try:
        page_url = self.request.get('crawl')
        direction = self.request.get('direction')
        routeID = self.request.get('routeID')
        logging.debug("task scraping for %s, direction %s, route %s" % (page_url, direction, routeID))

        # fetch the URL content and hand it to the parser
        soup = BeautifulSoup(fetchURL(page_url))

        stopUpdates = []
        for anchor in soup.html.body.findAll("a", "ada"):
            if not anchor.has_key('href'):
                continue
            href = anchor['href']
            title = anchor['title']
            logging.info("FOUND A TITLE ----> %s" % title)

            if title.find("[ID#") > 0:
                # A leaf page: the title carries both the stop ID and the
                # intersection, e.g. "Main & First [ID#1234]".
                stopID = title.split("ID#")[1].split("]")[0]
                intersection = title.split("[")[0].strip()
                logging.info("found stop %s, %s" % (stopID, intersection))

                # have we already recorded this stop for this route/direction?
                existing = db.GqlQuery(
                    "SELECT * FROM StopLocation WHERE stopID = :1 and direction = :2 and routeID = :3",
                    stopID, direction.upper(), routeID).fetch(1)
                if len(existing) == 0:
                    # brand new stop -- stage it for the batch put below
                    fresh = StopLocation()
                    fresh.stopID = stopID
                    fresh.routeID = routeID
                    fresh.intersection = intersection.upper()
                    fresh.direction = direction.upper()
                    stopUpdates.append(fresh)
                    logging.info("added new stop listing MINUS geo location")
                else:
                    logging.info("already have this stop in the table...")
                    existing[0].routeID = routeID
                    stopUpdates.append(existing[0])
            elif href.find("?r=") > -1:
                # An intermediate page: queue another crawl task to scrape
                # the live route data behind this link.
                crawlURL = CRAWL_URLBASE + href
                if routeID == '00':
                    routeID = href.split('r=')[1]
                elif href.find("&") > -1:
                    routeID = href.split('&')[0].split('r=')[1]
                task = Task(url='/crawl/crawlingtask',
                            params={'crawl': crawlURL, 'direction': title, 'routeID': routeID})
                task.add('crawler')
                logging.info("Added new task for %s, direction %s, route %s" % (title.split(",")[0], title, routeID))

        # push the StopLocation updates to the datastore
        db.put(stopUpdates)
    except apiproxy_errors.DeadlineExceededError:
        logging.error("DeadlineExceededError exception!?")
        return
    return