Esempio n. 1
0
    def post(self):
        """Store a crawled stop location and link it to its RouteListing.

        Reads stop/route fields from the POST parameters. Invalid entries
        (placeholder stopID '00' or missing coordinates) are handed to an
        error-processing task queue; otherwise a StopLocation is created
        (if not already stored) and the matching RouteListing is updated
        to reference the new geo data.
        """
        intersection = self.request.get('intersection')
        latitude = self.request.get('latitude')
        longitude = self.request.get('longitude')
        direction = self.request.get('direction')
        routeID = self.request.get('routeID')
        stopID = self.request.get('stopID')
        logging.info("storing route %s intersection %s at lat/lon %s,%s toward %s" % 
                     (routeID,intersection,latitude,longitude,direction))

        # Cap the intersection string at 400 characters. The previous
        # ljust(400) only pads short strings -- under this guard (len > 400)
        # it was a no-op; slicing actually truncates.
        if len(intersection) > 400:
            intersection = intersection[:400]

        if stopID == '00' or latitude is None or longitude is None:
            # create a task event to process the error.
            # %s-format the location so a missing coordinate can't raise a
            # TypeError on string concatenation (None + str).
            task = Task(url='/crawl/errortask', params={'intersection':intersection,
                                                        'location':('%s,%s' % (latitude, longitude)),
                                                        'direction':direction,
                                                        'metaStringOne':self.request.get('crawlLine'),
                                                        'metaStringTwo':'from geotask crawler',
                                                        'routeID':routeID,
                                                        'stopID':stopID,
                                                        })
            task.add('crawlerrors')
        else:
            # ignore this stop if we've already stored it
            # (keyed on stopID + routeID)
            stop = db.GqlQuery("SELECT * FROM StopLocation WHERE stopID = :1 and routeID = :2", stopID, routeID).get()
            if stop is None:
                stop = StopLocation()
                stop.stopID = stopID
                stop.routeID = routeID
                stop.intersection = intersection.upper()
                stop.direction = direction.upper()
                stop.location = GeoPt(latitude,longitude)
                stop.update_location()
                stop.put()

                # update the route table to include a reference to the new geo data
                if stopID != '00':
                    route = db.GqlQuery("SELECT * FROM RouteListing WHERE stopID = :1 and route = :2", stopID,routeID).get()
                    if route is None:
                        logging.error("IMPOSSIBLE... no stop on record?!? stop %s, route %s" % (stopID,routeID))
                        # create a task event to process the error
                        task = Task(url='/crawl/errortask', params={'intersection':intersection,
                                                            'location':('%s,%s' % (latitude, longitude)),
                                                            'direction':direction,
                                                            'metaStringOne':self.request.get('crawlLine'),
                                                            'metaStringTwo':'routelisting update',
                                                            'routeID':routeID,
                                                            'stopID':stopID,
                                                            })
                        task.add('crawlerrors')
                    else:
                        route.stopLocation = stop
                        route.put()

        return
Esempio n. 2
0
    def post(self):
        """Build a StopLocation from the POSTed form fields and store it."""
        stop_id = self.request.get('stopID')
        # Zero-pad short stop IDs out to four characters.
        if 1 <= len(stop_id) <= 3:
            stop_id = stop_id.zfill(4)

        stop = StopLocation()
        stop.stopID = stop_id
        # Keep only the intersection text before any parenthesized suffix.
        stop.intersection = self.request.get('name').split('(')[0].rstrip()
        stop.direction = self.request.get('direction')
        stop.description = self.request.get('description')
        stop.location = GeoPt(self.request.get('lat'), self.request.get('lon'))
        stop.update_location()

        # put the new stop in the datastore
        db.put([stop])
        logging.info('done updating stop locations for stopID %s' % stop_id)

        self.response.set_status(200)
Esempio n. 3
0
    def post(self):
        """Persist a new StopLocation built from the request parameters."""
        stopID = self.request.get('stopID')
        # Left-pad the stop ID with zeros up to a width of four characters.
        while 0 < len(stopID) < 4:
            stopID = '0' + stopID

        name = self.request.get('name')
        description = self.request.get('description')
        lat = self.request.get('lat')
        lon = self.request.get('lon')
        direction = self.request.get('direction')

        # Build the new stop entity; the intersection is the name text
        # before any parenthesized portion.
        new_stop = StopLocation()
        new_stop.stopID = stopID
        new_stop.intersection = name.split('(')[0].rstrip()
        new_stop.direction = direction
        new_stop.description = description
        new_stop.location = GeoPt(lat, lon)
        new_stop.update_location()

        # store it via the batch API (single-element batch)
        db.put([new_stop])
        logging.info('done updating stop locations for stopID %s' % stopID)

        self.response.set_status(200)
Esempio n. 4
0
def nearbyStops(lat, lon, radius, routeID):
    """Return a dict describing stops within *radius* of (lat, lon).

    Args:
        lat, lon: center point of the proximity search.
        radius: search radius; clamped to a maximum of 1000.
        routeID: optional route filter; None or '' matches all routes.

    Returns:
        A dict with 'status' and a 'stop' list containing one entry per
        unique stopID, or an info dict when no stops are found.
    """
    # clamp the radius value to 1000 (the old comment claimed 500, but
    # the code has always enforced 1000)
    if radius > 1000:
        radius = 1000

    # only the query filter differs between the two cases, so build the
    # query first and run the proximity fetch once
    if routeID is None or routeID == "":
        query = StopLocation.all()
    else:
        query = StopLocation.all().filter('routeID =', routeID)

    results = StopLocation.proximity_fetch(
        query,
        geotypes.Point(lat, lon),  # Or db.GeoPt
        max_results=100,
        max_distance=radius)

    if results is None:
        return {'status': '0',
                'info': 'No stops found',
                }

    response_dict = {'status': '0'}
    stop_results = []
    # kind of a hack, but limit the results to one per route: the query
    # returns multiple rows per stop, so track seen stopIDs in a set
    seen = set()
    for stop in results:
        if stop.stopID not in seen:
            stop_results.append({
                'stopID': stop.stopID,
                'intersection': stop.intersection,
                'latitude': stop.location.lat,
                'longitude': stop.location.lon,
            })
            seen.add(stop.stopID)

    response_dict.update({'stop': stop_results})

    return response_dict
Esempio n. 5
0
    def post(self):
        """Create a placeholder StopLocation and attach it to every
        RouteListing that references the same stopID."""
        stopID = self.request.get('stopID')

        # '00' route/direction codes mark this as a manually-added stop
        stop = StopLocation()
        stop.stopID = stopID
        stop.routeID = '00'
        stop.intersection = self.request.get('intersection').upper()
        stop.location = GeoPt(self.request.get('lat'), self.request.get('lon'))
        stop.update_location()
        stop.direction = '00'
        logging.debug('created new stoplocation for %s' % stopID)
        stop.put()

        # point every route listing for this stop at the new location
        listing_query = db.GqlQuery("SELECT * FROM RouteListing WHERE stopID = :1", stopID)
        for listing in listing_query.fetch(100):
            logging.debug('updating route %s with new location' % listing.route)
            listing.stopLocation = stop
            listing.put()

        self.redirect('http://smsmybus.com/labs/displaystops')
Esempio n. 6
0
 def post(self):
     """Create or refresh StopLocation entities for a stopID and update
     the RouteListing references that point at it.

     Existing stops with this ID get their descriptions refreshed;
     otherwise a brand-new StopLocation is created.
     """
     stop_list = []
     route_list = []

     # zero-pad the stop ID out to four characters
     stopID      = self.request.get('stopID')
     if len(stopID) == 1:
         stopID = "000" + stopID
     if len(stopID) == 2:
         stopID = "00" + stopID
     if len(stopID) == 3:
         stopID = "0" + stopID

     name        = self.request.get('name')
     description = self.request.get('description')
     lat         = self.request.get('lat')
     lon         = self.request.get('lon')
     direction   = self.request.get('direction')

     # check to see if the stop exists already
     stops = db.GqlQuery("select * from StopLocation where stopID = :1", stopID).fetch(50)

     # if it does, append the stop description.
     # fetch() always returns a list, so only the length check is needed.
     if len(stops) > 0:
         for s in stops:
             stop_template = s
             s.description = description
             stop_list.append(s)
     else:
         # if it doesn't, create a new one
         s = StopLocation()
         stop_template = s

         s.stopID = stopID
         s.intersection = name.split('(')[0].rstrip()
         s.direction = direction
         s.description = description
         s.location = GeoPt(lat,lon)
         # compute the geo-boxes for proximity queries -- the sibling
         # handlers call this right after assigning location; without it
         # the new stop is invisible to proximity_fetch
         s.update_location()
         stop_list.append(s)

     # put the new stop in the datastore
     db.put(stop_list)
     logging.info('done updating stop locations for stopID %s' % stopID)

     # find all of the RouteListings with this stopID
     # loop through them and update the StopLocation references
     routes = db.GqlQuery("select * from RouteListing where stopID = :1", stopID).fetch(50)
     for r in routes:
         r.stopLocation = stop_template
         route_list.append(r)

     # save the route updates
     db.put(route_list)
     logging.info('done updating %s route listings for stopID %s' % (str(len(routes)),stopID) )

     self.response.set_status(200)
Esempio n. 7
0
    def get(self):
        """Refresh the memcached stop-location entries for requested stops.

        Rebuilds the cache when the 'clean' query parameter is set or the
        cache lookup comes back empty, paging through all StopLocation
        entities with a datastore cursor.
        """
        # do some analysis on the request history...
        reqs = getRequestedStops()

        # find the lat/longs for all the stops
        validStops = reqs.keys()
        stopLocs = memcache.get_multi(validStops)
        # get_multi() returns a dict (possibly empty), never None, so the
        # original `stopLocs is None` test could never fire; treat an empty
        # result as the cache-miss signal instead.
        if self.request.get('clean') or not stopLocs:
            memcache.delete_multi(validStops)
            logging.debug("logging stop locations!")
            cursor = None
            # Start a query for all stop locations
            q = StopLocation.all()
            while True:
                # If a cursor was stored on a previous pass, resume there.
                if cursor:
                    q.with_cursor(cursor)

                # Perform the query to get results.
                locationQuery = q.fetch(1000)
                cursor = q.cursor()
                if not locationQuery:
                    logging.debug('No more stop locations left in the query!')
                    break

                logging.debug(
                    'just read in another chunk of stop locations...')
                for l in locationQuery:
                    location = l.location
                    # NOTE(review): entries are written under '<stopID>:loc'
                    # keys, but the get_multi above looked up bare stopIDs --
                    # confirm the intended key scheme against the readers.
                    stopKey = l.stopID + ':loc'
                    if l.stopID in validStops and stopKey not in stopLocs:
                        logging.debug('adding location %s for stopID %s' %
                                      (location, l.stopID))
                        stopLocs[stopKey] = location

        memcache.set_multi(stopLocs)
        return
Esempio n. 8
0
    def get(self):
        """Rebuild the memcached stop locations for requested stops.

        A 'clean' request parameter, or an empty cache lookup, triggers a
        full re-read of StopLocation entities (paged via datastore cursor).
        """
        # do some analysis on the request history...
        reqs = getRequestedStops()

        # find the lat/longs for all the stops
        validStops = reqs.keys()
        stopLocs = memcache.get_multi(validStops)
        # memcache.get_multi() always returns a dict, so the original
        # `stopLocs is None` branch was dead code; an empty dict is the
        # real "nothing cached" condition.
        if self.request.get('clean') or not stopLocs:
            memcache.delete_multi(validStops)
            logging.debug("logging stop locations!")
            cursor = None
            # Start a query for all stop locations
            q = StopLocation.all()
            while True:
                # resume from the cursor saved on the previous pass
                if cursor:
                    q.with_cursor(cursor)

                # Perform the query to get results.
                locationQuery = q.fetch(1000)
                cursor = q.cursor()
                if not locationQuery:
                    logging.debug('No more stop locations left in the query!')
                    break

                logging.debug('just read in another chunk of stop locations...')
                for l in locationQuery:
                    location = l.location
                    # NOTE(review): cache writes use '<stopID>:loc' keys while
                    # the get_multi above used bare stopIDs -- confirm which
                    # key scheme the cache readers expect.
                    stopKey = l.stopID + ':loc'
                    if l.stopID in validStops and stopKey not in stopLocs:
                        #logging.debug('adding location %s for stopID %s' % (location,l.stopID))
                        stopLocs[stopKey] = location

        memcache.set_multi(stopLocs)
        return
Esempio n. 9
0
def nearbyStops(lat, lon, radius, routeID, destination):
    """Find stops within *radius* of (lat, lon), optionally filtered by
    route and destination.

    Args:
        lat, lon: center of the proximity search.
        radius: search radius, clamped to 1000.
        routeID: optional route filter ('' or None means all routes).
        destination: optional destination filter resolved through
            RouteListing via getDestinationCode().

    Returns:
        A response dict with 'status', 'timestamp' and a 'stop' list (one
        entry per unique stopID), or a status '-1' dict for an unknown
        destination.
    """
    route_stops = None

    # limit results to 200
    max_results = 200
    # clamp the radius value to 1000
    if radius > 1000:
        radius = 1000

    logging.debug('nearbyStops (%s,%s,%s,%s,%s)' %
                  (lat, lon, radius, routeID, destination))

    # When a route filter is given, collect its stopIDs so the proximity
    # results can be filtered afterwards -- the proximity query itself
    # cannot express this join.
    if routeID is not None and routeID != "":
        # use equality, not identity: the original `destination is not ""`
        # compared object identity, which is unreliable for strings
        if destination is not None and destination != "":
            destination_code = getDestinationCode(destination)
            if destination_code == -1:
                return {
                    'status': '-1',
                    'info': ('Unknown destination %s' % destination)
                }
            # first filter stops by route and destination...
            logging.debug(
                '... filter by destination for route %s and destination %s'
                % (routeID, destination_code))
            q = db.GqlQuery(
                'select stopID from RouteListing where route = :1 and direction = :2 order by route',
                routeID, destination_code)
        else:
            # first filter stops by route only...
            logging.debug('... filter by destination for route %s' % routeID)
            q = db.GqlQuery('select * from RouteListing where route = :1',
                            routeID)
        # a set gives O(1) membership tests in the filter loop below
        route_stops = {route.stopID for route in q.fetch(1000)}

    # every branch ran the identical proximity query, so run it once here
    results = StopLocation.proximity_fetch(
        StopLocation.all(),
        geotypes.Point(lat, lon),  # Or db.GeoPt
        max_results=max_results,
        max_distance=radius)

    if results is None:
        return {
            'status': '0',
            'timestamp': api_utils.getLocalTimestamp(),
            'info': 'No stops found',
            'stops': []
        }

    response_dict = {
        'status': '0',
        'timestamp': api_utils.getLocalTimestamp(),
    }
    stop_results = []
    seen_stops = set()
    logging.info('loop through %s results' % len(results))
    for stop in results:

        # manually apply the destination filter here because
        # the GQL query limits our ability to apply it in the
        # proximity query
        if route_stops is not None and stop.stopID not in route_stops:
            #logging.debug('filtered out %s' % stop.stopID)
            continue

        # kind of a hack, but limit the results to one per route.
        # the query will return multiple results for each stop
        if stop.stopID not in seen_stops:
            stop_results.append({
                'stopID': stop.stopID,
                'intersection': stop.intersection,
                'latitude': stop.location.lat,
                'longitude': stop.location.lon,
            })
            logging.debug('appending %s to route tracking list' % stop.stopID)
            seen_stops.add(stop.stopID)

    response_dict.update({'stop': stop_results})

    return response_dict
Esempio n. 10
0
    def post(self):
        """Store a stand-alone StopLocation and re-link the RouteListings
        that share its stopID."""
        stopID = self.request.get('stopID')
        lat = self.request.get('lat')
        lon = self.request.get('lon')

        # placeholder '00' codes mark a manually-added stop
        stop = StopLocation()
        stop.stopID = stopID
        stop.routeID = '00'
        stop.intersection = self.request.get('intersection').upper()
        stop.location = GeoPt(lat, lon)
        stop.update_location()
        stop.direction = '00'
        logging.debug('created new stoplocation for %s' % stopID)
        stop.put()

        # re-point every matching route listing at the new location
        matching = db.GqlQuery("SELECT * FROM RouteListing WHERE stopID = :1",
                               stopID).fetch(100)
        for listing in matching:
            logging.debug('updating route %s with new location' % listing.route)
            listing.stopLocation = stop
            listing.put()

        self.redirect('http://smsmybus.com/labs/displaystops')
Esempio n. 11
0
    def post(self):
        """Crawl a route page, extracting stop and route-listing data.

        Fetches the page named by the 'crawl' parameter (with retries),
        then scans its 'ada' anchors: titles containing '#' yield
        StopLocation / RouteListing entries; hrefs containing '?r='
        spawn follow-up crawler tasks.
        """
        try:
            scrapeURL = self.request.get('crawl')
            direction = self.request.get('direction')
            routeID = self.request.get('routeID')
            logging.debug("task scraping for %s, direction %s, route %s" % (scrapeURL,direction,routeID))

            loop = 0
            done = False
            result = None
            start = quota.get_request_cpu_usage()
            while not done and loop < 3:
                try:
                    # fetch the page
                    result = urlfetch.fetch(scrapeURL)
                    done = True
                except urlfetch.DownloadError:
                    logging.info("Error loading page (%s)... sleeping" % loop)
                    if result:
                        logging.debug("Error status: %s" % result.status_code)
                        logging.debug("Error header: %s" % result.headers)
                        logging.debug("Error content: %s" % result.content)
                    # The sleep/increment used to sit inside `if result:`.
                    # When the very first fetch failed (result still None)
                    # the loop never advanced and spun forever; always back
                    # off and count the attempt.
                    time.sleep(4)
                    loop = loop + 1
            end = quota.get_request_cpu_usage()
            #logging.info("scraping took %s cycles" % (end-start))

            if result is None:
                # every retry failed -- bail out rather than crash on
                # result.content below
                logging.error("unable to fetch %s after %s attempts" % (scrapeURL, loop))
                return

            # start to interrogate the results
            soup = BeautifulSoup(result.content)
            stopUpdates = []
            for slot in soup.html.body.findAll("a","ada"):
                logging.info("pulling out data from page... %s" % slot)

                if slot.has_key('href'):
                    href = slot['href']
                    title = slot['title']
                    logging.info("FOUND A TITLE ----> %s" % title)
                    # route crawler looks for titles with an ID# string
                    if title.find("#") > 0:
                        # we finally got down to the page we're looking for

                        # pull the stopID from the page content...
                        stopID = title.split("#")[1].split("]")[0]

                        # pull the intersection from the page content...
                        intersection = title.split("[")[0].strip()

                        logging.info("found stop %s, %s" % (stopID,intersection))

                        # check for conflicts...
                        stop = db.GqlQuery("SELECT * FROM StopLocation WHERE stopID = :1", stopID).get()
                        if stop is None:
                            # add the new stop (batched put at the end)
                            stop = StopLocation()
                            stop.stopID = stopID
                            stop.routeID = routeID
                            stop.intersection = intersection.upper()
                            stop.direction = direction.upper()
                            stopUpdates.append(stop)
                            logging.info("ADDED StopLocation (%s) - MINUS geo location" % stopID)
                        else:
                            logging.info("StopLoation entity already exists for %s..." % stopID)
                            stop.routeID = routeID
                            stopUpdates.append(stop)

                        # pull the route and direction data from the URL
                        routeData = scrapeURL.split('?')[1]
                        logging.info("FOUND THE PAGE ---> arguments: %s stopID: %s" % (routeData,stopID))
                        routeArgs = routeData.split('&')
                        routeID = routeArgs[0].split('=')[1]
                        directionID = routeArgs[1].split('=')[1]
                        timeEstimatesURL = CRAWL_URLBASE + href

                        # check for conflicts...
                        r = db.GqlQuery("SELECT * FROM RouteListing WHERE route = :1 AND direction = :2 AND stopID = :3",
                                        routeID, directionID, stopID).get()
                        if r is None:
                            # add the new route to the DB
                            route = RouteListing()
                            route.route = routeID
                            route.direction = directionID
                            route.stopID = stopID
                            route.scheduleURL = timeEstimatesURL
                            route.put()
                            logging.info("added new route listing entry to the database!")
                        else:
                            logging.error("we found a duplicate entry!?! %s", r.scheduleURL)
                    elif href.find("?r=") > -1:
                        # create a new task with this link
                        crawlURL = CRAWL_URLBASE + href
                        if routeID == '00':
                            routeID = href.split('r=')[1]
                        elif href.find("&") > -1:
                            routeID = href.split('&')[0].split('r=')[1]
                        task = Task(url='/routelist/crawlingtask', params={'crawl':crawlURL,'direction':title,'routeID':routeID})
                        task.add('crawler')
                        logging.info("Added new task for %s, direction %s, route %s" % (title.split(",")[0],title,routeID))

            # push the vehicle updates to the datastore
            db.put(stopUpdates)

        except apiproxy_errors.DeadlineExceededError:
            logging.error("DeadlineExceededError exception!?")
            return

        return
Esempio n. 12
0
def nearbyStops(lat,lon,radius,routeID,destination):
    """Proximity search for stops near (lat, lon).

    Optional routeID/destination parameters narrow the results via
    RouteListing lookups. Returns a response dict with status, timestamp
    and stops, or a status '-1' dict for an unknown destination.
    """
    route_stops = None

    # limit results to 200
    max_results = 200
    # clamp the radius value to 1000
    if radius > 1000:
        radius = 1000

    logging.debug('nearbyStops (%s,%s,%s,%s,%s)' % (lat,lon,radius,routeID,destination))
    if routeID is None or routeID == "":
        results = StopLocation.proximity_fetch(
             StopLocation.all(),
             geotypes.Point(lat,lon),  # Or db.GeoPt
             max_results=max_results,
             max_distance=radius)
    else:
        # equality test replaces the original `is not ""` identity check,
        # which is not a reliable string comparison
        if destination is not None and destination != "":

            destination_code = getDestinationCode(destination)
            if destination_code == -1 :
                response_dict = {'status' : '-1',
                                 'info' : ('Unknown destination %s' % destination)
                                }
                return response_dict
            else:
                # first filter stops by route and destination...
                logging.debug('... filter by destination for route %s and destination %s' % (routeID,destination_code))
                q = db.GqlQuery('select stopID from RouteListing where route = :1 and direction = :2 order by route', routeID, destination_code)
                # a set gives O(1) membership tests in the filter loop below
                route_stops = {route.stopID for route in q.fetch(1000)}

                results = StopLocation.proximity_fetch(
                     StopLocation.all(),
                     geotypes.Point(lat,lon),  # Or db.GeoPt
                     max_results=max_results,
                     max_distance=radius)
        else:
            # first filter stops by route only...
            logging.debug('... filter by destination for route %s' % routeID)
            q = db.GqlQuery('select * from RouteListing where route = :1', routeID)
            route_stops = {route.stopID for route in q.fetch(1000)}

            results = StopLocation.proximity_fetch(
                 StopLocation.all(),
                 geotypes.Point(lat,lon),  # Or db.GeoPt
                 max_results=max_results,
                 max_distance=radius)

    if results is None:
        response_dict = {'status':'0',
                         'timestamp':api_utils.getLocalTimestamp(),
                         'info':'No stops found',
                         'stops':[]
                        }
        return response_dict


    response_dict = {'status':'0','timestamp':api_utils.getLocalTimestamp(),}
    stop_results = []
    seen_stops = set()
    logging.info('loop through %s results' % len(results))
    for stop in results:

        # manually apply the destination filter here because
        # the GQL query limits our ability to apply it in the
        # proximity query
        if route_stops is not None and stop.stopID not in route_stops:
            #logging.debug('filtered out %s' % stop.stopID)
            continue

        # kind of a hack, but limit the results to one per route.
        # the query will return multiple results for each stop
        if stop.stopID not in seen_stops:

            stop_results.append(dict({
                                'stopID':stop.stopID,
                                'intersection':stop.intersection,
                                'latitude':stop.location.lat,
                                'longitude':stop.location.lon,
                                }))
            logging.debug('appending %s to route tracking list' % stop.stopID)
            seen_stops.add(stop.stopID)

    response_dict.update({'stop':stop_results})

    return response_dict
Esempio n. 13
0
    def post(self):
        """Crawl a route page and persist the stops/routes it describes.

        Fetches the page named by the 'crawl' parameter, then scans its
        'ada' anchors: titles containing '[ID#' describe individual stops
        (stopID + intersection) and are batched into StopLocation updates;
        hrefs containing '?r=' enqueue follow-up crawler tasks for a
        specific route.
        """
        try:
            scrapeURL = self.request.get('crawl')
            direction = self.request.get('direction')
            routeID = self.request.get('routeID')
            logging.debug("task scraping for %s, direction %s, route %s" % (scrapeURL,direction,routeID))
            
            # fetch the URL content
            content = fetchURL(scrapeURL)
            
            # start to interrogate the results
            soup = BeautifulSoup(content)
            stopUpdates = []
            for slot in soup.html.body.findAll("a","ada"):
                #logging.info("pulling out data from page... %s" % slot)

                if slot.has_key('href'):
                    href = slot['href']
                    title = slot['title']
                    logging.info("FOUND A TITLE ----> %s" % title)
                    # route crawler looks for titles with an ID# string
                    if title.find("[ID#") > 0:
                        # we finally got down to the page we're looking for. this is a reference
                        # to a specific stop including a stopID and intersection.
                        
                        # pull the stopID from the page content, e.g. '... [ID#1234]'
                        stopID = title.split("ID#")[1].split("]")[0]
                        
                        # pull the intersection from the page content (text before the bracket)
                        intersection = title.split("[")[0].strip()
                        
                        logging.info("found stop %s, %s" % (stopID,intersection))
                        
                        # check to see if we've already found this stop...
                        q = db.GqlQuery("SELECT * FROM StopLocation WHERE stopID = :1 and direction = :2 and routeID = :3", 
                                        stopID, direction.upper(), routeID)
                        stopQuery = q.fetch(1)
                        if len(stopQuery) == 0:
                            # add the new stop (geo location is filled in by a later pass)
                            stop = StopLocation()
                            stop.stopID = stopID
                            stop.routeID = routeID
                            stop.intersection = intersection.upper()
                            stop.direction = direction.upper()
                            stopUpdates.append(stop)  # we'll do a batch put at the end 
                            logging.info("added new stop listing MINUS geo location")
                        else:
                            # refresh the routeID on the existing entity
                            logging.info("already have this stop in the table...")
                            stopQuery[0].routeID = routeID
                            stopUpdates.append(stopQuery[0])
                        
                    elif href.find("?r=") > -1:
                        # this is step #2 and #3 from the algorithm documented above. we're going to create 
                        # a new task to go off and scrape the live route data for a specific route.
                        # NOTE(review): routeID is reassigned here and carries over to later
                        # loop iterations -- confirm that is the intended behavior.
                        crawlURL = CRAWL_URLBASE + href
                        if routeID == '00':
                            routeID = href.split('r=')[1]
                        elif href.find("&") > -1:
                            routeID = href.split('&')[0].split('r=')[1]
                        task = Task(url='/crawl/crawlingtask', params={'crawl':crawlURL,'direction':title,'routeID':routeID})
                        task.add('crawler')
                        logging.info("Added new task for %s, direction %s, route %s" % (title.split(",")[0],title,routeID))

            # push the StopLocation updates to the datastore
            db.put(stopUpdates)
                                        
        except apiproxy_errors.DeadlineExceededError:
            # ran out of request time -- abandon this crawl task
            logging.error("DeadlineExceededError exception!?")
            return
            
        return;