def _count_matching_checkins(checkins, window):
    """Count pairs of check-ins at one venue by different users close in time.

    A pair matches when both check-ins share a ``venue_id``, have different
    ``user_id`` values and their ``created_at`` timestamps are less than
    *window* (a ``datetime.timedelta``) apart.  Every unordered pair is
    counted exactly once.
    """
    # Only check-ins at the same venue can match, so bucket them first
    # instead of comparing every check-in with every other one.
    by_venue = {}
    for checkin in checkins:
        by_venue.setdefault(checkin.venue_id, []).append(checkin)

    count = 0
    for group in by_venue.values():
        for index, first in enumerate(group):
            # Pair each check-in only with the ones after it, so a pair is
            # never visited twice and never compared with itself.
            for second in group[index + 1:]:
                if first.user_id == second.user_id:
                    continue
                first_time = datetime.datetime.fromtimestamp(first.created_at)
                second_time = datetime.datetime.fromtimestamp(second.created_at)
                # abs() keeps the test symmetric: without it any pair whose
                # first check-in is the earlier one passes, whatever the gap.
                if abs(first_time - second_time) < window:
                    count += 1
    return count


def print_checkin_stats():
    """Print check-in statistics to stdout every two minutes, forever.

    Loads all check-ins through ``DBWrapper.get_all_checkins()`` and then
    repeatedly prints the number of check-ins captured and the number of
    matching pairs: two check-ins at the same venue, by different users,
    less than one hour apart.  Each pair is counted once.

    This function never returns; interrupt the process to stop it.
    """
    dbw = DBWrapper()
    # NOTE(review): the check-ins are fetched once, before the loop, so every
    # report is computed from the same result unless get_all_checkins()
    # returns a live collection - confirm whether a re-query was intended.
    checkins = dbw.get_all_checkins()
    delay = 60 * 2
    window = datetime.timedelta(hours=1)
    next_check_time = time.time()
    while True:
        # Wait until the next report is due.  The remaining time is computed
        # once per pass and checked before sleeping, so time.sleep() is never
        # handed a negative duration.
        while True:
            sleep_dur = next_check_time - time.time()
            if sleep_dur <= 0:
                break
            time.sleep(sleep_dur)

        print('unique checkins captured: %d' % len(checkins))
        print('matching checkins: %d' % _count_matching_checkins(checkins, window))
        next_check_time = time.time() + delay
def print_locations():
    """Print one ``name;latitude;longitude;users`` line per stored venue.

    ``users`` is the largest ``users`` value among the venue's statistics
    and is never less than 0 (the starting value of the comparison).
    """
    database = DBWrapper()
    for venue in database.get_all_venues():
        stats = venue.statistics
        place = venue.location
        # Seed the list with 0 so venues without statistics report 0.
        peak_users = max([0] + [entry.users for entry in stats])
        row = (venue.name, place.latitude, place.longitude, peak_users)
        print('%s;%.6f;%.6f;%s' % row)
def count_venues_in_polygon():
    """Print how many stored venues lie inside a fixed, hard-coded polygon.

    Each venue is turned into ``Point(latitude, longitude)`` and tested with
    ``point_inside_polygon``; the total is printed, nothing is returned.
    """
    database = DBWrapper()
    all_venues = database.get_all_venues()
    # Vertices are presumably (latitude, longitude), matching the order used
    # for the points below - confirm against the Polygon class in use.
    boundary = Polygon([
        (51.4648, -2.6107),
        (51.4707, -2.5924),
        (51.4738, -2.5619),
        (51.4683, -2.5463),
        (51.4602, -2.5365),
        (51.4496, -2.5350),
        (51.4319, -2.5457),
        (51.4304, -2.5887),
        (51.4334, -2.6089),
        (51.4471, -2.6194),
        (51.4560, -2.6195),
    ])
    inside = sum(
        1
        for venue in all_venues
        if point_inside_polygon(
            Point(venue.location.latitude, venue.location.longitude),
            boundary,
        )
    )
    print(inside)
def _kml_placemark(venue):
    """Return the ``<Placemark>`` element for *venue* as one string."""
    # Crude sanitising kept from the original output format: '&' becomes
    # 'and' and '<' is dropped so the name cannot break the XML.
    name = venue.name.replace('&', 'and').replace('<', '').encode('utf-8')
    return (
        '<Placemark>'
        '<description>"%s"</description>'
        '<Point>'
        '<coordinates>%.8f,%.8f</coordinates>'
        '</Point>'
        '</Placemark>'
    ) % (name, venue.location.longitude, venue.location.latitude)


def print_kml():
    """Export all venues as KML placemarks and print how many were written.

    Two files are (re)created in the current working directory:

    * ``locations.kml`` - a placemark for every venue returned by
      ``DBWrapper.get_all_venues()``;
    * ``locations_restricted.kml`` - only the venues for which
      ``point_inside_polygon`` reports that they lie inside the hard-coded
      ``cardiff_polygon``.

    Coordinates are written as ``longitude,latitude``.  Both files are
    opened with ``with`` so they are flushed and closed even if an error
    occurs part-way through the export.
    """
    kml_header = (
        '<?xml version="1.0" encoding="UTF-8"?>'
        '<kml xmlns="http://www.opengis.net/kml/2.2">'
        '<Folder>'
    )
    kml_footer = '</Folder></kml>'

    with open('locations.kml', 'w') as f:
        with open('locations_restricted.kml', 'w') as g:
            dbw = DBWrapper()
            venues = dbw.get_all_venues()
            cardiff_polygon = Polygon([
                (51.4846, -3.2314),
                (51.4970, -3.2162),
                (51.5043, -3.1970),
                (51.5010, -3.1575),
                (51.4831, -3.1411),
                (51.4660, -3.1356),
                (51.4514, -3.1562),
                (51.4260, -3.1692),
                (51.4320, -3.1878),
            ])

            f.write(kml_header)
            g.write(kml_header)

            count = 0
            for venue in venues:
                count += 1
                placemark = _kml_placemark(venue)
                f.write(placemark)

                location = venue.location
                point = Point(location.latitude, location.longitude)
                if point_inside_polygon(point, cardiff_polygon):
                    g.write(placemark)

            f.write(kml_footer)
            g.write(kml_footer)

    print(count)
# NOTE(review): the statement below is the tail of a definition that begins
# before this chunk; it is reproduced unchanged and its original indentation
# is not known from here.
logging.debug(e)


def point_inside_polygon(point,poly):
    """Return the result of ``poly.contains(point)``."""
    return poly.contains(point)


if __name__ == "__main__":
    #
    # Logging
    logging.basicConfig(
        filename="4sq.log",
        level=logging.DEBUG,
        datefmt='%d/%m/%y|%H:%M:%S',
        format='|%(asctime)s|%(levelname)s| %(message)s',
    )
    logging.info('checkin monitor initiated')

    import _credentials

    dbw = DBWrapper()

    # load credentials
    client_id = _credentials.client_id
    client_secret = _credentials.client_secret
    access_tokens = _credentials.access_tokens

    # use venue gateway not normal gateway so can do more than 500 calls an hour
    venues = dbw.get_all_venues()  #_with_checkins()

    # Hourly quota: three times the number of venues, capped at 5000.
    calls = min(len(venues) * 3, 5000)

    venue_gateway = VenueAPIGateway(
        client_id=client_id,
        client_secret=client_secret,
        token_hourly_query_quota=calls,
    )
    gateway = APIGateway(
        access_tokens=access_tokens,
        token_hourly_query_quota=450,
    )

    api = APIWrapper(gateway)
    venue_api = APIWrapper(venue_gateway)
#
# Logging
import logging

logging.basicConfig(
    filename="4sq.log",
    level=logging.DEBUG,
    datefmt='%d/%m/%y|%H:%M:%S',
    format='|%(asctime)s|%(levelname)s| %(message)s',
)
logging.info('initiating a friend crawl.')

#
# Prep
import _credentials

access_tokens = _credentials.access_tokens
gateway = APIGateway(
    access_tokens=access_tokens,
    token_hourly_query_quota=500,
)
api = APIWrapper(gateway)
dbw = DBWrapper()

# Create the friendships table the first time this script runs against a
# database that does not have it yet.
#~ from database import Friendship
if not dbw._get_engine().has_table('friendships'):
    dbw._get_engine().create(Friendship.__table__)

#
# Begin mining...
# Number this crawl one higher than the highest crawl id already stored,
# starting at 1 when nothing has been stored.
max_crawl_id = dbw.get_friendships_max_crawl_id()
if max_crawl_id is not None:
    crawl_id = max_crawl_id + 1
else:
    logging.debug('no previous crawls found.')
    crawl_id = 1
def get_venue_details( id ):
    """Query the "venues" resource for *id* without propagating failures.

    Uses the module-level ``api`` object (assigned in the ``__main__``
    section below) with ``userless=True`` and ``tenacious=True``.

    Returns a ``(response, succeeded)`` tuple: the value returned by
    ``api.query_resource`` and ``True`` on success, or ``('', False)`` when
    the call raises any ``Exception``; the exception is logged at DEBUG
    level.
    """
    try:
        details = api.query_resource( "venues", id, userless=True, tenacious=True )
    except Exception as error:
        logging.debug( u'STAT_CHK Error (Venue deletion/Foursquare down?), moving on. ' )
        logging.debug( error )
        return '', False
    return details, True


if __name__ == "__main__":
    dbw = DBWrapper()
    setproctitle('STAT_CHK')

    # load credentials
    client_id = _credentials.sc_client_id
    client_secret = _credentials.sc_client_secret
    client_tuples = [(client_id, client_secret)]
    access_tokens = _credentials.sc_access_token

    # Positional arguments are passed through exactly as before; see the
    # APIGateway definition for their meaning.
    gateway = APIGateway( access_tokens, 500, client_tuples, 5000 )
    api = APIWrapper( gateway )

    venues = dbw.get_all_venues( )

    # Record the start of this crawl in the database.
    crawl_string = 'CHECK_STATS'
    dbw.add_crawl_to_database( crawl_string, 'START', now.now( ) )
wrapper routine to call the venues API. """ while True: response = '' try : response = api.query_resource( "venues", id, aspect=aspect, userless=userless, tenacious=True ) return response, True # anything else, record and try again except Exception as e: logging.debug( u'CHK_MON Error (Venue deletion/Foursquare down?), moving on. ' ) return response, False if __name__ == "__main__": import _credentials dbw = DBWrapper( ) # # Input & args args = sys.argv if len(args) is not 2: print "Incorrect number of arguments - please supply city_code" exit(1) else: city_code = args[1] logging.info( u'CHK_MON Restarted monitor_checkins.py' ) logging.info( u'CHK_MON Running with city_code: %s' % city_code ) setproctitle( u'CHK_MON %s' % city_code )