from read_foursquare import Location import foursquare import cities import CommonMongo as cm import pycurl import cStringIO as cs from utils import get_nested from lxml import etree PARSER = etree.HTMLParser() # thanks Google Chrome (although this is rather fragile) XPATH_QUERY = '//*[@id="container"]/div/div[2]/div[2]/div[2]/ul' from collections import namedtuple from api_keys import FOURSQUARE_ID as CLIENT_ID from api_keys import FOURSQUARE_SECRET as CLIENT_SECRET from twitter_helper import obtain_tree, find_town CITIES_TREE = obtain_tree() # https://developer.foursquare.com/docs/responses/venue Venue = namedtuple('Venue', [ 'id', 'name', 'loc', 'cats', 'cat', 'checkinsCount', 'usersCount', 'tipCount', 'hours', 'price', 'rating', 'createdAt', 'mayor', 'tags', 'shortUrl', 'canonicalUrl', 'likes', 'likers', 'city', 'closed' ]) # https://developer.foursquare.com/docs/responses/user User = namedtuple('User', [ 'id', 'firstName', 'lastName', 'friends', 'friendsCount', 'gender', 'homeCity', 'tips', 'lists', 'badges', 'mayorships', 'photos', 'checkins' ]) def parse_opening_time(info):
previously = load_var('avenues_id_new_kosh') except IOError: previously = None venues_getter = VenueIdCrawler(previously, use_network=False) checkins = None client = pymongo.MongoClient('localhost', 27017) db = client['foursquare'] checkins = db['checkin'] checkins.ensure_index([('loc', pymongo.GEOSPHERE), ('lid', pymongo.ASCENDING), ('city', pymongo.ASCENDING), ('time', pymongo.ASCENDING)]) import sys infile = 'verysmall' if len(sys.argv) < 2 else sys.argv[1] tree = th.obtain_tree() stats = defaultdict(lambda: 0) # def find_city(x, y): # for city in bboxes: # if city.contains(x, y): # return city.name # return None seen = [] how_many = 0 with open(infile) as f: # UserID\tTweetID\tLatitude\tLongitude\tCreatedAt\tText\tPlaceID for line in f: data = line.strip().split('\t') if len(data) is not 7:
import CommonMongo as cm import pycurl import cStringIO as cs from utils import get_nested from lxml import etree PARSER = etree.HTMLParser() # thanks Google Chrome (although this is rather fragile) XPATH_QUERY = '//*[@id="container"]/div/div[2]/div[2]/div[2]/ul' from collections import namedtuple # from api_keys import FOURSQUARE_ID as CLIENT_ID # from api_keys import FOURSQUARE_SECRET as CLIENT_SECRET from twitter_helper import obtain_tree, find_town CITIES_TREE = obtain_tree() # https://developer.foursquare.com/docs/responses/venue Venue = namedtuple( "Venue", [ "id", "name", "loc", "cats", "cat", "checkinsCount", "usersCount", "tipCount", "hours", "price",