def venue_profile(venue):
    """Build a Venue from the JSON description of a single venue.

    `venue` must be the venue mapping itself, not the top-level object that
    wraps it; this is asserted through its number of keys.

    Return None, after printing the venue id and its location, when a
    KeyError occurs while reading the 'lng'/'lat' coordinates, building the
    point or looking up the city.
    """
    assert len(venue.keys()) > 1, "don't send top-level object"
    venue_id = venue['id']
    venue_name = venue['name']
    position = venue['location']
    try:
        longitude = position['lng']
        latitude = position['lat']
        position = Location('Point', [longitude, latitude])._asdict()
        town = find_town(latitude, longitude, CITIES_TREE)
    except KeyError:
        # The location is covered by a 2dsphere index, so a document without
        # a location cannot be inserted. A 2d index would have been possible
        # (only flat geometry is used) but it restricts compound indexes:
        # http://docs.mongodb.org/manual/applications/geospatial-indexes/
        print(venue_id, position)
        return None
    # if town is None:
    #     print("can't match {}".format(venue['location']))

    # The first category id is split off; the remaining ids stay in the list.
    other_categories = [category['id'] for category in venue['categories']]
    main_category = other_categories.pop(0) if other_categories else None

    checkins = venue['stats']['checkinsCount']
    users = venue['stats']['usersCount']
    tips = venue['stats']['tipCount']

    # Optional fields default to None when the key is absent.
    opening = parse_opening_time(venue['hours']) if 'hours' in venue else None
    price_tier = venue['price']['tier'] if 'price' in venue else None
    score = venue['rating'] if 'rating' in venue else None
    created = datetime.fromtimestamp(venue['createdAt'])

    mayor_id = None
    if 'mayor' in venue:
        if 'user' in venue['mayor']:
            mayor_id = int(venue['mayor']['user']['id'])

    # Strip each tag and drop duplicates (the resulting order is the set's).
    unique_tags = list(set(tag.strip() for tag in venue['tags']))
    short_url = venue['shortUrl']
    canonical_url = venue['canonicalUrl']
    likers, likes = get_list_of('likes', venue)
    is_closed = venue['closed'] if 'closed' in venue else None

    return Venue(venue_id, venue_name, position, other_categories,
                 main_category, checkins, users, tips, opening, price_tier,
                 score, created, mayor_id, unique_tags, short_url,
                 canonical_url, likes, likers, town, is_closed)
def venue_profile(venue):
    """Convert one venue's JSON description into a Venue object.

    The argument is expected to be the venue mapping (more than one key),
    not the enclosing top-level object.

    When a KeyError is raised while extracting the coordinates, creating
    the point or resolving the city, the venue id and location are printed
    and None is returned instead of a Venue.
    """
    assert len(venue.keys()) > 1, "don't send top-level object"
    identifier, label = venue['id'], venue['name']
    geo = venue['location']
    try:
        lng, lat = geo['lng'], geo['lat']
        geo = Location('Point', [lng, lat])._asdict()
        city_name = find_town(lat, lng, CITIES_TREE)
    except KeyError:
        print(identifier, geo)
        # Because the location uses a 2dsphere index, a document with no
        # location cannot be inserted. A 2d index could have been used
        # (only flat geometry is needed) but then compound indexes are
        # limited:
        # http://docs.mongodb.org/manual/applications/geospatial-indexes/
        return None
    # if city_name is None:
    #     print("can't match {}".format(venue['location']))

    category_ids = [entry['id'] for entry in venue['categories']]
    if len(category_ids) == 0:
        first_category = None
    else:
        # Removes the first id from the list that is passed along.
        first_category = category_ids.pop(0)

    n_checkins = venue['stats']['checkinsCount']
    n_users = venue['stats']['usersCount']
    n_tips = venue['stats']['tipCount']

    opening_hours = None
    if 'hours' in venue:
        opening_hours = parse_opening_time(venue['hours'])

    tier = None
    if 'price' in venue:
        tier = venue['price']['tier']

    rating_value = None
    if 'rating' in venue:
        rating_value = venue['rating']

    creation_time = datetime.fromtimestamp(venue['createdAt'])

    has_mayor_user = 'mayor' in venue and 'user' in venue['mayor']
    mayor_uid = int(venue['mayor']['user']['id']) if has_mayor_user else None

    stripped_tags = [raw.strip() for raw in venue['tags']]
    distinct_tags = list(set(stripped_tags))
    short_link = venue['shortUrl']
    canonical_link = venue['canonicalUrl']
    likers, likes = get_list_of('likes', venue)

    closed_flag = None
    if 'closed' in venue:
        closed_flag = venue['closed']

    return Venue(
        identifier, label, geo, category_ids, first_category,
        n_checkins, n_users, n_tips, opening_hours, tier, rating_value,
        creation_time, mayor_uid, distinct_tags, short_link, canonical_link,
        likes, likers, city_name, closed_flag,
    )
def venue_profile(venue):
    """Return a Venue object from a venue json description.

    `venue` must be the venue mapping itself rather than the top-level
    object wrapping it (asserted through its number of keys).

    Returns:
        A Venue, or None when a KeyError occurs while reading the
        "lng"/"lat" coordinates, building the point or looking up the city
        (the venue id and location are printed in that case).

    Raises:
        KeyError: if a mandatory field ("id", "name", "location",
            "categories", "stats", "createdAt", "tags", "shortUrl" or
            "canonicalUrl") is missing.
    """
    assert len(venue.keys()) > 1, "don't send top-level object"
    vid = venue["id"]
    name = venue["name"]
    loc = venue["location"]
    try:
        lon, lat = loc["lng"], loc["lat"]
        loc = Location("Point", [lon, lat])._asdict()
        city = find_town(lat, lon, CITIES_TREE)
    except KeyError:
        print(vid, loc)
        # Because loc is under a 2dsphere index, one cannot insert a document
        # with no location. A 2d index could have been used (only flat
        # geometry is needed) but then there are limitations on compound
        # indexes:
        # http://docs.mongodb.org/manual/applications/geospatial-indexes/
        return None
    # if city is None:
    #     print("can't match {}".format(venue['location']))

    # The first category id becomes `cat`; `cats` keeps the remaining ones.
    cats = [c["id"] for c in venue["categories"]]
    cat = cats.pop(0) if cats else None
    checkinsCount = venue["stats"]["checkinsCount"]
    usersCount = venue["stats"]["usersCount"]
    tipCount = venue["stats"]["tipCount"]

    # Optional fields are None when absent from the description.
    hours = None
    if "hours" in venue:
        hours = parse_opening_time(venue["hours"])
    price = None if "price" not in venue else venue["price"]["tier"]
    rating = None if "rating" not in venue else venue["rating"]
    createdAt = datetime.fromtimestamp(venue["createdAt"])

    # "mayor" is optional as well: check for it before looking inside,
    # otherwise a venue without that key raises KeyError.
    mayor = None
    if "mayor" in venue and "user" in venue["mayor"]:
        mayor = int(venue["mayor"]["user"]["id"])

    # Strip whitespace and remove duplicate tags.
    tags = list({t.strip() for t in venue["tags"]})
    shortUrl = venue["shortUrl"]
    canonicalUrl = venue["canonicalUrl"]
    likers, likes = get_list_of("likes", venue)
    closed = None if "closed" not in venue else venue["closed"]
    return Venue(
        vid,
        name,
        loc,
        cats,
        cat,
        checkinsCount,
        usersCount,
        tipCount,
        hours,
        price,
        rating,
        createdAt,
        mayor,
        tags,
        shortUrl,
        canonicalUrl,
        likes,
        likers,
        city,
        closed,
    )
# Read a tab-separated file of tweets and, for each one located in a known
# city, convert its fields and count it in `stats` and `how_many`.
seen = []
how_many = 0
with open(infile) as f:
    # Columns:
    # UserID\tTweetID\tLatitude\tLongitude\tCreatedAt\tText\tPlaceID
    for line in f:
        data = line.strip().split('\t')
        # Skip malformed lines. The count is compared by value: `is not 7`
        # tests object identity, which only happens to work for small ints
        # in CPython and triggers a SyntaxWarning on recent versions.
        if len(data) != 7:
            continue
        uid, tid, x, y, t, msg, place = data
        # if not id_must_be_process(int(tid)):
        #     continue
        lat, lon = float(x), float(y)
        # city = find_city(lat, lon)
        # assert city == find_town(lat, lon, tree)
        city = th.find_town(lat, lon, tree)
        lid = None
        # NOTE(review): the conversions below are assumed to belong to this
        # branch, as they all work on a matched city -- confirm.
        if city is not None:
            lid = extract_url_from_msg(msg)
            stats[city] += 1
            how_many += 1
            tid, uid = int(tid), int(uid)
            t = datetime.strptime(t, '%Y-%m-%d %H:%M:%S')
            t = cities.utc_to_local(city, t)
            # to have more numerical values (but lid should be a 64bit
            # unsigned integer which seems to be quite complicated in
            # mongo)
            # t = timegm(t.utctimetuple())
            # city = cities.INDEX[city]
            # Coordinates are passed in [longitude, latitude] order.
            loc = Location('Point', [lon, lat])._asdict()
            # seen.append(CheckIn(tid, lid, uid, city, loc, t, place))
# Scan the tweets stored in `infile` (one per line, tab-separated) and
# update `stats` / `how_many` for every tweet that falls inside a city.
seen = []
how_many = 0
with open(infile) as f:
    # Line layout:
    # UserID\tTweetID\tLatitude\tLongitude\tCreatedAt\tText\tPlaceID
    for line in f:
        data = line.strip().split('\t')
        # Ignore lines that do not have exactly seven fields. Use `!=`
        # rather than `is not`: identity comparison with an int literal
        # depends on interpreter caching and raises a SyntaxWarning.
        if len(data) != 7:
            continue
        uid, tid, x, y, t, msg, place = data
        # if not id_must_be_process(int(tid)):
        #     continue
        lat, lon = float(x), float(y)
        # city = find_city(lat, lon)
        # assert city == find_town(lat, lon, tree)
        city = th.find_town(lat, lon, tree)
        lid = None
        # NOTE(review): everything down to `loc` is presumed to be nested
        # under this test since it operates on the matched city -- verify.
        if city is not None:
            lid = extract_url_from_msg(msg)
            stats[city] += 1
            how_many += 1
            tid, uid = int(tid), int(uid)
            # Parse the creation time, then shift it with the city helper.
            t = datetime.strptime(t, '%Y-%m-%d %H:%M:%S')
            t = cities.utc_to_local(city, t)
            # to have more numerical values (but lid should be a 64bit
            # unsigned integer which seems to be quite complicated in
            # mongo)
            # t = timegm(t.utctimetuple())
            # city = cities.INDEX[city]
            loc = Location('Point', [lon, lat])._asdict()
            # seen.append(CheckIn(tid, lid, uid, city, loc, t, place))