Ejemplo n.º 1
0
def venue_profile(venue):
    """Build a Venue from the JSON description of a single venue.

    The keys 'id', 'name', 'location', 'categories', 'stats', 'createdAt',
    'tags', 'shortUrl' and 'canonicalUrl' are read unconditionally; 'hours',
    'price', 'rating', 'mayor' and 'closed' are optional and yield None when
    absent.

    Returns None (after printing the venue id and its location) when a
    KeyError is raised while resolving the location, e.g. because 'lng' or
    'lat' is missing.
    """
    assert len(venue.keys()) > 1, "don't send top-level object"
    venue_id, venue_name = venue['id'], venue['name']
    location = venue['location']
    try:
        longitude = location['lng']
        latitude = location['lat']
        location = Location('Point', [longitude, latitude])._asdict()
        town = find_town(latitude, longitude, CITIES_TREE)
    except KeyError:
        # The location field is covered by a 2dsphere index, so a document
        # without coordinates cannot be inserted. A flat 2d index would be
        # enough geometrically but restricts compound indexes, see
        # http://docs.mongodb.org/manual/applications/geospatial-indexes/
        print(venue_id, location)
        return None

    # The first category id is singled out; the list keeps the remaining ones.
    category_ids = [entry['id'] for entry in venue['categories']]
    main_category = category_ids.pop(0) if category_ids else None

    checkins = venue['stats']['checkinsCount']
    users = venue['stats']['usersCount']
    tips = venue['stats']['tipCount']

    opening = parse_opening_time(venue['hours']) if 'hours' in venue else None
    if 'price' in venue:
        price_tier = venue['price']['tier']
    else:
        price_tier = None
    if 'rating' in venue:
        rating = venue['rating']
    else:
        rating = None
    created = datetime.fromtimestamp(venue['createdAt'])

    mayor_id = None
    if 'mayor' in venue:
        if 'user' in venue['mayor']:
            mayor_id = int(venue['mayor']['user']['id'])

    # Strip surrounding whitespace and drop duplicate tags.
    unique_tags = list(set(tag.strip() for tag in venue['tags']))
    short_url = venue['shortUrl']
    canonical_url = venue['canonicalUrl']
    likers, likes = get_list_of('likes', venue)
    is_closed = venue['closed'] if 'closed' in venue else None

    return Venue(venue_id, venue_name, location, category_ids, main_category,
                 checkins, users, tips, opening, price_tier, rating, created,
                 mayor_id, unique_tags, short_url, canonical_url, likes,
                 likers, town, is_closed)
Ejemplo n.º 2
0
def venue_profile(venue):
    """Convert one venue's JSON dictionary into a Venue.

    Mandatory keys are looked up directly, so a missing one raises KeyError;
    the optional keys ('hours', 'price', 'rating', 'mayor', 'closed') become
    None when they are not present.

    If a KeyError occurs while the coordinates are being resolved, the venue
    id and location are printed and None is returned instead of a Venue.
    """
    assert len(venue.keys()) > 1, "don't send top-level object"
    identifier = venue['id']
    label = venue['name']
    place = venue['location']
    try:
        lng = place['lng']
        lat = place['lat']
        place = Location('Point', [lng, lat])._asdict()
        city = find_town(lat, lng, CITIES_TREE)
    except KeyError:
        print(identifier, place)
        # A venue without coordinates cannot be stored: the location is
        # indexed with 2dsphere. (A 2d index would do for flat geometry, but
        # it limits compound indexes:
        # http://docs.mongodb.org/manual/applications/geospatial-indexes/)
        return None

    other_categories = [category['id'] for category in venue['categories']]
    if other_categories:
        # Pull the leading category out of the list.
        first_category = other_categories.pop(0)
    else:
        first_category = None

    counters = venue['stats']
    n_checkins = counters['checkinsCount']
    n_users = counters['usersCount']
    n_tips = counters['tipCount']

    schedule = None
    if 'hours' in venue:
        schedule = parse_opening_time(venue['hours'])
    tier = venue['price']['tier'] if 'price' in venue else None
    score = venue['rating'] if 'rating' in venue else None
    creation_time = datetime.fromtimestamp(venue['createdAt'])

    has_mayor = 'mayor' in venue and 'user' in venue['mayor']
    mayor = int(venue['mayor']['user']['id']) if has_mayor else None

    stripped_tags = [raw.strip() for raw in venue['tags']]
    tags = list(set(stripped_tags))
    short_link = venue['shortUrl']
    canonical_link = venue['canonicalUrl']
    likers, likes = get_list_of('likes', venue)
    closed = venue['closed'] if 'closed' in venue else None

    # Positional order expected by Venue.
    fields = (
        identifier, label, place, other_categories, first_category,
        n_checkins, n_users, n_tips, schedule, tier, score, creation_time,
        mayor, tags, short_link, canonical_link, likes, likers, city, closed,
    )
    return Venue(*fields)
def venue_profile(venue):
    """Return a Venue object from a venue json description.

    Args:
        venue: mapping describing a single venue. It must hold more than one
            key; the assertion message suggests this is meant to reject the
            top-level response wrapper.

    Returns:
        A Venue built from the fields of `venue`, or None when a KeyError is
        raised while resolving the location (for instance when "lng" or
        "lat" is missing). In that case the venue id and location are
        printed.

    Raises:
        AssertionError: if `venue` has one key or fewer.
        KeyError: if one of the mandatory keys ("id", "name", "location",
            "categories", "stats", "createdAt", "tags", "shortUrl",
            "canonicalUrl") is missing.
    """
    assert len(venue.keys()) > 1, "don't send top-level object"
    vid = venue["id"]
    name = venue["name"]
    loc = venue["location"]
    try:
        lon, lat = loc["lng"], loc["lat"]
        loc = Location("Point", [lon, lat])._asdict()
        city = find_town(lat, lon, CITIES_TREE)
    except KeyError:
        print(vid, loc)
        # Because loc is covered by a 2dsphere index, a document with no
        # location cannot be inserted. A 2d index would be enough (only flat
        # geometry is used) but it puts limitations on compound indexes:
        # http://docs.mongodb.org/manual/applications/geospatial-indexes/
        return None
    # NOTE(review): city is presumably None when no town matches; that case
    # is passed through to Venue unchanged -- confirm against find_town.
    cats = [c["id"] for c in venue["categories"]]
    # The first category id is reported on its own; `cats` keeps the rest.
    cat = cats.pop(0) if cats else None
    stats = venue["stats"]
    checkinsCount = stats["checkinsCount"]
    usersCount = stats["usersCount"]
    tipCount = stats["tipCount"]
    hours = parse_opening_time(venue["hours"]) if "hours" in venue else None
    price = venue["price"]["tier"] if "price" in venue else None
    rating = venue["rating"] if "rating" in venue else None
    createdAt = datetime.fromtimestamp(venue["createdAt"])
    mayor = None
    # "mayor" is optional like the other extra fields: check that the key
    # exists before looking inside it, otherwise a venue without a mayor
    # entry raises KeyError.
    if "mayor" in venue and "user" in venue["mayor"]:
        mayor = int(venue["mayor"]["user"]["id"])
    # Strip surrounding whitespace and drop duplicate tags.
    tags = list({t.strip() for t in venue["tags"]})
    shortUrl = venue["shortUrl"]
    canonicalUrl = venue["canonicalUrl"]
    likers, likes = get_list_of("likes", venue)
    closed = venue["closed"] if "closed" in venue else None

    return Venue(
        vid,
        name,
        loc,
        cats,
        cat,
        checkinsCount,
        usersCount,
        tipCount,
        hours,
        price,
        rating,
        createdAt,
        mayor,
        tags,
        shortUrl,
        canonicalUrl,
        likes,
        likers,
        city,
        closed,
    )
Ejemplo n.º 4
0
    seen = []
    how_many = 0
    with open(infile) as f:
        # Expected columns, tab-separated:
        # UserID\tTweetID\tLatitude\tLongitude\tCreatedAt\tText\tPlaceID
        for line in f:
            data = line.strip().split('\t')
            # Skip records that do not have exactly seven fields. The count
            # is compared by value: `is not 7` tests object identity, which
            # only works through CPython's small-integer cache and triggers
            # a SyntaxWarning on Python 3.8+.
            if len(data) != 7:
                continue
            uid, tid, x, y, t, msg, place = data
            # if not id_must_be_process(int(tid)):
            #     continue
            lat, lon = float(x), float(y)
            # city = find_city(lat, lon)
            # assert city == find_town(lat, lon, tree)
            city = th.find_town(lat, lon, tree)
            lid = None
            # Only records for which find_town returned a city are processed.
            if city is not None:
                lid = extract_url_from_msg(msg)
                stats[city] += 1
                how_many += 1
                tid, uid = int(tid), int(uid)
                t = datetime.strptime(t, '%Y-%m-%d %H:%M:%S')
                # NOTE(review): presumably converts the parsed UTC timestamp
                # to the city's local time -- confirm in cities.utc_to_local.
                t = cities.utc_to_local(city, t)
                # to have more numerical values (but lid should be a 64bit
                # unsigned integer which seems to be quite complicated in
                # mongo)
                # t = timegm(t.utctimetuple())
                # city = cities.INDEX[city]
                # Coordinates are stored longitude first.
                loc = Location('Point', [lon, lat])._asdict()
                # seen.append(CheckIn(tid, lid, uid, city, loc, t, place))
Ejemplo n.º 5
0
    seen = []
    how_many = 0
    with open(infile) as f:
        # Expected columns, tab-separated:
        # UserID\tTweetID\tLatitude\tLongitude\tCreatedAt\tText\tPlaceID
        for line in f:
            data = line.strip().split('\t')
            # Skip records that do not have exactly seven fields. The count
            # is compared by value: `is not 7` tests object identity, which
            # only works through CPython's small-integer cache and triggers
            # a SyntaxWarning on Python 3.8+.
            if len(data) != 7:
                continue
            uid, tid, x, y, t, msg, place = data
            # if not id_must_be_process(int(tid)):
            #     continue
            lat, lon = float(x), float(y)
            # city = find_city(lat, lon)
            # assert city == find_town(lat, lon, tree)
            city = th.find_town(lat, lon, tree)
            lid = None
            # Only records for which find_town returned a city are processed.
            if city is not None:
                lid = extract_url_from_msg(msg)
                stats[city] += 1
                how_many += 1
                tid, uid = int(tid), int(uid)
                t = datetime.strptime(t, '%Y-%m-%d %H:%M:%S')
                # NOTE(review): presumably converts the parsed UTC timestamp
                # to the city's local time -- confirm in cities.utc_to_local.
                t = cities.utc_to_local(city, t)
                # to have more numerical values (but lid should be a 64bit
                # unsigned integer which seems to be quite complicated in
                # mongo)
                # t = timegm(t.utctimetuple())
                # city = cities.INDEX[city]
                # Coordinates are stored longitude first.
                loc = Location('Point', [lon, lat])._asdict()
                # seen.append(CheckIn(tid, lid, uid, city, loc, t, place))