Example #1
0
def plot_city(city, weekly=False, clusters=5):
    """Cluster the venue time patterns of `city` and save the centroids.

    Every venue of `city` with more than 5 visits is turned into a frequency
    pattern, taken at index `int(weekly)` of the result of
    `xp.aggregate_visits`. The patterns are clustered in `clusters` groups
    7 times and the run with the lowest distortion is kept. Its centroids are
    reordered and written with `sio.savemat` under the key 't', in
    'times/<city>_weekly_time' or 'times/<city>_daily_time'.

    Plotting itself is currently disabled (see the commented-out calls).

    Raises ValueError if none of the runs yields a distortion that compares
    lower than infinity (e.g. only NaN), as there is then nothing to save.
    """
    shift = 2  # start from 1am instead of midnight
    chunk = 4
    venue_visits = xp.get_visits(CLIENT, xp.Entity.venue, city)
    # Compute aggregated frequency for venues with more than 5 visits
    enough = {
        k: xp.to_frequency(xp.aggregate_visits(v, shift, chunk)[int(weekly)])
        for k, v in venue_visits.items() if len(v) > 5
    }
    # list() matters on Python 3: values() is a view there, and NumPy would
    # otherwise wrap it in a 0-d object array instead of a 2-d matrix.
    sval = np.array(list(enough.values()))
    min_disto = float('inf')
    ak = None
    for _ in range(7):
        tak, tkl = DO_CLUSTER(sval, clusters)
        current_disto = vf.get_distorsion(tak, tkl, sval)
        if current_disto < min_disto:
            min_disto, ak = current_disto, tak
    if ak is None:
        raise ValueError('no clustering run produced a usable distortion')
    # Row order applied to the centroids before saving; presumably meant to
    # give clusters a stable order across runs -- TODO confirm.
    std_ord = np.argsort((np.argsort(ak)), 0)[:, -1]
    # vf.draw_classes(ak[std_ord, :], shift, chunk)
    # vf.plt.title('{}, {} venues'.format(city, len(enough)))
    # vf.plt.ylim([0, 0.28 if weekly else 0.9])
    out_path = 'times/' + city + ('_weekly' if weekly else '_daily')
    sio.savemat(out_path + '_time', {'t': ak[std_ord, :]},
                do_compression=True)
Example #2
0
def global_info(city, standalone=False):
    """Collect the city-wide data structures describing `city`.

    Returns a flat list of nine items, in this order: the projected
    locations of venues, checkins and photos; the venue visits, the visitors
    and the density estimate; the `s.Surrounding` objects built for venues,
    checkins and photos.

    Side effects: the global `TOP_CATS` is reloaded from 'top_cats.my', and
    the venue surrounding is always saved with `p.save_var`. When
    `standalone` is true, all three surroundings are saved (the venue one a
    second time) and they are built with a different set of extra fields.
    """
    global TOP_CATS

    def project(collection, key):
        """Project the 'loc' of documents of `collection` matching `city`."""
        return geo_project(city, collection.find({key: city}, {'loc': 1}))

    venue_locs = project(DB.venue, 'city')
    checkin_locs = project(DB.checkin, 'city')
    photo_locs = project(CLIENT.world.photos, 'hint')

    visits = xp.get_visits(CLIENT, xp.Entity.venue, city)
    visitors = xp.get_visitors(CLIENT, city)
    density = estimate_density(city)

    TOP_CATS = p.load_var('top_cats.my')

    # Extra fields attached to each surrounding depend on the mode.
    if standalone:
        venue_fields, photo_fields = [], ['taken']
    else:
        venue_fields, photo_fields = ['cat', 'cats'], ['venue']

    venue_around = s.Surrounding(DB.venue, {'city': city}, venue_fields,
                                 venue_locs)
    checkin_around = s.Surrounding(DB.checkin, {'city': city}, ['time'],
                                   checkin_locs)
    photo_around = s.Surrounding(CLIENT.world.photos, {'hint': city},
                                 photo_fields, photo_locs)

    filename = '{}_s{}s.my'.format
    p.save_var(filename(city, 'venue'), venue_around)
    if standalone:
        named = (('venue', venue_around),
                 ('checkin', checkin_around),
                 ('photo', photo_around))
        for kind, surrounding in named:
            p.save_var(filename(city, kind), surrounding)

    return [venue_locs, checkin_locs, photo_locs,
            visits, visitors, density,
            venue_around, checkin_around, photo_around]
Example #3
0
def plot_city(city, weekly=False, clusters=5):
    """Cluster the venue time patterns of `city` and save the centroids.

    Every venue of `city` with more than 5 visits is turned into a frequency
    pattern, taken at index `int(weekly)` of the result of
    `xp.aggregate_visits`. The patterns are clustered in `clusters` groups
    7 times and the run with the lowest distortion is kept. Its centroids are
    reordered and written with `sio.savemat` under the key "t", in
    "times/<city>_weekly_time" or "times/<city>_daily_time".

    Plotting itself is currently disabled (see the commented-out calls).

    Raises ValueError if none of the runs yields a distortion that compares
    lower than infinity (e.g. only NaN), as there is then nothing to save.
    """
    shift = 2  # start from 1am instead of midnight
    chunk = 4
    venue_visits = xp.get_visits(CLIENT, xp.Entity.venue, city)
    # Compute aggregated frequency for venues with more than 5 visits
    enough = {
        k: xp.to_frequency(xp.aggregate_visits(v, shift, chunk)[int(weekly)])
        for k, v in venue_visits.items()
        if len(v) > 5
    }
    # list() matters on Python 3: values() is a view there, and NumPy would
    # otherwise wrap it in a 0-d object array instead of a 2-d matrix.
    sval = np.array(list(enough.values()))
    min_disto = float("inf")
    ak = None
    for _ in range(7):
        tak, tkl = DO_CLUSTER(sval, clusters)
        current_disto = vf.get_distorsion(tak, tkl, sval)
        if current_disto < min_disto:
            min_disto, ak = current_disto, tak
    if ak is None:
        raise ValueError("no clustering run produced a usable distortion")
    # Row order applied to the centroids before saving; presumably meant to
    # give clusters a stable order across runs -- TODO confirm.
    std_ord = np.argsort((np.argsort(ak)), 0)[:, -1]
    # vf.draw_classes(ak[std_ord, :], shift, chunk)
    # vf.plt.title('{}, {} venues'.format(city, len(enough)))
    # vf.plt.ylim([0, 0.28 if weekly else 0.9])
    out_path = "times/" + city + ("_weekly" if weekly else "_daily")
    sio.savemat(out_path + "_time", {"t": ak[std_ord, :]},
                do_compression=True)
Example #4
0
def photos_around(id_, centroid, offset, daily, radius=200):
    """Summarise the photos taken within `radius` of the location of `id_`.

    Returns a 4-tuple: the number of entries returned by `xp.get_visits`,
    the frequency pattern built from them, the Euclidean distance between
    that pattern and every row of `centroid`, and the index of the closest
    row.
    """
    ball = (get_loc(id_), radius)
    photos = xp.get_visits(CLIENT, xp.Entity.photo, ball=ball)
    aggregated = xp.aggregate_visits(photos.values(), offset)
    # NOTE(review): `daily` is used directly as an index into the aggregated
    # result -- confirm which position holds the daily pattern.
    kind = xp.to_frequency(aggregated[daily])
    # Repeat the pattern once per centroid row so they can be subtracted.
    repeated = np.tile(kind, (centroid.shape[0], 1))
    # pylint: disable=E1101
    classes = np.linalg.norm(repeated - centroid, axis=1)
    closest = np.argmin(classes)
    return len(photos), kind, classes, closest
Example #5
0
def venues_info(vids, visits=None, visitors=None, density=None, depth=10,
                tags_freq=True):
    """Return various information about the venues whose ids are in `vids`.

    The city is read from the venue `vids[0]`. `visits`, `visitors` and
    `density` are fetched or estimated for that city when they are not
    provided (or are falsy).

    Returns a pair `(res, tags)`:

    * `res` is a DataFrame indexed by venue id with the columns 'name',
      'price', 'rating', 'likes', 'users', 'checkins', then 'tags' (only if
      `tags_freq`), 'cat', 'vis' (number of visits), 'H' (`venue_entropy`
      of the visitors), 'Ht' (`time_entropy` of the visits) and 'Den'
      (`density` evaluated at the projected location).
    * `tags` is an OrderedDict mapping each normalized tag to its number of
      occurrences, by decreasing count; it is empty if `tags_freq` is false.

    `depth` selects the category level: `top_category` when it is 1,
    `parenting_cat(cat, depth)` otherwise.

    Raises AssertionError if the database does not return exactly one
    document per requested id.
    """
    city = DB.venue.find_one({'_id': vids[0]})['city']
    visits = visits or xp.get_visits(CLIENT, xp.Entity.venue, city)
    visitors = visitors or xp.get_visitors(CLIENT, city)
    density = density or estimate_density(city)
    venues = list(DB.venue.find({'_id': {'$in': vids}},
                                {'cat': 1, 'name': 1, 'loc': 1,
                                 'price': 1, 'rating': 1, 'tags': 1,
                                 'likes': 1, 'usersCount': 1,
                                 'checkinsCount': 1}))

    msg = 'Asked for {} but get only {}'.format(len(vids), len(venues))
    assert len(vids) == len(venues), msg
    # Rows follow the order in which the database returned the venues.
    res = pd.DataFrame(index=[venue['_id'] for venue in venues])

    for field in ['name', 'price', 'rating', 'likes',
                  'usersCount', 'checkinsCount']:
        # 'usersCount' and 'checkinsCount' become 'users' and 'checkins'
        res[field.replace('Count', '')] = [venue[field] for venue in venues]

    tags = defaultdict(int)
    if tags_freq:
        # Normalize each tag once and reuse it for the column and the counts
        venue_tags = [[normalized_tag(tag) for tag in venue['tags']]
                      for venue in venues]
        res['tags'] = venue_tags
        for one_venue_tags in venue_tags:
            for tag in one_venue_tags:
                tags[tag] += 1

    def get_cat(cat, cat_depth):
        """Return the ancestor of `cat` at the requested depth."""
        if cat_depth == 1:
            return top_category(cat)
        return parenting_cat(cat, cat_depth)

    res['cat'] = [get_cat(venue['cat'], depth) for venue in venues]
    res['vis'] = [len(visits[id_]) for id_ in res.index]
    res['H'] = [venue_entropy(visitors[id_]) for id_ in res.index]
    res['Ht'] = [time_entropy(visits[id_]) for id_ in res.index]
    loc = [venue['loc']['coordinates'] for venue in venues]
    # Coordinate pairs are swapped before projection; presumably from
    # (lon, lat) to (lat, lon) -- TODO confirm against GEO_TO_2D.
    coords = np.fliplr(np.array(loc))
    points = cm.cities.GEO_TO_2D[city](coords)
    res['Den'] = density(points)
    # items() instead of iteritems() so this also runs on Python 3
    return res, OrderedDict(sorted(tags.items(), key=lambda x: x[1],
                                   reverse=True))
Example #6
0
    # Exploratory clustering of the venue feature matrix of `city`.
    # The three assignments below overwrite one another and none of these
    # estimators is fitted: `clusterer` is rebound again inside the loop.
    clusterer = cl.DBSCAN(eps=5, min_samples=8, metric='cityblock')
    clusterer = cl.AffinityPropagation(damping=.55, affinity='euclidean')
    clusterer = cl.SpectralClustering(3, affinity='cosine', n_init=3)

    hel = cn.load_matrix(city)
    features = hel['v']
    scale = pp.MinMaxScaler(copy=False)
    # The return value is discarded; with copy=False the intent is presumably
    # to rescale the first three columns of `features` in place -- TODO
    # confirm that the slice is actually modified.
    scale.fit_transform(features[:, 0:3])
    # Silhouette score of KMeans for every k from 3 to 15
    scores = []
    for k in range(3, 16):
        clusterer = cl.KMeans(k, n_init=10, tol=1e-5, max_iter=500)
        labels = clusterer.fit_predict(features)
        scores.append(mt.silhouette_score(features, labels))
        print(Counter(labels))
    # Values of k by decreasing score (index 0 is k=3, hence the +3); the
    # result is not stored, so this is only useful in an interactive session.
    np.argsort(scores)[::-1]+3
    ppl.plot(range(3, 16), scores[0:], '+')
    # NOTE(review): these two estimators are never fitted in the visible
    # code, so `labels` below still comes from the last loop iteration
    # (k=15), not from KMeans(6).
    clusterer = cl.MeanShift(min_bin_freq=3, cluster_all=False)
    clusterer = cl.KMeans(6, n_init=20, tol=1e-5, max_iter=500)

    visits = xp.get_visits(CLIENT, xp.Entity.venue, city)
    visitors = xp.get_visitors(CLIENT, city)
    density = vf.estimate_density(city)
    # Describe the venues labelled 0 and 5, restricted to ids that have
    # visits; the second return value of venues_info is not used afterwards.
    c0, _ = vf.venues_info([_ for _ in hel['i'][labels == 0].tolist()
                            if _ in visits],
                           visits, visitors, density, depth=2, tags_freq=False)
    c5, v = vf.venues_info([v for v in hel['i'][labels == 5].tolist()
                            if v in visits],
                           visits, visitors, density, depth=2, tags_freq=False)
    # Summary statistics; the results are discarded outside a REPL.
    c0.describe()
    c5.describe()
Example #7
0
    hel = cn.load_matrix(city)
    features = hel['v']
    scale = pp.MinMaxScaler(copy=False)
    scale.fit_transform(features[:, 0:3])
    scores = []
    for k in range(3, 16):
        clusterer = cl.KMeans(k, n_init=10, tol=1e-5, max_iter=500)
        labels = clusterer.fit_predict(features)
        scores.append(mt.silhouette_score(features, labels))
        print(Counter(labels))
    np.argsort(scores)[::-1] + 3
    ppl.plot(range(3, 16), scores[0:], '+')
    clusterer = cl.MeanShift(min_bin_freq=3, cluster_all=False)
    clusterer = cl.KMeans(6, n_init=20, tol=1e-5, max_iter=500)

    visits = xp.get_visits(CLIENT, xp.Entity.venue, city)
    visitors = xp.get_visitors(CLIENT, city)
    density = vf.estimate_density(city)
    c0, _ = vf.venues_info(
        [_ for _ in hel['i'][labels == 0].tolist() if _ in visits],
        visits,
        visitors,
        density,
        depth=2,
        tags_freq=False)
    c5, v = vf.venues_info(
        [v for v in hel['i'][labels == 5].tolist() if v in visits],
        visits,
        visitors,
        density,
        depth=2,