def plot_city(city, weekly=False, clusters=5):
    """Cluster the venue time patterns of `city` and save the centroids.

    The visits of every venue of `city` are fetched with `xp.get_visits`,
    aggregated by `xp.aggregate_visits` and converted to frequencies by
    `xp.to_frequency`. Only venues with more than 5 visits are kept.
    `DO_CLUSTER` is then run several times and the run with the lowest
    distortion (as computed by `vf.get_distorsion`) is retained.

    Parameters
    ----------
    city : str
        Name of the city; also used to build the output file name.
    weekly : bool
        Selects which aggregate returned by `xp.aggregate_visits` is used
        (index `int(weekly)`) and the `_weekly`/`_daily` file suffix.
    clusters : int
        Number of clusters requested from `DO_CLUSTER`.

    The reordered centroids are saved under the key `t` through
    `sio.savemat`, in `times/<city>_<weekly|daily>_time`.

    Raises
    ------
    ValueError
        If no clustering run produced a usable (comparable) distortion.
    """
    shift = 2  # start from 1am instead of midnight
    chunk = 4
    restarts = 7
    venue_visits = xp.get_visits(CLIENT, xp.Entity.venue, city)
    # Compute aggregated frequency for venues with more than 5 visits
    enough = {
        venue: xp.to_frequency(
            xp.aggregate_visits(visits, shift, chunk)[int(weekly)])
        for venue, visits in venue_visits.items()
        if len(visits) > 5
    }
    # list() is required so that a dict view (Python 3) still becomes a
    # regular 2-d array instead of a 0-d object array.
    sval = np.array(list(enough.values()))

    # Keep the centroids of the run with the smallest distortion.
    best_disto = np.inf
    best_ak = None
    for _ in range(restarts):
        tak, tkl = DO_CLUSTER(sval, clusters)
        current_disto = vf.get_distorsion(tak, tkl, sval)
        if current_disto < best_disto:
            best_disto, best_ak = current_disto, tak
    if best_ak is None:
        raise ValueError(
            'no valid clustering found for {}'.format(city))

    std_ord = np.argsort((np.argsort(best_ak)), 0)[:, -1]
    # Plotting is currently disabled:
    # vf.draw_classes(best_ak[std_ord, :], shift, chunk)
    # vf.plt.title('{}, {} venues'.format(city, len(enough)))
    # vf.plt.ylim([0, 0.28 if weekly else 0.9])
    out_name = 'times/' + city
    out_name += '_weekly' if weekly else '_daily'
    sio.savemat(out_name + '_time', {'t': best_ak[std_ord, :]},
                do_compression=True)
def photos_around(id_, centroid, offset, daily, radius=200):
    """Describe the time pattern of the photos taken around `id_`.

    Photos located in a ball of `radius` around the position returned by
    `get_loc(id_)` are fetched, aggregated with `xp.aggregate_visits` using
    `offset`, and the aggregate selected by `daily` is converted to a
    frequency vector.

    Returns a 4-tuple: the number of photos found, the frequency vector,
    the Euclidean distance from that vector to every row of `centroid`,
    and the index of the closest row.
    """
    location = get_loc(id_)
    nearby = xp.get_visits(CLIENT, xp.Entity.photo, ball=(location, radius))
    aggregated = xp.aggregate_visits(nearby.values(), offset)
    # NOTE(review): `daily` is used directly as an index here, whereas
    # elsewhere in this file the same result is indexed with `int(weekly)`
    # -- confirm callers pass the index they intend.
    pattern = xp.to_frequency(aggregated[daily])
    # pylint: disable=E1101
    # Repeat the pattern once per centroid row to get row-wise distances.
    repeated = np.tile(pattern, (centroid.shape[0], 1))
    distances = np.linalg.norm(repeated - centroid, axis=1)
    closest = np.argmin(distances)
    return len(nearby), pattern, distances, closest
def describe_city(city):
    """Compute the feature vector of the selected venues of `city`.

    Venues are taken from `DB.venue` (open, with a category and more than
    one user) and kept only if they have more than 4 visits, more than one
    distinct visitor and are not events according to `is_event`.

    The result is a float32 matrix with one row per venue returned by
    `venues_info` and 31 columns:

    ======  ==========================================================
    0-4     `likes`, `users`, `checkins`, `H`, `Den` from `venues_info`
    5       numeric category code looked up in the level 2 mapping
    6-14    category distribution around the venue
    15      photo focus
    16      photo ratio
    17      result of `is_week_end_place` on the venue's own visits
    18-23   frequency of the venue's own aggregated visits
    24      `Ht` from `venues_info`
    25-30   frequency of the surrounding visits
    ======  ==========================================================

    Non finite entries (NaN or infinite) are replaced by 0. The matrix is
    saved with `sio.savemat` in `<city>_fv` under the key `v`, together
    with the city-wide category repartition (`c`), the venue ids (`i`) and
    the number of distinct visitors (`stat`).
    """
    cats2 = p.load_var('cat_depth_2.my')
    # a few venues don't have level 2 categories (TODO add it manually?)
    # so top level categories get codes that are multiples of 1e5.
    cats2.update({cat: int(idx*1e5) for idx, cat in enumerate(CATS)})

    info = global_info(city)
    lvenues, lcheckins, lphotos = info[:3]
    visits, visitors, density = info[3:6]
    svenues, scheckins, sphotos = info[6:]
    all_visitors = [v for place in visitors.values() for v in place]
    nb_visitors = np.unique(np.array(all_visitors)).size
    categories = categories_repartition(city, svenues, lvenues, RADIUS)

    venues = DB.venue.find({'city': city, 'closed': {'$ne': True},
                            'cat': {'$ne': None}, 'usersCount': {'$gt': 1}},
                           {'cat': 1})
    chosen = [v['_id'] for v in venues
              if len(visits.get(v['_id'], [])) > 4 and
              len(np.unique(visitors.get(v['_id'], []))) > 1 and
              not is_event(v['cat'])]
    print("Chosen {} venues in {}.".format(len(chosen), city))
    info, _ = venues_info(chosen, visits, visitors, density, depth=2,
                          tags_freq=False)
    print("{} of them will be in the matrix.".format(len(info)))

    numeric = np.zeros((len(info), 31), dtype=np.float32)
    numeric[:, :5] = np.array([info['likes'], info['users'],
                               info['checkins'], info['H'], info['Den']]).T
    print('venues with no level 2 category:')
    print([info.index[i] for i, c in enumerate(info['cat'])
           if cats2[c] % int(1e5) == 0])
    numeric[:, 5] = [cats2[c] for c in info['cat']]
    numeric[:, 24] = np.array(info['Ht'])

    for idx, vid in enumerate(info.index):
        surrounding = full_surrounding(vid, lvenues, lphotos, lcheckins,
                                       svenues, scheckins, sphotos, city)
        cat, focus, ratio, around_visits = surrounding
        numeric[idx, 6:15] = cat
        numeric[idx, 15] = focus
        numeric[idx, 16] = ratio
        own_visits = visits[vid]
        numeric[idx, 17] = is_week_end_place(own_visits)
        daily_visits = xp.aggregate_visits(own_visits, 1, 4)[0]
        numeric[idx, 18:24] = xp.to_frequency(daily_visits)
        numeric[idx, 25:31] = xp.to_frequency(around_visits)

    # Zero only the NaN/inf cells. A boolean mask is required here: indexing
    # with the (row, col) pairs returned by np.argwhere would select whole
    # rows (using column numbers as row numbers too) instead of single cells.
    numeric[~np.isfinite(numeric)] = 0.0

    sio.savemat(city+'_fv', {'v': numeric, 'c': categories,
                             'i': np.array(list(info.index)),
                             'stat': [nb_visitors]}, do_compression=True)
def full_surrounding(vid, vmapping, pmapping, cmapping, svenues, scheckins,
                     sphotos, city, radius=350):
    """Describe what lies in a `radius` around venue `vid`, within `city`.

    The mappings (`vmapping`, `pmapping`, `cmapping`) are
    dict({id: 2dpos}) for venues, photos and check-ins respectively, while
    `sphotos` and `scheckins` are queried through their `around(center,
    radius)` method.

    Returns a 4-tuple:

    * the category distribution given by `categories_repartition`,
    * the photo focus given by `photo_focus`,
    * the first value returned by `photo_ratio`,
    * the first aggregate of the surrounding check-ins (weighted by the
      second value returned by `photo_ratio`), or `np.ones(6)` when there
      is no check-in at all around the venue.

    A warning is printed when fewer than 5 check-ins are found.
    """
    cat_distrib = categories_repartition(city, svenues, vmapping, radius, vid)
    center = vmapping[vid]
    pids, photo_infos, _ = sphotos.around(center, radius)
    pvenue = photo_infos[0]
    cids, checkin_infos, _ = scheckins.around(center, radius)
    ctime = checkin_infos[0]
    focus = photo_focus(vid, center, pids, pvenue, radius, pmapping)
    photogeny, c_smoothed = photo_ratio(center, pids, cids, radius, pmapping,
                                        cmapping)
    if len(ctime) < 5:
        # format() copes with non string ids and reports the actual radius
        # instead of a hard-coded 350.
        print('{} is anomalous because there is less than 5 check-in in a '
              '{}m radius'.format(vid, radius))
    if len(ctime) == 0:
        # No check-in to aggregate: fall back to a constant pattern.
        surround_visits = np.ones(6)
    else:
        surround_visits = xp.aggregate_visits(ctime, 1, 4, c_smoothed)[0]
    return cat_distrib, focus, photogeny, surround_visits