def accu_dt(movdata, bsmap, output, log=True):
    """ Distribution of dwelling time for each person

    All values of `person.accdwelling` are pooled per user id across every
    record yielded by `movement_reader`, divided by 3600 (presumably
    seconds -> hours; confirm against the reader) and histogrammed.

    movdata: path of the movement data file.
    bsmap: path handed to `BaseStationMap`.
    output: path of the text file to write. The first line records the bin
        definition, then one line `uid,count,count,...` per user.
    log: when exactly `True`, use 50 log-spaced bins over [1e-2, 1e2];
        otherwise half-unit linear bins over [0, 24].
    """
    bsmap = BaseStationMap(bsmap)

    res = {}
    with open(movdata, 'rb') as ifile:
        for person in movement_reader(ifile, bsmap):
            # Always accumulate into our own list so that the values
            # container returned by the person object is never mutated.
            res.setdefault(person.id, []).extend(person.accdwelling.values())

    # Pick the bins and their header line in a single place so that the two
    # can never disagree.
    if log is True:
        bins = np.logspace(-2, 2, 50)
        header = '#bins np.logspace(-2,2,50)\n'
    else:
        bins = np.arange(0, 24.5, 0.5)
        header = '#bins np.arange(0,24.5,0.5)\n'

    with open(output, 'wb') as ofile:
        ofile.write(header)
        for uid in res:
            # 3600.0 forces true division: with integer dwelling times the
            # Python 2 `/` would floor every value to a whole unit.
            hist = np.histogram(np.array(res[uid]) / 3600.0, bins=bins)[0]
            ofile.write('%d,%s\n' % (uid, ','.join([str(h) for h in hist])))
def loc_dt(movdata, bsmap, output, log=True):
    """ Distribution of dwelling time for each person, each location

    For every user the `person.accdwelling` values are grouped by location
    key across all records, averaged per location, divided by 3600
    (presumably seconds -> hours; confirm against the reader) and
    histogrammed.

    movdata: path of the movement data file.
    bsmap: path handed to `BaseStationMap`.
    output: path of the text file to write. The first line records the bin
        definition, then one line `uid,count,count,...` per user.
    log: when exactly `True`, use 50 log-spaced bins over [1e-2, 1e2];
        otherwise half-unit linear bins over [0, 24].
    """
    bsmap = BaseStationMap(bsmap)

    res = {}
    with open(movdata, 'rb') as ifile:
        for person in movement_reader(ifile, bsmap):
            per_location = res.setdefault(person.id, {})
            for loc, dwell in person.accdwelling.items():
                per_location.setdefault(loc, []).append(dwell)

    # Pick the bins and their header line in a single place so that the two
    # can never disagree.
    if log is True:
        bins = np.logspace(-2, 2, 50)
        header = '#bins np.logspace(-2,2,50)\n'
    else:
        bins = np.arange(0, 24.5, 0.5)
        header = '#bins np.arange(0,24.5,0.5)\n'

    with open(output, 'wb') as ofile:
        ofile.write(header)
        for uid in res:
            vs = [np.average(v) for v in res[uid].values()]
            hist = np.histogram(np.array(vs) / 3600.0, bins=bins)[0]
            ofile.write('%d,%s\n' % (uid, ','.join([str(h) for h in hist])))
def mobility_graphs(logiter, bsmap, roadnet):
    """ Build one mobility graph per person read from `logiter`

    Each person yielded by `movement_reader(logiter, bsmap)` is converted
    with `convert2graph(roadnet, True)`. Graphs with a single node (or
    none) are dropped. Returns a list of
    `(person.id, person.dtstart, node_count, graph)` tuples in reading
    order.
    """
    collected = []
    for person in movement_reader(logiter, bsmap):
        mobgraph = person.convert2graph(roadnet, True)
        node_count = len(mobgraph.nodes())
        if node_count <= 1:
            continue
        collected.append((person.id, person.dtstart, node_count, mobgraph))
    return collected
def opmap_stat():
    """ Print opportunity statistics along the moves of a user

    For each consecutive pair of coordinates of a person, the nodes of the
    opportunity map lying within `rg * alpha` of the current location are
    split into two groups by their distance to the current location:

    * intervening (`iops`): closer than `dist * (1 - delta)`;
    * available (`aops`): within `dist * (1 +/- delta)`,

    where `dist` is the length of the move. The total and the largest node
    weight of both groups are printed for every move.

    Reads the hard-coded data files under `data/`; returns nothing.
    """
    movdata = 'data/hcl_mesos0825'
    bsmap = BaseStationMap('data/hcl_mesos0825_bm')
    opgraph = load_oppmap(bsmap)
    opnodes = opgraph.nodes(data=True)

    alpha = 1
    delta = 0.05

    with open(movdata, 'rb') as ifile:
        for person in movement_reader(ifile, bsmap):
            if person.distinct_loc_num() < 2:
                continue
            rg = person.radius_of_gyration()
            locs = person.coordinates

            for i in range(0, len(locs) - 1):
                src = locs[i]
                dst = locs[i + 1]
                dist = greate_circle_distance(src[0], src[1], dst[0], dst[1])

                # all opportunities for current location with radius rg
                person_opnodes = [
                    (nn, vv['weight'])
                    for nn, vv in opnodes
                    if greate_circle_distance(
                        nn[0], nn[1], src[0], src[1]) <= rg * alpha
                ]

                aops = []
                iops = []
                for nn, value in person_opnodes:
                    gcd = greate_circle_distance(src[0], src[1], nn[0], nn[1])
                    if gcd < dist * (1 - delta):
                        iops.append((nn, value))
                    elif gcd <= dist * (1 + delta):
                        aops.append((nn, value))
                print('%s %s' % (aops, iops))

                aweights = [p[1] for p in aops]
                iweights = [p[1] for p in iops]
                avops_total = np.sum(aweights)
                inops_total = np.sum(iweights)
                # np.max raises ValueError on an empty sequence; an empty
                # band is an ordinary outcome, so report 0.0 like np.sum.
                avops_max = np.max(aweights) if aweights else 0.0
                inops_max = np.max(iweights) if iweights else 0.0
                print('%s %s %s %s %s %s' % (
                    person.id, rg, avops_total, inops_total,
                    avops_max, inops_max))

            # NOTE(review): the source arrived with its indentation lost;
            # this `break` is read as a debugging stop after the first
            # person with at least two locations -- confirm.
            break
def opmap_stat():
    """ Print opportunity statistics along the moves of a user

    For each consecutive pair of coordinates of a person, the nodes of the
    opportunity map lying within `rg * alpha` of the current location are
    split into two groups by their distance to the current location:

    * intervening (`iops`): closer than `dist * (1 - delta)`;
    * available (`aops`): within `dist * (1 +/- delta)`,

    where `dist` is the length of the move. The total and the largest node
    weight of both groups are printed for every move.

    Reads the hard-coded data files under `data/`; returns nothing.
    """
    movdata = 'data/hcl_mesos0825'
    bsmap = BaseStationMap('data/hcl_mesos0825_bm')
    opgraph = load_oppmap(bsmap)
    opnodes = opgraph.nodes(data=True)

    alpha = 1
    delta = 0.05

    with open(movdata, 'rb') as ifile:
        for person in movement_reader(ifile, bsmap):
            if person.distinct_loc_num() < 2:
                continue
            rg = person.radius_of_gyration()
            locs = person.coordinates

            for i in range(0, len(locs) - 1):
                src = locs[i]
                dst = locs[i + 1]
                dist = greate_circle_distance(src[0], src[1], dst[0], dst[1])

                # all opportunities for current location with radius rg
                person_opnodes = [
                    (nn, vv['weight'])
                    for nn, vv in opnodes
                    if greate_circle_distance(
                        nn[0], nn[1], src[0], src[1]) <= rg * alpha
                ]

                aops = []
                iops = []
                for nn, value in person_opnodes:
                    gcd = greate_circle_distance(src[0], src[1], nn[0], nn[1])
                    if gcd < dist * (1 - delta):
                        iops.append((nn, value))
                    elif gcd <= dist * (1 + delta):
                        aops.append((nn, value))
                print('%s %s' % (aops, iops))

                aweights = [p[1] for p in aops]
                iweights = [p[1] for p in iops]
                avops_total = np.sum(aweights)
                inops_total = np.sum(iweights)
                # np.max raises ValueError on an empty sequence; an empty
                # band is an ordinary outcome, so report 0.0 like np.sum.
                avops_max = np.max(aweights) if aweights else 0.0
                inops_max = np.max(iweights) if iweights else 0.0
                print('%s %s %s %s %s %s' % (
                    person.id, rg, avops_total, inops_total,
                    avops_max, inops_max))

            # NOTE(review): the source arrived with its indentation lost;
            # this `break` is read as a debugging stop after the first
            # person with at least two locations -- confirm.
            break
def mobility_graphs(logiter, bsmap, roadnet, cmin=2, cmax=15, dates=None):
    """ Extract mobility graphs from a list of movement observations

    logiter: iterable of raw movement records, passed to `movement_reader`.
    bsmap: base-station map, passed to `movement_reader`.
    roadnet: road network handed to `person.convert2graph`.
    cmin, cmax: inclusive bounds on the number of distinct locations a
        person must have to be kept.
    dates: container of day labels as returned by `person.which_day()`;
        only persons observed on one of these days are kept. `None` (the
        default) disables the date filter.

    Returns a list of `(person.id, person.dtstart, node_count, graph)`
    tuples for every kept person whose graph has more than one node.
    """
    results = []
    for person in movement_reader(logiter, bsmap):
        # `x not in None` raises TypeError, so the default must be treated
        # explicitly as "accept every day".
        if dates is not None and person.which_day() not in dates:
            continue
        nloc = len(set(person.locations))
        if nloc > cmax or nloc < cmin:
            continue
        graph = person.convert2graph(roadnet)
        nlen = len(graph.nodes())
        if nlen > 1:
            results.append((person.id, person.dtstart, nlen, graph))
    return results
def mobgraph_degree(movdata, bsmap, output):
    """ Node degree of mobility graphs

    For every person with at least two distinct locations, the mean node
    degree of the graph returned by `person.convert2graph()` is recorded
    next to the person's number of distinct locations.

    movdata: path of the movement data file.
    bsmap: path handed to `BaseStationMap`.
    output: path of the CSV file to write (header `nloc,ndgr`, one row per
        kept person, no trailing newline).
    """
    nloc = []
    ndgr = []
    bsmap = BaseStationMap(bsmap)

    with open(movdata, 'rb') as ifile:
        for person in movement_reader(ifile, bsmap):
            if person.distinct_loc_num() < 2:
                continue
            graph = person.convert2graph()
            # dict(...) accepts both a plain node->degree mapping and an
            # iterable of (node, degree) pairs, so this does not depend on
            # which of the two the graph library returns.
            degrees = list(dict(graph.degree()).values())
            ndgr.append(np.mean(degrees))
            nloc.append(person.distinct_loc_num())

    # The context manager guarantees the rows are flushed and the handle is
    # released; the file used to be left open.
    with open(output, 'wb') as ofile:
        ofile.write('nloc,ndgr\n')
        ofile.write('\n'.join(
            ['%d,%.3f' % (x, y) for x, y in zip(nloc, ndgr)]))
def accu_rg(movdata, bsmap, output):
    """ Accumulative R_g over multiple days

    Coordinates of each user are accumulated in reading order; every time a
    record for one of the seven known days ('0820' .. '0826') is read, the
    radius of gyration of everything accumulated so far is stored in that
    day's slot. Days without a record inherit the value of the previous
    day, or 0 when there is no previous day.

    movdata: path of the movement data file.
    bsmap: path handed to `BaseStationMap`.
    output: path of the text file to write: one line
        `uid,v0,v1,...,v6` per user, sorted by uid.
    """
    bsmap = BaseStationMap(bsmap)
    dates = {'0820': 0, '0821': 1, '0822': 2, '0823': 3,
             '0824': 4, '0825': 5, '0826': 6}

    res = {}
    coords = {}
    with open(movdata, 'rb') as ifile:
        for person in movement_reader(ifile, bsmap):
            uid = person.id
            tdate = person.which_day()
            if tdate not in dates:
                continue
            # Accumulate into a list we own; extending the person's own
            # coordinate list would silently modify that object.
            coords.setdefault(uid, []).extend(person.coordinates)
            if uid not in res:
                res[uid] = np.empty(7)
                res[uid].fill(-1)  # -1 marks "no record for that day"
            res[uid][dates[tdate]] = radius_of_gyration(coords[uid])

    res2 = []
    for uid in res:
        filled = []
        for n in res[uid]:
            if n == -1:
                # Carry the previous day forward; 0 before the first record.
                filled.append(filled[-1] if filled else 0)
            else:
                filled.append(n)
        res2.append((uid, filled))
    res2.sort(key=lambda x: x[0])

    with open(output, 'wb') as ofile:
        for uid, values in res2:
            ofile.write('%d,%s\n' % (
                uid, ','.join(['%.4f' % j for j in values])))
def loc_dt_all(movdata, bsmap, output):
    """ Average dwelling time for each person, each location

    The `person.accdwelling` values are grouped by user and location key
    across all records. For each user the per-location averages, divided
    by 3600 (presumably seconds -> hours; confirm against the reader), are
    written in descending order.

    movdata: path of the movement data file.
    bsmap: path handed to `BaseStationMap`.
    output: path of the text file to write: one line `uid,v,v,...` per
        user, values formatted with three decimals.
    """
    bsmap = BaseStationMap(bsmap)

    res = {}
    with open(movdata, 'rb') as ifile:
        for person in movement_reader(ifile, bsmap):
            per_location = res.setdefault(person.id, {})
            for loc, dwell in person.accdwelling.items():
                per_location.setdefault(loc, []).append(dwell)

    with open(output, 'wb') as ofile:
        for uid in res:
            vs = sorted(
                [np.average(v) / 3600 for v in res[uid].values()],
                reverse=True)
            ofile.write('%d,%s\n' % (uid, ','.join(['%.3f' % v for v in vs])))
def daily_rg(movdata, bsmap, output):
    """ R_g for one day

    The radius of gyration of every record is grouped by the record's start
    date (formatted `%m%d`). One file per date is written into the
    directory `output`, holding `uid,rg` lines sorted by uid.

    movdata: path of the movement data file.
    bsmap: path handed to `BaseStationMap`.
    output: directory receiving the per-date files; created when missing.
    """
    bsmap = BaseStationMap(bsmap)

    res = {}
    with open(movdata, 'rb') as ifile:
        for person in movement_reader(ifile, bsmap):
            tdate = person.dtstart.strftime("%m%d")
            rg = person.radius_of_gyration()
            res.setdefault(tdate, []).append((person.id, rg))

    if not res:
        # Nothing to write: leave the file system untouched.
        return

    try:
        os.mkdir(output)
    except OSError:
        # Usually "already exists". Any real problem (permissions, missing
        # parent) surfaces from the open() call below.
        pass

    for tdate in res:
        with open(os.path.join(output, tdate), 'wb') as ofile:
            for uid, rg in sorted(res[tdate], key=lambda x: x[0]):
                ofile.write('%d,%.4f\n' % (uid, rg))
def empirical_data():
    """ Dump per-record empirical statistics to a tab-separated file

    For every record with `len(person) >= 2`, one line is written holding:
    user id, radius of gyration, number of distinct locations, total
    travelled distance, and the comma-joined distances between consecutive
    coordinates.

    Reads and writes the hard-coded files under `data/`; returns nothing.
    """
    ifname = 'data/hcl_mesos0822_sample0.2'
    bsmap = BaseStationMap('data/hcl_mesos0822_bm')

    with open('data/mesos_model_emp_stat2', 'wb') as ofile:
        with open(ifname) as ifile:
            for person in movement_reader(ifile, bsmap):
                if len(person) < 2:
                    continue
                uid = person.id
                rg = person.radius_of_gyration()
                totloc = len(set(person.locations))
                traj = person.coordinates
                # Distance of every hop between consecutive coordinates.
                trvdist = [
                    greate_circle_distance(a[0], a[1], b[0], b[1])
                    for a, b in zip(traj, traj[1:])
                ]
                totdist = np.sum(trvdist)
                ofile.write('%d\t%.3f\t%d\t%.3f\t%s\n' % (
                    uid, rg, totloc, totdist,
                    ','.join(['%.3f' % d for d in trvdist]),
                ))
def validate_selfsim():
    """ Join the self-similarity table with each user's mobility graph

    The comma-separated `ssfile` (one header line, then
    `uid,group,clust,dist,selfdist,mode`) is loaded into memory. For every
    record of a listed user with at least two distinct locations, a
    tab-separated line with those fields plus the serialized mobility graph
    is written to `ofname`.

    Reads and writes the hard-coded files under `data/`; prints the number
    of users loaded; returns nothing.
    """
    ssfile = 'data/mesos0825_s0dot2/mesos0825_s0dot2_ssmode'
    movdata = 'data/hcl_mesos0825_sample0.2'
    bsmap = 'data/hcl_mesos0825_bm'
    ofname = 'data/mesos0825_s0dot2/mesos0825_s0dot2_ssmode_mg'

    users = {}
    with open(ssfile, 'rb') as sfile:
        next(sfile, None)  # skip the header line
        for line in sfile:
            parts = line.strip('\r\n').split(',')
            uid = int(parts[0])
            users[uid] = (int(parts[1]), int(parts[2]),
                          float(parts[3]), float(parts[4]), str(parts[5]))
    print(len(users))

    with open(ofname, 'wb') as ofile:
        with open(movdata) as mfile:
            for person in movement_reader(mfile, BaseStationMap(bsmap)):
                if person.id not in users or person.distinct_loc_num() < 2:
                    continue
                user = users[person.id]
                ofile.write('%d\t%d\t%d\t%.3f\t%.3f\t%s\t%s\n' % (
                    person.id, user[0], user[1], user[2], user[3], user[4],
                    dumps_mobgraph(person.convert2graph())))
def validate_selfsim():
    """ Join the self-similarity table with each user's mobility graph

    The comma-separated `ssfile` (one header line, then
    `uid,group,clust,dist,selfdist,mode`) is loaded into memory. For every
    record of a listed user with at least two distinct locations, a
    tab-separated line with those fields plus the serialized mobility graph
    is written to `ofname`.

    Reads and writes the hard-coded files under `data/`; prints the number
    of users loaded; returns nothing.
    """
    ssfile = 'data/mesos0825_s0dot2/mesos0825_s0dot2_ssmode'
    movdata = 'data/hcl_mesos0825_sample0.2'
    bsmap = 'data/hcl_mesos0825_bm'
    ofname = 'data/mesos0825_s0dot2/mesos0825_s0dot2_ssmode_mg'

    users = {}
    with open(ssfile, 'rb') as sfile:
        next(sfile, None)  # skip the header line
        for line in sfile:
            parts = line.strip('\r\n').split(',')
            uid = int(parts[0])
            users[uid] = (int(parts[1]), int(parts[2]),
                          float(parts[3]), float(parts[4]), str(parts[5]))
    print(len(users))

    with open(ofname, 'wb') as ofile:
        with open(movdata) as mfile:
            for person in movement_reader(mfile, BaseStationMap(bsmap)):
                if person.id not in users or person.distinct_loc_num() < 2:
                    continue
                user = users[person.id]
                ofile.write('%d\t%d\t%d\t%.3f\t%.3f\t%s\t%s\n' % (
                    person.id, user[0], user[1], user[2], user[3], user[4],
                    dumps_mobgraph(person.convert2graph())))
# Script entry point: collect the motifs of the first MAX_USER_NUM distinct
# users found in MOVEMENT_DAT and dump their statistics to a CSV file.
if __name__ == '__main__':

    class IdCounter(object):
        # Distinct ids seen so far. The set lives on the class, so it is
        # shared by every call for the lifetime of the process.
        ids = set()

        @staticmethod
        def count(new_id):
            # Register new_id and return the number of distinct ids seen.
            IdCounter.ids.add(new_id)
            return len(IdCounter.ids)

    counter = MAX_USER_NUM
    basemap = BSMAP
    movement = MOVEMENT_DAT

    print("Extracting motifs ...")
    motifrepo = Motif()
    for person in movement_reader(open(movement, 'rb'), BaseStationMap(basemap)):
        # Stop as soon as one more distinct user than allowed shows up.
        if IdCounter.count(person.id) > counter:
            break
        user_graph = seq2graph(person.locations, True)
        motifrepo.add_graph(user_graph)
    motifrepo.stat().to_csv('motifs_stat.csv', index=False)

    print("Plotting motifs ...")
    motif_filter = range(3, 11)
    # Global stat
    # NOTE(review): all_motifs and totmotif are not used in the visible
    # part of the script -- presumably consumed by plotting code further
    # down; confirm.
    all_motifs = motifrepo.all_motifs(motif_filter, True, True)
    totmotif = motifrepo.number_of_motifs(motif_filter)
def top_compare():
    """ Compare top motif and mesostructure

    For every (C, kn) cluster file `mesos0825_s0dot2_c<C>_kn<kn>` -- first
    line: comma-separated user ids, remaining lines: rows of the pairwise
    distance matrix -- users are ranked by their mean distance to the
    cluster. Two quantities are then derived:

    * the mesostructure of the two best-ranked users and its similarity
      (`1 - struct_dist()`);
    * the most frequent mesostructure, up to isomorphism, among all pairs
      of rank-adjacent users, and its support (share of the N users).

    One tab-separated line per (C, kn) is written to `ofname`. Reads and
    writes the hard-coded files under `data/`; returns nothing.
    """
    datapath = "data/mesos0825_s0dot2"
    movdata = "data/hcl_mesos0825_sample0.2"
    bsmap = "data/hcl_mesos0825_bm"
    ofname = os.path.join(datapath, "mesos0825_s0dot2_top")

    # Mobility graphs of day 0825, grouped by number of distinct locations.
    mobgraphs = {}
    with open(movdata) as mfile:
        for person in movement_reader(mfile, BaseStationMap(bsmap)):
            if person.which_day() != "0825":
                continue
            nn = len(set(person.locations))
            if nn > 20:
                continue
            mobgraphs.setdefault(nn, {})[person.id] = person.convert2graph()

    new_file = True
    for C in range(2, 16):
        for kn in range(1, 5):
            print("%d %d" % (C, kn))

            # Read dist matrix for (group, cluster) users
            fileklab = os.path.join(datapath, "mesos0825_s0dot2_c%d_kn%d" % (C, kn))
            with open(fileklab) as kfile:
                # The header is consumed explicitly. The former
                # `i == 1` flag was a comparison, not an assignment, and
                # only skipped the header through the Python 2 leak of
                # list-comprehension variables.
                header = kfile.readline()
                uids = [int(tok) for tok in header.strip("\r\n").split(",")]
                distmat = np.array([
                    [float(tok) for tok in line.strip("\r\n").split(",")]
                    for line in kfile
                ])

            distvec = distmat.sum(1) / len(uids)
            uids_sorted = [x for (y, x) in sorted(zip(distvec, uids))]
            N = len(uids_sorted)
            print("Total users %d: " % N)

            mgs = mobgraphs[C]
            top = Mesos(mgs[uids_sorted[0]], mgs[uids_sorted[1]])
            topmesos = top.mesos
            topmesos_sim = 1 - top.struct_dist()

            # Group the mesostructures of rank-adjacent users by isomorphism
            # class; each class is keyed by its first representative.
            motifs = {}
            for i in range(N - 1):
                pair_mesos = Mesos(mgs[uids_sorted[i]],
                                   mgs[uids_sorted[i + 1]]).mesos
                for key in motifs:
                    if isomorphism.is_isomorphic(key, pair_mesos):
                        motifs[key].append((pair_mesos, i))
                        break
                else:
                    motifs[pair_mesos] = [(pair_mesos, i)]

            res = sorted(
                [(len(value), value[0][0]) for value in motifs.values()],
                key=lambda x: x[0], reverse=True)
            topmotif = res[0][1]
            topmotif_supp = 1.0 * res[0][0] / N

            # Truncate on the first cluster, append afterwards.
            mode = "wb" if new_file else "ab"
            new_file = False
            with open(ofname, mode) as ofile:
                ofile.write("%d\t%d" % (C, kn))
                ofile.write("\t%.3f\t%.3f" % (topmesos_sim, topmotif_supp))
                ofile.write("\t%s" % dumps_mobgraph(topmesos))
                ofile.write("\t%s" % dumps_mobgraph(topmotif))
                ofile.write("\n")
def trv_distance():
    """ Travel distance for clustered users

    For every (C, kn) cluster file `mesos0825_s0dot2_c<C>_kn<kn>` -- first
    line: comma-separated user ids, remaining lines: rows of the pairwise
    distance matrix -- users are ranked by their mean distance to the
    cluster. One CSV row per user is written in rank order with the
    cluster identifiers, the summed edge weight of the mesostructure of
    the two best-ranked users, the user's travel statistics, and the
    user's own mean distance and self distance taken from the matrix.

    Reads and writes the hard-coded files under `data/`; returns nothing.
    """
    datapath = "data/mesos0825_s0dot2"
    movdata = "data/hcl_mesos0825_sample0.2"
    bsmap = "data/hcl_mesos0825_bm"
    ofname = os.path.join(datapath, "mesos0825_s0dot2_trd")

    travdist = {}
    mobgraphs = {}
    with open(movdata) as mfile:
        for person in movement_reader(mfile, BaseStationMap(bsmap)):
            if person.which_day() != "0825":
                continue
            nn = len(set(person.locations))
            if nn > 20:
                continue
            mobgraphs.setdefault(nn, {})[person.id] = person.convert2graph()

            circle_num = len(person.circles)
            edge_freq = np.mean(list(person.freq.values()))
            trvd = person.travel_dist()
            rg = person.radius_of_gyration()
            nloc = len(person.locations)
            travdist[person.id] = (rg, trvd, edge_freq, circle_num, nloc)

    new_file = True
    for C in range(2, 16):
        for kn in range(1, 5):
            print("%d %d" % (C, kn))

            # Read dist matrix for (group, cluster) users
            fileklab = os.path.join(datapath, "mesos0825_s0dot2_c%d_kn%d" % (C, kn))
            with open(fileklab) as kfile:
                # The header is consumed explicitly. The former
                # `i == 1` flag was a comparison, not an assignment, and
                # only skipped the header through the Python 2 leak of
                # list-comprehension variables.
                header = kfile.readline()
                uids = [int(tok) for tok in header.strip("\r\n").split(",")]
                distmat = np.array([
                    [float(tok) for tok in line.strip("\r\n").split(",")]
                    for line in kfile
                ])

            distvec = distmat.sum(1) / len(uids)
            # Rank matrix rows by (mean distance, uid), keeping the row
            # index so each uid stays paired with its own matrix row.
            order = sorted(range(len(uids)),
                           key=lambda k: (distvec[k], uids[k]))
            uids_sorted = [uids[k] for k in order]
            N = len(uids_sorted)
            print("Total users %d: " % N)

            mgs = mobgraphs[C]
            topmesos = Mesos(mgs[uids_sorted[0]], mgs[uids_sorted[1]]).mesos
            eigndist = np.sum([e[2]["weight"] for e in topmesos.edges(data=True)])

            # Truncate (and emit the CSV header) on the first cluster only.
            mode = "wb" if new_file else "ab"
            new_file = False
            with open(ofname, mode) as ofile:
                if mode == "wb":
                    ofile.write("uid,group,clust,eigndist,rg,trvd,efreq,circlenum,nloc,dist,selfdist\n")
                for k in order:
                    uid = uids[k]
                    # Row k belongs to uids[k]. Indexing the matrix by rank
                    # position, as was done before, attached another
                    # user's distances to this uid.
                    dist = distvec[k]
                    selfdist = distmat[k][k]
                    rg, trvd, edge_freq, circle_num, nloc = travdist[uid]
                    ofile.write(
                        "%d,%d,%d,%.3f,%.3f,%.3f,%.3f,%d,%d,%.3f,%.3f"
                        % (uid, C, kn, eigndist, rg, trvd, edge_freq, circle_num, nloc, dist, selfdist)
                    )
                    ofile.write("\n")
# Top-level script: collect the motifs of the first MAX_USER_NUM distinct
# users found in MOVEMENT_DAT and dump their statistics to a CSV file.
class IdCounter(object):
    # Distinct ids seen so far. The set lives on the class, so it is shared
    # by every call for the lifetime of the process.
    ids = set()

    @staticmethod
    def count(new_id):
        # Register new_id and return the number of distinct ids seen.
        IdCounter.ids.add(new_id)
        return len(IdCounter.ids)

counter = MAX_USER_NUM
basemap = BSMAP
movement = MOVEMENT_DAT

print("Extracting motifs ...")
motifrepo = Motif()
for person in movement_reader(open(movement, 'rb'), BaseStationMap(basemap)):
    # Stop as soon as one more distinct user than allowed shows up.
    if IdCounter.count(person.id) > counter:
        break
    user_graph = seq2graph(person.locations, True)
    motifrepo.add_graph(user_graph)
motifrepo.stat().to_csv('motifs_stat.csv', index=False)

print("Plotting motifs ...")
motif_filter = range(3, 11)
# Global stat
# NOTE(review): all_motifs and totmotif are not used in the visible part of
# the script -- presumably consumed by plotting code further down; confirm.
all_motifs = motifrepo.all_motifs(motif_filter, True, True)
totmotif = motifrepo.number_of_motifs(motif_filter)
def top_compare():
    """ Compare top motif and mesostructure

    For every (C, kn) cluster file `mesos0825_s0dot2_c<C>_kn<kn>` -- first
    line: comma-separated user ids, remaining lines: rows of the pairwise
    distance matrix -- users are ranked by their mean distance to the
    cluster. Two quantities are then derived:

    * the mesostructure of the two best-ranked users and its similarity
      (`1 - struct_dist()`);
    * the most frequent mesostructure, up to isomorphism, among all pairs
      of rank-adjacent users, and its support (share of the N users).

    One tab-separated line per (C, kn) is written to `ofname`. Reads and
    writes the hard-coded files under `data/`; returns nothing.
    """
    datapath = 'data/mesos0825_s0dot2'
    movdata = 'data/hcl_mesos0825_sample0.2'
    bsmap = 'data/hcl_mesos0825_bm'
    ofname = os.path.join(datapath, 'mesos0825_s0dot2_top')

    # Mobility graphs of day 0825, grouped by number of distinct locations.
    mobgraphs = {}
    with open(movdata) as mfile:
        for person in movement_reader(mfile, BaseStationMap(bsmap)):
            if person.which_day() != '0825':
                continue
            nn = len(set(person.locations))
            if nn > 20:
                continue
            mobgraphs.setdefault(nn, {})[person.id] = person.convert2graph()

    new_file = True
    for C in range(2, 16):
        for kn in range(1, 5):
            print('%d %d' % (C, kn))

            # Read dist matrix for (group, cluster) users
            fileklab = os.path.join(datapath,
                                    'mesos0825_s0dot2_c%d_kn%d' % (C, kn))
            with open(fileklab) as kfile:
                # The header is consumed explicitly. The former
                # `i == 1` flag was a comparison, not an assignment, and
                # only skipped the header through the Python 2 leak of
                # list-comprehension variables.
                header = kfile.readline()
                uids = [int(tok) for tok in header.strip('\r\n').split(',')]
                distmat = np.array([
                    [float(tok) for tok in line.strip('\r\n').split(',')]
                    for line in kfile
                ])

            distvec = distmat.sum(1) / len(uids)
            uids_sorted = [x for (y, x) in sorted(zip(distvec, uids))]
            N = len(uids_sorted)
            print('Total users %d: ' % N)

            mgs = mobgraphs[C]
            top = Mesos(mgs[uids_sorted[0]], mgs[uids_sorted[1]])
            topmesos = top.mesos
            topmesos_sim = 1 - top.struct_dist()

            # Group the mesostructures of rank-adjacent users by isomorphism
            # class; each class is keyed by its first representative.
            motifs = {}
            for i in range(N - 1):
                pair_mesos = Mesos(mgs[uids_sorted[i]],
                                   mgs[uids_sorted[i + 1]]).mesos
                for key in motifs:
                    if isomorphism.is_isomorphic(key, pair_mesos):
                        motifs[key].append((pair_mesos, i))
                        break
                else:
                    motifs[pair_mesos] = [(pair_mesos, i)]

            res = sorted(
                [(len(value), value[0][0]) for value in motifs.values()],
                key=lambda x: x[0], reverse=True)
            topmotif = res[0][1]
            topmotif_supp = 1.0 * res[0][0] / N

            # Truncate on the first cluster, append afterwards.
            mode = 'wb' if new_file else 'ab'
            new_file = False
            with open(ofname, mode) as ofile:
                ofile.write('%d\t%d' % (C, kn))
                ofile.write('\t%.3f\t%.3f' % (topmesos_sim, topmotif_supp))
                ofile.write('\t%s' % dumps_mobgraph(topmesos))
                ofile.write('\t%s' % dumps_mobgraph(topmotif))
                ofile.write('\n')
def trv_distance():
    """ Travel distance for clustered users

    For every (C, kn) cluster file `mesos0825_s0dot2_c<C>_kn<kn>` -- first
    line: comma-separated user ids, remaining lines: rows of the pairwise
    distance matrix -- users are ranked by their mean distance to the
    cluster. One CSV row per user is written in rank order with the
    cluster identifiers, the summed edge weight of the mesostructure of
    the two best-ranked users, the user's travel statistics, and the
    user's own mean distance and self distance taken from the matrix.

    Reads and writes the hard-coded files under `data/`; returns nothing.
    """
    datapath = 'data/mesos0825_s0dot2'
    movdata = 'data/hcl_mesos0825_sample0.2'
    bsmap = 'data/hcl_mesos0825_bm'
    ofname = os.path.join(datapath, 'mesos0825_s0dot2_trd')

    travdist = {}
    mobgraphs = {}
    with open(movdata) as mfile:
        for person in movement_reader(mfile, BaseStationMap(bsmap)):
            if person.which_day() != '0825':
                continue
            nn = len(set(person.locations))
            if nn > 20:
                continue
            mobgraphs.setdefault(nn, {})[person.id] = person.convert2graph()

            circle_num = len(person.circles)
            edge_freq = np.mean(list(person.freq.values()))
            trvd = person.travel_dist()
            rg = person.radius_of_gyration()
            nloc = len(person.locations)
            travdist[person.id] = (rg, trvd, edge_freq, circle_num, nloc)

    new_file = True
    for C in range(2, 16):
        for kn in range(1, 5):
            print('%d %d' % (C, kn))

            # Read dist matrix for (group, cluster) users
            fileklab = os.path.join(datapath,
                                    'mesos0825_s0dot2_c%d_kn%d' % (C, kn))
            with open(fileklab) as kfile:
                # The header is consumed explicitly. The former
                # `i == 1` flag was a comparison, not an assignment, and
                # only skipped the header through the Python 2 leak of
                # list-comprehension variables.
                header = kfile.readline()
                uids = [int(tok) for tok in header.strip('\r\n').split(',')]
                distmat = np.array([
                    [float(tok) for tok in line.strip('\r\n').split(',')]
                    for line in kfile
                ])

            distvec = distmat.sum(1) / len(uids)
            # Rank matrix rows by (mean distance, uid), keeping the row
            # index so each uid stays paired with its own matrix row.
            order = sorted(range(len(uids)),
                           key=lambda k: (distvec[k], uids[k]))
            uids_sorted = [uids[k] for k in order]
            N = len(uids_sorted)
            print('Total users %d: ' % N)

            mgs = mobgraphs[C]
            topmesos = Mesos(mgs[uids_sorted[0]], mgs[uids_sorted[1]]).mesos
            eigndist = np.sum(
                [e[2]['weight'] for e in topmesos.edges(data=True)])

            # Truncate (and emit the CSV header) on the first cluster only.
            mode = 'wb' if new_file else 'ab'
            new_file = False
            with open(ofname, mode) as ofile:
                if mode == 'wb':
                    ofile.write(
                        'uid,group,clust,eigndist,rg,trvd,efreq,circlenum,nloc,dist,selfdist\n'
                    )
                for k in order:
                    uid = uids[k]
                    # Row k belongs to uids[k]. Indexing the matrix by rank
                    # position, as was done before, attached another
                    # user's distances to this uid.
                    dist = distvec[k]
                    selfdist = distmat[k][k]
                    rg, trvd, edge_freq, circle_num, nloc = travdist[uid]
                    ofile.write('%d,%d,%d,%.3f,%.3f,%.3f,%.3f,%d,%d,%.3f,%.3f' %
                                (uid, C, kn, eigndist, rg, trvd, edge_freq,
                                 circle_num, nloc, dist, selfdist))
                    ofile.write('\n')