def accu_dt(movdata, bsmap, output, log=True):
    """ Distribution of dwelling time for each person

    Every record yielded by ``movement_reader`` contributes the values of its
    ``accdwelling`` mapping to the pool of its user (``person.id``).  Each
    pool is divided by 3600 (presumably seconds -> hours; confirm against
    ``movement_reader``) and binned with ``np.histogram``.

    :param movdata: path of the movement file (opened in binary mode).
    :param bsmap: argument handed to ``BaseStationMap``.
    :param output: path of the text file to write.
    :param log: only the literal ``True`` selects ``np.logspace(-2,2,50)``
        bins; any other value selects ``np.arange(0,24.5,0.5)``.

    The output starts with a ``#bins ...`` line naming the bins, followed by
    one ``uid,count,count,...`` line per user.
    """
    bsmap = BaseStationMap(bsmap)

    res = {}
    # The context manager closes the input even if the reader raises.
    with open(movdata, 'rb') as ifile:
        for person in movement_reader(ifile, bsmap):
            # extend() copies the values, so nothing owned by `person` is kept.
            res.setdefault(person.id, []).extend(person.accdwelling.values())

    # Bins and the header describing them are selected together so the two
    # can never disagree.
    if log is True:
        bins = np.logspace(-2, 2, 50)
        header = '#bins np.logspace(-2,2,50)\n'
    else:
        bins = np.arange(0, 24.5, 0.5)
        header = '#bins np.arange(0,24.5,0.5)\n'

    with open(output, 'wb') as ofile:
        ofile.write(header)
        for uid in res:
            # 3600.0 forces true division; with integer dwelling times the
            # Python 2 `/` operator would floor every value.
            hist = np.histogram(np.array(res[uid]) / 3600.0, bins=bins)[0]
            ofile.write('%d,%s\n' % (uid, ','.join([str(h) for h in hist])))
def loc_dt(movdata, bsmap, output, log=True):
    """ Distribution of dwelling time for each person, each location

    For every user the dwelling values found in ``person.accdwelling`` are
    collected per key (presumably a location identifier; confirm against
    ``movement_reader``).  Each key is reduced to its average, the averages
    are divided by 3600 and binned with ``np.histogram``.

    :param movdata: path of the movement file (opened in binary mode).
    :param bsmap: argument handed to ``BaseStationMap``.
    :param output: path of the text file to write.
    :param log: only the literal ``True`` selects ``np.logspace(-2,2,50)``
        bins; any other value selects ``np.arange(0,24.5,0.5)``.

    The output starts with a ``#bins ...`` line naming the bins, followed by
    one ``uid,count,count,...`` line per user.
    """
    bsmap = BaseStationMap(bsmap)

    res = {}
    # The context manager closes the input even if the reader raises.
    with open(movdata, 'rb') as ifile:
        for person in movement_reader(ifile, bsmap):
            per_loc = res.setdefault(person.id, {})
            for loc, dwell in person.accdwelling.items():
                per_loc.setdefault(loc, []).append(dwell)

    # Bins and the header describing them are selected together so the two
    # can never disagree.
    if log is True:
        bins = np.logspace(-2, 2, 50)
        header = '#bins np.logspace(-2,2,50)\n'
    else:
        bins = np.arange(0, 24.5, 0.5)
        header = '#bins np.arange(0,24.5,0.5)\n'

    with open(output, 'wb') as ofile:
        ofile.write(header)
        for uid in res:
            vs = [np.average(v) for v in res[uid].values()]
            hist = np.histogram(np.array(vs) / 3600.0, bins=bins)[0]
            ofile.write('%d,%s\n' % (uid, ','.join([str(h) for h in hist])))
def opmap_stat():
    """ Print opportunity statistics for the trips of the first usable user

    For every consecutive pair of coordinates of a user, the opportunity
    nodes lying within ``rg * alpha`` of the trip origin are split into

    * intervening opportunities: closer than ``dist * (1 - delta)``;
    * available opportunities: within ``dist * (1 +/- delta)``;

    where ``dist`` is the length of the trip.  The two lists are printed,
    followed by a line with the user id, radius of gyration and the total and
    maximum weight of each list.  An empty list reports a maximum of 0.

    NOTE(review): the original source was collapsed onto one line, so the
    nesting of the final ``break`` is ambiguous.  It is placed on the outer
    loop here (only the first user with at least two distinct locations is
    processed) -- confirm against the upstream file.
    """
    movdata = 'data/hcl_mesos0825'
    bsmap = BaseStationMap('data/hcl_mesos0825_bm')
    opgraph = load_oppmap(bsmap)
    opnodes = opgraph.nodes(data=True)

    alpha = 1
    delta = 0.05

    # The context manager closes the input on the early exit as well.
    with open(movdata, 'rb') as ifile:
        for person in movement_reader(ifile, bsmap):
            if person.distinct_loc_num() < 2:
                continue

            rg = person.radius_of_gyration()
            locs = person.coordinates

            for i in range(0, len(locs) - 1):
                j = i + 1
                dist = greate_circle_distance(locs[i][0], locs[i][1],
                                              locs[j][0], locs[j][1])

                # all opportunities for current location with radius rg
                person_opnodes = []
                for nn, vv in opnodes:
                    if greate_circle_distance(nn[0], nn[1],
                                              locs[i][0], locs[i][1]) <= rg * alpha:
                        person_opnodes.append((nn, vv['weight']))

                aops = []
                iops = []
                for nn, value in person_opnodes:
                    gcd = greate_circle_distance(locs[i][0], locs[i][1],
                                                 nn[0], nn[1])
                    if gcd < dist * (1 - delta):
                        iops.append((nn, value))
                    elif gcd <= dist * (1 + delta):
                        aops.append((nn, value))

                print('%s %s' % (aops, iops))

                aweights = [p[1] for p in aops]
                iweights = [p[1] for p in iops]
                avops_total = np.sum(aweights)
                inops_total = np.sum(iweights)
                # np.max raises ValueError on an empty sequence, which
                # happens whenever no opportunity falls into a class.
                avops_max = np.max(aweights) if aweights else 0
                inops_max = np.max(iweights) if iweights else 0

                print(' '.join(str(v) for v in (
                    person.id, rg, avops_total, inops_total,
                    avops_max, inops_max)))

            break
def main(sc):
    """ Spark driver: extract mobility graphs and dump pairwise mesos stats

    Command line: ``<movdata> <bsmap> <output> <dates>`` where ``dates`` is a
    comma separated list such as ``0822,0823``.  With fewer than four
    arguments the usage text is written to stderr and the process exits
    with status -1.

    :param sc: the Spark context used to read ``movdata``.

    Steps:

    1. split the text records and group them by their first field;
    2. turn every group into mobility graph records (``mobility_graphs``);
    3. collect the records grouped by their third field to the driver;
    4. pair every record ``x`` with each record ``i`` of the same group
       satisfying ``x[0] <= i[0] and x[1] <= i[1]``;
    5. save ``dump_stat(gen_mesos(x, i))`` of every pair as text.
    """
    if len(sys.argv) < 5:
        sys.stderr.write(
            """ Usage: mesos-spark <movdata> <bsmap> <output> <dates> Note: dates is a comma separeted string list, e.g., 0822,0823 """
            + '\n')
        # `exit` is an interactive helper injected by the `site` module and
        # may be missing; sys.exit is always available.
        sys.exit(-1)

    # Read movement records
    movDataRDD = sc.textFile(sys.argv[1])
    bsmap = BaseStationMap(sys.argv[2])
    output = sys.argv[3]
    dates = sys.argv[4].split(',')

    # The road network used to be loaded here and immediately replaced by
    # None; the discarded load is dropped and None is still what
    # `mobility_graphs` receives.
    roadnet = None

    # Extract mobility graphs
    mobgraphRDD = movDataRDD.map(lambda x: record_splitter(x))\
        .groupBy(lambda x: x[0])\
        .flatMap(lambda x: mobility_graphs(x[1], bsmap, roadnet, dates=dates))\
        .cache()

    # Group key -> records of that group, held on the driver and shipped to
    # the executors inside the closure of `self_join`.
    group_dict = dict(mobgraphRDD.groupBy(lambda x: x[2]).collect())

    def self_join(x):
        """ Pair `x` with the qualifying records of its own group. """
        return [(x, i) for i in group_dict.get(x[2], ())
                if x[0] <= i[0] and x[1] <= i[1]]

    # Extract Mesoses
    # Assume 8 executors, 24 vcores per executor, we
    # get the partition number of multiple of 8 x 24 = 192.
    # NOTE(review): 300 and 800 are not multiples of 192.
    pairsRDD = mobgraphRDD.keyBy(lambda x: (x))\
        .partitionBy(300)\
        .flatMap(lambda x: self_join(x[1]))\
        .keyBy(lambda x: (x[0], x[1]))\
        .partitionBy(800)\
        .map(lambda x: dump_stat(gen_mesos(x[1][0], x[1][1])))

    pairsRDD.saveAsTextFile(output)
def mobgraph_degree(movdata, bsmap, output):
    """ Node degree of mobility graphs

    For every record with at least two distinct locations, the mean node
    degree of ``person.convert2graph()`` is stored together with the number
    of distinct locations.

    :param movdata: path of the movement file (opened in binary mode).
    :param bsmap: argument handed to ``BaseStationMap``.
    :param output: path of the CSV file to write.  It holds a ``nloc,ndgr``
        header followed by one ``%d,%.3f`` row per record, without a
        trailing newline.
    """
    nloc = []
    ndgr = []
    bsmap = BaseStationMap(bsmap)

    with open(movdata, 'rb') as ifile:
        for person in movement_reader(ifile, bsmap):
            if person.distinct_loc_num() < 2:
                continue
            graph = person.convert2graph()
            # dict() accepts both a plain mapping and an iterable of
            # (node, degree) pairs, whichever `degree()` returns.
            degrees = dict(graph.degree())
            ndgr.append(np.mean(list(degrees.values())))
            nloc.append(person.distinct_loc_num())

    # The output used to be left open, so buffered rows could be lost; the
    # context manager flushes and closes it.
    with open(output, 'wb') as ofile:
        ofile.write('nloc,ndgr\n')
        ofile.write('\n'.join(
            ['%d,%.3f' % (x, y) for x, y in zip(nloc, ndgr)]))
def accu_rg(movdata, bsmap, output):
    """ Accumulative R_g over multiple days

    The coordinates of each user are accumulated in reading order.  After
    every record whose ``which_day()`` is one of the seven known dates, the
    radius of gyration of everything accumulated so far is stored in the
    slot of that date.

    :param movdata: path of the movement file (opened in binary mode).
    :param bsmap: argument handed to ``BaseStationMap``.
    :param output: path of the text file to write.

    The output holds one ``uid,rg0,...,rg6`` line per user, sorted by uid and
    formatted with ``%.4f``.  A day without a record repeats the value of
    the previous day, or 0 when no earlier day has one.

    NOTE(review): the values are cumulative over time only if the input
    lists the days of a user in chronological order -- confirm.
    """
    bsmap = BaseStationMap(bsmap)
    dates = {'0820': 0, '0821': 1, '0822': 2, '0823': 3,
             '0824': 4, '0825': 5, '0826': 6}
    res = {}
    coords = {}

    with open(movdata, 'rb') as ifile:
        for person in movement_reader(ifile, bsmap):
            uid = person.id
            tdate = person.which_day()
            if tdate not in dates:
                continue

            # Extending a fresh list avoids mutating `person.coordinates`.
            coords.setdefault(uid, []).extend(person.coordinates)

            if uid not in res:
                # -1 marks "no record for that day".
                res[uid] = np.empty(7)
                res[uid].fill(-1)
            res[uid][dates[tdate]] = radius_of_gyration(coords[uid])

    with open(output, 'wb') as ofile:
        for uid in sorted(res):
            filled = []
            for value in res[uid]:
                if value == -1:
                    # Carry the previous day forward; start from 0.
                    value = filled[-1] if filled else 0
                filled.append(value)
            ofile.write('%d,%s\n' % (
                uid, ','.join(['%.4f' % v for v in filled])))
def loc_dt_all(movdata, bsmap, output):
    """ Average dwelling time for each person, each location

    For every user the dwelling values found in ``person.accdwelling`` are
    collected per key (presumably a location identifier; confirm against
    ``movement_reader``).  Each key is reduced to its average divided by
    3600, and the averages are listed in decreasing order.

    :param movdata: path of the movement file (opened in binary mode).
    :param bsmap: argument handed to ``BaseStationMap``.
    :param output: path of the text file to write; one
        ``uid,value,value,...`` line per user with ``%.3f`` values.
    """
    bsmap = BaseStationMap(bsmap)

    res = {}
    # The context manager closes the input even if the reader raises.
    with open(movdata, 'rb') as ifile:
        for person in movement_reader(ifile, bsmap):
            per_loc = res.setdefault(person.id, {})
            for loc, dwell in person.accdwelling.items():
                per_loc.setdefault(loc, []).append(dwell)

    with open(output, 'wb') as ofile:
        for uid in res:
            vs = sorted([np.average(v) / 3600 for v in res[uid].values()],
                        reverse=True)
            ofile.write('%d,%s\n' % (uid, ','.join(['%.3f' % v for v in vs])))
def daily_rg(movdata, bsmap, output):
    """ R_g for one day

    Records are grouped by the ``%m%d`` rendering of ``person.dtstart``.
    Each group is written to its own file ``<output>/<mmdd>``.

    :param movdata: path of the movement file (opened in binary mode).
    :param bsmap: argument handed to ``BaseStationMap``.
    :param output: directory receiving the per-day files.  It is created,
        parents included, when there is something to write.

    Each file holds one ``uid,rg`` line per record (``%d,%.4f``), sorted by
    uid.

    :raises OSError: if the output directory cannot be created.
    """
    bsmap = BaseStationMap(bsmap)

    res = {}
    with open(movdata, 'rb') as ifile:
        for person in movement_reader(ifile, bsmap):
            tdate = person.dtstart.strftime("%m%d")
            res.setdefault(tdate, []).append(
                (person.id, person.radius_of_gyration()))

    # Create the directory once and let genuine failures surface, instead
    # of retrying per date and swallowing every error.
    if res and not os.path.isdir(output):
        os.makedirs(output)

    for tdate in res:
        with open(os.path.join(output, tdate), 'wb') as ofile:
            for uid, rg in sorted(res[tdate], key=lambda x: x[0]):
                ofile.write('%d,%.4f\n' % (uid, rg))
def empirical_data():
    """ Dump empirical trajectory statistics of the sampled 0822 data

    Reads ``data/hcl_mesos0822_sample0.2`` and writes one tab separated line
    per record with ``len(person) >= 2`` to ``data/mesos_model_emp_stat2``:

    ``uid  rg  totloc  totdist  d1,d2,...``

    where ``totloc`` is the number of distinct entries of
    ``person.locations``, ``d*`` are the great-circle distances between
    consecutive coordinates and ``totdist`` is their sum.
    """
    ifname = 'data/hcl_mesos0822_sample0.2'
    bsmap = BaseStationMap('data/hcl_mesos0822_bm')

    # Both files are closed by the context manager, even on errors.
    with open('data/mesos_model_emp_stat2', 'wb') as ofile, \
            open(ifname) as ifile:
        for person in movement_reader(ifile, bsmap):
            if len(person) < 2:
                continue

            uid = person.id
            rg = person.radius_of_gyration()
            totloc = len(set(person.locations))

            traj = person.coordinates
            trvdist = [greate_circle_distance(traj[k][0], traj[k][1],
                                              traj[k + 1][0], traj[k + 1][1])
                       for k in range(len(traj) - 1)]
            totdist = np.sum(trvdist)

            ofile.write('%d\t%.3f\t%d\t%.3f\t%s\n' % (
                uid, rg, totloc, totdist,
                ','.join(['%.3f' % d for d in trvdist]),
            ))
def validate_selfsim():
    """ Join the self-similarity modes with the users' mobility graphs

    The ``_ssmode`` file is a CSV whose first line is skipped and whose
    remaining lines are parsed as ``uid,group,clust,dist,selfdist,mode``.
    For every movement record of a listed user with at least two distinct
    locations, one tab separated line is written to the ``_ssmode_mg`` file:

    ``uid  group  clust  dist  selfdist  mode  <dumped mobility graph>``

    The number of users read from the mode file is printed.
    """
    ssfile = 'data/mesos0825_s0dot2/mesos0825_s0dot2_ssmode'
    movdata = 'data/hcl_mesos0825_sample0.2'
    bsmap = 'data/hcl_mesos0825_bm'
    ofname = 'data/mesos0825_s0dot2/mesos0825_s0dot2_ssmode_mg'

    users = {}
    with open(ssfile, 'rb') as sfile:
        # Skip the first line; an empty file simply yields no users.
        next(sfile, None)
        for line in sfile:
            parts = line.strip('\r\n').split(',')
            users[int(parts[0])] = (
                int(parts[1]),      # group
                int(parts[2]),      # clust
                float(parts[3]),    # dist
                float(parts[4]),    # selfdist
                str(parts[5]),      # mode
            )
    print(len(users))

    with open(ofname, 'wb') as ofile, open(movdata) as ifile:
        for person in movement_reader(ifile, BaseStationMap(bsmap)):
            if person.id not in users or person.distinct_loc_num() < 2:
                continue
            user = users[person.id]
            ofile.write('%d\t%d\t%d\t%.3f\t%.3f\t%s\t%s\n' % (
                person.id, user[0], user[1], user[2], user[3], user[4],
                dumps_mobgraph(person.convert2graph())))
def main(sc):
    """ Spark driver: global and per-group counts of the movement data

    Command line: ``<movdata> <output>``.  With fewer than two arguments the
    usage text is written to stderr and the process exits with status -1.

    :param sc: the Spark context used to read ``movdata``.

    The following text outputs are saved below ``output``:

    * ``totalrecords`` -- number of split records;
    * ``totalbs``      -- number of distinct values of field 2 of a record;
    * ``totalmgs``     -- number of mobility graph records;
    * ``totalusers``   -- number of distinct values of field 0 of those;
    * ``groupstat``    -- per value of field 2 of a mobility graph record,
      the pair (distinct field-0 values, number of records).
    """
    if len(sys.argv) < 3:
        sys.stderr.write("Usage: hzstat <movdata> <output>" + '\n')
        # `exit` is an interactive helper injected by the `site` module and
        # may be missing; sys.exit is always available.
        sys.exit(-1)

    # Read movement records
    movDataRDD = sc.textFile(sys.argv[1])
    output = sys.argv[2]
    bsmap = BaseStationMap(BSMAP)

    def save_count(value, name):
        """ Save a single number as the one-partition text output `name`. """
        sc.parallelize([value], 1).saveAsTextFile(os.path.join(output, name))

    movDataRDD = movDataRDD.map(lambda x: record_splitter(x))
    save_count(movDataRDD.count(), 'totalrecords')
    save_count(movDataRDD.map(lambda x: x[2]).distinct().count(), 'totalbs')

    # Extract mobility graphs
    # Cached because three separate actions below consume this RDD.
    mobgraphRDD = movDataRDD.groupBy(lambda x: x[0]).flatMap(
        lambda x: mobility_graphs(x[1], bsmap, None)).cache()
    save_count(mobgraphRDD.count(), 'totalmgs')
    save_count(mobgraphRDD.map(lambda x: x[0]).distinct().count(),
               'totalusers')

    def group_stat(mgiter):
        """ Return (distinct users, number of graphs) in a single pass.

        One pass keeps the result right even if `mgiter` is a one-shot
        iterator; a second traversal would then see nothing.
        """
        users = set()
        totalgraphs = 0
        for mg in mgiter:
            users.add(mg[0])
            totalgraphs += 1
        return (len(users), totalgraphs)

    mobgraphRDD.groupBy(lambda x: x[2]).mapValues(lambda x: group_stat(x))\
        .saveAsTextFile(os.path.join(output, 'groupstat'))
def model_maxoppo():
    """ Simulate trajectories that always jump to the farthest opportunity

    For user ids 1..999 a dwelling-time model, a home and a radius of
    gyration are drawn at random.  Until the accumulated dwelling time
    reaches ``TIMEBOUND`` the user repeatedly

    1. draws a dwelling time, clipped so the total never exceeds the bound;
    2. moves to the candidate returned by ``person_map`` that is farthest
       from the current position (home when no candidate is farther than 0).

    The trajectory is closed by a return home.  One tab separated line per
    valid user is written to ``data/mesos_model_mom_stat``:

    ``uid  rg  totloc  totdist  d1,d2,...``

    Users for which the location step raises are discarded.  Progress values
    are printed to stdout.
    """
    ofname = 'data/mesos_model_mom_stat'
    bsmap = BaseStationMap('data/hcl_mesos0825_bm')
    opmap = load_oppmap(bsmap)
    dtmodel = load_dtmodels()
    nmodel = len(dtmodel)
    TIMEBOUND = 18

    with open(ofname, 'wb') as ofile:
        for uid in range(1, 1000):
            # User profile
            # randint draws from [0, nmodel).  random_integers(nmodel) drew
            # from [1, nmodel]: it could index past the end and never
            # selected the first model.
            dtm = dtmodel[np.random.randint(nmodel)]
            rhome = gen_random_home(opmap)
            rg = gen_rg.rvs()
            print('%d: %.3f' % (uid, rg))

            traj = [rhome]
            traj_dts = [6]
            acctime = 0
            isvalid = True

            while acctime < TIMEBOUND:
                print(acctime)

                # determine dwelling time
                dt = random_dt(dtm[0], dtm[1], dtm[2], dtm[3], dtm[4])
                if acctime + dt > TIMEBOUND:
                    dt = TIMEBOUND - acctime
                traj_dts.append(dt)
                acctime += dt

                # determine location
                try:
                    iops = person_map(traj, rg, opmap)
                    maxloc = rhome
                    maxdist = 0
                    for cand in iops:
                        if cand != traj[-1]:
                            dist = greate_circle_distance(
                                cand[0], cand[1], traj[-1][0], traj[-1][1])
                            if dist > maxdist:
                                maxloc = cand
                                maxdist = dist
                    traj.append(maxloc)
                except Exception:
                    # The user is dropped below, so further steps would be
                    # wasted.  KeyboardInterrupt/SystemExit now propagate.
                    isvalid = False
                    break

            if not isvalid:
                print(isvalid)
                continue

            traj.append(rhome)
            traj_dts.append(0)

            # stat
            totloc = len(set(traj))
            trvdist = [greate_circle_distance(traj[k][0], traj[k][1],
                                              traj[k + 1][0], traj[k + 1][1])
                       for k in range(len(traj) - 1)]
            totdist = np.sum(trvdist)
            print(trvdist)

            ofile.write('%d\t%.3f\t%d\t%.3f\t%s\n' % (
                uid, rg, totloc, totdist,
                ','.join(['%.3f' % d for d in trvdist]),
            ))
class IdCounter(object):
    """ Process-wide counter of the distinct ids seen so far. """

    # Shared by all callers on purpose: ids are stored on the class itself.
    ids = set()

    @staticmethod
    def count(new_id):
        """ Register `new_id` and return the number of distinct ids seen. """
        IdCounter.ids.add(new_id)
        return len(IdCounter.ids)


counter = MAX_USER_NUM
basemap = BSMAP
movement = MOVEMENT_DAT

print("Extracting motifs ...")
motifrepo = Motif()
# The context manager closes the movement file, including on the early
# `break` once more than `counter` distinct users have been seen.
with open(movement, 'rb') as _movement_file:
    for person in movement_reader(_movement_file, BaseStationMap(basemap)):
        if IdCounter.count(person.id) > counter:
            break

        user_graph = seq2graph(person.locations, True)
        motifrepo.add_graph(user_graph)

motifrepo.stat().to_csv('motifs_stat.csv', index=False)

print("Plotting motifs ...")
motif_filter = range(3, 11)

# Global stat
all_motifs = motifrepo.all_motifs(motif_filter, True, True)
totmotif = motifrepo.number_of_motifs(motif_filter)
def top_compare():
    """ Compare top motif and mesostructure

    Mobility graphs of the records of day ``0825`` with at most 20 distinct
    locations are indexed by (number of distinct locations, user id).

    For every ``C`` in 2..15 and ``kn`` in 1..4 the file
    ``mesos0825_s0dot2_c<C>_kn<kn>`` is read: its first line lists user ids,
    the remaining lines form a distance matrix.  Users are ordered by their
    row sum divided by the number of users (ties by uid).  Then

    * the "top mesos" is the mesostructure of the first two users, and its
      similarity is ``1 - struct_dist()``;
    * the mesostructures of all consecutive user pairs are grouped by graph
      isomorphism; the "top motif" is the representative of the largest
      group and its support is the group size divided by the user count.

    One tab separated line per (C, kn) is written to
    ``mesos0825_s0dot2_top``; the file is truncated on the first write and
    appended to afterwards.
    """
    datapath = 'data/mesos0825_s0dot2'
    movdata = 'data/hcl_mesos0825_sample0.2'
    bsmap = 'data/hcl_mesos0825_bm'
    ofname = os.path.join(datapath, 'mesos0825_s0dot2_top')

    mobgraphs = {}
    with open(movdata) as ifile:
        for person in movement_reader(ifile, BaseStationMap(bsmap)):
            if person.which_day() != '0825':
                continue
            nn = len(set(person.locations))
            if nn > 20:
                continue
            mobgraphs.setdefault(nn, {})[person.id] = person.convert2graph()

    new_file = True
    for C in range(2, 16):
        for kn in range(1, 5):
            print('%d %d' % (C, kn))

            # Read dist matrix for (group, cluster) users
            fileklab = os.path.join(datapath,
                                    'mesos0825_s0dot2_c%d_kn%d' % (C, kn))
            with open(fileklab) as kfile:
                lines = iter(kfile)
                # The header is consumed explicitly.  The former `i == 1`
                # was a comparison, not an assignment, and only worked
                # because Python 2 leaks comprehension variables.
                header = next(lines, '')
                uids = [int(tok) for tok in header.strip('\r\n').split(',')]
                distmat = np.array(
                    [[float(tok) for tok in line.strip('\r\n').split(',')]
                     for line in lines])

            distvec = distmat.sum(1) / len(uids)
            uids_sorted = [uid for (_, uid) in sorted(zip(distvec, uids))]
            N = len(uids_sorted)
            print('Total users %d: ' % N)

            mgs = mobgraphs[C]
            top = Mesos(mgs[uids_sorted[0]], mgs[uids_sorted[1]])
            topmesos = top.mesos
            topmesos_sim = 1 - top.struct_dist()

            # Representative graph -> [(graph, index of the pair), ...]
            motifs = {}
            for idx in range(N - 1):
                pair_mesos = Mesos(mgs[uids_sorted[idx]],
                                   mgs[uids_sorted[idx + 1]]).mesos
                for key in motifs:
                    if isomorphism.is_isomorphic(key, pair_mesos):
                        motifs[key].append((pair_mesos, idx))
                        break
                else:
                    # No isomorphic representative yet: start a new group.
                    motifs[pair_mesos] = [(pair_mesos, idx)]

            # Largest group; ties resolve to the first one in dict order,
            # as the stable descending sort used previously did.
            top_group = max(motifs.values(), key=len)
            topmotif = top_group[0][0]
            topmotif_supp = 1.0 * len(top_group) / N

            mode = 'wb' if new_file else 'ab'
            new_file = False
            with open(ofname, mode) as ofile:
                ofile.write('%d\t%d\t%.3f\t%.3f\t%s\t%s\n' % (
                    C, kn, topmesos_sim, topmotif_supp,
                    dumps_mobgraph(topmesos), dumps_mobgraph(topmotif)))
def trv_distance():
    """ Travel distance for clustered users

    For the records of day ``0825`` with at most 20 distinct locations, the
    mobility graph and the tuple (rg, travel distance, mean of
    ``person.freq`` values, number of circles, number of locations) are
    stored per user.

    For every ``C`` in 2..15 and ``kn`` in 1..4 the file
    ``mesos0825_s0dot2_c<C>_kn<kn>`` is read: its first line lists user ids,
    the remaining lines form a distance matrix.  Users are ordered by their
    row sum divided by the number of users (ties by uid).  ``eigndist`` is
    the total edge weight of the mesostructure of the first two users.

    One CSV row per user is written to ``mesos0825_s0dot2_trd`` with columns
    ``uid,group,clust,eigndist,rg,trvd,efreq,circlenum,nloc,dist,selfdist``.
    The file is truncated and given its header on the first write, then
    appended to.
    """
    datapath = 'data/mesos0825_s0dot2'
    movdata = 'data/hcl_mesos0825_sample0.2'
    bsmap = 'data/hcl_mesos0825_bm'
    ofname = os.path.join(datapath, 'mesos0825_s0dot2_trd')

    travdist = {}
    mobgraphs = {}
    with open(movdata) as ifile:
        for person in movement_reader(ifile, BaseStationMap(bsmap)):
            if person.which_day() != '0825':
                continue
            nn = len(set(person.locations))
            if nn > 20:
                continue
            mobgraphs.setdefault(nn, {})[person.id] = person.convert2graph()

            circle_num = len(person.circles)
            edge_freq = np.mean(list(person.freq.values()))
            trvd = person.travel_dist()
            rg = person.radius_of_gyration()
            nloc = len(person.locations)
            travdist[person.id] = (rg, trvd, edge_freq, circle_num, nloc)

    new_file = True
    for C in range(2, 16):
        for kn in range(1, 5):
            print('%d %d' % (C, kn))

            # Read dist matrix for (group, cluster) users
            fileklab = os.path.join(datapath,
                                    'mesos0825_s0dot2_c%d_kn%d' % (C, kn))
            with open(fileklab) as kfile:
                lines = iter(kfile)
                # The header is consumed explicitly.  The former `i == 1`
                # was a comparison, not an assignment, and only worked
                # because Python 2 leaks comprehension variables.
                header = next(lines, '')
                uids = [int(tok) for tok in header.strip('\r\n').split(',')]
                distmat = np.array(
                    [[float(tok) for tok in line.strip('\r\n').split(',')]
                     for line in lines])

            distvec = distmat.sum(1) / len(uids)
            # Matrix row indices ordered by (mean distance, uid).  Keeping
            # the indices lets each user be matched with its own row below.
            order = sorted(range(len(uids)),
                           key=lambda k: (distvec[k], uids[k]))
            uids_sorted = [uids[k] for k in order]
            N = len(uids_sorted)
            print('Total users %d: ' % N)

            mgs = mobgraphs[C]
            topmesos = Mesos(mgs[uids_sorted[0]], mgs[uids_sorted[1]]).mesos
            eigndist = np.sum(
                [e[2]['weight'] for e in topmesos.edges(data=True)])

            mode = 'wb' if new_file else 'ab'
            new_file = False
            with open(ofname, mode) as ofile:
                if mode == 'wb':
                    ofile.write(
                        'uid,group,clust,eigndist,rg,trvd,efreq,circlenum,nloc,dist,selfdist\n'
                    )
                for k in order:
                    uid = uids[k]
                    # Row k belongs to uids[k].  Previously the i-th sorted
                    # uid was paired with the i-th unsorted row, so dist and
                    # selfdist were attributed to the wrong user.
                    dist = np.sum(distmat[k]) / len(uids)
                    selfdist = distmat[k][k]
                    rg, trvd, edge_freq, circle_num, nloc = travdist[uid]
                    ofile.write(
                        '%d,%d,%d,%.3f,%.3f,%.3f,%.3f,%d,%d,%.3f,%.3f\n' %
                        (uid, C, kn, eigndist, rg, trvd, edge_freq,
                         circle_num, nloc, dist, selfdist))