def accu_dt(movdata, bsmap, output, log=True):
    """ Distribution of dwelling time for each person

    Pools the values of ``person.accdwelling`` over every record that
    ``movement_reader`` yields for a user, then writes one histogram row
    per user (``uid,count,count,...``) to ``output``.  The first line of
    the file records which bin edges were used.

    :param movdata: path of the movement data file (opened in binary mode).
    :param bsmap: argument passed to ``BaseStationMap``.
    :param output: path of the file to write.
    :param log: only the exact value ``True`` selects the logarithmic bins
        ``np.logspace(-2,2,50)``; anything else selects the linear bins
        ``np.arange(0,24.5,0.5)``.
    """
    bsmap = BaseStationMap(bsmap)

    res = {}
    with open(movdata, 'rb') as ifile:
        for person in movement_reader(ifile, bsmap):
            # Always accumulate into a list we own, so the reader's data is
            # never mutated and dict views are accepted as well.
            res.setdefault(person.id, []).extend(person.accdwelling.values())

    # Bin edges and the header describing them are chosen together so the
    # two can never drift apart.
    if log is True:
        bins = np.logspace(-2, 2, 50)
        header = '#bins np.logspace(-2,2,50)\n'
    else:
        bins = np.arange(0, 24.5, 0.5)
        header = '#bins np.arange(0,24.5,0.5)\n'

    with open(output, 'wb') as ofile:
        ofile.write(header)
        for uid in res:
            # Divide by a float: with integer dwelling times, "/ 3600"
            # floors under Python 2 and would collapse every sub-unit value.
            # (presumably seconds -> hours; confirm against movement_reader)
            hist = np.histogram(np.array(res[uid]) / 3600.0, bins=bins)[0]
            ofile.write('%d,%s\n' % (uid, ','.join([str(h) for h in hist])))
def loc_dt(movdata, bsmap, output, log=True):
    """ Distribution of dwelling time for each person, each location

    For every user, the values of ``person.accdwelling`` are collected per
    key (one list per location key) over all records.  Each list is then
    averaged, and the histogram of those per-location averages is written
    as one row per user (``uid,count,count,...``).  The first line of the
    file records which bin edges were used.

    :param movdata: path of the movement data file (opened in binary mode).
    :param bsmap: argument passed to ``BaseStationMap``.
    :param output: path of the file to write.
    :param log: only the exact value ``True`` selects the logarithmic bins
        ``np.logspace(-2,2,50)``; anything else selects the linear bins
        ``np.arange(0,24.5,0.5)``.
    """
    bsmap = BaseStationMap(bsmap)

    res = {}
    with open(movdata, 'rb') as ifile:
        for person in movement_reader(ifile, bsmap):
            per_loc = res.setdefault(person.id, {})
            for loc, dwell in person.accdwelling.items():
                per_loc.setdefault(loc, []).append(dwell)

    # Bin edges and the header describing them are chosen together so the
    # two can never drift apart.
    if log is True:
        bins = np.logspace(-2, 2, 50)
        header = '#bins np.logspace(-2,2,50)\n'
    else:
        bins = np.arange(0, 24.5, 0.5)
        header = '#bins np.arange(0,24.5,0.5)\n'

    with open(output, 'wb') as ofile:
        ofile.write(header)
        for uid in res:
            vs = [np.average(v) for v in res[uid].values()]
            hist = np.histogram(np.array(vs) / 3600.0, bins=bins)[0]
            ofile.write('%d,%s\n' % (uid, ','.join([str(h) for h in hist])))
def opmap_stat():
    """ Print opportunity statistics along the first usable trajectory

    Loads the opportunity graph via ``load_oppmap`` and, for the first
    person with at least two distinct locations, walks over consecutive
    location pairs.  For each step, the opportunity nodes within
    ``rg * alpha`` of the current location are split into:

    * intervening opportunities: closer than ``dist * (1 - delta)``;
    * available opportunities: within ``dist * (1 +/- delta)``;

    where ``dist`` is the length of the step.  The two lists and their
    weight totals / maxima are printed.  Only the first qualifying person
    is processed (the loop breaks afterwards).
    """
    movdata = 'data/hcl_mesos0825'
    bsmap = BaseStationMap('data/hcl_mesos0825_bm')

    opgraph = load_oppmap(bsmap)
    opnodes = opgraph.nodes(data=True)

    alpha = 1
    delta = 0.05

    with open(movdata, 'rb') as ifile:
        for person in movement_reader(ifile, bsmap):
            if person.distinct_loc_num() < 2:
                continue

            rg = person.radius_of_gyration()
            locs = person.coordinates

            for i in range(len(locs) - 1):
                cur = locs[i]
                nxt = locs[i + 1]
                dist = greate_circle_distance(cur[0], cur[1], nxt[0], nxt[1])
                lower = dist * (1 - delta)
                upper = dist * (1 + delta)

                aops = []
                iops = []
                for nn, vv in opnodes:
                    # One distance evaluation serves both the radius filter
                    # and the band classification.
                    gcd = greate_circle_distance(cur[0], cur[1], nn[0], nn[1])
                    if gcd > rg * alpha:
                        continue
                    if gcd < lower:
                        iops.append((nn, vv['weight']))
                    elif gcd <= upper:
                        aops.append((nn, vv['weight']))

                print('%s %s' % (aops, iops))

                aweights = [p[1] for p in aops]
                iweights = [p[1] for p in iops]
                avops_total = np.sum(aweights)
                inops_total = np.sum(iweights)
                # np.max raises ValueError on an empty sequence; report 0
                # when a band holds no opportunity instead of crashing.
                avops_max = np.max(aweights) if aweights else 0
                inops_max = np.max(iweights) if iweights else 0

                print(' '.join(str(x) for x in (
                    person.id, rg, avops_total, inops_total,
                    avops_max, inops_max)))

            # Only the first qualifying person is inspected.
            break
def main(sc):
    """ Spark driver: mobility graphs -> pairwise mesostructure statistics

    Expects four command line arguments (movement data, base station map,
    output location, comma separated dates).  Movement records are grouped
    by their first field, turned into mobility graphs, and every graph is
    paired with the graphs of the same group (third field) that dominate it
    on the first two fields.  ``dump_stat(gen_mesos(a, b))`` of each pair is
    saved as text under the output location.

    :param sc: Spark context used to read the input and build the RDDs.
    """
    if len(sys.argv) < 5:
        usage = \
"""
Usage: mesos-spark <movdata> <bsmap> <output> <dates>
    Note: dates is a comma separeted string list, e.g., 0822,0823
"""
        sys.stderr.write(usage + '\n')
        exit(-1)

    # Command line inputs
    movDataRDD = sc.textFile(sys.argv[1])
    bsmap = BaseStationMap(sys.argv[2])
    output = sys.argv[3]
    dates = sys.argv[4].split(',')

    # NOTE(review): the road network is constructed and immediately
    # discarded; the pipeline below always runs with roadnet=None.
    RoadNetwork(HZ_ROADNET)
    roadnet = None

    # Mobility graphs, one or more per user
    records = movDataRDD.map(lambda rec: record_splitter(rec))
    by_user = records.groupBy(lambda rec: rec[0])
    mobgraphRDD = by_user.flatMap(
        lambda grp: mobility_graphs(grp[1], bsmap, roadnet, dates=dates)
    ).cache()

    # Driver-side lookup table: group key -> mobility graphs of that group
    group_dict = dict(mobgraphRDD.groupBy(lambda mg: mg[2]).collect())

    def self_join(x):
        # Pair x with every graph of its group that is >= x on fields 0, 1.
        candidates = group_dict.get(x[2], ())
        return [(x, other) for other in candidates
                if x[0] <= other[0] and x[1] <= other[1]]

    # Extract Mesoses
    # Assume 8 executors, 24 vcores per executor, we
    # get the partition number of multiple of 8 x 24 = 192.
    keyed = mobgraphRDD.keyBy(lambda mg: (mg)).partitionBy(300)
    joined = keyed.flatMap(lambda kv: self_join(kv[1]))
    repartitioned = joined.keyBy(lambda pair: (pair[0], pair[1]))\
        .partitionBy(800)
    pairsRDD = repartitioned.map(
        lambda kv: dump_stat(gen_mesos(kv[1][0], kv[1][1])))

    pairsRDD.saveAsTextFile(output)
def mobgraph_degree(movdata, bsmap, output):
    """ Node degree of mobility graphs

    For every record with at least two distinct locations, stores the
    number of distinct locations and the mean node degree of the graph
    returned by ``person.convert2graph()``.  The pairs are written to
    ``output`` as CSV with the header ``nloc,ndgr`` (no trailing newline
    after the last row).

    :param movdata: path of the movement data file (opened in binary mode).
    :param bsmap: argument passed to ``BaseStationMap``.
    :param output: path of the CSV file to write.
    """
    nloc = []
    ndgr = []
    bsmap = BaseStationMap(bsmap)

    with open(movdata, 'rb') as ifile:
        for person in movement_reader(ifile, bsmap):
            n = person.distinct_loc_num()
            if n < 2:
                continue

            graph = person.convert2graph()
            ndgr.append(np.mean(list(graph.degree().values())))
            nloc.append(n)

    # The original never closed the output file, leaving the final flush to
    # garbage collection; the context manager makes it deterministic.
    with open(output, 'wb') as ofile:
        ofile.write('nloc,ndgr\n')
        ofile.write('\n'.join(['%d,%.3f' % (x, y) for x, y in zip(nloc, ndgr)]))
def accu_rg(movdata, bsmap, output):
    """ Accumulative R_g over multiple days

    For each user, coordinates of all records seen so far are pooled and
    ``radius_of_gyration`` of the pool is stored in the slot of the record's
    day (``person.which_day()``; days 0820..0826 map to slots 0..6, other
    days are skipped).  Slots that never received a value repeat the
    previous slot, or 0 when there is no previous slot.  Rows are written
    sorted by user id as ``uid,v0,...,v6`` with four decimals.

    NOTE(review): the result is only "accumulative" in time if the reader
    yields each user's records in chronological order -- confirm.

    :param movdata: path of the movement data file (opened in binary mode).
    :param bsmap: argument passed to ``BaseStationMap``.
    :param output: path of the file to write.
    """
    bsmap = BaseStationMap(bsmap)

    dates = {'0820': 0, '0821': 1, '0822': 2, '0823': 3, '0824': 4, '0825': 5, '0826': 6}

    res = {}
    coords = {}
    with open(movdata, 'rb') as ifile:
        for person in movement_reader(ifile, bsmap):
            uid = person.id
            tdate = person.which_day()
            if tdate not in dates:
                continue

            # Extend a list we own; the original stored person.coordinates
            # itself and later extended that very object in place.
            coords.setdefault(uid, []).extend(person.coordinates)

            if uid not in res:
                # -1 marks "no value for this day yet"
                res[uid] = np.empty(7)
                res[uid].fill(-1)

            res[uid][dates[tdate]] = radius_of_gyration(coords[uid])

    with open(output, 'wb') as ofile:
        for uid in sorted(res):
            filled = []
            for n in res[uid]:
                if n == -1:
                    # Carry the previous day forward; 0 before any data.
                    filled.append(filled[-1] if filled else 0)
                else:
                    filled.append(n)
            ofile.write('%d,%s\n' % (uid, ','.join(['%.4f' % j for j in filled])))
def loc_dt_all(movdata, bsmap, output):
    """ Average dwelling time for each person, each location

    For every user, the values of ``person.accdwelling`` are collected per
    key over all records and averaged per key.  Each output row is
    ``uid,v1,v2,...`` where the values are those averages divided by 3600,
    sorted in descending order and formatted with three decimals.

    Note that the per-location averages are written, not the individual
    raw values.

    :param movdata: path of the movement data file (opened in binary mode).
    :param bsmap: argument passed to ``BaseStationMap``.
    :param output: path of the file to write.
    """
    bsmap = BaseStationMap(bsmap)

    res = {}
    with open(movdata, 'rb') as ifile:
        for person in movement_reader(ifile, bsmap):
            per_loc = res.setdefault(person.id, {})
            for loc, dwell in person.accdwelling.items():
                per_loc.setdefault(loc, []).append(dwell)

    with open(output, 'wb') as ofile:
        for uid in res:
            vs = sorted([np.average(v) / 3600 for v in res[uid].values()],
                        reverse=True)
            ofile.write('%d,%s\n' % (uid, ','.join(['%.3f' % v for v in vs])))
def daily_rg(movdata, bsmap, output):
    """ R_g for one day

    Groups ``(uid, radius_of_gyration)`` pairs by the ``%m%d`` date of
    ``person.dtstart`` and writes one file per date inside the directory
    ``output``.  Each file holds ``uid,rg`` rows (four decimals) sorted by
    user id.

    :param movdata: path of the movement data file (opened in binary mode).
    :param bsmap: argument passed to ``BaseStationMap``.
    :param output: directory receiving one file per date; created when
        missing.  Nothing is created when the input yields no record.
    :raises OSError: if ``output`` cannot be created and is not already a
        directory.
    """
    bsmap = BaseStationMap(bsmap)

    res = {}
    with open(movdata, 'rb') as ifile:
        for person in movement_reader(ifile, bsmap):
            tdate = person.dtstart.strftime("%m%d")
            res.setdefault(tdate, []).append(
                (person.id, person.radius_of_gyration()))

    if not res:
        return

    # Create the directory once.  Only "already exists" is tolerated; the
    # original swallowed every error here and failed later on open().
    try:
        os.makedirs(output)
    except OSError:
        if not os.path.isdir(output):
            raise

    for tdate in res:
        with open(os.path.join(output, tdate), 'wb') as ofile:
            for uid, rg in sorted(res[tdate], key=lambda x: x[0]):
                ofile.write('%d,%.4f\n' % (uid, rg))
# Example #9
# 0
def empirical_data():
    """ Dump per-record empirical travel statistics

    Reads ``data/hcl_mesos0822_sample0.2`` and, for every record whose
    ``len()`` is at least 2, writes a tab separated row to
    ``data/mesos_model_emp_stat2``: user id, radius of gyration, number of
    distinct locations, total travel distance, and the comma separated
    distances between consecutive coordinates.
    """
    ifname = 'data/hcl_mesos0822_sample0.2'
    bsmap = BaseStationMap('data/hcl_mesos0822_bm')

    # Both files are closed even if a record fails half way through.
    with open(ifname) as ifile, \
            open('data/mesos_model_emp_stat2', 'wb') as ofile:
        for person in movement_reader(ifile, bsmap):
            if len(person) < 2:
                continue

            uid = person.id
            rg = person.radius_of_gyration()
            totloc = len(set(person.locations))
            traj = person.coordinates
            # Great-circle length of every hop between consecutive points
            trvdist = [greate_circle_distance(a[0], a[1], b[0], b[1])
                       for a, b in zip(traj, traj[1:])]
            totdist = np.sum(trvdist)

            ofile.write('%d\t%.3f\t%d\t%.3f\t%s\n' % (
                uid, rg, totloc, totdist,
                ','.join(['%.3f' % i for i in trvdist]),
            ))
# Example #10
# 0
def validate_selfsim():
    """ Join self-similarity results with the users' mobility graphs

    Loads ``uid,group,clust,dist,selfdist,mode`` rows (first line is a
    header and is skipped) from the ``ssmode`` file, then writes one tab
    separated row per movement record whose user appears in that table and
    has at least two distinct locations: the six table fields followed by
    ``dumps_mobgraph(person.convert2graph())``.
    """
    ssfile = 'data/mesos0825_s0dot2/mesos0825_s0dot2_ssmode'
    movdata = 'data/hcl_mesos0825_sample0.2'
    bsmap = 'data/hcl_mesos0825_bm'
    ofname = 'data/mesos0825_s0dot2/mesos0825_s0dot2_ssmode_mg'

    users = {}
    with open(ssfile, 'rb') as sfile:
        next(sfile, None)  # skip the header line
        for line in sfile:
            parts = line.strip('\r\n').split(',')
            uid = int(parts[0])
            users[uid] = (
                int(parts[1]),    # group
                int(parts[2]),    # clust
                float(parts[3]),  # dist
                float(parts[4]),  # selfdist
                str(parts[5]),    # mode
            )

    print(len(users))

    with open(movdata) as ifile, open(ofname, 'wb') as ofile:
        for person in movement_reader(ifile, BaseStationMap(bsmap)):
            if person.id not in users or person.distinct_loc_num() < 2:
                continue

            user = users[person.id]
            ofile.write('%d\t%d\t%d\t%.3f\t%.3f\t%s\t%s\n' %
                        (person.id, user[0], user[1], user[2], user[3],
                         user[4], dumps_mobgraph(person.convert2graph())))
def main(sc):
    """ Spark driver: summary counts of a movement data set

    Expects two command line arguments (movement data, output location) and
    saves, each under its own sub-path of the output location:

    * ``totalrecords`` -- number of parsed records;
    * ``totalbs``      -- number of distinct values of record field 2;
    * ``totalmgs``     -- number of mobility graphs;
    * ``totalusers``   -- number of distinct values of graph field 0;
    * ``groupstat``    -- per group (graph field 2): distinct users and
      number of graphs.

    :param sc: Spark context used to read the input and build the RDDs.
    """
    if len(sys.argv) < 3:
        sys.stderr.write("Usage: hzstat <movdata> <output>" + '\n')
        exit(-1)

    # Read movement records
    rawRDD = sc.textFile(sys.argv[1])
    output = sys.argv[2]

    bsmap = BaseStationMap(BSMAP)

    movDataRDD = rawRDD.map(lambda rec: record_splitter(rec))

    def save_scalar(value, name):
        # Persist a single number as a one-partition text output.
        sc.parallelize([value], 1).saveAsTextFile(os.path.join(output, name))

    save_scalar(movDataRDD.count(), 'totalrecords')
    save_scalar(movDataRDD.map(lambda rec: rec[2]).distinct().count(),
                'totalbs')

    # Extract mobility graphs
    per_user = movDataRDD.groupBy(lambda rec: rec[0])
    mobgraphRDD = per_user.flatMap(
        lambda grp: mobility_graphs(grp[1], bsmap, None))

    save_scalar(mobgraphRDD.count(), 'totalmgs')
    save_scalar(mobgraphRDD.map(lambda mg: mg[0]).distinct().count(),
                'totalusers')

    def group_stat(mgiter):
        # Two passes over the group's graphs: distinct users, then graphs.
        distinct_users = set([mg[0] for mg in mgiter])
        graphs = [mg[3] for mg in mgiter]
        return (len(distinct_users), len(graphs))

    grouped = mobgraphRDD.groupBy(lambda mg: mg[2])
    grouped.mapValues(group_stat)\
        .saveAsTextFile(os.path.join(output, 'groupstat'))
# Example #12
# 0
def model_maxoppo():
    """ Simulate trajectories that always move to the farthest opportunity

    For uid 1..999 a synthetic user is generated: a dwelling-time model is
    picked from ``load_dtmodels()``, a home from ``gen_random_home`` and a
    radius of gyration from ``gen_rg.rvs()``.  Starting at home, dwelling
    times are drawn until their sum reaches ``TIMEBOUND``; after each draw
    the next location is the candidate returned by ``person_map`` that is
    farthest from the current location.  Users for which any step raised
    are skipped.  For the others a tab separated row is written to
    ``data/mesos_model_mom_stat``: uid, rg, number of distinct locations,
    total travel distance and the comma separated per-hop distances.
    """
    ofname = 'data/mesos_model_mom_stat'
    ofile = open(ofname, 'wb')

    bsmap = BaseStationMap('data/hcl_mesos0825_bm')
    opmap = load_oppmap(bsmap)
    dtmodel = load_dtmodels()
    nmodel = len(dtmodel)

    # Upper limit on the accumulated dwelling time of the simulated part
    # (presumably hours: 6 at home + 18 simulated = 24 -- TODO confirm)
    TIMEBOUND = 18

    for j in range(1, 1000):
        # User profile
        uid = j
        # NOTE(review): np.random.random_integers(n) draws from the closed
        # interval [1, n].  If dtmodel is a 0-indexed sequence this never
        # picks index 0 and raises IndexError when n is drawn -- confirm
        # against load_dtmodels().
        dtm = dtmodel[np.random.random_integers(nmodel)]
        rhome = gen_random_home(opmap)

        rg = gen_rg.rvs()
        print '%d: %.3f' % (uid, rg)

        # Trajectory and the dwelling time spent at each of its entries;
        # both start with the home location.
        traj = [rhome]
        traj_dts = [6]
        acctime = 0
        isvalid = True
        while acctime < TIMEBOUND:
            print acctime

            # determine dwelling time
            dt = random_dt(dtm[0], dtm[1], dtm[2], dtm[3], dtm[4])
            # clip the last dwell so the total never exceeds TIMEBOUND
            if acctime + dt > TIMEBOUND:
                dt = TIMEBOUND - acctime

            traj_dts.append(dt)
            acctime += dt

            # determine location: farthest candidate from the current
            # position; falls back to home when no candidate is farther
            # than distance 0
            try:
                iops = person_map(traj, rg, opmap)
                maxloc = rhome
                maxdist = 0
                for i in iops:
                    if i != traj[-1]:
                        dist = greate_circle_distance(i[0], i[1], traj[-1][0], traj[-1][1])
                        if dist > maxdist:
                            maxloc = i
                            maxdist = dist
                traj.append(maxloc)
            except:
                # NOTE(review): bare except hides the failure cause, and the
                # loop is not left here -- it keeps drawing dwelling times
                # (traj_dts grows, traj does not) until TIMEBOUND is reached.
                isvalid = False

        if not isvalid:
            print isvalid
            continue

        # close the trajectory by returning home
        traj.append(rhome)
        traj_dts.append(0)

        # stat
        totloc = len(set(traj))
        # total dwelling time per distinct location
        # (dtloc is not read again within this function)
        dtloc = {}
        for l in range(len(traj)):
            if traj[l] not in dtloc:
                dtloc[traj[l]] = 0
            dtloc[traj[l]] += traj_dts[l]
        # distance of every hop between consecutive trajectory points
        trvdist = [greate_circle_distance(traj[i][0], traj[i][1], traj[i+1][0], traj[i+1][1]) for i in range(len(traj)-1)]
        totdist = np.sum(trvdist)

        print trvdist

        ofile.write('%d\t%.3f\t%d\t%.3f\t%s\n' % (
            uid, rg, totloc, totdist,
            ','.join(['%.3f' % i for i in trvdist]),
        ))

    ofile.close()
    # NOTE(review): from here on the code deals with motif extraction and
    # looks like the body of a separate function whose `def` line is
    # missing; as indented, it runs as the tail of the preceding function.
    class IdCounter(object):
        """ Counts how many distinct ids have been seen so far """
        # Shared, class-level set of every id passed to count()
        ids = set()

        @staticmethod
        def count(new_id):
            # Register new_id and return the number of distinct ids seen.
            IdCounter.ids.add(new_id)
            return len(IdCounter.ids)

    # Maximum number of distinct users to process, and input locations
    counter = MAX_USER_NUM
    basemap = BSMAP
    movement = MOVEMENT_DAT

    print("Extracting motifs ...")
    motifrepo = Motif()
    for person in movement_reader(open(movement, 'rb'),
                                  BaseStationMap(basemap)):

        # Stop at the first record that belongs to user number counter + 1
        if IdCounter.count(person.id) > counter:
            break

        # Turn the location sequence into a graph and register it
        user_graph = seq2graph(person.locations, True)
        motifrepo.add_graph(user_graph)

    motifrepo.stat().to_csv('motifs_stat.csv', index=False)

    print("Plotting motifs ...")
    # Values 3..10 handed to the motif queries below
    # (presumably motif sizes in nodes -- confirm against Motif)
    motif_filter = range(3, 11)

    # Global stat
    # (all_motifs and totmotif are not used within the visible fragment)
    all_motifs = motifrepo.all_motifs(motif_filter, True, True)
    totmotif = motifrepo.number_of_motifs(motif_filter)
def top_compare():
    """ Compare top motif and mesostructure

    Mobility graphs of day ``0825`` are grouped by their number of distinct
    locations (at most 20).  For every group ``C`` in 2..15 and cluster
    ``kn`` in 1..4 the file ``mesos0825_s0dot2_c<C>_kn<kn>`` is read: its
    first line lists the user ids, the remaining lines form the distance
    matrix.  Users are ordered by their mean distance, then:

    * the "top mesostructure" is the ``Mesos`` of the two first users, its
      similarity being ``1 - struct_dist()``;
    * the "top motif" is the largest isomorphism class among the
      mesostructures of consecutive users, its support being the class
      size divided by the number of users.

    One tab separated row per (C, kn) is written to ``mesos0825_s0dot2_top``.
    """
    datapath = 'data/mesos0825_s0dot2'
    movdata = 'data/hcl_mesos0825_sample0.2'
    bsmap = 'data/hcl_mesos0825_bm'
    ofname = os.path.join(datapath, 'mesos0825_s0dot2_top')

    mobgraphs = {}
    with open(movdata) as ifile:
        for person in movement_reader(ifile, BaseStationMap(bsmap)):
            if person.which_day() != '0825':
                continue

            nn = len(set(person.locations))
            if nn > 20:
                continue

            mobgraphs.setdefault(nn, {})[person.id] = person.convert2graph()

    new_file = True
    for C in range(2, 16):
        for kn in range(1, 5):

            print('%d %d' % (C, kn))

            # Read dist matrix for (group, cluster) users.
            # The header is consumed explicitly: the original relied on a
            # no-op `i == 1` and only worked because Python 2 list
            # comprehensions leak their loop variable.
            fileklab = os.path.join(datapath,
                                    'mesos0825_s0dot2_c%d_kn%d' % (C, kn))
            with open(fileklab) as kfile:
                header = next(kfile)
                uids = [int(u) for u in header.strip('\r\n').split(',')]
                distmat = [[float(d) for d in line.strip('\r\n').split(',')]
                           for line in kfile]

            distmat = np.array(distmat)
            distvec = distmat.sum(1) / len(uids)
            uids_sorted = [x for (y, x) in sorted(zip(distvec, uids))]

            N = len(uids_sorted)
            print('Total users %d: ' % N)

            mgs = mobgraphs[C]
            mesos = Mesos(mgs[uids_sorted[0]], mgs[uids_sorted[1]])
            topmesos = mesos.mesos
            topmesos_sim = 1 - mesos.struct_dist()

            # Bucket the mesostructures of consecutive users by isomorphism;
            # the first member of each class serves as its dictionary key.
            motifs = {}
            for i in range(N - 1):
                g1 = mgs[uids_sorted[i]]
                g2 = mgs[uids_sorted[i + 1]]
                mesos = Mesos(g1, g2).mesos
                for key in motifs:
                    if isomorphism.is_isomorphic(key, mesos):
                        motifs[key].append((mesos, i))
                        break
                else:
                    motifs[mesos] = [(mesos, i)]

            # Largest class first
            res = sorted(((len(value), value[0][0])
                          for value in motifs.values()),
                         key=lambda x: x[0], reverse=True)
            topmotif = res[0][1]
            topmotif_supp = 1.0 * res[0][0] / N

            # Truncate on the first row, append afterwards
            mode = 'wb' if new_file else 'ab'
            new_file = False
            with open(ofname, mode) as ofile:
                ofile.write('%d\t%d' % (C, kn))
                ofile.write('\t%.3f\t%.3f' % (topmesos_sim, topmotif_supp))
                ofile.write('\t%s' % dumps_mobgraph(topmesos))
                ofile.write('\t%s' % dumps_mobgraph(topmotif))
                ofile.write('\n')
def trv_distance():
    """ Travel distance for clustered users

    Mobility graphs and travel statistics of day ``0825`` are collected for
    users with at most 20 distinct locations.  For every group ``C`` in
    2..15 and cluster ``kn`` in 1..4 the file
    ``mesos0825_s0dot2_c<C>_kn<kn>`` is read: its first line lists the user
    ids, the remaining lines form the distance matrix (row/column ``k``
    belongs to the ``k``-th id of the header).  One CSV row per user is
    written to ``mesos0825_s0dot2_trd``, ordered by the user's mean
    distance, with the columns
    ``uid,group,clust,eigndist,rg,trvd,efreq,circlenum,nloc,dist,selfdist``.
    ``eigndist`` is the total edge weight of the mesostructure of the two
    users with the smallest mean distance.
    """
    datapath = 'data/mesos0825_s0dot2'
    movdata = 'data/hcl_mesos0825_sample0.2'
    bsmap = 'data/hcl_mesos0825_bm'
    ofname = os.path.join(datapath, 'mesos0825_s0dot2_trd')

    travdist = {}
    mobgraphs = {}
    with open(movdata) as ifile:
        for person in movement_reader(ifile, BaseStationMap(bsmap)):
            if person.which_day() != '0825':
                continue

            nn = len(set(person.locations))
            if nn > 20:
                continue

            mobgraphs.setdefault(nn, {})[person.id] = person.convert2graph()

            circle_num = len(person.circles)
            edge_freq = np.mean(list(person.freq.values()))
            trvd = person.travel_dist()
            rg = person.radius_of_gyration()
            nloc = len(person.locations)
            travdist[person.id] = (rg, trvd, edge_freq, circle_num, nloc)

    new_file = True
    for C in range(2, 16):
        for kn in range(1, 5):

            print('%d %d' % (C, kn))

            # Read dist matrix for (group, cluster) users.
            # The header is consumed explicitly: the original relied on a
            # no-op `i == 1` and only worked because Python 2 list
            # comprehensions leak their loop variable.
            fileklab = os.path.join(datapath,
                                    'mesos0825_s0dot2_c%d_kn%d' % (C, kn))
            with open(fileklab) as kfile:
                header = next(kfile)
                uids = [int(u) for u in header.strip('\r\n').split(',')]
                distmat = [[float(d) for d in line.strip('\r\n').split(',')]
                           for line in kfile]

            distmat = np.array(distmat)
            distvec = distmat.sum(1) / len(uids)

            # Sort matrix positions rather than ids, so every id keeps
            # pointing at its own matrix row (ties broken by uid, as before).
            order = sorted(range(len(uids)),
                           key=lambda k: (distvec[k], uids[k]))
            uids_sorted = [uids[k] for k in order]

            N = len(uids_sorted)
            print('Total users %d: ' % N)

            mgs = mobgraphs[C]
            mesos = Mesos(mgs[uids_sorted[0]], mgs[uids_sorted[1]])
            topmesos = mesos.mesos
            eigndist = np.sum(
                [e[2]['weight'] for e in topmesos.edges(data=True)])

            # Truncate (and write the header) on the first cluster, append
            # afterwards.
            mode = 'wb' if new_file else 'ab'
            new_file = False
            with open(ofname, mode) as ofile:
                if mode == 'wb':
                    ofile.write(
                        'uid,group,clust,eigndist,rg,trvd,efreq,circlenum,nloc,dist,selfdist\n'
                    )

                for k in order:
                    uid = uids[k]
                    # Row k of the matrix belongs to uids[k].  The original
                    # indexed the unsorted matrix with the position in the
                    # sorted list and so reported another user's distances.
                    dist = distvec[k]
                    selfdist = distmat[k][k]
                    rg, trvd, edge_freq, circle_num, nloc = travdist[uid]
                    ofile.write('%d,%d,%d,%.3f,%.3f,%.3f,%.3f,%d,%d,%.3f,%.3f' % \
                                (uid, C, kn, eigndist, rg, trvd, edge_freq, circle_num, nloc, dist, selfdist))
                    ofile.write('\n')