Esempio n. 1
0
def get_zx(start_time, method="single", fname="", Zf=False, **kwas):
    '''
    Build (or reload from a pickle) the hierarchical-clustering linkage over
    all clients, using (1 - closeness) as the pairwise distance.

    :param start_time: int indicating the earliest query the window should include
    :param method: the linkage method to be used
    :param fname: string to be appended to end of the pickle file name
    :param Zf: boolean -> if True, will load from file (see code for file name).
        NOTE: it will save the most recent version you calculated. Make sure the
        right version of the file exists before setting Zf to true.
        NOTE: only the literal False triggers a recomputation; any other value
        takes the load-from-file path
    :param **kwas: keyword arguments for vv.get_svl()
    :return: (Z, X) -> the linkage and the svl (list of clients) it was
        computed from

    When computing, the tuple (Z, dm, X) is pickled so that a later call with
    Zf=True can reuse it; dm is the condensed distance vector.
    '''
    picklefile = plotsdir+'pickles/'+'Z_'+method+fname+'.pickle'
    if Zf is False:
        kwas['start_time'] = start_time
        X, _, _, ccache = vv.get_svl(**kwas)
        logger.warning("svl len: "+str(len(X)))

        # condensed distance vector: one entry per unordered pair (i < j),
        # filled row by row
        n = len(X)
        dm = np.zeros((n * (n - 1)) // 2, dtype=np.double)
        k = 0
        for i in xrange(0, n-1):
            for j in xrange(i + 1, n):
                dm[k] = 1.0 - ccache[X[i]][X[j]]
                k = k + 1
        ccache.dump()
        Z = linkage(dm, method)
        df.pickleout(picklefile, (Z, dm, X))
        logger.warning('dumped Z to '+picklefile)
    else:
        # dm is stored in the pickle but not needed by the caller
        Z, _, X = df.picklein(picklefile)
        logger.warning('loaded Z from '+picklefile)

    return Z, X
Esempio n. 2
0
def arrange_self_data(start_time, gap=0, loops=2, **kwas):
    '''
    Gather the clients returned by vv.get_svl() over several consecutive
    time windows.

    :param start_time: int indicating the earliest query the window should include
    :param gap: the gap (in seconds) between each iteration's dataset
    :param loops: the number of iterations (datasets)
    :param **kwas: keyword arguments for vv.get_svl(); 'duration' must be
        present because it is read here to compute each window's start time
    :return: (svld, allsvl, fmt, anssets) ->
        svld: dict {id: [sv from each window in which that id appeared]}
        allsvl: flat list of every sv from every window
        fmt: list of the union of the fmt entries from every window
        anssets: dict {dom: set of the answers seen in any window}
    '''
    by_id = defaultdict(list)  # dict {id: [svl]}
    everyone = list()
    seen_fmt = set()
    answers = defaultdict(set)

    kwas['return_ccache'] = False
    for step in xrange(0, loops):
        # window number `step` starts after `step` full (gap + duration) spans
        kwas['start_time'] = start_time + step * (gap + kwas['duration'])
        window_svl, window_fmt, window_ans = vv.get_svl(**kwas)
        logger.warning("svl len: " + str(len(window_svl)))
        seen_fmt.update(set(window_fmt))
        for dom in window_ans:
            answers[dom].update(set(window_ans[dom]))
        for idx in xrange(0, len(window_svl)):
            sv = window_svl[idx]
            by_id[sv.get_id()].append(sv)
            everyone.append(sv)

    return by_id, everyone, list(seen_fmt), answers
Esempio n. 3
0
def inv_hist(start_time, fname="", thresh=.35, **kwas):
    '''
    Build the "inverse" graph from the IP sets of the clients, print
    statistics for each of its connected components, and save a histogram of
    the edge weights of the whole (filtered) graph.

    :param start_time: int indicating the earliest query the window should include
    :param fname: string prepended to 'inv_hist.pdf' to form the plot file name
    :param thresh: threshold handed to vg.remove_far_edges()
    :param **kwas: keyword arguments for vv.get_svl()

    output: plotsdir + fname + 'inv_hist.pdf'
    '''
    logger.info("getting svl...")
    kwas['start_time'] = start_time
    kwas['return_ccache'] = False
    svl, fmt, __ = vv.get_svl(**kwas)
    logger.info("getting ipsl...")
    ipsl, dompairs = get_ip_sets(svl)
    logger.info("getting pairing counts...")
    pc = vg.get_pairing_counts(ipsl)
    logger.info("building inv. graph...")
    G = vg.build_inv_graph(pc)
    vg.remove_far_edges(G, thresh)
    dd = vg.nodes_by_degree(G)
    vg.remove_degree_below(G, dd, 1)
    # weights of every edge left in the graph; this is what the histogram shows
    weights = [e[2] for e in G.edges_iter(data='weight')]
    cc = list(nx.connected_components(G))
    for c in cc:
        print("****************************")
        print(str(len(c)))
        print(set([dom for ip in c for dom in dompairs[ip]]))
        # kept in its own name so the graph-wide `weights` list is not
        # clobbered by the last component's weights
        cweights = [w for a, b, w in G.edges_iter(c, data='weight')]
        print("median weight: "+str(np.median(cweights)))
        print("average weight: "+str(np.mean(cweights)))
    print("num connected comps: "+str(len(cc)))
    print("size of connected comps: "+str(np.median([len(z) for z in cc])))

    fig = plt.figure(figsize=(15, 10))
    plt.xlabel('pairwise closeness')
    plt.ylabel('# of pairs (servers)')
    plt.hist(weights, bins=100)
    plt.savefig(plotsdir+fname+'inv_hist.pdf', bbox_inches='tight')
    # release the figure; repeated calls would otherwise accumulate open figures
    plt.close(fig)
Esempio n. 4
0
def plot_resolver_comparison(start_time, fname="", xlim=[.6, 1.0], rmask=16, **kwas):
    '''
    Plot the CDF of pairwise closeness for probes grouped by
    cc.nearby_probes_diff_ldns(), split by whether the two probes of a pair
    have the same masked LDNS or not.

    :param start_time: int indicating the earliest query the window should include
    :param fname: string to be appended to end of plot file name
    :param xlim: x axis limits for plot (handed to ps.set_dim)
    :param rmask: mask for resolver IPs
    :param **kwas: keyword arguments for vv.get_svl()

    Nothing is returned; the plot is written to
    plotsdir + "closeness_ldns" + fname as both .png and .pdf
    '''
    print("getting svl...")
    kwas['start_time'] = start_time
    svl, fmt, __, ccache = vv.get_svl(**kwas)
    logger.warning("svl len: "+str(len(svl)))

    neighbourhoods = cc.nearby_probes_diff_ldns(svl, rmask)

    scores_by_label = defaultdict(list)
    mask = ipp.make_v4_prefix_mask(rmask)
    for members in neighbourhoods:
        count = len(members)
        # every unordered pair inside the group
        for first in xrange(0, count-1):
            for second in xrange(first+1, count):
                left = members[first]
                right = members[second]
                score = ccache[left][right]
                same_ldns = (left.get_ldns() & mask) == (right.get_ldns() & mask)
                if same_ldns:
                    label = 'same LDNS'
                else:
                    label = 'diff LDNS'
                scores_by_label[label].append(score)
    ccache.dump()

    fig, ax = plt.subplots(1, 1)
    for label in scores_by_label:
        curve = ECDF(scores_by_label[label])
        ax.plot(list(curve.x), list(curve.y), label=label)
    ps.set_dim(fig, ax, xdim=13, ydim=7.5, xlim=xlim)
    plt.xlabel("pairwise probe closeness")
    plt.ylabel("CDF of pairs")
    lgd = ps.legend_setup(ax, 4, "top center", True)
    outbase = plotsdir+"closeness_ldns"+fname
    for extension in ('.png', '.pdf'):
        fig.savefig(outbase+extension, bbox_extra_artists=(lgd,),
                    bbox_inches='tight')
    plt.close(fig)
Esempio n. 5
0
def get_ansset_sizes(start_time, fname="", **kwas):
    '''
    Write the size of every answer set returned by vv.get_svl() to a csv,
    smallest first.

    :param start_time: int indicating the earliest query the window should include
    :param fname: string to be appended to end of the csv file name
    :param **kwas: keyword arguments for vv.get_svl()

    Nothing is returned.

    other outputs:
        -> plotsdir + "big_ansset" + fname + ".csv", one (key, size) entry
           per key of the answer-set mapping
    '''
    kwas['start_time'] = start_time
    kwas['return_ccache'] = False
    svl, fmt, anssets = vv.get_svl(**kwas)
    sizes = [(key, len(anssets[key])) for key in anssets]
    sizes.sort(key=lambda entry: entry[1])
    outfile = plotsdir + "big_ansset" + fname + ".csv"
    df.overwrite(outfile, df.list2col(sizes))
Esempio n. 6
0
def plot_varying_mc(start_time, fname="", tmin=.75, tmax=1.01, tinc=.025, **kwas):
    '''
    Plot the per-descriptor values returned by vg.get_cc_varying_mc() (left
    axis) and the number of components (right axis) against the minimum
    closeness threshold.

    :param start_time: int indicating the earliest query the window should include
    :param fname: string to be appended to end of plot file name
    :param tmin: start of threshold range (e.g., a of np.arange(a, b, c))
    :param tmax: end of threshold range (e.g., b of np.arange(a, b, c))
    :param tinc: the step size of the threshold range (e.g., c of np.arange(a,
        b, c)
    :param **kwas: keyword arguments for vv.get_svl()

    output: plotsdir + 'components_' + fname as both .pdf and .png
    '''

    logger.info("getting svl")
    kwas['start_time'] = start_time
    svl, fmt, __, ccache = vv.get_svl(**kwas)
    mwl = np.arange(tmin, tmax, tinc)
    ccl = vg.get_cc_varying_mc(svl, mwl, ccache)
    # persist the cache as soon as the computation is done so that a plotting
    # failure below cannot lose it
    ccache.dump()

    fig, ax = plt.subplots(1, 1)
    labels = ['country', 'prefix', 'resolver', 'subnet', 'asn']
    x = mwl
    for label in labels:
        y = [z[label] for z in ccl]
        ax.plot(x, y, label=label)
    ax.set_xlabel('minimum closeness')
    ax.set_ylabel('% of component')

    # the component count lives on its own y axis (right-hand side)
    ax2 = ax.twinx()
    y = [z['quantity'] for z in ccl]
    ax2.plot(x, y, 'k', label='# components')
    ax2.set_ylabel('# components')

    ps.set_dim(fig, ax, xdim=13, ydim=7.5)
    lgd = ps.legend_setup(ax, 5, "top center", True)
    filename = plotsdir+'components_'+fname
    # pass the legend as an extra artist, as the other plot functions in this
    # file do, so the tight bounding box accounts for it
    fig.savefig(filename+'.pdf', bbox_extra_artists=(lgd,), bbox_inches='tight')
    fig.savefig(filename+'.png', bbox_extra_artists=(lgd,), bbox_inches='tight')
    plt.close(fig)
Esempio n. 7
0
def get_domain_matrix(start_time, fname="", **kwas):
    '''
    Build the (client pair) x (domain) overlap matrix and derive per-domain
    statistics from it.

    For a pair (a, b) and a domain, the matrix entry is the sum of a's and b's
    values for the answers both of them saw, divided by the sum of all of a's
    and b's values for that domain.

    :param start_time: int indicating the earliest query the window should include
    :param fname: string to be appended to end of output file names
    :param **kwas: keyword arguments for vv.get_svl()
    :return: (m) matrix of client pairs vs domains,
             (fmt) list of domains
    :raises ValueError: if fewer than two clients are available

    other outputs (all under plotsdir, with fname appended):
        -> dommatrix csv with pairs vs domains matrix (m)
        -> domcorr csv with list of domain pair correlations (corrs)
        -> dommean csv with list of mean Jaccard for each domain (means)
        -> jaccard_vs_ipspace csv/png/pdf with mean Jaccard vs # IPs observed
    NOTE: calls plt.show() before closing the figure
    '''
    kwas['start_time'] = start_time
    kwas['return_ccache'] = False
    svl, fmt, anssets = vv.get_svl(**kwas)
    nclients = len(svl)
    print("svl len " + str(nclients))
    if nclients < 2:
        # the factorial-based pair count used previously also failed here
        raise ValueError("need at least two clients to form pairs, got "
                         + str(nclients))
    # number of unordered pairs, C(n, 2), without computing huge factorials
    combs = (nclients * (nclients - 1)) // 2
    m = np.zeros((combs, len(fmt)))
    p = 0
    for i in xrange(0, nclients - 1):
        a = svl[i]
        logger.warning(str(i) + ", " + str(a.get_id()))
        aset = dict()
        for dom in a:
            aset[dom] = set(a[dom])
        # a's total per domain does not depend on b, so compute it once per a
        atotals = [sum([a[dom][z] for z in a[dom]]) for dom in fmt]
        for j in xrange(i + 1, nclients):
            b = svl[j]
            for k in xrange(0, len(fmt)):
                dom = fmt[k]
                domtotal = atotals[k] + sum([b[dom][z] for z in b[dom]])
                overlap = aset[dom].intersection(b[dom])
                weight = 0
                for z in overlap:
                    weight += (a[dom][z] + b[dom][z])
                # float() forces true division even without
                # `from __future__ import division`
                # NOTE(review): still fails if neither client has any value
                # for this domain (domtotal == 0) - confirm that cannot happen
                m[p, k] = float(weight) / domtotal
            p += 1

    matrixfile = plotsdir + "dommatrix" + fname + ".csv"
    df.overwrite(matrixfile, df.list2line(fmt) + "\n")
    df.append(matrixfile, df.list2col(m))

    # columns are the variables (domains), rows the observations (pairs)
    C = np.corrcoef(m, rowvar=False)
    corrs = list()
    for i in xrange(0, len(fmt) - 1):
        for j in xrange(i + 1, len(fmt)):
            corrs.append((fmt[i] + "_" + fmt[j], C[i, j]))
    corrs = sorted([y for y in corrs if not math.isnan(y[1])],
                   key=lambda z: z[1])
    means = sorted(zip(fmt, np.mean(m, axis=0)), key=lambda z: z[1])

    df.overwrite(plotsdir + "domcorr" + fname + ".csv", df.list2col(corrs))
    df.overwrite(plotsdir + "dommean" + fname + ".csv", df.list2col(means))

    meand = dict(means)
    # get mean jaccard vs # IPs seen
    mj_ni = [(meand[dom], len(anssets[dom])) for dom in meand]
    d_mj_ni = sorted([(dom, meand[dom], len(anssets[dom])) for dom in meand],
                     key=lambda z: z[1])
    df.overwrite(plotsdir + "jaccard_vs_ipspace" + fname + ".csv",
                 df.list2col(d_mj_ni))

    fig, ax = plt.subplots(1, 1)

    # one colour per domain
    colors = iter(cm.rainbow(np.linspace(0, 1, len(mj_ni))))
    for x, y in mj_ni:
        ax.scatter(x, y, color=next(colors))
    plt.xlabel("mean jaccard")
    plt.ylabel("# IPs observed")
    # first argument passed positionally: its keyword name differs between
    # matplotlib versions (b vs visible)
    ax.grid(True, which='major', color='b', linestyle='-')
    ps.set_dim(fig, ax, ylog=True)
    filename = plotsdir + "jaccard_vs_ipspace" + fname
    fig.savefig(filename + '.png', bbox_inches='tight')
    fig.savefig(filename + '.pdf', bbox_inches='tight')
    plt.show()
    plt.close(fig)

    return m, fmt
Esempio n. 8
0
def _diff_desc_closeness(groups, get_desc, label, ccache, fname):
    '''
    Score every pair of clients that belong to two different groups and write
    the three summary csv files for that descriptor.

    :param groups: dict {descriptor: [clients]}; only descriptors with more
        than one client are considered
    :param get_desc: callable returning a client's descriptor value
    :param label: file name prefix for this descriptor (e.g., "asn")
    :param ccache: closeness cache, indexed as ccache[a][b]; dumped when the
        pairs have been processed
    :param fname: string to be appended to end of csv file names

    outputs (rows ordered by descending distance() score, which is not
    written to the file):
        -> <label>_idac: mean closeness per (client id, other descriptor)
        -> <label>_ddac: mean closeness per descriptor pair
        -> <label>_iic: closeness per client pair
    '''
    # {idA_descB: [(closeness, dist)]}
    idc = defaultdict(list)
    # {idA_idB: (closeness, dist)}
    iic = dict()
    # {descA_descB: [(closeness, dist)]}
    ddc = defaultdict(list)
    keys = [c for c in groups if len(groups[c]) > 1]
    for i in xrange(0, len(keys) - 1):
        # progress indicator
        print(keys[i], end=", ")
        sys.stdout.flush()
        for a in groups[keys[i]]:
            for j in xrange(i + 1, len(keys)):
                for b in groups[keys[j]]:
                    closeness = ccache[a][b]
                    ad = str(get_desc(a))
                    bd = str(get_desc(b))
                    aid = str(a.get_id())
                    bid = str(b.get_id())
                    dist = em.latlong_distance_km(a.get_coordinates(),
                                                  b.get_coordinates())
                    # combine closeness and physical distance into the score
                    # used for ordering the output rows
                    dist = distance(closeness, dist)
                    idc[aid + "_" + bd].append((closeness, dist))
                    idc[bid + "_" + ad].append((closeness, dist))
                    iic["_".join(sorted([aid, bid]))] = (closeness, dist)
                    ddc["_".join(sorted([ad, bd]))].append((closeness, dist))
    ccache.dump()

    for suffix, table in (("_idac", idc), ("_ddac", ddc)):
        rows = sorted([(k, np.mean([q[0] for q in table[k]]),
                        np.mean([q[1] for q in table[k]])) for k in table],
                      key=lambda z: z[2], reverse=True)
        rows = [(z[0], z[1]) for z in rows]
        df.overwrite(plotsdir + label + suffix + fname + ".csv",
                     df.list2col(rows))

    rows = sorted([(k, iic[k][0], iic[k][1]) for k in iic],
                  reverse=True,
                  key=lambda z: z[2])
    rows = [(z[0], z[1]) for z in rows]
    df.overwrite(plotsdir + label + "_iic" + fname + ".csv",
                 df.list2col(rows))


def closest_diff_desc(start_time, fname="", xlim=[.6, 1.0], **kwas):
    '''
    :param start_time: int indicating the earliest query the window should include
    :param fname: string to be appended to end of csv file names
    :param xlim: accepted for signature compatibility; not used
    :param **kwas: keyword arguments for vv.get_svl()
    :returns: dict {client id: client} for every client in the window

    gets pairwise closeness of probes with different descriptors (ASN, then
    prefix) to find odd behavior (probes in difference descriptors with high
    closeness scores)

    NOTE: writes data to files for conveniece (asn_* and prefix_* csv files
    under plotsdir)
    '''
    print("getting svl...")
    kwas['start_time'] = start_time
    svl, __, __, ccache = vv.get_svl(**kwas)
    logger.warning("svl len: " + str(len(svl)))

    print("getting descriptor lists...")
    asvl = vv.asn_svl(svl)
    psvl = vv.prefix_svl(svl)

    print("\n\ncalculating closeness for ASNs...")
    _diff_desc_closeness(asvl, lambda sv: sv.get_asn(), "asn", ccache, fname)

    print("\n\ncalculating closeness for prefixes...")
    _diff_desc_closeness(psvl, lambda sv: sv.get_prefix(), "prefix", ccache,
                         fname)

    svd = dict()
    for sv in svl:
        svd[sv.get_id()] = sv

    return svd
Esempio n. 9
0
def _same_group_closeness(groups, ccache):
    '''
    Return the closeness of every unordered pair of clients that share a group.

    :param groups: dict {descriptor: [clients]}
    :param ccache: closeness cache, indexed as ccache[a][b]
    :return: list of closeness values
    '''
    vals = list()
    for k in groups.keys():
        ksvl = groups[k]
        for a in xrange(0, len(ksvl)-1):
            for b in xrange(a+1, len(ksvl)):
                vals.append(ccache[ksvl[a]][ksvl[b]])
    return vals


def plot_closeness_same_desc(start_time, duration, fname="", xlim=[.6, 1.0], rmask=16,
        loops=31, **kwas):
    '''
    :param start_time: int indicating the earliest query the window should include
    :param duration: length of each time block; block l starts at
        start_time + duration * l
    :param fname: string to be appended to end of plot file name
    :param xlim: x axis limits for plot. NOTE: currently not applied to the plot
    :param rmask: mask for resolver IPs
    :param loops: number of time blocks
    :param **kwas: keyword arguments for vv.get_svl()

    for each descriptor (ASN, country, registered prefix, /24 subnet,
    resolver), plot the CDF of the pairwise closeness of clients, such that
    the clients in a pair come from the same groups in the descriptor (e.g.,
    same country for the country descriptor)

    outputs: closeness_same_desc + fname (.png and .pdf) and one
    <descriptor>_same.csv per descriptor, all under plotsdir

    NOTE: plot 4.1
    '''
    lvals = list()
    cvals = list()
    avals = list()
    svals = list()
    pvals = list()
    kwas['duration'] = duration
    fmtmask = ipp.make_v4_prefix_mask(rmask)
    # massive public DNS providers, excluded from the resolver groups
    to_remove = (
            '208.67.222.123',   # OpenDNS
            '208.67.220.123',
            '8.8.8.8',          # Google Public DNS
            '8.8.4.4',
            '64.6.64.6',        # Verisign
            '64.6.65.6')
    for l in xrange(0, loops):
        print("getting svl...")
        kwas['start_time'] = start_time+duration*l
        svl, fmt, __, ccache = vv.get_svl(**kwas)
        logger.warning("svl len: "+str(len(svl)))

        print("getting descriptor lists...")
        csvl = vv.country_svl(svl)
        asvl = vv.asn_svl(svl)
        ssvl = vv.subnet_svl(svl)
        psvl = vv.prefix_svl(svl)
        lsvl = vv.ldns_svl(svl, rmask, False)
        for ip in to_remove:
            tmp = ipp.ip2int(ip) & fmtmask
            if tmp in lsvl:
                del lsvl[tmp]

        print("calculating closeness for resolvers...")
        lvals.extend(_same_group_closeness(lsvl, ccache))
        print("calculating closeness for countries...")
        cvals.extend(_same_group_closeness(csvl, ccache))
        print("calculating closeness for ASNs...")
        avals.extend(_same_group_closeness(asvl, ccache))
        print("calculating closeness for subnets...")
        svals.extend(_same_group_closeness(ssvl, ccache))
        print("calculating closeness for prefixes...")
        pvals.extend(_same_group_closeness(psvl, ccache))

        # dump every block's cache (as plot_closeness does), not just the one
        # from the final iteration
        ccache.dump()

    print("plotting...")
    vals = [cvals, avals, svals, pvals, lvals]
    labels = ['country', 'ASN', 'subnet', 'prefix', 'resolver']

    fig, ax = plt.subplots(1, 1)
    for label, series in zip(labels, vals):
        if not series:
            # no same-group pairs were found for this descriptor
            logger.warning("no pairs for "+label+"; skipping its CDF")
            continue
        print(type(series[0]))
        print(label + " \n")
        print(len(series))
        ecdf = ECDF(np.array(series))
        x = list(ecdf.x)
        y = list(ecdf.y)
        ax.plot(x, y, label=label)
    ps.set_dim(fig, ax, xdim=13, ydim=7.5)
    plt.xlabel("pairwise probe closeness")
    plt.ylabel("CDF of pairs")
    lgd = ps.legend_setup(ax, 4, "top center", True)
    filename = plotsdir+"closeness_same_desc"+fname
    fig.savefig(filename+'.png', bbox_extra_artists=(lgd,), bbox_inches='tight')
    fig.savefig(filename+'.pdf', bbox_extra_artists=(lgd,), bbox_inches='tight')
    plt.close(fig)

    print("saving data...")
    for label, series in zip(labels, vals):
        df.overwrite(plotsdir+label+'_same.csv', df.list2col(series))
Esempio n. 10
0
def plot_closeness(start_time, duration, fname="", xlim=[.6, 1.0], loops=15, **kwas):
    '''
    Plot closeness CDFs over `loops` consecutive time blocks.

    :param start_time: int indicating the earliest query the window should include
    :param duration: length of each time block; block l starts at
        start_time + duration * l
    :param fname: string to be appended to end of plot file name
    :param xlim: x axis limits for plot (handed to ps.set_dim)
    :param loops: number of time blocks
    :param **kwas: keyword arguments for vv.get_svl()

    plots:
        1) CDF for pairwise closeness of each pair
        2) CDF for the average pairwise closeness experienced by each probe
        across all other probes

    outputs: overall_closeness + fname (.png, .pdf, .csv) and
    overall_avg_closeness + fname (.csv), all under plotsdir

    NOTE: plot 3.1
    '''
    per_client = defaultdict(list)
    pairwise = list()
    kwas['duration'] = duration
    for block in xrange(0, loops):
        print("getting svl...")
        kwas['start_time'] = start_time+duration*block
        svl, __, __, ccache = vv.get_svl(**kwas)
        logger.warning("svl len: "+str(len(svl)))
        print(len(svl))

        print("calculating closeness for resolvers...")
        total = len(svl)
        for first in xrange(0, total-1):
            for second in xrange(first + 1, total):
                score = ccache[svl[first]][svl[second]]
                pairwise.append(score)
                # the pair's score counts towards both of its members
                per_client[svl[first].get_id()].append(score)
                per_client[svl[second].get_id()].append(score)
        ccache.dump()
        # drop this block's data before fetching the next block
        del ccache, svl, __
        gc.collect()

    print("plotting...")
    fig, ax = plt.subplots(1, 1)

    pair_cdf = ECDF(pairwise)
    ax.plot(list(pair_cdf.x), list(pair_cdf.y), label="pairwise")

    client_cdf = ECDF([np.mean(per_client[z]) for z in per_client])
    ax.plot(list(client_cdf.x), list(client_cdf.y),
            label="average (per client)")

    ps.set_dim(fig, ax, xdim=13, ydim=7.5, xlim=xlim)
    plt.xlabel("pairwise probe closeness")
    plt.ylabel("CDF of pairs")
    lgd = ps.legend_setup(ax, 4, "top center", True)
    outbase = plotsdir+"overall_closeness"+fname
    for extension in ('.png', '.pdf'):
        fig.savefig(outbase+extension, bbox_extra_artists=(lgd,),
                    bbox_inches='tight')
    plt.close(fig)

    print("saving data...")
    df.overwrite(plotsdir+'overall_closeness'+fname+'.csv',
        df.list2col(pairwise))
    averages = [(z, np.mean(per_client[z])) for z in per_client]
    df.overwrite(plotsdir+'overall_avg_closeness'+fname+'.csv',
        df.list2col(averages))
Esempio n. 11
0
def plot_optimizing_window(start_time, duration, fname="", xlim=None,
        maxdur=90000*15, incr=30000, **kwas):
    '''
    :param start_time: int indicating the earliest query the window should include
    :param duration: the first (smallest) block duration to try
    :param fname: string to be appended to end of plot file name
    :param xlim: x axis limits for plot (handed to ps.set_dim)
    :param maxdur: the outer bound of the duration range to be covered
    :param incr: the number of seconds to increment the duration by in each loop
    :param **kwas: keyword arguments for vv.get_svl()

    makes line plot varying the duration (x axis) vs the closeness to one's self
    from a different point in time (e.g., for a 10 second duration, self A would
    be time 0-9, and self B would be time 10-19)

    outputs: avg_self_closeness + fname (.png and .pdf) and
    fname + '_avg_self_closeness.csv', all under plotsdir
    '''

    mean_scores = list()
    spreads = list()
    cycles = list()
    window = duration
    kwas['return_ccache'] = False
    while window < maxdur:
        print("getting svls...")
        kwas['duration'] = window
        # first block: [start_time, start_time + window)
        kwas['start_time'] = start_time
        svl, __, __ = vv.get_svl(**kwas)
        logger.warning("svl len: "+str(len(svl)))
        earlier = {sv.id: sv for sv in svl}
        # second block starts right where the first one ends
        kwas['start_time'] = start_time+window
        svl, __, __ = vv.get_svl(**kwas)
        logger.warning("svl len: "+str(len(svl)))
        later = {sv.id: sv for sv in svl}

        print("calculating closeness for subnets... " + str(window))
        # only clients present in both blocks can be compared to themselves
        scores = [vv.closeness(earlier[pid], later[pid])
                  for pid in earlier if pid in later]

        mean_scores.append(np.mean(scores))
        spreads.append(np.std(scores))
        # express the duration in units of 8 hours
        cycles.append(float(window)/(60.0*60.0*8.0))
        window += incr

    fig, ax = plt.subplots(1, 1)
    ax.errorbar(cycles, mean_scores, yerr=spreads)
    ps.set_dim(fig, ax, xdim=13, ydim=7.5, xlim=xlim)
    plt.xlabel("# 8 hour cycles in block duration")
    plt.ylabel("average self closeness")
    lgd = ps.legend_setup(ax, 4, "top center", True)
    outbase = plotsdir+"avg_self_closeness"+fname
    for extension in ('.png', '.pdf'):
        fig.savefig(outbase+extension, bbox_extra_artists=(lgd,),
                    bbox_inches='tight')
    plt.close(fig)

    print("saving data...")
    df.overwrite(plotsdir+fname+'_avg_self_closeness.csv',
            df.list2col(mean_scores))