def checkcache(itemname, filename='importvars/asn.cache', incachedir='importvars/mycache'):
    '''
    Look up the ASN for itemname in the on-disk cache; on a miss, do a live
    query via af.asn_lookup() and persist the result.

    :param itemname: dotted name (e.g. an IP string); only consulted when it
        is at least 7 characters long and contains a '.'
    :param filename: unused; kept for backward compatibility with callers
    :param incachedir: cache directory, relative to this file's directory
    :return: cached ASN or 'NA' on a hit; the freshly looked-up ASN on a
        successful miss; '?' when unknown or the lookup failed
    '''
    retval = '?'
    cachedir = df.rightdir(df.getdir(__file__) + incachedir)
    # skip obviously malformed names; cache files are sharded by the first
    # dot-separated label of itemname
    if len(itemname) >= 7 and '.' in itemname:
        tmpfname = cachedir + itemname.split('.')[0] + '.cache'
        data = defaultdict(str, df.pjsonin(tmpfname))
        tmpval = data[itemname]
        if df.isint(tmpval) or tmpval == 'NA':
            # cache hit: either an ASN or an explicit "no answer" marker
            retval = tmpval
        elif tmpval == '':
            # cache miss (defaultdict produced the empty string): live lookup
            asn = None
            try:
                asn = af.asn_lookup(itemname)
            except Exception:
                # best-effort: a failed lookup is recorded as '?' below
                # (was a bare `except:`, which also swallowed KeyboardInterrupt)
                pass
            if asn is None:
                data[itemname] = '?'
            else:
                data[itemname] = asn
                retval = asn
            # write the updated shard back out
            df.pjsonout(tmpfname, dict(data), 'w')
    return retval
def dump(self):
    """Persist this cache to disk if it is dirty, then log the hit rate."""
    if self.changed:
        key = self.hash
        index = self.indirect_cache
        logger.warning("dumping, DO NOT ctrl-C!")
        if key not in index:
            # first dump for this parameter set: allocate a fresh,
            # timestamped pickle path for it
            pickledir = df.rightdir(statedir + "pickles")
            index[key] = pickledir + "closeness" + str(time.time()) + ".pickle"
        # write the index first, then the cache payload it points at
        df.pickleout(self.f, index)
        df.pickleout(index[key], self.cache)
        logger.warning("safe to ctrl-C")
        self.changed = False
        time.sleep(2)
    lookups = self.hits + self.misses
    if lookups > 0:
        rate = float(self.hits) / float(self.hits + self.misses)
        logger.debug("hit rate: " + str(rate) + ", total hits: " + str(self.hits))
import logging
import logging.config

# configure logging from the project-wide config file and get a module logger
logging.config.fileConfig('logging.conf', disable_existing_loggers=False)
# create logger
logger = logging.getLogger(__name__)
logger.debug(__name__ + "logger loaded")

##################################################################
# GLOBAL AND SETUP
##################################################################
# paths
# NOTE(review): `df` is a project helper module imported above this chunk;
# rightdir() presumably normalizes (and maybe creates) a directory path --
# confirm against its definition.
basedir = df.getdir(__file__) + '../'
statedir = df.rightdir(basedir + 'state/')
rawdirlist = df.getlines(basedir + 'state/datapaths.list')
datafiles = df.listfiles(basedir + rawdirlist[0], fullpath=True)
plotsdir = df.rightdir(basedir + "plots/")

##################################################################
# CODE
##################################################################

def get_ansset_sizes(start_time, fname="", **kwas):
    '''
    :param start_time: int indicating the earliest query the window should include
    :param **kwas: keyword arguments for vv.get_svl()
    :return: (m) matrix of client pairs vs domains, (fmt) list of domains
# database setup mclient = MongoClient() db = mclient.veracity pdata = db.probe_data # fields that are specific to probes pfields = ['country', 'asn_v4', 'asn_v6', 'geometry', 'probe_ip'] ################################################################## # GLOBAL AND SETUP ################################################################## # paths basedir = df.getdir(__file__)+'../' statedir = df.rightdir(basedir+"state/") # external parameters params = df.getlines(statedir+'params') creation_key = params[0] destruction_key = params[1] ################################################################## # CODE ################################################################## def get_measurements(filters): ''' see https://atlas.ripe.net/docs/api/v2/reference/#!/measurements/ for
import logging
import logging.config

# configure logging from the project-wide config file and get a module logger
logging.config.fileConfig('logging.conf', disable_existing_loggers=False)
# create logger
logger = logging.getLogger(__name__)
logger.debug(__name__ + "logger loaded")

##################################################################
# GLOBAL AND SETUP
##################################################################
# paths
basedir = df.getdir(__file__) + '../'
statedir = df.rightdir(basedir + 'state/')
rawdirlist = df.getlines(basedir + 'state/datapaths.list')
datafiles = df.listfiles(basedir + rawdirlist[0], fullpath=True)
plotsdir = df.rightdir(basedir + "plots/")
# default pickle path for the closeness cache
ccachef = df.rightdir(statedir + "pickles/") + "ccache.pickle"

##################################################################
# CODE
##################################################################

def distance(c, d):
    '''
    Combine closeness and physical distance into a single scalar metric.

    :param c: closeness
    :param d: physical distance (km)
    :return: Euclidean combination of c and the log-scaled distance

    d is clamped to >= 1.0 km so the log term is non-negative.  The log base
    40075 presumably normalizes by Earth's circumference in km, putting the
    log term in [0, 1] -- TODO confirm intent.
    '''
    return math.sqrt(c**2 + math.log(max([d, 1.0]), 40075)**2)

def closest_diff_desc(start_time, fname="", xlim=[.6, 1.0], **kwas):
    # NOTE(review): mutable default argument (xlim=[.6, 1.0]) is shared
    # across calls -- safe only if it is never mutated in the body; consider
    # xlim=None with the default assigned inside.
def get_big_svl(start_time, duration=30000, mask=32, fmt=None, country_set=None,
                oddballs=True, maxmissing=0, return_ccache=True,
                ccachef=df.rightdir(statedir + "pickles/") + "ccache.pickle",
                mindomsize=1):
    '''
    Like get_svl(), but for windows longer than a day: breaks the span
    [start_time, start_time + duration) into 24-hour sub-windows (so the
    mongo cursor does not time out), fetches each sub-window with get_svl(),
    and merges the results.

    :param start_time: int, earliest query the window should include
    :param duration: int, span of the window in seconds; the caller
        (get_svl()) assumes it is a multiple of 24 hours
    :param mask: int, prefix mask to use over domain IPs
    :param fmt: see transform_fmt(); None means "use every domain seen"
    :param country_set: set of countries to include; None means all
    :param oddballs: if True, include non-public IPs (10.x.x.x, etc.)
    :param maxmissing: passed to reduce_svl() for the final filtering pass
    :param return_ccache: if True, also build and return a closeness cache
    :param ccachef: pickle path for the closeness cache
    :param mindomsize: drop domains whose merged answer set is smaller
    :return: (svl, fmt, anssets) or (svl, fmt, anssets, ccache) -- the same
        shape as get_svl()
    '''
    dur = 60 * 60 * 24  # one sub-window per day
    end_time = start_time + duration
    anssets = defaultdict(set)
    myfmt = set()
    svld = dict()  # smartvec id -> smartvec merged across sub-windows

    def _merge_window(tmp_svl, tmp_fmt, tmp_anssets):
        # fold one sub-window's results into the running aggregates
        for dom in tmp_anssets.keys():
            anssets[dom] |= tmp_anssets[dom]
        myfmt.update(tmp_fmt)
        for sv in tmp_svl:
            svid = sv.get_id()
            if svid in svld:
                svld[svid].absorb(sv)
            else:
                svld[svid] = sv

    # the first sub-window is fetched unconditionally (preserves the original
    # behavior even if duration < dur); later windows only while they fit
    # entirely before end_time
    _merge_window(*get_svl(start_time, duration=dur, mask=mask, fmt=None,
                           country_set=country_set, oddballs=oddballs,
                           maxmissing=1000, return_ccache=False,
                           ccachef=ccachef, mindomsize=1))
    start_time += dur
    while start_time + dur <= end_time:
        _merge_window(*get_svl(start_time, duration=dur, mask=mask, fmt=None,
                               country_set=country_set, oddballs=oddballs,
                               maxmissing=1000, return_ccache=False,
                               ccachef=ccachef, mindomsize=1))
        start_time += dur

    # materialize: .values() is a one-shot view on py3 and svl is re-iterated
    svl = list(svld.values())
    if fmt is None:
        fmt = list(myfmt)
    # drop domains whose answer set is too small or that are missing from
    # more than half of the smartvecs
    for dom in fmt:
        if len(anssets[dom]) < mindomsize \
                or len([sv for sv in svl if dom not in sv]) > 0.5*len(svl):
            del anssets[dom]
    # re-sync fmt with the surviving domains -- get_svl() does this after the
    # same filter, and omitting it here left deleted domains in fmt
    fmt = sorted(list(set(anssets.keys()).intersection(set(fmt))))
    svl = reduce_svl(svl, fmt, maxmissing)

    if return_ccache:
        ccache = init_ccache(None, ccachef, start_time, duration, mask, fmt,
                             oddballs, maxmissing)
        return svl, fmt, dict(anssets), ccache
    else:
        return svl, fmt, dict(anssets)
def get_svl(start_time, duration=30000, mask=32, fmt=None, country_set=None, oddballs=True, maxmissing=0, return_ccache=True, ccachef=df.rightdir(statedir + "pickles/") + "ccache.pickle", mindomsize=2): ''' :param t: int indicating the earliest query the window should include :param duration: int indication the span of time covered by the window, in seconds :param mask: int, prefix mask to use over domain IPs :param fmt: see transform fmt :param country_set: the set of countries the window should include queries from. If None, then all countries will be inluded :param oddballs: if True, will include non-public IPs (10.x.x.x, etc); if False, will only include public IPs :return: list of smartvecs ''' # if the duration is too long, mongo times out the cursor before we can # finish processing, so we should break it into smaller jobs with # get_big_svl() if duration > 24 * 60 * 60: logger.warning( "using get_big_svl(), which assumes that 'duration' is a multple of 24 hours" ) return get_big_svl(start_time, duration, mask, fmt, country_set, oddballs, maxmissing, return_ccache, ccachef, mindomsize) logger.info("->window...") window = get_window(start_time, duration, country_set=country_set, domain_set=fmt) logger.info("->svl...") svl, doms, anssets = dicts_to_svl(window, mask, oddballs) logger.debug(str(doms)) fmt = transform_fmt(fmt, doms) # remove any domains that only have 1 IP (since all nodes will see the # same thing) for dom in fmt: if len(anssets[dom]) < mindomsize \ or len([sv for sv in svl if dom not in sv]) > 0.5*len(svl): del anssets[dom] fmt = sorted(list(set(anssets.keys()).intersection(set(fmt)))) svl = reduce_svl(svl, fmt, maxmissing) for dom in fmt: logger.debug("-----------" + dom + "-----------") tmp = sorted(anssets[dom]) for val in tmp: if type(val) is int: logger.debug(ipp.int2ip(val)) if return_ccache: ccache = init_ccache(None, ccachef, start_time, duration, mask, fmt, oddballs, maxmissing) return svl, fmt, dict(anssets), ccache else: return svl, 
fmt, dict(anssets)
import logging
import logging.config

# configure logging from the project-wide config file and get a module logger
logging.config.fileConfig('logging.conf', disable_existing_loggers=False)
# create logger
logger = logging.getLogger(__name__)
logger.debug(__name__ + "logger loaded")

##################################################################
# GLOBAL AND SETUP
##################################################################
# paths
basedir = df.getdir(__file__) + '../'
statedir = df.rightdir(basedir + "state/")
rawdirlist = df.getlines(basedir + 'state/datapaths.list')
datafiles = df.listfiles(basedir + rawdirlist[0], fullpath=True)
# point the vngr (vinegar) module's pickle cache at this project's state dir
vngr.set_cache_dir(df.rightdir(statedir + "pickles"))

# database setup
mclient = MongoClient()
db = mclient.veracity
# measurement collection -- name suggests RIPE measurement 30002, May 2017;
# confirm against the data-ingest code
data = db.m30002_may17_full
pdata = db.probe_data

# printing
pp = pprint.PrettyPrinter(indent=4)

# vector processor
# NOTE(review): a pre-trained model unpickled from the state directory
# (filename suggests NMF, rank 20); pickle is only safe on trusted files --
# confirm provenance of this artifact.
vproc = df.picklein(statedir + 'nmf_rank20_pkr01.pickle')
def set_cache_dir(f):
    '''
    Repoint the module-level cache root at f and recompute datadir.

    :param f: directory path (string) to use as the new cachedir
    '''
    global cachedir, datadir
    cachedir = f
    datadir = df.rightdir(f + "cache/")
# LOGGING ################################################################## import logging import logging.config logging.config.fileConfig('logging.conf', disable_existing_loggers=False) # create logger logger = logging.getLogger(__name__) logger.debug(__name__ + "logger loaded") ################################################################## # GLOBAL AND SETUP ################################################################## basedir = df.rightdir(os.getcwd()) cachedir = basedir # manually change this in the file that is using vinegar datadir = df.rightdir(cachedir + "cache/") ################################################################## # CODE ################################################################## def set_cache_dir(f): global cachedir cachedir = f global datadir datadir = df.rightdir(cachedir + "cache/")
logging.config.fileConfig('logging.conf', disable_existing_loggers=False) # create logger logger = logging.getLogger(__name__) logger.debug(__name__ + "logger loaded") ################################################################## # GLOBAL AND SETUP ################################################################## # paths basedir = df.getdir(__file__) + '../' rawdirlist = df.getlines(basedir + 'state/datapaths.list') datafiles = df.listfiles(basedir + rawdirlist[0], fullpath=True) plotsdir = df.rightdir(basedir + "plots/") ################################################################## # CODE ################################################################## def add_closeness_edges(svl, ccache, G=None, minweight=0): ''' :param svl: list of smartvecs :param ccache: closeness_cache :param G: preexisting graph :result: graph builds graph using pairwise closeness as edge weights '''