def process_file(raw_path, file_name):
    exe_path = os.path.join(PE_DIR, "%s.exe" % (file_name, ))
    print "raw_file", raw_path
    print "exe_path", exe_path
    is_pe = pe_extract(raw_path, exe_path)
    if not is_pe:
        print "This is NOT a PE file! Skipping..."
        return

    # If we are really dealing with a PE file
    sha1, md5, file_size = get_file_hashes(exe_path)
    dump_id, corrupt_pe = db_pe_dumps(raw_path, sha1, md5, file_size)

    # query VT
    Process(target=process_timeout,
            args=(db_virus_total, (dump_id, ), VT_TIMEOUT)).start()
    if vts_config == "manual":
        # attempt to re-download the file "manually"
        Process(target=process_timeout,
                args=(manual_download, (sha1, ), MD_TIMEOUT)).start()

    ip2asn(dump_id)
    get_feature_vector(dump_id)
    classify_dump(dump_id)
    Process(target=db_syslog, args=(dump_id, )).start()

    sha1_path = os.path.join(PE_DIR, "%s.exe" % (sha1, ))
    md5_path = os.path.join(PE_DIR, "%s.exe" % (md5, ))
    shutil.move(exe_path, sha1_path)
    print "sha1_path", sha1_path
    print "md5_path", md5_path
    if not os.path.exists(md5_path):
        print "os.path.exists(md5_path)", os.path.exists(md5_path)
        os.symlink("%s.exe" % (sha1, ), md5_path)
    print "Done processing file: %s" % (raw_path, )
def __init__(self, fnames, target_asns, ip2asn_db="data/rib.20180401.pickle",
             ip2asn_ixp="data/ixs_201901.jsonl", output_directory="graphs/test/", af=4):
    """fnames: list of traceroute files
    target_asns: output graphs for these ASNs
    ip2asn_db: pickle file for the ip2asn module
    ip2asn_ixp: IXP info for ip2asn module"""

    self.fnames = fnames
    self.target_asns = [int(asn) for asn in target_asns.split(',')]
    self.i2a = ip2asn.ip2asn(ip2asn_db, ip2asn_ixp)
    self.graph = nx.Graph()
    self.vinicity_asns = defaultdict(set)
    self.routers_asn = {}
    self.ttls = defaultdict(list)
    self.sizes = defaultdict(list)
    self.af = af
    self.output_directory = output_directory
    if not self.output_directory.endswith('/'):
        self.output_directory += '/'
    if not os.path.exists(self.output_directory):
        os.makedirs(self.output_directory)
    self.periphery_size = 2

    print('Loading bdrmapit results...')
    self.bdrmapit = bdrmapit.bdrmapit()
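# --- Hedged usage sketch (not part of the original code) ---
# Shows how this constructor might be called. The class name
# `TracerouteGraph` and the glob pattern are assumptions; only the
# parameter names and defaults come from the __init__ above.
#
# import glob
# fnames = glob.glob("traceroutes/*.json")          # list of traceroute files
# builder = TracerouteGraph(fnames,
#                           target_asns="2497,2500",   # comma-separated ASNs
#                           ip2asn_db="data/rib.20180401.pickle",
#                           ip2asn_ixp="data/ixs_201901.jsonl",
#                           output_directory="graphs/test/",
#                           af=4)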
def processInit():
    global db
    global i2a
    client = pymongo.MongoClient("mongodb-iijlab", connect=True)
    db = client.atlas
    i2a = ip2asn.ip2asn("../lib/ip2asn/db/rib.20180401.pickle",
                        "../lib/ixs_201802.jsonl")
def scan(fin):
    """
    Read a series of passive DNS entries from a file. Each line should be a
    JSON object in the ISC-SIE format. Any file-like object should work.
    """
    start_time = time.time()
    line_count = 0
    for line in fin:
        line_count += 1
        if DEBUG_MODE and line_count % DEBUG_INTERVAL == 0:
            log('{} lines processed in {:.2f} seconds'.format(
                line_count, time.time() - start_time))

        if isinstance(line, bytes):
            # Fix for gzip returning bytes instead of a string
            js = json.loads(line.decode('utf-8'))
        else:
            js = json.loads(line)

        packet_type = js['type'] if 'type' in js else None
        query_type = js['qtype'] if 'qtype' in js else None

        # Process A records
        if packet_type == 'UDP_QUERY_RESPONSE' and query_type == 1:
            # Process query (domain)
            query = js['qname']
            url = fix_url(query)
            url_parts = etld.split(url)
            if url_parts:
                domain = '.'.join(url_parts)
            else:
                continue  # Invalid domain name, skip

            # Process response (ip)
            response = js['response_ip']
            ip = response
            if '.' in ip:
                asn = ip2asn.ip2asn(ip)
            else:
                continue  # Invalid IP (likely IPv6), skip

            # Ignore anything that's not in the Alexa Top N
            if not alexa.is_top_n(domain, FILTER_TOP_DOMAINS):
                continue

            # Store any interesting information
            # : domain -> asn -> ip -> count
            domain_to_asn[domain][asn][ip] += 1
            # : asn -> set(domain)
            asn_to_domain[asn].add(domain)
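# --- Hedged usage sketch (not part of the original code) ---
# scan() accepts any file-like object; gzip.open() in binary mode yields
# bytes, which the isinstance(line, bytes) branch above handles.
# The file name below is an assumption, for illustration only.
import gzip

with gzip.open("pdns-records.json.gz", "rb") as fin:
    scan(fin)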
def detectRttChangesMongo(expId=None):

    streaming = False
    replay = False
    nbProcesses = 12
    binMult = 3  # number of bins = binMult*nbProcesses
    pool = Pool(nbProcesses, initializer=processInit)  # , maxtasksperchild=binMult)

    client = pymongo.MongoClient("mongodb-iijlab")
    db = client.atlas
    detectionExperiments = db.rttExperiments
    alarmsCollection = db.rttChanges

    if expId == "stream":
        expParam = detectionExperiments.find_one({"stream": True})
        expId = expParam["_id"]

    if expId is None:
        expParam = {
            "timeWindow": 60 * 60,  # in seconds
            "start": datetime(2016, 11, 1, 0, 0, tzinfo=timezone("UTC")),
            "end": datetime(2016, 11, 26, 0, 0, tzinfo=timezone("UTC")),
            "alpha": 0.01,
            "confInterval": 0.05,
            "minASN": 3,
            "minASNEntropy": 0.5,
            "minSeen": 3,
            "experimentDate": datetime.now(),
            "af": "",
            "comment": "Study case for Emile (8.8.8.8) Nov. 2016",
            "prefixes": None
        }
        expId = detectionExperiments.insert_one(expParam).inserted_id
        sampleMediandiff = {}
    else:
        # streaming mode: analyze what happened in the last time bin
        streaming = True
        now = datetime.now(timezone("UTC"))
        expParam = detectionExperiments.find_one({"_id": expId})
        if replay:
            expParam["start"] = expParam["end"]
            expParam["end"] = expParam["start"] + timedelta(hours=1)
        else:
            expParam["start"] = datetime(
                now.year, now.month, now.day, now.hour, 0, 0,
                tzinfo=timezone("UTC")) - timedelta(hours=1)
            expParam["end"] = datetime(now.year, now.month, now.day, now.hour,
                                       0, 0, tzinfo=timezone("UTC"))
        expParam["analysisTimeUTC"] = now
        resUpdate = detectionExperiments.replace_one({"_id": expId}, expParam)
        if resUpdate.modified_count != 1:
            print "Problem happened when updating the experiment dates!"
            print resUpdate
            return

        sys.stdout.write("Loading previous reference...")
        try:
            fi = open("saved_references/%s_%s.pickle" % (expId, "diffRTT"), "rb")
            sampleMediandiff = pickle.load(fi)
        except IOError:
            sampleMediandiff = {}
        sys.stdout.write("done!\n")

    if not expParam["prefixes"] is None:
        expParam["prefixes"] = re.compile(expParam["prefixes"])

    probe2asn = {}
    probeip2asn = {}
    lastAlarms = []
    i2a = ip2asn.ip2asn("../lib/ip2asn/db/rib.20180401.pickle",
                        "../lib/ixs_201802.jsonl")

    start = int(calendar.timegm(expParam["start"].timetuple()))
    end = int(calendar.timegm(expParam["end"].timetuple()))

    for currDate in range(start, end, int(expParam["timeWindow"])):
        sys.stdout.write("Rtt analysis %s" % datetime.utcfromtimestamp(currDate))
        tsS = time.time()

        # Get distributions for the current time bin
        c = datetime.utcfromtimestamp(currDate)
        col = "traceroute%s_%s_%02d_%02d" % (expParam["af"], c.year, c.month, c.day)
        if expParam["prefixes"] is None:
            totalRows = db[col].count({
                "timestamp": {
                    "$gte": currDate,
                    "$lt": currDate + expParam["timeWindow"]
                }
            })
        else:
            totalRows = db[col].count({
                "timestamp": {
                    "$gte": currDate,
                    "$lt": currDate + expParam["timeWindow"]
                },
                "result.result.from": expParam["prefixes"]
            })
        if not totalRows:
            print "No data for that time bin!"
            continue

        params = []
        limit = int(totalRows / (nbProcesses * binMult - 1))
        skip = range(0, totalRows, limit)
        for i, val in enumerate(skip):
            params.append((expParam["af"], currDate,
                           currDate + expParam["timeWindow"], val, limit,
                           expParam["prefixes"]))

        diffRtt = defaultdict(dict)
        nbRow = 0
        rttResults = pool.imap_unordered(computeRtt, params)
        diffRtt, nbRow = mergeRttResults(rttResults, currDate, tsS,
                                         nbProcesses * binMult)

        # Detect outlier values
        lastAlarms = outlierDetection(diffRtt, sampleMediandiff, expParam,
                                      expId, datetime.utcfromtimestamp(currDate),
                                      probe2asn, i2a, alarmsCollection,
                                      streaming, probeip2asn)

        timeSpent = (time.time() - tsS)
        sys.stdout.write(", %s sec/bin, %s row/sec\r" % (timeSpent,
                                                         float(nbRow) / timeSpent))

    pool.close()
    pool.join()

    # Update results on the webserver
    if streaming:
        # update ASN table
        conn_string = "host='psqlserver' dbname='ihr'"

        # get a connection, if a connect cannot be made an exception will be raised here
        conn = psycopg2.connect(conn_string)
        cursor = conn.cursor()

        asnList = set(probeip2asn.values())
        cursor.execute("SELECT number FROM ihr_asn WHERE tartiflette=TRUE")
        registeredAsn = set([x[0] for x in cursor.fetchall()])
        for asn in asnList:
            # cursor.execute("INSERT INTO ihr_asn (number, name, tartiflette) VALUES (%s, %s, %s) \
            #         ON CONFLICT (number) DO UPDATE SET tartiflette = TRUE;", (int(asn), asname, True))
            asname = i2a.asn2name(asn)
            if int(asn) not in registeredAsn:
                cursor.execute(
                    """do $$
                    begin
                        insert into ihr_asn(number, name, tartiflette, disco, ashash) values(%s, %s, TRUE, FALSE, FALSE);
                    exception when unique_violation then
                        update ihr_asn set tartiflette = TRUE where number = %s;
                    end $$;""", (asn, asname, asn))

        # push alarms to the webserver
        for alarm in lastAlarms:
            ts = alarm["timeBin"] + timedelta(seconds=expParam["timeWindow"] / 2)
            for ip in alarm["ipPair"]:
                cursor.execute(
                    "INSERT INTO ihr_delay_alarms (asn_id, timebin, ip, link, \
                    medianrtt, nbprobes, diffmedian, deviation) VALUES (%s, %s, %s, \
                    %s, %s, %s, %s, %s) RETURNING id",
                    (probeip2asn[ip], ts, ip, alarm["ipPair"], alarm["median"],
                     alarm["nbProbes"], alarm["diffMed"], alarm["devBound"]))

                # Push measurement and probes ID corresponding to this alarm
                alarmid = cursor.fetchone()[0]
                for msmid, probes in alarm["msmId"].iteritems():
                    if not msmid is None:
                        for probeid in probes:
                            cursor.execute(
                                "INSERT INTO ihr_delay_alarms_msms(alarm_id, msmid, probeid) \
                                VALUES (%s, %s, %s)", (alarmid, msmid, probeid))

        # compute magnitude
        mag = computeMagnitude(asnList, datetime.utcfromtimestamp(currDate),
                               expId, alarmsCollection)
        for asn in asnList:
            cursor.execute(
                "INSERT INTO ihr_delay (asn_id, timebin, magnitude, deviation, label) \
                VALUES (%s, %s, %s, %s, %s)",
                (asn, expParam["start"] + timedelta(seconds=expParam["timeWindow"] / 2),
                 mag[asn], 0, ""))

        conn.commit()
        cursor.close()
        conn.close()

        print "Cleaning rtt change reference."
        sampleMediandiff = cleanRef(sampleMediandiff,
                                    datetime.utcfromtimestamp(currDate))

    print "Writing diffRTT reference to file system."
    fi = open("saved_references/%s_diffRTT.pickle" % (expId), "wb")  # binary mode for pickle
    pickle.dump(sampleMediandiff, fi, 2)
def computeMagnitude(asnList, timebin, expId, collection, tau=5,
                     metric="devBound", historySize=7 * 24, minPeriods=0):

    # Retrieve alarms
    starttime = timebin - timedelta(hours=historySize)
    endtime = timebin
    cursor = collection.aggregate([
        {
            "$match": {
                "expId": expId,
                "timeBin": {"$gt": starttime, "$lte": timebin},
                "diffMed": {"$gt": 1},
            }
        },
        {
            "$project": {
                "ipPair": 1,
                "timeBin": 1,
                "devBound": 1,
            }
        },
        {
            "$unwind": "$ipPair"
        },
    ])

    df = pd.DataFrame(list(cursor))
    df["timeBin"] = pd.to_datetime(df["timeBin"], utc=True)
    df.set_index("timeBin")

    if "asn" not in df.columns:
        # find ASN for each ip
        i2a = ip2asn.ip2asn("../lib/ip2asn/db/rib.20180401.pickle",
                            "../lib/ixs_201802.jsonl")
        fct = functools.partial(i2a.ip2asn)
        sTmp = df["ipPair"].apply(fct).apply(pd.Series)
        df["asn"] = sTmp[0]

    magnitudes = {}
    for asn in asnList:
        dfb = pd.DataFrame({
            u'devBound': 0.0,
            u'timeBin': starttime,
            u'asn': asn,
        }, index=[starttime])
        dfe = pd.DataFrame({
            u'devBound': 0.0,
            u'timeBin': endtime,
            u'asn': asn
        }, index=[endtime])
        dfasn = pd.concat([dfb, df[df["asn"] == asn], dfe])

        grp = dfasn.groupby("timeBin")
        grpSum = grp.sum().resample("1H").sum()

        # median absolute deviation, computed over non-null values
        mad = lambda x: np.median(
            np.fabs(x[pd.notnull(x)] - np.median(x[pd.notnull(x)])))
        magnitudes[asn] = (grpSum[metric][-1] - grpSum[metric].median()) / (
            1 + 1.4826 * mad(grpSum[metric]))

    return magnitudes
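# --- Toy illustration (not part of the original code) ---
# The magnitude computed above is a robust, MAD-scaled deviation of the
# latest hourly devBound sum from its median over the history window:
#   magnitude = (last - median) / (1 + 1.4826 * MAD)
# The values below are made up, purely to show the arithmetic.
import numpy as np

hourly_devbound = np.array([0.0, 0.2, 0.1, 0.0, 0.3, 4.5])
med = np.median(hourly_devbound)
mad = np.median(np.fabs(hourly_devbound - med))
magnitude = (hourly_devbound[-1] - med) / (1 + 1.4826 * mad)
print(magnitude)  # ~3.56: the last bin deviates strongly from the baseline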
def process_file(raw_path, file_name):
    file_type, file_path, file_extension = extract_file(raw_path)
    print "raw_file:", raw_path
    print "file_path:", file_path
    if not file_type:
        print "This is NOT a file of interest!"
        print "Removing raw data from disk:", raw_path
        # remove the related raw file
        os.remove(raw_path)
        print "Removed!"
        return
    print "file_type:", file_type

    # If we are really dealing with a PE file
    sha1, md5, file_size = get_file_hashes(file_path)
    dump_id, corrupt_pe, host, client, server = db_file_dumps(
        raw_path, sha1, md5, file_size, file_type)

    skip_classification = False
    score = None

    # check if we have already recently classified the same md5 dump from the same host
    md5_cache_key = md5
    if host is not None:
        md5_cache_key += '-' + host
    if md5_cache_key in md5host_cache.keys():
        md5host_cache[md5_cache_key]['count'] += 1
        if md5host_cache[md5_cache_key]['count'] > MAX_MD5_CACHE_COUNT:
            # do not classify again! retrieve cached score
            skip_classification = True
            score = md5host_cache[md5_cache_key]['score']  # get the last cached score
            print "MD5 CACHE: will use previous score : %s %s %s %s" % (
                dump_id, md5, host, score)
    elif not corrupt_pe:
        md5host_cache[md5_cache_key] = {'count': 1, 'score': None}

    # check if we have already recently classified several dumps from the same host,client,server
    hostcs_cache_key = ''
    if host is not None:
        hostcs_cache_key += host
    hostcs_cache_key += '-' + client
    hostcs_cache_key += '-' + server
    if hostcs_cache_key in hostcs_cache.keys():
        hostcs_cache[hostcs_cache_key]['count'] += 1
        if hostcs_cache[hostcs_cache_key]['count'] > MAX_HOSTCS_CACHE_COUNT:
            # do not classify again! retrieve cached score
            skip_classification = True
            if score is None:
                score = hostcs_cache[hostcs_cache_key]['score']  # get the last cached score
                print "HOSTCS CACHE: will use previous score : %s %s %s %s" % (
                    dump_id, host, server, score)
    elif not corrupt_pe:
        hostcs_cache[hostcs_cache_key] = {'count': 1, 'score': None}

    if not corrupt_pe and (not skip_classification or score is None):
        ip2asn(dump_id)
        get_feature_vector(dump_id, file_type)
        score = classify_dump(dump_id)
        md5host_cache[md5_cache_key]['score'] = score  # update cached score
        hostcs_cache[hostcs_cache_key]['score'] = score  # update cached score

    # query VT
    Process(target=process_timeout,
            args=(db_virus_total, (dump_id, ), VT_TIMEOUT)).start()
    if vts_config == "manual":
        # attempt to re-download the file "manually"
        Process(target=process_timeout,
                args=(manual_download, (sha1, ), MD_TIMEOUT)).start()

    if not corrupt_pe:
        if score is None:
            print "ERROR : None score : this should not happen! dump_id=", dump_id
        if skip_classification and not score is None:
            update_score(dump_id, score)
        print "Syslog score = %s (dump_id=%s)" % (score, dump_id)
        Process(target=db_syslog, args=(dump_id, score)).start()

    sha1_path = os.path.join(FILES_DIR, "%s.%s" % (sha1, file_extension))
    md5_path = os.path.join(FILES_DIR, "%s.%s" % (md5, file_extension))
    shutil.move(file_path, sha1_path)
    print "sha1_path", sha1_path
    print "md5_path", md5_path
    if not os.path.exists(md5_path):
        os.symlink("%s.%s" % (sha1, file_extension), md5_path)
    print "Done processing file: %s" % (raw_path, )
def main(self):
    """ Main program connecting all modules. """

    try:
        # Saver initialisation
        saver_queue = Queue()
        try:
            saver_module = importlib.import_module(self.saver)
            # These are run in a separate process
            saver = saver_module.Saver(self.saver_filename, saver_queue)
            saver.start()
        except ImportError:
            logging.error("Saver unknown! ({})".format(self.saver))
            traceback.print_exc(file=sys.stdout)
            return

        # Detector initialisation
        if self.detection_enabled:
            self.detector_pipe = Pipe(False)
            detector = AnomalyDetector(self.detector_pipe[0], saver_queue)
            detector.start()

        # Time Track initialisation
        sys.path.append(self.ip2asn_dir)
        import ip2asn
        i2a = ip2asn.ip2asn(self.ip2asn_db, self.ip2asn_ixp)
        try:
            timetrack_module = importlib.import_module("timetrack." + self.timetrack_converter)
            timetrackconverter = timetrack_module.TimeTrackConverter(i2a)
        except ImportError:
            logging.error("Timetrack converter unknown! ({})".format(self.timetrack_converter))
            traceback.print_exc(file=sys.stdout)
            return

        # Aggregator initialisation
        tm = TracksAggregator(self.tm_window_size, self.tm_significance_level, self.tm_min_tracks)
        saver_queue.put(("experiment", [datetime.datetime.now(), str(sys.argv), str(self.config.sections())]))

        # Reader initialisation
        try:
            reader_module = importlib.import_module(self.reader)
            tr_reader = reader_module.Reader(self.atlas_start, self.atlas_stop,
                                             timetrackconverter, self.atlas_msm_ids,
                                             self.atlas_probe_ids,
                                             chunk_size=self.atlas_chunk_size)
            # tr_reader = DumpReader(dump_name, dump_filter)
        except ImportError:
            logging.error("Reader unknown! ({})".format(self.reader))
            traceback.print_exc(file=sys.stdout)
            return

        # Main Loop:
        with tr_reader:
            for track in tr_reader.read():
                if not track:
                    continue
                aggregates = tm.add_track(track)
                if aggregates:
                    self.save_aggregates(saver_queue, aggregates)

        logging.info("Finished reading data {}".format(datetime.datetime.today()))

        # Try to get results from remaining track bins
        aggregates = tm.aggregate(force_expiration=0.5)
        self.save_aggregates(saver_queue, aggregates)

        logging.info("Number of ignored tracks {}".format(tm.nb_ignored_tracks))

        # closing
        saver_queue.put("MAIN_FINISHED")
        saver.join()
        # saver.terminate()
        if self.detection_enabled:
            detector.terminate()
        logging.info("Ended on {}".format(datetime.datetime.today()))

    except Exception as e:
        print("type error: " + str(e))
        print(traceback.format_exc())
def detectRouteChangesMongo(expId=None, configFile="detection.cfg"):
    # TODO config file implementation

    streaming = False
    replay = False
    nbProcesses = 18
    binMult = 3  # number of bins = binMult*nbProcesses
    pool = Pool(nbProcesses, initializer=processInit)  # , maxtasksperchild=binMult)

    client = pymongo.MongoClient("mongodb-iijlab")
    db = client.atlas
    detectionExperiments = db.routeExperiments
    alarmsCollection = db.routeChanges
    refRoutes = None

    if expId == "stream":
        expParam = detectionExperiments.find_one({"stream": True})
        expId = expParam["_id"]

    if expId is None:
        expParam = {
            "timeWindow": 60 * 60,  # in seconds
            "start": datetime(2016, 11, 15, 0, 0, tzinfo=timezone("UTC")),
            "end": datetime(2016, 11, 26, 0, 0, tzinfo=timezone("UTC")),
            "alpha": 0.01,  # parameter for exponential smoothing
            "minCorr": -0.25,  # correlation scores lower than this value will be reported
            "minSeen": 3,
            "minASN": 3,
            "minASNEntropy": 0.5,
            "af": "",
            "experimentDate": datetime.now(),
            "comment": "Study case for Emile (8.8.8.8) Nov. 2016",
        }
        expId = detectionExperiments.insert_one(expParam).inserted_id
        refRoutes = defaultdict(routeCountRef)
    else:
        # Streaming mode: analyze the last time bin
        streaming = True
        now = datetime.now(timezone("UTC"))
        expParam = detectionExperiments.find_one({"_id": expId})
        if replay:
            expParam["start"] = expParam["end"]
            expParam["end"] = expParam["start"] + timedelta(hours=1)
        else:
            expParam["start"] = datetime(
                now.year, now.month, now.day, now.hour, 0, 0,
                tzinfo=timezone("UTC")) - timedelta(hours=1)
            expParam["end"] = datetime(now.year, now.month, now.day, now.hour,
                                       0, 0, tzinfo=timezone("UTC"))
        expParam["analysisTimeUTC"] = now
        expParam["minASN"] = 3
        expParam["minASNEntropy"] = 0.5
        resUpdate = detectionExperiments.replace_one({"_id": expId}, expParam)
        if resUpdate.modified_count != 1:
            print "Problem happened when updating the experiment dates!"
            print resUpdate
            return

        sys.stdout.write("Loading previous reference...")
        try:
            fi = open("saved_references/%s_%s.pickle" % (expId, "routeChange"), "rb")
            refRoutes = pickle.load(fi)
        except IOError:
            sys.stdout.write("corrupted file!?")
            refRoutes = defaultdict(routeCountRef)
        sys.stdout.write("done!\n")

    probe2asn = {}
    start = int(calendar.timegm(expParam["start"].timetuple()))
    end = int(calendar.timegm(expParam["end"].timetuple()))
    nbIteration = 0

    sys.stdout.write("Route analysis:\n")
    for currDate in range(start, end, int(expParam["timeWindow"])):
        tsS = time.time()

        # count packet routes for the current time bin
        params = []
        binEdges = np.linspace(currDate, currDate + expParam["timeWindow"],
                               nbProcesses * binMult + 1)
        for i in range(nbProcesses * binMult):
            params.append((expParam["af"], binEdges[i], binEdges[i + 1]))

        nbRow = 0
        routes = pool.imap_unordered(countRoutes, params)
        routes, nbRow = mergeRoutes(routes, currDate, tsS, nbProcesses * binMult)

        print "size before params: %s" % len(refRoutes)
        # Detect route changes
        params = []
        for target, newRoutes in routes.iteritems():
            params.append((newRoutes, refRoutes[target], expParam, expId,
                           datetime.utcfromtimestamp(currDate), target, probe2asn))
        print "size after params: %s" % len(refRoutes)

        mapResult = pool.imap_unordered(routeChangeDetection, params)

        # Update the reference
        for target, newRef in mapResult:
            refRoutes[target] = newRef

        print "size after analysis: %s" % len(refRoutes)

        if nbRow > 0:
            nbIteration += 1

    # Update results on the webserver
    if streaming:
        i2a = ip2asn.ip2asn("../lib/ip2asn/db/rib.20180401.pickle",
                            "../lib/ixs_201802.jsonl")

        # update ASN table
        conn_string = "host='psqlserver' dbname='ihr'"

        # get a connection, if a connect cannot be made an exception will be raised here
        conn = psycopg2.connect(conn_string)
        cursor = conn.cursor()
        cursor.execute("SELECT number, name FROM ihr_asn WHERE tartiflette=TRUE;")
        asnList = cursor.fetchall()
        probeip2asn = {}

        # compute magnitude
        mag, alarms = computeMagnitude(asnList, datetime.utcfromtimestamp(currDate),
                                       expId, probeip2asn, alarmsCollection, i2a)
        for asn, asname in asnList:
            cursor.execute(
                "INSERT INTO ihr_forwarding (asn_id, timebin, magnitude, resp, label) \
                VALUES (%s, %s, %s, %s, %s)",
                (int(asn), expParam["start"] + timedelta(seconds=expParam["timeWindow"] / 2),
                 mag[asn], 0, ""))
        conn.commit()

        # push alarms to the webserver
        ts = expParam["start"] + timedelta(seconds=expParam["timeWindow"] / 2)
        for alarm in alarms:
            if alarm["asn"] in mag:
                cursor.execute(
                    "INSERT INTO ihr_forwarding_alarms (asn_id, timebin, ip, \
                    correlation, responsibility, pktdiff, previoushop ) VALUES (%s, %s, %s, \
                    %s, %s, %s, %s) RETURNING id",
                    (alarm["asn"], ts, alarm["ip"], alarm["correlation"],
                     alarm["responsibility"], alarm["pktDiff"], alarm["previousHop"]))

                # Push measurement and probes ID corresponding to this alarm
                alarmid = cursor.fetchone()[0]
                for msmid, probes in alarm["msmId"].iteritems():
                    if not msmid is None:
                        for probeid in probes:
                            cursor.execute(
                                "INSERT INTO ihr_forwarding_alarms_msms(alarm_id, msmid, probeid) \
                                VALUES (%s, %s, %s)", (alarmid, msmid, probeid))
        conn.commit()
        cursor.close()
        conn.close()

    pool.close()
    pool.join()

    sys.stdout.write("\n")
    print "Writing route change reference to file system."
    fi = open("saved_references/%s_routeChange.pickle" % (expId), "wb")  # binary mode for pickle
    pickle.dump(refRoutes, fi, 2)
import os
import logging
import sys
from matplotlib import pylab as plt
plt.switch_backend('agg')
import glob
import pickle
import networkx as nx
from collections import defaultdict, Counter
from itertools import chain
import multiprocessing

sys.path.append("../ip2asn")
import ip2asn

ia = ip2asn.ip2asn("../ip2asn/db/rib.20180701.pickle")

esteban_results_directory = "20181001_BGPcount"


def asnres(ip):
    """Find the ASN corresponding to the given IP address"""

    asn = ia.ip2asn(ip)
    if asn == "unknown":
        asn = "0"

    return str(asn)


def validation(events, ts=1505287800, prefix="84.205.67.0/24"):
    """Validate SSL results using traceroute data.
import bz2
import json
from ripe.atlas.sagan import DnsResult
import logging
logging.basicConfig()
logging.getLogger().setLevel(logging.ERROR)
import sys
sys.path.append('../ip2asn/')
import ip2asn

i2a = ip2asn.ip2asn('../ip2asn/db/rib.20200601.pickle.bz2')
builtin_msmid = [30001, 30002]

fname = sys.argv[1]
date = fname.partition('-')[2].rpartition('.')[0]
output_fname = f'data/parsed_results_{date}.json'
output = []

with bz2.open(fname, 'rb') as fp:
    for line in fp:
        line_json = json.loads(line)
        result = DnsResult(line_json)

        for response in result.responses:
            # Skip if something's wrong
            if (response.is_error or response.is_malformed
                    or not response.destination_address or not response.abuf):
                continue