Example #1
def process_file(raw_path, file_name):
    exe_path = os.path.join(PE_DIR, "%s.exe" % (file_name, ))
    print "raw_file", raw_path
    print "exe_path", exe_path
    is_pe = pe_extract(raw_path, exe_path)
    if not is_pe:
        print "This is NOT a PE file! Skipping..."
        return

    # If we are really dealing with a PE file
    sha1, md5, file_size = get_file_hashes(exe_path)
    dump_id, corrupt_pe = db_pe_dumps(raw_path, sha1, md5, file_size)

    # query VT
    Process(target=process_timeout,
            args=(db_virus_total, (dump_id, ), VT_TIMEOUT)).start()
    if vts_config == "manual":  # attempt to re-download the file "manually"
        Process(target=process_timeout,
                args=(manual_download, (sha1, ), MD_TIMEOUT)).start()

    ip2asn(dump_id)
    get_feature_vector(dump_id)
    classify_dump(dump_id)
    Process(target=db_syslog, args=(dump_id, )).start()
    sha1_path = os.path.join(PE_DIR, "%s.exe" % (sha1, ))
    md5_path = os.path.join(PE_DIR, "%s.exe" % (md5, ))
    shutil.move(exe_path, sha1_path)
    print "sha1_path", sha1_path
    print "md5_path", md5_path
    if not os.path.exists(md5_path):
        print "os.path.exists(md5_path)", os.path.exists(md5_path)
        os.symlink("%s.exe" % (sha1, ), md5_path)
    print "Done processing file: %s" % (raw_path, )
Example #3
    def __init__(self, fnames, target_asns, ip2asn_db="data/rib.20180401.pickle",
                 ip2asn_ixp="data/ixs_201901.jsonl", output_directory="graphs/test/",
                 af=4):
        """fnames: list of traceroute files
        target_asns: output graphs for these ASNs
        ip2asn_db: pickle file for the ip2asn module
        ip2asn_ixp: IXP info for ip2asn module"""

        self.fnames = fnames
        self.target_asns = [int(asn) for asn in target_asns.split(',')]
        self.i2a = ip2asn.ip2asn(ip2asn_db, ip2asn_ixp)
        self.graph = nx.Graph()
        self.vinicity_asns = defaultdict(set)
        self.routers_asn = {}
        self.ttls = defaultdict(list)
        self.sizes = defaultdict(list)
        self.af = af
        self.output_directory = output_directory

        if not self.output_directory.endswith('/'):
            self.output_directory += '/'

        if not os.path.exists(self.output_directory):
            os.makedirs(self.output_directory)

        self.periphery_size = 2

        print('Loading bdrmapit results...')
        self.bdrmapit = bdrmapit.bdrmapit()
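
# --- Usage sketch (not part of the original example) ---
# Hedged illustration of how the constructor above might be called.  The class
# name TracerouteGraph and the input file name are hypothetical; only the
# argument semantics come from the __init__ signature and docstring.
tg = TracerouteGraph(fnames=["traceroutes/sample.json"],  # hypothetical traceroute dump
                     target_asns="2497,2500",             # comma-separated ASNs (parsed with split(','))
                     af=4)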
def processInit():
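    """Initialize per-worker globals: a MongoDB connection and an ip2asn
    instance.  Intended to be used as a multiprocessing Pool initializer so
    that every worker process gets its own copies."""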
    global db
    global i2a
    client = pymongo.MongoClient("mongodb-iijlab", connect=True)
    db = client.atlas
    i2a = ip2asn.ip2asn("../lib/ip2asn/db/rib.20180401.pickle",
                        "../lib/ixs_201802.jsonl")
def scan(fin):
    """
    Read a series of passive DNS entries from a file.

    Each line should be a JSON object in the ISC-SIE format.
    Any filelike object should work.
    """

    start_time = time.time()
    line_count = 0

    for line in fin:
        line_count += 1
        if DEBUG_MODE and line_count % DEBUG_INTERVAL == 0:
            log('{} lines processed in {:.2f} seconds'.format(
                line_count,
                time.time() - start_time))

        if isinstance(line, bytes):  # Fix for gzip returning bytes instead of a string
            js = json.loads(line.decode('utf-8'))
        else:
            js = json.loads(line)

        packet_type = js.get('type')
        query_type = js.get('qtype')

        # Process A records
        if packet_type == 'UDP_QUERY_RESPONSE' and query_type == 1:
            # Process query (domain)
            query = js['qname']
            url = fix_url(query)
            url_parts = etld.split(url)
            if url_parts:
                domain = '.'.join(url_parts)
            else:
                continue  # Invalid domain name, skip

            # Process response (ip)
            response = js['response_ip']
            ip = response
            if '.' in ip:
                asn = ip2asn.ip2asn(ip)
            else:
                continue  # Invalid IP (likely IPv6), skip

            # Ignore anything that's not in the Alexa Top N
            if not alexa.is_top_n(domain, FILTER_TOP_DOMAINS):
                continue

            # Store any interesting information

            # : domain -> asn -> ip -> count
            domain_to_asn[domain][asn][ip] += 1

            # : asn -> set(domain)
            asn_to_domain[asn].add(domain)
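
# --- Usage sketch (not part of the original example) ---
# scan() accepts any file-like object that yields one JSON record per line.
# The gzip input below is an assumption suggested by the bytes-decoding fix
# above, and 'pdns.json.gz' is a hypothetical file name.
import gzip

with gzip.open('pdns.json.gz', 'rb') as fin:
    scan(fin)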
Example #7
def detectRttChangesMongo(expId=None):

    streaming = False
    replay = False
    nbProcesses = 12
    binMult = 3  # number of bins = binMult*nbProcesses
    pool = Pool(nbProcesses,
                initializer=processInit)  #, maxtasksperchild=binMult)

    client = pymongo.MongoClient("mongodb-iijlab")
    db = client.atlas
    detectionExperiments = db.rttExperiments
    alarmsCollection = db.rttChanges

    if expId == "stream":
        expParam = detectionExperiments.find_one({"stream": True})
        expId = expParam["_id"]

    if expId is None:
        expParam = {
            "timeWindow": 60 * 60,  # in seconds 
            "start": datetime(2016, 11, 1, 0, 0, tzinfo=timezone("UTC")),
            "end": datetime(2016, 11, 26, 0, 0, tzinfo=timezone("UTC")),
            "alpha": 0.01,
            "confInterval": 0.05,
            "minASN": 3,
            "minASNEntropy": 0.5,
            "minSeen": 3,
            "experimentDate": datetime.now(),
            "af": "",
            "comment": "Study case for Emile (8.8.8.8) Nov. 2016",
            "prefixes": None
        }

        expId = detectionExperiments.insert_one(expParam).inserted_id
        sampleMediandiff = {}

    else:
        # streaming mode: analyze what happened in the last time bin
        streaming = True
        now = datetime.now(timezone("UTC"))

        expParam = detectionExperiments.find_one({"_id": expId})
        if replay:
            expParam["start"] = expParam["end"]
            expParam["end"] = expParam["start"] + timedelta(hours=1)
        else:
            expParam["start"] = datetime(
                now.year,
                now.month,
                now.day,
                now.hour,
                0,
                0,
                tzinfo=timezone("UTC")) - timedelta(hours=1)
            expParam["end"] = datetime(now.year,
                                       now.month,
                                       now.day,
                                       now.hour,
                                       0,
                                       0,
                                       tzinfo=timezone("UTC"))
        expParam["analysisTimeUTC"] = now
        resUpdate = detectionExperiments.replace_one({"_id": expId}, expParam)
        if resUpdate.modified_count != 1:
            print "Problem happened when updating the experiment dates!"
            print resUpdate
            return

        sys.stdout.write("Loading previous reference...")
        try:
            fi = open("saved_references/%s_%s.pickle" % (expId, "diffRTT"),
                      "rb")
            sampleMediandiff = pickle.load(fi)
        except IOError:
            sampleMediandiff = {}

        sys.stdout.write("done!\n")

    if expParam["prefixes"] is not None:
        expParam["prefixes"] = re.compile(expParam["prefixes"])

    probe2asn = {}
    probeip2asn = {}
    lastAlarms = []
    i2a = ip2asn.ip2asn("../lib/ip2asn/db/rib.20180401.pickle",
                        "../lib/ixs_201802.jsonl")

    start = int(calendar.timegm(expParam["start"].timetuple()))
    end = int(calendar.timegm(expParam["end"].timetuple()))

    for currDate in range(start, end, int(expParam["timeWindow"])):
        sys.stdout.write("Rtt analysis %s" %
                         datetime.utcfromtimestamp(currDate))
        tsS = time.time()

        # Get distributions for the current time bin
        c = datetime.utcfromtimestamp(currDate)
        col = "traceroute%s_%s_%02d_%02d" % (expParam["af"], c.year, c.month,
                                             c.day)
        if expParam["prefixes"] is None:
            totalRows = db[col].count({
                "timestamp": {
                    "$gte": currDate,
                    "$lt": currDate + expParam["timeWindow"]
                }
            })
        else:
            totalRows = db[col].count({
                "timestamp": {
                    "$gte": currDate,
                    "$lt": currDate + expParam["timeWindow"]
                },
                "result.result.from":
                expParam["prefixes"]
            })
        if not totalRows:
            print "No data for that time bin!"
            continue
        params = []
        limit = max(1, int(totalRows / (nbProcesses * binMult - 1)))  # avoid a zero step in range() below
        skip = range(0, totalRows, limit)
        for i, val in enumerate(skip):
            params.append(
                (expParam["af"], currDate, currDate + expParam["timeWindow"],
                 val, limit, expParam["prefixes"]))

        diffRtt = defaultdict(dict)
        nbRow = 0
        rttResults = pool.imap_unordered(computeRtt, params)
        diffRtt, nbRow = mergeRttResults(rttResults, currDate, tsS,
                                         nbProcesses * binMult)

        # Detect outlier values
        lastAlarms = outlierDetection(diffRtt, sampleMediandiff,
                                      expParam, expId,
                                      datetime.utcfromtimestamp(currDate),
                                      probe2asn, i2a, alarmsCollection,
                                      streaming, probeip2asn)

        timeSpent = (time.time() - tsS)
        sys.stdout.write(", %s sec/bin,  %s row/sec\r" %
                         (timeSpent, float(nbRow) / timeSpent))

    pool.close()
    pool.join()

    # Update results on the webserver
    if streaming:
        # update ASN table
        conn_string = "host='psqlserver' dbname='ihr'"

        # get a connection, if a connect cannot be made an exception will be raised here
        conn = psycopg2.connect(conn_string)
        cursor = conn.cursor()

        asnList = set(probeip2asn.values())
        cursor.execute("SELECT number FROM ihr_asn WHERE tartiflette=TRUE")
        registeredAsn = set([x[0] for x in cursor.fetchall()])
        for asn in asnList:
            #cursor.execute("INSERT INTO ihr_asn (number, name, tartiflette) VALUES (%s, %s, %s) \
            #    ON CONFLICT (number) DO UPDATE SET tartiflette = TRUE;", (int(asn), asname, True))
            asname = i2a.asn2name(asn)
            if int(asn) not in registeredAsn:
                cursor.execute(
                    """do $$
                begin 
                      insert into ihr_asn(number, name, tartiflette, disco, ashash) values(%s, %s, TRUE, FALSE, FALSE);
                  exception when unique_violation then
                    update ihr_asn set tartiflette = TRUE where number = %s;
                end $$;""", (asn, asname, asn))

        # push alarms to the webserver
        for alarm in lastAlarms:
            ts = alarm["timeBin"] + timedelta(seconds=expParam["timeWindow"] /
                                              2)
            for ip in alarm["ipPair"]:
                cursor.execute(
                    "INSERT INTO ihr_delay_alarms (asn_id, timebin, ip, link, \
                        medianrtt, nbprobes, diffmedian, deviation) VALUES (%s, %s, %s, \
                        %s, %s, %s, %s, %s) RETURNING id",
                    (probeip2asn[ip], ts, ip, alarm["ipPair"], alarm["median"],
                     alarm["nbProbes"], alarm["diffMed"], alarm["devBound"]))

                # Push measurement and probes ID corresponding to this alarm
                alarmid = cursor.fetchone()[0]
                for msmid, probes in alarm["msmId"].iteritems():
                    if msmid is not None:
                        for probeid in probes:
                            cursor.execute(
                                "INSERT INTO ihr_delay_alarms_msms(alarm_id, msmid, probeid) \
                                       VALUES (%s, %s, %s)",
                                (alarmid, msmid, probeid))

        # compute magnitude
        mag = computeMagnitude(asnList, datetime.utcfromtimestamp(currDate),
                               expId, alarmsCollection)
        for asn in asnList:
            cursor.execute(
                "INSERT INTO ihr_delay (asn_id, timebin, magnitude, deviation, label) \
            VALUES (%s, %s, %s, %s, %s)",
                (asn, expParam["start"] +
                 timedelta(seconds=expParam["timeWindow"] / 2), mag[asn], 0,
                 ""))

        conn.commit()
        cursor.close()
        conn.close()

        print "Cleaning rtt change reference."
        sampleMediandiff = cleanRef(sampleMediandiff,
                                    datetime.utcfromtimestamp(currDate))

    print "Writing diffRTT reference to file system."
    fi = open("saved_references/%s_diffRTT.pickle" % (expId), "w")
    pickle.dump(sampleMediandiff, fi, 2)
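
# --- Illustration (not part of the original example) ---
# In streaming mode the window analysed above is the last complete hour: "end"
# is the current time truncated to the hour, "start" is one hour earlier, and
# both are converted to UNIX timestamps.  Minimal sketch of that computation,
# assuming timezone() comes from pytz as the tzinfo arguments suggest.
import calendar
from datetime import datetime, timedelta
from pytz import timezone

now = datetime.now(timezone("UTC"))
end_dt = datetime(now.year, now.month, now.day, now.hour, 0, 0, tzinfo=timezone("UTC"))
start_dt = end_dt - timedelta(hours=1)
start = int(calendar.timegm(start_dt.timetuple()))
end = int(calendar.timegm(end_dt.timetuple()))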
Example #8
def computeMagnitude(asnList,
                     timebin,
                     expId,
                     collection,
                     tau=5,
                     metric="devBound",
                     historySize=7 * 24,
                     minPeriods=0):

    # Retrieve alarms
    starttime = timebin - timedelta(hours=historySize)
    endtime = timebin
    cursor = collection.aggregate([
        {
            "$match": {
                "expId": expId,
                "timeBin": {
                    "$gt": starttime,
                    "$lte": timebin
                },
                "diffMed": {
                    "$gt": 1
                },
            }
        },
        {
            "$project": {
                "ipPair": 1,
                "timeBin": 1,
                "devBound": 1,
            }
        },
        {
            "$unwind": "$ipPair"
        },
    ])

    df = pd.DataFrame(list(cursor))
    df["timeBin"] = pd.to_datetime(df["timeBin"], utc=True)
    df.set_index("timeBin")

    if "asn" not in df.columns:
        # find ASN for each ip
        i2a = ip2asn.ip2asn("../lib/ip2asn/db/rib.20180401.pickle",
                            "../lib/ixs_201802.jsonl")
        fct = functools.partial(i2a.ip2asn)
        sTmp = df["ipPair"].apply(fct).apply(pd.Series)
        df["asn"] = sTmp[0]

    magnitudes = {}
    for asn in asnList:

        dfb = pd.DataFrame(
            {
                u'devBound': 0.0,
                u'timeBin': starttime,
                u'asn': asn,
            },
            index=[starttime])
        dfe = pd.DataFrame(
            {
                u'devBound': 0.0,
                u'timeBin': endtime,
                u'asn': asn,
            },
            index=[endtime])
        dfasn = pd.concat([dfb, df[df["asn"] == asn], dfe])

        grp = dfasn.groupby("timeBin")
        grpSum = grp.sum().resample("1H").sum()

        # Median absolute deviation of the non-null values
        mad = lambda x: np.median(np.fabs(x.dropna() - np.median(x.dropna())))
        magnitudes[asn] = (grpSum[metric][-1] - grpSum[metric].median()) / (
            1 + 1.4826 * mad(grpSum[metric]))

    return magnitudes
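
# --- Illustration (not part of the original example) ---
# The magnitude above is a robust deviation score: the latest hourly sum of the
# metric minus its median over the history window, scaled by 1 + 1.4826 * MAD
# (1.4826 * MAD approximates one standard deviation for normal data, and the
# "1 +" avoids a division by zero when the MAD is null).  Standalone sketch:
import numpy as np

def robust_magnitude(values):
    values = np.asarray(values, dtype=float)
    mad = np.median(np.fabs(values - np.median(values)))
    return (values[-1] - np.median(values)) / (1 + 1.4826 * mad)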
Example #9
def process_file(raw_path, file_name):
    file_type, file_path, file_extension = extract_file(raw_path)
    print "raw_file:", raw_path
    print "file_path:", file_path
    if not file_type:
        print "This is NOT a file of interest! "
        print "Removing raw data from disk:", raw_path
        # remove the related raw file
        os.remove(raw_path)
        print "Removed!"
        return
    print "file_type:", file_type

    # If we are really dealing with a PE file
    sha1, md5, file_size = get_file_hashes(file_path)
    dump_id, corrupt_pe, host, client, server = db_file_dumps(
        raw_path, sha1, md5, file_size, file_type)

    skip_classification = False
    score = None

    # check if we have already recently classified the same md5 dump from the same host
    md5_cache_key = md5
    if host is not None:
        md5_cache_key += '-' + host
    if md5_cache_key in md5host_cache:
        md5host_cache[md5_cache_key]['count'] += 1
        if md5host_cache[md5_cache_key]['count'] > MAX_MD5_CACHE_COUNT:
            # do not classify again! retrieve the last cached score
            skip_classification = True
            score = md5host_cache[md5_cache_key]['score']
            print "MD5 CACHE: will use previous score : %s %s %s %s" % (
                dump_id, md5, host, score)
    elif not corrupt_pe:
        md5host_cache[md5_cache_key] = {'count': 1, 'score': None}

    # check if we have already recently classified several dumps from the same host,client,server
    hostcs_cache_key = ''
    if host is not None:
        hostcs_cache_key += host
    hostcs_cache_key += '-' + client
    hostcs_cache_key += '-' + server
    if hostcs_cache_key in hostcs_cache:
        hostcs_cache[hostcs_cache_key]['count'] += 1
        if hostcs_cache[hostcs_cache_key]['count'] > MAX_HOSTCS_CACHE_COUNT:
            # do not classify again! retrieve the last cached score
            skip_classification = True
            if score is None:
                score = hostcs_cache[hostcs_cache_key]['score']
                print "HOSTCS CACHE: will use previous score : %s %s %s %s" % (
                    dump_id, host, server, score)
    elif not corrupt_pe:
        hostcs_cache[hostcs_cache_key] = {'count': 1, 'score': None}

    if not corrupt_pe and (not skip_classification or score is None):
        ip2asn(dump_id)
        get_feature_vector(dump_id, file_type)
        score = classify_dump(dump_id)
        md5host_cache[md5_cache_key]['score'] = score  # update cached score
        hostcs_cache[hostcs_cache_key]['score'] = score  # update cached score

        # query VT
        Process(target=process_timeout,
                args=(db_virus_total, (dump_id, ), VT_TIMEOUT)).start()
        if vts_config == "manual":  # attempt to re-download the file "manually"
            Process(target=process_timeout,
                    args=(manual_download, (sha1, ), MD_TIMEOUT)).start()

    if not corrupt_pe:
        if score is None:
            print "ERROR : None score : this should not happen! dump_id=", dump_id
        if skip_classification and score is not None:
            update_score(dump_id, score)
        print "Syslog score = %s (dump_id=%s)" % (score, dump_id)
        Process(target=db_syslog, args=(dump_id, score)).start()

    sha1_path = os.path.join(FILES_DIR, "%s.%s" % (sha1, file_extension))
    md5_path = os.path.join(FILES_DIR, "%s.%s" % (md5, file_extension))
    shutil.move(file_path, sha1_path)
    print "sha1_path", sha1_path
    print "md5_path", md5_path
    if not os.path.exists(md5_path):
        os.symlink("%s.%s" % (sha1, file_extension), md5_path)
    print "Done processing file: %s" % (raw_path, )
Example #10
    def main(self):
        """
        Main program connecting all modules.
        """

        try:
            # Saver initialisation
            saver_queue = Queue()
            try:
                saver_module = importlib.import_module(self.saver)
                # These are run in a separate process
                saver = saver_module.Saver(self.saver_filename, saver_queue)
                saver.start()
            except ImportError:
                logging.error("Saver unknown! ({})".format(self.saver))
                traceback.print_exc(file=sys.stdout)
                return

            # Detector initialisation
            if self.detection_enabled:
                self.detector_pipe = Pipe(False)
                detector = AnomalyDetector(self.detector_pipe[0], saver_queue)
                detector.start()

            # Time Track initialisation
            sys.path.append(self.ip2asn_dir)
            import ip2asn
            i2a = ip2asn.ip2asn(self.ip2asn_db, self.ip2asn_ixp)

            try:
                timetrack_module = importlib.import_module("timetrack." + self.timetrack_converter)
                timetrackconverter = timetrack_module.TimeTrackConverter(i2a)
            except ImportError:
                logging.error("Timetrack converter unknown! ({})".format(self.timetrack_converter))
                traceback.print_exc(file=sys.stdout)
                return

            # Aggregator initialisation
            tm = TracksAggregator(self.tm_window_size, self.tm_significance_level, self.tm_min_tracks)
            saver_queue.put(("experiment", [datetime.datetime.now(), str(sys.argv), str(self.config.sections())]))

            # Reader initialisation
            try:
                reader_module = importlib.import_module(self.reader)
                tr_reader = reader_module.Reader(self.atlas_start, self.atlas_stop, timetrackconverter,
                    self.atlas_msm_ids, self.atlas_probe_ids, chunk_size=self.atlas_chunk_size)
            # tr_reader  = DumpReader(dump_name, dump_filter)
            except ImportError:
                logging.error("Reader unknown! ({})".format(self.reader))
                traceback.print_exc(file=sys.stdout)
                return


            # Main loop:
            with tr_reader:
                for track in tr_reader.read():
                    if not track:
                        continue

                    aggregates = tm.add_track(track)
                    if aggregates:
                        self.save_aggregates(saver_queue, aggregates)

            logging.info("Finished to read data {}".format(datetime.datetime.today()))

            # Try to get results from remaining track bins
            aggregates = tm.aggregate(force_expiration=0.5)
            self.save_aggregates(saver_queue, aggregates)

            logging.info("Number of ignored tracks {}".format(tm.nb_ignored_tracks))

            # closing
            saver_queue.put("MAIN_FINISHED")
            saver.join()
            # saver.terminate()
            if self.detection_enabled:
                detector.terminate()

            logging.info("Ended on {}".format(datetime.datetime.today()))

        except Exception as e:
            print("type error: " + str(e))
            print(traceback.format_exc())
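
# --- Illustration (not part of the original example) ---
# main() above wires its saver, timetrack converter and reader together as
# plugins: the module name comes from the configuration, the module is imported
# with importlib and the expected class is instantiated from it.  Minimal
# sketch of that pattern; module and class names passed to it are hypothetical.
import importlib

def load_plugin(module_name, class_name, *args, **kwargs):
    module = importlib.import_module(module_name)
    return getattr(module, class_name)(*args, **kwargs)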
def detectRouteChangesMongo(
        expId=None,
        configFile="detection.cfg"):  # TODO config file implementation

    streaming = False
    replay = False
    nbProcesses = 18
    binMult = 3  # number of bins = binMult*nbProcesses
    pool = Pool(nbProcesses,
                initializer=processInit)  #, maxtasksperchild=binMult)

    client = pymongo.MongoClient("mongodb-iijlab")
    db = client.atlas
    detectionExperiments = db.routeExperiments
    alarmsCollection = db.routeChanges
    refRoutes = None

    if expId == "stream":
        expParam = detectionExperiments.find_one({"stream": True})
        expId = expParam["_id"]

    if expId is None:
        expParam = {
            "timeWindow": 60 * 60,  # in seconds 
            "start": datetime(2016, 11, 15, 0, 0, tzinfo=timezone("UTC")),
            "end": datetime(2016, 11, 26, 0, 0, tzinfo=timezone("UTC")),
            "alpha": 0.01,  # parameter for exponential smoothing 
            "minCorr":
            -0.25,  # correlation scores lower than this value will be reported
            "minSeen": 3,
            "minASN": 3,
            "minASNEntropy": 0.5,
            "af": "",
            "experimentDate": datetime.now(),
            "comment": "Study case for Emile (8.8.8.8) Nov. 2016",
        }

        expId = detectionExperiments.insert_one(expParam).inserted_id
        refRoutes = defaultdict(routeCountRef)

    else:
        # Streaming mode: analyze the last time bin
        streaming = True
        now = datetime.now(timezone("UTC"))
        expParam = detectionExperiments.find_one({"_id": expId})
        if replay:
            expParam["start"] = expParam["end"]
            expParam["end"] = expParam["start"] + timedelta(hours=1)
        else:
            expParam["start"] = datetime(
                now.year,
                now.month,
                now.day,
                now.hour,
                0,
                0,
                tzinfo=timezone("UTC")) - timedelta(hours=1)
            expParam["end"] = datetime(now.year,
                                       now.month,
                                       now.day,
                                       now.hour,
                                       0,
                                       0,
                                       tzinfo=timezone("UTC"))
        expParam["analysisTimeUTC"] = now
        expParam["minASN"] = 3
        expParam["minASNEntropy"] = 0.5
        resUpdate = detectionExperiments.replace_one({"_id": expId}, expParam)
        if resUpdate.modified_count != 1:
            print "Problem happened when updating the experiment dates!"
            print resUpdate
            return

        sys.stdout.write("Loading previous reference...")
        try:
            fi = open("saved_references/%s_%s.pickle" % (expId, "routeChange"),
                      "rb")
            refRoutes = pickle.load(fi)
        except IOError:
            sys.stdout.write("corrupted file!?")
            refRoutes = defaultdict(routeCountRef)
        sys.stdout.write("done!\n")

    probe2asn = {}
    start = int(calendar.timegm(expParam["start"].timetuple()))
    end = int(calendar.timegm(expParam["end"].timetuple()))
    nbIteration = 0

    sys.stdout.write("Route analysis:\n")
    for currDate in range(start, end, int(expParam["timeWindow"])):
        tsS = time.time()

        # count packet routes for the current time bin
        params = []
        binEdges = np.linspace(currDate, currDate + expParam["timeWindow"],
                               nbProcesses * binMult + 1)
        for i in range(nbProcesses * binMult):
            params.append((expParam["af"], binEdges[i], binEdges[i + 1]))

        nbRow = 0
        routes = pool.imap_unordered(countRoutes, params)
        routes, nbRow = mergeRoutes(routes, currDate, tsS,
                                    nbProcesses * binMult)

        print "size before params: %s" % len(refRoutes)
        # Detect route changes
        params = []
        for target, newRoutes in routes.iteritems():
            params.append(
                (newRoutes, refRoutes[target], expParam, expId,
                 datetime.utcfromtimestamp(currDate), target, probe2asn))

        print "size after params: %s" % len(refRoutes)

        mapResult = pool.imap_unordered(routeChangeDetection, params)

        # Update the reference
        for target, newRef in mapResult:
            refRoutes[target] = newRef

        print "size after analysis: %s" % len(refRoutes)

        if nbRow > 0:
            nbIteration += 1

    # Update results on the webserver
    if streaming:
        i2a = ip2asn.ip2asn("../lib/ip2asn/db/rib.20180401.pickle",
                            "../lib/ixs_201802.jsonl")
        # update ASN table
        conn_string = "host='psqlserver' dbname='ihr'"

        # get a connection, if a connect cannot be made an exception will be raised here
        conn = psycopg2.connect(conn_string)
        cursor = conn.cursor()
        cursor.execute(
            "SELECT number, name FROM ihr_asn WHERE tartiflette=TRUE;")
        asnList = cursor.fetchall()

        probeip2asn = {}
        # compute magnitude
        mag, alarms = computeMagnitude(asnList,
                                       datetime.utcfromtimestamp(currDate),
                                       expId, probeip2asn, alarmsCollection,
                                       i2a)
        for asn, asname in asnList:
            cursor.execute(
                "INSERT INTO ihr_forwarding (asn_id, timebin, magnitude, resp, label) \
            VALUES (%s, %s, %s, %s, %s)",
                (int(asn), expParam["start"] +
                 timedelta(seconds=expParam["timeWindow"] / 2), mag[asn], 0,
                 ""))

        conn.commit()

        # push alarms to the webserver
        ts = expParam["start"] + timedelta(seconds=expParam["timeWindow"] / 2)
        for alarm in alarms:
            if alarm["asn"] in mag:
                cursor.execute(
                    "INSERT INTO ihr_forwarding_alarms (asn_id, timebin, ip,  \
                    correlation, responsibility, pktdiff, previoushop ) VALUES (%s, %s, %s, \
                    %s, %s, %s, %s) RETURNING id",
                    (alarm["asn"], ts, alarm["ip"], alarm["correlation"],
                     alarm["responsibility"], alarm["pktDiff"],
                     alarm["previousHop"]))

                # Push measurement and probes ID corresponding to this alarm
                alarmid = cursor.fetchone()[0]
                for msmid, probes in alarm["msmId"].iteritems():
                    if msmid is not None:
                        for probeid in probes:
                            cursor.execute(
                                "INSERT INTO ihr_forwarding_alarms_msms(alarm_id, msmid, probeid) \
                                   VALUES (%s, %s, %s)",
                                (alarmid, msmid, probeid))

        conn.commit()
        cursor.close()
        conn.close()

    pool.close()
    pool.join()

    sys.stdout.write("\n")
    print "Writing route change reference to file system."
    fi = open("saved_references/%s_routeChange.pickle" % (expId), "w")
    pickle.dump(refRoutes, fi, 2)
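
# --- Illustration (not part of the original example) ---
# Each time bin above is split into nbProcesses * binMult equal sub-intervals
# with np.linspace, and one (start, end) pair is handed to each pool task.
# Standalone sketch of that partitioning:
import numpy as np

def split_time_window(start, window, nb_processes=18, bin_mult=3):
    edges = np.linspace(start, start + window, nb_processes * bin_mult + 1)
    return list(zip(edges[:-1], edges[1:]))  # (sub-bin start, sub-bin end) pairs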
Example #12
import os
import logging
import sys
from matplotlib import pylab as plt
plt.switch_backend('agg')
import glob
import pickle
import networkx as nx
from collections import defaultdict, Counter
from itertools import chain
import multiprocessing

sys.path.append("../ip2asn")
import ip2asn

ia = ip2asn.ip2asn("../ip2asn/db/rib.20180701.pickle")
esteban_results_directory = "20181001_BGPcount"


def asnres(ip):
    """Find the ASN corresponding to the given IP address"""

    asn = ia.ip2asn(ip)
    if asn == "unknown":
        asn = "0"
    return str(asn)
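
# --- Usage sketch (not part of the original example) ---
# asnres() returns the origin ASN as a string and maps lookup failures to "0".
# The address below is a documentation-range IP used purely as an illustration.
origin_asn = asnres("203.0.113.1")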


def validation(events, ts=1505287800, prefix="84.205.67.0/24"):
    """Validate SSL results using traceroute data.
    
Example #13
import bz2
import json
from ripe.atlas.sagan import DnsResult
import logging

logging.basicConfig()
logging.getLogger().setLevel(logging.ERROR)

import sys
sys.path.append('../ip2asn/')
import ip2asn
i2a = ip2asn.ip2asn('../ip2asn/db/rib.20200601.pickle.bz2')

builtin_msmid = [30001, 30002]
fname = sys.argv[1]

date = fname.partition('-')[2].rpartition('.')[0]
output_fname = f'data/parsed_results_{date}.json'
output = []

with bz2.open(fname, 'rb') as fp:
    for line in fp:

        line_json = json.loads(line)
        result = DnsResult(line_json)

        for response in result.responses:
            # Skip if something's wrong
            if (response.is_error or response.is_malformed
                    or not response.destination_address or not response.abuf):
                continue