Ejemplo n.º 1
1
    def calculate_effective_prefixes(self, start_interval=1451692800):
        '''
        Walk a RIB dump and print the effective /24 prefix count per element.

        For every element of the 'ribs' records received from
        ``self.collector_name`` in a +/- 300 window around
        ``start_interval``, the peer's trie in ``self.peer_trie_handles``
        is searched one level below the element's prefix.  The effective
        count is the number of /24s covered by the first returned prefix
        minus the /24s covered by the second and third returned prefixes.
        The AS paths stored on the returned trie nodes are then used to
        update the counters in ``self.aslink_datastore``.

        :param start_interval: centre of the time window; either a
            ``datetime.datetime`` (converted with
            ``self.convert_interval_to_utc``) or a numeric timestamp
        :return: None; results are printed and accumulated in
            ``self.aslink_datastore``
        '''

        def count_24_prefixes(prefix):
            # The string form of a search result is expected to look like
            # "<network/length>"; only the length part is needed.
            mask_len = int(str(prefix).lstrip('<').rstrip('>').split('/')[1])
            return 2 ** (24 - mask_len)

        stream = BGPStream()
        rec = BGPRecord()

        stream.add_filter('collector', self.collector_name)
        stream.add_filter('record-type', 'ribs')

        if isinstance(start_interval, datetime.datetime):
            start_interval = self.convert_interval_to_utc(start_interval)
        stream.add_interval_filter(start_interval - 300, start_interval + 300)

        stream.start()
        print("Starting routing table parsing")
        while stream.get_next_record(rec):
            elem = rec.get_next_elem()
            while elem:
                # A collector can hold several sessions with the same ASN,
                # so a peer is identified by the (ASN, address) pair.
                peer_id = (elem.peer_asn, elem.peer_address)
                peer_route_trie = self.peer_trie_handles[peer_id]

                # Single level search: the first entry is treated as the
                # root, the following (at most two) entries as its children.
                single_level_prefixes = \
                    peer_route_trie.single_level_search(elem.fields['prefix'])
                print(single_level_prefixes)

                # /24s below the advertised prefix = /24s served by the
                # root minus the /24s served by the root's children.
                # An empty search result yields a count of 0 instead of
                # failing on the missing root entry.
                root_24_prefix_count = 0
                children_24_prefix_count = 0
                if single_level_prefixes:
                    root_24_prefix_count = \
                        count_24_prefixes(single_level_prefixes[0])
                    children_24_prefix_count = sum(
                        count_24_prefixes(child)
                        for child in single_level_prefixes[1:3])

                effective_24_prefix_count = \
                    root_24_prefix_count - children_24_prefix_count

                print("Effective Prefix Count :  %s"
                      % (effective_24_prefix_count,))

                for prefix in single_level_prefixes:
                    trie_node = peer_route_trie.search_exact(str(prefix))
                    as_path = trie_node.data['as-path'].split(" ")
                    # Drop the first and the last AS of the path.
                    as_path_headless = as_path[1:-1]
                    print("AS-Path :  %s" % (as_path,))
                    # Walk adjacent pairs; only the left node of each pair
                    # is counted in the datastore.
                    for left, right in zip(as_path_headless,
                                           as_path_headless[1:]):
                        print("Headless nodes :  %s %s" % (left, right))
                        if left in self.aslink_datastore:
                            self.aslink_datastore[left] += 1
                        else:
                            self.aslink_datastore[left] = 1

                elem = rec.get_next_elem()
def run_bgpstream(args):
    """Collect, per peer, the prefixes and ASNs seen in one BGPStream slice.

    :param args: tuple ``(collector, start_time, end_time, data_type)``.
        The interval is treated as inclusive/exclusive and ``data_type``
        is passed to BGPStream as the 'record-type' filter.
    :return: list of ``((start_time, collector, peer), data)`` rows, where
        ``start_time`` is truncated down to a multiple of
        RESULT_GRANULARITY and ``data`` is the list
        ``[pfxs_v4, pfxs_v6, asns_v4, asns_v6]`` of four sets.
    """
    (collector, start_time, end_time, data_type) = args

    # initialize and configure BGPStream
    stream = BGPStream()
    rec = BGPRecord()
    stream.add_filter('collector', collector)
    # NB: BGPStream uses inclusive/inclusive intervals, so subtract one off the
    # end time since we are using inclusive/exclusive intervals
    stream.add_interval_filter(start_time, end_time-1)
    stream.add_filter('record-type', data_type)
    stream.start()

    # per-peer data
    peers_data = {}

    # loop over all records in the stream
    while stream.get_next_record(rec):
        elem = rec.get_next_elem()
        # loop over all elems in the record
        while elem:
            # create a peer signature for this elem
            sig = peer_signature(rec, elem)
            # if this is the first time we have ever seen this peer, create
            # an empty result: [Pfxs_v4_set, Pfxs_v6_set, ASNs_v4_set, ASNs_v6_set]
            if sig not in peers_data:
                peers_data[sig] = [set(), set(), set(), set()]

            fields = elem.fields
            if 'prefix' in fields:
                pfx = fields['prefix']
                # 0 selects the IPv4 sets, 1 the IPv6 sets
                family = 1 if ":" in pfx else 0
                peers_data[sig][family].add(pfx)

                # The ASNs are filed under the address family of this
                # elem's own prefix, so the AS path is only examined when
                # a prefix is present (never a leftover from an earlier
                # elem).
                if 'as-path' in fields:
                    peers_data[sig][2 + family].update(
                        fields['as-path'].split())

            elem = rec.get_next_elem()

    # the time in the output row is truncated down to a multiple of
    # RESULT_GRANULARITY so that slices can be merged correctly
    start_time = \
        int(math.floor(start_time/RESULT_GRANULARITY) * RESULT_GRANULARITY)

    # for each peer that we processed data for, create an output row
    return [((start_time, collector, p), (peers_data[p])) for p in peers_data]
Ejemplo n.º 3
0
def recv_bgpstream_rib(begin, until, collector):
    """
    Receive BGP records from a bgpstream collector and pass them to output().

    Each element of type 'A' or 'R' in a valid record becomes a
    BGPmessage of type 'update' carrying the element's AS path (entries
    containing '{' are left out) and its prefix.  Invalid records are
    logged and skipped.  Once the stream is exhausted, 'FLUSH' is passed
    to output().

    NOTE(review): despite the function name, the stream is filtered on
    record-type 'updates' -- confirm whether 'ribs' was intended.

    :param begin: start of the interval filter
    :param until: end of the interval filter
    :param collector: name used for the 'collector' filter
    """
    logging.info("CALL recv_bgpstream_rib")
    # Create bgpstream
    stream = BGPStream()
    rec = BGPRecord()
    # set filtering
    stream.add_filter('collector', collector)
    stream.add_filter('record-type', 'updates')
    stream.add_interval_filter(begin, until)
    # Start the stream
    stream.start()
    while stream.get_next_record(rec):
        if rec.status != 'valid':
            # logging.warn is a deprecated alias of logging.warning
            logging.warning("stream record invalid, skipping.")
            continue
        elem = rec.get_next_elem()
        while elem:
            if elem.type.upper() in ('A', 'R'):
                bgp_message = BGPmessage(elem.time, 'update')
                for asn in elem.fields['as-path'].split():
                    if '{' not in asn:  # ignore AS-SETs
                        bgp_message.add_as_to_path(asn)
                bgp_message.add_announce(elem.fields['prefix'])
                output(bgp_message)
            elem = rec.get_next_elem()
    output('FLUSH')
Ejemplo n.º 4
0
def run_collector(stream, output_file):
    """Drain *stream* and write one JSON line per element to *output_file*.

    Invalid records are not written; their public, non-method members are
    printed instead.  For every element of a valid record, record and
    element metadata is merged with the element's fields (via
    merge_dicts) and dumped as a single JSON line.  A progress message is
    printed every 100 elements.

    :param stream: configured stream object; it is started here
    :param output_file: path of the file to (over)write
    """
    stream.start()
    rec = BGPRecord()

    # Running totals, used only for the progress message
    total_records = 0
    total_updates = 0

    with open(output_file, 'w') as f:
        while stream.get_next_record(rec):
            if rec.status != "valid":
                print('Recieved invalid record from BGPStream collector {}'.
                      format(rec.collector))
                # Keep only public attributes that are not methods
                record_members = [
                    member for member in inspect.getmembers(rec)
                    if not member[0].startswith('_')
                    and not inspect.ismethod(member[1])
                ]
                print('Invalid record: {}'.format(record_members))
                continue

            elem = rec.get_next_elem()
            while elem:
                metadata = {
                    'rec_dump_time': rec.dump_time,
                    'project': rec.project,
                    'collector': rec.collector,
                    'rec_type': rec.type,
                    'rec_time': rec.time,
                    'status': rec.status,
                    'elem_type': elem.type,
                    'elem_time': elem.time,
                    'peer_address': elem.peer_address,
                    'peer_asn': elem.peer_asn,
                }

                # total_records counts elements; total_updates only the
                # elements of type 'A'
                total_records += 1
                if elem.type == 'A':
                    total_updates += 1

                if total_records % 100 == 0:
                    print(
                        'Collected {} total records and {} total announcements...'
                        .format(total_records, total_updates))

                f.write(json.dumps(merge_dicts(metadata, elem.fields)) + '\n')

                elem = rec.get_next_elem()
Ejemplo n.º 5
0
    def create_trie_from_bgpstream_info(self, interval_start=1451692800):
        """
        Fill the per-peer tries from the collector's RIB records.

        Reads 'ribs' records of ``self.collector_name`` in a +/- 300
        window around ``interval_start``.  Each element's prefix is added
        to the trie of its peer (looked up in ``self.peer_trie_handles``)
        and the element's AS path is stored on the resulting node.

        :param interval_start: centre of the window; either a
            ``datetime.datetime`` (converted with
            ``self.convert_interval_to_utc``) or a numeric timestamp
        """
        stream = BGPStream()
        rec = BGPRecord()

        stream.add_filter('collector', self.collector_name)
        stream.add_filter('record-type', 'ribs')

        if isinstance(interval_start, datetime.datetime):
            window_centre = self.convert_interval_to_utc(interval_start)
        else:
            window_centre = interval_start
        stream.add_interval_filter(window_centre - 300, window_centre + 300)

        stream.start()

        while stream.get_next_record(rec):
            elem = rec.get_next_elem()
            while elem:
                # A collector can hold several sessions with the same ASN,
                # so the (ASN, address) tuple identifies the peer.
                peer_key = (elem.peer_asn, elem.peer_address)

                node = self.peer_trie_handles[peer_key].add(
                    elem.fields['prefix'])
                node.data['as-path'] = elem.fields['as-path']
                elem = rec.get_next_elem()
    def _get_data(self, prefix, datetime):
        """
        Fetch the AS paths seen for *prefix* in RIS RIB records.

        output example: [['15547', '8220', '1853', '1205', ' '],[..another AS path..]]
        :param prefix: value for the 'prefix' filter
        :param datetime: end of the interval; the interval starts 20000
            before it
        :return: list of AS paths, each a list of strings ending in ' '
        """
        print('[*] ris.py: _get_data() called')
        print('[*] ris.py: _get_data() prefix: {}'.format(prefix))
        stop = int(datetime)
        # 20000 second seems to be the shortest interval to get data from BGPstream
        start = stop - 20000
        paths = []

        stream = BGPStream()
        rec = BGPRecord()

        stream.add_filter('prefix', prefix)
        stream.add_filter('record-type', 'ribs')
        stream.add_filter('project', 'ris')
        stream.add_interval_filter(start, stop)

        stream.start()

        while stream.get_next_record(rec):
            if rec.status != "valid":
                continue
            elem = rec.get_next_elem()
            while elem:
                # The trailing ' ' entry is a marker used for tree creation
                paths.append(elem.fields['as-path'].split() + [' '])
                elem = rec.get_next_elem()
        print('[*] ris.py: _get_data() finished.')
        return paths
Ejemplo n.º 7
0
def downloader(start_date, duration):
    """Download BGP paths from a start date for a certain duration.

    Reads 'ribs' records (no project or collector filter is set) and
    writes every distinct AS path announced for an IPv4 prefix to
    'rib.txt', one path per line with the ASNs joined by '|'.  Paths
    containing '{' or '(' are skipped.

    :param start_date: date string in '%m/%d/%Y' format, interpreted as
        local time
    :param duration: length of the interval, added to the start timestamp
        after conversion with int()
    """
    import time

    # Timestamp of the start date.  time.mktime is the portable
    # equivalent of the platform-specific strftime('%s').
    base = int(time.mktime(
        datetime.datetime.strptime(start_date, '%m/%d/%Y').timetuple()))
    # Create a new bgpstream instance and a reusable bgprecord instance
    stream = BGPStream()
    rec = BGPRecord()
    stream.add_interval_filter(base, base + int(duration))
    stream.add_filter('record-type', 'ribs')
    stream.start()
    path_set = set()
    # The context manager closes the file even if the stream raises
    with open('rib.txt', 'w') as f:
        while stream.get_next_record(rec):
            if rec.status != "valid":
                continue
            elem = rec.get_next_elem()
            while elem:
                path = elem.fields['as-path']
                if '{' not in path and '(' not in path:
                    prefix = elem.fields['prefix']
                    # Focus on IPv4 prefixes; write each path only once
                    if ":" not in prefix and path not in path_set:
                        f.write(path.replace(' ', '|') + '\n')
                        path_set.add(path)
                elem = rec.get_next_elem()
Ejemplo n.º 8
0
    def sendMessageToKafka(self, col_name, col_data):
        """
        Send every element of a collector's RIB records to Kafka.

        The interval starts at ``col_data['ribs']['latestDumpTime']`` and
        spans ``col_data['ribs']['dumpPeriod']``.  Each element of a
        valid record is serialised with json.dumps and handed to a
        Kafka_producer under the collector name.

        :param col_name: collector name; used for the 'collector' filter
            and as the first argument of send_data
        :param col_data: mapping with a 'ribs' entry that provides
            'latestDumpTime' and 'dumpPeriod'
        """
        print("lalalala")
        stream = BGPStream()
        record = BGPRecord()

        ribs_info = col_data.get('ribs')
        time_start = int(ribs_info.get('latestDumpTime'))
        time_end = time_start + int(ribs_info.get('dumpPeriod'))

        stream.add_filter('collector', col_name)
        stream.add_filter('record-type', 'ribs')
        stream.add_interval_filter(time_start, time_end)
        print("Before Start")
        stream.start()
        print("After Start")

        while stream.get_next_record(record):
            if record.status != "valid":
                print("## Current record not valid!")
                continue
            elem = record.get_next_elem()
            while elem:
                # NOTE(review): a new producer is created for every
                # element -- confirm whether one producer could be reused.
                producer = Kafka_producer()
                producer.send_data(col_name, json.dumps(elem))
                elem = record.get_next_elem()
        print("One Collector Finished")
Ejemplo n.º 9
0
 def next(self):
     """Return the next valid record as ``(timestamp, attr, nlri, withdraw)``.

     Invalid records are skipped.  ``nlri`` lists the prefixes of the
     record's 'A'/'R' elements, ``withdraw`` the prefixes of its 'W'
     elements, and ``attr`` holds 'as_path', 'nexthop' and 'community'
     of the last 'A'/'R' element seen.

     :raises StopIteration: when the stream has no more records
     """
     rec = BGPRecord()
     while True:
         print('get next rec')
         # NOTE(review): `stream` is looked up as a module-level name, not
         # as an attribute of self -- confirm this is intended.
         if not stream.get_next_record(rec):
             raise StopIteration
         if rec.status != 'valid':
             # Skip to the next record instead of recursing
             continue

         timestamp = rec.time
         nlri = []
         withdraw = []
         attr = {}
         elem = rec.get_next_elem()
         while elem:
             if elem.type in ('A', 'R'):
                 nlri.append(elem.fields['prefix'])
                 attr['as_path'] = elem.fields['as-path']
                 attr['nexthop'] = elem.fields['next-hop']
                 attr['community'] = elem.fields['communities']
             elif elem.type == 'W':
                 withdraw.append(elem.fields['prefix'])
             elem = rec.get_next_elem()
         return (timestamp, attr, nlri, withdraw)
Ejemplo n.º 10
0
    def get_data_graph(self):
        """
        Build an AS-level graph from the RIB records in [self.start, self.end].

        For each element, consecutive duplicate entries of the AS path
        are collapsed.  A path is used only if it has more than one AS,
        starts with the peer's ASN, and the element's prefix matches
        ``self.pattern``.  For such paths the prefix is recorded in
        ``self.as_prefix`` under the last AS of the path, and every pair
        of adjacent ASes becomes an edge of the graph.

        :return: the graph after it has been passed through
            ``self.add_geo_loc``
        """
        stream = BGPStream()
        rec = BGPRecord()
        stream.add_filter('record-type', 'ribs')
        stream.add_interval_filter(self.start, self.end)
        stream.start()

        as_graph = nx.Graph()

        while stream.get_next_record(rec):
            if rec.status != "valid":
                continue
            elem = rec.get_next_elem()
            while elem:
                # groupby without a key merges runs of equal neighbours
                hops = [
                    asn for asn, _ in groupby(elem.fields['as-path'].split(" "))
                ]
                # judge whether the as-path is legal
                if len(hops) > 1 and hops[0] == str(elem.peer_asn):
                    prefix = elem.fields['prefix']
                    if re.match(self.pattern, prefix):
                        self.as_prefix[hops[-1]].add(prefix)
                        # add edges to the graph
                        for left, right in zip(hops, hops[1:]):
                            as_graph.add_edge(left, right)
                elem = rec.get_next_elem()

        return self.add_geo_loc(as_graph)
Ejemplo n.º 11
0
 def __init__(self):
     """Create the stream handles and the empty result containers."""
     # One stream instance plus a single record object meant to be reused
     self.stream, self.rec = BGPStream(), BGPRecord()
     self.origin_ases = set()
     # Two-level mapping whose missing inner entries read as None
     self.bgp_lens = defaultdict(
         lambda: defaultdict(lambda: None)
     )
Ejemplo n.º 12
0
def build_sql_db():
    """Update the prefix_as and as_link tables of 'bgp_stage.db' from BGPStream.

    Reads the records of collector 'rrc11' for the fixed interval
    1438417216-1438417316.  For each element of type "AB", the
    (ip_min, ip_max, last AS of the path) row in ``prefix_as`` and every
    adjacent AS pair row in ``as_link`` has its count incremented, or is
    inserted with count 1 when it does not exist yet.  Both tables must
    already exist; they are not created here.  Changes are committed
    after each valid record, and the connection is closed even if
    processing fails.
    """
    conn = sqlite3.connect('bgp_stage.db')
    try:
        c = conn.cursor()

        # Create a new bgpstream instance and a reusable bgprecord instance
        stream = BGPStream()
        rec = BGPRecord()

        stream.add_filter('collector','rrc11')
        stream.add_interval_filter(1438417216,1438417316)
        stream.start()

        while stream.get_next_record(rec):
            if rec.status != "valid":
                continue
            elem = rec.get_next_elem()
            while elem:
                # NOTE(review): confirm that "AB" / "WA" are the type
                # strings this BGPStream version actually reports.
                if elem.type == "AB":
                    prefix = elem.fields["prefix"]
                    as_path = elem.fields["as-path"].split(" ")
                    origin = as_path[-1]
                    seen_at = elem.time

                    # IP Prefix database
                    ip_min, ip_max = calculate_min_max(prefix)
                    c.execute("SELECT ip_min FROM prefix_as WHERE ip_min = (?) AND ip_max = (?) AND as_o = (?)", (ip_min, ip_max, origin))
                    # fetchone() returns None when no row matches
                    if c.fetchone() is not None:
                        c.execute("UPDATE prefix_as SET count = count + 1  WHERE ip_min = (?) AND ip_max = (?) AND as_o = (?)", (ip_min, ip_max, origin))
                    else:
                        c.execute("INSERT INTO prefix_as VALUES(?,?,?,?,?)", (ip_min, ip_max, origin, 1, seen_at))

                    # AS link database: one row per adjacent AS pair
                    for as1, as2 in zip(as_path, as_path[1:]):
                        c.execute("SELECT as_o FROM as_link WHERE as_o = (?) AND as_n = (?)",(as1,as2))
                        if c.fetchone() is not None:
                            c.execute("UPDATE as_link SET count = count + 1 WHERE as_o = (?) AND as_n = (?)",
                                      (as1, as2))
                        else:
                            c.execute("INSERT INTO as_link VALUES(?,?,?,?)", (as1, as2, 1, 0))

                elif elem.type == "WA":
                    prefix = elem.fields["prefix"]
                    # Needs research

                    print(rec.project, rec.collector, rec.type, rec.time, rec.status,
                        elem.type, elem.peer_address, elem.peer_asn, elem.fields)
                    print(prefix,elem.time, "W")

                print(rec.project, rec.collector, rec.type, rec.time, rec.status,
                      elem.type, elem.peer_address, elem.peer_asn, elem.fields)
                elem = rec.get_next_elem()
            conn.commit()
    finally:
        conn.close()
Ejemplo n.º 13
0
def dump2file(type, name, latestDumpTime, dumpDuration, dumpPeriod):
    """Dump a collector's non-IPv6 elements to '<name>_<type>.csv'.

    The CSV file has the header type, addr, as, prefix, next_hop, as_path
    and one row per element for which isIPV6() is false.  Missing
    'prefix', 'next-hop' or 'as-path' fields are written as empty
    strings, and the AS path is written with '|' instead of spaces.

    :param type: 'ribs' or 'updates'; selects the record-type filter and
        the file name suffix
    :param name: collector name; used for the 'collector' filter and as
        the file name prefix
    :param latestDumpTime: start of the interval filter
    :param dumpDuration: not used by this function
    :param dumpPeriod: added to latestDumpTime to get the interval end
    :return: number of rows written (excluding the header)
    :raises ValueError: if type is neither 'ribs' nor 'updates'
    """
    print(type + ' of ' + name + ': ')
    csv_header = ['type', 'addr', 'as', 'prefix', 'next_hop', 'as_path']
    if type not in ('ribs', 'updates'):
        # Without this check no output file would exist to write to
        raise ValueError("type must be 'ribs' or 'updates'")

    stream = BGPStream()
    rec = BGPRecord()
    stream.add_filter('collector', name)
    stream.add_filter('record-type', type)
    stream.add_interval_filter(latestDumpTime, latestDumpTime + dumpPeriod)

    count = 0
    useless_c = 0

    # The context manager closes the file even if the stream raises
    with open(name + '_' + type + '.csv', 'w+') as _file:
        stream.start()

        writer = csv.writer(_file)
        writer.writerow(csv_header)

        # Get next record
        while stream.get_next_record(rec):
            if rec.status != "valid":
                print('current rec not valid.')
                continue
            elem = rec.get_next_elem()
            while elem:
                # useless_c counts every element, count only written ones
                useless_c += 1
                if useless_c % 1000 == 0:
                    print('Got ' + str(useless_c) + ' elem totally.')
                if not isIPV6(elem):  # ipv6 packet ignored
                    count += 1
                    field = elem.fields
                    as_path = field.get('as-path', '').replace(' ', '|')
                    writer.writerow([
                        elem.type, elem.peer_address, elem.peer_asn,
                        field.get('prefix', ''),
                        field.get('next-hop', ''), as_path
                    ])
                elem = rec.get_next_elem()
    print('count: ' + str(count))
    return count
Ejemplo n.º 14
0
def main():
    """Dump the updates of collector 'rrc00' for the anchor prefixes.

    The prefixes are read, one per line, from
    './../../atlas/anchor_prefix.txt' and each becomes a 'prefix' filter.
    The time interval comes from the ``start_time`` / ``end_time``
    options returned by getopts().  Every element of a valid record,
    except those of type 'S', is written as one tab-separated line to
    './data/stream_<start>'.
    """
    (options, args) = getopts()
    start = options.start_time
    end = options.end_time

    target_prefs = set()
    with open('./../../atlas/anchor_prefix.txt', 'rb') as br:
        for l in br:
            target_prefs.add(l.strip())

    # Create a new bgpstream instance and a reusable bgprecord instance
    stream = BGPStream()
    rec = BGPRecord()

    with open('./data/stream_{0}'.format(start), 'wb') as bw:
        for ptmp in target_prefs:
            stream.add_filter('prefix', ptmp)

        # Consider updates of collector rrc00 only
        stream.add_filter('record-type', 'updates')
        stream.add_filter('collector', 'rrc00')

        stream.add_interval_filter(start, end)

        # Start the stream
        stream.start()

        while stream.get_next_record(rec):
            if rec.status != "valid":
                continue
            elem = rec.get_next_elem()
            while elem:
                # Elements of type 'S' are not written, but the loop must
                # still advance past them or it would never terminate.
                if elem.type != 'S':
                    bw.write(
                        '{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\n'.
                        format(rec.project, rec.collector, rec.type, rec.time,
                               rec.status, elem.type, elem.fields['prefix'],
                               elem.peer_address, elem.peer_asn, elem.fields))
                    bw.flush()
                elem = rec.get_next_elem()

    print('Successful termination; Start time: {0}'.format(start))
Ejemplo n.º 15
0
def iterate_stream(stream, collector):
    """Yield every item bgpstream_format() produces for the stream's elements.

    :param stream: stream object to read records from
    :param collector: passed through to bgpstream_format() with each element
    """
    record = BGPRecord()
    while True:
        if not stream.get_next_record(record):
            return
        element = record.get_next_elem()
        while element:
            for formatted in bgpstream_format(collector, element):
                yield formatted
            element = record.get_next_elem()
Ejemplo n.º 16
0
def iterate_stream(stream, collector):
    """Generate the formatted items of all elements read from *stream*.

    Each element of each record is handed to bgpstream_format() together
    with *collector*, and the items it produces are yielded in order.
    """
    record = BGPRecord()
    while True:
        if not stream.get_next_record(record):
            return
        element = record.get_next_elem()
        while element:
            for formatted in bgpstream_format(collector, element):
                yield formatted
            element = record.get_next_elem()
Ejemplo n.º 17
0
def run_bgpstream(args):
    """Collect, per peer, the last-hop ASNs seen for each prefix in a slice.

    :param args: tuple ``(collector, start_time, end_time, data_type)``.
        The interval is treated as inclusive/exclusive and ``data_type``
        is passed to BGPStream as the 'record-type' filter.
    :return: list of ``((start_time, collector, peer), data)`` rows, where
        ``start_time`` is truncated down to a multiple of
        RESULT_GRANULARITY and ``data`` is ``[v4, v6]``: two dicts
        mapping a prefix to the set of origins seen for it.
    """
    collector, start_time, end_time, data_type = args

    # initialize and configure BGPStream
    stream = BGPStream()
    rec = BGPRecord()
    stream.add_filter('collector', collector)
    # BGPStream intervals are inclusive at both ends while ours exclude
    # the end time, hence the -1
    stream.add_interval_filter(start_time, end_time - 1)
    stream.add_filter('record-type', data_type)
    stream.start()

    # per-peer data
    peers_data = {}

    while stream.get_next_record(rec):
        elem = rec.get_next_elem()
        while elem:
            sig = peer_signature(rec, elem)
            # first time this peer is seen: start with empty v4/v6 dicts
            if sig not in peers_data:
                peers_data[sig] = [{}, {}]

            if 'prefix' in elem.fields:
                pfx = elem.fields['prefix']
                # the origin is the last entry of the AS path, if any
                origin = ""
                if 'as-path' in elem.fields:
                    path_split = elem.fields['as-path'].split()
                    if path_split:
                        origin = path_split[-1]

                # index 1 holds prefixes containing ':', index 0 the rest
                family = 1 if ":" in pfx else 0
                origins = peers_data[sig][family].setdefault(pfx, set())
                # discard as origin: AS sets, and ASN=23456 [AS_TRANS]
                if origin not in ("", "23456") and "{" not in origin:
                    origins.add(origin)

            elem = rec.get_next_elem()

    # truncate the row time down to a multiple of RESULT_GRANULARITY so
    # that slices can be merged correctly
    start_time = \
        int(math.floor(start_time/RESULT_GRANULARITY) * RESULT_GRANULARITY)

    # one output row per peer that produced data
    return [((start_time, collector, p), data)
            for p, data in peers_data.items()]
Ejemplo n.º 18
0
    def path_finder(self):
        """
        Replay the stream and record the stored path of each zombie prefix.

        ``self.changing`` and ``self.zombies`` are consumed from their
        end: entries are ``(timestamp, prefix)`` pairs and are popped
        while their timestamp is below the current record's time.
        Popped ``changing`` prefixes are added to
        ``self.watching_prefix``.  For popped ``zombies`` a deep copy of
        ``self.path[prefix]`` is stored under the key "<prefix>|<ts>",
        and watched prefixes have their collected ``self.record`` entries
        appended to the "changing" result file.  Every element of the
        record is then passed to ``self.analyze_element``.

        Any exception is logged and ends the replay; the paths collected
        so far are always written to the zombie-record-finder JSON file.
        """
        logging.info(
            f"[ZombieRecordFinder-{self.collector}] starting path_finder()")

        result_path = self.config['DEFAULT']['Result']
        dump_path = dict()

        stream = self.get_stream()
        stream.start()
        rec = BGPRecord()

        try:
            while stream and stream.get_next_record(rec):
                if rec.status != "valid":
                    continue
                if rec.type == "unknown":
                    continue

                recordTimeStamp = int(rec.time)
                while self.changing and self.changing[-1][0] < recordTimeStamp:
                    _, prefix = self.changing.pop()
                    self.watching_prefix.add(prefix)

                while self.zombies and self.zombies[-1][0] < recordTimeStamp:
                    ts, prefix = self.zombies.pop()
                    dump_path[f"{prefix}|{ts}"] = copy.deepcopy(
                        self.path[prefix])

                    if prefix in self.watching_prefix:
                        self.watching_prefix.remove(prefix)
                        data = ",".join(self.record[prefix])
                        # The context manager closes the file even if the
                        # write fails
                        with open(
                                f"{result_path}/{self.year}-{self.month}-changing-{self.collector}.txt",
                                "a+") as f:
                            f.write(f"{prefix} {ts} ? {data} \n")
                        del self.record[prefix]
                    else:
                        logging.warning(
                            f"[ZombieRecordFinder-{self.collector}] trying to remove unwatched prefix {prefix}"
                        )

                elem = rec.get_next_elem()
                while elem:
                    self.analyze_element(elem, recordTimeStamp)
                    elem = rec.get_next_elem()

        except Exception as e:
            logging.error(
                f"[ZombieRecordFinder-{self.collector}] exit with error : {e}")

        finally:
            with open(
                    f"{result_path}/{self.year}-{self.month}-zombie-record-finder-{self.collector}.json",
                    'w') as fp:
                json.dump(dump_path, fp)
Ejemplo n.º 19
0
def main():
    """Dump the updates of collector 'rrc00' for the anchor prefixes.

    The prefixes are read, one per line, from
    './../../atlas/anchor_prefix.txt' and each becomes a 'prefix' filter.
    The time interval comes from the ``start_time`` / ``end_time``
    options returned by getopts().  Every element of a valid record,
    except those of type 'S', is written as one tab-separated line to
    './data/stream_<start>'.
    """
    (options, args) = getopts()
    start = options.start_time
    end = options.end_time

    target_prefs = set()
    with open('./../../atlas/anchor_prefix.txt', 'rb') as br:
        for l in br:
            target_prefs.add(l.strip())

    # Create a new bgpstream instance and a reusable bgprecord instance
    stream = BGPStream()
    rec = BGPRecord()

    with open('./data/stream_{0}'.format(start), 'wb') as bw:
        for ptmp in target_prefs:
            stream.add_filter('prefix', ptmp)

        # Consider updates of collector rrc00 only
        stream.add_filter('record-type', 'updates')
        stream.add_filter('collector', 'rrc00')

        stream.add_interval_filter(start, end)

        # Start the stream
        stream.start()

        while stream.get_next_record(rec):
            if rec.status != "valid":
                continue
            elem = rec.get_next_elem()
            while elem:
                # Elements of type 'S' are not written, but the loop must
                # still advance past them or it would never terminate.
                if elem.type != 'S':
                    bw.write('{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}\t{8}\t{9}\n'.format(
                        rec.project, rec.collector, rec.type, rec.time, rec.status,
                        elem.type, elem.fields['prefix'], elem.peer_address, elem.peer_asn, elem.fields))
                    bw.flush()
                elem = rec.get_next_elem()

    print('Successful termination; Start time: {0}'.format(start))
Ejemplo n.º 20
0
    def __init__(self, route_collector="rrc00", rpki_validator="rpki-validator.realmv6.org:8282"):
        """
        Store the collector name and create the RTR manager and stream handles.

        :param route_collector: collector name, kept in ``self.rc``
        :param rpki_validator: "host:port" string; the two parts are
            passed to RTRManager
        """
        self.rc = route_collector

        validator_parts = rpki_validator.split(":")
        validator_host = validator_parts[0]
        validator_port = validator_parts[1]
        # The manager is only constructed here; it is not started.
        self.mgr = RTRManager(validator_host, validator_port)

        self.stream = BGPStream()
        self.rec = BGPRecord()
Ejemplo n.º 21
0
def main():
    """Report prefixes that were announced at least ``limit`` times.

    Reads ``updates`` records from collector ``rrc00`` (prefix filter
    ``0.0.0.0/0``) for the interval taken from the command-line options,
    counts announcements and withdrawals per prefix, and prints one
    tab-separated line ``prefix, announcements, withdrawals`` for every
    prefix whose announcement count reaches the limit.
    """
    options, _unused_args = getopts()
    min_announcements = options.limit
    interval_start = options.start_time
    interval_end = options.end_time

    # One stream, plus a single record object that each read refills.
    stream = BGPStream()
    rec = BGPRecord()

    stream.add_filter('record-type', 'updates')
    stream.add_filter('collector', 'rrc00')
    stream.add_filter('prefix', '0.0.0.0/0')
    stream.add_interval_filter(interval_start, interval_end)
    stream.start()

    announced = defaultdict(int)
    withdrawn = defaultdict(int)

    while stream.get_next_record(rec):
        # Records that are not valid are skipped silently.
        if rec.status != "valid":
            continue

        elem = rec.get_next_elem()
        while elem:
            kind = elem.type
            if kind == 'A':
                announced[elem.fields['prefix']] += 1
            elif kind == 'W':
                withdrawn[elem.fields['prefix']] += 1
            elem = rec.get_next_elem()

    for prefix in announced:
        if announced[prefix] >= min_announcements:
            columns = (prefix, str(announced[prefix]), str(withdrawn[prefix]))
            print("\t".join(columns))
Ejemplo n.º 22
0
def main():
    """Print per-prefix update statistics for frequently announced prefixes.

    The interval and the reporting threshold come from ``getopts()``.
    Updates are read from collector ``rrc00``; announcements ('A') and
    withdrawals ('W') are tallied per prefix. Each prefix with at least
    ``limit`` announcements is printed as
    ``prefix<TAB>announcements<TAB>withdrawals``.
    """
    opts, _ignored = getopts()
    threshold = opts.limit
    t_begin = opts.start_time
    t_end = opts.end_time

    # The record instance is reused for every read from the stream.
    stream = BGPStream()
    rec = BGPRecord()

    for filter_name, filter_value in (('record-type', 'updates'),
                                      ('collector', 'rrc00'),
                                      ('prefix', '0.0.0.0/0')):
        stream.add_filter(filter_name, filter_value)
    stream.add_interval_filter(t_begin, t_end)
    stream.start()

    update_counts = defaultdict(int)
    withdraw_counts = defaultdict(int)

    while stream.get_next_record(rec):
        if rec.status != "valid":
            # Nothing is reported for records that are not valid.
            continue

        elem = rec.get_next_elem()
        while elem:
            elem_type = elem.type
            if elem_type == 'A':
                update_counts[elem.fields['prefix']] += 1
            elif elem_type == 'W':
                withdraw_counts[elem.fields['prefix']] += 1
            elem = rec.get_next_elem()

    for pfx in update_counts:
        if update_counts[pfx] < threshold:
            continue
        print("\t".join([pfx, str(update_counts[pfx]),
                         str(withdraw_counts[pfx])]))
Ejemplo n.º 23
0
 def run(self):
     """Stream BGP elems and write route statements to stdout.

     With no ``self.upd_file`` the stream is configured from
     ``self.collector``, ``self.record_type`` and the
     ``self.from_date``/``self.to_date`` interval and put in live mode;
     otherwise the ``singlefile`` data interface is used with
     ``self.upd_file`` and, if set, ``self.rib_file``. Optional prefix,
     peer-ASN and community filters are added on top.

     For every elem whose peer ASN is a key of ``self.asn_to_nexthop`` an
     ``announce route ...`` line (types 'A' and 'R') or a
     ``withdraw route ...`` line (type 'W') is written and flushed.
     After each record the method sleeps for ``self.delay`` plus the time
     gap to the previous record.
     """
     stream = BGPStream()
     rec = BGPRecord()
     if self.upd_file is None:
         stream.add_filter('collector', self.collector)
         stream.add_filter('record-type', self.record_type)
         stream.add_interval_filter(self.from_date, self.to_date)
         stream.set_live_mode()
     else:
         stream.set_data_interface('singlefile')
         if self.upd_file:
             stream.set_data_interface_option('singlefile', 'upd-file',
                                              self.upd_file)
         if self.rib_file:
             stream.set_data_interface_option('singlefile', 'rib-file',
                                              self.rib_file)
     if self.prefix_filter is not None:
         for prefix in self.prefix_filter:
             stream.add_filter('prefix', prefix)
     if self.peer_as_filter:
         for asn in self.peer_as_filter:
             stream.add_filter('peer-asn', str(asn))
     if self.communities_filter:
         for community in self.communities_filter:
             stream.add_filter('community', community)
     stream.start()

     # Timestamp of the previously handled record; None until the first
     # record arrives, so that the first record's elems are processed too
     # instead of being consumed only to seed the pacing clock.
     prev = None
     while stream.get_next_record(rec):
         now = rec.time
         if prev is None:
             prev = now
         if rec.status == 'valid':
             elem = rec.get_next_elem()
             while elem:
                 statement = None
                 peer_asn = elem.peer_asn
                 if peer_asn in self.asn_to_nexthop:
                     if elem.type in ('A', 'R'):
                         # The next hop always comes from the configured
                         # mapping, never from the elem itself.
                         statement = (
                             'announce route %s next-hop %s as-path [ %s ]'
                             % (elem.fields['prefix'],
                                self.asn_to_nexthop[peer_asn],
                                elem.fields['as-path']))
                     elif elem.type == 'W':
                         statement = ('withdraw route %s'
                                      % elem.fields['prefix'])
                 if statement:
                     sys.stdout.write("%s\n" % statement)
                     sys.stdout.flush()
                 elem = rec.get_next_elem()
         # time.sleep() rejects negative values, which would occur if a
         # record carried an earlier timestamp than its predecessor.
         time.sleep(max(0, self.delay + now - prev))
         prev = now
def download_data():
    """Collect, per time bin, the last elem type seen per prefix and peer.

    Streams RIS ``updates`` for every prefix in the module-level
    ``prefixes`` (as "prefix more" filters) between ``startts`` and
    ``endts``. ``peer_state[prefix][peer_asn]`` always holds the type of
    the most recent elem. Whenever a record is more than ``bin_size``
    past the current bin, each fully elapsed bin start is appended to
    ``results["other"]["timebin"]`` and the current state of every
    prefix/peer pair is appended to ``results[prefix][peer_asn]``.

    :return: the ``results`` mapping
    """
    peer_state = defaultdict(dict)
    results = defaultdict(defaultdict_list)
    current_bin = 0

    stream = BGPStream()
    rec = BGPRecord()

    filter_string = "type updates" + " and project ris " + "".join(
        " and prefix more %s " % pfx for pfx in prefixes)

    logging.info("Connecting to BGPstream... (%s)", filter_string)
    logging.info("Timestamps: %s, %s", startts, endts)
    stream.parse_filter_string(filter_string)
    stream.add_interval_filter(startts, endts)

    stream.start()
    while stream.get_next_record(rec):
        # Non-valid records are reported but still run through the
        # binning logic below.
        if rec.status != "valid":
            print(rec.project, rec.collector, rec.type, rec.time, rec.status)

        # The first record seen anchors the first bin.
        if current_bin == 0:
            current_bin = rec.time

        # Emit one snapshot for every bin that has fully elapsed.
        if rec.time > current_bin + bin_size:
            bin_starts = range(current_bin, rec.time, bin_size)
            for bin_start in bin_starts[:-1]:
                results["other"]["timebin"].append(bin_start)
                for pfx, per_peer in peer_state.items():
                    for peer_as, last_type in per_peer.items():
                        results[pfx][peer_as].append(last_type)

            current_bin = bin_starts[-1]

        elem = rec.get_next_elem()
        while elem:
            peer_state[elem.fields["prefix"]][elem.peer_asn] = elem.type
            elem = rec.get_next_elem()

    return results
Ejemplo n.º 25
0
def main():
    """Watch live BGP updates and print those with an unexpected origin.

    The YAML file given with ``-f/--file`` maps prefixes to their
    expected origin ASN and is loaded into a PyTricia trie. Live
    ``updates`` records are then read from BGPStream. For every elem that
    carries an AS path and whose prefix is found in the trie, the last
    hop of the path is compared with the configured origin; the elem is
    printed when they differ, or unconditionally when ``--debug`` is set.
    """
    parser = argparse.ArgumentParser()
    parser.formatter_class = argparse.RawDescriptionHelpFormatter
    parser.description = textwrap.dedent('''\
        a proof-of-concept utility for watching updates from BGPstream
        and then printing out if an unexpected update is heard
        ''')
    parser.epilog = textwrap.dedent('''\
        Example: watch these route announcements
            %(prog)s -f routes.yaml ''')
    required = parser.add_argument_group('required arguments')
    required.add_argument("-f",
                          "--file",
                          required=True,
                          help="yaml file of prefixes to origin asn")
    parser.add_argument("-d",
                        "--debug",
                        action='store_true',
                        help="print out all updates containing these prefixes")
    args = parser.parse_args()

    routes = pytricia.PyTricia(48)  # longest reasonable pfx in dfz

    with open(args.file, 'r') as f:
        routesfile = yaml.safe_load(f)
    for pfx in routesfile:
        routes[pfx] = routesfile[pfx]

    stream = BGPStream()
    rec = BGPRecord()
    stream.add_filter('record-type', 'updates')
    stream.add_interval_filter(int(time.time()), 0)
    stream.set_live_mode()
    stream.start()

    while stream.get_next_record(rec):
        if rec.status != 'valid':
            continue
        elem = rec.get_next_elem()
        while elem:
            if 'as-path' in elem.fields:
                path = elem.fields['as-path'].split()
                prefix = elem.fields['prefix']
                if prefix in routes:
                    # An empty AS path has no last hop; treat the origin
                    # as unknown rather than indexing into an empty list.
                    origin = path[-1] if path else None
                    # YAML loads a bare ASN as an int while path hops are
                    # strings, so compare on the string form.
                    expected = str(routes[prefix])
                    if origin != expected or args.debug:
                        print('Heard prefix:', elem.fields['prefix'],
                              'AS-PATH:', elem.fields['as-path'],
                              '  Found by project:', rec.project,
                              'collector:', rec.collector, 'type:',
                              rec.type, 'at time:', rec.time, 'Type:',
                              elem.type, 'Peer:', elem.peer_address, 'AS',
                              elem.peer_asn)

            elem = rec.get_next_elem()
Ejemplo n.º 26
0
def run_bgpstream(args):
    """Count elems and records per peer for one collector and time slice.

    :param args: tuple ``(collector, start_time, end_time, data_type)``;
        ``end_time`` is exclusive and ``data_type`` is used as the
        ``record-type`` filter
    :return: list of ``((slice_start, collector, peer_sig),
        [elem_cnt, peer_record_cnt, coll_record_cnt])`` rows, where
        ``slice_start`` is ``start_time`` truncated down to a multiple of
        ``RESULT_GRANULARITY``
    """
    collector, start_time, end_time, data_type = args

    stream = BGPStream()
    rec = BGPRecord()
    stream.add_filter('collector', collector)
    # BGPStream intervals are inclusive on both ends, while the caller
    # passes an exclusive end, hence the "- 1".
    stream.add_interval_filter(start_time, end_time - 1)
    stream.add_filter('record-type', data_type)
    stream.start()

    # peer signature -> [elem_cnt, peer_record_cnt, coll_record_cnt]
    peers_data = {}

    while stream.get_next_record(rec):
        # Signatures of all peers that contributed elems to this record.
        seen_in_record = set()

        elem = rec.get_next_elem()
        while elem:
            sig = peer_signature(rec, elem)
            seen_in_record.add(sig)
            peers_data.setdefault(sig, [0, 0, 0])[0] += 1
            elem = rec.get_next_elem()

        if seen_in_record:
            # Exactly one peer of the record is charged with the
            # collector-level record count, so summing that column over
            # all peers gives the true number of records.
            peers_data[next(iter(seen_in_record))][2] += 1
            for sig in seen_in_record:
                peers_data[sig][1] += 1

    # Truncate the slice start so rows from different slices line up.
    start_time = \
        int(math.floor(start_time/RESULT_GRANULARITY) * RESULT_GRANULARITY)

    return [((start_time, collector, sig), counts)
            for sig, counts in peers_data.items()]
Ejemplo n.º 27
0
def MOAS_prefixes():
    """Collect the origin ASNs seen per prefix in one RIB dump window.

    Reads RIB records from collector ``route-views.sg`` for the interval
    1438415400-1438416600 (Sat, 01 Aug 2015 07:50:00 - 08:10:00 GMT) and
    records, for each prefix, the set of origin ASNs (the rightmost hop
    of the AS path) and the list of full AS paths. "Achou UFRN" is
    printed whenever the origin is AS262857. Reporting of the prefixes
    with more than one origin is currently disabled, so nothing is
    returned.
    """
    # A new stream and a record instance that is reused for every read.
    stream = BGPStream()
    rec = BGPRecord()

    stream.add_filter('collector', 'route-views.sg')
    stream.add_filter('record-type', 'ribs')
    stream.add_interval_filter(1438415400, 1438416600)
    stream.start()

    # prefix -> set of origin ASNs / prefix -> list of AS paths
    prefix_origin = defaultdict(set)
    prefix_origin_dict = defaultdict(list)

    while stream.get_next_record(rec):
        elem = rec.get_next_elem()
        while elem:
            pfx = elem.fields['prefix']
            # split() without an argument yields [] for an empty path,
            # so elems without any hop are skipped below.
            ases = elem.fields['as-path'].split()
            if ases:
                origin = ases[-1]
                # Path hops are strings, so compare against a string.
                # AS262857 - UFRN: 177.20.128.0/19
                # UNIVERSIDADE FEDERAL DO RIO GRANDE DO NORTE
                # Sample routes for that prefix:
                #   BI 177.20.128.0/19 198.32.125.84  280 100 0 1916, 262857 IGP
                #   E  177.20.128.0/19 213.248.67.117 0 70 0 1299, 2914, 1916, 262857 IGP
                #   E  177.20.128.0/19 213.248.98.93  0 70 0 1299, 2914, 1916, 262857 IGP
                if origin == '262857':
                    print("Achou UFRN")
                prefix_origin[pfx].add(origin)
                prefix_origin_dict[pfx].append(ases)

            elem = rec.get_next_elem()

    # MOAS prefixes are those with more than one origin; printing them
    # is disabled.
    for pfx in prefix_origin:
        if len(prefix_origin[pfx]) > 1:
            pass  # print pfx, ",".join(prefix_origin[pfx])
Ejemplo n.º 28
0
def pushRIBData(AF, collector, startts, endts):
    """Publish the RIB records of one collector to a Kafka topic.

    A stream is obtained from ``getBGPStream("ribs", AF, [collector],
    startts, endts)``. The topic ``ihr_bgp_<collector>_rib`` is created
    on a best-effort basis, then every record is sent as one
    msgpack-serialized message of the form
    ``{"rec": <record dict>, "elements": [<element dict>, ...]}`` whose
    Kafka timestamp is the record time in milliseconds.

    :param AF: passed through to ``getBGPStream``
    :param collector: collector name; also used to build the topic name
    :param startts: passed through to ``getBGPStream``
    :param endts: passed through to ``getBGPStream``
    """
    import logging

    stream = getBGPStream("ribs", AF, [collector], startts, endts)
    topicName = "ihr_bgp_" + collector + "_rib"
    admin_client = KafkaAdminClient(
        bootstrap_servers=['kafka1:9092', 'kafka2:9092', 'kafka3:9092'],
        client_id='bgp_producer_admin')

    try:
        topic_list = [
            NewTopic(name=topicName, num_partitions=1, replication_factor=1)
        ]
        admin_client.create_topics(new_topics=topic_list, validate_only=False)
    except Exception:
        # Best effort: a failure to create the topic must not stop the
        # producer, but KeyboardInterrupt/SystemExit are no longer
        # swallowed along with it.
        logging.getLogger(__name__).debug(
            "could not create topic %s", topicName, exc_info=True)
    finally:
        admin_client.close()

    stream.start()

    producer = KafkaProducer(
        bootstrap_servers=['kafka1:9092', 'kafka2:9092', 'kafka3:9092'],
        # acks=0,
        value_serializer=lambda v: msgpack.packb(v, use_bin_type=True),
        linger_ms=1000,
        request_timeout_ms=300000,
        compression_type='snappy')

    rec = BGPRecord()

    try:
        while stream and stream.get_next_record(rec):
            completeRecord = {
                "rec": getRecordDict(rec),
                "elements": [],
            }

            # Kafka timestamps are in milliseconds.
            recordTimeStamp = int(rec.time) * 1000

            elem = rec.get_next_elem()
            while elem:
                completeRecord["elements"].append(getElementDict(elem))
                elem = rec.get_next_elem()

            producer.send(topicName, completeRecord,
                          timestamp_ms=recordTimeStamp)
    finally:
        # Close the producer even when reading or sending fails.
        producer.close()
Ejemplo n.º 29
0
def getting_BGP_update():
    stream = BGPStream()
    rec = BGPRecord()
    stream.add_filter('collector', 'rrc11')
    stream.add_interval_filter(1438417216, 1438417216)
    # Start the stream
    stream.start()
    while (stream.get_next_record(rec)):
        if rec.status != "valid":
            print rec.project, rec.collector, rec.type, rec.time, rec.status
        else:
            elem = rec.get_next_elem()
            while (elem):
                print rec.project, rec.collector, rec.type, rec.time, rec.status,
                print elem.type, elem.peer_address, elem.peer_asn, elem.fields
                elem = rec.get_next_elem()
Ejemplo n.º 30
0
def generate_stream():
    """Forward live announcements and withdrawals to ZeroMQ and MySQL.

    Reads ``updates`` records starting ``delay`` seconds before now
    (``delay`` is a module-level value) with an open-ended interval.
    Every elem of type 'A' or 'W' is converted with ``elem2bgplay``,
    given a running ``id``, printed, published on the ZeroMQ publisher
    bound to port 12345 and then added to ('A') or removed from ('W') the
    MySQL store. Elems of any other type are skipped.
    """
    bs = BGPStream()
    rec = BGPRecord()

    # MySQL access layer
    a = MySqlDAL()
    a.start()

    # Publisher on port 12345
    publisher = ZmqPublisher(12345)

    bs.add_interval_filter(calendar.timegm(time.gmtime()) - delay, 0)
    # bs.add_filter('collector', 'route-views.sg')
    bs.add_filter('record-type', 'updates')
    bs.start()

    print('Beginning to read from stream')
    input_id = 0
    while bs.get_next_record(rec):
        elem = rec.get_next_elem()
        while elem is not None:
            # Elems are forwarded as soon as they are read; there is no
            # pacing against elem.time.
            if elem.type in ('A', 'W'):
                input_id += 1
                msg = elem2bgplay(rec, elem)
                # NOTE(review): the message type is set to 'A' for
                # withdrawals as well - confirm this is intended.
                msg['type'] = 'A'
                msg['id'] = input_id
                print(msg)

                # Publish the message
                publisher.publish(msg)

                # Write it to DB
                if elem.type == 'A':
                    a.add(msg)
                else:
                    a.remove(msg)

            # Always advance, also for skipped elem types; otherwise the
            # loop would spin forever on the same elem.
            elem = rec.get_next_elem()
Ejemplo n.º 31
0
def generate_stream():
    """Forward live announcements and withdrawals to ZeroMQ and MySQL.

    Reads ``updates`` records starting ``delay`` seconds before now
    (``delay`` is a module-level value) with an open-ended interval.
    Every elem of type 'A' or 'W' is converted with ``elem2bgplay``,
    given a running ``id``, printed, published on the ZeroMQ publisher
    bound to port 12345 and then added to ('A') or removed from ('W') the
    database writer. Elems of any other type are skipped.
    """
    bs = BGPStream()
    rec = BGPRecord()

    # MySQL writer
    db_writer = MysqlWriter()
    db_writer.start()

    # Publisher on port 12345
    publisher = ZmqPublisher(12345)

    bs.add_interval_filter(calendar.timegm(time.gmtime()) - delay, 0)
    # bs.add_filter('collector', 'route-views.sg')
    bs.add_filter('record-type', 'updates')
    bs.start()

    print('Beginning to read from stream')
    input_id = 0
    while bs.get_next_record(rec):
        elem = rec.get_next_elem()
        while elem is not None:
            # Elems are forwarded as soon as they are read; there is no
            # pacing against elem.time.
            if elem.type in ('A', 'W'):
                input_id += 1
                msg = elem2bgplay(rec, elem)
                # NOTE(review): the message type is set to 'A' for
                # withdrawals as well - confirm this is intended.
                msg['type'] = 'A'
                msg['id'] = input_id
                print(msg)

                # Publish the message
                publisher.publish(msg)

                # Write it to DB
                if elem.type == 'A':
                    db_writer.add(msg)
                else:
                    db_writer.remove(msg)

            # Always advance, also for skipped elem types; otherwise the
            # loop would spin forever on the same elem.
            elem = rec.get_next_elem()
Ejemplo n.º 32
0
def get_ribs(collector, start, period):
    """Yield the RIB entries a collector dumped shortly before ``start``.

    :param collector: collector name used as stream filter
    :param start: object whose ``timestamp()`` gives the end of the
        interval
    :param period: length of the interval in seconds; the interval is
        ``[start - period, start]``
    :return: generator of tuples ``(project, collector, record time as
        ISO-8601 string, peer address, peer ASN, prefix, AS path)``, one
        per elem of every valid record
    """
    interval_end = int(start.timestamp())

    stream = BGPStream()
    rec = BGPRecord()
    stream.add_filter('collector', collector)
    stream.add_interval_filter(interval_end - period, interval_end)
    stream.add_filter('record-type', 'ribs')
    stream.start()

    while stream.get_next_record(rec):
        if rec.status != "valid":
            continue

        elem = rec.get_next_elem()
        while elem:
            record_time = datetime.utcfromtimestamp(rec.time).isoformat()
            yield (rec.project, rec.collector, record_time,
                   elem.peer_address, elem.peer_asn,
                   elem.fields['prefix'], elem.fields['as-path'])
            elem = rec.get_next_elem()
def initUpdates(seedingCheckOverride, lastBGPUpdateProcessed):
    """Prepare the global update stream from the last processed update on.

    Sets the module globals ``startTime``, ``endTime``, ``seeded``,
    ``stream`` and ``rec``. ``startTime`` is taken from the argument,
    ``endTime`` from the ``metadata`` table. Unless the check is
    overridden, the ``seeded`` metadata entry must be the string "true";
    otherwise a message is printed and the process exits. The stream
    delivers updates from collector ``route-views2``, peer ASN 3356, for
    ``[startTime, endTime]`` and is started before returning.

    :param seedingCheckOverride: when true, skip the database check and
        treat the database as seeded
    :param lastBGPUpdateProcessed: timestamp used as start of the interval
    """
    global cursor, startTime, endTime, seeded, stream, rec

    # The start time is supplied by the caller rather than read from the
    # metadata table.
    startTime = lastBGPUpdateProcessed

    cursor.execute("""SELECT intVal AS endTime FROM metadata WHERE name = 'endTime'""")
    end_row = cursor.fetchone()
    if end_row is None:
        print("ERROR: NO End Time Found in DB. Recommended end time is 1489224749 because this is the timestamp of the first cert.")
        print("ERROR: Other recommended end time is 1491775893 which is when all certs have been covered.")
        exit()
    else:
        (endTime,) = end_row

    if seedingCheckOverride:
        seeded = True
    else:
        cursor.execute("""SELECT stringVal AS seeded FROM metadata WHERE name = 'seeded'""")
        seeded_row = cursor.fetchone()
        if seeded_row is None:
            # A missing entry is treated the same as "not seeded".
            seeded = False
            print("line 59 DB not seeded upon call to initUpdates. DB should be seeded with RIBs before updates can be processed. Call initSeeding first. Exiting.")
            exit()
        else:
            (seeded_flag,) = seeded_row
            if seeded_flag == "true":
                seeded = True
            elif seeded_flag == "false":
                seeded = False
                print("line 66 DB not seeded upon call to initUpdates. DB should be seeded with RIBs before updates can be processed. Call initSeeding first. Exiting.")
                exit()
            else:
                print("Invalid value for seeded metadata entry. Exiting.")
                exit()

    # New stream plus a record instance that is reused for every read.
    stream = BGPStream()
    rec = BGPRecord()
    # Updates are collected here; 'ribs' would give periodic RIB
    # snapshots instead.
    stream.add_filter('record-type', 'updates')
    stream.add_filter('collector', 'route-views2')
    # A single peer yields only that peer's preferred routes and none of
    # its rejected ones. Level3 (AS3356) is a provider to ViaWest, so its
    # choice is a likely choice of ViaWest.
    stream.add_filter('peer-asn', '3356')
    stream.add_interval_filter(startTime, endTime)
    stream.start()
Ejemplo n.º 34
0
def recv_bgpstream_updates(begin, until, collector, output_queue):
    """Turn BGP update elems into ``BGPmessage`` objects on a queue.

    Blocks until the module-level ``rib_ts`` is non-negative, waits a
    further ``RIB_TS_WAIT`` seconds, then streams ``updates`` records of
    ``collector`` for the interval ``[rib_ts, until]``. Each announcement
    ('A') becomes a message with prefix, next hop and AS path (hops that
    contain '{' are left out); each withdrawal ('W') becomes a message
    with the withdrawn prefix. Records that are not valid are skipped
    with a warning.

    :param begin: not used; the interval starts at ``rib_ts``
    :param until: end of the interval
    :param collector: collector name used as stream filter
    :param output_queue: queue that receives the messages via ``put``
    """
    logging.info("CALL recv_bgpstream_updates")
    # wait for first RIB table dump to complete
    while rib_ts < 0:
        time.sleep(RIB_TS_WAIT/10)
    time.sleep(RIB_TS_WAIT)

    stream = BGPStream()
    rec = BGPRecord()
    stream.add_filter('collector', collector)
    stream.add_filter('record-type', 'updates')
    stream.add_interval_filter(rib_ts, until)
    stream.start()

    while stream.get_next_record(rec):
        if rec.status != 'valid':
            logging.warning("stream record invalid, skipping ...")
            continue
        logging.info("Record TS: %s", rec.time)

        elem = rec.get_next_elem()
        while elem:
            logging.info(" -- Record Element Type: %s, TS: %s",
                         elem.type, elem.time)
            bgp_message = BGPmessage(elem.time, 'update')
            bgp_message.set_source({
                'addr': elem.peer_address,
                'port': 0,
                'asn': elem.peer_asn,
            })

            elem_type = elem.type.upper()
            if elem_type == 'A':
                bgp_message.add_announce(elem.fields['prefix'])
                bgp_message.set_nexthop(elem.fields['next-hop'])
                for hop in elem.fields['as-path'].split():
                    if '{' not in hop:  # ignore AS-SETs
                        bgp_message.add_as_to_path(hop)
                output_queue.put(bgp_message)
            elif elem_type == 'W':
                bgp_message.add_withdraw(elem.fields['prefix'])
                output_queue.put(bgp_message)
            elem = rec.get_next_elem()
Ejemplo n.º 35
0
def main(rib, target_prefs):
    """Fill ``rib`` with routes for the target prefixes from the last 8 hours.

    Streams both ``updates`` and ``ribs`` records from the ``ris`` and
    ``routeviews`` projects for the interval ``[start - 8h, start]``
    (``start`` is a module-level value), restricted to ``target_prefs``.
    Every elem of type 'A' or 'R' in a valid record is passed to
    ``rib.add_to_rib``; ``rib.flush()`` is called at the end. The file
    ``./data/stream_<start>`` is opened for writing, but this function
    writes nothing to it.

    :param rib: object offering ``add_to_rib`` and ``flush``
    :param target_prefs: iterable of prefixes used as stream filters
    """
    # A new stream and a record instance that is reused for every read.
    stream = BGPStream()
    rec = BGPRecord()

    with open('./data/stream_{0}'.format(start), 'wb') as _unused_out:

        for target in target_prefs:
            stream.add_filter('prefix', target)

        stream.add_filter('record-type', 'updates')
        stream.add_filter('record-type', 'ribs')
        #stream.add_filter('collector', 'rrc04')
        stream.add_filter('project', 'ris')
        stream.add_filter('project', 'routeviews')

        stream.add_interval_filter(start - 8 * 60 * 60, start)
        stream.add_rib_period_filter(10000000000000)

        stream.start()

        while stream.get_next_record(rec):
            # Only valid records contribute to the RIB.
            if rec.status != "valid":
                continue

            elem = rec.get_next_elem()
            while elem:
                if elem.type in ('A', 'R'):
                    rib.add_to_rib(rec.collector, elem.peer_address,
                                   elem.fields['prefix'], elem.time,
                                   elem.fields['as-path'])
                elem = rec.get_next_elem()

        rib.flush()

    print('Successful termination; Start time: {0}'.format(start))
Ejemplo n.º 36
0
def recv_bgpstream_rib(begin, until, collector, output_queue):
    """Queue one ``BGPmessage`` per announced route and track ``rib_ts``.

    Streams records of ``collector`` for ``[begin, until]``. Every elem
    of type 'A' or 'R' sets the module-level ``rib_ts`` to the elem time
    and is put on ``output_queue`` as an update message with next hop,
    source peer, AS path (hops that contain '{' are left out) and prefix.
    Reading stops at the first record that is more than
    ``RIB_TS_INTERVAL/2`` later than ``rib_ts``. Records that are not
    valid are skipped with a warning.

    :param begin: start of the interval
    :param until: end of the interval
    :param collector: collector name used as stream filter
    :param output_queue: queue that receives the messages via ``put``
    """
    global rib_ts

    logging.info("CALL recv_bgpstream_rib")
    stream = BGPStream()
    rec = BGPRecord()
    stream.add_filter('collector', collector)
    # NOTE(review): this requests 'updates' although the function handles
    # 'R' elems and logs a RIB dump - confirm whether 'ribs' was meant.
    stream.add_filter('record-type', 'updates')
    stream.add_interval_filter(begin, until)

    stream.start()
    while stream.get_next_record(rec):
        if rec.status != 'valid':
            logging.warning("stream record invalid, skipping.")
            continue
        if (rib_ts > 0) and (rec.time > (rib_ts + RIB_TS_INTERVAL/2)):
            logging.info("received full RIB table dump.")
            break

        elem = rec.get_next_elem()
        while elem:
            if elem.type.upper() in ('A', 'R'):
                rib_ts = elem.time
                bgp_message = BGPmessage(elem.time, 'update')
                bgp_message.set_nexthop(elem.fields['next-hop'])
                bgp_message.set_source({
                    'addr': elem.peer_address,
                    'port': 0,
                    'asn': elem.peer_asn,
                })
                for hop in elem.fields['as-path'].split():
                    if '{' not in hop:  # ignore AS-SETs
                        bgp_message.add_as_to_path(hop)
                bgp_message.add_announce(elem.fields['prefix'])
                output_queue.put(bgp_message)
            elem = rec.get_next_elem()
Ejemplo n.º 37
0
def init_stream(config_files, start_time, end_time):
    """Build a configured, not yet started, RIB stream.

    The stream covers the ``ris`` and ``routeviews`` projects, RIB
    records only, the ``superprefix`` of every entry in ``config_files``
    and the interval ``[start_time, end_time]``.

    :param config_files: mapping of experiment id to a configuration
        that has a ``'superprefix'`` entry
    :param start_time: start of the interval
    :param end_time: end of the interval
    :return: tuple ``(stream, record)``
    """
    stream = BGPStream()
    rec = BGPRecord()

    fixed_filters = (
        ('project', 'ris'),
        ('project', 'routeviews'),
        ('record-type', 'ribs'),
    )
    for filter_name, filter_value in fixed_filters:
        stream.add_filter(filter_name, filter_value)

    for exp_id in config_files:
        stream.add_filter('prefix', config_files[exp_id]['superprefix'])

    stream.add_interval_filter(start_time, end_time)
    return stream, rec
Ejemplo n.º 38
0
def count_valid_withdrawals_per_second(t0, num_seconds):
    """Count withdrawals per second seen by route-views.saopaulo.

    Streams ``updates`` records of the Route Views Sao Paulo collector
    for ``[t0, t0 + num_seconds - 1]`` and counts, per second, the
    withdrawal elems ('W') found in valid records. A dot is written to
    stdout as a progress indication while reading, followed by a newline
    at the end.

    :param t0: first second of the interval
    :param num_seconds: length of the interval in seconds; must be
        greater than 100
    :return: list of ``num_seconds`` counters, index ``i`` holding the
        count for second ``t0 + i``
    """
    # The progress step below is num_seconds/100, which would be zero
    # (division by zero in the modulo) for short intervals.
    assert num_seconds > 100

    stream = BGPStream()
    stream.add_filter('project', 'routeviews')
    stream.add_filter('collector', 'route-views.saopaulo')
    stream.add_filter('record-type', 'updates')
    stream.add_interval_filter(t0, t0 + num_seconds - 1)

    # Time of the last record for which a progress dot was printed.
    prog = t0

    # Record instance that is reused for every read.
    rec = BGPRecord()
    # One counter per second of the interval.
    ctr = [0] * num_seconds
    stream.start()

    while stream.get_next_record(rec):
        if rec.status == "valid":
            elem = rec.get_next_elem()
            while elem:
                if elem.type == 'W':
                    ctr[rec.time - t0] += 1
                elem = rec.get_next_elem()
        # Cheap progress indication with dots; remove this block if the
        # dots are not wanted.
        if rec.time > prog and (rec.time - t0) % floor(num_seconds/100) == 0:
            sys.stdout.write('.')
            sys.stdout.flush()
            prog = rec.time
    # Newline after the dots.
    print("")
    return ctr
Ejemplo n.º 39
0
def stream_ribfirst(stream, filter_peer):
    rec = BGPRecord()

    rib_started = False

    while (stream.get_next_record(rec)):
        if rec.status != "valid":
            print rec.project, rec.collector, rec.type, rec.time, rec.status
        else:
            if rec.type == 'rib':
                rib_started = True

            if rib_started:
                elem = rec.get_next_elem()
                while (elem):
                    if elem.peer_address in filter_peer or len(
                            filter_peer) == 0:
                        if elem.type == 'W':
                            bgp_message = 'BGPSTREAM|'+str(rec.collector) \
                            +'|'+str(elem.type)+'|'+str(elem.peer_address) \
                            +'|'+str(elem.peer_asn)+'|'+str(elem.time)+'|'+str(elem.fields['prefix'])
                        elif 'as-path' in elem.fields:
                            as_path = elem.fields['as-path'].split(
                                '{')[0].split(',')[0].rstrip(' ')
                            bgp_message = 'BGPSTREAM|'+str(rec.collector) \
                            +'|'+str(elem.type)+'|'+str(elem.peer_address) \
                            +'|'+str(elem.peer_asn)+'|'+str(elem.time)+'|'+str(elem.fields['prefix']) \
                            +'|'+as_path
                        else:
                            bgp_message = None

                        #print 'YES'+'\t'+str(rec.project)+'\t'+str(rec.collector)+'\t'+str(rec.time)+'\t'+str(rec.type)+'\t'+str(rec.status)

                        if rec.collector not in peer_set:
                            peer_set[rec.collector] = set()
                        peer_set[rec.collector].add(elem.peer_address)

                        if bgp_message is not None:
                            yield bgp_message

                    elem = rec.get_next_elem()
Ejemplo n.º 40
0
    def start_collecting(self, start_timestamp, end_timestamp=0):
        """Run the stream over an interval and tally the elem types.

        Adds the interval to ``self.stream``, starts it and reads all
        records. ``self.i`` is incremented per record and printed every
        10000 records; each change of the record's dump position is
        printed; ``self.counter`` is updated with the type of every elem
        of a valid record and printed at the end.

        :param start_timestamp: start of the interval
        :param end_timestamp: end of the interval, 0 by default
        """
        self.stream.add_interval_filter(start_timestamp, end_timestamp)
        print("Start BGPStream:", start_timestamp, end_timestamp)
        self.stream.start()

        rec = BGPRecord()
        last_dump_position = "unknown"
        while self.stream.get_next_record(rec):
            self.i += 1
            if self.i % 10000 == 0:
                print(self.i)

            if rec.status != "valid":
                continue

            if last_dump_position != rec.dump_position:
                last_dump_position = rec.dump_position
                print('Dump Position:', rec.dump_position)

            elem = rec.get_next_elem()
            while elem:
                self.counter.update(elem.type)
                elem = rec.get_next_elem()

        print(self.counter)
def get_data(timestamp):
    """Load the AS paths towards the DNS root prefixes into the graph store.

    RIB records of the 'routeviews' project are streamed for the single
    instant ``timestamp``. For every announced prefix the distinct AS
    paths (reversed, so they start at the last AS of the path) are
    collected, then each AS is merged as an ``asn`` node and consecutive,
    different ASes are linked with a ``TO`` relationship carrying the
    prefix, the time of the last record read and the number of prepended
    repetitions seen just before the link.

    :param timestamp: used as both start and end of the interval filter
    """
    # NOTE(review): Graph is presumably py2neo's Neo4j client -- confirm.
    graph = Graph(password="******")

    stream = BGPStream()
    rec = BGPRecord()
    rec_time = None

    root_prefixes = (
        # IPv4
        '198.41.0.0/24',  # A-root
        '192.33.4.0/24',  # C-root
        '199.7.91.0/24',  # D-root
        '192.203.230.0/24',  # E-root, IPv4 only
        '192.5.5.0/24',  # F-root
        '192.112.36.0/24',  # G-root, IPv4 only
        '198.97.190.0/24',  # H-root
        '192.36.148.0/24',  # I-root
        '192.58.128.0/24',  # J-root
        '193.0.14.0/24',  # K-root
        '199.7.83.0/24',  # L-root
        '202.12.27.0/24',  # M-root
        # IPv6
        '2001:503:ba3e::/48',  # A
        '2001:500:2::/48',  # C
        '2001:500:2d::/48',  # D
        '2001:500:2f::/48',  # F
        '2001:500:1::/48',  # H
        '2001:7fe::/33',  # I
        '2001:503:c27::/48',  # J
        '2001:7fd::/48',  # K
        '2001:500:9f::/48',  # L
        '2001:dc3::/32',  # M
    )
    for root_prefix in root_prefixes:
        stream.add_filter('prefix', root_prefix)

    stream.add_filter('record-type', 'ribs')
    # A single collector can be selected instead, e.g.
    # stream.add_filter('collector', 'route-views.soxrs')
    stream.add_filter('project', 'routeviews')
    stream.add_interval_filter(timestamp, timestamp)

    stream.start()

    # prefix -> list of reversed AS paths
    result = {}
    while stream.get_next_record(rec):
        rec_time = rec.time
        if rec.status != "valid":
            continue
        elem = rec.get_next_elem()
        while elem:
            print('{} {} {} {} {}'.format(rec.collector, elem.type, elem.peer_address, elem.peer_asn, elem.fields))
            hops = elem.fields['as-path'].split()[::-1]
            result.setdefault(elem.fields['prefix'], []).append(hops)
            elem = rec.get_next_elem()

    # Keep each distinct path once (lists are not hashable, hence the tuples).
    for prefix in result:
        distinct_paths = set(tuple(hops) for hops in result[prefix])
        result[prefix] = [list(hops) for hops in distinct_paths]
    print('timestamp {} ==> result: {}'.format(rec_time, result))

    node_query = 'MERGE(s:asn{{name:"{0}", label:"{0}"}})'
    link_query = ('MATCH (s:asn),(d:asn) '
                  'WHERE s.name="{0}" AND d.name="{1}" '
                  'MERGE (s)-[r:TO {{prefix: "{3}", time: {2}, prepended: {4}}}]->(d)')

    for prefix in result:
        for path in result[prefix]:
            print('path: {}'.format(path))
            previous_asn = None
            prepend_count = 0
            for position, asn in enumerate(path):
                graph.run(node_query.format(asn))  # creates the node only if it is missing
                if position == 0:
                    # First AS of the reversed path: nothing to link yet.
                    pass
                elif asn == previous_asn:
                    # Same AS repeated: AS prepending, remember how often.
                    prepend_count += 1
                else:
                    graph.run(link_query.format(asn, previous_asn, rec_time, prefix, prepend_count))
                    prepend_count = 0
                previous_asn = asn
def get_data(timestamp):
    """Load the AS paths towards the K-root prefix into the graph store.

    RIB records of the 'routeviews' project are streamed for the single
    instant ``timestamp``. For every announced prefix the distinct AS
    paths (reversed) are collected; each AS is looked up as an ``asn``
    node by its ``label`` (a new ``Node`` is built when none is found)
    and consecutive, different ASes are linked with a ``TO``
    relationship carrying the time of the last record read and the prefix.

    Only built-in ``print(...)`` calls and ``next(...)`` are used so the
    function parses and runs on both Python 2 and Python 3.

    :param timestamp: used as both start and end of the interval filter
    """
    # NOTE(review): Graph/Node/Relationship are presumably py2neo's -- confirm.
    graph = Graph(password="******")

    stream = BGPStream()
    rec = BGPRecord()
    rec_time = None

    # stream.add_filter('prefix', '198.41.0.0/24')  # A-root
    # stream.add_filter('prefix', '192.228.79.0/24')  # B-root, only 1 site
    # stream.add_filter('prefix', '192.33.4.0/24')  # C-root
    # stream.add_filter('prefix', '199.7.91.0/24')  # D-root
    # stream.add_filter('prefix', '192.203.230.0/24')  # E-root, IPv4 only
    # stream.add_filter('prefix', '192.5.5.0/24')  # F-root
    # stream.add_filter('prefix', '192.112.36.0/24')  # G-root, IPv4 only
    # stream.add_filter('prefix', '198.97.190.0/24')  # H-root
    # stream.add_filter('prefix', '192.36.148.0/24')  # I-root
    # stream.add_filter('prefix', '192.58.128.0/24')  # J-root
    stream.add_filter('prefix', '193.0.14.0/24')  # K-root
    # stream.add_filter('prefix', '199.7.83.0/24')  # L-root
    # stream.add_filter('prefix', '202.12.27.0/24')  # M-root

    # IPv6
    # stream.add_filter('prefix', '2001:503:ba3e::/48')  # A
    ## stream.add_filter('prefix', '2001:500:84::/48')  # B, only 1 site
    # stream.add_filter('prefix', '2001:500:2::/48')  # C
    # stream.add_filter('prefix', '2001:500:2d::/48')  # D
    # stream.add_filter('prefix', '2001:500:2f::/48')  # F
    # stream.add_filter('prefix', '2001:500:1::/48')  # H
    # stream.add_filter('prefix', '2001:7fe::/33')  # I
    # stream.add_filter('prefix', '2001:503:c27::/48')  # J
    # stream.add_filter('prefix', '2001:7fd::/48')  # K
    # stream.add_filter('prefix', '2001:500:9f::/48')  # L
    # stream.add_filter('prefix', '2001:dc3::/32')  # M

    stream.add_filter('record-type', 'ribs')
    # stream.add_filter('collector', 'rrc01')
    stream.add_filter('project', 'routeviews')
    stream.add_interval_filter(timestamp, timestamp)

    stream.start()

    # prefix -> list of reversed AS paths
    result = {}
    while stream.get_next_record(rec):
        rec_time = rec.time
        if rec.status == "valid":
            elem = rec.get_next_elem()
            while elem:
                # Single formatted string: identical output under the
                # Python 2 print statement and the Python 3 print function.
                print('{} {} {} {} {}'.format(
                    rec.collector, elem.type, elem.peer_address, elem.peer_asn, elem.fields))
                as_path = elem.fields['as-path'].split()
                as_path.reverse()
                prefix = elem.fields['prefix']
                if prefix not in result:
                    result[prefix] = []
                result[prefix].append(as_path)
                elem = rec.get_next_elem()

    # get only unique lists in result
    for prefix in result:
        result[prefix] = [list(x) for x in set(tuple(x) for x in result[prefix])]
    print('timestamp {} ==> result: {}'.format(rec_time, result))

    for prefix in result:
        for path in result[prefix]:
            print('path: {}'.format(path))
            cur_node = None
            prev_node = None
            counter_as_prepend = 0
            for index, asn in enumerate(path):
                searched_node = graph.find('asn', property_key='label', property_value=asn)
                try:
                    # Reuse the AS node if it is already in the db; built-in
                    # next() works on Python 2.6+ and 3, unlike .next().
                    cur_node = next(searched_node)
                except StopIteration:
                    cur_node = Node('asn', label=str(asn))  # if not exists, then create a new one
                if index > 0:
                    if index == len(path) - 1:
                        cur_node['path'] = path  # attach AS path to the last ASN
                    if cur_node != prev_node:
                        if counter_as_prepend > 0:
                            cur_node['prepended'] = counter_as_prepend
                            counter_as_prepend = 0  # reset
                        peering = Relationship(cur_node, 'TO', prev_node, time=rec_time, prefix=prefix)
                        graph.create(peering)
                    else:  # AS prepending
                        counter_as_prepend += 1
                prev_node = cur_node
Ejemplo n.º 43
0
def main():
    """Collect the origin ASes announced per prefix from a collector's RIBs.

    Command-line options select the time window, the route collector,
    optional MongoDB connection parameters and the log level. RIB records
    are streamed and, for every element, the last AS of its AS path
    (AS-SET tokens removed) is recorded as an origin of the element's
    prefix. Whenever a record is more than ``RIB_TS_INTERVAL`` newer than
    the previous flush, the origins gathered so far are passed to
    ``store_rib_origins`` (when MongoDB parameters were given) or
    ``print_rib_origins``, and collection starts over.

    :raises ValueError: if the requested log level is not a known
        ``logging`` level name
    """
    parser = argparse.ArgumentParser(description='', epilog='')
    parser.add_argument('-b', '--begin',
                        help='Begin date (inclusive), format: yyyy-mm-dd HH:MM',
                        type=valid_date, required=True)
    parser.add_argument('-u', '--until',
                        help='Until date (exclusive), format: yyyy-mm-dd HH:MM',
                        type=valid_date, required=True)
    parser.add_argument('-c', '--collector',
                        help='Route collector from RIPE RIS or Route-Views project.',
                        type=str, required=True)
    parser.add_argument('-m', '--mongodb',
                        help='MongoDB connection parameters.',
                        type=str, default=None)
    parser.add_argument('-l', '--loglevel',
                        help='Set loglevel [DEBUG,INFO,WARNING,ERROR,CRITICAL].',
                        type=str, default='WARNING')

    args = vars(parser.parse_args())

    numeric_level = getattr(logging, args['loglevel'].upper(), None)
    if not isinstance(numeric_level, int):
        # Report the value the user passed; the previous code referenced an
        # undefined name here and failed with NameError instead.
        raise ValueError('Invalid log level: %s' % args['loglevel'])
    logging.basicConfig(level=numeric_level,
                        format='%(asctime)s : %(levelname)s : %(message)s')

    # Seconds since the Unix epoch for both ends of the window.
    epoch = datetime(1970, 1, 1)
    ts_begin = int((args['begin'] - epoch).total_seconds())
    ts_until = int((args['until'] - epoch).total_seconds())

    mongodbstr = None
    if args['mongodb']:
        mongodbstr = args['mongodb'].strip()
    # BEGIN
    logging.info("START")

    # Create bgpstream
    stream = BGPStream()
    rec = BGPRecord()
    # set filtering
    stream.add_filter('collector', args['collector'])
    stream.add_filter('record-type', 'ribs')
    stream.add_interval_filter(ts_begin, ts_until)

    # Start the stream
    stream.start()

    rib_ts = 0
    rib_origins = dict()
    while stream.get_next_record(rec):
        if rec.status == 'valid':
            elem = rec.get_next_elem()
        else:
            logging.warning("stream record invalid, skipping ...")
            continue
        if rec.time > (rib_ts + RIB_TS_INTERVAL):
            rib_ts = rec.time
            if mongodbstr:
                store_rib_origins(rib_ts, rib_origins, mongodbstr)
            else:
                print_rib_origins(rib_ts, rib_origins)
            rib_origins = dict()
        while elem:
            prefix = elem.fields['prefix']
            # Drop AS-SET tokens by building a new list; removing items from
            # the list while iterating over it skipped the following token.
            aspath = [asn for asn in elem.fields['as-path'].split()
                      if '{' not in asn]
            # An empty path (or one made only of AS-SETs) has no origin AS.
            if aspath:
                origin = aspath[-1]
                origins = rib_origins.setdefault(prefix, list())
                if origin not in origins:
                    origins.append(origin)
            elem = rec.get_next_elem()
    # NOTE(review): origins gathered after the last flush are not written
    # out once the stream ends -- confirm whether a final flush is intended.
import time
from _pybgpstream import BGPStream, BGPRecord, BGPElem

# Script: print every RIB element seen by collector rrc11 for 140.78.0.0/16.
start_time = time.time()

stream = BGPStream()
rec = BGPRecord()

stream.add_filter('prefix','140.78.0.0/16')
stream.add_filter('record-type','ribs')
stream.add_filter('collector','rrc11')
# stream.add_filter('project','ris')
stream.add_interval_filter(1464681000, 1464682200)  # 1464682200 ==> 05/31/2016 @ 8:10am (UTC)

stream.start()

result = []
while stream.get_next_record(rec):
    # Only valid records carry elements worth printing.
    if rec.status == "valid":
        elem = rec.get_next_elem()
        while elem:
            # One formatted string prints the same on Python 2 and Python 3.
            print('%s %s %s %s %s' % (
                rec.collector, elem.type, elem.peer_address, elem.peer_asn, elem.fields))
            # Advance to the next element; without this the loop never ends.
            elem = rec.get_next_elem()
Ejemplo n.º 45
0
def load_data(start, stop, collectors, window, threshold):
    """Stream BGP updates and feed announcements/withdrawals to the burst logic.

    Every 'A' (announcement) and 'W' (withdrawal) element is recorded per
    peer address and handed to ``handleUpdate`` and ``saveGraphPoint``.
    After the stream ends, the per-peer graph points are written as JSON,
    any bursts still pending in ``burst2writeA`` / ``burst2writeW`` are
    written with ``writeBurst``, and the first column of the peer-name
    CSV is converted into a JSON list file.

    Relies on the module-level ``graph_points``, ``burst2writeA`` and
    ``burst2writeW`` containers.

    :param start: interval start, seconds since epoch
    :param stop: interval end, seconds since epoch
    :param collectors: list of collector names; when empty, rrc00..rrc15
        are used for the stream filter
    :param window: passed through to ``handleUpdate``
    :param threshold: passed through to ``handleUpdate`` and
        ``saveGraphPoint``
    """
    peers = {}

    # Create a new BGPStream instance and a reusable BGPRecord instance
    stream = BGPStream()
    rec = BGPRecord()

    # Add a filter for each collector; default to rrc00 .. rrc15.
    if collectors:
        for collector in collectors:
            print(collector)
            stream.add_filter('collector', collector)
    else:
        for i in range(16):
            stream.add_filter('collector', 'rrc%02d' % i)

    stream.add_filter('record-type', 'updates')

    # Consider the interval from "start" to "stop" in seconds since epoch
    stream.add_interval_filter(start, stop)

    # Start the stream
    stream.start()

    # Pending-burst container for each update type.
    pending_bursts = {'A': burst2writeA, 'W': burst2writeW}

    while stream.get_next_record(rec):
        timestamp = rec.time
        if rec.status != "valid":
            # Report the broken record and move on.
            print('%s %s %s %s %s' % (
                rec.project, rec.collector, rec.type, timestamp, rec.status))
            continue

        # Go through all elements of the record
        elem = rec.get_next_elem()
        while elem:
            updatetype = elem.type
            # Consider only the A and W updates
            if updatetype in pending_bursts:
                peer = elem.peer_address
                prefix = elem.fields['prefix']
                if peer not in peers:
                    peers[peer] = {
                        'A': [],
                        'W': []
                    }
                update = {'tst': timestamp, 'prefix': prefix}
                handleUpdate(peers[peer][updatetype], pending_bursts[updatetype],
                             update, peer, updatetype, timestamp, window, threshold)
                saveGraphPoint(peers[peer][updatetype], updatetype, peer,
                               timestamp, collectors, threshold)
            elem = rec.get_next_elem()

    # After processing all records, we write the graph json files with the graph points recorded for each peer
    for peer in graph_points:
        # ':' (IPv6 peer addresses) is replaced so the name is usable as a directory.
        peer_file_name = peer.replace(':', '_')
        if not os.path.exists(peer_file_name):
            os.makedirs(peer_file_name)
        with open(peer_file_name+'/'+peer_file_name + '-graph.json', 'w') as outfile:
            json.dump(graph_points[peer], outfile, indent=2)

    # Write the last burst of A updates, then of W updates, if any are left
    for updatetype, pending in (('A', burst2writeA), ('W', burst2writeW)):
        if pending:
            for peer in pending:
                if pending[peer]:
                    for timestamp in pending[peer]:
                        writeBurst(peer, pending, updatetype, timestamp)

    # Transform the csv of peer names into a json file so plotGraph can use
    # getJSON. The CSV step exists because appending to a json file would
    # mean rewriting the whole file.
    collector_tag = '-'.join(collectors)
    jsonlist = []
    # NOTE(review): 'rb' is the Python 2 csv convention; on Python 3 the csv
    # module needs text mode (newline='') -- adjust if this is ported.
    with open('csv_peernames-' + collector_tag + '.csv', 'rb') as f:
        for row in csv.reader(f):
            if row:  # blank lines come back as empty rows
                jsonlist.append(row[0])

    jsondata = json.dumps(jsonlist, indent=2)
    # The context manager closes the file even if the write fails.
    with open('json_file_names-' + collector_tag + '.json', 'w') as fd:
        fd.write(jsondata)
Ejemplo n.º 46
0
def main():
    """Track how long each (prefix, origin AS) pair stays in a collector's RIBs.

    Command-line options select the time window, the route collector,
    optional MongoDB connection parameters, snapshot handling and the log
    level. ``rib_origins`` maps prefix -> origin AS -> (first_seen,
    last_seen). Whenever a record is more than ``RIB_TS_THRESHOLD`` newer
    than the current RIB timestamp, origins whose last sighting is older
    than the threshold are emitted as finished lifetimes (stored via
    ``store_origins_lt`` or printed via ``print_origins_lt``) and removed.
    With ``--snapshot`` the state is loaded from and saved to snapshots;
    otherwise all remaining lifetimes are emitted at the end.

    :raises ValueError: if the requested log level is not a known
        ``logging`` level name
    """
    parser = argparse.ArgumentParser(description='', epilog='')
    parser.add_argument('-b', '--begin',
                        help='Begin date (inclusive), format: yyyy-mm-dd HH:MM',
                        type=valid_date, required=True)
    parser.add_argument('-u', '--until',
                        help='Until date (exclusive), format: yyyy-mm-dd HH:MM',
                        type=valid_date, required=True)
    parser.add_argument('-c', '--collector',
                        help='Route collector from RIPE RIS or Route-Views project.',
                        type=str, required=True)
    parser.add_argument('-m', '--mongodb',
                        help='MongoDB connection parameters.',
                        type=str, default=None)
    parser.add_argument('-k', '--keepsnapshots',
                        help='Keep all snapshots, works only with -s.',
                        action='store_true')
    parser.add_argument('-s', '--snapshot',
                        help='Enable snapshoting.',
                        action='store_true')
    parser.add_argument('-l', '--loglevel',
                        help='Set loglevel [DEBUG,INFO,WARNING,ERROR,CRITICAL].',
                        type=str, default='WARNING')

    args = vars(parser.parse_args())

    numeric_level = getattr(logging, args['loglevel'].upper(), None)
    if not isinstance(numeric_level, int):
        # Report the value the user passed; the previous code referenced an
        # undefined name here and failed with NameError instead.
        raise ValueError('Invalid log level: %s' % args['loglevel'])
    logging.basicConfig(level=numeric_level,
                        format='#> %(asctime)s : %(levelname)s : %(message)s')

    # Seconds since the Unix epoch for both ends of the window.
    epoch = datetime(1970, 1, 1)
    ts_begin = int((args['begin'] - epoch).total_seconds())
    ts_until = int((args['until'] - epoch).total_seconds())

    mongodbstr = None
    if args['mongodb']:
        mongodbstr = args['mongodb'].strip()

    rib_ts = 0
    rib_origins = dict()
    origins_lt = list()
    if args['snapshot']:
        rib_ts, rib_origins = load_snapshot(mongodbstr)
    if rib_ts > ts_begin:
        # Resume shortly before the snapshot instead of at the requested start.
        logging.info("SKIP, found snapshot with newer ts")
        ts_begin = rib_ts - RIB_TS_THRESHOLD

    # BEGIN
    logging.info("START")

    # Create bgpstream
    stream = BGPStream()
    rec = BGPRecord()
    # set filtering
    stream.add_filter('collector', args['collector'])
    stream.add_filter('record-type', 'ribs')
    stream.add_interval_filter(ts_begin, ts_until)

    # Start the stream
    stream.start()

    while stream.get_next_record(rec):
        if rec.status == 'valid':
            elem = rec.get_next_elem()
        else:
            logging.warning("stream record invalid, skipping ...")
            continue

        if rec.time > (rib_ts + RIB_TS_THRESHOLD):
            # Collect every origin not seen within the threshold.
            expiry = rib_ts - RIB_TS_THRESHOLD
            for p in rib_origins:
                for o in rib_origins[p]:
                    lifetime = rib_origins[p][o]
                    if lifetime[1] < expiry:
                        origins_lt.append((p, o, lifetime[0], lifetime[1]))

            if args['snapshot'] and rib_origins:
                store_snapshot(rec.time, rib_origins, mongodbstr)
                if not args['keepsnapshots']:
                    remove_snapshot(rib_ts, mongodbstr)

            rib_ts = rec.time
            logging.info("ts: "+str(rib_ts))
            if origins_lt:
                if mongodbstr:
                    store_origins_lt(rib_ts, origins_lt, mongodbstr)
                else:
                    print_origins_lt(rib_ts, origins_lt)
                # Emitted lifetimes are finished; forget those origins.
                for expired in origins_lt:
                    del rib_origins[expired[0]][expired[1]]
                origins_lt = list()

        while elem:
            prefix = elem.fields['prefix']
            # Drop AS-SET tokens by building a new list; removing items from
            # the list while iterating over it skipped the following token.
            aspath = [asn for asn in elem.fields['as-path'].split()
                      if '{' not in asn]
            # An empty path (or one made only of AS-SETs) has no origin AS.
            if aspath:
                origin = aspath[-1]
                origins = rib_origins.setdefault(prefix, dict())
                if origin in origins:
                    # Keep first_seen, refresh last_seen.
                    origins[origin] = (origins[origin][0], rib_ts)
                else:
                    origins[origin] = (rib_ts, rib_ts)
            elem = rec.get_next_elem()

    if args['snapshot']:
        print("NOTE: remaining origin lifetimes are stored in latest snapshot (%d)!\n" % rib_ts)
        if rib_origins:
            store_snapshot(rib_ts, rib_origins, mongodbstr)
    else:
        print("NOTE: output remaining origin lifetimes with current ts (%d)\n" % rib_ts)
        # Everything still tracked ends at the current RIB timestamp.
        origins_lt = [(p, o, rib_origins[p][o][0], rib_ts)
                      for p in rib_origins
                      for o in rib_origins[p]]
        if mongodbstr:
            store_origins_lt(rib_ts, origins_lt, mongodbstr)
        else:
            print_origins_lt(rib_ts, origins_lt)
def run_bgpstream(args):
    """Gather per-peer prefix and transit-AS statistics for one time slice.

    For every peer signature (from ``peer_signature``) four containers
    are kept, in this order: the set of IPv4 prefixes, the set of IPv6
    prefixes, a dict transit ASN -> set of IPv4 prefixes, and a dict
    transit ASN -> set of IPv6 prefixes. Transit ASNs are the interior
    hops of the AS path after consecutive duplicates are squashed, i.e.
    everything except the first and the last AS.

    :param args: tuple ``(collector, start_time, end_time, data_type)``;
        the end time is exclusive
    :return: list of ``((slice_start, collector, peer_signature), data)``
        rows, where ``slice_start`` is ``start_time`` truncated down to a
        multiple of ``RESULT_GRANULARITY``
    """
    collector, start_time, end_time, data_type = args

    # initialize and configure BGPStream
    stream = BGPStream()
    rec = BGPRecord()
    stream.add_filter('collector', collector)
    # The stream filter is inclusive on both ends while this function's
    # interval is inclusive/exclusive, hence the "- 1" on the end time.
    stream.add_interval_filter(start_time, end_time-1)
    stream.add_filter('record-type', data_type)
    stream.start()

    # peer signature -> [v4 prefixes, v6 prefixes, v4 transits, v6 transits]
    peers_data = {}

    while stream.get_next_record(rec):
        elem = rec.get_next_elem()
        while elem:
            sig = peer_signature(rec, elem)
            try:
                stats = peers_data[sig]
            except KeyError:
                # first time this peer is seen: start with empty containers
                stats = peers_data[sig] = [set(), set(), {}, {}]

            has_prefix = 'prefix' in elem.fields
            pfx = elem.fields['prefix'] if has_prefix else ""
            is_v6 = ":" in pfx
            if has_prefix:
                stats[1 if is_v6 else 0].add(pfx)

            if 'as-path' in elem.fields:
                # Squash the AS path to distinct consecutive ASNs, which
                # removes prepending.
                hops = [asn for asn, _ in groupby(elem.fields['as-path'].split(" "))]
                if is_v6:
                    transit_prefixes = stats[3]
                elif pfx != "":
                    transit_prefixes = stats[2]
                else:
                    # no prefix on this elem: nothing to attribute
                    transit_prefixes = None
                if transit_prefixes is not None:
                    for transit in hops[1:-1]:
                        if transit not in transit_prefixes:
                            transit_prefixes[transit] = set()
                        transit_prefixes[transit].add(pfx)

            elem = rec.get_next_elem()

    # Truncate the slice start down to a multiple of RESULT_GRANULARITY so
    # that slices can be merged correctly.
    slice_start = \
        int(math.floor(start_time/RESULT_GRANULARITY) * RESULT_GRANULARITY)

    # one output row per peer that produced data
    return [((slice_start, collector, sig), stats)
            for sig, stats in peers_data.items()]