Example 1
    def getRoutingFileForDate(routing_date, files_path, DEBUG):
        db_handler = DBHandler('')
        available_routing_files = db_handler.getPathsToRoutingFilesForDate(
            routing_date)
        db_handler.close()

        routing_file = ''

        if 'bgprib.mrt' in available_routing_files:
            routing_file = available_routing_files['bgprib.mrt']

        elif 'dmp.gz' in available_routing_files and 'v6.dmp.gz' in available_routing_files:
            # If there is no bgprib.mrt file available, but the two dmp files
            # (for v4 and v6) are available, I use them
            dmp_file = available_routing_files['dmp.gz']
            readable_dmp = BGPDataHandler.getReadableFile(
                dmp_file, False, files_path, DEBUG)

            v6dmp_file = available_routing_files['v6.dmp.gz']
            readable_v6dmp = BGPDataHandler.getReadableFile(
                v6dmp_file, False, files_path, DEBUG)

            routing_file = BGPDataHandler.concatenateFiles('{}/{}_v4andv6.dmp.readable'\
                                                .format(files_path, routing_date),
                                                readable_dmp, readable_v6dmp)
        elif 'dmp.gz' in available_routing_files:
            # If there is only one of the dmp files available, I will work with it
            routing_file = available_routing_files['dmp.gz']

        elif 'v6.dmp.gz' in available_routing_files:
            # If there is only one of the dmp files available, I will work with it
            routing_file = available_routing_files['v6.dmp.gz']

        return routing_file
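
A minimal usage sketch for this helper (the date value and files path are hypothetical; it assumes the DB archive index has entries for that date, as in the example above):

    from datetime import date

    routing_file = getRoutingFileForDate(date(2016, 1, 1), '/tmp/bgp_files', False)
    if routing_file == '':
        print('No routing file available for that date')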
Example 2
    def data(self, NomLogique=None, DataType=None):
        if NomLogique is None or DataType is None:
            return "{}"

        db_handler = DBHandler(self.db_name)
        dds_data_dict = format_to_dict(db_handler.get_dds_data_list(NomLogique, DataType))
        for key in dds_data_dict:
            dds_data_dict[key]["dataType"] = DataType

            try:
                idObjet = dds_data_dict[key]["idObjet"]
                idInfo = dds_data_dict[key]["idInfo"]
                dds_data_dict[key]["libelle"] = self.code_to_libelle_dict[self.nom_to_type_dict[idObjet]][DataType][idInfo]
            except (KeyError, TypeError):
                # No mapping available for this object/type
                dds_data_dict[key]["libelle"] = "Inconnu"
        dds_data_json = format_to_json(dds_data_dict)
        db_handler.close()
        return dds_data_json
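
A sketch of how this handler method might be called (the host class and argument values are hypothetical; the method returns "{}" whenever either argument is missing):

    server = DDSDataServer()  # hypothetical class exposing data()
    print(server.data(NomLogique='pump_01', DataType='alarm'))  # JSON string
    print(server.data())  # -> "{}"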
Example 3
    def descobjet(self):
        db_handler = DBHandler(self.db_name)
        raw_descobjet_dict = format_to_dict(db_handler.get_descobjet_list())

        # Specific format for DescObjet
        descobjet_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(str))))
        for obj in raw_descobjet_dict:
            groupe = raw_descobjet_dict[obj]["idGroupe"]
            espace = raw_descobjet_dict[obj]["idEspace"]
            libelle = raw_descobjet_dict[obj]["libelle"]
            nom = raw_descobjet_dict[obj]["nom"]
            obj_type = raw_descobjet_dict[obj]["type"]  # avoid shadowing the type() builtin

            descobjet_dict[groupe][espace][libelle]["nom"] = nom
            descobjet_dict[groupe][espace][libelle]["type"] = obj_type

            self.nom_to_type_dict[nom] = obj_type

        descobjet_json = json.dumps(descobjet_dict, sort_keys=True, indent=4)
        db_handler.close()
        return descobjet_json
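
The four-level nested defaultdict lets the loop assign descobjet_dict[groupe][espace][libelle]["nom"] without creating the intermediate dictionaries first. A self-contained illustration of the pattern (keys are placeholders):

    from collections import defaultdict

    d = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(str))))
    d['g1']['e1']['label']['nom'] = 'object_a'  # intermediate dicts created on demand
    print(d['g1']['e1']['label']['nom'])      # -> object_a
    print(d['g1']['e1']['label']['missing'])  # -> '' (the str() default)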
Example 4
db_routing_data_v4_dates = set(
    db_handler.getListOfDatesFromArchiveIndex_v4Only())
db_routing_data_v6_dates = set(
    db_handler.getListOfDatesFromArchiveIndex_v6Only())
db_routing_data_v4andv6_dates = set(
    db_handler.getListOfDatesFromArchiveIndex_v4andv6())

missing_routing = complete_dates_set - db_routing_data_v4andv6_dates.union(
    db_routing_data_v4_dates.intersection(db_routing_data_v6_dates))

missing_routing_v4 = missing_routing - db_routing_data_v6_dates
missing_routing_v6 = missing_routing - db_routing_data_v4_dates

db_updates_dates = set(db_handler.getListOfDatesForUpdates())
missing_updates = complete_dates_set - db_updates_dates

db_handler.close()

print "Dates missing in the DB"
print "{} dates missing for prefixes.".format(len(missing_pref))
print missing_pref
print "{} dates missing for ASes.".format(len(missing_ASes))
print missing_ASes
print "{} dates missing for routing data.".format(len(missing_routing))
print missing_routing
print "{} dates missing for v4 routing data.".format(len(missing_routing_v4))
print missing_routing_v4
print "{} dates missing for v6 routing data.".format(len(missing_routing_v6))
print missing_routing_v6
print "{} dates missing for updates.".format(len(missing_updates))
print missing_updates
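
The missing-dates computation above is plain set algebra: a date counts as covered if it is in the v4-and-v6 set, or in both the v4-only and v6-only sets. A toy run with made-up date labels shows the logic:

    complete = {'d1', 'd2', 'd3', 'd4'}
    v4_dates = {'d1', 'd2'}
    v6_dates = {'d2'}
    v4andv6 = {'d3'}

    missing = complete - v4andv6.union(v4_dates.intersection(v6_dates))
    print(sorted(missing))              # -> ['d1', 'd4']
    print(sorted(missing - v6_dates))   # -> ['d1', 'd4'] (v4 data still missing)
    print(sorted(missing - v4_dates))   # -> ['d4'] (v6 data still missing)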
Example 5
def main(argv):
    routing_file = ''
    readables_path = ''
    archive_folder = '/data/wattle/bgplog'
    proc_num = -1
    data_type = 'visibility'
    DEBUG = False

    try:
        opts, args = getopt.getopt(argv, "ht:A:f:n:D", ['data_type=', 'archive_folder=', 'procNumber=', 'routingFile='])
    except getopt.GetoptError:
        print 'Usage: {} -h | -t <visibility/routing> (-A <archive folder> -n <process number> | -f <readable routing file>) [-D]'.format(sys.argv[0])
        print "t: Data type. Type of data to be inserted into the DB."
        print "Visibility -> To insert the dates during which prefixes, origin ASes and middle ASes were seen in the routing table."
        print "Routing -> To insert into the archive_index table the list of rows in the BGP routing table for the available dates."
        print "Visibility will be used by default."
        print "A: Provide the path to the folder containing hitorical routing data."
        print "AND"
        print "n: Provide a process number from 1 to 5, which allows the script to process a specific subset of the available files so that different scripts can process different files."
        print "OR"
        print "f: Provide the path to a routing file."
        print "D: DEBUG mode"
        sys.exit()

    for opt, arg in opts:
        if opt == '-h':
            print 'Usage: {} -h | -t <visibility/routing> (-A <archive folder> -n <process number> | -f <readable routing file>) [-D]'.format(sys.argv[0])
            print "t: Data type. Type of data to be inserted into the DB."
            print "Visibility -> To insert the dates during which prefixes, origin ASes and middle ASes were seen in the routing table."
            print "Routing -> To insert into the archive_index table the list of rows in the BGP routing table for the available dates."
            print "Visibility will be used by default."
            print "A: Provide the path to the folder containing hitorical routing data."
            print "AND"
            print "n: Provide a process number from 1 to 5, which allows the script to process a specific subset of the available files so that different scripts can process different files."
            print "OR"
            print "f: Provide the path to a routing file."
            print "D: DEBUG mode"
            sys.exit()
        elif opt == '-t':
            data_type = arg
            if data_type != 'visibility' and data_type != 'routing':
                print "Wrong data type! You MUST choose between 'visibility' and 'routing'."
                sys.exit(-1)
        elif opt == '-A':
            archive_folder = os.path.abspath(arg)
        elif opt == '-n':
            try:
                proc_num = int(arg)
            except ValueError:
                print "The process number MUST be a number."
                sys.exit(-1)
        elif opt == '-f':
            routing_file = os.path.abspath(arg)
        elif opt == '-D':
            DEBUG = True
        else:
            assert False, 'Unhandled option'
    
    if proc_num == -1:
        if routing_file == '':
            print "If you don't provide the path to a routing file you MUST provide a process number."
            sys.exit(-1)
        else:
            file_date = BGPDataHandler.getDateFromFileName(routing_file)
            
            if file_date.year in [2007, 2008, 2009]:
                proc_num = 1
            elif file_date.year in [2010, 2011]:
                proc_num = 2
            elif file_date.year in [2012, 2013]:
                proc_num = 3
            elif file_date.year in [2014, 2015]:
                proc_num = 4
            elif file_date.year in [2016, 2017]:
                proc_num = 5
            else:
                print "Routing file corresponds to date out of the considered range."
                sys.exit(-1)
            
    readables_path = '/home/sofia/BGP_stats_files/hist_part{}'.format(proc_num)
    
    files_path = '/home/sofia/BGP_stats_files/Visibility_Routing_CSVs/CSVs{}'.format(proc_num)
    
    output_file = '{}/CSVgeneration_{}_{}_{}.output'.format(files_path, data_type, proc_num, datetime.today().date())

    bgp_handler = BGPDataHandler(DEBUG, readables_path)
        
    if routing_file != '':
        generateFilesFromRoutingFile(files_path, routing_file,
                                             bgp_handler, data_type, dict(),
                                             output_file, archive_folder,
                                             DEBUG)
    else:
        db_handler = DBHandler()

        dates_ready = dict()

        if data_type == 'visibility':
            
            sys.stdout.write('Checking for dates already in the DB\n')
            
            existing_dates_pref = set(db_handler.getListOfDatesForPrefixes())
    
            for ex_date in existing_dates_pref:            
                # We don't want to insert duplicated data,
                # therefore, we assume that if the date is present
                # in the prefixes table, all the prefixes for that date,
                # v4 and v6, have already been inserted.
                # After finishing with the bulk insertion, all the dates need
                # to be checked to determine if there is any missing data.
                if ex_date not in dates_ready:
                    dates_ready[ex_date] = dict()
                if 'prefixes' not in dates_ready[ex_date]:
                    dates_ready[ex_date]['prefixes'] = defaultdict(bool)
                    
                dates_ready[ex_date]['prefixes']['v4'] = True
                dates_ready[ex_date]['prefixes']['v6'] = True
    
            existing_dates_orASes = set(db_handler.getListOfDatesForOriginASes())
            
            for ex_date in existing_dates_orASes:
                if ex_date not in dates_ready:
                    dates_ready[ex_date] = dict()
                if 'originASes' not in dates_ready[ex_date]:
                    dates_ready[ex_date]['originASes'] = defaultdict(bool)
                    
                dates_ready[ex_date]['originASes']['v4'] = True
                dates_ready[ex_date]['originASes']['v6'] = True
                
            existing_dates_midASes = set(db_handler.getListOfDatesForMiddleASes())

            for ex_date in existing_dates_midASes:
                if ex_date not in dates_ready:
                    dates_ready[ex_date] = dict()
                if 'middleASes' not in dates_ready[ex_date]:
                    dates_ready[ex_date]['middleASes'] = defaultdict(bool)
                    
                dates_ready[ex_date]['middleASes']['v4'] = True
                dates_ready[ex_date]['middleASes']['v6'] = True

        elif data_type == 'routing':
            existing_dates_v4 = set(db_handler.getListOfDatesFromArchiveIndex_v4Only())
                        
            for ex_date in existing_dates_v4:
                if ex_date not in dates_ready:
                    dates_ready[ex_date] = defaultdict(bool)
                dates_ready[ex_date]['routing_v4'] = True

            existing_dates_v6 = set(db_handler.getListOfDatesFromArchiveIndex_v6Only())
            
            for ex_date in existing_dates_v6:
                if ex_date not in dates_ready:
                    dates_ready[ex_date] = defaultdict(bool)
                dates_ready[ex_date]['routing_v6'] = True
                
            existing_dates_v4andv6 = set(db_handler.getListOfDatesFromArchiveIndex_v4andv6())
            
            for ex_date in existing_dates_v4andv6:
                if ex_date not in dates_ready:
                    dates_ready[ex_date] = defaultdict(bool)
                dates_ready[ex_date]['routing_v4andv6'] = True
                
        db_handler.close()
                         
        sys.stdout.write('Checking for existing CSV files\n')
                                       
        dates_ready = getDatesOfExistingCSVs(files_path, data_type, dates_ready)

        sys.stdout.write('Starting to generate CSV files from readable files\n')
        dates_ready = generateFilesFromReadables(readables_path, data_type,
                                                 dates_ready, files_path,
                                                 bgp_handler, output_file,
                                                 archive_folder, DEBUG)

        sys.stdout.write('Starting to generate CSV files from bgprib.mrt files\n')
        dates_ready = generateFilesFromOtherRoutingFiles(\
                                        archive_folder, data_type, dates_ready,
                                        files_path, bgp_handler, proc_num,
                                        'bgprib.mrt', output_file, DEBUG)
        
        sys.stdout.write('Starting to generate CSV files from dmp.gz files\n')
        dates_ready = generateFilesFromOtherRoutingFiles(\
                                        archive_folder, data_type, dates_ready,
                                        files_path, bgp_handler, proc_num,
                                        'dmp.gz', output_file, DEBUG)
                                                                    
        completeDatesSet = getCompleteDatesSet(proc_num)

        with open(output_file, 'a') as output:
            if data_type == 'visibility':
                output.write('Dates that are not in the prefixes or in the asns tables in the DB and for which some of the CSV files were not created.\n')
    
                for ex_date in completeDatesSet:
                    if ex_date not in dates_ready:
                        output.write('Visibility data not ready for date {}\n'.format(ex_date))
                    else:
                        for item in ['prefixes', 'originASes', 'middleASes']:
                            if item not in dates_ready[ex_date]:
                                output.write('Visibility data for {} not ready for date {}\n'.format(item, ex_date))
                            else:
                                for v in ['v4', 'v6']:
                                    if not dates_ready[ex_date][item][v]:
                                        output.write('Visibility data for {} coming from {} file not ready for date {}.\n'.format(item, v, ex_date))

            elif data_type == 'routing':
                output.write('Dates that are not in the archive_index table in the DB and for which some of the CSV files were not created.\n')

                for ex_date in completeDatesSet:
                    if ex_date not in dates_ready:
                        output.write('Routing data about v4 prefixes not ready for date {}\n'.format(ex_date))
                        output.write('Routing data about v6 prefixes not ready for date {}\n'.format(ex_date))
                    else:
                        if not dates_ready[ex_date]['routing_v4']:
                            output.write('Routing data about v4 prefixes not ready for date {}\n'.format(ex_date))
                        if not dates_ready[ex_date]['routing_v6']:
                            output.write('Routing data about v6 prefixes not ready for date {}\n'.format(ex_date))
                            
            sys.stdout.write('Finished generating CSV files. Output file {} created.\n'.format(output_file))
Example 6

def generateCSVFromUpdatesFile(updates_file, files_path, readables_path, DEBUG,
                               output_file):

    sys.stdout.write(
        'Starting to generate CSV file from {}\n'.format(updates_file))

    db_handler = DBHandler('')
    file_already_exists = db_handler.checkIfUpdatesFileExists(
        updates_file,
        BGPDataHandler.getDateFromFileName(updates_file).year)
    db_handler.close()

    if file_already_exists:
        return ''

    filename = updates_file.split('/')[-1]
    csv_file = '{}/{}.csv'.format(files_path, filename)

    if os.path.exists(csv_file):
        with open(output_file, 'a') as output:
            output.write(
                'CSV file for updates file {} already exists.\n'.format(
                    updates_file))
        return 'already_existed'

    if updates_file.endswith('log.gz'):
        unzipped_file = '{}/{}'.format(files_path, filename[:-3])

        if not os.path.exists(unzipped_file):
            with gzip.open(updates_file, 'rb') as gzip_file,\
                open(unzipped_file, 'wb') as output:
                try:
                    output.write(gzip_file.read())
                except IOError:
                    with open(output_file, 'a') as output:
                        output.write(
                            'IOError unzipping file {}\n'.format(updates_file))
                    return ''

        filtered_file = '{}.filtered'.format(unzipped_file)

        if not os.path.exists(filtered_file):
            with open(filtered_file, 'w') as filtered:
                cmd = shlex.split('grep debugging {}'.format(unzipped_file))
                p = subprocess.Popen(cmd, stdout=subprocess.PIPE)

                cmd2 = shlex.split('grep rcvd')
                p2 = subprocess.Popen(cmd2, stdin=p.stdout, stdout=filtered)
                p2.communicate()
                p.kill()

        announcements_file = '{}.announcements'.format(unzipped_file)

        if not os.path.exists(announcements_file):
            with open(announcements_file, 'w') as announcements_f:
                cmd = shlex.split("grep -v withdrawn {}".format(filtered_file))
                p = subprocess.Popen(cmd, stdout=announcements_f)
                p.communicate()

        withdrawals_file = '{}.withdrawals'.format(unzipped_file)

        if not os.path.exists(withdrawals_file):
            with open(withdrawals_file, 'w') as withdrawals_f:
                cmd = shlex.split("grep withdrawn {}".format(filtered_file))
                p = subprocess.Popen(cmd, stdout=withdrawals_f)
                p.communicate()


        # Withdrawal lines look like:
        # 2015/08/01 00:01:31 debugging: BGP: 202.12.28.1 rcvd UPDATE about 199.60.233.0/24 -- withdrawn
        # We first get a TextFileReader to read the file in chunks (in case it is too big)
        withdrawals_reader = pd.read_csv(
            withdrawals_file,
            iterator=True,
            chunksize=1000,
            header=None,
            sep=' ',
            index_col=False,
            usecols=[0, 1, 4, 8],
            names=['update_date', 'update_time', 'bgp_neighbor', 'prefix'])

        # We then put the chunks into a single DataFrame
        withdrawals_df = pd.concat(withdrawals_reader, ignore_index=True)

        withdrawals_df['upd_type'] = 'W'
        withdrawals_df['peerAS'] = -1
        withdrawals_df['source_file'] = updates_file

        withdrawals_df.to_csv(csv_file,
                              header=False,
                              index=False,
                              quoting=2,
                              columns=[
                                  'update_date', 'update_time', 'upd_type',
                                  'bgp_neighbor', 'peerAS', 'prefix',
                                  'source_file'
                              ])

        with open(announcements_file, 'r') as announcements_f, \
                open(csv_file, 'a') as csv_f:
            update_date = ''
            update_time = ''
            bgp_neighbor = ''
            peerAS = -1
            prefixes = []

            for line in announcements_f:
                if 'flapped' in line:
                    continue

                line_parts = line.strip().split()

                # If a new announcement starts, the lines look like:
                # 2015/08/01 00:01:26 debugging: BGP: 64.71.180.177 rcvd UPDATE w/ attr: nexthop 64.71.180.177, origin i, path 6939 3491 12389 57617
                # 2015/08/01 00:01:26 debugging: BGP: 64.71.180.177 rcvd 91.106.234.0/24
                # 2015/08/01 00:01:26 debugging: BGP: 64.71.180.177 rcvd 37.1.77.0/24
                # 2015/08/01 00:01:26 debugging: BGP: 64.71.180.177 rcvd 37.1.64.0/20
                # 2015/08/01 00:01:26 debugging: BGP: 64.71.180.177 rcvd 91.106.232.0/21
                if 'UPDATE' in line:
                    # If we were processing another announcement, we write it
                    # to the csv file
                    if len(prefixes) > 0:
                        for prefix in prefixes:
                            csv_f.write('"{}","{}","{}","{}",{},"{}","{}"\n'\
                                        .format(update_date, update_time,
                                                'A', bgp_neighbor, peerAS,
                                                prefix, updates_file))

                    update_date = line_parts[0]
                    update_time = line_parts[1]
                    bgp_neighbor = line_parts[4]
                    if 'path' in line:
                        peerAS = line.split('path')[1].split()[0]

                        if '.' in peerAS:
                            left, right = peerAS.split('.')
                            peerAS = int(left) * 65536 + int(right)
                        else:
                            peerAS = int(peerAS)
                    else:
                        peerAS = -1
                    prefixes = []

                else:
                    prefixes.append(line_parts[6].replace('...duplicate', ''))

            # We have to write to the csv file the last announcement
            if len(prefixes) > 0:
                for prefix in prefixes:
                    csv_f.write('"{}","{}","{}","{}",{},"{}","{}"\n'\
                                .format(update_date, update_time,
                                        'A', bgp_neighbor, peerAS,
                                        prefix, updates_file))
        os.remove(unzipped_file)
        os.remove(filtered_file)
        os.remove(announcements_file)
        os.remove(withdrawals_file)

    elif updates_file.endswith('bgpupd.mrt'):
        readable_file = BGPDataHandler.getReadableFile(updates_file, False,
                                                       readables_path, DEBUG)

        readable_woSTATE = '{}.woSTATE'.format(readable_file)
        if not os.path.exists(readable_woSTATE):
            with open(readable_woSTATE, 'w') as woSTATE:
                cmd = shlex.split('grep -v STATE {}'.format(readable_file))
                p = subprocess.Popen(cmd, stdout=woSTATE)
                p.communicate()

        readable_announcements = '{}.announcements'.format(readable_file)
        if not os.path.exists(readable_announcements):
            with open(readable_announcements, 'w') as announcements:
                cmd = shlex.split('grep \'|A|\' {}'.format(readable_woSTATE))
                p = subprocess.Popen(cmd, stdout=announcements)
                p.communicate()

        announcements_df = getDF(readable_announcements, 'A', updates_file)

        readable_withdrawals = '{}.withdrawals'.format(readable_file)
        if not os.path.exists(readable_withdrawals):
            with open(readable_withdrawals, 'w') as withdrawals:
                cmd = shlex.split('grep \'|W|\' {}'.format(readable_woSTATE))
                p = subprocess.Popen(cmd, stdout=withdrawals)
                p.communicate()

        withdrawals_df = getDF(readable_withdrawals, 'W', updates_file)

        updates_df = pd.concat([announcements_df, withdrawals_df])

        updates_df.to_csv(csv_file,
                          header=False,
                          index=False,
                          quoting=2,
                          columns=[
                              'update_date', 'update_time', 'upd_type',
                              'bgp_neighbor', 'peerAS', 'prefix', 'source_file'
                          ])

        os.remove(readable_file)
        os.remove(readable_woSTATE)
        os.remove(readable_announcements)
        os.remove(readable_withdrawals)

    return csv_file
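
The peerAS handling in Example 6 converts 4-byte AS numbers written in asdot notation ('X.Y') to asplain (X * 65536 + Y). A standalone sketch of just that conversion:

    def asdot_to_asplain(peer_as):
        # e.g. '3.10' (asdot) -> 3 * 65536 + 10 = 196618 (asplain)
        if '.' in peer_as:
            left, right = peer_as.split('.')
            return int(left) * 65536 + int(right)
        return int(peer_as)

    print(asdot_to_asplain('3.10'))  # -> 196618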
Example 7
    def loadUpdatesDFs(self, updates_date):
        db_handler = DBHandler('')
        self.updates_prefixes = db_handler.getUpdatesDF_prefix(updates_date)
        self.updates_peerASes = db_handler.getUpdatesDF_peerAS(updates_date)
        db_handler.close()
        return True
Example 8
    def loadStructuresFromArchive(self, routing_date=''):

        if routing_date == '':
            db_handler = DBHandler('')
            routing_files = db_handler.getPathsToMostRecentRoutingFiles()
            db_handler.close()

            if len(routing_files) == 0:
                sys.stderr.write("There are no files in the archive!\n")
                return False

        else:
            db_handler = DBHandler('')
            routing_files = db_handler.getPathsToRoutingFilesForDate(
                routing_date)
            db_handler.close()

            if len(routing_files) == 0:
                sys.stderr.write(
                    "There are no paths to routing files for the date provided in the DB.\n"
                )
                return False

        bgprib_file = []
        dmp_files = []
        for extension in routing_files:
            if extension == 'bgprib.mrt':
                bgprib_file.append(routing_files[extension])
                break
            else:  # extension == 'dmp.gz' or extension == 'v6.dmp.gz'
                dmp_files.append(routing_files[extension])

        # If a bgprib file is available, we use it
        if len(bgprib_file) > 0:
            routing_files = bgprib_file
        # If not, we use the dmp files
        else:
            routing_files = dmp_files

        files_date, bgp_df, ipv4Prefixes_radix, ipv6Prefixes_radix, \
            ipv4_longest_pref, ipv6_longest_pref = \
            BGPDataHandler.processMultipleRoutingFiles(routing_files, True,
                                                       False, self.files_path,
                                                       self.DEBUG)

        aux_date = datetime.strptime('1970', '%Y').date()

        # Initialize the flags so they are defined even if the files could
        # not be processed for a valid date
        routing_loaded = False
        updates_loaded = False

        if files_date != aux_date and files_date is not None:
            routing_loaded = True

            self.routingDate = files_date

            updates_loaded = self.loadUpdatesDFs(files_date)

        if bgp_df.shape[0] != 0:
            self.bgp_df = bgp_df

        if len(ipv4Prefixes_radix.prefixes()) != 0:
            self.ipv4Prefixes_radix = ipv4Prefixes_radix

        if len(ipv6Prefixes_radix.prefixes()) != 0:
            self.ipv6Prefixes_radix = ipv6Prefixes_radix

        if ipv4_longest_pref != -1:
            self.ipv4_longest_pref = ipv4_longest_pref
        else:
            self.ipv4_longest_pref = 32

        if ipv6_longest_pref != -1:
            self.ipv6_longest_pref = ipv6_longest_pref
        else:
            self.ipv6_longest_pref = 64

        if routing_loaded and updates_loaded:
            sys.stdout.write(
                "Class data structures were loaded successfully!\n")
            return True
        else:
            return False
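
A minimal usage sketch, assuming (as the other examples suggest, though the source does not confirm it) that these methods live on the BGPDataHandler class and that the date argument matches whatever format getPathsToRoutingFilesForDate expects:

    bgp_handler = BGPDataHandler(False, '/tmp/readables')

    # Load the most recent routing files indexed in the DB
    if bgp_handler.loadStructuresFromArchive():
        print(bgp_handler.routingDate)

    # Or load the files for a specific (hypothetical) date
    bgp_handler.loadStructuresFromArchive(routing_date='20170101')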