def main():
    """Main function"""
    parser = argparse.ArgumentParser()
    __create_parser_arguments(parser)
    args = parser.parse_args()

    global logger
    logger = util.setup_logger(args.logging_file, 'delete_measurements',
                               args.log_level)

    global engine
    engine = create_engine(args.database_name)
    Session = create_session_for_process(engine)
    db_session = Session()

    oldest_date_allowed = datetime.date.today() - datetime.timedelta(
        days=args.days_in_past)

    db_session.query(MeasurementResult).filter(
        MeasurementResult.timestamp < oldest_date_allowed).delete()
    db_session.commit()

    db_session.close()
    Session.remove()

    logger.info('deleted all measurements before {}'.format(
        oldest_date_allowed.strftime('%Y-%m-%d')))
def main():
    """Main function"""
    parser = argparse.ArgumentParser()
    __create_parser_arguments(parser)
    args = parser.parse_args()

    global log
    log = setup_logger(args.logging_file, 'parse_ripe_probes')

    engine = create_engine(args.database_name)
    Session = create_session_for_process(engine)
    db_session = Session()

    ripe_sema = threading.BoundedSemaphore(50)
    stop_event = threading.Event()
    token_generator_thread = start_token_generating_thread(
        ripe_sema, args.ripe_requests_per_second, stop_event)

    probes = get_probes(db_session, ripe_sema)

    stop_event.set()
    token_generator_thread.join()

    log.info('writing probes to tmp')
    os.makedirs(os.path.dirname(PROBE_CACHING_PATH), exist_ok=True)
    with open(PROBE_CACHING_PATH, 'w') as ripe_temp_file:
        probe_info_to_write = {
            probe.id: is_in_nat for probe, is_in_nat in probes.values()
        }
        json.dump(probe_info_to_write, ripe_temp_file)
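# Hedged sketch (assumption, not part of the original sources): the cache written
# above stores a JSON object mapping probe id -> is_in_nat, so a consumer could
# restore it with a helper like the one below. The name read_probe_cache and the
# int() key conversion are illustrative assumptions.
def read_probe_cache(path=PROBE_CACHING_PATH):
    """Return the {probe_id: is_in_nat} mapping dumped by main() above."""
    with open(path) as cache_file:
        return {int(probe_id): is_in_nat
                for probe_id, is_in_nat in json.load(cache_file).items()}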
def main():
    """Main function"""
    parser = argparse.ArgumentParser()
    __create_parser_arguments(parser)
    args = parser.parse_args()

    global logger
    logger = util.setup_logger(args.logging_file, 'parse_ripe_archive',
                               args.log_level)
    logger.info('starting caida parsing for archive path %s', args.archive_path)

    if not os.path.isdir(args.archive_path):
        print('Archive path does not lead to a directory', file=sys.stderr)
        return 1

    global engine
    engine = create_engine(args.database_name)
    Session = create_session_for_process(engine)
    db_session = Session()

    parsed_file_name = '{}-parsed-caida-files.txt'.format(args.database_name)
    parsed_files = set()

    if not os.path.exists(parsed_file_name):
        logger.debug('Creating parsed files history file for database %s',
                     args.database_name)
    else:
        with open(parsed_file_name) as parsed_files_history_file:
            for line in parsed_files_history_file:
                parsed_files.add(line.strip())

    filenames, probe_dct = get_filenames(args.archive_path, args.file_regex,
                                         args.days_in_past, parsed_files,
                                         db_session)

    if not filenames:
        logger.info('found no files to parse')
        return 0

    mp_manager = mp.Manager()
    new_parsed_files = mp_manager.Queue()

    with concurrent.ProcessPoolExecutor(
            max_workers=args.number_processes) as processing_executor:
        processing_results = processing_executor.map(
            functools.partial(parse_caida_data, not args.plaintext,
                              args.days_in_past, probe_dct, new_parsed_files),
            filenames)

        try:
            while True:
                try:
                    processing_results.__next__()
                except StopIteration:
                    break
        except Exception:
            logger.exception('process threw exception')
        finally:
            with open(parsed_file_name, 'a') as parsed_files_history_file:
                while not new_parsed_files.empty():
                    parsed_files_history_file.write(
                        new_parsed_files.get(timeout=1) + '\n')
def main():
    """Main function"""
    parser = argparse.ArgumentParser()
    __create_parser_arguments(parser)
    args = parser.parse_args()

    global logger
    logger = util.setup_logger(args.logging_file, 'parse_zmap_results')

    global engine
    engine = create_engine(args.database_name)
    Session = create_session_for_process(engine)
    db_session = Session()

    filenames = get_filenames(args.zmap_results_dir, args.file_regex)

    locations = {}
    config_parser = configparser.ConfigParser()
    if os.path.isfile(args.locations_config_file):
        config_parser.read(args.locations_config_file)

        for filename in filenames:
            location_name = __get_location_name(filename)
            if location_name not in config_parser or \
                    'lat' not in config_parser[location_name] or \
                    'lon' not in config_parser[location_name]:
                logger.critical(
                    '{} is not defined in the config file or does not have the '
                    'required format! Aborting!'.format(location_name))
                return 3

            location = location_for_coordinates(
                config_parser[location_name]['lat'],
                config_parser[location_name]['lon'],
                db_session)

            probe = ZmapProbe(probe_id=location_name, location=location)
            db_session.add(probe)
            db_session.commit()

            locations[location_name] = probe.id
    else:
        raise ValueError('locations_config_file path does not lead to a file')

    parse(filenames, locations, db_session)

    db_session.close()
    Session.remove()
def main():
    """Main function"""
    parser = argparse.ArgumentParser()
    __create_parser_arguments(parser)
    args = parser.parse_args()

    global logger
    logger = setup_logger(args.log_file, 'parse_codes', loglevel=args.log_level)
    logger.debug('starting')

    global engine
    engine = create_engine(args.database_name)

    if args.database_recreate:
        inp = input('Do you really want to recreate the database structure? (y)')
        if inp == 'y':
            recreate_db(engine)

    parse_codes(args)
def main():
    parser = configargparse.ArgParser(
        default_config_files=['ipdns_default.ini'])
    __create_parser_arguments(parser)
    args = parser.parse_args()

    start = time.time()

    global logger
    logger = util.setup_logger(args.logging_file, 'domain-processing',
                               args.log_level)

    global engine
    engine = create_engine(args.database_name)

    if args.database_recreate:
        inp = input('Do you really want to recreate the database structure? (y)')
        if inp == 'y':
            recreate_db(engine)

    if args.isp_ip_filter:
        logger.info('using strategy: {}'.format(args.regex_strategy))
    else:
        logger.info('processing without ip filtering')

    regex_strategy = RegexStrategy(value=args.regex_strategy)

    tlds = set()
    with open(args.tlds_file) as tld_file:
        for line in tld_file:
            line = line.strip()
            # skip empty and comment lines
            if line and line[0] != '#':
                tlds.add(line.lower())

    whitelist = set()
    if args.white_list_file_path:
        with open(args.white_list_file_path) as filter_list_file:
            for line in filter_list_file:
                whitelist.add(line.strip())
    else:
        whitelist = None

    processes = []
    parsed_ips = set()
    parsed_ips_lock = mp.Lock()

    finished_reading_event = mp.Event()
    line_queue = mp.Queue(args.number_processes * args.buffer_lines_per_process)
    line_thread = threading.Thread(target=read_file,
                                   args=(args.filepath, line_queue,
                                         finished_reading_event),
                                   name='file-reader')
    line_thread.start()

    time.sleep(1)

    stop_event = threading.Event()
    domain_label_queue = mp.Queue()
    domain_label_handle_thread = threading.Thread(target=handle_labels,
                                                  args=(domain_label_queue,
                                                        stop_event),
                                                  name='domain-label-handler')
    domain_label_handle_thread.start()

    for i in range(0, args.number_processes):
        process = mp.Process(target=preprocess_file_part,
                             args=(args.filepath, i, line_queue,
                                   args.isp_ip_filter, regex_strategy, tlds,
                                   whitelist, parsed_ips, parsed_ips_lock,
                                   domain_label_queue, finished_reading_event),
                             name='preprocessing_{}'.format(i))
        processes.append(process)
        process.start()

    line_thread.join()

    alive = len(processes)
    while alive > 0:
        try:
            for process in processes:
                process.join()
            process_sts = [pro.is_alive() for pro in processes]
            if process_sts.count(True) != alive:
                logger.debug('{} processes alive'.format(
                    process_sts.count(True)))
                alive = process_sts.count(True)
        except KeyboardInterrupt:
            pass

    stop_event.set()
    domain_label_handle_thread.join()
    domain_label_queue.join_thread()

    line_queue.close()
    line_queue.join_thread()

    # report whitelisted IP addresses that were never seen in the input
    if whitelist:
        whitelisted_not_parsed_as_correct = whitelist - parsed_ips
        if whitelisted_not_parsed_as_correct:
            ips_missing = ',\n'.join(whitelisted_not_parsed_as_correct)
            logger.warning(
                'IP addresses in whitelist but not parsed: \n{}'.format(
                    ips_missing))

    end = time.time()
    logger.info('Running time: {0}'.format((end - start)))
def main():
    """Main function"""
    parser = argparse.ArgumentParser()
    __create_parser_arguments(parser)
    args = parser.parse_args()

    global logger
    logger = util.setup_logger(args.log_file, 'validate-stats',
                               loglevel=args.log_level)
    logger.debug('starting')

    engine = create_engine(args.database_name)
    db_session = create_session_for_process(engine)()

    if args.allowed_measurement_age:
        oldest_date_allowed = datetime.datetime.now() - datetime.timedelta(
            seconds=args.allowed_measurement_age)
        sql_args = 'TIMESTAMP \'' + oldest_date_allowed.strftime(
            '%Y-%m-%d %H:%M:%S') + '\', '
    else:
        sql_args = 'NULL, '

    if args.minimum_measurement_age:
        sql_args += 'TIMESTAMP \'' + args.minimum_measurement_age + '\', '
    else:
        sql_args += 'NULL, '

    sql_args += str(args.exclude_traceroute) + ', ' + str(
        args.exclude_caida_measurements)

    sql_query = 'SELECT * from domainsWithDistanceRTTs({});'.format(sql_args)
    results = db_session.execute(sql_query)

    rtt_distances = []
    domains_count = collections.defaultdict(int)
    location_id_count = collections.defaultdict(int)
    probes_count = collections.defaultdict(int)

    for domain_id, domain_name, hint_location_id, hint_location_name, \
            location_hint_id, measurement_result_id, probe_id, distance, \
            min_rtt in results:
        rtt_distances.append((domain_id, min_rtt, distance))
        domains_count[domain_base_name(domain_name)] += 1
        location_id_count[hint_location_id] += 1
        probes_count[probe_id] += 1

    with open(args.output_filename, 'w') as output_file:
        str_to_write = '\n'.join([
            '{}; {}; {}'.format(domain_id, rtt, dist)
            for domain_id, rtt, dist in rtt_distances
        ])
        output_file.write(str_to_write)

    print('domains_count')
    pprint.pprint(
        sorted(domains_count.items(), key=operator.itemgetter(1),
               reverse=True))
    print('location_id_count')
    pprint.pprint(
        sorted(location_id_count.items(), key=operator.itemgetter(1),
               reverse=True))
    print('probes_count')
    pprint.pprint(
        sorted(probes_count.items(), key=operator.itemgetter(1),
               reverse=True))
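# Hedged alternative sketch (not part of the original script): the same stored
# procedure call could be issued with bound parameters via sqlalchemy.text()
# instead of string concatenation; None binds to SQL NULL. The helper name and
# parameter names below are illustrative assumptions.
from sqlalchemy import text

def query_domains_with_distance_rtts(db_session, oldest_date_allowed=None,
                                     minimum_measurement_age=None,
                                     exclude_traceroute=False,
                                     exclude_caida_measurements=False):
    """Execute domainsWithDistanceRTTs with bound parameters."""
    query = text('SELECT * FROM domainsWithDistanceRTTs('
                 ':oldest, :minimum, :exclude_traceroute, :exclude_caida)')
    return db_session.execute(query, {
        'oldest': oldest_date_allowed,
        'minimum': minimum_measurement_age,
        'exclude_traceroute': exclude_traceroute,
        'exclude_caida': exclude_caida_measurements,
    })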
def main():
    """Main function"""
    parser = argparse.ArgumentParser()
    __create_parser_arguments(parser)
    args = parser.parse_args()

    global logger
    logger = util.setup_logger(args.logging_file, 'parse_ripe_archive')

    if not os.path.isdir(args.archive_path):
        print('Archive path does not lead to a directory', file=sys.stderr)
        return 1

    global engine
    engine = create_engine(args.database_name)

    parsed_file_name = '{}-parsed-ripe-files.txt'.format(args.database_name)
    parsed_files = set()

    if not os.path.exists(parsed_file_name):
        logger.debug(
            'Creating parsed files history file for database {}'.format(
                args.database_name))
    else:
        with open(parsed_file_name) as parsed_files_history_file:
            for line in parsed_files_history_file:
                parsed_files.add(line.strip())

    file_names = get_filenames(args.archive_path, args.file_regex,
                               parsed_files, args.days_in_past)

    if not file_names:
        logger.info('No files found')
        return

    logger.info('%s files to parse', len(file_names))

    Session = create_session_for_process(engine)
    db_session = Session()
    probe_dct = load_probes_from_cache(db_session)

    # load lazy attributes before detaching the probes from the session
    for probe, _ in probe_dct.values():
        _ = probe.id
        db_session.expunge(probe)

    db_session.close()
    Session.remove()

    new_parsed_files = mp.Queue()
    probe_latency_queue = mp.Queue()
    finish_event = threading.Event()
    probe_latency_thread = threading.Thread(target=update_second_hop_latency,
                                            args=(probe_latency_queue,
                                                  finish_event),
                                            name='update probe latency')
    probe_latency_thread.start()

    finished_reading_event = mp.Event()
    line_queue = mp.Queue(args.number_processes * buffer_lines_per_process)

    # defined before the try block so the KeyboardInterrupt handler can join them
    processes = []
    try:
        with concurrent.ThreadPoolExecutor(
                max_workers=args.workers) as read_thread_executor:
            read_thread_results = read_thread_executor.map(
                functools.partial(read_file, not args.plaintext,
                                  args.days_in_past, line_queue,
                                  new_parsed_files),
                file_names)

            time.sleep(1)

            for index in range(0, args.number_processes):
                process = mp.Process(target=parse_ripe_data,
                                     args=(line_queue, finished_reading_event,
                                           probe_dct, probe_latency_queue),
                                     name='ripe-parsing-{}'.format(index))
                processes.append(process)
                process.start()

            try:
                while True:
                    try:
                        read_thread_results.__next__()
                    except StopIteration:
                        break
            except Exception:
                logger.exception('read thread returned with exception')

            finished_reading_event.set()

            for process in processes:
                process.join()
    except KeyboardInterrupt:
        finished_reading_event.set()
        print('trying to do a graceful shutdown, press Ctrl+C another time to '
              'force shutdown')
        for process in processes:
            process.join()
    finally:
        with open(parsed_file_name, 'a') as parsed_files_history_file:
            while not new_parsed_files.empty():
                filename = new_parsed_files.get()
                parsed_files_history_file.write(filename + '\n')

        finish_event.set()
        logger.debug('finish event set waiting for second hop latency thread')
        probe_latency_thread.join()
def main():
    """Main function"""
    parser = argparse.ArgumentParser()
    __create_parser_arguments(parser)
    args = parser.parse_args()

    global engine
    engine = create_engine(args.database_name)

    global logger
    logger = util.setup_logger(args.log_file, 'check', loglevel=args.log_level,
                               hourly_log_rotation=True)
    logger.debug('starting')

    start_time = time.time()

    Session = create_session_for_process(engine)
    db_session = Session()
    db_session.expire_on_commit = False

    ripe_slow_down_sema = mp.BoundedSemaphore(args.ripe_request_burst_limit)
    ripe_create_sema = mp.Semaphore(args.measurement_limit)

    global MAX_THREADS
    if args.debug:
        MAX_THREADS = 1
    else:
        MAX_THREADS = int(args.measurement_limit / args.number_processes * 1.5)

    finish_event = threading.Event()
    generator_thread = util.start_token_generating_thread(
        ripe_slow_down_sema, args.ripe_request_limit, finish_event)

    locations = db_session.query(LocationInfo)

    if not locations.count():
        logger.error('No locations found! Aborting!')
        print('No locations found! Aborting!')
        return 1

    if not args.disable_probe_fetching:
        probe_distances = load_probes_from_cache(db_session).values()
        location_to_probes_dct = assign_location_probes(
            locations, [probe for probe, _ in probe_distances], db_session)
        db_session.commit()

        null_locations = [
            location for location in locations
            if location.id not in location_to_probes_dct
        ]
        logger.info('{} locations without nodes'.format(len(null_locations)))
    else:
        locations = db_session.query(LocationInfo)
        location_to_probes_dct = {}
        loc_without_probes = 0
        probes = set()

        for location in locations:
            if location.nearby_probes:
                location_to_probes_dct[location.id] = []
                for probe in location.nearby_probes:
                    probes.add(probe)
                    # touch lazy-loaded attributes so they stay usable after
                    # the probe is expunged from the session below
                    _ = (probe.location.lat, probe.location.lon,
                         probe.location.id, probe.second_hop_latency,
                         probe.probe_id, probe.id)
                    location_to_probes_dct[location.id].append(
                        (probe,
                         location.gps_distance_haversine(probe.location),
                         probe.location))
            else:
                loc_without_probes += 1

        logger.debug('expunging probes')
        for probe in probes:
            try:
                db_session.expunge(probe.location)
                db_session.expunge(probe)
            except InvalidRequestError:
                pass

        logger.debug('updating probes')
        update_probes(probes)

        logger.info('{} locations without nodes'.format(loc_without_probes))

    measurement_strategy = MeasurementStrategy(args.measurement_strategy)

    logger.debug('finished ripe')

    processes = []
    process_count = args.number_processes
    if args.debug:
        process_count = 1

    if args.ip_filter_file:
        ip_set = set()
        with open(args.ip_filter_file) as ip_filter_file:
            for line in ip_filter_file:
                ip_set.add(line.strip())
        ips = list(ip_set)
    else:
        ips = None

    db_session.close()

    for pid in range(0, process_count):
        if ips:
            # split the IP list into near-equal contiguous chunks per process
            ips_count = len(ips)
            ips_start_index = int(pid * (ips_count / process_count))
            ips_end_index = int((pid + 1) * (ips_count / process_count))
            if pid + 1 == process_count:
                ips_end_index = ips_count
            ips_for_process = ips[ips_start_index:ips_end_index]
        else:
            ips_for_process = None

        process = mp.Process(
            target=ripe_check_process,
            args=(pid, ripe_create_sema, ripe_slow_down_sema, args.bill_to,
                  args.without_new_measurements, args.allowed_measurement_age,
                  args.api_key, args.domain_block_limit, process_count,
                  args.include_ip_encoded, measurement_strategy,
                  args.probes_per_measurement, args.buffer_time,
                  args.measurement_packets, args.use_efficient_probes,
                  location_to_probes_dct, args.stop_without_old_results,
                  ips_for_process, args.endless_measurements,
                  args.random_domains),
            name='domain_checking_{}'.format(pid))
        processes.append(process)

    for process in processes:
        process.start()

    alive = len(processes)
    while alive > 0:
        try:
            process_sts = [pro.is_alive() for pro in processes]
            if process_sts.count(True) != alive:
                alive = process_sts.count(True)
                logger.debug('{} processes alive'.format(alive))
            for process in processes:
                process.join()
        except KeyboardInterrupt:
            pass

    if finish_event:
        finish_event.set()
    if generator_thread:
        generator_thread.join()

    logger.debug('{} processes alive'.format(alive))

    end_time = time.time()
    logger.info('running time: {}'.format((end_time - start_time)))
    return 0
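# Hedged illustration (not in the original sources): the index arithmetic above
# assigns each worker a near-equal contiguous slice of the IP list, with the last
# process taking any remainder. A standalone equivalent of that chunking logic:
def chunk_for_process(items, pid, process_count):
    """Return the contiguous slice of items handled by process number pid."""
    start = int(pid * (len(items) / process_count))
    if pid + 1 == process_count:
        end = len(items)
    else:
        end = int((pid + 1) * (len(items) / process_count))
    return items[start:end]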
def main():
    """Main function"""
    parser = configargparse.ArgParser(
        default_config_files=['find_default.ini'])
    __create_parser_arguments(parser)
    args = parser.parse_args()

    global logger
    logger = util.setup_logger(args.logging_file, 'find',
                               loglevel=args.log_level)

    global engine
    engine = create_engine(args.database_name)

    trie = create_trie(args.code_blacklist_file, args.word_blacklist_file)

    code_to_location_blacklist = {}
    if args.code_to_location_blacklist_file:
        with open(args.code_to_location_blacklist_file
                  ) as code_to_location_blacklist_file:
            json_txt = ""
            for line in code_to_location_blacklist_file:
                line = line.strip()
                # skip empty and comment lines
                if line and line[0] != '#':
                    json_txt += line
            code_to_location_blacklist = json.loads(json_txt)

    location_match_queue = mp.Queue()
    stop_event = threading.Event()
    handle_location_matches_thread = threading.Thread(
        target=handle_location_matches,
        name='handle-location-matches',
        args=(location_match_queue, stop_event))
    handle_location_matches_thread.start()

    processes = []
    for index in range(0, args.number_processes):
        process = mp.Process(target=search_process,
                             args=(index, trie, code_to_location_blacklist,
                                   args.domain_block_limit,
                                   args.number_processes,
                                   location_match_queue),
                             kwargs={
                                 'amount': args.amount,
                                 'debug': args.log_level == 'DEBUG'
                             },
                             name='find_locations_{}'.format(index))
        process.start()
        processes.append(process)

    for process in processes:
        process.join()

    Session = create_session_for_process(engine)
    db_session = Session()
    update_query = update(DomainLabel).values(
        last_searched=datetime.datetime.now())
    db_session.execute(update_query)
    db_session.commit()
    db_session.close()

    stop_event.set()
    handle_location_matches_thread.join()
    location_match_queue.join_thread()