Example #1
def main():
    """Main function"""
    parser = argparse.ArgumentParser()
    __create_parser_arguments(parser)
    args = parser.parse_args()

    global logger
    logger = util.setup_logger(args.logging_file, 'delete_measurements',
                               args.log_level)

    global engine
    engine = create_engine(args.database_name)

    Session = create_session_for_process(engine)
    db_session = Session()

    oldest_date_allowed = datetime.date.today() - datetime.timedelta(
        days=args.days_in_past)
    db_session.query(MeasurementResult).filter(
        MeasurementResult.timestamp < oldest_date_allowed).delete()

    db_session.commit()
    db_session.close()
    Session.remove()

    logger.info('deleted all measurements before {}'.format(
        oldest_date_allowed.strftime('%Y-%m-%d')))
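
All of the examples in this section obtain their database sessions from a project helper called create_session_for_process, whose implementation is not shown here. A minimal sketch of such a per-process factory, assuming plain SQLAlchemy (the project's real helper may differ):

# Hypothetical sketch; the project's actual helper is defined elsewhere.
from sqlalchemy.orm import scoped_session, sessionmaker

def create_session_for_process(engine):
    # A scoped_session registry supports both Session() to obtain a session
    # and Session.remove() to dispose of it, matching the usage above.
    return scoped_session(sessionmaker(bind=engine))
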
Example #2
def main():
    """Main function"""
    parser = argparse.ArgumentParser()
    __create_parser_arguments(parser)
    args = parser.parse_args()

    global log
    log = setup_logger(args.logging_file, 'parse_ripe_probes')

    engine = create_engine(args.database_name)
    Session = create_session_for_process(engine)
    db_session = Session()

    ripe_sema = threading.BoundedSemaphore(50)
    stop_event = threading.Event()
    token_generator_thread = start_token_generating_thread(
        ripe_sema, args.ripe_requests_per_second, stop_event)
    probes = get_probes(db_session, ripe_sema)

    stop_event.set()
    token_generator_thread.join()

    log.info('writing probes to tmp')

    os.makedirs(os.path.dirname(PROBE_CACHING_PATH), exist_ok=True)

    with open(PROBE_CACHING_PATH, 'w') as ripe_temp_file:
        probe_info_to_write = {
            probe.id: is_in_nat
            for probe, is_in_nat in probes.values()
        }
        json.dump(probe_info_to_write, ripe_temp_file)
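
The BoundedSemaphore plus start_token_generating_thread pair implements a token-bucket rate limit for RIPE API requests: workers acquire the semaphore before each call, and a background thread tops it up at the configured rate. The project's implementation is not shown; a minimal sketch under that assumption:

import threading
import time

def start_token_generating_thread(sema, tokens_per_second, stop_event):
    # Hypothetical sketch: release one request token per interval until stopped.
    def generate_tokens():
        while not stop_event.is_set():
            try:
                sema.release()
            except ValueError:
                pass  # the BoundedSemaphore is already full; drop the token
            time.sleep(1 / tokens_per_second)

    thread = threading.Thread(target=generate_tokens, name='token-generator')
    thread.start()
    return thread
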
Example #3
def main():
    """Main function"""
    parser = argparse.ArgumentParser()
    __create_parser_arguments(parser)
    args = parser.parse_args()

    global logger
    logger = util.setup_logger(args.logging_file, 'parse_ripe_archive', args.log_level)

    logger.info('starting CAIDA parsing for archive path %s', args.archive_path)

    if not os.path.isdir(args.archive_path):
        print('Archive path does not lead to a directory', file=sys.stderr)
        return 1

    global engine
    engine = create_engine(args.database_name)

    Session = create_session_for_process(engine)
    db_session = Session()

    parsed_file_name = '{}-parsed-caida-files.txt'.format(args.database_name)
    parsed_files = set()
    
    if not os.path.exists(parsed_file_name):
        logger.debug('Creating parsed files history file for database %s', args.database_name)
    else:
        with open(parsed_file_name) as parsed_files_history_file:
            for line in parsed_files_history_file:
                parsed_files.add(line.strip())

    filenames, probe_dct = get_filenames(args.archive_path, args.file_regex, args.days_in_past,
                                         parsed_files, db_session)

    if not filenames:
        logger.info('found no files to parse')
        return 0

    mp_manager = mp.Manager()
    new_parsed_files = mp_manager.Queue()

    with concurrent.ProcessPoolExecutor(max_workers=args.number_processes) as processing_executor:
        processing_results = processing_executor.map(
            functools.partial(parse_caida_data, not args.plaintext, args.days_in_past,
                              probe_dct, new_parsed_files), filenames)

        try:
            while True:
                try:
                    next(processing_results)
                except StopIteration:
                    break
                except Exception:
                    logger.exception('process threw exception')
        finally:
            with open(parsed_file_name, 'a') as parsed_files_history_file:
                while not new_parsed_files.empty():
                    parsed_files_history_file.write(new_parsed_files.get(timeout=1) + '\n')
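
Note that new_parsed_files comes from mp.Manager() rather than being a plain mp.Queue: ProcessPoolExecutor workers receive their arguments by pickling, and only the manager's proxy queue survives that trip. A small standalone illustration (all names here are made up):

import multiprocessing as mp
from concurrent import futures

def record(queue):
    queue.put('done')

if __name__ == '__main__':
    manager = mp.Manager()
    queue = manager.Queue()  # proxy queue: picklable, unlike mp.Queue()
    with futures.ProcessPoolExecutor(max_workers=2) as executor:
        list(executor.map(record, [queue] * 2))
    while not queue.empty():
        print(queue.get())
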
Example #4
def main():
    """Main function"""
    parser = argparse.ArgumentParser()
    __create_parser_arguments(parser)
    args = parser.parse_args()

    global logger
    logger = util.setup_logger(args.logging_file, 'parse_zmap_results')

    global engine
    engine = create_engine(args.database_name)

    Session = create_session_for_process(engine)
    db_session = Session()

    filenames = get_filenames(args.zmap_results_dir, args.file_regex)
    locations = {}

    config_parser = configparser.ConfigParser()
    if os.path.isfile(args.locations_config_file):
        config_parser.read(args.locations_config_file)
        for filename in filenames:
            location_name = __get_location_name(filename)
            if location_name not in config_parser or \
                    'lat' not in config_parser[location_name] or \
                    'lon' not in config_parser[location_name]:
                logger.critical(
                    '{} is not defined in the config file or does not have '
                    'the right format! Aborting!'.format(location_name))
                return 3

            location = location_for_coordinates(
                config_parser[location_name]['lat'],
                config_parser[location_name]['lon'], db_session)
            probe = ZmapProbe(probe_id=location_name, location=location)
            db_session.add(probe)
            db_session.commit()
            locations[location_name] = probe.id
    else:
        raise ValueError('locations_config_file path does not lead to a file')

    parse(filenames, locations, db_session)

    db_session.close()
    Session.remove()
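
For reference, the locations config file read above needs one INI section per vantage point (whatever name __get_location_name derives from the result filename), each with lat and lon keys. The section names below are placeholders, not values from the project:

[frankfurt]
lat = 50.11
lon = 8.68

[tokyo]
lat = 35.68
lon = 139.69
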
Example #5
def main():
    """Main function"""
    parser = argparse.ArgumentParser()
    __create_parser_arguments(parser)
    args = parser.parse_args()

    global logger
    logger = setup_logger(args.log_file, 'parse_codes', loglevel=args.log_level)
    logger.debug('starting')

    global engine
    engine = create_engine(args.database_name)

    if args.database_recreate:
        inp = input('Do you really want to recreate the database structure? (y)')
        if inp == 'y':
            recreate_db(engine)

    parse_codes(args)
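
recreate_db is defined elsewhere in the project; with a declarative SQLAlchemy setup it would plausibly amount to the following sketch, where Base is an assumed name for the project's declarative base:

def recreate_db(engine):
    # Hypothetical sketch: destructive reset of the whole schema.
    Base.metadata.drop_all(bind=engine)
    Base.metadata.create_all(bind=engine)
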
Example #6
def main():
    parser = configargparse.ArgParser(
        default_config_files=['ipdns_default.ini'])

    __create_parser_arguments(parser)
    args = parser.parse_args()

    start = time.time()

    global logger
    logger = util.setup_logger(args.logging_file, 'domain-processing',
                               args.log_level)

    global engine
    engine = create_engine(args.database_name)

    if args.database_recreate:
        inp = input(
            'Do you really want to recreate the database structure? (y)')
        if inp == 'y':
            recreate_db(engine)

    if args.isp_ip_filter:
        logger.info('using strategy: {}'.format(args.regex_strategy))
    else:
        logger.info('processing without ip filtering')

    regex_strategy = RegexStrategy(value=args.regex_strategy)

    tlds = set()
    with open(args.tlds_file) as tld_file:
        for line in tld_file:
            line = line.strip()
            if line and line[0] != '#':
                tlds.add(line.lower())

    whitelist = set()
    if args.white_list_file_path:
        with open(args.white_list_file_path) as filter_list_file:
            for line in filter_list_file:
                whitelist.add(line.strip())
    else:
        whitelist = None

    processes = []
    parsed_ips = set()
    parsed_ips_lock = mp.Lock()

    finished_reading_event = mp.Event()

    line_queue = mp.Queue(args.number_processes *
                          args.buffer_lines_per_process)
    line_thread = threading.Thread(target=read_file,
                                   args=(args.filepath, line_queue,
                                         finished_reading_event),
                                   name='file-reader')
    line_thread.start()
    time.sleep(1)

    stop_event = threading.Event()
    domain_label_queue = mp.Queue()
    domain_label_handle_thread = threading.Thread(target=handle_labels,
                                                  args=(domain_label_queue,
                                                        stop_event),
                                                  name='domain-label-handler')
    domain_label_handle_thread.start()

    for i in range(0, args.number_processes):
        process = mp.Process(target=preprocess_file_part,
                             args=(args.filepath, i, line_queue,
                                   args.isp_ip_filter, regex_strategy, tlds,
                                   whitelist, parsed_ips, parsed_ips_lock,
                                   domain_label_queue, finished_reading_event),
                             name='preprocessing_{}'.format(i))
        processes.append(process)
        process.start()

    line_thread.join()

    alive = len(processes)
    while alive > 0:
        try:
            for process in processes:
                process.join()
            process_sts = [pro.is_alive() for pro in processes]
            if process_sts.count(True) != alive:
                logger.debug('{} processes alive'.format(
                    process_sts.count(True)))
                alive = process_sts.count(True)
        except KeyboardInterrupt:
            pass

    stop_event.set()
    domain_label_handle_thread.join()
    domain_label_queue.join_thread()

    line_queue.close()
    line_queue.join_thread()

    if whitelist:
        whitelisted_not_parsed_as_correct = whitelist - parsed_ips
    else:
        whitelisted_not_parsed_as_correct = set()

    if whitelisted_not_parsed_as_correct:
        ips_missing = ',\n'.join(whitelisted_not_parsed_as_correct)
        logger.warning('IP addresses in whitelist but not parsed: \n{}'.format(
            ips_missing))

    end = time.time()
    logger.info('Running time: {0}'.format((end - start)))
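
The file-reader thread is the producer side of the bounded line_queue that the preprocessing workers drain. Its implementation is not shown; a plausible minimal sketch:

def read_file(filepath, line_queue, finished_reading_event):
    # Hypothetical sketch of the producer behind the 'file-reader' thread.
    with open(filepath) as input_file:
        for line in input_file:
            line_queue.put(line)  # blocks while the bounded queue is full
    finished_reading_event.set()  # signal consumers that no more lines follow
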
Example #7
def main():
    """Main function"""
    parser = argparse.ArgumentParser()
    __create_parser_arguments(parser)
    args = parser.parse_args()

    global logger
    logger = util.setup_logger(args.log_file,
                               'validate-stats',
                               loglevel=args.log_level)
    logger.debug('starting')

    engine = create_engine(args.database_name)

    db_session = create_session_for_process(engine)()

    if args.allowed_measurement_age:
        oldest_date_allowed = datetime.datetime.now() - datetime.timedelta(
            seconds=args.allowed_measurement_age)
        sql_args = 'TIMESTAMP \'' + oldest_date_allowed.strftime(
            '%Y-%m-%d %H:%M:%S') + '\', '
    else:
        sql_args = 'NULL, '

    if args.minimum_measurement_age:
        sql_args += 'TIMESTAMP \'' + args.minimum_measurement_age + '\', '
    else:
        sql_args += 'NULL, '

    sql_args += str(args.exclude_traceroute) + ', ' + str(
        args.exclude_caida_measurements)

    sql_query = 'SELECT * from domainsWithDistanceRTTs({});'.format(sql_args)

    results = db_session.execute(sql_query)

    rtt_distances = []
    domains_count = collections.defaultdict(int)
    location_id_count = collections.defaultdict(int)
    probes_count = collections.defaultdict(int)

    for domain_id, domain_name, hint_location_id, hint_location_name, location_hint_id, \
            measurement_result_id, probe_id, distance, min_rtt in results:
        rtt_distances.append((domain_id, min_rtt, distance))
        domains_count[domain_base_name(domain_name)] += 1
        location_id_count[hint_location_id] += 1
        probes_count[probe_id] += 1

    with open(args.output_filename, 'w') as output_file:
        str_to_write = '\n'.join([
            '{}; {}; {}'.format(domain_id, rtt, dist)
            for domain_id, rtt, dist in rtt_distances
        ])
        output_file.write(str_to_write)

    print('domains count: ')
    pprint.pprint(
        sorted(domains_count.items(), key=operator.itemgetter(1),
               reverse=True))
    print('location_id_count')
    pprint.pprint(
        sorted(location_id_count.items(),
               key=operator.itemgetter(1),
               reverse=True))
    print('probes_count')
    pprint.pprint(
        sorted(probes_count.items(), key=operator.itemgetter(1), reverse=True))
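
Assembling the TIMESTAMP literals by string concatenation works, but the same stored-function call can be issued with bound parameters, which avoids quoting problems. A sketch assuming SQLAlchemy's text() construct (not the project's actual code):

from sqlalchemy import text

sql_query = text('SELECT * FROM domainsWithDistanceRTTs('
                 ':oldest, :minimum, :exclude_traceroute, :exclude_caida);')
results = db_session.execute(sql_query, {
    'oldest': oldest_date_allowed if args.allowed_measurement_age else None,
    'minimum': args.minimum_measurement_age or None,
    'exclude_traceroute': args.exclude_traceroute,
    'exclude_caida': args.exclude_caida_measurements,
})
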
Example #8
def main():
    """Main function"""
    parser = argparse.ArgumentParser()
    __create_parser_arguments(parser)
    args = parser.parse_args()

    global logger
    logger = util.setup_logger(args.logging_file, 'parse_ripe_archive')

    if not os.path.isdir(args.archive_path):
        print('Archive path does not lead to a directory', file=sys.stderr)
        return 1

    global engine
    engine = create_engine(args.database_name)

    parsed_file_name = '{}-parsed-ripe-files.txt'.format(args.database_name)
    parsed_files = set()

    if not os.path.exists(parsed_file_name):
        logger.debug(
            'Creating parsed files history file for database {}'.format(
                args.database_name))
    else:
        with open(parsed_file_name) as parsed_files_history_file:
            for line in parsed_files_history_file:
                parsed_files.add(line.strip())

    file_names = get_filenames(args.archive_path, args.file_regex,
                               parsed_files, args.days_in_past)

    if not file_names:
        logger.info('No files found')
        return

    logger.info('%s files to parse', len(file_names))

    Session = create_session_for_process(engine)
    db_session = Session()
    probe_dct = load_probes_from_cache(db_session)

    for probe, _ in probe_dct.values():
        # touch the id attribute to force a lazy load before expunging
        _ = probe.id
        db_session.expunge(probe)

    db_session.close()
    Session.remove()

    new_parsed_files = mp.Queue()
    probe_latency_queue = mp.Queue()
    finish_event = threading.Event()

    probe_latency_thread = threading.Thread(target=update_second_hop_latency,
                                            args=(probe_latency_queue,
                                                  finish_event),
                                            name='update probe latency')
    probe_latency_thread.start()

    finished_reading_event = mp.Event()

    line_queue = mp.Queue(args.number_processes * buffer_lines_per_process)

    try:
        with concurrent.ThreadPoolExecutor(
                max_workers=args.workers) as read_thread_executor:
            read_thread_results = read_thread_executor.map(
                functools.partial(read_file, not args.plaintext,
                                  args.days_in_past, line_queue,
                                  new_parsed_files), file_names)
            time.sleep(1)

            processes = []

            for index in range(0, args.number_processes):
                process = mp.Process(target=parse_ripe_data,
                                     args=(line_queue, finished_reading_event,
                                           probe_dct, probe_latency_queue),
                                     name='ripe-parsing-{}'.format(index))
                processes.append(process)
                process.start()

            try:
                while True:
                    try:
                        next(read_thread_results)
                    except StopIteration:
                        break
                    except Exception:
                        logger.exception('read thread returned with exception')

                finished_reading_event.set()

                for process in processes:
                    process.join()
            except KeyboardInterrupt:
                finished_reading_event.set()
                print(
                    'trying to do a graceful shutdown; press Ctrl+C again to '
                    'force shutdown')

                for process in processes:
                    process.join()

    finally:
        with open(parsed_file_name, 'a') as parsed_files_history_file:
            while not new_parsed_files.empty():
                filename = new_parsed_files.get()
                parsed_files_history_file.write(filename + '\n')

    finish_event.set()
    logger.debug('finish event set waiting for second hop latency thread')

    probe_latency_thread.join()
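
update_second_hop_latency drains probe_latency_queue until finish_event is set and the queue is empty. Its body is not shown; the usual consumer loop for this shutdown protocol looks roughly like:

import queue

def update_second_hop_latency(probe_latency_queue, finish_event):
    # Hypothetical sketch; the real function persists latencies to the database.
    while not (finish_event.is_set() and probe_latency_queue.empty()):
        try:
            item = probe_latency_queue.get(timeout=1)
        except queue.Empty:
            continue
        logger.debug('second hop latency update: %s', item)
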
Example #9
def main():
    """Main function"""
    parser = argparse.ArgumentParser()
    __create_parser_arguments(parser)
    args = parser.parse_args()

    global engine
    engine = create_engine(args.database_name)

    global logger
    logger = util.setup_logger(args.log_file,
                               'check',
                               loglevel=args.log_level,
                               hourly_log_rotation=True)
    logger.debug('starting')

    start_time = time.time()
    Session = create_session_for_process(engine)
    db_session = Session()
    db_session.expire_on_commit = False

    ripe_slow_down_sema = mp.BoundedSemaphore(args.ripe_request_burst_limit)
    ripe_create_sema = mp.Semaphore(args.measurement_limit)
    global MAX_THREADS

    if args.debug:
        MAX_THREADS = 1
    else:
        MAX_THREADS = int(args.measurement_limit / args.number_processes * 1.5)

    finish_event = threading.Event()
    generator_thread = util.start_token_generating_thread(
        ripe_slow_down_sema, args.ripe_request_limit, finish_event)

    locations = db_session.query(LocationInfo)

    if not locations.count():
        logger.error('No locations found! Aborting!')
        print('No locations found! Aborting!')
        return 1

    if not args.disable_probe_fetching:
        probe_distances = load_probes_from_cache(db_session).values()

        location_to_probes_dct = assign_location_probes(
            locations, [probe for probe, _ in probe_distances], db_session)
        db_session.commit()

        null_locations = [
            location for location in locations
            if location.id not in location_to_probes_dct
        ]

        logger.info('{} locations without nodes'.format(len(null_locations)))
    else:
        locations = db_session.query(LocationInfo)
        location_to_probes_dct = {}

        loc_without_probes = 0
        probes = set()

        for location in locations:
            if location.nearby_probes:
                location_to_probes_dct[location.id] = []
                for probe in location.nearby_probes:
                    probes.add(probe)
                    # touch lazy-loaded attributes so they remain populated
                    # after the probe is expunged from the session below
                    _ = str(probe.location.lat + probe.location.lon) + probe.location.id + \
                        str(probe.second_hop_latency) + probe.probe_id + str(probe.id)
                    location_to_probes_dct[location.id].append(
                        (probe,
                         location.gps_distance_haversine(probe.location),
                         probe.location))
            else:
                loc_without_probes += 1

        logger.debug('expunging probes')

        for probe in probes:
            try:
                db_session.expunge(probe.location)
                db_session.expunge(probe)
            except InvalidRequestError:
                pass

        logger.debug('updating probes')
        update_probes(probes)

        logger.info('{} locations without nodes'.format(loc_without_probes))

    measurement_strategy = MeasurementStrategy(args.measurement_strategy)

    logger.debug('finished ripe')

    processes = []

    process_count = args.number_processes

    if args.debug:
        process_count = 1

    if args.ip_filter_file:
        ip_set = set()
        with open(args.ip_filter_file) as ip_filter_file:
            for line in ip_filter_file:
                ip_set.add(line.strip())

        ips = list(ip_set)
    else:
        ips = None

    db_session.close()

    for pid in range(0, process_count):

        if ips:
            ips_count = len(ips)
            ips_start_index = int(pid * (ips_count / process_count))
            ips_end_index = int((pid + 1) * (ips_count / process_count))

            if pid + 1 == process_count:
                ips_end_index = ips_count

            ips_for_process = ips[ips_start_index:ips_end_index]
        else:
            ips_for_process = None

        process = mp.Process(
            target=ripe_check_process,
            args=(pid, ripe_create_sema, ripe_slow_down_sema, args.bill_to,
                  args.without_new_measurements, args.allowed_measurement_age,
                  args.api_key, args.domain_block_limit, process_count,
                  args.include_ip_encoded, measurement_strategy,
                  args.probes_per_measurement, args.buffer_time,
                  args.measurement_packets, args.use_efficient_probes,
                  location_to_probes_dct, args.stop_without_old_results,
                  ips_for_process, args.endless_measurements,
                  args.random_domains),
            name='domain_checking_{}'.format(pid))

        processes.append(process)

    for process in processes:
        process.start()

    alive = len(processes)
    while alive > 0:
        try:
            process_sts = [pro.is_alive() for pro in processes]
            if process_sts.count(True) != alive:
                alive = process_sts.count(True)
                logger.debug('{} processes alive'.format(alive))
            for process in processes:
                process.join()
        except KeyboardInterrupt:
            pass

    if finish_event:
        finish_event.set()

    if generator_thread:
        generator_thread.join()

    logger.debug('{} processes alive'.format(alive))
    end_time = time.time()
    logger.info('running time: {}'.format((end_time - start_time)))
    return 0
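
The slice arithmetic that distributes ips across the worker processes relies on truncation producing contiguous, non-overlapping ranges: each process's start index equals the previous process's end index, and the last end is forced to ips_count. A quick check:

# With 10 IPs and 3 processes the bounds come out contiguous and complete.
ips_count, process_count = 10, 3
bounds = [(int(pid * (ips_count / process_count)),
           int((pid + 1) * (ips_count / process_count)))
          for pid in range(process_count)]
print(bounds)  # [(0, 3), (3, 6), (6, 10)]
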
Example #10
def main():
    """Main function"""
    parser = configargparse.ArgParser(
        default_config_files=['find_default.ini'])

    __create_parser_arguments(parser)
    args = parser.parse_args()

    global logger
    logger = util.setup_logger(args.logging_file,
                               'find',
                               loglevel=args.log_level)

    global engine
    engine = create_engine(args.database_name)

    trie = create_trie(args.code_blacklist_file, args.word_blacklist_file)

    code_to_location_blacklist = {}
    if args.code_to_location_blacklist_file:
        with open(args.code_to_location_blacklist_file) \
                as code_to_location_blacklist_file:
            json_txt = ""
            for line in code_to_location_blacklist_file:
                line = line.strip()
                if line and line[0] != '#':
                    json_txt += line
            code_to_location_blacklist = json.loads(json_txt)

    location_match_queue = mp.Queue()
    stop_event = threading.Event()
    handle_location_matches_thread = threading.Thread(
        target=handle_location_matches,
        name='handle-location-matches',
        args=(location_match_queue, stop_event))
    handle_location_matches_thread.start()

    processes = []
    for index in range(0, args.number_processes):
        process = mp.Process(target=search_process,
                             args=(index, trie, code_to_location_blacklist,
                                   args.domain_block_limit,
                                   args.number_processes,
                                   location_match_queue),
                             kwargs={
                                 'amount': args.amount,
                                 'debug': args.log_level == 'DEBUG'
                             },
                             name='find_locations_{}'.format(index))
        process.start()
        processes.append(process)

    for process in processes:
        process.join()

    Session = create_session_for_process(engine)
    db_session = Session()
    update_query = update(DomainLabel).values(
        last_searched=datetime.datetime.now())
    db_session.execute(update_query)
    db_session.commit()
    db_session.close()

    stop_event.set()
    handle_location_matches_thread.join()
    location_match_queue.join_thread()