Example #1
def download_file(logger, url, out_file):
    # Stream the given URL into <tmp_dir>/<out_file>; returns True on
    # success and removes the partial file on failure
    result = False

    try:
        out_fd = open('{}/{}'.format(sc.get_config_item('tmp_dir'), out_file),
                      'wb')
    except Exception as e:
        logger.log_err(
            'Failed to open output file {} in temporary directory ({})'.format(
                out_file, e))
        return False

    try:
        response = requests.get(url, stream=True)

        if response.status_code == 200:
            if 'Content-Length' in response.headers:
                logger.log_info('Downloading {} bytes from {}'.format(
                    response.headers['Content-Length'], url))
            else:
                logger.log_info('Downloading {}'.format(url))

            downloaded_bytes = 0

            for chunk in response.iter_content(1024 * 1024):
                out_fd.write(chunk)
                out_fd.flush()
                downloaded_bytes += len(chunk)

            logger.log_info('Downloaded {} bytes from {}'.format(
                downloaded_bytes, url))

            result = True
        else:
            logger.log_err('GET {} returned {}'.format(url,
                                                       response.status_code))
    except Exception as e:
        logger.log_err('Failed to start download from {} ({})'.format(url, e))

    out_fd.close()

    if not result:
        cleanup_tmp_file(out_file)

    return result
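
A minimal usage sketch, assuming the module-level requests import, a configured sc with a tmp_dir entry, and a logger exposing the log_info/log_err interface these examples rely on; PrintLogger is a hypothetical stand-in for oilog.OILog:

class PrintLogger:
    # Hypothetical stand-in logger, for illustration only
    def log_info(self, msg):
        print('INFO: {}'.format(msg))

    def log_err(self, msg):
        print('ERROR: {}'.format(msg))

logger = PrintLogger()

# Streams the file into <tmp_dir>/example.tar; returns True on success
if download_file(logger, 'https://example.org/data.tar', 'example.tar'):
    print('download succeeded')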
Example #2
def process_avro_files(logger, day, avro_dir, proc_count, out_dir, tld,
                       tlsa_one_set, tlsa_all_set):
    # Fan the Avro files in avro_dir out over proc_count worker processes,
    # then merge the per-file results, statistics and logs they leave behind
    avro_list = []

    for f in os.listdir(avro_dir):
        if f.lower().endswith('.avro'):
            avro_list.append(f)

    logger.log_info('Found {} Avro files in {}'.format(len(avro_list),
                                                       avro_dir))
    logger.log_info('Writing results to {}'.format(out_dir))
    oilog.mkdir_p(out_dir)

    analysis_queue = mp.Queue()

    for a in avro_list:
        analysis_queue.put(a)

    analysis_procs = set()

    for _ in range(proc_count):
        analysis_procs.add(
            mp.Process(target=avro_check_proc,
                       args=(logger, analysis_queue, avro_dir, out_dir, tld,
                             tlsa_one_set, tlsa_all_set)))

    logger.log_info('Starting analysis processes')

    for t in analysis_procs:
        t.start()

    # Reap worker processes as they finish
    while len(analysis_procs) > 0:
        for t in analysis_procs:
            t.join(0.25)
            if not t.is_alive():
                analysis_procs.remove(t)
                break

    logger.log_info('Merging individual results')
    tot_count = 0

    result_name = '{}/{}-results-{}.json.bz2'.format(out_dir, tld, day)
    result_fd = bz2.open(result_name, 'wt')
    result_fd.write('[\n')

    stats_dict = dict()

    for a in avro_list:
        json_name = a.replace('.avro', '.json')

        logger.log_info('Merging in {}/{}'.format(out_dir, json_name))

        json_fd = open('{}/{}'.format(out_dir, json_name), 'r')
        count = 0

        for line in json_fd:
            line = line.rstrip('\r\n')
            result_fd.write('{},\n'.format(line))
            count += 1

        json_fd.close()

        logger.log_info('Merged {} results from {}/{}'.format(
            count, out_dir, json_name))

        tot_count += count

        os.unlink('{}/{}'.format(out_dir, json_name))

        stats_name = a.replace('.avro', '-stats.json')

        logger.log_info('Collecting stats from {}/{}'.format(
            out_dir, stats_name))

        stats_fd = open('{}/{}'.format(out_dir, stats_name), 'r')

        for line in stats_fd:
            line = line.rstrip('\r\n')

            avro_stats = json.loads(line)

            for key in avro_stats:
                stats_dict[key] = stats_dict.get(key, 0) + avro_stats[key]

        stats_fd.close()

        os.unlink('{}/{}'.format(out_dir, stats_name))

    # An empty object absorbs the trailing comma before the array closes
    result_fd.write('{}\n')
    result_fd.write(']\n')
    result_fd.close()

    logger.log_info('Done, wrote {} results to {}'.format(
        tot_count, result_name))

    stats_name = '{}/{}-stats-{}.json'.format(out_dir, tld, day)

    stats_out = open(stats_name, 'w')

    stats_out.write('{}\n'.format(json.dumps(stats_dict)))

    stats_out.close()

    logger.log_info('Wrote statistics to {}'.format(stats_name))

    consolidated_avro_log = '{}/{}-avrologs-{}.log.bz2'.format(
        sc.get_config_item('log_dir'), tld, day)

    cl_fd = bz2.open(consolidated_avro_log, 'wt')

    for a in avro_list:
        log_name = '{}/{}'.format(sc.get_config_item('log_dir'),
                                  a.replace('.avro', '.log'))

        log_fd = open(log_name, 'r')

        for line in log_fd:
            cl_fd.write(line)

        log_fd.close()

        logger.log_info('Added {} to consolidated Avro log'.format(log_name))

        os.unlink(log_name)

    cl_fd.close()

    logger.log_info(
        'Consolidated Avro logs to {}'.format(consolidated_avro_log))
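
The worker entry point avro_check_proc is not shown on this page; below is a minimal skeleton of the queue-draining pattern it has to follow, assuming only the argument list visible above. The merge step expects each worker to leave a <name>.json and a <name>-stats.json behind in out_dir; the analysis itself is elided:

import queue

def avro_check_proc(logger, analysis_queue, avro_dir, out_dir, tld,
                    tlsa_one_set, tlsa_all_set):
    # Hypothetical skeleton: pull Avro file names until the queue drains
    while True:
        try:
            # A short timeout is more robust than get_nowait() with
            # multiprocessing queues, where queued items may still be
            # in flight when a worker starts up
            avro_name = analysis_queue.get(timeout=1)
        except queue.Empty:
            break

        logger.log_info('Processing {}/{}'.format(avro_dir, avro_name))
        # ... analyse the Avro file and write the per-file outputs ...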
Example #3
def main():
    argparser = argparse.ArgumentParser(
        description='Perform DNSSEC checks against Avro files in a directory')

    argparser.add_argument('-c', '--config',
                           nargs=1,
                           help='configuration file to use',
                           type=str,
                           metavar='config_file',
                           dest='config_file',
                           required=True)
    argparser.add_argument('-d', '--date',
                           nargs=1,
                           help='date to process (defaults to yesterday)',
                           type=str,
                           metavar='process_date',
                           dest='process_date',
                           required=False)

    args = argparser.parse_args()

    # Load configuration
    try:
        sc.load_config(args.config_file[0])
    except Exception as e:
        print(e)
        sys.exit(1)

    oilog.set_log_dir(sc.get_config_item('log_dir'))

    day = datetime.date.today() - datetime.timedelta(days=1)

    if args.process_date is not None:
        day = dateutil.parser.parse(args.process_date[0]).date()

    logger = oilog.OILog()
    logger.open('oi-dnssecchecks-{}-{}.log'.format(day,
                                                   sc.get_config_item('tld')))

    # Download required data
    if not download_data(logger, day):
        logger.log_err(
            'Failed to download data for {}, bailing out'.format(day))
        sys.exit(1)

    # Load TLSA sets
    tlsa_one_set = load_tlsa_list(
        '{}/tlsa-one-{}-{}.txt'.format(sc.get_config_item('tmp_dir'),
                                       sc.get_config_item('tld'), day), logger)
    tlsa_all_set = load_tlsa_list(
        '{}/tlsa-all-{}-{}.txt'.format(sc.get_config_item('tmp_dir'),
                                       sc.get_config_item('tld'), day), logger)

    cleanup_tmp_file('tlsa-all-{}-{}.txt'.format(sc.get_config_item('tld'),
                                                 day))
    cleanup_tmp_file('tlsa-one-{}-{}.txt'.format(sc.get_config_item('tld'),
                                                 day))

    try:
        process_avro_files(logger, day, sc.get_config_item('tmp_dir'),
                           sc.get_config_item('multi_process_count', 1),
                           sc.get_config_item('out_dir'),
                           sc.get_config_item('tld'), tlsa_one_set,
                           tlsa_all_set)
    except Exception as e:
        logger.log_err('Process terminated with an exception')
        logger.log_err(e)

    logger.close()
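
Assuming this script is saved as oi-dnssecchecks.py (the file name is hypothetical), a typical invocation looks like the line below; -d accepts anything dateutil.parser understands and defaults to yesterday:

python3 oi-dnssecchecks.py -c /etc/openintel/config.cfg -d 2021-06-01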
Example #4
def download_data(logger, day):
    tar_url = 'https://data.openintel.nl/data/open-tld/{}/openintel-open-tld-{:04d}{:02d}{:02d}.tar'.format(
        day.year, day.year, day.month, day.day)
    tlsa_all_url = 'https://data.openintel.nl/data/open-tld/{}/tlsa/{}-tlsa-all-mx-{}.txt'.format(
        day.year, sc.get_config_item('tld'), day)
    tlsa_one_url = 'https://data.openintel.nl/data/open-tld/{}/tlsa/{}-tlsa-one-mx-{}.txt'.format(
        day.year, sc.get_config_item('tld'), day)

    logger.log_info('Fetching Avro data from {}'.format(tar_url))

    if not download_file(logger, tar_url, 'opentld-{}.tar'.format(day)):
        return False

    logger.log_info(
        'Fetching domains with TLSA records for all MX records from {}'.format(
            tlsa_all_url))

    if not download_file(
            logger, tlsa_all_url, 'tlsa-all-{}-{}.txt'.format(
                sc.get_config_item('tld'), day)):
        cleanup_tmp_file('opentld-{}.tar'.format(day))
        return False

    logger.log_info(
        'Fetching domains with TLSA records for at least one MX record from {}'
        .format(tlsa_one_url))

    if not download_file(
            logger, tlsa_one_url, 'tlsa-one-{}-{}.txt'.format(
                sc.get_config_item('tld'), day)):
        cleanup_tmp_file('opentld-{}.tar'.format(day))
        cleanup_tmp_file('tlsa-all-{}-{}.txt'.format(sc.get_config_item('tld'),
                                                     day))
        return False

    try:
        untar = tarfile.open('{}/opentld-{}.tar'.format(
            sc.get_config_item('tmp_dir'), day))

        # Note: extractall() trusts the member paths in the archive, which
        # is acceptable here since the tar comes from data.openintel.nl
        untar.extractall(sc.get_config_item('tmp_dir'))

        untar.close()
    except Exception as e:
        logger.log_err('Failed to unpack {}/{} ({})'.format(
            sc.get_config_item('tmp_dir'), 'opentld-{}.tar'.format(day), e))
        cleanup_tmp_file('opentld-{}.tar'.format(day))
        cleanup_tmp_file('tlsa-all-{}-{}.txt'.format(sc.get_config_item('tld'),
                                                     day))
        cleanup_tmp_file('tlsa-one-{}-{}.txt'.format(sc.get_config_item('tld'),
                                                     day))

        for t in glob.glob('{}/*.avro'.format(sc.get_config_item('tmp_dir'))):
            logger.log_info('Cleaning up {}'.format(t))
            os.unlink(t)

        return False

    cleanup_tmp_file('opentld-{}.tar'.format(day))

    return True
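
A usage sketch, assuming a loaded configuration and an open logger as in the main() example; the date (chosen arbitrarily here) selects which daily OpenINTEL measurement to fetch:

import datetime

day = datetime.date(2021, 6, 1)

if download_data(logger, day):
    print('Avro files extracted into {}'.format(sc.get_config_item('tmp_dir')))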
Example #5
def cleanup_tmp_file(tmp_name):
    # Best-effort removal of a file in the temporary directory; a file
    # that is already gone is not an error
    try:
        os.unlink('{}/{}'.format(sc.get_config_item('tmp_dir'), tmp_name))
    except OSError:
        pass
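
The download helpers above use it to discard partially written files, for example (day as in the other examples):

cleanup_tmp_file('opentld-{}.tar'.format(day))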
Example #6
def process_date(day):
    print('Ziggy is processing data for {}'.format(day))

    # Step 1: download tar archives for the specified date
    day_tarfiles = []
    base_url = "https://ftp.ripe.net/rpki"
    tals = [
        "afrinic.tal", "apnic-afrinic.tal", "apnic-arin.tal", "apnic-iana.tal",
        "apnic-lacnic.tal", "apnic-ripe.tal", "apnic.tal", "arin.tal",
        "lacnic.tal", "ripencc.tal"
    ]

    tmp_dir = sc.get_path_item('tmp-dir')

    for tal in tals:
        tal_file = '{}/{}.tar.gz'.format(tmp_dir, tal)
        tal_url = '{}/{}/{:04d}/{:02d}/{:02d}/repo.tar.gz'.format(
            base_url, tal, day.year, day.month, day.day)

        if download_from_url(tal_url, tal_file):
            day_tarfiles.append(tal_file)

    # Step 2: clean out the Routinator cache and TAL directory
    routinator_cache = sc.get_path_item('routinator-cache')
    routinator_tals = sc.get_path_item('routinator-tals')

    try:
        sys.stdout.write('Cleaning out {} ... '.format(routinator_cache))
        sys.stdout.flush()
        shutil.rmtree(routinator_cache)
        os.mkdir(routinator_cache)
        print('OK')

        sys.stdout.write('Cleaning out {} ... '.format(routinator_tals))
        sys.stdout.flush()
        shutil.rmtree(routinator_tals)
        os.mkdir(routinator_tals)
        print('OK')
    except Exception as e:
        print('FAILED')
        raise e

    # Step 3: extract the unvalidated data from the tar archives
    ignore_tals = sc.get_config_item('ignore-tals')
    latest_time = datetime.datetime.fromtimestamp(0)

    for tarchive in day_tarfiles:
        sys.stdout.write('Ziggy is processing {} ... '.format(tarchive))
        sys.stdout.flush()
        obj_count = 0

        try:
            t = tarfile.open(tarchive)
            basepath = None
            wrote_ta = False

            for member in t:
                if '/unvalidated/' in member.name and member.isfile():
                    pathcomp = member.name.split('/')

                    # Skip to the path component just after 'unvalidated'
                    i = pathcomp.index('unvalidated') + 1

                    write_path = '{}/{}'.format(routinator_cache,
                                                '/'.join(pathcomp[i:-1]))
                    if basepath is None:
                        basepath = pathcomp[i]

                    os.makedirs(write_path, exist_ok=True)

                    out_fd = open('{}/{}'.format(write_path, pathcomp[-1]),
                                  'wb')

                    in_fd = t.extractfile(member)

                    # Copy the member payload into the cache file
                    shutil.copyfileobj(in_fd, out_fd)

                    out_fd.close()
                    in_fd.close()
                    obj_count += 1

                    member_time = datetime.datetime.fromtimestamp(member.mtime)

                    if member_time > latest_time:
                        latest_time = member_time
                elif member.name.endswith('.tal.cer') and member.isfile():
                    if wrote_ta:
                        raise Exception(
                            "Already wrote a TA for {}, wasn't expecting another one."
                            .format(tarchive))

                    out_fd = open('{}/tmp-ta.cer'.format(routinator_cache),
                                  'wb')

                    in_fd = t.extractfile(member)

                    shutil.copyfileobj(in_fd, out_fd)

                    out_fd.close()
                    in_fd.close()
                    wrote_ta = True

            print('OK ({} objects)'.format(obj_count))

            if not wrote_ta:
                print('Warning, found no TA in {}'.format(tarchive))
            else:
                if basepath in ignore_tals:
                    print('Ignoring TAL for {}'.format(basepath))
                    os.unlink('{}/tmp-ta.cer'.format(routinator_cache))
                else:
                    # For some older archives, the TA certificate is
                    # sometimes encoded in PEM format. Convert it to
                    # DER if necessary
                    ta_fd = open('{}/tmp-ta.cer'.format(routinator_cache),
                                 'rb')
                    is_pem = False
                    pem_header = b'-----BEGIN CERTIFICATE-----'

                    for line in ta_fd:
                        if line.startswith(pem_header):
                            is_pem = True
                            break

                    ta_fd.close()

                    if is_pem:
                        print(
                            'Found an old TA certificate in PEM format, converting to DER'
                        )

                        osslcmd = 'openssl x509 -inform PEM -in {}/tmp-ta.cer -outform DER -out {}/tmp-ta-der.cer'.format(
                            routinator_cache, routinator_cache)

                        if os.system(osslcmd) != 0:
                            raise Exception(
                                'Failed to convert TA from PEM to DER')

                        os.unlink('{}/tmp-ta.cer'.format(routinator_cache))
                        os.rename('{}/tmp-ta-der.cer'.format(routinator_cache),
                                  '{}/tmp-ta.cer'.format(routinator_cache))

                    # Move the TA in place
                    ta_name = 'ta.cer'
                    tal_name = "{}.tal".format(basepath)

                    # From Oct 2012 - Apr 2018, APNIC had a different repo structure
                    # that we need to account for when recreating the TALs
                    if 'apnic' in tarchive:
                        fields = tarchive.split('.')

                        for field in fields:
                            if 'apnic' in field:
                                path_elems = field.split('/')
                                ta_name = 'ta-{}.cer'.format(path_elems[-1])
                                tal_name = '{}-{}.tal'.format(
                                    basepath, path_elems[-1])

                    ta_path = '{}/{}/ta'.format(routinator_cache, basepath)
                    sys.stdout.write('Moving TA to {}/{} ...'.format(
                        ta_path, ta_name))
                    sys.stdout.flush()

                    os.makedirs(ta_path, exist_ok=True)

                    os.rename('{}/tmp-ta.cer'.format(routinator_cache),
                              '{}/{}'.format(ta_path, ta_name))
                    print('OK')

                    sys.stdout.write('Creating a TAL for this TA ... ')
                    sys.stdout.flush()

                    tal = open('{}/{}'.format(routinator_tals, tal_name), 'w')
                    tal.write('rsync://{}/ta/{}\n\n'.format(basepath, ta_name))
                    tal.close()

                    osslcmd = "openssl x509 -inform DER -in {}/{} -pubkey -noout | awk '!/-----(BEGIN|END)/' >> {}/{}".format(
                        ta_path, ta_name, routinator_tals, tal_name)

                    if os.system(osslcmd) != 0:
                        print('FAILED')
                        raise Exception('Failed to create a TAL')

                    print('OK')
        except Exception as e:
            print('Failed to process {}'.format(tarchive))
            raise e

    # Step 4: invoke the Routinator
    print('Ziggy thinks the Routinator should travel back to: {}'.format(
        latest_time))

    vrp_path = sc.get_path_item('vrp-out-name')
    log_path = sc.get_path_item('routinator-log-name')
    vrp_form = sc.get_config_item('vrp-out-format', 'csv')

    routinator_cmd = "faketime '{}' {} -vv --logfile {} vrps -n -o {} -f {}".format(
        latest_time, sc.get_config_item('routinator', 'routinator'),
        log_path.format(day), vrp_path.format(day), vrp_form)

    print('Invoking the Routinator as:')
    print(routinator_cmd)

    if os.system(routinator_cmd) != 0:
        print('Routinator exited with an error')
    else:
        print('Routinator indicated success!')

    # Step 5: clean up
    for tf in day_tarfiles:
        os.unlink(tf)
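
A usage sketch, assuming Ziggy's configuration has been loaded (so sc can resolve tmp-dir, the Routinator paths and the routinator binary) and that faketime and openssl are on the PATH; the date (chosen arbitrarily here) selects which daily snapshot from ftp.ripe.net to replay:

import datetime

# Replay the RPKI repository state of 15 January 2019 through Routinator
process_date(datetime.date(2019, 1, 15))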