def run(self):
    data = [self.clean_row(d) for d in self.a11y_raw]

    parsed_datasets = [
        ('a11y', self.make_a11y_data(data)),
        ('agencies', self.make_agency_data(data)),
        ('domains', self.make_domain_data(data)),
    ]

    mkdir_p(results_dir())

    for name, dataset in parsed_datasets:
        path = '{}/{}.json'.format(results_dir(), name)
        with open(path, 'w+') as f:
            json.dump(dataset, f, indent=2)
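# run() above leans on two helpers that aren't shown in this snippet. A
# minimal sketch of what they might look like, assuming mkdir_p is the usual
# os.makedirs wrapper and results_dir points at a local results/ directory;
# both are assumptions, not the project's actual implementations.

import os

def mkdir_p(path):
    # Like `mkdir -p`: create the directory and any missing parents,
    # without erroring if it already exists.
    os.makedirs(path, exist_ok=True)

def results_dir():
    # Assumed location, relative to this file.
    return os.path.join(os.path.dirname(__file__), "results")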
def scan(domain, options):
    logging.debug("[%s][sslyze]" % domain)

    # Optional: skip domains which don't support HTTPS in prior inspection
    inspection = utils.data_for(domain, "inspect")
    if inspection and (not inspection.get("support_https")):
        logging.debug("\tSkipping, HTTPS not supported in inspection.")
        return None

    # Optional: if inspect data says the canonical endpoint uses www and
    # this domain doesn't have it, add it.
    if inspection and (inspection.get("canonical_endpoint") == "www") and (not domain.startswith("www.")):
        scan_domain = "www.%s" % domain
    else:
        scan_domain = domain

    # cache XML from sslyze
    cache_xml = utils.cache_path(domain, "sslyze", ext="xml")

    # because sslyze manages its own output (can't yet print to stdout),
    # we have to mkdir_p the path ourselves
    utils.mkdir_p(os.path.dirname(cache_xml))

    force = options.get("force", False)

    if (force is False) and os.path.exists(cache_xml):
        logging.debug("\tCached.")
        xml = open(cache_xml).read()
    else:
        # use scan_domain (possibly www-prefixed) to do the actual scan
        logging.debug("\t %s %s" % (command, scan_domain))
        raw = utils.scan([
            command,
            "--regular", "--quiet",
            scan_domain,
            "--xml_out=%s" % cache_xml
        ], env=command_env)

        if raw is None:
            # TODO: save standard invalid XML data...?
            logging.warn("\tBad news scanning, sorry!")
            return None

        xml = utils.scan(["cat", cache_xml])
        if not xml:
            logging.warn("\tBad news reading XML, sorry!")
            return None

        utils.write(xml, cache_xml)

    data = parse_sslyze(xml)

    if data is None:
        logging.warn("\tNo valid target for scanning, couldn't connect.")
        return None

    utils.write(utils.json_for(data), utils.cache_path(domain, "sslyze"))

    yield [
        data['protocols']['sslv2'],
        data['protocols']['sslv3'],
        data['protocols']['tlsv1.0'],
        data['protocols']['tlsv1.1'],
        data['protocols']['tlsv1.2'],

        data['config'].get('any_dhe'),
        data['config'].get('all_dhe'),
        data['config'].get('weakest_dh'),
        data['config'].get('any_rc4'),
        data['config'].get('all_rc4'),
        data['config'].get('ocsp_stapling'),

        data['certs'].get('key_type'),
        data['certs'].get('key_length'),
        data['certs'].get('leaf_signature'),
        data['certs'].get('any_sha1'),
        data['certs'].get('not_before'),
        data['certs'].get('not_after'),
        data['certs'].get('served_issuer'),

        data.get('errors')
    ]
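# scan() above treats utils.scan as a thin subprocess wrapper that returns
# the command's stdout on success, or None on failure. A minimal sketch
# under that assumption; the project's real helper may differ.

import subprocess

def scan(command, env=None):
    try:
        # Capture stdout as text; a non-zero exit raises CalledProcessError.
        return subprocess.check_output(
            command, env=env, stderr=subprocess.STDOUT,
            universal_newlines=True
        )
    except (subprocess.CalledProcessError, OSError):
        return None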
def scan(domain, options):
    logging.debug("[%s][sslyze]" % domain)

    # Optional: skip domains which don't support HTTPS in pshtt scan.
    if utils.domain_doesnt_support_https(domain):
        logging.debug("\tSkipping, HTTPS not supported.")
        return None

    # Optional: if pshtt data says the canonical endpoint uses www and
    # this domain doesn't have it, add it.
    if utils.domain_uses_www(domain):
        scan_domain = "www.%s" % domain
    else:
        scan_domain = domain

    # cache JSON from sslyze
    cache_json = utils.cache_path(domain, "sslyze")

    # because sslyze manages its own output (can't yet print to stdout),
    # we have to mkdir_p the path ourselves
    utils.mkdir_p(os.path.dirname(cache_json))

    force = options.get("force", False)

    if (force is False) and os.path.exists(cache_json):
        logging.debug("\tCached.")
        raw_json = open(cache_json).read()
        try:
            data = json.loads(raw_json)
            if (data.__class__ is dict) and data.get('invalid'):
                return None
        except json.decoder.JSONDecodeError:
            logging.warn("Error decoding JSON. Cache probably corrupted.")
            return None
    else:
        # use scan_domain (possibly www-prefixed) to do the actual scan
        logging.debug("\t %s %s" % (command, scan_domain))

        # This is --regular minus --heartbleed
        # See: https://github.com/nabla-c0d3/sslyze/issues/217
        raw_response = utils.scan([
            command,
            "--sslv2", "--sslv3", "--tlsv1", "--tlsv1_1", "--tlsv1_2",
            "--reneg", "--resum", "--certinfo",
            "--http_get", "--hide_rejected_ciphers",
            "--compression", "--openssl_ccs",
            "--fallback",
            "--quiet",
            scan_domain,
            "--json_out=%s" % cache_json
        ])

        if raw_response is None:
            # TODO: save standard invalid JSON data...?
            utils.write(utils.invalid({}), cache_json)
            logging.warn("\tBad news scanning, sorry!")
            return None

        raw_json = utils.scan(["cat", cache_json])
        if not raw_json:
            logging.warn("\tBad news reading JSON, sorry!")
            return None

        utils.write(raw_json, cache_json)

    data = parse_sslyze(raw_json)

    if data is None:
        logging.warn("\tNo valid target for scanning, couldn't connect.")
        return None

    yield [
        scan_domain,

        data['protocols']['sslv2'],
        data['protocols']['sslv3'],
        data['protocols']['tlsv1.0'],
        data['protocols']['tlsv1.1'],
        data['protocols']['tlsv1.2'],

        data['config'].get('any_dhe'),
        data['config'].get('all_dhe'),
        data['config'].get('weakest_dh'),
        data['config'].get('any_rc4'),
        data['config'].get('all_rc4'),

        data['certs'].get('key_type'),
        data['certs'].get('key_length'),
        data['certs'].get('leaf_signature'),
        data['certs'].get('any_sha1_served'),
        data['certs'].get('any_sha1_constructed'),
        data['certs'].get('not_before'),
        data['certs'].get('not_after'),
        data['certs'].get('served_issuer'),
        data['certs'].get('constructed_issuer'),

        data.get('errors')
    ]
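# The JSON version above writes an 'invalid' marker into the cache after a
# failed scan, and checks for it when reading the cache back. A minimal
# sketch of the utils.invalid helper inferred from that round-trip; this is
# an assumption, not the project's actual code.

import json

def invalid(data=None):
    if data is None:
        data = {}
    data['invalid'] = True
    # Serialized so it survives the json.loads cache check above.
    return json.dumps(data, indent=2)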
def scan(domain, options):
    logging.debug("[%s][sslyze]" % domain)

    # Optional: skip domains which don't support HTTPS in prior inspection
    if utils.domain_doesnt_support_https(domain):
        logging.debug("\tSkipping, HTTPS not supported in inspection.")
        return None

    # Optional: if pshtt data says the canonical endpoint uses www and
    # this domain doesn't have it, add it.
    if utils.domain_uses_www(domain):
        scan_domain = "www.%s" % domain
    else:
        scan_domain = domain

    # cache XML from sslyze
    cache_xml = utils.cache_path(domain, "sslyze", ext="xml")

    # because sslyze manages its own output (can't yet print to stdout),
    # we have to mkdir_p the path ourselves
    utils.mkdir_p(os.path.dirname(cache_xml))

    force = options.get("force", False)

    if (force is False) and os.path.exists(cache_xml):
        logging.debug("\tCached.")
        xml = open(cache_xml).read()
    else:
        # use scan_domain (possibly www-prefixed) to do the actual scan
        logging.debug("\t %s %s" % (command, scan_domain))

        # Give the Python shell environment a pyenv environment.
        pyenv_init = "eval \"$(pyenv init -)\" && pyenv shell %s" % pyenv_version

        # Really un-ideal, but calling out to Python 2 from Python 3 is a
        # nightmare. I don't think this tool's threat model includes
        # untrusted CSV, either.
        raw = utils.unsafe_execute("%s && %s --regular --quiet %s --xml_out=%s" % (
            pyenv_init, command, scan_domain, cache_xml))

        if raw is None:
            # TODO: save standard invalid XML data...?
            logging.warn("\tBad news scanning, sorry!")
            return None

        xml = utils.scan(["cat", cache_xml])
        if not xml:
            logging.warn("\tBad news reading XML, sorry!")
            return None

        utils.write(xml, cache_xml)

    data = parse_sslyze(xml)

    if data is None:
        logging.warn("\tNo valid target for scanning, couldn't connect.")
        return None

    utils.write(utils.json_for(data), utils.cache_path(domain, "sslyze"))

    yield [
        data['protocols']['sslv2'],
        data['protocols']['sslv3'],
        data['protocols']['tlsv1.0'],
        data['protocols']['tlsv1.1'],
        data['protocols']['tlsv1.2'],

        data['config'].get('any_dhe'),
        data['config'].get('all_dhe'),
        data['config'].get('weakest_dh'),
        data['config'].get('any_rc4'),
        data['config'].get('all_rc4'),
        data['config'].get('ocsp_stapling'),

        data['certs'].get('key_type'),
        data['certs'].get('key_length'),
        data['certs'].get('leaf_signature'),
        data['certs'].get('any_sha1'),
        data['certs'].get('not_before'),
        data['certs'].get('not_after'),
        data['certs'].get('served_issuer'),

        data.get('errors')
    ]
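# Unlike utils.scan's argument list, utils.unsafe_execute above has to run a
# full shell string so the `eval "$(pyenv init -)"` dance works. A minimal
# sketch, assuming it returns stdout on success and None on failure; the
# real helper isn't shown here.

import subprocess

def unsafe_execute(command_string):
    try:
        # shell=True is what makes this "unsafe": the string is interpreted
        # by the shell, so it must never contain untrusted input.
        return subprocess.check_output(
            command_string, shell=True, universal_newlines=True
        )
    except subprocess.CalledProcessError:
        return None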
def main():
    options = utils.options()
    debug = options.get('debug', False)
    encoding = options.get('encoding', 'latin-1')

    name = options.get('name', 'hostnames')
    filter_name = options.get('filter', name)
    filter = filters.get(filter_name, None)
    if filter is None:
        print("No filter by that name. Specify one with --filter.")
        exit(1)

    # Drop output in a directory next to the script.
    this_dir = os.path.dirname(__file__)
    output = os.path.join(this_dir, "hostnames")
    utils.mkdir_p(output)

    out_filename = "%s.csv" % name
    out_file = open(os.path.join(output, out_filename), 'w', newline='')
    out_writer = csv.writer(out_file)

    if len(options["_"]) < 1:
        print("Provide the name of an input file.")
        exit(1)

    input_filename = options["_"][0]
    if not os.path.exists(input_filename):
        print("Input file doesn't exist.")
        exit(1)

    suffix = options.get("suffix", ".gov")
    # if it has a ., make sure the . is escaped
    if suffix.startswith("."):
        suffix = "\\%s" % suffix
    pattern = re.compile("%s\n?$" % suffix)

    max = int(options.get("max", -1))

    # Proceed
    missed = 0
    matched = 0
    name_map = {}
    curr = 0

    with open(input_filename, encoding=encoding) as f:
        try:
            for line in f:
                if pattern.search(line):
                    hostname = filter(line)
                    if debug:
                        print("Match!!!! %s" % hostname)
                    matched += 1
                    name_map[hostname] = None
                else:
                    if debug:
                        print("Didn't match: %s" % line.strip())
                    missed += 1

                curr += 1
                if (max > 0) and (curr >= max):
                    print("Stopping at %i." % curr)
                    break
                if (curr % 1000000) == 0:
                    print("Processing: %i" % curr)
        except UnicodeDecodeError:
            print(curr)
            print(utils.format_last_exception())
            exit(1)

    hostnames = list(name_map.keys())
    hostnames.sort()

    print("Matched %i (%i unique), missed on %i." % (matched, len(hostnames), missed))

    print("Writing out CSV.")
    for hostname in hostnames:
        out_writer.writerow([hostname])

    print("Done.")
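# main() looks a callable up in a `filters` dict and applies it to each
# matching line to extract a hostname. A hypothetical entry, only to show
# the expected shape; the name and normalization are illustrative, not from
# the original.

def plain_hostname(line):
    # Trim whitespace/newline and lowercase, since the dedup map is
    # case-sensitive.
    return line.strip().lower()

filters = {
    'hostnames': plain_hostname,
}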
#!/usr/bin/env python

from scanners import utils
import csv
import os
import re
import time

# pip install censys
import censys.certificates

# Drop output in a directory next to the script.
this_dir = os.path.dirname(__file__)
output = os.path.join(this_dir, "hostnames")
utils.mkdir_p(output)

out_filename = "censys.csv"
out_file = open(os.path.join(output, out_filename), 'w', newline='')
out_writer = csv.writer(out_file)

options = utils.options()
debug = options.get("debug", False)

suffix = options.get("suffix", ".gov")
# matches a literal "*." wildcard prefix
wildcard_pattern = re.compile(r"\*\.")

# time to sleep between requests (defaults to 5s)
delay = int(options.get("delay", 5))

# Censys page size, fixed
page_size = 100
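# The constants above set up a paged, rate-limited walk of Censys results.
# A sketch of the loop they imply, with fetch_page standing in for the real
# Censys API call (a hypothetical helper; the actual client, query, and
# result shape are not shown in this snippet).

def gather(fetch_page):
    page = 1
    while True:
        names = fetch_page(page)  # assumed to return up to page_size names
        for name in names:
            # Strip wildcard prefixes like "*." before writing hostnames out.
            hostname = wildcard_pattern.sub("", name)
            out_writer.writerow([hostname])
        if len(names) < page_size:
            break  # short page means we've hit the last one
        page += 1
        time.sleep(delay)  # stay under the API rate limit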