def cli():
    parser = argparse.ArgumentParser(
        description='Check HTTPS rules for validity')
    parser.add_argument(
        'checker_config', help='path to the checker configuration file')
    parser.add_argument(
        'rule_files', nargs="*", default=[],
        help="Specific XML rule files")
    parser.add_argument(
        '--json_file', default=None, help='write results to a JSON file')
    args = parser.parse_args()

    config = SafeConfigParser()
    config.read(args.checker_config)

    logfile = config.get("log", "logfile")
    loglevel = convertLoglevel(config.get("log", "loglevel"))
    if logfile == "-":
        logging.basicConfig(stream=sys.stderr, level=loglevel,
                            format="%(levelname)s %(message)s")
    else:
        logging.basicConfig(
            filename=logfile, level=loglevel,
            format="%(asctime)s %(levelname)s %(message)s [%(pathname)s:%(lineno)d]")

    autoDisable = False
    if config.has_option("rulesets", "auto_disable"):
        autoDisable = config.getboolean("rulesets", "auto_disable")

    # Test rules even if they have default_off=...
    includeDefaultOff = False
    if config.has_option("rulesets", "include_default_off"):
        includeDefaultOff = config.getboolean("rulesets", "include_default_off")

    ruledir = config.get("rulesets", "rulesdir")

    checkCoverage = False
    if config.has_option("rulesets", "check_coverage"):
        checkCoverage = config.getboolean("rulesets", "check_coverage")
    checkTargetValidity = False
    if config.has_option("rulesets", "check_target_validity"):
        checkTargetValidity = config.getboolean("rulesets", "check_target_validity")
    checkNonmatchGroups = False
    if config.has_option("rulesets", "check_nonmatch_groups"):
        checkNonmatchGroups = config.getboolean("rulesets", "check_nonmatch_groups")
    checkTestFormatting = False
    if config.has_option("rulesets", "check_test_formatting"):
        checkTestFormatting = config.getboolean("rulesets", "check_test_formatting")

    certdir = config.get("certificates", "basedir")

    if config.has_option("rulesets", "skiplist") and config.has_option(
            "rulesets", "skipfield"):
        skiplist = config.get("rulesets", "skiplist")
        skipfield = config.get("rulesets", "skipfield")
        with open(skiplist) as f:
            f.readline()  # skip the CSV header line
            for line in f:
                splitLine = line.split(",")
                fileHash = splitLine[0]
                if splitLine[int(skipfield)] == "1":
                    skipdict[binascii.unhexlify(fileHash)] = 1

    threadCount = config.getint("http", "threads")

    httpEnabled = True
    if config.has_option("http", "enabled"):
        httpEnabled = config.getboolean("http", "enabled")

    metricName = config.get("thresholds", "metric")
    thresholdDistance = config.getfloat("thresholds", "max_distance")
    metricClass = getMetricClass(metricName)
    metric = metricClass()

    # Debugging options, graphviz dump
    dumpGraphvizTrie = False
    if config.has_option("debug", "dump_graphviz_trie"):
        dumpGraphvizTrie = config.getboolean("debug", "dump_graphviz_trie")
    if dumpGraphvizTrie:
        graphvizFile = config.get("debug", "graphviz_file")
        exitAfterDump = config.getboolean("debug", "exit_after_dump")

    if args.rule_files:
        xmlFnames = args.rule_files
    else:
        xmlFnames = glob.glob(os.path.join(ruledir, "*.xml"))

    trie = RuleTrie()

    rulesets = []
    # Initialize so json_output() below never sees an unbound name, even when
    # none of the per-ruleset checks are enabled.
    problems = []
    coverageProblemsExist = False
    targetValidityProblemsExist = False
    nonmatchGroupProblemsExist = False
    testFormattingProblemsExist = False
    for xmlFname in xmlFnames:
        logging.debug("Parsing {}".format(xmlFname))
        if skipFile(xmlFname):
            logging.debug(
                "Skipping rule file '{}', matches skiplist.".format(xmlFname))
            continue

        ruleset = Ruleset(etree.parse(open(xmlFname, "rb")).getroot(), xmlFname)
        if ruleset.defaultOff and not includeDefaultOff:
            logging.debug("Skipping rule '{}', reason: {}".format(
                ruleset.name, ruleset.defaultOff))
            continue

        # Check whether ruleset coverage by tests was sufficient.
        if checkCoverage:
            logging.debug("Checking coverage for '{}'.".format(ruleset.name))
            problems = ruleset.getCoverageProblems()
            for problem in problems:
                coverageProblemsExist = True
                logging.error(problem)
        if checkTargetValidity:
            logging.debug(
                "Checking target validity for '{}'.".format(ruleset.name))
            problems = ruleset.getTargetValidityProblems()
            for problem in problems:
                targetValidityProblemsExist = True
                logging.error(problem)
        if checkNonmatchGroups:
            logging.debug(
                "Checking non-match groups for '{}'.".format(ruleset.name))
            problems = ruleset.getNonmatchGroupProblems()
            for problem in problems:
                nonmatchGroupProblemsExist = True
                logging.error(problem)
        if checkTestFormatting:
            logging.debug(
                "Checking test formatting for '{}'.".format(ruleset.name))
            problems = ruleset.getTestFormattingProblems()
            for problem in problems:
                testFormattingProblemsExist = True
                logging.error(problem)

        trie.addRuleset(ruleset)
        rulesets.append(ruleset)

    # Trie is built now, dump it if it's set in config
    if dumpGraphvizTrie:
        logging.debug("Dumping graphviz ruleset trie")
        graph = trie.generateGraphizGraph()
        if graphvizFile == "-":
            graph.dot()
        else:
            with open(graphvizFile, "w") as gvFd:
                graph.dot(gvFd)
        if exitAfterDump:
            sys.exit(0)

    fetchOptions = http_client.FetchOptions(config)
    fetchers = []

    # Ensure "default" is among the platform dirs
    if not os.path.isdir(os.path.join(certdir, "default")):
        raise RuntimeError(
            "Platform 'default' is missing from certificate directories")
    platforms = http_client.CertificatePlatforms(
        os.path.join(certdir, "default"))
    fetchers.append(
        http_client.HTTPFetcher("default", platforms, fetchOptions, trie))

    # Fetches pages with unrewritten URLs
    fetcherPlain = http_client.HTTPFetcher("default", platforms, fetchOptions)

    urlList = []
    if config.has_option("http", "url_list"):
        with open(config.get("http", "url_list")) as urlFile:
            urlList = [line.rstrip() for line in urlFile]

    # Created unconditionally so the json_file branch below works even when
    # HTTP fetching is disabled.
    resQueue = queue.Queue()

    if httpEnabled:
        taskQueue = queue.Queue(1000)
        startTime = time.time()
        testedUrlPairCount = 0

        for i in range(threadCount):
            t = UrlComparisonThread(
                taskQueue, metric, thresholdDistance, autoDisable, resQueue)
            t.daemon = True
            t.start()

        # If the list of URLs to test/scan was not defined, use the test URL
        # extraction methods built into the Ruleset implementation.
        if not urlList:
            for ruleset in rulesets:
                if ruleset.platform != "default" and os.path.isdir(
                        os.path.join(certdir, ruleset.platform)):
                    theseFetchers = copy.deepcopy(fetchers)
                    platforms.addPlatform(
                        ruleset.platform,
                        os.path.join(certdir, ruleset.platform))
                    theseFetchers.append(
                        http_client.HTTPFetcher(
                            ruleset.platform, platforms, fetchOptions, trie))
                else:
                    theseFetchers = fetchers
                testUrls = []
                for test in ruleset.tests:
                    if not ruleset.excludes(test.url):
                        testedUrlPairCount += 1
                        testUrls.append(test.url)
                    else:
                        # TODO: We should fetch the non-rewritten exclusion
                        # URLs to make sure they still exist.
                        logging.debug(
                            "Skipping excluded URL {}".format(test.url))
                task = ComparisonTask(
                    testUrls, fetcherPlain, theseFetchers, ruleset)
                taskQueue.put(task)

        taskQueue.join()
        logging.info(
            "Finished in {:.2f} seconds. Loaded rulesets: {}, URL pairs: {}."
            .format(time.time() - startTime, len(xmlFnames),
                    testedUrlPairCount))

    if args.json_file:
        json_output(resQueue, args.json_file, problems)
    if checkCoverage and coverageProblemsExist:
        return 1  # exit with error code
    if checkTargetValidity and targetValidityProblemsExist:
        return 1  # exit with error code
    if checkNonmatchGroups and nonmatchGroupProblemsExist:
        return 1  # exit with error code
    if checkTestFormatting and testFormattingProblemsExist:
        return 1  # exit with error code
    return 0  # exit with success
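
# For reference, a hypothetical checker config exercising the sections read by
# cli() above. The section and option names are taken from the config.get()/
# getboolean() calls in the function; the values shown are illustrative only.
#
#   [log]
#   logfile = -
#   loglevel = info
#
#   [rulesets]
#   rulesdir = rules
#   auto_disable = false
#   include_default_off = false
#   check_coverage = true
#   check_target_validity = true
#   check_nonmatch_groups = true
#   check_test_formatting = true
#
#   [certificates]
#   basedir = certs
#
#   [http]
#   threads = 8
#   enabled = true
#
#   [thresholds]
#   metric = markup        ; any name understood by getMetricClass()
#   max_distance = 0.1
#
#   [debug]
#   dump_graphviz_trie = false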
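
# Minimal entry-point sketch, assuming this module is executed directly; a
# packaged install may instead expose cli() via a console-scripts entry point.
# cli() returns 0 on success and 1 when an enabled check found problems, so
# its result maps straight onto the process exit status.
if __name__ == "__main__":
    sys.exit(cli())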