    def detect(self, url, limit=None, exclude=None, timeout=TIMEOUT):
        logging.info("- %s", url)

        findings = []
        original_url = url

        if not self.expected_url(url, limit, exclude):
            return {}

        try:
            page = tools.urlopen(url, timeout=timeout)
            url = page.geturl()
        except (urllib2.URLError, HTTPException), e:
            # a network problem? page unavailable? wrong URL?
            logging.warning("Error opening %s, terminating: %s", url, tools.error_to_str(e))
            return {}
def check_re(re_compiled, re_raw, text, found, det, app, show_match_only=False):
    # if re matches text, then add the app(lication) to found
    res = []
    match = re_compiled["re"].search(text)
    if match:
        ver = None
        if show_match_only:
            show_text = match.group(0)
        else:
            show_text = text
        show_text = ''.join(show_text.splitlines())
        if "version" in re_compiled:
            version_pattern = re_compiled["version"]
            # checking if version has "\\1?a:b" syntax
            # see https://github.com/AliasIO/Wappalyzer/wiki/Specification#version-syntax
            #
            # NB: Wappalyzer's implementation differs a bit:
            # https://github.com/AliasIO/Wappalyzer/blob/master/src/wappalyzer.js
            try:
                ternary = re.match(r"^(.*)\?(.*):(.*)$", version_pattern)
                if ternary:
                    try:
                        match.expand(ternary.group(1))
                        ver = ternary.group(2)
                    except Exception:
                        ver = ternary.group(3)
                else:
                    ver = match.expand(version_pattern)
            except Exception, e:
                logging.debug("Version not detected: expanding '%s' with '%s' failed: %s",
                              show_text, re_raw, tools.error_to_str(e))
                ver = None
            if ver:
                ver = ver.strip()
        logging.info(" + %-7s -> %s (%s): %s =~ %s", det, app, ver, show_text, re_raw)
        res = [{'app': str(app), 'ver': ver}]
        found += res
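# A minimal, self-contained sketch (not part of WAD) of how check_re() above handles the
# Wappalyzer "\\1?a:b" version syntax: the part before '?' is expanded against the clue
# match, and if that expansion succeeds the part before ':' is taken as the version,
# otherwise the part after ':'. The clue below is hypothetical, purely for illustration.
def _version_ternary_example(text="/static/example-lib-v2.js"):
    import re  # also imported at module level; repeated here to keep the sketch standalone
    clue = {"re": re.compile(r"example-lib(-v2)?\.js"),  # group 1 marks the v2 variant
            "version": r"\1?2:1"}                        # "if group 1 matched: 2, else: 1"
    match = clue["re"].search(text)
    ternary = re.match(r"^(.*)\?(.*):(.*)$", clue["version"])
    try:
        match.expand(ternary.group(1))  # raises (under Python 2) when group 1 did not participate
        ver = ternary.group(2)
    except Exception:
        ver = ternary.group(3)
    return ver  # '2' for the default text, '1' for e.g. "/static/example-lib.js"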
def main(timeout=TIMEOUT):
    desc = """WAD - This component analyzes given URL(s) and detects technologies, libraries,
    frameworks etc. used by this application, from the OS and web server level, to the
    programming platform and frameworks, and server- and client-side applications, tools
    and libraries. For example: OS=Linux, webserver=Apache, platform=PHP, cms=Drupal,
    analytics=Google Analytics, javascript-lib=jQuery etc."""

    parser = OptionParser(description=desc,
                          usage="Usage: %prog -u <URLs|@URLfile>\nHelp: %prog -h",
                          version="%prog 1.0")
    parser.add_option("-u", "--url", dest="urls", metavar="URLS|@FILE",
                      help="list of URLs (comma-separated), or a file with a list of URLs (one per line)")
    parser.add_option("-l", "--limit", dest="limit", metavar="URLMASK",
                      help="in case of redirections, only include pages with URLs matching this mask - "
                           "e.g. 'https?://[^/]*\.abc\.com/'")
    parser.add_option("-x", "--exclude", dest="exclude", metavar="URLMASK",
                      help="in case of redirections, exclude pages with URL matching this mask - "
                           "e.g. 'https?://[^/]*/(login|logout)'")
    parser.add_option("-o", "--output", dest="output_file", metavar="FILE",
                      help="output file for detection results (default: STDOUT)")
    parser.add_option("-c", "--clues", dest="clues_file", metavar="FILE", default=CLUES_FILE,
                      help="clues for detecting web applications and technologies")
    parser.add_option("-t", "--timeout", action="store", dest="TIMEOUT", default=timeout,
                      help="set timeout (in seconds) for accessing a single URL")
    tools.add_log_options(parser)

    options = parser.parse_args()[0]
    tools.use_log_options(options)

    if not options.urls:
        parser.error("Argument -u missing")
        return

    timeout = int(options.TIMEOUT)

    if options.urls[0] == "@":
        try:
            f = open(options.urls[1:])
            urls = f.readlines()
            f.close()
        except Exception, e:
            # an I/O exception?
            logging.error("Error reading URL file %s, terminating: %s",
                          options.urls[1:], tools.error_to_str(e))
            return
    else:
        urls = [x.strip() for x in options.urls.split(",") if x.strip() != ""]

    Clues.get_clues(options.clues_file)

    results = Detector().detect_multiple(urls, limit=options.limit, exclude=options.exclude, timeout=timeout)

    if options.output_file:
        try:
            f = open(options.output_file, "w")
            f.write(json.dumps(results))
            f.close()
            logging.debug("Results written to file %s", options.output_file)
        except Exception, e:
            # an I/O exception?
            logging.error("Error writing results to file %s, terminating: %s",
                          options.output_file, tools.error_to_str(e))
            return
    else:
        print json.dumps(results, indent=4)


if __name__ == "__main__":
    main()
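# Example invocations of the command-line interface defined in main() above (the script
# name is illustrative; adjust it to however this module is installed or saved):
#
#   python detection.py -u https://example.com
#   python detection.py -u @urls.txt -l 'https?://[^/]*\.example\.com/' -o results.json -t 10
#
# -u accepts a comma-separated list of URLs or, with a leading '@', a file with one URL
# per line; -l/-x filter redirected URLs by mask; -o writes the JSON results to a file
# (default: STDOUT); -t sets the per-URL timeout in seconds.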
        if url != original_url:
            logging.info("` %s", url)
            if not self.expected_url(url, limit, exclude):
                return {}

        try:
            content = page.read()
        except socket.timeout, e:
            # timeout
            logging.info("Timeout when reading %s, terminating: %s", url, tools.error_to_str(e))
            return {}
        except HTTPException, e:
            logging.info("HTTPException when reading %s, terminating: %s", url, tools.error_to_str(e))
            return {}
        except SSLError, e:
            logging.info("SSLError when reading %s, terminating: %s", url, tools.error_to_str(e))
            return {}

        findings += self.check_url(url)  # 'url'
        if page:
            findings += self.check_headers(page.info())  # 'headers'
        if content:
            findings += self.check_meta(content)     # 'meta'
            findings += self.check_script(content)   # 'script'
            findings += self.check_html(content)     # 'html'
        self.follow_implies(findings)                # 'implies'
        self.remove_duplicates(findings)
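# A minimal sketch of driving the detector programmatically, mirroring what main() does;
# the URL is illustrative, and detect() returns {} whenever a page cannot be fetched or is
# filtered out by the limit/exclude masks:
#
#   Clues.get_clues(CLUES_FILE)
#   results = Detector().detect_multiple(["https://example.com"],
#                                        limit=None, exclude=None, timeout=TIMEOUT)
#   print json.dumps(results, indent=4)
#
# Individual findings assembled by check_re() are dicts of the form
# {'app': <application name>, 'ver': <version string or None>}.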