Example #1
0
 def get_file(self):
     """Open ``self.filename`` for writing CSV output in binary mode.

     :return: a writable file object
     :raises IOError: re-raised after logging when the file cannot be opened
     """
     try:
         return open(self.filename, 'wb')
     except IOError as e:
         # fix: bind the exception as ``e`` (it was an undefined name here)
         # and give the format string a placeholder for the error text
         logging.error("Couldn't open %s while outputting results as csv: %s",
                       self.filename, tools.error_to_str(e))
         raise
Example #2
0
 def get_page(self, url, timeout=TIMEOUT):
     """Fetch *url* via ``tools.urlopen``.

     :return: the open response object, or None when the URL could not be
         opened (network trouble, unavailable page, malformed URL)
     """
     try:
         return tools.urlopen(url, timeout=timeout)
     except (six.moves.urllib.error.URLError, six.moves.http_client.HTTPException) as e:
         # a network problem? page unavailable? wrong URL?
         logging.warning("Error opening %s, terminating: %s", url, tools.error_to_str(e))
         return None
Example #3
0
 def get_page(self, url, timeout=TIMEOUT):
     """Open *url* and hand back the response.

     :return: response object on success, None on any handled network error
     """
     handled = (six.moves.urllib.error.URLError, six.moves.http_client.HTTPException)
     try:
         response = tools.urlopen(url, timeout=timeout)
     except handled as e:
         # a network problem? page unavailable? wrong URL?
         logging.warning("Error opening %s, terminating: %s", url, tools.error_to_str(e))
         return None
     return response
Example #4
0
    def detect(self, url, limit=None, exclude=None, timeout=TIMEOUT):
        """Scan a single URL and return its technology findings.

        :param url: URL to analyze; redirects are followed and the final URL
            is re-checked against *limit*/*exclude*
        :param limit: optional URL mask passed to ``expected_url`` — only
            matching pages are scanned (presumably a regex; confirm there)
        :param exclude: optional URL mask passed to ``expected_url`` —
            matching pages are skipped
        :param timeout: per-request timeout in seconds
        :return: ``{final_url: findings}``, or ``{}`` when the URL is
            filtered out or could not be fetched/read
        """
        logging.info("- %s", url)

        findings = []
        original_url = url

        # honour the include/exclude masks before doing any network work
        if not self.expected_url(url, limit, exclude):
            return {}

        try:
            page = tools.urlopen(url, timeout=timeout)
            # geturl() reflects the final URL after any redirects
            url = page.geturl()
        except (six.moves.urllib.error.URLError, six.moves.http_client.HTTPException) as e:
            # a network problem? page unavailable? wrong URL?
            logging.warning("Error opening %s, terminating: %s", url, tools.error_to_str(e))
            return {}

        if url != original_url:
            logging.info("` %s", url)

            # we were redirected; the final URL must pass the masks as well
            if not self.expected_url(url, limit, exclude):
                return {}

        try:
            content = page.read()
        except (socket.timeout, six.moves.http_client.HTTPException, SSLError) as e:
            logging.info("Exception while reading %s, terminating: %s", url, tools.error_to_str(e))
            return {}

        if six.PY3:
            # read() yields bytes under Python 3; the clue checks need str
            content = content.decode()

        findings += self.check_url(url)  # 'url'
        if page:
            findings += self.check_headers(page.info())  # 'headers'
        if content:
            findings += self.check_meta(content)  # 'meta'
            findings += self.check_script(content)  # 'script'
            findings += self.check_html(content)  # 'html'

        # post-processing: implied apps, duplicates, exclusions, categories
        self.follow_implies(findings)  # 'implies'
        self.remove_duplicates(findings)
        self.remove_exclusions(findings)  # 'excludes'
        self.add_categories(findings)

        return {url: findings}
Example #5
0
    def read_clues_from_file(filename):
        """Load the clues JSON file.

        :param filename: path to the clues JSON file
        :return: tuple ``(apps, categories)`` taken from the parsed JSON
        :raises IOError: when the file cannot be opened (after logging)
        :raises ValueError: when the file is not valid JSON (after logging)
        """
        logging.info("Reading clues file %s", filename)
        try:
            json_data = open(filename)
        except IOError as e:
            logging.error("Error while opening clues file, terminating: %s", tools.error_to_str(e))
            raise

        try:
            # json.load()'s ``encoding`` argument was removed in Python 3.9
            # and UTF-8 is the default anyway, so it is deliberately omitted
            clues = json.load(json_data)
        except ValueError as e:
            logging.error("Error while reading JSON file, terminating: %s", tools.error_to_str(e))
            raise
        finally:
            # close the handle even when parsing fails (previously it leaked)
            json_data.close()

        categories = clues['categories']
        apps = clues['apps']
        return apps, categories
Example #6
0
 def get_content(self, page, url):
     """Read the body of *page*.

     :return: the page content, or None when reading failed with a handled
         timeout / HTTP / SSL error
     """
     try:
         return page.read()
     except (socket.timeout, six.moves.http_client.HTTPException, SSLError) as e:
         logging.info("Exception while reading %s, terminating: %s", url, tools.error_to_str(e))
         return None
Example #7
0
 def get_content(self, page, url):
     """
     Read the response body, swallowing known read-time errors.

     :return: Content if present, None on handled exception
     """
     body = None
     try:
         body = page.read()
     except (socket.timeout, six.moves.http_client.HTTPException, SSLError) as e:
         logging.info("Exception while reading %s, terminating: %s", url, tools.error_to_str(e))
     return body
Example #8
0
    def check_re(re_compiled,
                 re_raw,
                 text,
                 found,
                 det,
                 app,
                 show_match_only=False):
        """Search *text* with a compiled clue regex; record *app* on a hit.

        :param re_compiled: dict with the compiled regex under ``"re"`` and
            an optional ``"version"`` expansion template
        :param re_raw: original clue pattern text, used only for logging
        :param text: text to search (URL, header value, HTML, ...)
        :param found: running findings list; any hit is appended in place
        :param det: short detection-type label for the log line
        :param app: application name to record on a match
        :param show_match_only: log just the matched fragment rather than
            the whole *text*
        :return: one-element list with the new finding, or ``[]`` on no match
        """
        # if re matches text, then add the app(lication) to found
        res = []
        match = re_compiled["re"].search(text)
        if match:
            ver = None

            if show_match_only:
                show_text = match.group(0)
            else:
                show_text = text

            # collapse the logged text onto a single line
            show_text = ''.join(show_text.splitlines())

            if "version" in re_compiled:
                version_pattern = re_compiled["version"]

                # checking if version has "\\1?a:b" syntax
                # see https://github.com/AliasIO/Wappalyzer/wiki/Specification#version-syntax
                #
                # NB: Wappalyzer's implementation differs a bit:
                # https://github.com/AliasIO/Wappalyzer/blob/master/src/wappalyzer.js
                try:
                    ternary = re.match(r"^(.*)\?(.*):(.*)$", version_pattern)
                    if ternary:
                        # default to the ":b" branch; use "a" only when the
                        # part before '?' expands to something non-empty
                        ver = ternary.group(3)
                        try:
                            if match.expand(ternary.group(1)):
                                ver = ternary.group(2)
                        except Exception:
                            # unmatched backreference etc. -> keep "b" branch
                            pass
                    else:
                        # plain template: expand backreferences like "\\1"
                        ver = match.expand(version_pattern)
                except Exception as e:
                    logging.debug(
                        "Version not detected: expanding '%s' with '%s' failed: %s",
                        show_text, re_raw, tools.error_to_str(e))
                    ver = None

                if ver:
                    ver = ver.strip()

            logging.info("  + %-7s -> %s (%s): %s =~ %s", det, app, ver,
                         show_text, re_raw)

            # empty version strings are normalized to None here
            res = [{'app': str(app), 'ver': ver or None}]
            found += res

        return res
Example #9
0
    def read_clues_from_file(filename):
        """Load the clues JSON file.

        :param filename: path to the clues JSON file
        :return: tuple ``(apps, categories)`` taken from the parsed JSON
        :raises IOError: when the file cannot be opened (after logging)
        :raises ValueError: when the file is not valid JSON (after logging)
        """
        logging.info("Reading clues file %s", filename)
        try:
            json_data = open(filename)
        except IOError as e:
            logging.error("Error while opening clues file, terminating: %s",
                          tools.error_to_str(e))
            raise

        try:
            # json.load()'s ``encoding`` argument was removed in Python 3.9
            # and UTF-8 is the default anyway, so it is deliberately omitted
            clues = json.load(json_data)
        except ValueError as e:
            logging.error("Error while reading JSON file, terminating: %s",
                          tools.error_to_str(e))
            raise
        finally:
            # close the handle even when parsing fails (previously it leaked)
            json_data.close()

        categories = clues['categories']
        apps = clues['apps']
        return apps, categories
Example #10
0
    def check_re(re_compiled, re_raw, text, found, det, app, show_match_only=False):
        # if re matches text, then add the app(lication) to found
        res = []
        match = re_compiled["re"].search(text)
        if match:
            ver = None

            if show_match_only:
                show_text = match.group(0)
            else:
                show_text = text

            show_text = ''.join(show_text.splitlines())

            if "version" in re_compiled:
                version_pattern = re_compiled["version"]

                # checking if version has "\\1?a:b" syntax
                # see https://github.com/AliasIO/Wappalyzer/wiki/Specification#version-syntax
                #
                # NB: Wappalyzer's implementation differs a bit:
                # https://github.com/AliasIO/Wappalyzer/blob/master/src/wappalyzer.js
                try:
                    ternary = re.match(r"^(.*)\?(.*):(.*)$", version_pattern)
                    if ternary:
                        try:
                            match.expand(ternary.group(1))
                            ver = ternary.group(2)
                        except Exception:
                            ver = ternary.group(3)
                    else:
                        ver = match.expand(version_pattern)
                except Exception as e:
                    logging.debug("Version not detected: expanding '%s' with '%s' failed: %s", show_text, re_raw,
                                  tools.error_to_str(e))
                    ver = None

                if ver:
                    ver = ver.strip()

            logging.info("  + %-7s -> %s (%s): %s =~ %s", det, app, ver, show_text, re_raw)

            res = [{'app': str(app), 'ver': ver}]
            found += res

        return res
Example #11
0
def main(timeout=TIMEOUT):
    """Command-line entry point.

    Parses options, builds the URL list (inline or from an @file), runs the
    detector and writes formatted results to STDOUT and/or an output file.

    :param timeout: default per-URL timeout in seconds (option -t overrides)
    """
    desc = """WAD -
This component analyzes given URL(s) and detects technologies, libraries,
frameworks etc. used by this application, from the OS and web server level,
to the programming platform and frameworks, and server- and client-side
applications, tools and libraries. For example: OS=Linux, webserver=Apache,
platform=PHP, cms=Drupal, analytics=Google Analytics, javascript-lib=jQuery
etc."""

    parser = OptionParser(
        description=desc,
        usage="Usage: %prog -u <URLs|@URLfile>\nHelp:  %prog -h",
        version="%prog 1.0")

    parser.add_option(
        "-u",
        "--url",
        dest="urls",
        metavar="URLS|@FILE",
        help=
        "list of URLs (comma-separated), or a file with a list of URLs (one per line)"
    )

    parser.add_option(
        "-l",
        "--limit",
        dest="limit",
        metavar="URLMASK",
        help=
        "in case of redirections, only include pages with URLs matching this mask - "
        # raw string: '\.' is meant literally and would otherwise trigger an
        # "invalid escape sequence" warning on modern Pythons
        r"e.g. 'https?://[^/]*\.abc\.com/'")

    parser.add_option(
        "-x",
        "--exclude",
        dest="exclude",
        metavar="URLMASK",
        help=
        "in case of redirections, exclude pages with URL matching this mask - "
        "e.g. 'https?://[^/]*/(login|logout)'")

    parser.add_option(
        "-o",
        "--output",
        dest="output_file",
        metavar="FILE",
        help="output file for detection results (default: STDOUT)")

    parser.add_option(
        "-c",
        "--clues",
        dest="clues_file",
        metavar="FILE",
        default=None,
        help="clues for detecting web applications and technologies")

    parser.add_option(
        "-t",
        "--timeout",
        action="store",
        dest="TIMEOUT",
        default=timeout,
        help="set timeout (in seconds) for accessing a single URL")

    parser.add_option(
        "-f",
        "--format",
        action="store",
        dest="format",
        default='json',
        help="output format, allowed values: csv, txt, json (default)")

    parser.add_option(
        "-g",
        "--group",
        action="store_true",
        dest="group",
        default=False,
        help=
        "group results (i.e. technologies found on subpages of other scanned URL "
        "aren't listed)")

    tools.add_log_options(parser)

    options = parser.parse_args()[0]

    tools.use_log_options(options)

    if not options.urls:
        # parser.error() exits the process; the return is defensive
        parser.error("Argument -u missing")
        return

    timeout = int(options.TIMEOUT)

    if options.urls[0] == "@":
        try:
            # "with" closes the URL file on every path (it previously stayed
            # open if readlines() raised)
            with open(options.urls[1:]) as f:
                urls = f.readlines()
        except Exception as e:
            # an I/O exception?
            logging.error("Error reading URL file %s, terminating: %s",
                          options.urls[1:], tools.error_to_str(e))
            return
    else:
        urls = [x.strip() for x in options.urls.split(",") if x.strip() != ""]

    # membership on the dict itself; .keys() was redundant
    if options.format not in output_format_map:
        parser.error("Invalid format specified")
        return

    Clues.get_clues(options.clues_file)

    results = Detector().detect_multiple(urls,
                                         limit=options.limit,
                                         exclude=options.exclude,
                                         timeout=timeout)

    if options.group:
        results = group(results)

    output = output_format_map[options.format]().retrieve(results=results)

    if options.output_file:
        try:
            # "with" guarantees the handle is closed even if write() fails
            with open(options.output_file, "w") as f:
                f.write(output)
            logging.debug("Results written to file %s", options.output_file)
        except Exception as e:
            # an I/O exception?
            logging.error("Error writing results to file %s, terminating: %s",
                          options.output_file, tools.error_to_str(e))
            return

    print(output)
Example #12
0
 def get_file(self):
     """Open ``self.filename`` for binary writing (CSV output).

     :return: a writable file object
     :raises IOError: re-raised after logging when opening fails
     """
     try:
         return open(self.filename, 'wb')
     except IOError as e:
         # fix: ``e`` was an undefined name here, and the format string was
         # missing a %s placeholder for the error text
         logging.error("Couldn't open %s while outputting results as csv: %s", self.filename, tools.error_to_str(e))
         raise
Example #13
0
def main(timeout=TIMEOUT):
    """Command-line entry point.

    Parses options, builds the URL list (inline or from an @file), runs the
    detector and writes formatted results to STDOUT and/or an output file.

    :param timeout: default per-URL timeout in seconds (option -t overrides)
    """
    desc = """WAD -
This component analyzes given URL(s) and detects technologies, libraries,
frameworks etc. used by this application, from the OS and web server level,
to the programming platform and frameworks, and server- and client-side
applications, tools and libraries. For example: OS=Linux, webserver=Apache,
platform=PHP, cms=Drupal, analytics=Google Analytics, javascript-lib=jQuery
etc."""

    parser = OptionParser(description=desc,
                          usage="Usage: %prog -u <URLs|@URLfile>\nHelp:  %prog -h",
                          version="%prog 1.0")

    parser.add_option("-u", "--url", dest="urls", metavar="URLS|@FILE",
                      help="list of URLs (comma-separated), or a file with a list of URLs (one per line)")

    # raw string below: '\.' is meant literally and would otherwise trigger
    # an "invalid escape sequence" warning on modern Pythons
    parser.add_option("-l", "--limit", dest="limit", metavar="URLMASK",
                      help="in case of redirections, only include pages with URLs matching this mask - "
                           r"e.g. 'https?://[^/]*\.abc\.com/'")

    parser.add_option("-x", "--exclude", dest="exclude", metavar="URLMASK",
                      help="in case of redirections, exclude pages with URL matching this mask - "
                           "e.g. 'https?://[^/]*/(login|logout)'")

    parser.add_option("-o", "--output", dest="output_file", metavar="FILE",
                      help="output file for detection results (default: STDOUT)")

    parser.add_option("-c", "--clues", dest="clues_file", metavar="FILE", default=CLUES_FILE,
                      help="clues for detecting web applications and technologies")

    parser.add_option("-t", "--timeout", action="store", dest="TIMEOUT", default=timeout,
                      help="set timeout (in seconds) for accessing a single URL")

    parser.add_option("-f", "--format", action="store", dest="format", default='json',
                      help="output format, allowed values: csv, txt, json (default)")

    parser.add_option("-g", "--group", action="store_true", dest="group", default=False,
                      help="group results (i.e. technologies found on subpages of other scanned URL "
                           "aren't listed)")

    tools.add_log_options(parser)

    options = parser.parse_args()[0]

    tools.use_log_options(options)

    if not options.urls:
        # parser.error() exits the process; the return is defensive
        parser.error("Argument -u missing")
        return

    timeout = int(options.TIMEOUT)

    if options.urls[0] == "@":
        try:
            # "with" closes the URL file on every path (it previously stayed
            # open if readlines() raised)
            with open(options.urls[1:]) as f:
                urls = f.readlines()
        except Exception as e:
            # an I/O exception?
            logging.error("Error reading URL file %s, terminating: %s", options.urls[1:], tools.error_to_str(e))
            return
    else:
        urls = [x.strip() for x in options.urls.split(",") if x.strip() != ""]

    # membership on the dict itself; .keys() was redundant
    if options.format not in output_format_map:
        parser.error("Invalid format specified")
        return

    Clues.get_clues(options.clues_file)

    results = Detector().detect_multiple(urls, limit=options.limit, exclude=options.exclude, timeout=timeout)

    if options.group:
        results = group(results)

    output = output_format_map[options.format]().retrieve(results=results)

    if options.output_file:
        try:
            # "with" guarantees the handle is closed even if write() fails
            with open(options.output_file, "w") as f:
                f.write(output)
            logging.debug("Results written to file %s", options.output_file)
        except Exception as e:
            # an I/O exception?
            logging.error("Error writing results to file %s, terminating: %s", options.output_file,
                          tools.error_to_str(e))
            return

    print(output)