def mapper(self, _, line):
        """MRJob mapper: stream one gzipped WARC file from S3 and emit one
        record per HTTP response found in it.

        Args:
            _: unused mapper key (MRJob protocol).
            line (str): S3 key of a WARC file in the aws-publicdatasets
                bucket.

        Yields:
            (dict, int): detection result ({"tech", "url", "date",
            "domain"}) paired with the count 1.
        """
        # Stream the archive straight from the public S3 bucket rather
        # than downloading it to disk first.
        conn = boto.connect_s3(anon=True)
        pds = conn.get_bucket('aws-publicdatasets')
        key = Key(pds, line)
        warc_file = warc.WARCFile(fileobj=GzipStreamFile(key))

        for record in warc_file:
            # Only HTTP responses carry page content worth analysing.
            if record['Content-Type'] != 'application/http; msgtype=response':
                continue
            payload = record.payload.read()
            # Raw HTTP payload: header block and body are separated by a
            # blank line (CRLF CRLF).
            headers, body = payload.split('\r\n\r\n', 1)
            detector = Detector()
            # NOTE(review): header-based detection was disabled upstream;
            # re-enable detector.check_headers(headers) if needed.
            tech = detector.check_script(body) + detector.check_html(body)
            yield {
                "tech": tech,
                "url": record.url,
                "date": record.date,
                "domain": urlparse(record.url).netloc
            }, 1
Exemple #2
0
    def scan_target(self, url):
        """Detect technologies running on *url* and list them in the
        GUI treeview.

        Each detected technology becomes a child row (type, software,
        version) under a root row labelled with the extracted domain
        parts. Updates the status bar with progress/error text.

        Args:
            url (str): full URL to scan (scheme required; an incomplete
                URL makes the detector raise ValueError).
        """
        try:
            detections = Detector().detect(url=url, timeout=5)
            for result in detections:
                if detections[result]:
                    # Root row: subdomain.domain.suffix of the target URL.
                    ext = tldextract.extract(url)
                    parent = self.treeview.insert('',
                                                  'end',
                                                  text='.'.join(ext[:3]))
                    first = detections[result][0]
                    tech_type, software = first.get('type'), first.get('app')
                    # Display the literal string 'None' when no version
                    # information was detected.
                    version = first.get('ver') or 'None'

                    # assign to gui treeview
                    self.treeview.insert(parent,
                                         'end',
                                         text=tech_type,
                                         values=(software, version))
                    self.status['text'] = 'done'
                else:
                    self.status['text'] = 'No results found'

        except ValueError:
            self.status['text'] = "Invalid! Please input a full url"
Exemple #3
0
    def test_remove_duplicates(self):
        """remove_duplicates mutates the detection list in place, collapsing
        repeated app entries down to the expected survivors."""
        detections = [
            {'app': 'A', 'ver': None},
            {'app': 'B', 'ver': "1.5"},
            {'app': 'C', 'ver': None},
            {'app': 'D', 'ver': "7.0"},
            {'app': 'E', 'ver': "1"},
            {'app': 'F', 'ver': "2.2"},
            {'app': 'A', 'ver': None},
            {'app': 'B', 'ver': "1.5"},
            {'app': 'C', 'ver': "be"},
            {'app': 'D', 'ver': "222"},
            {'app': 'A', 'ver': None},
            {'app': 'B', 'ver': "1.5"},
            {'app': 'E', 'ver': None},
            {'app': 'E', 'ver': "1.3"},
            {'app': 'F', 'ver': "2"},
            {'app': 'F', 'ver': None},
        ]

        expected = [
            {'app': 'A', 'ver': None},
            {'app': 'B', 'ver': "1.5"},
            {'app': 'C', 'ver': "be"},
            {'app': 'D', 'ver': "7.0"},
            {'app': 'E', 'ver': "1.3"},
            {'app': 'F', 'ver': "2.2"},
            {'app': 'D', 'ver': "222"},
        ]

        Detector().remove_duplicates(detections)
        assert detections == expected
Exemple #4
0
 def setUp(self):
     """Create a fresh Detector and expose its clue tables as shortcuts."""
     detector = Detector()
     self.detector = detector
     self.apps = detector.apps
     self.categories = detector.categories
Exemple #5
0
def main(timeout=TIMEOUT):
    """Command-line entry point: parse options, run technology detection
    on the given URL(s) and emit the results in the selected format.

    Args:
        timeout (int): default per-URL timeout in seconds; can be
            overridden on the command line with -t/--timeout.
    """
    desc = """WAD -
This component analyzes given URL(s) and detects technologies, libraries,
frameworks etc. used by this application, from the OS and web server level,
to the programming platform and frameworks, and server- and client-side
applications, tools and libraries. For example: OS=Linux, webserver=Apache,
platform=PHP, cms=Drupal, analytics=Google Analytics, javascript-lib=jQuery
etc."""

    parser = OptionParser(
        description=desc,
        usage="Usage: %prog -u <URLs|@URLfile>\nHelp:  %prog -h",
        version="%prog 1.0")

    parser.add_option(
        "-u",
        "--url",
        dest="urls",
        metavar="URLS|@FILE",
        help=
        "list of URLs (comma-separated), or a file with a list of URLs (one per line)"
    )

    # Raw string keeps the help text byte-identical while avoiding the
    # invalid-escape-sequence warning for '\.' on modern Python.
    parser.add_option(
        "-l",
        "--limit",
        dest="limit",
        metavar="URLMASK",
        help=
        "in case of redirections, only include pages with URLs matching this mask - "
        r"e.g. 'https?://[^/]*\.abc\.com/'")

    parser.add_option(
        "-x",
        "--exclude",
        dest="exclude",
        metavar="URLMASK",
        help=
        "in case of redirections, exclude pages with URL matching this mask - "
        "e.g. 'https?://[^/]*/(login|logout)'")

    parser.add_option(
        "-o",
        "--output",
        dest="output_file",
        metavar="FILE",
        help="output file for detection results (default: STDOUT)")

    parser.add_option(
        "-c",
        "--clues",
        dest="clues_file",
        metavar="FILE",
        default=None,
        help="clues for detecting web applications and technologies")

    parser.add_option(
        "-t",
        "--timeout",
        action="store",
        dest="TIMEOUT",
        default=timeout,
        help="set timeout (in seconds) for accessing a single URL")

    parser.add_option(
        "-f",
        "--format",
        action="store",
        dest="format",
        default='json',
        help="output format, allowed values: csv, txt, json (default)")

    parser.add_option(
        "-g",
        "--group",
        action="store_true",
        dest="group",
        default=False,
        help=
        "group results (i.e. technologies found on subpages of other scanned URL "
        "aren't listed)")

    tools.add_log_options(parser)

    options = parser.parse_args()[0]

    tools.use_log_options(options)

    if not options.urls:
        parser.error("Argument -u missing")
        return

    timeout = int(options.TIMEOUT)

    # "@file" form: read one URL per line from the named file.
    if options.urls[0] == "@":
        try:
            # with-statement guarantees the handle is closed even if
            # readlines() raises.
            with open(options.urls[1:]) as url_file:
                urls = url_file.readlines()
        except Exception as e:
            # an I/O exception?
            logging.error("Error reading URL file %s, terminating: %s",
                          options.urls[1:], tools.error_to_str(e))
            return
    else:
        urls = [x.strip() for x in options.urls.split(",") if x.strip() != ""]

    if options.format not in output_format_map:
        parser.error("Invalid format specified")
        return

    Clues.get_clues(options.clues_file)

    results = Detector().detect_multiple(urls,
                                         limit=options.limit,
                                         exclude=options.exclude,
                                         timeout=timeout)

    if options.group:
        results = group(results)

    output = output_format_map[options.format]().retrieve(results=results)

    if options.output_file:
        try:
            with open(options.output_file, "w") as out_file:
                out_file.write(output)
            logging.debug("Results written to file %s", options.output_file)
        except Exception as e:
            # an I/O exception?
            logging.error("Error writing results to file %s, terminating: %s",
                          options.output_file, tools.error_to_str(e))
            return

    # Results are always echoed to STDOUT, even when written to a file.
    print(output)