def mapper(self, _, line):
    """MRJob mapper: stream one Common Crawl WARC file from S3 and emit
    detected technologies for every HTTP response record.

    Args:
        _: ignored mapper key.
        line: S3 key path of a gzipped WARC file inside the
            'aws-publicdatasets' bucket.

    Yields:
        ({"tech": [...], "url": ..., "date": ..., "domain": ...}, 1)
        one pair per 'application/http; msgtype=response' record.
    """
    # Anonymous connection: the Common Crawl bucket is public.
    conn = boto.connect_s3(anon=True)
    pds = conn.get_bucket('aws-publicdatasets')
    k = Key(pds, line)
    # Decompress the WARC on the fly while streaming from S3.
    f = warc.WARCFile(fileobj=GzipStreamFile(k))
    # Build the detector once instead of three times per record
    # (assumes Detector holds no per-call state -- TODO confirm).
    detector = Detector()
    for record in f:
        if record['Content-Type'] == 'application/http; msgtype=response':
            payload = record.payload.read()
            # HTTP headers and body are separated by the first blank line.
            headers, body = payload.split('\r\n\r\n', 1)
            # NOTE(review): header-based detection was disabled upstream;
            # only script and HTML clues are checked here.
            tech = []
            tech += detector.check_script(body)
            tech += detector.check_html(body)
            data = {
                "tech": tech,
                "url": record.url,
                "date": record.date,
                "domain": urlparse(record.url).netloc,
            }
            yield data, 1
def scan_target(self, url):
    """Detect technologies running on *url* and list them in the GUI treeview."""
    node_id = None
    try:
        results = Detector().detect(url=url, timeout=5)
        for key in results:
            matches = results[key]
            if not matches:
                self.status['text'] = 'No results found'
                continue
            # Label the tree root with the registered domain parts.
            ext = tldextract.extract(url)
            node_id = self.treeview.insert('', 'end', text='.'.join(ext[:3]))
            first = matches[0]
            tech_type = first.get('type')
            software = first.get('app')
            # Fall back to the literal string 'None' when no version was found.
            version = first.get('ver') or 'None'
            self.treeview.insert(node_id, 'end', text=tech_type,
                                 values=(software, version))
            self.status['text'] = 'done'
    except ValueError:
        self.status['text'] = "Invalid! Please input a full url"
    finally:
        del node_id
def test_remove_duplicates(self):
    """remove_duplicates() must collapse repeated apps in place, keeping
    the most informative version entry for each app."""
    entries = [
        {'app': 'A', 'ver': None},
        {'app': 'B', 'ver': "1.5"},
        {'app': 'C', 'ver': None},
        {'app': 'D', 'ver': "7.0"},
        {'app': 'E', 'ver': "1"},
        {'app': 'F', 'ver': "2.2"},
        {'app': 'A', 'ver': None},
        {'app': 'B', 'ver': "1.5"},
        {'app': 'C', 'ver': "be"},
        {'app': 'D', 'ver': "222"},
        {'app': 'A', 'ver': None},
        {'app': 'B', 'ver': "1.5"},
        {'app': 'E', 'ver': None},
        {'app': 'E', 'ver': "1.3"},
        {'app': 'F', 'ver': "2"},
        {'app': 'F', 'ver': None},
    ]
    expected = [
        {'app': 'A', 'ver': None},
        {'app': 'B', 'ver': "1.5"},
        {'app': 'C', 'ver': "be"},
        {'app': 'D', 'ver': "7.0"},
        {'app': 'E', 'ver': "1.3"},
        {'app': 'F', 'ver': "2.2"},
        {'app': 'D', 'ver': "222"},
    ]
    # The method mutates its argument rather than returning a new list.
    Detector().remove_duplicates(entries)
    assert entries == expected
def setUp(self):
    """Create the shared detector fixture and shortcuts to its clue sets."""
    detector = Detector()
    self.detector = detector
    self.apps = detector.apps
    self.categories = detector.categories
def main(timeout=TIMEOUT):
    """Command-line entry point: parse options, detect technologies on the
    given URLs, and write the formatted results to a file or STDOUT.

    Args:
        timeout: default per-URL timeout in seconds, overridable via -t.
    """
    desc = """WAD - This component analyzes given URL(s) and detects technologies, libraries,
frameworks etc. used by this application, from the OS and web server level, to the
programming platform and frameworks, and server- and client-side applications, tools and
libraries. For example: OS=Linux, webserver=Apache, platform=PHP, cms=Drupal,
analytics=Google Analytics, javascript-lib=jQuery etc."""
    parser = OptionParser(
        description=desc,
        usage="Usage: %prog -u <URLs|@URLfile>\nHelp: %prog -h",
        version="%prog 1.0")
    parser.add_option(
        "-u", "--url", dest="urls", metavar="URLS|FILE",
        help="list of URLs (comma-separated), or a file with a list of URLs (one per line)")
    # Raw strings below: the help text contains regex backslashes ('\.'),
    # which are invalid escape sequences in ordinary string literals.
    parser.add_option(
        "-l", "--limit", dest="limit", metavar="URLMASK",
        help="in case of redirections, only include pages with URLs matching this mask - "
             r"e.g. 'https?://[^/]*\.abc\.com/'")
    parser.add_option(
        "-x", "--exclude", dest="exclude", metavar="URLMASK",
        help="in case of redirections, exclude pages with URL matching this mask - "
             "e.g. 'https?://[^/]*/(login|logout)'")
    parser.add_option(
        "-o", "--output", dest="output_file", metavar="FILE",
        help="output file for detection results (default: STDOUT)")
    parser.add_option(
        "-c", "--clues", dest="clues_file", metavar="FILE", default=None,
        help="clues for detecting web applications and technologies")
    parser.add_option(
        "-t", "--timeout", action="store", dest="TIMEOUT", default=timeout,
        help="set timeout (in seconds) for accessing a single URL")
    parser.add_option(
        "-f", "--format", action="store", dest="format", default='json',
        help="output format, allowed values: csv, txt, json (default)")
    parser.add_option(
        "-g", "--group", action="store_true", dest="group", default=False,
        help="group results (i.e. technologies found on subpages of other scanned URL "
             "aren't listed)")
    tools.add_log_options(parser)

    options = parser.parse_args()[0]
    tools.use_log_options(options)

    if not options.urls:
        parser.error("Argument -u missing")
        return

    timeout = int(options.TIMEOUT)

    # '@file' syntax: read one URL per line from the named file.
    if options.urls[0] == "@":
        try:
            with open(options.urls[1:]) as f:
                urls = f.readlines()
        except Exception as e:  # an I/O exception?
            logging.error("Error reading URL file %s, terminating: %s",
                          options.urls[1:], tools.error_to_str(e))
            return
    else:
        urls = [x.strip() for x in options.urls.split(",") if x.strip() != ""]

    if options.format not in output_format_map:
        parser.error("Invalid format specified")
        return

    Clues.get_clues(options.clues_file)

    results = Detector().detect_multiple(urls, limit=options.limit,
                                         exclude=options.exclude,
                                         timeout=timeout)

    if options.group:
        results = group(results)

    output = output_format_map[options.format]().retrieve(results=results)

    if options.output_file:
        try:
            # 'with' guarantees the handle closes even if write() fails.
            with open(options.output_file, "w") as f:
                f.write(output)
            logging.debug("Results written to file %s", options.output_file)
        except Exception as e:  # an I/O exception?
            logging.error("Error writing results to file %s, terminating: %s",
                          options.output_file, tools.error_to_str(e))
            return

    print(output)