def run(self):
    # lookup all the given domains
    domains = []
    for line in self.input_file:
        domains.append(line.strip())

    lookup_results = dnslib.lookup_domains(domains)
    lookup_results['exp-name'] = "lookups"
    self.results.append(lookup_results)

    chaos_results = dnslib.send_chaos_queries()
    chaos_results["exp-name"] = "chaos"
    self.results.append(chaos_results)
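# Usage sketch (illustrative, not from the original module): run() above
# expects its object to provide `input_file` (an iterable of lines, one
# domain per line) and a `results` list. `DNSExperiment` below is a
# hypothetical stand-in for the real experiment base class.
class DNSExperiment(object):
    def __init__(self, input_file):
        self.input_file = input_file  # iterable of domain names
        self.results = []             # populated by run()

    # bind the module-level run() defined above as a method
    run = run

# e.g.:
#   with open("domains.txt") as input_file:  # hypothetical input path
#       exp = DNSExperiment(input_file)
#       exp.run()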
def test_lookup_domains_thread_error(self, invalid_domains):
    """
    .2 test multiple domains
    """
    #* test results is not none
    #+ given a great number of domains
    domains = invalid_domains if len(invalid_domains) != 0 else []
    result = dnslib.lookup_domains(domains)
    #- test 'error' is in results
    assert result is not None
    assert 'error' in result
    assert result['error'] == "Threads took too long to finish."
def test_lookup_domains_good(self, ipregex, valid_domains):
    """
    .2 test multiple domains w/ domain names

    :param ipregex: valid IP regular expression fixture
    """
    if len(valid_domains) != 0:
        domains = valid_domains
    else:
        domains = ['www.google.com', 'www.github.com']
    results = dnslib.lookup_domains(domains)
    #* test results is not None
    assert results is not None
    #* test 'error' is not in results
    assert 'error' not in results
    #* test names of domains are in results
    for name in domains:
        assert name in results
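# Fixture sketch (an assumption, not from the original suite): the two
# tests above rely on `invalid_domains`, `valid_domains`, and `ipregex`
# fixtures. A minimal conftest.py providing them could look like this;
# the real suite's values may differ.
import re

import pytest


@pytest.fixture
def valid_domains():
    # domains expected to resolve cleanly
    return ['www.google.com', 'www.github.com']


@pytest.fixture
def invalid_domains():
    # a large batch of unresolvable names, intended to push the lookup
    # threads past their time budget
    return ['no-such-host-%d.invalid' % i for i in range(500)]


@pytest.fixture
def ipregex():
    # matches a dotted-quad IPv4 address
    return re.compile(r'^(\d{1,3}\.){3}\d{1,3}$')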
def run_file(self, input_file, result):
    file_name, file_contents = input_file
    run_start_time = time.time()

    tcp_connect_inputs = []
    http_inputs = []
    tls_inputs = []
    dns_inputs = []
    traceroute_inputs = []
    url_metadata_results = {}
    file_metadata = {}
    file_comments = []
    index_row = None

    # first parse the input and create data structures
    csvreader = csv.reader(file_contents, delimiter=',', quotechar='"')
    for row in csvreader:
        """
        The first few lines are expected to be comments in "key: value"
        format. The first line after that could be our column header
        row, starting with "url", and the rest are data rows.

        This is a sample input file we're trying to parse:

        # comment: Global List,,,,,
        # date: 03-17-2015,,,,,
        # version: 1,,,,,
        # description: This is the global list. Last updated in 2012.,,,,
        url,country,category,description,rationale,provider
        http://8thstreetlatinas.com,glo,P**N,,,PRIV
        http://abpr2.railfan.net,glo,MISC,Pictures of trains,,PRIV
        """
        # parse file comments: if it looks like "key: value",
        # parse it as a key-value pair; otherwise, just
        # store it as a raw comment.
        if row[0][0] == '#':
            row = row[0][1:].strip()
            if len(row.split(':')) > 1:
                key, value = row.split(':', 1)
                key = key.strip()
                value = value.strip()
                file_metadata[key] = value
            else:
                file_comments.append(row)
            continue

        # detect the header row and store it
        # (it is usually the first row and starts with "url,")
        if row[0].strip().lower() == "url":
            index_row = row
            continue

        url = row[0].strip()
        if not url:
            # skip rows with an empty URL cell
            continue
        meta = row[1:]
        http_ssl = False
        ssl_port = 443
        port = 80

        # parse the URL to extract netlocation, HTTP path, domain name,
        # and HTTP method (SSL or plain)
        try:
            urlparse_object = urlparse.urlparse(url)
            http_netloc = urlparse_object.netloc

            # if netloc is not urlparse-able, add // to the start
            # of the URL
            if http_netloc == '':
                urlparse_object = urlparse.urlparse('//%s' % url)
                http_netloc = urlparse_object.netloc

            domain_name = http_netloc.split(':')[0]

            http_path = urlparse_object.path
            if http_path == '':
                http_path = '/'

            # we assume the scheme is either empty, "http", or "https";
            # other schemes (e.g. "ftp") are out of the scope of this
            # measurement
            if urlparse_object.scheme == "https":
                http_ssl = True
                if len(http_netloc.split(':')) == 2:
                    ssl_port = http_netloc.split(':')[1]

            if len(http_netloc.split(':')) == 2:
                port = int(http_netloc.split(':')[1])
        except Exception as exp:
            logging.exception("%s: failed to parse URL: %s" % (url, exp))
            http_netloc = url
            http_ssl = False
            port = 80
            ssl_port = 443
            http_path = '/'
            domain_name = url

        # TCP connect
        if http_ssl:
            if (domain_name, ssl_port) not in tcp_connect_inputs:
                tcp_connect_inputs.append((domain_name, ssl_port))
        else:
            if (domain_name, port) not in tcp_connect_inputs:
                tcp_connect_inputs.append((domain_name, port))

        # HTTP GET
        http_inputs.append({
            "host": http_netloc,
            "path": http_path,
            "ssl": http_ssl,
            "url": url
        })

        # TLS certificate
        # this will only work if the URL starts with https://, or
        # if the tls_for_all config parameter is set
        if self.tls_for_all or http_ssl:
            key = "%s:%s" % (domain_name, ssl_port)
            if key not in tls_inputs:
                tls_inputs.append(key)

        # DNS lookup
        if domain_name not in dns_inputs:
            dns_inputs.append(domain_name)

        # Traceroute
        if domain_name not in traceroute_inputs:
            traceroute_inputs.append(domain_name)

        # Metadata
        url_metadata_results[url] = meta

    # the actual tests are run concurrently here
    if tcp_connect is not None:
        shuffle(tcp_connect_inputs)
        start = time.time()
        logging.info("Running TCP connect tests...")
        result["tcp_connect"] = {}
        tcp_connect.tcp_connect_batch(tcp_connect_inputs,
                                      results=result["tcp_connect"])
        elapsed = time.time() - start
        logging.info("TCP connect tests took "
                     "%d seconds for %d hosts and ports." %
                     (elapsed, len(tcp_connect_inputs)))

    shuffle(http_inputs)
    start = time.time()
    logging.info("Running HTTP GET requests...")
    result["http"] = {}
    try:
        http.get_requests_batch(http_inputs, results=result["http"])
    except TypeError:
        # backward compatibility with versions that don't support
        # the results= keyword
        result["http"] = http.get_requests_batch(http_inputs)
    elapsed = time.time() - start
    logging.info("HTTP GET requests took "
                 "%d seconds for %d URLs." %
                 (elapsed, len(http_inputs)))

    shuffle(tls_inputs)
    start = time.time()
    logging.info("Running TLS certificate requests...")
    result["tls"] = {}
    try:
        tls.get_fingerprint_batch(tls_inputs, results=result["tls"])
    except TypeError:
        # backward compatibility with versions that don't support
        # the results= keyword
        result["tls"] = tls.get_fingerprint_batch(tls_inputs)
    elapsed = time.time() - start
    logging.info("TLS certificate requests took "
                 "%d seconds for %d domains." %
                 (elapsed, len(tls_inputs)))

    shuffle(dns_inputs)
    start = time.time()
    logging.info("Running DNS requests...")
    result["dns"] = {}
    if len(self.exclude_nameservers) > 0:
        logging.info("Excluding nameservers: %s" %
                     ", ".join(self.exclude_nameservers))
        try:
            dnslib.lookup_domains(
                dns_inputs, results=result["dns"],
                exclude_nameservers=self.exclude_nameservers)
        except TypeError:
            # backward compatibility with versions that don't support
            # the results= keyword
            result["dns"] = dnslib.lookup_domains(
                dns_inputs, exclude_nameservers=self.exclude_nameservers)
    else:
        try:
            dnslib.lookup_domains(dns_inputs, results=result["dns"])
        except TypeError:
            # backward compatibility with versions that don't support
            # the results= keyword
            result["dns"] = dnslib.lookup_domains(dns_inputs)
    elapsed = time.time() - start
    logging.info("DNS requests took "
                 "%d seconds for %d domains." %
                 (elapsed, len(dns_inputs)))

    for method in self.traceroute_methods:
        shuffle(traceroute_inputs)
        start = time.time()
        logging.info("Running %s traceroutes..." % method.upper())
        result["traceroute.%s" % method] = {}
        try:
            traceroute.traceroute_batch(
                traceroute_inputs,
                results=result["traceroute.%s" % method],
                method=method)
        except TypeError:
            # backward compatibility with versions that don't support
            # the results= keyword
            result["traceroute.%s" % method] = traceroute.traceroute_batch(
                traceroute_inputs, method)
        elapsed = time.time() - start
        logging.info("Traceroutes took %d seconds for %d domains." %
                     (elapsed, len(traceroute_inputs)))

    # if we have an index row, we should turn URL metadata
    # into dictionaries
    if index_row is not None:
        indexed_url_metadata = {}
        for url, meta in url_metadata_results.items():
            indexed_meta = {}
            try:
                for i in range(1, len(index_row)):
                    indexed_meta[index_row[i]] = meta[i - 1]
                indexed_url_metadata[url] = indexed_meta
            except IndexError:
                # a data row shorter than the header: keep whatever
                # columns we managed to index
                indexed_url_metadata[url] = indexed_meta
                continue
        url_metadata_results = indexed_url_metadata

    result["url_metadata"] = url_metadata_results
    result["file_metadata"] = file_metadata
    result["file_comments"] = file_comments

    run_finish_time = time.time()
    elapsed = run_finish_time - run_start_time
    result["total_time"] = elapsed
    logging.info("Testing took a total of %d seconds." % elapsed)
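# Pattern sketch (illustrative): run_file() above repeatedly calls a
# batch helper with a results= keyword and falls back on TypeError for
# older library versions whose functions return results instead. The
# same pattern in isolation, with a hypothetical batch_fn:
def call_batch_compat(batch_fn, inputs, results):
    """Call batch_fn, tolerating versions without a results= keyword."""
    try:
        batch_fn(inputs, results=results)
        return results
    except TypeError:
        # older signature: results are returned rather than filled in.
        # Note the same caveat as above: a TypeError raised *inside*
        # a new-style batch_fn also lands here.
        return batch_fn(inputs)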
def run_file(self, input_file):
    file_name, file_contents = input_file

    # Initialize the results for this input file.
    # This can be anything from file name to version
    # to any useful information.
    result = {"file_name": file_name}

    run_start_time = time.time()

    http_results = {}
    http_inputs = []

    tls_results = {}
    tls_inputs = []

    dns_results = {}
    dns_inputs = []

    traceroute_results = {}
    traceroute_inputs = []

    url_metadata_results = {}
    file_metadata = {}
    file_comments = []
    index_row = None
    comments = ""

    # first parse the input and create data structures
    csvreader = csv.reader(file_contents, delimiter=',', quotechar='"')
    for row in csvreader:
        """
        The first few lines are expected to be comments in "key: value"
        format. The first line after that could be our column header
        row, starting with "url", and the rest are data rows.

        This is a sample input file we're trying to parse:

        # comment: Global List,,,,,
        # date: 03-17-2015,,,,,
        # version: 1,,,,,
        # description: This is the global list. Last updated in 2012.,,,,
        url,country,category,description,rationale,provider
        http://8thstreetlatinas.com,glo,P**N,,,PRIV
        http://abpr2.railfan.net,glo,MISC,Pictures of trains,,PRIV
        """
        # parse file comments: if it looks like "key: value",
        # parse it as a key-value pair; otherwise, just
        # store it as a raw comment.
        if row[0][0] == '#':
            row = row[0][1:].strip()
            if len(row.split(':')) > 1:
                key, value = row.split(':', 1)
                key = key.strip()
                value = value.strip()
                file_metadata[key] = value
            else:
                file_comments.append(row)
            continue

        # detect the header row and store it
        # (it is usually the first row and starts with "url,")
        if row[0].strip().lower() == "url":
            index_row = row
            continue

        url = row[0].strip()
        if not url:
            # skip rows with an empty URL cell
            continue
        meta = row[1:]
        http_ssl = False
        ssl_port = 443
        http_path = '/'

        # parse the URL to extract netlocation, HTTP path, domain name,
        # and HTTP method (SSL or plain)
        try:
            urlparse_object = urlparse.urlparse(url)
            http_netloc = urlparse_object.netloc

            # if netloc is not urlparse-able, add // to the start
            # of the URL
            if http_netloc == '':
                urlparse_object = urlparse.urlparse('//%s' % url)
                http_netloc = urlparse_object.netloc

            domain_name = http_netloc.split(':')[0]

            http_path = urlparse_object.path
            if http_path == '':
                http_path = '/'

            # we assume the scheme is either empty, "http", or "https";
            # other schemes (e.g. "ftp") are out of the scope of this
            # measurement
            if urlparse_object.scheme == "https":
                http_ssl = True
                if len(http_netloc.split(':')) == 2:
                    ssl_port = http_netloc.split(':')[1]
        except Exception as exp:
            logging.exception("%s: failed to parse URL: %s" % (url, exp))
            http_netloc = url
            http_ssl = False
            ssl_port = 443
            http_path = '/'
            domain_name = url

        # HTTP GET
        http_inputs.append({
            "host": http_netloc,
            "path": http_path,
            "ssl": http_ssl,
            "url": url
        })

        # TLS certificate
        # this will only work if the URL starts with https://
        if http_ssl:
            tls_inputs.append("%s:%s" % (domain_name, ssl_port))

        # DNS lookup
        dns_inputs.append(domain_name)

        # Traceroute
        traceroute_inputs.append(domain_name)

        # Metadata
        url_metadata_results[url] = meta

    # the actual tests are run concurrently here
    shuffle(http_inputs)
    start = time.time()
    logging.info("Running HTTP GET requests...")
    result["http"] = http.get_requests_batch(http_inputs)
    elapsed = time.time() - start
    logging.info("HTTP GET requests took "
                 "%d seconds for %d URLs." %
                 (elapsed, len(http_inputs)))

    shuffle(tls_inputs)
    start = time.time()
    logging.info("Running TLS certificate requests...")
    result["tls"] = tls.get_fingerprint_batch(tls_inputs)
    elapsed = time.time() - start
    logging.info("TLS certificate requests took "
                 "%d seconds for %d domains." %
                 (elapsed, len(tls_inputs)))

    shuffle(dns_inputs)
    start = time.time()
    logging.info("Running DNS requests...")
    result["dns"] = dnslib.lookup_domains(dns_inputs)
    elapsed = time.time() - start
    logging.info("DNS requests took "
                 "%d seconds for %d domains." %
                 (elapsed, len(dns_inputs)))

    for method in self.traceroute_methods:
        shuffle(traceroute_inputs)
        start = time.time()
        logging.info("Running %s traceroutes..." % method.upper())
        result["traceroute.%s" % method] = traceroute.traceroute_batch(
            traceroute_inputs, method)
        elapsed = time.time() - start
        logging.info("Traceroutes took %d seconds for %d domains." %
                     (elapsed, len(traceroute_inputs)))

    # if we have an index row, we should turn URL metadata
    # into dictionaries
    if index_row is not None:
        indexed_url_metadata = {}
        for url, meta in url_metadata_results.items():
            indexed_meta = {}
            try:
                for i in range(1, len(index_row)):
                    indexed_meta[index_row[i]] = meta[i - 1]
                indexed_url_metadata[url] = indexed_meta
            except IndexError:
                # a data row shorter than the header: keep whatever
                # columns we managed to index
                indexed_url_metadata[url] = indexed_meta
                continue
        url_metadata_results = indexed_url_metadata

    result["url_metadata"] = url_metadata_results
    result["file_metadata"] = file_metadata
    result["file_comments"] = file_comments

    run_finish_time = time.time()
    elapsed = run_finish_time - run_start_time
    result["total_time"] = elapsed
    logging.info("Testing took a total of %d seconds." % elapsed)

    return result
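# Parsing sketch (illustrative): both run_file() variants rely on the
# same urlparse trick, namely that a scheme-less entry like
# "example.com:8080/x" yields an empty netloc until "//" is prepended.
# The logic in isolation, with a Python 3 import fallback added:
try:
    import urlparse                   # Python 2, as used above
except ImportError:
    import urllib.parse as urlparse   # Python 3


def split_url(url):
    """Return (netloc, domain, path, is_ssl) for one list entry."""
    parsed = urlparse.urlparse(url)
    netloc = parsed.netloc
    if netloc == '':
        # no scheme present, so the host ended up in path; reparse
        # with a leading "//" to land it in netloc
        parsed = urlparse.urlparse('//%s' % url)
        netloc = parsed.netloc
    domain = netloc.split(':')[0]
    path = parsed.path or '/'
    return netloc, domain, path, parsed.scheme == "https"

# e.g. split_url("example.com/x") -> ('example.com', 'example.com', '/x', False)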