def run_file(self, input_file):
    file_name, file_contents = input_file

    # Initialize the results for this input file.
    # This can be anything from file name to version
    # to any useful information.
    result = {'file_name': file_name}

    http_results = {}
    tls_results = {}
    dns_results = {}
    traceroute_results = {}
    url_metadata_results = {}
    file_metadata = {}
    file_comments = []

    # each pcap is stored in a separate file designated by a
    # number. the indexes are stored in the json file and the
    # pcap files are stored with their indexes as file names.
    pcap_results = {}
    pcap_indexes = {}
    url_index = 0
    index_row = None

    # we may want to make this threaded and concurrent
    csvreader = csv.reader(file_contents, delimiter=',', quotechar='"')
    for row in csvreader:
        """
        The first few lines are expected to be comments in
        "key: value" format. The first line after that should be
        the column header row, starting with "url", and the rest
        are data rows. This is a sample input file we're trying
        to parse:

        # comment: Global List,,,,,
        # date: 03-17-2015,,,,,
        # version: 1,,,,,
        # description: This is the global list. Last updated in 2012.,,,,
        url,country,category,description,rationale,provider
        http://8thstreetlatinas.com,glo,P**N,,,PRIV
        http://abpr2.railfan.net,glo,MISC,Pictures of trains,,PRIV
        """
        # parse file comments: if a comment looks like
        # "key: value", store it as a key-value pair; otherwise,
        # store it as a raw comment.
        if row[0][0] == '#':
            comment = row[0][1:].strip()
            if len(comment.split(':')) > 1:
                key, value = comment.split(':', 1)
                file_metadata[key.strip()] = value.strip()
            else:
                file_comments.append(comment)
            continue

        # detect the header row and store it; it is usually the
        # first non-comment row and starts with "url,"
        if row[0].strip().lower() == "url":
            index_row = row
            continue

        url = row[0].strip()
        # strip() never returns None, so test for an empty cell
        if not url:
            continue
        meta = row[1:]
        url_index += 1

        http_ssl = False
        ssl_port = 443
        http_path = '/'

        # parse the URL to extract the network location, HTTP
        # path, domain name, and scheme (SSL or plain)
        try:
            urlparse_object = urlparse.urlparse(url)
            http_netloc = urlparse_object.netloc

            # if the netloc is not urlparse-able, prepend "//"
            # to the URL
            if http_netloc == '':
                urlparse_object = urlparse.urlparse('//%s' % (url))
                http_netloc = urlparse_object.netloc

            domain_name = http_netloc.split(':')[0]

            http_path = urlparse_object.path
            if http_path == '':
                http_path = '/'

            # we assume the scheme is empty, "http", or "https";
            # other schemes (e.g. "ftp") are out of the scope of
            # this measurement
            if urlparse_object.scheme == "https":
                http_ssl = True
            if len(http_netloc.split(':')) == 2:
                # the port in the netloc is a string; convert it
                # for the TLS probe
                ssl_port = int(http_netloc.split(':')[1])
        except Exception as exp:
            logging.warning("%s: failed to parse URL: %s" % (url, exp))
            http_netloc = url
            http_ssl = False
            ssl_port = 443
            http_path = '/'
            domain_name = url

        # start tcpdump
        td = Tcpdump()
        tcpdump_started = False
        try:
            if self.record_pcaps:
                td.start()
                tcpdump_started = True
                logging.info("%s: tcpdump started..." % (url))
                # wait for tcpdump to initialize
                time.sleep(1)
        except Exception as exp:
            logging.warning("%s: tcpdump failed: %s" % (url, exp))

        # HTTP GET
        logging.info("%s: HTTP" % (url))
        try:
            http_results[url] = http.get_request(http_netloc,
                                                 http_path,
                                                 ssl=http_ssl)
        except Exception as exp:
            logging.warning("%s: HTTP test failed: %s" % (url, exp))
            http_results[url] = {"exception": str(exp)}

        # TLS certificate
        # this will only work if the URL starts with https://
        if http_ssl:
            try:
                tls_result = {}
                logging.info("%s: TLS certificate" % (domain_name))
                fingerprint, cert = tls.get_fingerprint(domain_name,
                                                        ssl_port)
                tls_result['port'] = ssl_port
                tls_result['fingerprint'] = fingerprint
                tls_result['cert'] = cert
                tls_results[domain_name] = tls_result
            except Exception as exp:
                logging.warning("%s: TLS certificate download "
                                "failed: %s" % (domain_name, exp))
                tls_results[domain_name] = {"exception": str(exp)}

        # DNS lookup
        logging.info("%s: DNS" % (domain_name))
        try:
            dns_results[domain_name] = dnslib.lookup_domain(domain_name)
        except Exception as exp:
            logging.warning("%s: DNS lookup failed: %s"
                            % (domain_name, exp))
            dns_results[domain_name] = {"exception": str(exp)}

        # traceroute, once per configured method
        for method in self.traceroute_methods:
            try:
                logging.info("%s: Traceroute (%s)"
                             % (domain_name, method.upper()))
                traceroute_results[domain_name] = traceroute.traceroute(
                    domain_name, method=method)
            except Exception as exp:
                logging.warning("%s: Traceroute (%s) failed: %s"
                                % (domain_name, method.upper(), exp))
                traceroute_results[domain_name] = {"exception": str(exp)}

        # end tcpdump
        if tcpdump_started:
            logging.info("%s: waiting for tcpdump..." % (url))
            # 2 seconds should be enough.
            time.sleep(2)
            td.stop()
            logging.info("%s: tcpdump stopped." % (url))
            pcap_indexes[url] = '%s-%s.pcap' % (file_name,
                                                format(url_index, '04'))
            pcap_results[pcap_indexes[url]] = td.pcap()

        # metadata
        url_metadata_results[url] = meta

    result["http"] = http_results
    result["tls"] = tls_results
    result["dns"] = dns_results
    result["traceroute"] = traceroute_results

    # if we have an index row, turn the URL metadata lists into
    # dictionaries keyed by the column names
    if index_row is not None:
        indexed_url_metadata = {}
        for url, meta in url_metadata_results.items():
            indexed_meta = {}
            try:
                for i in range(1, len(index_row)):
                    indexed_meta[index_row[i]] = meta[i - 1]
            except IndexError:
                # a short row keeps whatever columns it did have
                pass
            indexed_url_metadata[url] = indexed_meta
        url_metadata_results = indexed_url_metadata

    result["url_metadata"] = url_metadata_results
    result["file_metadata"] = file_metadata
    result["file_comments"] = file_comments
    if self.record_pcaps:
        result['pcap_indexes'] = pcap_indexes
        self.external_results = dict(self.external_results.items() +
                                     pcap_results.items())

    return result
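# Usage sketch (hypothetical, not part of the original API):
# run_file() expects a (file_name, file_contents) tuple, where
# file_contents is an iterable of CSV lines such as the handle
# returned by load_input_file(). Assuming an already-initialized
# experiment instance `exp`:
#
#     with open('global_list.csv') as list_file:
#         result = exp.run_file(('global_list.csv',
#                                list_file.readlines()))
#     print(result['dns'])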
def run_exp(self, name, exp_config=None, schedule_name=None):
    if name[-3:] == ".py":
        name = name[:-3]
    if name not in self.experiments:
        logging.error("Experiment file %s not found! Skipping." % name)
        return

    exp_class = self.experiments[name]

    results = {"meta": {}}
    try:
        logging.debug("Getting metadata for experiment...")
        meta = self.get_meta()
        results["meta"] = meta
    except Exception as exception:
        logging.exception("Error fetching metadata for "
                          "%s: %s" % (name, exception))
        results["meta_exception"] = str(exception)

    if schedule_name is not None:
        results["meta"]["schedule_name"] = schedule_name
    else:
        results["meta"]["schedule_name"] = name

    start_time = datetime.now()
    results["meta"]["client_time"] = start_time.isoformat()
    results["meta"]["centinel_version"] = centinel.__version__

    # include the VPN provider in the metadata
    if self.vpn_provider:
        results["meta"]["vpn_provider"] = self.vpn_provider

    input_files = {}
    if exp_config is not None:
        if (('input_files' in exp_config) and
                (exp_config['input_files'] is not None)):
            for filename in exp_config['input_files']:
                file_handle = self.load_input_file(filename)
                if file_handle is not None:
                    input_files[filename] = file_handle
        if (('params' in exp_config) and
                (exp_config['params'] is not None)):
            exp_class.params = exp_config['params']

    # if the scheduler does not specify input files, but the
    # experiment class specifies a list of input file names, load
    # them. failing to load input files does not stop the
    # experiment from running.
    if len(input_files) == 0:
        if exp_class.input_files is not None:
            for filename in exp_class.input_files:
                file_handle = self.load_input_file(filename)
                if file_handle is not None:
                    input_files[filename] = file_handle
        # otherwise, fall back to [schedule name].txt (deprecated)
        else:
            filename = "%s.txt" % name
            file_handle = self.load_input_file(filename)
            if file_handle is not None:
                input_files[filename] = file_handle

    try:
        # instantiate the experiment
        logging.debug("Initializing the experiment class for %s" % name)
        # these constants can be useful for some experiments, but
        # using them directly is not encouraged
        global_constants = {
            'experiments_dir': self.config['dirs']['experiments_dir'],
            'results_dir': self.config['dirs']['results_dir'],
            'data_dir': self.config['dirs']['data_dir']
        }
        exp_class.global_constants = global_constants
        exp = exp_class(input_files)
    except Exception as exception:
        logging.exception("Error initializing %s: %s" % (name, exception))
        results["init_exception"] = str(exception)
        return

    exp.global_constants = global_constants

    run_tcpdump = True
    if self.config['results']['record_pcaps'] is False:
        logging.info("Your configuration has disabled pcap "
                     "recording, tcpdump will not start.")
        run_tcpdump = False
        # disable this on the experiment too
        exp.record_pcaps = False
    if run_tcpdump and os.geteuid() != 0:
        logging.info("Centinel is not running as root, "
                     "tcpdump will not start.")
        run_tcpdump = False
    if run_tcpdump and exp_class.overrides_tcpdump:
        logging.info("Experiment overrides tcpdump recording.")
        run_tcpdump = False

    tcpdump_started = False
    try:
        if run_tcpdump:
            td = Tcpdump()
            # keep a module-level reference so stray captures can
            # be cleaned up elsewhere
            tds.append(td)
            td.start()
            tcpdump_started = True
            logging.info("tcpdump started...")
            # wait for tcpdump to initialize
            time.sleep(2)
    except Exception as exp:
        logging.exception("Failed to run tcpdump: %s" % (exp,))

    try:
        # run the experiment
        exp.run()
    except Exception as exception:
        logging.exception("Error running %s: %s" % (name, exception))
        results["runtime_exception"] = str(exception)
    except KeyboardInterrupt:
        logging.warning("Keyboard interrupt received, "
                        "stopping experiment...")

    # save any external results that the experiment has generated.
    # they can be anything that does not belong in the json file
    # (e.g. pcap files) and should all be compressed with bzip2.
    # the experiment is responsible for giving these a name and
    # keeping a list of the files in its json results.
    results_dir = self.config['dirs']['results_dir']
    if exp.external_results is not None:
        logging.debug("Writing external files for %s" % name)
        for fname, fcontents in exp.external_results.items():
            external_file_name = (
                "external_%s-%s-%s.bz2"
                % (name, start_time.strftime("%Y-%m-%dT%H%M%S.%f"),
                   fname))
            external_file_path = os.path.join(results_dir,
                                              external_file_name)
            try:
                # the data is bz2-compressed in memory, so write it
                # as plain bytes ('w:bz2' is not a valid open mode)
                with open(external_file_path, 'wb') as file_p:
                    data = bz2.compress(fcontents)
                    file_p.write(data)
                logging.debug("External file "
                              "%s written successfully" % fname)
            except Exception as exp:
                logging.exception("Failed to write external file: "
                                  "%s" % exp)
        logging.debug("Finished writing external files for %s" % name)

    if tcpdump_started:
        logging.info("Waiting for tcpdump to process packets...")
        # 5 seconds should be enough. this hasn't been tested on
        # a Raspberry Pi or a Hummingboard i2
        time.sleep(5)
        td.stop()
        logging.info("tcpdump stopped.")

        bz2_successful = False
        data = None
        try:
            pcap_file_name = (
                "pcap_%s-%s.pcap.bz2"
                % (name, start_time.strftime("%Y-%m-%dT%H%M%S.%f")))
            pcap_file_path = os.path.join(results_dir, pcap_file_name)
            with open(pcap_file_path, 'wb') as pcap_bz2, \
                    open(td.pcap_filename(), 'rb') as pcap:
                # compress the pcap in 10 KB chunks so that large
                # captures never have to fit in memory at once
                compressor = bz2.BZ2Compressor()
                compressed_size_so_far = 0
                for pcap_data in iter(lambda: pcap.read(10 * 1024), b''):
                    compressed_chunk = compressor.compress(pcap_data)
                    pcap_bz2.write(compressed_chunk)
                    compressed_size_so_far += len(compressed_chunk)
                # flush whatever the compressor still has buffered
                compressed_chunk = compressor.flush()
                pcap_bz2.write(compressed_chunk)
                compressed_size_so_far += len(compressed_chunk)

            uncompressed_size = os.path.getsize(td.pcap_filename())
            compression_ratio = 100 * (float(compressed_size_so_far) /
                                       float(uncompressed_size))
            logging.debug("pcap BZ2 compression: "
                          "compressed/uncompressed (ratio): "
                          "%d/%d (%.1f%%)"
                          % (compressed_size_so_far, uncompressed_size,
                             compression_ratio))
            logging.info("Saved pcap to %s." % pcap_file_path)
            bz2_successful = True
        except Exception as exception:
            logging.exception("Failed to compress and write "
                              "pcap file: %s" % exception)

        # if compression failed, fall back to a raw copy
        if not bz2_successful:
            logging.info("Writing pcap file uncompressed")
            try:
                pcap_file_name = (
                    "pcap_%s-%s.pcap"
                    % (name, start_time.strftime("%Y-%m-%dT%H%M%S.%f")))
                pcap_file_path = os.path.join(results_dir,
                                              pcap_file_name)
                with open(pcap_file_path, 'wb') as pcap_out, \
                        open(td.pcap_filename(), 'rb') as pcap:
                    for pcap_data in iter(lambda: pcap.read(10 * 1024),
                                          b''):
                        pcap_out.write(pcap_data)
                logging.info("Saved pcap to %s." % pcap_file_path)
            except Exception as exception:
                logging.exception("Failed to write "
                                  "pcap file: %s" % exception)

        # delete pcap data to free up some memory
        logging.debug("Removing pcap data from memory")
        td.delete()
        del data
        del td

    # close input file handle(s)
    logging.debug("Closing input files for %s" % name)
    if type(input_files) is dict:
        for file_name, file_handle in input_files.items():
            try:
                file_handle.close()
            except AttributeError:
                logging.warning("Closing %s failed" % file_name)
    logging.debug("Input files closed for %s" % name)

    logging.debug("Storing results for %s" % name)
    try:
        results[name] = exp.results
    except Exception as exception:
        logging.exception("Error storing results for "
                          "%s: %s" % (name, exception))
        if "results_exception" not in results:
            results["results_exception"] = {}
        results["results_exception"][name] = str(exception)

    end_time = datetime.now()
    time_taken = (end_time - start_time)
    results["meta"]["time_taken"] = time_taken.total_seconds()
    logging.info("%s took %s to finish." % (name, time_taken))

    logging.debug("Saving %s results to file" % name)
    try:
        # pretty-printing results increases the file size, but
        # files are compressed before sending.
        result_file_path = self.get_result_file(
            name, start_time.strftime("%Y-%m-%dT%H%M%S.%f"))
        result_file = bz2.BZ2File(result_file_path, "w")
        json.dump(results, result_file, indent=2,
                  separators=(',', ': '),
                  # ignore encoding errors; these will be dealt
                  # with on the server
                  ensure_ascii=False)
        result_file.close()
        # free up memory by deleting results from memory
        del results
        del result_file
    except Exception as exception:
        logging.exception("Error saving results for "
                          "%s to file: %s" % (name, exception))
    logging.debug("Done saving %s results to file" % name)
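# The chunked bz2 pattern used above, factored into a standalone
# helper as an illustration. A minimal sketch: the name
# bz2_compress_file and the default chunk size are assumptions,
# not part of the original code. Compressing in fixed-size chunks
# keeps memory use flat no matter how large the capture grows.
def bz2_compress_file(src_path, dst_path, chunk_size=10 * 1024):
    """Compress src_path into dst_path with bz2; return bytes written."""
    compressor = bz2.BZ2Compressor()
    written = 0
    with open(src_path, 'rb') as src, open(dst_path, 'wb') as dst:
        for chunk in iter(lambda: src.read(chunk_size), b''):
            out = compressor.compress(chunk)
            dst.write(out)
            written += len(out)
        # flush any data still buffered inside the compressor
        out = compressor.flush()
        dst.write(out)
        written += len(out)
    return written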