예제 #1
    def run_file(self, input_file):
        """Run the baseline measurements for every URL in one input file.

        The input is read as CSV. A row whose first cell starts with "#" is
        a file comment ("key: value" pairs go to the file metadata, anything
        else is kept as a raw comment), a row whose first cell is "url" is
        the column header, and every other row is a URL followed by its
        metadata columns.

        For each URL this performs an HTTP GET, a TLS certificate fetch
        (https URLs only), a DNS lookup and one traceroute per entry in
        ``self.traceroute_methods``. A failing step is logged and recorded
        as ``{"exception": <message>}`` instead of aborting the run.

        Args:
            input_file: a ``(file_name, file_contents)`` pair; the contents
                are handed to ``csv.reader``.

        Returns:
            dict with the keys ``file_name``, ``http``, ``tls``, ``dns``,
            ``traceroute``, ``url_metadata``, ``file_metadata`` and
            ``file_comments``, plus ``pcap_indexes`` when
            ``self.record_pcaps`` is set.

        Side effects:
            When ``self.record_pcaps`` is set, the captured pcaps are merged
            into ``self.external_results``.
        """
        # local import: only the tcpdump settle delays below need it
        import time

        file_name, file_contents = input_file

        # Initialize the results for this input file.
        # This can be anything from file name to version
        # to any useful information.
        result = {}
        result["file_name"] = file_name

        http_results = {}
        tls_results = {}
        dns_results = {}
        traceroute_results = {}
        url_metadata_results = {}
        file_metadata = {}
        file_comments = []

        # each pcap file is stored in a separate file
        # designated by a number. the indexes are stored
        # in the json file and the pcap files are stored
        # with their indexes as file names.
        pcap_results = {}
        pcap_indexes = {}
        url_index = 0
        index_row = None

        # we may want to make this threaded and concurrent
        csvreader = csv.reader(file_contents, delimiter=',', quotechar='"')
        for row in csvreader:
            # First few lines are expected to be comments in key: value
            # format. The first line after that could be our column header
            # row, starting with "url", and the rest are data rows.
            # This is a sample input file we're trying to parse:
            #
            #   # comment: Global List,,,,,
            #   # date: 03-17-2015,,,,,
            #   # version: 1,,,,,
            #   # description: This is the global list. Last updated in 2012.,,,,
            #   http://abpr2.railfan.net,glo,MISC,Pictures of trains,,PRIV

            # csv.reader yields [] for a blank line, and the first cell may
            # be empty; neither can be indexed with row[0][0] below.
            if not row or not row[0]:
                continue

            # parse file comments, if it looks like "key : value",
            # parse it as a key-value pair. otherwise, just
            # store it as a raw comment.
            if row[0][0] == '#':
                comment = row[0][1:].strip()
                if ':' in comment:
                    key, value = comment.split(':', 1)
                    file_metadata[key.strip()] = value.strip()
                else:
                    file_comments.append(comment)
                continue

            # detect the header row and store it
            # it is usually the first row and starts with "url,"
            if row[0].strip().lower() == "url":
                index_row = row
                continue

            url = row[0].strip()
            # str.strip() never returns None, so test for emptiness instead
            if not url:
                continue

            meta = row[1:]
            url_index = url_index + 1
            http_ssl = False
            ssl_port = 443
            http_path = '/'

            # parse the URL to extract netlocation, HTTP path, domain name,
            # and HTTP method (SSL or plain)
            try:
                urlparse_object = urlparse.urlparse(url)
                http_netloc = urlparse_object.netloc

                # if netloc is not urlparse-able, add // to the start
                # of URL
                if http_netloc == '':
                    urlparse_object = urlparse.urlparse('//%s' % (url))
                    http_netloc = urlparse_object.netloc

                domain_name = http_netloc.split(':')[0]

                http_path = urlparse_object.path
                if http_path == '':
                    http_path = '/'

                # we assume scheme is either empty, or "http", or "https"
                # other schemes (e.g. "ftp") are out of the scope of this
                # measurement
                if urlparse_object.scheme == "https":
                    http_ssl = True
                    if len(http_netloc.split(':')) == 2:
                        # NOTE(review): the port stays a string here while
                        # the default above is an int
                        ssl_port = http_netloc.split(':')[1]

            except Exception as exp:
                logging.warning("%s: failed to parse URL: %s" % (url, exp))
                http_netloc = url
                http_ssl = False
                ssl_port = 443
                http_path = '/'
                domain_name = url

            # start tcpdump
            td = Tcpdump()
            tcpdump_started = False

            try:
                if self.record_pcaps:
                    # NOTE(review): start()/stop() and the one second delay
                    # are restored from context; confirm against Tcpdump
                    td.start()
                    tcpdump_started = True
                    logging.info("%s: tcpdump started..." % (url))
                    # wait for tcpdump to initialize
                    time.sleep(1)
            except Exception as exp:
                logging.warning("%s: tcpdump failed: %s" % (url, exp))

            # HTTP GET
            logging.info("%s: HTTP" % (url))
            try:
                # NOTE(review): the argument list was truncated in the
                # reviewed copy; path and SSL flag are restored from the
                # values computed above - confirm the keyword name
                http_results[url] = http.get_request(http_netloc,
                                                     http_path,
                                                     ssl=http_ssl)
            except Exception as exp:
                logging.info("%s: HTTP test failed: %s" %
                             (url, exp))
                http_results[url] = {"exception": str(exp)}

            # TLS certificate
            # this will only work if the URL starts with https://
            if http_ssl:
                try:
                    tls_result = {}
                    logging.info("%s: TLS certificate" %
                                 (domain_name))
                    fingerprint, cert = tls.get_fingerprint(domain_name,
                                                            ssl_port)
                    tls_result['port'] = ssl_port
                    tls_result['fingerprint'] = fingerprint
                    tls_result['cert'] = cert

                    tls_results[domain_name] = tls_result
                except Exception as exp:
                    logging.info("%s: TLS certfiticate download failed: %s" %
                                 (domain_name, exp))
                    tls_results[domain_name] = {"exception": str(exp)}

            # DNS Lookup
            logging.info("%s: DNS" % (domain_name))
            try:
                dns_results[domain_name] = dnslib.lookup_domain(domain_name)
            except Exception as exp:
                logging.info("%s: DNS lookup failed: %s" %
                             (domain_name, exp))
                dns_results[domain_name] = {"exception": str(exp)}

            # Traceroute
            # NOTE(review): results are keyed by domain only, so with more
            # than one method the last method's result is the one kept.
            for method in self.traceroute_methods:
                try:
                    logging.info("%s: Traceroute (%s)"
                                 % (domain_name, method.upper()))
                    traceroute_results[domain_name] = traceroute.traceroute(
                        domain_name, method=method)
                except Exception as exp:
                    logging.info("%s: Traceroute (%s) failed: %s" %
                                 (domain_name, method.upper(), exp))
                    traceroute_results[domain_name] = {
                        "exception": str(exp)}

            # end tcpdump
            if tcpdump_started:
                logging.info("%s: waiting for tcpdump..." % (url))
                # 2 seconds should be enough.
                time.sleep(2)
                td.stop()
                logging.info("%s: tcpdump stopped." % (url))
                pcap_indexes[url] = '%s-%s.pcap' % (file_name,
                                                    format(url_index, '04'))
                pcap_results[pcap_indexes[url]] = td.pcap()

            # Meta-data
            url_metadata_results[url] = meta

        result["http"] = http_results
        result["tls"] = tls_results
        result["dns"] = dns_results
        result["traceroute"] = traceroute_results

        # if we have an index row, we should turn URL metadata
        # into dictionaries. zip() stops at the shorter sequence, so a row
        # with fewer cells than the header simply yields a partial dict.
        if index_row is not None:
            columns = index_row[1:]
            url_metadata_results = dict(
                (url, dict(zip(columns, meta)))
                for url, meta in url_metadata_results.items())

        result["url_metadata"] = url_metadata_results
        result["file_metadata"] = file_metadata
        result["file_comments"] = file_comments
        if self.record_pcaps:
            result['pcap_indexes'] = pcap_indexes
            # build a new dict rather than items() + items(), which only
            # works where items() returns a list
            merged = dict(self.external_results)
            merged.update(pcap_results)
            self.external_results = merged

        return result
예제 #2
    def run_file(self, input_file):
        """Measure every URL listed in a single CSV input file.

        Rows are classified by their first cell: a leading "#" marks a file
        comment (stored as metadata when it reads "key: value", otherwise
        kept as a raw comment), the value "url" marks the column header,
        and anything else is a URL followed by its metadata columns.

        Each URL gets an HTTP GET, a TLS certificate fetch when the scheme
        is https, a DNS lookup, and one traceroute per entry in
        ``self.traceroute_methods``. A step that raises is logged as a
        warning and recorded as ``{"exception": <message>}``.

        Args:
            input_file: a ``(file_name, file_contents)`` pair; the contents
                are handed to ``csv.reader``.

        Returns:
            dict with the keys ``file_name``, ``http``, ``tls``, ``dns``,
            ``traceroute``, ``url_metadata``, ``file_metadata`` and
            ``file_comments``, plus ``pcap_indexes`` when
            ``self.record_pcaps`` is set.

        Side effects:
            When ``self.record_pcaps`` is set, the captured pcaps are merged
            into ``self.external_results``.
        """
        # local import: only the tcpdump settle delays below need it
        import time

        file_name, file_contents = input_file

        # Initialize the results for this input file.
        # This can be anything from file name to version
        # to any useful information.
        result = {'file_name': file_name}

        http_results = {}
        tls_results = {}
        dns_results = {}
        traceroute_results = {}
        url_metadata_results = {}
        file_metadata = {}
        file_comments = []

        # each pcap file is stored in a separate file
        # designated by a number. the indexes are stored
        # in the json file and the pcap files are stored
        # with their indexes as file names.
        pcap_results = {}
        pcap_indexes = {}
        url_index = 0
        index_row = None

        # we may want to make this threaded and concurrent
        csvreader = csv.reader(file_contents, delimiter=',', quotechar='"')
        for row in csvreader:
            # First few lines are expected to be comments in key: value
            # format. The first line after that could be our column header
            # row, starting with "url", and the rest are data rows.
            # This is a sample input file we're trying to parse:
            #
            #   # comment: Global List,,,,,
            #   # date: 03-17-2015,,,,,
            #   # version: 1,,,,,
            #   # description: This is the global list. Last updated in 2012.,,,,
            #   http://abpr2.railfan.net,glo,MISC,Pictures of trains,,PRIV

            # blank lines come back from csv.reader as [], and the first
            # cell may be empty; skip both before indexing into row[0]
            if not row or not row[0]:
                continue

            # parse file comments, if it looks like "key : value",
            # parse it as a key-value pair. otherwise, just
            # store it as a raw comment.
            if row[0][0] == '#':
                comment = row[0][1:].strip()
                if ':' in comment:
                    key, value = comment.split(':', 1)
                    file_metadata[key.strip()] = value.strip()
                else:
                    file_comments.append(comment)
                continue

            # detect the header row and store it
            # it is usually the first row and starts with "url,"
            if row[0].strip().lower() == "url":
                index_row = row
                continue

            url = row[0].strip()
            # str.strip() never returns None, so test for emptiness instead
            if not url:
                continue

            meta = row[1:]
            url_index = url_index + 1
            http_ssl = False
            ssl_port = 443
            http_path = '/'

            # parse the URL to extract netlocation, HTTP path, domain name,
            # and HTTP method (SSL or plain)
            try:
                urlparse_object = urlparse.urlparse(url)
                http_netloc = urlparse_object.netloc

                # if netloc is not urlparse-able, add // to the start
                # of URL
                if http_netloc == '':
                    urlparse_object = urlparse.urlparse('//%s' % (url))
                    http_netloc = urlparse_object.netloc

                domain_name = http_netloc.split(':')[0]

                http_path = urlparse_object.path
                if http_path == '':
                    http_path = '/'

                # we assume scheme is either empty, or "http", or "https"
                # other schemes (e.g. "ftp") are out of the scope of this
                # measurement
                if urlparse_object.scheme == "https":
                    http_ssl = True
                    if len(http_netloc.split(':')) == 2:
                        # NOTE(review): the port stays a string here while
                        # the default above is an int
                        ssl_port = http_netloc.split(':')[1]

            except Exception as exp:
                logging.warning("%s: failed to parse URL: %s" % (url, exp))
                http_netloc = url
                http_ssl = False
                ssl_port = 443
                http_path = '/'
                domain_name = url

            # start tcpdump
            td = Tcpdump()
            tcpdump_started = False

            try:
                if self.record_pcaps:
                    # NOTE(review): start()/stop() and the one second delay
                    # are restored from context; confirm against Tcpdump
                    td.start()
                    tcpdump_started = True
                    logging.info("%s: tcpdump started..." % (url))
                    # wait for tcpdump to initialize
                    time.sleep(1)
            except Exception as exp:
                logging.warning("%s: tcpdump failed: %s" % (url, exp))

            # HTTP GET
            logging.info("%s: HTTP" % (url))
            try:
                # NOTE(review): the argument list was truncated in the
                # reviewed copy; path and SSL flag are restored from the
                # values computed above - confirm the keyword name
                http_results[url] = http.get_request(http_netloc,
                                                     http_path,
                                                     ssl=http_ssl)
            except Exception as exp:
                logging.warning("%s: HTTP test failed: %s" % (url, exp))
                http_results[url] = {"exception": str(exp)}

            # TLS certificate
            # this will only work if the URL starts with https://
            if http_ssl:
                try:
                    tls_result = {}
                    logging.info("%s: TLS certificate" % (domain_name))
                    fingerprint, cert = tls.get_fingerprint(
                        domain_name, ssl_port)
                    tls_result['port'] = ssl_port
                    tls_result['fingerprint'] = fingerprint
                    tls_result['cert'] = cert

                    tls_results[domain_name] = tls_result
                except Exception as exp:
                    # NOTE(review): the logging call itself was missing in
                    # the reviewed copy; warning level matches the other
                    # failure handlers in this method
                    logging.warning(
                        "%s: TLS certfiticate download failed: %s" %
                        (domain_name, exp))
                    tls_results[domain_name] = {"exception": str(exp)}

            # DNS Lookup
            logging.info("%s: DNS" % (domain_name))
            try:
                dns_results[domain_name] = dnslib.lookup_domain(domain_name)
            except Exception as exp:
                logging.warning("%s: DNS lookup failed: %s" %
                                (domain_name, exp))
                dns_results[domain_name] = {"exception": str(exp)}

            # Traceroute
            # NOTE(review): results are keyed by domain only, so with more
            # than one method the last method's result is the one kept.
            for method in self.traceroute_methods:
                try:
                    logging.info("%s: Traceroute (%s)" %
                                 (domain_name, method.upper()))
                    traceroute_results[domain_name] = traceroute.traceroute(
                        domain_name, method=method)
                except Exception as exp:
                    logging.warning("%s: Traceroute (%s) failed: %s" %
                                    (domain_name, method.upper(), exp))
                    traceroute_results[domain_name] = {"exception": str(exp)}

            # end tcpdump
            if tcpdump_started:
                logging.info("%s: waiting for tcpdump..." % (url))
                # 2 seconds should be enough.
                time.sleep(2)
                td.stop()
                logging.info("%s: tcpdump stopped." % (url))
                pcap_indexes[url] = '%s-%s.pcap' % (file_name,
                                                    format(url_index, '04'))
                pcap_results[pcap_indexes[url]] = td.pcap()

            # Meta-data
            url_metadata_results[url] = meta

        result["http"] = http_results
        result["tls"] = tls_results
        result["dns"] = dns_results
        result["traceroute"] = traceroute_results

        # if we have an index row, we should turn URL metadata
        # into dictionaries. zip() stops at the shorter sequence, so a row
        # with fewer cells than the header simply yields a partial dict.
        if index_row is not None:
            columns = index_row[1:]
            url_metadata_results = dict(
                (url, dict(zip(columns, meta)))
                for url, meta in url_metadata_results.items())

        result["url_metadata"] = url_metadata_results
        result["file_metadata"] = file_metadata
        result["file_comments"] = file_comments
        if self.record_pcaps:
            result['pcap_indexes'] = pcap_indexes
            # build a new dict rather than items() + items(), which only
            # works where items() returns a list
            merged = dict(self.external_results)
            merged.update(pcap_results)
            self.external_results = merged

        return result
예제 #3
파일: client.py 프로젝트: jakubd/centinel
    def run_exp(self, name, exp_config=None, schedule_name=None):
        """Run one experiment by name and write its results to disk.

        Looks the experiment class up in ``self.experiments``, collects
        metadata, loads input files, instantiates and runs the experiment
        (optionally under tcpdump), then writes any external results, the
        pcap, and finally the JSON results as bzip2-compressed files.
        Each stage has its own exception handler, so a failing stage is
        logged and recorded in the results instead of stopping the run.

        Args:
            name: key of the experiment in ``self.experiments``.
            exp_config: optional dict; its ``input_files`` entries are
                loaded and its ``params`` are assigned to ``Exp.params``.
            schedule_name: stored in the result metadata; falls back to
                ``name`` when not given.

        Returns:
            None. An unknown experiment name is logged and skipped.
        """
        # local import: only the tcpdump settle delays below need it
        import time

        if name not in self.experiments:
            logging.error("Experiment file %s not found! Skipping." % (name))
            return

        Exp = self.experiments[name]
        results = {}

        results["meta"] = {}
        try:
            logging.debug("Getting metadata for experiment...")
            meta = self.get_meta()
            results["meta"] = meta
        except Exception as exception:
            logging.exception("Error fetching metadata for "
                              "%s: %s" % (name, exception))
            results["meta_exception"] = str(exception)

        if schedule_name is not None:
            results["meta"]["schedule_name"] = schedule_name
        else:
            results["meta"]["schedule_name"] = name

        start_time = datetime.now()
        results["meta"]["client_time"] = start_time.isoformat()
        # one timestamp shared by every file this run produces
        timestamp = start_time.strftime("%Y-%m-%dT%H%M%S.%f")

        results["meta"]["centinel_version"] = centinel.__version__
        input_files = {}
        if exp_config is not None:
            if (('input_files' in exp_config) and
                    (exp_config['input_files'] is not None)):
                for filename in exp_config['input_files']:
                    file_handle = self.load_input_file(filename)
                    if file_handle is not None:
                        input_files[filename] = file_handle
            if (('params' in exp_config) and
                    (exp_config['params'] is not None)):
                Exp.params = exp_config['params']

        # if the experiment specifies a list of input file names,
        # load them. failing to load input files does not stop
        # experiment from running.
        if Exp.input_files is not None:
            for filename in Exp.input_files:
                file_handle = self.load_input_file(filename)
                if file_handle is not None:
                    input_files[filename] = file_handle
        # otherwise, fall back on [experiment name].txt
        # NOTE(review): this replaces any files loaded from exp_config
        # above with a single handle - confirm that is intended.
        else:
            input_files = self.load_input_file("%s.txt" % (name))

        # stays None when the constructor raises, so the later stages can
        # skip experiment-dependent work instead of failing on a missing name
        exp = None
        try:
            # instantiate the experiment
            logging.debug("Initializing the experiment class for %s" % (name))
            exp = Exp(input_files)
        except Exception as exception:
            logging.exception("Error initializing %s: %s" % (name, exception))
            results["init_exception"] = str(exception)

        # nothing to capture if there is no experiment to run
        run_tcpdump = exp is not None

        if self.config['results']['record_pcaps'] is False:
            logging.info("Your configuration has disabled pcap "
                         "recording, tcpdump will not start.")
            run_tcpdump = False
            # disable this on the experiment too
            if exp is not None:
                exp.record_pcaps = False

        if run_tcpdump and os.geteuid() != 0:
            logging.info("Centinel is not running as root, "
                         "tcpdump will not start.")
            run_tcpdump = False

        if run_tcpdump and Exp.overrides_tcpdump:
            logging.info("Experiment overrides tcpdump recording.")
            run_tcpdump = False

        td = Tcpdump()
        tcpdump_started = False

        try:
            if run_tcpdump:
                # NOTE(review): start()/stop() and the delays are restored
                # from context; confirm against the Tcpdump class
                td.start()
                tcpdump_started = True
                logging.info("tcpdump started...")
                # wait for tcpdump to initialize
                time.sleep(2)
        # bound to "exception", not "exp": "exp" is the experiment instance
        except Exception as exception:
            logging.exception("Failed to run tcpdump: %s" % (exception,))

        if exp is not None:
            try:
                # run the experiment
                # NOTE(review): entry point name restored from context
                exp.run()
            except Exception as exception:
                logging.exception("Error running %s: %s" % (name, exception))
                results["runtime_exception"] = str(exception)

        # save any external results that the experiment has generated
        # they could be anything that doesn't belong in the json file
        # (e.g. pcap files)
        # these should all be compressed with bzip2
        # the experiment is responsible for giving these a name and
        # keeping a list of files in the json results
        results_dir = self.config['dirs']['results_dir']
        if exp is not None and exp.external_results is not None:
            logging.debug("Writing external files for %s" % (name))
            for fname, fcontents in exp.external_results.items():
                # NOTE(review): the last two format arguments were cut off
                # in the reviewed copy; timestamp and file name assumed
                external_file_name = ("external_%s-%s-%s"
                                      ".bz2" % (name, timestamp, fname))
                external_file_path = os.path.join(results_dir,
                                                  external_file_name)
                try:
                    # 'w:bz2' is a tarfile mode, not an open() mode; the
                    # data is compressed explicitly and written as bytes
                    with open(external_file_path, 'wb') as file_p:
                        data = bz2.compress(fcontents)
                        file_p.write(data)
                        logging.debug("External file "
                                      "%s written successfully" % (fname))
                except Exception as exception:
                    logging.exception("Failed to write external file:"
                                      "%s" % (exception))
            logging.debug("Finished writing external files for %s" % (name))

        if tcpdump_started:
            logging.info("Waiting for tcpdump to process packets...")
            # 5 seconds should be enough. this hasn't been tested on
            # a RaspberryPi or a Hummingboard i2
            time.sleep(5)
            td.stop()
            logging.info("tcpdump stopped.")
            try:
                pcap_file_name = ("pcap_%s-%s.pcap"
                                  ".bz2" % (name, timestamp))
                pcap_file_path = os.path.join(results_dir,
                                              pcap_file_name)

                with open(pcap_file_path, 'wb') as file_p:
                    data = bz2.compress(td.pcap())
                    file_p.write(data)
                    logging.info("Saved pcap to "
                                 "%s." % (pcap_file_path))
            except Exception as exception:
                logging.exception("Failed to write pcap file: %s" %
                                  (exception))

        # close input file handle(s)
        logging.debug("Closing input files for %s" % (name))
        if isinstance(input_files, dict):
            handles = list(input_files.values())
        elif input_files is not None:
            # the "[experiment name].txt" fallback is a single handle
            handles = [input_files]
        else:
            handles = []
        for file_handle in handles:
            try:
                file_handle.close()
            except Exception as exception:
                logging.exception("Failed to close input file: %s" %
                                  (exception))
        logging.debug("Input files closed for %s" % (name))

        logging.debug("Storing results for %s" % (name))
        if exp is not None:
            try:
                results[name] = exp.results
            except Exception as exception:
                logging.exception("Error storing results for "
                                  "%s: %s" % (name, exception))
                results["results_exception"] = str(exception)

        end_time = datetime.now()
        time_taken = (end_time - start_time)
        results["meta"]["time_taken"] = time_taken.total_seconds()

        logging.info("%s took %s to finish." % (name, time_taken))

        logging.debug("Saving %s results to file" % (name))
        try:
            # Pretty printing results will increase file size, but files are
            # compressed before sending.
            # NOTE(review): second argument was cut off in the reviewed
            # copy; the run timestamp is assumed
            result_file_path = self.get_result_file(name, timestamp)
            result_file = bz2.BZ2File(result_file_path, "w")
            try:
                json.dump(results, result_file, indent=2,
                          separators=(',', ': '))
            finally:
                # always release the file, even when serialization fails
                result_file.close()
        except Exception as exception:
            logging.exception("Error saving results for "
                              "%s to file: %s" % (name, exception))
            results["results_exception"] = str(exception)
        logging.debug("Done saving %s results to file" % (name))