Exemple #1
0
    def run_file(self, input_file):
        """Run HTTP, TLS, DNS and traceroute measurements for one URL list.

        ``input_file`` is a ``(file_name, file_contents)`` pair.
        ``file_contents`` is handed to ``csv.reader``, so it has to be an
        iterable of CSV lines.

        Rows whose first cell starts with ``#`` are file comments:
        ``key: value`` comments are collected into ``file_metadata`` and
        anything else into ``file_comments``. A row whose first cell is
        ``url`` is kept as the column header. Every other row with a
        non-empty first cell is a URL to measure, followed by its
        metadata columns. Blank rows and rows without a URL are skipped.

        Returns a dict with the keys ``file_name``, ``http``, ``tls``,
        ``dns``, ``traceroute``, ``url_metadata``, ``file_metadata`` and
        ``file_comments``. When ``self.record_pcaps`` is set it also has
        ``pcap_indexes``, and the captured pcap data is merged into
        ``self.external_results``.
        """
        file_name, file_contents = input_file

        # Initialize the results for this input file.
        # This can be anything from file name to version
        # to any useful information.
        result = {'file_name': file_name}

        http_results = {}
        tls_results = {}
        dns_results = {}
        traceroute_results = {}
        url_metadata_results = {}
        file_metadata = {}
        file_comments = []

        # each pcap file is stored in a separate file
        # designated by a number. the indexes are stored
        # in the json file and the pcap files are stored
        # with their indexes as file names.
        pcap_results = {}
        pcap_indexes = {}
        url_index = 0
        index_row = None

        # First few lines are expected to be comments in key: value
        # format. The first line after that could be our column header
        # row, starting with "url", and the rest are data rows.
        # This is a sample input file we're trying to parse:
        #
        # # comment: Global List,,,,,
        # # date: 03-17-2015,,,,,
        # # version: 1,,,,,
        # # description: This is the global list. Last updated in 2012.,,,,
        # url,country,category,description,rationale,provider
        # http://8thstreetlatinas.com,glo,P**N,,,PRIV
        # http://abpr2.railfan.net,glo,MISC,Pictures of trains,,PRIV

        # we may want to make this threaded and concurrent
        csvreader = csv.reader(file_contents, delimiter=',', quotechar='"')
        for row in csvreader:
            # csv.reader yields an empty list for a blank line; there is
            # nothing to parse or measure in that case.
            if not row:
                continue

            first_cell = row[0]

            # parse file comments, if it looks like "key : value",
            # parse it as a key-value pair. otherwise, just
            # store it as a raw comment. only the first CSV cell of a
            # comment row is looked at.
            if first_cell.startswith('#'):
                comment = first_cell[1:].strip()
                if ':' in comment:
                    key, value = comment.split(':', 1)
                    file_metadata[key.strip()] = value.strip()
                else:
                    file_comments.append(comment)
                continue

            # detect the header row and store it
            # it is usually the first row and starts with "url,"
            if first_cell.strip().lower() == "url":
                index_row = row
                continue

            # a row without a URL in its first cell cannot be measured
            url = first_cell.strip()
            if not url:
                continue

            meta = row[1:]
            url_index = url_index + 1
            http_ssl = False
            ssl_port = 443
            http_path = '/'

            # parse the URL to extract netlocation, HTTP path, domain name,
            # and HTTP method (SSL or plain)
            try:
                urlparse_object = urlparse.urlparse(url)
                http_netloc = urlparse_object.netloc

                # if netloc is not urlparse-able, add // to the start
                # of URL
                if http_netloc == '':
                    urlparse_object = urlparse.urlparse('//%s' % (url))
                    http_netloc = urlparse_object.netloc

                netloc_parts = http_netloc.split(':')
                domain_name = netloc_parts[0]

                http_path = urlparse_object.path
                if http_path == '':
                    http_path = '/'

                # we assume scheme is either empty, or "http", or "https"
                # other schemes (e.g. "ftp") are out of the scope of this
                # measurement
                if urlparse_object.scheme == "https":
                    http_ssl = True
                    if len(netloc_parts) == 2:
                        # NOTE(review): an explicit port stays a string
                        # here while the default above is an int.
                        ssl_port = netloc_parts[1]

            except Exception as exp:
                logging.warning("%s: failed to parse URL: %s" % (url, exp))
                http_netloc = url
                http_ssl = False
                ssl_port = 443
                http_path = '/'
                domain_name = url

            # start tcpdump
            td = Tcpdump()
            tcpdump_started = False

            try:
                if self.record_pcaps:
                    td.start()
                    tcpdump_started = True
                    logging.info("%s: tcpdump started..." % (url))
                    # wait for tcpdump to initialize
                    time.sleep(1)
            except Exception as exp:
                logging.warning("%s: tcpdump failed: %s" % (url, exp))

            # HTTP GET
            logging.info("%s: HTTP" % (url))
            try:
                http_results[url] = http.get_request(http_netloc,
                                                     http_path,
                                                     ssl=http_ssl)
            except Exception as exp:
                logging.warning("%s: HTTP test failed: %s" % (url, exp))
                http_results[url] = {"exception": str(exp)}

            # TLS certificate
            # this will only work if the URL starts with https://
            if http_ssl:
                try:
                    logging.info("%s: TLS certificate" % (domain_name))
                    fingerprint, cert = tls.get_fingerprint(
                        domain_name, ssl_port)
                    tls_results[domain_name] = {
                        'port': ssl_port,
                        'fingerprint': fingerprint,
                        'cert': cert,
                    }
                except Exception as exp:
                    logging.warning(
                        "%s: TLS certfiticate download failed: %s" %
                        (domain_name, exp))
                    tls_results[domain_name] = {"exception": str(exp)}

            # DNS Lookup
            logging.info("%s: DNS" % (domain_name))
            try:
                dns_results[domain_name] = dnslib.lookup_domain(domain_name)
            except Exception as exp:
                logging.warning("%s: DNS lookup failed: %s" %
                                (domain_name, exp))
                dns_results[domain_name] = {"exception": str(exp)}

            # Traceroute
            # NOTE(review): results are keyed by domain only, so with
            # several methods each one overwrites the previous entry.
            for method in self.traceroute_methods:
                try:
                    logging.info("%s: Traceroute (%s)" %
                                 (domain_name, method.upper()))
                    traceroute_results[domain_name] = traceroute.traceroute(
                        domain_name, method=method)
                except Exception as exp:
                    logging.warning("%s: Traceroute (%s) failed: %s" %
                                    (domain_name, method.upper(), exp))
                    traceroute_results[domain_name] = {"exception": str(exp)}

            # end tcpdump
            if tcpdump_started:
                logging.info("%s: waiting for tcpdump..." % (url))
                # 2 seconds should be enough.
                time.sleep(2)
                td.stop()
                logging.info("%s: tcpdump stopped." % (url))
                pcap_indexes[url] = '%s-%s.pcap' % (file_name,
                                                    format(url_index, '04'))
                pcap_results[pcap_indexes[url]] = td.pcap()

            # Meta-data
            url_metadata_results[url] = meta

        result["http"] = http_results
        result["tls"] = tls_results
        result["dns"] = dns_results
        result["traceroute"] = traceroute_results

        # if we have an index row, we should turn URL metadata
        # into dictionaries. zip() stops at the shorter sequence, so a
        # row with fewer cells than the header keeps only the columns
        # it actually has.
        if index_row is not None:
            column_names = index_row[1:]
            url_metadata_results = dict(
                (url, dict(zip(column_names, meta)))
                for url, meta in url_metadata_results.items())

        result["url_metadata"] = url_metadata_results
        result["file_metadata"] = file_metadata
        result["file_comments"] = file_comments
        if self.record_pcaps:
            result['pcap_indexes'] = pcap_indexes
            # build a new dict rather than mutating the existing one in
            # place; copy-and-update works on both Python 2 and 3.
            merged_results = dict(self.external_results or {})
            merged_results.update(pcap_results)
            self.external_results = merged_results

        return result
Exemple #2
0
    def run_exp(self, name, exp_config=None, schedule_name=None):
        """Run one experiment and write its results to the results dir.

        ``name`` is looked up in ``self.experiments`` after a trailing
        ``.py`` is stripped; an unknown name is logged and skipped.
        ``exp_config`` may carry ``input_files`` (names loaded through
        ``self.load_input_file``) and ``params`` (assigned to the
        experiment class). ``schedule_name`` is recorded in the result
        metadata and defaults to ``name``.

        Unless disabled by the configuration, by not running as root or
        by the experiment class itself, tcpdump records while the
        experiment runs and the capture is saved bz2-compressed, falling
        back to an uncompressed copy if compression fails. The
        experiment's ``external_results`` are written as separate bz2
        files, and the JSON results go to the path returned by
        ``self.get_result_file``.

        Returns ``None``.
        """
        if name.endswith(".py"):
            name = name[:-3]
        if name not in self.experiments:
            logging.error("Experiment file %s not found! Skipping." % name)
            return

        exp_class = self.experiments[name]
        results = {"meta": {}}
        try:
            logging.debug("Getting metadata for experiment...")
            meta = self.get_meta()
            results["meta"] = meta
        except Exception as exception:
            logging.exception("Error fetching metadata for "
                              "%s: %s" % (name, exception))
            results["meta_exception"] = str(exception)

        if schedule_name is not None:
            results["meta"]["schedule_name"] = schedule_name
        else:
            results["meta"]["schedule_name"] = name

        start_time = datetime.now()
        # the same timestamp is embedded in every file name written below
        timestamp = start_time.strftime("%Y-%m-%dT%H%M%S.%f")
        results["meta"]["client_time"] = start_time.isoformat()

        results["meta"]["centinel_version"] = centinel.__version__

        # include vpn provider in metadata
        if self.vpn_provider:
            results["meta"]["vpn_provider"] = self.vpn_provider

        input_files = {}
        if exp_config is not None:
            if (('input_files' in exp_config)
                    and (exp_config['input_files'] is not None)):
                for filename in exp_config['input_files']:
                    file_handle = self.load_input_file(filename)
                    if file_handle is not None:
                        input_files[filename] = file_handle

            if (('params' in exp_config)
                    and (exp_config['params'] is not None)):
                exp_class.params = exp_config['params']

        # if the scheduler does not specify input files, but
        # the experiment class specifies a list of input file names,
        # load them. failing to load input files does not stop
        # experiment from running.
        if len(input_files) == 0:
            if exp_class.input_files is not None:
                for filename in exp_class.input_files:
                    file_handle = self.load_input_file(filename)
                    if file_handle is not None:
                        input_files[filename] = file_handle
            # otherwise, fall back to [schedule name].txt (deprecated)
            else:
                filename = "%s.txt" % name
                file_handle = self.load_input_file(filename)
                if file_handle is not None:
                    input_files[filename] = file_handle

        try:
            # instantiate the experiment
            logging.debug("Initializing the experiment class for %s" %
                          name)

            # these constants can be useful for some experiments, but it
            # is not encouraged to use these directly
            global_constants = {
                'experiments_dir': self.config['dirs']['experiments_dir'],
                'results_dir': self.config['dirs']['results_dir'],
                'data_dir': self.config['dirs']['data_dir']
            }

            exp_class.global_constants = global_constants

            exp = exp_class(input_files)
        except Exception as exception:
            logging.exception("Error initializing %s: %s" %
                              (name, exception))
            results["init_exception"] = str(exception)
            # NOTE(review): returning here drops ``results`` (nothing is
            # written to disk) and leaves the input files open.
            return

        exp.global_constants = global_constants

        run_tcpdump = True

        if self.config['results']['record_pcaps'] is False:
            logging.info("Your configuration has disabled pcap "
                         "recording, tcpdump will not start.")
            run_tcpdump = False
            # disable this on the experiment too
            exp.record_pcaps = False

        if run_tcpdump and os.geteuid() != 0:
            logging.info("Centinel is not running as root, "
                         "tcpdump will not start.")
            run_tcpdump = False

        if run_tcpdump and exp_class.overrides_tcpdump:
            logging.info("Experiment overrides tcpdump recording.")
            run_tcpdump = False

        tcpdump_started = False

        # The handlers below bind their exception to ``exception`` and
        # never to ``exp``: reusing ``exp`` would replace (Python 2) or
        # delete (Python 3) the experiment instance used further down.
        try:
            if run_tcpdump:
                td = Tcpdump()
                # NOTE(review): ``tds`` is not defined in this method;
                # presumably a module-level list of captures - verify.
                tds.append(td)
                td.start()
                tcpdump_started = True
                logging.info("tcpdump started...")
                # wait for tcpdump to initialize
                time.sleep(2)
        except Exception as exception:
            logging.exception("Failed to run tcpdump: %s" % (exception, ))

        try:
            # run the experiment
            exp.run()
        except Exception as exception:
            logging.exception("Error running %s: %s" % (name, exception))
            results["runtime_exception"] = str(exception)
        except KeyboardInterrupt:
            logging.warning(
                "Keyboard interrupt received, stopping experiment...")

        # save any external results that the experiment has generated
        # they could be anything that doesn't belong in the json file
        # (e.g. pcap files)
        # these should all be compressed with bzip2
        # the experiment is responsible for giving these a name and
        # keeping a list of files in the json results
        results_dir = self.config['dirs']['results_dir']
        if exp.external_results is not None:
            logging.debug("Writing external files for %s" % name)
            for fname, fcontents in exp.external_results.items():
                external_file_name = ("external_%s-%s-%s"
                                      ".bz2" % (name, timestamp, fname))
                external_file_path = os.path.join(results_dir,
                                                  external_file_name)
                try:
                    # the payload is already bz2-compressed bytes, so a
                    # plain binary write is what is needed
                    with open(external_file_path, 'wb') as file_p:
                        data = bz2.compress(fcontents)
                        file_p.write(data)
                        logging.debug("External file "
                                      "%s written successfully" % fname)
                except Exception as exception:
                    logging.exception("Failed to write external file:"
                                      "%s" % exception)
            logging.debug("Finished writing external files for %s" % name)

        if tcpdump_started:
            logging.info("Waiting for tcpdump to process packets...")
            # 5 seconds should be enough. this hasn't been tested on
            # a RaspberryPi or a Hummingboard i2
            time.sleep(5)
            td.stop()
            logging.info("tcpdump stopped.")
            bz2_successful = False
            data = None
            try:
                pcap_file_name = ("pcap_%s-%s.pcap"
                                  ".bz2" % (name, timestamp))
                pcap_file_path = os.path.join(results_dir, pcap_file_name)

                with open(pcap_file_path,
                          'wb') as pcap_bz2, open(td.pcap_filename(),
                                                  'rb') as pcap:
                    # compress in 10 KiB chunks so the whole capture
                    # never has to be held in memory
                    compressor = bz2.BZ2Compressor()
                    compressed_size_so_far = 0
                    for pcap_data in iter(lambda: pcap.read(10 * 1024),
                                          b''):
                        compressed_chunk = compressor.compress(pcap_data)
                        pcap_bz2.write(compressed_chunk)
                        compressed_size_so_far += len(compressed_chunk)

                    compressed_chunk = compressor.flush()
                    pcap_bz2.write(compressed_chunk)
                    compressed_size_so_far += len(compressed_chunk)

                    uncompressed_size = os.path.getsize(td.pcap_filename())
                    # an empty capture must not turn a successful write
                    # into a ZeroDivisionError (and thus a fallback)
                    if uncompressed_size > 0:
                        compression_ratio = 100 * (
                            float(compressed_size_so_far) /
                            float(uncompressed_size))
                    else:
                        compression_ratio = 0.0
                    logging.debug(
                        "pcap BZ2 compression: compressed/uncompressed (ratio):"
                        " %d/%d (%.1f%%)" %
                        (compressed_size_so_far, uncompressed_size,
                         compression_ratio))

                logging.info("Saved pcap to " "%s." % pcap_file_path)
                bz2_successful = True
            except Exception as exception:
                logging.exception("Failed to compress and write "
                                  "pcap file: %s" % exception)
            if not bz2_successful:
                logging.info("Writing pcap file uncompressed")
                try:
                    pcap_file_name = ("pcap_%s-%s"
                                      ".pcap" % (name, timestamp))
                    pcap_file_path = os.path.join(results_dir,
                                                  pcap_file_name)

                    with open(pcap_file_path, 'wb') as pcap_out, open(
                            td.pcap_filename(), 'rb') as pcap:
                        for pcap_data in iter(lambda: pcap.read(10 * 1024),
                                              b''):
                            pcap_out.write(pcap_data)

                    logging.info("Saved pcap to " "%s." % pcap_file_path)
                except Exception as exception:
                    logging.exception("Failed to write "
                                      "pcap file: %s" % exception)
            # delete pcap data to free up some memory
            logging.debug("Removing pcap data from memory")
            td.delete()
            del data
            del td

        # close input file handle(s)
        logging.debug("Closing input files for %s" % name)
        if isinstance(input_files, dict):
            for file_name, file_handle in input_files.items():
                try:
                    file_handle.close()
                except AttributeError:
                    logging.warning("Closing %s failed" % file_name)
        logging.debug("Input files closed for %s" % name)

        logging.debug("Storing results for %s" % name)
        try:
            results[name] = exp.results
        except Exception as exception:
            logging.exception("Error storing results for "
                              "%s: %s" % (name, exception))
            if "results_exception" not in results:
                results["results_exception"] = {}

            results["results_exception"][name] = str(exception)

        end_time = datetime.now()
        time_taken = (end_time - start_time)
        results["meta"]["time_taken"] = time_taken.total_seconds()

        logging.info("%s took %s to finish." % (name, time_taken))

        logging.debug("Saving %s results to file" % name)
        try:
            # pretty printing results will increase file size, but files
            # are compressed before sending.
            result_file_path = self.get_result_file(name, timestamp)
            # the with-block closes the file even if json.dump fails
            with bz2.BZ2File(result_file_path, "w") as result_file:
                json.dump(
                    results,
                    result_file,
                    indent=2,
                    separators=(',', ': '),
                    # ignore encoding errors, these will be dealt with
                    # on the server
                    ensure_ascii=False)

            # free up memory by deleting results from memory
            del results
        except Exception as exception:
            logging.exception("Error saving results for "
                              "%s to file: %s" % (name, exception))
        logging.debug("Done saving %s results to file" % name)
Exemple #3
0
    def run_exp(self, name, exp_config=None, schedule_name=None):
        """Run one experiment and write its results to the results dir.

        ``name`` is looked up in ``self.experiments``; an unknown name is
        logged and skipped. ``exp_config`` may carry ``input_files``
        (names loaded through ``self.load_input_file``) and ``params``
        (assigned to the experiment class). ``schedule_name`` is recorded
        in the result metadata and defaults to ``name``.

        Unless disabled by the configuration, by not running as root or
        by the experiment class itself, tcpdump records while the
        experiment runs and the capture is saved as a bz2 file. The
        experiment's ``external_results`` are written as separate bz2
        files, and the JSON results go to the path returned by
        ``self.get_result_file``.

        Returns ``None``.
        """
        if name not in self.experiments:
            logging.error("Experiment file %s not found! Skipping." % (name))
            return

        Exp = self.experiments[name]
        results = {}

        results["meta"] = {}
        try:
            logging.debug("Getting metadata for experiment...")
            meta = self.get_meta()
            results["meta"] = meta
        except Exception as exception:
            logging.exception("Error fetching metadata for "
                              "%s: %s" % (name, exception))
            results["meta_exception"] = str(exception)

        if schedule_name is not None:
            results["meta"]["schedule_name"] = schedule_name
        else:
            results["meta"]["schedule_name"] = name

        start_time = datetime.now()
        # the same timestamp is embedded in every file name written below
        timestamp = start_time.strftime("%Y-%m-%dT%H%M%S.%f")
        results["meta"]["client_time"] = start_time.isoformat()

        results["meta"]["centinel_version"] = centinel.__version__
        input_files = {}
        if exp_config is not None:
            if (('input_files' in exp_config) and
                    (exp_config['input_files'] is not None)):
                for filename in exp_config['input_files']:
                    file_handle = self.load_input_file(filename)
                    if file_handle is not None:
                        input_files[filename] = file_handle
            if (('params' in exp_config) and
                    (exp_config['params'] is not None)):
                Exp.params = exp_config['params']

        # if the experiment specifies a list of input file names,
        # load them. failing to load input files does not stop
        # experiment from running.
        if Exp.input_files is not None:
            for filename in Exp.input_files:
                file_handle = self.load_input_file(filename)
                if file_handle is not None:
                    input_files[filename] = file_handle
        # otherwise, fall back on [experiment name].txt
        else:
            # NOTE(review): this replaces the dict (and any handles
            # loaded from exp_config) with whatever load_input_file
            # returns - presumably None when the file is missing.
            input_files = self.load_input_file("%s.txt" % (name))

        try:
            # instantiate the experiment
            logging.debug("Initializing the experiment class for %s" % (name))
            exp = Exp(input_files)
        except Exception as exception:
            logging.exception("Error initializing %s: %s" % (name, exception))
            results["init_exception"] = str(exception)
            # NOTE(review): returning here drops ``results`` (nothing is
            # written to disk) and leaves the input files open.
            return

        run_tcpdump = True

        if self.config['results']['record_pcaps'] is False:
            logging.info("Your configuration has disabled pcap "
                         "recording, tcpdump will not start.")
            run_tcpdump = False
            # disable this on the experiment too
            exp.record_pcaps = False

        if run_tcpdump and os.geteuid() != 0:
            logging.info("Centinel is not running as root, "
                         "tcpdump will not start.")
            run_tcpdump = False

        if run_tcpdump and Exp.overrides_tcpdump:
            logging.info("Experiment overrides tcpdump recording.")
            run_tcpdump = False

        td = Tcpdump()
        tcpdump_started = False

        # The handlers below bind their exception to ``exception`` and
        # never to ``exp``: reusing ``exp`` would replace (Python 2) or
        # delete (Python 3) the experiment instance used further down.
        try:
            if run_tcpdump:
                td.start()
                tcpdump_started = True
                logging.info("tcpdump started...")
                # wait for tcpdump to initialize
                time.sleep(2)
        except Exception as exception:
            logging.exception("Failed to run tcpdump: %s" % (exception,))

        try:
            # run the experiment
            exp.run()
        except Exception as exception:
            logging.exception("Error running %s: %s" % (name, exception))
            results["runtime_exception"] = str(exception)

        # save any external results that the experiment has generated
        # they could be anything that doesn't belong in the json file
        # (e.g. pcap files)
        # these should all be compressed with bzip2
        # the experiment is responsible for giving these a name and
        # keeping a list of files in the json results
        results_dir = self.config['dirs']['results_dir']
        if exp.external_results is not None:
            logging.debug("Writing external files for %s" % (name))
            for fname, fcontents in exp.external_results.items():
                external_file_name = ("external_%s-%s-%s"
                                      ".bz2" % (name, timestamp, fname))
                external_file_path = os.path.join(results_dir,
                                                  external_file_name)
                try:
                    # the payload is already bz2-compressed bytes, so a
                    # plain binary write is what is needed
                    with open(external_file_path, 'wb') as file_p:
                        data = bz2.compress(fcontents)
                        file_p.write(data)
                        logging.debug("External file "
                                      "%s written successfully" % (fname))
                except Exception as exception:
                    logging.exception("Failed to write external file:"
                                      "%s" % (exception))
            logging.debug("Finished writing external files for %s" % (name))

        if tcpdump_started:
            logging.info("Waiting for tcpdump to process packets...")
            # 5 seconds should be enough. this hasn't been tested on
            # a RaspberryPi or a Hummingboard i2
            time.sleep(5)
            td.stop()
            logging.info("tcpdump stopped.")
            try:
                pcap_file_name = ("pcap_%s-%s.pcap"
                                  ".bz2" % (name, timestamp))
                pcap_file_path = os.path.join(results_dir,
                                              pcap_file_name)

                with open(pcap_file_path, 'wb') as file_p:
                    data = bz2.compress(td.pcap())
                    file_p.write(data)
                    logging.info("Saved pcap to "
                                 "%s." % (pcap_file_path))
            except Exception as exception:
                logging.exception("Failed to write pcap file: %s" %
                                  (exception))

        # close input file handle(s); the fallback above may have left a
        # single handle, or nothing at all, instead of a dict
        logging.debug("Closing input files for %s" % (name))
        if isinstance(input_files, dict):
            for file_handle in input_files.values():
                file_handle.close()
        elif input_files is not None:
            input_files.close()
        logging.debug("Input files closed for %s" % (name))

        logging.debug("Storing results for %s" % (name))
        try:
            results[name] = exp.results
        except Exception as exception:
            logging.exception("Error storing results for "
                              "%s: %s" % (name, exception))
            results["results_exception"] = str(exception)

        end_time = datetime.now()
        time_taken = (end_time - start_time)
        results["meta"]["time_taken"] = time_taken.total_seconds()

        logging.info("%s took %s to finish." % (name, time_taken))

        logging.debug("Saving %s results to file" % (name))
        try:
            # Pretty printing results will increase file size, but files are
            # compressed before sending.
            result_file_path = self.get_result_file(name, timestamp)
            result_file = bz2.BZ2File(result_file_path, "w")
            try:
                json.dump(results, result_file, indent=2,
                          separators=(',', ': '))
            finally:
                # close the file even when json.dump fails
                result_file.close()
        except Exception as exception:
            logging.exception("Error saving results for "
                              "%s to file: %s" % (name, exception))
            results["results_exception"] = str(exception)
        logging.debug("Done saving %s results to file" % (name))
Exemple #4
0
    def run_file(self, input_file):
        """Run the URL measurements for every URL listed in one input file.

        The input is parsed as CSV. Rows whose first cell starts with "#"
        are comments: "key: value" comments are collected as file metadata
        and anything else is kept as a raw comment. A row whose first cell
        is "url" is the column header; every other row is a data row whose
        first cell is the URL to measure and whose remaining cells are
        per-URL metadata. Sample input:

            # comment: Global List,,,,,
            # date: 03-17-2015,,,,,
            # version: 1,,,,,
            # description: This is the global list. Last updated in 2012.,,,,
            url,country,category,description,rationale,provider
            http://8thstreetlatinas.com,glo,P**N,,,PRIV
            http://abpr2.railfan.net,glo,MISC,Pictures of trains,,PRIV

        For each URL an HTTP GET, a DNS lookup, one traceroute per entry in
        self.traceroute_methods and, for https URLs, a TLS certificate
        fetch are attempted. A failing step is stored as
        {"exception": message} and does not stop the remaining steps.

        When self.record_pcaps is set, the per-URL captures are merged into
        self.external_results, keyed by their pcap file name.

        :param input_file: (file_name, file_contents) pair; file_contents
            is handed to csv.reader, so it must be an iterable of lines.
        :returns: dict with the keys "file_name", "http", "tls", "dns",
            "traceroute", "url_metadata", "file_metadata",
            "file_comments" and, when pcap recording is enabled,
            "pcap_indexes".
        """
        file_name, file_contents = input_file

        # Initialize the results for this input file.
        # This can be anything from file name to version
        # to any useful information.
        result = {"file_name": file_name}

        http_results = {}
        tls_results = {}
        dns_results = {}
        traceroute_results = {}
        url_metadata_results = {}
        file_metadata = {}
        file_comments = []

        # each pcap file is stored in a separate file
        # designated by a number. the indexes are stored
        # in the json file and the pcap files are stored
        # with their indexes as file names.
        pcap_results = {}
        pcap_indexes = {}
        url_index = 0
        index_row = None

        # we may want to make this threaded and concurrent
        csvreader = csv.reader(file_contents, delimiter=',', quotechar='"')
        for row in csvreader:
            # a blank line is returned as an empty row; it carries no
            # comment, header or URL, so skip it instead of indexing it
            if not row:
                continue

            # parse file comments, if it looks like "key : value",
            # parse it as a key-value pair. otherwise, just
            # store it as a raw comment.
            if row[0].startswith('#'):
                comment = row[0][1:].strip()
                if ':' in comment:
                    key, value = comment.split(':', 1)
                    file_metadata[key.strip()] = value.strip()
                else:
                    file_comments.append(comment)
                continue

            # detect the header row and store it
            # it is usually the first row and starts with "url,"
            if row[0].strip().lower() == "url":
                index_row = row
                continue

            # strip() never returns None, so test for emptiness: a row
            # without a URL in its first cell has nothing to measure
            url = row[0].strip()
            if not url:
                continue

            meta = row[1:]
            url_index += 1
            http_ssl = False
            ssl_port = 443
            http_path = '/'

            # parse the URL to extract netlocation, HTTP path, domain name,
            # and HTTP method (SSL or plain)
            try:
                urlparse_object = urlparse.urlparse(url)
                http_netloc = urlparse_object.netloc

                # if netloc is not urlparse-able, add // to the start
                # of URL
                if http_netloc == '':
                    urlparse_object = urlparse.urlparse('//%s' % (url))
                    http_netloc = urlparse_object.netloc

                netloc_parts = http_netloc.split(':')
                domain_name = netloc_parts[0]

                http_path = urlparse_object.path or '/'

                # we assume scheme is either empty, or "http", or "https"
                # other schemes (e.g. "ftp") are out of the scope of this
                # measurement
                if urlparse_object.scheme == "https":
                    http_ssl = True
                    if len(netloc_parts) == 2:
                        # the port is handed to the TLS fetch below, so it
                        # has to be a number rather than the raw URL text
                        try:
                            ssl_port = int(netloc_parts[1])
                        except ValueError:
                            logging.warning("%s: invalid port %r, using %d",
                                            url, netloc_parts[1], ssl_port)

            except Exception as exp:
                logging.warning("%s: failed to parse URL: %s", url, exp)
                http_netloc = url
                http_ssl = False
                ssl_port = 443
                http_path = '/'
                domain_name = url

            # start tcpdump
            td = Tcpdump()
            tcpdump_started = False

            try:
                if self.record_pcaps:
                    td.start()
                    tcpdump_started = True
                    logging.info("%s: tcpdump started...", url)
                    # wait for tcpdump to initialize
                    time.sleep(1)
            except Exception as exp:
                logging.warning("%s: tcpdump failed: %s", url, exp)

            # HTTP GET
            logging.info("%s: HTTP", url)
            try:
                http_results[url] = http.get_request(http_netloc,
                                                     http_path,
                                                     ssl=http_ssl)
            except Exception as exp:
                logging.info("%s: HTTP test failed: %s", url, exp)
                http_results[url] = {"exception": str(exp)}

            # TLS certificate
            # this will only work if the URL starts with https://
            if http_ssl:
                try:
                    logging.info("%s: TLS certificate", domain_name)
                    fingerprint, cert = tls.get_fingerprint(domain_name,
                                                            ssl_port)
                    tls_results[domain_name] = {
                        'port': ssl_port,
                        'fingerprint': fingerprint,
                        'cert': cert,
                    }
                except Exception as exp:
                    logging.info("%s: TLS certfiticate download failed: %s",
                                 domain_name, exp)
                    tls_results[domain_name] = {"exception": str(exp)}

            # DNS Lookup
            logging.info("%s: DNS", domain_name)
            try:
                dns_results[domain_name] = dnslib.lookup_domain(domain_name)
            except Exception as exp:
                logging.info("%s: DNS lookup failed: %s", domain_name, exp)
                dns_results[domain_name] = {"exception": str(exp)}

            # Traceroute
            # NOTE(review): results are keyed by domain only, so when
            # several methods are configured each one overwrites the
            # previous entry and only the last method's outcome is kept.
            for method in self.traceroute_methods:
                try:
                    logging.info("%s: Traceroute (%s)",
                                 domain_name, method.upper())
                    traceroute_results[domain_name] = traceroute.traceroute(
                        domain_name, method=method)
                except Exception as exp:
                    logging.info("%s: Traceroute (%s) failed: %s",
                                 domain_name, method.upper(), exp)
                    traceroute_results[domain_name] = {
                        "exception": str(exp)}

            # end tcpdump
            if tcpdump_started:
                logging.info("%s: waiting for tcpdump...", url)
                # 2 seconds should be enough.
                time.sleep(2)
                td.stop()
                logging.info("%s: tcpdump stopped.", url)
                pcap_name = '%s-%s.pcap' % (file_name,
                                            format(url_index, '04'))
                pcap_indexes[url] = pcap_name
                pcap_results[pcap_name] = td.pcap()

            # Meta-data
            url_metadata_results[url] = meta

        result["http"] = http_results
        result["tls"] = tls_results
        result["dns"] = dns_results
        result["traceroute"] = traceroute_results

        # if we have an index row, we should turn URL metadata
        # into dictionaries
        if index_row is not None:
            column_names = index_row[1:]
            # zip() stops at the shorter sequence, so a row with fewer
            # cells than the header keeps the columns it does have and
            # surplus cells are ignored
            url_metadata_results = dict(
                (url, dict(zip(column_names, meta)))
                for url, meta in url_metadata_results.items())

        result["url_metadata"] = url_metadata_results
        result["file_metadata"] = file_metadata
        result["file_comments"] = file_comments
        if self.record_pcaps:
            result['pcap_indexes'] = pcap_indexes
            # build a new dict (rather than concatenating item lists) so
            # the merge works on any Python version and tolerates
            # external_results still being None
            merged_results = dict(self.external_results or {})
            merged_results.update(pcap_results)
            self.external_results = merged_results

        return result
Exemple #5
0
    def run_exp(self, name, exp_config=None, schedule_name=None):
        """Run one experiment and write everything it produced to disk.

        The experiment class is looked up in self.experiments, given its
        input files, instantiated and run. Afterwards the experiment's
        external results, the optional tcpdump capture and the JSON
        results (bzip2-compressed) are written under the configured
        results directory. Failures of individual steps are logged and
        recorded in the results instead of being raised.

        :param name: experiment name; a trailing ".py" is stripped.
        :param exp_config: optional mapping. Its 'input_files' entry lists
            the input files to load and its 'params' entry is assigned to
            the experiment class.
        :param schedule_name: name stored in the result metadata; defaults
            to the experiment name.
        :returns: None.
        """
        if name[-3:] == ".py":
            name = name[:-3]
        if name not in self.experiments:
            logging.error("Experiment file %s not found! Skipping." % name)
            return

        def close_input_files(handles):
            """Close every loaded input file, logging those that cannot be."""
            logging.debug("Closing input files for %s" % name)
            for file_name, file_handle in handles.items():
                try:
                    file_handle.close()
                except AttributeError:
                    logging.warning("Closing %s failed" % file_name)
            logging.debug("Input files closed for %s" % name)

        exp_class = self.experiments[name]
        results = {"meta": {}}
        try:
            logging.debug("Getting metadata for experiment...")
            results["meta"] = self.get_meta()
        except Exception as exception:
            logging.exception("Error fetching metadata for "
                              "%s: %s" % (name, exception))
            results["meta_exception"] = str(exception)

        if schedule_name is not None:
            results["meta"]["schedule_name"] = schedule_name
        else:
            results["meta"]["schedule_name"] = name

        start_time = datetime.now()
        # one timestamp string is shared by every file written for this run
        timestamp = start_time.strftime("%Y-%m-%dT%H%M%S.%f")
        results["meta"]["client_time"] = start_time.isoformat()

        results["meta"]["centinel_version"] = centinel.__version__

        # include vpn provider in metadata
        if self.vpn_provider:
            results["meta"]["vpn_provider"] = self.vpn_provider

        input_files = {}
        if exp_config is not None:
            if (('input_files' in exp_config) and
                    (exp_config['input_files'] is not None)):
                for filename in exp_config['input_files']:
                    file_handle = self.load_input_file(filename)
                    if file_handle is not None:
                        input_files[filename] = file_handle

            if (('params' in exp_config) and
                    (exp_config['params'] is not None)):
                exp_class.params = exp_config['params']

        # if the scheduler does not specify input files, but
        # the experiment class specifies a list of input file names,
        # load them. failing to load input files does not stop
        # experiment from running.
        if not input_files:
            if exp_class.input_files is not None:
                for filename in exp_class.input_files:
                    file_handle = self.load_input_file(filename)
                    if file_handle is not None:
                        input_files[filename] = file_handle
            # otherwise, fall back to [schedule name].txt (deprecated)
            else:
                filename = "%s.txt" % name
                file_handle = self.load_input_file(filename)
                if file_handle is not None:
                    input_files[filename] = file_handle

        try:
            # instantiate the experiment
            logging.debug("Initializing the experiment class for %s" % name)

            # these constants can be useful for some experiments, but it is
            # not encouraged to use these directly
            dirs = self.config['dirs']
            global_constants = {'experiments_dir': dirs['experiments_dir'],
                                'results_dir': dirs['results_dir'],
                                'data_dir': dirs['data_dir']}

            exp_class.global_constants = global_constants

            exp = exp_class(input_files)
        except Exception as exception:
            logging.exception("Error initializing %s: %s" % (name, exception))
            results["init_exception"] = str(exception)
            # nothing ran, but the handles opened above must not leak
            close_input_files(input_files)
            return

        exp.global_constants = global_constants

        run_tcpdump = True

        if self.config['results']['record_pcaps'] is False:
            logging.info("Your configuration has disabled pcap "
                         "recording, tcpdump will not start.")
            run_tcpdump = False
            # disable this on the experiment too
            exp.record_pcaps = False

        if run_tcpdump and os.geteuid() != 0:
            logging.info("Centinel is not running as root, "
                         "tcpdump will not start.")
            run_tcpdump = False

        if run_tcpdump and exp_class.overrides_tcpdump:
            logging.info("Experiment overrides tcpdump recording.")
            run_tcpdump = False

        td = None
        tcpdump_started = False

        # NOTE: the exception variables below must not be called "exp";
        # binding that name in an except clause would clobber (Python 2)
        # or unbind (Python 3) the experiment instance used afterwards.
        try:
            if run_tcpdump:
                td = Tcpdump()
                tds.append(td)
                td.start()
                tcpdump_started = True
                logging.info("tcpdump started...")
                # wait for tcpdump to initialize
                time.sleep(2)
        except Exception as exception:
            logging.exception("Failed to run tcpdump: %s" % (exception,))

        try:
            # run the experiment
            exp.run()
        except Exception as exception:
            logging.exception("Error running %s: %s" % (name, exception))
            results["runtime_exception"] = str(exception)
        except KeyboardInterrupt:
            logging.warning("Keyboard interrupt received, "
                            "stopping experiment...")

        # save any external results that the experiment has generated
        # they could be anything that doesn't belong in the json file
        # (e.g. pcap files)
        # these should all be compressed with bzip2
        # the experiment is responsible for giving these a name and
        # keeping a list of files in the json results
        results_dir = self.config['dirs']['results_dir']
        if exp.external_results is not None:
            logging.debug("Writing external files for %s" % name)
            for fname, fcontents in exp.external_results.items():
                external_file_name = ("external_%s-%s-%s"
                                      ".bz2" % (name, timestamp, fname))
                external_file_path = os.path.join(results_dir,
                                                  external_file_name)
                try:
                    # the payload is compressed explicitly below, so the
                    # file itself is a plain binary file
                    with open(external_file_path, 'wb') as file_p:
                        file_p.write(bz2.compress(fcontents))
                        logging.debug("External file "
                                      "%s written successfully" % fname)
                except Exception as exception:
                    logging.exception("Failed to write external file:"
                                      "%s" % exception)
            logging.debug("Finished writing external files for %s" % name)

        if tcpdump_started:
            logging.info("Waiting for tcpdump to process packets...")
            # 5 seconds should be enough. this hasn't been tested on
            # a RaspberryPi or a Hummingboard i2
            time.sleep(5)
            td.stop()
            logging.info("tcpdump stopped.")
            bz2_successful = False
            try:
                pcap_file_name = "pcap_%s-%s.pcap.bz2" % (name, timestamp)
                pcap_file_path = os.path.join(results_dir, pcap_file_name)

                # compress in 10 KiB chunks so the whole capture never has
                # to be held in memory
                with open(pcap_file_path, 'wb') as pcap_bz2, \
                        open(td.pcap_filename(), 'rb') as pcap:
                    compressor = bz2.BZ2Compressor()
                    compressed_size_so_far = 0
                    for pcap_data in iter(lambda: pcap.read(10 * 1024), b''):
                        compressed_chunk = compressor.compress(pcap_data)
                        pcap_bz2.write(compressed_chunk)
                        compressed_size_so_far += len(compressed_chunk)

                    compressed_chunk = compressor.flush()
                    pcap_bz2.write(compressed_chunk)
                    compressed_size_so_far += len(compressed_chunk)

                    uncompressed_size = os.path.getsize(td.pcap_filename())
                    # an empty capture has no meaningful ratio; skipping
                    # the log avoids a division by zero that would make a
                    # successful write look like a failure
                    if uncompressed_size:
                        compression_ratio = 100 * (
                            float(compressed_size_so_far) /
                            float(uncompressed_size))
                        logging.debug(
                            "pcap BZ2 compression: "
                            "compressed/uncompressed (ratio):"
                            " %d/%d (%.1f%%)" % (compressed_size_so_far,
                                                 uncompressed_size,
                                                 compression_ratio))

                logging.info("Saved pcap to "
                             "%s." % pcap_file_path)
                bz2_successful = True
            except Exception as exception:
                logging.exception("Failed to compress and write "
                                  "pcap file: %s" % exception)
            if not bz2_successful:
                logging.info("Writing pcap file uncompressed")
                try:
                    pcap_file_name = "pcap_%s-%s.pcap" % (name, timestamp)
                    pcap_file_path = os.path.join(results_dir,
                                                  pcap_file_name)

                    with open(pcap_file_path, 'wb') as pcap_out, \
                            open(td.pcap_filename(), 'rb') as pcap:
                        for pcap_data in iter(lambda: pcap.read(10 * 1024),
                                              b''):
                            pcap_out.write(pcap_data)

                    logging.info("Saved pcap to "
                                 "%s." % pcap_file_path)
                except Exception as exception:
                    logging.exception("Failed to write "
                                      "pcap file: %s" % exception)
            # delete pcap data to free up some memory
            logging.debug("Removing pcap data from memory")
            td.delete()
            del td

        # close input file handle(s)
        close_input_files(input_files)

        logging.debug("Storing results for %s" % name)
        try:
            results[name] = exp.results
        except Exception as exception:
            logging.exception("Error storing results for "
                              "%s: %s" % (name, exception))
            if "results_exception" not in results:
                results["results_exception"] = {}

            results["results_exception"][name] = str(exception)

        end_time = datetime.now()
        time_taken = (end_time - start_time)
        results["meta"]["time_taken"] = time_taken.total_seconds()

        logging.info("%s took %s to finish." % (name, time_taken))

        logging.debug("Saving %s results to file" % name)
        try:
            # pretty printing results will increase file size, but files are
            # compressed before sending.
            result_file_path = self.get_result_file(name, timestamp)
            # the context manager closes the file even if dumping fails
            with bz2.BZ2File(result_file_path, "w") as result_file:
                json.dump(results, result_file, indent=2,
                          separators=(',', ': '),
                          # ignore encoding errors, these will be dealt
                          # with on the server
                          ensure_ascii=False)
        except Exception as exception:
            logging.exception("Error saving results for "
                              "%s to file: %s" % (name, exception))
        logging.debug("Done saving %s results to file" % name)