Example #1
class HTTPScanningEngine(ANPNodeEngine, MySQLCollectionEngine, Engine): # XXX do I need to specify Engine here?

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # if set to True then we don't delete the work directories
        self.keep_work_dir = False

        # the location of the incoming http streams
        self.bro_http_dir = os.path.join(saq.SAQ_HOME, self.config['bro_http_dir'])

        # the list of streams (connection ids) that we need to process
        self.stream_list = collections.deque()

        # http whitelist
        self.whitelist = None

        # path to the whitelist file
        self.whitelist_path = os.path.join(saq.SAQ_HOME, self.config['whitelist_path'])

    @property
    def name(self):
        return 'http_scanner'

    def initialize_collection(self, *args, **kwargs):
        # TODO: before we start collecting, make sure that everything in our
        # local directory has a matching entry in the workload database

        super().initialize_collection(*args, **kwargs)

    def anp_command_handler(self, anp, command):
        """Handle inbound ANP commands from remote http engines."""

        if command.command == ANP_COMMAND_COPY_FILE:
            anp.send_message(ANPCommandOK())
        elif command.command == ANP_COMMAND_PROCESS:
            self.add_sql_work_item(command.target)
            anp.send_message(ANPCommandOK())
        else:
            self.default_command_handler(anp, command)

    def get_next_stream(self):
        """Returns the next HTTP stream to be processed or None if nothing is available to be processed."""
        # do we have a list yet?
        if len(self.stream_list) == 0:
            for file_name in os.listdir(self.bro_http_dir):
                m = REGEX_CONNECTION_ID.match(file_name)
                if m:
                    self.stream_list.append(m.group(1))

        if len(self.stream_list) == 0:
            return None

        return self.stream_list.popleft()

    def submit_stream(self, stream_prefix, node_id):
        # submit http request files
        logging.info("sending stream {}".format(stream_prefix))
        source_files = [ os.path.join(self.bro_http_dir, '{}.request'.format(stream_prefix)),
                         os.path.join(self.bro_http_dir, '{}.request.entity'.format(stream_prefix)),
                         os.path.join(self.bro_http_dir, '{}.reply'.format(stream_prefix)),
                         os.path.join(self.bro_http_dir, '{}.reply.entity'.format(stream_prefix)),
                         os.path.join(self.bro_http_dir, '{}.ready'.format(stream_prefix)) ]

        sent_files = []
        for source_file in source_files:
            if not os.path.exists(source_file):
                continue
            
            result = self.submit_command(ANPCommandCOPY_FILE(source_file, source_file), node_id)
            if result is None:
                # no servers available at the moment
                return False
            elif result.command == ANP_COMMAND_OK:
                sent_files.append(source_file)
                continue
            elif result.command == ANP_COMMAND_ERROR:
                raise RuntimeError("remote server returned error message: {}".fomrat(result.error_message))
            else:
                raise ValueError("got unexpected command {}".format(result))

        # tell the remote system to process the files
        result = self.submit_command(ANPCommandPROCESS(stream_prefix), node_id)
        if result is None:
            logging.warning("did not receive a response for PROCESS command on {}".format(stream_prefix))
            return False
        elif result.command == ANP_COMMAND_OK:
            # if we get this far then all the files have been sent
            for sent_file in sent_files:
                try:
                    logging.info("removing {}".format(sent_file))
                    os.remove(sent_file)
                except Exception as e:
                    logging.error("unable to delete {}: {}".format(sent_file, e))

            return True
        elif result.command == ANP_COMMAND_ERROR:
            logging.warning("remote server returned error message: {}".format(result.error_message))
            return False
        else:
            logging.error("got unexpected command {}".format(result))
            return False

    def collect_client_mode(self):
        while not self.collection_shutdown:
            # gather extracted http files and submit them to the server node
            stream_prefix = self.get_next_stream()

            if stream_prefix is None:
                # nothing to do right now...
                logging.debug("no streams available to send")
                return False

            # do we have an anp node to send data to?
            node_id = self.get_available_node()
            if node_id is None:
                logging.info("waiting for available ANP node...")
                return False

            try:
                self.submit_stream(stream_prefix, node_id)
            except Exception as e:
                logging.error("unable to submit stream {}: {}".format(stream_prefix, e))
                report_exception() 

    def collect_local_mode(self):
        # gather extracted files and just process them
        stream_prefix = self.get_next_stream()
        if stream_prefix:
            self.add_work_item(stream_prefix)
            return True

        return False

    def collect_server_mode(self):
        # in server mode we just process our local workload
        return MySQLCollectionEngine.collect(self)

    def process(self, stream_prefix):

        # process the .ready file
        # file format is as follows
        #
        # C7kebl1wNwKQ1qOPck.1.ready
        # time = 1537467014.49546
        # interrupted = F
        # finish_msg = message ends normally
        # body_length = 433994
        # content_gap_length = 0
        # header_length = 494
        #

        details = {
            HTTP_DETAILS_REQUEST: [],
            HTTP_DETAILS_REPLY: [],
            HTTP_DETAILS_READY: [],
        }

        base_path = os.path.join(self.bro_http_dir, stream_prefix)
        # the ready file contains stream summary info
        ready_path = '{}.ready'.format(base_path)
        # http request headers
        request_path = '{}.request'.format(base_path)
        # http request content (POST content for example)
        request_entity_path = '{}.request.entity'.format(base_path)
        # http response headers
        reply_path = '{}.reply'.format(base_path)
        # http response content
        reply_entity_path = '{}.reply.entity'.format(base_path)

        # make sure we have at least the files we expect (summary, and request headers)
        for path in [ ready_path, request_path ]:
            if not os.path.exists(path):
                logging.error("missing expected file {}".format(path))
                return False

        # parse the ready file
        stream_time = None
        interrupted = False
        content_gap_length = 0

        with open(ready_path, 'r') as fp:
            for line in fp:
                details[HTTP_DETAILS_READY].append(line.strip())
                key, value = [_.strip() for _ in line.split(' = ')]
                
                if key == 'time':
                    stream_time = datetime.datetime.fromtimestamp(float(value))
                elif key == 'interrupted':
                    interrupted = value == 'T'
                elif key == 'content_gap_length':
                    content_gap_length = int(value)

        # parse the request
        request_headers = [] # list of (key, value) tuples
        request_headers_lookup = {} # keyed by lowercased header name

        with open(request_path, 'r') as fp:
            request_ipv4 = fp.readline().strip()
            request_method = fp.readline().strip()
            request_original_uri = fp.readline().strip()
            request_unescaped_uri = fp.readline().strip()
            request_version = fp.readline().strip()

            logging.info("processing {} ipv4 {} method {} uri {}".format(stream_prefix, request_ipv4,
                                                                         request_method, request_original_uri))

            details[HTTP_DETAILS_REQUEST].append(request_ipv4)
            details[HTTP_DETAILS_REQUEST].append(request_method)
            details[HTTP_DETAILS_REQUEST].append(request_original_uri)
            details[HTTP_DETAILS_REQUEST].append(request_unescaped_uri)
            details[HTTP_DETAILS_REQUEST].append(request_version)

            for line in fp:
                details[HTTP_DETAILS_REQUEST].append(line.strip())
                key, value = [_.strip() for _ in line.split('\t')]
                request_headers.append((key, value))
                request_headers_lookup[key.lower()] = value

        # parse the response if it exists
        reply_headers = [] # list of (key, value) tuples
        reply_headers_lookup = {} # keyed by lowercased header name
        reply_version = None
        reply_code = None
        reply_reason = None
        reply_ipv4 = None
        reply_port = None

        if os.path.exists(reply_path):
            with open(reply_path, 'r') as fp:
                first_line = fp.readline()
                details[HTTP_DETAILS_REPLY].append(first_line.strip())
                reply_ipv4, reply_port = [_.strip() for _ in first_line.split('\t')]
                reply_port = int(reply_port)
                reply_version = fp.readline().strip()
                reply_code = fp.readline().strip()
                reply_reason = fp.readline().strip()

                details[HTTP_DETAILS_REPLY].append(reply_version)
                details[HTTP_DETAILS_REPLY].append(reply_code)
                details[HTTP_DETAILS_REPLY].append(reply_reason)

                for line in fp:
                    details[HTTP_DETAILS_REPLY].append(line.strip())
                    key, value = [_.strip() for _ in line.split('\t')]
                    reply_headers.append((key, value))
                    reply_headers_lookup[key.lower()] = value

        self.root = RootAnalysis()
        self.root.uuid = str(uuid.uuid4())
        self.root.storage_dir = os.path.join(self.collection_dir, self.root.uuid[0:3], self.root.uuid)
        self.root.initialize_storage()

        self.root.tool = 'ACE - Bro HTTP Scanner'
        self.root.tool_instance = self.hostname
        self.root.alert_type = 'http'
        self.root.description = 'BRO HTTP Scanner Detection - {} {}'.format(request_method, request_original_uri)
        self.root.event_time = datetime.datetime.now() if stream_time is None else stream_time
        self.root.details = details

        self.root.add_observable(F_IPV4, request_ipv4)
        if reply_ipv4:
            self.root.add_observable(F_IPV4, reply_ipv4)
            self.root.add_observable(F_IPV4_CONVERSATION, create_ipv4_conversation(request_ipv4, reply_ipv4))

        if 'host' in request_headers_lookup:
            self.root.add_observable(F_FQDN, request_headers_lookup['host'])

        uri = request_original_uri[:]
        if 'host' in request_headers_lookup:
            # I don't think we'll ever see https here as that gets parsed as a different protocol in bro
            # we should only be seeing HTTP traffic
            uri = '{}://{}{}{}'.format('https' if reply_port == 443 else 'http', 
                                       request_headers_lookup['host'], 
                                       # if the default port is used then leave it out, otherwise include it in the url
                                       '' if reply_port == 80 else ':{}'.format(reply_port), 
                                       uri)
            self.root.add_observable(F_URL, uri)

        if request_original_uri != request_unescaped_uri:
            uri = request_unescaped_uri[:]
            if 'host' in request_headers_lookup:
                uri = '{}:{}'.format(request_headers_lookup['host'], uri)
                self.root.add_observable(F_URL, uri)

        # move all the files into the work directory and add them as file observables
        shutil.move(ready_path, self.root.storage_dir)
        self.root.add_observable(F_FILE, os.path.basename(ready_path))
        shutil.move(request_path, self.root.storage_dir)
        self.root.add_observable(F_FILE, os.path.basename(request_path))
        if os.path.exists(request_entity_path):
            shutil.move(request_entity_path, self.root.storage_dir)
            self.root.add_observable(F_FILE, os.path.basename(request_entity_path))
        if os.path.exists(reply_path):
            shutil.move(reply_path, self.root.storage_dir)
            self.root.add_observable(F_FILE, os.path.basename(reply_path))
        if os.path.exists(reply_entity_path):
            shutil.move(reply_entity_path, self.root.storage_dir)
            self.root.add_observable(F_FILE, os.path.basename(reply_entity_path))

        try:
            self.root.save()
        except Exception as e:
            logging.error("unable to save {}: {}".format(self.root, e))
            report_exception()
            return False

        # has the destination host been whitelisted?
        try:
            if self.whitelist is None:
                self.whitelist = BrotexWhitelist(self.whitelist_path)
                self.whitelist.load_whitelist()
            else:
                self.whitelist.check_whitelist()

            if 'host' in request_headers_lookup and request_headers_lookup['host']:
                if self.whitelist.is_whitelisted_fqdn(request_headers_lookup['host']):
                    logging.debug("stream {} whitelisted by fqdn {}".format(stream_prefix, request_headers_lookup['host']))
                    return

        except Exception as e:
            logging.error("whitelist check failed for {}: {}".format(stream_prefix, e))
            report_exception()

        # now analyze the file
        try:
            self.analyze(self.root)
        except Exception as e:
            logging.error("analysis failed for {}: {}".format(path, e))
            report_exception()

    def post_analysis(self, root):
        if self.should_alert(root):
            root.submit()
            self.cancel_analysis()

    def cleanup(self, work_item):
        if not self.root:
            return

        if self.root.delayed:
            return

        if not self.keep_work_dir:
            logging.debug("deleting {}".format(self.root.storage_dir))
            self.root.delete()
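
REGEX_CONNECTION_ID is defined elsewhere in the module. Given the .ready naming shown in process() (e.g. C7kebl1wNwKQ1qOPck.1.ready), a plausible reconstruction (the exact pattern is an assumption, not the project's actual definition) looks like this:

import re

# assumption: match only the .ready marker file so each stream is queued once,
# and capture the connection id prefix, e.g.
# "C7kebl1wNwKQ1qOPck.1.ready" -> "C7kebl1wNwKQ1qOPck.1"
REGEX_CONNECTION_ID = re.compile(r'^([^\.]+\.\d+)\.ready$')

m = REGEX_CONNECTION_ID.match('C7kebl1wNwKQ1qOPck.1.ready')
assert m and m.group(1) == 'C7kebl1wNwKQ1qOPck.1'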
Example #2
    def load_config(self):
        self.whitelist = BrotexWhitelist(os.path.join(saq.SAQ_HOME, self.config['whitelist_path']))
        self.auto_reload()
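
BrotexWhitelist comes from the same codebase and is not shown on this page. Here is a minimal sketch of the behavior that load_whitelist() and check_whitelist() imply, assuming an mtime-based reload (the fields and the one-entry-per-line file format are illustrative, not the actual implementation):

import os

class BrotexWhitelist:
    """Illustrative sketch only, not the actual ACE class."""

    def __init__(self, path):
        self.path = path
        self.mtime = None   # mtime of the file at last load
        self.fqdns = set()  # hypothetical storage for whitelisted fqdns

    def load_whitelist(self):
        # assumed format: one "type:value" entry per line, '#' for comments
        self.fqdns.clear()
        with open(self.path, 'r') as fp:
            for line in fp:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                entry_type, _, value = line.partition(':')
                if entry_type == 'http_host':
                    self.fqdns.add(value.lower())
        self.mtime = os.path.getmtime(self.path)

    def check_whitelist(self):
        # reload only when the file has changed on disk since the last load
        if self.mtime is None or os.path.getmtime(self.path) != self.mtime:
            self.load_whitelist()

    def is_whitelisted_fqdn(self, fqdn):
        return fqdn.lower() in self.fqdns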
Example #3
    def execute_pre_analysis(self):
        if self.root.alert_type != ANALYSIS_TYPE_BRO_HTTP:
            return False

        # process the .ready file
        # file format is as follows
        #
        # C7kebl1wNwKQ1qOPck.1.ready
        # time = 1537467014.49546
        # interrupted = F
        # finish_msg = message ends normally
        # body_length = 433994
        # content_gap_length = 0
        # header_length = 494
        #

        self.root.details = {
            HTTP_DETAILS_REQUEST: [],
            HTTP_DETAILS_REPLY: [],
            HTTP_DETAILS_READY: [],
        }

        stream_prefix = None
        ready_path = None
        request_path = None
        request_entity_path = None
        reply_path = None
        reply_entity_path = None

        for file_observable in self.root.observables:
            m = REGEX_CONNECTION_ID.match(file_observable.value)
            if m:
                stream_prefix = m.group(1)
                # the ready file contains stream summary info
                ready_path = os.path.join(self.root.storage_dir, file_observable.value)
            elif file_observable.value.endswith('.request'):
                # http request headers
                request_path = os.path.join(self.root.storage_dir, file_observable.value)
            elif file_observable.value.endswith('.request.entity'):
                # http request content (POST content for example)
                request_entity_path = os.path.join(self.root.storage_dir, file_observable.value)
            elif file_observable.value.endswith('.reply'):
                # http response headers
                reply_path = os.path.join(self.root.storage_dir, file_observable.value)
            elif file_observable.value.endswith('.reply.entity'):
                # http response content
                reply_entity_path = os.path.join(self.root.storage_dir, file_observable.value)

        if stream_prefix is None:
            logging.error("unable to find .ready file for http submission in {}".format(self.root))
            return False

        # make sure we have at least the files we expect (summary, and request headers)
        for path in [ ready_path, request_path ]:
            if path is None or not os.path.exists(path):
                logging.error("missing expected file {}".format(path))
                return False

        # parse the ready file
        stream_time = None
        interrupted = False
        content_gap_length = 0

        with open(ready_path, 'r') as fp:
            for line in fp:
                self.root.details[HTTP_DETAILS_READY].append(line.strip())
                key, value = [_.strip() for _ in line.split(' = ')]
                
                if key == 'time':
                    stream_time = datetime.datetime.fromtimestamp(float(value))
                elif key == 'interrupted':
                    interrupted = value == 'T'
                elif key == 'content_gap_length':
                    content_gap_length = int(value)

        # parse the request
        request_headers = [] # list of (key, value) tuples
        request_headers_lookup = {} # keyed by lowercased header name

        with open(request_path, 'r') as fp:
            request_ipv4 = fp.readline().strip()
            request_method = fp.readline().strip()
            request_original_uri = fp.readline().strip()
            request_unescaped_uri = fp.readline().strip()
            request_version = fp.readline().strip()

            logging.info("processing {} ipv4 {} method {} uri {}".format(stream_prefix, request_ipv4,
                                                                         request_method, request_original_uri))

            self.root.details[HTTP_DETAILS_REQUEST].append(request_ipv4)
            self.root.details[HTTP_DETAILS_REQUEST].append(request_method)
            self.root.details[HTTP_DETAILS_REQUEST].append(request_original_uri)
            self.root.details[HTTP_DETAILS_REQUEST].append(request_unescaped_uri)
            self.root.details[HTTP_DETAILS_REQUEST].append(request_version)

            for line in fp:
                self.root.details[HTTP_DETAILS_REQUEST].append(line.strip())
                key, value = [_.strip() for _ in line.split('\t')]
                request_headers.append((key, value))
                request_headers_lookup[key.lower()] = value

        # parse the response if it exists
        reply_headers = [] # list of (key, value) tuples
        reply_headers_lookup = {} # keyed by lowercased header name
        reply_version = None
        reply_code = None
        reply_reason = None
        reply_ipv4 = None
        reply_port = None

        if reply_path and os.path.exists(reply_path):
            with open(reply_path, 'r') as fp:
                first_line = fp.readline()
                self.root.details[HTTP_DETAILS_REPLY].append(first_line.strip())
                reply_ipv4, reply_port = [_.strip() for _ in first_line.split('\t')]
                reply_port = int(reply_port)
                reply_version = fp.readline().strip()
                reply_code = fp.readline().strip()
                reply_reason = fp.readline().strip()

                self.root.details[HTTP_DETAILS_REPLY].append(reply_version)
                self.root.details[HTTP_DETAILS_REPLY].append(reply_code)
                self.root.details[HTTP_DETAILS_REPLY].append(reply_reason)

                for line in fp:
                    self.root.details[HTTP_DETAILS_REPLY].append(line.strip())
                    key, value = [_.strip() for _ in line.split('\t')]
                    reply_headers.append((key, value))
                    reply_headers_lookup[key.lower()] = value

        self.root.description = 'BRO HTTP Scanner Detection - {} {}'.format(request_method, request_original_uri)
        self.root.event_time = datetime.datetime.now() if stream_time is None else stream_time

        self.root.add_observable(F_IPV4, request_ipv4)
        if reply_ipv4:
            self.root.add_observable(F_IPV4, reply_ipv4)
            self.root.add_observable(F_IPV4_CONVERSATION, create_ipv4_conversation(request_ipv4, reply_ipv4))

        if 'host' in request_headers_lookup:
            self.root.add_observable(F_FQDN, request_headers_lookup['host'])

        uri = request_original_uri[:]
        if 'host' in request_headers_lookup:
            # I don't think we'll ever see https here as that gets parsed as a different protocol in bro
            # we should only be seeing HTTP traffic
            uri = '{}://{}{}{}'.format('https' if reply_port == 443 else 'http', 
                                       request_headers_lookup['host'], 
                                       # if the default port is used then leave it out, otherwise include it in the url
                                       '' if reply_port == 80 else ':{}'.format(reply_port), 
                                       uri)
            self.root.add_observable(F_URL, uri)

        if request_original_uri != request_unescaped_uri:
            uri = request_unescaped_uri[:]
            if 'host' in request_headers_lookup:
                uri = '{}:{}'.format(request_headers_lookup['host'], uri)
                self.root.add_observable(F_URL, uri)

        # has the destination host been whitelisted?
        if self.whitelist is None:
            self.whitelist = BrotexWhitelist(self.whitelist_path)
            self.whitelist.load_whitelist()
        else:
            self.whitelist.check_whitelist()

        if 'host' in request_headers_lookup and request_headers_lookup['host']:
            if self.whitelist.is_whitelisted_fqdn(request_headers_lookup['host']):
                logging.debug("stream {} whitelisted by fqdn {}".format(stream_prefix, request_headers_lookup['host']))
                self.root.whitelisted = True
                return
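
The scheme and port handling above is compact enough to miss; pulled out into a standalone helper (the function name is illustrative), the same logic reads:

def build_url(host, uri, reply_port):
    # mirrors the reconstruction above: https is assumed only when the server
    # port is 443, and the port is omitted from the URL only for the default 80
    scheme = 'https' if reply_port == 443 else 'http'
    port = '' if reply_port == 80 else ':{}'.format(reply_port)
    return '{}://{}{}{}'.format(scheme, host, port, uri)

# build_url('example.com', '/index.html', 80)   -> 'http://example.com/index.html'
# build_url('example.com', '/index.html', 8080) -> 'http://example.com:8080/index.html'
# build_url('example.com', '/index.html', 443)  -> 'https://example.com:443/index.html'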
Example #4
class BrotexHTTPPackageAnalyzer(AnalysisModule):
    def verify_environment(self):
        self.verify_config_exists('whitelist_path')
        self.verify_config_exists('maximum_http_requests')
        self.verify_path_exists(self.config['whitelist_path'])

    def load_config(self):
        self.whitelist = BrotexWhitelist(os.path.join(saq.SAQ_HOME, self.config['whitelist_path']))
        self.auto_reload()

    @property
    def generated_analysis_type(self):
        return BrotexHTTPPackageAnalysis

    @property
    def valid_observable_types(self):
        return F_FILE

    def auto_reload(self):
        # make sure the whitelist is up-to-date
        self.whitelist.check_whitelist()

    def execute_analysis(self, _file):
        # is this a brotex package?
        if not _pattern_brotex_package.match(_file.value):
            logging.debug("{} does not appear to be a brotex http package".format(_file))
            return False

        analysis = self.create_analysis(_file)
        logging.debug("{} is a valid brotex http package".format(_file))

        # extract the contents of the package
        file_path = os.path.join(self.root.storage_dir, _file.value)

        brotex_dir = '{}.brotex'.format(file_path)
        if not os.path.isdir(brotex_dir):
            try:
                os.mkdir(brotex_dir)
            except Exception as e:
                logging.error("unable to create directory {}: {}".format(brotex_dir, e))
                return False

        # extract all the things into the brotex_dir
        p = Popen(['tar', 'xf', file_path, '-C', brotex_dir], stdout=PIPE, stderr=PIPE)
        stdout, stderr = p.communicate()
        p.wait()

        if p.returncode:
            logging.warning("unable to extract files from {} (tar returned error code {})".format(
                            _file, p.returncode))
            return False

        if stderr:
            logging.warning("tar reported errors on {}: {}".format(_file, stderr))

        # iterate over all the extracted files
        message_dirs = {}
        for dirpath, dirnames, filenames in os.walk(brotex_dir):
            for dirname in dirnames:
                m = _pattern_message_dir.match(dirname)
                if m:
                    message_number = m.group(1)
                    if message_number not in message_dirs:
                        message_dirs[message_number] = os.path.relpath(os.path.join(dirpath, dirname), start=brotex_dir)
                        logging.debug("found message number {} in {}".format(message_number, _file))
                        continue

        count = 0
        maximum_http_requests = self.config.getint('maximum_http_requests')

        for message_number in message_dirs.keys():
            if maximum_http_requests:
                count += 1
                if count > maximum_http_requests:
                    logging.debug("{} exceeded maximum_http_requests".format(_file))
                    break

            message_dir = os.path.join(brotex_dir, message_dirs[message_number])
            # there should be a file called protocol.http in this directory
            protocol_path = os.path.join(message_dir, 'protocol.http')
            if not os.path.exists(protocol_path):
                logging.error("missing {} for message {} for {}".format(protocol_path, message_number, _file))
                continue

            is_whitelisted = False
            http_request = {
                KEY_TIME: None,
                KEY_SRC_IP: None,
                KEY_SRC_PORT: None,
                KEY_DEST_IP: None,
                KEY_DEST_PORT: None,
                KEY_METHOD: None,
                KEY_HOST: None,
                KEY_URI: None,
                KEY_REFERRER: None,
                KEY_USER_AGENT: None,
                KEY_STATUS_CODE: None,
                KEY_FILES: [] 
            }

            # parse this file for the http protocol information
            with open(protocol_path, 'rb') as fp:
                for line in fp:
                    if line.startswith(b'ts:'):
                        http_request[KEY_TIME] = float(line.decode().strip()[len('ts:'):])
                        logging.debug("parsed event time {} from protocol file".format(http_request[KEY_TIME]))
                        continue

                    if line.startswith(b'host:'):
                        http_request[KEY_HOST] = line.decode().strip()[len('host:'):].strip()
                        if self.whitelist.is_whitelisted(WHITELIST_TYPE_HTTP_HOST, http_request[KEY_HOST]):
                            logging.debug("http {} message_number {} whitelisted by {} {}".format(
                                          _file, message_number, WHITELIST_TYPE_HTTP_HOST, http_request[KEY_HOST]))
                            is_whitelisted = True

                        continue

                    if line.startswith(b'method:'):
                        http_request[KEY_METHOD] = line.decode().strip()[len('method:'):].strip()
                        continue

                    if line.startswith(b'uri:'):
                        http_request[KEY_URI] = line.decode(errors='ignore').strip()[len('uri:'):].strip()
                        continue

                    if line.startswith(b'referrer:'):
                        http_request[KEY_REFERRER] = line.decode().strip()[len('referrer:'):].strip()
                        continue

                    if line.startswith(b'user_agent:'):
                        http_request[KEY_USER_AGENT] = line.decode().strip()[len('user_agent:'):].strip()
                        continue

                    if line.startswith(b'status_code:'):
                        http_request[KEY_STATUS_CODE] = line.decode().strip()[len('status_code:'):].strip()
                        continue

                    if line.startswith(b'id:'):
                        http_connection_details = line.decode().strip()[len('id:'):]
                        m = re.match(r'^\[orig_h=([^,]+?), orig_p=([^,]+?), resp_h=([^,]+?), resp_p=([^\]]+?)\]$', 
                                     http_connection_details.strip())
                        if m:
                            http_request[KEY_SRC_IP], http_request[KEY_SRC_PORT], http_request[KEY_DEST_IP], http_request[KEY_DEST_PORT] = m.groups()

                            if self.whitelist.is_whitelisted(WHITELIST_TYPE_HTTP_SRC_IP, http_request[KEY_SRC_IP]):
                                is_whitelisted = True
                                logging.debug("http {} message_number {} whitelisted by {} {}".format(
                                              _file, message_number, WHITELIST_TYPE_HTTP_SRC_IP, http_request[KEY_SRC_IP]))
                            if self.whitelist.is_whitelisted(WHITELIST_TYPE_HTTP_DEST_IP, http_request[KEY_DEST_IP]):
                                is_whitelisted = True
                                logging.debug("http {} message_number {} whitelisted by {} {}".format(
                                              _file, message_number, WHITELIST_TYPE_HTTP_DEST_IP, http_request[KEY_DEST_IP]))
                        else:
                            logging.debug("could not determine IP addresses for {}".format(http_connection_details))

            if is_whitelisted:
                logging.debug("message_number {} is whitelisted".format(message_number))
                continue

            # then add any files you can find in this directory
            http_request[KEY_FILES] = []
            for file_name in os.listdir(message_dir):
                # skip these generated protocol files we've already parsed
                if file_name == 'protocol.http':
                    continue

                file_path = os.path.relpath(os.path.join(message_dir, file_name), start=self.root.storage_dir)
                http_request[KEY_FILES].append(file_path)
                analysis.add_observable(F_FILE, file_path)

            if http_request[KEY_SRC_IP]:
                analysis.add_observable(F_IPV4, http_request[KEY_SRC_IP])

            if http_request[KEY_DEST_IP]:
                analysis.add_observable(F_IPV4, http_request[KEY_DEST_IP])

            if http_request[KEY_SRC_IP] and http_request[KEY_DEST_IP]:
                analysis.add_observable(F_IPV4_CONVERSATION, create_ipv4_conversation(
                                        http_request[KEY_SRC_IP], http_request[KEY_DEST_IP]))

            if http_request[KEY_HOST]:
                analysis.add_observable(F_FQDN, http_request[KEY_HOST])

            if http_request[KEY_URI]:
                analysis.add_observable(F_URL, http_request[KEY_URI])

            analysis.requests.append(http_request)

        # if we didn't get any requests then we whitelist the whole thing
        if not analysis.requests:
            logging.debug("no requests available from {} -- whitelisting".format(_file))
            _file.mark_as_whitelisted()

        return True
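
The id: line parsed near the end of execute_analysis() is the rendered Bro/Zeek connection id. A small self-contained check of that regex against a made-up sample line:

import re

# the same pattern used in execute_analysis above
_pattern_conn_id = re.compile(r'^\[orig_h=([^,]+?), orig_p=([^,]+?), resp_h=([^,]+?), resp_p=([^\]]+?)\]$')

# hypothetical id: payload of the kind found in a protocol.http file
sample = '[orig_h=10.0.0.5, orig_p=49152, resp_h=93.184.216.34, resp_p=80]'

m = _pattern_conn_id.match(sample)
assert m is not None
src_ip, src_port, dest_ip, dest_port = m.groups()
print(src_ip, src_port, dest_ip, dest_port)  # 10.0.0.5 49152 93.184.216.34 80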