Example #1
    def setUp(self, *args, **kwargs):
        super().setUp(*args, **kwargs)

        # make sure we have a connection to carbon black
        import cbapi_legacy as cbapi
        cb_url = saq.CONFIG['carbon_black']['url']
        cb_token = saq.CONFIG['carbon_black']['token']
        cb = cbapi.CbApi(cb_url, token=cb_token, ssl_verify=False) # XXX <-- get rid of that

        try:
            info = cb.info()
        except Exception as e:
            self.skipTest("carbon black not available at {}: {}".format(cb_url, e))
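The pattern above probes the server once during setUp and skips the entire test case when Carbon Black is unreachable. A minimal standalone sketch of the same guard, assuming the legacy cbapi client used above (the url and token arguments are placeholders):

import cbapi_legacy as cbapi

def cb_available(url, token):
    """Return True if the Carbon Black server answers a basic info() call."""
    cb = cbapi.CbApi(url, token=token, ssl_verify=False)
    try:
        cb.info()  # a cheap server-info request doubles as a connectivity check
        return True
    except Exception:
        return False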
Example #2
    def collect_binaries(self):

        # get the list of hashes available to download in the past X minutes
        cb = cbapi.CbApi(self.cb_url, token=self.cb_token,
                         ssl_verify=False)  # XXX <-- get rid of that

        # do we need a new query to execute?
        if self.current_query is None:

            # build the time range for the carbon black query
            if self.last_search_time is not None:  # have we already searched at least one time?
                time_range = 'server_added_timestamp:[{} TO *]'.format(
                    self.last_search_time.strftime('%Y-%m-%dT%H:%M:%S'))
            elif self.initial_search_offset == 0:
                time_range = ''  # get EVERYTHING available (useful when running this entire system for the first time or to get caught up)
            else:  # first time running, go back N hours
                time_range = 'server_added_timestamp:[{} TO *]'.format(
                    (datetime.datetime.utcnow() -
                     datetime.timedelta(hours=self.initial_search_offset)
                     ).strftime('%Y-%m-%dT%H:%M:%S'))

            self.current_query = 'is_executable_image:true -digsig_result:Signed {}'.format(
                time_range)
            self.current_index = 0

        try:
            json_result = cb.binary_search(self.current_query,
                                           start=self.current_index,
                                           rows=self.download_batch_size,
                                           sort='server_added_timestamp asc')

            # if we're executing a new query for the first time
            # then remember how many binaries we need to get
            if self.current_result_count is None:
                self.current_result_count = json_result['total_results']

        except requests.exceptions.HTTPError as e:
            logging.error(
                "carbon black server returned an error: {}".format(e))
            return
        except Exception as e:
            logging.error(
                "communication error with carbon black server: {}".format(e))
            return

        logging.info(
            "requested binary data from {} at index {} result count {} with query {}"
            .format(self.cb_url, self.current_index, self.current_result_count,
                    self.current_query))

        if len(json_result['results']) < 1:
            logging.debug("got no more results from search")
            self.current_query = None
            self.current_index = None
            self.current_result_count = None
            return

        for binary in json_result['results']:

            if self.shutdown_event.is_set():
                return

            # move to the next set of items after processing these
            self.current_index += 1

            # figure out when this binary arrived at the carbon black server
            # sometimes the timestamp is missing the .%fZ suffix, so pick the format to match
            time_stamp_format = "%Y-%m-%dT%H:%M:%SZ"
            if '.' in binary['server_added_timestamp']:
                time_stamp_format = "%Y-%m-%dT%H:%M:%S.%fZ"

            event_time = datetime.datetime.strptime(
                binary['server_added_timestamp'],
                time_stamp_format).replace(tzinfo=pytz.utc)

            # this also becomes our new starting point next time we search
            if self.last_search_time is None or event_time > self.last_search_time:
                # we move one second past the last time we saw something added
                self.last_search_time = event_time + datetime.timedelta(
                    seconds=1)

            binary_dir = os.path.join(self.storage_dir, binary['md5'][0:2])
            binary_path = os.path.join(binary_dir, binary['md5'])
            binary_zip_path = '{}.zip'.format(binary_path)
            binary_json_path = '{}.json'.format(binary_path)
            submit_path = '{}.submit'.format(binary_path)

            # have we already submitted this one for analysis?
            if os.path.exists(submit_path):
                logging.debug("already submitted {}".format(binary['md5']))
                continue

            # have we already downloaded this md5?
            if os.path.exists(binary_path):
                logging.debug("already have binary {} at {}".format(
                    binary['md5'], binary_path))
                continue

            else:
                # go get it from Carbon Black
                if not os.path.isdir(binary_dir):
                    try:
                        os.makedirs(binary_dir)
                    except Exception as e:
                        logging.error(
                            "unable to create directory {}: {}".format(
                                binary_dir, e))
                        continue

                logging.info("downloading {}".format(binary['md5']))
                try:
                    # XXX see if you can do this without pulling the entire binary into memory
                    binary_content = cb.binary(binary['md5'])
                except Exception as e:
                    logging.info("unable to download {}: {}".format(
                        binary['md5'], e))
                    continue

                if len(binary_content) == 0:
                    logging.warning(
                        "got 0 bytes for {}".format(binary_zip_path))
                    continue

                with open(binary_zip_path, 'wb') as fp:
                    try:
                        fp.write(binary_content)
                    except Exception as e:
                        logging.error("unable to write to {}: {}".format(
                            binary_zip_path, e))
                        continue

                # also save the json that came with the file
                with open(binary_json_path, 'w') as fp:
                    json.dump(binary, fp, indent=4)

                # extract the file
                with zipfile.ZipFile(binary_zip_path) as zip_fp:
                    with zip_fp.open('filedata') as unzipped_fp:
                        with open(binary_path, 'wb') as fp:
                            fp.write(unzipped_fp.read())

                # delete the zip file
                os.remove(binary_zip_path)

                logging.debug("downloaded {}".format(binary_path))

            # we have to copy the file into the new storage directory for it to be analyzed
            # we use the file name that Carbon Black saw on the endpoint
            try:
                file_name = binary['observed_filename'][-1]
            except Exception as e:
                logging.error(
                    "cannot determine file name for {}: {}".format(binary_path, e))
                file_name = 'unknown'

            # we need to figure out if this is a path from a Windows machine or a Unix machine,
            # so we count the backslashes and forward slashes
            # it's a hack but it should work 99.9% of the time
            if file_name.count('\\') > file_name.count('/'):
                logging.debug(
                    "{} appears to be a windows path".format(file_name))
                file_name = ntpath.basename(file_name)
            else:
                logging.debug("{} appears to be a unix path".format(file_name))
                file_name = os.path.basename(file_name)

            observables = []
            for endpoint in binary['endpoint']:
                # endpoint values look like HOSTNAME|sensor_id -- keep just the hostname
                if '|' in endpoint:
                    endpoint = endpoint[:endpoint.index('|')]
                observables.append({'type': F_HOSTNAME, 'value': endpoint})

            for file_path in binary['observed_filename']:
                observables.append({'type': F_FILE_PATH, 'value': file_path})

            # create a new submission request for this
            self.work_list.append(
                CarbonBlackBinarySubmission(
                    description='Carbon Black binary {}'.format(file_name),
                    analysis_mode=ANALYSIS_MODE_BINARY,
                    tool='ACE - Carbon Black Binary Analysis',
                    tool_instance=self.fqdn,
                    type='carbon_black_binary',
                    event_time=event_time,
                    details=binary,
                    observables=observables,
                    tags=[],
                    files=[binary_path]))
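Example #2 papers over an API quirk worth isolating: server_added_timestamp sometimes arrives without the fractional-second component. A minimal sketch of that handling as a standalone helper (the function name is ours, not part of the source):

import datetime

import pytz

def parse_cb_timestamp(value):
    """Parse a Carbon Black server_added_timestamp into an aware UTC datetime."""
    # the .%f fractional part is not always present, so pick the format to match
    fmt = '%Y-%m-%dT%H:%M:%S.%fZ' if '.' in value else '%Y-%m-%dT%H:%M:%SZ'
    return datetime.datetime.strptime(value, fmt).replace(tzinfo=pytz.utc)

# both variants parse to the same timezone-aware value type
parse_cb_timestamp('2018-06-01T12:00:00Z')
parse_cb_timestamp('2018-06-01T12:00:00.123456Z')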
Example #3
File: cb_engine.py Project: code4days/ACE
    def collect(self):
        # get the list of hashes available to download in the past X minutes
        # TODO past X minutes
        cb = cbapi.CbApi(self.config['url'],
                         ssl_verify=False,
                         token=self.config['token'])
        total_results = None

        # how far back do we look?
        # normally we look back over some period of time for any new binaries that were uploaded
        if self.last_search_time is not None:  # have we already searched at least one time?
            # NOTE remember to use UTC time here
            self.time_range = 'server_added_timestamp:[{0} TO *]'.format((
                datetime.datetime.utcnow() -
                datetime.timedelta(minutes=self.config.getint('search_offset'))
            ).strftime('%Y-%m-%dT%H:%M:%S'))
        elif self.config.getint('initial_search_offset') == 0:
            self.time_range = ''  # get EVERYTHING available (useful when running this entire system for the first time or to get caught up)
        else:  # first time running, go back N hours
            self.time_range = 'server_added_timestamp:[{0} TO *]'.format(
                (datetime.datetime.utcnow() - datetime.timedelta(
                    hours=self.config.getint('initial_search_offset'))
                 ).strftime('%Y-%m-%dT%H:%M:%S'))

        # remember the last time we searched
        # this was used to determine the next time range
        # now it's just a marker that at least one search was performed
        self.last_search_time = datetime.datetime.utcnow()

        while not self.shutdown:

            query = 'is_executable_image:true -digsig_result:Signed {}'.format(
                self.time_range)

            try:
                json_result = cb.binary_search(query,
                                               start=self.index,
                                               rows=self.increment)
            except requests.exceptions.HTTPError as e:
                logging.error(
                    "carbon black server returned an error: {}".format(e))
                return
            except Exception as e:
                logging.error(
                    "communication error with carbon black server: {}".format(
                        e))
                #report_exception()
                return

            logging.info(
                "requested binary data from {0} index {1} of {2} with query {3}"
                .format(self.config['url'], self.index,
                        json_result['total_results'], query))
            self.index += self.increment

            if len(json_result['results']) < 1:
                logging.debug("got no more results from search")
                # then we reset and use a new time range next time
                self.index = 0
                self.time_range = None
                return

            for binary in json_result['results']:
                if self.shutdown:
                    return

                binary_dir = os.path.join(self.config['storage_dir'],
                                          binary['md5'][0:2])
                binary_path = os.path.join(binary_dir, binary['md5'])
                binary_zip_path = '{0}.zip'.format(binary_path)
                binary_json_path = '{0}.json'.format(binary_path)

                # have we already downloaded this md5?
                if os.path.exists(binary_path):
                    logging.debug("already have binary {0} at {1}".format(
                        binary['md5'], binary_path))
                else:
                    # go get it from Carbon Black
                    if not os.path.isdir(binary_dir):
                        os.makedirs(binary_dir)

                    logging.info("downloading {0}".format(binary['md5']))
                    with open(binary_zip_path, 'wb') as fp:
                        try:
                            fp.write(cb.binary(binary['md5']))
                        except Exception as e:
                            logging.warning(
                                "unable to download {0}: {1}".format(
                                    binary['md5'], str(e)))
                            continue

                    # also save the json that came with the file
                    with open(binary_json_path, 'w') as fp:
                        json.dump(binary, fp, indent=4)

                    # extract the file
                    with ZipFile(binary_zip_path) as zip_fp:
                        with zip_fp.open('filedata') as unzipped_fp:
                            with open(binary_path, 'wb') as fp:
                                fp.write(unzipped_fp.read())

                    # delete the zip file
                    os.remove(binary_zip_path)

                    logging.debug("downloaded {0}".format(binary_path))

                    # add this file to the work queue
                    while not self.shutdown:
                        try:
                            self.work_queue.put(binary_path,
                                                block=True,
                                                timeout=1)
                            break
                        except Full:
                            logging.debug("work queue is full...")

            # in SINGLE_THREADED mode we only loop through once
            if saq.SINGLE_THREADED:
                return
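Examples #2 and #3 build the same Solr-style range clause against server_added_timestamp to bound the search. A minimal sketch of what that clause looks like for an N-hour lookback (the helper name is ours):

import datetime

def time_range_clause(hours_back):
    """Build the open-ended Solr range clause used by the collectors above."""
    since = datetime.datetime.utcnow() - datetime.timedelta(hours=hours_back)
    # matches binaries the server added at or after the given UTC time
    return 'server_added_timestamp:[{} TO *]'.format(since.strftime('%Y-%m-%dT%H:%M:%S'))

print(time_range_clause(24))
# e.g. server_added_timestamp:[2018-06-01T12:00:00 TO *]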
Example #4
    def collect(self):
        cb = cbapi.CbApi(self.config['url'],
                         ssl_verify=False,
                         token=self.config['token'])
        total_results = None

        # how far back do we look?
        # normally we look back over some period of time for any new binaries that were uploaded
        if self.last_search_time is not None:  # have we already searched at least one time?
            # NOTE remember to use UTC time here
            self.time_range = 'server_added_timestamp:[{0} TO *]'.format((
                datetime.datetime.utcnow() -
                datetime.timedelta(minutes=self.config.getint('search_offset'))
            ).strftime('%Y-%m-%dT%H:%M:%S'))
        elif self.config.getint('initial_search_offset') == 0:
            self.time_range = ''  # get EVERYTHING available (useful when running this entire system for the first time or to get caught up)
        else:  # first time running, go back N hours
            self.time_range = 'server_added_timestamp:[{0} TO *]'.format(
                (datetime.datetime.utcnow() - datetime.timedelta(
                    hours=self.config.getint('initial_search_offset'))
                 ).strftime('%Y-%m-%dT%H:%M:%S'))

        # remember the last time we searched
        # this was used to determine the next time range
        # now it's just a marker that at least one search was performed
        self.last_search_time = datetime.datetime.utcnow()

        while not self.shutdown:

            query = 'path:microsoft\\ office* '
            query += '-process_name:Moc.exe '
            query += '-process_name:xlview.exe '
            query += '-hostname:PC* '
            query += '-hostname:NAKYLEXRDA* '
            query += 'username:ASHLAND username:i50* '
            query += 'cmdline:AppData\\Local\\Microsoft\\Windows\\Temporary\\ Internet\\ Files\\Content.IE5 '
            query += self.time_range

            try:
                logging.info("searching {} for {} starting at {}".format(
                    self.config['url'], query, self.index))
                json_result = cb.process_search(query,
                                                start=self.index,
                                                rows=self.increment)
            except requests.exceptions.HTTPError as e:
                logging.error(
                    "carbon black server returned an error: {}".format(e))
                return
            except Exception as e:
                logging.error(
                    "communication error with carbon black server: {}".format(
                        e))
                #report_exception()
                return

            self.index += self.increment

            if len(json_result['results']) < 1:
                logging.debug("got no more results from search")
                # then we reset and use a new time range next time
                self.index = 0
                self.time_range = None
                return

            for process in json_result['results']:
                if self.shutdown:
                    return

                # have we already downloaded this file?
                logging.debug("checking for {}".format(process['id']))
                with shelve.open(self.tracking_db) as db:
                    if process['id'] in db:
                        logging.debug("already downloaded {}".format(
                            process['id']))
                        continue

                # add this process json to the work queue
                while not self.shutdown:
                    try:
                        self.work_queue.put(process, block=True, timeout=1)
                        break
                    except Full:
                        logging.debug("work queue is full...")

            # in SINGLE_THREADED mode we only loop through once
            if saq.SINGLE_THREADED:
                return
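Examples #3 and #4 share the same hand-off idiom: retry a blocking put with a one-second timeout so a full work queue never blinds the loop to a shutdown request. A self-contained sketch of that pattern (the names are ours):

import queue
import threading

work_queue = queue.Queue(maxsize=10)
shutdown = threading.Event()

def enqueue(item):
    """Put item on the queue, re-checking the shutdown flag about once a second."""
    while not shutdown.is_set():
        try:
            work_queue.put(item, block=True, timeout=1)
            return True
        except queue.Full:
            pass  # queue still full; loop around and check shutdown again
    return False  # shutting down, the item is dropped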