Example 1
    def test_detections_000_ole(self):

        submissions = {}  # key = storage_dir, value = path to file

        for file_name in os.listdir(OFFICE_SAMPLES):
            source_path = os.path.join(OFFICE_SAMPLES, file_name)
            root = create_root_analysis(uuid=str(uuid.uuid4()))
            root.initialize_storage()
            shutil.copy(source_path, root.storage_dir)
            root.add_observable(F_FILE, file_name)
            root.save()
            root.schedule()
            submissions[root.storage_dir] = source_path

        engine = TestEngine()
        engine.enable_module('analysis_module_archive')
        engine.enable_module('analysis_module_file_type')
        engine.enable_module('analysis_module_olevba_v1_1')
        engine.enable_module('analysis_module_officeparser_v1_0')
        engine.enable_module('analysis_module_yara_scanner_v3_4')
        engine.controlled_stop()
        engine.start()
        engine.wait()

        for storage_dir in submissions:
            with self.subTest(storage_dir=storage_dir,
                              source_path=submissions[storage_dir]):
                root = RootAnalysis()
                root.storage_dir = storage_dir
                root.load()
                detections = root.all_detection_points
                self.assertGreater(len(detections), 0)
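
The submission pattern above (create a RootAnalysis, initialize its storage, copy the sample in, register it as an F_FILE observable, then save and schedule) recurs throughout these examples. A minimal, hedged sketch of that pattern as a standalone helper, reusing only the calls shown in the test (create_root_analysis and F_FILE are assumed to come from the same test fixtures and saq imports):

import os
import shutil
import uuid

def submit_sample(sample_path):
    # one RootAnalysis per sample; the file lives inside the analysis
    # storage directory and the observable value is relative to it
    root = create_root_analysis(uuid=str(uuid.uuid4()))
    root.initialize_storage()
    shutil.copy(sample_path, root.storage_dir)
    root.add_observable(F_FILE, os.path.basename(sample_path))
    root.save()
    root.schedule()
    return root.storage_dir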
Example 2
def submit():

    if KEY_ANALYSIS not in request.values:
        abort(Response("missing {} field (see documentation)".format(KEY_ANALYSIS), 400))

    r = json.loads(request.values[KEY_ANALYSIS])

    # the specified company needs to match the company of this node
    # TODO eventually we'll have a single node that serves API to all configured companies

    if KEY_COMPANY_NAME in r and r[KEY_COMPANY_NAME] != saq.CONFIG['global']['company_name']:
        abort(Response("wrong company {} (are you sending to the correct system?)".format(
            r[KEY_COMPANY_NAME]), 400))

    if KEY_DESCRIPTION not in r:
        abort(Response("missing {} field in submission".format(KEY_DESCRIPTION), 400))

    root = RootAnalysis()
    root.uuid = str(uuid.uuid4())

    # does the engine use a different drive for the workload?
    analysis_mode = r[KEY_ANALYSIS_MODE] if KEY_ANALYSIS_MODE in r else saq.CONFIG['engine']['default_analysis_mode']
    if analysis_mode != ANALYSIS_MODE_CORRELATION:
        root.storage_dir = workload_storage_dir(root.uuid)
    else:
        root.storage_dir = storage_dir_from_uuid(root.uuid)

    root.initialize_storage()

    try:

        root.analysis_mode = r[KEY_ANALYSIS_MODE] if KEY_ANALYSIS_MODE in r else saq.CONFIG['engine']['default_analysis_mode']
        root.company_id = saq.CONFIG['global'].getint('company_id')
        root.tool = r[KEY_TOOL] if KEY_TOOL in r else 'api'
        root.tool_instance = r[KEY_TOOL_INSTANCE] if KEY_TOOL_INSTANCE in r else 'api({})'.format(request.remote_addr)
        root.alert_type = r[KEY_TYPE] if KEY_TYPE in r else saq.CONFIG['api']['default_alert_type']
        root.description = r[KEY_DESCRIPTION]
        root.event_time = LOCAL_TIMEZONE.localize(datetime.datetime.now())
        if KEY_EVENT_TIME in r:
            try:
                root.event_time = parse_event_time(r[KEY_EVENT_TIME])
            except ValueError as e:
                abort(Response("invalid event time format for {} (use {} format)".format(
                    r[KEY_EVENT_TIME], event_time_format_json_tz), 400))

        root.details = r[KEY_DETAILS] if KEY_DETAILS in r else {}

        # go ahead and allocate storage
        # XXX use temp dir instead...

        if KEY_TAGS in r:
            for tag in r[KEY_TAGS]:
                root.add_tag(tag)

        # add the observables
        if KEY_OBSERVABLES in r:
            for o in r[KEY_OBSERVABLES]:
                # check for required fields
                for field in [KEY_O_TYPE, KEY_O_VALUE]:
                    if field not in o:
                        abort(Response("an observable is missing the {} field".format(field), 400))

                o_type = o[KEY_O_TYPE]
                o_value = o[KEY_O_VALUE]
                o_time = None
                if KEY_O_TIME in o:
                    try:
                        o_time = parse_event_time(o[KEY_O_TIME])
                    except ValueError:
                        abort(Response("an observable has an invalid time format {} (use {} format)".format(
                            o[KEY_O_TIME], event_time_format_json_tz), 400))

                observable = root.add_observable(o_type, o_value, o_time=o_time)

                if KEY_O_TAGS in o:
                    for tag in o[KEY_O_TAGS]:
                        observable.add_tag(tag)

                if KEY_O_DIRECTIVES in o:
                    for directive in o[KEY_O_DIRECTIVES]:
                        # is this a valid directive?
                        if directive not in VALID_DIRECTIVES:
                            abort(Response("observable {}:{} has invalid directive {} (choose from {})".format(
                                o_type, o_value, directive, ','.join(VALID_DIRECTIVES)), 400))

                        observable.add_directive(directive)

                if KEY_O_LIMITED_ANALYSIS in o:
                    for module_name in o[KEY_O_LIMITED_ANALYSIS]:
                        observable.limit_analysis(module_name)

        # save the files to disk and add them as observables of type file
        for f in request.files.getlist('file'):
            logging.debug("recording file {}".format(f.filename))
            #temp_dir = tempfile.mkdtemp(dir=saq.CONFIG.get('api', 'incoming_dir'))
            #_path = os.path.join(temp_dir, secure_filename(f.filename))
            try:
                #if os.path.exists(_path):
                #logging.error("duplicate file name {}".format(_path))
                #abort(400)

                #logging.debug("saving file to {}".format(_path))
                #try:
                #f.save(_path)
                #except Exception as e:
                #logging.error("unable to save file to {}: {}".format(_path, e))
                #abort(400)

                full_path = os.path.join(root.storage_dir, f.filename)

                try:
                    dest_dir = os.path.dirname(full_path)
                    if not os.path.isdir(dest_dir):
                        try:
                            os.makedirs(dest_dir)
                        except Exception as e:
                            logging.error("unable to create directory {}: {}".format(dest_dir, e))
                            abort(400)

                    logging.debug("saving file {}".format(full_path))
                    f.save(full_path)

                    # add this as a F_FILE type observable
                    root.add_observable(F_FILE, os.path.relpath(full_path, start=root.storage_dir))

                except Exception as e:
                    logging.error("unable to copy file from {} to {} for root {}: {}".format(
                        f.filename, full_path, root, e))
                    abort(400)

            except Exception as e:
                logging.error("unable to deal with file {}: {}".format(f, e))
                report_exception()
                abort(400)

            #finally:
            #try:
            #shutil.rmtree(temp_dir)
            #except Exception as e:
            #logging.error("unable to delete temp dir {}: {}".format(temp_dir, e))

        try:
            if not root.save():
                logging.error("unable to save analysis")
                abort(Response("an error occurred trying to save the alert - review the logs", 400))

            # if we received a submission for correlation mode then we go ahead and add it to the database
            if root.analysis_mode == ANALYSIS_MODE_CORRELATION:
                ALERT(root)

            # add this analysis to the workload
            root.schedule()

        except Exception as e:
            logging.error("unable to sync to database: {}".format(e))
            report_exception()
            abort(Response("an error occurred trying to save the alert - review the logs", 400))

        return json_result({'result': {'uuid': root.uuid}})

    except Exception as e:
        logging.error("error processing submit: {}".format(e))
        report_exception()

        try:
            if os.path.isdir(root.storage_dir):
                logging.info("removing failed submit dir {}".format(root.storage_dir))
                shutil.rmtree(root.storage_dir)
        except Exception as e2:
            logging.error("unable to delete failed submit dir {}: {}".format(root.storage_dir, e2))

        raise e
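
For context, a hedged sketch of what a client-side call to this submit() endpoint might look like. The endpoint URL, the literal form field names ('analysis', 'file'), and the observable type string are assumptions; only the overall shape (a JSON blob in the analysis form field plus optional multipart file uploads) is taken from the code above:

import json

import requests

# hypothetical submission payload; key names mirror the KEY_* constants
# referenced above, but their literal string values are assumed
submission = {
    'description': 'manual submission test',
    'analysis_mode': 'analysis',
    'observables': [
        {'type': 'ipv4', 'value': '1.2.3.4'},
    ],
}

with open('sample.doc', 'rb') as fp:
    response = requests.post(
        'https://ace.example.com/api/analysis/submit',    # assumed URL
        data={'analysis': json.dumps(submission)},        # the KEY_ANALYSIS form field
        files=[('file', ('sample.doc', fp))],
        verify='/path/to/ca_chain.pem')                   # assumed CA bundle
print(response.json())  # on success: {'result': {'uuid': '...'}}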
Example 3
def _create_analysis(url, reprocess, details, db, c):
    assert isinstance(url, str)
    assert isinstance(reprocess, bool)
    assert isinstance(details, dict)

    sha256_url = hash_url(url)

    if reprocess:
        # if we're reprocessing the url then we clear any existing analysis
        # IF the current analysis has completed
        # it's OK if we delete nothing here
        execute_with_retry(db, c, """DELETE FROM cloudphish_analysis_results 
                                      WHERE sha256_url = UNHEX(%s) AND status = 'ANALYZED'""", 
                           (sha256_url,), commit=True)

    # if we're at this point it means that when we asked the database for an entry from cloudphish_analysis_results
    # it was empty, OR, we cleared existing analysis
    # however, we could have multiple requests coming in at the same time for the same url
    # so we need to take that into account here

    # first we'll generate our analysis uuid we're going to use
    _uuid = str(uuid.uuid4())

    # so first we try to insert it
    try:
        execute_with_retry(db, c, ["""INSERT INTO cloudphish_analysis_results ( sha256_url, uuid, insert_date ) 
                                      VALUES ( UNHEX(%s), %s, NOW() )""",
                                   """INSERT INTO cloudphish_url_lookup ( sha256_url, url )
                                      VALUES ( UNHEX(%s), %s )"""],
                           [(sha256_url, _uuid),
                            (sha256_url, url)], commit=True)
    except pymysql.err.IntegrityError as e:
        # (<class 'pymysql.err.IntegrityError'>--(1062, "Duplicate entry
        # if we get a duplicate key entry here then it means that an entry was created between when we asked
        # and now
        if e.args[0] != 1062:
            raise e

        # so just return that one that was already created
        return get_cached_analysis(url)

    # at this point we've inserted an entry into cloudphish_analysis_results for this url
    # now we add it to the workload for processing

    root = RootAnalysis()
    root.uuid = _uuid
    root.storage_dir = storage_dir_from_uuid(root.uuid)
    root.initialize_storage()
    root.analysis_mode = ANALYSIS_MODE_CLOUDPHISH
    # this is kind of a kludge but,
    # the company_id initially starts out as whatever the default is for this node
    # later, should the analysis turn into an alert, the company_id changes to whatever
    # is stored as the "d" field in the KEY_DETAILS_CONTEXT
    root.company_id = saq.COMPANY_ID
    root.tool = 'ACE - Cloudphish'
    root.tool_instance = saq.SAQ_NODE
    root.alert_type = ANALYSIS_TYPE_CLOUDPHISH
    root.description = 'ACE Cloudphish Detection - {}'.format(url)
    root.event_time = datetime.datetime.now()
    root.details = {
        KEY_DETAILS_URL: url,
        KEY_DETAILS_SHA256_URL: sha256_url,
        # this used to be configurable but it's always true now
        KEY_DETAILS_ALERTABLE: True,
        KEY_DETAILS_CONTEXT: details, # <-- optionally contains the source company_id
    }

    url_observable = root.add_observable(F_URL, url)
    if url_observable:
        url_observable.add_directive(DIRECTIVE_CRAWL)

    root.save()
    root.schedule()

    return get_cached_analysis(url)
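
The INSERT-then-fallback logic above is a standard way to resolve the race where two requests for the same URL arrive at once. A stripped-down sketch of the same pattern without the execute_with_retry wrapper (table and column names are taken from the example; pymysql error 1062 is MySQL's duplicate-key error):

import pymysql

def insert_or_reuse(db, c, sha256_url, _uuid):
    # try to claim the URL; if another request already inserted a row,
    # fall back to whatever uuid is already there
    try:
        c.execute("""INSERT INTO cloudphish_analysis_results ( sha256_url, uuid, insert_date )
                     VALUES ( UNHEX(%s), %s, NOW() )""", (sha256_url, _uuid))
        db.commit()
        return _uuid
    except pymysql.err.IntegrityError as e:
        if e.args[0] != 1062:  # anything other than "Duplicate entry" is a real failure
            raise
        db.rollback()
        c.execute("""SELECT uuid FROM cloudphish_analysis_results
                     WHERE sha256_url = UNHEX(%s)""", (sha256_url,))
        row = c.fetchone()
        return row[0] if row else None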
Example 4
    def execute_analysis(self, url):

        analysis = url.get_analysis(CloudphishAnalysis)
        if analysis is None:
            analysis = self.create_analysis(url)

        try:
            parsed_url = urlparse(url.value)
            if parsed_url.hostname and '.' not in parsed_url.hostname:
                logging.debug("ignoring invalid FQDN {} in url {}".format(
                    parsed_url.hostname, url.value))
                return False

            # only analyze http, https and ftp schemes
            if parsed_url.scheme not in ['http', 'https', 'ftp']:
                logging.debug(
                    "{} is not a supported scheme for cloudphish".format(
                        parsed_url.scheme))
                return False

        except:
            pass

        # start the clock
        if analysis.query_start is None:
            analysis.query_start = int(time.time())
        #else:
        ## or has the clock expired?
        #if int(time.time()) - analysis.query_start > self.query_timeout:
        #logging.warning("cloudphish query for {} has timed out".format(url.value))
        #analysis.result = RESULT_ERROR
        #analysis.result_details = 'QUERY TIMED OUT'
        #return

        # do we have a local cache result for this url?
        sha256_url = hash_url(url.value)
        cache_dir = os.path.join(self.local_cache_dir, sha256_url[0:2])
        cache_path = os.path.join(cache_dir, sha256_url)
        alert_cache_path = '{}.ace.tar.gz'.format(cache_path)
        used_cache = False
        json_result = None

        # XXX need to fix this correctly
        #if os.path.exists(cache_path):
        if False:
            logging.debug("using local cache results for {}".format(url.value))
            try:
                with open(cache_path, 'r') as fp:
                    json_result = json.load(fp)

                used_cache = True

            except Exception as e:
                logging.warning(
                    "unable to load local cache result for {} from {}: {}".
                    format(url.value, cache_path, e))
                #report_exception()
        else:
            logging.debug("making cloudphish query for {}".format(url.value))

            try:
                response = requests.request(
                    'POST',
                    self.get_submit_url(),
                    params={
                        'url': url.value,
                        'c': self.root.uuid,  # context
                        'i': self.root.company_name if self.root.company_name else saq.CONFIG['global']['company_name'],
                        'd': self.root.company_id if self.root.company_id else saq.CONFIG['global'].getint('company_id'),
                        'a': '1' if self.generate_alert else '0',
                        's': self.engine.name,
                    },
                    data={'t': json.dumps(self.engine.get_tracking_information(self.root))},
                    timeout=self.timeout,
                    proxies=saq.PROXIES if self.use_proxy else {},
                    verify=saq.CA_CHAIN_PATH,
                    stream=False)

            except Exception as e:
                logging.warning("cloudphish request failed: {}".format(e))
                analysis.result = RESULT_ERROR
                analysis.result_details = 'REQUEST FAILED ({})'.format(e)
                return False

            if response.status_code != 200:
                logging.error(
                    "cloudphish returned status {} for {} - {}".format(
                        response.status_code, url.value, response.reason))
                analysis.result = RESULT_ERROR
                analysis.result_details = 'REQUEST FAILED ({}:{})'.format(
                    response.status_code, response.reason)
                return False

            # check the results first
            # if the analysis isn't ready yet then we come back later
            json_result = response.json()
            if json_result[KEY_RESULT] == RESULT_OK:
                if json_result[KEY_STATUS] == STATUS_ANALYZING or json_result[KEY_STATUS] == STATUS_NEW:
                    # deal with the possibility that cloudphish messed up
                    if json_result[KEY_ANALYSIS_RESULT] != SCAN_RESULT_ALERT:
                        # has the clock expired?
                        if int(time.time()) - analysis.query_start > self.query_timeout:
                            logging.warning(
                                "cloudphish query for {} has timed out".format(
                                    url.value))
                            analysis.result = RESULT_ERROR
                            analysis.result_details = 'QUERY TIMED OUT'
                            return False

                        # otherwise we delay analysis
                        logging.info("waiting for cloudphish analysis of {} ({})".format(
                            url.value, json_result[KEY_STATUS]))
                        return self.delay_analysis(url, analysis, seconds=self.frequency)

            # cache the analysis results if we didn't load it from cache
            while True:
                if not os.path.isdir(cache_dir):
                    try:
                        os.mkdir(cache_dir)
                    except Exception as e:
                        logging.error(
                            "unable to create directory {}: {}".format(
                                cache_dir, e))
                        report_exception()
                        break

                cache_path = os.path.join(cache_dir, sha256_url)
                if os.path.exists(cache_path):
                    logging.debug(
                        "cloudphish cache entry {} already exists".format(
                            cache_path))
                    #break

                try:
                    logging.debug(
                        "saving cloudphish cache entry {} for {}".format(
                            cache_path, url.value))
                    with open(cache_path, 'wb') as fp:
                        fp.write(response.content)
                except Exception as e:
                    logging.error(
                        "unable to save cloudphish cache entry for {} at {}: {}"
                        .format(url.value, cache_path, e))
                    report_exception()
                    cache_path = None
                    break

                break

        # save the analysis results
        analysis.query_result = json_result

        # sha256 E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855 is the hash for the empty string
        # we ignore this case
        if analysis.sha256_content and analysis.sha256_content.upper() == 'E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855':
            logging.debug("ignoring result of 0 length data for {}".format(url.value))
            return False

        # what did cloudphish see?
        if analysis.analysis_result == SCAN_RESULT_ALERT:
            temp_dir = None
            try:
                # create a temporary directory to load the alert into
                temp_dir = tempfile.mkdtemp(
                    prefix='cloudphish_',
                    dir=os.path.join(saq.SAQ_HOME,
                                     saq.CONFIG['global']['tmp_dir']))

                # is the alert cached?
                if os.path.exists(alert_cache_path):
                    logging.debug("using alert cache {} for url {}".format(
                        alert_cache_path, url.value))
                    p = Popen(['tar', 'zxf', alert_cache_path, '-C', temp_dir],
                              stdout=PIPE,
                              stderr=PIPE)
                else:
                    # grab the alert it created
                    logging.info("downloading alert info for {}".format(
                        url.value))
                    response = requests.request(
                        'GET',
                        self.get_download_alert_url(),
                        params={'s': analysis.sha256_content},
                        timeout=self.timeout,
                        proxies=saq.PROXIES if self.use_proxy else {},
                        verify=saq.CA_CHAIN_PATH,
                        stream=True)

                    p = Popen(['tar', 'zxf', '-', '-C', temp_dir],
                              stdin=PIPE,
                              stdout=PIPE,
                              stderr=PIPE)

                    alert_cache_fp = None
                    try:
                        alert_cache_fp = open(alert_cache_path, 'wb')
                    except Exception as e:
                        logging.error(
                            "unable to cache alert data for {} at {}: {}".
                            format(url.value, alert_cache_path, e))
                        report_exception()

                    for chunk in response.iter_content(chunk_size=None):
                        if alert_cache_fp:
                            try:
                                alert_cache_fp.write(chunk)
                            except Exception as e:
                                logging.error(
                                    "error writing data to cache alert data for {} at {}: {}"
                                    .format(url.value, alert_cache_path, e))
                                report_exception()

                                try:
                                    alert_cache_fp.close()
                                except:
                                    pass
                                finally:
                                    alert_cache_fp = None

                        p.stdin.write(chunk)

                    if alert_cache_fp:
                        try:
                            alert_cache_fp.close()
                        except:
                            pass

                stdout, stderr = p.communicate()

                if stderr:
                    logging.warning(
                        "tar produced output on stderr for {}: {}".format(
                            url.value, stderr))

                # load the new alert
                cloudphish_alert = RootAnalysis()
                cloudphish_alert.storage_dir = temp_dir
                try:
                    cloudphish_alert.load()
                except Exception as e:
                    logging.warning(
                        "unable to load cloudphish alert for {}: {}".format(
                            url.value, e))
                    # XXX there is a reason for this but I forget what it was

                # merge this alert into the analysis for this url
                self.root.merge(analysis, cloudphish_alert)
            finally:
                # make sure we clean up these temp directories
                try:
                    if temp_dir:
                        shutil.rmtree(temp_dir)
                except Exception as e:
                    logging.error("unable to delete directory {}: {}".format(
                        temp_dir, e))
                    report_exception()

        # are we forcing the download of the URL?
        elif url.has_directive(DIRECTIVE_FORCE_DOWNLOAD) and analysis.file_name:
            target_file = os.path.join(self.root.storage_dir, analysis.file_name)
            if os.path.exists(target_file):
                logging.warning("target file {} exists".format(target_file))
                return

            try:
                logging.info("downloading file {} from {}".format(
                    target_file, url.value))
                response = requests.request(
                    'GET',
                    self.get_download_url(),
                    params={'s': analysis.sha256_content},
                    timeout=self.timeout,
                    proxies=saq.PROXIES if self.use_proxy else {},
                    verify=saq.CA_CHAIN_PATH,
                    stream=True)

                with open(target_file, 'wb') as fp:
                    for chunk in response.iter_content(
                            chunk_size=io.DEFAULT_BUFFER_SIZE):
                        if chunk:
                            fp.write(chunk)

                analysis.add_observable(
                    F_FILE,
                    os.path.relpath(target_file, start=self.root.storage_dir))

            except Exception as e:
                logging.error(
                    "unable to download file {} for url {} from cloudphish: {}"
                    .format(target_file, url.value, e))
                report_exception()

        return True
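
The local cache layout used above buckets results under a two-character prefix of the URL hash, with the cached alert tarball stored alongside the JSON result. A small sketch of just that path derivation:

import os

def local_cache_paths(local_cache_dir, sha256_url):
    # results live under <cache>/<first two hex chars>/<full hash>;
    # the cached alert tarball gets an extra .ace.tar.gz suffix
    cache_dir = os.path.join(local_cache_dir, sha256_url[0:2])
    cache_path = os.path.join(cache_dir, sha256_url)
    alert_cache_path = '{}.ace.tar.gz'.format(cache_path)
    return cache_dir, cache_path, alert_cache_path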
Example 5
    def process(self, binary_path):
        logging.debug("processing {0}".format(binary_path))
        analysis_start_time = datetime.datetime.now()

        # load the JSON acquired from Carbon Black
        try:
            with open('{0}.json'.format(binary_path), 'r') as fp:
                binary_json = json.load(fp)
        except Exception as e:
            logging.error(
                "unable to parse JSON from Carbon Black for {}: {}".format(
                    binary_path, str(e)))
            return

        # we have to copy the file into the new storage directory for it to be analyzed
        # we use the file name that Carbon Black saw on the endpoint
        try:
            file_name = binary_json['observed_filename'][-1]
        except Exception as e:
            logging.error(
                "cannot determine file name for {}".format(binary_path))
            file_name = 'unknown'

        # we need to figure out if this is a path from a Windows machine or a Unix machine
        # so we count the number of backslashes and forward slashes
        # it's a hack but it should work 99.9% of the time
        if file_name.count('\\') > file_name.count('/'):
            logging.debug("{0} appears to be a windows path".format(file_name))
            file_name = ntpath.basename(file_name)
        else:
            logging.debug("{0} appears to be a unix path".format(file_name))
            file_name = os.path.basename(file_name)

        # figure out when this binary arrived to the carbon black server
        # some times the time does not have the .%fZ at the end for some reason
        time_stamp_format = "%Y-%m-%dT%H:%M:%SZ"
        if '.' in binary_json['server_added_timestamp']:
            time_stamp_format = "%Y-%m-%dT%H:%M:%S.%fZ"
        event_time = datetime.datetime.strptime(
            binary_json['server_added_timestamp'],
            time_stamp_format).replace(tzinfo=pytz.utc)
        event_time = pytz.timezone('US/Eastern').normalize(event_time)

        # create the root analysis object
        root = RootAnalysis()
        # set all of the properties individually
        # XXX fix me
        # it looks like the construction logic doesn't quite work here
        # when loading from the arguments to the constructor, the internal
        # variables with leading underscores get set rather than the properties
        # representing the database columns it was designed that way to allow the
        # JSON stuff to work correctly, so I'll need to revisit that later
        root.tool = 'ACE - Carbon Black Binary Analysis'
        root.tool_instance = socket.gethostname()
        root.alert_type = 'carbon_black_binary'
        root.description = 'Carbon Black binary {0}'.format(file_name)
        root.event_time = event_time
        root.details = binary_json

        # XXX database.Alert does not automatically create this
        root.uuid = str(uuid.uuid4())

        # we use a temporary directory while we process the file
        root.storage_dir = os.path.join(self.work_dir, root.uuid[0:3],
                                        root.uuid)

        root.initialize_storage()

        logging.debug("using storage directory {0} for {1}".format(
            root.storage_dir, binary_path))
        dest_path = os.path.join(root.storage_dir, file_name)

        try:
            shutil.copy(binary_path, dest_path)
        except Exception as e:
            logging.error("unable to copy {0} to {1}: {2}".format(
                binary_path, dest_path, str(e)))
            report_exception()
            return

        # note that the path is relative to the storage directory
        root.add_observable(F_FILE, file_name)

        # the endpoints are stored as an array of host names optionally appended with a pipe and count
        # I assume the number of times that executable has executed on that host?
        for endpoint in binary_json['endpoint']:
            if '|' in endpoint:
                endpoint = endpoint[:endpoint.index('|')]
            root.add_observable(F_HOSTNAME, endpoint)

        for file_path in binary_json['observed_filename']:
            root.add_observable(F_FILE_PATH, file_path)

        # now analyze the file
        try:
            self.analyze(root)
        except Exception as e:
            logging.error("analysis failed for {0}: {1}".format(
                binary_path, str(e)))
            report_exception()

        logging.info("completed {0} analysis time {1}".format(
            binary_path,
            datetime.datetime.now() - analysis_start_time))
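
The timestamp handling above accounts for Carbon Black sometimes omitting fractional seconds. A hedged sketch of that parsing pulled out into a helper, using the same formats and timezone conversion as the example:

import datetime

import pytz

def parse_cb_timestamp(value):
    # pick the strptime format based on whether fractional seconds are
    # present, interpret the value as UTC, then convert to US/Eastern
    fmt = "%Y-%m-%dT%H:%M:%S.%fZ" if '.' in value else "%Y-%m-%dT%H:%M:%SZ"
    ts = datetime.datetime.strptime(value, fmt).replace(tzinfo=pytz.utc)
    return pytz.timezone('US/Eastern').normalize(ts)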
Example 6
    def execute_analysis(self, url):
        # don't run cloudphish on cloudphish alerts
        if self.root.alert_type == ANALYSIS_TYPE_CLOUDPHISH:
            return False

        # we keep track of what URLs we've given to cloudphish to process
        if self.state is None:
            self.state = {}
            self.state['requests'] = {}

        analysis = url.get_analysis(CloudphishAnalysis)
        if analysis is None:
            try:
                if len(self.state['requests']) >= self.cloudphish_request_limit:
                    logging.info(f"skipping cloudphish analysis for {url.value}: reached cloudphish limit for {self.root}")
                    return False

                # do basic URL sanity checks
                parsed_url = urlparse(url.value)

                #if parsed_url.hostname and '.' not in parsed_url.hostname:
                #logging.debug("ignoring invalid FQDN {} in url {}".format(parsed_url.hostname, url.value))
                #return False

                # only analyze http, https and ftp schemes
                if parsed_url.scheme not in ['http', 'https', 'ftp']:
                    logging.debug(
                        "{} is not a supported scheme for cloudphish".format(
                            parsed_url.scheme))
                    return False

                # URL seems ok
                analysis = self.create_analysis(url)

            except Exception as e:
                logging.debug("possible invalid URL: {}: {}".format(
                    url.value, e))
                return False

        # start the clock XXX isn't this built-in to the delay analysis system?
        if analysis.query_start is None:
            analysis.query_start = int(time.time())
        #else:
        ## or has the clock expired?
        #if int(time.time()) - analysis.query_start > self.query_timeout:
        #logging.warning("cloudphish query for {} has timed out".format(url.value))
        #analysis.result = RESULT_ERROR
        #analysis.result_details = 'QUERY TIMED OUT'
        #return

        # do we have a local cache result for this url?
        sha256_url = hash_url(url.value)
        json_result = None

        # once we decide on a cloudphish server to use we need to keep using the same one
        # for the same url
        if 'cloudphish_server' in self.state:
            cloudphish_server = self.state['cloudphish_server']
        else:
            cloudphish_server = self.get_cloudphish_server()
            self.state['cloudphish_server'] = cloudphish_server

        logging.debug("making cloudphish query against {} for {}".format(
            cloudphish_server, url.value))

        try:
            context = {
                'c': self.root.uuid,  # context
                't': None,  # tracking (see below)
            }

            tracking = []
            for o in self.root.all_observables:
                if o.has_directive(DIRECTIVE_TRACKED):
                    tracking.append({
                        'type': o.type,
                        'value': o.value,
                        'time': None if o.time is None else o.time.strftime(event_time_format_json_tz),
                    })

            context['t'] = json.dumps(tracking, cls=_JSONEncoder)
            response = ace_api.cloudphish_submit(
                url.value,
                context=context,
                remote_host=cloudphish_server,
                ssl_verification=saq.CA_CHAIN_PATH,
                proxies=saq.PROXIES if self.use_proxy else None,
                timeout=self.timeout)

            logging.debug(
                "got result {} for cloudphish query @ {} for {}".format(
                    response, cloudphish_server, url.value))

        except Exception as e:
            logging.warning("cloudphish request failed: {}".format(e))
            analysis.result = RESULT_ERROR
            analysis.result_details = 'REQUEST FAILED ({})'.format(e)
            return True

        # check the results first
        # if the analysis isn't ready yet then we come back later
        if response[KEY_RESULT] == RESULT_OK:
            if response[KEY_STATUS] == STATUS_ANALYZING or response[KEY_STATUS] == STATUS_NEW:
                # keep track of the requests that resulted in work for ACE
                self.state['requests'][url.value] = True

                # otherwise we delay analysis
                logging.info(
                    "waiting for cloudphish analysis of {} ({})".format(
                        url.value, response[KEY_STATUS]))

                if not self.delay_analysis(url,
                                           analysis,
                                           seconds=self.frequency,
                                           timeout_seconds=self.query_timeout):
                    # analysis timed out
                    analysis.result = RESULT_ERROR
                    analysis.result_details = 'QUERY TIMED OUT'
                    return True

        # sha256 E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855 is the hash for the empty string
        # we ignore this case
        if response[KEY_SHA256_CONTENT] and response[KEY_SHA256_CONTENT].upper() == \
        'E3B0C44298FC1C149AFBF4C8996FB92427AE41E4649B934CA495991B7852B855':
            logging.debug("ignoring result of 0 length data for {}".format(
                url.value))
            analysis.result = RESULT_ERROR
            analysis.result_details = 'EMPTY CONTENT'
            return True

        # save the analysis results
        analysis.query_result = response

        # did cloudphish generate an alert?
        if analysis.analysis_result == SCAN_RESULT_ALERT:
            # if cloudphish generated an alert then we'll need to wait for the alert correlation to finish
            # TODO

            temp_dir = None
            try:
                # create a temporary directory to load the alert into
                temp_dir = tempfile.mkdtemp(prefix='cloudphish_',
                                            dir=saq.TEMP_DIR)

                # grab the alert it created
                logging.info("downloading alert info for {}".format(url.value))
                ace_api.download(
                    analysis.uuid,
                    temp_dir,
                    remote_host=cloudphish_server,
                    ssl_verification=saq.CA_CHAIN_PATH,
                    proxies=saq.PROXIES if self.use_proxy else None,
                    timeout=self.timeout)

                #response = requests.request('GET', self.get_download_alert_url(),
                #params={ 's': analysis.sha256_content },
                #timeout=self.timeout,
                #proxies=saq.PROXIES if self.use_proxy else {},
                #verify=saq.CA_CHAIN_PATH,
                #stream=True)

                # load the new alert
                cloudphish_alert = RootAnalysis()
                cloudphish_alert.storage_dir = temp_dir
                try:
                    cloudphish_alert.load()
                except Exception as e:
                    logging.warning(
                        "unable to load cloudphish alert for {}: {}".format(
                            url.value, e))
                    # XXX there is a reason for this but I forget what it was lol

                # merge this alert into the analysis for this url
                self.root.merge(analysis, cloudphish_alert)

            finally:
                # make sure we clean up these temp directories
                try:
                    if temp_dir:
                        shutil.rmtree(temp_dir)
                except Exception as e:
                    logging.error("unable to delete directory {}: {}".format(
                        temp_dir, e))
                    report_exception()

        # are we forcing the download of the URL?
        elif url.has_directive(DIRECTIVE_FORCE_DOWNLOAD) and analysis.file_name:
            # TODO fix this file naming scheme
            target_file = os.path.join(self.root.storage_dir, analysis.file_name)
            if os.path.exists(target_file):
                logging.warning("target file {} exists".format(target_file))
                return True

            try:
                logging.info("downloading file {} from {}".format(
                    target_file, url.value))
                ace_api.cloudphish_download(
                    url=url.value,
                    output_path=target_file,
                    remote_host=cloudphish_server,
                    ssl_verification=saq.CA_CHAIN_PATH,
                    proxies=saq.PROXIES if self.use_proxy else None,
                    timeout=self.timeout)

                #response = requests.request('GET', self.get_download_url(),
                #params={ 's': analysis.sha256_content },
                #timeout=self.timeout,
                #proxies=saq.PROXIES if self.use_proxy else {},
                #verify=saq.CA_CHAIN_PATH,
                #stream=True)

                #with open(target_file, 'wb') as fp:
                #for chunk in response.iter_content(chunk_size=io.DEFAULT_BUFFER_SIZE):
                #if chunk:
                #fp.write(chunk)

                analysis.add_observable(
                    F_FILE,
                    os.path.relpath(target_file, start=self.root.storage_dir))

            except Exception as e:
                logging.error(
                    "unable to download file {} for url {} from cloudphish: {}"
                    .format(target_file, url.value, e))
                report_exception()

        return True
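
The 't' context field sent with the cloudphish submission serializes every observable carrying DIRECTIVE_TRACKED. A sketch of that serialization as a helper; DIRECTIVE_TRACKED, event_time_format_json_tz and _JSONEncoder are the names referenced in the code above and are assumed to come from the saq imports:

import json

def build_tracking_context(root):
    # one small dict per tracked observable; times are rendered in the
    # shared JSON timestamp format so the remote node can parse them back
    tracking = []
    for o in root.all_observables:
        if o.has_directive(DIRECTIVE_TRACKED):
            tracking.append({
                'type': o.type,
                'value': o.value,
                'time': None if o.time is None else o.time.strftime(event_time_format_json_tz),
            })
    return json.dumps(tracking, cls=_JSONEncoder)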
Example 7
    def post_smtp_analysis(self, root):
        from saq.modules.email import EmailAnalysis, SMTPStreamAnalysis, \
                                      BrotexSMTPPackageAnalysis, \
                                      KEY_ENVELOPES_MAIL_FROM, KEY_ENVELOPES_RCPT_TO

        # get the paths to the email scanning system
        #email_scanner_dir = saq.CONFIG['engine_email_scanner']['collection_dir']
        email_scanner_dir = self.collection_dir

        # create a new analysis root for each email analysis we found
        for analysis in root.all_analysis:
            if not isinstance(analysis, EmailAnalysis) or not analysis.email:
                continue

            env_mail_from = None
            env_rcpt_to = None
            connection_id = None

            # the observable for this EmailAnalysis will be a file
            email_file = analysis.observable
            if email_file.type != F_FILE:
                logging.warning("the observable for {} should be F_FILE but it is {}".format(
                    analysis, email_file.type))
            else:
                # this will be either an rfc822 file generated by the SMTPStreamAnalysis module
                # (which will have the envelope information)
                # OR it is a "broken stream" file, which does not
                stream_analysis = [
                    a for a in root.all_analysis
                    if isinstance(a, SMTPStreamAnalysis)
                    and email_file in a.observables
                ]
                if len(stream_analysis) > 1:
                    logging.error("there should not be more than one of these")
                elif len(stream_analysis) == 1:
                    stream_analysis = stream_analysis[0]
                    logging.debug(
                        "detected stream analysis for {}".format(email_file))
                    # get the MAIL FROM and RCPT TO from this
                    if not analysis.env_mail_from:
                        if email_file.value in stream_analysis.envelopes:
                            analysis.env_mail_from = stream_analysis.envelopes[
                                email_file.value][KEY_ENVELOPES_MAIL_FROM]
                    if not analysis.env_rcpt_to:
                        if email_file.value in stream_analysis.envelopes:
                            analysis.env_rcpt_to = stream_analysis.envelopes[
                                email_file.value][KEY_ENVELOPES_RCPT_TO]

                    # get the original brotex package file that the stream came from
                    stream_package = stream_analysis.observable
                    # get the BrotexSMTPPackageAnalysis for this stream package so we can get the connection id
                    package_analysis = [
                        a for a in root.all_analysis
                        if isinstance(a, BrotexSMTPPackageAnalysis)
                        and stream_package in a.observables
                    ]
                    if len(package_analysis) > 1:
                        logging.error(
                            "there should not be more than one of these!")
                    elif len(package_analysis) == 1:
                        package_analysis = package_analysis[0]
                        connection_id = package_analysis.connection_id

                # if we could not find the stream, we will want to find the brotex smtp package so we can have the connection id
                package_analysis = [
                    a for a in root.all_analysis
                    if isinstance(a, BrotexSMTPPackageAnalysis)
                    and email_file in a.observables
                ]
                if len(package_analysis) > 1:
                    logging.error(
                        "there should not be more than one of these!")
                elif len(package_analysis) == 1:
                    package_analysis = package_analysis[0]
                    connection_id = package_analysis.connection_id

            subroot = RootAnalysis()
            subroot.company_name = root.company_name
            subroot.tool = root.tool
            subroot.tool_instance = root.tool_instance
            subroot.alert_type = root.alert_type
            subroot.description = 'Brotex SMTP Stream Detection - '

            if analysis.decoded_subject:
                subroot.description += '{} '.format(analysis.decoded_subject)
            elif analysis.subject:
                subroot.description += '{} '.format(analysis.subject)
            else:
                subroot.description += '(no subject) '

            if analysis.env_mail_from:
                subroot.description += 'From {} '.format(normalize_email_address(analysis.env_mail_from))
            elif analysis.mail_from:
                subroot.description += 'From {} '.format(normalize_email_address(analysis.mail_from))

            if analysis.env_rcpt_to:
                if len(analysis.env_rcpt_to) == 1:
                    subroot.description += 'To {} '.format(analysis.env_rcpt_to[0])
                else:
                    subroot.description += 'To ({} recipients) '.format(len(analysis.env_rcpt_to))
            elif analysis.mail_to:
                if isinstance(analysis.mail_to, list):  # XXX I think this *has* to be a list
                    if len(analysis.mail_to) == 1:
                        subroot.description += 'To {} '.format(analysis.mail_to[0])
                    else:
                        subroot.description += 'To ({} recipients) '.format(len(analysis.mail_to))
                else:
                    subroot.description += 'To {} '.format(analysis.mail_to)

            subroot.event_time = root.event_time
            subroot.details = analysis.details
            subroot.details['connection_id'] = connection_id
            subroot.uuid = str(uuid.uuid4())

            # we use a temporary directory while we process the file
            subroot.storage_dir = os.path.join(email_scanner_dir,
                                               subroot.uuid[0:3], subroot.uuid)

            subroot.initialize_storage()

            # copy the original file
            src_path = os.path.join(root.storage_dir,
                                    analysis.observable.value)
            dest_path = os.path.join(subroot.storage_dir,
                                     analysis.observable.value)

            subroot.add_observable(
                F_FILE, os.path.relpath(dest_path, start=subroot.storage_dir))

            # so the EmailAnalysis that will trigger on the RFC822 file (or whatever you have)
            # will *not* have the envelope headers
            # so we do that here in the main alert
            env_mail_from = None
            if analysis.env_mail_from:
                # this is to handle this: <*****@*****.**> SIZE=80280
                # XXX assuming there can be no spaces in an email address
                env_mail_from = analysis.env_mail_from.split(' ', 1)
                env_mail_from = env_mail_from[0]

                # is this not the empty indicator?
                if env_mail_from != '<>':
                    env_mail_from = normalize_email_address(env_mail_from)
                    subroot.add_observable(F_EMAIL_ADDRESS, env_mail_from)

            if analysis.env_rcpt_to:
                for address in analysis.env_rcpt_to:
                    address = normalize_email_address(address)
                    if address:
                        subroot.add_observable(F_EMAIL_ADDRESS, address)
                        if env_mail_from:
                            subroot.add_observable(
                                F_EMAIL_CONVERSATION,
                                create_email_conversation(
                                    env_mail_from, address))

            try:
                subroot.save()
            except Exception as e:
                logging.error("unable to save {}: {}".format(subroot, e))
                report_exception()
                continue

            # TODO also add the stream and update any envelopment headers and stuff

            try:
                logging.debug("copying {} to {}".format(src_path, dest_path))
                shutil.copy(src_path, dest_path)
            except Exception as e:
                logging.error("unable to copy {} to {}: {}".format(
                    src_path, dest_path, e))
                report_exception()
                continue

            # submit the path to the database of the email scanner for analysis
            try:
                submit_sql_work_item('EMAIL', subroot.storage_dir)
            except Exception as e:
                logging.error("unable to add work item: {}".format(e))
                report_exception()
                continue
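
The envelope MAIL FROM handling above strips trailing ESMTP parameters and skips the null sender. A sketch of that logic on its own; normalize_email_address is the helper referenced in the example and is assumed to come from the same saq imports:

def parse_env_mail_from(env_mail_from):
    # "<user@host> SIZE=..." -> keep only the address token, and treat
    # the null sender "<>" as no address at all
    address = env_mail_from.split(' ', 1)[0]
    if address == '<>':
        return None
    return normalize_email_address(address)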
Example 8
    def post_http_analysis(self, root):

        from saq.modules.http import BrotexHTTPPackageAnalysis, \
                                     KEY_TIME, \
                                     KEY_SRC_IP, \
                                     KEY_SRC_PORT, \
                                     KEY_DEST_IP, \
                                     KEY_DEST_PORT, \
                                     KEY_METHOD, \
                                     KEY_HOST, \
                                     KEY_URI, \
                                     KEY_REFERRER, \
                                     KEY_USER_AGENT, \
                                     KEY_STATUS_CODE, \
                                     KEY_FILES

        # get the paths to the http scanning system
        #http_scanner_dir = saq.CONFIG['engine_http_scanner']['collection_dir']
        http_scanner_dir = self.collection_dir

        analysis = None
        for a in root.all_analysis:
            if isinstance(a, BrotexHTTPPackageAnalysis) and a.requests:
                analysis = a
                break

        # analysis will be None if the request was whitelisted
        if analysis:
            for request in analysis.requests:
                subroot = RootAnalysis()
                subroot.company_name = root.company_name
                subroot.tool = root.tool
                subroot.tool_instance = root.tool_instance
                subroot.alert_type = root.alert_type
                subroot.description = "Brotex HTTP Stream Detection - "
                if request[KEY_HOST]:
                    subroot.description += " {} ".format(request[KEY_HOST])

                if request[KEY_DEST_IP]:
                    subroot.description += " ({}) ".format(
                        request[KEY_DEST_IP])

                if request[KEY_URI]:
                    # don't want to show all the fragments and query params
                    try:
                        parts = urlparse(request[KEY_URI])
                        subroot.description += parts.path
                    except Exception as e:
                        logging.warning("unable to parse {}: {}".format(
                            request[KEY_URI], e))
                        subroot.description += request[KEY_URI]

                subroot.event_time = root.event_time
                subroot.details = request
                subroot.uuid = str(uuid.uuid4())

                # we use a temporary directory while we process the file
                subroot.storage_dir = os.path.join(http_scanner_dir,
                                                   subroot.uuid[0:3],
                                                   subroot.uuid)

                subroot.initialize_storage()

                if request[KEY_SRC_IP]:
                    subroot.add_observable(F_IPV4, request[KEY_SRC_IP])

                if request[KEY_DEST_IP]:
                    subroot.add_observable(F_IPV4, request[KEY_DEST_IP])

                if request[KEY_SRC_IP] and request[KEY_DEST_IP]:
                    subroot.add_observable(
                        F_IPV4_CONVERSATION,
                        create_ipv4_conversation(request[KEY_SRC_IP],
                                                 request[KEY_DEST_IP]))

                if request[KEY_HOST]:
                    subroot.add_observable(F_FQDN, request[KEY_HOST])

                if request[KEY_URI]:
                    subroot.add_observable(F_URL, request[KEY_URI])

                if request[KEY_REFERRER]:
                    subroot.add_observable(F_URL, request[KEY_REFERRER])

                for file_path in request[KEY_FILES]:
                    src_path = os.path.join(root.storage_dir, file_path)
                    dest_path = os.path.join(subroot.storage_dir,
                                             os.path.basename(file_path))
                    try:
                        shutil.copy(src_path, dest_path)
                    except Exception as e:
                        logging.error("unable to copy {} to {}: {}".format(
                            src_path, dest_path, e))
                        report_exception()

                    subroot.add_observable(
                        F_FILE,
                        os.path.basename(file_path))  # already relative

                try:
                    subroot.save()
                except Exception as e:
                    logging.error("unable to save {}: {}".format(subroot, e))
                    report_exception()
                    continue

                # submit the path to the database of the email scanner for analysis
                try:
                    submit_sql_work_item(
                        'HTTP', subroot.storage_dir)  # XXX hard coded constant
                except:
                    # failure is already logged inside the call
                    continue
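
The alert description above deliberately keeps only the path component of the requested URI so query strings and fragments do not clutter it. A minimal sketch of that trimming:

from urllib.parse import urlparse

def short_uri(uri):
    # fall back to the raw URI if it cannot be parsed
    try:
        return urlparse(uri).path
    except Exception:
        return uri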
Example 9
    def process(self, process):
        logging.debug("processing json")
        analysis_start_time = datetime.datetime.now()

        try:
            file_path = process['cmdline'].split('"')[-2]
        except:
            logging.error("cannot determine file path for {}".format(
                process['cmdline']))
            file_path = 'unknown'

        try:
            file_name = file_path.split('\\')[-1]
        except:
            logging.error(
                "cannot determine file name for {}".format(file_path))
            file_name = 'unknown'

        # figure out when this binary arrived to the carbon black server
        # some times the time does not have the .%fZ at the end for some reason
        time_stamp_format = "%Y-%m-%dT%H:%M:%SZ"
        if '.' in process['start']:
            time_stamp_format = "%Y-%m-%dT%H:%M:%S.%fZ"
        event_time = datetime.datetime.strptime(
            process['start'], time_stamp_format).replace(tzinfo=pytz.utc)
        event_time = pytz.timezone('US/Eastern').normalize(event_time)

        # create the root analysis object
        root = RootAnalysis()
        # set all of the properties individually
        # XXX fix me
        # it looks like the construction logic doesn't quite work here
        # when loading from the arguments to the constructor, the internal
        # variables with leading underscores get set rather than the properties
        # representing the database columns it was designed that way to allow the
        # JSON stuff to work correctly, so I'll need to revisit that later
        root.tool = 'ACE - Carbon Black Internet Office File Analysis'
        root.tool_instance = socket.gethostname()
        root.alert_type = 'carbon_black_internet_office_file'
        root.description = 'Carbon Black Internet Office File {0}'.format(
            file_name)
        root.event_time = event_time
        root.details = process

        # XXX database.Alert does not automatically create this
        root.uuid = str(uuid.uuid4())

        # we use a temporary directory while we process the file
        root.storage_dir = os.path.join(self.work_dir, root.uuid[0:3],
                                        root.uuid)
        root.initialize_storage()

        # note that the path is relative to the storage directory
        fl_observable = root.add_observable(
            F_FILE_LOCATION,
            create_file_location(process['hostname'], file_path))
        if fl_observable: fl_observable.add_directive(DIRECTIVE_COLLECT_FILE)
        root.add_observable(F_FILE_PATH, file_path)
        root.add_observable(F_FILE_NAME, file_name)
        root.add_observable(F_HOSTNAME, process['hostname'])

        # now analyze the file
        try:
            self.analyze(root)
        except Exception as e:
            logging.error("analysis failed for {}: {}".format(
                process['id'], e))
            report_exception()

        logging.info("completed {} analysis time {}".format(
            process['id'],
            datetime.datetime.now() - analysis_start_time))
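
The cmdline parsing above assumes the document path is the last double-quoted token of the process command line. A hedged sketch of that extraction with the same fallback used in the example:

def quoted_path_from_cmdline(cmdline):
    # e.g. '"C:\\...\\WINWORD.EXE" /n "C:\\docs\\invoice.docx"' -> take the
    # second-to-last piece after splitting on double quotes
    try:
        return cmdline.split('"')[-2]
    except IndexError:
        return 'unknown'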
Example 10
    def process(self, work_item):
        url, alertable, details = work_item
        # any other result means we should process it
        logging.info("processing url {} (alertable {})".format(url, alertable))
        #logging.debug("details = {}".format(details))

        sha256_url = hash_url(url)

        # create or update our analysis entry
        with get_db_connection('cloudphish') as db:
            c = db.cursor()
            c.execute(
                """UPDATE analysis_results SET status = %s WHERE sha256_url = UNHEX(%s)""",
                (STATUS_ANALYZING, sha256_url))
            db.commit()

        root = RootAnalysis()
        # create a temporary storage directory for this work
        root.tool = 'ACE - Cloudphish'
        root.tool_instance = self.location
        root.alert_type = 'cloudphish'
        root.description = 'ACE Cloudphish Detection - {}'.format(url)
        root.event_time = datetime.datetime.now()
        root.uuid = str(uuid.uuid4())
        root.storage_dir = os.path.join(self.work_dir, root.uuid[0:2],
                                        root.uuid)
        root.initialize_storage()

        if 'i' in details:
            root.company_name = details['i']

        if 'd' in details:
            root.company_id = details['d']

        root.details = {
            KEY_DETAILS_URL: url,
            KEY_DETAILS_SHA256_URL: sha256_url,
            KEY_DETAILS_ALERTABLE: alertable,
            KEY_DETAILS_CONTEXT: details,
        }

        url_observable = root.add_observable(F_URL, url)
        if url_observable is None:
            logging.error("request for invalid url received: {}".format(url))
            return

        url_observable.add_directive(DIRECTIVE_CRAWL)

        # the "details context" can also contain observables
        for key in root.details[KEY_DETAILS_CONTEXT].keys():
            if key in VALID_OBSERVABLE_TYPES:
                root.add_observable(key,
                                    root.details[KEY_DETAILS_CONTEXT][key])

        try:
            self.analyze(root)
        except Exception as e:
            logging.error("analysis failed for {}: {}".format(url, e))
            report_exception()

            with get_db_connection('cloudphish') as db:
                c = db.cursor()
                c.execute(
                    """UPDATE analysis_results SET 
                                 result = %s,
                                 status = %s,
                                 http_result_code = NULL,
                                 http_message = NULL,
                                 sha256_content = NULL
                             WHERE sha256_url = UNHEX(%s)""",
                    (SCAN_RESULT_ERROR, STATUS_ANALYZED, sha256_url))
                db.commit()
                return
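
The example opens by flipping the analysis entry's status to ANALYZING before doing any work. A sketch of that status update factored into a helper, using the get_db_connection context manager and the table shown above:

def set_analysis_status(sha256_url, status):
    # mark the cloudphish entry for this URL hash with the given status
    with get_db_connection('cloudphish') as db:
        c = db.cursor()
        c.execute("""UPDATE analysis_results SET status = %s WHERE sha256_url = UNHEX(%s)""",
                  (status, sha256_url))
        db.commit()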