class CymonAPIService(ServiceBase):
    SERVICE_CATEGORY        = "External"
    SERVICE_DESCRIPTION     = "This service checks the file hash against the Cymon API"
    SERVICE_VERSION         = "1"

    SERVICE_ACCEPTS         = ".*"
    SERVICE_REVISION        = ServiceBase.parse_revision("$Id$")
    SERVICE_ENABLED         = True

    SERVICE_IS_EXTERNAL     = True
    SERVICE_STAGE           = "CORE"
    SERVICE_DEFAULT_CONFIG  = {
        'API_KEY': '',
        'BASE_URL': 'https://api.cymon.io/v2/ioc/search/sha256'
    }

    def __init__(self, cfg=None):
        super(CymonAPIService, self).__init__(cfg)
        self.api_key = self.cfg.get('API_KEY')


    # `requests` is needed to communicate with the API
    def import_service_deps(self):
        global requests
        import requests


    def start(self):
        self.log.debug("CymonAPIService Started")

    
    def execute(self, request):
        response = self.process_file(request)
        result = self.parse_results(response)
        request.result = result


    def process_file(self, request):
        url = "%s/%s" % (self.cfg.get('BASE_URL'), request.sha256)
        r = requests.get(url)

        try:
            json_response = r.json()
        except ValueError:
            self.log.warn("Invalid response from Cymon, "
                          "HTTP code: %s, "
                          "content length: %i, "
                          "headers: %s" %
                          (r.status_code, len(r.content), repr(r.headers)))
            if len(r.content) == 0:
                raise RecoverableError("Cymon didn't return a JSON object, HTTP code %s" % r.status_code)
            raise
        return json_response


    def parse_results(self, response):
        pass
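The parse_results stub above is left unimplemented. A minimal sketch of what it might do, modeled on the VirusTotalStatic.parse_results example further down; the Cymon response fields used here ('results', 'title') are assumptions for illustration, not the documented API schema:

    def parse_results(self, response):
        # Build a Result and add one line per reported IOC hit (field names assumed).
        result = Result()
        hits = response.get('results', []) if response else []
        if hits:
            section = ResultSection(SCORE.NULL, 'Cymon IOC matches',
                                    self.SERVICE_CLASSIFICATION)
            for hit in hits[:10]:
                section.add_line(hit.get('title', 'unknown report'))
            result.add_section(section)
        return result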
Example #2
class Characterize(ServiceBase):
    """ Basic File Characterization.

    Currently characterize only generates file partition entropy data.
    """

    SERVICE_ACCEPTS = '.*'
    SERVICE_CATEGORY = 'Static Analysis'
    SERVICE_DESCRIPTION = "Partitions the file and calculates visual entropy for each partition."
    SERVICE_ENABLED = True
    SERVICE_REVISION = ServiceBase.parse_revision(
        '$Id: b605ff7747fa3638fab5e7e67045485dd0f00fb6 $')
    SERVICE_VERSION = '1'
    SERVICE_CPU_CORES = 0.25
    SERVICE_RAM_MB = 256

    def __init__(self, cfg=None):
        super(Characterize, self).__init__(cfg)

    def execute(self, request):
        path = request.download()
        with open(path, 'rb') as fin:
            (entropy, part_entropies) = calculate_partition_entropy(fin)

        entropy_graph_data = {
            'type': 'colormap',
            'data': {
                'domain': [0, 8],
                'values': part_entropies
            }
        }
        section = ResultSection(SCORE.NULL,
                                'Entropy.\tEntire File: {}'.format(
                                    round(entropy, 3)),
                                self.SERVICE_CLASSIFICATION,
                                body_format=TEXT_FORMAT.GRAPH_DATA,
                                body=json.dumps(entropy_graph_data))
        result = Result()
        result.add_section(section)
        request.result = result
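calculate_partition_entropy is imported from a helper module that is not shown here. A self-contained sketch of the kind of computation it performs (Shannon entropy over the whole file and over a fixed number of partitions); the 16-partition count is an assumption for illustration:

import math

def calculate_partition_entropy(fin, num_partitions=16):
    """Return (overall_entropy, [partition_entropies]) for a binary stream."""
    def shannon(data):
        # Shannon entropy in bits per byte; 0.0 for empty input.
        if not data:
            return 0.0
        counts = [0] * 256
        for byte in bytearray(data):
            counts[byte] += 1
        total = float(len(data))
        return -sum((c / total) * math.log(c / total, 2) for c in counts if c)

    data = fin.read()
    part_size = max(1, len(data) // num_partitions)
    partitions = [data[i:i + part_size] for i in range(0, len(data), part_size)]
    return shannon(data), [shannon(p) for p in partitions]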
Example #3
class Avg(ServiceBase):
    SERVICE_ENABLED = True
    SERVICE_REVISION = ServiceBase.parse_revision(
        '$Id: d7a9d50b72c5814f42d5d8791048317ebf10dbae $')
    SERVICE_VERSION = '1'
    SERVICE_DEFAULT_CONFIG = {
        'AUTOUPDATE': True,
        'AVG_PATH': '/usr/bin/avgscan',
        'UPDATER_OFFLINE_URL': None
    }
    SERVICE_DESCRIPTION = "This services wraps AVG's linux command line scanner 'avgscan'"
    SERVICE_CPU_CORES = 0.5
    SERVICE_RAM_MB = 256
    SERVICE_CATEGORY = "Antivirus"

    def __init__(self, cfg=None):
        super(Avg, self).__init__(cfg)
        self.avg_path = self.cfg.get('AVG_PATH')
        if not os.path.exists(self.avg_path):
            self.log.error(
                "AVG not found at %s. Avg service will likely be non functional.",
                self.avg_path)
        self._av_info = ''
        self.last_update = None

    def _fetch_raw_version(self):
        proc = subprocess.Popen([self.avg_path, 'fakearg'],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        out, _err = proc.communicate()
        av_date = None
        av_version = None
        for line in out.splitlines():
            if "Virus database version" in line:
                av_version = line.split(': ')[1]
            elif "Virus database release date" in line:
                av_date = line.split(': ')[1].strip()
                dt = parse(av_date)  # pylint: disable=E0602
                av_date = dt.strftime("%Y%m%d")
            if av_version and av_date:
                break
        return av_date, av_version, out

    def execute(self, request):
        request.result = Result()
        request.set_service_context(self._av_info)
        filename = request.download()

        # Generate the temporary resulting filename which AVG is going to dump the results in
        out_file = os.path.join(self.working_directory, "scanning_results.txt")

        cmd = [
            self.avg_path, "-H", "-p", "-o", "-w", "-b", "-j", "-a",
            "--report=%s" % out_file, filename
        ]
        devnull = open('/dev/null', 'wb')
        proc = subprocess.Popen(cmd,
                                stdout=devnull,
                                stderr=devnull,
                                cwd=os.path.dirname(self.avg_path))
        proc.wait()

        try:
            # AVG does not support unicode file names, so any results it returns for these files will be filtered out
            out_file_handle = codecs.open(out_file,
                                          mode='rb',
                                          encoding="utf-8",
                                          errors="replace")
            output = out_file_handle.read()
            out_file_handle.close()

            # 2- Parse the output and fill in the result objects
            self.parse_results_seq(output, request.result,
                                   len(self.working_directory))

        except Exception, scan_exception:
            self.log.error("AVG scanning was not completed: %s" %
                           str(scan_exception))
            raise
Example #4
class Unpacker(ServiceBase):
    SERVICE_ACCEPTS = 'executable/*'
    SERVICE_CATEGORY = "Static Analysis"
    SERVICE_DESCRIPTION = "This service unpacks UPX packed executables for further analysis."
    SERVICE_ENABLED = True
    SERVICE_REVISION = ServiceBase.parse_revision(
        '$Id: b41b38b69d20023571ec9584eee7e803b119755f $')
    SERVICE_STAGE = 'SECONDARY'
    SERVICE_VERSION = '1'
    SERVICE_CPU_CORES = 0.5
    SERVICE_RAM_MB = 256

    SERVICE_DEFAULT_CONFIG = {
        'UPX_EXE': r'/usr/bin/upx',
    }

    def __init__(self, cfg=None):
        super(Unpacker, self).__init__(cfg)
        self.upx_exe = self.cfg.get('UPX_EXE')
        if not os.path.exists(self.upx_exe):
            raise Exception('UPX executable not found on system: %s' %
                            self.upx_exe)

    def execute(self, request):
        request.result = Result()
        uresult = self._unpack(request, ['upx'])
        if uresult.ok and uresult.localpath:
            request.add_extracted(uresult.localpath,
                                  'Unpacked from %s' % request.srl,
                                  display_name=uresult.displayname)
            request.result.add_section(
                ResultSection(
                    SCORE.NULL, "%s successfully unpacked!" %
                    (os.path.basename(uresult.displayname)),
                    self.SERVICE_CLASSIFICATION))

    def _unpack_upx(self, packedfile, outputpath, displayname):
        # Test the file to see if UPX agrees with our identification.
        p = subprocess.Popen((self.upx_exe, '-t', packedfile),
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        (stdout, stderr) = p.communicate()

        if '[OK]' in stdout and 'Tested 1 file' in stdout:
            p = subprocess.Popen(
                (self.upx_exe, '-d', '-o', outputpath, packedfile),
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)
            (stdout, stderr) = p.communicate()

            if 'Unpacked 1 file' in stdout:
                # successfully unpacked.
                return UnpackResult(True, outputpath, displayname,
                                    {'stdout': stdout[:1024]})
        else:
            self.log.info(
                'UPX extractor said this file was not UPX packed:\n%s\n%s',
                stdout[:1024], stderr[:1024])
        # UPX unpacking is failure prone due to the number of samples that are identified as UPX
        # but are really some minor variant. For that reason we can't really fail the result
        # every time upx has problems with a file.
        return UnpackResult(True, None, None, None)

    def _unpack(self, request, packer_names):
        for name in packer_names:
            if 'upx' in name.lower():
                packedfile = request.download()
                unpackedfile = packedfile + '.unUPX'
                displayname = os.path.basename(request.path) + '.unUPX'
                return self._unpack_upx(packedfile, unpackedfile, displayname)

        return UnpackResult(True, None, None, None)
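UnpackResult is referenced but never defined in this snippet. Judging from how it is built and consumed (uresult.ok, uresult.localpath, uresult.displayname, plus an optional details dict), a simple namedtuple along these lines would fit; the fourth field name is a guess:

from collections import namedtuple

# ok:          the unpack attempt completed without a hard failure
# localpath:   path to the unpacked file, or None if nothing was produced
# displayname: name to display for the extracted file
# info:        optional dict of extra details (e.g. truncated upx stdout)
UnpackResult = namedtuple('UnpackResult', ['ok', 'localpath', 'displayname', 'info'])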
Example #5
class VirusTotalStatic(ServiceBase):
    SERVICE_CATEGORY = "External"
    SERVICE_DESCRIPTION = "This service checks the file hash to see if there's an existing VirusTotal report."
    SERVICE_ENABLED = False
    SERVICE_REVISION = ServiceBase.parse_revision(
        '$Id: 4d57064b3f6c68e285a6105fc52cbcad407cb0e1 $')
    SERVICE_STAGE = "CORE"
    SERVICE_TIMEOUT = 60
    SERVICE_IS_EXTERNAL = True
    SERVICE_DEFAULT_CONFIG = {
        'API_KEY': '',
        'BASE_URL': 'https://www.virustotal.com/vtapi/v2/'
    }

    def __init__(self, cfg=None):
        super(VirusTotalStatic, self).__init__(cfg)
        self.api_key = self.cfg.get('API_KEY')

    # noinspection PyGlobalUndefined,PyUnresolvedReferences
    def import_service_deps(self):
        global requests
        import requests

    def start(self):
        self.log.debug("VirusTotalStatic service started")

    def execute(self, request):
        response = self.scan_file(request)
        result = self.parse_results(response)
        request.result = result

    def scan_file(self, request):

        # Check to see if the file has been seen before
        url = self.cfg.get('BASE_URL') + "file/report"
        params = {'apikey': self.api_key, 'resource': request.sha256}
        r = requests.post(url, params)
        try:
            json_response = r.json()
        except ValueError:
            self.log.warn("Invalid response from VirusTotal, "
                          "HTTP code: %s, "
                          "content length: %i, "
                          "headers: %s" %
                          (r.status_code, len(r.content), repr(r.headers)))
            if len(r.content) == 0:
                raise RecoverableError(
                    "VirusTotal didn't return a JSON object, HTTP code %s" %
                    r.status_code)
            raise
        return json_response

    def parse_results(self, response):
        res = Result()
        response = response.get('results', response)

        if response is not None and response.get('response_code') == 1:
            av_hits = ResultSection(title_text='Anti-Virus Detections')
            url_section = ResultSection(
                SCORE.NULL,
                'VirusTotal report permalink',
                self.SERVICE_CLASSIFICATION,
                body_format=TEXT_FORMAT.URL,
                body=json.dumps({"url": response.get('permalink')}))
            res.add_section(url_section)

            scans = response.get('scans', response)
            av_hits.add_line(
                'Found %d AV hit(s) from %d scans.' %
                (response.get('positives'), response.get('total')))
            for majorkey, subdict in sorted(scans.iteritems()):
                if subdict['detected']:
                    virus_name = subdict['result']
                    res.append_tag(
                        VirusHitTag(virus_name,
                                    context="scanner:%s" % majorkey))
                    av_hits.add_section(
                        AvHitSection(majorkey, virus_name, SCORE.SURE))
            res.add_result(av_hits)

        return res
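For reference, an abbreviated illustration of the file/report JSON shape that parse_results walks; all values here are made up:

sample_response = {
    'response_code': 1,
    'permalink': 'https://www.virustotal.com/...',
    'positives': 2,
    'total': 3,
    'scans': {
        'EngineA': {'detected': True, 'result': 'Trojan.Generic'},
        'EngineB': {'detected': True, 'result': 'Malware.Heuristic'},
        'EngineC': {'detected': False, 'result': None}
    }
}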
Example #6
class Yara(ServiceBase):
    SERVICE_CATEGORY = 'Static Analysis'
    SERVICE_DESCRIPTION = "This services runs all DEPLOYED and NOISY signatures on submitted files. NOISY rules " \
                          "are reported but do not influence the score. DEPLOYED rules score according to their " \
                          "rule group (implant => 1000 | exploit & tool => 500 | technique => 100 | info => 0)."
    SERVICE_ENABLED = True
    SERVICE_REVISION = ServiceBase.parse_revision('$Id: 66d20671b5f37f91cf5f8f7c20fbc201462f1d12 $')
    SERVICE_VERSION = '1'
    SERVICE_DEFAULT_CONFIG = {
        "USE_RIAK_FOR_RULES": True,
        "RULE_PATH": 'rules.yar',
        "SIGNATURE_USER": '******',
        "SIGNATURE_PASS": '******',
        "SIGNATURE_URL": 'https://*****:*****'
    }

            string_hit = "Found %s string: '%s' [@ %s]%s" % (
                entry_name,
                string_value,
                string_offset,
                ' (' + str(count) + 'x)' if count > 1 else ''
            )
            section.add_line(string_hit)

        for entry_name, result_list in result_dict.iteritems():
            for result in result_list[:5]:
                string_hit = "Found %s string: '%s' [@ %s]%s" % (
                    entry_name,
                    result[0],
                    result[1],
                    ' (' + str(result[2]) + 'x)' if result[2] > 1 else ''
                )
                section.add_line(string_hit)
            more = len(result_list[5:])
            if more:
                section.add_line("Found %s string %d more time%s" % (
                    entry_name, more, 's' if more > 1 else ''))

    def _compile_rules(self, rules_txt):
        tmp_dir = tempfile.mkdtemp(dir='/tmp')
        try:
            # Extract the first line of the rules which should look like this:
            # // Signatures last updated: LAST_UPDATE_IN_ISO_FORMAT
            first_line, clean_data = rules_txt.split('\n', 1)
            prefix = '// Signatures last updated: '

            if first_line.startswith(prefix):
                last_update = first_line.replace(prefix, '')
            else:
                self.log.warning(
                    "Couldn't read last update time from %s", rules_txt[:40]
                )
                last_update = now_as_iso()
                clean_data = rules_txt

            rules_file = os.path.join(tmp_dir, 'rules.yar')
            with open(rules_file, 'w') as f:
                f.write(rules_txt)
            try:
                validate = YaraValidator(externals=self.get_yara_externals, logger=self.log)
                edited = validate.validate_rules(rules_file, datastore=True)
            except Exception as e:
                raise e
            # Grab the final output if Yara Validator found problem rules
            if edited:
                with open(rules_file, 'r') as f:
                    sdata = f.read()
                first_line, clean_data = sdata.split('\n', 1)
                if first_line.startswith(prefix):
                    last_update = first_line.replace(prefix, '')
                else:
                    last_update = now_as_iso()
                    clean_data = sdata

            rules = yara.compile(rules_file, externals=self.get_yara_externals)
            rules_md5 = md5(clean_data).hexdigest()
            return last_update, rules, rules_md5
        except Exception as e:
            raise e
        finally:
            shutil.rmtree(tmp_dir)

    def _extract_result_from_matches(self, matches):
        result = Result(default_usage=TAG_USAGE.CORRELATION)
        for match in matches:
            self._add_resultinfo_for_match(result, match)
        return result

    @staticmethod
    def _get_non_wide_char(string):
        res = []
        for (i, c) in enumerate(string):
            if i % 2 == 0:
                res.append(c)

        return ''.join(res)

    @staticmethod
    def _is_wide_char(string):
        if len(string) >= 2 and len(string) % 2 == 0:
            is_wide_char = True
            for (i, c) in enumerate(string):
                if ((i % 2 == 0 and ord(c) == 0) or
                        (i % 2 == 1 and ord(c) != 0)):
                    is_wide_char = False
                    break
        else:
            is_wide_char = False

        return is_wide_char

    @staticmethod
    def _normalize_metadata(almeta):
        almeta.classification = almeta.classification.upper()

    def _update_rules(self, **_):
        self.log.info("Starting Yara's rule updater...")

        if not self.update_client:
            self.update_client = Client(self.signature_url, auth=(self.signature_user, self.signature_pass))

        if self.signature_cache.exists(self.rule_path):
            api_response = self.update_client.signature.update_available(self.last_update)
            update_available = api_response.get('update_available', False)
            if not update_available:
                self.log.info("No update available. Stopping...")
                return

        self.log.info("Downloading signatures with query: %s (%s)" % (self.signature_query, str(self.last_update)))

        signature_data = StringIO()
        self.update_client.signature.download(output=signature_data, query=self.signature_query, safe=True)

        rules_txt = signature_data.getvalue()
        if not rules_txt:
            errormsg = "No rules to compile:\n%s" % rules_txt
            self.log.error("{}/api/v3/signature/download/?query={} - {}:{}".format(
                self.signature_url, self.signature_query, self.signature_user, self.signature_pass)
            )
            self.log.error(errormsg)
            raise ConfigException(errormsg)

        self.signature_cache.save(self.rule_path, rules_txt)

        last_update, rules, rules_md5 = self._compile_rules(rules_txt)
        if rules:
            with self.initialization_lock:
                self.last_update = last_update
                self.rules = rules
                self.rules_md5 = rules_md5

    def execute(self, request):
        if not self.rules:
            return

        self.task = request.task
        local_filename = request.download()

        yara_externals = {}
        for k, i in self.get_yara_externals.iteritems():
            # Check default request.task fields
            try:
                sval = self.task.get(i)
            except:
                sval = None
            if not sval:
                # Check metadata dictionary
                smeta = self.task.metadata
                if smeta:
                    sval = smeta.get(i, None)
            if not sval:
                # Check params dictionary
                smeta = self.task.params
                if smeta:
                    sval = smeta.get(i, None)
            # Create dummy value if item not found
            if not sval:
                sval = i

            yara_externals[k] = sval

        with self.initialization_lock:
            try:
                matches = self.rules.match(local_filename, externals=yara_externals)
                self.counters[RULE_HITS] += len(matches)
                request.result = self._extract_result_from_matches(matches)
            except Exception as e:
                if e.message != "internal error: 30":
                    raise
                else:
                    self.log.warning("Yara internal error 30 detected on submission {}" .format(self.task.sid))
                    section = ResultSection(title_text="Yara scan not completed.")
                    section.add_line("File returned too many matches with current rule set and Yara exited.")
                    result = Result()
                    request.result = result
                    result.add_result(section)

    def get_service_version(self):
        basic_version = super(Yara, self).get_service_version()
        return '{}.r{}'.format(basic_version, self.rules_md5 or "0")

    # noinspection PyGlobalUndefined,PyUnresolvedReferences
    def import_service_deps(self):
        global yara
        import yara

        # noinspection PyUnresolvedReferences,PyBroadException
        try:
            requests.packages.urllib3.disable_warnings()
        except:  # pylint: disable=W0702
            pass

    def start(self):
        force_rule_download = False
        # noinspection PyBroadException
        try:
            # Even if we are using riak for rules we may have a saved copy
            # of the rules. Try to load and compile them first.
            self.signature_cache.makedirs(os.path.dirname(self.rule_path))
            rules_txt = self.signature_cache.get(self.rule_path)
            if rules_txt:
                self.log.info("Yara loaded rules from cached file: %s", self.rule_path)
                self.last_update, self.rules, self.rules_md5 = \
                    self._compile_rules(rules_txt)
            else:
                self.log.info("No cached Yara rules found.")
                force_rule_download = True

        except Exception, e:  # pylint: disable=W0702
            if not self.use_riak_for_rules:
                sleep(30)  # Try and avoid flailing.
                raise
            self.log.warning("Something went wrong while trying to load cached rules: %s" % e.message)
            force_rule_download = True

        if self.use_riak_for_rules:
            self._register_update_callback(self._update_rules, execute_now=force_rule_download,
                                           freq=UpdaterFrequency.MINUTE)

        self.log.info(
            "yara started with service version: %s", self.get_service_version()
        )
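The static helpers _get_non_wide_char and _is_wide_char above handle UTF-16LE ("wide") strings that yara reports from Windows binaries. A quick illustration of their behaviour on a Python 2 byte string:

wide = 'k\x00e\x00r\x00n\x00e\x00l\x003\x002\x00'   # "kernel32" encoded as UTF-16LE
print Yara._is_wide_char(wide)          # True
print Yara._get_non_wide_char(wide)     # 'kernel32'
print Yara._is_wide_char('kernel32')    # False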
Example #7
class PEFile(ServiceBase):
    """ This services dumps the PE header and attempts to find
    some anomalies which could indicate that they are malware related.

    PEiD signature style searching should be done using the yara service"""
    SERVICE_ACCEPTS = 'executable/windows'
    SERVICE_CATEGORY = "Static Analysis"
    SERVICE_DESCRIPTION = "This service extracts imports, exports, section names, ... " \
                          "from windows PE files using the python library pefile."
    SERVICE_ENABLED = True
    SERVICE_REVISION = ServiceBase.parse_revision('$Id$')
    SERVICE_VERSION = '1'
    SERVICE_CPU_CORES = 0.2
    SERVICE_RAM_MB = 256

    # Heuristic info
    AL_PEFile_001 = Heuristic(
        "AL_PEFile_001", "Invalid Signature", "executable/windows",
        dedent("""\
                                         Signature data found in PE but doesn't match the content.
                                         This is either due to malicious copying of signature data or
                                         an error in transmission.
                                         """))
    AL_PEFile_002 = Heuristic(
        "AL_PEFile_002", "Legitimately Signed EXE", "executable/windows",
        dedent("""\
                                         This PE appears to have a legitimate signature.
                                         """))
    AL_PEFile_003 = Heuristic(
        "AL_PEFile_003", "Self Signed", "executable/windows",
        dedent("""\
                                         This PE appears to be self-signed
                                         """))

    # noinspection PyGlobalUndefined,PyUnresolvedReferences
    def import_service_deps(self):
        global pefile
        import pefile

        try:
            global signed_pe, signify
            from signify import signed_pe
            import signify
            self.use_signify = True
        except ImportError:
            self.log.warning(
                "signify package not installed (unable to import). Reinstall the PEFile service with "
                "/opt/al/assemblyline/al/install/reinstall_service.py PEFile")

    def __init__(self, cfg=None):
        super(PEFile, self).__init__(cfg)
        # Service Initialization
        self.log.debug(
            "LCID DB loaded (%s entries). Running information parsing..." %
            (len(G_LCID), ))
        self.filesize_from_peheader = -1
        self.print_slack = False
        self.pe_file = None
        self._sect_list = None
        self.entropy_warning = False
        self.file_res = None
        self.unexpected_sname = []
        self.import_hash = None
        self.filename = None
        self.patch_section = None
        self.request = None
        self.path = None
        self.use_signify = False

    def get_imphash(self):
        return self.pe_file.get_imphash()

    # noinspection PyPep8Naming
    def get_pe_info(self, lcid):
        """Dumps the PE header as Results in the FileResult."""

        # PE Header
        pe_header_res = ResultSection(SCORE['NULL'], "PE: HEADER")

        # PE Header: Header Info
        pe_header_info_res = ResultSection(SCORE.NULL,
                                           "[HEADER INFO]",
                                           parent=pe_header_res)
        pe_header_info_res.add_line(
            "Entry point address: 0x%08X" %
            self.pe_file.OPTIONAL_HEADER.AddressOfEntryPoint)
        pe_header_info_res.add_line(
            "Linker Version: %02d.%02d" %
            (self.pe_file.OPTIONAL_HEADER.MajorLinkerVersion,
             self.pe_file.OPTIONAL_HEADER.MinorLinkerVersion))
        pe_header_info_res.add_line(
            "OS Version: %02d.%02d" %
            (self.pe_file.OPTIONAL_HEADER.MajorOperatingSystemVersion,
             self.pe_file.OPTIONAL_HEADER.MinorOperatingSystemVersion))
        pe_header_info_res.add_line([
            "Time Date Stamp: %s (" %
            time.ctime(self.pe_file.FILE_HEADER.TimeDateStamp),
            res_txt_tag(str(self.pe_file.FILE_HEADER.TimeDateStamp),
                        TAG_TYPE['PE_LINK_TIME_STAMP']), ")"
        ])
        try:
            pe_header_info_res.add_line(
                "Machine Type: %s (%s)" %
                (hex(self.pe_file.FILE_HEADER.Machine),
                 pefile.MACHINE_TYPE[self.pe_file.FILE_HEADER.Machine]))
        except KeyError:
            pass

        # PE Header: Rich Header
        # noinspection PyBroadException
        try:

            if self.pe_file.RICH_HEADER is not None:
                pe_rich_header_info = ResultSection(SCORE.NULL,
                                                    "[RICH HEADER INFO]",
                                                    parent=pe_header_res)
                values_list = self.pe_file.RICH_HEADER.values
                pe_rich_header_info.add_line("VC++ tools used:")
                for i in range(0, len(values_list) / 2):
                    line = "Tool Id: %3d Version: %6d Times used: %3d" % (
                        values_list[2 * i] >> 16, values_list[2 * i] & 0xFFFF,
                        values_list[2 * i + 1])
                    pe_rich_header_info.add_line(line)
        except:
            self.log.exception("Unable to parse PE Rich Header")

        # PE Header: Data Directories
        pe_dd_res = ResultSection(SCORE.NULL,
                                  "[DATA DIRECTORY]",
                                  parent=pe_header_res)
        for data_directory in self.pe_file.OPTIONAL_HEADER.DATA_DIRECTORY:
            if data_directory.Size or data_directory.VirtualAddress:
                pe_dd_res.add_line(
                    "%s - va: 0x%08X - size: 0x%08X" %
                    (data_directory.name[len("IMAGE_DIRECTORY_ENTRY_"):],
                     data_directory.VirtualAddress, data_directory.Size))

        # PE Header: Sections
        pe_sec_res = ResultSection(SCORE.NULL,
                                   "[SECTIONS]",
                                   parent=pe_header_res)

        self._init_section_list()

        try:
            for (sname, section, sec_md5, sec_entropy) in self._sect_list:
                txt = [
                    sname,
                    " - Virtual: 0x%08X (0x%08X bytes)"
                    " - Physical: 0x%08X (0x%08X bytes) - " %
                    (section.VirtualAddress, section.Misc_VirtualSize,
                     section.PointerToRawData, section.SizeOfRawData), "hash:",
                    res_txt_tag(sec_md5, TAG_TYPE['PE_SECTION_HASH']),
                    " - entropy:%f (min:0.0, Max=8.0)" % sec_entropy
                ]
                # add a search tag for the Section Hash
                make_tag(self.file_res,
                         'PE_SECTION_HASH',
                         sec_md5,
                         'HIGH',
                         usage='CORRELATION')
                pe_sec_res.add_line(txt)

        except AttributeError:
            pass

        self.file_res.add_section(pe_header_res)

        # debug
        try:
            if self.pe_file.DebugTimeDateStamp:
                pe_debug_res = ResultSection(SCORE['NULL'], "PE: DEBUG")
                self.file_res.add_section(pe_debug_res)

                pe_debug_res.add_line(
                    "Time Date Stamp: %s" %
                    time.ctime(self.pe_file.DebugTimeDateStamp))

                # When it is a unicode, we know we are coming from RSDS which is UTF-8
                # otherwise, we come from NB10 and we need to guess the charset.
                if type(self.pe_file.pdb_filename) != unicode:
                    char_enc_guessed = translate_str(self.pe_file.pdb_filename)
                    pdb_filename = char_enc_guessed['converted']
                else:
                    char_enc_guessed = {'confidence': 1.0, 'encoding': 'utf-8'}
                    pdb_filename = self.pe_file.pdb_filename

                pe_debug_res.add_line([
                    "PDB: '",
                    res_txt_tag_charset(pdb_filename,
                                        TAG_TYPE['PE_PDB_FILENAME'],
                                        char_enc_guessed['encoding'],
                                        char_enc_guessed['confidence']), "'"
                ])

                # self.log.debug(u"\tPDB: %s" % pdb_filename)
        except AttributeError:
            pass

        # imports
        try:
            if hasattr(self.pe_file, 'DIRECTORY_ENTRY_IMPORT') and len(
                    self.pe_file.DIRECTORY_ENTRY_IMPORT) > 0:
                pe_import_res = ResultSection(SCORE['NULL'], "PE: IMPORTS")
                self.file_res.add_section(pe_import_res)

                for entry in self.pe_file.DIRECTORY_ENTRY_IMPORT:
                    pe_import_dll_res = ResultSection(SCORE.NULL,
                                                      "[%s]" % entry.dll,
                                                      parent=pe_import_res)
                    first_element = True
                    line = StringIO()
                    for imp in entry.imports:
                        if first_element:
                            first_element = False
                        else:
                            line.write(", ")

                        if imp.name is None:
                            line.write(str(imp.ordinal))
                        else:
                            line.write(imp.name)

                    pe_import_dll_res.add_line(line.getvalue())

            else:
                pe_import_res = ResultSection(SCORE['NULL'],
                                              "PE: NO IMPORTS DETECTED ")
                self.file_res.add_section(pe_import_res)

        except AttributeError:
            pass

        # exports
        try:
            if self.pe_file.DIRECTORY_ENTRY_EXPORT.struct.TimeDateStamp is not None:
                pe_export_res = ResultSection(SCORE['NULL'], "PE: EXPORTS")
                self.file_res.add_section(pe_export_res)

                # noinspection PyBroadException
                try:
                    pe_export_res.add_line([
                        "Module Name: ",
                        res_txt_tag(safe_str(self.pe_file.ModuleName),
                                    TAG_TYPE['PE_EXPORT_MODULE_NAME'])
                    ])
                except:
                    pass

                if self.pe_file.DIRECTORY_ENTRY_EXPORT.struct.TimeDateStamp == 0:
                    pe_export_res.add_line("Time Date Stamp: 0")
                else:
                    pe_export_res.add_line(
                        "Time Date Stamp: %s" %
                        time.ctime(self.pe_file.DIRECTORY_ENTRY_EXPORT.struct.
                                   TimeDateStamp))

                first_element = True
                txt = []
                for exp in self.pe_file.DIRECTORY_ENTRY_EXPORT.symbols:
                    if first_element:
                        first_element = False
                    else:
                        txt.append(", ")

                    txt.append(str(exp.ordinal))
                    if exp.name is not None:
                        txt.append(": ")
                        txt.append(
                            res_txt_tag(exp.name,
                                        TAG_TYPE['PE_EXPORT_FCT_NAME']))

                pe_export_res.add_line(txt)
        except AttributeError:
            pass

        # resources
        try:
            if len(self.pe_file.DIRECTORY_ENTRY_RESOURCE.entries) > 0:
                pe_resource_res = ResultSection(SCORE['NULL'], "PE: RESOURCES")
                self.file_res.add_section(pe_resource_res)

                for res_entry in self.pe_file.DIRECTORY_ENTRY_RESOURCE.entries:
                    if res_entry.name is None:
                        # noinspection PyBroadException
                        try:
                            entry_name = pefile.RESOURCE_TYPE[res_entry.id]
                        except:
                            # pylint: disable-msg=W0702
                            # unfortunately this code was done before we started to really care about which
                            # exception to catch so, I actually don't really know at this point, would need to try
                            # out :-\
                            entry_name = "UNKNOWN"
                    else:
                        entry_name = res_entry.name

                    for name_id in res_entry.directory.entries:
                        if name_id.name is None:
                            name_id.name = hex(name_id.id)

                        for language in name_id.directory.entries:
                            try:
                                language_desc = lcid[language.id]
                            except KeyError:
                                language_desc = 'Unknown language'

                            line = []
                            if res_entry.name is None:
                                line.append(entry_name)
                            else:
                                line.append(
                                    res_txt_tag(str(entry_name),
                                                TAG_TYPE['PE_RESOURCE_NAME']))

                            line.append(" " + str(name_id.name) + " ")
                            line.append("0x")
                            # this will add a link to search in AL for the value
                            line.append(
                                res_txt_tag("%04X" % language.id,
                                            TAG_TYPE['PE_RESOURCE_LANGUAGE']))
                            line.append(" (%s)" % language_desc)

                            make_tag(self.file_res,
                                     'PE_RESOURCE_LANGUAGE',
                                     language.id,
                                     weight='LOW',
                                     usage='IDENTIFICATION')

                            # get the size of the resource
                            res_size = language.data.struct.Size
                            line.append(" Size: 0x%x" % res_size)

                            pe_resource_res.add_line(line)

        except AttributeError:
            pass

        # Resources-VersionInfo
        try:
            if len(self.pe_file.FileInfo) > 2:
                pass

            for file_info in self.pe_file.FileInfo:
                if file_info.name == "StringFileInfo":
                    if len(file_info.StringTable) > 0:
                        pe_resource_verinfo_res = ResultSection(
                            SCORE['NULL'], "PE: RESOURCES-VersionInfo")
                        self.file_res.add_section(pe_resource_verinfo_res)

                        try:
                            if "LangID" in file_info.StringTable[0].entries:
                                lang_id = file_info.StringTable[0].get(
                                    "LangID")
                                if not int(lang_id, 16) >> 16 == 0:
                                    txt = ('LangId: ' + lang_id + " (" +
                                           lcid[int(lang_id, 16) >> 16] + ")")
                                    pe_resource_verinfo_res.add_line(txt)
                                else:
                                    txt = ('LangId: ' + lang_id + " (NEUTRAL)")
                                    pe_resource_verinfo_res.add_line(txt)
                        except (ValueError, KeyError):
                            txt = ('LangId: %s is invalid' % lang_id)
                            pe_resource_verinfo_res.add_line(txt)

                        for entry in file_info.StringTable[0].entries.items():
                            txt = ['%s: ' % entry[0]]

                            if entry[0] == 'OriginalFilename':
                                txt.append(
                                    res_txt_tag(
                                        entry[1], TAG_TYPE[
                                            'PE_VERSION_INFO_ORIGINAL_FILENAME']
                                    ))
                            elif entry[0] == 'FileDescription':
                                txt.append(
                                    res_txt_tag(
                                        entry[1], TAG_TYPE[
                                            'PE_VERSION_INFO_FILE_DESCRIPTION']
                                    ))
                            else:
                                txt.append(entry[1])

                            pe_resource_verinfo_res.add_line(txt)

        except AttributeError:
            pass

        # Resources Strings
        try:
            BYTE = 1
            WORD = 2
            DWORD = 4

            DS_SETFONT = 0x40

            DIALOG_LEAD = DWORD + DWORD + WORD + WORD + WORD + WORD + WORD
            DIALOG_ITEM_LEAD = DWORD + DWORD + WORD + WORD + WORD + WORD + WORD

            DIALOGEX_LEAD = WORD + WORD + DWORD + DWORD + DWORD + WORD + WORD + WORD + WORD + WORD
            DIALOGEX_TRAIL = WORD + WORD + BYTE + BYTE
            DIALOGEX_ITEM_LEAD = DWORD + DWORD + DWORD + WORD + WORD + WORD + WORD + DWORD
            DIALOGEX_ITEM_TRAIL = WORD

            ITEM_TYPES = {
                0x80: "BUTTON",
                0x81: "EDIT",
                0x82: "STATIC",
                0x83: "LIST BOX",
                0x84: "SCROLL BAR",
                0x85: "COMBO BOX"
            }

            if hasattr(self.pe_file, 'DIRECTORY_ENTRY_RESOURCE'):
                for dir_type in self.pe_file.DIRECTORY_ENTRY_RESOURCE.entries:
                    if dir_type.name is None:
                        if dir_type.id in pefile.RESOURCE_TYPE:
                            dir_type.name = pefile.RESOURCE_TYPE[dir_type.id]
                    for nameID in dir_type.directory.entries:
                        if nameID.name is None:
                            nameID.name = hex(nameID.id)
                        for language in nameID.directory.entries:
                            strings = []
                            if str(dir_type.name) == "RT_DIALOG":
                                data_rva = language.data.struct.OffsetToData
                                size = language.data.struct.Size
                                data = self.pe_file.get_memory_mapped_image(
                                )[data_rva:data_rva + size]

                                offset = 0
                                if self.pe_file.get_word_at_rva(data_rva + offset) == 0x1 \
                                        and self.pe_file.get_word_at_rva(data_rva + offset + WORD) == 0xFFFF:
                                    # Use Extended Dialog Parsing

                                    # Remove leading bytes
                                    offset += DIALOGEX_LEAD
                                    if data[offset:offset + 2] == "\xFF\xFF":
                                        offset += DWORD
                                    else:
                                        offset += WORD
                                    if data[offset:offset + 2] == "\xFF\xFF":
                                        offset += DWORD
                                    else:
                                        offset += WORD

                                    # Get window title
                                    window_title = self.pe_file.get_string_u_at_rva(
                                        data_rva + offset)
                                    if len(window_title) != 0:
                                        strings.append(
                                            ("DIALOG_TITLE", window_title))
                                    offset += len(window_title) * 2 + WORD

                                    # Remove trailing bytes
                                    offset += DIALOGEX_TRAIL
                                    offset += len(
                                        self.pe_file.get_string_u_at_rva(
                                            data_rva + offset)) * 2 + WORD

                                    # alignment adjustment
                                    if (offset % 4) != 0:
                                        offset += WORD

                                    while True:

                                        if offset >= size:
                                            break

                                        offset += DIALOGEX_ITEM_LEAD

                                        # Get item type
                                        if self.pe_file.get_word_at_rva(
                                                data_rva + offset) == 0xFFFF:
                                            offset += WORD
                                            item_type = ITEM_TYPES[
                                                self.pe_file.get_word_at_rva(
                                                    data_rva + offset)]
                                            offset += WORD
                                        else:
                                            item_type = self.pe_file.get_string_u_at_rva(
                                                data_rva + offset)
                                            offset += len(item_type) * 2 + WORD

                                        # Get item text
                                        item_text = self.pe_file.get_string_u_at_rva(
                                            data_rva + offset)
                                        if len(item_text) != 0:
                                            strings.append(
                                                (item_type, item_text))
                                        offset += len(item_text) * 2 + WORD

                                        extra_bytes = self.pe_file.get_word_at_rva(
                                            data_rva + offset)
                                        offset += extra_bytes + DIALOGEX_ITEM_TRAIL

                                        # Alignment adjustment
                                        if (offset % 4) != 0:
                                            offset += WORD

                                else:
                                    # TODO: Use Non extended Dialog Parsing
                                    # Remove leading bytes
                                    style = self.pe_file.get_word_at_rva(
                                        data_rva + offset)

                                    offset += DIALOG_LEAD
                                    if data[offset:offset + 2] == "\xFF\xFF":
                                        offset += DWORD
                                    else:
                                        offset += len(
                                            self.pe_file.get_string_u_at_rva(
                                                data_rva + offset)) * 2 + WORD
                                    if data[offset:offset + 2] == "\xFF\xFF":
                                        offset += DWORD
                                    else:
                                        offset += len(
                                            self.pe_file.get_string_u_at_rva(
                                                data_rva + offset)) * 2 + WORD

                                    # Get window title
                                    window_title = self.pe_file.get_string_u_at_rva(
                                        data_rva + offset)
                                    if len(window_title) != 0:
                                        strings.append(
                                            ("DIALOG_TITLE", window_title))
                                    offset += len(window_title) * 2 + WORD

                                    if (style & DS_SETFONT) != 0:
                                        offset += WORD
                                        offset += len(
                                            self.pe_file.get_string_u_at_rva(
                                                data_rva + offset)) * 2 + WORD

                                    # Alignment adjustment
                                    if (offset % 4) != 0:
                                        offset += WORD

                                    while True:

                                        if offset >= size:
                                            break

                                        offset += DIALOG_ITEM_LEAD

                                        # Get item type
                                        if self.pe_file.get_word_at_rva(
                                                data_rva + offset) == 0xFFFF:
                                            offset += WORD
                                            item_type = ITEM_TYPES[
                                                self.pe_file.get_word_at_rva(
                                                    data_rva + offset)]
                                            offset += WORD
                                        else:
                                            item_type = self.pe_file.get_string_u_at_rva(
                                                data_rva + offset)
                                            offset += len(item_type) * 2 + WORD

                                        # Get item text
                                        if self.pe_file.get_word_at_rva(
                                                data_rva + offset) == 0xFFFF:
                                            offset += DWORD
                                        else:
                                            item_text = self.pe_file.get_string_u_at_rva(
                                                data_rva + offset)
                                            if len(item_text) != 0:
                                                strings.append(
                                                    (item_type, item_text))
                                            offset += len(item_text) * 2 + WORD

                                        extra_bytes = self.pe_file.get_word_at_rva(
                                            data_rva + offset)
                                        offset += extra_bytes + WORD

                                        # Alignment adjustment
                                        if (offset % 4) != 0:
                                            offset += WORD

                            elif str(dir_type.name) == "RT_STRING":
                                data_rva = language.data.struct.OffsetToData
                                size = language.data.struct.Size
                                data = self.pe_file.get_memory_mapped_image(
                                )[data_rva:data_rva + size]
                                offset = 0
                                while True:
                                    if offset >= size:
                                        break

                                    ustr_length = self.pe_file.get_word_from_data(
                                        data[offset:offset + 2], 0)
                                    offset += 2

                                    if ustr_length == 0:
                                        continue

                                    ustr = self.pe_file.get_string_u_at_rva(
                                        data_rva + offset,
                                        max_length=ustr_length)
                                    offset += ustr_length * 2
                                    strings.append((None, ustr))

                            if len(strings) > 0:
                                success = False
                                try:
                                    comment = "%s (id:%s - lang_id:0x%04X [%s])" % (
                                        str(dir_type.name), str(nameID.name),
                                        language.id, lcid[language.id])
                                except KeyError:
                                    comment = "%s (id:%s - lang_id:0x%04X [Unknown language])" % (
                                        str(dir_type.name), str(
                                            nameID.name), language.id)
                                res = ResultSection(
                                    SCORE['NULL'],
                                    "PE: STRINGS - %s" % comment)
                                for idx in xrange(len(strings)):
                                    # noinspection PyBroadException
                                    try:
                                        tag_value = strings[idx][1]

                                        # The following line can crash chardet if a
                                        # UPX-packed file has packed the resources...
                                        chardet.detect(
                                            tag_value
                                        )  # TODO: Find a better way to do this

                                        tag_value = tag_value.replace(
                                            '\r', ' ').replace('\n', ' ')
                                        if strings[idx][0] is not None:
                                            res.add_line([
                                                strings[idx][0], ": ",
                                                res_txt_tag(
                                                    tag_value,
                                                    TAG_TYPE['FILE_STRING'])
                                            ])
                                        else:
                                            res.add_line(
                                                res_txt_tag(
                                                    tag_value,
                                                    TAG_TYPE['FILE_STRING']))

                                        make_tag(self.file_res,
                                                 'FILE_STRING',
                                                 tag_value,
                                                 weight='NULL',
                                                 usage='IDENTIFICATION')

                                        success = True
                                    except:
                                        pass
                                if success:
                                    self.file_res.add_section(res)
                else:
                    pass

        except AttributeError, e:
            self.log.debug("\t Error parsing output: " + repr(e))

        except Exception, e:
            print e
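A minimal standalone sketch of the pefile calls this service builds on (parsing a PE, pulling the import hash used by get_imphash(), and walking the headers and sections dumped by get_pe_info()); the sample path is a placeholder:

import pefile

pe = pefile.PE('/tmp/sample.exe')                     # placeholder path
print pe.get_imphash()                                # import hash, as in get_imphash()
print hex(pe.OPTIONAL_HEADER.AddressOfEntryPoint)     # entry point address
for section in pe.sections:
    print section.Name.rstrip('\x00'), hex(section.VirtualAddress), hex(section.SizeOfRawData)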
Example #8
class Beaver(ServiceBase):
    SERVICE_ACCEPTS = '.*'
    SERVICE_ENABLED = True
    SERVICE_CATEGORY = Category.STATIC_ANALYSIS
    SERVICE_REVISION = ServiceBase.parse_revision(
        '$Id: aba2c63c70ffeb791512a84ac573d93413dfba15 $')
    SERVICE_VERSION = '1'
    SERVICE_DEFAULT_CONFIG = {
        "host": "127.0.0.1",
        "user": "******",
        "passwd": "password",
        "port": 3306,
        "db": "beaver",
        "direct_db": True
    }
    SERVICE_DESCRIPTION = "Performs hash lookups against the CCIRC Malware Database."
    SERVICE_CPU_CORES = 0.05
    SERVICE_CPU_RAM = 64

    def __init__(self, cfg=None):
        super(Beaver, self).__init__(cfg)
        self.direct_db = 'db' in cfg and 'port' in cfg
        self._connect_params = {}
        self.api_url = None
        self.auth = None
        self.session = None
        self.connection = None

    def start(self):
        self._connect_params = {
            'host': self.cfg.get('host'),
            'user': self.cfg.get('user'),
            'passwd': self.cfg.get('passwd')
        }
        if self.direct_db:
            self._connect_params.update({
                'port': int(self.cfg.get('port')),
                'db': self.cfg.get('db')
            })
        else:
            self.api_url = "%s/al/report/%%s" % self.cfg.get('host')
            self.auth = (self.cfg.get('user'), self.cfg.get('passwd'))
            self.session = None

        self.connection = BeaverDatasource(self.log, **self._connect_params)

    # noinspection PyUnresolvedReferences
    def import_service_deps(self):
        global BeaverDatasource
        from al_services.alsvc_beaver.datasource.beaver import Beaver as BeaverDatasource

    @staticmethod
    def lookup_callouts(response):
        results = response.get('callout', None)

        if not results:
            return None, []

        tags = []
        r_section = ResultSection(title_text='Sandbox Call-Outs')
        r_section.score = SCORE.HIGH
        analyser = ''
        r_sub_section = None
        for result in results[:10]:
            if analyser != result['analyser']:
                title = '%s (Analysed on %s)' % (result['analyser'],
                                                 result['date'])
                r_sub_section = ResultSection(title_text=title,
                                              parent=r_section)
                analyser = result['analyser']

            channel = result['request']
            if channel is not None:
                channel = "(%s)" % channel.split('~~')[0]
            else:
                channel = ""

            r_sub_section.add_line("{0:s}:{1:d}{2:s}".format(
                result['callout'], result['port'], channel))

            try:
                p1, p2, p3, p4 = result['callout'].split(".")
                if int(p1) <= 255 and int(p2) <= 255 and int(
                        p3) <= 255 and int(p4) <= 255:
                    tags.append(
                        Tag(TAG_TYPE.NET_IP,
                            result['callout'],
                            TAG_WEIGHT.MED,
                            context=Context.BEACONS))
            except ValueError:
                tags.append(
                    Tag(TAG_TYPE.NET_DOMAIN_NAME,
                        result['callout'],
                        TAG_WEIGHT.MED,
                        context=Context.BEACONS))

            if result['port'] != 0:
                tags.append(
                    Tag(TAG_TYPE.NET_PORT,
                        str(result['port']),
                        TAG_WEIGHT.MED,
                        context=Context.BEACONS))

        if len(results) > 10:
            r_section.add_line("And %s more..." % str(len(results) - 10))
        return r_section, tags

    @staticmethod
    def lookup_av_hits(response):
        results = response.get('antivirus', None)

        if not results:
            return None, []

        tags = []
        r_section = ResultSection(title_text='Anti-Virus Detections')

        r_section.add_line('Found %d AV hit(s).' % len(results))
        for result in results:
            r_section.add_section(
                AvHitSection(result['scannerID'], result['name'], SCORE.SURE))
            tags.append(
                VirusHitTag(result['name'],
                            context="scanner:%s" % result['scannerID']))

        return r_section, tags

    @staticmethod
    def lookup_source(response):
        result = response.get('source', None)
        if not result:
            return None

        if result['count'] > 0:
            r_section = ResultSection(title_text='File Frequency')
            r_section.score = SCORE.NULL
            r_section.add_line('First Seen: %s' % result['first_seen'])
            r_section.add_line('Last Seen: %s' % result['last_seen'])
            r_section.add_line('Source Count: %d' % result['count'])
            return r_section

    @staticmethod
    def lookup_upatre_downloader(response):
        result = response.get('upatre', None)
        if not result:
            return None

        result = result[0]
        r_section = ResultSection(title_text='Upatre activity')
        r_section.score = SCORE.VHIGH
        r_section.add_line('The file %s decodes to %s using XOR key %s' %
                           (result['firstSeen'], result['decrypted_md5'],
                            result['decryption_key']))
        return r_section

    @staticmethod
    def lookup_spam_feed(response):
        result = response.get('spam_feed', None)
        if not result:
            return None

        result = result[0]
        r_section = ResultSection(title_text='SPAM feed')
        r_section.score = SCORE.HIGH
        r_section.add_line('Found %d related spam emails' % result['count'])
        r_section.add_line('\tFirst Seen: %s' % result['first_seen'])
        r_section.add_line('\tLast Seen: %s' % result['last_seen'])
        r_sub_section = ResultSection(title_text='Attachments',
                                      parent=r_section)
        r_sub_section.add_line('%s - md5: %s' %
                               (result['filename'], result['filename_md5']))
        if result['attachment']:
            r_sub_section.add_line(
                '\t%s - md5: %s' %
                (result['attachment'], result['attachment_md5']))
        return r_section

    def parse_direct_db(self, response):
        result = Result()

        res = self.lookup_source(response)
        if res:
            # Display source frequency if found
            result.add_section(res)

            res = self.lookup_upatre_downloader(response)
            if res:
                # Display Upatre data
                result.add_section(res)

            res, tags = self.lookup_callouts(response)
            if res:
                # Display Call-Outs
                result.add_section(res)

                # Add domain, ip and port tags
                _ = [result.append_tag(tag) for tag in tags]

            res = self.lookup_spam_feed(response)
            if res:
                # Display info from SPAM feed
                result.add_section(res)

            res, tags = self.lookup_av_hits(response)
            if res:
                # Display Anti-virus result
                result.add_section(res)

                # Add Virus Tags
                _ = [result.append_tag(tag) for tag in tags]

        return result

    @staticmethod
    def parse_api(data):
        result = Result()

        # Info block
        hash_info = data.get('hash_info')
        if not hash_info:
            return result
        r_info = ResultSection(title_text='File Info')
        r_info.score = SCORE.NULL
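        # received_date appears to be a YYYYMMDD string; render it as YYYY-MM-DD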
        r_info.add_line('Received Date: %s-%s-%s' %
                        (data['received_date'][:4], data['received_date'][4:6],
                         data['received_date'][6:]))
        r_info.add_line('Size: %s' % hash_info.get('filesize', ""))
        r_info.add_line('MD5: %s' % hash_info.get('md5', ""))
        r_info.add_line('SHA1: %s' % hash_info.get('sha1', ""))
        r_info.add_line('SHA256: %s' % hash_info.get('sha256', ""))
        r_info.add_line('SSDeep Blocksize: %s' %
                        hash_info.get('ssdeep_blocksize', ""))
        r_info.add_line('SSDeep Hash1: %s' % hash_info.get('ssdeep_hash1', ""))
        r_info.add_line('SSDeep Hash2: %s' % hash_info.get('ssdeep_hash2', ""))
        result.add_section(r_info)

        callouts = data.get('callouts', [])
        if len(callouts) > 0:
            max_callouts = 10
            r_callouts = ResultSection(title_text='Sandbox Call-Outs')
            r_callouts.score = SCORE.VHIGH
            analyser = ''
            r_call_sub_section = None

            reported_count = 0
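            # Only the first max_callouts entries are rendered in the section body, but network
            # tags are still added for every call-out.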
            for callout in callouts:
                reported_count += 1
                if reported_count <= max_callouts:
                    if analyser != callout['ip']:
                        title = '%s (Analysed on %s)' % (callout['ip'],
                                                         callout['addedDate'])
                        r_call_sub_section = ResultSection(title_text=title,
                                                           parent=r_callouts)
                        analyser = callout['ip']

                    channel = callout['channel']
                    if channel is not None:
                        channel = "(%s)" % channel.split('~~')[0]
                    else:
                        channel = ""

                    r_call_sub_section.add_line("{0:s}:{1:d}{2:s}".format(
                        callout['callout'], callout['port'], channel))

                try:
                    p1, p2, p3, p4 = callout['callout'].split(".")
                    if int(p1) <= 255 and int(p2) <= 255 and int(
                            p3) <= 255 and int(p4) <= 255:
                        result.append_tag(
                            Tag(TAG_TYPE.NET_IP,
                                callout['callout'],
                                TAG_WEIGHT.MED,
                                context=Context.BEACONS))
                except ValueError:
                    result.append_tag(
                        Tag(TAG_TYPE.NET_DOMAIN_NAME,
                            callout['callout'],
                            TAG_WEIGHT.MED,
                            context=Context.BEACONS))

                if callout['port'] != 0:
                    result.append_tag(
                        Tag(TAG_TYPE.NET_PORT,
                            str(callout['port']),
                            TAG_WEIGHT.MED,
                            context=Context.BEACONS))

            if len(callouts) > max_callouts:
                r_callouts.add_line("And %s more..." % str(len(callouts) - max_callouts))
            result.add_section(r_callouts)

        spamcount = data.get('spamCount', {})
        if spamcount:
            r_spam = ResultSection(title_text='SPAM feed')
            r_spam.score = SCORE.VHIGH
            r_spam.add_line('Found %d related spam emails' %
                            spamcount['count'])
            email_sample = spamcount.get("email_sample", {})
            r_spam.add_line('\tFirst Seen: %s' % email_sample['firstSeen'])
            r_spam.add_line('\tLast Seen: %s' % email_sample['lastSeen'])
            r_sub_section = ResultSection(title_text='Attachments',
                                          parent=r_spam)
            if email_sample['filename']:
                r_sub_section.add_line(
                    '%s - md5: %s' %
                    (email_sample['filename'], email_sample['filenameMD5']))
            if email_sample['attachment']:
                r_sub_section.add_line('%s - md5: %s' %
                                       (email_sample['attachment'],
                                        email_sample['attachmentMD5']))
            result.add_section(r_spam)

        av_results = data.get('av_results', [])
        if len(av_results) > 0:
            r_av_sec = ResultSection(title_text='Anti-Virus Detections')
            r_av_sec.add_line('Found %d AV hit(s).' % len(av_results))
            for av_result in av_results:
                r_av_sec.add_section(
                    AvHitSection(av_result['scannerID'], av_result['name'],
                                 SCORE.SURE))
                result.append_tag(
                    VirusHitTag(av_result['name'],
                                context="scanner:%s" % av_result['scannerID']))
            result.add_section(r_av_sec)

        return result

    def execute(self, request):
        try:
            response = self.connection.query(request.md5)
        except BeaverDatasource.DatabaseException:
            raise RecoverableError("Query failed")
        if self.connection.direct_db:
            request.result = self.parse_direct_db(response)
        else:
            request.result = self.parse_api(response)
Example #9
class ResultSample(ServiceBase):
    """
    This service generates fake data to showcase
    the different features of the AL Result object
    """

    SERVICE_ACCEPTS = '.*'
    SERVICE_CATEGORY = 'Static Analysis'
    SERVICE_ENABLED = True
    SERVICE_REVISION = ServiceBase.parse_revision(
        '$Id: 23d614f6c7970245a41c9639820bf6bca7dfbe8f $')
    SERVICE_VERSION = '1'

    def __init__(self, cfg=None):
        super(ResultSample, self).__init__(cfg)

    def execute(self, request):
        # Create a result object where all the sections will be stored
        result = Result()

        # ==================================================================
        # Default Section:
        #     Default section basically just dumps the text to the screen...
        #       All section scores will be summed in the service result
        #       The Result classification will be the highest classification found in the sections
        default_section = ResultSection(SCORE.LOW,
                                        'Example of a default section',
                                        Classification.RESTRICTED)
        default_section.add_line("You can add line by line!")
        default_section.add_lines(["Or", "Multiple lines", "Inside a list!"])

        # ==================================================================
        # Color map Section:
        #     Creates a color map bar using a minimum and maximum domain
        cmap_min = 0
        cmap_max = 20
        color_map_data = {
            'type': 'colormap',
            'data': {
                'domain': [cmap_min, cmap_max],
                'values': [random.random() * cmap_max for _ in xrange(50)]
            }
        }
        section_color_map = ResultSection(SCORE.NULL,
                                          "Example of colormap result section",
                                          self.SERVICE_CLASSIFICATION,
                                          body_format=TEXT_FORMAT.GRAPH_DATA,
                                          body=json.dumps(color_map_data))

        # ==================================================================
        # URL section:
        #     Generate a list of clickable urls using a json encoded format
        url_section = ResultSection(SCORE.NULL,
                                    'Example of a simple url section',
                                    self.SERVICE_CLASSIFICATION,
                                    body_format=TEXT_FORMAT.URL,
                                    body=json.dumps({
                                        "name":
                                        "Google",
                                        "url":
                                        "https://www.google.com/"
                                    }))

        # You can add tags to any section, although those tags will be brought up to the result object
        #     Tags are defined by a type, value and weight (confidence lvl)
        #         you can also add a classification and context if needed
        url_section.add_tag(TAG_TYPE.NET_DOMAIN_NAME, "google.com",
                            TAG_WEIGHT.LOW)
        url_section.add_tag(TAG_TYPE.NET_DOMAIN_NAME,
                            "bob.com",
                            TAG_WEIGHT.LOW,
                            classification=Classification.RESTRICTED)
        url_section.add_tag(TAG_TYPE.NET_DOMAIN_NAME,
                            "baddomain.com",
                            TAG_WEIGHT.LOW,
                            context=Context.BEACONS)

        # You may also want to provide a list of urls! No need to provide a name; the url link will be displayed
        urls = [{
            "url": "https://google.com/"
        }, {
            "url": "https://google.ca/"
        }, {
            "url": "https://microsoft.com/"
        }]
        url_section2 = ResultSection(
            SCORE.MED,
            'Example of a url section with multiple links',
            self.SERVICE_CLASSIFICATION,
            body_format=TEXT_FORMAT.URL,
            body=json.dumps(urls))
        # Add url_section2 as a subsection of url section
        #     The score of the subsections will automatically be added to the parent section
        url_section.add_section(url_section2)

        # ==================================================================
        # Memory dump section:
        #     Dump whatever string content you have into a <pre/> html tag so you can do your own formatting
        data = hexdump(
            "This is some random text that we will format as an hexdump and you'll see "
            "that the hexdump formatting will be preserved by the memory dump section!"
        )
        memdump_section = ResultSection(SCORE.NULL,
                                        'Example of a memory dump section',
                                        self.SERVICE_CLASSIFICATION,
                                        body_format=TEXT_FORMAT.MEMORY_DUMP,
                                        body=data)

        # ==================================================================
        # Re-Submitting files to the system
        #     Adding extracted files will have them resubmitted to the system for analysis
        if request.srl != '8cf8277a71e85122bf7ea4610c7cfcc0bfb6dee799be50a41b2f4b1321b3317f':
            # This IF just prevents resubmitting the same file in a loop for this example...
            temp_path = tempfile.mktemp(dir=self.working_directory)
            with open(temp_path, "w") as myfile:
                myfile.write(data)
            request.add_extracted(temp_path,
                                  "Extracted by some random magic!",
                                  display_name="file.txt")

        # ==================================================================
        # Supplementary files
        #     Adding supplementary files will save them on the datastore for future
        #      reference but won't reprocess those files.
        temp_path = tempfile.mktemp(dir=self.working_directory)
        with open(temp_path, "w") as myfile:
            myfile.write(json.dumps(urls))
        request.add_supplementary(temp_path,
                                  "These are urls as a JSON",
                                  display_name="urls.json")

        # ==================================================================
        # Wrap-up:
        #     Add all sections to the Result object
        result.add_section(default_section)
        result.add_section(section_color_map)
        result.add_section(url_section)
        result.add_section(memdump_section)
        request.result = result

    def start(self):
        # ==================================================================
        # On Startup actions:
        #     Your service might have to do some warmup on startup to make things faster
        #       or simply register an updater function to keep it up to date.

        # Here is an example of registering an updater function
        # Parameters are:
        #       func: The callback function (function called at each interval)
        #       blocking: Do we have to block processing while the update takes place? (Default: False)
        #       execute_now: Should we run the updater while registering it? (Default: True)
        #       utype: Type of update
        #               PROCESS is only for current process,
        #               BOX only one update per physical/virtual machine
        #               CLUSTER only one update for the full cluster (Default: PROCESS)
        #       freq: Frequency at which the update takes place (Default: HOURLY)
        self._register_update_callback(self.update,
                                       blocking=True,
                                       execute_now=False,
                                       utype=UpdaterType.PROCESS,
                                       freq=UpdaterFrequency.MINUTE)

    def update(self, *args, **kwargs):
        # ==================================================================
        # This is a sample update callback function
        #       NOTE: all update callback functions must be able to receive *args and **kwargs.
        import time

        runtime = 5
        self.log.info("Updater started... args=%s, kwargs=%s" % (args, kwargs))
        time.sleep(runtime)
        self.log.info("Updater ran for %s seconds..." % runtime)
Example #10
class Oletools(ServiceBase):
    AL_Oletools_001 = Heuristic("AL_Oletools_001", "Attached Document Template", "document/office/ole",
                                dedent("""\
                                       /Attached template specified in xml relationships. This can be used
                                       for malicious purposes.
                                       """))
    AL_Oletools_002 = Heuristic("AL_Oletools_002", "Multi-embedded documents", "document/office/ole",
                                dedent("""\
                                       /File contains both old OLE format and new ODF format. This can be
                                        used to obfuscate malicious content.
                                       """))
    AL_Oletools_003 = Heuristic("AL_Oletools_003", "Massive document", "document/office/ole",
                                dedent("""\
                                       /File contains parts which are massive. Could not scan entire document.
                                       """))

    SERVICE_CATEGORY = 'Static Analysis'
    SERVICE_ACCEPTS = 'document/office/.*'
    SERVICE_DESCRIPTION = "This service extracts metadata and network information and reports anomalies in " \
                          "Microsoft OLE and XML documents using the Python library py-oletools."
    SERVICE_ENABLED = True
    SERVICE_VERSION = '3'
    SERVICE_REVISION = ServiceBase.parse_revision('$Id: fe3b497c521a5b81dc362b0009b726189cfc8a96 $')
    SERVICE_CPU_CORES = 0.5
    SERVICE_RAM_MB = 1024
    SERVICE_DEFAULT_CONFIG = {
        'MACRO_SCORE_MAX_FILE_SIZE': 5 * 1024**2,
        'MACRO_SCORE_MIN_ALERT': 0.6
    }

    MAX_STRINGDUMP_CHARS = 500
    MAX_STRING_SCORE = SCORE.VHIGH
    MAX_MACRO_SECTIONS = 3
    MIN_MACRO_SECTION_SCORE = SCORE.MED

    # in addition to those from olevba.py
    ADDITIONAL_SUSPICIOUS_KEYWORDS = ('WinHttp', 'WinHttpRequest', 'WinInet', 'Lib "kernel32" Alias')

    def __init__(self, cfg=None):
        super(Oletools, self).__init__(cfg)
        self.request = None
        self.task = None
        self.ole_result = None
        self.scored_macro_uri = False
        self.ip_re = re.compile(
            r'^((?:(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9]).){3}(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9]?[0-9]))'
        )
        self.domain_re = re.compile('^((?:(?:[a-zA-Z0-9\-]+)\.)+[a-zA-Z]{2,5})')
        self.uri_re = re.compile(r'[a-zA-Z]+:/{1,3}[^/]+/[^\s]+')

        self.word_chains = None
        self.macro_skip_words = None
        self.macro_words_re = re.compile("[a-z]{3,}")
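        # Macros larger than MACRO_SCORE_MAX_FILE_SIZE are skipped by the trigraph scorer;
        # MACRO_SCORE_MIN_ALERT is the threshold below which a macro is flagged as likely obfuscated.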
        self.macro_score_max_size = cfg.get('MACRO_SCORE_MAX_FILE_SIZE', None)
        self.macro_score_min_alert = cfg.get('MACRO_SCORE_MIN_ALERT', 0.6)

        self.all_macros = None
        self.all_vba = None
        self.filetypes = ['application',
                          'exec',
                          'image',
                          'text',
                          ]

    # noinspection PyUnresolvedReferences
    def import_service_deps(self):
        from oletools.olevba import VBA_Parser, VBA_Scanner
        from oletools.oleid import OleID, Indicator
        from oletools.thirdparty.olefile import olefile, olefile2
        from oletools.rtfobj import rtf_iter_objects
        import magic
        try:
            from al_services.alsvc_frankenstrings.balbuzard.patterns import PatternMatch
            global PatternMatch
        except ImportError:
            pass
        global VBA_Parser, VBA_Scanner
        global OleID, Indicator, olefile, olefile2, rtf_iter_objects
        global magic

    def start(self):
        self.log.debug("Service started")

        chain_path = os.path.join(os.path.dirname(__file__), "chains.json.gz")
        with gzip.open(chain_path) as fh:
            self.word_chains = json.load(fh)

        for k, v in self.word_chains.items():
            self.word_chains[k] = set(v)

        # Don't reward use of common keywords
        self.macro_skip_words = {'var', 'unescape', 'exec', 'for', 'while', 'array', 'object',
                                 'length', 'len', 'substr', 'substring', 'new', 'unicode', 'name', 'base',
                                 'dim', 'set', 'public', 'end', 'getobject', 'createobject', 'content',
                                 'regexp', 'date', 'false', 'true', 'none', 'break', 'continue', 'ubound',
                                 'none', 'undefined', 'activexobject', 'document', 'attribute', 'shell',
                                 'thisdocument', 'rem', 'string', 'byte', 'integer', 'int', 'function',
                                 'text', 'next', 'private', 'click', 'change', 'createtextfile', 'savetofile',
                                 'responsebody', 'opentextfile', 'resume', 'open', 'environment', 'write', 'close',
                                 'error', 'else', 'number', 'chr', 'sub', 'loop'}

    def get_tool_version(self):
        return self.SERVICE_VERSION

    # CIC: Call If Callable
    @staticmethod
    def cic(expression):
        """
        From 'base64dump.py' by Didier Stevens@https://DidierStevens.com
        """
        if callable(expression):
            return expression()
        else:
            return expression

    # IFF: IF Function
    @classmethod
    def iff(cls, expression, value_true, value_false):
        """
        From 'base64dump.py' by Didier Stevens@https://DidierStevens.com
        """
        if expression:
            return cls.cic(value_true)
        else:
            return cls.cic(value_false)

    # Ascii Dump
    @classmethod
    def ascii_dump(cls, data):
        return ''.join([cls.iff(ord(b) >= 32, b, '.') for b in data])

    def execute(self, request):
        self.task = request.task
        request.result = Result()
        self.ole_result = request.result
        self.request = request
        self.scored_macro_uri = False

        self.all_macros = []
        self.all_vba = []

        path = request.download()
        filename = os.path.basename(path)
        file_contents = request.get()

        try:
            self.check_for_macros(filename, file_contents, request.sha256)
            self.check_xml_strings(path)
            self.rip_mhtml(file_contents)
            self.extract_streams(path, file_contents)
            self.create_macro_sections(request.sha256)
        except Exception as e:
            self.log.error("We have encountered a critical error: {}".format(e))

        score_check = 0
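        # Manually sum the scores of all nested sections; if nothing scored, replace the result
        # with an empty one.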
        for section in self.ole_result.sections:
            score_check += self.calculate_nested_scores(section)

        if score_check == 0:
            request.result = Result()

        self.all_macros = None
        self.all_vba = None

    def check_for_indicators(self, filename):
        # noinspection PyBroadException
        try:
            ole_id = OleID(filename)
            indicators = ole_id.check()

            for indicator in indicators:
                # ignore these OleID indicators, they aren't all that useful
                if indicator.id in ("ole_format", "has_suminfo",):
                    continue

                indicator_score = SCORE.LOW  # default to LOW

                if indicator.value is True:
                    if indicator.id in ("word", "excel", "ppt", "visio"):
                        # good to know that the filetypes have been detected, but not a score-able offense
                        indicator_score = SCORE.NULL

                    section = ResultSection(indicator_score, "OleID indicator : " + indicator.name)
                    if indicator.description:
                        section.add_line(indicator.description)
                    self.ole_result.add_section(section)
        except:
            self.log.debug("OleID analysis failed")

    # Returns True if the URI should score
    # noinspection PyUnusedLocal
    def parse_uri(self, check_uri):
        m = self.uri_re.match(check_uri)
        if m is None:
            return False
        else:
            full_uri = m.group(0)

        proto, uri = full_uri.split('://', 1)
        if proto == 'file':
            return False

        scorable = False
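        # Skip well-known schema hosts and trusted domains so they do not generate tags or score.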
        if "http://purl.org/" not in full_uri and \
                "http://xml.org/" not in full_uri and \
                ".openxmlformats.org/" not in full_uri and \
                ".oasis-open.org/" not in full_uri and \
                ".xmlsoap.org/" not in full_uri and \
                ".microsoft.com/" not in full_uri and \
                ".w3.org/" not in full_uri and \
                ".gc.ca/" not in full_uri and \
                ".mil.ca/" not in full_uri:

            self.ole_result.add_tag(TAG_TYPE.NET_FULL_URI,
                                    full_uri,
                                    TAG_WEIGHT.MED,
                                    usage=TAG_USAGE.CORRELATION)
            scorable = True

            domain = self.domain_re.match(uri)
            ip = self.ip_re.match(uri)
            if ip:
                ip_str = ip.group(1)
                if not is_ip_reserved(ip_str):
                    self.ole_result.add_tag(TAG_TYPE.NET_IP,
                                            ip_str,
                                            TAG_WEIGHT.HIGH,
                                            usage=TAG_USAGE.CORRELATION)
            elif domain:
                dom_str = domain.group(1)
                self.ole_result.add_tag(TAG_TYPE.NET_DOMAIN_NAME,
                                        dom_str,
                                        TAG_WEIGHT.HIGH,
                                        usage=TAG_USAGE.CORRELATION)

        return scorable

    def check_xml_strings(self, path):
        xml_target_res = ResultSection(score=SCORE.NULL, title_text="Attached External Template Targets in XML")
        xml_ioc_res = ResultSection(score=SCORE.NULL, title_text="IOCs in XML:")
        xml_b64_res = ResultSection(score=SCORE.NULL, title_text="Base64 in XML:")
        try:
            template_re = re.compile(r'/attachedTemplate"\s+[Tt]arget="((?!file)[^"]+)"\s+[Tt]argetMode="External"')
            uris = []
            zip_uris = []
            b64results = {}
            b64_extracted = set()
            if zipfile.is_zipfile(path):
                try:
                    patterns = PatternMatch()
                except:
                    patterns = None
                z = zipfile.ZipFile(path)
                for f in z.namelist():
                    data = z.open(f).read()
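                    # Cap very large archive members at 500,000 bytes and report the massive-document heuristic.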
                    if len(data) > 500000:
                        data = data[:500000]
                        xml_ioc_res.report_heuristics(Oletools.AL_Oletools_003)
                        xml_ioc_res.score = min(xml_ioc_res.score, 1)
                    zip_uris.extend(template_re.findall(data))
                    # Use FrankenStrings modules to find other strings of interest
                    # Plain IOCs
                    if patterns:
                        pat_strs = ["http://purl.org", "schemas.microsoft.com", "schemas.openxmlformats.org",
                                    "www.w3.org"]
                        pat_ends = ["themeManager.xml", "MSO.DLL", "stdole2.tlb", "vbaProject.bin", "VBE6.DLL", "VBE7.DLL"]
                        pat_whitelist = ['Management', 'Manager', "microsoft.com"]

                        st_value = patterns.ioc_match(data, bogon_ip=True)
                        if len(st_value) > 0:
                            for ty, val in st_value.iteritems():
                                if val == "":
                                    asc_asc = unicodedata.normalize('NFKC', val).encode('ascii', 'ignore')
                                    if any(x in asc_asc for x in pat_strs) \
                                            or asc_asc.endswith(tuple(pat_ends)) \
                                            or asc_asc in pat_whitelist:
                                        continue
                                    else:
                                        xml_ioc_res.score += 1
                                        xml_ioc_res.add_line("Found %s string: %s in file %s}"
                                                             % (TAG_TYPE[ty].replace("_", " "), asc_asc, f))
                                        xml_ioc_res.add_tag(TAG_TYPE[ty], asc_asc, TAG_WEIGHT.LOW)
                                else:
                                    ulis = list(set(val))
                                    for v in ulis:
                                        if any(x in v for x in pat_strs) \
                                                or v.endswith(tuple(pat_ends)) \
                                                or v in pat_whitelist:
                                            continue
                                        else:
                                            xml_ioc_res.score += 1
                                            xml_ioc_res.add_line("Found %s string: %s in file %s"
                                                                 % (TAG_TYPE[ty].replace("_", " "), v, f))
                                            xml_ioc_res.add_tag(TAG_TYPE[ty], v, TAG_WEIGHT.LOW)

                    # Base64
                    b64_matches = set()
                    for b64_tuple in re.findall('(([\x20]{0,2}[A-Za-z0-9+/]{3,}={0,2}[\r]?[\n]?){6,})',
                                                data):
                        b64 = b64_tuple[0].replace('\n', '').replace('\r', '').replace(' ', '')
                        uniq_char = ''.join(set(b64))
                        if len(uniq_char) > 6:
                            if len(b64) >= 16 and len(b64) % 4 == 0:
                                b64_matches.add(b64)
                        """
                        Using some selected code from 'base64dump.py' by Didier Stevens@https://DidierStevens.com
                        """
                        for b64_string in b64_matches:
                            try:
                                b64_extract = False
                                base64data = binascii.a2b_base64(b64_string)
                                sha256hash = hashlib.sha256(base64data).hexdigest()
                                if sha256hash in b64_extracted:
                                    continue
                                # Search for embedded files of interest
                                if 500 < len(base64data) < 8000000:
                                    m = magic.Magic(mime=True)
                                    ftype = m.from_buffer(base64data)
                                    if 'octet-stream' not in ftype:
                                        for ft in self.filetypes:
                                            if ft in ftype:
                                                b64_file_path = os.path.join(self.working_directory,
                                                                             "{}_b64_decoded"
                                                                             .format(sha256hash[0:10]))
                                                self.request.add_extracted(b64_file_path,
                                                                           "Extracted b64 file during "
                                                                           "OLETools analysis.")
                                                with open(b64_file_path, 'wb') as b64_file:
                                                    b64_file.write(base64data)
                                                    self.log.debug("Submitted dropped file for analysis: {}"
                                                                   .format(b64_file_path))

                                                b64results[sha256hash] = [len(b64_string), b64_string[0:50],
                                                                          "[Possible base64 file contents in {}. "
                                                                          "See extracted files.]" .format(f), "", ""]

                                                b64_extract = True
                                                b64_extracted.add(sha256hash)
                                                break
                                if not b64_extract and len(base64data) > 30:
                                    if all(ord(c) < 128 for c in base64data):
                                        check_utf16 = base64data.decode('utf-16').encode('ascii', 'ignore')
                                        if check_utf16 != "":
                                            asc_b64 = check_utf16
                                        else:
                                            asc_b64 = self.ascii_dump(base64data)
                                        # If the data has fewer than 7 unique chars, ignore it
                                        uniq_char = ''.join(set(asc_b64))
                                        if len(uniq_char) > 6:
                                            if patterns:
                                                st_value = patterns.ioc_match(asc_b64, bogon_ip=True)
                                                if len(st_value) > 0:
                                                    for ty, val in st_value.iteritems():
                                                        if val == "":
                                                            asc_asc = unicodedata.normalize('NFKC', val)\
                                                                .encode('ascii', 'ignore')
                                                            xml_ioc_res.add_tag(TAG_TYPE[ty], asc_asc, TAG_WEIGHT.LOW)
                                                        else:
                                                            ulis = list(set(val))
                                                            for v in ulis:
                                                                xml_ioc_res.add_tag(TAG_TYPE[ty], v, TAG_WEIGHT.LOW)
                                            b64results[sha256hash] = [len(b64_string), b64_string[0:50], asc_b64,
                                                                          base64data, "{}" .format(f)]
                            except:
                                pass

                b64index = 0
                for b64k, b64l in b64results.iteritems():
                    xml_b64_res.score = 100
                    b64index += 1
                    sub_b64_res = (ResultSection(SCORE.NULL, title_text="Result {0} in file {1}"
                                                 .format(b64index, f), parent=xml_b64_res))
                    sub_b64_res.add_line('BASE64 TEXT SIZE: {}'.format(b64l[0]))
                    sub_b64_res.add_line('BASE64 SAMPLE TEXT: {}[........]'.format(b64l[1]))
                    sub_b64_res.add_line('DECODED SHA256: {}'.format(b64k))
                    subb_b64_res = (ResultSection(SCORE.NULL, title_text="DECODED ASCII DUMP:",
                                                  body_format=TEXT_FORMAT.MEMORY_DUMP,
                                                  parent=sub_b64_res))
                    subb_b64_res.add_line('{}'.format(b64l[2]))
                    if b64l[3] != "":
                        if patterns:
                            st_value = patterns.ioc_match(b64l[3], bogon_ip=True)
                            if len(st_value) > 0:
                                xml_b64_res.score += 1
                                for ty, val in st_value.iteritems():
                                    if val == "":
                                        asc_asc = unicodedata.normalize('NFKC', val).encode\
                                            ('ascii', 'ignore')
                                        xml_b64_res.add_tag(TAG_TYPE[ty], asc_asc, TAG_WEIGHT.LOW)
                                    else:
                                        ulis = list(set(val))
                                        for v in ulis:
                                            xml_b64_res.add_tag(TAG_TYPE[ty], v, TAG_WEIGHT.LOW)
                z.close()
                for uri in zip_uris:
                    if self.parse_uri(uri):
                        uris.append(uri)

                uris = list(set(uris))
                # If there are domains or IPs, report them
                if uris:
                    xml_target_res.score = 500
                    xml_target_res.add_lines(uris)
                    xml_target_res.report_heuristics(Oletools.AL_Oletools_001)

        except Exception as e:
            self.log.debug("Failed to analyze XML: {}".format(e))

        if xml_target_res.score > 0:
            self.ole_result.add_section(xml_target_res)
        if xml_ioc_res.score > 0:
            self.ole_result.add_section(xml_ioc_res)
        if xml_b64_res.score > 0:
            self.ole_result.add_section(xml_b64_res)

    # chains.json contains common English trigraphs. We score macros on how often these trigraphs appear in code,
    # skipping over some common keywords. A lower score indicates more randomized text; randomized variable and
    # function names are common in malicious macros.
    def flag_macro(self, macro_text):
        if self.macro_score_max_size is not None and len(macro_text) > self.macro_score_max_size:
            return False

        macro_text = macro_text.lower()
        score = 0.0

        word_count = 0
        byte_count = 0
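        # Each word (common VBA keywords excluded) scores the fraction of its adjacent letter pairs
        # that appear in the English chain table for the preceding letter; natural-looking names
        # score high, randomized names score low.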

        for m_cw in self.macro_words_re.finditer(macro_text):
            cw = m_cw.group(0)
            word_count += 1
            byte_count += len(cw)
            if cw in self.macro_skip_words:
                continue
            prefix = cw[0]
            tc = 0
            for i in xrange(1, len(cw) - 1):
                c = cw[i:i + 2]
                if c in self.word_chains.get(prefix, []):
                    tc += 1
                prefix = cw[i]

            score += tc / float(len(cw) - 2)

        if byte_count < 128 or word_count < 32:
            # these numbers are arbitrary, but if the sample is too short the score is worthless
            return False

        return (score / word_count) < self.macro_score_min_alert

    def create_macro_sections(self, request_hash):
        # noinspection PyBroadException
        try:
            filtered_macros = []
            if len(self.all_macros) > 0:
                # noinspection PyBroadException
                try:
                    # first sort all analyzed macros by their relative score, highest first
                    self.all_macros.sort(key=attrgetter('macro_score'), reverse=True)

                    # then only keep, theoretically, the most interesting ones
                    filtered_macros = self.all_macros[0:min(len(self.all_macros), self.MAX_MACRO_SECTIONS)]
                except:
                    self.log.debug("Sort and filtering of macro scores failed, "
                                   "reverting to full list of extracted macros")
                    filtered_macros = self.all_macros
            else:
                self.ole_result.add_section(ResultSection(SCORE.NULL, "No interesting macros found."))

            for macro in filtered_macros:
                if macro.macro_score >= self.MIN_MACRO_SECTION_SCORE:
                    self.ole_result.add_section(macro.macro_section)

            # Create extracted file for all VBA script.
            if len(self.all_vba) > 0:
                vba_file_path = ""
                all_vba = "\n".join(self.all_vba)
                vba_all_sha256 = hashlib.sha256(all_vba).hexdigest()
                if vba_all_sha256 == request_hash:
                    return

                try:
                    vba_file_path = os.path.join(self.working_directory, vba_all_sha256)
                    with open(vba_file_path, 'w') as fh:
                        fh.write(all_vba)

                    self.request.add_extracted(vba_file_path, "vba_code",
                                               "all_vba_%s.vba" % vba_all_sha256[:7])
                except Exception as e:
                    self.log.error("Error while adding extracted"
                                   " macro: {}: {}".format(vba_file_path, str(e)))
        except Exception as e:
            self.log.debug("OleVBA VBA_Parser.detect_vba_macros failed: {}".format(e))
            section = ResultSection(SCORE.NULL, "OleVBA : Error parsing macros: {}".format(e))
            self.ole_result.add_section(section)

    def check_for_macros(self, filename, file_contents, request_hash):
        # noinspection PyBroadException
        try:
            vba_parser = VBA_Parser(filename=filename, data=file_contents)

            try:
                if vba_parser.detect_vba_macros():
                    self.ole_result.add_tag(TAG_TYPE.TECHNIQUE_MACROS,
                                            "Contains VBA Macro(s)",
                                            weight=TAG_WEIGHT.LOW,
                                            usage=TAG_USAGE.IDENTIFICATION)

                    try:
                        for (subfilename, stream_path, vba_filename, vba_code) in vba_parser.extract_macros():
                            if vba_code.strip() == '':
                                continue
                            vba_code_sha256 = hashlib.sha256(vba_code).hexdigest()
                            if vba_code_sha256 == request_hash:
                                continue

                            self.all_vba.append(vba_code)
                            macro_section = self.macro_section_builder(vba_code)
                            toplevel_score = self.calculate_nested_scores(macro_section)

                            self.all_macros.append(Macro(vba_code, vba_code_sha256, macro_section, toplevel_score))
                    except Exception as e:
                        self.log.debug("OleVBA VBA_Parser.extract_macros failed: {}".format(str(e)))
                        section = ResultSection(SCORE.NULL, "OleVBA : Error extracting macros")
                        self.ole_result.add_section(section)

            except Exception as e:
                self.log.debug("OleVBA VBA_Parser.detect_vba_macros failed: {}".format(e))
                section = ResultSection(SCORE.NULL, "OleVBA : Error parsing macros: {}".format(e))
                self.ole_result.add_section(section)
        except:
            self.log.debug("OleVBA VBA_Parser constructor failed, may not be a supported OLE document")

    def calculate_nested_scores(self, section):
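        # Section scores are not rolled up until finalize() is called on the top-level section,
        # so walk the subsection tree manually to get a usable total early.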
        score = section.score
        if len(section.subsections) > 0:
            for subsection in section.subsections:
                score = score + self.calculate_nested_scores(subsection)
        return score

    def macro_section_builder(self, vba_code):

        vba_code_sha256 = hashlib.sha256(vba_code).hexdigest()
        macro_section = ResultSection(SCORE.NULL, "OleVBA : Macro detected")
        macro_section.add_line("Macro SHA256 : %s" % vba_code_sha256)
        #macro_section.add_line("Resubmitted macro as: macro_%s.vba" % vba_code_sha256[:7])
        macro_section.add_tag(TAG_TYPE.OLE_MACRO_SHA256,
                              vba_code_sha256,
                              weight=TAG_WEIGHT.LOW,
                              usage=TAG_USAGE.CORRELATION)

        dump_title = "Macro contents dump"
        analyzed_code = self.deobfuscator(vba_code)
        req_deob = False
        if analyzed_code != vba_code:
            req_deob = True
            dump_title += " [deobfuscated]"

        if len(analyzed_code) > self.MAX_STRINGDUMP_CHARS:
            dump_title += " - Displaying only the first %s characters." % self.MAX_STRINGDUMP_CHARS
            dump_subsection = ResultSection(SCORE.NULL, dump_title, body_format=TEXT_FORMAT.MEMORY_DUMP)
            dump_subsection.add_line(analyzed_code[0:self.MAX_STRINGDUMP_CHARS])
        else:
            dump_subsection = ResultSection(SCORE.NULL, dump_title, body_format=TEXT_FORMAT.MEMORY_DUMP)
            dump_subsection.add_line(analyzed_code)

        if req_deob:
            dump_subsection.add_tag(TAG_TYPE.TECHNIQUE_OBFUSCATION,
                                    "VBA Macro String Functions",
                                    weight=TAG_WEIGHT.LOW,
                                    usage=TAG_USAGE.IDENTIFICATION)

        score_subsection = self.macro_scorer(analyzed_code)
        if score_subsection:
            macro_section.add_section(score_subsection)
            macro_section.add_section(dump_subsection)

        # Flag macros
        if self.flag_macro(analyzed_code):
            macro_section.add_section(ResultSection(SCORE.HIGH, "Macro may be packed or obfuscated."))

        return macro_section

    # TODO: deobfuscator is very primitive; visual inspection and dynamic analysis will often be most useful
    # TODO: may want to eventually pull this out into a Deobfuscation helper that supports multi-languages
    def deobfuscator(self, text):
        self.log.debug("Deobfuscation running")
        deobf = text
        # noinspection PyBroadException
        try:
            # leading & trailing quotes in each local function are to facilitate the final re.sub in deobfuscator()

            # repeated chr(x + y) calls seen in wild, as per SANS ISC diary from May 8, 2015
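            # e.g. 'Chr(65 + 1)' is rewritten to the quoted literal "B"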
            def deobf_chrs_add(m):
                if m.group(0):
                    i = int(m.group(1)) + int(m.group(2))

                    if (i >= 0) and (i <= 255):
                        return "\"%s\"" % chr(i)
                return ''

            deobf = re.sub(r'chr[\$]?\((\d+) \+ (\d+)\)', deobf_chrs_add, deobf, flags=re.IGNORECASE)

            def deobf_unichrs_add(m):
                result = ''
                if m.group(0):
                    result = m.group(0)

                    i = int(m.group(1)) + int(m.group(2))

                    # unichr range is platform dependent, either [0..0xFFFF] or [0..0x10FFFF]
                    if (i >= 0) and ((i <= 0xFFFF) or (i <= 0x10FFFF)):
                        result = "\"%s\"" % unichr(i)
                return result

            deobf = re.sub(r'chrw[\$]?\((\d+) \+ (\d+)\)', deobf_unichrs_add, deobf, flags=re.IGNORECASE)

            # suspect we may see chr(x - y) samples as well
            def deobf_chrs_sub(m):
                if m.group(0):
                    i = int(m.group(1)) - int(m.group(2))

                    if (i >= 0) and (i <= 255):
                        return "\"%s\"" % chr(i)
                return ''

            deobf = re.sub(r'chr[\$]?\((\d+) \- (\d+)\)', deobf_chrs_sub, deobf, flags=re.IGNORECASE)

            def deobf_unichrs_sub(m):
                if m.group(0):
                    i = int(m.group(1)) - int(m.group(2))

                    # unichr range is platform dependent, either [0..0xFFFF] or [0..0x10FFFF]
                    if (i >= 0) and ((i <= 0xFFFF) or (i <= 0x10FFFF)):
                        return "\"%s\"" % unichr(i)
                return ''

            deobf = re.sub(r'chrw[\$]?\((\d+) \- (\d+)\)', deobf_unichrs_sub, deobf, flags=re.IGNORECASE)

            def deobf_chr(m):
                if m.group(1):
                    i = int(m.group(1))

                    if (i >= 0) and (i <= 255):
                        return "\"%s\"" % chr(i)
                return ''

            deobf = re.sub(r'chr[\$]?\((\d+)\)', deobf_chr, deobf, flags=re.IGNORECASE)

            def deobf_unichr(m):
                if m.group(1):
                    i = int(m.group(1))

                    # unichr range is platform dependent, either [0..0xFFFF] or [0..0x10FFFF]
                    if (i >= 0) and ((i <= 0xFFFF) or (i <= 0x10FFFF)):
                        return "\"%s\"" % unichr(i)
                return ''

            deobf = re.sub(r'chrw[\$]?\((\d+)\)', deobf_unichr, deobf, flags=re.IGNORECASE)

            # handle simple string concatenations
            deobf = re.sub('" & "', '', deobf)

        except:
            self.log.debug("Deobfuscator regex failure, reverting to original text")
            deobf = text

        return deobf

    #  note: we manually add up the score_section.score value here so that it is usable before the service finishes
    #        otherwise it is not calculated until finalize() is called on the top-level ResultSection
    def macro_scorer(self, text):
        self.log.debug("Macro scorer running")
        score_section = None

        try:
            vba_scanner = VBA_Scanner(text)
            vba_scanner.scan(include_decoded_strings=True)

            for string in self.ADDITIONAL_SUSPICIOUS_KEYWORDS:
                if re.search(string, text, re.IGNORECASE):
                    # play nice with detect_suspicious from olevba.py
                    vba_scanner.suspicious_keywords.append((string, 'May download files from the Internet'))

            stringcount = len(vba_scanner.autoexec_keywords) + len(vba_scanner.suspicious_keywords) + \
                len(vba_scanner.iocs)

            if stringcount > 0:
                score_section = ResultSection(SCORE.NULL, "Interesting macro strings found")

                if len(vba_scanner.autoexec_keywords) > 0:
                    subsection = ResultSection(min(self.MAX_STRING_SCORE,
                                                   SCORE.LOW * len(vba_scanner.autoexec_keywords)),
                                               "Autoexecution strings")

                    for keyword, description in vba_scanner.autoexec_keywords:
                        subsection.add_line(keyword)
                        subsection.add_tag(TAG_TYPE.OLE_MACRO_SUSPICIOUS_STRINGS,
                                           keyword, TAG_WEIGHT.HIGH,
                                           usage=TAG_USAGE.IDENTIFICATION)
                    score_section.add_section(subsection)

                if len(vba_scanner.suspicious_keywords) > 0:
                    subsection = ResultSection(min(self.MAX_STRING_SCORE,
                                                   SCORE.MED * len(vba_scanner.suspicious_keywords)),
                                               "Suspicious strings or functions")

                    for keyword, description in vba_scanner.suspicious_keywords:
                        subsection.add_line(keyword)
                        subsection.add_tag(TAG_TYPE.OLE_MACRO_SUSPICIOUS_STRINGS,
                                           keyword, TAG_WEIGHT.HIGH,
                                           usage=TAG_USAGE.IDENTIFICATION)
                    score_section.add_section(subsection)

                if len(vba_scanner.iocs) > 0:
                    subsection = ResultSection(min(500, SCORE.MED * len(vba_scanner.iocs)),
                                               "Potential host or network IOCs")

                    scored_macro_uri = False
                    for keyword, description in vba_scanner.iocs:
                        # olevba seems to have swapped the keyword for description during iocs extraction
                        # this holds true until at least version 0.27

                        subsection.add_line("{}: {}".format(keyword, description))
                        desc_ip = self.ip_re.match(description)
                        if self.parse_uri(description) is True:
                            scored_macro_uri = True
                        elif desc_ip:
                            ip_str = desc_ip.group(1)
                            if not is_ip_reserved(ip_str):
                                scored_macro_uri = True
                                subsection.add_tag(TAG_TYPE.NET_IP,
                                                   ip_str,
                                                   TAG_WEIGHT.HIGH,
                                                   usage=TAG_USAGE.CORRELATION)
                    score_section.add_section(subsection)
                    if scored_macro_uri and self.scored_macro_uri is False:
                        self.scored_macro_uri = True
                        scored_uri_section = ResultSection(score=500,
                                                           title_text="Found network indicator(s) within macros")
                        self.ole_result.add_section(scored_uri_section)

        except Exception as e:
            self.log.debug("OleVBA VBA_Scanner constructor failed: {}".format(str(e)))

        return score_section

    def rip_mhtml(self, data):
        if self.task.tag != 'document/office/mhtml':
            return

        mime_res = ResultSection(score=500,
                                 title_text="ActiveMime Document(s) in multipart/related")

        mhtml = email.message_from_string(data)
        # find all the attached files:
        for part in mhtml.walk():
            content_type = part.get_content_type()
            if content_type == "application/x-mso":
                part_data = part.get_payload(decode=True)
                if len(part_data) > 0x32 and part_data[:10].lower() == "activemime":
                    try:
                        part_data = zlib.decompress(part_data[0x32:])  # Grab  the zlib-compressed data
                        part_filename = part.get_filename(None) or hashlib.sha256(part_data).hexdigest()
                        part_path = os.path.join(self.working_directory, part_filename)
                        with open(part_path, 'w') as fh:
                            fh.write(part_data)
                        try:
                            mime_res.add_line(part_filename)
                            self.request.add_extracted(part_path, "ActiveMime x-mso from multipart/related.")
                        except Exception as e:
                            self.log.error("Error submitting extracted file: {}".format(e))
                    except Exception as e:
                        self.log.debug("Could not decompress ActiveMime part: {}".format(e))

        if len(mime_res.body) > 0:
            self.ole_result.add_section(mime_res)

    def process_ole10native(self, stream_name, data, streams_section):
        try:
            ole10native = Ole10Native(data)

            ole10_stream_file = os.path.join(self.working_directory,
                                             hashlib.sha256(ole10native.native_data).hexdigest())

            with open(ole10_stream_file, 'w') as fh:
                fh.write(ole10native.native_data)

            stream_desc = "{} ({}):\n\tFilename: {}\n\tData Length: {}".format(
                stream_name, ole10native.label, ole10native.filename, ole10native.native_data_size
            )
            streams_section.add_line(stream_desc)
            self.request.add_extracted(ole10_stream_file, "Embedded OLE Stream", stream_name)

            # handle embedded native macros
            if ole10native.label.endswith(".vbs") or \
                    ole10native.command.endswith(".vbs") or \
                    ole10native.filename.endswith(".vbs"):

                self.ole_result.add_tag(TAG_TYPE.TECHNIQUE_MACROS,
                                        "Contains Embedded VBA Macro(s)",
                                        weight=TAG_WEIGHT.LOW,
                                        usage=TAG_USAGE.IDENTIFICATION)

                self.all_vba.append(ole10native.native_data)
                macro_section = self.macro_section_builder(ole10native.native_data)
                toplevel_score = self.calculate_nested_scores(macro_section)

                self.all_macros.append(Macro(ole10native.native_data,
                                             hashlib.sha256(ole10native.native_data).hexdigest(),
                                             macro_section,
                                             toplevel_score))

            return True
        except Exception as e:
            self.log.debug("Failed to parse Ole10Native stream: {}".format(e))
            return False

    def process_powerpoint_stream(self, data, streams_section):
        try:
            powerpoint = PowerPointDoc(data)
            pp_line = "PowerPoint Document"
            if len(powerpoint.objects) > 0:
                streams_section.add_line(pp_line)
            for obj in powerpoint.objects:
                if obj.rec_type == "ExOleObjStg":
                    if obj.error is not None:
                        streams_section.add_line("\tError parsing ExOleObjStg stream. This is suspicious.")
                        streams_section.score += 50
                        continue

                    ole_hash = hashlib.sha256(obj.raw).hexdigest()
                    ole_obj_filename = os.path.join(self.working_directory,
                                                    "{}.pp_ole".format(ole_hash))
                    with open(ole_obj_filename, 'w') as fh:
                        fh.write(obj.raw)

                    streams_section.add_line(
                        "\tPowerPoint Embedded OLE Storage:\n\t\tSHA-256: {}\n\t\t"
                        "Length: {}\n\t\tCompressed: {}".format(
                            ole_hash, len(obj.raw), obj.compressed)
                    )
                    self.log.debug("Added OLE stream within a PowerPoint Document Stream: {}".format(ole_obj_filename))
                    self.request.add_extracted(ole_obj_filename,
                                               "Embedded OLE Storage within PowerPoint Document Stream",
                                               "ExeOleObjStg_{}".format(ole_hash)
                                               )
            return True
        except Exception as e:
            self.log.error("Failed to parse PowerPoint Document stream: {}".format(e))
            return False

    def process_ole_stream(self, ole, streams_section):
        listdir = ole.listdir()
        streams = []
        for dir_entry in listdir:
            streams.append('/'.join(dir_entry))

        if "\x05HwpSummaryInformation" in streams:
            decompress = True
        else:
            decompress = False

        decompress_macros = []

        for stream in streams:
            self.log.debug("Extracting stream: {}".format(stream))
            data = ole.openstream(stream).getvalue()
            try:

                if "Ole10Native" in stream:
                    if self.process_ole10native(stream, data, streams_section) is True:
                        continue

                elif "PowerPoint Document" in stream:
                    if self.process_powerpoint_stream(data, streams_section) is True:
                        continue

                if decompress:
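                    # wbits=-15 tells zlib to expect a raw DEFLATE stream (no header or
                    # checksum), which is how these compressed document streams are stored.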
                    try:
                        data = zlib.decompress(data, -15)
                    except zlib.error:
                        pass

                streams_section.add_line(safe_str(stream))
                # Only write all streams with deep scan.
                stream_name = '{}.ole_stream'.format(hashlib.sha256(data).hexdigest())
                if self.request.deep_scan:
                    stream_path = os.path.join(self.working_directory, stream_name)
                    with open(stream_path, 'w') as fh:
                        fh.write(data)
                    self.request.add_extracted(stream_path, "Embedded OLE Stream.", stream)
                    if decompress and (stream.endswith(".ps") or stream.startswith("Scripts/")):
                        decompress_macros.append(data)

            except Exception as e:
                self.log.error("Error adding extracted stream {}: {}".format(stream, e))
                continue
        if decompress_macros:
            macros = "\n".join(decompress_macros)
            stream_name = '{}.macros'.format(hashlib.sha256(macros).hexdigest())
            stream_path = os.path.join(self.working_directory, stream_name)
            with open(stream_path, 'w') as fh:
                fh.write(macros)

            self.request.add_extracted(stream_path, "Combined macros.", "all_macros.ps")
            return True
        return False

    # noinspection PyBroadException
    def extract_streams(self, file_name, file_contents):
        oles = {}
        try:
            streams_res = ResultSection(score=SCORE.INFO,
                                        title_text="Embedded document stream(s)")

            is_zip = False
            is_ole = False
            # Get the OLEs
            if zipfile.is_zipfile(file_name):
                is_zip = True
                z = zipfile.ZipFile(file_name)
                for f in z.namelist():
                    if f in oles:
                        continue
                    bin_data = z.open(f).read()
                    bin_fname = os.path.join(self.working_directory,
                                             "{}.tmp".format(hashlib.sha256(bin_data).hexdigest()))
                    with open(bin_fname, 'w') as bin_fh:
                        bin_fh.write(bin_data)
                    if olefile.isOleFile(bin_fname):
                        oles[f] = olefile.OleFileIO(bin_fname)
                    elif olefile2.isOleFile(bin_fname):
                        oles[f] = olefile2.OleFileIO(bin_fname)
                z.close()

            if olefile.isOleFile(file_name):
                is_ole = True
                oles[file_name] = olefile.OleFileIO(file_name)

            elif olefile2.isOleFile(file_name):
                is_ole = True
                oles[file_name] = olefile2.OleFileIO(file_name)

            if is_zip and is_ole:
                streams_res.report_heuristics(Oletools.AL_Oletools_002)

            decompressed_macros = False
            for ole_filename in oles.iterkeys():
                try:
                    decompressed_macros |= self.process_ole_stream(oles[ole_filename], streams_res)
                except Exception:
                    continue

            if decompressed_macros:
                streams_res.score = SCORE.HIGH

            for _, offset, rtfobject in rtf_iter_objects(file_contents):
                rtfobject_name = hex(offset) + '.rtfobj'
                extracted_obj = os.path.join(self.working_directory, rtfobject_name)
                with open(extracted_obj, 'wb') as fh:
                    fh.write(rtfobject)
                self.request.add_extracted(extracted_obj,
                                           'Embedded RTF Object at offset %s' % hex(offset),
                                           rtfobject_name)

            if len(streams_res.body) > 0:
                self.ole_result.add_section(streams_res)

        except Exception:
            self.log.debug("Error extracting streams: {}".format(traceback.format_exc(limit=2)))

        finally:
            for fd in oles.itervalues():
                try:
                    fd.close()
                except:
                    pass
Example #11
class Firmsplode(ServiceBase):
    SERVICE_CATEGORY = 'Static Analysis'
    SERVICE_ACCEPTS = 'unknown'
    SERVICE_REVISION = ServiceBase.parse_revision('$Id$')
    SERVICE_VERSION = '1'
    SERVICE_ENABLED = True
    SERVICE_STAGE = 'CORE'
    SERVICE_CPU_CORES = 1
    SERVICE_RAM_MB = 1024

    def __init__(self, cfg=None):
        super(Firmsplode, self).__init__(cfg)

    def start(self):
        self.log.debug("Firmsplode service started")

    def execute(self, request):
        local = request.download()

        al_result = Result()

        command = self.construct_command(request)

        request.task.set_milestone("started", True)

        extract_section = ResultSection(SCORE.NULL, 'Extracted and Carved Files')

        for module in binwalk.scan(local, **command):
            section = ResultSection(SCORE.NULL, module.name, body_format=TEXT_FORMAT.MEMORY_DUMP)
            for result in module.results:
                section.add_line("0x%.8X : %s" % (result.offset, result.description))

                output = module.extractor.output
                if result.file.path in output:
                    file_output = output[result.file.path]

                    if result.offset in file_output.carved:
                        carved_path = file_output.carved[result.offset]
                        extract_section.add_line("Carved data from offset 0x%X to %s" % (result.offset, carved_path))
                        file_name = carved_path.split("/")[-1]
                        request.add_extracted(carved_path, 'Carved File', file_name)

                    if result.offset in file_output.extracted and \
                            len(file_output.extracted[result.offset].files) > 0:

                        extracted = file_output.extracted[result.offset]
                        path = extracted.files[0]
                        extract = extracted.command

                        extract_section.add_line("Extracted %d files from offset 0x%X to '%s' using '%s'" % (
                            len(extracted.files), result.offset, path, extract))

                        if os.path.isdir(path):
                            zip_file = zipfile.ZipFile("%s.zip" % path.split("/")[-1], 'w', zipfile.ZIP_DEFLATED)
                            self.zip_dir(path, zip_file)
                            zip_file.close()
                            request.add_supplementary(zip_file.filename, extract, zip_file.filename.split("/")[-1])
                        else:
                            request.add_extracted(path, extract, path.split("/")[-1])

            al_result.add_section(section)

        request.task.set_milestone("finished", True)
        al_result.add_section(extract_section)
        request.result = al_result

    def zip_dir(self, path, ziph):
        for root, dirs, files in os.walk(path):
            for file in files:
                ziph.write(os.path.join(root, file))

    def construct_command(self, request):
        cmd = {}
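        # A minimal sketch of the options handed to binwalk.scan() in execute() above;
        # the keys are assumed to mirror binwalk's command-line flags of the same names
        # (--signature, --entropy, --extract, --quiet, --disasm and the output directory).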

        cmd['signature'] = True
        cmd['entropy'] = True
        cmd['directory'] = self.working_directory
        cmd['extract'] = True
        cmd['quiet'] = True
        cmd['disasm'] = True

        return cmd
Example #12
class Symantec(ServiceBase):
    SERVICE_CATEGORY = 'Antivirus'
    SERVICE_DEFAULT_CONFIG = {
        'ICAP_HOST': '127.0.0.1',
        'ICAP_PORT': 1344,
    }
    SERVICE_DESCRIPTION = "This services wraps Symantec's ICAP proxy."
    SERVICE_ENABLED = True
    SERVICE_REVISION = ServiceBase.parse_revision(
        '$Id: 12e8cb1e37ab67654bd346b23565bce525fe7857 $')
    SERVICE_SUPPORTED_PLATFORMS = [
        'Linux',
        'Windows',
    ]
    SERVICE_VERSION = '1'
    SERVICE_CPU_CORES = 0.05
    SERVICE_RAM_MB = 32

    def __init__(self, cfg=None):
        super(Symantec, self).__init__(cfg)
        self.icap_host = self.cfg.get('ICAP_HOST')
        self.icap_port = self.cfg.get('ICAP_PORT')
        self._av_info = ''
        self.icap = None

    def connect_icap(self):
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        sock.connect((self.icap_host, self.icap_port))
        return sock

    def execute(self, request):
        request.result = Result()
        local_filename = request.download()
        with open(local_filename) as f:
            file_content = f.read()
        request.set_service_context(self._av_info)
        max_retry = 2
        done = False
        retry = 0

        while not done:
            # If this is a retry, sleep for a second
            if retry:
                # Sleep between 1 and 3 seconds times the number of retry
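                # e.g. on the second retry (retry == 2) this works out to
                # 2 * (a random value between 1.00 and 2.99), i.e. roughly 2 to 6 seconds.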
                time.sleep(retry * random.randrange(100, 300, 1) / float(100))

            output = self.icap.scan_data(file_content)

            ret = self.parse_results(output, request.result, local_filename)
            if ret in [201, 204]:
                done = True
            elif ret == 500:
                # Symantec often 500's on truncated zips and other formats. It tries to decompress/parse
                # them and can't proceed.
                request.result.add_section(
                    ResultSection(SCORE.NULL,
                                  'Symantec could not scan this file.'))
                done = True
            elif ret == 551:
                if retry == max_retry:
                    raise Exception("[FAILED %s times] Resources unvailable" %
                                    max_retry)
                else:
                    self.log.info("Resource unavailable... retrying")
                    retry += 1
            elif ret == 558:
                raise Exception(
                    "Could not scan file, Symantec license is expired!")
            elif ret == 100:
                raise Exception("Could not find response from icap service, "
                                "response header %s" %
                                output.partition("\r")[0])
            else:
                raise Exception("Unknown return code from symantec: %s" % ret)
        return

    def get_symantec_version(self):
        engine, vers = self.icap.get_service_version()
        return "Engine: {} DAT: {}".format(engine, vers)

    def get_tool_version(self):
        return self._av_info

    def parse_results(self, result_content, file_res, local_filename):
        absolute_filename = ''
        nvirusfound = 0

        lines = result_content.splitlines()
        i = 0

        while i < len(lines):
            if "204 No Content Necessary" in lines[i]:
                return 204
            elif "500 Internal Server Error" in lines[i]:
                return 500
            elif "551 Resource unavailable" in lines[i]:
                return 551
            elif "558 Aborted" in lines[i]:
                return 558
            elif "X-Violations-Found:" in lines[i]:
                nvirusfound = int(lines[i].split(': ')[1])
            elif nvirusfound:
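                # Each violation record is assumed to span four lines of the ICAP reply;
                # the detection name is taken from the second line, up to the first '|'.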
                i += 1
                virus_name = lines[i].split('|')[0].strip()
                self.set_result_values(local_filename, file_res, virus_name,
                                       absolute_filename)
                i += 2
                nvirusfound -= 1

                if not nvirusfound:
                    return 201

            i += 1

        return 100

    @staticmethod
    def set_result_values(local_filename, file_res, virus_name,
                          absolute_filename):
        valid_embedded_filename = ""
        if len(absolute_filename) != len(local_filename):
            embedded_char_index = len(local_filename) + 1
            valid_embedded_filename = absolute_filename[embedded_char_index:]

        score = SCORE.SURE
        if virus_name in CONTAINER_ERRORS:
            score = SCORE.INFO

        if valid_embedded_filename != "":
            if os.path.sep == '\\':
                valid_embedded_filename = valid_embedded_filename.replace(
                    '/', '\\')

            res = VirusHitSection(virus_name, score, valid_embedded_filename)
        else:
            res = VirusHitSection(virus_name, score)

        file_res.append_tag(VirusHitTag(virus_name))

        cve_found = re.search("CVE-[0-9]{4}-[0-9]{4}", virus_name)
        if cve_found:
            file_res.add_tag(TAG_TYPE.EXPLOIT_NAME,
                             virus_name[cve_found.start():cve_found.end()],
                             TAG_SCORE.MED,
                             usage='IDENTIFICATION')
            file_res.add_tag(TAG_TYPE.FILE_SUMMARY,
                             virus_name[cve_found.start():cve_found.end()],
                             TAG_SCORE.MED,
                             usage='IDENTIFICATION')

        file_res.add_result(res)

    def start(self):
        self.icap = SymantecIcapClient(self.icap_host, self.icap_port)
        self._av_info = self.get_symantec_version()
Example #13
class ConfigDecoder(ServiceBase):
    SERVICE_CATEGORY = 'Static Analysis'
    SERVICE_DEFAULT_CONFIG = {
        "USE_RIAK_FOR_RULES":
        True,
        "RULE_PATH":
        'config_dec_rules.yar',
        "SIGNATURE_USER":
        '******',
        "SIGNATURE_PASS":
        '******',
        "SIGNATURE_URL":
        'https://localhost:443',
        "SIGNATURE_QUERY":
        'meta.al_configparser:* AND (meta.al_status:DEPLOYED OR meta.al_status:NOISY)'
    }
    SERVICE_DESCRIPTION = "This service runs implant configuration extraction routines for implants identified " \
                          "by Yara rules."
    SERVICE_ENABLED = True
    SERVICE_REVISION = ServiceBase.parse_revision(
        '$Id: cb9a8924dbd5a65cfcd53d222665fd7cb32db7ff $')
    SERVICE_VERSION = '1'

    SERVICE_CPU_CORES = 0.20
    SERVICE_RAM_MB = 1024

    def __init__(self, cfg=None):
        super(ConfigDecoder, self).__init__(cfg)
        self.config_parsers = []
        self.rules = None
        self.signature_user = self.cfg.get('SIGNATURE_USER')
        self.signature_pass = self.cfg.get('SIGNATURE_PASS')
        self.signature_url = self.cfg.get('SIGNATURE_URL')
        self.signature_query = self.cfg.get(
            'SIGNATURE_QUERY',
            'meta.al_configparser:* AND (meta.al_status:DEPLOYED OR meta.al_status:NOISY)'
        )
        self.yara_rulepath = None

    # noinspection PyUnresolvedReferences,PyGlobalUndefined
    def import_service_deps(self):
        global yara
        import yara

    # noinspection PyBroadException
    def init_rules(self):
        try:
            self.log.info("Loading rule file...")
            if not self.cfg.get('USE_RIAK_FOR_RULES', False):
                self.yara_rulepath = self.cfg.get('RULE_PATH')
                self.rules = yara.compile(self.yara_rulepath)
            else:
                sig_client = Client(self.signature_url,
                                    auth=(self.signature_user,
                                          self.signature_pass))
                al_temp_dir = os.path.join(tempfile.gettempdir(), 'al',
                                           self.SERVICE_NAME, str(os.getpid()))
                try:
                    os.makedirs(al_temp_dir)
                except:
                    pass
                self.yara_rulepath = os.path.join(al_temp_dir, 'rules.yar')
                sig_client.signature.download(output=self.yara_rulepath,
                                              query=self.signature_query,
                                              safe=True)
                self.rules = yara.compile(self.yara_rulepath)
                try:
                    os.remove(self.yara_rulepath)
                except:  # pylint: disable=W0702
                    pass

            self.log.info("Using rule file: %s" % self.yara_rulepath)

        except:  # pylint: disable=W0702
            self.log.exception('Problem initializing yara rules:')

    def load_parsers(self):
        from al_services.alsvc_configdecoder import parsers

        self.config_parsers.extend([
            parsers.DarkComet51Parser(),
            parsers.GenericParser(),
        ])

    def start(self):
        self.load_parsers()
        self.init_rules()

    # noinspection PyBroadException
    def apply_parser(self, config_parser, request, hit, content):
        result = request.result

        # if the config_parser satisfies the prerequisite...
        if config_parser.accept(request, hit, content):
            # Attempt to parse config.
            parsed_configs = []
            try:
                parsed_configs = config_parser.parse(request, hit, content)
            except:  # pylint: disable=W0702
                self.log.exception("Parse failure:")

            failed = set()
            for parsed in parsed_configs:
                try:

                    if type(parsed) == configparser.NullParsedConfig and parsed.name not in failed:
                        failed.add(parsed.name)
                        section = ResultSection(
                            SCORE['LOW'],
                            "Configuration identified for %s but "
                            "was not successfully parsed!" % parsed.name,
                            parsed.classification)
                    else:
                        section = ResultSection(SCORE['SURE'], [
                            parsed.name, " configuration successfully parsed."
                        ], parsed.classification)
                        result.add_tag(TAG_TYPE['FILE_CONFIG'],
                                       parsed.name,
                                       TAG_WEIGHT['HIGH'],
                                       classification=parsed.classification)

                        # Add parsed config to the report.
                        parsed.report(request, section, self)

                    if section:
                        result.add_section(section)
                except:  # pylint: disable=W0702
                    self.log.exception("Parse failure:")

    # noinspection PyBroadException
    def execute(self, request):
        request.result = Result()
        content = request.get()

        # Run yara rules for all parsers.
        all_hits = {}
        matches = self.rules.match(data=content)

        # Reorganise the matches in a dictionary
        for match in matches:
            try:
                name = match.meta.get('al_configparser', None)
                if name:
                    all_hits[name] = all_hits.get(name, []) + [match]
            except:  # pylint: disable=W0702
                self.log.exception('Failed iterating over yara matches:')

        # Go through every config parser.
        for config_parser in self.config_parsers:
            try:
                name = config_parser.__class__.__name__.split('.')[-1]
                hits = all_hits.get(name, [])
                self.apply_parser(config_parser, request, hits, content)
            except:  # pylint: disable=W0702
                self.log.exception("Config parser failed:")
Example #14
class KasperskyIcap(ServiceBase):
    SERVICE_CATEGORY = 'Antivirus'
    SERVICE_DESCRIPTION = "This services wraps Kaspersky ICAP Proxy."
    SERVICE_ENABLED = True
    SERVICE_REVISION = ServiceBase.parse_revision('$Id: fb90b7c4859501dc24213ba17df1e228a3f53af1 $')
    SERVICE_VERSION = '1'
    SERVICE_DEFAULT_CONFIG = {
        "ICAP_HOST": "localhost",
        "ICAP_PORT": 1344,
    }
    SERVICE_CPU_CORES = 0.3
    SERVICE_RAM_MB = 128

    def __init__(self, cfg=None):
        super(KasperskyIcap, self).__init__(cfg)
        self.icap_host = None
        self.icap_port = None
        self.kaspersky_version = None
        self.icap = None
        self._av_info = ''

    def execute(self, request):
        payload = request.get()
        icap_result = self.icap.scan_data(payload)
        request.result = self.icap_to_alresult(icap_result)
        request.task.report_service_context(self._av_info)

        # if deepscan request include the ICAP HTTP in debug info.
        if request.task.deep_scan and request.task.profile:
            request.task.set_debug_info(icap_result)

    def get_kaspersky_version(self):
        av_info = 'Kaspersky Antivirus for Proxy 5.5'
        defs = self.result_store.get_blob("kaspersky_update_definition")
        if defs:
            return "%s - Defs %s" % (av_info, defs.replace(".zip", "").replace("Updates", ""))
        return av_info

    def get_tool_version(self):
        return self._av_info

    def icap_to_alresult(self, icap_result):
        x_response_info = None
        x_virus_id = None
        result_lines = icap_result.strip().splitlines()
        if len(result_lines) <= 3:
            raise Exception('Invalid result from Kaspersky ICAP server: %s' % str(icap_result))

        xri_key = 'X-Response-Info:'
        xvirus_key = 'X-Virus-ID:'
        for line in result_lines:
            if line.startswith(xri_key):
                x_response_info = line[len(xri_key):].strip()
            elif line.startswith(xvirus_key):
                x_virus_id = line[len(xvirus_key):].strip()

        result = Result()
        # Virus hits should have XRI of 'blocked' and XVIRUS containing the virus information.
        # Virus misses should have XRI of 'passed' and no XVIRUS section
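        # Illustrative only (not captured from a live server), an infected reply would
        # carry headers along these lines:
        #   X-Response-Info: blocked
        #   X-Virus-ID: INFECTED EICAR-Test-File
        # which the block below turns into a VirusHitSection/VirusHitTag for 'EICAR-Test-File'.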
        if x_virus_id:
            if not x_response_info == 'blocked':
                self.log.warn('found virus id but response was: %s', str(x_response_info))
            virus_name = x_virus_id.replace('INFECTED ', '')
            result.add_section(VirusHitSection(virus_name, SCORE.SURE))
            result.append_tag(VirusHitTag(virus_name))
            
        return result

    def start(self):
        self.icap_host = self.cfg.get('ICAP_HOST')
        self.icap_port = int(self.cfg.get('ICAP_PORT'))
        self.icap = KasperskyIcapClient(self.icap_host, self.icap_port)
        self._av_info = self.get_kaspersky_version()
Example #15
class MetaDefender(ServiceBase):
    SERVICE_CATEGORY = "Antivirus"
    SERVICE_DESCRIPTION = "This service is a multi scanner with 20 engines."
    SERVICE_ENABLED = True
    SERVICE_REVISION = ServiceBase.parse_revision(
        '$Id: af7a1fdfcfa4df9fb8f95b0399cb866ece5d9de7 $')
    SERVICE_VERSION = '1'
    SERVICE_STAGE = 'CORE'
    SERVICE_CPU_CORES = 0.1
    SERVICE_RAM_MB = 64
    SERVICE_DEFAULT_CONFIG = {
        'BASE_URL': 'http://localhost:8008/',
        "MD_VERSION": 4,
        'MD_TIMEOUT': 40
    }

    def __init__(self, cfg=None):
        super(MetaDefender, self).__init__(cfg)
        self.dat_hash = "0"
        self.engine_map = {}
        self.engine_list = []
        self.newest_dat = epoch_to_local(0)
        self.oldest_dat = now_as_local()
        self.session = None
        self._updater_id = "ENABLE_SERVICE_BLK_MSG"
        self.timeout = self.cfg.get('MD_TIMEOUT', (self.SERVICE_TIMEOUT * 2) / 3)
        self.init_vmap = False

    # noinspection PyUnresolvedReferences,PyGlobalUndefined
    def import_service_deps(self):
        global requests
        import requests

    def start(self):
        self.log.debug("MetaDefender service started")
        self.session = requests.session()
        try:
            self._get_version_map()
            self.init_vmap = True
        except Exception as e:
            self.log.warn(
                "Metadefender get_version_map failed with error code %s" %
                e.message)
            self.init_vmap = False

    @staticmethod
    def _format_engine_name(name):
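        # Normalizes engine names so the version map keys line up with the scan_details
        # keys, e.g. a hypothetical "Ahnlab AV" becomes "ahnlab" (lower-cased, spaces and
        # '!' removed, trailing "av" dropped).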
        new_name = name.lower().replace(" ", "").replace("!", "")
        if new_name.endswith("av"):
            new_name = new_name[:-2]
        return new_name

    def _get_version_map(self):
        self.engine_map = {}
        engine_list = []
        newest_dat = 0
        oldest_dat = now()

        url = self.cfg.get('BASE_URL') + "stat/engines"
        try:
            r = self.session.get(url=url, timeout=self.timeout)
        except requests.exceptions.Timeout:
            raise Exception("Metadefender service timeout.")

        engines = r.json()

        for engine in engines:
            if self.cfg.get("MD_VERSION") == 4:
                name = self._format_engine_name(engine["eng_name"])
                version = engine['eng_ver']
                def_time = engine['def_time']
                etype = engine['engine_type']
            elif self.cfg.get("MD_VERSION") == 3:
                name = self._format_engine_name(engine["eng_name"]).replace(
                    "scanengine", "")
                version = engine['eng_ver']
                def_time = engine['def_time'].replace(" AM", "").replace(
                    " PM", "").replace("/", "-").replace(" ", "T")
                def_time = def_time[6:10] + "-" + def_time[:5] + def_time[10:] + "Z"
                etype = engine['eng_type']
            else:
                raise Exception("Unknown metadefender version")

            # Compute newest DAT
            dat_epoch = iso_to_epoch(def_time)
            if dat_epoch > newest_dat:
                newest_dat = dat_epoch

            if dat_epoch < oldest_dat and dat_epoch != 0 and etype in [
                    "av", "Bundled engine"
            ]:
                oldest_dat = dat_epoch

            self.engine_map[name] = {
                'version': version,
                'def_time': iso_to_local(def_time)[:19]
            }
            engine_list.append(name)
            engine_list.append(version)
            engine_list.append(def_time)

        self.newest_dat = epoch_to_local(newest_dat)[:19]
        self.oldest_dat = epoch_to_local(oldest_dat)[:19]
        self.dat_hash = hashlib.md5("".join(engine_list)).hexdigest()

    def get_tool_version(self):
        return self.dat_hash

    def execute(self, request):
        if self.init_vmap is False:
            self._get_version_map()
            self.init_vmap = True

        filename = request.download()
        response = self.scan_file(filename)
        result = self.parse_results(response)
        request.result = result
        request.set_service_context("Definition Time Range: %s - %s" %
                                    (self.oldest_dat, self.newest_dat))

    def get_scan_results_by_data_id(self, data_id):
        url = self.cfg.get('BASE_URL') + 'file/{0}'.format(data_id)
        try:
            return self.session.get(url=url, timeout=self.timeout)
        except requests.exceptions.Timeout:
            raise Exception("Metadefender service timeout.")

    def scan_file(self, filename):
        # Let's scan the file
        url = self.cfg.get('BASE_URL') + "file"
        with open(filename, 'rb') as f:
            sample = f.read()

        try:
            r = self.session.post(url=url, data=sample, timeout=self.timeout)
        except requests.exceptions.Timeout:
            raise Exception("Metadefender service timeout.")

        if r.status_code == requests.codes.ok:
            data_id = r.json()['data_id']
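            # Poll the report for this data_id until progress_percentage reaches 100;
            # a non-OK poll response short-circuits and its JSON body is returned immediately.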
            while True:
                r = self.get_scan_results_by_data_id(data_id=data_id)
                if r.status_code != requests.codes.ok:
                    return r.json()
                if r.json()['scan_results']['progress_percentage'] == 100:
                    break
                else:
                    time.sleep(0.2)

        json_response = r.json()

        return json_response

    def parse_results(self, response):
        res = Result()
        response = response.get('scan_results', response)
        virus_name = ""

        if response is not None and response.get('progress_percentage') == 100:
            hit = False
            av_hits = ResultSection(title_text='Anti-Virus Detections')

            scans = response.get('scan_details', response)
            for majorkey, subdict in sorted(scans.iteritems()):
                score = SCORE.NULL
                if subdict['scan_result_i'] == 1:
                    virus_name = subdict['threat_found']
                    if virus_name:
                        score = SCORE.SURE
                elif subdict['scan_result_i'] == 2:
                    virus_name = subdict['threat_found']
                    if virus_name:
                        score = SCORE.VHIGH

                if score:
                    virus_name = virus_name.replace("a variant of ", "")
                    engine = self.engine_map[self._format_engine_name(
                        majorkey)]
                    res.append_tag(
                        VirusHitTag(virus_name,
                                    context="scanner:%s" % majorkey))
                    av_hits.add_section(
                        AvHitSection(majorkey, virus_name, engine, score))
                    hit = True

            if hit:
                res.add_result(av_hits)

        return res
Example #16
class Swiffer(ServiceBase):
    AL_Swiffer_001 = Heuristic(
        "AL_Swiffer_001", "Large String Buffer", "audiovisual/flash",
        dedent("""\
                                      Checks for printable character buffers larger than 512 bytes.
                                      """))

    AL_Swiffer_002 = Heuristic(
        "AL_Swiffer_002", "Recent Compilation", "audiovisual/flash",
        dedent("""\
                                      Checks if the SWF was compiled within the last 24 hours.
                                      """))

    AL_Swiffer_003 = Heuristic(
        "AL_Swiffer_003", "Embedded Binary Data", "audiovisual/flash",
        dedent("""\
                                      Checks if the SWF contains embedded binary data.
                                      """))
    AL_Swiffer_004 = Heuristic(
        "AL_Swiffer_004", "Incomplete Disassembly", "audiovisual/flash",
        dedent("""\
                                      Attempts disassembly and reports errors which may be indicative
                                      of intentional obfuscation.
                                      """))

    SERVICE_CATEGORY = 'Static Analysis'
    SERVICE_ACCEPTS = 'audiovisual/flash'
    SERVICE_DESCRIPTION = "This service extracts metadata and performs anomaly detection on SWF files."
    SERVICE_ENABLED = True
    SERVICE_VERSION = '1'
    SERVICE_REVISION = ServiceBase.parse_revision(
        '$Id: cb5f1e5c7926b0025a8a9aa84f17501788fd894a $')
    SERVICE_CPU_CORES = 0.05
    SERVICE_RAM_MB = 128

    SERVICE_DEFAULT_CONFIG = {
        'RABCDASM': r'/opt/al/support/swiffer/rabcdasm/rabcdasm',
    }

    def __init__(self, cfg=None):
        super(Swiffer, self).__init__(cfg)
        self.result = None
        self.request = None
        self.tag_analyzers = {
            'DoABC': self._do_abc,
            'DefineBinaryData': self._define_binary_data,
            'ExportAssets': self._export_assets,
            'NameCharacter': self._namecharacter,
            'ProductInfo': self._productinfo,
            'SymbolClass': self._symbolclass,
        }
        self.swf = None
        self.tag_summary = None
        self.symbols = None
        self.binary_data = None
        self.exported_assets = None
        self.big_buffers = None
        self.rabcdasm = self.cfg.get('RABCDASM')
        self.has_product_info = False
        self.anti_decompilation = False
        self.recent_compile = False
        self.disasm_path = None

    def start(self):
        self.log.debug("Service started")
        if not os.path.isfile(self.rabcdasm):
            self.rabcdasm = None

    def get_tool_version(self):
        return self.SERVICE_VERSION

    # noinspection PyGlobalUndefined,PyUnresolvedReferences
    def import_service_deps(self):
        global SWF, ProductKind, ProductEdition
        from swf.movie import SWF
        from swf.consts import ProductKind, ProductEdition

    def execute(self, request):
        self.request = request
        request.result = Result()
        self.result = self.request.result
        file_path = self.request.download()
        fh = open(file_path, 'rb')
        try:
            self.swf = SWF(fh)
            if self.swf is None:
                raise
        except:
            self.log.exception("Unable to parse srl %s:" % self.request.srl)
            fh.close()
            raise
        self.tag_summary = defaultdict(list)
        self.symbols = {}
        self.binary_data = {}
        self.exported_assets = []
        self.big_buffers = set()
        self.has_product_info = False
        self.anti_decompilation = False
        self.recent_compile = False
        self.disasm_path = None

        header_subsection = ResultSection(score=0, title_text="SWF Header")
        header_subsection.add_line("Version: %d" % self.swf.header.version)
        header_subsection.add_line("FileLength: %d" %
                                   self.swf.header.file_length)
        header_subsection.add_line("FrameSize: %s" %
                                   self.swf.header.frame_size.__str__())
        header_subsection.add_line("FrameRate: %d" %
                                   self.swf.header.frame_rate)
        header_subsection.add_line("FrameCount: %d" %
                                   self.swf.header.frame_count)
        self.result.add_section(header_subsection)

        # Parse Tags
        tag_types = []
        for tag in self.swf.tags:
            self.tag_analyzers.get(SWF_TAGS.get(tag.type), self._dummy)(tag)
            tag_types.append(str(tag.type))
        tag_list = ','.join(tag_types)
        tags_ssdeep = ssdeep.hash(tag_list)
        _, hash_one, hash_two = tags_ssdeep.split(':')
        self.result.add_tag(tag_type=TAG_TYPE.SWF_TAGS_SSDEEP,
                            value=hash_one,
                            weight=TAG_WEIGHT.NULL)
        self.result.add_tag(tag_type=TAG_TYPE.SWF_TAGS_SSDEEP,
                            value=hash_two,
                            weight=TAG_WEIGHT.NULL)
        # Script Overview
        if len(self.symbols.keys()) > 0:
            root_symbol = 'unspecified'
            if 0 in self.symbols:
                root_symbol = self.symbols[0]
                self.symbols.pop(0)
            symbol_subsection = ResultSection(score=SCORE.NULL,
                                              title_text="Symbol Summary")
            symbol_subsection.add_line('Main Timeline: %s' % root_symbol)
            if len(self.symbols.keys()) > 0:
                symbol_subsection.add_line('Other Symbols:')
                for tag_id, name in self.symbols.iteritems():
                    symbol_subsection.add_line('\tTagId: %s\tName: %s' %
                                               (tag_id, name))
            self.result.add_section(symbol_subsection)

        if len(self.binary_data.keys()) > 0:
            self.result.report_heuristic(Swiffer.AL_Swiffer_003)
            binary_subsection = ResultSection(
                score=SCORE.NULL, title_text="Attached Binary Data")
            for tag_id, tag_data in self.binary_data.iteritems():
                tag_name = self.symbols.get(tag_id, 'unspecified')
                binary_subsection.add_line('\tTagId: %s\tName: %s\tSize: %d' %
                                           (tag_id, tag_name, len(tag_data)))
                try:
                    binary_filename = hashlib.sha256(tag_data).hexdigest() + '.attached_binary'
                    binary_path = os.path.join(self.working_directory,
                                               binary_filename)
                    with open(binary_path, 'w') as fh:
                        fh.write(tag_data)
                    self.request.add_extracted(
                        binary_path, "SWF Embedded Binary Data %d" % tag_id,
                        tag_name)
                except:
                    self.log.exception(
                        "Error submitting embedded binary data for swf:")

            self.result.add_section(binary_subsection)

        tags_subsection = ResultSection(score=SCORE.INFO,
                                        title_text="Tags of Interest")
        for tag in sorted(self.tag_summary.keys()):
            tags_subsection.add_line(tag)
            summaries = self.tag_summary[tag]
            for summary in summaries:
                summary_line = '\t' + '\t'.join(summary)
                tags_subsection.add_line(summary_line)
            tags_subsection.add_line('')
        if len(tags_subsection.body) > 0:
            self.result.add_section(tags_subsection)

        if len(self.big_buffers) > 0:
            self.result.report_heuristic(Swiffer.AL_Swiffer_001)
            bbs = ResultSection(score=SCORE.HIGH,
                                title_text="Large String Buffers")
            for buf in self.big_buffers:
                bbs.add_line("Found a %d byte string." % len(buf))
                buf_filename = ""
                try:
                    buf_filename = hashlib.sha256(buf).hexdigest() + '.stringbuf'
                    buf_path = os.path.join(self.working_directory,
                                            buf_filename)
                    with open(buf_path, 'w') as fh:
                        fh.write(buf)
                    self.request.add_extracted(buf_path,
                                               "AVM2 Large String Buffer.")
                except:
                    self.log.exception(
                        "Error submitting AVM2 String Buffer %s" %
                        buf_filename)
            self.result.add_section(bbs)

        if not self.has_product_info:
            self.log.debug("Missing product info.")
            no_info = ResultSection(score=SCORE.INFO,
                                    title_text="Missing Product Information")
            no_info.add_line(
                "This SWF doesn't specify information about the product that created it."
            )
            self.result.add_section(no_info)

        if self.anti_decompilation:
            self.result.report_heuristic(Swiffer.AL_Swiffer_004)
            self.log.debug("Anti-disassembly techniques may be present.")
            no_dis = ResultSection(score=SCORE.LOW,
                                   title_text="Incomplete Disassembly")
            no_dis.add_line(
                "This SWF may contain intentional corruption or obfuscation to prevent disassembly."
            )

            self.result.add_section(no_dis)

        if self.recent_compile:
            recent_compile = ResultSection(score=SCORE.LOW,
                                           title_text="Recent Compilation")
            recent_compile.add_line(
                "This SWF was compiled within the last 24 hours.")
            self.result.add_section(recent_compile)
            self.result.report_heuristic(Swiffer.AL_Swiffer_002)

        fh.close()

    def analyze_asasm(self, asm):
        # Check for large string buffers
        big_buff_re = r'([A-Za-z0-9+/=]{512,})[^A-Za-z0-9+/=]'
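        # i.e. any run of 512 or more base64-style characters followed by a non-base64
        # character; buffers like this frequently hold encoded or embedded payloads.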
        for buf in re.finditer(big_buff_re, asm):
            self.big_buffers.add(buf.group(1))

        # Check for incomplete decompilation (obfuscation or intentional corruption)
        hexbytes = re.findall(r';\s+0x[A-F0-9]{2}', asm)
        if len(hexbytes) > 10:
            self.anti_decompilation = True

    def analyze_abc(self, a_bytes):
        # Drop the file and disassemble
        abc_path = ""
        try:
            abc_hash = hashlib.sha256(a_bytes).hexdigest()
            abc_filename = abc_hash + '.abc'
            abc_path = os.path.join(self.working_directory, abc_filename)
            disasm_path = os.path.join(self.working_directory, abc_hash)
            with open(abc_path, 'w') as fh:
                fh.write(a_bytes)
            rabcdasm = Popen([self.rabcdasm, abc_path],
                             stdout=PIPE,
                             stderr=PIPE)
            stdout, _ = rabcdasm.communicate()
            # rabcdasm makes a directory from the filename.
            if os.path.isdir(disasm_path):
                for root, dirs, file_names in os.walk(disasm_path):
                    for file_name in file_names:
                        asasm_path = os.path.join(root, file_name)
                        with open(asasm_path, 'r') as fh:
                            self.analyze_asasm(fh.read())
                self.disasm_path = disasm_path
        except:
            self.log.exception("Error disassembling abc file %s:" % abc_path)

    def _do_abc(self, tag):
        self.tag_summary['DoABC'].append(
            ("Name: %s" % tag.abcName, "Length: %d" % len(tag.bytes)))
        if self.rabcdasm:
            self.analyze_abc(tag.bytes)

    def _define_binary_data(self, tag):
        self.binary_data[tag.characterId] = tag.data

    def _export_assets(self, tag):
        if not hasattr(tag, 'exports'):
            return
        for export in tag.exports:
            export_tup = ("Character ID: %s" % export.characterId,
                          "Name: %s" % export.characterName)
            if export_tup not in self.exported_assets:
                self.tag_summary['ExportAssets'].append(export_tup)
                self.exported_assets.append(export_tup)

    def _namecharacter(self, tag):
        self.tag_summary['NameCharacter'].append(
            ("Character ID: %s" % tag.characterId,
             "Name: %s" % tag.characterName))

    def _symbolclass(self, tag):
        for symbol in tag.symbols:
            self.symbols[symbol.tagId] = symbol.name

    def _productinfo(self, tag):
        self.has_product_info = True

        if hasattr(tag, 'compileTime'):
            try:
                compile_time = datetime.fromtimestamp(tag.compileTime / 1000)
                compile_time_str = compile_time.ctime()
                # Flag recent compile time:
                if (datetime.now() - compile_time) < timedelta(hours=24):
                    self.recent_compile = True
            except:
                compile_time_str = "Invalid Compile Time: %s" % repr(
                    tag.compileTime)
        else:
            compile_time_str = 'Missing'

        self.tag_summary['ProductInfo'].append(
            ("Product: %s" % ProductKind.tostring(tag.product),
             "Edition: %s" % ProductEdition.tostring(tag.edition),
             "Version (Major.Minor.Build): %d.%d.%d" %
             (tag.majorVersion, tag.minorVersion, tag.build),
             "Compile Time: %s" % compile_time_str))

    def _dummy(self, tag):
        pass
Example #17
class TorrentSlicer(ServiceBase):
    SERVICE_CATEGORY = 'Static Analysis'
    SERVICE_ACCEPTS = 'meta/torrent'
    SERVICE_DESCRIPTION = "Extracts information from torrent files"
    SERVICE_REVISION = ServiceBase.parse_revision('$Id: ebb685f586dd7a9b652ba105558bdb9dc822f287 $')
    SERVICE_VERSION = '1'
    SERVICE_ENABLED = True
    SERVICE_STAGE = 'CORE'
    SERVICE_CPU_CORES = 1
    SERVICE_RAM_MB = 256

    def __init__(self, cfg=None):
        super(TorrentSlicer, self).__init__(cfg)

    def start(self):
        self.log.debug("TorrentSlicer service started")

    # noinspection PyUnresolvedReferences,PyGlobalUndefined
    def import_service_deps(self):
        global bencode, binascii, humanfriendly, size, si
        from hurry.filesize import size, si
        import bencode
        import binascii
        import humanfriendly

    # noinspection PyUnusedLocal
    @staticmethod
    def create_tables(infohash,
                      announce,
                      announce_list,
                      creation_date,
                      comment,
                      created_by,
                      encoding,
                      piece_length,
                      private,
                      name,
                      sflength,
                      sfmd5sum,
                      files,
                      piecehashes,
                      last_piece_size,
                      torrent_size,
                      torrent_type):

        announce_str = ""
        for x in announce_list:
            for y in x:
                announce_str += "{} " .format(y)

        meta_dict = {
            'InfoHash:': infohash,
            'Announce:': announce,
            'Announce List*:': announce_str,
            'Creation Date*:': creation_date,
            'Comment*:': comment,
            'Created By*:': created_by,
            'Encoding*:': encoding,
            'Piece Length:': "%s (%s)" % (str(piece_length), size(piece_length, system=si)),
            'Private*:': private,
            'Name*:': name,
        }

        meta = []
        for k, i in sorted(meta_dict.iteritems()):
            meta.append('{0:20s} {1}' .format(k, i))

        cal_dict = {
            'Type of Torrent:': torrent_type,
            'Number of Pieces:': str(len(piecehashes)),
            'Last Piece Size:': "%s (%s)" % (str(last_piece_size), size(last_piece_size, system=si)),
            'Size of Torrent:': "%s (%s)" % (str(torrent_size), size(torrent_size, system=si)),
        }

        cal = []
        for k, i in sorted(cal_dict.iteritems()):
            cal.append('{0:18s} {1}' .format(k, i))

        des = []
        if len(files) > 0:
            des.append('{:100s} {:10s} {:32s}' .format('File Path', 'Length', 'MD5Sum*'))
            des.append('{:100s} {:10s} {:32s}' .format('-' * 9, '-' * 6, '-' * 7))
            for f in files:
                fmd5 = ""
                path = ""
                for k, i in f.iteritems():
                    if k == "hash":
                        fmd5 = i
                    if k == "path":
                        for x in i:
                            path = str(x)
                des.append('{:100s} {:10s} {:32s}' .format(path, size(f['length'], system=si), fmd5))

        return meta, cal, des

    def run_tosl(self, filename, request):
        file_res = request.result

        torrent_file = open(filename, "rb").read()

        # noinspection PyBroadException
        try:
            metainfo = bencode.bdecode(torrent_file)
        except:
            res = (ResultSection(SCORE.NULL, "This is not a valid *.torrent file"))
            file_res.add_result(res)
            return

        # Grab specific data from file

        announce = metainfo['announce']
        if 'announce-list' in metainfo:
            announce_list = metainfo['announce-list']
        else:
            announce_list = ""
        if 'creation date' in metainfo:
            creation_date = metainfo['creation date']
        else:
            creation_date = ""
        if 'comment' in metainfo:
            comment = metainfo['comment']
        else:
            comment = ""
        if 'created by' in metainfo:
            created_by = metainfo['created by']
        else:
            created_by = ""
        if 'encoding' in metainfo:
            encoding = metainfo['encoding']
        else:
            encoding = ""
        if 'url-list' in metainfo:
            url_list = metainfo['url-list']
        else:
            url_list = []

        info = metainfo['info']
        piece_length = info['piece length']
        pieces = info['pieces']
        if 'private' in info:
            private = info['private']
        else:
            private = ""
        if 'name' in info:
            name = info['name']
        else:
            name = ""
        if 'length' in info:
            sflength = info['length']
        else:
            sflength = ""
        if 'md5sum' in info:
            sfmd5sum = info['md5sum']
        else:
            sfmd5sum = ""
        if 'files' in info:
            files = info['files']
        else:
            files = []

        infohash = hashlib.sha1(bencode.bencode(info)).hexdigest()
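        # The bencoded 'pieces' value is a flat concatenation of 20-byte SHA-1 digests,
        # one per piece, so it is split into 20-byte chunks and hex-encoded here.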
        piecehashes = [binascii.hexlify(pieces[i:i+20]) for i in range(0, len(pieces), 20)]
        torrent_size = 0

        for i in files:
            torrent_size += i['length']
            i['length'] = i['length']
            for j in range(len(i['path'])):
                i['path'][j] = unicode(i['path'][j], "utf8")

        if torrent_size == 0:
            torrent_type = 'single file torrent'
            torrent_size = sflength
        else:
            torrent_type = 'multiple file torrent'

        last_piece_size = min(torrent_size, (len(piecehashes) * int(piece_length)) - torrent_size)

        errmsg = []
        if last_piece_size > piece_length:
            errmsg.append("WARNING: The calculated length of the last piece is greater than the stated piece length")
        if (piece_length > torrent_size) and (torrent_type == 'multiple file torrent'):
            errmsg.append("WARNING: The stated length of an individual piece is greater "
                          "than the calculated torrent size")

        if creation_date != "":
            creation_date_conv = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(creation_date))
            creation_date_str = "{0} ({1})" .format(str(creation_date), creation_date_conv)
        else:
            creation_date_str = creation_date

        # Generate result output
        meta, cal, des = self.create_tables(
            infohash,
            announce,
            announce_list,
            creation_date_str,
            comment,
            created_by,
            encoding,
            piece_length,
            private,
            name,
            sflength,
            sfmd5sum,
            files,
            piecehashes,
            last_piece_size,
            torrent_size,
            torrent_type
        )

        tosl_res = (ResultSection(SCORE.NULL, "Torrent File Details"))
        comment = "NOTE: '*' Denotes an optional field in the Torrent Descriptor File. As a result it may be blank. " \
                  "Refer to the BitTorrent Specification.\n"
        tosl_res.add_line(comment)

        if len(errmsg) > 0:
            error_res = (ResultSection(SCORE.NULL, "Errors Detected:", body_format=TEXT_FORMAT.MEMORY_DUMP,
                                       parent=tosl_res))
            for line in errmsg:
                error_res.add_line(line)

        meta_res = (ResultSection(SCORE.NULL, "Meta Data:", body_format=TEXT_FORMAT.MEMORY_DUMP,
                                  parent=tosl_res))
        for line in meta:
            meta_res.add_line(line)

        cal_res = (ResultSection(SCORE.NULL, "Calculated Data:", body_format=TEXT_FORMAT.MEMORY_DUMP,
                                 parent=tosl_res))
        comment = "NOTE: the length of last piece is calculated as:" \
                  "(number of pieces X piece length) - size of torrent\n"
        cal_res.add_line(comment)
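        # Worked example with made-up numbers: 5 pieces of 262,144 bytes for a torrent of
        # 1,200,000 bytes gives 5 * 262,144 - 1,200,000 = 110,720 bytes for the last piece.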
        for line in cal:
            cal_res.add_line(line)

        if len(des) > 0:
            des_res = (ResultSection(SCORE.NULL, "File paths:",
                                     body_format=TEXT_FORMAT.MEMORY_DUMP, parent=tosl_res))
            for line in des:
                des_res.add_line(line)

        if url_list:
            url_res = (ResultSection(SCORE.NULL, "Urls found in metadata:", body_format=TEXT_FORMAT.MEMORY_DUMP,
                                     parent=tosl_res))
            for url in url_list:
                url_res.add_line(url)
                url_res.add_tag(TAG_TYPE['NET_FULL_URI'], url, TAG_WEIGHT.LOW)

        sha1_hashes = os.path.join(self.working_directory, "hash_of_pieces.json")
        with open(sha1_hashes, "wb") as sha1_file:
            sha1_file.write(json.dumps(piecehashes))

        request.add_supplementary(sha1_hashes, "List of hashes in order of the different pieces of the torrent (json)")

        # Tags
        if len(announce) > 0:
            tosl_res.add_tag(TAG_TYPE['NET_FULL_URI'], announce, TAG_WEIGHT.LOW)

        for it in announce_list:
            for uri in it:
                tosl_res.add_tag(TAG_TYPE['NET_FULL_URI'], uri, TAG_WEIGHT.LOW)

        if name != "":
            tosl_res.add_tag(TAG_TYPE['FILE_NAME'], name, TAG_WEIGHT.LOW)

        for f in files:
            for k, i in f.iteritems():
                if k == "hash" and len(i) > 0:
                    tosl_res.add_tag(TAG_TYPE['FILE_MD5'], i, TAG_WEIGHT.LOW)
                if k == "path" and len(i) > 0:
                    for x in i:
                        tosl_res.add_tag(TAG_TYPE['FILE_NAME'], str(x), TAG_WEIGHT.LOW)

        file_res.add_result(tosl_res)

    def execute(self, request):
        request.result = Result()
        local_path = request.download()
        self.run_tosl(local_path, request)
Example #18
class Extract(ServiceBase):
    SERVICE_ACCEPTS = '(archive|executable|java|android)/.*|document/email|document/office/unknown'
    SERVICE_CATEGORY = "Extraction"
    SERVICE_DESCRIPTION = "This service extracts embedded files from file containers (like ZIP, RAR, 7z, ...)"
    SERVICE_ENABLED = True
    SERVICE_REVISION = ServiceBase.parse_revision('$Id: 68af56186e355dbcc031625fb6353a7f58a0cfe4 $')
    SERVICE_STAGE = 'EXTRACT'
    SERVICE_TIMEOUT = 60
    SERVICE_VERSION = '1'
    SERVICE_CPU_CORES = 0.1
    SERVICE_RAM_MB = 256

    SERVICE_DEFAULT_CONFIG = {
        "DEFAULT_PW_LIST": ["password", "infected", "add_more_passwords"],
        "NAMED_EMAIL_ATTACHMENTS_ONLY": True,
        "MAX_EMAIL_ATTACHMENT_SIZE": 10 * 1024**3,
    }
    SERVICE_DEFAULT_SUBMISSION_PARAMS = [{"default": "", "name": "password", "type": "str", "value": ""},
                                         {"default": False,
                                          "name": "extract_pe_sections",
                                          "type": "bool",
                                          "value": False},
                                         {"default": False,
                                          "name": "continue_after_extract",
                                          "type": "bool",
                                          "value": False}]

    FORBIDDEN_EXE = [".text", ".rsrc", ".rdata", ".reloc", ".pdata", ".idata", "UPX", "file"]
    FORBIDDEN_ELF_EXE = [str(x) for x in xrange(20)]
    MAX_EXTRACT = 500
    MAX_EXTRACT_LIVE = 100

    LAUNCHABLE_EXTENSIONS = [
        '.ade',
        '.adp',
        '.as',  # Adobe ActionScript
        '.bat',  # DOS/Windows batch file
        '.chm',
        '.cmd',  # Windows command
        '.com',  # DOS command
        '.cpl',
        '.exe',  # DOS/Windows executable
        '.dll',  # Windows library
        '.hta',
        '.inf',  # Windows autorun file
        '.ins',
        '.isp',
        '.jar',  # Java JAR
        '.jse',
        '.js',  # Javascript
        '.lib',
        '.lnk',  # Windows shortcut
        '.mde',
        '.msc',
        '.msp',
        '.mst',
        '.pif',
        '.py',  # Python script
        '.scr',  # Windows screen saver
        '.sct',
        '.shb',
        '.sys',
        '.vb',  # VB Script
        '.vbe',  # Encrypted VB script
        '.vbs',  # VB Script
        '.vxd',
        '.wsc',
        '.wsf',
        '.wsh'
    ]

    def __init__(self, cfg=None):
        super(Extract, self).__init__(cfg)
        self._last_password = None
        self.extract_methods = [
            self.extract_7zip,
            self.extract_tnef,
            self.extract_swf,
            self.extract_ace,
            self.extract_eml,
            self.extract_docx
        ]
        self.anomaly_detections = [self.archive_with_executables, self.archive_is_arc]
        self.white_listing_methods = [self.jar_whitelisting]
        self.st = None
        self.named_attachments_only = None
        self.max_attachment_size = None

    # noinspection PyUnresolvedReferences
    def import_service_deps(self):
        global extract_docx, ExtractionError, PasswordError
        from al_services.alsvc_extract.doc_extract import extract_docx, ExtractionError, PasswordError

    def start(self):
        self.st = SubprocessTimer(2*self.SERVICE_TIMEOUT/3)
        self.named_attachments_only = self.cfg.get('NAMED_EMAIL_ATTACHMENTS_ONLY', True)
        self.max_attachment_size = self.cfg.get('MAX_EMAIL_ATTACHMENT_SIZE', None)

    def execute(self, request):
        result = Result()
        continue_after_extract = request.get_param('continue_after_extract')
        self._last_password = None
        local = request.download()
        password_protected = False
        white_listed = 0

        try:
            password_protected, white_listed = self.extract(request, local)
        except ExtractMaxExceeded, e:
            result.add_section(ResultSection(score=SCORE["NULL"], title_text=str(e)))
        except ExtractIgnored, e:
            result.add_section(ResultSection(score=SCORE["NULL"], title_text=str(e)))
Example #19
class CFMD(ServiceBase):
    SERVICE_ACCEPTS = '.*'
    SERVICE_ENABLED = True
    SERVICE_CATEGORY = Category.FILTERING
    SERVICE_STAGE = Stage.FILTER
    SERVICE_REVISION = ServiceBase.parse_revision(
        '$Id: cec448493c66e8e70c7e6f8f022094ad85a13886 $')
    SERVICE_VERSION = '1'
    SERVICE_DEFAULT_CONFIG = {
        "host": "127.0.0.1",
        "user": "******",
        "passwd": "password",
        "port": 3306,
        "db": "cfmd"
    }
    SERVICE_DESCRIPTION = "Performs hash lookups against Microsoft's CleanFileMetaData database."
    SERVICE_CPU_CORES = 0.05
    SERVICE_RAM_MB = 64

    def __init__(self, cfg=None):
        super(CFMD, self).__init__(cfg)

        self._connect_params = {
            'host': self.cfg.get('host'),
            'user': self.cfg.get('user'),
            'port': int(self.cfg.get('port')),
            'passwd': self.cfg.get('passwd'),
            'db': self.cfg.get('db')
        }
        self.connection = None

    def start(self):
        self.connection = CFMDDatasource(self.log, **self._connect_params)

    # noinspection PyUnresolvedReferences
    def import_service_deps(self):
        global CFMDDatasource
        from al_services.alsvc_cfmd.datasource.cfmd import CFMD as CFMDDatasource

    def execute(self, request):
        result = Result()

        try:
            res = self.connection.query(request.sha256)
        except CFMDDatasource.DatabaseException:
            raise RecoverableError("Query failed")
        if res:
            res_sec = ResultSection(
                title_text="This file was found in the %s. It is not malware."
                % CFMDDatasource.Name,
                score=SCORE['NOT'])
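            # Each row returned by the CFMD datasource is expected to expose
            # 'filename', 'size', 'md5', 'sha1' and 'sha256' keys (used below).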

            for item in res:
                res_sec.add_line("%s (%s bytes)" %
                                 (item['filename'], item['size']))
                res_sec.add_line(" MD5: %s" % item['md5'])
                res_sec.add_line(" SHA1: %s" % item['sha1'])
                res_sec.add_line(" SHA256: %s" % item['sha256'])
                res_sec.add_line("")

            result.add_section(res_sec)

        request.result = result
Example #20
class CrowBar(ServiceBase):
    SERVICE_CATEGORY = 'Static Analysis'
    SERVICE_ACCEPTS = '(code/.*|unknown)'
    SERVICE_DESCRIPTION = "Code File De-obfuscator"
    SERVICE_REVISION = ServiceBase.parse_revision(
        '$Id: 7299547e56acbd0a5bf0f44f4389a15d671e1489 $')
    SERVICE_VERSION = '1'
    SERVICE_TIMEOUT = 150
    SERVICE_ENABLED = True
    SERVICE_CPU_CORES = 0.5
    SERVICE_RAM_MB = 256

    def __init__(self, cfg=None):
        super(CrowBar, self).__init__(cfg)
        self.validchars = \
            ' 0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'
        self.binchars = ''.join(
            [c for c in map(chr, range(0, 256)) if c not in self.validchars])
        self.max_attempts = 10

    def start(self):
        self.log.debug("CrowBar service started")

    # noinspection PyUnresolvedReferences,PyGlobalUndefined
    def import_service_deps(self):
        global PatternMatch, BeautifulSoup
        from bs4 import BeautifulSoup
        from al_services.alsvc_frankenstrings.balbuzard.patterns import PatternMatch

    # --- Support Modules ----------------------------------------------------------------------------------------------

    def submit_extracted(self, res_file, res, request):
        h = hash(res_file)
        file_path = path.join(self.working_directory,
                              "{}_beautified_script".format(abs(h)))
        # Strip non-ASCII bytes before writing the deobfuscated script to disk,
        # then register it as an extracted file.
        res_file = ''.join([x for x in res_file if ord(x) < 128])
        with open(file_path, 'wb') as exe_file:
            exe_file.write(res_file)
        request.add_extracted(file_path,
                              "Extracted file during CrowBar analysis.")
        res.add_line("Extracted file during CrowBar analysis.")

    # noinspection PyBroadException
    @staticmethod
    def decode(data):
        """
        Modified code that was written by Didier Stevens
        https://blog.didierstevens.com/2016/03/29/decoding-vbe/
        """
        try:
            d_decode = {
                9: '\x57\x6E\x7B',
                10: '\x4A\x4C\x41',
                11: '\x0B\x0B\x0B',
                12: '\x0C\x0C\x0C',
                13: '\x4A\x4C\x41',
                14: '\x0E\x0E\x0E',
                15: '\x0F\x0F\x0F',
                16: '\x10\x10\x10',
                17: '\x11\x11\x11',
                18: '\x12\x12\x12',
                19: '\x13\x13\x13',
                20: '\x14\x14\x14',
                21: '\x15\x15\x15',
                22: '\x16\x16\x16',
                23: '\x17\x17\x17',
                24: '\x18\x18\x18',
                25: '\x19\x19\x19',
                26: '\x1A\x1A\x1A',
                27: '\x1B\x1B\x1B',
                28: '\x1C\x1C\x1C',
                29: '\x1D\x1D\x1D',
                30: '\x1E\x1E\x1E',
                31: '\x1F\x1F\x1F',
                32: '\x2E\x2D\x32',
                33: '\x47\x75\x30',
                34: '\x7A\x52\x21',
                35: '\x56\x60\x29',
                36: '\x42\x71\x5B',
                37: '\x6A\x5E\x38',
                38: '\x2F\x49\x33',
                39: '\x26\x5C\x3D',
                40: '\x49\x62\x58',
                41: '\x41\x7D\x3A',
                42: '\x34\x29\x35',
                43: '\x32\x36\x65',
                44: '\x5B\x20\x39',
                45: '\x76\x7C\x5C',
                46: '\x72\x7A\x56',
                47: '\x43\x7F\x73',
                48: '\x38\x6B\x66',
                49: '\x39\x63\x4E',
                50: '\x70\x33\x45',
                51: '\x45\x2B\x6B',
                52: '\x68\x68\x62',
                53: '\x71\x51\x59',
                54: '\x4F\x66\x78',
                55: '\x09\x76\x5E',
                56: '\x62\x31\x7D',
                57: '\x44\x64\x4A',
                58: '\x23\x54\x6D',
                59: '\x75\x43\x71',
                60: '\x4A\x4C\x41',
                61: '\x7E\x3A\x60',
                62: '\x4A\x4C\x41',
                63: '\x5E\x7E\x53',
                64: '\x40\x4C\x40',
                65: '\x77\x45\x42',
                66: '\x4A\x2C\x27',
                67: '\x61\x2A\x48',
                68: '\x5D\x74\x72',
                69: '\x22\x27\x75',
                70: '\x4B\x37\x31',
                71: '\x6F\x44\x37',
                72: '\x4E\x79\x4D',
                73: '\x3B\x59\x52',
                74: '\x4C\x2F\x22',
                75: '\x50\x6F\x54',
                76: '\x67\x26\x6A',
                77: '\x2A\x72\x47',
                78: '\x7D\x6A\x64',
                79: '\x74\x39\x2D',
                80: '\x54\x7B\x20',
                81: '\x2B\x3F\x7F',
                82: '\x2D\x38\x2E',
                83: '\x2C\x77\x4C',
                84: '\x30\x67\x5D',
                85: '\x6E\x53\x7E',
                86: '\x6B\x47\x6C',
                87: '\x66\x34\x6F',
                88: '\x35\x78\x79',
                89: '\x25\x5D\x74',
                90: '\x21\x30\x43',
                91: '\x64\x23\x26',
                92: '\x4D\x5A\x76',
                93: '\x52\x5B\x25',
                94: '\x63\x6C\x24',
                95: '\x3F\x48\x2B',
                96: '\x7B\x55\x28',
                97: '\x78\x70\x23',
                98: '\x29\x69\x41',
                99: '\x28\x2E\x34',
                100: '\x73\x4C\x09',
                101: '\x59\x21\x2A',
                102: '\x33\x24\x44',
                103: '\x7F\x4E\x3F',
                104: '\x6D\x50\x77',
                105: '\x55\x09\x3B',
                106: '\x53\x56\x55',
                107: '\x7C\x73\x69',
                108: '\x3A\x35\x61',
                109: '\x5F\x61\x63',
                110: '\x65\x4B\x50',
                111: '\x46\x58\x67',
                112: '\x58\x3B\x51',
                113: '\x31\x57\x49',
                114: '\x69\x22\x4F',
                115: '\x6C\x6D\x46',
                116: '\x5A\x4D\x68',
                117: '\x48\x25\x7C',
                118: '\x27\x28\x36',
                119: '\x5C\x46\x70',
                120: '\x3D\x4A\x6E',
                121: '\x24\x32\x7A',
                122: '\x79\x41\x2F',
                123: '\x37\x3D\x5F',
                124: '\x60\x5F\x4B',
                125: '\x51\x4F\x5A',
                126: '\x20\x42\x2C',
                127: '\x36\x65\x57'
            }

            d_combination = {
                0: 0,
                1: 1,
                2: 2,
                3: 0,
                4: 1,
                5: 2,
                6: 1,
                7: 2,
                8: 2,
                9: 1,
                10: 2,
                11: 1,
                12: 0,
                13: 2,
                14: 1,
                15: 2,
                16: 0,
                17: 2,
                18: 1,
                19: 2,
                20: 0,
                21: 0,
                22: 1,
                23: 2,
                24: 2,
                25: 1,
                26: 0,
                27: 2,
                28: 1,
                29: 2,
                30: 2,
                31: 1,
                32: 0,
                33: 0,
                34: 2,
                35: 1,
                36: 2,
                37: 1,
                38: 2,
                39: 0,
                40: 2,
                41: 0,
                42: 0,
                43: 1,
                44: 2,
                45: 0,
                46: 2,
                47: 1,
                48: 0,
                49: 2,
                50: 1,
                51: 2,
                52: 0,
                53: 0,
                54: 1,
                55: 2,
                56: 2,
                57: 0,
                58: 0,
                59: 1,
                60: 2,
                61: 0,
                62: 2,
                63: 1
            }

            result = ''
            index = -1
            for char in data \
                    .replace('@&', chr(10)) \
                    .replace('@#', chr(13)) \
                    .replace('@*', '>') \
                    .replace('@!', '<') \
                    .replace('@$', '@'):
                byte = ord(char)
                if byte < 128:
                    index += 1
                if (byte == 9 or 31 < byte < 128
                    ) and byte != 60 and byte != 62 and byte != 64:
                    char = [c
                            for c in d_decode[byte]][d_combination[index % 64]]
                result += char
            return result
        except:
            result = None
            return result

    def printable_ratio(self, text):
        return float(
            float(len(text.translate(None, self.binchars))) / float(len(text)))

    @staticmethod
    def add1b(s, k):
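        # Caesar-style byte shift, e.g. add1b("abc", 1) -> "bcd" (wraps at 0xff).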
        return ''.join([chr((ord(c) + k) & 0xff) for c in s])

    def charcode(self, text):
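        # Rebuilds a string from runs of decimal character codes; the result is
        # only kept if more than 20 codes were found, it is at least 75%
        # printable and it accounts for at least 10% of the original text.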
        final = False
        output = None
        arrayofints = filter(
            lambda n: n < 256,
            map(int,
                re.findall('(\d+)', str(re.findall('\D{1,2}\d{2,3}', text)))))
        if len(arrayofints) > 20:
            s1 = ''.join(map(chr, arrayofints))
            if self.printable_ratio(s1) > .75 and (float(len(s1)) /
                                                   float(len(text))) > .10:
                # if the output is mostly readable and big enough
                output = s1

        return final, output

    @staticmethod
    def charcode_hex(text):
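        # Decodes runs of hex-escaped characters (\u, %u, \x and 0x prefixes).
        # Multi-byte widths (4, 8 and 16 hex digits) are reassembled
        # least-significant byte first, and a decoded run replaces the original
        # escape sequence only when the result is pure ASCII.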

        final = False
        output = None
        s1 = text
        enc_str = ['\u', '%u', '\\x', '0x']

        for encoding in enc_str:
            char_len = [
                (16,
                 re.compile(r'(?:' + re.escape(encoding) +
                            '[A-Fa-f0-9]{16})+')),
                (8,
                 re.compile(r'(?:' + re.escape(encoding) +
                            '[A-Fa-f0-9]{8})+')),
                (4,
                 re.compile(r'(?:' + re.escape(encoding) +
                            '[A-Fa-f0-9]{4})+')),
                (2,
                 re.compile(r'(?:' + re.escape(encoding) + '[A-Fa-f0-9]{2})+'))
            ]

            for r in char_len:
                hexchars = set(re.findall(r[1], text))

                for hc in hexchars:
                    data = hc
                    decoded = ''
                    if r[0] == 2:
                        while data != '':
                            decoded += binascii.a2b_hex(data[2:4])
                            data = data[4:]
                    if r[0] == 4:
                        while data != '':
                            decoded += binascii.a2b_hex(
                                data[4:6]) + binascii.a2b_hex(data[2:4])
                            data = data[6:]
                    if r[0] == 8:
                        while data != '':
                            decoded += binascii.a2b_hex(data[8:10]) + binascii.a2b_hex(data[6:8]) + \
                                       binascii.a2b_hex(data[4:6]) + binascii.a2b_hex(data[2:4])
                            data = data[10:]
                    if r[0] == 16:
                        while data != '':
                            decoded += binascii.a2b_hex(data[16:18]) + binascii.a2b_hex(data[14:16]) + \
                                       binascii.a2b_hex(data[12:14]) + binascii.a2b_hex(data[10:12]) + \
                                       binascii.a2b_hex(data[8:10]) + binascii.a2b_hex(data[6:8]) + \
                                       binascii.a2b_hex(data[4:6]) + binascii.a2b_hex(data[2:4])
                            data = data[18:]

                    # Remove trailing NULL bytes
                    final_dec = re.sub('[\x00]*$', '', decoded)

                    if all(ord(c) < 128 for c in final_dec):
                        s1 = s1.replace(hc, final_dec)

        if s1 != text:
            output = s1

        return final, output

    @staticmethod
    def string_replace(text):
        final = False
        output = None
        if 'replace(' in text.lower():
            # Process string with replace functions calls
            # Such as "SaokzueofpigxoFile".replace(/ofpigx/g, "T").replace(/okzu/g, "v")
            s1 = text
            # Find all occurrences of string replace (JS)
            for strreplace in [
                    o[0] for o in re.findall(
                        '(["\'][^"\']+["\']((\.replace\([^)]+\))+))',
                        s1,
                        flags=re.I)
            ]:
                s2 = strreplace
                # Extract all substitutions
                for str1, str2 in re.findall(
                        '\.replace\([/\'"]([^,]+)[/\'\"]g?\s*,\s*[\'\"]([^)]*)[\'\"]\)',
                        s2):
                    # Execute the substitution
                    s2 = s2.replace(str1, str2)
                # Remove the replace calls from the layer (prevent accidental substitutions in the next step)
                s2 = s2[:s2.index('.replace(')]
                s1 = s1.replace(strreplace, s2)

            # Process global string replace
            replacements = [
                q for q in re.findall(
                    'replace\(\s*/([^)]+)/g?, [\'"]([^\'"]*)[\'"]', s1)
            ]
            for str1, str2 in replacements:
                s1 = s1.replace(str1, str2)
            # Process VB string replace
            replacements = [
                q for q in re.findall(
                    'Replace\(\s*["\']?([^,"\']*)["\']?\s*,\s*["\']?([^,"\']*)["\']?\s*,\s*["\']?([^,"\']*)["\']?',
                    s1)
            ]
            for str1, str2, str3 in replacements:
                s1 = s1.replace(str1, str1.replace(str2, str3))
            output = re.sub('\.replace\(\s*/([^)]+)/g?, [\'"]([^\'"]*)[\'"]\)',
                            '', s1)
        return final, output

    def b64decode_str(self, text):
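        # Replaces quoted base64 strings with their decoded value when the
        # decoded bytes are pure ASCII, e.g. 'x = "aGVsbG8=";' -> 'x = "hello";'.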
        final = False
        output = None
        b64str = re.findall('"([A-Za-z0-9+/]{4,}=?=?)"', text)
        s1 = text
        for s in b64str:
            if len(s) % 4 == 0:
                try:
                    d = binascii.a2b_base64(s)
                except binascii.Error:
                    continue
                if all(ord(c) < 128 for c in d):
                    s1 = s1.replace(s, d)
        if s1 != text:
            output = s1
        return final, output

    @staticmethod
    def vars_of_fake_arrays(text):
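        # Resolves javascript variables defined as a single element of an
        # inline array, e.g. var x = ["a", "b", "c"][1], by substituting the
        # selected element for the variable name (the 'var' keyword is
        # rewritten so the definition is not re-processed).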
        final = False
        output = None
        replacements = re.findall(
            'var\s+([^\s=]+)\s*=\s*\[([^\]]+)\]\[(\d+)\]', text)
        if len(replacements) > 0:
            #    ,- Make sure we do not process these again
            s1 = re.sub(r'var\s+([^=]+)\s*=', r'XXX \1 =', text)
            for varname, array, pos in replacements:
                try:
                    value = re.split('\s*,\s*', array)[int(pos)]
                except IndexError:
                    # print '[' + array + '][' + pos + ']'
                    raise
                s1 = s1.replace(varname, value)
            if s1 != text:
                output = s1
        return final, output

    @staticmethod
    def array_of_strings(text):
        final = False
        output = None

        replacements = re.findall('var\s+([^\s=]+)\s*=\s*\[([^\]]+)\]\s*;',
                                  text)
        if len(replacements) > 0:
            #    ,- Make sure we do not process these again
            s1 = text
            for varname, values in replacements:
                occurences = [
                    int(x) for x in re.findall(varname + '\s*\[(\d+)\]', s1)
                ]
                for i in occurences:
                    try:
                        s1 = re.sub(varname + '\s*\[(%d)\]' % i,
                                    values.split(',')[i], s1)
                    except IndexError:
                        # print '[' + array + '][' + pos + ']'
                        raise
            if s1 != text:
                output = s1
        return final, output

    @staticmethod
    def concat_strings(text):
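        # Joins string literals concatenated with + (JS) or & (VB),
        # e.g. '"Wor" & "ld"' -> '"World"'.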
        final = False
        output = None

        s1 = re.sub('[\'"]\s*[+&]\s*[\'"]', '', text)
        if s1 != text:
            output = s1

        return final, output

    @staticmethod
    def powershell_vars(text):
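        # Inlines simple Powershell variable assignments: given $p = "IEX",
        # occurrences of $p elsewhere in the script are replaced with IEX.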
        final = False
        output = None
        replacements_string = re.findall(
            r'(\$\w+)\s*=[^=]\s*[\"\']([^\"\']+)[\"\']', text)
        replacements_func = re.findall(
            r'(\$\w+)\s*=[^=]\s*([^\"\'][^\s]+)[\s]', text)
        if len(replacements_string) > 0 or len(replacements_func) > 0:
            #    ,- Make sure we do not process these again
            s1 = re.sub(r'[^_](\$\w+)\s*=', r'_\1 =', text)
            for varname, string in replacements_string:
                s1 = s1.replace(varname, string)
            for varname, string in replacements_func:
                s1 = s1.replace(varname, string)
            if s1 != text:
                output = s1

        return final, output

    @staticmethod
    def powershell_carets(text):
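        # Removes the caret escape character used to obfuscate command lines,
        # e.g. 'p^ower^shell' -> 'powershell'.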
        final = False
        output = text.replace("^", "")
        if output == text:
            output = None
        return final, output

    def mswordmacro_vars(self, text):
        final = False
        output = None
        s1 = text.replace('\r', '')
        # Only match whole-line assignments (re.M) to prevent false var replacements like YG="86"
        replacements = re.findall(r'^((\w+)\s*=\s*("[^"]+"))$', s1, re.M)
        if len(replacements) > 0:
            for full, varname, value in replacements:
                #    Make sure we do not process these again
                if len(re.findall(r'(\b' + varname + r'\b)', s1)) == 1:
                    # If there is only one instance of these, it's noise.
                    s1 = s1.replace(
                        full,
                        '<crowbar:mswordmacro_unused_variable_assignment>')
                else:
                    s1 = s1.replace(full,
                                    '<crowbar:mswordmacro_var_assignment>')
                    s1 = re.sub(r'(\b' + varname + r'\b)', value, s1)
                    # Create loop for stacking variables. i.e.
                    # b = "he"
                    # b = b & "llo "
                    # b = b & "world!"
                    repeat_var = value
                    repeat_true = re.findall(
                        '(' + repeat_var + '\s*=\s*(".+))', s1)
                    idx = 0
                    while True:
                        if len(repeat_true) == 0 or idx > self.max_attempts:
                            break
                        for fl, vl in repeat_true:
                            s1 = s1.replace(
                                fl, '<crowbar:mswordmacro_var_assignment>')
                            s1 = re.sub(repeat_var, vl, s1)
                            # only do once
                            break
                        repeat_var = vl
                        repeat_true = re.findall(
                            '(' + repeat_var + '\s*=\s*(".+))', s1)
                        idx += 1

            if s1 != text:
                output = s1
        return final, output

    def simple_xor_function(self, text):
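        # Looks for calls of the form Func("41424344", "key"), XOR-decodes the
        # hex payload with the key as-is and with the key rotated by one
        # character, and inlines whichever variant yields fully printable text.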
        final = False
        output = None
        xorstrings = re.findall(
            '(\w+\("((?:[0-9A-Fa-f][0-9A-Fa-f])+)"\s*,\s*"([^"]+)"\))', text)
        option_a = []
        option_b = []
        s1 = text
        for f, x, k in xorstrings:
            res = self.xor_with_key(x.decode("hex"), k)
            if self.printable_ratio(res) == 1:
                option_a.append((f, x, k, res))
                # print 'A:',f,x,k, res
            else:
                option_a.append((f, x, k, None))
            # try by shifting the key by 1
            res = self.xor_with_key(x.decode("hex"), k[1:] + k[0])
            if self.printable_ratio(res) == 1:
                option_b.append((f, x, k, res))
                # print 'B:',f,x,k, res
            else:
                option_b.append((f, x, k, None))

        xorstrings = []
        if None not in map(lambda y: y[3], option_a):
            xorstrings = option_a
        elif None not in map(lambda z: z[3], option_b):
            xorstrings = option_b

        for f, x, k, r in xorstrings:
            if r is not None:
                s1 = s1.replace(f, '"' + r + '"')

        if text != s1:
            output = s1
        return final, output

    @staticmethod
    def xor_with_key(s, k):
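        # Repeating-key XOR; the key is tiled across the input, e.g.
        # xor_with_key("\x00\x00\x00", "k") -> "kkk".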
        return ''.join([
            chr(ord(a) ^ ord(b)) for a, b in zip(s, (len(s) / len(k) + 1) * k)
        ])

    @staticmethod
    def zp_xor_with_key(s, k):
        return ''.join([
            a if a == '\0' or a == b else chr(ord(a) ^ ord(b))
            for a, b in zip(s, (len(s) / len(k) + 1) * k)
        ])

    @staticmethod
    def clean_up_final_layer(text):
        output = re.sub(r'<crowbar:[^>]+>', '', text)
        output = re.sub(r'\n\s*\n', '', output)
        return output

    # noinspection PyBroadException
    def vbe_decode(self, text):
        output = None
        final = False
        try:
            evbe_regex = re.compile(r'#@~\^......==(.+)......==\^#~@')
            evbe_present = re.search(evbe_regex, text)
            if evbe_present:
                evbe_res = self.decode(evbe_present.groups()[0])
                if evbe_res and evbe_res != text:
                    evbe_start = evbe_present.start()
                    evbe_end = evbe_present.end()
                    if evbe_start == 0 and evbe_end == len(text):
                        final = True
                        output = evbe_res
                    else:
                        output = text[:evbe_start] + evbe_res + text[evbe_end:]
        except:
            pass
        finally:
            return final, output

    # noinspection PyBroadException
    @staticmethod
    def convert_wide_unicode(text):
        normalized = []
        try:
            conv = text.decode('utf-16').encode('ascii', 'ignore')
            if len(conv) > 0:
                normalized.append(conv)
            else:
                normalized = None
        except:
            normalized = None
        return normalized

    # noinspection PyBroadException
    @staticmethod
    def extract_htmlscript(text):
        scripts = []
        try:
            for s in BeautifulSoup(text, 'lxml').find_all('script'):
                if s.string is not None:
                    scripts.append(s.string)
        except:
            scripts = None
        return scripts

    # --- Run Service --------------------------------------------------------------------------------------------------
    def execute(self, request):
        """
        Main Module.
        """
        result = Result()
        request.result = result

        if (request.task.size or 0) < 50000 and (
                request.tag.startswith('code') or
            (request.tag == "unknown" and (request.task.size or 0) < 5000)):
            patterns = PatternMatch()

            alfile = request.download()
            with open(alfile, "rb") as f:
                raw = f.read()

            # Get all IOCs that originally hit in the file (to filter later - the FrankenStrings service SHOULD catch them anyway)
            pat_values = patterns.ioc_match(raw,
                                            bogon_ip=True,
                                            just_network=False)
            before = []
            for k, val in pat_values.iteritems():
                if val == "":
                    asc_asc = unicodedata.normalize('NFKC', val).encode(
                        'ascii', 'ignore')
                    before.append(asc_asc)
                else:
                    for v in val:
                        before.append(v)

            # --- Stage 1 ----------------------------------------------------------------------------------------------
            # Get script(s) that we want
            code_extracts = [('^unknown$', self.convert_wide_unicode),
                             ('.*html.*', self.extract_htmlscript)]

            extracted_parts = None
            for tu in code_extracts:
                if re.match(re.compile(tu[0]), request.tag):
                    extracted_parts = tu[1](raw)
                    break
            if extracted_parts:
                parsed = [x for x in extracted_parts]
            else:
                parsed = [raw]

            # --- Stage 2 ----------------------------------------------------------------------------------------------
            # Hack time!
            for script in parsed:
                extract_file = False
                layer = script
                layers_list = []

                if request.deep_scan:
                    self.max_attempts = 50

                techniques = [
                    ('VBE Decode', self.vbe_decode, True),
                    ('MSWord macro vars', self.mswordmacro_vars, False),
                    ('Powershell vars', self.powershell_vars, False),
                    ('Concat strings', self.concat_strings, False),
                    ('String replace', self.string_replace, False),
                    ('Powershell carets', self.powershell_carets, False),
                    ('Array of strings', self.array_of_strings, False),
                    ('Fake array vars', self.vars_of_fake_arrays, False),
                    ('Simple XOR function', self.simple_xor_function, False),
                    ('Charcode', self.charcode, False),
                    ('Charcode hex', self.charcode_hex, False),
                    ('B64 Decode', self.b64decode_str, False)
                ]
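
                # Repeatedly apply each technique to the current layer; any
                # technique that produces output becomes the new layer. The
                # loop ends when a full pass yields nothing new, a technique
                # reports itself as final (e.g. a whole-file VBE decode), or
                # max_attempts passes have been made.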

                done = False
                idx = 0
                while not done:
                    if idx > self.max_attempts:
                        break
                    done = True
                    for name, technique, extract in techniques:
                        final, res = technique(layer)
                        if res:
                            layers_list.append((name, res))
                            if extract:
                                extract_file = True
                            # Looks like it worked, restart with new layer
                            layer = res
                            done = final
                            if done:
                                break
                    idx += 1

                if len(layers_list) > 0:
                    final_score = len(layers_list) * 10
                    clean = self.clean_up_final_layer(layers_list[-1][1])
                    if clean != raw:
                        pat_values = patterns.ioc_match(clean,
                                                        bogon_ip=True,
                                                        just_network=False)
                        after = []
                        for k, val in pat_values.iteritems():
                            if val == "":
                                asc_asc = unicodedata.normalize(
                                    'NFKC', val).encode('ascii', 'ignore')
                                after.append(asc_asc)
                            else:
                                for v in val:
                                    after.append(v)
                        diff_tags = list(
                            set(before).symmetric_difference(set(after)))
                        # Additional checks to decide whether the file should be extracted; 500 is an arbitrary score threshold
                        if (len(clean) > 1000 and final_score > 500) or (
                                len(before) < len(after)):
                            extract_file = True
                        res = (ResultSection(
                            SCORE.NULL,
                            "CrowBar detected possible obfuscated script:"))
                        mres = (ResultSection(
                            SCORE.NULL,
                            "The following CrowBar modules made deofuscation attempts:",
                            parent=res))
                        mres.score = final_score
                        lcount = Counter([x[0] for x in layers_list])
                        for l, c in lcount.iteritems():
                            mres.add_line("{0}, {1} time(s).".format(l, c))
                        if extract_file:
                            self.submit_extracted(clean, res, request)
                        # Display final layer
                        lres = (ResultSection(
                            SCORE.NULL,
                            "Final layer:",
                            body_format=TEXT_FORMAT.MEMORY_DUMP,
                            parent=res))
                        if extract_file:
                            lres.add_line("First 500 bytes of file:")
                            lres.add_line(clean[:500])
                        else:
                            lres.add_line("First 5000 bytes of file:")
                            lres.add_line(clean[:5000])
                        # Look for all IOCs in final layer
                        if len(pat_values) > 0 and len(diff_tags) > 0:
                            for ty, val in pat_values.iteritems():
                                if val == "":
                                    asc_asc = unicodedata.normalize(
                                        'NFKC', val).encode('ascii', 'ignore')
                                    if asc_asc in diff_tags:
                                        res.add_tag(TAG_TYPE[ty], asc_asc,
                                                    TAG_WEIGHT.LOW)
                                else:
                                    for v in val:
                                        if v in diff_tags:
                                            res.add_tag(
                                                TAG_TYPE[ty], v,
                                                TAG_WEIGHT.LOW)
                        result.add_result(res)
Example #21
class PeePDF(ServiceBase):
    AL_PeePDF_001 = Heuristic("AL_PeePDF_001", "Embedded PDF in XDP", "document/pdf",
                              dedent("""\
                                     If the <chunk> tag is present in the PDF file contents, there is an
                                     embedded PDF in the XDP.
                                     """))
    AL_PeePDF_002 = Heuristic("AL_PeePDF_002", "Large Buffers", "document/pdf",
                              dedent("""\
                                     A buffer was found in the javascript code.
                                     """))
    AL_PeePDF_003 = Heuristic("AL_PeePDF_003", "Contains eval", "document/pdf",
                              dedent("""\
                                     The eval() function is found in the javascript block. This is 
                                     commonly used to launch deobfuscated javascript code.
                                     """))
    AL_PeePDF_004 = Heuristic("AL_PeePDF_004", "Contains unescape", "document/pdf",
                              dedent("""\
                                     The unescape() function is found in the javascript block. Malware 
                                     could use this to deobfuscate code blocks.
                                     """))
    AL_PeePDF_005 = Heuristic("AL_PeePDF_005", "Javascript Shellcode", "document/pdf",
                              dedent("""\
                                     The unescaped bytes reported by the PeePDF tool are run in an
                                     emulator; if they execute, hidden shellcode was found inside.
                                     """))
    AL_PeePDF_006 = Heuristic("AL_PeePDF_006", "Unescaped Javascript Buffer", "document/pdf",
                              dedent("""\
                                     If the search for javascript shellcode fails, the javascript
                                     contains an unknown unescaped buffer.
                                     """))
    AL_PeePDF_007 = Heuristic("AL_PeePDF_007", "Suspicious Javascript", "document/pdf",
                              dedent("""\
                                     If the file contents of the PDF contain either "eval" or "unescape",
                                     or we were able to find large buffer variables, this is a good flag
                                     for malicious content.
                                     """))
    
    SERVICE_ACCEPTS = '(document/pdf|code/xml)'
    SERVICE_CATEGORY = "Static Analysis"
    SERVICE_DESCRIPTION = "This service uses the Python PeePDF library information from PDFs including javascript " \
                          "blocks which it will attempt to deobfuscate, if necessary, for further analysis."
    SERVICE_ENABLED = True
    SERVICE_REVISION = ServiceBase.parse_revision('$Id: 51f2c8113147f7d324d247167979f1d8d499a72e $')
    SERVICE_VERSION = '1'
    SERVICE_CPU_CORES = 0.5
    SERVICE_RAM_MB = 512

    SERVICE_DEFAULT_CONFIG = {
        'max_pdf_size': 3000000
    }

    def __init__(self, cfg=None):
        super(PeePDF, self).__init__(cfg)
        self.max_pdf_size = self.cfg.get('max_pdf_size', 3000000)

    # noinspection PyUnresolvedReferences
    def import_service_deps(self):
        global analyseJS, isPostscript, PDFParser, vulnsDict, unescape
        from al_services.alsvc_peepdf.peepdf.JSAnalysis import analyseJS, isPostscript, unescape
        from al_services.alsvc_peepdf.peepdf.PDFCore import PDFParser, vulnsDict

    # noinspection PyUnusedLocal,PyMethodMayBeStatic
    def _report_embedded_xdp(self, file_res, chunk_number, binary, leftover):
        file_res.add_section(ResultSection(SCORE['INFO'], ["Found %s " % chunk_number, "Embedded PDF (in XDP)"]))
        file_res.add_tag(TAG_TYPE['FILE_SUMMARY'], "Embedded PDF (in XDP)", 10, 'IDENTIFICATION')
        file_res.report_heuristic(PeePDF.AL_PeePDF_001)

    def find_xdp_embedded(self, filename, cbin, request):
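        # XDP documents wrap base64-encoded PDF content in <chunk> elements;
        # each chunk is base64-decoded and saved as xdp_<n>.pdf for further
        # analysis.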
        file_res = request.result
        if "<pdf" in cbin and "<document>"in cbin and "<chunk>" in cbin:
            chunks = cbin.split("<chunk>")

            chunk_number = 0
            leftover = ""
            for chunk in chunks:
                if "</chunk>" not in chunk:
                    leftover += chunk.replace("<document>", "").replace('<pdf xmlns="http://ns.adobe.com/xdp/pdf/">',
                                                                        "")
                    continue

                chunk_number += 1

                un_b64 = None
                # noinspection PyBroadException
                try:
                    un_b64 = b64decode(chunk.split("</chunk>")[0])
                except:
                    self.log.error("Found <pdf>, <document> and <chunk> tags inside an xdp file but could not "
                                   "un-base64 the content.")

                if un_b64:
                    new_filename = "xdp_%d.pdf" % chunk_number
                    file_path = os.path.join(self.working_directory, new_filename)
                    f = open(file_path, "wb")
                    f.write(un_b64)
                    f.close()
                    request.add_extracted(file_path, "UnXDP from %s" % filename)

            if chunk_number > 0:
                self._report_embedded_xdp(file_res, chunk_number, cbin, leftover)

        return file_res

    def execute(self, request):
        request.result = Result()
        temp_filename = request.download()

        # Filter out large documents
        if os.path.getsize(temp_filename) > self.max_pdf_size:
            request.result.add_section(ResultSection(SCORE['NULL'], "PDF Analysis of the file was skipped because the "
                                                                    "file is too big (limit is %i MB)." % (
                                                                    self.max_pdf_size / 1000 / 1000)))
            return

        filename = os.path.basename(temp_filename)
        # noinspection PyUnusedLocal
        file_content = ''
        with open(temp_filename, 'r') as f:
            file_content = f.read()

        if '<xdp:xdp' in file_content:
            self.find_xdp_embedded(filename, file_content, request)

        self.peepdf_analysis(temp_filename, file_content, request)

    # noinspection PyBroadException
    @staticmethod
    def get_big_buffs(data, buff_min_size=256):
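        # Collects large double-quoted string buffers. Adjacent literals that
        # are separated only by a '+' are joined into a single buffer, and a
        # candidate is kept only if validate_non_humanreadable_buff() decides
        # it is big enough and not human-readable. Large HTML/XML comments are
        # collected the same way.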
        # Hunt for big variables
        var_re = r'[^\\]?"(.*?[^\\])"'
        last_m = None
        out = []

        for m in re.finditer(var_re, data):
            # noinspection PyUnresolvedReferences
            pos = m.regs[0]
            match = m.group(1)
            if last_m:
                last_pos, last_match = last_m
                between = data[last_pos[1]:pos[0] + 1]
                try:
                    between, rest = between.split("//", 1)
                    try:
                        between = between.strip() + rest.split("\n", 1)[1].strip()
                    except:
                        pass
                except:
                    pass
                finally:
                    between = between.strip()

                if between == "+":
                    match = last_match + match
                    pos = (last_pos[0], pos[1])
                else:
                    if validate_non_humanreadable_buff(last_match, buff_min_size=buff_min_size):
                        out.append(last_match)

            last_m = (pos, match)

        if last_m:
            if validate_non_humanreadable_buff(last_m[1]):
                out.append(last_m[1])

        # Hunt for big comments
        var_comm_re = r"<!--(.*?)--\s?>"

        for m in re.finditer(var_comm_re, data, flags=re.DOTALL):
            match = m.group(1)
            if validate_non_humanreadable_buff(match):
                out.append(match)

        return out

    @staticmethod
    def check_dangerous_func(data):
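        # Flags "eval" and "unescape" only when they appear as standalone
        # tokens, i.e. when the characters immediately before and after the
        # keyword are not letters, to avoid matching longer identifiers.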
        has_eval = False
        has_unescape = False
        # eval
        temp_eval = data.split("eval")
        if len(temp_eval) > 1:
            idx = 0
            for i in temp_eval[:-1]:
                idx += 1
                if (97 <= ord(i[-1]) <= 122) or (65 <= ord(i[-1]) <= 90):
                    continue
                if (97 <= ord(temp_eval[idx][0]) <= 122) or \
                        (65 <= ord(temp_eval[idx][0]) <= 90):
                    continue

                has_eval = True
                break

        # unescape
        temp_unesc = data.split("unescape")
        if len(temp_unesc) > 1:
            idx = 0
            for i in temp_unesc[:-1]:
                idx += 1
                if (97 <= ord(i[-1]) <= 122) or (65 <= ord(i[-1]) <= 90):
                    continue
                if (97 <= ord(temp_unesc[idx][0]) <= 122) or \
                        (65 <= ord(temp_unesc[idx][0]) <= 90):
                    continue

                has_unescape = True
                break

        return has_eval, has_unescape

    @staticmethod
    def list_first_x(mylist, size=20):
        add_reminder = len(mylist) > size

        mylist = mylist[:size]
        if add_reminder:
            mylist.append("...")

        return str(mylist)

    # noinspection PyBroadException,PyUnboundLocalVariable
    def peepdf_analysis(self, temp_filename, file_content, request):
        file_res = request.result
        try:
            res_list = []
            js_stream = []
            f_list = []
            js_dump = []

            pdf_parser = PDFParser()
            ret, pdf_file = pdf_parser.parse(temp_filename, True, False, file_content)
            if ret == 0:
                stats_dict = pdf_file.getStats()

                if ", ".join(stats_dict['Errors']) == "Bad PDF header, %%EOF not found, PDF sections not found, No " \
                                                      "indirect objects found in the body":
                    # Not a PDF
                    return

                res = ResultSection(SCORE['NULL'], "PDF File information")
                res.add_line('File: ' + stats_dict['File'])
                res.add_line(['MD5: ', stats_dict['MD5']])
                res.add_line(['SHA1: ', stats_dict['SHA1']])
                res.add_line('SHA256: ' + stats_dict['SHA256'])
                res.add_line(['Size: ', stats_dict['Size'], ' bytes'])
                res.add_line('Version: ' + stats_dict['Version'])
                res.add_line('Binary: ' + stats_dict['Binary'])
                res.add_line('Linearized: ' + stats_dict['Linearized'])
                res.add_line('Encrypted: ' + stats_dict['Encrypted'])
                if stats_dict['Encryption Algorithms']:
                    temp = ' ('
                    for algorithmInfo in stats_dict['Encryption Algorithms']:
                        temp += algorithmInfo[0] + ' ' + str(algorithmInfo[1]) + ' bits, '
                    temp = temp[:-2] + ')'
                    res.add_line(temp)
                res.add_line('Updates: ' + stats_dict['Updates'])
                res.add_line('Objects: ' + stats_dict['Objects'])
                res.add_line('Streams: ' + stats_dict['Streams'])
                res.add_line('Comments: ' + stats_dict['Comments'])
                res.add_line('Errors: ' + {True: ", ".join(stats_dict['Errors']),
                                           False: "None"}[len(stats_dict['Errors']) != 0])
                res.add_line("")

                for version in range(len(stats_dict['Versions'])):
                    stats_version = stats_dict['Versions'][version]
                    res_version = ResultSection(SCORE['NULL'], 'Version ' + str(version), parent=res)
                    if stats_version['Catalog'] is not None:
                        res_version.add_line('Catalog: ' + stats_version['Catalog'])
                    else:
                        res_version.add_line('Catalog: ' + 'No')
                    if stats_version['Info'] is not None:
                        res_version.add_line('Info: ' + stats_version['Info'])
                    else:
                        res_version.add_line('Info: ' + 'No')
                    res_version.add_line('Objects (' + stats_version['Objects'][0] + '): ' +
                                         self.list_first_x(stats_version['Objects'][1]))
                    if stats_version['Compressed Objects'] is not None:
                        res_version.add_line('Compressed objects (' + stats_version['Compressed Objects'][0] + '): ' +
                                             self.list_first_x(stats_version['Compressed Objects'][1]))

                    if stats_version['Errors'] is not None:
                        res_version.add_line('Errors (' + stats_version['Errors'][0] + '): ' +
                                             self.list_first_x(stats_version['Errors'][1]))
                    res_version.add_line('Streams (' + stats_version['Streams'][0] + '): ' +
                                         self.list_first_x(stats_version['Streams'][1]))
                    if stats_version['Xref Streams'] is not None:
                        res_version.add_line('Xref streams (' + stats_version['Xref Streams'][0] + '): ' +
                                             self.list_first_x(stats_version['Xref Streams'][1]))
                    if stats_version['Object Streams'] is not None:
                        res_version.add_line('Object streams (' + stats_version['Object Streams'][0] + '): ' +
                                             self.list_first_x(stats_version['Object Streams'][1]))
                    if int(stats_version['Streams'][0]) > 0:
                        res_version.add_line('Encoded (' + stats_version['Encoded'][0] + '): ' +
                                             self.list_first_x(stats_version['Encoded'][1]))
                        if stats_version['Decoding Errors'] is not None:
                            res_version.add_line('Decoding errors (' + stats_version['Decoding Errors'][0] + '): ' +
                                                 self.list_first_x(stats_version['Decoding Errors'][1]))
                    if stats_version['Objects with JS code'] is not None:
                        res_version.add_line('Objects with JS '
                                             'code (' + stats_version['Objects with JS code'][0] + '): ' +
                                             self.list_first_x(stats_version['Objects with JS code'][1]))
                        js_stream.extend(stats_version['Objects with JS code'][1])

                    suspicious_score = SCORE['NULL']
                    actions = stats_version['Actions']
                    events = stats_version['Events']
                    vulns = stats_version['Vulns']
                    elements = stats_version['Elements']
                    if events is not None or actions is not None or vulns is not None or elements is not None:
                        res_suspicious = ResultSection(SCORE['NULL'], 'Suspicious elements', parent=res_version)
                        if events is not None:
                            for event in events:
                                res_suspicious.add_line(event + ': ' + self.list_first_x(events[event]))
                                suspicious_score += SCORE['LOW']
                        if actions is not None:
                            for action in actions:
                                res_suspicious.add_line(action + ': ' + self.list_first_x(actions[action]))
                                suspicious_score += SCORE['LOW']
                        if vulns is not None:
                            for vuln in vulns:
                                if vuln in vulnsDict:
                                    temp = [vuln, ' (']
                                    for vulnCVE in vulnsDict[vuln]:
                                        if len(temp) != 2:
                                            temp.append(',')
                                        temp.append(vulnCVE)
                                        cve_found = re.search("CVE-[0-9]{4}-[0-9]{4}", vulnCVE)
                                        if cve_found:
                                            file_res.add_tag(TAG_TYPE['EXPLOIT_NAME'],
                                                             vulnCVE[cve_found.start():cve_found.end()],
                                                             TAG_WEIGHT['MED'],
                                                             usage='IDENTIFICATION')
                                            file_res.add_tag(TAG_TYPE['FILE_SUMMARY'],
                                                             vulnCVE[cve_found.start():cve_found.end()],
                                                             TAG_WEIGHT['MED'],
                                                             usage='IDENTIFICATION')
                                    temp.append('): ')
                                    temp.append(str(vulns[vuln]))
                                    res_suspicious.add_line(temp)
                                else:
                                    res_suspicious.add_line(vuln + ': ' + str(vulns[vuln]))
                                suspicious_score += SCORE['HIGH']
                        if elements is not None:
                            for element in elements:
                                if element in vulnsDict:
                                    temp = [element, ' (']
                                    for vulnCVE in vulnsDict[element]:
                                        if len(temp) != 2:
                                            temp.append(',')
                                        temp.append(vulnCVE)
                                        cve_found = re.search("CVE-[0-9]{4}-[0-9]{4}", vulnCVE)
                                        if cve_found:
                                            file_res.add_tag(TAG_TYPE['EXPLOIT_NAME'],
                                                             vulnCVE[cve_found.start():cve_found.end()],
                                                             TAG_WEIGHT['MED'],
                                                             usage='IDENTIFICATION')
                                            file_res.add_tag(TAG_TYPE['FILE_SUMMARY'],
                                                             vulnCVE[cve_found.start():cve_found.end()],
                                                             TAG_WEIGHT['MED'],
                                                             usage='IDENTIFICATION')
                                    temp.append('): ')
                                    temp.append(str(elements[element]))
                                    res_suspicious.add_line(temp)
                                    suspicious_score += SCORE['HIGH']
                                else:
                                    res_suspicious.add_line('\t\t' + element + ': ' + str(elements[element]))
                                    suspicious_score += SCORE['LOW']
                        res_suspicious.change_score(suspicious_score)

                    url_score = SCORE['NULL']
                    urls = stats_version['URLs']
                    if urls is not None:
                        res.add_line("")
                        res_url = ResultSection(SCORE['NULL'], 'Found URLs', parent=res)
                        for url in urls:
                            res_url.add_line('\t\t' + url)
                            url_score += SCORE['MED']

                        res_url.change_score(url_score)

                    for obj in stats_version['Objects'][1]:
                        cur_obj = pdf_file.getObject(obj, version)

                        if cur_obj.containsJScode:
                            cur_res = ResultSection(SCORE['NULL'], 'Object [%s %s] contains %s block of Javascript' %
                                                    (obj, version, len(cur_obj.JSCode)))
                            score_modifier = SCORE['NULL']

                            js_idx = 0
                            for js in cur_obj.JSCode:
                                js_idx += 1
                                js_score = 0
                                js_code, unescaped_bytes, _, _ = analyseJS(js)

                                js_dump += [x for x in js_code if not isPostscript(x)]

                                # Malicious characteristics
                                big_buffs = self.get_big_buffs("".join(js_code))
                                if len(big_buffs) > 0:
                                    js_score += SCORE['VHIGH'] * len(big_buffs)
                                has_eval, has_unescape = self.check_dangerous_func("".join(js_code))
                                if has_unescape:
                                    js_score += SCORE['HIGH']
                                if has_eval:
                                    js_score += SCORE['HIGH']

                                js_cmt = ""
                                if has_eval or has_unescape or len(big_buffs) > 0:
                                    score_modifier += js_score
                                    js_cmt = "Suspiciously malicious "
                                    file_res.add_tag(TAG_TYPE['FILE_SUMMARY'], "Suspicious javascript in PDF",
                                                     TAG_WEIGHT['MED'], usage='IDENTIFICATION')
                                    file_res.report_heuristic(PeePDF.AL_PeePDF_007)
                                js_res = ResultSection(0, "%sJavascript Code (block: %s)" % (js_cmt, js_idx),
                                                       parent=cur_res)

                                if js_score > SCORE['NULL']:
                                    temp_js_outname = "object%s-%s_%s.js" % (obj, version, js_idx)
                                    temp_js_path = os.path.join(self.working_directory, temp_js_outname)
                                    temp_js_bin = "".join(js_code).encode("utf-8")
                                    f = open(temp_js_path, "wb")
                                    f.write(temp_js_bin)
                                    f.close()
                                    f_list.append(temp_js_path)

                                    js_res.add_line(["The javascript block was saved as ", temp_js_outname])
                                    if has_eval or has_unescape:
                                        analysis_score = SCORE['NULL']
                                        analysis_res = ResultSection(analysis_score, "[Suspicious Functions]",
                                                                     parent=js_res)
                                        if has_eval:
                                            analysis_res.add_line("eval: This javascript block uses eval() function"
                                                                  " which is often used to launch deobfuscated"
                                                                  " javascript code.")
                                            analysis_score += SCORE['HIGH']
                                            file_res.report_heuristic(PeePDF.AL_PeePDF_003)
                                        if has_unescape:
                                            analysis_res.add_line("unescape: This javascript block uses unescape() "
                                                                  "function. It may be legitimate but it is definitely"
                                                                  " suspicious since malware often use this to "
                                                                  "deobfuscate code blocks.")
                                            analysis_score += SCORE['HIGH']
                                            file_res.report_heuristic(PeePDF.AL_PeePDF_004)

                                        analysis_res.change_score(analysis_score)

                                    buff_idx = 0
                                    for buff in big_buffs:
                                        buff_idx += 1
                                        error, new_buff = unescape(buff)
                                        if error == 0:
                                            buff = new_buff

                                        if buff not in unescaped_bytes:
                                            temp_path_name = None
                                            if ";base64," in buff[:100] and "data:" in buff[:100]:
                                                temp_path_name = "obj%s_unb64_%s.buff" % (obj, buff_idx)
                                                try:
                                                    buff = b64decode(buff.split(";base64,")[1].strip())
                                                    temp_path = os.path.join(self.working_directory, temp_path_name)
                                                    f = open(temp_path, "wb")
                                                    f.write(buff)
                                                    f.close()
                                                    f_list.append(temp_path)
                                                except:
                                                    self.log.error("Found 'data:;base64, ' buffer "
                                                                   "but failed to base64 decode.")
                                                    temp_path_name = None

                                            ResultSection(SCORE['VHIGH'],
                                                          "A %s bytes buffer was found in the javascript "
                                                          "block%s. Here are the first 256 bytes." %
                                                          (len(buff), {True: " and was resubmitted as %s" %
                                                                             temp_path_name,
                                                                       False: ""}[temp_path_name is not None]),
                                                          parent=js_res, body=hexdump(buff[:256]),
                                                          body_format=TEXT_FORMAT.MEMORY_DUMP)
                                            file_res.report_heuristic(PeePDF.AL_PeePDF_002)

                                processed_sc = []
                                sc_idx = 0
                                for sc in unescaped_bytes:
                                    if sc not in processed_sc:
                                        sc_idx += 1
                                        processed_sc.append(sc)

                                        try:
                                            sc = sc.decode("hex")
                                        except:
                                            pass

                                        shell_score = SCORE['VHIGH']
                                        temp_path_name = "obj%s_unescaped_%s.buff" % (obj, sc_idx)

                                        shell_res = ResultSection(shell_score,
                                                                  "Unknown unescaped  %s bytes "
                                                                  "javascript buffer (id: %s) was resubmitted as %s. "
                                                                  "Here are the first 256 bytes." % (len(sc),
                                                                                                     sc_idx,
                                                                                                     temp_path_name),
                                                                  parent=js_res)
                                        shell_res.set_body(hexdump(sc[:256]), TEXT_FORMAT.MEMORY_DUMP)

                                        temp_path = os.path.join(self.working_directory, temp_path_name)
                                        f = open(temp_path, "wb")
                                        f.write(sc)
                                        f.close()
                                        f_list.append(temp_path)

                                        file_res.add_tag(TAG_TYPE['FILE_SUMMARY'], "Unescaped Javascript Buffer",
                                                         TAG_WEIGHT['MED'],
                                                         usage='IDENTIFICATION')
                                        file_res.report_heuristic(PeePDF.AL_PeePDF_006)
                                        score_modifier += shell_score

                            if score_modifier > SCORE['NULL']:
                                res_list.append(cur_res)

                        elif cur_obj.type == "stream":
                            if cur_obj.isEncodedStream and cur_obj.filter is not None:
                                data = cur_obj.decodedStream
                                encoding = cur_obj.filter.value.replace("[", "").replace("]", "").replace("/",
                                                                                                          "").strip()
                                val = cur_obj.rawValue
                                otype = cur_obj.elements.get("/Type", None)
                                sub_type = cur_obj.elements.get("/Subtype", None)
                                length = cur_obj.elements.get("/Length", None)

                            else:
                                data = cur_obj.rawStream
                                encoding = None
                                val = cur_obj.rawValue
                                otype = cur_obj.elements.get("/Type", None)
                                sub_type = cur_obj.elements.get("/Subtype", None)
                                length = cur_obj.elements.get("/Length", None)

                            if otype:
                                otype = otype.value.replace("/", "").lower()
                            if sub_type:
                                sub_type = sub_type.value.replace("/", "").lower()
                            if length:
                                length = length.value

                            if otype == "embeddedfile":
                                if len(data) > 4096:
                                    # TODO: we might have to be smarter here.
                                    cur_res = ResultSection(SCORE['NULL'], 'Embedded file found (%s bytes) [obj: %s %s]'
                                                                           ' and dumped for analysis %s%s%s' %
                                                            (length, obj, version, {True: "(Type: %s) " % otype,
                                                                                    False: ""}[otype is not None],
                                                             {True: "(SubType: %s) " % sub_type,
                                                              False: ""}[sub_type is not None],
                                                             {True: "(Encoded with %s)" % encoding,
                                                              False: ""}[encoding is not None]))
                                    temp_path_name = "EmbeddedFile_%s%s.obj" % (obj, {True: "_%s" % encoding,
                                                                                      False: ""}[encoding is not None])
                                    temp_path = os.path.join(self.working_directory, temp_path_name)
                                    f = open(temp_path, "wb")
                                    f.write(data)
                                    f.close()
                                    f_list.append(temp_path)

                                    cur_res.add_line(["The EmbeddedFile object was saved as ", temp_path_name])
                                    res_list.append(cur_res)

                            elif otype not in BANNED_TYPES:
                                cur_res = ResultSection(SCORE['NULL'], 'Unknown stream found [obj: %s %s] %s%s%s' %
                                                        (obj, version, {True: "(Type: %s) " % otype,
                                                                        False: ""}[otype is not None],
                                                         {True: "(SubType: %s) " % sub_type,
                                                          False: ""}[sub_type is not None],
                                                         {True: "(Encoded with %s)" % encoding,
                                                          False: ""}[encoding is not None]))
                                for line in val.splitlines():
                                    cur_res.add_line(line)

                                emb_res = ResultSection(SCORE.NULL, 'First 256 bytes', parent=cur_res)
                                emb_res.set_body(hexdump(data[:256]), TEXT_FORMAT.MEMORY_DUMP)
                                res_list.append(cur_res)
                        else:
                            pass

                file_res.add_section(res)

                for results in res_list:
                    file_res.add_section(results)

                if js_dump:
                    js_dump_res = ResultSection(SCORE['NULL'], 'Full Javascript dump')

                    temp_js_dump = "javascript_dump.js"
                    temp_js_dump_path = os.path.join(self.working_directory, temp_js_dump)
                    try:
                        temp_js_dump_bin = "\n\n----\n\n".join(js_dump).encode("utf-8")
                    except UnicodeDecodeError:
                        temp_js_dump_bin = "\n\n----\n\n".join(js_dump)
                    temp_js_dump_sha1 = hashlib.sha1(temp_js_dump_bin).hexdigest()
                    f = open(temp_js_dump_path, "wb")
                    f.write(temp_js_dump_bin)
                    f.flush()
                    f.close()
                    f_list.append(temp_js_dump_path)

                    js_dump_res.add_line(["The javascript dump was saved as ", temp_js_dump])
                    js_dump_res.add_line(["The sha1 for the javascript dump is ", temp_js_dump_sha1])

                    file_res.add_tag(TAG_TYPE['PDF_JAVASCRIPT_SHA1'], temp_js_dump_sha1, TAG_WEIGHT['HIGH'],
                                     usage='CORRELATION')
                    file_res.add_section(js_dump_res)

                for filename in f_list:
                    request.add_extracted(filename, "Dumped from %s" % os.path.basename(temp_filename))

            else:
                res = ResultSection(SCORE['INFO'], "ERROR: Could not parse file with peepdf.")
                file_res.add_section(res)
        finally:
            try:
                del pdf_file
            except:
                pass

            try:
                del pdf_parser
            except:
                pass

            gc.collect()
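# Note: the ResultSection titles above build their optional "(Type: ...)",
# "(SubType: ...)" and "(Encoded with ...)" fragments with a dict-as-ternary
# idiom, {True: "...", False: ""}[condition]. A minimal standalone sketch of
# the same pattern (illustrative function name, not part of the service):
def describe_stream(otype=None, sub_type=None, encoding=None):
    return 'Unknown stream found %s%s%s' % (
        {True: "(Type: %s) " % otype, False: ""}[otype is not None],
        {True: "(SubType: %s) " % sub_type, False: ""}[sub_type is not None],
        {True: "(Encoded with %s)" % encoding, False: ""}[encoding is not None])

# describe_stream(otype="xobject", encoding="FlateDecode")
# -> 'Unknown stream found (Type: xobject) (Encoded with FlateDecode)'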
Example #22
class Manalyze(ServiceBase):
    SERVICE_CATEGORY = 'Static Analysis'
    SERVICE_ACCEPTS = 'executable/windows'
    SERVICE_REVISION = ServiceBase.parse_revision('$Id$')
    SERVICE_VERSION = '1'
    SERVICE_ENABLED = True
    SERVICE_STAGE = 'CORE'
    SERVICE_CPU_CORES = 1
    SERVICE_RAM_MB = 256

    # Set config defaults for plugins
    SERVICE_DEFAULT_CONFIG = {
        'ClamAV': False,
        'Compilers': True,
        'Strings': True,
        'FindCrypt': True,
        'CryptoAddress': True,
        'Packer': True,
        'Imports': True,
        'Resources': True,
        'Mitigation': True,
        'Overlay': True,
        "Authenticode": False,
        "Virustotal": False
    }

    # Heuristics

    def __init__(self, cfg=None):
        super(Manalyze, self).__init__(cfg)
        self.result = None

    def start(self):
        self.log.debug("Manalyze service started")

    def execute(self, request):
        local = request.download()
        self.result = request.result

        # Start construction of CLI string
        local_dir = os.path.dirname(
            os.path.realpath(__file__)) + '/Manalyze/bin'

        os.chdir(local_dir)

        cmdLine = ['./manalyze', local, '-o', 'json', '-d', 'all', '--hashes']

        self.construct_plugins(cmdLine)

        try:
            result_section = self.parse(output=subprocess.check_output(
                cmdLine, preexec_fn=set_death_signal()))
        except:
            result_section = ResultSection(SCORE.NULL, "Summary")
            result_section.add_line(subprocess.check_output(cmdLine))
            result_section.add_line("JSON Decoding Failed!")
            raise

        result = Result()
        result.add_section(result_section)
        # result.add_section(test_section)
        request.result = result

    def parse(self, output=None):
        data = json.loads(str(output))
        parent_section = ResultSection(SCORE.NULL, "Manalyze Results:")
        for name, level2 in data.iteritems():
            # Skip the first level (It's the filename)
            for key, value in level2.iteritems():
                section = ResultSection(SCORE.NULL, key)
                self.recurse_dict(value, section)

                if section.body.count("\n") > 25:
                    section.body_format = TEXT_FORMAT.MEMORY_DUMP
                parent_section.add_section(section)

        return parent_section

    def recurse_dict(self, item, parent_section):
        for key, value in item.iteritems():
            if isinstance(value, dict):
                section = ResultSection(SCORE.NULL,
                                        key,
                                        body_format=TEXT_FORMAT.MEMORY_DUMP)
                self.recurse_dict(value, section)
                parent_section.add_section(section)

            elif isinstance(value, list):
                parent_section.add_line(key + ":")
                parent_section.add_lines(value)

            else:

                while True:
                    retry = False
                    try:
                        if key in self.indicator_keys:
                            func = self.indicator_keys.get(key)
                            func(self, value, parent_section)

                        elif isinstance(value, int):
                            parent_section.add_line(key + ": " + str(value) +
                                                    " (" + str(hex(value)) +
                                                    ")")

                        else:
                            if isinstance(value, str):
                                self.tag_analyze(value, parent_section)
                            parent_section.add_line(key + ": " + str(value))
                    except (UnicodeDecodeError, UnicodeEncodeError) as e:
                        if retry: break
                        value = value.encode("ascii", "ignore")
                        retry = True
                        self.log.debug(str(e) + "\n----Retrying...----")
                        continue
                    break

    def construct_plugins(self, cmd_line):
        cmd_line.append('-p')

        plugin_line = ''
        for key, value in self.cfg.iteritems():
            if value:
                plugin_line += key.lower() + ","

        if plugin_line.endswith(","): plugin_line = plugin_line[:-1]

        if plugin_line != '': cmd_line.append(plugin_line)
        else: cmd_line.pop()

        return cmd_line
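        # Worked example (hedged; values are illustrative): with
        #   self.cfg = {'Compilers': True, 'ClamAV': False, 'Packer': True}
        # the loop above builds plugin_line = 'compilers,packer' (in dict order)
        # and the command line gains ['-p', 'compilers,packer']; if no plugin is
        # enabled, the dangling '-p' is popped again.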

    def tag_analyze(self, value, section):
        if is_valid_ip(value):
            section.add_tag(TAG_TYPE["NET_IP"], value, TAG_WEIGHT.LOW)

        if is_valid_email(value):
            section.add_tag(TAG_TYPE["NET_EMAIL"], value, TAG_WEIGHT.LOW)

        if is_valid_domain(value):
            section.add_tag(TAG_TYPE["NET_DOMAIN"], value, TAG_WEIGHT.LOW)

    def level_score(self, value, parent_section):

        if value == 1:
            parent_section.change_score(SCORE.INFO)
        elif value == 2:
            parent_section.change_score(SCORE.LOW)
        elif value == 3:
            parent_section.change_score(SCORE.HIGH)

    def entropy_score(self, value, parent_section):
        if value > 7.5:
            parent_section.add_section(
                ResultSection(SCORE.HIGH, "Section has high entropy!"))

    indicator_keys = {'level': level_score, 'entropy': entropy_score}
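# The indicator_keys mapping above lets recurse_dict dispatch known report keys
# ('level', 'entropy') to the scoring helpers instead of printing them verbatim.
# A minimal standalone sketch of the same dispatch pattern (illustrative names,
# plain strings in place of AL SCORE values):
def on_level(value):
    return {1: "INFO", 2: "LOW", 3: "HIGH"}.get(value)

def on_entropy(value):
    return "HIGH" if value > 7.5 else None

handlers = {'level': on_level, 'entropy': on_entropy}
# handlers['entropy'](7.9) -> 'HIGH'    handlers['level'](2) -> 'LOW'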
Example #23
class Cuckoo(ServiceBase):
    SERVICE_ACCEPTS = "(document/.*|executable/.*|java/.*|code/.*|archive/(zip|rar)|unknown|android/apk)"
    SERVICE_ENABLED = True
    SERVICE_REVISION = ServiceBase.parse_revision(
        '$Id: d3e33baad06150e654fadf86f46861172599cf23 $')
    SERVICE_STAGE = "CORE"
    SERVICE_TIMEOUT = 800
    SERVICE_CATEGORY = "Dynamic Analysis"
    SERVICE_CPU_CORES = 1.1
    SERVICE_RAM_MB = 4096
    SERVICE_SAFE_START = True

    SERVICE_DEFAULT_CONFIG = {
        "cuckoo_image": "cuckoo/cuckoobox:latest",
        "vm_meta": "cuckoo.config",
        "REMOTE_DISK_ROOT": "var/support/vm/disks/cuckoo/",
        "LOCAL_DISK_ROOT": "cuckoo_vms/",
        "LOCAL_VM_META_ROOT": "var/cuckoo/",
        "ramdisk_size": "2048M",
        "ram_limit": "3072m",
        "dedup_similar_percent": 80
    }

    SERVICE_DEFAULT_SUBMISSION_PARAMS = [{
        "default": CUCKOO_TIMEOUT,
        "name": "analysis_timeout",
        "type": "int",
        "value": CUCKOO_TIMEOUT,
    }, {
        "default": True,
        "name": "generate_report",
        "type": "bool",
        "value": True,
    }, {
        "default": False,
        "name": "dump_processes",
        "type": "bool",
        "value": False,
    }, {
        "default": "",
        "name": "dll_function",
        "type": "str",
        "value": "",
    }, {
        "default": "",
        "name": "arguments",
        "type": "str",
        "value": "",
    }, {
        "default": False,
        "name": "pull_memory",
        "type": "bool",
        "value": False,
    }, {
        "default": False,
        "name": "dump_memory",
        "type": "bool",
        "value": False,
    }, {
        "default": False,
        "name": "no_monitor",
        "type": "bool",
        "value": False,
    }, {
        "default": "inetsim",
        "list": ["inetsim", "gateway"],
        "name": "routing",
        "type": "list",
        "value": "inetsim",
    }]

    def __init__(self, cfg=None):

        super(Cuckoo, self).__init__(cfg)
        self.cfg = cfg
        self.vmm = None
        self.cm = None
        self.vm_xml = None
        self.vm_snapshot_xml = None
        self.vm_meta = None
        self.file_name = None
        self.base_url = None
        self.submit_url = None
        self.query_task_url = None
        self.delete_task_url = None
        self.query_report_url = None
        self.query_pcap_url = None
        self.query_machines_url = None
        self.query_machine_info_url = None
        self.task = None
        self.file_res = None
        self.cuckoo_task = None
        self.al_report = None
        self.session = None
        self.enabled_routes = None
        self.cuckoo_ip = None
        self.ssdeep_match_pct = 0
        self.restart_interval = 0

    def __del__(self):
        if self.cm is not None:
            try:
                self.cm.stop()
            except DockerException:
                pass

    # noinspection PyUnresolvedReferences
    def import_service_deps(self):
        global generate_al_result, CuckooVmManager, CuckooContainerManager
        from al_services.alsvc_cuckoo.cuckooresult import generate_al_result
        from al_services.alsvc_cuckoo.cuckoo_managers import CuckooVmManager, CuckooContainerManager

    def set_urls(self):
        base_url = "http://%s:%s" % (self.cuckoo_ip, CUCKOO_API_PORT)
        self.submit_url = "%s/%s" % (base_url, CUCKOO_API_SUBMIT)
        self.query_task_url = "%s/%s" % (base_url, CUCKOO_API_QUERY_TASK)
        self.delete_task_url = "%s/%s" % (base_url, CUCKOO_API_DELETE_TASK)
        self.query_report_url = "%s/%s" % (base_url, CUCKOO_API_QUERY_REPORT)
        self.query_pcap_url = "%s/%s" % (base_url, CUCKOO_API_QUERY_PCAP)
        self.query_machines_url = "%s/%s" % (base_url,
                                             CUCKOO_API_QUERY_MACHINES)
        self.query_machine_info_url = "%s/%s" % (base_url,
                                                 CUCKOO_API_QUERY_MACHINE_INFO)

    def start(self):
        self.vmm = CuckooVmManager(self.cfg)
        self.cm = CuckooContainerManager(self.cfg, self.vmm)

        self._register_cleanup_op({
            'type':
            'shell',
            'args':
            shlex.split("docker rm --force %s" % self.cm.name)
        })

        self.log.debug("VMM and CM started!")
        # Start the container
        self.cuckoo_ip = self.cm.start_container(self.cm.name)
        self.restart_interval = random.randint(45, 55)
        self.file_name = None
        self.set_urls()
        self.ssdeep_match_pct = int(self.cfg.get("dedup_similar_percent", 80))

        for param in forge.get_datastore().get_service(
                self.SERVICE_NAME)['submission_params']:
            if param['name'] == "routing":
                self.enabled_routes = param['list']
                if self.enabled_routes[0] != param['default']:
                    self.enabled_routes.remove(param['default'])
                    self.enabled_routes.insert(0, param['default'])

        if self.enabled_routes is None:
            raise ValueError("No routing submission_parameter.")
        self.log.debug("Cuckoo started!")

    def find_machine(self, full_tag, route):
        # substring search
        vm_list = Counter()
        if route not in self.cm.tag_map or route not in self.enabled_routes:
            self.log.debug(
                "Invalid route selected for Cuckoo submission. Chosen: %s, permitted: %s, enabled: %s"
                % (route, self.enabled_routes, self.cm.tag_map.keys()))
            return None

        for tag, vm_name in self.cm.tag_map[route].iteritems():
            if tag == "default":
                vm_list[vm_name] += 0
                continue
            try:
                vm_list[vm_name] += full_tag.index(tag) + len(tag)
            except ValueError:
                continue

        if len(vm_list) == 0:
            pick = None
        else:
            pick = vm_list.most_common(1)[0][0]

        return pick
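        # Worked example (illustrative tag_map entries, assuming the structure
        # iterated above: route -> {tag: vm_name}, with a "default" entry):
        #   tag_map["inetsim"] = {"default": "win7", "document/office": "win7office"}
        #   find_machine("document/office/word", "inetsim")
        #   -> "win7office" scores index("document/office") + len(tag) = 0 + 15,
        #      "win7" keeps its default score of 0, so "win7office" is picked.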

    def trigger_cuckoo_reset(self, retry_cnt=30):
        self.log.info("Forcing docker container reboot due to Cuckoo failure.")
        try:
            self.cm.stop()
        except DockerException:
            pass
        self.cuckoo_ip = self.cm.start_container(self.cm.name)
        self.restart_interval = random.randint(45, 55)
        self.set_urls()
        return self.is_cuckoo_ready(retry_cnt)

    # noinspection PyTypeChecker
    def execute(self, request):
        if request.task.depth > 3:
            self.log.debug(
                "Cuckoo is exiting because it currently does not execute on great great grand children."
            )
            request.set_save_result(False)
            return
        self.session = requests.Session()
        self.task = request.task
        request.result = Result()
        self.file_res = request.result
        file_content = request.get()
        self.cuckoo_task = None
        self.al_report = None
        self.file_name = os.path.basename(request.path)

        full_memdump = False
        pull_memdump = False

        # Check the file extension
        original_ext = self.file_name.rsplit('.', 1)
        tag_extension = tag_to_extension.get(self.task.tag)

        # NOTE: Cuckoo still tries to identify files itself, so we only force the extension/package if the user
        # specifies one. However, we go through the trouble of renaming the file because the only way to have
        # certain modules run is to use the appropriate suffix (.jar, .vbs, etc.)

        # Check for a valid tag
        if tag_extension is not None and 'unknown' not in self.task.tag:
            file_ext = tag_extension
        # Check if the file was submitted with an extension
        elif len(original_ext) == 2:
            submitted_ext = original_ext[1]
            if submitted_ext not in SUPPORTED_EXTENSIONS:
                # This is the case where the submitted file was NOT identified, and the provided extension
                # isn't in the list of extensions that we explicitly support.
                self.log.debug(
                    "Cuckoo is exiting because it doesn't support the provided file type."
                )
                request.set_save_result(False)
                return
            else:
                # This is a usable extension. It might not run (if the submitter has lied to us).
                file_ext = '.' + submitted_ext
        else:
            # The file type is unknown and there is no extension we accept/recognize... no scan!
            self.log.debug(
                "Cuckoo is exiting because the file type could not be identified. %s %s"
                % (tag_extension, self.task.tag))
            return

        # Rename based on the found extension.
        if file_ext and self.task.sha256:
            self.file_name = self.task.sha256 + file_ext
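        # Worked examples of the extension logic above (hedged; assumes
        # tag_to_extension maps e.g. 'document/pdf' -> '.pdf' and that
        # SUPPORTED_EXTENSIONS holds bare extensions such as 'vbs'):
        #   tag 'document/pdf', any file name      -> renamed to <sha256>.pdf
        #   tag 'unknown',      'dropper.vbs'      -> renamed to <sha256>.vbs
        #   tag 'unknown',      no/unsupported ext -> service exits, nothing scanned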

        # Parse user-specified options
        kwargs = dict()
        task_options = []

        analysis_timeout = request.get_param('analysis_timeout')

        generate_report = request.get_param('generate_report')
        if generate_report is True:
            self.log.debug("Setting generate_report flag.")

        dump_processes = request.get_param('dump_processes')
        if dump_processes is True:
            self.log.debug("Setting procmemdump flag in task options")
            task_options.append('procmemdump=yes')

        dll_function = request.get_param('dll_function')
        if dll_function:
            task_options.append('function={}'.format(dll_function))

        arguments = request.get_param('arguments')
        if arguments:
            task_options.append('arguments={}'.format(arguments))

        # Parse extra options (these aren't user selectable because they are dangerous/slow)
        if request.get_param('pull_memory') and request.task.depth == 0:
            pull_memdump = True

        if request.get_param('dump_memory') and request.task.depth == 0:
            # Full system dump and volatility scan
            full_memdump = True
            kwargs['memory'] = True

        if request.get_param('no_monitor'):
            task_options.append("free=yes")

        routing = request.get_param('routing')
        if routing is None:
            routing = self.enabled_routes[0]

        select_machine = self.find_machine(self.task.tag, routing)

        if select_machine is None:
            # No matching VM and no default
            self.log.debug(
                "No Cuckoo vm matches tag %s and no machine is tagged as default."
                % self.task.tag)
            request.set_save_result(False)
            return

        kwargs['timeout'] = analysis_timeout
        kwargs['options'] = ','.join(task_options)
        if select_machine:
            kwargs['machine'] = select_machine

        self.cuckoo_task = CuckooTask(self.file_name, **kwargs)

        if self.restart_interval <= 0 or not self.is_cuckoo_ready():
            cuckoo_up = self.trigger_cuckoo_reset()
            if not cuckoo_up:
                self.session.close()
                raise RecoverableError(
                    "While restarting Cuckoo, Cuckoo never came back up.")
        else:
            self.restart_interval -= 1

        try:
            self.cuckoo_submit(file_content)
            if self.cuckoo_task.report:
                try:
                    machine_name = None
                    report_info = self.cuckoo_task.report.get('info', {})
                    machine = report_info.get('machine', {})

                    if isinstance(machine, dict):
                        machine_name = machine.get('name')

                    if machine_name is None:
                        self.log.debug(
                            'Unable to retrieve machine name from result.')
                        guest_ip = ""
                    else:
                        guest_ip = self.report_machine_info(machine_name)
                    self.log.debug(
                        "Generating AL Result from Cuckoo results..")
                    success = generate_al_result(self.cuckoo_task.report,
                                                 self.file_res, file_ext,
                                                 guest_ip,
                                                 self.SERVICE_CLASSIFICATION)
                    if success is False:
                        err_str = self.get_errors()
                        if "Machinery error: Unable to restore snapshot" in err_str:
                            raise RecoverableError(
                                "Cuckoo is restarting container: %s", err_str)

                        raise CuckooProcessingException(
                            "Cuckoo was unable to process this file. %s",
                            err_str)
                except RecoverableError:
                    self.trigger_cuckoo_reset(5)
                    raise
                except Exception as e:
                    # This is non-recoverable unless we were stopped during processing
                    self.trigger_cuckoo_reset(1)
                    if self.should_run:
                        self.log.exception("Error generating AL report: ")
                        raise CuckooProcessingException(
                            "Unable to generate cuckoo al report for task %s: %s"
                            % (safe_str(self.cuckoo_task.id), safe_str(e)))

                if self.check_stop():
                    raise RecoverableError(
                        "Cuckoo stopped during result processing..")

                if generate_report is True:
                    self.log.debug("Generating cuckoo report tar.gz.")

                    # Submit cuckoo analysis report archive as a supplementary file
                    tar_report = self.cuckoo_query_report(self.cuckoo_task.id,
                                                          fmt='all',
                                                          params={'tar': 'gz'})
                    if tar_report is not None:
                        tar_report_path = os.path.join(self.working_directory,
                                                       "cuckoo_report.tar.gz")
                        try:
                            report_file = open(tar_report_path, 'w')
                            report_file.write(tar_report)
                            report_file.close()
                            self.task.add_supplementary(
                                tar_report_path,
                                "Cuckoo Sandbox analysis report archive (tar.gz)"
                            )
                        except:
                            self.log.exception(
                                "Unable to add tar of complete report for task %s"
                                % self.cuckoo_task.id)

                self.log.debug("Checking for dropped files and pcap.")
                # Submit dropped files and pcap if available:
                self.check_dropped(request, self.cuckoo_task.id)
                self.check_pcap(self.cuckoo_task.id)

                # Check process memory dumps
                if dump_processes is True:
                    self.download_memdump('procmemdump')

                # We only retrieve full memory dumps for top-level files, and only if it was specified in
                # extra options.
                if full_memdump and pull_memdump:
                    self.download_memdump('fullmemdump')
            else:
                # We didn't get a report back.. cuckoo has failed us
                if self.should_run:
                    self.trigger_cuckoo_reset(5)
                    self.log.info("Raising recoverable error for running job.")
                    raise RecoverableError(
                        "Unable to retrieve cuckoo report. The following errors were detected: %s"
                        % safe_str(self.cuckoo_task.errors))

        except Exception as e:
            # Delete the task now..
            self.log.info('General exception caught during processing: %s' % e)
            if self.cuckoo_task and self.cuckoo_task.id is not None:
                self.cuckoo_delete_task(self.cuckoo_task.id)
            self.session.close()

            # Send the exception off to ServiceBase
            raise

        # Delete and exit
        if self.cuckoo_task and self.cuckoo_task.id is not None:
            self.cuckoo_delete_task(self.cuckoo_task.id)

        self.session.close()

    @staticmethod
    def get_name():
        return "Cuckoo"

    def check_stop(self):
        if not self.should_run:
            try:
                self.cm.stop()
            except DockerException:
                pass
            return True
        return False

    def cuckoo_submit(self, file_content):
        """ Submits a new file to Cuckoo for analysis """
        try:
            task_id = self.cuckoo_submit_file(file_content)
            self.log.debug("Submitted file. Task id: %s.", task_id)
            if not task_id:
                err_msg = "Failed to get task for submitted file."
                self.cuckoo_task.errors.append(err_msg)
                self.log.error(err_msg)
                return
            else:
                self.cuckoo_task.id = task_id
        except Exception as e:
            err_msg = "Error submitting to Cuckoo"
            self.cuckoo_task.errors.append('%s: %s' % (err_msg, safe_str(e)))
            raise RecoverableError("Unable to submit to Cuckoo")

        self.log.debug("Submission succeeded. File: %s -- Task ID: %s" %
                       (self.cuckoo_task.file, self.cuckoo_task.id))

        # Quick sleep to avoid failing when the API can't get the task yet.
        for i in xrange(5):
            if self.check_stop():
                return
            time.sleep(1)
        try:
            status = self.cuckoo_poll_started()
        except RetryError:
            self.log.info("VM startup timed out")
            status = None

        if status == "started":
            try:
                status = self.cuckoo_poll_report()
            except RetryError:
                self.log.info("Max retries exceeded for report status.")
                status = None

        err_msg = None
        if status is None:
            err_msg = "Timed out while waiting for cuckoo to analyze file."
        elif status == "missing":
            err_msg = "Task went missing while waiting for cuckoo to analyze file."
        elif status == "stopped":
            err_msg = "Service has been stopped while waiting for cuckoo to analyze file."

        if err_msg:
            self.log.debug(err_msg)
            raise RecoverableError(err_msg)

    def stop(self):
        # Need to kill the container; we're about to go down..
        self.log.info(
            "Service is being stopped; removing all running containers and metadata.."
        )
        try:
            self.cm.stop()
        except DockerException:
            pass

    @retry(wait_fixed=1000,
           stop_max_attempt_number=GUEST_VM_START_TIMEOUT,
           retry_on_result=_retry_on_none)
    def cuckoo_poll_started(self):

        # Bail if we were stopped
        if not self.should_run:
            return "stopped"

        task_info = self.cuckoo_query_task(self.cuckoo_task.id)
        if task_info is None:
            # The API didn't return a task..
            return "missing"

        # Detect if mismatch
        if task_info.get("id") != self.cuckoo_task.id:
            self.log.warning(
                "Cuckoo returned mismatched task info for task: %s. Trying again.."
                % self.cuckoo_task.id)
            return None

        if task_info.get("guest", {}).get("status") == "starting":
            return None

        return "started"

    @retry(wait_fixed=CUCKOO_POLL_DELAY * 1000,
           stop_max_attempt_number=CUCKOO_MAX_TIMEOUT / CUCKOO_POLL_DELAY,
           retry_on_result=_retry_on_none)
    def cuckoo_poll_report(self):

        # Bail if we were stopped
        if self.check_stop():
            return "stopped"

        task_info = self.cuckoo_query_task(self.cuckoo_task.id)
        if task_info is None or task_info == {}:
            # The API didn't return a task..
            return "missing"

        # Detect if mismatch
        if task_info.get("id") != self.cuckoo_task.id:
            self.log.warning(
                "Cuckoo returned mismatched task info for task: %s. Trying again.."
                % self.cuckoo_task.id)
            return None

        # Check for errors first to avoid parsing exceptions
        status = task_info.get("status")
        if "fail" in status:
            self.log.error(
                "Analysis has failed. Check cuckoo server logs for errors.")
            self.cuckoo_task.errors = self.cuckoo_task.errors + task_info.get(
                'errors')
            return status
        elif status == "completed":
            self.log.debug(
                "Analysis has completed, waiting on report to be produced.")
        elif status == "reported":
            self.log.debug("Cuckoo report generation has completed.")
            for i in xrange(5):
                if self.check_stop():
                    return
                time.sleep(
                    1
                )  # wait a few seconds in case report isn't actually ready

            self.cuckoo_task.report = self.cuckoo_query_report(
                self.cuckoo_task.id)
            if self.cuckoo_task.report and isinstance(self.cuckoo_task.report,
                                                      dict):
                return status
        else:
            self.log.debug(
                "Waiting for task %d to finish. Current status: %s." %
                (self.cuckoo_task.id, status))

        return None
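    # Note on the @retry decorators above (from the 'retrying' package):
    # retry_on_result=_retry_on_none presumably re-runs the call whenever it
    # returns None, so the two poll helpers keep polling until they produce a
    # terminal value ("started", "reported", "missing", "stopped", a failure
    # status, or a RetryError once the attempt limit is hit). A minimal
    # standalone equivalent (illustrative numbers):
    #
    #   from retrying import retry
    #
    #   @retry(wait_fixed=5000, stop_max_attempt_number=12,
    #          retry_on_result=lambda result: result is None)
    #   def poll_status():
    #       return query_status_or_none()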

    @retry(wait_fixed=2000)
    def cuckoo_submit_file(self, file_content):
        if self.check_stop():
            return None
        self.log.debug("Submitting file: %s to server %s" %
                       (self.cuckoo_task.file, self.submit_url))
        files = {"file": (self.cuckoo_task.file, file_content)}

        resp = self.session.post(self.submit_url,
                                 files=files,
                                 data=self.cuckoo_task)
        if resp.status_code != 200:
            self.log.debug("Failed to submit file %s. Status code: %s" %
                           (self.cuckoo_task.file, resp.status_code))
            return None
        else:
            resp_dict = dict(resp.json())
            task_id = resp_dict.get("task_id")
            if not task_id:
                # Spender case?
                task_id = resp_dict.get("task_ids", [])
                if isinstance(task_id, list) and len(task_id) > 0:
                    task_id = task_id[0]
                else:
                    return None
            return task_id

    @retry(wait_fixed=2000)
    def cuckoo_query_report(self, task_id, fmt="json", params=None):
        if self.check_stop():
            return None
        self.log.debug("Querying report, task_id: %s - format: %s", task_id,
                       fmt)
        resp = self.session.get(self.query_report_url % task_id + '/' + fmt,
                                params=params or {})
        if resp.status_code != 200:
            if resp.status_code == 404:
                self.log.error("Task or report not found for task: %s" %
                               task_id)
                return None
            else:
                self.log.error("Failed to query report %s. Status code: %d" %
                               (task_id, resp.status_code))
                self.log.error(resp.text)
                return None
        if fmt == "json":
            resp_dict = dict(resp.json())
            report_data = resp_dict
        else:
            report_data = resp.content

        if not report_data or report_data == '':
            raise Exception("Empty report data for task %s" % task_id)

        return report_data

    @retry(wait_fixed=2000)
    def cuckoo_query_pcap(self, task_id):
        if self.check_stop():
            return None
        resp = self.session.get(self.query_pcap_url % task_id)
        if resp.status_code != 200:
            if resp.status_code == 404:
                self.log.debug("Task or pcap not found for task: %s" % task_id)
                return None
            else:
                self.log.debug(
                    "Failed to query pcap for task %s. Status code: %d" %
                    (task_id, resp.status_code))
                return None
        else:
            pcap_data = resp.content
            return pcap_data

    @retry(wait_fixed=500,
           stop_max_attempt_number=3,
           retry_on_result=_retry_on_none)
    def cuckoo_query_task(self, task_id):
        if self.check_stop():
            return {}
        resp = self.session.get(self.query_task_url % task_id)
        if resp.status_code != 200:
            if resp.status_code == 404:
                self.log.debug("Task not found for task: %s" % task_id)
                return None
            else:
                self.log.debug("Failed to query task %s. Status code: %d" %
                               (task_id, resp.status_code))
                return None
        else:
            resp_dict = dict(resp.json())
            task_dict = resp_dict.get('task')
            if task_dict is None or task_dict == '':
                self.log.warning(
                    'Failed to query task. Returned task dictionary is None or empty'
                )
                return None
            return task_dict

    @retry(wait_fixed=2000)
    def cuckoo_query_machine_info(self, machine_name):
        if self.check_stop():
            self.log.debug("Service stopped during machine info query.")
            return None

        resp = self.session.get(self.query_machine_info_url % machine_name)
        if resp.status_code != 200:
            self.log.debug("Failed to query machine %s. Status code: %d" %
                           (machine_name, resp.status_code))
            return None
        else:
            resp_dict = dict(resp.json())
            machine_dict = resp_dict.get('machine')
            return machine_dict

    @retry(wait_fixed=1000, stop_max_attempt_number=2)
    def cuckoo_delete_task(self, task_id):
        if self.check_stop():
            return
        resp = self.session.get(self.delete_task_url % task_id)
        if resp.status_code != 200:
            self.log.debug("Failed to delete task %s. Status code: %d" %
                           (task_id, resp.status_code))
        else:
            self.log.debug("Deleted task: %s." % task_id)
            if self.cuckoo_task:
                self.cuckoo_task.id = None

    # Fixed retry amount to avoid starting an analysis too late.
    @retry(wait_fixed=2000, stop_max_attempt_number=15)
    def cuckoo_query_machines(self):
        if self.check_stop():
            self.log.debug("Service stopped during machine query.")
            return False
        self.log.debug("Querying for available analysis machines..")
        resp = self.session.get(self.query_machines_url)
        if resp.status_code != 200:
            self.log.debug("Failed to query machines: %s" % resp.status_code)
            raise CuckooVMBusyException()
        resp_dict = dict(resp.json())
        if not self._all_vms_busy(resp_dict.get('machines')):
            return True
        return False

    @staticmethod
    def _all_vms_busy(result):
        if result:
            for sandbox in result:
                if ((sandbox["status"] == u"poweroff" or sandbox["status"]
                     == u"saved" or sandbox["status"] is None)
                        and not sandbox["locked"]):
                    return False
        return True
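        # e.g. _all_vms_busy([{"status": u"poweroff", "locked": False}]) -> False (a VM is free)
        #      _all_vms_busy([{"status": u"running",  "locked": True}])  -> True  (all busy)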

    def is_cuckoo_ready(self, retry_cnt=30):
        # In theory, we should always have a VM available since we're matched 1:1; in practice, we sometimes
        # have to wait.
        ready = False
        attempts = 0
        while not ready:
            if self.check_stop():
                return False
            try:
                ready = self.cuckoo_query_machines()
                if ready:
                    return ready
            except:
                # pass, since the api might not even be up yet
                pass
            time.sleep(1)
            attempts += 1
            if attempts >= retry_cnt:
                return False
        return ready

    def check_dropped(self, request, task_id):
        self.log.debug("Checking dropped files.")
        dropped_tar_bytes = self.cuckoo_query_report(task_id, 'dropped')
        added_hashes = set()
        if dropped_tar_bytes is not None:
            try:
                dropped_tar = tarfile.open(
                    fileobj=io.BytesIO(dropped_tar_bytes))
                for tarobj in dropped_tar:
                    if self.check_stop():
                        return
                    if tarobj.isfile(
                    ) and not tarobj.isdir():  # a file, not a dir
                        # A dropped file found
                        dropped_name = os.path.split(tarobj.name)[1]
                        # Fixup the name.. the tar originally has files/your/file/path
                        tarobj.name = tarobj.name.replace("/",
                                                          "_").split('_', 1)[1]
                        dropped_tar.extract(tarobj, self.working_directory)
                        dropped_file_path = os.path.join(
                            self.working_directory, tarobj.name)

                        # Check the file hash for whitelisting:
                        with open(dropped_file_path, 'r') as fh:
                            data = fh.read()
                            if not request.deep_scan:
                                ssdeep_hash = ssdeep.hash(data)
                                skip_file = False
                                for seen_hash in added_hashes:
                                    if ssdeep.compare(
                                            ssdeep_hash, seen_hash
                                    ) >= self.ssdeep_match_pct:
                                        skip_file = True
                                        break
                                if skip_file is True:
                                    request.result.add_tag(
                                        tag_type=TAG_TYPE.FILE_SUMMARY,
                                        value="Truncated extraction set",
                                        weight=TAG_WEIGHT.NULL)
                                    continue
                                else:
                                    added_hashes.add(ssdeep_hash)
                            dropped_hash = hashlib.md5(data).hexdigest()
                            if dropped_hash == self.task.md5:
                                continue
                        if not (wlist_check_hash(dropped_hash)
                                or wlist_check_dropped(dropped_name)
                                or dropped_name.endswith('_info.txt')):
                            # Resubmit
                            self.task.exclude_service("Dynamic Analysis")
                            self.task.add_extracted(
                                dropped_file_path,
                                "Dropped file during Cuckoo analysis.")
                            self.log.debug(
                                "Submitted dropped file for analysis: %s" %
                                dropped_file_path)
            except Exception, e_x:
                self.log.error("Error extracting dropped files: %s" % e_x)
                return
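# A standalone sketch of the ssdeep-based dedup used in check_dropped above
# (requires the 'ssdeep' package; function name and threshold are illustrative):
import ssdeep

def is_near_duplicate(data, seen_hashes, match_pct=80):
    """Return True if data fuzzy-matches a previously seen dropped file."""
    candidate = ssdeep.hash(data)
    for seen in seen_hashes:
        if ssdeep.compare(candidate, seen) >= match_pct:
            return True
    seen_hashes.add(candidate)
    return False

# seen = set()
# is_near_duplicate(open('a.bin', 'rb').read(), seen)   # first file -> False
# is_near_duplicate(open('a2.bin', 'rb').read(), seen)  # near-identical -> True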
Example #24
class PDFId(ServiceBase):
    AL_PDFID_001 = Heuristic(
        "AL_PDFID_001", "PDF_Launch", "document/pdf",
        dedent("""\
                                    /Launch command used
                                    """))
    AL_PDFID_002 = Heuristic(
        "AL_PDFID_002", "After last %%EOF", "document/pdf",
        dedent("""\
                                    There are byte(s) following the end of the PDF
                                    """))
    AL_PDFID_003 = Heuristic(
        "AL_PDFID_003", "JBIG2Decode", "document/pdf",
        dedent("""\
                                    looking for /JBIG2Decode. Using the JBIG2 compression
                                    """))
    AL_PDFID_004 = Heuristic(
        "AL_PDFID_004", "AcroForm", "document/pdf",
        dedent("""\
                                    looking for /AcroForm.  This is an action launched by Forms
                                    """))
    AL_PDFID_005 = Heuristic(
        "AL_PDFID_005", "RichMedia", "document/pdf",
        dedent("""\
                                    looking for /RichMedia.  This can be use to embed Flash in a PDF
                                    """))
    AL_PDFID_006 = Heuristic(
        "AL_PDFID_006", "PDF Date Modified", "document/pdf",
        dedent("""\
                                    Date tag is ModDate. Will output the date value.
                                    """))
    AL_PDFID_007 = Heuristic(
        "AL_PDFID_007", "PDF Date Creation", "document/pdf",
        dedent("""\
                                    Date tag is CreationDate. Will output the date value.
                                    """))
    AL_PDFID_008 = Heuristic(
        "AL_PDFID_008", "PDF Date Last Modified", "document/pdf",
        dedent("""\
                                    Date tag is LastModified. Will output the date value.
                                    """))
    AL_PDFID_009 = Heuristic(
        "AL_PDFID_009", "PDF Date Source Modified", "document/pdf",
        dedent("""\
                                    Date tag is SourceModified. Will output the date value.
                                    """))
    AL_PDFID_010 = Heuristic(
        "AL_PDFID_010", "PDF Date PDFX", "document/pdf",
        dedent("""\
                                    Date tag is pdfx. Will output the date value.
                                    """))
    AL_PDFID_011 = Heuristic(
        "AL_PDFID_011", "Encrypt", "document/pdf",
        dedent("""\
                                    Found the /Encrypt string in the file. Will need to figure out why.
                                    """))

    SERVICE_ACCEPTS = 'document/pdf'
    SERVICE_CATEGORY = "Static Analysis"
    SERVICE_DESCRIPTION = "This service extracts metadata from PDFs using Didier Stevens python library PDFId."
    SERVICE_ENABLED = True
    SERVICE_REVISION = ServiceBase.parse_revision(
        '$Id: e6e97935849b2be2b1350161648d171c2de704e0 $')
    SERVICE_VERSION = '1'
    SERVICE_CPU_CORES = 1
    SERVICE_RAM_MB = 256

    def __init__(self, cfg=None):
        super(PDFId, self).__init__(cfg)

    @staticmethod
    def get_switch_count(line):
        if line[-1] == ')':
            switch_count_end_index = line.rfind('(')
        else:
            switch_count_end_index = len(line)

        switch_count_start_index = switch_count_end_index - 1
        while line[switch_count_start_index].isdigit():
            switch_count_start_index -= 1

        return int(line[switch_count_start_index + 1:switch_count_end_index])
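        # Worked examples (pdfid output lines as seen after parse_line's lstrip):
        #   get_switch_count('/Launch 2')          -> 2
        #   get_switch_count('/OpenAction 1(1)')   -> 1   (count before the '(...)')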

    def parse_line(self, line, file_res, res):
        line = line.lstrip()

        if line.startswith('Not a PDF document'):
            res.add_line(line)
            return False
        elif line.startswith('PDFiD 0.0.11 '):
            return True
        elif (line.startswith('/JBIG2Decode') or line.startswith('/RichMedia')
              or line.startswith('/Launch')
              or line.startswith('After last %%EOF')
              or line.startswith('/AcroForm')):
            # 1. switch count:
            switch_count = self.get_switch_count(line)

            # 4. is it using the Launch feature?
            if line.startswith('/Launch') and switch_count > 0:
                file_res.add_tag(TAG_TYPE['EXPLOIT_NAME'], "PDF_Launch",
                                 TAG_WEIGHT['MED'])
                file_res.add_section(
                    ResultSection(
                        SCORE['MED'], "/Launch command used ... "
                        "this is very suspicious."))
                file_res.report_heuristic(PDFId.AL_PDFID_001)

            elif line.startswith('After last %%EOF') and switch_count > 0:
                file_res.add_section(
                    ResultSection(
                        SCORE['MED'],
                        "There is %d byte(s) following the end of "
                        "the PDF, this is very suspicious." % switch_count))
                file_res.report_heuristic(PDFId.AL_PDFID_002)

            elif line.startswith('/JBIG2Decode') and switch_count > 0:
                file_res.add_section(
                    ResultSection(
                        SCORE['LOW'],
                        "Using the JBIG2 compression ... potentially "
                        "exploiting the vulnerability?"))
                file_res.report_heuristic(PDFId.AL_PDFID_003)

            elif line.startswith('/AcroForm') and switch_count > 0:
                file_res.add_section(
                    ResultSection(
                        SCORE['LOW'],
                        "Using /AcroForm.  This is an action launched "
                        "by Forms ... suspicious (needs further "
                        "investigation)."))
                file_res.report_heuristic(PDFId.AL_PDFID_004)

            elif line.startswith('/RichMedia') and switch_count > 0:
                file_res.add_section(
                    ResultSection(
                        SCORE['LOW'],
                        "Using /RichMedia.  This can be use to embed "
                        "Flash in a PDF ... suspicious (needs further "
                        "investigation)."))
                file_res.report_heuristic(PDFId.AL_PDFID_005)

        elif line.startswith('D:'):
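            # Illustrative (assumed) PDFiD date line handled by this branch:
            #   "D:20130717102851+02'00' /CreationDate"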
            sep_index = line.find(' /')
            if sep_index != -1:
                date_tag = line[sep_index + len(' /'):]
                date_value = line[2:sep_index].rstrip()
                txt_tag = ""
                if date_tag == "ModDate":
                    file_res.add_tag(TAG_TYPE['PDF_DATE_MOD'], date_value,
                                     TAG_WEIGHT['MED'])
                    file_res.report_heuristic(PDFId.AL_PDFID_006)
                    txt_tag = date_value
                elif date_tag == "CreationDate":
                    file_res.add_tag(TAG_TYPE['PDF_DATE_CREATION'], date_value,
                                     TAG_WEIGHT['MED'])
                    file_res.report_heuristic(PDFId.AL_PDFID_007)
                    txt_tag = date_value
                elif date_tag == "LastModified":
                    file_res.add_tag(TAG_TYPE['PDF_DATE_LASTMODIFIED'],
                                     date_value, TAG_WEIGHT['MED'])
                    file_res.report_heuristic(PDFId.AL_PDFID_008)
                    txt_tag = date_value
                elif date_tag == "SourceModified":
                    file_res.add_tag(TAG_TYPE['PDF_DATE_SOURCEMODIFIED'],
                                     date_value, TAG_WEIGHT['MED'])
                    file_res.report_heuristic(PDFId.AL_PDFID_009)
                    txt_tag = date_value
                elif date_tag == "pdfx":
                    file_res.add_tag(TAG_TYPE['PDF_DATE_PDFX'], date_value,
                                     TAG_WEIGHT['MED'])
                    file_res.report_heuristic(PDFId.AL_PDFID_010)
                    txt_tag = date_value

                if txt_tag != "":
                    line = ["D:", txt_tag, " /%s" % date_tag]

        elif line.startswith('/Encrypt') and int(line.split()[1]) > 0:
            file_res.add_section(
                ResultSection(
                    SCORE['HIGH'], "Using /Encrypt.  ... suspicious "
                    "(needs further investigation)."))
            file_res.report_heuristic(PDFId.AL_PDFID_011)

        res.add_line(line)
        return True

    # noinspection PyMethodMayBeStatic
    def _report_section(self, file_res, res, request):
        if file_res.score > 0 or request.deep_scan:
            file_res.add_section(res)

    def parse_pdfid(self, pdfid_output, request):
        file_res = request.result
        res = ResultSection(SCORE['NULL'],
                            "PDF_iD output:",
                            body_format=TEXT_FORMAT.MEMORY_DUMP)

        for line in pdfid_output.splitlines():
            if not self.parse_line(line, file_res, res):
                return False

        self._report_section(file_res, res, request)

        return True

    # THIS FUNCTION is an extract of the Main() function of the pdfparser.py code from Didier Stevens
    # noinspection PyPep8Naming
    @staticmethod
    def run_pdfparser(filename, request):
        file_res = request.result
        oPDFParser = None
        try:
            oPDFParser = cPDFParser(filename, False, None)
            cntComment = 0
            cntXref = 0
            cntTrailer = 0
            cntStartXref = 0
            cntIndirectObject = 0
            dicObjectTypes = {}

            while True:
                pdf_obj = oPDFParser.GetObject()
                if pdf_obj is not None:
                    if pdf_obj.type == PDF_ELEMENT_COMMENT:
                        cntComment += 1
                    elif pdf_obj.type == PDF_ELEMENT_XREF:
                        cntXref += 1
                    elif pdf_obj.type == PDF_ELEMENT_TRAILER:
                        cntTrailer += 1
                    elif pdf_obj.type == PDF_ELEMENT_STARTXREF:
                        cntStartXref += 1
                    elif pdf_obj.type == PDF_ELEMENT_INDIRECT_OBJECT:
                        cntIndirectObject += 1
                        obj_type = pdf_obj.GetType()
                        if obj_type not in dicObjectTypes:
                            dicObjectTypes[obj_type] = [pdf_obj.id]
                        else:
                            dicObjectTypes[obj_type].append(pdf_obj.id)
                else:
                    break

            stats_output = 'Comment: %s\nXREF: %s\nTrailer: %s\nStartXref: %s\nIndirect pdf_obj: %s\n' % \
                           (cntComment, cntXref, cntTrailer, cntStartXref, cntIndirectObject)
            names = dicObjectTypes.keys()
            names.sort()
            for key in names:
                stats_output = "%s %s %d: %s\n" % (
                    stats_output, key, len(dicObjectTypes[key]), ', '.join(
                        map(lambda x: '%d' % x, dicObjectTypes[key])))

            stats_hash = hashlib.sha1(stats_output).hexdigest()
            file_res.add_tag(TAG_TYPE['PDF_STATS_SHA1'], stats_hash,
                             TAG_WEIGHT['MED'])

            if file_res.score > 0 or request.deep_scan:
                res = ResultSection(SCORE['NULL'],
                                    "PDF-parser --stats output:",
                                    body_format=TEXT_FORMAT.MEMORY_DUMP)
                for line in stats_output.splitlines():
                    res.add_line(line)
                file_res.add_section(res)

        finally:
            if oPDFParser is not None:
                oPDFParser.CloseOpenFiles()

    # noinspection PyUnusedLocal,PyMethodMayBeStatic
    def _report_embedded_xdp(self, file_res, chunk_number, binary, leftover):
        file_res.add_section(
            ResultSection(
                SCORE['INFO'],
                ["Found %s " % chunk_number, "Embedded PDF (in XDP)"]))
        file_res.add_tag(TAG_TYPE['FILE_SUMMARY'], "Embedded PDF (in XDP)", 10,
                         'IDENTIFICATION')
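
    # Illustrative (hypothetical) XDP wrapper that find_xdp_embedded below
    # unpacks; the base64 payload of each <chunk> is decoded and re-submitted
    # as "<filename>_<n>.pdf":
    #   <xdp:xdp xmlns:xdp="http://ns.adobe.com/xdp/">
    #     <pdf xmlns="http://ns.adobe.com/xdp/pdf/">
    #       <document><chunk>JVBERi0xLjQK...</chunk></document>
    #     </pdf>
    #   </xdp:xdp>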

    def find_xdp_embedded(self, filename, binary, request):
        file_res = request.result
        if "<pdf" in binary and "<document>" in binary and "<chunk>" in binary:
            chunks = binary.split("<chunk>")

            chunk_number = 0
            leftover = ""
            for chunk in chunks:
                if "</chunk>" not in chunk:
                    leftover += chunk.replace("<document>", "").replace(
                        '<pdf xmlns="'
                        'http://ns.adobe.com/xdp/pdf/">', "")
                    continue

                chunk_number += 1

                un_b64 = None
                # noinspection PyBroadException
                try:
                    un_b64 = b64decode(chunk.split("</chunk>")[0])
                except:
                    self.log.error(
                        "Found <pdf>, <document> and <chunk> tags inside an xdp file "
                        "but could not unbase64 the content.")

                if un_b64:
                    new_filename = "%s_%d.pdf" % (filename, chunk_number)
                    file_path = os.path.join(self.working_directory,
                                             new_filename)
                    f = open(file_path, "wb")
                    f.write(un_b64)
                    f.close()
                    request.add_extracted(file_path,
                                          "UnXDP from %s" % filename)

            if chunk_number > 0:
                self._report_embedded_xdp(file_res, chunk_number, binary,
                                          leftover)

    def execute(self, request):
        request.result = Result()
        temp_filename = request.download()
        filename = os.path.basename(temp_filename)

        with open(temp_filename, 'r') as f:
            file_content = f.read()

        if '<xdp:xdp' in file_content:
            self.find_xdp_embedded(filename, file_content, request)

        if len(file_content) < 3000000:
            pdf = PDFiD2String(PDF_iD(temp_filename, False, True, False),
                               False)

            if pdf:
                if self.parse_pdfid(pdf, request):
                    self.run_pdfparser(temp_filename, request)
        else:
            # A "file too big" error message would be better, but this will do for now.
            request.result.add_section(
                ResultSection(
                    SCORE['NULL'], "PDF Analysis of the file was"
                    " skipped because the file is "
                    "too big (limit is 3 MB)."))
Example #25
0
class NSRL(ServiceBase):
    """ NSRL (Checks a list of known good files using SHA1 and size). """

    SERVICE_CATEGORY = "Filtering"
    SERVICE_DEFAULT_CONFIG = {
        "host": "127.0.0.1",
        "user": "******",
        "passwd": "guest",
        "port": 5432,
        "db": "nsrl"
    }
    SERVICE_DESCRIPTION = "This service performs hash lookups against the NSRL database of known good files."
    SERVICE_ENABLED = True
    SERVICE_REVISION = ServiceBase.parse_revision(
        '$Id: 91b14abc01546bf85c0001b915239266fa29fe18 $')
    SERVICE_STAGE = 'FILTER'
    SERVICE_VERSION = '1'
    SERVICE_CPU_CORES = 0.05
    SERVICE_RAM_MB = 64

    def __init__(self, cfg=None):
        super(NSRL, self).__init__(cfg)
        self._connect_params = {
            'host': self.cfg.get('host'),
            'user': self.cfg.get('user'),
            'port': int(self.cfg.get('port')),
            'passwd': self.cfg.get('passwd'),
            'db': self.cfg.get('db')
        }
        self.connection = None

    def start(self):
        self.connection = NSRLDatasource(self.log, **self._connect_params)

    # noinspection PyUnresolvedReferences
    def import_service_deps(self):
        global NSRLDatasource
        from al_services.alsvc_nsrl.datasource.nsrl import NSRL as NSRLDatasource
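
    # For illustration only: based on the indexing in execute() below, each
    # row returned by NSRLDatasource.query() is assumed to look roughly like
    #   (sha1, filename, product_name, version, manufacturer, os_name)
    # which would render as:
    #   "<sha1> - setup.exe (Some Product) - v: 1.0 - by: Some Vendor [Windows]"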

    def execute(self, request):
        # We have the sha1 digest in the task object so there is no need to
        # fetch the sample for NSRL execution.
        cur_result = Result()
        try:
            dbresults = self.connection.query(request.sha1)
        except NSRLDatasource.DatabaseException:
            raise RecoverableError("Query failed")

        # If we found a result in the NSRL database, drop this task as we don't want to process it further.
        if dbresults:
            request.drop()
            benign = "This file was found in the NSRL database. It is not malware."
            res = ResultSection(title_text=benign)
            res.score = SCORE.NOT
            for dbresult in dbresults[:10]:
                res.add_line(dbresult[0] + " - %s (%s) - v: %s - by: %s [%s]" %
                             (dbresult[1], dbresult[2], dbresult[3],
                              dbresult[4], dbresult[5]))

            if len(dbresults) > 10:
                res.add_line("And %s more..." % str(len(dbresults) - 10))

            cur_result.add_section(res)
        request.result = cur_result
Example #26
0
class SigCheck(ServiceBase):
    """SigCheck service. """
    AL_SigCheck_001 = Heuristic(
        "AL_SigCheck_001",
        "Invalid Signature",
        ".*",
        dedent("""\
               Unable to find any of the strings in the output of SigCheck:
                   "	Verified:" 
                   "	Verified:	Untrusted Root"
                   "	Verified:	Untrusted Authority"
                   "	Verified:	Untrusted Certificate"
                   "	Verified:	Malformed"
                   "	Verified:	Invalid Chain"
               meaning the file has an invalid/untrusted signature.
               The file might be modified or the signature is fake.
               """),
    )
    AL_SigCheck_002 = Heuristic(
        "AL_SigCheck_002",
        "Expired Signature",
        ".*",
        dedent("""\
               If "	Verified:	Expired" is found in the SigCheck output,
               it means the file has an expired signature.
               """),
    )
    AL_SigCheck_003 = Heuristic(
        "AL_SigCheck_003",
        "Trusted Signers",
        ".*",
        dedent("""\
               If "	Verified:	Signed" is found in the SigCheck output,
               and the signer is on a list of Authorised Signers.
               """),
    )
    AL_SigCheck_004 = Heuristic(
        "AL_SigCheck_004",
        "NonFiltered Signers",
        ".*",
        dedent("""\
               If "	Verified:	Signed" is found in the SigCheck output,
               but the signer is not on list of Authorised Signers.
               """),
    )
    AL_SigCheck_005 = Heuristic(
        "AL_SigCheck_005",
        "Sigcheck Unexpected Behavior",
        ".*",
        dedent("""\
               When the SigCheck tool returns unexpected results.
               """),
    )

    SERVICE_CATEGORY = 'Filtering'
    SERVICE_DEFAULT_CONFIG = {
        'SIGCHECK_PATH':
        r'/al/support/sigcheck/sigcheck.exe',
        'SIGNTOOL_PATH':
        r'/al/support/sigcheck/signtool.exe',
        'SIGCHECK_TRUSTED_NAMES': [
            [
                "Microsoft Corporation", "Microsoft Code Signing PCA",
                "Microsoft Root Authority"
            ],
            [
                "Microsoft Corporation", "Microsoft Code Signing PCA",
                "Microsoft Root Certificate Authority"
            ],
            [
                "Microsoft Developer Platform Side-by-Side Assembly Publisher",
                "Microsoft Code Signing PCA",
                "Microsoft Root Certificate Authority"
            ],
            [
                "Microsoft Fusion Verification", "Microsoft Code Signing PCA",
                "Microsoft Root Certificate Authority"
            ],
            [
                "Microsoft Windows", "Microsoft Windows Verification PCA",
                "Microsoft Root Certificate Authority"
            ],
            [
                "Microsoft Windows Hardware Compatibility Publisher",
                "Microsoft Windows Hardware Compatibility PCA",
                "Microsoft Root Authority"
            ],
            [
                "Microsoft Windows Publisher",
                "Microsoft Windows Verification PCA",
                "Microsoft Root Certificate Authority"
            ],
            [
                "Microsoft Windows Side-by-Side Assembly Publisher",
                "Microsoft Code Signing PCA",
                "Microsoft Root Certificate Authority"
            ],
        ]
    }
    SERVICE_ACCEPTS = "(archive/.*|executable/.*|unknown)"
    SERVICE_DESCRIPTION = "This service checks for known good files signed by trusted signing authorities."
    SERVICE_ENABLED = True
    SERVICE_REVISION = ServiceBase.parse_revision(
        '$Id: 5339a920931b7a56a7e682d81714ca8da2015456 $')
    SERVICE_STAGE = 'FILTER'
    SERVICE_SUPPORTED_PLATFORMS = ['Windows']
    SERVICE_VERSION = '1'
    SERVICE_CPU_CORES = 0.75
    SERVICE_RAM_MB = 512

    def __init__(self, cfg=None):
        super(SigCheck, self).__init__(cfg)
        self.sigcheck_exe = None
        self.signtool_exe = None
        self.trusted_name_list = self.cfg.get('SIGCHECK_TRUSTED_NAMES', [])

    def start(self):
        # Validate configuration and tool locations.
        self.sigcheck_exe = self.cfg.get('SIGCHECK_PATH', '')
        if not os.path.isfile(self.sigcheck_exe):
            raise ConfigException('SIGCHECK_PATH (%s) is invalid or missing.' %
                                  self.sigcheck_exe)

        self.signtool_exe = self.cfg.get('SIGNTOOL_PATH', '')
        if not os.path.isfile(self.signtool_exe):
            raise ConfigException('SIGNTOOL_PATH (%s) is invalid or missing.' %
                                  self.signtool_exe)

    def execute(self, request):
        local_filename = request.download()
        proc = subprocess.Popen(
            [self.sigcheck_exe, '-i', '-q', '-h', local_filename],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        stdout, stderr = proc.communicate()
        stdout = unicode(stdout, 'mbcs')
        stderr = unicode(stderr, 'mbcs')
        if stderr:
            self.log.warn('SigCheck returned data on stderr: %s', stderr)
        request.result = self.populate_result(stdout.splitlines(),
                                              local_filename, request)
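
    # Illustrative (assumed) "sigcheck -i -q -h" output that populate_result
    # below parses: the verdict comes from the second line and the signer
    # chain from the double-tab-indented block under "Signers:", e.g.
    #   c:\samples\example.exe:
    #   	Verified:	Signed
    #   	Signers:
    #   		Microsoft Windows
    #   		Microsoft Windows Verification PCA
    #   		Microsoft Root Certificate Authority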

    def populate_result(self, current_lines, filename, request):
        result = Result()

        should_filter_out = False
        dump_sign_tool_output = False
        skip_detailed_output = False

        status_line = current_lines[1] if len(current_lines) > 1 else ""
        if len(current_lines) <= 1 or status_line == "\tVerified:\tUnsigned":
            return result

        elif status_line.find("\tVerified:") != 0 or                   \
                status_line == "\tVerified:\tUntrusted Root" or           \
                status_line == "\tVerified:\tUntrusted Authority" or      \
                status_line == "\tVerified:\tUntrusted Certificate" or    \
                status_line == "\tVerified:\tMalformed" or                \
                status_line == "\tVerified:\tInvalid Chain":
            # This file has a signature but is not verified.
            result_section = ResultSection(
                score=SCORE.HIGH,
                title_text=("This file has an invalid/untrusted signature."
                            "The file might have been modified or the "
                            "signature is just a fake one."))
            dump_sign_tool_output = True
            result.report_heuristic(SigCheck.AL_SigCheck_001)

        elif status_line == "\tVerified:\tExpired":
            # This file has a signature but is not verified.
            result_section = ResultSection(
                score=SCORE.LOW,
                title_text="This file has an expired signature.")
            dump_sign_tool_output = True
            result.report_heuristic(SigCheck.AL_SigCheck_002)

        elif status_line == "\tVerified:\tSigned":
            is_authorised_signers = False
            # Build the list of signers
            signers = []
            signers_tag_found = False
            i = 0
            while i < len(current_lines):
                if signers_tag_found:
                    if current_lines[i][0:2] == '\t\t':
                        # Skip the first two tabs.
                        signers.append(current_lines[i][2:])
                    else:
                        break
                elif current_lines[i].find("\tSigners:") == 0:
                    signers_tag_found = True
                i += 1

            for trusted_name_item in self.trusted_name_list:
                if trusted_name_item == signers:
                    is_authorised_signers = True
                    break

            if is_authorised_signers:
                result_section = ResultSection(
                    score=SCORE.NOT,
                    title_text="This file is signed with trusted signers")
                result.report_heuristic(SigCheck.AL_SigCheck_003)
                should_filter_out = True

            else:
                result_section = ResultSection(
                    score=SCORE.INFO,
                    title_text=
                    "Signed with signers we don't automatically filter out")
                result.report_heuristic(SigCheck.AL_SigCheck_004)

        else:
            self.log.error(
                "The sigcheck output:\n%s\ncontained unexpected results %s" %
                ("\n".join(current_lines)))
            result_section = ResultSection(
                score=SCORE.MED,
                title_text="Unexpected result from sigcheck ... to investigate."
            )
            result.report_heuristic(SigCheck.AL_SigCheck_005)

        if should_filter_out and not request.ignore_filtering:
            request.drop()

        if skip_detailed_output:
            result.add_section(result_section)
            return result

        # Expand our result with the sigcheck output.
        self._add_sigcheck_output(current_lines, result_section)

        # Optionally expand our result with the signtool output.
        if dump_sign_tool_output:
            self._add_signtool_output(filename, result_section)

        result.add_section(result_section)
        return result

    @staticmethod
    def _add_sigcheck_output(tool_output, result_section):
        result_section.add_line("[SigCheck]")
        for line in tool_output:
            # The file date is the date of our local copy, so it's not relevant here.
            if not line.startswith('\t') or line.startswith("\tFile date:\t"):
                continue

            if 'MD5' in line:
                result_section.add_line("MD5: %s" % line.split(':')[1].strip())
            elif 'SHA1' in line:
                result_section.add_line("SHA1: %s" %
                                        line.split(':')[1].strip())
            else:
                # skip the '\t'
                result_section.add_line(line[1:])

    def _add_signtool_output(self, filename, result_section):
        # To provide a little more detail, run signtool as well whenever
        # sigcheck finds something unusual.
        signtool_proc = subprocess.Popen(
            [self.signtool_exe, 'verify', '/pa', '/v', '/a', filename],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            cwd=os.path.dirname(self.signtool_exe))
        signtool_stdout, signtool_stderr = signtool_proc.communicate()

        result_section.add_line("\n[SignTool]")
        for line in chain(signtool_stdout.splitlines(),
                          signtool_stderr.splitlines()):
            if 'SHA1' in line:
                result_section.add_line(
                    line.split(':')[0] + ": " + line.split(':')[1].strip())
            elif 'Verifying' in line:
                continue
            else:
                result_section.add_line(line)
Example #27
0
class Suricata(ServiceBase):
    SERVICE_ACCEPTS = 'network/tcpdump'
    SERVICE_CATEGORY = 'Networking'
    SERVICE_ENABLED = True
    SERVICE_STAGE = "CORE"
    SERVICE_REVISION = ServiceBase.parse_revision(
        '$Id: 77a8b7f071ca246d60ec3f0b9361b7fb9b0b9e82 $')
    SERVICE_TIMEOUT = 60
    SERVICE_VERSION = '1'
    SERVICE_CPU_CORES = 1
    SERVICE_RAM_MB = 1024

    SERVICE_DEFAULT_CONFIG = {
        "SURICATA_BIN":
        "/usr/local/bin/suricata",
        "SURE_SCORE":
        "MALWARE TROJAN CURRENT_EVENTS CnC Checkin",
        "VHIGH_SCORE":
        "EXPLOIT SCAN Adware PUP",
        "RULES_URLS": [
            "http://rules.emergingthreats.net/open/suricata/emerging.rules.tar.gz"
        ],
        "HOME_NET":
        "any"
    }
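
    # For illustration: with the defaults above, an alert whose signature
    # contains any SURE_SCORE keyword (e.g. "ET TROJAN Possible CnC Checkin")
    # is scored SURE in execute(), and any VHIGH_SCORE keyword scores VHIGH.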

    def __init__(self, cfg=None):
        super(Suricata, self).__init__(cfg)
        self.suricata_socket = None
        self.suricata_sc = None
        self.suricata_process = None
        self.last_rule_update = None
        self.rules_urls = cfg.get("RULES_URLS",
                                  self.SERVICE_DEFAULT_CONFIG["RULES_URLS"])
        self.home_net = cfg.get("HOME_NET",
                                self.SERVICE_DEFAULT_CONFIG["HOME_NET"])
        self.oinkmaster_update_file = '/etc/suricata/oinkmaster'
        self.run_dir = None

    # Update our local rules using Oinkmaster
    def update_suricata(self, **_):
        command = ["/usr/sbin/oinkmaster", "-Q", "-o", "/etc/suricata/rules"]
        for rules_url in self.rules_urls:
            command.extend(["-u", rules_url])
        subprocess.call(command)
        subprocess.call(["touch", self.oinkmaster_update_file])

    # Use an external tool to strip frame headers
    def strip_frame_headers(self, filepath):
        new_filepath = os.path.join(os.path.dirname(filepath), "striped.pcap")
        command = ["/usr/local/bin/stripe", "-r", filepath, "-w", new_filepath]

        subprocess.call(command)

        return new_filepath

    def start(self):
        self.run_dir = tempfile.mkdtemp(dir="/tmp")
        self._register_update_callback(self.update_suricata,
                                       execute_now=True,
                                       utype=UpdaterType.BOX,
                                       freq=UpdaterFrequency.QUARTER_DAY)
        self.replace_suricata_config()
        self.start_suricata_if_necessary()

    # The rules are updated periodically; use the update marker's modification time as the tool version
    def get_tool_version(self):
        return os.path.getmtime(self.oinkmaster_update_file)

    # When we're shutting down, kill the Suricata child process as well
    def stop(self):
        self.kill_suricata()
        if self.run_dir is not None:
            if os.path.exists(self.run_dir):
                shutil.rmtree(self.run_dir)
            self.run_dir = None

    # Kill the process if it isn't ending
    def kill_suricata(self):
        if self.suricata_process:
            try:
                self.log.info("Trying to kill Suricata (%s)" %
                              (str(self.suricata_process.pid)))
                self.suricata_process.kill()
            except Exception as e:
                self.log.exception("Failed to kill Suricata (%s): %s" %
                                   (str(self.suricata_process.pid), e.message))

    # Reapply our service configuration to the Suricata yaml configuration
    def replace_suricata_config(self):
        source_path = os.path.join(self.source_directory, 'conf',
                                   'suricata.yaml')
        dest_path = os.path.join(self.run_dir, 'suricata.yaml')
        home_net = re.sub(r"([/\[\]])", r"\\\1", self.home_net)
        with open(source_path) as sp:
            with open(dest_path, "w") as dp:
                dp.write(sp.read().replace("__HOME_NET__", home_net))
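
    # For illustration: with HOME_NET set to "[192.168.0.0/16]", the re.sub
    # above escapes it to "\[192.168.0.0\/16\]" before it replaces the
    # __HOME_NET__ placeholder (assumed to exist in the bundled suricata.yaml).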

    def reload_rules_if_necessary(self):
        if self.last_rule_update < self.get_tool_version():
            self.reload_rules()

    # Send the reload_rules command to the socket
    def reload_rules(self):
        ret = self.suricata_sc.send_command("reload-rules")

        if not ret and ret["return"] != "OK":
            self.log.exception("Failed to reload Suricata rules")

    def start_suricata_if_necessary(self):
        if not self.suricata_running():
            self.launch_suricata()

    # Try connecting to the Suricata socket
    def suricata_running(self):
        if self.suricata_sc is None:
            return False
        try:
            self.suricata_sc.connect()
        except suricatasc.SuricataException:
            return False
        return True

    # Retry with exponential backoff until we can actually connect to the Suricata socket
    @retry(retry_on_result=lambda x: x is False,
           wait_exponential_multiplier=1000,
           wait_exponential_max=10000,
           stop_max_delay=120000)
    def suricata_running_retry(self):
        return self.suricata_running()

    # Launch Suricata, listening on a uniquely named unix socket
    def launch_suricata(self):
        self.suricata_socket = os.path.join(self.run_dir,
                                            str(uuid.uuid4()) + '.socket')

        command = [
            self.cfg.get('SURICATA_BIN'),
            "-c",
            os.path.join(self.run_dir, 'suricata.yaml'),
            "--unix-socket=%s" % self.suricata_socket,
            "--pidfile",
            "%s/suricata.pid" % self.run_dir,
        ]

        self.log.info('Launching Suricata: %s' % (' '.join(command)))

        self.suricata_process = subprocess.Popen(command)

        self.suricata_sc = suricatasc.SuricataSC(self.suricata_socket)

        # Schedule a job to delete the socket when it isn't needed any longer
        self._register_cleanup_op({
            'type': 'shell',
            'args': ["rm", "-rf", self.run_dir]
        })
        # Note, in case the process is terminated without calling stop()
        self._register_cleanup_op({
            'type':
            'shell',
            'args': [
                "pkill", "--SIGKILL", "--nslist", "pid", "--ns",
                str(self.suricata_process.pid), "-f",
                self.cfg.get('SURICATA_BIN')
            ]
        })

        if not self.suricata_running_retry():
            raise Exception('Suricata could not be started.')
        self.last_rule_update = time.time()

    # noinspection PyUnresolvedReferences
    def import_service_deps(self):
        global suricatasc, dateparser
        import suricatasc
        import dateutil.parser as dateparser

    def execute(self, request):
        file_path = request.download()
        result = Result()

        # restart Suricata if we need to
        self.start_suricata_if_necessary()

        # Update our rules if they're stale.
        self.reload_rules_if_necessary()

        # Strip frame headers from the PCAP, since Suricata sometimes has trouble parsing strange PCAPs
        stripped_filepath = self.strip_frame_headers(file_path)

        # Pass the pcap file to Suricata via the socket
        ret = self.suricata_sc.send_command(
            "pcap-file", {
                "filename": stripped_filepath,
                "output-dir": self.working_directory
            })

        if not ret or ret["return"] != "OK":
            self.log.exception("Failed to submit PCAP for processing: %s" %
                               ret['message'])

        # Wait for Suricata to finish processing our PCAP
        while True:
            time.sleep(1)
            ret = self.suricata_sc.send_command("pcap-current")

            if ret and ret["message"] == "None":
                break

        alerts = {}
        signatures = {}
        domains = []
        ips = []
        urls = []

        # Parse the json results of the service
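        # Illustrative (abridged) eve.json record, showing only the fields
        # this loop reads for alert events:
        #   {"timestamp": "2017-01-01T00:00:00.000000+0000",
        #    "event_type": "alert", "src_ip": "10.0.0.1", "src_port": 4444,
        #    "dest_ip": "10.0.0.2", "dest_port": 80,
        #    "alert": {"signature_id": 2000419, "signature": "ET POLICY ..."}}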
        for line in open(os.path.join(self.working_directory, 'eve.json')):
            record = json.loads(line)

            timestamp = dateparser.parse(record['timestamp']).isoformat(' ')
            src_ip = record['src_ip']
            src_port = record['src_port']
            dest_ip = record['dest_ip']
            dest_port = record['dest_port']

            if src_ip not in ips:
                ips.append(src_ip)
            if dest_ip not in ips:
                ips.append(dest_ip)

            if record['event_type'] == 'http':
                if 'hostname' not in record['http'] or 'url' not in record[
                        'http']:
                    continue

                domain = record['http']['hostname']
                if domain not in domains and domain not in ips:
                    domains.append(domain)
                url = "http://" + domain + record['http']['url']
                if url not in urls:
                    urls.append(url)

            if record['event_type'] == 'dns':
                if 'rrname' not in record['dns']:
                    continue
                domain = record['dns']['rrname']
                if domain not in domains and domain not in ips:
                    domains.append(domain)

            if record['event_type'] == 'alert':
                if 'signature_id' not in record[
                        'alert'] or 'signature' not in record['alert']:
                    continue
                signature_id = record['alert']['signature_id']
                signature = record['alert']['signature']

                if signature_id not in alerts:
                    alerts[signature_id] = []
                if signature_id not in signatures:
                    signatures[signature_id] = signature

                alerts[signature_id].append(
                    "%s %s:%s -> %s:%s" %
                    (timestamp, src_ip, src_port, dest_ip, dest_port))

        # Create the result sections if there are any hits
        if len(alerts) > 0:
            for signature_id, signature in signatures.iteritems():
                score = SCORE.NULL
                tag_weight = TAG_WEIGHT.NULL

                if any(x in signature
                       for x in self.cfg.get("SURE_SCORE").split()):
                    score = SCORE.SURE
                    tag_weight = TAG_WEIGHT.SURE

                if any(x in signature
                       for x in self.cfg.get("VHIGH_SCORE").split()):
                    score = SCORE.VHIGH
                    tag_weight = TAG_WEIGHT.VHIGH

                section = ResultSection(score,
                                        '%s: %s' % (signature_id, signature))
                for flow in alerts[signature_id][:10]:
                    section.add_line(flow)
                if len(alerts[signature_id]) > 10:
                    section.add_line('And %s more flows' %
                                     (len(alerts[signature_id]) - 10))
                result.add_section(section)

                # Add a tag for the signature id and the message
                result.add_tag(TAG_TYPE.SURICATA_SIGNATURE_ID,
                               str(signature_id),
                               tag_weight,
                               usage=TAG_USAGE.IDENTIFICATION)
                result.add_tag(TAG_TYPE.SURICATA_SIGNATURE_MESSAGE,
                               signature,
                               tag_weight,
                               usage=TAG_USAGE.IDENTIFICATION)

            # Add tags for the domains, urls, and IPs we've discovered
            for domain in domains:
                result.add_tag(TAG_TYPE.NET_DOMAIN_NAME,
                               domain,
                               TAG_WEIGHT.VHIGH,
                               usage=TAG_USAGE.CORRELATION)
            for url in urls:
                result.add_tag(TAG_TYPE.NET_FULL_URI,
                               url,
                               TAG_WEIGHT.VHIGH,
                               usage=TAG_USAGE.CORRELATION)
            for ip in ips:
                result.add_tag(TAG_TYPE.NET_IP,
                               ip,
                               TAG_WEIGHT.VHIGH,
                               usage=TAG_USAGE.CORRELATION)

            # Add the original Suricata output as a supplementary file in the result
            request.add_supplementary(
                os.path.join(self.working_directory, 'eve.json'), 'json',
                'SuricataEventLog.json')

        # Add the stats.log to the result, which can be used to determine service success
        if os.path.exists(os.path.join(self.working_directory, 'stats.log')):
            request.add_supplementary(
                os.path.join(self.working_directory, 'stats.log'), 'log',
                'stats.log')

        request.result = result
Example #28
0
class Beach(ServiceBase):
    """
    Service that scans a Sysmon XML log for potential threat indicators.
    """
    SERVICE_CATEGORY = 'Static Analysis'
    SERVICE_ACCEPTS = 'document/xml'
    SERVICE_REVISION = ServiceBase.parse_revision('$Id$')
    SERVICE_VERSION = '1'
    SERVICE_ENABLED = True
    SERVICE_STAGE = 'CORE'
    SERVICE_CPU_CORES = 1
    SERVICE_RAM_MB = 256

    def __init__(self, cfg=None):
        super(Beach, self).__init__(cfg)

    def start(self):
        self.log.debug("Beachcomber service started")

    def execute(self, request):
        local_filename = request.download()
        try:
            with open(local_filename, "r", encoding='utf-8',
                      errors='ignore') as file_content:
                xml = file_content.read()
        except ExpatError:
            print("Error: Format error in the log")
            sys.exit(1)

        script.run_script(xml)

        with open("opt/al/pkg/al_services/alsvc_beach/alerts_generated.txt",
                  "r") as alerts:
            output = alerts.readlines()

        result = self.parse_alerts(output)
        request.result = result

        # os.remove("opt/al/pkg/al_services/alsvc_beach/alerts_generated.txt")
        os.remove("opt/al/pkg/al_services/alsvc_beach/event_json.json")
        os.remove("opt/al/pkg/al_services/alsvc_beach/indicators.json")
        os.remove("opt/al/pkg/al_services/alsvc_beach/indicators.yaml")

    def parse_alerts(self, alerts):
        res = Result()
        line_count = 0
        newline_count = 0
        content = ""
        yml_indicator = ""
        xml_hits = ResultSection(title_text='XML Malware Indicator Match')

        if os.stat("/opt/al/pkg/al_services/alsvc_beach/alerts_generated.txt"
                   ).st_size == 0:
            # Result file is empty, nothing to report
            return res
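
        # Illustrative (assumed) alerts_generated.txt layout expected below:
        # each block starts with the YAML indicator name on its own line,
        # followed by the matching event content, with blank lines separating
        # blocks.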

        # Iterate through each line, grouping indicator names with their content
        for line in alerts:
            if line != "\n":
                line_count += 1
                if line_count == 1:
                    yml_indicator = line
                else:
                    content += line + "\n"
            elif line_count == 0:
                newline_count += 1
            else:
                newline_count = 0
                xml_hits.add_section(
                    XmlResultObject(yml_indicator, content, SCORE.VHIGH))
                content = ""
                line_count = 0

        res.add_result(xml_hits)
        return res
Example #29
0
class FSecure(ServiceBase):
    SERVICE_CATEGORY = 'Antivirus'
    SERVICE_DESCRIPTION = "This services wraps FSecure ICAP Proxy."
    SERVICE_ENABLED = True
    SERVICE_REVISION = ServiceBase.parse_revision('$Id: f6948d80a574291a3d3045ec11dad8dbbfef343a $')
    SERVICE_VERSION = '1'
    SERVICE_DEFAULT_CONFIG = {
        "ICAP_HOST": "localhost",
        "ICAP_PORT": 1344,
    }
    SERVICE_CPU_CORES = 0.3
    SERVICE_RAM_MB = 128

    def __init__(self, cfg=None):
        super(FSecure, self).__init__(cfg)
        self.icap_host = None
        self.icap_port = None
        self.fsecure_version = None
        self.icap = None
        self._av_info = ''

    def execute(self, request):
        payload = request.get()
        icap_result = self.icap.scan_data(payload)
        request.result = self.icap_to_alresult(icap_result)
        request.task.report_service_context(self._av_info)

        # If this is a deep-scan request, include the raw ICAP response in the debug info.
        if request.task.deep_scan:
            request.task.service_debug_info = icap_result

    def get_tool_version(self):
        return self._av_info

    def icap_to_alresult(self, icap_result):
        infection_type = ''
        infection_name = ''
        result_lines = icap_result.strip().splitlines()
        if not len(result_lines) > 3:
            raise Exception('Invalid result from FSecure ICAP server: %s' % str(icap_result))

        x_scan_result = 'X-FSecure-Scan-Result:'
        x_infection_name = 'X-FSecure-Infection-Name:'
        istag = 'ISTag:'
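        # Illustrative (assumed) ICAP response fragment; only these three
        # headers are consulted by the loop below:
        #   X-FSecure-Scan-Result: infected
        #   X-FSecure-Infection-Name: "EICAR_Test_File"
        #   ISTag: "FSAV-9.20"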

        for line in result_lines:
            if line.startswith(x_scan_result):
                infection_type = line[len(x_scan_result):].strip()
            elif line.startswith(x_infection_name):
                infection_name = line[len(x_infection_name):].strip().strip('"')
            elif line.startswith(istag):
                version_info = line[len(istag):].strip()
                self._set_av_ver(version_info)

        result = Result()
        if infection_name:
            result.add_section(VirusHitSection(infection_name, SCORE.SURE, detection_type=infection_type))
            result.append_tag(VirusHitTag(infection_name))
            
        return result

    def _set_av_ver(self, dbver):
        self._av_info = 'FSecure Internet Linux 5. [%s]' % dbver.strip('"')

    def start(self):
        self.icap_host = self.cfg.get('ICAP_HOST')
        self.icap_port = int(self.cfg.get('ICAP_PORT'))
        self.icap = FSecureIcapClient(self.icap_host, self.icap_port)
        self._set_av_ver(self.icap.get_service_version())
Example #30
0
class VirusTotalDynamic(ServiceBase):
    SERVICE_CATEGORY = "External"
    SERVICE_DESCRIPTION = "This service submits files/URLs to VirusTotal for analysis."
    SERVICE_ENABLED = False
    SERVICE_REVISION = ServiceBase.parse_revision('$Id: 9e6fa54878d0adbadeed565a3450f7d8d7f1bbe9 $')
    SERVICE_STAGE = "CORE"
    SERVICE_TIMEOUT = 600
    SERVICE_IS_EXTERNAL = True
    SERVICE_DEFAULT_CONFIG = {
        'private_api': False,
        'API_KEY': '',
        'BASE_URL': 'https://www.virustotal.com/vtapi/v2/'
    }

    def __init__(self, cfg=None):
        super(VirusTotalDynamic, self).__init__(cfg)
        self.api_key = self.cfg.get('API_KEY')
        self.private_api = self.cfg.get('private_api')

    # noinspection PyGlobalUndefined,PyUnresolvedReferences
    def import_service_deps(self):
        global requests
        import requests

    def start(self):
        self.log.debug("VirusTotal service started")

    def execute(self, request):
        filename = request.download()
        response = self.scan_file(request, filename)
        result = self.parse_results(response)
        if self.private_api:
            # Call some private API functions
            pass

        request.result = result

    # noinspection PyUnusedLocal
    def scan_file(self, request, filename):

        # Let's scan the file
        url = self.cfg.get('BASE_URL') + "file/scan"
        try:
            f = open(filename, "rb")
        except:
            print "Could not open file"
            return {}

        files = {"file": f}
        values = {"apikey": self.api_key}
        r = requests.post(url, values, files=files)
        try:
            json_response = r.json()
        except ValueError:
            self.log.warn("Invalid response from VirusTotal, "
                          "HTTP code: %s, "
                          "content length: %i, "
                          "headers: %s" % (r.status_code, len(r.content), repr(r.headers)))
            if len(r.content) == 0:
                raise RecoverableError("VirusTotal didn't return a JSON object, HTTP code %s" % r.status_code)
            raise

        # The file has been submitted; bail out early if the submission was not accepted.

        if json_response is not None and json_response.get('response_code') <= 0:
            return json_response

        sha256 = json_response.get('sha256', 0)
        if not sha256:
            return json_response

        # Have to wait for the file scan to be available -- might take a few minutes...
        while True:
            url = self.cfg.get('BASE_URL') + "file/report"
            params = {'apikey': self.api_key, 'resource': sha256}
            r = requests.post(url, params)
            json_response = r.json()
            if 'scans' in json_response or json_response.get('response_code') <= 0:
                break
            # Limit is 4 public API calls per minute, make sure we don't exceed quota
            time.sleep(20)

        return json_response
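
    # Illustrative (abridged) VirusTotal v2 file/report response consumed by
    # parse_results below; field names follow the public v2 API:
    #   {"response_code": 1, "positives": 2, "total": 57,
    #    "permalink": "https://www.virustotal.com/...",
    #    "scans": {"SomeAV": {"detected": true, "result": "Trojan.Generic"},
    #              "OtherAV": {"detected": false, "result": null}}}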

    def parse_results(self, response):
        res = Result()
        response = response.get('results', response)

        if response is not None and response.get('response_code') == 204:
            message = "You exceeded the public API request rate limit (4 requests of any nature per minute)"
            raise VTException(message)
        elif response is not None and response.get('response_code') == 203:
            message = "You tried to perform calls to functions for which you require a Private API key."
            raise VTException(message)
        elif response is not None and response.get('response_code') == 1:
            av_hits = ResultSection(title_text='Anti-Virus Detections')
            url_section = ResultSection(
                SCORE.NULL,
                'Virus total report permalink',
                self.SERVICE_CLASSIFICATION,
                body_format=TEXT_FORMAT.URL,
                body=json.dumps({"url": response.get('permalink')}))
            res.add_section(url_section)

            scans = response.get('scans', response)
            av_hits.add_line('Found %d AV hit(s) from %d scans.' % (response.get('positives'), response.get('total')))
            for majorkey, subdict in sorted(scans.iteritems()):
                if subdict['detected']:
                    virus_name = subdict['result']
                    res.append_tag(VirusHitTag(virus_name, context="scanner:%s" % majorkey))
                    av_hits.add_section(AvHitSection(majorkey, virus_name, SCORE.SURE))
            res.add_result(av_hits)

        return res