Example #1
def is_whitelisted(notice):
    reason, hit = get_whitelist_verdict(whitelist, notice)
    hit = {x: dotdump(safe_str(y)) for x, y in hit.iteritems()}

    sha256 = notice.get('sha256')

    if not reason:
        with whitelisted_lock:
            reason = whitelisted.get(sha256, None)
            if reason:
                hit = 'cached'

    if reason:
        if hit != 'cached':
            with whitelisted_lock:
                whitelisted[sha256] = reason

        notice.set(
            'failure', "Whitelisting due to reason %s (%s)" %
            (dotdump(safe_str(reason)), hit))
        dropq.push(notice.raw)

        ingester_counts.increment('ingest.whitelisted')
        whitelister_counts.increment('whitelist.' + reason)

    return reason
Example #2
def process_signatures(sigs, al_result, classification):
    log.debug("Processing signature results.")
    if len(sigs) > 0:
        sigs_score = 0
        sigs_res = ResultSection(title_text="Signatures", classification=classification)
        skipped_sigs = ['dead_host', 'has_authenticode', 'network_icmp', 'network_http', 'allocates_rwx', 'has_pdb']
        # Severity is 0-5ish with 0 being least severe.
        for sig in sigs:
            severity = float(sig.get('severity', 0))
            actor = sig.get('actor', '')
            sig_classification = sig.get('classification', CLASSIFICATION.UNRESTRICTED)
            sig_score = int(severity * 100)
            sig_name = sig.get('name', 'unknown')
            sig_categories = sig.get('categories', [])
            sig_families = sig.get('families', [])

            # Skipped Signature Checks:
            if sig_name in skipped_sigs:
                continue

            sigs_score += sig_score

            sigs_res.add_line(sig_name + ' [' + str(sig_score) + ']')
            sigs_res.add_line('\tDescription: ' + sig.get('description', ''))
            if len(sig_categories) > 0:
                sigs_res.add_line('\tCategories: ' + ','.join([safe_str(x) for x in sig_categories]))
                for category in sig_categories:
                    al_result.add_tag(tag_type=TAG_TYPE.DYNAMIC_SIGNATURE_CATEGORY,
                                      value=category,
                                      weight=TAG_WEIGHT.HIGH,
                                      classification=sig_classification)

            if len(sig_families) > 0:
                sigs_res.add_line('\tFamilies: ' + ','.join([safe_str(x) for x in sig_families]))
                for family in sig_families:
                    al_result.add_tag(tag_type=TAG_TYPE.DYNAMIC_SIGNATURE_FAMILY,
                                      value=family,
                                      weight=TAG_WEIGHT.VHIGH,
                                      classification=sig_classification)

            if sig_name != 'unknown' and sig_name != '':
                al_result.add_tag(tag_type=TAG_TYPE.DYNAMIC_SIGNATURE_NAME,
                                  value=sig_name,
                                  weight=TAG_WEIGHT.VHIGH,
                                  classification=sig_classification)

            sigs_res.add_line('')
            if actor and actor != '':
                al_result.add_tag(tag_type=TAG_TYPE.THREAT_ACTOR,
                                  value=actor,
                                  weight=TAG_WEIGHT.VHIGH,
                                  classification=sig_classification)

        # Cap the combined score; we don't want to get carried away.
        sigs_res.score = min(1000, sigs_score)
        al_result.add_section(sigs_res)
Example #3
    def __init__(
        self,
        score=0,
        title_text=None,
        classification=Classification.UNRESTRICTED,
        parent=None,
        body='',
        body_format=None,
        tags=None,
    ):
        super(ResultSection, self).__init__()
        self.parent = parent
        self.score = score
        self.classification = classification
        self.body = body
        self.body_format = body_format
        self.links = []
        self.subsections = []
        self.tags = tags or []
        self.depth = 0
        self.finalized = False
        self.truncated = False
        if isinstance(title_text, list):
            title_text = ''.join(title_text)
        self.title_text = safe_str(title_text)

        if parent is not None:
            parent.add_section(self)

        self._warn_on_validation_errors()
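
A minimal usage sketch for the constructor above (names follow the snippet; the scores and titles are made up). Passing parent= makes the child attach itself, so nesting needs no separate add_section call.

# Hypothetical usage of the ResultSection constructor above.
top = ResultSection(title_text="Static Analysis")
# A list title is joined into one string ("PE Header") and parent=top
# triggers top.add_section(child) inside __init__.
child = ResultSection(score=100,
                      title_text=["PE ", "Header"],
                      parent=top)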
Example #4
def download_file(srl, **kwargs):
    """
    Download the file using the default encoding method. This api
    will force the browser in download mode.
    
    Variables: 
    srl       => A resource locator for the file (sha256)
    
    Arguments: 
    name      => Name of the file to download
    format    => Format to encode the file in
    password  => Password of the password protected zip
    
    Data Block:
    None

    API call example:
    /api/v3/file/download/123456...654321/

    Result example:
    <THE FILE BINARY ENCODED IN SPECIFIED FORMAT>
    """
    user = kwargs['user']
    file_obj = STORAGE.get_file(srl)

    if not file_obj:
        return make_api_response({}, "The file was not found in the system.", 404)

    if user and Classification.is_accessible(user['classification'], file_obj['classification']):
        params = load_user_settings(user)
    
        name = request.args.get('name', srl)
        if name == "": 
            name = srl
        else:
            name = basename(name)
        name = safe_str(name)

        file_format = request.args.get('format', params['download_encoding'])
        if file_format == "raw" and not ALLOW_RAW_DOWNLOADS:
            return make_api_response({}, "RAW file download has been disabled by administrators.", 403)

        password = request.args.get('password', None)
        
        with forge.get_filestore() as f_transport:
            data = f_transport.get(srl)

        if not data:
            return make_api_response({}, "The file was not found in the system.", 404)

        data, error, already_encoded = encode_file(data, file_format, name, password)
        if error:
            return make_api_response({}, error['text'], error['code'])

        if file_format != "raw" and not already_encoded:
            name = "%s.%s" % (name, file_format)
    
        return make_file_response(data, name, len(data))
    else:
        return make_api_response({}, "You are not allowed to download this file.", 403)
Example #5
def add_access_control(user):
    user.update(
        Classification.get_access_control_parts(user.get(
            "classification", Classification.UNRESTRICTED),
                                                user_classification=True))

    gl2_query = " OR ".join(
        ['__access_grp2__:__EMPTY__'] +
        ['__access_grp2__:"%s"' % x for x in user["__access_grp2__"]])
    gl2_query = "(%s) AND " % gl2_query

    gl1_query = " OR ".join(
        ['__access_grp1__:__EMPTY__'] +
        ['__access_grp1__:"%s"' % x for x in user["__access_grp1__"]])
    gl1_query = "(%s) AND " % gl1_query

    req = list(
        set(Classification.get_access_control_req()).difference(
            set(user["__access_req__"])))
    req_query = " OR ".join(['__access_req__:"%s"' % r for r in req])
    if req_query:
        req_query = "-(%s) AND " % req_query

    lvl_query = "__access_lvl__:[0 TO %s]" % user["__access_lvl__"]

    query = "".join([gl2_query, gl1_query, req_query, lvl_query])
    user['access_control'] = safe_str(query)
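
To make the assembled query concrete, a hypothetical minimal case, assuming the classification engine defines no requirement tokens and get_access_control_parts yields empty group lists and access level 0 for an unrestricted user:

# Hypothetical illustration only.
user = {"classification": Classification.UNRESTRICTED}
add_access_control(user)
# req_query is empty under the assumptions above, so the resulting
# access_control string reduces to roughly:
#   (__access_grp2__:__EMPTY__) AND (__access_grp1__:__EMPTY__)
#   AND __access_lvl__:[0 TO 0]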
Example #6
def process_clsid(key, result_map):
    clsid_map = result_map.get('clsids', defaultdict(str))
    for uuid in set(UUID_RE.findall(safe_str(key))):
        # Check if we have a matching CLSID
        uuid = uuid.upper()
        name = clsids.get(uuid)
        if name:
            clsid_map[name] = uuid
    result_map['clsids'] = clsid_map
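
The snippet depends on a module-level UUID_RE and clsids table that are not shown; a plausible shape for both, as an assumption only:

import re
from collections import defaultdict  # used by process_clsid above

# Assumed shapes, not the project's actual definitions.
UUID_RE = re.compile(r'[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}')
clsids = {'0002DF01-0000-0000-C000-000000000046': 'InternetExplorer.Application'}

result_map = {}
process_clsid('HKCR\\CLSID\\{0002df01-0000-0000-c000-000000000046}', result_map)
# result_map['clsids'] now maps the friendly name to the upper-cased UUID.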
Example #7
    def submit_existing_file(self, path, **kw):
        request = {
            0: {
                'path': safe_str(path),
                'sha256': kw['sha256'],
            }
        }

        return self.submit_requests(request, **kw)
Example #8
    def extract_ace(self, request, local, encoding):
        if encoding != 'ace':
            return [], False

        path = os.path.join(self.working_directory, "ace")
        try:
            os.mkdir(path)
        except OSError:
            pass

        # noinspection PyBroadException
        try:
            with tempfile.NamedTemporaryFile(suffix=".ace", dir=path) as tf:
                # unace needs the .ace file extension
                with open(local, "rb") as fh:
                    tf.write(fh.read())
                    tf.flush()

                proc = self.st.run(subprocess.Popen(
                    '/usr/bin/unace e -y %s' % tf.name,
                    stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT, cwd=path, env=os.environ, shell=True,
                    preexec_fn=set_death_signal()))

                # Note: proc.communicate() hangs, so drain stdout manually.
                stdoutput = proc.stdout.read()
                while True:
                    stdoutput += proc.stdout.read()
                    if proc.poll() is not None:
                        break
                    time.sleep(0.01)

            if stdoutput:
                extracted_children = []
                if "extracted:" in stdoutput:
                    for line in stdoutput.splitlines():
                        line = line.strip()
                        m = re.match("extracting (.+?)[ ]*(CRC OK)?$", line)
                        if not m:
                            continue

                        filename = m.group(1)
                        filepath = os.path.join(path, filename)
                        if os.path.isdir(filepath):
                            continue
                        else:
                            extracted_children.append([filepath, encoding, safe_str(filename)])

                return extracted_children, False

        except ExtractIgnored:
            raise
        except Exception:
            self.log.exception('While extracting %s with unace', request.srl)

        return [], False
Example #9
    def process_ole_stream(self, ole, streams_section):
        listdir = ole.listdir()
        streams = []
        for dir_entry in listdir:
            streams.append('/'.join(dir_entry))

        if "\x05HwpSummaryInformation" in streams:
            decompress = True
        else:
            decompress = False

        decompress_macros = []

        for stream in streams:
            self.log.debug("Extracting stream: {}".format(stream))
            data = ole.openstream(stream).getvalue()
            try:
                if "Ole10Native" in stream:
                    if self.process_ole10native(stream, data, streams_section) is True:
                        continue

                elif "PowerPoint Document" in stream:
                    if self.process_powerpoint_stream(data, streams_section) is True:
                        continue

                if decompress:
                    try:
                        data = zlib.decompress(data, -15)
                    except zlib.error:
                        pass

                streams_section.add_line(safe_str(stream))
                # Only write all streams with deep scan.
                stream_name = '{}.ole_stream'.format(hashlib.sha256(data).hexdigest())
                if self.request.deep_scan:
                    stream_path = os.path.join(self.working_directory, stream_name)
                    with open(stream_path, 'wb') as fh:
                        fh.write(data)
                    self.request.add_extracted(stream_path, "Embedded OLE Stream.", stream)
                    if decompress and (stream.endswith(".ps") or stream.startswith("Scripts/")):
                        decompress_macros.append(data)

            except Exception as e:
                self.log.error("Error adding extracted stream {}: {}".format(stream, e))
                continue
        if decompress_macros:
            macros = "\n".join(decompress_macros)
            stream_name = '{}.macros'.format(hashlib.sha256(macros).hexdigest())
            stream_path = os.path.join(self.working_directory, stream_name)
            with open(stream_path, 'w') as fh:
                fh.write(macros)

            self.request.add_extracted(stream_path, "Combined macros.", "all_macros.ps")
            return True
        return False
Example #10
 def add_line(self, text, _deprecated_format=None):
     # add_line with a list should join without a newline separator.
     # Use add_lines if the list should be split one element per line.
     if isinstance(text, list):
         text = ''.join(text)
     textstr = safe_str(text)
     if len(self.body) != 0:
         # noinspection PyAugmentAssignment
         textstr = '\n' + textstr
     self.body = self.body + textstr
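
A short illustration of the joining behaviour described in the comments, assuming a section whose body starts empty:

# Hypothetical usage of add_line above.
sec = ResultSection(title_text="Demo")
sec.add_line(["foo", "bar"])  # list joins with no separator: body == "foobar"
sec.add_line("baz")           # later calls prepend '\n': body == "foobar\nbaz"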
Example #11
    def add_tag(self,
                tag_type,
                value,
                weight,
                usage=None,
                classification=Classification.UNRESTRICTED,
                context=None):
        tag = {
            'type': tag_type,
            'value': safe_str(value),
            'weight': weight,
            'usage': usage or self.default_usage,
            'classification': classification,
            'context': context
        }

        for existing_tag in self.tags:
            if existing_tag['type'] == tag['type'] and \
               existing_tag['value'] == tag['value']:
                return
        self.tags.append(tag)

        if not TAG_TYPE.contains_value(tag_type):
            tb = traceback.format_stack(limit=4)
            log.warn("Invalid tag_type: %s -- %s", tag_type, tb)
        if len(value) <= 0 or len(value) >= 2048:
            tb = traceback.format_stack(limit=4)
            log.warn("invalid tag_value: %s:'%s' -- %s", tag_type,
                     safe_str(value), tb)
        if not (isinstance(weight, int) and -1000 < weight < 1000):
            log.warn("invalid weight: %s", weight)
        if usage and not TAG_USAGE.contains_value(usage):
            log.warn("invalid tag usage: %s", usage)
        if not Classification.is_valid(classification):
            tb = traceback.format_stack(limit=4)
            log.warn("invalid classification:%s.\n%s",
                     str(self.classification), str(tb))
        if context:
            if not Context.verify_context(tag_type, context):
                log.warn("Invalid tag_type: %s and context: %s combination" %
                         (tag_type, context))
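
A usage sketch, given a result object named result; the constants are assumed members of the TAG_TYPE and TAG_WEIGHT enums rather than confirmed values.

# Hypothetical call.
result.add_tag(tag_type=TAG_TYPE.NET_DOMAIN_NAME,
               value='example.com',
               weight=TAG_WEIGHT.MED)
# A second call with the same type/value pair returns early (no-op).
result.add_tag(tag_type=TAG_TYPE.NET_DOMAIN_NAME,
               value='example.com',
               weight=TAG_WEIGHT.MED)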
Example #12
 def report_machine_info(self, machine_name):
     try:
         self.log.debug("Querying machine info for %s" % machine_name)
         machine = self.cuckoo_query_machine_info(machine_name)
         machine_section = ResultSection(
             title_text='Machine Information',
             classification=self.SERVICE_CLASSIFICATION)
         machine_section.add_line('ID: ' + str(machine.get('id')))
         machine_section.add_line('Name: ' + str(machine.get('name')))
         machine_section.add_line('Label: ' + str(machine.get('label')))
         machine_section.add_line('Platform: ' +
                                  str(machine.get('platform')))
         machine_section.add_line('Tags:')
         for tag in machine.get('tags', []):
             machine_section.add_line('\t ' +
                                      safe_str(tag).replace('_', ' '))
         self.file_res.add_section(machine_section)
         return str(machine.get('ip', ""))
     except Exception as e:
         self.log.error(
             'Unable to retrieve machine information for %s: %s' %
             (machine_name, safe_str(e)))
Example #13
    def extract_tnef(self, _, file_path, encoding):
        children = []

        if encoding != 'tnef':
            return children, False

        # noinspection PyBroadException
        try:
            # noinspection PyUnresolvedReferences
            from tnefparse import tnef
            tnef_logger = logging.getLogger("tnef-decode")
            tnef_logger.setLevel(60)  # This completely turns off the TNEF logger

            count = 0
            for a in tnef.TNEF(open(file_path, 'rb').read()).attachments:
                # This may not exist so try to access it and deal the
                # possible AttributeError, by skipping this entry as
                # there is no point if there is no data.
                try:
                    data = a.data
                except AttributeError:
                    continue

                count += 1

                # This may not exist either but long_filename still
                # seems to return so deal with the AttributeError
                # here rather than blowing up.
                try:
                    name = a.long_filename() or a.name
                    if not name:
                        continue

                    name = safe_str(name)
                except AttributeError:
                    name = 'unknown_tnef_%d' % count

                if not name:
                    continue

                path = os.path.join(self.working_directory, str(count))
                with open(path, 'wb') as f:
                    f.write(data)

                children.append([path, encoding, name])
        except ImportError:
            self.log.exception("Import error: tnefparse library not installed:")
        except Exception:
            self.log.exception("Error extracting from tnef file:")

        return children, False
Example #14
    def cuckoo_submit(self, file_content):
        """Submits a new file to Cuckoo for analysis."""
        try:
            task_id = self.cuckoo_submit_file(file_content)
            self.log.debug("Submitted file. Task id: %s.", task_id)
            if not task_id:
                err_msg = "Failed to get task for submitted file."
                self.cuckoo_task.errors.append(err_msg)
                self.log.error(err_msg)
                return
            else:
                self.cuckoo_task.id = task_id
        except Exception as e:
            err_msg = "Error submitting to Cuckoo"
            self.cuckoo_task.errors.append('%s: %s' % (err_msg, safe_str(e)))
            raise RecoverableError("Unable to submit to Cuckoo")

        self.log.debug("Submission succeeded. File: %s -- Task ID: %s" %
                       (self.cuckoo_task.file, self.cuckoo_task.id))

        # Quick sleep to avoid failing when the API can't get the task yet.
        for i in xrange(5):
            if self.check_stop():
                return
            time.sleep(1)
        try:
            status = self.cuckoo_poll_started()
        except RetryError:
            self.log.info("VM startup timed out")
            status = None

        if status == "started":
            try:
                status = self.cuckoo_poll_report()
            except RetryError:
                self.log.info("Max retries exceeded for report status.")
                status = None

        err_msg = None
        if status is None:
            err_msg = "Timed out while waiting for cuckoo to analyze file."
        elif status == "missing":
            err_msg = "Task went missing while waiting for cuckoo to analyze file."
        elif status == "stopped":
            err_msg = "Service has been stopped while waiting for cuckoo to analyze file."

        if err_msg:
            self.log.debug(err_msg)
            raise RecoverableError(err_msg)
Example #15
 def _init_section_list(self):
     # Lazy init
     if self._sect_list is None:
         self._sect_list = []
         try:
             for section in self.pe_file.sections:
                 zero_idx = section.Name.find(chr(0x0))
                 if not zero_idx == -1:
                     sname = section.Name[:zero_idx]
                 else:
                     sname = safe_str(section.Name)
                 entropy = section.get_entropy()
                 self._sect_list.append((sname, section, section.get_hash_md5(), entropy))
         except AttributeError:
             pass
Example #16
    def extract_libarchive(self, request, local, encoding):
        extracted_children = []

        try:
            # noinspection PyUnresolvedReferences
            from libarchive import Archive

            for file_encoding in ["utf8", "cp437"]:
                try:
                    with Archive(local, encoding=file_encoding) as archive:
                        count = 0

                        for entry in archive:
                            name = safe_str(entry.pathname)
                            if entry.isdir():
                                continue

                            count += 1
                            path = os.path.join(self.working_directory, str(count))

                            with open(path, 'wb') as f:
                                archive.readpath(f)

                            if os.stat(path).st_size != entry.size:
                                raise RuntimeError("Extracted file size mismatch, archive is probably "
                                                   "password protected: %s" % name)

                            extracted_children.append([path, encoding, name])

                    break
                except RuntimeError:
                    extracted_children = []
                except UnicodeDecodeError:
                    extracted_children = []
                    self.log.debug("Archive is not using %s charset. Trying another one...", file_encoding)
                except Exception as e:
                    extracted_children = []
                    msg = str(e)
                    if msg.endswith("Unrecognized archive format."):
                        return extracted_children, False
                    elif msg == "Fatal error executing function, message is: None.":
                        return extracted_children, False
                    if request.tag != 'archive/cab':
                        self.log.exception('while extracting (%s) with libarchive', request.srl)
        except ImportError:
            self.log.exception("Import error: libarchive library not installed:")

        return extracted_children, False
Example #17
def apply_overlay(module_name, overlay):
    from assemblyline.common.charset import safe_str
    if not overlay:
        return False

    import sys
    module = sys.modules[module_name]

    for k, v in overlay.iteritems():
        t = type(v)
        if t == unicode or t == str:
            v = safe_str(v)

        setattr(module, k, v)

    return True
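
A usage sketch; the module name and overlay keys are made up. The target module must already be imported, since the function resolves it through sys.modules.

# Hypothetical overlay: string values pass through safe_str(), other
# types are set on the module unchanged.
overlay = {'DEBUG': True, 'DATA_ROOT': u'/opt/al/data'}
apply_overlay('assemblyline.common.constants', overlay)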
Example #18
def get_digests_for_file(path,
                         blocksize=DEFAULT_BLOCKSIZE,
                         calculate_entropy=True,
                         on_first_block=lambda b, l: {}):
    """ Generate digests for file reading only 'blocksize bytes at a time."""
    bc = None
    if calculate_entropy:
        try:
            bc = entropy.BufferedCalculator()
        except:  # pylint: disable=W0702
            calculate_entropy = False

    result = {'path': safe_str(path)}

    md5 = hashlib.md5()
    sha1 = hashlib.sha1()
    sha256 = hashlib.sha256()
    size = 0

    with open(path, 'rb') as f:
        data = f.read(blocksize)
        length = len(data)

        if not size:
            result.update(on_first_block(data, length))

        while length > 0:
            if calculate_entropy:
                bc.update(data, length)
            md5.update(data)
            sha1.update(data)
            sha256.update(data)
            size += length

            data = f.read(blocksize)
            length = len(data)

    if calculate_entropy:
        result['entropy'] = bc.entropy()
    else:
        result['entropy'] = 0
    result['md5'] = md5.hexdigest()
    result['sha1'] = sha1.hexdigest()
    result['sha256'] = sha256.hexdigest()
    result['size'] = size

    return result
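
A quick usage sketch with a made-up path; a single pass over the file yields the size, the entropy and all three digests:

# Hypothetical call to the helper above.
info = get_digests_for_file('/tmp/sample.bin', blocksize=65536)
# info holds 'path', 'md5', 'sha1', 'sha256', 'size' and 'entropy'.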
Example #19
def make_file_response(data,
                       name,
                       size,
                       status_code=200,
                       content_type="application/octet-stream"):
    quota_user = flsk_session.pop("quota_user", None)
    quota_id = flsk_session.pop("quota_id", None)
    quota_set = flsk_session.pop("quota_set", False)
    if quota_user and quota_set:
        RATE_LIMITER.dec(quota_user, track_id=quota_id)
        RATE_LIMITER.dec("__global__", track_id=quota_id)

    response = make_response(data, status_code)
    response.headers["Content-Type"] = content_type
    response.headers["Content-Length"] = size
    response.headers[
        "Content-Disposition"] = 'attachment; filename="%s"' % safe_str(name)
    return response
Example #20
def process_debug(debug, al_result, classification):
    failed = False
    if 'errors' in debug:
        error_res = ResultSection(title_text='Analysis Errors', classification=classification)
        for error in debug['errors']:
            err_str = str(error)
            err_str = err_str.lower()
            if err_str is not None and len(err_str) > 0:
                # Timeouts - ok, just means the process never exited
                # Start Error - probably a corrupt file..
                # Initialization Error - restart the docker container
                error_res.add_line(error)
                if "analysis hit the critical timeout" not in err_str and \
                    "Unable to execute the initial process" not in err_str:
                    raise RecoverableError("An error prevented cuckoo from "
                                           "generating complete results: %s" % safe_str(error))
        if len(error_res.body) > 0:
            al_result.add_section(error_res)
    return failed
Example #21
    def _7zip_submit_extracted(self, request, lines, path, encoding):
        extract_pe_sections = request.get_param('extract_pe_sections')
        extracted_children = []

        for line in lines:
            if line.startswith("Extracting  "):
                filename = line.split("Extracting  ", 1)[1]

                if not extract_pe_sections and \
                        ((encoding.startswith("executable/windows") and
                          [f for f in self.FORBIDDEN_EXE if filename.startswith(f)]) or
                         (encoding.startswith("executable/linux")and filename in self.FORBIDDEN_ELF_EXE)):
                    raise ExtractIgnored("Detected extraction of forbidden PE/ELF file sections. "
                                         "No files will be extracted.")

                if os.path.isdir(path + "/" + filename):
                    continue
                else:
                    extracted_children.append([path + "/" + filename, encoding, safe_str(filename)])

        return extracted_children
Example #22
 def __init__(self, result_as_xml_string):
     # There is a bug in McAfee XML output: occasionally the XML result
     # will have non-escaped UTF-8 characters in the embedded filename
     # etc. that will not parse, so remove them.
     result_as_xml_string = safe_str(
         re.sub(self.BAD_XML_CHARREF, 'INV', result_as_xml_string))
     root = ElementTree.fromstring(
         result_as_xml_string)  # @UndefinedVariable
     if root.tag != ROOT_TAG:
         raise Exception('Unexpected root in XML result: %s.' % root.tag)
     element_parsers = {
         CHILD_PREAMBLE: self._add_preamble,
         CHILD_DATETIME: self._ignore_element,
         CHILD_TIME: self._add_duration,
         CHILD_OPTIONS: self._ignore_element,
         CHILD_FILE: self._add_file_result,
     }
     self.preamble = None
     self.file_results = []
     for child in root:
         element_parsers.get(child.tag,
                             self._handle_unexpected_element)(child)
Example #23
def stream_file_response(reader, name, size, status_code=200):
    quota_user = flsk_session.pop("quota_user", None)
    quota_id = flsk_session.pop("quota_id", None)
    quota_set = flsk_session.pop("quota_set", False)
    if quota_user and quota_set:
        RATE_LIMITER.dec(quota_user, track_id=quota_id)
        RATE_LIMITER.dec("__global__", track_id=quota_id)

    chunk_size = 65535

    def generate():
        reader.seek(0)
        while True:
            data = reader.read(chunk_size)
            if not data:
                break
            yield data

    headers = {
        "Content-Type": 'application/octet-stream',
        "Content-Length": size,
        "Content-Disposition": 'attachment; filename="%s"' % safe_str(name)
    }
    return Response(generate(), status=status_code, headers=headers)
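
A sketch of wiring this into a Flask view; the route, DOWNLOAD_DIR and file handling are assumptions (the reader only needs seek() and read()):

# Hypothetical Flask route using the streaming helper above.
@app.route('/download/<name>')
def download(name):
    path = os.path.join(DOWNLOAD_DIR, basename(name))  # DOWNLOAD_DIR is assumed
    fh = open(path, 'rb')  # a real version would close fh once streaming ends
    return stream_file_response(fh, name, os.path.getsize(path))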
Example #24
def fileinfo(path):
    path = safe_str(path)

    data = get_digests_for_file(path, on_first_block=ident)
    if data['mime'].lower() == 'application/cdfv2-corrupt':
        with open(path, 'rb') as fh:
            buf = fh.read()
            buflen = len(buf)
            data.update(ident(buf, buflen))
    data['ssdeep'] = ssdeep_from_file(path) if ssdeep_from_file else ''

    if not int(data.get('size', -1)):
        data['tag'] = 'empty'
    elif data['tag'] == 'archive/zip' or data['tag'] == 'java/jar':
        data['tag'] = zip_ident(path)
    elif data['tag'] == 'unknown':
        data['tag'], _ = guess_language(path)
    elif data['tag'] == 'archive/cart':
        data['tag'] = cart_ident(path)
    elif data['tag'] == 'executable/windows/dos':
        # The default magic file misidentifies PE files with a munged DOS header
        data['tag'] = dos_ident(path)

    return data
Example #25
    def _do_respmod(self, filename, data):
        encoded = self.chunk_encode(data)

        # ICAP RESPMOD req-hdr is the start of the original HTTP request.
        respmod_req_hdr = "GET /{FILENAME} HTTP/1.1\r\n\r\n".format(
            FILENAME=safe_str(filename))

        # ICAP RESPMOD res-hdr is the start of the HTTP response for above request.
        respmod_res_hdr = ("HTTP/1.1 200 OK\r\n"
                           "Transfer-Encoding: chunked\r\n\r\n")

        res_hdr_offset = len(respmod_req_hdr)
        res_bdy_offset = len(respmod_res_hdr) + res_hdr_offset

        # The ICAP RESPMOD header. Note:
        # res-hdr offset should match the start of the GET request above.
        # res-body offset should match the start of the response above.

        respmod_icap_hdr = (
            "RESPMOD icap://{HOST}:{PORT}/{SERVICE}{ACTION} ICAP/1.0\r\n"
            "Host:{HOST}:{PORT}\r\n"
            "Allow:204\r\n"
            "Encapsulated: req-hdr=0, res-hdr={RES_HDR}, res-body={RES_BODY}\r\n\r\n"
        ).format(HOST=self.host,
                 PORT=self.port,
                 SERVICE=self.service,
                 ACTION=self.action,
                 RES_HDR=res_hdr_offset,
                 RES_BODY=res_bdy_offset)

        sio = StringIO()
        sio.write(respmod_icap_hdr)
        sio.write(respmod_req_hdr)
        sio.write(respmod_res_hdr)
        sio.write(encoded)
        serialized_request = sio.getvalue()

        for i in xrange(self.MAX_RETRY):
            s = None
            try:
                s = socket.create_connection((self.host, self.port),
                                             timeout=10)
                s.sendall(serialized_request)
                response = temp_resp = s.recv(self.RESP_CHUNK_SIZE)
                while len(temp_resp) == self.RESP_CHUNK_SIZE:
                    temp_resp = s.recv(self.RESP_CHUNK_SIZE)
                    response += temp_resp

                return response
            except:
                if i == (self.MAX_RETRY - 1):
                    raise
            finally:
                if s is not None:
                    try:
                        # try to close the connection anyways
                        s.close()
                    except:
                        pass

        raise Exception("Icap server refused to respond.")
Example #26
                                           status) != status and item.get(
                                               'status', status) != "TRIAGE":
                        labels.append("CONFLICT.%s" % item['status'])
                    msg = {
                        "action": "workflow",
                        "label": labels,
                        "priority": priority,
                        "status": status,
                        "event_id": item['_yz_rk'],
                        "queue_priority": QUEUE_PRIORITY
                    }
                    action_queue.push(QUEUE_PRIORITY, msg)
            except SearchException:
                log.warning("Invalid query '{query}' in "
                            "workflow filter '{name}' by '{user}'".format(
                                query=safe_str(aq.get('query', '')),
                                name=aq.get('name', 'unknown'),
                                user=aq.get('created_by', 'unknown')))
                continue

            if count:
                log.info(
                    "{count} Alert(s) were affected by this filter.".format(
                        count=count))
                if 'id' in aq:
                    ds.increment_workflow_counter(aq['id'], count)

    else:
        log.info(
            "Skipping all workflow filters since there were no alerts created in the specified time period."
        )
Example #27
    def get_pe_info(self, lcid):
        """Dumps the PE header as Results in the FileResult."""

        # PE Header
        pe_header_res = ResultSection(SCORE['NULL'], "PE: HEADER")

        # PE Header: Header Info
        pe_header_info_res = ResultSection(SCORE.NULL,
                                           "[HEADER INFO]",
                                           parent=pe_header_res)
        pe_header_info_res.add_line(
            "Entry point address: 0x%08X" %
            self.pe_file.OPTIONAL_HEADER.AddressOfEntryPoint)
        pe_header_info_res.add_line(
            "Linker Version: %02d.%02d" %
            (self.pe_file.OPTIONAL_HEADER.MajorLinkerVersion,
             self.pe_file.OPTIONAL_HEADER.MinorLinkerVersion))
        pe_header_info_res.add_line(
            "OS Version: %02d.%02d" %
            (self.pe_file.OPTIONAL_HEADER.MajorOperatingSystemVersion,
             self.pe_file.OPTIONAL_HEADER.MinorOperatingSystemVersion))
        pe_header_info_res.add_line([
            "Time Date Stamp: %s (" %
            time.ctime(self.pe_file.FILE_HEADER.TimeDateStamp),
            res_txt_tag(str(self.pe_file.FILE_HEADER.TimeDateStamp),
                        TAG_TYPE['PE_LINK_TIME_STAMP']), ")"
        ])
        try:
            pe_header_info_res.add_line(
                "Machine Type: %s (%s)" %
                (hex(self.pe_file.FILE_HEADER.Machine),
                 pefile.MACHINE_TYPE[self.pe_file.FILE_HEADER.Machine]))
        except KeyError:
            pass

        # PE Header: Rich Header
        # noinspection PyBroadException
        try:

            if self.pe_file.RICH_HEADER is not None:
                pe_rich_header_info = ResultSection(SCORE.NULL,
                                                    "[RICH HEADER INFO]",
                                                    parent=pe_header_res)
                values_list = self.pe_file.RICH_HEADER.values
                pe_rich_header_info.add_line("VC++ tools used:")
                for i in range(0, len(values_list) / 2):
                    line = "Tool Id: %3d Version: %6d Times used: %3d" % (
                        values_list[2 * i] >> 16, values_list[2 * i] & 0xFFFF,
                        values_list[2 * i + 1])
                    pe_rich_header_info.add_line(line)
        except:
            self.log.exception("Unable to parse PE Rich Header")

        # PE Header: Data Directories
        pe_dd_res = ResultSection(SCORE.NULL,
                                  "[DATA DIRECTORY]",
                                  parent=pe_header_res)
        for data_directory in self.pe_file.OPTIONAL_HEADER.DATA_DIRECTORY:
            if data_directory.Size or data_directory.VirtualAddress:
                pe_dd_res.add_line(
                    "%s - va: 0x%08X - size: 0x%08X" %
                    (data_directory.name[len("IMAGE_DIRECTORY_ENTRY_"):],
                     data_directory.VirtualAddress, data_directory.Size))

        # PE Header: Sections
        pe_sec_res = ResultSection(SCORE.NULL,
                                   "[SECTIONS]",
                                   parent=pe_header_res)

        self._init_section_list()

        try:
            for (sname, section, sec_md5, sec_entropy) in self._sect_list:
                txt = [
                    sname,
                    " - Virtual: 0x%08X (0x%08X bytes)"
                    " - Physical: 0x%08X (0x%08X bytes) - " %
                    (section.VirtualAddress, section.Misc_VirtualSize,
                     section.PointerToRawData, section.SizeOfRawData), "hash:",
                    res_txt_tag(sec_md5, TAG_TYPE['PE_SECTION_HASH']),
                    " - entropy:%f (min:0.0, Max=8.0)" % sec_entropy
                ]
                # add a search tag for the Section Hash
                make_tag(self.file_res,
                         'PE_SECTION_HASH',
                         sec_md5,
                         'HIGH',
                         usage='CORRELATION')
                pe_sec_res.add_line(txt)

        except AttributeError:
            pass

        self.file_res.add_section(pe_header_res)

        # debug
        try:
            if self.pe_file.DebugTimeDateStamp:
                pe_debug_res = ResultSection(SCORE['NULL'], "PE: DEBUG")
                self.file_res.add_section(pe_debug_res)

                pe_debug_res.add_line(
                    "Time Date Stamp: %s" %
                    time.ctime(self.pe_file.DebugTimeDateStamp))

                # When it is a unicode, we know we are coming from RSDS which is UTF-8
                # otherwise, we come from NB10 and we need to guess the charset.
                if type(self.pe_file.pdb_filename) != unicode:
                    char_enc_guessed = translate_str(self.pe_file.pdb_filename)
                    pdb_filename = char_enc_guessed['converted']
                else:
                    char_enc_guessed = {'confidence': 1.0, 'encoding': 'utf-8'}
                    pdb_filename = self.pe_file.pdb_filename

                pe_debug_res.add_line([
                    "PDB: '",
                    res_txt_tag_charset(pdb_filename,
                                        TAG_TYPE['PE_PDB_FILENAME'],
                                        char_enc_guessed['encoding'],
                                        char_enc_guessed['confidence']), "'"
                ])

                # self.log.debug(u"\tPDB: %s" % pdb_filename)
        except AttributeError:
            pass

        # imports
        try:
            if hasattr(self.pe_file, 'DIRECTORY_ENTRY_IMPORT') and len(
                    self.pe_file.DIRECTORY_ENTRY_IMPORT) > 0:
                pe_import_res = ResultSection(SCORE['NULL'], "PE: IMPORTS")
                self.file_res.add_section(pe_import_res)

                for entry in self.pe_file.DIRECTORY_ENTRY_IMPORT:
                    pe_import_dll_res = ResultSection(SCORE.NULL,
                                                      "[%s]" % entry.dll,
                                                      parent=pe_import_res)
                    first_element = True
                    line = StringIO()
                    for imp in entry.imports:
                        if first_element:
                            first_element = False
                        else:
                            line.write(", ")

                        if imp.name is None:
                            line.write(str(imp.ordinal))
                        else:
                            line.write(imp.name)

                    pe_import_dll_res.add_line(line.getvalue())

            else:
                pe_import_res = ResultSection(SCORE['NULL'],
                                              "PE: NO IMPORTS DETECTED ")
                self.file_res.add_section(pe_import_res)

        except AttributeError:
            pass

        # exports
        try:
            if self.pe_file.DIRECTORY_ENTRY_EXPORT.struct.TimeDateStamp is not None:
                pe_export_res = ResultSection(SCORE['NULL'], "PE: EXPORTS")
                self.file_res.add_section(pe_export_res)

                # noinspection PyBroadException
                try:
                    pe_export_res.add_line([
                        "Module Name: ",
                        res_txt_tag(safe_str(self.pe_file.ModuleName),
                                    TAG_TYPE['PE_EXPORT_MODULE_NAME'])
                    ])
                except:
                    pass

                if self.pe_file.DIRECTORY_ENTRY_EXPORT.struct.TimeDateStamp == 0:
                    pe_export_res.add_line("Time Date Stamp: 0")
                else:
                    pe_export_res.add_line(
                        "Time Date Stamp: %s" %
                        time.ctime(self.pe_file.DIRECTORY_ENTRY_EXPORT.struct.
                                   TimeDateStamp))

                first_element = True
                txt = []
                for exp in self.pe_file.DIRECTORY_ENTRY_EXPORT.symbols:
                    if first_element:
                        first_element = False
                    else:
                        txt.append(", ")

                    txt.append(str(exp.ordinal))
                    if exp.name is not None:
                        txt.append(": ")
                        txt.append(
                            res_txt_tag(exp.name,
                                        TAG_TYPE['PE_EXPORT_FCT_NAME']))

                pe_export_res.add_line(txt)
        except AttributeError:
            pass

        # resources
        try:
            if len(self.pe_file.DIRECTORY_ENTRY_RESOURCE.entries) > 0:
                pe_resource_res = ResultSection(SCORE['NULL'], "PE: RESOURCES")
                self.file_res.add_section(pe_resource_res)

                for res_entry in self.pe_file.DIRECTORY_ENTRY_RESOURCE.entries:
                    if res_entry.name is None:
                        # noinspection PyBroadException
                        try:
                            entry_name = pefile.RESOURCE_TYPE[res_entry.id]
                        except:
                            # pylint: disable-msg=W0702
                            # Legacy code; the exact exception type that
                            # can be raised here was never pinned down.
                            entry_name = "UNKNOWN"
                    else:
                        entry_name = res_entry.name

                    for name_id in res_entry.directory.entries:
                        if name_id.name is None:
                            name_id.name = hex(name_id.id)

                        for language in name_id.directory.entries:
                            try:
                                language_desc = lcid[language.id]
                            except KeyError:
                                language_desc = 'Unknown language'

                            line = []
                            if res_entry.name is None:
                                line.append(entry_name)
                            else:
                                line.append(
                                    res_txt_tag(str(entry_name),
                                                TAG_TYPE['PE_RESOURCE_NAME']))

                            line.append(" " + str(name_id.name) + " ")
                            line.append("0x")
                            # this will add a link to search in AL for the value
                            line.append(
                                res_txt_tag("%04X" % language.id,
                                            TAG_TYPE['PE_RESOURCE_LANGUAGE']))
                            line.append(" (%s)" % language_desc)

                            make_tag(self.file_res,
                                     'PE_RESOURCE_LANGUAGE',
                                     language.id,
                                     weight='LOW',
                                     usage='IDENTIFICATION')

                            # get the size of the resource
                            res_size = language.data.struct.Size
                            line.append(" Size: 0x%x" % res_size)

                            pe_resource_res.add_line(line)

        except AttributeError:
            pass

        # Resources-VersionInfo
        try:
            if len(self.pe_file.FileInfo) > 2:
                pass

            for file_info in self.pe_file.FileInfo:
                if file_info.name == "StringFileInfo":
                    if len(file_info.StringTable) > 0:
                        pe_resource_verinfo_res = ResultSection(
                            SCORE['NULL'], "PE: RESOURCES-VersionInfo")
                        self.file_res.add_section(pe_resource_verinfo_res)

                        try:
                            if "LangID" in file_info.StringTable[0].entries:
                                lang_id = file_info.StringTable[0].get(
                                    "LangID")
                                if not int(lang_id, 16) >> 16 == 0:
                                    txt = ('LangId: ' + lang_id + " (" +
                                           lcid[int(lang_id, 16) >> 16] + ")")
                                    pe_resource_verinfo_res.add_line(txt)
                                else:
                                    txt = ('LangId: ' + lang_id + " (NEUTRAL)")
                                    pe_resource_verinfo_res.add_line(txt)
                        except (ValueError, KeyError):
                            txt = ('LangId: %s is invalid' % lang_id)
                            pe_resource_verinfo_res.add_line(txt)

                        for entry in file_info.StringTable[0].entries.items():
                            txt = ['%s: ' % entry[0]]

                            if entry[0] == 'OriginalFilename':
                                txt.append(
                                    res_txt_tag(
                                        entry[1], TAG_TYPE[
                                            'PE_VERSION_INFO_ORIGINAL_FILENAME']
                                    ))
                            elif entry[0] == 'FileDescription':
                                txt.append(
                                    res_txt_tag(
                                        entry[1], TAG_TYPE[
                                            'PE_VERSION_INFO_FILE_DESCRIPTION']
                                    ))
                            else:
                                txt.append(entry[1])

                            pe_resource_verinfo_res.add_line(txt)

        except AttributeError:
            pass

        # Resources Strings
        try:
            BYTE = 1
            WORD = 2
            DWORD = 4

            DS_SETFONT = 0x40

            DIALOG_LEAD = DWORD + DWORD + WORD + WORD + WORD + WORD + WORD
            DIALOG_ITEM_LEAD = DWORD + DWORD + WORD + WORD + WORD + WORD + WORD

            DIALOGEX_LEAD = WORD + WORD + DWORD + DWORD + DWORD + WORD + WORD + WORD + WORD + WORD
            DIALOGEX_TRAIL = WORD + WORD + BYTE + BYTE
            DIALOGEX_ITEM_LEAD = DWORD + DWORD + DWORD + WORD + WORD + WORD + WORD + DWORD
            DIALOGEX_ITEM_TRAIL = WORD

            ITEM_TYPES = {
                0x80: "BUTTON",
                0x81: "EDIT",
                0x82: "STATIC",
                0x83: "LIST BOX",
                0x84: "SCROLL BAR",
                0x85: "COMBO BOX"
            }

            if hasattr(self.pe_file, 'DIRECTORY_ENTRY_RESOURCE'):
                for dir_type in self.pe_file.DIRECTORY_ENTRY_RESOURCE.entries:
                    if dir_type.name is None:
                        if dir_type.id in pefile.RESOURCE_TYPE:
                            dir_type.name = pefile.RESOURCE_TYPE[dir_type.id]
                    for nameID in dir_type.directory.entries:
                        if nameID.name is None:
                            nameID.name = hex(nameID.id)
                        for language in nameID.directory.entries:
                            strings = []
                            if str(dir_type.name) == "RT_DIALOG":
                                data_rva = language.data.struct.OffsetToData
                                size = language.data.struct.Size
                                data = self.pe_file.get_memory_mapped_image(
                                )[data_rva:data_rva + size]

                                offset = 0
                                if self.pe_file.get_word_at_rva(data_rva + offset) == 0x1 \
                                        and self.pe_file.get_word_at_rva(data_rva + offset + WORD) == 0xFFFF:
                                    # Use Extended Dialog Parsing

                                    # Remove leading bytes
                                    offset += DIALOGEX_LEAD
                                    if data[offset:offset + 2] == "\xFF\xFF":
                                        offset += DWORD
                                    else:
                                        offset += WORD
                                    if data[offset:offset + 2] == "\xFF\xFF":
                                        offset += DWORD
                                    else:
                                        offset += WORD

                                    # Get window title
                                    window_title = self.pe_file.get_string_u_at_rva(
                                        data_rva + offset)
                                    if len(window_title) != 0:
                                        strings.append(
                                            ("DIALOG_TITLE", window_title))
                                    offset += len(window_title) * 2 + WORD

                                    # Remove trailing bytes
                                    offset += DIALOGEX_TRAIL
                                    offset += len(
                                        self.pe_file.get_string_u_at_rva(
                                            data_rva + offset)) * 2 + WORD

                                    # alignment adjustment
                                    if (offset % 4) != 0:
                                        offset += WORD

                                    while True:

                                        if offset >= size:
                                            break

                                        offset += DIALOGEX_ITEM_LEAD

                                        # Get item type
                                        if self.pe_file.get_word_at_rva(
                                                data_rva + offset) == 0xFFFF:
                                            offset += WORD
                                            item_type = ITEM_TYPES[
                                                self.pe_file.get_word_at_rva(
                                                    data_rva + offset)]
                                            offset += WORD
                                        else:
                                            item_type = self.pe_file.get_string_u_at_rva(
                                                data_rva + offset)
                                            offset += len(item_type) * 2 + WORD

                                        # Get item text
                                        item_text = self.pe_file.get_string_u_at_rva(
                                            data_rva + offset)
                                        if len(item_text) != 0:
                                            strings.append(
                                                (item_type, item_text))
                                        offset += len(item_text) * 2 + WORD

                                        extra_bytes = self.pe_file.get_word_at_rva(
                                            data_rva + offset)
                                        offset += extra_bytes + DIALOGEX_ITEM_TRAIL

                                        # Alignment adjustment
                                        if (offset % 4) != 0:
                                            offset += WORD

                                else:
                                    # TODO: Use Non extended Dialog Parsing
                                    # Remove leading bytes
                                    style = self.pe_file.get_word_at_rva(
                                        data_rva + offset)

                                    offset += DIALOG_LEAD
                                    if data[offset:offset + 2] == "\xFF\xFF":
                                        offset += DWORD
                                    else:
                                        offset += len(
                                            self.pe_file.get_string_u_at_rva(
                                                data_rva + offset)) * 2 + WORD
                                    if data[offset:offset + 2] == "\xFF\xFF":
                                        offset += DWORD
                                    else:
                                        offset += len(
                                            self.pe_file.get_string_u_at_rva(
                                                data_rva + offset)) * 2 + WORD

                                    # Get window title
                                    window_title = self.pe_file.get_string_u_at_rva(
                                        data_rva + offset)
                                    if len(window_title) != 0:
                                        strings.append(
                                            ("DIALOG_TITLE", window_title))
                                    offset += len(window_title) * 2 + WORD

                                    if (style & DS_SETFONT) != 0:
                                        offset += WORD
                                        offset += len(
                                            self.pe_file.get_string_u_at_rva(
                                                data_rva + offset)) * 2 + WORD

                                    # Alignment adjustment
                                    if (offset % 4) != 0:
                                        offset += WORD

                                    while True:

                                        if offset >= size:
                                            break

                                        offset += DIALOG_ITEM_LEAD

                                        # Get item type
                                        if self.pe_file.get_word_at_rva(
                                                data_rva + offset) == 0xFFFF:
                                            offset += WORD
                                            item_type = ITEM_TYPES[
                                                self.pe_file.get_word_at_rva(
                                                    data_rva + offset)]
                                            offset += WORD
                                        else:
                                            item_type = self.pe_file.get_string_u_at_rva(
                                                data_rva + offset)
                                            offset += len(item_type) * 2 + WORD

                                        # Get item text
                                        if self.pe_file.get_word_at_rva(
                                                data_rva + offset) == 0xFFFF:
                                            offset += DWORD
                                        else:
                                            item_text = self.pe_file.get_string_u_at_rva(
                                                data_rva + offset)
                                            if len(item_text) != 0:
                                                strings.append(
                                                    (item_type, item_text))
                                            offset += len(item_text) * 2 + WORD

                                        extra_bytes = self.pe_file.get_word_at_rva(
                                            data_rva + offset)
                                        offset += extra_bytes + WORD

                                        # Alignment adjustment
                                        if (offset % 4) != 0:
                                            offset += WORD

                            elif str(dir_type.name) == "RT_STRING":
                                data_rva = language.data.struct.OffsetToData
                                size = language.data.struct.Size
                                data = self.pe_file.get_memory_mapped_image(
                                )[data_rva:data_rva + size]
                                offset = 0
                                while True:
                                    if offset >= size:
                                        break

                                    ustr_length = self.pe_file.get_word_from_data(
                                        data[offset:offset + 2], 0)
                                    offset += 2

                                    if ustr_length == 0:
                                        continue

                                    ustr = self.pe_file.get_string_u_at_rva(
                                        data_rva + offset,
                                        max_length=ustr_length)
                                    offset += ustr_length * 2
                                    strings.append((None, ustr))

                            if len(strings) > 0:
                                success = False
                                try:
                                    comment = "%s (id:%s - lang_id:0x%04X [%s])" % (
                                        str(dir_type.name), str(nameID.name),
                                        language.id, lcid[language.id])
                                except KeyError:
                                    comment = "%s (id:%s - lang_id:0x%04X [Unknown language])" % (
                                        str(dir_type.name), str(
                                            nameID.name), language.id)
                                res = ResultSection(
                                    SCORE['NULL'],
                                    "PE: STRINGS - %s" % comment)
                                for idx in xrange(len(strings)):
                                    # noinspection PyBroadException
                                    try:
                                        tag_value = strings[idx][1]

                                        # The following line crashes chardet
                                        # if a UPX-packed file has packed the
                                        # resources...
                                        chardet.detect(
                                            tag_value
                                        )  # TODO: Find a better way to do this

                                        tag_value = tag_value.replace(
                                            '\r', ' ').replace('\n', ' ')
                                        if strings[idx][0] is not None:
                                            res.add_line([
                                                strings[idx][0], ": ",
                                                res_txt_tag(
                                                    tag_value,
                                                    TAG_TYPE['FILE_STRING'])
                                            ])
                                        else:
                                            res.add_line(
                                                res_txt_tag(
                                                    tag_value,
                                                    TAG_TYPE['FILE_STRING']))

                                        make_tag(self.file_res,
                                                 'FILE_STRING',
                                                 tag_value,
                                                 weight='NULL',
                                                 usage='IDENTIFICATION')

                                        success = True
                                    except:
                                        pass
                                if success:
                                    self.file_res.add_section(res)

        except AttributeError, e:
            self.log.debug("\t Error parsing output: " + repr(e))
Example #29
0
    def get_pe_info(self, lcid):
        """Dumps the PE header as Results in the FileResult."""

        # PE Header
        pe_header_res = ResultSection(SCORE['NULL'], "PE: HEADER")

        # PE Header: Header Info
        pe_header_info_res = ResultSection(SCORE.NULL, "[HEADER INFO]", parent=pe_header_res)
        pe_header_info_res.add_line("Entry point address: 0x%08X" % self.pe_file.OPTIONAL_HEADER.AddressOfEntryPoint)
        pe_header_info_res.add_line("Linker Version: %02d.%02d" % (self.pe_file.OPTIONAL_HEADER.MajorLinkerVersion,
                                                                   self.pe_file.OPTIONAL_HEADER.MinorLinkerVersion))
        pe_header_info_res.add_line("OS Version: %02d.%02d" %
                                    (self.pe_file.OPTIONAL_HEADER.MajorOperatingSystemVersion,
                                     self.pe_file.OPTIONAL_HEADER.MinorOperatingSystemVersion))
        pe_header_info_res.add_line(["Time Date Stamp: %s (" % time.ctime(self.pe_file.FILE_HEADER.TimeDateStamp),
                                     res_txt_tag(str(self.pe_file.FILE_HEADER.TimeDateStamp),
                                                 TAG_TYPE['PE_LINK_TIME_STAMP']),
                                     ")"])
        try:
            pe_header_info_res.add_line("Machine Type: %s (%s)" % (
                hex(self.pe_file.FILE_HEADER.Machine), pefile.MACHINE_TYPE[self.pe_file.FILE_HEADER.Machine]))
        except KeyError:
            pass

        # PE Header: Rich Header
        # noinspection PyBroadException
        try:

            if self.pe_file.RICH_HEADER is not None:
                pe_rich_header_info = ResultSection(SCORE.NULL, "[RICH HEADER INFO]", parent=pe_header_res)
                values_list = self.pe_file.RICH_HEADER.values
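                # values is a flat list of interleaved (comp.id, count) pairs;
                # each comp.id packs the tool id in its high WORD and the
                # version/build in its low WORD, which is what the shifts
                # below unpack.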
                pe_rich_header_info.add_line("VC++ tools used:")
                for i in range(0, len(values_list) / 2):
                    line = "Tool Id: %3d Version: %6d Times used: %3d" % (
                        values_list[2 * i] >> 16, values_list[2 * i] & 0xFFFF, values_list[2 * i + 1])
                    pe_rich_header_info.add_line(line)
        except:
            self.log.exception("Unable to parse PE Rich Header")

        # PE Header: Data Directories
        pe_dd_res = ResultSection(SCORE.NULL, "[DATA DIRECTORY]", parent=pe_header_res)
        for data_directory in self.pe_file.OPTIONAL_HEADER.DATA_DIRECTORY:
            if data_directory.Size or data_directory.VirtualAddress:
                pe_dd_res.add_line("%s - va: 0x%08X - size: 0x%08X"
                                   % (data_directory.name[len("IMAGE_DIRECTORY_ENTRY_"):],
                                      data_directory.VirtualAddress, data_directory.Size))

        # PE Header: Sections
        pe_sec_res = ResultSection(SCORE.NULL, "[SECTIONS]", parent=pe_header_res)

        self._init_section_list()

        try:
            for (sname, section, sec_md5, sec_entropy) in self._sect_list:
                txt = [sname, " - Virtual: 0x%08X (0x%08X bytes)"
                              " - Physical: 0x%08X (0x%08X bytes) - " %
                       (section.VirtualAddress, section.Misc_VirtualSize,
                        section.PointerToRawData, section.SizeOfRawData), "hash:",
                       res_txt_tag(sec_md5, TAG_TYPE['PE_SECTION_HASH']),
                       " - entropy:%f (min:0.0, Max=8.0)" % sec_entropy]
                # add a search tag for the Section Hash
                make_tag(self.file_res, 'PE_SECTION_HASH', sec_md5, 'HIGH', usage='CORRELATION')
                pe_sec_res.add_line(txt)

        except AttributeError:
            pass

        self.file_res.add_section(pe_header_res)

        # debug
        try:
            if self.pe_file.DebugTimeDateStamp:
                pe_debug_res = ResultSection(SCORE['NULL'], "PE: DEBUG")
                self.file_res.add_section(pe_debug_res)

                pe_debug_res.add_line("Time Date Stamp: %s" % time.ctime(self.pe_file.DebugTimeDateStamp))

                # When it is a unicode string we know it came from RSDS, which is UTF-8;
                # otherwise it came from NB10 and we need to guess the charset.
                if type(self.pe_file.pdb_filename) != unicode:
                    char_enc_guessed = translate_str(self.pe_file.pdb_filename)
                    pdb_filename = char_enc_guessed['converted']
                else:
                    char_enc_guessed = {'confidence': 1.0, 'encoding': 'utf-8'}
                    pdb_filename = self.pe_file.pdb_filename

                pe_debug_res.add_line(["PDB: '",
                                       res_txt_tag_charset(pdb_filename,
                                                           TAG_TYPE['PE_PDB_FILENAME'],
                                                           char_enc_guessed['encoding'],
                                                           char_enc_guessed['confidence']),
                                       "'"])

                # self.log.debug(u"\tPDB: %s" % pdb_filename)
        except AttributeError:
            pass

        # imports
        try:
            if hasattr(self.pe_file, 'DIRECTORY_ENTRY_IMPORT') and len(self.pe_file.DIRECTORY_ENTRY_IMPORT) > 0:
                pe_import_res = ResultSection(SCORE['NULL'], "PE: IMPORTS")
                self.file_res.add_section(pe_import_res)

                for entry in self.pe_file.DIRECTORY_ENTRY_IMPORT:
                    pe_import_dll_res = ResultSection(SCORE.NULL, "[%s]" % entry.dll, parent=pe_import_res)
                    first_element = True
                    line = StringIO()
                    for imp in entry.imports:
                        if first_element:
                            first_element = False
                        else:
                            line.write(", ")

                        if imp.name is None:
                            line.write(str(imp.ordinal))
                        else:
                            line.write(imp.name)

                    pe_import_dll_res.add_line(line.getvalue())

            else:
                pe_import_res = ResultSection(SCORE['NULL'], "PE: NO IMPORTS DETECTED ")
                self.file_res.add_section(pe_import_res)

        except AttributeError:
            pass

        # exports
        try:
            if self.pe_file.DIRECTORY_ENTRY_EXPORT.struct.TimeDateStamp is not None:
                pe_export_res = ResultSection(SCORE['NULL'], "PE: EXPORTS")
                self.file_res.add_section(pe_export_res)

                # noinspection PyBroadException
                try:
                    pe_export_res.add_line(["Module Name: ",
                                            res_txt_tag(safe_str(self.pe_file.ModuleName),
                                                        TAG_TYPE['PE_EXPORT_MODULE_NAME'])])
                except:
                    pass

                if self.pe_file.DIRECTORY_ENTRY_EXPORT.struct.TimeDateStamp == 0:
                    pe_export_res.add_line("Time Date Stamp: 0")
                else:
                    pe_export_res.add_line("Time Date Stamp: %s"
                                           % time.ctime(self.pe_file.DIRECTORY_ENTRY_EXPORT.struct.TimeDateStamp))

                first_element = True
                txt = []
                for exp in self.pe_file.DIRECTORY_ENTRY_EXPORT.symbols:
                    if first_element:
                        first_element = False
                    else:
                        txt.append(", ")

                    txt.append(str(exp.ordinal))
                    if exp.name is not None:
                        txt.append(": ")
                        txt.append(res_txt_tag(exp.name, TAG_TYPE['PE_EXPORT_FCT_NAME']))

                pe_export_res.add_line(txt)
        except AttributeError:
            pass

        # resources
        try:
            if len(self.pe_file.DIRECTORY_ENTRY_RESOURCE.entries) > 0:
                pe_resource_res = ResultSection(SCORE['NULL'], "PE: RESOURCES")
                self.file_res.add_section(pe_resource_res)

                for res_entry in self.pe_file.DIRECTORY_ENTRY_RESOURCE.entries:
                    if res_entry.name is None:
                        # noinspection PyBroadException
                        try:
                            entry_name = pefile.RESOURCE_TYPE[res_entry.id]
                        except:
                            # pylint: disable-msg=W0702
                            # Unfortunately this code predates our habit of catching
                            # specific exceptions, so it is unclear which one applies
                            # here; it would need to be tested. :-\
                            entry_name = "UNKNOWN"
                    else:
                        entry_name = res_entry.name

                    for name_id in res_entry.directory.entries:
                        if name_id.name is None:
                            name_id.name = hex(name_id.id)

                        for language in name_id.directory.entries:
                            try:
                                language_desc = lcid[language.id]
                            except KeyError:
                                language_desc = 'Unknown language'

                            line = []
                            if res_entry.name is None:
                                line.append(entry_name)
                            else:
                                line.append(res_txt_tag(str(entry_name), TAG_TYPE['PE_RESOURCE_NAME']))

                            line.append(" " + str(name_id.name) + " ")
                            line.append("0x")
                            # this will add a link to search in AL for the value
                            line.append(res_txt_tag("%04X" % language.id, TAG_TYPE['PE_RESOURCE_LANGUAGE']))
                            line.append(" (%s)" % language_desc)

                            make_tag(self.file_res, 'PE_RESOURCE_LANGUAGE', language.id,
                                     weight='LOW', usage='IDENTIFICATION')

                            # get the size of the resource
                            res_size = language.data.struct.Size
                            line.append(" Size: 0x%x" % res_size)

                            pe_resource_res.add_line(line)

        except AttributeError:
            pass

        # Resources-VersionInfo
        try:

            for file_info in self.pe_file.FileInfo:
                if file_info.name == "StringFileInfo":
                    if len(file_info.StringTable) > 0:
                        pe_resource_verinfo_res = ResultSection(SCORE['NULL'], "PE: RESOURCES-VersionInfo")
                        self.file_res.add_section(pe_resource_verinfo_res)

                        try:
                            if "LangID" in file_info.StringTable[0].entries:
                                lang_id = file_info.StringTable[0].entries.get("LangID")
                                if int(lang_id, 16) >> 16 != 0:
                                    txt = ('LangId: ' + lang_id + " (" + lcid[
                                        int(lang_id, 16) >> 16] + ")")
                                    pe_resource_verinfo_res.add_line(txt)
                                else:
                                    txt = ('LangId: ' + lang_id + " (NEUTRAL)")
                                    pe_resource_verinfo_res.add_line(txt)
                        except (ValueError, KeyError):
                            txt = ('LangId: %s is invalid' % lang_id)
                            pe_resource_verinfo_res.add_line(txt)

                        for entry in file_info.StringTable[0].entries.items():
                            txt = ['%s: ' % entry[0]]

                            if entry[0] == 'OriginalFilename':
                                txt.append(res_txt_tag(entry[1], TAG_TYPE['PE_VERSION_INFO_ORIGINAL_FILENAME']))
                            elif entry[0] == 'FileDescription':
                                txt.append(res_txt_tag(entry[1], TAG_TYPE['PE_VERSION_INFO_FILE_DESCRIPTION']))
                            else:
                                txt.append(entry[1])

                            pe_resource_verinfo_res.add_line(txt)

        except AttributeError:
            pass

        # Resources Strings
        try:
            BYTE = 1
            WORD = 2
            DWORD = 4

            DS_SETFONT = 0x40

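            # The LEAD/TRAIL sizes below mirror the fixed fields of the Win32
            # dialog templates: DLGTEMPLATE is style and dwExtendedStyle (two
            # DWORDs) plus cdit/x/y/cx/cy (five WORDs); DLGTEMPLATEEX opens with
            # dlgVer/signature (two WORDs), helpID/exStyle/style (three DWORDs)
            # and cDlgItems/x/y/cx/cy (five WORDs), and its trail covers the
            # font pointsize/weight (WORDs) and italic/charset (BYTEs).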
            DIALOG_LEAD = DWORD + DWORD + WORD + WORD + WORD + WORD + WORD
            DIALOG_ITEM_LEAD = DWORD + DWORD + WORD + WORD + WORD + WORD + WORD

            DIALOGEX_LEAD = WORD + WORD + DWORD + DWORD + DWORD + WORD + WORD + WORD + WORD + WORD
            DIALOGEX_TRAIL = WORD + WORD + BYTE + BYTE
            DIALOGEX_ITEM_LEAD = DWORD + DWORD + DWORD + WORD + WORD + WORD + WORD + DWORD
            DIALOGEX_ITEM_TRAIL = WORD

            ITEM_TYPES = {0x80: "BUTTON", 0x81: "EDIT", 0x82: "STATIC", 0x83: "LIST BOX", 0x84: "SCROLL BAR",
                          0x85: "COMBO BOX"}

            if hasattr(self.pe_file, 'DIRECTORY_ENTRY_RESOURCE'):
                for dir_type in self.pe_file.DIRECTORY_ENTRY_RESOURCE.entries:
                    if dir_type.name is None:
                        if dir_type.id in pefile.RESOURCE_TYPE:
                            dir_type.name = pefile.RESOURCE_TYPE[dir_type.id]
                    for nameID in dir_type.directory.entries:
                        if nameID.name is None:
                            nameID.name = hex(nameID.id)
                        for language in nameID.directory.entries:
                            strings = []
                            if str(dir_type.name) == "RT_DIALOG":
                                data_rva = language.data.struct.OffsetToData
                                size = language.data.struct.Size
                                data = self.pe_file.get_memory_mapped_image()[data_rva:data_rva + size]

                                offset = 0
                                if self.pe_file.get_word_at_rva(data_rva + offset) == 0x1 \
                                        and self.pe_file.get_word_at_rva(data_rva + offset + WORD) == 0xFFFF:
                                    # Use Extended Dialog Parsing

                                    # Remove leading bytes
                                    offset += DIALOGEX_LEAD
                                    if data[offset:offset + 2] == "\xFF\xFF":
                                        offset += DWORD
                                    else:
                                        offset += WORD
                                    if data[offset:offset + 2] == "\xFF\xFF":
                                        offset += DWORD
                                    else:
                                        offset += WORD
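                                    # NOTE: only a WORD is skipped for the menu and
                                    # window-class fields, i.e. they are assumed to
                                    # be empty (0x0000); a string value here would
                                    # desynchronize the parse.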

                                    # Get window title
                                    window_title = self.pe_file.get_string_u_at_rva(data_rva + offset)
                                    if len(window_title) != 0:
                                        strings.append(("DIALOG_TITLE", window_title))
                                    offset += len(window_title) * 2 + WORD

                                    # Remove trailing bytes
                                    offset += DIALOGEX_TRAIL
                                    offset += len(self.pe_file.get_string_u_at_rva(data_rva + offset)) * 2 + WORD

                                    # alignment adjustment
                                    if (offset % 4) != 0:
                                        offset += WORD

                                    while True:

                                        if offset >= size:
                                            break

                                        offset += DIALOGEX_ITEM_LEAD

                                        # Get item type
                                        if self.pe_file.get_word_at_rva(data_rva + offset) == 0xFFFF:
                                            offset += WORD
                                            item_type = ITEM_TYPES[self.pe_file.get_word_at_rva(data_rva + offset)]
                                            offset += WORD
                                        else:
                                            item_type = self.pe_file.get_string_u_at_rva(data_rva + offset)
                                            offset += len(item_type) * 2 + WORD

                                        # Get item text
                                        item_text = self.pe_file.get_string_u_at_rva(data_rva + offset)
                                        if len(item_text) != 0:
                                            strings.append((item_type, item_text))
                                        offset += len(item_text) * 2 + WORD

                                        extra_bytes = self.pe_file.get_word_at_rva(data_rva + offset)
                                        offset += extra_bytes + DIALOGEX_ITEM_TRAIL

                                        # Alignment adjustment
                                        if (offset % 4) != 0:
                                            offset += WORD

                                else:
                                    # Non-extended dialog parsing
                                    # Remove leading bytes
                                    style = self.pe_file.get_word_at_rva(data_rva + offset)

                                    offset += DIALOG_LEAD
                                    if data[offset:offset + 2] == "\xFF\xFF":
                                        offset += DWORD
                                    else:
                                        offset += len(self.pe_file.get_string_u_at_rva(data_rva + offset)) * 2 + WORD
                                    if data[offset:offset + 2] == "\xFF\xFF":
                                        offset += DWORD
                                    else:
                                        offset += len(self.pe_file.get_string_u_at_rva(data_rva + offset)) * 2 + WORD

                                    # Get window title
                                    window_title = self.pe_file.get_string_u_at_rva(data_rva + offset)
                                    if len(window_title) != 0:
                                        strings.append(("DIALOG_TITLE", window_title))
                                    offset += len(window_title) * 2 + WORD

                                    if (style & DS_SETFONT) != 0:
                                        offset += WORD
                                        offset += len(self.pe_file.get_string_u_at_rva(data_rva + offset)) * 2 + WORD

                                    # Alignment adjustment
                                    if (offset % 4) != 0:
                                        offset += WORD

                                    while True:

                                        if offset >= size:
                                            break

                                        offset += DIALOG_ITEM_LEAD

                                        # Get item type
                                        if self.pe_file.get_word_at_rva(data_rva + offset) == 0xFFFF:
                                            offset += WORD
                                            item_type = ITEM_TYPES[self.pe_file.get_word_at_rva(data_rva + offset)]
                                            offset += WORD
                                        else:
                                            item_type = self.pe_file.get_string_u_at_rva(data_rva + offset)
                                            offset += len(item_type) * 2 + WORD

                                        # Get item text
                                        if self.pe_file.get_word_at_rva(data_rva + offset) == 0xFFFF:
                                            offset += DWORD
                                        else:
                                            item_text = self.pe_file.get_string_u_at_rva(data_rva + offset)
                                            if len(item_text) != 0:
                                                strings.append((item_type, item_text))
                                            offset += len(item_text) * 2 + WORD

                                        extra_bytes = self.pe_file.get_word_at_rva(data_rva + offset)
                                        offset += extra_bytes + WORD

                                        # Alignment adjustment
                                        if (offset % 4) != 0:
                                            offset += WORD

                            elif str(dir_type.name) == "RT_STRING":
                                data_rva = language.data.struct.OffsetToData
                                size = language.data.struct.Size
                                data = self.pe_file.get_memory_mapped_image()[data_rva:data_rva + size]
                                offset = 0
                                while True:
                                    if offset >= size:
                                        break

                                    ustr_length = self.pe_file.get_word_from_data(data[offset:offset + 2], 0)
                                    offset += 2

                                    if ustr_length == 0:
                                        continue

                                    ustr = self.pe_file.get_string_u_at_rva(data_rva + offset, max_length=ustr_length)
                                    offset += ustr_length * 2
                                    strings.append((None, ustr))

                            if len(strings) > 0:
                                success = False
                                try:
                                    comment = "%s (id:%s - lang_id:0x%04X [%s])" % (
                                        str(dir_type.name), str(nameID.name), language.id, lcid[language.id])
                                except KeyError:
                                    comment = "%s (id:%s - lang_id:0x%04X [Unknown language])" % (
                                        str(dir_type.name), str(nameID.name), language.id)
                                res = ResultSection(SCORE['NULL'], "PE: STRINGS - %s" % comment)
                                for idx in xrange(len(strings)):
                                    # noinspection PyBroadException
                                    try:
                                        tag_value = strings[idx][1]

                                        # The following line crashes chardet if a
                                        # UPX-packed file has packed the resources...
                                        chardet.detect(tag_value)  # TODO: Find a better way to do this

                                        tag_value = tag_value.replace('\r', ' ').replace('\n', ' ')
                                        if strings[idx][0] is not None:
                                            res.add_line(
                                                [strings[idx][0], ": ",
                                                 res_txt_tag(tag_value, TAG_TYPE['FILE_STRING'])])
                                        else:
                                            res.add_line(res_txt_tag(tag_value, TAG_TYPE['FILE_STRING']))

                                        make_tag(self.file_res, 'FILE_STRING', tag_value, weight='NULL',
                                                 usage='IDENTIFICATION')

                                        success = True
                                    except:
                                        pass
                                if success:
                                    self.file_res.add_section(res)

        except AttributeError, e:
            self.log.debug("\t Error parsing output: " + repr(e))
Example #30
0
    def dump_rule_file(rule_list, fake_dependencies=False, show_header=True):
        if show_header:
            out = ["//\t%s rule(s)" % len(rule_list), "", ""]
        else:
            out = []

        modules = list(
            set([m for rule in rule_list for m in rule.get('modules', [])]))
        for m in modules:
            out.append('import "%s"' % m)
            out.append("")

        if fake_dependencies:
            depends = list(
                set([d for rule in rule_list
                     for d in rule.get('depends', [])]))
            for d in depends:
                out.append(YaraParser.FAKE_RULE % d)
                out.append("")

        for rule in rule_list:
            if rule is None:
                continue

            out.append("%s %s%s {" % (rule['type'], rule['name'], {
                True: ": %s" % " ".join(rule['tags']),
                False: ""
            }[len(rule["tags"]) > 0]))

            # Do comments
            for c in rule['comments']:
                out.append("    // %s" % c)

            # Do meta. Try to preserve ordering
            if rule['meta']:
                out.append("    meta:")
                keys = rule['meta'].keys()
                if "rule_group" in keys:
                    out.append('        rule_group = "%s"' %
                               rule['meta']['rule_group'])
                    keys.remove('rule_group')
                    if rule['meta']['rule_group'] in keys:
                        out.append('        %s = "%s"' %
                                   (rule['meta']['rule_group'],
                                    rule['meta'][rule['meta']['rule_group']]))
                        keys.remove(rule['meta']['rule_group'])
                    for x in YaraParser.RULE_GROUPS:
                        if x in keys:
                            out.append('        %s = "%s"' %
                                       (x, rule['meta'][x]))
                            keys.remove(x)
                    out.append("        ")

                do_space = False
                for i in YaraParser.RULE_IMPORTANT:
                    if i in keys:
                        do_space = True
                        out.append('        %s = "%s"' % (i, rule['meta'][i]))
                        keys.remove(i)
                if do_space:
                    out.append("        ")

                keys.sort()
                for k in keys:
                    out.append('        %s = "%s"' % (k, rule['meta'][k]))

                out.append("    ")

            # Do Strings
            if rule['strings']:
                if len(set(rule['strings'])) > 1 or rule['strings'][0] != "":
                    out.append("    strings:")
                    for s in rule['strings']:
                        out.append('        %s' % s)
                    out.append("    ")

            # Do conditions
            if rule['condition']:
                out.append("    condition:")
                for c in rule['condition']:
                    out.append('        %s' % c)

                out.append("    ")

            out.extend(["}", "", ""])

        return safe_str("\n".join(out))
Example #31
0
    def parse_rule_file(self, data, debug=False, force_safe_str=False):
        out = []
        for line in data.splitlines():
            if self.in_rule and debug:
                print line

            line = line.strip()

            if line.startswith("/*"):
                self.in_comment = True
            if self.in_comment:
                if "*/" in line:
                    self.in_comment = False
                    # There might be data after the inline comment
                    line = line.split('*/', 1)[1]
                    if not line:
                        continue
                else:
                    continue

            if not line.startswith("//") and not self.in_meta \
                    and not self.in_strings and not self.in_condition:
                prev_bracket = self.open_bracket
                self.open_bracket += line.count("{")
                if prev_bracket == 0 and self.open_bracket == 1:
                    self.got_open = True

                # Conditions may be on the same line as the curly bracket
                temp_line = line.strip('{').strip()
                if temp_line.startswith("meta"):
                    self._switch_to("meta")
                elif temp_line.startswith("strings"):
                    self._switch_to("strings")
                elif temp_line.startswith("condition"):
                    self._switch_to("condition")

            if self.in_rule and not self.in_meta and not self.in_condition and not self.in_strings \
                    and line.startswith("//"):
                line_data = line[2:].strip()
                if force_safe_str:
                    line_data = safe_str(line_data)
                self.cur_rule['comments'].append(line_data)

            if line.startswith("rule "):
                self.cur_rule['type'] = "rule"
                self.in_rule = True
            elif line.startswith("private rule "):
                self.cur_rule['type'] = "private rule"
                self.in_rule = True
            elif line.startswith("global rule "):
                self.cur_rule['type'] = "global rule"
                self.in_rule = True
            elif line.startswith("global private rule "):
                self.cur_rule['type'] = "global private rule"
                self.in_rule = True

            if line.startswith("rule ") or line.startswith(
                    "private rule ") or line.startswith(
                        "global private rule "):
                if debug:
                    print line
                line = line[len(self.cur_rule['type']) + 1:].split("//")[0]
                self.cur_rule['tags'] = []

                if ":" in line:
                    self.cur_rule['name'], tags = line.split(':', 1)
                    self.cur_rule['name'] = self.cur_rule['name'].strip()
                    tags = tags.split("{")[0].strip().split(" ")
                    for t in tags:
                        if force_safe_str:
                            t = safe_str(t)

                        self.cur_rule['tags'].append(t)
                else:
                    self.cur_rule['name'] = line.split("{")[0].strip()

                if force_safe_str:
                    self.cur_rule['name'] = safe_str(self.cur_rule['name'])

            if line.startswith("meta"):
                self._switch_to("meta")
            elif line.startswith("strings"):
                self._switch_to("strings")
            elif line.startswith("condition"):
                self._switch_to("condition")
            elif not line.startswith("}"):
                if self.in_meta and "=" in line:
                    key, val = line.split("=", 1)
                    key = key.strip()
                    val = val.strip().strip('"')
                    if force_safe_str:
                        val = safe_str(val)
                    self.cur_rule['meta'][key] = val
                elif self.in_strings and line != "":
                    if force_safe_str:
                        line = safe_str(line)
                    self.cur_rule['strings'].append(line)
                elif self.in_condition and line != "":
                    if force_safe_str:
                        line = safe_str(line)
                    self.cur_rule["condition"].append(line)

            if not line.startswith("//") and not self.in_meta and not self.in_strings:
                self.open_bracket -= line.count("}")
                if self.got_open and self.open_bracket == 0:
                    if debug:
                        pprint.pprint(self.cur_rule)
                        print ""
                        print ""

                    yara_version = self.cur_rule.get('meta', {}).get(
                        'yara_version', "3.6")
                    modules = self.YARA_MODULES.get(yara_version, [])
                    self.cur_rule['depends'], self.cur_rule['modules'] = \
                        self.parse_dependencies(self.cur_rule['condition'], modules)

                    out.append(self.cur_rule)
                    self._reset()

        return out
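
A round-trip sketch, assuming YaraParser can be constructed without arguments and that dump_rule_file from the previous example is a staticmethod on the same class (neither is shown here); 'rules.yar' is a placeholder path:

parser = YaraParser()
with open('rules.yar') as fh:
    rules = parser.parse_rule_file(fh.read())
print YaraParser.dump_rule_file(rules, show_header=False)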
Example #32
0
    def execute(self, request):
        if request.task.depth > 3:
            self.log.debug(
                "Cuckoo is exiting because it currently does not execute on "
                "great-great-grandchildren.")
            request.set_save_result(False)
            return
        self.session = requests.Session()
        self.task = request.task
        request.result = Result()
        self.file_res = request.result
        file_content = request.get()
        self.cuckoo_task = None
        self.al_report = None
        self.file_name = os.path.basename(request.path)

        full_memdump = False
        pull_memdump = False

        # Check the file extension
        original_ext = self.file_name.rsplit('.', 1)
        tag_extension = tag_to_extension.get(self.task.tag)

        # NOTE: Cuckoo still tries to identify files itself, so we only force the extension/package if the user
        # specifies one. However, we go through the trouble of renaming the file because the only way to have
        # certain modules run is to use the appropriate suffix (.jar, .vbs, etc.)

        # Check for a valid tag
        if tag_extension is not None and 'unknown' not in self.task.tag:
            file_ext = tag_extension
        # Check if the file was submitted with an extension
        elif len(original_ext) == 2:
            submitted_ext = original_ext[1]
            if submitted_ext not in SUPPORTED_EXTENSIONS:
                # This is the case where the submitted file was NOT identified, and the
                # provided extension isn't in the list of extensions that we explicitly support.
                self.log.debug(
                    "Cuckoo is exiting because it doesn't support the provided file type."
                )
                request.set_save_result(False)
                return
            else:
                # This is a usable extension. It might not run (if the submitter has lied to us).
                file_ext = '.' + submitted_ext
        else:
            # The file type is unknown and it has no extension that we accept/recognize... no scan!
            self.log.debug(
                "Cuckoo is exiting because the file type could not be identified. %s %s"
                % (tag_extension, self.task.tag))
            return

        # Rename based on the found extension.
        if file_ext and self.task.sha256:
            self.file_name = self.task.sha256 + file_ext

        # Parse user-specified options
        kwargs = dict()
        task_options = []

        analysis_timeout = request.get_param('analysis_timeout')

        generate_report = request.get_param('generate_report')
        if generate_report is True:
            self.log.debug("Setting generate_report flag.")

        dump_processes = request.get_param('dump_processes')
        if dump_processes is True:
            self.log.debug("Setting procmemdump flag in task options")
            task_options.append('procmemdump=yes')

        dll_function = request.get_param('dll_function')
        if dll_function:
            task_options.append('function={}'.format(dll_function))

        arguments = request.get_param('arguments')
        if arguments:
            task_options.append('arguments={}'.format(arguments))

        # Parse extra options (these aren't user selectable because they are dangerous/slow)
        if request.get_param('pull_memory') and request.task.depth == 0:
            pull_memdump = True

        if request.get_param('dump_memory') and request.task.depth == 0:
            # Full system dump and volatility scan
            full_memdump = True
            kwargs['memory'] = True

        if request.get_param('no_monitor'):
            task_options.append("free=yes")

        routing = request.get_param('routing')
        if routing is None:
            routing = self.enabled_routes[0]

        select_machine = self.find_machine(self.task.tag, routing)

        if select_machine is None:
            # No matching VM and no default
            self.log.debug(
                "No Cuckoo vm matches tag %s and no machine is tagged as default."
                % self.task.tag)
            request.set_save_result(False)
            return

        kwargs['timeout'] = analysis_timeout
        kwargs['options'] = ','.join(task_options)
        if select_machine:
            kwargs['machine'] = select_machine

        self.cuckoo_task = CuckooTask(self.file_name, **kwargs)

        if self.restart_interval <= 0 or not self.is_cuckoo_ready():
            cuckoo_up = self.trigger_cuckoo_reset()
            if not cuckoo_up:
                self.session.close()
                raise RecoverableError(
                    "Cuckoo never came back up after the restart.")
        else:
            self.restart_interval -= 1

        try:
            self.cuckoo_submit(file_content)
            if self.cuckoo_task.report:
                try:
                    machine_name = None
                    report_info = self.cuckoo_task.report.get('info', {})
                    machine = report_info.get('machine', {})

                    if isinstance(machine, dict):
                        machine_name = machine.get('name')

                    if machine_name is None:
                        self.log.debug(
                            'Unable to retrieve machine name from result.')
                        guest_ip = ""
                    else:
                        guest_ip = self.report_machine_info(machine_name)
                    self.log.debug(
                        "Generating AL Result from Cuckoo results...")
                    success = generate_al_result(self.cuckoo_task.report,
                                                 self.file_res, file_ext,
                                                 guest_ip,
                                                 self.SERVICE_CLASSIFICATION)
                    if success is False:
                        err_str = self.get_errors()
                        if "Machinery error: Unable to restore snapshot" in err_str:
                            raise RecoverableError(
                                "Cuckoo is restarting container: %s", err_str)

                        raise CuckooProcessingException(
                            "Cuckoo was unable to process this file. %s",
                            err_str)
                except RecoverableError:
                    self.trigger_cuckoo_reset(5)
                    raise
                except Exception as e:
                    # This is non-recoverable unless we were stopped during processing
                    self.trigger_cuckoo_reset(1)
                    if self.should_run:
                        self.log.exception("Error generating AL report: ")
                        raise CuckooProcessingException(
                            "Unable to generate cuckoo al report for task %s: %s"
                            % (safe_str(self.cuckoo_task.id), safe_str(e)))

                if self.check_stop():
                    raise RecoverableError(
                        "Cuckoo stopped during result processing...")

                if generate_report is True:
                    self.log.debug("Generating cuckoo report tar.gz.")

                    # Submit cuckoo analysis report archive as a supplementary file
                    tar_report = self.cuckoo_query_report(self.cuckoo_task.id,
                                                          fmt='all',
                                                          params={'tar': 'gz'})
                    if tar_report is not None:
                        tar_report_path = os.path.join(self.working_directory,
                                                       "cuckoo_report.tar.gz")
                        try:
                            with open(tar_report_path, 'wb') as report_file:
                                report_file.write(tar_report)
                            self.task.add_supplementary(
                                tar_report_path,
                                "Cuckoo Sandbox analysis report archive (tar.gz)"
                            )
                        except:
                            self.log.exception(
                                "Unable to add tar of complete report for task %s"
                                % self.cuckoo_task.id)

                self.log.debug("Checking for dropped files and pcap.")
                # Submit dropped files and pcap if available:
                self.check_dropped(request, self.cuckoo_task.id)
                self.check_pcap(self.cuckoo_task.id)

                # Check process memory dumps
                if dump_processes is True:
                    self.download_memdump('procmemdump')

                # We only retrieve full memory dumps for top-level files, and only if it was specified in
                # extra options.
                if full_memdump and pull_memdump:
                    self.download_memdump('fullmemdump')
            else:
                # We didn't get a report back.. cuckoo has failed us
                if self.should_run:
                    self.trigger_cuckoo_reset(5)
                    self.log.info("Raising recoverable error for running job.")
                    raise RecoverableError(
                        "Unable to retrieve cuckoo report. The following errors were detected: %s"
                        % safe_str(self.cuckoo_task.errors))

        except Exception as e:
            # Delete the task now..
            self.log.info('General exception caught during processing: %s' % e)
            if self.cuckoo_task and self.cuckoo_task.id is not None:
                self.cuckoo_delete_task(self.cuckoo_task.id)
            self.session.close()

            # Send the exception off to ServiceBase
            raise

        # Delete and exit
        if self.cuckoo_task and self.cuckoo_task.id is not None:
            self.cuckoo_delete_task(self.cuckoo_task.id)

        self.session.close()
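
A worked illustration of how the user options above collapse into the single string handed to CuckooTask; the parameter values are hypothetical:

task_options = ['procmemdump=yes', 'function=DllMain', 'arguments=-x']
print ','.join(task_options)  # procmemdump=yes,function=DllMain,arguments=-x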