def is_whitelisted(notice):  # df node def
    reason, hit = get_whitelist_verdict(whitelist, notice)
    hit = {x: dotdump(safe_str(y)) for x, y in hit.iteritems()}

    sha256 = notice.get('sha256')

    if not reason:
        with whitelisted_lock:
            reason = whitelisted.get(sha256, None)
            if reason:
                hit = 'cached'

    if reason:
        if hit != 'cached':
            with whitelisted_lock:
                whitelisted[sha256] = reason

        notice.set('failure', "Whitelisting due to reason %s (%s)" % (dotdump(safe_str(reason)), hit))
        dropq.push(notice.raw)  # df push push

        ingester_counts.increment('ingest.whitelisted')
        whitelister_counts.increment('whitelist.' + reason)

    return reason
def process_signatures(sigs, al_result, classification):
    log.debug("Processing signature results.")
    if len(sigs) > 0:
        sigs_score = 0
        sigs_res = ResultSection(title_text="Signatures", classification=classification)
        skipped_sigs = ['dead_host', 'has_authenticode', 'network_icmp', 'network_http',
                        'allocates_rwx', 'has_pdb']
        # Severity is 0-5ish with 0 being least severe.
        for sig in sigs:
            severity = float(sig.get('severity', 0))
            actor = sig.get('actor', '')
            sig_classification = sig.get('classification', CLASSIFICATION.UNRESTRICTED)
            sig_score = int(severity * 100)
            sig_name = sig.get('name', 'unknown')
            sig_categories = sig.get('categories', [])
            sig_families = sig.get('families', [])

            # Skipped Signature Checks:
            if sig_name in skipped_sigs:
                continue

            sigs_score += sig_score

            sigs_res.add_line(sig_name + ' [' + str(sig_score) + ']')
            sigs_res.add_line('\tDescription: ' + sig.get('description'))

            if len(sig_categories) > 0:
                sigs_res.add_line('\tCategories: ' + ','.join([safe_str(x) for x in sig_categories]))
                for category in sig_categories:
                    al_result.add_tag(tag_type=TAG_TYPE.DYNAMIC_SIGNATURE_CATEGORY, value=category,
                                      weight=TAG_WEIGHT.HIGH, classification=sig_classification)

            if len(sig_families) > 0:
                sigs_res.add_line('\tFamilies: ' + ','.join([safe_str(x) for x in sig_families]))
                for family in sig_families:
                    al_result.add_tag(tag_type=TAG_TYPE.DYNAMIC_SIGNATURE_FAMILY, value=family,
                                      weight=TAG_WEIGHT.VHIGH, classification=sig_classification)

            if sig_name != 'unknown' and sig_name != '':
                al_result.add_tag(tag_type=TAG_TYPE.DYNAMIC_SIGNATURE_NAME, value=sig_name,
                                  weight=TAG_WEIGHT.VHIGH, classification=sig_classification)

            sigs_res.add_line('')

            if actor and actor != '':
                al_result.add_tag(tag_type=TAG_TYPE.THREAT_ACTOR, value=actor,
                                  weight=TAG_WEIGHT.VHIGH, classification=sig_classification)

        # We don't want to get carried away..
        sigs_res.score = min(1000, sigs_score)
        al_result.add_section(sigs_res)
def __init__(self, score=0, title_text=None, classification=Classification.UNRESTRICTED,
             parent=None, body='', body_format=None, tags=None):
    super(ResultSection, self).__init__()
    self.parent = parent
    self.score = score
    self.classification = classification
    self.body = body
    self.body_format = body_format
    self.links = []
    self.subsections = []
    self.tags = tags or []
    self.depth = 0
    self.finalized = False
    self.truncated = False

    if isinstance(title_text, list):
        title_text = ''.join(title_text)
    self.title_text = safe_str(title_text)

    if parent is not None:
        parent.add_section(self)

    self._warn_on_validation_errors()
def download_file(srl, **kwargs):
    """
    Download the file using the default encoding method.
    This api will force the browser in download mode.

    Variables:
    srl       => A resource locator for the file (sha256)

    Arguments:
    name      => Name of the file to download
    format    => Format to encode the file in
    password  => Password of the password protected zip

    Data Block:
    None

    API call example:
    /api/v3/file/download/123456...654321/

    Result example:
    <THE FILE BINARY ENCODED IN SPECIFIED FORMAT>
    """
    user = kwargs['user']
    file_obj = STORAGE.get_file(srl)

    if not file_obj:
        return make_api_response({}, "The file was not found in the system.", 404)

    if user and Classification.is_accessible(user['classification'], file_obj['classification']):
        params = load_user_settings(user)

        name = request.args.get('name', srl)
        if name == "":
            name = srl
        else:
            name = basename(name)
        name = safe_str(name)

        file_format = request.args.get('format', params['download_encoding'])
        if file_format == "raw" and not ALLOW_RAW_DOWNLOADS:
            return make_api_response({}, "RAW file download has been disabled by administrators.", 403)

        password = request.args.get('password', None)

        with forge.get_filestore() as f_transport:
            data = f_transport.get(srl)

        if not data:
            return make_api_response({}, "The file was not found in the system.", 404)

        data, error, already_encoded = encode_file(data, file_format, name, password)
        if error:
            return make_api_response({}, error['text'], error['code'])

        if file_format != "raw" and not already_encoded:
            name = "%s.%s" % (name, file_format)

        return make_file_response(data, name, len(data))
    else:
        return make_api_response({}, "You are not allowed to download this file.", 403)
def add_access_control(user):
    user.update(Classification.get_access_control_parts(
        user.get("classification", Classification.UNRESTRICTED), user_classification=True))

    gl2_query = " OR ".join(['__access_grp2__:__EMPTY__'] +
                            ['__access_grp2__:"%s"' % x for x in user["__access_grp2__"]])
    gl2_query = "(%s) AND " % gl2_query

    gl1_query = " OR ".join(['__access_grp1__:__EMPTY__'] +
                            ['__access_grp1__:"%s"' % x for x in user["__access_grp1__"]])
    gl1_query = "(%s) AND " % gl1_query

    req = list(set(Classification.get_access_control_req()).difference(set(user["__access_req__"])))
    req_query = " OR ".join(['__access_req__:"%s"' % r for r in req])
    if req_query:
        req_query = "-(%s) AND " % req_query

    lvl_query = "__access_lvl__:[0 TO %s]" % user["__access_lvl__"]

    query = "".join([gl2_query, gl1_query, req_query, lvl_query])
    user['access_control'] = safe_str(query)
def process_clsid(key, result_map):
    clsid_map = result_map.get('clsids', defaultdict(str))
    for uuid in set(UUID_RE.findall(safe_str(key))):
        # Check if we have a matching CLSID
        uuid = uuid.upper()
        name = clsids.get(uuid)
        if name:
            clsid_map[name] = uuid
    result_map['clsids'] = clsid_map
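
# A self-contained sketch of the CLSID lookup pattern used by process_clsid() above. The regex and the
# one-entry CLSID table below are illustrative stand-ins for the module-level UUID_RE and clsids objects,
# which are not part of this excerpt.
def _clsid_lookup_example():
    import re
    from collections import defaultdict

    uuid_re = re.compile(r'[0-9A-Fa-f]{8}(?:-[0-9A-Fa-f]{4}){3}-[0-9A-Fa-f]{12}')
    known_clsids = {'0002DF01-0000-0000-C000-000000000046': 'InternetExplorer.Application'}

    key = r'HKCR\CLSID\{0002df01-0000-0000-c000-000000000046}\InProcServer32'
    clsid_map = defaultdict(str)
    for uuid in set(uuid_re.findall(key)):
        name = known_clsids.get(uuid.upper())
        if name:
            clsid_map[name] = uuid.upper()
    # -> {'InternetExplorer.Application': '0002DF01-0000-0000-C000-000000000046'}
    return dict(clsid_map)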
def submit_existing_file(self, path, **kw):
    request = {
        0: {
            'path': safe_str(path),
            'sha256': kw['sha256'],
        }
    }
    return self.submit_requests(request, **kw)
def extract_ace(self, request, local, encoding):
    if encoding != 'ace':
        return [], False

    path = os.path.join(self.working_directory, "ace")
    try:
        os.mkdir(path)
    except OSError:
        pass

    # noinspection PyBroadException
    try:
        with tempfile.NamedTemporaryFile(suffix=".ace", dir=path) as tf:
            # unace needs the .ace file extension
            with open(local, "rb") as fh:
                tf.write(fh.read())
                tf.flush()

            proc = self.st.run(subprocess.Popen(
                '/usr/bin/unace e -y %s' % tf.name,
                stdin=subprocess.PIPE,
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                cwd=path,
                env=os.environ,
                shell=True,
                preexec_fn=set_death_signal()))

            # Note, proc.communicate() hangs
            stdoutput = proc.stdout.read()
            while True:
                stdoutput += proc.stdout.read()
                if proc.poll() is not None:
                    break
                time.sleep(0.01)

            if stdoutput:
                extracted_children = []
                if "extracted:" in stdoutput:
                    for line in stdoutput.splitlines():
                        line = line.strip()
                        m = re.match("extracting (.+?)[ ]*(CRC OK)?$", line)
                        if not m:
                            continue

                        filename = m.group(1)
                        filepath = os.path.join(path, filename)
                        if os.path.isdir(filepath):
                            continue
                        else:
                            extracted_children.append([filepath, encoding, safe_str(filename)])

                return extracted_children, False

    except ExtractIgnored:
        raise
    except Exception:
        self.log.exception('While extracting %s with unace', request.srl)

    return [], False
def process_ole_stream(self, ole, streams_section):
    listdir = ole.listdir()
    streams = []
    for dir_entry in listdir:
        streams.append('/'.join(dir_entry))

    if "\x05HwpSummaryInformation" in streams:
        decompress = True
    else:
        decompress = False
    decompress_macros = []

    for stream in streams:
        self.log.debug("Extracting stream: {}".format(stream))
        data = ole.openstream(stream).getvalue()
        try:
            if "Ole10Native" in stream:
                if self.process_ole10native(stream, data, streams_section) is True:
                    continue
            elif "PowerPoint Document" in stream:
                if self.process_powerpoint_stream(data, streams_section) is True:
                    continue

            if decompress:
                try:
                    data = zlib.decompress(data, -15)
                except zlib.error:
                    pass

            streams_section.add_line(safe_str(stream))

            # Only write all streams with deep scan.
            stream_name = '{}.ole_stream'.format(hashlib.sha256(data).hexdigest())
            if self.request.deep_scan:
                stream_path = os.path.join(self.working_directory, stream_name)
                with open(stream_path, 'w') as fh:
                    fh.write(data)
                self.request.add_extracted(stream_path, "Embedded OLE Stream.", stream)

            if decompress and (stream.endswith(".ps") or stream.startswith("Scripts/")):
                decompress_macros.append(data)

        except Exception as e:
            self.log.error("Error adding extracted stream {}: {}".format(stream, e))
            continue

    if decompress_macros:
        macros = "\n".join(decompress_macros)
        stream_name = '{}.macros'.format(hashlib.sha256(macros).hexdigest())
        stream_path = os.path.join(self.working_directory, stream_name)
        with open(stream_path, 'w') as fh:
            fh.write(macros)
        self.request.add_extracted(stream_path, "Combined macros.", "all_macros.ps")
        return True

    return False
def add_line(self, text, _deprecated_format=None):
    # add_line with a list should join without newline separator.
    # use add_lines if list should be split one element per line.
    if isinstance(text, list):
        text = ''.join(text)
    textstr = safe_str(text)
    if len(self.body) != 0:
        # noinspection PyAugmentAssignment
        textstr = '\n' + textstr
    self.body = self.body + textstr
def add_tag(self, tag_type, value, weight, usage=None,
            classification=Classification.UNRESTRICTED, context=None):
    tag = {
        'type': tag_type,
        'value': safe_str(value),
        'weight': weight,
        'usage': usage or self.default_usage,
        'classification': classification,
        'context': context
    }

    for existing_tag in self.tags:
        if existing_tag['type'] == tag['type'] and existing_tag['value'] == tag['value']:
            return

    self.tags.append(tag)

    if not TAG_TYPE.contains_value(tag_type):
        tb = traceback.format_stack(limit=4)
        log.warn("Invalid tag_type: %s -- %s", tag_type, tb)

    if len(value) <= 0 or len(value) >= 2048:
        tb = traceback.format_stack(limit=4)
        log.warn("invalid tag_value: %s:'%s' -- %s", tag_type, safe_str(value), tb)

    if not (isinstance(weight, int) and -1000 < weight < 1000):
        log.warn("invalid weight: %s", weight)

    if usage and not TAG_USAGE.contains_value(usage):
        log.warn("invalid tag usage: %s", usage)

    if not Classification.is_valid(classification):
        tb = traceback.format_stack(limit=4)
        log.warn("invalid classification:%s.\n%s", str(self.classification), str(tb))

    if context:
        if not Context.verify_context(tag_type, context):
            log.warn("Invalid tag_type: %s and context: %s combination" % (tag_type, context))
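
# Usage sketch for the result API above (ResultSection.__init__, add_line, add_tag). The import path
# and the TAG_TYPE/TAG_WEIGHT constants follow the AssemblyLine v3 layout and are assumptions of this
# example, not guaranteed by this excerpt.
def _result_example():
    from assemblyline.al.common.result import Result, ResultSection, TAG_TYPE, TAG_WEIGHT

    result = Result()
    section = ResultSection(score=100, title_text="Example findings")
    section.add_line("First line of the section body.")
    section.add_line(["joined ", "into ", "one line"])  # a list is joined without newlines
    result.add_tag(tag_type=TAG_TYPE.FILE_STRING, value="example-indicator", weight=TAG_WEIGHT.LOW)
    result.add_section(section)
    return result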
def report_machine_info(self, machine_name):
    try:
        self.log.debug("Querying machine info for %s" % machine_name)
        machine = self.cuckoo_query_machine_info(machine_name)
        machine_section = ResultSection(title_text='Machine Information',
                                        classification=self.SERVICE_CLASSIFICATION)
        machine_section.add_line('ID: ' + str(machine.get('id')))
        machine_section.add_line('Name: ' + str(machine.get('name')))
        machine_section.add_line('Label: ' + str(machine.get('label')))
        machine_section.add_line('Platform: ' + str(machine.get('platform')))
        machine_section.add_line('Tags:')
        for tag in machine.get('tags', []):
            machine_section.add_line('\t ' + safe_str(tag).replace('_', ' '))
        self.file_res.add_section(machine_section)
        return str(machine.get('ip', ""))
    except Exception as e:
        self.log.error('Unable to retrieve machine information for %s: %s' % (machine_name, safe_str(e)))
def extract_tnef(self, _, file_path, encoding):
    children = []

    if encoding != 'tnef':
        return children, False

    # noinspection PyBroadException
    try:
        # noinspection PyUnresolvedReferences
        from tnefparse import tnef

        tnef_logger = logging.getLogger("tnef-decode")
        tnef_logger.setLevel(60)  # This completely turns off the TNEF logger

        count = 0
        for a in tnef.TNEF(open(file_path).read()).attachments:
            # This may not exist, so try to access it and deal with the
            # possible AttributeError by skipping this entry, as there
            # is no point if there is no data.
            try:
                data = a.data
            except AttributeError:
                continue

            count += 1

            # This may not exist either, but long_filename still seems
            # to return, so deal with the AttributeError here rather
            # than blowing up.
            try:
                name = a.long_filename() or a.name
                if not name:
                    continue
                name = safe_str(name)
            except AttributeError:
                name = 'unknown_tnef_%d' % count

            if not name:
                continue

            path = os.path.join(self.working_directory, str(count))
            with open(path, 'w') as f:
                f.write(data)

            children.append([path, encoding, name])
    except ImportError:
        self.log.exception("Import error: tnefparse library not installed:")
    except Exception:
        self.log.exception("Error extracting from tnef file:")

    return children, False
def cuckoo_submit(self, file_content):
    """ Submits a new file to Cuckoo for analysis """
    try:
        task_id = self.cuckoo_submit_file(file_content)
        self.log.debug("Submitted file. Task id: %s.", task_id)
        if not task_id:
            err_msg = "Failed to get task for submitted file."
            self.cuckoo_task.errors.append(err_msg)
            self.log.error(err_msg)
            return
        else:
            self.cuckoo_task.id = task_id
    except Exception as e:
        err_msg = "Error submitting to Cuckoo"
        self.cuckoo_task.errors.append('%s: %s' % (err_msg, safe_str(e)))
        raise RecoverableError("Unable to submit to Cuckoo")

    self.log.debug("Submission succeeded. File: %s -- Task ID: %s" % (self.cuckoo_task.file, self.cuckoo_task.id))

    # Quick sleep to avoid failing when the API can't get the task yet.
    for i in xrange(5):
        if self.check_stop():
            return
        time.sleep(1)

    try:
        status = self.cuckoo_poll_started()
    except RetryError:
        self.log.info("VM startup timed out")
        status = None

    if status == "started":
        try:
            status = self.cuckoo_poll_report()
        except RetryError:
            self.log.info("Max retries exceeded for report status.")
            status = None

    err_msg = None
    if status is None:
        err_msg = "Timed out while waiting for cuckoo to analyze file."
    elif status == "missing":
        err_msg = "Task went missing while waiting for cuckoo to analyze file."
    elif status == "stopped":
        err_msg = "Service has been stopped while waiting for cuckoo to analyze file."

    if err_msg:
        self.log.debug(err_msg)
        raise RecoverableError(err_msg)
def _init_section_list(self):
    # Lazy init
    if self._sect_list is None:
        self._sect_list = []
        try:
            for section in self.pe_file.sections:
                zero_idx = section.Name.find(chr(0x0))
                if not zero_idx == -1:
                    sname = section.Name[:zero_idx]
                else:
                    sname = safe_str(section.Name)
                entropy = section.get_entropy()
                self._sect_list.append((sname, section, section.get_hash_md5(), entropy))
        except AttributeError:
            pass
def extract_libarchive(self, request, local, encoding):
    extracted_children = []

    try:
        # noinspection PyUnresolvedReferences
        from libarchive import Archive

        for file_encoding in ["utf8", "cp437"]:
            try:
                with Archive(local, encoding=file_encoding) as archive:
                    count = 0
                    for entry in archive:
                        name = safe_str(entry.pathname)
                        if entry.isdir():
                            continue

                        count += 1
                        path = os.path.join(self.working_directory, str(count))
                        with open(path, 'w') as f:
                            archive.readpath(f)

                        if os.stat(path).st_size != entry.size:
                            raise RuntimeError("Extracted file size mismatch, archive is probably "
                                               "password protected: %s" % name)

                        extracted_children.append([path, encoding, name])
                break
            except RuntimeError:
                extracted_children = []
            except UnicodeDecodeError:
                extracted_children = []
                self.log.debug("Archive is not using %s charset. Trying another one...", file_encoding)
            except Exception as e:
                extracted_children = []
                msg = str(e)
                if msg.endswith("Unrecognized archive format."):
                    return extracted_children, False
                elif msg == "Fatal error executing function, message is: None.":
                    return extracted_children, False

                if request.tag != 'archive/cab':
                    self.log.exception('while extracting (%s) with libarchive', request.srl)
    except ImportError:
        self.log.exception("Import error: libarchive library not installed:")

    return extracted_children, False
def apply_overlay(module_name, overlay):
    from assemblyline.common.charset import safe_str

    if not overlay:
        return False

    import sys
    module = sys.modules[module_name]
    for k, v in overlay.iteritems():
        t = type(v)
        if t == unicode or t == str:
            v = safe_str(v)
        setattr(module, k, v)

    return True
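
# Usage sketch for apply_overlay(): overlay this module's own globals (any importable module name
# works). The GREETING/RETRIES keys below are made up for illustration.
def _overlay_example():
    import sys
    apply_overlay(__name__, {'GREETING': u'hello', 'RETRIES': 3})
    module = sys.modules[__name__]
    # unicode/str values are normalized through safe_str(); other types are set untouched
    return module.GREETING, module.RETRIES  # ('hello', 3)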
def get_digests_for_file(path, blocksize=DEFAULT_BLOCKSIZE, calculate_entropy=True,
                         on_first_block=lambda b, l: {}):
    """ Generate digests for a file, reading only `blocksize` bytes at a time."""
    bc = None
    if calculate_entropy:
        try:
            bc = entropy.BufferedCalculator()
        except:  # pylint: disable=W0702
            calculate_entropy = False

    result = {'path': safe_str(path)}

    md5 = hashlib.md5()
    sha1 = hashlib.sha1()
    sha256 = hashlib.sha256()
    size = 0

    with open(path, 'rb') as f:
        data = f.read(blocksize)
        length = len(data)

        if not size:
            result.update(on_first_block(data, length))

        while length > 0:
            if calculate_entropy:
                bc.update(data, length)
            md5.update(data)
            sha1.update(data)
            sha256.update(data)
            size += length

            data = f.read(blocksize)
            length = len(data)

    if calculate_entropy:
        result['entropy'] = bc.entropy()
    else:
        result['entropy'] = 0
    result['md5'] = md5.hexdigest()
    result['sha1'] = sha1.hexdigest()
    result['sha256'] = sha256.hexdigest()
    result['size'] = size

    return result
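
# Usage sketch for get_digests_for_file(): hash a small scratch file in 64 KiB blocks, skipping the
# optional entropy calculation so the example does not depend on the entropy module.
def _digest_example():
    import tempfile

    with tempfile.NamedTemporaryFile(delete=False) as tmp:
        tmp.write(b"hello world")
        sample_path = tmp.name

    digests = get_digests_for_file(sample_path, blocksize=65536, calculate_entropy=False)
    return digests['sha256'], digests['size']  # size is 11 for this sample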
def make_file_response(data, name, size, status_code=200, content_type="application/octet-stream"):
    quota_user = flsk_session.pop("quota_user", None)
    quota_id = flsk_session.pop("quota_id", None)
    quota_set = flsk_session.pop("quota_set", False)
    if quota_user and quota_set:
        RATE_LIMITER.dec(quota_user, track_id=quota_id)
        RATE_LIMITER.dec("__global__", track_id=quota_id)

    response = make_response(data, status_code)
    response.headers["Content-Type"] = content_type
    response.headers["Content-Length"] = size
    response.headers["Content-Disposition"] = 'attachment; filename="%s"' % safe_str(name)
    return response
def process_debug(debug, al_result, classification):
    failed = False
    if 'errors' in debug:
        error_res = ResultSection(title_text='Analysis Errors', classification=classification)
        for error in debug['errors']:
            err_str = str(error)
            err_str = err_str.lower()
            if err_str is not None and len(err_str) > 0:
                # Timeouts - ok, just means the process never exited
                # Start Error - probably a corrupt file..
                # Initialization Error - restart the docker container
                error_res.add_line(error)
                # err_str is lowercased above, so the comparison strings must be lowercase too.
                if "analysis hit the critical timeout" not in err_str and \
                        "unable to execute the initial process" not in err_str:
                    raise RecoverableError("An error prevented cuckoo from "
                                           "generating complete results: %s" % safe_str(error))

        if len(error_res.body) > 0:
            al_result.add_section(error_res)

    return failed
def _7zip_submit_extracted(self, request, lines, path, encoding):
    extract_pe_sections = request.get_param('extract_pe_sections')
    extracted_children = []

    for line in lines:
        if line.startswith("Extracting "):
            filename = line.split("Extracting ", 1)[1]

            if not extract_pe_sections and \
                    ((encoding.startswith("executable/windows") and
                      [f for f in self.FORBIDDEN_EXE if filename.startswith(f)]) or
                     (encoding.startswith("executable/linux") and filename in self.FORBIDDEN_ELF_EXE)):
                raise ExtractIgnored("Detected extraction of forbidden PE/ELF file sections. "
                                     "No files will be extracted.")

            if os.path.isdir(path + "/" + filename):
                continue
            else:
                extracted_children.append([path + "/" + filename, encoding, safe_str(filename)])

    return extracted_children
def __init__(self, result_as_xml_string):
    # There is a bug in McAfee XML output.
    # Occasionally the McAfee XML result will have non-escaped utf-8 characters
    # in the embedded filename etc. that will not parse. Remove them.
    result_as_xml_string = safe_str(re.sub(self.BAD_XML_CHARREF, 'INV', result_as_xml_string))
    root = ElementTree.fromstring(result_as_xml_string)  # @UndefinedVariable
    if root.tag != ROOT_TAG:
        raise Exception('Unexpected root in XML result: %s.' % root.tag)

    element_parsers = {
        CHILD_PREAMBLE: self._add_preamble,
        CHILD_DATETIME: self._ignore_element,
        CHILD_TIME: self._add_duration,
        CHILD_OPTIONS: self._ignore_element,
        CHILD_FILE: self._add_file_result,
    }

    self.preamble = None
    self.file_results = []

    for child in root:
        element_parsers.get(child.tag, self._handle_unexpected_element)(child)
def stream_file_response(reader, name, size, status_code=200):
    quota_user = flsk_session.pop("quota_user", None)
    quota_id = flsk_session.pop("quota_id", None)
    quota_set = flsk_session.pop("quota_set", False)
    if quota_user and quota_set:
        RATE_LIMITER.dec(quota_user, track_id=quota_id)
        RATE_LIMITER.dec("__global__", track_id=quota_id)

    chunk_size = 65535

    def generate():
        reader.seek(0)
        while True:
            data = reader.read(chunk_size)
            if not data:
                break
            yield data

    headers = {
        "Content-Type": 'application/octet-stream',
        "Content-Length": size,
        "Content-Disposition": 'attachment; filename="%s"' % safe_str(name)
    }
    return Response(generate(), status=status_code, headers=headers)
def fileinfo(path):
    path = safe_str(path)

    data = get_digests_for_file(path, on_first_block=ident)
    if data['mime'].lower() == 'application/cdfv2-corrupt':
        with open(path, 'r') as fh:
            buf = fh.read()
            buflen = len(buf)
            data.update(ident(buf, buflen))
    data['ssdeep'] = ssdeep_from_file(path) if ssdeep_from_file else ''

    if not int(data.get('size', -1)):
        data['tag'] = 'empty'
    elif data['tag'] == 'archive/zip' or data['tag'] == 'java/jar':
        data['tag'] = zip_ident(path)
    elif data['tag'] == 'unknown':
        data['tag'], _ = guess_language(path)
    elif data['tag'] == 'archive/cart':
        data['tag'] = cart_ident(path)
    elif data['tag'] == 'executable/windows/dos':
        # The default magic file misidentifies PE files with a munged DOS header
        data['tag'] = dos_ident(path)

    return data
def _do_respmod(self, filename, data):
    encoded = self.chunk_encode(data)

    # ICAP RESPMOD req-hdr is the start of the original HTTP request.
    respmod_req_hdr = "GET /{FILENAME} HTTP/1.1\r\n\r\n".format(FILENAME=safe_str(filename))

    # ICAP RESPMOD res-hdr is the start of the HTTP response for above request.
    respmod_res_hdr = (
        "HTTP/1.1 200 OK\r\n"
        "Transfer-Encoding: chunked\r\n\r\n")

    res_hdr_offset = len(respmod_req_hdr)
    res_bdy_offset = len(respmod_res_hdr) + res_hdr_offset

    # The ICAP RESPMOD header. Note:
    # res-hdr offset should match the start of the GET request above.
    # res-body offset should match the start of the response above.
    respmod_icap_hdr = (
        "RESPMOD icap://{HOST}:{PORT}/{SERVICE}{ACTION} ICAP/1.0\r\n"
        "Host:{HOST}:{PORT}\r\n"
        "Allow:204\r\n"
        "Encapsulated: req-hdr=0, res-hdr={RES_HDR}, res-body={RES_BODY}\r\n\r\n"
    ).format(HOST=self.host, PORT=self.port, SERVICE=self.service, ACTION=self.action,
             RES_HDR=res_hdr_offset, RES_BODY=res_bdy_offset)

    sio = StringIO()
    sio.write(respmod_icap_hdr)
    sio.write(respmod_req_hdr)
    sio.write(respmod_res_hdr)
    sio.write(encoded)
    serialized_request = sio.getvalue()

    for i in xrange(self.MAX_RETRY):
        s = None
        try:
            s = socket.create_connection((self.host, self.port), timeout=10)
            s.sendall(serialized_request)
            response = temp_resp = s.recv(self.RESP_CHUNK_SIZE)
            while len(temp_resp) == self.RESP_CHUNK_SIZE:
                temp_resp = s.recv(self.RESP_CHUNK_SIZE)
                response += temp_resp
            return response
        except:
            if i == (self.MAX_RETRY - 1):
                raise
        finally:
            if s is not None:
                try:
                    # try to close the connection anyways
                    s.close()
                except:
                    pass

    raise Exception("Icap server refused to respond.")
# Fragment: this snippet begins inside the per-workflow-filter loop; the enclosing function, the alert
# loop and the variables it references (aq, item, status, priority, count, labels, ...) are defined in
# code that is not part of this excerpt.
                    if item.get('status', status) != status and item.get('status', status) != "TRIAGE":
                        labels.append("CONFLICT.%s" % item['status'])

                    msg = {
                        "action": "workflow",
                        "label": labels,
                        "priority": priority,
                        "status": status,
                        "event_id": item['_yz_rk'],
                        "queue_priority": QUEUE_PRIORITY
                    }
                    action_queue.push(QUEUE_PRIORITY, msg)
            except SearchException:
                log.warning("Invalid query '{query}' in workflow filter '{name}' by '{user}'".format(
                    query=safe_str(aq.get('query', '')),
                    name=aq.get('name', 'unknown'),
                    user=aq.get('created_by', 'unknown')))
                continue

            if count:
                log.info("{count} Alert(s) were affected by this filter.".format(count=count))
                if 'id' in aq:
                    ds.increment_workflow_counter(aq['id'], count)
    else:
        log.info("Skipping all workflow filters since there were no alerts created in the specified time period.")
def get_pe_info(self, lcid):
    """Dumps the PE header as Results in the FileResult."""

    # PE Header
    pe_header_res = ResultSection(SCORE['NULL'], "PE: HEADER")

    # PE Header: Header Info
    pe_header_info_res = ResultSection(SCORE.NULL, "[HEADER INFO]", parent=pe_header_res)
    pe_header_info_res.add_line("Entry point address: 0x%08X" % self.pe_file.OPTIONAL_HEADER.AddressOfEntryPoint)
    pe_header_info_res.add_line("Linker Version: %02d.%02d" % (self.pe_file.OPTIONAL_HEADER.MajorLinkerVersion,
                                                               self.pe_file.OPTIONAL_HEADER.MinorLinkerVersion))
    pe_header_info_res.add_line("OS Version: %02d.%02d" %
                                (self.pe_file.OPTIONAL_HEADER.MajorOperatingSystemVersion,
                                 self.pe_file.OPTIONAL_HEADER.MinorOperatingSystemVersion))
    pe_header_info_res.add_line(["Time Date Stamp: %s (" % time.ctime(self.pe_file.FILE_HEADER.TimeDateStamp),
                                 res_txt_tag(str(self.pe_file.FILE_HEADER.TimeDateStamp),
                                             TAG_TYPE['PE_LINK_TIME_STAMP']),
                                 ")"])
    try:
        pe_header_info_res.add_line("Machine Type: %s (%s)" % (
            hex(self.pe_file.FILE_HEADER.Machine), pefile.MACHINE_TYPE[self.pe_file.FILE_HEADER.Machine]))
    except KeyError:
        pass

    # PE Header: Rich Header
    # noinspection PyBroadException
    try:
        if self.pe_file.RICH_HEADER is not None:
            pe_rich_header_info = ResultSection(SCORE.NULL, "[RICH HEADER INFO]", parent=pe_header_res)
            values_list = self.pe_file.RICH_HEADER.values
            pe_rich_header_info.add_line("VC++ tools used:")
            for i in range(0, len(values_list) / 2):
                line = "Tool Id: %3d Version: %6d Times used: %3d" % (
                    values_list[2 * i] >> 16, values_list[2 * i] & 0xFFFF, values_list[2 * i + 1])
                pe_rich_header_info.add_line(line)
    except:
        self.log.exception("Unable to parse PE Rich Header")

    # PE Header: Data Directories
    pe_dd_res = ResultSection(SCORE.NULL, "[DATA DIRECTORY]", parent=pe_header_res)
    for data_directory in self.pe_file.OPTIONAL_HEADER.DATA_DIRECTORY:
        if data_directory.Size or data_directory.VirtualAddress:
            pe_dd_res.add_line("%s - va: 0x%08X - size: 0x%08X" %
                               (data_directory.name[len("IMAGE_DIRECTORY_ENTRY_"):],
                                data_directory.VirtualAddress, data_directory.Size))

    # PE Header: Sections
    pe_sec_res = ResultSection(SCORE.NULL, "[SECTIONS]", parent=pe_header_res)
    self._init_section_list()
    try:
        for (sname, section, sec_md5, sec_entropy) in self._sect_list:
            txt = [sname,
                   " - Virtual: 0x%08X (0x%08X bytes)"
                   " - Physical: 0x%08X (0x%08X bytes) - " % (section.VirtualAddress, section.Misc_VirtualSize,
                                                              section.PointerToRawData, section.SizeOfRawData),
                   "hash:",
                   res_txt_tag(sec_md5, TAG_TYPE['PE_SECTION_HASH']),
                   " - entropy:%f (min:0.0, Max=8.0)" % sec_entropy]
            # add a search tag for the Section Hash
            make_tag(self.file_res, 'PE_SECTION_HASH', sec_md5, 'HIGH', usage='CORRELATION')
            pe_sec_res.add_line(txt)
    except AttributeError:
        pass

    self.file_res.add_section(pe_header_res)

    # debug
    try:
        if self.pe_file.DebugTimeDateStamp:
            pe_debug_res = ResultSection(SCORE['NULL'], "PE: DEBUG")
            self.file_res.add_section(pe_debug_res)

            pe_debug_res.add_line("Time Date Stamp: %s" % time.ctime(self.pe_file.DebugTimeDateStamp))

            # When it is a unicode, we know we are coming from RSDS which is UTF-8,
            # otherwise we come from NB10 and we need to guess the charset.
            if type(self.pe_file.pdb_filename) != unicode:
                char_enc_guessed = translate_str(self.pe_file.pdb_filename)
                pdb_filename = char_enc_guessed['converted']
            else:
                char_enc_guessed = {'confidence': 1.0, 'encoding': 'utf-8'}
                pdb_filename = self.pe_file.pdb_filename

            pe_debug_res.add_line(["PDB: '",
                                   res_txt_tag_charset(pdb_filename, TAG_TYPE['PE_PDB_FILENAME'],
                                                       char_enc_guessed['encoding'],
                                                       char_enc_guessed['confidence']),
                                   "'"])
            # self.log.debug(u"\tPDB: %s" % pdb_filename)
    except AttributeError:
        pass

    # imports
    try:
        if hasattr(self.pe_file, 'DIRECTORY_ENTRY_IMPORT') and len(self.pe_file.DIRECTORY_ENTRY_IMPORT) > 0:
            pe_import_res = ResultSection(SCORE['NULL'], "PE: IMPORTS")
            self.file_res.add_section(pe_import_res)

            for entry in self.pe_file.DIRECTORY_ENTRY_IMPORT:
                pe_import_dll_res = ResultSection(SCORE.NULL, "[%s]" % entry.dll, parent=pe_import_res)
                first_element = True
                line = StringIO()
                for imp in entry.imports:
                    if first_element:
                        first_element = False
                    else:
                        line.write(", ")

                    if imp.name is None:
                        line.write(str(imp.ordinal))
                    else:
                        line.write(imp.name)
                pe_import_dll_res.add_line(line.getvalue())
        else:
            pe_import_res = ResultSection(SCORE['NULL'], "PE: NO IMPORTS DETECTED ")
            self.file_res.add_section(pe_import_res)
    except AttributeError:
        pass

    # exports
    try:
        if self.pe_file.DIRECTORY_ENTRY_EXPORT.struct.TimeDateStamp is not None:
            pe_export_res = ResultSection(SCORE['NULL'], "PE: EXPORTS")
            self.file_res.add_section(pe_export_res)

            # noinspection PyBroadException
            try:
                pe_export_res.add_line(["Module Name: ",
                                        res_txt_tag(safe_str(self.pe_file.ModuleName),
                                                    TAG_TYPE['PE_EXPORT_MODULE_NAME'])])
            except:
                pass

            if self.pe_file.DIRECTORY_ENTRY_EXPORT.struct.TimeDateStamp == 0:
                pe_export_res.add_line("Time Date Stamp: 0")
            else:
                pe_export_res.add_line("Time Date Stamp: %s" %
                                       time.ctime(self.pe_file.DIRECTORY_ENTRY_EXPORT.struct.TimeDateStamp))

            first_element = True
            txt = []
            for exp in self.pe_file.DIRECTORY_ENTRY_EXPORT.symbols:
                if first_element:
                    first_element = False
                else:
                    txt.append(", ")

                txt.append(str(exp.ordinal))
                if exp.name is not None:
                    txt.append(": ")
                    txt.append(res_txt_tag(exp.name, TAG_TYPE['PE_EXPORT_FCT_NAME']))
            pe_export_res.add_line(txt)
    except AttributeError:
        pass

    # resources
    try:
        if len(self.pe_file.DIRECTORY_ENTRY_RESOURCE.entries) > 0:
            pe_resource_res = ResultSection(SCORE['NULL'], "PE: RESOURCES")
            self.file_res.add_section(pe_resource_res)

            for res_entry in self.pe_file.DIRECTORY_ENTRY_RESOURCE.entries:
                if res_entry.name is None:
                    # noinspection PyBroadException
                    try:
                        entry_name = pefile.RESOURCE_TYPE[res_entry.id]
                    except:  # pylint: disable-msg=W0702
                        # unfortunately this code was done before we started to really care about which
                        # exception to catch so, I actually don't really know at this point, would need to try
                        # out :-\
                        entry_name = "UNKNOWN"
                else:
                    entry_name = res_entry.name

                for name_id in res_entry.directory.entries:
                    if name_id.name is None:
                        name_id.name = hex(name_id.id)

                    for language in name_id.directory.entries:
                        try:
                            language_desc = lcid[language.id]
                        except KeyError:
                            language_desc = 'Unknown language'

                        line = []
                        if res_entry.name is None:
                            line.append(entry_name)
                        else:
                            line.append(res_txt_tag(str(entry_name), TAG_TYPE['PE_RESOURCE_NAME']))

                        line.append(" " + str(name_id.name) + " ")
                        line.append("0x")
                        # this will add a link to search in AL for the value
                        line.append(res_txt_tag("%04X" % language.id, TAG_TYPE['PE_RESOURCE_LANGUAGE']))
                        line.append(" (%s)" % language_desc)

                        make_tag(self.file_res, 'PE_RESOURCE_LANGUAGE', language.id,
                                 weight='LOW', usage='IDENTIFICATION')

                        # get the size of the resource
                        res_size = language.data.struct.Size
                        line.append(" Size: 0x%x" % res_size)

                        pe_resource_res.add_line(line)
    except AttributeError:
        pass

    # Resources-VersionInfo
    try:
        if len(self.pe_file.FileInfo) > 2:
            pass

        for file_info in self.pe_file.FileInfo:
            if file_info.name == "StringFileInfo":
                if len(file_info.StringTable) > 0:
                    pe_resource_verinfo_res = ResultSection(SCORE['NULL'], "PE: RESOURCES-VersionInfo")
                    self.file_res.add_section(pe_resource_verinfo_res)

                    try:
                        if "LangID" in file_info.StringTable[0].entries:
                            lang_id = file_info.StringTable[0].get("LangID")
                            if not int(lang_id, 16) >> 16 == 0:
                                txt = ('LangId: ' + lang_id + " (" + lcid[int(lang_id, 16) >> 16] + ")")
                                pe_resource_verinfo_res.add_line(txt)
                            else:
                                txt = ('LangId: ' + lang_id + " (NEUTRAL)")
                                pe_resource_verinfo_res.add_line(txt)
                    except (ValueError, KeyError):
                        txt = ('LangId: %s is invalid' % lang_id)
                        pe_resource_verinfo_res.add_line(txt)

                    for entry in file_info.StringTable[0].entries.items():
                        txt = ['%s: ' % entry[0]]

                        if entry[0] == 'OriginalFilename':
                            txt.append(res_txt_tag(entry[1], TAG_TYPE['PE_VERSION_INFO_ORIGINAL_FILENAME']))
                        elif entry[0] == 'FileDescription':
                            txt.append(res_txt_tag(entry[1], TAG_TYPE['PE_VERSION_INFO_FILE_DESCRIPTION']))
                        else:
                            txt.append(entry[1])

                        pe_resource_verinfo_res.add_line(txt)
    except AttributeError:
        pass

    # Resources Strings
    try:
        BYTE = 1
        WORD = 2
        DWORD = 4
        DS_SETFONT = 0x40
        DIALOG_LEAD = DWORD + DWORD + WORD + WORD + WORD + WORD + WORD
        DIALOG_ITEM_LEAD = DWORD + DWORD + WORD + WORD + WORD + WORD + WORD
        DIALOGEX_LEAD = WORD + WORD + DWORD + DWORD + DWORD + WORD + WORD + WORD + WORD + WORD
        DIALOGEX_TRAIL = WORD + WORD + BYTE + BYTE
        DIALOGEX_ITEM_LEAD = DWORD + DWORD + DWORD + WORD + WORD + WORD + WORD + DWORD
        DIALOGEX_ITEM_TRAIL = WORD
        ITEM_TYPES = {0x80: "BUTTON", 0x81: "EDIT", 0x82: "STATIC", 0x83: "LIST BOX",
                      0x84: "SCROLL BAR", 0x85: "COMBO BOX"}

        if hasattr(self.pe_file, 'DIRECTORY_ENTRY_RESOURCE'):
            for dir_type in self.pe_file.DIRECTORY_ENTRY_RESOURCE.entries:
                if dir_type.name is None:
                    if dir_type.id in pefile.RESOURCE_TYPE:
                        dir_type.name = pefile.RESOURCE_TYPE[dir_type.id]

                for nameID in dir_type.directory.entries:
                    if nameID.name is None:
                        nameID.name = hex(nameID.id)

                    for language in nameID.directory.entries:
                        strings = []
                        if str(dir_type.name) == "RT_DIALOG":
                            data_rva = language.data.struct.OffsetToData
                            size = language.data.struct.Size
                            data = self.pe_file.get_memory_mapped_image()[data_rva:data_rva + size]

                            offset = 0
                            if self.pe_file.get_word_at_rva(data_rva + offset) == 0x1 \
                                    and self.pe_file.get_word_at_rva(data_rva + offset + WORD) == 0xFFFF:
                                # Use Extended Dialog Parsing

                                # Remove leading bytes
                                offset += DIALOGEX_LEAD
                                if data[offset:offset + 2] == "\xFF\xFF":
                                    offset += DWORD
                                else:
                                    offset += WORD
                                if data[offset:offset + 2] == "\xFF\xFF":
                                    offset += DWORD
                                else:
                                    offset += WORD

                                # Get window title
                                window_title = self.pe_file.get_string_u_at_rva(data_rva + offset)
                                if len(window_title) != 0:
                                    strings.append(("DIALOG_TITLE", window_title))
                                offset += len(window_title) * 2 + WORD

                                # Remove trailing bytes
                                offset += DIALOGEX_TRAIL
                                offset += len(self.pe_file.get_string_u_at_rva(data_rva + offset)) * 2 + WORD

                                # alignment adjustment
                                if (offset % 4) != 0:
                                    offset += WORD

                                while True:
                                    if offset >= size:
                                        break
                                    offset += DIALOGEX_ITEM_LEAD

                                    # Get item type
                                    if self.pe_file.get_word_at_rva(data_rva + offset) == 0xFFFF:
                                        offset += WORD
                                        item_type = ITEM_TYPES[self.pe_file.get_word_at_rva(data_rva + offset)]
                                        offset += WORD
                                    else:
                                        item_type = self.pe_file.get_string_u_at_rva(data_rva + offset)
                                        offset += len(item_type) * 2 + WORD

                                    # Get item text
                                    item_text = self.pe_file.get_string_u_at_rva(data_rva + offset)
                                    if len(item_text) != 0:
                                        strings.append((item_type, item_text))
                                    offset += len(item_text) * 2 + WORD

                                    extra_bytes = self.pe_file.get_word_at_rva(data_rva + offset)
                                    offset += extra_bytes + DIALOGEX_ITEM_TRAIL

                                    # Alignment adjustment
                                    if (offset % 4) != 0:
                                        offset += WORD
                            else:
                                # TODO: Use Non extended Dialog Parsing
                                # Remove leading bytes
                                style = self.pe_file.get_word_at_rva(data_rva + offset)

                                offset += DIALOG_LEAD
                                if data[offset:offset + 2] == "\xFF\xFF":
                                    offset += DWORD
                                else:
                                    offset += len(self.pe_file.get_string_u_at_rva(data_rva + offset)) * 2 + WORD
                                if data[offset:offset + 2] == "\xFF\xFF":
                                    offset += DWORD
                                else:
                                    offset += len(self.pe_file.get_string_u_at_rva(data_rva + offset)) * 2 + WORD

                                # Get window title
                                window_title = self.pe_file.get_string_u_at_rva(data_rva + offset)
                                if len(window_title) != 0:
                                    strings.append(("DIALOG_TITLE", window_title))
                                offset += len(window_title) * 2 + WORD

                                if (style & DS_SETFONT) != 0:
                                    offset += WORD
                                    offset += len(self.pe_file.get_string_u_at_rva(data_rva + offset)) * 2 + WORD

                                # Alignment adjustment
                                if (offset % 4) != 0:
                                    offset += WORD

                                while True:
                                    if offset >= size:
                                        break
                                    offset += DIALOG_ITEM_LEAD

                                    # Get item type
                                    if self.pe_file.get_word_at_rva(data_rva + offset) == 0xFFFF:
                                        offset += WORD
                                        item_type = ITEM_TYPES[self.pe_file.get_word_at_rva(data_rva + offset)]
                                        offset += WORD
                                    else:
                                        item_type = self.pe_file.get_string_u_at_rva(data_rva + offset)
                                        offset += len(item_type) * 2 + WORD

                                    # Get item text
                                    if self.pe_file.get_word_at_rva(data_rva + offset) == 0xFFFF:
                                        offset += DWORD
                                    else:
                                        item_text = self.pe_file.get_string_u_at_rva(data_rva + offset)
                                        if len(item_text) != 0:
                                            strings.append((item_type, item_text))
                                        offset += len(item_text) * 2 + WORD

                                    extra_bytes = self.pe_file.get_word_at_rva(data_rva + offset)
                                    offset += extra_bytes + WORD

                                    # Alignment adjustment
                                    if (offset % 4) != 0:
                                        offset += WORD

                        elif str(dir_type.name) == "RT_STRING":
                            data_rva = language.data.struct.OffsetToData
                            size = language.data.struct.Size
                            data = self.pe_file.get_memory_mapped_image()[data_rva:data_rva + size]

                            offset = 0
                            while True:
                                if offset >= size:
                                    break
                                ustr_length = self.pe_file.get_word_from_data(data[offset:offset + 2], 0)
                                offset += 2

                                if ustr_length == 0:
                                    continue

                                ustr = self.pe_file.get_string_u_at_rva(data_rva + offset, max_length=ustr_length)
                                offset += ustr_length * 2
                                strings.append((None, ustr))

                        if len(strings) > 0:
                            success = False
                            try:
                                comment = "%s (id:%s - lang_id:0x%04X [%s])" % (
                                    str(dir_type.name), str(nameID.name), language.id, lcid[language.id])
                            except KeyError:
                                comment = "%s (id:%s - lang_id:0x%04X [Unknown language])" % (
                                    str(dir_type.name), str(nameID.name), language.id)
                            res = ResultSection(SCORE['NULL'], "PE: STRINGS - %s" % comment)

                            for idx in xrange(len(strings)):
                                # noinspection PyBroadException
                                try:
                                    tag_value = strings[idx][1]

                                    # The following line crash chardet if a
                                    # UPX packed file as packed the resources...
                                    chardet.detect(tag_value)  # TODO: Find a better way to do this

                                    tag_value = tag_value.replace('\r', ' ').replace('\n', ' ')
                                    if strings[idx][0] is not None:
                                        res.add_line([strings[idx][0], ": ",
                                                      res_txt_tag(tag_value, TAG_TYPE['FILE_STRING'])])
                                    else:
                                        res.add_line(res_txt_tag(tag_value, TAG_TYPE['FILE_STRING']))

                                    make_tag(self.file_res, 'FILE_STRING', tag_value, weight='NULL',
                                             usage='IDENTIFICATION')
                                    success = True
                                except:
                                    pass

                            if success:
                                self.file_res.add_section(res)
                        else:
                            pass
    except AttributeError as e:
        self.log.debug("\t Error parsing output: " + repr(e))
def dump_rule_file(rule_list, fake_dependencies=False, show_header=True):
    if show_header:
        out = ["//\t%s rule(s)" % len(rule_list), "", ""]
    else:
        out = []

    modules = list(set([m for rule in rule_list for m in rule.get('modules', [])]))
    for m in modules:
        out.append('import "%s"' % m)
    out.append("")

    if fake_dependencies:
        depends = list(set([d for rule in rule_list for d in rule.get('depends', [])]))
        for d in depends:
            out.append(YaraParser.FAKE_RULE % d)
            out.append("")

    for rule in rule_list:
        if rule is None:
            continue

        out.append("%s %s%s {" % (rule['type'], rule['name'],
                                  {True: ": %s" % " ".join(rule['tags']),
                                   False: ""}[len(rule["tags"]) > 0]))

        # Do comments
        for c in rule['comments']:
            out.append(" // %s" % c)

        # Do meta. Try to preserve ordering
        if rule['meta']:
            out.append(" meta:")
            keys = rule['meta'].keys()

            if "rule_group" in keys:
                out.append(' rule_group = "%s"' % rule['meta']['rule_group'])
                keys.remove('rule_group')

                if rule['meta']['rule_group'] in keys:
                    out.append(' %s = "%s"' % (rule['meta']['rule_group'],
                                               rule['meta'][rule['meta']['rule_group']]))
                    keys.remove(rule['meta']['rule_group'])

            for x in YaraParser.RULE_GROUPS:
                if x in keys:
                    out.append(' %s = "%s"' % (x, rule['meta'][x]))
                    keys.remove(x)
            out.append(" ")

            do_space = False
            for i in YaraParser.RULE_IMPORTANT:
                if i in keys:
                    do_space = True
                    out.append(' %s = "%s"' % (i, rule['meta'][i]))
                    keys.remove(i)
            if do_space:
                out.append(" ")

            keys.sort()
            for k in keys:
                out.append(' %s = "%s"' % (k, rule['meta'][k]))
            out.append(" ")

        # Do Strings
        if rule['strings']:
            if len(set(rule['strings'])) > 1 or rule['strings'][0] != "":
                out.append(" strings:")
                for s in rule['strings']:
                    out.append(' %s' % s)
                out.append(" ")

        # Do conditions
        if rule['condition']:
            out.append(" condition:")
            for c in rule['condition']:
                out.append(' %s' % c)
            out.append(" ")

        out.extend(["}", "", ""])

    return safe_str("\n".join(out))
def parse_rule_file(self, data, debug=False, force_safe_str=False):
    out = []
    for line in data.splitlines():
        if self.in_rule and debug:
            print line

        line = line.strip()

        if line.startswith("/*"):
            self.in_comment = True

        if self.in_comment:
            if "*/" in line:
                self.in_comment = False
                # There might be data after the inline comment
                line = line.split('*/', 1)[1]
                if not line:
                    continue
            else:
                continue

        if not line.startswith("//") and not self.in_meta and not self.in_strings and not self.in_condition:
            prev_bracket = self.open_bracket
            self.open_bracket += line.count("{")
            if prev_bracket == 0 and self.open_bracket == 1:
                self.got_open = True
                # Conditions may be on the same line as the curly bracket
                temp_line = line.strip('{').strip()
                if temp_line.startswith("meta"):
                    self._switch_to("meta")
                elif temp_line.startswith("strings"):
                    self._switch_to("strings")
                elif temp_line.startswith("condition"):
                    self._switch_to("condition")

        if self.in_rule and not self.in_meta and not self.in_condition and not self.in_strings \
                and line.startswith("//"):
            line_data = line[2:].strip()
            if force_safe_str:
                line_data = safe_str(line_data)
            self.cur_rule['comments'].append(line_data)

        if line.startswith("rule "):
            self.cur_rule['type'] = "rule"
            self.in_rule = True
        elif line.startswith("private rule "):
            self.cur_rule['type'] = "private rule"
            self.in_rule = True
        elif line.startswith("global rule "):
            self.cur_rule['type'] = "global rule"
            self.in_rule = True
        elif line.startswith("global private rule "):
            self.cur_rule['type'] = "global private rule"
            self.in_rule = True

        if line.startswith("rule ") or line.startswith("private rule ") or line.startswith("global private rule "):
            if debug:
                print line

            line = line[len(self.cur_rule['type']) + 1:].split("//")[0]
            self.cur_rule['tags'] = []
            if ":" in line:
                self.cur_rule['name'], tags = line.split(':')
                self.cur_rule['name'] = self.cur_rule['name'].strip()
                tags = tags.split("{")[0].strip().split(" ")
                for t in tags:
                    if force_safe_str:
                        t = safe_str(t)
                    self.cur_rule['tags'].append(t)
            else:
                self.cur_rule['name'] = line.split("{")[0].strip()

            if force_safe_str:
                self.cur_rule['name'] = safe_str(self.cur_rule['name'])

        if line.startswith("meta"):
            self._switch_to("meta")
        elif line.startswith("strings"):
            self._switch_to("strings")
        elif line.startswith("condition"):
            self._switch_to("condition")
        elif not line.startswith("}"):
            if self.in_meta and "=" in line:
                key, val = line.split("=", 1)
                key = key.strip()
                val = val.strip().strip('"')
                if force_safe_str:
                    val = safe_str(val)
                self.cur_rule['meta'][key] = val
            elif self.in_strings and line != "":
                if force_safe_str:
                    line = safe_str(line)
                self.cur_rule['strings'].append(line)
            elif self.in_condition and line != "":
                if force_safe_str:
                    line = safe_str(line)
                self.cur_rule["condition"].append(line)

        if not line.startswith("//") and not self.in_meta and not self.in_strings:
            self.open_bracket -= line.count("}")

            if self.got_open and self.open_bracket == 0:
                if debug:
                    pprint.pprint(self.cur_rule)
                    print ""
                    print ""

                yara_version = self.cur_rule.get('meta', {}).get('yara_version', "3.6")
                modules = self.YARA_MODULES.get(yara_version, [])
                self.cur_rule['depends'], self.cur_rule['modules'] = \
                    self.parse_dependencies(self.cur_rule['condition'], modules)
                out.append(self.cur_rule)
                self._reset()

    return out
def execute(self, request):
    if request.task.depth > 3:
        self.log.debug("Cuckoo is exiting because it currently does not execute on great great grand children.")
        request.set_save_result(False)
        return

    self.session = requests.Session()
    self.task = request.task
    request.result = Result()
    self.file_res = request.result
    file_content = request.get()
    self.cuckoo_task = None
    self.al_report = None
    self.file_name = os.path.basename(request.path)

    full_memdump = False
    pull_memdump = False

    # Check the file extension
    original_ext = self.file_name.rsplit('.', 1)
    tag_extension = tag_to_extension.get(self.task.tag)

    # NOTE: Cuckoo still tries to identify files itself, so we only force the extension/package if the user
    # specifies one. However, we go through the trouble of renaming the file because the only way to have
    # certain modules run is to use the appropriate suffix (.jar, .vbs, etc.)

    # Check for a valid tag
    if tag_extension is not None and 'unknown' not in self.task.tag:
        file_ext = tag_extension
    # Check if the file was submitted with an extension
    elif len(original_ext) == 2:
        submitted_ext = original_ext[1]
        if submitted_ext not in SUPPORTED_EXTENSIONS:
            # This is the case where the submitted file was NOT identified, and the provided extension
            # isn't in the list of extensions that we explicitly support.
            self.log.debug("Cuckoo is exiting because it doesn't support the provided file type.")
            request.set_save_result(False)
            return
        else:
            # This is a usable extension. It might not run (if the submitter has lied to us).
            file_ext = '.' + submitted_ext
    else:
        # This is unknown without an extension that we accept/recognize.. no scan!
        self.log.debug("Cuckoo is exiting because the file type could not be identified. %s %s" %
                       (tag_extension, self.task.tag))
        return

    # Rename based on the found extension.
    if file_ext and self.task.sha256:
        self.file_name = self.task.sha256 + file_ext

    # Parse user-specified options
    kwargs = dict()
    task_options = []

    analysis_timeout = request.get_param('analysis_timeout')

    generate_report = request.get_param('generate_report')
    if generate_report is True:
        self.log.debug("Setting generate_report flag.")

    dump_processes = request.get_param('dump_processes')
    if dump_processes is True:
        self.log.debug("Setting procmemdump flag in task options")
        task_options.append('procmemdump=yes')

    dll_function = request.get_param('dll_function')
    if dll_function:
        task_options.append('function={}'.format(dll_function))

    arguments = request.get_param('arguments')
    if arguments:
        task_options.append('arguments={}'.format(arguments))

    # Parse extra options (these aren't user selectable because they are dangerous/slow)
    if request.get_param('pull_memory') and request.task.depth == 0:
        pull_memdump = True

    if request.get_param('dump_memory') and request.task.depth == 0:
        # Full system dump and volatility scan
        full_memdump = True
        kwargs['memory'] = True

    if request.get_param('no_monitor'):
        task_options.append("free=yes")

    routing = request.get_param('routing')
    if routing is None:
        routing = self.enabled_routes[0]

    select_machine = self.find_machine(self.task.tag, routing)

    if select_machine is None:
        # No matching VM and no default
        self.log.debug("No Cuckoo vm matches tag %s and no machine is tagged as default." % select_machine)
        request.set_save_result(False)
        return

    kwargs['timeout'] = analysis_timeout
    kwargs['options'] = ','.join(task_options)
    if select_machine:
        kwargs['machine'] = select_machine

    self.cuckoo_task = CuckooTask(self.file_name, **kwargs)

    if self.restart_interval <= 0 or not self.is_cuckoo_ready():
        cuckoo_up = self.trigger_cuckoo_reset()
        if not cuckoo_up:
            self.session.close()
            raise RecoverableError("While restarting Cuckoo, Cuckoo never came back up.")
    else:
        self.restart_interval -= 1

    try:
        self.cuckoo_submit(file_content)
        if self.cuckoo_task.report:
            try:
                machine_name = None
                report_info = self.cuckoo_task.report.get('info', {})
                machine = report_info.get('machine', {})

                if isinstance(machine, dict):
                    machine_name = machine.get('name')

                if machine_name is None:
                    self.log.debug('Unable to retrieve machine name from result.')
                    guest_ip = ""
                else:
                    guest_ip = self.report_machine_info(machine_name)

                self.log.debug("Generating AL Result from Cuckoo results..")
                success = generate_al_result(self.cuckoo_task.report, self.file_res, file_ext, guest_ip,
                                             self.SERVICE_CLASSIFICATION)
                if success is False:
                    err_str = self.get_errors()
                    if "Machinery error: Unable to restore snapshot" in err_str:
                        raise RecoverableError("Cuckoo is restarting container: %s", err_str)

                    raise CuckooProcessingException("Cuckoo was unable to process this file. %s", err_str)
            except RecoverableError:
                self.trigger_cuckoo_reset(5)
                raise
            except Exception as e:
                # This is non-recoverable unless we were stopped during processing
                self.trigger_cuckoo_reset(1)
                if self.should_run:
                    self.log.exception("Error generating AL report: ")
                    raise CuckooProcessingException("Unable to generate cuckoo al report for task %s: %s" %
                                                    (safe_str(self.cuckoo_task.id), safe_str(e)))

            if self.check_stop():
                raise RecoverableError("Cuckoo stopped during result processing..")

            if generate_report is True:
                self.log.debug("Generating cuckoo report tar.gz.")

                # Submit cuckoo analysis report archive as a supplementary file
                tar_report = self.cuckoo_query_report(self.cuckoo_task.id, fmt='all', params={'tar': 'gz'})
                if tar_report is not None:
                    tar_report_path = os.path.join(self.working_directory, "cuckoo_report.tar.gz")
                    try:
                        report_file = open(tar_report_path, 'w')
                        report_file.write(tar_report)
                        report_file.close()
                        self.task.add_supplementary(tar_report_path,
                                                    "Cuckoo Sandbox analysis report archive (tar.gz)")
                    except:
                        self.log.exception("Unable to add tar of complete report for task %s" %
                                           self.cuckoo_task.id)

            self.log.debug("Checking for dropped files and pcap.")
            # Submit dropped files and pcap if available:
            self.check_dropped(request, self.cuckoo_task.id)
            self.check_pcap(self.cuckoo_task.id)

            # Check process memory dumps
            if dump_processes is True:
                self.download_memdump('procmemdump')

            # We only retrieve full memory dumps for top-level files, and only if it was specified in
            # extra options.
            if full_memdump and pull_memdump:
                self.download_memdump('fullmemdump')
        else:
            # We didn't get a report back.. cuckoo has failed us
            if self.should_run:
                self.trigger_cuckoo_reset(5)
                self.log.info("Raising recoverable error for running job.")
                raise RecoverableError("Unable to retrieve cuckoo report. The following errors were detected: %s" %
                                       safe_str(self.cuckoo_task.errors))

    except Exception as e:
        # Delete the task now..
        self.log.info('General exception caught during processing: %s' % e)
        if self.cuckoo_task and self.cuckoo_task.id is not None:
            self.cuckoo_delete_task(self.cuckoo_task.id)
        self.session.close()

        # Send the exception off to ServiceBase
        raise

    # Delete and exit
    if self.cuckoo_task and self.cuckoo_task.id is not None:
        self.cuckoo_delete_task(self.cuckoo_task.id)
    self.session.close()