def execute(self, request):
    """Parse an EML sample and build the service result.

    Tags header fields (from/to/cc/date/subject/message-id, received
    IPs/domains), extracts body text and attachments as files, collects URIs
    found in the bodies, and optionally attaches the raw eml_parser output as
    supplementary data.

    :param request: service request; provides file_contents, params, and
        collects extracted/supplementary files and the final result.
    """
    parser = eml_parser.eml_parser.EmlParser(include_raw_body=True, include_attachment_data=True)

    # Validate URLs in sample, strip out defanging brackets ([]) if found.
    # validate_urls signals via `retry` that it rewrote something, so loop
    # until the content is stable.
    content_str = request.file_contents.decode(errors="ignore")
    content_str, retry = self.validate_urls(content_str)
    while retry:
        content_str, retry = self.validate_urls(content_str)

    parsed_eml = parser.decode_email_bytes(content_str.encode())
    result = Result()
    header = parsed_eml['header']

    # A missing "from" header is treated as a failed parse (else branch below).
    if "from" in header:
        all_uri = set()
        for body_counter, body in enumerate(parsed_eml['body']):
            if request.get_param('extract_body_text'):
                fd, path = mkstemp()
                with open(path, 'w') as f:
                    f.write(body['content'])
                os.close(fd)
                request.add_extracted(path, "body_" + str(body_counter), "Body text")
            if "uri" in body:
                for uri in body['uri']:
                    all_uri.add(uri)

        kv_section = ResultSection('Email Headers', body_format=BODY_FORMAT.KEY_VALUE, parent=result)

        # Basic tags
        kv_section.add_tag("network.email.address", header['from'].strip())
        for to in header['to']:
            # .strip() added for consistency with every other address tag.
            kv_section.add_tag("network.email.address", to.strip())
        kv_section.add_tag("network.email.date", str(header['date']).strip())
        kv_section.add_tag("network.email.subject", header['subject'].strip())

        # Add CCs to tags.
        # BUGFIX: this loop previously iterated header['to'] again, so CC
        # addresses were never tagged.
        if 'cc' in header:
            for cc in header['cc']:
                kv_section.add_tag("network.email.address", cc.strip())

        # Add Message ID to tags (lives in the nested raw-header map).
        if 'message-id' in header['header']:
            kv_section.add_tag("network.email.msg_id", header['header']['message-id'][0].strip())

        # Add tags for received IPs
        if 'received_ip' in header:
            for ip in header['received_ip']:
                kv_section.add_tag('network.static.ip', ip.strip())

        # Add tags for received domains
        if 'received_domain' in header:
            for dom in header['received_domain']:
                kv_section.add_tag('network.static.domain', dom.strip())

        # If we've found URIs, add them to a section
        if len(all_uri) > 0:
            uri_section = ResultSection('URIs Found:', parent=result)
            for uri in all_uri:
                uri_section.add_line(uri)
                uri_section.add_tag('network.static.uri', uri.strip())
                parsed_url = urlparse(uri)
                # BUGFIX: only tag when a hostname exists; previously a
                # hostname-less URI fell into the else branch and tagged
                # None as a network.static.domain.
                if parsed_url.hostname:
                    if re.match(IP_ONLY_REGEX, parsed_url.hostname):
                        uri_section.add_tag('network.static.ip', parsed_url.hostname)
                    else:
                        uri_section.add_tag('network.static.domain', parsed_url.hostname)

        # Bring all headers together: merge the nested raw-header map into the
        # top-level header dict, dropping the noisy "received" entries.
        extra_header = header.pop('header', {})
        header.pop('received', None)
        header.update(extra_header)
        kv_section.body = json.dumps(header, default=self.json_serial)

        # Extract attachments; eml_parser hands back the raw bytes base64-encoded.
        if "attachment" in parsed_eml:
            for attachment in parsed_eml['attachment']:
                fd, path = mkstemp()
                with open(path, 'wb') as f:
                    f.write(base64.b64decode(attachment['raw']))
                os.close(fd)
                request.add_extracted(path, attachment['filename'], "Attachment ")
            ResultSection('Extracted Attachments:',
                          body="\n".join([x['filename'] for x in parsed_eml['attachment']]),
                          parent=result)

        if request.get_param('save_emlparser_output'):
            fd, temp_path = tempfile.mkstemp(dir=self.working_directory)
            with os.fdopen(fd, "w") as myfile:
                myfile.write(json.dumps(parsed_eml, default=self.json_serial))
            request.add_supplementary(
                temp_path, "parsing.json",
                "These are the raw results of running GOVCERT-LU's eml_parser"
            )
    else:
        text_section = ResultSection('EML parsing results')
        text_section.add_line("Could not parse EML")
        result.add_section(text_section)

    request.result = result
def dump_property(self, field, path, index, res, parent_res, is_orphan):
    """Render one OLE2 directory entry ("property") as a subsection of `res`.

    Resolves the entry's on-disk address (short-sector streams are mapped back
    through the root entry chain), records metadata/CLSID/timestamps as tags
    and KEY_VALUE body entries, applies the \\1CompObj sanity heuristic, and
    hands known addresses to any additional parser registered for them.

    :param field: hachoir field for the directory entry.
    :param path: storage path prefix the entry lives under.
    :param index: entry index as a string ('0' is the root entry).
    :param res: section to attach the produced subsection to.
    :param parent_res: section handed through to self.parse_field.
    :param is_orphan: True when the entry is not reachable from the root;
        suppresses the invalid-stream bookkeeping.
    """
    # Nameless entries are unused directory slots — nothing to report.
    if field['name'].value != '':
        name = field['name'].display[1:-1]  # display is quoted; drop the quotes
        p_type = field['type'].value
        if path[-1:] == '\\':
            abs_name = f"{path}{name}"
        else:
            abs_name = f"{path}\\{name}"
        prop_res = ResultSection(f"Property: {abs_name}", body_format=BODY_FORMAT.KEY_VALUE, body={})

        # if type is not: 1- storage, 2- stream and not 5- root, that is weird.
        if p_type != 1 and p_type != 2 and p_type != 5:
            self.invalid_properties_count += 1

        # for properties not storage (which should be seen like a folder)
        if p_type != 1:
            size = field['size'].value
        else:
            size = 0

        address = 0
        if size > 0:
            if field['size'].value < self.ole2parser['header/threshold'].value and index != '0':
                # Short-sector stream: we first get the offset from the short
                # block but then we need to map it back to the file, which is
                # stored inside the root entry's chain (root[X]).
                offset = field['start'].value * self.ole2parser.ss_size
                keep_looping = True
                root_index = 0
                while keep_looping:
                    try:
                        current_root = self.ole2parser[f"root[{root_index}]"]
                        if offset == 0 or current_root.size > offset:
                            address = current_root.address + offset
                            keep_looping = False
                        else:
                            offset -= current_root.size
                            root_index += 1
                    except MissingField:
                        # Ran out of root blocks: the stream cannot be mapped.
                        keep_looping = False
                        address = None
                        if not is_orphan:
                            self.invalid_streams.append(field['name'].display)
            else:
                # Regular stream: plain sector addressing past the header.
                address = HEADER_SIZE + field['start'].value * self.ole2parser.sector_size
        else:
            address = 0

        # BUGFIX: address is None when the short-sector mapping above failed;
        # the old `address >= 0` comparison raised TypeError on Python 3
        # instead of reaching the "could not map" message below.
        if address is not None and address >= 0:
            # NOTE(review): address appears to be a hachoir bit address
            # (// 8 converts to a byte offset) — confirm against hachoir docs.
            prop_res.body['property_meta'] = \
                f"offset: {hex(address // 8)} size: {hex(size)} / {field['type'].display} / " \
                f"{field['decorator'].display} / id={index} left={field['left'].display} " \
                f"right={field['right'].display} child={field['child'].display}"
        else:
            prop_res.body['property_meta'] = \
                f"offset: could not map.. size: {hex(size)} / {field['type'].display} / " \
                f"{field['decorator'].display} / id={index} left={field['left'].display} " \
                f"right={field['right'].display} child={field['child'].display}"

        # for root or storage entries: report CLSID and timestamps when set.
        if p_type == 5 or p_type == 1:
            if field['clsid'].display != "Null GUID: 00000000-0000-0000-0000-000000000000":
                clsid_desc = self.GUID_DESC.get(field['clsid'].display, "unknown clsid")
                prop_res.body["clsid"] = f"{field['clsid'].display} ({clsid_desc})"
                prop_res.add_tag('file.ole.clsid', field['clsid'].display)
            if field['creation'].display != "1601-01-01 00:00:00":
                prop_res.body["creation_date"] = field['creation'].display
                prop_res.add_tag('file.date.creation', field['creation'].display)
            if field['lastmod'].display != "1601-01-01 00:00:00":
                prop_res.body["last_modified_date"] = field['lastmod'].display
                prop_res.add_tag('file.date.last_modified', field['lastmod'].display)

        # fixes up a bug:
        if name == '\\1CompObj':
            if p_type != 2:
                res_error = ResultSection(
                    f"\\1CompObj type is '{p_type}' and it should be 2 (stream) "
                    f"... really suspicious.")
                res_error.set_heuristic(41)
                prop_res.add_subsection(res_error)
                # Recover the real size (it was forced to 0 for storage type).
                size = field['size'].value

            # Apparently, we can get to this point and have
            # office_root_entry_parser set to None. Not sure what we should do
            # about that but trying to use that member variable seems like a
            # bad idea...
            if self.office_root_entry_parser is not None:
                temp_field = None
                for f in self.office_root_entry_parser.createFields():
                    if f.name.startswith('compobj'):
                        temp_field = f

                # cache all the sub-fields....
                for _ in temp_field:
                    pass
                self.parse_field(temp_field, prop_res, self.PARSING_MODE_DISPLAY, parent_res)

        if size > 0 and index != '0':
            field_with_other_parser = self.additional_parsing_fields.get(address, None)
            if field_with_other_parser:
                # noinspection PyTypeChecker
                self.parse_field(field_with_other_parser, prop_res, self.PARSING_MODE_DISPLAY, parent_res)

        # NOTE(review): when only property_meta was added (len == 1) the body
        # stays a dict and is never json.dumps'd — presumably handled (or
        # dropped) by the framework; confirm before changing.
        if len(prop_res.body) > 1:
            prop_res.body = json.dumps(prop_res.body)
        res.add_subsection(prop_res)