def parse_attachment(message_part):
    """Return a file-like object for an attachment part, or None.

    ``message_part`` must provide ``get``, ``get_payload`` and
    ``get_content_type`` (the ``email.message.Message`` interface).

    Only parts whose ``Content-Disposition`` type is ``attachment`` are
    handled; anything else yields ``None``.  The returned ``StringIO`` wraps
    the decoded payload and carries the extra attributes ``content_type``,
    ``size``, ``name``, ``create_date``, ``mod_date`` and ``read_date``.
    Dates are kept as the raw header strings.
    """
    content_disposition = message_part.get("Content-Disposition", None)
    if not content_disposition:
        return None

    # NOTE: this is a plain split, so a quoted value that itself contains
    # ";" is still cut into several segments.
    dispositions = content_disposition.strip().split(";")
    if dispositions[0].strip().lower() != "attachment":
        return None

    file_data = message_part.get_payload(decode=True)
    if file_data is None:
        # No decodable payload: expose an empty attachment instead of
        # failing on len(None).
        file_data = ""

    attachment = StringIO(file_data)
    attachment.content_type = message_part.get_content_type()
    attachment.size = len(file_data)
    attachment.name = None
    attachment.create_date = None
    attachment.mod_date = None
    attachment.read_date = None

    for param in dispositions[1:]:
        # partition() never raises: it tolerates "=" inside the value and
        # the empty segment left behind by a trailing ";".
        name, sep, value = param.partition("=")
        if not sep:
            continue
        # Parameters normally follow "; " so the name carries leading
        # whitespace; values are usually wrapped in double quotes.
        name = name.strip().lower()
        value = value.strip().strip('"')
        if name == "filename":
            attachment.name = value
        elif name == "create-date":
            attachment.create_date = value  # TODO: datetime
        elif name == "modification-date":
            attachment.mod_date = value  # TODO: datetime
        elif name == "read-date":
            attachment.read_date = value  # TODO: datetime
    return attachment
def parse_attachment(self, message_part):
    """Build an annotated ``StringIO`` for an attachment-like message part.

    A part qualifies when its ``Content-Disposition`` type (as reported by
    ``self.parse_dispositions``) is ``attachment``, or ``inline`` with a
    ``filename`` parameter.  Returns ``None`` for every other part.

    The result wraps the decoded payload (empty when the payload is
    ``None``) and carries ``content_type``, ``size``, ``name``,
    ``create_date``, ``mod_date`` and ``read_date`` attributes.  The name
    falls back to the ``name`` parameter of ``Content-Type`` when the
    disposition has no ``filename``.
    """
    header = message_part.get("Content-Disposition", None)
    if not header:
        return None

    kind, params = self.parse_dispositions(header)
    if kind != "attachment" and not (kind == 'inline' and 'filename' in params):
        return None

    raw = message_part.get_payload(decode=True)
    data = "" if raw is None else raw

    stream = StringIO(data)
    stream.content_type = message_part.get_content_type()
    stream.size = len(data)
    stream.name = None
    stream.create_date = None
    stream.mod_date = None
    stream.read_date = None

    if "filename" in params:
        stream.name = params['filename']
    else:
        # No filename in the disposition: try the Content-Type "name" param.
        type_header = message_part.get("Content-Type", None)
        if type_header:
            _unused, type_params = self.parse_dispositions(type_header)
            if 'name' in type_params:
                stream.name = type_params['name']

    # TODO: datetime -- the values are copied over as raw header strings.
    date_fields = (
        ("create-date", "create_date"),
        ("modification-date", "mod_date"),
        ("read-date", "read_date"),
    )
    for key, attr in date_fields:
        if key in params:
            setattr(stream, attr, params[key])

    return stream
def parse_attachments_poptres(self, content_disposition, part):
    """Store an ``attachment`` message part as an ``Attachment`` record.

    ``content_disposition`` is the raw ``Content-Disposition`` header value
    and ``part`` the message part it belongs to.  When the disposition type
    is ``attachment`` the decoded payload is wrapped in an
    ``InMemoryUploadedFile``, saved on a new ``Attachment`` owned by
    ``self.usr``, and that record is returned.  For a missing/empty header
    or any other disposition type the method returns ``None``.
    """
    # Guard first: calling .strip() on a missing header used to raise.
    if not content_disposition:
        return None

    dispositions = content_disposition.strip().split(";")
    if dispositions[0].strip().lower() != "attachment":
        return None

    file_data = part.get_payload(decode=True)
    attachment = StringIO(file_data)
    attachment.content_type = part.get_content_type()
    attachment.size = len(file_data)
    attachment.name = None
    attachment.create_date = None
    attachment.mod_date = None
    attachment.read_date = None

    for param in dispositions[1:]:
        # partition() copes with "=" inside a value and with the empty
        # segment produced by a trailing ";" (split("=") raised on both).
        name, sep, value = param.partition("=")
        if not sep:
            continue
        name = name.lower().strip()
        value = value.replace('"', '').strip()
        if name == "filename":
            attachment.name = value
        elif name == "create-date":
            attachment.create_date = value
        elif name == "modification-date":
            attachment.mod_date = value
        elif name == "read-date":
            attachment.read_date = value

    # Seek to the end so tell() reports the stream length for the upload.
    attachment.seek(0, 2)
    upload = InMemoryUploadedFile(attachment, "", attachment.name,
                                  attachment.content_type, attachment.tell(),
                                  None)
    atch = Attachment()
    atch.user = self.usr
    atch.file.save(attachment.name, upload)
    atch.save()
    return atch
def parse_attachment(message_part):
    """Return a file-like object for an attachment part, or None.

    ``message_part`` must provide ``get``, ``get_payload`` and
    ``get_content_type``.  Parts whose ``Content-Disposition`` type is not
    ``attachment`` yield ``None``.

    The returned ``StringIO`` wraps the decoded payload and carries the
    attributes ``content_type``, ``size``, ``name``, ``create_date``
    (from ``create-date`` or ``creation-date``), ``mod_date`` and
    ``read_date``.  Dates are kept as raw header strings.
    """
    content_disposition = message_part.get("Content-Disposition", None)
    if not content_disposition:
        return None

    dispositions = content_disposition.strip().split(";")
    if dispositions[0].strip().lower() != "attachment":
        return None

    file_data = message_part.get_payload(decode=True)
    if file_data is None:
        # Avoid len(None) when the part has no decodable payload.
        file_data = ""

    # Used a StringIO object since PIL didn't seem to recognize
    # images using a custom file-like object
    attachment = StringIO(file_data)
    attachment.content_type = message_part.get_content_type()
    attachment.size = len(file_data)
    attachment.name = None
    attachment.create_date = None
    attachment.mod_date = None
    attachment.read_date = None

    for param in dispositions[1:]:
        # partition() tolerates "=" inside the value and the empty segment
        # after a trailing ";" -- both made split("=") raise ValueError.
        name, sep, value = param.partition("=")
        if not sep:
            continue
        name = name.strip().lower()
        # Drop surrounding whitespace and the double quotes around values.
        value = value.strip().strip('"')
        if name == "filename":
            attachment.name = value
        elif name in ("create-date", "creation-date"):
            attachment.create_date = value  # TODO: datetime
        elif name == "modification-date":
            attachment.mod_date = value  # TODO: datetime
        elif name == "read-date":
            attachment.read_date = value  # TODO: datetime
    return attachment
def parse_attachment(self, message_part):
    """Build an annotated ``StringIO`` for an attachment-like message part.

    A part qualifies when ``self.parse_dispositions`` reports its
    ``Content-Disposition`` type as ``attachment``, or as ``inline`` with a
    ``filename`` parameter; otherwise ``None`` is returned.

    The result wraps the decoded payload (empty when the payload is
    ``None``) and carries ``content_type``, ``size``, ``name`` (passed
    through ``self.parse_header_field``), ``create_date``, ``mod_date`` and
    ``read_date`` attributes.
    """
    content_disposition = message_part.get("Content-Disposition", None)
    if not content_disposition:
        return None

    dispo_type, dispo_dict = self.parse_dispositions(content_disposition)
    if dispo_type != "attachment" and not (
            dispo_type == 'inline' and 'filename' in dispo_dict):
        return None

    file_data = message_part.get_payload(decode=True)
    if file_data is None:
        file_data = ""

    attachment = StringIO(file_data)
    attachment.content_type = message_part.get_content_type()
    attachment.size = len(file_data)
    attachment.name = None
    attachment.create_date = None
    attachment.mod_date = None
    attachment.read_date = None

    # Each parameter is independent.  The previous if/elif chain skipped
    # every date as soon as a filename was present, and otherwise kept
    # only the first date it found.
    if "filename" in dispo_dict:
        attachment.name = self.parse_header_field(dispo_dict['filename'])
    if "create-date" in dispo_dict:
        attachment.create_date = dispo_dict['create-date']  # TODO: datetime
    if "modification-date" in dispo_dict:
        attachment.mod_date = dispo_dict['modification-date']  # TODO: datetime
    if "read-date" in dispo_dict:
        attachment.read_date = dispo_dict['read-date']  # TODO: datetime
    return attachment
def parse_attachment(self, message_part):
    """Return a ``StringIO`` holding an attachment part, or None.

    Only parts whose ``Content-Disposition`` type is ``attachment`` are
    handled.  The decoded payload is written into a fresh ``StringIO``; the
    stream is left positioned after the written data, so callers should use
    ``getvalue()`` or ``seek(0)`` before reading.

    The result carries ``content_type``, ``size`` and ``name`` (taken from
    the ``filename`` parameter, double quotes removed).  ``create_date``,
    ``mod_date`` and ``read_date`` are always ``None`` in this variant.
    """
    content_disposition = message_part.get("Content-Disposition", None)
    if not content_disposition:
        return None

    dispositions = content_disposition.strip().split(";")
    if dispositions[0].strip().lower() != "attachment":
        return None

    file_data = message_part.get_payload(decode=True)
    attachment = StringIO()
    attachment.write(file_data)
    attachment.content_type = message_part.get_content_type()
    attachment.size = len(file_data)
    attachment.name = None
    attachment.create_date = None
    attachment.mod_date = None
    attachment.read_date = None

    for param in dispositions[1:]:
        # partition() tolerates "=" inside the value and empty segments.
        name, sep, value = param.partition("=")
        # Only "filename" may set the name; previously every parameter
        # (size, dates, ...) overwrote it and the last one won.
        if sep and name.strip().lower() == "filename":
            attachment.name = value.replace('"', '').strip()
    return attachment
def parse_attachment(message_part):
    """Return a file-like object for an attachment or inline part, or None.

    Parts whose ``Content-Disposition`` type is ``attachment`` or ``inline``
    are wrapped in a ``StringIO`` carrying ``content_type``, ``size``,
    ``name``, ``create_date``, ``mod_date`` and ``read_date`` attributes.
    Any other part, or a part without the header, yields ``None``.

    The ``filename`` parameter is passed through ``email.Header`` decoding
    so encoded-word names are converted using their declared charset.
    """
    content_disposition = message_part.get("Content-Disposition", None)
    if not content_disposition:
        return None

    dispositions = content_disposition.strip().split(";")
    if dispositions[0].strip().lower() not in ("attachment", "inline"):
        return None

    file_data = message_part.get_payload(decode=True)
    # Used a StringIO object since PIL didn't seem to recognize
    # images using a custom file-like object
    attachment = StringIO(file_data)
    attachment.content_type = message_part.get_content_type()
    attachment.size = len(file_data)
    attachment.name = None
    attachment.create_date = None
    attachment.mod_date = None
    attachment.read_date = None

    for param in dispositions[1:]:
        param = param.strip()
        if "=" not in param:
            # Empty segment after a trailing ";" (or a bare token): the
            # two-value unpack below used to raise ValueError on these.
            continue
        name, value = param.split("=", 1)
        name = name.strip().lower()
        if name == "filename":
            header = email.Header.Header(value.strip('"'))
            decoded = email.Header.decode_header(header)
            # NOTE: only the first decoded chunk is used for the name.
            fname, charset = decoded[0]
            if charset is not None:
                fname = fname.decode(charset)
            attachment.name = fname
        elif name == "create-date":
            attachment.create_date = value  # TODO: datetime
        elif name == "modification-date":
            attachment.mod_date = value  # TODO: datetime
        elif name == "read-date":
            attachment.read_date = value  # TODO: datetime
    return attachment
def parse_attachment(message_part, attachments=None):
    """Return an annotated ``StringIO`` for a named attachment, or None.

    The ``Content-Disposition`` header is parsed with ``parse_headers`` in
    relaxed mode.  ``None`` is returned when the header is missing, the
    disposition is not ``attachment``, or no ``filename`` is given.

    When the part has no decodable payload and its payload is a list of
    sub-messages, each one is handed to ``parse2`` together with
    ``attachments`` and ``None`` is returned.

    Any exception is reported on stderr together with the offending header
    and then re-raised.
    """
    content_disposition = message_part.get("Content-Disposition", None)
    if not content_disposition:
        return None

    try:
        cd = parse_headers(content_disposition, relaxed=True)
        if cd.disposition.lower() != "attachment":
            return None
        if "filename" not in cd.assocs:
            # print error or warning?
            return None

        file_data = message_part.get_payload(decode=True)
        if not file_data:
            payload = message_part.get_payload()
            if isinstance(payload, list):
                for msgobj in payload:
                    parse2(msgobj, attachments)
                return None
            # Empty, non-multipart payload: dump what we got for diagnosis.
            print >>sys.stderr, payload
            print >>sys.stderr, message_part.get_content_charset()
            if file_data is None:
                # Keep len() below from failing on a missing payload.
                file_data = ""

        attachment = StringIO(file_data)
        attachment.content_type = message_part.get_content_type()
        attachment.size = len(file_data)
        attachment.name = cd.assocs['filename']
        # Absent parameters leave the attribute as None.
        attachment.create_date = cd.assocs.get("create-date")  # TODO: datetime
        attachment.mod_date = cd.assocs.get("modification-date")  # TODO: datetime
        attachment.read_date = cd.assocs.get("read-date")  # TODO: datetime
        return attachment
    except Exception:
        print >>sys.stderr, "content_disposition:", content_disposition
        raise
def parse_attachment(message_part, attachments=None):
    """Return an annotated ``StringIO`` for a named attachment, or None.

    The ``Content-Disposition`` header is parsed with ``parse_headers`` in
    relaxed mode.  ``None`` is returned when the header is missing, the
    disposition is not ``attachment``, no ``filename`` is given, or the
    decoded payload is empty.  In the empty case a list payload has each
    sub-message handed to ``_parse2`` first.

    Exceptions are reported on stderr with the header value, then re-raised.
    """
    content_disposition = message_part.get("Content-Disposition", None)
    if not content_disposition:
        return None

    try:
        parsed = parse_headers(content_disposition, relaxed=True)
        if parsed.disposition.lower() != "attachment":
            return None
        if "filename" not in parsed.assocs:
            # print error or warning?
            return None

        data = message_part.get_payload(decode=True)
        if not data:
            nested = message_part.get_payload()
            if isinstance(nested, list):
                for child in nested:
                    _parse2(child, attachments)
            return None  # PSIPHON: fixed conditional return

        result = StringIO(data)
        result.content_type = message_part.get_content_type()
        result.size = len(data)
        result.name = parsed.assocs['filename']
        result.create_date = None
        result.mod_date = None
        result.read_date = None

        # Header parameter -> attribute that receives its raw value.
        date_attrs = {
            "create-date": "create_date",  # TODO: datetime
            "modification-date": "mod_date",  # TODO: datetime
            "read-date": "read_date",  # TODO: datetime
        }
        for key, val in parsed.assocs.iteritems():
            target = date_attrs.get(key)
            if target is not None:
                setattr(result, target, val)
        return result
    except:
        print >> sys.stderr, "content_disposition:", content_disposition
        raise
def parse_attachment(message_part, attachments=None):
    """
    Extract the attachment and metadata about it from the message.

    Returns a StringIO wrapping the decoded payload, with ``content_type``,
    ``size``, ``create_date``, ``mod_date``, ``read_date`` and ``name``
    attributes set on it.  Returns None when the part has no
    Content-Disposition parameters, when ``attachment`` is not among them,
    when the payload is a list of sub-messages (each is passed to
    ``parse2`` with ``attachments``), or when any exception is raised while
    parsing (it is logged to stderr and counted in
    ``sharedVariablesAcrossModules.errorCounter``).
    """
    params = message_part.get_params(None, 'Content-Disposition')
    if params:
        # If a 'part' has a Content-Disposition, we assume it is an attachment
        try:
            # Turn the parameter pairs into a dict for keyed lookups below.
            params = dict(params)
            print('\tContent-Disposition (for following email)', params)
            if 'attachment' in params:
                # Not sure what's going on here
                # Why get payload with decode, then try again and reparse?
                # See details at
                # http://docs.python.org/2/library/email.message.html#email.message.Message.get_payload
                file_data = message_part.get_payload(decode=True)
                if not file_data:
                    payload = message_part.get_payload()
                    if isinstance(payload, list):
                        for msgobj in payload:
                            # TODO not sure this actually does anything
                            parse2(msgobj, attachments)
                        return None
                    # Empty, non-list payload: dump it and its charset to
                    # stderr for diagnosis, then carry on.
                    print(message_part.get_payload(), file=sys.stderr)
                    print(message_part.get_content_charset(), file=sys.stderr)
                attachment = StringIO(file_data)
                attachment.content_type = message_part.get_content_type()
                # NOTE(review): a 'size' header parameter is used as-is
                # (presumably a string), while the fallback is an int from
                # len() -- confirm callers cope with both.
                attachment.size = params.get('size', len(file_data))
                attachment.create_date = params.get('create-date')
                attachment.mod_date = params.get('modification-date')
                attachment.read_date = params.get('read-date')
                # TODO convert dates to datetime
                filename = message_part.get_filename(None)
                if filename:
                    # Filenames may be encoded with =?encoding?...
                    # If so, convert to unicode
                    # Only the first decoded chunk is inspected here.
                    name, encoding = email.header.decode_header(filename)[0]
                    if encoding:
                        print(
                            '\t{filename} encoded with {encoding}, converting to unicode'
                            .format(filename=filename, encoding=encoding))
                        filename = name.decode(encoding)
                else:
                    # filename not in Content-Disposition
                    print(
                        """Warning, no filename found in: [{%s}%s] Content-Disposition: %s or Content-Type"""
                        % (sharedVariablesAcrossModules.sourceFileUUID,
                           sharedVariablesAcrossModules.sourceFilePath, params),
                        file=sys.stderr)
                    # Fall back to a random UUID so extraction can proceed.
                    filename = six.text_type(uuid.uuid4())
                    print("Attempting extraction with random filename: %s" %
                          (filename), file=sys.stderr)
                # Remove newlines from filename because that breaks everything
                filename = filename.replace("\r", "").replace("\n", "")
                attachment.name = filename
                return attachment
        except Exception as inst:
            # Best effort: report the failure, count it, and fall through to
            # the final ``return None`` instead of propagating.
            print(type(inst), file=sys.stderr)
            print(inst.args, file=sys.stderr)
            print("Error parsing: file: {%s}%s" %
                  (sharedVariablesAcrossModules.sourceFileUUID,
                   sharedVariablesAcrossModules.sourceFilePath),
                  file=sys.stderr)
            print("Error parsing: Content-Disposition: ", params, file=sys.stderr)
            print(file=sys.stderr)
            sharedVariablesAcrossModules.errorCounter += 1
    return None
def parse_attachment(message_part, attachments=None): content_disposition = message_part.get("Content-Disposition", None) if content_disposition: try: try: content_disposition = tweakContentDisposition(content_disposition) dispositions = content_disposition.strip().split(";", 1) except Exception as inst: print type(inst) print inst.args print >>sys.stderr, "Error parsing file: {%s}%s" % (sharedVariablesAcrossModules.sourceFileUUID, sharedVariablesAcrossModules.sourceFilePath) print >>sys.stderr, "Error parsing the content_disposition:", content_disposition if "attachment" in content_disposition.lower() and "filename" in content_disposition.lower(): try: filename = uuid.uuid4().__str__() print >>sys.stderr, "Attempting extraction with random filename: %s" % (filename) print >>sys.stderr content_disposition = "attachment; filename=%s;" % (filename) dispositions = content_disposition.strip().split(";") except Exception as inst: print >>sys.stderr, type(inst) print >>sys.stderr, inst.args print >>sys.stderr, "Failed" print >>sys.stderr return None else: print >>sys.stderr return None if content_disposition and dispositions[0].lower() == "attachment": file_data = message_part.get_payload(decode=True) if not file_data: payload = message_part.get_payload() if isinstance(payload, list): for msgobj in payload: parse2(msgobj, attachments) return None print >>sys.stderr, message_part.get_payload() print >>sys.stderr, message_part.get_content_charset() attachment = StringIO(file_data) attachment.content_type = message_part.get_content_type() attachment.size = len(file_data) attachment.create_date = None attachment.mod_date = None attachment.read_date = None attachment.name = "" for param in dispositions[1:]: name,value = param.split("=", 1) name = name.lower().strip() if name == "filename": attachment.name = urllib2.unquote(value.strip()).strip('"') if name == "filename*": attachment.name = urllib2.unquote(value.strip()) try: enc, name = attachment.name.split("''", 1) 
attachment.name = name.decode(enc) except Exception as inst: print >>sys.stderr, type(inst) print >>sys.stderr, inst.args pass elif name == "create-date": attachment.create_date = value #TODO: datetime elif name == "modification-date": attachment.mod_date = value #TODO: datetime elif name == "read-date": attachment.read_date = value #TODO: datetime if not attachment.name: print >>sys.stderr, """Warning, no filename found in: [{%s}%s]%s""" % (sharedVariablesAcrossModules.sourceFileUUID, sharedVariablesAcrossModules.sourceFilePath, content_disposition) filename = uuid.uuid4().__str__() print >>sys.stderr, "Attempting extraction with random filename: %s" % (filename) print >>sys.stderr return attachment except Exception as inst: print >>sys.stderr, type(inst) print >>sys.stderr, inst.args print >>sys.stderr, "Error parsing file: {%s}%s" % (sharedVariablesAcrossModules.sourceFileUUID, sharedVariablesAcrossModules.sourceFilePath) print >>sys.stderr, "Error parsing:", dispositions print >>sys.stderr sharedVariablesAcrossModules.errorCounter += 1 return None