Example #1
0
def parse_attachment(message_part):
    """Return the attachment carried by ``message_part``, or ``None``.

    ``message_part`` is accessed through ``get``, ``get_payload`` and
    ``get_content_type``.  Only parts whose ``Content-Disposition`` type is
    ``attachment`` are extracted.

    Returns:
        A ``StringIO`` wrapping the decoded payload, annotated with
        ``content_type``, ``size``, ``name``, ``create_date``, ``mod_date`` and
        ``read_date`` attributes (dates are the raw parameter strings), or
        ``None`` when the part is not an attachment.
    """
    content_disposition = message_part.get("Content-Disposition", None)
    if not content_disposition:
        return None

    dispositions = content_disposition.strip().split(";")
    if dispositions[0].strip().lower() != "attachment":
        return None

    file_data = message_part.get_payload(decode=True)
    if file_data is None:
        # A payload that cannot be decoded to a single blob (e.g. a nested
        # multipart) comes back as None; treat it as empty instead of
        # crashing in len() below.
        file_data = b""

    attachment = StringIO(file_data)
    attachment.content_type = message_part.get_content_type()
    attachment.size = len(file_data)
    attachment.name = None
    attachment.create_date = None
    attachment.mod_date = None
    attachment.read_date = None

    # Disposition parameter -> attribute it populates.
    targets = {
        "filename": "name",
        "create-date": "create_date",      # TODO: datetime
        "modification-date": "mod_date",   # TODO: datetime
        "read-date": "read_date",          # TODO: datetime
    }
    for param in dispositions[1:]:
        # partition() keeps any "=" inside the value intact and lets us skip
        # the empty segment produced by a trailing ";".
        name, sep, value = param.partition("=")
        if not sep:
            continue
        # Parameters normally follow "; ", so the name must be stripped
        # before it can match.
        attr = targets.get(name.strip().lower())
        if attr is not None:
            setattr(attachment, attr, value.strip().strip('"'))
    return attachment
Example #2
0
 def parse_attachment(self, message_part):
     """Build a file-like attachment from ``message_part``, if it carries one.

     A part qualifies when its ``Content-Disposition`` type is ``attachment``,
     or ``inline`` with a ``filename`` parameter.  Headers are parsed with
     ``self.parse_dispositions``, used here as returning a
     ``(type, parameter dict)`` pair.

     Returns:
         A ``StringIO`` over the decoded payload carrying ``content_type``,
         ``size``, ``name``, ``create_date``, ``mod_date`` and ``read_date``
         attributes (dates are left as raw strings), or ``None`` when the
         part is not an attachment.
     """
     header = message_part.get("Content-Disposition", None)
     if not header:
         return None

     kind, params = self.parse_dispositions(header)
     named_inline = kind == 'inline' and 'filename' in params
     if kind != "attachment" and not named_inline:
         return None

     payload = message_part.get_payload(decode=True)
     if payload is None:
         payload = ""

     attachment = StringIO(payload)
     attachment.content_type = message_part.get_content_type()
     attachment.size = len(payload)
     attachment.name = None
     attachment.create_date = None
     attachment.mod_date = None
     attachment.read_date = None

     if "filename" in params:
         attachment.name = params['filename']
     else:
         # No filename in the disposition: fall back to the Content-Type
         # "name" parameter when there is one.
         content_type = message_part.get("Content-Type", None)
         if content_type:
             _, type_params = self.parse_dispositions(content_type)
             if 'name' in type_params:
                 attachment.name = type_params['name']

     # TODO: datetime -- the date parameters are stored as raw strings.
     date_fields = (
         ("create-date", "create_date"),
         ("modification-date", "mod_date"),
         ("read-date", "read_date"),
     )
     for key, attr in date_fields:
         if key in params:
             setattr(attachment, attr, params[key])
     return attachment
Example #3
0
    def parse_attachments_poptres(self, content_disposition, part):
        """Persist an ``attachment`` message part as an ``Attachment`` record.

        Args:
            content_disposition: Raw ``Content-Disposition`` header value.
            part: Message part, read through ``get_payload`` and
                ``get_content_type``.

        Returns:
            The saved ``Attachment`` (its ``user`` set to ``self.usr``), or
            ``None`` when the disposition type is not ``attachment``.
        """
        dispositions = content_disposition.strip().split(";")
        if dispositions[0].lower() != "attachment":
            return None

        file_data = part.get_payload(decode=True)
        attachment = StringIO(file_data)
        attachment.content_type = part.get_content_type()
        attachment.size = len(file_data)
        attachment.name = None
        attachment.create_date = None
        attachment.mod_date = None
        attachment.read_date = None

        for param in dispositions[1:]:
            # partition() tolerates "=" inside a value; segments without "="
            # (e.g. the empty one after a trailing ";") are skipped instead
            # of raising ValueError on unpacking.
            name, sep, value = param.partition("=")
            if not sep:
                continue
            name = name.lower().strip()
            value = value.replace('"', '').strip()

            if name == "filename":
                attachment.name = value
            elif name == "create-date":
                attachment.create_date = value
            elif name == "modification-date":
                attachment.mod_date = value
            elif name == "read-date":
                attachment.read_date = value

        # Seek to the end so tell() reports the stream length, which is
        # passed as the fifth argument below.
        # NOTE(review): presumably Django's InMemoryUploadedFile, where that
        # argument is the size -- confirm against the file's imports.
        attachment.seek(0, 2)
        f = InMemoryUploadedFile(attachment, "", attachment.name, attachment.content_type, attachment.tell(), None)

        atch = Attachment()
        atch.user = self.usr
        atch.file.save(attachment.name, f)
        atch.save()
        return atch
Example #4
0
    def parse_attachments_poptres(self, content_disposition, part):
        """Persist an ``attachment`` message part as an ``Attachment`` record.

        Args:
            content_disposition: Raw ``Content-Disposition`` header value.
            part: Message part, read through ``get_payload`` and
                ``get_content_type``.

        Returns:
            The saved ``Attachment`` (its ``user`` set to ``self.usr``), or
            ``None`` when the disposition type is not ``attachment``.
        """
        dispositions = content_disposition.strip().split(";")
        if dispositions[0].lower() != "attachment":
            return None

        file_data = part.get_payload(decode=True)
        attachment = StringIO(file_data)
        attachment.content_type = part.get_content_type()
        attachment.size = len(file_data)
        attachment.name = None
        attachment.create_date = None
        attachment.mod_date = None
        attachment.read_date = None

        for param in dispositions[1:]:
            # partition() tolerates "=" inside a value; segments without "="
            # (e.g. the empty one after a trailing ";") are skipped instead
            # of raising ValueError on unpacking.
            name, sep, value = param.partition("=")
            if not sep:
                continue
            name = name.lower().strip()
            value = value.replace('"', '').strip()

            if name == "filename":
                attachment.name = value
            elif name == "create-date":
                attachment.create_date = value
            elif name == "modification-date":
                attachment.mod_date = value
            elif name == "read-date":
                attachment.read_date = value

        # Seek to the end so tell() reports the stream length, which is
        # passed as the fifth argument below.
        # NOTE(review): presumably Django's InMemoryUploadedFile, where that
        # argument is the size -- confirm against the file's imports.
        attachment.seek(0, 2)
        f = InMemoryUploadedFile(attachment, "", attachment.name, attachment.content_type, attachment.tell(), None)

        atch = Attachment()
        atch.user = self.usr
        atch.file.save(attachment.name, f)
        atch.save()
        return atch
Example #5
0
def parse_attachment(message_part):
    """Extract the attachment held by ``message_part``, if any.

    Only parts whose ``Content-Disposition`` type is ``attachment`` are
    returned.

    Returns:
        A ``StringIO`` over the decoded payload with ``content_type``,
        ``size``, ``name``, ``create_date``, ``mod_date`` and ``read_date``
        attributes set (dates are the raw parameter strings), or ``None``
        when the part is not an attachment.
    """
    content_disposition = message_part.get("Content-Disposition", None)
    if not content_disposition:
        return None

    dispositions = content_disposition.strip().split(";")
    if dispositions[0].strip().lower() != "attachment":
        return None

    file_data = message_part.get_payload(decode=True)
    # Used a StringIO object since PIL didn't seem to recognize
    # images using a custom file-like object
    attachment = StringIO(file_data)
    attachment.content_type = message_part.get_content_type()
    attachment.size = len(file_data)
    attachment.name = None
    attachment.create_date = None
    attachment.mod_date = None
    attachment.read_date = None

    for param in dispositions[1:]:
        # partition() leaves "=" inside the value alone; segments without an
        # "=" (such as the empty one after a trailing ";") are ignored rather
        # than raising ValueError.
        name, sep, value = param.partition("=")
        if not sep:
            continue
        name = name.strip().lower()
        # Parameter values are commonly quoted; hand back the bare text.
        value = value.strip().strip('"')

        if name == "filename":
            attachment.name = value
        elif name in ("create-date", "creation-date"):
            attachment.create_date = value  # TODO: datetime
        elif name == "modification-date":
            attachment.mod_date = value  # TODO: datetime
        elif name == "read-date":
            attachment.read_date = value  # TODO: datetime
    return attachment
Example #6
0
    def parse_attachment(self, message_part):
        """Build a file-like attachment from ``message_part``, if it has one.

        A part qualifies when its ``Content-Disposition`` type is
        ``attachment``, or ``inline`` with a ``filename`` parameter.  The
        header is parsed with ``self.parse_dispositions`` (used here as
        returning a ``(type, parameter dict)`` pair) and the filename is
        passed through ``self.parse_header_field``.

        Returns:
            A ``StringIO`` over the decoded payload carrying ``content_type``,
            ``size``, ``name``, ``create_date``, ``mod_date`` and
            ``read_date`` attributes (dates are raw strings), or ``None`` when
            the part is not an attachment.
        """
        content_disposition = message_part.get("Content-Disposition", None)
        if not content_disposition:
            return None

        dispo_type, dispo_dict = self.parse_dispositions(content_disposition)
        is_named_inline = dispo_type == 'inline' and 'filename' in dispo_dict
        if dispo_type != "attachment" and not is_named_inline:
            return None

        file_data = message_part.get_payload(decode=True)
        if file_data is None:
            file_data = ""
        attachment = StringIO(file_data)
        attachment.content_type = message_part.get_content_type()
        attachment.size = len(file_data)
        attachment.name = None
        attachment.create_date = None
        attachment.mod_date = None
        attachment.read_date = None

        if "filename" in dispo_dict:
            attachment.name = self.parse_header_field(dispo_dict['filename'])

        # Each parameter is independent of the others, so every one present
        # is recorded; an elif chain would stop at the first match and drop
        # the dates whenever a filename exists.
        # TODO: datetime -- the dates are stored as raw strings.
        date_fields = (
            ("create-date", "create_date"),
            ("modification-date", "mod_date"),
            ("read-date", "read_date"),
        )
        for key, attr in date_fields:
            if key in dispo_dict:
                setattr(attachment, attr, dispo_dict[key])
        return attachment
Example #7
0
	def parse_attachment(self, message_part):
		"""Return the attachment held by ``message_part``, or ``None``.

		Only parts whose ``Content-Disposition`` type is ``attachment`` are
		extracted.

		Returns:
			A ``StringIO`` containing the decoded payload, with
			``content_type``, ``size`` and ``name`` set; ``create_date``,
			``mod_date`` and ``read_date`` are always ``None``.  Returns
			``None`` when the part is not an attachment.
		"""
		content_disposition = message_part.get("Content-Disposition", None)
		if not content_disposition:
			return None

		dispositions = content_disposition.strip().split(";")
		if dispositions[0].strip().lower() != "attachment":
			return None

		file_data = message_part.get_payload(decode=True)
		# NOTE(review): the payload is written into an empty buffer, so the
		# stream position is left at the end -- callers presumably use
		# getvalue() or seek(0) before reading; confirm.
		attachment = StringIO()
		attachment.write(file_data)
		attachment.content_type = message_part.get_content_type()
		attachment.size = len(file_data)
		attachment.name = None
		attachment.create_date = None
		attachment.mod_date = None
		attachment.read_date = None

		for param in dispositions[1:]:
			# Only the filename parameter names the attachment; any other
			# parameter (size, dates, ...) must not overwrite it.  Segments
			# without "=" (e.g. after a trailing ";") are ignored.
			name, sep, value = param.partition("=")
			if sep and name.strip().lower() == "filename":
				attachment.name = value.strip().replace('"', '')

		return attachment
Example #8
0
def _decode_filename(raw):
    """Decode any RFC 2047 encoded-words in ``raw`` and return the text."""
    # Imported locally so this block does not rely on the file's import list;
    # ``email.header`` is the module spelling that exists on both Python 2.5+
    # and Python 3 (the ``email.Header`` alias is Python 2 only).
    from email.header import decode_header

    pieces = []
    for chunk, charset in decode_header(raw):
        if charset is not None:
            chunk = chunk.decode(charset)
        elif not isinstance(chunk, str):
            # Python 3 returns the unencoded runs of a mixed header as bytes.
            chunk = chunk.decode("ascii", "replace")
        pieces.append(chunk)
    # Join every chunk so a filename split over several encoded-words is not
    # truncated to its first piece.
    return "".join(pieces)


def parse_attachment(message_part):
    """Extract the attachment or inline file held by ``message_part``.

    Parts whose ``Content-Disposition`` type is ``attachment`` or ``inline``
    are returned; the ``filename`` parameter is decoded when it contains
    encoded-words.

    Returns:
        A ``StringIO`` over the decoded payload with ``content_type``,
        ``size``, ``name``, ``create_date``, ``mod_date`` and ``read_date``
        attributes (dates are the raw parameter strings), or ``None`` when
        the part has no matching disposition.
    """
    content_disposition = message_part.get("Content-Disposition", None)
    if not content_disposition:
        return None

    dispositions = content_disposition.strip().split(";")
    if dispositions[0].strip().lower() not in ("attachment", "inline"):
        return None

    file_data = message_part.get_payload(decode=True)
    # Used a StringIO object since PIL didn't seem to recognize
    # images using a custom file-like object
    attachment = StringIO(file_data)
    attachment.content_type = message_part.get_content_type()
    attachment.size = len(file_data)
    attachment.name = None
    attachment.create_date = None
    attachment.mod_date = None
    attachment.read_date = None

    for param in dispositions[1:]:
        # Segments without "=" (e.g. the empty one after a trailing ";") are
        # skipped instead of raising ValueError on unpacking.
        name, sep, value = param.strip().partition("=")
        if not sep:
            continue
        name = name.strip().lower()

        if name == "filename":
            attachment.name = _decode_filename(value.strip('"'))
        elif name == "create-date":
            attachment.create_date = value  # TODO: datetime
        elif name == "modification-date":
            attachment.mod_date = value  # TODO: datetime
        elif name == "read-date":
            attachment.read_date = value  # TODO: datetime
    return attachment
def parse_attachment(message_part, attachments=None):
    """Extract a named attachment from ``message_part``.

    The ``Content-Disposition`` header is parsed with ``parse_headers``; the
    part is used only when the disposition is ``attachment`` and a
    ``filename`` parameter is present.

    Args:
        message_part: Message part, read through ``get``, ``get_payload``,
            ``get_content_type`` and ``get_content_charset``.
        attachments: Passed through to ``parse2`` for each sub-message when
            the part has no decodable payload but a list payload.

    Returns:
        A ``StringIO`` over the decoded payload with ``content_type``,
        ``size``, ``name``, ``create_date``, ``mod_date`` and ``read_date``
        attributes (dates are raw strings), or ``None`` when nothing is
        extracted from this part.

    Raises:
        Any exception raised while parsing is logged to stderr together with
        the header value and then re-raised.
    """
    content_disposition = message_part.get("Content-Disposition", None)
    if not content_disposition:
        return None

    try:
        cd = parse_headers(content_disposition, relaxed=True)
        if cd.disposition.lower() != "attachment":
            return None
        # ``in`` replaces the deprecated dict.has_key().
        if "filename" not in cd.assocs:
            #print error or warning?
            return None

        file_data = message_part.get_payload(decode=True)
        if not file_data:
            payload = message_part.get_payload()
            if isinstance(payload, list):
                # Container part: hand each sub-message to parse2 and
                # report nothing for the container itself.
                for msgobj in payload:
                    parse2(msgobj, attachments)
                return None
            print >>sys.stderr, message_part.get_payload()
            print >>sys.stderr, message_part.get_content_charset()
        attachment = StringIO(file_data)
        attachment.content_type = message_part.get_content_type()
        attachment.size = len(file_data)
        attachment.name = cd.assocs['filename']
        attachment.create_date = None
        attachment.mod_date = None
        attachment.read_date = None

        for name, value in cd.assocs.iteritems():
            if name == "create-date":
                attachment.create_date = value  #TODO: datetime
            elif name == "modification-date":
                attachment.mod_date = value #TODO: datetime
            elif name == "read-date":
                attachment.read_date = value #TODO: datetime

        return attachment

    except:
        # Deliberately bare: log the offending header for any failure, then
        # let the original exception propagate unchanged.
        print >>sys.stderr, "content_disposition:", content_disposition
        raise
Example #10
0
def parse_attachment(message_part, attachments=None):
    """Extract a named attachment from ``message_part``.

    The ``Content-Disposition`` header is parsed with ``parse_headers``; the
    part is used only when the disposition is ``attachment`` and it carries a
    ``filename`` parameter.  A part without a decodable payload yields
    ``None``; if its payload is a list, each sub-message is first handed to
    ``_parse2`` together with ``attachments``.

    Returns:
        A ``StringIO`` over the decoded payload with ``content_type``,
        ``size``, ``name``, ``create_date``, ``mod_date`` and ``read_date``
        attributes (dates are raw strings), or ``None``.

    Raises:
        Any exception raised while parsing is logged to stderr together with
        the header value and then re-raised.
    """
    content_disposition = message_part.get("Content-Disposition", None)
    if not content_disposition:
        return None

    try:
        cd = parse_headers(content_disposition, relaxed=True)
        if cd.disposition.lower() != "attachment":
            return None
        if "filename" not in cd.assocs:
            #print error or warning?
            return None

        file_data = message_part.get_payload(decode=True)
        if not file_data:
            children = message_part.get_payload()
            if isinstance(children, list):
                for child in children:
                    _parse2(child, attachments)
            return None  # PSIPHON: fixed conditional return

        attachment = StringIO(file_data)
        attachment.content_type = message_part.get_content_type()
        attachment.size = len(file_data)
        attachment.name = cd.assocs['filename']
        attachment.create_date = None
        attachment.mod_date = None
        attachment.read_date = None

        # TODO: datetime -- the date parameters are stored as raw strings.
        date_slots = {
            "create-date": "create_date",
            "modification-date": "mod_date",
            "read-date": "read_date",
        }
        for key, raw in cd.assocs.iteritems():
            slot = date_slots.get(key)
            if slot is not None:
                setattr(attachment, slot, raw)

        return attachment

    except:
        print >> sys.stderr, "content_disposition:", content_disposition
        raise
Example #11
0
def parse_attachment(message_part, attachments=None):
    """Pull the attachment, and its metadata, out of ``message_part``.

    The part is treated as an attachment when its ``Content-Disposition``
    parameters include ``attachment``.  The result exposes the content plus
    ``content_type``, ``size``, ``create_date``, ``mod_date``, ``read_date``
    and ``name``; a random UUID is used as the name when no filename is found.

    Any exception raised while parsing is reported on stderr, counted in
    ``sharedVariablesAcrossModules.errorCounter`` and swallowed.

    Returns:
        A ``StringIO`` over the decoded payload, or ``None`` when the part is
        not an attachment, has only sub-messages, or parsing failed.
    """
    params = message_part.get_params(None, 'Content-Disposition')
    if not params:
        return None

    # If a 'part' has a Content-Disposition, we assume it is an attachment
    try:
        params = dict(params)
        print('\tContent-Disposition (for following email)', params)
        if 'attachment' not in params:
            return None

        # Not sure what's going on here
        # Why get payload with decode, then try again and reparse?
        # See details at
        # http://docs.python.org/2/library/email.message.html#email.message.Message.get_payload
        file_data = message_part.get_payload(decode=True)
        if not file_data:
            payload = message_part.get_payload()
            if isinstance(payload, list):
                # TODO not sure this actually does anything
                for msgobj in payload:
                    parse2(msgobj, attachments)
                return None
            print(message_part.get_payload(), file=sys.stderr)
            print(message_part.get_content_charset(), file=sys.stderr)

        attachment = StringIO(file_data)
        attachment.content_type = message_part.get_content_type()
        attachment.size = params.get('size', len(file_data))
        # TODO convert dates to datetime
        attachment.create_date = params.get('create-date')
        attachment.mod_date = params.get('modification-date')
        attachment.read_date = params.get('read-date')

        filename = message_part.get_filename(None)
        if not filename:
            # filename not in Content-Disposition
            source = (sharedVariablesAcrossModules.sourceFileUUID,
                      sharedVariablesAcrossModules.sourceFilePath, params)
            print(
                """Warning, no filename found in: [{%s}%s] Content-Disposition: %s or Content-Type"""
                % source,
                file=sys.stderr)
            filename = six.text_type(uuid.uuid4())
            print("Attempting extraction with random filename: %s" %
                  (filename),
                  file=sys.stderr)
        else:
            # Filenames may be encoded with =?encoding?...
            # If so, convert to unicode
            name, encoding = email.header.decode_header(filename)[0]
            if encoding:
                message = '\t{filename} encoded with {encoding}, converting to unicode'
                print(message.format(filename=filename, encoding=encoding))
                filename = name.decode(encoding)

        # Remove newlines from filename because that breaks everything
        attachment.name = filename.replace("\r", "").replace("\n", "")
        return attachment

    except Exception as inst:
        print(type(inst), file=sys.stderr)
        print(inst.args, file=sys.stderr)
        print("Error parsing: file: {%s}%s" %
              (sharedVariablesAcrossModules.sourceFileUUID,
               sharedVariablesAcrossModules.sourceFilePath),
              file=sys.stderr)
        print("Error parsing: Content-Disposition: ",
              params,
              file=sys.stderr)
        print(file=sys.stderr)
        sharedVariablesAcrossModules.errorCounter += 1
    return None
def parse_attachment(message_part, attachments=None):
    """Extract the attachment described by ``message_part``.

    The ``Content-Disposition`` header is first normalised with
    ``tweakContentDisposition``.  If that fails but the header still mentions
    both ``attachment`` and ``filename``, extraction is retried with a
    synthetic header carrying a random UUID filename.

    Args:
        message_part: Message part, read through ``get``, ``get_payload``,
            ``get_content_type`` and ``get_content_charset``.
        attachments: Passed through to ``parse2`` for each sub-message when
            the part has no decodable payload but a list payload.

    Returns:
        A ``StringIO`` over the decoded payload with ``content_type``,
        ``size``, ``name``, ``create_date``, ``mod_date`` and ``read_date``
        attributes (dates are raw strings), or ``None`` when the part is not
        an attachment or could not be parsed.  Parse failures are reported on
        stderr and counted in ``sharedVariablesAcrossModules.errorCounter``.
    """
    content_disposition = message_part.get("Content-Disposition", None)
    if not content_disposition:
        return None

    try:
        try:
            content_disposition = tweakContentDisposition(content_disposition)
            # NOTE(review): maxsplit=1 leaves every parameter in a single
            # segment; presumably tweakContentDisposition reduces the header
            # to one parameter -- confirm.
            dispositions = content_disposition.strip().split(";", 1)
        except Exception as inst:
            # Diagnostics go to stderr like every other report in here.
            print >>sys.stderr, type(inst)
            print >>sys.stderr, inst.args
            print >>sys.stderr, "Error parsing file: {%s}%s" % (sharedVariablesAcrossModules.sourceFileUUID, sharedVariablesAcrossModules.sourceFilePath)
            print >>sys.stderr, "Error parsing the content_disposition:", content_disposition
            if "attachment" in content_disposition.lower() and "filename" in content_disposition.lower():
                try:
                    filename = uuid.uuid4().__str__()
                    print >>sys.stderr, "Attempting extraction with random filename: %s" % (filename)
                    print >>sys.stderr
                    content_disposition = "attachment; filename=%s;" % (filename)
                    dispositions = content_disposition.strip().split(";")
                except Exception as inst:
                    print >>sys.stderr, type(inst)
                    print >>sys.stderr, inst.args
                    print >>sys.stderr, "Failed"
                    print >>sys.stderr
                    return None
            else:
                print >>sys.stderr
                return None

        if content_disposition and dispositions[0].lower() == "attachment":
            file_data = message_part.get_payload(decode=True)
            if not file_data:
                payload = message_part.get_payload()
                if isinstance(payload, list):
                    # Container part: hand each sub-message to parse2 and
                    # report nothing for the container itself.
                    for msgobj in payload:
                        parse2(msgobj, attachments)
                    return None
                print >>sys.stderr, message_part.get_payload()
                print >>sys.stderr, message_part.get_content_charset()

            attachment = StringIO(file_data)
            attachment.content_type = message_part.get_content_type()
            attachment.size = len(file_data)
            attachment.create_date = None
            attachment.mod_date = None
            attachment.read_date = None
            attachment.name = ""

            for param in dispositions[1:]:
                # Segments without "=" are skipped: the synthetic recovery
                # header above ends in ";" and would otherwise always fail
                # here on the empty trailing segment.
                name, sep, value = param.partition("=")
                if not sep:
                    continue
                name = name.lower().strip()

                if name == "filename":
                    attachment.name = urllib2.unquote(value.strip()).strip('"')
                elif name == "filename*":
                    # Extended form: charset''percent-encoded-name.
                    attachment.name = urllib2.unquote(value.strip())
                    try:
                        enc, encoded_name = attachment.name.split("''", 1)
                        attachment.name = encoded_name.decode(enc)
                    except Exception as inst:
                        # Best effort: keep the unquoted value as the name.
                        print >>sys.stderr, type(inst)
                        print >>sys.stderr, inst.args
                elif name == "create-date":
                    attachment.create_date = value  #TODO: datetime
                elif name == "modification-date":
                    attachment.mod_date = value #TODO: datetime
                elif name == "read-date":
                    attachment.read_date = value #TODO: datetime

            if not attachment.name:
                print >>sys.stderr, """Warning, no filename found in: [{%s}%s]%s""" % (sharedVariablesAcrossModules.sourceFileUUID, sharedVariablesAcrossModules.sourceFilePath, content_disposition)
                filename = uuid.uuid4().__str__()
                print >>sys.stderr, "Attempting extraction with random filename: %s" % (filename)
                print >>sys.stderr
                # Actually use the announced fallback name.
                attachment.name = filename

            return attachment

    except Exception as inst:
        print >>sys.stderr, type(inst)
        print >>sys.stderr, inst.args
        print >>sys.stderr, "Error parsing file: {%s}%s" % (sharedVariablesAcrossModules.sourceFileUUID, sharedVariablesAcrossModules.sourceFilePath)
        print >>sys.stderr, "Error parsing:", dispositions
        print >>sys.stderr
        sharedVariablesAcrossModules.errorCounter += 1
    return None