def test_add_file_to_group(user, group):
    """Check that logic.add_file_to_group stores a document with its tags.

    The tag list deliberately mixes three kinds of entries: the name of a tag
    that already exists, the name of a tag that does not exist yet, and a Tag
    instance.  The test expects exactly three Tag rows afterwards and three
    tags attached to the new document.
    """
    content = "mybinarydocumentcontent"

    # Pre-existing tag that is later referenced by name only.
    Tag.objects.create(name="tag one")
    third_tag = Tag.objects.create(name="tag three")
    tags = ["tag one", "tag two", third_tag]

    # File-like upload; the size attribute is set by hand on the StringIO.
    upload = StringIO(content)
    upload.size = len(content)

    doc = logic.add_file_to_group(
        upload, "My document", ".dll", group, tags, user
    )

    assert doc
    assert doc in group.document_set.all()
    assert doc.name == "My document"
    assert doc.state == 'READY_TO_QUEUE'
    assert Tag.objects.count() == 3
    assert doc.tags.count() == 3
def parse_attachment(message_part, state, attachments=None):
    """Turn one message part into a file-like attachment object, if it is one.

    A part is only considered when it carries a Content-Disposition header
    whose parameters include "attachment".  The decoded payload is wrapped in
    a StringIO that is annotated with ``content_type``, ``size``,
    ``create_date``, ``mod_date``, ``read_date`` and ``name`` attributes.

    When the part has no decoded payload but holds a list of sub-messages,
    each sub-message is handed to ``parse2`` (together with ``state`` and
    ``attachments``) and nothing is returned for the part itself.

    Any exception raised while processing is reported on stderr and counted
    in ``state.error_count``.

    Returns the annotated StringIO, or None when the part is not an
    attachment, was delegated to ``parse2``, or could not be parsed.
    """
    disposition = message_part.get_params(None, "Content-Disposition")
    if not disposition:
        # Without a Content-Disposition the part is not treated as an
        # attachment.
        return None

    try:
        disposition = dict(disposition)
        print("\tContent-Disposition (for following email)", disposition)
        if "attachment" not in disposition:
            return None

        # NOTE: it is unclear why the payload is fetched decoded first and
        # then fetched again undecoded.  See details at
        # http://docs.python.org/2/library/email.message.html#email.message.Message.get_payload
        file_data = message_part.get_payload(decode=True)
        if not file_data:
            sub_parts = message_part.get_payload()
            if isinstance(sub_parts, list):
                for sub_part in sub_parts:
                    # TODO not sure this actually does anything
                    parse2(sub_part, state, attachments)
                return None
            print(message_part.get_payload(), file=sys.stderr)
            print(message_part.get_content_charset(), file=sys.stderr)

        attachment = StringIO(file_data)
        attachment.content_type = message_part.get_content_type()
        # A "size" parameter wins over the measured payload length.
        attachment.size = disposition.get("size", len(file_data))
        # TODO convert dates to datetime
        for attribute, key in (
            ("create_date", "create-date"),
            ("mod_date", "modification-date"),
            ("read_date", "read-date"),
        ):
            setattr(attachment, attribute, disposition.get(key))

        filename = message_part.get_filename(None)
        if not filename:
            # filename not in Content-Disposition: fall back to a random one
            print(
                """Warning, no filename found in: [{%s}%s] Content-Disposition: %s or Content-Type"""
                % (state.sourceFileUUID, state.sourceFilePath, disposition),
                file=sys.stderr,
            )
            filename = six.text_type(uuid.uuid4())
            print(
                "Attempting extraction with random filename: %s" % (filename),
                file=sys.stderr,
            )
        else:
            # Filenames may be encoded with =?encoding?...
            # If so, convert to unicode
            name, encoding = email.header.decode_header(filename)[0]
            if encoding:
                print(
                    "\t{filename} encoded with {encoding}, converting to unicode"
                    .format(filename=filename, encoding=encoding))
                filename = name.decode(encoding)

        # Remove newlines from filename because that breaks everything
        attachment.name = filename.replace("\r", "").replace("\n", "")
        return attachment
    except Exception as inst:
        # Best effort: report the failure, count it, and carry on.
        print(type(inst), file=sys.stderr)
        print(inst.args, file=sys.stderr)
        print(
            "Error parsing: file: {%s}%s"
            % (state.sourceFileUUID, state.sourceFilePath),
            file=sys.stderr,
        )
        print("Error parsing: Content-Disposition: ", disposition, file=sys.stderr)
        print(file=sys.stderr)
        state.error_count += 1

    return None