Example #1
0
 def message_process_msg(
         self, model, message, custom_values=None, save_original=False,
         strip_attachments=False, thread_id=None,
 ):
     """Convert a base64-encoded .msg message to RFC2822 and pass it to
     message_drop.

     :param model: forwarded unchanged to ``message_drop``
     :param message: base64-encoded content of the .msg file
     :param custom_values: forwarded unchanged to ``message_drop``
     :param save_original: forwarded unchanged to ``message_drop``
     :param strip_attachments: forwarded unchanged to ``message_drop``
     :param thread_id: forwarded unchanged to ``message_drop``
     :return: whatever ``message_drop`` returns
     :raises exceptions.UserError: when the ``Message`` parser is unavailable
     """
     if not Message:
         raise exceptions.UserError(
             _('Install the msg-extractor library to handle .msg files')
         )
     message_msg = Message(b64decode(message))
     # A message without a recipient value has no string to split; hand
     # build_email an empty recipient list instead of crashing on None.
     recipients = (
         message_msg.to.split(',') if message_msg.to is not None else []
     )
     message_email = self.env['ir.mail_server'].build_email(
         message_msg.sender, recipients, message_msg.subject,
         # prefer html bodies to text
         message_msg._getStream('__substg1.0_10130102') or message_msg.body,
         email_cc=message_msg.cc,
         headers={'date': message_msg.date},
         message_id=message_msg.message_id,
         attachments=[
             (attachment.longFilename, attachment.data)
             for attachment in message_msg.attachments
         ],
     )
     return self.message_drop(
         model, message_email.as_string(), custom_values=custom_values,
         save_original=save_original, strip_attachments=strip_attachments,
         thread_id=thread_id,
     )
    def analyze_file(self, path):
        """Parse the .msg file at ``path`` and record its details.

        Adds an 'Email details' result subsection holding the header fields,
        the body and, for every attachment that is not itself an embedded
        message, its name, SHA-256 digest and libmagic description.

        :param path: location of the .msg file, handed to ``Message``
        :return: ``self.results``
        """
        m = Message(path)

        def xstr(s):
            return '' if s is None else str(s)

        attachments = m.attachments
        # Build the libmagic detector once instead of once per attachment,
        # and only when there is something to inspect.
        detector = magic.Magic(uncompress=True) if attachments else None
        a = []
        for attachment in attachments:
            payload = attachment.data
            # Embedded messages carry a message object rather than raw
            # bytes, so they cannot be hashed. isinstance() also covers
            # subclasses of Message, which an exact type() check let through.
            if isinstance(payload, Message):
                continue
            a.append({
                'name': attachment.longFilename,
                'sha256': hashlib.sha256(payload).hexdigest(),
                'mimeinfo': detector.from_buffer(payload),
            })

        email = {
            'header': xstr(m.header),
            'from': xstr(m.sender),
            'to': xstr(m.to),
            'cc': xstr(m.cc),
            'subject': xstr(m.subject),
            'date': xstr(m.date),
            'body': decode_utf7(m.body),
            'attachments': a
        }
        self.add_result_subsection('Email details', email)
        return self.results
Example #3
0
    def msgtext(self):
        """
        Extract texts and some information from .msg format email files.

        Returns a single string of the form
        ``From: ..., To: ..., DateTime: ..., Subject: ..., Message: ...``
        with every apostrophe replaced by ``‘``. A missing field keeps its
        label with an empty value. An empty string is returned when the file
        at ``self.path`` cannot be opened or parsed.
        """

        try:
            mail = Message(self.path)
        except Exception:  # wrong decode or cannot parse...
            return ''

        def labelled(label, value):
            # str() keeps the concatenation safe if a field is not a plain
            # string (NOTE(review): some parser versions may return a
            # datetime for the date -- confirm).
            return label + ': ' + ('' if value is None else str(value))

        text = ', '.join([
            labelled('From', mail.sender),
            labelled('To', mail.to),
            # The label used to be misspelled 'DataTime' when the date was
            # missing; it is now the same in both cases.
            labelled('DateTime', mail.date),
            labelled('Subject', mail.subject),
            labelled('Message', mail.body),
        ])
        return text.replace("'", "‘")
Example #4
0
 def test_message_import(self):
     """Test Outlook MSG file import & date parsing

     Copies the fixture .msg file into MEDIA_ROOT, attaches it to the first
     Record and checks that saving the record fills in its order_date.
     """
     path = Path(settings.BASE_DIR, 'prs2', 'referral', 'fixtures',
                 'test_email.msg')
     msg = Message(path)
     self.assertTrue(msg.date)
     record = Record.objects.all()[0]
     # Record order_date is empty.
     self.assertFalse(record.order_date)
     upload_path = settings.MEDIA_ROOT + '/test.msg'
     # Context managers close both handles even if the copy fails; the
     # fixture handle used to be left open.
     with open(path, 'rb') as fixture, open(upload_path, 'wb') as upload:
         upload.write(fixture.read())
     record.uploaded_file = upload_path
     record.save()
     # Record order_date is no longer empty.
     self.assertTrue(record.order_date)
Example #5
0
    def analyze_file(self, path):
        """Parse the .msg file at ``path``, record its details and extract
        its attachments.

        Adds an "Email details" result subsection (header fields, body and
        per-attachment name / SHA-256 / libmagic description) and writes
        every attachment that is not itself an embedded message into a fresh
        temporary directory.

        :param path: location of the .msg file, handed to ``Message``
        :return: tuple ``(self.results, observables)`` where ``observables``
            lists the paths of the attachment files written to disk
        """
        m = Message(path)

        def xstr(s):
            return "" if s is None else str(s)

        attachments = m.attachments
        # Build the libmagic detector once instead of once per attachment.
        detector = magic.Magic(uncompress=True) if attachments else None
        a = []
        observables = []
        outdir = tempfile.mkdtemp()
        for index, attachment in enumerate(attachments):
            payload = attachment.data
            # Embedded messages carry a message object rather than raw
            # bytes; isinstance() also covers subclasses of Message.
            if isinstance(payload, Message):
                continue
            a.append({
                "name": attachment.longFilename,
                "sha256": hashlib.sha256(payload).hexdigest(),
                "mimeinfo": detector.from_buffer(payload),
            })
            # The file name comes from the (untrusted) email. Keep only its
            # final component so "../x", "C:\\x" or "/etc/x" cannot escape
            # outdir, and fall back to a generated name when nothing usable
            # is left (missing name, ".", "..").
            safe_name = os.path.basename(
                xstr(attachment.longFilename).replace("\\", "/"))
            if safe_name in ("", ".", ".."):
                safe_name = "attachment_{}".format(index)
            target = os.path.join(outdir, safe_name)
            # Two attachments may share a name; do not overwrite the first.
            if os.path.exists(target):
                target = os.path.join(
                    outdir, "{}_{}".format(index, safe_name))
            with open(target, "wb") as f:
                f.write(payload)
            observables.append(target)

        email = {
            "header": xstr(m.header),
            "from": xstr(m.sender),
            "to": xstr(m.to),
            "cc": xstr(m.cc),
            "subject": xstr(m.subject),
            "date": xstr(m.date),
            "body": decode_utf7(m.body),
            "attachments": a,
        }
        self.add_result_subsection("Email details", email)
        return self.results, observables
Example #6
0
def typesense_index_record(rec, client=None):
    """Index a single record in Typesense.
    """
    if not client:
        client = typesense_client()

    rec_document = {
        'id': str(rec.pk),
        'created': rec.created.timestamp(),
        'referral_id': rec.referral.pk,
        'name': rec.name,
        'description': rec.description if rec.description else '',
        'file_name': rec.filename,
        'file_type': rec.extension,
    }
    # PDF document content.
    if rec.extension == 'PDF':
        try:
            # PDF text extraction can be a little error-prone.
            # In the event of an exception here, we'll just accept it and pass.
            content = high_level.extract_text(
                open(rec.uploaded_file.path, 'rb'))
            rec_document['file_content'] = content.replace('\n', ' ').strip()
        except:
            pass

    # MSG document content.
    if rec.extension == 'MSG':
        message = Message(rec.uploaded_file.path)
        content = '{} {}'.format(message.subject,
                                 message.body.replace('\r\n', ' '))
        rec_document['file_content'] = content.strip()

    # DOCX document content.
    if rec.extension == 'DOCX':
        content = docx2txt.process(rec.uploaded_file.path)
        rec_document['file_content'] = content.replace('\n', ' ').strip()

    client.collections['records'].documents.upsert(rec_document)
Example #7
0
 def analyze(self, data, parsed):
     '''
     Analyze an MSG file: store a copy of self.datastruct under data["MSG"],
     fill it with headers and content, collect attachment streams when the
     attachment check succeeds, then extract words from the collected items
     (or from the file itself when nothing was collected) and tag the result
     as "msg".
     (original note: need to implement from extract_msg.dev_classes import Message)
     '''
     data["MSG"] = deepcopy(self.datastruct)
     msg = Message(data["Location"]["File"])
     header_items = self.get_headers(data["MSG"]["General"], msg)
     self.get_content(data["MSG"], msg)
     attachment_streams = (
         self.get_attachment(data, msg)
         if self.check_attachment_and_make_dir(data, msg)
         else []
     )
     # The middle operand is the (always empty) "parts" list.
     combined = attachment_streams + [] + header_items
     if not combined:
         get_words(data, data["Location"]["File"])
     else:
         # have to be bytes < will check this later on
         get_words_multi_filesarray(data, combined)
     parsed.type = "msg"
Example #8
0
 def analyze(self, data, parsed):
     '''
     Run the MSG analysis: seed data["MSG"] from a copy of self.datastruct,
     read headers and content from the message, gather attachment streams if
     the attachment check passes, feed everything gathered to the word
     extraction (falling back to the raw file when nothing was gathered) and
     set parsed.type to "msg".
     '''
     data["MSG"] = deepcopy(self.datastruct)
     message = Message(data["Location"]["File"])
     headers = self.get_headers(data["MSG"]["General"], message)
     self.get_content(data["MSG"], message)
     streams = []
     if self.check_attachment_and_make_dir(data, message):
         streams = self.get_attachment(data, message)
     parts = []  # never populated here; kept as part of the concatenation
     sources = streams + parts + headers
     if sources:
         # have to be bytes < will check this later on
         get_words_multi_filesarray(data, sources)
     else:
         get_words(data, data["Location"]["File"])
     parsed.type = "msg"
Example #9
0
    # NOTE(review): this is an excerpt; writeRaw, toJson and debug are read
    # or assigned below but presumably initialised before this point --
    # confirm against the full script.
    useFileName = False
    useContentId = False

    # Every command-line argument is compared against the known flags and
    # then, flag or not, also expanded as a glob pattern further down.
    for rawFilename in sys.argv[1:]:
        if rawFilename == '--raw':
            writeRaw = True

        if rawFilename == '--json':
            toJson = True

        if rawFilename == '--use-file-name':
            useFileName = True

        if rawFilename == '--use-content-id':
            useContentId = True

        # debug is set here but not read anywhere in this excerpt.
        if rawFilename == '--debug':
            debug = True

        # Flags take effect only for arguments that come after them, because
        # files are processed inside the same pass over sys.argv.
        for filename in glob.glob(rawFilename):
            # Opening the message happens outside the try block, so a file
            # that cannot be parsed is not covered by the handler below.
            msg = Message(filename)
            try:
                if writeRaw:
                    msg.saveRaw()
                else:
                    msg.save(toJson, useFileName, False, useContentId)
            except Exception as e:
                # Report the failure with its traceback and carry on with
                # the next file; the bound exception itself is not used.
                # msg.debug()
                print("Error with file '" + filename + "': " +
                      traceback.format_exc())