예제 #1
0
    def __init__(self, mypath, nfile):
        """Load one e-mail file, parse it and run the analysis on it.

        ``mypath`` and ``nfile`` are joined into the file path. The file is
        parsed with ``mailparser`` according to its extension and the result
        of ``self.analyse_email()`` is stored in ``self.retval``.

        Args:
            mypath: Directory that contains the file.
            nfile: File name. Its extension (``.msg`` or ``.eml``, matched
                case-insensitively) selects the parser.

        Raises:
            SystemExit: With exit code 1, after printing a short message,
                when the file does not exist, the extension is not
                supported, or the parsed mail has no headers.
        """
        self.mypath = mypath
        self.nfile = nfile
        self.onemail = None
        self.retval = None

        filepath = os.path.join(mypath, nfile)
        if not os.path.isfile(filepath):
            print("File not found!")
            raise SystemExit(1)

        # Compare the lower-cased name so "MAIL.MSG" is handled like
        # "mail.msg", and require a literal dot before the extension.
        lowered_name = nfile.lower()
        if lowered_name.endswith(".msg"):
            self.onemail = mailparser.parse_from_file_msg(filepath)
        elif lowered_name.endswith(".eml"):
            self.onemail = mailparser.parse_from_file(filepath)
        else:
            print("Unsupported file type!")
            raise SystemExit(1)

        if not self.onemail.headers:
            print("Broken file!")
            raise SystemExit(1)
        self.retval = self.analyse_email()
예제 #2
0
def main():
    """Command-line entry point: parse one mail and print what was asked for.

    The mail source is chosen from the parsed arguments in this order:
    ``file`` (Outlook msg parser when ``outlook`` is set), ``string``,
    ``stdin``. Each output option that is set is then printed with
    ``safe_print`` in a fixed order.

    Raises:
        MailParserOutlookError: When ``stdin`` is combined with ``outlook``.
    """
    args = get_args().parse_args()

    # Pick the mail source.
    if args.file:
        load = (mailparser.parse_from_file_msg
                if args.outlook else mailparser.parse_from_file)
        parser = load(args.file)
    elif args.string:
        parser = mailparser.parse_from_string(args.string)
    elif args.stdin:
        if args.outlook:
            raise MailParserOutlookError(
                "You can't use stdin with msg Outlook")
        parser = mailparser.parse_from_file_obj(sys.stdin)

    # Options that print exactly one parser attribute, in output order.
    # The attribute is only read when its option is set.
    direct_outputs = (
        ("json", "mail_json"),
        ("body", "body"),
        ("headers", "headers_json"),
        ("to", "to_json"),
        ("delivered_to", "delivered_to_json"),
        ("from_", "from_json"),
        ("subject", "subject"),
        ("receiveds", "received_json"),
    )
    for option, attribute in direct_outputs:
        if getattr(args, option):
            safe_print(getattr(parser, attribute))

    if args.defects:
        for category in parser.defects_categories:
            safe_print(category)

    if args.senderip:
        sender_ip = parser.get_server_ipaddress(args.senderip)
        safe_print(sender_ip or "Not Found")

    if args.attachments or args.attachments_hash:
        print_attachments(parser.attachments, args.attachments_hash)

    if args.mail_hash:
        body_bytes = parser.body.encode("utf-8")
        print_mail_fingerprints(body_bytes)
예제 #3
0
def main():
    """Command-line entry point: parse one mail and print what was asked for.

    The mail is read from ``args.file`` (Outlook msg parser when
    ``args.outlook`` is set) or, failing that, from ``args.string``. Each
    output option that is set is then printed with ``safe_print`` in a fixed
    order.
    """
    args = get_args().parse_args()

    # Pick the mail source.
    if args.file:
        load = (mailparser.parse_from_file_msg
                if args.outlook else mailparser.parse_from_file)
        parser = load(args.file)
    elif args.string:
        parser = mailparser.parse_from_string(args.string)

    if args.json:
        # Round-trip through json to pretty-print without ASCII escaping.
        mail_as_dict = json.loads(parser.parsed_mail_json)
        safe_print(json.dumps(mail_as_dict, ensure_ascii=False, indent=4))

    # Options that print exactly one parser attribute, in output order.
    # The attribute is only read when its option is set.
    direct_outputs = (
        ("body", "body"),
        ("headers", "headers"),
        ("to", "to_"),
        ("from_", "from_"),
        ("subject", "subject"),
        ("receiveds", "receiveds"),
    )
    for option, attribute in direct_outputs:
        if getattr(args, option):
            safe_print(getattr(parser, attribute))

    if args.defects:
        for category in parser.defects_category:
            safe_print(category)

    if args.anomalies:
        for anomaly in parser.anomalies:
            safe_print(anomaly)

    if args.senderip:
        sender_ip = parser.get_server_ipaddress(args.senderip)
        safe_print(sender_ip or "Not Found")

    if args.attachments or args.attachments_hash:
        print_attachments(parser.attachments_list, args.attachments_hash)

    if args.mail_hash:
        body_bytes = parser.body.encode("utf-8")
        print_mail_fingerprints(body_bytes)
예제 #4
0
    def test_parse_from_file_msg(self):
        """
        Parse an Outlook sample and check attachments, sender and subject.

        Sample taken from VirusTotal: md5 b89bf096c9e3717f2d218b3307c69bd0

        The mails used by the unit tests were picked at random on VirusTotal,
        where they were already publicly available, so using them is not
        considered a privacy violation.
        """
        parsed = mailparser.parse_from_file_msg(mail_outlook_1)
        mail_dict = parsed.mail

        self.assertIn("attachments", mail_dict)
        attachment_count = len(mail_dict["attachments"])
        self.assertEqual(attachment_count, 5)

        self.assertIn("from", mail_dict)
        first_sender = mail_dict["from"][0]
        self.assertEqual(first_sender[1], "*****@*****.**")

        self.assertIn("subject", mail_dict)
예제 #5
0
def main():
    """Command-line entry point: parse one mail and print what was asked for.

    A logger is created from ``args.log_level``. The mail source is chosen
    from the parsed arguments in this order: ``file`` (Outlook msg parser
    when ``outlook`` is set), ``string``, ``stdin``. Each output option that
    is set is then printed with ``safe_print`` in a fixed order.

    Raises:
        MailParserOutlookError: When ``stdin`` is combined with ``outlook``.
    """
    args = get_args().parse_args()
    log = custom_log(level=args.log_level)

    # Pick the mail source.
    if args.file:
        if not args.outlook:
            parser = mailparser.parse_from_file(args.file)
        else:
            log.debug("Analysis Outlook mail")
            parser = mailparser.parse_from_file_msg(args.file)
    elif args.string:
        parser = mailparser.parse_from_string(args.string)
    elif args.stdin:
        if args.outlook:
            raise MailParserOutlookError(
                "You can't use stdin with msg Outlook")
        parser = mailparser.parse_from_file_obj(sys.stdin)

    # Options that print exactly one parser attribute, in output order.
    # The attribute is only read when its option is set.
    direct_outputs = (
        ("json", "mail_json"),
        ("body", "body"),
        ("headers", "headers_json"),
        ("to", "to_json"),
        ("delivered_to", "delivered_to_json"),
        ("from_", "from_json"),
        ("subject", "subject"),
        ("receiveds", "received_json"),
    )
    for option, attribute in direct_outputs:
        if getattr(args, option):
            safe_print(getattr(parser, attribute))

    if args.defects:
        log.debug("Printing defects")
        for category in parser.defects_categories:
            safe_print(category)

    if args.senderip:
        log.debug("Printing sender IP")
        sender_ip = parser.get_server_ipaddress(args.senderip)
        safe_print(sender_ip or "Not Found")

    if args.attachments or args.attachments_hash:
        log.debug("Printing attachments details")
        print_attachments(parser.attachments, args.attachments_hash)

    if args.mail_hash:
        log.debug("Printing also mail fingerprints")
        body_bytes = parser.body.encode("utf-8")
        print_mail_fingerprints(body_bytes)
예제 #6
0
 def __parse(self, filepath):
     """Return the result of ``parse_from_file_msg`` for ``filepath``."""
     parsed_message = parse_from_file_msg(filepath)
     return parsed_message
예제 #7
0
    def _email_parse_function(self, event, *args, **kwargs):
        """Function: Extract message headers and body parts from an email message (.eml or .msg).
        Any attachments found are added to the Incident as Artifacts if 'utilities_parse_email_attachments' is set to True

        The email comes either from the ``base64content`` input (decoded and
        parsed as a string) or from an incident/task attachment or artifact,
        which is downloaded to a temporary file and parsed according to its
        file extension.

        Yields:
            StatusMessage: Progress and failure notes while processing.
            FunctionResult: The ``ResultPayload.done`` result, yielded last on
                the normal path.
            FunctionError: Yielded instead when any exception escapes the body.
        """
        log = logging.getLogger(__name__)

        # Bound before the ``try`` so the ``finally`` clean-up can always read it.
        path_tmp_dir = None

        try:
            # Set variables
            parsed_email = path_tmp_file = reason = results = None

            # Created on demand: the base64content path only needs a REST
            # client when attachments have to be posted as artifacts.
            res_client = None

            # Get the function inputs:
            fn_inputs = validate_fields(["incident_id"], kwargs)

            # Instantiate ResultPayload
            rp = ResultPayload(CONFIG_DATA_SECTION, **kwargs)

            # If its just base64content as input, use parse_from_string
            if fn_inputs.get("base64content"):
                # Label used in status messages and artifact descriptions.
                email_source = u"provided base64content"

                yield StatusMessage("Processing provided base64content")
                parsed_email = mailparser.parse_from_string(
                    b_to_s(base64.b64decode(fn_inputs.get("base64content"))))
                yield StatusMessage("Provided base64content processed")

            else:

                # Validate that either: (incident_id AND attachment_id OR artifact_id) OR (task_id AND attachment_id) is defined
                has_incident_source = fn_inputs.get("incident_id") and (
                    fn_inputs.get("attachment_id")
                    or fn_inputs.get("artifact_id"))
                has_task_source = fn_inputs.get("task_id") and fn_inputs.get(
                    "attachment_id")
                if not has_incident_source and not has_task_source:
                    raise FunctionError(
                        "You must define either: (incident_id AND attachment_id OR artifact_id) OR (task_id AND attachment_id)"
                    )

                # Instantiate new Resilient API object
                res_client = self.rest_client()

                # Get attachment metadata
                attachment_metadata = get_file_attachment_metadata(
                    res_client=res_client,
                    incident_id=fn_inputs.get("incident_id"),
                    artifact_id=fn_inputs.get("artifact_id"),
                    task_id=fn_inputs.get("task_id"),
                    attachment_id=fn_inputs.get("attachment_id"))

                # Label used in status messages and artifact descriptions.
                email_source = attachment_metadata.get("name")

                # Get attachment content
                attachment_contents = get_file_attachment(
                    res_client=res_client,
                    incident_id=fn_inputs.get("incident_id"),
                    artifact_id=fn_inputs.get("artifact_id"),
                    task_id=fn_inputs.get("task_id"),
                    attachment_id=fn_inputs.get("attachment_id"))

                # Write the attachment_contents to a temp file
                path_tmp_file, path_tmp_dir = write_to_tmp_file(
                    attachment_contents,
                    tmp_file_name=attachment_metadata.get("name"))

                # Get the file_extension
                file_extension = os.path.splitext(path_tmp_file)[1]

                if file_extension == ".msg":
                    yield StatusMessage("Processing MSG File")
                    try:
                        parsed_email = mailparser.parse_from_file_msg(
                            path_tmp_file)
                        yield StatusMessage("MSG File processed")
                    except Exception as err:
                        # Parser failures are reported, not propagated;
                        # parsed_email stays None and is handled below.
                        reason = u"Could not parse {0} MSG File".format(
                            email_source)
                        yield StatusMessage(reason)
                        results = rp.done(success=False,
                                          content=None,
                                          reason=reason)
                        log.error(err)

                else:
                    yield StatusMessage("Processing Raw Email File")
                    try:
                        parsed_email = mailparser.parse_from_file(
                            path_tmp_file)
                        yield StatusMessage("Raw Email File processed")
                    except Exception as err:
                        # Parser failures are reported, not propagated;
                        # parsed_email stays None and is handled below.
                        reason = u"Could not parse {0} Email File".format(
                            email_source)
                        yield StatusMessage(reason)
                        results = rp.done(success=False,
                                          content=None,
                                          reason=reason)
                        log.error(err)

            if parsed_email is not None:
                if not parsed_email.mail:
                    reason = u"Raw email in unsupported format. Failed to parse {0}".format(
                        email_source)
                    yield StatusMessage(reason)
                    results = rp.done(success=False,
                                      content=None,
                                      reason=reason)

                else:
                    # Load all parsed email attributes into a Python Dict.
                    # No ``encoding`` keyword: json.loads ignored it on
                    # Python 3 and rejects it from Python 3.9 on.
                    parsed_email_dict = json.loads(parsed_email.mail_json)
                    parsed_email_dict[
                        "plain_body"] = parsed_email.text_plain_json
                    parsed_email_dict[
                        "html_body"] = parsed_email.text_html_json
                    yield StatusMessage("Email parsed")

                    # If the input 'utilities_parse_email_attachments' is true and some attachments were found
                    attachments_found = parsed_email_dict.get("attachments")
                    if fn_inputs.get("utilities_parse_email_attachments"
                                     ) and attachments_found:

                        yield StatusMessage(
                            "Attachments found in email message")

                        # The base64content path has not created a client yet.
                        if res_client is None:
                            res_client = self.rest_client()

                        artifact_description = u"This email attachment was found in the parsed email message from: '{0}'".format(
                            email_source)

                        # Loop attachments found
                        for attachment in attachments_found:

                            yield StatusMessage(
                                u"Attempting to add {0} to Incident: {1}".
                                format(attachment.get("filename"),
                                       fn_inputs.get("incident_id")))

                            # Write the attachment.payload to a temp file,
                            # reusing the temp directory once one exists.
                            path_tmp_file, path_tmp_dir = write_to_tmp_file(
                                data=s_to_b(attachment.get("payload")),
                                tmp_file_name=attachment.get("filename"),
                                path_tmp_dir=path_tmp_dir)

                            # POST the artifact to Resilient as an 'Email Attachment' Artifact
                            res_client.post_artifact_file(
                                uri=ARTIFACT_URI.format(
                                    fn_inputs.get("incident_id")),
                                artifact_type=EMAIL_ATTACHMENT_ARTIFACT_ID,
                                artifact_filepath=path_tmp_file,
                                description=artifact_description,
                                value=attachment.get("filename"),
                                mimetype=attachment.get("mail_content_type"))

                    results = rp.done(True, parsed_email_dict)

            else:
                reason = u"Raw email in unsupported format. Failed to parse {0}".format(
                    email_source)
                yield StatusMessage(reason)
                results = rp.done(success=False, content=None, reason=reason)

            log.info("Done")

            yield FunctionResult(results)
        except Exception:
            # NOTE(review): FunctionError() is created without arguments;
            # presumably the framework attaches the active exception - confirm.
            yield FunctionError()

        finally:
            # Remove the tmp directory
            if path_tmp_dir and os.path.isdir(path_tmp_dir):
                shutil.rmtree(path_tmp_dir)
예제 #8
0
파일: utils.py 프로젝트: Maddonix/ukw_fax
def export_messages(
    path_attachments: pathlib.Path,
    path_export: pathlib.Path,
    image_as_jpg: bool = False,
):
    """
    Export every Outlook message found in a folder.

    For each entry in ``path_attachments`` the message is parsed, a
    per-sender folder (named after the fax number taken from the "from"
    address) is created below ``path_export``, and a new sub-folder named
    after the timestamp in the first attachment's file name receives:

    * ``mail.json`` - the message fields without "attachments" and "date",
    * the message attachments,
    * ``ocr_result.txt`` - the tesseract text (language "deu") of the first
      attachment's image.

    Nothing is returned.

    Parameters:
    path_attachments - pathlib.Path
        Path to folder containing all messages to be read.
    path_export - pathlib.Path
        Path to folder to save exports in
    image_as_jpg - bool
        If true, image is additionally exported as .jpg file, otherwise it is just exported as .tif file

    Raises:
    AssertionError
        If either folder does not exist, the input folder is empty, or the
        first "from" entry of a message is not a pair.
    """
    assert path_attachments.exists()
    assert path_export.exists()

    ocr_result_name = "ocr_result.txt"

    filenames = list(path_attachments.iterdir())
    assert len(filenames) > 0

    for filename in tqdm(filenames):
        msg = mailparser.parse_from_file_msg(filename)
        msg_dict = msg.mail_partial
        assert len(msg_dict["from"][0]) == 2

        # get fax number of sender
        number = msg_dict["from"][0][1].split("/")[1].split('"')[0]
        # per-sender folder, created on first use
        path_praxis = path_export.joinpath(number)
        path_praxis.mkdir(exist_ok=True)

        name_tif = msg_dict["attachments"][0]["filename"]
        timestamp = name_tif.split("FAX_")[1].split(".")[0]

        # Find an unused folder name: "<timestamp>", then "<timestamp>_(0)",
        # "<timestamp>_(1)", ... The counter must really increment, or a
        # third collision would test the same name forever.
        msg_export_path = path_praxis.joinpath(timestamp)
        base_name = msg_export_path.name
        counter = 0
        while msg_export_path.exists():
            msg_export_path = msg_export_path.with_name(
                f"{base_name}_({counter})")
            counter += 1

        msg_export_path.mkdir()

        # save json (without the bulky / non-serialised fields)
        del msg_dict["attachments"]
        del msg_dict["date"]

        with open(msg_export_path.joinpath("mail.json"), "w") as f:
            json.dump(msg_dict, f)
        # write the attachments next to the json
        msg.write_attachments(msg_export_path)

        # read image and run tesseract; the context manager closes the
        # image file once conversion and OCR are done
        path_to_current_tif = msg_export_path.joinpath(name_tif)
        with Image.open(path_to_current_tif) as img:
            if image_as_jpg:
                img.save(path_to_current_tif.with_suffix(".jpg"))
            msg_txt = pytesseract.image_to_string(img, lang="deu")

        # save text
        with open(msg_export_path.joinpath(ocr_result_name),
                  "w",
                  encoding="utf8") as f:
            f.write(msg_txt)
예제 #9
0
def process_mail(content, filetype, parent_id):
    """
    Single mail task

    Parses one mail, hands it to ``MethMail`` and schedules the follow-up
    work it reports.

    Args:
        content: The raw mail text when ``filetype`` is None, otherwise the
            path of the mail file to parse.
        filetype: None for a mail given as text, ".msg" for an Outlook file;
            any other value is parsed as a raw mail file.
        parent_id: Passed through to ``MethMail`` as ``parent_id``.

    Returns:
        str: An error description when parsing failed, the reported error
        when the mail was ignored, otherwise a summary with the number of
        Cortex checks that were queued.

    Raises:
        Exception: When processing reports an error for a mail that is not
            ignored.
    """
    # IF PARSE FAILS IGNORE
    if filetype is None:
        try:
            content = content.encode("utf_8")
            filepath = store_mail(content)
            msg = mailparser.parse_from_bytes(content)
        except Exception as e:
            logging.error(e)
            return "Error parsing mail from mail server: {}".format(e)

    elif filetype == ".msg":
        try:
            msg = mailparser.parse_from_file_msg(content)
            filepath = content
        except Exception as e:
            logging.error(e)
            return "Error parsing mail from msg attachment: {}".format(e)

    else:
        try:
            msg = mailparser.parse_from_file(content)
            filepath = content
        except Exception as e:
            logging.error(e)
            return "Error parsing mail from eml attachment: {}".format(e)

    info, _, cortex_api = get_info(mail=False)

    methmail = MethMail(
        msg,
        info=info,
        cortex_api=cortex_api,
        mail_filepath=filepath,
        parent_id=parent_id,
    )
    subtasks = methmail.process_mail()

    # Errors must be raised
    if not subtasks["ignore"] and subtasks["error"]:
        logging.error(subtasks["error"])
        raise Exception(subtasks["error"])

    # ignored are ok
    elif subtasks["ignore"]:
        return subtasks["error"]

    # Queue one Cortex check per reported observable.
    cortex_tasks = subtasks["tasks"] or []
    for (ioc, ioc_type, object_id, is_mail) in cortex_tasks:
        check_cortex.apply_async(args=[
            ioc, ioc_type, object_id, is_mail, info.cortex_expiration_days
        ])

    # Attached mails are processed recursively as children of this mail.
    if subtasks["childs"] and subtasks["id"]:
        for child_path, child_ext in subtasks["childs"]:
            process_mail.apply_async(
                args=[child_path, child_ext, subtasks["id"]])

    # Report the number of queued checks, not the number of keys in the
    # result dict.
    return "{} query run on cortex".format(len(cortex_tasks))