Beispiel #1
0
def test_decode(tnefspec):
    """Parse one TNEF fixture and check its key, object codes and attachment names.

    ``tnefspec`` is unpacked as ``(file name, expected key, expected attachment
    names, expected object codes)``; the file is read from ``datadir``.
    """
    fn, key, attchs, objs = tnefspec
    fixture_path = datadir + os.sep + fn
    with open(fixture_path, "rb") as tfile:
        t = TNEF(tfile.read())

    # The whole file was read above, so the checks do not need the handle open.
    assert t.key == key, "wrong key: 0x%2.2x" % t.key

    seen_codes = objcodes(t)
    assert seen_codes == objs, "wrong objs: %s" % ["0x%2.2x" % o.name for o in t.objects]

    attachment_names = [a.name for a in t.attachments]
    assert attachment_names == attchs
Beispiel #2
0
def parseFile():
    """Parse the TNEF document posted in the current request and return its HTML body.

    ``request.data`` is decoded with ``base64.urlsafe_b64decode`` and the
    resulting bytes are handed to ``TNEF``.

    Returns:
        A JSON response of the form ``{"html": <htmlbody>}`` carrying a
        wildcard ``Access-Control-Allow-Origin`` header.
    """
    # Decode and parse entirely in memory. Writing every request to one fixed
    # "data.dat" file and reading it back lets concurrent requests overwrite
    # each other's payload and leaves user data behind on disk.
    raw = base64.urlsafe_b64decode(request.data)
    tnefParse = TNEF(raw)

    response = jsonify({"html": tnefParse.htmlbody})
    response.headers.add('Access-Control-Allow-Origin', '*')
    return response
Beispiel #3
0
def test_decode(tnefspec):
    """Parse one TNEF fixture and check key, object codes and both attachment names.

    ``tnefspec`` is unpacked as ``(file name, expected key, expected attachment
    names, expected object codes)``. Each attachment must match the expected
    name both through its decoded ``name`` and through ``long_filename()``.
    """
    fn, key, attchs, objs = tnefspec
    fixture_path = datadir + os.sep + fn
    with open(fixture_path, "rb") as tfile:
        t = TNEF(tfile.read())

    assert t.key == key, "wrong key: 0x%2.2x" % t.key

    seen_codes = objcodes(t)
    assert seen_codes == objs, "wrong objs: %s" % ["0x%2.2x" % o.name for o in t.objects]

    # TODO: which encoding should we decode from below?
    short_names = [a.name.decode() for a in t.attachments]
    assert short_names == attchs

    long_names = [a.long_filename() for a in t.attachments]
    assert long_names == attchs
Beispiel #4
0
def test_decode(tnefspec):
    """Parse one TNEF fixture and sanity-check everything the parser exposes.

    ``tnefspec`` is unpacked as ``(file name, expected key, expected attachment
    long names, name of the body attribute expected to be set (or a falsy
    value), expected object codes (or a falsy value))``.
    """
    fn, key, attchs, body, objs = tnefspec

    def check_attr(attr):
        # Every attribute must render to a non-empty string and carry data.
        assert attr.__str__()
        assert attr.data is not None

    with open(datadir + os.sep + fn, "rb") as tfile:
        t = TNEF(tfile.read())

    assert t.key == key, "wrong key: 0x%2.2x" % t.key

    for prop in t.mapiprops:
        check_attr(prop)

    for index, attachment in enumerate(t.attachments):
        assert attachment.long_filename() == attchs[index]
        for prop in attachment.mapi_attrs:
            check_attr(prop)

    for prop in t.msgprops:
        check_attr(prop)
        if prop.name != TNEF.ATTRECIPTABLE:
            continue
        # The first entry of a recipient table must consist of MAPI attributes.
        for nested in prop.data[0]:
            assert isinstance(nested, TNEFMAPI_Attribute)

    if t.htmlbody:
        assert 'html' in t.htmlbody

    if not body:
        assert not t.has_body()
    else:
        assert getattr(t, body)
        assert t.has_body()

    if t.rtfbody:
        assert t.rtfbody[:5] == b'{\\rtf'

    if objs:
        assert objcodes(t) == objs, "wrong objs: %s" % ["0x%2.2x" % o.name for o in t.objects]

    # Both dump flavours must produce something truthy.
    assert t.dump(True)
    assert t.dump(False)
Beispiel #5
0
    def _read_tnef(self):
        """Find the first ``application/ms-tnef`` part of ``self.message`` and parse it.

        When such a part exists, ``self.tnef_payload`` is set to the part and
        ``self.tnef_message`` to the ``TNEF`` object built from its
        base64-decoded payload. Nothing is set when the message payload is not
        a list or no part has that content type.
        """
        # TODO: does this work in non-multipart?
        parts = self.message.get_payload()
        if isinstance(parts, list):
            for part in parts:
                if part.get_content_type() != "application/ms-tnef":
                    continue
                # TODO: skip renamed winmail.dat
                raw = base64.b64decode(part.get_payload())
                self.tnef_payload = part
                self.tnef_message = TNEF(raw)
                # Only the first TNEF part is used.
                return
Beispiel #6
0
 async def scan(self, payload: Payload, request: Request) -> WorkerResponse:
     """Extract every attachment of a TNEF payload as a new payload.

     ``payload.content`` is parsed with ``TNEF``. Each attachment's data is
     wrapped in an ``ExtractedPayload`` whose metadata carries the attachment
     file name under ``extra_data['filename']``.

     Returns:
         A ``WorkerResponse`` whose ``extracted`` list holds one entry per
         attachment (empty when there are none).
     """
     extracted: List[ExtractedPayload] = []
     tnef_results = TNEF(payload.content)
     # ``or ()`` keeps the tolerance for a missing/empty attachment list.
     for tnef_attachment in tnef_results.attachments or ():
         try:
             filename = UnicodeDammit(tnef_attachment.name).unicode_markup
         except Exception:
             # Best effort: an undecodable name must not lose the attachment.
             # ``Exception`` (not a bare except) so interpreter-exit signals
             # still propagate.
             filename = "None"
         tnef_meta = PayloadMeta(extra_data={'filename': filename})
         extracted.append(ExtractedPayload(tnef_attachment.data, tnef_meta))
     return WorkerResponse(extracted=extracted)
Beispiel #7
0
def parseFile(request):
    """Parse a base64url-encoded TNEF document and return its body as JSON.

    Args:
        request: Object whose ``data`` attribute holds the base64url-encoded
            TNEF bytes.

    Returns:
        A JSON response with a wildcard ``Access-Control-Allow-Origin`` header:

        * ``{"html": htmlbody}`` when the document has an HTML body;
        * ``{"rtf": converted}`` when it only has an RTF body, where
          ``converted`` is the conversion service's reply minus its first line;
        * ``{"html": body}`` otherwise.
    """

    def cors_json(payload):
        # Every branch answers with the same CORS header.
        response = jsonify(payload)
        response.headers.add('Access-Control-Allow-Origin', '*')
        return response

    # Parse in memory: a fixed "/tmp/data.dat" scratch file is shared by all
    # concurrent invocations and is a predictable temp-file name.
    tnefParse = TNEF(base64.urlsafe_b64decode(request.data))

    if tnefParse.htmlbody is not None:
        return cors_json({"html": tnefParse.htmlbody})

    if tnefParse.rtfbody is None:
        return cors_json({"html": tnefParse.body})

    # RTF only: hand the text to the external RTF-to-HTML conversion function.
    rtf_text = tnefParse.rtfbody.decode()
    # NOTE(review): this strips trailing '\r' first and trailing '\n' second,
    # so a trailing "\r\n" keeps its '\r'. Kept as-is; confirm the intent.
    sendbod = rtf_text.rstrip('\r').rstrip('\n')
    print(sendbod)

    url1 = "https://us-central1-igneous-sweep-257100.cloudfunctions.net/rtfConvert2"
    r = requests.post(url=url1, json={"body": sendbod})

    # Drop the first line of the reply. ``partition`` yields "" instead of
    # raising IndexError when the reply contains no newline at all.
    _, _, trunc = r.text.partition("\n")
    return cors_json({"rtf": trunc})
Beispiel #8
0
def demux_tnef(filename, options):
    """Extract interesting attachments from a TNEF (winmail.dat) container.

    Args:
        filename: Path of the candidate file. Only files with a ``.dat``,
            ``.bin`` or empty extension are inspected.
        options: Accepted for interface compatibility. The temp path is read
            from a fresh ``Config()`` instead, as before.

    Returns:
        List of paths of the files written, one per attachment whose
        lower-cased extension is in ``demux_extensions_list``. Empty when TNEF
        support is unavailable, the extension does not qualify, or anything
        goes wrong (extraction is best effort).
    """
    retlist = []

    if not HAS_TNEFFILE:
        return retlist

    try:
        ext = os.path.splitext(filename)[1]
        if ext not in (".dat", "", ".bin"):
            return retlist

        config = Config()
        tmp_path = config.cuckoo.get("tmppath", "/tmp")
        target_path = os.path.join(tmp_path, "cuckoo-tnef-tmp")
        if not os.path.exists(target_path):
            os.mkdir(target_path)

        with open(filename, "rb") as tfile:
            t = TNEF(tfile.read())

        for a in t.attachments:
            # The attachment name is attacker controlled: keep only its final
            # component so it can neither escape the per-file temp directory
            # ("..", absolute paths) nor name a sub-directory that does not
            # exist. The same sanitised path is written and returned, so the
            # caller always gets a path that exists.
            safe_name = os.path.basename(a.name.replace("\\", "/"))
            if not safe_name:
                continue
            ext = os.path.splitext(safe_name)[1].lower()
            # Skip dot-files such as ".hidden" that have no real extension.
            if ext == "" and safe_name[0] == ".":
                continue
            if ext not in demux_extensions_list:
                continue
            tmp_dir = tempfile.mkdtemp(prefix='cuckootnef_', dir=target_path)
            fullpath = os.path.join(tmp_dir, safe_name)
            with open(fullpath, 'wb') as fp:
                fp.write(a.data)
            retlist.append(fullpath)

    except Exception:
        # Deliberate best effort: a malformed container yields whatever was
        # extracted so far. ``Exception`` rather than a bare except so that
        # KeyboardInterrupt / SystemExit still propagate.
        pass

    return retlist
Beispiel #9
0
def test_decode(tnefspec):
    """Run the generic parser checks against one TNEF fixture file.

    ``tnefspec`` is unpacked as ``(file name, expected key, expected attachment
    long names, name of the body attribute expected to be set (or a falsy
    value), expected object codes (or a falsy value))``.
    """
    fn, key, attchs, body, objs = tnefspec
    fixture_path = datadir + os.sep + fn
    with open(fixture_path, "rb") as tfile:
        raw = tfile.read()
    t = TNEF(raw)

    assert t.key == key, "wrong key: 0x%2.2x" % t.key

    # Message-level MAPI properties.
    for mapi_prop in t.mapiprops:
        assert mapi_prop.__str__()
        assert mapi_prop.data is not None

    # Attachments: expected long file name plus well-formed MAPI attributes.
    for position, attachment in enumerate(t.attachments):
        assert attachment.long_filename() == attchs[position]
        for mapi_attr in attachment.mapi_attrs:
            assert mapi_attr.__str__()
            assert mapi_attr.data is not None

    # Message properties; recipient tables must contain MAPI attributes.
    for msg_prop in t.msgprops:
        assert msg_prop.__str__()
        assert msg_prop.data is not None
        is_recipient_table = msg_prop.name == TNEF.ATTRECIPTABLE
        if is_recipient_table:
            for entry in msg_prop.data[0]:
                assert isinstance(entry, TNEFMAPI_Attribute)

    if t.htmlbody:
        assert 'html' in t.htmlbody

    if body:
        assert getattr(t, body)
        assert t.has_body()
    else:
        assert not t.has_body()

    if t.rtfbody:
        rtf_magic = t.rtfbody[0:5]
        assert rtf_magic == b'{\\rtf'

    if objs:
        assert objcodes(t) == objs, "wrong objs: %s" % ["0x%2.2x" % o.name for o in t.objects]

    for force in (True, False):
        assert t.dump(force)
Beispiel #10
0
def test_decode(tnefspec):
    """Run the generic parser checks against one TNEF fixture under ``DATADIR``.

    ``tnefspec`` is unpacked as ``(file name, expected key, expected attachment
    long names, name of the body attribute expected to be set (or a falsy
    value), expected object codes (or a falsy value))``.
    """
    fn, key, attchs, body, objs = tnefspec

    def check_attr(attr):
        # Every attribute must render to a non-empty string and carry data.
        assert attr.__str__()
        assert attr.data is not None

    raw = (DATADIR / fn).read_bytes()
    t = TNEF(raw)
    assert t.key == key, f"wrong key: 0x{t.key:2.2x}"

    for prop in t.mapiprops:
        check_attr(prop)

    for index, attachment in enumerate(t.attachments):
        assert attachment.long_filename() == attchs[index]
        # Attachment payloads must be exactly ``bytes``.
        assert type(attachment.data) is bytes
        for prop in attachment.mapi_attrs:
            check_attr(prop)

    for prop in t.msgprops:
        check_attr(prop)
        if prop.name != TNEF.ATTRECIPTABLE:
            continue
        for nested in prop.data[0]:
            assert isinstance(nested, TNEFMAPI_Attribute)

    if t.htmlbody:
        assert 'html' in t.htmlbody

    if not body:
        assert not t.has_body()
    else:
        assert getattr(t, body)
        assert t.has_body()

    if t.rtfbody:
        assert t.rtfbody[:5] == b'{\\rtf'

    if objs:
        assert objcodes(t) == objs, "wrong objs: " + str([f"0x{o.name:2.2x}" for o in t.objects])

    assert t.dump(True)
    assert t.dump(False)
Beispiel #11
0
def printkek():
    """Return the HTML body of the TNEF document stored in ``winmail.dat``.

    The file is looked up relative to the current working directory.
    """
    with open("winmail.dat", "rb") as tneffile:
        raw = tneffile.read()
    return TNEF(raw).htmlbody
Beispiel #12
0
def eml2str(msg):
    """Collect readable text fragments from an e-mail message.

    Args:
        msg: Raw message as ``bytes`` or ``str``, or an already parsed
            ``email.message.Message``. Any other type is reported through
            ``eprint`` and then used as-is.

    Returns:
        List of strings. Inline ``text/*`` parts contribute their (HTML
        stripped) content. While the longest text part seen so far is shorter
        than 200 characters, PDF / RTF / DOCX / TNEF parts are converted to
        text as well and kept when longer than 50 characters.

    Failures while handling a single part are logged with ``eprint`` and do
    not abort the remaining parts.
    """
    if isinstance(msg, bytes):
        msg = email.message_from_bytes(msg)
    elif isinstance(msg, str):
        msg = email.message_from_string(msg)
    elif not isinstance(msg, email.message.Message):
        eprint(type(msg))

    # Charset labels seen in the wild that Python's codec registry rejects.
    charset_aliases = {
        "cp-850": "cp850",
        "_iso-2022-jp$esc": "iso-2022-jp",
        "iso-8859-8-i": "iso-8859-8",
        "windows-874": "cp874",
        "x-mac-ce": "maccentraleurope",
    }
    html_markers = ("<body", "<img", "<style", "<center", "<a href")

    text = []
    textlen = 0
    for part in msg.walk():
        charset = part.get_content_charset("utf-8")
        if not charset:
            charset = "iso8859-2"
        elif charset in charset_aliases:
            charset = charset_aliases[charset]
        elif charset.startswith("utf8"):
            charset = "utf-8"
        ctyp = part.get_content_type().lower()
        fnev = hdrdecode(str(part.get_filename())).lower()
        disp = part.get_content_disposition()

        if ctyp.split('/')[0] == "text" and disp != "attachment":
            try:
                data = part.get_payload(decode=True)
                try:
                    # 'mixed' is an error handler expected to be registered
                    # elsewhere in the project.
                    data = data.decode(charset, 'mixed')
                except Exception:
                    # Unknown or wrong charset label: retry as UTF-8.
                    data = data.decode("utf-8", 'mixed')
                data = xmldecode(data)
                ldata = data.lower()
                looks_like_html = '<' in data and any(
                    marker in ldata for marker in html_markers)
                if ctyp in ("text/html", "text/xml") or looks_like_html:
                    # Drop everything before <body>. The index gets its own
                    # name so the part being iterated is not shadowed.
                    body_at = ldata.find("<body")
                    if body_at > 0:
                        data = data[body_at:]
                    data = html2text(data)
                    text.append(data)
                elif ctyp == "text/plain":
                    text.append(data)
                textlen = max(textlen, len(data))
            except Exception:
                eprint(traceback.format_exc())
        elif textlen < 200:
            # Little or no inline text so far: try to read attachments.
            s = ""
            t0 = time.time()
            try:
                if (ctyp == "application/pdf"
                        or fnev.endswith(".pdf")) and pdf_support:
                    eprint("PDF: parsing file: " + fnev)
                    if pdf_support == "pdfminer":
                        s = pdfminer.high_level.extract_text(
                            io.BytesIO(part.get_payload(decode=True)),
                            maxpages=3)
                    elif pdf_support == "pdftotext":
                        pdf = pdftotext.PDF(
                            io.BytesIO(part.get_payload(decode=True)))
                        # Keep the first page with more than 200 characters,
                        # or the last page when none is that long.
                        for page in pdf:
                            s = str(page)
                            if len(s) > 200:
                                break
                elif (ctyp == "application/rtf"
                      or fnev.endswith(".rtf")) and rtf_support:
                    eprint("RTF: parsing file: " + fnev)
                    s = rtf_to_text(
                        part.get_payload(decode=True).decode("utf-8", "ignore"))
                elif (ctyp == "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
                      or fnev.endswith(".docx")):
                    eprint("DOCX: parsing file: " + fnev)
                    with zipfile.ZipFile(
                            io.BytesIO(part.get_payload(decode=True))) as zipf:
                        document_xml = zipf.read('word/document.xml').decode("utf-8")
                    pieces = []
                    for chunk in document_xml.split("<"):
                        try:
                            tag, txt = chunk.split(">", 1)
                            tag1 = tag.split()[0]
                        except (ValueError, IndexError):
                            # No '>' in the chunk, or an empty tag.
                            continue
                        if tag1 == "w:t":
                            pieces.append(txt)
                        elif tag1 in ["w:tab", "w:br", "w:cr", "w:p"]:
                            pieces.append("\t")
                    s = "".join(pieces)
                elif (ctyp == "application/ms-tnef"
                      or fnev == "winmail.dat") and tnef_support:
                    eprint("TNEF: parsing file: " + fnev)
                    tnefobj = TNEF(part.get_payload(decode=True))
                    tnefcp = tnefobj.codepage if tnefobj.codepage else "cp1250"
                    if tnefobj.htmlbody:
                        try:
                            s = html2text(
                                tnefobj.htmlbody.decode("utf-8", "strict"))
                        except Exception:
                            # Not valid UTF-8: fall back to the TNEF codepage.
                            s = html2text(
                                tnefobj.htmlbody.decode(tnefcp, "ignore"))
                    elif tnef_support > 1 and tnefobj.rtfbody:
                        s = rtf_to_text(
                            tnefobj.rtfbody.decode(tnefcp, "ignore"))
                elapsed = time.time() - t0
                # NOTE(review): looks like leftover debug output; kept so
                # stdout is unchanged.
                print(s)
                if len(s) > 50:
                    eprint("parsed: %d chars, %d ms" % (len(s), elapsed * 1000))
                    text.append(s)
            except Exception:
                eprint(traceback.format_exc())
    return text
Beispiel #13
0
    def scan(self, payload, **kwargs):
        """Carve e-mail sessions out of ``payload`` and yield one dict per message.

        This is a generator. The ``return False`` / ``return True`` statements
        below only end iteration early; a caller that simply iterates never
        sees those values.

        Args:
            payload: Raw SMTP session content; a falsy value is logged and
                ends the scan.
            **kwargs: ``filename`` is required (passed to
                ``self.vortex_metadata``); ``uuid`` is optional.

        Yields:
            A dict per carved message with the lower-cased headers, ``body``
            and ``body_html`` (removed when ``self.omit_body`` is set), ``att``
            (attachment results), ``ips`` / ``urls`` when ``self.extract_iocs``
            is set and matches were found, and ``<field>_isnew`` flags when
            ``self.use_bloom`` is set.
        """

        if not payload:
            self.log.warn(
                "SMTP session is empty. Do you have permission to the source?")
            return False

        # Grab the uuid of so we can pass it off to the attachment
        uuid = kwargs.get('uuid', [self.stoq.get_uuid])

        # Get the appropriate metadata from the vortex filename
        vortex_meta = self.vortex_metadata(kwargs['filename'])

        # If vortex_meta returns False, it means the payload being analyzed is
        # the client session, which contains useless information. Let's just
        # skip it.
        if vortex_meta is False:
            self.log.debug("Vortex client sessions provided, skipping...")
            return True

        # Iterate over each e-mail session
        for email_session in self.carve_email(payload):
            email_session = self.stoq.force_unicode(email_session)
            message_json = {}
            message = pyzmail.message_from_string(email_session)

            if vortex_meta:
                # Setup our primary message json blob
                message_json = vortex_meta.copy()
                message_json['vortex_filename'] = kwargs['filename']

            # Create a dict of the headers in the session
            for k, v in list(message.items()):
                curr_header = k.lower()
                if curr_header in message_json:
                    # If the header key already exists, let's join them
                    message_json[curr_header] += "\n{}".format(
                        message.get_decoded_header(k))
                else:
                    message_json[curr_header] = message.get_decoded_header(k)

            # Extract the e-mail body, to include HTML if available
            if message.text_part is not None:
                message_json['body'] = self.stoq.force_unicode(
                    message.text_part.get_payload())
            else:
                message_json['body'] = ""

            if message.html_part is not None:
                message_json['body_html'] = self.stoq.force_unicode(
                    message.html_part.get_payload())
            else:
                message_json['body_html'] = ""

            # Make this easy, merge both text and html body within e-mail
            # for the purpose of extracting any URIs
            email_body = "{}{}".format(message_json['body'],
                                       message_json['body_html'])

            # Extract and normalize any IP addresses in headers
            if self.extract_iocs:
                # str of concatenated ip_headers
                concat_ips = ""

                # Define which headers we want to extract IP addresses from
                ip_headers = [
                    'src_ip', 'dest_ip', 'received', 'x-orig-ip',
                    'x-originating-ip', 'x-remote-ip', 'x-sender-ip'
                ]

                # concat all of our headers into one string for easy searching
                for ip_header in ip_headers:
                    if ip_header in message_json:
                        concat_ips += message_json[ip_header]

                extracted_ips = self.readers['iocregex'].read(
                    concat_ips, datatype_flag='ipv4')

                # Let's get a unique list of IP addresses from extracted data
                if 'ipv4' in extracted_ips:
                    message_json['ips'] = extracted_ips['ipv4']

                # extract and normalize any URLs found
                extracted_urls = self.readers['iocregex'].read(
                    email_body, datatype_flag='url')

                # Extract any URLs that may be in the merged body
                if 'url' in extracted_urls:
                    message_json['urls'] = extracted_urls['url']

            # Handle attachments
            message_json['att'] = []
            for mailpart in message.mailparts:
                try:
                    filename = mailpart.filename
                except TypeError:
                    filename = "None"

                # This is a check for winmail.dat files. If successful,
                # skip_attachment will be True and we will use the
                # results from that instead of winmail.dat file itself.
                skip_attachment = False

                if mailpart.type == "text/plain":
                    try:
                        message_json['body'] += self.stoq.force_unicode(
                            mailpart.get_payload())
                    except Exception:
                        # Best effort: an undecodable text part is dropped.
                        pass
                    skip_attachment = True
                else:

                    if filename == "winmail.dat":
                        tnef_results = TNEF(mailpart.get_payload())

                        # we have data, let's handle it.
                        if tnef_results.attachments:
                            # We have a valid file within winmail.dat,
                            # let's make sure we only handle it here.
                            skip_attachment = True
                            for tnef_attachment in tnef_results.attachments:
                                try:
                                    filename = self.stoq.force_unicode(
                                        tnef_attachment.name)
                                except Exception:
                                    filename = "None"

                                try:
                                    # NOTE(review): this reads
                                    # message_json['uuid'] while the regular
                                    # path below passes the local ``uuid``. A
                                    # missing key raises KeyError, which is
                                    # swallowed here and silently drops the
                                    # attachment - confirm which is intended.
                                    attachment_json = self.handle_attachments(
                                        payload=tnef_attachment.data,
                                        filename=filename,
                                        uuid=message_json['uuid'])
                                    if attachment_json:
                                        message_json['att'].append(
                                            attachment_json)
                                except Exception:
                                    # Best effort per embedded attachment.
                                    pass

                # Let's handle the attachment normally
                if not skip_attachment:
                    attachment_json = self.handle_attachments(
                        payload=mailpart.get_payload(),
                        filename=filename,
                        uuid=uuid)
                    if attachment_json:
                        attachment_json['desc'] = mailpart.part.get(
                            'Content-Description')
                        attachment_json['type'] = mailpart.type
                        message_json['att'].append(attachment_json)

            if self.use_bloom:
                # Check bloom filters
                for field_name, field_bloom in self.bloomfilters.items():

                    # If the configured field name exists in parsed data...
                    if field_name in message_json:

                        # extract the field value and check if it has been seen
                        # before...
                        field_value = message_json[field_name]
                        seen_before = field_bloom.query_filter(
                            field_value, add_missing=True)

                        # Generate JSON entry key for flagging new field values
                        field_flag = "{}_isnew".format(field_name)

                        # Flag the value as new exactly when the filter has
                        # not seen it before.
                        message_json[field_flag] = not seen_before

            # Make sure we delete the body and body_html keys if they are to
            # be omitted
            if self.omit_body:
                message_json.pop('body', None)
                message_json.pop('body_html', None)

            yield message_json
Beispiel #14
0
def extractAttachment(msg, dirname, uuid):
    """Write out the attachments of ``msg`` and drop the ones that get queued.

    Python 2 code (``print`` statements). For every multipart node of ``msg``
    each child part with a file name is written via ``writeFile`` and checked
    with ``Decrypt.Check_Encryption``; ``winmail.dat`` / ``win.dat`` parts are
    unpacked with ``TNEF`` first. Parts that are kept are collected into a new
    payload list that replaces the node's payload; the names of the others are
    reported through ``elastic.UpdateAtts``.

    Returns ``msg.as_string()`` after the payloads have been rewritten.
    """
    # Address this host name resolves to; used to build the link base URL.
    lip = socket.gethostbyname(socket.gethostname())
    an = 0  # not used below
    base_url = '''http://%s:4000/''' % lip
    # Becomes True once any attachment has been queued; HTML parts visited
    # afterwards get the link inserted.
    FlagLink = False
    kuku = False  # not used below

    #print msg.get_payload()
    for msgrep in msg.walk():
        i = 0  # incremented per child part but never read
        if msgrep.is_multipart():
            payload = msgrep.get_payload()
            # "=3D" presumably is a quoted-printable escaped "=" - TODO confirm.
            link = """<a href=3D"%s/%s/show">Somae file are here </a></head>""" % (
                base_url, str(uuid))
            newpayload = []  # parts kept in the message
            q_att = []  # names of parts that are dropped from the message
            for attachment in payload:
                print attachment.get_content_type()
                if "html" in attachment.get_content_type():

                    if FlagLink:
                        # Insert the link right before the first "</head>".
                        # NOTE(review): a payload without "</head>" makes
                        # a[1] raise IndexError, and text after a second
                        # "</head>" is dropped.
                        hhh = attachment.get_payload()
                        a = hhh.split("</head>")
                        newhhh = a[0] + link + a[1]
                        attachment.set_payload(newhhh)

                att_name = attachment.get_filename(None)
                if att_name is not None:

                    if att_name.lower() in ["winmail.dat", "win.dat"]:
                        file_buf = attachment.get_payload(decode=True)
                        winmail = TNEF(file_buf)
                        for att in winmail.attachments:
                            print att.name
                            f = writeFile(att.name, att.data, dirname)
                            # NOTE(review): the other branch unpacks this call
                            # into ``pf, a``; here the whole result lands in
                            # ``a`` and ``pf`` is not assigned, so ``pf`` below
                            # is either stale from an earlier part or unbound.
                            # Confirm the intended handling.
                            a = Decrypt.Check_Encryption(f, uuid)
                    else:
                        f = writeFile(att_name,
                                      attachment.get_payload(decode=True),
                                      dirname)
                        pf, a = Decrypt.Check_Encryption(f, uuid)

                    if not a:
                        # Scan the raw bytes; keep the part unless the scan
                        # result mentions "encrypted" while ``pf`` is falsy.
                        file_buf = attachment.get_payload(decode=True)
                        res = clamcheck_buf(file_buf)
                        print str(res).lower()
                        if pf or "encrypted" not in str(res).lower():
                            newpayload.append(attachment)
                        else:
                            q_att.append(att_name)
                    else:
                        q_att.append(att_name)
                        FlagLink = True
                else:
                    # Parts without a file name are always kept.
                    newpayload.append(attachment)
                i += 1
            msgrep.set_payload(newpayload)
            if not q_att == []:
                elastic.UpdateAtts(q_att, uuid)

    #if FlagLink:
    #    msg = ModHtml(uuid,msg)

    return msg.as_string()