Example #1
0
def extract_macros_from_office2003(fullpath, fileobj=None):
    '''
    Extract the VBA macro sources that oletools finds in a document.

    Extraction is best effort: any ordinary error raised while detecting or
    reading macros is swallowed and whatever was collected so far is returned.

    :param fullpath: path of the host document; handed to VBA_Parser and
                     used to build the returned names
    :param fileobj: optional readable object; when given, its whole content
                    is read and handed to VBA_Parser as ``data``
    :return: [(host_fullpath, filename_from_host, data), ... ]
    '''
    from oletools.olevba import VBA_Parser

    vp = VBA_Parser(fullpath, data=fileobj.read() if fileobj else None)

    r = []

    try:
        if vp.detect_vba_macros():
            host_name = os.path.basename(fullpath)
            # An empty result simply yields no entries.
            for (subfullpath, stream_path, vba_filename,
                 vba_code) in vp.extract_all_macros():
                sub_name = os.path.basename(subfullpath)
                vba_filename += u'.vba'
                # Prefix the macro name with the container name only when the
                # macro comes from a sub file rather than from the host itself.
                if host_name != sub_name:
                    vba_filename = u'{0}_{1}'.format(sub_name, vba_filename)
                r.append((io_text_arg(fullpath), io_text_arg(vba_filename),
                          vba_code))
    except Exception:
        # Deliberate best effort: a document that cannot be parsed must not
        # abort the caller.  Unlike a bare ``except``, KeyboardInterrupt and
        # SystemExit still propagate.
        pass
    finally:
        vp.close()

    return r
Example #2
0
def extract_office2003_from_unknown_office(fullpath, fileobj=None):
    '''
    Extract the embedded office2003 (OLE) files from an office document of
    unknown flavour (it may be office2003 or office2007).

    :param fullpath: path of the host document
    :param fileobj: optional file object for the host document; when given it
                    is inspected instead of opening ``fullpath``
    :return: [
            (host_fullpath,filename_from_host,<file_open_handler>),
            ]
    :raises ValueError: when the input is neither an OLE file nor a zip file
    '''

    import zipfile
    import olefile
    import io

    r = []
    source = fileobj if fileobj else fullpath

    if olefile.isOleFile(source):
        # The document itself is an OLE file: hand it back as is.
        r.append((fullpath, os.path.basename(fullpath),
                  fileobj if fileobj else open(fullpath, 'rb')))

    elif zipfile.is_zipfile(source):
        with zipfile.ZipFile(source) as z:
            for subfile in z.namelist():
                # Only peek at the leading bytes to spot OLE members.
                with z.open(subfile) as zt:
                    magic = zt.read(len(olefile.MAGIC))
                if magic == olefile.MAGIC:
                    # z.read() opens and closes the member itself, so no
                    # handle is left dangling for each matching member.
                    r.append((fullpath, io_text_arg(subfile),
                              io.BytesIO(z.read(subfile))))
    else:
        raise ValueError(u'not office file')

    return r
Example #3
0
def extract_attachment_from_msg(fullpath):
    '''
    List the attachments of a msg file opened with ExtractMsg.Message.

    :param fullpath: path of the msg file
    :return: [(host_fullpath, filename_from_host, file_content)]
    '''
    from ExtractMsg import Message

    message = Message(fullpath)
    # One tuple per attachment, named after its long filename.
    return [(fullpath, io_text_arg(item.longFilename), item.data)
            for item in message.attachments]
Example #4
0
def _extract_attachment_from_attachment(attachment, depth, results):
    '''
    Collect the leaf payloads of a message part into ``results``.

    Called by other code and recursively by itself for multipart parts.

    :param attachment: message part; its get_filename(), get(),
                       is_multipart() and get_payload() methods are used
    :param depth: integer marker of this part; it prefixes the generated
                  name, and child parts get ``depth * 10 + index``
    :param results: list that receives (filename, data) tuples in place
    :return: None
    '''

    from base64_to_office import decode_mso_to_office, is_mso_buffer

    # Name resolution: declared filename, then the last component of
    # Content-Location, then a fixed placeholder.
    name = io_text_arg(attachment.get_filename())
    if name is None:
        location = attachment.get(u'Content-Location', None)
        if location:
            name = os.path.split(location)[-1]
    name = u'{0:0<3}.{1}'.format(depth, name or u'noname.emb')

    if attachment.is_multipart():
        base = depth * 10
        children = attachment.get_payload(decode=False)
        for index, child in enumerate(children, 1):
            _extract_attachment_from_attachment(child, base + index, results)
        return

    data = attachment.get_payload(decode=True)

    if is_mso_buffer(data):
        ole = decode_mso_to_office(data)
        if ole:
            # Decoding succeeded: store the decoded content under an
            # '.office' suffixed name.
            results.append((name + u'.office', ole))
            return

    # Not an mso buffer, or decoding produced nothing: keep the raw payload.
    results.append((name, data))
Example #5
0
import argparse
import os
import sys

# Directory that contains this file (symlinks resolved).
curpath = os.path.dirname(os.path.realpath(__file__))
# Put the parent directory on sys.path before the io_in_out imports below;
# presumably io_in_out lives there -- confirm against the repository layout.
sys.path.append(os.path.abspath(os.path.join(curpath, '../')))
from io_in_out import io_text_arg
from io_in_out import io_is_path_valid
from io_in_out import io_sys_stdout
from io_in_out import io_hash_stream
from io_in_out import io_hash_memory
from io_in_out import io_print
from io_in_out import io_files_from_arg
from io_in_out import io_path_format

# Re-bind curpath to whatever io_text_arg returns for it
# (presumably a text/unicode string -- confirm in io_in_out).
curpath = io_text_arg(curpath)


def dump_sub_file(host_fullpath, filename_from_host, data_or_fileobj_to_write):
    '''
    A file extracted from inside a host file may carry a name that is not a
    valid filename, so the file cannot be created; this function normalizes
    the filename.

    :return: the final sub file fullpath
    '''

    import random
    import shutil

    # must detect path sep first
    # Helper that replaces both '/' and '\' in a name with '_'.
    _func_replace_os_path_sep = lambda x: x.replace(u'/', u'_').replace(
        u'\\', u'_')
Example #6
0
def escape_office_10native_from_buffer(stream_buffer):
    '''
    Extract the file (e.g. a PE file) embedded in an OLE stream buffer.

    Layout read from the buffer, little endian:
        4 bytes   size field (not used)
        2 bytes   unknown short (not used)
        cstring   filename
        cstring   source path
        4 bytes   unknown long (not used)
        4 bytes   size of the temporary (destination) path field
        N bytes   destination path, NUL terminated inside the field
        4 bytes   size of the embedded data
        ...       embedded data

    ref https://raw.githubusercontent.com/unixfreak0037/officeparser/master/officeparser.py
    The parser above contains an error; it is corrected here according to the
    Microsoft article below.
    ref https://code.msdn.microsoft.com/office/CSOfficeDocumentFileExtract-e5afce86

    :param stream_buffer: raw stream content as a byte string
    :return: None when the embedded data size is zero, otherwise
             (filename, <fullpath before put in ole>,
              <fullpath to write from ole>, data)
    :raises ValueError: when the destination path is longer than its
                        declared field size
    :raises struct.error: when the buffer is too short for a fixed-size field
    '''

    def _cstring(buf):
        # Return (bytes before the first NUL, index of that NUL); without a
        # NUL the whole buffer is the string.  Using find() on the byte
        # string works on both Python 2 str and Python 3 bytes, where
        # ord(buf[i]) would raise on Python 3.
        end = buf.find(b'\x00')
        if end < 0:
            end = len(buf)
        return buf[:end], end

    # Size field and an unknown short (possibly an OLE type specifier);
    # neither is used, unpacking only validates that they are present.
    struct.unpack('<LH', stream_buffer[0:6])
    data = stream_buffer[6:]

    # filename
    filename, end = _cstring(data)
    data = data[end + 1:]

    # source path
    src_path, end = _cstring(data)
    data = data[end + 1:]

    # First long is unknown; the second is the size of the temporary path
    # field of the embedded file.
    _unknown_long, temp_path_size = struct.unpack('<LL', data[0:8])
    data = data[8:]

    # destination path
    dst_path, _ = _cstring(data)

    # Fix for the bug in the first ref: skip the declared field size rather
    # than the string length, and reject streams where they disagree.
    if len(dst_path) > temp_path_size:
        raise ValueError(u'stream decode error, len(dst_path)>temp_path_size ')

    data = data[temp_path_size:]

    # size of the rest of the data
    actual_size = struct.unpack('<L', data[0:4])[0]
    if not actual_size:
        return None
    data = data[4:]

    # (filename, <fullpath before put in ole>,<fullpath to write from ole>,data)
    filename = io_text_arg(filename)
    fullpath_original = io_text_arg(src_path)
    fullpath_dst = io_text_arg(dst_path)
    return (filename, fullpath_original, fullpath_dst, data[0:actual_size])