Esempio n. 1
0
def zip_do_zip(azip, afile):
    """Scan every member of an open zip archive for secrets.

    azip: ZipFile -- an already-opened archive
    afile: str -- source archive(s) name, used as a label in log entries
    """
    # testzip() raises RuntimeError mentioning 'encrypted' on
    # password-protected archives; report those and stop, but keep going
    # after any other error (members may still be readable)
    try:
        azip.testzip()
    except RuntimeError as e:
        if 'encrypted' in str(e):
            log_encrypted(BF_ZIP, afile)
            return
        else:
            log_error(str(e), afile)

    # iterate directly over file names
    for member in azip.namelist():
        # sort directories out (their names end with '/')
        if member.endswith('/'):
            continue
        # check file name against the file-name regex
        filename = os.path.basename(member).lower()
        res = RGX_INFILENAME.search(filename)
        if res:
            log_secret(res.group(), afile + ':' + member)

        # check file content, calling other modules
        data = azip.read(member)
        (ftype, supported) = type_data(data, member)
        if supported:
            if ftype in ENCRYPTED:
                # NOTE(review): logged with member only, while the other
                # calls here use afile + ':' + member -- confirm intended
                log_encrypted(ftype, member)
            else:
                do_data(ftype, data, afile + ':' + member)
Esempio n. 2
0
def xlsx_do_file(afile):
    """Open the workbook at afile and scan it; log and give up on XLRDError."""
    try:
        workbook = open_workbook(afile)
    except XLRDError as e:
        log_error(str(e), afile)
    else:
        xlsx_do_xlsx(workbook, afile)
Esempio n. 3
0
def init(path):
    """Walk path, counting files and their total size.

    Logs a human-readable summary and returns the file count.
    """
    log_comment('initializing...')
    total_size = 0
    count = 0

    for root, dirs, files in os.walk(path):
        # prune skipped directories in place so os.walk will not descend
        for skip in SKIP:
            if skip in dirs:
                dirs.remove(skip)
        for filename in files:
            apath = os.path.join(root, filename)
            count += 1
            try:
                total_size += os.path.getsize(apath)
            except OSError as e:
                # broken symlink, permission problem, vanished file...
                log_error(str(e), filename)

    readable = total_size

    for unit in ['bytes', 'KiB', 'MiB', 'GiB', 'TiB']:
        if readable < 1024:
            log_comment('%d files, %3.1f %s' % (count, readable, unit))
            return count
        readable /= 1024.0
    # BUG FIX: the loop previously fell through and returned None when the
    # total reached 1024 TiB; still report (in TiB) and return the count
    log_comment('%d files, %3.1f %s' % (count, readable * 1024, 'TiB'))
    return count
Esempio n. 4
0
def zip_do_zip(azip, afile):
    """Scan the members of an open ZipFile for secrets.

    azip: ZipFile, afile: source archive(s) name (log label).
    """
    # an encrypted archive is reported and skipped; any other testzip()
    # failure is logged but scanning continues
    try:
        azip.testzip()
    except RuntimeError as e:
        message = str(e)
        if 'encrypted' in message:
            log_encrypted(BF_ZIP, afile)
            return
        log_error(message, afile)

    for member in azip.namelist():
        # directory entries carry a trailing slash -- nothing to read
        if member.endswith('/'):
            continue
        # match the member's base name against the file-name regex
        lowered = os.path.basename(member).lower()
        match = RGX_INFILENAME.search(lowered)
        if match:
            log_secret(match.group(), afile + ':' + member)

        # type the member's content and hand it to the matching module
        payload = azip.read(member)
        ftype, supported = type_data(payload, member)
        if not supported:
            continue
        if ftype in ENCRYPTED:
            log_encrypted(ftype, member)
        else:
            do_data(ftype, payload, afile + ':' + member)
Esempio n. 5
0
def xlsx_do_data(data, afile):
    """Parse in-memory xlsx bytes and scan the resulting workbook."""
    try:
        workbook = open_workbook(file_contents=data)
    except XLRDError as e:
        log_error(str(e), afile)
    else:
        xlsx_do_xlsx(workbook, afile)
Esempio n. 6
0
def pdf_do_file(afile):
    """Open afile in binary mode and extract/scan its text via pdf_do_pdf."""
    try:
        fid = open(afile, 'rb')
    except IOError as e:
        log_error(str(e), afile)
        # BUG FIX: without this return, fid is undefined below -> NameError
        return
    try:
        pdf_do_pdf(fid, afile)
    finally:
        # close even if pdf_do_pdf raises
        fid.close()
Esempio n. 7
0
def pdf_do_file(afile):
    """Open afile in binary mode and extract/scan its text via pdf_do_pdf."""
    try:
        fid = open(afile, 'rb')
    except IOError as e:
        log_error(str(e), afile)
        # BUG FIX: must bail out here -- fid is undefined on this path and
        # the code previously fell through to a NameError
        return
    try:
        pdf_do_pdf(fid, afile)
    finally:
        fid.close()
Esempio n. 8
0
def init(path):
    """Determine the number of files under path and their total size.

    Logs a summary line and returns the file count.
    """
    log_comment('initializing...')
    total_size = 0
    count = 0

    for root, dirs, files in os.walk(path):
        # remove skipped directories in place to stop os.walk descending
        for skip in SKIP:
            if skip in dirs:
                dirs.remove(skip)
        for filename in files:
            apath = os.path.join(root, filename)
            count += 1
            try:
                total_size += os.path.getsize(apath)
            except OSError as e:
                log_error(str(e), filename)

    readable = total_size

    for unit in ['bytes', 'KiB', 'MiB', 'GiB', 'TiB']:
        if readable < 1024:
            log_comment('%d files, %3.1f %s' % (count, readable, unit))
            return count
        readable /= 1024.0
    # BUG FIX: previously returned None when the total exceeded the unit
    # table; log the value in TiB and still return the count
    log_comment('%d files, %3.1f %s' % (count, readable * 1024, 'TiB'))
    return count
Esempio n. 9
0
def docx_do_file(afile):
    """Open afile as a zip archive (docx container) and scan it."""
    try:
        archive = zipfile.ZipFile(afile)
    except zipfile.BadZipfile as e:
        log_error(str(e), afile)
    else:
        docx_do_docx(archive, afile)
        archive.close()
Esempio n. 10
0
def gz_do_file(afile):
    """Open a gzip file from disk and scan its decompressed content."""
    try:
        handle = gzip.GzipFile(afile)
    except IOError as e:
        log_error(str(e), afile)
    else:
        gz_do_gz(handle, afile)
        handle.close()
Esempio n. 11
0
def docx_do_file(afile):
    """Open afile as a zip archive (docx container) and scan it.

    Logs and returns silently when the file is not a valid zip.
    """
    try:
        azip = zipfile.ZipFile(afile)
    except zipfile.BadZipfile as e:
        log_error(str(e), afile)
        return
    docx_do_docx(azip, afile)
    azip.close()
Esempio n. 12
0
def tar_do_file(afile):
    """Open a tar archive from disk and scan its members."""
    try:
        archive = tarfile.open(afile)
    except tarfile.TarError as e:
        log_error(str(e), afile)
    else:
        tar_do_tar(archive, afile)
        archive.close()
Esempio n. 13
0
def gz_do_file(afile):
    """Open a gzip file from disk and scan its decompressed content.

    Logs and returns silently when the file cannot be opened.
    """
    try:
        agz = gzip.GzipFile(afile)
    except IOError as e:
        log_error(str(e), afile)
        return
    gz_do_gz(agz, afile)
    agz.close()
Esempio n. 14
0
def gz_do_data(data, afile):
    """Scan in-memory gzip bytes; afile is the label used in log entries."""
    # wrap the raw bytes so GzipFile can treat them as a file
    buffered = io.BytesIO(data)
    try:
        handle = gzip.GzipFile(fileobj=buffered)
    except IOError as e:
        log_error(str(e), afile)
        return
    gz_do_gz(handle, afile)
    handle.close()
Esempio n. 15
0
def docx_do_data(data, afile):
    """Scan in-memory docx (zip) bytes; afile is the log label."""
    # ZipFile needs a seekable file object, so wrap the bytes
    buffered = io.BytesIO(data)
    try:
        archive = zipfile.ZipFile(buffered)
    except zipfile.BadZipfile as e:
        log_error(str(e), afile)
        return
    docx_do_docx(archive, afile)
    archive.close()
Esempio n. 16
0
def gz_do_data(data, afile):
    """Scan in-memory gzip data; afile is the label used in log entries."""
    # wrap the raw bytes so GzipFile can read them like a file
    filelike = io.BytesIO(data)
    try:
        agz = gzip.GzipFile(fileobj=filelike)
    except IOError as e:
        log_error(str(e), afile)
        return
    gz_do_gz(agz, afile)
    agz.close()
Esempio n. 17
0
def text_do_file(afile):
    """Read afile as text, lowercase it, and scan it with text_do_data."""
    try:
        fid = open(afile)
    except IOError as e:
        log_error(str(e), afile)
        return
    contents = fid.read()
    fid.close()
    text_do_data(contents.lower(), afile)
Esempio n. 18
0
def docx_do_data(data, afile):
    """Scan in-memory docx (zip) data; afile is the log label."""
    # ZipFile requires a seekable file object, so wrap the bytes
    filelike = io.BytesIO(data)
    try:
        azip = zipfile.ZipFile(filelike)
    except zipfile.BadZipfile as e:
        log_error(str(e), afile)
        return
    docx_do_docx(azip, afile)
    azip.close()
Esempio n. 19
0
def bzip2_do_file(afile):
    """Read a bzip2 archive from disk and pass its raw bytes on."""
    try:
        # BUG FIX: open in binary mode -- bz2 data is not text; text mode
        # corrupts it on Windows (and fails to decode on Python 3)
        fid = open(afile, 'rb')
        abzip2 = fid.read()
    except IOError as e:
        log_error(str(e), afile)
        return
    bzip2_do_bzip2(abzip2, afile)
    fid.close()
Esempio n. 20
0
def bzip2_do_file(afile):
    """Read a bzip2 archive from disk and pass its raw bytes on."""
    try:
        # BUG FIX: binary mode; compressed data read in text mode is
        # mangled on Windows and undecodable on Python 3
        fid = open(afile, 'rb')
        abzip2 = fid.read()
    except IOError as e:
        log_error(str(e), afile)
        return
    bzip2_do_bzip2(abzip2, afile)
    fid.close()
Esempio n. 21
0
def tar_do_data(data, afile):
    """Scan in-memory tar bytes; afile is the label used in log entries."""
    # tarfile needs a file object, so wrap the raw bytes
    buffered = io.BytesIO(data)
    try:
        archive = tarfile.open(fileobj=buffered)
    except tarfile.TarError as e:
        log_error(str(e), afile)
        return
    tar_do_tar(archive, afile)
    archive.close()
Esempio n. 22
0
def text_do_file(afile):
    """Read afile as text, lowercase it, and scan it with text_do_data."""
    try:
        fid = open(afile)
    except IOError as e:
        log_error(str(e), afile)
        return
    try:
        data = fid.read().lower()
    finally:
        # BUG FIX: close the handle even when read() raises
        fid.close()
    text_do_data(data, afile)
Esempio n. 23
0
def scan(path, count):
    """Select files under path to process, checking file names and types.

    count: expected number of files (from init()), used to size the
    progress bar. Returns the number of files actually processed.
    """
    log_comment('scanning %s:' % path)
    scanned = 0
    # progress bar: at most 32 blocks, at least 1
    bar_width = 32
    if count < bar_width:
        bar_width = count
    if count == 0:
        bar_width = 1
    sys.stdout.write('%s\n' % ("=" * bar_width))
    bar_blocksize = count / bar_width
    bar_left = bar_width
    bar_count = 0

    for root, dirs, files in os.walk(path):
        # prune skipped directories in place so os.walk does not descend
        for skip in SKIP:
            if skip in dirs:
                dirs.remove(skip)
        for filename in files:
            abspath = os.path.abspath(os.path.join(root, filename))
            # the file name itself may be a secret indicator
            res = RGX_INFILENAME.search(filename.lower())
            if res:
                log_secret(res.group(), abspath)

            try:
                ftype, supported = type_file(abspath)
            except TypeError as e:
                # type_file returns None on I/O errors; unpacking it
                # raises TypeError, handled here
                log_error(str(e), abspath)
                continue

            if supported:
                if ftype in ENCRYPTED:
                    # report but do not process
                    log_encrypted(ftype, abspath)
                # BUG FIX: was a separate 'if', so encrypted files fell
                # into the else branch below and were processed anyway
                elif ftype in EXE:
                    # report but do not process
                    if looks_uniform(filename=abspath):
                        log_packed(ftype, abspath)
                    else:
                        log_exe(ftype, abspath)
                else:
                    # process the file
                    do_file(ftype, abspath)
                    scanned += 1

            # update progress bar
            bar_count += 1
            if bar_count >= bar_blocksize and bar_left:
                sys.stdout.write("=")
                sys.stdout.flush()
                bar_count = 0
                bar_left -= 1

    sys.stdout.write("\n")
    log_comment('%d files supported were processed' % scanned)
    return scanned
Esempio n. 24
0
def scan(path, count):
    """Select files under path to process, checking file names and types.

    count: expected number of files (from init()), used to size the
    progress bar. Returns the number of files actually processed.
    """
    log_comment('scanning %s:' % path)
    scanned = 0
    # progress bar: at most 32 blocks, at least 1
    bar_width = 32
    if count < bar_width:
        bar_width = count
    if count == 0:
        bar_width = 1
    sys.stdout.write('%s\n' % ("=" * bar_width))
    bar_blocksize = count / bar_width
    bar_left = bar_width
    bar_count = 0

    for root, dirs, files in os.walk(path):
        # prune skipped directories in place so os.walk does not descend
        for skip in SKIP:
            if skip in dirs:
                dirs.remove(skip)
        for filename in files:
            abspath = os.path.abspath(os.path.join(root, filename))
            # the file name itself may be a secret indicator
            res = RGX_INFILENAME.search(filename.lower())
            if res:
                log_secret(res.group(), abspath)

            try:
                ftype, supported = type_file(abspath)
            except TypeError as e:
                # type_file returns None on I/O errors; unpacking it
                # raises TypeError, handled here
                log_error(str(e), abspath)
                continue

            if supported:
                if ftype in ENCRYPTED:
                    # report but do not process
                    log_encrypted(ftype, abspath)
                # BUG FIX: this was 'if', letting encrypted files reach
                # the else branch and get processed despite the comment
                elif ftype in EXE:
                    # report but do not process
                    if looks_uniform(filename=abspath):
                        log_packed(ftype, abspath)
                    else:
                        log_exe(ftype, abspath)
                else:
                    # process the file
                    do_file(ftype, abspath)
                    scanned += 1

            # update progress bar
            bar_count += 1
            if bar_count >= bar_blocksize and bar_left:
                sys.stdout.write("=")
                sys.stdout.flush()
                bar_count = 0
                bar_left -= 1

    sys.stdout.write("\n")
    log_comment('%d files supported were processed' % scanned)
    return scanned
Esempio n. 25
0
def load_dictionary_file(afile):
    """Read a custom dictionary file, one lowercased pattern per line.

    Returns a list of lines; an empty list when the file cannot be read
    (BUG FIX: previously returned None on IOError, which crashed callers
    that iterate over the result).
    """
    log_comment('adding custom dictionary %s to infile' % afile)
    try:
        fid = open(afile)
    except IOError as e:
        log_error(str(e), afile)
        return []
    data = fid.read().lower()
    fid.close()
    return data.splitlines()
Esempio n. 26
0
def xlsx_do_xlsx(axl, afile):
    """Flatten every sheet of workbook axl to plain text and scan it.

    axl: workbook object (opened via open_workbook), afile: log label.
    """
    rows = []
    try:
        for i in xrange(axl.nsheets):
            sheet = axl.sheet_by_index(i)
            for j in xrange(sheet.nrows):
                # NOTE(review): join() raises TypeError on non-string
                # cells (e.g. numeric values), which aborts the whole
                # workbook via the handler below -- confirm intended
                rows.append(' '.join(sheet.row_values(j)))
    except TypeError as e:
        log_error(str(e), afile)
        return

    # blank line between rows/sheets in the flattened text
    text = '\n\n'.join(rows)
    text_do_data(text, afile)
Esempio n. 27
0
def gz_do_gz(agz, afile):
    """Decompress an open GzipFile, type the payload and dispatch it.

    agz: GzipFile, afile: source file name (log label).
    """
    try:
        payload = agz.read()
    except IOError as e:
        # corrupt or truncated stream
        log_error(str(e), afile)
        return
    ftype, supported = type_data(payload)
    if not supported:
        return
    # report the inner file name when the source carries a .gz extension
    root, ext = os.path.splitext(afile)
    if ext.lower() == '.gz':
        do_data(ftype, payload, afile + ':' + root)
    else:
        do_data(ftype, payload, afile)
Esempio n. 28
0
def gz_do_gz(agz, afile):
    """agz:GzipFile, afile:source file name

    Decompresses the stream, types the payload, and dispatches it to
    do_data() when the type is supported.
    """
    try:
        data = agz.read()
    except IOError as e:
        # corrupt or truncated gzip stream
        log_error(str(e), afile)
        return
    (ftype, supported) = type_data(data)
    if supported:
        # strip any .gz extension so the log shows the inner file name
        (root, ext) = os.path.splitext(afile)
        if ext.lower() == '.gz':
            do_data(ftype, data, afile+':'+root)
        else:
            do_data(ftype, data, afile)
Esempio n. 29
0
def bzip2_do_bzip2(abzip2, afile):
    """Decompress raw bzip2 bytes, type the payload and dispatch it.

    abzip2: raw bytes, afile: source file name (log label).
    """
    try:
        payload = bz2.decompress(abzip2)
    except (IOError, ValueError) as e:
        log_error(str(e), afile)
        return
    ftype, supported = type_data(payload)
    if not supported:
        return
    # report the inner name when the source carries a .bz2 extension
    root, ext = os.path.splitext(afile)
    if ext.lower() == '.bz2':
        do_data(ftype, payload, afile + ':' + root)
    else:
        do_data(ftype, payload, afile)
Esempio n. 30
0
def bzip2_do_bzip2(abzip2, afile):
    """abzip2: raw bytes, afile: source file name

    Decompresses the data, types the payload, and dispatches it to
    do_data() when the type is supported.
    """
    try:
        data = bz2.decompress(abzip2)
    except (IOError, ValueError) as e:
        # not valid bzip2 data, or truncated
        log_error(str(e), afile)
        return
    (ftype, supported) = type_data(data)
    if supported:
        # strip any .bz2 extension so the log shows the inner file name
        (root, ext) = os.path.splitext(afile)
        if ext.lower() == '.bz2':
            do_data(ftype, data, afile+':'+root)
        else:
            do_data(ftype, data, afile)
Esempio n. 31
0
def type_file(filename):
    """Guess a file's type; returns a (type, supported) tuple.

    Returns None (not a tuple) when the file cannot be opened -- callers
    unpack the result and catch the resulting TypeError (see scan()).
    """
    # optimize for speed: prioritize extension over signature
    (ftype, supported) = type_from_extension(filename)
    if supported:
        return (ftype, supported)
    try:
        # BUG FIX: read in binary mode -- signature bytes are not text and
        # text mode corrupts them on Windows (and fails to decode on
        # Python 3)
        fin = open(filename, 'rb')
    except IOError as e:
        log_error(str(e), filename)
        return
    data = fin.read(MAX_LEN)
    fin.close()
    if is_text(data[:MAX_LEN]):
        return (const.BF_TEXT, True)
    return type_from_signature(data[:MAX_SIG_LEN])
Esempio n. 32
0
def pdf_do_pdf(astream, afile):
    """Extract text from the PDF stream astream and scan it.

    astream: binary file object positioned at the start of the PDF
    afile: source file name, used as the log label
    """
    # collect the extracted text into an in-memory buffer
    outstream = io.BytesIO()
    laparams = LAParams()
    rsrcmgr = PDFResourceManager(caching=True)
    device = TextConverter(rsrcmgr, outstream, codec='utf-8', laparams=laparams,
                               imagewriter=None)
    interpreter = PDFPageInterpreter(rsrcmgr, device)
    try:
        # maxpages=0 means no page limit; check_extractable=True raises
        # on PDFs whose permissions forbid text extraction
        for page in PDFPage.get_pages(astream, set(),
                                      maxpages=0, password='',
                                      caching=True, check_extractable=True):
            interpreter.process_page(page)
    except PDFTextExtractionNotAllowed as e:
        log_error(str(e), afile)
        return
    text = outstream.getvalue()
    text_do_data(text, afile)
    outstream.close()
Esempio n. 33
0
def looks_uniform(data='', filename=''):
    """Detect compressed/encrypted data, packed executables etc.

    Unreliable for short samples (<~100 bytes). If no data is given,
    the file named by filename is read instead. Returns a bool.
    """
    if not data:
        try:
            # BUG FIX: close the file handle; the previous
            # open(filename).read() leaked it
            with open(filename) as fid:
                data_local = fid.read()
        except IOError as e:
            log_error(str(e), filename)
            return False
    else:
        # beware mutable default args
        data_local = data
    datalen = len(data_local)
    entropy = entropy2(data_local)
    # thresholds rise with sample size: entropy estimates on short
    # samples are noisy
    if datalen < 250:
        return entropy > 6
    if datalen < 1000:
        return entropy > 7
    return entropy > 7.5
Esempio n. 34
0
def looks_uniform(data='', filename=''):
    """Detect compressed/encrypted data, packed executables etc.

    Unreliable for short samples (<~100 bytes). If no data is given,
    the file named by filename is read instead. Returns a bool.
    """
    if not data:
        try:
            # BUG FIX: use a context manager so the handle is closed; the
            # previous open(filename).read() leaked it
            with open(filename) as fid:
                data_local = fid.read()
        except IOError as e:
            log_error(str(e), filename)
            return False
    else:
        # beware mutable default args
        data_local = data
    datalen = len(data_local)
    entropy = entropy2(data_local)
    # higher thresholds for larger samples: short-sample entropy is noisy
    if datalen < 250:
        return entropy > 6
    if datalen < 1000:
        return entropy > 7
    return entropy > 7.5
Esempio n. 35
0
def pdf_do_pdf(astream, afile):
    """Extract text from the PDF stream astream and scan it.

    astream: binary file object positioned at the start of the PDF
    afile: source file name, used as the log label
    """
    # collect the extracted text into an in-memory buffer
    outstream = io.BytesIO()
    laparams = LAParams()
    rsrcmgr = PDFResourceManager(caching=True)
    device = TextConverter(rsrcmgr,
                           outstream,
                           codec='utf-8',
                           laparams=laparams,
                           imagewriter=None)
    interpreter = PDFPageInterpreter(rsrcmgr, device)
    try:
        # maxpages=0 means no page limit; check_extractable=True raises
        # on PDFs whose permissions forbid text extraction
        for page in PDFPage.get_pages(astream,
                                      set(),
                                      maxpages=0,
                                      password='',
                                      caching=True,
                                      check_extractable=True):
            interpreter.process_page(page)
    except PDFTextExtractionNotAllowed as e:
        log_error(str(e), afile)
        # BUG FIX: the buffer was leaked on this early-return path
        outstream.close()
        return
    text = outstream.getvalue()
    text_do_data(text, afile)
    outstream.close()
Esempio n. 36
0
def blueflower(path, hashesfile, dictionaryfile, pwd, output_file, exclude_default_dictionary):
    """Run blueflower over path; returns the name of the log file.

    hashesfile/pwd: optional hash-file support; dictionaryfile: optional
    extra regex dictionary; output_file: explicit log file or directory;
    exclude_default_dictionary: use only the custom dictionary.
    Raises BFException on missing inputs or invalid regexes.
    """
    global RGX_INFILE
    global RGX_INFILENAME

    if not os.path.exists(path):
        raise BFException('%s does not exist' % path)

    if hashesfile and not os.path.exists(hashesfile):
        raise BFException('%s does not exist' % hashesfile)

    if dictionaryfile and not os.path.exists(dictionaryfile):
        raise BFException('%s does not exist' % dictionaryfile)

    # pick the log file: an explicit file, a directory to create it in,
    # or the current directory by default
    basename = '%s-%s-%s.csv' % (PROGRAM, os.path.basename(os.path.normpath(path)), timestamp())
    if output_file:
        if os.path.isfile(output_file):
            logfile = output_file
        else:
            # output_file is a directory; os.path.join is portable,
            # unlike the previous hard-coded '/'
            logfile = os.path.join(output_file, basename)
    else:
        logfile = basename

    # reset any existing logger
    logger = logging.getLogger()
    if logger.handlers:
        logger.handlers[0].stream.close()
        logger.removeHandler(logger.handlers[0])

    # instantiate logger
    logging.basicConfig(filename=logfile,
                        format='%(message)s',
                        level=logging.INFO)

    banner()
    log_comment('writing to %s' % logfile)

    # hash file support (BFException propagates to the caller)
    if hashesfile and pwd:
        get_hashes(hashesfile, pwd)

    # read the dictionary and add to INFILE
    if dictionaryfile:
        # BUG FIX: load_dictionary_file() may return None when the file
        # cannot be read; default to [] so the loop below does not crash
        extradictionary = load_dictionary_file(dictionaryfile) or []
    else:
        extradictionary = []

    # reject custom patterns that do not compile on their own
    for rex in extradictionary:
        try:
            re.compile(rex, re.IGNORECASE)
        except re.error:
            # NOTE(review): log_error() is called with two arguments
            # everywhere else -- confirm the one-argument form is valid
            log_error('regex does not compile: %s' % rex)

    # configure the regex dictionary to be used
    if exclude_default_dictionary:
        rgx_infile = '|'.join(set(extradictionary))
    else:
        rgx_infile = '|'.join(set(INFILE) | set(extradictionary))

    log_comment(rgx_infile)

    # precompile the regexes
    try:
        RGX_INFILE = re.compile(rgx_infile, re.IGNORECASE)
    except re.error:
        raise BFException('invalid infile regex %s' % rgx_infile)
    rgx_infilename = '|'.join(INFILENAME)
    try:
        RGX_INFILENAME = re.compile(rgx_infilename, re.IGNORECASE)
    except re.error:
        raise BFException('invalid infilename regex %s' % rgx_infilename)

    # start slow operations
    count = init(path)
    scan(path, count)
    count_logged(logfile)

    return logfile