def zip_do_zip(azip, afile):
    """azip:ZipFile, afile:source archive(s) name"""
    # test if encrypted
    try:
        azip.testzip()
    except RuntimeError as e:
        if 'encrypted' in str(e):
            log_encrypted(BF_ZIP, afile)
            return
        else:
            log_error(str(e), afile)
    # iterate directly over file names
    for member in azip.namelist():
        # sort directories out
        if member.endswith('/'):
            continue
        # check file name
        filename = os.path.basename(member).lower()
        res = RGX_INFILENAME.search(filename)
        if res:
            log_secret(res.group(), afile + ':' + member)
        # check file content, calling other modules
        data = azip.read(member)
        (ftype, supported) = type_data(data, member)
        if supported:
            if ftype in ENCRYPTED:
                log_encrypted(ftype, member)
            else:
                do_data(ftype, data, afile + ':' + member)

def xlsx_do_file(afile):
    try:
        axl = open_workbook(afile)
    except XLRDError as e:
        log_error(str(e), afile)
        return
    xlsx_do_xlsx(axl, afile)

def init(path):
    """determines size and number of files"""
    log_comment('initializing...')
    total_size = 0
    count = 0
    for root, dirs, files in os.walk(path):
        for skip in SKIP:
            if skip in dirs:
                dirs.remove(skip)
        for filename in files:
            apath = os.path.join(root, filename)
            count += 1
            try:
                total_size += os.path.getsize(apath)
            except OSError as e:
                log_error(str(e), filename)
    # report the total in human-readable units
    readable = total_size
    for unit in ['bytes', 'KiB', 'MiB', 'GiB', 'TiB']:
        if readable < 1024:
            log_comment('%d files, %3.1f %s' % (count, readable, unit))
            return count
        readable /= 1024.0
    return count

def xlsx_do_data(data, afile):
    try:
        axl = open_workbook(file_contents=data)
    except XLRDError as e:
        log_error(str(e), afile)
        return
    xlsx_do_xlsx(axl, afile)

def pdf_do_file(afile):
    try:
        fid = open(afile, 'rb')
    except IOError as e:
        log_error(str(e), afile)
        return
    pdf_do_pdf(fid, afile)
    fid.close()

def docx_do_file(afile):
    try:
        azip = zipfile.ZipFile(afile)
    except zipfile.BadZipfile as e:
        log_error(str(e), afile)
        return
    docx_do_docx(azip, afile)
    azip.close()

def gz_do_file(afile):
    try:
        agz = gzip.GzipFile(afile)
    except IOError as e:
        log_error(str(e), afile)
        return
    gz_do_gz(agz, afile)
    agz.close()

def tar_do_file(afile):
    try:
        atar = tarfile.open(afile)
    except tarfile.TarError as e:
        log_error(str(e), afile)
        return
    tar_do_tar(atar, afile)
    atar.close()

def gz_do_data(data, afile):
    filelike = io.BytesIO(data)
    try:
        agz = gzip.GzipFile(fileobj=filelike)
    except IOError as e:
        log_error(str(e), afile)
        return
    gz_do_gz(agz, afile)
    agz.close()

def docx_do_data(data, afile):
    filelike = io.BytesIO(data)
    try:
        azip = zipfile.ZipFile(filelike)
    except zipfile.BadZipfile as e:
        log_error(str(e), afile)
        return
    docx_do_docx(azip, afile)
    azip.close()

def text_do_file(afile):
    try:
        fid = open(afile)
    except IOError as e:
        log_error(str(e), afile)
        return
    data = fid.read().lower()
    fid.close()
    text_do_data(data, afile)

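# text_do_data() is called throughout this module but not defined in this
# section. A minimal illustrative sketch, assuming it only runs the
# precompiled RGX_INFILE pattern over the (already lowercased) data and logs
# each match with its source name; the project's actual implementation may do
# more (e.g. hash matching). The _sketch suffix marks it as hypothetical.
def text_do_data_sketch(data, afile):
    for res in RGX_INFILE.finditer(data):
        log_secret(res.group(), afile)
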
def bzip2_do_file(afile):
    try:
        fid = open(afile)
        abzip2 = fid.read()
    except IOError as e:
        log_error(str(e), afile)
        return
    bzip2_do_bzip2(abzip2, afile)
    fid.close()

def tar_do_data(data, afile):
    filelike = io.BytesIO(data)
    try:
        atar = tarfile.open(fileobj=filelike)
    except tarfile.TarError as e:
        log_error(str(e), afile)
        return
    tar_do_tar(atar, afile)
    atar.close()

def scan(path, count):
    """selects files to process, checks file names"""
    log_comment('scanning %s:' % path)
    scanned = 0
    bar_width = 32
    if count < bar_width:
        bar_width = count
    if count == 0:
        bar_width = 1
    sys.stdout.write('%s\n' % ("=" * bar_width))
    bar_blocksize = count / bar_width
    bar_left = bar_width
    bar_count = 0
    for root, dirs, files in os.walk(path):
        for skip in SKIP:
            if skip in dirs:
                dirs.remove(skip)
        for filename in files:
            abspath = os.path.abspath(os.path.join(root, filename))
            res = RGX_INFILENAME.search(filename.lower())
            if res:
                log_secret(res.group(), abspath)
            try:
                ftype, supported = type_file(abspath)
            except TypeError as e:
                log_error(str(e), abspath)
                continue
            if supported:
                if ftype in ENCRYPTED:
                    # report but do not process
                    log_encrypted(ftype, abspath)
                elif ftype in EXE:
                    # report but do not process
                    if looks_uniform(filename=abspath):
                        log_packed(ftype, abspath)
                    else:
                        log_exe(ftype, abspath)
                else:
                    # process the file
                    do_file(ftype, abspath)
                    scanned += 1
            # update progress bar
            bar_count += 1
            if bar_count >= bar_blocksize and bar_left:
                sys.stdout.write("=")
                sys.stdout.flush()
                bar_count = 0
                bar_left -= 1
    sys.stdout.write("\n")
    log_comment('%d supported files were processed' % scanned)
    return scanned

def load_dictionary_file(afile):
    log_comment('adding custom dictionary %s to infile' % afile)
    try:
        fid = open(afile)
    except IOError as e:
        log_error(str(e), afile)
        return []
    data = fid.read().lower()
    fid.close()
    return data.splitlines()

def xlsx_do_xlsx(axl, afile):
    rows = []
    try:
        for i in xrange(axl.nsheets):
            sheet = axl.sheet_by_index(i)
            for j in xrange(sheet.nrows):
                rows.append(' '.join(sheet.row_values(j)))
    except TypeError as e:
        log_error(str(e), afile)
        return
    text = '\n\n'.join(rows)
    text_do_data(text, afile)

def gz_do_gz(agz, afile):
    """agz:GzipFile, afile:source file name"""
    try:
        data = agz.read()
    except IOError as e:
        log_error(str(e), afile)
        return
    (ftype, supported) = type_data(data)
    if supported:
        # strip any .gz extension
        (root, ext) = os.path.splitext(afile)
        if ext.lower() == '.gz':
            do_data(ftype, data, afile + ':' + root)
        else:
            do_data(ftype, data, afile)

def bzip2_do_bzip2(abzip2, afile):
    """abzip2: raw bytes, afile: source file name"""
    try:
        data = bz2.decompress(abzip2)
    except (IOError, ValueError) as e:
        log_error(str(e), afile)
        return
    (ftype, supported) = type_data(data)
    if supported:
        # strip any .bz2 extension
        (root, ext) = os.path.splitext(afile)
        if ext.lower() == '.bz2':
            do_data(ftype, data, afile + ':' + root)
        else:
            do_data(ftype, data, afile)

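# The handlers above hand recovered bytes back to do_data(), which is not
# defined in this section. A minimal sketch of one way such a dispatcher
# could look, keyed on type constants; only BF_ZIP and BF_TEXT appear in this
# section, so the other BF_* names and the exact table are assumptions about
# the project, and the _SKETCH/_sketch names are hypothetical.
DO_DATA_SKETCH = {
    BF_GZ: gz_do_data,
    BF_BZIP2: bzip2_do_bzip2,
    BF_TAR: tar_do_data,
    BF_DOCX: docx_do_data,
    BF_XLSX: xlsx_do_data,
    BF_TEXT: text_do_data,
}

def do_data_sketch(ftype, data, afile):
    handler = DO_DATA_SKETCH.get(ftype)
    if handler:
        handler(data, afile)
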
def type_file(filename):
    """guess a file's type"""
    # optimize for speed: prioritize extension over signature
    (ftype, supported) = type_from_extension(filename)
    if supported:
        return (ftype, supported)
    try:
        fin = open(filename)
    except IOError as e:
        log_error(str(e), filename)
        return
    data = fin.read(MAX_LEN)
    fin.close()
    if is_text(data[:MAX_LEN]):
        return (const.BF_TEXT, True)
    return type_from_signature(data[:MAX_SIG_LEN])

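# is_text() is referenced above but not defined in this section. A rough
# sketch of a plausible heuristic, assuming "text" means mostly printable
# ASCII with no NUL byte; the project's actual check may differ, and the
# _sketch name and 0.95 threshold are illustrative.
import string

def is_text_sketch(data):
    if not data or '\0' in data:
        return False
    printable = sum(1 for c in data if c in string.printable)
    return printable / float(len(data)) > 0.95
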
def pdf_do_pdf(astream, afile):
    outstream = io.BytesIO()
    laparams = LAParams()
    rsrcmgr = PDFResourceManager(caching=True)
    device = TextConverter(rsrcmgr, outstream, codec='utf-8',
                           laparams=laparams, imagewriter=None)
    interpreter = PDFPageInterpreter(rsrcmgr, device)
    try:
        for page in PDFPage.get_pages(astream, set(), maxpages=0,
                                      password='', caching=True,
                                      check_extractable=True):
            interpreter.process_page(page)
    except PDFTextExtractionNotAllowed as e:
        log_error(str(e), afile)
        return
    text = outstream.getvalue()
    text_do_data(text, afile)
    outstream.close()

def looks_uniform(data='', filename=''):
    """to detect compressed/encrypted data, packed executables etc.
    unreliable for short samples (<~100 bytes)
    if no data given, opens the file"""
    if not data:
        try:
            data_local = open(filename).read()
        except IOError as e:
            log_error(str(e), filename)
            return False
    else:
        data_local = data
    datalen = len(data_local)
    entropy = entropy2(data_local)
    if datalen < 250:
        return entropy > 6
    if datalen < 1000:
        return entropy > 7
    return entropy > 7.5

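# entropy2() is used above but not defined in this section. A minimal sketch,
# assuming it returns the byte-level Shannon entropy in bits (0 to 8), which
# is consistent with the thresholds in looks_uniform(); the _sketch name is
# hypothetical and the project's own implementation may differ.
import math
from collections import Counter

def entropy2_sketch(data):
    if not data:
        return 0.0
    counts = Counter(data)
    total = float(len(data))
    return -sum((n / total) * math.log(n / total, 2) for n in counts.values())
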
def blueflower(path, hashesfile, dictionaryfile, pwd, output_file,
               exclude_default_dictionary):
    """runs blueflower, returns name of the log file"""
    global RGX_INFILE
    global RGX_INFILENAME
    if not os.path.exists(path):
        raise BFException('%s does not exist' % path)
    if hashesfile and not os.path.exists(hashesfile):
        raise BFException('%s does not exist' % hashesfile)
    if dictionaryfile and not os.path.exists(dictionaryfile):
        raise BFException('%s does not exist' % dictionaryfile)
    if output_file:
        if os.path.isfile(output_file):
            logfile = output_file
        else:
            logfile = output_file + '/%s-%s-%s.csv' % (
                PROGRAM, os.path.basename(os.path.normpath(path)), timestamp())
    else:
        logfile = '%s-%s-%s.csv' % (
            PROGRAM, os.path.basename(os.path.normpath(path)), timestamp())
    # reset any existing logger
    logger = logging.getLogger()
    if logger.handlers:
        logger.handlers[0].stream.close()
        logger.removeHandler(logger.handlers[0])
    # instantiate logger
    logging.basicConfig(filename=logfile,
                        format='%(message)s',
                        level=logging.INFO)
    banner()
    log_comment('writing to %s' % logfile)
    # hash file support
    if hashesfile and pwd:
        try:
            get_hashes(hashesfile, pwd)
        except BFException:
            raise
    # read the dictionary and add to INFILE
    if dictionaryfile:
        extradictionary = load_dictionary_file(dictionaryfile)
    else:
        extradictionary = []
    for rex in extradictionary:
        try:
            re.compile(rex, re.IGNORECASE)
        except re.error:
            log_error('regex does not compile: %s' % rex)
    # configure the regex dictionary to be used
    if exclude_default_dictionary:
        rgx_infile = '|'.join(set(extradictionary))
    else:
        rgx_infile = '|'.join(set(INFILE) | set(extradictionary))
    log_comment(rgx_infile)
    # precompile the regexes
    try:
        RGX_INFILE = re.compile(rgx_infile, re.IGNORECASE)
    except re.error:
        raise BFException('invalid infile regex %s' % rgx_infile)
    rgx_infilename = '|'.join(INFILENAME)
    try:
        RGX_INFILENAME = re.compile(rgx_infilename, re.IGNORECASE)
    except re.error:
        raise BFException('invalid infilename regex %s' % rgx_infilename)
    # start slow operations
    count = init(path)
    scan(path, count)
    count_logged(logfile)
    return logfile

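# Hypothetical driver showing how blueflower() might be invoked directly,
# based only on the signature above; the project ships its own command-line
# entry point, so the argument handling here is purely illustrative.
if __name__ == '__main__':
    target = sys.argv[1] if len(sys.argv) > 1 else '.'
    try:
        logname = blueflower(path=target,
                             hashesfile=None,
                             dictionaryfile=None,
                             pwd=None,
                             output_file=None,
                             exclude_default_dictionary=False)
        print 'results written to %s' % logname
    except BFException as e:
        print 'error: %s' % e
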