def _search_in_file_content(self, file) -> Optional[CsvRow]:
    """
    Search for the configured terms in the text content of a file.

    Every ``(pattern, weight)`` pair of ``self.list_file_text_terms`` is
    searched in the whole content (case-insensitive, multi-line regex).
    The weights of the patterns that match are summed; as soon as the sum
    reaches ``config.TERM_PREC_TH`` the file is reported.

    :param file: object exposing ``open()``; it is read in text mode and
        undecodable bytes are ignored
    :return: a CsvRow of kind ``ptrn_in_file_content`` carrying the pattern
        that made the sum reach the threshold, or None when the threshold is
        not reached or the file cannot be read
    """
    try:
        # Read everything first so the handle is released before scanning.
        with file.open(mode='r', errors='ignore') as handle:
            content = handle.read()
        perc = 0
        for k, v in self.list_file_text_terms:
            if not re.search(k, content, re.IGNORECASE | re.MULTILINE):
                continue
            perc += v
            if perc >= config.TERM_PREC_TH:
                if self.verbose:
                    print(f"==> Found a pattern in the file content: {k}")
                return CsvRow(file, "ptrn_in_file_content", f'"{k}"')
    except PermissionError:
        print(f"EEE => Permissions error for: {file}")
    except OSError as e:
        # strerror is None when the OSError carries no errno; fall back to
        # the exception text instead of failing on str + None.
        print(f"EEE => OSError: {e.strerror or e}")
    return None
def print_found_csv(self, file_name):
    """
    Print the list of found items in the form of a CSV file.

    The CSV text is the header returned by ``CsvRow.get_header()`` followed
    by ``str()`` of every item of ``self.found``, one per line. When
    ``file_name`` is given the text is written to that file, read back with
    pandas (``;`` separated) and the columns from the third one onwards are
    printed.

    :param file_name: path of the CSV file to write; a falsy value skips
        both the writing and the printing
    :return: the pandas DataFrame read back from ``file_name`` when the file
        was written, otherwise the CSV text (empty string when nothing was
        found)
    """
    if not self.found:
        return ""

    # Build the text in one pass instead of repeated string concatenation.
    lines = [CsvRow.get_header()]
    lines.extend(str(x) for x in self.found)
    s = "\n".join(lines) + "\n"

    if file_name and s.split():
        with open(file_name, "w", encoding="UTF8") as handle:
            handle.write(s)
        df = pd.read_csv(file_name, sep=";", encoding="UTF8")
        print(f"\n\n{Scanner.sep} Result in the CSV file {Scanner.sep}")
        print(df.filter(df.columns[2:]))
        print("*****************************************************")
        return df
    return s
def _search_in_file_content(self, file) -> Optional[CsvRow]:
    """
    Look for the configured text patterns inside the content of a file.

    Each entry of ``self.file_text_terms_set`` is a ``(pattern, weight)``
    pair. The weights of the patterns found in the content (case-insensitive,
    multi-line regex search) are added up and the file is reported as soon as
    the total reaches ``config.CFG_TERM_PERC_TH``.

    :param file: entry whose ``path`` attribute is the file to read; it is
        opened in text mode and undecodable bytes are ignored
    :return: a CsvRow of kind ``ptrn_in_file_content`` listing the matched
        ``(pattern, weight)`` pairs, or None when the threshold is not reached
        or the file cannot be read
    """
    flags = re.IGNORECASE | re.MULTILINE
    try:
        with open(file.path, mode='r', errors='ignore') as handle:
            text = handle.read()
            score = 0
            matches = []
            for pattern, weight in self.file_text_terms_set:
                if not re.search(pattern, text, flags):
                    continue
                matches.append((pattern, weight))
                score += weight
                if score >= config.CFG_TERM_PERC_TH:
                    if self.verbose:
                        print(
                            f'==> Found patterns in the file content: {str(matches)}'
                        )
                    return CsvRow(file, "ptrn_in_file_content",
                                  f'\"{str(matches)}\"')
    except PermissionError as err:
        print(f'EEE => Permissions error: {err}')
    except OSError as err:
        print(f'EEE => OSError: {err.errno}-{err}')
    return None
def print_found_csv(self, file_name, verbose=False):
    """
    Write the list of found items as a CSV file and optionally print it.

    The CSV text is the header returned by ``CsvRow.get_header()`` followed
    by ``str()`` of every item of ``self.found``, one per line. When
    ``file_name`` is given the text is written to that file (characters that
    cannot be encoded as UTF-8 are dropped).

    :param file_name: path of the CSV file to write; a falsy value skips the
        writing
    :param verbose: when true, the written file is read back with pandas
        (``;`` separated) and its columns from the third one onwards are
        printed
    :return: the CSV text, or an empty string when nothing was found
    """
    if not self.found:
        return ""

    # Build the text in one pass instead of repeated string concatenation.
    lines = [CsvRow.get_header()]
    lines.extend(str(x) for x in self.found)
    s = "\n".join(lines) + "\n"

    if file_name and s.split():
        with open(file_name, "w", encoding="UTF8", errors='ignore') as handle:
            handle.write(s)
        if verbose:
            # The DataFrame is only needed for printing, so the file is not
            # parsed again unless the output was requested.
            df = pd.read_csv(file_name, sep=";", encoding="UTF8")
            print(
                f'\n\n{Scanner.sep} Result in the CSV file: ({file_name}) {Scanner.sep}'
            )
            print(df.filter(df.columns[2:]))
            print("*****************************************************")
    return s
def search_for_crypted_content(self, file):
    """
    Calculate the randomness of the file content to detect encrypted data.

    The first ``config.CFG_MAX_FILE_SIGNATURE_LENGTH`` bytes are passed to
    ``utils.is_known_file_type``. When a known type is reported, the file is
    not analysed and a ``CRYPTO_NOTPROC`` row describing the signature is
    returned. Otherwise the next ``config.CFG_N_BYTES_2_RAND_CHECK`` bytes
    (the read continues after the signature bytes) go through the entropy
    test and, if that exceeds its threshold, the compression test.

    :param file: entry exposing ``path`` and ``name`` of the file to check
    :return: a CsvRow of kind ``CRYPTO`` when both tests exceed their
        thresholds and enough bytes were analysed, a CsvRow of kind
        ``CRYPTO_NOTPROC`` for a recognised file type, otherwise None (also
        for empty or unreadable files)
    """
    try:
        with open(file=file.path, mode='rb') as handle:
            # read only the first part of the file to check the magic type
            header = handle.read(config.CFG_MAX_FILE_SIGNATURE_LENGTH)
            if not header:
                return None

            # well known files are not checked
            ret, sig, desc, offset = utils.is_known_file_type(
                file.name, header, verbose=self.verbose)
            if ret:
                adesc = f"sig: '{sig}' : first type recogn. \"{desc}\" <- offset: {offset}"
                return CsvRow(file, CRYPTO_NOTPROC, adesc)

            # read the number of bytes set in the config
            content = handle.read(config.CFG_N_BYTES_2_RAND_CHECK)
            if not content:
                # Nothing is left after the signature bytes: there is no
                # data to run the randomness tests on.
                return None
            lcontent = len(content)

            # First test is the Entropy
            rnd_test_entropy = round(
                RandTest.calc_entropy_test(content, self.verbose), 2)
            if rnd_test_entropy > config.CFG_ENTR_RAND_TH:
                # Second test is the Compression factor
                rnd_test_compr = round(
                    RandTest.calc_compression_test(content, self.verbose), 2)
                if (rnd_test_compr > config.CFG_COMPR_RAND_TH
                        and lcontent > config.CFG_COMPRESSED_CONTENT_MIN_LEN):
                    adesc = f'Entropy: {rnd_test_entropy} && Comp_Fact: {rnd_test_compr}'
                    return CsvRow(file, CRYPTO, adesc)
    except PermissionError:
        print(f'EEE => Permissions error for: {file.path}')
    except OSError as e:
        print(f'EEE(2) => OSError {e.errno}-{e}')
    return None
def _search_in_file_name(self, file) -> Optional[CsvRow]:
    """
    Check whether the file name begins with one of the configured terms.

    The comparison uses the lower-cased stem of the file name (name without
    the last suffix) against the entries of ``self.file_name_terms_set``.

    :param file: path-like entry of the file to check
    :return: a CsvRow describing the first term the name starts with, or
        None when no term matches
    """
    stem = Path(file).stem.lower()
    hit = next(
        (term for term in self.file_name_terms_set if stem.startswith(term)),
        None,
    )
    if hit is None:
        return None
    if self.verbose:
        print(f'==> Found a file name starting with: {hit}')
    return CsvRow(file, None, f'file_name_start_with: "{hit}"')
def _search_in_file_name(self, file) -> Optional[CsvRow]:
    """
    Search for a match on the file name.

    The lower-cased ``file.stem`` is compared with every entry of
    ``self.list_file_name_terms``; the first entry the stem starts with is
    reported.

    :param file: object exposing ``stem`` (a ``pathlib.Path``-like)
    :return: a CsvRow describing the first matching term, or None when no
        term matches
    """
    # The stem does not change while iterating: compute it once.
    stem = file.stem.lower()
    for term in self.list_file_name_terms:
        if stem.startswith(term):
            if self.verbose:
                print(f"==> Found a file name starting with: {term}")
            return CsvRow(file, None, f'file_name_start_with: "{term}"')
    return None
def __search_in_file(self, file) -> Optional[CsvRow]:
    """
    Analyse a single file.

    Only files whose lower-cased extension belongs to
    ``self.file_bad_exts_set`` or ``self.file_name_exts_set`` and whose size
    does not exceed ``config.CFG_MANIFEST_MAX_SIZE`` are examined:
        1) a bad extension is reported straight away;
        2) for the other configured extensions the file name is checked first
           and, when that gives nothing, the file content.

    :param file: the file analyzed
    :return: None or the row describing what was detected
    """
    suffix = Path(file).suffix.lower()
    has_bad_ext = suffix in self.file_bad_exts_set

    # extensions outside both configured sets are ignored
    if not (has_bad_ext or suffix in self.file_name_exts_set):
        return None
    # check only the files with the max size in the configuration
    if not file.stat().st_size <= config.CFG_MANIFEST_MAX_SIZE:
        return None

    if has_bad_ext:
        if self.verbose:
            print(f'-> Found bad extension in the file: {suffix}')
        return CsvRow(file, "bad_ext", suffix)

    # From here on the extension is one of the allowed ones: look at the
    # file name first, then at the content.
    if self.verbose:
        print(
            f'-> Processing the file "{file.path}" for the extension "{suffix}"'
        )
    by_name = self._search_in_file_name(file)
    if by_name:
        return by_name

    if self.verbose:
        print(f'-> Processing the file "{file.path}" for the content')
    return self._search_in_file_content(file)
def search_for_crypted_content(self, file):
    """
    Calculate the randomness of the file content to detect encrypted data.

    Up to ``config.NUM_BYTES_TO_RAND_CHECK`` bytes are read. Unless
    ``utils.is_known_file_type`` recognises them, they go through the entropy
    test and, if that exceeds ``config.ENTR_RAND_TH``, the compression test.

    :param file: object exposing ``open()`` (a ``pathlib.Path``-like)
    :return: a CsvRow of kind ``CRYPTO`` when both tests exceed their
        thresholds and more than ``MIN_LEN_COMPRESSED_CONTENT`` bytes were
        read, otherwise None (also for empty, recognised or unreadable files)
    """
    try:
        with file.open(mode='rb') as handle:
            content = handle.read(config.NUM_BYTES_TO_RAND_CHECK)
        lcontent = len(content)

        # for the empty files
        if lcontent == 0:
            return None
        # well known file types are not analysed
        if utils.is_known_file_type(content, verbose=self.verbose):
            return None

        # First test is the Entropy
        rnd_test_entropy = round(
            RandTest.calc_entropy_test(content, self.verbose), 2)
        if rnd_test_entropy > config.ENTR_RAND_TH:
            # Second test is the Compression factor
            rnd_test_compr = round(
                RandTest.calc_compression_test(content, self.verbose), 2)
            if (rnd_test_compr > config.COMPR_RAND_TH
                    and lcontent > MIN_LEN_COMPRESSED_CONTENT):
                adesc = f"ent: {rnd_test_entropy} ==> cmp: {rnd_test_compr}"
                return CsvRow(file, CRYPTO, adesc)
    except PermissionError:
        print(f"EEE => Permissions error for: {file}")
    except OSError as e:
        # strerror is None when the OSError carries no errno; fall back to
        # the exception text instead of failing on str + None.
        print(f"EEE => OSError: {e.strerror or e}")
    return None
def __search_in_file(self, file) -> Optional[CsvRow]:
    """
    The search for the file.

    It searches for:
        1) If the file has a Bad extension. If yes it is reported.
        2) If, for the allowed extensions (not the Bad exts), it has a file
           name with a suspect name part or, failing that, terms in the
           content.
    Files bigger than ``config.MANIFEST_MAX_SIZE`` are skipped. The
    extension is tested before the size so that files with an extension
    outside both lists are never stat-ed.

    :param file: the file analyzed; must expose ``suffix`` and ``stat()``
    :return: None or the row describing what was detected
    """
    ext = file.suffix.lower()
    has_bad_ext = ext in self.list_file_bad_exts
    if not has_bad_ext and ext not in self.list_file_name_exts:
        return None

    # check only the files up to the max size
    if file.stat().st_size > config.MANIFEST_MAX_SIZE:
        return None

    if has_bad_ext:
        if self.verbose:
            print(f"-> Found bad extension in the file: {ext}")
        return CsvRow(file, "bad_ext", ext)

    # Only the allowed extensions in the config are checked for the file
    # name and the content
    if self.verbose:
        print(f"-> Processing the file for the extension: {ext}")
    res = self._search_in_file_name(file)
    if not res:
        if self.verbose:
            print("-> Processing the file for the content")
        res = self._search_in_file_content(file)
    return res