def create_fp():
    """
    Fingerprint all files sequentially and store the result as CSV.

    Every file delivered by get_file_iterator() is passed to get_fileinfo();
    each non-None result is written as one row to fp_conf.f_output.
    The number of rows written is logged at the end.

    >>> import test
    >>> timestamp = time.time()
    >>> test.create_testfiles_fingerprint_1(timestamp)
    >>> fp_files_conf.fp_dir='./testfiles/'
    >>> fp_conf.f_output='./testresults/fp_files_result1.csv'
    >>> fingerprint=FingerPrintFiles()
    >>> fingerprint.create_fp()

    >>> test.modify_testfiles_fingerprint_2(timestamp)
    >>> fp_files_conf.fp_dir='./testfiles/'
    >>> fp_conf.f_output='./testresults/fp_files_result2.csv'
    >>> fingerprint=FingerPrintFiles()
    >>> fingerprint.create_fp()

    """
    logger.info(f'create fingerprint for files from {fp_files_conf.fp_dir}, storing results in {fp_conf.f_output}')

    files_written: int = 0
    filenames = get_file_iterator()

    with open(fp_conf.f_output, 'w', encoding='utf-8', newline='') as csv_file:
        columns = lib_data_structures.DataStructFileInfo().get_data_dict_fieldnames()
        writer = csv.DictWriter(csv_file, fieldnames=columns, dialect='excel')
        writer.writeheader()

        for filename in filenames:
            info = get_fileinfo(filename=filename, hash_files=fp_files_conf.hash_files)
            if info is None:
                # get_fileinfo returns None when the file could not be found
                continue
            files_written += 1
            writer.writerow(info.get_data_dict())

    logger.info('{} files fingerprinted'.format(files_written))
def write_diff_csv_file(
        l_fileinfo: [lib_data_structures.DataStructFileInfo]):
    """
    Write the given file infos as CSV to fp_conf.f_output.

    The file is (over)written with a header row built from the field names
    of DataStructFileInfo, followed by one row per entry of l_fileinfo.
    """
    columns = lib_data_structures.DataStructFileInfo().get_data_dict_fieldnames()
    with open(fp_conf.f_output, 'w', encoding='utf-8', newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=columns)
        writer.writeheader()
        # rows are produced lazily, one dict per file info
        writer.writerows(entry.get_data_dict() for entry in l_fileinfo)
def get_fileinfo(filename: str, hash_files: bool = True):
    """
    Collect access/modify/create time, size and (optionally) hash of a file.

    hash_files has to be passed explicitly, because the state of
    conf.hash_files gets lost in multiprocessing.

    Returns a DataStructFileInfo, or None if the file was not found.
    Any other OSError while reading a property sets remark to
    'access denied'; the remaining properties are still attempted.

    >>> import test
    >>> timestamp = time.time()
    >>> test.create_testfiles_fingerprint_1(timestamp)
    >>> fp_files_conf.fp_dir='./testfiles/'
    >>> fp_conf.f_output='./testresults/fp_files_test_result.csv'
    >>> fingerprint=FingerPrintFiles()

    >>> fileinfo = get_fileinfo('./testfiles/file1_no_changes.txt')   # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
    >>> fileinfo.path
    './testfiles/file1_no_changes.txt'
    >>> fileinfo.hash
    'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'
    >>> fileinfo.remark
    ''

    >>> fileinfo = get_fileinfo('./testfiles/does-not-exist.txt')   # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
    >>> fileinfo is None
    True

    >>> fileinfo = get_fileinfo('c:/pagefile.sys')
    >>> fileinfo.remark
    'access denied'

    """
    # (attribute name, function reading that property), evaluated in this order
    property_readers = (
        ('accessed_float', os.path.getatime),
        ('modified_float', os.path.getmtime),
        ('created_float', os.path.getctime),
        ('size', os.path.getsize),
        ('hash', lib_hash.get_file_hash_preserve_access_dates),
    )

    info = lib_data_structures.DataStructFileInfo()
    info.path = filename

    for attr_name, read_property in property_readers:
        if attr_name == 'hash' and not hash_files:
            # hashing was switched off by the caller
            continue
        try:
            setattr(info, attr_name, read_property(filename))
        except FileNotFoundError:
            # the file does not exist - there is nothing to report
            return None
        except OSError:
            info.remark = 'access denied'

    return info
def create_fp_mp():
    """
    Fingerprint all files using multiple processes and store the result as CSV.

    Every file delivered by get_file_iterator() is submitted to a process
    pool running get_fileinfo(); each non-None result is written as one row
    to fp_conf.f_output in order of completion (not in iteration order).
    The number of rows written is logged at the end.

    The pool uses one worker less than the number of CPUs, but at least one.
    Exceptions raised inside a worker are re-raised here by result().

    >>> import test
    >>> timestamp = time.time()
    >>> test.create_testfiles_fingerprint_1(timestamp)
    >>> fp_files_conf.fp_dir='./testfiles/'
    >>> fp_conf.f_output='./testresults/fp_files_result1.csv'
    >>> fingerprint=FingerPrintFiles()
    >>> fingerprint.create_fp_mp()

    >>> test.modify_testfiles_fingerprint_2(timestamp)
    >>> fp_files_conf.fp_dir='./testfiles/'
    >>> fp_conf.f_output='./testresults/fp_files_result2.csv'
    >>> fingerprint=FingerPrintFiles()
    >>> fingerprint.create_fp_mp()

    """
    logger.info(f'create fingerprint for files from {fp_files_conf.fp_dir}, storing results in {fp_conf.f_output}')

    n_files: int = 0
    file_iterator = get_file_iterator()

    # os.cpu_count() may return None, and on a single CPU machine
    # "cpu_count - 1" would be 0, which ProcessPoolExecutor rejects.
    max_workers = max(1, (os.cpu_count() or 1) - 1)
    if os.name == 'nt':
        # ProcessPoolExecutor raises ValueError for more than 61 workers on Windows
        max_workers = min(max_workers, 61)

    with open(fp_conf.f_output, 'w', encoding='utf-8', newline='') as f_out:
        fieldnames = lib_data_structures.DataStructFileInfo().get_data_dict_fieldnames()
        csv_writer = csv.DictWriter(f_out, fieldnames=fieldnames, dialect='excel')
        csv_writer.writeheader()

        with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
            # hash_files is passed explicitly to each task instead of being read in the worker
            fileinfo_futures = [
                executor.submit(get_fileinfo, filename=filename, hash_files=fp_files_conf.hash_files)
                for filename in file_iterator
            ]
            for fileinfo_future in concurrent.futures.as_completed(fileinfo_futures):
                fileinfo = fileinfo_future.result()
                if fileinfo is None:
                    # get_fileinfo returns None when the file could not be found
                    continue
                n_files += 1
                csv_writer.writerow(fileinfo.get_data_dict())

    logger.info(f'{n_files} files fingerprinted')
def get_fileinfo_from_dict(
        dict_file_info) -> lib_data_structures.DataStructFileInfo:
    """
    Create a DataStructFileInfo from a dictionary.

    Each key of dict_file_info becomes an attribute of the returned object,
    set to the corresponding value.
    """
    result = lib_data_structures.DataStructFileInfo()
    for attr_name, attr_value in dict_file_info.items():
        setattr(result, attr_name, attr_value)
    return result