Example #1
0
    def create_fp():
        """
        Fingerprint the files delivered by get_file_iterator() one after another
        and write one csv row per file to fp_conf.f_output (the file is overwritten).

        Files for which get_fileinfo() returns None are skipped and not counted.
        The number of written rows is logged at the end.

        >>> import test
        >>> timestamp = time.time()
        >>> test.create_testfiles_fingerprint_1(timestamp)
        >>> fp_files_conf.fp_dir='./testfiles/'
        >>> fp_conf.f_output='./testresults/fp_files_result1.csv'
        >>> fingerprint=FingerPrintFiles()
        >>> fingerprint.create_fp()

        >>> test.modify_testfiles_fingerprint_2(timestamp)
        >>> fp_files_conf.fp_dir='./testfiles/'
        >>> fp_conf.f_output='./testresults/fp_files_result2.csv'
        >>> fingerprint=FingerPrintFiles()
        >>> fingerprint.create_fp()

        """

        logger.info(f'create fingerprint for files from {fp_files_conf.fp_dir}, storing results in {fp_conf.f_output}')

        rows_written: int = 0
        paths_to_scan = get_file_iterator()

        with open(fp_conf.f_output, 'w', encoding='utf-8', newline='') as csv_file:

            # the column names come from an empty data structure instance
            header = lib_data_structures.DataStructFileInfo().get_data_dict_fieldnames()
            writer = csv.DictWriter(csv_file, fieldnames=header, dialect='excel')
            writer.writeheader()

            for path in paths_to_scan:
                info = get_fileinfo(filename=path, hash_files=fp_files_conf.hash_files)
                if info is None:
                    # get_fileinfo delivered nothing for this path - no row, no count
                    continue
                rows_written += 1
                writer.writerow(info.get_data_dict())
        logger.info('{} files fingerprinted'.format(rows_written))
Example #2
0
 def write_diff_csv_file(
         l_fileinfo: [lib_data_structures.DataStructFileInfo]):
     """
     Write the passed file infos as csv to fp_conf.f_output.

     The file is opened in write mode (existing content is replaced); the
     header row is written first, followed by one row per entry of l_fileinfo.
     """
     with open(fp_conf.f_output, 'w', encoding='utf-8',
               newline='') as csv_file:
         # the column names come from an empty data structure instance
         header = lib_data_structures.DataStructFileInfo(
         ).get_data_dict_fieldnames()
         writer = csv.DictWriter(csv_file, fieldnames=header)
         writer.writeheader()
         writer.writerows(info.get_data_dict() for info in l_fileinfo)
Example #3
0
def get_fileinfo(filename: str, hash_files: bool = True):   # hash_files is a parameter because the state of conf.hash_files gets lost in MP
    """
    Collect access/modification/creation time, size and (optionally) the hash
    of one file into a DataStructFileInfo.

    :param filename:    path of the file to inspect, stored as fileinfo.path
    :param hash_files:  when False the 'hash' attribute is not computed
    :return:            the filled DataStructFileInfo, or None as soon as one
                        of the lookups raises FileNotFoundError. Any other
                        OSError sets fileinfo.remark to 'access denied' and the
                        remaining attributes are still attempted.

    >>> import test
    >>> timestamp = time.time()
    >>> test.create_testfiles_fingerprint_1(timestamp)
    >>> fp_files_conf.fp_dir='./testfiles/'
    >>> fp_conf.f_output='./testresults/fp_files_test_result.csv'
    >>> fingerprint=FingerPrintFiles()
    >>> fileinfo = get_fileinfo('./testfiles/file1_no_changes.txt') # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
    >>> fileinfo.path
    './testfiles/file1_no_changes.txt'
    >>> fileinfo.hash
    'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855'
    >>> fileinfo.remark
    ''
    >>> fileinfo = get_fileinfo('./testfiles/does-not-exist.txt') # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
    >>> fileinfo is None
    True

    >>> fileinfo = get_fileinfo('c:/pagefile.sys')
    >>> fileinfo.remark
    'access denied'
    """

    # (attribute name, function that reads it from the path) - 'hash' stays last
    property_readers = (
        ('accessed_float', os.path.getatime),
        ('modified_float', os.path.getmtime),
        ('created_float', os.path.getctime),
        ('size', os.path.getsize),
        ('hash', lib_hash.get_file_hash_preserve_access_dates),
    )

    info = lib_data_structures.DataStructFileInfo()
    info.path = filename

    for attribute_name, read_property in property_readers:
        if attribute_name == 'hash' and not hash_files:
            # hashing was switched off by the caller
            continue
        try:
            setattr(info, attribute_name, read_property(filename))
        except FileNotFoundError:
            # must be handled before OSError, because it is a subclass of it
            return None
        except OSError:
            info.remark = 'access denied'
    return info
Example #4
0
    def create_fp_mp():
        """
        Fingerprint the files delivered by get_file_iterator() in parallel worker
        processes and write one csv row per file to fp_conf.f_output (the file is
        overwritten).

        get_fileinfo() is submitted once per file to a ProcessPoolExecutor; the
        rows are written in the order in which the workers finish (as_completed),
        which is not necessarily the order of the file iterator.
        Files for which get_fileinfo() returns None are skipped and not counted.

        One cpu is left free for the main process, but at least one worker is
        always started - also on single-cpu hosts or when the number of cpus can
        not be determined.

        >>> import test
        >>> timestamp = time.time()
        >>> test.create_testfiles_fingerprint_1(timestamp)
        >>> fp_files_conf.fp_dir='./testfiles/'
        >>> fp_conf.f_output='./testresults/fp_files_result1.csv'
        >>> fingerprint=FingerPrintFiles()
        >>> fingerprint.create_fp_mp()

        >>> test.modify_testfiles_fingerprint_2(timestamp)
        >>> fp_files_conf.fp_dir='./testfiles/'
        >>> fp_conf.f_output='./testresults/fp_files_result2.csv'
        >>> fingerprint=FingerPrintFiles()
        >>> fingerprint.create_fp_mp()

        """

        logger.info(f'create fingerprint for files from {fp_files_conf.fp_dir}, storing results in {fp_conf.f_output}')

        n_files: int = 0
        file_iterator = get_file_iterator()

        # os.cpu_count() may return None, and "cpu_count - 1" is 0 on a single-cpu
        # host - ProcessPoolExecutor raises ValueError for max_workers <= 0
        max_workers = max(1, (os.cpu_count() or 1) - 1)
        if os.name == 'nt':
            # on Windows ProcessPoolExecutor raises ValueError for more than 61 workers
            max_workers = min(max_workers, 61)

        with open(fp_conf.f_output, 'w', encoding='utf-8', newline='') as f_out:
            fieldnames = lib_data_structures.DataStructFileInfo().get_data_dict_fieldnames()
            csv_writer = csv.DictWriter(f_out, fieldnames=fieldnames, dialect='excel')
            csv_writer.writeheader()

            with concurrent.futures.ProcessPoolExecutor(max_workers=max_workers) as executor:
                # hash_files is passed explicitly to every call instead of being read in the worker
                fileinfo_futures = [executor.submit(get_fileinfo, filename=filename, hash_files=fp_files_conf.hash_files) for filename in file_iterator]
                for fileinfo_future in concurrent.futures.as_completed(fileinfo_futures):
                    fileinfo = fileinfo_future.result()
                    if fileinfo is not None:
                        n_files += 1
                        csv_writer.writerow(fileinfo.get_data_dict())
        logger.info(f'{n_files} files fingerprinted')
Example #5
0
 def get_fileinfo_from_dict(
         dict_file_info) -> lib_data_structures.DataStructFileInfo:
     """
     Build a DataStructFileInfo from a mapping.

     Every key of dict_file_info is set as an attribute of the same name on a
     fresh DataStructFileInfo, with the mapped value assigned unchanged.
     """
     restored_info = lib_data_structures.DataStructFileInfo()
     for attribute_name, attribute_value in dict_file_info.items():
         setattr(restored_info, attribute_name, attribute_value)
     return restored_info