Example #1
def search_file(filepath):
    print('Searching database for {0}'.format(filepath), end='\n\n')

    file_info = None
    try:
        file_info = get_fileinfo(filepath)
    except FileNotFoundError:
        fatal_error('Check file path; Unable to find file at {0}'.format(filepath))
    except PermissionError:
        fatal_error('Insufficient permissions to access {0}'.format(filepath))
    except BlockingIOError:
        fatal_error('Unable to access, another process has opened {0}'.format(filepath))

    row = check_file_exists_in_database(4, file_info['hashes']['sha1b32'])

    if row[0] != '':
        print('\033[94mFile found! See {0}\033[0m'.format(os.path.join(settings.base_directory, row[0])))
    else:
        print('\033[94mFile not found in database.\033[0m')
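
The fatal_error() helper called above is not shown in this example. A minimal stand-in, assuming it only reports the message and aborts (the red ANSI code mirrors the blue one used for normal output), could look like this:

import sys

def fatal_error(message):
    # Hypothetical sketch only; the real fatal_error() is defined elsewhere in
    # the project. Assumption: print the message in red to stderr and exit
    # with a non-zero status.
    print('\033[91m{0}\033[0m'.format(message), file=sys.stderr)
    sys.exit(1)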
Example #2
def search_file(filepath):
    print('Searching database for {0}'.format(filepath), end='\n\n')

    file_info = None
    try:
        file_info = get_fileinfo(filepath)
    except FileNotFoundError:
        fatal_error('Check file path; Unable to find file at {0}'.format(filepath))
    except PermissionError:
        fatal_error('Insufficient permissions to access {0}'.format(filepath))
    except BlockingIOError:
        fatal_error('Unable to access, another process has opened {0}'.format(
            filepath))

    row = check_file_exists_in_database(4, file_info['hashes']['sha1b32'])

    if row[0] != '':
        print('\033[94mFile found! See {0}\033[0m'.format(
            os.path.join(settings.base_directory, row[0])))
    else:
        print('\033[94mFile not found in database.\033[0m')
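
Both search_file() examples look files up by fileinfo['hashes']['sha1b32'], i.e. a Base32-encoded SHA-1 digest. get_fileinfo() itself is not shown; a minimal sketch of how such a value could be computed (an assumption, not the project's actual implementation) is:

import base64
import hashlib

def sha1_base32(filepath, chunk_size=65536):
    # Hypothetical helper: stream the file in chunks and return its SHA-1
    # digest encoded as Base32 text, which is what 'sha1b32' appears to hold.
    sha1 = hashlib.sha1()
    with open(filepath, 'rb') as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b''):
            sha1.update(chunk)
    return base64.b32encode(sha1.digest()).decode('ascii')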
Example #3
def verify():
    print("*** File manager verification ***\n")

    print("Beginning stage 1 (comparing database against file store)...")
    db_to_fs_bad = check_db_to_fs()

    if len(db_to_fs_bad) == 0:
        print("Stage 1 complete. No inconsistencies detected between database and file system.")

    print("\nBeginning stage 2 (comparing file store against database)...")
    fs_to_db_bad = check_fs_to_db()

    if len(fs_to_db_bad) == 0:
        print("Stage 2 complete. No inconsistencies detected between file system and database.")

    if len(fs_to_db_bad) == 0 and len(db_to_fs_bad) == 0:
        print("\n\nNo inconsistencies detected!")
    else:
        # we have to fix things
        print("\n\nFound {:,d} database and {:,d} file system inconsistencies.".format(len(db_to_fs_bad),
                                                                                       len(fs_to_db_bad)))

        fix_it = input("\nDo you want to fix these issues? [Y|n]: ")

        if fix_it.lower() != 'n':
            print("\nDeleting bad records from database...", end='')
            delete_files_from_db(db_to_fs_bad)

            print("Deleted {:,d} records from database!".format(len(db_to_fs_bad)))

            # set up a clean staging area for files to be imported from
            verify_directory = os.path.join(settings.base_directory, "verify")

            if os.path.isdir(verify_directory):
                shutil.rmtree(verify_directory)

            os.mkdir(verify_directory)

            print("Adding files to database...")
            for file in fs_to_db_bad:
                fileinfo = get_fileinfo(file)

                if file_exists_in_database(fileinfo):
                    # nuke it to be clean
                    delete_file_from_db(fileinfo)

                # move each file to a staging directory, then call import work on it. done
                head, tail = os.path.split(file)

                to_file = os.path.join(verify_directory, tail)

                unique_prefix = 0

                while os.path.isfile(to_file):
                    # file exists, so get a unique name
                    to_file = os.path.join(verify_directory, str(unique_prefix) + "_" + tail)
                    unique_prefix += 1

                shutil.move(file, to_file)

            (files_added_to_database, total_files, files_deleted, files_copied, files_with_duplicate_hashes,
             files_with_invalid_extensions) = import_files_work(verify_directory)

            shutil.rmtree(verify_directory)

            print("\nAdded {:,d} files to database!".format(files_added_to_database))

            print("\n\n*** Repair complete! ***")
Example #4
def import_files_work(dirname):
    files_with_invalid_extensions = []  # list of files we didn't import.

    total_files = 0
    files_added_to_database = 0
    files_deleted = 0
    files_with_duplicate_hashes = []
    files_copied = 0

    # Looking up each hash is sllllllow, so pull em all in as a set and just look there!
    print("Getting existing hashes from database...", end='')
    existing_hashes = get_sha1b32_from_database()

    print("Got {:,d} hashes from database. Looking for files.\n".format(
        len(existing_hashes)))

    for dirpath, dirnames, files in os.walk(dirname, topdown=False):

        total_files += len(files)

        file_counter = 0

        if len(files) > 0:
            safeprint("\n\tFound {:,d} files in {}. Processing...".format(
                len(files), dirpath))

            #   logger.info("Found {:,d} files in {}".format(len(files), dirpath))

        for name in files:
            full_path_name = os.path.join(dirpath, name)

            file_counter += 1

            if os.path.isfile(full_path_name):

                if os.path.getsize(full_path_name) == 0:
                    safeprint("\t\tDeleting 0 byte file '{}'.".format(
                        full_path_name))
                    os.remove(full_path_name)
                    continue

                parts = os.path.splitext(name.lower())
                if len(parts) == 2:
                    ext = parts[1]

                    # some files are always bad, so just make em go away.
                    if ext in auto_delete_extensions:
                        safeprint(
                            '\t\t({} [{:,d}/{:,d}]): File {} has an autonuke extension. Deleting...'
                            .format(datetime.datetime.now().strftime('%x %X'),
                                    file_counter, len(files), full_path_name))
                        os.remove(full_path_name)
                        continue

                    if ext in extensions:
                        # logger.info(
                        #     "{} before fileinfo = get_file_data(full_path_name)".format(
                        #         datetime.datetime.now().strftime('%x %X')))

                        fileinfo = get_fileinfo(full_path_name)

                        # logger.info("{} after fileinfo = get_file_data(full_path_name)".format(
                        #     datetime.datetime.now().strftime('%x %X')))

                        if not fileinfo['hashes']['sha1b32'] in existing_hashes:
                            files_added_to_database += 1

                            safeprint(
                                "\t\t({} [{:,d}/{:,d}]): '{}' does not exist in database! Adding..."
                                .format(
                                    datetime.datetime.now().strftime('%x %X'),
                                    file_counter, len(files), full_path_name))

                            # since this is a new file, we add it to our set for future import operations
                            existing_hashes.add(fileinfo['hashes']['sha1b32'])

                            add_file_to_db(fileinfo)
                        else:
                            pass  # do anything else here? should i check if file exists in file system? who cares tho
                            # as this syncs it up maybe here is where you do extra hashing of what is on file
                            #  system to make sure the 2 match, properly named, etc

                        # logger.info("{} before copied = copy_file_to_store(fileinfo)):".format(
                        #     datetime.datetime.now().strftime('%x %X')))

                        copied = copy_file_to_store(fileinfo)

                        if copied:
                            safeprint(
                                '\t\t({} [{:,d}/{:,d}]): File with SHA-1 Base32 hash {} does not exist in file store! Copying {:,d} bytes...'
                                .format(
                                    datetime.datetime.now().strftime('%x %X'),
                                    file_counter, len(files),
                                    fileinfo['hashes']['sha1b32'],
                                    fileinfo['filesize']))

                        # logger.info("{} after copied = copy_file_to_store(fileinfo)):".format(
                        #     datetime.datetime.now().strftime('%x %X')))

                        if not copied:
                            files_with_duplicate_hashes.append(full_path_name)
                        else:
                            files_copied += 1

                        if len(settings.copy_new_destination) > 0 and copied:
                            if not os.path.exists(
                                    settings.copy_new_destination):
                                os.mkdir(settings.copy_new_destination)

                            # TODO should this create the 2 char structure too? for now, just copy it

                            copy_name = os.path.join(
                                settings.copy_new_destination, name)

                            unique_prefix = 0

                            while os.path.isfile(copy_name):
                                # file exists, so get a unique name
                                copy_name = os.path.join(
                                    settings.copy_new_destination,
                                    str(unique_prefix) + "_" + name)
                                unique_prefix += 1

                            shutil.copyfile(full_path_name, copy_name)

                            outfile = os.path.join(
                                settings.copy_new_destination, "!!" +
                                datetime.datetime.now().strftime("%Y-%m-%d") +
                                " File copy log " + '.txt')
                            with open(outfile, 'a',
                                      encoding="utf-16") as logfile:
                                logfile.write("{}: Copied {} to {}.\n".format(
                                    datetime.datetime.now(), full_path_name,
                                    copy_name))

                        if settings.delete_existing:
                            safeprint(
                                "\t\t({} [{:,d}/{:,d}]): Deleting '{}'...".
                                format(
                                    datetime.datetime.now().strftime('%x %X'),
                                    file_counter, len(files), full_path_name))

                            if settings.delete_existing == 'yes':
                                os.remove(full_path_name)

                            files_deleted += 1
                    else:
                        files_with_invalid_extensions.append(
                            os.path.join(dirpath, name))

        if settings.delete_empty_directories:
            if not os.listdir(dirpath):
                safeprint(
                    "\t\t({} [{:,d}/{:,d}]): Deleting empty directory '{}'...".
                    format(datetime.datetime.now().strftime('%x %X'),
                           file_counter, len(files), dirpath))
                if settings.delete_empty_directories == 'yes':
                    os.rmdir(dirpath)

    return (files_added_to_database, total_files, files_deleted, files_copied,
            files_with_duplicate_hashes, files_with_invalid_extensions)
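
import_files_work() returns a six-element tuple that callers such as verify() unpack. A hedged usage sketch (the 'incoming' directory is only a placeholder):

(files_added, total_files, files_deleted, files_copied,
 duplicate_hashes, invalid_extensions) = import_files_work('incoming')

print("Processed {:,d} files: {:,d} added to the database, {:,d} copied, {:,d} deleted.".format(
    total_files, files_added, files_copied, files_deleted))
print("Skipped {:,d} duplicate hashes and {:,d} files with invalid extensions.".format(
    len(duplicate_hashes), len(invalid_extensions)))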
Example #5
def import_files_work(dirname):
    files_with_invalid_extensions = []  # list of files we didn't import.

    total_files = 0
    files_added_to_database = 0
    files_deleted = 0
    files_with_duplicate_hashes = []
    files_copied = 0

    # Looking up each hash is sllllllow, so pull em all in as a set and just look there!
    print("Getting existing hashes from database...", end='')
    existing_hashes = get_sha1b32_from_database()

    print("Got {:,d} hashes from database. Looking for files.\n".format(len(existing_hashes)))

    for dirpath, dirnames, files in os.walk(dirname, topdown=False):

        total_files += len(files)

        file_counter = 0

        if len(files) > 0:
            safeprint("\n\tFound {:,d} files in {}. Processing...".format(len(files), dirpath))

            #   logger.info("Found {:,d} files in {}".format(len(files), dirpath))

        for name in files:
            full_path_name = os.path.join(dirpath, name)

            file_counter += 1

            if os.path.isfile(full_path_name):

                if os.path.getsize(full_path_name) == 0:
                    safeprint("\t\tDeleting 0 byte file '{}'.".format(full_path_name))
                    os.remove(full_path_name)
                    continue

                parts = os.path.splitext(name.lower())
                if len(parts) == 2:
                    ext = parts[1]

                    # some files are always bad, so just make em go away.
                    if ext in auto_delete_extensions:
                        safeprint(
                            '\t\t({} [{:,d}/{:,d}]): File {} has an autonuke extension. Deleting...'.format(
                                datetime.datetime.now().strftime('%x %X'),
                                file_counter,
                                len(files), full_path_name))
                        os.remove(full_path_name)
                        continue

                    if ext in extensions:
                        # logger.info(
                        #     "{} before fileinfo = get_file_data(full_path_name)".format(
                        #         datetime.datetime.now().strftime('%x %X')))

                        fileinfo = get_fileinfo(full_path_name)

                        # logger.info("{} after fileinfo = get_file_data(full_path_name)".format(
                        #     datetime.datetime.now().strftime('%x %X')))

                        if not fileinfo['hashes']['sha1b32'] in existing_hashes:
                            files_added_to_database += 1

                            safeprint("\t\t({} [{:,d}/{:,d}]): '{}' does not exist in database! Adding...".format
                                      (datetime.datetime.now().strftime('%x %X'),
                                       file_counter,
                                       len(files),
                                       full_path_name))

                            # since this is a new file, we add it to our set for future import operations
                            existing_hashes.add(fileinfo['hashes']['sha1b32'])

                            add_file_to_db(fileinfo)
                        else:
                            pass  # do anything else here? should i check if file exists in file system? who cares tho
                            # as this syncs it up maybe here is where you do extra hashing of what is on file
                            #  system to make sure the 2 match, properly named, etc

                        # logger.info("{} before copied = copy_file_to_store(fileinfo)):".format(
                        #     datetime.datetime.now().strftime('%x %X')))

                        copied = copy_file_to_store(fileinfo)

                        if copied:
                            safeprint(
                                '\t\t({} [{:,d}/{:,d}]): File with SHA-1 Base32 hash {} does not exist in file store! Copying {:,d} bytes...'.format(
                                    datetime.datetime.now().strftime('%x %X'),
                                    file_counter,
                                    len(files), fileinfo['hashes']['sha1b32'], fileinfo['filesize']))

                        # logger.info("{} after copied = copy_file_to_store(fileinfo)):".format(
                        #     datetime.datetime.now().strftime('%x %X')))

                        if not copied:
                            files_with_duplicate_hashes.append(full_path_name)
                        else:
                            files_copied += 1

                        if len(settings.copy_new_destination) > 0 and copied:
                            if not os.path.exists(settings.copy_new_destination):
                                os.mkdir(settings.copy_new_destination)

                            # TODO should this create the 2 char structure too? for now, just copy it

                            copy_name = os.path.join(settings.copy_new_destination, name)

                            unique_prefix = 0

                            while os.path.isfile(copy_name):
                                # file exists, so get a unique name
                                copy_name = os.path.join(settings.copy_new_destination,
                                                         str(unique_prefix) + "_" + name)
                                unique_prefix += 1

                            shutil.copyfile(full_path_name, copy_name)

                            outfile = os.path.join(settings.copy_new_destination,
                                                   "!!" + datetime.datetime.now().strftime(
                                                       "%Y-%m-%d") + " File copy log " + '.txt')
                            with open(outfile, 'a', encoding="utf-16") as logfile:
                                logfile.write(
                                    "{}: Copied {} to {}.\n".format(datetime.datetime.now(), full_path_name, copy_name))

                        if settings.delete_existing:
                            safeprint("\t\t({} [{:,d}/{:,d}]): Deleting '{}'...".format(
                                datetime.datetime.now().strftime('%x %X'),
                                file_counter,
                                len(files),
                                full_path_name))

                            if settings.delete_existing == 'yes':
                                os.remove(full_path_name)

                            files_deleted += 1
                    else:
                        files_with_invalid_extensions.append(os.path.join(dirpath, name))

        if settings.delete_empty_directories:
            if not os.listdir(dirpath):
                safeprint("\t\t({} [{:,d}/{:,d}]): Deleting empty directory '{}'...".format(
                    datetime.datetime.now().strftime('%x %X'), file_counter, len(files), dirpath))
                if settings.delete_empty_directories == 'yes':
                    os.rmdir(dirpath)

    return (files_added_to_database, total_files, files_deleted, files_copied, files_with_duplicate_hashes,
            files_with_invalid_extensions)
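
Both import examples report progress through safeprint() rather than print(). Its definition is not included here; a common reason for such a wrapper is to keep progress output from crashing on consoles that cannot encode every character in a file name, so a plausible stand-in (an assumption only) is:

def safeprint(*args, **kwargs):
    # Hypothetical sketch of the safeprint() used above: fall back to an
    # ASCII-safe rendering if the console encoding rejects the output.
    try:
        print(*args, **kwargs)
    except UnicodeEncodeError:
        cleaned = (str(arg).encode('ascii', 'replace').decode('ascii') for arg in args)
        print(*cleaned, **kwargs)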