# Standard-library imports used below. Helpers such as get_fileinfo,
# fatal_error, check_file_exists_in_database, and the settings module are
# assumed to be defined elsewhere in this project.
import datetime
import os
import shutil
import zipfile


def search_file(filepath):
    """Hashes the file at filepath and looks it up in the database."""
    print('Searching database for {0}'.format(filepath), end='\n\n')

    file_info = None
    try:
        file_info = get_fileinfo(filepath)
    except FileNotFoundError:
        fatal_error('Check file path; Unable to find file at {0}'.format(filepath))
    except PermissionError:
        fatal_error('Insufficient permissions to access {0}'.format(filepath))
    except BlockingIOError:
        fatal_error('Unable to access, another process has opened {0}'.format(filepath))

    # Hash type 4 is sha1b32, the store's default lookup key (see export_files).
    row = check_file_exists_in_database(4, file_info['hashes']['sha1b32'])

    if row[0] != '':
        print('\033[94mFile found! See {0}\033[0m'.format(
            os.path.join(settings.base_directory, row[0])))
    else:
        print('\033[94mFile not found in database.\033[0m')
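# Example call (illustrative only; the path below is hypothetical):
#
#     search_file('C:/cases/1234/IMG_0001.jpg')
#
# get_fileinfo() is expected to return a dict whose 'hashes' entry contains a
# 'sha1b32' digest, which is then used as the database lookup key.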
def export_files(export_existing, file_name):
    """ Copies files from the file store to a directory

    @param export_existing: if true, export files in the input file that are
        also in the file store; else, export the opposite (the delta)
    @param file_name: the file to read the hash type and hashes from
    """
    hash_file = open(file_name)
    hash_name = hash_file.readline().strip().lower()

    hash_id = get_hash_id_from_hash_name(hash_name)

    if hash_id == -1:
        print("Unknown hash type: '{}'. Export cancelled!".format(hash_name))
        return

    datetime_string = datetime.datetime.now().strftime("%H%M%S%f")

    export_directory = os.path.join(
        settings.export_directory,
        "Export run " + datetime_string + " for {}".format(hash_name))

    if not os.path.exists(export_directory):
        os.makedirs(export_directory)

    log_name = os.path.join(export_directory,
                            "Export log " + datetime_string + '.txt')
    log_file = open(log_name, 'w', encoding="utf-16")

    log_file.write("Looking for hashes in '{}'\n\n".format(file_name))
    log_file.write("Hash type: {}\n".format(hash_name))
    print("\t\tHash type: {}\n".format(hash_name))
    log_file.write("Zip exported: {}\n".format(settings.zip_exported))
    log_file.write("Rename exported: {}\n\n".format(settings.rename_exported))

    if export_existing:
        export_type = "Existing"
    else:
        export_type = "Delta"

    log_file.write("Export operation: {}\n\n".format(export_type))
    log_file.write("Copy log\n\n")

    found_files = 0
    hash_count = 0

    # TODO collect operations in a single list, then iterate/copy afterwards,
    # to remove the duplicated copy logic in the two branches below
    if export_existing:
        for line in hash_file:
            line = line.strip()
            hash_count += 1
            (file_path, file_size) = check_file_exists_in_database(hash_id, line)

            # TODO Clean up path handling; the database should store paths in
            # one canonical format
            if file_path:
                print("\t\t({:,d}) File with hash '{}' found! Copying {:,d} bytes..."
                      .format(hash_count, line, file_size))

                found_files += 1
                abs_path = os.path.join(settings.base_directory, file_path)

                if not os.path.isfile(abs_path):
                    front, ext = os.path.splitext(abs_path)
                    abs_path = front + ext.lower()

                abs_path = abs_path.replace("\\", "/")

                if settings.rename_exported and hash_name != 'sha1b32':
                    # the default is sha1b32
                    out_path = build_new_out_path(export_directory, line,
                                                  file_path.replace("\\", "/"))
                else:
                    out_path = os.path.join(export_directory,
                                            file_path.replace("\\", "/"))

                print("Copying '{}' to '{}'\n".format(abs_path, out_path))

                copy_file(abs_path, log_file, out_path)  # TODO Error handling here
    else:
        print("Getting hashes from file...")
        hashes = [line.strip() for line in hash_file]
        hash_set = set(hashes)  # get rid of any dupes
        hash_count = len(hash_set)
        file_count = 0
        print("Found {:,d} hashes in file!".format(hash_count))

        # A single SQL query won't work here, so instead:
        # export the entire DB for hash_id (file_id and hash pairs),
        # read that into a dictionary of hash: file_id,
        # loop through hash_set and remove matching items from the dictionary,
        # then export the files remaining in the dictionary.
        print("Getting existing hashes from database...")
        existing_hash_list = get_existing_hash_list(hash_id)
        print("Found {:,d} hashes in database!".format(len(existing_hash_list)))

        for h in hash_set:
            if h in existing_hash_list:
                del existing_hash_list[h]

        print("After pruning there are {:,d} hashes to export.".format(
            len(existing_hash_list)))

        for value in existing_hash_list.values():
            # value is the fileID, so we can get info on the file and export it
            db_name = get_file_from_db(value)
            if db_name:
                abs_path = os.path.join(settings.base_directory, db_name)

                if not os.path.isfile(abs_path):
                    front, ext = os.path.splitext(abs_path)
                    abs_path = front + ext.lower()

                abs_path = abs_path.replace("\\", "/")

                if settings.rename_exported and hash_name != 'sha1b32':
                    # the default is sha1b32, so for any other hash type we
                    # have to fetch the matching hash value for this file id
                    # and hash_id from the database and build the output
                    # name from that
                    new_hash = get_hash_from_hash_id_and_file_id(hash_id, value)
                    out_path = build_new_out_path(export_directory, new_hash, db_name)
                else:
                    out_path = os.path.join(export_directory,
                                            db_name.replace("\\", "/"))

                file_count += 1
                print("[{:,d}/{:,d}] Copying '{}' to '{}'\n".format(
                    file_count, len(existing_hash_list), abs_path, out_path))

                copy_file(abs_path, log_file, out_path)  # TODO Error handling here

    hash_file.close()
    log_file.close()

    if settings.zip_exported:
        zip_name = os.path.join(
            settings.export_directory,
            "Exported " + hash_name + " " + datetime_string + ".zip")
        print("\t\tZipping files to '{}'\n".format(zip_name))
        z_file = zipfile.ZipFile(zip_name, "w")
        for dirpath, dirnames, filenames in os.walk(export_directory):
            for filename in filenames:
                full_name = os.path.join(dirpath, filename)
                if full_name.endswith("txt"):
                    archive_name = os.path.basename(full_name)
                else:
                    # keep only the last three path components as the name
                    # inside the archive
                    parts = full_name.split("\\")
                    archive_name = "\\".join(parts[-3:])
                z_file.write(full_name, archive_name)
        z_file.close()

        print("\t\tRemoving '{}' since export was zipped to '{}'...\n".format(
            export_directory, zip_name))
        shutil.rmtree(export_directory)

    print("\n\t\tSaw {:,d} {} hashes in '{}'. Files found: {:,d}. "
          "See '{}' for details.".format(
              hash_count, hash_name, file_name, found_files, log_name))
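# The delta branch above builds a {hash: file_id} dictionary from the database
# and deletes every hash that appeared in the input file, leaving only the
# entries absent from the input. A minimal, self-contained sketch of that
# pruning technique; the names here are illustrative, not this module's API:
def _prune_seen_hashes(db_hashes, input_hashes):
    """Return a copy of db_hashes without any key present in input_hashes."""
    seen = {h.strip() for h in input_hashes}  # de-duplicate the input hashes
    return {h: file_id for h, file_id in db_hashes.items() if h not in seen}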
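# The zipping loop above derives archive names by splitting on backslashes,
# which only works for Windows-style paths. A portable alternative (a sketch
# assuming archive names should mirror the layout under the export directory,
# not a drop-in replacement for the current behaviour) is os.path.relpath:
def _zip_directory(directory, zip_name):
    """Zip every file under directory, storing paths relative to it."""
    with zipfile.ZipFile(zip_name, 'w') as z_file:
        for dirpath, _dirnames, filenames in os.walk(directory):
            for filename in filenames:
                full_name = os.path.join(dirpath, filename)
                z_file.write(full_name, os.path.relpath(full_name, directory))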