def date_files(input_dir):
    """Run ``add_date`` on every regular file directly inside ``input_dir``.

    The scan is shallow: entries that are not regular files (for example
    sub-directories) are skipped and never descended into. The progress bar
    advances once per directory entry, whether or not it was processed.
    """
    assert os.path.isdir(input_dir), 'Not a directory: ' + input_dir
    entries = os.listdir(input_dir)
    bar = Bar('Adding dates to files', max=len(entries))
    for entry in entries:
        candidate = input_dir + '/' + entry
        if not os.path.isfile(candidate):
            # Not a regular file: nothing to date, but still count the entry.
            bar.next()
            continue
        add_date(candidate)
        bar.next()
    bar.finish()
def filter_files(input_dir, file_types):
    """Move files whose type is not in ``file_types`` to ``<input_dir>/non_image``.

    Only direct children of ``input_dir`` are considered (no recursion). A
    regular file is moved when ``split_file_name(name)[1].lower()`` is not
    contained in ``file_types``. The ``non_image`` folder is created the first
    time a file actually has to be moved.

    Args:
        input_dir: Path of the directory to filter.
        file_types: Container of file types that stay in place; compared
            against the lower-cased second element returned by
            ``split_file_name``.

    Raises:
        AssertionError: If ``input_dir`` is not a directory, or if the
            destination inside ``non_image`` already exists.
    """
    # Explicit raise instead of ``assert`` so the check survives ``python -O``
    # while callers still see the same exception type and message.
    if not os.path.isdir(input_dir):
        raise AssertionError('Not a directory: ' + input_dir)
    target_dir = os.path.join(input_dir, 'non_image')
    names = os.listdir(input_dir)
    progress_bar = Bar('Filtering file types', max=len(names))
    for name in names:
        source = os.path.join(input_dir, name)
        if (os.path.isfile(source)
                and split_file_name(name)[1].lower() not in file_types):
            destination = os.path.join(target_dir, name)
            # os.rename silently replaces an existing file on POSIX and only
            # raises FileExistsError on Windows; check explicitly so a name
            # collision never destroys a previously moved file.
            if os.path.lexists(destination):
                raise AssertionError('File already exists: ' + destination)
            try:
                os.makedirs(target_dir, exist_ok=True)
                os.rename(source, destination)
            except FileExistsError as e:
                raise AssertionError(str(e)) from e
        # One tick per directory entry, moved or not.
        progress_bar.next()
    progress_bar.finish()
def check_duplicates(input_path):
    """Move files with identical content into ``<input_path>/duplicates``.

    Only regular files directly inside ``input_path`` are examined (shallow
    check). Each file is hashed with SHA-1. Names are processed in reverse
    sorted order: the first file seen with a given digest stays in place and
    every later file with the same digest is moved to the ``duplicates``
    folder. A textual summary of the moved files is then handed to ``log``
    together with the path ``<input_path>/duplicates/log.txt``.

    Args:
        input_path: Path of the directory to check.

    Raises:
        AssertionError: If ``input_path`` is not a directory, or if a file
            with the same name already exists in the ``duplicates`` folder.
    """
    # Explicit raise instead of ``assert`` so the check survives ``python -O``
    # while callers still see the same exception type and message.
    if not os.path.isdir(input_path):
        raise AssertionError('Input path must be a valid directory')
    duplicates_dir = os.path.join(input_path, 'duplicates')
    os.makedirs(duplicates_dir, exist_ok=True)

    block_size = 65536  # bytes read per chunk while hashing
    first_seen = {}  # sha1 hex digest -> name of the file that is kept
    names = sorted(os.listdir(input_path), reverse=True)
    progress_bar = Bar('Checking for duplicates', max=len(names))
    log_lines = [
        'Duplicates (duplicate filename -> sha1 hash = original file):\n'
    ]
    for name in names:
        source = os.path.join(input_path, name)
        if os.path.isfile(source):
            hasher = hashlib.sha1()
            with open(source, 'rb') as file:
                # Read in fixed-size chunks until read() returns b''.
                for chunk in iter(lambda: file.read(block_size), b''):
                    hasher.update(chunk)
            digest = hasher.hexdigest()
            if digest in first_seen:
                destination = os.path.join(duplicates_dir, name)
                # os.rename silently replaces an existing file on POSIX and
                # only raises FileExistsError on Windows; check explicitly so
                # an earlier run's duplicate is never overwritten.
                if os.path.lexists(destination):
                    raise AssertionError(
                        'File already exists: ' + destination)
                try:
                    os.rename(source, destination)
                except FileExistsError as e:
                    raise AssertionError(str(e)) from e
                log_lines.append(
                    name + ' -> ' + digest + ' = ' + first_seen[digest])
            else:
                first_seen[digest] = name
        # One tick per directory entry, hashed or not.
        progress_bar.next()
    # The path string keeps its original form because ``log`` is defined
    # elsewhere and its handling of the path is not visible here.
    log('\n'.join(log_lines), input_path + '/duplicates/log.txt', 0)
    progress_bar.finish()
def sort_items(input_dir):
    """Sort files into ``<year>/<month>/<day>`` folders based on their name.

    Only regular files directly inside ``input_dir`` are handled. A file whose
    name contains exactly one ``YYYY-MM-DD`` match is moved to
    ``<input_dir>/<YYYY>/<MONTHS[MM]>/<DD>/``. Every other file, including one
    whose month part is not a key of ``MONTHS``, is moved to
    ``<input_dir>/unsorted/``. Missing target folders are created on demand.

    Args:
        input_dir: Path of the directory whose files are sorted.

    Raises:
        AssertionError: If ``input_dir`` is not a directory, or if the
            destination of a move already exists.
    """
    # Explicit raise instead of ``assert`` so the check survives ``python -O``
    # while callers still see the same exception type and message.
    if not os.path.isdir(input_dir):
        raise AssertionError('Not a directory: ' + input_dir)
    # Raw string: "\d" in a normal literal is an invalid escape sequence.
    date_pattern = re.compile(r"\d{4}-\d{2}-\d{2}_*")
    names = os.listdir(input_dir)
    progress_bar = Bar('Sorting files', max=len(names))
    for name in names:
        source = os.path.join(input_dir, name)
        if os.path.isfile(source):
            target_dir = os.path.join(input_dir, 'unsorted')
            date_strings = date_pattern.findall(name)
            if len(date_strings) == 1:
                stamp = date_strings[0]
                y, m, d = stamp[:4], stamp[5:7], stamp[8:10]
                try:
                    # MONTHS is defined elsewhere; presumably it maps a
                    # two-digit month string to a folder name.
                    target_dir = os.path.join(input_dir, y, MONTHS[m], d)
                except KeyError:
                    # Digits matched the pattern but the month is unknown to
                    # MONTHS (e.g. "13"): leave the file in 'unsorted' rather
                    # than aborting the whole run.
                    pass
            destination = os.path.join(target_dir, name)
            # os.rename silently replaces an existing file on POSIX and only
            # raises FileExistsError on Windows; check explicitly so a name
            # collision never destroys an already sorted file.
            if os.path.lexists(destination):
                raise AssertionError('File already exists: ' + destination)
            try:
                os.makedirs(target_dir, exist_ok=True)
                os.rename(source, destination)
            except FileExistsError as e:
                raise AssertionError(str(e)) from e
        # One tick per directory entry, moved or not.
        progress_bar.next()
    progress_bar.finish()