class MoveToRoot:
    """Flatten a directory tree by moving recognised files up into its root.

    Every sub-directory below ``root_path`` whose name is not excluded is
    walked recursively, files accepted by the formatter are moved into
    ``root_path``, and the sub-directory is then deleted together with
    whatever is still inside it.
    """

    def __init__(self, root_path, dir_to_exclude):
        """Store the root directory and the entry names to leave untouched.

        Args:
            root_path: Directory that receives the moved files.
            dir_to_exclude: Iterable of entry names (not paths) to skip.
        """
        self.root_path = root_path
        self.formatter = FileFormatter()
        self.dir_to_exclude = set(dir_to_exclude)

    def move(self):
        """Run the flattening pass starting at ``root_path``."""
        self._move_helper(self.root_path, os.listdir(self.root_path))

    def _move_helper(self, root_path, files):
        """Process the entries ``files`` found directly inside ``root_path``.

        Args:
            root_path: Directory currently being processed.
            files: Names of the entries inside ``root_path``.
        """
        for file in files:
            if file in self.dir_to_exclude:
                continue

            file_path = os.path.join(root_path, file)
            if os.path.isdir(file_path):
                # Rescue the wanted files first, then drop the directory.
                self._move_helper(file_path, os.listdir(file_path))
                print('removing directory:', file_path, 'with contents')
                for deleted_file in os.listdir(file_path):
                    print('\t', deleted_file)
                # NOTE(review): this also deletes excluded entries nested
                # inside file_path; only the recursion skips them.
                shutil.rmtree(file_path)
            elif (self.formatter.file_contains_format(file, file_path)
                    and root_path != self.root_path):
                # Files that already live in the root stay where they are.
                new_path = os.path.join(self.root_path, file)
                print('moving:', file_path, 'to:', new_path)
                # NOTE(review): an existing file with the same name in the
                # root may be overwritten, depending on os.rename semantics.
                shutil.move(file_path, new_path)
class MoveToRoot:
    """Flatten a directory tree by moving recognised files up into its root.

    Every sub-directory below ``root_path`` whose name is not excluded is
    walked recursively, files accepted by the formatter are moved into
    ``root_path``, and the sub-directory is then deleted together with
    whatever is still inside it.
    """

    def __init__(self, root_path, dir_to_exclude):
        """Store the root directory and the entry names to leave untouched.

        Args:
            root_path: Directory that receives the moved files.
            dir_to_exclude: Iterable of entry names (not paths) to skip.
        """
        self.root_path = root_path
        self.formatter = FileFormatter()
        self.dir_to_exclude = set(dir_to_exclude)

    def move(self):
        """Run the flattening pass starting at ``root_path``."""
        self._move_helper(self.root_path, os.listdir(self.root_path))

    def _move_helper(self, root_path, files):
        """Process the entries ``files`` found directly inside ``root_path``.

        Args:
            root_path: Directory currently being processed.
            files: Names of the entries inside ``root_path``.
        """
        for file in files:
            if file in self.dir_to_exclude:
                continue

            file_path = os.path.join(root_path, file)
            if os.path.isdir(file_path):
                # Rescue the wanted files first, then drop the directory.
                self._move_helper(file_path, os.listdir(file_path))
                print('removing directory:', file_path, 'with contents')
                for deleted_file in os.listdir(file_path):
                    print('\t', deleted_file)
                # NOTE(review): this also deletes excluded entries nested
                # inside file_path; only the recursion skips them.
                shutil.rmtree(file_path)
            elif (self.formatter.file_contains_format(file, file_path)
                    and root_path != self.root_path):
                # Files that already live in the root stay where they are.
                new_path = os.path.join(self.root_path, file)
                print('moving:', file_path, 'to:', new_path)
                # NOTE(review): an existing file with the same name in the
                # root may be overwritten, depending on os.rename semantics.
                shutil.move(file_path, new_path)
def __init__(self):
    """Set up the separator characters and the file-format helper."""
    # One list entry per character; these are the characters the naming
    # code checks against when looking for separators.
    self.punctuation = list(" _.[],/-{}|()*!&'")
    self.fileFormatter = FileFormatter()
def makeNewFile(self, nameList, aFile, pathToFile, fileNumber):
    """Rename one directory entry so that it carries the given name.

    Entries accepted by ``FileFormatter.file_contains_format`` are renamed
    to whatever ``FileNamer.makeNewFileName`` produces. Any other entry is
    deleted unless it is a directory, which is only reported.

    Args:
        nameList: Name parts forwarded to ``FileNamer.makeNewFileName``.
        aFile: Name of the entry inside ``pathToFile``.
        pathToFile: Directory containing ``aFile``; a trailing separator
            is optional.
        fileNumber: Counter used only in the progress messages.

    Returns:
        The new file name (possibly equal to ``aFile``), or ``None`` when
        the entry was removed or is a directory.
    """
    # os.path.join keeps the path valid whether or not pathToFile ends
    # with a separator; plain concatenation did not.
    src = os.path.join(pathToFile, aFile)

    if FileFormatter().file_contains_format(aFile, pathToFile):
        newFile = FileNamer().makeNewFileName(nameList, aFile, pathToFile)
        if newFile != aFile:
            shutil.move(src, os.path.join(pathToFile, newFile))
            print(
                fileNumber,
                '-- from: ' + aFile + '\n' + str(fileNumber) + ' -- to: '
                + newFile + '\n')
        return newFile

    if os.path.isdir(src):
        print('File is a dir: ' + src)
        return None

    os.remove(src)
    print(fileNumber, '-- removing file: ' + aFile + '\n')
    return None
def __init__(self, top_dirs):
    """Remember the directories to scan and start with no collected files.

    Args:
        top_dirs: Directories stored on the instance for later scans.
    """
    # Mapping filled in later by the counting methods.
    self.files = dict()
    self.top_dirs = top_dirs
    self.file_formatter = FileFormatter()
class FileCounter:
    """Count recognised files below a set of top-level directories."""

    def __init__(self, top_dirs):
        """Remember the directories to scan and start with an empty tally.

        Args:
            top_dirs: Directories scanned by ``count_names``.
        """
        self.top_dirs = top_dirs
        self.file_formatter = FileFormatter()
        # Maps a first-level directory name to the list of files found
        # for it.
        self.files = {}

    def count_files(self, dirs):
        """Recursively count the recognised files below ``dirs``.

        Args:
            dirs: Iterable of directory paths; non-directories are skipped.

        Returns:
            Total number of files accepted by the formatter.
        """
        counter = 0
        for topdir in dirs:
            print('******************* searching', topdir,
                  '***************************')
            if not os.path.isdir(topdir):
                continue
            for file in os.listdir(topdir):
                file_path = os.path.join(topdir, file)
                if os.path.isdir(file_path):
                    # Recurse through this method and use the attribute
                    # set in __init__; the old names did not exist.
                    newFileNumber = self.count_files([file_path])
                    print('\ncounter:', counter,
                          '\nnew number:', newFileNumber,
                          '\nnew counter:', counter + newFileNumber)
                    counter += newFileNumber
                elif self.file_formatter.file_contains_format(file, topdir):
                    counter += 1
                    print(counter, 'a movie:', file)
        print('------ returning', counter, '----------')
        return counter

    def count_names(self):
        """Fill ``self.files`` with the files found per first-level directory.

        For each directory ``<top>/<name>``, recognised files directly
        inside it and inside its immediate sub-directories are recorded
        under ``name``. Deeper levels are not visited.
        """
        for top_dir in self.top_dirs:
            if not os.path.isdir(top_dir):
                continue
            for file in os.listdir(top_dir):
                file_path = os.path.join(top_dir, file)
                if not os.path.isdir(file_path):
                    continue
                for sub_file in os.listdir(file_path):
                    sub_file_path = os.path.join(file_path, sub_file)
                    if os.path.isdir(sub_file_path):
                        self.add_files_for_name(
                            file, self.get_relevant_files(sub_file_path))
                    elif self.file_formatter.file_contains_format(
                            sub_file, file_path):
                        self.add_files_for_name(file, [sub_file])

    def get_relevant_files(self, file_path):
        """Return the entries of ``file_path`` accepted by the formatter."""
        return [
            file for file in os.listdir(file_path)
            if self.file_formatter.file_contains_format(file, file_path)
        ]

    def add_files_for_name(self, name, files):
        """Record ``files`` under ``name``, ahead of any earlier entries."""
        self.files[name] = files + self.files.get(name, [])

    def print_results(self):
        """Print the names ranked by number of files, largest first.

        Prints nothing when no files have been collected.
        """
        if not self.files:
            # max() below would raise on an empty mapping.
            return None

        names = sorted(
            self.files, key=lambda name: len(self.files[name]), reverse=True)
        max_num_length = len(str(len(names))) + 1
        max_name_length = max(len(name) for name in names) + 1
        for position, name in enumerate(names, start=1):
            print(str(position).ljust(max_num_length),
                  name.ljust(max_name_length),
                  'scenes:', len(self.files[name]))
        return None
class FileCounter:
    """Count recognised files below a set of top-level directories."""

    def __init__(self, top_dirs):
        """Remember the directories to scan and start with an empty tally.

        Args:
            top_dirs: Directories scanned by ``count_names``.
        """
        self.top_dirs = top_dirs
        self.file_formatter = FileFormatter()
        # Maps a first-level directory name to the list of files found
        # for it.
        self.files = {}

    def count_files(self, dirs):
        """Recursively count the recognised files below ``dirs``.

        Args:
            dirs: Iterable of directory paths; non-directories are skipped.

        Returns:
            Total number of files accepted by the formatter.
        """
        counter = 0
        for topdir in dirs:
            print('******************* searching', topdir,
                  '***************************')
            if not os.path.isdir(topdir):
                continue
            for file in os.listdir(topdir):
                file_path = os.path.join(topdir, file)
                if os.path.isdir(file_path):
                    # Recurse through this method and use the attribute
                    # set in __init__; the old names did not exist.
                    newFileNumber = self.count_files([file_path])
                    print('\ncounter:', counter,
                          '\nnew number:', newFileNumber,
                          '\nnew counter:', counter + newFileNumber)
                    counter += newFileNumber
                elif self.file_formatter.file_contains_format(file, topdir):
                    counter += 1
                    print(counter, 'a movie:', file)
        print('------ returning', counter, '----------')
        return counter

    def count_names(self):
        """Fill ``self.files`` with the files found per first-level directory.

        For each directory ``<top>/<name>``, recognised files directly
        inside it and inside its immediate sub-directories are recorded
        under ``name``. Deeper levels are not visited.
        """
        for top_dir in self.top_dirs:
            if not os.path.isdir(top_dir):
                continue
            for file in os.listdir(top_dir):
                file_path = os.path.join(top_dir, file)
                if not os.path.isdir(file_path):
                    continue
                for sub_file in os.listdir(file_path):
                    sub_file_path = os.path.join(file_path, sub_file)
                    if os.path.isdir(sub_file_path):
                        self.add_files_for_name(
                            file, self.get_relevant_files(sub_file_path))
                    elif self.file_formatter.file_contains_format(
                            sub_file, file_path):
                        self.add_files_for_name(file, [sub_file])

    def get_relevant_files(self, file_path):
        """Return the entries of ``file_path`` accepted by the formatter."""
        return [
            file for file in os.listdir(file_path)
            if self.file_formatter.file_contains_format(file, file_path)
        ]

    def add_files_for_name(self, name, files):
        """Record ``files`` under ``name``, ahead of any earlier entries."""
        self.files[name] = files + self.files.get(name, [])

    def print_results(self):
        """Print the names ranked by number of files, largest first.

        Prints nothing when no files have been collected.
        """
        if not self.files:
            # max() below would raise on an empty mapping.
            return None

        names = sorted(
            self.files, key=lambda name: len(self.files[name]), reverse=True)
        max_num_length = len(str(len(names))) + 1
        max_name_length = max(len(name) for name in names) + 1
        for position, name in enumerate(names, start=1):
            print(str(position).ljust(max_num_length),
                  name.ljust(max_name_length),
                  'scenes:', len(self.files[name]))
        return None

    def make_histogram(self):
        """Print a text histogram of the number of files per name.

        A summary line is printed first, then one line per bin from the
        highest range down to the lowest. Prints nothing when no files
        have been collected.
        """
        num_of_bins = 100
        counts = [len(value) for value in self.files.values()]
        if not counts:
            # max()/min() below would raise on an empty list.
            return None

        max_count = max(counts)
        min_count = min(counts)
        region = max_count - min_count
        # Ceiling division, never below 1: avoids dividing by zero for
        # narrow ranges and keeps every index within the bins list.
        bin_size = max(1, -(-region // num_of_bins))
        print(
            'min:', min_count,
            'max:', max_count,
            'range:', region,
            'bins:', num_of_bins,
            'bin size:', bin_size,
            'entries:', len(counts)
        )

        bins = (num_of_bins + 1) * [0]
        for count in counts:
            # Offset by min_count so the bins match the labels below.
            bins[(count - min_count) // bin_size] += 1

        results = []
        x0 = min_count
        for _ in bins:
            results.append(str(x0) + ' - ' + str(x0 + bin_size) + ' : ')
            x0 += bin_size

        results.reverse()
        bins.reverse()
        # Pad to the widest label so the count column lines up.
        max_just = max(len(label) for label in results)
        for label, amount in zip(results, bins):
            print(label.ljust(max_just), amount)
        return None
def __init__(self, root_path, dir_to_exclude):
    """Store the root directory and the entry names to skip.

    Args:
        root_path: Directory stored as ``self.root_path``.
        dir_to_exclude: Iterable of names; stored as a set.
    """
    excluded_names = set(dir_to_exclude)
    self.root_path = root_path
    self.formatter = FileFormatter()
    self.dir_to_exclude = excluded_names
class FileNamer:
    """Build normalised, lower-case file names that contain a person's name."""

    def __init__(self):
        """Set up the separator characters and the file-format helper."""
        self.punctuation = [
            ' ', '_', '.', '[', ']', ',', '/', '-', '{', '}', '|', '(', ')',
            '*', '!', '&', "'"
        ]
        self.fileFormatter = FileFormatter()

    def makeNewFileName(self, nameList, aFile, pathToFile):
        """Return ``aFile`` lower-cased, carrying the name in ``nameList``.

        Args:
            nameList: ``[first]`` or ``[first, last]``; extra items are
                ignored.
            aFile: Current file name.
            pathToFile: Location handed to the formatter checks.

        Returns:
            The new file name after punctuation normalisation.
        """
        aFile = aFile.lower()
        firstName = nameList[0].lower()

        # If the first name is glued to the preceding character, put a
        # separator in front of it.
        start = aFile.find(firstName) - 1
        if start >= 0 and aFile[start] not in self.punctuation:
            aFile = aFile.replace(firstName, '_' + firstName, 1)

        if len(nameList) > 1:
            lastName = nameList[1].lower()
            if firstName in aFile and lastName in aFile:
                endFirst = aFile.find(firstName) + len(firstName)
                if endFirst == aFile.find(lastName):
                    # First and last name are adjacent: separate them.
                    newFile = aFile.replace(lastName, '_' + lastName, 1)
                else:
                    # Both names already present; nothing to add.
                    newFile = aFile
            elif firstName in aFile:
                # Only the first name is present: add the last name after it.
                end = aFile.find(firstName) + len(firstName)
                # The first name may end the file name, so check the
                # bounds before indexing.
                if end >= len(aFile) or aFile[end] in self.punctuation:
                    replacement = firstName + '_' + lastName
                else:
                    replacement = firstName + '_' + lastName + '_'
                newFile = aFile.replace(firstName, replacement, 1)
            elif self.fileFormatter.file_contains_format(aFile, pathToFile):
                # No name present: insert it before the extension.
                parts = aFile.rpartition('.')
                newFile = (parts[0] + '_' + firstName + '_' + lastName
                           + parts[1] + parts[2])
            else:
                newFile = aFile + '_' + firstName + '_' + lastName
        elif firstName in aFile:
            newFile = aFile
        elif self.fileFormatter.file_contains_format(aFile, pathToFile):
            parts = aFile.rpartition('.')
            newFile = parts[0] + '_' + firstName + parts[1] + parts[2]
        else:
            newFile = aFile + '_' + firstName

        return self.replacePunctuation(newFile, pathToFile)

    def make_new_filename_multiple_names(self, names_list, file, file_path):
        """Apply ``makeNewFileName`` once for every name in ``names_list``.

        Args:
            names_list: Iterable of ``[first, last]`` sequences.
            file: Starting file name.
            file_path: Location handed to the formatter checks.

        Returns:
            The file name after all names have been applied.

        Raises:
            WrongNameFormatException: If an entry has fewer than two parts.
        """
        for name_list in names_list:
            try:
                first_name, last_name = name_list[0], name_list[1]
            except IndexError:
                raise WrongNameFormatException(
                    'a name must have a first and last name')
            file = self.makeNewFileName(
                [first_name, last_name], file, file_path
            )
        return file

    def replacePunctuation(self, aFile, pathToFile):
        """Collapse each run of punctuation in ``aFile`` into a single ``_``.

        For names the formatter recognises, only the part before the last
        ``.`` is processed. A leading ``_`` and a ``_`` at the end of the
        stem are removed.

        Args:
            aFile: File name to normalise.
            pathToFile: Location handed to the formatter check.

        Returns:
            The normalised name; an empty name is returned unchanged.
        """
        if not aFile:
            # Nothing to normalise; indexing below would fail.
            return aFile

        isMovie = False
        if self.fileFormatter.file_contains_format(aFile, pathToFile):
            length = aFile.rfind('.')
            isMovie = True
        else:
            length = len(aFile)

        newName = aFile
        if length != -1:
            i = 0
            while True:
                if newName[i] in self.punctuation:
                    newName = newName[:i] + '_' + newName[i + 1:]
                    try:
                        # Drop the punctuation characters directly after
                        # the underscore just written.
                        j = i + 1
                        while True:
                            if newName[j] in self.punctuation:
                                # Keep the dot before the extension.
                                if isMovie and j == newName.rfind('.'):
                                    break
                                newName = newName[:j] + newName[j + 1:]
                                length -= 1
                                i = j
                                if j >= length:
                                    break
                            else:
                                break
                    except IndexError:
                        # Ran off the end of the name: scanning is done.
                        break
                i += 1
                if i >= length:
                    break

            if newName[0] == '_':
                newName = newName[1:]
                length -= 1

            if isMovie:
                target = newName.rfind('.') - 1
                # Guard against negative indices wrapping around.
                if target >= 0 and newName[target] == '_':
                    newName = newName[:target] + newName[target + 1:]
            elif newName.endswith('_'):
                newName = newName[:-1]
        return newName

    def clean_name_for_raw_file(self, file, base_path):
        """Return a lower-case name with punctuation reduced to single ``_``.

        The extension reported by the formatter, if any, is split off
        first and re-attached afterwards.

        Args:
            file: File name to clean.
            base_path: Location handed to the formatter.

        Returns:
            The cleaned, lower-cased file name.
        """
        ext = self.fileFormatter.get_format(file, base_path)
        if ext:
            file = file.rsplit('.', 1)[0]

        for punctuation in self.punctuation:
            file = file.replace(punctuation, '_')

        # Dropping the empty pieces collapses runs of underscores and
        # strips them from both ends, including a trailing run.
        file = '_'.join(piece for piece in file.split('_') if piece)

        if ext:
            file += '.' + ext
        return file.lower()