def find_dupes(path='tmp'):
    """Return the files under *path* that repeat an earlier file's fingerprint.

    Every path exposed by ``DiskWalk(path).paths`` is reduced to a
    ``(size, checksum)`` pair.  The first file seen with a given pair is
    treated as the original; each later file with the same pair is added
    to the result, in the order encountered.

    :param path: location handed to ``DiskWalk`` (defaults to ``'tmp'``).
    :returns: list of paths considered duplicates; originals are not included.
    """
    seen_fingerprints = set()
    duplicates = []
    for candidate in DiskWalk(path).paths:
        # Size and checksum together form the identity of a file's content.
        fingerprint = (getsize(candidate), create_checksum(candidate))
        if fingerprint not in seen_fingerprints:
            seen_fingerprints.add(fingerprint)
            continue
        duplicates.append(candidate)
    return duplicates
def find_figure_data(self,directory):
    """Return the bag files under *directory* that still need figures.

    A file with the bag extension is selected when all of the following hold:

    * its file name starts with ``FILE_TOOLS.bag_prefix``;
    * the folder containing it holds more than one file with the tracking
      extension (presumably meaning tracking analysis has already produced
      output there -- confirm against the analysis step);
    * ``self.overwrite`` is truthy, or the folder has no files with the
      figure extension yet.

    :param directory: root to search; first passed through
        ``self.condition_path``.
    :returns: list of matching bag file paths, in enumeration order.
    """
    directory = self.condition_path(directory)
    dw = DiskWalk(directory)
    paths_bag_ext = dw.enumerate_paths_with_ext(FILE_TOOLS.bag_ext)
    figure_paths = []
    for path in paths_bag_ext:
        bag_dir, bag_name = os.path.split(path)
        if not bag_name.startswith(FILE_TOOLS.bag_prefix):
            continue
        dw_path = DiskWalk(bag_dir)
        figure_files_in_dir = dw_path.enumerate_paths_with_ext(FILE_TOOLS.figure_ext)
        tracking_files_in_dir = dw_path.enumerate_paths_with_ext(FILE_TOOLS.tracking_ext)
        # Compare the *count* of tracking files, not the collection itself:
        # ``list > 1`` raises TypeError on Python 3 and is always true on
        # Python 2, so the original check never filtered anything.
        has_tracking_output = len(tracking_files_in_dir) > 1
        if has_tracking_output and (self.overwrite or len(figure_files_in_dir) == 0):
            figure_paths.append(path)
    return figure_paths
def find_video_data(self,directory):
    """Return the bag files under *directory* that still need a video.

    A file with the bag extension is selected when its name starts with
    ``FILE_TOOLS.bag_prefix`` and either ``self.overwrite`` is truthy or
    its folder contains no files with the video extension.

    :param directory: root to search; first passed through
        ``self.condition_path``.
    :returns: list of matching bag file paths, in enumeration order.
    """
    root = self.condition_path(directory)
    bag_candidates = DiskWalk(root).enumerate_paths_with_ext(FILE_TOOLS.bag_ext)
    pending = []
    for bag_path in bag_candidates:
        if not os.path.basename(bag_path).startswith(FILE_TOOLS.bag_prefix):
            continue
        folder_walk = DiskWalk(os.path.dirname(bag_path))
        existing_videos = folder_walk.enumerate_paths_with_ext(FILE_TOOLS.video_ext)
        # Bags whose folder already has a video are skipped unless overwriting.
        if self.overwrite or len(existing_videos) == 0:
            pending.append(bag_path)
    return pending
def find_tracking_data(self,directory):
    """Return the tracking files under *directory* that still need analysis.

    A file with the tracking extension is selected when its name starts
    with ``FILE_TOOLS.tracking_prefix`` and either ``self.overwrite`` is
    truthy or it is the only tracking-extension file in its folder.

    :param directory: root to search; first passed through
        ``self.condition_path``.
    :returns: list of matching tracking file paths, in enumeration order.
    """
    root = self.condition_path(directory)
    walker = DiskWalk(root)
    unprocessed = []
    for candidate in walker.enumerate_paths_with_ext(FILE_TOOLS.tracking_ext):
        folder, name = os.path.split(candidate)
        if not name.startswith(FILE_TOOLS.tracking_prefix):
            continue
        siblings = DiskWalk(folder).enumerate_paths_with_ext(FILE_TOOLS.tracking_ext)
        # Exactly one tracking file means analysis has not run yet; analysis
        # leaves additional tracking-extension files in the same folder.
        if self.overwrite or len(siblings) == 1:
            unprocessed.append(candidate)
    return unprocessed
def get_image_paths(self,directory):
    """Return the paths under *directory* whose extension is ``self.image_ext``.

    The directory is handed to ``DiskWalk`` exactly as given; it is not
    passed through ``self.condition_path`` first.

    :param directory: location handed to ``DiskWalk``.
    :returns: whatever ``DiskWalk.enumerate_paths_with_ext`` yields for
        ``self.image_ext``.
    """
    return DiskWalk(directory).enumerate_paths_with_ext(self.image_ext)