import os

# CommsDBTable and human_readable_size are provided by the project's own modules.

class DU:
    """Print a per-subdirectory size report (a la 'du') from the comms_files table."""

    def __init__(self, sqlite_file, root_dir):
        self.db = CommsDBTable(sqlite_file)
        self.root_dir = root_dir
        # report the size of each immediate subdirectory of root_dir ...
        for subdir in self.db.list_dir(self.root_dir):
            path = os.path.join(root_dir, subdir)
            size = self.get_size(path)
            print '{}\t{}'.format(human_readable_size(size), subdir)
        # ... then the grand total
        total_size = self.get_size(root_dir)
        print human_readable_size(total_size)

    def get_size(self, path):
        # sum the sizes of all DB records whose path starts with 'path'
        return self.db.sum_size_for_selected("WHERE path LIKE '{}%'".format(path))
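
# Usage sketch (illustrative only, not from the original source; both paths
# below are hypothetical placeholders): constructing DU prints a human-readable
# size for each subdirectory of root_dir, followed by the total.
du_report = DU('/path/to/cic-de-duped.sqlite', '/Volumes/VideoLibrary')
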
def __init__(self, image_dir, catalog_record, image_url_base):
    self.image_dir = image_dir
    self.catalog_record = catalog_record
    self.image_url_base = image_url_base
    # NOTE: self.sqlite_file and self.dup_data_path are not set here; they are
    # presumably defined as class-level attributes on the enclosing class.
    self.db_table = CommsDBTable(self.sqlite_file)
    self.dup_manager = DupManager(self.dup_data_path)

class HtmlWriter:

    def __init__(self, root_directory, sqlite_path):
        self.root = root_directory
        self.db = CommsDBTable(sqlite_path)
        self.title = "Small Images"

    def get_top_nodes(self):
        """Collect the 'disc ...' directories under the root, sorted by disc number."""
        top_nodes = []
        for filename in self.db.list_dir(self.root):
            if filename.startswith('disc'):
                # file_system_root is assumed to be a module-level global
                top_nodes.append(os.path.join(file_system_root, filename))

        def key_fn(path):
            name = os.path.basename(path)
            index = name.split(' ')[1]
            try:
                return int(index)
            except:
                # fallback sort index for names that don't parse as 'disc <number>'
                return 134

        top_nodes.sort(key=key_fn)
        return top_nodes

    def render_tree(self):
        max_node_count = 300
        max_node_depth = 30
        top_nodes = self.get_top_nodes()
        print '{} top_nodes'.format(len(top_nodes))
        tree = UL(klass='root-node')
        for path in top_nodes[:min(len(top_nodes), max_node_count)]:
            branch = Branch(path, self.db)
            tree.append(LI(branch.as_html(max_node_depth)))
        return tree

    def as_html(self):
        doc = HtmlDocument(title=self.title, stylesheet="styles.css")
        # doc.body["onload"] = "init();"
        doc.head.append(
            META(http_equiv="Content-Type", content="text/html; charset=utf-8"))
        doc.addJavascript(
            "https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js")
        doc.addJavascript(
            "https://ajax.googleapis.com/ajax/libs/jqueryui/1.11.1/jquery-ui.min.js")
        doc.addJavascript("script.js")
        doc.append(self.render_tree())
        return doc

    def write_html(self, outpath="html/SMALL_IMAGE.html"):
        fp = open(outpath, 'w')
        fp.write(str(self.as_html()))
        fp.close()
        print 'wrote ', outpath
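
# Usage sketch (illustrative only; the directory and sqlite paths are
# hypothetical placeholders): builds the "Small Images" tree and writes it
# to the default output path under html/.
writer = HtmlWriter('/path/to/image/root', '/path/to/cic-de-duped.sqlite')
writer.write_html()
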
def get_size(path):
    sqlite_file = '/Users/ostwald/Documents/Comms/Composite_DB/cic-de-duped.sqlite'
    db = CommsDBTable(sqlite_file)
    rec = db.select('size', "WHERE path='{}'".format(path))
    print rec[0][0]
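
# Usage sketch (illustrative only; the file path is a hypothetical placeholder):
# prints the stored size of a single file record.
get_size('/Volumes/VideoLibrary/Video/AccessQuality/HD/example.mp4')
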
import sys
import traceback

# DupManager, CommsDBTable, is_video and the *_PATH constants (MT_SHERMAN_PATH,
# VIDEO_CLIPS_PATH, MASTER_PATH) are provided by the project's own modules.

class VideoDupManager(DupManager):

    dowrites = 0
    verbose = 0
    empty_file_checksum = 'd41d8cd98f00b204e9800998ecf8427e'

    def __init__(self, dup_data_path, sqlite_file):
        """
        self.dup_map maps a checksum to the fileset for that checksum
        """
        self.dup_data_path = dup_data_path
        self.sqlite_file = sqlite_file
        self.db = CommsDBTable(sqlite_file)
        DupManager.__init__(self, dup_data_path)
        print 'initialized with {}'.format(len(self.dup_map))
        self.filter_for_videos()
        print 'filtered to {}'.format(len(self.dup_map))
        self.to_delete = None

    def filter_for_videos(self):
        """Keep only the dupsets that contain more than one video file."""
        dup_video_map = {}
        for key in self.dup_map.keys():
            dupset = self.dup_map[key]
            video_dup_set = []
            for dup_path in dupset:
                if is_video(dup_path):
                    video_dup_set.append(dup_path)
            if len(video_dup_set) > 1:
                dup_video_map[key] = video_dup_set
        self.dup_map = dup_video_map

    def default_process_dupset(self, checksum):
        print 'default_process_dupset: {}'.format(checksum)
        return 1

    def get_non_dups(self, path):
        """
        1 - collect all paths that start with 'path'
        2 - remove the paths that are known duplicates
        :param path:
        :return: non-dups with paths starting with the provided path
        """
        all_paths = map(lambda x: x[0],
                        self.db.select('path', "WHERE path LIKE '{}%'".format(path)))
        print ' - all paths: {} ({})'.format(len(all_paths), path)
        non_dups = []
        path_map = self._get_path_map()
        for candidate in all_paths:
            if not path_map.has_key(candidate):
                non_dups.append(candidate)
        return non_dups

    def process_dupset(self, checksum):
        if self.verbose:
            print 'process_dupset: {}'.format(checksum)
        dup_set = self.dup_map[checksum]
        # partition the dupset by location, using a startswith predicate
        mt_sherman_paths = filter(lambda x: x.startswith(MT_SHERMAN_PATH), dup_set)
        video_clips_paths = filter(lambda x: x.startswith(VIDEO_CLIPS_PATH), dup_set)
        master_paths = filter(lambda x: x.startswith(MASTER_PATH), dup_set)
        if mt_sherman_paths and video_clips_paths and master_paths:
            print '\n{}'.format(checksum)
            print '- {}/{} mt sherman paths'.format(len(mt_sherman_paths), len(dup_set))
            print '- {}/{} video clips paths'.format(len(video_clips_paths), len(dup_set))
            print '- {}/{} master paths'.format(len(master_paths), len(dup_set))
        return 1

    def process_dupsets(self, fn=None):
        """
        Apply 'fn' (process_dupset by default) to every dupset, then delete the
        records collected in self.to_delete. backup_dupsets are like
        - /Volumes/Video Backup/Video/AccessQuality/HD/2011-01-09 - 2011-01-15/00048.mp4
        - /Volumes/VideoLibrary/Video/AccessQuality/HD/2011-01-09 - 2011-01-15/00048.mp4
        :return:
        """
        if fn is None:
            fn = self.process_dupset
        checksums = self.dup_map.keys()
        # NOTE: process_dupset currently only reports; nothing is appended to
        # self.to_delete, so the deletion loop below is a no-op by default.
        self.to_delete = []
        for checksum in checksums:
            try:
                if fn(checksum) and self.verbose:
                    print 'success processed dupset for {}'.format(checksum)
            except:
                if self.verbose:
                    print 'fail with checksum {}'.format(checksum)
                traceback.print_exc()

        # now loop through the "to_delete" paths
        for path_to_delete in self.to_delete:
            if self.dowrites:
                try:
                    where_condition = "path = '{}'".format(path_to_delete)
                    self.db.delete_record(where_condition)
                except:
                    traceback.print_exc()
                    sys.exit()
            else:
                print 'woulda deleted ', path_to_delete
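
# Usage sketch (illustrative only; both paths are hypothetical placeholders).
# With dowrites left at 0 the manager only reports what it would delete;
# set dowrites = 1 to actually remove records from the database.
mgr = VideoDupManager('/path/to/dup_data.json', '/path/to/cic-de-duped.sqlite')
mgr.process_dupsets()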