Example #1
class DU:

    def __init__ (self, sqlite_file, root_dir):
        self.db = CommsDBTable(sqlite_file)
        self.root_dir = root_dir

        for subdir in self.db.list_dir(self.root_dir):
            path = os.path.join (root_dir, subdir)
            size = self.get_size (path)
            print '{}\t{}'.format(human_readable_size(size), subdir)

        total_size =  self.get_size (root_dir)
        print human_readable_size(total_size)

    def get_size (self, path):
        # conn = sqlite3.connect(self.db.sqlite_file)
        # c = conn.cursor()
        #
        # query = "SELECT SUM (size) FROM comms_files WHERE path LIKE '{}%'".format(path)
        # print query
        # c.execute(query)
        # return c.fetchone()[0]
        if 0 and os.path.isfile(path):
            rows = self.db.select ('size', "WHERE path='{}'".format(path))
            print rows
            return rows[0][0]
        return self.db.sum_size_for_selected ("WHERE path LIKE '{}%'".format(path))
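
A minimal usage sketch for the class above; the constructor prints the per-subdirectory and total sizes itself, so instantiating it is enough (both paths are hypothetical):

DU('/path/to/cic-de-duped.sqlite', '/Volumes/VideoLibrary/Video')
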
Example #2
    def __init__ (self, sqlite_file, root_dir):
        self.db = CommsDBTable(sqlite_file)
        self.root_dir = root_dir

        for subdir in self.db.list_dir(self.root_dir):
            path = os.path.join (root_dir, subdir)
            size = self.get_size (path)
            print '{}\t{}'.format(human_readable_size(size), subdir)

        total_size =  self.get_size (root_dir)
        print human_readable_size(total_size)
Example #3
    def __init__(self, dup_data_path, sqlite_file):
        """
        Builds self.dup_map, which maps a checksum to the fileset for that checksum.
        """
        self.dup_data_path = dup_data_path
        self.sqlite_file = sqlite_file
        self.db = CommsDBTable(sqlite_file)
        DupManager.__init__(self, dup_data_path)
        print 'initialized with {}'.format(len(self.dup_map))
        self.filter_for_videos()
        print 'filtered to {}'.format(len(self.dup_map))

        self.to_delete = None
Example #4
    def __init__ (self, image_dir, catalog_record, image_url_base):
        self.image_dir = image_dir
        self.catalog_record = catalog_record
        self.image_url_base = image_url_base
        # self.sqlite_file and self.dup_data_path are assumed to be defined
        # elsewhere on this class; they are not set by this constructor.
        self.db_table = CommsDBTable (self.sqlite_file)
        self.dup_manager = DupManager(self.dup_data_path)
Example #5
    def __init__(self, root_directory, sqlite_path):
        self.root = root_directory
        self.db = CommsDBTable(sqlite_path)
        self.title = "Small Images"
Example #6
class HtmlWriter:
    def __init__(self, root_directory, sqlite_path):
        self.root = root_directory
        self.db = CommsDBTable(sqlite_path)
        self.title = "Small Images"

    def get_top_nodes(self):
        top_nodes = []
        for filename in self.db.list_dir(self.root):
            # print filename
            if filename.startswith('disc'):
                top_nodes.append(os.path.join(self.root, filename))

        def key_fn(path):
            name = os.path.basename(path)
            index = name.split(' ')[1]
            try:
                return int(index)
            except (IndexError, ValueError):
                # print 'warn: could not handle "{}"'.format(path)
                return 134

        # top_nodes.sort(lambda x:int(os.path.basename(x).split(' ')[1]))
        top_nodes.sort(key=key_fn)
        return top_nodes

    def render_tree(self):
        max_node_count = 300
        max_node_depth = 30
        top_nodes = self.get_top_nodes()
        print '{} top_nodes'.format(len(top_nodes))
        tree = UL(klass='root-node')
        for path in top_nodes[:max_node_count]:
            base_path = path
            branch = Branch(base_path, self.db)
            tree.append(LI(branch.as_html(max_node_depth)))
        return tree

    def as_html(self):
        doc = HtmlDocument(title=self.title, stylesheet="styles.css")
        # doc.body["onload"] = "init();"

        # <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">

        doc.head.append(
            META(http_equiv="Content-Type",
                 content="text/html; charset=utf-8"))

        doc.addJavascript(
            "https://ajax.googleapis.com/ajax/libs/jquery/1.11.1/jquery.min.js"
        )
        doc.addJavascript(
            "https://ajax.googleapis.com/ajax/libs/jqueryui/1.11.1/jquery-ui.min.js"
        )
        doc.addJavascript("script.js")

        doc.append(self.render_tree())
        return doc

    def write_html(self, outpath="html/SMALL_IMAGE.html"):
        with open(outpath, 'w') as fp:
            fp.write(str(self.as_html()))
        print 'wrote', outpath
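
A minimal usage sketch for the HtmlWriter class; both paths are hypothetical, and the root directory is expected to contain 'disc N' subdirectories (see get_top_nodes above):

writer = HtmlWriter('/Volumes/ImageLibrary', '/path/to/cic-de-duped.sqlite')
writer.write_html()  # renders the tree and writes html/SMALL_IMAGE.html by default
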
Example #7
def get_size (path):
    sqlite_file = '/Users/ostwald/Documents/Comms/Composite_DB/cic-de-duped.sqlite'
    db = CommsDBTable(sqlite_file)
    rec = db.select ('size', "WHERE path='{}'".format(path))
    print rec[0][0]
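
A one-line usage sketch; the file path is hypothetical and must match a comms_files row exactly:

get_size('/Volumes/VideoLibrary/Video/AccessQuality/HD/00048.mp4')
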
class VideoDupManager(DupManager):

    dowrites = 0
    verbose = 0
    empty_file_checksum = 'd41d8cd98f00b204e9800998ecf8427e'

    def __init__(self, dup_data_path, sqlite_file):
        """
        Builds self.dup_map, which maps a checksum to the fileset for that checksum.
        """
        self.dup_data_path = dup_data_path
        self.sqlite_file = sqlite_file
        self.db = CommsDBTable(sqlite_file)
        DupManager.__init__(self, dup_data_path)
        print 'initialized with {}'.format(len(self.dup_map))
        self.filter_for_videos()
        print 'filtered to {}'.format(len(self.dup_map))

        self.to_delete = None

    def filter_for_videos(self):
        dup_video_map = {}
        for key in self.dup_map.keys():
            dupset = self.dup_map[key]
            video_dup_set = []
            for dup_path in dupset:
                if is_video(dup_path):
                    video_dup_set.append(dup_path)
            if len(video_dup_set) > 1:
                dup_video_map[key] = video_dup_set
        self.dup_map = dup_video_map

    def default_process_dupset(self, checksum):
        print 'default_process_dupset: {}'.format(checksum)
        return 1

    def get_non_dups(self, path):
        """
        1 - collect all paths that contain path
        2 - remove paths that are duplicates
        :param path:
        :return: non-dups with paths starting with the provided path
        """
        all_paths = map(
            lambda x: x[0],
            self.db.select('path', "WHERE path LIKE '{}%'".format(path)))
        print ' - all paths: {} ({})'.format(len(all_paths), path)
        # non_dups = filter (lambda x: self.path_map.has_key(x), all_paths)

        non_dups = []
        path_map = self._get_path_map()
        for p in all_paths:
            # keep only paths that are not already known duplicates
            if p not in path_map:
                non_dups.append(p)
        return non_dups

    def process_dupset(self, checksum):
        if 0 and self.verbose:
            print 'process_dupset: {}'.format(checksum)
        dup_set = self.dup_map[checksum]

        # Here is the way we can collect paths using a predicate such as 'in'
        #        backup_paths = filter (lambda x:'Video Backup' in x, dup_set)

        mt_sherman_paths = filter(lambda x: x.startswith(MT_SHERMAN_PATH),
                                  dup_set)
        video_clips_paths = filter(lambda x: x.startswith(VIDEO_CLIPS_PATH),
                                   dup_set)
        master_paths = filter(lambda x: x.startswith(MASTER_PATH), dup_set)
        if mt_sherman_paths and video_clips_paths and master_paths:
            print '\n{}'.format(checksum)
            print '- {}/{} mt sherman paths'.format(len(mt_sherman_paths),
                                                    len(dup_set))
            print '- {}/{} video clips paths'.format(len(video_clips_paths),
                                                     len(dup_set))
            print '- {}/{} master paths'.format(len(master_paths),
                                                len(dup_set))

        return 1

    def process_dupsets(self, fn=None):
        """
        backup_dupsets are like
        - /Volumes/Video Backup/Video/AccessQuality/HD/2011-01-09 - 2011-01-15/00048.mp4
        - /Volumes/VideoLibrary/Video/AccessQuality/HD/2011-01-09 - 2011-01-15/00048.mp4

        :return:
        """
        if fn is None:
            # fn = self.default_process_dupset
            fn = self.process_dupset

        # for dup_set in reporter.dup_map.values():
        checksums = self.dup_map.keys()

        self.to_delete = []
        for checksum in checksums:

            # the call is GET_BACKUP_PATH

            # where it should be
            # if call returns true
            #   then append to whatever
            #   print whatever

            try:
                if fn(checksum) and self.verbose:
                    print 'success processed dupset for {}'.format(checksum)
            except Exception:
                if self.verbose:
                    print 'fail with checksum {}'.format(checksum)
                    traceback.print_exc()

        # Now loop through the "to_delete" paths
        for path_to_delete in self.to_delete:

            if self.dowrites:

                # DB delete (path_to_delete)
                try:
                    where_condition = "path = '{}'".format(path_to_delete)
                    self.db.delete_record(where_condition)
                except:
                    traceback.print_exc()
                    sys.exit()
            else:
                print 'woulda deleted ', path_to_delete
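
A minimal driver sketch for VideoDupManager; the dup-data and SQLite paths are hypothetical, and with the default dowrites = 0 the deletion loop only prints what it would have deleted:

manager = VideoDupManager('/path/to/dup_data', '/path/to/cic-de-duped.sqlite')
manager.process_dupsets()  # dry run: reports overlaps among mt sherman, video clips, and master paths
non_dups = manager.get_non_dups('/Volumes/VideoLibrary/Video')
print '{} non-duplicate paths under the library root'.format(len(non_dups))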