Example #1
0
	def testDstWithSubdirectory(self):
		"""dstWithSubdirectory should append the source's relative path beneath dst,
		stripping any prefix the source shares with dst."""
		newdst = os.path.join(os.path.dirname(os.path.abspath(self._sha1file)), "newsubdir")
		# Bug fix: the expectation was a hard-coded absolute path tied to one
		# developer's checkout location; derive it from newdst so the test is
		# portable across machines.
		expected = os.path.join(newdst, "subdir", "file.txt")
		self.assertEqual(expected, fsu.dstWithSubdirectory("subdir/file.txt", newdst))
		
		self.assertEqual("/media/cdrom/usr/local/test.txt",
			fsu.dstWithSubdirectory("/usr/local/test.txt", "/media/cdrom"))
		self.assertEqual("/media/cdrom/subdir/othersubdir/test.txt",
			fsu.dstWithSubdirectory("/media/cdrom/othersubdir/test.txt", "/media/cdrom/subdir"))
Example #2
0
	def testDstWithSubdirectoryBad(self):
		"""Every invalid (src, dst) combination must raise IOError."""
		badArgs = [
			("", ""),
			("", None),
			("", "subdir"),
			(None, None),
			(None, ""),
			("uouoeuoaeuu", ""),
			(self._sha1file, ""),
			(self._sha1file, None),
			# dst equal to the source's own directory is also rejected
			("/media/cdrom/test.txt", "/media/cdrom"),
		]
		for src, dst in badArgs:
			self.assertRaises(IOError, fsu.dstWithSubdirectory, src, dst)
Example #3
0
    def dedup(self, dupdir, doSymlink):
        """ Moves duplicate entries (based on checksum) into the dupdir.  Uses the entry's path to 
    reconstruct a subdirectory hierarchy in dupdir.  This will remove any common prefixes
    between dupdir and the file path itself so as to make a useful subdirectory structure.
    If doSymlink is true, then the original paths of the files that were moved will be symlinked 
    back to the canonical file; in addition, it will keep the file entry in the database rather than
    removing it."""
        logging.info("De-duping database")

        if os.path.exists(dupdir) and not len(os.listdir(dupdir)) <= 0:
            raise Exception("%s is not empty; refusing to move files" % dupdir)

        try:
            pathmap = {}  # store duplicate paths keyed by file checksum

            with sqliteConn(self.database) as cursor:
                cursor.execute("""select chksum, path, link from files 
where chksum in(
select chksum from files where symlink = 0 group by chksum having count(chksum) > 1) 
and symlink = 0 
and link = 1
order by chksum, link;""")
                for row in cursor:
                    (chksum, path, islink) = row
                    if not chksum in pathmap:
                        # ensure existence of list for checksum
                        pathmap[chksum] = []
                    paths = pathmap[chksum]
                    paths.append(path)

                for chksum, paths in pathmap.iteritems():
                    # the query above will result in single rows for symlinked files, so fix that here
                    # rather than mucking about with temp tables
                    paths = filter(lambda path: not os.path.islink(path),
                                   paths)

                    # we'll have at least two elements due to the inner part of the query above
                    for path in paths:
                        dst = dstWithSubdirectory(path, dupdir)
                        moveFile(
                            path, dst,
                            (not doSymlink
                             ))  # don't rm empty dirs if we are symlinking
                        if not doSymlink:
                            cursor.execute(REMOVE_ROW, (path, ))
                        else:
                            cursor.execute(
                                "update files set symlink = 1 where path = ?;",
                                (path, ))
                            symlinkFile(canonicalPath, path)
            logging.info("De-duping complete")
        except Exception as einst:
            logging.error("Unable to de-dup database: %s" % einst)
            raise
Example #4
0
  def dedup(self, dupdir, doSymlink):
    """ Moves duplicate entries (based on checksum) into the dupdir.  Uses the entry's path to 
    reconstruct a subdirectory hierarchy in dupdir.  This will remove any common prefixes
    between dupdir and the file path itself so as to make a useful subdirectory structure.
    If doSymlink is true, then the original paths of the files that were moved will be symlinked 
    back to the canonical file; in addition, it will keep the file entry in the database rather than
    removing it."""
    logging.info("De-duping database")
  
    if os.path.exists(dupdir) and not len(os.listdir(dupdir)) <= 0:
      raise Exception("%s is not empty; refusing to move files" % dupdir)
      
    try:
      pathmap = {} # store duplicate paths keyed by file checksum
      
      with sqliteConn(self.database) as cursor:
        cursor.execute("""select chksum, path, link from files 
where chksum in(
select chksum from files where symlink = 0 group by chksum having count(chksum) > 1) 
and symlink = 0 
and link = 1
order by chksum, link;""")
        for row in cursor:
          (chksum, path, islink) = row
          if not chksum in pathmap: 
            # ensure existence of list for checksum
            pathmap[chksum] = [] 
          paths = pathmap[chksum]
          paths.append(path)
          
        for chksum, paths in pathmap.iteritems():
          # the query above will result in single rows for symlinked files, so fix that here
          # rather than mucking about with temp tables
          paths = filter(lambda path: not os.path.islink(path), paths)
          
          # we'll have at least two elements due to the inner part of the query above
          for path in paths: 
            dst = dstWithSubdirectory(path, dupdir)
            moveFile(path, dst, (not doSymlink)) # don't rm empty dirs if we are symlinking
            if not doSymlink:
              cursor.execute(REMOVE_ROW, (path, ))
            else:
              cursor.execute("update files set symlink = 1 where path = ?;", (path, ))
              symlinkFile(canonicalPath, path)
      logging.info("De-duping complete")
    except Exception as einst:
      logging.error("Unable to de-dup database: %s" % einst)
      raise