Beispiel #1
0
	def testLinkSymlink(self):
		# Hard-linking onto a path that already holds a symlink must leave the
		# symlink as it was.
		symlinkName = "sha1link.txt"
		target = self._sha1file

		fsu.symlinkFile(target, symlinkName)
		self.assertSymlink(symlinkName)

		# a hard-link request over the existing symlink is expected to be a no-op
		fsu.linkFile(target, symlinkName)

		# NOTE(review): assertUnlinked presumably checks that no hard link was
		# created -- confirm against the helper's definition
		for check in (self.assertSymlink, self.assertUnlinked):
			check(symlinkName)
Beispiel #2
0
	def testSymlinkFile(self):
		# Creating the same symlink twice in a row must succeed both times.
		symlinkName = "sha1link.txt"

		# first pass creates the symlink; the second pass repeats the call just
		# to make sure that re-linking does not fail
		for _ in range(2):
			fsu.symlinkFile(self._sha1file, symlinkName)
			self.assertSymlink(symlinkName)

		self.assertUnlinked(symlinkName)
Beispiel #3
0
    def dedup(self, dupdir, doSymlink):
        """ Moves duplicate entries (based on checksum) into the dupdir.  Uses the entry's path to 
    reconstruct a subdirectory hierarchy in dupdir.  This will remove any common prefixes
    between dupdir and the file path itself so as to make a useful subdirectory structure.
    If doSymlink is true, then the original paths of the files that were moved will be symlinked 
    back to the canonical file; in addition, it will keep the file entry in the database rather than
    removing it."""
        logging.info("De-duping database")

        if os.path.exists(dupdir) and not len(os.listdir(dupdir)) <= 0:
            raise Exception("%s is not empty; refusing to move files" % dupdir)

        try:
            pathmap = {}  # store duplicate paths keyed by file checksum

            with sqliteConn(self.database) as cursor:
                cursor.execute("""select chksum, path, link from files 
where chksum in(
select chksum from files where symlink = 0 group by chksum having count(chksum) > 1) 
and symlink = 0 
and link = 1
order by chksum, link;""")
                for row in cursor:
                    (chksum, path, islink) = row
                    if not chksum in pathmap:
                        # ensure existence of list for checksum
                        pathmap[chksum] = []
                    paths = pathmap[chksum]
                    paths.append(path)

                for chksum, paths in pathmap.iteritems():
                    # the query above will result in single rows for symlinked files, so fix that here
                    # rather than mucking about with temp tables
                    paths = filter(lambda path: not os.path.islink(path),
                                   paths)

                    # we'll have at least two elements due to the inner part of the query above
                    for path in paths:
                        dst = dstWithSubdirectory(path, dupdir)
                        moveFile(
                            path, dst,
                            (not doSymlink
                             ))  # don't rm empty dirs if we are symlinking
                        if not doSymlink:
                            cursor.execute(REMOVE_ROW, (path, ))
                        else:
                            cursor.execute(
                                "update files set symlink = 1 where path = ?;",
                                (path, ))
                            symlinkFile(canonicalPath, path)
            logging.info("De-duping complete")
        except Exception as einst:
            logging.error("Unable to de-dup database: %s" % einst)
            raise
Beispiel #4
0
  def dedup(self, dupdir, doSymlink):
    """ Moves duplicate entries (based on checksum) into the dupdir.  Uses the entry's path to 
    reconstruct a subdirectory hierarchy in dupdir.  This will remove any common prefixes
    between dupdir and the file path itself so as to make a useful subdirectory structure.
    If doSymlink is true, then the original paths of the files that were moved will be symlinked 
    back to the canonical file; in addition, it will keep the file entry in the database rather than
    removing it."""
    logging.info("De-duping database")
  
    if os.path.exists(dupdir) and not len(os.listdir(dupdir)) <= 0:
      raise Exception("%s is not empty; refusing to move files" % dupdir)
      
    try:
      pathmap = {} # store duplicate paths keyed by file checksum
      
      with sqliteConn(self.database) as cursor:
        cursor.execute("""select chksum, path, link from files 
where chksum in(
select chksum from files where symlink = 0 group by chksum having count(chksum) > 1) 
and symlink = 0 
and link = 1
order by chksum, link;""")
        for row in cursor:
          (chksum, path, islink) = row
          if not chksum in pathmap: 
            # ensure existence of list for checksum
            pathmap[chksum] = [] 
          paths = pathmap[chksum]
          paths.append(path)
          
        for chksum, paths in pathmap.iteritems():
          # the query above will result in single rows for symlinked files, so fix that here
          # rather than mucking about with temp tables
          paths = filter(lambda path: not os.path.islink(path), paths)
          
          # we'll have at least two elements due to the inner part of the query above
          for path in paths: 
            dst = dstWithSubdirectory(path, dupdir)
            moveFile(path, dst, (not doSymlink)) # don't rm empty dirs if we are symlinking
            if not doSymlink:
              cursor.execute(REMOVE_ROW, (path, ))
            else:
              cursor.execute("update files set symlink = 1 where path = ?;", (path, ))
              symlinkFile(canonicalPath, path)
      logging.info("De-duping complete")
    except Exception as einst:
      logging.error("Unable to de-dup database: %s" % einst)
      raise
Beispiel #5
0
	def testSymlinkFileBad(self):
		# Every combination of a missing (None) or empty source/destination
		# argument is expected to make symlinkFile raise OSError.
		badArguments = (
			(None, None),
			("", ""),
			(None, ""),
			("", None),
			(self._sha1file, ""),
			(self._sha1file, None),
		)
		for src, dst in badArguments:
			self.assertRaises(OSError, fsu.symlinkFile, src, dst)