def copydir(self, source, destination, path):
    """Creates `destination` as a directory mirroring `source`. Only the
    directory itself is created, its contents are not copied.

    Once the directory exists, `self.copyattr(source, destination)` is
    called. In test mode (`self.test` is truthy) nothing is created and
    `False` is returned; otherwise the method returns `None`. The `path`
    argument is accepted but not used here."""
    message = "Copying directory: {0}".format(destination)
    logging.info(message)
    if not self.test:
        os.mkdir(destination)
        self.copyattr(source, destination)
        return None
    # Test mode: the operation is only logged.
    return False
def copyfile(self, source, destination, path):
    """Copies the file at `source` to `destination`, detecting hard links
    through the source inode.

    Character devices, block devices, FIFOs and sockets are only logged and
    skipped. For any other file, if `self.getInodePath()` already knows the
    source inode, the work is delegated to `self.hardlink()`. Otherwise the
    file is copied (symlinks are not followed), the inode is recorded with
    `self.setInodePath()` and the attributes are copied with
    `self.copyattr()`. Returns `False` when a fresh copy is requested in
    test mode, `None` in every other case. The `path` argument is accepted
    but not used here."""
    info = os.lstat(source)
    mode = info.st_mode
    # Special files are never copied, each kind has its own log message.
    skipped_kinds = (
        (stat.S_ISCHR, "Skipping special device file: {0}"),
        (stat.S_ISBLK, "Skipping block device file: {0}"),
        (stat.S_ISFIFO, "Skipping FIFO file: {0}"),
        (stat.S_ISSOCK, "Skipping socket file: {0}"),
    )
    for is_kind, template in skipped_kinds:
        if is_kind(mode):
            logging.info(template.format(utf8(source)))
            return None
    inode = info.st_ino
    if self.getInodePath(inode):
        # The inode has already been copied somewhere: link to that copy
        # (hardlink() takes care of the attributes itself).
        self.hardlink(source, destination)
        return None
    logging.info("Copying file: {0}".format(destination))
    if self.test:
        return False
    # The source inode was not copied anywhere into the destination yet,
    # so we copy it now.
    shutil.copyfile(source, destination, follow_symlinks=False)
    # NOTE: The stored path is the destination stripped of the output prefix
    # rather than an absolute path, to keep the DB small.
    # NOTE(review): the slice assumes `destination` starts with
    # `self.output` -- confirm against callers.
    relative = destination[len(self.output):]
    self.setInodePath(inode, relative)
    self.copyattr(source, destination)
def copydir(self, source, destination, path):
    """Creates `destination` as a directory mirroring `source`. Only the
    directory itself is created, its contents are not copied.

    Once the directory exists, `self.copyattr(source, destination)` is
    called. In test mode (`self.test` is truthy) nothing is created and
    `False` is returned; otherwise the method returns `None`. The `path`
    argument is accepted but not used here."""
    message = "Copying directory: {0}".format(destination)
    logging.info(message)
    if not self.test:
        os.mkdir(destination)
        self.copyattr(source, destination)
        return None
    # Test mode: the operation is only logged.
    return False
def save(self, path):
    """Saves the catalogue to the given `path`. The file is opened in
    binary mode and handed to `write()`.

    The parent directory of `path` is created when it does not exist yet.
    A `path` without any directory component (a bare file name) is written
    as-is, relative to the current working directory."""
    d = os.path.dirname(path)
    # dirname() is empty for a bare file name; os.makedirs("") would raise,
    # so we only create a directory when there is one to create.
    if d and not os.path.exists(d):
        logging.info("Catalogue: creating catalogue directory {0}".format(utf8(d)))
        os.makedirs(d)
    with open(path, "wb") as f:
        self.write(f)
def copylink(self, source, destination, path):
    """Copies the symlink at `source` to `destination`. The link target is
    read with `os.readlink()` and reused verbatim, without checking that it
    is valid.

    After the link is created, `self.copyattr(source, destination)` is
    called. In test mode (`self.test` is truthy) the operation is only
    logged and `False` is returned; otherwise the method returns `None`.
    The `path` argument is accepted but not used here."""
    target = os.readlink(source)
    logging.info("Copying link [->{1}]: {0}".format(destination, target))
    if self.test:
        return False
    os.symlink(target, destination)
    self.copyattr(source, destination)
def save(self, path):
    """Saves the catalogue to the given `path`. The file is opened in
    binary mode and handed to `write()`.

    The parent directory of `path` is created when it does not exist yet.
    A `path` without any directory component (a bare file name) is written
    as-is, relative to the current working directory."""
    d = os.path.dirname(path)
    # dirname() is empty for a bare file name; os.makedirs("") would raise,
    # so we only create a directory when there is one to create.
    if d and not os.path.exists(d):
        logging.info("Catalogue: creating catalogue directory {0}".format(utf8(d)))
        os.makedirs(d)
    with open(path, "wb") as f:
        self.write(f)
def copylink(self, source, destination, path):
    """Copies the symlink at `source` to `destination`. The link target is
    read with `os.readlink()` and reused verbatim, without checking that it
    is valid.

    After the link is created, `self.copyattr(source, destination)` is
    called. In test mode (`self.test` is truthy) the operation is only
    logged and `False` is returned; otherwise the method returns `None`.
    The `path` argument is accepted but not used here."""
    target = os.readlink(source)
    logging.info("Copying link [->{1}]: {0}".format(destination, target))
    if self.test:
        return False
    os.symlink(target, destination)
    self.copyattr(source, destination)
def __init__(self, output, filter=None):
    """Initialises the copy state for the given `output` directory, which
    is created when it does not exist yet. `filter` is stored as-is."""
    # Destination and filtering
    self.output = output
    self.filter = filter
    self._indexPath = os.path.join(output, "__rawcopy__/index.json")
    # Mutable state; `db`, `base` and `root` start out unset
    self.db = None
    self.last = -1
    self.base = None
    self.root = None
    if os.path.exists(output):
        return
    logging.info("Creating output directory {0}".format(output))
    os.makedirs(output)
def __init__(self, output, filter=None):
    """Initialises the copy state for the given `output` directory, which
    is created when it does not exist yet. `filter` is stored as-is."""
    # Destination and filtering
    self.output = output
    self.filter = filter
    self._indexPath = os.path.join(output, "__rawcopy__/index.json")
    # Mutable state; `db`, `base` and `root` start out unset
    self.db = None
    self.last = -1
    self.base = None
    self.root = None
    if os.path.exists(output):
        return
    logging.info("Creating output directory {0}".format(output))
    os.makedirs(output)
def ensureInodePath(self, source, path):
    """Ensures that the inode of the `source` element is mapped to the
    given `path`.

    Directories are never mapped. When `self.getInodePath()` has no entry
    for the inode, the mapping is stored with `self.setInodePath()` and
    `True` is returned. Returns `False` when nothing was changed."""
    info = os.lstat(source)
    if stat.S_ISDIR(info.st_mode):
        return False
    inode = info.st_ino
    if self.getInodePath(inode):
        # Already mapped, nothing to repair.
        return False
    logging.info("Remapping inode for {0} to {1}".format(utf8(source), utf8(path)))
    self.setInodePath(inode, path)
    return True
def ensureInodePath(self, source, path):
    """Ensures that the inode of the `source` element is mapped to the
    given `path`.

    Directories are never mapped. When `self.getInodePath()` has no entry
    for the inode, the mapping is stored with `self.setInodePath()` and
    `True` is returned. Returns `False` when nothing was changed."""
    info = os.lstat(source)
    if stat.S_ISDIR(info.st_mode):
        return False
    inode = info.st_ino
    if self.getInodePath(inode):
        # Already mapped, nothing to repair.
        return False
    logging.info("Remapping inode for {0} to {1}".format(utf8(source), utf8(path)))
    self.setInodePath(inode, path)
    return True
def hardlink(self, source, destination):
    """Creates `destination` as a hard link to the already copied file that
    has the same inode as `source`. Returns `True` if a hard link was
    created, `False` otherwise.

    Nothing is done (and `False` is returned) in test mode, when `source`
    is a directory, when `destination` already exists, or when
    `self.getInodePath()` has no entry for the source inode. After linking,
    `self.copyattr(source, destination)` is called."""
    if self.test:
        return False
    s = os.lstat(source)
    inode = s[stat.ST_INO]
    mode = s[stat.ST_MODE]
    # Directories can't have hard links. lexists() also catches a dangling
    # symlink at the destination, which os.link() would refuse to replace.
    if stat.S_ISDIR(mode) or os.path.lexists(destination):
        return False
    original_path = self.getInodePath(inode)
    if not original_path:
        return False
    logging.info("Hard linking file: {0}".format(destination))
    # Inode paths are recorded relative to the output directory (see the
    # slicing of the destination when a file is first copied), so the link
    # source lives under `self.output`. A recorded path may start with "/",
    # which would make os.path.join() discard the prefix: strip it first.
    # NOTE(review): confirm the stored paths are always output-relative.
    link_source = os.path.join(self.output, original_path.lstrip("/"))
    os.link(link_source, destination, follow_symlinks=False)
    self.copyattr(source, destination)
    return True
def hardlink(self, source, destination):
    """Creates `destination` as a hard link to the already copied file that
    has the same inode as `source`. Returns `True` if a hard link was
    created, `False` otherwise.

    Nothing is done (and `False` is returned) in test mode, when `source`
    is a directory, when `destination` already exists, or when
    `self.getInodePath()` has no entry for the source inode. After linking,
    `self.copyattr(source, destination)` is called."""
    if self.test:
        return False
    s = os.lstat(source)
    inode = s[stat.ST_INO]
    mode = s[stat.ST_MODE]
    # Directories can't have hard links. lexists() also catches a dangling
    # symlink at the destination, which os.link() would refuse to replace.
    if stat.S_ISDIR(mode) or os.path.lexists(destination):
        return False
    original_path = self.getInodePath(inode)
    if not original_path:
        return False
    logging.info("Hard linking file: {0}".format(destination))
    # Inode paths are recorded relative to the output directory (see the
    # slicing of the destination when a file is first copied), so the link
    # source lives under `self.output`. A recorded path may start with "/",
    # which would make os.path.join() discard the prefix: strip it first.
    # NOTE(review): confirm the stored paths are always output-relative.
    link_source = os.path.join(self.output, original_path.lstrip("/"))
    os.link(link_source, destination, follow_symlinks=False)
    self.copyattr(source, destination)
    return True
def copyfile(self, source, destination, path):
    """Copies the file at `source` to `destination`, detecting hard links
    through the source inode.

    Character devices, block devices, FIFOs and sockets are only logged and
    skipped. For any other file, if `self.getInodePath()` already knows the
    source inode, the work is delegated to `self.hardlink()`. Otherwise the
    file is copied (symlinks are not followed), the inode is recorded with
    `self.setInodePath()` and the attributes are copied with
    `self.copyattr()`. Returns `False` when a fresh copy is requested in
    test mode, `None` in every other case. The `path` argument is accepted
    but not used here."""
    info = os.lstat(source)
    mode = info.st_mode
    # Special files are never copied, each kind has its own log message.
    skipped_kinds = (
        (stat.S_ISCHR, "Skipping special device file: {0}"),
        (stat.S_ISBLK, "Skipping block device file: {0}"),
        (stat.S_ISFIFO, "Skipping FIFO file: {0}"),
        (stat.S_ISSOCK, "Skipping socket file: {0}"),
    )
    for is_kind, template in skipped_kinds:
        if is_kind(mode):
            logging.info(template.format(utf8(source)))
            return None
    inode = info.st_ino
    if self.getInodePath(inode):
        # The inode has already been copied somewhere: link to that copy
        # (hardlink() takes care of the attributes itself).
        self.hardlink(source, destination)
        return None
    logging.info("Copying file: {0}".format(destination))
    if self.test:
        return False
    # The source inode was not copied anywhere into the destination yet,
    # so we copy it now.
    shutil.copyfile(source, destination, follow_symlinks=False)
    # NOTE: The stored path is the destination stripped of the output prefix
    # rather than an absolute path, to keep the DB small.
    # NOTE(review): the slice assumes `destination` starts with
    # `self.output` -- confirm against callers.
    relative = destination[len(self.output):]
    self.setInodePath(inode, relative)
    self.copyattr(source, destination)
def run(args):
    """Runs the tool with the parsed command-line `args`: creates (or
    reuses) the catalogue of `args.source`, then lists or copies its items.

    The attributes read from `args` are `source`, `type`, `name`,
    `catalogue`, `output`, `catalogue_only`, `list`, `range` and `test`.
    Returns `None` when a source path does not exist and `-1` when neither
    a catalogue nor an output is given or when the range is malformed;
    otherwise the method falls through and returns `None`."""

    def parse_range(text):
        # `start-end`, `start-` or `-end`: a missing bound becomes -1.
        return [int(_ or -1) for _ in text.split("-")]

    sources = [os.path.abspath(_) for _ in args.source]
    # os.path.commonprefix() works character by character, so "/a/foo" and
    # "/a/foobar" would give "/a/foo", which may exist as a directory without
    # being an ancestor of both. commonpath() works on path components.
    try:
        base = os.path.commonpath(sources)
    except ValueError:
        # No sources at all, or no common ancestor (e.g. different drives).
        base = ""
    # A single file source is its own common path: use its directory.
    if not os.path.isdir(base):
        base = os.path.dirname(base)
    for s in sources:
        if not os.path.exists(s):
            logging.error("Source path does not exists: {0}".format(s))
            return None
    # We setup the filter
    node_filter = Filter(types=args.type, names=args.name)
    # We log the information about the sources
    logging.info("Using base: {0}".format(base))
    for _ in sources:
        logging.info("Using source: {0}".format(_))
    # Now we create the catalogue
    if not (args.catalogue or args.output):
        logging.error("Either catalogue or output directory are required")
        return -1
    # Now we retrieve/create the catalogue
    cat_path = args.catalogue or os.path.join(args.output, "__rawcopy__", "catalogue.lst")
    if not os.path.exists(cat_path):
        logging.info("Creating source catalogue at {0}".format(cat_path))
        c = Catalogue(sources, base, node_filter)
        c.save(cat_path)
    elif args.catalogue_only:
        logging.info("Catalogue-only mode, regenerating the catalogue")
        c = Catalogue(sources, base, node_filter)
        c.save(cat_path)
    # Now we iterate over the catalogue
    if args.catalogue_only:
        logging.info("Catalogue-only mode, skipping copy. Remove -C option to do the actual copy")
    elif args.list:
        # FIXME: Use a copy with no action
        c = Copy(args.output, node_filter)
        r = args.range
        if r:
            # The range must be parsed here as well: fromCatalogue() compares
            # its bounds with integer item indexes.
            try:
                r = parse_range(r)
            except ValueError:
                logging.error("Unsupported range format. Expects `start-end`")
                return -1
        c.fromCatalogue(
            cat_path, range=r, test=True,
            callback=lambda i, t, p, s, d: sys.stdout.write(
                "{0}\t{1}\t{2}\t{3}\t{4}\n".format(i, t, p, s, d)))
    elif args.output:
        logging.info("Copy catalogue's contents to {0}".format(args.output))
        c = Copy(args.output, node_filter)
        r = args.range
        if r:
            try:
                r = parse_range(r)
            except ValueError:
                logging.error("Unsupported range format. Expects `start-end`")
                return -1
            logging.info("Using catalogue item range: {0}".format(r))
        if args.test:
            logging.info("Test mode enabled (not actual file copy)")
        c.fromCatalogue(cat_path, range=r, test=args.test)
def _open(self, path):
    """Closes any database currently held in `self.db`, then opens (and
    creates if needed) the copy database at `path`. Returns `self`."""
    self._close()
    if self.db is not None:
        # dbm databases are mappings, so an open but empty database is
        # falsy and may have been left behind by a truthiness-based close.
        # We release it here instead of leaking the handle.
        self.db.close()
        self.db = None
    logging.info("Opening copy database at {0}".format(path))
    self.db = dbm.open(path, "c")
    return self
def fromCatalogue(self, path, range=None, test=False, callback=None):
    """Reads the catalogue at `path` and copies the directories, symlinks
    and files it lists. Note that this expects the catalogue to be in
    traversal order.

    - `path` is the catalogue file. Each line holds an index, a type and a
      path separated by `Catalogue.FIELD_SEPARATOR`.
    - `range` is an optional `(start, end)` sequence of item indexes. Items
      before `start` are skipped and the copy stops once the index is past
      `end`, unless `end` is missing or negative. When `range` is `None`
      and the file at `self._indexPath` is at least as recent as the
      catalogue, the start index is read from that file.
    - `test` is stored in `self.test` for the duration of the copy.
    - `callback`, when given, is called with `(index, type, path, source,
      destination)` for every item that passes the range and the filter.

    The copy database is closed when the method exits, including when an
    exception propagates."""
    logging.info("Opening catalogue: {0}".format(path))
    # The base is the common prefix/ancestor of all the paths in the
    # catalogue. The root changes but will always start with the base.
    base = None
    root = None
    self.test = test
    # When no range is specified, we look for the index path
    # and load it.
    if (range is None and os.path.exists(self._indexPath)
            and os.stat(path)[stat.ST_MTIME] <= os.stat(self._indexPath)[stat.ST_MTIME]):
        with open(self._indexPath, "r") as f:
            r = f.read()
        try:
            range = (int(r), -1)
        except ValueError:
            # The index is only a hint: when it cannot be read as an
            # integer we process the whole catalogue.
            pass
    try:
        with open(path, "r") as f:
            for line in f:
                j_t_p = line.split(Catalogue.FIELD_SEPARATOR, 2)
                if len(j_t_p) != 3:
                    logging.error("Malformed line, expecting at least 3 colon-separated values: {0}".format(repr(line)))
                    continue
                j, t, p = j_t_p
                # We only strip the line terminator: the last line of the
                # catalogue may not have one.
                if p.endswith("\n"):
                    p = p[:-1]
                try:
                    i = int(j)
                except ValueError:
                    logging.error("Malformed line, index is not an integer: {0}".format(repr(line)))
                    continue
                self.last = i
                if t == TYPE_BASE:
                    # The first line of the catalogue is expected to be the base
                    # it is also expected to be absolute.
                    self.base = base = p
                    assert os.path.exists(p), "Base directory does not exists: {0}".format(utf8(p))
                    # Once we have the base, we can create rawcopy's DB files
                    rd = os.path.join(self.output, "__rawcopy__")
                    if not os.path.exists(rd):
                        logging.info("Creating rawcopy database directory {0}".format(utf8(rd)))
                        os.makedirs(rd)
                    self._open(os.path.join(rd, "copy.db"))
                elif t == TYPE_ROOT:
                    # If we found a root, we ensure that it is prefixed with the
                    # base
                    assert base, "Catalogue must have a base directory before having roots"
                    assert os.path.normpath(p).startswith(os.path.normpath(base)), "Catalogue roots must be prefixed by the base, base={0}, root={1}".format(utf8(base), utf8(p))
                    # Now we extract the suffix, which is the root minus the base
                    # and no leading /
                    self.root = root = p
                    source = p
                    suffix = p[len(self.base):]
                    if suffix and suffix[0] == "/":
                        suffix = suffix[1:]
                    destination = os.path.join(self.output, suffix)
                    if not (os.path.exists(destination) and not os.path.islink(destination)):
                        pd = os.path.dirname(destination)
                        logging.info("Creating root: {0}:{1}".format(i, utf8(p)))
                        # We make sure the source exists
                        if not os.path.exists(source) and not os.path.islink(source):
                            logging.info("Root does not exists: {0}:{1}".format(i, utf8(p)))
                        # TODO: How do we handle filters at this stage?
                        # We make sure the parent destination exists (it should be the case)
                        if not os.path.exists(pd):
                            # We copy the original parent directory
                            self.copydir(p, pd, suffix)
                        if os.path.isdir(source):
                            self.copydir(p, destination, suffix)
                        elif os.path.islink(source):
                            self.copylink(p, destination, suffix)
                        elif os.path.isfile(source):
                            self.copyfile(p, destination, suffix)
                        else:
                            logging.error("Unsupported root (not a dir/link/file): {0}:{1}".format(i, utf8(p)))
                else:
                    # We skip the indexes that are not within the range, if given
                    if range:
                        if i < range[0]:
                            continue
                        if len(range) > 1 and range[1] >= 0 and i > range[1]:
                            logging.info("Reached end of range {0} >= {1}".format(i, range[1]))
                            break
                    # We check if the filter matches
                    if not self.match(p, t):
                        continue
                    assert root and self.output
                    # We prepare the source, suffix and destination
                    source = os.path.join(root, p)
                    assert source.startswith(base), "os.path.join(root={0}, path={1}) expected to start with base={2}".format(repr(root), repr(p), repr(base))
                    suffix = source[len(base):]
                    # The suffix may be empty here: we let the assertion
                    # below report it instead of failing on the indexing.
                    if suffix and suffix[0] == "/":
                        suffix = suffix[1:]
                    destination = os.path.join(self.output, suffix)
                    assert suffix, "Empty suffix: source={0}, path={1}, destination={2}".format(utf8(source), utf8(p), utf8(destination))
                    # We now proceed with the actual copy
                    if not (os.path.exists(source) or os.path.islink(source)):
                        logging.error("Source path not available: {0}:{1}".format(i, utf8(source)))
                    elif not (os.path.exists(destination) or os.path.islink(destination)):
                        logging.info("Copying path [{2}] {0}:{1}".format(i, utf8(p), t))
                        if t == TYPE_DIR or os.path.isdir(source):
                            if t != TYPE_DIR:
                                logging.warning("Source detected as directory, but typed as {0} -- {1}:{2}".format(t, i, utf8(p)))
                            self.copydir(source, destination, p)
                        elif t == TYPE_SYMLINK:
                            self.copylink(source, destination, p)
                        elif t == TYPE_FILE:
                            self.copyfile(source, destination, p)
                        else:
                            logging.error("Copy: line {0} unsupported type {1}".format(i, t))
                    elif not self.test:
                        # We only go there if we're not in test mode
                        if t == TYPE_DIR:
                            logging.info("Skipping already copied directory: {0}:{1}".format(i, utf8(destination)))
                        elif t == TYPE_SYMLINK:
                            logging.info("Skipping already copied link: {0}:{1}".format(i, utf8(destination)))
                        elif t == TYPE_FILE:
                            logging.info("Skipping already copied file: {0}:{1}".format(i, utf8(destination)))
                        # TODO: We should repair a damaged DB and make sure the inode is copied
                        self.ensureInodePath(source, suffix)
                    # We call the callback
                    if callback:
                        callback(i, t, p, source, destination)
                # We sync the database every 1000 item
                if j.endswith("000") and (not range or i >= range[0]):
                    logging.info("{0} items processed, syncing db".format(i))
                    self._sync(j)
    finally:
        # We don't forget to close the DB, even when the copy fails
        self._close()
def _close(self):
    """Closes the copy database held in `self.db`, if any, and resets
    `self.db` to `None`. Returns `self`."""
    # dbm databases are mappings: an open but empty database is falsy, so
    # we compare with None rather than relying on truthiness.
    if self.db is not None:
        logging.info("Closing copy database")
        self.db.close()
        self.db = None
    return self
def _open(self, path):
    """Closes any database currently held in `self.db`, then opens (and
    creates if needed) the copy database at `path`. Returns `self`."""
    self._close()
    if self.db is not None:
        # dbm databases are mappings, so an open but empty database is
        # falsy and may have been left behind by a truthiness-based close.
        # We release it here instead of leaking the handle.
        self.db.close()
        self.db = None
    logging.info("Opening copy database at {0}".format(path))
    self.db = dbm.open(path, "c")
    return self
def _close(self):
    """Closes the copy database held in `self.db`, if any, and resets
    `self.db` to `None`. Returns `self`."""
    # dbm databases are mappings: an open but empty database is falsy, so
    # we compare with None rather than relying on truthiness.
    if self.db is not None:
        logging.info("Closing copy database")
        self.db.close()
        self.db = None
    return self
def fromCatalogue(self, path, range=None, test=False, callback=None):
    """Reads the catalogue at `path` and copies the directories, symlinks
    and files it lists. Note that this expects the catalogue to be in
    traversal order.

    - `path` is the catalogue file. Each line holds an index, a type and a
      path separated by `Catalogue.FIELD_SEPARATOR`.
    - `range` is an optional `(start, end)` sequence of item indexes. Items
      before `start` are skipped and the copy stops once the index is past
      `end`, unless `end` is missing or negative. When `range` is `None`
      and the file at `self._indexPath` is at least as recent as the
      catalogue, the start index is read from that file.
    - `test` is stored in `self.test` for the duration of the copy.
    - `callback`, when given, is called with `(index, type, path, source,
      destination)` for every item that passes the range and the filter.

    The copy database is closed when the method exits, including when an
    exception propagates."""
    logging.info("Opening catalogue: {0}".format(path))
    # The base is the common prefix/ancestor of all the paths in the
    # catalogue. The root changes but will always start with the base.
    base = None
    root = None
    self.test = test
    # When no range is specified, we look for the index path
    # and load it.
    if (range is None and os.path.exists(self._indexPath)
            and os.stat(path)[stat.ST_MTIME] <= os.stat(self._indexPath)[stat.ST_MTIME]):
        with open(self._indexPath, "r") as f:
            r = f.read()
        try:
            range = (int(r), -1)
        except ValueError:
            # The index is only a hint: when it cannot be read as an
            # integer we process the whole catalogue.
            pass
    try:
        with open(path, "r") as f:
            for line in f:
                j_t_p = line.split(Catalogue.FIELD_SEPARATOR, 2)
                if len(j_t_p) != 3:
                    logging.error("Malformed line, expecting at least 3 colon-separated values: {0}".format(repr(line)))
                    continue
                j, t, p = j_t_p
                # We only strip the line terminator: the last line of the
                # catalogue may not have one.
                if p.endswith("\n"):
                    p = p[:-1]
                try:
                    i = int(j)
                except ValueError:
                    logging.error("Malformed line, index is not an integer: {0}".format(repr(line)))
                    continue
                self.last = i
                if t == TYPE_BASE:
                    # The first line of the catalogue is expected to be the base
                    # it is also expected to be absolute.
                    self.base = base = p
                    assert os.path.exists(p), "Base directory does not exists: {0}".format(utf8(p))
                    # Once we have the base, we can create rawcopy's DB files
                    rd = os.path.join(self.output, "__rawcopy__")
                    if not os.path.exists(rd):
                        logging.info("Creating rawcopy database directory {0}".format(utf8(rd)))
                        os.makedirs(rd)
                    self._open(os.path.join(rd, "copy.db"))
                elif t == TYPE_ROOT:
                    # If we found a root, we ensure that it is prefixed with the
                    # base
                    assert base, "Catalogue must have a base directory before having roots"
                    assert os.path.normpath(p).startswith(os.path.normpath(base)), "Catalogue roots must be prefixed by the base, base={0}, root={1}".format(utf8(base), utf8(p))
                    # Now we extract the suffix, which is the root minus the base
                    # and no leading /
                    self.root = root = p
                    source = p
                    suffix = p[len(self.base):]
                    if suffix and suffix[0] == "/":
                        suffix = suffix[1:]
                    destination = os.path.join(self.output, suffix)
                    if not (os.path.exists(destination) and not os.path.islink(destination)):
                        pd = os.path.dirname(destination)
                        logging.info("Creating root: {0}:{1}".format(i, utf8(p)))
                        # We make sure the source exists
                        if not os.path.exists(source) and not os.path.islink(source):
                            logging.info("Root does not exists: {0}:{1}".format(i, utf8(p)))
                        # TODO: How do we handle filters at this stage?
                        # We make sure the parent destination exists (it should be the case)
                        if not os.path.exists(pd):
                            # We copy the original parent directory
                            self.copydir(p, pd, suffix)
                        if os.path.isdir(source):
                            self.copydir(p, destination, suffix)
                        elif os.path.islink(source):
                            self.copylink(p, destination, suffix)
                        elif os.path.isfile(source):
                            self.copyfile(p, destination, suffix)
                        else:
                            logging.error("Unsupported root (not a dir/link/file): {0}:{1}".format(i, utf8(p)))
                else:
                    # We skip the indexes that are not within the range, if given
                    if range:
                        if i < range[0]:
                            continue
                        if len(range) > 1 and range[1] >= 0 and i > range[1]:
                            logging.info("Reached end of range {0} >= {1}".format(i, range[1]))
                            break
                    # We check if the filter matches
                    if not self.match(p, t):
                        continue
                    assert root and self.output
                    # We prepare the source, suffix and destination
                    source = os.path.join(root, p)
                    assert source.startswith(base), "os.path.join(root={0}, path={1}) expected to start with base={2}".format(repr(root), repr(p), repr(base))
                    suffix = source[len(base):]
                    # The suffix may be empty here: we let the assertion
                    # below report it instead of failing on the indexing.
                    if suffix and suffix[0] == "/":
                        suffix = suffix[1:]
                    destination = os.path.join(self.output, suffix)
                    assert suffix, "Empty suffix: source={0}, path={1}, destination={2}".format(utf8(source), utf8(p), utf8(destination))
                    # We now proceed with the actual copy
                    if not (os.path.exists(source) or os.path.islink(source)):
                        logging.error("Source path not available: {0}:{1}".format(i, utf8(source)))
                    elif not (os.path.exists(destination) or os.path.islink(destination)):
                        logging.info("Copying path [{2}] {0}:{1}".format(i, utf8(p), t))
                        if t == TYPE_DIR or os.path.isdir(source):
                            if t != TYPE_DIR:
                                logging.warning("Source detected as directory, but typed as {0} -- {1}:{2}".format(t, i, utf8(p)))
                            self.copydir(source, destination, p)
                        elif t == TYPE_SYMLINK:
                            self.copylink(source, destination, p)
                        elif t == TYPE_FILE:
                            self.copyfile(source, destination, p)
                        else:
                            logging.error("Copy: line {0} unsupported type {1}".format(i, t))
                    elif not self.test:
                        # We only go there if we're not in test mode
                        if t == TYPE_DIR:
                            logging.info("Skipping already copied directory: {0}:{1}".format(i, utf8(destination)))
                        elif t == TYPE_SYMLINK:
                            logging.info("Skipping already copied link: {0}:{1}".format(i, utf8(destination)))
                        elif t == TYPE_FILE:
                            logging.info("Skipping already copied file: {0}:{1}".format(i, utf8(destination)))
                        # TODO: We should repair a damaged DB and make sure the inode is copied
                        self.ensureInodePath(source, suffix)
                    # We call the callback
                    if callback:
                        callback(i, t, p, source, destination)
                # We sync the database every 1000 item
                if j.endswith("000") and (not range or i >= range[0]):
                    logging.info("{0} items processed, syncing db".format(i))
                    self._sync(j)
    finally:
        # We don't forget to close the DB, even when the copy fails
        self._close()
def walk(self):
    """Walks all the catalogue's `paths` and yields `(index, type, path)`
    triples.

    The first triple is the base. Each given path then yields a root
    followed by its entries: a single file or symlink yields its directory
    as root and its base name as entry, a directory is traversed with
    `os.walk()` and yields one root per visited directory. Device files,
    FIFOs and sockets given as paths are logged and skipped, and so are
    paths rejected by `self.match()`."""
    special_kinds = (
        (stat.S_ISCHR, "Catalogue: Skipping special device file: {0}"),
        (stat.S_ISBLK, "Catalogue: Skipping block device file: {0}"),
        (stat.S_ISFIFO, "Catalogue: Skipping FIFO file: {0}"),
        (stat.S_ISSOCK, "Catalogue: Skipping socket file: {0}"),
    )
    index = 0
    yield (index, TYPE_BASE, self.base)
    for entry in self.paths:
        mode = os.lstat(entry).st_mode
        skip_template = next(
            (text for is_kind, text in special_kinds if is_kind(mode)), None)
        if skip_template is not None:
            logging.info(skip_template.format(utf8(entry)))
            continue
        if os.path.isfile(entry) and self.match(entry, TYPE_FILE):
            yield (index, TYPE_ROOT, os.path.dirname(entry))
            index += 1
            yield (index, TYPE_FILE, os.path.basename(entry))
            continue
        if os.path.islink(entry) and self.match(entry, TYPE_SYMLINK):
            yield (index, TYPE_ROOT, os.path.dirname(entry))
            index += 1
            yield (index, TYPE_SYMLINK, os.path.basename(entry))
            continue
        if not self.match(entry, TYPE_DIR):
            logging.info("Catalogue: Filtered out path: {0}".format(utf8(entry)))
            continue
        for root, dirs, files in os.walk(entry, topdown=True):
            logging.info("Catalogue:\t#{3:010d}\t{0:04d}f+{1:04d}d\t{2}".format(
                len(files), len(dirs), utf8(root), index))
            yield (index, TYPE_ROOT, root)
            # Files (and symlinks) come first, then the sub-directories;
            # the index only advances for the entries that match.
            for name in files:
                full = os.path.join(root, name)
                kind = TYPE_SYMLINK if os.path.islink(full) else TYPE_FILE
                if not self.match(full, kind):
                    continue
                yield (index, kind, name)
                index += 1
            for name in dirs:
                full = os.path.join(root, name)
                if not self.match(full, TYPE_DIR):
                    continue
                yield (index, TYPE_DIR, name)
                index += 1
# FFI # # ----------------------------------------------------------------------------- ffi = None lib = None LIBPARSING_FFI = join(PACKAGE_PATH, "_libparsing.ffi") if os.path.exists(join(PACKAGE_PATH, "_libparsing.ffi")) else None LIBPARSING_EXT = None LIBPARSING_SO = None LIBRARY_EXTS = ("so", "dylib", "dll") # We check if there is a _libparsing SO/DYLIB/DLL file. If not, we need to # build it using CFFI. from . import _buildext if len([_ for _ in LIBRARY_EXTS if os.path.exists(join(PACKAGE_PATH, _buildext.filename(_)))]) == 0: logging.info("Building native libparsing Python bindings‥") _buildext.build() # Now we look for the actual Python extension (_libparsing # We need to support different extensions and different prefixes. CFFI # will build extensions as " _libparsing.cpython-35m-x86_64-linux-gnu.so" # on Linux. PREFIX_EXT = _buildext.name() + "." PREFIX_SO = "libparsing." for p in os.listdir(PACKAGE_PATH): if p.startswith(PREFIX_EXT) and p.rsplit(".",1)[-1] in LIBRARY_EXTS: LIBPARSING_EXT = os.path.join(PACKAGE_PATH, p) if p.startswith(PREFIX_SO) and p.rsplit(".",1)[-1] in LIBRARY_EXTS: LIBPARSING_SO = os.path.join(PACKAGE_PATH, p) if LIBPARSING_EXT:
ffi = None lib = None LIBPARSING_FFI = join(PACKAGE_PATH, "_libparsing.ffi") if os.path.exists( join(PACKAGE_PATH, "_libparsing.ffi")) else None LIBPARSING_EXT = None LIBPARSING_SO = None LIBRARY_EXTS = ("so", "dylib", "dll") # We check if there is a _libparsing SO/DYLIB/DLL file. If not, we need to # build it using CFFI. from . import _buildext if len([ _ for _ in LIBRARY_EXTS if os.path.exists(join(PACKAGE_PATH, _buildext.filename(_))) ]) == 0: logging.info("Building native libparsing Python bindings‥") _buildext.build() # Now we look for the actual Python extension (_libparsing # We need to support different extensions and different prefixes. CFFI # will build extensions as " _libparsing.cpython-35m-x86_64-linux-gnu.so" # on Linux. PREFIX_EXT = _buildext.name() + "." PREFIX_SO = "libparsing." for p in os.listdir(PACKAGE_PATH): if p.startswith(PREFIX_EXT) and p.rsplit(".", 1)[-1] in LIBRARY_EXTS: LIBPARSING_EXT = os.path.join(PACKAGE_PATH, p) if p.startswith(PREFIX_SO) and p.rsplit(".", 1)[-1] in LIBRARY_EXTS: LIBPARSING_SO = os.path.join(PACKAGE_PATH, p) if LIBPARSING_EXT:
def logged_application(environ, startResponse):
    """WSGI entry point that logs the request method and path, then
    delegates to the application stored under `environ["retro.app"]`.

    When the environment has no `"retro.app"` entry yet, one is created
    with `stack.app()` and stored there before being called."""
    method = environ["REQUEST_METHOD"]
    path_info = environ["PATH_INFO"]
    logging.info("{0} {1}".format(method, path_info))
    key = "retro.app"
    if key not in environ:
        environ[key] = stack.app()
    application = environ[key]
    return application(environ, startResponse)
def run(args):
    """Runs the tool with the parsed command-line `args`: creates (or
    reuses) the catalogue of `args.source`, then lists or copies its items.

    The attributes read from `args` are `source`, `type`, `name`,
    `catalogue`, `output`, `catalogue_only`, `list`, `range` and `test`.
    Returns `None` when a source path does not exist and `-1` when neither
    a catalogue nor an output is given or when the range is malformed;
    otherwise the method falls through and returns `None`."""

    def parse_range(text):
        # `start-end`, `start-` or `-end`: a missing bound becomes -1.
        return [int(_ or -1) for _ in text.split("-")]

    sources = [os.path.abspath(_) for _ in args.source]
    # os.path.commonprefix() works character by character, so "/a/foo" and
    # "/a/foobar" would give "/a/foo", which may exist as a directory without
    # being an ancestor of both. commonpath() works on path components.
    try:
        base = os.path.commonpath(sources)
    except ValueError:
        # No sources at all, or no common ancestor (e.g. different drives).
        base = ""
    # A single file source is its own common path: use its directory.
    if not os.path.isdir(base):
        base = os.path.dirname(base)
    for s in sources:
        if not os.path.exists(s):
            logging.error("Source path does not exists: {0}".format(s))
            return None
    # We setup the filter
    node_filter = Filter(types=args.type, names=args.name)
    # We log the information about the sources
    logging.info("Using base: {0}".format(base))
    for _ in sources:
        logging.info("Using source: {0}".format(_))
    # Now we create the catalogue
    if not (args.catalogue or args.output):
        logging.error("Either catalogue or output directory are required")
        return -1
    # Now we retrieve/create the catalogue
    cat_path = args.catalogue or os.path.join(args.output, "__rawcopy__", "catalogue.lst")
    if not os.path.exists(cat_path):
        logging.info("Creating source catalogue at {0}".format(cat_path))
        c = Catalogue(sources, base, node_filter)
        c.save(cat_path)
    elif args.catalogue_only:
        logging.info("Catalogue-only mode, regenerating the catalogue")
        c = Catalogue(sources, base, node_filter)
        c.save(cat_path)
    # Now we iterate over the catalogue
    if args.catalogue_only:
        logging.info("Catalogue-only mode, skipping copy. Remove -C option to do the actual copy")
    elif args.list:
        # FIXME: Use a copy with no action
        c = Copy(args.output, node_filter)
        r = args.range
        if r:
            # The range must be parsed here as well: fromCatalogue() compares
            # its bounds with integer item indexes.
            try:
                r = parse_range(r)
            except ValueError:
                logging.error("Unsupported range format. Expects `start-end`")
                return -1
        c.fromCatalogue(
            cat_path, range=r, test=True,
            callback=lambda i, t, p, s, d: sys.stdout.write(
                "{0}\t{1}\t{2}\t{3}\t{4}\n".format(i, t, p, s, d)))
    elif args.output:
        logging.info("Copy catalogue's contents to {0}".format(args.output))
        c = Copy(args.output, node_filter)
        r = args.range
        if r:
            try:
                r = parse_range(r)
            except ValueError:
                logging.error("Unsupported range format. Expects `start-end`")
                return -1
            logging.info("Using catalogue item range: {0}".format(r))
        if args.test:
            logging.info("Test mode enabled (not actual file copy)")
        c.fromCatalogue(cat_path, range=r, test=args.test)
def walk(self):
    """Walks all the catalogue's `paths` and yields `(index, type, path)`
    triples.

    The first triple is the base. Each given path then yields a root
    followed by its entries: a single file or symlink yields its directory
    as root and its base name as entry, a directory is traversed with
    `os.walk()` and yields one root per visited directory. Device files,
    FIFOs and sockets given as paths are logged and skipped, and so are
    paths rejected by `self.match()`."""
    special_kinds = (
        (stat.S_ISCHR, "Catalogue: Skipping special device file: {0}"),
        (stat.S_ISBLK, "Catalogue: Skipping block device file: {0}"),
        (stat.S_ISFIFO, "Catalogue: Skipping FIFO file: {0}"),
        (stat.S_ISSOCK, "Catalogue: Skipping socket file: {0}"),
    )
    index = 0
    yield (index, TYPE_BASE, self.base)
    for entry in self.paths:
        mode = os.lstat(entry).st_mode
        skip_template = next(
            (text for is_kind, text in special_kinds if is_kind(mode)), None)
        if skip_template is not None:
            logging.info(skip_template.format(utf8(entry)))
            continue
        if os.path.isfile(entry) and self.match(entry, TYPE_FILE):
            yield (index, TYPE_ROOT, os.path.dirname(entry))
            index += 1
            yield (index, TYPE_FILE, os.path.basename(entry))
            continue
        if os.path.islink(entry) and self.match(entry, TYPE_SYMLINK):
            yield (index, TYPE_ROOT, os.path.dirname(entry))
            index += 1
            yield (index, TYPE_SYMLINK, os.path.basename(entry))
            continue
        if not self.match(entry, TYPE_DIR):
            logging.info("Catalogue: Filtered out path: {0}".format(utf8(entry)))
            continue
        for root, dirs, files in os.walk(entry, topdown=True):
            logging.info("Catalogue:\t#{3:010d}\t{0:04d}f+{1:04d}d\t{2}".format(
                len(files), len(dirs), utf8(root), index))
            yield (index, TYPE_ROOT, root)
            # Files (and symlinks) come first, then the sub-directories;
            # the index only advances for the entries that match.
            for name in files:
                full = os.path.join(root, name)
                kind = TYPE_SYMLINK if os.path.islink(full) else TYPE_FILE
                if not self.match(full, kind):
                    continue
                yield (index, kind, name)
                index += 1
            for name in dirs:
                full = os.path.join(root, name)
                if not self.match(full, TYPE_DIR):
                    continue
                yield (index, TYPE_DIR, name)
                index += 1