def ls_partitions(self):
    """List all partitions available for storing bundles and how many bundles are currently stored."""
    # path_util.ls returns (directories, files); partition entries are directories.
    partitions, _ = path_util.ls(self.partitions)
    noun = 'partition' if len(partitions) == 1 else 'partitions'
    print('%d %s' % (len(partitions), noun))
    for name in partitions:
        link_path = os.path.join(self.partitions, name)
        # Each partition entry is a symlink to the user's configured mountpoint.
        mountpoint = os.readlink(link_path)
        dirs, files = path_util.ls(os.path.join(link_path, MultiDiskBundleStore.DATA_SUBDIRECTORY))
        bundles = dirs + files
        unit = 'bundle' if len(bundles) == 1 else 'bundles'
        print('- %-016s\n\tmountpoint: %s\n\t%d %s' % (name, mountpoint, len(bundles), unit))
def add_partition(self, target, new_partition_name):
    """
    MultiDiskBundleStore specific method. Add a new partition to the bundle store.

    The "target" is actually a symlink to the target directory, which the user
    has configured as the mountpoint for some desired partition.

    First, all bundles that are to be relocated onto the new partition are
    copied to a temp location to be resilient against failures. After the copy
    is performed, the bundles are subsequently moved to the new partition, and
    finally the original copies of the bundles are deleted from their old
    locations.

    |target|: path (symlink) to the new partition's mountpoint.
    |new_partition_name|: name under self.partitions to register it as.
    """
    target = os.path.abspath(target)
    new_partition_location = os.path.join(self.partitions, new_partition_name)
    # Staging area on the new disk: bundles are copied here first so a failure
    # mid-copy never leaves the store in a broken state.
    mtemp = os.path.join(target, MultiDiskBundleStore.TEMP_SUBDIRECTORY)
    try:
        path_util.make_directory(mtemp)
    except Exception:  # narrowed from bare except: don't swallow SystemExit/KeyboardInterrupt
        sys.stderr.write("Could not make directory %s on partition %s, aborting\n" % (mtemp, target))
        sys.exit(1)
    self.ring.add_node(new_partition_name)  # Add the node to the partition locations
    delete_on_success = []  # Paths to bundles that will be deleted after the copy finishes successfully
    sys.stderr.write("Marking bundles for placement on new partition %s (might take a while)\n" % new_partition_name)
    # For each bundle in the bundle store, check whether it now hashes to the
    # new partition. If so, stage a copy of it on the new disk.
    partitions, _ = path_util.ls(self.partitions)
    for partition in partitions:
        partition_abs_path = os.path.join(
            self.partitions, partition, MultiDiskBundleStore.DATA_SUBDIRECTORY)
        # path_util.ls returns (dirs, files); a bundle may be either.
        dirs, files = path_util.ls(partition_abs_path)
        for bundle in dirs + files:
            correct_partition = self.ring.get_node(bundle)
            if correct_partition != partition:
                # Reposition the bundle to the correct partition
                from_path = os.path.join(partition_abs_path, bundle)
                to_path = os.path.join(mtemp, bundle)
                sys.stderr.write("copying %s to %s\n" % (from_path, to_path))
                path_util.copy(from_path, to_path)
                delete_on_success.append(from_path)
    sys.stderr.write("Adding new partition as %s...\n" % new_partition_location)
    path_util.soft_link(target, new_partition_location)
    # Atomically move the temp location to the new partition's mdata
    new_mdata = os.path.join(new_partition_location, MultiDiskBundleStore.DATA_SUBDIRECTORY)
    new_mtemp = os.path.join(new_partition_location, MultiDiskBundleStore.TEMP_SUBDIRECTORY)
    path_util.rename(new_mtemp, new_mdata)
    path_util.make_directory(new_mtemp)
    # Go through and purge all of the originals at this time
    sys.stderr.write("Cleaning up drives...\n")
    for to_delete in delete_on_success:
        path_util.remove(to_delete)
    sys.stderr.write("Successfully added partition '%s' to the pool.\n" % new_partition_name)
def ls_partitions(self):
    """List all partitions available for storing bundles and how many bundles are currently stored."""
    partitions, _ = path_util.ls(self.partitions)
    count = len(partitions)
    sys.stdout.write('%d %s\n' % (count, 'partition' if count == 1 else 'partitions'))
    for partition_name in partitions:
        link = os.path.join(self.partitions, partition_name)
        # The partition entry is a symlink; resolve it to the real mountpoint.
        target = os.readlink(link)
        data_dir = os.path.join(link, MultiDiskBundleStore.DATA_SUBDIRECTORY)
        dirs, files = path_util.ls(data_dir)
        n = len(dirs) + len(files)
        sys.stdout.write('- %-016s\n\tmountpoint: %s\n\t%d %s\n'
                         % (partition_name, target, n, 'bundle' if n == 1 else 'bundles'))
def rm_partition(self, partition):
    """
    Deletes the given partition entry from the bundle store, and purges the
    LRU cache. Does not move any bundles.

    |partition|: name of the partition directory under self.partitions.
    """
    if self.__get_num_partitions() == 1:
        # Prevent foot-shooting: removing the last partition would destroy the deployment.
        sys.stderr.write("Error, cannot remove last partition. If you really wish to delete CodaLab, please run the following command:\n")
        sys.stderr.write(" rm -rf %s\n" % self.codalab_home)
        return
    partition_abs_path = os.path.join(self.partitions, partition)
    try:
        print(partition_abs_path)
        path_util.check_isvalid(partition_abs_path, 'rm-partition')
    except Exception:  # narrowed from bare except: don't swallow SystemExit/KeyboardInterrupt
        sys.stderr.write("Partition with name '%s' does not exist. Run `cl ls-partitions` to see a list of mounted partitions.\n" % partition)
        sys.exit(1)
    sys.stderr.write("Unlinking partition %s from CodaLab deployment...\n" % partition)
    path_util.remove(partition_abs_path)
    # Refresh the cached node list now that the partition is gone.
    nodes, _ = path_util.ls(self.partitions)
    self.nodes = nodes
    sys.stderr.write("Partition removed successfully from bundle store pool\n")
    # This message goes to stdout (original used the odd `print >>sys.stdout`).
    print("Warning: this does not affect the bundles in the removed partition or any entries in the bundle database")
    self.lru_cache = OrderedDict()
def rm_partition(self, partition):
    """
    Remove the named partition entry from the bundle store pool and reset the
    LRU cache. Bundle contents themselves are left untouched.
    """
    if self.__get_num_partitions() == 1:
        # Refuse to drop the final partition: that would amount to deleting CodaLab.
        sys.stderr.write("Error, cannot remove last partition. If you really wish to delete CodaLab, please run the following command:\n")
        sys.stderr.write(" rm -rf %s\n" % self.codalab_home)
        return
    abs_path = os.path.join(self.partitions, partition)
    try:
        print(abs_path)
        path_util.check_isvalid(abs_path, 'rm-partition')
    except:
        sys.stderr.write("Partition with name '%s' does not exist. Run `cl ls-partitions` to see a list of mounted partitions.\n" % partition)
        sys.exit(1)
    sys.stderr.write("Unlinking partition %s from CodaLab deployment...\n" % partition)
    path_util.remove(abs_path)
    # Re-read the remaining partition directories.
    self.nodes, _ = path_util.ls(self.partitions)
    sys.stderr.write("Partition removed successfully from bundle store pool\n")
    sys.stdout.write("Warning: this does not affect the bundles in the removed partition or any entries in the bundle database\n")
    self.lru_cache = OrderedDict()
def rm_partition(self, partition):
    """
    Deletes the given disk from the bundle store, and if it is not the last
    partition, it redistributes the bundles from that partition across the
    remaining partitions: copy each bundle into a staging directory on its new
    partition, then rename into the production mdata/ directory, then wipe and
    unlink the old partition.

    |partition|: name of the partition directory under self.partitions.
    """
    if self.__get_num_partitions() == 1:
        # Prevent foot-shooting: removing the last partition would destroy the deployment.
        sys.stderr.write("Error, cannot remove last partition. If you really wish to delete CodaLab, please run the following command:\n")
        sys.stderr.write(" rm -rf %s\n" % self.codalab_home)
        return
    relocations = dict()  # bundle name -> absolute path of its destination partition
    partition_abs_path = os.path.join(self.partitions, partition)
    old_mdata = os.path.join(partition_abs_path, MultiDiskBundleStore.DATA_SUBDIRECTORY)
    old_mtemp = os.path.join(partition_abs_path, MultiDiskBundleStore.TEMP_SUBDIRECTORY)
    try:
        print(partition_abs_path)
        path_util.check_isvalid(partition_abs_path, 'rm-partition')
    except Exception:  # narrowed from bare except: don't swallow SystemExit/KeyboardInterrupt
        sys.stderr.write("Partition with name '%s' does not exist. Run `cl ls-partitions` to see a list of mounted partitions.\n" % partition)
        sys.exit(1)
    # Reset the ring to distribute across remaining partitions
    self.ring.remove_node(partition)
    dirs, files = path_util.ls(old_mdata)
    bundles_to_move = dirs + files
    for bundle in bundles_to_move:
        new_partition = self.ring.get_node(bundle)
        relocations[bundle] = os.path.join(self.partitions, new_partition)
    # Copy all bundles off of the old partition to temp directories on the new
    # partition. BUGFIX: the loop variable was previously named `partition`,
    # shadowing the method parameter and corrupting the "Unlinking partition"
    # message below; `.iteritems()` also replaced with py2/py3 `.items()`.
    for bundle, dest_partition in relocations.items():
        # temporary staging directory on the destination partition
        temp_dir = os.path.join(dest_partition, MultiDiskBundleStore.TEMP_SUBDIRECTORY)
        from_path = os.path.join(old_mdata, bundle)
        to_path = os.path.join(temp_dir, 'stage-%s' % bundle)
        path_util.copy(from_path, to_path)
    # Now that each bundle is on the proper partition, move each from the staging
    # area to the production mdata/ subdirectory on its partition.
    for bundle, dest_partition in relocations.items():
        temp_dir = os.path.join(dest_partition, MultiDiskBundleStore.TEMP_SUBDIRECTORY)
        from_path = os.path.join(temp_dir, 'stage-%s' % bundle)
        to_path = os.path.join(dest_partition, MultiDiskBundleStore.DATA_SUBDIRECTORY, bundle)
        path_util.rename(from_path, to_path)
    # Remove data from partition and unlink from CodaLab
    sys.stderr.write("Cleaning bundles off of partition...\n")
    path_util.remove(old_mdata)
    path_util.remove(old_mtemp)
    sys.stderr.write("Unlinking partition %s from CodaLab deployment...\n" % partition)
    path_util.remove(partition_abs_path)
    sys.stderr.write("Partition removed successfully from bundle store pool\n")
def __init__(self, codalab_home):
    """
    |codalab_home|: root of the CodaLab deployment; partition symlinks live
    under <codalab_home>/partitions.
    """
    self.codalab_home = path_util.normalize(codalab_home)
    self.partitions = os.path.join(self.codalab_home, 'partitions')
    self.mtemp = os.path.join(self.codalab_home, MultiDiskBundleStore.MISC_TEMP_SUBDIRECTORY)
    # Perform base-class initialization first to ensure that directories will be populated
    super(MultiDiskBundleStore, self).__init__()
    nodes, _ = path_util.ls(self.partitions)
    self.nodes = nodes
    self.lru_cache = OrderedDict()
    # BUGFIX: removed a second, redundant super().__init__() call that re-ran
    # base-class initialization after instance state had already been set up.
def __init__(self, codalab_home):
    """
    |codalab_home|: root of the CodaLab deployment; partition symlinks live
    under <codalab_home>/partitions. Builds a consistent-hash ring over the
    partition directories found there.
    """
    self.codalab_home = path_util.normalize(codalab_home)
    self.partitions = os.path.join(self.codalab_home, 'partitions')
    self.mtemp = os.path.join(self.codalab_home, MultiDiskBundleStore.MISC_TEMP_SUBDIRECTORY)
    # Perform base-class initialization first to ensure that directories will be populated
    super(MultiDiskBundleStore, self).__init__()
    nodes, _ = path_util.ls(self.partitions)
    self.ring = HashRing(nodes)
    # BUGFIX: removed a second, redundant super().__init__() call that re-ran
    # base-class initialization after instance state had already been set up.
def health_check(self, model, force=False, compute_data_hash=False, repair_hashes=False):
    """
    In the MultiDiskBundleStore, bundle contents are stored on disk, and
    occasionally the disk gets out of sync with the database, in which case we
    make repairs in the following ways:

    1. Delete bundles whose UUID has no corresponding row in the database.
    2. Delete any files not beginning with a UUID string.
    3. For each bundle marked READY or FAILED, ensure that its dependencies are
       not located in the bundle directory; if they are, delete them.
    4. For a bundle <UUID> marked READY or FAILED, the <UUID>.cid,
       <UUID>.status, and <UUID>(-internal).sh files should not exist.

    |force|: Perform any destructive operations on the bundle store the health
        check determines are necessary. False by default.
    |compute_data_hash|: If True, compute the data_hash for every single bundle
        ourselves and see if it's consistent with what's in the database.
        False by default.
    |repair_hashes|: Together with |force|, overwrite mismatched data_hash
        values in the database with the recomputed digest.
    """
    UUID_REGEX = re.compile(r'^(%s)' % spec_util.UUID_STR)

    def _delete_path(loc):
        # Print the equivalent shell command; only actually delete when |force|.
        cmd = 'rm -r \'%s\'' % loc
        print(cmd)
        if force:
            path_util.remove(loc)

    def _get_uuid(path):
        # Extract the leading UUID from a basename, or None if there is none.
        fname = os.path.basename(path)
        try:
            return UUID_REGEX.match(fname).groups()[0]
        except:
            return None

    def _is_bundle(path):
        """Returns whether the given path is a bundle directory/file"""
        # A bundle's basename is exactly its UUID (no extension or suffix).
        return _get_uuid(path) == os.path.basename(path)

    def _check_bundle_paths(bundle_paths, db_bundle_by_uuid):
        """
        Takes in a list of bundle paths and a mapping of UUID to BundleModel,
        and returns a list of paths and subpaths that need to be removed.
        """
        to_delete = []
        # Batch get information for all bundles stored on-disk
        for bundle_path in bundle_paths:
            uuid = _get_uuid(bundle_path)
            # Screen for bundles stored on disk that are no longer in the database
            bundle = db_bundle_by_uuid.get(uuid, None)
            if bundle == None:
                to_delete += [bundle_path]
                continue
            # Delete dependencies stored inside of READY or FAILED bundles
            if bundle.state in [State.READY, State.FAILED]:
                dep_paths = [
                    os.path.join(bundle_path, dep.child_path) for dep in bundle.dependencies
                ]
                to_delete += list(filter(os.path.exists, dep_paths))
        return to_delete

    def _check_other_paths(other_paths, db_bundle_by_uuid):
        """
        Given a list of non-bundle paths, and a mapping of UUID to BundleModel,
        returns a list of paths to delete.
        """
        to_delete = []
        for path in other_paths:
            uuid = _get_uuid(path)
            bundle = db_bundle_by_uuid.get(uuid, None)
            if bundle == None:
                to_delete += [path]
                continue
            ends_with_ext = (path.endswith('.cid') or path.endswith('.status')
                             or path.endswith('.sh'))
            if bundle.state in [State.READY, State.FAILED]:
                # Run-control files are stale once the bundle is terminal.
                if ends_with_ext:
                    to_delete += [path]
                    continue
                elif '.' in path:
                    print('WARNING: File %s is likely junk.' % path, file=sys.stderr)
        return to_delete

    partitions, _ = path_util.ls(self.partitions)
    trash_count = 0
    for partition in partitions:
        print('Looking for trash in partition %s...' % partition, file=sys.stderr)
        partition_path = os.path.join(
            self.partitions, partition, MultiDiskBundleStore.DATA_SUBDIRECTORY)
        # Flatten (dirs, files) from ls into absolute paths.
        entries = list(
            map(
                lambda f: os.path.join(partition_path, f),
                reduce(lambda d, f: d + f, path_util.ls(partition_path)),
            ))
        bundle_paths = list(filter(_is_bundle, entries))
        other_paths = set(entries) - set(bundle_paths)
        uuids = list(map(_get_uuid, bundle_paths))
        db_bundles = model.batch_get_bundles(uuid=uuids)
        db_bundle_by_uuid = dict()
        for bundle in db_bundles:
            db_bundle_by_uuid[bundle.uuid] = bundle
        # Check both bundles and non-bundles and remove each
        for to_delete in _check_bundle_paths(bundle_paths, db_bundle_by_uuid):
            trash_count += 1
            _delete_path(to_delete)
        for to_delete in _check_other_paths(other_paths, db_bundle_by_uuid):
            trash_count += 1
            _delete_path(to_delete)
        # Check for each bundle if we need to compute its data_hash
        data_hash_recomputed = 0
        print('Checking data_hash of bundles in partition %s...' % partition, file=sys.stderr)
        for bundle_path in bundle_paths:
            uuid = _get_uuid(bundle_path)
            bundle = db_bundle_by_uuid.get(uuid, None)
            if bundle == None:
                continue
            if compute_data_hash or bundle.data_hash == None:
                dirs_and_files = (path_util.recursive_ls(bundle_path)
                                  if os.path.isdir(bundle_path) else ([], [bundle_path]))
                data_hash = '0x%s' % path_util.hash_directory(bundle_path, dirs_and_files)
                if bundle.data_hash == None:
                    # Bundle never had a hash recorded: backfill it.
                    data_hash_recomputed += 1
                    print(
                        'Giving bundle %s data_hash %s' % (bundle_path, data_hash),
                        file=sys.stderr,
                    )
                    if force:
                        db_update = dict(data_hash=data_hash)
                        model.update_bundle(bundle, db_update)
                elif compute_data_hash and data_hash != bundle.data_hash:
                    data_hash_recomputed += 1
                    print(
                        'Bundle %s should have data_hash %s, actual digest is %s'
                        % (bundle_path, bundle.data_hash, data_hash),
                        file=sys.stderr,
                    )
                    if repair_hashes and force:
                        db_update = dict(data_hash=data_hash)
                        model.update_bundle(bundle, db_update)
    # NOTE(review): data_hash_recomputed is (re)initialized inside the partition
    # loop, so the summary below only reflects the last partition and raises
    # NameError when there are no partitions — confirm this is intended.
    if force:
        print('\tDeleted %d objects from the bundle store' % trash_count, file=sys.stderr)
        print('\tRecomputed data_hash for %d bundles' % data_hash_recomputed, file=sys.stderr)
    else:
        print(
            'Dry-Run Statistics, re-run with --force to perform updates:', file=sys.stderr)
        print('\tObjects marked for deletion: %d' % trash_count, file=sys.stderr)
        print(
            '\tBundles that need data_hash recompute: %d' % data_hash_recomputed,
            file=sys.stderr,
        )
def __get_num_partitions(self):
    """
    Return the current number of disks in use by this MultiDiskBundleStore,
    i.e. the number of directories under self.partitions.
    """
    # path_util.ls yields (directories, files); only directories count.
    directories, _ = path_util.ls(self.partitions)
    return len(directories)
def add_partition(self, target, new_partition_name):
    """
    MultiDiskBundleStore specific method. Add a new partition to the bundle store.
    The "target" is actually a symlink to the target directory, which the user
    has configured as the mountpoint for some desired partition.

    First, all bundles that are to be relocated onto the new partition are
    copied to a temp location to be resilient against failures. After the copy
    is performed, the bundles are subsequently moved to the new partition, and
    finally the original copies of the bundles are deleted from their old
    locations.
    """
    target = os.path.abspath(target)
    new_partition_location = os.path.join(self.partitions, new_partition_name)
    # Staging directory on the new disk; bundles land here before the
    # partition is linked into the pool.
    mtemp = os.path.join(target, MultiDiskBundleStore.TEMP_SUBDIRECTORY)
    try:
        path_util.make_directory(mtemp)
    except:
        print >> sys.stderr, "Could not make directory %s on partition %s, aborting" % (
            mtemp, target)
        sys.exit(1)
    self.ring.add_node(
        new_partition_name)  # Add the node to the partition locations
    delete_on_success = [
    ]  # Paths to bundles that will be deleted after the copy finishes successfully
    print >> sys.stderr, "Marking bundles for placement on new partition %s (might take a while)" % new_partition_name
    # For each bundle in the bundle store, check to see if any hash to the new
    # partition. If so, move them over.
    partitions, _ = path_util.ls(self.partitions)
    for partition in partitions:
        partition_abs_path = os.path.join(
            self.partitions, partition, MultiDiskBundleStore.DATA_SUBDIRECTORY)
        # path_util.ls returns (dirs, files); flatten into one list of names.
        bundles = reduce(lambda dirs, files: dirs + files,
                         path_util.ls(partition_abs_path))
        for bundle in bundles:
            correct_partition = self.ring.get_node(bundle)
            if correct_partition != partition:
                # Reposition the node to the correct partition
                from_path = os.path.join(
                    self.partitions, partition,
                    MultiDiskBundleStore.DATA_SUBDIRECTORY, bundle)
                to_path = os.path.join(mtemp, bundle)
                print >> sys.stderr, "copying %s to %s" % (from_path, to_path)
                path_util.copy(from_path, to_path)
                delete_on_success += [from_path]
    print >> sys.stderr, "Adding new partition as %s..." % new_partition_location
    path_util.soft_link(target, new_partition_location)
    # Atomically move the temp location to the new partition's mdata
    new_mdata = os.path.join(new_partition_location,
                             MultiDiskBundleStore.DATA_SUBDIRECTORY)
    new_mtemp = os.path.join(new_partition_location,
                             MultiDiskBundleStore.TEMP_SUBDIRECTORY)
    path_util.rename(new_mtemp, new_mdata)
    path_util.make_directory(new_mtemp)
    # Go through and purge all of the originals at this time
    print >> sys.stderr, "Cleaning up drives..."
    for to_delete in delete_on_success:
        path_util.remove(to_delete)
    print >> sys.stderr, "Successfully added partition '%s' to the pool." % new_partition_name
dry_run = False if len(sys.argv) > 1 and sys.argv[1] == '-f' else True manager = CodaLabManager() model = manager.model() CODALAB_HOME = manager.codalab_home """Move data/ directory over to a temp area, and create a staging tree for uuid-based storage""" DATA_DIR = os.path.join(CODALAB_HOME, 'data') FINAL_LOCATION = os.path.join(CODALAB_HOME, 'bundles') if not dry_run: path_util.make_directory(FINAL_LOCATION) """For each data hash, get a list of all bundles that have that hash, and make a copy of the bundle in the staging area under the UUID for the bundle.""" data_hashes = reduce(lambda x, y: x + y, path_util.ls(DATA_DIR)) for data_hash in data_hashes: orig_location = os.path.join(DATA_DIR, data_hash) bundles_with_hash = model.batch_get_bundles(data_hash=data_hash) # We'd prefer renaming bundles to making copies, but because we are converting from deduplicated storage # we need to make sure that we only perform renames if we map 1:1 UUID->Hash. rename_allowed = len(bundles_with_hash) <= 1 for bundle in bundles_with_hash: # Build the command to be executed in a subshell uuid = bundle.uuid copy_location = os.path.join(FINAL_LOCATION, uuid) command = '%s %s %s' % ('mv' if rename_allowed else 'cp -a', orig_location, copy_location) print(command) if not dry_run:
def health_check(self, model, force=False, compute_data_hash=False, repair_hashes=False):
    """
    In the MultiDiskBundleStore, bundle contents are stored on disk, and
    occasionally the disk gets out of sync with the database, in which case we
    make repairs in the following ways:

    1. Delete bundles whose UUID has no corresponding row in the database.
    2. Delete any files not beginning with a UUID string.
    3. For each bundle marked READY or FAILED, ensure that its dependencies are
       not located in the bundle directory; if they are, delete them.
    4. For a bundle <UUID> marked READY or FAILED, the <UUID>.cid,
       <UUID>.status, and <UUID>(-internal).sh files should not exist.

    |force|: Perform any destructive operations on the bundle store the health
        check determines are necessary. False by default.
    |compute_data_hash|: If True, compute the data_hash for every single bundle
        ourselves and see if it's consistent with what's in the database.
        False by default.
    |repair_hashes|: Together with |force|, overwrite mismatched data_hash
        values in the database with the recomputed digest.
    """
    UUID_REGEX = re.compile(r'^(%s)' % spec_util.UUID_STR)

    def _delete_path(loc):
        # Print the equivalent shell command; only actually delete when |force|.
        cmd = 'rm -r \'%s\'' % loc
        print(cmd)
        if force:
            path_util.remove(loc)

    def _get_uuid(path):
        # Extract the leading UUID from a basename, or None if there is none.
        fname = os.path.basename(path)
        try:
            return UUID_REGEX.match(fname).groups()[0]
        except:
            return None

    def _is_bundle(path):
        """Returns whether the given path is a bundle directory/file"""
        # A bundle's basename is exactly its UUID (no extension or suffix).
        return _get_uuid(path) == os.path.basename(path)

    def _check_bundle_paths(bundle_paths, db_bundle_by_uuid):
        """
        Takes in a list of bundle paths and a mapping of UUID to BundleModel,
        and returns a list of paths and subpaths that need to be removed.
        """
        to_delete = []
        # Batch get information for all bundles stored on-disk
        for bundle_path in bundle_paths:
            uuid = _get_uuid(bundle_path)
            # Screen for bundles stored on disk that are no longer in the database
            bundle = db_bundle_by_uuid.get(uuid, None)
            if bundle == None:
                to_delete += [bundle_path]
                continue
            # Delete dependencies stored inside of READY or FAILED bundles
            if bundle.state in [State.READY, State.FAILED]:
                dep_paths = [
                    os.path.join(bundle_path, dep.child_path) for dep in bundle.dependencies
                ]
                to_delete += filter(os.path.exists, dep_paths)
        return to_delete

    def _check_other_paths(other_paths, db_bundle_by_uuid):
        """
        Given a list of non-bundle paths, and a mapping of UUID to BundleModel,
        returns a list of paths to delete.
        """
        to_delete = []
        for path in other_paths:
            uuid = _get_uuid(path)
            bundle = db_bundle_by_uuid.get(uuid, None)
            if bundle == None:
                to_delete += [path]
                continue
            ends_with_ext = (
                path.endswith('.cid') or path.endswith('.status') or path.endswith('.sh')
            )
            if bundle.state in [State.READY, State.FAILED]:
                # Run-control files are stale once the bundle is terminal.
                if ends_with_ext:
                    to_delete += [path]
                    continue
                elif '.' in path:
                    print >>sys.stderr, 'WARNING: File %s is likely junk.' % path
        return to_delete

    partitions, _ = path_util.ls(self.partitions)
    trash_count = 0
    for partition in partitions:
        print >>sys.stderr, 'Looking for trash in partition %s...' % partition
        partition_path = os.path.join(
            self.partitions, partition, MultiDiskBundleStore.DATA_SUBDIRECTORY
        )
        # Flatten (dirs, files) from ls into absolute paths.
        entries = map(
            lambda f: os.path.join(partition_path, f),
            reduce(lambda d, f: d + f, path_util.ls(partition_path)),
        )
        bundle_paths = filter(_is_bundle, entries)
        other_paths = set(entries) - set(bundle_paths)
        uuids = map(_get_uuid, bundle_paths)
        db_bundles = model.batch_get_bundles(uuid=uuids)
        db_bundle_by_uuid = dict()
        for bundle in db_bundles:
            db_bundle_by_uuid[bundle.uuid] = bundle
        # Check both bundles and non-bundles and remove each
        for to_delete in _check_bundle_paths(bundle_paths, db_bundle_by_uuid):
            trash_count += 1
            _delete_path(to_delete)
        for to_delete in _check_other_paths(other_paths, db_bundle_by_uuid):
            trash_count += 1
            _delete_path(to_delete)
        # Check for each bundle if we need to compute its data_hash
        data_hash_recomputed = 0
        print >>sys.stderr, 'Checking data_hash of bundles in partition %s...' % partition
        for bundle_path in bundle_paths:
            uuid = _get_uuid(bundle_path)
            bundle = db_bundle_by_uuid.get(uuid, None)
            if bundle == None:
                continue
            if compute_data_hash or bundle.data_hash == None:
                dirs_and_files = (
                    path_util.recursive_ls(bundle_path)
                    if os.path.isdir(bundle_path)
                    else ([], [bundle_path])
                )
                data_hash = '0x%s' % path_util.hash_directory(bundle_path, dirs_and_files)
                if bundle.data_hash == None:
                    # Bundle never had a hash recorded: backfill it.
                    data_hash_recomputed += 1
                    print >>sys.stderr, 'Giving bundle %s data_hash %s' % (
                        bundle_path,
                        data_hash,
                    )
                    if force:
                        db_update = dict(data_hash=data_hash)
                        model.update_bundle(bundle, db_update)
                elif compute_data_hash and data_hash != bundle.data_hash:
                    data_hash_recomputed += 1
                    print >>sys.stderr, 'Bundle %s should have data_hash %s, actual digest is %s' % (
                        bundle_path,
                        bundle.data_hash,
                        data_hash,
                    )
                    if repair_hashes and force:
                        db_update = dict(data_hash=data_hash)
                        model.update_bundle(bundle, db_update)
    # NOTE(review): data_hash_recomputed is (re)initialized inside the partition
    # loop, so the summary below only reflects the last partition and raises
    # NameError when there are no partitions — confirm this is intended.
    if force:
        print >>sys.stderr, '\tDeleted %d objects from the bundle store' % trash_count
        print >>sys.stderr, '\tRecomputed data_hash for %d bundles' % data_hash_recomputed
    else:
        print >>sys.stderr, 'Dry-Run Statistics, re-run with --force to perform updates:'
        print >>sys.stderr, '\tObjects marked for deletion: %d' % trash_count
        print >>sys.stderr, '\tBundles that need data_hash recompute: %d' % data_hash_recomputed
def refresh_partitions(self):
    """Re-read the partition directory listing and cache it on self.nodes."""
    # path_util.ls returns (directories, files); keep only the directories.
    self.nodes = path_util.ls(self.partitions)[0]
manager = CodaLabManager() model = manager.model() CODALAB_HOME = manager.codalab_home """Move data/ directory over to a temp area, and create a staging tree for uuid-based storage""" DATA_DIR = os.path.join(CODALAB_HOME, 'data') FINAL_LOCATION = os.path.join(CODALAB_HOME, 'bundles') if not dry_run: path_util.make_directory(FINAL_LOCATION) """For each data hash, get a list of all bundles that have that hash, and make a copy of the bundle in the staging area under the UUID for the bundle.""" data_hashes = reduce(lambda x,y: x+y, path_util.ls(DATA_DIR)) for data_hash in data_hashes: orig_location = os.path.join(DATA_DIR, data_hash) bundles_with_hash = model.batch_get_bundles(data_hash=data_hash) # We'd prefer renaming bundles to making copies, but because we are converting from deduplicated storage # we need to make sure that we only perform renames if we map 1:1 UUID->Hash. rename_allowed = len(bundles_with_hash) <= 1 for bundle in bundles_with_hash: # Build the command to be executed in a subshell uuid = bundle.uuid copy_location = os.path.join(FINAL_LOCATION, uuid) command = '%s %s %s' % ('mv' if rename_allowed else 'cp -a', orig_location, copy_location) print command if not dry_run: exec_str = shlex.split(command)
def ls(self, target):
    """Resolve |target| to its on-disk path and return path_util.ls of it."""
    return path_util.ls(self.get_target_path(target))