def _pre_deploy_remote_file(connection, stripe, copies_amount, links_amount, source_file, dest_file):
    '''Prepares a single remote file before the actual data transfer: creates the
    destination path, optionally pre-creates copies/hardlinks, and applies the Ceph
    object-size (striping) attribute to the file and all its copies.

    Args:
        connection: Remoto connection to a cluster node (used with `remoto.process.check`).
        stripe (int): Ceph object stripe size, in megabytes (converted to bytes below).
        copies_amount (int): Number of `.copy.N` duplicates to create (0 disables copying).
        links_amount (int): Number of hardlinks to create per file (0 disables linking).
        source_file (str): Local source path. NOTE(review): unused in this function —
            presumably consumed by the caller that performs the actual transfer; confirm.
        dest_file (str): Absolute destination path on the remote cluster.

    Returns:
        `True` on success, `False` otherwise.'''
    # Ensure the destination directory exists, then create an empty target file.
    remoto.process.check(connection, 'mkdir -p {}'.format(fs.dirname(dest_file)), shell=True)
    _, _, exitcode = remoto.process.check(connection, 'touch {}'.format(dest_file), shell=True)
    if exitcode != 0:
        printe('Could not touch file at cluster: {}'.format(dest_file))
        return False
    # Pre-create the requested copies of the (still empty) file on the remote.
    if copies_amount > 0 and not data_deploy.shared.copy.copy_single(connection, dest_file, copies_amount, silent=False):
        return False
    # Pre-create hardlinks for the file and each of its copies (expression matches them all).
    if links_amount > 0 and not data_deploy.shared.link.link(connection, expression=data_deploy.shared.copy.copy_expression(dest_file, copies_amount), num_links=links_amount, silent=False):
        return False
    # Remote script: set the CephFS file-layout object size (stripe, in bytes) on the
    # original file and every `.copy.N` duplicate, in parallel via a thread pool.
    # {0} = dest_file, {1} = copies_amount, {2} = stripe size in bytes.
    # NOTE(review): `max_workers=cpu_count()-1` raises ValueError on a 1-CPU node — confirm
    # target nodes always have >= 2 CPUs.
    cmd = '''sudo python3 -c "
import itertools
import subprocess
import concurrent.futures
from multiprocessing import cpu_count
with concurrent.futures.ThreadPoolExecutor(max_workers=cpu_count()-1) as executor:
    futures_setfattr = [executor.submit(subprocess.call, 'setfattr --no-dereference -n ceph.file.layout.object_size -v {2} {{}}'.format(x), shell=True) for x in itertools.chain(['{0}'], ('{0}.copy.{{}}'.format(x) for x in range({1})))]
    results = [x.result() == 0 for x in futures_setfattr]
exit(0 if all(results) else 1)
" '''.format(dest_file, copies_amount, stripe*1024*1024)
    out, error, exitcode = remoto.process.check(connection, cmd, shell=True)
    if exitcode != 0:
        printe('Could not stripe file{} at cluster: {}. Is the cluster running?\nReason: Out: {}\n\nError: {}'.format(' (and all {} copies)'.format(copies_amount) if copies_amount > 0 else '', dest_file, '\n'.join(out), '\n'.join(error)))
        return False
    return True
def _generate_module_ssh(silent=False):
    '''Generates SSH-install module from available sources.'''
    # All source files live relative to this file's directory; compute it once.
    here = fs.dirname(fs.abspath(__file__))
    generation_loc = fs.join(here, 'internal', 'remoto', 'modules', 'generated', 'install_ssh.py')
    sources = [fs.join(here, *tail) for tail in (
        ('internal', 'util', 'printer.py'),
        ('internal', 'remoto', 'modules', 'printer.py'),
        ('internal', 'remoto', 'modules', 'ssh_install.py'),
        ('internal', 'remoto', 'modules', 'remoto_base.py'),
    )]
    ModuleGenerator().with_module(fs).with_files(*sources).generate(generation_loc, silent)
    return importer.import_full_path(generation_loc)
def deploy(reservation, paths=None, key_path=None, admin_id=None, connectionwrapper=None, stripe=defaults.stripe(), copy_multiplier=1, link_multiplier=1, mountpoint_path=start_defaults.mountpoint_path(), silent=False):
    '''Deploy data on remote RADOS-Ceph clusters, on an existing reservation.
    Dataset sizes can be inflated on the remote, using 2 strategies:
     1. link multiplication: Every dataset file receives `x` hardlinks.
        The hardlinks ensure the dataset size appears to be `x` times larger, but in reality, just the original file consumes space.
        This method is very fast, but has drawbacks: Only the original files are stored by Ceph.
        When using the RADOS-Arrow connector, this means Arrow will spam only the nodes that contain the original data.
        E.g: If we deploy 1 file of 64MB, with link multiplier 1024, the data will apear to be 64GB.
             The storage space used on RADOS-Ceph will still be 64MB, because we have 1 real file of 64MB, and 1023 hardlinks to that 1 file.
             The actual data is only stored on 3 OSDs (with default Ceph Striping factor 3).
             Now, Arrow will spam all work to those 3 OSDs containing the data, while the rest is idle.
     2. file multiplication: Every dataset file receives `x` copies.
        This method is slower than the one listed above, because real data has to be copied.
        It also actually increases storage usage, contrary to above.
        However, because we multiply real data, the load is guaranteed to be balanced across nodes, as far as Ceph does that.

    Note that mutiple multiplication techniques can be combined, in which case they stack.
    E.g: If we deploy 1 file of 64MB, with a copy multiplier 4 and a link multiplier 1024, we get 4 real files (1 original + 3 copies),
    and each file gets 1023 hardlinks assigned to it.

    Args:
        reservation (`metareserve.Reservation`): Reservation object with all nodes to start RADOS-Ceph on.
        paths (optional list(str)): Data paths to offload to the remote cluster. Can be relative to CWD or absolute.
        key_path (optional str): Path to SSH key, which we use to connect to nodes. If `None`, we do not authenticate using an IdentityFile.
        admin_id (optional int): Node id of the ceph admin. If `None`, the node with lowest public ip value (string comparison) will be picked.
        connectionwrapper (optional RemotoSSHWrapper): If set, uses given connection, instead of building a new one.
        stripe (optional int): Ceph object stripe property, in megabytes.
        copy_multiplier (optional int): If set to a value `x`, makes the dataset appear `x` times larger by copying every file `x`-1 times. Does nothing if `x`<=1.
        link_multiplier (optional int): If set to a value `x`, makes the dataset appear `x` times larger by adding `x`-1 hardlinks for every transferred file. Does nothing if `x`<=1.
        mountpoint_path (optional str): Path where CephFS is mounted on all nodes.
        silent (optional bool): If set, we only print errors and critical info. Otherwise, more verbose output.

    Returns:
        `True` on success, `False` otherwise.'''
    # Load the data-deploy plugin that implements the actual transfer logic.
    module = importer.import_full_path(fs.join(fs.dirname(fs.abspath(__file__)), 'internal', 'data_deploy', 'rados_deploy.deploy.plugin.py'))
    args = []
    kwargs = {'admin_id': admin_id, 'connectionwrapper': connectionwrapper, 'stripe': stripe, 'copy_multiplier': copy_multiplier, 'link_multiplier': link_multiplier}
    # Bug fix: the original passed an undefined name `dest` here (NameError at runtime).
    # The destination for the plugin is the CephFS mountpoint, which this function
    # accepts as `mountpoint_path` and otherwise never used.
    return module.execute(reservation, key_path, paths, mountpoint_path, silent, *args, **kwargs)
def generate(self, outputpath, allowed_imports=None, silent=False):
    '''Generates the final, non-stl dependency-free module to be used with Remoto remote module execution.
    Captures all import commands and ensures they are present only once for the entire module.
    Warning: Removes all non-stl import statements.

    Args:
        outputpath (str): Location to store module, including output filename. Creates every directory that does not exist.
        allowed_imports (optional iterable(str)): If set to an iterable, does not remove given import statements.
        silent (optional bool): If set, skips printing warnings when non-standard imports are encountered.'''
    # Ensure the destination directory exists before writing.
    dest_dir = fs.dirname(outputpath)
    if not fs.isdir(dest_dir):
        fs.mkdir(dest_dir, exist_ok=True)
    # Collect de-duplicated stdlib imports across all registered files.
    stl_imports, stl_imports_from = self._read_imports(allowed_imports=allowed_imports, silent=silent)
    with open(outputpath, 'w') as f:
        header = '''
################################################################################
# Generated by the meta modulegenerator
# Processed {} files/modules:
{}
################################################################################
'''.format(len(self._files), '\n'.join('# {}'.format(x) for x in self._files))
        f.write(header)
        # Emit plain `import` statements first, then `from ... import ...` forms.
        importstring = '\n' + '\n'.join('import {}'.format(name) for name in stl_imports)
        importstring += '\n'
        # A 3-tuple with a non-None third element carries an `as` alias.
        # Fix: use identity comparison with None (`is not None`) per PEP 8,
        # instead of the original `!= None`.
        importstring += '\n'.join('from {} import {} as {}'.format(*names) if len(names) == 3 and names[2] is not None else 'from {} import {}'.format(*names) for names in stl_imports_from)
        f.write(importstring)
        # Append each source file's non-import content, separated by banners.
        for x in self._files:
            content = self._read_non_imports(x)
            f.write('''
################################################################################
# Created from file {}
'''.format(x))
            f.write(content)
            f.write('''
################################################################################
''')
def _generate_module_rados(silent=False):
    '''Generates RADOS-arrow-install module from available sources.'''
    # All source files live relative to this file's directory; compute it once.
    here = fs.dirname(fs.abspath(__file__))
    generation_loc = fs.join(here, 'internal', 'remoto', 'modules', 'generated', 'install_rados.py')
    sources = [fs.join(here, *tail) for tail in (
        ('internal', 'util', 'printer.py'),
        ('internal', 'remoto', 'modules', 'printer.py'),
        ('internal', 'util', 'executor.py'),
        ('internal', 'remoto', 'env.py'),
        ('internal', 'remoto', 'modules', 'rados_install.py'),
        ('internal', 'remoto', 'modules', 'remoto_base.py'),
    )]
    ModuleGenerator().with_modules(fs, importer).with_files(*sources).generate(generation_loc, silent)
    return importer.import_full_path(generation_loc)
def _generate_module_stop(silent=False):
    '''Generates RADOS-Ceph-stop module from available sources.

    Note: the original docstring said "start"; this function clearly assembles the
    stop module (`stop_rados_bluestore.py` from `stop/bluestore.py`).

    Args:
        silent (optional bool): If set, skips printing warnings during generation.

    Returns:
        The imported, freshly generated module.'''
    # Source files live relative to this file's parent directory; compute it once.
    base = fs.dirname(fs.dirname(fs.abspath(__file__)))
    generation_loc = fs.join(base, 'internal', 'remoto', 'modules', 'generated', 'stop_rados_bluestore.py')
    files = [fs.join(base, *tail) for tail in (
        ('internal', 'util', 'printer.py'),
        ('internal', 'remoto', 'modules', 'printer.py'),
        ('internal', 'util', 'executor.py'),
        ('thirdparty', 'sshconf', 'sshconf.py'),
        ('internal', 'remoto', 'ssh_wrapper.py'),
        ('designation.py',),
        ('internal', 'remoto', 'modules', 'rados', 'rados_util.py'),
        ('internal', 'remoto', 'modules', 'rados', 'config.py'),
        ('internal', 'remoto', 'modules', 'rados', 'pool.py'),
        ('internal', 'remoto', 'modules', 'rados', 'cephfs.py'),
        ('internal', 'remoto', 'modules', 'rados', 'manager.py'),
        ('internal', 'remoto', 'modules', 'rados', 'mds.py'),
        ('internal', 'remoto', 'modules', 'rados', 'monitor.py'),
        ('internal', 'remoto', 'modules', 'rados', 'osd.py'),
        ('internal', 'remoto', 'modules', 'stop', 'bluestore.py'),
        ('internal', 'remoto', 'modules', 'remoto_base.py'),
    )]
    import metareserve.reservation as reserve
    # Bug fix: the `silent` parameter was accepted but ignored (hard-coded `silent=True`),
    # unlike the sibling generator functions which forward it. Forward it here too.
    ModuleGenerator().with_modules(fs, reserve).with_files(*files).generate(generation_loc, allowed_imports=['remoto', 'remoto.process'], silent=silent)
    return importer.import_full_path(generation_loc)
def list_plugins():
    '''Returns all paths to files with a name indicating a data-deploy plugin.'''
    # Generator function instead of a generator expression: same lazy iteration,
    # same filtering on the plugin filename suffix.
    for candidate in fs.ls(fs.dirname(__file__), only_files=True, full_paths=True):
        if candidate.endswith('.deploy.plugin.py'):
            yield candidate