Exemple #1
0
def install():
    '''Links every plugin reported by `list_plugins()` into the data-deploy directory.

    The destination directory is created if it does not exist yet.
    When something already lives at a plugin's destination (an existing path or a symlink),
    it is reported and removed before the new link is made.'''
    target_dir = data_deploy_destination()
    fs.mkdir(target_dir, exist_ok=True)
    for plugin in list_plugins():
        link_path = fs.join(target_dir, fs.basename(plugin))
        # The symlink check is separate from the existence check; presumably to also catch dangling links.
        occupied = fs.exists(link_path) or fs.issymlink(link_path)
        if occupied:
            print('Found dst (removing): {}'.format(link_path))
            fs.rm(link_path, ignore_errors=True)
        fs.ln(plugin, link_path)
Exemple #2
0
def _generate_module_ssh(silent=False):
    '''Generates the SSH-install module from its source files and imports the result.

    Args:
        silent (optional bool): Forwarded to the module generator.

    Returns:
        Whatever `importer.import_full_path` returns for the generated file.'''
    here = fs.dirname(fs.abspath(__file__))
    # Source files, listed in the order they are handed to the generator.
    source_parts = (
        ('internal', 'util', 'printer.py'),
        ('internal', 'remoto', 'modules', 'printer.py'),
        ('internal', 'remoto', 'modules', 'ssh_install.py'),
        ('internal', 'remoto', 'modules', 'remoto_base.py'),
    )
    sources = [fs.join(here, *parts) for parts in source_parts]
    target = fs.join(here, 'internal', 'remoto', 'modules', 'generated',
                     'install_ssh.py')

    generator = ModuleGenerator().with_module(fs).with_files(*sources)
    generator.generate(target, silent)
    return importer.import_full_path(target)
def deploy(reservation,
           paths=None,
           key_path=None,
           admin_id=None,
           connectionwrapper=None,
           stripe=defaults.stripe(),
           copy_multiplier=1,
           link_multiplier=1,
           mountpoint_path=start_defaults.mountpoint_path(),
           silent=False):
    '''Deploy data on remote RADOS-Ceph clusters, on an existing reservation.
    Dataset sizes can be inflated on the remote, using 2 strategies:
     1. link multiplication: Every dataset file receives `x` hardlinks.
        The hardlinks ensure the dataset size appears to be `x` times larger, but in reality, just the original file consumes space.
        This method is very fast, but has drawbacks: Only the original files are stored by Ceph.
        When using the RADOS-Arrow connector, this means Arrow will spam only the nodes that contain the original data.
        E.g: If we deploy 1 file of 64MB, with link multiplier 1024, the data will appear to be 64GB.
             The storage space used on RADOS-Ceph will still be 64MB, because we have 1 real file of 64MB, and 1023 hardlinks to that 1 file.
             The actual data is only stored on 3 OSDs (with default Ceph Striping factor 3).
             Now, Arrow will spam all work to those 3 OSDs containing the data, while the rest is idle.
     2. file multiplication: Every dataset file receives `x` copies.
        This method is slower than the one listed above, because real data has to be copied.
        It also actually increases storage usage, contrary to above.
        However, because we multiply real data, the load is guaranteed to be balanced across nodes, as far as Ceph does that.

    Note that multiple multiplication techniques can be combined, in which case they stack.
    E.g: If we deploy 1 file of 64MB, with a copy multiplier 4 and a link multiplier 1024, we get 4 real files (1 original + 3 copies),
         and each file gets 1023 hardlinks assigned to it.
    Args:
        reservation (`metareserve.Reservation`): Reservation object with all nodes to start RADOS-Ceph on.
        paths (optional list(str)): Data paths to offload to the remote cluster. Can be relative to CWD or absolute.
        key_path (optional str): Path to SSH key, which we use to connect to nodes. If `None`, we do not authenticate using an IdentityFile.
        admin_id (optional int): Node id of the ceph admin. If `None`, the node with lowest public ip value (string comparison) will be picked.
        connectionwrapper (optional RemotoSSHWrapper): If set, uses given connection, instead of building a new one.
        stripe (optional int): Ceph object stripe property, in megabytes.
        copy_multiplier (optional int): If set to a value `x`, makes the dataset appear `x` times larger by copying every file `x`-1 times. Does nothing if `x`<=1.
        link_multiplier (optional int): If set to a value `x`, makes the dataset appear `x` times larger by adding `x`-1 hardlinks for every transferred file. Does nothing if `x`<=1.
        mountpoint_path (optional str): Path where CephFS is mounted on all nodes. Passed to the plugin as the remote destination.
        silent (optional bool): If set, we only print errors and critical info. Otherwise, more verbose output.

    Returns:
        `True` on success, `False` otherwise.'''
    module = importer.import_full_path(
        fs.join(fs.dirname(fs.abspath(__file__)), 'internal', 'data_deploy',
                'rados_deploy.deploy.plugin.py'))
    kwargs = {
        'admin_id': admin_id,
        'connectionwrapper': connectionwrapper,
        'stripe': stripe,
        'copy_multiplier': copy_multiplier,
        'link_multiplier': link_multiplier
    }
    # The previous code passed an undefined name `dest` here and never used `mountpoint_path`.
    # NOTE(review): the CephFS mountpoint is assumed to be the intended remote destination - confirm against the plugin.
    return module.execute(reservation, key_path, paths, mountpoint_path,
                          silent, **kwargs)
Exemple #4
0
def _execute_internal(connectionwrapper, reservation, paths, dest, silent, copy_multiplier, link_multiplier, admin_node, stripe):
    '''Validates the local files, prepares them on the remote, and starts the rsync transfers.

    Args:
        connectionwrapper: Wrapper exposing `.connection` and `.ssh_config`. A falsy value counts as a failed connection.
        reservation: Not used in the visible part of this function.
        paths (iterable(str)): Local files and/or directories to deploy. Directories are walked recursively.
        dest (str): Remote directory that receives the data.
        silent (bool): If set, progress output is suppressed.
        copy_multiplier (int): Values <= 1 add no copies; otherwise `copy_multiplier`-1 copies are requested per file.
        link_multiplier (int): Values <= 1 add no links; otherwise `link_multiplier`-1 links are requested per file.
        admin_node: Node whose `ip_public` is the rsync target host.
        stripe (int): Stripe size in megabytes. Files larger than `stripe` MiB are rejected.

    Returns:
        `False` when the connection is missing, the attribute check fails, a file is too large, or pre-deployment fails.'''
    if not connectionwrapper:
        printe('Could not connect to admin: {}'.format(admin_node))
        return False

    if not _ensure_attr(connectionwrapper.connection):
        return False

    max_filesize = stripe * 1024 * 1024
    copies_to_add = max(1, copy_multiplier) - 1
    links_to_add = max(1, link_multiplier) - 1
    # ThreadPoolExecutor rejects max_workers <= 0, which `cpu_count()-1` yields on a single-core host.
    num_workers = max(1, cpu_count() - 1)
    with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
        files_to_deploy = []
        for path in paths:
            if fs.isfile(path):
                filesize = os.path.getsize(path)
                if filesize > max_filesize:
                    printe('File {} is too large ({} bytes, max allowed is {} bytes)'.format(path, filesize, max_filesize))
                    return False
                files_to_deploy.append((path, fs.join(dest, fs.basename(path))))
            elif fs.isdir(path):
                # Iterative walk: `to_visit` holds directories that still have to be listed.
                to_visit = [path]
                while to_visit:
                    visit_now = to_visit.pop()
                    to_visit += list(fs.ls(visit_now, only_dirs=True, full_paths=True))
                    files = list(fs.ls(visit_now, only_files=True, full_paths=True))
                    files_too_big = [x for x in files if os.path.getsize(x) > max_filesize]
                    if files_too_big:
                        for x in files_too_big:
                            printe('File {} is too large ({} bytes, max allowed is {} bytes)'.format(x, os.path.getsize(x), max_filesize))
                        return False
                    # relpath (instead of slicing by len(path)+1) stays correct when `path` has a trailing separator.
                    files_to_deploy += [(x, fs.join(dest, os.path.relpath(x, path))) for x in files]
        futures_pre_deploy = [executor.submit(_pre_deploy_remote_file, connectionwrapper.connection, stripe, copies_to_add, links_to_add, source_file, dest_file) for (source_file, dest_file) in files_to_deploy]
        if not all(x.result() for x in futures_pre_deploy):
            printe('Pre-data deployment error occured.')
            return False

        if not silent:
            print('Transferring data...')

        def _rsync(path):
            '''Transfers one top-level path with rsync; returns `True` when rsync exits with 0.'''
            # Argument list instead of a shell string, so local paths containing spaces or shell metacharacters are passed verbatim.
            command = [
                'rsync',
                '-e', 'ssh -F {}'.format(connectionwrapper.ssh_config.name),
                '-q', '-aHAXL', '--inplace',
                path,
                '{}:{}'.format(admin_node.ip_public, fs.join(dest, fs.basename(path))),
            ]
            return subprocess.call(command) == 0

        # Maps every top-level path to the future of its transfer (the previous set-style literal was a syntax error).
        futures_rsync = {path: executor.submit(_rsync, path) for path in paths}
        # NOTE(review): the visible source ends here; whatever consumed `futures_rsync` and produced the final return value is missing and must be restored.
Exemple #5
0
def _generate_module_rados(silent=False):
    '''Generates the RADOS-arrow-install module from its source files and imports the result.

    Args:
        silent (optional bool): Forwarded to the module generator.

    Returns:
        Whatever `importer.import_full_path` returns for the generated file.'''
    here = fs.dirname(fs.abspath(__file__))
    # Source files, listed in the order they are handed to the generator.
    source_parts = (
        ('internal', 'util', 'printer.py'),
        ('internal', 'remoto', 'modules', 'printer.py'),
        ('internal', 'util', 'executor.py'),
        ('internal', 'remoto', 'env.py'),
        ('internal', 'remoto', 'modules', 'rados_install.py'),
        ('internal', 'remoto', 'modules', 'remoto_base.py'),
    )
    sources = [fs.join(here, *parts) for parts in source_parts]
    target = fs.join(here, 'internal', 'remoto', 'modules', 'generated',
                     'install_rados.py')

    generator = ModuleGenerator().with_modules(fs, importer).with_files(*sources)
    generator.generate(target, silent)
    return importer.import_full_path(target)
def _generate_module_stop(silent=False):
    '''Generates the RADOS-Ceph-stop (bluestore) module from available sources.

    Args:
        silent (optional bool): Forwarded to the module generator. Previously ignored (generation always ran with `silent=True`).

    Returns:
        Whatever `importer.import_full_path` returns for the generated file.'''
    # This file sits one directory below the package root, hence the double dirname.
    root = fs.dirname(fs.dirname(fs.abspath(__file__)))
    generation_loc = fs.join(root, 'internal', 'remoto', 'modules', 'generated', 'stop_rados_bluestore.py')
    # Source files, listed in the order they are handed to the generator.
    file_parts = (
        ('internal', 'util', 'printer.py'),
        ('internal', 'remoto', 'modules', 'printer.py'),
        ('internal', 'util', 'executor.py'),
        ('thirdparty', 'sshconf', 'sshconf.py'),
        ('internal', 'remoto', 'ssh_wrapper.py'),
        ('designation.py',),
        ('internal', 'remoto', 'modules', 'rados', 'rados_util.py'),
        ('internal', 'remoto', 'modules', 'rados', 'config.py'),
        ('internal', 'remoto', 'modules', 'rados', 'pool.py'),
        ('internal', 'remoto', 'modules', 'rados', 'cephfs.py'),
        ('internal', 'remoto', 'modules', 'rados', 'manager.py'),
        ('internal', 'remoto', 'modules', 'rados', 'mds.py'),
        ('internal', 'remoto', 'modules', 'rados', 'monitor.py'),
        ('internal', 'remoto', 'modules', 'rados', 'osd.py'),
        ('internal', 'remoto', 'modules', 'stop', 'bluestore.py'),
        ('internal', 'remoto', 'modules', 'remoto_base.py'),
    )
    files = [fs.join(root, *parts) for parts in file_parts]
    import metareserve.reservation as reserve
    ModuleGenerator().with_modules(fs, reserve).with_files(*files).generate(generation_loc, allowed_imports=['remoto', 'remoto.process'], silent=silent)
    return importer.import_full_path(generation_loc)
def clean(reservation,
          paths,
          key_path=None,
          admin_id=None,
          connectionwrapper=None,
          mountpoint_path=start_defaults.mountpoint_path(),
          silent=False):
    '''Cleans data from the RADOS-Ceph cluster, on an existing reservation.
    Args:
        reservation (`metareserve.Reservation`): Reservation object with all nodes to start RADOS-Ceph on.
        paths (list(str)): Data paths to delete from the remote cluster. Mountpoint path is always prepended.
                           If no non-empty path is given, everything below the mountpoint is deleted.
        key_path (optional str): Path to SSH key, which we use to connect to nodes. If `None`, we do not authenticate using an IdentityFile.
        admin_id (optional int): Node id of the ceph admin. If `None`, the node with lowest public ip value (string comparison) will be picked.
        connectionwrapper (optional RemotoSSHWrapper): If set, uses given connection, instead of building a new one. A given connection is not closed here.
        mountpoint_path (optional str): Path where CephFS is mounted on all nodes.
        silent (optional bool): If set, we only print errors and critical info. Otherwise, more verbose output.

    Raises:
        ValueError: When the reservation is `None` or empty.

    Returns:
        `True` on success, `False` otherwise.'''
    if (not reservation) or len(reservation) == 0:
        raise ValueError('Reservation does not contain any items' +
                         (' (reservation=None)' if not reservation else ''))

    admin_picked, _ = _pick_admin(reservation, admin=admin_id)
    print('Picked admin node: {}'.format(admin_picked))

    local_connections = connectionwrapper is None
    if local_connections:
        ssh_kwargs = {
            'IdentitiesOnly': 'yes',
            'User': admin_picked.extra_info['user'],
            'StrictHostKeyChecking': 'no'
        }
        if key_path:
            ssh_kwargs['IdentityFile'] = key_path

        connectionwrapper = get_wrapper(admin_picked.ip_public,
                                        silent=True,
                                        ssh_params=ssh_kwargs)

    try:
        if not any(paths):
            _, _, exitcode = remoto.process.check(
                connectionwrapper.connection,
                'sudo rm -rf {}/*'.format(mountpoint_path),
                shell=True)
            state_ok = exitcode == 0
        else:
            # Skip empty entries (indexing them used to raise IndexError) and strip every leading '/'.
            # NOTE(review): assumes `fs.join` discards earlier parts on an absolute component, like `os.path.join` - then '//x' would escape the mountpoint.
            relative_paths = [x.lstrip('/') for x in paths if x]
            # ThreadPoolExecutor rejects max_workers <= 0, which `cpu_count()-1` yields on a single-core host.
            num_workers = max(1, cpu_count() - 1)
            with concurrent.futures.ThreadPoolExecutor(
                    max_workers=num_workers) as executor:
                if not silent:
                    print('Deleting data...')
                futures_rm = [
                    executor.submit(remoto.process.check,
                                    connectionwrapper.connection,
                                    'sudo rm -rf {}'.format(
                                        fs.join(mountpoint_path, path)),
                                    shell=True) for path in relative_paths
                ]

                # Third element of each result is the exit code of the remote command.
                state_ok = all(x.result()[2] == 0 for x in futures_rm)

        if state_ok:
            prints('Data deleted.')
        else:
            printe('Could not delete data.')
    finally:
        # Only close a connection we opened ourselves, also when deletion raised.
        if local_connections:
            close_wrappers([connectionwrapper])
    return state_ok
Exemple #8
0
def remove():
    '''Removes the entry of every plugin from the data-deploy directory.

    Removal is requested with `ignore_errors=True`.'''
    target_dir = data_deploy_destination()
    for plugin in list_plugins():
        link_path = fs.join(target_dir, fs.basename(plugin))
        fs.rm(link_path, ignore_errors=True)
Exemple #9
0
def data_deploy_destination():
    '''Returns the path of the `.data-deploy` directory inside the current user's home directory.'''
    home_dir = os.path.expanduser('~')
    return fs.join(home_dir, '.data-deploy')
Exemple #10
0
def arrowdir(install_dir):
    '''Returns the RADOS-arrow compilation directory: the `arrow` entry inside `install_dir`.'''
    subdir_name = 'arrow'
    return fs.join(install_dir, subdir_name)
Exemple #11
0
def cephdeploydir(install_dir):
    '''Returns the ceph-deploy source directory: the `ceph-deploy` entry inside `install_dir`.'''
    subdir_name = 'ceph-deploy'
    return fs.join(install_dir, subdir_name)