import concurrent.futures
import subprocess
from multiprocessing import cpu_count

import remoto.process

# Helpers referenced below (loc, fs, printe, prints, get_wrappers, close_wrappers,
# _merge_kwargs, _get_master_and_workers, _generate_module_submit, _submit_spark,
# defaults, install_defaults) are defined or imported elsewhere in this module.


def _install_spark(connection, spark_module, install_dir, spark_url, force_reinstall, silent=False, retries=5):
    '''Installs Spark on the remote host by executing the given module's `spark_install` function there.'''
    remote_module = connection.import_module(spark_module)
    return remote_module.spark_install(loc.sparkdir(install_dir), spark_url, force_reinstall, silent, retries)


def _start_spark_worker(remote_connection, module, install_dir, workdir, master_picked, master_port=7077, use_sudo=False, silent=False, retries=5):
    '''Starts a Spark worker on the remote host, pointing it to the picked master node.'''
    remote_module = remote_connection.import_module(module)
    return remote_module.start_worker(loc.sparkdir(install_dir), workdir, master_picked.ip_local, master_port, use_sudo, silent, retries)


def _start_spark_master(remote_connection, module, install_dir, host, host_webui, port=7077, webui_port=2205, use_sudo=False, silent=False, retries=5):
    '''Starts the Spark master on the remote host, serving on the given port and web-UI port.'''
    remote_module = remote_connection.import_module(module)
    return remote_module.start_master(loc.sparkdir(install_dir), host, host_webui, port, webui_port, use_sudo, silent, retries)
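# How the start helpers compose (a sketch; `master`, the connections and the
# generated remote `module` come from elsewhere in this project, and the
# host/port values below are illustrative placeholders):
#
#     _start_spark_master(master_conn, module, install_dir, host=master.ip_local,
#                         host_webui=master.ip_public, port=7077, webui_port=2205)
#     for worker_conn in worker_conns:
#         _start_spark_worker(worker_conn, module, install_dir, workdir,
#                             master_picked=master, master_port=7077)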
def _uninstall_spark(connection, install_dir):
    '''Removes the remote Spark installation directory.'''
    remoto.process.run(connection, ['rm', '-rf', loc.sparkdir(install_dir)])


def _stop_spark(remote_connection, module, install_dir, workdir=None, use_sudo=False, silent=False, retries=5):
    '''Stops all Spark daemons on the remote host, optionally cleaning the given workdir.'''
    remote_module = remote_connection.import_module(module)
    return remote_module.stop_all(loc.sparkdir(install_dir), workdir, use_sudo, silent, retries)
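# All of the above follow the same remoto pattern: ship a plain Python module to
# the remote interpreter with `connection.import_module(...)`, then call its
# functions so they execute on the remote host. A minimal standalone sketch,
# assuming a direct `remoto.Connection` instead of this project's
# RemotoSSHWrapper (hostname and module are placeholders):
#
#     import remoto.process
#     import some_module                       # functions here will run remotely
#
#     conn = remoto.Connection('node0.example.com')
#     remote_mod = conn.import_module(some_module)
#     ok = remote_mod.spark_install('/opt/spark', url, False, False, 5)  # runs on node0
#     out, err, exitcode = remoto.process.check(conn, ['ls', '/opt/spark'])
#     conn.exit()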
def submit(reservation, command, paths=[], install_dir=install_defaults.install_dir(), key_path=None, connectionwrappers=None, application_dir=defaults.application_dir(), master_id=None, use_sudo=False, silent=False):
    '''Submit applications using spark-submit on the remote Spark cluster, on an existing reservation.
    Args:
        reservation (`metareserve.Reservation`): Reservation object with all nodes we run Spark on.
            Important when deploying in cluster mode, as any node could be chosen to boot the JAR on, meaning every node must have the JAR.
            In client mode, providing only the master node suffices.
        command (str): Command to propagate to the remote "spark-submit" executable.
        paths (optional list(str)): Data paths to offload to the remote cluster. Can be relative to CWD or absolute.
        install_dir (str): Location on the remote host where Spark (and any locally installed Java) is installed.
        key_path (optional str): Path to SSH key, which we use to connect to nodes. If `None`, we do not authenticate using an IdentityFile.
        connectionwrappers (optional dict(metareserve.Node, RemotoSSHWrapper)): If set, uses the provided connections instead of making new ones.
        application_dir (optional str): Location on the remote host where we export all given `paths` to.
            Illegal values: 1. ''. 2. '~/'. The reason is that we use rsync for fast file transfer, which messes up homedir permissions if set as destination target.
        master_id (optional int): Node id of the Spark master. If `None`, the node with the lowest public ip value (string comparison) is picked.
        use_sudo (optional bool): If set, uses sudo when deploying.
        silent (optional bool): If set, we only print errors and critical info. Otherwise, output is more verbose.

    Returns:
        `True` on success, `False` otherwise.'''
    if not reservation or len(reservation) == 0:
        raise ValueError('Reservation does not contain any items' + (' (reservation=None)' if not reservation else ''))
    if application_dir == '~/' or application_dir == '~' or not application_dir:
        raise ValueError('application_dir must not be equal to "{}". Check the docs.'.format(application_dir))
    if application_dir.startswith('~/'):
        application_dir = application_dir[2:]

    master_picked, workers_picked = _get_master_and_workers(reservation, master_id)
    print('Picked master node: {}'.format(master_picked))

    local_connections = connectionwrappers is None
    if local_connections:
        ssh_kwargs = {'IdentitiesOnly': 'yes', 'User': master_picked.extra_info['user'], 'StrictHostKeyChecking': 'no'}
        if key_path:
            ssh_kwargs['IdentityFile'] = key_path
        connectionwrappers = get_wrappers(reservation.nodes, lambda node: node.ip_public, ssh_params=lambda node: _merge_kwargs(ssh_kwargs, {'User': node.extra_info['user']}), silent=silent)

    with concurrent.futures.ThreadPoolExecutor(max_workers=cpu_count() - 1) as executor:
        # Verify the spark-submit executable exists on the master before doing anything else.
        _, _, exitcode = remoto.process.check(connectionwrappers[master_picked].connection, 'ls {}'.format(fs.join(loc.sparkdir(install_dir), 'bin', 'spark-submit')), shell=True)
        if exitcode != 0:
            if local_connections:
                close_wrappers(connectionwrappers)
            raise FileNotFoundError('Could not find spark-submit executable on master. Expected at: {}'.format(fs.join(loc.sparkdir(install_dir), 'bin', 'spark-submit')))

        mkdir_fun = lambda conn: remoto.process.check(conn, 'mkdir -p {}'.format(application_dir), shell=True)[2] == 0
        futures_application_mkdir = [executor.submit(mkdir_fun, connectionwrappers[x].connection) for x in reservation.nodes]
        if not all(x.result() for x in futures_application_mkdir):
            printe('Could not make directory "{}" on all nodes.'.format(application_dir))
            if local_connections:
                close_wrappers(connectionwrappers)
            return False

        if any(paths):
            paths = [fs.abspath(x) for x in paths]
            if not silent:
                print('Transferring application data...')
            if any(not (fs.exists(path) or fs.issymlink(path)) for path in paths):
                printe('Application data transfer found non-existing source paths:')
                for path in paths:
                    if not (fs.exists(path) or fs.issymlink(path)):
                        print('    {}'.format(path))
                if local_connections:
                    close_wrappers(connectionwrappers)
                return False

            dests = [fs.join(application_dir, fs.basename(path)) for path in paths]
            # Rsync every path to every node, using the SSH config of each connection wrapper.
            rsync_global_net_fun = lambda node, conn_wrapper, path, dest: subprocess.call('rsync -e "ssh -F {}" -azL {} {}:{}'.format(conn_wrapper.ssh_config.name, path, node.ip_public, dest), shell=True) == 0
            futures_rsync = [executor.submit(rsync_global_net_fun, node, conn_wrapper, path, dest) for (path, dest) in zip(paths, dests) for (node, conn_wrapper) in connectionwrappers.items()]
            if not all(x.result() for x in futures_rsync):
                printe('Could not deploy data to all remote nodes.')
                if local_connections:
                    close_wrappers(connectionwrappers)
                return False
            if not silent:
                prints('Application data deployed.')

    if install_dir[0] != '/' and install_dir[0] != '~':
        # We deal with a relative path as install_dir. This means '~/' must be prepended, so we can execute this from non-home cwds.
        install_dir = '~/' + install_dir

    run_cmd = '{} {}'.format(fs.join(loc.sparkdir(install_dir), 'bin', 'spark-submit'), command)
    if use_sudo:
        run_cmd = 'sudo ' + run_cmd

    submit_module = _generate_module_submit()
    retval = _submit_spark(connectionwrappers[master_picked].connection, submit_module, run_cmd, application_dir, silent=silent)
    if local_connections:
        close_wrappers(connectionwrappers)
    return retval
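# Example invocation of `submit` (a sketch; `reservation` is assumed to come
# from a metareserve allocator, and the jar, class name and key path below are
# hypothetical placeholders):
#
#     jar = 'app.jar'                        # deployed under application_dir on every node
#     ok = submit(reservation,
#                 '--class org.example.Main {}'.format(jar),
#                 paths=['build/{}'.format(jar)],
#                 key_path='~/.ssh/id_rsa',
#                 master_id=None,            # lowest public ip is picked as master
#                 silent=True)
#     if not ok:
#         printe('Spark job submission failed.')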