def install(
        self,
        ssh_client: paramiko.client.SSHClient,
        cluster: FlintrockCluster):
    """
    Download and unpack Hadoop on the target node and put its CLI tools
    on the PATH.

    Uploads the download-hadoop.py helper over SFTP, fetches the Hadoop
    tarball remotely, unpacks it into ~/hadoop, and symlinks the Hadoop
    executables into /usr/local/bin.
    """
    print("[{h}] Installing HDFS...".format(
        h=ssh_client.get_transport().getpeername()[0]))
    with ssh_client.open_sftp() as sftp:
        sftp.put(
            localpath=os.path.join(SCRIPTS_DIR, 'download-hadoop.py'),
            remotepath='/tmp/download-hadoop.py')
    ssh_check_output(
        client=ssh_client,
        command="""
            set -e
            python /tmp/download-hadoop.py "{version}" "{download_source}"
            mkdir "hadoop"
            mkdir "hadoop/conf"
            tar xzf "hadoop-{version}.tar.gz" -C "hadoop" --strip-components=1
            rm "hadoop-{version}.tar.gz"
            for f in $(find hadoop/bin -type f -executable -not -name '*.cmd'); do
                sudo ln -s "$(pwd)/$f" "/usr/local/bin/$(basename $f)"
            done
        """.format(version=self.version, download_source=self.download_source))
def install(
        self,
        ssh_client: paramiko.client.SSHClient,
        cluster: FlintrockCluster):
    """
    Download and unpack Hadoop on the target node.

    Uploads the download-hadoop.py helper over SFTP, then fetches the
    Hadoop tarball remotely and unpacks it into ~/hadoop.
    """
    print("[{h}] Installing HDFS...".format(
        h=ssh_client.get_transport().getpeername()[0]))
    with ssh_client.open_sftp() as sftp:
        sftp.put(
            localpath=os.path.join(SCRIPTS_DIR, 'download-hadoop.py'),
            remotepath='/tmp/download-hadoop.py')
    ssh_check_output(
        client=ssh_client,
        command="""
            set -e
            python /tmp/download-hadoop.py "{version}" "{download_source}"
            mkdir "hadoop"
            mkdir "hadoop/conf"
            tar xzf "hadoop-{version}.tar.gz" -C "hadoop" --strip-components=1
            rm "hadoop-{version}.tar.gz"
        """.format(version=self.version, download_source=self.download_source))
def ssh_check_output(client: paramiko.client.SSHClient, command: str) -> str:
    """
    Run a command via the provided SSH client and return the output captured
    on stdout.

    Raise an exception if the command returns a non-zero code.
    """
    # NOTE(review): with get_pty=True, stderr is typically merged into the
    # stdout stream, so stderr_output below is usually empty — confirm.
    stdin, stdout, stderr = client.exec_command(command, get_pty=True)
    # NOTE: Paramiko doesn't clearly document this, but we must read() before
    # calling recv_exit_status().
    # See: https://github.com/paramiko/paramiko/issues/448#issuecomment-159481997
    stdout_output = stdout.read().decode('utf8').rstrip('\n')
    stderr_output = stderr.read().decode('utf8').rstrip('\n')
    exit_status = stdout.channel.recv_exit_status()
    if exit_status:
        # TODO: Return a custom exception that includes the return code.
        # See: https://docs.python.org/3/library/subprocess.html#subprocess.check_output
        # NOTE: We are losing the output order here since output from stdout and stderr
        # may be interleaved.
        raise SSHError(
            host=client.get_transport().getpeername()[0],
            message=stdout_output + stderr_output)
    return stdout_output
def install(
        self,
        ssh_client: paramiko.client.SSHClient,
        cluster: FlintrockCluster):
    """
    Install Spark on the target node.

    If a release version is configured, a pre-built package is downloaded
    via the install-spark.sh helper; otherwise Spark is built from the
    configured git repository and commit. Either way, the Spark executables
    are symlinked into /usr/local/bin and SPARK_HOME is exported in .bashrc.
    """
    print("[{h}] Installing Spark...".format(
        h=ssh_client.get_transport().getpeername()[0]))
    try:
        if self.version:
            with ssh_client.open_sftp() as sftp:
                sftp.put(
                    localpath=os.path.join(SCRIPTS_DIR, 'install-spark.sh'),
                    remotepath='/tmp/install-spark.sh')
                sftp.chmod(path='/tmp/install-spark.sh', mode=0o755)
            url = self.download_source.format(v=self.version)
            ssh_check_output(
                client=ssh_client,
                command="""
                    set -e
                    /tmp/install-spark.sh {url}
                    rm -f /tmp/install-spark.sh
                """.format(url=shlex.quote(url)))
        else:
            ssh_check_output(
                client=ssh_client,
                command="""
                    set -e
                    sudo yum install -y git
                    sudo yum install -y java-devel
                """)
            ssh_check_output(
                client=ssh_client,
                command="""
                    set -e
                    git clone {repo} spark
                    cd spark
                    git reset --hard {commit}
                    # Older Spark releases keep the build script at the repo root.
                    if [ -e "make-distribution.sh" ]; then
                        ./make-distribution.sh -Phadoop-2.6
                    else
                        ./dev/make-distribution.sh -Phadoop-2.6
                    fi
                """.format(
                    repo=shlex.quote(self.git_repository),
                    commit=shlex.quote(self.git_commit)))
        ssh_check_output(
            client=ssh_client,
            command="""
                set -e
                for f in $(find spark/bin -type f -executable -not -name '*.cmd'); do
                    sudo ln -s "$(pwd)/$f" "/usr/local/bin/$(basename $f)"
                done
                echo "export SPARK_HOME='$(pwd)/spark'" >> .bashrc
            """)
    except Exception as e:
        # TODO: This should be a more specific exception.
        print("Error: Failed to install Spark.", file=sys.stderr)
        print(e, file=sys.stderr)
        raise
def install(
    self,
    ssh_client: paramiko.client.SSHClient,
    cluster: FlintrockCluster,
):
    """
    Download and unpack Hadoop on the target node and put its CLI tools
    on the PATH.

    Uploads the download-package.py helper over SFTP, fetches and unpacks
    the Hadoop package into ~/hadoop, symlinks the Hadoop executables into
    /usr/local/bin, and exports HADOOP_LIBEXEC_DIR via .bashrc.
    """
    logger.info("[{h}] Installing HDFS...".format(
        h=ssh_client.get_transport().getpeername()[0]))
    with ssh_client.open_sftp() as sftp:
        sftp.put(localpath=os.path.join(SCRIPTS_DIR, 'download-package.py'),
                 remotepath='/tmp/download-package.py')
    logger.debug("[{h}] Downloading Hadoop from: {s}".format(
        h=ssh_client.get_transport().getpeername()[0],
        s=self.download_source,
    ))
    ssh_check_output(
        client=ssh_client,
        command="""
            set -e
            python /tmp/download-package.py "{download_source}" "hadoop"

            for f in $(find hadoop/bin -type f -executable -not -name '*.cmd'); do
                sudo ln -s "$(pwd)/$f" "/usr/local/bin/$(basename $f)"
            done

            echo "export HADOOP_LIBEXEC_DIR='$(pwd)/hadoop/libexec'" >> .bashrc
        """.format(
            # A {v} placeholder in the configured download source is
            # expanded to the requested Hadoop version here.
            download_source=self.download_source.format(v=self.version),
        ))
def install(self, ssh_client: paramiko.client.SSHClient, cluster: FlintrockCluster):
    """
    Download and unpack Hadoop on the target node and put its CLI tools
    on the PATH.

    Uploads the download-hadoop.py helper over SFTP, fetches and unpacks
    the Hadoop tarball into ~/hadoop, symlinks the Hadoop executables into
    /usr/local/bin, and exports HADOOP_LIBEXEC_DIR via .bashrc.
    """
    logger.info("[{h}] Installing HDFS...".format(
        h=ssh_client.get_transport().getpeername()[0]))
    with ssh_client.open_sftp() as sftp:
        sftp.put(localpath=os.path.join(SCRIPTS_DIR, 'download-hadoop.py'),
                 remotepath='/tmp/download-hadoop.py')
    ssh_check_output(client=ssh_client, command="""
        set -e
        python /tmp/download-hadoop.py "{version}" "{download_source}"
        mkdir "hadoop"
        mkdir "hadoop/conf"
        tar xzf "hadoop-{version}.tar.gz" -C "hadoop" --strip-components=1
        rm "hadoop-{version}.tar.gz"
        for f in $(find hadoop/bin -type f -executable -not -name '*.cmd'); do
            sudo ln -s "$(pwd)/$f" "/usr/local/bin/$(basename $f)"
        done
        echo "export HADOOP_LIBEXEC_DIR='$(pwd)/hadoop/libexec'" >> .bashrc
    """.format(version=self.version, download_source=self.download_source))
def install(
        self,
        ssh_client: paramiko.client.SSHClient,
        cluster: FlintrockCluster):
    """
    Download and unpack Hadoop on the target node.

    Uploads the download-hadoop.py helper over SFTP, then fetches the
    Hadoop tarball remotely (the helper resolves the download location
    from the version alone) and unpacks it into ~/hadoop.
    """
    print("[{h}] Installing HDFS...".format(
        h=ssh_client.get_transport().getpeername()[0]))
    with ssh_client.open_sftp() as sftp:
        sftp.put(
            localpath=os.path.join(SCRIPTS_DIR, 'download-hadoop.py'),
            remotepath='/tmp/download-hadoop.py')
    ssh_check_output(
        client=ssh_client,
        command="""
            set -e
            python /tmp/download-hadoop.py "{version}"
            mkdir "hadoop"
            mkdir "hadoop/conf"
            tar xzf "hadoop-{version}.tar.gz" -C "hadoop" --strip-components=1
            rm "hadoop-{version}.tar.gz"
        """.format(version=self.version))
def install(
        self,
        ssh_client: paramiko.client.SSHClient,
        cluster: FlintrockCluster):
    """
    Download and unpack Hadoop on the target node and put its CLI tools
    on the PATH.

    Uploads the download-package.py helper over SFTP, fetches and unpacks
    the Hadoop package into ~/hadoop, symlinks the Hadoop executables into
    /usr/local/bin, and exports HADOOP_LIBEXEC_DIR via .bashrc.
    """
    logger.info("[{h}] Installing HDFS...".format(
        h=ssh_client.get_transport().getpeername()[0]))
    with ssh_client.open_sftp() as sftp:
        sftp.put(
            localpath=os.path.join(SCRIPTS_DIR, 'download-package.py'),
            remotepath='/tmp/download-package.py')
    ssh_check_output(
        client=ssh_client,
        command="""
            set -e
            python /tmp/download-package.py "{download_source}" "hadoop"

            for f in $(find hadoop/bin -type f -executable -not -name '*.cmd'); do
                sudo ln -s "$(pwd)/$f" "/usr/local/bin/$(basename $f)"
            done

            echo "export HADOOP_LIBEXEC_DIR='$(pwd)/hadoop/libexec'" >> .bashrc
        """.format(
            # The command template only references {download_source}; a
            # previously-passed `version=` kwarg was unused and was removed.
            # A {v} placeholder in the configured source expands to the
            # requested Hadoop version.
            download_source=self.download_source.format(v=self.version),
        ))
def run_experiment(
    ssh_client: paramiko.client.SSHClient,
    experiment_name: str,
    duration: int,
    exposure_time: float = None,
) -> ExperimentStreams:
    """Run run_experiment (image capture program) on the cosmobot over SSH.

    Args:
        ssh_client: connected SSH client for the target cosmobot
        experiment_name: experiment name to pass to run_experiment
        duration: duration to pass to run_experiment
        exposure_time: optional exposure time to pass to run_experiment;
            None means the remote program's default is used

    Returns:
        ExperimentStreams object wrapping the command's stdin/stdout/stderr
    """
    run_experiment_command = _generate_run_experiment_command(
        experiment_name, duration, exposure_time
    )

    hostname = ssh_client.get_transport().hostname

    logger.info(
        f"Starting image capture on cosmobot {hostname}\n"
        f"Command: {run_experiment_command}"
    )

    # exec_command returns (stdin, stdout, stderr), which ExperimentStreams
    # accepts positionally.
    return ExperimentStreams(*ssh_client.exec_command(run_experiment_command))
def ssh_check_output(
        client: paramiko.client.SSHClient,
        command: str,
        timeout_seconds: int = None,
) -> str:
    """
    Run a command via the provided SSH client and return the output captured
    on stdout.

    Raise an exception if the command returns a non-zero code.

    `timeout_seconds` (None = no timeout) is passed through to Paramiko as a
    per-read channel timeout; reads that exceed it raise socket.timeout.
    """
    stdin, stdout, stderr = client.exec_command(
        command,
        get_pty=True,
        timeout=timeout_seconds)
    # NOTE: Paramiko doesn't clearly document this, but we must read() before
    # calling recv_exit_status().
    # See: https://github.com/paramiko/paramiko/issues/448#issuecomment-159481997
    stdout_output = stdout.read().decode('utf8').rstrip('\n')
    stderr_output = stderr.read().decode('utf8').rstrip('\n')
    exit_status = stdout.channel.recv_exit_status()
    if exit_status:
        # TODO: Return a custom exception that includes the return code.
        # See: https://docs.python.org/3/library/subprocess.html#subprocess.check_output
        # NOTE: We are losing the output order here since output from stdout and stderr
        # may be interleaved.
        raise SSHError(
            host=client.get_transport().getpeername()[0],
            message=stdout_output + stderr_output)
    return stdout_output
def install(
        self,
        ssh_client: paramiko.client.SSHClient,
        cluster: FlintrockCluster):
    """
    Install Spark on the target node.

    If a release version is configured, the pre-built package is downloaded
    via the download-package.py helper; otherwise Spark is built from the
    configured git repository and commit. Either way, the Spark executables
    are symlinked into /usr/local/bin and SPARK_HOME is exported in .bashrc.
    """
    logger.info("[{h}] Installing Spark...".format(
        h=ssh_client.get_transport().getpeername()[0]))
    if self.version:
        with ssh_client.open_sftp() as sftp:
            sftp.put(
                localpath=os.path.join(SCRIPTS_DIR, 'download-package.py'),
                remotepath='/tmp/download-package.py')
        ssh_check_output(
            client=ssh_client,
            command="""
                python /tmp/download-package.py "{download_source}" "spark"
            """.format(
                # The command template only references {download_source};
                # a previously-passed `version=` kwarg was unused and was
                # removed. A {v} placeholder in the configured source
                # expands to the requested Spark version.
                download_source=self.download_source.format(v=self.version),
            ))
    else:
        ssh_check_output(
            client=ssh_client,
            command="""
                set -e
                sudo yum install -y git
                sudo yum install -y java-devel
            """)
        ssh_check_output(
            client=ssh_client,
            command="""
                set -e
                git clone {repo} spark
                cd spark
                git reset --hard {commit}
                if [ -e "make-distribution.sh" ]; then
                    ./make-distribution.sh -Phadoop-{hadoop_short_version}
                else
                    ./dev/make-distribution.sh -Phadoop-{hadoop_short_version}
                fi
            """.format(
                repo=shlex.quote(self.git_repository),
                commit=shlex.quote(self.git_commit),
                # Hardcoding this here until we figure out a better way to handle
                # the supported build profiles.
                hadoop_short_version='2.7',
            ))
    ssh_check_output(
        client=ssh_client,
        command="""
            set -e
            for f in $(find spark/bin -type f -executable -not -name '*.cmd'); do
                sudo ln -s "$(pwd)/$f" "/usr/local/bin/$(basename $f)"
            done
            echo "export SPARK_HOME='$(pwd)/spark'" >> .bashrc
        """)
def setup_node(
        *,
        # Change this to take host, user, and identity_file?
        # Add some kind of caching for SSH connections so that they
        # can be looked up by host and reused?
        ssh_client: paramiko.client.SSHClient,
        services: list,
        java_version: int,
        cluster: FlintrockCluster):
    """
    Setup a new node.

    Installs the cluster's SSH key pair, formats and mounts ephemeral
    storage, ensures the requested Java version is present, and installs
    each configured service.

    Cluster methods like provision_node() and add_slaves_node() should
    delegate the main work of setting up new nodes to this function.
    """
    host = ssh_client.get_transport().getpeername()[0]
    # Install the cluster-internal key pair so nodes can SSH to each other.
    ssh_check_output(client=ssh_client, command="""
        set -e
        echo {private_key} > "$HOME/.ssh/id_rsa"
        echo {public_key} >> "$HOME/.ssh/authorized_keys"
        chmod 400 "$HOME/.ssh/id_rsa"
    """.format(private_key=shlex.quote(cluster.ssh_key_pair.private),
               public_key=shlex.quote(cluster.ssh_key_pair.public)))
    with ssh_client.open_sftp() as sftp:
        sftp.put(localpath=os.path.join(SCRIPTS_DIR, 'setup-ephemeral-storage.py'),
                 remotepath='/tmp/setup-ephemeral-storage.py')
    logger.info("[{h}] Configuring ephemeral storage...".format(h=host))
    # TODO: Print some kind of warning if storage is large, since formatting
    # will take several minutes (~4 minutes for 2TB).
    # The remote script prints a JSON mapping of storage directories.
    storage_dirs_raw = ssh_check_output(client=ssh_client, command="""
        set -e
        python /tmp/setup-ephemeral-storage.py
        rm -f /tmp/setup-ephemeral-storage.py
    """)
    storage_dirs = json.loads(storage_dirs_raw)
    cluster.storage_dirs.root = storage_dirs['root']
    cluster.storage_dirs.ephemeral = storage_dirs['ephemeral']
    ensure_java(ssh_client, java_version)
    for service in services:
        try:
            service.install(
                ssh_client=ssh_client,
                cluster=cluster,
            )
        except Exception as e:
            # Wrap so the failing service is identified in the error chain.
            raise Exception("Failed to install {}.".format(
                type(service).__name__)) from e
def install(
        self,
        ssh_client: paramiko.client.SSHClient,
        cluster: FlintrockCluster):
    """
    Install Spark on the target node.

    If a release version is configured, a pre-built package for the
    hard-coded Hadoop distribution is downloaded via the install-spark.sh
    helper; otherwise Spark is built from the configured git repository
    and commit.
    """
    # TODO: Allow users to specify the Spark "distribution". (?)
    distribution = 'hadoop2.6'
    print("[{h}] Installing Spark...".format(
        h=ssh_client.get_transport().getpeername()[0]))
    try:
        if self.version:
            with ssh_client.open_sftp() as sftp:
                sftp.put(
                    localpath=os.path.join(SCRIPTS_DIR, 'install-spark.sh'),
                    remotepath='/tmp/install-spark.sh')
                sftp.chmod(path='/tmp/install-spark.sh', mode=0o755)
            ssh_check_output(
                client=ssh_client,
                command="""
                    set -e
                    /tmp/install-spark.sh {spark_version} {distribution}
                    rm -f /tmp/install-spark.sh
                """.format(
                    spark_version=shlex.quote(self.version),
                    distribution=shlex.quote(distribution)))
        else:
            ssh_check_output(
                client=ssh_client,
                command="""
                    set -e
                    sudo yum install -y git
                    sudo yum install -y java-devel
                """)
            ssh_check_output(
                client=ssh_client,
                command="""
                    set -e
                    git clone {repo} spark
                    cd spark
                    git reset --hard {commit}
                    # Older Spark releases keep the build script at the repo root.
                    if [ -e "make-distribution.sh" ]; then
                        ./make-distribution.sh -Phadoop-2.6
                    else
                        ./dev/make-distribution.sh -Phadoop-2.6
                    fi
                """.format(
                    repo=shlex.quote(self.git_repository),
                    commit=shlex.quote(self.git_commit)))
    except Exception as e:
        # TODO: This should be a more specific exception.
        print("Error: Failed to install Spark.", file=sys.stderr)
        print(e, file=sys.stderr)
        raise
def install(self, ssh_client: paramiko.client.SSHClient, cluster: FlintrockCluster):
    """
    Install Spark on the target node.

    If a release version is configured, the pre-built package is downloaded
    via the download-package.py helper; otherwise Spark is built from the
    configured git repository and commit. Either way, the Spark executables
    are symlinked into /usr/local/bin and SPARK_HOME is exported in .bashrc.
    """
    logger.info("[{h}] Installing Spark...".format(
        h=ssh_client.get_transport().getpeername()[0]))
    if self.version:
        with ssh_client.open_sftp() as sftp:
            sftp.put(localpath=os.path.join(SCRIPTS_DIR, 'download-package.py'),
                     remotepath='/tmp/download-package.py')
        ssh_check_output(client=ssh_client, command="""
            python /tmp/download-package.py "{download_source}" "spark"
        """.format(
            # The command template only references {download_source}; a
            # previously-passed `version=` kwarg was unused and was removed.
            # A {v} placeholder in the configured source expands to the
            # requested Spark version.
            download_source=self.download_source.format(v=self.version),
        ))
    else:
        ssh_check_output(client=ssh_client, command="""
            set -e
            sudo yum install -y git
            sudo yum install -y java-devel
        """)
        ssh_check_output(
            client=ssh_client,
            command="""
                set -e
                git clone {repo} spark
                cd spark
                git reset --hard {commit}
                if [ -e "make-distribution.sh" ]; then
                    ./make-distribution.sh -Phadoop-{hadoop_short_version}
                else
                    ./dev/make-distribution.sh -Phadoop-{hadoop_short_version}
                fi
            """.format(
                repo=shlex.quote(self.git_repository),
                commit=shlex.quote(self.git_commit),
                # Hardcoding this here until we figure out a better way to handle
                # the supported build profiles.
                hadoop_short_version='2.7',
            ))
    ssh_check_output(client=ssh_client, command="""
        set -e
        for f in $(find spark/bin -type f -executable -not -name '*.cmd'); do
            sudo ln -s "$(pwd)/$f" "/usr/local/bin/$(basename $f)"
        done
        echo "export SPARK_HOME='$(pwd)/spark'" >> .bashrc
    """)
def setup_node(
        *,
        # Change this to take host, user, and identity_file?
        # Add some kind of caching for SSH connections so that they
        # can be looked up by host and reused?
        ssh_client: paramiko.client.SSHClient,
        services: list,
        cluster: FlintrockCluster):
    """
    Setup a new node.

    Installs the cluster's SSH key pair, formats and mounts ephemeral
    storage, ensures Java 8 is present, and installs each configured
    service.

    Cluster methods like provision_node() and add_slaves_node() should
    delegate the main work of setting up new nodes to this function.
    """
    host = ssh_client.get_transport().getpeername()[0]
    # Install the cluster-internal key pair so nodes can SSH to each other.
    ssh_check_output(
        client=ssh_client,
        command="""
            set -e
            echo {private_key} > "$HOME/.ssh/id_rsa"
            echo {public_key} >> "$HOME/.ssh/authorized_keys"
            chmod 400 "$HOME/.ssh/id_rsa"
        """.format(
            private_key=shlex.quote(cluster.ssh_key_pair.private),
            public_key=shlex.quote(cluster.ssh_key_pair.public)))
    with ssh_client.open_sftp() as sftp:
        sftp.put(
            localpath=os.path.join(SCRIPTS_DIR, 'setup-ephemeral-storage.py'),
            remotepath='/tmp/setup-ephemeral-storage.py')
    logger.info("[{h}] Configuring ephemeral storage...".format(h=host))
    # TODO: Print some kind of warning if storage is large, since formatting
    # will take several minutes (~4 minutes for 2TB).
    # The remote script prints a JSON mapping of storage directories.
    storage_dirs_raw = ssh_check_output(
        client=ssh_client,
        command="""
            set -e
            python /tmp/setup-ephemeral-storage.py
            rm -f /tmp/setup-ephemeral-storage.py
        """)
    storage_dirs = json.loads(storage_dirs_raw)
    cluster.storage_dirs.root = storage_dirs['root']
    cluster.storage_dirs.ephemeral = storage_dirs['ephemeral']
    ensure_java8(ssh_client)
    for service in services:
        service.install(
            ssh_client=ssh_client,
            cluster=cluster)
def attempt_to_close_connection(ssh_client: paramiko.client.SSHClient):
    """Call ssh_client.close() and log exception and cosmobot hostname if it fails"""
    # get hostname up here in case the transport isn't available after a failed close()
    hostname = ssh_client.get_transport().hostname
    try:
        ssh_client.close()
    except Exception as e:
        # Typo fixed in the log message: "occured" -> "occurred".
        logging.error(
            f"exception occurred while trying to close ssh connection to cosmobot {hostname}"
        )
        # logging.exception records the traceback alongside the message.
        logging.exception(e)
def uploadFile(sourcePath: str, targetPath: str,
               sshc: paramiko.client.SSHClient,
               compress_method: str = None,
               verbose: bool = True) -> pathlib.Path:
    """
    Upload a file to a remote host via SCP, optionally compressing it
    locally first and unpacking it remotely.

    When compress_method is given, the local file is archived, uploaded as
    <targetPath's directory>/<source name>.<compress_method>, unpacked on
    the remote side, and both temporary archives are deleted afterwards.

    Returns the path of the uploaded (unpacked) file on the remote host.
    """
    def show_progress(filename, size, sent):
        # NOTE(review): the label is the literal string "(unknown)" even
        # though `filename` is available here — possibly intentional
        # (filename may be bytes), possibly an oversight; confirm.
        print(f"Uploading (unknown) progress: " +
              f"{float(sent)/float(size)*100:.2f}%", end="\r")
    progress = show_progress if verbose else None
    try:
        if compress_method:
            fileName = pathlib.Path(sourcePath).name
            # change targetPath for uploading to
            # targetPath's directory / sourcePath's name + ext.
            targetPath = pathlib.Path(
                str(pathlib.Path(targetPath).parent / fileName)
                + "." + compress_method)
            sourcePath = archiveFile(sourcePath, verbose=verbose,
                                     method=compress_method)
            isArchived = True
        with scp.SCPClient(sshc.get_transport(), progress=progress) as scpc:
            # in case Path is PosixPath, casting them to str
            scpc.put(str(sourcePath), str(targetPath))
        print("\n")  # nextline
        if compress_method:
            unarchiveSSH(targetPath, sshc, method=compress_method,
                         verbose=verbose)
            isUnarchived = True
            # change targetPath to uploaded raw file
            uploadedPath = str(pathlib.Path(targetPath).parent / fileName)
    finally:
        # delete archive files
        # NOTE(review): the `'name' in locals()` pattern is used as a
        # "did we get this far" flag so cleanup runs even after a partial
        # failure; fragile but deliberate.
        if 'isArchived' in locals():
            with verbosity_context(f"Deleting archive {sourcePath}", verbose):
                os.remove(sourcePath)
        if 'isUnarchived' in locals():
            sftp = sshc.open_sftp()
            with verbosity_context(f"Deleting archive {targetPath} via SCP",
                                   verbose):
                sftp.remove(str(targetPath))
    return uploadedPath if compress_method else targetPath
def tdloadViaSSH(engine: sqlalchemy.engine.base.Engine,
                 sshc: paramiko.client.SSHClient,
                 tablename: str, targetPath: str, jobname: str,
                 dbname: str = None, skipRowNum: int = 0,
                 verbose: bool = True) -> None:
    """
    Load a data file that already exists on the remote host into Teradata
    by running `tdload` there over SSH.

    The connection details (host, user, password, database) are taken from
    the SQLAlchemy engine URL. Pre-existing error-log tables (_ET/_UV) for
    the target table are dropped first.

    NOTE(review): the database password is interpolated into the remote
    command line, where it is visible in the remote process list and any
    shell history — consider a tdload job variables file instead.
    """
    targetPath = pathlib.Path(targetPath)
    if dbname is None:
        dbname = engine.url.database
    # always use option: QuotedData = 'Optional'
    options = "--DCPQuotedData 'Optional'"
    if skipRowNum > 0:
        options += f" --SourceSkipRows {skipRowNum}"
    tdload_command = (f"tdload -f {targetPath} -t {dbname}.{tablename}" +
                      f" -h {engine.url.host} -u {engine.url.username}" +
                      f" -p {engine.url.password}" +
                      f" --TargetWorkingDatabase {dbname}" +
                      f" {options} {jobname}")
    # drop error log table if exists
    dropIfExists(tablename + "_ET", dbname, engine)
    dropIfExists(tablename + "_UV", dbname, engine)
    # execute command via ssh
    stdin, stdout, stderr = sshc.exec_command(tdload_command)
    for line in stdout:
        if verbose:
            print(line)
        else:
            # In quiet mode only surface summary/success lines.
            if re.match(r".*(Total Rows|successfully).*", line):
                print(line)
def ensure_java8(client: paramiko.client.SSHClient):
    """
    Ensure the remote machine has Java 1.8, installing the Oracle JDK RPM
    if the detected version is missing or older.
    """
    host = client.get_transport().getpeername()[0]
    java_major_version = get_java_major_version(client)
    if not java_major_version or java_major_version < (1, 8):
        logger.info("[{h}] Installing Oracle Java 1.8...".format(h=host))
        # Scrapes Oracle's download pages to find the current JDK 8 RPM URL,
        # downloads it with the license-acceptance cookie, then swaps out
        # any OpenJDK installs for it.
        # NOTE(review): the localinstall below hardcodes jdk-8u144 while the
        # download URL is discovered dynamically — these can drift apart and
        # break the install; verify.
        ssh_check_output(client=client, command="""
            set -e
            cd /tmp
            readonly url="http://www.oracle.com"
            readonly jdk_download_url1="$url/technetwork/java/javase/downloads/index.html"
            readonly jdk_download_url2=$(curl -s $jdk_download_url1 | egrep -o "\/technetwork\/java/\javase\/downloads\/jdk8-downloads-.+?\.html" | head -1 | cut -d '"' -f 1)
            [[ -z "$jdk_download_url2" ]] && error "Could not get jdk download url - $jdk_download_url1"
            readonly jdk_download_url3="${url}${jdk_download_url2}"
            readonly jdk_download_url4=$(curl -s $jdk_download_url3 | egrep -o "http\:\/\/download.oracle\.com\/otn-pub\/java\/jdk\/[7-8]u[0-9]+\-(.*)+\/jdk-[7-8]u[0-9]+(.*)linux-x64.rpm")

            for dl_url in ${jdk_download_url4[@]}; do
                wget --no-cookies --no-check-certificate --header "Cookie: oraclelicense=accept-securebackup-cookie" -N $dl_url
            done

            sudo yum remove -y java-1.8.0-openjdk*
            sudo yum localinstall -y ./jdk-8u144-linux-x64.rpm
            sudo yum remove -y java-1.6.0-openjdk java-1.7.0-openjdk java-1.8.0-openjdk*
            sudo sh -c "echo export JAVA_HOME=/usr/java/default/jre >> /etc/environment"
            source /etc/environment
        """)
def configure_master(
        self,
        ssh_client: paramiko.client.SSHClient,
        cluster: FlintrockCluster):
    """
    Start the Spark master on this node, wait for its web UI to respond,
    then start the slaves.
    """
    host = ssh_client.get_transport().getpeername()[0]
    print("[{h}] Configuring Spark master...".format(h=host))
    # TODO: Maybe move this shell script out to some separate file/folder
    #       for the Spark service.
    # TODO: Add some timeout for waiting on master UI to come up.
    ssh_check_output(
        client=ssh_client,
        command="""
            set -e
            spark/sbin/start-master.sh

            # Poll until the master UI returns HTTP 200 before starting
            # the slaves; curl errors must not abort the loop, hence +e.
            set +e
            master_ui_response_code=0
            while [ "$master_ui_response_code" -ne 200 ]; do
                sleep 1
                master_ui_response_code="$(
                    curl --head --silent --output /dev/null \
                         --write-out "%{{http_code}}" {m}:8080
                )"
            done
            set -e

            spark/sbin/start-slaves.sh
        """.format(
            m=shlex.quote(cluster.master_host)))
def reboot_pc(ssh_client: paramiko.client.SSHClient, password: str):
    """Reboot the remote machine by running `reboot` under sudo over SSH."""
    ip, port = ssh_client.get_transport().getpeername()
    reboot_cmd = 'sudo -S -p " " reboot'
    channel, stdin, stdout, stderr = sudo_exec(ssh_client, reboot_cmd, password)
    # Drain both streams so the call blocks until the command has run.
    print_stdout_blocking(stdout, "reboot_pc", ip, port)
    print_stderr_blocking(stderr, "reboot_pc", ip, port)
    return
def configure_master(
        self,
        ssh_client: paramiko.client.SSHClient,
        cluster: FlintrockCluster):
    """
    Start the Spark master and slaves on this node and wait until the
    master web UI responds.
    """
    host = ssh_client.get_transport().getpeername()[0]
    print("[{h}] Configuring Spark master...".format(h=host))
    # TODO: Maybe move this shell script out to some separate file/folder
    #       for the Spark service.
    # TODO: Add some timeout for waiting on master UI to come up.
    ssh_check_output(
        client=ssh_client,
        command="""
            spark/sbin/start-all.sh

            # Poll until the master UI returns HTTP 200.
            master_ui_response_code=0
            while [ "$master_ui_response_code" -ne 200 ]; do
                sleep 1
                master_ui_response_code="$(
                    curl --head --silent --output /dev/null \
                         --write-out "%{{http_code}}" {m}:8080
                )"
            done
        """.format(
            m=shlex.quote(cluster.master_host)))
def configure_master(self, ssh_client: paramiko.client.SSHClient, cluster: FlintrockCluster):
    """
    Start the Spark master and slaves on this node and wait until the
    master web UI responds, retrying up to three times on timeout.
    """
    host = ssh_client.get_transport().getpeername()[0]
    logger.info("[{h}] Configuring Spark master...".format(h=host))
    # This loop is a band-aid for: https://github.com/nchammas/flintrock/issues/129
    attempt_limit = 3
    for attempt in range(attempt_limit):
        try:
            ssh_check_output(
                client=ssh_client,
                # Maybe move this shell script out to some separate
                # file/folder for the Spark service.
                command="""
                    spark/sbin/start-all.sh

                    master_ui_response_code=0
                    while [ "$master_ui_response_code" -ne 200 ]; do
                        sleep 1
                        master_ui_response_code="$(
                            curl --head --silent --output /dev/null \
                                 --write-out "%{{http_code}}" {m}:8080
                        )"
                    done
                """.format(m=shlex.quote(cluster.master_host)),
                timeout_seconds=90)
            break
        # The exception object carries no useful detail, so the unused
        # `as e` binding was dropped.
        except socket.timeout:
            logger.debug(
                "Timed out waiting for Spark master to come up.{}".format(
                    " Trying again..." if attempt < attempt_limit - 1 else ""))
    else:
        # for/else: runs only if no attempt ever `break`s.
        raise Exception("Timed out waiting for Spark master to come up.")
def run_one_sudo_cmd(ssh_client: paramiko.client.SSHClient, cmd: str, password: str):
    """Run a single command under sudo on the remote host and print its output."""
    ip, port = ssh_client.get_transport().getpeername()
    sudo_cmd = 'sudo -S -p " " ' + cmd
    channel, stdin, stdout, stderr = sudo_exec(ssh_client, sudo_cmd, password)
    # Drain both streams so the call blocks until the command has run.
    print_stdout_blocking(stdout, "run_one_sudo_cmd", ip, port)
    print_stderr_blocking(stderr, "run_one_sudo_cmd", ip, port)
    return
def ensure_java(client: paramiko.client.SSHClient, java_version: int):
    """
    Ensures that Java is available on the machine and that it has a version of
    at least java_version.
    The specified version of Java will be installed if it does not exist or
    the existing version has a major version lower than java_version.

    :param client: SSH client connected to the target machine
    :param java_version: minimum version of Java required
    :return: None
    """
    host = client.get_transport().getpeername()[0]
    installed_java_version = get_installed_java_version(client)

    # Exact match: nothing to do.
    if installed_java_version == java_version:
        logger.info(
            "Java {j} is already installed, skipping Java install".format(
                j=installed_java_version))
        return

    # Newer than requested: keep it, but warn.
    if installed_java_version and installed_java_version > java_version:
        logger.warning("""
            Existing Java {j} installation is newer than the configured version {java_version}.
            Your applications will be executed with Java {j}.
            Please choose a different AMI if this does not work for you.
            """.format(j=installed_java_version, java_version=java_version))
        return

    # Older than requested: fall through and upgrade.
    if installed_java_version and installed_java_version < java_version:
        logger.info("""
            Existing Java {j} will be upgraded to AdoptOpenJDK {java_version}
            """.format(j=installed_java_version, java_version=java_version))

    # We will install AdoptOpenJDK because it gives us access to Java 8 through 15
    # Right now, Amazon Extras only provides Corretto Java 8, 11 and 15
    logger.info("[{h}] Installing AdoptOpenJDK Java {j}...".format(
        h=host, j=java_version))
    install_adoptopenjdk_repo(client)
    java_package = "adoptopenjdk-{j}-hotspot".format(j=java_version)
    ssh_check_output(client=client, command="""
        set -e

        # Install Java first to protect packages that depend on Java from being removed.
        sudo yum install -q -y {jp}

        # Remove any older versions of Java to force the default Java to the requested version.
        # We don't use /etc/alternatives because it does not seem to update links in /usr/lib/jvm correctly,
        # and we don't just rely on JAVA_HOME because some programs use java directly in the PATH.
        sudo yum remove -y java-1.6.0-openjdk java-1.7.0-openjdk

        sudo sh -c "echo export JAVA_HOME=/usr/lib/jvm/{jp} >> /etc/environment"
        source /etc/environment
    """.format(jp=java_package))
def no_sudo_exec(ssh_client: paramiko.client.SSHClient, cmd: str, block=True):
    """
    Execute `cmd` on a fresh SSH channel without sudo.

    Returns a (channel, stdin, stdout, stderr) tuple of the channel and its
    file-like stream wrappers.
    """
    channel = ssh_client.get_transport().open_session()
    if block:
        # presumably a PTY is requested so reads on the streams block
        # until the command completes — TODO confirm
        channel.get_pty()
    channel.exec_command(cmd)
    stream_in = channel.makefile_stdin("wb", -1)
    stream_out = channel.makefile("r", -1)
    stream_err = channel.makefile_stderr("r", -1)
    return channel, stream_in, stream_out, stream_err
def configure_master(self, ssh_client: paramiko.client.SSHClient, cluster: FlintrockCluster):
    """
    Format the HDFS NameNode on this node and start the HDFS daemons.
    """
    host = ssh_client.get_transport().getpeername()[0]
    print("[{h}] Configuring HDFS master...".format(h=host))
    ssh_check_output(client=ssh_client, command="""
        ./hadoop/bin/hdfs namenode -format -nonInteractive
        ./hadoop/sbin/start-dfs.sh
    """)
def configure_master(
        self,
        ssh_client: paramiko.client.SSHClient,
        cluster: FlintrockCluster):
    """
    Format the HDFS NameNode on this node and start the HDFS daemons.
    """
    host = ssh_client.get_transport().getpeername()[0]
    print("[{h}] Configuring HDFS master...".format(h=host))
    ssh_check_output(
        client=ssh_client,
        command="""
            ./hadoop/bin/hdfs namenode -format -nonInteractive
            ./hadoop/sbin/start-dfs.sh
        """)
def install( self, ssh_client: paramiko.client.SSHClient, cluster_info: ClusterInfo): """ Downloads and installs Spark on a given node. """ # TODO: Allow users to specify the Spark "distribution". distribution = 'hadoop1' print("[{h}] Installing Spark...".format( h=ssh_client.get_transport().getpeername()[0])) try: # TODO: Figure out how these non-template paths should work. ssh_check_output( client=ssh_client, command=""" set -e echo {f} > /tmp/install-spark.sh chmod 755 /tmp/install-spark.sh /tmp/install-spark.sh {spark_version} {distribution} """.format( f=shlex.quote( get_formatted_template( path='./install-spark.sh', mapping=vars(cluster_info))), spark_version=shlex.quote(self.version), distribution=shlex.quote(distribution))) except Exception as e: print("Could not find package for Spark {s} / {d}.".format( s=self.version, d=distribution ), file=sys.stderr) raise template_path = "./spark/conf/spark-env.sh" ssh_check_output( client=ssh_client, command=""" echo {f} > {p} """.format( f=shlex.quote( get_formatted_template( path="templates/" + template_path, mapping=vars(cluster_info))), p=shlex.quote(template_path)))
def sudo_exec(ssh_client: paramiko.client.SSHClient, cmd: str, password: str, block=True):
    """
    Execute a sudo command on a fresh SSH channel, feeding the sudo
    password on stdin.

    Returns a (channel, stdin, stdout, stderr) tuple. `cmd` is expected to
    already contain the sudo invocation (e.g. 'sudo -S -p " " ...').
    """
    ch = ssh_client.get_transport().open_session()
    if block:
        # presumably a PTY is requested so reads on the streams block
        # until the command completes — TODO confirm
        ch.get_pty()
    ch.exec_command(cmd)
    stdin = ch.makefile_stdin("wb", -1)
    # Answer sudo's password prompt.
    stdin.write(password + '\n')
    stdin.flush()
    stdout = ch.makefile("r", -1)
    stderr = ch.makefile_stderr("r", -1)
    # NOTE(review): these two reads presumably skip the PTY echo of the
    # prompt/password before handing the streams to the caller — confirm.
    stdout.readline()
    stdout.readline()
    return ch, stdin, stdout, stderr
def upload_file(ssh_client: paramiko.client.SSHClient, local_path, remote_path):
    """
    Recursively upload a local directory tree to the remote host via SCP,
    skipping VCS/build/cache artifacts.

    Directories are created remotely with `mkdir` over SSH; files are
    transferred one at a time with per-file progress printing.
    """
    def print_progress(filename, size, sent, trans):
        print(
            trans,
            "%s\'s progress: %.2f%% \r" % (filename, float(sent) / float(size) * 100))
    # NOTE(review): `scp_client.peername` (used below) is not an obviously
    # public scp.SCPClient attribute — verify against the scp package version
    # in use.
    scp_client = scp.SCPClient(ssh_client.get_transport(), progress4=print_progress)

    def fnmatch_list(fn, l):
        # True if fn matches any glob pattern in l.
        for n in l:
            if fnmatch.fnmatch(fn, n):
                return True
        return False
    # Glob patterns for directories/files to exclude from the upload.
    ex_list = [
        '.svn', '.cvs', '.idea', '.DS_Store', '.git', '.hg', '.hprof',
        '*.pyc', 'build', '*-build-*', '__pycache__', 'events*', '*.pt'
    ]
    for path, dirs, files in os.walk(local_path, topdown=True):
        # In-place prune so os.walk never descends into excluded dirs.
        dirs[:] = [d for d in dirs if not fnmatch_list(d, ex_list)]
        for d in dirs:
            local_p_to_d = os.path.join(path, d)
            relative_p_to_d = os.path.relpath(local_p_to_d, local_path)
            remote_p_to_d = os.path.join(remote_path, relative_p_to_d)
            print('cp from: ', relative_p_to_d)
            print('cp to: ', remote_p_to_d, scp_client.peername)
            run_one_no_sudo_cmd(ssh_client, 'mkdir ' + remote_p_to_d)
        for filename in files:
            if fnmatch_list(filename, ex_list):
                continue
            local_p_to_f = os.path.join(path, filename)
            relative_p_to_f = os.path.relpath(local_p_to_f, local_path)
            remote_p_to_f = os.path.join(remote_path, relative_p_to_f)
            print('local file', local_p_to_f)
            print('remote file', remote_p_to_f)
            print(
                scp_client.put(local_p_to_f, remote_path=remote_p_to_f,
                               preserve_times=False))
    return
def configure_master(
        self,
        ssh_client: paramiko.client.SSHClient,
        cluster: FlintrockCluster):
    """
    Format the HDFS NameNode and (re)start the HDFS daemons on this node,
    waiting until the NameNode web UI responds and retrying on timeout.
    """
    host = ssh_client.get_transport().getpeername()[0]
    logger.info("[{h}] Configuring HDFS master...".format(h=host))
    ssh_check_output(
        client=ssh_client,
        command="""
            # `|| true` because on cluster restart this command will fail.
            ./hadoop/bin/hdfs namenode -format -nonInteractive || true
        """)
    # This loop is a band-aid for: https://github.com/nchammas/flintrock/issues/157
    attempt_limit = 3
    for attempt in range(attempt_limit):
        try:
            ssh_check_output(
                client=ssh_client,
                command="""
                    ./hadoop/sbin/stop-dfs.sh
                    ./hadoop/sbin/start-dfs.sh

                    master_ui_response_code=0
                    while [ "$master_ui_response_code" -ne 200 ]; do
                        sleep 1
                        master_ui_response_code="$(
                            curl --head --silent --output /dev/null \
                                 --write-out "%{{http_code}}" {m}:50070
                        )"
                    done
                """.format(m=shlex.quote(cluster.master_host)),
                timeout_seconds=90
            )
            break
        # The exception object carries no useful detail, so the unused
        # `as e` binding was dropped.
        except socket.timeout:
            logger.debug(
                "Timed out waiting for HDFS master to come up.{}"
                .format(" Trying again..." if attempt < attempt_limit - 1 else "")
            )
    else:
        # for/else: runs only if no attempt ever `break`s.
        # Message fixed: "Time out" -> "Timed out".
        raise Exception("Timed out waiting for HDFS master to come up.")
def configure_master(self, ssh_client: paramiko.client.SSHClient, cluster: FlintrockCluster):
    """
    Format the HDFS NameNode and (re)start the HDFS daemons on this node,
    waiting until the NameNode web UI responds and retrying on timeout.
    """
    host = ssh_client.get_transport().getpeername()[0]
    logger.info("[{h}] Configuring HDFS master...".format(h=host))
    ssh_check_output(client=ssh_client, command="""
        # `|| true` because on cluster restart this command will fail.
        ./hadoop/bin/hdfs namenode -format -nonInteractive || true
    """)
    # This loop is a band-aid for: https://github.com/nchammas/flintrock/issues/157
    attempt_limit = 3
    for attempt in range(attempt_limit):
        try:
            ssh_check_output(client=ssh_client, command="""
                ./hadoop/sbin/stop-dfs.sh
                ./hadoop/sbin/start-dfs.sh

                master_ui_response_code=0
                while [ "$master_ui_response_code" -ne 200 ]; do
                    sleep 1
                    master_ui_response_code="$(
                        curl \
                            --location --head --silent \
                            --output /dev/null \
                            --write-out "%{{http_code}}" \
                            {m}:{p}
                    )"
                done
            """.format(m=shlex.quote(cluster.master_private_host),
                       p=self.name_node_ui_port),
                timeout_seconds=90)
            break
        # The exception object carries no useful detail, so the unused
        # `as e` binding was dropped.
        except socket.timeout:
            logger.debug(
                "Timed out waiting for HDFS master to come up.{}".format(
                    " Trying again..." if attempt < attempt_limit - 1 else ""))
    else:
        # for/else: runs only if no attempt ever `break`s.
        # Message fixed: "Time out" -> "Timed out".
        raise Exception("Timed out waiting for HDFS master to come up.")
def ensure_java8(client: paramiko.client.SSHClient):
    """
    Ensure the remote machine has Java 1.8, installing OpenJDK 1.8 via yum
    if the detected version is missing or older.
    """
    host = client.get_transport().getpeername()[0]
    java_major_version = get_java_major_version(client)
    if not java_major_version or java_major_version < (1, 8):
        logger.info("[{h}] Installing Java 1.8...".format(h=host))
        ssh_check_output(client=client, command="""
            set -e

            # Install Java 1.8 first to protect packages that depend on Java from being removed.
            sudo yum install -y java-1.8.0-openjdk

            # Remove any older versions of Java to force the default Java to 1.8.
            # We don't use /etc/alternatives because it does not seem to update links in /usr/lib/jvm correctly,
            # and we don't just rely on JAVA_HOME because some programs use java directly in the PATH.
            sudo yum remove -y java-1.6.0-openjdk java-1.7.0-openjdk

            sudo sh -c "echo export JAVA_HOME=/usr/lib/jvm/jre >> /etc/environment"
            source /etc/environment
        """)
def unarchiveSSH(archivePath: pathlib.PosixPath,
                 sshc: paramiko.client.SSHClient,
                 unarchiveFolder: pathlib.PosixPath = None,
                 method: str = "gz",
                 verbose: bool = True) -> None:
    """
    Extract an archive on a remote host over SSH.

    :param archivePath: remote path of the archive to extract.
    :param sshc: connected SSH client used to run the extraction command.
    :param unarchiveFolder: remote destination directory; defaults to the
        archive's parent directory.
    :param method: one of '7z', 'gz', 'xz', 'bz2'.
    :param verbose: when True, stream the remote command's stdout to stdout.
    :raises ValueError: if ``method`` is not supported.
    """
    if unarchiveFolder is None:
        unarchiveFolder = archivePath.parent

    # Quote both paths so spaces/metacharacters survive the remote shell.
    src = shlex.quote(str(archivePath))
    dest = shlex.quote(str(unarchiveFolder))

    if method == "7z":
        command = f"7z e {src} -o{dest}"
    elif method in ("gz", "xz", "bz2"):
        d_format_option = {"gz": "z", "xz": "J", "bz2": "j"}
        # BUG FIX: the original concatenated f-strings with no space between
        # the tar flags and the archive path, producing e.g. "tar -xvzf/path".
        command = f"tar -xv{d_format_option[method]}f {src} -C {dest}"
    else:
        raise ValueError("method only supports ['7z', 'gz', 'xz', 'bz2']")

    with verbosity_context(f"Unarchiving {archivePath}", verbose):
        stdin, stdout, stderr = sshc.exec_command(command)
        if verbose:
            for line in stdout:
                print(line)
def ensure_java8(client: paramiko.client.SSHClient):
    """
    Check the remote host's Java version and install OpenJDK 1.8 when it is
    absent or older than 1.8.
    """
    peer_host = client.get_transport().getpeername()[0]
    current = get_java_major_version(client)

    # Install when no Java was found at all, or when it predates 1.8.
    needs_java8 = (not current) or current < (1, 8)
    if needs_java8:
        logger.info("[{h}] Installing Java 1.8...".format(h=peer_host))
        install_script = """
            set -e

            # Install Java 1.8 first to protect packages that depend on Java from being removed.
            sudo yum install -y java-1.8.0-openjdk

            # Remove any older versions of Java to force the default Java to 1.8.
            # We don't use /etc/alternatives because it does not seem to update links in /usr/lib/jvm correctly,
            # and we don't just rely on JAVA_HOME because some programs use java directly in the PATH.
            sudo yum remove -y java-1.6.0-openjdk java-1.7.0-openjdk

            sudo sh -c "echo export JAVA_HOME=/usr/lib/jvm/jre >> /etc/environment"
            source /etc/environment
        """
        ssh_check_output(client=client, command=install_script)
def download_checkpoint(ssh_client: paramiko.client.SSHClient,
                        sftp_client: paramiko.sftp_client.SFTPClient,
                        run_id: str) -> None:
    """
    Download the latest TensorFlow checkpoint for ``run_id`` from the remote
    host into ``<output_dir>/<run_id>/``.

    Finds the newest checkpoint file via version-sorted `ls`, derives the
    checkpoint's base name, and fetches its .index/.meta/.data files over SFTP.
    Prints 'File not found' and returns if no checkpoint exists.

    NOTE(review): relies on module-level globals `user` and `output_dir` —
    defined elsewhere in this file.
    """
    remote_checkpoint_dir = f'/home/{user}/proj/Wave-U-Net/checkpoints/{run_id}/'
    _stdin, stdout, _stderr = ssh_client.exec_command(
        f'ls -1 -v {remote_checkpoint_dir} -I checkpoint | tail -n 1')
    # BUG FIX: strip the trailing newline that `ls | tail` leaves in the
    # output; previously it leaked into the derived filenames.
    last_checkpoint_filename = stdout.read().decode('utf8').strip()
    if not last_checkpoint_filename:
        print('File not found')
        return

    last_checkpoint_name = os.path.splitext(last_checkpoint_filename)[0]
    checkpoint_files = [
        last_checkpoint_name + '.index',
        last_checkpoint_name + '.meta',
        last_checkpoint_name + '.data-00000-of-00001'
    ]

    local_folder = os.path.join(output_dir, run_id)
    os.makedirs(local_folder, exist_ok=True)
    for filename in checkpoint_files:
        local_path = os.path.join(local_folder, filename)
        # The remote path is always POSIX and the dir ends with '/', so plain
        # concatenation is correct even when this script runs on Windows
        # (os.path.join would insert a backslash there).
        remote_path = remote_checkpoint_dir + filename
        print(remote_path)
        sftp_client.get(remote_path, local_path)
def configure_master(
        self,
        ssh_client: paramiko.client.SSHClient,
        cluster: FlintrockCluster):
    """
    Start Spark on the cluster and wait for the master web UI to respond,
    retrying the startup a few times if it does not come up in time.
    """
    master_address = ssh_client.get_transport().getpeername()[0]
    logger.info("[{h}] Configuring Spark master...".format(h=master_address))

    # This loop is a band-aid for: https://github.com/nchammas/flintrock/issues/129
    attempt_limit = 3
    for attempt_number in range(attempt_limit):
        try:
            ssh_check_output(
                client=ssh_client,
                # Maybe move this shell script out to some separate
                # file/folder for the Spark service.
                command="""
                    spark/sbin/start-all.sh

                    master_ui_response_code=0
                    while [ "$master_ui_response_code" -ne 200 ]; do
                        sleep 1
                        master_ui_response_code="$(
                            curl --head --silent --output /dev/null \
                                --write-out "%{{http_code}}" {m}:8080
                        )"
                    done
                """.format(m=shlex.quote(cluster.master_host)),
                timeout_seconds=90,
            )
        except socket.timeout:
            retry_suffix = (
                " Trying again..." if attempt_number < attempt_limit - 1 else "")
            logger.debug(
                "Timed out waiting for Spark master to come up.{}".format(
                    retry_suffix))
        else:
            break
    else:
        raise Exception("Timed out waiting for Spark master to come up.")
def run_one_no_sudo_cmd(ssh_client: paramiko.client.SSHClient, cmd: str):
    """
    Execute ``cmd`` on the remote host without sudo and print the captured
    stdout and stderr streams, tagged with the peer's address and port.
    """
    peer_ip, peer_port = ssh_client.get_transport().getpeername()
    # Channel and stdin are not used here; only the output streams matter.
    _channel, _stdin, stdout, stderr = no_sudo_exec(ssh_client, cmd)
    print_stdout_blocking(stdout, "run_one_no_sudo_cmd", peer_ip, peer_port)
    print_stderr_blocking(stderr, "run_one_no_sudo_cmd", peer_ip, peer_port)
def configure_master(
        self,
        ssh_client: paramiko.client.SSHClient,
        cluster_info: ClusterInfo):
    """
    Configures the Spark master and starts both the master and slaves.

    Writes the slaves file, starts the master, waits for its web UI to answer
    with HTTP 200, starts the slaves, then prints a health report pulled from
    the master's JSON endpoint.

    :param ssh_client: open SSH connection to the Spark master.
    :param cluster_info: provides master and slave host names.
    """
    host = ssh_client.get_transport().getpeername()[0]
    print("[{h}] Configuring Spark master...".format(h=host))

    # TODO: Maybe move this shell script out to some separate file/folder
    #       for the Spark module.
    ssh_check_output(
        client=ssh_client,
        command="""
            set -e

            echo {s} > spark/conf/slaves

            spark/sbin/start-master.sh

            set +e

            master_ui_response_code=0
            while [ "$master_ui_response_code" -ne 200 ]; do
                sleep 1
                master_ui_response_code="$(
                    curl --head --silent --output /dev/null \
                        --write-out "%{{http_code}}" {m}:8080
                )"
            done

            set -e

            spark/sbin/start-slaves.sh
        """.format(
            s=shlex.quote('\n'.join(cluster_info.slave_hosts)),
            m=shlex.quote(cluster_info.master_host)))

    # Spark health check
    # TODO: Move to health_check() module method?
    # TODO: Research (or implement) way to get Spark to tell you when
    #       it's ready, as opposed to checking after a time delay.
    time.sleep(30)

    spark_master_ui = 'http://{m}:8080/json/'.format(m=cluster_info.master_host)
    # BUG FIX: close the HTTP connection when done; the object returned by
    # urlopen() is a context manager and was previously never closed.
    with urllib.request.urlopen(spark_master_ui) as response:
        spark_ui_info = json.loads(response.read().decode('utf-8'))

    print(textwrap.dedent(
        """\
        Spark Health Report:
          * Master: {status}
          * Workers: {workers}
          * Cores: {cores}
          * Memory: {memory:.1f} GB\
        """.format(
            status=spark_ui_info['status'],
            workers=len(spark_ui_info['workers']),
            cores=spark_ui_info['cores'],
            memory=spark_ui_info['memory'] / 1024)))