Code Example #1
    def test_fail_connect_with_retries(self):
        connerr = None  # type: Optional[ConnectionError]
        try:
            _ = spurplus.connect_with_retries(hostname="some-nonexisting-hostname.com", retries=2, retry_period=1)
        except ConnectionError as err:
            connerr = err

        self.assertIsNotNone(connerr)
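
The same failure check can be written more compactly with unittest's assertRaises context manager. A minimal sketch, assuming the same spurplus API and unreachable hostname as in the test above:

    def test_fail_connect_with_retries_ctx(self):
        # assertRaises confirms that connect_with_retries gives up after the
        # configured number of retries and surfaces a ConnectionError
        with self.assertRaises(ConnectionError):
            spurplus.connect_with_retries(
                hostname="some-nonexisting-hostname.com", retries=2, retry_period=1)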
Code Example #2
 def connect(self,
             hostname: Optional[str] = None,
             username: Optional[str] = None,
             password: Optional[str] = None,
             use_key: bool = False,
             login: bool = False,
             port: Optional[int] = None,
             timeout: int = 60,
             retries: int = 0,
             retry_delay: int = 1) -> None:
     # ADD DOCSTRING
     # TODO: deal with prompting for required fields if not provided
     if self.connected:
         raise SSHConnectionError(
             'already connected to a remote host. use `SshShell.disconnect` '
             'to disconnect from the current host before connecting to a new '
             'one, or `SshShell.reconnect` to reset the connection to the '
             'current host')
     port = port or self.port or 22
     hostname = hostname or self.hostname or input("Hostname: ")
     username = username or self.username or input("Username: ")
     password = password or input("Password: ")
     self._shell = spurplus.connect_with_retries(
         hostname=hostname,
         username=username,
         password=password,
         look_for_private_keys=(not use_key),
         port=port,
         connect_timeout=timeout,
         retries=retries,
         retry_period=retry_delay)
     # only update attrs if connection is successful
     self.hostname = hostname
     self.username = username
     self.port = port
     self.connected = True
     if self._environ is None:
         # read environment variables
         tmp_exe = self.executable or '/bin/bash'
         if login:
             printenv_command = [tmp_exe, '-lc', 'printenv']
         else:
             printenv_command = ['printenv']
         # TODO: a more robust solution for this in case BASH_FUNC_module isn't last
         initial_env = self.shell.check_output(printenv_command).split(
             '\nBASH_FUNC_module()')[0]
         initial_env = dict(
             map(lambda x: x.split('=', maxsplit=1),
                 initial_env.splitlines()))
         self._environ = PseudoEnviron(initial_env=initial_env,
                                       custom_vars=self._env_additions)
     # initial validation of properties that depend on environment
     self.cwd = self._cwd
     self.executable = self._executable
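
A hypothetical call to the connect method above could look like the following; the argument values and the no-argument SshShell() construction are assumptions, not part of the snippet:

# hypothetical usage of the connect wrapper above (host and user are made up);
# with no password argument, the method falls back to prompting for one
shell = SshShell()
shell.connect(hostname='cluster.example.edu',
              username='alice',
              retries=3,
              retry_delay=2)
print(shell.connected)  # True once spurplus.connect_with_retries succeeds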
Code Example #3
def remote_submit(sync_changes=False, config_path=None):
    """
    main function that handles submitting jobs on the cluster from your local
    machine

    :param sync_changes: (bool, default: False) if True, upload any local
    changes to cluster scripts before submitting jobs
    :param config_path: (str, optional, default: None) path to your config file.
    If you created your config following the instructions in
    configs/template_config.ini, you can simply leave this empty
    :return: None (other than, hopefully, some results eventually!)
    """
    if config_path is None:
        config = attempt_load_config()
    else:
        config = parse_config(config_path)

    hostname = config['hostname']
    username = config['username']
    password = config['password']
    confirm_overwrite = config['confirm_overwrite_on_upload']

    if username.startswith('f00'):
        job_cmd = 'mksub'
    else:
        job_cmd = 'qsub'

    # set commands
    if job_config['env_type'] == 'conda':
        activate_cmd = 'source activate'
        deactivate_cmd = 'conda deactivate'
    else:
        # TODO: add commands for venv & virtualenv activation
        raise ValueError("Only conda environments are currently supported")

    with connect_with_retries(hostname=hostname,
                              username=username,
                              password=password) as cluster:
        if sync_changes:
            # upload cluster scripts to remote
            script_dir = opj(dirname(realpath(__file__)), 'cluster_scripts')
            upload_scripts(cluster, script_dir, job_config, confirm_overwrite)

        # create bash script to submit and run submit.py from compute node
        submitter_filepath = write_remote_submitter(cluster, job_config,
                                                    activate_cmd,
                                                    deactivate_cmd)

        # format commands for remote shell
        submitter_cmds = [job_cmd, submitter_filepath]
        remote_command = fmt_remote_commands(submitter_cmds)
        # run the submitter script
        cluster.run(remote_command)
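
remote_submit only reads the connection-related keys shown above from the config; a hypothetical mapping with those keys (values are made up, the real template lives in configs/template_config.ini):

# hypothetical config values; the real template is configs/template_config.ini
config = {
    'hostname': 'discovery.example.edu',
    'username': 'f001abc',   # 'f00...' usernames submit with mksub, others with qsub
    'password': '<your password>',
    'confirm_overwrite_on_upload': True,
}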
Code Example #4
File: zctl.py Project: jsmoyam/zserver
 def destination_exists(self, destination: str, ssh_connection: str = None) -> bool:
     # Check whether the destination exists locally or on the remote server
     if ssh_connection:
         # ssh_connection --> ssh://username:password@server:port
         url = urllib.parse.urlparse(ssh_connection)
         with spurplus.connect_with_retries(
                 retries=5,
                 hostname=url.hostname,
                 username=url.username,
                 password=url.password if url.password else None,
                 port=url.port if url.port else None
         ) as shell:
             return shell.exists(destination)
     else:
         return os.path.isdir(destination)
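
The ssh:// connection string is split into the pieces handed to spurplus.connect_with_retries by urllib.parse.urlparse; a quick illustration with made-up credentials:

import urllib.parse

# made-up connection string in the ssh://username:password@server:port format
url = urllib.parse.urlparse('ssh://alice:secret@server.example.com:2222')
print(url.hostname)  # 'server.example.com'
print(url.username)  # 'alice'
print(url.password)  # 'secret'
print(url.port)      # 2222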
Code Example #5
def get_shell(production=False) -> SshShell:

    global sh
    if sh is not None:
        return sh

    if production:
        hostname = "safeisland.hesusruiz.org"
        print("=== Operating in PRODUCTION!!")
    else:
        hostname = "safeislandtest.hesusruiz.org"
        print("=== Operating in DEVELOPMENT!!")

    sh = spurplus.connect_with_retries(
        hostname=hostname,
        username='******',
        private_key_file='../telsiusin2/awsnode/AWSAlastriaIN2.pem',
        retries=5,
        connect_timeout=5,
    )
    return sh
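
Because the connected shell is cached in the module-level sh, later calls reuse the same connection. A hypothetical follow-up command via check_output, which spurplus.SshShell provides (it is used the same way in the other examples on this page):

# hypothetical usage: reuse the cached shell for a quick remote command
shell = get_shell(production=False)
print(shell.check_output(['uname', '-a']))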
Code Example #6
def set_up_test_shell() -> spurplus.SshShell:
    """sets up a shell to the testing instance."""
    params = params_from_environ()

    try:
        shell = spurplus.connect_with_retries(
            hostname=params.hostname,
            port=params.port,
            username=params.username,
            password=params.password,
            private_key_file=params.private_key_file,
            missing_host_key=spur.ssh.MissingHostKey.accept,
            retries=2,
            retry_period=1)
    except ConnectionError as err:
        raise ConnectionError(
            "Failed to connect to {}@{}:{}, private key file: {}, password is not None: {}"
            .format(params.username, params.hostname, params.port,
                    params.private_key_file, params.password
                    is not None)) from err

    return shell
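
params_from_environ is not shown here; a minimal sketch of what it might return, assuming hypothetical TEST_SSH_* environment variable names and a simple namespace carrying the five fields used above:

import os
from types import SimpleNamespace

def params_from_environ() -> SimpleNamespace:
    # hypothetical environment variable names; only the five fields used above are assumed
    return SimpleNamespace(
        hostname=os.environ.get('TEST_SSH_HOSTNAME', '127.0.0.1'),
        port=int(os.environ.get('TEST_SSH_PORT', '22')),
        username=os.environ.get('TEST_SSH_USERNAME'),
        password=os.environ.get('TEST_SSH_PASSWORD'),
        private_key_file=os.environ.get('TEST_SSH_PRIVATE_KEY_FILE'))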
Code Example #7
File: zctl.py Project: jsmoyam/zserver
    def get_variables_from_startup_script(self, ssh_connection: str) -> dict:
        # Get values of remote variables by reading the startup script directly

        # ssh_connection --> ssh://username:password@server:port
        url = urllib.parse.urlparse(ssh_connection)
        with spurplus.connect_with_retries(
                retries=5,
                hostname=url.hostname,
                username=url.username,
                password=url.password if url.password else None,
                port=url.port if url.port else None
        ) as shell:
            shell.run(['ls'])
            script = os.sep + 'home' + os.sep + url.username + os.sep + STARTUP_SCRIPT_NAME
            content = shell.read_text(script)
            lines = content.split('\n')

            variables = dict()
            for line in lines:
                if line.startswith('zset'):
                    line_as_list = line.split()
                    variables[line_as_list[1]] = line_as_list[2]

            return variables
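
The loop above expects lines of the form 'zset NAME VALUE' in the startup script; a standalone illustration of the parsing with made-up content and variable names:

# made-up startup-script content illustrating the zset lines parsed above
content = 'zset INSTALL_PATH /opt/zserver\nzset VERSION_NAME 1.2.3\n'

variables = dict()
for line in content.split('\n'):
    if line.startswith('zset'):
        line_as_list = line.split()
        variables[line_as_list[1]] = line_as_list[2]

print(variables)  # {'INSTALL_PATH': '/opt/zserver', 'VERSION_NAME': '1.2.3'}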
Code Example #8
def resubmit_failed(confirm_resubmission=False, config_path=None):
    # TODO: add docstring

    if config_path is None:
        config = attempt_load_config()
    else:
        config = parse_config(config_path)

    hostname = config['hostname']
    username = config['username']
    password = config['password']
    confirm = config['confirm_resubmission']

    workingdir = job_config['workingdir']
    scriptdir = job_config['scriptdir']
    job_name = job_config['jobname']

    # set confirmation option from config if not set here
    if confirm and not confirm_resubmission:
        confirm_resubmission = True

    # set submission command
    if username.startswith('f00'):
        job_cmd = 'mksub'
    else:
        job_cmd = 'qsub'

    with connect_with_retries(hostname=hostname,
                              username=username,
                              password=password) as cluster:
        cluster_sftp = cluster.as_sftp()

        # get all created bash scripts
        all_scripts = cluster_sftp.listdir(scriptdir)
        print(f"found {len(all_scripts)} job scripts")

        stdout_files = [
            f for f in cluster_sftp.listdir(workingdir)
            if f.startswith(f'{job_name}.o')
        ]
        print(f"found {len(stdout_files)} job stdout files")

        # get output of qstat command
        running_jobs = [
            line for line in get_qstat(cluster)
            if len(line) > 0 and line[0].isnumeric()
        ]
        # filter out completed jobs, isolate jobid
        running_jobids = [
            line.split('.')[0] for line in running_jobs
            if line.split()[-2] != 'C'
        ]
        print(f"found {len(running_jobids)} running jobs")

        print("parsing stdout files...")

        successful_jobs = {}
        for outfile in stdout_files:
            jobid = outfile.split('.o')[1]

            # read stdout file
            stdout_path = opj(workingdir, outfile)
            stdout = cluster.read_text(stdout_path)
            try:
                job_script = stdout.split('script name: ')[1].splitlines()[0]
                # track successfully finished jobs
                if 'job script finished' in stdout:
                    successful_jobs[job_script] = jobid
            except (IndexError, ValueError):
                print(f"failed to find corresponding script for {outfile}...")
                continue

        to_resubmit = [
            s for s in all_scripts if s not in list(successful_jobs.keys())
        ]

        if confirm_resubmission:
            view_scripts = prompt_input("View jobs to be resubmitted "
                                        "before proceeding?")
            if view_scripts:
                print('\n'.join(to_resubmit))
                resubmit_confirmed = prompt_input("Do you want to resubmit "
                                                  "these jobs?")
                if not resubmit_confirmed:
                    sys.exit()

        print("Removing failed jobs' stdout/stderr files...")
        for outfile in stdout_files:
            jobid = outfile.split('.o')[1]
            if not (jobid in successful_jobs.values()
                    or jobid in running_jobids):
                stdout_path = opj(workingdir, outfile)
                stderr_path = opj(workingdir, f'{job_name}.e{jobid}')
                cluster.remove(stdout_path)
                cluster.remove(stderr_path)

        print(f"resubmitting {len(to_resubmit)} jobs")
        for job in to_resubmit:
            script_path = opj(scriptdir, job)
            print(f"resubmitting {job}")
            cmd = fmt_remote_commands([f'{job_cmd} {script_path}'])
            cluster.run(cmd)
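
The matching of scripts, stdout files and job ids above relies on PBS-style output-file names; a short illustration with an assumed job name and id:

# assumed example values illustrating the naming convention used above
job_name = 'myjob'
outfile = f'{job_name}.o1234567'        # stdout file written by the scheduler

jobid = outfile.split('.o')[1]          # -> '1234567'
stderr_file = f'{job_name}.e{jobid}'    # matching stderr file removed on resubmission
print(jobid, stderr_file)               # 1234567 myjob.e1234567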
Code Example #9
                if not overwrite_confirmed:
                    print(f"skipping {file} (overwrite declined)")
                    continue

        remote_shell.put(src_path, dest_path, create_directories=False)
        print(f"uploaded {file}")
    print("finished uploading scripts")


# setup for running as a stand-alone script
if __name__ == '__main__':
    config = attempt_load_config()
    hostname = config['hostname']
    username = config['username']
    password = config['password']
    confirm_overwrite = config['confirm_overwrite_on_upload']

    script_dir = opj(dirname(realpath(__file__)), 'cluster_scripts')

    with connect_with_retries(
        hostname=hostname,
        username=username,
        password=password
    ) as cluster:
        upload_scripts(
            cluster,
            script_dir,
            job_config,
            confirm_overwrite=confirm_overwrite
        )
Code Example #10
File: zctl.py Project: jsmoyam/zserver
    def do_zremoteupgrade(self, args):
        """Upgrade Z server in remote server.
     Usage:
         zremoteupgrade sshconnection
         zremoteupgrade sshconnection version_name
         sshconnection --> ssh://username:password@server:port
         """

        arglist = args.split()

        # Expect 1 or 2 arguments
        if len(arglist) < 1 or len(arglist) > 2:
            self.perror('zremoteupgrade requires 1 or 2 arguments:', traceback_war=False)
            self.do_help('zremoteupgrade')
            self._last_result = cmd2.CommandResult('', 'Bad arguments')
            return

        # Recover sshconnection
        ssh_connection = arglist[0]

        # Recover install path, git url and version name
        remote_variables = self.get_variables_from_startup_script(ssh_connection)
        install_path = remote_variables.get(VARIABLE_INSTALL_PATH, None)
        older_version_name = remote_variables.get(VARIABLE_VERSION_NAME, None)
        giturl = remote_variables.get(VARIABLE_GIT_URL, None)

        if len(arglist) == 2:
            version_name_arg = arglist[1]
        else:
            version_name_arg = self.get_last_tag(giturl)

        # Convert version_name if dev is selected
        version_name = version_name_arg
        if version_name_arg.lower() == DEVELOPER_BRANCH[0]:
            version_name = DEVELOPER_BRANCH[1]

        if install_path and older_version_name and giturl:

            # Do some actions on the remote server: stop, backup, delete and install

            # ssh_connection --> ssh://username:password@server:port
            url = urllib.parse.urlparse(ssh_connection)
            with spurplus.connect_with_retries(
                    retries=5,
                    hostname=url.hostname,
                    username=url.username,
                    password=url.password if url.password else None,
                    port=url.port if url.port else None
            ) as shell:

                # Stop server
                zctl_path = install_path + os.sep + ZCTL_NAME
                command = [zctl_path] + ['zstop'] + ['quit']
                shell.run(command)

                # Compress the current deployment for backup, writing the compressed file to the backup folder
                backup_folder = install_path + os.sep + '..' + os.sep + 'backup' + os.sep
                filename = '{}-{}.tar.gz'.format(self.get_timestamp(), older_version_name)
                compressed_file = backup_folder + filename

                # Execute targz command
                command = [zctl_path] + ['targz {} {}'.format(install_path, compressed_file)] + ['quit']
                shell.run(command)

                # Delete installation
                command = ['rm'] + ['-rf'] + [install_path]
                shell.run(command)

                # Install the new version
                self.poutput('Upgrading Z server')
                self.do_zremoteinstall(install_path + ' ' + giturl + ' ' + version_name_arg + ' ' + ssh_connection)

        else:
            self.poutput('No installation folder found')
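
For reference, the backup archive path assembled above lands in a backup folder next to the installation; a quick sketch with assumed values (get_timestamp is not shown, so its format here is made up):

import os

# assumed values; get_timestamp() and the install layout are not shown above
install_path = '/opt/zserver'
older_version_name = '1.2.2'
timestamp = '20240101-120000'

backup_folder = install_path + os.sep + '..' + os.sep + 'backup' + os.sep
compressed_file = backup_folder + '{}-{}.tar.gz'.format(timestamp, older_version_name)
print(compressed_file)  # /opt/zserver/../backup/20240101-120000-1.2.2.tar.gz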
Code Example #11
File: zctl.py Project: jsmoyam/zserver
    def do_zremoteinstall(self, args):
        """Install Z server in remote server.
    Usage:
        zremoteinstall destination giturl version_name sshconnection
        sshconnection --> ssh://username:password@server:port
        """

        arglist = args.split()

        # Expect exactly 4 arguments
        if not arglist or len(arglist) != 4:
            self.perror('zremoteinstall requires exactly 4 arguments:', traceback_war=False)
            self.do_help('zremoteinstall')
            self._last_result = cmd2.CommandResult('', 'Bad arguments')
            return

        # Recover arguments
        destination = arglist[0]
        giturl_raw = arglist[1]
        version_name_arg = arglist[2]
        ssh_connection = arglist[3]

        # If present, URL-encode username and password to build the git url
        giturl = self.get_git_url_encoded(giturl_raw)

        # Check if version_name exists
        if not self.version_exists(giturl_raw, version_name_arg):
            self.perror('Version does not exist. Please check it.', traceback_war=False)
            return

        # If the version name is "dev", it is necessary to convert it to the correct branch
        version_name = self.convert_version_name(version_name_arg)

        # Check if destination folder exists
        if self.destination_exists(destination, ssh_connection):
            self.perror('Destination folder exists. Please check it and delete it.', traceback_war=False)
            return

        # Define destination_tmp and venv_dir_tmp and delete them if they already exist
        destination_tmp = tempfile.gettempdir() + os.sep + 'install'
        venv_dir_tmp = os.path.abspath(destination_tmp + os.sep + '..' + os.sep + 'venv')

        if self.destination_exists(destination_tmp):
            shutil.rmtree(destination_tmp)
        if self.destination_exists(venv_dir_tmp):
            shutil.rmtree(venv_dir_tmp)

        # Clone repository into the temporary destination folder
        self.poutput('Installing Z server')
        self.clone_from_repository(giturl, version_name, destination_tmp)

        # Create virtual environment and install requirements
        bin_dir_tmp = os.path.abspath(venv_dir_tmp + os.sep + 'bin')
        os.system('python3 -m venv ' + venv_dir_tmp)
        os.system(bin_dir_tmp + os.sep + 'pip install --no-cache-dir -r ' + destination_tmp + os.sep +
                  'requirements.txt')

        # Define venv_dir. Define python binary folder
        venv_dir = os.path.abspath(destination + os.sep + '..' + os.sep + 'venv')
        bin_dir = os.path.abspath(venv_dir + os.sep + 'bin')

        # Replace all occurrences of venv_dir_tmp with the new venv folder in destination
        self.find_replace(bin_dir_tmp, venv_dir_tmp, venv_dir)

        # Replace the shebang (#!) with the venv python in destination_tmp files
        old_shebang = '#!/usr/bin/python3'
        new_shebang = '#!{}'.format(bin_dir) + os.sep + 'python'
        zctl_path_tmp = destination_tmp + os.sep + ZCTL_NAME
        self.find_replace(zctl_path_tmp, old_shebang, new_shebang, first_occurrence=True)

        # Perform operations on the remote server
        url = urllib.parse.urlparse(ssh_connection)
        with spurplus.connect_with_retries(
                retries=5,
                hostname=url.hostname,
                username=url.username,
                password=url.password if url.password else None,
                port=url.port if url.port else None
        ) as shell:

            # Delete the virtual environment if it exists
            if shell.exists(venv_dir):
                shell.remove(venv_dir, recursive=True)

            # Copy the temporary virtual environment and temporary installation folder to the remote server
            shell.mkdir(remote_path=destination, parents=True, exist_ok=True)
            shell.mkdir(remote_path=venv_dir, parents=True, exist_ok=True)

            self.poutput('Copying Z server')
            shell.sync_to_remote(
                local_path=destination_tmp,
                remote_path=destination,
                delete=spurplus.Delete.BEFORE,
                preserve_permissions=True
            )

            self.poutput('Copying virtual environment')
            shell.sync_to_remote(
                local_path=venv_dir_tmp,
                remote_path=venv_dir,
                delete=spurplus.Delete.BEFORE,
                preserve_permissions=True
            )

            # Set install path and version name variables and store them
            # Execute these commands in remote server
            zctl_path = destination + os.sep + ZCTL_NAME
            command_1 = 'zset {} {} store'.format(VARIABLE_INSTALL_PATH, destination)
            command_2 = 'zset {} {} store'.format(VARIABLE_VERSION_NAME, version_name_arg)
            command_3 = 'zset {} {} store'.format(VARIABLE_GIT_URL, giturl_raw)
            command_4 = 'zset {} {} store'.format(VARIABLE_VENV_BIN_PATH, bin_dir)
            command_5 = 'quit'
            zcommand = '{} {} {} {} {}'.format(command_1, command_2, command_3, command_4, command_5)

            chmod_command = ['chmod'] + ['+x'] + [zctl_path]
            command = [zctl_path] + [command_1] + [command_2] + [command_3] + [command_4] + [command_5]
            shell.run(chmod_command)
            shell.run(command)

        self.poutput('Z server installed')
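
find_replace is not shown in this example; a minimal sketch of a plain-text substitution that would cover the single-file shebang rewrite above, assuming first_occurrence simply limits str.replace to one hit:

from pathlib import Path

def find_replace_sketch(path: str, old: str, new: str,
                        first_occurrence: bool = False) -> None:
    # hypothetical stand-in for the unshown find_replace helper (single file only)
    text = Path(path).read_text()
    count = 1 if first_occurrence else -1
    Path(path).write_text(text.replace(old, new, count))

# e.g. find_replace_sketch(zctl_path_tmp, old_shebang, new_shebang, first_occurrence=True)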
Code Example #12
def main() -> int:
    """Execute the main routine."""
    ##
    # Specify command-line arguments
    ##

    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--hostname",
                        help="Host name of the remote instance",
                        required=True)

    parser.add_argument("--user",
                        help="Remote user used for the deployment",
                        default="devop")

    group = parser.add_mutually_exclusive_group()
    group.add_argument("--data_dir",
                       help="Root directory where data sets reside; "
                       "if not specified, the data is not synced")

    group.add_argument(
        "--data_url",
        help="URL where data sets reside; "
        "if not specified, the data is not downloaded on the remote host")

    parser.add_argument(
        "--dont_install_requirements",
        help="If set, requirements are not installed on the remote machine.",
        action='store_true')

    ##
    # Parse command-line arguments
    ##

    args = parser.parse_args()

    hostname = str(args.hostname)
    remote_user = str(args.user)

    data_dir = None if args.data_dir is None else pathlib.Path(args.data_dir)
    data_url = None if args.data_url is None else str(args.data_url)

    assert ((data_dir is None and data_url is None)
            or ((data_dir is None) ^ (data_url is None)))

    if data_dir is not None and not data_dir.exists():
        raise FileNotFoundError(
            "Data directory does not exist: {}".format(data_dir))

    dont_install_requirements = bool(args.dont_install_requirements)

    ##
    # Connect
    ##

    print("Connecting to the remote instance at {}@{} ...".format(
        remote_user, hostname))

    with spurplus.connect_with_retries(
            hostname=hostname,
            username=remote_user,
            missing_host_key=spur.ssh.MissingHostKey.warn) as ssh:
        ##
        # Specify common paths
        ##

        remote_home_dir = pathlib.Path(
            ssh.check_output(['/bin/bash', '-c', 'echo $HOME']).strip())

        remote_mediti_dir = remote_home_dir / "mediti-train"

        script_dir = pathlib.Path(os.path.realpath(__file__)).parent

        ##
        # Install requirements
        ##

        if not dont_install_requirements:
            print("Installing the requirements...")

            ssh.put(local_path=script_dir / "requirements-gpu.txt",
                    remote_path=remote_mediti_dir / "requirements-gpu.txt")

            install_pth = remote_mediti_dir / 'install.sh'
            ssh.write_text(remote_path=install_pth,
                           text=textwrap.dedent('''#!/bin/bash
                    set -e
                    echo "sudo apt-get install'ing ..."
                    sudo apt-get install -y python3-venv wget unzip
                    
                    echo "Creating the virtual environment ..."
                    python3 -m venv venv
                    source venv/bin/activate
                    
                    echo "Installing the python requirements ..."
                    pip3 install -r requirements-gpu.txt
                    '''))

            ssh.chmod(remote_path=install_pth, mode=0o700)

            ssh.run(command=[install_pth.as_posix()], cwd=remote_mediti_dir)

        ##
        # Sync the data
        ##

        if data_dir is not None:
            print("Syncing the data...")

            assert data_dir is not None
            remote_data_dir = remote_mediti_dir / "data"
            ssh.mkdir(remote_path=remote_data_dir, exist_ok=True)
            ssh.sync_to_remote(local_path=data_dir,
                               remote_path=remote_data_dir,
                               delete=spurplus.Delete.BEFORE)

        if data_url is not None:
            remote_data_dir = remote_mediti_dir / "data"
            remote_limbo_dir = remote_mediti_dir / "data-limbo.{}".format(
                uuid.uuid4())

            def remove_limbo_dir() -> None:
                """Delete the temporary data limbo directory."""
                if ssh.exists(remote_limbo_dir):
                    print("Removing the limbo directory: {}".format(
                        remote_limbo_dir))
                    assert remote_limbo_dir != pathlib.Path()
                    assert remote_mediti_dir in remote_limbo_dir.parents
                    assert remote_limbo_dir != remote_mediti_dir
                    ssh.run(['rm', '-rf', remote_limbo_dir.as_posix()])

            with contextlib.ExitStack() as exit_stack:
                exit_stack.callback(remove_limbo_dir)

                ssh.mkdir(remote_limbo_dir, exist_ok=True, parents=True)
                ssh.mkdir(remote_data_dir, exist_ok=True, parents=True)

                print("Downloading the data to the remote limbo: {}".format(
                    remote_limbo_dir))

                # yapf: disable
                ssh.run([
                    'wget', data_url,
                    '-O', (remote_limbo_dir / "data.zip").as_posix()])
                # yapf: enable

                # yapf: disable
                ssh.run([
                    'unzip', (remote_limbo_dir / "data.zip").as_posix(),
                    '-d', (remote_data_dir).as_posix()
                ])
                # yapf: enable

        ##
        # Sync the code
        ##

        print("Syncing the code...")

        rel_pths = [
            pathlib.Path("fine_tune.py"),
            pathlib.Path("evaluate.py"),
            pathlib.Path("file_iterator.py"),
            pathlib.Path("specsmod.py"),
        ]

        remote_src_pth = remote_mediti_dir / "src"

        for rel_pth in rel_pths:
            ssh.put(local_path=script_dir / rel_pth,
                    remote_path=remote_src_pth / rel_pth)

        for rel_pth in [
                pathlib.Path("fine_tune.py"),
                pathlib.Path("evaluate.py")
        ]:
            ssh.chmod(remote_path=remote_src_pth / rel_pth, mode=0o700)

        ##
        # Goodbye
        ##

        print("The deployment has finished.")
        print("Execute manually to train and evaluate on: {}@{}".format(
            remote_user, hostname))

    return 0