def Run(self, args):
  """See ssh_utils.BaseSSHCLICommand.Run."""
  on_prem = (
      args.IsKnownAndSpecified('network') and
      args.IsKnownAndSpecified('region'))
  if on_prem:
    args.plain = True

  # These two lines are needed to ensure reauth is performed as needed, even
  # for on-prem, which doesn't use the resulting variables.
  holder = base_classes.ComputeApiHolder(self.ReleaseTrack())
  client = holder.client

  ssh_helper = ssh_utils.BaseSSHCLIHelper()
  ssh_helper.Run(args)

  oslogin_state = ssh.OsloginState()

  if on_prem:
    user, ip = ssh_utils.GetUserAndInstance(args.user_host)
    remote = ssh.Remote(ip, user)

    iap_tunnel_args = iap_tunnel.CreateOnPremSshTunnelArgs(
        args, self.ReleaseTrack(), ip)

    instance_address = ip
    internal_address = ip
  else:
    user, instance_name = ssh_utils.GetUserAndInstance(args.user_host)
    instance_ref = instance_flags.SSH_INSTANCE_RESOLVER.ResolveResources(
        [instance_name],
        compute_scope.ScopeEnum.ZONE,
        args.zone,
        holder.resources,
        scope_lister=instance_flags.GetInstanceZoneScopeLister(client))[0]
    instance = ssh_helper.GetInstance(client, instance_ref)
    project = ssh_helper.GetProject(client, instance_ref.project)
    host_keys = ssh_helper.GetHostKeysFromGuestAttributes(
        client, instance_ref, instance, project)

    iap_tunnel_args = iap_tunnel.CreateSshTunnelArgs(
        args, self.ReleaseTrack(), instance_ref,
        ssh_utils.GetExternalInterface(instance, no_raise=True))

    internal_address = ssh_utils.GetInternalIPAddress(instance)

    if args.troubleshoot:
      log.status.Print(TROUBLESHOOT_HEADER.format(
          instance_ref, args.zone or instance_ref.zone,
          datetime.datetime.now()))
      RunTroubleshooting(project, args.zone or instance_ref.zone,
                         instance, iap_tunnel_args)
      return

    if not host_keys and host_keys is not None:
      log.debug('Unable to retrieve host keys from instance metadata. '
                'Continuing.')

    expiration, expiration_micros = ssh_utils.GetSSHKeyExpirationFromArgs(
        args)

    if args.plain:
      oslogin_state.oslogin_enabled = False
    else:
      public_key = ssh_helper.keys.GetPublicKey().ToEntry(
          include_comment=True)
      # If there is an '@' symbol in the user_host arg, the user is requesting
      # to connect as a specific user. This may get overridden by OS Login.
      username_requested = '@' in args.user_host
      oslogin_state = ssh.GetOsloginState(
          instance, project, user, public_key, expiration_micros,
          self.ReleaseTrack(), username_requested=username_requested)
      user = oslogin_state.user
    log.debug(oslogin_state)

    if iap_tunnel_args:
      # IAP Tunnel only uses instance_address for the purpose of --ssh-flag
      # substitution. In this case, dest_addr doesn't do much, it just matches
      # against entries in the user's ssh_config file. It's best to use
      # something unique to avoid false positive matches, thus we use
      # HostKeyAlias.
      instance_address = internal_address
      dest_addr = ssh_utils.HostKeyAlias(instance)
    elif args.internal_ip:
      instance_address = internal_address
      dest_addr = instance_address
    else:
      instance_address = ssh_utils.GetExternalIPAddress(instance)
      dest_addr = instance_address
    remote = ssh.Remote(dest_addr, user)

  # identity_file_list will be None if security keys are not enabled.
  identity_file_list = ssh.WriteSecurityKeys(oslogin_state)
  identity_file = None
  options = None
  if not args.plain:
    if not identity_file_list:
      identity_file = ssh_helper.keys.key_file
    options = ssh_helper.GetConfig(ssh_utils.HostKeyAlias(instance),
                                   args.strict_host_key_checking,
                                   host_keys_to_add=host_keys)

  extra_flags = ssh.ParseAndSubstituteSSHFlags(args, remote, instance_address,
                                               internal_address)
  remainder = []

  if args.ssh_args:
    remainder.extend(args.ssh_args)

  # Transform args.command into arg list or None if no command
  command_list = args.command.split(' ') if args.command else None
  tty = containers.GetTty(args.container, command_list)
  remote_command = containers.GetRemoteCommand(args.container, command_list)

  # Do not include default port since that will prevent users from
  # specifying a custom port (b/121998342).
  ssh_cmd_args = {'remote': remote,
                  'identity_file': identity_file,
                  'options': options,
                  'extra_flags': extra_flags,
                  'remote_command': remote_command,
                  'tty': tty,
                  'iap_tunnel_args': iap_tunnel_args,
                  'remainder': remainder,
                  'identity_list': identity_file_list}

  cmd = ssh.SSHCommand(**ssh_cmd_args)

  if args.dry_run:
    # Add quotes around any arguments that contain spaces.
    log.out.Print(' '.join('"{0}"'.format(arg) if ' ' in arg else arg
                           for arg in cmd.Build(ssh_helper.env)))
    return

  # Raise errors if the instance requires a security key but the local
  # environment doesn't support them. This is after the 'dry-run' because
  # we want to allow printing the command regardless.
  if self.enable_security_keys:
    ssh_utils.ConfirmSecurityKeyStatus(oslogin_state)

  if args.plain or oslogin_state.oslogin_enabled:
    keys_newly_added = False
  else:
    keys_newly_added = ssh_helper.EnsureSSHKeyExists(
        client, remote.user, instance, project, expiration=expiration)

  if keys_newly_added:
    poller = ssh_utils.CreateSSHPoller(remote, identity_file, options,
                                       iap_tunnel_args,
                                       extra_flags=extra_flags)
    log.status.Print('Waiting for SSH key to propagate.')
    # TODO(b/35355795): Don't force_connect
    try:
      poller.Poll(
          ssh_helper.env,
          force_connect=properties.VALUES.ssh.putty_force_connect.GetBool())
    except retry.WaitException:
      raise ssh_utils.NetworkError()

  if args.internal_ip and not on_prem:
    ssh_helper.PreliminarilyVerifyInstance(instance.id, remote, identity_file,
                                           options)

  # Errors from SSH itself result in an ssh.CommandError being raised
  try:
    return_code = cmd.Run(
        ssh_helper.env,
        force_connect=properties.VALUES.ssh.putty_force_connect.GetBool())
  except ssh.CommandError as e:
    if not on_prem:
      log.status.Print(self.createRecommendMessage(args, instance_name,
                                                   instance_ref, project))
    raise e

  if return_code:
    # This is the return code of the remote command. Problems with SSH itself
    # will result in ssh.CommandError being raised above.
    sys.exit(return_code)
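
# For illustration, typical invocations that exercise the branches above
# (names and flag values hypothetical):
#
#   gcloud compute ssh my-instance --zone=us-central1-a
#   gcloud compute ssh my-instance --zone=us-central1-a --tunnel-through-iap
#   gcloud compute ssh jane@10.1.2.3 --network=my-network --region=us-central1
#
# The first resolves the instance and connects to its external IP; the second
# sets iap_tunnel_args, so dest_addr becomes the HostKeyAlias rather than an
# IP; the third supplies both --network and --region, which takes the on_prem
# branch: the positional argument is treated as an IP address and --plain is
# implied.
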
def Run(self, args):
  """See ssh_utils.BaseSSHCLICommand.Run."""
  holder = base_classes.ComputeApiHolder(self.ReleaseTrack())
  client = holder.client

  ssh_helper = ssh_utils.BaseSSHCLIHelper()
  ssh_helper.Run(args)
  user, instance_name = ssh_utils.GetUserAndInstance(args.user_host)
  instance_ref = instance_flags.SSH_INSTANCE_RESOLVER.ResolveResources(
      [instance_name],
      compute_scope.ScopeEnum.ZONE,
      args.zone,
      holder.resources,
      scope_lister=instance_flags.GetInstanceZoneScopeLister(client))[0]
  instance = ssh_helper.GetInstance(client, instance_ref)
  project = ssh_helper.GetProject(client, instance_ref.project)
  if self.get_host_keys:
    host_keys = ssh_helper.GetHostKeysFromGuestAttributes(
        client, instance_ref)
    if not host_keys:
      log.warning('Unable to retrieve host keys from instance metadata. '
                  'Continuing.')
  else:
    host_keys = {}

  expiration, expiration_micros = ssh_utils.GetSSHKeyExpirationFromArgs(args)
  if args.plain:
    use_oslogin = False
  else:
    public_key = ssh_helper.keys.GetPublicKey().ToEntry(include_comment=True)
    user, use_oslogin = ssh.CheckForOsloginAndGetUser(
        instance, project, user, public_key, expiration_micros,
        self.ReleaseTrack())

  iap_tunnel_args = iap_tunnel.SshTunnelArgs.FromArgs(
      args, self.ReleaseTrack(), instance_ref,
      ssh_utils.GetExternalInterface(instance, no_raise=True))

  internal_address = ssh_utils.GetInternalIPAddress(instance)

  if iap_tunnel_args:
    # IAP Tunnel only uses instance_address for the purpose of --ssh-flag
    # substitution. In this case, dest_addr doesn't do much, it just matches
    # against entries in the user's ssh_config file. It's best to use
    # something unique to avoid false positive matches, thus we use
    # HostKeyAlias.
    instance_address = internal_address
    dest_addr = ssh_utils.HostKeyAlias(instance)
  elif args.internal_ip:
    instance_address = internal_address
    dest_addr = instance_address
  else:
    instance_address = ssh_utils.GetExternalIPAddress(instance)
    dest_addr = instance_address
  remote = ssh.Remote(dest_addr, user)

  identity_file = None
  options = None
  if not args.plain:
    identity_file = ssh_helper.keys.key_file
    options = ssh_helper.GetConfig(ssh_utils.HostKeyAlias(instance),
                                   args.strict_host_key_checking,
                                   host_keys_to_add=host_keys)

  extra_flags = ssh.ParseAndSubstituteSSHFlags(args, remote, instance_address,
                                               internal_address)
  remainder = []

  if args.ssh_args:
    remainder.extend(args.ssh_args)

  # Transform args.command into arg list or None if no command
  command_list = args.command.split(' ') if args.command else None
  tty = containers.GetTty(args.container, command_list)
  remote_command = containers.GetRemoteCommand(args.container, command_list)

  # Do not include default port since that will prevent users from
  # specifying a custom port (b/121998342).
  ssh_cmd_args = {'remote': remote,
                  'identity_file': identity_file,
                  'options': options,
                  'extra_flags': extra_flags,
                  'remote_command': remote_command,
                  'tty': tty,
                  'iap_tunnel_args': iap_tunnel_args,
                  'remainder': remainder}

  cmd = ssh.SSHCommand(**ssh_cmd_args)

  if args.dry_run:
    log.out.Print(' '.join(cmd.Build(ssh_helper.env)))
    return

  if args.plain or use_oslogin:
    keys_newly_added = False
  else:
    keys_newly_added = ssh_helper.EnsureSSHKeyExists(
        client, remote.user, instance, project, expiration=expiration)

  if keys_newly_added:
    poller = ssh_utils.CreateSSHPoller(remote, identity_file, options,
                                       iap_tunnel_args,
                                       extra_flags=extra_flags)
    log.status.Print('Waiting for SSH key to propagate.')
    # TODO(b/35355795): Don't force_connect
    try:
      poller.Poll(ssh_helper.env, force_connect=True)
    except retry.WaitException:
      raise ssh_utils.NetworkError()

  if args.internal_ip:
    ssh_helper.PreliminarilyVerifyInstance(instance.id, remote, identity_file,
                                           options)

  # Errors from SSH itself result in an ssh.CommandError being raised
  return_code = cmd.Run(ssh_helper.env, force_connect=True)
  if return_code:
    # This is the return code of the remote command. Problems with SSH itself
    # will result in ssh.CommandError being raised above.
    sys.exit(return_code)
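
# For illustration only: the command assembled from ssh_cmd_args and printed
# by --dry-run resembles the following (binary path, key path, instance id,
# and address are hypothetical):
#
#   /usr/bin/ssh -t -i ~/.ssh/google_compute_engine \
#       -o CheckHostIP=no -o HostKeyAlias=compute.1234567890123456789 \
#       jane@203.0.113.7
#
# When tunneling through IAP, the host position holds the HostKeyAlias
# (dest_addr above) instead of an IP address, so known-hosts matching stays
# unambiguous even though the actual bytes flow through the tunnel.
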
def Run(self, args):
  """See ssh_utils.BaseSSHCommand.Run."""
  holder = base_classes.ComputeApiHolder(self.ReleaseTrack())
  client = holder.client

  ssh_helper = ssh_utils.BaseSSHHelper()
  ssh_helper.Run(args)
  ssh_helper.keys.EnsureKeysExist(args.force_key_file_overwrite,
                                  allow_passphrase=True)

  remote = ssh.Remote.FromArg(args.user_host)
  if not remote:
    raise ssh_utils.ArgumentError(
        'Expected argument of the form [USER@]INSTANCE. Received [{0}].'
        .format(args.user_host))
  if not remote.user:
    remote.user = ssh.GetDefaultSshUsername()

  public_key = ssh_helper.keys.GetPublicKey().ToEntry(include_comment=True)

  hostname = '[{0}]:{1}'.format(args.serial_port_gateway, CONNECTION_PORT)
  # Update google_compute_known_hosts file with published host key
  if args.serial_port_gateway == SERIAL_PORT_GATEWAY:
    http_client = http.Http()
    http_response = http_client.request(HOST_KEY_URL)
    known_hosts = ssh.KnownHosts.FromDefaultFile()
    if http_response[0]['status'] == '200':
      host_key = http_response[1].strip()
      known_hosts.Add(hostname, host_key, overwrite=True)
      known_hosts.Write()
    elif known_hosts.ContainsAlias(hostname):
      log.warning(
          'Unable to download and update Host Key for [{0}] from [{1}]. '
          'Attempting to connect using existing Host Key in [{2}]. If '
          'the connection fails, please try again to update the Host '
          'Key.'.format(SERIAL_PORT_GATEWAY, HOST_KEY_URL,
                        known_hosts.file_path))
    else:
      known_hosts.Add(hostname, DEFAULT_HOST_KEY)
      known_hosts.Write()
      log.warning(
          'Unable to download Host Key for [{0}] from [{1}]. To ensure '
          'the security of the SSH connection, gcloud will attempt to '
          'connect using a hard-coded Host Key value. If the connection '
          'fails, please try again. If the problem persists, try '
          'updating gcloud and connecting again.'
          .format(SERIAL_PORT_GATEWAY, HOST_KEY_URL))

  instance_ref = instance_flags.SSH_INSTANCE_RESOLVER.ResolveResources(
      [remote.host],
      compute_scope.ScopeEnum.ZONE,
      args.zone,
      holder.resources,
      scope_lister=instance_flags.GetInstanceZoneScopeLister(client))[0]
  instance = ssh_helper.GetInstance(client, instance_ref)
  project = ssh_helper.GetProject(client, instance_ref.project)
  expiration, expiration_micros = ssh_utils.GetSSHKeyExpirationFromArgs(args)

  remote.user, use_os_login = ssh.CheckForOsloginAndGetUser(
      instance, project, remote.user, public_key, expiration_micros,
      self.ReleaseTrack())

  # Determine the serial user, host tuple (remote)
  port = 'port={0}'.format(args.port)
  constructed_username_list = [instance_ref.project, instance_ref.zone,
                               instance_ref.Name(), remote.user, port]
  if args.extra_args:
    for k, v in args.extra_args.items():
      constructed_username_list.append('{0}={1}'.format(k, v))
  serial_user = '.'.join(constructed_username_list)
  serial_remote = ssh.Remote(args.serial_port_gateway, user=serial_user)

  identity_file = ssh_helper.keys.key_file
  options = ssh_helper.GetConfig(hostname, strict_host_key_checking='yes')
  del options['HostKeyAlias']
  options['ControlPath'] = 'none'
  cmd = ssh.SSHCommand(serial_remote, identity_file=identity_file,
                       port=CONNECTION_PORT, options=options)
  if args.dry_run:
    log.out.Print(' '.join(cmd.Build(ssh_helper.env)))
    return

  if not use_os_login:
    ssh_helper.EnsureSSHKeyExists(
        client, remote.user, instance, project, expiration)

  # Don't wait for the instance to become SSHable. We are not connecting to
  # the instance itself through SSH, so the instance doesn't need to have
  # fully booted to connect to the serial port. Also, ignore exit code 255,
  # since the normal way to terminate the serial port connection is ~. and
  # that causes ssh to exit with 255.
  try:
    return_code = cmd.Run(ssh_helper.env, force_connect=True)
  except ssh.CommandError:
    return_code = 255
  if return_code:
    sys.exit(return_code)
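
# A sketch of the serial-console remote constructed above (identifiers
# hypothetical): for project "my-proj", zone "us-central1-a", instance
# "my-vm", user "jane", and --port=1, serial_user becomes
#
#   my-proj.us-central1-a.my-vm.jane.port=1
#
# and the SSH target is that username at args.serial_port_gateway on
# CONNECTION_PORT, so the built command connects to something like
#
#   my-proj.us-central1-a.my-vm.jane.port=1@ssh-serialport.googleapis.com
#
# Any --extra-args key/value pairs are appended to the dotted username as
# additional k=v segments, which is how options reach the serial gateway.
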
def Run(self, args):
  dst = ssh.FileReference.FromPath(args.destination)
  srcs = [ssh.FileReference.FromPath(src) for src in args.sources]
  ssh.SCPCommand.Verify(srcs, dst, single_remote=True)
  if dst.remote:
    tpu_name = dst.remote.host
  else:
    tpu_name = srcs[0].remote.host

  # If zone is not set, retrieve the one from the config.
  if args.zone is None:
    args.zone = properties.VALUES.compute.zone.Get(required=True)

  # Retrieve the node.
  tpu = tpu_utils.TPUNode(self.ReleaseTrack())
  node = tpu.Get(tpu_name, args.zone)
  if not tpu_utils.IsTPUVMNode(node):
    raise exceptions.BadArgumentException(
        'TPU',
        'this command is only available for Cloud TPU VM nodes. To access '
        'this node, please see '
        'https://cloud.google.com/tpu/docs/creating-deleting-tpus.')

  worker_ips = tpu_ssh_utils.ParseWorkerFlag(args.worker,
                                             node.networkEndpoints,
                                             args.internal_ip)

  if len(worker_ips) > 1 and srcs[0].remote:
    raise exceptions.InvalidArgumentException(
        '--worker', 'cannot target multiple workers while copying files to '
        'client.')

  tpu_ssh_utils.ValidateTPUState(node.state,
                                 tpu.messages.Node.StateValueValuesEnum)

  # Retrieve GuestAttributes.
  single_pod_worker = len(node.networkEndpoints) > 1 and len(worker_ips) == 1
  if single_pod_worker:
    # Retrieve only that worker's GuestAttributes.
    worker_id = list(worker_ips)[0]
    guest_attributes_response = tpu.GetGuestAttributes(
        tpu_name, args.zone, six.text_type(worker_id))
    host_key_suffixes = tpu_ssh_utils.GetHostKeySuffixes(
        guest_attributes_response.guestAttributes,
        len(node.networkEndpoints), worker_id)
  else:
    # Retrieve the GuestAttributes for all workers in that TPU.
    guest_attributes_response = tpu.GetGuestAttributes(tpu_name, args.zone)
    host_key_suffixes = tpu_ssh_utils.GetHostKeySuffixes(
        guest_attributes_response.guestAttributes)

  # Generate the public key.
  ssh_helper = ssh_utils.BaseSSHCLIHelper()
  ssh_helper.Run(args)
  public_key = ssh_helper.keys.GetPublicKey().ToEntry()

  remote = dst.remote or srcs[0].remote
  if not dst.remote:
    # Make sure all remotes point to the same ref.
    for src in srcs:
      src.remote = remote

  if remote.user:
    username_requested = True
  else:
    username_requested = False
    remote.user = ssh.GetDefaultSshUsername(warn_on_account_user=True)

  project = tpu_utils.GetProject(self.ReleaseTrack(), ssh_helper)

  if not args.plain:
    # If there is an '@' symbol in the user_host arg, the user is requesting
    # to connect as a specific user. This may get overridden by OS Login.
    _, expiration_micros = ssh_utils.GetSSHKeyExpirationFromArgs(args)
    oslogin_state = ssh.GetOsloginState(
        None, project, remote.user, public_key, expiration_micros,
        self.ReleaseTrack(), username_requested=username_requested,
        instance_enable_oslogin=tpu_ssh_utils.TpuHasOsLoginEnabled(node))
    remote.user = oslogin_state.user

  # Format the key correctly.
  public_key = '{1}:{0} {1}'.format(public_key, remote.user)
  if not args.plain and not args.dry_run:
    tpu_ssh_utils.AddSSHKeyIfNeeded(project, tpu, node, tpu_name, args.zone,
                                    public_key)

  identity_file = None
  if not args.plain:
    identity_file = ssh_helper.keys.key_file
    # If the user's key is not in the SSH agent, the command will stall. We
    # want to verify it is added before proceeding, and raise an error if it
    # is not.
    if not args.dry_run and len(worker_ips) > 1:
      tpu_ssh_utils.VerifyKeyInAgent(identity_file)

  extra_flags = []
  if args.scp_flag:
    extra_flags.extend(args.scp_flag)

  instance_names = {}
  if (args.IsKnownAndSpecified('tunnel_through_iap') and
      args.tunnel_through_iap):
    # Retrieve the instance names from the GuestAttributes.
    for worker in worker_ips:
      # The GuestAttributes will only have one entry if we're targeting a
      # single worker.
      index = 0 if single_pod_worker else worker
      instance_name = tpu_ssh_utils.GetFromGuestAttributes(
          guest_attributes_response.guestAttributes, index, 'hostname')
      if instance_name is None:
        log.status.Print('Failed to connect to TPU.')
        log.status.Print(tpu_ssh_utils.IAP_TROUBLESHOOTING_HELP)
        raise tpu_exceptions.IapTunnelingUnavailable()
      instance_names[worker] = instance_name

  ssh_threads = []
  exit_statuses = [None] * len(worker_ips)
  for worker, ips in worker_ips.items():
    options = None
    if not args.plain:
      options = ssh_helper.GetConfig(
          tpu_ssh_utils.GetInstanceID(node.id, worker, host_key_suffixes),
          args.strict_host_key_checking, None)

    iap_tunnel_args = None
    if (args.IsKnownAndSpecified('tunnel_through_iap') and
        args.tunnel_through_iap):
      # Retrieve the instance name from the GuestAttributes.
      instance_name = instance_names[worker]
      iap_tunnel_args = tpu_ssh_utils.CreateSshTunnelArgs(
          args, self.ReleaseTrack(), project, args.zone, instance_name)

    remote.host = ips.ip_address
    cmd = ssh.SCPCommand(
        srcs,
        dst,
        identity_file=identity_file,
        options=options,
        recursive=args.recurse,
        compress=args.compress,
        extra_flags=extra_flags,
        iap_tunnel_args=iap_tunnel_args)

    if args.dry_run:
      log.out.Print(' '.join(cmd.Build(ssh_helper.env)))
      continue

    if len(worker_ips) > 1:
      # Run the command on multiple workers concurrently.
      ssh_threads.append(
          threading.Thread(
              target=tpu_ssh_utils.AttemptRunWithRetries,
              args=('SCP', worker, exit_statuses, cmd, ssh_helper.env, None,
                    True, SCPRunCmd)))
      ssh_threads[-1].start()
    else:
      # Run on a single worker.
      tpu_ssh_utils.AttemptRunWithRetries('SCP', worker, exit_statuses, cmd,
                                          ssh_helper.env, None, False,
                                          SCPRunCmd)

  if len(worker_ips) > 1:
    # Wait for all the threads to complete.
    for i in range(len(ssh_threads)):
      ssh_threads[i].join()

  # Exit with a nonzero status if any worker failed, so a failure on one
  # worker is not reported as overall success.
  for status in exit_statuses:
    if status:
      sys.exit(status)
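
# For illustration, typical invocations of this command (names and paths
# hypothetical):
#
#   gcloud compute tpus tpu-vm scp ./script.py my-tpu:~/ --zone=us-central1-a
#   gcloud compute tpus tpu-vm scp ./data my-tpu:~/data --worker=all --recurse
#
# Copying *from* the TPU back to the client must target a single worker,
# which is why the code above rejects a remote source combined with multiple
# workers; multi-worker copies fan out on one thread per worker.
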
def RunScp(self,
           compute_holder,
           args,
           port=None,
           recursive=False,
           compress=False,
           extra_flags=None,
           release_track=None,
           ip_type=ip.IpTypeEnum.EXTERNAL):
  """SCP files between local and remote GCE instance.

  Run this method from subclasses' Run methods.

  Args:
    compute_holder: The ComputeApiHolder.
    args: argparse.Namespace, the args the command was invoked with.
    port: str or None, Port number to use for SSH connection.
    recursive: bool, Whether to use recursive copying using -R flag.
    compress: bool, Whether to use compression.
    extra_flags: [str] or None, extra flags to add to command invocation.
    release_track: obj, The current release track.
    ip_type: IpTypeEnum, Specify using internal ip or external ip address.

  Raises:
    ssh_utils.NetworkError: Network issue which likely is due to failure of
      SSH key propagation.
    ssh.CommandError: The SSH command exited with SSH exit code, which
      usually implies that a connection problem occurred.
  """
  if release_track is None:
    release_track = base.ReleaseTrack.GA
  super(BaseScpHelper, self).Run(args)

  dst = ssh.FileReference.FromPath(args.destination)
  srcs = [ssh.FileReference.FromPath(src) for src in args.sources]

  # Make sure we have a unique remote
  ssh.SCPCommand.Verify(srcs, dst, single_remote=True)

  remote = dst.remote or srcs[0].remote
  if not dst.remote:
    # Make sure all remotes point to the same ref
    for src in srcs:
      src.remote = remote

  instance_ref = instance_flags.SSH_INSTANCE_RESOLVER.ResolveResources(
      [remote.host],
      compute_scope.ScopeEnum.ZONE,
      args.zone,
      compute_holder.resources,
      scope_lister=instance_flags.GetInstanceZoneScopeLister(
          compute_holder.client))[0]
  instance = self.GetInstance(compute_holder.client, instance_ref)
  project = self.GetProject(compute_holder.client, instance_ref.project)
  expiration, expiration_micros = ssh_utils.GetSSHKeyExpirationFromArgs(args)

  if not remote.user:
    remote.user = ssh.GetDefaultSshUsername(warn_on_account_user=True)
  if args.plain:
    use_oslogin = False
  else:
    public_key = self.keys.GetPublicKey().ToEntry(include_comment=True)
    remote.user, use_oslogin = ssh.CheckForOsloginAndGetUser(
        instance, project, remote.user, public_key, expiration_micros,
        release_track)

  identity_file = None
  options = None
  if not args.plain:
    identity_file = self.keys.key_file
    options = self.GetConfig(ssh_utils.HostKeyAlias(instance),
                             args.strict_host_key_checking)

  iap_tunnel_args = iap_tunnel.SshTunnelArgs.FromArgs(
      args, release_track, instance_ref,
      ssh_utils.GetExternalInterface(instance, no_raise=True))

  if iap_tunnel_args:
    remote.host = ssh_utils.HostKeyAlias(instance)
  elif ip_type is ip.IpTypeEnum.INTERNAL:
    remote.host = ssh_utils.GetInternalIPAddress(instance)
  else:
    remote.host = ssh_utils.GetExternalIPAddress(instance)

  cmd = ssh.SCPCommand(
      srcs,
      dst,
      identity_file=identity_file,
      options=options,
      recursive=recursive,
      compress=compress,
      port=port,
      extra_flags=extra_flags,
      iap_tunnel_args=iap_tunnel_args)

  if args.dry_run:
    log.out.Print(' '.join(cmd.Build(self.env)))
    return

  if args.plain or use_oslogin:
    keys_newly_added = False
  else:
    keys_newly_added = self.EnsureSSHKeyExists(
        compute_holder.client, remote.user, instance, project,
        expiration=expiration)

  if keys_newly_added:
    poller = ssh_utils.CreateSSHPoller(remote, identity_file, options,
                                       iap_tunnel_args, port=port)
    log.status.Print('Waiting for SSH key to propagate.')
    # TODO(b/35355795): Don't force_connect
    try:
      poller.Poll(self.env, force_connect=True)
    except retry.WaitException:
      raise ssh_utils.NetworkError()

  if ip_type is ip.IpTypeEnum.INTERNAL:
    # This will never happen when IAP Tunnel is enabled, because ip_type is
    # always EXTERNAL when IAP Tunnel is enabled, even if the instance has no
    # external IP. IAP Tunnel doesn't need verification because it uses
    # unambiguous identifiers for the instance.
    self.PreliminarilyVerifyInstance(instance.id, remote, identity_file,
                                     options)

  # Errors from the SCP command result in an ssh.CommandError being raised
  cmd.Run(self.env, force_connect=True)
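
# A minimal sketch (hypothetical subclass and flag names) of how a concrete
# scp command's Run method might delegate to RunScp, as the docstring above
# suggests:
#
#   class Scp(BaseScpHelper):
#
#     def Run(self, args):
#       compute_holder = base_classes.ComputeApiHolder(self.ReleaseTrack())
#       # args.recurse / args.compress are assumed flags on the subclass.
#       self.RunScp(compute_holder, args,
#                   recursive=args.recurse,
#                   compress=args.compress,
#                   release_track=self.ReleaseTrack())
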
def Run(self, args):
  user, tpu_name = ssh_utils.GetUserAndInstance(args.user_tpu)

  # If zone is not set, retrieve the one from the config.
  if args.zone is None:
    args.zone = properties.VALUES.compute.zone.Get(required=True)

  # Validate the output path.
  if args.output_directory:
    if not args.command:
      raise exceptions.InvalidArgumentException(
          '--output_directory', 'cannot be specified without the `--command` '
          'flag. Please specify the `--command` flag or remove the '
          '--output-directory flag.')
    output_directory_path = os.path.abspath(
        os.path.expandvars(os.path.expanduser(args.output_directory)))
    if not os.path.isdir(output_directory_path):
      raise exceptions.InvalidArgumentException(
          '--output_directory', 'Failed to find directory {}. Please create '
          'it or specify another directory'.format(output_directory_path))

  # Retrieve the node.
  tpu = tpu_utils.TPUNode(self.ReleaseTrack())
  node = tpu.Get(tpu_name, args.zone)
  if not tpu_utils.IsTPUVMNode(node):
    raise exceptions.BadArgumentException(
        'TPU',
        'this command is only available for Cloud TPU VM nodes. To access '
        'this node, please see '
        'https://cloud.google.com/tpu/docs/creating-deleting-tpus.')

  tpu_ssh_utils.ValidateTPUState(node.state,
                                 tpu.messages.Node.StateValueValuesEnum)

  worker_ips = tpu_ssh_utils.ParseWorkerFlag(args.worker,
                                             node.networkEndpoints,
                                             args.internal_ip)

  if len(worker_ips) > 1 and not args.command:
    raise exceptions.InvalidArgumentException(
        '--worker', 'cannot target multiple workers without the `--command` '
        'flag.')

  # Retrieve GuestAttributes.
  single_pod_worker = len(node.networkEndpoints) > 1 and len(worker_ips) == 1
  if single_pod_worker:
    # Retrieve only that worker's GuestAttributes.
    worker_id = list(worker_ips)[0]
    guest_attributes_response = tpu.GetGuestAttributes(
        tpu_name, args.zone, six.text_type(worker_id))
    host_key_suffixes = tpu_ssh_utils.GetHostKeySuffixes(
        guest_attributes_response.guestAttributes,
        len(node.networkEndpoints), worker_id)
  else:
    # Retrieve the GuestAttributes for all workers in that TPU.
    guest_attributes_response = tpu.GetGuestAttributes(tpu_name, args.zone)
    host_key_suffixes = tpu_ssh_utils.GetHostKeySuffixes(
        guest_attributes_response.guestAttributes)

  # Generate the public key.
  ssh_helper = ssh_utils.BaseSSHCLIHelper()
  ssh_helper.Run(args)
  public_key = ssh_helper.keys.GetPublicKey().ToEntry()

  project = tpu_utils.GetProject(self.ReleaseTrack(), ssh_helper)

  if not args.plain:
    # If there is an '@' symbol in the user_host arg, the user is requesting
    # to connect as a specific user. This may get overridden by OS Login.
    username_requested = '@' in args.user_tpu
    _, expiration_micros = ssh_utils.GetSSHKeyExpirationFromArgs(args)
    oslogin_state = ssh.GetOsloginState(
        None, project, user, public_key, expiration_micros,
        self.ReleaseTrack(), username_requested=username_requested,
        instance_enable_oslogin=tpu_ssh_utils.TpuHasOsLoginEnabled(node))
    user = oslogin_state.user

  # Format the key correctly.
  public_key = '{1}:{0} {1}'.format(public_key, user)
  if not args.plain and not args.dry_run:
    tpu_ssh_utils.AddSSHKeyIfNeeded(project, tpu, node, tpu_name, args.zone,
                                    public_key)

  command_list = args.command.split(' ') if args.command else None

  remainder = []
  if args.ssh_args:
    remainder.extend(args.ssh_args)

  if args.output_directory:
    log.status.Print('Preparing SSH command execution; output will be logged '
                     'to {}'.format(output_directory_path))

  instance_names = {}
  if (args.IsKnownAndSpecified('tunnel_through_iap') and
      args.tunnel_through_iap):
    # Retrieve the instance names from the GuestAttributes.
    for worker in worker_ips:
      # The GuestAttributes will only have one entry if we're targeting a
      # single worker.
      index = 0 if single_pod_worker else worker
      instance_name = tpu_ssh_utils.GetFromGuestAttributes(
          guest_attributes_response.guestAttributes, index, 'hostname')
      if instance_name is None:
        log.status.Print('Failed to connect to TPU.')
        log.status.Print(tpu_ssh_utils.IAP_TROUBLESHOOTING_HELP)
        raise tpu_exceptions.IapTunnelingUnavailable()
      instance_names[worker] = instance_name

  ssh_threads = []
  exit_statuses = [None] * len(worker_ips)
  for worker, ips in worker_ips.items():
    identity_file = None
    options = None
    if not args.plain:
      identity_file = ssh_helper.keys.key_file
      options = ssh_helper.GetConfig(
          tpu_ssh_utils.GetInstanceID(node.id, worker, host_key_suffixes),
          args.strict_host_key_checking, None)

    remote = ssh.Remote(ips.ip_address, user)
    extra_flags = ssh.ParseAndSubstituteSSHFlags(args, remote, ips.ip_address,
                                                 ips.internal_address)

    iap_tunnel_args = None
    if (args.IsKnownAndSpecified('tunnel_through_iap') and
        args.tunnel_through_iap):
      # Retrieve the instance name from the GuestAttributes.
      instance_name = instance_names[worker]
      iap_tunnel_args = tpu_ssh_utils.CreateSshTunnelArgs(
          args, self.ReleaseTrack(), project, args.zone, instance_name)

    cmd = ssh.SSHCommand(
        remote=remote,
        identity_file=identity_file,
        remote_command=command_list,
        extra_flags=extra_flags,
        options=options,
        remainder=remainder,
        iap_tunnel_args=iap_tunnel_args)

    if args.dry_run:
      log.out.Print(' '.join(cmd.Build(ssh_helper.env)))
      continue

    output_file_writer = None
    if args.output_directory:
      output_file_writer = FileWriter('{}/{}.log'.format(
          output_directory_path, six.text_type(worker)))

    if len(worker_ips) > 1:
      # Run the command on multiple workers concurrently.
      ssh_threads.append(
          threading.Thread(
              target=tpu_ssh_utils.AttemptRunWithRetries,
              args=('SSH', worker, exit_statuses, cmd, ssh_helper.env,
                    output_file_writer, True, SSHRunCmd)))
      ssh_threads[-1].start()
    else:
      # Run on a single worker.
      tpu_ssh_utils.AttemptRunWithRetries('SSH', worker, exit_statuses, cmd,
                                          ssh_helper.env, output_file_writer,
                                          False, SSHRunCmd)

  if len(worker_ips) > 1:
    # Wait for all the threads to complete.
    for i in range(len(ssh_threads)):
      ssh_threads[i].join()

  # Exit with a nonzero status if any worker failed, so a failure on one
  # worker is not reported as overall success.
  for status in exit_statuses:
    if status:
      sys.exit(status)
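
# For illustration, typical invocations of this command (names and paths
# hypothetical):
#
#   gcloud compute tpus tpu-vm ssh my-tpu --zone=us-central1-a
#   gcloud compute tpus tpu-vm ssh my-tpu --zone=us-central1-a \
#       --worker=all --command="hostname" --output_directory=~/tpu-logs
#
# Targeting multiple workers requires --command (enforced above), and
# --output_directory additionally requires --command; per-worker output is
# then written to <output_directory>/<worker>.log while the commands run
# concurrently, one thread per worker.
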