def instance_delete(project_id: t.Text, zone: t.Text, name: t.Text) -> None:
    """Delete the instance called ``name`` and report the outcome on stdout.

    Args:
        project_id: Project identifier forwarded to ``Service``.
        zone: Zone forwarded to ``Service``.
        name: Name of the instance to delete.
    """
    client = Service(project_id=project_id, zone=zone)
    found = Instance(client.get_instance(name=name))

    # A wrapped lookup result without a name is reported as "does not exist".
    if found.name is None:
        print(f"Instance '{name}' does not exist.")
        return

    # Submit the delete request, wait on the returned operation, then look the
    # instance up once more so the report shows its state after the deletion.
    delete_op = client.delete_instance(found.name)
    client.wait_for_operation(delete_op)
    after = Instance(client.get_instance(name))
    print(f"Instance '{name}' has been deleted: {after}")
def instance_stop(project_id: t.Text, zone: t.Text, name: t.Text) -> None:
    """Stop the instance called ``name`` when it exists and is running.

    Prints a message and does nothing else when the instance is missing or
    its status is not ``Status.RUNNING``.

    Args:
        project_id: Project identifier forwarded to ``Service``.
        zone: Zone forwarded to ``Service``.
        name: Name of the instance to stop.
    """
    client = Service(project_id=project_id, zone=zone)
    current = Instance(client.get_instance(name=name))

    if current.name is None:
        print(f"Instance '{name}' does not exist.")
        return

    if current.status != Status.RUNNING:
        print(f"Instance '{name}' is not running: {current}")
        return

    # Submit the stop request, wait on the returned operation, then re-read
    # the instance so the report reflects its state after the stop.
    stop_op = client.stop_instance(current.name)
    client.wait_for_operation(stop_op)
    refreshed = Instance(client.get_instance(name))
    print(f"Instance '{name}' has stopped: {refreshed}")
def instance_start(project_id: str, zone: str, name: str):
    """Start the instance called ``name`` unless it is missing or already running.

    Args:
        project_id: Project identifier forwarded to ``Service``.
        zone: Zone forwarded to ``Service``.
        name: Name of the instance to start.
    """
    client = Service(project_id=project_id, zone=zone)
    current = Instance(client.get_instance(name=name))

    if current.name is None:
        print(f"Instance '{name}' does not exist.")
        return

    if current.status == Status.RUNNING:
        print(f"Instance '{name}' is already running: {current}")
        return

    # Submit the start request, wait on the returned operation, then re-read
    # the instance so the report reflects its state after the start.
    start_op = client.start_instance(current.name)
    client.wait_for_operation(start_op)
    refreshed = Instance(client.get_instance(name))
    print(f"Instance '{name}' has started: {refreshed}")
def instance_create(project_id: t.Text, zone: t.Text) -> None:
    """Create an instance and wait for the create operation.

    ``Service.create_instance`` is called without arguments, so whatever
    defaults it defines decide the instance's name and shape.

    Args:
        project_id: Project identifier forwarded to ``Service``.
        zone: Zone forwarded to ``Service``.
    """
    client = Service(project_id=project_id, zone=zone)
    create_op = client.create_instance()
    client.wait_for_operation(create_op)
def instance_status(project_id: t.Text, zone: t.Text, name: t.Text) -> None:
    """Print the ``Instance`` wrapper built from the lookup of ``name``.

    Args:
        project_id: Project identifier forwarded to ``Service``.
        zone: Zone forwarded to ``Service``.
        name: Name of the instance to look up.
    """
    client = Service(project_id=project_id, zone=zone)
    raw_instance = client.get_instance(name=name)
    print(Instance(raw_instance))
def list_instances(project_id: t.Text, zone: t.Text) -> None:
    """Print a header followed by one tab-indented line per listed instance.

    Args:
        project_id: Project identifier forwarded to ``Service``.
        zone: Zone forwarded to ``Service``.
    """
    client = Service(project_id=project_id, zone=zone)

    # The header is printed before the listing call is made.
    print("Instances:")
    for raw_instance in client.list_instances():
        wrapped = Instance(raw_instance)
        print(f"\t{wrapped}")
def configure(self) -> None:
    """Prepare the remote GCP instance and run ``mlcube configure`` for it.

    Steps, in order:
      1. Load the SSH configuration file and require ``User`` and
         ``IdentityFile`` entries for the configured instance name.
      2. Create the GCP ``Service`` from the runner's ``gcp`` settings.
      3. Create the instance when the lookup returns no name, and start it
         when its status is not ``GCPInstanceStatus.RUNNING``.
      4. Rewrite ``HostName`` in the SSH configuration file when it differs
         from the instance's public IP.
      5. Install docker, pip and virtualenv on the remote host over SSH.
      6. Run ``mlcube configure`` through ``Shell.run``.

    Raises:
        ExecutionError: Built with ``ExecutionError.mlcube_configure_error``
            whenever one of the steps above fails. The triggering exception
            is attached as ``__cause__``.
    """
    gcp: DictConfig = self.mlcube.runner
    runner_name = self.__class__.__name__

    # Check that SSH is configured.
    # TODO: (Sergey) why am I doing it here (copy-past bug)?
    # NOTE(review): OpenSSH reads '~/.ssh/config' by default - confirm that 'mlcube' is the intended file name
    # and that `Shell.ssh` actually picks this file up.
    ssh_config_file = os.path.join(Path.home(), '.ssh', 'mlcube')
    try:
        ssh_config = SSHConfig.load(ssh_config_file)
        gcp_host: Host = ssh_config.get(gcp.instance.name)
    except KeyError as err:
        raise ExecutionError.mlcube_configure_error(
            runner_name,
            f"SSH configuration file ({ssh_config_file}) does not provide connection details for GCP instance "
            f"(name={gcp.instance.name}). Most likely this error has occurred due to implementation error - "
            "please, contact MLCube developers."
        ) from err

    # TODO: I can try to add this info on the fly assuming standard paths. Need to figure out the user name.
    if gcp_host.get('User', None) is None or gcp_host.get('IdentityFile', None) is None:
        raise ExecutionError.mlcube_configure_error(
            runner_name,
            f"SSH configuration file ({ssh_config_file}) provides connection details for GCP instance "
            f"(name={gcp.instance.name}), but these details do not include information about `User` "
            "and/or `IdentityFile`."
        )

    # Connect to GCP
    logger.info("Connecting to GCP ...")
    try:
        service = Service(
            project_id=gcp.gcp.project_id,
            zone=gcp.gcp.zone,
            credentials=gcp.gcp.credentials
        )
    except Exception as err:
        raise ExecutionError.mlcube_configure_error(
            runner_name,
            "The error most likely is associated with either reading credentials, or connecting using google API ("
            f"project_id={gcp.gcp.project_id}, zone={gcp.gcp.zone}, credentials={gcp.gcp.credentials}). See "
            "context for more details.",
            error=str(err),
            gcp_info={
                'project_id': gcp.gcp.project_id,
                'zone': gcp.gcp.zone,
                'credentials': gcp.gcp.credentials
            }
        ) from err

    # Figure out if an instance needs to be created
    try:
        instance = GCPInstance(service.get_instance(gcp.instance.name))
        if instance.name is None:
            print("Creating GCP instance ...")
            service.wait_for_operation(
                service.create_instance(
                    name=gcp.instance.name,
                    machine_type=gcp.instance.machine_type,
                    disk_size_gb=gcp.instance.disk_size_gb
                )
            )
            instance = GCPInstance(service.get_instance(gcp.instance.name))

        # Check its running status
        if instance.status != GCPInstanceStatus.RUNNING:
            print("Starting GCP instance ...")
            service.wait_for_operation(service.start_instance(instance.name))
            instance = GCPInstance(service.get_instance(gcp.instance.name))
    except Exception as err:
        raise ExecutionError.mlcube_configure_error(
            runner_name,
            "Failed to create or connect to remote GCP instance. See context for more details.",
            error=str(err),
            gcp_instance_info={
                'name': gcp.instance.name,
                'machine_type': gcp.instance.machine_type,
                'disk_size_gb': gcp.instance.disk_size_gb
            }
        ) from err

    # Make sure SSH mlcube is up-to-date
    if gcp_host.get('HostName', None) != instance.public_ip:
        print(
            f"Updating SSH mlcube (prev={gcp_host.get('HostName')}, new={instance.public_ip}, "
            f"file={ssh_config_file})"
        )
        ssh_config.update(instance.name, {'HostName': instance.public_ip})
        ssh_config.write(ssh_config_file)
        # TODO: clean '.ssh/known_hosts'.

    # Configure remote instance. This is specific for docker-based images now.
    try:
        Shell.ssh(
            gcp.instance.name,
            'sudo snap install docker && sudo addgroup --system docker && sudo adduser ${USER} docker && '
            'sudo snap disable docker && sudo snap enable docker && '
            'sudo apt update && yes | sudo apt install python3-pip virtualenv && sudo apt clean'
        )
    except ExecutionError as err:
        raise ExecutionError.mlcube_configure_error(
            runner_name,
            "Failed to install system packages on a remote instance. See context for more details.",
            error=str(err)
        ) from err

    # Remote GCP instance has been configured
    print(instance)

    # Should be as simple as invoking SSH configure.
    try:
        Shell.run(
            f"mlcube configure --mlcube={self.mlcube.root} --platform={gcp.platform}"
        )
    except ExecutionError as err:
        raise ExecutionError.mlcube_configure_error(
            runner_name,
            f"Error occurred while running mlcube configure with GCP platform (platform={gcp.platform}). See "
            "context for more details.",
            error=str(err)
        ) from err
def configure(self) -> None:
    """Prepare the remote GCP instance and configure the MLCube over SSH.

    Steps, in order:
      1. Load ``~/.ssh/config`` and require ``User`` and ``IdentityFile``
         entries for the configured instance name.
      2. Create the GCP ``Service`` from the platform's ``gcp`` settings.
      3. Create the instance when the lookup returns no name, and start it
         when its status is not ``GCPInstanceStatus.RUNNING``.
      4. Rewrite ``HostName`` in the SSH config when it differs from the
         instance's public IP.
      5. Install docker, pip and virtualenv on the remote host over SSH.
      6. Call ``configure_ssh`` with the MLCube root and the platform file path.

    Raises:
        ValueError: The SSH config has no entry for the instance name (the
            original ``KeyError`` is attached as ``__cause__``), or the entry
            lacks ``User`` and/or ``IdentityFile``.
    """
    platform: Platform = self.mlcube.platform

    # Check that SSH is configured.
    ssh_config_file = os.path.join(Path.home(), '.ssh', 'config')
    try:
        ssh_config = SSHConfig.load(ssh_config_file)
        gcp_host: Host = ssh_config.get(platform.instance.name)
    except KeyError as err:
        raise ValueError(
            f"SSH config ({ssh_config_file}) does not provide connection "
            f"details for '{platform.instance.name}'"
        ) from err

    # TODO: I can try to add this info on the fly assuming standard paths. Need to figure out the user name.
    if gcp_host.get('User', None) is None or gcp_host.get('IdentityFile', None) is None:
        # The host entry exists at this point; say precisely which fields are missing.
        raise ValueError(
            f"SSH config ({ssh_config_file}) provides connection details for '{platform.instance.name}', "
            "but these details do not include `User` and/or `IdentityFile`"
        )

    # Connect to GCP
    logger.info("Connecting to GCP ...")
    service = Service(
        project_id=platform.gcp.project_id,
        zone=platform.gcp.zone,
        credentials=platform.gcp.credentials
    )

    # Figure out if an instance needs to be created
    instance = GCPInstance(service.get_instance(platform.instance.name))
    if instance.name is None:
        print("Creating GCP instance ...")
        service.wait_for_operation(
            service.create_instance(
                name=platform.instance.name,
                machine_type=platform.instance.machine_type,
                disk_size_gb=platform.instance.disk_size_gb
            )
        )
        instance = GCPInstance(service.get_instance(platform.instance.name))

    # Check its running status
    if instance.status != GCPInstanceStatus.RUNNING:
        print("Starting GCP instance ...")
        service.wait_for_operation(service.start_instance(instance.name))
        instance = GCPInstance(service.get_instance(platform.instance.name))

    # Make sure SSH config is up-to-date
    if gcp_host.get('HostName', None) != instance.public_ip:
        print(f"Updating SSH config (prev={gcp_host.get('HostName')}, new={instance.public_ip}, "
              f"file={ssh_config_file})")
        ssh_config.update(instance.name, {'HostName': instance.public_ip})
        ssh_config.write(ssh_config_file)
        # TODO: clean '.ssh/known_hosts'.

    # Configure remote instance. This is specific for docker-based images now.
    Shell.ssh(
        platform.instance.name,
        'sudo snap install docker && sudo addgroup --system docker && sudo adduser ${USER} docker && '
        'sudo snap disable docker && sudo snap enable docker && '
        'sudo apt update && yes | sudo apt install python3-pip virtualenv && sudo apt clean'
    )

    # Remote GCP instance has been configured
    print(instance)

    # Should be as simple as invoking SSH configure.
    configure_ssh(
        mlcube=self.mlcube.root,
        platform=os.path.join(self.mlcube.root, 'platforms', platform.platform)
    )
def instance_create(project_id: str, zone: str):
    """Create an instance and wait for the create operation.

    ``Service.create_instance`` is called without arguments, so whatever
    defaults it defines decide the instance's name and shape.

    Args:
        project_id: Project identifier forwarded to ``Service``.
        zone: Zone forwarded to ``Service``.
    """
    client = Service(project_id=project_id, zone=zone)
    create_op = client.create_instance()
    client.wait_for_operation(create_op)
def instance_status(project_id: str, zone: str, name: str):
    """Print the ``Instance`` wrapper built from the lookup of ``name``.

    Args:
        project_id: Project identifier forwarded to ``Service``.
        zone: Zone forwarded to ``Service``.
        name: Name of the instance to look up.
    """
    client = Service(project_id=project_id, zone=zone)
    raw_instance = client.get_instance(name=name)
    print(Instance(raw_instance))