def start(self):
    if self.vm_id:
        try:
            node = self.node()
        except libcloud.common.google.ResourceNotFoundError:
            self.warn("seems to have been destroyed already")
            self._node_deleted()
            node = None

        if node and (node.state == NodeState.TERMINATED):
            self.stop()

        if node and (node.state == NodeState.STOPPED):
            self.log("starting GCE machine")
            self.connect().ex_start_node(node)
            self.public_ipv4 = self.node().public_ips[0]
            self.private_ipv4 = self.node().private_ips[0]
            known_hosts.add(self.public_ipv4, self.public_host_key)
            self.wait_for_ssh(check=True)
            self.send_keys()

    if not self.vm_id and self.block_device_mapping:
        prev_public_ipv4 = self.public_ipv4
        prev_private_ipv4 = self.private_ipv4

        self.create_node(self)

        if prev_public_ipv4 != self.public_ipv4:
            self.warn("Public IP address has changed from {0} to {1}, "
                      "you may need to run 'nixops deploy'"
                      .format(prev_public_ipv4, self.public_ipv4))

        if prev_private_ipv4 != self.private_ipv4:
            self.warn("Private IP address has changed from {0} to {1}, "
                      "you may need to run 'nixops deploy'"
                      .format(prev_private_ipv4, self.private_ipv4))

        self.wait_for_ssh(check=True)
        self.send_keys()
def _check(self, res):
    try:
        node = self.node()
        res.exists = True
        res.is_up = node.state == NodeState.RUNNING or node.state == NodeState.REBOOTING
        if node.state == NodeState.REBOOTING or node.state == NodeState.PENDING:
            self.state = self.STARTING
        if node.state == NodeState.STOPPED or node.state == NodeState.TERMINATED:
            self.state = self.STOPPED
        if node.state == NodeState.UNKNOWN:
            self.state = self.UNKNOWN
        if node.state == NodeState.RUNNING:
            # check that all disks are attached
            res.disks_ok = True
            for k, v in self.block_device_mapping.iteritems():
                disk_name = v['disk_name'] or v['disk']
                if all(d.get("deviceName", None) != disk_name for d in node.extra['disks']):
                    res.disks_ok = False
                    res.messages.append("disk {0} is detached".format(disk_name))
                    try:
                        disk = self.connect().ex_get_volume(disk_name, v.get('region', None))
                    except libcloud.common.google.ResourceNotFoundError:
                        res.messages.append("disk {0} is destroyed".format(disk_name))

            self.handle_changed_property('public_ipv4',
                                         node.public_ips[0] if node.public_ips else None,
                                         property_name='IP address')
            if self.public_ipv4:
                known_hosts.add(self.public_ipv4, self.public_host_key)

            MachineState._check(self, res)

    except libcloud.common.google.ResourceNotFoundError:
        res.exists = False
        res.is_up = False
        self.state = self.MISSING
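# For reference, a minimal standalone sketch of the NodeState mapping that
# _check() applies above. This is a hypothetical helper, not part of the
# backend: the real code sets self.state in place, and the string names
# here stand in for the MachineState constants.
from libcloud.compute.types import NodeState

NODE_STATE_TO_MACHINE_STATE = {
    NodeState.REBOOTING: "STARTING",
    NodeState.PENDING: "STARTING",
    NodeState.STOPPED: "STOPPED",
    NodeState.TERMINATED: "STOPPED",
    NodeState.UNKNOWN: "UNKNOWN",
}

def machine_state_for(node_state):
    # RUNNING maps to no state change: in that case _check() only verifies
    # the attached disks and the public IP.
    return NODE_STATE_TO_MACHINE_STATE.get(node_state)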
def _update_host_keys(self) -> None:
    self.log_start("updating host keys...")
    cmd = f"cat /etc/ssh/ssh_host_{HOST_KEY_TYPE}_key.pub"
    self._public_host_key = str(self.run_command(cmd, capture_stdout=True)).strip()
    known_hosts.add(self.public_ipv4, self._public_host_key)
    self.log_end("")
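# Illustration only: assuming HOST_KEY_TYPE = "ed25519" (an assumption; the
# constant is defined elsewhere in this module), the command built above
# reads the server's public host key so it can be pinned locally:
HOST_KEY_TYPE = "ed25519"
assert f"cat /etc/ssh/ssh_host_{HOST_KEY_TYPE}_key.pub" == \
    "cat /etc/ssh/ssh_host_ed25519_key.pub"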
def _create_instance(self, defn) -> None:
    if not self.public_client_key:
        (private, public) = create_key_pair(type="ed25519")
        self.public_client_key = public
        self.private_client_key = private

    if not self.public_host_key:
        (private, public) = create_key_pair(type="ed25519")
        self.public_host_key = public
        self.private_host_key = private

    location: BoundLocation = self.get_client().locations.get_by_name(defn.location)

    ssh_keys: List[BoundSSHKey] = [self._create_ssh_key(self.public_client_key)]

    # Ensure host keys get injected into the base OS
    user_data = ("#cloud-config\n"
                 "ssh_keys:\n"
                 "  ed25519_public: {0}\n"
                 "  ed25519_private: |\n"
                 "    {1}").format(
                     self.public_host_key,
                     self.private_host_key.replace("\n", "\n    "))

    self.logger.log_start(
        f"creating {defn.server_type} server at {location.description}...")

    response = self.get_client().servers.create(
        name=defn.server_name,
        labels={**self.get_common_labels(), **dict(defn.labels)},
        location=location,
        server_type=ServerType(defn.server_type),
        ssh_keys=ssh_keys,
        user_data=user_data,
        image=Image(name="ubuntu-20.04"),  # for illustration
        start_after_create=True,
    )

    self.state = self.STARTING
    self.wait_on_action(response.action)

    with self.depl._db:
        self.vm_id = response.server.id
        self.public_ipv4 = response.server.public_net.ipv4.ip
        self.public_ipv6 = response.server.public_net.ipv6.ip
        self.server_name = defn.server_name
        self.server_type = defn.server_type
        self.legacy_if_scheme = defn.server_type.startswith("cx")
        self.location = defn.location
        self.labels = dict(defn.labels)
        self.private_host_key = None

    known_hosts.add(self.public_ipv4, self.public_host_key)
    self.logger.log_end(f"{self.public_ipv4}")
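# A hedged illustration of the user_data payload _create_instance builds
# above, with placeholder key material (real keys come from create_key_pair)
# and the YAML indentation as reconstructed here; cloud-init installs the
# pre-generated host key, so the key pinned via known_hosts.add matches the
# server on first contact.
public_host_key = "ssh-ed25519 AAAA...example"
private_host_key = ("-----BEGIN OPENSSH PRIVATE KEY-----\n"
                    "...\n"
                    "-----END OPENSSH PRIVATE KEY-----")
user_data = ("#cloud-config\n"
             "ssh_keys:\n"
             "  ed25519_public: {0}\n"
             "  ed25519_private: |\n"
             "    {1}").format(public_host_key,
                              private_host_key.replace("\n", "\n    "))
print(user_data)
# #cloud-config
# ssh_keys:
#   ed25519_public: ssh-ed25519 AAAA...example
#   ed25519_private: |
#     -----BEGIN OPENSSH PRIVATE KEY-----
#     ...
#     -----END OPENSSH PRIVATE KEY-----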
def create_node(self, defn):
    if not self.vm_id:
        self.log("creating {0}...".format(self.full_name))
        boot_disk = next(
            (v for k, v in defn.block_device_mapping.iteritems()
             if v.get('bootDisk', False)), None)
        if not boot_disk:
            raise Exception("no boot disk found for {0}".format(self.full_name))

        try:
            service_accounts = []
            account = {'email': defn.email}
            if defn.scopes != []:
                account['scopes'] = defn.scopes
            service_accounts.append(account)
            # keep a gcloud-like behavior: if nothing was specified, i.e. the
            # service account is 'default', get the default scopes as well
            if defn.email == 'default' and defn.scopes == []:
                service_accounts = None

            node = self.connect().create_node(
                self.machine_name, defn.instance_type, "",
                location=self.connect().ex_get_zone(defn.region),
                ex_boot_disk=self.connect().ex_get_volume(
                    boot_disk['disk_name'] or boot_disk['disk'],
                    boot_disk.get('region', None)),
                ex_metadata=self.full_metadata(defn.metadata),
                ex_tags=defn.tags,
                ex_service_accounts=service_accounts,
                external_ip=(self.connect().ex_get_address(defn.ipAddress)
                             if defn.ipAddress else 'ephemeral'),
                ex_network=(defn.network if defn.network else 'default'))
        except libcloud.common.google.ResourceExistsError:
            raise Exception("tried creating an instance that already exists; "
                            "please run 'deploy --check' to fix this")

        self.vm_id = self.machine_name
        self.state = self.STARTING
        self.ssh_pinged = False
        self.copy_properties(defn)

        self.public_ipv4 = node.public_ips[0]
        self.log("got public IP: {0}".format(self.public_ipv4))
        known_hosts.add(self.public_ipv4, self.public_host_key)
        self.private_ipv4 = node.private_ips[0]
        for k, v in self.block_device_mapping.iteritems():
            v['needsAttach'] = True
            self.update_block_device_mapping(k, v)

        # set scheduling config here instead of triggering an update using None values,
        # because we might be called with defn = self, thus modifying self would ruin defn
        self.connect().ex_set_node_scheduling(
            node,
            automatic_restart=defn.automatic_restart,
            on_host_maintenance=defn.on_host_maintenance)
        self.automatic_restart = defn.automatic_restart
        self.on_host_maintenance = defn.on_host_maintenance

    # Update the service account
    if self.email != defn.email or self.scopes != defn.scopes:
        self.log('updating the service account')
        node = self.node()
        request = '/zones/%s/instances/%s/setServiceAccount' % (
            node.extra['zone'].name, node.name)
        service_account = {}
        service_account["email"] = defn.email
        if defn.scopes != []:
            service_account["scopes"] = defn.scopes
        self.connect().connection.async_request(request, method='POST',
                                                data=service_account)
        self.email = defn.email
        self.scopes = defn.scopes

    # Attach missing volumes
    for k, v in self.block_device_mapping.items():
        defn_v = defn.block_device_mapping.get(k, None)
        if v.get('needsAttach', False) and defn_v:
            disk_name = v['disk_name'] or v['disk']
            disk_region = v.get('region', None)
            v['readOnly'] = defn_v['readOnly']
            v['bootDisk'] = defn_v['bootDisk']
            v['deleteOnTermination'] = defn_v['deleteOnTermination']
            v['passphrase'] = defn_v['passphrase']
            self.log("attaching GCE disk '{0}'...".format(disk_name))
            if not v.get('bootDisk', False):
                self.connect().attach_volume(
                    self.node(),
                    self.connect().ex_get_volume(disk_name, disk_region),
                    device=disk_name,
                    ex_mode=('READ_ONLY' if v['readOnly'] else 'READ_WRITE'))
            del v['needsAttach']
            self.update_block_device_mapping(k, v)

        # generate a LUKS key if the model didn't specify one
        if v.get('encrypt', False) and v.get('passphrase', "") == "" \
                and v.get('generatedKey', "") == "":
            v['generatedKey'] = generate_random_string(length=256)
            self.update_block_device_mapping(k, v)

    if self.metadata != defn.metadata:
        self.log('setting new metadata values')
        node = self.node()
        meta = self.gen_metadata(self.full_metadata(defn.metadata))
        request = '/zones/%s/instances/%s/setMetadata' % (
            node.extra['zone'].name, node.name)
        metadata_data = {}
        metadata_data['items'] = meta['items']
        metadata_data['kind'] = meta['kind']
        metadata_data['fingerprint'] = node.extra['metadata']['fingerprint']
        self.connect().connection.async_request(request, method='POST',
                                                data=metadata_data)
        self.metadata = defn.metadata

    if self.tags != defn.tags:
        self.log('updating tags')
        self.connect().ex_set_node_tags(self.node(), defn.tags)
        self.tags = defn.tags

    if self.public_ipv4 and self.ipAddress != defn.ipAddress:
        self.log("detaching old public IP address {0}".format(self.public_ipv4))
        self.connect().connection.async_request(
            "/zones/{0}/instances/{1}/deleteAccessConfig?"
            "accessConfig=External+NAT&networkInterface=nic0"
            .format(self.region, self.machine_name), method='POST')
        self.public_ipv4 = None
        self.ipAddress = None

    if self.public_ipv4 is None:
        self.log("attaching public IP address {0}".format(
            defn.ipAddress or "[Ephemeral]"))
        self.connect().connection.async_request(
            "/zones/{0}/instances/{1}/addAccessConfig?networkInterface=nic0"
            .format(self.region, self.machine_name), method='POST',
            data={
                'kind': 'compute#accessConfig',
                'type': 'ONE_TO_ONE_NAT',
                'name': 'External NAT',
                'natIP': self.connect().ex_get_address(defn.ipAddress).address
                         if defn.ipAddress else None
            })
        self.ipAddress = defn.ipAddress
        self.public_ipv4 = self.node().public_ips[0]
        self.log("got public IP: {0}".format(self.public_ipv4))
        known_hosts.add(self.public_ipv4, self.public_host_key)
        self.ssh.reset()
        self.ssh_pinged = False

    if self.automatic_restart != defn.automatic_restart or \
            self.on_host_maintenance != defn.on_host_maintenance:
        self.log("setting scheduling configuration")
        self.connect().ex_set_node_scheduling(
            self.node(),
            automatic_restart=defn.automatic_restart,
            on_host_maintenance=defn.on_host_maintenance)
        self.automatic_restart = defn.automatic_restart
        self.on_host_maintenance = defn.on_host_maintenance
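# A minimal sketch of what generate_random_string plausibly looks like
# (an assumption: the real helper lives in nixops.util and its exact
# encoding may differ). The 256 random bytes requested above become the
# LUKS passphrase for disks the model marked encrypt without a passphrase.
import base64
import os

def generate_random_string(length=256):
    # base64-encode the raw bytes so the generated key is printable and
    # can be stored in the deployment state file
    return base64.b64encode(os.urandom(length)).decode("ascii")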
def create(self, defn, check, allow_reboot, allow_recreate):
    assert isinstance(defn, GCEDefinition)

    self.no_project_change(defn)
    self.no_region_change(defn)
    self.no_change(self.machine_name != defn.machine_name, "instance name")

    self.set_common_state(defn)
    self.copy_credentials(defn)
    self.machine_name = defn.machine_name
    self.region = defn.region

    if not self.public_client_key:
        (private, public) = create_key_pair()
        self.public_client_key = public
        self.private_client_key = private

    self.host_key_type = (
        "ed25519"
        if self.state_version != "14.12" and
           nixops.util.parse_nixos_version(defn.config["nixosRelease"]) >= ["15", "09"]
        else "ecdsa")

    if not self.public_host_key:
        (private, public) = create_key_pair(type=self.host_key_type)
        self.public_host_key = public
        self.private_host_key = private

    recreate = False

    if check:
        try:
            node = self.node()
            if self.vm_id:
                if node.state == NodeState.TERMINATED:
                    recreate = True
                    self.warn("the instance is terminated and needs a reboot")
                    self.state = self.STOPPED

                self.handle_changed_property('region', node.extra['zone'].name,
                                             can_fix=False)

                # a bit hacky but should work
                network_name = node.extra['networkInterfaces'][0]['network'].split('/')[-1]
                if network_name == 'default':
                    network_name = None
                self.handle_changed_property('network', network_name)

                self.handle_changed_property('instance_type', node.size)
                self.handle_changed_property(
                    'public_ipv4',
                    node.public_ips[0] if node.public_ips else None,
                    property_name='public IP address')
                if self.public_ipv4:
                    known_hosts.add(self.public_ipv4, self.public_host_key)

                self.handle_changed_property(
                    'private_ipv4',
                    node.private_ips[0] if node.private_ips else None,
                    property_name='private IP address')

                if self.ipAddress:
                    try:
                        address = self.connect().ex_get_address(self.ipAddress)
                        if self.public_ipv4 and self.public_ipv4 != address.address:
                            self.warn(
                                "static IP address {0} assigned to this machine has "
                                "unexpectedly changed from {1} to {2}, most likely due "
                                "to being redeployed".format(
                                    self.ipAddress, self.public_ipv4, address.address))
                            self.ipAddress = None
                    except libcloud.common.google.ResourceNotFoundError:
                        self.warn(
                            "static IP address resource {0} used by this machine has been destroyed; "
                            "it is likely that the machine is still holding the address itself ({1}) "
                            "and this is your last chance to reclaim it before it gets "
                            "lost in a reboot".format(self.ipAddress, self.public_ipv4))

                self.handle_changed_property('tags', sorted(node.extra['tags']))

                actual_metadata = {
                    i['key']: i['value']
                    for i in node.extra['metadata'].get('items', [])
                    if i['key'] not in ['ssh_host_{0}_key'.format(self.host_key_type),
                                        'sshKeys',
                                        'ssh_host_{0}_key_pub'.format(self.host_key_type)]
                }
                self.handle_changed_property('metadata', actual_metadata)

                self.handle_changed_property(
                    'automatic_restart', node.extra['scheduling']["automaticRestart"])
                self.handle_changed_property(
                    'on_host_maintenance', node.extra['scheduling']["onHostMaintenance"])

                attached_disk_names = [d.get("deviceName", None)
                                       for d in node.extra['disks']]

                # check that all disks are attached
                for k, v in self.block_device_mapping.iteritems():
                    disk_name = v['disk_name'] or v['disk']
                    is_attached = disk_name in attached_disk_names
                    if not is_attached and not v.get('needsAttach', False):
                        self.warn("disk {0} seems to have been detached behind our "
                                  "back; will reattach...".format(disk_name))
                        v['needsAttach'] = True
                        self.update_block_device_mapping(k, v)
                    if is_attached and v.get('needsAttach', False):
                        self.warn("disk {0} seems to have been attached for us; "
                                  "thank you, mr. Elusive Bug!".format(disk_name))
                        del v['needsAttach']
                        self.update_block_device_mapping(k, v)

                # check that no extra disks are attached
                defn_disk_names = [v['disk_name'] or v['disk']
                                   for k, v in defn.block_device_mapping.iteritems()]
                state_disk_names = [v['disk_name'] or v['disk']
                                    for k, v in self.block_device_mapping.iteritems()]
                unexpected_disks = list(set(attached_disk_names)
                                        - set(defn_disk_names)
                                        - set(state_disk_names))
                if unexpected_disks:
                    self.warn("unexpected disk(s) {0} are attached to this instance; "
                              "not fixing this just in case".format(unexpected_disks))
            else:
                self.warn_not_supposed_to_exist(valuable_data=True)
                self.confirm_destroy(node, self.full_name)

        except libcloud.common.google.ResourceNotFoundError:
            if self.vm_id:
                self.warn("the instance seems to have been destroyed behind our back")
                if not allow_recreate:
                    raise Exception("use --allow-recreate to fix")
                self._node_deleted()

        # check that the disks that should exist do exist,
        # and that the disks we expect to create don't exist yet
        for k, v in defn.block_device_mapping.iteritems():
            disk_name = v['disk_name'] or v['disk']
            try:
                disk = self.connect().ex_get_volume(disk_name, v.get('region', None))
                if k not in self.block_device_mapping and v['disk_name']:
                    self.warn_not_supposed_to_exist(resource_name=disk_name,
                                                    valuable_data=True)
                    self.confirm_destroy(disk, disk_name)
            except libcloud.common.google.ResourceNotFoundError:
                if v['disk']:
                    raise Exception("external disk '{0}' is required but doesn't exist"
                                    .format(disk_name))
                if k in self.block_device_mapping and v['disk_name']:
                    self.warn("disk '{0}' is supposed to exist, but is missing; "
                              "will recreate...".format(disk_name))
                    self.update_block_device_mapping(k, None)

    # create missing disks
    for k, v in defn.block_device_mapping.iteritems():
        if k in self.block_device_mapping:
            continue
        if v['disk'] is None:
            extra_msg = (" from snapshot '{0}'".format(v['snapshot']) if v['snapshot']
                         else " from image '{0}'".format(v['image']) if v['image']
                         else "")
            self.log("creating GCE disk of {0} GiB{1}...".format(
                v['size'] if v['size'] else "auto", extra_msg))
            v['region'] = defn.region
            try:
                self.connect().create_volume(v['size'], v['disk_name'], v['region'],
                                             snapshot=v['snapshot'],
                                             image=v['image'],
                                             ex_disk_type="pd-" + v.get('type', 'standard'),
                                             use_existing=False)
            except libcloud.common.google.ResourceExistsError:
                raise Exception("tried creating a disk that already exists; "
                                "please run 'deploy --check' to fix this")
        v['needsAttach'] = True
        self.update_block_device_mapping(k, v)

    if self.vm_id:
        if self.instance_type != defn.instance_type:
            recreate = True
            self.warn("change of the instance type requires a reboot")

        if self.network != defn.network:
            recreate = True
            self.warn("change of the network requires a reboot")

        if self.email != defn.email or self.scopes != defn.scopes:
            recreate = True
            self.warn("change of the service account requires a reboot")

        for k, v in self.block_device_mapping.iteritems():
            defn_v = defn.block_device_mapping.get(k, None)
            if defn_v and not v.get('needsAttach', False):
                if v['bootDisk'] != defn_v['bootDisk']:
                    recreate = True
                    self.warn("change of the boot disk requires a reboot")
                if v['readOnly'] != defn_v['readOnly']:
                    recreate = True
                    self.warn("remounting a disk as ro/rw requires a reboot")

        if recreate:
            if not allow_reboot:
                raise Exception("reboot is required for the requested changes; "
                                "please run with --allow-reboot")
            self.stop()

    self.create_node(defn)
    if self.node().state == NodeState.STOPPED:
        self.start()
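# Condensed restatement of the host-key-type selection in create() above:
# ed25519 host keys are only used for state versions newer than 14.12 on
# NixOS >= 15.09. The list-of-strings comparison mirrors how the result of
# nixops.util.parse_nixos_version is used above (an assumption about its
# output shape, inferred from that comparison).
def choose_host_key_type(state_version, nixos_release_parts):
    if state_version != "14.12" and nixos_release_parts >= ["15", "09"]:
        return "ed25519"
    return "ecdsa"

assert choose_host_key_type("15.09", ["15", "09"]) == "ed25519"
assert choose_host_key_type("14.12", ["15", "09"]) == "ecdsa"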
def create_node(self, defn):
    if not self.vm_id:
        self.log("creating {0}...".format(self.full_name))
        boot_disk = next(
            (v for k, v in defn.block_device_mapping.iteritems()
             if v.get('bootDisk', False)), None)
        if not boot_disk:
            raise Exception("no boot disk found for {0}".format(self.full_name))

        try:
            service_accounts = []
            account = {'email': defn.email}
            if defn.scopes != []:
                account['scopes'] = defn.scopes
            service_accounts.append(account)
            # keep a gcloud-like behavior: if nothing was specified, i.e. the
            # service account is 'default', get the default scopes as well
            if defn.email == 'default' and defn.scopes == []:
                service_accounts = None

            node = self.connect().create_node(
                self.machine_name, defn.instance_type, "",
                ex_preemptible=(defn.preemptible if defn.preemptible else None),
                location=self.connect().ex_get_zone(defn.region),
                ex_boot_disk=self.connect().ex_get_volume(
                    boot_disk['disk_name'] or boot_disk['disk'],
                    boot_disk.get('region', None)),
                ex_metadata=self.full_metadata(defn.metadata),
                ex_tags=defn.tags,
                ex_service_accounts=service_accounts,
                external_ip=(self.connect().ex_get_address(defn.ipAddress)
                             if defn.ipAddress else 'ephemeral'),
                ex_network=(defn.network if defn.network else 'default'))
        except libcloud.common.google.ResourceExistsError:
            raise Exception("tried creating an instance that already exists; "
                            "please run 'deploy --check' to fix this")

        self.vm_id = self.machine_name
        self.state = self.STARTING
        self.ssh_pinged = False
        self.copy_properties(defn)

        self.public_ipv4 = node.public_ips[0]
        self.log("got public IP: {0}".format(self.public_ipv4))
        known_hosts.add(self.public_ipv4, self.public_host_key)
        self.private_ipv4 = node.private_ips[0]
        for k, v in self.block_device_mapping.iteritems():
            v['needsAttach'] = True
            self.update_block_device_mapping(k, v)

        # set scheduling config here instead of triggering an update using None values,
        # because we might be called with defn = self, thus modifying self would ruin defn
        self.connect().ex_set_node_scheduling(
            node,
            automatic_restart=defn.automatic_restart,
            on_host_maintenance=defn.on_host_maintenance)
        self.automatic_restart = defn.automatic_restart
        self.on_host_maintenance = defn.on_host_maintenance

    # Update the service account
    if self.email != defn.email or self.scopes != defn.scopes:
        self.log('updating the service account')
        node = self.node()
        request = '/zones/%s/instances/%s/setServiceAccount' % (
            node.extra['zone'].name, node.name)
        service_account = {}
        service_account["email"] = defn.email
        if defn.scopes != []:
            service_account["scopes"] = defn.scopes
        self.connect().connection.async_request(request, method='POST',
                                                data=service_account)
        self.email = defn.email
        self.scopes = defn.scopes

    # Attach missing volumes
    for k, v in self.block_device_mapping.items():
        defn_v = defn.block_device_mapping.get(k, None)
        if v.get('needsAttach', False) and defn_v:
            disk_name = v['disk_name'] or v['disk']
            disk_region = v.get('region', None)
            v['readOnly'] = defn_v['readOnly']
            v['bootDisk'] = defn_v['bootDisk']
            v['deleteOnTermination'] = defn_v['deleteOnTermination']
            v['passphrase'] = defn_v['passphrase']
            self.log("attaching GCE disk '{0}'...".format(disk_name))
            if not v.get('bootDisk', False):
                self.connect().attach_volume(
                    self.node(),
                    self.connect().ex_get_volume(disk_name, disk_region),
                    device=disk_name,
                    ex_mode=('READ_ONLY' if v['readOnly'] else 'READ_WRITE'))
            del v['needsAttach']
            self.update_block_device_mapping(k, v)

        # generate a LUKS key if the model didn't specify one
        if v.get('encrypt', False) and v.get('passphrase', "") == "" \
                and v.get('generatedKey', "") == "":
            v['generatedKey'] = generate_random_string(length=256)
            self.update_block_device_mapping(k, v)

    if self.metadata != defn.metadata:
        self.log('setting new metadata values')
        node = self.node()
        meta = self.gen_metadata(self.full_metadata(defn.metadata))
        request = '/zones/%s/instances/%s/setMetadata' % (
            node.extra['zone'].name, node.name)
        metadata_data = {}
        metadata_data['items'] = meta['items']
        metadata_data['kind'] = meta['kind']
        metadata_data['fingerprint'] = node.extra['metadata']['fingerprint']
        self.connect().connection.async_request(request, method='POST',
                                                data=metadata_data)
        self.metadata = defn.metadata

    if self.tags != defn.tags:
        self.log('updating tags')
        self.connect().ex_set_node_tags(self.node(), defn.tags)
        self.tags = defn.tags

    if self.public_ipv4 and self.ipAddress != defn.ipAddress:
        self.log("detaching old public IP address {0}".format(self.public_ipv4))
        self.connect().connection.async_request(
            "/zones/{0}/instances/{1}/deleteAccessConfig?"
            "accessConfig=External+NAT&networkInterface=nic0"
            .format(self.region, self.machine_name), method='POST')
        self.public_ipv4 = None
        self.ipAddress = None

    if self.public_ipv4 is None:
        self.log("attaching public IP address {0}".format(
            defn.ipAddress or "[Ephemeral]"))
        self.connect().connection.async_request(
            "/zones/{0}/instances/{1}/addAccessConfig?networkInterface=nic0"
            .format(self.region, self.machine_name), method='POST',
            data={
                'kind': 'compute#accessConfig',
                'type': 'ONE_TO_ONE_NAT',
                'name': 'External NAT',
                'natIP': self.connect().ex_get_address(defn.ipAddress).address
                         if defn.ipAddress else None
            })
        self.ipAddress = defn.ipAddress
        self.public_ipv4 = self.node().public_ips[0]
        self.log("got public IP: {0}".format(self.public_ipv4))
        known_hosts.add(self.public_ipv4, self.public_host_key)
        self.ssh.reset()
        self.ssh_pinged = False

    if self.automatic_restart != defn.automatic_restart or \
            self.on_host_maintenance != defn.on_host_maintenance:
        self.log("setting scheduling configuration")
        self.connect().ex_set_node_scheduling(
            self.node(),
            automatic_restart=defn.automatic_restart,
            on_host_maintenance=defn.on_host_maintenance)
        self.automatic_restart = defn.automatic_restart
        self.on_host_maintenance = defn.on_host_maintenance
def create(self, defn, check, allow_reboot, allow_recreate):
    assert isinstance(defn, GCEDefinition)

    self.no_project_change(defn)
    self.no_region_change(defn)
    self.no_change(self.machine_name != defn.machine_name, "instance name")

    self.set_common_state(defn)
    self.copy_credentials(defn)
    self.machine_name = defn.machine_name
    self.region = defn.region

    if not self.public_client_key:
        (private, public) = create_key_pair()
        self.public_client_key = public
        self.private_client_key = private

    self.host_key_type = (
        "ed25519"
        if self.state_version != "14.12" and
           nixops.util.parse_nixos_version(defn.config["nixosRelease"]) >= ["15", "09"]
        else "ecdsa")

    if not self.public_host_key:
        (private, public) = create_key_pair(type=self.host_key_type)
        self.public_host_key = public
        self.private_host_key = private

    recreate = False

    if check:
        try:
            node = self.node()
            if self.vm_id:
                if node.state == NodeState.TERMINATED:
                    recreate = True
                    self.warn("the instance is terminated and needs a reboot")
                    self.state = self.STOPPED

                self.handle_changed_property('region', node.extra['zone'].name,
                                             can_fix=False)
                self.handle_changed_property('preemptible',
                                             node.extra['scheduling']['preemptible'],
                                             can_fix=False)

                # a bit hacky but should work
                network_name = node.extra['networkInterfaces'][0]['network'].split('/')[-1]
                if network_name == 'default':
                    network_name = None
                self.handle_changed_property('network', network_name)

                self.handle_changed_property('instance_type', node.size)
                self.handle_changed_property(
                    'public_ipv4',
                    node.public_ips[0] if node.public_ips else None,
                    property_name='public IP address')
                if self.public_ipv4:
                    known_hosts.add(self.public_ipv4, self.public_host_key)

                self.handle_changed_property(
                    'private_ipv4',
                    node.private_ips[0] if node.private_ips else None,
                    property_name='private IP address')

                if self.ipAddress:
                    try:
                        address = self.connect().ex_get_address(self.ipAddress)
                        if self.public_ipv4 and self.public_ipv4 != address.address:
                            self.warn(
                                "static IP address {0} assigned to this machine has "
                                "unexpectedly changed from {1} to {2}, most likely due "
                                "to being redeployed".format(
                                    self.ipAddress, self.public_ipv4, address.address))
                            self.ipAddress = None
                    except libcloud.common.google.ResourceNotFoundError:
                        self.warn(
                            "static IP address resource {0} used by this machine has been destroyed; "
                            "it is likely that the machine is still holding the address itself ({1}) "
                            "and this is your last chance to reclaim it before it gets "
                            "lost in a reboot".format(self.ipAddress, self.public_ipv4))

                self.handle_changed_property('tags', sorted(node.extra['tags']))

                actual_metadata = {
                    i['key']: i['value']
                    for i in node.extra['metadata'].get('items', [])
                    if i['key'] not in ['ssh_host_{0}_key'.format(self.host_key_type),
                                        'sshKeys',
                                        'ssh_host_{0}_key_pub'.format(self.host_key_type)]
                }
                self.handle_changed_property('metadata', actual_metadata)

                self.handle_changed_property(
                    'automatic_restart', node.extra['scheduling']["automaticRestart"])
                self.handle_changed_property(
                    'on_host_maintenance', node.extra['scheduling']["onHostMaintenance"])

                attached_disk_names = [d.get("deviceName", None)
                                       for d in node.extra['disks']]

                # check that all disks are attached
                for k, v in self.block_device_mapping.iteritems():
                    disk_name = v['disk_name'] or v['disk']
                    is_attached = disk_name in attached_disk_names
                    if not is_attached and not v.get('needsAttach', False):
                        self.warn("disk {0} seems to have been detached behind our "
                                  "back; will reattach...".format(disk_name))
                        v['needsAttach'] = True
                        self.update_block_device_mapping(k, v)
                    if is_attached and v.get('needsAttach', False):
                        self.warn("disk {0} seems to have been attached for us; "
                                  "thank you, mr. Elusive Bug!".format(disk_name))
                        del v['needsAttach']
                        self.update_block_device_mapping(k, v)

                # check that no extra disks are attached
                defn_disk_names = [v['disk_name'] or v['disk']
                                   for k, v in defn.block_device_mapping.iteritems()]
                state_disk_names = [v['disk_name'] or v['disk']
                                    for k, v in self.block_device_mapping.iteritems()]
                unexpected_disks = list(set(attached_disk_names)
                                        - set(defn_disk_names)
                                        - set(state_disk_names))
                if unexpected_disks:
                    self.warn("unexpected disk(s) {0} are attached to this instance; "
                              "not fixing this just in case".format(unexpected_disks))
            else:
                self.warn_not_supposed_to_exist(valuable_data=True)
                self.confirm_destroy(node, self.full_name)

        except libcloud.common.google.ResourceNotFoundError:
            if self.vm_id:
                self.warn("the instance seems to have been destroyed behind our back")
                if not allow_recreate:
                    raise Exception("use --allow-recreate to fix")
                self._node_deleted()

        # check that the disks that should exist do exist,
        # and that the disks we expect to create don't exist yet
        for k, v in defn.block_device_mapping.iteritems():
            disk_name = v['disk_name'] or v['disk']
            try:
                disk = self.connect().ex_get_volume(disk_name, v.get('region', None))
                if k not in self.block_device_mapping and v['disk_name']:
                    self.warn_not_supposed_to_exist(resource_name=disk_name,
                                                    valuable_data=True)
                    self.confirm_destroy(disk, disk_name)
            except libcloud.common.google.ResourceNotFoundError:
                if v['disk']:
                    raise Exception("external disk '{0}' is required but doesn't exist"
                                    .format(disk_name))
                if k in self.block_device_mapping and v['disk_name']:
                    self.warn("disk '{0}' is supposed to exist, but is missing; "
                              "will recreate...".format(disk_name))
                    self.update_block_device_mapping(k, None)

    # create missing disks
    for k, v in defn.block_device_mapping.iteritems():
        if k in self.block_device_mapping:
            continue
        if v['disk'] is None:
            extra_msg = (" from snapshot '{0}'".format(v['snapshot']) if v['snapshot']
                         else " from image '{0}'".format(v['image']) if v['image']
                         else "")
            self.log("creating GCE disk of {0} GiB{1}...".format(
                v['size'] if v['size'] else "auto", extra_msg))
            v['region'] = defn.region
            try:
                self.connect().create_volume(v['size'], v['disk_name'], v['region'],
                                             snapshot=v['snapshot'],
                                             image=v['image'],
                                             ex_disk_type="pd-" + v.get('type', 'standard'),
                                             use_existing=False)
            except AttributeError:
                # libcloud bug: the region we're trying to create the disk
                # in doesn't exist
                raise Exception("tried creating a disk in nonexistent "
                                "region %r" % v['region'])
            except libcloud.common.google.ResourceExistsError:
                raise Exception("tried creating a disk that already exists; "
                                "please run 'deploy --check' to fix this")
        v['needsAttach'] = True
        self.update_block_device_mapping(k, v)

    if self.vm_id:
        if self.instance_type != defn.instance_type:
            recreate = True
            self.warn("change of the instance type requires a reboot")

        if self.network != defn.network:
            recreate = True
            self.warn("change of the network requires a reboot")

        if self.email != defn.email or self.scopes != defn.scopes:
            recreate = True
            self.warn("change of the service account requires a reboot")

        for k, v in self.block_device_mapping.iteritems():
            defn_v = defn.block_device_mapping.get(k, None)
            if defn_v and not v.get('needsAttach', False):
                if v['bootDisk'] != defn_v['bootDisk']:
                    recreate = True
                    self.warn("change of the boot disk requires a reboot")
                if v['readOnly'] != defn_v['readOnly']:
                    recreate = True
                    self.warn("remounting a disk as ro/rw requires a reboot")

        if recreate:
            if not allow_reboot:
                raise Exception("reboot is required for the requested changes; "
                                "please run with --allow-reboot")
            self.stop()

    self.create_node(defn)
    if self.node().state == NodeState.STOPPED:
        self.start()
def create_node(self, defn):
    if not self.vm_id:
        self.log("creating {0}...".format(self.full_name))
        boot_disk = next(
            (v for k, v in defn.block_device_mapping.iteritems()
             if v.get("bootDisk", False)), None)
        if not boot_disk:
            raise Exception("no boot disk found for {0}".format(self.full_name))

        try:
            node = self.connect().create_node(
                self.machine_name, defn.instance_type, "none",
                location=self.connect().ex_get_zone(defn.region),
                ex_boot_disk=self.connect().ex_get_volume(
                    boot_disk["disk_name"] or boot_disk["disk"],
                    boot_disk.get("region", None)),
                ex_metadata=self.full_metadata(defn.metadata),
                ex_tags=defn.tags,
                external_ip=(self.connect().ex_get_address(defn.ipAddress)
                             if defn.ipAddress else "ephemeral"),
                ex_network=(defn.network if defn.network else "default"),
            )
        except libcloud.common.google.ResourceExistsError:
            raise Exception(
                "tried creating an instance that already exists; "
                "please run 'deploy --check' to fix this")

        self.vm_id = self.machine_name
        self.state = self.STARTING
        self.ssh_pinged = False
        self.copy_properties(defn)

        self.public_ipv4 = node.public_ips[0]
        self.log("got public IP: {0}".format(self.public_ipv4))
        known_hosts.add(self.public_ipv4, self.public_host_key)
        self.private_ipv4 = node.private_ips[0]
        for k, v in self.block_device_mapping.iteritems():
            v["needsAttach"] = True
            self.update_block_device_mapping(k, v)

        # set scheduling config here instead of triggering an update using None values,
        # because we might be called with defn = self, thus modifying self would ruin defn
        self.connect().ex_set_node_scheduling(
            node,
            automatic_restart=defn.automatic_restart,
            on_host_maintenance=defn.on_host_maintenance)
        self.automatic_restart = defn.automatic_restart
        self.on_host_maintenance = defn.on_host_maintenance

    # Attach missing volumes
    for k, v in self.block_device_mapping.items():
        defn_v = defn.block_device_mapping.get(k, None)
        if v.get("needsAttach", False) and defn_v:
            disk_name = v["disk_name"] or v["disk"]
            disk_region = v.get("region", None)
            v["readOnly"] = defn_v["readOnly"]
            v["bootDisk"] = defn_v["bootDisk"]
            v["deleteOnTermination"] = defn_v["deleteOnTermination"]
            v["passphrase"] = defn_v["passphrase"]
            self.log("attaching GCE disk '{0}'...".format(disk_name))
            if not v.get("bootDisk", False):
                self.connect().attach_volume(
                    self.node(),
                    self.connect().ex_get_volume(disk_name, disk_region),
                    device=disk_name,
                    ex_mode=("READ_ONLY" if v["readOnly"] else "READ_WRITE"),
                )
            del v["needsAttach"]
            self.update_block_device_mapping(k, v)

        # generate a LUKS key if the model didn't specify one
        if v.get("encrypt", False) and v.get("passphrase", "") == "" \
                and v.get("generatedKey", "") == "":
            v["generatedKey"] = generate_random_string(length=256)
            self.update_block_device_mapping(k, v)

    if self.metadata != defn.metadata:
        self.log("setting new metadata values")
        node = self.node()
        meta = self.gen_metadata(self.full_metadata(defn.metadata))
        request = "/zones/%s/instances/%s/setMetadata" % (
            node.extra["zone"].name, node.name)
        metadata_data = {}
        metadata_data["items"] = meta["items"]
        metadata_data["kind"] = meta["kind"]
        metadata_data["fingerprint"] = node.extra["metadata"]["fingerprint"]
        self.connect().connection.async_request(request, method="POST",
                                                data=metadata_data)
        self.metadata = defn.metadata

    if self.tags != defn.tags:
        self.log("updating tags")
        self.connect().ex_set_node_tags(self.node(), defn.tags)
        self.tags = defn.tags

    if self.public_ipv4 and self.ipAddress != defn.ipAddress:
        self.log("detaching old public IP address {0}".format(self.public_ipv4))
        self.connect().connection.async_request(
            "/zones/{0}/instances/{1}/deleteAccessConfig?"
            "accessConfig=External+NAT&networkInterface=nic0".format(
                self.region, self.machine_name),
            method="POST",
        )
        self.public_ipv4 = None
        self.ipAddress = None

    if self.public_ipv4 is None:
        self.log("attaching public IP address {0}".format(
            defn.ipAddress or "[Ephemeral]"))
        self.connect().connection.async_request(
            "/zones/{0}/instances/{1}/addAccessConfig?networkInterface=nic0".format(
                self.region, self.machine_name),
            method="POST",
            data={
                "kind": "compute#accessConfig",
                "type": "ONE_TO_ONE_NAT",
                "name": "External NAT",
                "natIP": self.connect().ex_get_address(defn.ipAddress).address
                         if defn.ipAddress else None,
            },
        )
        self.ipAddress = defn.ipAddress
        self.public_ipv4 = self.node().public_ips[0]
        self.log("got public IP: {0}".format(self.public_ipv4))
        known_hosts.add(self.public_ipv4, self.public_host_key)
        self.ssh.reset()
        self.ssh_pinged = False

    if self.automatic_restart != defn.automatic_restart or \
            self.on_host_maintenance != defn.on_host_maintenance:
        self.log("setting scheduling configuration")
        self.connect().ex_set_node_scheduling(
            self.node(),
            automatic_restart=defn.automatic_restart,
            on_host_maintenance=defn.on_host_maintenance)
        self.automatic_restart = defn.automatic_restart
        self.on_host_maintenance = defn.on_host_maintenance
def create(self, defn, check, allow_reboot, allow_recreate):
    assert isinstance(defn, GCEDefinition)

    self.no_project_change(defn)
    self.no_region_change(defn)
    self.no_change(self.machine_name != defn.machine_name, "instance name")

    self.set_common_state(defn)
    self.copy_credentials(defn)
    self.machine_name = defn.machine_name
    self.region = defn.region

    if not self.public_client_key:
        (private, public) = create_key_pair()
        self.public_client_key = public
        self.private_client_key = private

    if not self.public_host_key:
        (private, public) = create_key_pair(type="ecdsa")
        self.public_host_key = public
        self.private_host_key = private

    recreate = False

    if check:
        try:
            node = self.node()
            if self.vm_id:
                if node.state == NodeState.TERMINATED:
                    recreate = True
                    self.warn("the instance is terminated and needs a reboot")
                    self.state = self.STOPPED

                self.handle_changed_property("region", node.extra["zone"].name,
                                             can_fix=False)

                # a bit hacky but should work
                network_name = node.extra["networkInterfaces"][0]["network"].split("/")[-1]
                if network_name == "default":
                    network_name = None
                self.handle_changed_property("network", network_name)

                self.handle_changed_property("instance_type", node.size)
                self.handle_changed_property(
                    "public_ipv4",
                    node.public_ips[0] if node.public_ips else None,
                    property_name="public IP address",
                )
                if self.public_ipv4:
                    known_hosts.add(self.public_ipv4, self.public_host_key)

                self.handle_changed_property(
                    "private_ipv4",
                    node.private_ips[0] if node.private_ips else None,
                    property_name="private IP address",
                )

                if self.ipAddress:
                    try:
                        address = self.connect().ex_get_address(self.ipAddress)
                        if self.public_ipv4 and self.public_ipv4 != address.address:
                            self.warn(
                                "static IP address {0} assigned to this machine has "
                                "unexpectedly changed from {1} to {2}, most likely due "
                                "to being redeployed".format(
                                    self.ipAddress, self.public_ipv4, address.address))
                            self.ipAddress = None
                    except libcloud.common.google.ResourceNotFoundError:
                        self.warn(
                            "static IP address resource {0} used by this machine has been destroyed; "
                            "it is likely that the machine is still holding the address itself ({1}) "
                            "and this is your last chance to reclaim it before it gets "
                            "lost in a reboot".format(self.ipAddress, self.public_ipv4))

                self.handle_changed_property("tags", sorted(node.extra["tags"]))

                actual_metadata = {
                    i["key"]: i["value"]
                    for i in node.extra["metadata"].get("items", [])
                    if i["key"] not in ["ssh_host_ecdsa_key", "sshKeys",
                                        "ssh_host_ecdsa_key_pub"]
                }
                self.handle_changed_property("metadata", actual_metadata)

                self.handle_changed_property(
                    "automatic_restart", node.extra["scheduling"]["automaticRestart"])
                self.handle_changed_property(
                    "on_host_maintenance", node.extra["scheduling"]["onHostMaintenance"])

                attached_disk_names = [d.get("deviceName", None)
                                       for d in node.extra["disks"]]

                # check that all disks are attached
                for k, v in self.block_device_mapping.iteritems():
                    disk_name = v["disk_name"] or v["disk"]
                    is_attached = disk_name in attached_disk_names
                    if not is_attached and not v.get("needsAttach", False):
                        self.warn("disk {0} seems to have been detached behind our "
                                  "back; will reattach...".format(disk_name))
                        v["needsAttach"] = True
                        self.update_block_device_mapping(k, v)
                    if is_attached and v.get("needsAttach", False):
                        self.warn("disk {0} seems to have been attached for us; "
                                  "thank you, mr. Elusive Bug!".format(disk_name))
                        del v["needsAttach"]
                        self.update_block_device_mapping(k, v)

                # check that no extra disks are attached
                defn_disk_names = [v["disk_name"] or v["disk"]
                                   for k, v in defn.block_device_mapping.iteritems()]
                state_disk_names = [v["disk_name"] or v["disk"]
                                    for k, v in self.block_device_mapping.iteritems()]
                unexpected_disks = list(set(attached_disk_names)
                                        - set(defn_disk_names)
                                        - set(state_disk_names))
                if unexpected_disks:
                    self.warn("unexpected disk(s) {0} are attached to this instance; "
                              "not fixing this just in case".format(unexpected_disks))
            else:
                self.warn_not_supposed_to_exist(valuable_data=True)
                self.confirm_destroy(node, self.full_name)

        except libcloud.common.google.ResourceNotFoundError:
            if self.vm_id:
                self.warn("the instance seems to have been destroyed behind our back")
                if not allow_recreate:
                    raise Exception("use --allow-recreate to fix")
                self._node_deleted()

        # check that the disks that should exist do exist,
        # and that the disks we expect to create don't exist yet
        for k, v in defn.block_device_mapping.iteritems():
            disk_name = v["disk_name"] or v["disk"]
            try:
                disk = self.connect().ex_get_volume(disk_name, v.get("region", None))
                if k not in self.block_device_mapping and v["disk_name"]:
                    self.warn_not_supposed_to_exist(resource_name=disk_name,
                                                    valuable_data=True)
                    self.confirm_destroy(disk, disk_name)
            except libcloud.common.google.ResourceNotFoundError:
                if v["disk"]:
                    raise Exception("external disk '{0}' is required but doesn't exist"
                                    .format(disk_name))
                if k in self.block_device_mapping and v["disk_name"]:
                    self.warn("disk '{0}' is supposed to exist, but is missing; "
                              "will recreate...".format(disk_name))
                    self.update_block_device_mapping(k, None)

    # create missing disks
    for k, v in defn.block_device_mapping.iteritems():
        if k in self.block_device_mapping:
            continue
        if v["disk"] is None:
            extra_msg = (" from snapshot '{0}'".format(v["snapshot"]) if v["snapshot"]
                         else " from image '{0}'".format(v["image"]) if v["image"]
                         else "")
            self.log("creating GCE disk of {0} GiB{1}...".format(
                v["size"] if v["size"] else "auto", extra_msg))
            v["region"] = defn.region
            try:
                self.connect().create_volume(
                    v["size"], v["disk_name"], v["region"],
                    snapshot=v["snapshot"],
                    image=v["image"],
                    ex_disk_type="pd-" + v.get("type", "standard"),
                    use_existing=False,
                )
            except libcloud.common.google.ResourceExistsError:
                raise Exception("tried creating a disk that already exists; "
                                "please run 'deploy --check' to fix this")
        v["needsAttach"] = True
        self.update_block_device_mapping(k, v)

    if self.vm_id:
        if self.instance_type != defn.instance_type:
            recreate = True
            self.warn("change of the instance type requires a reboot")

        if self.network != defn.network:
            recreate = True
            self.warn("change of the network requires a reboot")

        for k, v in self.block_device_mapping.iteritems():
            defn_v = defn.block_device_mapping.get(k, None)
            if defn_v and not v.get("needsAttach", False):
                if v["bootDisk"] != defn_v["bootDisk"]:
                    recreate = True
                    self.warn("change of the boot disk requires a reboot")
                if v["readOnly"] != defn_v["readOnly"]:
                    recreate = True
                    self.warn("remounting a disk as ro/rw requires a reboot")

        if recreate:
            if not allow_reboot:
                raise Exception("reboot is required for the requested changes; "
                                "please run with --allow-reboot")
            self.stop()

    self.create_node(defn)
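# Condensed, illustrative restatement of the reboot guard at the end of
# create() above (a sketch, not the deployment engine's API): recreate-level
# changes are refused unless the caller passed --allow-reboot.
def guard_reboot(recreate, allow_reboot):
    if recreate and not allow_reboot:
        raise Exception("reboot is required for the requested changes; "
                        "please run with --allow-reboot")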