Example #1
    def _create_instance(self, defn) -> None:
        if not self.public_client_key:
            (private, public) = create_key_pair(type="ed25519")
            self.public_client_key = public
            self.private_client_key = private

        if not self.public_host_key:
            (private, public) = create_key_pair(type="ed25519")
            self.public_host_key = public
            self.private_host_key = private

        location: BoundLocation = self.get_client().locations.get_by_name(
            defn.location)

        ssh_keys: List[BoundSSHKey] = [
            self._create_ssh_key(self.public_client_key)
        ]

        # Ensure host keys get injected into the base OS
        user_data = ("#cloud-config\n"
                     "ssh_keys:\n"
                     "  ed25519_public: {0}\n"
                     "  ed25519_private: |\n"
                     "    {1}").format(
                         self.public_host_key,
                         self.private_host_key.replace("\n", "\n    "))

        self.logger.log_start(
            f"creating {defn.server_type} server at {location.description}...")
        response = self.get_client().servers.create(
            name=defn.server_name,
            labels={
                **self.get_common_labels(),
                **dict(defn.labels)
            },
            location=location,
            server_type=ServerType(defn.server_type),
            ssh_keys=ssh_keys,
            user_data=user_data,
            image=Image(name="ubuntu-20.04"),  # for illustration
            start_after_create=True,
        )

        self.state = self.STARTING
        self.wait_on_action(response.action)

        with self.depl._db:
            self.vm_id = response.server.id
            self.public_ipv4 = response.server.public_net.ipv4.ip
            self.public_ipv6 = response.server.public_net.ipv6.ip
            self.server_name = defn.server_name
            self.server_type = defn.server_type
            self.legacy_if_scheme = defn.server_type.startswith("cx")
            self.location = defn.location
            self.labels = dict(defn.labels)
            self.private_host_key = None

        known_hosts.add(self.public_ipv4, self.public_host_key)
        self.logger.log_end(f"{self.public_ipv4}")
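
Every example on this page centers on create_key_pair from nixops.util: generate a key pair once, store both halves in the deployment state, and reuse them on later runs. Below is a minimal sketch of that shared pattern, with the create_key_pair signature inferred from the calls above and a hypothetical State class standing in for the machine-state object:

    # Sketch of the generate-once pattern shared by these examples.
    # create_key_pair's signature matches the calls above; State is a
    # hypothetical stand-in for the NixOps machine-state object.
    from nixops.util import create_key_pair

    class State:
        public_client_key = None
        private_client_key = None

        def ensure_client_key(self):
            if not self.public_client_key:
                # returns a (private, public) tuple, as used above
                private, public = create_key_pair(type="ed25519")
                self.public_client_key = public
                self.private_client_key = private
            return self.public_client_key
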
Example #2
    def _install_base_system(self):
        self.log_start("creating missing directories... ")
        cmds = ["mkdir -m 1777 -p /mnt/tmp /mnt/nix/store"]
        mntdirs = [
            "var", "etc", "bin", "nix/var/nix/gcroots",
            "nix/var/nix/temproots", "nix/var/nix/manifests",
            "nix/var/nix/userpool", "nix/var/nix/profiles", "nix/var/nix/db",
            "nix/var/log/nix/drvs"
        ]
        to_create = ' '.join(map(lambda d: os.path.join("/mnt", d), mntdirs))
        cmds.append("mkdir -m 0755 -p {0}".format(to_create))
        self.run_command(' && '.join(cmds))
        self.log_end("done.")

        self.log_start("bind-mounting files in /etc... ")
        for etcfile in ("resolv.conf", "passwd", "group"):
            self.log_continue("{0}...".format(etcfile))
            cmd = ("if ! test -e /mnt/etc/{0}; then"
                   " touch /mnt/etc/{0} && mount --bind /etc/{0} /mnt/etc/{0};"
                   " fi").format(etcfile)
            self.run_command(cmd)
        self.log_end("done.")

        self.run_command("touch /mnt/etc/NIXOS")
        self.run_command("activate-remote")

        self.main_ssh_private_key, self.main_ssh_public_key = create_key_pair(
            key_name="NixOps client key of {0}".format(self.name))
        self._gen_network_spec()
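
The bind-mount loop in Example #2 builds one guarded shell command per file so that reruns stay idempotent: the test skips files that already exist (or are already bind-mounted). Rendered for "resolv.conf", the generated command is:

    # What cmd expands to for etcfile = "resolv.conf" in the loop above;
    # the existence test keeps a rerun from mounting over an existing bind mount.
    cmd = ("if ! test -e /mnt/etc/resolv.conf; then"
           " touch /mnt/etc/resolv.conf"
           " && mount --bind /etc/resolv.conf /mnt/etc/resolv.conf; fi")
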
Example #3
    def _install_main_ssh_keys(self):
        """
        Create an SSH private/public keypair and put the public key into the
        chroot.
        """
        private, public = create_key_pair(
            key_name="NixOps client key of {0}".format(self.name))
        self.main_ssh_private_key, self.main_ssh_public_key = private, public
        res = self.run_command(
            "umask 077 && mkdir -p /mnt/root/.ssh &&"
            " cat > /mnt/root/.ssh/authorized_keys",
            stdin_string=public)
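
Example #3 pipes the freshly generated public key into the chroot's authorized_keys through run_command with stdin_string. For illustration, here is a local sketch of the same install step, with subprocess standing in for run_command; the umask and path are taken from the example:

    # Local sketch of the authorized_keys install step above: run_command
    # pipes its stdin_string over SSH, subprocess.run(input=...) pipes it
    # to a local shell instead. For illustration only.
    import subprocess

    def install_authorized_key(public_key):
        subprocess.run(
            ["sh", "-c",
             "umask 077 && mkdir -p /mnt/root/.ssh"
             " && cat > /mnt/root/.ssh/authorized_keys"],
            input=public_key.encode(),
            check=True,
        )
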
Example #4
    def create(self, defn, check, allow_reboot, allow_recreate):
        assert isinstance(defn, NoneDefinition)
        self.set_common_state(defn)
        self.target_host = defn._target_host

        if not self.vm_id:
            self.log_start("generating new SSH keypair...")
            key_name = "NixOps client key for {0}".format(self.name)
            self._ssh_private_key, self._ssh_public_key = \
                create_key_pair(key_name=key_name)
            self.log_end("done.")
            self.vm_id = "nixops-{0}-{1}".format(self.depl.uuid, self.name)
Example #5
    def create(
        self,
        defn: NoneDefinition,
        check: bool,
        allow_reboot: bool,
        allow_recreate: bool,
    ) -> None:
        assert isinstance(defn, NoneDefinition)
        self.set_common_state(defn)
        self.target_host = defn._target_host
        self.public_ipv4 = defn._public_ipv4

        if not self.vm_id:
            if self.provision_ssh_key:
                self.logger.log_start("generating new SSH key pair... ")
                key_name = "NixOps client key for {0}".format(self.name)
                self._ssh_private_key, self._ssh_public_key = create_key_pair(
                    key_name=key_name
                )

            self.logger.log_end("done")
            self.vm_id = "nixops-{0}-{1}".format(self.depl.uuid, self.name)
Example #6
    def create(self, defn, check, allow_reboot, allow_recreate):
        assert isinstance(defn, GCEDefinition)

        self.no_project_change(defn)
        self.no_region_change(defn)
        self.no_change(self.machine_name != defn.machine_name, "instance name")

        self.set_common_state(defn)
        self.copy_credentials(defn)
        self.machine_name = defn.machine_name
        self.region = defn.region

        if not self.public_client_key:
            (private, public) = create_key_pair()
            self.public_client_key = public
            self.private_client_key = private

        self.host_key_type = (
            "ed25519" if self.state_version != "14.12"
            and nixops.util.parse_nixos_version(
                defn.config["nixosRelease"]) >= ["15", "09"]
            else "ecdsa")

        if not self.public_host_key:
            (private, public) = create_key_pair(type=self.host_key_type)
            self.public_host_key = public
            self.private_host_key = private

        recreate = False

        if check:
            try:
                node = self.node()
                if self.vm_id:

                    if node.state == NodeState.TERMINATED:
                        recreate = True
                        self.warn(
                            "the instance is terminated and needs a reboot")
                        self.state = self.STOPPED

                    self.handle_changed_property('region',
                                                 node.extra['zone'].name,
                                                 can_fix=False)

                    # a bit hacky but should work
                    network_name = node.extra['networkInterfaces'][0][
                        'network'].split('/')[-1]
                    if network_name == 'default':
                        network_name = None
                    self.handle_changed_property('network', network_name)

                    self.handle_changed_property('instance_type', node.size)
                    self.handle_changed_property(
                        'public_ipv4',
                        node.public_ips[0] if node.public_ips else None,
                        property_name='public IP address')
                    if self.public_ipv4:
                        known_hosts.add(self.public_ipv4, self.public_host_key)

                    self.handle_changed_property(
                        'private_ipv4',
                        node.private_ips[0] if node.private_ips else None,
                        property_name='private IP address')

                    if self.ipAddress:
                        try:
                            address = self.connect().ex_get_address(
                                self.ipAddress)
                            if self.public_ipv4 and self.public_ipv4 != address.address:
                                self.warn(
                                    "static IP Address {0} assigned to this machine has unexpectely "
                                    "changed from {1} to {2} most likely due to being redeployed"
                                    .format(self.ipAddress, self.public_ipv4,
                                            address.address))
                                self.ipAddress = None

                        except libcloud.common.google.ResourceNotFoundError:
                            self.warn(
                                "static IP Address resource {0} used by this machine has been destroyed; "
                                "it is likely that the machine is still holding the address itself ({1}) "
                                "and this is your last chance to reclaim it before it gets "
                                "lost in a reboot".format(
                                    self.ipAddress, self.public_ipv4))

                    self.handle_changed_property('tags',
                                                 sorted(node.extra['tags']))

                    actual_metadata = {
                        i['key']: i['value']
                        for i in node.extra['metadata'].get('items', [])
                        if i['key'] not in [
                            'ssh_host_{0}_key'.format(self.host_key_type),
                            'sshKeys', 'ssh_host_{0}_key_pub'.format(
                                self.host_key_type)
                        ]
                    }
                    self.handle_changed_property('metadata', actual_metadata)

                    self.handle_changed_property(
                        'automatic_restart',
                        node.extra['scheduling']["automaticRestart"])
                    self.handle_changed_property(
                        'on_host_maintenance',
                        node.extra['scheduling']["onHostMaintenance"])

                    attached_disk_names = [
                        d.get("deviceName", None) for d in node.extra['disks']
                    ]
                    # check that all disks are attached
                    for k, v in self.block_device_mapping.iteritems():
                        disk_name = v['disk_name'] or v['disk']
                        is_attached = disk_name in attached_disk_names
                        if not is_attached and not v.get('needsAttach', False):
                            self.warn(
                                "disk {0} seems to have been detached behind our back; will reattach..."
                                .format(disk_name))
                            v['needsAttach'] = True
                            self.update_block_device_mapping(k, v)
                        if is_attached and v.get('needsAttach', False):
                            self.warn(
                                "disk {0} seems to have been attached for us; thank you, mr. Elusive Bug!"
                                .format(disk_name))
                            del v['needsAttach']
                            self.update_block_device_mapping(k, v)

                    # check that no extra disks are attached
                    defn_disk_names = [
                        v['disk_name'] or v['disk']
                        for k, v in defn.block_device_mapping.iteritems()
                    ]
                    state_disk_names = [
                        v['disk_name'] or v['disk']
                        for k, v in self.block_device_mapping.iteritems()
                    ]
                    unexpected_disks = list(
                        set(attached_disk_names) - set(defn_disk_names) -
                        set(state_disk_names))
                    if unexpected_disks:
                        self.warn(
                            "unexpected disk(s) {0} are attached to this instance; "
                            "not fixing this just in case".format(
                                unexpected_disks))
                else:
                    self.warn_not_supposed_to_exist(valuable_data=True)
                    self.confirm_destroy(node, self.full_name)

            except libcloud.common.google.ResourceNotFoundError:
                if self.vm_id:
                    self.warn(
                        "the instance seems to have been destroyed behind our back"
                    )
                    if not allow_recreate:
                        raise Exception("use --allow-recreate to fix")
                    self._node_deleted()

            # check that the disks that should exist do exist
            # and that the disks we expected to create don't exist yet
            for k, v in defn.block_device_mapping.iteritems():
                disk_name = v['disk_name'] or v['disk']
                try:
                    disk = self.connect().ex_get_volume(
                        disk_name, v.get('region', None))
                    if k not in self.block_device_mapping and v['disk_name']:
                        self.warn_not_supposed_to_exist(
                            resource_name=disk_name, valuable_data=True)
                        self.confirm_destroy(disk, disk_name)

                except libcloud.common.google.ResourceNotFoundError:
                    if v['disk']:
                        raise Exception(
                            "external disk '{0}' is required but doesn't exist"
                            .format(disk_name))
                    if k in self.block_device_mapping and v['disk_name']:
                        self.warn(
                            "disk '{0}' is supposed to exist, but is missing; will recreate..."
                            .format(disk_name))
                        self.update_block_device_mapping(k, None)

        # create missing disks
        for k, v in defn.block_device_mapping.iteritems():
            if k in self.block_device_mapping: continue
            if v['disk'] is None:
                extra_msg = (" from snapshot '{0}'".format(v['snapshot'])
                             if v['snapshot'] else " from image '{0}'".format(
                                 v['image']) if v['image'] else "")
                self.log("creating GCE disk of {0} GiB{1}...".format(
                    v['size'] if v['size'] else "auto", extra_msg))
                v['region'] = defn.region
                try:
                    self.connect().create_volume(v['size'],
                                                 v['disk_name'],
                                                 v['region'],
                                                 snapshot=v['snapshot'],
                                                 image=v['image'],
                                                 ex_disk_type="pd-" +
                                                 v.get('type', 'standard'),
                                                 use_existing=False)
                except libcloud.common.google.ResourceExistsError:
                    raise Exception(
                        "tried creating a disk that already exists; "
                        "please run 'deploy --check' to fix this")
            v['needsAttach'] = True
            self.update_block_device_mapping(k, v)

        if self.vm_id:
            if self.instance_type != defn.instance_type:
                recreate = True
                self.warn("change of the instance type requires a reboot")

            if self.network != defn.network:
                recreate = True
                self.warn("change of the network requires a reboot")

            if self.email != defn.email or self.scopes != defn.scopes:
                recreate = True
                self.warn('change of service account requires a reboot')

            for k, v in self.block_device_mapping.iteritems():
                defn_v = defn.block_device_mapping.get(k, None)
                if defn_v and not v.get('needsAttach', False):
                    if v['bootDisk'] != defn_v['bootDisk']:
                        recreate = True
                        self.warn("change of the boot disk requires a reboot")
                    if v['readOnly'] != defn_v['readOnly']:
                        recreate = True
                        self.warn("remounting disk as ro/rw requires a reboot")

        if recreate:
            if not allow_reboot:
                raise Exception(
                    "reboot is required for the requested changes; please run with --allow-reboot"
                )
            self.stop()

        self.create_node(defn)
        if self.node().state == NodeState.STOPPED:
            self.start()
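
The host-key selection in Example #6 relies on Python comparing parsed version components as lists. A small illustration follows; parse_nixos_version is assumed here to split the release string into components, a shape inferred from the comparison against ["15", "09"] above:

    # Illustration of the list comparison behind the host_key_type choice.
    # The real nixops.util.parse_nixos_version may differ; this stand-in
    # is inferred from the comparison against ["15", "09"] above.
    def parse_nixos_version(release):
        return release.split(".")

    assert parse_nixos_version("15.09") >= ["15", "09"]      # -> ed25519
    assert parse_nixos_version("16.03") >= ["15", "09"]      # -> ed25519
    assert not parse_nixos_version("14.12") >= ["15", "09"]  # -> ecdsa
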
Example #7
    def create(self, defn, check, allow_reboot, allow_recreate):
        assert isinstance(defn, GCEDefinition)

        self.no_project_change(defn)
        self.no_region_change(defn)
        self.no_change(self.machine_name != defn.machine_name, "instance name")

        self.set_common_state(defn)
        self.copy_credentials(defn)
        self.machine_name = defn.machine_name
        self.region = defn.region

        if not self.public_client_key:
            (private, public) = create_key_pair()
            self.public_client_key = public
            self.private_client_key = private

        self.host_key_type = (
            "ed25519" if self.state_version != "14.12"
            and nixops.util.parse_nixos_version(defn.config["nixosRelease"]) >= ["15", "09"]
            else "ecdsa")

        if not self.public_host_key:
            (private, public) = create_key_pair(type=self.host_key_type)
            self.public_host_key = public
            self.private_host_key = private

        recreate = False

        if check:
            try:
                node = self.node()
                if self.vm_id:

                    if node.state == NodeState.TERMINATED:
                        recreate = True
                        self.warn("the instance is terminated and needs a reboot")
                        self.state = self.STOPPED

                    self.handle_changed_property('region', node.extra['zone'].name, can_fix = False)
                    self.handle_changed_property('preemptible', node.extra['scheduling']['preemptible'], can_fix = False)

                    # a bit hacky but should work
                    network_name = node.extra['networkInterfaces'][0]['network'].split('/')[-1]
                    if network_name == 'default':
                        network_name = None
                    self.handle_changed_property('network', network_name)

                    self.handle_changed_property('instance_type', node.size)
                    self.handle_changed_property('public_ipv4',
                                                 node.public_ips[0] if node.public_ips else None,
                                                 property_name = 'public IP address')
                    if self.public_ipv4:
                        known_hosts.add(self.public_ipv4, self.public_host_key)

                    self.handle_changed_property('private_ipv4',
                                                 node.private_ips[0] if node.private_ips else None,
                                                 property_name = 'private IP address')

                    if self.ipAddress:
                        try:
                            address = self.connect().ex_get_address(self.ipAddress)
                            if self.public_ipv4 and self.public_ipv4 != address.address:
                                self.warn("static IP Address {0} assigned to this machine has unexpectely "
                                          "changed from {1} to {2} most likely due to being redeployed"
                                          .format(self.ipAddress, self.public_ipv4, address.address) )
                                self.ipAddress = None

                        except libcloud.common.google.ResourceNotFoundError:
                            self.warn("static IP Address resource {0} used by this machine has been destroyed; "
                                      "it is likely that the machine is still holding the address itself ({1}) "
                                      "and this is your last chance to reclaim it before it gets "
                                      "lost in a reboot".format(self.ipAddress, self.public_ipv4) )

                    self.handle_changed_property('tags', sorted(node.extra['tags']))

                    actual_metadata = { i['key']: i['value']
                                        for i in node.extra['metadata'].get('items', [])
                                        if i['key'] not in [ 'ssh_host_{0}_key'.format(self.host_key_type), 'sshKeys',
                                                             'ssh_host_{0}_key_pub'.format(self.host_key_type)] }
                    self.handle_changed_property('metadata', actual_metadata)

                    self.handle_changed_property('automatic_restart',
                                                 node.extra['scheduling']["automaticRestart"])
                    self.handle_changed_property('on_host_maintenance',
                                                 node.extra['scheduling']["onHostMaintenance"])

                    attached_disk_names = [d.get("deviceName", None) for d in node.extra['disks'] ]
                    # check that all disks are attached
                    for k, v in self.block_device_mapping.iteritems():
                        disk_name = v['disk_name'] or v['disk']
                        is_attached = disk_name in attached_disk_names
                        if not is_attached and not v.get('needsAttach', False):
                            self.warn("disk {0} seems to have been detached behind our back; will reattach...".format(disk_name))
                            v['needsAttach'] = True
                            self.update_block_device_mapping(k, v)
                        if is_attached and v.get('needsAttach', False):
                            self.warn("disk {0} seems to have been attached for us; thank you, mr. Elusive Bug!".format(disk_name))
                            del v['needsAttach']
                            self.update_block_device_mapping(k, v)

                    # check that no extra disks are attached
                    defn_disk_names  = [v['disk_name'] or v['disk'] for k,v in defn.block_device_mapping.iteritems()]
                    state_disk_names = [v['disk_name'] or v['disk'] for k,v in self.block_device_mapping.iteritems()]
                    unexpected_disks = list( set(attached_disk_names) - set(defn_disk_names) - set(state_disk_names) )
                    if unexpected_disks:
                        self.warn("unexpected disk(s) {0} are attached to this instance; "
                                  "not fixing this just in case".format(unexpected_disks))
                else:
                    self.warn_not_supposed_to_exist(valuable_data = True)
                    self.confirm_destroy(node, self.full_name)

            except libcloud.common.google.ResourceNotFoundError:
                if self.vm_id:
                    self.warn("the instance seems to have been destroyed behind our back")
                    if not allow_recreate:
                        raise Exception("use --allow-recreate to fix")
                    self._node_deleted()

            # check that the disks that should exist do exist
            # and that the disks we expected to create don't exist yet
            for k,v in defn.block_device_mapping.iteritems():
                disk_name = v['disk_name'] or v['disk']
                try:
                    disk = self.connect().ex_get_volume(disk_name, v.get('region', None) )
                    if k not in self.block_device_mapping and v['disk_name']:
                        self.warn_not_supposed_to_exist(resource_name = disk_name, valuable_data = True)
                        self.confirm_destroy(disk, disk_name)

                except libcloud.common.google.ResourceNotFoundError:
                    if v['disk']:
                        raise Exception("external disk '{0}' is required but doesn't exist".format(disk_name))
                    if k in self.block_device_mapping and v['disk_name']:
                        self.warn("disk '{0}' is supposed to exist, but is missing; will recreate...".format(disk_name))
                        self.update_block_device_mapping(k, None)

        # create missing disks
        for k, v in defn.block_device_mapping.iteritems():
            if k in self.block_device_mapping: continue
            if v['disk'] is None:
                extra_msg = (" from snapshot '{0}'".format(v['snapshot']) if v['snapshot']
                             else " from image '{0}'".format(v['image']) if v['image']
                             else "")
                self.log("creating GCE disk of {0} GiB{1}..."
                         .format(v['size'] if v['size'] else "auto", extra_msg))
                v['region'] = defn.region
                try:
                    self.connect().create_volume(v['size'], v['disk_name'], v['region'],
                                                snapshot=v['snapshot'], image=v['image'],
                                                ex_disk_type="pd-" + v.get('type', 'standard'),
                                                use_existing=False)
                except AttributeError:
                    # libcloud bug: The region we're trying to create the disk
                    # in doesn't exist.
                    raise Exception("tried creating a disk in nonexistent "
                                    "region %r" % v['region'])
                except libcloud.common.google.ResourceExistsError:
                    raise Exception("tried creating a disk that already exists; "
                                    "please run 'deploy --check' to fix this")
            v['needsAttach'] = True
            self.update_block_device_mapping(k, v)

        if self.vm_id:
            if self.instance_type != defn.instance_type:
                recreate = True
                self.warn("change of the instance type requires a reboot")

            if self.network != defn.network:
                recreate = True
                self.warn("change of the network requires a reboot")

            if self.email != defn.email or self.scopes != defn.scopes:
                recreate = True
                self.warn('change of service account requires a reboot')

            for k, v in self.block_device_mapping.iteritems():
                defn_v = defn.block_device_mapping.get(k, None)
                if defn_v and not v.get('needsAttach', False):
                    if v['bootDisk'] != defn_v['bootDisk']:
                        recreate = True
                        self.warn("change of the boot disk requires a reboot")
                    if v['readOnly'] != defn_v['readOnly']:
                        recreate = True
                        self.warn("remounting disk as ro/rw requires a reboot")

        if recreate:
            if not allow_reboot:
                raise Exception("reboot is required for the requested changes; please run with --allow-reboot")
            self.stop()

        self.create_node(defn)
        if self.node().state == NodeState.STOPPED:
            self.start()
Example #8
    def create(self, defn: HcloudDefinition, check, allow_reboot,
               allow_recreate):
        assert isinstance(defn, HcloudDefinition)
        hetzner = defn.config.hcloud
        self.token = get_access_token(hetzner)
        if self.state not in (MachineState.RESCUE, MachineState.UP) or check:
            self.check()

        self.set_common_state(defn)
        self.upgrade_disk = hetzner.upgradeDisk

        # TODO maybe bootstrap can be automated with vncdotool
        image_id = self._fetch_image_id(hetzner.image, hetzner.image_selector)
        if self.image_id is None:
            self.image_id = image_id
        elif self.image_id != image_id:
            self.warn(
                f"image_id changed from {self.image_id} to {image_id} but can't update image of a VM."
            )
        if self.location is None:
            self.location = hetzner.location
        elif self.location != hetzner.location:
            self.warn(
                f"location changed from {self.location} to {hetzner.location} but can't update location of a VM."
            )
        if self.vm_id is not None and hetzner.serverType != self.server_type:
            # TODO Check if server can be upgraded before hitting the Hetzner API
            # https://docs.hetzner.cloud/#server-actions-change-the-type-of-a-server
            do_upgrade = True
            # Only confirm if upgrade_disk is True because then the upgrade can't be undone
            if self.upgrade_disk:
                do_upgrade = self.depl.logger.confirm(
                    f"are you sure you want to change Hetzner server {self.name} type from "
                    + f"{self.server_type} to {hetzner.serverType}?")
            if do_upgrade:
                self.log_start("Changing Hetzner server type...")
                self._server.shutdown().wait_until_finished()
                self.wait_for_down(callback=lambda: self.log_continue("."))
                self._server.change_type(
                    ServerType(name=hetzner.serverType),
                    upgrade_disk=self.upgrade_disk).wait_until_finished()
                self._server.power_on()
                self.wait_for_up(callback=lambda: self.log_continue("."))
                self.log_end("")
        self.server_type = hetzner.serverType

        ssh_keys = [
            k.name if isinstance(k, ResourceEval) else k
            for k in hetzner.sshKeys
        ]
        if self.state != MachineState.MISSING and ssh_keys != self.ssh_keys:
            self.logger.warn(
                "SSH keys cannot be changed after the server is created.")

        volume_ids = []
        filesystems = {}
        for volumeopts in hetzner.volumes:
            volume = volumeopts.volume
            if isinstance(volume, str):
                volume_model = self._client.volumes.get_by_name(volume)
                volume_name = volume
                volume_id = volume_model.id
                volume_loc = volume_model.location.name
            else:
                volume_res = self.depl.get_typed_resource(
                    volume._name, "hcloud-volume", HcloudVolumeState)
                volume_name = volume_res.name
                volume_id = volume_res.hcloud_id
                assert volume_id is not None
                volume_loc = volume_res.location
            if volume_loc != self.location:
                raise Exception(
                    f"Volume {volume_name!r} is in a different location from server {self.name!r}"
                )
            volume_ids.append(volume_id)
            if volumeopts.mountPoint is not None:
                fs = dict(volumeopts.fileSystem)
                fs["device"] = f"/dev/disk/by-id/scsi-0HC_Volume_{volume_id}"
                filesystems[volumeopts.mountPoint] = fs

        has_priv = self._ssh_private_key is not None
        has_pub = self._ssh_public_key is not None
        assert has_priv == has_pub
        if not has_priv:
            self.log("Generating SSH keypair...")
            (self._ssh_private_key, self._ssh_public_key) = create_key_pair()
        if self.vm_id:
            if self.volume_ids != volume_ids:
                current = set(self.volume_ids)
                new = set(volume_ids)
                volumes_client = self._client.volumes
                self.log_start("Updating volumes...")
                for v in current - new:
                    volumes_client.detach(Volume(id=v))
                    self.log_continue(".")
                for v in new - current:
                    volumes_client.attach(
                        Volume(id=v), self._server,
                        automount=False).wait_until_finished()
                    self.log_continue(".")
                self.log_end("")
                self.volume_ids = volume_ids
        else:
            self.log_start(
                "Creating Hetzner Cloud VM (" +
                f"image '{image_id}', type '{hetzner.serverType}', location '{hetzner.location}'"
                + ")...")
            response = self._client.servers.create(
                name=self.name,
                ssh_keys=[SSHKey(name=k) for k in ssh_keys],
                volumes=[Volume(id=v) for v in volume_ids],
                server_type=ServerType(self.server_type),
                image=Image(id=self.image_id),
                # Set labels so we can find the instance if nixops crashes before writing vm_id
                labels=dict(self._server_labels()),
                user_data=None if self._ssh_public_key is None else yaml.dump(
                    {"public-keys": [self._ssh_public_key]}),
            )
            self.log_end("")
            self.public_ipv4 = response.server.public_net.ipv4.ip
            self.log_start("waiting for SSH...")
            self.wait_for_up(callback=lambda: self.log_continue("."))
            self.log_end("")
            with self.depl._db:
                self.vm_id = response.server.id
                # TODO get state from creation response
                self.state = MachineState.STARTING
                self.ssh_keys = ssh_keys
                self.volume_ids = volume_ids
                self._detect_hardware()
                self._update_host_keys()
        self.filesystems = filesystems
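
The volume update in Example #8 is a plain set reconciliation: detach whatever left the definition, attach whatever is new. Here is the diff step in isolation, with placeholder volume ids:

    # The set-difference diff behind the volume update above,
    # shown with placeholder volume ids.
    current = {101, 102, 103}   # ids recorded in state
    new = {102, 103, 104}       # ids from the definition

    to_detach = current - new   # {101}: volumes_client.detach(...)
    to_attach = new - current   # {104}: volumes_client.attach(...)
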
Example #9
    def create(self, defn, check, allow_reboot, allow_recreate):
        assert isinstance(defn, GCEDefinition)

        self.no_project_change(defn)
        self.no_region_change(defn)
        self.no_change(self.machine_name != defn.machine_name, "instance name")

        self.set_common_state(defn)
        self.copy_credentials(defn)
        self.machine_name = defn.machine_name
        self.region = defn.region

        if not self.public_client_key:
            (private, public) = create_key_pair()
            self.public_client_key = public
            self.private_client_key = private

        if not self.public_host_key:
            (private, public) = create_key_pair(type="ecdsa")
            self.public_host_key = public
            self.private_host_key = private

        recreate = False

        if check:
            try:
                node = self.node()
                if self.vm_id:

                    if node.state == NodeState.TERMINATED:
                        recreate = True
                        self.warn("the instance is terminated and needs a reboot")
                        self.state = self.STOPPED

                    self.handle_changed_property("region", node.extra["zone"].name, can_fix=False)

                    # a bit hacky but should work
                    network_name = node.extra["networkInterfaces"][0]["network"].split("/")[-1]
                    if network_name == "default":
                        network_name = None
                    self.handle_changed_property("network", network_name)

                    self.handle_changed_property("instance_type", node.size)
                    self.handle_changed_property(
                        "public_ipv4",
                        node.public_ips[0] if node.public_ips else None,
                        property_name="public IP address",
                    )
                    if self.public_ipv4:
                        known_hosts.add(self.public_ipv4, self.public_host_key)

                    self.handle_changed_property(
                        "private_ipv4",
                        node.private_ips[0] if node.private_ips else None,
                        property_name="private IP address",
                    )

                    if self.ipAddress:
                        try:
                            address = self.connect().ex_get_address(self.ipAddress)
                            if self.public_ipv4 and self.public_ipv4 != address.address:
                                self.warn(
                                    "static IP Address {0} assigned to this machine has unexpectely "
                                    "changed from {1} to {2} most likely due to being redeployed".format(
                                        self.ipAddress, self.public_ipv4, address.address
                                    )
                                )
                                self.ipAddress = None

                        except libcloud.common.google.ResourceNotFoundError:
                            self.warn(
                                "static IP Address resource {0} used by this machine has been destroyed; "
                                "it is likely that the machine is still holding the address itself ({1}) "
                                "and this is your last chance to reclaim it before it gets "
                                "lost in a reboot".format(self.ipAddress, self.public_ipv4)
                            )

                    self.handle_changed_property("tags", sorted(node.extra["tags"]))

                    actual_metadata = {
                        i["key"]: i["value"]
                        for i in node.extra["metadata"].get("items", [])
                        if i["key"] not in ["ssh_host_ecdsa_key", "sshKeys", "ssh_host_ecdsa_key_pub"]
                    }
                    self.handle_changed_property("metadata", actual_metadata)

                    self.handle_changed_property("automatic_restart", node.extra["scheduling"]["automaticRestart"])
                    self.handle_changed_property("on_host_maintenance", node.extra["scheduling"]["onHostMaintenance"])

                    attached_disk_names = [d.get("deviceName", None) for d in node.extra["disks"]]
                    # check that all disks are attached
                    for k, v in self.block_device_mapping.iteritems():
                        disk_name = v["disk_name"] or v["disk"]
                        is_attached = disk_name in attached_disk_names
                        if not is_attached and not v.get("needsAttach", False):
                            self.warn(
                                "disk {0} seems to have been detached behind our back; will reattach...".format(
                                    disk_name
                                )
                            )
                            v["needsAttach"] = True
                            self.update_block_device_mapping(k, v)
                        if is_attached and v.get("needsAttach", False):
                            self.warn(
                                "disk {0} seems to have been attached for us; thank you, mr. Elusive Bug!".format(
                                    disk_name
                                )
                            )
                            del v["needsAttach"]
                            self.update_block_device_mapping(k, v)

                    # check that no extra disks are attached
                    defn_disk_names = [v["disk_name"] or v["disk"] for k, v in defn.block_device_mapping.iteritems()]
                    state_disk_names = [v["disk_name"] or v["disk"] for k, v in self.block_device_mapping.iteritems()]
                    unexpected_disks = list(set(attached_disk_names) - set(defn_disk_names) - set(state_disk_names))
                    if unexpected_disks:
                        self.warn(
                            "unexpected disk(s) {0} are attached to this instance; "
                            "not fixing this just in case".format(unexpected_disks)
                        )
                else:
                    self.warn_not_supposed_to_exist(valuable_data=True)
                    self.confirm_destroy(node, self.full_name)

            except libcloud.common.google.ResourceNotFoundError:
                if self.vm_id:
                    self.warn("the instance seems to have been destroyed behind our back")
                    if not allow_recreate:
                        raise Exception("use --allow-recreate to fix")
                    self._node_deleted()

            # check that the disks that should exist do exist
            # and that the disks we expected to create don't exist yet
            for k, v in defn.block_device_mapping.iteritems():
                disk_name = v["disk_name"] or v["disk"]
                try:
                    disk = self.connect().ex_get_volume(disk_name, v.get("region", None))
                    if k not in self.block_device_mapping and v["disk_name"]:
                        self.warn_not_supposed_to_exist(resource_name=disk_name, valuable_data=True)
                        self.confirm_destroy(disk, disk_name)

                except libcloud.common.google.ResourceNotFoundError:
                    if v["disk"]:
                        raise Exception("external disk '{0}' is required but doesn't exist".format(disk_name))
                    if k in self.block_device_mapping and v["disk_name"]:
                        self.warn("disk '{0}' is supposed to exist, but is missing; will recreate...".format(disk_name))
                        self.update_block_device_mapping(k, None)

        # create missing disks
        for k, v in defn.block_device_mapping.iteritems():
            if k in self.block_device_mapping:
                continue
            if v["disk"] is None:
                extra_msg = (
                    " from snapshot '{0}'".format(v["snapshot"])
                    if v["snapshot"]
                    else " from image '{0}'".format(v["image"])
                    if v["image"]
                    else ""
                )
                self.log("creating GCE disk of {0} GiB{1}...".format(v["size"] if v["size"] else "auto", extra_msg))
                v["region"] = defn.region
                try:
                    self.connect().create_volume(
                        v["size"],
                        v["disk_name"],
                        v["region"],
                        snapshot=v["snapshot"],
                        image=v["image"],
                        ex_disk_type="pd-" + v.get("type", "standard"),
                        use_existing=False,
                    )
                except libcloud.common.google.ResourceExistsError:
                    raise Exception(
                        "tried creating a disk that already exists; " "please run 'deploy --check' to fix this"
                    )
            v["needsAttach"] = True
            self.update_block_device_mapping(k, v)

        if self.vm_id:
            if self.instance_type != defn.instance_type:
                recreate = True
                self.warn("change of the instance type requires a reboot")

            if self.network != defn.network:
                recreate = True
                self.warn("change of the network requires a reboot")

            for k, v in self.block_device_mapping.iteritems():
                defn_v = defn.block_device_mapping.get(k, None)
                if defn_v and not v.get("needsAttach", False):
                    if v["bootDisk"] != defn_v["bootDisk"]:
                        recreate = True
                        self.warn("change of the boot disk requires a reboot")
                    if v["readOnly"] != defn_v["readOnly"]:
                        recreate = True
                        self.warn("remounting disk as ro/rw requires a reboot")

        if recreate:
            if not allow_reboot:
                raise Exception("reboot is required for the requested changes; please run with --allow-reboot")
            self.stop()
        self.create_node(defn)