Ejemplo n.º 1
0
    def execute(self):
        """
        Load the cluster and build a GC3Pie configuration snippet.

        When ``self.params.append`` is set, the snippet is appended to that
        file (``~`` is expanded); otherwise it is printed to standard output.

        A cluster that cannot be loaded (``ClusterNotFound`` or
        ``ConfigurationError``) and an ``IOError`` while writing the file are
        logged rather than raised; the method returns ``None`` in every case.
        """
        log.warning("Command `elasticluster gc3pie-config` is DEPRECATED"
                    " and will be removed in release 1.4 of ElastiCluster")
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        cluster_name = self.params.cluster
        try:
            cluster = creator.load_cluster(cluster_name)
        except (ClusterNotFound, ConfigurationError) as ex:
            log.error("Listing nodes from cluster %s: %s", cluster_name, ex)
            return

        from elasticluster.gc3pie_config import create_gc3pie_config_snippet

        if self.params.append:
            path = os.path.expanduser(self.params.append)
            try:
                # `with` closes the file even when building or writing the
                # snippet raises; the previous open/write/close sequence
                # leaked the handle in that case.
                with open(path, 'a') as fd:
                    fd.write(create_gc3pie_config_snippet(cluster))
            except IOError as ex:
                log.error("Unable to write configuration to file %s: %s", path,
                          ex)
        else:
            print(create_gc3pie_config_snippet(cluster))
Ejemplo n.º 2
0
    def execute(self):
        """
        Replace the current process with an `sftp` session to a cluster node.

        The node is chosen by ``cluster.get_ssh_to_node(self.params.ssh_to)``.
        If the cluster cannot be loaded or updated, the error is logged and
        the method returns ``None``; if the node has no connection address,
        the process exits with status 1.
        """
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        cluster_name = self.params.cluster
        try:
            cluster = creator.load_cluster(cluster_name)
            cluster.update()
        except (ClusterNotFound, ConfigurationError) as err:
            log.error("Setting up cluster %s: %s", cluster_name, err)
            return

        # XXX: the default value of `self.params.ssh_to` should = the
        # default value for `ssh_to` in `Cluster.get_ssh_to_node()`
        node = cluster.get_ssh_to_node(self.params.ssh_to)

        endpoint = node.connection_ip()
        if not endpoint:
            log.error("No IP address known for node %s", node.name)
            sys.exit(1)

        addr, port = parse_ip_address_and_port(endpoint)
        login = node.image_user
        # fall back to `/dev/null` when the cluster has no known-hosts file
        known_hosts = cluster.known_hosts_file
        if not known_hosts:
            known_hosts = '/dev/null'

        argv = ["sftp", "-P", "{0:d}".format(port)]
        ssh_options = (
            "UserKnownHostsFile={0}".format(known_hosts),
            "StrictHostKeyChecking=yes",
            "IdentityFile={0}".format(node.user_key_private),
        )
        for option in ssh_options:
            argv.append("-o")
            argv.append(option)
        # user-supplied arguments go before the `user@host` destination
        argv.extend(self.params.sftp_args)
        argv.append('{0}@{1}'.format(login, addr))
        os.execlp("sftp", *argv)
Ejemplo n.º 3
0
    def execute(self):
        """
        Replace the current process with an `sftp` session to a cluster node.

        The node named by ``self.params.ssh_to`` is used when that option is
        set, otherwise the node returned by ``cluster.get_frontend_node()``.

        Returns ``None`` (after logging) when the cluster cannot be loaded or
        updated.  Raises ``ValueError`` when the requested node name is not
        part of the cluster.
        """
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        cluster_name = self.params.cluster
        try:
            cluster = creator.load_cluster(cluster_name)
            cluster.update()
        except (ClusterNotFound, ConfigurationError) as err:
            message = "Setting up cluster %s: %s\n" % (cluster_name, err)
            log.error(message)
            return

        if not self.params.ssh_to:
            frontend = cluster.get_frontend_node()
        else:
            try:
                # index all nodes by name to look up the requested one
                by_name = {node.name: node
                           for node in cluster.get_all_nodes()}
                frontend = by_name[self.params.ssh_to]
            except KeyError:
                raise ValueError("Hostname %s not found in cluster %s" %
                                 (self.params.ssh_to, cluster_name))

        host = frontend.connection_ip()
        username = frontend.image_user
        # fall back to `/dev/null` when the cluster has no known-hosts file
        known_hosts = cluster.known_hosts_file
        if not known_hosts:
            known_hosts = '/dev/null'

        argv = ["sftp"]
        for option in ("UserKnownHostsFile=%s" % known_hosts,
                       "StrictHostKeyChecking=yes",
                       "IdentityFile=%s" % frontend.user_key_private):
            argv.append("-o")
            argv.append(option)
        # user-supplied arguments go before the `user@host` destination
        argv.extend(self.params.sftp_args)
        argv.append('%s@%s' % (username, host))
        os.execlp("sftp", *argv)
Ejemplo n.º 4
0
    def execute(self):
        """
        Load the cluster and build a GC3Pie configuration snippet.

        When ``self.params.append`` is set, the snippet is appended to that
        file (``~`` is expanded); otherwise it is printed to standard output.

        A cluster that cannot be loaded (``ClusterNotFound`` or
        ``ConfigurationError``) and an ``IOError`` while writing the file are
        logged rather than raised; the method returns ``None`` in every case.
        """
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        cluster_name = self.params.cluster
        try:
            cluster = creator.load_cluster(cluster_name)
        except (ClusterNotFound, ConfigurationError) as ex:
            # arguments are passed to the logger instead of being formatted
            # eagerly; the rendered message is unchanged
            log.error("Listing nodes from cluster %s: %s\n",
                      cluster_name, ex)
            return

        from elasticluster.gc3pie_config import create_gc3pie_config_snippet

        if self.params.append:
            path = os.path.expanduser(self.params.append)
            try:
                # `with` closes the file even when building or writing the
                # snippet raises; the previous open/write/close sequence
                # leaked the handle in that case.
                with open(path, 'a') as fd:
                    fd.write(create_gc3pie_config_snippet(cluster))
            except IOError as ex:
                log.error("Unable to write configuration to file %s: %s", path,
                          ex)
        else:
            print(create_gc3pie_config_snippet(cluster))
Ejemplo n.º 5
0
def test_get_cloud_provider_openstack(tmpdir):
    """
    Check that `create_cloud_provider` returns an `OpenStackCloudProvider`
    for a configuration written to a temporary directory.
    """
    workdir = tmpdir.strpath
    key_file = os.path.join(workdir, 'id_rsa.pem')
    with open(key_file, 'w+') as stream:
        # the key content is irrelevant; only the file's existence matters
        stream.write('')
        stream.flush()
    cloud_section = """
[cloud/openstack]
provider = openstack
auth_url = http://openstack.example.com:5000/v2.0
username = ${USER}
password = XXXXXX
project_name = test
    """
    ini_file = os.path.join(workdir, 'config.ini')
    with open(ini_file, 'w+') as stream:
        cluster_section = make_config_snippet("cluster", "example_openstack")
        login_section = make_config_snippet(
            "login", "ubuntu", keyname='test', valid_path=key_file)
        setup_section = make_config_snippet("setup", "slurm_setup_old")
        stream.write(
            cloud_section + cluster_section + login_section + setup_section)
    creator = make_creator(ini_file)
    provider = creator.create_cloud_provider('example_openstack')
    from elasticluster.providers.openstack import OpenStackCloudProvider
    assert isinstance(provider, OpenStackCloudProvider)
Ejemplo n.º 6
0
    def execute(self):

        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        config = creator.cluster_conf

        print("""%d cluster templates found in configuration file.""" %
              len(config))
        templates = config.keys()
        for pattern in self.params.clusters:
            templates = [t for t in templates if fnmatch(t, pattern)]

        if self.params.clusters:
            print("""%d cluter templates found matching pattern(s) '%s'""" %
                  (len(templates), str.join(", ", self.params.clusters)))

        for template in templates:
            try:
                cluster = creator.create_cluster(template, template)
                print("""
name:     %s""" % template)
                for nodekind in cluster.nodes:
                    print("%s nodes: %d" %
                          (nodekind, len(cluster.nodes[nodekind])))
            except ConfigurationError as ex:
                log.error("unable to load cluster `%s`: %s", template, ex)
Ejemplo n.º 7
0
    def execute(self):
        """
        Print the nodes of the specified cluster.

        Output is JSON (indented when ``self.params.pretty_json`` is set,
        compact when ``self.params.json`` is set) or, by default, the cluster
        summary followed by the pretty-printed description of every node,
        grouped by node kind.  The cluster is refreshed first when
        ``self.params.update`` is set.  Load errors are logged and the
        method returns ``None``.
        """
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        cluster_name = self.params.cluster
        try:
            cluster = creator.load_cluster(cluster_name)
            if self.params.update:
                cluster.update()
        except (ClusterNotFound, ConfigurationError) as err:
            message = "Listing nodes from cluster %s: %s\n" % (cluster_name,
                                                               err)
            log.error(message)
            return

        if self.params.pretty_json:
            print(json.dumps(cluster, default=dict, indent=4))
            return
        if self.params.json:
            print(json.dumps(cluster, default=dict))
            return

        print(cluster_summary(cluster))
        for kind in cluster.nodes:
            print("%s nodes:" % kind)
            print("")
            for node in cluster.nodes[kind]:
                # indent every line by four spaces, then replace the first
                # line's indent with the list bullet
                indented = "\n".join(
                    ["    " + line for line in node.pprint().splitlines()])
                print('  - ' + indented[4:])
                print("")
Ejemplo n.º 8
0
def test_get_cloud_provider_openstack(tmpdir):
    """
    Check that `create_cloud_provider` returns an `OpenStackCloudProvider`
    for a configuration written to a temporary directory.
    """
    workdir = tmpdir.strpath
    key_file = os.path.join(workdir, 'id_rsa.pem')
    with open(key_file, 'w+') as stream:
        # the key content is irrelevant; only the file's existence matters
        stream.write('')
        stream.flush()
    cloud_section = """
[cloud/openstack]
provider = openstack
auth_url = http://openstack.example.com:5000/v2.0
username = ${USER}
password = XXXXXX
project_name = test
    """
    ini_file = os.path.join(workdir, 'config.ini')
    with open(ini_file, 'w+') as stream:
        cluster_section = make_config_snippet("cluster", "example_openstack")
        login_section = make_config_snippet(
            "login", "ubuntu", keyname='test', valid_path=key_file)
        setup_section = make_config_snippet("setup", "slurm_setup_old")
        stream.write(
            cloud_section + cluster_section + login_section + setup_section)
    creator = make_creator(ini_file)
    provider = creator.create_cloud_provider('example_openstack')
    from elasticluster.providers.openstack import OpenStackCloudProvider
    assert isinstance(provider, OpenStackCloudProvider)
Ejemplo n.º 9
0
    def execute(self):
        """
        Load the cluster and build a GC3Pie configuration snippet.

        When ``self.params.append`` is set, the snippet is appended to that
        file (``~`` is expanded); otherwise it is printed to standard output.

        A cluster that cannot be loaded (``ClusterNotFound`` or
        ``ConfigurationError``) and an ``IOError`` while writing the file are
        logged rather than raised; the method returns ``None`` in every case.
        """
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        cluster_name = self.params.cluster
        try:
            cluster = creator.load_cluster(cluster_name)
        except (ClusterNotFound, ConfigurationError) as ex:
            # arguments are passed to the logger instead of being formatted
            # eagerly; the rendered message is unchanged
            log.error("Listing nodes from cluster %s: %s\n",
                      cluster_name, ex)
            return

        from elasticluster.gc3pie_config import create_gc3pie_config_snippet

        if self.params.append:
            path = os.path.expanduser(self.params.append)
            try:
                # `with` closes the file even when building or writing the
                # snippet raises; the previous open/write/close sequence
                # leaked the handle in that case.
                with open(path, 'a') as fd:
                    fd.write(create_gc3pie_config_snippet(cluster))
            except IOError as ex:
                log.error("Unable to write configuration to file %s: %s",
                          path, ex)
        else:
            print(create_gc3pie_config_snippet(cluster))
Ejemplo n.º 10
0
    def execute(self):

        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        config = creator.cluster_conf

        print("""%d cluster templates found in configuration file.""" % len(config))
        templates = config.keys()
        for pattern in self.params.clusters:
            templates = [t for t in templates if fnmatch(t, pattern)]

        if self.params.clusters:
            print("""%d cluter templates found matching pattern(s) '%s'""" % (len(templates), str.join(", ", self.params.clusters)))

        for template in templates:
            try:
                cluster = creator.create_cluster(template, template)
                print("""
name:     %s""" % template)
                for nodekind in cluster.nodes:
                    print("%s nodes: %d" % (
                        nodekind,
                        len(cluster.nodes[nodekind])))
            except ConfigurationError as ex:
                log.error("unable to load cluster `%s`: %s", template, ex)
Ejemplo n.º 11
0
def test_invalid_ssh_to(tmpdir):
    """
    Expect `ConfigurationError` from `create_cluster` when the configuration
    carries an invalid `ssh_to=` line.
    """
    workdir = tmpdir.strpath
    key_file = os.path.join(workdir, 'id_rsa.pem')
    with open(key_file, 'w+') as stream:
        # the key content is irrelevant; only the file's existence matters
        stream.write('')
        stream.flush()
    ini_file = os.path.join(workdir, 'config.ini')
    with open(ini_file, 'w+') as stream:
        cluster_section = make_config_snippet(
            "cluster", "example_openstack", 'ssh_to=non-existent')
        cloud_section = make_config_snippet("cloud", "openstack")
        login_section = make_config_snippet(
            "login", "ubuntu",
            keyname='test_invalid_ssh_to', valid_path=key_file)
        setup_section = make_config_snippet("setup", "slurm_setup_old")
        stream.write(
            cluster_section + cloud_section + login_section + setup_section)
        stream.flush()
    creator = make_creator(ini_file)
    # ERROR: Cluster `example_openstack` is configured to SSH into nodes of kind `non-existent`, but no such kind is defined
    with raises(ConfigurationError):
        creator.create_cluster('slurm')
Ejemplo n.º 12
0
    def execute(self):
        """
        Load the cluster and build a GC3Pie configuration snippet.

        When ``self.params.append`` is set, the snippet is appended to that
        file (``~`` is expanded); otherwise it is printed to standard output.

        A cluster that cannot be loaded (``ClusterNotFound`` or
        ``ConfigurationError``) and an ``IOError`` while writing the file are
        logged rather than raised; the method returns ``None`` in every case.
        """
        log.warning(
            "Command `elasticluster gc3pie-config` is DEPRECATED"
            " and will be removed in release 1.4 of ElastiCluster")
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        cluster_name = self.params.cluster
        try:
            cluster = creator.load_cluster(cluster_name)
        except (ClusterNotFound, ConfigurationError) as ex:
            log.error("Listing nodes from cluster %s: %s", cluster_name, ex)
            return

        from elasticluster.gc3pie_config import create_gc3pie_config_snippet

        if self.params.append:
            path = os.path.expanduser(self.params.append)
            try:
                # `with` closes the file even when building or writing the
                # snippet raises; the previous open/write/close sequence
                # leaked the handle in that case.
                with open(path, 'a') as fd:
                    fd.write(create_gc3pie_config_snippet(cluster))
            except IOError as ex:
                log.error("Unable to write configuration to file %s: %s",
                          path, ex)
        else:
            print(create_gc3pie_config_snippet(cluster))
Ejemplo n.º 13
0
    def execute(self):
        """
        Print the nodes of the specified cluster.

        Output is JSON (indented when ``self.params.pretty_json`` is set,
        compact when ``self.params.json`` is set) or, by default, the cluster
        summary followed by the pretty-printed description of every node,
        grouped by node kind.  The cluster is refreshed first when
        ``self.params.update`` is set.  Load errors are logged and the
        method returns ``None``.
        """
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        cluster_name = self.params.cluster
        try:
            cluster = creator.load_cluster(cluster_name)
            if self.params.update:
                cluster.update()
        except (ClusterNotFound, ConfigurationError) as err:
            message = "Listing nodes from cluster %s: %s\n" % (cluster_name,
                                                               err)
            log.error(message)
            return

        if self.params.pretty_json:
            print(json.dumps(cluster, default=dict, indent=4))
            return
        if self.params.json:
            print(json.dumps(cluster, default=dict))
            return

        print(cluster_summary(cluster))
        for kind in cluster.nodes:
            print("%s nodes:" % kind)
            print("")
            for node in cluster.nodes[kind]:
                # indent every line by four spaces, then replace the first
                # line's indent with the list bullet
                indented = "\n".join(
                    ["    " + line for line in node.pprint().splitlines()])
                print('  - ' + indented[4:])
                print("")
Ejemplo n.º 14
0
    def execute(self):
        """
        Replace the current process with an `sftp` session to a cluster node.

        The node named by ``self.params.ssh_to`` is used when that option is
        set, otherwise the node returned by ``cluster.get_frontend_node()``.

        Returns ``None`` (after logging) when the cluster cannot be loaded or
        updated.  Raises ``ValueError`` when the requested node name is not
        part of the cluster.
        """
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        cluster_name = self.params.cluster
        try:
            cluster = creator.load_cluster(cluster_name)
            cluster.update()
        except (ClusterNotFound, ConfigurationError) as err:
            message = "Setting up cluster %s: %s\n" % (cluster_name, err)
            log.error(message)
            return

        if not self.params.ssh_to:
            frontend = cluster.get_frontend_node()
        else:
            try:
                # index all nodes by name to look up the requested one
                by_name = {node.name: node
                           for node in cluster.get_all_nodes()}
                frontend = by_name[self.params.ssh_to]
            except KeyError:
                raise ValueError(
                    "Hostname %s not found in cluster %s"
                    % (self.params.ssh_to, cluster_name))

        host = frontend.connection_ip()
        username = frontend.image_user
        # fall back to `/dev/null` when the cluster has no known-hosts file
        known_hosts = cluster.known_hosts_file
        if not known_hosts:
            known_hosts = '/dev/null'

        argv = ["sftp"]
        for option in ("UserKnownHostsFile=%s" % known_hosts,
                       "StrictHostKeyChecking=yes",
                       "IdentityFile=%s" % frontend.user_key_private):
            argv.append("-o")
            argv.append(option)
        # user-supplied arguments go before the `user@host` destination
        argv.extend(self.params.sftp_args)
        argv.append('%s@%s' % (username, host))
        os.execlp("sftp", *argv)
Ejemplo n.º 15
0
    def execute(self):
        """
        Replace the current process with an `ssh` session to a cluster node.

        The node named by ``self.params.ssh_to`` is used when that option is
        set, otherwise the node returned by ``cluster.get_frontend_node()``.

        Returns ``None`` (after logging) when the cluster cannot be loaded or
        updated.  Raises ``ValueError`` when the requested node name is not
        part of the cluster.  Exits with status 1 when connecting to the node
        raises ``NodeNotFound``.
        """
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        cluster_name = self.params.cluster
        try:
            cluster = creator.load_cluster(cluster_name)
            cluster.update()
        except (ClusterNotFound, ConfigurationError) as err:
            message = "Setting up cluster %s: %s\n" % (cluster_name, err)
            log.error(message)
            return

        if not self.params.ssh_to:
            frontend = cluster.get_frontend_node()
        else:
            try:
                # index all nodes by name to look up the requested one
                by_name = {node.name: node
                           for node in cluster.get_all_nodes()}
                frontend = by_name[self.params.ssh_to]
            except KeyError:
                raise ValueError(
                    "Hostname %s not found in cluster %s"
                    % (self.params.ssh_to, cluster_name))

        try:
            if not frontend.preferred_ip:
                # Try a connection first, then persist the cluster state
                # (the original comment says this saves `preferred_ip`).
                probe = frontend.connect(keyfile=cluster.known_hosts_file)
                if probe:
                    probe.close()
                cluster.repository.save_or_update(cluster)
        except NodeNotFound as err:
            log.error("Unable to connect to the frontend node: %s" % str(err))
            sys.exit(1)
        host = frontend.connection_ip()

        # split off a nonstandard port, for either IPv4 or IPv6 addresses
        addr, port = host, str(SSH_PORT)
        if ':' in host:
            ipv6 = IPV6_RE.match(host)
            if ipv6:
                addr = ipv6.groups()[0]
                port = ipv6.groups()[1]
            else:
                addr, _, port = host.partition(':')

        username = frontend.image_user
        # fall back to `/dev/null` when the cluster has no known-hosts file
        known_hosts = cluster.known_hosts_file
        if not known_hosts:
            known_hosts = '/dev/null'

        argv = ["ssh", "-i", frontend.user_key_private]
        argv += ["-o", "UserKnownHostsFile=%s" % known_hosts]
        argv += ["-o", "StrictHostKeyChecking=yes"]
        argv += ["-p", port]
        argv.append('%s@%s' % (username, addr))
        # user-supplied arguments follow the destination
        argv.extend(self.params.ssh_args)
        log.debug("Running command `%s`" % ' '.join(argv))
        os.execlp("ssh", *argv)
Ejemplo n.º 16
0
    def execute(self):
        """
        Replace the current process with an `ssh` session to a cluster node.

        The node named by ``self.params.ssh_to`` is used when that option is
        set, otherwise the node returned by ``cluster.get_frontend_node()``.

        Returns ``None`` (after logging) when the cluster cannot be loaded or
        updated.  Raises ``ValueError`` when the requested node name is not
        part of the cluster.  Exits with status 1 when connecting to the node
        raises ``NodeNotFound``.
        """
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        cluster_name = self.params.cluster
        try:
            cluster = creator.load_cluster(cluster_name)
            cluster.update()
        except (ClusterNotFound, ConfigurationError) as err:
            message = "Setting up cluster %s: %s\n" % (cluster_name, err)
            log.error(message)
            return

        if not self.params.ssh_to:
            frontend = cluster.get_frontend_node()
        else:
            try:
                # index all nodes by name to look up the requested one
                by_name = {node.name: node
                           for node in cluster.get_all_nodes()}
                frontend = by_name[self.params.ssh_to]
            except KeyError:
                raise ValueError(
                    "Hostname %s not found in cluster %s"
                    % (self.params.ssh_to, cluster_name))

        try:
            if not frontend.preferred_ip:
                # Try a connection first, then persist the cluster state
                # (the original comment says this saves `preferred_ip`).
                probe = frontend.connect(keyfile=cluster.known_hosts_file)
                if probe:
                    probe.close()
                cluster.repository.save_or_update(cluster)
        except NodeNotFound as err:
            log.error("Unable to connect to the frontend node: %s" % str(err))
            sys.exit(1)
        host = frontend.connection_ip()

        # split off a nonstandard port, for either IPv4 or IPv6 addresses
        addr, port = host, str(SSH_PORT)
        if ':' in host:
            ipv6 = IPV6_RE.match(host)
            if ipv6:
                addr = ipv6.groups()[0]
                port = ipv6.groups()[1]
            else:
                addr, _, port = host.partition(':')

        username = frontend.image_user
        # fall back to `/dev/null` when the cluster has no known-hosts file
        known_hosts = cluster.known_hosts_file
        if not known_hosts:
            known_hosts = '/dev/null'

        argv = ["ssh", "-i", frontend.user_key_private]
        argv += ["-o", "UserKnownHostsFile=%s" % known_hosts]
        argv += ["-o", "StrictHostKeyChecking=yes"]
        argv += ["-p", port]
        argv.append('%s@%s' % (username, addr))
        # user-supplied arguments follow the destination
        argv.extend(self.params.ssh_args)
        log.debug("Running command `%s`" % ' '.join(argv))
        os.execlp("ssh", *argv)
Ejemplo n.º 17
0
    def execute(self):
        """
        Starts a new cluster.

        The cluster is named ``self.params.cluster_name`` when given,
        otherwise it takes the name of the template ``self.params.cluster``.
        An already-stored cluster with that name is loaded and reused;
        otherwise a new one is created from the template.  Unless
        ``self.params.no_setup`` is set, the cluster is also configured after
        it has been started.

        Raises ``ConfigurationError`` when ``self.params.nodes_override``
        names a node group that the template does not define.  Errors caught
        while starting the cluster (``KeyError``, ``ImageError``,
        ``SecurityGroupError``, ``ClusterError``) are logged and re-raised.
        A ``ConfigurationError`` while creating the cluster is logged and the
        method returns ``None``.
        """

        cluster_template = self.params.cluster
        if self.params.cluster_name:
            cluster_name = self.params.cluster_name
        else:
            cluster_name = self.params.cluster

        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)

        # overwrite configuration
        cluster_nodes_conf = creator.cluster_conf[cluster_template]['nodes']
        # `.items()` exists on both Python 2 and 3; `.iteritems()` is
        # Python 2 only and raises `AttributeError` on Python 3.
        for kind, num in self.params.nodes_override.items():
            if kind not in cluster_nodes_conf:
                raise ConfigurationError(
                    "No node group `{kind}` defined"
                    " in cluster template `{template}`"
                    .format(kind=kind, template=cluster_template))
            cluster_nodes_conf[kind]['num'] = num

        # First, check if the cluster is already created.
        try:
            cluster = creator.load_cluster(cluster_name)
        except ClusterNotFound:
            try:
                cluster = creator.create_cluster(
                    cluster_template, cluster_name)
            except ConfigurationError as err:
                log.error("Starting cluster %s: %s", cluster_template, err)
                return

        try:
            print("Starting cluster `{0}` with:".format(cluster.name))
            for cls in cluster.nodes:
                print("* {0:d} {1} nodes.".format(len(cluster.nodes[cls]), cls))
            print("(This may take a while...)")
            # per-kind `min_num` values from the template are handed to
            # `cluster.start()`
            min_nodes = dict((kind, cluster_nodes_conf[kind]['min_num'])
                             for kind in cluster_nodes_conf)
            cluster.start(min_nodes=min_nodes)
            if self.params.no_setup:
                print("NOT configuring the cluster as requested.")
            else:
                print("Configuring the cluster.")
                print("(this too may take a while...)")
                ret = cluster.setup()
                if ret:
                    print("Your cluster is ready!")
                else:
                    print("\nWARNING: YOUR CLUSTER IS NOT READY YET!")
            print(cluster_summary(cluster))
        except (KeyError, ImageError, SecurityGroupError, ClusterError) as err:
            log.error("Could not start cluster `%s`: %s", cluster.name, err)
            raise
Ejemplo n.º 18
0
    def execute(self):
        """
        Replace the current process with ``self.command`` connected to a
        cluster node.

        The node is chosen by ``cluster.get_ssh_to_node(self.params.ssh_to)``.
        When the node has no connection address yet, its IP addresses are
        refreshed, a test connection is made, and the cluster state is saved
        before the real command is run.

        Returns ``None`` (after logging) when the cluster cannot be loaded.
        Exits with ``os.EX_TEMPFAIL`` when the connection check raises, and
        with status 1 when no address is known for the node afterwards.
        """
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        cluster_name = self.params.cluster
        try:
            cluster = creator.load_cluster(cluster_name)
        except (ClusterNotFound, ConfigurationError) as err:
            log.error("Setting up cluster %s: %s", cluster_name, err)
            return

        # XXX: the default value of `self.params.ssh_to` should = the
        # default value for `ssh_to` in `Cluster.get_ssh_to_node()`
        node = cluster.get_ssh_to_node(self.params.ssh_to)

        # make sure the host is reachable before handing over to the command
        try:
            if not node.connection_ip():
                log.info(
                    "No connection address known for node `%s`,"
                    " updating list of IP addresses ...", node.name)
                node.update_ips()
                log.debug("Checking that SSH connection to node `%s` works..",
                          node.name)
                # Try a connection first, then persist the cluster state
                # (the original comment says this saves `preferred_ip`).
                probe = node.connect(keyfile=cluster.known_hosts_file)
                if probe:
                    probe.close()
                cluster.repository.save_or_update(cluster)
        except Exception as err:
            log.error("Unable to connect to the frontend node: %s", err)
            sys.exit(os.EX_TEMPFAIL)

        # now delegate the real connection to the external command
        endpoint = node.connection_ip()
        if not endpoint:
            log.error("No IP address known for node %s", node.name)
            sys.exit(1)
        addr, port = parse_ip_address_and_port(endpoint)
        username = node.image_user
        # fall back to `/dev/null` when the cluster has no known-hosts file
        known_hosts = cluster.known_hosts_file
        if not known_hosts:
            known_hosts = '/dev/null'

        argv = [self.command, "-i", node.user_key_private]
        argv += ["-o", "UserKnownHostsFile={0}".format(known_hosts)]
        argv += ["-o", "StrictHostKeyChecking=yes"]
        argv += ["-o", "Port={0:d}".format(port)]
        argv.append('%s@%s' % (username, addr))
        if cluster.ssh_proxy_command:
            # the proxy option goes right after the command name
            proxy = 'ProxyCommand=' + expand_ssh_proxy_command(
                cluster.ssh_proxy_command, username, addr, port)
            argv = argv[:1] + ['-o', proxy] + argv[1:]
        argv.extend(self.params.cmds)
        log.debug("Running command `%s`", ' '.join(argv))
        os.execlp(self.command, *argv)
Ejemplo n.º 19
0
    def execute(self):
        """
        Starts a new cluster.

        The cluster is named ``self.params.cluster_name`` when given,
        otherwise it takes the name of the template ``self.params.cluster``.
        An already-stored cluster with that name is loaded and reused;
        otherwise a new one is created from the template.  Unless
        ``self.params.no_setup`` is set, the cluster is also configured after
        it has been started.

        Entries of ``self.params.extra_conf`` override template options of
        the same name; entries naming an option the template does not have
        are ignored.  Errors caught while starting the cluster (``KeyError``,
        ``ImageError``, ``SecurityGroupError``, ``ClusterError``) are logged
        and re-raised.  A ``ConfigurationError`` while creating the cluster
        is logged and the method returns ``None``.
        """

        cluster_template = self.params.cluster
        if self.params.cluster_name:
            cluster_name = self.params.cluster_name
        else:
            cluster_name = self.params.cluster

        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)

        # overwrite configuration
        cluster_conf = creator.cluster_conf[cluster_template]
        # `.items()` exists on both Python 2 and 3; `.iteritems()` is
        # Python 2 only and raises `AttributeError` on Python 3.
        for option, value in self.params.extra_conf.items():
            if option in cluster_conf:
                cluster_conf[option] = value

        # First, check if the cluster is already created.
        try:
            cluster = creator.load_cluster(cluster_name)
        except ClusterNotFound:
            try:
                cluster = creator.create_cluster(cluster_template,
                                                 cluster_name)
            except ConfigurationError as err:
                log.error("Starting cluster %s: %s", cluster_template, err)
                return

        try:
            print("Starting cluster `{0}` with:".format(cluster.name))
            for cls in cluster.nodes:
                print("* {0:d} {1} nodes.".format(len(cluster.nodes[cls]),
                                                  cls))
            print("(This may take a while...)")
            # options named `<kind>_nodes_min` give the per-kind minimum
            # that is handed to `cluster.start()`
            min_nodes = dict((k[:-len('_nodes_min')], int(v))
                             for k, v in cluster_conf.items()
                             if k.endswith('_nodes_min'))
            cluster.start(min_nodes=min_nodes)
            if self.params.no_setup:
                print("NOT configuring the cluster as requested.")
            else:
                print("Configuring the cluster.")
                print("(this too may take a while...)")
                ret = cluster.setup()
                if ret:
                    print("Your cluster is ready!")
                else:
                    print("\nWARNING: YOUR CLUSTER IS NOT READY YET!")
            print(cluster_summary(cluster))
        except (KeyError, ImageError, SecurityGroupError, ClusterError) as err:
            log.error("Could not start cluster `%s`: %s", cluster.name, err)
            raise
Ejemplo n.º 20
0
    def execute(self):
        """
        Starts a new cluster.

        The cluster is named ``self.params.cluster_name`` when given,
        otherwise it takes the name of the template ``self.params.cluster``.
        An already-stored cluster with that name is loaded and reused;
        otherwise a new one is created from the template.  Unless
        ``self.params.no_setup`` is set, the cluster is also configured after
        it has been started.

        Entries of ``self.params.extra_conf`` override template options of
        the same name; entries naming an option the template does not have
        are ignored.  Errors caught while starting the cluster (``KeyError``,
        ``ImageError``, ``SecurityGroupError``, ``ClusterError``) are logged
        and re-raised.  A ``ConfigurationError`` while creating the cluster
        is logged and the method returns ``None``.
        """

        cluster_template = self.params.cluster
        if self.params.cluster_name:
            cluster_name = self.params.cluster_name
        else:
            cluster_name = self.params.cluster

        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)

        # overwrite configuration
        cluster_conf = creator.cluster_conf[cluster_template]
        # `.items()` exists on both Python 2 and 3; `.iteritems()` is
        # Python 2 only and raises `AttributeError` on Python 3.
        for option, value in self.params.extra_conf.items():
            if option in cluster_conf:
                cluster_conf[option] = value

        # First, check if the cluster is already created.
        try:
            cluster = creator.load_cluster(cluster_name)
        except ClusterNotFound:
            try:
                cluster = creator.create_cluster(
                    cluster_template, cluster_name)
            except ConfigurationError as err:
                log.error("Starting cluster %s: %s", cluster_template, err)
                return

        try:
            print("Starting cluster `{0}` with:".format(cluster.name))
            for cls in cluster.nodes:
                print("* {0:d} {1} nodes.".format(len(cluster.nodes[cls]), cls))
            print("(This may take a while...)")
            # options named `<kind>_nodes_min` give the per-kind minimum
            # that is handed to `cluster.start()`
            min_nodes = dict(
                (k[:-len('_nodes_min')], int(v))
                for k, v in cluster_conf.items()
                if k.endswith('_nodes_min'))
            cluster.start(min_nodes=min_nodes)
            if self.params.no_setup:
                print("NOT configuring the cluster as requested.")
            else:
                print("Configuring the cluster.")
                print("(this too may take a while...)")
                ret = cluster.setup()
                if ret:
                    print("Your cluster is ready!")
                else:
                    print("\nWARNING: YOUR CLUSTER IS NOT READY YET!")
            print(cluster_summary(cluster))
        except (KeyError, ImageError, SecurityGroupError, ClusterError) as err:
            log.error("Could not start cluster `%s`: %s", cluster.name, err)
            raise
Ejemplo n.º 21
0
 def execute(self):
     """
     Load the named cluster and call its `resume()` method.

     Returns `os.EX_NOINPUT` (after logging) when the cluster cannot be
     loaded; otherwise returns `None`.
     """
     name = self.params.cluster
     factory = make_creator(self.params.config,
                            storage_path=self.params.storage)
     try:
         cluster = factory.load_cluster(name)
     except (ClusterNotFound, ConfigurationError) as err:
         log.error("Cannot load cluster `%s`: %s", name, err)
         return os.EX_NOINPUT
     else:
         print("Resuming cluster `%s` ..." % name)
         cluster.resume()
Ejemplo n.º 22
0
 def execute(self):
     """
     Load the named cluster and call its `resume()` method.

     Returns `os.EX_NOINPUT` (after logging) when the cluster cannot be
     loaded; otherwise returns `None`.
     """
     name = self.params.cluster
     factory = make_creator(self.params.config,
                            storage_path=self.params.storage)
     try:
         cluster = factory.load_cluster(name)
     except (ClusterNotFound, ConfigurationError) as err:
         log.error("Cannot load cluster `%s`: %s", name, err)
         return os.EX_NOINPUT
     else:
         print("Resuming cluster `%s` ..." % name)
         cluster.resume()
Ejemplo n.º 23
0
    def execute(self):
        """
        Hand the terminal over to ``self.command`` connected to the frontend.

        Loads the cluster named by ``self.params.cluster``, refreshes the IP
        addresses of the node selected by ``self.params.ssh_to``, and then
        replaces the current process (``os.execlp``) with ``self.command``
        invoked with ``-i``/``-o`` options pointing at that node, followed by
        the extra arguments in ``self.params.cmds``.

        Returns ``None`` (after logging an error) if the cluster cannot be
        loaded; exits with status 1 if connecting raises `NodeNotFound` or if
        no IP address is known for the node.
        """
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        name = self.params.cluster
        try:
            cluster = creator.load_cluster(name)
        except (ClusterNotFound, ConfigurationError) as err:
            log.error("Setting up cluster %s: %s", name, err)
            return

        # XXX: the default value of `self.params.ssh_to` should match the
        # default value for `ssh_to` in `Cluster.get_ssh_to_node()`
        node = cluster.get_ssh_to_node(self.params.ssh_to)
        log.debug("Updating the ip addresses of `%s`.", node.name)
        node.update_ips()

        # Probe the connection only when no preferred IP is recorded yet;
        # the cluster is saved afterwards, presumably so that the
        # `preferred_ip` found while connecting is persisted.
        try:
            if not node.preferred_ip:
                probe = node.connect(keyfile=cluster.known_hosts_file)
                if probe:
                    probe.close()
                cluster.repository.save_or_update(cluster)
        except NodeNotFound as err:
            log.error("Unable to connect to the frontend node: %s", err)
            sys.exit(1)

        # From here on, the real connection is delegated to the external
        # command.
        host = node.connection_ip()
        if not host:
            log.error("No IP address known for node %s", node.name)
            sys.exit(1)
        addr, port = parse_ip_address_and_port(host)
        username = node.image_user
        known_hosts = cluster.known_hosts_file or '/dev/null'
        options = [
            "-i", node.user_key_private,
            "-o", "UserKnownHostsFile={0}".format(known_hosts),
            "-o", "StrictHostKeyChecking=yes",
            "-o", "Port={0:d}".format(port),
        ]
        destination = '%s@%s' % (username, addr)

        # When a proxy command is configured it goes right after the
        # program name, ahead of all other options.
        proxy_options = []
        if cluster.ssh_proxy_command:
            expanded = expand_ssh_proxy_command(
                cluster.ssh_proxy_command, username, addr, port)
            proxy_options = ['-o', 'ProxyCommand=' + expanded]

        cmdline = [self.command] + proxy_options + options + [destination]
        cmdline.extend(self.params.cmds)
        log.debug("Running command `%s`", ' '.join(cmdline))
        os.execlp(self.command, *cmdline)
Ejemplo n.º 24
0
 def execute(self):
     """
     Pause the cluster if it is running.

     Loads the cluster named by ``self.params.cluster``, asks the user for
     confirmation unless ``self.params.yes`` is set, and then calls the
     cluster's ``pause()`` method.

     :return: ``os.EX_NOINPUT`` if the cluster cannot be loaded
       (an error is logged in that case); ``None`` otherwise.
     """
     cluster_name = self.params.cluster
     creator = make_creator(self.params.config,
                            storage_path=self.params.storage)
     try:
         cluster = creator.load_cluster(cluster_name)
     except (ClusterNotFound, ConfigurationError) as e:
         log.error("Cannot load cluster `%s`: %s", cluster_name, e)
         return os.EX_NOINPUT
     if not self.params.yes:
         # Prompt wording fixed: the original read "Do you want really
         # want to pause ...".
         confirm_or_abort(
             "Do you really want to pause cluster `{cluster_name}`?"
             .format(cluster_name=cluster_name),
             msg="Aborting upon user request.")
     print("Pausing cluster `%s` ..." % cluster_name)
     cluster.pause()
Ejemplo n.º 25
0
 def execute(self):
     """
     Pause the cluster if it is running.

     The cluster name comes from ``self.params.cluster``.  Unless
     ``self.params.yes`` is set, the user is asked to confirm before the
     cluster's ``pause()`` method is called.

     :return: ``os.EX_NOINPUT`` when loading fails with `ClusterNotFound`
       or `ConfigurationError` (after logging the error); ``None`` otherwise.
     """
     cluster_name = self.params.cluster
     creator = make_creator(self.params.config,
                            storage_path=self.params.storage)
     try:
         cluster = creator.load_cluster(cluster_name)
     except (ClusterNotFound, ConfigurationError) as e:
         log.error("Cannot load cluster `%s`: %s", cluster_name, e)
         return os.EX_NOINPUT
     if not self.params.yes:
         # Prompt wording fixed: the original had a doubled "want"
         # ("Do you want really want to pause ...").
         question = (
             "Do you really want to pause cluster `{cluster_name}`?"
             .format(cluster_name=cluster_name))
         confirm_or_abort(question, msg="Aborting upon user request.")
     print("Pausing cluster `%s` ..." % cluster_name)
     cluster.pause()
Ejemplo n.º 26
0
def test_issue_376(tmpdir):
    """
    Regression test for issue #376.

    Values set in a node-kind section (``[cluster/slurm/master]``) must be
    used for nodes of that kind, while other kinds keep the cluster-level
    values.
    """
    workdir = tmpdir.strpath
    key_path = os.path.join(workdir, 'id_rsa.pem')
    with open(key_path, 'w+') as key_file:
        # the key content is irrelevant; the file only needs to exist
        key_file.write('')
        key_file.flush()

    # configuration reported by @marcbrisson in issue #376
    cluster_sections = """
[cluster/slurm]
cloud=google
login=ubuntu
setup=slurm_setup
security_group=default
image_id=https://www.googleapis.com/compute/v1/projects/jda-labs---decision-science-01/global/images/image-python-ubuntu
flavor=n1-standard-1
master_nodes=1
worker_nodes=4
ssh_to=master
image_userdata=
boot_disk_size=20

[cluster/slurm/master]
flavor=n1-standard-2
boot_disk_size=100
    """
    config_path = os.path.join(workdir, 'config.ini')
    with open(config_path, 'w+') as config_file:
        config_text = (
            cluster_sections
            + make_config_snippet("cloud", "google")
            + make_config_snippet("login", "ubuntu",
                                  keyname='test_issue_376',
                                  valid_path=key_path)
            + make_config_snippet("setup", "slurm_setup")
        )
        config_file.write(config_text)
        config_file.flush()

    cluster = make_creator(config_path).create_cluster('slurm')

    # "master" nodes pick up the values of their own config section
    first_master = cluster.nodes['master'][0]
    assert first_master.flavor == 'n1-standard-2'
    assert first_master.extra['boot_disk_size'] == '100'

    # "worker" nodes fall back to the cluster-level flavor
    first_worker = cluster.nodes['worker'][0]
    assert first_worker.flavor == 'n1-standard-1'
    # FIXME: does this imply that the `boot_disk_size` value defined at
    # cluster level is not propagated to "worker" nodes?
    assert 'boot_disk_size' not in first_worker.extra
Ejemplo n.º 27
0
def test_pr_378(tmpdir):
    """
    Regression test for PR #378.

    The ``user_key_private`` and ``user_key_public`` values of a login
    section must be passed through ``os.path.expanduser`` and end up as
    absolute paths on the created cluster.
    """
    workdir = tmpdir.strpath
    config_path = os.path.join(workdir, 'config.ini')

    # login section reported by @ikhaja in PR #378
    login_section = """
[login/google]
image_user=my_username
image_user_sudo=root
image_sudo=True
user_key_name=elasticluster
user_key_private=~/.ssh/google_compute_engine
user_key_public=~/.ssh/google_compute_engine.pub
            """
    # FIXME: the std `cluster/*` snippet cannot set `login=` and `cloud=`
    cluster_section = """
[cluster/slurm]
cloud=google
login=google
setup=slurm_setup
security_group=default
image_id=https://www.googleapis.com/compute/v1/projects/jda-labs---decision-science-01/global/images/image-python-ubuntu
flavor=n1-standard-1
master_nodes=1
worker_nodes=4
ssh_to=master
    """
    with open(config_path, 'w+') as config_file:
        config_text = (
            make_config_snippet("cloud", "google")
            + login_section
            + cluster_section
            + make_config_snippet("setup", "slurm_setup")
        )
        config_file.write(config_text)
        config_file.flush()

    with patch('os.path.expanduser') as fake_expanduser:
        # `os.path.expanduser` is also called from within
        # `_expand_config_file_list()`, which removes non-existent files
        # from the list, so the mock has to return a path that exists
        fake_expanduser.return_value = config_path
        creator = make_creator(config_path)
        # `os.path.expanduser` must have been applied to both
        # `user_key_*` values
        fake_expanduser.assert_any_call('~/.ssh/google_compute_engine.pub')
        fake_expanduser.assert_any_call('~/.ssh/google_compute_engine')
        # and the values stored on the cluster must be the expanded ones
        cluster = creator.create_cluster("slurm")
        assert os.path.isabs(cluster.user_key_public)
        assert os.path.isabs(cluster.user_key_private)
Ejemplo n.º 28
0
def test_pr_378(tmpdir):
    """
    Check that SSH key paths in a login section get user-expanded (PR #378).

    A config with ``~``-prefixed ``user_key_private``/``user_key_public``
    values is loaded while ``os.path.expanduser`` is mocked; the test
    verifies that the mock was called on both values and that the created
    cluster holds absolute paths.
    """
    config_path = os.path.join(tmpdir.strpath, 'config.ini')

    # reported by @ikhaja in PR #378
    google_login = """
[login/google]
image_user=my_username
image_user_sudo=root
image_sudo=True
user_key_name=elasticluster
user_key_private=~/.ssh/google_compute_engine
user_key_public=~/.ssh/google_compute_engine.pub
            """
    # FIXME: the std `cluster/*` snippet cannot set `login=` and `cloud=`
    slurm_cluster = """
[cluster/slurm]
cloud=google
login=google
setup=slurm_setup
security_group=default
image_id=https://www.googleapis.com/compute/v1/projects/jda-labs---decision-science-01/global/images/image-python-ubuntu
flavor=n1-standard-1
master_nodes=1
worker_nodes=4
ssh_to=master
    """
    with open(config_path, 'w+') as out:
        cloud_snippet = make_config_snippet("cloud", "google")
        setup_snippet = make_config_snippet("setup", "slurm_setup")
        out.write(cloud_snippet + google_login + slurm_cluster + setup_snippet)
        out.flush()

    with patch('os.path.expanduser') as expand_mock:
        # The mock must return an existing path: `os.path.expanduser` is
        # called from within `_expand_config_file_list()` too, and
        # non-existent files are removed from that list.
        expand_mock.return_value = config_path
        creator = make_creator(config_path)
        # both `user_key_*` values went through `os.path.expanduser` ...
        expand_mock.assert_any_call('~/.ssh/google_compute_engine.pub')
        expand_mock.assert_any_call('~/.ssh/google_compute_engine')
        # ... and the configured values are the expanded ones
        cluster = creator.create_cluster("slurm")
        assert os.path.isabs(cluster.user_key_public)
        assert os.path.isabs(cluster.user_key_private)
Ejemplo n.º 29
0
    def execute(self):
        """
        Replace the current process with an ``ssh`` session to the frontend.

        Loads and updates the cluster named by ``self.params.clustername``,
        picks the node selected by ``self.params.ssh_to``, and ``exec``s
        ``ssh`` with the node's private key, the cluster's known-hosts file
        (``/dev/null`` if none is set), host key checking disabled, and the
        extra arguments in ``self.params.ssh_args``.

        Returns ``None`` (after logging an error) if the cluster cannot be
        loaded; exits with status 1 if connecting raises `NodeNotFound` or if
        no IP address is known for the node.
        """
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        name = self.params.clustername
        try:
            cluster = creator.load_cluster(name)
            cluster.update()
        except (ClusterNotFound, ConfigurationError) as err:
            log.error("Setting up cluster %s: %s", name, err)
            return

        # XXX: the default value of `self.params.ssh_to` should match the
        # default value for `ssh_to` in `Cluster.get_ssh_to_node()`
        node = cluster.get_ssh_to_node(self.params.ssh_to)

        # Probe the connection only when no preferred IP is recorded yet;
        # the cluster is saved afterwards, presumably so that the
        # `preferred_ip` found while connecting is persisted.
        try:
            if not node.preferred_ip:
                probe = node.connect(keyfile=cluster.known_hosts_file)
                if probe:
                    probe.close()
                cluster.repository.save_or_update(cluster)
        except NodeNotFound as err:
            log.error("Unable to connect to the frontend node: %s", err)
            sys.exit(1)

        # From here on, the real connection is delegated to `ssh`.
        host = node.connection_ip()
        if not host:
            log.error("No IP address known for node %s", node.name)
            sys.exit(1)
        addr, port = parse_ip_address_and_port(host)
        username = node.image_user
        known_hosts = cluster.known_hosts_file or '/dev/null'
        argv = [
            "ssh",
            "-i", node.user_key_private,
            "-o", "UserKnownHostsFile={0}".format(known_hosts),
            "-o", "StrictHostKeyChecking=no",
            "-p", "{0:d}".format(port),
            '%s@%s' % (username, addr),
        ]
        argv.extend(self.params.ssh_args)
        log.debug("Running command `%s`", ' '.join(argv))
        os.execlp("ssh", *argv)
Ejemplo n.º 30
0
def test_issue_376(tmpdir):
    """
    Check per-node-kind overrides of cluster settings (issue #376).

    The ``[cluster/slurm/master]`` section overrides ``flavor`` and
    ``boot_disk_size`` for "master" nodes only; "worker" nodes must keep the
    cluster-level flavor.
    """
    base_dir = tmpdir.strpath
    ssh_key_path = os.path.join(base_dir, 'id_rsa.pem')
    with open(ssh_key_path, 'w+') as handle:
        # only the existence of the key file matters, not its content
        handle.write('')
        handle.flush()

    # reported by @marcbrisson in issue #376
    reported_config = """
[cluster/slurm]
cloud=google
login=ubuntu
setup=slurm_setup
security_group=default
image_id=https://www.googleapis.com/compute/v1/projects/jda-labs---decision-science-01/global/images/image-python-ubuntu
flavor=n1-standard-1
master_nodes=1
worker_nodes=4
ssh_to=master
image_userdata=
boot_disk_size=20

[cluster/slurm/master]
flavor=n1-standard-2
boot_disk_size=100
    """
    config_path = os.path.join(base_dir, 'config.ini')
    with open(config_path, 'w+') as handle:
        cloud_snippet = make_config_snippet("cloud", "google")
        login_snippet = make_config_snippet(
            "login", "ubuntu",
            keyname='test_issue_376', valid_path=ssh_key_path)
        setup_snippet = make_config_snippet("setup", "slurm_setup")
        handle.write(
            reported_config + cloud_snippet + login_snippet + setup_snippet)
        handle.flush()

    creator = make_creator(config_path)
    cluster = creator.create_cluster('slurm')

    # values for "master" nodes come from the `[cluster/slurm/master]` section
    master_node = cluster.nodes['master'][0]
    assert master_node.flavor == 'n1-standard-2'
    assert master_node.extra['boot_disk_size'] == '100'

    # "worker" nodes use the cluster-level flavor
    worker_node = cluster.nodes['worker'][0]
    assert worker_node.flavor == 'n1-standard-1'
    # FIXME: does this imply that the `boot_disk_size` value defined at
    # cluster level is not propagated to "worker" nodes?
    assert 'boot_disk_size' not in worker_node.extra
Ejemplo n.º 31
0
def test_gce_accelerator1(tmpdir):
    """
    Check that ``accelerator_count`` set for a node kind reaches its nodes.

    The stock ``example_google`` cluster snippet is extended with a
    ``[cluster/example_google/misc]`` section asking for one accelerator;
    the first ``misc`` node must then carry ``accelerator_count == 1`` in
    its ``extra`` settings.
    """
    workdir = tmpdir.strpath
    key_path = os.path.join(workdir, 'id_rsa.pem')
    with open(key_path, 'w+') as key_file:
        # the key content is irrelevant; the file only needs to exist
        key_file.write('')
        key_file.flush()

    config_path = os.path.join(workdir, 'config.ini')
    with open(config_path, 'w+') as config_file:
        # ask for one GPU on `misc` nodes
        cluster_snippet = make_config_snippet(
            "cluster", "example_google",
            '[cluster/example_google/misc]',
            'accelerator_count=1')
        cloud_snippet = make_config_snippet("cloud", "google")
        login_snippet = make_config_snippet(
            "login", "ubuntu",
            keyname='test_gce_accelerator', valid_path=key_path)
        setup_snippet = make_config_snippet("setup", "misc_setup")
        config_file.write(
            cluster_snippet + cloud_snippet + login_snippet + setup_snippet)
        config_file.flush()

    cluster = make_creator(config_path).create_cluster('example_google')

    # the value from the `misc` section must show up on `misc` nodes
    misc_extra = cluster.nodes['misc'][0].extra
    assert 'accelerator_count' in misc_extra
    assert cluster.nodes['misc'][0].extra['accelerator_count'] == 1
Ejemplo n.º 32
0
    def execute(self):
        """
        Update and (re)configure an existing cluster.

        Loads the cluster named by ``self.params.cluster``, calls its
        ``update()`` method, runs ``setup()`` with ``self.params.extra``,
        reports whether setup succeeded, and prints the cluster summary.

        Returns ``None`` in all cases; if the cluster cannot be loaded an
        error is logged and nothing else is done.
        """
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        name = self.params.cluster

        print("Updating cluster `%s`..." % name)
        try:
            cluster = creator.load_cluster(name)
            cluster.update()
        except (ClusterNotFound, ConfigurationError) as err:
            failure = "Setting up cluster %s: %s\n" % (name, err)
            log.error(failure)
            return

        print("Configuring cluster `%s`..." % name)
        succeeded = cluster.setup(self.params.extra)
        outcome = ("Your cluster is ready!" if succeeded
                   else "\nWARNING: YOUR CLUSTER IS NOT READY YET!")
        print(outcome)
        print(cluster_summary(cluster))
Ejemplo n.º 33
0
def test_gce_accelerator1(tmpdir):
    """
    Check that a per-node-kind ``accelerator_count`` is applied.

    Extra lines passed to the ``example_google`` cluster snippet define a
    ``[cluster/example_google/misc]`` section with ``accelerator_count=1``;
    the test asserts that value on the first ``misc`` node.
    """
    base_dir = tmpdir.strpath
    ssh_key_path = os.path.join(base_dir, 'id_rsa.pem')
    with open(ssh_key_path, 'w+') as handle:
        # only the existence of the key file matters, not its content
        handle.write('')
        handle.flush()

    config_path = os.path.join(base_dir, 'config.ini')
    with open(config_path, 'w+') as handle:
        # ask for one GPU on `misc` nodes
        config_text = (
            make_config_snippet("cluster", "example_google",
                                '[cluster/example_google/misc]',
                                'accelerator_count=1')
            + make_config_snippet("cloud", "google")
            + make_config_snippet("login", "ubuntu",
                                  keyname='test_gce_accelerator',
                                  valid_path=ssh_key_path)
            + make_config_snippet("setup", "misc_setup")
        )
        handle.write(config_text)
        handle.flush()

    creator = make_creator(config_path)
    cluster = creator.create_cluster('example_google')

    # `misc` nodes must carry the accelerator count from their section
    first_misc = cluster.nodes['misc'][0]
    assert 'accelerator_count' in first_misc.extra
    assert first_misc.extra['accelerator_count'] == 1
Ejemplo n.º 34
0
    def execute(self):
        """
        Refresh a cluster's state and run its setup.

        The cluster named by ``self.params.cluster`` is loaded and updated,
        then ``setup()`` is called with ``self.params.extra``.  A readiness
        message and the cluster summary are printed afterwards.

        If loading or updating raises `ClusterNotFound` or
        `ConfigurationError`, the error is logged and the method returns
        without configuring anything.
        """
        creator = make_creator(
            self.params.config, storage_path=self.params.storage)
        wanted = self.params.cluster

        print("Updating cluster `%s`..." % wanted)
        try:
            cluster = creator.load_cluster(wanted)
            cluster.update()
        except (ClusterNotFound, ConfigurationError) as failure:
            log.error("Setting up cluster %s: %s\n" % (wanted, failure))
            return

        print("Configuring cluster `%s`..." % wanted)
        if cluster.setup(self.params.extra):
            status_line = "Your cluster is ready!"
        else:
            status_line = "\nWARNING: YOUR CLUSTER IS NOT READY YET!"
        print(status_line)
        print(cluster_summary(cluster))
Ejemplo n.º 35
0
    def execute(self):
        """
        Replace the current process with an ``sftp`` session to the frontend.

        Loads and updates the cluster named by ``self.params.clustername``,
        picks the node selected by ``self.params.ssh_to``, and ``exec``s
        ``sftp`` with ``-o`` options for port, known-hosts file
        (``/dev/null`` if none is set), disabled host key checking and
        identity file, followed by ``self.params.sftp_args`` and finally the
        ``user@address`` destination.

        Returns ``None`` (after logging an error) if the cluster cannot be
        loaded; exits with status 1 if no IP address is known for the node.
        """
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        name = self.params.clustername
        try:
            cluster = creator.load_cluster(name)
            cluster.update()
        except (ClusterNotFound, ConfigurationError) as err:
            log.error("Setting up cluster %s: %s", name, err)
            return

        # XXX: the default value of `self.params.ssh_to` should match the
        # default value for `ssh_to` in `Cluster.get_ssh_to_node()`
        node = cluster.get_ssh_to_node(self.params.ssh_to)

        host = node.connection_ip()
        if not host:
            log.error("No IP address known for node %s", node.name)
            sys.exit(1)

        addr, port = parse_ip_address_and_port(host)
        username = node.image_user
        known_hosts = cluster.known_hosts_file or '/dev/null'

        # every setting (including the port) is passed as a `-o` option
        option_values = [
            "Port={0}".format(port),
            "UserKnownHostsFile={0}".format(known_hosts),
            "StrictHostKeyChecking=no",
            "IdentityFile={0}".format(node.user_key_private),
        ]
        argv = ["sftp"]
        for value in option_values:
            argv += ["-o", value]
        argv.extend(self.params.sftp_args)
        # the destination comes last, after any user-supplied arguments
        argv.append('{0}@{1}'.format(username, addr))
        os.execlp("sftp", *argv)
Ejemplo n.º 36
0
    def execute(self):
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        repository = creator.create_repository()
        clusters = repository.get_all()

        if not clusters:
            print("No clusters found.")
        else:
            print("""
The following clusters have been started.
Please note that there's no guarantee that they are fully configured:
""")
            for cluster in sorted(clusters):
                print("%s " % cluster.name)
                print("-" * len(cluster.name))
                print("  name:           %s" % cluster.name)
                if cluster.name != cluster.template:
                    print("  template:       %s" % cluster.template)
                for cls in cluster.nodes:
                    print("  - %s nodes: %d" % (cls, len(cluster.nodes[cls])))
                print("")
Ejemplo n.º 37
0
def test_gce_accelerator2(tmpdir):
    """
    Check that ``accelerator_count`` only applies to the kind that sets it.

    The config requests two accelerators in ``[cluster/test/worker]`` only:
    "worker" nodes must report a count of 2, "master" nodes a count of 0.
    """
    workdir = tmpdir.strpath
    key_path = os.path.join(workdir, 'id_rsa.pem')
    with open(key_path, 'w+') as key_file:
        # the key content is irrelevant; the file only needs to exist
        key_file.write('')
        key_file.flush()

    # ask for two GPU on `worker` nodes only
    cluster_sections = """
[cluster/test]
cloud=google
login=ubuntu
setup=slurm_setup
security_group=default
image_id=**not important**
flavor=n1-standard-1
master_nodes=1
worker_nodes=4
ssh_to=master

[cluster/test/worker]
accelerator_count=2
    """
    config_path = os.path.join(workdir, 'config.ini')
    with open(config_path, 'w+') as config_file:
        config_text = (
            cluster_sections
            + make_config_snippet("cloud", "google")
            + make_config_snippet("login", "ubuntu",
                                  keyname='test_gce_accelerator',
                                  valid_path=key_path)
            + make_config_snippet("setup", "slurm_setup")
        )
        config_file.write(config_text)
        config_file.flush()

    cluster = make_creator(config_path).create_cluster('test')

    # "master" nodes have no accelerator setting of their own: expect 0
    assert cluster.nodes['master'][0].extra['accelerator_count'] == 0
    # "worker" nodes take the value from the `[cluster/test/worker]` section
    worker_extra = cluster.nodes['worker'][0].extra
    assert 'accelerator_count' in worker_extra
    assert cluster.nodes['worker'][0].extra['accelerator_count'] == 2
Ejemplo n.º 38
0
    def execute(self):
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        repository = creator.create_repository()
        clusters = repository.get_all()

        if not clusters:
            print("No clusters found.")
        else:
            print("""
The following clusters have been started.
Please note that there's no guarantee that they are fully configured:
""")
            for cluster in sorted(clusters):
                print("%s " % cluster.name)
                print("-" * len(cluster.name))
                print("  name:           %s" % cluster.name)
                if cluster.name != cluster.template:
                    print("  template:       %s" % cluster.template)
                for cls in cluster.nodes:
                    print("  - %s nodes: %d" % (cls, len(cluster.nodes[cls])))
                print("")
Ejemplo n.º 39
0
def test_issue_415(tmpdir):
    """
    Drop cluster definition if not all node kinds are present in the `setup/*` section.

    Creating the ``gce-slurm`` cluster from the configuration reported in
    issue #415 must raise `ConfigurationError`.
    """
    workdir = tmpdir.strpath
    key_path = os.path.join(workdir, 'id_rsa.pem')
    with open(key_path, 'w+') as key_file:
        # the key content is irrelevant; the file only needs to exist
        key_file.write('')
        key_file.flush()

    # reported by @dirkpetersen in issue #415
    cluster_section = """
[cluster/gce-slurm]
cloud=google
#login=ubuntu
login=google
setup=slurm_setup_old
security_group=default
image_id=ubuntu-1604-xenial-v20170307
flavor=n1-standard-1
frontend_nodes=1
worker_nodes=2
image_userdata=
ssh_to=frontend
            """
    config_path = os.path.join(workdir, 'config.ini')
    with open(config_path, 'w+') as config_file:
        config_text = (
            cluster_section
            + make_config_snippet("cloud", "google")
            + make_config_snippet("login", "ubuntu",
                                  keyname='test_issue_415',
                                  valid_path=key_path)
            + make_config_snippet("setup", "slurm_setup_old")
        )
        config_file.write(config_text)
        config_file.flush()

    creator = make_creator(config_path)
    # ERROR: Configuration section `cluster/gce-slurm` references
    # non-existing login section `google`. Dropping cluster definition.
    with raises(ConfigurationError):
        creator.create_cluster('gce-slurm')
Ejemplo n.º 40
0
def test_gce_accelerator2(tmpdir):
    """
    Check that a ``worker``-only ``accelerator_count`` stays on workers.

    ``[cluster/test/worker]`` asks for two accelerators; the test expects
    ``accelerator_count`` to be 2 on the first "worker" node and 0 on the
    first "master" node.
    """
    base_dir = tmpdir.strpath
    ssh_key_path = os.path.join(base_dir, 'id_rsa.pem')
    with open(ssh_key_path, 'w+') as handle:
        # only the existence of the key file matters, not its content
        handle.write('')
        handle.flush()

    # ask for two GPU on `worker` nodes only
    test_cluster = """
[cluster/test]
cloud=google
login=ubuntu
setup=slurm_setup
security_group=default
image_id=**not important**
flavor=n1-standard-1
master_nodes=1
worker_nodes=4
ssh_to=master

[cluster/test/worker]
accelerator_count=2
    """
    config_path = os.path.join(base_dir, 'config.ini')
    with open(config_path, 'w+') as handle:
        cloud_snippet = make_config_snippet("cloud", "google")
        login_snippet = make_config_snippet(
            "login", "ubuntu",
            keyname='test_gce_accelerator', valid_path=ssh_key_path)
        setup_snippet = make_config_snippet("setup", "slurm_setup")
        handle.write(
            test_cluster + cloud_snippet + login_snippet + setup_snippet)
        handle.flush()

    creator = make_creator(config_path)
    cluster = creator.create_cluster('test')

    # no accelerator is configured for "master" nodes: the count must be 0
    first_master = cluster.nodes['master'][0]
    assert first_master.extra['accelerator_count'] == 0
    # "worker" nodes get the count from their own config section
    first_worker = cluster.nodes['worker'][0]
    assert 'accelerator_count' in first_worker.extra
    assert first_worker.extra['accelerator_count'] == 2
Ejemplo n.º 41
0
def test_issue_415(tmpdir):
    """
    Drop cluster definition if not all node kinds are present in the `setup/*` section.

    Uses the configuration reported in issue #415 and expects
    ``create_cluster('gce-slurm')`` to raise `ConfigurationError`.
    """
    base_dir = tmpdir.strpath
    ssh_key_path = os.path.join(base_dir, 'id_rsa.pem')
    with open(ssh_key_path, 'w+') as handle:
        # only the existence of the key file matters, not its content
        handle.write('')
        handle.flush()

    # reported by @dirkpetersen in issue #415
    reported_config = """
[cluster/gce-slurm]
cloud=google
#login=ubuntu
login=google
setup=slurm_setup_old
security_group=default
image_id=ubuntu-1604-xenial-v20170307
flavor=n1-standard-1
frontend_nodes=1
worker_nodes=2
image_userdata=
ssh_to=frontend
            """
    config_path = os.path.join(base_dir, 'config.ini')
    with open(config_path, 'w+') as handle:
        cloud_snippet = make_config_snippet("cloud", "google")
        login_snippet = make_config_snippet(
            "login", "ubuntu",
            keyname='test_issue_415', valid_path=ssh_key_path)
        setup_snippet = make_config_snippet("setup", "slurm_setup_old")
        handle.write(
            reported_config + cloud_snippet + login_snippet + setup_snippet)
        handle.flush()

    creator = make_creator(config_path)
    # ERROR: Configuration section `cluster/gce-slurm` references
    # non-existing login section `google`. Dropping cluster definition.
    with raises(ConfigurationError):
        creator.create_cluster('gce-slurm')
Ejemplo n.º 42
0
    def execute(self):
        """
        Remove one node from a cluster and optionally reconfigure it.

        Loads and updates the cluster named by ``self.params.cluster``,
        looks up the node named by ``self.params.node``, asks for
        confirmation unless ``self.params.yes`` is set, and removes the node
        with ``stop=True``.  Unless ``self.params.no_setup`` is set, the
        cluster's ``setup()`` is run afterwards.

        Returns ``None`` (after logging an error) if the cluster cannot be
        loaded; exits with status 1 if the node is not found.
        """
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)

        # load the current state of the cluster
        name = self.params.cluster
        try:
            cluster = creator.load_cluster(name)
            cluster.update()
        except (ClusterNotFound, ConfigurationError) as err:
            failure = "Error loading cluster %s: %s\n" % (name, err)
            log.error(failure)
            return

        # locate the node that should go away
        try:
            victim = cluster.get_node_by_name(self.params.node)
        except NodeNotFound:
            log.error("Node %s not found in cluster %s"
                      % (self.params.node, self.params.cluster))
            sys.exit(1)

        # ask before doing anything destructive
        if not self.params.yes:
            question = ("Do you really want to remove node `{}`?"
                        .format(victim.name))
            confirm_or_abort(question, msg="Aborting upon user request.")

        cluster.remove_node(victim, stop=True)
        print("Node %s removed" % victim.name)

        if self.params.no_setup:
            print("NOT reconfiguring the cluster as requested.")
            return
        print("Reconfiguring the cluster.")
        cluster.setup()
Ejemplo n.º 43
0
    def execute(self):
        """
        Remove a single node from an existing cluster.

        The cluster (``self.params.cluster``) is loaded and updated, the
        node (``self.params.node``) is looked up by name and, after user
        confirmation (skipped when ``self.params.yes`` is set), removed with
        ``stop=True``.  The cluster is then set up again unless
        ``self.params.no_setup`` is set.

        If the cluster cannot be loaded an error is logged and the method
        returns; if the node does not exist the process exits with status 1.
        """
        creator = make_creator(
            self.params.config, storage_path=self.params.storage)

        # Get current cluster configuration
        wanted = self.params.cluster
        try:
            cluster = creator.load_cluster(wanted)
            cluster.update()
        except (ClusterNotFound, ConfigurationError) as failure:
            log.error("Error loading cluster %s: %s\n" % (wanted, failure))
            return

        # Find the node to remove.
        try:
            target = cluster.get_node_by_name(self.params.node)
        except NodeNotFound:
            not_found = "Node %s not found in cluster %s" % (
                self.params.node, self.params.cluster)
            log.error(not_found)
            sys.exit(1)

        # Confirm, then remove
        if not self.params.yes:
            confirm_or_abort(
                "Do you really want to remove node `{}`?".format(target.name),
                msg="Aborting upon user request.")

        cluster.remove_node(target, stop=True)
        print("Node %s removed" % target.name)

        if not self.params.no_setup:
            print("Reconfiguring the cluster.")
            cluster.setup()
        else:
            print("NOT reconfiguring the cluster as requested.")
Ejemplo n.º 44
0
def test_default_setup_provider_is_ansible(tmpdir):
    """
    Check that a `setup/*` section without `provider=` yields Ansible.

    The ``[setup/setup_no_ansible]`` section below has no ``provider=``
    line; the setup provider created for the cluster must still be an
    `AnsibleSetupProvider`.
    """
    workdir = tmpdir.strpath
    key_path = os.path.join(workdir, 'id_rsa.pem')
    with open(key_path, 'w+') as key_file:
        # the key content is irrelevant; the file only needs to exist
        key_file.write('')
        key_file.flush()

    # *note:* no `provider=` line here
    setup_section = """
[setup/setup_no_ansible]
frontend_groups = slurm_master
compute_groups = slurm_worker
    """
    config_path = os.path.join(workdir, 'config.ini')
    with open(config_path, 'w+') as config_file:
        config_text = (
            make_config_snippet("cloud", "openstack")
            + make_config_snippet("cluster", "example_openstack",
                                  'setup=setup_no_ansible')
            + make_config_snippet("login", "ubuntu",
                                  keyname='test', valid_path=key_path)
            + setup_section
        )
        config_file.write(config_text)

    provider = make_creator(config_path).create_setup_provider(
        'example_openstack')
    from elasticluster.providers.ansible_provider import AnsibleSetupProvider
    assert isinstance(provider, AnsibleSetupProvider)
Ejemplo n.º 45
0
def test_invalid_ssh_to(tmpdir):
    """
    Drop cluster definition with an invalid `ssh_to=` line.

    The ``example_openstack`` cluster snippet is extended with
    ``ssh_to=non-existent``; creating that cluster must raise
    `ConfigurationError`.
    """
    wd = tmpdir.strpath
    ssh_key_path = os.path.join(wd, 'id_rsa.pem')
    with open(ssh_key_path, 'w+') as ssh_key_file:
        # don't really care about SSH key, just that the file exists
        ssh_key_file.write('')
        ssh_key_file.flush()
    config_path = os.path.join(wd, 'config.ini')
    with open(config_path, 'w+') as config_file:
        config_file.write(
            make_config_snippet("cluster", "example_openstack", 'ssh_to=non-existent')
            + make_config_snippet("cloud", "openstack")
            + make_config_snippet("login", "ubuntu",
                                  keyname='test_invalid_ssh_to', valid_path=ssh_key_path)
            + make_config_snippet("setup", "slurm_setup_old")
        )
        config_file.flush()
    creator = make_creator(config_path)
    # ERROR: Cluster `example_openstack` is configured to SSH into nodes of kind `non-existent`, but no such kind is defined
    #
    # Ask for the cluster that the configuration actually defines.  The
    # original requested `slurm`, a template that is never defined in this
    # config, so the expected error was raised regardless of the `ssh_to=`
    # check.
    with raises(ConfigurationError):
        creator.create_cluster('example_openstack')
Ejemplo n.º 46
0
def test_default_setup_provider_is_ansible(tmpdir):
    """
    A `[setup/...]` section written without a `provider=` line must
    produce an `AnsibleSetupProvider` instance.
    """
    workdir = tmpdir.strpath

    # the content of the SSH key is irrelevant; the file just has to exist
    key_path = os.path.join(workdir, 'id_rsa.pem')
    with open(key_path, 'w+') as key_file:
        key_file.write('')
        key_file.flush()

    cfg_path = os.path.join(workdir, 'config.ini')
    with open(cfg_path, 'w+') as cfg_file:
        cloud_section = make_config_snippet("cloud", "openstack")
        cluster_section = make_config_snippet(
            "cluster", "example_openstack", 'setup=setup_no_ansible')
        login_section = make_config_snippet(
            "login", "ubuntu", keyname='test', valid_path=key_path)
        # *note:* this hand-written section has no `provider=` line
        setup_section = """
[setup/setup_no_ansible]
frontend_groups = slurm_master
compute_groups = slurm_worker
    """
        cfg_file.write(
            cloud_section + cluster_section + login_section + setup_section)

    creator = make_creator(cfg_path)
    provider = creator.create_setup_provider('example_openstack')
    from elasticluster.providers.ansible_provider import AnsibleSetupProvider
    assert isinstance(provider, AnsibleSetupProvider)
Ejemplo n.º 47
0
    def execute(self):
        """
        Import a cluster from the ZIP archive `self.params.file`.

        The archive is unpacked into a temporary directory; the first
        cluster found there is saved into the repository built from
        `self.params.config` and `self.params.storage` (after being
        renamed to `self.params.rename`, if that is set), and its
        known-hosts file and any SSH key files found in the archive are
        copied into the repository's storage path.

        This method always ends by calling `sys.exit`, with code:

        * 0 if the import succeeded;
        * 1 if any exception was raised during the import (including
          the case where a cluster with the same name already exists);
        * 2 if no cluster could be loaded from the archive.
        """
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        repo = creator.create_repository()
        tmpdir = tempfile.mkdtemp()
        log.debug("Using temporary directory %s", tmpdir)
        tmpconf = make_creator(self.params.config, storage_path=tmpdir)
        tmprepo = tmpconf.create_repository()

        rc = 0
        try:
            with ZipFile(self.params.file, 'r') as zipfile:
                log.debug("ZIP file %s opened", self.params.file)
                zipfile.extractall(tmpdir)

            newclusters = tmprepo.get_all()
            if not newclusters:
                # check *before* indexing, otherwise an empty archive
                # surfaces as an unhelpful `IndexError`
                log.error("ZIP file %s does not contain a valid cluster.",
                          self.params.file)
                rc = 2
            else:
                cluster = newclusters[0]
                cur_clusternames = [c.name for c in repo.get_all()]
                oldname = cluster.name
                if self.params.rename:
                    cluster.name = self.params.rename
                    for node in cluster.get_all_nodes():
                        node.cluster_name = cluster.name
                if cluster.name in cur_clusternames:
                    raise Exception(
                        "A cluster with name %s already exists. Use "
                        "option --rename to rename the cluster to be "
                        "imported." % cluster.name)

                # Save the cluster in the new position
                cluster.repository = repo
                repo.save_or_update(cluster)
                dest = cluster.repository.storage_path

                # Copy the known hosts
                srcfile = os.path.join(tmpdir, oldname + '.known_hosts')
                destfile = os.path.join(dest, cluster.name + '.known_hosts')
                shutil.copy(srcfile, destfile)

                # Copy the ssh keys, if present
                for attr in ('user_key_public', 'user_key_private'):
                    keyfile = getattr(cluster, attr)
                    keybase = os.path.basename(keyfile)
                    srcfile = os.path.join(tmpdir, keybase)
                    # stays `None` unless the cluster-wide key was
                    # actually found in the archive and imported
                    cluster_keydest = None
                    if os.path.isfile(srcfile):
                        log.info("Importing key file %s", keybase)
                        cluster_keydest = os.path.join(dest, keybase)
                        shutil.copy(srcfile, cluster_keydest)
                        setattr(cluster, attr, cluster_keydest)

                    for node in cluster.get_all_nodes():
                        nodekeyfile = getattr(node, attr)
                        if nodekeyfile == keyfile:
                            # node shares the cluster-wide key: follow
                            # it to its new location, if it was imported
                            if cluster_keydest is not None:
                                setattr(node, attr, cluster_keydest)
                            continue

                        # Node-specific key: look for it where the
                        # archive stores it, not at its original path
                        # (which need not exist on this machine).
                        nodekeybase = os.path.basename(nodekeyfile)
                        srcfile = os.path.join(tmpdir, oldname, node.kind,
                                               node.name, nodekeybase)
                        if not os.path.isfile(srcfile):
                            # key not in the archive: leave the node's
                            # key path untouched
                            continue
                        log.info("Importing key file %s for node %s",
                                 nodekeybase, node.name)
                        destdir = os.path.join(dest, cluster.name,
                                               node.kind, node.name)
                        if not os.path.isdir(destdir):
                            os.makedirs(destdir)
                        destfile = os.path.join(destdir, nodekeybase)
                        shutil.copy(srcfile, destfile)
                        setattr(node, attr, destfile)

                # persist the updated key paths
                repo.save_or_update(cluster)
        except Exception as ex:
            log.error("Unable to import from zipfile %s: %s",
                      self.params.file, ex)
            rc = 1
        finally:
            if os.path.isdir(tmpdir):
                shutil.rmtree(tmpdir)
            log.info("Cleaning up directory %s", tmpdir)

        if rc == 0:
            print("Successfully imported cluster from ZIP %s to %s" %
                  (self.params.file, repo.storage_path))
        sys.exit(rc)
Ejemplo n.º 48
0
    def execute(self):
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)

        try:
            cluster = creator.load_cluster(self.params.cluster)
        except ClusterNotFound:
            log.error("Cluster `%s` not found in storage dir %s." %
                      (self.params.cluster, self.params.storage))
            sys.exit(1)

        if os.path.exists(self.params.zipfile) and not self.params.overwrite:
            log.error("ZIP file `%s` already exists." % self.params.zipfile)
            sys.exit(1)

        with ZipFile(self.params.zipfile, 'w') as zipfile:
            # The root of the zip file will contain:
            # * the storage file
            # * the known_hosts file
            # * ssh public and prived keys, if --save-keys is used
            #
            # it will NOT contain the ansible inventory file, as this
            # is automatically created when needed.
            #
            # Also, if --save-keys is used and there is an host with a
            # different ssh private/public key than the default, they
            # will be saved in:
            #
            #   ./<cluster>/<group>/<nodename>/
            #
            def verbose_add(fname, basedir='', comment=None):
                zipname = basedir + os.path.basename(fname)
                log.info("Adding '%s' as '%s'" % (fname, zipname))
                zipfile.write(fname, zipname)
                if comment:
                    info = zipfile.getinfo(zipname)
                    info.comment = comment

            try:
                verbose_add(cluster.storage_file, comment='cluster-file')
                verbose_add(cluster.known_hosts_file, comment='known_hosts')
                if self.params.save_keys:
                    # that's sensible stuff, let's ask permission.
                    print("""
==========================
WARNING! WARNING! WARNING!
==========================
You are about to add your SSH *private* key to the
ZIP archive. These are sensible data: anyone with
access to the ZIP file will have access to any host
where this private key has been deployed.

                    """)
                    confirm_or_abort(
                        "Are you sure you still want to copy them?",
                        msg="Aborting upon user request.")

                    # Also save all the public and private keys we can find.

                    # Cluster keys
                    verbose_add(cluster.user_key_public)
                    verbose_add(cluster.user_key_private)

                    # Node keys, if found
                    for node in cluster.get_all_nodes():
                        if node.user_key_public != cluster.user_key_public:
                            verbose_add(
                                node.user_key_public, "%s/%s/%s/" %
                                (cluster.name, node.kind, node.name))
                    for node in cluster.get_all_nodes():
                        if node.user_key_private != cluster.user_key_private:
                            verbose_add(
                                node.user_key_private, "%s/%s/%s/" %
                                (cluster.name, node.kind, node.name))
            except OSError as ex:
                # A file is probably missing!
                log.error(
                    "Fatal error: cannot add file %s to zip archive: %s." %
                    (ex.filename, ex))
                sys.exit(1)

        print("Cluster '%s' correctly exported into %s" %
              (cluster.name, self.params.zipfile))
Ejemplo n.º 49
0
    def execute(self):
        """
        Resize an existing cluster by adding and/or removing nodes.

        `self.params.nodes_to_add` and `self.params.nodes_to_remove`
        map a node group name to the number of nodes to add to, or
        remove from, that group.  New nodes are configured after the
        first existing node of the same group, unless
        `self.params.template` names a cluster template to take the
        configuration from.  Nodes are removed from the end of the
        group's node list, after asking for confirmation unless
        `self.params.yes` is set.

        Afterwards the cluster is started and, unless
        `self.params.no_setup` is set, set up again; a summary of the
        cluster is printed at the end.

        Returns early (with an error message) if the cluster cannot be
        loaded, or if no template is given for a group that has no
        nodes to copy the configuration from.
        """
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)

        # Get current cluster configuration
        cluster_name = self.params.cluster
        template = self.params.template

        try:
            cluster = creator.load_cluster(cluster_name)
            cluster.update()
        except (ClusterNotFound, ConfigurationError) as ex:
            log.error("Listing nodes from cluster %s: %s\n",
                      cluster_name, ex)
            return
        for grp in self.params.nodes_to_add:
            n_to_add = self.params.nodes_to_add[grp]
            print("Adding %d %s node(s) to the cluster"
                  "" % (n_to_add, grp))

            # Currently we can't save which template was used to setup a
            # cluster, therefore we imply the configuration of the new nodes
            # to match already existent nodes in this group. If no node was
            # added to this group yet, it will abort and ask for the
            # `--template` argument.
            # TODO: find a better solution for this problem, it makes things
            #       complicated for the user
            if (grp not in cluster.nodes or not cluster.nodes[grp]) \
                    and not template:
                # parenthesized so that it is valid (and prints the same
                # text) both as a Python 2 statement and a Python 3 call
                print("Elasticluster can not infer which template to use for "
                      "the new node(s). Please provide the template with "
                      "the `-t` or `--template` option")
                return

            if not template:
                sample_node = cluster.nodes[grp][0]
                for _ in range(n_to_add):
                    cluster.add_node(grp,
                                     sample_node.image_id,
                                     sample_node.image_user,
                                     sample_node.flavor,
                                     sample_node.security_group,
                                     image_userdata=sample_node.image_userdata,
                                     **sample_node.extra)
            else:
                conf = creator.cluster_conf[template]
                conf_kind = conf['nodes'][grp]

                image_user = conf['login']['image_user']
                userdata = conf_kind.get('image_userdata', '')

                # everything not passed explicitly to `add_node` below
                # is forwarded as extra keyword arguments
                extra = conf_kind.copy()
                extra.pop('image_id', None)
                extra.pop('flavor', None)
                extra.pop('security_group', None)
                extra.pop('image_userdata', None)

                for _ in range(n_to_add):
                    cluster.add_node(grp,
                                     conf_kind['image_id'],
                                     image_user,
                                     conf_kind['flavor'],
                                     conf_kind['security_group'],
                                     image_userdata=userdata,
                                     **extra)

        for grp in self.params.nodes_to_remove:
            n_to_rm = self.params.nodes_to_remove[grp]
            if n_to_rm <= 0:
                # `nodes[-0:]` is the *whole* list (and a negative count
                # selects an arbitrary tail), so slicing with such a
                # value would remove nodes nobody asked to remove
                log.warning(
                    "Ignoring request to remove %d node(s) from group %s",
                    n_to_rm, grp)
                continue
            print("Removing %d %s node(s) from the cluster."
                  "" % (n_to_rm, grp))
            to_remove = cluster.nodes[grp][-n_to_rm:]
            print("The following nodes will be removed from the cluster.")
            print("    " + str.join("\n    ", [n.name for n in to_remove]))

            if not self.params.yes:
                confirm_or_abort("Do you really want to remove them?",
                                 msg="Aborting upon user request.")

            for node in to_remove:
                cluster.nodes[grp].remove(node)
                node.stop()

        cluster.start()
        if self.params.no_setup:
            print("NOT configuring the cluster as requested.")
        else:
            print("Reconfiguring the cluster.")
            cluster.setup()
        print(cluster_summary(cluster))
Ejemplo n.º 50
0
    def execute(self):
        """
        Resize an existing cluster by adding and/or removing nodes.

        `self.params.nodes_to_add` and `self.params.nodes_to_remove`
        map a node group name to the number of nodes to add to, or
        remove from, that group.  New nodes are configured after the
        first existing node of the same group, unless
        `self.params.template` names a cluster template to take the
        configuration from.  Nodes are removed from the end of the
        group's node list, after asking for confirmation unless
        `self.params.yes` is set.

        Afterwards the cluster is started and, unless
        `self.params.no_setup` is set, set up again; a summary of the
        cluster is printed at the end.

        Returns early (with an error message) if the cluster cannot be
        loaded, or if no template is given for a group that has no
        nodes to copy the configuration from.
        """
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)

        # Get current cluster configuration
        cluster_name = self.params.cluster
        template = self.params.template

        try:
            cluster = creator.load_cluster(cluster_name)
            cluster.update()
        except (ClusterNotFound, ConfigurationError) as ex:
            log.error("Listing nodes from cluster %s: %s\n",
                      cluster_name, ex)
            return
        for grp in self.params.nodes_to_add:
            n_to_add = self.params.nodes_to_add[grp]
            print("Adding %d %s node(s) to the cluster"
                  "" % (n_to_add, grp))

            # Currently we can't save which template was used to setup a
            # cluster, therefore we imply the configuration of the new nodes
            # to match already existent nodes in this group. If no node was
            # added to this group yet, it will abort and ask for the
            # `--template` argument.
            # TODO: find a better solution for this problem, it makes things
            #       complicated for the user
            if (grp not in cluster.nodes or not cluster.nodes[grp]) \
                    and not template:
                # parenthesized so that it is valid (and prints the same
                # text) both as a Python 2 statement and a Python 3 call
                print("Elasticluster can not infer which template to use for "
                      "the new node(s). Please provide the template with "
                      "the `-t` or `--template` option")
                return

            if not template:
                sample_node = cluster.nodes[grp][0]
                for _ in range(n_to_add):
                    cluster.add_node(grp,
                                     sample_node.image_id,
                                     sample_node.image_user,
                                     sample_node.flavor,
                                     sample_node.security_group,
                                     image_userdata=sample_node.image_userdata,
                                     **sample_node.extra)
            else:
                conf = creator.cluster_conf[template]
                conf_kind = conf['nodes'][grp]

                image_user = conf['login']['image_user']
                userdata = conf_kind.get('image_userdata', '')

                # everything not passed explicitly to `add_node` below
                # is forwarded as extra keyword arguments
                extra = conf_kind.copy()
                extra.pop('image_id', None)
                extra.pop('flavor', None)
                extra.pop('security_group', None)
                extra.pop('image_userdata', None)

                for _ in range(n_to_add):
                    cluster.add_node(grp,
                                     conf_kind['image_id'],
                                     image_user,
                                     conf_kind['flavor'],
                                     conf_kind['security_group'],
                                     image_userdata=userdata,
                                     **extra)

        for grp in self.params.nodes_to_remove:
            n_to_rm = self.params.nodes_to_remove[grp]
            if n_to_rm <= 0:
                # `nodes[-0:]` is the *whole* list (and a negative count
                # selects an arbitrary tail), so slicing with such a
                # value would remove nodes nobody asked to remove
                log.warning(
                    "Ignoring request to remove %d node(s) from group %s",
                    n_to_rm, grp)
                continue
            print("Removing %d %s node(s) from the cluster."
                  "" % (n_to_rm, grp))
            to_remove = cluster.nodes[grp][-n_to_rm:]
            print("The following nodes will be removed from the cluster.")
            print("    " + str.join("\n    ", [n.name for n in to_remove]))

            if not self.params.yes:
                confirm_or_abort("Do you really want to remove them?",
                                 msg="Aborting upon user request.")

            for node in to_remove:
                cluster.nodes[grp].remove(node)
                node.stop()

        cluster.start()
        if self.params.no_setup:
            print("NOT configuring the cluster as requested.")
        else:
            print("Reconfiguring the cluster.")
            cluster.setup()
        print(cluster_summary(cluster))
Ejemplo n.º 51
0
    def execute(self):
        """
        Import a cluster from the ZIP archive `self.params.file`.

        The archive is unpacked into a temporary directory; the first
        cluster found there is saved into the repository built from
        `self.params.config` and `self.params.storage` (after being
        renamed to `self.params.rename`, if that is set), and its
        known-hosts file and any SSH key files found in the archive are
        copied into the repository's storage path.

        This method always ends by calling `sys.exit`, with code:

        * 0 if the import succeeded;
        * 1 if any exception was raised during the import (including
          the case where a cluster with the same name already exists);
        * 2 if no cluster could be loaded from the archive.
        """
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)
        repo = creator.create_repository()
        tmpdir = tempfile.mkdtemp()
        log.debug("Using temporary directory %s", tmpdir)
        tmpconf = make_creator(self.params.config, storage_path=tmpdir)
        tmprepo = tmpconf.create_repository()

        rc = 0
        try:
            with ZipFile(self.params.file, 'r') as zipfile:
                log.debug("ZIP file %s opened", self.params.file)
                zipfile.extractall(tmpdir)

            newclusters = tmprepo.get_all()
            if not newclusters:
                # check *before* indexing, otherwise an empty archive
                # surfaces as an unhelpful `IndexError`
                log.error("ZIP file %s does not contain a valid cluster.",
                          self.params.file)
                rc = 2
            else:
                cluster = newclusters[0]
                cur_clusternames = [c.name for c in repo.get_all()]
                oldname = cluster.name
                if self.params.rename:
                    cluster.name = self.params.rename
                    for node in cluster.get_all_nodes():
                        node.cluster_name = cluster.name
                if cluster.name in cur_clusternames:
                    raise Exception(
                        "A cluster with name %s already exists. Use "
                        "option --rename to rename the cluster to be "
                        "imported." % cluster.name)

                # Save the cluster in the new position
                cluster.repository = repo
                repo.save_or_update(cluster)
                dest = cluster.repository.storage_path

                # Copy the known hosts
                srcfile = os.path.join(tmpdir, oldname + '.known_hosts')
                destfile = os.path.join(dest, cluster.name + '.known_hosts')
                shutil.copy(srcfile, destfile)

                # Copy the ssh keys, if present
                for attr in ('user_key_public', 'user_key_private'):
                    keyfile = getattr(cluster, attr)
                    keybase = os.path.basename(keyfile)
                    srcfile = os.path.join(tmpdir, keybase)
                    # stays `None` unless the cluster-wide key was
                    # actually found in the archive and imported
                    cluster_keydest = None
                    if os.path.isfile(srcfile):
                        log.info("Importing key file %s", keybase)
                        cluster_keydest = os.path.join(dest, keybase)
                        shutil.copy(srcfile, cluster_keydest)
                        setattr(cluster, attr, cluster_keydest)

                    for node in cluster.get_all_nodes():
                        nodekeyfile = getattr(node, attr)
                        if nodekeyfile == keyfile:
                            # node shares the cluster-wide key: follow
                            # it to its new location, if it was imported
                            if cluster_keydest is not None:
                                setattr(node, attr, cluster_keydest)
                            continue

                        # Node-specific key: look for it where the
                        # archive stores it, not at its original path
                        # (which need not exist on this machine).
                        nodekeybase = os.path.basename(nodekeyfile)
                        srcfile = os.path.join(tmpdir, oldname, node.kind,
                                               node.name, nodekeybase)
                        if not os.path.isfile(srcfile):
                            # key not in the archive: leave the node's
                            # key path untouched
                            continue
                        log.info("Importing key file %s for node %s",
                                 nodekeybase, node.name)
                        destdir = os.path.join(dest, cluster.name,
                                               node.kind, node.name)
                        if not os.path.isdir(destdir):
                            os.makedirs(destdir)
                        destfile = os.path.join(destdir, nodekeybase)
                        shutil.copy(srcfile, destfile)
                        setattr(node, attr, destfile)

                # persist the updated key paths
                repo.save_or_update(cluster)
        except Exception as ex:
            log.error("Unable to import from zipfile %s: %s",
                      self.params.file, ex)
            rc = 1
        finally:
            if os.path.isdir(tmpdir):
                shutil.rmtree(tmpdir)
            log.info("Cleaning up directory %s", tmpdir)

        if rc == 0:
            print("Successfully imported cluster from ZIP %s to %s"
                  % (self.params.file, repo.storage_path))
        sys.exit(rc)
Ejemplo n.º 52
0
    def execute(self):
        creator = make_creator(self.params.config,
                               storage_path=self.params.storage)

        try:
            cluster = creator.load_cluster(self.params.cluster)
        except ClusterNotFound:
            log.error("Cluster `%s` not found in storage dir %s."
                      % (self.params.cluster, self.params.storage))
            sys.exit(1)

        if os.path.exists(self.params.zipfile) and not self.params.overwrite:
            log.error("ZIP file `%s` already exists." % self.params.zipfile)
            sys.exit(1)

        with ZipFile(self.params.zipfile, 'w') as zipfile:
            # The root of the zip file will contain:
            # * the storage file
            # * the known_hosts file
            # * ssh public and prived keys, if --save-keys is used
            #
            # it will NOT contain the ansible inventory file, as this
            # is automatically created when needed.
            #
            # Also, if --save-keys is used and there is an host with a
            # different ssh private/public key than the default, they
            # will be saved in:
            #
            #   ./<cluster>/<group>/<nodename>/
            #
            def verbose_add(fname, basedir='', comment=None):
                zipname = basedir + os.path.basename(fname)
                log.info("Adding '%s' as '%s'" % (fname, zipname))
                zipfile.write(fname, zipname)
                if comment:
                    info = zipfile.getinfo(zipname)
                    info.comment = comment

            try:
                verbose_add(cluster.storage_file, comment='cluster-file')
                verbose_add(cluster.known_hosts_file, comment='known_hosts')
                if self.params.save_keys:
                    # that's sensible stuff, let's ask permission.
                    print("""
==========================
WARNING! WARNING! WARNING!
==========================
You are about to add your SSH *private* key to the
ZIP archive. These are sensible data: anyone with
access to the ZIP file will have access to any host
where this private key has been deployed.

                    """)
                    confirm_or_abort(
                        "Are you sure you still want to copy them?",
                        msg="Aborting upon user request.")

                    # Also save all the public and private keys we can find.

                    # Cluster keys
                    verbose_add(cluster.user_key_public)
                    verbose_add(cluster.user_key_private)

                    # Node keys, if found
                    for node in cluster.get_all_nodes():
                        if node.user_key_public != cluster.user_key_public:
                            verbose_add(node.user_key_public,
                                        "%s/%s/%s/" % (cluster.name,
                                                       node.kind,
                                                       node.name))
                    for node in cluster.get_all_nodes():
                        if node.user_key_private != cluster.user_key_private:
                            verbose_add(node.user_key_private,
                                        "%s/%s/%s/" % (cluster.name,
                                                       node.kind,
                                                       node.name))
            except OSError as ex:
                # A file is probably missing!
                log.error("Fatal error: cannot add file %s to zip archive: %s."
                          % (ex.filename, ex))
                sys.exit(1)

        print("Cluster '%s' correctly exported into %s" %
              (cluster.name, self.params.zipfile))