Beispiel #1
0
    def copy_file_to_hosts(self, hostlist, srcfile, remotepath):
        cmd = "scp" + self._options + " -i " + self._private_key

        for host in hostlist:
            printDebug('Copying to host %s' % host.public_ip)
            error = os.system(cmd + " " + srcfile + " " + self._username + "@" + host.public_ip + ":" + remotepath)
            if error > 0:
                return error

        return 0
Beispiel #2
0
    def run_remote_command(self, hostlist, command):
        for host in hostlist:
            cmd = "ssh -y" + self._options + " -i " + self._private_key + " " + \
                  self._username + "@" + host.public_ip + " " + command
            printDebug('Command: %s' % cmd)
            printDebug('Executing in host %s' % host.public_ip)
            error = os.system(cmd)
            if error > 0:
                return error

        return 0
Beispiel #3
0
    def deploy(self):
        ssh = SSHUtil(self._runner.userPrivateKeyFile, self.cluster_admin)

        # Wait until all the images are up and running
        vmNetworkDetails = []
        vmStartTimeout = 600

        # wait until the each machine is up or timeout after 15 minutes
        printStep("Waiting for all cluster VMs to be instantiated...")
        if self._is_heterogeneous:
            printStep("Waiting for master")
            self._runner.waitUntilVmRunningOrTimeout(self._master_vmid, vmStartTimeout)
            vmNetworkDetails.append(self._runner.getNetworkDetail(self._master_vmid))

        for vmId in self._runner.vmIds:
            printDebug('Waiting for instance to start running %s' % str(vmId))
            self._runner.waitUntilVmRunningOrTimeout(vmId, vmStartTimeout)
            vmNetworkDetails.append(self._runner.getNetworkDetail(vmId))

        vm_cpu, vm_ram, vm_swap = self._runner.getInstanceResourceValues()

        for vmNetwork in vmNetworkDetails:
            if vmNetwork[0] == 'public':
                host = Host()
                host.public_ip = vmNetwork[1]

                try:
                    host.public_dns = socket.gethostbyaddr(host.public_ip)[0]
                except:
                    host.public_dns = host.public_ip

                host.cores = vm_cpu
                host.ram = vm_ram
                host.swap = vm_swap
                self.hosts.append(host)

        printStep("Waiting for all instances to become accessible...")

        failedHosts = []

        for host in self.hosts:
            hostReady = False
            hostFailed = False

            while not hostReady and not hostFailed:
                if not ssh.waitForConnectivity(host, vmStartTimeout):
                    printError('Timed out while connecting to %s.  Removing from target config. list.' % host.public_ip)
                    failedHosts.append(host)
                    hostFailed = True
                else:
                    hostReady = True

        if len(failedHosts) > 0:
            if self.tolerate_failures:
                for host in failedHosts:
                    self.hosts.remove(host)
            else:
                printError('Error instantiating some or all of the nodes. Bailing out...')
                if self.clean_after_failure:
                    self._runner.killInstances(self._runner.vmIds)
                return 128

        master_node = self.hosts[0]

        worker_nodes = list(self.hosts)

        worker_nodes.remove(master_node)

        printInfo('\tMaster is %s' % master_node.public_dns)

        for node in worker_nodes:
            printInfo('\tWorker: %s' % node.public_dns)

        # Configure the hosts
        printAction('Configuring nodes')

        # Try to install the missing packages
        if self.add_packages:
            self.doAddPackages(ssh)

        # For MPI clusters prepare the machinefile for mpirun
        if self.mpi_machine_file:
            self.doPrepareMPImachineFile(ssh, worker_nodes)

        if self.cluster_user:
            # Create a new user and prepare the environments for password-less ssh
            self.doCreateClusterUser(ssh, master_node)

        # Initialize the shared storage in NFS
        if self.shared_folder:
            self.doPrepareNFSSharedFolder(ssh, master_node, worker_nodes)

        if self.ssh_hostbased:
            self.doSetupSSHHostBasedCluster(ssh)

        # Update /etc/profile with StratusLab specific environment variables
        self.doUpdateEnvironmentVariables(ssh, master_node, worker_nodes)

        # Store the list of cluster nodes in a file under /tmp
        self.doPrepareNodeList(ssh, worker_nodes)

        # Update the /etc/hosts file for all hosts
        self.doUpdateHostsFile(ssh, master_node, worker_nodes)

        # Start any services defined in rc.cluster-services
        self.doStartClusterServices(ssh, master_node)

        return 0