def copy_file_to_hosts(self, hostlist, srcfile, remotepath):
    """Copy a local file to the same remote path on every host via scp.

    Parameters:
        hostlist: iterable of host objects exposing a ``public_ip`` attribute.
        srcfile: local path of the file to copy.
        remotepath: destination path on each remote host.

    Returns:
        0 on success for all hosts, otherwise the first non-zero
        ``os.system`` wait status (copying stops at the first failure).
    """
    # NOTE(review): the command line is built by plain string concatenation
    # and executed through a shell, so srcfile/remotepath are not escaped —
    # only safe for trusted inputs. subprocess.run([...]) would be safer.
    cmd = "scp" + self._options + " -i " + self._private_key
    for host in hostlist:
        printDebug('Copying to host %s' % host.public_ip)
        error = os.system(cmd + " " + srcfile + " " + self._username +
                          "@" + host.public_ip + ":" + remotepath)
        # Was `error > 0`: os.system returns a wait status, and ANY
        # non-zero value indicates failure.
        if error != 0:
            return error
    return 0
def run_remote_command(self, hostlist, command):
    """Run the same shell command on every host over ssh.

    Parameters:
        hostlist: iterable of host objects exposing a ``public_ip`` attribute.
        command: command string executed verbatim on each remote host.

    Returns:
        0 on success for all hosts, otherwise the first non-zero
        ``os.system`` wait status (execution stops at the first failure).
    """
    for host in hostlist:
        # NOTE(review): `-y` routes ssh logging to syslog on some builds —
        # confirm this flag is intentional for the target ssh client.
        cmd = ("ssh -y" + self._options + " -i " + self._private_key + " " +
               self._username + "@" + host.public_ip + " " + command)
        printDebug('Command: %s' % cmd)
        printDebug('Executing in host %s' % host.public_ip)
        error = os.system(cmd)
        # Was `error > 0`: os.system returns a wait status, and ANY
        # non-zero value indicates failure.
        if error != 0:
            return error
    return 0
def deploy(self):
    """Wait for all cluster VMs, build the host list, and configure the nodes.

    Flow: wait for every VM to reach the running state, collect public
    network details into Host objects, wait for ssh connectivity, then run
    the configuration steps (packages, MPI machinefile, cluster user, NFS,
    host-based ssh, environment, node list, /etc/hosts, cluster services).

    Returns:
        0 on success; 128 when some nodes failed to come up and
        ``tolerate_failures`` is False (instances are killed first when
        ``clean_after_failure`` is set).
    """
    ssh = SSHUtil(self._runner.userPrivateKeyFile, self.cluster_admin)

    # Wait until all the images are up and running.
    vmNetworkDetails = []
    vmStartTimeout = 600  # seconds to wait per VM

    printStep("Waiting for all cluster VMs to be instantiated...")
    if self._is_heterogeneous:
        printStep("Waiting for master")
        self._runner.waitUntilVmRunningOrTimeout(self._master_vmid,
                                                 vmStartTimeout)
        vmNetworkDetails.append(
            self._runner.getNetworkDetail(self._master_vmid))

    for vmId in self._runner.vmIds:
        printDebug('Waiting for instance to start running %s' % str(vmId))
        self._runner.waitUntilVmRunningOrTimeout(vmId, vmStartTimeout)
        vmNetworkDetails.append(self._runner.getNetworkDetail(vmId))

    vm_cpu, vm_ram, vm_swap = self._runner.getInstanceResourceValues()

    # Build Host entries for every VM with a public network interface.
    for vmNetwork in vmNetworkDetails:
        if vmNetwork[0] == 'public':
            host = Host()
            host.public_ip = vmNetwork[1]
            try:
                host.public_dns = socket.gethostbyaddr(host.public_ip)[0]
            # Was a bare `except:` — narrowed to the errors reverse DNS
            # can actually raise; the fallback stays best-effort.
            except (socket.herror, socket.gaierror, OSError):
                host.public_dns = host.public_ip
            host.cores = vm_cpu
            host.ram = vm_ram
            host.swap = vm_swap
            self.hosts.append(host)

    printStep("Waiting for all instances to become accessible...")
    failedHosts = []
    for host in self.hosts:
        # waitForConnectivity blocks up to vmStartTimeout; the original
        # while-loop always ran exactly once (each branch set a flag),
        # so a single check is behaviorally identical.
        if not ssh.waitForConnectivity(host, vmStartTimeout):
            printError('Timed out while connecting to %s. '
                       'Removing from target config. list.' % host.public_ip)
            failedHosts.append(host)

    if len(failedHosts) > 0:
        if self.tolerate_failures:
            for host in failedHosts:
                self.hosts.remove(host)
        else:
            # Fixed: this message previously contained a raw line break
            # inside the string literal (a syntax error).
            printError('Error instantiating some or all of the nodes. '
                       'Bailing out...')
            if self.clean_after_failure:
                self._runner.killInstances(self._runner.vmIds)
            return 128

    # NOTE(review): if tolerate_failures removed every host, the next
    # line raises IndexError — confirm callers guarantee >= 1 live host.
    master_node = self.hosts[0]
    worker_nodes = list(self.hosts)
    worker_nodes.remove(master_node)

    printInfo('\tMaster is %s' % master_node.public_dns)
    for node in worker_nodes:
        printInfo('\tWorker: %s' % node.public_dns)

    # Configure the hosts.
    printAction('Configuring nodes')

    # Try to install the missing packages.
    if self.add_packages:
        self.doAddPackages(ssh)

    # For MPI clusters prepare the machinefile for mpirun.
    if self.mpi_machine_file:
        self.doPrepareMPImachineFile(ssh, worker_nodes)

    if self.cluster_user:
        # Create a new user and prepare the environment for
        # password-less ssh.
        self.doCreateClusterUser(ssh, master_node)

    # Initialize the shared storage in NFS.
    if self.shared_folder:
        self.doPrepareNFSSharedFolder(ssh, master_node, worker_nodes)

    if self.ssh_hostbased:
        self.doSetupSSHHostBasedCluster(ssh)

    # Update /etc/profile with StratusLab specific environment variables.
    self.doUpdateEnvironmentVariables(ssh, master_node, worker_nodes)

    # Store the list of cluster nodes in a file under /tmp.
    self.doPrepareNodeList(ssh, worker_nodes)

    # Update the /etc/hosts file for all hosts.
    self.doUpdateHostsFile(ssh, master_node, worker_nodes)

    # Start any services defined in rc.cluster-services.
    self.doStartClusterServices(ssh, master_node)

    return 0