def doWork(self):
    configHolder = ConfigHolder(self.options.__dict__)
    runner = VmManagerFactory.create(self.image, configHolder)
    cluster = Cluster(configHolder, runner, self.options.master_vmid)

    printAction('Starting cluster')
    runner.runInstance()
    cluster.deploy()
    printStep('Done!')
def runInstance(self, details=False):
    self._printContacting()

    # Normalise the image reference to a URL, depending on whether it is a
    # Marketplace image ID, a persistent disk ID or an alias URL.
    if Image.isImageId(self.vm_image):
        self._checkImageExists(self.vm_image)
        self.vm_image = self._prependMarketplaceUrlIfImageId(self.vm_image)
    elif Image.isDiskId(self.vm_image):
        self.vm_image = self._createDiskUrlIfDiskId(self.vm_image)
    elif self._isAliasUrl(self.vm_image):
        self.vm_image = self._resolveUrl(self.vm_image)
    else:
        raise Exceptions.ValidationException('Image reference must be an '
                                             'Alias URL, Marketplace Image ID '
                                             'or Disk ID: %s' % self.vm_image)

    printAction('Starting machine(s)')

    self.printDetail('Using VM template file: %s' % self.vmTemplateFile)
    vmTpl = self._buildVmTemplate(self.vmTemplateFile)

    label = 'machines' if self.instanceNumber > 1 else 'machine'
    printStep('Starting %s %s' % (self.instanceNumber, label))

    self.printDetail('on endpoint: %s' % self.endpoint, Util.VERBOSE_LEVEL_DETAILED)
    self.printDetail('with template:\n%s' % vmTpl, Util.VERBOSE_LEVEL_DETAILED)

    for vmNb in range(self.instanceNumber):
        vmId = self.cloud.vmStart(vmTpl)
        self.vmIds.append(vmId)

        networkName, ip = self.getNetworkDetail(vmId)
        self.vmIdsAndNetwork.append((vmId, networkName, ip))

        vmIpPretty = '\t%s ip: %s' % (networkName.title(), ip)
        printStep('Machine %s (vm ID: %s)\n%s' % (vmNb + 1, vmId, vmIpPretty))

        self.instancesDetail.append({'id': vmId, 'ip': ip, 'networkName': networkName})

    self._saveVmIds()

    printStep('Done!')

    if not details:
        return self.vmIds
    else:
        return self.vmIdsAndNetwork
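
# The branching at the top of runInstance dispatches on the form of the image
# reference. Below is a minimal, self-contained sketch of that classification
# step; the function name and the regular expressions are illustrative
# assumptions, not StratusLab's actual validators (the real checks live in
# Image.isImageId, Image.isDiskId and _isAliasUrl).

import re

def classify_image_reference(ref):
    """Return 'image-id', 'disk-id' or 'alias-url', or raise ValueError.

    Hypothetical patterns: a Marketplace image ID is assumed to be a
    27-character base64-like identifier, a disk ID a UUID, and an alias URL
    anything with an http(s) scheme.
    """
    if re.match(r'^[A-Za-z0-9_-]{27}$', ref):
        return 'image-id'
    if re.match(r'^[0-9a-f]{8}(-[0-9a-f]{4}){3}-[0-9a-f]{12}$', ref):
        return 'disk-id'
    if re.match(r'^https?://', ref):
        return 'alias-url'
    raise ValueError('Image reference must be an Alias URL, '
                     'Marketplace Image ID or Disk ID: %s' % ref)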
def _startService(self, componentName, componentInstallator):
    if self.startComponent:
        printAction('Starting %s services' % componentName)
        componentInstallator.startServices()
def _setupStep(self, componentName, componentInstallator):
    if self.setupStep:
        printAction('Setting up %s' % componentName)
        componentInstallator.setup()
def _installStep(self, componentName, componentInstallator):
    if self.installStep:
        printAction('Installing %s' % componentName)
        componentInstallator.install()
def deploy(self):
    ssh = SSHUtil(self._runner.userPrivateKeyFile, self.cluster_admin)

    # Wait until all the images are up and running
    vmNetworkDetails = []
    vmStartTimeout = 600  # wait until each machine is up, or time out after 10 minutes

    printStep("Waiting for all cluster VMs to be instantiated...")

    if self._is_heterogeneous:
        printStep("Waiting for master")
        self._runner.waitUntilVmRunningOrTimeout(self._master_vmid, vmStartTimeout)
        vmNetworkDetails.append(self._runner.getNetworkDetail(self._master_vmid))

    for vmId in self._runner.vmIds:
        printDebug('Waiting for instance to start running %s' % str(vmId))
        self._runner.waitUntilVmRunningOrTimeout(vmId, vmStartTimeout)
        vmNetworkDetails.append(self._runner.getNetworkDetail(vmId))

    vm_cpu, vm_ram, vm_swap = self._runner.getInstanceResourceValues()

    # Build the list of hosts from the VMs that have a public network interface
    for vmNetwork in vmNetworkDetails:
        if vmNetwork[0] == 'public':
            host = Host()
            host.public_ip = vmNetwork[1]
            try:
                host.public_dns = socket.gethostbyaddr(host.public_ip)[0]
            except (socket.herror, socket.gaierror):
                # Fall back to the IP address if the reverse DNS lookup fails
                host.public_dns = host.public_ip
            host.cores = vm_cpu
            host.ram = vm_ram
            host.swap = vm_swap
            self.hosts.append(host)

    printStep("Waiting for all instances to become accessible...")

    failedHosts = []
    for host in self.hosts:
        hostReady = False
        hostFailed = False
        while not hostReady and not hostFailed:
            if not ssh.waitForConnectivity(host, vmStartTimeout):
                printError('Timed out while connecting to %s. '
                           'Removing it from the target configuration list.' % host.public_ip)
                failedHosts.append(host)
                hostFailed = True
            else:
                hostReady = True

    if failedHosts:
        if self.tolerate_failures:
            for host in failedHosts:
                self.hosts.remove(host)
        else:
            printError('Error instantiating some or all of the nodes. Bailing out...')
            if self.clean_after_failure:
                self._runner.killInstances(self._runner.vmIds)
            return 128

    master_node = self.hosts[0]
    worker_nodes = list(self.hosts)
    worker_nodes.remove(master_node)

    printInfo('\tMaster is %s' % master_node.public_dns)
    for node in worker_nodes:
        printInfo('\tWorker: %s' % node.public_dns)

    # Configure the hosts
    printAction('Configuring nodes')

    # Try to install the missing packages
    if self.add_packages:
        self.doAddPackages(ssh)

    # For MPI clusters prepare the machine file for mpirun
    if self.mpi_machine_file:
        self.doPrepareMPImachineFile(ssh, worker_nodes)

    # Create a new user and prepare the environment for password-less ssh
    if self.cluster_user:
        self.doCreateClusterUser(ssh, master_node)

    # Initialize the shared storage in NFS
    if self.shared_folder:
        self.doPrepareNFSSharedFolder(ssh, master_node, worker_nodes)

    if self.ssh_hostbased:
        self.doSetupSSHHostBasedCluster(ssh)

    # Update /etc/profile with StratusLab specific environment variables
    self.doUpdateEnvironmentVariables(ssh, master_node, worker_nodes)

    # Store the list of cluster nodes in a file under /tmp
    self.doPrepareNodeList(ssh, worker_nodes)

    # Update the /etc/hosts file for all hosts
    self.doUpdateHostsFile(ssh, master_node, worker_nodes)

    # Start any services defined in rc.cluster-services
    self.doStartClusterServices(ssh, master_node)

    return 0
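
# The "Waiting for all instances to become accessible..." phase above relies on
# SSHUtil.waitForConnectivity. A minimal sketch of that kind of readiness probe,
# using only the standard library and assuming that a reachable TCP port 22 is a
# good enough proxy for "accessible", could look like the following; the function
# name and defaults are illustrative assumptions, not part of the StratusLab API.

import socket
import time

def wait_for_ssh(ip, timeout=600, poll_interval=10, port=22):
    """Poll a host's SSH port until it accepts TCP connections or the timeout expires.

    Returns True if the port became reachable within `timeout` seconds.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        sock = None
        try:
            sock = socket.create_connection((ip, port), timeout=poll_interval)
            return True
        except (socket.timeout, socket.error):
            time.sleep(poll_interval)
        finally:
            if sock is not None:
                sock.close()
    return False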