Example #1
0
    def doWork(self):
        """Start the machine instance(s) and deploy the cluster on them.

        A ``ConfigHolder`` is built from the attributes of ``self.options``
        and shared by the VM manager created for ``self.image`` and by the
        ``Cluster`` object.  The value returned by ``Cluster.deploy()`` is
        discarded; this method always returns ``None``.
        """
        settings = ConfigHolder(vars(self.options))
        vmManager = VmManagerFactory.create(self.image, settings)
        # The cluster is built before the instances are started, but only
        # deployed once they have been launched.
        targetCluster = Cluster(settings, vmManager, self.options.master_vmid)

        printAction('Starting cluster')
        vmManager.runInstance()
        targetCluster.deploy()

        printStep('Done!')
Example #2
0
    def runInstance(self, details=False):
        """Resolve the image reference and start the requested machine(s).

        ``self.vm_image`` is replaced by the value returned from the helper
        matching its kind (Marketplace image ID, disk ID or alias URL).  One
        VM is then started per requested instance; its ID and network
        details are appended to ``self.vmIds``, ``self.vmIdsAndNetwork`` and
        ``self.instancesDetail``, and the IDs are saved via ``_saveVmIds()``.

        :param details: when true, return ``self.vmIdsAndNetwork`` (a list of
            ``(vmId, networkName, ip)`` tuples) instead of ``self.vmIds``.
        :returns: ``self.vmIds`` or ``self.vmIdsAndNetwork``, see ``details``.
        :raises Exceptions.ValidationException: if ``self.vm_image`` is
            neither an image ID, a disk ID nor an alias URL.
        """
        self._printContacting()

        # The checks are tried in this order; the first match wins.
        if Image.isImageId(self.vm_image):
            self._checkImageExists(self.vm_image)
            self.vm_image = self._prependMarketplaceUrlIfImageId(self.vm_image)
        elif Image.isDiskId(self.vm_image):
            self.vm_image = self._createDiskUrlIfDiskId(self.vm_image)
        elif self._isAliasUrl(self.vm_image):
            self.vm_image = self._resolveUrl(self.vm_image)
        else:
            message = ('Image reference must be an Alias URL, '
                       'Marketplace Image ID or Disk ID:  %s' % self.vm_image)
            raise Exceptions.ValidationException(message)

        printAction('Starting machine(s)')
        self.printDetail('Using VM template file: %s' % self.vmTemplateFile)
        template = self._buildVmTemplate(self.vmTemplateFile)

        noun = 'machines' if self.instanceNumber > 1 else 'machine'
        printStep('Starting %s %s' % (self.instanceNumber, noun))

        self.printDetail('on endpoint: %s' % self.endpoint, Util.VERBOSE_LEVEL_DETAILED)
        self.printDetail('with template:\n%s' % template, Util.VERBOSE_LEVEL_DETAILED)

        # Machines are numbered from 1 in the progress output.
        for machineNumber in range(1, self.instanceNumber + 1):
            vmId = self.cloud.vmStart(template)
            self.vmIds.append(vmId)

            networkName, ip = self.getNetworkDetail(vmId)
            self.vmIdsAndNetwork.append((vmId, networkName, ip))

            printStep('Machine %s (vm ID: %s)\n\t%s ip: %s'
                      % (machineNumber, vmId, networkName.title(), ip))
            self.instancesDetail.append({'id': vmId, 'ip': ip, 'networkName': networkName})
        self._saveVmIds()

        printStep('Done!')

        return self.vmIdsAndNetwork if details else self.vmIds
Example #3
0
 def _startService(self, componentName, componentInstallator):
     """Start a component's services if ``self.startComponent`` is set.

     :param componentName: name shown in the progress message.
     :param componentInstallator: object whose ``startServices()`` is called.
     """
     if not self.startComponent:
         return
     printAction('Starting %s services' % componentName)
     componentInstallator.startServices()
Example #4
0
 def _setupStep(self, componentName, componentInstallator):
     """Run a component's setup if ``self.setupStep`` is set.

     :param componentName: name shown in the progress message.
     :param componentInstallator: object whose ``setup()`` is called.
     """
     if not self.setupStep:
         return
     printAction('Setting up %s' % componentName)
     componentInstallator.setup()
Example #5
0
 def _installStep(self, componentName, componentInstallator):
     """Run a component's installation if ``self.installStep`` is set.

     :param componentName: name shown in the progress message.
     :param componentInstallator: object whose ``install()`` is called.
     """
     if not self.installStep:
         return
     printAction('Installing %s' % componentName)
     componentInstallator.install()
Example #6
0
    def deploy(self):
        """Wait for the cluster VMs to come up and configure them as a cluster.

        Every VM listed in ``self._runner.vmIds`` (preceded by the VM
        ``self._master_vmid`` when the cluster is heterogeneous) is awaited
        until it is running.  A ``Host`` is appended to ``self.hosts`` for
        each VM whose network is ``'public'``; VMs on any other network are
        skipped.  Once the hosts answer ``ssh.waitForConnectivity()``, the
        configuration steps enabled on this instance are executed.

        The first entry of ``self.hosts`` acts as the master node and all
        remaining entries are workers.

        :returns: ``0`` on success; ``128`` when some hosts are unreachable
            and failures are not tolerated, or when no usable host is left.
        """
        ssh = SSHUtil(self._runner.userPrivateKeyFile, self.cluster_admin)

        vmNetworkDetails = []
        # Per-VM timeout handed to the runner and to the SSH connectivity
        # check.  NOTE(review): presumably seconds, i.e. 10 minutes (an
        # earlier comment claimed 15 minutes) -- confirm the unit.
        vmStartTimeout = 600

        printStep("Waiting for all cluster VMs to be instantiated...")
        if self._is_heterogeneous:
            # The master goes first so that it ends up as self.hosts[0].
            printStep("Waiting for master")
            self._runner.waitUntilVmRunningOrTimeout(self._master_vmid, vmStartTimeout)
            vmNetworkDetails.append(self._runner.getNetworkDetail(self._master_vmid))

        for vmId in self._runner.vmIds:
            printDebug('Waiting for instance to start running %s' % str(vmId))
            self._runner.waitUntilVmRunningOrTimeout(vmId, vmStartTimeout)
            vmNetworkDetails.append(self._runner.getNetworkDetail(vmId))

        vm_cpu, vm_ram, vm_swap = self._runner.getInstanceResourceValues()

        for vmNetwork in vmNetworkDetails:
            # Only VMs attached to the 'public' network become cluster hosts.
            if vmNetwork[0] != 'public':
                continue

            host = Host()
            host.public_ip = vmNetwork[1]

            try:
                host.public_dns = socket.gethostbyaddr(host.public_ip)[0]
            except Exception:
                # The reverse lookup is best effort: fall back to the raw IP.
                # ``Exception`` (rather than a bare ``except``) lets
                # KeyboardInterrupt and SystemExit propagate.
                host.public_dns = host.public_ip

            host.cores = vm_cpu
            host.ram = vm_ram
            host.swap = vm_swap
            self.hosts.append(host)

        printStep("Waiting for all instances to become accessible...")

        # Each host gets exactly one connectivity attempt.
        failedHosts = []
        for host in self.hosts:
            if not ssh.waitForConnectivity(host, vmStartTimeout):
                printError('Timed out while connecting to %s.  Removing from target config. list.' % host.public_ip)
                failedHosts.append(host)

        if failedHosts:
            if not self.tolerate_failures:
                printError('Error instantiating some or all of the nodes. Bailing out...')
                if self.clean_after_failure:
                    self._runner.killInstances(self._runner.vmIds)
                return 128

            for host in failedHosts:
                self.hosts.remove(host)

        if not self.hosts:
            # No VM on the public network, or every host was dropped above:
            # there is no master to configure, so fail instead of raising
            # IndexError on self.hosts[0].
            printError('No accessible cluster nodes available. Bailing out...')
            if self.clean_after_failure:
                self._runner.killInstances(self._runner.vmIds)
            return 128

        master_node = self.hosts[0]
        worker_nodes = self.hosts[1:]

        printInfo('\tMaster is %s' % master_node.public_dns)

        for node in worker_nodes:
            printInfo('\tWorker: %s' % node.public_dns)

        # Configure the hosts
        printAction('Configuring nodes')

        # Try to install the missing packages
        if self.add_packages:
            self.doAddPackages(ssh)

        # For MPI clusters prepare the machinefile for mpirun
        if self.mpi_machine_file:
            self.doPrepareMPImachineFile(ssh, worker_nodes)

        if self.cluster_user:
            # Create a new user and prepare the environments for password-less ssh
            self.doCreateClusterUser(ssh, master_node)

        # Initialize the shared storage in NFS
        if self.shared_folder:
            self.doPrepareNFSSharedFolder(ssh, master_node, worker_nodes)

        if self.ssh_hostbased:
            self.doSetupSSHHostBasedCluster(ssh)

        # Update /etc/profile with StratusLab specific environment variables
        self.doUpdateEnvironmentVariables(ssh, master_node, worker_nodes)

        # Store the list of cluster nodes in a file under /tmp
        self.doPrepareNodeList(ssh, worker_nodes)

        # Update the /etc/hosts file for all hosts
        self.doUpdateHostsFile(ssh, master_node, worker_nodes)

        # Start any services defined in rc.cluster-services
        self.doStartClusterServices(ssh, master_node)

        return 0