Example #1
    def doUpdateEnvironmentVariables(self, ssh, master_node, worker_nodes):
        printStep('Updating environment variables')
        active_nodes = []
        if self.include_master:
            active_nodes = self.hosts
        else:
            active_nodes = worker_nodes

        # Find the total available number of cores
        total_cores = 0
        for node in active_nodes:
            total_cores += int(node.cores)

        counter = 0
        for node in self.hosts:
            target = []
            target.append(node)
            ssh.run_remote_command(
                target,
                "'echo export STRATUSLAB_NC=" + str(counter) +
                " > /etc/profile.d/stratuslab_cluster.sh && "
                "echo export STRATUSLAB_CMASTER=" + master_node.public_dns +
                " >> /etc/profile.d/stratuslab_cluster.sh && "
                "echo export STRATUSLAB_CSIZE=" + str(len(active_nodes)) +
                " >> /etc/profile.d/stratuslab_cluster.sh && "
                "echo export STRATUSLAB_CMAX_CORES=" + str(total_cores) +
                " >> /etc/profile.d/stratuslab_cluster.sh'")
            counter += 1
Example #2
 def _installPackages(self, section):
     packages = self.packages[self.profile].get(section, [])
     if packages:
         printStep('Installing packages on %s for section "%s": %s'
                   % (self.profile, section,
                      ', '.join(packages)))
         self.system.installNodePackages(packages)
Example #3
 def _sshPDisk(self, cmd, errorMsg, dontRaiseOnError=False):
     cmd_str = ' '.join(cmd)
     printStep("Executing: %s" % cmd_str)
     retCode, output = sshCmdWithOutput(cmd_str, self.pdisk.persistentDiskIp, user=getuser(),
                                        sshKey=self.pdisk.persistentDiskPrivateKey.replace('.pub', ''))
     if not dontRaiseOnError and retCode != 0:
         raise Exception('%s\n: Error: %s' % (errorMsg, output))
     return output
Example #4
    def _setupFrontend(self):
        if self._backupConfigFileExists():
            printWarning("Policy validation backup file %s already exists, skipping configuration" % PolicyValidator.CONFIG_SAV)
            return

        printStep('Creating policy validation configuration file')
        self._backup()
        self._writeConfigFromTemplate()
Example #5
    def doPrepareMPImachineFile(self, ssh, worker_nodes):
        printStep('Preparing MPI machine file')
        if self.include_master:
            target = self.hosts
        else:
            target = worker_nodes

        self.create_machine_file(target, "/tmp/machinefile", isForMPI=True)
        ssh.copy_file_to_hosts(self.hosts, "/tmp/machinefile", "/tmp")
        os.unlink("/tmp/machinefile")
Example #6
 def _createLvmGroup(self):
     if 0 == self.system._nodeShell('%s %s'
             % (self.persistentDiskLvmVgdisplay, self.persistentDiskLvmDevice)):
         return
     printStep('Creating LVM volume group...')
     self.system._nodeShell('%s %s'
                            % (self.persistentDiskLvmPvcreate, self.persistentDiskPhysicalDevices))
     self.system._nodeShell('%s %s %s'
                            % (self.persistentDiskLvmVgcreate, self.persistentDiskLvmDevice,
                               self.persistentDiskPhysicalDevices))
Example #7
 def _configureNodeSudo(self):
     printStep('Configuring sudo rights...')
     self.system._remoteAppendOrReplaceInFile('/etc/sudoers',
                                              '%s ALL = NOPASSWD: /sbin/iscsiadm, /usr/sbin/lsof, /usr/bin/virsh' % self.oneUsername,
                                              '%s ALL = NOPASSWD: /sbin/iscsiadm, /usr/sbin/lsof, /usr/bin/virsh' % self.oneUsername)
     self.system._remoteAppendOrReplaceInFile('/etc/sudoers',
                                              'Defaults:%s !requiretty' % self.oneUsername,
                                              'Defaults:%s !requiretty' % self.oneUsername)
     self.system._remoteAppendOrReplaceInFile('/etc/sudoers',
                                              'Defaults:%s !requiretty' % 'root',
                                              'Defaults:%s !requiretty' % 'root')
Example #8
    def doUpdateHostsFile(self, ssh, master_node, worker_nodes):
        printStep('Updating hosts file')
        ssh.run_remote_command(
            self.hosts,
            "'echo  >> /etc/hosts && "
            " echo \"# Cluster nodes\" >> /etc/hosts && "
            " echo " + master_node.public_ip + " " + master_node.public_dns +
            " " + "master >> /etc/hosts'")

        counter = 0
        for host in worker_nodes:
            ssh.run_remote_command(
                self.hosts,
                " 'echo " + host.public_ip + " " + host.public_dns +
                " worker-" + str(counter) + " >> /etc/hosts'")
            counter += 1
Example #9
    def doWork(self):
        configHolder = ConfigHolder(self.options.__dict__)
        runner = VmManagerFactory.create(self.image, configHolder)
        cluster = Cluster(configHolder, runner, self.options.master_vmid)

        printAction('Starting cluster')
        runner.runInstance()

        cluster.deploy()

        printStep('Done!')
Example #10
    def doSetupSSHHostBasedCluster(self, ssh):
        printStep('Configuring passwordless host-based ssh authentication')
        ssh.run_remote_command(self.hosts,
                               "'echo \"IgnoreRhosts no\" >> /etc/ssh/sshd_config && service sshd restart &> /dev/null && " +
                               "echo \"HostbasedAuthentication yes\n" +
                               "StrictHostKeyChecking no\n" +
                               "EnableSSHKeysign yes\" >> /etc/ssh/ssh_config'")

        for host in self.hosts:
            ssh.run_remote_command(
                self.hosts,
                "'ssh-keyscan -t rsa " + host.public_dns +
                " 2>/dev/null >> /etc/ssh/ssh_known_hosts && "
                "echo " + host.public_dns + " root >> /root/.shosts'")
Example #11
 def _writePdiskConfig(self):
     printStep('Writing configuration...')
     self._overrideConfig('disk.store.share', self.persistentDiskShare)
     self._overrideConfig('disk.store.nfs.location', self.persistentDiskNfsMountPoint)
     self._overrideConfig('disk.store.iscsi.type', self.persistentDiskStorage)
     self._overrideConfig('disk.store.iscsi.file.location', self.persistentDiskFileLocation)
     self._overrideConfig('disk.store.lvm.device', self.persistentDiskLvmDevice)
     self._overrideConfig('disk.store.lvm.create', self.persistentDiskLvmCreate)
     self._overrideConfig('disk.store.lvm.remove', self.persistentDiskLvmRemove)
     self._overrideConfig('disk.store.cloud.node.admin', self.oneUsername)
     self._overrideConfig('disk.store.cloud.node.ssh_keyfile', self.cloudNodeKey)
     self._overrideConfig('disk.store.cloud.node.vm_dir', self.persistentDiskCloudVmDir)
Example #12
 def _configureNfsServer(self):
     printStep('Configuring NFS sharing...')
     if self._nfsShareAlreadyExists():
         self.system.configureExistingNfsShare(self.persistentDiskExistingNfs,
                                               self.persistentDiskNfsMountPoint)
     elif self.profile == 'node':
         self.system.configureExistingNfsShare(
             '%s:%s' % (VolumeManager.getFQNHostname(self.persistentDiskIp),
                        self.persistentDiskNfsMountPoint),
             self.persistentDiskNfsMountPoint)
     else:
         self.system.configureNewNfsServer(self.persistentDiskNfsMountPoint,
                                           self.networkAddr,
                                           self.networkMask)
Example #13
    def doCreateClusterUser(self, ssh, master_node):
        printStep('Creating additional user')
        master_only = []
        master_only.append(master_node)
        ssh.run_remote_command(self.hosts, "useradd -m " + self.cluster_user)
        ssh.run_remote_command(
            master_only,
            ' "su - ' + self.cluster_user +
            " -c 'ssh-keygen -q -t rsa -N " + '\\"\\"' + " -f ~/.ssh/id_rsa' " + '"')
        ssh.run_remote_command(
            master_only,
            ' "su - ' + self.cluster_user +
            " -c 'cp ~/.ssh/id_rsa.pub ~/.ssh/authorized_keys' " + '"')

        #if self.shared_folder !="home":
        #    for host in self.hosts:
        #        ssh.run_remote_command(master_only, "scp -r /home/"+ self.cluster_user+"/.ssh " + host.public_ip + ":/home/" + self.cluster_user)

        if self.ssh_hostbased:
            for host in self.hosts:
                ssh.run_remote_command(
                    self.hosts,
                    ' "su - ' + self.cluster_user + " -c 'echo " + host.public_dns +
                    " " + self.cluster_user + " >> ~/.shosts'" + '"')
Example #14
 def _mergeAuthWithProxy(self):
     loginConf = os.path.join(Defaults.ETC_DIR, '%s/login.conf')
     pdiskDir = 'storage/pdisk'
     oneproxyDir = 'one-proxy'
     confLine = '<Arg>%s</Arg>'
     configFile = os.path.join(self.pdiskHomeDir, 'etc/jetty-jaas-stratuslab.xml')
     if not self.persistentDiskMergeAuthWithProxy:
         return
     printStep('Merging pdisk and one-proxy auth configuration...')
     if not self.system._remoteFileExists(loginConf % oneproxyDir):
         printWarning('Not merging login configuration with one proxy, '
                      'not able to find one-proxy configuration file.\n'
                      'Edit %s to do it.' % loginConf % pdiskDir)
         return
     if 0 == self.system._nodeShell(['grep', '"%s"' % confLine % loginConf % oneproxyDir, configFile]):
         return
     self.system._remoteAppendOrReplaceInFile(
         configFile,
         confLine % loginConf % pdiskDir,
         confLine % loginConf % oneproxyDir)
Example #15
    def doAddPackages(self, ssh):
        printStep('Installing additional software packages')
        packages = self.add_packages.replace(",", " ")

        printStep('Trying to configure new apps with yum...')
        if ssh.run_remote_command(self.hosts, "yum -q -y install " + packages):
            printStep('Trying to configure new apps with apt-get...')
            ssh.run_remote_command(self.hosts, "apt-get -q -y install " + packages)
Example #16
    def _installFrontend(self):
        printStep('Installing CAs')
        self._installCAs()

        printStep('Installing sendmail')
        self._installSendmail()

        printStep('Installing OpenNebula')
        self._installOpenNebula()

        self._printInstallCompleted(self.frontend.stdout.name, self.frontend.stderr.name)
Example #17
    def doPrepareNFSSharedFolder(self, ssh, master_node, worker_nodes):
        printStep('Preparing NFS shared folder')
        master_only = []
        master_only.append(master_node)
        ssh.run_remote_command(self.hosts, "mkdir -p " + self.shared_folder)
        ssh.run_remote_command(master_only,
                               "'echo " + self.shared_folder + " \"*(rw,no_root_squash)\" >> /etc/exports'")

        printStep('\tTrying RedHat configuration...')
        if ssh.run_remote_command(master_only, "service nfs restart &> /dev/null"):
            printStep('\tTrying debian configuration...')
            ssh.run_remote_command(master_only, "service nfs-kernel-server restart &> /dev/null")

        ssh.run_remote_command(worker_nodes,
                               "mount " + master_node.public_ip + ":" + self.shared_folder + " " + self.shared_folder)
Example #18
    def runInstance(self, details=False):
        self._printContacting()

        if Image.isImageId(self.vm_image):
            self._checkImageExists(self.vm_image)
            self.vm_image = self._prependMarketplaceUrlIfImageId(self.vm_image)
        elif Image.isDiskId(self.vm_image):
            self.vm_image = self._createDiskUrlIfDiskId(self.vm_image)
        elif self._isAliasUrl(self.vm_image):
            self.vm_image = self._resolveUrl(self.vm_image)
        else:
            raise Exceptions.ValidationException('Image reference must be an '
                                                 'Alias URL, Marketplace Image ID or Disk ID:  %s' %
                                                 self.vm_image)

        printAction('Starting machine(s)')

        self.printDetail('Using VM template file: %s' % self.vmTemplateFile)

        vmTpl = self._buildVmTemplate(self.vmTemplateFile)

        label = (self.instanceNumber > 1) and 'machines' or 'machine'

        printStep('Starting %s %s' % (self.instanceNumber, label))

        self.printDetail('on endpoint: %s' % self.endpoint, Util.VERBOSE_LEVEL_DETAILED)
        self.printDetail('with template:\n%s' % vmTpl, Util.VERBOSE_LEVEL_DETAILED)

        for vmNb in range(self.instanceNumber):
            vmId = self.cloud.vmStart(vmTpl)
            self.vmIds.append(vmId)
            networkName, ip = self.getNetworkDetail(vmId)
            self.vmIdsAndNetwork.append((vmId, networkName, ip))
            vmIpPretty = '\t%s ip: %s' % (networkName.title(), ip)
            printStep('Machine %s (vm ID: %s)\n%s' % (vmNb + 1, vmId, vmIpPretty))
            self.instancesDetail.append({'id': vmId, 'ip': ip, 'networkName': networkName})
        self._saveVmIds()

        printStep('Done!')

        if not details:
            return self.vmIds
        else:
            return self.vmIdsAndNetwork
Example #19
    def _setupNode(self):
        printStep('Checking node connectivity')
        self._checkNodeConnectivity()

        printStep('Creating cloud admin account')
        self._createCloudAdmin(self.node)

        printStep('Configuring cloud admin account')
        self._configureCloudAdminNode()

        printStep('Configuring hypervisor')
        self._configureVirtualization()

        printStep('Configuring bridge')
        self._configureBridgeOnNode()

        printStep('Configuring file sharing')
        self._setupFileSharingClient()

        printStep('Adding node to cloud')
        self._assignDrivers()
        self._addCloudNode()
Example #20
 def _installNode(self):
     printStep('Installing node dependencies')
     self._installNodeDependencies()
     self._warmXenNeedReboot()
Example #21
 def _printInstallCompleted(self, stdoutFilename, stderrFilename):
     printStep('Installation completed')
     printInfo('\tInstallation details: %s, %s' % (stdoutFilename, stderrFilename))
Example #22
 def _startServicesNode(self):
     printStep('Starting virtualization services')
     self._startVrtualization()
Example #23
 def _setupFrontend(self):
     printStep('Creating monitoring configuration file')
     monitoringTpl = Util.get_template_file(['monitoring.cfg.tpl'])
     monitoringConfFile = os.path.join(Defaults.ETC_DIR, 'monitoring.cfg')
     self._writeConfigFromTemplate(monitoringConfFile, monitoringTpl)
Example #24
 def _installFrontend(self):
     printStep('Installing packages')
     self.system.installPackages(self.packages)
Example #25
 def _setupFrontend(self):
     self._validateParameters()
     printStep('Creating registration configuration file')
     registrationTpl = Util.get_template_file(['registration.cfg.tpl'])
     registrationConfFile = os.path.join(Defaults.ETC_DIR, 'registration.cfg')
     self._writeConfigFromTemplate(registrationConfFile, registrationTpl)
Example #26
    def _setupFrontend(self):
        printStep('Configuring file sharing')
        self._setupFileSharingServer()

        printStep('Configuring quarantine')
        self._configureQuarantine()

        printStep('Configuring cloud proxy service')
        self._configureCloudProxyService()

        printStep('Configuring firewall')
        self._configureFirewall()

        printStep('Configuring DHCP server')
        self._configureDhcpServer()

        printStep('Configuring database')
        self._configureDatabase()

        printStep('Configuring cloud admin account')
        self._configureCloudAdminFrontend()

        printStep('Configuring cloud system')
        self._configureCloudSystem()

        printStep('Applying local policies')
        self._configurePolicies()

        self._setupMarketplacePolicyValidator()

        printStep('Starting cloud')
        self._startServicesFrontend()

        printStep('Adding default ONE vnet')
        self._addDefaultNetworks()

        printStep('Adding default ACLs')
        self._addDefaultAcls()

        self._printInstallCompleted(self.frontend.stdout.name, self.frontend.stderr.name)
Example #27
    def deploy(self):
        ssh = SSHUtil(self._runner.userPrivateKeyFile, self.cluster_admin)

        # Wait until all the images are up and running
        vmNetworkDetails = []
        vmStartTimeout = 600

        # wait until each machine is up or the timeout (vmStartTimeout seconds) expires
        printStep("Waiting for all cluster VMs to be instantiated...")
        if self._is_heterogeneous:
            printStep("Waiting for master")
            self._runner.waitUntilVmRunningOrTimeout(self._master_vmid, vmStartTimeout)
            vmNetworkDetails.append(self._runner.getNetworkDetail(self._master_vmid))

        for vmId in self._runner.vmIds:
            printDebug('Waiting for instance to start running %s' % str(vmId))
            self._runner.waitUntilVmRunningOrTimeout(vmId, vmStartTimeout)
            vmNetworkDetails.append(self._runner.getNetworkDetail(vmId))

        vm_cpu, vm_ram, vm_swap = self._runner.getInstanceResourceValues()

        for vmNetwork in vmNetworkDetails:
            if vmNetwork[0] == 'public':
                host = Host()
                host.public_ip = vmNetwork[1]

                try:
                    host.public_dns = socket.gethostbyaddr(host.public_ip)[0]
                except:
                    host.public_dns = host.public_ip

                host.cores = vm_cpu
                host.ram = vm_ram
                host.swap = vm_swap
                self.hosts.append(host)

        printStep("Waiting for all instances to become accessible...")

        failedHosts = []

        for host in self.hosts:
            hostReady = False
            hostFailed = False

            while not hostReady and not hostFailed:
                if not ssh.waitForConnectivity(host, vmStartTimeout):
                    printError('Timed out while connecting to %s.  Removing from target config. list.' % host.public_ip)
                    failedHosts.append(host)
                    hostFailed = True
                else:
                    hostReady = True

        if len(failedHosts) > 0:
            if self.tolerate_failures:
                for host in failedHosts:
                    self.hosts.remove(host)
            else:
                printError('Error instantiating some or all of the nodes. Bailing out...')
                if self.clean_after_failure:
                    self._runner.killInstances(self._runner.vmIds)
                return 128

        master_node = self.hosts[0]

        worker_nodes = list(self.hosts)

        worker_nodes.remove(master_node)

        printInfo('\tMaster is %s' % master_node.public_dns)

        for node in worker_nodes:
            printInfo('\tWorker: %s' % node.public_dns)

        # Configure the hosts
        printAction('Configuring nodes')

        # Try to install the missing packages
        if self.add_packages:
            self.doAddPackages(ssh)

        # For MPI clusters prepare the machinefile for mpirun
        if self.mpi_machine_file:
            self.doPrepareMPImachineFile(ssh, worker_nodes)

        if self.cluster_user:
            # Create a new user and prepare the environments for password-less ssh
            self.doCreateClusterUser(ssh, master_node)

        # Initialize the shared storage in NFS
        if self.shared_folder:
            self.doPrepareNFSSharedFolder(ssh, master_node, worker_nodes)

        if self.ssh_hostbased:
            self.doSetupSSHHostBasedCluster(ssh)

        # Update /etc/profile with StratusLab specific environment variables
        self.doUpdateEnvironmentVariables(ssh, master_node, worker_nodes)

        # Store the list of cluster nodes in a file under /tmp
        self.doPrepareNodeList(ssh, worker_nodes)

        # Update the /etc/hosts file for all hosts
        self.doUpdateHostsFile(ssh, master_node, worker_nodes)

        # Start any services defined in rc.cluster-services
        self.doStartClusterServices(ssh, master_node)

        return 0
Example #28
 def doStartClusterServices(self, ssh, master_node):
     printStep("Applying user defined cluster services")
     master_only = []
     master_only.append(master_node)
     ssh.run_remote_command(master_only, "'if [ -e /etc/rc.cluster-services ]; then /etc/rc.cluster-services; fi'")
Example #29
    def persistentDiskStorageHotplugTest(self):
        """Ensure that a disk hot-plugged to a VM and then hot-unplugged"""

        pdiskDevice = "/dev/%s"
        pdiskMountPoint = "/mnt/pdisk-test"
        testFile = "%s/pdisk.txt" % pdiskMountPoint
        testFileCmp = "/tmp/pdisk.cmp"
        testString = "pdiskTest"

        configHolder = Testor.configHolder.copy()
        configHolder.pdiskUsername = Testor.configHolder.testUsername
        configHolder.pdiskPassword = Testor.configHolder.testPassword
        pdisk = VolumeManagerFactory.create(configHolder)

        runner = self._startVmWithPDiskAndWaitUntilUp(image=self.ubuntuImg)

        Util.printAction("Creating a new persistent disk")
        diskUUID = pdisk.createVolume(1, "test %s" % datetime.datetime.today(), False)

        Util.printAction("Checking persistent disk exists")
        if not pdisk.volumeExists(diskUUID):
            self.fail("An error occurred while creating a persistent disk")

        self._modeprobe(runner, "acpiphp")
        vmId = self.vmIds[0]
        node = runner.cloud.getVmNode(vmId)

        printStep("Attaching pdisk to VM")

        availableUserBeforeAttach, _ = pdisk.getVolumeUsers(diskUUID)
        device = pdisk.hotAttach(node, vmId, diskUUID)
        availableUserAfterAttach, _ = pdisk.getVolumeUsers(diskUUID)

        if availableUserAfterAttach != (availableUserBeforeAttach - 1):
            self.fail(
                "Available users on persistent disk have to decrease by "
                "one; before=%s, after=%s" % (availableUserBeforeAttach, availableUserAfterAttach)
            )

        self._formatDisk(runner, pdiskDevice % device)
        self._mountDisk(runner, pdiskDevice % device, pdiskMountPoint)
        self._writeToFile(runner, testFile, testString)
        self._umountDisk(runner, pdiskDevice % device)

        printStep("Detaching pdisk of VM")
        pdisk.hotDetach(node, vmId, diskUUID)

        availableUserAfterDetach, _ = pdisk.getVolumeUsers(diskUUID)

        if availableUserAfterDetach != availableUserBeforeAttach:
            self.fail(
                "Available users on persistent disk have to be the "
                "same as when VM has started; before=%s, after=%s"
                % (availableUserBeforeAttach, availableUserAfterDetach)
            )

        printStep("Re-attaching pdisk to VM")
        device = pdisk.hotAttach(node, vmId, diskUUID)

        self._mountDisk(runner, pdiskDevice % device, pdiskMountPoint)
        self._writeToFile(runner, testFileCmp, testString)
        self._compareFiles(runner, testFile, testFileCmp)
        self._umountPDiskAndStopVm(runner, pdiskDevice % device)

        availableUserAfterStop, _ = pdisk.getVolumeUsers(diskUUID)

        if availableUserAfterStop != availableUserBeforeAttach:
            self.fail(
                "Available users on persistent disk have to be the "
                "same as when VM has started; before=%s, after=%s" % (availableUserBeforeAttach, availableUserAfterStop)
            )

        Util.printAction("Removing persistent disk...")
        pdisk.deleteVolume(diskUUID)

        try:
            if pdisk.volumeExists(diskUUID):
                self.fail("The persistent disk %s is still present" % diskUUID)
        except ClientException, ex:
            if not re.match("404", ex.status):
                self.fail("The persistent disk %s is still present" % diskUUID)
Example #30
    def save_instance_as_new_image(self, vm_id):
        self._printContacting()
        self._checkInstanceExists(vm_id)

        printStep('Instructing cloud to save instance as new image on shutdown')
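Note: every snippet above relies on StratusLab's console helpers (printStep, printAction, printInfo, printWarning, printError, printDebug) from stratuslab.Util, whose implementation is not shown on this page. The following is only a minimal, hypothetical sketch of what such helpers could look like, assuming they simply write formatted progress messages to the console; the real functions may format, log, or exit differently.

    # Hypothetical sketch only -- the real helpers live in stratuslab.Util and may differ.
    import sys

    def _emit(message):
        # Write a line immediately so progress is visible during long-running steps.
        sys.stdout.write(message + '\n')
        sys.stdout.flush()

    def printAction(message):
        # Major phase of a command, e.g. 'Starting cluster'.
        _emit('\n :: %s :: ' % message)

    def printStep(message):
        # Single step within an action, e.g. 'Updating hosts file'.
        _emit(' :: %s' % message)

    def printInfo(message):
        _emit(message)

    def printWarning(message):
        _emit('WARNING: %s' % message)

    def printError(message):
        sys.stderr.write('ERROR: %s\n' % message)

    def printDebug(message):
        _emit('DEBUG: %s' % message)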