Code example #1
    def run_background_job(cls,
                           runSmokeTestSetup=True,
                           config=None,
                           flagFile="/tmp/flagFile"):
        '''
        Uploads files to HDFS before the upgrade starts and runs a long-running sleep job in the background
        :return: number of applications started
        '''
        # start long running application which performs I/O operations (BUG-23838)
        #from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
        #UpgradePerNode.reportProgress("### Background application for HDFS started ####")
        #jobArgs = {"mapred.job.queue.name" : cls._queue}
        #HadoopJobHelper.runSleepJob(numOfMaps = 1, numOfReduce = 1, mapSleepTime = "10000000", reduceSleepTime = "100", extraJobArg = jobArgs, runInBackground = True, config = config, directoutput = False )
        #MAPRED.triggerSleepJob("1", "0", "100000", "1000000", 1, background = True)
        # load generator
        HADOOP_TEST_JAR = cls.get_hadoop_test_jar()
        TEST_USER = Config.get('hadoop', 'HADOOPQA_USER')
        HDFS.deleteDirectory(flagFile)
        slavelist = HDFS.getDatanodes()
        jobCmd = 'jar %s NNloadGenerator -Dmapred.job.queue.name=%s -mr 3 %s -root %s -numOfThreads 5 -maxDelayBetweenOps 1000 -elapsedTime 36000 -flagFile %s' % (
            HADOOP_TEST_JAR, cls._queue, cls._lgTestOutputDir,
            cls._lgTestDataDir, flagFile)
        Hadoop.runInBackground(jobCmd)
        time.sleep(15)
        return 1
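The NNloadGenerator started above keeps running until a flag file shows up in HDFS; code example #3 below stops it by touching cls._HDFS_FLAG_FILE. A minimal sketch of that shutdown step, assuming the HDFS.runas helper seen in these examples and a caller-supplied user name:

from beaver.component.hadoop import HDFS

def stop_background_load(flag_file, test_user):
    # Touching the flag file in HDFS signals the running NNloadGenerator to exit
    # (sketch only; the real teardown lives inside hdp_upgrade below).
    exit_code, output = HDFS.runas(test_user, "dfs -touchz " + flag_file)
    return exit_code == 0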
Code example #2
File: hiveutils.py  Project: thakkardharmik/beaver
def getLLAPDaemonPidsHosts():
    hosts = []
    llapdaemon_pids = []
    if not Machine.isHumboldt():
        nodes = Hadoop.getAllNodes()
    else:
        nodes = HDFS.getDatanodes()
    for node in nodes:
        pids = Machine.getProcessListRemote(
            node, format="%U %p %P %a", filter="org.apache.hadoop.hive.llap.daemon.impl.LlapDaemon", logoutput=True
        )
        if pids:
            if Hadoop.isSecure():
                pid = Machine.getPidFromString(pids[0], Config.get('hive', 'HIVE_USER'))
            else:
                pid = Machine.getPidFromString(pids[0], Config.get('hadoop', 'YARN_USER'))
            llapdaemon_pids.append(pid)
            hosts.append(node)
    return llapdaemon_pids, hosts
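A small usage sketch for the function above, assuming a module-level logger as in the other examples; it only pairs each returned PID with its host, since the two lists are built in lockstep:

llap_pids, llap_hosts = getLLAPDaemonPidsHosts()
for host, pid in zip(llap_hosts, llap_pids):
    # Each index pairs one LLAP daemon PID with the host it was found on.
    logger.info("LLAP daemon pid %s running on host %s", pid, host)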
Code example #3
    def hdp_upgrade(cls,
                    components,
                    currVersion,
                    latestVersion,
                    doTeardown=True,
                    finalize=True):
        '''
        Upgrade HDP Stack With Per Node Method.

        Steps
        1) Prepare and save component states.
        2) Setup prerequisites for background jobs.
        3) Start long-running background jobs for all components.
        4) Upgrade core components from bottom to top.
          For each service, does:
          4a) Upgrade service.
          4b) Run smoke tests for all components.
          4c) Check number of all background jobs.
        5) After all components are upgraded, run another set of tests.
        6) Repeat same process for non-core components.
        7) Upgrade clients of components which were upgraded earlier.
        8) Upgrade client-only components.
        9) After all components are upgraded, run smoke tests.
        10) Stop long running jobs.
        11) Look for failed and killed jobs.
        12) Verify outputs of successful jobs.
        13) Finalize all states.

        :param components: list of Components to upgrade
        :param currVersion: Current Version
        :param latestVersion: Version to be upgraded to
        :param doTeardown: Only Cleanup when required
        '''
        cls.reportProgress(
            "###  Starting upgrade from %s to %s for components=%s ####" %
            (currVersion, latestVersion, components))
        DN = HDFS.getDatanodes()

        # Find core components (HDFS, YARN, HBase) if they exist.
        core_components = cls.find_existing_core_components(components)

        #Prepare and save state before upgrade
        Rollingupgrade.ru_prepare_save_state_for_upgrade(components)

        # Run setup for background Jobs for all components
        Rollingupgrade.background_job_setup(components, config=None)

        # Starts Long running background Jobs for all components
        numBackgroundJobs = Rollingupgrade.run_longRunning_Application(
            components, config=None)
        logger.info(
            "Total number of long running background jobs before starting upgrade is %s"
            % numBackgroundJobs)
        cls.reportProgress("###  Just started %s background jobs  ###" %
                           numBackgroundJobs)

        # upgrade the components in hierarchy order
        cls.reportProgress("###  Starting upgrade of core %s masters  ###" %
                           core_components)
        #### IF XA is enabled, upgrade XA services ####
        from beaver.component.xa import Xa
        cls.reportProgress(
            "******************************* checking for argus to be installed *******************************"
        )
        if "argus" in components and Xa.isArgusInstalled():
            logger.info(
                '**************************************************** XA is Enabled in the cluster, setting up and upgrading the same ****************************************************'
            )
            Rollingupgrade.upgrade_master_and_smoketest(
                ['argus'], latestVersion, config=None, currVersion=currVersion)

        ##### TODO - upgrade ZOOKEEPER ########
        if "zookeeper" in components:
            Rollingupgrade.upgrade_master_and_smoketest(["zookeeper"],
                                                        latestVersion,
                                                        config=None)
        # Upgrade Master services - Namenode, Secondarynamenode, Resourcemanager, Application Timelineserver, JobHistoryserver and HbaseMaster with new version
        #### TODO - Application Timelineserver HbaseMaster ####
        AfterUpgradeBackGroundJobs = Rollingupgrade.upgrade_master_and_smoketest(
            core_components, latestVersion, config=None)
        cls.reportProgress("###  Finished upgrade of core %s masters  ###" %
                           core_components)
        numBackgroundJobs = numBackgroundJobs + AfterUpgradeBackGroundJobs
        logger.info(
            "Total number of long running background jobs after upgrading master services is %s"
            % numBackgroundJobs)

        # upgrade slave service - Datanodes, Nodemanagers and Regionservers with new version
        cls.reportProgress("###  Starting upgrade of core %s slaves  ###" %
                           core_components)
        i = 0
        #### TODO - upgrade Regionserver  ####
        for node in DN:
            i += 1
            logger.info("**** Upgrading slave number " + str(i) + ": " + node +
                        " ****")
            if i % 4 == 0:
                runSmoke = True
            else:
                runSmoke = False
            Rollingupgrade.upgrade_slave_and_smoketest(core_components,
                                                       latestVersion, node,
                                                       None, runSmoke)
            #check if the background jobs are still running
            runningJobs = YARN.getNumOfRunningJobs()
            logger.info("Number of running background jobs = " +
                        str(runningJobs))
            #assert runningJobs == numBackgroundJobs, 'Long-running job ended too early; running jobs = ' + str(runningJobs)

        cls.reportProgress("###  Finished upgrade of %d core %s slaves  ###" %
                           (i, core_components))
        #### Run all component Smoke tests ####
        Rollingupgrade.run_smokeTests(components, config=None)

        #  Run Tests to verify components accessibility
        Rollingupgrade.testAfterAllMasterSlavesUpgraded(components)

        #### Starting upgrade non core components ####
        cls.reportProgress(
            "###  Starting upgrade of non-core cluster components  ###")
        if "hive" in components:
            Rollingupgrade.upgrade_master_and_smoketest(["hive"],
                                                        latestVersion,
                                                        config=None)

        #### TODO- upgrade pig to N+1 version ####

        #### TODO - Run pig smoke test ####
        #     ## Example : ##
        if "pig" in components:
            Rollingupgrade.upgrade_master_and_smoketest(["pig"],
                                                        latestVersion,
                                                        config=None)
        # ##    Rollingupgrade.upgrade_slave_and_smoketest(["pig"], latestVersion, node)

        # #### TODO - upgrade oozie server to N+1 version ####

        # #### - Run oozie smoke test ####
        if "oozie" in components:
            Rollingupgrade.upgrade_master_and_smoketest(["oozie"],
                                                        latestVersion,
                                                        config=None)

        #### upgrade falcon to N+1 version and run its smoke tests ####

        if "falcon" in components:
            Rollingupgrade.upgrade_master_and_smoketest(["falcon"],
                                                        latestVersion,
                                                        config=None)

        #### TODO - upgrade phoenix to N+1 version ####

        #### TODO - Run phoenix smoke test ####
        if "phoenix" in components:
            ruPhoenix.run_smoke_test(ruPhoenix._smokeTestNum)

        #### TODO - upgrade sqoop to N+1 version ####
        #### TODO - Run sqoop smoke test ####

        cls.reportProgress(
            "###  Finished upgrade of non-core cluster components  ###")

        ## For storm-slider we want to verify the topologies and kill the storm-slider app.
        if "storm-slider" in components:
            from beaver.component.rollingupgrade.ruStorm import ruStorm
            ruStorm.verify_and_stop_slider_app()

        #### TODO- upgrade clients for Argus, Zk, Hdfs, Yarn, MR, Tez, Hive, Pig, Hbase, Falcon, oozie, sqoop , phoenix, mahout ####
        cls.reportProgress(
            "###  Starting upgrade of clients %s inside the cluster ###" %
            components)
        Rollingupgrade.upgrade_client_insideCluster_and_smoketest(
            components, latestVersion, config=None)

        if "storm-slider" in components:
            from beaver.component.rollingupgrade.ruStorm import ruStorm
            ruStorm.start_slider_app_resubmit_topologies()
            time.sleep(120)  # Allow time for storm-slider topologies to run.

        cls.reportProgress("###  Starting upgrade of slider apps ###")
        ### TODO- upgrade slider client and non rolling upgrade of slider-apps ####
        ### TODO- Stop storm-slider app, hbase-slider app, accumulo-slider app
        ### TODO- Upgrade storm-slider client
        ### TODO- resubmit storm-slider app, hbase-slider app, accumulo-slider app
        cls.reportProgress("###  Finished upgrade of slider apps ###")

        #### Knox upgrade
        if "knox" in components:
            Rollingupgrade.upgrade_master_and_smoketest(["knox"],
                                                        latestVersion,
                                                        config=None)

        #### upgrade Flume to N+1 version ####
        if "flume" in components:
            Rollingupgrade.upgrade_master_and_smoketest(["flume"],
                                                        latestVersion,
                                                        config=None)

        #### TODO - upgrade Kafka to N+1 version ####

        #### TODO - Run Kafka smoke test ####

        ## Example : ##
        ## if "kafka" in components:
        ##    Rollingupgrade.upgrade_master_and_smoketest(["kafka"], latestVersion, config=None)
        ##    Rollingupgrade.upgrade_slave_and_smoketest(["kafka"], latestVersion, node)

        #### TODO - upgrade Storm to N+1 version ####

        #### TODO - Run storm smoke test ####

        ## Example : ##
        ## if "storm" in components:
        ##    Rollingupgrade.upgrade_master_and_smoketest(["storm"], latestVersion, config=None)
        ##    Rollingupgrade.upgrade_slave_and_smoketest(["storm"], latestVersion, node)

        #### TODO - upgrade Hue to N+1 version ####

        #### TODO - Run Hue smoke test ####

        ## Example : ##
        ## if "hue" in components:
        ##    Rollingupgrade.upgrade_master_and_smoketest(["hue"], latestVersion, config=None)
        ##    Rollingupgrade.upgrade_slave_and_smoketest(["hue"], latestVersion, node)
        cls.reportProgress(
            "###  Finished upgrade of non-core components outside the cluster  ###"
        )

        #### TODO - Run all component Smoke tests ####
        Rollingupgrade.run_smokeTests(components, config=None)

        ### Need to stop HDFS, Falcon, YARN long running jobs ####
        # create flagFile to kill HDFS background job
        TEST_USER = Config.get('hadoop', 'HADOOPQA_USER')
        createCmd = "dfs -touchz " + cls._HDFS_FLAG_FILE
        exit_code, output = HDFS.runas(TEST_USER, createCmd)

        if "falcon" in components:
            from beaver.component.rollingupgrade.ruFalcon import ruFalcon
            ruFalcon.stopFalconLongRunningJob()
        if "yarn" in components:
            ruYARN.stopYarnLongRunningJob()
        if "slider" in components:
            ruSlider.stopSliderLongRunningJob()
        if "storm-slider" in components:
            from beaver.component.rollingupgrade.ruStorm import ruStorm
            ruStorm.teardown_storm_slider_app()

        ## TODO - wait for long running jobs to finish
        isZero = YARN.waitForZeroRunningApps()
        if isZero:
            cls.reportProgress("#### No apps are running. ####")
        else:
            cls.reportProgress(
                "#### Check failed: some apps are still running. ####")
        #assert isZero, "all long running jobs are not finished"

        ### List down Failed/Killed applications ####
        Failed_Killed_apps = YARN.getFailedKilledAppList()
        cls.reportProgress(
            "### Listing Killed/Failed applications while performing upgrade ####"
        )
        for app in Failed_Killed_apps:
            queue = YARN.getQueueForApp(app)
            logger.info(" %s running on %s queue Failed/Killed." %
                        (app, queue))
            cls.reportProgress(
                "#### %s running on %s queue Failed/Killed. ####" %
                (app, queue))

        ## TODO - Validate long running jobs
        Rollingupgrade.verifyLongRunningJob(components)

        ## KILL APPLICATIONS ####
        YARN.killAllApplications(useYarnUser=True)

        ## TODO - call Finalize
        if finalize:
            Rollingupgrade.ru_finalize_state(components)

        ## TODO - call Teardown for long running jobs
        if doTeardown:
            Rollingupgrade.background_job_teardown(components, None)
        cls.reportProgress(
            "###  Completed upgrade from %s to %s for components=%s ####" %
            (currVersion, latestVersion, components))
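The slave loop above runs the full smoke suite only on every fourth datanode (i % 4 == 0) so the rolling upgrade keeps moving. A stripped-down sketch of just that cadence, assuming the caller supplies the datanode list, core components and target version:

from beaver.component.rollingupgrade.ruCommon import Rollingupgrade

def upgrade_core_slaves(datanodes, core_components, latest_version):
    for i, node in enumerate(datanodes, start=1):
        run_smoke = (i % 4 == 0)  # run the full smoke suite on every fourth slave only
        Rollingupgrade.upgrade_slave_and_smoketest(
            core_components, latest_version, node, None, run_smoke)
    return len(datanodes)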
Code example #4
    def hdp_downgrade(cls,
                      components,
                      currVersion,
                      latestVersion,
                      doTeardown=True):
        '''
        Downgrade HDP Stack With Per Node Method
        :param components: Components to be downgraded
        :param currVersion: Current version (Version V1)
        :param latestVersion: Version to be downgraded to (Version V0)
        '''
        UpgradePerNode.reportProgress(
            "###  Starting downgrade from %s to %s for components=%s ####" %
            (currVersion, latestVersion, components))
        DN = HDFS.getDatanodes()
        core_components = UpgradePerNode.find_existing_core_components(
            components)

        # Run setup for background Jobs for all components
        Rollingupgrade.background_job_setup(components, config=None)

        # Starts Long running background Jobs for all components
        numBackgroundJobs = Rollingupgrade.run_longRunning_Application(
            components, config=None)
        logger.info(
            "Total number of long running background jobs before starting upgrade is %s"
            % numBackgroundJobs)
        UpgradePerNode.reportProgress(
            "###  Just started %s background jobs  ###" % numBackgroundJobs)

        #### TODO - downgrade Hue and run Hue smoke test ####
        UpgradePerNode.reportProgress(
            "###  Starting downgrade of non-core components outside the cluster  ###"
        )
        ## Example : ##
        ## if "hue" in components:
        ##    Rollingupgrade.downgrade_master_and_smoketest(["hue"], latestVersion, config=None)
        ##    Rollingupgrade.downgrade_slave_and_smoketest(["hue"], latestVersion, node)

        #### TODO - downgrade storm and run smoke test ####

        ## Example : ##
        ## if "storm" in components:
        ##    Rollingupgrade.downgrade_master_and_smoketest(["storm"], latestVersion, config=None)
        ##    Rollingupgrade.downgrade_slave_and_smoketest(["storm"], latestVersion, node)

        #### TODO - downgrade Kafka and run smoke test ####

        ## Example : ##
        ## if "kafka" in components:
        ##    Rollingupgrade.downgrade_master_and_smoketest(["kafka"], latestVersion, config=None)
        ##    Rollingupgrade.downgrade_slave_and_smoketest(["kafka"], latestVersion, node)

        #### downgrade Flume ####
        if "flume" in components:
            Rollingupgrade.downgrade_master_and_smoketest(["flume"],
                                                          latestVersion,
                                                          config=None)

        #### downgrade Knox and run smoke test ####
        if "knox" in components:
            Rollingupgrade.downgrade_master_and_smoketest(["knox"],
                                                          latestVersion,
                                                          config=None)
        UpgradePerNode.reportProgress(
            "###  Finished downgrade of non-core components outside the cluster  ###"
        )

        UpgradePerNode.reportProgress(
            "###  Starting downgrade of slider apps ###")
        ### TODO- downgrade slider client and non rolling upgrade of slider-apps ####
        ### TODO- Stop storm-slider app, hbase-slider app, accumulo-slider app
        ### TODO- downgrade storm-slider client
        ### TODO- resubmit storm-slider app, hbase-slider app, accumulo-slider app
        UpgradePerNode.reportProgress(
            "###  Finished downgrade of slider apps ###")

        # Downgrade Non core components
        UpgradePerNode.reportProgress(
            "###  Starting downgrade clients %s inside the cluster ###" %
            components)
        ### TODO - Downgrade CLIENTS ####
        Rollingupgrade.downgrade_client_insideCluster_and_smoketest(
            components, latestVersion, config=None)
        UpgradePerNode.reportProgress(
            "###  Finished downgrade of clients %s inside the cluster ###" %
            components)

        #### TODO - Downgrade phoenix and Run phoenix smoke test ####
        UpgradePerNode.reportProgress(
            "###  started downgrade of non-core cluster components  ###")

        ## Example : ##
        ## if "phoenix" in components:
        ##    Rollingupgrade.downgrade_master_and_smoketest(["phoenix"], latestVersion, config=None)
        ##    Rollingupgrade.downgrade_slave_and_smoketest(["phoenix"], latestVersion, node)

        #### downgrade falcon and run smoke test ####

        if "falcon" in components:
            Rollingupgrade.downgrade_master_and_smoketest(["falcon"],
                                                          latestVersion,
                                                          config=None)

        # #### - downgrade oozie and run smoke test ####
        if "oozie" in components:
            Rollingupgrade.downgrade_master_and_smoketest(["oozie"],
                                                          latestVersion,
                                                          config=None)

        #### Downgrade Pig and run pig smoke test ####
        if "pig" in components:
            Rollingupgrade.downgrade_master_and_smoketest(["pig"],
                                                          latestVersion,
                                                          config=None)

        if "hive" in components:
            Rollingupgrade.downgrade_master_and_smoketest(["hive"],
                                                          latestVersion,
                                                          config=None)
        UpgradePerNode.reportProgress(
            "###  Finished downgrade of non-core cluster components  ###")

        # Downgrade Slave services of core-components (Hdfs, Yarn, hbase)
        UpgradePerNode.reportProgress(
            "###  Starting downgrade of core %s slaves  ###" % core_components)
        i = 0
        #### TODO - Downgrade Datanode, Nodemanager, Regionserver  ####
        for node in DN:
            i += 1
            logger.info("**** Downgrading slave number " + str(i) + ": " +
                        node + " ****")
            Rollingupgrade.downgrade_slave_and_smoketest(
                core_components, latestVersion, node, None)
            #check if the background jobs are still running
            runningJobs = YARN.getNumOfRunningJobs()
            logger.info("Number of running background jobs = " +
                        str(runningJobs))
            #assert runningJobs == numBackgroundJobs, 'Long-running job ended too early; running jobs = ' + str(runningJobs)
        UpgradePerNode.reportProgress(
            "###  Finished downgrade of %d core %s slaves  ###" %
            (i, core_components))

        # run smoke tests after downgrading
        Rollingupgrade.run_smokeTests(components, config=None)

        #### TODO - Downgrade Namenode, Resourcemanager, Hbase master ####
        UpgradePerNode.reportProgress(
            "###  Starting downgrade of core %s masters  ###" %
            core_components)
        Rollingupgrade.downgrade_master_and_smoketest(core_components,
                                                      latestVersion,
                                                      config=None)

        #### TODO - Run Validation after All Master and slave services are down ####
        Rollingupgrade.testAfterAllMasterSlavesUpgraded(components)

        ### TODO - Downgrade Zookeeper ####
        #Rollingupgrade.downgrade_master_and_smoketest(["zookeeeper"], latestVersion, config=None)
        UpgradePerNode.reportProgress(
            "###  Finished downgrade of core %s masters  ###" %
            core_components)

        #### IF XA is enabled, downgrade XA services ####
        from beaver.component.xa import Xa
        if "argus" in components and Xa.isArgusInstalled():
            logger.info(
                'XA is Enabled in the cluster, setting up and downgrading the same'
            )
            Rollingupgrade.downgrade_master_and_smoketest(['argus'],
                                                          latestVersion,
                                                          config=None,
                                                          currVersion=None)

#### TODO - Run all component Smoke tests ####
        Rollingupgrade.run_smokeTests(components, config=None)

        # TODO - this is common code with upgrade - move it to a function; note the slider part is different in downgrade, so it cannot be shared as-is
        ### Need to stop HDFS, Falcon, YARN long running jobs ####
        # create flagFile to kill HDFS background job
        TEST_USER = Config.get('hadoop', 'HADOOPQA_USER')
        createCmd = "dfs -touchz " + UpgradePerNode._HDFS_FLAG_FILE
        exit_code, output = HDFS.runas(TEST_USER, createCmd)

        ruYARN.stopYarnLongRunningJob()
        if "falcon" in components:
            from beaver.component.rollingupgrade.ruFalcon import ruFalcon
            ruFalcon.stopFalconLongRunningJob()
        if "storm-slider" in components:
            from beaver.component.rollingupgrade.ruStorm import ruStorm
            ruStorm.teardown_storm_slider_app()

        ## TODO - wait for long running jobs to finish
        isZero = YARN.waitForZeroRunningApps()
        ## Temporarily commenting out the assert below to tune the test
        #assert isZero, "all long running jobs are not finished"

        ## TODO - Validate long running jobs
        Rollingupgrade.verifyLongRunningJob(components)

        ## TODO - call Teardown for long running jobs
        Rollingupgrade.background_job_teardown(components, None)

        ## Finalize State
        Rollingupgrade.ru_finalize_state(components)
        UpgradePerNode.reportProgress(
            "###  Completed downgrade from %s to %s for components=%s ####" %
            (currVersion, latestVersion, components))
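The TODO in hdp_downgrade notes that the stop-long-running-jobs block is duplicated with hdp_upgrade. A hedged sketch of what a shared helper could look like, using only calls that already appear in these examples and assuming Config, HDFS, YARN and ruYARN are imported as in the surrounding module:

def stop_long_running_jobs(components, hdfs_flag_file):
    # Touch the HDFS flag file so the background HDFS load generator exits.
    test_user = Config.get('hadoop', 'HADOOPQA_USER')
    HDFS.runas(test_user, "dfs -touchz " + hdfs_flag_file)
    ruYARN.stopYarnLongRunningJob()
    if "falcon" in components:
        from beaver.component.rollingupgrade.ruFalcon import ruFalcon
        ruFalcon.stopFalconLongRunningJob()
    if "storm-slider" in components:
        from beaver.component.rollingupgrade.ruStorm import ruStorm
        ruStorm.teardown_storm_slider_app()
    # Wait for the long-running YARN applications to drain before validation.
    return YARN.waitForZeroRunningApps()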
Code example #5
import logging
import re

from beaver.machine import Machine
from beaver.component.hadoop import HDFS

logger = logging.getLogger(__name__)

nn = HDFS.getNamenode()
logger.info("NN = %s", nn)

#list of DNs
dns = HDFS.getDatanodes()
logger.info("DNs = %s", dns)

nodes = [nn]
for dn in dns:
    if dn not in nodes:
        nodes.append(dn)

paths = ["/grid/0", "/grid/1", "/grid/2", "/grid/3", "/grid/4", "/grid/5", ""]
logger.info("nodes=%s", nodes)
logger.info("paths=%s", paths)
suspiciousNodes = []
for node in nodes:
    logger.info("node=%s", node)
    for path in paths:
        fname = path + "/tmp/touchtest"
        #logger.info("testing %s at %s" % (fname, node))
        (exit_code, stdout) = Machine.touchRemote(node, fname)
        if re.search("Read-only file system", stdout) is not None:
            # The original snippet is truncated here; recording the node is an
            # assumed completion, based on the otherwise unused suspiciousNodes list.
            suspiciousNodes.append(node)
            logger.info("Read-only file system found on %s at %s", node, fname)
Code example #6
File: ruRollback.py  Project: thakkardharmik/beaver
    def hdp_upgrade_rollback(cls,
                             components,
                             currVersion,
                             latestVersion,
                             doTeardown=True,
                             finalize=True):
        '''
        Test rollback after a partial upgrade. Note that today only HDFS/HBase need state rollback,
        but we do a partial upgrade of core components without background tests and then roll back.

        Steps
        0) Create some state (e.g. file) that we will delete after upgrade
        1) Prepare and save component states.
        4) Partially Upgrade core components
          For each service, does:
          4a) Upgrade Masters, Upgrade 1 slave
          4b) Run smoke tests for all components.
        5) Delete state created in step 0
        6) Create new state
        7) rollback state
        7b) Run smoke tests
        7c) Validate that state created in step 0 still exists but state created in step 6 does not

        :param components: list of Components to upgrade (Can only be HDFS, HBASE)
        :param currVersion: Current Version
        :param latestVersion: Version to be upgraded to
        :param doTeardown: Only Cleanup when required
        '''
        cls.reportProgress(
            "###  Starting upgrade from %s to %s for components=%s ####" %
            (currVersion, latestVersion, components))
        DN = HDFS.getDatanodes()

        # Find core components (HDFS, YARN, HBase) if they exist.
        core_components = cls.find_existing_core_components(components)

        #TODO if there are any non-core components then print an error since they are not going to be upgraded.

        #TODO create some state (e.g. files) whose existence will be checked after the rollback
        if "hdfs" in components:
            ruHDFS.createState4Rollback1()
        #if "hbase" in components:
        #    ruHbase.createState4Rollback1()
        if "yarn" in components:
            logger.info("Rollback doesn't make sense for YARN")

        #Prepare and save state before upgrade
        Rollingupgrade.ru_prepare_save_state_for_upgrade(components)

        # upgrade the components in hierarchy order
        cls.reportProgress("###  Starting upgrade of core %s masters  ###" %
                           core_components)
        #### IF XA is enabled, upgrade XA services ####
        from beaver.component.xa import Xa
        cls.reportProgress(
            "******************************* checking for argus to be installed *******************************"
        )
        if "argus" in components and Xa.isArgusInstalled():
            logger.info(
                '**************************************************** XA is Enabled in the cluster, setting up and upgrading the same ****************************************************'
            )
            Rollingupgrade.upgrade_master_and_smoketest(
                ['argus'], latestVersion, config=None, currVersion=currVersion)

        if "zookeeper" in components:
            Rollingupgrade.upgrade_master_and_smoketest(["zookeeper"],
                                                        latestVersion,
                                                        config=None)

        # Upgrade Master services -
        # Namenode, Secondarynamenode, Resourcemanager, Application Timelineserver,
        # JobHistoryserver and HbaseMaster with new version

        AfterUpgradeBackGroundJobs = Rollingupgrade.upgrade_master_and_smoketest(
            core_components, latestVersion, config=None)
        cls.reportProgress("###  Finished upgrade of core %s masters  ###" %
                           core_components)

        # upgrade 1 slave service - Datanodes, Nodemanagers and Regionservers with new version
        cls.reportProgress("###  Starting upgrade of core %s slaves  ###" %
                           core_components)

        logger.info("**** Upgrading first  slave:" + DN[0] + " ****")
        Rollingupgrade.upgrade_slave_and_smoketest(core_components,
                                                   latestVersion, DN[0], None,
                                                   False)

        cls.reportProgress("###  Finished upgrade of 1 core %s slave  ###" %
                           (core_components))
        #### Run all component Smoke tests ####
        Rollingupgrade.run_smokeTests(components, config=None)

        #  Run Tests to verify components accessibility
        Rollingupgrade.testAfterAllMasterSlavesUpgraded(components)

        #### Run all component Smoke tests ####
        Rollingupgrade.run_smokeTests(components, config=None)

        # TODO - delete some state that was created before the prepare-save state
        # TODO - create some new state
        if "hdfs" in components:
            ruHDFS.createState4Rollback2()
        if "hbase" in components:
            from beaver.component.rollingupgrade.ruHbase import ruHbase
            ruHbase.createState4Rollback2()
        if "yarn" in components:
            logger.info("Rollback doesn't make sense for YARN")

        #################### Now do the rollback ########################
        cls.reportProgress(
            "###  Starting  rollback from %s to %s for components=%s ####" %
            (latestVersion, currVersion, components))

        logger.info("**** Downgrading slave number 0 : " + DN[0] + " ****")
        Rollingupgrade.downgrade_slave_and_smoketest(core_components,
                                                     currVersion, DN[0], None)

        #### Downgrade Namenode, Resourcemanager, Hbase master ####
        cls.reportProgress("###  Starting downgrade of core %s masters  ###" %
                           core_components)
        Rollingupgrade.downgrade_master_and_smoketest(core_components,
                                                      currVersion,
                                                      config=None)

        ## rollback state TODO the rollback function does not exist yet.
        #Rollingupgrade.ru_rollback_state(components)
        if "hdfs" in components:
            ruHDFS.ru_rollback_state()
        #if "hbase" in components:
        #    hbase.ru_rollback_state()
        if "yarn" in components:
            logger.info("Rollback doesn't make sense for YARN")

        # TODO now check that the deleted state exists and the newly create state does not
        if "hdfs" in components:
            ruHDFS.checkState4Rollback()
        if "hbase" in components:
            from beaver.component.rollingupgrade.ruHbase import ruHbase
            ruHbase.checkState4Rollback()
        if "yarn" in components:
            logger.info("Rollback doesn't make sense for YARN")

        cls.reportProgress(
            "###  Completed rollback from %s to %s for components=%s ####" %
            (latestVersion, currVersion, components))
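The rollback check at the end delegates to ruHDFS.checkState4Rollback, whose body is not shown here. Purely as an illustration, with hypothetical /tmp paths, this is the invariant it is meant to verify: state created before the prepare/save step survives the rollback, while state created after the partial upgrade is gone:

def illustrate_rollback_invariant(test_user):
    # Hypothetical paths standing in for the state created in steps 0 and 6.
    code_before, _ = HDFS.runas(test_user, "dfs -test -e /tmp/ru_state_before_upgrade")
    code_after, _ = HDFS.runas(test_user, "dfs -test -e /tmp/ru_state_after_upgrade")
    # "dfs -test -e" exits 0 when the path exists.
    return code_before == 0 and code_after != 0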
Code example #7
    def prepare_and_start_long_running_jobs(self):
        ############################Prepare and start long running jobs
        self.find_components_to_test()

        import beaver.component.rollingupgrade.ruSetup as ruSetup

        ruSetup.COMPONENTS_TO_FLIP = self.COMPONENTS_TO_TEST
        ruSetup.COMPONENTS_AFFECTED = self.COMPONENTS_TO_TEST
        ruSetup.COMPONENTS_TO_TEST = self.COMPONENTS_TO_TEST
        ruSetup.COMPONENTS_TO_IMPORT = self.COMPONENTS_TO_TEST

        from beaver.component.hadoop import HDFS
        if "slider" in self.COMPONENTS_TO_TEST:
            from beaver.component.rollingupgrade.ruSlider import ruSlider
        from beaver.component.rollingupgrade.ruCommon import Rollingupgrade
        from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode

        HDFS.createUserDirWithGroup('/user/' +
                                    Config.get('hadoop', 'HADOOPQA_USER'),
                                    'hdfs',
                                    Config.get('hadoop', 'HADOOPQA_USER'),
                                    'hdfs',
                                    diffGroup=True)
        logger.info(Config.get('hadoop', 'HADOOPQA_USER'))
        logger.info(
            "1==========================================================================="
        )

        self.setup_ru_cluster()
        logger.info(
            "2==========================================================================="
        )
        logger.info(
            "3==========================================================================="
        )
        DN = HDFS.getDatanodes()

        # Find core components (HDFS, YARN, HBase) if they exist.
        #core_components = UpgradePerNode.find_existing_core_components(COMPONENTS_TO_TEST)
        logger.info(
            "4==========================================================================="
        )
        #Prepare and save state before upgrade
        Rollingupgrade.ru_prepare_save_state_for_upgrade(
            self.COMPONENTS_TO_TEST)
        logger.info(
            "5==========================================================================="
        )
        # Run setup for background Jobs for all components
        Rollingupgrade.background_job_setup(self.COMPONENTS_TO_TEST,
                                            config=None)

        logger.info("Running smoke tests before upgrade")
        Rollingupgrade.run_smokeTests(self.COMPONENTS_TO_TEST)

        logger.info(
            "6==========================================================================="
        )
        # Starts Long running background Jobs for all components
        numBackgroundJobs = Rollingupgrade.run_longRunning_Application(
            self.COMPONENTS_TO_TEST, config=None)
        logger.info(
            "7==========================================================================="
        )
        logger.info(
            "Total number of long running background jobs before starting upgrade is %s"
            % numBackgroundJobs)
        UpgradePerNode.reportProgress(
            "###  Just started %s background jobs  ###" % numBackgroundJobs)
        UpgradeLogger.reportProgress(
            "###  Just started %s background jobs  ###" % numBackgroundJobs,
            True)
Code example #8
def formatNN_SetupHDFS(duReservedValue, mod_conf_path):
    """
    Format NN. Setup HDFS dir for MR jobs.

    Note that this permission is too wide for default HDP use.
    """
    datanodes = HDFS.getDatanodes()
    logger.info("datanodes = %s" % datanodes)
    HDFS.stopDatanodes()
    HDFS.stopNamenode()
    HDFS.formatNN(force=True, logoutput=True)

    for dn in datanodes:
        Machine.rm(user=Machine.getAdminUser(),
                   host=dn,
                   filepath="%s/current" %
                   HDFS.getConfigValue("dfs.datanode.data.dir"),
                   isdir=True)

    balancerModifyConfig(duReservedValue)
    HDFS.startNamenode(mod_conf_path)
    HDFS.startDatanodes(mod_conf_path)
    sleepTime = 45
    logger.info("sleep for %s sec" % sleepTime)
    time.sleep(sleepTime)

    version = Hadoop.getShortVersion()
    paths = [
        "/hdp", "/hdp/apps",
        "/hdp/apps/%s" % version,
        "/hdp/apps/%s/mapreduce" % version
    ]
    for path in paths:
        HDFS.mkdir(path=path, user=HDFS_USER)
    HDFS.chmod(runasUser=HDFS_USER,
               perm="777",
               directory="/hdp",
               recursive=True)
    HDFS.copyFromLocal(
        localpath="/usr/hdp/current/hadoop-client/mapreduce.tar.gz",
        hdfspath="/hdp/apps/%s/mapreduce/" % version)
    sleepTime = 45
    logger.info("sleep for %s sec for MR tarball replication" % sleepTime)
    time.sleep(sleepTime)
    paths = [
        "/app-logs", "/app-logs/hrt_qa", "/app-logs/hrt_qa/logs", "/mr-history"
    ]
    for path in paths:
        HDFS.mkdir(path=path, user=HDFS_USER)
    HDFS.chmod(runasUser=HDFS_USER,
               perm="777",
               directory="/app-logs",
               recursive=True)
    HDFS.chmod(runasUser=HDFS_USER,
               perm="777",
               directory="/mr-history",
               recursive=True)
    HDFS.mkdir(path="/user", user=HDFS_USER)
    HDFS.mkdir(path="/user/hrt_qa", user=HDFS_USER)
    HDFS.chown(runasUser=HDFS_USER,
               new_owner="hrt_qa:hrt_qa",
               directory="/user/hrt_qa",
               recursive=False)
    HDFS.chmod(runasUser="******",
               perm="770",
               directory="/user/hrt_qa",
               recursive=True)
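A hedged usage sketch for the setup function above; the reserved-space value and the modified-config path are placeholders, not values taken from the project:

# Hypothetical invocation: reserve roughly 1 GB per data disk and point the
# restarted daemons at a previously prepared modified configuration directory.
formatNN_SetupHDFS(duReservedValue=str(1024 * 1024 * 1024),
                   mod_conf_path="/tmp/balancer_modified_conf")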