Ejemplo n.º 1
0
def startLLAPWithChaosMonkey(interval='300'):
    hive_changes = {'tez-site.xml': {'tez.am.task.max.failed.attempts': '0'}}
    Hive.modifyConfig(hive_changes, services=['hiveserver2'])

    AMBARI_AGENT_TMP_DIR = '/var/lib/ambari-agent/tmp'
    ARTIFACTS_DIR = Config.getEnv('ARTIFACTS_DIR')
    LLAP_START_USER = Config.get('hive', 'HIVE_USER')
    dirs = [
        name for name in os.listdir(AMBARI_AGENT_TMP_DIR) if os.path.isdir(os.path.join(AMBARI_AGENT_TMP_DIR, name))
    ]

    llap_dirs = []
    for dir in dirs:
        if dir.startswith('llap-slider'): llap_dirs.append(dir)

    if len(llap_dirs) < 1:
        logger.info("Could not find llap dir under %s" % AMBARI_AGENT_TMP_DIR)
        Hive.startService(services=['hiveserver2'])
    else:
        llap_dir = llap_dirs[-1]

        resourceConfig = os.path.join(AMBARI_AGENT_TMP_DIR, llap_dir, 'resources.json')
        tmpResourceConfig = os.path.join(ARTIFACTS_DIR, 'resources.json')
        propertyMap = [(["components", "LLAP"], {"yarn.container.failure.threshold": "1000"})]
        util.writePropertiesToConfigJSONFileMulti(resourceConfig, tmpResourceConfig, propertyMap)
        Machine.copy(tmpResourceConfig, resourceConfig, user=Machine.getAdminUser(), passwd=Machine.getAdminPasswd())

        appConfig = os.path.join(AMBARI_AGENT_TMP_DIR, llap_dir, 'appConfig.json')
        tmpAppConfig = os.path.join(ARTIFACTS_DIR, 'appConfig.json')
        propertyMap = [
            (
                ["global"], {
                    "internal.chaos.monkey.probability.containerfailure": "10000",
                    "internal.chaos.monkey.interval.seconds": interval,
                    "internal.chaos.monkey.enabled": "True"
                }
            )
        ]
        util.writePropertiesToConfigJSONFileMulti(appConfig, tmpAppConfig, propertyMap)
        Machine.copy(tmpAppConfig, appConfig, user=Machine.getAdminUser(), passwd=Machine.getAdminPasswd())

        llapShellScript = os.path.join(AMBARI_AGENT_TMP_DIR, llap_dir, 'run.sh')
        exit_code, stdout = Machine.runas(LLAP_START_USER, llapShellScript)
        if exit_code != 0: logger.info("LLAP Shell Script failed to run successfully with %d" % exit_code)

        for i in range(10):
            time.sleep(30)
            logger.info("@%d: Check if LLAP cluster is successfully deployed" % i)
            exit_code, stdout = Machine.runas(LLAP_START_USER, 'slider status llap0')
            if exit_code == 0:
                break
            elif i == 9:
                logger.info("LLAP cluster failed to deploy")
Ejemplo n.º 2
0
    def background_job_teardown(cls):
        '''
        Cleanup for long running Hive jobs
        '''
        from beaver.component.hive import Hive

        logger.info(
            "Make sure to switch the HiveServer2 to use the default port")
        adminUser = Machine.getAdminUser()
        hiveHost = Hive.getHiveHost()
        for port in cls._hs2_live_ports:
            pid = Machine.getPIDByPort(port, host=hiveHost, user=adminUser)
            if pid:
                Machine.killProcessRemote(pid, host=hiveHost, user=adminUser)
                time.sleep(2)
        if len(cls._hs2_live_ports) > 0:
            Hive.startService(services=["hiveserver2"])
Ejemplo n.º 3
0
    def switch_master_version(cls, action, version, config=None):
        '''
        Switches Hive master services' version
        :param action: Whether to "upgrade" or "downgrade"
        :param version: Version to be switched to
        :param config: Configuration location
        '''
        from beaver.component.rollingupgrade.ruCommon import hdpSelect
        from beaver.component.hive import Hive

        currentHiveVersion = Hive.getVersion()

        if action == 'upgrade':
            # Backup the database used by the Hive Metastore
            logger.info(
                "Performing backup of the Hive Metastore DB before starting the upgrade"
            )
            Hive.backupMetastoreDB(cls._metastore_backup_file)

        node = Hive.getHiveHost()

        # Stop the old Hive Metastore
        logger.info("Stopping the Hive Metastore")
        Hive.stopService(services=["metastore"])

        # Upgrade Hive Metastore servers to new version
        hdpSelect.changeVersion("hive-metastore", version, node)

        if action == 'upgrade':
            logger.info("Upgrading the Hive metastore schema")
            Hive.upgradeSchema()

        # Restart Hive Metastore servers one at a time
        logger.info("Restarting the Hive Metastore")
        Hive.startService(services=["metastore"])

        # Start new Hive Server 2 instance
        confHS2Port = Hive.getHiveserver2ThriftPort()
        hs2port = util.getNextAvailablePort(node, confHS2Port)

        hdpSelect.changeVersion("hive-server2", version, node)

        Hive.modifyConfig(config,
                          services=['hiveserver2'],
                          restartService=False)
        logger.info(
            "Starting a new HiveServer2 at port '%d' for assisting rolling-upgrade"
            % hs2port)
        if hs2port != confHS2Port:
            changes = {'hive-site.xml': {'hive.server2.thrift.port': hs2port}}
            Hive.modifyConfig(changes,
                              services=["hiveserver2"],
                              restartService=False)
        Hive.startService(services=["hiveserver2"])
        cls._hs2_live_ports = [Hive.getHiveserver2ThriftPort(), hs2port]

        # Deregister the old Hive Server 2 instances
        logger.info("Deregistering the HiveServer2 on version '%s'" %
                    currentHiveVersion)
        Hive.deregisterHiveServer2(version=currentHiveVersion)

        from beaver.component.hcatalog import Hcatalog

        # Stop the old WebHCat server
        logger.info("Stopping the WebHCat server")
        node = Config.get('templeton',
                          'TEMPLETON_HOST',
                          default=Machine.getfqdn())
        webhcatPort = Config.get('templeton',
                                 'TEMPLETON_PORT',
                                 default="50111")
        # Stop the old WebHCat server
        logger.info("Stop the WebHCat server")
        Hcatalog.stop(node)

        # Upgrade WebHCat to the new version
        hdpSelect.changeVersion("hive-webhcat", version, node)

        # Start the WebHCat server
        logger.info("Restarting the WebHCat server")
        newConfDir = os.path.join(Config.getEnv('ARTIFACTS_DIR'),
                                  'localWebhcatConf')
        if os.path.exists(newConfDir):
            Hcatalog.start(node, hcat_confdir=newConfDir)
        else:
            Hcatalog.start(node)