Ejemplo n.º 1
0
def setupHS2ConcurrTestData(stdauth=True):
    """Configure Hive for the HS2 concurrency tests and create the test tables.

    Steps, in order:

    1. Apply transaction-manager / compactor settings through
       ``Hive.modifyConfig`` and sleep for 60 seconds.
    2. Download the test-data tarball into WORKSPACE (only if it is not already
       there) and extract it under ARTIFACTS_DIR.
    3. Copy the student, voter and customer_address data files into
       ``/tmp/hs2data/...`` on HDFS.
    4. Create the tables through Beeline and, when requested, grant privileges
       on them to role ``public``.

    :param stdauth: When true, GRANT statements for the ``student``, ``voter``
        and ``customer_address_partitioned`` tables are appended to the script.
    :raises AssertionError: If the tarball download reports failure or the
        Beeline script exits with a non-zero code.
    """
    # hive.support.concurrency is not in the whitelist, as this is a server setting and not
    # something that a user should/can set in a session.
    # In the case of Ranger and SQL std authorization, set hive.support.concurrency to true
    # and restart HS2.
    changes = {
        'hive-site.xml': {
            'hive.txn.manager': 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager',
            'hive.support.concurrency': 'true',
            'hive.compactor.initiator.on': 'true',
            'hive.compactor.worker.threads': '3',
            'hive.compactor.check.interval': '10',
            'hive.timedout.txn.reaper.interval': '20s'
        },
        'hiveserver2-site.xml': {
            'hive.compactor.initiator.on': 'false',
            'hive.exec.dynamic.partition.mode': 'nonstrict'
        }
    }
    if not Hive.isHive2():
        changes['hiveserver2-site.xml']['hive.enforce.bucketing'] = 'true'
    else:
        changes['hiveserver2-site.xml']['hive.server2.enable.doAs'] = 'false'
        changes['hiveserver2-site.xml']['hive.txn.manager'] = 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager'
        changes['hiveserver2-site.xml']['hive.support.concurrency'] = 'true'
    Hive.modifyConfig(changes)
    # NOTE(review): fixed pause after the config change; presumably gives the
    # services time to come back up -- confirm whether a readiness check exists.
    time.sleep(60)

    data_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hs2concur-test-data")
    data_tgz = os.path.join(Config.getEnv('WORKSPACE'), "hs2concur-test-data.tgz")
    if not os.path.isfile(data_tgz):
        # The download must not live inside an ``assert``: under ``python -O``
        # assert statements are removed and the download would never run.
        downloaded = util.downloadUrl(Config.get('hive', 'HS2CONCURR_TEST_DATA'), data_tgz)
        if not downloaded:
            raise AssertionError("Failed to download HS2 concurrency test data to %s" % data_tgz)
    Machine.tarExtractAll(data_tgz, data_dir)

    # load data into HDFS
    test_user = Config.get("hadoop", 'HADOOPQA_USER')
    HDFS.createDirectory("/tmp/hs2data", user=test_user, perm='777', force=True)
    HDFS.createDirectory("/tmp/hs2data/student", user=test_user, perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'studenttab10k'), "/tmp/hs2data/student")
    # NOTE(review): unlike the two directories above, the next two are created
    # without user=test_user -- kept as in the original; confirm it is intended.
    HDFS.createDirectory("/tmp/hs2data/voter", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'votertab10k'), "/tmp/hs2data/voter")
    HDFS.createDirectory("/tmp/hs2data/customer_address", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'customer_address10k'), "/tmp/hs2data/customer_address")

    query = """drop table if exists student;
create external table student (name string, age int, gpa double) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/student';
drop table if exists voter;
create external table voter (name string, age int, registration string, contributions float) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/voter';
drop table if exists customer_address;
create external table customer_address (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, ca_zip string, ca_country string, ca_gmt_offset decimal(5,2), ca_location_type string) row format delimited fields terminated by '|' stored as textfile location '/tmp/hs2data/customer_address';
drop table if exists customer_address_partitioned;
create table customer_address_partitioned (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, ca_zip string, ca_country string, ca_gmt_offset decimal(5,2)) partitioned by (ca_location_type string) clustered by (ca_state) into 50 buckets stored as orc tblproperties('transactional'='true');
insert into table customer_address_partitioned partition(ca_location_type) select ca_address_sk, ca_address_id, ca_street_number, ca_street_name, ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset, ca_location_type from customer_address;"""
    if stdauth:
        # Same three GRANT statements as before, one per table.
        for table in ('student', 'voter', 'customer_address_partitioned'):
            query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table %s to role public with grant option;" % table

    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True, logoutput=True)
    # Explicit raise (same exception type as the former assert) so the check
    # also holds when Python runs with -O.
    if exit_code != 0:
        raise AssertionError("Test data creation failed")
Ejemplo n.º 2
0
def startLLAPWithChaosMonkey(interval='300'):
    """Start LLAP with Slider's chaos monkey enabled in its app configuration.

    Sets ``tez.am.task.max.failed.attempts`` to ``0`` for HiveServer2, then
    looks for an ``llap-slider*`` directory under the Ambari agent tmp dir.
    If none exists, only HiveServer2 is started. Otherwise the directory's
    ``resources.json`` and ``appConfig.json`` are rewritten (container failure
    threshold raised, chaos-monkey properties set), its ``run.sh`` is executed
    as the Hive user, and ``slider status llap0`` is polled up to 10 times at
    30-second intervals.

    Failures of ``run.sh`` or of the deployment check are only logged; nothing
    is raised by this function itself.

    :param interval: Value written to ``internal.chaos.monkey.interval.seconds``.
    """
    hive_changes = {'tez-site.xml': {'tez.am.task.max.failed.attempts': '0'}}
    Hive.modifyConfig(hive_changes, services=['hiveserver2'])

    AMBARI_AGENT_TMP_DIR = '/var/lib/ambari-agent/tmp'
    ARTIFACTS_DIR = Config.getEnv('ARTIFACTS_DIR')
    LLAP_START_USER = Config.get('hive', 'HIVE_USER')

    # os.listdir() returns entries in arbitrary order, so sort to make the
    # "last directory" choice below deterministic.
    # NOTE(review): presumably the names carry a timestamp suffix, in which
    # case the last one after sorting is the newest -- confirm.
    llap_dirs = sorted(
        name for name in os.listdir(AMBARI_AGENT_TMP_DIR)
        if name.startswith('llap-slider') and os.path.isdir(os.path.join(AMBARI_AGENT_TMP_DIR, name))
    )

    if not llap_dirs:
        logger.info("Could not find llap dir under %s" % AMBARI_AGENT_TMP_DIR)
        Hive.startService(services=['hiveserver2'])
        return

    llap_dir = llap_dirs[-1]

    # Raise the container failure threshold for the LLAP component.
    resourceConfig = os.path.join(AMBARI_AGENT_TMP_DIR, llap_dir, 'resources.json')
    tmpResourceConfig = os.path.join(ARTIFACTS_DIR, 'resources.json')
    propertyMap = [(["components", "LLAP"], {"yarn.container.failure.threshold": "1000"})]
    util.writePropertiesToConfigJSONFileMulti(resourceConfig, tmpResourceConfig, propertyMap)
    Machine.copy(tmpResourceConfig, resourceConfig, user=Machine.getAdminUser(), passwd=Machine.getAdminPasswd())

    # Enable the chaos monkey in the app config; the edited copy is written to
    # ARTIFACTS_DIR first and then copied back over the original as admin.
    appConfig = os.path.join(AMBARI_AGENT_TMP_DIR, llap_dir, 'appConfig.json')
    tmpAppConfig = os.path.join(ARTIFACTS_DIR, 'appConfig.json')
    propertyMap = [
        (
            ["global"], {
                "internal.chaos.monkey.probability.containerfailure": "10000",
                "internal.chaos.monkey.interval.seconds": interval,
                "internal.chaos.monkey.enabled": "True"
            }
        )
    ]
    util.writePropertiesToConfigJSONFileMulti(appConfig, tmpAppConfig, propertyMap)
    Machine.copy(tmpAppConfig, appConfig, user=Machine.getAdminUser(), passwd=Machine.getAdminPasswd())

    llapShellScript = os.path.join(AMBARI_AGENT_TMP_DIR, llap_dir, 'run.sh')
    exit_code, stdout = Machine.runas(LLAP_START_USER, llapShellScript)
    if exit_code != 0:
        # Best effort: a failed launch is logged and the status poll still runs.
        logger.info("LLAP Shell Script failed to run successfully with %d" % exit_code)

    # Poll for a successful deployment; the else-branch runs only when every
    # attempt finished without a zero exit code.
    for attempt in range(10):
        time.sleep(30)
        logger.info("@%d: Check if LLAP cluster is successfully deployed" % attempt)
        exit_code, stdout = Machine.runas(LLAP_START_USER, 'slider status llap0')
        if exit_code == 0:
            break
    else:
        logger.info("LLAP cluster failed to deploy")
Ejemplo n.º 3
0
    def switch_master_version(cls, action, version, config=None):
        '''
        Switches the Hive master services (Metastore, HiveServer2, WebHCat) to another version.

        Runs, in this order: optional Metastore DB backup; Metastore stop,
        version switch, optional schema upgrade and start; HiveServer2 version
        switch and start; deregistration of the HiveServer2 instances running
        the previous version; WebHCat stop, version switch and start.

        The ports of the live HiveServer2 instances are stored in
        cls._hs2_live_ports.

        :param action: Whether to "upgrade" or "downgrade". Only the value 'upgrade' triggers
                       the Metastore DB backup and the schema upgrade; any other value skips both.
        :param version: Version to be switched to (passed to hdpSelect.changeVersion)
        :param config: Configuration location, passed unchanged to Hive.modifyConfig for HiveServer2
        '''
        # NOTE(review): imported inside the method rather than at module level;
        # presumably to avoid a circular import -- confirm.
        from beaver.component.rollingupgrade.ruCommon import hdpSelect
        from beaver.component.hive import Hive

        # Captured before any version switch so the old HiveServer2 instances
        # can be deregistered by version further down.
        currentHiveVersion = Hive.getVersion()

        if action == 'upgrade':
            # Backup the database used by the Hive Metastore
            logger.info(
                "Performing backup of the Hive Metastore DB before starting the upgrade"
            )
            Hive.backupMetastoreDB(cls._metastore_backup_file)

        node = Hive.getHiveHost()

        # Stop the old Hive Metastore
        logger.info("Stopping the Hive Metastore")
        Hive.stopService(services=["metastore"])

        # Upgrade Hive Metastore servers to new version
        hdpSelect.changeVersion("hive-metastore", version, node)

        if action == 'upgrade':
            # The schema upgrade runs after the version switch and before the
            # Metastore is started again.
            logger.info("Upgrading the Hive metastore schema")
            Hive.upgradeSchema()

        # Restart Hive Metastore servers one at a time
        logger.info("Restarting the Hive Metastore")
        Hive.startService(services=["metastore"])

        # Start new Hive Server 2 instance
        confHS2Port = Hive.getHiveserver2ThriftPort()
        # NOTE(review): presumably returns confHS2Port itself when it is free and
        # another free port otherwise -- confirm against util.getNextAvailablePort.
        hs2port = util.getNextAvailablePort(node, confHS2Port)

        hdpSelect.changeVersion("hive-server2", version, node)

        # NOTE(review): config defaults to None and is passed straight through;
        # confirm Hive.modifyConfig accepts None.
        Hive.modifyConfig(config,
                          services=['hiveserver2'],
                          restartService=False)
        logger.info(
            "Starting a new HiveServer2 at port '%d' for assisting rolling-upgrade"
            % hs2port)
        # Override the thrift port only when the chosen port differs from the
        # configured one.
        if hs2port != confHS2Port:
            changes = {'hive-site.xml': {'hive.server2.thrift.port': hs2port}}
            Hive.modifyConfig(changes,
                              services=["hiveserver2"],
                              restartService=False)
        Hive.startService(services=["hiveserver2"])
        # Record the port reported by Hive.getHiveserver2ThriftPort() together
        # with the port of the instance that was just started.
        cls._hs2_live_ports = [Hive.getHiveserver2ThriftPort(), hs2port]

        # Deregister the old Hive Server 2 instances
        logger.info("Deregistering the HiveServer2 on version '%s'" %
                    currentHiveVersion)
        Hive.deregisterHiveServer2(version=currentHiveVersion)

        from beaver.component.hcatalog import Hcatalog

        # Resolve the WebHCat host (falls back to this machine's FQDN) and port
        logger.info("Stopping the WebHCat server")
        node = Config.get('templeton',
                          'TEMPLETON_HOST',
                          default=Machine.getfqdn())
        # NOTE(review): webhcatPort is not used in the visible part of this method.
        webhcatPort = Config.get('templeton',
                                 'TEMPLETON_PORT',
                                 default="50111")
        # Stop the old WebHCat server
        # NOTE(review): this is the second "stop" log line for the same action.
        logger.info("Stop the WebHCat server")
        Hcatalog.stop(node)

        # Upgrade WebHCat to the new version
        hdpSelect.changeVersion("hive-webhcat", version, node)

        # Start the WebHCat server, using the local conf dir under ARTIFACTS_DIR
        # when it exists
        logger.info("Restarting the WebHCat server")
        newConfDir = os.path.join(Config.getEnv('ARTIFACTS_DIR'),
                                  'localWebhcatConf')
        if os.path.exists(newConfDir):
            Hcatalog.start(node, hcat_confdir=newConfDir)
        else:
            Hcatalog.start(node)