def setupHS2ConcurrTestData(stdauth=True):
    # hive.support.concurrency is not in the whitelist, as this is a server setting
    # and not something that a user should/can set in a session.
    # In the case of Ranger and SQL std authorization, set hive.support.concurrency to true and restart HS2.
    changes = {
        'hive-site.xml': {
            'hive.txn.manager': 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager',
            'hive.support.concurrency': 'true',
            'hive.compactor.initiator.on': 'true',
            'hive.compactor.worker.threads': '3',
            'hive.compactor.check.interval': '10',
            'hive.timedout.txn.reaper.interval': '20s'
        },
        'hiveserver2-site.xml': {
            'hive.compactor.initiator.on': 'false',
            'hive.exec.dynamic.partition.mode': 'nonstrict'
        }
    }
    if not Hive.isHive2():
        changes['hiveserver2-site.xml']['hive.enforce.bucketing'] = 'true'
    else:
        changes['hiveserver2-site.xml']['hive.server2.enable.doAs'] = 'false'
        changes['hiveserver2-site.xml']['hive.txn.manager'] = 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager'
        changes['hiveserver2-site.xml']['hive.support.concurrency'] = 'true'
    Hive.modifyConfig(changes)
    time.sleep(60)

    data_dir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hs2concur-test-data")
    data_tgz = os.path.join(Config.getEnv('WORKSPACE'), "hs2concur-test-data.tgz")
    if not os.path.isfile(data_tgz):
        assert util.downloadUrl(Config.get('hive', 'HS2CONCURR_TEST_DATA'), data_tgz)
    Machine.tarExtractAll(data_tgz, data_dir)

    # load data into HDFS
    hdfs_user = Config.get("hadoop", 'HDFS_USER')
    test_user = Config.get("hadoop", 'HADOOPQA_USER')
    HDFS.createDirectory("/tmp/hs2data", user=test_user, perm='777', force=True)
    HDFS.createDirectory("/tmp/hs2data/student", user=test_user, perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'studenttab10k'), "/tmp/hs2data/student")
    HDFS.createDirectory("/tmp/hs2data/voter", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'votertab10k'), "/tmp/hs2data/voter")
    HDFS.createDirectory("/tmp/hs2data/customer_address", perm='777', force=True)
    HDFS.copyFromLocal(os.path.join(data_dir, 'customer_address10k'), "/tmp/hs2data/customer_address")

    query = """drop table if exists student;
create external table student (name string, age int, gpa double) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/student';
drop table if exists voter;
create external table voter (name string, age int, registration string, contributions float) row format delimited fields terminated by '\\t' stored as textfile location '/tmp/hs2data/voter';
drop table if exists customer_address;
create external table customer_address (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, ca_zip string, ca_country string, ca_gmt_offset decimal(5,2), ca_location_type string) row format delimited fields terminated by '|' stored as textfile location '/tmp/hs2data/customer_address';
drop table if exists customer_address_partitioned;
create table customer_address_partitioned (ca_address_sk int, ca_address_id string, ca_street_number string, ca_street_name string, ca_street_type string, ca_suite_number string, ca_city string, ca_county string, ca_state string, ca_zip string, ca_country string, ca_gmt_offset decimal(5,2)) partitioned by (ca_location_type string) clustered by (ca_state) into 50 buckets stored as orc tblproperties('transactional'='true');
insert into table customer_address_partitioned partition(ca_location_type) select ca_address_sk, ca_address_id, ca_street_number, ca_street_name, ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset, ca_location_type from customer_address;"""
    if stdauth:
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table student to role public with grant option;"
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table voter to role public with grant option;"
        query += "\ngrant SELECT, INSERT, UPDATE, DELETE on table customer_address_partitioned to role public with grant option;"
    exit_code, stdout, stderr = Hive.runQueryOnBeeline(query, readFromFile=True, logoutput=True)
    assert exit_code == 0, "Test data creation failed"
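
# Usage sketch (assumption): how this helper might be wired into a test module's
# setup. The function name below is illustrative only and not part of the original suite.
def exampleSetupForHS2ConcurrencyTests():
    # Provision the ACID-enabled test tables and, with stdauth=True, grant
    # SELECT/INSERT/UPDATE/DELETE on them to role public before concurrency tests run.
    setupHS2ConcurrTestData(stdauth=True)
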
def startLLAPWithChaosMonkey(interval='300'):
    # Apply the tez.am.task.max.failed.attempts override used for the chaos monkey run.
    hive_changes = {'tez-site.xml': {'tez.am.task.max.failed.attempts': '0'}}
    Hive.modifyConfig(hive_changes, services=['hiveserver2'])

    AMBARI_AGENT_TMP_DIR = '/var/lib/ambari-agent/tmp'
    ARTIFACTS_DIR = Config.getEnv('ARTIFACTS_DIR')
    LLAP_START_USER = Config.get('hive', 'HIVE_USER')

    # Find the llap-slider package directories generated by Ambari.
    dirs = [
        name for name in os.listdir(AMBARI_AGENT_TMP_DIR)
        if os.path.isdir(os.path.join(AMBARI_AGENT_TMP_DIR, name))
    ]
    llap_dirs = []
    for dir in dirs:
        if dir.startswith('llap-slider'):
            llap_dirs.append(dir)

    if len(llap_dirs) < 1:
        logger.info("Could not find llap dir under %s" % AMBARI_AGENT_TMP_DIR)
        Hive.startService(services=['hiveserver2'])
    else:
        # Use the most recent llap-slider package.
        llap_dir = llap_dirs[-1]

        # Raise the YARN container failure threshold so injected failures do not kill the app.
        resourceConfig = os.path.join(AMBARI_AGENT_TMP_DIR, llap_dir, 'resources.json')
        tmpResourceConfig = os.path.join(ARTIFACTS_DIR, 'resources.json')
        propertyMap = [(["components", "LLAP"], {"yarn.container.failure.threshold": "1000"})]
        util.writePropertiesToConfigJSONFileMulti(resourceConfig, tmpResourceConfig, propertyMap)
        Machine.copy(tmpResourceConfig, resourceConfig, user=Machine.getAdminUser(), passwd=Machine.getAdminPasswd())

        # Enable the Slider chaos monkey to inject container failures at the given interval.
        appConfig = os.path.join(AMBARI_AGENT_TMP_DIR, llap_dir, 'appConfig.json')
        tmpAppConfig = os.path.join(ARTIFACTS_DIR, 'appConfig.json')
        propertyMap = [
            (
                ["global"], {
                    "internal.chaos.monkey.probability.containerfailure": "10000",
                    "internal.chaos.monkey.interval.seconds": interval,
                    "internal.chaos.monkey.enabled": "True"
                }
            )
        ]
        util.writePropertiesToConfigJSONFileMulti(appConfig, tmpAppConfig, propertyMap)
        Machine.copy(tmpAppConfig, appConfig, user=Machine.getAdminUser(), passwd=Machine.getAdminPasswd())

        # Relaunch LLAP via the generated Slider run.sh.
        llapShellScript = os.path.join(AMBARI_AGENT_TMP_DIR, llap_dir, 'run.sh')
        exit_code, stdout = Machine.runas(LLAP_START_USER, llapShellScript)
        if exit_code != 0:
            logger.info("LLAP Shell Script failed to run successfully with %d" % exit_code)

        # Poll for up to 10 attempts (30s apart) until the LLAP Slider app reports a healthy status.
        for i in range(10):
            time.sleep(30)
            logger.info("@%d: Check if LLAP cluster is successfully deployed" % i)
            exit_code, stdout = Machine.runas(LLAP_START_USER, 'slider status llap0')
            if exit_code == 0:
                break
            elif i == 9:
                logger.info("LLAP cluster failed to deploy")
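
# Usage sketch (assumption): restart LLAP with a tighter chaos-monkey cycle. The
# interval value is illustrative; it is passed through unchanged into appConfig.json's
# internal.chaos.monkey.interval.seconds property.
def exampleStartLLAPWithAggressiveChaosMonkey():
    # Inject a container failure roughly every two minutes instead of the default 300s.
    startLLAPWithChaosMonkey(interval='120')
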
def switch_master_version(cls, action, version, config=None):
    '''
    Switches the Hive master services' version
    :param action: Whether to "upgrade" or "downgrade"
    :param version: Version to be switched to
    :param config: Configuration location
    '''
    from beaver.component.rollingupgrade.ruCommon import hdpSelect
    from beaver.component.hive import Hive

    currentHiveVersion = Hive.getVersion()

    if action == 'upgrade':
        # Backup the database used by the Hive Metastore
        logger.info("Performing backup of the Hive Metastore DB before starting the upgrade")
        Hive.backupMetastoreDB(cls._metastore_backup_file)

    node = Hive.getHiveHost()

    # Stop the old Hive Metastore
    logger.info("Stopping the Hive Metastore")
    Hive.stopService(services=["metastore"])

    # Switch the Hive Metastore to the new version
    hdpSelect.changeVersion("hive-metastore", version, node)

    if action == 'upgrade':
        logger.info("Upgrading the Hive Metastore schema")
        Hive.upgradeSchema()

    # Restart the Hive Metastore servers one at a time
    logger.info("Restarting the Hive Metastore")
    Hive.startService(services=["metastore"])

    # Start a new HiveServer2 instance on the next free Thrift port
    confHS2Port = Hive.getHiveserver2ThriftPort()
    hs2port = util.getNextAvailablePort(node, confHS2Port)

    hdpSelect.changeVersion("hive-server2", version, node)
    Hive.modifyConfig(config, services=['hiveserver2'], restartService=False)
    logger.info("Starting a new HiveServer2 at port '%d' for assisting rolling-upgrade" % hs2port)
    if hs2port != confHS2Port:
        changes = {'hive-site.xml': {'hive.server2.thrift.port': hs2port}}
        Hive.modifyConfig(changes, services=["hiveserver2"], restartService=False)
    Hive.startService(services=["hiveserver2"])
    cls._hs2_live_ports = [Hive.getHiveserver2ThriftPort(), hs2port]

    # Deregister the old HiveServer2 instances
    logger.info("Deregistering the HiveServer2 on version '%s'" % currentHiveVersion)
    Hive.deregisterHiveServer2(version=currentHiveVersion)

    from beaver.component.hcatalog import Hcatalog

    # Stop the old WebHCat server
    logger.info("Stopping the WebHCat server")
    node = Config.get('templeton', 'TEMPLETON_HOST', default=Machine.getfqdn())
    webhcatPort = Config.get('templeton', 'TEMPLETON_PORT', default="50111")
    Hcatalog.stop(node)

    # Switch WebHCat to the new version
    hdpSelect.changeVersion("hive-webhcat", version, node)

    # Start the WebHCat server
    logger.info("Restarting the WebHCat server")
    newConfDir = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'localWebhcatConf')
    if os.path.exists(newConfDir):
        Hcatalog.start(node, hcat_confdir=newConfDir)
    else:
        Hcatalog.start(node)
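
# Usage sketch (assumption): how a rolling-upgrade driver might invoke this classmethod.
# ru_hive_component stands in for whatever class owns switch_master_version; both that
# name and the new_version argument are hypothetical placeholders.
def exampleSwitchHiveMasterVersion(ru_hive_component, new_version):
    # Move the Metastore, HiveServer2 and WebHCat forward to new_version, keeping a
    # second HS2 instance alive on a spare Thrift port for the duration of the switch.
    ru_hive_component.switch_master_version('upgrade', new_version, config=None)
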