def tear_down_jdbc_topology(cls, topologyName, useStandaloneCmd):
    """
    Kills the JDBC topology and drops the MySQL test table and database.
    """
    from beaver.dbutil import MySQL
    Storm.killTopology(topologyName, logoutput=True, useStandaloneCmd=useStandaloneCmd)
    MySQL.runCmd(
        "drop table user",
        database="test",
        host=mysqlnode,
        user=Config.get('machine', 'MYSQL_ROOT_USER'))
    MySQL.runAsRoot("DROP DATABASE IF EXISTS test")

def setup_storm_hbase_topology(cls, useStandaloneCmd):
    from beaver.component.hbase import HBase
    storm_version = Storm.getVersion(useStandaloneCmd=True)

    Machine.copy(JAVA_HBASE_SRC_DIR, LOCAL_HBASE_WORK_DIR, user=None, passwd=None)

    if Machine.isWindows():
        (_, _) = Maven.run(
            'package -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s' %
            (HADOOP_VERSION_MAVEN_PARAMETER, HADOOP_VERSION,
             STORM_VERSION_MAVEN_PARAMETER, storm_version,
             HBASE_CONF_MAVEN_PARAMETER, HBASE_CONF,
             HBASE_FILE_MAVEN_PARAMETER, HBASE_FILE,
             HADOOP_CONF_MAVEN_PARAMETER, HADOOP_CONF,
             HDFS_FILE_MAVEN_PARAMETER, HDFS_FILE,
             CORE_FILE_MAVEN_PARAMETER, CORE_FILE,
             PUBLIC_REPO_MAVEN_PARAMETER, Maven.getPublicRepoUrl()),
            cwd=LOCAL_HBASE_WORK_DIR)
    else:
        (_, _) = Maven.run(
            'package',
            cwd=LOCAL_HBASE_WORK_DIR,
            env={
                HADOOP_VERSION_MAVEN_PARAMETER: HADOOP_VERSION,
                STORM_VERSION_MAVEN_PARAMETER: storm_version,
                HBASE_CONF_MAVEN_PARAMETER: HBASE_CONF,
                HBASE_FILE_MAVEN_PARAMETER: HBASE_FILE,
                HADOOP_CONF_MAVEN_PARAMETER: HADOOP_CONF,
                HDFS_FILE_MAVEN_PARAMETER: HDFS_FILE,
                CORE_FILE_MAVEN_PARAMETER: CORE_FILE,
                PUBLIC_REPO_MAVEN_PARAMETER: Maven.getPublicRepoUrl()
            })

    exit_code, stdout = HBase.createTable(HBASE_TABLE_NAME, "cf", True, None)
    ruAssert("Storm", exit_code == 0)
    grant_cmd = "grant '%s', 'RWCXA', '%s', 'cf'" % (Storm.getStormUser(), HBASE_TABLE_NAME)
    exit_code, stdout = HBase.runShellCmds([grant_cmd])
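
    # Illustrative only: if Storm.getStormUser() resolves to "storm" (an assumption for this
    # example) and HBASE_TABLE_NAME is "WordCount" (the table scanned by verify_hbase_topology),
    # the grant_cmd above renders to the following HBase shell statement:
    #
    #   grant 'storm', 'RWCXA', 'WordCount', 'cf'
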
def submit_storm_hive_topology(cls, tcId, className, args, useStandaloneCmd):
    if Hadoop.isSecure():
        user_realm = None
        if Config.hasOption('machine', 'USER_REALM'):
            user_realm = Config.get('machine', 'USER_REALM', '')
        else:
            nnKerbPrincipal = HDFS.getNameNodePrincipal(defaultValue='')
            atloc = nnKerbPrincipal.find("@")
            if atloc != -1:
                # strip the leading '@' so it is not doubled when building user@REALM below
                user_realm = nnKerbPrincipal[atloc + 1:]
        if user_realm is not None:
            args += (" " + Machine.getHeadlessUserKeytab(Config.getEnv('USER')) +
                     " " + Config.getEnv('USER') + '@' + user_realm)

    exit_code, stdout = Storm.runStormHdfsTopology(
        TARGET_HIVE_STORM_JAR,
        className,
        args,
        None,
        logoutput=True,
        inBackground=False,
        useStandaloneCmd=useStandaloneCmd)
    logger.info(exit_code)
    ruAssert("Storm", exit_code == 0, "[StormHiveSubmit] %s Failed" % (tcId))
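
    # Illustrative only: on a secure cluster the submitted args gain a keytab path and a
    # principal, e.g. (keytab path and realm are hypothetical):
    #   "<original args> /path/to/<user>.headless.keytab <user>@EXAMPLE.COM"
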
def submit_storm_hdfs_topology(cls, tcId, className, args, topologyName, useStandaloneCmd):
    """
    Name: Storm-HDFS Topologies
    Description: Testing all 4 storm-hdfs topologies in distributed mode
      1. SequenceFileTopology
      2. HdfsFileTopology
      3. TridentFileTopology
      4. TridentSequenceTopology

    Steps to verify:
      1. Create the necessary input/output directories if needed.
      2. Run the storm topology in distributed mode using "storm <jar> <classname> <args>".
      3. Verify the expected output in the HDFS output directory.
      4. Kill the topology.
    """
    exit_code, stdout = Storm.runStormHdfsTopology(
        TARGET_HDFS_STORM_JAR,
        className,
        args,
        None,
        logoutput=True,
        inBackground=False,
        useStandaloneCmd=useStandaloneCmd)
    ruAssert("Storm", exit_code == 0, "[StormHDFSSubmit] %s Failed" % (tcId))
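
    # Illustrative invocation (the fully-qualified class name and CLI args below are
    # hypothetical; real values come from the calling test case):
    #
    #   cls.submit_storm_hdfs_topology(
    #       "Storm_HdfsFileTopology",
    #       "org.example.storm.test.HdfsFileTopology",   # hypothetical FQCN
    #       "HdfsFileTopology /tmp/dest",                 # hypothetical args
    #       "HdfsFileTopology",
    #       useStandaloneCmd=True)
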
def submit_storm_jdbc_topology(cls, tcId, className, args, topologyName, useStandaloneCmd):
    """
    Name: Storm-JDBC Topology
    Description: Testing storm-jdbc topology in distributed mode
      1. UserPersistanceTopology
    """
    from beaver.dbutil import MySQL
    MySQL.runAsRoot("CREATE DATABASE test")
    MySQL.runAsRoot("show databases")

    exit_code, stdout = Storm.runStormJdbcTopology(
        TARGET_JDBC_STORM_JAR,
        className,
        args,
        None,
        logoutput=True,
        inBackground=False,
        useStandaloneCmd=useStandaloneCmd)
    ruAssert("Storm", exit_code == 0, "[StormJDBCSubmit] %s Failed" % (tcId))
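
    # Illustrative invocation (class name and args are hypothetical; real values come from
    # the calling test case):
    #   cls.submit_storm_jdbc_topology(
    #       "Storm_UserPersistanceTopology",
    #       "org.example.storm.test.UserPersistanceTopology",   # hypothetical FQCN
    #       "<jdbc topology args>",
    #       "UserPersistanceTopology",
    #       useStandaloneCmd=True)
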
def tear_down_hbase_topology(cls, topologyName, useStandaloneCmd):
    """
    Tears down the HBase topology and drops the HBase test table.
    """
    from beaver.component.hbase import HBase
    Machine.rm(user=None, host="localhost", filepath=LOCAL_HBASE_WORK_DIR, isdir=True, passwd=None)
    Storm.killTopology(topologyName, logoutput=True, useStandaloneCmd=useStandaloneCmd)
    exit_code, stdout = HBase.dropTable(HBASE_TABLE_NAME, True, None)
    ruAssert("Storm", exit_code == 0)

def setup_storm_hdfs_topology(cls, useStandaloneCmd):
    storm_version = Storm.getVersion(useStandaloneCmd=True)
    try:
        file_obj = open(HDFS_CONFIG_FILE, 'w')
        if Hadoop.isSecure():
            file_obj.write('hdfs.keytab.file: ' +
                           Machine.getHeadlessUserKeytab(user=HADOOPQA_USER) + '\n')
            file_obj.write('hdfs.kerberos.principal: ' +
                           Machine.get_user_principal(user=HADOOPQA_USER) + '\n')
    finally:
        file_obj.close()

    HDFS.createDirectory("/tmp/mySeqTopology", HDFS_USER, "777", False)
    HDFS.createDirectory("/tmp/dest", HDFS_USER, "777", False)
    HDFS.createDirectory("/tmp/dest2", HDFS_USER, "777", False)
    HDFS.createDirectory("/tmp/foo", HDFS_USER, "777", False)
    HDFS.createDirectory("/tmp/trident", HDFS_USER, "777", False)
    HDFS.createDirectory("/tmp/trident-seq", HDFS_USER, "777", False)

    Machine.copy(JAVA_HDFS_SRC_DIR, LOCAL_HDFS_WORK_DIR, user=None, passwd=None)
    if not Machine.isWindows():
        (exit_code, _) = Maven.run(
            'package',
            cwd=LOCAL_HDFS_WORK_DIR,
            env={
                HADOOP_VERSION_MAVEN_PARAMETER: HADOOP_VERSION,
                STORM_VERSION_MAVEN_PARAMETER: storm_version,
                HADOOP_CONF_MAVEN_PARAMETER: HADOOP_CONF,
                HDFS_FILE_MAVEN_PARAMETER: HDFS_FILE,
                HADOOP_CORE_MAVEN_PARAMETER: HADOOP_CONF,
                CORE_FILE_MAVEN_PARAMETER: CORE_FILE,
                PUBLIC_REPO_MAVEN_PARAMETER: Maven.getPublicRepoUrl()
            })
    else:
        filepath = os.path.join(MOD_CONF_PATH, "core-site.xml")
        (exit_code, _) = Maven.run(
            'package -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s' %
            (HADOOP_VERSION_MAVEN_PARAMETER, HADOOP_VERSION,
             STORM_VERSION_MAVEN_PARAMETER, storm_version,
             HADOOP_CONF_MAVEN_PARAMETER, HADOOP_CONF,
             HDFS_FILE_MAVEN_PARAMETER, HDFS_FILE,
             HADOOP_CORE_MAVEN_PARAMETER, HADOOP_CONF,
             CORE_FILE_MAVEN_PARAMETER, CORE_FILE,
             PUBLIC_REPO_MAVEN_PARAMETER, Maven.getPublicRepoUrl()),
            cwd=LOCAL_HDFS_WORK_DIR)
    ruAssert("Storm", exit_code == 0, "[StormHDFSSetup] maven package command failed")
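
    # Illustrative only: on a secure cluster the generated hdfs-conf.yaml looks roughly like
    # the following (keytab path and realm are assumptions for the example):
    #
    #   hdfs.keytab.file: /path/to/hadoopqa.headless.keytab
    #   hdfs.kerberos.principal: hadoopqa@EXAMPLE.COM
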
def Storm_isHA(cls, logoutput=True):
    try:
        from beaver.component.storm import Storm
        return Storm.isHA()
    except Exception:
        if logoutput:
            logger.error("Exception occurred during Storm_isHA() call")
            logger.error(traceback.format_exc())
        return False

def tear_down_hdfs_topology(cls, topologyName, useStandaloneCmd):
    """
    Kills the HDFS topology and deletes the HDFS directories.
    """
    Machine.rm(user=None, host="localhost", filepath=LOCAL_HDFS_WORK_DIR, isdir=True, passwd=None)
    Storm.killTopology(topologyName, logoutput=True, useStandaloneCmd=useStandaloneCmd)
    HDFS.deleteDirectory("/tmp/mySeqTopology", HDFS_USER)
    HDFS.deleteDirectory("/tmp/dest", HDFS_USER)
    HDFS.deleteDirectory("/tmp/dest2", HDFS_USER)
    HDFS.deleteDirectory("/tmp/foo", HDFS_USER)
    HDFS.deleteDirectory("/tmp/trident", HDFS_USER)
    HDFS.deleteDirectory("/tmp/trident-seq", HDFS_USER)

def Storm_getNimbusSeeds(cls, logoutput=True):
    try:
        from beaver.component.storm import Storm
        return Storm.getNimbusSeeds()
    except Exception:
        if logoutput:
            logger.error("Exception occurred during Storm_getNimbusSeeds() call")
            logger.error(traceback.format_exc())
        return cls.getAllNodes()

def Storm_getNimbus(cls, logoutput=True):
    try:
        from beaver.component.storm import Storm
        return Storm.getNimbus()
    except Exception as e:
        if logoutput:
            logger.error("Exception occurred during Storm_getNimbus() call: %s", str(e))
        return None

def Storm_getLogDir(cls, logoutput=True):
    try:
        from beaver.component.storm import Storm
        return Storm.getLogDir()
    except Exception as e:
        if logoutput:
            logger.error("Exception occurred during Storm_getLogDir() call: %s", str(e))
        # fall back to the default Storm log location
        storm_log = "/var/log/storm"
        return storm_log

def tear_down_hive_topology(cls, topologyName, useStandaloneCmd):
    """
    Tears down the Hive topology and drops the Hive test table.
    """
    from beaver.component.hive import Hive
    Machine.rm(user=None, host="localhost", filepath=LOCAL_HIVE_WORK_DIR, isdir=True, passwd=None)
    Storm.killTopology(topologyName, logoutput=True, useStandaloneCmd=useStandaloneCmd)
    #Hive.restoreConfig(services=['metastore'])
    drop_table_q = "use %s; drop table if exists %s; " % (DATABASE_NAME, HIVE_TABLE_NAME)
    exit_code, stdout = Hive.runQuery(cls.get_set_queue_cmd(useStandaloneCmd) + drop_table_q)
    ruAssert("Storm", exit_code == 0)

def setup_storm_jdbc_topology(cls, useStandaloneCmd):
    from beaver.dbutil import MySQL
    MySQL.createUserWithAllPriveleges(STORM_JDBC_TEST_USER, STORM_JDBC_TEST_PASSWD)
    storm_version = Storm.getVersion(useStandaloneCmd=True)
    try:
        file_obj = open(HDFS_CONFIG_FILE, 'w')
        if Hadoop.isSecure():
            file_obj.write('hdfs.keytab.file: ' +
                           Machine.getHeadlessUserKeytab(user=HADOOPQA_USER) + '\n')
            file_obj.write('hdfs.kerberos.principal: ' +
                           Machine.get_user_principal(user=HADOOPQA_USER) + '\n')
    finally:
        file_obj.close()

    Machine.copy(JAVA_JDBC_SRC_DIR, LOCAL_JDBC_WORK_DIR, user=None, passwd=None)

    post_fenton_opt = " -DpostFenton=true" if Storm.isAfterFenton() else ""
    package_cmd = 'package ' + post_fenton_opt
    (exit_code, _) = Maven.run(
        package_cmd,
        cwd=LOCAL_JDBC_WORK_DIR,
        env={
            HADOOP_VERSION_MAVEN_PARAMETER: HADOOP_VERSION,
            STORM_VERSION_MAVEN_PARAMETER: storm_version,
            HADOOP_CONF_MAVEN_PARAMETER: HADOOP_CONF,
            HDFS_FILE_MAVEN_PARAMETER: HDFS_FILE,
            CORE_FILE_MAVEN_PARAMETER: CORE_FILE,
            PUBLIC_REPO_MAVEN_PARAMETER: Maven.getPublicRepoUrl()
        })
    ruAssert("Storm", exit_code == 0, "[StormJDBCSetup] maven package command failed")

def submit_storm_hbase_topology(cls, tcId, className, args, useStandaloneCmd):
    if Hadoop.isSecure():
        keytab_file = Machine.getHeadlessUserKeytab(user=HADOOPQA_USER)
        principal = Machine.get_user_principal(user=HADOOPQA_USER)
        args = args + " %s %s" % (keytab_file, principal)

    exit_code, stdout = Storm.runStormHdfsTopology(
        TARGET_HBASE_STORM_JAR,
        className,
        args,
        None,
        logoutput=True,
        inBackground=False,
        useStandaloneCmd=useStandaloneCmd)
    logger.info(exit_code)
    ruAssert("Storm", exit_code == 0, "[StormHBaseSubmit] %s Failed" % (tcId))

def verify_hdfs_topology(cls, topologyName, targetDir, lines, type, useStandaloneCmd):
    """
    Verifies that the storm-hdfs topologies produced the expected output.
    """
    #Slider app is killed before long running job verification so disabling topology activation checks.
    if useStandaloneCmd == True:
        ruAssert(
            "Storm",
            Storm.getTopologyStatus(topologyName, logoutput=True, useStandaloneCmd=useStandaloneCmd) == 'ACTIVE')

    exit_code, stdout = HDFS.lsr(targetDir, False, True)
    hdfsListOutput = stdout.splitlines()

    #Picking the second last line as the first file might not have enough content and the last file
    #can run into transient HDFS issues.
    if len(hdfsListOutput) >= 2:
        fileLine = hdfsListOutput[-2]
        sampleoutfile = fileLine.split(" ")[-1].strip()

        # Hacky solution as the test code for trident and core topologies writes under the same directory.
        # if fileLine.endswith(".txt") and type == "cat":
        #     sampleoutfile = fileLine.split(" ")[-1].strip()
        # if fileLine.endswith(".seq") and type == "text":
        #     sampleoutfile = fileLine.split(" ")[-1].strip()

        logger.info("Taking sampleoutput file : %s" % (sampleoutfile))

        if type == "text":
            exit_code, stdout = HDFS.text(sampleoutfile, None)
        else:
            exit_code, stdout = HDFS.cat(sampleoutfile, None)
        for line in lines:
            ruAssert(
                "Storm", stdout.find(line) >= 0,
                "[StormHDFSVerify] expected line : %s in %s" % (line, sampleoutfile))
    else:
        ruAssert("Storm", False, "hdfsListOutput must have at least 2 lines")
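
    # Illustrative invocation (directory and expected lines are hypothetical; "cat" is used
    # for plain-text output files, "text" for sequence files):
    #   cls.verify_hdfs_topology("HdfsFileTopology", "/tmp/dest", ["expected output line"],
    #                            "cat", useStandaloneCmd=True)
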
def verify_hbase_topology(cls, topologyName, lines, useStandaloneCmd):
    from beaver.component.hbase import HBase
    #Slider app is killed before long running job verification so disabling topology activation checks.
    if useStandaloneCmd == True:
        ruAssert(
            "Storm",
            Storm.getTopologyStatus(topologyName, logoutput=True, useStandaloneCmd=useStandaloneCmd) == 'ACTIVE')

    exit_code, stdout = HBase.runShellCmds(["scan 'WordCount'"])
    logger.info(exit_code)
    logger.info(stdout)
    for word in lines:
        ruAssert("Storm", stdout.find(word) >= 0,
                 "[StormHBaseVerify] %s not found in wordcount table" % word)

def verify_hive_topology(cls, topologyName, rows, useStandaloneCmd):
    #Slider app is killed before long running job verification so disabling topology activation checks.
    if useStandaloneCmd == True:
        ruAssert(
            "Storm",
            Storm.getTopologyStatus(topologyName, logoutput=True, useStandaloneCmd=useStandaloneCmd) == 'ACTIVE')

    verify_table_q = "select distinct id,name,phone,street,city,state from stormdb.userdata order by id;"
    stdout = cls.getHiveQueryOutput(verify_table_q, willRunMR=True, delim=",", useStandaloneCmd=useStandaloneCmd)
    logger.info(stdout)
    for row in rows:
        ruAssert("Storm", stdout.find(row) >= 0,
                 "[StormHiveVerify] %s not found in userdata table" % row)
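
    # The rows passed in are matched against comma-delimited query output in the column order
    # id,name,phone,street,city,state, so a caller would pass strings shaped like
    # "0,user0,555-0000,Some Street,Some City,CA" (values purely illustrative).
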
# redistribution, sharing, lending or other exploitation of all or
# any part of the contents of this file is strictly prohibited.
#
#
import os, re, string, time, socket, logging, platform, urllib2, collections, datetime, json, urllib, sys
import traceback  # used by the Storm_* helpers when logging exceptions
from beaver.component.hadoop import Hadoop, HDFS, MAPRED, YARN
from beaver.component.storm import Storm
from beaver.machine import Machine
from beaver.config import Config
from beaver import util
from beaver import configUtils
from beaver.maven import Maven
from beaver.component.rollingupgrade.RuAssert import ruAssert
#from beaver.component.rollingupgrade.ruCommon import Rollingupgrade, hdpRelease

IS_UPGRADE_SUPPORTED = Hadoop.isDalorBeyond() and Storm.isAfterErie()
logger = logging.getLogger(__name__)

#######HDFS config#####
JAVA_HDFS_SRC_DIR = os.path.join(Config.getEnv('WORKSPACE'), 'tests', 'storm', 'storm-hdfs', 'java')
LOCAL_HDFS_WORK_DIR = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'storm-hdfs-tests')
TARGET_HDFS_STORM_JAR = os.path.join(
    Config.getEnv('ARTIFACTS_DIR'), 'storm-hdfs-tests', 'target', 'storm-integration-test-1.0-SNAPSHOT.jar')
HDFS_CONFIG_FILE = os.path.join(Machine.getTempDir(), 'hdfs-conf.yaml')
HADOOP_CONF = Config.get('hadoop', 'HADOOP_CONF')
HDFS_FILE = "hdfs-site.xml"
CORE_FILE = "core-site.xml"
MOD_CONF_PATH = Hadoop.getModifiedConfigPath()

def setup_storm_hive_topology(cls, useStandaloneCmd):
    from beaver.component.hive import Hive

    storm_version = Storm.getVersion(useStandaloneCmd=True)
    hive_version = Hive.getVersion()

    global HIVE_METASTORE_URI
    global HIVE_HOST
    global HIVE_PORT
    global HIVE_WAREHOUSE_DIR
    HIVE_METASTORE_URI = Hive.getConfigValue("hive.metastore.uris", defaultValue="thrift://localhost:9083")
    HIVE_WAREHOUSE_DIR = Hive.getConfigValue("hive.metastore.warehouse.dir", defaultValue="/apps/hive/warehouse")
    HIVE_HOST = Hive.getHiveHost()
    HIVE_PORT = Hive.getMetastoreThriftPort()

    if Storm.isDalorBeyond():
        JAVA_HIVE_SRC_DIR = os.path.join(Config.getEnv('WORKSPACE'), 'tests', 'rolling_upgrade', 'Storm',
                                         '2_3', 'storm-hive', 'java')
    else:
        JAVA_HIVE_SRC_DIR = os.path.join(Config.getEnv('WORKSPACE'), 'tests', 'rolling_upgrade', 'Storm',
                                         '2_2', 'storm-hive', 'java')

    # hive.txn.manager and hive.support.concurrency are set through ambari as per bug-40500
    #logger.info("Restart Hive")
    #changes = {'hive-site.xml': {'hive.txn.manager': 'org.apache.hadoop.hive.ql.lockmgr.DbTxnManager',
    #                             'hive.support.concurrency': 'true'}}
    #Hive.modifyConfig(changes, services=['metastore'], restartService=True)

    logger.info("Create test database in Hive")
    exit_code, stdout = Hive.runQuery(
        cls.get_set_queue_cmd(useStandaloneCmd) +
        " drop database if exists stormdb cascade; create database stormdb;")
    ruAssert("Storm", exit_code == 0, "[StormHiveSetup] Failed to create test database" + stdout)
    HDFS.chmod(runasUser=HDFS.getHDFSUser(), perm=777, directory=HIVE_WAREHOUSE_DIR + "/" + DATABASE_NAME + ".db")

    #copy tests/storm/storm-hive/java to artifacts/storm-hive-tests
    logger.info("JAVA_SRC_DIR " + JAVA_HIVE_SRC_DIR)
    logger.info("LOCAL_WORK_DIR " + LOCAL_HIVE_WORK_DIR)
    Machine.copy(JAVA_HIVE_SRC_DIR, LOCAL_HIVE_WORK_DIR, user=None, passwd=None)

    #mvn package
    if Machine.isWindows():
        (_, _) = Maven.run(
            'package -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s -D%s=%s' %
            (HADOOP_VERSION_MAVEN_PARAMETER, HADOOP_VERSION,
             STORM_VERSION_MAVEN_PARAMETER, storm_version,
             HIVE_VERSION_MAVEN_PARAMETER, hive_version,
             PUBLIC_REPO_MAVEN_PARAMETER, Maven.getPublicRepoUrl(),
             CORE_FILE_MAVEN_PARAMETER, CORE_FILE,
             HADOOP_CORE_MAVEN_PARAMETER, HADOOP_CONF,
             HIVE_CORE_MAVEN_PARAMETER, HIVE_CORE_DIR,
             HIVE_FILE_MAVEN_PARAMETER, HIVE_FILE),
            cwd=LOCAL_HIVE_WORK_DIR)
    else:
        (_, _) = Maven.run(
            'package',
            cwd=LOCAL_HIVE_WORK_DIR,
            env={
                HADOOP_VERSION_MAVEN_PARAMETER: HADOOP_VERSION,
                STORM_VERSION_MAVEN_PARAMETER: storm_version,
                HIVE_VERSION_MAVEN_PARAMETER: hive_version,
                PUBLIC_REPO_MAVEN_PARAMETER: Maven.getPublicRepoUrl(),
                CORE_FILE_MAVEN_PARAMETER: CORE_FILE,
                HADOOP_CONF_MAVEN_PARAMETER: HADOOP_CONF,
                HDFS_FILE_MAVEN_PARAMETER: HDFS_FILE,
                HADOOP_CORE_MAVEN_PARAMETER: HADOOP_CONF,
                HIVE_CORE_MAVEN_PARAMETER: HIVE_CORE_DIR,
                HIVE_FILE_MAVEN_PARAMETER: HIVE_FILE
            })

    create_table_q = "use %s; \
        drop table if exists %s; \
        create table %s (id int, name string, phone string, street string) \
        partitioned by (city string, state string) \
        clustered by (id) into %s buckets \
        stored as orc \
        tblproperties ('transactional'='true');" % (DATABASE_NAME, HIVE_TABLE_NAME, HIVE_TABLE_NAME, "5")

    exit_code, stdout = Hive.runQuery(cls.get_set_queue_cmd(useStandaloneCmd) + create_table_q)
    ruAssert("Storm", exit_code == 0, "[StormHiveSetup] Failed to create test table userdata_partitioned")
    HDFS.chmod(runasUser=HDFS.getHDFSUser(), perm=777,
               directory=HIVE_WAREHOUSE_DIR + "/" + DATABASE_NAME + ".db/" + HIVE_TABLE_NAME)
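
    # For reference, with DATABASE_NAME and HIVE_TABLE_NAME resolving to "stormdb" and
    # "userdata" (the names used by verify_hive_topology; assumed here for illustration),
    # the rendered DDL is roughly:
    #
    #   use stormdb;
    #   drop table if exists userdata;
    #   create table userdata (id int, name string, phone string, street string)
    #   partitioned by (city string, state string)
    #   clustered by (id) into 5 buckets
    #   stored as orc
    #   tblproperties ('transactional'='true');
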