def setup():
    """
    Remove the HDFS file and directories used by these tests.

    Each removal must report exit code 0 (first element of the returned value);
    otherwise an AssertionError is raised.
    """
    cleanup_steps = (
        (HDFS.deleteFile, CREATE_FILE_PATH_IN_HADOOP),
        (HDFS.deleteDirectory, OUT_PATH_IN_HADOOP),
        (HDFS.deleteDirectory, CREATE_FILE_2_PATH_IN_HADOOP),
    )
    for remove, hdfs_path in cleanup_steps:
        # Keep the call outside the assert so it still runs under "python -O".
        result = remove(hdfs_path, user=HDFS_USER)
        assert result[0] == 0
def configureOozieSpark(cls):
    """
    Refresh the Spark configuration files in the Oozie share lib.

    Copies Spark's hive-site.xml into the "spark" folder of the latest share lib,
    copies tez-site.xml as well when Hadoop.isTez(True, False) reports Tez, and then
    asks Oozie to update its share lib.

    Raises AssertionError if the share lib update does not exit with 0.
    """
    cls.fix_qe_14910()
    # Workaround BUG-63500 oozie spark test cases are failing with
    # org.apache.thrift.transport.TTransportException null
    sparkShareLibPath = cls.getLatestShareLibPath() + "/spark"
    sparkHome = Config.get('spark', 'SPARK2_HOME')
    source = os.path.join(sparkHome, "conf", "hive-site.xml")
    target_hive_site = os.path.join(sparkShareLibPath, "hive-site.xml")
    # Delete the previous copy first so the copy below replaces it.
    HDFS.deleteFile(target_hive_site, cls.getOozieUser())
    HDFS.copyFromLocal(source, sparkShareLibPath, cls.getOozieUser())

    isTez = Hadoop.isTez(True, False)
    # Test the computed value: the bare attribute "Hadoop.isTez" is a callable and
    # therefore always truthy, which made this branch run unconditionally.
    if isTez:
        target_tez_site = os.path.join(sparkShareLibPath, "tez-site.xml")
        HDFS.deleteFile(target_tez_site, cls.getOozieUser())
        HDFS.copyFromLocal(
            os.path.join(Config.get('tez', 'TEZ_CONF_DIR'), "tez-site.xml"),
            sparkShareLibPath,
            cls.getOozieUser())

    exit_code, stdout = Oozie.share_lib_update()
    assert exit_code == 0
def insertCSVDataViaCSVBuildLoad(cls, csvFile, tableName, putIntoHDFS=True, deleteAfterExec=True,
                                 runInBackground=False, user=None, config=None, optionAndParameter="",
                                 env=None, delimiter=",", arrayDelimiter=None, schema=None):
    """
    Load a CSV file into a Phoenix table by running org.apache.phoenix.mapreduce.CsvBulkLoadTool.

    By default, the files will be allocated under /tmp/ folder in HDFS.

    :param csvFile: local path of the CSV file; its base name is the file name used under /tmp/.
    :param tableName: value passed to the tool's --table option.
    :param putIntoHDFS: when True, copy csvFile to /tmp/ in HDFS first (creating /tmp/ if missing).
    :param deleteAfterExec: when True, delete /tmp/<file> from HDFS after a foreground run.
    :param runInBackground: when True, start the command in the background and do not wait for it.
    :param user: user for the HDFS copy/delete; HADOOPQA_USER when None.
    :param config: forwarded to HDFS.copyFromLocal.
    :param optionAndParameter: forwarded to HDFS.copyFromLocal.
    :param env: optional extra environment variables; ENVIRONMENT entries override them.
    :param delimiter: field delimiter; delimiter options are only added to the command when this
                      differs from "," or arrayDelimiter is given.
    :param arrayDelimiter: optional array element delimiter; omitted from the command when None.
    :param schema: optional schema name, appended as "-schema <schema>".
    :return: (exit_code, stdout) of the command, or (0, '') when runInBackground is True.
    """
    global ZK_ZPARENT
    isSlider = Slider.isSlider()
    if isSlider:
        ZK_ZPARENT = util.getPropertyValueFromConfigXMLFile(
            os.path.join(Config.get('hbase', 'HBASE_CONF_DIR'), 'hbase-site.xml'),
            "zookeeper.znode.parent")

    # Locate the Phoenix client jar; the Linux pattern additionally requires a digit
    # right before "-client.jar".
    if Machine.isLinux():
        jarPattern = "phoenix-*[0-9]-client.jar"
    else:
        jarPattern = "phoenix-*-client.jar"
    clientjar = Machine.find(
        user=Machine.getAdminUser(),
        host="localhost",
        filepath=PHOENIX_HOME,
        searchstr=jarPattern,
        passwd=Machine.getAdminPasswd())

    isWindows = Machine.isWindows()
    if isWindows:
        # NOTE(review): str.strip() removes a *set of characters* from both ends, not the
        # literal prefix "\\localhost" - presumably meant to turn a \\localhost\X$\... share
        # path into X:\...; confirm it behaves as intended for every drive letter.
        clientjar = (clientjar[0].strip("\\localhost")).replace("$", ":")
        fileName = csvFile.split('\\')[-1]
    else:
        clientjar = clientjar[0]
        fileName = csvFile.split('/')[-1]

    # If we need to, we insert it into HDFS, since the library will take it from there.
    executingUser = HADOOPQA_USER if user is None else user
    if putIntoHDFS:
        if not HDFS.fileExists('/tmp/'):
            HDFS.mkdir('/tmp/')
        HDFS.copyFromLocal(csvFile, '/tmp/', executingUser, config, optionAndParameter)

    classpath = Config.get('hbase', 'HBASE_CONF_DIR') if isSlider else HBASE_CONF_DIR

    finalCommand = "%s jar %s org.apache.phoenix.mapreduce.CsvBulkLoadTool --table %s --input %s" \
                   % (HADOOP_CMD, clientjar, tableName, '/tmp/%s' % fileName)
    if schema is not None:
        finalCommand = '%s -schema %s' % (finalCommand, schema)

    hasDelimiterOptions = delimiter != "," or arrayDelimiter is not None
    if isWindows:
        # On Windows the classpath is handed over through the process environment.
        os.environ['HADOOP_USER_CLASSPATH_FIRST'] = 'true'
        os.environ['HADOOP_CLASSPATH'] = classpath
        if hasDelimiterOptions:
            finalCommand = "%s -d `\\\"`%s`\\\"" % (finalCommand, delimiter)
            # Only add -a when an array delimiter was given; previously a None value
            # crashed here with AttributeError on .strip().
            if arrayDelimiter is not None:
                finalCommand = "%s -a `\\\"`%s`\\\"" % (finalCommand, arrayDelimiter.strip("'"))
        finalCommand = "%s --zookeeper %s" % (finalCommand, ZK_HOST)
    else:
        # delimiter options
        if hasDelimiterOptions:
            finalCommand = "%s --delimiter %s" % (finalCommand, delimiter)
            # Only add the option when set; previously the literal text "None" was passed.
            if arrayDelimiter is not None:
                finalCommand = "%s --array-delimiter %s" % (finalCommand, arrayDelimiter)
        # ZKHosts options
        finalCommand = "%s --zookeeper %s" % (finalCommand, cls.getZKConnectString())
        # Elsewhere the classpath is set inline as a command prefix.
        finalCommand = "HADOOP_CLASSPATH=%s %s" % (classpath, finalCommand)

    # Merge caller-supplied variables with ENVIRONMENT (ENVIRONMENT wins on conflicts).
    # dict.update works on both Python 2 and 3, unlike concatenating .items() results.
    runEnv = dict(env) if env is not None else {}
    runEnv.update(ENVIRONMENT)

    if runInBackground:
        # The command is not awaited, so report success with empty output and leave the
        # CSV in HDFS (it cannot be deleted while the job may still be running).
        Machine.runinbackground(finalCommand, env=runEnv)
        return 0, ''

    exit_code, stdout = Machine.run(finalCommand, env=runEnv)
    # If selected, after insertion into HBase we will delete the csvFile from HDFS
    if deleteAfterExec:
        HDFS.deleteFile('/tmp/%s' % fileName, executingUser)
    return exit_code, stdout