def test_oozie(self):
    """
    Test oozie for Ecosystem cluster.
    """
    master_vm_hostname = ssh_check_output_hadoop(self.user, self.master_IP,
                                                 'cat /etc/hostname', hadoop_path='')[0]
    # Substitute the master hostname into the workflow definition.
    with open("workflow_ecosystem.xml", "r") as workflow_file:
        read_workflow = workflow_file.read()
    with open("workflow_ecosystem.xml", "w") as workflow_file:
        workflow_file.write(re.sub("hostname", master_vm_hostname, read_workflow))
    ssh_call_hadoop(self.user, self.master_IP, 'dfs -mkdir oozie_app',
                    hadoop_path=self.hdfs_path)
    ssh_stream_to_hadoop(self.user, self.master_IP,
                         join(dirname(abspath(__file__)), "workflow_ecosystem.xml"),
                         self.VALID_DEST_DIR + "/oozie_app/workflow.xml",
                         hadoop_path=self.hdfs_path)
    job_properties = JOB_PROPERTIES_ECOSYSTEM_TEMPLATE.format(master_vm_hostname)
    create_job_properties_file = 'echo -e "{0}" > job.properties'.format(job_properties)
    subprocess.call(create_job_properties_file, stderr=FNULL, shell=True)
    subprocess.call("scp {0} {1}@{2}:/tmp/".format(JOB_PROPERTIES_PATH, self.user,
                                                   self.master_IP),
                    stderr=FNULL, shell=True)
    ssh_call_hadoop(self.user, self.master_IP, self.oozie_command, hadoop_path='')
    exist_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                         ' dfs -test -e {0}/{1}'.format(OOZIE_TEST_FOLDER,
                                                                        "oozie_test_folder"),
                                         hadoop_path=self.hdfs_path)
    self.assertEqual(exist_check_status, 0)
    self.addCleanup(self.delete_hdfs_files, OOZIE_TEST_FOLDER, prefix="-r")
    self.addCleanup(self.hadoop_local_fs_action, 'rm /tmp/job.properties')
    self.addCleanup(self.delete_local_files, JOB_PROPERTIES_PATH)
    # Restore the workflow template to its original "hostname" placeholder.
    with open("workflow_ecosystem.xml", "w") as workflow_file:
        workflow_file.write(re.sub(master_vm_hostname, "hostname", read_workflow))

def test_hive_count_rows_in_table_exists(self):
    """
    Functional test for Ecosystem Hive:
    creates a table (if not exists) and counts rows in this table.
    """
    # create a table
    hive_command = "hive -e 'CREATE TABLE IF NOT EXISTS hive_table ( age int, name String );'"
    ssh_call_hadoop(self.user, self.master_IP, hive_command,
                    hadoop_path='/usr/local/hive/bin/')
    # count rows
    hive_command_count = "hive -e 'select count(*) from hive_table;'"
    exist_check_status = ssh_call_hadoop(self.user, self.master_IP, hive_command_count,
                                         hadoop_path='/usr/local/hive/bin/')
    self.assertEqual(exist_check_status, 0)  # OK
    # Remove test table
    hive_command = "hive -e 'DROP TABLE hive_table;'"
    ssh_call_hadoop(self.user, self.master_IP, hive_command,
                    hadoop_path='/usr/local/hive/bin/')

def test_hbase_table_exists(self):
    """
    Functional test for Ecosystem HBase:
    create a table and then check that the table exists.
    """
    baseurl = "http://" + self.master_IP + ":16010"
    tablename = "testtable"
    # Create shell script so as to create the table
    self.hadoop_local_fs_action(
        "echo " + "create \\'testtable\\', \\'cf\\'" +
        " > {0} && echo exit >> {0}".format(HBASE_SCRIPT_PATH))
    hbase_command = "hbase shell " + HBASE_SCRIPT_PATH
    ssh_call_hadoop(self.user, self.master_IP, hbase_command,
                    hadoop_path='/usr/local/hbase/bin/')
    # Check if table exists
    request = requests.get(baseurl + "/table.jsp?name=" + tablename)
    self.assertEqual(request.status_code, 200)  # OK
    # Remove test data
    self.hadoop_local_fs_action(
        "echo disable \\'testtable\\' > {0} && echo drop \\'testtable\\' >> {0} && echo exit >> {0}"
        .format(HBASE_SCRIPT_PATH))
    ssh_call_hadoop(self.user, self.master_IP, hbase_command,
                    hadoop_path='/usr/local/hbase/bin/')
    self.addCleanup(self.hadoop_local_fs_action, 'rm ' + HBASE_SCRIPT_PATH)

def test_put_from_local_recursive(self):
    """
    Functional test to put files inside a folder from local to hdfs and
    check that all the files now exist in hdfs and are not zero size.
    """
    list_of_files = []
    for i in range(10):
        subprocess.call('echo "this is the unit test file {0} for local to hdfs'
                        ' orka-cli put." > {0}{1}'.format(i, SOURCE_LOCAL_TO_HDFS_FILE),
                        stderr=FNULL, shell=True)
        list_of_files.append('{0}{1}'.format(i, SOURCE_LOCAL_TO_HDFS_FILE))
    list_of_files.append('/user/hduser')
    list_of_files.remove('0{0}'.format(SOURCE_LOCAL_TO_HDFS_FILE))
    self.opts.update({'source': '0{0}'.format(SOURCE_LOCAL_TO_HDFS_FILE),
                      'destination': list_of_files, 'fileput': True})
    HadoopCluster(self.opts).file_action()
    list_of_files.insert(0, '0{0}'.format(SOURCE_LOCAL_TO_HDFS_FILE))
    for file in list_of_files[:-1]:
        exist_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                             ' dfs -test -e {0}'.format(file),
                                             hadoop_path=self.hdfs_path)
        zero_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                            ' dfs -test -z {0}'.format(file),
                                            hadoop_path=self.hdfs_path)
        self.assertEqual(exist_check_status, 0)
        self.assertEqual(zero_check_status, 1)
        self.addCleanup(self.delete_hdfs_files, '/user/hduser/{0}'.format(file))
        self.addCleanup(self.delete_local_files, file)

def put_file_to_hdfs(self, file_to_create):
    """
    Helper method to create a file in hdfs before a test.
    """
    self.hadoop_local_fs_action('echo "test file for hdfs" > {0}'.format(file_to_create))
    ssh_call_hadoop(self.user, self.master_IP, ' dfs -put {0}'.format(file_to_create),
                    hadoop_path=self.hdfs_path)

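# NOTE: ``hadoop_local_fs_action`` is defined elsewhere in the test class.
# A minimal sketch of the assumed behaviour, reconstructed from how the
# tests call it: run an arbitrary shell command on the master VM over ssh.
def hadoop_local_fs_action(self, command):
    """
    Execute a shell command on the master VM's local filesystem (sketch of
    the assumed implementation, not the project's actual code).
    """
    subprocess.call('ssh {0}@{1} "{2}"'.format(self.user, self.master_IP, command),
                    stderr=FNULL, shell=True)
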
def delete_hdfs_files(self, file_to_delete, prefix=""): """ Helper method to delete files transfered to hdfs filesystem after test. """ ssh_call_hadoop( self.user, self.master_IP, " dfs -rm {0} {1}".format(prefix, file_to_delete), hadoop_path=self.hdfs_path )
def delete_hdfs_files(self, file_to_delete, prefix=""): """ Helper method to delete files transfered to hdfs filesystem after test. """ ssh_call_hadoop(self.user, self.master_IP, ' dfs -rm {0} {1}'.format(prefix, file_to_delete), hadoop_path=self.hdfs_path)
def test_pig(self):
    """
    Make a directory in hdfs by running a pig command.
    """
    ssh_call_hadoop(self.user, self.master_IP, self.pig_command, hadoop_path='')
    exist_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                         ' dfs -test -e {0}'.format(PIG_TEST_FOLDER),
                                         hadoop_path=self.hdfs_path)
    self.assertEqual(exist_check_status, 0)
    self.addCleanup(self.delete_hdfs_files, PIG_TEST_FOLDER, prefix="-r")

def test_pig(self):
    """
    Test pig for hadoop ecosystem.
    """
    pig_command = ('export JAVA_HOME=/usr/lib/jvm/java-8-oracle; '
                   'export HADOOP_HOME=/usr/local/hadoop; '
                   '/usr/local/pig/bin/pig -e "fs -mkdir /tmp/pig_test_folder"')
    ssh_call_hadoop(self.user, self.master_IP, pig_command, hadoop_path='')
    exist_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                         ' dfs -test -e /tmp/{0}'.format('pig_test_folder'))
    self.assertEqual(exist_check_status, 0)
    self.addCleanup(self.delete_hdfs_files, '/tmp/pig_test_folder', prefix="-r")

def test_pig_script(self):
    """
    Test pig through a pig script.
    """
    self.put_file_to_hdfs('/tmp/{0}'.format('test_file_pig.txt'))
    # The quotes around the inline script are escaped (\") so that they
    # survive the remote shell invocation made by ssh_call_hadoop; a less
    # fragile ``pig -f`` variant is sketched after this test.
    pig_command = "export JAVA_HOME=/usr/lib/jvm/java-8-oracle; export HADOOP_HOME=/usr/local/hadoop; /usr/local/pig/bin/pig -e \\{0}\" ".format(
        "\"data = LOAD {0} as (text:CHARARRAY);"
        "upper_case = FOREACH data GENERATE org.apache.pig.piggybank.evaluation.string.UPPER(text);"
        "STORE upper_case INTO {1};\\"
        .format("'/user/hduser/test_file_pig.txt'", "'/user/hduser/pig_test'"))
    ssh_call_hadoop(self.user, self.master_IP, pig_command, hadoop_path='')
    exist_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                         ' dfs -test -e {0}'.format('/user/hduser/pig_test/_SUCCESS'),
                                         hadoop_path=self.hdfs_path)
    self.assertEqual(exist_check_status, 0)
    self.addCleanup(self.delete_hdfs_files, '/user/hduser/pig_test', prefix="-r")
    self.addCleanup(self.hadoop_local_fs_action, 'rm /tmp/{0}'.format('test_file_pig.txt'))
    # The copy put to hdfs lives under /user/hduser, so it is removed with the
    # hdfs helper (the original cleanup used the local-fs helper here, which
    # cannot delete an hdfs path).
    self.addCleanup(self.delete_hdfs_files, '/user/hduser/{0}'.format('test_file_pig.txt'))

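# A hypothetical, less fragile alternative to the inline ``pig -e`` call in
# test_pig_script above: stage the script in a file on the master and run it
# with ``pig -f``. Sketch only; ``run_pig_script_from_file`` and its
# ``script_path`` default are assumptions, not part of the original suite.
def run_pig_script_from_file(self, pig_script_body, script_path='/tmp/test_script.pig'):
    """
    Write a pig script to a file on the master VM and execute it with
    pig -f, avoiding nested quote escaping (sketch).
    """
    self.hadoop_local_fs_action("echo \"{0}\" > {1}".format(pig_script_body, script_path))
    return ssh_call_hadoop(self.user, self.master_IP,
                           'export JAVA_HOME=/usr/lib/jvm/java-8-oracle; '
                           'export HADOOP_HOME=/usr/local/hadoop; '
                           '/usr/local/pig/bin/pig -f {0}'.format(script_path),
                           hadoop_path='')
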
def test_put_from_remote(self):
    """
    Functional test to put a file from a remote server to hdfs and check
    that the file now exists in hdfs and is not zero size.
    """
    self.opts.update({'source': SOURCE_REMOTE_TO_HDFS_FILE,
                      'destination': DEST_REMOTE_TO_HDFS_FILE,
                      'user': '', 'password': ''})
    HadoopCluster(self.opts).put_from_server()
    exist_check_status = ssh_call_hadoop('hduser', self.master_IP,
                                         ' dfs -test -e {0}'.format(self.opts['destination']))
    zero_check_status = ssh_call_hadoop('hduser', self.master_IP,
                                        ' dfs -test -z {0}'.format(self.opts['destination']))
    self.assertEqual(exist_check_status, 0)
    self.assertEqual(zero_check_status, 1)
    self.addCleanup(self.delete_hdfs_files, self.opts['destination'])

def test_compare_wordcount_pithos_hdfs(self):
    """
    Functional test to upload a test file in Pithos and run two wordcounts,
    one from Pithos and one native from hdfs, and compare the length of the
    output files.
    """
    subprocess.call('echo "this is a test file to run a wordcount" > {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                    stderr=FNULL, shell=True)
    subprocess.call('kamaki file upload {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                    stderr=FNULL, shell=True)
    ssh_call_hadoop('hduser', self.master_IP,
                    'kamaki file download {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                    hadoop_path='')
    ssh_call_hadoop('hduser', self.master_IP,
                    ' dfs -put {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE))
    ssh_call_hadoop('hduser', self.master_IP,
                    wordcount_command + 'pithos://pithos/{0} {1}'.format(SOURCE_PITHOS_TO_HDFS_FILE,
                                                                         PITHOS_WORDCOUNT_DIR),
                    hadoop_path=hadoop_path_wordcount)
    ssh_call_hadoop('hduser', self.master_IP,
                    wordcount_command + '{0} {1}'.format(SOURCE_PITHOS_TO_HDFS_FILE,
                                                         HDFS_WORDCOUNT_DIR),
                    hadoop_path=hadoop_path_wordcount)
    bytes_pithos_written = ssh_check_output_hadoop('hduser', self.master_IP,
                                                   ' dfs -dus {0}'.format(PITHOS_WORDCOUNT_DIR))
    bytes_hdfs_written = ssh_check_output_hadoop('hduser', self.master_IP,
                                                 ' dfs -dus {0}'.format(HDFS_WORDCOUNT_DIR))
    self.assertEqual(bytes_pithos_written[0].replace(PITHOS_WORDCOUNT_DIR, ""),
                     bytes_hdfs_written[0].replace(HDFS_WORDCOUNT_DIR, ""))
    self.addCleanup(self.delete_hdfs_files, PITHOS_WORDCOUNT_DIR, prefix="-r")
    self.addCleanup(self.delete_hdfs_files, HDFS_WORDCOUNT_DIR, prefix="-r")
    self.addCleanup(self.delete_hdfs_files, SOURCE_PITHOS_TO_HDFS_FILE)
    self.addCleanup(self.delete_local_files, SOURCE_PITHOS_TO_HDFS_FILE)
    self.addCleanup(self.delete_pithos_files, SOURCE_PITHOS_TO_HDFS_FILE)
    self.addCleanup(self.hadoop_local_fs_action, 'rm {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE))

def test_put_from_local(self):
    """
    Functional test to put a file from local to hdfs and check that the file
    now exists in hdfs and is not zero size.
    """
    subprocess.call('echo "this is a unit test file for local to hdfs orka-cli put." > {0}'.format(SOURCE_LOCAL_TO_HDFS_FILE),
                    stderr=FNULL, shell=True)
    self.opts.update({'source': SOURCE_LOCAL_TO_HDFS_FILE,
                      'destination': DEST_LOCAL_TO_HDFS_FILE})
    HadoopCluster(self.opts).put_from_local(self.active_cluster)
    exist_check_status = ssh_call_hadoop('hduser', self.master_IP,
                                         ' dfs -test -e {0}'.format(self.opts['destination']))
    zero_check_status = ssh_call_hadoop('hduser', self.master_IP,
                                        ' dfs -test -z {0}'.format(self.opts['destination']))
    self.assertEqual(exist_check_status, 0)
    self.assertEqual(zero_check_status, 1)
    self.addCleanup(self.delete_hdfs_files, self.opts['destination'])
    self.addCleanup(self.delete_local_files, self.opts['source'])

def test_put_from_remote(self):
    """
    Functional test to put a file from a remote server to hdfs and check
    that the file now exists in hdfs and is not zero size.
    """
    self.opts.update({'source': SOURCE_REMOTE_TO_HDFS_FILE,
                      'destination': DEST_REMOTE_TO_HDFS_FILE,
                      'user': '', 'password': ''})
    HadoopCluster(self.opts).put_from_server()
    exist_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                         ' dfs -test -e {0}'.format(self.opts['destination']),
                                         hadoop_path=self.hdfs_path)
    zero_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                        ' dfs -test -z {0}'.format(self.opts['destination']),
                                        hadoop_path=self.hdfs_path)
    self.assertEqual(exist_check_status, 0)
    self.assertEqual(zero_check_status, 1)
    self.addCleanup(self.delete_hdfs_files, self.opts['destination'])

def test_flume(self):
    """
    Checks Flume's operation. Create a temp file and check that it is
    uploaded successfully to hdfs.
    """
    # clear flume data directories
    self.clear_flume_hdfs_folder()
    self.clear_flume_tmp_folder()
    # create the temp file in /usr/local/flume/tmp
    response = subprocess.call("ssh " + self.user + "@" + self.master_IP + " \"" +
                               "echo 'this is a test file'" +
                               " > /usr/local/flume/tmp/tempfile.log" + "\"",
                               stderr=FNULL, shell=True)
    # wait for flume to write data in hdfs
    time.sleep(20)
    # check hdfs for Flume data
    command = ' dfs -test -e /user/hduser/flume/FlumeData.*'
    exist_check_status = ssh_call_hadoop(self.user, self.master_IP, command,
                                         hadoop_path='/usr/local/hadoop/bin/hdfs')
    self.assertEqual(exist_check_status, 0)  # Output exists in hdfs
    # clear flume hdfs folder
    self.addCleanup(self.clear_flume_hdfs_folder)
    # clear flume local temp folder
    self.addCleanup(self.clear_flume_tmp_folder)

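# NOTE: ``clear_flume_hdfs_folder`` and ``clear_flume_tmp_folder`` are defined
# elsewhere in the test class. Minimal sketches of the assumed
# implementations, based on the paths used in test_flume: remove Flume's
# output in hdfs and its local spool directory on the master.
def clear_flume_hdfs_folder(self):
    """Remove Flume output files from hdfs (sketch of assumed implementation)."""
    ssh_call_hadoop(self.user, self.master_IP,
                    ' dfs -rm -r /user/hduser/flume',
                    hadoop_path='/usr/local/hadoop/bin/hdfs')

def clear_flume_tmp_folder(self):
    """Remove Flume temp files on the master VM (sketch of assumed implementation)."""
    self.hadoop_local_fs_action('rm -f /usr/local/flume/tmp/*')
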
def test_run_wordcount_from_pithos(self):
    """
    Functional test to upload a test file in Pithos and run a wordcount
    streaming the file from Pithos.
    """
    subprocess.call('echo "this is a test file to run a streaming wordcount" > {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                    stderr=FNULL, shell=True)
    subprocess.call('kamaki file upload {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                    stderr=FNULL, shell=True)
    ssh_call_hadoop('hduser', self.master_IP,
                    wordcount_command + 'pithos://pithos/{0} {1}'.format(SOURCE_PITHOS_TO_HDFS_FILE,
                                                                         PITHOS_WORDCOUNT_DIR),
                    hadoop_path=hadoop_path_wordcount)
    exist_check_status = ssh_call_hadoop('hduser', self.master_IP,
                                         ' dfs -test -e {0}/_SUCCESS'.format(PITHOS_WORDCOUNT_DIR))
    self.assertEqual(exist_check_status, 0)
    self.addCleanup(self.delete_hdfs_files, PITHOS_WORDCOUNT_DIR, prefix="-r")
    self.addCleanup(self.delete_local_files, SOURCE_PITHOS_TO_HDFS_FILE)
    self.addCleanup(self.delete_pithos_files, SOURCE_PITHOS_TO_HDFS_FILE)

def test_compare_mapreduce_wordcount_pithos_hdfs(self):
    """
    Run two MapReduce wordcounts, one from Pithos and one native from hdfs,
    and compare the length of the output files.
    """
    subprocess.call('echo "this is a test file to run a wordcount" > {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                    stderr=FNULL, shell=True)
    subprocess.call('kamaki file upload {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                    stderr=FNULL, shell=True)
    ssh_call_hadoop(self.user, self.master_IP,
                    'kamaki file download {0} /tmp/{0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                    hadoop_path='')
    ssh_call_hadoop(self.user, self.master_IP,
                    ' dfs -put /tmp/{0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                    hadoop_path=self.hdfs_path)
    ssh_call_hadoop(self.user, self.master_IP,
                    self.wordcount_command + 'pithos://pithos/{0} {1}'.format(SOURCE_PITHOS_TO_HDFS_FILE,
                                                                              PITHOS_WORDCOUNT_DIR),
                    hadoop_path=self.hadoop_path)
    ssh_call_hadoop(self.user, self.master_IP,
                    self.wordcount_command + '{0} {1}'.format(SOURCE_PITHOS_TO_HDFS_FILE,
                                                              HDFS_WORDCOUNT_DIR),
                    hadoop_path=self.hadoop_path)
    bytes_pithos_written = ssh_check_output_hadoop(self.user, self.master_IP,
                                                   ' dfs -dus {0}'.format(PITHOS_WORDCOUNT_DIR),
                                                   hadoop_path=self.hdfs_path)
    bytes_hdfs_written = ssh_check_output_hadoop(self.user, self.master_IP,
                                                 ' dfs -dus {0}'.format(HDFS_WORDCOUNT_DIR),
                                                 hadoop_path=self.hdfs_path)
    self.assertEqual(bytes_pithos_written[0].replace(PITHOS_WORDCOUNT_DIR, ""),
                     bytes_hdfs_written[0].replace(HDFS_WORDCOUNT_DIR, ""))
    self.addCleanup(self.delete_hdfs_files, PITHOS_WORDCOUNT_DIR, prefix="-r")
    self.addCleanup(self.delete_hdfs_files, HDFS_WORDCOUNT_DIR, prefix="-r")
    self.addCleanup(self.delete_hdfs_files, SOURCE_PITHOS_TO_HDFS_FILE)
    self.addCleanup(self.delete_local_files, SOURCE_PITHOS_TO_HDFS_FILE)
    self.addCleanup(self.delete_pithos_files, SOURCE_PITHOS_TO_HDFS_FILE)
    self.addCleanup(self.hadoop_local_fs_action, 'rm /tmp/{0}'.format(SOURCE_PITHOS_TO_HDFS_FILE))

def test_hive_count_rows_in_table_not_exist(self):
    """
    Functional test for Ecosystem Hive:
    count rows in a table that does not exist.
    """
    hive_command = "hive -e 'select count(*) from table_not_exist;'"
    exist_check_status = ssh_call_hadoop(self.user, self.master_IP, hive_command,
                                         hadoop_path='/usr/local/hive/bin/')
    self.assertEqual(exist_check_status, 17)  # ERROR: table not found

def test_run_wordcount_from_pithos(self):
    """
    Functional test to upload a test file in Pithos and run a wordcount
    streaming the file from Pithos.
    """
    subprocess.call('echo "this is a test file to run a streaming wordcount" > {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                    stderr=FNULL, shell=True)
    subprocess.call('kamaki file upload {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                    stderr=FNULL, shell=True)
    ssh_call_hadoop(self.user, self.master_IP,
                    self.wordcount_command + 'pithos://pithos/{0} {1}'.format(SOURCE_PITHOS_TO_HDFS_FILE,
                                                                              PITHOS_WORDCOUNT_DIR),
                    hadoop_path=self.hadoop_path)
    exist_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                         ' dfs -test -e {0}/_SUCCESS'.format(PITHOS_WORDCOUNT_DIR),
                                         hadoop_path=self.hdfs_path)
    self.assertEqual(exist_check_status, 0)
    self.addCleanup(self.delete_hdfs_files, PITHOS_WORDCOUNT_DIR, prefix="-r")
    self.addCleanup(self.delete_local_files, SOURCE_PITHOS_TO_HDFS_FILE)
    self.addCleanup(self.delete_pithos_files, SOURCE_PITHOS_TO_HDFS_FILE)

def test_put_from_pithos(self):
    """
    Functional test to put a file from Pithos to hdfs and check that the file
    now exists in hdfs and is not zero size.
    """
    subprocess.call('echo "this is a test file for pithos to hdfs orka-cli put" > {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                    stderr=FNULL, shell=True)
    subprocess.call('kamaki file upload {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                    stderr=FNULL, shell=True)
    self.opts.update({'destination': DEST_PITHOS_TO_HDFS_FILE})
    HadoopCluster(self.opts).put_from_pithos(self.active_cluster, SOURCE_PITHOS_TO_HDFS_FILE)
    exist_check_status = ssh_call_hadoop('hduser', self.master_IP,
                                         ' dfs -test -e {0}'.format(self.opts['destination']))
    zero_check_status = ssh_call_hadoop('hduser', self.master_IP,
                                        ' dfs -test -z {0}'.format(self.opts['destination']))
    self.assertEqual(exist_check_status, 0)
    self.assertEqual(zero_check_status, 1)
    self.addCleanup(self.delete_hdfs_files, self.opts['destination'])
    self.addCleanup(self.delete_pithos_files, SOURCE_PITHOS_TO_HDFS_FILE)
    self.addCleanup(self.delete_local_files, SOURCE_PITHOS_TO_HDFS_FILE)

def test_put_from_local(self):
    """
    Functional test to put a file from local to hdfs and check that the file
    now exists in hdfs and is not zero size.
    """
    subprocess.call('echo "this is a unit test file for local to hdfs orka-cli put." > {0}'.format(SOURCE_LOCAL_TO_HDFS_FILE),
                    stderr=FNULL, shell=True)
    self.opts.update({'source': SOURCE_LOCAL_TO_HDFS_FILE,
                      'destination': [DEST_LOCAL_TO_HDFS_FILE], 'fileput': True})
    HadoopCluster(self.opts).file_action()
    exist_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                         ' dfs -test -e {0}'.format(DEST_LOCAL_TO_HDFS_FILE),
                                         hadoop_path=self.hdfs_path)
    zero_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                        ' dfs -test -z {0}'.format(DEST_LOCAL_TO_HDFS_FILE),
                                        hadoop_path=self.hdfs_path)
    self.assertEqual(exist_check_status, 0)
    self.assertEqual(zero_check_status, 1)
    self.addCleanup(self.delete_hdfs_files, DEST_LOCAL_TO_HDFS_FILE)
    self.addCleanup(self.delete_local_files, self.opts['source'])

def test_put_from_pithos(self):
    """
    Functional test to put a file from Pithos to hdfs and check that the file
    now exists in hdfs and is not zero size.
    """
    subprocess.call('echo "this is a test file for pithos to hdfs orka-cli put" > {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                    stderr=FNULL, shell=True)
    subprocess.call('kamaki file upload {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                    stderr=FNULL, shell=True)
    self.opts.update({'destination': DEST_PITHOS_TO_HDFS_FILE})
    HadoopCluster(self.opts).put_from_pithos(self.active_cluster, SOURCE_PITHOS_TO_HDFS_FILE)
    exist_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                         ' dfs -test -e {0}'.format(self.opts['destination']),
                                         hadoop_path=self.hdfs_path)
    zero_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                        ' dfs -test -z {0}'.format(self.opts['destination']),
                                        hadoop_path=self.hdfs_path)
    self.assertEqual(exist_check_status, 0)
    self.assertEqual(zero_check_status, 1)
    self.addCleanup(self.delete_hdfs_files, self.opts['destination'])
    self.addCleanup(self.delete_pithos_files, SOURCE_PITHOS_TO_HDFS_FILE)
    self.addCleanup(self.delete_local_files, SOURCE_PITHOS_TO_HDFS_FILE)

def test_spark_pi_wordcount(self):
    """
    Functional test to check if Spark is working correctly in an Ecosystem
    cluster by running a Spark Pi and a Spark WordCount.
    """
    self.put_file_to_hdfs('/tmp/{0}'.format(SOURCE_HDFS_TO_PITHOS_FILE))
    spark_job = ('export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop; '
                 '/usr/local/spark/bin/spark-submit --class org.apache.spark.examples.')
    for job_properties in [('SparkPi', 10), ('JavaWordCount', SOURCE_HDFS_TO_PITHOS_FILE)]:
        test_job = spark_job + '{0} --deploy-mode cluster --master yarn-cluster {1} {2}'.format(
            job_properties[0], SPARK_ECOSYSTEM_EXAMPLES, job_properties[1])
        exist_check_status = ssh_call_hadoop(self.user, self.master_IP, test_job,
                                             hadoop_path='')
        self.assertEqual(exist_check_status, 0)
    self.addCleanup(self.delete_hdfs_files, SOURCE_HDFS_TO_PITHOS_FILE)
    self.addCleanup(self.hadoop_local_fs_action, 'rm /tmp/{0}'.format(SOURCE_HDFS_TO_PITHOS_FILE))

def test_spark_pi_wordcount(self):
    """
    Run a Spark Pi and a Spark WordCount.
    """
    self.put_file_to_hdfs('/tmp/{0}'.format(SOURCE_HDFS_TO_PITHOS_FILE))
    spark_job = 'sudo -u hdfs spark-submit --class org.apache.spark.examples.'
    for job_properties in [('SparkPi', 10), ('JavaWordCount', SOURCE_HDFS_TO_PITHOS_FILE)]:
        test_job = spark_job + '{0} --deploy-mode cluster --master yarn-cluster {1} {2}'.format(
            job_properties[0], SPARK_EXAMPLES, job_properties[1])
        exist_check_status = ssh_call_hadoop(self.user, self.master_IP, test_job,
                                             hadoop_path='')
        self.assertEqual(exist_check_status, 0)
    self.addCleanup(self.delete_hdfs_files, SOURCE_HDFS_TO_PITHOS_FILE)
    self.addCleanup(self.hadoop_local_fs_action, 'rm /tmp/{0}'.format(SOURCE_HDFS_TO_PITHOS_FILE))

def test_compare_wordcount_pithos_hdfs(self):
    """
    Functional test to upload a test file in Pithos and run two wordcounts,
    one from Pithos and one native from hdfs, and compare the length of the
    output files.
    """
    subprocess.call('echo "this is a test file to run a wordcount" > {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                    stderr=FNULL, shell=True)
    subprocess.call('kamaki file upload {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                    stderr=FNULL, shell=True)
    ssh_call_hadoop(self.user, self.master_IP,
                    'kamaki file download {0} /tmp/{0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                    hadoop_path='')
    ssh_call_hadoop(self.user, self.master_IP,
                    ' dfs -put /tmp/{0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                    hadoop_path=self.hdfs_path)
    ssh_call_hadoop(self.user, self.master_IP,
                    self.wordcount_command + 'pithos://pithos/{0} {1}'.format(SOURCE_PITHOS_TO_HDFS_FILE,
                                                                              PITHOS_WORDCOUNT_DIR),
                    hadoop_path=self.hadoop_path)
    ssh_call_hadoop(self.user, self.master_IP,
                    self.wordcount_command + '{0} {1}'.format(SOURCE_PITHOS_TO_HDFS_FILE,
                                                              HDFS_WORDCOUNT_DIR),
                    hadoop_path=self.hadoop_path)
    bytes_pithos_written = ssh_check_output_hadoop(self.user, self.master_IP,
                                                   ' dfs -dus {0}'.format(PITHOS_WORDCOUNT_DIR),
                                                   hadoop_path=self.hdfs_path)
    bytes_hdfs_written = ssh_check_output_hadoop(self.user, self.master_IP,
                                                 ' dfs -dus {0}'.format(HDFS_WORDCOUNT_DIR),
                                                 hadoop_path=self.hdfs_path)
    self.assertEqual(bytes_pithos_written[0].replace(PITHOS_WORDCOUNT_DIR, ""),
                     bytes_hdfs_written[0].replace(HDFS_WORDCOUNT_DIR, ""))
    self.addCleanup(self.delete_hdfs_files, PITHOS_WORDCOUNT_DIR, prefix="-r")
    self.addCleanup(self.delete_hdfs_files, HDFS_WORDCOUNT_DIR, prefix="-r")
    self.addCleanup(self.delete_hdfs_files, SOURCE_PITHOS_TO_HDFS_FILE)
    self.addCleanup(self.delete_local_files, SOURCE_PITHOS_TO_HDFS_FILE)
    self.addCleanup(self.delete_pithos_files, SOURCE_PITHOS_TO_HDFS_FILE)
    self.addCleanup(self.hadoop_local_fs_action, 'rm /tmp/{0}'.format(SOURCE_PITHOS_TO_HDFS_FILE))

def delete_hdfs_files(self, file_to_delete, prefix=""): """ Helper method to delete files transfered to hdfs filesystem after test. """ ssh_call_hadoop('hduser', self.master_IP, ' dfs -rm {0} {1}'.format(prefix, file_to_delete))