def test_compare_wordcount_pithos_hdfs(self):
    """
    Functional test to upload a test file in Pithos and run two
    wordcounts, one from Pithos and one native from HDFS, and compare
    the length of the output files.
    """
    # Create the test file locally and upload it to Pithos.
    subprocess.call('echo "this is a test file to run a wordcount" > {0}'
                    .format(SOURCE_PITHOS_TO_HDFS_FILE),
                    stderr=FNULL, shell=True)
    subprocess.call('kamaki file upload {0}'
                    .format(SOURCE_PITHOS_TO_HDFS_FILE),
                    stderr=FNULL, shell=True)
    # Download the file on the master VM and put it into HDFS.
    ssh_call_hadoop(self.user, self.master_IP,
                    'kamaki file download {0} /tmp/{0}'
                    .format(SOURCE_PITHOS_TO_HDFS_FILE),
                    hadoop_path='')
    ssh_call_hadoop(self.user, self.master_IP,
                    ' dfs -put /tmp/{0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                    hadoop_path=self.hdfs_path)
    # Run the wordcount twice: once reading from Pithos, once from HDFS.
    ssh_call_hadoop(self.user, self.master_IP,
                    self.wordcount_command + 'pithos://pithos/{0} {1}'
                    .format(SOURCE_PITHOS_TO_HDFS_FILE, PITHOS_WORDCOUNT_DIR),
                    hadoop_path=self.hadoop_path)
    ssh_call_hadoop(self.user, self.master_IP,
                    self.wordcount_command + '{0} {1}'
                    .format(SOURCE_PITHOS_TO_HDFS_FILE, HDFS_WORDCOUNT_DIR),
                    hadoop_path=self.hadoop_path)
    # Compare the byte sizes of the two output directories, with the
    # differing paths stripped from the dfs -dus output.
    bytes_pithos_written = ssh_check_output_hadoop(
        self.user, self.master_IP,
        ' dfs -dus {0}'.format(PITHOS_WORDCOUNT_DIR),
        hadoop_path=self.hdfs_path)
    bytes_hdfs_written = ssh_check_output_hadoop(
        self.user, self.master_IP,
        ' dfs -dus {0}'.format(HDFS_WORDCOUNT_DIR),
        hadoop_path=self.hdfs_path)
    self.assertEqual(
        bytes_pithos_written[0].replace(PITHOS_WORDCOUNT_DIR, ""),
        bytes_hdfs_written[0].replace(HDFS_WORDCOUNT_DIR, ""))
    self.addCleanup(self.delete_hdfs_files, PITHOS_WORDCOUNT_DIR, prefix="-r")
    self.addCleanup(self.delete_hdfs_files, HDFS_WORDCOUNT_DIR, prefix="-r")
    self.addCleanup(self.delete_hdfs_files, SOURCE_PITHOS_TO_HDFS_FILE)
    self.addCleanup(self.delete_local_files, SOURCE_PITHOS_TO_HDFS_FILE)
    self.addCleanup(self.delete_pithos_files, SOURCE_PITHOS_TO_HDFS_FILE)
    self.addCleanup(self.hadoop_local_fs_action,
                    'rm /tmp/{0}'.format(SOURCE_PITHOS_TO_HDFS_FILE))
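# The ssh helpers called above are defined elsewhere in this test suite. A
# minimal sketch of what they are assumed to do, based only on how they are
# invoked here: the "_sketch" names, the use of the system ssh client, and
# the default hadoop_path are illustrative assumptions, not the real code.
# Note the tests pass commands with a leading space (' dfs -put ...'), so
# hadoop_path is prepended with no separator.
def ssh_call_hadoop_sketch(user, ip, command,
                           hadoop_path='/usr/local/hadoop/bin/hadoop'):
    """Run <hadoop_path><command> on the master VM, return its exit status."""
    return subprocess.call('ssh {0}@{1} "{2}{3}"'
                           .format(user, ip, hadoop_path, command),
                           shell=True)


def ssh_check_output_hadoop_sketch(user, ip, command,
                                   hadoop_path='/usr/local/hadoop/bin/hadoop'):
    """Like ssh_call_hadoop_sketch, but return stdout as a list of lines."""
    return subprocess.check_output('ssh {0}@{1} "{2}{3}"'
                                   .format(user, ip, hadoop_path, command),
                                   shell=True).splitlines()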
def test_compare_mapreduce_wordcount_pithos_hdfs(self):
    """
    Run two MapReduce wordcounts, one from Pithos and one native from
    HDFS, and compare the length of the output files.
    """
    subprocess.call(
        'echo "this is a test file to run a wordcount" > {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
        stderr=FNULL,
        shell=True,
    )
    subprocess.call("kamaki file upload {0}".format(SOURCE_PITHOS_TO_HDFS_FILE), stderr=FNULL, shell=True)
    ssh_call_hadoop(
        self.user,
        self.master_IP,
        "kamaki file download {0} /tmp/{0}".format(SOURCE_PITHOS_TO_HDFS_FILE),
        hadoop_path="",
    )
    ssh_call_hadoop(
        self.user,
        self.master_IP,
        " dfs -put /tmp/{0}".format(SOURCE_PITHOS_TO_HDFS_FILE),
        hadoop_path=self.hdfs_path,
    )
    ssh_call_hadoop(
        self.user,
        self.master_IP,
        self.wordcount_command + "pithos://pithos/{0} {1}".format(SOURCE_PITHOS_TO_HDFS_FILE, PITHOS_WORDCOUNT_DIR),
        hadoop_path=self.hadoop_path,
    )
    ssh_call_hadoop(
        self.user,
        self.master_IP,
        self.wordcount_command + "{0} {1}".format(SOURCE_PITHOS_TO_HDFS_FILE, HDFS_WORDCOUNT_DIR),
        hadoop_path=self.hadoop_path,
    )
    bytes_pithos_written = ssh_check_output_hadoop(
        self.user, self.master_IP, " dfs -dus {0}".format(PITHOS_WORDCOUNT_DIR), hadoop_path=self.hdfs_path
    )
    bytes_hdfs_written = ssh_check_output_hadoop(
        self.user, self.master_IP, " dfs -dus {0}".format(HDFS_WORDCOUNT_DIR), hadoop_path=self.hdfs_path
    )
    self.assertEqual(
        bytes_pithos_written[0].replace(PITHOS_WORDCOUNT_DIR, ""),
        bytes_hdfs_written[0].replace(HDFS_WORDCOUNT_DIR, ""),
    )
    self.addCleanup(self.delete_hdfs_files, PITHOS_WORDCOUNT_DIR, prefix="-r")
    self.addCleanup(self.delete_hdfs_files, HDFS_WORDCOUNT_DIR, prefix="-r")
    self.addCleanup(self.delete_hdfs_files, SOURCE_PITHOS_TO_HDFS_FILE)
    self.addCleanup(self.delete_local_files, SOURCE_PITHOS_TO_HDFS_FILE)
    self.addCleanup(self.delete_pithos_files, SOURCE_PITHOS_TO_HDFS_FILE)
    self.addCleanup(self.hadoop_local_fs_action, "rm /tmp/{0}".format(SOURCE_PITHOS_TO_HDFS_FILE))
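# The cleanup callbacks registered with addCleanup in both wordcount tests
# are methods of the test class, defined elsewhere in the suite. A minimal
# sketch of two of them, assuming they reuse the same ssh/kamaki plumbing as
# the tests; the bodies and the kamaki flags are illustrative guesses.
def delete_hdfs_files_sketch(self, path, prefix=''):
    """Remove a file (or, with prefix='-r', a directory) from HDFS."""
    ssh_call_hadoop(self.user, self.master_IP,
                    ' dfs -rm {0} {1}'.format(prefix, path),
                    hadoop_path=self.hdfs_path)


def delete_pithos_files_sketch(self, filename):
    """Delete the uploaded test file from Pithos via the kamaki CLI
    (the --yes flag to skip the confirmation prompt is an assumption)."""
    subprocess.call('kamaki file delete {0} --yes'.format(filename),
                    stderr=FNULL, shell=True)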
def test_oozie(self):
    """
    Test Oozie for an Ecosystem cluster.
    """
    master_vm_hostname = ssh_check_output_hadoop(
        self.user, self.master_IP, 'cat /etc/hostname', hadoop_path='')[0]
    # Substitute the master VM hostname into the workflow definition.
    read_workflow = open("workflow_ecosystem.xml", "r").read()
    workflow_file = open("workflow_ecosystem.xml", "w")
    workflow_file.write(
        re.sub("hostname", master_vm_hostname, read_workflow))
    workflow_file.close()
    # Upload the workflow to HDFS.
    ssh_call_hadoop(self.user, self.master_IP, 'dfs -mkdir oozie_app',
                    hadoop_path=self.hdfs_path)
    ssh_stream_to_hadoop(self.user, self.master_IP,
                         join(dirname(abspath(__file__)),
                              "workflow_ecosystem.xml"),
                         self.VALID_DEST_DIR + "/oozie_app/workflow.xml",
                         hadoop_path=self.hdfs_path)
    # Build job.properties locally and copy it to the master VM.
    job_properties = JOB_PROPERTIES_ECOSYSTEM_TEMPLATE.format(
        master_vm_hostname)
    create_job_properties_file = 'echo -e "{0}" > job.properties'.format(
        job_properties)
    subprocess.call(create_job_properties_file, stderr=FNULL, shell=True)
    subprocess.call("scp {0} {1}@{2}:/tmp/".format(JOB_PROPERTIES_PATH,
                                                   self.user,
                                                   self.master_IP),
                    stderr=FNULL, shell=True)
    # Submit the workflow and verify that it created its output folder.
    ssh_call_hadoop(self.user, self.master_IP, self.oozie_command,
                    hadoop_path='')
    exist_check_status = ssh_call_hadoop(
        self.user, self.master_IP,
        ' dfs -test -e {0}/{1}'.format(OOZIE_TEST_FOLDER,
                                       "oozie_test_folder"),
        hadoop_path=self.hdfs_path)
    self.assertEqual(exist_check_status, 0)
    self.addCleanup(self.delete_hdfs_files, OOZIE_TEST_FOLDER, prefix="-r")
    self.addCleanup(self.hadoop_local_fs_action, 'rm /tmp/job.properties')
    self.addCleanup(self.delete_local_files, JOB_PROPERTIES_PATH)
    # Restore the hostname placeholder in the local workflow file.
    workflow_file = open("workflow_ecosystem.xml", "w")
    workflow_file.write(
        re.sub(master_vm_hostname, "hostname", read_workflow))
    workflow_file.close()
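# JOB_PROPERTIES_ECOSYSTEM_TEMPLATE is defined with the suite's constants. A
# hypothetical shape consistent with how the test fills it in (one hostname
# placeholder) and with standard Oozie job.properties keys; the ports, user,
# and application path below are assumptions, not the real template.
JOB_PROPERTIES_ECOSYSTEM_TEMPLATE_EXAMPLE = (
    'nameNode=hdfs://{0}:9000\n'
    'jobTracker={0}:8032\n'
    'oozie.use.system.libpath=true\n'
    'oozie.wf.application.path=${{nameNode}}/user/hduser/oozie_app'
)
# e.g. JOB_PROPERTIES_ECOSYSTEM_TEMPLATE_EXAMPLE.format(master_vm_hostname)
# yields a minimal job.properties pointing Oozie at the workflow.xml the
# test streamed into HDFS above.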