def test_compare_wordcount_pithos_hdfs(self):
        """
        Functional test that uploads a test file to Pithos, runs two wordcounts, one reading its input
        from Pithos and one natively from HDFS, and compares the size of the two output files.
        """
        # Create the test file locally and upload it to Pithos.
        subprocess.call('echo "this is a test file to run a wordcount" > {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                        stderr=FNULL, shell=True)
        subprocess.call('kamaki file upload {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE), stderr=FNULL, shell=True)

        # Download the file from Pithos onto the master VM and copy it into HDFS.
        ssh_call_hadoop(self.user, self.master_IP, 'kamaki file download {0} /tmp/{0}'.
                        format(SOURCE_PITHOS_TO_HDFS_FILE), hadoop_path='')
        ssh_call_hadoop(self.user, self.master_IP, ' dfs -put /tmp/{0}'.
                        format(SOURCE_PITHOS_TO_HDFS_FILE), hadoop_path=self.hdfs_path)

        # Run the same wordcount twice: once reading its input from Pithos, once from HDFS.
        ssh_call_hadoop(self.user, self.master_IP, self.wordcount_command + 'pithos://pithos/{0} {1}'.
                        format(SOURCE_PITHOS_TO_HDFS_FILE, PITHOS_WORDCOUNT_DIR),
                        hadoop_path=self.hadoop_path)
        ssh_call_hadoop(self.user, self.master_IP, self.wordcount_command + '{0} {1}'.
                        format(SOURCE_PITHOS_TO_HDFS_FILE, HDFS_WORDCOUNT_DIR),
                        hadoop_path=self.hadoop_path)

        bytes_pithos_written = ssh_check_output_hadoop(self.user, self.master_IP,
                                                       ' dfs -dus {0}'.format(PITHOS_WORDCOUNT_DIR),
                                                       hadoop_path=self.hdfs_path)
        bytes_hdfs_written = ssh_check_output_hadoop(self.user, self.master_IP,
                                                     ' dfs -dus {0}'.format(HDFS_WORDCOUNT_DIR),
                                                     hadoop_path=self.hdfs_path)

        # 'dfs -dus' prints the output path followed by its size; stripping the paths leaves the sizes for comparison.
        self.assertEqual(bytes_pithos_written[0].replace(PITHOS_WORDCOUNT_DIR, ""),
                         bytes_hdfs_written[0].replace(HDFS_WORDCOUNT_DIR, ""))
        # Clean up the HDFS output directories and source file, the Pithos object, the local file
        # and the /tmp copy on the master VM.
        self.addCleanup(self.delete_hdfs_files, PITHOS_WORDCOUNT_DIR, prefix="-r")
        self.addCleanup(self.delete_hdfs_files, HDFS_WORDCOUNT_DIR, prefix="-r")
        self.addCleanup(self.delete_hdfs_files, SOURCE_PITHOS_TO_HDFS_FILE)
        self.addCleanup(self.delete_local_files, SOURCE_PITHOS_TO_HDFS_FILE)
        self.addCleanup(self.delete_pithos_files, SOURCE_PITHOS_TO_HDFS_FILE)
        self.addCleanup(self.hadoop_local_fs_action, 'rm /tmp/{0}'.format(SOURCE_PITHOS_TO_HDFS_FILE))

    def test_compare_mapreduce_wordcount_pithos_hdfs(self):
        """
        Run two MapReduce wordcounts one from Pithos and one native from HDFS and compare the
        length of the output files.
        """
        subprocess.call(
            'echo "this is a test file to run a wordcount" > {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
            stderr=FNULL,
            shell=True,
        )
        subprocess.call("kamaki file upload {0}".format(SOURCE_PITHOS_TO_HDFS_FILE), stderr=FNULL, shell=True)

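        # Fetch the uploaded file from Pithos onto the master VM, then copy it into HDFS.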
        ssh_call_hadoop(
            self.user,
            self.master_IP,
            "kamaki file download {0} /tmp/{0}".format(SOURCE_PITHOS_TO_HDFS_FILE),
            hadoop_path="",
        )
        ssh_call_hadoop(
            self.user,
            self.master_IP,
            " dfs -put /tmp/{0}".format(SOURCE_PITHOS_TO_HDFS_FILE),
            hadoop_path=self.hdfs_path,
        )

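        # Run the same MapReduce wordcount twice: once reading its input from Pithos, once from HDFS.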
        ssh_call_hadoop(
            self.user,
            self.master_IP,
            self.wordcount_command + "pithos://pithos/{0} {1}".format(SOURCE_PITHOS_TO_HDFS_FILE, PITHOS_WORDCOUNT_DIR),
            hadoop_path=self.hadoop_path,
        )
        ssh_call_hadoop(
            self.user,
            self.master_IP,
            self.wordcount_command + "{0} {1}".format(SOURCE_PITHOS_TO_HDFS_FILE, HDFS_WORDCOUNT_DIR),
            hadoop_path=self.hadoop_path,
        )

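        # 'dfs -dus' prints the output path followed by its size in bytes.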
        bytes_pithos_written = ssh_check_output_hadoop(
            self.user, self.master_IP, " dfs -dus {0}".format(PITHOS_WORDCOUNT_DIR), hadoop_path=self.hdfs_path
        )
        bytes_hdfs_written = ssh_check_output_hadoop(
            self.user, self.master_IP, " dfs -dus {0}".format(HDFS_WORDCOUNT_DIR), hadoop_path=self.hdfs_path
        )

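        # Removing the (different) output paths from both lines leaves the sizes, which must match.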
        self.assertEqual(
            bytes_pithos_written[0].replace(PITHOS_WORDCOUNT_DIR, ""),
            bytes_hdfs_written[0].replace(HDFS_WORDCOUNT_DIR, ""),
        )
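        # Clean up the HDFS output directories and source file, the Pithos object, the local file
        # and the /tmp copy on the master VM.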
        self.addCleanup(self.delete_hdfs_files, PITHOS_WORDCOUNT_DIR, prefix="-r")
        self.addCleanup(self.delete_hdfs_files, HDFS_WORDCOUNT_DIR, prefix="-r")
        self.addCleanup(self.delete_hdfs_files, SOURCE_PITHOS_TO_HDFS_FILE)
        self.addCleanup(self.delete_local_files, SOURCE_PITHOS_TO_HDFS_FILE)
        self.addCleanup(self.delete_pithos_files, SOURCE_PITHOS_TO_HDFS_FILE)
        self.addCleanup(self.hadoop_local_fs_action, "rm /tmp/{0}".format(SOURCE_PITHOS_TO_HDFS_FILE))

def test_oozie(self):
        """
        Functional test that runs an Oozie workflow job on an Ecosystem cluster and verifies its output.
        """
        master_vm_hostname = ssh_check_output_hadoop(self.user, self.master_IP, 'cat /etc/hostname', hadoop_path='')[0]
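        # Substitute the placeholder hostname in the workflow definition with the master VM hostname.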
        with open("workflow_ecosystem.xml", "r") as workflow_file:
            read_workflow = workflow_file.read()
        with open("workflow_ecosystem.xml", "w") as workflow_file:
            workflow_file.write(re.sub("hostname", master_vm_hostname, read_workflow))
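        # Create the Oozie application directory in HDFS and stream the workflow definition into it.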
        ssh_call_hadoop(self.user, self.master_IP, ' dfs -mkdir oozie_app', hadoop_path=self.hdfs_path)
        ssh_stream_to_hadoop(self.user, self.master_IP, join(dirname(abspath(__file__)), "workflow_ecosystem.xml"),
                             self.VALID_DEST_DIR + "/oozie_app/workflow.xml", hadoop_path=self.hdfs_path)
        job_properties = JOB_PROPERTIES_ECOSYSTEM_TEMPLATE.format(master_vm_hostname)

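        # Generate job.properties locally and copy it to the master VM, from where the Oozie job is submitted.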
        create_job_properties_file = 'echo -e "{0}" > job.properties'.format(job_properties)
        subprocess.call(create_job_properties_file, stderr=FNULL, shell=True)
        subprocess.call( "scp {0} {1}@{2}:/tmp/".format(JOB_PROPERTIES_PATH, self.user, self.master_IP),
                         stderr=FNULL, shell=True)
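        # Submit the Oozie workflow job and check that it created its output folder in HDFS.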
        ssh_call_hadoop(self.user, self.master_IP, self.oozie_command, hadoop_path='')
        exist_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                             ' dfs -test -e {0}/{1}'.format(OOZIE_TEST_FOLDER, "oozie_test_folder"),
                                             hadoop_path=self.hdfs_path)
        self.assertEqual(exist_check_status, 0)
        self.addCleanup(self.delete_hdfs_files, OOZIE_TEST_FOLDER, prefix="-r")
        self.addCleanup(self.hadoop_local_fs_action, 'rm /tmp/job.properties')
        self.addCleanup(self.delete_local_files, JOB_PROPERTIES_PATH)
        # Restore the placeholder hostname in the workflow definition for subsequent runs.
        with open("workflow_ecosystem.xml", "w") as workflow_file:
            workflow_file.write(re.sub(master_vm_hostname, "hostname", read_workflow))