    def test_hive_count_rows_in_table_exists(self):
        """
        Functional test for Ecosystem Hive
        creates a table (if it does not exist)
        and counts the rows in this table
        """
        # create a table
        hive_command = "hive -e 'CREATE TABLE IF NOT EXISTS hive_table ( age int, name String );'"
        ssh_call_hadoop(self.user,
                        self.master_IP,
                        hive_command,
                        hadoop_path='/usr/local/hive/bin/')

        # count rows
        hive_command_count = "hive -e 'select count(*) from hive_table;'"
        exist_check_status = ssh_call_hadoop(
            self.user,
            self.master_IP,
            hive_command_count,
            hadoop_path='/usr/local/hive/bin/')

        self.assertEqual(exist_check_status, 0)  # OK

        # Remove test table
        hive_command = "hive -e 'DROP TABLE hive_table;'"
        ssh_call_hadoop(self.user,
                        self.master_IP,
                        hive_command,
                        hadoop_path='/usr/local/hive/bin/')
    def test_hbase_table_exists(self):
        """
        Functional test for Ecosystem HBase
        creates a table and then
        checks that the table exists
        """
        baseurl = "http://" + self.master_IP + ":16010"
        tablename = "testtable"

        # Create shell script so as to create the table
        self.hadoop_local_fs_action(
            "echo " + "create \\'testtable\\', \\'cf\\'" +
            " > {0} && echo exit >> {0}".format(HBASE_SCRIPT_PATH))
        hbase_command = "hbase shell " + HBASE_SCRIPT_PATH
        ssh_call_hadoop(self.user,
                        self.master_IP,
                        hbase_command,
                        hadoop_path='/usr/local/hbase/bin/')

        # Check if table exists
        request = requests.get(baseurl + "/table.jsp?name=" + tablename)
        self.assertEqual(request.status_code, 200)  # OK

        # Remove test data
        self.hadoop_local_fs_action(
            "echo disable \\'testtable\\' > {0} && echo drop \\'testtable\\' >> {0} && echo exit >> {0}"
            .format(HBASE_SCRIPT_PATH))
        ssh_call_hadoop(self.user,
                        self.master_IP,
                        hbase_command,
                        hadoop_path='/usr/local/hbase/bin/')
        self.addCleanup(self.hadoop_local_fs_action, 'rm ' + HBASE_SCRIPT_PATH)
 def test_put_from_local_recursive(self):
     """
     functional test to put multiple files from local to hdfs and check that all the files now exist in hdfs and are not zero size.
     """
     list_of_files = []
     for i in range(10):
         subprocess.call('echo "this is the unit test file {0} for local to hdfs orka-cli put." > {0}{1}'.format(i, SOURCE_LOCAL_TO_HDFS_FILE),
                         stderr=FNULL, shell=True)
         list_of_files.append('{0}{1}'.format(i, SOURCE_LOCAL_TO_HDFS_FILE))
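     # The first file becomes the 'source'; the remaining files plus the '/user/hduser' target directory form the 'destination' list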
     list_of_files.append('/user/hduser')
     list_of_files.remove('0{0}'.format(SOURCE_LOCAL_TO_HDFS_FILE))
     self.opts.update({'source': '0{0}'.format(SOURCE_LOCAL_TO_HDFS_FILE), 'destination': list_of_files,
                      'fileput': True})
     HadoopCluster(self.opts).file_action()
     list_of_files.insert(0, '0{0}'.format(SOURCE_LOCAL_TO_HDFS_FILE))
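     # Verify that every uploaded file exists in HDFS and is not zero size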
     for file in list_of_files[:-1]:
         exist_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                              ' dfs -test -e {0}'.format(file),
                                              hadoop_path=self.hdfs_path)
         zero_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                             ' dfs -test -z {0}'.format(file),
                                             hadoop_path=self.hdfs_path)
         self.assertEqual(exist_check_status, 0)
         self.assertEqual(zero_check_status, 1)
         self.addCleanup(self.delete_hdfs_files, '/user/hduser/{0}'.format(file))
         self.addCleanup(self.delete_local_files, file)
 def put_file_to_hdfs(self, file_to_create):
     """
     Helper method to create file in Hdfs before test.
     """
     self.hadoop_local_fs_action('echo "test file for hdfs" > {0}'.format(file_to_create))
     ssh_call_hadoop(self.user, self.master_IP, ' dfs -put {0}'.format(file_to_create),
                     hadoop_path=self.hdfs_path)
 def delete_hdfs_files(self, file_to_delete, prefix=""):
     """
     Helper method to delete files transferred to the hdfs filesystem after a test.
     """
     ssh_call_hadoop(self.user,
                     self.master_IP,
                     ' dfs -rm {0} {1}'.format(prefix, file_to_delete),
                     hadoop_path=self.hdfs_path)
    def test_oozie(self):
        """
        Test oozie for Ecosystem cluster
        """
        master_vm_hostname = ssh_check_output_hadoop(self.user,
                                                     self.master_IP,
                                                     'cat /etc/hostname',
                                                     hadoop_path='')[0]
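        # Substitute the master VM hostname for the 'hostname' placeholder in the workflow template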
        read_workflow = open("workflow_ecosystem.xml", "r").read()
        workflow_file = open("workflow_ecosystem.xml", "w")
        workflow_file.write(
            re.sub("hostname", master_vm_hostname, read_workflow))
        workflow_file.close()
        ssh_call_hadoop(self.user,
                        self.master_IP,
                        'dfs -mkdir oozie_app',
                        hadoop_path=self.hdfs_path)
        ssh_stream_to_hadoop(self.user,
                             self.master_IP,
                             join(dirname(abspath(__file__)),
                                  "workflow_ecosystem.xml"),
                             self.VALID_DEST_DIR + "/oozie_app/workflow.xml",
                             hadoop_path=self.hdfs_path)
        job_properties = JOB_PROPERTIES_ECOSYSTEM_TEMPLATE.format(
            master_vm_hostname)

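        # Build job.properties from the template and copy it to /tmp on the master VM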
        create_job_properties_file = 'echo -e "{0}" > job.properties'.format(
            job_properties)
        subprocess.call(create_job_properties_file, stderr=FNULL, shell=True)
        subprocess.call("scp {0} {1}@{2}:/tmp/".format(JOB_PROPERTIES_PATH,
                                                       self.user,
                                                       self.master_IP),
                        stderr=FNULL,
                        shell=True)
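        # Submit the Oozie workflow and check that its output folder was created in HDFS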
        ssh_call_hadoop(self.user,
                        self.master_IP,
                        self.oozie_command,
                        hadoop_path='')
        exist_check_status = ssh_call_hadoop(self.user,
                                             self.master_IP,
                                             ' dfs -test -e {0}/{1}'.format(
                                                 OOZIE_TEST_FOLDER,
                                                 "oozie_test_folder"),
                                             hadoop_path=self.hdfs_path)
        self.assertEqual(exist_check_status, 0)
        self.addCleanup(self.delete_hdfs_files, OOZIE_TEST_FOLDER, prefix="-r")
        self.addCleanup(self.hadoop_local_fs_action, 'rm /tmp/job.properties')
        self.addCleanup(self.delete_local_files, JOB_PROPERTIES_PATH)
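        # Restore the 'hostname' placeholder in the workflow template so the test can be rerun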
        workflow_file = open("workflow_ecosystem.xml", "w")
        workflow_file.write(
            re.sub(master_vm_hostname, "hostname", read_workflow))
        workflow_file.close()
 def test_pig(self):
     """
     Test pig for hadoop ecosystem
     """
     pig_command = "export JAVA_HOME=/usr/lib/jvm/java-8-oracle; export HADOOP_HOME=/usr/local/hadoop; /usr/local/pig/bin/pig -e \"fs -mkdir /tmp/pig_test_folder\""
     ssh_call_hadoop(self.user, self.master_IP, pig_command, hadoop_path='')
     exist_check_status = ssh_call_hadoop(
         self.user, self.master_IP,
         ' dfs -test -e /tmp/{0}'.format('pig_test_folder'))
     self.assertEqual(exist_check_status, 0)
     self.addCleanup(self.delete_hdfs_files,
                     '/tmp/pig_test_folder',
                     prefix="-r")
 def test_put_from_remote(self):
     """
     functional test to put a file from a remote server to Hdfs and check that the file now exists in Hdfs and
     is not zero size.
     """
     self.opts.update({'source': SOURCE_REMOTE_TO_HDFS_FILE, 'destination': DEST_REMOTE_TO_HDFS_FILE, 'user': '',
                       'password': ''})
     HadoopCluster(self.opts).put_from_server()
     exist_check_status = ssh_call_hadoop('hduser', self.master_IP,
                                          ' dfs -test -e {0}'.format(self.opts['destination']))
     zero_check_status = ssh_call_hadoop('hduser', self.master_IP,
                                         ' dfs -test -z {0}'.format(self.opts['destination']))
     self.assertEqual(exist_check_status, 0)
     self.assertEqual(zero_check_status, 1)
     self.addCleanup(self.delete_hdfs_files, self.opts['destination'])
    def test_compare_wordcount_pithos_hdfs(self):
        """
        Functional test to upload a test file to Pithos and run two wordcounts, one from Pithos and one native from HDFS,
        and compare the length of the output files.
        """
        subprocess.call('echo "this is a test file to run a wordcount" > {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                        stderr=FNULL, shell=True)
        subprocess.call('kamaki file upload {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE), stderr=FNULL, shell=True)

        ssh_call_hadoop('hduser', self.master_IP, 'kamaki file download {0}'.
                        format(SOURCE_PITHOS_TO_HDFS_FILE), hadoop_path='')
        ssh_call_hadoop('hduser', self.master_IP, ' dfs -put {0}'.
                        format(SOURCE_PITHOS_TO_HDFS_FILE))

        ssh_call_hadoop('hduser', self.master_IP, wordcount_command + 'pithos://pithos/{0} {1}'.
                        format(SOURCE_PITHOS_TO_HDFS_FILE, PITHOS_WORDCOUNT_DIR),
                                             hadoop_path=hadoop_path_wordcount)
        ssh_call_hadoop('hduser', self.master_IP, wordcount_command + '{0} {1}'.
                        format(SOURCE_PITHOS_TO_HDFS_FILE, HDFS_WORDCOUNT_DIR),
                                             hadoop_path=hadoop_path_wordcount)

        bytes_pithos_written = ssh_check_output_hadoop('hduser', self.master_IP,
                                             ' dfs -dus {0}'.format(PITHOS_WORDCOUNT_DIR))
        bytes_hdfs_written = ssh_check_output_hadoop('hduser', self.master_IP,
                                             ' dfs -dus {0}'.format(HDFS_WORDCOUNT_DIR))

        self.assertEqual(bytes_pithos_written[0].replace(PITHOS_WORDCOUNT_DIR, ""),
                         bytes_hdfs_written[0].replace(HDFS_WORDCOUNT_DIR, ""))
        self.addCleanup(self.delete_hdfs_files, PITHOS_WORDCOUNT_DIR, prefix="-r")
        self.addCleanup(self.delete_hdfs_files, HDFS_WORDCOUNT_DIR, prefix="-r")
        self.addCleanup(self.delete_hdfs_files, SOURCE_PITHOS_TO_HDFS_FILE)
        self.addCleanup(self.delete_local_files, SOURCE_PITHOS_TO_HDFS_FILE)
        self.addCleanup(self.delete_pithos_files, SOURCE_PITHOS_TO_HDFS_FILE)
        self.addCleanup(self.hadoop_local_fs_action, 'rm {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE))
 def test_put_from_local(self):
     """
     functional test to put a file from local to hdfs and check that the file now exists in hdfs and is not zero size.
     """
     subprocess.call('echo "this is a unit test file for local to hdfs orka-cli put." > {0}'.format(SOURCE_LOCAL_TO_HDFS_FILE),
                     stderr=FNULL, shell=True)
     self.opts.update({'source': SOURCE_LOCAL_TO_HDFS_FILE, 'destination': DEST_LOCAL_TO_HDFS_FILE})
     HadoopCluster(self.opts).put_from_local(self.active_cluster)
     exist_check_status = ssh_call_hadoop('hduser', self.master_IP,
                                          ' dfs -test -e {0}'.format(self.opts['destination']))
     zero_check_status = ssh_call_hadoop('hduser', self.master_IP,
                                         ' dfs -test -z {0}'.format(self.opts['destination']))
     self.assertEqual(exist_check_status, 0)
     self.assertEqual(zero_check_status, 1)
     self.addCleanup(self.delete_hdfs_files, self.opts['destination'])
     self.addCleanup(self.delete_local_files, self.opts['source'])
 def test_pig(self):
     """
     Make a directory in HDFS running a pig command.
     """
     ssh_call_hadoop(self.user,
                     self.master_IP,
                     self.pig_command,
                     hadoop_path='')
     exist_check_status = ssh_call_hadoop(
         self.user,
         self.master_IP,
         ' dfs -test -e {0}'.format(PIG_TEST_FOLDER),
         hadoop_path=self.hdfs_path)
     self.assertEqual(exist_check_status, 0)
     self.addCleanup(self.delete_hdfs_files, PIG_TEST_FOLDER, prefix="-r")
 def test_put_from_remote(self):
     """
     functional test to put a file from a remote server to Hdfs and check that the file now exists in Hdfs and
     is not zero size.
     """
     self.opts.update({'source': SOURCE_REMOTE_TO_HDFS_FILE, 'destination': DEST_REMOTE_TO_HDFS_FILE, 'user': '',
                       'password': ''})
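     # Transfer the remote file into HDFS, then check that it exists and is not empty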
     HadoopCluster(self.opts).put_from_server()
     exist_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                          ' dfs -test -e {0}'.format(self.opts['destination']),
                                          hadoop_path=self.hdfs_path)
     zero_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                         ' dfs -test -z {0}'.format(self.opts['destination']),
                                         hadoop_path=self.hdfs_path)
     self.assertEqual(exist_check_status, 0)
     self.assertEqual(zero_check_status, 1)
     self.addCleanup(self.delete_hdfs_files, self.opts['destination'])
 def test_flume(self):
     """
     Checks Flume's operation.
     Creates a temp file and checks that it is uploaded successfully to hdfs
     """
     #clear flume data directories
     self.clear_flume_hdfs_folder()
     self.clear_flume_tmp_folder()
     #create the temp file in /usr/local/flume/tmp
     response = subprocess.call("ssh " + self.user + "@" + self.master_IP +
                                " \"" + "echo 'this is a test file'" +
                                " >  /usr/local/flume/tmp/tempfile.log" +
                                "\"",
                                stderr=FNULL,
                                shell=True)
     # wait for flume to write data in hdfs
     time.sleep(20)
     # check hdfs for Flume Data
     #         response = subprocess.check_output("ssh " + self.user + "@" + self.master_IP + " \"" +
     #                                     "/usr/local/hadoop/bin/hdfs dfs -test -e /user/hduser/flume/FlumeData.*" +
     #                                     "\"", stderr=FNULL, shell=True)
     command = ' dfs -test -e /user/hduser/flume/FlumeData.*'
     exist_check_status = ssh_call_hadoop(
         self.user,
         self.master_IP,
         command,
         hadoop_path='/usr/local/hadoop/bin/hdfs')
     self.assertEqual(exist_check_status, 0)  # Output exists in hdfs
     #clear flume hdfs folder
     self.addCleanup(self.clear_flume_hdfs_folder)
     # clear flume local temp folder
     self.addCleanup(self.clear_flume_tmp_folder)
    def test_run_wordcount_from_pithos(self):
        """
        Functional test to upload a test file to Pithos and run a wordcount streaming the file from Pithos.
        """
        subprocess.call('echo "this is a test file to run a streaming wordcount" > {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                        stderr=FNULL, shell=True)
        subprocess.call('kamaki file upload {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE), stderr=FNULL, shell=True)
        ssh_call_hadoop('hduser', self.master_IP, wordcount_command + 'pithos://pithos/{0} {1}'.
                        format(SOURCE_PITHOS_TO_HDFS_FILE, PITHOS_WORDCOUNT_DIR),
                                             hadoop_path=hadoop_path_wordcount)

        exist_check_status = ssh_call_hadoop('hduser', self.master_IP,
                                             ' dfs -test -e {0}/_SUCCESS'.format(PITHOS_WORDCOUNT_DIR))
        self.assertEqual(exist_check_status, 0)
        self.addCleanup(self.delete_hdfs_files, PITHOS_WORDCOUNT_DIR, prefix="-r")
        self.addCleanup(self.delete_local_files, SOURCE_PITHOS_TO_HDFS_FILE)
        self.addCleanup(self.delete_pithos_files, SOURCE_PITHOS_TO_HDFS_FILE)
    def test_compare_mapreduce_wordcount_pithos_hdfs(self):
        """
        Run two MapReduce wordcounts, one from Pithos and one native from HDFS, and compare the
        length of the output files.
        """
        subprocess.call(
            'echo "this is a test file to run a wordcount" > {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
            stderr=FNULL,
            shell=True,
        )
        subprocess.call("kamaki file upload {0}".format(SOURCE_PITHOS_TO_HDFS_FILE), stderr=FNULL, shell=True)

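        # Download the test file from Pithos onto the master VM and put a copy into HDFS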
        ssh_call_hadoop(
            self.user,
            self.master_IP,
            "kamaki file download {0} /tmp/{0}".format(SOURCE_PITHOS_TO_HDFS_FILE),
            hadoop_path="",
        )
        ssh_call_hadoop(
            self.user,
            self.master_IP,
            " dfs -put /tmp/{0}".format(SOURCE_PITHOS_TO_HDFS_FILE),
            hadoop_path=self.hdfs_path,
        )

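        # Run the same wordcount twice: once reading the input from Pithos, once from HDFS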
        ssh_call_hadoop(
            self.user,
            self.master_IP,
            self.wordcount_command + "pithos://pithos/{0} {1}".format(SOURCE_PITHOS_TO_HDFS_FILE, PITHOS_WORDCOUNT_DIR),
            hadoop_path=self.hadoop_path,
        )
        ssh_call_hadoop(
            self.user,
            self.master_IP,
            self.wordcount_command + "{0} {1}".format(SOURCE_PITHOS_TO_HDFS_FILE, HDFS_WORDCOUNT_DIR),
            hadoop_path=self.hadoop_path,
        )

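        # Strip the directory names from the 'dfs -dus' output and compare the remaining byte counts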
        bytes_pithos_written = ssh_check_output_hadoop(
            self.user, self.master_IP, " dfs -dus {0}".format(PITHOS_WORDCOUNT_DIR), hadoop_path=self.hdfs_path
        )
        bytes_hdfs_written = ssh_check_output_hadoop(
            self.user, self.master_IP, " dfs -dus {0}".format(HDFS_WORDCOUNT_DIR), hadoop_path=self.hdfs_path
        )

        self.assertEqual(
            bytes_pithos_written[0].replace(PITHOS_WORDCOUNT_DIR, ""),
            bytes_hdfs_written[0].replace(HDFS_WORDCOUNT_DIR, ""),
        )
        self.addCleanup(self.delete_hdfs_files, PITHOS_WORDCOUNT_DIR, prefix="-r")
        self.addCleanup(self.delete_hdfs_files, HDFS_WORDCOUNT_DIR, prefix="-r")
        self.addCleanup(self.delete_hdfs_files, SOURCE_PITHOS_TO_HDFS_FILE)
        self.addCleanup(self.delete_local_files, SOURCE_PITHOS_TO_HDFS_FILE)
        self.addCleanup(self.delete_pithos_files, SOURCE_PITHOS_TO_HDFS_FILE)
        self.addCleanup(self.hadoop_local_fs_action, "rm /tmp/{0}".format(SOURCE_PITHOS_TO_HDFS_FILE))
    def test_run_wordcount_from_pithos(self):
        """
        Functional test to upload a test file to Pithos and run a wordcount streaming the file from Pithos.
        """
        subprocess.call('echo "this is a test file to run a streaming wordcount" > {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                        stderr=FNULL, shell=True)
        subprocess.call('kamaki file upload {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE), stderr=FNULL, shell=True)
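        # Stream the input directly from Pithos into the wordcount job and check for the _SUCCESS marker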
        ssh_call_hadoop(self.user, self.master_IP, self.wordcount_command + 'pithos://pithos/{0} {1}'.
                        format(SOURCE_PITHOS_TO_HDFS_FILE, PITHOS_WORDCOUNT_DIR),
                                             hadoop_path=self.hadoop_path)

        exist_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                             ' dfs -test -e {0}/_SUCCESS'.format(PITHOS_WORDCOUNT_DIR),
                                             hadoop_path=self.hdfs_path)
        self.assertEqual(exist_check_status, 0)
        self.addCleanup(self.delete_hdfs_files, PITHOS_WORDCOUNT_DIR, prefix="-r")
        self.addCleanup(self.delete_local_files, SOURCE_PITHOS_TO_HDFS_FILE)
        self.addCleanup(self.delete_pithos_files, SOURCE_PITHOS_TO_HDFS_FILE)
 def test_put_from_pithos(self):
     """
     functional test to put a file from Pithos to Hdfs and check that the file now exists in Hdfs and
     is not zero size.
     """
     subprocess.call('echo "this is a test file for pithos to hdfs orka-cli put" > {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                     stderr=FNULL, shell=True)
     subprocess.call('kamaki file upload {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE), stderr=FNULL, shell=True)
     self.opts.update({'destination': DEST_PITHOS_TO_HDFS_FILE})
     HadoopCluster(self.opts).put_from_pithos(self.active_cluster, SOURCE_PITHOS_TO_HDFS_FILE)
     exist_check_status = ssh_call_hadoop('hduser', self.master_IP,
                                          ' dfs -test -e {0}'.format(self.opts['destination']))
     zero_check_status = ssh_call_hadoop('hduser', self.master_IP,
                                         ' dfs -test -z {0}'.format(self.opts['destination']))
     self.assertEqual(exist_check_status, 0)
     self.assertEqual(zero_check_status, 1)
     self.addCleanup(self.delete_hdfs_files, self.opts['destination'])
     self.addCleanup(self.delete_pithos_files, SOURCE_PITHOS_TO_HDFS_FILE)
     self.addCleanup(self.delete_local_files, SOURCE_PITHOS_TO_HDFS_FILE)
 def test_put_from_local(self):
     """
     functional test to put a file from local to hdfs and check that the file now exists in hdfs and is not zero size.
     """
     subprocess.call('echo "this is a unit test file for local to hdfs orka-cli put." > {0}'.format(SOURCE_LOCAL_TO_HDFS_FILE),
                     stderr=FNULL, shell=True)
     self.opts.update({'source': SOURCE_LOCAL_TO_HDFS_FILE, 'destination': [DEST_LOCAL_TO_HDFS_FILE],
                      'fileput': True})
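     # file_action with 'fileput' copies the local source file to the HDFS destination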
     HadoopCluster(self.opts).file_action()
     exist_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                          ' dfs -test -e {0}'.format(DEST_LOCAL_TO_HDFS_FILE),
                                          hadoop_path=self.hdfs_path)
     zero_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                         ' dfs -test -z {0}'.format(DEST_LOCAL_TO_HDFS_FILE),
                                         hadoop_path=self.hdfs_path)
     self.assertEqual(exist_check_status, 0)
     self.assertEqual(zero_check_status, 1)
     self.addCleanup(self.delete_hdfs_files, DEST_LOCAL_TO_HDFS_FILE)
     self.addCleanup(self.delete_local_files, self.opts['source'])
 def test_put_from_pithos(self):
     """
     functional test to put a file from Pithos to Hdfs and check that the file now exists in Hdfs and
     is not zero size.
     """
     subprocess.call('echo "this is a test file for pithos to hdfs orka-cli put" > {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                     stderr=FNULL, shell=True)
     subprocess.call('kamaki file upload {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE), stderr=FNULL, shell=True)
     self.opts.update({'destination': DEST_PITHOS_TO_HDFS_FILE})
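     # Transfer the uploaded file from Pithos into HDFS, then check that it exists and is not empty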
     HadoopCluster(self.opts).put_from_pithos(self.active_cluster, SOURCE_PITHOS_TO_HDFS_FILE)
     exist_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                          ' dfs -test -e {0}'.format(self.opts['destination']),
                                          hadoop_path=self.hdfs_path)
     zero_check_status = ssh_call_hadoop(self.user, self.master_IP,
                                         ' dfs -test -z {0}'.format(self.opts['destination']),
                                         hadoop_path=self.hdfs_path)
     self.assertEqual(exist_check_status, 0)
     self.assertEqual(zero_check_status, 1)
     self.addCleanup(self.delete_hdfs_files, self.opts['destination'])
     self.addCleanup(self.delete_pithos_files, SOURCE_PITHOS_TO_HDFS_FILE)
     self.addCleanup(self.delete_local_files, SOURCE_PITHOS_TO_HDFS_FILE)
    def test_hive_count_rows_in_table_not_exist(self):
        """
        Functional test for Ecosystem Hive
        counts rows in a table that does not exist
        """
        hive_command = "hive -e 'select count(*) from table_not_exist;'"
        exist_check_status = ssh_call_hadoop(
            self.user,
            self.master_IP,
            hive_command,
            hadoop_path='/usr/local/hive/bin/')

        self.assertEqual(exist_check_status, 17)  # ERROR table not found
 def test_pig_script(self):
     """
     Test pig through a pig script
     """
     self.put_file_to_hdfs('/tmp/{0}'.format('test_file_pig.txt'))
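     # Pig script: load the test file, upper-case each line with the piggybank UPPER UDF and store the result in /user/hduser/pig_test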
     pig_command = "export JAVA_HOME=/usr/lib/jvm/java-8-oracle; export HADOOP_HOME=/usr/local/hadoop; /usr/local/pig/bin/pig -e \\{0}\" ".format(
         "\"data = LOAD {0} as (text:CHARARRAY);upper_case = FOREACH data GENERATE org.apache.pig.piggybank.evaluation.string.UPPER(text);STORE upper_case INTO {1};\\"
         .format("'/user/hduser/test_file_pig.txt'",
                 "'/user/hduser/pig_test'"))
     ssh_call_hadoop(self.user, self.master_IP, pig_command, hadoop_path='')
     exist_check_status = ssh_call_hadoop(
         self.user,
         self.master_IP,
         " dfs -test -e {0}".format('/user/hduser/pig_test/_SUCCESS'),
         hadoop_path=self.hdfs_path)
     self.assertEqual(exist_check_status, 0)
     self.addCleanup(self.delete_hdfs_files,
                     '/user/hduser/pig_test',
                     prefix="-r")
     self.addCleanup(self.hadoop_local_fs_action,
                     'rm /tmp/{0}'.format('test_file_pig.txt'))
     self.addCleanup(self.hadoop_local_fs_action,
                     'rm /user/hduser/{0}'.format('test_file_pig.txt'))
    def test_spark_pi_wordcount(self):
        """
        Run a Spark Pi and a Spark WordCount.
        """
        self.put_file_to_hdfs('/tmp/{0}'.format(SOURCE_HDFS_TO_PITHOS_FILE))
        spark_job = 'sudo -u hdfs spark-submit --class org.apache.spark.examples.'

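        # Submit SparkPi (with argument 10) and JavaWordCount (on the uploaded file) in YARN cluster mode; both must return exit status 0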
        for job_properties in [('SparkPi', 10),
                               ('JavaWordCount', SOURCE_HDFS_TO_PITHOS_FILE)]:
            test_job = spark_job + '{0} --deploy-mode cluster --master yarn-cluster {1} {2}'.format(
                job_properties[0], SPARK_EXAMPLES, job_properties[1])
            exist_check_status = ssh_call_hadoop(self.user,
                                                 self.master_IP,
                                                 test_job,
                                                 hadoop_path='')
            self.assertEqual(exist_check_status, 0)

        self.addCleanup(self.delete_hdfs_files, SOURCE_HDFS_TO_PITHOS_FILE)
        self.addCleanup(self.hadoop_local_fs_action,
                        'rm /tmp/{0}'.format(SOURCE_HDFS_TO_PITHOS_FILE))
    def test_spark_pi_wordcount(self):
        """
        Functional test to check if Spark is working correctly in an Ecosystem cluster
        by running a Spark Pi and a Spark WordCount.
        """
        self.put_file_to_hdfs('/tmp/{0}'.format(SOURCE_HDFS_TO_PITHOS_FILE))
        spark_job = 'export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop; /usr/local/spark/bin/spark-submit --class org.apache.spark.examples.'

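        # Submit SparkPi (with argument 10) and JavaWordCount (on the uploaded file) through the ecosystem Spark installation in YARN cluster mode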
        for job_properties in [('SparkPi', 10),
                               ('JavaWordCount', SOURCE_HDFS_TO_PITHOS_FILE)]:
            test_job = spark_job + '{0} --deploy-mode cluster --master yarn-cluster {1} {2}'.format(
                job_properties[0], SPARK_ECOSYSTEM_EXAMPLES, job_properties[1])
            exist_check_status = ssh_call_hadoop(self.user,
                                                 self.master_IP,
                                                 test_job,
                                                 hadoop_path='')
            self.assertEqual(exist_check_status, 0)

        self.addCleanup(self.delete_hdfs_files, SOURCE_HDFS_TO_PITHOS_FILE)
        self.addCleanup(self.hadoop_local_fs_action,
                        'rm /tmp/{0}'.format(SOURCE_HDFS_TO_PITHOS_FILE))
    def test_compare_wordcount_pithos_hdfs(self):
        """
        Functional test to upload a test file to Pithos and run two wordcounts, one from Pithos and one native from HDFS,
        and compare the length of the output files.
        """
        subprocess.call('echo "this is a test file to run a wordcount" > {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE),
                        stderr=FNULL, shell=True)
        subprocess.call('kamaki file upload {0}'.format(SOURCE_PITHOS_TO_HDFS_FILE), stderr=FNULL, shell=True)

        ssh_call_hadoop(self.user, self.master_IP, 'kamaki file download {0} /tmp/{0}'.
                        format(SOURCE_PITHOS_TO_HDFS_FILE), hadoop_path='')
        ssh_call_hadoop(self.user, self.master_IP, ' dfs -put /tmp/{0}'.
                        format(SOURCE_PITHOS_TO_HDFS_FILE), hadoop_path=self.hdfs_path)

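        # Run the wordcount once with the Pithos file as input and once with the HDFS copy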
        ssh_call_hadoop(self.user, self.master_IP, self.wordcount_command + 'pithos://pithos/{0} {1}'.
                        format(SOURCE_PITHOS_TO_HDFS_FILE, PITHOS_WORDCOUNT_DIR),
                                             hadoop_path=self.hadoop_path)
        ssh_call_hadoop(self.user, self.master_IP, self.wordcount_command + '{0} {1}'.
                        format(SOURCE_PITHOS_TO_HDFS_FILE, HDFS_WORDCOUNT_DIR),
                                             hadoop_path=self.hadoop_path)

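        # Compare the byte counts reported by 'dfs -dus' for the two output directories, with the directory names stripped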
        bytes_pithos_written = ssh_check_output_hadoop(self.user, self.master_IP,
                                             ' dfs -dus {0}'.format(PITHOS_WORDCOUNT_DIR),
                                             hadoop_path=self.hdfs_path)
        bytes_hdfs_written = ssh_check_output_hadoop(self.user, self.master_IP,
                                             ' dfs -dus {0}'.format(HDFS_WORDCOUNT_DIR),
                                             hadoop_path=self.hdfs_path)

        self.assertEqual(bytes_pithos_written[0].replace(PITHOS_WORDCOUNT_DIR, ""),
                         bytes_hdfs_written[0].replace(HDFS_WORDCOUNT_DIR, ""))
        self.addCleanup(self.delete_hdfs_files, PITHOS_WORDCOUNT_DIR, prefix="-r")
        self.addCleanup(self.delete_hdfs_files, HDFS_WORDCOUNT_DIR, prefix="-r")
        self.addCleanup(self.delete_hdfs_files, SOURCE_PITHOS_TO_HDFS_FILE)
        self.addCleanup(self.delete_local_files, SOURCE_PITHOS_TO_HDFS_FILE)
        self.addCleanup(self.delete_pithos_files, SOURCE_PITHOS_TO_HDFS_FILE)
        self.addCleanup(self.hadoop_local_fs_action, 'rm /tmp/{0}'.format(SOURCE_PITHOS_TO_HDFS_FILE))
 def delete_hdfs_files(self, file_to_delete, prefix=""):
     """
      Helper method to delete files transferred to the hdfs filesystem after a test.
     """
     ssh_call_hadoop('hduser', self.master_IP, ' dfs -rm {0} {1}'.format(prefix, file_to_delete))