Example #1
    def test_distributeWorkThroughClusters(self):
        """
        This test distributes the same wordcount example across different
        clusters and executes it on each of them.
        """

        # For a localhost configuration
        clusters = ["localhost"]  # the clusters used
        hadoop_source = "/user/xeon/gutenberg"
        hadoop_output = "/user/xeon/gutenberg-output"

        hadoop_jar = settings.get_hadoop_home() + "/hadoop-examples-1.0.4.jar"
        command = "%s jar %s wordcount %s %s" % (settings.get_hadoop_bin(),
                                                 hadoop_jar, hadoop_source,
                                                 hadoop_output)

        print("Execute the job")
        mstart = datetime.now()
        # apply_async returns immediately, so block on each result before
        # taking the end timestamp; otherwise only dispatch time is measured
        results = [executeCommand.apply_async(queue=q, args=(command, ))
                   for q in clusters]
        for result in results:
            result.get()
        mend = datetime.now()

        # generate digests of the job output stored in HDFS
        print("Validate data")
        vstart = datetime.now()
        data = []
        for q in clusters:
            output = medusa.generateDigests.apply_async(queue=q,
                                                        args=(hadoop_output +
                                                              "/part*", ))
            data.append(output.get())

        # the validation of the digests is done by the scheduling algorithm
        vend = datetime.now()
        print(data)

        disconnect_all()
        print("Execution time: " + str(mend - mstart))
        print("Verification time: " + str(vend - vstart))
Example #2
def get_hadoop_path():
    return settings.get_hadoop_home()
Example #3
def cat(path):
    return "%s dfs -cat %s" % (settings.get_hadoop_home() + "/bin/hdfs", path)
Example #4
def copyFromLocal(src, dest):
    return "%s dfs -copyFromLocal %s %s" % (settings.get_hadoop_home() + "/bin/hdfs", src, dest)