from datetime import datetime

# NOTE: settings, executeCommand, medusa and disconnect_all are provided by the
# surrounding project modules and are assumed to be imported elsewhere.


def test_distributeWorkThroughClusters(self):
    """
    This test distributes data through different clusters and executes
    the same example on each of them.
    """
    # For a localhost configuration
    clusters = ["localhost"]  # the clusters used

    hadoop_source = "/user/xeon/gutenberg"
    hadoop_output = "/user/xeon/gutenberg-output"
    hadoop_jar = settings.get_hadoop_home() + "/hadoop-examples-1.0.4.jar"
    command = "%s jar %s wordcount %s %s" % (
        settings.get_hadoop_bin(), hadoop_jar, hadoop_source, hadoop_output)

    print "Execute the job"
    mstart = datetime.now()
    # Dispatch the wordcount job to every cluster queue and wait for each one
    # to finish, so that mend - mstart measures execution, not just dispatch.
    jobs = [executeCommand.apply_async(queue=q, args=(command, )) for q in clusters]
    for job in jobs:
        job.get()
    mend = datetime.now()

    # copy data from HDFS to local
    print "Validate data"
    vstart = datetime.now()
    data = []
    for q in clusters:
        output = medusa.generateDigests.apply_async(
            queue=q, args=(hadoop_output + "/part*", ))
        data.append(output.get())
    # the validation of the digests is made by the scheduling algorithm
    vend = datetime.now()
    print data

    disconnect_all()

    print "Execution time: " + str(mend - mstart)
    print "Verification time: " + str(vend - vstart)
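# A minimal sketch, assuming Celery: the apply_async(queue=..., args=...) calls
# above match Celery's task API, so a worker-side task roughly like the one
# below presumably exists in the medusa project. The app name, broker URL and
# task body here are assumptions for illustration, not the project's actual
# implementation.
import subprocess

from celery import Celery

app = Celery("medusa_example", broker="amqp://localhost//")  # hypothetical broker


@app.task
def execute_command_example(command):
    # Run the Hadoop command line on the worker node and return its exit code.
    return subprocess.call(command, shell=True)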
def get_hadoop_path():
    # Root of the local Hadoop installation, taken from the project settings.
    return settings.get_hadoop_home()


def cat(path):
    # Build the shell command that prints an HDFS path to stdout.
    return "%s dfs -cat %s" % (settings.get_hadoop_home() + "/bin/hdfs", path)


def copyFromLocal(src, dest):
    # Build the shell command that copies a local file or directory into HDFS.
    return "%s dfs -copyFromLocal %s %s" % (settings.get_hadoop_home() + "/bin/hdfs", src, dest)
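# Usage sketch (assumption): the strings returned by cat() and copyFromLocal()
# are complete shell command lines, so they can be handed to the executeCommand
# task used in the test above or run locally, e.g. with subprocess. The local
# path and part-file name below are only examples.
import subprocess


def stage_gutenberg_example():
    # Push a local corpus into HDFS, then print a result file back from HDFS.
    subprocess.check_call(copyFromLocal("/tmp/gutenberg", "/user/xeon/gutenberg"), shell=True)
    subprocess.check_call(cat("/user/xeon/gutenberg-output/part-00000"), shell=True)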