def run(self): wordcount_in = "%s/tmp/grep_out_%d" % (get_hdfs_address(), int(time())) wordcount_out = "%s/text" % get_hdfs_address() def code(): run_jar("%s/examples/" % get_flink_dist_path(), "flink-java-*WordCount.jar", args = [wordcount_out, wordcount_in], clazz = "org.apache.flink.examples.java.wordcount.WordCount") master(code)
def run(self): grep_out = "%s/tmp/grep_out_%d" % (get_hdfs_address(), int(time())) grep_in = "%s/text" % get_hdfs_address() def code(): run_jar("~/flink-perf/flink-jobs/target", "flink-jobs-*.jar", args = [grep_in, grep_out, "these", "are", "test", "words"], clazz = "com.github.projectflink.grep.GrepJob") master(code)
def run(self): wordcount_in = "%s/text" % get_hdfs_address() self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address() def code(): run_jar("%s/target/" % self.repo.get_absolute_path(), "flink-dataflow-*-SNAPSHOT.jar", args = [wordcount_in, self.wordcount_out], clazz = self.implicit_clazz if self.implicit_combine else self.explicit_clazz) master(code)
def run(self): wordcount_in = "%s/text" % get_hdfs_address() self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address() def code(): run_jar("%s/target/" % self.repo.get_absolute_path(), "flink-dataflow-*-SNAPSHOT.jar", args = [wordcount_in, self.wordcount_out], clazz = "com.dataartisans.flink.dataflow.GoogleStreamingPipeline.examples") master(code)
def run(self): grep_out = "%s/tmp/grep_out_%d" % (get_hdfs_address(), int(time())) grep_in = "%s/text" % get_hdfs_address() def code(): run_jar("~/flink-perf/flink-jobs/target", "flink-jobs-*.jar", args=[grep_in, grep_out, "these", "are", "test", "words"], clazz="com.github.projectflink.grep.GrepJob") master(code)
def run(self): wordcount_in = "%s/text" % get_hdfs_address() self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address() def code(): run_jar("%s/target/" % self.repo.get_absolute_path(), "flink-dataflow-*-SNAPSHOT.jar", args = ["--", # Flink 0.8 way of specifying options to user programs "--input=%s" % wordcount_in, "--output=%s" % self.wordcount_out], clazz = "com.dataartisans.flink.dataflow.examples.DataflowWordCount") master(code)
def run(self): wordcount_in = "%s/text" % get_hdfs_address() self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address() def code(): run_jar("%s/target/" % self.repo.get_absolute_path(), "flink-dataflow-*-SNAPSHOT.jar", args=[wordcount_in, self.wordcount_out], clazz=self.implicit_clazz if self.implicit_combine else self.explicit_clazz) master(code)
def run(self): wordcount_in = "%s/text" % get_hdfs_address() self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address() def code(): run_jar( "%s/target/" % self.repo.get_absolute_path(), "flink-dataflow-*-SNAPSHOT.jar", args=[wordcount_in, self.wordcount_out], clazz= "com.dataartisans.flink.dataflow.examples.StreamingPipeline") master(code)
def run(self):
    # TODO get from generator directly
    als_in = "%s/als-benchmark800000-100000-400" % get_hdfs_address()
    als_out = "%s/tmp/als_out_%d" % (get_hdfs_address(), int(time()))

    def code():
        run_jar("%s/flink-jobs/target" % generators.ALS.repo.get_absolute_path(),
                "flink-jobs-*.jar",
                args=["master", 15, 1, 10, 100, "rand",
                      "%s/als-temp/" % get_hdfs_address(),
                      als_in, als_out],
                clazz="com.github.projectflink.als.ALSJoinBlocking")

    master(code)

def setup(self):
    master_slaves(lambda: install("libgfortran3"))
    self.out_path = get_hdfs_address() + "/als-benchmark"
    self.repo.clone()
    self.repo.checkout("master")
    self.repo.maven("clean package")

def setup(self):
    self.in_path = self.generator.experiment.out
    self.out_path = get_hdfs_address() + "/avro-benchmark/tpch1-avro"

    def code():
        run_jar("%s/flink-jobs/target" % generators.Avro.repo.get_absolute_path(),
                "flink-jobs-*.jar",
                args=[self.in_path, self.out_path],
                clazz="com.github.projectflink.avro.Prepare")

    master(code)

def run(self):
    self.out = get_hdfs_address() + "/avro-benchmark/tpch1/"  # + lineitems.csv

    def code():
        run_jar("%s/flink-jobs/target" % self.repo.get_absolute_path(),
                "flink-jobs-*.jar",
                args=["-s", self.scale_factor,
                      "-p", self.parallelism,
                      "-o", self.out],
                clazz="com.github.projectflink.avro.GenerateLineitems")

    master(code)
    # update path for benchmark
    self.out += "lineitems.csv"

def setup(self):
    self.out_path = get_hdfs_address() + self.path
    self.repo.clone()
    self.repo.checkout(self.git_branch)
    self.repo.maven("clean package")

def setup(self): self.wordcount_in = "%s/text2" % get_hdfs_address() self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()