Example #1
0
    def run(self):
        """Submit the streaming WordCount example jar through ``master``.

        ``run_jar`` is pointed at the flink-streaming-examples ``target``
        directory below ``get_flink_path()``; the job receives
        ``self.wordcount_in`` and ``self.wordcount_out`` as arguments.
        """
        def code():
            # Resolved when ``code`` is called, not when ``run`` defines it.
            jar_dir = ("%s/flink-staging/flink-streaming/flink-streaming-examples/target/"
                       % get_flink_path())
            io_paths = [self.wordcount_in, self.wordcount_out]
            run_jar(jar_dir,
                    "flink-streaming-*-WordCount.jar",
                    args=io_paths,
                    clazz="org.apache.flink.streaming.examples.wordcount.WordCount")

        master(code)
Example #2
0
    def run(self):
        """Submit the Java WordCount example jar through ``master``.

        ``run_jar`` searches ``<get_flink_dist_path()>/examples/`` and the
        job receives ``self.wordcount_in`` and ``self.wordcount_out``.
        """
        def code():
            # Resolved when ``code`` is called, not when ``run`` defines it.
            examples_dir = "%s/examples/" % get_flink_dist_path()
            io_paths = [self.wordcount_in, self.wordcount_out]
            run_jar(examples_dir,
                    "flink-java-*WordCount.jar",
                    args=io_paths,
                    clazz="org.apache.flink.examples.java.wordcount.WordCount")

        master(code)
Example #3
0
    def run(self):
        """Submit the streaming ``WordCount.jar`` example through ``master``.

        Input and output are passed as ``--input``/``--output`` options
        taken from ``self.wordcount_in`` and ``self.wordcount_out``.
        """
        def code():
            # Resolved when ``code`` is called, not when ``run`` defines it.
            streaming_dir = "%s/examples/streaming/" % get_flink_dist_path()
            cli_args = [
                "--input", self.wordcount_in,
                "--output", self.wordcount_out,
            ]
            run_jar(streaming_dir,
                    "WordCount.jar",
                    args=cli_args,
                    clazz="org.apache.flink.streaming.examples.wordcount.WordCount")

        master(code)
Example #4
0
    def run(self):
        """Submit the ``WindowWordCount.jar`` streaming example via ``master``.

        The job receives ``self.wordcount_in``, ``self.wordcount_out`` and
        the integer ``10000``.
        """
        def code():
            # Resolved when ``code`` is called, not when ``run`` defines it.
            streaming_dir = "%s/examples/streaming/" % get_flink_dist_path()
            # NOTE(review): 10000 is presumably a window parameter of the
            # job -- confirm against WindowWordCount.
            job_args = [self.wordcount_in, self.wordcount_out, 10000]
            run_jar(streaming_dir,
                    "WindowWordCount.jar",
                    args=job_args,
                    clazz="org.apache.flink.streaming.examples.windowing.WindowWordCount")

        master(code)
Example #5
0
 def run(self):
     """Submit ``GrepJob`` from the flink-jobs jar through ``master``.

     Reads ``<hdfs>/text`` and writes to a timestamped
     ``<hdfs>/tmp/grep_out_<seconds>`` path; the remaining arguments are
     the four literal words handed to the job.
     """
     # Both paths are fixed here, before the job callable is created.
     result_path = "%s/tmp/grep_out_%d" % (get_hdfs_address(), int(time()))
     source_path = "%s/text" % get_hdfs_address()

     def code():
         job_args = [source_path, result_path,
                     "these", "are", "test", "words"]
         run_jar("~/flink-perf/flink-jobs/target",
                 "flink-jobs-*.jar",
                 args=job_args,
                 clazz="com.github.projectflink.grep.GrepJob")

     master(code)
Example #6
0
 def run(self):
     """Submit the Java WordCount example jar through ``master``.

     The first job argument is ``<hdfs>/text`` and the second is a
     timestamped ``<hdfs>/tmp/grep_out_<seconds>`` path.
     """
     # Locals are named after the position they take in ``args`` below.
     second_arg = "%s/tmp/grep_out_%d" % (get_hdfs_address(), int(time()))
     first_arg = "%s/text" % get_hdfs_address()

     def code():
         # Resolved when ``code`` is called, not when ``run`` defines it.
         examples_dir = "%s/examples/" % get_flink_dist_path()
         job_args = [first_arg, second_arg]
         run_jar(examples_dir,
                 "flink-java-*WordCount.jar",
                 args=job_args,
                 clazz="org.apache.flink.examples.java.wordcount.WordCount")

     master(code)
Example #7
0
    def run(self):
        """Submit the Avro ``CompareJob`` through ``master``.

        The jar is looked up under the ``flink-jobs/target`` directory of
        ``generators.Avro.repo``; the job receives ``self.out_path``
        followed by ``self.in_path``.
        """
        def code():
            # Resolved when ``code`` is called, not when ``run`` defines it.
            repo_root = generators.Avro.repo.get_absolute_path()
            jar_dir = "%s/flink-jobs/target" % repo_root
            # Order matters: out_path first, in_path second.
            job_args = [self.out_path, self.in_path]
            run_jar(jar_dir,
                    "flink-jobs-*.jar",
                    args=job_args,
                    clazz="com.github.projectflink.avro.CompareJob")

        master(code)
Example #8
0
    def run(self):
        """Submit the ``generators.Text`` job through ``master``.

        The jar is looked up under ``flink-jobs/target`` of ``self.repo``;
        the job receives ``self.dop``, ``self.out_path`` and
        ``self.size_gb`` in that order.
        """
        def code():
            # Resolved when ``code`` is called, not when ``run`` defines it.
            jar_dir = "%s/flink-jobs/target" % self.repo.get_absolute_path()
            job_args = [self.dop, self.out_path, self.size_gb]
            run_jar(jar_dir,
                    "flink-jobs-*.jar",
                    args=job_args,
                    clazz="com.github.projectflink.generators.Text")

        master(code)
Example #9
0
    def run(self):
        """Hand ``master`` a callable that runs the ``generators.Text`` job.

        Job arguments, in order: ``self.dop``, ``self.out_path``,
        ``self.size_gb``. The jar comes from ``flink-jobs/target`` below
        ``self.repo.get_absolute_path()``.
        """
        def code():
            # Nothing here is evaluated until ``code`` itself is called.
            target_dir = "%s/flink-jobs/target" % self.repo.get_absolute_path()
            generator_args = [self.dop, self.out_path, self.size_gb]
            run_jar(target_dir,
                    "flink-jobs-*.jar",
                    args=generator_args,
                    clazz="com.github.projectflink.generators.Text")

        master(code)
Example #10
0
    def run(self):
        """Submit the flink-dataflow snapshot jar through ``master``.

        Reads ``<hdfs>/text`` and writes to ``<hdfs>/tmp/wc_out``; the
        output path is also stored on ``self.wordcount_out``.
        """
        text_path = "%s/text" % get_hdfs_address()
        # Kept on the instance so it is available after ``run`` returns.
        self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()

        def code():
            # Resolved when ``code`` is called, not when ``run`` defines it.
            jar_dir = "%s/target/" % self.repo.get_absolute_path()
            job_args = [text_path, self.wordcount_out]
            run_jar(jar_dir,
                    "flink-dataflow-*-SNAPSHOT.jar",
                    args=job_args,
                    clazz="com.dataartisans.flink.dataflow.GoogleStreamingPipeline.examples")

        master(code)
Example #11
0
    def run(self):
        """Submit the flink-dataflow snapshot jar through ``master``.

        The entry class is ``self.implicit_clazz`` when
        ``self.implicit_combine`` is truthy, otherwise
        ``self.explicit_clazz``. Reads ``<hdfs>/text`` and writes to
        ``<hdfs>/tmp/wc_out`` (also stored on ``self.wordcount_out``).
        """
        text_path = "%s/text" % get_hdfs_address()
        self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()

        def code():
            # Resolved when ``code`` is called, not when ``run`` defines it.
            jar_dir = "%s/target/" % self.repo.get_absolute_path()
            job_args = [text_path, self.wordcount_out]
            if self.implicit_combine:
                entry_class = self.implicit_clazz
            else:
                entry_class = self.explicit_clazz
            run_jar(jar_dir,
                    "flink-dataflow-*-SNAPSHOT.jar",
                    args=job_args,
                    clazz=entry_class)

        master(code)
Example #12
0
    def run(self):
        """Hand ``master`` a callable that runs ``WindowWordCount.jar``.

        Job arguments, in order: ``self.wordcount_in``,
        ``self.wordcount_out`` and the integer ``10000``.
        """
        def code():
            # Nothing here is evaluated until ``code`` itself is called.
            jar_dir = "%s/examples/streaming/" % get_flink_dist_path()
            # NOTE(review): meaning of 10000 is defined by the job
            # (presumably a window setting) -- confirm.
            window_args = [self.wordcount_in, self.wordcount_out, 10000]
            entry_class = (
                "org.apache.flink.streaming.examples.windowing.WindowWordCount")
            run_jar(jar_dir, "WindowWordCount.jar",
                    args=window_args, clazz=entry_class)

        master(code)
Example #13
0
    def run(self):
        """Hand ``master`` a callable that runs ``GrepJob``.

        Input is ``<hdfs>/text``; output goes to a timestamped
        ``<hdfs>/tmp/grep_out_<seconds>`` path. Four literal words follow
        the two paths in the argument list.
        """
        # Paths are computed once, before the callable is built.
        output_dir = "%s/tmp/grep_out_%d" % (get_hdfs_address(), int(time()))
        input_dir = "%s/text" % get_hdfs_address()

        def code():
            grep_args = [input_dir, output_dir]
            grep_args.extend(["these", "are", "test", "words"])
            run_jar("~/flink-perf/flink-jobs/target", "flink-jobs-*.jar",
                    args=grep_args,
                    clazz="com.github.projectflink.grep.GrepJob")

        master(code)
Example #14
0
 def run(self):
     """Submit the bundled WordCount example jar through ``master``.

     ``run_jar`` is called with ``upload=True`` for the jar under
     ``experiments/wordcount_files/``; both job arguments are HDFS URIs
     on port 50040 of ``env.master``.
     """
     def code():
         # ``env.master`` is read when ``code`` is called.
         input_uri = "hdfs://%s:50040/generated-wc.txt" % env.master
         output_uri = "hdfs://%s:50040/tmp/wc-out/" % env.master
         run_jar(path="experiments/wordcount_files/",
                 jar_name="flink-java-examples-0.8-incubating-SNAPSHOT-WordCount.jar",
                 args=[input_uri, output_uri],
                 upload=True)

     master(code)
Example #15
0
    def run(self):
        """Submit ``examples/batch/WordCount.jar`` through ``master``.

        Input and output are passed as ``--input``/``--output`` options
        taken from ``self.wordcount_in`` and ``self.wordcount_out``.
        """
        def code():
            # Resolved when ``code`` is called, not when ``run`` defines it.
            batch_dir = "%s/examples/batch/" % get_flink_dist_path()
            cli_args = ["--input", self.wordcount_in]
            cli_args += ["--output", self.wordcount_out]
            run_jar(batch_dir, "WordCount.jar",
                    args=cli_args,
                    clazz="org.apache.flink.examples.java.wordcount.WordCount")

        master(code)
Example #16
0
    def run(self):
        """Hand ``master`` a callable that runs the flink-dataflow jar.

        ``self.implicit_combine`` selects between ``self.implicit_clazz``
        (truthy) and ``self.explicit_clazz`` (falsy) as the entry class.
        Input is ``<hdfs>/text``; output is ``<hdfs>/tmp/wc_out``, which is
        also stored on ``self.wordcount_out``.
        """
        input_path = "%s/text" % get_hdfs_address()
        self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()

        def code():
            # Nothing here is evaluated until ``code`` itself is called.
            target_dir = "%s/target/" % self.repo.get_absolute_path()
            io_paths = [input_path, self.wordcount_out]
            if self.implicit_combine:
                chosen_class = self.implicit_clazz
            else:
                chosen_class = self.explicit_clazz
            run_jar(target_dir, "flink-dataflow-*-SNAPSHOT.jar",
                    args=io_paths, clazz=chosen_class)

        master(code)
Example #17
0
    def setup(self):
        """Set the benchmark paths and run the Avro ``Prepare`` job.

        ``self.in_path`` is taken from ``self.generator.experiment.out`` and
        ``self.out_path`` is ``<hdfs>/avro-benchmark/tpch1-avro``. The job
        receives the input path first, then the output path.
        """
        self.in_path = self.generator.experiment.out
        self.out_path = get_hdfs_address() + "/avro-benchmark/tpch1-avro"

        def code():
            # Resolved when ``code`` is called, not when ``setup`` defines it.
            repo_root = generators.Avro.repo.get_absolute_path()
            jar_dir = "%s/flink-jobs/target" % repo_root
            job_args = [self.in_path, self.out_path]
            run_jar(jar_dir,
                    "flink-jobs-*.jar",
                    args=job_args,
                    clazz="com.github.projectflink.avro.Prepare")

        master(code)
Example #18
0
    def run(self):
        """Submit ``DataflowWordCount`` from the flink-dataflow jar.

        Reads ``<hdfs>/text`` and writes to ``<hdfs>/tmp/wc_out`` (also
        stored on ``self.wordcount_out``); both are passed as
        ``--input=``/``--output=`` options after a leading ``--``.
        """
        text_path = "%s/text" % get_hdfs_address()
        self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()

        def code():
            # Resolved when ``code`` is called, not when ``run`` defines it.
            jar_dir = "%s/target/" % self.repo.get_absolute_path()
            # Flink 0.8 way of specifying options to user programs
            job_args = ["--"]
            job_args.append("--input=%s" % text_path)
            job_args.append("--output=%s" % self.wordcount_out)
            run_jar(jar_dir,
                    "flink-dataflow-*-SNAPSHOT.jar",
                    args=job_args,
                    clazz="com.dataartisans.flink.dataflow.examples.DataflowWordCount")

        master(code)
Example #19
0
    def run(self):
        """Submit ``StreamingPipeline`` from the flink-dataflow jar.

        Reads ``<hdfs>/text`` and writes to ``<hdfs>/tmp/wc_out``; the
        output path is also stored on ``self.wordcount_out``.
        """
        text_path = "%s/text" % get_hdfs_address()
        self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()

        def code():
            # Resolved when ``code`` is called, not when ``run`` defines it.
            jar_dir = "%s/target/" % self.repo.get_absolute_path()
            job_args = [text_path, self.wordcount_out]
            entry_class = (
                "com.dataartisans.flink.dataflow.examples.StreamingPipeline")
            run_jar(jar_dir, "flink-dataflow-*-SNAPSHOT.jar",
                    args=job_args, clazz=entry_class)

        master(code)
Example #20
0
    def run(self):
        """Hand ``master`` a callable that runs the bundled WordCount jar.

        The jar lives under ``experiments/wordcount_files/`` and is passed
        to ``run_jar`` with ``upload=True``. Both job arguments are HDFS
        URIs on port 50040 of ``env.master``.
        """
        def code():
            # ``env.master`` is read when ``code`` is called.
            hdfs_uris = [
                "hdfs://%s:50040/generated-wc.txt" % env.master,
                "hdfs://%s:50040/tmp/wc-out/" % env.master,
            ]
            jar = "flink-java-examples-0.8-incubating-SNAPSHOT-WordCount.jar"
            run_jar(path="experiments/wordcount_files/",
                    jar_name=jar,
                    args=hdfs_uris,
                    upload=True)

        master(code)
Example #21
0
    def run(self):
        """Submit the ``ALSDataGeneration`` job through ``master``.

        Job arguments, in order: ``num_rows``, ``num_cols``, ``mean_entry``,
        ``variance_entry``, ``mean_num_row_entries``,
        ``variance_num_row_entries`` and ``out_path`` (all read from
        ``self``).
        """
        def code():
            # Resolved when ``code`` is called, not when ``run`` defines it.
            jar_dir = "%s/flink-jobs/target" % self.repo.get_absolute_path()
            matrix_shape = [self.num_rows, self.num_cols]
            entry_stats = [self.mean_entry, self.variance_entry]
            row_stats = [self.mean_num_row_entries,
                         self.variance_num_row_entries]
            job_args = matrix_shape + entry_stats + row_stats + [self.out_path]
            run_jar(jar_dir,
                    "flink-jobs-*.jar",
                    args=job_args,
                    clazz="com.github.projectflink.als.ALSDataGeneration")

        master(code)
Example #22
0
File: als.py Project: aljoscha/yoka
    def run(self):
        """Submit the ``ALSJoinBlocking`` job through ``master``.

        Input is the fixed ``<hdfs>/als-benchmark800000-100000-400`` path;
        output goes to a timestamped ``<hdfs>/tmp/als_out_<seconds>`` path.
        """
        # TODO get from generator directly
        input_path = "%s/als-benchmark800000-100000-400" % get_hdfs_address()
        output_path = "%s/tmp/als_out_%d" % (get_hdfs_address(), int(time()))

        def code():
            # Resolved when ``code`` is called, not when ``run`` defines it.
            jar_dir = (
                "%s/flink-jobs/target" % generators.ALS.repo.get_absolute_path())
            # NOTE(review): the meaning of the leading literals is defined by
            # ALSJoinBlocking -- confirm against the job before changing.
            job_args = ["master", 15, 1, 10, 100, "rand"]
            job_args.append("%s/als-temp/" % get_hdfs_address())
            job_args.extend([input_path, output_path])
            run_jar(jar_dir,
                    "flink-jobs-*.jar",
                    args=job_args,
                    clazz="com.github.projectflink.als.ALSJoinBlocking")

        master(code)
Example #23
0
    def run(self):
        """Hand ``master`` a callable that runs ``ALSDataGeneration``.

        The argument list is built from seven attributes of ``self``:
        matrix dimensions, entry mean/variance, per-row entry-count
        mean/variance, and finally ``out_path``.
        """
        def code():
            # Nothing here is evaluated until ``code`` itself is called.
            target_dir = "%s/flink-jobs/target" % self.repo.get_absolute_path()
            generation_args = [
                self.num_rows,
                self.num_cols,
                self.mean_entry,
                self.variance_entry,
                self.mean_num_row_entries,
                self.variance_num_row_entries,
                self.out_path,
            ]
            run_jar(target_dir, "flink-jobs-*.jar",
                    args=generation_args,
                    clazz="com.github.projectflink.als.ALSDataGeneration")

        master(code)
Example #24
0
    def run(self):
        """Hand ``master`` a callable that runs ``DataflowWordCount``.

        Input is ``<hdfs>/text``; output is ``<hdfs>/tmp/wc_out`` and is
        also stored on ``self.wordcount_out``. Both reach the job as
        ``--input=``/``--output=`` options following a bare ``--``.
        """
        input_path = "%s/text" % get_hdfs_address()
        self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()

        def code():
            # Nothing here is evaluated until ``code`` itself is called.
            target_dir = "%s/target/" % self.repo.get_absolute_path()
            # Flink 0.8 way of specifying options to user programs
            separator = "--"
            input_option = "--input=%s" % input_path
            output_option = "--output=%s" % self.wordcount_out
            run_jar(target_dir, "flink-dataflow-*-SNAPSHOT.jar",
                    args=[separator, input_option, output_option],
                    clazz="com.dataartisans.flink.dataflow.examples.DataflowWordCount")

        master(code)
Example #25
0
    def run(self):
        """Run ``GenerateLineitems`` and record where its output lives.

        While the job runs, ``self.out`` is the directory
        ``<hdfs>/avro-benchmark/tpch1/``. After ``master`` returns,
        ``"lineitems.csv"`` is appended to it.
        """
        # Directory only for now; "lineitems.csv" is appended further down.
        self.out = get_hdfs_address() + "/avro-benchmark/tpch1/"

        def code():
            # Resolved when ``code`` is called, not when ``run`` defines it.
            jar_dir = "%s/flink-jobs/target" % self.repo.get_absolute_path()
            job_args = ["-s", self.scale_factor]
            job_args += ["-p", self.parallelism]
            job_args += ["-o", self.out]
            run_jar(jar_dir,
                    "flink-jobs-*.jar",
                    args=job_args,
                    clazz="com.github.projectflink.avro.GenerateLineitems")

        master(code)
        # update path for benchmark
        self.out = self.out + "lineitems.csv"
Example #26
0
    def run(self):
        """Hand ``master`` the ``GenerateLineitems`` job, then fix up ``self.out``.

        The job is given ``-s self.scale_factor``, ``-p self.parallelism``
        and ``-o self.out`` where ``self.out`` is
        ``<hdfs>/avro-benchmark/tpch1/``. Once ``master`` has returned,
        ``self.out`` is extended by ``"lineitems.csv"``.
        """
        output_dir = get_hdfs_address() + "/avro-benchmark/tpch1/"
        self.out = output_dir  # file name is appended after the job call

        def code():
            # Nothing here is evaluated until ``code`` itself is called.
            target_dir = "%s/flink-jobs/target" % self.repo.get_absolute_path()
            options = [
                "-s", self.scale_factor,
                "-p", self.parallelism,
                "-o", self.out,
            ]
            run_jar(target_dir, "flink-jobs-*.jar",
                    args=options,
                    clazz="com.github.projectflink.avro.GenerateLineitems")

        master(code)
        # update path for benchmark
        self.out = self.out + "lineitems.csv"
Example #27
0
File: als.py Project: mxm/yoka
    def run(self):
        """Hand ``master`` a callable that runs ``ALSJoinBlocking``.

        Reads the fixed ``<hdfs>/als-benchmark800000-100000-400`` data set
        and writes to a timestamped ``<hdfs>/tmp/als_out_<seconds>`` path.
        """
        # TODO get from generator directly
        benchmark_in = "%s/als-benchmark800000-100000-400" % get_hdfs_address()
        benchmark_out = "%s/tmp/als_out_%d" % (get_hdfs_address(), int(time()))

        def code():
            # Nothing here is evaluated until ``code`` itself is called.
            repo_root = generators.ALS.repo.get_absolute_path()
            target_dir = "%s/flink-jobs/target" % repo_root
            # NOTE(review): the literals are positional job parameters whose
            # meaning is defined by ALSJoinBlocking -- confirm before editing.
            fixed_params = ["master", 15, 1, 10, 100, "rand"]
            temp_dir = "%s/als-temp/" % get_hdfs_address()
            run_jar(target_dir, "flink-jobs-*.jar",
                    args=fixed_params + [temp_dir, benchmark_in, benchmark_out],
                    clazz="com.github.projectflink.als.ALSJoinBlocking")

        master(code)
Example #28
0
 def maven(self, target):
     """Run ``mvn <target>`` in ``self.path`` unless it was the last target.

     ``self.built_using`` remembers the most recent target so repeated
     requests for the same one are skipped.
     """
     # avoid building multiple times
     if target == self.built_using:
         return
     build_command = "cd %s && mvn %s > /dev/null" % (self.path, target)
     master(build_command)
     self.built_using = target
Example #29
0
 def shutdown(self):
     """Clean up the generated word-count data and the job output.

     Passes an ``rm -rf`` command for ``/tmp/wc-data/generated-wc.txt`` to
     ``master``, then has ``master`` call ``delete_from_hdfs`` for
     ``generated-wc.txt`` and ``/tmp/wc-out``.
     """
     def remover(hdfs_path):
         # Build a zero-argument callable that deletes one HDFS path.
         return lambda: delete_from_hdfs(hdfs_path)

     master("rm -rf /tmp/wc-data/generated-wc.txt")
     for hdfs_path in ("generated-wc.txt", "/tmp/wc-out"):
         master(remover(hdfs_path))
Example #30
0
 def clone(self):
     """Clone ``self.url`` into ``self.path`` once per instance.

     Any existing ``self.path`` is removed first. ``self.cloned`` is set
     afterwards so later calls return immediately.
     """
     if self.cloned:
         return
     clone_command = "rm -rf %s && git clone %s %s" % (
         self.path, self.url, self.path)
     master(clone_command)
     self.cloned = True
Example #31
0
 def checkout(self, commit):
     """Check out ``commit`` in ``self.path`` unless it is already current.

     After a checkout, ``self.commit`` is updated and ``self.built_using``
     is reset to ``None``.
     """
     if commit == self.commit:
         return
     checkout_command = "cd %s && git checkout %s" % (self.path, commit)
     master(checkout_command)
     self.commit = commit
     # Forget the last build target recorded for the previous commit.
     self.built_using = None
Example #32
0
 def setup(self):
     """Install tools, generate the word-count data and copy it to HDFS.

     Through ``master`` this installs ``wget``, ``ruby``, ``bzip2`` and
     ``aspell`` (in that order), executes the script rendered from
     ``gen_wc_data.sh.mustache`` with ``self.params``, and copies
     ``/tmp/wc-data/generated-wc.txt`` to ``generated-wc.txt``.
     """
     def installer(package):
         # Build a zero-argument callable that installs one package.
         return lambda: install(package)

     for package in ("wget", "ruby", "bzip2", "aspell"):
         master(installer(package))

     # generate wc data
     script = render_template(
         "experiments/wordcount_files/gen_wc_data.sh.mustache", self.params)
     master(lambda: exec_bash(script))

     upload = lambda: copy_to_hdfs("/tmp/wc-data/generated-wc.txt",
                                   "generated-wc.txt")
     master(upload)
Example #33
0
 def shutdown(self):
     """Remove the generated word-count input and the job output.

     ``master`` first receives an ``rm -rf`` command string for
     ``/tmp/wc-data/generated-wc.txt``, then two callables that call
     ``delete_from_hdfs`` on ``generated-wc.txt`` and ``/tmp/wc-out``.
     """
     master("rm -rf /tmp/wc-data/generated-wc.txt")

     drop_input = lambda: delete_from_hdfs("generated-wc.txt")
     drop_output = lambda: delete_from_hdfs("/tmp/wc-out")
     for cleanup in (drop_input, drop_output):
         master(cleanup)
Example #34
0
 def maven(self, target):
     """Build with Maven via ``master`` if ``target`` differs from the last one.

     The command changes into ``self.path`` and sends Maven's standard
     output to ``/dev/null``. ``self.built_using`` is updated afterwards.
     """
     already_built = target == self.built_using
     if already_built:
         # avoid building multiple times
         return
     master("cd %s && mvn %s > /dev/null" % (self.path, target))
     self.built_using = target
Example #35
0
 def setup(self):
     """Prepare the word-count benchmark input through ``master``.

     Steps, in order: install ``wget``, ``ruby``, ``bzip2`` and ``aspell``;
     execute the script rendered from ``gen_wc_data.sh.mustache`` with
     ``self.params``; copy ``/tmp/wc-data/generated-wc.txt`` to
     ``generated-wc.txt`` via ``copy_to_hdfs``.
     """
     def install_task(name):
         # Build a zero-argument callable that installs ``name``.
         return lambda: install(name)

     required = ["wget", "ruby", "bzip2", "aspell"]
     for name in required:
         master(install_task(name))

     # generate wc data
     template = "experiments/wordcount_files/gen_wc_data.sh.mustache"
     rendered_script = render_template(template, self.params)
     master(lambda: exec_bash(rendered_script))
     master(lambda: copy_to_hdfs("/tmp/wc-data/generated-wc.txt",
                                 "generated-wc.txt"))
Example #36
0
 def checkout(self, commit):
     """Switch the repository at ``self.path`` to ``commit`` via ``master``.

     Does nothing when ``commit`` equals ``self.commit``. Otherwise runs
     ``git checkout``, records the new commit and clears
     ``self.built_using``.
     """
     same_commit = commit == self.commit
     if same_commit:
         return
     master("cd %s && git checkout %s" % (self.path, commit))
     self.commit, self.built_using = commit, None
Example #37
0
 def clone(self):
     """Replace ``self.path`` with a fresh clone of ``self.url``.

     Runs only while ``self.cloned`` is falsy; the flag is set to ``True``
     once ``master`` has returned.
     """
     if self.cloned:
         return
     destination = self.path
     master("rm -rf %s && git clone %s %s" % (destination, self.url, destination))
     self.cloned = True
Example #38
0
 def shutdown(self):
     """Have ``master`` call ``delete_from_hdfs(self.out_path)``."""
     # delete out_path to be able to restart benchmark
     # ``self.out_path`` is looked up when the callable runs.
     remove_output = lambda: delete_from_hdfs(self.out_path)
     master(remove_output)
Example #39
0
 def shutdown(self):
     """Have ``master`` call ``delete_from_hdfs(self.wordcount_out)``."""
     # ``self.wordcount_out`` is looked up when the callable runs.
     remove_output = lambda: delete_from_hdfs(self.wordcount_out)
     master(remove_output)
Example #40
0
 def shutdown(self):
     """Delete the word-count output path by way of ``master``.

     The path is ``self.wordcount_out``, read when the callable runs.
     """
     drop_wordcount_out = lambda: delete_from_hdfs(self.wordcount_out)
     master(drop_wordcount_out)