Esempio n. 1
0
 def run(self):
     """Run Flink's built-in WordCount example over the shared text corpus.

     Fix: the original bound the temp OUTPUT directory to a variable named
     ``wordcount_in`` (and the input corpus to ``wordcount_out``), then
     compensated by passing them to ``run_jar`` in reverse order. The values
     the job receives are unchanged; only the misleading names are corrected.
     """
     wordcount_in = "%s/text" % get_hdfs_address()
     # Timestamped so repeated runs never collide on the output path.
     wordcount_out = "%s/tmp/grep_out_%d" % (get_hdfs_address(), int(time()))
     def code():
         run_jar("%s/examples/" % get_flink_dist_path(),
                 "flink-java-*WordCount.jar",
                 args = [wordcount_in, wordcount_out],
                 clazz = "org.apache.flink.examples.java.wordcount.WordCount")
     master(code)
Esempio n. 2
0
 def run(self):
     """Grep the shared text corpus for a fixed set of test words.

     Submits GrepJob from the master; output goes to a timestamped temp dir.
     """
     in_path = "%s/text" % get_hdfs_address()
     out_path = "%s/tmp/grep_out_%d" % (get_hdfs_address(), int(time()))

     def submit():
         run_jar("~/flink-perf/flink-jobs/target",
                 "flink-jobs-*.jar",
                 args=[in_path, out_path, "these", "are", "test", "words"],
                 clazz="com.github.projectflink.grep.GrepJob")
     master(submit)
Esempio n. 3
0
    def run(self):
        """Run the Dataflow word count, selecting the pipeline class by the
        implicit/explicit-combine configuration flag."""
        source = "%s/text" % get_hdfs_address()
        # Stored on self so later stages can find the result.
        self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()

        def submit():
            # Choose the variant at submission time, as the original did.
            if self.implicit_combine:
                pipeline = self.implicit_clazz
            else:
                pipeline = self.explicit_clazz
            run_jar("%s/target/" % self.repo.get_absolute_path(),
                    "flink-dataflow-*-SNAPSHOT.jar",
                    args=[source, self.wordcount_out],
                    clazz=pipeline)
        master(submit)
Esempio n. 4
0
    def run(self):
        """Run the GoogleStreamingPipeline example via the Dataflow-on-Flink
        runner, reading the shared text corpus and writing under /tmp/wc_out."""
        wordcount_in = "%s/text" % get_hdfs_address()
        # Stored on self so later stages can find the result.
        self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()

        def code():
            run_jar("%s/target/" % self.repo.get_absolute_path(),
                    "flink-dataflow-*-SNAPSHOT.jar",
                    args = [wordcount_in, self.wordcount_out],
                    # BUG FIX: the package segment "examples" belongs before the
                    # class name — the original had
                    # "…flink.dataflow.GoogleStreamingPipeline.examples", which is
                    # not a valid class FQN; sibling jobs in this file use
                    # "com.dataartisans.flink.dataflow.examples.<ClassName>".
                    clazz = "com.dataartisans.flink.dataflow.examples.GoogleStreamingPipeline")
        master(code)
Esempio n. 5
0
    def run(self):
        """Search the shared text corpus for a fixed set of test words."""
        # Fresh, timestamped output directory per run.
        stamp = int(time())
        out_dir = "%s/tmp/grep_out_%d" % (get_hdfs_address(), stamp)
        in_dir = "%s/text" % get_hdfs_address()

        def submit():
            run_jar("~/flink-perf/flink-jobs/target",
                    "flink-jobs-*.jar",
                    args=[in_dir, out_dir, "these", "are", "test", "words"],
                    clazz="com.github.projectflink.grep.GrepJob")

        master(submit)
Esempio n. 6
0
    def run(self):
        """Run DataflowWordCount, passing program options Flink 0.8 style."""
        text_in = "%s/text" % get_hdfs_address()
        # Stored on self so later stages can find the result.
        self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()

        def submit():
            # Leading "--" is the Flink 0.8 way of handing the remaining
            # options through to the user program.
            program_args = ["--",
                            "--input=%s" % text_in,
                            "--output=%s" % self.wordcount_out]
            run_jar("%s/target/" % self.repo.get_absolute_path(),
                    "flink-dataflow-*-SNAPSHOT.jar",
                    args=program_args,
                    clazz="com.dataartisans.flink.dataflow.examples.DataflowWordCount")
        master(submit)
Esempio n. 7
0
    def run(self):
        """Word count via Dataflow-on-Flink; the pipeline class depends on
        whether the implicit-combine variant was requested."""
        corpus = "%s/text" % get_hdfs_address()
        # Kept on the instance for downstream verification steps.
        self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()

        def submit():
            pipeline = (self.implicit_clazz if self.implicit_combine
                        else self.explicit_clazz)
            run_jar("%s/target/" % self.repo.get_absolute_path(),
                    "flink-dataflow-*-SNAPSHOT.jar",
                    args=[corpus, self.wordcount_out],
                    clazz=pipeline)

        master(submit)
Esempio n. 8
0
    def run(self):
        """Launch the streaming word-count pipeline on the cluster master."""
        corpus_in = "%s/text" % get_hdfs_address()
        # Kept on the instance for downstream verification steps.
        self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()
        job_class = "com.dataartisans.flink.dataflow.examples.StreamingPipeline"

        def submit():
            jar_dir = "%s/target/" % self.repo.get_absolute_path()
            run_jar(jar_dir,
                    "flink-dataflow-*-SNAPSHOT.jar",
                    args=[corpus_in, self.wordcount_out],
                    clazz=job_class)

        master(submit)
Esempio n. 9
0
File: als.py Progetto: aljoscha/yoka
    def run(self):
        """Run the blocked ALS job against the pre-generated benchmark data."""
        # TODO get from generator directly
        als_in = "%s/als-benchmark800000-100000-400" % get_hdfs_address()
        als_out = "%s/tmp/als_out_%d" % (get_hdfs_address(), int(time()))

        def submit():
            jar_dir = "%s/flink-jobs/target" % generators.ALS.repo.get_absolute_path()
            # Numeric ALS parameters as used by the sibling als.py jobs —
            # their exact meanings are not documented here; confirm in GrepJob repo.
            job_args = ["master", 15, 1, 10, 100, "rand",
                        "%s/als-temp/" % get_hdfs_address(), als_in, als_out]
            run_jar(jar_dir,
                    "flink-jobs-*.jar",
                    args=job_args,
                    clazz="com.github.projectflink.als.ALSJoinBlocking")

        master(submit)
Esempio n. 10
0
    def setup(self):
        """Install runtime dependencies on all nodes and build the job jar."""

        def _install_fortran_runtime():
            # Native Fortran runtime — presumably needed by the ALS job's
            # native math libraries; TODO confirm.
            install("libgfortran3")

        master_slaves(_install_fortran_runtime)
        self.out_path = "%s/als-benchmark" % get_hdfs_address()

        self.repo.clone()
        self.repo.checkout("master")
        self.repo.maven("clean package")
Esempio n. 11
0
    def setup(self):
        """Prepare the cluster: install libgfortran3 everywhere, record the
        HDFS output path, and build the benchmark jar from master."""
        master_slaves(lambda: install("libgfortran3"))
        self.out_path = "%s%s" % (get_hdfs_address(), "/als-benchmark")

        repo = self.repo
        repo.clone()
        repo.checkout("master")
        repo.maven("clean package")
Esempio n. 12
0
File: als.py Progetto: aljoscha/yoka
 def code():
     """Submit ALSJoinBlocking with the standard benchmark parameters.

     ``als_in``/``als_out`` are free variables bound in the enclosing scope.
     """
     jar_dir = "%s/flink-jobs/target" % generators.ALS.repo.get_absolute_path()
     run_jar(jar_dir,
             "flink-jobs-*.jar",
             args=["master", 15, 1, 10, 100, "rand",
                   "%s/als-temp/" % get_hdfs_address(), als_in, als_out],
             clazz="com.github.projectflink.als.ALSJoinBlocking")
Esempio n. 13
0
    def run(self):
        """Run DataflowWordCount with Flink 0.8-style program arguments."""
        source_path = "%s/text" % get_hdfs_address()
        # Stored on self so later stages can find the result.
        self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()

        def submit():
            run_jar(
                "%s/target/" % self.repo.get_absolute_path(),
                "flink-dataflow-*-SNAPSHOT.jar",
                # Leading "--" tells Flink 0.8 to pass the rest of the
                # options through to the user program.
                args=["--",
                      "--input=%s" % source_path,
                      "--output=%s" % self.wordcount_out],
                clazz="com.dataartisans.flink.dataflow.examples.DataflowWordCount")

        master(submit)
Esempio n. 14
0
File: als.py Progetto: mxm/yoka
    def run(self):
        """Submit the blocked ALS benchmark job from the master node."""
        # TODO get from generator directly
        als_in = "%s/als-benchmark800000-100000-400" % get_hdfs_address()
        # Timestamped output path so repeated runs do not collide.
        als_out = "%s/tmp/als_out_%d" % (get_hdfs_address(), int(time()))

        def submit():
            jar_dir = "%s/flink-jobs/target" % generators.ALS.repo.get_absolute_path()
            job_args = ["master",
                        15, 1, 10, 100, "rand",
                        "%s/als-temp/" % get_hdfs_address(),
                        als_in,
                        als_out]
            run_jar(jar_dir,
                    "flink-jobs-*.jar",
                    args=job_args,
                    clazz="com.github.projectflink.als.ALSJoinBlocking")

        master(submit)
Esempio n. 15
0
File: als.py Progetto: mxm/yoka
 def code():
     """Kick off the blocked ALS join job (``als_in``/``als_out`` come from
     the enclosing scope)."""
     als_args = ["master",
                 15, 1, 10, 100, "rand",
                 "%s/als-temp/" % get_hdfs_address(),
                 als_in,
                 als_out]
     run_jar("%s/flink-jobs/target" % generators.ALS.repo.get_absolute_path(),
             "flink-jobs-*.jar",
             args=als_args,
             clazz="com.github.projectflink.als.ALSJoinBlocking")
Esempio n. 16
0
    def setup(self):
        """Convert the generator's TPC-H output to Avro before benchmarking."""
        # Input is whatever the upstream generator experiment produced.
        self.in_path = self.generator.experiment.out
        self.out_path = "%s/avro-benchmark/tpch1-avro" % get_hdfs_address()

        def convert():
            jar_dir = "%s/flink-jobs/target" % generators.Avro.repo.get_absolute_path()
            run_jar(jar_dir,
                    "flink-jobs-*.jar",
                    args=[self.in_path, self.out_path],
                    clazz="com.github.projectflink.avro.Prepare")

        master(convert)
Esempio n. 17
0
    def run(self):
        """Generate the TPC-H lineitems table on HDFS.

        ``self.out`` first names the output *directory* for the generator,
        then is updated to point at the concrete file for downstream stages.
        """
        self.out = "%s/avro-benchmark/tpch1/" % get_hdfs_address()  # + lineitems.csv

        def generate():
            run_jar("%s/flink-jobs/target" % self.repo.get_absolute_path(),
                    "flink-jobs-*.jar",
                    args=["-s", self.scale_factor,
                          "-p", self.parallelism,
                          "-o", self.out],
                    clazz="com.github.projectflink.avro.GenerateLineitems")

        master(generate)
        # update path for benchmark
        self.out += "lineitems.csv"
Esempio n. 18
0
    def run(self):
        """Run the lineitems generator, then rewrite ``self.out`` to the
        produced CSV file for the benchmark that follows."""
        out_dir = get_hdfs_address() + "/avro-benchmark/tpch1/"  # + lineitems.csv
        self.out = out_dir

        def generate():
            jar_dir = "%s/flink-jobs/target" % self.repo.get_absolute_path()
            run_jar(jar_dir,
                    "flink-jobs-*.jar",
                    args=["-s", self.scale_factor,
                          "-p", self.parallelism,
                          "-o", self.out],
                    clazz="com.github.projectflink.avro.GenerateLineitems")

        master(generate)
        # update path for benchmark
        self.out += "lineitems.csv"
Esempio n. 19
0
    def setup(self):
        """Derive the HDFS output path, then check out and build the job."""
        self.out_path = get_hdfs_address() + self.path

        repo = self.repo
        repo.clone()
        repo.checkout(self.git_branch)
        repo.maven("clean package")
Esempio n. 20
0
 def setup(self):
     """Configure HDFS locations for the word-count run: the secondary text
     corpus as input, a scratch directory as output."""
     self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()
     self.wordcount_in = "%s/text2" % get_hdfs_address()
Esempio n. 21
0
    def setup(self):
        """Record the benchmark output path and build the jar from source."""
        self.out_path = "%s%s" % (get_hdfs_address(), self.path)

        # Fresh clone + branch checkout + maven package, in that order.
        self.repo.clone()
        self.repo.checkout(self.git_branch)
        self.repo.maven("clean package")
Esempio n. 22
0
 def setup(self):
     """Point the run at the second text corpus and a scratch output dir."""
     hdfs = get_hdfs_address()
     self.wordcount_in = "%s/text2" % hdfs
     self.wordcount_out = "%s/tmp/wc_out" % hdfs