Python master Examples, cluster.utils.master Python Examples

Example #1

0

Show file

File: wordcount.py Project: aljoscha/yoka

    def run(self):
        """Submit the Flink streaming WordCount example jar through `master`.

        Passes `self.wordcount_in` and `self.wordcount_out` as the two
        program arguments.
        """

        def code():
            # Everything is computed inside the closure, so nothing is
            # evaluated until `code` is actually called.
            jar_dir = "%s/flink-staging/flink-streaming/flink-streaming-examples/target/" % get_flink_path()
            jar_name = "flink-streaming-*-WordCount.jar"
            job_args = [self.wordcount_in, self.wordcount_out]
            entry_class = "org.apache.flink.streaming.examples.wordcount.WordCount"
            run_jar(jar_dir, jar_name, args=job_args, clazz=entry_class)

        master(code)

Example #2

0

Show file

File: wordcount.py Project: aljoscha/yoka

    def run(self):
        """Submit the Java WordCount example jar through `master`.

        Passes `self.wordcount_in` and `self.wordcount_out` as the two
        program arguments.
        """

        def code():
            # Resolved lazily: the dist path is only looked up when `code`
            # is called.
            jar_dir = "%s/examples/" % get_flink_dist_path()
            jar_name = "flink-java-*WordCount.jar"
            job_args = [self.wordcount_in, self.wordcount_out]
            entry_class = "org.apache.flink.examples.java.wordcount.WordCount"
            run_jar(jar_dir, jar_name, args=job_args, clazz=entry_class)

        master(code)

Example #3

0

Show file

File: wordcount.py Project: mxm/yoka

    def run(self):
        """Submit the streaming WordCount jar through `master`.

        The input and output locations are passed as `--input` / `--output`
        option pairs.
        """

        def code():
            jar_dir = "%s/examples/streaming/" % get_flink_dist_path()
            jar_name = "WordCount.jar"
            job_args = [
                "--input", self.wordcount_in,
                "--output", self.wordcount_out,
            ]
            entry_class = "org.apache.flink.streaming.examples.wordcount.WordCount"
            run_jar(jar_dir, jar_name, args=job_args, clazz=entry_class)

        master(code)

Example #4

0

Show file

File: wordcount.py Project: mxm/yoka

    def run(self):
        """Submit the streaming WindowWordCount jar through `master`."""

        def code():
            jar_dir = "%s/examples/streaming/" % get_flink_dist_path()
            jar_name = "WindowWordCount.jar"
            # NOTE(review): the meaning of the trailing 10000 is defined by
            # WindowWordCount itself - confirm against the job's usage text.
            job_args = [self.wordcount_in, self.wordcount_out, 10000]
            entry_class = "org.apache.flink.streaming.examples.windowing.WindowWordCount"
            run_jar(jar_dir, jar_name, args=job_args, clazz=entry_class)

        master(code)

Example #5

0

Show file

File: grep.py Project: aljoscha/yoka

 def run(self):
     """Submit the GrepJob jar through `master`.

     The result path is suffixed with the current Unix timestamp; the source
     path is `<hdfs address>/text`.
     """
     hdfs_root = get_hdfs_address()
     stamp = int(time())
     result_path = "%s/tmp/grep_out_%d" % (hdfs_root, stamp)
     source_path = "%s/text" % get_hdfs_address()

     def code():
         jar_dir = "~/flink-perf/flink-jobs/target"
         jar_name = "flink-jobs-*.jar"
         # Source path, result path, then four literal words for the job.
         job_args = [source_path, result_path, "these", "are", "test", "words"]
         entry_class = "com.github.projectflink.grep.GrepJob"
         run_jar(jar_dir, jar_name, args=job_args, clazz=entry_class)

     master(code)

Example #6

0

Show file

File: wordcount_new.py Project: ktzoumas/flink-perf-new

 def run(self):
     """Submit the Java WordCount example jar through `master`.

     The job receives `<hdfs address>/text` as its first argument and a
     timestamped `<hdfs address>/tmp/grep_out_<ts>` path as its second.
     """
     hdfs_root = get_hdfs_address()
     stamp = int(time())
     # NOTE(review): presumably the second argument is the output location;
     # the "grep_out_" prefix looks inherited from the grep benchmark.
     second_arg_path = "%s/tmp/grep_out_%d" % (hdfs_root, stamp)
     first_arg_path = "%s/text" % get_hdfs_address()

     def code():
         jar_dir = "%s/examples/" % get_flink_dist_path()
         jar_name = "flink-java-*WordCount.jar"
         job_args = [first_arg_path, second_arg_path]
         entry_class = "org.apache.flink.examples.java.wordcount.WordCount"
         run_jar(jar_dir, jar_name, args=job_args, clazz=entry_class)

     master(code)

Example #7

0

Show file

    def run(self):
        """Submit the avro CompareJob jar through `master`."""

        def code():
            jar_dir = "%s/flink-jobs/target" % generators.Avro.repo.get_absolute_path()
            jar_name = "flink-jobs-*.jar"
            # NOTE(review): out_path is passed before in_path - confirm this
            # matches the argument order CompareJob expects.
            job_args = [self.out_path, self.in_path]
            entry_class = "com.github.projectflink.avro.CompareJob"
            run_jar(jar_dir, jar_name, args=job_args, clazz=entry_class)

        master(code)

Example #8

0

Show file

File: generators.py Project: mxm/yoka

    def run(self):
        """Submit the Text generator jar through `master`.

        Passes `self.dop`, `self.out_path` and `self.size_gb` as program
        arguments, in that order.
        """

        def code():
            jar_dir = "%s/flink-jobs/target" % self.repo.get_absolute_path()
            jar_name = "flink-jobs-*.jar"
            job_args = [self.dop, self.out_path, self.size_gb]
            entry_class = "com.github.projectflink.generators.Text"
            run_jar(jar_dir, jar_name, args=job_args, clazz=entry_class)

        master(code)

Example #9

0

Show file

File: generators.py Project: aljoscha/yoka

    def run(self):
        """Hand the Text data-generator job to `master`.

        Program arguments, in order: `self.dop`, `self.out_path`,
        `self.size_gb`.
        """

        def code():
            # The repository path is looked up only when `code` is called.
            target_dir = "%s/flink-jobs/target" % self.repo.get_absolute_path()
            generator_args = [self.dop, self.out_path, self.size_gb]
            run_jar(
                target_dir,
                "flink-jobs-*.jar",
                args=generator_args,
                clazz="com.github.projectflink.generators.Text",
            )

        master(code)

Example #10

0

Show file

File: wordcount.py Project: mxm/yoka

    def run(self):
        """Submit the flink-dataflow snapshot jar through `master`.

        Stores the output location on `self.wordcount_out` before handing
        the job over.
        """
        source_path = "%s/text" % get_hdfs_address()
        self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()

        def code():
            jar_dir = "%s/target/" % self.repo.get_absolute_path()
            jar_name = "flink-dataflow-*-SNAPSHOT.jar"
            job_args = [source_path, self.wordcount_out]
            # NOTE(review): this value looks like a package path rather than
            # a class name - verify it against the dataflow repository.
            entry_class = "com.dataartisans.flink.dataflow.GoogleStreamingPipeline.examples"
            run_jar(jar_dir, jar_name, args=job_args, clazz=entry_class)

        master(code)

Example #11

0

Show file

File: wordcount.py Project: mxm/yoka

    def run(self):
        """Submit the flink-dataflow snapshot jar through `master`.

        The entry class is `self.implicit_clazz` when `self.implicit_combine`
        is truthy, otherwise `self.explicit_clazz`. The output location is
        stored on `self.wordcount_out`.
        """
        source_path = "%s/text" % get_hdfs_address()
        self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()

        def code():
            jar_dir = "%s/target/" % self.repo.get_absolute_path()
            jar_name = "flink-dataflow-*-SNAPSHOT.jar"
            job_args = [source_path, self.wordcount_out]
            if self.implicit_combine:
                entry_class = self.implicit_clazz
            else:
                entry_class = self.explicit_clazz
            run_jar(jar_dir, jar_name, args=job_args, clazz=entry_class)

        master(code)

Example #12

0

Show file

    def run(self):
        """Hand the streaming WindowWordCount job to `master`."""

        def code():
            examples_dir = "%s/examples/streaming/" % get_flink_dist_path()
            # NOTE(review): the trailing 10000 is interpreted by the job;
            # its meaning is not visible here - confirm.
            window_args = [self.wordcount_in, self.wordcount_out, 10000]
            main_class = "org.apache.flink.streaming.examples.windowing.WindowWordCount"
            run_jar(examples_dir, "WindowWordCount.jar", args=window_args, clazz=main_class)

        master(code)

Example #13

0

Show file

    def run(self):
        """Hand the GrepJob to `master`.

        Output goes to a path suffixed with the current Unix timestamp;
        input is `<hdfs address>/text`.
        """
        out_root = get_hdfs_address()
        now = int(time())
        output_path = "%s/tmp/grep_out_%d" % (out_root, now)
        input_path = "%s/text" % get_hdfs_address()

        def code():
            search_words = ["these", "are", "test", "words"]
            run_jar(
                "~/flink-perf/flink-jobs/target",
                "flink-jobs-*.jar",
                args=[input_path, output_path] + search_words,
                clazz="com.github.projectflink.grep.GrepJob",
            )

        master(code)

Example #14

0

Show file

File: wordcount.py Project: ktzoumas/flink-perf-new

 def run(self):
     """Submit the bundled WordCount example jar through `master`.

     Calls `run_jar` with `upload=True` and two HDFS URLs built from
     `env.master` on port 50040.
     """
     def code():
         source_url = "hdfs://%s:50040/generated-wc.txt" % env.master
         result_url = "hdfs://%s:50040/tmp/wc-out/" % env.master
         run_jar(
             path="experiments/wordcount_files/",
             jar_name="flink-java-examples-0.8-incubating-SNAPSHOT-WordCount.jar",
             args=[source_url, result_url],
             upload=True,
         )

     master(code)

Example #15

0

Show file

    def run(self):
        """Submit the batch WordCount example jar through `master`.

        Input and output locations are passed as `--input` / `--output`
        option pairs.
        """

        def code():
            jar_dir = "%s/examples/batch/" % get_flink_dist_path()
            jar_name = "WordCount.jar"
            job_args = ["--input", self.wordcount_in, "--output", self.wordcount_out]
            entry_class = "org.apache.flink.examples.java.wordcount.WordCount"
            run_jar(jar_dir, jar_name, args=job_args, clazz=entry_class)

        master(code)

Example #16

0

Show file

    def run(self):
        """Hand the flink-dataflow snapshot job to `master`.

        Picks `self.implicit_clazz` when `self.implicit_combine` is truthy
        and `self.explicit_clazz` otherwise. Records the output location on
        `self.wordcount_out`.
        """
        text_path = "%s/text" % get_hdfs_address()
        self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()

        def code():
            target_dir = "%s/target/" % self.repo.get_absolute_path()
            paths = [text_path, self.wordcount_out]
            if self.implicit_combine:
                chosen_class = self.implicit_clazz
            else:
                chosen_class = self.explicit_clazz
            run_jar(target_dir, "flink-dataflow-*-SNAPSHOT.jar", args=paths, clazz=chosen_class)

        master(code)

Example #17

0

Show file

    def setup(self):
        """Record the in/out paths and submit the avro Prepare job.

        `self.in_path` is taken from the generator's experiment output;
        `self.out_path` is a fixed location under the HDFS address.
        """
        self.in_path = self.generator.experiment.out
        self.out_path = get_hdfs_address() + "/avro-benchmark/tpch1-avro"

        def code():
            jar_dir = "%s/flink-jobs/target" % generators.Avro.repo.get_absolute_path()
            jar_name = "flink-jobs-*.jar"
            job_args = [self.in_path, self.out_path]
            entry_class = "com.github.projectflink.avro.Prepare"
            run_jar(jar_dir, jar_name, args=job_args, clazz=entry_class)

        master(code)

Example #18

0

Show file

File: wordcount.py Project: mxm/yoka

    def run(self):
        """Submit the DataflowWordCount job through `master`.

        Stores the output location on `self.wordcount_out` first.
        """
        source_path = "%s/text" % get_hdfs_address()
        self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()

        def code():
            jar_dir = "%s/target/" % self.repo.get_absolute_path()
            jar_name = "flink-dataflow-*-SNAPSHOT.jar"
            # Flink 0.8 way of specifying options to user programs
            separator = "--"
            input_option = "--input=%s" % source_path
            output_option = "--output=%s" % self.wordcount_out
            entry_class = "com.dataartisans.flink.dataflow.examples.DataflowWordCount"
            run_jar(jar_dir, jar_name,
                    args=[separator, input_option, output_option],
                    clazz=entry_class)

        master(code)

Example #19

0

Show file

    def run(self):
        """Submit the StreamingPipeline example job through `master`.

        Stores the output location on `self.wordcount_out` first.
        """
        source_path = "%s/text" % get_hdfs_address()
        self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()

        def code():
            jar_dir = "%s/target/" % self.repo.get_absolute_path()
            jar_name = "flink-dataflow-*-SNAPSHOT.jar"
            job_args = [source_path, self.wordcount_out]
            entry_class = "com.dataartisans.flink.dataflow.examples.StreamingPipeline"
            run_jar(jar_dir, jar_name, args=job_args, clazz=entry_class)

        master(code)

Example #20

0

Show file

    def run(self):
        """Hand the bundled WordCount example jar to `master`.

        `run_jar` is called with `upload=True`; both program arguments are
        HDFS URLs built from `env.master` on port 50040.
        """

        def code():
            jar_file = "flink-java-examples-0.8-incubating-SNAPSHOT-WordCount.jar"
            wc_input = "hdfs://%s:50040/generated-wc.txt" % env.master
            wc_output = "hdfs://%s:50040/tmp/wc-out/" % env.master
            run_jar(path="experiments/wordcount_files/",
                    jar_name=jar_file,
                    args=[wc_input, wc_output],
                    upload=True)

        master(code)

Example #21

0

Show file

File: generators.py Project: mxm/yoka

    def run(self):
        """Submit the ALSDataGeneration job through `master`.

        The seven program arguments are read from attributes on `self`:
        matrix dimensions, entry mean/variance, per-row entry-count
        mean/variance, and finally the output path.
        """

        def code():
            jar_dir = "%s/flink-jobs/target" % self.repo.get_absolute_path()
            jar_name = "flink-jobs-*.jar"
            dimensions = [self.num_rows, self.num_cols]
            entry_stats = [self.mean_entry, self.variance_entry]
            row_stats = [self.mean_num_row_entries, self.variance_num_row_entries]
            job_args = dimensions + entry_stats + row_stats + [self.out_path]
            entry_class = "com.github.projectflink.als.ALSDataGeneration"
            run_jar(jar_dir, jar_name, args=job_args, clazz=entry_class)

        master(code)

Example #22

0

Show file

File: als.py Project: aljoscha/yoka

    def run(self):
        """Submit the ALSJoinBlocking job through `master`.

        Input is a fixed benchmark dataset under the HDFS address; output is
        a path suffixed with the current Unix timestamp.
        """
        # TODO get from generator directly
        dataset_path = "%s/als-benchmark800000-100000-400" % get_hdfs_address()
        hdfs_root = get_hdfs_address()
        stamp = int(time())
        result_path = "%s/tmp/als_out_%d" % (hdfs_root, stamp)

        def code():
            jar_dir = "%s/flink-jobs/target" % generators.ALS.repo.get_absolute_path()
            jar_name = "flink-jobs-*.jar"
            temp_dir = "%s/als-temp/" % get_hdfs_address()
            # NOTE(review): the meaning of the leading positional values is
            # defined by ALSJoinBlocking - confirm against the job source.
            job_args = ["master", 15, 1, 10, 100, "rand", temp_dir, dataset_path, result_path]
            entry_class = "com.github.projectflink.als.ALSJoinBlocking"
            run_jar(jar_dir, jar_name, args=job_args, clazz=entry_class)

        master(code)

Example #23

0

Show file

File: generators.py Project: aljoscha/yoka

    def run(self):
        """Hand the ALSDataGeneration job to `master`.

        All seven program arguments come from attributes on `self`; the
        output path is passed last.
        """

        def code():
            target_dir = "%s/flink-jobs/target" % self.repo.get_absolute_path()
            generation_args = [
                self.num_rows,
                self.num_cols,
                self.mean_entry,
                self.variance_entry,
                self.mean_num_row_entries,
                self.variance_num_row_entries,
                self.out_path,
            ]
            run_jar(target_dir, "flink-jobs-*.jar",
                    args=generation_args,
                    clazz="com.github.projectflink.als.ALSDataGeneration")

        master(code)

Example #24

0

Show file

    def run(self):
        """Hand the DataflowWordCount job to `master`.

        The output location is recorded on `self.wordcount_out` beforehand.
        """
        text_path = "%s/text" % get_hdfs_address()
        self.wordcount_out = "%s/tmp/wc_out" % get_hdfs_address()

        def code():
            target_dir = "%s/target/" % self.repo.get_absolute_path()
            # Flink 0.8 way of specifying options to user programs
            options = ["--"]
            options.append("--input=%s" % text_path)
            options.append("--output=%s" % self.wordcount_out)
            main_class = "com.dataartisans.flink.dataflow.examples.DataflowWordCount"
            run_jar(target_dir, "flink-dataflow-*-SNAPSHOT.jar", args=options, clazz=main_class)

        master(code)

Example #25

0

Show file

File: generators.py Project: mxm/yoka

    def run(self):
        """Submit the GenerateLineitems job and then extend `self.out`.

        `self.out` is a directory while the job is handed to `master`;
        afterwards "lineitems.csv" is appended to it.
        """
        self.out = get_hdfs_address() + "/avro-benchmark/tpch1/" # + lineitems.csv

        def code():
            jar_dir = "%s/flink-jobs/target" % self.repo.get_absolute_path()
            jar_name = "flink-jobs-*.jar"
            # `self.out` is read here, inside the closure, exactly as before.
            job_args = ["-s", self.scale_factor, "-p", self.parallelism, "-o", self.out]
            entry_class = "com.github.projectflink.avro.GenerateLineitems"
            run_jar(jar_dir, jar_name, args=job_args, clazz=entry_class)

        master(code)
        # update path for benchmark
        self.out = self.out + "lineitems.csv"

Example #26

0

Show file

File: generators.py Project: aljoscha/yoka

    def run(self):
        """Hand the GenerateLineitems job to `master`, then update `self.out`.

        The job receives the directory form of `self.out`; once `master`
        returns, the attribute gains the "lineitems.csv" suffix.
        """
        self.out = get_hdfs_address() + "/avro-benchmark/tpch1/" # + lineitems.csv

        def code():
            target_dir = "%s/flink-jobs/target" % self.repo.get_absolute_path()
            flags = [
                "-s", self.scale_factor,
                "-p", self.parallelism,
                "-o", self.out,
            ]
            run_jar(target_dir, "flink-jobs-*.jar",
                    args=flags,
                    clazz="com.github.projectflink.avro.GenerateLineitems")

        master(code)
        # update path for benchmark
        self.out = self.out + "lineitems.csv"

Example #27

0

Show file

File: als.py Project: mxm/yoka

    def run(self):
        """Hand the ALSJoinBlocking job to `master`.

        Reads a fixed benchmark dataset under the HDFS address and writes to
        a path suffixed with the current Unix timestamp.
        """
        # TODO get from generator directly
        input_path = "%s/als-benchmark800000-100000-400" % get_hdfs_address()
        out_root = get_hdfs_address()
        now = int(time())
        output_path = "%s/tmp/als_out_%d" % (out_root, now)

        def code():
            target_dir = "%s/flink-jobs/target" % generators.ALS.repo.get_absolute_path()
            # NOTE(review): the first six values are interpreted by the job
            # itself; their meaning is not visible here - confirm.
            als_args = ["master", 15, 1, 10, 100, "rand"]
            als_args.append("%s/als-temp/" % get_hdfs_address())
            als_args.extend([input_path, output_path])
            run_jar(target_dir, "flink-jobs-*.jar",
                    args=als_args,
                    clazz="com.github.projectflink.als.ALSJoinBlocking")

        master(code)

Example #28

0

Show file

File: utils.py Project: aljoscha/yoka

 def maven(self, target):
     """Run `mvn <target>` in `self.path` unless it was the last target built.

     The Maven command's standard output is redirected to /dev/null.
     `self.built_using` remembers the most recent target.
     """
     # avoid building multiple times
     if target != self.built_using:
         build_command = "cd %s && mvn %s > /dev/null" % (self.path, target)
         master(build_command)
         self.built_using = target

Example #29

0

Show file

 def shutdown(self):
     """Remove the generated word-count data file and the two HDFS entries."""
     master("rm -rf /tmp/wc-data/generated-wc.txt")
     for hdfs_entry in ("generated-wc.txt", "/tmp/wc-out"):
         # Bind the loop value as a default so each callable keeps its own
         # path regardless of when it is invoked.
         master(lambda entry=hdfs_entry: delete_from_hdfs(entry))

Example #30

0

Show file

File: utils.py Project: aljoscha/yoka

 def clone(self):
     """Clone `self.url` into `self.path` once, wiping the path first.

     Does nothing when `self.cloned` is already truthy.
     """
     if self.cloned:
         return
     clone_command = "rm -rf %s && git clone %s %s" % (self.path, self.url, self.path)
     master(clone_command)
     self.cloned = True

Example #31

0

Show file

File: utils.py Project: aljoscha/yoka

 def checkout(self, commit):
     """Check out `commit` in `self.path` unless it is already current.

     After switching, `self.built_using` is cleared to None.
     """
     if commit != self.commit:
         checkout_command = "cd %s && git checkout %s" % (self.path, commit)
         master(checkout_command)
         self.commit = commit
         # Reset the remembered build target after switching commits.
         self.built_using = None

Example #32

0

Show file

 def setup(self):
     """Install tools, generate the word-count data and copy it to HDFS."""
     # generate wc data
     for package in ("wget", "ruby", "bzip2", "aspell"):
         # Bind the loop value as a default so each callable keeps its own
         # package name regardless of when it is invoked.
         master(lambda name=package: install(name))

     template_path = "experiments/wordcount_files/gen_wc_data.sh.mustache"
     script = render_template(template_path, self.params)
     master(lambda: exec_bash(script))

     local_file = "/tmp/wc-data/generated-wc.txt"
     hdfs_name = "generated-wc.txt"
     master(lambda: copy_to_hdfs(local_file, hdfs_name))

Example #33

0

Show file

File: wordcount.py Project: ktzoumas/flink-perf-new

 def shutdown(self):
     """Delete the generated word-count file, then its two HDFS entries."""
     local_cleanup = "rm -rf /tmp/wc-data/generated-wc.txt"
     master(local_cleanup)
     for hdfs_target in ("generated-wc.txt", "/tmp/wc-out"):
         # The default argument pins the current path to this callable.
         master(lambda target=hdfs_target: delete_from_hdfs(target))

Example #34

0

Show file

 def maven(self, target):
     """Build `target` with Maven in `self.path`, skipping a repeat build.

     Standard output of the Maven command goes to /dev/null. The target is
     stored on `self.built_using` afterwards.
     """
     # avoid building multiple times
     if target != self.built_using:
         mvn_command = "cd %s && mvn %s > /dev/null" % (self.path, target)
         master(mvn_command)
         self.built_using = target

Example #35

0

Show file

File: wordcount.py Project: ktzoumas/flink-perf-new

 def setup(self):
     """Install required tools, generate word-count data, copy it to HDFS."""
     # generate wc data
     required_packages = ["wget", "ruby", "bzip2", "aspell"]
     for required in required_packages:
         # The default argument pins the current package to this callable.
         master(lambda pkg=required: install(pkg))

     generator_script = render_template(
         "experiments/wordcount_files/gen_wc_data.sh.mustache", self.params)
     master(lambda: exec_bash(generator_script))

     def upload():
         copy_to_hdfs("/tmp/wc-data/generated-wc.txt", "generated-wc.txt")

     master(lambda: upload())

Example #36

0

Show file

 def checkout(self, commit):
     """Switch the repository at `self.path` to `commit` if it differs.

     Also clears `self.built_using` once the checkout has been issued.
     """
     if commit != self.commit:
         git_command = "cd %s && git checkout %s" % (self.path, commit)
         master(git_command)
         # Record the new commit and drop the remembered build target.
         self.commit, self.built_using = commit, None

Example #37

0

Show file

 def clone(self):
     """Wipe `self.path` and clone `self.url` into it, at most once.

     Returns immediately when `self.cloned` is already truthy.
     """
     if self.cloned:
         return
     locations = (self.path, self.url, self.path)
     master("rm -rf %s && git clone %s %s" % locations)
     self.cloned = True

Example #38

0

Show file

 def shutdown(self):
     """Delete `self.out_path` from HDFS via `master`."""
     # delete out_path to be able to restart benchmark
     def code():
         delete_from_hdfs(self.out_path)

     master(code)

Example #39

0

Show file

 def shutdown(self):
     """Delete `self.wordcount_out` from HDFS via `master`."""
     def code():
         # The attribute is read only when `code` is called.
         delete_from_hdfs(self.wordcount_out)

     master(code)

Example #40

0

Show file

File: wordcount.py Project: mxm/yoka

 def shutdown(self):
     """Remove the word-count output (`self.wordcount_out`) from HDFS."""
     def code():
         # Looked up lazily, at the moment `code` runs.
         delete_from_hdfs(self.wordcount_out)

     master(code)