Ejemplo n.º 1
0
 def get_stream(self, stream):
     """
     Stream logic; must be implemented.

     Applies, in order, ``flat_map(self.flat_map)``, ``key_by(KeyBy())``,
     ``time_window(milliseconds(50))`` and ``reduce(Reduce())`` to
     ``stream`` and returns the resulting stream.
     """
     flattened = stream.flat_map(self.flat_map)
     keyed = flattened.key_by(KeyBy())
     windowed = keyed.time_window(milliseconds(50))
     return windowed.reduce(Reduce())
Ejemplo n.º 2
0
def main(factory):
    """Build a windowed reduce job on a Python source and execute it.

    ``factory`` must expose ``get_execution_environment()``. The pipeline is
    create_python_source(Generator(num_iters=1000)) -> flat_map(Tokenizer)
    -> key_by(Selector) -> time_window(milliseconds(50)) -> reduce(Sum)
    -> output.
    """
    env = factory.get_execution_environment()

    source = env.create_python_source(Generator(num_iters=1000))
    keyed = source.flat_map(Tokenizer()).key_by(Selector())
    reduced = keyed.time_window(milliseconds(50)).reduce(Sum())
    reduced.output()

    env.execute()
Ejemplo n.º 3
0
    def run(self, flink):
        """Run a keyed, windowed reduce over a ``SomeIterator`` collection.

        Pipeline: from_collection -> flat_map(Tokenizer) -> key_by(Selector)
        -> time_window(milliseconds(10)) -> reduce(Sum) -> output; the
        environment is then executed.
        """
        env = flink.get_execution_environment()

        source = env.from_collection(SomeIterator(constants.NUM_ITERATIONS_IN_TEST))
        keyed = source.flat_map(Tokenizer()).key_by(Selector())
        keyed.time_window(milliseconds(10)).reduce(Sum()).output()

        env.execute()
Ejemplo n.º 4
0
    def run(self, flink):
        """Run a keyed, windowed reduce fed by a Python ``Generator`` source.

        Pipeline: create_python_source -> flat_map(Tokenizer)
        -> key_by(Selector) -> time_window(milliseconds(50)) -> reduce(Sum)
        -> output; the environment is then executed.
        """
        env = flink.get_execution_environment()
        (
            env.create_python_source(Generator(num_iters=constants.NUM_ITERATIONS_IN_TEST))
            .flat_map(Tokenizer())
            .key_by(Selector())
            .time_window(milliseconds(50))
            .reduce(Sum())
            .output()
        )
        env.execute()
    def run(self, flink):
        """Run a keyed, windowed reduce over a generated number sequence.

        The source is ``generate_sequence(1, constants.NUM_ITERATIONS_IN_TEST)``;
        it is passed through flat_map(Tokenizer), key_by(Selector),
        time_window(milliseconds(10)), reduce(Sum) and output before the
        environment is executed.
        """
        env = flink.get_execution_environment()

        sequence = env.generate_sequence(1, constants.NUM_ITERATIONS_IN_TEST)
        tokens = sequence.flat_map(Tokenizer())
        windowed = tokens.key_by(Selector()).time_window(milliseconds(10))
        windowed.reduce(Sum()).output()

        env.execute()
Ejemplo n.º 6
0
    def run(self):
        """Run a keyed, windowed reduce fed by a ``Generator`` source and print it.

        Pipeline: add_source -> flat_map(Tokenizer) -> key_by(Selector)
        -> time_window(milliseconds(50)) -> reduce(Sum) -> print.
        """
        env = self._get_execution_environment()

        source = env.add_source(Generator(num_iters=constants.NUM_ITERATIONS_IN_TEST))
        keyed = source.flat_map(Tokenizer()).key_by(Selector())
        reduced = keyed.time_window(milliseconds(50)).reduce(Sum())
        reduced.print()

        # NOTE(review): the True flag presumably selects local execution -- confirm.
        env.execute(True)
Ejemplo n.º 7
0
    def run(self, flink):
        """Split a generated sequence and run the same reduce on both branches.

        The sequence is split with ``StreamSelector()``; the 'lower_stream'
        branch and then the 'upper_stream' branch each go through
        flat_map(Tokenizer) -> key_by(Selector)
        -> time_window(milliseconds(10)) -> reduce(Sum) -> output.
        """
        env = flink.get_execution_environment()

        sequence = env.generate_sequence(1, constants.NUM_ITERATIONS_IN_TEST)
        branches = sequence.split(StreamSelector())

        # Both branches get the identical pipeline; order matches the names below.
        for branch_name in ('lower_stream', 'upper_stream'):
            selected = branches.select(branch_name)
            keyed = selected.flat_map(Tokenizer()).key_by(Selector())
            keyed.time_window(milliseconds(10)).reduce(Sum()).output()

        env.execute()
Ejemplo n.º 8
0
    def run(self):
        """Run a windowed reduce over a ``SomeIterator`` collection and print it.

        After building from_collection -> flat_map(Tokenizer)
        -> key_by(Selector) -> time_window(milliseconds(10)) -> reduce(Sum)
        -> print, the job is executed as "MyJob" and its ``jobID`` is printed.
        """
        env = self._get_execution_environment()

        source = env.from_collection(SomeIterator(constants.NUM_ITERATIONS_IN_TEST))
        windowed = source.flat_map(Tokenizer()).key_by(Selector()).time_window(milliseconds(10))
        windowed.reduce(Sum()).print()

        job_result = env.execute("MyJob", True)
        message = "Job completed, job_id={}".format(job_result.jobID)
        print(message)
Ejemplo n.º 9
0
    def run(self, flink):
        """Run a windowed reduce over an alternating "aa"/"bbb" list.

        Even indices yield "aa" and odd indices "bbb", for
        ``constants.NUM_ELEMENTS_IN_TEST`` items. The list is passed through
        flat_map(Tokenizer) -> key_by(Selector)
        -> time_window(milliseconds(10)) -> reduce(Sum) -> output.
        """
        # idx % 2 is 0 for even indices ("aa") and 1 for odd ones ("bbb").
        words = [("bbb" if idx % 2 else "aa") for idx in range(constants.NUM_ELEMENTS_IN_TEST)]

        env = flink.get_execution_environment()
        source = env.from_collection(words)
        keyed = source.flat_map(Tokenizer()).key_by(Selector())
        keyed.time_window(milliseconds(10)).reduce(Sum()).output()

        env.execute()
Ejemplo n.º 10
0
    def run(self, flink):
        """Run two ``map`` steps before a windowed reduce over ``[3] * 5``.

        Pipeline: from_collection -> map(DummyTupple) -> map(MinusOne)
        -> flat_map(Tokenizer) -> key_by(Selector)
        -> time_window(milliseconds(5)) -> reduce(Sum) -> output.
        """
        env = flink.get_execution_environment()

        source = env.from_collection([3] * 5)
        mapped = source.map(DummyTupple()).map(MinusOne())
        keyed = mapped.flat_map(Tokenizer()).key_by(Selector())
        keyed.time_window(milliseconds(5)).reduce(Sum()).output()

        env.execute()
    def run(self, flink):
        """Run a windowed reduce over alternating ("Alice", 111)/("Bob", 2222) tuples.

        ``constants.NUM_ELEMENTS_IN_TEST`` tuples are built (even index ->
        Alice, odd index -> Bob) and passed as separate arguments to
        ``from_elements``. They then flow through flat_map(Tokenizer)
        -> key_by(Selector) -> time_window(milliseconds(10)) -> reduce(Sum)
        -> output.
        """
        records = []
        for idx in range(constants.NUM_ELEMENTS_IN_TEST):
            if idx % 2 == 0:
                records.append(("Alice", 111))
            else:
                records.append(("Bob", 2222))

        env = flink.get_execution_environment()
        source = env.from_elements(*records)
        windowed = source.flat_map(Tokenizer()).key_by(Selector()).time_window(milliseconds(10))
        windowed.reduce(Sum()).output()

        env.execute()
Ejemplo n.º 12
0
    def run(self, flink):
        """Run a windowed reduce over an "aa"/"bbb" list and write it as text.

        The reduced stream is written to "/tmp/flink_write_as_text" with
        ``WriteMode.OVERWRITE``.
        """
        # Index parity picks the word: even -> "aa", odd -> "bbb".
        words = [("aa", "bbb")[idx % 2] for idx in range(constants.NUM_ITERATIONS_IN_TEST)]

        env = flink.get_execution_environment()
        source = env.from_collection(words)
        keyed = source.flat_map(Tokenizer()).key_by(Selector())
        reduced = keyed.time_window(milliseconds(10)).reduce(Sum())
        reduced.write_as_text("/tmp/flink_write_as_text", WriteMode.OVERWRITE)

        env.execute()
Ejemplo n.º 13
0
    def run(self):
        """Run a windowed reduce fed by ``Generator(num_iters=100)`` and print it.

        The execution mode label ("LOCAL" or "REMOTE", chosen from
        ``self._local``) is printed before ``env.execute(self._local)`` runs.
        """
        env = PythonStreamExecutionEnvironment.get_execution_environment()

        source = env.add_source(Generator(num_iters=100))
        keyed = source.flat_map(Tokenizer()).key_by(Selector())
        reduced = keyed.time_window(milliseconds(30)).reduce(Sum())
        reduced.print()

        mode_label = "LOCAL" if self._local else "REMOTE"
        print("Execution mode: {}".format(mode_label))
        env.execute(self._local)
Ejemplo n.º 14
0
    def run(self):
        """Run a windowed reduce over an "aa"/"bbb" list and print the result.

        The list has ``constants.NUM_ELEMENTS_IN_TEST`` items ("aa" at even
        indices, "bbb" at odd ones). The job is executed as "MyJob" and its
        ``jobID`` is printed afterwards.
        """
        words = []
        for idx in range(constants.NUM_ELEMENTS_IN_TEST):
            words.append("aa" if idx % 2 == 0 else "bbb")

        env = self._get_execution_environment()
        (
            env.from_collection(words)
            .flat_map(Tokenizer())
            .key_by(Selector())
            .time_window(milliseconds(10))
            .reduce(Sum())
            .print()
        )

        job_result = env.execute("MyJob", True)
        print("Job completed, job_id={}".format(job_result.jobID))
Ejemplo n.º 15
0
    def run(self):
        """Split a generated sequence and print a windowed reduce of each branch.

        The sequence is split with ``StreamSelector()``; 'lower_stream' and
        then 'upper_stream' each go through flat_map(Tokenizer)
        -> key_by(Selector) -> time_window(milliseconds(10)) -> reduce(Sum)
        -> print. The job is executed as "MyJob" and its ``jobID`` is printed.
        """
        env = self._get_execution_environment()

        sequence = env.generate_sequence(1, constants.NUM_ITERATIONS_IN_TEST)
        branches = sequence.split(StreamSelector())

        # Same pipeline for each branch, built in the order listed.
        for branch_name in ('lower_stream', 'upper_stream'):
            tokens = branches.select(branch_name).flat_map(Tokenizer())
            windowed = tokens.key_by(Selector()).time_window(milliseconds(10))
            windowed.reduce(Sum()).print()

        job_result = env.execute("MyJob", True)
        print("Job completed, job_id={}".format(job_result.jobID))
Ejemplo n.º 16
0
    def run(self):
        """Run two ``map`` steps before a windowed reduce over ``[3] * 5``, then print.

        Pipeline: from_collection -> map(DummyTupple) -> map(MinusOne)
        -> flat_map(Tokenizer) -> key_by(Selector)
        -> time_window(milliseconds(5)) -> reduce(Sum) -> print. The job is
        executed as "MyJob" and the string form of its ``jobID`` is printed.
        """
        env = self._get_execution_environment()

        source = env.from_collection([3] * 5)
        mapped = source.map(DummyTupple()).map(MinusOne())
        keyed = mapped.flat_map(Tokenizer()).key_by(Selector())
        keyed.time_window(milliseconds(5)).reduce(Sum()).print()

        job_result = env.execute("MyJob", True)
        job_id_text = str(job_result.jobID)
        print("Job completed, job_id={}".format(job_id_text))
Ejemplo n.º 17
0
    def run(self, flink):
        """Union three ``Generator`` sources and run a windowed reduce on them.

        Sources are created for the messages 'Hello', 'World' and 'Happy' (in
        that order), each with ``constants.NUM_ITERATIONS_IN_TEST``
        iterations. The union goes through flat_map(Tokenizer)
        -> key_by(Selector) -> time_window(milliseconds(10)) -> reduce(Sum)
        -> output.
        """
        env = flink.get_execution_environment()

        first, second, third = [
            env.create_python_source(
                Generator(msg=text, num_iters=constants.NUM_ITERATIONS_IN_TEST))
            for text in ('Hello', 'World', 'Happy')
        ]

        merged = first.union(second, third)
        keyed = merged.flat_map(Tokenizer()).key_by(Selector())
        keyed.time_window(milliseconds(10)).reduce(Sum()).output()

        env.execute()
Ejemplo n.º 18
0
    def run(self, flink):
        """Run a windowed reduce over an alternating "aa"/"bbb" collection.

        ``constants.NUM_ELEMENTS_IN_TEST`` strings are generated ("aa" for
        even indices, "bbb" for odd) and passed through flat_map(Tokenizer)
        -> key_by(Selector) -> time_window(milliseconds(10)) -> reduce(Sum)
        -> output.
        """
        words = []
        for idx in range(constants.NUM_ELEMENTS_IN_TEST):
            if idx % 2 == 0:
                words.append("aa")
            else:
                words.append("bbb")

        env = flink.get_execution_environment()
        tokens = env.from_collection(words).flat_map(Tokenizer())
        windowed = tokens.key_by(Selector()).time_window(milliseconds(10))
        windowed.reduce(Sum()).output()

        env.execute()
Ejemplo n.º 19
0
    def run(self, flink):
        """Run a windowed reduce over the lines of a temporary text file.

        The file comes from
        ``generate_tmp_text_file(constants.NUM_ELEMENTS_IN_TEST)`` and is read
        via ``read_text_file``. It is closed and unlinked in a ``finally``
        block, whether or not the job succeeds.
        """
        text_file = generate_tmp_text_file(constants.NUM_ELEMENTS_IN_TEST)
        try:
            env = flink.get_execution_environment()

            lines = env.read_text_file(text_file.name)
            keyed = lines.flat_map(Tokenizer()).key_by(Selector())
            keyed.time_window(milliseconds(100)).reduce(Sum()).output()

            env.execute()
        finally:
            # Always release and delete the temporary file.
            text_file.close()
            os.unlink(text_file.name)
    def run(self, flink):
        """Run a windowed reduce over an "aa"/"bbb" list and write it as text.

        ``constants.NUM_ITERATIONS_IN_TEST`` strings are generated ("aa" at
        even indices, "bbb" at odd ones). The reduced stream is written to
        "/tmp/flink_write_as_text" with ``WriteMode.OVERWRITE``.
        """
        words = [("bbb" if idx % 2 else "aa")
                 for idx in range(constants.NUM_ITERATIONS_IN_TEST)]

        env = flink.get_execution_environment()
        (
            env.from_collection(words)
            .flat_map(Tokenizer())
            .key_by(Selector())
            .time_window(milliseconds(10))
            .reduce(Sum())
            .write_as_text("/tmp/flink_write_as_text", WriteMode.OVERWRITE)
        )

        env.execute()
Ejemplo n.º 21
0
    def run(self):
        """Write a windowed reduce of an "aa"/"bbb" list as text and report the job id.

        The reduced stream is written to "/tmp/flink_write_as_text" with
        ``WriteMode.OVERWRITE``. The job is executed as "MyJob" and its
        ``jobID`` is printed.
        """
        words = []
        for idx in range(constants.NUM_ITERATIONS_IN_TEST):
            words.append("aa" if idx % 2 == 0 else "bbb")

        env = self._get_execution_environment()
        source = env.from_collection(words)
        keyed = source.flat_map(Tokenizer()).key_by(Selector())
        reduced = keyed.time_window(milliseconds(10)).reduce(Sum())
        reduced.write_as_text("/tmp/flink_write_as_text", WriteMode.OVERWRITE)

        job_result = env.execute("MyJob", True)
        print("Job completed, job_id={}".format(job_result.jobID))
Ejemplo n.º 22
0
    def run(self, flink):
        """Union three ``Generator`` python sources and run a windowed reduce.

        The 'Hello', 'World' and 'Happy' generators each use
        ``constants.NUM_ITERATIONS_IN_TEST`` iterations. Their union goes
        through flat_map(Tokenizer) -> key_by(Selector)
        -> time_window(milliseconds(10)) -> reduce(Sum) -> output.
        """
        env = flink.get_execution_environment()

        hello_gen = Generator(msg='Hello', num_iters=constants.NUM_ITERATIONS_IN_TEST)
        hello_stream = env.create_python_source(hello_gen)
        world_gen = Generator(msg='World', num_iters=constants.NUM_ITERATIONS_IN_TEST)
        world_stream = env.create_python_source(world_gen)
        happy_gen = Generator(msg='Happy', num_iters=constants.NUM_ITERATIONS_IN_TEST)
        happy_stream = env.create_python_source(happy_gen)

        merged = hello_stream.union(world_stream, happy_stream)
        windowed = merged.flat_map(Tokenizer()).key_by(Selector()).time_window(milliseconds(10))
        windowed.reduce(Sum()).output()

        env.execute()
Ejemplo n.º 23
0
    def run(self):
        """Union three ``Generator`` sources, print a windowed reduce, report the job id.

        Sources for 'Hello', 'World' and 'Happy' are added in that order via
        ``add_source``. The job is executed as "My python union stream test"
        and its ``jobID`` is printed.
        """
        env = self._get_execution_environment()

        streams = []
        for text in ('Hello', 'World', 'Happy'):
            generator = Generator(msg=text, num_iters=constants.NUM_ITERATIONS_IN_TEST)
            streams.append(env.add_source(generator))

        # The first stream is the receiver; the rest are unioned into it.
        merged = streams[0].union(streams[1], streams[2])
        keyed = merged.flat_map(Tokenizer()).key_by(Selector())
        keyed.time_window(milliseconds(10)).reduce(Sum()).print()

        job_result = env.execute("My python union stream test", True)
        print("Job completed, job_id={}".format(job_result.jobID))
Ejemplo n.º 24
0
    def run(self):
        """Consume the "kafka09-test" topic and print a windowed reduce of it.

        Properties are parsed from ``sys.argv[1:]`` and "bootstrap.servers"
        is set to ``self._bootstrap_server``. The consumer feeds
        flat_map(Tokenizer) -> key_by(Selector)
        -> time_window(milliseconds(100)) -> reduce(Sum) -> print. The job is
        executed as "Python consumer kafka09 test" and its ``jobID`` printed.
        """
        cli_params = ParameterTool.fromArgs(sys.argv[1:])
        kafka_props = cli_params.getProperties()
        kafka_props.setProperty("bootstrap.servers", self._bootstrap_server)

        kafka_source = PythonFlinkKafkaConsumer09(
            "kafka09-test", StringDeserializationSchema(), kafka_props)

        env = self._get_execution_environment()
        messages = env.add_source(kafka_source)
        keyed = messages.flat_map(Tokenizer()).key_by(Selector())
        keyed.time_window(milliseconds(100)).reduce(Sum()).print()

        job_result = env.execute("Python consumer kafka09 test", True)
        print("Kafka09 consumer job completed, job_id={}".format(job_result.jobID))
    def run(self, flink):
        """Write a windowed reduce of an "aa"/"bbb" list to a local socket.

        A ``SocketStringReader`` is started on a port from
        ``utils.gen_free_port()``. The reduced stream is sent to
        'localhost' on that port using ``ToStringSchema()``.
        """
        port = utils.gen_free_port()
        reader = SocketStringReader('', port, constants.NUM_ITERATIONS_IN_TEST)
        reader.start()
        # NOTE(review): presumably gives the reader time to start listening -- confirm.
        time.sleep(0.5)

        words = [("bbb" if idx % 2 else "aa")
                 for idx in range(constants.NUM_ITERATIONS_IN_TEST)]

        env = flink.get_execution_environment()
        source = env.from_collection(words)
        keyed = source.flat_map(Tokenizer()).key_by(Selector())
        reduced = keyed.time_window(milliseconds(50)).reduce(Sum())
        reduced.write_to_socket('localhost', port, ToStringSchema())

        env.execute()
Ejemplo n.º 26
0
    def run(self):
        """Write a windowed reduce of an "aa"/"bbb" list to a local socket and report the job id.

        A ``SocketStringReader`` is started on a port from
        ``utils.gen_free_port()``. The reduced stream is sent to
        'localhost' on that port using ``ToStringSchema()``. The job is
        executed as "MyJob" and its ``jobID`` is printed.
        """
        port = utils.gen_free_port()
        reader = SocketStringReader('', port, constants.NUM_ITERATIONS_IN_TEST)
        reader.start()
        # NOTE(review): presumably lets the reader begin listening before the job runs -- confirm.
        time.sleep(0.5)

        words = []
        for idx in range(constants.NUM_ITERATIONS_IN_TEST):
            words.append("aa" if idx % 2 == 0 else "bbb")

        env = self._get_execution_environment()
        (
            env.from_collection(words)
            .flat_map(Tokenizer())
            .key_by(Selector())
            .time_window(milliseconds(50))
            .reduce(Sum())
            .write_to_socket('localhost', port, ToStringSchema())
        )

        job_result = env.execute("MyJob", True)
        print("Job completed, job_id={}".format(job_result.jobID))