def test_word_count():
    """Run a small read/filter/inspect streaming job end to end.

    The job reads this test file through ``read_text_file``, filters on the
    substring ``"word"`` and hands every surviving item to a printing
    callback. The test makes no assertions on the output; it passes when the
    job can be executed and waited on without raising.
    """
    ray.init()
    try:
        env = Environment(config=Conf(channel_type=Config.NATIVE_CHANNEL))
        (
            env.read_text_file(__file__)
            .set_parallelism(1)
            .filter(lambda x: "word" in x)
            .inspect(lambda x: print("result", x))
        )
        env_handle = env.execute()
        ray.get(env_handle)  # Stay alive until execution finishes
        env.wait_finish()
    finally:
        # Always tear Ray down, even when the job raises, so a failure here
        # does not leave an initialized Ray instance behind for later tests.
        ray.shutdown()
return True return False if __name__ == "__main__": args = parser.parse_args() ray.init(local_mode=False) # A Ray streaming environment with the default configuration env = Environment(config=Conf(channel_type=Config.NATIVE_CHANNEL)) # Stream represents the ouput of the filter and # can be forked into other dataflows stream = env.read_text_file(args.input_file) \ .shuffle() \ .flat_map(splitter) \ .set_parallelism(2) \ .filter(filter_fn) \ .set_parallelism(2) \ .inspect(lambda x: print("result", x)) # Prints the contents of the # stream to stdout start = time.time() env_handle = env.execute() ray.get(env_handle) # Stay alive until execution finishes env.wait_finish() end = time.time() logger.info("Elapsed time: {} secs".format(end - start)) logger.debug("Output stream id: {}".format(stream.id))
if __name__ == "__main__":
    # Get program parameters
    args = parser.parse_args()
    titles_file = str(args.titles_file)

    ray.init()

    # A Ray streaming environment with the default configuration
    env = Environment()
    env.set_parallelism(2)  # Each operator will be executed by two actors

    # The following dataflow is a simple streaming wordcount
    # with a rolling sum operator.
    # It reads articles from Wikipedia, splits them into words,
    # shuffles words, and counts the occurrences of each word.
    stream = (
        env.source(Wikipedia(titles_file))
        .round_robin()
        .flat_map(splitter)
        .key_by(key_selector)
        .sum(attribute_selector)
        # Prints the contents of the stream to stdout
        .inspect(print)
    )

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    start = time.perf_counter()
    env_handle = env.execute()  # Deploys and executes the dataflow
    ray.get(env_handle)  # Stay alive until execution finishes
    env.wait_finish()
    end = time.perf_counter()
    logger.info("Elapsed time: {} secs".format(end - start))
    logger.debug("Output stream id: {}".format(stream.id))