Example #1
    def __init__(self, view_function):
        self._queue = MultiprocessingQueue()
        self._paused = False
        self._terminated = False
        self._coroutine = None
        self._proc = None
        # The updater is implementation-specific and must be provided by
        # the subclass by calling self._set_updater
        self._updater = None
        self.view_function = view_function
def main():
    server_queue = MultiprocessingQueue()
    server = start_server_process(server_queue)
    client = start_client()

    testers = [
        algorithm_tester('async coalescing tester',
                         test_asyncio_coalesce,
                         use_tcp_nodelay=True),
        algorithm_tester('async no_delay tester',
                         test_asyncio_no_coalesce,
                         use_tcp_nodelay=True),
        algorithm_tester('async nagle tester',
                         test_asyncio_no_coalesce,
                         use_tcp_nodelay=False),
        algorithm_tester('threaded coalescing tester',
                         test_threaded_coalesce,
                         use_tcp_nodelay=True),
        algorithm_tester('threaded no_delay tester',
                         test_threaded_no_coalesce,
                         use_tcp_nodelay=True),
        algorithm_tester('threaded nagle tester',
                         test_threaded_no_coalesce,
                         use_tcp_nodelay=False),
        algorithm_tester('simple coalescing',
                         test_simple_coalescing,
                         use_tcp_nodelay=True),
        algorithm_tester('simple no_delay', test_simple, use_tcp_nodelay=True),
        algorithm_tester('simple nagle', test_simple, use_tcp_nodelay=False),
    ]

    stats = []
    try:
        for tester in testers:
            assert server_queue.empty()
            tester_stats = execute_test(tester, client, server_queue)
            stats.append(tester_stats)
    finally:
        client.close()
        server.kill()
    display_stats(stats)
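
Note: the harness above hands one MultiprocessingQueue to the server process and asserts it is empty before each tester run. Below is a minimal standard-library sketch of that cross-process hand-off; the project helpers (start_server_process, start_client, algorithm_tester, execute_test, display_stats) are not reproduced, and MultiprocessingQueue is assumed to be multiprocessing.Queue, as the import alias suggests.

import multiprocessing
from multiprocessing import Queue as MultiprocessingQueue


def _server(queue):
    # Hypothetical stand-in for the server process: it reports a result
    # back to the parent through the shared queue.
    queue.put("stats-for-one-run")


if __name__ == "__main__":
    server_queue = MultiprocessingQueue()
    server = multiprocessing.Process(target=_server, args=(server_queue,))
    server.start()
    print(server_queue.get())    # blocks until the child has put its result
    server.join()
    assert server_queue.empty()  # mirrors the per-tester check in main() above
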
Example #3
    def __init__(self,
                 project_id: str,
                 subscription: str,
                 agg_function: AggregationFunction,
                 agg_window_millis: int,
                 auth_file: str = None,
                 spark_opts: dict = {},
                 multiprocessing=False) -> None:
        """
        SparkPubSubStreamConsumer constructor

        :param project_id:          the project id
        :param subscription:        the subscription name
        :param agg_function:        aggregation function to apply
        :param agg_window_millis:   aggregation window in milliseconds
        :param auth_file:           path to credentials json file
        :param spark_opts:          spark options dict
        :param multiprocessing:     use multiprocessing instead of threading
        """
        super().__init__(agg_function, agg_window_millis)
        self.project_id = project_id
        self.subscription = subscription
        self.spark_opts = spark_opts
        self.subscribed = True
        self.multiprocessing = multiprocessing
        if self.multiprocessing:
            self.queue = MultiprocessingQueue()
        else:
            self.queue = Queue()

        os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = auth_file

        def run_spark_job(queue: Queue,
                          _agg_function: AggregationFunction,
                          _agg_window_millis: int,
                          _spark_opts: dict = {},
                          _environment: dict = {}):
            os.environ.update(_environment)
            try:
                try:
                    import findspark
                    findspark.init()
                except Exception as ex:
                    self.logger.warn("Cannot initialize pyspark with"
                                     " findspark. Message: {}".format(str(ex)))

                from pyspark.sql import SparkSession
                from pyspark.streaming import StreamingContext
                from pyspark.sql.functions import expr, window
                from pyspark.serializers import NoOpSerializer
                from pyspark.streaming import DStream
                from pyspark.streaming.kafka import utf8_decoder

                spark_builder = SparkSession.builder

                for k in _spark_opts:
                    spark_builder = spark_builder.config(k, _spark_opts[k])

                spark_builder = spark_builder \
                    .appName(str(self)) \
                    .config("spark.jars.packages",
                            "org.apache.spark:spark-streaming-kafka-0-8_2.11:2.2.1,"
                            "org.apache.bahir:spark-streaming-pubsub_2.11:2.2.1") \
                    .config("spark.jars",
                            BASE_PATH + "/lib/streaming-pubsub-serializer_2.11-0.1.jar")

                spark = spark_builder.getOrCreate()
                spark.sparkContext.setLogLevel("WARN")
                ssc = StreamingContext(spark.sparkContext,
                                       _agg_window_millis / 1000)

                agg = expr("value")
                if _agg_function == AggregationFunction.AVG:
                    agg = expr("avg(value)")
                elif _agg_function == AggregationFunction.SUM:
                    agg = expr("sum(value)")
                elif _agg_function == AggregationFunction.COUNT:
                    agg = expr("count(value)")
                elif _agg_function == AggregationFunction.P50:
                    agg = expr("percentile(value, 0.5)")
                elif _agg_function == AggregationFunction.P75:
                    agg = expr("percentile(value, 0.75)")
                elif _agg_function == AggregationFunction.P95:
                    agg = expr("percentile(value, 0.95)")
                elif _agg_function == AggregationFunction.P99:
                    agg = expr("percentile(value, 0.99)")

                deserializer = \
                    ssc._jvm.org.apache.spark.streaming.pubsub.SparkPubsubMessageSerializer()  # noqa: E501
                pubsub_utils = \
                    ssc._jvm.org.apache.spark.streaming.pubsub.PubsubUtils
                credentials = \
                    ssc._jvm.org.apache.spark.streaming.pubsub.SparkGCPCredentials
                storage_level = \
                    ssc._jvm.org.apache.spark.storage.StorageLevel

                _pubsub_stream = pubsub_utils \
                    .createStream(ssc._jssc,
                                  project_id,
                                  subscription,
                                  credentials.Builder().build(),
                                  storage_level.DISK_ONLY())
                _pubsub_stream_des = _pubsub_stream.transform(deserializer)
                ser = NoOpSerializer()
                pubsub_stream = DStream(_pubsub_stream_des, ssc,
                                        ser).map(utf8_decoder)

                def aggregate_rdd(_queue, _agg, df, ts):

                    secs = int(self.agg_window_millis / 1000)
                    win = window("ts", "{} seconds".format(secs))
                    if df.first():
                        aggs = df \
                            .groupBy("application", win) \
                            .agg(_agg.alias("value")) \
                            .collect()

                        for row in aggs:
                            message = InputMessage(row["application"],
                                                   value=row["value"],
                                                   ts=ts)
                            self.logger.debug("Enqueue: {}".format(
                                message.to_json()))
                            try:
                                _queue.put(message.to_json())
                            except AssertionError as ex:
                                self.logger.warn(str(ex))
                    else:
                        self.logger.warn("Empty RDD")

                # Process the Pub/Sub stream
                pubsub_stream \
                    .foreachRDD(lambda ts, rdd:
                                aggregate_rdd(queue, agg,
                                              spark.read.json(rdd), ts))

                # Run
                ssc.start()
                if "timeout" in _spark_opts:
                    ssc.awaitTerminationOrTimeout(_spark_opts["timeout"])
                    ssc.stop()
                    spark.stop()
                else:
                    ssc.awaitTermination()
                    ssc.stop()
                    spark.stop()

            except Exception as e:
                raise e

        # Run in multiprocessing, each aggregation runs a spark driver.
        runner = Concurrency.run_process \
            if self.multiprocessing \
            else Concurrency.run_thread

        Concurrency.get_lock("spark").acquire()
        pid = runner(target=run_spark_job,
                     args=(self.queue, self.agg_function,
                           self.agg_window_millis, self.spark_opts,
                           os.environ.copy()),
                     name="PySpark {}".format(str(self)))
        Concurrency.schedule_release("spark", 30)
        self.pid = pid
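
Note: the constructor above selects MultiprocessingQueue when multiprocessing=True and a plain Queue otherwise, because run_spark_job may execute in a separate process (via the project-specific Concurrency.run_process, not shown here). A threading queue.Queue cannot carry items back across a process boundary, while multiprocessing.Queue can; a minimal standard-library sketch of that difference, assuming nothing from the project itself:

import multiprocessing
import queue
import threading


def producer(q):
    q.put("aggregated-window")


if __name__ == "__main__":
    # Same-process consumer: a plain queue.Queue is enough for a thread.
    q1 = queue.Queue()
    t = threading.Thread(target=producer, args=(q1,))
    t.start()
    t.join()
    print(q1.get_nowait())    # -> "aggregated-window"

    # Cross-process consumer: only multiprocessing.Queue moves the item
    # from the child process back into the parent.
    q2 = multiprocessing.Queue()
    p = multiprocessing.Process(target=producer, args=(q2,))
    p.start()
    print(q2.get(timeout=5))  # -> "aggregated-window"
    p.join()
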
Example #4
    def __init__(self,
                 broker_servers: str,
                 input_topic: str,
                 group_id: str,
                 agg_function: AggregationFunction,
                 agg_window_millis: int,
                 spark_opts: dict = {},
                 multiprocessing=True) -> None:
        """
        SparkKafkaStreamConsumer constructor

        :param broker_servers:      broker servers
        :param input_topic:         input topic
        :param group_id:            consumer group id
        :param agg_function:        aggregation function to apply
        :param agg_window_millis:   aggregation window in milliseconds
        :param spark_opts:          spark options dict
        :param multiprocessing:     use multiprocessing instead of threading
        """
        super().__init__(agg_function, agg_window_millis)
        self.broker_servers = broker_servers.split(",")
        self.input_topic = input_topic
        self.group_id = group_id
        self.spark_opts = spark_opts
        self.subscribed = True
        self.multiprocessing = multiprocessing
        if self.multiprocessing:
            self.queue = MultiprocessingQueue()
        else:
            self.queue = Queue()

        def run_spark_job(queue: Queue,
                          _agg_function: AggregationFunction,
                          _agg_window_millis: int,
                          _spark_opts: dict = {},
                          _environment: dict = {}):
            os.environ.update(_environment)
            try:
                try:
                    import findspark
                    findspark.init()
                except Exception as ex:
                    self.logger.warn("Cannot initialize pyspark with"
                                     " findspark. Message: {}".format(str(ex)))

                from pyspark.sql import SparkSession
                from pyspark.streaming import StreamingContext
                from pyspark.streaming.kafka import KafkaUtils
                from pyspark.sql.functions import expr, window

                spark_builder = SparkSession.builder

                for k in _spark_opts:
                    spark_builder = spark_builder.config(k, _spark_opts[k])

                spark_builder = spark_builder \
                    .appName(str(self)) \
                    .config("spark.jars.packages",
                            "org.apache.spark:spark-streaming-kafka-0-8_2.11:2.2.1,"
                            "org.apache.bahir:spark-streaming-pubsub_2.11:2.2.1") \
                    .config("spark.jars",
                            BASE_PATH + "/lib/streaming-pubsub-serializer_2.11-0.1.jar")

                spark = spark_builder.getOrCreate()
                spark.sparkContext.setLogLevel("WARN")
                ssc = StreamingContext(spark.sparkContext,
                                       _agg_window_millis / 1000)

                agg = expr("value")
                if _agg_function == AggregationFunction.AVG:
                    agg = expr("avg(value)")
                elif _agg_function == AggregationFunction.SUM:
                    agg = expr("sum(value)")
                elif _agg_function == AggregationFunction.COUNT:
                    agg = expr("count(value)")
                elif _agg_function == AggregationFunction.P50:
                    agg = expr("percentile(value, 0.5)")
                elif _agg_function == AggregationFunction.P75:
                    agg = expr("percentile(value, 0.75)")
                elif _agg_function == AggregationFunction.P95:
                    agg = expr("percentile(value, 0.95)")
                elif _agg_function == AggregationFunction.P99:
                    agg = expr("percentile(value, 0.99)")

                kafka_stream = KafkaUtils.createDirectStream(
                    ssc, [self.input_topic],
                    {"metadata.broker.list": ",".join(self.broker_servers)})

                def aggregate_rdd(_queue, _agg, df, ts):

                    secs = int(self.agg_window_millis / 1000)
                    win = window("ts", "{} seconds".format(secs))
                    if df.first():
                        aggs = df \
                            .groupBy("application", win) \
                            .agg(_agg.alias("value")) \
                            .collect()

                        for row in aggs:
                            message = InputMessage(row["application"],
                                                   value=row["value"],
                                                   ts=ts)
                            self.logger.debug("Enqueue: {}".format(
                                message.to_json()))
                            try:
                                _queue.put(message.to_json())
                            except AssertionError as ex:
                                self.logger.warn(str(ex))
                    else:
                        warnings.warn("Empty RDD")

                # Process the Kafka stream
                kafka_stream \
                    .map(lambda x: x[1]) \
                    .foreachRDD(lambda ts, rdd:
                                aggregate_rdd(queue, agg,
                                              spark.read.json(rdd), ts))

                # Run
                ssc.start()
                if "timeout" in _spark_opts:
                    ssc.awaitTerminationOrTimeout(_spark_opts["timeout"])
                    ssc.stop()
                    spark.stop()
                else:
                    ssc.awaitTermination()
                    ssc.stop()
                    spark.stop()

            except Exception as e:
                raise e

        # Run in multiprocessing, each aggregation runs a spark driver.
        runner = Concurrency.run_process \
            if self.multiprocessing \
            else Concurrency.run_thread

        Concurrency.get_lock("spark").acquire()
        pid = runner(target=run_spark_job,
                     args=(self.queue, self.agg_function,
                           self.agg_window_millis, self.spark_opts,
                           os.environ.copy()),
                     name="PySpark {}".format(str(self)))
        Concurrency.schedule_release("spark", 30)
        self.pid = pid
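
Note: both Spark consumers above translate AggregationFunction into a Spark SQL expression with the same if/elif chain. The sketch below is a table-driven equivalent; it uses only the enum members that appear in the examples (AggregationFunction is the project enum referenced in both constructors) and falls back to the raw value expression, matching the default above.

from pyspark.sql.functions import expr

# Mapping mirrors the if/elif chain in the two consumers above.
AGG_EXPRESSIONS = {
    AggregationFunction.AVG: "avg(value)",
    AggregationFunction.SUM: "sum(value)",
    AggregationFunction.COUNT: "count(value)",
    AggregationFunction.P50: "percentile(value, 0.5)",
    AggregationFunction.P75: "percentile(value, 0.75)",
    AggregationFunction.P95: "percentile(value, 0.95)",
    AggregationFunction.P99: "percentile(value, 0.99)",
}


def build_agg_expr(agg_function):
    # Unmapped members fall back to the plain value column, as in the examples.
    return expr(AGG_EXPRESSIONS.get(agg_function, "value"))
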
Example #5
def main(args):
    n_processes = get_num_processes(min_free_cpu_cores=args.n_free_cpus)
    start_time = datetime.now()

    (
        soma_diameter,
        max_cluster_size,
        ball_xy_size,
        ball_z_size,
    ) = calculate_parameters_in_pixels(
        args.x_pixel_um,
        args.y_pixel_um,
        args.z_pixel_um,
        args.soma_diameter,
        args.max_cluster_size,
        args.ball_xy_size,
        args.ball_z_size,
    )

    # file extension only used if a directory is passed
    img_paths = get_sorted_file_paths(args.signal_planes_paths[0],
                                      file_extension="tif")

    if args.end_plane == -1:
        args.end_plane = len(img_paths)
    planes_paths_range = img_paths[args.start_plane:args.end_plane]

    workers_queue = MultiprocessingQueue(maxsize=n_processes)
    # WARNING: needs to be AT LEAST ball_z_size
    mp_3d_filter_queue = MultiprocessingQueue(maxsize=ball_z_size)
    for plane_id in range(n_processes):
        # placeholder so the queue has the right size on the first run
        workers_queue.put(None)

    clipping_val, threshold_value, ball_filter, cell_detector = setup(
        img_paths[0],
        soma_diameter,
        ball_xy_size,
        ball_z_size,
        ball_overlap_fraction=args.ball_overlap_fraction,
        z_offset=args.start_plane,
    )

    progress_bar = tqdm(total=len(planes_paths_range),
                        desc="Processing planes")
    mp_3d_filter = Mp3DFilter(
        mp_3d_filter_queue,
        ball_filter,
        cell_detector,
        soma_diameter,
        args.output_dir,
        soma_size_spread_factor=args.soma_spread_factor,
        progress_bar=progress_bar,
        save_planes=args.save_planes,
        plane_directory=args.plane_directory,
        start_plane=args.start_plane,
        max_cluster_size=max_cluster_size,
        outlier_keep=args.outlier_keep,
        artifact_keep=args.artifact_keep,
        save_csv=args.save_csv,
    )

    # start 3D analysis (waits for planes in queue)
    bf_process = multiprocessing.Process(target=mp_3d_filter.process, args=())
    bf_process.start()  # needs to be started before the loop

    mp_tile_processor = MpTileProcessor(workers_queue, mp_3d_filter_queue)
    prev_lock = Lock()
    processes = []

    # start 2D tile filter (output goes into queue for 3D analysis)
    for plane_id, path in enumerate(planes_paths_range):
        workers_queue.get()
        lock = Lock()
        lock.acquire()
        p = multiprocessing.Process(
            target=mp_tile_processor.process,
            args=(
                plane_id,
                path,
                prev_lock,
                lock,
                clipping_val,
                threshold_value,
                soma_diameter,
                args.log_sigma_size,
                args.n_sds_above_mean_thresh,
            ),
        )
        prev_lock = lock
        processes.append(p)
        p.start()

    processes[-1].join()
    mp_3d_filter_queue.put((None, None, None))  # Signal the end
    bf_process.join()

    logging.info("Detection complete - all planes done in: {}".format(
        datetime.now() - start_time))
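
Note: in the tile-filter loop above, every worker process receives the previous plane's lock together with its own lock, which the parent acquires before starting it. The apparent intent is that each worker waits on the previous lock before pushing its plane into mp_3d_filter_queue and then releases its own, so planes reach the 3D filter in order. MpTileProcessor.process is not shown in the example, so the worker body below is an illustrative sketch of that chained-lock pattern rather than the project's code.

import multiprocessing
from multiprocessing import Lock, Queue as MultiprocessingQueue


def worker(plane_id, prev_lock, self_lock, out_queue):
    result = plane_id * plane_id       # stand-in for the real 2D filtering
    prev_lock.acquire()                # wait until the previous plane is queued
    out_queue.put((plane_id, result))  # results therefore arrive in plane order
    self_lock.release()                # unblock the next plane's worker


if __name__ == "__main__":
    out_queue = MultiprocessingQueue()
    prev_lock = Lock()                 # unlocked, so plane 0 proceeds at once
    processes = []
    for plane_id in range(4):
        lock = Lock()
        lock.acquire()                 # the next worker will block on this
        p = multiprocessing.Process(
            target=worker, args=(plane_id, prev_lock, lock, out_queue))
        p.start()
        processes.append(p)
        prev_lock = lock
    for _ in processes:
        print(out_queue.get())         # (0, 0), (1, 1), (2, 4), (3, 9)
    for p in processes:
        p.join()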