class Reference:
    """Ordered collection of data chunks identified by ``id``.

    Chunks are appended with ``append`` and read back one at a time with
    ``get_chunk`` or by iterating the reference. Iteration ends when the
    chunk queue raises ``Closed``.
    """

    def __init__(self, id):
        self.id = id
        self.chunks = Queue(0)

    def close(self):
        """Close the chunk queue; no further chunks are expected."""
        self.chunks.close()

    def append(self, bytes):
        """Queue one more chunk."""
        self.chunks.put(bytes)

    def get_chunk(self):
        """Return the next chunk from the queue."""
        return self.chunks.get()

    def get_complete(self):
        """Return every remaining chunk concatenated into one string."""
        # join() consumes the iterator once instead of re-copying the
        # accumulated string on every chunk.
        return "".join(self)

    def __iter__(self):
        # get_complete() iterates over the reference itself, so the class has
        # to expose the iterator protocol.
        return self

    def next(self):
        """Return the next chunk, raising StopIteration once closed."""
        try:
            return self.get_chunk()
        except Closed:
            raise StopIteration

    # Python 3 spelling of the iterator protocol.
    __next__ = next
class Peer: def __init__(self, conn, delegate, channel_factory=None, channel_options=None): self.conn = conn self.delegate = delegate self.outgoing = Queue(0) self.work = Queue(0) self.channels = {} self.lock = threading.Lock() if channel_factory: self.channel_factory = channel_factory else: self.channel_factory = Channel if channel_options is None: channel_options = {} self.channel_options = channel_options def channel(self, id): self.lock.acquire() try: try: ch = self.channels[id] except KeyError: ch = self.channel_factory(id, self.outgoing, self.conn.spec, self.channel_options) self.channels[id] = ch finally: self.lock.release() return ch def start(self): self.writer_thread = threading.Thread(target=self.writer) self.writer_thread.daemon = True self.writer_thread.start() self.reader_thread = threading.Thread(target=self.reader) self.reader_thread.daemon = True self.reader_thread.start() self.worker_thread = threading.Thread(target=self.worker) self.worker_thread.daemon = True self.worker_thread.start() def fatal(self, message=None): """Call when an unexpected exception occurs that will kill a thread.""" self.closed("Fatal error: %s\n%s" % (message or "", traceback.format_exc())) def reader(self): try: while True: try: frame = self.conn.read() except EOF, e: self.work.close() break ch = self.channel(frame.channel) ch.receive(frame, self.work) except VersionError, e: self.closed(e) except:
def read_alerts(filepath: str, q: queue.Queue, read_max: int = -1) -> None:
    """Feed alerts read from ``filepath`` into ``q``, then close ``q``.

    :param filepath: path handed to ``iterate_tarfile``.
    :param q: destination queue; it must provide ``put`` and ``close``.
    :param read_max: stop after this many alerts; values <= 0 mean no limit.
    """
    n_read = 0
    try:
        for alert in iterate_tarfile(filepath):
            q.put(alert)
            logging.debug("putting into q: %s", alert.candidate_id)
            n_read += 1
            if read_max > 0 and n_read >= read_max:
                break
        logging.debug("file queue complete, exiting")
    finally:
        # Always close the queue, even when reading fails part-way, so that
        # consumers waiting on it are released instead of blocking forever.
        q.close()
def upload_alerts(bs: Blobstore, alerts: queue.Queue, urls: queue.Queue):
    """Upload every alert taken from ``alerts`` and publish its URL on ``urls``.

    Runs until ``alerts.get()`` raises ``QueueClosed``. ``urls`` is closed on
    the way out, whether the loop ended normally or an upload failed.

    :param bs: object providing ``upload_alert(alert)``, which returns a URL.
    :param alerts: source queue of alerts.
    :param urls: destination queue; it must provide ``put`` and ``close``.
    """
    try:
        while True:
            # get() is called without a timeout, so an empty queue does not
            # end the loop; only QueueClosed means there is nothing left.
            try:
                alert = alerts.get()
            except QueueClosed as e:
                logging.debug("upload queue complete, exiting: %s", e)
                break
            start = time.monotonic()
            url = bs.upload_alert(alert)
            logging.debug("upload %s done - took %s",
                          url, time.monotonic() - start)
            urls.put(url)
    finally:
        # Release downstream consumers even if an upload raised.
        urls.close()
def get_task_queue(cpu_count):
    """
    Pick the task queue implementation for the given cpu count.

    With exactly one cpu a ``ThreadQueue`` is returned, patched with a no-op
    ``close`` attribute; for any other value a ``Queue`` is returned.
    """
    if cpu_count != 1:
        return Queue()

    thread_queue = ThreadQueue()
    # Give the thread queue a close() so callers can treat both kinds alike.
    thread_queue.close = lambda: None
    return thread_queue
def run_campaign_manager_process_sync(campaign):
    """Run the campaign manager in-process and return everything it queued.

    The queue handed to ``email_campaign_manager_process`` gets no-op
    ``close`` and ``join_thread`` attributes. Once the call returns, the
    queue is drained into a list, which is returned.
    """
    sync_queue = Queue()
    sync_queue.close = lambda: None
    sync_queue.join_thread = lambda: None

    nuntius_worker.email_campaign_manager_process(
        campaign=campaign,
        queue=sync_queue,
        quit_event=multiprocessing.Event(),
    )

    collected = []
    while True:
        if sync_queue.empty():
            break
        collected.append(sync_queue.get_nowait())
    return collected
class Channel:
    """A single protocol channel writing frames onto a shared outgoing queue.

    Incoming frames are routed by ``receive`` into ``incoming`` or
    ``responses``; outgoing frames are stamped with the channel id by
    ``write``.

    NOTE(review): ``__init__`` binds ``self.invoke_method`` and
    ``invoke_reliable`` calls ``self.execution_sync``; neither is defined in
    the visible part of this class -- confirm where they come from.
    """

    def __init__(self, id, outgoing, spec):
        self.id = id
        self.outgoing = outgoing
        self.spec = spec
        self.incoming = Queue(0)
        self.responses = Queue(0)
        # Queue that the next non-method frame is routed to (see receive()).
        self.queue = None
        self._closed = False
        self.reason = None

        self.requester = Requester(self.write)
        self.responder = Responder(self.write)

        self.completion = OutgoingCompletion()
        self.incoming_completion = IncomingCompletion(self)
        self.futures = {}
        # used for incoming methods that apps may want to handle themselves
        self.control_queue = Queue(0)

        self.invoker = self.invoke_method
        self.use_execution_layer = (
            (spec.major == 0 and spec.minor == 10)
            or (spec.major == 99 and spec.minor == 0))
        self.synchronous = True

    def closed(self, reason):
        """Mark the channel closed with ``reason``; later calls are no-ops."""
        if self._closed:
            return
        self._closed = True
        self.reason = reason
        self.incoming.close()
        self.responses.close()
        self.completion.close()
        self.incoming_completion.reset()
        for f in self.futures.values():
            f.put_response(self, reason)

    def write(self, frame, content=None):
        """Send ``frame`` on this channel, followed by its content if any.

        Raises ``Closed`` when the channel has already been closed.
        """
        if self._closed:
            raise Closed(self.reason)
        frame.channel = self.id
        self.outgoing.put(frame)
        # A content-bearing method sent without content gets an empty one.
        if (isinstance(frame, (Method, Request)) and content is None
                and frame.method_type.content):
            content = Content()
        if content is not None:
            self.write_content(frame.method_type.klass, content)

    def write_content(self, klass, content):
        """Write the header, the children (recursively) and the body."""
        header = Header(klass, content.weight(), content.size(),
                        content.properties)
        self.write(header)
        for child in content.children:
            self.write_content(klass, child)
        # should split up if content.body exceeds max frame size
        if content.body:
            self.write(Body(content.body))

    def receive(self, frame, work):
        """Route an incoming ``frame`` to the matching queue.

        Non-response methods and requests also put ``self.incoming`` on
        ``work``. ``Response`` frames go to the requester and are not queued.
        """
        if isinstance(frame, Method):
            if frame.method.response:
                self.queue = self.responses
            else:
                self.queue = self.incoming
                work.put(self.incoming)
        elif isinstance(frame, Request):
            self.queue = self.incoming
            work.put(self.incoming)
        elif isinstance(frame, Response):
            self.requester.receive(self, frame)
            if frame.method_type.content:
                self.queue = self.responses
            return
        self.queue.put(frame)

    def queue_response(self, channel, frame):
        """Listener that puts ``frame.method`` on ``channel.responses``."""
        channel.responses.put(frame.method)

    def request(self, method, listener, content=None):
        self.requester.request(method, listener, content)

    def respond(self, method, batch, request):
        self.responder.respond(method, batch, request)

    def invoke(self, type, args, kwargs):
        """Build a ``Method`` frame for ``type`` and pass it to the invoker."""
        if (type.klass.name in ["channel", "session"]) and (
                type.name in ["close", "open", "closed"]):
            self.completion.reset()
            self.incoming_completion.reset()
        self.completion.next_command(type)

        content = kwargs.pop("content", None)
        frame = Method(type, type.arguments(*args, **kwargs))
        return self.invoker(frame, content)

    # used for 0-9
    def invoke_reliable(self, frame, content=None):
        """Send ``frame`` as a request and return its outcome.

        In asynchronous mode this returns a ``Future``, or ``None`` when the
        method has no responses. In synchronous mode it waits for the
        response and returns it wrapped in a ``Message``, or ``None`` when
        the method has no responses. Raises ``Closed`` if the channel closed
        while waiting.
        """
        if not self.synchronous:
            future = Future()
            self.request(frame, future.put_response, content)
            if not frame.method.responses:
                return None
            return future

        self.request(frame, self.queue_response, content)
        if not frame.method.responses:
            if self.use_execution_layer and frame.method_type.is_l4_command():
                self.execution_sync()
                self.completion.wait()
                if self._closed:
                    raise Closed(self.reason)
            return None
        try:
            resp = self.responses.get()
            if resp.method_type.content:
                return Message(self, resp, read_content(self.responses))
            return Message(self, resp)
        except QueueClosed:
            if self._closed:
                raise Closed(self.reason)
            # A bare raise keeps the original traceback.
            raise
class SparkPubsubStreamConsumer(BaseStreamConsumer,
                                BaseStreamAggregation,
                                LoggingMixin):
    """Stream consumer that aggregates Pub/Sub messages with Spark Streaming.

    The constructor starts the Spark job through ``Concurrency.run_process``
    or ``Concurrency.run_thread``. Aggregated rows are put on ``self.queue``
    as JSON and handed out by ``poll``.
    """

    def __init__(self,
                 project_id: str,
                 subscription: str,
                 agg_function: AggregationFunction,
                 agg_window_millis: int,
                 auth_file: str = None,
                 spark_opts: dict = {},
                 multiprocessing=False) -> None:
        """
        SparkPubSubStreamConsumer constructor

        :param project_id: the project id
        :param subscription: the subscription name
        :param agg_function: aggregation function to apply
        :param agg_window_millis: aggregation window in milliseconds
        :param auth_file: path to credentials json file
        :param spark_opts: spark options dict
        :param multiprocessing: use multiprocessing instead of threading
        """
        super().__init__(agg_function, agg_window_millis)
        self.project_id = project_id
        self.subscription = subscription
        # Copy so the shared default dict is never stored on an instance.
        self.spark_opts = dict(spark_opts or {})
        self.subscribed = True
        self.multiprocessing = multiprocessing
        if self.multiprocessing:
            self.queue = MultiprocessingQueue()
        else:
            self.queue = Queue()
        # os.environ only accepts strings; assigning the default None raised
        # TypeError, so leave the variable alone when no file is given.
        if auth_file is not None:
            os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = auth_file

        def run_spark_job(queue: Queue,
                          _agg_function: AggregationFunction,
                          _agg_window_millis: int,
                          _spark_opts: dict = {},
                          _environment: dict = {}):
            os.environ.update(_environment)
            try:
                import findspark
                findspark.init()
            except Exception as ex:
                self.logger.warning("Cannot import Spark pyspark with"
                                    " findspark. Message: {}".format(str(ex)))

            from pyspark.sql import SparkSession
            from pyspark.streaming import StreamingContext
            from pyspark.sql.functions import expr, window
            from pyspark.serializers import NoOpSerializer
            from pyspark.streaming import DStream
            from pyspark.streaming.kafka import utf8_decoder

            spark_builder = SparkSession.builder
            for k in _spark_opts:
                spark_builder = spark_builder.config(k, _spark_opts[k])

            # Keep the result of the chain, as the Kafka consumer does.
            spark_builder = spark_builder \
                .appName(str(self)) \
                .config("spark.jars.packages",
                        "org.apache.spark:spark-streaming-kafka-0-8_2.11:2.2.1,"
                        "org.apache.bahir:spark-streaming-pubsub_2.11:2.2.1") \
                .config("spark.jars",
                        BASE_PATH +
                        "/lib/streaming-pubsub-serializer_2.11-0.1.jar")

            spark = spark_builder.getOrCreate()
            spark.sparkContext.setLogLevel("WARN")
            ssc = StreamingContext(spark.sparkContext,
                                   (agg_window_millis / 1000))

            agg = expr("value")
            if _agg_function == AggregationFunction.AVG:
                agg = expr("avg(value)")
            elif _agg_function == AggregationFunction.SUM:
                agg = expr("sum(value)")
            elif _agg_function == AggregationFunction.COUNT:
                agg = expr("count(value)")
            elif _agg_function == AggregationFunction.P50:
                agg = expr("percentile(value, 0.5)")
            elif _agg_function == AggregationFunction.P75:
                agg = expr("percentile(value, 0.75)")
            elif _agg_function == AggregationFunction.P95:
                agg = expr("percentile(value, 0.95)")
            elif _agg_function == AggregationFunction.P99:
                agg = expr("percentile(value, 0.99)")

            deserializer = \
                ssc._jvm.org.apache.spark.streaming.pubsub.SparkPubsubMessageSerializer()  # noqa: E501
            pubsub_utils = \
                ssc._jvm.org.apache.spark.streaming.pubsub.PubsubUtils
            credentials = \
                ssc._jvm.org.apache.spark.streaming.pubsub.SparkGCPCredentials
            storage_level = \
                ssc._jvm.org.apache.spark.storage.StorageLevel

            _pubsub_stream = pubsub_utils \
                .createStream(ssc._jssc,
                              project_id,
                              subscription,
                              credentials.Builder().build(),
                              storage_level.DISK_ONLY())
            _pubsub_stream_des = _pubsub_stream.transform(deserializer)
            ser = NoOpSerializer()
            pubsub_stream = DStream(_pubsub_stream_des,
                                    ssc, ser).map(utf8_decoder)

            def aggregate_rdd(_queue, _agg, df, ts):
                secs = int(self.agg_window_millis / 1000)
                win = window("ts", "{} seconds".format(secs))
                if df.first():
                    aggs = df \
                        .groupBy("application", win) \
                        .agg(_agg.alias("value")) \
                        .collect()

                    for row in aggs:
                        message = InputMessage(row["application"],
                                               value=row["value"],
                                               ts=ts)
                        payload = message.to_json()
                        self.logger.debug("Enqueue: {}".format(payload))
                        try:
                            _queue.put(payload)
                        except AssertionError as ex:
                            self.logger.warning(str(ex))
                else:
                    self.logger.warning("Empty RDD")

            # Create pubsub stream
            pubsub_stream \
                .foreachRDD(lambda ts, rdd:
                            aggregate_rdd(queue, agg,
                                          spark.read.json(rdd), ts))

            # Run
            ssc.start()
            if "timeout" in _spark_opts:
                ssc.awaitTerminationOrTimeout(_spark_opts["timeout"])
            else:
                ssc.awaitTermination()
            ssc.stop()
            spark.stop()

        # Run in multiprocessing, each aggregation runs a spark driver.
        runner = Concurrency.run_process \
            if self.multiprocessing \
            else Concurrency.run_thread

        Concurrency.get_lock("spark").acquire()
        pid = runner(target=run_spark_job,
                     args=(self.queue,
                           self.agg_function,
                           self.agg_window_millis,
                           self.spark_opts,
                           os.environ.copy()),
                     name="PySpark {}".format(str(self)))
        Concurrency.schedule_release("spark", 30)
        self.pid = pid

    def unsubscribe(self):
        """Stop ``poll`` and release the queue."""
        self.subscribed = False
        if isinstance(self.queue, MultiprocessingQueue):
            self.queue.close()
            self.queue.join_thread()
        elif isinstance(self.queue, Queue):
            # NOTE(review): if Queue is queue.Queue, join() waits for a
            # task_done() per item and poll() never calls it -- confirm this
            # cannot block.
            self.queue.join()

    def poll(self) -> Generator:
        """Yield queued messages until ``unsubscribe`` is called."""
        while self.subscribed:
            try:
                message = self.queue.get(timeout=2)
                yield message
            except Exception:
                # Best effort: a failed or timed-out get() just retries.
                pass

    def __str__(self) -> str:
        return "PubSub aggregated subscription: " \
               "project: {}, subscription: {}, func: {}, window: {}ms".format(
                   self.project_id,
                   self.subscription,
                   self.agg_function.name,
                   self.agg_window_millis)
class SparkKafkaStreamConsumer(BaseStreamConsumer,
                               BaseStreamAggregation,
                               LoggingMixin):
    """Stream consumer that aggregates a Kafka topic with Spark Streaming.

    The constructor starts the Spark job through ``Concurrency.run_process``
    or ``Concurrency.run_thread``. Aggregated rows are put on ``self.queue``
    as JSON and handed out by ``poll``.
    """

    def __init__(self,
                 broker_servers: str,
                 input_topic: str,
                 group_id: str,
                 agg_function: AggregationFunction,
                 agg_window_millis: int,
                 spark_opts: dict = {},
                 multiprocessing=True) -> None:
        """
        SparkKafkaStreamConsumer constructor

        :param broker_servers: broker servers
        :param input_topic: input topic
        :param group_id: consumer group id
        :param agg_function: aggregation function to apply
        :param agg_window_millis: aggregation window in milliseconds
        :param spark_opts: spark options dict
        :param multiprocessing: use multiprocessing instead of threading
        """
        super().__init__(agg_function, agg_window_millis)
        self.broker_servers = broker_servers.split(",")
        self.input_topic = input_topic
        self.group_id = group_id
        # Copy so the shared default dict is never stored on an instance.
        self.spark_opts = dict(spark_opts or {})
        self.subscribed = True
        self.multiprocessing = multiprocessing
        if self.multiprocessing:
            self.queue = MultiprocessingQueue()
        else:
            self.queue = Queue()

        def run_spark_job(queue: Queue,
                          _agg_function: AggregationFunction,
                          _agg_window_millis: int,
                          _spark_opts: dict = {},
                          _environment: dict = {}):
            os.environ.update(_environment)
            try:
                import findspark
                findspark.init()
            except Exception as ex:
                self.logger.warning("Cannot import Spark pyspark with"
                                    " findspark. Message: {}".format(str(ex)))

            from pyspark.sql import SparkSession
            from pyspark.streaming import StreamingContext
            from pyspark.streaming.kafka import KafkaUtils
            from pyspark.sql.functions import expr, window

            spark_builder = SparkSession.builder
            for k in _spark_opts:
                spark_builder = spark_builder.config(k, _spark_opts[k])

            spark_builder = spark_builder \
                .appName(str(self)) \
                .config("spark.jars.packages",
                        "org.apache.spark:spark-streaming-kafka-0-8_2.11:2.2.1,"
                        "org.apache.bahir:spark-streaming-pubsub_2.11:2.2.1") \
                .config("spark.jars",
                        BASE_PATH +
                        "/lib/streaming-pubsub-serializer_2.11-0.1.jar")

            spark = spark_builder.getOrCreate()
            spark.sparkContext.setLogLevel("WARN")
            ssc = StreamingContext(spark.sparkContext,
                                   (agg_window_millis / 1000))

            agg = expr("value")
            if _agg_function == AggregationFunction.AVG:
                agg = expr("avg(value)")
            elif _agg_function == AggregationFunction.SUM:
                agg = expr("sum(value)")
            elif _agg_function == AggregationFunction.COUNT:
                agg = expr("count(value)")
            elif _agg_function == AggregationFunction.P50:
                agg = expr("percentile(value, 0.5)")
            elif _agg_function == AggregationFunction.P75:
                agg = expr("percentile(value, 0.75)")
            elif _agg_function == AggregationFunction.P95:
                agg = expr("percentile(value, 0.95)")
            elif _agg_function == AggregationFunction.P99:
                agg = expr("percentile(value, 0.99)")

            kafka_stream = KafkaUtils.createDirectStream(
                ssc,
                [self.input_topic],
                {"metadata.broker.list": ",".join(self.broker_servers)})

            def aggregate_rdd(_queue, _agg, df, ts):
                secs = int(self.agg_window_millis / 1000)
                win = window("ts", "{} seconds".format(secs))
                if df.first():
                    aggs = df \
                        .groupBy("application", win) \
                        .agg(_agg.alias("value")) \
                        .collect()

                    for row in aggs:
                        message = InputMessage(row["application"],
                                               value=row["value"],
                                               ts=ts)
                        payload = message.to_json()
                        self.logger.debug("Enqueue: {}".format(payload))
                        try:
                            _queue.put(payload)
                        except AssertionError as ex:
                            self.logger.warning(str(ex))
                else:
                    # Logged through self.logger like every other message
                    # in this job, rather than through the warnings module.
                    self.logger.warning("Empty RDD")

            # Create kafka stream
            kafka_stream \
                .map(lambda x: x[1]) \
                .foreachRDD(lambda ts, rdd:
                            aggregate_rdd(queue, agg,
                                          spark.read.json(rdd), ts))

            # Run
            ssc.start()
            if "timeout" in _spark_opts:
                ssc.awaitTerminationOrTimeout(_spark_opts["timeout"])
            else:
                ssc.awaitTermination()
            ssc.stop()
            spark.stop()

        # Run in multiprocessing, each aggregation runs a spark driver.
        runner = Concurrency.run_process \
            if self.multiprocessing \
            else Concurrency.run_thread

        Concurrency.get_lock("spark").acquire()
        pid = runner(target=run_spark_job,
                     args=(self.queue,
                           self.agg_function,
                           self.agg_window_millis,
                           self.spark_opts,
                           os.environ.copy()),
                     name="PySpark {}".format(str(self)))
        Concurrency.schedule_release("spark", 30)
        self.pid = pid

    def unsubscribe(self):
        """Stop ``poll`` and release the queue."""
        self.subscribed = False
        if isinstance(self.queue, MultiprocessingQueue):
            self.queue.close()
            self.queue.join_thread()
        elif isinstance(self.queue, Queue):
            # NOTE(review): if Queue is queue.Queue, join() waits for a
            # task_done() per item and poll() never calls it -- confirm this
            # cannot block.
            self.queue.join()

    def poll(self) -> Generator:
        """Yield queued messages until ``unsubscribe`` is called."""
        while self.subscribed:
            try:
                message = self.queue.get(timeout=2)
                yield message
            except Exception:
                # Best effort: a failed or timed-out get() just retries.
                pass

    def __str__(self) -> str:
        return "Kafka aggregated topic: " \
               "brokers: {}, topic: {}, func: {}, window: {}ms".format(
                   self.broker_servers,
                   self.input_topic,
                   self.agg_function.name,
                   self.agg_window_millis)
class Channel:
    """A single protocol channel writing frames onto a shared outgoing queue.

    Incoming frames are routed by ``receive`` into ``incoming`` or
    ``responses``; header and body frames follow the queue chosen by the
    last content-bearing method.

    NOTE(review): ``__init__`` binds ``self.invoke_method`` and
    ``invoke_reliable`` calls ``self.execution_sync``; neither is defined in
    the visible part of this class -- confirm where they come from.
    """

    def __init__(self, id, outgoing, spec, options):
        self.id = id
        self.outgoing = outgoing
        self.spec = spec
        self.incoming = Queue(0)
        self.responses = Queue(0)
        # Queue that the current frame is routed to (see receive()).
        self.queue = None
        # Queue for the Header/Body frames of the current content method.
        self.content_queue = None
        self._closed = False
        self.reason = None

        self.requester = Requester(self.write)
        self.responder = Responder(self.write)

        self.completion = OutgoingCompletion()
        self.incoming_completion = IncomingCompletion(self)
        self.futures = {}
        # used for incoming methods that apps may want to handle themselves
        self.control_queue = Queue(0)

        self.invoker = self.invoke_method
        self.use_execution_layer = (
            (spec.major == 0 and spec.minor == 10)
            or (spec.major == 99 and spec.minor == 0))
        self.synchronous = True

        self._flow_control_wait_failure = options.get(
            "qpid.flow_control_wait_failure", 60)
        self._flow_control_wait_condition = threading.Condition()
        self._flow_control = False

    def closed(self, reason):
        """Mark the channel closed with ``reason``; later calls are no-ops."""
        if self._closed:
            return
        self._closed = True
        self.reason = reason
        self.incoming.close()
        self.responses.close()
        self.completion.close()
        self.incoming_completion.reset()
        for f in self.futures.values():
            f.put_response(self, reason)

    def write(self, frame, content=None):
        """Send ``frame`` on this channel, followed by its content if any.

        Raises ``Closed`` when the channel has already been closed.
        """
        if self._closed:
            raise Closed(self.reason)
        frame.channel = self.id
        self.outgoing.put(frame)
        # A content-bearing method sent without content gets an empty one.
        if (isinstance(frame, (Method, Request)) and content is None
                and frame.method_type.content):
            content = Content()
        if content is not None:
            self.write_content(frame.method_type.klass, content)

    def write_content(self, klass, content):
        """Write the header, the children (recursively) and the body."""
        header = Header(klass, content.weight(), content.size(),
                        content.properties)
        self.write(header)
        for child in content.children:
            self.write_content(klass, child)
        # should split up if content.body exceeds max frame size
        if content.body:
            self.write(Body(content.body))

    def receive(self, frame, work):
        """Route an incoming ``frame`` to the matching queue.

        Non-response methods and requests also put ``self.incoming`` on
        ``work``. ``Response`` frames go to the requester and are not queued.
        """
        if isinstance(frame, Method):
            if frame.method_type.content:
                if frame.method.response:
                    self.content_queue = self.responses
                else:
                    self.content_queue = self.incoming
            if frame.method.response:
                self.queue = self.responses
            else:
                self.queue = self.incoming
                work.put(self.incoming)
        elif isinstance(frame, Request):
            self.queue = self.incoming
            work.put(self.incoming)
        elif isinstance(frame, Response):
            self.requester.receive(self, frame)
            if frame.method_type.content:
                self.queue = self.responses
            return
        elif isinstance(frame, (Body, Header)):
            self.queue = self.content_queue
        self.queue.put(frame)

    def queue_response(self, channel, frame):
        """Listener that puts ``frame.method`` on ``channel.responses``."""
        channel.responses.put(frame.method)

    def request(self, method, listener, content=None):
        self.requester.request(method, listener, content)

    def respond(self, method, batch, request):
        self.responder.respond(method, batch, request)

    def invoke(self, type, args, kwargs):
        """Build a ``Method`` frame for ``type`` and pass it to the invoker."""
        if (type.klass.name in ["channel", "session"]) and (
                type.name in ["close", "open", "closed"]):
            self.completion.reset()
            self.incoming_completion.reset()
        self.completion.next_command(type)

        content = kwargs.pop("content", None)
        frame = Method(type, type.arguments(*args, **kwargs))
        return self.invoker(frame, content)

    # used for 0-9
    def invoke_reliable(self, frame, content=None):
        """Send ``frame`` as a request and return its outcome.

        In asynchronous mode this returns a ``Future``, or ``None`` when the
        method has no responses. In synchronous mode it waits for the
        response and returns it wrapped in a ``Message``, or ``None`` when
        the method has no responses. Raises ``Closed`` if the channel closed
        while waiting.
        """
        if not self.synchronous:
            future = Future()
            self.request(frame, future.put_response, content)
            if not frame.method.responses:
                return None
            return future

        self.request(frame, self.queue_response, content)
        if not frame.method.responses:
            if self.use_execution_layer and frame.method_type.is_l4_command():
                self.execution_sync()
                self.completion.wait()
                if self._closed:
                    raise Closed(self.reason)
            return None
        try:
            resp = self.responses.get()
            if resp.method_type.content:
                return Message(self, resp, read_content(self.responses))
            return Message(self, resp)
        except QueueClosed:
            if self._closed:
                raise Closed(self.reason)
            # A bare raise keeps the original traceback.
            raise
class Channel:
    """A single protocol channel writing frames onto a shared outgoing queue.

    Incoming frames are routed by ``receive`` into ``incoming`` or
    ``responses``; outgoing content bodies are split into frame-sized chunks
    by ``write_content``.

    NOTE(review): ``__init__`` binds ``self.invoke_method``,
    ``invoke_reliable`` calls ``self.execution_sync`` and ``write_content``
    reads ``self.client``; none of these is defined or assigned in the
    visible part of this class -- confirm where they come from.
    """

    def __init__(self, id, outgoing, spec, options):
        self.id = id
        self.outgoing = outgoing
        self.spec = spec
        self.incoming = Queue(0)
        self.responses = Queue(0)
        # Queue that the current frame is routed to (see receive()).
        self.queue = None
        # Queue for the Header/Body frames of the current content method.
        self.content_queue = None
        self._closed = False
        self.reason = None

        self.requester = Requester(self.write)
        self.responder = Responder(self.write)

        self.completion = OutgoingCompletion()
        self.incoming_completion = IncomingCompletion(self)
        self.futures = {}
        # used for incoming methods that apps may want to handle themselves
        self.control_queue = Queue(0)

        self.invoker = self.invoke_method
        self.use_execution_layer = (
            (spec.major == 0 and spec.minor == 10)
            or (spec.major == 99 and spec.minor == 0))
        self.synchronous = True

        self._flow_control_wait_failure = options.get(
            "qpid.flow_control_wait_failure", 60)
        self._flow_control_wait_condition = threading.Condition()
        self._flow_control = False

    def closed(self, reason):
        """Mark the channel closed with ``reason``; later calls are no-ops."""
        if self._closed:
            return
        self._closed = True
        self.reason = reason
        self.incoming.close()
        self.responses.close()
        self.completion.close()
        self.incoming_completion.reset()
        for f in self.futures.values():
            f.put_response(self, reason)

    def write(self, frame, content=None):
        """Send ``frame`` on this channel, followed by its content if any.

        Raises ``Closed`` when the channel has already been closed.
        """
        if self._closed:
            raise Closed(self.reason)
        frame.channel = self.id
        self.outgoing.put(frame)
        # A content-bearing method sent without content gets an empty one.
        if (isinstance(frame, (Method, Request)) and content is None
                and frame.method_type.content):
            content = Content()
        if content is not None:
            self.write_content(frame.method_type.klass, content)

    def write_content(self, klass, content):
        """Write the header, the children (recursively) and the body.

        The body is sliced into chunks of at most ``frame_max`` minus the
        header size. Raises ``ContentError`` when the body is neither a
        string nor a buffer.
        """
        header = Header(klass, content.weight(), content.size(),
                        content.properties)
        self.write(header)
        for child in content.children:
            self.write_content(klass, child)
        if content.body:
            if not isinstance(content.body, (basestring, buffer)):
                # The 0-8..0-91 client does not support the messages bodies
                # apart from string/buffer - fail early if other type
                raise ContentError(
                    "Content body must be string or buffer, not a %s"
                    % type(content.body))
            frame_max = (self.client.tune_params['frame_max']
                         - self.client.conn.AMQP_HEADER_SIZE)
            for offset in xrange(0, len(content.body), frame_max):
                self.write(Body(content.body[offset:offset + frame_max]))

    def receive(self, frame, work):
        """Route an incoming ``frame`` to the matching queue.

        Non-response methods and requests also put ``self.incoming`` on
        ``work``. ``Response`` frames go to the requester and are not queued.
        """
        if isinstance(frame, Method):
            if frame.method_type.content:
                if frame.method.response:
                    self.content_queue = self.responses
                else:
                    self.content_queue = self.incoming
            if frame.method.response:
                self.queue = self.responses
            else:
                self.queue = self.incoming
                work.put(self.incoming)
        elif isinstance(frame, Request):
            self.queue = self.incoming
            work.put(self.incoming)
        elif isinstance(frame, Response):
            self.requester.receive(self, frame)
            if frame.method_type.content:
                self.queue = self.responses
            return
        elif isinstance(frame, (Body, Header)):
            self.queue = self.content_queue
        self.queue.put(frame)

    def queue_response(self, channel, frame):
        """Listener that puts ``frame.method`` on ``channel.responses``."""
        channel.responses.put(frame.method)

    def request(self, method, listener, content=None):
        self.requester.request(method, listener, content)

    def respond(self, method, batch, request):
        self.responder.respond(method, batch, request)

    def invoke(self, type, args, kwargs):
        """Build a ``Method`` frame for ``type`` and pass it to the invoker."""
        if (type.klass.name in ["channel", "session"]) and (
                type.name in ["close", "open", "closed"]):
            self.completion.reset()
            self.incoming_completion.reset()
        self.completion.next_command(type)

        content = kwargs.pop("content", None)
        frame = Method(type, type.arguments(*args, **kwargs))
        return self.invoker(frame, content)

    # used for 0-9
    def invoke_reliable(self, frame, content=None):
        """Send ``frame`` as a request and return its outcome.

        In asynchronous mode this returns a ``Future``, or ``None`` when the
        method has no responses. In synchronous mode it waits for the
        response and returns it wrapped in a ``Message``, or ``None`` when
        the method has no responses. Raises ``Closed`` if the channel closed
        while waiting.
        """
        if not self.synchronous:
            future = Future()
            self.request(frame, future.put_response, content)
            if not frame.method.responses:
                return None
            return future

        self.request(frame, self.queue_response, content)
        if not frame.method.responses:
            if self.use_execution_layer and frame.method_type.is_l4_command():
                self.execution_sync()
                self.completion.wait()
                if self._closed:
                    raise Closed(self.reason)
            return None
        try:
            resp = self.responses.get()
            if resp.method_type.content:
                return Message(self, resp, read_content(self.responses))
            return Message(self, resp)
        except QueueClosed:
            if self._closed:
                raise Closed(self.reason)
            # A bare raise keeps the original traceback.
            raise
result = [] threeAndFive(3, result) threeAndFive(5, result) print((1, 4, 2, 10, 4)) print(sin(radians(90))) a = ['1:a:b:c', '1:ab:b:c'] a.sort() print(a) print(not []) try: a = open('test_file', 'w') a.write('haah\n') finally: a.close() with open('test_file', 'a') as a: a.write('zhaoyuehagn\n') i = 2 for i in range(5): print(i) print(i) a = [[2, 1, 1], [1, 1, 0], [0, 1, 1]] * 100 print("lalaalla", sys.getsizeof(a)) b = iter(a) print(sys.getsizeof(b)) print(max([max(_) for _ in a])) def zhao(a):
class DataStream(object):
    """A stream of data.

    Samples are written into two shared ``RawArray`` buffers (real and
    imaginary parts) by ``push`` and read back by ``pop``. ``queue`` carries
    only notification and event messages, never the samples themselves.
    """

    def __init__(self, name=None, unit=None):
        super(DataStream, self).__init__()
        self.queue = Queue()
        self.name = name
        self.unit = unit
        self.points_taken_lock = mp.Lock()
        # Using shared memory since these are used in filter processes
        self.points_taken = Value('i', 0)
        self.descriptor = None
        self.start_connector = None
        self.end_connector = None
        self.closed = False

        # Shared memory interface
        self.buffer_lock = mp.Lock()
        # Index of the first free slot in the shared buffers.
        self.buff_idx = Value('i', 0)

    def final_init(self):
        """Allocate the shared buffers; requires a descriptor to be set."""
        self.buffer_size = (self.descriptor.num_points()
                            * self.descriptor.buffer_mult_factor)
        if self.buffer_size > 50e6:
            logger.debug(f"Limiting buffer size of {self} to 50 Million Points")
            self.buffer_size = 50e6
        self.buff_shared_re = RawArray(ctypes.c_double, int(self.buffer_size))
        self.buff_shared_im = RawArray(ctypes.c_double, int(self.buffer_size))
        # numpy views over the shared buffers (no copy).
        self.re_np = np.frombuffer(self.buff_shared_re, dtype=np.float64)
        self.im_np = np.frombuffer(self.buff_shared_im, dtype=np.float64)

    def set_descriptor(self, descriptor):
        """Attach ``descriptor``; raises TypeError for any other type."""
        if isinstance(descriptor, DataStreamDescriptor):
            logger.debug("Setting descriptor on stream '%s' to '%s'",
                         self.name, descriptor)
            self.descriptor = descriptor
        else:
            raise TypeError("Failed setting descriptor. Object is not DataStreamDescriptor: {}".format(descriptor))

    def num_points(self):
        """Return the descriptor's point count, or 0 without a descriptor."""
        if self.descriptor is not None:
            return self.descriptor.num_points()
        logger.warning("Stream '{}' has no descriptor. Function num_points() returns 0.".format(self.name))
        return 0

    def done(self):
        """Return True once at least ``num_points()`` points were pushed."""
        with self.points_taken_lock:
            return self.points_taken.value >= self.num_points()

    def percent_complete(self):
        """Return the pushed fraction in percent (0.0 when unknown)."""
        if (self.descriptor is not None) and self.num_points() > 0:
            with self.points_taken_lock:
                return 100.0 * self.points_taken.value / self.num_points()
        return 0.0

    def reset(self):
        """Reset the descriptor and point counter and drain the queue."""
        self.descriptor.reset()
        with self.points_taken_lock:
            self.points_taken.value = 0
        while not self.queue.empty():
            self.queue.get_nowait()
        if self.start_connector is not None:
            self.start_connector.points_taken.value = 0

    def __repr__(self):
        return "<DataStream(name={}, completion={}%, descriptor={})>".format(
            self.name, self.percent_complete(), self.descriptor)

    def push(self, data):
        """Append ``data`` to the shared buffer and notify the queue.

        ``data`` may be an array, a sized sequence or a single number.

        Raises:
            Exception: if the stream has already been closed.
            ValueError: if ``data`` is neither array-like nor numeric, or
                does not fit in the remaining buffer space.
        """
        if self.closed:
            raise Exception("The queue is closed and should not be receiving any more data")
        with self.points_taken_lock:
            if hasattr(data, 'size'):
                self.points_taken.value += data.size
            else:
                try:
                    self.points_taken.value += len(data)
                except Exception:
                    # No len(): accept it as one point if it acts like a
                    # number.
                    try:
                        data + 1.0
                    except Exception:
                        raise ValueError("Got data {} that is neither an array nor a float".format(data))
                    self.points_taken.value += 1
        with self.buffer_lock:
            start = self.buff_idx.value
            values = np.array(data)
            real_part = np.real(values).flatten()
            if start + real_part.size > self.re_np.size:
                # Adjacent literals: a backslash continuation inside the
                # string put the source indentation into the message.
                raise ValueError(
                    f"Stream {self} received more data than fits in the shared memory buffer. "
                    "This is probably due to digitizer raw streams producing data too quickly for the pipeline.")
            self.re_np[start:start + real_part.size] = real_part
            if np.issubdtype(self.descriptor.dtype, np.complexfloating):
                imag_part = np.imag(values).flatten()
                self.im_np[start:start + imag_part.size] = imag_part
            message = {"type": "data", "data": None}
            self.buff_idx.value = start + values.size
        self.queue.put(message)

    def pop(self):
        """Return a copy of the buffered samples and empty the buffer.

        Returns None when nothing has been pushed since the last pop.
        """
        result = None
        with self.buffer_lock:
            idx = self.buff_idx.value
            if idx != 0:
                result = self.re_np[:idx]
                if np.issubdtype(self.descriptor.dtype, np.complexfloating):
                    result = result.astype(np.complex128) + 1.0j * self.im_np[:idx]
                self.buff_idx.value = 0
                # Copy while holding the lock so later pushes cannot
                # overwrite the returned data.
                result = result.copy()
        return result

    def push_event(self, event_type, data=None):
        """Queue an event message; a ``"done"`` event closes the stream.

        Raises Exception if the stream has already been closed.
        """
        if self.closed:
            raise Exception("The queue is closed and should not be receiving any more data")
        message = {"type": "event", "event_type": event_type, "data": data}
        self.queue.put(message)
        if event_type == "done":
            logger.debug(f"Closing out queue {self}")
            self.queue.close()
            self.closed = True
class Logger(object):
    """
    The actual Logger Frontend, passing logging messages to the assigned
    logging backend if appropriate or to python's logging module if not
    """

    def __init__(self, backend: BaseBackend, max_queue_size: int = None,
                 logging_frequencies=None, reduce_types=None,
                 level=logging.NOTSET):
        """

        Parameters
        ----------
        backend : :class:`delira.logging.base_backend.BaseBackend`
            the logging backend to use
        max_queue_size : int
            the maximum size for the queue; if queue is full, all additional
            logging tasks will be dropped until some tasks inside the queue
            were executed; Per default no maximum size is applied
        logging_frequencies : int or dict
            specifies how often to log for each key.
            If int: integer will be applied to all valid keys
            if dict: should contain a frequency per valid key. Missing keys
            will be filled with a frequency of 1 (log every time)
            None is equal to empty dict here.
        reduce_types : str of FunctionType or dict
            Values are logged in each iteration. This argument specifies,
            how to reduce them to a single value if a logging_frequency
            besides 1 is passed

            if str:
                specifies the reduction type to use. Valid types are
                'last' | 'first' | 'mean' | 'median' | 'max' | 'min'.
                The given type will be mapped to all valid keys.
            if FunctionType:
                specifies the actual reduction function. Will be applied for
                all keys.
            if dict: should contain pairs of valid logging keys and either
                str or FunctionType. Specifies the logging value per key.
                Missing keys will be filled with a default value of 'last'.
                Valid types for strings are
                'last' | 'first' | 'mean' | 'max' | 'min'.
        level : int
            the logging value to use if passing the logging message to
            python's logging module because it is not appropriate for
            logging with the assigned logging backend

        Warnings
        --------
        Since the intermediate values between two logging steps are stored
        in memory to enable reduction, this might cause OOM errors easily
        (especially if the logged items are still on GPU).
        If this occurs you may want to choose a lower logging frequency.

        """
        # 0 means unlimited size, but None is more readable
        if max_queue_size is None:
            max_queue_size = 0

        # convert to empty dict if None
        if logging_frequencies is None:
            logging_frequencies = {}

        # if int: assign int to all possible keys
        if isinstance(logging_frequencies, int):
            logging_frequencies = {
                k: logging_frequencies
                for k in backend.KEYWORD_FN_MAPPING.keys()}
        # if dict: update missing keys with 1 and make sure other values
        # are ints
        elif isinstance(logging_frequencies, dict):
            # work on a copy so the caller's dict is left untouched
            logging_frequencies = dict(logging_frequencies)
            for k in backend.KEYWORD_FN_MAPPING.keys():
                if k not in logging_frequencies:
                    logging_frequencies[k] = 1
                else:
                    logging_frequencies[k] = int(logging_frequencies[k])
        else:
            raise TypeError("Invalid Type for logging frequencies: %s"
                            % type(logging_frequencies).__name__)

        # assign frequencies and create empty queues
        self._logging_frequencies = logging_frequencies
        self._logging_queues = {}

        default_reduce_type = "last"
        if reduce_types is None:
            reduce_types = default_reduce_type

        # map string and function to all valid keys
        if isinstance(reduce_types, (str, FunctionType)):
            reduce_types = {
                k: reduce_types
                for k in backend.KEYWORD_FN_MAPPING.keys()}

        # should be dict by now!
        if isinstance(reduce_types, dict):
            # work on a copy so the caller's dict is left untouched
            reduce_types = dict(reduce_types)
            # check all valid keys for occurences
            for k in backend.KEYWORD_FN_MAPPING.keys():
                # use default reduce type if necessary
                if k not in reduce_types:
                    reduce_types[k] = default_reduce_type
                # check it is either valid string or already function type;
                # the per-key value is tested, not the dict itself, so that
                # function reducers skip the string validation
                elif not isinstance(reduce_types[k], FunctionType):
                    assert reduce_types[k] in possible_reductions()
                    reduce_types[k] = str(reduce_types[k])
                # map all strings to actual functions
                if isinstance(reduce_types[k], str):
                    reduce_types[k] = get_reduction(reduce_types[k])
        else:
            raise TypeError("Invalid Type for logging reductions: %s"
                            % type(reduce_types).__name__)

        self._reduce_types = reduce_types

        self._abort_event = Event()
        self._flush_queue = Queue(max_queue_size)
        self._backend = backend
        self._backend.set_queue(self._flush_queue)
        self._backend.set_event(self._abort_event)
        self._level = level

    def log(self, log_message: dict):
        """
        Main Logging Function, Decides whether to log with the assigned
        backend or python's internal module

        Parameters
        ----------
        log_message : dict
            the message to log; Should be a dict, where the keys indicate the
            logging function to execute, and the corresponding value holds
            the arguments necessary to execute this function

        Raises
        ------
        RuntimeError
            If the abort event was set externally

        """
        try:
            if self._abort_event.is_set():
                self.close()
                raise RuntimeError("Abort-Event in logging process was set: %s"
                                   % self._backend.name)

            # convert tuple to dict if necessary
            if isinstance(log_message, (tuple, list)):
                if len(log_message) == 2:
                    log_message = (log_message,)
                log_message = dict(log_message)

            # try logging and drop item if queue is full
            try:
                # logging appropriate message with backend
                if isinstance(log_message, dict):
                    # multiple logging instances at once possible with
                    # different keys
                    for k, v in log_message.items():
                        # append tag if tag is given, because otherwise we
                        # would enqueue same types but different tags in
                        # same queue
                        if "tag" in v:
                            queue_key = k + "." + v["tag"]
                        else:
                            queue_key = k

                        # create queue if necessary
                        if queue_key not in self._logging_queues:
                            self._logging_queues[queue_key] = []

                        # append current message to queue
                        self._logging_queues[queue_key].append({k: v})

                        # check if logging should be executed
                        if (len(self._logging_queues[queue_key])
                                % self._logging_frequencies[k] == 0):
                            # reduce elements inside queue
                            reduce_message = reduce_dict(
                                self._logging_queues[queue_key],
                                self._reduce_types[k])
                            # flush reduced elements
                            self._flush_queue.put_nowait(reduce_message)
                            # empty queue
                            self._logging_queues[queue_key] = []
                else:
                    # logging inappropriate message with python's logging
                    logging.log(self._level, log_message)
            except Full:
                pass

        # if an exception was raised anywhere, the abort event will be set
        except Exception:
            self._abort_event.set()
            # bare raise keeps the original traceback
            raise

    def __call__(self, log_message: dict):
        """
        Makes the class callable and forwards the call to
        :meth:`delira.logging.base_logger.Logger.log`

        Parameters
        ----------
        log_message : dict
            the logging message to log

        Returns
        -------
        Any
            the return value obtained by
            :meth:`delira.logging.base_logger.Logger.log`

        """
        return self.log(log_message)

    def close(self):
        """
        Function to close the actual logger; Waits for queue closing and sets
        the abortion event

        """
        # the hasattr guards keep this safe when called from __del__ on a
        # partially constructed instance
        if hasattr(self, "_flush_queue"):
            if isinstance(self._flush_queue, MpQueue):
                self._flush_queue.close()
                self._flush_queue.join_thread()

        # the attribute is "_abort_event"; checking "abort_event" meant the
        # event was never set here
        if hasattr(self, "_abort_event"):
            self._abort_event.set()

    def __del__(self):
        """
        Function to be executed, when class instance will be deleted;
        Calls :meth:`delira.logging.base_logger.Logger.close`

        """
        self.close()