Esempio n. 1
0
def from_bytes(b):
    """Deserialize a t-digest from its "small" binary encoding.

    Fields are read in this order, all big-endian:

    * bytes 0-3: encoding version, must equal ``SMALL_ENCODING``
    * bytes 4-11: compression, as a 64-bit float
    * bytes 12-15: number of centroids
    * one 32-bit float per centroid, each being the delta from the previous
      centroid mean
    * the centroid counts, read through ``decode_varint_stream``

    :param b: the serialized digest bytes
    :return: a ``tdigest.TDigest`` updated with every decoded centroid
    :raises ValueError: if the encoding version is not ``SMALL_ENCODING`` or
        the centroid count is outside ``0 .. 1 << 22``
    :raises struct.error: if the buffer is too short for the compression
        field or for the declared number of centroid means
    """
    encoding = int.from_bytes(b[:4], byteorder="big")
    if encoding != SMALL_ENCODING:
        raise ValueError("unsupported encoding version: %s" % encoding)

    compression = struct.unpack(">d", b[4:12])[0]
    # int.from_bytes() decodes as unsigned here, so only the upper bound can
    # actually trip; the lower bound is kept as a defensive check.
    num_centroids = int.from_bytes(b[12:16], byteorder="big")
    if num_centroids < 0 or num_centroids > 1 << 22:
        raise ValueError("bad number of centroids")

    payload = b[16:]
    digest = tdigest.TDigest(K=int(compression))

    # Decode all mean deltas with one call. Re-slicing the remaining buffer
    # once per centroid copies the tail every time, which is quadratic in the
    # number of centroids.
    deltas = struct.unpack_from(">%df" % num_centroids, payload)

    # Means are delta-encoded: each stored float is the distance from the
    # previous mean, so a running sum restores the absolute values.
    x = 0
    means = []
    for delta in deltas:
        x += delta
        means.append(x)

    # The counts start right after the 4-byte-per-centroid block of means.
    stream = decode_varint_stream(payload[4 * num_centroids:])
    for mean in means:
        digest.update(mean, next(stream))

    return digest
    def get_snapshot(self):
        """Return a statistical snapshot of the histogram distribution.

        Every per-thread distribution found in the prior minute bins is added
        into a single digest, which is then wrapped in a ``Snapshot``.

        @return: Snapshot of Histogram
        @rtype: Snapshot
        """
        merged = tdigest.TDigest(1 / self._ACCURACY)
        for minute_bin in self.get_prior_minute_bins_list():
            dists = minute_bin.per_thread_dist
            for thread_id in dists:
                # Plain ``+`` (not ``+=``) so the accumulator is rebound to
                # whatever the addition returns.
                merged = merged + dists[thread_id]
        return Snapshot(merged)
Esempio n. 3
0
 def __init__(
     self,
     args: argparse.Namespace,
     mqtt_client: asyncio_mqtt.Client,
     device_alternate_id: str,
     num_msgs: int = 10,
 ):
     """Set up the bookkeeping for one publishing run.

     :param args: the parsed command line arguments of the script;
         ``args.rate`` and ``args.samples`` are read here
     :param mqtt_client: the instance of an MQTT client to use
     :param device_alternate_id: the alternate ID of the simulated device
     :param num_msgs: the number of the measurement messages to send
     """
     # --- Values handed in by the caller ---------------------------------
     self.mqtt_client = mqtt_client
     """MQTT client instance to use."""
     self.device_alternate_id: str = device_alternate_id
     """Alternate ID of the simulated device."""
     self.num_msgs: int = num_msgs
     """Total number of messages to publish."""

     # --- Settings taken from the command line ---------------------------
     self.desired_message_rate: float = args.rate
     """Number of messages per second to publish."""
     self.samples_per_message: int = args.samples
     """Number of samples (measurements) per published message."""

     # --- Progress tracking ----------------------------------------------
     self.unconfirmed_measurements: Dict[str, datetime.datetime] = {}
     """Publication timestamp of each message, keyed by message ID.
     Only messages the gateway has not confirmed (yet) are listed; an entry
     is removed as soon as its message is confirmed.
     """
     self.utc_start_ts: float | None = None
     """Timestamp when the first measurement message was posted."""
     self.messages_published: int = 0
     """Number of measurement messages published."""
     self.bytes_published: int = 0
     """Cumulative size of the published measurement messages (in bytes)."""
     self.messages_acked: int = 0
     """Number of measurement messages successfully acknowledged by the gateway."""

     # --- Statistics helpers ---------------------------------------------
     self.correlator = correlator.Correlator()
     """Measures how the latency correlates with the number of published
     measurement messages. A positive correlation means that the gateway does
     not cope with the load and keeps buffering the incoming messages.
     """
     self.digest = tdigest.TDigest()
     """Percentile estimation for streaming data."""
 def get_dist_by_thread_id(self, thread_id):
     """Return the distribution stored for ``thread_id``, creating it if absent.

     A missing entry is filled with a new ``tdigest.TDigest`` built with
     ``delta=1 / self.accuracy`` and stored in ``self.per_thread_dist``.
     """
     dists = self.per_thread_dist
     if thread_id in dists:
         return dists[thread_id]
     # First request for this thread: create and remember its distribution.
     dists[thread_id] = tdigest.TDigest(delta=1 / self.accuracy)
     return dists[thread_id]
Esempio n. 5
0
def digest():
    """Build a new ``tdigest.TDigest`` with its default constructor arguments."""
    fresh = tdigest.TDigest()
    return fresh
Esempio n. 6
0
 def __init__(self):
     """Start with both counters at zero and two separate, new digests."""
     # Both counters begin at zero; ints are immutable, so chaining is safe.
     self.current_conn_number = self.rcv_msg_number = 0
     # Each attribute gets its own TDigest instance.
     self.digest = tdigest.TDigest()
     self.conn_digest = tdigest.TDigest()
Esempio n. 7
0
 def __init__(self):
     """Zero the running totals and create the backing digest."""
     # count, sum and sumsq all begin at zero; ints are immutable, so a
     # chained assignment is equivalent to three separate ones.
     self.count = self.sum = self.sumsq = 0
     self.td = tdigest.TDigest(K=10)
     # Starts unset; nothing in this constructor assigns it a value.
     self.tdigest = None
Esempio n. 8
0
        'Unimodal gaussian':
        lambda: rng.gauss(5, 3),
        'Bimodal gaussian':
        lambda: rng.gauss(5, 1) if rng.random() < .5 else rng.gauss(10, 2),
        'Exponential':
        lambda: rng.expovariate(5)
    }

    for func_name, random_func in random_funcs.items():

        methods = {
            'Histogram': histogram.Histogram(max_bins=256),
            'Gaussian': gaussian.Gaussian(),
            'KLL': kll.KLL(k=256, seed=42),
            'StreamHist': streamhist.StreamHist(maxbins=256),
            't-digest': tdigest.TDigest(K=256),
        }
        errors = collections.defaultdict(list)
        update_durations = collections.defaultdict(float)
        query_durations = collections.defaultdict(float)

        print('Updating each method...')
        for i in tqdm.tqdm(range(n)):
            X[i] = random_func()
            for name, method in methods.items():
                tic = time.perf_counter_ns()
                method.update(X[i])
                update_durations[name] += time.perf_counter_ns() - tic

        # Sort all the values in order to get access to the true CDF
        print('Evaluating CDF approximations...')