def end(self):
    """
    Finish accumulation: return a Stats object computed over the middle
    fraction (``self.middle``) of the collected samples.

    Trims an equal number of samples from each tail, builds Stats from the
    remaining sorted middle, then attaches the full untrimmed sample list to
    the result.  Returns an empty Stats when too few samples remain after
    trimming.
    """
    count = len(self.samples)
    trim = mo_math.ceiling(count * (1 - self.middle) / 2)
    if 2 * trim >= count:
        # not enough samples to trim both tails and keep anything
        return stats.Stats()
    kept = sorted(self.samples)[trim:count - trim]
    result = stats.Stats(samples=kept)
    # expose the raw (untrimmed) samples on the result, as the original did
    result.samples = list(self.samples)
    return result
def assertAlmostEqualValue(test, expected, digits=None, places=None, msg=None, delta=None):
    """
    Snagged from unittest/case.py, then modified (Aug2014)

    Compare a single value against an expectation, raising AssertionError on
    mismatch.  Exactly one of `digits`, `places`, or `delta` may be given:
      - digits: compare to a number of decimal places
      - delta:  absolute difference tolerance
      - places: significant figures (default 15) relative to `test`'s magnitude
    `expected == None` means "no expectation" and always passes;
    `expected is NULL` requires `test` to be None.
    """
    if expected is NULL:
        # NULL is a strict expectation: test must be missing
        if test == None:  # pandas dataframes reject any comparision with an exception!
            return
        else:
            raise AssertionError(expand_template("{{test}} != {{expected}}", locals()))
    if expected == None:  # None has no expectations
        return
    if test == expected:
        # shortcut
        return

    if not is_number(expected):
        # SOME SPECIAL CASES, EXPECTING EMPTY CONTAINERS IS THE SAME AS EXPECTING NULL
        if is_list(expected) and len(expected) == 0 and test == None:
            return
        if is_data(expected) and not expected.keys() and test == None:
            return
        if test != expected:
            raise AssertionError(expand_template("{{test}} != {{expected}}", locals()))
        return

    # count how many tolerance parameters the caller supplied; only one allowed
    num_param = 0
    if digits != None:
        num_param += 1
    if places != None:
        num_param += 1
    if delta != None:
        num_param += 1
    if num_param > 1:
        raise TypeError("specify only one of digits, places or delta")

    if digits is not None:
        # NOTE(review): this compares the raw log10 of the difference against
        # `digits` (not -digits) — looks suspect for "decimal places" semantics,
        # but is preserved as-is; confirm against callers before changing
        with suppress_exception:
            diff = log10(abs(test - expected))
            if diff < digits:
                return

        standardMsg = expand_template(
            "{{test}} != {{expected}} within {{digits}} decimal places", locals()
        )
    elif delta is not None:
        # simple absolute-difference tolerance
        if abs(test - expected) <= delta:
            return

        standardMsg = expand_template(
            "{{test}} != {{expected}} within {{delta}} delta", locals()
        )
    else:
        # significant-figures comparison, relative to the magnitude of `test`
        if places is None:
            places = 15

        # suppress_exception guards log10 of zero/negative differences;
        # falling through means the values are not close enough
        with suppress_exception:
            diff = mo_math.log10(abs(test - expected))
            if diff < mo_math.ceiling(mo_math.log10(abs(test))) - places:
                return

        standardMsg = expand_template(
            "{{test|json}} != {{expected|json}} within {{places}} places", locals()
        )

    raise AssertionError(coalesce(msg, "") + ": (" + standardMsg + ")")
def mostly_max(values):
    """
    RETURN A VALUE MORE THAN MOST OF THE VALUES

    Picks the larger of (2 x median) and (1.1 x 90th percentile), capped at
    1.1 x maximum; falls back to 1.1 x maximum when that estimate is zero.

    :param values: iterable of numbers (must be non-empty)

    Fixes over the previous version:
    - `sorted` and `max` were shadowed by locals, so `max(p50*2.0, p90*1.1)`
      raised TypeError ('int' object is not callable)
    - `sorted[num]` was an off-by-one IndexError; the maximum is at num - 1
    - percentile indices are clamped to num - 1 for small inputs
    """
    ordered = sorted(values)
    num = len(ordered)
    # clamp indices: ceiling() can reach num for small/even-sized inputs
    p50 = ordered[min(ceiling(num * 0.5), num - 1)]
    p90 = ordered[min(ceiling(num * 0.9), num - 1)]
    maximum = ordered[num - 1]

    most = max(p50 * 2.0, p90 * 1.1)
    if most == 0:
        return maximum * 1.1
    return min(maximum * 1.1, most)
def nice_ceiling(value):
    """
    RETURN A NICE CEILING

    Round `value` up to a "nice" number: one of 1.5, 2, 3, 5, 7.5, 10
    scaled by the appropriate power of ten.  Zero maps to 1.

    :param value: the number to round up
    """
    if value == 0:
        return 1
    # power of ten one order below the value's magnitude
    magnitude = 10 ** (ceiling(log10(value)) - 1)
    scaled = value / magnitude
    # smallest "nice" mantissa that covers the scaled value
    nice = first(step for step in [1.5, 2, 3, 5, 7.5, 10] if scaled <= step)
    return nice * magnitude
def compressed_bytes2ibytes(compressed, size):
    """
    CONVERT AN ARRAY OF BYTES TO A BYTE-BLOCK GENERATOR

    USEFUL IN THE CASE WHEN WE WANT TO LIMIT HOW MUCH WE FEED ANOTHER
    GENERATOR (LIKE A DECOMPRESSOR)
    """
    # 16 + MAX_WBITS tells zlib to expect a gzip header
    decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
    limit = mo_math.ceiling(len(compressed), size)
    offset = 0
    while offset < limit:
        try:
            chunk = compressed[offset:offset + size]
            yield decompressor.decompress(chunk)
        except Exception as e:
            Log.error("Not expected", e)
        offset += size
def intervals(_min, _max=None, size=1):
    """
    RETURN (min, max) PAIRS OF GIVEN SIZE, WHICH COVER THE _min, _max RANGE
    THE LAST PAIR MAY BE SMALLER
    Yes! It's just like range(), only cooler!
    """
    # single-argument form: intervals(stop) means intervals(0, stop)
    if _max == None:
        _min, _max = 0, _min
    lo = int(mo_math.floor(_min))
    hi = int(mo_math.ceiling(_max))
    # lazily emit (start, end) pairs; the final pair is clipped to hi
    return ((start, min(start + size, hi)) for start in _range(lo, hi, size))
def compressed_bytes2ibytes(compressed, size):
    """
    CONVERT AN ARRAY OF BYTES TO A BYTE-BLOCK GENERATOR

    USEFUL IN THE CASE WHEN WE WANT TO LIMIT HOW MUCH WE FEED ANOTHER
    GENERATOR (LIKE A DECOMPRESSOR)
    """
    # 16 + MAX_WBITS tells zlib to expect a gzip header
    decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)
    limit = mo_math.ceiling(len(compressed), size)
    offset = 0
    while offset < limit:
        try:
            chunk = compressed[offset:offset + size]
            yield decompressor.decompress(chunk)
        except Exception as e:
            Log.error("Not expected", e)
        offset += size
def required_utility(self, current_utility=None):
    """
    Return the target utility (machine capacity) based on pending work.

    Scales up when the work queue implies more than `current_utility` is
    needed (one utility unit per ~20 pending items), otherwise decays halfway
    toward the target.  Outside weekends (dow 6,7) and the 04:00-11:00 window,
    a minimum of 100 is demanded.

    :param current_utility: utility currently deployed
        (NOTE(review): a None default reaches `current_utility < pending / 20`,
        which raises TypeError on Python 3 — callers appear to always pass it;
        confirm before changing)

    Fix: `tod_minimum` was initialized to None, so
    `max(self.settings.minimum_utility, None)` raised TypeError on Python 3
    whenever the time-of-day condition was false; initialize to 0 instead,
    which reproduces the Python 2 behavior (None compared less than numbers).
    """
    queue = sqs.Queue(self.settings.work_queue)
    pending = len(queue)

    tod_minimum = 0  # was None: max(number, None) is a TypeError on Python 3
    if Date.now().dow not in [6, 7] and Date.now().hour not in [
        4, 5, 6, 7, 8, 9, 10, 11
    ]:
        tod_minimum = 100
    minimum = max(self.settings.minimum_utility, tod_minimum)

    if current_utility < pending / 20:
        # INCREASE
        return max(
            minimum, mo_math.ceiling(pending / 20)
        )  # ENSURE THERE IS PLENTY OF WORK BEFORE MACHINE IS DEPLOYED
    else:
        # DECREASE: move halfway from current utility toward the target
        target = max(minimum, min(current_utility, pending * 2))
        return target + int((current_utility - target) / 2)
def _extend(self, rows):
    """
    Insert `rows` into the current BigQuery shard, typed-encoding them first.

    - Re-encodes all rows from scratch whenever a new nested column is found.
    - Creates a new shard when the batch adds columns (or no shard exists yet).
    - On oversized/connection failures, retries recursively in ~1/10-sized
      batches; a single failing row is reported with its document.

    Raises (via Log.error) on read-only tables, insert failures, and
    unrecoverable encoding problems.
    """
    if self.read_only:
        Log.error("not for writing")
    if len(rows) == 0:
        return

    try:
        update = {}
        with Timer("encoding", verbose=DEBUG):
            while True:
                typed_rows = []
                for rownum, row in enumerate(rows):
                    typed_row, more, add_nested = typed_encode(row, self.flake)
                    set_default(update, more)
                    if add_nested:
                        # row HAS NEW NESTED COLUMN!
                        # GO OVER THE rows AGAIN SO "RECORD" GET MAPPED TO "REPEATED"
                        DEBUG and Log.note("New nested documnet found, retrying")
                        break
                    typed_rows.append(typed_row)
                else:
                    # no new nested columns found; encoding is complete
                    break

        if update or not self.shard:
            # BATCH HAS ADDITIONAL COLUMNS!!
            # WE CAN NOT USE THE EXISTING SHARD, MAKE A NEW ONE:
            self._create_new_shard()
            DEBUG and Log.note(
                "added new shard with name: {{shard}}", shard=self.shard.table_id
            )
        with Timer(
            "insert {{num}} rows to bq", param={"num": len(rows)}, verbose=DEBUG
        ):
            failures = self.container.client.insert_rows_json(
                self.shard,
                json_rows=typed_rows,
                row_ids=[None] * len(typed_rows),
                skip_invalid_rows=False,
                ignore_unknown_values=False,
            )
        if failures:
            # "stopped" means the shard refused the rows; a fresh shard may help
            # on the caller's retry, but this call still reports the failure below
            if all(r == "stopped" for r in wrap(failures).errors.reason):
                self._create_new_shard()
                DEBUG and Log.note(
                    "STOPPED encountered: Added new shard with name: {{shard}}",
                    shard=self.shard.table_id,
                )
            Log.error(
                "Got {{num}} failures:\n{{failures|json}}",
                num=len(failures),
                failures=failures[:5],
            )
        else:
            self.last_extend = Date.now()
            DEBUG and Log.note("{{num}} rows added", num=len(typed_rows))
    except Exception as cause:
        # NOTE(review): `typed_rows` may be unbound here if the failure occurred
        # before encoding started — presumed not to happen in practice; confirm
        cause = Except.wrap(cause)
        if (
            len(typed_rows) < 2
            and "Your client has issued a malformed or illegal request." in cause
        ):
            Log.error(
                "big query complains about:\n{{data|json}}",
                data=typed_rows,
                cause=cause,
            )
        elif len(rows) > 1 and (
            "Request payload size exceeds the limit" in cause
            or "An existing connection was forcibly closed by the remote host" in cause
            or "Your client has issued a malformed or illegal request." in cause
            or "BrokenPipeError(32, 'Broken pipe')" in cause
            or "ConnectionResetError(104, 'Connection reset by peer')" in cause
        ):
            # transient / size-related failure: retry in smaller batches
            Log.warning(
                "problem with batch of size {{size}}", size=len(rows), cause=cause
            )
            batch_size = ceiling(len(rows) / 10)
            try:
                DEBUG and Log.note(
                    "attempt smaller batches of size {{batch_size}}",
                    batch_size=batch_size,
                )
                for _, chunk in jx.chunk(rows, batch_size):
                    self._extend(chunk)
                return
            except Exception as cause2:
                Log.error(
                    "smaller batches of size {{batch_size}} did not work",
                    batch_size=batch_size,
                    cause=cause2,
                )
        elif len(rows) == 1:
            Log.error(
                "Could not insert document\n{{doc|json|indent}}",
                doc=rows[0],
                cause=cause,
            )
        else:
            Log.error("Do not know how to handle", cause=cause)