def _collect_one(out, qrec, quant_compare=False):
    """Build the error statistics entry for one executed node.

    Args:
        out: mapping with the keys 'output', 'qoutput', 'node', 'name' and
            'step_idx'; only the first element of 'output' and 'qoutput'
            is compared.
        qrec: quantization record; only ``qrec.out_qs[0]`` is used, and only
            when ``quant_compare`` is set.
        quant_compare: when true, the float output is passed through
            quantize/dequantize of the first output quantizer before it is
            compared with the quantized output.

    Returns:
        dict with the node identification, the mean/max/min absolute error,
        the ``qsnr`` and ``cos_similarity`` values and, when the first output
        dimension is named and has a 'c' axis, the signed mean error of each
        channel in 'chan_err' (otherwise an empty list).
    """
    float_out = out['output'][0]
    if quant_compare:
        out_q = qrec.out_qs[0]
        float_out = out_q.dequantize(out_q.quantize(float_out))
    quant_out = out['qoutput'][0]
    abs_err = np.abs(float_out - quant_out)
    node = out['node']

    stat = {
        'name': out['name'],
        'op_name': node.op_name,
        'step': out['step_idx'],
        'av_err': np.mean(abs_err),
        'max_err': np.max(abs_err),
        'min_err': np.min(abs_err),
        'qsnr': qsnr(float_out, quant_out),
        'cos': cos_similarity(float_out, quant_out),
        'chan_err': []
    }

    out_dim = node.out_dims[0]
    if not (out_dim and out_dim.is_named and out_dim.has_key('c')):
        return stat

    # The per-channel figure is the signed mean difference, not an absolute one.
    stat['chan_err'] = [
        np.average(float_out[chan_slice] - quant_out[chan_slice])
        for chan_slice in (out_dim.srange(c=chan) for chan in range(out_dim.c))
    ]
    return stat
def _collect(self, G, input_tensors) -> Mapping[NodeId, Mapping]:
    """Execute the graph in float and quantized mode and compare each step.

    The graph is run twice through ``execute`` with the same inputs and
    ``self._limit``: once without quantization records and once with
    ``G.quantization`` and ``QuantizationMode.all()``. The second run also
    fills ``all_details`` with one entry per step.

    Returns:
        OrderedDict keyed by ``NodeId(node, None)`` holding, for each step,
        the node name and op name, the step index, the mean/max/min absolute
        error of the first output, its ``qsnr`` value and the overflow
        counters taken from the step details (empty strings when the step
        has no details).
    """
    LOG.debug("gather quantization statistics")
    float_outputs = execute(G, input_tensors, limit=self._limit)
    all_details = []
    quant_outputs = execute(G,
                            input_tensors,
                            limit=self._limit,
                            qrecs=G.quantization,
                            qmode=QuantizationMode.all(),
                            all_details=all_details)

    stats = OrderedDict()
    for step_idx, float_out in enumerate(float_outputs):
        expected = float_out[0]
        actual = quant_outputs[step_idx][0]
        abs_err = np.abs(expected - actual)
        node = G.graph_state.steps[step_idx]['node']

        details = all_details[step_idx]
        if details:
            overflow_dot = details['overflow_dot']
            overflow_acc = details['overflow_acc']
        else:
            overflow_dot = overflow_acc = ""

        stats[NodeId(node, None)] = {
            'name': node.name,
            'op_name': node.op_name,
            'step': step_idx,
            'av_err': np.mean(abs_err),
            'max_err': np.max(abs_err),
            'min_err': np.min(abs_err),
            'qsnr': qsnr(expected, actual),
            'overflow_dot': overflow_dot,
            'overflow_acc': overflow_acc,
        }
    return stats
def _collect_one(fstat, qstat):
    """Compare the float and quantized results of a single step.

    Args:
        fstat: mapping for the float run; the keys 'output', 'node', 'name'
            and 'step_idx' are read.
        qstat: mapping for the quantized run; the keys 'output' and
            'details' are read.

    Returns:
        dict with the node identification, the mean/max/min absolute error
        of the first output, its ``qsnr`` value and the overflow counters
        from ``qstat['details']`` (empty strings when there are no details).
    """
    expected = fstat['output'][0]
    actual = qstat['output'][0]
    abs_err = np.abs(expected - actual)
    node = fstat['node']

    details = qstat['details']
    if details:
        overflow_dot, overflow_acc = (details['overflow_dot'],
                                      details['overflow_acc'])
    else:
        overflow_dot = overflow_acc = ""

    return {
        'name': fstat['name'],
        'op_name': node.op_name,
        'step': fstat['step_idx'],
        'av_err': np.mean(abs_err),
        'max_err': np.max(abs_err),
        'min_err': np.min(abs_err),
        'qsnr': qsnr(expected, actual),
        'overflow_dot': overflow_dot,
        'overflow_acc': overflow_acc,
    }
def qsnr(self, name_set1, name_set2, axis=None, node_name=None):
    """Compare two stored statistic sets for one node, entry by entry.

    Args:
        name_set1: key into ``self.stats`` of the first set.
        name_set2: key into ``self.stats`` of the second set.
        axis: forwarded to the module-level ``qsnr`` function; ``None`` is
            replaced by 0.
        node_name: node whose entries are compared; defaults to the first
            name yielded by ``self.node_names`` ('' when there is none).

    Returns:
        dict with one item per key of the first set: the ``qsnr`` of the two
        entries, or ``None`` when the second set has no entry for that key.
    """
    axis = 0 if axis is None else axis
    if node_name is None:
        node_name = next(iter(self.node_names), '')

    first = self.stats[name_set1][node_name]
    second = self.stats[name_set2][node_name]

    result = {}
    for key, value in first.items():
        if key in second:
            # Inside the method body the name resolves to the module-level
            # qsnr function, not to this method.
            result[key] = qsnr(value, second[key], axis=axis)
        else:
            result[key] = None
    return result
def _collect_one(fstat, qstat, qrec, quant_compare=False):
    """Compare the float and quantized results of a single step.

    Args:
        fstat: mapping for the float run; the keys 'output', 'node', 'name'
            and 'step_idx' are read.
        qstat: mapping for the quantized run; only 'output' is read.
        qrec: quantization record; only ``qrec.out_qs[0]`` is used, and only
            when ``quant_compare`` is set.
        quant_compare: when true, the float output is passed through
            quantize/dequantize of the first output quantizer before it is
            compared with the quantized output.

    Returns:
        dict with the node identification, the mean/max/min absolute error
        of the first output and its ``qsnr`` value.
    """
    expected = fstat['output'][0]
    if quant_compare:
        out_q = qrec.out_qs[0]
        expected = out_q.dequantize(out_q.quantize(expected))
    actual = qstat['output'][0]
    abs_err = np.abs(expected - actual)
    node = fstat['node']

    return {
        'name': fstat['name'],
        'op_name': node.op_name,
        'step': fstat['step_idx'],
        'av_err': np.mean(abs_err),
        'max_err': np.max(abs_err),
        'min_err': np.min(abs_err),
        'qsnr': qsnr(expected, actual),
    }
def _collect_one(out):
    """Build the error statistics entry for one executed node.

    Args:
        out: mapping with the keys 'output', 'qoutput', 'node', 'qdetails',
            'name' and 'step_idx'; only the first element of 'output' and
            'qoutput' is compared.

    Returns:
        dict with the node identification, the mean/max/min absolute error,
        the ``qsnr`` value, the overflow counters from ``out['qdetails']``
        (empty strings when there are no details) and, when the first output
        dimension is named and has a 'c' axis, the signed mean error of each
        channel in 'chan_err' (otherwise an empty list).
    """
    expected = out['output'][0]
    actual = out['qoutput'][0]
    abs_err = np.abs(expected - actual)
    node = out['node']

    qdetails = out['qdetails']
    if qdetails:
        overflow_dot, overflow_acc = (qdetails['overflow_dot'],
                                      qdetails['overflow_acc'])
    else:
        overflow_dot = overflow_acc = ""

    stat = {
        'name': out['name'],
        'op_name': node.op_name,
        'step': out['step_idx'],
        'av_err': np.mean(abs_err),
        'max_err': np.max(abs_err),
        'min_err': np.min(abs_err),
        'qsnr': qsnr(expected, actual),
        'overflow_dot': overflow_dot,
        'overflow_acc': overflow_acc,
        'chan_err': []
    }

    out_dim = node.out_dims[0]
    if not (out_dim and out_dim.is_named and out_dim.has_key('c')):
        return stat

    # The per-channel figure is the signed mean difference, not an absolute one.
    stat['chan_err'] = [
        np.average(expected[chan_slice] - actual[chan_slice])
        for chan_slice in (out_dim.srange(c=chan) for chan in range(out_dim.c))
    ]
    return stat
def func(x, y):
    """Return ``qsnr`` of two arrays, or NaN when either one is missing.

    Both arrays are converted to 64-bit floats before the comparison.
    ``np.float`` (a plain alias of the builtin ``float``) was removed in
    NumPy 1.24, so ``np.float64`` is used; it yields the same dtype.

    Args:
        x: array-like with an ``astype`` method, or ``None``.
        y: array-like with an ``astype`` method, or ``None``.

    Returns:
        The value of ``qsnr(x, y)`` on the float64 copies, or
        ``float('nan')`` when ``x`` or ``y`` is ``None``.
    """
    if x is None or y is None:
        return float('nan')
    return qsnr(x.astype(np.float64), y.astype(np.float64))
def compress(self, node, idx, bits=None, min_qsnr=None, force_sparse=False):
    """Cluster a stored value into a small codebook and record the result.

    The value is fetched with ``self.get(node, idx)`` and clustered with
    ``self.cluster``. The number of bins (``2 ** bits``) is chosen in one of
    three ways:

    * ``bits`` given: used directly, reduced when there would be more bins
      than values;
    * ``min_qsnr`` given: the smallest bit width from 2 to 8 whose clustered
      value reaches that ``qsnr`` against the original;
    * neither: an elbow search over the clustering inertia, refined with the
      silhouette score.

    Args:
        node: node owning the value; ``node.name`` is used in log messages.
        idx: index of the value within the node, passed to ``self.get`` and
            ``self.set``.
        bits: requested code width in bits, or ``None``.
        min_qsnr: minimum acceptable QSNR, or a falsy value to disable.
        force_sparse: when true the result is always marked sparse.

    Returns:
        The ``CompressedVal`` that was stored through ``self.set``, or
        ``None`` when the value is too small, the QSNR target cannot be met
        or the estimated compressed size is not smaller than ``val.size``.
    """
    val = self.get(node, idx)
    flattened_val = val.flatten()
    codes = None
    if val.size <= 4:
        LOG.warning('value in node %s is too small to compress', node.name)
        return None

    if bits is not None:
        bins = int(math.pow(2, bits))
        if bins > val.size:
            bits = max(int(math.floor(math.log2(val.size))), 2)
            bins = int(math.pow(2, bits))
            LOG.warning(
                'more bins than values for node %s - reducing to %s bits',
                node.name, bits)
        compressed_val, codes, codebook = self.cluster(
            bins, flattened_val, val)
    elif min_qsnr:
        cur_qsnr = -math.inf
        bits = 1
        while cur_qsnr < min_qsnr:
            bits += 1
            if bits > 8:
                LOG.warning(
                    'value in node %s cannot meet %s QSNR at 8 bits or under - not compressing',
                    node.name, min_qsnr)
                return None
            bins = int(math.pow(2, bits))
            if bins > val.size:
                LOG.warning(
                    'value in node %s cannot be reduced in size - not compressing',
                    node.name)
                return None
            compressed_val, codes, codebook = self.cluster(
                bins, flattened_val, val)
            cur_qsnr = qsnr(compressed_val.astype(np.float32),
                            val.astype(np.float32))
    else:
        # automatic search of optimal k with inertia method
        silhouette = []
        # presumably self.cluster appends one inertia value per call - the
        # length checks below depend on it
        inertia = []
        for bits in range(2, 9):
            bins = int(math.pow(2, bits))
            if bins > val.size - 1:
                break
            compressed_val, codes, codebook = self.cluster(
                bins, flattened_val, val, inertia=inertia)
            silhouette.append(
                silhouette_score(flattened_val.reshape(-1, 1),
                                 compressed_val.flatten()))
        if len(inertia) <= 1:
            # NOTE(review): bits keeps the value the loop stopped at; confirm
            # that this matches what encode_shorter produces.
            compressed_val, codes, codebook = self.encode_shorter(
                flattened_val, val)
        else:
            # second derivative of the inertia curve locates the elbow
            if len(inertia) > 2:
                elb_idx = np.argmax(np.diff(np.diff(np.array(inertia)))) + 1
            else:
                elb_idx = 1
            # pick the better silhouette of the two candidates at the elbow;
            # silhouette[i] was measured with i + 2 bits
            bits = np.argmax(np.array(
                silhouette[elb_idx - 1:elb_idx + 1])) + elb_idx + 1
            bins = int(math.pow(2, bits))
            compressed_val, codes, codebook = self.cluster(
                bins, flattened_val, val)

    # see if sparse representation is better
    # TODO - this is not entirely correct since it is not accounting for the
    # extra bin created by the sparse value
    _, counts = np.unique(codes, return_counts=True)
    sparse_freq = counts.max()
    # Sizes are in bytes: divide the bit count by 8 first, then round up.
    sparse_size = math.ceil(
        ((codes.size - sparse_freq) * (bits + 1) + sparse_freq) / 8)
    unsparse_size = math.ceil(codes.size * bits / 8)
    sparse = bool(force_sparse) or sparse_size < unsparse_size
    # The codebook is stored in both layouts, so it is always counted.
    comp_size = int((sparse_size if sparse else unsparse_size) + codebook.size)

    if comp_size >= val.size:
        LOG.warning(
            'value in node %s cannot be compressed smaller with this setting',
            node.name)
        return None
    comp_val = CompressedVal(compressed_val, bits, codebook, comp_size, sparse)
    self.set(node, idx, val, comp_val)
    return comp_val
def func(x, y):
    """Return ``qsnr`` of two arrays, or a marker string when not comparable.

    Both arrays are converted to 64-bit floats before the comparison.
    ``np.float`` (a plain alias of the builtin ``float``) was removed in
    NumPy 1.24, so ``np.float64`` is used; it yields the same dtype.

    Args:
        x: array with ``shape`` and ``astype``, or ``None``.
        y: array with ``shape`` and ``astype``, or ``None``.

    Returns:
        ``"N/A"`` when ``x`` or ``y`` is ``None``, ``"shape"`` when the
        shapes differ, otherwise the value of ``qsnr`` on the float64 copies.
    """
    if x is None or y is None:
        return "N/A"
    if x.shape != y.shape:
        return "shape"
    return qsnr(x.astype(np.float64), y.astype(np.float64))
def compress(self, node, idx, bits=None, min_qsnr=None, force_sparse=False,
             allow_sparse=True, qbits=8, threshold=None):
    """Cluster a stored value into a small codebook and record the result.

    The uncompressed value is fetched with
    ``self.get(node, idx, compressed=False)``, optionally thresholded, and
    clustered with ``self.cluster``. The number of bins (``2 ** bits``) is
    chosen in one of three ways:

    * ``bits`` given: used directly, reduced when there would be more bins
      than values;
    * ``min_qsnr`` given: the smallest bit width from 2 to 8 whose clustered
      value reaches that ``qsnr`` against the (thresholded) value;
    * neither: an elbow search over the clustering inertia, refined with the
      silhouette score.

    Args:
        node: node owning the value; ``node.name`` is used in log messages.
        idx: index of the value within the node.
        bits: requested code width in bits, or ``None``.
        min_qsnr: minimum acceptable QSNR, or a falsy value to disable.
        force_sparse: mark the result sparse whenever ``allow_sparse`` is set.
        allow_sparse: consider the sparse layout at all.
        qbits: bits per element used to size the codebook and the
            uncompressed value.
        threshold: when truthy, elements whose magnitude is below it are set
            to zero on a copy before clustering.

    Returns:
        The ``CompressedVal`` that was stored through ``self.set``, or
        ``None`` when the thresholded value is all zeros, the value is too
        small, the QSNR target cannot be met or the estimated size is not
        reduced.
    """
    orig_val = self.get(node, idx, compressed=False)
    val = orig_val.copy()
    if threshold:
        val[np.logical_and(val < threshold, val > 0)] = 0
        val[np.logical_and(val > np.negative(threshold), val < 0)] = 0
        if np.all(val == 0):
            return None
    flattened_val = val.flatten()
    codes = None
    if val.size <= 4:
        LOG.info('value in node %s is too small to compress', node.name)
        return None

    if bits is not None:
        bins = int(math.pow(2, bits))
        if bins > val.size:
            bits = max(int(math.floor(math.log2(val.size))), 2)
            bins = int(math.pow(2, bits))
            LOG.info(
                'more bins than values for node %s - reducing to %s bits',
                node.name, bits)
        compressed_val, codes, codebook = self.cluster(
            bins, flattened_val, val)
    elif min_qsnr:
        cur_qsnr = -math.inf
        bits = 1
        while cur_qsnr < min_qsnr:
            bits += 1
            if bits > 8:
                LOG.info(
                    'value in node %s cannot meet %s QSNR at 8 bits or under - not compressing',
                    node.name, min_qsnr)
                return None
            bins = int(math.pow(2, bits))
            if bins > val.size:
                LOG.info(
                    'value in node %s cannot be reduced in size - not compressing',
                    node.name)
                return None
            compressed_val, codes, codebook = self.cluster(
                bins, flattened_val, val)
            cur_qsnr = qsnr(compressed_val.astype(np.float32),
                            val.astype(np.float32))
    else:
        # automatic search of optimal k with inertia method
        silhouette = []
        # presumably self.cluster appends one inertia value per call - the
        # length checks below depend on it
        inertia = []
        for bits in range(2, 9):
            bins = int(math.pow(2, bits))
            if bins > val.size - 1:
                break
            compressed_val, codes, codebook = self.cluster(
                bins, flattened_val, val, inertia=inertia)
            silhouette.append(
                silhouette_score(flattened_val.reshape(-1, 1),
                                 compressed_val.flatten()))
        if len(inertia) <= 1:
            # NOTE(review): bits keeps the value the loop stopped at; confirm
            # that this matches what encode_shorter produces.
            compressed_val, codes, codebook = self.encode_shorter(
                flattened_val, val)
        else:
            # second derivative of the inertia curve locates the elbow
            if len(inertia) > 2:
                elb_idx = np.argmax(np.diff(np.diff(np.array(inertia)))) + 1
            else:
                elb_idx = 1
            # pick the better silhouette of the two candidates at the elbow;
            # silhouette[i] was measured with i + 2 bits
            bits = np.argmax(np.array(
                silhouette[elb_idx - 1:elb_idx + 1])) + elb_idx + 1
            bins = int(math.pow(2, bits))
            compressed_val, codes, codebook = self.cluster(
                bins, flattened_val, val)

    # see if sparse representation is better
    # Sizes are in bytes: divide the bit count by 8 first, then round up.
    unsparse_size = math.ceil(codes.size * bits / 8)
    qelem_codebook_size = math.ceil((codebook.size * qbits) / 8)
    uncompressed_size = int(math.ceil((val.size * qbits) / 8))
    if allow_sparse:
        _, counts = np.unique(codes, return_counts=True)
        # position of the most frequent code among the sorted unique codes
        sparse_idx = np.argmax(counts)
        sparse_freq = counts[sparse_idx]
        sparse_size = math.ceil(
            ((codes.size - sparse_freq) * (bits + 1) + sparse_freq) / 8)
        if force_sparse or sparse_size < unsparse_size:
            sparse = True
            compressed_size = sparse_size
        else:
            sparse = False
            compressed_size = unsparse_size
    else:
        compressed_size = unsparse_size
        sparse = False
        sparse_idx = 0
    compressed_size += qelem_codebook_size

    if compressed_size >= uncompressed_size:
        LOG.info(
            f'value in node {node.name} has not been compressed since its size '
            f'was not reduced {uncompressed_size} bytes -> {compressed_size} bytes'
        )
        return None
    comp_val = CompressedVal(compressed_val, bits, codebook, compressed_size,
                             sparse, sparse_idx)
    self.set(node, idx, val, comp_val)
    return comp_val
def compress(self, node, idx, bits=None, min_qsnr=None, sparse=False):
    """Replace a stored value by its k-means codebook approximation.

    The value is fetched with ``self.get(node, idx)``, clustered with
    ``KMeans`` into ``2 ** bits`` centres and every element is replaced by
    its nearest centre. The bit width is chosen in one of three ways:

    * ``bits`` given: used directly;
    * ``min_qsnr`` given: the smallest width from 2 to 7 bits whose
      approximation reaches that ``qsnr``; the search stops early, keeping
      the last approximation, once there would be more bins than values;
    * neither: an elbow search over the k-means inertia for 1 to 7 bits,
      refined with the silhouette score.

    The result is stored with ``self.set``; nothing is returned.

    Args:
        node: node owning the value; ``node.name`` is used in error messages.
        idx: index of the value within the node.
        bits: requested code width in bits, or ``None``.
        min_qsnr: minimum acceptable QSNR, or a falsy value to disable.
        sparse: when true, the most frequent code index is passed to
            ``self.set`` as the sparse value (``None`` otherwise).

    Raises:
        Exception: when there are more bins than values for the requested
            width, when no width below 8 bits meets ``min_qsnr``, or when
            the value is too small for the automatic search.
    """
    val = self.get(node, idx)
    samples = val.flatten().reshape((-1, 1))

    def quantize(bins):
        """Fit a ``bins``-entry codebook; return (approximation, codes, inertia)."""
        kmeans = KMeans(n_clusters=bins)
        kmeans.fit(samples)
        codebook = kmeans.cluster_centers_.astype(val.dtype)
        # vq returns (codes, distances); only the code indices are wanted
        codes = vq(samples, codebook)[0]
        return codebook[codes].reshape(val.shape), codes, kmeans.inertia_

    if bits is not None:
        bins = int(math.pow(2, bits))
        if bins > val.size:
            raise Exception(
                'More bins than values with {} bits'.format(bits))
        compressed_val, codes, _ = quantize(bins)
    elif min_qsnr:
        cur_qsnr = -math.inf
        bits = 1
        codes = None
        while cur_qsnr < min_qsnr:
            bits += 1
            if bits > 7:
                raise Exception(
                    'Cannot find a solution with less than 8 bits '
                    'for {} with min_qsnr = {}'.format(node.name, min_qsnr))
            bins = int(math.pow(2, bits))
            if bins > val.size:
                if codes is None:
                    # nothing has been clustered yet, so there is no best
                    # effort result to fall back on
                    raise Exception(
                        'More bins than values with {} bits'.format(bits))
                break
            compressed_val, codes, _ = quantize(bins)
            cur_qsnr = qsnr(compressed_val.astype(np.float32),
                            val.astype(np.float32))
    else:
        # automatic search of optimal k with inertia method
        silhouette = []
        inertia = []
        for bits in range(1, 8):
            bins = int(math.pow(2, bits))
            # silhouette_score needs strictly fewer labels than samples
            if bins > val.size - 1:
                break
            compressed_val, codes, fit_inertia = quantize(bins)
            inertia.append(fit_inertia)
            silhouette.append(
                silhouette_score(samples, compressed_val.flatten()))
        if not silhouette:
            raise Exception(
                'Value of {} is too small for automatic compression'.format(
                    node.name))
        # second derivative of the inertia curve locates the elbow; entry j
        # of the second difference describes inertia[j + 1]
        if len(inertia) > 2:
            elb_idx = int(np.argmax(np.diff(np.diff(np.array(inertia))))) + 1
        else:
            elb_idx = 1
        # pick the better silhouette of the two candidates at the elbow;
        # silhouette[i] was measured with i + 1 bits
        window = silhouette[elb_idx - 1:elb_idx + 1]
        bits = int(np.argmax(np.array(window))) + elb_idx
        compressed_val, codes, _ = quantize(int(math.pow(2, bits)))

    if sparse:
        code_values, counts = np.unique(codes, return_counts=True)
        sparse_val = code_values[np.argmax(counts)]
    else:
        sparse_val = None
    self.set(node, idx, val, compressed_val, sparse_val)
def compress(orig_val, bits=None, min_qsnr=None, force_sparse=False,
             allow_sparse=True, qbits=8, threshold=None, force=True):
    """Cluster a value into a small codebook and describe the result.

    A copy of ``orig_val`` is optionally thresholded and clustered with
    ``cluster``. The number of bins is chosen in one of three ways:

    * ``bits`` given: ``2 ** bits`` bins (one more with ``force_sparse``),
      reduced when there would be more bins than values;
    * ``min_qsnr`` given: the smallest width from 2 to 8 bits whose clustered
      value reaches that ``qsnr``; at 8 bits one further attempt is made with
      an extra bin and ``force_sparse`` switched on;
    * neither: an elbow search over the clustering inertia for 2 to 7 bits,
      refined with the silhouette score.

    Args:
        orig_val: array to compress; it is not modified.
        bits: requested code width in bits, or ``None``.
        min_qsnr: minimum acceptable QSNR, or a falsy value to disable.
        force_sparse: mark the result sparse whenever ``allow_sparse`` is set.
        allow_sparse: consider the sparse layout at all.
        qbits: bits per element used to size the codebook and the
            uncompressed value.
        threshold: when truthy, elements whose magnitude is below it are set
            to zero before clustering.
        force: with an explicit ``bits``, skip the check that the result is
            smaller than the uncompressed value.

    Returns:
        A ``CompressedVal`` built from the clustered value, the bit width,
        the codebook, the estimated size in bytes, the sparse flag and
        index, and the threshold.

    Raises:
        CompressionError: when the thresholded value is all zeros, the value
            has 4 elements or fewer, the QSNR target cannot be met or the
            size cannot be reduced.
    """
    val = orig_val.copy()
    manual_bits = bits is not None and force
    if threshold:
        val[np.logical_and(val < threshold, val > 0)] = 0
        val[np.logical_and(val > np.negative(threshold), val < 0)] = 0
        if np.all(val == 0):
            raise CompressionError('value is all zeros')
    flattened_val = val.flatten()
    codes = None
    if val.size <= 4:
        raise CompressionError('value is too small to compress')

    if bits is not None:
        bins = int(math.pow(2, bits)) + (1 if force_sparse else 0)
        if bins > val.size:
            bits = max(int(math.floor(math.log2(val.size))), 2)
            bins = int(math.pow(2, bits))
            LOG.info(f'more bins than values - reducing to {bits} bits')
        compressed_val, codes, codebook = cluster(bins, flattened_val, val)
    elif min_qsnr:
        cur_qsnr = -math.inf
        bits = 1
        while cur_qsnr < min_qsnr:
            if bits == 8 and not force_sparse:
                # last resort: stay at 8 bits but add the extra sparse bin
                bins = int(math.pow(2, bits)) + 1
                force_sparse = True
            else:
                bits += 1
                if bits > 8:
                    raise CompressionError(
                        f'value cannot meet {min_qsnr} QSNR at 8 bits or under'
                    )
                bins = int(math.pow(2, bits)) + (1 if force_sparse else 0)
            if bins > val.size:
                raise CompressionError('value cannot be reduced in size')
            compressed_val, codes, codebook = cluster(bins, flattened_val, val)
            cur_qsnr = qsnr(compressed_val.astype(np.float32),
                            val.astype(np.float32))
    else:
        # automatic search of optimal k with inertia method
        silhouette = []
        # presumably cluster appends one inertia value per call - the length
        # checks below depend on it
        inertia = []
        for bits in range(2, 8):
            bins = int(math.pow(2, bits))
            if bins > val.size - 1:
                break
            compressed_val, codes, codebook = cluster(bins,
                                                      flattened_val,
                                                      val,
                                                      inertia=inertia)
            silhouette.append(
                silhouette_score(flattened_val.reshape(-1, 1),
                                 compressed_val.flatten()))
        if len(inertia) <= 1:
            # NOTE(review): bits keeps the value the loop stopped at; confirm
            # that this matches what encode_shorter produces.
            compressed_val, codes, codebook = encode_shorter(
                flattened_val, val)
        else:
            # second derivative of the inertia curve locates the elbow
            if len(inertia) > 2:
                elb_idx = np.argmax(np.diff(np.diff(np.array(inertia)))) + 1
            else:
                elb_idx = 1
            # pick the better silhouette of the two candidates at the elbow;
            # silhouette[i] was measured with i + 2 bits
            bits = np.argmax(np.array(
                silhouette[elb_idx - 1:elb_idx + 1])) + elb_idx + 1
            bins = int(math.pow(2, bits))
            compressed_val, codes, codebook = cluster(bins, flattened_val, val)

    # see if sparse representation is better
    # Sizes are in bytes: divide the bit count by 8 first, then round up.
    unsparse_size = math.ceil(codes.size * bits / 8)
    qelem_codebook_size = math.ceil((codebook.size * qbits) / 8)
    uncompressed_size = int(math.ceil((val.size * qbits) / 8))
    if allow_sparse:
        sparse_idx, sparse_freq = compute_sparse(codes)
        sparse_size = math.ceil(
            ((val.size - sparse_freq) * (bits + 1) + sparse_freq) / 8)
        if force_sparse or sparse_size < unsparse_size:
            sparse = True
            compressed_size = sparse_size
        else:
            sparse = False
            compressed_size = unsparse_size
    else:
        compressed_size = unsparse_size
        sparse = False
        sparse_idx = 0
    compressed_size += qelem_codebook_size

    if not manual_bits and compressed_size >= uncompressed_size:
        raise CompressionError('value cannot be reduced in size')
    comp_val = CompressedVal(compressed_val, bits, codebook, compressed_size,
                             sparse, sparse_idx, threshold)
    return comp_val