Example #1
    def _collect_one(out, qrec, quant_compare=False):
        fout = out['output'][0]
        if quant_compare:
            fout = qrec.out_qs[0].dequantize(qrec.out_qs[0].quantize(fout))
        qout = out['qoutput'][0]

        error_ = np.abs(fout - qout)
        node = out['node']

        stat = {
            'name': out['name'],
            'op_name': node.op_name,
            'step': out['step_idx'],
            'av_err': np.mean(error_),
            'max_err': np.max(error_),
            'min_err': np.min(error_),
            'qsnr': qsnr(fout, qout),
            'cos': cos_similarity(fout, qout),
            'chan_err': []
        }

        dim = node.out_dims[0]
        if dim and dim.is_named and dim.has_key('c'):
            channel_error = []
            dim = node.out_dims[0]
            for i in range(dim.c):
                srange = dim.srange(c=i)
                channel_error.append(np.average(fout[srange] - qout[srange]))
            stat['chan_err'] = channel_error

        return stat
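The examples on this page call a qsnr helper (and Example #1 also a cos_similarity helper) that is not shown. A minimal sketch of what such helpers could look like, assuming the conventional definitions of quantization signal-to-noise ratio in dB and of cosine similarity; names and signatures are inferred from the call sites, not taken from the project:

    import numpy as np

    def qsnr(ref, sig, axis=None):
        # Quantization SNR in dB between a float tensor and its quantized
        # counterpart; higher is better. The small floor avoids log(0).
        err_power = np.sum((ref - sig) ** 2, axis=axis)
        ref_power = np.sum(ref ** 2, axis=axis)
        return 10 * np.log10(ref_power / np.maximum(err_power, 1e-30))

    def cos_similarity(a, b):
        # Cosine similarity between the flattened float and quantized tensors.
        a, b = a.ravel(), b.ravel()
        denom = np.linalg.norm(a) * np.linalg.norm(b)
        return float(np.dot(a, b) / denom) if denom else 0.0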
Example #2
    def _collect(self, G, input_tensors) -> Mapping[NodeId, Mapping]:
        LOG.debug("gather quantization statistics")
        output_ = execute(G, input_tensors, limit=self._limit)
        all_details = []
        qoutput_ = execute(G,
                           input_tensors,
                           limit=self._limit,
                           qrecs=G.quantization,
                           qmode=QuantizationMode.all(),
                           all_details=all_details)
        stats = OrderedDict()
        for idx, out in enumerate(output_):
            error_ = np.abs(out[0] - qoutput_[idx][0])
            step = G.graph_state.steps[idx]
            node = step['node']
            details = all_details[idx]
            if details:
                overflow_dot = details['overflow_dot']
                overflow_acc = details['overflow_acc']
            else:
                overflow_dot = overflow_acc = ""

            stats[NodeId(node, None)] = {
                'name': node.name,
                'op_name': node.op_name,
                'step': idx,
                'av_err': np.mean(error_),
                'max_err': np.max(error_),
                'min_err': np.min(error_),
                'qsnr': qsnr(out[0], qoutput_[idx][0]),
                'overflow_dot': overflow_dot,
                'overflow_acc': overflow_acc,
            }

        return stats
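A hypothetical way to render the mapping returned by _collect, using only the keys that appear in the snippet above (the formatting is illustrative, not part of the project):

    for nid, stat in stats.items():
        print(f"{stat['step']:>4} {stat['op_name']:<20} "
              f"qsnr={stat['qsnr']:.1f}dB max_err={stat['max_err']:.5f} "
              f"dot_overflow={stat['overflow_dot'] or '-'}")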
Example #3
    def _collect_one(fstat, qstat):
        fout = fstat['output']
        qout = qstat['output']
        error_ = np.abs(fout[0] - qout[0])
        node = fstat['node']
        details = qstat['details']
        if details:
            overflow_dot = details['overflow_dot']
            overflow_acc = details['overflow_acc']
        else:
            overflow_dot = overflow_acc = ""

        stat = {
            'name': fstat['name'],
            'op_name': node.op_name,
            'step': fstat['step_idx'],
            'av_err': np.mean(error_),
            'max_err': np.max(error_),
            'min_err': np.min(error_),
            'qsnr': qsnr(fout[0], qout[0]),
            'overflow_dot': overflow_dot,
            'overflow_acc': overflow_acc,
        }

        return stat
Example #4
    def qsnr(self, name_set1, name_set2, axis=None, node_name=None):
        if axis is None:
            axis = 0
        if node_name is None:
            node_name = next(iter(self.node_names), '')

        set1 = self.stats[name_set1][node_name]
        set2 = self.stats[name_set2][node_name]
        return {
            k: qsnr(v, set2[k], axis=axis) if k in set2 else None
            for k, v in set1.items()
        }
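The qsnr method above compares two previously collected stat sets key by key for a single node. A hypothetical call, with the set names and node name made up for illustration:

    # Returns e.g. {'weights': 41.3, 'biases': 37.9, ...}; keys missing from
    # the second set map to None, as in the dict comprehension above.
    per_key = collector.qsnr('float_run', 'quant_run', axis=0, node_name='conv_0')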
Example #5
    def _collect_one(fstat, qstat, qrec, quant_compare=False):
        fout = fstat['output'][0]
        if quant_compare:
            fout = qrec.out_qs[0].dequantize(qrec.out_qs[0].quantize(fout))
        qout = qstat['output'][0]
        error_ = np.abs(fout - qout)
        node = fstat['node']

        stat = {
            'name': fstat['name'],
            'op_name': node.op_name,
            'step': fstat['step_idx'],
            'av_err': np.mean(error_),
            'max_err': np.max(error_),
            'min_err': np.min(error_),
            'qsnr': qsnr(fout, qout),
        }

        return stat
Example #6
    def _collect_one(out):
        fout = out['output']
        qout = out['qoutput']
        error_ = np.abs(fout[0] - qout[0])
        node = out['node']
        qdetails = out['qdetails']
        if qdetails:
            overflow_dot = qdetails['overflow_dot']
            overflow_acc = qdetails['overflow_acc']
        else:
            overflow_dot = overflow_acc = ""

        stat = {
            'name': out['name'],
            'op_name': node.op_name,
            'step': out['step_idx'],
            'av_err': np.mean(error_),
            'max_err': np.max(error_),
            'min_err': np.min(error_),
            'qsnr': qsnr(fout[0], qout[0]),
            'overflow_dot': overflow_dot,
            'overflow_acc': overflow_acc,
            'chan_err': []
        }

        dim = node.out_dims[0]
        if dim and dim.is_named and dim.has_key('c'):
            channel_error = []
            dim = node.out_dims[0]
            for i in range(dim.c):
                srange = dim.srange(c=i)
                channel_error.append(
                    np.average(fout[0][srange] - qout[0][srange]))
            stat['chan_err'] = channel_error

        return stat
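Examples #1 and #6 compute a per-channel mean error through dim.srange(c=i). A minimal numpy-only sketch of the same idea, assuming a plain array whose first axis is the channel dimension (the real code selects channels via the node's named dims instead):

    import numpy as np

    def per_channel_error(fout, qout):
        # Mean signed error for each output channel (channel assumed on axis 0).
        return [float(np.average(fout[c] - qout[c])) for c in range(fout.shape[0])]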
Example #7
 def func(x, y):
     if x is not None and y is not None:
         return qsnr(x.astype(np.float64), y.astype(np.float64))
     return float('nan')
Example #8
 def compress(self,
              node,
              idx,
              bits=None,
              min_qsnr=None,
              force_sparse=False):
     val = self.get(node, idx)
     flattened_val = val.flatten()
     codes = None
     if val.size <= 4:
         LOG.warning('value in node %s is too small to compress', node.name)
         return None
     if bits is not None:
         bins = int(math.pow(2, bits))
         if bins > val.size:
             bits = max(int(math.floor(math.log2(val.size))), 2)
             bins = int(math.pow(2, bits))
             LOG.warning(
                 'more bins than values for node %s - reducing to %s bits',
                 node.name, bits)
         compressed_val, codes, codebook = self.cluster(
             bins, flattened_val, val)
     elif min_qsnr:
         cur_qsnr = -math.inf
         bits = 1
         while cur_qsnr < min_qsnr:
             bits += 1
             if bits > 8:
                 LOG.warning(
                     'value in node %s cannot meet %s QSNR at 8 bits or under - not compressing',
                     node.name, min_qsnr)
                 return None
             bins = int(math.pow(2, bits))
             if bins > val.size:
                 LOG.warning(
                     'value in node %s cannot be reduced in size - not compressing',
                     node.name)
                 return None
             compressed_val, codes, codebook = self.cluster(
                 bins, flattened_val, val)
             cur_qsnr = qsnr(compressed_val.astype(np.float32),
                             val.astype(np.float32))
     else:
         # automatic search of optimal k with inertia method
         silhouette = []
         inertia = []
         for bits in range(2, 9):
             bins = int(math.pow(2, bits))
             if bins > val.size - 1:
                 break
             compressed_val, codes, codebook = self.cluster(bins,
                                                            flattened_val,
                                                            val,
                                                            inertia=inertia)
             silhouette.append(
                 silhouette_score(flattened_val.reshape(-1, 1),
                                  compressed_val.flatten()))
         if len(inertia) <= 1:
             compressed_val, codes, codebook = self.encode_shorter(
                 flattened_val, val)
         else:
             # 2nd grade derivative to find the elbow
             if len(inertia) > 2:
                 cinertia = np.array(inertia)
                 # cinertia[cinertia>1] = 1
                 elb_idx = np.argmax(np.diff(np.diff(cinertia))) + 1
             else:
                 elb_idx = 1
             # take the three around the elbow and look at the silhouette
             bits = np.argmax(np.array(
                 silhouette[elb_idx - 1:elb_idx + 1])) + elb_idx + 1
             bins = int(math.pow(2, bits))
             compressed_val, codes, codebook = self.cluster(
                 bins, flattened_val, val)
     # see if sparse representation is better
     # TODO - this is not entirely correct since it is not accounting for the extra bin created by the sparse value
     freqs = np.unique(codes, return_counts=True)
     max_index = np.where(freqs[1] == freqs[1].max())[0][0]
     sparse_freq = freqs[1][max_index]
     sparse_size = math.ceil((codes.size - sparse_freq) *
                             (bits + 1) + sparse_freq) / 8
     unsparse_size = math.ceil(codes.size * bits) / 8
     if force_sparse:
         sparse = True
         comp_size = sparse_size
     else:
         sparse = sparse_size < unsparse_size
         comp_size = int(min(sparse_size, unsparse_size) + codebook.size)
     if comp_size >= val.size:
         LOG.warning(
             'value in node %s cannot be compressed smaller with this setting',
             node.name)
         return None
     comp_val = CompressedVal(compressed_val, bits, codebook, comp_size,
                              sparse)
     self.set(node, idx, val, comp_val)
     return comp_val
Example #9
 def func(x, y):
     if x is not None and y is not None:
         if x.shape != y.shape:
             return "shape"
         return qsnr(x.astype(np.float64), y.astype(np.float64))
     return "N/A"
Example #10
    def compress(self,
                 node,
                 idx,
                 bits=None,
                 min_qsnr=None,
                 force_sparse=False,
                 allow_sparse=True,
                 qbits=8,
                 threshold=None):
        orig_val = self.get(node, idx, compressed=False)
        val = orig_val.copy()
        if threshold:
            val[np.logical_and(val < threshold, val > 0)] = 0
            val[np.logical_and(val > np.negative(threshold), val < 0)] = 0

        if np.all(val == 0):
            return None
        flattened_val = val.flatten()
        codes = None
        if val.size <= 4:
            LOG.info('value in node %s is too small to compress', node.name)
            return None
        if bits is not None:
            bins = int(math.pow(2, bits))
            if bins > val.size:
                bits = max(int(math.floor(math.log2(val.size))), 2)
                bins = int(math.pow(2, bits))
                LOG.info(
                    'more bins than values for node %s - reducing to %s bits',
                    node.name, bits)
            compressed_val, codes, codebook = self.cluster(
                bins, flattened_val, val)
        elif min_qsnr:
            cur_qsnr = -math.inf
            bits = 1
            while cur_qsnr < min_qsnr:
                bits += 1
                if bits > 8:
                    LOG.info(
                        'value in node %s cannot meet %s QSNR at 8 bits or under - not compressing',
                        node.name, min_qsnr)
                    return None
                bins = int(math.pow(2, bits))
                if bins > val.size:
                    LOG.info(
                        'value in node %s cannot be reduced in size - not compressing',
                        node.name)
                    return None
                compressed_val, codes, codebook = self.cluster(
                    bins, flattened_val, val)
                cur_qsnr = qsnr(compressed_val.astype(np.float32),
                                val.astype(np.float32))
        else:
            # automatic search of optimal k with inertia method
            silhouette = []
            inertia = []
            for bits in range(2, 9):
                bins = int(math.pow(2, bits))
                if bins > val.size - 1:
                    break
                compressed_val, codes, codebook = self.cluster(bins,
                                                               flattened_val,
                                                               val,
                                                               inertia=inertia)
                silhouette.append(
                    silhouette_score(flattened_val.reshape(-1, 1),
                                     compressed_val.flatten()))
            if len(inertia) <= 1:
                compressed_val, codes, codebook = self.encode_shorter(
                    flattened_val, val)
            else:
                # 2nd grade derivative to find the elbow
                if len(inertia) > 2:
                    cinertia = np.array(inertia)
                    # cinertia[cinertia>1] = 1
                    elb_idx = np.argmax(np.diff(np.diff(cinertia))) + 1
                else:
                    elb_idx = 1
                # take the three around the elbow and look at the silhouette
                bits = np.argmax(np.array(
                    silhouette[elb_idx - 1:elb_idx + 1])) + elb_idx + 1
                bins = int(math.pow(2, bits))
                compressed_val, codes, codebook = self.cluster(
                    bins, flattened_val, val)
        # see if sparse representation is better
        unsparse_size = int(math.ceil(codes.size * bits) / 8)
        qelem_codebook_size = math.ceil((codebook.size * qbits) / 8)
        uncompressed_size = int(math.ceil((val.size * qbits) / 8))
        if allow_sparse:
            freqs = np.unique(codes, return_counts=True)
            sparse_idx = np.where(freqs[1] == freqs[1].max())[0][0]
            sparse_freq = freqs[1][sparse_idx]
            sparse_size = int(
                math.ceil((codes.size - sparse_freq) *
                          (bits + 1) + sparse_freq) / 8)
            if force_sparse or sparse_size < unsparse_size:
                sparse = True
                compressed_size = sparse_size
            else:
                sparse = False
                compressed_size = unsparse_size
        else:
            compressed_size = unsparse_size
            sparse = False
            sparse_idx = 0

        compressed_size += qelem_codebook_size
        if compressed_size >= uncompressed_size:
            LOG.info(
                f'value in node {node.name} has not been compressed since its size '
                f'was not reduced {uncompressed_size} bytes -> {compressed_size} bytes'
            )
            return None
        comp_val = CompressedVal(compressed_val, bits, codebook,
                                 compressed_size, sparse, sparse_idx)
        self.set(node, idx, val, comp_val)
        return comp_val
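The size accounting in Example #10 can be checked with a quick back-of-envelope calculation. The numbers below are made up: 1000 values quantized to 8 bits, a 4-bit codebook (16 entries), and a most frequent code occurring 600 times:

    import math

    n, bits, qbits, sparse_freq = 1000, 4, 8, 600
    uncompressed = int(math.ceil(n * qbits / 8))                     # 1000 bytes
    unsparse = int(math.ceil(n * bits) / 8)                          # 500 bytes
    codebook_bytes = math.ceil((2 ** bits) * qbits / 8)              # 16 bytes
    sparse = int(math.ceil((n - sparse_freq) * (bits + 1) + sparse_freq) / 8)  # 325 bytes
    # sparse wins here: 325 + 16 = 341 bytes versus 1000 bytes uncompressed
    print(min(sparse, unsparse) + codebook_bytes, uncompressed)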
Example #11
    def compress(self, node, idx, bits=None, min_qsnr=None, sparse=False):
        val = self.get(node, idx)
        flattened_val = val.flatten()
        if bits is not None:
            bins = int(math.pow(2, bits))
            if bins > val.size:
                raise Exception(
                    'More bins than values with {} bits'.format(bits))
            kmeans = KMeans(n_clusters=bins)
            kmeans.fit(flattened_val.reshape((-1, 1)))
            codebook = kmeans.cluster_centers_
            codebook = codebook.astype(val.dtype)
            codes = vq(flattened_val.reshape((-1, 1)), codebook)
            compressed_val = np.array([codebook[code] for code in codes[0]
                                       ]).reshape(val.shape)
        elif min_qsnr:
            cur_qsnr = -math.inf
            bits = 1
            while cur_qsnr < min_qsnr:
                bits += 1
                if bits > 7:
                    raise Exception(
                        'Cannot find a solution with less than 8 bits \
                                     for {} with min_qsnr = {}'.format(
                            node.name, min_qsnr))
                bins = int(math.pow(2, bits))
                if bins > val.size:
                    break
                kmeans = KMeans(n_clusters=bins)
                kmeans.fit(flattened_val.reshape((-1, 1)))
                codebook = kmeans.cluster_centers_
                codebook = codebook.astype(val.dtype)
                codes = vq(flattened_val.reshape((-1, 1)), codebook)
                compressed_val = np.array(
                    [codebook[code] for code in codes[0]]).reshape(val.shape)
                cur_qsnr = qsnr(compressed_val.astype(np.float32),
                                val.astype(np.float32))
        else:
            # automatic search of optimal k with inertia method
            silhouette = []
            inertia = []
            for bits in range(1, 8):
                bins = int(math.pow(2, bits))
                if bins > val.size:
                    break
                kmeans = KMeans(n_clusters=bins)
                kmeans.fit(flattened_val.reshape((-1, 1)))
                codebook = kmeans.cluster_centers_
                codebook = codebook.astype(val.dtype)
                codes = vq(flattened_val.reshape((-1, 1)), codebook)
                compressed_val = np.array(
                    [codebook[code] for code in codes[0]]).reshape(val.shape)
                inertia.append(kmeans.inertia_)
                silhouette.append(
                    silhouette_score(flattened_val.reshape(-1, 1),
                                     compressed_val.flatten().reshape(-1, 1)))
            elb_idx = np.argmax(np.diff(np.diff(
                np.array(inertia))))  # 2nd grade derivative to find the elbow
            elb_idx = 1 if elb_idx == 0 else elb_idx
            bits = np.argmax(
                np.array(silhouette[elb_idx - 1:elb_idx + 1])
            ) + 1  # take the three around the elbow and look at the silhouette
            bins = int(math.pow(2, bits))
            kmeans = KMeans(n_clusters=bins)
            kmeans.fit(flattened_val.reshape((-1, 1)))
            codebook = kmeans.cluster_centers_
            codebook = codebook.astype(val.dtype)
            codes = vq(flattened_val.reshape((-1, 1)), codebook)
            compressed_val = np.array([codebook[code] for code in codes[0]
                                       ]).reshape(val.shape)

        if sparse:
            freqs = np.unique(codes, return_counts=True)
            max_index = np.where(freqs[1] == freqs[1].max())[0][0]
            sparse_val = freqs[0][max_index]
        else:
            sparse_val = None
        self.set(node, idx, val, compressed_val, sparse_val)
        x = 1
Example #12
def compress(orig_val,
             bits=None,
             min_qsnr=None,
             force_sparse=False,
             allow_sparse=True,
             qbits=8,
             threshold=None,
             force=True):
    val = orig_val.copy()
    manual_bits = bits is not None and force
    if threshold:
        val[np.logical_and(val < threshold, val > 0)] = 0
        val[np.logical_and(val > np.negative(threshold), val < 0)] = 0

    if np.all(val == 0):
        raise CompressionError('value is all zeros')
    flattened_val = val.flatten()
    codes = None
    if val.size <= 4:
        raise CompressionError('value is too small to compress')
    if bits is not None:
        bins = int(math.pow(2, bits)) + (1 if force_sparse else 0)
        if bins > val.size:
            bits = max(int(math.floor(math.log2(val.size))), 2)
            bins = int(math.pow(2, bits))
            LOG.info(f'more bins than values - reducing to {bits} bits')
        compressed_val, codes, codebook = cluster(bins, flattened_val, val)
    elif min_qsnr:
        cur_qsnr = -math.inf
        bits = 1
        while cur_qsnr < min_qsnr:
            if bits == 8 and not force_sparse:
                bins = int(math.pow(2, bits)) + 1
                force_sparse = True
            else:
                bits += 1
                if bits > 8:
                    raise CompressionError(
                        f'value cannot meet {min_qsnr} QSNR at 8 bits or under'
                    )
                bins = int(math.pow(2, bits)) + (1 if force_sparse else 0)
            if bins > val.size:
                raise CompressionError('value cannot be reduced in size')
            compressed_val, codes, codebook = cluster(bins, flattened_val, val)
            cur_qsnr = qsnr(compressed_val.astype(np.float32),
                            val.astype(np.float32))
    else:
        # automatic search of optimal k with inertia method
        silhouette = []
        inertia = []
        for bits in range(2, 8):
            bins = int(math.pow(2, bits))
            if bins > val.size - 1:
                break
            compressed_val, codes, codebook = cluster(bins,
                                                      flattened_val,
                                                      val,
                                                      inertia=inertia)
            silhouette.append(
                silhouette_score(flattened_val.reshape(-1, 1),
                                 compressed_val.flatten()))
        if len(inertia) <= 1:
            compressed_val, codes, codebook = encode_shorter(
                flattened_val, val)
        else:
            # 2nd grade derivative to find the elbow
            if len(inertia) > 2:
                cinertia = np.array(inertia)
                # cinertia[cinertia>1] = 1
                elb_idx = np.argmax(np.diff(np.diff(cinertia))) + 1
            else:
                elb_idx = 1
            # take the three around the elbow and look at the silhouette
            bits = np.argmax(np.array(
                silhouette[elb_idx - 1:elb_idx + 1])) + elb_idx + 1
            bins = int(math.pow(2, bits))
            compressed_val, codes, codebook = cluster(bins, flattened_val, val)
    # see if sparse representation is better
    unsparse_size = int(math.ceil(codes.size * bits) / 8)
    qelem_codebook_size = math.ceil((codebook.size * qbits) / 8)
    uncompressed_size = int(math.ceil((val.size * qbits) / 8))
    if allow_sparse:
        sparse_idx, sparse_freq = compute_sparse(codes)
        sparse_size = int(
            math.ceil((val.size - sparse_freq) * (bits + 1) + sparse_freq) / 8)
        if force_sparse or sparse_size < unsparse_size:
            sparse = True
            compressed_size = sparse_size
        else:
            sparse = False
            compressed_size = unsparse_size
    else:
        compressed_size = unsparse_size
        sparse = False
        sparse_idx = 0

    compressed_size += qelem_codebook_size
    if not manual_bits and compressed_size >= uncompressed_size:
        raise CompressionError('value cannot be reduced in size')
    comp_val = CompressedVal(compressed_val, bits, codebook, compressed_size,
                             sparse, sparse_idx, threshold)
    return comp_val
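A hypothetical call to the module-level compress shown last, with a random weight array; per the code above, CompressionError is raised whenever compression is not worthwhile:

    import numpy as np

    weights = np.random.default_rng(0).normal(size=(64, 32)).astype(np.float32)
    try:
        cval = compress(weights, min_qsnr=30, qbits=8)
        print(cval)
    except CompressionError as err:
        print('not compressed:', err)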