Beispiel #1
0
    def compress(tensor, name=None, ratio=0.05, tb=None, i_ratio=0.25, stages=1, ec_grad_w=1.0, ec_mem_w=0.0):
        """Select roughly ``ratio * tensor.numel()`` entries of ``tensor`` by thresholding.

        A threshold is derived from the mean and standard deviation of the
        tensor via ``utils.gen_threshold_from_normal_distribution`` (presumably
        the bounds of a normal distribution enclosing the requested probability
        mass -- confirm against ``utils``).  The threshold is then nudged for at
        most five rounds until the number of selected entries lies within
        ``[2k/3, 4k/3]`` where ``k = max(int(numel * ratio), 1)``.

        Side effect: the L2 norm of ``tensor`` is stored on
        ``GaussianKSGDCompressorEC.norm``.

        Args:
            tensor: Tensor to compress.  It is indexed with flat positions, so
                it is presumably 1-D -- verify against callers.
            name: Not used by this implementation.
            ratio: Target fraction of entries to keep.
            tb, i_ratio, stages, ec_grad_w, ec_mem_w: Not used by this
                implementation; accepted for signature compatibility.

        Returns:
            Tuple ``(tensor, indexes, values)``: the unmodified input, the
            selected positions, and ``tensor`` evaluated at those positions.
        """
        with torch.no_grad():
            numel = tensor.numel()
            k = max(int(numel * ratio), 1)

            # Published on the class; nothing below reads it.
            GaussianKSGDCompressorEC.norm = tensor.norm(2)

            # TODO (carried over from the original "TODO:REMOVE"): the original
            # built |tensor / norm| and then immediately replaced it with the
            # raw tensor, so selection has always run on the signed values --
            # only entries above the (upper) threshold are picked.  The unused
            # normalised copy is no longer materialised.
            scores = tensor

            # Feature-detect std_mean instead of comparing version strings:
            # '1.10.0' < '1.3.0' is True lexicographically, which sent newer
            # releases down the legacy branch.
            if hasattr(torch, 'std_mean'):
                t_std, t_mean = torch.std_mean(scores)
            else:
                t_std = torch.std(scores)
                t_mean = torch.mean(scores)

            _, right_thres = utils.gen_threshold_from_normal_distribution(1 - ratio, float(t_mean), float(t_std))

            # At most five correction rounds.  When the budget runs out,
            # `indexes` reflects the threshold used in the last round, not the
            # final adjusted value.
            for _ in range(5):
                selected = scores > right_thres
                indexes = selected.nonzero().squeeze().view(-1)
                if indexes.numel() < 2 * k / 3:
                    # Too few survivors: lower the bar.
                    right_thres *= 0.5
                elif indexes.numel() > 4 * k / 3:
                    # Too many survivors: raise the bar.
                    right_thres *= 1.5
                else:
                    break

            values = tensor.data[indexes]

            return tensor, indexes, values
Beispiel #2
0
    def compress(tensor, name=None, sigma_scale=3, ratio=0.05):
        """Keep the large-magnitude entries of ``tensor`` and stash the rest as a residual.

        The cut-off comes from ``utils.gen_threshold_from_normal_distribution``
        fed with the tensor's mean and standard deviation (presumably a normal
        quantile -- confirm against ``utils``).  It is corrected for up to five
        rounds so that the number of selected entries lands in
        ``[2k/3, 4k/3]`` with ``k = max(int(numel * ratio), 1)``.

        Side effect: ``GaussianCompressor.residuals[name]`` is overwritten with
        a copy of ``tensor`` in which the selected positions are zeroed.

        Args:
            tensor: Tensor to compress; indexed with flat positions.
            name: Key under which the residual is stored.
            sigma_scale: Not used by this implementation.
            ratio: Target fraction of entries to keep.

        Returns:
            Tuple ``(tensor, indexes, values)``.
        """
        with torch.no_grad():
            store = GaussianCompressor.residuals
            if name not in store:
                store[name] = torch.zeros_like(tensor.data)

            k = max(int(tensor.numel() * ratio), 1)

            sigma = torch.std(tensor)
            mu = torch.mean(tensor)
            _, cutoff = utils.gen_threshold_from_normal_distribution(
                1 - ratio, float(mu), float(sigma))
            magnitude = torch.abs(tensor)

            # Bounded search: halve the cut-off when too few entries pass,
            # grow it by 50% when too many do, stop once the count is in range.
            for _ in range(5):
                above = magnitude > cutoff
                indexes = above.nonzero().data.squeeze().view(-1)
                picked = indexes.numel()
                if picked < 2 * k / 3:
                    cutoff *= 0.5
                elif picked > 4 * k / 3:
                    cutoff *= 1.5
                else:
                    break

            values = tensor.data[indexes]

            # Residual = everything that was not selected.
            leftover = store[name]
            leftover.data = tensor.data + 0.0
            leftover.data[indexes] = 0.0
            return tensor, indexes, values
Beispiel #3
0
    def compress(tensor, name=None, sigma_scale=3, ratio=0.05):
        """Select large-magnitude entries of ``tensor`` with residual accumulation.

        The residual stored for ``name`` is first added to ``tensor`` in place.
        A threshold obtained from ``utils.gen_threshold_from_normal_distribution``
        (fed with the tensor's mean/std; presumably a normal quantile -- confirm
        against ``utils``) is then refined for up to 100 rounds while the
        candidate set is narrowed, aiming for a selection count within
        ``[2k/3, 4k/3]`` where ``k = max(int(numel * ratio), 1)``.

        Side effects:
            * ``tensor`` is modified in place (residual added).
            * ``GaussianCompressor.residuals[name]`` becomes a copy of the
              updated tensor with the selected positions zeroed.
            * ``GaussianCompressor.zc`` is set to a float mask that is 0.0 at the
              selected positions and 1.0 elsewhere.

        Args:
            tensor: Tensor to compress; indexed with flat positions.
            name: Key of the residual buffer.
            sigma_scale: Not used by this implementation.
            ratio: Target fraction of entries to keep.

        Returns:
            Tuple ``(tensor, indexes, values)``.
        """
        with torch.no_grad():
            if name not in GaussianCompressor.residuals:
                GaussianCompressor.residuals[name] = torch.zeros_like(tensor.data)
            numel = tensor.numel()
            k = max(int(numel * ratio), 1)

            # Fold in what earlier calls for this name left unselected.
            tensor.add_(GaussianCompressor.residuals[name].data)

            std = torch.std(tensor)
            mean = torch.mean(tensor)
            left_thres, right_thres = utils.gen_threshold_from_normal_distribution(1-ratio, float(mean), float(std))
            # abs_tensor / abs_indexes form the shrinking candidate pool: the
            # magnitudes still under consideration and their original positions.
            abs_tensor = torch.abs(tensor)
            abs_indexes = torch.arange(0, abs_tensor.numel(), device=tensor.device)
            loops = 0
            # Positions accepted so far (starts empty).
            real_indexes = torch.ones(0, device=tensor.device, dtype=torch.int64)
            last_large = False
            while loops < 100:
                one_indexes = abs_tensor > right_thres
                zero_indexes = ~one_indexes
                indexes = one_indexes.nonzero().data.squeeze().view(-1)
                last_large = False
                if real_indexes.numel() + indexes.numel() < 2*k/3:
                    # Still too few: accept everything above the threshold,
                    # drop it from the pool and search much lower next round.
                    real_indexes = torch.cat([real_indexes, abs_indexes[indexes]])
                    abs_tensor = abs_tensor[zero_indexes]
                    abs_indexes = abs_indexes[zero_indexes]
                    right_thres *= 0.1
                elif real_indexes.numel() + indexes.numel() > 4*k/3:
                    # Too many: accept nothing yet, keep only the entries above
                    # the threshold as candidates and raise the threshold.
                    last_large = True
                    abs_tensor = abs_tensor[one_indexes]
                    abs_indexes = abs_indexes[one_indexes]
                    right_thres *= 2
                else:
                    # Count is within range: accept and stop.
                    real_indexes = torch.cat([real_indexes, abs_indexes[indexes]])
                    break
                loops += 1
            # Only reachable with last_large set when the loop budget ran out on
            # a "too many" round; in that case take the whole remaining pool
            # rather than return an undersized selection.
            if last_large and real_indexes.numel() < 2*k/3: 
                real_indexes = torch.cat([real_indexes, abs_indexes])
            #if hvd.rank() == 0:
            #    print('real_indexes.numel(): %d, k: %d' % (real_indexes.numel(), k))
            #one_indexes = abs_tensor > right_thres
            #indexes = one_indexes.nonzero().data.squeeze().view(-1)
            indexes = real_indexes #[0:k]
            values = tensor.data[indexes] 
            #print('gaussion vs topk: ', indexes.numel(), k)
            # New residual: a copy of the (already updated) tensor with the
            # selected entries cleared.
            GaussianCompressor.residuals[name].data = tensor.data + 0.0 
            GaussianCompressor.residuals[name].data[indexes] = 0.0
            # NOTE(review): zc is allocated once, sized by the first tensor seen.
            # If later calls pass tensors with a different numel the mask will
            # not match (and indexing may go out of range) -- confirm that all
            # callers use a single fixed size.
            if GaussianCompressor.zc is None:
                GaussianCompressor.zc = torch.ones(tensor.numel(), dtype=torch.float32, device=tensor.device)
            GaussianCompressor.zc.fill_(1.0)
            GaussianCompressor.zc[indexes] = 0.0
            return tensor, indexes, values
Beispiel #4
0
def test_gaussion_thres():
    """Print a comparison of a 3-sigma cut-off with the helper-generated threshold.

    Draws 10000 normal samples, zeroes every sample whose magnitude is below
    three sample standard deviations, and passes the resulting zeroed fraction
    (together with the sample mean/std) to
    ``utils.gen_threshold_from_normal_distribution``.  Nothing is asserted;
    all results are printed for manual inspection.
    """
    set_mean = 0.0
    set_std = 0.5
    samples = np.random.normal(set_mean, set_std, 10000)

    # Normality test p-value of the raw draw.
    _, normality_p = stats.normaltest(samples)
    print(normality_p)

    # Statistics are taken before any sample is zeroed.
    nonzero_before = np.count_nonzero(samples)
    sample_mean = np.mean(samples)
    sample_std = np.std(samples)
    print('size:%d, nnz: %d' % (samples.size, nonzero_before))
    print(set_mean, set_std)
    print(sample_mean, sample_std)

    # Zero everything inside the 3-sigma band, in place.
    cutoff = 3 * sample_std
    inside_band = np.abs(samples) < cutoff
    samples[inside_band] = 0

    survivors = np.count_nonzero(samples) * 1.0
    zeroed_fraction = 1 - survivors / samples.size
    print('size:%d, p-value: %f' % (samples.size, zeroed_fraction))

    _, generated = utils.gen_threshold_from_normal_distribution(zeroed_fraction, sample_mean, sample_std)
    print('real thres:%f, gen thres: %f' % (cutoff, generated))
Beispiel #5
0
    def compress(tensor, name=None, ratio=0.05, tb=None, i_ratio=0.25, stages=1, ec_grad_w=1.0, ec_mem_w=0.0):
        """Select entries of ``tensor`` above a threshold refined over several stages.

        An initial threshold is derived from the tensor's mean/std through
        ``utils.gen_threshold_from_normal_distribution`` (presumably a normal
        quantile -- confirm against ``utils``).  With more than one stage (or
        ``stages == 0``), the entries above the current threshold are
        repeatedly re-fitted after shifting them by their minimum, and a new
        threshold is generated for that subset.

        Stage handling:
            * ``stages < 0`` or ``i_ratio == 0.0``: the stage count is taken
              from ``GaussianCompressorEC.cur_stages`` and, when the passed
              ``stages`` is non-zero, ``GaussianCompressorEC.adapt_stages`` is
              called with the achieved ratio afterwards.
            * ``stages == 0``: the number of refinement rounds is derived from
              ``ratio / NoneCompressor.first_ratio`` and ``i_ratio``.
            * ``stages > 1``: ``stages - 1`` refinement rounds with a per-round
              ratio derived from ``ratio / NoneCompressor.first_ratio``.

        Side effect: the L2 norm of ``tensor`` is stored on
        ``GaussianCompressorEC.norm``.

        Args:
            tensor: Tensor to compress; indexed with flat positions.
            name: Not used by this implementation.
            ratio: Target fraction of entries to keep.
            tb: Not used by this implementation.
            i_ratio: Per-stage keep ratio (overwritten when ``stages > 1``).
            stages: Number of stages; see "Stage handling".
            ec_grad_w, ec_mem_w: Not used by this implementation.

        Returns:
            Tuple ``(tensor, indexes, values)``.  The selection is empty when a
            refinement round finds no entry above the current threshold.
        """
        def _std_and_mean(t):
            # Feature-detect std_mean instead of comparing version strings:
            # '1.10.0' < '1.3.0' is True lexicographically, which sent newer
            # releases down the legacy branch.
            if hasattr(torch, 'std_mean'):
                return torch.std_mean(t)
            return torch.std(t), torch.mean(t)

        with torch.no_grad():
            numel = tensor.numel()

            ada_stages = 0
            if stages < 0 or i_ratio == 0.0:
                ada_stages = stages
                stages = GaussianCompressorEC.cur_stages

            # Published on the class; nothing below reads it.
            GaussianCompressorEC.norm = tensor.norm(2)

            # TODO (carried over from the original "TODO:REMOVE"): the original
            # built |tensor / norm| and then immediately replaced it with the
            # raw tensor, so thresholding has always run on the signed values.
            # The unused normalised copy and the defensive clone of a tensor
            # that is never mutated here are no longer materialised.
            candidates = tensor

            t_std, t_mean = _std_and_mean(candidates)

            first_ratio = NoneCompressor.first_ratio
            if stages == 1 or ratio >= first_ratio:
                initial_ratio = ratio
            else:
                initial_ratio = first_ratio
            _, threshold = utils.gen_threshold_from_normal_distribution(1 - initial_ratio, float(t_mean),
                                                                        float(t_std))

            r_ratio = ratio / first_ratio
            if stages > 1 or stages == 0:
                if stages == 0:
                    loop = math.ceil(math.log(r_ratio) / math.log(i_ratio))
                else:
                    i_ratio = math.pow(r_ratio, 1.0 / (stages - 1))
                    loop = stages - 1
                i = loop
                while i > 0:
                    indexes = (candidates > threshold).nonzero().squeeze().view(-1)
                    if indexes.numel() == 0:
                        # Nothing left to refine; min() on an empty tensor
                        # would raise.  Keep the current threshold.
                        break
                    candidates = candidates.data[indexes]

                    # Re-fit on the surviving entries, shifted to start at 0.
                    t_min = candidates.min()
                    t_std, t_mean = _std_and_mean(candidates - t_min)

                    # The last round of the adaptive (stages == 0) schedule
                    # uses whatever ratio is left to reach r_ratio overall.
                    if i == 1 and stages == 0:
                        stage_ratio = r_ratio / math.pow(i_ratio, loop - 1)
                    else:
                        stage_ratio = i_ratio
                    _, threshold = utils.gen_threshold_from_normal_distribution(1 - stage_ratio, float(t_mean),
                                                                                float(t_std))
                    # Undo the shift so the threshold applies to raw values.
                    threshold += t_min

                    i -= 1

            # Final selection is always made on the full tensor.
            indexes = (tensor > threshold).nonzero().squeeze().view(-1)
            values = tensor.data[indexes]

            if ada_stages:
                actual_ratio = (1.0 * values.numel() / numel)
                GaussianCompressorEC.adapt_stages(actual_ratio, ratio, ada_stages)

            return tensor, indexes, values