Example #1
import numpy as np

from range_coder import cum_freq_to_prob, prob_to_cum_freq


def test_prob_to_cum_freq():
    """
    Tests that prob_to_cum_freq produces a table with the expected number of
    entries and number of hypothetical samples, and that non-zero
    probabilities are represented by non-zero increases in frequency.

    Also tests that cum_freq_to_prob is normalized and consistent with
    prob_to_cum_freq.
    """

    randomState = np.random.RandomState(190)
    resolution = 1024

    p0 = randomState.dirichlet([.1] * 50)
    cumFreq0 = prob_to_cum_freq(p0, resolution)
    p1 = cum_freq_to_prob(cumFreq0)
    cumFreq1 = prob_to_cum_freq(p1, resolution)

    # number of hypothetical samples should correspond to resolution
    assert cumFreq0[-1] == resolution
    assert len(cumFreq0) == len(p0) + 1

    # non-zero probabilities should have non-zero frequencies
    assert np.all(np.diff(cumFreq0)[p0 > 0.] > 0)

    # probabilities should be normalized.
    assert np.isclose(np.sum(p1), 1.)

    # while the probabilities might change, frequencies should not
    assert cumFreq0 == cumFreq1
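
The test above exercises the round trip between a probability vector and its cumulative frequency table. A minimal standalone sketch of that round trip, assuming range_coder and numpy are installed (the exact table values depend on the library's rounding):

import numpy as np
from range_coder import cum_freq_to_prob, prob_to_cum_freq

prob = [0.5, 0.2, 0.3]
cumFreq = prob_to_cum_freq(prob, resolution=16)  # list of length len(prob) + 1
probRec = cum_freq_to_prob(cumFreq)              # recovered probabilities, summing to 1

print(cumFreq)
print(np.round(probRec, 3))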
Example #2
from range_coder import prob_to_cum_freq


def test_prob_to_cum_freq_zero_prob():
    """
    Tests whether prob_to_cum_freq handles zero probabilities as expected.
    """

    prob1 = [0.5, 0.25, 0.25]
    cumFreq1 = prob_to_cum_freq(prob1, resolution=8)

    prob0 = [0.5, 0., 0.25, 0.25, 0., 0.]
    cumFreq0 = prob_to_cum_freq(prob0, resolution=8)

    # removing entries corresponding to zeros
    assert [cumFreq0[0]] + [cumFreq0[i + 1]
                            for i, p in enumerate(prob0) if p > 0.] == cumFreq1
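
The assertion above relies on zero-probability symbols receiving zero frequency increments, so dropping their entries reproduces the table built from the reduced distribution. A small hedged check of that property, assuming range_coder and numpy are available:

import numpy as np
from range_coder import prob_to_cum_freq

prob = [0.5, 0., 0.25, 0.25, 0., 0.]
increments = np.diff(prob_to_cum_freq(prob, resolution=8))

# zero-probability symbols should end up with zero-width intervals
assert np.all(increments[np.asarray(prob) == 0.] == 0)
print(increments)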
Example #3
    def build_latent_distribution(self, alpha: int = 1):
        """Two passes:num_images_pixels
            1. we calculate the minimum latent value across our entire training
                distribution,
            2. we then add |min| to the latent values such that they are all >= 0 and
                then use torch.bincount to get discrete value counts
          -> which we then laplace smooth and convert into a CDF.
          If a code is in multiple parts, e.g lateral FPN features, they are flattened and concatenated.
        """
        self.cdf = dict()
        self.min_val = torch.tensor(0.0).to(self.device).long()
        num_images = 0
        self.model.eval()
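        # first pass (only if codes can be negative): find the minimum latent value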
        if self.negative_codes:
            for batch in self.train_loader:
                with torch.no_grad():
                    out_dict = self.model(batch)
                    for code_feat in self.code_feats:
                        self.min_val = torch.min(
                            self.min_val,
                            out_dict[code_feat].long().min(),
                        )
                num_images += len(batch)
                if num_images > self.num_train_images:
                    break

        self.min_val = self.min_val.abs()
        self.bins = torch.tensor([0.0]).to(self.device).long()
        num_images = 0
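        # second pass: accumulate bincounts of the shifted (non-negative) latents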
        for batch in self.train_loader:
            with torch.no_grad():
                out_dict = self.model(batch)
                flat_codes = []
                for code_feat in self.code_feats:
                    flat_codes.append(out_dict[code_feat].long().flatten() +
                                      self.min_val)
                batch_bins = torch.bincount(torch.cat(flat_codes))
                if len(batch_bins) > len(self.bins):
                    batch_bins[:len(self.bins)] += self.bins
                    self.bins = batch_bins
                elif len(self.bins) > len(batch_bins):
                    self.bins[:len(batch_bins)] += batch_bins
                else:
                    self.bins += batch_bins
            num_images += len(batch)
            if num_images > self.num_train_images:
                break

        bins = self.bins.float()
        # additively (Laplace) smooth the counts with alpha and normalize
        bins_smooth = ((bins + alpha) / (bins.sum() + len(bins) * alpha)).cpu()
        # convert the smoothed distribution into a cumulative frequency table
        self.cdf = rc.prob_to_cum_freq(bins_smooth, resolution=2 * len(bins_smooth))
        self.model.train()
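
A hedged sketch of how the resulting cumulative frequency table might then be used to entropy-code latents with the range coder; encode_latents, its arguments, and the output path are illustrative assumptions, not part of the class above:

# hypothetical follow-up: encode shifted latents with the CDF built above
import range_coder as rc
import torch


def encode_latents(out_dict, code_feats, min_val, cdf, path):
    # flatten and shift the codes exactly as during CDF construction
    flat = torch.cat([out_dict[f].long().flatten() + min_val for f in code_feats])
    encoder = rc.RangeEncoder(path)
    encoder.encode(flat.cpu().tolist(), cdf)
    encoder.close()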
Example #4
import numpy as np
import range_coder


def apply_range_encoder(seq_data, encodepath, args, config):
    resolution = config['resolution']
    prob = np.load('data_info/distribution_info_{}.npy'.format(args.model_num))

    # avoid zero probabilities: give every symbol at least one count
    modified_freq = prob * resolution + 1
    modified_prob = modified_freq / np.sum(modified_freq)

    cum_freq = range_coder.prob_to_cum_freq(modified_prob,
                                            resolution=resolution)

    range_encoder = range_coder.RangeEncoder(encodepath)
    # does the cum_freq resolution influence performance?
    range_encoder.encode(seq_data, cum_freq)
    range_encoder.close()
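
The prob * resolution + 1 step above is an add-one style adjustment so that no symbol ends up with a zero-width interval. A hedged check that every symbol keeps a non-zero increment after the adjustment (the example distribution is made up):

import numpy as np
import range_coder

prob = np.array([0.7, 0.3, 0.0, 0.0])  # example distribution containing zeros
resolution = 1024

modified_freq = prob * resolution + 1  # every symbol gets at least one count
modified_prob = modified_freq / np.sum(modified_freq)

cum_freq = range_coder.prob_to_cum_freq(modified_prob, resolution=resolution)
assert np.all(np.diff(cum_freq) > 0)   # no zero-width intervals remain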
Example #5
import numpy as np
import pytest
from tempfile import mkstemp

from range_coder import RangeEncoder, prob_to_cum_freq


def test_range_coder_overflow():
    """
    Cumulative frequencies must fit in an unsigned integer (assumed to be represented by 32 bits).
    This test checks that an OverflowError is raised if the frequencies exceed
    that limit.
    """

    numBytes = 17
    filepath = mkstemp()[1]

    prob = [4, 6, 8]
    prob = np.asarray(prob, dtype=np.float64) / np.sum(prob)
    cumFreq = prob_to_cum_freq(prob, 128)

    # force the total frequency past the 32-bit limit
    cumFreq[-1] = 2**32

    sequence = [2, 2]
    data = sequence * numBytes

    encoder = RangeEncoder(filepath)
    with pytest.raises(OverflowError):
        encoder.encode(data, cumFreq)
    encoder.close()
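
Since cumFreq[-1] is the total number of hypothetical samples, it is the value that must stay within the 32-bit range. A hedged guard one could run before encoding; the helper name and exact threshold are assumptions based on the docstring above:

from range_coder import prob_to_cum_freq


def check_cum_freq(cum_freq, limit=2**32):
    # cum_freq[-1] is the total number of hypothetical samples (assumed 32-bit limit)
    if cum_freq[-1] >= limit:
        raise OverflowError('cumulative frequency total exceeds the 32-bit range')
    return cum_freq


cum_freq = check_cum_freq(prob_to_cum_freq([0.25, 0.75], resolution=128))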
Example #6
import numpy as np
import range_coder


def apply_range_decoder(seq_data_len, decodepath, config):
    resolution = config['resolution']
    prob = np.load('data_info/distribution_info.npy')

    # avoid zero probabilities: give every symbol at least one count
    modified_freq = prob * resolution + 1
    modified_prob = modified_freq / np.sum(modified_freq)

    cum_freq = range_coder.prob_to_cum_freq(modified_prob, resolution=resolution)

    range_decoder = range_coder.RangeDecoder(decodepath)

    # does the cum_freq resolution influence performance?
    seq_data = range_decoder.decode(seq_data_len, cum_freq)

    return seq_data
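
A hedged round-trip sketch pairing this decoder with the encoder from Example #4; the sequence, path, args, and config are placeholders, and the round trip only works if both helpers load the same distribution file:

from types import SimpleNamespace

seq_data = [3, 1, 4, 1, 5]           # placeholder symbol sequence
config = {'resolution': 1024}        # must match the distribution used for encoding
args = SimpleNamespace(model_num=0)  # placeholder; selects the distribution .npy

apply_range_encoder(seq_data, 'latents.bin', args, config)
decoded = apply_range_decoder(len(seq_data), 'latents.bin', config)
assert decoded == seq_data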
Example #7
from range_coder import RangeEncoder, RangeDecoder, prob_to_cum_freq
import os

data = [2, 0, 1, 0, 0, 0, 1, 2, 2]
prob = [0.5, 0.2, 0.3]

# convert probabilities to cumulative integer frequency table
cumFreq = prob_to_cum_freq(prob, resolution=4)

print(cumFreq)

filepath="output.txt"
# encode data
encoder = RangeEncoder(filepath)
encoder.encode(data, cumFreq)
encoder.close()

# decode data
decoder = RangeDecoder(filepath)
dataRec = decoder.decode(len(data), cumFreq)
decoder.close()

print(os.stat(filepath))
print(dataRec)
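
The resolution question raised in Examples #4 and #6 can be probed with the same setup by comparing encoded sizes at two resolutions; this is a hedged experiment, and the output paths are placeholders:

import os
from range_coder import RangeEncoder, prob_to_cum_freq

data = [2, 0, 1, 0, 0, 0, 1, 2, 2] * 100
prob = [0.5, 0.2, 0.3]

for resolution in (4, 128):
    cumFreq = prob_to_cum_freq(prob, resolution=resolution)
    path = 'output_{}.bin'.format(resolution)
    encoder = RangeEncoder(path)
    encoder.encode(data, cumFreq)
    encoder.close()
    print(resolution, os.stat(path).st_size)  # higher resolution models prob more closely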