def test_no_fit(self):
    """Check that the Zipf fit rejects a sample that is not Zipfian.

    Fits 100 integer frequencies drawn with ``random.randint(0, 20)`` and
    asserts that the second value returned by ``traces.zipf_fit`` (the p
    value of the fit) does not exceed 0.02.
    """
    p_max = 0.02  # Largest p accepted for a non-Zipfian sample
    samples = [random.randint(0, 20) for _ in range(100)]
    freqs = np.asarray(samples)
    _alpha, fit_p = traces.zipf_fit(freqs)
    self.assertLessEqual(fit_p, p_max)
def zipf_estimation(trace_path, min_interval_size=2000):
    """Estimate the Zipf alpha parameter over consecutive intervals of a trace.

    The trace is read as CSV from ``'resources/' + trace_path``. Occurrences
    of the value in the third column of each row are counted and, every
    ``min_interval_size`` rows, the counts are handed to ``zipf_fit`` (with
    ``need_sorting=True``) and then reset for the next interval.

    Parameters
    ----------
    trace_path : str
        Path of the trace file, relative to the ``resources/`` directory.
    min_interval_size : int, optional
        Number of requests (CSV rows) making up one interval.

    Returns
    -------
    data : list
        One entry per completed interval: the alpha returned by ``zipf_fit``
        when the fit probability it reports is above 0.95, ``None``
        otherwise. Rows left over after the last completed interval are
        not fitted.

    Raises
    ------
    IndexError
        If a row has fewer than three columns.
    """
    print('estimating zipfian alpha parameter for all intervals of the trace')
    data = []
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation recommends for files passed to csv.reader.
    with open('resources/' + trace_path, 'r', newline='') as trace:
        occurrence_aggregate = defaultdict(int)
        # Rows are numbered from 1 so that `requests` is the running count
        # of requests seen so far.
        for requests, line in enumerate(csv.reader(trace), start=1):
            # Only the third column is used; NOTE(review): the original
            # code named the columns time, receiver, object -- confirm
            # against the trace format.
            occurrence_aggregate[line[2]] += 1
            if requests % min_interval_size != 0:
                continue
            zipf_alpha, zipf_fit_prob = zipf_fit(
                list(occurrence_aggregate.values()), need_sorting=True)
            # Keep the estimate only when the fit is deemed good enough.
            data.append(zipf_alpha if zipf_fit_prob > 0.95 else None)
            # Start counting afresh for the next interval.
            occurrence_aggregate.clear()
    return data
def test_expected_fit(self):
    """Check that the Zipf fit recovers alpha for known Zipf distributions.

    For every alpha in ``np.arange(0.2, 5.0, 0.1)`` a truncated Zipf
    distribution over 1000 items is built, its pdf is fitted, and the
    estimate must be within 0.02 of the true alpha with p of at least 0.99.
    """
    n_items = 1000  # Number of Zipf distribution items
    max_alpha_error = 0.02  # Tolerated alpha estimation error
    min_p = 0.99  # Min p
    for true_alpha in np.arange(0.2, 5.0, 0.1):
        dist = TruncatedZipfDist(true_alpha, n_items)
        fitted_alpha, fit_p = traces.zipf_fit(dist.pdf)
        alpha_error = np.abs(true_alpha - fitted_alpha)
        self.assertLessEqual(alpha_error, max_alpha_error)
        self.assertGreaterEqual(fit_p, min_p)
def test_expected_fit_not_sorted(self):
    """Check that the Zipf fit recovers alpha from an unsorted pdf.

    For every alpha in ``np.arange(0.2, 5.0, 0.1)`` the pdf of a truncated
    Zipf distribution over 1000 items is shuffled in place and fitted with
    ``need_sorting=True``; the estimate must be within 0.02 of the true
    alpha with p of at least 0.99.
    """
    n_items = 1000  # Number of Zipf distribution items
    max_alpha_error = 0.02  # Tolerated alpha estimation error
    min_p = 0.99  # Min p
    for true_alpha in np.arange(0.2, 5.0, 0.1):
        shuffled_pdf = TruncatedZipfDist(true_alpha, n_items).pdf
        # Destroy the rank ordering so the fit has to sort the input itself.
        np.random.shuffle(shuffled_pdf)
        fitted_alpha, fit_p = traces.zipf_fit(shuffled_pdf, need_sorting=True)
        alpha_error = np.abs(true_alpha - fitted_alpha)
        assert alpha_error <= max_alpha_error
        assert fit_p >= min_p