def test_transform(self):
    """transform() should merge the values of keys that map to the same result.

    The words starting with 't' carry 0.50 + 0.10 and the others
    0.20 + 0.20, so the expected result is {True: 0.6, False: 0.4}.
    """
    word_probs = StatsCounter({
        'of': 0.20,
        'the': 0.50,
        'that': 0.10,
        'from': 0.20
    })

    def starts_with_t(word, prob):
        # Only the key matters for the grouping; the probability is ignored.
        return word.startswith('t')

    grouped = word_probs.transform(starts_with_t)
    assert grouped == StatsCounter({True: 0.6, False: 0.4})
def test_normalize(self):
    """normalize() should turn the counts 1, 2, 1 into 0.25, 0.50, 0.25."""
    counts = StatsCounter({1: 1, 2: 2, 3: 1})
    normalized = counts.normalize()
    expected = {
        1: 0.25,
        2: 0.50,
        3: 0.25,
    }
    assert normalized == expected
def _get_revision_word_dist(self, page_title, revid): """""" revids_to_word_dist = self.ctitle_to_revids_to_word_dist[page_title] if revid in revids_to_word_dist: return revids_to_word_dist[revid] text = self._get_revision_text(page_title, revid) text = [word.lower() for word in wordpunct_tokenize(text) if word.lower() not in STOPWORDS and word.lower() not in PUNCTUATION] pdist = StatsCounter(text).normalize() revids_to_word_dist[revid] = pdist return pdist
def test_rank(etree):
    """The first item produced by rank() should pair the <article> node with nine 'div' tags."""
    parents = etree.xpath('//body//*/..')
    ranked = rank(measure(parents))
    first_entry = list(ranked)[0]
    article_node = etree.xpath('//article')[0]
    assert first_entry == (article_node, StatsCounter(['div'] * 9))
def test_measure(etree):
    """measure() over the parents of <div> elements should yield one 'article' entry counting nine 'div' tags."""
    measured = measure(etree.xpath('//div/..'))

    # Compare by tag name rather than by node object.
    tag_metric_pairs = []
    for node, metric in measured:
        tag_metric_pairs.append((node.tag, metric))

    expected = [('article', StatsCounter(['div'] * 9))]
    assert tag_metric_pairs == expected
def __init__(self, *args, **kwargs):
    """Initialize the instance by forwarding all arguments to ``StatsCounter.__init__``.

    Positional and keyword arguments are passed through unchanged; no
    additional state is set up here.
    """
    StatsCounter.__init__(self, *args, **kwargs)
def test_get_weighted_random_value(self):
    """get_weighted_random_value() must return one of the counter's keys."""
    wrv = StatsCounter(a=10, b=3).get_weighted_random_value()
    # The previous form `wrv == "a" or "b"` parsed as `(wrv == "a") or "b"`,
    # which is always truthy, so the assertion could never fail.
    assert wrv in ("a", "b")
class TestStatsCounter:
    """Tests for the statistical helpers exposed by ``StatsCounter``."""

    # Shared fixture: keys '0'..'999', each mapped to its own integer value,
    # so the values are the 1000 distinct integers 0..999.
    counter_ints = StatsCounter({str(s): s for s in range(1000)})

    def test_mean_int(self):
        """mean() should equal the sum of 0..999 (499500) divided by 1000."""
        m = self.counter_ints.mean()
        d = 499500/1000
        assert m == d

    def test_median_low(self):
        """median_low() should return the lower of the two middle values."""
        m = self.counter_ints.median_low()
        assert m == 499

    def test_median_high(self):
        """median_high() should return the higher of the two middle values."""
        m = self.counter_ints.median_high()
        assert m == 500

    def test_median_grouped(self):
        """median_grouped() is expected to be 499.5 for the values 0..999."""
        m = self.counter_ints.median_grouped()
        assert m == 499.5

    def test_mode(self):
        """mode() is expected to raise StatisticsError for this fixture."""
        # NOTE(review): every value in the fixture is distinct, so presumably
        # the error signals "no unique mode" -- confirm against
        # StatsCounter.mode.
        with raises(stats.StatisticsError):
            self.counter_ints.mode()

    def test_variance(self):
        """variance() should match the expected value for 0..999."""
        m = self.counter_ints.variance()
        assert m == 83416.66666666667

    def test_stdev(self):
        """stdev() should match the expected value for 0..999."""
        m = self.counter_ints.stdev()
        assert m == 288.8194360957494

    def test_pvariance(self):
        """pvariance() should match the expected value for 0..999."""
        m = self.counter_ints.pvariance()
        assert m == 83333.25

    def test_pstdev(self):
        """pstdev() should match the expected value for 0..999."""
        m = self.counter_ints.pstdev()
        assert m == 288.6749902572095

    def test_argmax(self):
        """argmax() should return the key holding the largest value."""
        m = self.counter_ints.argmax()
        assert m == '999'

    def test_max(self):
        """max() should return the largest value."""
        m = self.counter_ints.max()
        assert m == 999

    def test_normalize(self):
        """normalize() should turn the counts 1, 2, 1 into 0.25, 0.50, 0.25."""
        pdist = StatsCounter({1: 1, 2: 2, 3: 1}).normalize()
        assert pdist == {
            1: 0.25,
            2: 0.50,
            3: 0.25,
        }

    def test_get_weighted_random_value(self):
        """get_weighted_random_value() must return one of the counter's keys."""
        wrv = StatsCounter(a=10, b=3).get_weighted_random_value()
        # The previous form `wrv == "a" or "b"` parsed as
        # `(wrv == "a") or "b"`, which is always truthy, so the assertion
        # could never fail.
        assert wrv in ("a", "b")

    def test_transform(self):
        """transform() should merge the values of keys that map to the same result."""
        dist = StatsCounter({
            'of': 0.20,
            'the': 0.50,
            'that': 0.10,
            'from': 0.20
        })
        dist = dist.transform(lambda word, prob: word.startswith('t'))
        assert dist == StatsCounter({True: 0.6, False: 0.4})
def measure(nodes):
    """Pair each node with a StatsCounter built from the tags of its direct children.

    Returns a list of ``(node, StatsCounter)`` tuples in the order the nodes
    were given.
    """
    measured = []
    for node in nodes:
        child_tags = [child.tag for child in node]
        measured.append((node, StatsCounter(child_tags)))
    return measured