class Histogram(StatsFilter):
    """
    Generate a basic histogram of the specified field.

    The value() method returns a dict of value to occurrence count mappings.
    The __str__ method generates a basic and limited histogram useful for
    printing to the command line. The label_length attribute determines the
    padding and cut-off of the basic histogram labels.

    **This filter maintains a dict of unique field values in memory.**
    """

    # Width every label is padded to (and truncated at) when rendered by
    # __str__.
    label_length = 6

    def __init__(self, field, **kwargs):
        """
        Create the filter and an empty counter.

        ``field`` and any extra keyword arguments are forwarded unchanged to
        the parent class initialiser.
        """
        super(Histogram, self).__init__(field, **kwargs)
        # collections.Counter is missing on old interpreters; only then is
        # the FallbackCounter name looked up.
        if hasattr(collections, 'Counter'):
            self._counter = collections.Counter()
        else:
            self._counter = FallbackCounter()

    def process_field(self, item):
        """Increment the count stored for ``prep_field(item)``."""
        self._counter[self.prep_field(item)] += 1

    def prep_field(self, item):
        """
        Return the key under which ``item`` is counted.

        The default implementation returns ``item`` unchanged.
        """
        return item

    def value(self):
        """Return a copy of the internal value -> count mapping."""
        return self._counter.copy()

    def in_order(self):
        """Return a list of ``(value, count)`` tuples sorted by value."""
        return [(key, self._counter[key])
                for key in sorted(self._counter.keys())]

    def most_common(self, n=None):
        """
        Return the result of the counter's ``most_common(n)``.

        With ``collections.Counter`` this is a list of ``(value, count)``
        tuples ordered from most to least common, limited to ``n`` entries
        when ``n`` is not None.
        """
        return self._counter.most_common(n)

    @classmethod
    def as_string(cls, occurences, label_length):
        """
        Render ``(key, count)`` pairs as a text histogram.

        ``occurences`` is an iterable of ``(key, count)`` pairs (the
        parameter's spelling is kept so keyword callers keep working).
        Each pair becomes one line: ``str(key)`` padded and truncated to
        exactly ``label_length`` characters, a space, then ``count``
        asterisks. The returned string starts with a newline and every row
        ends with one.
        """
        rows = [
            "%s %s\n" % (str(key).ljust(label_length)[:label_length],
                         "*" * count)
            for key, count in occurences
        ]
        # Single join instead of repeated += on a growing string.
        return "\n" + "".join(rows)

    def __str__(self):
        """Return the value-ordered histogram rendered by as_string()."""
        return Histogram.as_string(self.in_order(),
                                   label_length=self.label_length)
def __init__(self, field, **kwargs):
    """
    Set up the filter for ``field`` and create an empty value counter.

    ``field`` and any extra keyword arguments are handed to the parent
    class initialiser unchanged.
    """
    super(Histogram, self).__init__(field, **kwargs)
    # The conditional expression keeps FallbackCounter lazily referenced:
    # it is only looked up when collections does not provide Counter.
    has_builtin_counter = hasattr(collections, 'Counter')
    self._counter = (
        collections.Counter() if has_builtin_counter else FallbackCounter()
    )