예제 #1
def absolute_counts_test():
    """Check that ``absolute_counts`` reports the absolute frequencies.

    The counter is built from a dict of absolute frequencies and
    ``absolute_counts()`` is expected to hand the same figures back.
    """
    # The counter has to hold data before it is queried: an empty
    # ``NormalizedCounter()`` cannot satisfy the assertions below.
    # NOTE(review): the dict form of the constructor is the one the Analyzer
    # class uses when loading stored frequencies -- confirm it is the intended
    # way to seed this test.
    nc = NormalizedCounter({"a": 3, "b": 2, "c": 1})

    absolute = nc.absolute_counts()

    assert absolute["a"] == 3
    assert absolute["b"] == 2
    assert absolute["c"] == 1
    assert len(absolute) == 3
예제 #3
class Analyzer(object):
    """Score strings by how close their frequency distribution is to a target.

    You can feed an analyzer from different sources (strings, files...) so
    that it extracts the target frequency distribution, and then ask it to
    score supplied content based on frequency similarity.
    """

    def __init__(self, content=None):
        """Build an analyzer.

        :param content: optional initial content, handed unchanged to
            ``NormalizedCounter``. It can be a string or a dict with absolute
            frequencies, as in ``Analyzer({"a": 4, "b": 8, "c": 1})``.
        """
        self.counter = NormalizedCounter(content)

    def feed(self, content):
        """Feed the analyzer with a string.

        :param content: the string to be fed to the analyzer
        """
        # Accumulate through ``+=``, the same way ``load`` merges a stored
        # distribution into the current one.
        self.counter += NormalizedCounter(content)

    def feed_from_raw_file(self, filename):
        """Feed the analyzer with the content of a file.

        Every character will be taken into account, including newline chars.

        :param filename: the path of the file that will be fed to the analyzer
        """
        with open(filename) as f:
            content = f.read()

        self.feed(content)

    def score(self, content):
        """Assign a score to any string.

        The smaller the score, the more similar the frequency distributions.
        0 means that the frequency distributions of both the content and the
        analyzer are equal.

        :param content: the string to be scored.

        :returns: a float number
        """
        # Build the counter from the content being scored; comparing against
        # an empty counter would give the same score for every input.
        new_counter = NormalizedCounter(content)

        return counter_distance(self.counter, new_counter)

    def choose_best(self, strings, n=1):
        """Return the n strings most similar to the analyzer's distribution.

        :param strings: an iterable with the strings among which the analyzer
            will look for the best ones. Duplicates are scored once.
        :param n: an integer specifying the number of strings to return.

        :returns: a list containing the ``n`` best strings sorted by
            frequency similarity (best first)
        """
        scores = {string: self.score(string) for string in strings}

        # ``items()`` exists on both Python 2 and 3, unlike ``iteritems()``.
        best = heapq.nsmallest(n, scores.items(), key=operator.itemgetter(1))

        # Keep only the strings, dropping the scores used for ranking.
        return [string for string, _ in best]

    def serialize(self):
        """Return a json representation of the analyzer.

        :returns: a string containing a json representation of the absolute
            frequencies the analyzer has been fed with.
        """
        content = self.counter.absolute_counts()

        return json.dumps(content)

    def store(self, filename):
        """Store the json representation of the analyzer to a file.

        :param filename: the path of the file to write; it is overwritten.
        """
        with open(filename, "w") as f:
            f.write(self.serialize())

    def load(self, filename):
        """Load a frequency distribution file and add it to the current one.

        :param filename: the path of a JSON file holding a frequency
            distribution, as written by ``store``.
        """
        with open(filename) as f:
            counter = NormalizedCounter(json.loads(f.read()))
            self.counter += counter

    def discard(self, chars):
        """Remove the chars in ``chars`` from the counter.

        :param chars: an iterable consisting of the chars whose frequency
            will be set to 0
        """
        for char in chars:
            del self.counter[char]

    def transform_keys(self, transformation):
        """Map the keys to other new keys to get a new frequency distribution.

        The frequencies of keys that map to the same key are added together
        in order to get the new frequency distribution.

        :param transformation: a callable object that maps chars to chars
        """
        # NOTE(review): the original body was not present in the reviewed
        # source; this version relies only on ``absolute_counts()`` and the
        # dict form of the ``NormalizedCounter`` constructor -- confirm it
        # matches the intended semantics.
        merged = {}
        for key, count in self.counter.absolute_counts().items():
            new_key = transformation(key)
            merged[new_key] = merged.get(new_key, 0) + count

        self.counter = NormalizedCounter(merged)

    def keys(self):
        """Return the characters whose frequency is greater than 0.

        The value is whatever the underlying counter's ``elements()`` yields.
        """
        return self.counter.elements()

    def from_raw_file(self, filename):
        """Return a new analyzer fed with the raw content of a file.

        The instance it is called on is not modified.

        :param filename: the path of the file whose content is analyzed
        :returns: a fresh ``Analyzer``
        """
        analyzer = Analyzer()
        analyzer.feed_from_raw_file(filename)

        return analyzer

    def from_file(self, filename):
        """Return a new analyzer read from a JSON file written by ``store``.

        The instance it is called on is not modified.

        :param filename: the path of the JSON frequency distribution file
        :returns: a fresh ``Analyzer``
        """
        analyzer = Analyzer()
        analyzer.load(filename)

        return analyzer