Example 1
File: brain.py Project: wodim/cobe
    def __init__(self, filename):
        self.analyzer = StandardAnalyzer()

        store = park.SQLiteStore(filename)

        self.model = Model(self.analyzer, store)
        self.searcher = RandomWalkSearcher(self.model)
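
For context, a minimal usage sketch of this constructor (the database file name and texts are illustrative; train and reply come from the fuller Brain listing in Example 4 below):

brain = Brain("cobe.sqlite")
brain.train(u"this is a test")
print brain.reply(u"test")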
Example 2
    def test_init(self):
        # Don't specify any ngram orders; the default should store
        # trigrams, bigrams, and unigrams.
        model = self.model
        self.assertEqual((3, 2, 1), model.orders)

        # And make sure n=5 yields every order from 5-grams down to unigrams
        model = Model(self.analyzer, self.store, n=5)
        self.assertEqual((5, 4, 3, 2, 1), model.orders)
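
The assertions suggest Model tracks every ngram order from n down to unigrams. A minimal sketch of that derivation (the helper name is hypothetical, not part of cobe):

def ngram_orders(n):
    # Every ngram order from n down to unigrams, matching the test above.
    return tuple(range(n, 0, -1))

assert ngram_orders(3) == (3, 2, 1)
assert ngram_orders(5) == (5, 4, 3, 2, 1)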
Example 3
    def test_load_tokens(self):
        # Ensure that model.tokens is properly reloaded from the
        # database when an old Model is loaded
        model = self.model

        model.train(u"this is a test")
        model.train(u"this is another test")

        # We save on train(), so make sure the new tokens log is empty.
        self.assertEqual(0, len(model.tokens.token_log))

        save_token_ids = dict(model.tokens.token_ids)
        save_tokens = dict(model.tokens.tokens)

        model = Model(self.analyzer, self.store)

        self.assertEqual(save_token_ids, model.tokens.token_ids)
        self.assertEqual(save_tokens, model.tokens.tokens)
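
For orientation, a sketch of the two-way token registry this test exercises, inferred from the attributes used above (a simplification; cobe's real registry also persists tokens through the store):

class SimpleTokenRegistry(object):
    # Hypothetical stand-in for model.tokens, using the same attribute names.
    def __init__(self):
        self.token_ids = {}  # token -> token id
        self.tokens = {}     # token id -> token
        self.token_log = []  # new tokens not yet flushed to the store

    def get_id(self, token):
        if token not in self.token_ids:
            token_id = len(self.tokens)
            self.token_ids[token] = token_id
            self.tokens[token_id] = token
            self.token_log.append(token)
        return self.token_ids[token]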
Example 4
File: brain.py Project: wodim/cobe
# Imports needed to run this listing; the cobe module paths are assumed
# from the project layout and are not shown in the original snippet.
import itertools
import logging
import math

import park

from cobe.analysis import StandardAnalyzer
from cobe.model import Model
from cobe.search import RandomWalkSearcher
from cobe.utils import itime

log = logging.getLogger(__name__)

class Brain(object):
    """A simplified, cobe 2.x style interface.

    This behaves roughly like cobe 2.x with an English stemmer for
    now; more flexibility will come as the API is fleshed out.

    It generates replies with a random walk across the language model
    and scores candidate replies by entropy, with a penalty for
    too-long replies.

    """
    def __init__(self, filename):
        self.analyzer = StandardAnalyzer()

        store = park.SQLiteStore(filename)

        self.model = Model(self.analyzer, store)
        self.searcher = RandomWalkSearcher(self.model)

    def reply(self, text):
        # Create a search query from the input
        query = self.analyzer.query(text, self.model)

        # Track (and don't re-score) replies that have already been
        # seen. These are expected when using a random walk searcher,
        # but they're also useful when debugging searches.
        seen = set()

        join = self.analyzer.join
        entropy = self.model.entropy

        def score(reply):
            joined = join(reply)
            if joined in seen:
                return -1.0, joined

            seen.add(joined)
            n_tokens = len(reply)

            # Penalize longer replies (cobe 2.x compatibility). Check the
            # larger threshold first; otherwise its branch is unreachable.
            penalty = 1.0
            if n_tokens > 48:
                penalty = n_tokens
            elif n_tokens > 24:
                penalty = math.sqrt(n_tokens)

            return entropy(joined) / penalty, joined

        # This search is a generator; it doesn't start evaluating until read
        search = itime(self.searcher.search(query), 0.5)

        # Generate and score the search results.
        results = sorted(itertools.imap(score, search))

        if log.isEnabledFor(logging.DEBUG):
            for score, text in results:
                log.debug("%.4f %s", score, text)

            log.debug("made %d replies (%d unique)", len(results), len(seen))

        score, reply = results[-1]
        return reply

    def train(self, text):
        return self.model.train(text)

    def train_many(self, text_gen):
        return self.model.train_many(text_gen)
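
The penalty branches are easy to get backwards, so here is a standalone mirror of the corrected logic with worked values (the 24/48 thresholds follow the listing above; the function name is illustrative):

import math

def length_penalty(n_tokens):
    # Larger threshold first, so both branches are reachable.
    if n_tokens > 48:
        return float(n_tokens)
    if n_tokens > 24:
        return math.sqrt(n_tokens)
    return 1.0

assert length_penalty(10) == 1.0
assert length_penalty(36) == 6.0  # sqrt(36)
assert length_penalty(64) == 64.0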
Example 5
class Brain(object):
    """A simplified, cobe 2.x style interface.

    This behaves roughly like cobe 2.x with an English stemmer for
    now; more flexibility will come as the API is fleshed out.

    It generates replies with a random walk across the language model
    and scores candidate replies by entropy, with a penalty for
    too-long replies.

    """
    def __init__(self, filename):
        self.analyzer = StandardAnalyzer()

        store = park.SQLiteStore(filename)

        self.model = Model(self.analyzer, store)
        self.searcher = RandomWalkSearcher(self.model)

    def reply(self, text):
        # Create a search query from the input
        query = self.analyzer.query(text, self.model)

        # Track (and don't re-score) replies that have already been
        # seen. These are expected when using a random walk searcher,
        # but they're also useful when debugging searches.
        seen = set()

        join = self.analyzer.join
        entropy = self.model.entropy

        def score(reply):
            joined = join(reply)
            if joined in seen:
                return -1.0, joined

            seen.add(joined)
            n_tokens = len(reply)

            # Penalize longer replies (cobe 2.x compatibility). Check the
            # larger threshold first; otherwise its branch is unreachable.
            penalty = 1.0
            if n_tokens > 32:
                penalty = n_tokens
            elif n_tokens > 16:
                penalty = math.sqrt(n_tokens)

            return entropy(joined) / penalty, joined

        # This search is a generator; it doesn't start evaluating until read
        search = itime(self.searcher.search(query), 0.5)

        # Generate and score the search results.
        results = sorted(itertools.imap(score, search))

        if log.isEnabledFor(logging.DEBUG):
            for score, text in results:
                log.debug("%.4f %s", score, text)

            log.debug("made %d replies (%d unique)", len(results), len(seen))

        score, reply = results[-1]
        return reply

    def train(self, text):
        return self.model.train(text)

    def train_many(self, text_gen):
        return self.model.train_many(text_gen)
Example 6
    def setUp(self):
        self.analyzer = WhitespaceAnalyzer()
        self.store = park.SQLiteStore(":memory:")
        self.model = Model(self.analyzer, self.store)
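
Building on this fixture, a sketch of one more test (the method name is hypothetical; the behavior it asserts, train() leaving the token log empty, is taken from Example 3):

    def test_train_flushes_token_log(self):
        # train() saves, so the pending token log should be empty afterward.
        self.model.train(u"this is a test")
        self.assertEqual(0, len(self.model.tokens.token_log))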