Esempio n. 1
0
def home():
    """Render the home page, with search results when a query is supplied.

    When the request carries a ``search`` argument, its value is passed to
    ``tfidf.search`` and every ``(title, similarity, index)`` hit is turned
    into a ``(printable title, similarity, printable text)`` tuple, where the
    text is field 1 of ``tfidf.documents[index]`` (presumably the document
    description -- confirm against the indexer).

    Without a ``search`` argument the template receives ``found=None`` and an
    empty query string.
    """
    if 'search' not in request.args:
        # No query submitted: render the page without a result list.
        return render_template('home.html', found=None, query="")

    query_text = request.args['search']
    results = []
    for title, similarity, index in tfidf.search(query_text):
        printable_title = Cleaner.make_printable(title)
        printable_text = Cleaner.make_printable(tfidf.documents[index][1])
        results.append((printable_title, similarity, printable_text))
    return render_template('home.html', found=results, query=query_text)
Esempio n. 2
0
def clusters():
    """Render the clusters page with documents grouped by ``tfidf.group_kmeans``.

    The number of clusters comes from the optional ``k`` request argument.
    It defaults to 9 when the argument is absent or is not a valid integer,
    so a malformed value such as ``?k=abc`` no longer raises an unhandled
    ``ValueError``.

    Each cluster returned by ``tfidf.group_kmeans`` is treated as an iterable
    of document ids; every id is looked up in ``documents`` and unpacked as
    ``(title, desc, category)``. Title and description are passed through
    ``Cleaner.make_printable``; the category is forwarded unchanged.
    """
    default_k = 9
    if 'k' in request.args:
        try:
            k = int(request.args['k'])
        except ValueError:
            # The query string is user-controlled; fall back to the default
            # instead of failing the whole request on a non-numeric value.
            k = default_k
    else:
        k = default_k
    # NOTE(review): k <= 0 is passed through unchanged -- confirm that
    # group_kmeans handles it or validate here.

    # The second argument (20) is forwarded as in the original code;
    # presumably an iteration limit -- confirm against group_kmeans.
    # The local is deliberately not called ``clusters`` so it does not shadow
    # this view function's own name.
    id_clusters = tfidf.group_kmeans(k, 20)

    cleaned_clusters = [
        [
            (Cleaner.make_printable(title),
             Cleaner.make_printable(desc),
             category)
            for title, desc, category in (documents[doc_id] for doc_id in cluster)
        ]
        for cluster in id_clusters
    ]
    return render_template('clusters.html', clusters=cleaned_clusters)
Esempio n. 3
0
class TestCleaner(unittest.TestCase):
    """Unit tests for ``Cleaner.clean_word`` and ``Cleaner.clean_wordlist``."""

    def setUp(self):
        """Create a fresh ``Cleaner`` with three stop words before each test."""
        self.c = Cleaner("stop halt basta".split())

    def test_clean_word(self):
        """A stop word yields ``None``; a noisy word is reduced to ``"comput"``."""
        # "STop" differs from the configured stop word "stop" only by case.
        self.assertIsNone(self.c.clean_word("STop"))

        # Punctuation and mixed case inside the word must not survive cleaning.
        cleaned = self.c.clean_word("co.mp%&*uTEr")
        self.assertEqual(cleaned, "comput")

    def test_clean_wordlist(self):
        """Stop words and symbol-only tokens are dropped; a date token is split."""
        tokens = "stop coMputer #$%&*".split()
        self.assertEqual(self.c.clean_wordlist(tokens), ["comput"])

        # "12-10-2010" is expected to come back as three separate entries.
        tokens = "stop computer halt 12-10-2010 morning".split()
        self.assertEqual(
            self.c.clean_wordlist(tokens),
            ["comput", "12", "10", "2010", "morn"],
        )
Esempio n. 4
0
 def setUp(self):
     """Create the ``Cleaner`` fixture, configured with three stop words."""
     self.c = Cleaner("stop halt basta".split())