def home():
    """Render the home page, with search results when a query was submitted.

    Reads the ``search`` query-string parameter from the current request.
    When it is absent the template is rendered with ``found=None`` and an
    empty ``query``. Otherwise every hit returned by ``tfidf.search`` is
    turned into a ``(printable_title, similarity, printable_text)`` tuple,
    where the text is element 1 of ``tfidf.documents[index]``.

    Returns:
        The rendered ``home.html`` template.
    """
    # No query submitted: show the bare page.
    if 'search' not in request.args:
        return render_template('home.html', found=None, query="")

    question = request.args['search']
    results = []
    for title, similarity, index in tfidf.search(question):
        results.append((
            Cleaner.make_printable(title),
            similarity,
            Cleaner.make_printable(tfidf.documents[index][1]),
        ))
    return render_template('home.html', found=results, query=question)
def clusters():
    """Render the clusters page for a k-means grouping of the documents.

    Reads the optional ``k`` query-string parameter as the number of
    clusters. A missing, non-integer, or non-positive value falls back to
    the default of 9 instead of raising, so a malformed URL no longer
    produces a server error.

    Each cluster returned by ``tfidf.group_kmeans`` is a collection of
    document ids; every id is resolved through ``documents`` to a
    ``(title, desc, category)`` triple, and title and description are made
    printable before rendering.

    Returns:
        The rendered ``clusters.html`` template.
    """
    default_k = 9
    raw_k = request.args.get('k')
    try:
        k = default_k if raw_k is None else int(raw_k)
    except ValueError:
        # User-supplied value is not an integer (e.g. "abc" or "3.5").
        k = default_k
    if k < 1:
        # A cluster count below one is meaningless for k-means.
        k = default_k

    # Second argument is passed through unchanged; presumably an iteration
    # limit -- TODO confirm against tfidf.group_kmeans.
    groups = tfidf.group_kmeans(k, 20)

    # NOTE(review): this reads the module-level ``documents`` while home()
    # reads ``tfidf.documents`` -- confirm both refer to the same data.
    cleaned_clusters = [
        [
            (Cleaner.make_printable(title), Cleaner.make_printable(desc), category)
            for title, desc, category in (documents[doc_id] for doc_id in group)
        ]
        for group in groups
    ]
    return render_template('clusters.html', clusters=cleaned_clusters)
class TestCleaner(unittest.TestCase):
    """Unit tests for ``Cleaner.clean_word`` and ``Cleaner.clean_wordlist``."""

    def setUp(self):
        """Build a Cleaner whose stopwords are "stop", "halt" and "basta"."""
        self.c = Cleaner("stop halt basta".split())

    def test_clean_word(self):
        # A stopword is expected to be rejected regardless of letter case.
        self.assertIsNone(self.c.clean_word("STop"))

        # Punctuation and mixed case are expected to reduce to "comput".
        cleaned = self.c.clean_word("co.mp%&*uTEr")
        self.assertEqual(cleaned, "comput")

    def test_clean_wordlist(self):
        # Stopwords and symbol-only tokens are expected to be dropped.
        cleaned = self.c.clean_wordlist("stop coMputer #$%&*".split())
        self.assertEqual(cleaned, ["comput"])

        # A dashed date is expected to split into its numeric parts.
        cleaned = self.c.clean_wordlist(
            "stop computer halt 12-10-2010 morning".split()
        )
        self.assertEqual(cleaned, ["comput", "12", "10", "2010", "morn"])
def setUp(self):
    """Build a Cleaner whose stopwords are "stop", "halt" and "basta"."""
    self.c = Cleaner("stop halt basta".split())