u"redistribute it.\n\t\tThere is NO WARRANTY, to the extent " u"permitted by law.")) parser.add_argument('-k', '--keywords', help="Keywords file path", default="data/keywords-2.txt") parser.add_argument('-s', '--stopwords', help="Stopwords file path", default="data/stopwords.txt") parser.add_argument('-d', '--documents', help="Documents file path", default="data/documents-2.txt") parser.add_argument('-n', '--noresults', help="Number of displayed results", default="5") parser.add_argument('-v', '--version', action='version', version='%(prog)s 0.3') args = parser.parse_args() keywords = Loader.load_keywords(args.keywords) stopwords = Loader.load_stopwords(args.stopwords) documents = Loader.load_documents(args.documents) n = int(args.noresults) cleaner = Cleaner(stopwords) tfidf = TFIDF(keywords, documents, cleaner) question = raw_input("Enter search string or \"exit()\" and press enter: ") while question != "exit()": found = tfidf.search(question) for title, similarity, index in found[:n]: print "{0:4f}\t{1}".format(similarity, title) groups = tfidf.group_kmeans(9, 10) for i, group in enumerate(groups): print "\nGroup {0}:\n".format(i) for doc_id in group:
# Flask web application built around the project's TFIDF search index.
# Everything at module level below runs once, when this module is imported,
# and the resulting objects are shared by the route handlers.
import os
from data import Loader
from word import Cleaner
from search import TFIDF
from guess import Guesses
import expander
from flask import Flask, render_template, request, jsonify

# Locations of the input files (relative paths).
keywords_path = "data/keywords-2.txt"
stopwords_path = "data/stopwords.txt"
documents_path = "data/documents-2.txt"

# Load the three inputs through the project's Loader helpers.
keywords = Loader.load_keywords(keywords_path)
stopwords = Loader.load_stopwords(stopwords_path)
# NOTE(review): the effect of categories=True is defined by
# Loader.load_documents, which is not visible here -- confirm what extra
# data it attaches to each document.
documents = Loader.load_documents(documents_path, categories=True)

# The cleaner is built from the stopword list and handed to the index
# together with the keywords and documents.
cleaner = Cleaner(stopwords)
tfidf = TFIDF(keywords, documents, cleaner)
# Guesses is constructed from the index's term-document matrix, keyword list
# and keyword lookup; presumably it backs query suggestions -- TODO confirm.
autocomplete = Guesses(tfidf.get_term_document_matrix(), tfidf.keywords, tfidf.keywords_lookup)

app = Flask(__name__)


# Handler for the site root ('/').
@app.route('/')
def home():
    # Initial values; question is overwritten below when a search term is
    # supplied with the request.
    found_extended = None
    question = ""
    # Take the query from the 'search' request argument when it is present.
    if 'search' in request.args:
        question = request.args['search']