コード例 #1
0
ファイル: test.py プロジェクト: powerllamas/EZI
 def setUp(self):
     """Prepare the TFIDF instance under test and store it as ``self.s``.

     The stop-word and keyword lists are given inline; the documents are
     read from ``data/documents-lab1.txt`` through ``Loader.load_documents``.
     """
     stop_list = ["stop"]
     query_terms = ["information", "agency", "retrieval"]

     # An inline fixture used to live here instead of the file load
     # (presumably mirroring the file's contents -- TODO confirm):
     #   ("Document 1", "information retrieval information retrieval")
     #   ("Document 2", "retrieval retrieval retrieval retrieval")
     #   ("Document 3", "agency information retrieval agency")
     #   ("Document 4", "retrieval agency retrieval agency")
     corpus = Loader.load_documents("data/documents-lab1.txt")

     word_cleaner = Cleaner(stop_list)
     self.s = TFIDF(query_terms, corpus, word_cleaner)
コード例 #2
0
ファイル: pytia.py プロジェクト: powerllamas/EZI
          u"permitted by law."))
    parser.add_argument('-k', '--keywords', help="Keywords file path",
            default="data/keywords-2.txt")
    parser.add_argument('-s', '--stopwords', help="Stopwords file path",
            default="data/stopwords.txt")
    parser.add_argument('-d', '--documents', help="Documents file path",
            default="data/documents-2.txt")
    parser.add_argument('-n', '--noresults',
            help="Number of displayed results", default="5")
    parser.add_argument('-v', '--version', action='version',
            version='%(prog)s 0.3')
    args = parser.parse_args()

    keywords = Loader.load_keywords(args.keywords)
    stopwords = Loader.load_stopwords(args.stopwords)
    documents = Loader.load_documents(args.documents)
    n = int(args.noresults)

    cleaner = Cleaner(stopwords)
    tfidf = TFIDF(keywords, documents, cleaner)

    question = raw_input("Enter search string or \"exit()\" and press enter: ")
    while question != "exit()":
            found = tfidf.search(question)           
            for title, similarity, index in found[:n]:
                print "{0:4f}\t{1}".format(similarity, title)
            groups = tfidf.group_kmeans(9, 10)
            for i, group in enumerate(groups):
                print "\nGroup {0}:\n".format(i)
                for doc_id in group:
                    print "\t{0}\n".format(documents[doc_id][0])
コード例 #3
0
ファイル: server.py プロジェクト: powerllamas/EZI
from data import Loader
from word import Cleaner
from search import TFIDF
from guess import Guesses
import expander

from flask import Flask, render_template, request, jsonify

# Locations of the input files handed to the Loader calls below.
# NOTE(review): the paths are relative -- presumably resolved against the
# process working directory; confirm against Loader.
keywords_path = "data/keywords-2.txt"
stopwords_path = "data/stopwords.txt"
documents_path = "data/documents-2.txt"

# Everything below runs once, when this module is imported.
keywords = Loader.load_keywords(keywords_path)
stopwords = Loader.load_stopwords(stopwords_path)
# NOTE(review): categories=True presumably makes the loader keep per-document
# category information -- confirm against Loader.load_documents.
documents = Loader.load_documents(documents_path, categories=True)

# Shared search objects: the cleaner is built from the stop-word list and the
# TFIDF instance from the keywords, the documents and that cleaner.
cleaner = Cleaner(stopwords)
tfidf = TFIDF(keywords, documents, cleaner)
# Guesses is fed the term-document matrix plus the keyword data held by tfidf.
autocomplete = Guesses(tfidf.get_term_document_matrix(), tfidf.keywords, tfidf.keywords_lookup)

# Flask application object; the route handlers below are registered on it.
app = Flask(__name__)


@app.route('/')
def home():
    found_extended = None
    question = ""
    if 'search' in request.args:
        question = request.args['search']
        found = tfidf.search(question)