예제 #1
0
    def __init__(self, docs, gram):
        """Store the indexer state and build the index.

        Args:
            docs: Mapping whose values are documents exposing a ``words``
                iterable.
            gram: Indexing mode. When equal to 2, ``self.docs`` is replaced
                by one synthetic document per distinct word of the
                collection; that document's ``words`` are the
                two-character slices of the word padded with ``#`` on both
                sides. For any other value ``docs`` is kept as given.
        """
        self.index = {}
        self.docs = docs
        self.gram = gram
        if self.gram == 2:
            # Distinct words over the whole collection. Each one becomes its
            # own synthetic document, identified by its position in the
            # result of ``unique``.
            vocabulary = unique([
                token
                for document in docs.values()
                for token in document.words
            ])
            bigram_docs = {}
            for word_id, term in enumerate(vocabulary):
                # '#' marks the word boundaries so leading and trailing
                # characters get their own bigram.
                padded = "#" + term + "#"
                bigrams = [padded[k:k + 2] for k in range(len(padded) - 1)]
                bigram_doc = Doc(word_id, ' '.join(bigrams))
                # Set explicitly: the bigram list is used as-is rather than
                # whatever ``Doc`` might derive from the joined text.
                bigram_doc.words = bigrams
                bigram_docs[word_id] = bigram_doc
            self.docs = bigram_docs

        self.create_index()
예제 #2
0
        preprocessor = EnglishPreprocessor(docs)
    else:
        docs = read_docs('../data/Persian.xml')
        preprocessor = PersianPreprocessor(docs)

    for doc in docs.values():
        doc.words = preprocessor.preprocess(doc.text)

    print("Preprocess is done!")

    index = PositionalIndexer(docs, 1).index
    print("Index Created Successfully!")

    query = input("Enter Query: ")
    q_doc = Doc(0, query)
    q_doc.words = preprocessor.preprocess(q_doc.text)

    query_tag = input("Enter Tag (1, 2, 3, 4, None): ")
    tag = None
    if query_tag in ["1", "2", "3", "4"]:
        tag = int(query_tag)

    if tag is not None:
        classify(docs)

    results = search(q_doc, docs, index, 10, query_tag)
    for result in results:
        print(result[1])
        print(result[0].text)
        print()