예제 #1
0
def main():
    """Build the inverted index from stored documents and persist it.

    Reads every document in ``poky.documents`` that has a ``text`` field,
    runs each text through ``Parser.getIndexToken`` and merges the resulting
    ``DocumentReverseList`` objects into a single ``InverseTable``.  One
    record per term (``word``, ``df``, ``idf``, ``posting``) is then saved to
    ``poky.terms``, and each document listed in
    ``inverse_table.Normalization`` gets its value stored under the
    ``normalization`` field.
    """
    db = MongoClient().poky
    inverse_table = InverseTable()
    parser = Parser("Stopword.txt")

    for document in db.documents.find({"text": {"$exists": True}}):
        doc_id = document[u'_id']
        # ``text`` is a mapping whose values are iterables of texts; every
        # text is tokenized and merged on its own.
        for textlist in document[u'text'].values():
            for text in textlist:
                tokens = parser.getIndexToken(text)
                inverse_table.merge(DocumentReverseList(tokens, doc_id))

    # NOTE(review): presumably fills in the ``idf`` values and the
    # ``Normalization`` mapping read below -- confirm in InverseTable.
    inverse_table.CalNormalizingPara()

    for word in inverse_table.table:
        entry = inverse_table.table[word]
        postings = entry['posting']
        term = {
            'word': word,
            'df': entry['df'],
            'idf': entry['idf'],
            # ``doc_id`` rather than ``id`` so the builtin is not shadowed.
            'posting': [
                {'doc_id': doc_id, 'tf': postings[doc_id]}
                for doc_id in postings
            ],
        }
        db.terms.save(term)

    for doc_id in inverse_table.Normalization:
        document = db.documents.find_one({"_id": doc_id})
        if document is None:
            # find_one returns None when nothing matches (e.g. the document
            # was removed after the indexing pass); there is nothing to
            # annotate, so skip it instead of failing on item assignment.
            continue
        document["normalization"] = inverse_table.Normalization[doc_id]
        db.documents.save(document)
예제 #2
0
def main():
    """Build the inverted index from stored documents and persist it.

    Reads every document in ``poky.documents`` that has a ``text`` field,
    runs each text through ``Parser.getIndexToken`` and merges the resulting
    ``DocumentReverseList`` objects into a single ``InverseTable``.  One
    record per term (``word``, ``df``, ``idf``, ``posting``) is then saved to
    ``poky.terms``, and each document listed in
    ``inverse_table.Normalization`` gets its value stored under the
    ``normalization`` field.
    """
    db = MongoClient().poky
    inverse_table = InverseTable()
    parser = Parser("Stopword.txt")

    for document in db.documents.find({"text": {"$exists": True}}):
        doc_id = document[u'_id']
        # ``text`` is a mapping whose values are iterables of texts; every
        # text is tokenized and merged on its own.
        for textlist in document[u'text'].values():
            for text in textlist:
                tokens = parser.getIndexToken(text)
                inverse_table.merge(DocumentReverseList(tokens, doc_id))

    # NOTE(review): presumably fills in the ``idf`` values and the
    # ``Normalization`` mapping read below -- confirm in InverseTable.
    inverse_table.CalNormalizingPara()

    for word in inverse_table.table:
        entry = inverse_table.table[word]
        postings = entry['posting']
        term = {
            'word': word,
            'df': entry['df'],
            'idf': entry['idf'],
            # ``doc_id`` rather than ``id`` so the builtin is not shadowed.
            'posting': [
                {'doc_id': doc_id, 'tf': postings[doc_id]}
                for doc_id in postings
            ],
        }
        db.terms.save(term)

    for doc_id in inverse_table.Normalization:
        document = db.documents.find_one({"_id": doc_id})
        if document is None:
            # find_one returns None when nothing matches (e.g. the document
            # was removed after the indexing pass); there is nothing to
            # annotate, so skip it instead of failing on item assignment.
            continue
        document["normalization"] = inverse_table.Normalization[doc_id]
        db.documents.save(document)
예제 #3
0
class QueryAnalysis(object):
    """Tokenize queries with a ``Parser`` built from ``Stopword.txt``."""

    def __init__(self):
        """Create the parser from the ``Stopword.txt`` beside this module."""
        module_dir = os.path.dirname(__file__)
        stopword_path = os.path.join(module_dir, "Stopword.txt")
        self.parser = Parser(stopword_path)

    def analysis(self, Query):
        """Return whatever ``self.parser.getIndexToken`` yields for *Query*."""
        tokens = self.parser.getIndexToken(Query)
        return tokens
예제 #4
0
파일: Rank.py 프로젝트: byouloh/poky-engine
class QueryAnalysis(object):
    """Thin wrapper that delegates query tokenization to a ``Parser``."""

    def __init__(self):
        """Build ``self.parser`` from ``Stopword.txt`` in this file's folder."""
        here = os.path.dirname(__file__)
        self.parser = Parser(os.path.join(here, "Stopword.txt"))

    def analysis(self, Query):
        """Pass *Query* to ``self.parser.getIndexToken`` and return the result."""
        result = self.parser.getIndexToken(Query)
        return result