def main():
    """Rebuild the term index stored in the ``poky`` MongoDB database.

    Every document that has a ``text`` field is tokenized and merged into an
    ``InverseTable``.  After ``CalNormalizingPara()`` has run, one record per
    term (``word``, ``df``, ``idf``, ``posting``) is saved to the ``terms``
    collection, and each entry of ``InverseTable.Normalization`` is written
    back to its document under the ``normalization`` field.
    """
    database = MongoClient().poky
    index = InverseTable()
    tokenizer = Parser("Stopword.txt")

    # Pass 1: merge every text fragment of every matching document.
    # NOTE(review): assumes ``text`` maps section names to lists of strings --
    # confirm against the document schema.
    has_text = {"text": {"$exists": True}}
    for doc in database.documents.find(has_text):
        for fragments in doc[u'text'].values():
            for fragment in fragments:
                tokens = tokenizer.getIndexToken(fragment)
                index.merge(DocumentReverseList(tokens, doc[u'_id']))

    index.CalNormalizingPara()
    # inverse_table.printRT()

    # Pass 2: persist one record per indexed term.
    for word in index.table:
        entry = index.table[word]
        record = {
            'word': word,
            'df': entry['df'],
            'idf': entry['idf'],
            'posting': [
                {'doc_id': doc_id, 'tf': entry['posting'][doc_id]}
                for doc_id in entry['posting']
            ],
        }
        database.terms.save(record)

    # Pass 3: attach the per-document normalization value to each document.
    for doc_key in index.Normalization:
        stored = database.documents.find_one({"_id": doc_key})
        stored["normalization"] = index.Normalization[doc_key]
        database.documents.save(stored)
def main():
    """Index all documents of the ``poky`` database and store the result.

    Steps, in order:

    1. Tokenize each text of each document that has a ``text`` field and
       merge the resulting ``DocumentReverseList`` into an ``InverseTable``.
    2. Call ``CalNormalizingPara()`` on the table.
    3. Save one ``terms`` record per table key, holding ``word``, ``df``,
       ``idf`` and a ``posting`` list of ``{'doc_id', 'tf'}`` pairs.
    4. Store each ``Normalization`` value on its document as
       ``normalization``.
    """
    store = MongoClient().poky
    table = InverseTable()
    stop_parser = Parser("Stopword.txt")

    cursor = store.documents.find({"text": {"$exists": True}})
    for source_doc in cursor:
        # NOTE(review): ``text`` is treated as a mapping whose values are
        # iterables of texts -- verify against the producer of these documents.
        for text_group in source_doc[u'text'].values():
            for piece in text_group:
                reverse_list = DocumentReverseList(
                    stop_parser.getIndexToken(piece), source_doc[u'_id'])
                table.merge(reverse_list)

    table.CalNormalizingPara()
    # inverse_table.printRT()

    for term_word in table.table:
        info = table.table[term_word]
        term_doc = {
            'word': term_word,
            'df': info['df'],
            'idf': info['idf'],
            'posting': [
                {'doc_id': posting_id, 'tf': info['posting'][posting_id]}
                for posting_id in info['posting']
            ],
        }
        store.terms.save(term_doc)

    for normalized_id in table.Normalization:
        target = store.documents.find_one({"_id": normalized_id})
        target["normalization"] = table.Normalization[normalized_id]
        store.documents.save(target)
class QueryAnalysis(object):
    """Tokenize queries by delegating to a ``Parser`` instance."""

    def __init__(self):
        """Create the parser from the ``Stopword.txt`` next to this module."""
        # Anchor the path on this file's directory so the lookup does not
        # depend on the process's current working directory.
        module_dir = os.path.dirname(__file__)
        stopword_path = os.path.join(module_dir, "Stopword.txt")
        self.parser = Parser(stopword_path)

    def analysis(self, Query):
        """Return whatever ``self.parser.getIndexToken`` yields for *Query*."""
        tokens = self.parser.getIndexToken(Query)
        return tokens
class QueryAnalysis(object):
    """Thin wrapper that runs a query through ``Parser.getIndexToken``."""

    def __init__(self):
        """Build ``self.parser`` from ``Stopword.txt`` in this file's directory."""
        here = os.path.dirname(__file__)
        # The file is looked up beside this module, not in the working directory.
        self.parser = Parser(os.path.join(here, "Stopword.txt"))

    def analysis(self, Query):
        """Pass *Query* to the parser and hand back its result unchanged."""
        get_tokens = self.parser.getIndexToken
        return get_tokens(Query)