Python documentManager Examples

Programming Language: Python

Namespace/Package Name: documentManager

Method/Function: documentManager

Examples at hotexamples.com: 5

Python documentManager - 5 examples found. These are the top-rated real-world Python examples of documentManager.documentManager, extracted from open source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

	def retrive_word(self, word):
		"""Return the DocIDs of every posting recorded for *word*.

		Position 0 of each posting in ``self.word_dictionary[word]`` is taken
		as the DocID and converted to ``int``. No database access is needed:
		the returned IDs come straight from the in-memory index.

		Args:
			word: Key to look up in ``self.word_dictionary``.

		Returns:
			A list of integer DocIDs, in posting order.
		"""
		# A distinct loop name keeps the ``word`` parameter from being shadowed.
		return [int(posting[0]) for posting in self.word_dictionary[word]]

Example #2

Show file

	def caculate_BM25(self, query_words):
		"""Score documents against a space-separated query and print their URLs.

		Every document that appears in the postings of at least one query
		term is scored, and the result of ``self.DocID2Doc`` for each one is
		printed in descending score order.

		Args:
			query_words: Query string whose terms are separated by single
				spaces. Terms missing from ``self.word_dictionary`` are
				ignored.

		Returns:
			None. The ranking is written to standard output.
		"""
		manager = documentManager()
		collection = manager.connect_mongo()

		b = 0.5  # parameter tuning factor
		k = 10  # tuning factor
		avdl = 800  # average document length
		total_docs = 100  # document count used in the idf ratio

		# Only indexed terms can contribute to a score. Filtering once here
		# also keeps the scoring loop from raising KeyError when a query
		# mixes known and unknown terms.
		known_words = [
			word for word in query_words.split(' ')
			if word in self.word_dictionary
		]

		# All documents in which at least one query term occurs.
		candidate_ids = set()
		for word in known_words:
			for posting in self.word_dictionary[word]:
				candidate_ids.add(posting[0])

		score_dictionary = {}
		for doc_id in candidate_ids:
			# The document body does not depend on the term, so it is fetched
			# once per document rather than once per (document, term) pair.
			content = collection.find_one({"DocID": int(doc_id)})["content"]

			BM25_score = 0
			for word in known_words:
				freq = self.get_wordcount_in_document(word, content)

				# Number of postings recorded for this term.
				doc_freq = len(self.word_dictionary[word])
				idf = math.log(float(total_docs) / doc_freq)
				# NOTE(review): BM25 normalises by the length of the document
				# being scored, but this uses the term's posting count, and
				# the integer division yields 0 whenever that count is below
				# avdl. Left unchanged to preserve current scores; confirm the
				# intended length measure before altering it.
				normalizer = 1 - b + b * (doc_freq // avdl)

				BM25_score += float((k + 1) * freq) / (freq + k * normalizer) * idf
			# BM25 score of this document for the whole query.
			score_dictionary[doc_id] = BM25_score

		ranking = sorted(score_dictionary.items(), key=lambda d: d[1], reverse=True)

		for entry in ranking:
			print(self.DocID2Doc(int(entry[0])))

Example #3

Show file

File: indexer.py Project: jessonsh/just-search-engine

	def process_all_documents(self):
		"""Pass the content of documents 1 through 100 to ``count_words``.

		Each document is looked up by its ``DocID`` in the collection returned
		by ``documentManager().connect_mongo()``; its ``content`` field and
		the ID are then handed to ``self.count_words``.
		"""
		documents = documentManager().connect_mongo()
		for doc_id in range(1, 101):
			record = documents.find_one({"DocID": doc_id})
			self.count_words(record["content"], doc_id)

Example #4

Show file

File: indexer.py Project: zgtios/just-search-engine

 def process_all_documents(self):
     """Pass the content of documents 1 through 100 to ``count_words``.

     Each document is looked up by its ``DocID`` in the collection returned
     by ``documentManager().connect_mongo()``; its ``content`` field and
     the ID are then handed to ``self.count_words``.
     """
     documents = documentManager().connect_mongo()
     for doc_id in range(1, 101):
         record = documents.find_one({"DocID": doc_id})
         self.count_words(record["content"], doc_id)

Example #5

Show file

	def DocID2Doc(self, DocID):
		"""Return the ``url`` field of the document whose ``DocID`` matches.

		Args:
			DocID: Value matched against the ``DocID`` field of the collection
				returned by ``documentManager().connect_mongo()``.

		Returns:
			The ``url`` value stored on the matching document.
		"""
		record = documentManager().connect_mongo().find_one({"DocID": DocID})
		return record["url"]