Example #1
0
	def QueryPhrase(self,searchPhrase,isshow = True):
		"""Return news records that contain every word of a search phrase.

		searchPhrase is decoded as UTF-8 and segmented with jieba.cut. Each
		segment is looked up in self.kw_id, its inverted-index row is loaded,
		and the document ids of all segments are intersected. At most
		Global.listsize matching rows are then loaded and returned.

		Parameters:
			searchPhrase: byte string holding the UTF-8 encoded query.
			isshow: when true, print title, raw time and full content of each
				returned record.

		Returns:
			A list of decoded record dicts. In each dict 'time' is rewritten as
			'%Y-%m-%d %H:%M:%S' local time, 'content' is cut to
			Global.snippetsize characters and 'id' holds the document id. The
			list is empty when nothing matches. An empty set (not a list) is
			returned as soon as a word is missing from self.kw_id; that
			historical return value is kept so existing callers are unaffected.
		"""
		words = jieba.cut(searchPhrase.decode('utf-8'),cut_all=False)
		cut = Cut()
		# None means "no posting list seen yet". Seeding with a fixed
		# range(1, 100000) silently dropped every document id >= 100000 and
		# made a phrase without any word match arbitrary ids.
		result = None
		for word in words:
			if word not in self.kw_id:
				print('Not Exist Record')
				return set()
			idx = self.kw_id[word]
			ii_line = cut.getInverseIndexRow(idx,Global.inverse_dir,Global.filesize)
			postings = set(int(rec) for rec in json.loads(ii_line))
			if result is None:
				result = postings
			else:
				result = result & postings
		if not result:
			print('Not Exists Record!')
			return list()
		newslist = list()
		for count,rst in enumerate(result):
			# Cap the number of returned records at Global.listsize.
			if count >= Global.listsize:
				break
			line = cut.getRow(int(rst),Global.cutnews_origin_dir,Global.filesize)
			data = json.loads(line)
			if isshow:
				# One pre-formatted string parses the same as a Python 2 print
				# statement and as a print() call; each field is followed by a
				# space and a newline, as before.
				print('%s \n%s \n%s \n' % (data['title'],data['time'],data['content']))
			tm = time.localtime(int(data['time']))
			data['time'] = time.strftime('%Y-%m-%d %H:%M:%S',tm)
			data['content'] = data['content'][0:Global.snippetsize]
			data['id'] = rst
			newslist.append(data)
		return newslist
Example #2
0
	def QuerySingle(self,searchWord,ishow):
		"""Look up the inverted-index record stored for a single word.

		Parameters:
			searchWord: byte string; it is decoded as UTF-8 before being looked
				up in self.kw_id.
			ishow: when true, print title, time and content of the row that
				cut.getRow returns for every entry of the record, or print a
				notice when the word is unknown.

		Returns:
			The JSON-decoded inverted-index row for the word, or an empty dict
			when the word is not in self.kw_id. Callers have to check for the
			empty result themselves.
		"""
		# Decode once; the original decoded for the test and again for the lookup.
		word = searchWord.decode('utf-8')
		if word not in self.kw_id:
			# This branch used to read the undefined name `isshow`, so every
			# lookup of an unknown word raised NameError instead of returning.
			if ishow:
				print('Not Exists Record!')
			# Callers need to inspect the result after calling this function.
			return dict()
		cut = Cut()
		ii_line = cut.getInverseIndexRow(self.kw_id[word],Global.inverse_dir,Global.filesize)
		record = json.loads(ii_line)
		if ishow:
			for rec in record:
				line = cut.getRow(int(rec),Global.cutnews_origin_dir,Global.filesize)
				data = json.loads(line)
				# One pre-formatted string parses the same as a Python 2 print
				# statement and as a print() call; each field is followed by a
				# space and a newline, as before.
				print('%s \n%s \n%s \n' % (data['title'],data['time'],data['content']))
		# Return the inverted-index record that belongs to the single term.
		return record
Example #3
0
 def QuerySingle(self, searchWord, ishow):
     """Look up the inverted-index record stored for a single word.

     Parameters:
         searchWord: byte string; it is decoded as UTF-8 before being looked
             up in self.kw_id.
         ishow: when true, print title, time and content of the row that
             cut.getRow returns for every entry of the record, or print a
             notice when the word is unknown.

     Returns:
         The JSON-decoded inverted-index row for the word, or an empty dict
         when the word is not in self.kw_id. Callers have to check for the
         empty result themselves.
     """
     # Decode once; the original decoded for the test and again for the lookup.
     word = searchWord.decode('utf-8')
     if word not in self.kw_id:
         # This branch used to read the undefined name `isshow`, so every
         # lookup of an unknown word raised NameError instead of returning.
         if ishow:
             print('Not Exists Record!')
         # Callers need to inspect the result after calling this function.
         return dict()
     cut = Cut()
     ii_line = cut.getInverseIndexRow(self.kw_id[word], Global.inverse_dir,
                                      Global.filesize)
     record = json.loads(ii_line)
     if ishow:
         for rec in record:
             line = cut.getRow(int(rec), Global.cutnews_origin_dir,
                               Global.filesize)
             data = json.loads(line)
             # One pre-formatted string parses the same as a Python 2 print
             # statement and as a print() call; each field is followed by a
             # space and a newline, as before.
             print('%s \n%s \n%s \n' % (data['title'], data['time'],
                                        data['content']))
     # Return the inverted-index record that belongs to the single term.
     return record
Example #4
0
 def QueryPhrase(self, searchPhrase, isshow=True):
     """Return news records that contain every word of a search phrase.

     searchPhrase is decoded as UTF-8 and segmented with jieba.cut. Each
     segment is looked up in self.kw_id, its inverted-index row is loaded,
     and the document ids of all segments are intersected. At most
     Global.listsize matching rows are then loaded and returned.

     Parameters:
         searchPhrase: byte string holding the UTF-8 encoded query.
         isshow: when true, print title, raw time and full content of each
             returned record.

     Returns:
         A list of decoded record dicts. In each dict 'time' is rewritten as
         '%Y-%m-%d %H:%M:%S' local time, 'content' is cut to
         Global.snippetsize characters and 'id' holds the document id. The
         list is empty when nothing matches. An empty set (not a list) is
         returned as soon as a word is missing from self.kw_id; that
         historical return value is kept so existing callers are unaffected.
     """
     words = jieba.cut(searchPhrase.decode('utf-8'), cut_all=False)
     cut = Cut()
     # None means "no posting list seen yet". Seeding with a fixed
     # range(1, 100000) silently dropped every document id >= 100000 and
     # made a phrase without any word match arbitrary ids.
     result = None
     for word in words:
         if word not in self.kw_id:
             print('Not Exist Record')
             return set()
         idx = self.kw_id[word]
         ii_line = cut.getInverseIndexRow(idx, Global.inverse_dir,
                                          Global.filesize)
         postings = set(int(rec) for rec in json.loads(ii_line))
         if result is None:
             result = postings
         else:
             result = result & postings
     if not result:
         print('Not Exists Record!')
         return list()
     newslist = list()
     for count, rst in enumerate(result):
         # Cap the number of returned records at Global.listsize.
         if count >= Global.listsize:
             break
         line = cut.getRow(int(rst), Global.cutnews_origin_dir,
                           Global.filesize)
         data = json.loads(line)
         if isshow:
             # One pre-formatted string parses the same as a Python 2 print
             # statement and as a print() call; each field is followed by a
             # space and a newline, as before.
             print('%s \n%s \n%s \n' % (data['title'], data['time'],
                                        data['content']))
         tm = time.localtime(int(data['time']))
         data['time'] = time.strftime('%Y-%m-%d %H:%M:%S', tm)
         data['content'] = data['content'][0:Global.snippetsize]
         data['id'] = rst
         newslist.append(data)
     return newslist