Ejemplo n.º 1
0
	def QueryPhrase(self,searchPhrase,isshow = True):
		words = jieba.cut(searchPhrase.decode('utf-8'),cut_all=False)
		cut = Cut()
		result = set(range(1,100000))
		for word in words:
			if not self.kw_id.has_key(word):
				print 'Not Exist Record'
				return set()
			idx = self.kw_id[word]
			ii_line = cut.getInverseIndexRow(idx,Global.inverse_dir,Global.filesize)
			record =json.loads(ii_line)
			re = set()
			for rec in record:
				re.add(int(rec))
			result = result & re
		if len(result) == 0:
			print 'Not Exists Record!'
		newslist=list()
		count = 0
		for rst in result:
		  	count+=1
		  	if count > Global.listsize:
		  		break
			line = cut.getRow(int(rst),Global.cutnews_origin_dir,Global.filesize)
			data = json.loads(line)
			if isshow:
				print data['title'],'\n',data['time'],'\n',data['content'],'\n'
			tm = time.localtime(int(data['time']))
			data['time'] = time.strftime('%Y-%m-%d %H:%M:%S',tm)
			data['content'] = data['content'][0:Global.snippetsize]
			data['id'] = rst
			newslist.append(data)
		return newslist
Ejemplo n.º 2
0
	def loadDataFromCutFile(self,totalnum):
		doc = []
		cut = Cut()
		for i in range(1,totalnum):
			line = cut.getRow(i,Global.cutnews_dir,Global.filesize)
			if not line:
				break
			data = json.loads(line)
			keyword = analyse.extract_tags(data['content'],topK=20)
			seg = " ".join(keyword)
			print seg
			doc.append(seg)
		return doc
Ejemplo n.º 3
0
 def loadDataFromCutFile(self, totalnum):
     doc = []
     cut = Cut()
     for i in range(1, totalnum):
         line = cut.getRow(i, Global.cutnews_dir, Global.filesize)
         if not line:
             break
         data = json.loads(line)
         keyword = analyse.extract_tags(data['content'], topK=20)
         seg = " ".join(keyword)
         print seg
         doc.append(seg)
     return doc
Ejemplo n.º 4
0
 def QueryById(self, no):
     """Fetch a single news record by its numeric id.

     Args:
         no: UTF-8 encoded byte string holding the id; it is decoded and
             converted with ``int`` (a non-numeric value raises ValueError).

     Returns:
         The dict parsed from the stored row, or a placeholder
         "No Such News" dict when the id is 0 or no row is found.
     """
     news_id = int(no.decode('utf-8'))
     placeholder = {
         'title': "No Such News",
         'time': '',
         'content': "Oh No!",
         'url': "#",
     }
     if news_id == 0:
         return placeholder
     row = Cut().getRow(news_id, Global.cutnews_origin_dir, Global.filesize)
     if not row:
         return placeholder
     return json.loads(row)
Ejemplo n.º 5
0
	def QueryById(self,no):
		"""Look up one news record by id.

		Args:
			no: UTF-8 encoded byte string with the record id.  It is decoded
				and passed to ``int``, so non-numeric input raises ValueError.

		Returns:
			The record dict decoded from the stored JSON row.  When the id is
			0 or ``Cut.getRow`` yields nothing, a placeholder dict with the
			title "No Such News" is returned instead.
		"""
		record_no = int(no.decode('utf-8'))
		fallback = dict(title="No Such News", time='', content="Oh No!", url="#")
		if record_no == 0:
			return fallback
		raw = Cut().getRow(record_no,Global.cutnews_origin_dir,Global.filesize)
		return json.loads(raw) if raw else fallback
Ejemplo n.º 6
0
	def QuerySingle(self,searchWord,ishow):
		if self.kw_id.has_key(searchWord.decode('utf-8')):
			idx = self.kw_id[searchWord.decode('utf-8')]
			cut = Cut()
			ii_line = cut.getInverseIndexRow(idx,Global.inverse_dir,Global.filesize)
			record =json.loads(ii_line)
			if ishow:
				for rec in record:
					line = cut.getRow(int(rec),Global.cutnews_origin_dir,Global.filesize)
					data = json.loads(line)
					print data['title'],'\n',data['time'],'\n',data['content'],'\n'
		#返回单个词项对应的倒排记录表
			return record
		else:
			if isshow:
				print 'Not Exists Record!'
			#调用该函数后需要对结果进行判断
			return dict()
Ejemplo n.º 7
0
 def QuerySingle(self, searchWord, ishow):
     if self.kw_id.has_key(searchWord.decode('utf-8')):
         idx = self.kw_id[searchWord.decode('utf-8')]
         cut = Cut()
         ii_line = cut.getInverseIndexRow(idx, Global.inverse_dir,
                                          Global.filesize)
         record = json.loads(ii_line)
         if ishow:
             for rec in record:
                 line = cut.getRow(int(rec), Global.cutnews_origin_dir,
                                   Global.filesize)
                 data = json.loads(line)
                 print data['title'], '\n', data['time'], '\n', data[
                     'content'], '\n'
     #返回单个词项对应的倒排记录表
         return record
     else:
         if isshow:
             print 'Not Exists Record!'
         #调用该函数后需要对结果进行判断
         return dict()
Ejemplo n.º 8
0
 def QueryPhrase(self, searchPhrase, isshow=True):
     words = jieba.cut(searchPhrase.decode('utf-8'), cut_all=False)
     cut = Cut()
     result = set(range(1, 100000))
     for word in words:
         if not self.kw_id.has_key(word):
             print 'Not Exist Record'
             return set()
         idx = self.kw_id[word]
         ii_line = cut.getInverseIndexRow(idx, Global.inverse_dir,
                                          Global.filesize)
         record = json.loads(ii_line)
         re = set()
         for rec in record:
             re.add(int(rec))
         result = result & re
     if len(result) == 0:
         print 'Not Exists Record!'
     newslist = list()
     count = 0
     for rst in result:
         count += 1
         if count > Global.listsize:
             break
         line = cut.getRow(int(rst), Global.cutnews_origin_dir,
                           Global.filesize)
         data = json.loads(line)
         if isshow:
             print data['title'], '\n', data['time'], '\n', data[
                 'content'], '\n'
         tm = time.localtime(int(data['time']))
         data['time'] = time.strftime('%Y-%m-%d %H:%M:%S', tm)
         data['content'] = data['content'][0:Global.snippetsize]
         data['id'] = rst
         newslist.append(data)
     return newslist