def QueryPhrase(self,searchPhrase,isshow = True): words = jieba.cut(searchPhrase.decode('utf-8'),cut_all=False) cut = Cut() result = set(range(1,100000)) for word in words: if not self.kw_id.has_key(word): print 'Not Exist Record' return set() idx = self.kw_id[word] ii_line = cut.getInverseIndexRow(idx,Global.inverse_dir,Global.filesize) record =json.loads(ii_line) re = set() for rec in record: re.add(int(rec)) result = result & re if len(result) == 0: print 'Not Exists Record!' newslist=list() count = 0 for rst in result: count+=1 if count > Global.listsize: break line = cut.getRow(int(rst),Global.cutnews_origin_dir,Global.filesize) data = json.loads(line) if isshow: print data['title'],'\n',data['time'],'\n',data['content'],'\n' tm = time.localtime(int(data['time'])) data['time'] = time.strftime('%Y-%m-%d %H:%M:%S',tm) data['content'] = data['content'][0:Global.snippetsize] data['id'] = rst newslist.append(data) return newslist
def QuerySingle(self,searchWord,ishow): if self.kw_id.has_key(searchWord.decode('utf-8')): idx = self.kw_id[searchWord.decode('utf-8')] cut = Cut() ii_line = cut.getInverseIndexRow(idx,Global.inverse_dir,Global.filesize) record =json.loads(ii_line) if ishow: for rec in record: line = cut.getRow(int(rec),Global.cutnews_origin_dir,Global.filesize) data = json.loads(line) print data['title'],'\n',data['time'],'\n',data['content'],'\n' #返回单个词项对应的倒排记录表 return record else: if isshow: print 'Not Exists Record!' #调用该函数后需要对结果进行判断 return dict()
def QuerySingle(self, searchWord, ishow): if self.kw_id.has_key(searchWord.decode('utf-8')): idx = self.kw_id[searchWord.decode('utf-8')] cut = Cut() ii_line = cut.getInverseIndexRow(idx, Global.inverse_dir, Global.filesize) record = json.loads(ii_line) if ishow: for rec in record: line = cut.getRow(int(rec), Global.cutnews_origin_dir, Global.filesize) data = json.loads(line) print data['title'], '\n', data['time'], '\n', data[ 'content'], '\n' #返回单个词项对应的倒排记录表 return record else: if isshow: print 'Not Exists Record!' #调用该函数后需要对结果进行判断 return dict()
def QueryPhrase(self, searchPhrase, isshow=True): words = jieba.cut(searchPhrase.decode('utf-8'), cut_all=False) cut = Cut() result = set(range(1, 100000)) for word in words: if not self.kw_id.has_key(word): print 'Not Exist Record' return set() idx = self.kw_id[word] ii_line = cut.getInverseIndexRow(idx, Global.inverse_dir, Global.filesize) record = json.loads(ii_line) re = set() for rec in record: re.add(int(rec)) result = result & re if len(result) == 0: print 'Not Exists Record!' newslist = list() count = 0 for rst in result: count += 1 if count > Global.listsize: break line = cut.getRow(int(rst), Global.cutnews_origin_dir, Global.filesize) data = json.loads(line) if isshow: print data['title'], '\n', data['time'], '\n', data[ 'content'], '\n' tm = time.localtime(int(data['time'])) data['time'] = time.strftime('%Y-%m-%d %H:%M:%S', tm) data['content'] = data['content'][0:Global.snippetsize] data['id'] = rst newslist.append(data) return newslist