def get_doc_highlights(doc, patterns, start_tag="<b>", end_tag="</b>"):
    """Return a document's title and body with pattern matches highlighted.

    Args:
        doc: Either an object exposing ``title`` and ``body`` string
            attributes, or an integer id, in which case the document is
            loaded with ``Document.objects.get(id=doc)``.
        patterns: Iterable of patterns; each one is passed in turn to
            ``Highlighter.highlight_re`` for the title and then the body.
        start_tag: Forwarded to ``Highlighter.highlight_re`` (default ``<b>``).
        end_tag: Forwarded to ``Highlighter.highlight_re`` (default ``</b>``).

    Returns:
        A dict ``{"title": ..., "body": ...}`` holding the processed strings
        with leading and trailing whitespace stripped.
    """
    import numbers

    # isinstance() instead of an exact type() comparison: ids that are int
    # subclasses (or Python 2 ``long`` values) must also trigger the lookup
    # rather than falling through to an AttributeError on ``doc.title``.
    if isinstance(doc, numbers.Integral):
        doc = Document.objects.get(id=doc)

    # Both texts are padded with one space on each side before highlighting.
    # NOTE(review): presumably so patterns that require surrounding
    # whitespace can match at the very start/end of the text -- confirm
    # against Highlighter.highlight_re.
    title = " " + doc.title + " "
    body = " " + doc.body + " "

    for pattern in patterns:
        title = Highlighter.highlight_re(title, pattern, start_tag, end_tag)
        body = Highlighter.highlight_re(body, pattern, start_tag, end_tag)

    # strip() removes the padding added above (and any other outer whitespace).
    return {"title": title.strip(), "body": body.strip()}
def get_doc_highlights(doc, patterns, start_tag='<b>', end_tag='</b>'):
    """Highlight every pattern in a document's title and body.

    Args:
        doc: A document-like object with ``title`` and ``body`` string
            attributes, or an integer id that is resolved through
            ``Document.objects.get(id=doc)``.
        patterns: Iterable of patterns, applied one after another via
            ``Highlighter.highlight_re`` (title first, then body).
        start_tag: Passed through to ``Highlighter.highlight_re``.
        end_tag: Passed through to ``Highlighter.highlight_re``.

    Returns:
        ``{'title': <str>, 'body': <str>}`` with outer whitespace stripped.
    """
    import numbers

    # Accept any integral id (int subclasses, Python 2 ``long``), not only
    # objects whose exact type is ``int``.
    if isinstance(doc, numbers.Integral):
        doc = Document.objects.get(id=doc)

    # One space of padding on each side is added before highlighting and
    # removed again by strip() below.
    # NOTE(review): looks intended to let whitespace-delimited patterns match
    # at the text boundaries -- verify against Highlighter.highlight_re.
    title = ' ' + doc.title + ' '
    body = ' ' + doc.body + ' '

    for pattern in patterns:
        title = Highlighter.highlight_re(title, pattern, start_tag, end_tag)
        body = Highlighter.highlight_re(body, pattern, start_tag, end_tag)

    return {'title': title.strip(), 'body': body.strip()}
def search(text, entities=None, page_size=10, page_number=1, sort='-pub_date',
           facet_max=0, highlight=False, highlight_inline=False,
           start_tag='<b>', end_tag='</b>'):
    """Search documents by text and/or entities and return one page of results.

    Args:
        text: Search string. ``'*'`` selects ``Document.objects.all()``; any
            other truthy value is passed to ``Document.objects.search``. A
            falsy value means the search is driven by ``entities`` alone.
        entities: Optional sequence of values; each one is applied as a
            chained ``filter(entities=...)``. ``None`` is treated as empty.
        page_size: Number of documents per page; negative values become 0.
        page_number: 1-based page index; values below 1 become 1.
        sort: Ordering handed to ``order_by``; ``None`` means ``'-pub_date'``.
        facet_max: Forwarded to ``Index.get_results_facets``; negative values
            become 0.
        highlight: When true, compute highlights for the returned page.
        highlight_inline: When true and highlights were computed, the
            highlighted title/body are written onto the document objects and
            the returned ``'highlights'`` mapping is emptied.
        start_tag: Forwarded to ``Index.get_result_highlights``.
        end_tag: Forwarded to ``Index.get_result_highlights``.

    Returns:
        A dict with keys ``'query'`` (the text, or ``''`` when it was ``None``
        or ``'*'``), ``'results'``, ``'facets'`` and ``'highlights'``. When
        nothing matches, the last three are ``[]``, ``{}`` and ``{}``.
    """
    # A fresh list per call: the previous ``entities=[]`` default was a single
    # shared object that is also handed to Highlighter.get_highlight_patterns.
    if entities is None:
        entities = []

    if page_number < 1:
        page_number = 1
    # NOTE(review): the print() calls in this function look like leftover
    # debug tracing; kept as-is so stdout output is unchanged.
    print('feed facets query:')
    if page_size < 0:
        page_size = 0
    if sort is None:
        sort = '-pub_date'
    if facet_max < 0:
        facet_max = 0

    start = (page_number - 1) * page_size
    end = start + page_size

    results = None
    if text:
        if text == '*':
            # Wildcard: every document; the reported query becomes ''.
            text = None
            results = Document.objects.all()
        else:
            results = Document.objects.search(text)
        for entity in entities:
            results = results.filter(entities=entity)
    elif entities:
        # Entity-only search: seed with the first entity, chain the rest.
        remaining = iter(entities)
        results = Document.objects.filter(entities=next(remaining))
        for entity in remaining:
            results = results.filter(entities=entity)

    if not results:
        if text is None:
            text = ''
        return {'query': text, 'results': [], 'facets': {}, 'highlights': {}}

    print('get results facets')
    facets = Index.get_results_facets(results, facet_max)
    print('got facets')
    print('get docs')
    docs = results.order_by(sort)[start:end]
    print('get highlights')

    highlights = {}
    if docs.count() > 0 and highlight:
        patterns = Highlighter.get_highlight_patterns(text, entities)
        if len(patterns) > 0:
            highlights = Index.get_result_highlights(docs, patterns, start_tag, end_tag)
            if highlight_inline:
                for doc in docs:
                    if doc.id in highlights:
                        # Separate local name: the old code rebound the
                        # ``highlight`` parameter here.
                        doc_highlight = highlights[doc.id]
                        doc.title = doc_highlight['title']
                        doc.body = doc_highlight['body']
                # Highlights now live on the documents themselves.
                highlights = {}

    if text is None:
        text = ''
    return {'query': text, 'results': docs, 'facets': facets, 'highlights': highlights}
def highlighted(self):
    """Return this object's content run through ``Highlighter.highlight_text``.

    The call receives ``self.content()``, ``None`` and ``self.entities.all()``
    in that order.
    NOTE(review): the ``None`` argument is presumably the (absent) search
    text -- confirm against ``Highlighter.highlight_text``.
    """
    content = self.content()
    linked_entities = self.entities.all()
    return Highlighter.highlight_text(content, None, linked_entities)
def search(
    text,
    entities=None,
    page_size=10,
    page_number=1,
    sort="-pub_date",
    facet_max=0,
    highlight=False,
    highlight_inline=False,
    start_tag="<b>",
    end_tag="</b>",
):
    """Run a document search and return one page of results with facets.

    Args:
        text: Search string. ``"*"`` selects ``Document.objects.all()``; any
            other truthy value goes to ``Document.objects.search``. A falsy
            value means only ``entities`` drive the search.
        entities: Optional sequence of values, each applied as a chained
            ``filter(entities=...)``. ``None`` is treated as empty.
        page_size: Documents per page; negative values are clamped to 0.
        page_number: 1-based page index; values below 1 are clamped to 1.
        sort: Ordering passed to ``order_by``; ``None`` means ``"-pub_date"``.
        facet_max: Forwarded to ``Index.get_results_facets``; negative values
            are clamped to 0.
        highlight: When true, compute highlights for the returned page.
        highlight_inline: When true and highlights were computed, write the
            highlighted title/body onto the document objects and return an
            empty ``"highlights"`` mapping.
        start_tag: Forwarded to ``Index.get_result_highlights``.
        end_tag: Forwarded to ``Index.get_result_highlights``.

    Returns:
        A dict with keys ``"query"`` (the text, or ``""`` when it was ``None``
        or ``"*"``), ``"results"``, ``"facets"`` and ``"highlights"``. When
        nothing matches, the last three are ``[]``, ``{}`` and ``{}``.
    """
    # Avoid a shared mutable default; the list is also passed on to
    # Highlighter.get_highlight_patterns.
    if entities is None:
        entities = []

    if page_number < 1:
        page_number = 1
    # Single-argument print(...) is valid on Python 3 and prints the same
    # text on Python 2, unlike the former ``print "..."`` statements.
    # NOTE(review): these look like leftover debug traces.
    print("feed facets query:")
    if page_size < 0:
        page_size = 0
    if sort is None:
        sort = "-pub_date"
    if facet_max < 0:
        facet_max = 0

    start = (page_number - 1) * page_size
    end = start + page_size

    results = None
    if text:
        if text == "*":
            # Wildcard: every document; the reported query becomes "".
            text = None
            results = Document.objects.all()
        else:
            results = Document.objects.search(text)
        for entity in entities:
            results = results.filter(entities=entity)
    elif entities:
        # Entity-only search: seed with the first entity, chain the rest.
        entity_iter = iter(entities)
        results = Document.objects.filter(entities=next(entity_iter))
        for entity in entity_iter:
            results = results.filter(entities=entity)

    if not results:
        if text is None:
            text = ""
        return {"query": text, "results": [], "facets": {}, "highlights": {}}

    print("get results facets")
    facets = Index.get_results_facets(results, facet_max)
    print("got facets")
    print("get docs")
    docs = results.order_by(sort)[start:end]
    print("get highlights")

    highlights = {}
    if docs.count() > 0 and highlight:
        patterns = Highlighter.get_highlight_patterns(text, entities)
        if len(patterns) > 0:
            highlights = Index.get_result_highlights(docs, patterns, start_tag, end_tag)
            if highlight_inline:
                for doc in docs:
                    if doc.id in highlights:
                        # Distinct local name so the ``highlight`` flag
                        # parameter is no longer overwritten.
                        per_doc = highlights[doc.id]
                        doc.title = per_doc["title"]
                        doc.body = per_doc["body"]
                # Highlights were applied in place; nothing left to report.
                highlights = {}

    if text is None:
        text = ""
    return {"query": text, "results": docs, "facets": facets, "highlights": highlights}
def highlighted(self):
    """Return the output of ``Highlighter.highlight_text`` for this object.

    Arguments passed, in order: the value of ``self.content()``, ``None``,
    and ``self.entities.all()``.
    NOTE(review): ``None`` presumably stands for "no search text" -- verify
    against the ``Highlighter.highlight_text`` signature.
    """
    body_text = self.content()
    all_entities = self.entities.all()
    highlighted_text = Highlighter.highlight_text(body_text, None, all_entities)
    return highlighted_text