def test_search(self):
    """Check ``text_search`` output for a download-link regex.

    The same pattern is run twice against ``HTML``: once with
    ``preview=True`` and once without. Both runs are expected to report
    the same two matches at the same offsets; only the shape of the
    returned dicts differs.
    """
    regexp = r'(?P<link><a.*?href="(?P<url>http://.*?/(?P<filename>.*?)/at_download/file/?.*?)".*?>)'

    # Preview mode: surrounding context and the match (wrapped in a
    # <span class="mark">) are delivered together in a single 'text' value.
    expected_preview = [
        {
            'start': 793,
            'end': 872,
            'text': '...lorem.\n<span class="mark"><a class="internal-link"\n'
                    ' href="http://foo.org/aaa/bar.exe/at_download/file"></span>BAR fil...',
        },
        {
            'start': 1243,
            'end': 1330,
            'text': '...p>\n <span class="mark"><a href="http://foo.org/aaa/foo.pdf/at_download/file/@@someview" class="internal-link"></span>FOO\nfil...',
        },
    ]
    preview_hits = text_search(HTML, regexp, preview=True, flags=re.DOTALL)
    self.assertEqual(len(preview_hits), 2)
    self.assertEqual(preview_hits, expected_preview)

    # Default mode: the match is in 'text' and the context is split into
    # separate 'pre_text' / 'post_text' values.
    expected_plain = [
        {
            'start': 793,
            'end': 872,
            'pre_text': '...lorem.\n',
            'post_text': 'BAR fil...',
            'text': '<a class="internal-link"\n'
                    ' href="http://foo.org/aaa/bar.exe/at_download/file">',
        },
        {
            'start': 1243,
            'end': 1330,
            'pre_text': '...p>\n ',
            'post_text': 'FOO\nfil...',
            'text': '<a href="http://foo.org/aaa/foo.pdf/at_download/file/@@someview" class="internal-link">',
        },
    ]
    plain_hits = text_search(HTML, regexp, flags=re.DOTALL)
    self.assertEqual(len(plain_hits), 2)
    self.assertEqual(plain_hits, expected_plain)
def test_search(self):
    """Verify ``text_search`` on ``HTML`` with and without ``preview``.

    A regex matching ``at_download/file`` anchors is expected to yield
    two matches in both modes, with identical ``start``/``end`` offsets.
    """
    regexp = r'(?P<link><a.*?href="(?P<url>http://.*?/(?P<filename>.*?)/at_download/file/?.*?)".*?>)'

    # With preview=True each hit carries one 'text' value in which the
    # match is wrapped in <span class="mark">...</span>.
    first_preview = {
        'start': 793,
        'end': 872,
        'text': '...lorem.\n<span class="mark"><a class="internal-link"\n'
                ' href="http://foo.org/aaa/bar.exe/at_download/file"></span>BAR fil...',
    }
    second_preview = {
        'start': 1243,
        'end': 1330,
        'text': '...p>\n <span class="mark"><a href="http://foo.org/aaa/foo.pdf/at_download/file/@@someview" class="internal-link"></span>FOO\nfil...',
    }
    found = text_search(HTML, regexp, preview=True, flags=re.DOTALL)
    self.assertEqual(len(found), 2)
    self.assertEqual(found, [first_preview, second_preview])

    # Without preview the match text and its context are separate keys.
    first_plain = {
        'start': 793,
        'end': 872,
        'pre_text': '...lorem.\n',
        'post_text': 'BAR fil...',
        'text': '<a class="internal-link"\n'
                ' href="http://foo.org/aaa/bar.exe/at_download/file">',
    }
    second_plain = {
        'start': 1243,
        'end': 1330,
        'pre_text': '...p>\n ',
        'post_text': 'FOO\nfil...',
        'text': '<a href="http://foo.org/aaa/foo.pdf/at_download/file/@@someview" class="internal-link">',
    }
    found = text_search(HTML, regexp, flags=re.DOTALL)
    self.assertEqual(len(found), 2)
    self.assertEqual(found, [first_plain, second_plain])
def batchSearch(self):
    """Search one batch of documents and return the matches as JSON.

    Request parameters read:

    * ``searchQuery`` -- pattern handed to ``utility.text_search``; byte
      strings are decoded as UTF-8, text is used as-is.
    * ``content_type`` -- forwarded to ``self._catalog_search``.
    * ``b_start`` / ``b_size`` -- batch offset and size (defaults 0 / 20).
    * ``really_checked_docs`` -- running count of documents that had text
      (default 0); incremented here and echoed back in the response.
    * ``flags`` -- forwarded unchanged to ``utility.text_search``.
    * ``portlets`` -- forwarded unchanged to ``self._catalog_search``.

    Returns a JSON string. Its ``results`` member is ``[]`` when the query
    or the content types are missing, ``null`` when the catalog returned
    nothing for this batch, and otherwise the list of matches, each one
    enriched with ``url``, ``id``, ``uid``, ``title``, ``icon`` and
    ``normalized_portal_type`` of the document it was found in. In the
    last case ``total_documents_count`` and ``really_checked_docs`` are
    included as well.
    """
    request = self.request
    request.response.setHeader('Content-Type',
                               'application/json;charset=utf-8')

    def int_param(name, default):
        # Request values can arrive as strings; the counters below are used
        # arithmetically, so normalise them and fall back to the default on
        # anything that is not a number.
        try:
            return int(request.get(name, default))
        except (TypeError, ValueError):
            return default

    search_query = request.get('searchQuery', '')
    if isinstance(search_query, bytes):
        # Only byte strings need decoding; already-decoded text must not be
        # pushed through .decode() again.
        search_query = search_query.decode('utf-8')
    b_start = int_param('b_start', 0)
    b_size = int_param('b_size', 20)
    really_checked_docs = int_param('really_checked_docs', 0)
    # NOTE(review): flags and portlets are passed through untouched because
    # their wire format is not visible here -- confirm against the client.
    flags = request.get('flags', 0)
    portlets = request.get('portlets', False)
    portal_type = request.get('content_type', [])

    if not portal_type or not search_query:
        return json.dumps({'results': []})

    total_documents_count, search_results = self._catalog_search(
        portal_type, portlets, b_start, b_size)
    if not search_results:
        # stop client side queries
        return json.dumps({'results': None})

    # Document metadata copied onto every match found in that document.
    copied_keys = ('url', 'id', 'uid', 'title', 'icon',
                   'normalized_portal_type')
    results = []
    for search_result in search_results:
        if not search_result.text:
            continue
        really_checked_docs += 1
        inner_results = utility.text_search(search_result.text,
                                            search_query,
                                            flags=flags,
                                            preview=True)
        for result in inner_results:
            for key in copied_keys:
                result[key] = search_result[key]
        results.extend(inner_results)

    return json.dumps({
        'total_documents_count': total_documents_count,
        'really_checked_docs': really_checked_docs,
        'results': results,
    })
def get_possible_replacements(self, regex, repl, flags):
    """List the substitutions that ``regex`` -> ``repl`` would perform.

    Every text adapter returned by ``self._get_text_adapters()`` is
    searched with ``text_search``; for each match the pattern is applied
    to the matched text alone, and the match is reported only when the
    substitution actually changes it.

    Returns a list of dicts with ``old``, ``new``, ``start``, ``end``,
    ``pre_text`` and ``post_text``. ``start``/``end`` are shifted by the
    accumulated length of the preceding adapters' texts plus one per
    adapter (presumably a separator between texts -- confirm with the
    code that consumes these offsets).
    """
    replacements = []
    base = 0
    for adapter in self._get_text_adapters():
        hits = text_search(adapter.utext, regex, flags=flags)
        compiled = re.compile(regex, flags)
        for hit in hits:
            before = hit['text']
            after = compiled.sub(repl, before)
            if before == after:
                # Substitution is a no-op for this match; nothing to report.
                continue
            replacements.append({
                'old': before,
                'new': after,
                'start': hit['start'] + base,
                'end': hit['end'] + base,
                'pre_text': hit['pre_text'],
                'post_text': hit['post_text'],
            })
        base += len(adapter.utext) + 1
    return replacements