Example #1
0
 def test_search(self):
     """Exercise ``text_search`` on ``HTML`` with and without ``preview``.

     Both calls are expected to report the same two download links at the
     same offsets; only the shape of each result dictionary differs.
     """
     link_pattern = r'(?P<link><a.*?href="(?P<url>http://.*?/(?P<filename>.*?)/at_download/file/?.*?)".*?>)'

     # preview=True: each hit carries a single ``text`` value in which the
     # match is HTML-escaped and wrapped in a ``<span class="mark">``.
     first_preview = {
         'start': 793,
         'end': 872,
         'text': ('...lorem.\n<span class="mark">&lt;a class="internal-link"\n'
                  '   href="http://foo.org/aaa/bar.exe/at_download/file"&gt;</span>BAR fil...'),
     }
     second_preview = {
         'start': 1243,
         'end': 1330,
         'text': '...p&gt;\n    <span class="mark">&lt;a href="http://foo.org/aaa/foo.pdf/at_download/file/@@someview" class="internal-link"&gt;</span>FOO\nfil...',
     }
     previews = text_search(HTML, link_pattern, preview=True, flags=re.DOTALL)
     self.assertEqual(len(previews), 2)
     self.assertEqual(previews, [first_preview, second_preview])

     # Default mode: the raw match is in ``text`` and the surrounding
     # context is split into ``pre_text`` / ``post_text``.
     first_plain = {
         'start': 793,
         'end': 872,
         'pre_text': '...lorem.\n',
         'post_text': 'BAR fil...',
         'text': ('<a class="internal-link"\n'
                  '   href="http://foo.org/aaa/bar.exe/at_download/file">'),
     }
     second_plain = {
         'start': 1243,
         'end': 1330,
         'pre_text': '...p&gt;\n    ',
         'post_text': 'FOO\nfil...',
         'text': '<a href="http://foo.org/aaa/foo.pdf/at_download/file/@@someview" class="internal-link">',
     }
     plain = text_search(HTML, link_pattern, flags=re.DOTALL)
     self.assertEqual(len(plain), 2)
     self.assertEqual(plain, [first_plain, second_plain])
Example #2
0
 def test_search(self):
     """Verify the two result formats produced by ``text_search``.

     The same pattern is run twice against ``HTML`` (once with
     ``preview=True``, once with the default) and each run must yield
     exactly the two expected link matches.
     """
     regexp = r'(?P<link><a.*?href="(?P<url>http://.*?/(?P<filename>.*?)/at_download/file/?.*?)".*?>)'

     # Expected output when a ready-to-render preview is requested.
     expected_preview = [
         {'start': 793, 'end': 872,
          'text': '...lorem.\n<span class="mark">&lt;a class="internal-link"\n'
                  '   href="http://foo.org/aaa/bar.exe/at_download/file"&gt;</span>BAR fil...'},
         {'start': 1243, 'end': 1330,
          'text': '...p&gt;\n    <span class="mark">&lt;a href="http://foo.org/aaa/foo.pdf/at_download/file/@@someview" class="internal-link"&gt;</span>FOO\nfil...'},
     ]
     # Expected output in the default mode: match and context kept apart.
     expected_raw = [
         {'start': 793, 'end': 872,
          'pre_text': '...lorem.\n',
          'post_text': 'BAR fil...',
          'text': '<a class="internal-link"\n'
                  '   href="http://foo.org/aaa/bar.exe/at_download/file">'},
         {'start': 1243, 'end': 1330,
          'pre_text': '...p&gt;\n    ',
          'post_text': 'FOO\nfil...',
          'text': '<a href="http://foo.org/aaa/foo.pdf/at_download/file/@@someview" class="internal-link">'},
     ]

     found = text_search(HTML, regexp, preview=True, flags=re.DOTALL)
     self.assertEqual(len(found), 2)
     self.assertEqual(found, expected_preview)

     found = text_search(HTML, regexp, flags=re.DOTALL)
     self.assertEqual(len(found), 2)
     self.assertEqual(found, expected_raw)
Example #3
0
    def batchSearch(self):
        """Search one batch of documents and return the matches as JSON.

        Request parameters read: ``searchQuery``, ``b_start``, ``b_size``,
        ``really_checked_docs``, ``flags``, ``portlets`` and
        ``content_type``.

        Returns a JSON string. Its ``results`` member is:

        * an empty list when no content type or no search query was given;
        * ``null`` when the catalog search returned nothing, which tells the
          client to stop asking for further batches;
        * otherwise the list of matches, each enriched with the metadata of
          the document it was found in. In this case the response also
          carries ``total_documents_count`` and ``really_checked_docs``.
        """
        request = self.request
        request.response.setHeader('Content-Type',
                                   'application/json;charset=utf-8')
        search_query = request.get('searchQuery', '').decode('utf-8')
        b_start = request.get('b_start', 0)
        b_size = request.get('b_size', 20)
        # This counter is incremented below; a value that reaches us as a
        # plain request string would make ``+= 1`` raise TypeError, so it is
        # normalised to an integer up front.
        really_checked_docs = int(request.get('really_checked_docs', 0))
        flags = request.get('flags', 0)
        portlets = request.get('portlets', False)
        portal_type = request.get('content_type', [])

        result_json = {}
        results = []

        if not portal_type or not search_query:
            result_json['results'] = results
            return json.dumps(result_json)

        total_documents_count, search_results = \
            self._catalog_search(portal_type, portlets, b_start, b_size)

        if not search_results:
            # stop client side queries
            result_json['results'] = None
            return json.dumps(result_json)

        # Document metadata copied onto every match found in that document.
        copied_keys = ('url', 'id', 'uid', 'title', 'icon',
                       'normalized_portal_type')

        for search_result in search_results:
            # Only documents that actually have text are searched and counted.
            if search_result.text:
                really_checked_docs += 1
                inner_results = utility.text_search(search_result.text,
                                                    search_query,
                                                    flags=flags,
                                                    preview=True)
                for result in inner_results:
                    for key in copied_keys:
                        result[key] = search_result[key]
                results.extend(inner_results)

        result_json['total_documents_count'] = total_documents_count
        result_json['really_checked_docs'] = really_checked_docs
        result_json['results'] = results
        return json.dumps(result_json)
Example #4
0
 def get_possible_replacements(self, regex, repl, flags):
     """Collect every match whose text would change under a substitution.

     Each text adapter returned by ``self._get_text_adapters()`` is searched
     with ``text_search``; ``repl`` is then applied to the matched text
     alone, and the match is reported only if that produces a different
     string.

     Returns a list of dictionaries with the keys ``old``, ``new``,
     ``start``, ``end``, ``pre_text`` and ``post_text``. ``start`` and
     ``end`` are shifted by the combined length of the preceding adapters'
     texts plus one per adapter.
     """
     replacements = []
     shift = 0
     for adapter in self._get_text_adapters():
         hits = text_search(adapter.utext, regex, flags=flags)
         compiled = re.compile(regex, flags)
         for hit in hits:
             before = hit['text']
             after = compiled.sub(repl, before)
             if before == after:
                 # The substitution is a no-op for this match: skip it.
                 continue
             replacements.append({
                 'old': before,
                 'new': after,
                 'start': hit['start'] + shift,
                 'end': hit['end'] + shift,
                 'pre_text': hit['pre_text'],
                 'post_text': hit['post_text'],
             })
         # Move past this adapter's text (plus one) for the next adapter.
         shift += len(adapter.utext) + 1
     return replacements