Ejemplo n.º 1
0
 def prepare(self, obj):
     """Build the index data for ``obj`` from the HTML content of its file.

     Starts from the data prepared by the parent class, then stores the
     ``title`` and ``body`` that ``HtmlExtractor`` pulls out of the file
     content, and finally the ``text`` field rendered from a template
     context holding ``obj`` together with that title and body.

     Returns the resulting data mapping.
     """
     prepared = super(ImportedFileIndex, self).prepare(obj)
     parsed = HtmlExtractor(self.get_file_content(obj))
     title, body = parsed.title, parsed.content
     prepared['title'] = title
     prepared['body'] = body
     # the template receives the object itself alongside the extracted content
     template_context = {'object': obj, 'title': title, 'body': body}
     prepared['text'] = self.fields['text'].prepare_template(template_context)
     logger.info(
         'Search Index: indexing file project=%s path=%s',
         obj.project_id,
         obj.path,
     )
     return prepared
Ejemplo n.º 2
0
 def test_strip_all(self):
     """``strip_all`` turns ``<div>tag</div>`` into the bare text ``tag``."""
     markup = '<div>tag</div>'
     stripped = HtmlExtractor('').strip_all(markup)
     self.assertEqual(stripped, 'tag')
Ejemplo n.º 3
0
 def test_strip_all_no_document(self):
     """``strip_all`` on an empty string gives back an empty string."""
     stripped = HtmlExtractor('').strip_all('')
     self.assertEqual(stripped, '')
Ejemplo n.º 4
0
 def test_title_raise_value_error(self):
     """``title`` is ``None`` when ``doc`` raises ``ValueError`` on call."""
     # a mock with this side_effect raises ValueError whenever it is called
     failing_doc = Mock(side_effect=ValueError())
     subject = HtmlExtractor("<h1></h1>")
     subject.doc = failing_doc
     self.assertIsNone(subject.title)
Ejemplo n.º 5
0
 def test_title_no_header(self):
     """A document holding only a ``<div>`` yields a ``None`` title."""
     title = HtmlExtractor("<div>Dummy Title</div>").title
     self.assertIsNone(title)
Ejemplo n.º 6
0
 def test_title_no_doc(self):
     """An extractor built from an empty string has a ``None`` title."""
     self.assertIsNone(HtmlExtractor("").title)
Ejemplo n.º 7
0
 def test_title(self):
     """``title`` for ``<h1>Dummy Title</h1>`` is ``Dummy Title``."""
     heading_only = HtmlExtractor("<h1>Dummy Title</h1>")
     self.assertEqual(heading_only.title, "Dummy Title")
Ejemplo n.º 8
0
 def test_content_raise_value_error(self):
     """``content`` is ``None`` when ``doc`` raises ``ValueError`` on call."""
     # a mock with this side_effect raises ValueError whenever it is called
     failing_doc = Mock(side_effect=ValueError())
     subject = HtmlExtractor("<div></div>")
     subject.doc = failing_doc
     self.assertIsNone(subject.content)
Ejemplo n.º 9
0
 def test_content_no_body(self):
     """A document holding only a ``<div>`` yields ``None`` content."""
     content = HtmlExtractor("<div>Dummy Content</div>").content
     self.assertIsNone(content)
Ejemplo n.º 10
0
 def test_content_no_doc(self):
     """An extractor built from an empty string has ``None`` content."""
     self.assertIsNone(HtmlExtractor("").content)
Ejemplo n.º 11
0
 def test_content(self):
     """``content`` for ``<body>Dummy Content</body>`` is ``Dummy Content``."""
     body_only = HtmlExtractor("<body>Dummy Content</body>")
     self.assertEqual(body_only.content, "Dummy Content")
Ejemplo n.º 12
0
 def test_init_with_empty_document(self):
     """Constructing from an empty string leaves ``doc`` set to ``None``."""
     self.assertIsNone(HtmlExtractor("").doc)
Ejemplo n.º 13
0
 def test_init(self):
     """Constructing from ``<div></div>`` sets ``doc`` to a ``PyQuery``."""
     doc = HtmlExtractor("<div></div>").doc
     self.assertIsNotNone(doc)
     self.assertIsInstance(doc, PyQuery)