Ejemplo n.º 1
0
 def test_direct_usage(self):
     """Parse an RSC HTML fixture with RscHtmlReader and check the element count.

     The fixture is read as raw bytes from ``data/rsc`` next to this test
     module and handed to ``readstring``; the resulting document is expected
     to expose 61 elements.
     """
     r = RscHtmlReader()
     fname = '10.1039_C6OB02074G.html'
     path = os.path.join(os.path.dirname(__file__), 'data', 'rsc', fname)
     # Use a context manager so the file handle is closed even if read() raises.
     with io.open(path, 'rb') as f:
         content = f.read()
     d = r.readstring(content)
     self.assertEqual(len(d.elements), 61)
Ejemplo n.º 2
0
 def test_fig_id_detection(self):
     """Check that RscHtmlReader yields the expected number of figure ids.

     The fixture is read as raw bytes from ``data/rsc`` next to this test
     module, parsed with ``readstring``, and the ``id`` of every entry in
     ``d.figures`` is collected; four ids are expected.
     """
     r = RscHtmlReader()
     fname = '10.1039_C6OB02074G.html'
     path = os.path.join(os.path.dirname(__file__), 'data', 'rsc', fname)
     # Use a context manager so the file handle is closed even if read() raises.
     with io.open(path, 'rb') as f:
         content = f.read()
     d = r.readstring(content)
     figs = d.figures
     ids = [fig.id for fig in figs]
     self.assertEqual(len(ids), 4)
 def test_fig_and_fig_cation_detection(self):
     """Check that RscHtmlReader finds the expected figures and captions.

     The fixture is read as raw bytes from ``data/rsc`` next to this test
     module and parsed with ``readstring``. The test then asserts that there
     are six figures, six captions whose text is neither empty nor a lone
     newline, and that the second such caption consists of one sentence.
     """
     r = RscHtmlReader()
     fname = 'B9PP00180H.html'
     path = os.path.join(os.path.dirname(__file__), 'data', 'rsc', fname)
     # Use a context manager so the file handle is closed even if read() raises.
     with io.open(path, 'rb') as f:
         content = f.read()
     d = r.readstring(content)
     figs = d.figures
     # The previous check `!= ('\n' or '')` reduced to `!= '\n'` because
     # `'\n' or ''` evaluates to '\n'; test membership so that empty
     # captions are excluded as well.
     captions = [
         fig.caption for fig in figs if fig.caption.text not in ('\n', '')
     ]
     self.assertEqual(len(figs), 6)
     self.assertEqual(len(captions), 6)
     self.assertEqual(len(captions[1].sentences), 1)