def test_extract_csspath(self):
    # Check CssPath extraction against a set of fixture documents.
    #
    # Each test case names a fixture file under 'extract_celems', the
    # components to run against it, and the normalized markup each component
    # is expected to render.  In every expected value the matched elements
    # appear inside a wrapper div whose class is the component's `classvalue`
    # and whose id is the value passed to process().
    from mobilize.components import CssPath
    testdata = [
        # Selection by element id.
        {'datafile': 'a.xml',
         'components': [CssPath('div#happy', classvalue='some-class')],
         'extracted': ['<div class="some-class" id="some-id"><div id="happy">lucky</div></div>'],
         },
        {'datafile': 'b.xml',
         'components': [CssPath('div#joyful', classvalue='some-class')],
         'extracted': ['<div class="some-class" id="some-id"><div id="joyful">fun</div></div>'],
         },
        # Selection by class name, where the element has exactly that class.
        {'datafile': 'c.xml',
         'components': [CssPath('p.graceful', classvalue='some-class')],
         'extracted': ['<div class="some-class" id="some-id"><p class="graceful">laughing</p></div>'],
         },
        # Selection by class name, where the element carries several classes.
        {'datafile': 'd.xml',
         'components': [CssPath('p.graceful', classvalue='some-class')],
         'extracted': ['<div class="some-class" id="some-id"><p class="skipping graceful enthusiastic">laughing</p></div>'],
         },
        # Selection by class name with more than one matching element.
        {'datafile': 'e.xml',
         'components': [CssPath('p.graceful', classvalue='some-class')],
         'extracted': ['<div class="some-class" id="some-id"><p class="skipping graceful enthusiastic">laughing</p><p class="graceful">enthusiastic</p></div>'],
         },
    ]
    for ii, td in enumerate(testdata):
        # Read the fixture inside a context manager so the file handle is
        # closed deterministically instead of being left to the GC.
        with open(data_file_path('extract_celems', td['datafile'])) as datafile:
            doc = html.fromstring(datafile.read())
        for sel in td['components']:
            sel.extract(doc)
            sel.process('some-id')
        # Normalize both sides so the comparison does not depend on
        # incidental serialization differences.
        expected = list(map(normxml, td['extracted']))
        actual = [normxml(sel.html()) for sel in td['components']]
        # Include the case index and fixture name so a failure is traceable.
        msg = 'e: %s, a: %s [%d %s]' % (expected, actual, ii, td['datafile'])
        self.assertEqual(expected, actual, msg)
def test_GoogleAnalytics_none(self):
    # Check negative case where we expect to not find GA tracking codes:
    # the component should render only its empty wrapper div.
    from mobilize.components import GoogleAnalytics
    # Read the fixture inside a context manager so the file handle is closed
    # deterministically instead of being left to the GC.
    with open(data_file_path('whole-html', 'cnn.html')) as datafile:
        doc_str = datafile.read()
    doc = html.fromstring(doc_str)
    noga = GoogleAnalytics()
    noga.extract(doc)
    noga.process()
    actual = normxml(noga.html())
    expected = normxml('''<div class="mwu-elem" id="mwu-elem-ga"></div>''')
    self.assertSequenceEqual(expected, actual)
def test_GoogleAnalytics_v1(self):
    # Check positive case, where we expect to find the GA tracking code
    # (older version): two script tags, the first defining `gaJsHost` and
    # the second carrying the tracker id.
    from mobilize.components import GoogleAnalytics
    # Read the fixture inside a context manager so the file handle is closed
    # deterministically instead of being left to the GC.
    with open(data_file_path('whole-html', 'luxwny.html')) as datafile:
        doc_str = datafile.read()
    doc = html.fromstring(doc_str)
    ga = GoogleAnalytics()
    ga.extract(doc)
    ga.process()
    # Re-parse the rendered markup so the script tags can be inspected.
    extracted_str = ga.html()
    extracted = html.fromstring(extracted_str)
    extracted_script_tags = extracted.cssselect('script')
    self.assertEqual(len(extracted_script_tags), 2)
    # assertIn reports the actual script text on failure, unlike
    # assertTrue(... in ...), which only says "False is not true".
    ga_script1_text = extracted_script_tags[0].text
    self.assertIn('var gaJsHost', ga_script1_text)
    ga_script2_text = extracted_script_tags[1].text
    self.assertIn('UA-12345678-1', ga_script2_text)
def test_GoogleAnalytics_v2(self):
    # Check positive case, where we expect to find the GA tracking codes:
    # for each fixture, exactly one script tag containing the tracker id.
    from mobilize.components import GoogleAnalytics
    testdatafiles = [
        'msia.org.html',  # variant 1
        'msia.org.2.html',  # variant 2
    ]
    for testdatafile in testdatafiles:
        # Read the fixture inside a context manager so the file handle is
        # closed deterministically instead of being left to the GC.
        with open(data_file_path('whole-html', testdatafile)) as datafile:
            doc_str = datafile.read()
        doc = html.fromstring(doc_str)
        ga = GoogleAnalytics()
        ga.extract(doc)
        ga.process()
        # Re-parse the rendered markup so the script tags can be inspected.
        extracted_str = ga.html()
        extracted = html.fromstring(extracted_str)
        extracted_script_tags = extracted.cssselect('script')
        # The fixture name is passed as the message so a failure identifies
        # which variant broke.
        self.assertEqual(len(extracted_script_tags), 1, testdatafile)
        ga_script_text = extracted_script_tags[0].text
        self.assertIn('UA-12345678-1', ga_script_text, testdatafile)