def test_extract(self):
    """Run the multi-item extractor and spot-check three extracted items.

    Builds extractors with ``ContainerExtractor.apply`` from
    ``unvalidated_template`` and ``basic_extractors``, feeds them to a
    ``TemplatePageMultiItemExtractor`` and extracts ``extraction_page``.
    The test then checks the number of items, that every item has the
    same set of field names, and the exact contents of the items at
    positions 0, 50 and -1.
    """
    extractors = ContainerExtractor.apply(unvalidated_template,
                                          basic_extractors)
    ibl_extractor = TemplatePageMultiItemExtractor(unvalidated_template,
                                                   extractors)
    data = ibl_extractor.extract(extraction_page)
    self.assertEqual(len(data), 95)
    # Collapsing each item's sorted keys into a set proves that all items
    # share one and the same field layout.
    self.assertEqual({tuple(sorted(i.keys())) for i in data},
                     {('_template', u'date', u'text', u'title', u'url')})
    # assertDictEqual already reports a per-key diff on failure, so the
    # former ad-hoc ``print`` comparison (which raised KeyError on any
    # unexpected key before the assertion could run) is not needed.
    self.assertDictEqual(data[0], {
        u'_template': u'stack_overflow_test',
        u'date': [u'2015-08-07 10:09:32Z'],
        u'text': [u"Bootstrap navbar doesn't open - mobile view"],
        u'title': [u'I have a sticky nav with this code (Which is not mine'
                   u') // Create a clone of the menu, right next to '
                   u'original. ...'],
        u'url': [u'https://stackoverflow.com/questions/31875193/bootstrap-'
                 u'navbar-doesnt-open-mobile-view']
    })
    self.assertDictEqual(data[50], {
        u'_template': 'stack_overflow_test',
        u'date': [u'2015-08-07 10:01:03Z'],
        u'text': [u'Rails in production with Apache+passenger error'],
        # NOTE(review): SOURCE shows a raw line-breaking character inside
        # this literal right after "went wrong. "; it is spelled as an
        # explicit "\n" escape here -- confirm the exact character against
        # the extractor's real output.
        u'title': [u"Last days i'm trying to put my rails app in "
                   u"production with apache and passenger(no rvm), but "
                   u"still nothing. In my browser i get an error like "
                   u"this: We're sorry, but something went wrong. \n"
                   u"We've been ..."],
        u'url': [u'https://stackoverflow.com/questions/31874997/rails-in-'
                 u'production-with-apachepassenger-error']
    })
    self.assertDictEqual(data[-1], {
        u'_template': 'stack_overflow_test',
        u'date': [u'2015-08-07 08:19:38Z'],
        u'text': [u'pylab cannot find reference for its modules'],
        u'title': [u"I have a mac OS X Yosimite and I'm using python "
                   u"2.7.10 and Pycharm as my IDLE. I have pylab installed"
                   u" properly but I cannot use any of its modules. "
                   u"When a try: from pylab import show (or any module) "
                   u"..."],
        u'url': [u'https://stackoverflow.com/questions/31872881/pylab-'
                 u'cannot-find-reference-for-its-modules']
    })
def test_extract(self):
    """Verify multi-item extraction, including the ``_index`` field.

    Extractors are produced by ``ContainerExtractor.apply`` and handed to
    a ``TemplatePageMultiItemExtractor``, which is run on
    ``extraction_page``.  The result must hold 96 items that all carry
    the same field names; the items at positions 0, 50 and -1 are then
    compared field by field with their expected values.
    """
    container_extractors = ContainerExtractor.apply(
        unvalidated_template, basic_extractors)
    page_extractor = TemplatePageMultiItemExtractor(
        unvalidated_template, container_extractors)
    items = page_extractor.extract(extraction_page)

    self.assertEqual(len(items), 96)

    # One distinct key tuple means every item has an identical layout.
    field_layouts = {tuple(sorted(item.keys())) for item in items}
    self.assertEqual(
        field_layouts,
        {('_index', '_template', u'date', u'text', u'title', u'url')})

    # (position in the result list, expected item) pairs, checked in order.
    spot_checks = (
        (0, {
            u'_index': 1,
            u'_template': u'stack_overflow_test',
            u'date': [u'2015-08-07 10:09:32Z'],
            u'text': [u"Bootstrap navbar doesn't open - mobile view"],
            u'title': [
                u'I have a sticky nav with this code (Which is not mine'
                u')\n\n// Create a clone of the menu, right next to '
                u'original.\n...'],
            u'url': [
                u'https://stackoverflow.com/questions/31875193/bootstrap-'
                u'navbar-doesnt-open-mobile-view'],
        }),
        (50, {
            u'_index': 51,
            u'_template': 'stack_overflow_test',
            u'date': [u'2015-08-07 10:01:03Z'],
            u'text': [u'Rails in production with Apache+passenger error'],
            u'title': [
                u"Last days i'm trying to put my rails app in "
                u"production with apache and passenger(no rvm), but "
                u"still nothing. In my browser i get an error like "
                u"this:\n\nWe're sorry, but something went wrong."
                u"\nWe've been ..."],
            u'url': [
                u'https://stackoverflow.com/questions/31874997/rails-in-'
                u'production-with-apachepassenger-error'],
        }),
        (-1, {
            u'_index': 96,
            u'_template': 'stack_overflow_test',
            u'date': [u'2015-08-07 08:16:43Z'],
            u'text': [
                u'iPython + Spark + Cassandra - Py4JJavaError and How to'
                u' connect to Cassandra from Spark?'],
            u'title': [
                u"How can I connect to Cassandra from Spark with "
                u"iPython?\n\nI have followed the code from here and "
                u"modified it,\n\nimport os\nimport sys\n\n# Path for "
                u"spark source folder\nos.environ['SPARK_HOME'] = ..."],
            u'url': [
                u'https://stackoverflow.com/questions/31872831/ipython-'
                u'spark-cassandra-py4jjavaerror-and-how-to-connect-to-'
                u'cassandra-from'],
        }),
    )
    for position, expected_item in spot_checks:
        self.assertDictEqual(items[position], expected_item)
def test_extract(self):
    """Verify multi-item extraction against three known items.

    ``ContainerExtractor.apply`` builds the extractors that drive a
    ``TemplatePageMultiItemExtractor`` over ``extraction_page``.  The
    test expects 95 items with one common set of field names and pins
    the full contents of the items at positions 0, 50 and -1.
    """
    built_extractors = ContainerExtractor.apply(
        unvalidated_template, basic_extractors)
    multi_item_extractor = TemplatePageMultiItemExtractor(
        unvalidated_template, built_extractors)
    extracted = multi_item_extractor.extract(extraction_page)

    self.assertEqual(len(extracted), 95)

    # A single key tuple in the set means no item deviates in its fields.
    seen_key_tuples = {tuple(sorted(entry.keys())) for entry in extracted}
    self.assertEqual(
        seen_key_tuples,
        {('_template', u'date', u'text', u'title', u'url')})

    expected_first = {
        u'_template': u'stack_overflow_test',
        u'date': [u'2015-08-07 10:09:32Z'],
        u'text': [u"Bootstrap navbar doesn't open - mobile view"],
        u'title': [
            u'I have a sticky nav with this code (Which is not mine'
            u') // Create a clone of the menu, right next to '
            u'original. ...'],
        u'url': [
            u'https://stackoverflow.com/questions/31875193/bootstrap-'
            u'navbar-doesnt-open-mobile-view'],
    }
    self.assertDictEqual(extracted[0], expected_first)

    expected_middle = {
        u'_template': 'stack_overflow_test',
        u'date': [u'2015-08-07 10:01:03Z'],
        u'text': [u'Rails in production with Apache+passenger error'],
        u'title': [
            u"Last days i'm trying to put my rails app in "
            u"production with apache and passenger(no rvm), but "
            u"still nothing. In my browser i get an error like "
            u"this: We're sorry, but something went wrong. "
            u"We've been ..."],
        u'url': [
            u'https://stackoverflow.com/questions/31874997/rails-in-'
            u'production-with-apachepassenger-error'],
    }
    self.assertDictEqual(extracted[50], expected_middle)

    expected_last = {
        u'_template': 'stack_overflow_test',
        u'date': [u'2015-08-07 08:19:38Z'],
        u'text': [u'pylab cannot find reference for its modules'],
        u'title': [
            u"I have a mac OS X Yosimite and I'm using python "
            u"2.7.10 and Pycharm as my IDLE. I have pylab installed"
            u" properly but I cannot use any of its modules. "
            u"When a try: from pylab import show (or any module) "
            u"..."],
        u'url': [
            u'https://stackoverflow.com/questions/31872881/pylab-'
            u'cannot-find-reference-for-its-modules'],
    }
    self.assertDictEqual(extracted[-1], expected_last)