def test_extract(self):
     """Check multi-item extraction against the sample page.

     Builds the container extractors from ``unvalidated_template``, runs
     the resulting multi-item extractor over ``extraction_page`` and
     verifies the number of extracted items, the set of keys every item
     carries, and the exact contents of the first item, the item at
     index 50 and the last item.
     """
     extractors = ContainerExtractor.apply(unvalidated_template,
                                           basic_extractors)
     ibl_extractor = TemplatePageMultiItemExtractor(unvalidated_template,
                                                    extractors)
     data = ibl_extractor.extract(extraction_page)
     self.assertEqual(len(data), 95)
     self.assertEqual({tuple(sorted(item.keys())) for item in data},
                      {('_template', u'date', u'text', u'title', u'url')})
     # No hand-rolled diff/print before this assertion: assertDictEqual
     # already reports the differing entries on failure, and an ad-hoc
     # lookup into a second expected dict would raise KeyError on an
     # unexpected key before the assertion could produce its message.
     self.assertDictEqual(data[0], {
         u'_template': u'stack_overflow_test',
         u'date': [u'2015-08-07 10:09:32Z'],
         u'text': [u"Bootstrap navbar doesn't open - mobile view"],
         u'title': [u'I have a sticky nav with this code (Which is not mine'
                    u') // Create a clone of the menu, right next to '
                    u'original. ...'],
         u'url': [u'https://stackoverflow.com/questions/31875193/bootstrap-'
                  u'navbar-doesnt-open-mobile-view']
     })
     self.assertDictEqual(data[50], {
         u'_template': 'stack_overflow_test',
         u'date': [u'2015-08-07 10:01:03Z'],
         u'text': [u'Rails in production with Apache+passenger error'],
         u'title': [u"Last days i'm trying to put my rails app in "
                    u"production with apache and passenger(no rvm), but "
                    u"still nothing. In my browser i get an error like "
                    u"this: We're sorry, but something went wrong. "
                    u"We've been ..."],
         u'url': [u'https://stackoverflow.com/questions/31874997/rails-in-'
                  u'production-with-apachepassenger-error']
     })
     self.assertDictEqual(data[-1], {
         u'_template': 'stack_overflow_test',
         u'date': [u'2015-08-07 08:19:38Z'],
         u'text': [u'pylab cannot find reference for its modules'],
         u'title': [u"I have a mac OS X Yosimite and I'm using python "
                    u"2.7.10 and Pycharm as my IDLE. I have pylab installed"
                    u" properly but I cannot use any of its modules. "
                    u"When a try: from pylab import show (or any module) "
                    u"..."],
         u'url': [u'https://stackoverflow.com/questions/31872881/pylab-'
                  u'cannot-find-reference-for-its-modules']
     })
Ejemplo n.º 2
0
 def test_extract(self):
     """Extract items from the sample page and verify the indexed output.

     Expects 96 items, each carrying exactly the keys ``_index``,
     ``_template``, ``date``, ``text``, ``title`` and ``url``, and
     compares the first item, the item at index 50 and the last item
     against their full expected contents.
     """
     container_extractors = ContainerExtractor.apply(
         unvalidated_template, basic_extractors)
     page_extractor = TemplatePageMultiItemExtractor(
         unvalidated_template, container_extractors)
     items = page_extractor.extract(extraction_page)

     self.assertEqual(len(items), 96)

     # Every item must expose the same (sorted) tuple of field names.
     observed_key_sets = {tuple(sorted(item.keys())) for item in items}
     expected_key_sets = {
         ('_index', '_template', u'date', u'text', u'title', u'url')}
     self.assertEqual(observed_key_sets, expected_key_sets)

     expected_first = {
         u'_index': 1,
         u'_template': u'stack_overflow_test',
         u'date': [u'2015-08-07 10:09:32Z'],
         u'text': [u"Bootstrap navbar doesn't open - mobile view"],
         u'title': [u'I have a sticky nav with this code (Which is not mine'
                    u')\n\n// Create a clone of the menu, right next to '
                    u'original.\n...'],
         u'url': [u'https://stackoverflow.com/questions/31875193/bootstrap-'
                  u'navbar-doesnt-open-mobile-view']
     }
     self.assertDictEqual(items[0], expected_first)

     expected_middle = {
         u'_index': 51,
         u'_template': 'stack_overflow_test',
         u'date': [u'2015-08-07 10:01:03Z'],
         u'text': [u'Rails in production with Apache+passenger error'],
         u'title': [u"Last days i'm trying to put my rails app in "
                    u"production with apache and passenger(no rvm), but "
                    u"still nothing. In my browser i get an error like "
                    u"this:\n\nWe're sorry, but something went wrong."
                    u"\nWe've been ..."],
         u'url': [u'https://stackoverflow.com/questions/31874997/rails-in-'
                  u'production-with-apachepassenger-error']
     }
     self.assertDictEqual(items[50], expected_middle)

     expected_last = {
         u'_index': 96,
         u'_template': 'stack_overflow_test',
         u'date': [u'2015-08-07 08:16:43Z'],
         u'text': [u'iPython + Spark + Cassandra - Py4JJavaError and How to'
                   u' connect to Cassandra from Spark?'],
         u'title': [u"How can I connect to Cassandra from Spark with "
                    u"iPython?\n\nI have followed the code from here and "
                    u"modified it,\n\nimport os\nimport sys\n\n# Path for "
                    u"spark source folder\nos.environ['SPARK_HOME'] = ..."],
         u'url': [u'https://stackoverflow.com/questions/31872831/ipython-'
                  u'spark-cassandra-py4jjavaerror-and-how-to-connect-to-'
                  u'cassandra-from']
     }
     self.assertDictEqual(items[-1], expected_last)
 def test_extract(self):
     """Verify item count, key set and three sample items of the extraction.

     Runs the multi-item extractor built from ``unvalidated_template``
     over ``extraction_page``; 96 items are expected, all with the keys
     ``_index``, ``_template``, ``date``, ``text``, ``title`` and
     ``url``. The items at positions 0, 50 and -1 are compared in full.
     """
     applied = ContainerExtractor.apply(unvalidated_template,
                                        basic_extractors)
     multi_item_extractor = TemplatePageMultiItemExtractor(
         unvalidated_template, applied)
     extracted = multi_item_extractor.extract(extraction_page)

     self.assertEqual(len(extracted), 96)

     field_names = {tuple(sorted(record.keys())) for record in extracted}
     self.assertEqual(
         field_names,
         {('_index', '_template', u'date', u'text', u'title', u'url')})

     # Expected content of the first extracted item.
     head = {
         u'_index': 1,
         u'_template': u'stack_overflow_test',
         u'date': [u'2015-08-07 10:09:32Z'],
         u'text': [u"Bootstrap navbar doesn't open - mobile view"],
         u'title': [u'I have a sticky nav with this code (Which is not mine'
                    u')\n\n// Create a clone of the menu, right next to '
                    u'original.\n...'],
         u'url': [u'https://stackoverflow.com/questions/31875193/bootstrap-'
                  u'navbar-doesnt-open-mobile-view']
     }
     self.assertDictEqual(extracted[0], head)

     # Expected content of the item at index 50.
     midpoint = {
         u'_index': 51,
         u'_template': 'stack_overflow_test',
         u'date': [u'2015-08-07 10:01:03Z'],
         u'text': [u'Rails in production with Apache+passenger error'],
         u'title': [u"Last days i'm trying to put my rails app in "
                    u"production with apache and passenger(no rvm), but "
                    u"still nothing. In my browser i get an error like "
                    u"this:\n\nWe're sorry, but something went wrong."
                    u"\nWe've been ..."],
         u'url': [u'https://stackoverflow.com/questions/31874997/rails-in-'
                  u'production-with-apachepassenger-error']
     }
     self.assertDictEqual(extracted[50], midpoint)

     # Expected content of the last extracted item.
     tail = {
         u'_index': 96,
         u'_template': 'stack_overflow_test',
         u'date': [u'2015-08-07 08:16:43Z'],
         u'text': [u'iPython + Spark + Cassandra - Py4JJavaError and How to'
                   u' connect to Cassandra from Spark?'],
         u'title': [u"How can I connect to Cassandra from Spark with "
                    u"iPython?\n\nI have followed the code from here and "
                    u"modified it,\n\nimport os\nimport sys\n\n# Path for "
                    u"spark source folder\nos.environ['SPARK_HOME'] = ..."],
         u'url': [u'https://stackoverflow.com/questions/31872831/ipython-'
                  u'spark-cassandra-py4jjavaerror-and-how-to-connect-to-'
                  u'cassandra-from']
     }
     self.assertDictEqual(extracted[-1], tail)
Ejemplo n.º 4
0
 def test_extract(self):
     """Extract items from the sample page and verify the output.

     Expects 95 items, each carrying exactly the keys ``_template``,
     ``date``, ``text``, ``title`` and ``url``, and compares the first
     item, the item at index 50 and the last item against their full
     expected contents.
     """
     container_extractors = ContainerExtractor.apply(
         unvalidated_template, basic_extractors)
     page_extractor = TemplatePageMultiItemExtractor(
         unvalidated_template, container_extractors)
     items = page_extractor.extract(extraction_page)

     self.assertEqual(len(items), 95)

     # Every item must expose the same (sorted) tuple of field names.
     observed_key_sets = {tuple(sorted(item.keys())) for item in items}
     expected_key_sets = {
         ('_template', u'date', u'text', u'title', u'url')}
     self.assertEqual(observed_key_sets, expected_key_sets)

     expected_first = {
         u'_template': u'stack_overflow_test',
         u'date': [u'2015-08-07 10:09:32Z'],
         u'text': [u"Bootstrap navbar doesn't open - mobile view"],
         u'title': [u'I have a sticky nav with this code (Which is not mine'
                    u') // Create a clone of the menu, right next to '
                    u'original. ...'],
         u'url': [u'https://stackoverflow.com/questions/31875193/bootstrap-'
                  u'navbar-doesnt-open-mobile-view']
     }
     self.assertDictEqual(items[0], expected_first)

     expected_middle = {
         u'_template': 'stack_overflow_test',
         u'date': [u'2015-08-07 10:01:03Z'],
         u'text': [u'Rails in production with Apache+passenger error'],
         u'title': [u"Last days i'm trying to put my rails app in "
                    u"production with apache and passenger(no rvm), but "
                    u"still nothing. In my browser i get an error like "
                    u"this: We're sorry, but something went wrong. "
                    u"We've been ..."],
         u'url': [u'https://stackoverflow.com/questions/31874997/rails-in-'
                  u'production-with-apachepassenger-error']
     }
     self.assertDictEqual(items[50], expected_middle)

     expected_last = {
         u'_template': 'stack_overflow_test',
         u'date': [u'2015-08-07 08:19:38Z'],
         u'text': [u'pylab cannot find reference for its modules'],
         u'title': [u"I have a mac OS X Yosimite and I'm using python "
                    u"2.7.10 and Pycharm as my IDLE. I have pylab installed"
                    u" properly but I cannot use any of its modules. "
                    u"When a try: from pylab import show (or any module) "
                    u"..."],
         u'url': [u'https://stackoverflow.com/questions/31872881/pylab-'
                  u'cannot-find-reference-for-its-modules']
     }
     self.assertDictEqual(items[-1], expected_last)