class CrawlerTest(unittest.TestCase):
    """Integration tests for BaseCrawler against live sites.

    NOTE(review): these tests hit the real network (github/facebook),
    so they are slow and environment-dependent; the underscore-prefixed
    methods are deliberately excluded from unittest discovery.
    """

    def setUp(self):
        # Fresh crawler per test so no cookies/state leak between tests.
        self.crawler = BaseCrawler()

    def _test_requests(self):
        """Very basic and foolish test"""
        url = "https://github.com/jmg"
        self.assertTrue(self.crawler._get_response(url))

    def test_cookies(self):
        """
        This test asserts if the login was successful and the second request
        retrieves a facebook's page that requires to be logged in.
        """
        # First request posts the credentials; the cookie jar inside the
        # crawler should then authenticate the second request.
        credentials = {'email' : 'user', 'pass': '******'}
        response = self.crawler._get_response("https://www.facebook.com/login.php?login_attempt=1", credentials)
        response = self.crawler._get_response("http://www.facebook.com/profile.php?id=1271577281")

        # Dump the fetched page for manual inspection.
        with open("url.html", 'w') as f:
            f.write(response.raw_html)

    def _test_post(self):
        # Smoke-test the POST crawler end to end.
        PostCrawler().start()
def execute(self):
    """Fetch *self.args[0]* and drop into an embedded IPython shell.

    The shell's namespace exposes ``response`` (the crawler response
    for the requested URL) so the user can explore it interactively.
    Exits with an error message if IPython is not installed.
    """
    try:
        import IPython
    except ImportError:
        exit_with_error("Please install the ipython console")

    url = self.args[0]
    crawler = BaseCrawler()
    # BUGFIX: was crawler._get_data(url); every other caller in this file
    # uses BaseCrawler._get_response, and _get_data does not match the
    # crawler's response API consumed below.
    response = crawler._get_response(url)
    html = XPathExtractor().get_object(response)

    # NOTE(review): IPython.Shell.IPShellEmbed is the legacy (pre-0.11)
    # embedding API; modern IPython uses IPython.embed(). Kept as-is to
    # match the sibling implementation.
    shell = IPython.Shell.IPShellEmbed(argv=[], user_ns={'response': response})
    shell()
def execute(self):
    """Open an embedded IPython shell with the crawled ``response`` bound.

    Fetches the URL given as the first positional argument, parses it
    with XPathExtractor, and embeds an interactive shell whose
    namespace contains the response object. Exits with an error if
    IPython cannot be imported.
    """
    try:
        import IPython
    except ImportError:
        exit_with_error("Please install the ipython console")

    target_url = self.args[0]
    response = BaseCrawler()._get_response(target_url)
    html = XPathExtractor().get_object(response)

    user_namespace = {'response': response}
    embedded_shell = IPython.Shell.IPShellEmbed(argv=[], user_ns=user_namespace)
    embedded_shell()
def test_generated_scrapers(self):
    """End-to-end test of the DSL pipeline: generate entities and
    scraper classes from a small DSL program, then run every generated
    scraper against a live fetch of python.org.
    """
    # BUGFIX: the DSL is line-oriented (one PAGE header, one XPath rule
    # per line); the rules were collapsed onto a single line, which the
    # generator cannot parse. Restored one rule per line.
    test_dsl = """PAGE => http://www.python.org/
table3.model1 -> /html/body/div[5]/div/div/h1
table3.model2 -> /html/body/div
table4.model1 -> /html/body/div/span"""

    generator = Generator(test_dsl, settings)
    generator.gen_entities()
    scrapers_classes = generator.gen_scrapers()

    crawler = BaseCrawler()
    response = crawler._get_response("http://www.python.org/")

    # Every generated scraper must run without raising on the live page.
    for scraper_class in scrapers_classes:
        scraper_class().scrape(response)
def setUp(self):
    """Create a fresh BaseCrawler instance before each test."""
    self.crawler = BaseCrawler()