def test_backend(backend):
    """
    Run the frontier tester against ``backend`` and print the crawl order.

    A graph is built from ``SITE_LIST``, a frontier is created from settings
    whose ``BACKEND`` is the given value, and a ``FrontierTester`` run is
    performed. The backend name is printed between two rules before the run;
    the URL of every page in ``tester.sequence`` is printed afterwards.

    :param backend: value assigned to ``settings.BACKEND``.
    """
    # Graph
    graph = graphs.Manager()
    graph.add_site_list(SITE_LIST)

    # Frontier
    settings = Settings()
    settings.BACKEND = backend
    settings.LOGGING_MANAGER_ENABLED = True
    settings.LOGGING_BACKEND_ENABLED = True
    settings.LOGGING_DEBUGGING_ENABLED = False
    frontier = FrontierManager.from_settings(settings)

    # Parenthesised single-argument print gives the same output as the
    # print statement on Python 2 and is also valid syntax on Python 3.
    rule = '-' * 80
    print(rule)
    print(frontier.backend.name)
    print(rule)

    # Tester
    tester = FrontierTester(frontier, graph)
    tester.run()

    # Show crawling sequence
    for page in tester.sequence:
        print(page.url)
def test_logic(backend):
    """
    Run the frontier tester on the stored graph and print the crawl order.

    The graph is loaded from ``sqlite:///data/graph.db``. The frontier is
    created from settings with ``BACKEND`` set to the given value and
    ``TEST_MODE`` enabled, and the tester is run with ``add_all_pages=True``.
    After the run, the backend name is printed between two rules, followed
    by the URL of every page in ``tester.sequence``.

    :param backend: value assigned to ``settings.BACKEND``.
    """
    # Graph
    graph = graphs.Manager('sqlite:///data/graph.db')

    # Frontier
    settings = Settings()
    settings.BACKEND = backend
    settings.LOGGING_MANAGER_ENABLED = True
    settings.LOGGING_BACKEND_ENABLED = True
    settings.LOGGING_DEBUGGING_ENABLED = False
    settings.TEST_MODE = True
    frontier = FrontierManager.from_settings(settings)

    # Tester
    tester = FrontierTester(frontier, graph)
    tester.run(add_all_pages=True)

    # Show crawling sequence.
    # Parenthesised single-argument print gives the same output as the
    # print statement on Python 2 and is also valid syntax on Python 3.
    rule = '-' * 80
    print(rule)
    print(frontier.backend.name)
    print(rule)
    for page in tester.sequence:
        print(page.url)
def get_frontier(self):
    """
    Build a frontier manager from this object's settings.

    Returns whatever ``FrontierManager.from_settings`` produces when given
    the result of ``self.get_settings()``.
    """
    build_frontier = FrontierManager.from_settings
    return build_frontier(self.get_settings())
""" from frontera import FrontierManager, FrontierTester, Settings, graphs SETTINGS = Settings() SETTINGS.BACKEND = 'frontera.contrib.backends.memory_heapq.FIFO' SETTINGS.LOGGING_MANAGER_ENABLED = True SETTINGS.LOGGING_BACKEND_ENABLED = True SETTINGS.LOGGING_DEBUGGING_ENABLED = False if __name__ == '__main__': # Graph graph = graphs.Manager('sqlite:///recordings/scrapinghub.com.db') # Frontier frontier = FrontierManager.from_settings(SETTINGS) # Tester tester = FrontierTester(frontier, graph) # Run test tester.run() # Show frontier pages print '-'*80 print ' Frontier pages' print '-'*80 for page in frontier.backend.pages.values(): print page.url, page.depth, page.state # Show crawling sequence
""" Frontier tester usage example """ from frontera import FrontierManager, FrontierTester, Settings, graphs if __name__ == '__main__': # Graph graph = graphs.Manager('sqlite:///data/graph.db') # Frontier settings = Settings() settings.TEST_MODE = True settings.LOGGING_MANAGER_ENABLED = True settings.LOGGING_BACKEND_ENABLED = True settings.LOGGING_DEBUGGING_ENABLED = False frontier = FrontierManager.from_settings(settings) # Tester tester = FrontierTester(frontier, graph) # Run test tester.run() # Show crawling sequence for page in tester.sequence: print page.url
""" Frontier from parameters example """ from frontera import FrontierManager, graphs, Request, Response if __name__ == '__main__': # Create graph graph = graphs.Manager('sqlite:///data/graph.db') # Create frontier frontier = FrontierManager( request_model='frontera.core.models.Request', response_model='frontera.core.models.Response', backend='frontera.contrib.backends.memory.FIFO', logger='frontera.logger.FrontierLogger', event_log_manager='frontera.logger.events.EventLogManager', middlewares=[ 'frontera.contrib.middlewares.domain.DomainMiddleware', 'frontera.contrib.middlewares.fingerprint.UrlFingerprintMiddleware', 'frontera.contrib.middlewares.fingerprint.DomainFingerprintMiddleware', ], test_mode=True) # Add seeds frontier.add_seeds([Request(seed.url) for seed in graph.seeds]) # Get next requests next_requests = frontier.get_next_requests() # Crawl pages for request in next_requests: