Esempio n. 1
0
def test_backend(backend):
    """
    Run a frontier test crawl over SITE_LIST and print the crawling sequence.

    A graph is built from the module-level ``SITE_LIST``, a frontier is
    created from a fresh ``Settings`` object, the backend name is printed
    between two separator lines, and finally the URL of every page in the
    tester's sequence is printed, one per line.

    :param backend: value assigned to ``settings.BACKEND`` (presumably the
        dotted path of a backend class -- confirm against the callers).
    """
    # Graph
    graph = graphs.Manager()
    graph.add_site_list(SITE_LIST)

    # Frontier
    settings = Settings()
    settings.BACKEND = backend
    settings.LOGGING_MANAGER_ENABLED = True
    settings.LOGGING_BACKEND_ENABLED = True
    settings.LOGGING_DEBUGGING_ENABLED = False
    frontier = FrontierManager.from_settings(settings)

    # Single-argument print(...) calls give the same output with the
    # Python 2 print statement and the Python 3 print function.
    separator = '-' * 80
    print(separator)
    print(frontier.backend.name)
    print(separator)

    # Tester
    tester = FrontierTester(frontier, graph)
    tester.run()

    # Show crawling sequence
    for page in tester.sequence:
        print(page.url)
Esempio n. 2
0
def test_logic(backend):
    """
    Run a frontier test crawl over the stored graph and print the sequence.

    The graph is loaded from ``sqlite:///data/graph.db``, a frontier is
    created from a fresh ``Settings`` object with ``TEST_MODE`` enabled, and
    the tester is run with ``add_all_pages=True``. Afterwards the backend
    name is printed between two separator lines, followed by the URL of
    every page in the tester's sequence, one per line.

    :param backend: value assigned to ``settings.BACKEND`` (presumably the
        dotted path of a backend class -- confirm against the callers).
    """
    # Graph
    graph = graphs.Manager('sqlite:///data/graph.db')

    # Frontier
    settings = Settings()
    settings.BACKEND = backend
    settings.LOGGING_MANAGER_ENABLED = True
    settings.LOGGING_BACKEND_ENABLED = True
    settings.LOGGING_DEBUGGING_ENABLED = False
    settings.TEST_MODE = True
    frontier = FrontierManager.from_settings(settings)

    # Tester
    tester = FrontierTester(frontier, graph)
    tester.run(add_all_pages=True)

    # Show crawling sequence.
    # Single-argument print(...) calls give the same output with the
    # Python 2 print statement and the Python 3 print function.
    separator = '-' * 80
    print(separator)
    print(frontier.backend.name)
    print(separator)
    for page in tester.sequence:
        print(page.url)
Esempio n. 3
0
 def get_frontier(self):
     """
     Build a frontier manager from this object's settings.

     The result of ``self.get_settings()`` is handed to
     ``FrontierManager.from_settings`` and whatever that call returns is
     returned unchanged.
     """
     build_frontier = FrontierManager.from_settings
     return build_frontier(self.get_settings())
Esempio n. 4
0
"""
from frontera import FrontierManager, FrontierTester, Settings, graphs

# Frontier settings used by the script below: FIFO heapq memory backend,
# manager and backend logging on, debugging logging off.
SETTINGS = Settings()
SETTINGS.BACKEND = 'frontera.contrib.backends.memory_heapq.FIFO'
SETTINGS.LOGGING_MANAGER_ENABLED = True
SETTINGS.LOGGING_BACKEND_ENABLED = True
SETTINGS.LOGGING_DEBUGGING_ENABLED = False


if __name__ == '__main__':
    # Graph
    graph = graphs.Manager('sqlite:///recordings/scrapinghub.com.db')

    # Frontier
    frontier = FrontierManager.from_settings(SETTINGS)

    # Tester
    tester = FrontierTester(frontier, graph)

    # Run test
    tester.run()

    # Show frontier pages.
    # print(...) is always given one pre-built string so the output is the
    # same with the Python 2 print statement and the Python 3 function.
    separator = '-' * 80
    print(separator)
    print(' Frontier pages')
    print(separator)
    for page in frontier.backend.pages.values():
        # Space-separated fields, matching what `print a, b, c` emitted.
        print('%s %s %s' % (page.url, page.depth, page.state))

    # Show crawling sequence
Esempio n. 5
0
"""
Frontier tester usage example
"""
from frontera import FrontierManager, FrontierTester, Settings, graphs

if __name__ == '__main__':
    # Graph
    graph = graphs.Manager('sqlite:///data/graph.db')

    # Frontier: test mode on, manager and backend logging on,
    # debugging logging off.
    settings = Settings()
    settings.TEST_MODE = True
    settings.LOGGING_MANAGER_ENABLED = True
    settings.LOGGING_BACKEND_ENABLED = True
    settings.LOGGING_DEBUGGING_ENABLED = False
    frontier = FrontierManager.from_settings(settings)

    # Tester
    tester = FrontierTester(frontier, graph)

    # Run test
    tester.run()

    # Show crawling sequence.
    # A single-argument print(...) call gives the same output with the
    # Python 2 print statement and the Python 3 print function.
    for page in tester.sequence:
        print(page.url)
Esempio n. 6
0
"""
Frontier from parameters example
"""
from frontera import FrontierManager, graphs, Request, Response

if __name__ == '__main__':
    # Create graph
    graph = graphs.Manager('sqlite:///data/graph.db')

    # Create frontier
    frontier = FrontierManager(
        request_model='frontera.core.models.Request',
        response_model='frontera.core.models.Response',
        backend='frontera.contrib.backends.memory.FIFO',
        logger='frontera.logger.FrontierLogger',
        event_log_manager='frontera.logger.events.EventLogManager',
        middlewares=[
            'frontera.contrib.middlewares.domain.DomainMiddleware',
            'frontera.contrib.middlewares.fingerprint.UrlFingerprintMiddleware',
            'frontera.contrib.middlewares.fingerprint.DomainFingerprintMiddleware',
        ],
        test_mode=True)

    # Add seeds
    frontier.add_seeds([Request(seed.url) for seed in graph.seeds])

    # Get next requests
    next_requests = frontier.get_next_requests()

    # Crawl pages
    for request in next_requests: