コード例 #1
0
def apply_plugin():
    """Plugin entry point; every plugin module must define it.

    Puts the shared configuration into simulation mode, registers the
    plugin's hook functions, and reports the mode on the console.
    Registering the hooks is the mandatory part; the configuration
    overrides and the console message are specific to this plugin.
    """

    config = objects.config

    # Simulation settings, applied in this order before any hook is
    # registered.
    for option, value in (('simulate', True), ('localise', 0)):
        setattr(config, option, value)

    # save_url is a dummy replacement that does not really write the
    # mirrored files.
    hooks.register_plugin_function('connector:save_url_plugin', save_url)

    # process_url is attached as a post-callback so the plugin can get
    # at the downloaded data once the fetcher has processed a URL.
    post_event = 'crawler:fetcher_process_url_callback'
    hooks.register_post_callback_method(post_event, process_url)

    # No files are saved, so neither the page cache nor the URL header
    # dump is of any use.
    for option in ('pagecache', 'urlheaders'):
        setattr(config, option, 0)

    notice = 'Simulation mode turned on. Crawl will be simulated and no files will be saved.'
    logconsole(notice)
コード例 #2
0
def apply_plugin():
    """Plugin entry point; every plugin module must define it.

    The function overrides the configuration values needed for a
    simulated crawl, registers the plugin's two hooks, and finishes by
    logging an informational message to the console.
    """

    settings = objects.config

    # Targets are assigned left to right: simulate first, then localise.
    settings.simulate, settings.localise = True, 0

    # Replace the URL-saving step with a dummy that does not really
    # write the mirrored files.
    save_hook = 'connector:save_url_plugin'
    hooks.register_plugin_function(save_hook, save_url)

    # Post-callback giving access to the downloaded data after the
    # fetcher's URL processing step.
    fetch_hook = 'crawler:fetcher_process_url_callback'
    hooks.register_post_callback_method(fetch_hook, process_url)

    # Caching and header dumping are both switched off, since no files
    # are saved (pagecache is assigned before urlheaders).
    settings.pagecache = settings.urlheaders = 0

    logconsole(
        'Simulation mode turned on. Crawl will be simulated and no files will be saved.'
    )
コード例 #3
0
def apply_plugin():
    """Plugin entry point; every plugin module must define it.

    Turns on simulation in the shared configuration, registers the
    save-URL hook, disables the features that depend on saved files,
    selects the in-memory data mode, and logs a console message.
    """

    conf = objects.config

    # Targets are assigned left to right: simulate first, then localise.
    conf.simulate, conf.localise = True, 0

    hooks.register_plugin_function('connector:save_url_plugin', save_url)

    # With no files saved, caching and header dumping are turned off.
    for flag in ('pagecache', 'urlheaders'):
        setattr(conf, flag, 0)

    # The simulator never saves files, so data must be kept in memory.
    conf.datamode = CONNECTOR_DATA_MODE_INMEM

    banner = 'Simulation mode turned on. Crawl will be simulated and no files will be saved.'
    logconsole(banner)