def apply_plugin():
    """Plugin entry point: switch the crawl into simulation mode.

    Every plugin module is expected to provide this function. A plugin
    typically does three things here:

    1. Registers its hook/plugin functions (required).
    2. Fetches the config object and overrides settings (optional).
    3. Prints informational messages (optional).
    """
    config = objects.config

    # Mark the run as a simulation and skip localisation.
    config.simulate = True
    config.localise = 0

    # Replace the URL-saving step with save_url, a dummy that does not
    # really write the mirrored files.
    hooks.register_plugin_function('connector:save_url_plugin', save_url)

    # Post-callback giving access to the downloaded data once the
    # fetcher's process_url step has run.
    hooks.register_post_callback_method(
        'crawler:fetcher_process_url_callback',
        process_url,
    )

    # No files are saved, so neither the page cache nor the URL header
    # dump is of any use.
    config.pagecache = 0
    config.urlheaders = 0

    notice = 'Simulation mode turned on. Crawl will be simulated and no files will be saved.'
    logconsole(notice)
def apply_plugin():
    """Plugin entry point: switch the crawl into simulation mode.

    Every plugin module is expected to provide this function. A plugin
    typically does three things here:

    1. Registers its hook/plugin functions (required).
    2. Fetches the config object and overrides settings (optional).
    3. Prints informational messages (optional).
    """
    settings = objects.config

    # Flag the run as simulated; localisation is switched off.
    settings.simulate = True
    settings.localise = 0

    # save_url is a dummy replacement that does not really write the
    # mirrored files.
    hooks.register_plugin_function('connector:save_url_plugin', save_url)

    # Gives process_url access to the downloaded data after the
    # fetcher's process_url step has been called.
    hooks.register_post_callback_method(
        'crawler:fetcher_process_url_callback',
        process_url,
    )

    # Caching and header dumping are pointless when no files are saved.
    settings.pagecache = 0
    settings.urlheaders = 0

    message = 'Simulation mode turned on. Crawl will be simulated and no files will be saved.'
    logconsole(message)
def apply_plugin():
    """Apply the plugin (overrideable entry point).

    A plugin typically does three things here:

    1. Registers its hook/plugin functions (required).
    2. Fetches the config object and overrides settings (optional).
    3. Prints informational messages (optional).

    This plugin registers ``create_index`` as a post-callback on
    ``datamgr:post_download_setup_callback`` and then adjusts a handful
    of crawl settings.
    """
    config = objects.config

    hooks.register_post_callback_method(
        'datamgr:post_download_setup_callback',
        create_index,
    )

    # Console logging is currently left enabled:
    # logger.disableConsoleLogging()

    # Session saving is not wanted for this plugin.
    config.savesessions = False

    # Interrupt handling override is currently disabled:
    # config.ignoreinterrupts = True

    # Localisation, image downloads and the page cache are all turned off.
    config.localise = 0
    config.images = 0
    config.pagecache = 0
def apply_plugin():
    """Apply the plugin (overrideable entry point).

    A plugin typically does three things here:

    1. Registers its hook/plugin functions (required).
    2. Fetches the config object and overrides settings (optional).
    3. Prints informational messages (optional).

    This plugin registers ``process_url`` as a post-callback on
    ``crawler:fetcher_process_url_callback`` and then adjusts crawl
    settings on the shared config object. It returns nothing.
    """
    cfg = objects.config

    # NOTE(review): the original remark says this callback only makes
    # sense when swish-integration is enabled, yet it is registered
    # unconditionally here -- confirm whether a guard is needed.
    hooks.register_post_callback_method(
        'crawler:fetcher_process_url_callback',
        process_url,
    )

    # Turn off caching, since no files are saved.
    cfg.pagecache = 0

    # Console logging is NOT turned off: the call below is disabled, so
    # the previously fetched (and unused) logger local has been removed.
    # objects.logger.disableConsoleLogging()

    # Turn off the session-saver feature.
    cfg.savesessions = False

    # Interrupt handling override is currently disabled:
    # cfg.ignoreinterrupts = True

    # No need for localising, and no image downloading.
    cfg.localise = 0
    cfg.images = 0

    # Sleep time between requests (the original note calls this an
    # increase over the default).
    cfg.sleeptime = 1.0

    # Disabled experiments kept for reference:
    # sys.stderr = open('swish-errors.txt','wb')
    # cfg.maxtrackers = 2

    # presumably runs the crawl without worker threads -- verify against
    # the config documentation.
    cfg.usethreads = 0