Esempio n. 1
0
 def init_phantom(self):
     """Start a PhantomJS webdriver and keep it on ``self.phantom``.

     Sets ``self.prefixfiles`` (the path prefix used for both the cookie
     file and the service log), ``self.capabilities`` (a copy of
     ``DesiredCapabilities.PHANTOMJS`` with a few overrides) and
     ``self.phantom``, then applies the implicit-wait, page-load and
     script timeouts to the new driver.
     """
     logs_dir = scrapyd_config().get('logs_dir')
     self.prefixfiles = os.path.join(
         logs_dir, HYPHE_PROJECT, self.name, self.crawler.settings['JOBID']
     )
     self.log("Using path %s for PhantomJS crawl" % self.prefixfiles, log.INFO)

     # The proxy flag is only passed when PROXY is set and does not start
     # with ':'.
     use_proxy = PROXY and not PROXY.startswith(':')
     service_args = ['--proxy=%s' % PROXY] if use_proxy else []
     service_args += [
         '--cookies-file=%s-phantomjs-cookie.txt' % self.prefixfiles,
         '--ignore-ssl-errors=true',
         '--load-images=false',
     ]

     # Work on a copy so the shared DesiredCapabilities.PHANTOMJS dict is
     # never mutated.
     self.capabilities = dict(DesiredCapabilities.PHANTOMJS)
     self.capabilities.update({
         'phantomjs.page.settings.userAgent': self.user_agent,
         'takesScreenshot': False,
         'phantomjs.page.settings.javascriptCanCloseWindows': False,
         'phantomjs.page.settings.javascriptCanOpenWindows': False,
     })

     driver = PhantomJS(
         executable_path=PHANTOM['PATH'],
         service_args=service_args,
         desired_capabilities=self.capabilities,
         service_log_path="%s-phantomjs.log" % self.prefixfiles
     )
     self.phantom = driver

     driver.implicitly_wait(10)
     driver.set_page_load_timeout(60)
     # Script timeout is the configured ph_timeout plus a 15-unit margin.
     driver.set_script_timeout(self.ph_timeout + 15)
Esempio n. 2
0
 def init_phantom(self):
     """Create and configure the PhantomJS driver stored in ``self.phantom``.

     Side effects: assigns ``self.prefixfiles``, ``self.capabilities`` and
     ``self.phantom``, logs the path prefix at INFO level, and sets the
     driver's implicit-wait, page-load and script timeouts.
     """
     # Path prefix shared by the cookie file and the PhantomJS service log.
     path_parts = (
         scrapyd_config().get('logs_dir'),
         HYPHE_PROJECT,
         self.name,
         self.crawler.settings['JOBID'],
     )
     self.prefixfiles = os.path.join(*path_parts)
     message = "Using path %s for PhantomJS crawl" % self.prefixfiles
     self.log(message, log.INFO)

     cli_options = [
         '--cookies-file=%s-phantomjs-cookie.txt' % self.prefixfiles,
         '--ignore-ssl-errors=true',
         '--load-images=false',
     ]
     if PROXY and not PROXY.startswith(':'):
         # Keep the proxy flag first, ahead of the fixed options.
         cli_options.insert(0, '--proxy=%s' % PROXY)

     # Copy the stock capabilities before overriding individual entries.
     self.capabilities = dict(DesiredCapabilities.PHANTOMJS)
     overrides = (
         ('phantomjs.page.settings.userAgent', self.user_agent),
         ('takesScreenshot', False),
         ('phantomjs.page.settings.javascriptCanCloseWindows', False),
         ('phantomjs.page.settings.javascriptCanOpenWindows', False),
     )
     for capability, value in overrides:
         self.capabilities[capability] = value

     launch_kwargs = dict(
         executable_path=PHANTOM['PATH'],
         service_args=cli_options,
         desired_capabilities=self.capabilities,
         service_log_path="%s-phantomjs.log" % self.prefixfiles,
     )
     self.phantom = PhantomJS(**launch_kwargs)

     self.phantom.implicitly_wait(10)
     self.phantom.set_page_load_timeout(60)
     script_timeout = self.ph_timeout + 15
     self.phantom.set_script_timeout(script_timeout)