Example #1
    def test_init_headless_agent(self):
        crawl_agent_cfg = {
            'main_js': cm.CASPER_JS_LAZY_HOMEPAGER,
            'cmd_line_options': ag.PHANTOM_COMMON_OPTIONS,
            'timeout': 20,
            'screenshot': True,
            'post_visit_func': lp.parse_log_dump_results
        }

        ha = ag.HeadlessAgent()
        ha.setOptions(crawl_agent_cfg)

        limit = 3
        cr_job_cfg = {
            'desc': "Visit top %s sites and use fontconfig's "
                    "debugging facilities to collect data." % limit,
            'max_parallel_procs': 20,
            'crawl_agent': ha,
            'urls': wu.gen_url_list(limit)
        }

        # Create the job only after the agent is configured; the original
        # instantiated HeadlessAgent twice and built the job from the
        # unconfigured instance.
        cr_job = ag.CrawlJob(ha)
        cr_job.setOptions(cr_job_cfg)

        ag.run_crawl(cr_job)
        self.dirs_to_remove.append(os.path.realpath(cr_job.job_dir))
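The post_visit_func entry wires lp.parse_log_dump_results in as a per-visit callback. Its exact signature is defined by the crawler; a hook of this kind could look like the sketch below, where both the parameter and the body are assumptions for illustration, not FPDetective's actual function:

    def post_visit_log_summary(visit_log_path):
        # Hypothetical post-visit hook: assumed to be called once per
        # visited URL with the path of that visit's log dump.
        with open(visit_log_path) as log_file:
            print('%s: %d log lines' % (visit_log_path,
                                        len(log_file.readlines())))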
Example #2
    def setUp(self):
        self.dirs_to_remove = []
        self.db_conn = dbu.mysql_init_db('fp_detective_test')
        self.domainInfo = lp.DomainInfo()  # create a new DomainInfo obj for tests

        # Populate the DomainInfo fixture with representative values.
        self.domainInfo.rank = 1
        self.domainInfo.log_filename = '/var/log/syslog'
        self.domainInfo.url = 'http://google.com'
        self.domainInfo.fonts_loaded = ['Arial', 'Tahoma', 'Georgia', '微软雅黑']
        self.domainInfo.fonts_by_origins = {'http://google.com': ['arial', 'Tahoma'],
                                            'http://yahoo.com': ['Georgia']}
        self.domainInfo.requests = ['http://google.com', 'http://yahoo.com']
        self.domainInfo.responses = ['http://abc.com', 'http://xyz.com']
        self.domainInfo.num_font_loads = 50
        self.domainInfo.num_offsetWidth_calls = 15
        self.domainInfo.num_offsetHeight_calls = 15
        self.domainInfo.fp_detected = [fpr.FINGERPRINTER_REGEX.items()[:2]]
        self.domainInfo.crawl_id = 64654
        self.domainInfo.fpd_logs = ['userAgent', 'appCodeName']
        self.domainInfo.fc_dbg_font_loads = ['Arial', 'Tahoma', 'Georgia',
                                             'someotherfont', '微软雅黑']
        self.domainInfo.log_complete = 1

        ha = ag.HeadlessAgent()
        self.crawl_job = ag.CrawlJob(ha)
        self.dirs_to_remove.append(self.crawl_job.job_dir)
        self.crawl_job.urls = ['http://google.com', 'http://yahoo.com']
        self.crawl_job.desc = 'test crawl'  # assumed value; the original line was a truncated bare attribute access
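This setUp collects directories in self.dirs_to_remove and opens a test database connection, which implies a matching cleanup method. A minimal sketch of one, assuming the connection object exposes close() and using shutil.rmtree for the job directories (this is an assumed counterpart, not the project's actual tearDown):

    import shutil

    def tearDown(self):
        # Assumed cleanup pairing with setUp: close the test DB connection
        # and delete any job directories the tests created.
        self.db_conn.close()
        for dir_path in self.dirs_to_remove:
            if os.path.isdir(dir_path):
                shutil.rmtree(dir_path)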
Example #3
    def should_crawl_and_log(self,
                             agent_cfg,
                             urls,
                             expected_strs,
                             unexpected_strs=None):
        # TODO: add support for normal browsers
        if unexpected_strs is None:  # avoid a mutable default argument
            unexpected_strs = []

        if 'type' in agent_cfg and 'chrome' in agent_cfg['type']:
            br = ag.ChromeAgent()
        else:
            br = ag.HeadlessAgent()

        if 'timeout' not in agent_cfg:
            agent_cfg['timeout'] = DEFAULT_TEST_CRAWL_TIMEOUT

        br.setOptions(agent_cfg)
        cr_job = ag.CrawlJob(br)
        cr_job.urls = [urls] if isinstance(urls, basestring) else urls
        # Enumerate the normalized list, not the raw argument: the original
        # zipped over `urls`, which iterates characters when a bare string
        # is passed in.
        cr_job.url_tuples = zip(xrange(1, len(cr_job.urls) + 1), cr_job.urls)

        ag.run_crawl(cr_job)

        self.assertTrue(os.path.isdir(cr_job.job_dir),
                        'No job folder created!')
        for idx, url in enumerate(cr_job.urls):
            outfile = os.path.join(
                cr_job.job_dir,
                fu.get_out_filename_from_url(url, str(idx + 1)))
            self.assertTrue(os.path.isfile(outfile),
                            'Cannot find log file %s' % outfile)
            self.assert_all_patterns_in_file(outfile, expected_strs)
            self.assert_all_patterns_not_in_file(outfile, unexpected_strs)
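A hypothetical call showing the shapes this helper expects; the config values, URL, and patterns below are illustrative only, not the project's fixtures:

    agent_cfg = {'type': 'chrome_lazy', 'timeout': 30}  # made-up config values
    self.should_crawl_and_log(agent_cfg,
                              'http://example.com',  # a bare string is wrapped into a list
                              expected_strs=['loaded'],
                              unexpected_strs=['Traceback'])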
Example #4
    def test_modphantomjs_should_log_access_to_navigator_props(self):
        ph = ag.HeadlessAgent().setOptions(
            ag.AGENT_CFG_PHANTOM_MOD_HOME_PAGE).setOptions(
                {'timeout': TEST_TIMEOUT})
        self.should_log_access_to_navigator_props(ph.__dict__)
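The chained calls above only work if setOptions returns the agent itself, and passing ph.__dict__ along suggests options are stored as instance attributes. A minimal sketch of such a fluent setter, assuming exactly that (an illustration of the pattern, not FPDetective's actual implementation):

    class HeadlessAgent(object):
        def setOptions(self, options):
            # Copy each option onto the agent and return self so calls can
            # be chained; the attributes then show up in ph.__dict__.
            for key, value in options.items():
                setattr(self, key, value)
            return self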