コード例 #1
0
 def testGetBlogs(self):
     """Crawl blog links for one niche from Google, country by country.

     Reads previously logged keyword suggestions from disk, merges them
     (de-duplicated) with a hard-coded seed keyword list, then runs the
     single-threaded link-crawl batch for each target country.
     """
     niche = "Society/Law"
     proxy_site = BuyProxyOrg(buy_proxy_org_account)
     proxies = proxy_site.get_proxies(timeout=5)
     # NOTE(review): absolute, machine-specific path — breaks on any other host.
     keyword_log_path = "/Users/superCat/Desktop/PycharmProjectPortable/Seeds/KeywordSuggestions/"+niche.replace('/', '-')+".txt"
     countries = ["uk", ]
     min_delay = 2
     max_delay = 5
     max_page = 2
     days_ago = 4*365  # look back roughly four years
     target_keywords_init = ["legal case", "Labour law", "human rights law", "crime law", "Immigration law",
                             "Family law", "Transactional law", "Company law", "Commercial law", "Admiralty law",
                             "Intellectual property law", "international law", "tax law", "banking law", "competition law",
                             "consumer law", "environmental law"]
     for country in countries:
         logged_keywords = list(set(FileHandler.read_lines_from_file(keyword_log_path)))
         # De-duplicated union of the seed list and the logged suggestions.
         # (was: a redundant `[x for x in list(set(...))]` wrapper plus an
         # unused `suggested_keywords` accumulator)
         crawl_keywords = list(set(target_keywords_init + logged_keywords))
         self.testGetLinksBatch_single_t(niche, keywords=crawl_keywords, page_count=max_page, index=0, length=100,
                                         country_code=country, source_type=GoogleConst.SourceTypeBlog,
                                         min_delay=min_delay, max_delay=max_delay, days_ago=days_ago,
                                         proxies=proxies, use_browser=False)
コード例 #2
0
    def testOllipldsfapenChrome(self):
        """Open one proxied Chrome per purchased proxy, load a Google search
        page in each, wait 60 seconds, then tear all of them down.

        todo:http://stackoverflow.com/questions/29983106/how-can-i-set-proxy-with-authentication-in-selenium-chrome-web-driver-using-pyth
        :return:
        """
        request_url = "https://www.google.com/search?q=bbs&num=100&start=0&gl=us&gws_rd=cr&as_qdr=d10"
        proxy = BuyProxyOrg(buy_proxy_org_account)
        proxy_list = proxy.get_proxies(5)
        chrome_list = list()
        try:
            for item in proxy_list:
                PROXY = item.str_no_auth()
                chrome_options = webdriver.ChromeOptions()
                USER_AGENT = WebRequestCommonHeader.webpage_agent
                chrome_options.add_argument('--proxy-server=http://{0:s}'.format(
                    PROXY, ))
                chrome_options.add_argument('--user-agent={0:s}'.format(
                    USER_AGENT, ))
                chrome = webdriver.Chrome(chrome_options=chrome_options)
                chrome.get(request_url)
                chrome_list.append(chrome)
            time.sleep(60)
        finally:
            # quit(), not close(): close() only closes the current window and
            # leaves the chromedriver session/process alive. The finally block
            # also cleans up browsers already started if a later get() raises.
            for item in chrome_list:
                item.quit()
コード例 #3
0
    def testOllipldsfapenChrome(self):
        '''
        Launch a proxied Chrome instance for every purchased proxy, point each
        at the same Google search URL, pause for a minute, then close them all.

        todo:http://stackoverflow.com/questions/29983106/how-can-i-set-proxy-with-authentication-in-selenium-chrome-web-driver-using-pyth
        :return:
        '''
        request_url = "https://www.google.com/search?q=bbs&num=100&start=0&gl=us&gws_rd=cr&as_qdr=d10"
        supplier = BuyProxyOrg(buy_proxy_org_account)
        agent = WebRequestCommonHeader.webpage_agent
        chrome_list = []
        for entry in supplier.get_proxies(5):
            address = entry.str_no_auth()
            options = webdriver.ChromeOptions()
            options.add_argument('--proxy-server=http://{0:s}'.format(address,))
            options.add_argument('--user-agent={0:s}'.format(agent,))
            browser = webdriver.Chrome(chrome_options=options)
            browser.get(request_url)
            chrome_list.append(browser)
        time.sleep(60)
        for browser in chrome_list:
            browser.close()
コード例 #4
0
 def testProxyGet(self):
     """Fetch the purchased proxy list and print each entry, one per second."""
     proxy = BuyProxyOrg(buy_proxy_org_account)
     proxy_list = proxy.get_proxies(5)
     for entry in proxy_list:
         print("try proxy:", entry)
         time.sleep(1)
コード例 #5
0
 def testProxyGet(self):
     """Retrieve proxies from buyproxy.org and echo them with a 1s pause each."""
     source = BuyProxyOrg(buy_proxy_org_account)
     for candidate in source.get_proxies(5):
         print("try proxy:", candidate)
         time.sleep(1)
コード例 #6
0
 def testBingResult(self):
     """Run one Bing search for a fixed keyword through a purchased proxy,
     print every resulting site, and return the list."""
     keyword = "law blog"
     provider = BuyProxyOrg(buy_proxy_org_account)
     available = provider.get_proxies(timeout=5)
     sites = BingCom.get_sites(keyword, page_number=1, index=0, length=100,
                               filter_list=filter_list, country_code="us",
                               source_type="", days_ago=10,
                               return_domain_home_only=False,
                               proxy=available[0], timeout=30)
     for site in sites:
         print(site)
     return sites
コード例 #7
0
 def testBingResult(self):
     """Query Bing once via the first purchased proxy; print and return the
     matched sites."""
     search_term = "law blog"
     proxy_source = BuyProxyOrg(buy_proxy_org_account)
     proxy_pool = proxy_source.get_proxies(timeout=5)
     # Collect the call parameters in one place, then fan them out.
     query_args = dict(page_number=1, index=0, length=100,
                       filter_list=filter_list, country_code="us",
                       source_type="", days_ago=10,
                       return_domain_home_only=False,
                       proxy=proxy_pool[0], timeout=30)
     sites = BingCom.get_sites(search_term, **query_args)
     for result in sites:
         print(result)
     return sites
コード例 #8
0
    def testProxyGetOpen(self):
        """For each purchased proxy, open a Chrome routed through it, load a
        Google search page, hold it for 5 seconds, then shut the browser down.
        """
        request_url = "https://www.google.com/search?q=crimial%20law&num=100&start=0&site=webhp&tbm=blg&source=lnt&as_qdr=y5"
        proxy = BuyProxyOrg(buy_proxy_org_account)
        proxy_list = proxy.get_proxies(5)

        for item in proxy_list:
            PROXY = item.str_no_auth()
            print("try proxy:", str(item))
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument('--proxy-server=http://%s' % PROXY)

            chrome = webdriver.Chrome(chrome_options=chrome_options)
            try:
                chrome.get(request_url)
                time.sleep(5)
            finally:
                # Fix: original leaked one chromedriver/browser per proxy —
                # nothing was ever closed. quit() ends the whole session.
                chrome.quit()
コード例 #9
0
    def testProxyGetOpen(self):
        """Spawn one proxied Chrome per purchased proxy on a Google search
        page, pausing 5 seconds between launches.

        NOTE(review): the browsers are never closed here — this matches the
        original behavior, but each iteration leaves a chromedriver running.
        """
        request_url = "https://www.google.com/search?q=crimial%20law&num=100&start=0&site=webhp&tbm=blg&source=lnt&as_qdr=y5"
        supplier = BuyProxyOrg(buy_proxy_org_account)
        candidates = supplier.get_proxies(5)

        for candidate in candidates:
            address = candidate.str_no_auth()
            print("try proxy:", str(candidate))
            opts = webdriver.ChromeOptions()
            opts.add_argument('--proxy-server=http://%s' % address)

            browser = webdriver.Chrome(chrome_options=opts)
            browser.get(request_url)
            time.sleep(5)
コード例 #10
0
    def testGetkeywordsRecursive(self, niche="Society/Law", level=1, keyword_init=None,
                                 proxies=None, country_code="us", min_delay=2, max_delay=5, offset=120):
        """Expand keyword suggestions for a niche through `level` rounds of
        Google-suggest calls, then rewrite the on-disk keyword log.

        :param niche: category path; '/' is flattened into '-' for the filename.
        :param level: number of suggestion-expansion rounds to run.
        :param keyword_init: seed keywords; when empty/None, seeds are loaded
            from the log file (skipping the first `offset` entries).
        :param proxies: proxy list; fetched from buyproxy.org when None.
        :param offset: number of logged keywords to skip when seeding from disk.
        """
        keyword_log_path = "/Users/superCat/Desktop/PycharmProjectPortable/Seeds/KeywordSuggestions/"+niche.replace('/', '-')+".txt"

        def save_callback(keywords: list):
            # Append each batch to the log as it arrives so progress survives
            # a crash mid-run; the log is rewritten clean at the end.
            FileHandler.append_lines_to_file(keyword_log_path, keywords, option="at")

        # Fix: was `keyword_init=[]` — a mutable default argument. None keeps
        # every existing call site working.
        if keyword_init is None:
            keyword_init = []
        if len(keyword_init) == 0:
            keyword_init = list(set(FileHandler.read_lines_from_file(keyword_log_path)))[offset:]
            for item in keyword_init:
                print(item)
            print("total keywords:", len(keyword_init))
        if proxies is None:
            proxy_site = BuyProxyOrg(buy_proxy_org_account)
            proxies = proxy_site.get_proxies(timeout=5)
        current_level = 0
        # Copy: `+=` below mutates in place, and aliasing would clobber a
        # caller-supplied list.
        keywords_pool = list(keyword_init)
        while current_level < level:
            keyword_init = self.testGetSuggestionBatch(keyword_init, proxies=proxies, country_code=country_code,
                                                       min_delay=min_delay, max_delay=max_delay, callback=save_callback)
            keywords_pool += keyword_init
            current_level += 1
        # Replace the incremental log with the final de-duplicated-by-round pool.
        FileHandler.remove_file_if_exist(keyword_log_path)
        FileHandler.append_lines_to_file(keyword_log_path, keywords_pool, option="t")