def testGetBlogs(self):
    """Crawl Google blog-search links for one hard-coded niche.

    Merges previously logged keyword suggestions (per-niche log file) with a
    hard-coded seed keyword list, then feeds the de-duplicated set to
    ``testGetLinksBatch_single_t`` once per target country.

    NOTE(review): the keyword log path is an absolute developer-machine
    path, so this test only works on that machine -- TODO make it relative.
    """
    niche = "Society/Law"
    proxy_site = BuyProxyOrg(buy_proxy_org_account)
    proxies = proxy_site.get_proxies(timeout=5)
    keyword_log_path = "/Users/superCat/Desktop/PycharmProjectPortable/Seeds/KeywordSuggestions/"+niche.replace('/', '-')+".txt"
    countries = ["uk", ]
    min_delay = 2
    max_delay = 5
    max_page = 2
    days_ago = 4*365  # look back roughly four years of results
    target_keywords_init = ["legal case", "Labour law", "human rights law", "crime law", "Immigration law",
                            "Family law", "Transactional law", "Company law", "Commercial law", "Admiralty law",
                            "Intellectual property law", "international law", "tax law", "banking law", "competition law",
                            "consumer law", "environmental law"]
    for country in countries:
        # De-duplicate logged suggestions; the original redundant
        # `[x for x in list(set(...))]` copy-comprehension and the unused
        # `suggested_keywords` accumulator were removed.
        temp_keywords = list(set(FileHandler.read_lines_from_file(keyword_log_path)))
        crawl_keywords = list(set(target_keywords_init + temp_keywords))
        self.testGetLinksBatch_single_t(niche, keywords=crawl_keywords, page_count=max_page, index=0, length=100,
                                        country_code=country, source_type=GoogleConst.SourceTypeBlog,
                                        min_delay=min_delay, max_delay=max_delay, days_ago=days_ago,
                                        proxies=proxies, use_browser=False)
# --- Example #2 (scraper artifact; kept as a comment so the file parses) ---
    def testOllipldsfapenChrome(self):
        """Open one proxied Chrome window per proxy and load a Google query.

        Each browser gets ``--proxy-server`` and ``--user-agent`` arguments,
        all windows stay open for a minute, then every one is closed.

        todo:http://stackoverflow.com/questions/29983106/how-can-i-set-proxy-with-authentication-in-selenium-chrome-web-driver-using-pyth
        :return:
        """
        request_url = "https://www.google.com/search?q=bbs&num=100&start=0&gl=us&gws_rd=cr&as_qdr=d10"
        proxy_source = BuyProxyOrg(buy_proxy_org_account)
        browsers = []
        for proxy_entry in proxy_source.get_proxies(5):
            options = webdriver.ChromeOptions()
            # str_no_auth() yields host:port without credentials.
            options.add_argument('--proxy-server=http://' + proxy_entry.str_no_auth())
            options.add_argument('--user-agent=' + WebRequestCommonHeader.webpage_agent)
            driver = webdriver.Chrome(chrome_options=options)
            driver.get(request_url)
            browsers.append(driver)
        # Keep every window alive long enough for manual inspection.
        time.sleep(60)
        for driver in browsers:
            driver.close()
    def testOllipldsfapenChrome(self):
        '''
        Open one proxied Chrome window per fetched proxy, load a Google
        query in each, wait 60 seconds, then close every window.

        NOTE(review): this is a byte-for-byte duplicate of the earlier
        testOllipldsfapenChrome definition in this file; because it appears
        later, this binding shadows the first one. One copy should be removed.

        todo:http://stackoverflow.com/questions/29983106/how-can-i-set-proxy-with-authentication-in-selenium-chrome-web-driver-using-pyth
        :return:
        '''
        # request_url = "https://www.google.com/search?q=crimial%20law&num=100&start=0&site=webhp&tbm=blg&source=lnt&as_qdr=y5"
        request_url = "https://www.google.com/search?q=bbs&num=100&start=0&gl=us&gws_rd=cr&as_qdr=d10"
        # request_url = "https://www.whatismyip.com/"
        # request_url = "http://whatsmyuseragent.com/"
        proxy = BuyProxyOrg(buy_proxy_org_account)
        proxy_list = proxy.get_proxies(5)
        chrome_list = list()
        for item in proxy_list:
            # host:port without credentials (Chrome's --proxy-server flag
            # does not accept inline auth).
            PROXY = item.str_no_auth()
            chrome_options = webdriver.ChromeOptions()
            # PROXY = "23.95.32.92:80"
            # USER_AGENT = "i like ice cream."

            USER_AGENT = WebRequestCommonHeader.webpage_agent
            chrome_options.add_argument('--proxy-server=http://{0:s}'.format(PROXY,))
            chrome_options.add_argument('--user-agent={0:s}'.format(USER_AGENT,))
            chrome = webdriver.Chrome(chrome_options=chrome_options)
            chrome.get(request_url)
            chrome_list.append(chrome)
        # Leave all windows open for manual inspection before closing.
        time.sleep(60)
        for item in chrome_list:
            item.close()
 def testProxyGet(self):
     """Fetch the proxy list and print each entry, pausing briefly between them."""
     source = BuyProxyOrg(buy_proxy_org_account)
     for candidate in source.get_proxies(5):
         print("try proxy:", candidate)
         time.sleep(1)
# --- Example #5 (scraper artifact; kept as a comment so the file parses) ---
 def testProxyGet(self):
     # Fetch the proxy list and print each entry with a 1 s pause.
     # NOTE(review): byte-for-byte duplicate of the earlier testProxyGet in
     # this file; this later binding shadows the first. Remove one copy.
     proxy = BuyProxyOrg(buy_proxy_org_account)
     proxy_list = proxy.get_proxies(5)
     for item in proxy_list:
         print("try proxy:", item)
         # sites = self.testGetLlinks(proxy=item)
         # for site in sites:
         #     print(site)
         time.sleep(1)
 def testBingResult(self):
     """Run one Bing search for a fixed keyword and print/return the hits."""
     keyword = "law blog"
     proxy_pool = BuyProxyOrg(buy_proxy_org_account).get_proxies(timeout=5)
     # First fetched proxy is used for the single search request.
     sites = BingCom.get_sites(keyword, page_number=1, index=0, length=100,
                               filter_list=filter_list, country_code="us",
                               source_type="", days_ago=10,
                               return_domain_home_only=False,
                               proxy=proxy_pool[0], timeout=30)
     for hit in sites:
         print(hit)
     return sites
 def testBingResult(self):
     # Run one Bing search for a fixed keyword via the first fetched proxy,
     # print each result, and return the result list.
     # NOTE(review): functional duplicate of the earlier testBingResult in
     # this file (only argument formatting differs); this later binding
     # shadows the first. Remove one copy.
     keyword = "law blog"
     proxy_site = BuyProxyOrg(buy_proxy_org_account)
     proxies = proxy_site.get_proxies(timeout=5)
     sites = BingCom.get_sites(keyword,
                               page_number=1,
                               index=0,
                               length=100,
                               filter_list=filter_list,
                               country_code="us",
                               source_type="",
                               days_ago=10,
                               return_domain_home_only=False,
                               proxy=proxies[0],
                               timeout=30)
     for item in sites:
         print(item)
     return sites
    def testProxyGetOpen(self):
        """Open a proxied Chrome per proxy and load a Google blog query in each.

        Every window is left open (5 s pause per proxy) for manual inspection;
        windows are not closed by this test.
        """
        request_url = "https://www.google.com/search?q=crimial%20law&num=100&start=0&site=webhp&tbm=blg&source=lnt&as_qdr=y5"
        proxy_source = BuyProxyOrg(buy_proxy_org_account)
        for proxy_entry in proxy_source.get_proxies(5):
            print("try proxy:", str(proxy_entry))
            options = webdriver.ChromeOptions()
            # str_no_auth() yields host:port without credentials.
            options.add_argument('--proxy-server=http://' + proxy_entry.str_no_auth())
            browser = webdriver.Chrome(chrome_options=options)
            browser.get(request_url)
            time.sleep(5)
# --- Example #9 (scraper artifact; kept as a comment so the file parses) ---
    def testProxyGetOpen(self):
        # Open one proxied Chrome window per fetched proxy and load a Google
        # blog query in each; windows are never closed here.
        # NOTE(review): byte-for-byte duplicate of the earlier testProxyGetOpen
        # in this file; this later binding shadows the first. Remove one copy.

        request_url = "https://www.google.com/search?q=crimial%20law&num=100&start=0&site=webhp&tbm=blg&source=lnt&as_qdr=y5"
        # request_url = "https://www.whatismyip.com/"
        proxy = BuyProxyOrg(buy_proxy_org_account)
        proxy_list = proxy.get_proxies(5)

        for item in proxy_list:
            # host:port without credentials.
            PROXY = item.str_no_auth()
            print("try proxy:", str(item))
            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_argument('--proxy-server=http://%s' % PROXY)

            chrome = webdriver.Chrome(chrome_options=chrome_options)
            # driver = WebDriver.get_chrome(additional_options=chrome_options)
            chrome.get(request_url)
            # sites = self.testGetLlinks(proxy=item)
            # for site in sites:
            #     print(site)
            time.sleep(5)
    def testGetkeywordsRecursive(self, niche="Society/Law", level=1, keyword_init=None,
                                 proxies=None, country_code="us", min_delay=2, max_delay=5, offset=120):
        """Expand a keyword set through ``level`` rounds of Google suggestions.

        Seeds come either from ``keyword_init`` or, when empty/absent, from the
        per-niche keyword log file (skipping the first ``offset`` entries).
        Each round's suggestions are appended to the log as they arrive
        (crash-safe via ``save_callback``); at the end the log is rewritten
        with the full accumulated pool.

        Fixes vs. original:
        - ``keyword_init=[]`` mutable default replaced with ``None`` sentinel.
        - ``keywords_pool`` is now a copy; the original aliased the caller's
          list, so ``keywords_pool += ...`` mutated the argument in place.

        NOTE(review): the log path is an absolute developer-machine path.
        """
        keyword_log_path = "/Users/superCat/Desktop/PycharmProjectPortable/Seeds/KeywordSuggestions/"+niche.replace('/', '-')+".txt"

        def save_callback(keywords: list):
            # Append incrementally so progress survives an interrupted run.
            FileHandler.append_lines_to_file(keyword_log_path, keywords, option="at")

        if not keyword_init:
            keyword_init = list(set(FileHandler.read_lines_from_file(keyword_log_path)))[offset:]
            for item in keyword_init:
                print(item)
            print("total keywords:", len(keyword_init))
        if proxies is None:
            proxy_site = BuyProxyOrg(buy_proxy_org_account)
            proxies = proxy_site.get_proxies(timeout=5)
        current_level = 0
        keywords_pool = list(keyword_init)  # copy: do not mutate caller's list
        while current_level < level:
            # Each round feeds the previous round's suggestions back in.
            keyword_init = self.testGetSuggestionBatch(keyword_init, proxies=proxies, country_code=country_code,
                                                       min_delay=min_delay, max_delay=max_delay, callback=save_callback)
            keywords_pool += keyword_init
            current_level += 1
        # Rewrite the log with the complete pool (replaces incremental appends).
        FileHandler.remove_file_if_exist(keyword_log_path)
        FileHandler.append_lines_to_file(keyword_log_path, keywords_pool, option="t")