import time

import requests

# Headers (user-agent / proxy rotation) and send_email are project helpers
# assumed to be imported from elsewhere in this repo.


def get_page(url):
    """Fetch the HTML of ``url`` using a rotating user agent and proxy.

    Retries up to 10 times with a linearly growing back-off and returns an
    empty string if every attempt fails.
    """
    retries = 1
    for i in range(10):
        print("Try:", i, url)
        try:
            userAgentDict = Headers.get_user_agent()
            proxyDict = Headers.get_proxy()

            response = requests.get(url,
                                    proxies=proxyDict,
                                    headers=userAgentDict,
                                    timeout=10)

            status = response.status_code
            if status == 200:
                return response.text
            print("Got the response code:", status)
        except Exception as e:
            if i == 9:  # last attempt: notify by email before giving up
                send_email(
                    "Exception occurred for this url while getting the html "
                    "for archive: " + url + " the error: " + str(e))
            print("Exception occurred for this url while getting the html "
                  "for archive:", url, "the error:", e)
            wait = retries * 2  # back off a little longer after each failure
            time.sleep(wait)
            retries += 1
    return ""
def _fetch_url(url, is_threaded, timeout=None, crawl_type=None):
    """
    Crawls the html content of the parameter url and saves the html in _results.
    :param url:
    :param is_threaded: If True, the result is stored in SimpleCrawler._results
        for later processing by the fetch_urls method; otherwise it is only returned.
    :param timeout: request timeout in seconds; if None, a default of 10 seconds is used
    :param crawl_type: if "Archives", the request is routed through a proxy
    :return: html of the url
    """
    html = ""
    response = None
    status = None
    timeout = timeout if timeout is not None else 10
    retries = 1
    for i in range(10):
        print("Try to fetch the html:", i, "url:", url)
        try:
            userAgentDict = Headers.get_user_agent()
            if crawl_type == "Archives":  # use a proxy only for archive pages
                proxyDict = Headers.get_proxy()
                response = requests.get(
                    url, proxies=proxyDict, headers=userAgentDict, timeout=timeout)
            else:
                response = requests.get(
                    url, headers=userAgentDict, timeout=timeout)
            status = response.status_code
            print(url, status)
            if status == 200:
                print("[Successful]", url)
                html = response.text
                break
        except Exception as e:
            print("Exception occurred for this url while getting the html "
                  "for this archive article:", url)
            print("the error:", e)
            wait = retries * 2  # back off a little longer after each failure
            time.sleep(wait)
            retries += 1
    if html == "":
        if response is None:
            raise Exception(
                "[Error] Could not get the html file for this article url: " + url)
        raise Exception(
            "[Error] Got the status code: " + str(status) + "\n"
            "[Error] Could not get the html file for this article url: " + url
            + "\n" + response.text)
    if is_threaded:
        SimpleCrawler._results[url] = html
    return html
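

# Example usage (sketch): _fetch_url can be called directly for a single page,
# or with is_threaded=True so the html lands in SimpleCrawler._results, keyed
# by url. The thread wiring below is illustrative only; the real fetch_urls
# helper is assumed to live alongside SimpleCrawler in this repo.
#
#   html = _fetch_url("https://example.com/article", is_threaded=False,
#                     crawl_type="Archives")
#
#   import threading
#   urls = ["https://example.com/a", "https://example.com/b"]
#   threads = [threading.Thread(target=_fetch_url, args=(u, True)) for u in urls]
#   for t in threads:
#       t.start()
#   for t in threads:
#       t.join()
#   results = {u: SimpleCrawler._results.get(u) for u in urls}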