Example #1
    def http_request(self, request):
        try:
            data = request.get_data()
        except AttributeError:
            data = request.data
        if data is not None and type(data) != str:
            v_files = []
            v_vars = []
            try:
                for (key, value) in list(data.items()):
                    if hasattr(value, 'read'):
                        v_files.append((key, value))
                    else:
                        v_vars.append((key, value))
            except TypeError:
                raise TypeError("not a valid non-string sequence or mapping object")
            if len(v_files) == 0:
                data = urllib.parse.urlencode(v_vars, doseq=True)
            else:
                boundary, data = self.multipart_encode(v_vars, v_files)
                contenttype = 'multipart/form-data; boundary=%s' % boundary
                if (request.has_header('Content-Type') and request.get_header(
                        'Content-Type').find('multipart/form-data') != 0):
                    six.print_("Replacing %s with %s" %
                               (request.get_header('content-type'),
                                'multipart/form-data'))
                request.add_unredirected_header('Content-Type', contenttype)
            try:
                request.add_data(data)
            except AttributeError:
                request.data = data

        return request
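
The http_request handlers in this collection (Examples #1, #2, #5, #6, #23 and #36) are variants of the classic MultipartPostHandler recipe: they rewrite a dict-valued request body into either a urlencoded or a multipart/form-data payload before urllib's HTTP handler sends it. A minimal sketch of how such a handler would plug into an opener chain (the class body and the multipart_encode helper are assumed, as above):

import urllib.request

class MultipartPostHandler(urllib.request.BaseHandler):
    # run before the default HTTP handler so the body is rewritten in time
    handler_order = urllib.request.HTTPHandler.handler_order - 10

    def http_request(self, request):
        # ... rewrite request.data as in Example #1 ...
        return request

opener = urllib.request.build_opener(MultipartPostHandler())
# opener.open(url, {'field': 'value', 'upload': open('f.txt', 'rb')})
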
Example #2
    def http_request(self, request):
        print(request.__dict__)
        data = request.data
        if data is not None and type(data) != str:
            v_files = []
            v_vars = []
            try:
                for key, value in data.items():
                    if isinstance(value, IOBase):  # requires: from io import IOBase
                        v_files.append((key, value))
                    else:
                        v_vars.append((key, value))
            except TypeError:
                systype, value, traceback = sys.exc_info()
                raise TypeError("not a valid non-string sequence or mapping object").with_traceback(traceback)

            if len(v_files) == 0:
                data = urllib.parse.urlencode(v_vars, doseq=True)
            else:
                boundary, data = MultipartPostHandler.multipart_encode(v_vars, v_files)

                contenttype = 'multipart/form-data; boundary=%s' % boundary
                if(request.has_header('Content-Type')
                   and request.get_header('Content-Type').find('multipart/form-data') != 0):
                    print("Replacing %s with %s" % (request.get_header('content-type'), 'multipart/form-data'))
                request.add_unredirected_header('Content-Type', contenttype)

            request.data = data.encode('utf-8')
        
        return request
Example #3
    def testReset(self):
        self.wrapper.setMethod(POST)
        self.wrapper.setQuery('CONSTRUCT WHERE {?a ?b ?c}')
        self.wrapper.setReturnFormat(N3)
        self.wrapper.addParameter('a', 'b')
        self.wrapper.setOnlyConneg(True)

        request = self._get_request(self.wrapper)
        parameters = self._get_parameters_from_request(request)
        onlyConneg = self.wrapper.onlyConneg

        self.assertEqual('POST', request.get_method())
        self.assertTrue(parameters['query'][0].startswith('CONSTRUCT'))
        self.assertTrue('rdf+n3' in request.get_header('Accept'))
        self.assertTrue('a' in parameters)
        self.assertTrue(onlyConneg)

        self.wrapper.resetQuery()

        request = self._get_request(self.wrapper)
        parameters = self._get_parameters_from_request(request)
        onlyConneg = self.wrapper.onlyConneg

        self.assertEqual('GET', request.get_method())
        self.assertTrue(parameters['query'][0].startswith('SELECT'))
        self.assertFalse('rdf+n3' in request.get_header('Accept'))
        self.assertTrue('sparql-results+xml' in request.get_header('Accept'))
        self.assertFalse('a' in parameters)
        self.assertTrue(onlyConneg)
Example #4
    def testReset(self):
        self.wrapper.setMethod(POST)
        self.wrapper.setQuery("CONSTRUCT WHERE {?a ?b ?c}")
        self.wrapper.setReturnFormat(N3)
        self.wrapper.addParameter("a", "b")
        self.wrapper.setOnlyConneg(True)

        request = self._get_request(self.wrapper)
        parameters = self._get_parameters_from_request(request)
        onlyConneg = self.wrapper.onlyConneg

        self.assertEqual("POST", request.get_method())
        self.assertTrue(parameters["query"][0].startswith("CONSTRUCT"))
        self.assertTrue("rdf+n3" in request.get_header("Accept"))
        self.assertTrue("a" in parameters)
        self.assertTrue(onlyConneg)

        self.wrapper.resetQuery()

        request = self._get_request(self.wrapper)
        parameters = self._get_parameters_from_request(request)
        onlyConneg = self.wrapper.onlyConneg

        self.assertEqual("GET", request.get_method())
        self.assertTrue(parameters["query"][0].startswith("SELECT"))
        self.assertFalse("rdf+n3" in request.get_header("Accept"))
        self.assertTrue("sparql-results+xml" in request.get_header("Accept"))
        self.assertFalse("a" in parameters)
        self.assertFalse("a" in parameters)
        self.assertTrue(onlyConneg)
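
Examples #3 and #4 exercise SPARQLWrapper's resetQuery(); the _get_parameters_from_request helper they call is not shown. For the POST case it can plausibly be approximated by parsing the urlencoded body with urllib.parse.parse_qs, which returns a dict of value lists and so matches the parameters['query'][0] lookups above (a sketch under that assumption, not the suite's actual helper):

import urllib.parse

def get_parameters_from_request(request):
    body = request.data.decode('utf-8') if request.data else ''
    return urllib.parse.parse_qs(body)  # e.g. {'query': ['SELECT ...']}

For the GET case, the query string of request.get_full_url() would be parsed instead.
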
Example #5
    def http_request(self, request):
        data = request.get_data()
        if data is not None and type(data) != str:
            v_files = []
            v_vars = []
            try:
                for (key, value) in list(data.items()):
                    if hasattr(value, 'read'):  # Python 3 has no builtin file type; duck-type instead
                        v_files.append((key, value))
                    else:
                        v_vars.append((key, value))
            except TypeError:
                systype, value, traceback = sys.exc_info()
                raise TypeError(
                    "not a valid non-string sequence or mapping object"
                ).with_traceback(traceback)

            if len(v_files) == 0:
                data = urllib.parse.urlencode(v_vars, doseq=True)
            else:
                boundary, data = self.multipart_encode(v_vars, v_files)
                contenttype = 'multipart/form-data; boundary=%s' % boundary
                if (request.has_header('Content-Type') and request.get_header(
                        'Content-Type').find('multipart/form-data') != 0):
                    print("Replacing %s with %s" %
                          (request.get_header('content-type'),
                           'multipart/form-data'))
                request.add_unredirected_header('Content-Type', contenttype)

            request.add_data(data)
        return request
Example #6
    def http_request(self, request):
        data = request.get_data()
        if data is not None and type(data) != str:
            v_files = []
            v_vars = []
            try:
                for (key, value) in list(data.items()):
                    if hasattr(value, 'read'):
                        v_files.append((key, value))
                    else:
                        v_vars.append((key, value))
            except TypeError:
                raise TypeError("not a valid non-string sequence or mapping object")
            if len(v_files) == 0:
                data = urllib.parse.urlencode(v_vars, doseq=True)
            else:
                boundary, data = self.multipart_encode(v_vars, v_files)
                contenttype = 'multipart/form-data; boundary=%s' % boundary
                if (
                    request.has_header('Content-Type') and
                    request.get_header('Content-Type').find(
                        'multipart/form-data') != 0
                ):
                    six.print_(
                        "Replacing %s with %s" % (
                            request.get_header('content-type'),
                            'multipart/form-data'
                        )
                    )
                request.add_unredirected_header('Content-Type', contenttype)
            request.add_data(data)
        return request
Example #7
    def _assert_request(self, request):
        self.assertEqual('UTF-8,*', request.get_header('Accept-charset'))
        self.assertEqual('en-us,en;q=0.7,fr;q=0.3',
                         request.get_header('Accept-language'))
        self.assertEqual('gzip', request.get_header('Accept-encoding'))
        self.assertEqual(
            'Mozilla/5.0 (X11; U; Linux x86_64; en-US; ' +
            'rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick)' +
            ' Firefox/3.6.18', request.get_header('User-agent'))

        self.assertEqual(4, len(request.headers))
Example #8
def test_proper_headers_are_added(mock_urlopen):
    mock_urlopen.return_value = urlopen_accepted_response()
    message_1 = 'Test Message 1'
    event = aws_log_events.create_aws_event([message_1])

    function.lambda_handler(event, context)

    # Note: urllib stores header names via str.capitalize(), so only the first letter stays upper-case
    mock_urlopen.assert_called()
    request = mock_urlopen.call_args[0][0]
    assert request.get_header('X-insert-key') == logging_api_key
    assert request.get_header('X-event-source') == 'logs'
    assert request.get_header('Content-encoding') == 'gzip'
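
The lower-cased lookups above are explained by urllib internals: Request.add_header() stores header names through str.capitalize(), so only the first letter stays upper-case, and get_header() looks up exactly that stored form without normalizing its argument. A quick illustration:

import urllib.request

request = urllib.request.Request('http://example.com')
request.add_header('X-Insert-Key', 'secret')
print(request.get_header('X-insert-key'))  # 'secret'
print(request.get_header('X-Insert-Key'))  # None: the lookup is not normalized
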
Example #9
def load_baidu():
    url= "https://search.51job.com/list/030200,000000,0000,00,9,99,%2B,2,1.html?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare="
    header = {
        # browser version
        "User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36",
        # "haha":"hehe"
    }


    # create the request object
    request = urllib.request.Request(url,headers=header)
    # header info can also be added dynamically
    # request.add_header("User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36")
    # request the data (headers cannot be added here; urlopen provides no parameter for them)
    response = urllib.request.urlopen(request)
    print(response)
    data = response.read().decode("GBK")
    print(data)

    # get the full url
    # final_url = request.get_full_url()
    # print(final_url)

    with open("code01.html",'w') as f:
        f.write(data)
    # response headers
    # print(response.headers)
    # get the request headers (all of them)
    # request_headers = request.headers
    # print(request_headers)
    # (2) a second way to print header info
    # note: the first letter must be upper-case, all other letters lower-case
    request_headers = request.get_header("User-agent")
Example #10
 def test_instagram_oembed_return_values(self, urlopen):
     urlopen.return_value = self.dummy_response
     result = InstagramOEmbedFinder(
         app_id='123',
         app_secret='abc').find_embed("https://instagr.am/p/CHeRxmnDSYe/")
     self.assertEqual(
         result, {
             'type': 'something',
             'title': 'test_title',
             'author_name': 'test_author',
             'provider_name': 'Instagram',
             'thumbnail_url': 'test_thumbail_url',
             'width': 'test_width',
             'height': 'test_height',
             'html':
             '<blockquote class="instagram-media">Content</blockquote>'
         })
     # check that a request was made with the expected URL / authentication
     request = urlopen.call_args[0][0]
     self.assertEqual(
         request.get_full_url(),
         "https://graph.facebook.com/v9.0/instagram_oembed?url=https%3A%2F%2Finstagr.am%2Fp%2FCHeRxmnDSYe%2F&format=json"
     )
     self.assertEqual(request.get_header('Authorization'), "Bearer 123|abc")
Example #11
def load_baidu():
    url = "http://www.baidu.com"

    header = {
        # basic browser info
        "User-Agent":
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36'
    }
    # request.get_header('User-Agent', "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36")

    request = urllib.request.Request(url, headers=header)
    response = urllib.request.urlopen(request)
    print(response)
    data = response.read().decode('utf-8')

    # get the full url
    final_url = request.get_full_url()

    # response headers
    # print(response.headers)

    # get the request headers
    # request_header = request.headers

    request_header = request.get_header('User-agent')
    print(request_header)
    with open("02header.html", "w", encoding='utf-8') as f:
        f.write(data)
Example #12
def load_baidu():
    url = "http://www.baidu.com"
    user_agent = ["Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
                  "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50",
                  "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0",
                  "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1",
                  "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
                  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
                  "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0",
                  "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE",
                  "User-Agent: Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)"


    ]
    # use a different browser identity for each request
    random_user_agent = random.choice(user_agent)
    # create the request object
    request = urllib.request.Request(url)
    # add the header info
    request.add_header("User-Agent",random_user_agent)
    # send the request and receive the response
    response = urllib.request.urlopen(request)
    print(response)  # <http.client.HTTPResponse object at 0x05029D90>
    # get the response body
    print(response.read())
    # get the request's User-Agent header; had none been added it would be Python-urllib/3.7
    print(request.get_header("User-agent"))
Example #13
def load_data():
    url = "https://www.baidu.com"

    header = {
        # browser / user info
        # fake-useragent could be used here to get a random, plausible User-Agent
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0",
        "aim": "an offer"
    }

    # type1
    request = urllib.request.Request(url, headers=header)
    # type2
    # # create the request object
    # request = urllib.request.Request(url)
    # # add header info to the request object
    # request.add_header("User-Agent",ua.random)

    # request the data
    response = urllib.request.urlopen(request)
    print(response, end="\n\n")
    str_data = response.read().decode("utf-8")

    # get all request headers
    _request_headers = request.headers
    print(_request_headers, end="\n\n")
    # get a single request header
    # note: first letter upper-case, all other letters lower-case
    _useragent = request.get_header("User-agent")
    print(_useragent, end="\n\n")
    return str_data
Example #14
 def test_facebook_oembed_return_values(self, urlopen):
     urlopen.return_value = self.dummy_response
     result = FacebookOEmbedFinder(
         app_id="123",
         app_secret="abc").find_embed("https://fb.watch/ABC123eew/")
     self.assertEqual(
         result,
         {
             "type": "something",
             "title": "test_title",
             "author_name": "test_author",
             "provider_name": "Facebook",
             "thumbnail_url": None,
             "width": "test_width",
             "height": "test_height",
             "html":
             '<blockquote class="facebook-media">Content</blockquote>',
         },
     )
     # check that a request was made with the expected URL / authentication
     request = urlopen.call_args[0][0]
     self.assertEqual(
         request.get_full_url(),
         "https://graph.facebook.com/v11.0/oembed_video?url=https%3A%2F%2Ffb.watch%2FABC123eew%2F&format=json",
     )
     self.assertEqual(request.get_header("Authorization"), "Bearer 123|abc")
Example #15
def load_baidu():
    url = "https://www.baidu.com"
    # add request header info
    header = {
        # browser version
        "User-Agent":
        "Mozilla/5.0 (Linux; U; Android 8.1.0; zh-cn; BLA-AL00 Build/HUAWEIBLA-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/8.9 Mobile Safari/537.36"
    }

    # create the request object (way 1: pass a headers dict)
    #request = urllib.request.Request(url, headers=header)
    # create the request object (way 2: add_header)
    request = urllib.request.Request(url)
    request.add_header(
        "User-Agent",
        "Mozilla/5.0 (Linux; U; Android 8.1.0; zh-cn; BLA-AL00 Build/HUAWEIBLA-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/8.9 Mobile Safari/537.36"
    )

    final_url = request.get_full_url()
    print(final_url)
    # request the data
    response = urllib.request.urlopen(request)
    data = response.read().decode("utf-8")

    # first way to get the request headers
    request_headers1 = request.headers
    # print(request_headers1)

    # second way to get a request header (first letter upper-case, the rest lower-case)
    request_headers2 = request.get_header("User-agent")
    # print(request_headers2)

    # write the data to a file: text as a string, video/audio with "wb"
    with open("load_baidu.html", "w", encoding="utf-8") as f:
        f.write(data)
Example #16
def load_baidu():
    url = "https://www.baidu.com"
    header = {
        # browser version, to make us look like a real user
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36"
    }

    # create the request object
    request = urllib.request.Request(url, headers=header)
    # print(request)

    # dynamically add request header info
    request.add_header(
        "User-Agent",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36"
    )

    # get the request headers (print all of them)
    request_header = request.headers
    print(request_header)

    # a second way to print header info
    print(request.get_header("User-agent"))

    # get the full url
    print(request.get_full_url())

    # request the data (headers cannot be added here; urlopen has no such parameter)
    response = urllib.request.urlopen(request)
    print(response)
    data = response.read().decode("utf-8")
    with open("02header.html", "w", encoding="utf-8") as f:
        f.write(data)
Example #17
def load_data():
    url = "https://www.baidu.com/"  #有useragent时可以加s
    user_agent_list = [
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
    ]

    # create the request with header info
    #request = urllib.request.Request(url,headers=header)
    # dynamically add headers instead
    request = urllib.request.Request(url)
    rand_user_agent = random.choice(user_agent_list)
    request.add_header("User-Agent",rand_user_agent)
    # request the data
    # response headers
    response = urllib.request.urlopen(request)  # the request carries the url
    data = response.read().decode("utf-8")
    # get the full url
    final_url = request.get_full_url()
    print(2, final_url)
    # print(response.headers)
    request_headers = request.headers
    print(request_headers)
    # print a specific header: the first letter must be upper-case and the rest lower-case, otherwise None is returned
    request_header = request.get_header("User-agent")
    print(request_header)
    with open("02_headers.html","w",encoding="utf-8") as f:
        f.write(data)
Example #18
def load_baidu():
    url = "http://www.baidu.com"
    # create the request object
    # headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36"}
    # dynamically add headers
    request = urllib.request.Request(url)
    # dynamically add a request header (the name is "User-Agent", not "User_Agent")
    request.add_header(
        "User-Agent",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36"
    )
    # open the request itself (not the bare url, or the added header would never be sent)
    response = urllib.request.urlopen(request)
    data = response.read().decode("utf-8")
    with open("data.html", "w", encoding="utf-8") as f:
        f.write(data)
    # view the response headers
    # print(response.headers)
    # second way to print a header (the stored name is "User-agent")
    ret = request.get_header("User-agent")
    # get the full url
    final_url = request.get_full_url()

    print(final_url)
    # get the request headers
    request_headers = request.headers
Example #19
    def __call__(self):
        if not self.follow_symlinks and os.path.islink(self.path):
            raise errors.TraversalError(private_msg="Path %r is a symlink" %
                                        self.path)
        request = quixote.get_request()
        response = quixote.get_response()

        if self.cache_time is None:
            response.set_expires(None)  # don't set the Expires header
        else:
            # explicitly allow the client to cache the page by setting the
            # Expires header; this is even more efficient than using
            # Last-Modified/If-Modified-Since, since the browser does not
            # need to contact the server
            response.set_expires(seconds=self.cache_time)

        try:
            stat = os.stat(self.path)
        except OSError:
            raise errors.TraversalError
        last_modified = formatdate(stat.st_mtime, usegmt=True)
        if last_modified == request.get_header('If-Modified-Since'):
            # handle exact match of If-Modified-Since header
            response.set_status(304)
            return ''

        # Set the Content-Type for the response and return the file's contents.
        response.set_content_type(self.mime_type)
        if self.encoding:
            response.set_header("Content-Encoding", self.encoding)

        response.set_header('Last-Modified', last_modified)

        return FileStream(open(self.path, 'rb'), stat.st_size)
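
The 304 branch above relies on both sides using the same RFC 1123 date string: email.utils.formatdate(usegmt=True) emits the exact format that clients echo back in If-Modified-Since, so a plain string comparison is enough. For example:

from email.utils import formatdate

print(formatdate(784111777, usegmt=True))  # Sun, 06 Nov 1994 08:49:37 GMT
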
Example #20
def load_baidu():
    url = "http://www.baidu.com"

    # add request header info
    header = {
        "User-Agent":
        "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36"
    }
    # create a request object for the given url, adding the UA
    request = urllib.request.Request(url, headers=header)

    # alternative: create a bare request and add the UA dynamically
    # (the uncommented bare Request previously replaced the one above, so get_header printed None)
    # request = urllib.request.Request(url)
    # request.add_header("User-agent","Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36")

    # request the data
    response = urllib.request.urlopen(request)
    data = response.read().decode("utf-8")
    with open("headers.html", "w", encoding="utf-8") as f:
        f.write(data)

    # response headers
    # print(response.headers)
    # note: the first letter must be upper-case, the other letters lower-case
    request_header = request.get_header("User-agent")
    print(request_header)
    # get the full url
    final_url = request.get_full_url()
    print(final_url)
Example #21
def load_data():
    url = "https://www.baidu.com/"
    headers = {
        "User-Agent":
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
        "HAHA": "hehe"
    }
    # create the request object
    request = urllib.request.Request(url, headers=headers)
    # add_header can also be used to add info
    # request.add_header("User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36")

    # the request object can be passed in place of the url
    response = urllib.request.urlopen(request)
    str_response = response.read().decode("utf-8")

    # request headers
    request_headers = request.headers
    print(request_headers['User-agent'])
    # get_header() can also fetch a header (note: first letter capitalized)
    print(request.get_header("Haha"))

    print(str_response)
    # this method returns the full url
    # print(request.get_full_url())

    with open("2.html", "w", encoding="utf-8") as f:
        f.write(str_response)
Example #22
def load_baidu():
    url = "https://www.baidu.com"
    # add request header info
    header = {
        # browser version
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36",
        "ha": "he"
    }
    # create the request object
    request = urllib.request.Request(url)
    request.add_header("user-agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36")
    # request the data
    response = urllib.request.urlopen(request)
    print(response)
    data = response.read().decode("utf-8")

    # get the full url
    final_url = request.get_full_url()
    print(final_url)
    # get the response headers
    # print(response.headers)
    # request_headers = request.headers
    # print(request_headers)
    # a second way to print header info
    request_headers = request.get_header("User-agent")
    print(request_headers)
    with open("02header.html", "w") as f:
        f.write(data)
Example #23
def http_request(request):
    data = request.data
    if data is not None and type(data) != str:
        v_files = []
        v_vars = []
        try:
            for (key, value) in data.items():
                if type(value) == io.BufferedReader:
                    v_files.append((key, value))
                else:
                    v_vars.append((key, value))
        except TypeError:
            systype, value, traceback = sys.exc_info()
            raise TypeError("not a valid non-string sequence or mapping object").with_traceback(traceback)

        if len(v_files) == 0:
            data = urllib.parse.urlencode(v_vars, doseq=True)
        else:
            boundary, data = multipart_encode(v_vars, v_files)
            contenttype = 'multipart/form-data; boundary=%s' % boundary
            # get_header is a method, not a mapping: call it rather than subscript it
            if (request.has_header('Content-Type')
               and request.get_header('Content-Type').find('multipart/form-data') != 0):
                print("Replacing %s with %s" % (request.get_header('content-type'), 'multipart/form-data'))
            request.add_unredirected_header('Content-Type', contenttype)
        request.data = data.encode('utf-8')
    return request
Example #24
 def test_instagram_oembed_return_values(self, urlopen):
     urlopen.return_value = self.dummy_response
     result = InstagramOEmbedFinder(
         app_id="123",
         app_secret="abc").find_embed("https://instagr.am/p/CHeRxmnDSYe/")
     self.assertEqual(
         result,
         {
             "type": "something",
             "title": "test_title",
             "author_name": "test_author",
             "provider_name": "Instagram",
             "thumbnail_url": "test_thumbail_url",
             "width": "test_width",
             "height": "test_height",
             "html":
             '<blockquote class="instagram-media">Content</blockquote>',
         },
     )
     # check that a request was made with the expected URL / authentication
     request = urlopen.call_args[0][0]
     self.assertEqual(
         request.get_full_url(),
         "https://graph.facebook.com/v11.0/instagram_oembed?url=https%3A%2F%2Finstagr.am%2Fp%2FCHeRxmnDSYe%2F&format=json",
     )
     self.assertEqual(request.get_header("Authorization"), "Bearer 123|abc")
Example #25
def scrapy_picture():
    request = urllib.request.Request(url)

    # a specific header can also be added/modified by calling Request.add_header()
    request.add_header("User-Agent", user_agent)
    # the first letter is upper-case, everything after it lower-case
    request.get_header("User-agent")

    response = urllib.request.urlopen(request)
    data = response.read().decode('utf-8')
    dic_data = json.loads(data)
    image_url = u'https://cn.bing.com'+dic_data.get('images')[0].get('url')
    name = dic_data.get('images')[0].get('copyright').replace(' ', '').replace('/','&')
    date = dic_data.get('images')[0].get('startdate')
    image_name = date+','+name
    print(image_name)
    return image_url,image_name
Example #26
 def ensure_content_type(self, request):
     """Get 'Content-type' header or default it to JSON."""
     # pylint: disable=no-self-use
     # pylint: disable=fixme
     # TODO Refactor HTTP request sending into separate module.
     # see https://github.com/raphaelhuefner/allbar/issues/1
     if not request.has_header('Content-type'):
         request.add_header('Content-type', 'application/json')
     return request.get_header('Content-type')
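
The has_header()/add_header() pairing above is the usual way to default a header without clobbering a caller-supplied value. The same pattern, standalone:

import urllib.request

request = urllib.request.Request('http://example.com', data=b'{}')
if not request.has_header('Content-type'):
    request.add_header('Content-type', 'application/json')
print(request.get_header('Content-type'))  # application/json
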
Example #27
 def http_request(self, request):
     if not request.has_header('Cookie'):
         request.add_unredirected_header('Cookie', self.cookie)
         request.add_header('Cookie', self.cookie)
     else:
         cookie = request.get_header('Cookie')
         request.add_unredirected_header('Cookie', cookie + '; ' + cookie)
     self.cookiejar.add_cookie_header(request)
     return request
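
Example #27 mixes add_header() with add_unredirected_header(). The difference: headers added with add_unredirected_header() are not carried over when urllib follows a redirect, yet both stores are consulted by has_header() and get_header(). A small sketch:

import urllib.request

request = urllib.request.Request('http://example.com')
request.add_unredirected_header('Cookie', 'a=1')
print(request.has_header('Cookie'))  # True: both header stores are checked
print(request.headers)               # {} - regular headers only
print(request.unredirected_hdrs)     # {'Cookie': 'a=1'}
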
Example #28
def add_header():
    url = "https://www.baidu.com/"
    header = {
        # add a User-Agent header to mimic a real user's visit
        "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36",
        "abc": "hahahhahah"
    }
    request = urllib.request.Request(url, headers=header)
    # get all request headers
    print(request.headers)
    # get a specific request header
    print(request.get_header("User-agent"))
    print(request.get_header("Abc"))
    # dynamically add a request header
    request.add_header("123", "lalalallalal")
    print(request.get_header("123"))
    response = urllib.request.urlopen(request)
    print(response)
Example #29
    def download_media(self, the_url):
        '''
        Download the media file referenced by the_url
        Returns the path to the downloaded file
        :param the_url:
        '''

        def get_filename_from_cd(content_disp):
            '''
            Get filename from Content-Disposition
            :param content_disp:
            '''

            to_return = None

            if content_disp:
                fname = re.findall('filename=(.+)', content_disp)

                if fname:
                    to_return = fname[0]

            return to_return


        def get_filename_from_response(the_response):
            '''
            Attempt to get the filename from the response
            :param the_response:
            '''

            url_parts = urllib.parse.urlparse(the_response.geturl())
            to_return = posixpath.basename(url_parts.path)
            # Sanity check
            if not re.match(r'^[\w-]+\.(jpg|jpeg|gif|png)$',
                            to_return, re.IGNORECASE):
                # Nope, "bad" filename
                logging.error("Invalid media filename '%s' - ignoring",
                              to_return)
                to_return = ''

            return to_return

        request = urllib.request.Request(the_url)
        request.add_header('User-Agent', 'Mozilla/5.0')
        response = urllib.request.urlopen(request)
        # Content-Disposition is a response header, so read it from the
        # response rather than the request
        filename = get_filename_from_cd(
            response.getheader('Content-Disposition')) or \
            get_filename_from_response(response) or \
            'random.jpg'

        media_dir = os.getenv('MEDIA_DIR', '/tmp')
        full_path = media_dir + '/' + filename
        logging.info("Downloading %s as %s...", the_url, full_path)
        with open(full_path, 'wb') as file_chunk:
            file_chunk.write(response.read())

        return full_path
Example #30
    def test_issue16464(self):
        # See https://bugs.python.org/issue16464
        # and https://bugs.python.org/issue46648
        handler = self.start_server([
            (200, [], b'any'),
            (200, [], b'any'),
        ])
        opener = urllib.request.build_opener()
        request = urllib.request.Request("http://localhost:%s" % handler.port)
        self.assertEqual(None, request.data)

        opener.open(request, "1".encode("us-ascii"))
        self.assertEqual(b"1", request.data)
        self.assertEqual("1", request.get_header("Content-length"))

        opener.open(request, "1234567890".encode("us-ascii"))
        self.assertEqual(b"1234567890", request.data)
        self.assertEqual("10", request.get_header("Content-length"))
Example #31
def http_post(url, para):
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    #headers = {'User-Agent': user_agent}
    request = urllib.request.Request(url, para)
    #request.headers = headers
    print(request.get_full_url())
    print(request.data)
    print(request.get_header('User-Agent'))
    response = urllib.request.urlopen(request)
    print(response.read())
Example #32
    def formPost(self, show_error=True):
        responses = []
        urlenc_data = ""
        if (self.post_data is not None) and (self.post_data is not self.No_Data):
            urlenc_data = urllib.parse.urlencode(
                self.post_data).encode("utf-8")
            #print("data looks like:" + str(urlenc_data))
            request = urllib.request.Request(url=self.requested_url,
                                             data=urlenc_data,
                                             headers=self.default_headers)
        else:
            request = urllib.request.Request(url=self.requested_url,
                                             headers=self.default_headers)
            print("No data for this jsonPost?")

        request.headers['Content-type'] = 'application/x-www-form-urlencoded'
        try:
            responses.append(urllib.request.urlopen(request))
            return responses[0]
        except urllib.error.HTTPError as error:
            if (show_error):
                print(
                    "-------------------------------------------------------------"
                )
                print("%s URL: %s" % (error.code, request.get_full_url()))
                #print("Error: ", sys.exc_info()[0])
                print("Request URL:", request.get_full_url())
                print("Request Content-type:",
                      request.get_header('Content-type'))
                print("Request Accept:", request.get_header('Accept'))
                print("Request Data:")
                LFUtils.debug_printer.pprint(request.data)
                if (len(responses) > 0):
                    print(
                        "-------------------------------------------------------------"
                    )
                    print("Response:")
                    LFUtils.debug_printer.pprint(responses[0].reason)
                    print(
                        "-------------------------------------------------------------"
                    )

        return None
Example #33
    def testSetCredentials(self):
        request = self._get_request(self.wrapper)
        self.assertFalse(request.has_header('Authorization'))

        self.wrapper.setCredentials('login', 'password')
        request = self._get_request(self.wrapper)
        self.assertTrue(request.has_header('Authorization'))

        # expected header for login:password
        # should succeed for python 3 since pull request #72
        self.assertEqual("Basic bG9naW46cGFzc3dvcmQ=", request.get_header('Authorization'))
Example #34
  def test_extra_headers(self):
    """You can pass in extra headers and they go into the request object."""

    request = feedparser._build_urllib2_request(
      'http://example.com/feed',
      'agent-name',
      None, None, None, None,
      {'Cache-Control': 'max-age=0'})
    # nb, urllib2 folds the case of the headers
    self.assertEquals(
      request.get_header('Cache-control'), 'max-age=0')
Example #35
 def test_custom_headers(self):
     url = "http://www.example.com"
     with support.transient_internet(url):
         opener = urllib.request.build_opener()
         request = urllib.request.Request(url)
         self.assertFalse(request.header_items())
         opener.open(request)
         self.assertTrue(request.header_items())
         self.assertTrue(request.has_header('User-agent'))
         request.add_header('User-Agent','Test-Agent')
         opener.open(request)
         self.assertEqual(request.get_header('User-agent'),'Test-Agent')
Example #36
    def http_request(self, request):
        data = request.get_data()
        def isfiledata(p_str):
            import re

            r_c = re.compile("^f'(.*)'$")
            rert = r_c.search(str(p_str))
            #rert = re.search("^f'(.*)'$", p_str)
            if rert:
                return rert.group(1)
            else:
                return None

        if data is not None and type(data) != str:
            v_files = []
            v_vars = []
            try:
                for (key, value) in list(data.items()):
                    if isfiledata(value):  # instead of the Python 2 check: type(value) == file
                        v_files.append((key, value))
                    else:
                        v_vars.append((key, value))
            except TypeError:
                systype, value, traceback = sys.exc_info()
                raise TypeError("not a valid non-string sequence or mapping object").with_traceback(traceback)

            if len(v_files) == 0:
                data = urllib.parse.urlencode(v_vars, doseq=True)
            else:
                boundary, data = self.multipart_encode(v_vars, v_files)

                contenttype = 'multipart/form-data; boundary=%s' % boundary
                if(request.has_header('Content-Type')
                   and request.get_header('Content-Type').find('multipart/form-data') != 0):
                    print("Replacing %s with %s" % (request.get_header('content-type'), 'multipart/form-data'))
                request.add_unredirected_header('Content-Type', contenttype)

            request.add_data(data)
        
        return request
Example #37
def statuses_mentions(query=None):
    base_url = "http://api.twitter.com/1/statuses/mentions.json"
    parameters = generate_base_data()
    query_string = ""
    if query is not None:
        query_string = add_params(query)
        parameters.extend(query)

    request = urllib.request.Request(base_url + query_string)
    signature = myoauth.oauth_sign(
        request.get_method(), base_url, parameters, OAUTH_CONSUMER_SECRET, OAUTH_TOKEN_SECRET
    )
    header_string = generate_header_string(parameters, [["oauth_signature", signature]])
    request.add_header("Authorization", header_string)

    print(request.get_header("Authorization"))
    print(request.get_method())
    print(request.get_full_url())
    return urllib.request.urlopen(request)
Example #38
        def http_request(self, request):
            scheme = request.get_type()
            if scheme not in ["http", "https"]:
                # robots exclusion only applies to HTTP
                return request

            if request.get_selector() == "/robots.txt":
                # /robots.txt is always OK to fetch
                return request

            host = request.get_host()

            # robots.txt requests don't need to be allowed by robots.txt :-)
            origin_req = getattr(request, "_origin_req", None)
            if (origin_req is not None and
                origin_req.get_selector() == "/robots.txt" and
                origin_req.get_host() == host
                ):
                return request

            if host != self._host:
                self.rfp = self.rfp_class()
                try:
                    self.rfp.set_opener(self.parent)
                except AttributeError:
                    debug("%r instance does not support set_opener" %
                          self.rfp.__class__)
                self.rfp.set_url(scheme+"://"+host+"/robots.txt")
                self.rfp.read()
                self._host = host

            ua = request.get_header("User-agent", "")
            if self.rfp.can_fetch(ua, request.get_full_url()):
                return request
            else:
                # XXX This should really have raised URLError.  Too late now...
                msg = "request disallowed by robots.txt"
                raise RobotExclusionError(
                    request,
                    request.get_full_url(),
                    403, msg,
                    self.http_response_class(StringIO()), StringIO(msg))
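
Example #38 also demonstrates get_header()'s optional second argument: request.get_header("User-agent", "") returns the given default instead of None when the header is missing. Standalone:

import urllib.request

request = urllib.request.Request('http://example.com')
print(request.get_header('User-agent', ''))  # '' - no UA has been set on this request
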
Example #39
# "request" and "opener" are assumed to have been created earlier in this script
request.add_header('Origin','http://www.diaochapai.com')
request.add_header('Accept','*/*')
request.add_header('Accept-Language','en-US,en;q=0.8')
request.add_header('X-Requested-With','XMLHttpRequest')
data = {"captcha":"24109","response":{"3271eabe-1200-4db3-b01c-8397c91fca20":{"choice":["c6cfa9a5-8832-421b-bcd8-1ef22e84b3dc"],"specify":{}}}}





p = re.compile("_vid=")
def Getvid(number):
    vid_list=[]
    for i in range(number):
        r = requests.get("http://www.diaochapai.com/survey/adc99e84-22fd-4de0-ac30-6e77e6347952")
        if r.status_code == 200:
            for cookie in r.cookies:
                match = p.search(str(cookie))
                if match:
                    vid_list.append(str(cookie)[13:50])
    return vid_list
vid_list = Getvid(100)
for i in range(100):
    request.add_header('Cookie','_vid=%s;captcha_token=bb238ba1-9e3d-42e8-8bc7-50db0cc9649a'%(vid_list[i]))
    try:
        response = opener.open(request, json.dumps(data).encode('utf-8'))
        html = response.read().decode('utf-8')
        print(vid_list[i], request.get_header("Cookie"))
    except urllib.error.HTTPError as e:
        print("reason:", e.reason, "code:", e.code, "headers:", e.headers)