def http_request(self, request):
    # Support both the old (get_data/add_data) and new (.data) Request APIs.
    try:
        data = request.get_data()
    except AttributeError:
        data = request.data
    if data is not None and not isinstance(data, str):
        v_files = []
        v_vars = []
        try:
            for key, value in data.items():
                if hasattr(value, 'read'):
                    v_files.append((key, value))
                else:
                    v_vars.append((key, value))
        except TypeError:
            raise TypeError("not a valid non-string sequence or mapping object")
        if len(v_files) == 0:
            # doseq must be passed as a keyword; the bare name was a NameError
            data = urllib.parse.urlencode(v_vars, doseq=True)
        else:
            boundary, data = self.multipart_encode(v_vars, v_files)
            contenttype = 'multipart/form-data; boundary=%s' % boundary
            if (request.has_header('Content-Type') and
                    request.get_header('Content-Type').find('multipart/form-data') != 0):
                print("Replacing %s with %s" % (request.get_header('content-type'),
                                                'multipart/form-data'))
            request.add_unredirected_header('Content-Type', contenttype)
        try:
            request.add_data(data)
        except AttributeError:
            request.data = data
    return request
def http_request(self, request):
    print(request.__dict__)
    data = request.data
    if data is not None and not isinstance(data, str):
        v_files = []
        v_vars = []
        try:
            for key, value in data.items():
                if isinstance(value, IOBase):  # requires: from io import IOBase
                    v_files.append((key, value))
                else:
                    v_vars.append((key, value))
        except TypeError:
            systype, value, traceback = sys.exc_info()
            raise TypeError(
                "not a valid non-string sequence or mapping object"
            ).with_traceback(traceback)
        if len(v_files) == 0:
            # Python 3: urlencode lives in urllib.parse; doseq must be a keyword
            data = urllib.parse.urlencode(v_vars, doseq=True)
        else:
            boundary, data = MultipartPostHandler.multipart_encode(v_vars, v_files)
            contenttype = 'multipart/form-data; boundary=%s' % boundary
            if (request.has_header('Content-Type') and
                    request.get_header('Content-Type').find('multipart/form-data') != 0):
                print("Replacing %s with %s" % (request.get_header('content-type'),
                                                'multipart/form-data'))
            request.add_unredirected_header('Content-Type', contenttype)
        request.data = data.encode('utf-8')
    return request
def testReset(self):
    self.wrapper.setMethod(POST)
    self.wrapper.setQuery('CONSTRUCT WHERE {?a ?b ?c}')
    self.wrapper.setReturnFormat(N3)
    self.wrapper.addParameter('a', 'b')
    self.wrapper.setOnlyConneg(True)
    request = self._get_request(self.wrapper)
    parameters = self._get_parameters_from_request(request)
    onlyConneg = self.wrapper.onlyConneg
    self.assertEqual('POST', request.get_method())
    self.assertTrue(parameters['query'][0].startswith('CONSTRUCT'))
    self.assertTrue('rdf+n3' in request.get_header('Accept'))
    self.assertTrue('a' in parameters)
    self.assertTrue(onlyConneg)

    self.wrapper.resetQuery()
    request = self._get_request(self.wrapper)
    parameters = self._get_parameters_from_request(request)
    onlyConneg = self.wrapper.onlyConneg
    self.assertEqual('GET', request.get_method())
    self.assertTrue(parameters['query'][0].startswith('SELECT'))
    self.assertFalse('rdf+n3' in request.get_header('Accept'))
    self.assertTrue('sparql-results+xml' in request.get_header('Accept'))
    self.assertFalse('a' in parameters)
    self.assertTrue(onlyConneg)
def testReset(self):
    self.wrapper.setMethod(POST)
    self.wrapper.setQuery("CONSTRUCT WHERE {?a ?b ?c}")
    self.wrapper.setReturnFormat(N3)
    self.wrapper.addParameter("a", "b")
    self.wrapper.setOnlyConneg(True)
    request = self._get_request(self.wrapper)
    parameters = self._get_parameters_from_request(request)
    onlyConneg = self.wrapper.onlyConneg
    self.assertEqual("POST", request.get_method())
    self.assertTrue(parameters["query"][0].startswith("CONSTRUCT"))
    self.assertTrue("rdf+n3" in request.get_header("Accept"))
    self.assertTrue("a" in parameters)
    self.assertTrue(onlyConneg)

    self.wrapper.resetQuery()
    request = self._get_request(self.wrapper)
    parameters = self._get_parameters_from_request(request)
    onlyConneg = self.wrapper.onlyConneg
    self.assertEqual("GET", request.get_method())
    self.assertTrue(parameters["query"][0].startswith("SELECT"))
    self.assertFalse("rdf+n3" in request.get_header("Accept"))
    self.assertTrue("sparql-results+xml" in request.get_header("Accept"))
    self.assertFalse("a" in parameters)
    self.assertTrue(onlyConneg)
def http_request(self, request):
    data = request.get_data()
    if data is not None and not isinstance(data, str):
        v_files = []
        v_vars = []
        try:
            for key, value in data.items():
                # Python 3 has no `file` type; duck-type on .read() instead
                if hasattr(value, 'read'):
                    v_files.append((key, value))
                else:
                    v_vars.append((key, value))
        except TypeError:
            systype, value, traceback = sys.exc_info()
            raise TypeError(
                "not a valid non-string sequence or mapping object"
            ).with_traceback(traceback)
        if len(v_files) == 0:
            data = urllib.parse.urlencode(v_vars, doseq=True)
        else:
            boundary, data = self.multipart_encode(v_vars, v_files)
            contenttype = 'multipart/form-data; boundary=%s' % boundary
            if (request.has_header('Content-Type') and
                    request.get_header('Content-Type').find('multipart/form-data') != 0):
                print("Replacing %s with %s" % (request.get_header('content-type'),
                                                'multipart/form-data'))
            request.add_unredirected_header('Content-Type', contenttype)
        # assumes a Request class that still provides add_data
        # (it was removed from urllib in Python 3.4)
        request.add_data(data)
    return request
def http_request(self, request):
    data = request.get_data()
    if data is not None and not isinstance(data, str):
        v_files = []
        v_vars = []
        try:
            for key, value in data.items():
                if hasattr(value, 'read'):
                    v_files.append((key, value))
                else:
                    v_vars.append((key, value))
        except TypeError:
            raise TypeError("not a valid non-string sequence or mapping object")
        if len(v_files) == 0:
            # doseq must be passed as a keyword; the bare name was a NameError
            data = urllib.parse.urlencode(v_vars, doseq=True)
        else:
            boundary, data = self.multipart_encode(v_vars, v_files)
            contenttype = 'multipart/form-data; boundary=%s' % boundary
            if (request.has_header('Content-Type') and
                    request.get_header('Content-Type').find('multipart/form-data') != 0):
                print("Replacing %s with %s" % (request.get_header('content-type'),
                                                'multipart/form-data'))
            request.add_unredirected_header('Content-Type', contenttype)
        # assumes a Request class that still provides add_data
        # (it was removed from urllib in Python 3.4)
        request.add_data(data)
    return request
def _assert_request(self, request):
    self.assertEqual('UTF-8,*', request.get_header('Accept-charset'))
    self.assertEqual('en-us,en;q=0.7,fr;q=0.3',
                     request.get_header('Accept-language'))
    self.assertEqual('gzip', request.get_header('Accept-encoding'))
    self.assertEqual(
        'Mozilla/5.0 (X11; U; Linux x86_64; en-US; '
        'rv:1.9.2.18) Gecko/20110628 Ubuntu/10.10 (maverick)'
        ' Firefox/3.6.18',
        request.get_header('User-agent'))
    self.assertEqual(4, len(request.headers))
def test_proper_headers_are_added(mock_urlopen):
    mock_urlopen.return_value = urlopen_accepted_response()
    message_1 = 'Test Message 1'
    event = aws_log_events.create_aws_event([message_1])
    function.lambda_handler(event, context)
    # urllib normalizes header names with str.capitalize(),
    # hence the odd-looking casing below
    mock_urlopen.assert_called()
    request = mock_urlopen.call_args[0][0]
    assert request.get_header('X-insert-key') == logging_api_key
    assert request.get_header('X-event-source') == 'logs'
    assert request.get_header('Content-encoding') == 'gzip'
def load_baidu(): url= "https://search.51job.com/list/030200,000000,0000,00,9,99,%2B,2,1.html?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99°reefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line=&specialarea=00&from=&welfare=" header = { #浏览器的版本 "User-Agent":"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36", # "haha":"hehe" } #创建请求对象 request = urllib.request.Request(url,headers=header) #动态的去添加head的信息 # request.add_header("User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36") #请求网络数据(不在此处增加请求头信息因为此方法系统没有提供参数) response = urllib.request.urlopen(request) print(response) data = response.read().decode("GBK") print(data) #获取到完整的url # final_url = request.get_full_url() # print(final_url) with open("code01.html",'w') as f: f.write(data) #响应头 # print(response.headers) #获取请求头的信息(所有的头的信息) # request_headers = request.headers # print(request_headers) #(2)第二种方式打印headers的信息 #注意点:首字母需要大写,其他字母都小写 request_headers = request.get_header("User-agent")
def test_instagram_oembed_return_values(self, urlopen):
    urlopen.return_value = self.dummy_response
    result = InstagramOEmbedFinder(
        app_id='123', app_secret='abc').find_embed("https://instagr.am/p/CHeRxmnDSYe/")
    self.assertEqual(
        result,
        {
            'type': 'something',
            'title': 'test_title',
            'author_name': 'test_author',
            'provider_name': 'Instagram',
            'thumbnail_url': 'test_thumbail_url',
            'width': 'test_width',
            'height': 'test_height',
            'html': '<blockquote class="instagram-media">Content</blockquote>'
        })
    # check that a request was made with the expected URL / authentication
    request = urlopen.call_args[0][0]
    self.assertEqual(
        request.get_full_url(),
        "https://graph.facebook.com/v9.0/instagram_oembed?url=https%3A%2F%2Finstagr.am%2Fp%2FCHeRxmnDSYe%2F&format=json"
    )
    self.assertEqual(request.get_header('Authorization'), "Bearer 123|abc")
def load_baidu(): url = "http://www.baidu.com" header = { # 浏览器基本信息 "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36' } # request.get_header('User-Agent', "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36") request = urllib.request.Request(url, headers=header) response = urllib.request.urlopen(request) print(response) data = response.read().decode('utf-8') # 获取完整的url final_url = request.get_full_url() #响应头 #print(response.headers) #获取响应头的信息 # request_header = request.headers request_header = request.get_header('User-agent') print(request_header) with open("02header.html", "w", encoding='utf-8') as f: f.write(data)
def load_baidu(): url = "http://www.baidu.com" user_agent = ["Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50", "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50", "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0", "Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1", "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11", "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; SE 2.X MetaSr 1.0; SE 2.X MetaSr 1.0; .NET CLR 2.0.50727; SE 2.X MetaSr 1.0", "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; 360SE", "User-Agent: Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)" ] # 每次请求的浏览器都不一样 random_user_agent = random.choice(user_agent) # 创建request对象 request = urllib.request.Request(url) # 添加头部信息 request.add_header("User-Agent",random_user_agent) # 发送请求,接收响应 response = urllib.request.urlopen(request) print(response)#<http.client.HTTPResponse object at 0x05029D90> # 获取响应内容 print(response.read()) # 获取响应的头部信息,如果没有添加头部信息。则是Python-urllib/3.7 print(request.get_header("User-agent"))
def load_data(): url = "https://www.baidu.com" header = { # 浏览器,用户信息 # 此处通过fake-useragent获取一个随机的,合理的User-Agent "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:72.0) Gecko/20100101 Firefox/72.0", "aim": "an offer" } # type1 request = urllib.request.Request(url, headers=header) # type2 # # 创建请求对象 # request = urllib.request.Request(url) # # 为请求对象添加header信息 # request.add_header("User-Agent",ua.random) # 请求网络数据 response = urllib.request.urlopen(request) print(response, end="\n\n") str_data = response.read().decode("utf-8") # 获取整个请求头的信息 _request_headers = request.headers print(_request_headers, end="\n\n") # 获取请求头的某个信息 # 注意点:首字母大写,其他字母小写 _useragent = request.get_header("User-agent") print(_useragent, end="\n\n") return str_data
def test_facebook_oembed_return_values(self, urlopen):
    urlopen.return_value = self.dummy_response
    result = FacebookOEmbedFinder(
        app_id="123", app_secret="abc").find_embed("https://fb.watch/ABC123eew/")
    self.assertEqual(
        result,
        {
            "type": "something",
            "title": "test_title",
            "author_name": "test_author",
            "provider_name": "Facebook",
            "thumbnail_url": None,
            "width": "test_width",
            "height": "test_height",
            "html": '<blockquote class="facebook-media">Content</blockquote>',
        },
    )
    # check that a request was made with the expected URL / authentication
    request = urlopen.call_args[0][0]
    self.assertEqual(
        request.get_full_url(),
        "https://graph.facebook.com/v11.0/oembed_video?url=https%3A%2F%2Ffb.watch%2FABC123eew%2F&format=json",
    )
    self.assertEqual(request.get_header("Authorization"), "Bearer 123|abc")
def load_baidu(): url = "https://www.baidu.com" # 添加请求头信息 header = { # 浏览器的版本 "User-Agent": "Mozilla/5.0 (Linux; U; Android 8.1.0; zh-cn; BLA-AL00 Build/HUAWEIBLA-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/8.9 Mobile Safari/537.36" } # 创建请求对象(添加headers方式一) #request = urllib.request.Request(url, headers=header) # 创建请求对象(添加headers方式二) request = urllib.request.Request(url) request.add_header( "User-Agent", "Mozilla/5.0 (Linux; U; Android 8.1.0; zh-cn; BLA-AL00 Build/HUAWEIBLA-AL00) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/57.0.2987.132 MQQBrowser/8.9 Mobile Safari/537.36" ) final_url = request.get_full_url() print(final_url) # 请求网络数据 response = urllib.request.urlopen(request) data = response.read().decode("utf-8") # 第一种获取请求头的信息 request_headers1 = request.headers # print(request_headers1) # 第二种获取请求头的信息(首字母大写,其他字母小写) request_headers2 = request.get_header("User-agent") # print(request_headers2) # 将数据写入文件 超文本写入用字符串 视频音频用wb with open("load_baidu.html", "w", encoding="utf-8") as f: f.write(data)
def load_baidu(): url = "https://www.baidu.com" header = { # 浏览器版本,告诉浏览器我是真实的用户 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36" } # 创建请求对象 request = urllib.request.Request(url, headers=header) # print(request) # 动态的添加请求头信息 request.add_header( "User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36" ) # 获取请求头信息(打印所有头的信息) request_header = request.headers print(request_header) # 第二种方式打印headers信息 print(request.get_header("User-agent")) # 获取完整的url print(request.get_full_url()) # 请求网络数据(不在此处增加请求头,系统没有此参数) response = urllib.request.urlopen(request) print(response) data = response.read().decode("utf-8") with open("02header.html", "w", encoding="utf-8") as f: f.write(data)
def load_data(): url = "https://www.baidu.com/" #有useragent时可以加s user_agent_list = [ "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36" ] #创建请求头信息 #request = urllib.request.Request(url,headers=header) # 动态添加headers信息 request = urllib.request.Request(url) rand_user_agent = random.choice(user_agent_list) request.add_header("User-Agent",rand_user_agent) #请求网络数据 #响应头 response = urllib.request.urlopen(request) #request包含url信息 data = response.read().decode("utf-8") #获取完整url final_url = request.get_full_url() print(2,final_url) # print(response.headers) request_headers = request.headers print(request_headers) #打印特定信息 !!!!!!首字母大写,其他字母都要小写,不然返回none request_header = request.get_header("User-agent") print(request_header) with open("02_headers.html","w",encoding="utf-8") as f: f.write(data)
def load_baidu(): url = "http://www.baidu.com" response = urllib.request.urlopen(url) # 创建请求对象 # headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36"} # 动态添加headers信息 request = urllib.request.Request(url) # 动态添加请求头 request.add_header( "User_Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.81 Safari/537.36" ) data = response.read().decode("utf-8") with open("data.html", "w", encoding="utf-8") as f: f.write(data) # 查看响应头信息 # print(response.headers) #第二种打印headers的方法 ret = request.get_header("User-Agent") # 获取完整的url final_url = request.get_full_url() print(final_url) #获取请求头信息 request_headers = request.headers
def __call__(self):
    if not self.follow_symlinks and os.path.islink(self.path):
        raise errors.TraversalError(private_msg="Path %r is a symlink" % self.path)
    request = quixote.get_request()
    response = quixote.get_response()
    if self.cache_time is None:
        response.set_expires(None)  # don't set the Expires header
    else:
        # Explicitly allow the client to cache the page by setting the
        # Expires header; this is even more efficient than using
        # Last-Modified/If-Modified-Since, since the browser does not need
        # to contact the server at all.
        response.set_expires(seconds=self.cache_time)
    try:
        stat = os.stat(self.path)
    except OSError:
        raise errors.TraversalError
    last_modified = formatdate(stat.st_mtime, usegmt=True)
    if last_modified == request.get_header('If-Modified-Since'):
        # handle exact match of If-Modified-Since header
        response.set_status(304)
        return ''
    # Set the Content-Type for the response and return the file's contents.
    response.set_content_type(self.mime_type)
    if self.encoding:
        response.set_header("Content-Encoding", self.encoding)
    response.set_header('Last-Modified', last_modified)
    return FileStream(open(self.path, 'rb'), stat.st_size)
def load_baidu(): url = "http://www.baidu.com" #添加请求头信息 header = { "User-Agent": "Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36" } #创建 指定url的请求对象,添加ua request = urllib.request.Request(url, headers=header) request = urllib.request.Request(url) #动态添加UA #request.add_header("User-agent","Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.94 Safari/537.36") #请求网络数据 respons = urllib.request.urlopen(request) data = respons.read().decode("utf-8") with open("hesders.html", "w", encoding="utf-8") as f: f.write(data) #响应头 #print(respons.headers) #注意,要求首字母大写,其它字母小写 request_header = request.get_header("User-agent") print(request_header) #获取完整的url final_url = request.get_full_url() print(final_url)
def load_data(): url = "https://www.baidu.com/" headers = { "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36", "HAHA": "hehe" } # 创造请求对象 request = urllib.request.Request(url, headers=headers) # 还可以利用add_headerd来添加信息 # request.add_header("User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36") # 可以利用请求报文来作为url response = urllib.request.urlopen(request) str_response = response.read().decode("utf-8") # 请求头信息 request_headers = request.headers print(request_headers['User-agent']) # 还可以调用get_header()来获取header内容(注意首字母大写) print(request.get_header("Haha")) print(str_response) # 利用该方法获取url连接 # print(request.get_full_url()) with open("2.html", "w", encoding="utf-8") as f: f.write(str_response)
def load_baidu(): url = "https://www.baidu.com" # 添加请求头的信息 header = { # 浏览器的版本 "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36", "ha": "he" } # 创建请求对象 request = urllib.request.Request(url) request.add_header("user-agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36") # 请求网络数据 response = urllib.request.urlopen(request) print(response) data = response.read().decode("utf-8") # 获取到完整的url final_url = request.get_full_url() print(final_url) # 获取响应头 # print(response.headers) # request_headers = request.headers # print(request_headers) # 第二种方式打印headers的信息 request_headers = request.get_header("User-agent") print(request_headers) with open("02header.html", "w") as f: f.write(data)
def http_request(request):
    data = request.data
    if data is not None and not isinstance(data, str):
        v_files = []
        v_vars = []
        try:
            for key, value in data.items():
                if isinstance(value, io.BufferedReader):
                    v_files.append((key, value))
                else:
                    v_vars.append((key, value))
        except TypeError:
            systype, value, traceback = sys.exc_info()
            raise TypeError("not a valid non-string sequence or mapping object")
        if len(v_files) == 0:
            # Python 3: urlencode lives in urllib.parse; doseq must be a keyword
            data = urllib.parse.urlencode(v_vars, doseq=True)
        else:
            boundary, data = multipart_encode(v_vars, v_files)
            contenttype = 'multipart/form-data; boundary=%s' % boundary
            # get_header is a method, not a mapping: call it, don't index it
            if (request.has_header('Content-Type') and
                    request.get_header('Content-Type').find('multipart/form-data') != 0):
                print("Replacing %s with %s" % (request.get_header('content-type'),
                                                'multipart/form-data'))
            request.add_unredirected_header('Content-Type', contenttype)
        request.data = data.encode('utf-8')
    return request
def test_instagram_oembed_return_values(self, urlopen):
    urlopen.return_value = self.dummy_response
    result = InstagramOEmbedFinder(
        app_id="123", app_secret="abc").find_embed("https://instagr.am/p/CHeRxmnDSYe/")
    self.assertEqual(
        result,
        {
            "type": "something",
            "title": "test_title",
            "author_name": "test_author",
            "provider_name": "Instagram",
            "thumbnail_url": "test_thumbail_url",
            "width": "test_width",
            "height": "test_height",
            "html": '<blockquote class="instagram-media">Content</blockquote>',
        },
    )
    # check that a request was made with the expected URL / authentication
    request = urlopen.call_args[0][0]
    self.assertEqual(
        request.get_full_url(),
        "https://graph.facebook.com/v11.0/instagram_oembed?url=https%3A%2F%2Finstagr.am%2Fp%2FCHeRxmnDSYe%2F&format=json",
    )
    self.assertEqual(request.get_header("Authorization"), "Bearer 123|abc")
def scrapy_picture():
    request = urllib.request.Request(url)
    # Request.add_header() can also add/modify a specific header
    request.add_header("User-Agent", user_agent)
    # First letter uppercase, the rest lowercase
    request.get_header("User-agent")
    response = urllib.request.urlopen(request)
    data = response.read().decode('utf-8')
    dic_data = json.loads(data)
    image_url = u'https://cn.bing.com' + dic_data.get('images')[0].get('url')
    name = dic_data.get('images')[0].get('copyright').replace(' ', '').replace('/', '&')
    date = dic_data.get('images')[0].get('startdate')
    image_name = date + ',' + name
    print(image_name)
    return image_url, image_name
def ensure_content_type(self, request):
    """Get 'Content-type' header or default it to JSON."""
    # pylint: disable=no-self-use
    # pylint: disable=fixme
    # TODO Refactor HTTP request sending into separate module.
    # see https://github.com/raphaelhuefner/allbar/issues/1
    if not request.has_header('Content-type'):
        request.add_header('Content-type', 'application/json')
    return request.get_header('Content-type')
def http_request(self, request):
    if not request.has_header('Cookie'):
        request.add_unredirected_header('Cookie', self.cookie)
        request.add_header('Cookie', self.cookie)
    else:
        # Append our cookie to whatever the request already carries
        # (the original concatenated the existing cookie with itself).
        cookie = request.get_header('Cookie')
        request.add_unredirected_header('Cookie', cookie + '; ' + self.cookie)
    self.cookiejar.add_cookie_header(request)
    return request
def add_header():
    url = "https://www.baidu.com/"
    header = {
        # User-info headers that imitate a real user's visit
        "User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.75 Safari/537.36",
        "abc": "hahahhahah"
    }
    request = urllib.request.Request(url, headers=header)
    # Get all request headers
    print(request.headers)
    # Get a specific request header
    print(request.get_header("User-agent"))
    print(request.get_header("Abc"))
    # Dynamically add a request header
    request.add_header("123", "lalalallalal")
    print(request.get_header("123"))
    response = urllib.request.urlopen(request)
    print(response)
def download_media(self, the_url):
    '''
    Download the media file referenced by the_url.
    Returns the path to the downloaded file.

    :param the_url:
    '''
    def get_filename_from_cd(content_disp):
        '''
        Get filename from Content-Disposition.

        :param content_disp:
        '''
        to_return = None
        if content_disp:
            fname = re.findall('filename=(.+)', content_disp)
            if fname:
                to_return = fname[0]
        return to_return

    def get_filename_from_response(the_response):
        '''
        Attempt to get the filename from the response.

        :param the_response:
        '''
        url_parts = urllib.parse.urlparse(the_response.geturl())
        to_return = posixpath.basename(url_parts.path)
        # Sanity check
        if not re.match(r'^[\w-]+\.(jpg|jpeg|gif|png)$', to_return, re.IGNORECASE):
            # Nope, "bad" filename
            logging.error("Invalid media filename '%s' - ignoring", to_return)
            to_return = ''
        return to_return

    request = urllib.request.Request(the_url)
    request.add_header('User-Agent', 'Mozilla/5.0')
    response = urllib.request.urlopen(request)
    # Content-Disposition is a *response* header; reading it off the request
    # (as the original did) always yields None.
    filename = get_filename_from_cd(
        response.headers.get('Content-Disposition')) or \
        get_filename_from_response(response) or \
        'random.jpg'
    media_dir = os.getenv('MEDIA_DIR', '/tmp')
    full_path = media_dir + '/' + filename
    logging.info("Downloading %s as %s...", the_url, full_path)
    with open(full_path, 'wb') as file_chunk:
        file_chunk.write(response.read())
    return full_path
def test_issue16464(self):
    # See https://bugs.python.org/issue16464
    # and https://bugs.python.org/issue46648
    handler = self.start_server([
        (200, [], b'any'),
        (200, [], b'any'),
    ])
    opener = urllib.request.build_opener()
    request = urllib.request.Request("http://localhost:%s" % handler.port)
    self.assertEqual(None, request.data)

    opener.open(request, "1".encode("us-ascii"))
    self.assertEqual(b"1", request.data)
    self.assertEqual("1", request.get_header("Content-length"))

    opener.open(request, "1234567890".encode("us-ascii"))
    self.assertEqual(b"1234567890", request.data)
    self.assertEqual("10", request.get_header("Content-length"))
def http_post(url, para):
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    # headers = {'User-Agent': user_agent}
    request = urllib.request.Request(url, para)
    # request.headers = headers
    print(request.get_full_url())
    print(request.data)
    print(request.get_header('User-Agent'))
    response = urllib.request.urlopen(request)
    print(response.read())
def formPost(self, show_error=True):
    responses = []
    urlenc_data = ""
    if (self.post_data is not None) and (self.post_data is not self.No_Data):
        urlenc_data = urllib.parse.urlencode(self.post_data).encode("utf-8")
        # print("data looks like:" + str(urlenc_data))
        request = urllib.request.Request(url=self.requested_url,
                                         data=urlenc_data,
                                         headers=self.default_headers)
    else:
        request = urllib.request.Request(url=self.requested_url,
                                         headers=self.default_headers)
        print("No data for this jsonPost?")
    request.headers['Content-type'] = 'application/x-www-form-urlencoded'
    try:
        responses.append(urllib.request.urlopen(request))
        return responses[0]
    except urllib.error.HTTPError as error:
        if show_error:
            print("-------------------------------------------------------------")
            print("%s URL: %s" % (error.code, request.get_full_url()))
            # print("Error: ", sys.exc_info()[0])
            print("Request URL:", request.get_full_url())
            print("Request Content-type:", request.get_header('Content-type'))
            print("Request Accept:", request.get_header('Accept'))
            print("Request Data:")
            LFUtils.debug_printer.pprint(request.data)
            if len(responses) > 0:
                print("-------------------------------------------------------------")
                print("Response:")
                LFUtils.debug_printer.pprint(responses[0].reason)
                print("-------------------------------------------------------------")
        return None
def testSetCredentials(self):
    request = self._get_request(self.wrapper)
    self.assertFalse(request.has_header('Authorization'))

    self.wrapper.setCredentials('login', 'password')
    request = self._get_request(self.wrapper)
    self.assertTrue(request.has_header('Authorization'))
    # expected header for login:password
    # should succeed for python 3 since pull request #72
    self.assertEqual("Basic bG9naW46cGFzc3dvcmQ=",
                     request.get_header('Authorization'))
def test_extra_headers(self):
    """You can pass in extra headers and they go into the request object."""
    request = feedparser._build_urllib2_request(
        'http://example.com/feed', 'agent-name',
        None, None, None, None, {'Cache-Control': 'max-age=0'})
    # note: urllib2 folds the case of the headers
    self.assertEqual(
        request.get_header('Cache-control'), 'max-age=0')
def test_custom_headers(self):
    url = "http://www.example.com"
    with support.transient_internet(url):
        opener = urllib.request.build_opener()
        request = urllib.request.Request(url)
        self.assertFalse(request.header_items())
        opener.open(request)
        self.assertTrue(request.header_items())
        self.assertTrue(request.has_header('User-agent'))
        request.add_header('User-Agent', 'Test-Agent')
        opener.open(request)
        self.assertEqual(request.get_header('User-agent'), 'Test-Agent')
def http_request(self, request):
    data = request.get_data()

    def isfiledata(p_str):
        # Values marked up as f'...' are treated as file data.
        import re
        r_c = re.compile("^f'(.*)'$")
        rert = r_c.search(str(p_str))
        # rert = re.search("^f'(.*)'$", p_str)
        if rert:
            return rert.group(1)
        else:
            return None

    if data is not None and not isinstance(data, str):
        v_files = []
        v_vars = []
        try:
            for key, value in data.items():
                if isfiledata(value):  # was `type(value) == file` in Python 2
                    v_files.append((key, value))
                else:
                    v_vars.append((key, value))
        except TypeError:
            systype, value, traceback = sys.exc_info()
            raise TypeError(
                "not a valid non-string sequence or mapping object"
            ).with_traceback(traceback)
        if len(v_files) == 0:
            data = urllib.parse.urlencode(v_vars, doseq=True)
        else:
            boundary, data = self.multipart_encode(v_vars, v_files)
            contenttype = 'multipart/form-data; boundary=%s' % boundary
            if (request.has_header('Content-Type') and
                    request.get_header('Content-Type').find('multipart/form-data') != 0):
                print("Replacing %s with %s" % (request.get_header('content-type'),
                                                'multipart/form-data'))
            request.add_unredirected_header('Content-Type', contenttype)
        # assumes a Request class that still provides add_data
        # (it was removed from urllib in Python 3.4)
        request.add_data(data)
    return request
def statuses_mentions(query=None):
    base_url = "http://api.twitter.com/1/statuses/mentions.json"
    parameters = generate_base_data()
    query_string = ""
    if query is not None:
        query_string = add_params(query)
        parameters.extend(query)
    request = urllib.request.Request(base_url + query_string)
    signature = myoauth.oauth_sign(
        request.get_method(), base_url, parameters,
        OAUTH_CONSUMER_SECRET, OAUTH_TOKEN_SECRET)
    header_string = generate_header_string(parameters,
                                           [["oauth_signature", signature]])
    request.add_header("Authorization", header_string)
    print(request.get_header("Authorization"))
    print(request.get_method())
    print(request.get_full_url())
    return urllib.request.urlopen(request)
def http_request(self, request):
    scheme = request.get_type()
    if scheme not in ["http", "https"]:
        # robots exclusion only applies to HTTP
        return request

    if request.get_selector() == "/robots.txt":
        # /robots.txt is always OK to fetch
        return request

    host = request.get_host()

    # robots.txt requests don't need to be allowed by robots.txt :-)
    origin_req = getattr(request, "_origin_req", None)
    if (origin_req is not None and
            origin_req.get_selector() == "/robots.txt" and
            origin_req.get_host() == host):
        return request

    if host != self._host:
        self.rfp = self.rfp_class()
        try:
            self.rfp.set_opener(self.parent)
        except AttributeError:
            debug("%r instance does not support set_opener" % self.rfp.__class__)
        self.rfp.set_url(scheme + "://" + host + "/robots.txt")
        self.rfp.read()
        self._host = host

    ua = request.get_header("User-agent", "")
    if self.rfp.can_fetch(ua, request.get_full_url()):
        return request
    else:
        # XXX This should really have raised URLError. Too late now...
        msg = "request disallowed by robots.txt"
        raise RobotExclusionError(
            request,
            request.get_full_url(),
            403, msg,
            self.http_response_class(StringIO()),
            StringIO(msg))
request.add_header('Origin', 'http://www.diaochapai.com')
request.add_header('Accept', '*/*')
request.add_header('Accept-Language', 'en-US,en;q=0.8')
request.add_header('X-Requested-With', 'XMLHttpRequest')

data = {"captcha": "24109",
        "response": {"3271eabe-1200-4db3-b01c-8397c91fca20": {
            "choice": ["c6cfa9a5-8832-421b-bcd8-1ef22e84b3dc"],
            "specify": {}}}}

p = re.compile("_vid=")


def Getvid(number):
    vid_list = []
    for i in range(number):
        r = requests.get("http://www.diaochapai.com/survey/adc99e84-22fd-4de0-ac30-6e77e6347952")
        if r.status_code == 200:
            for cookie in r.cookies:
                match = p.search(str(cookie))
                if match:
                    vid_list.append(str(cookie)[13:50])
    return vid_list


vid_list = Getvid(100)
for i in range(100):
    request.add_header('Cookie',
                       '_vid=%s;captcha_token=bb238ba1-9e3d-42e8-8bc7-50db0cc9649a' % (vid_list[i]))
    try:
        response = opener.open(request, json.dumps(data).encode('utf-8'))
        html = response.read().decode('utf-8')
        print(vid_list[i], request.get_header("Cookie"))
    except urllib.error.HTTPError as e:
        print("reason:", e.reason, "code:", e.code, "headers:", e.headers)