def gen_detikcom():
    try:
        url = httplib.urlsplit(jahex)
        http = httplib.HTTPConnection(url.netloc, timeout=60)
        headers = {'Host': url.netloc}
        http.request("GET", url.path + (url.query != "" and "?" + url.query or ""), headers=headers)
        resp = http.getresponse()
        sys.stdout.write("%s : Create static on %s : %s\n" % (time.ctime(), jahex, resp.status))
        sys.stdout.flush()
        base64string = base64.encodestring('%s:%s' % ('detik', 'kenthir2010'))[:-1]
        url = httplib.urlsplit(kangkung)
        http = httplib.HTTPConnection(url.netloc, timeout=60)
        headers = {
            'Host': url.netloc,
            'Authorization': 'Basic %s' % base64string
        }
        http.request("GET", url.path + (url.query != "" and "?" + url.query or ""), headers=headers)
        resp = http.getresponse()
        sys.stdout.write("%s : Create static on %s : %s\n" % (time.ctime(), kangkung, resp.status))
        sys.stdout.flush()
    except Exception, e:
        pass
def __really_request(self, method, uri, body):
    htcs = httplib.HTTPSConnection(
        httplib.urlsplit(self.__management_url).netloc
    )
    headers = {
        'Accept': 'application/json',
        'X-Auth-Token': self.__auth_token,
        'Content-Type': 'application/json',
    }
    htcs.request(method, httplib.urlsplit(self.__management_url).path + uri, body, headers)
    return htcs.getresponse()
def __request__(self, url, post=False):
    ''' Main request function '''
    if post:
        method = 'POST'
    else:
        method = 'GET'
    urlsplit = httplib.urlsplit(url)
    if not urlsplit.netloc and not urlsplit.scheme:
        url = 'http://%s' % url
        urlsplit = httplib.urlsplit(url)
    params = urlsplit.query
    headers = self.get_headers(url)
    if self.setting.proxy:
        server_address = self.setting.proxy.get_proxy()
    else:
        server_address = urlsplit.netloc
    connect = httplib.HTTPConnection(server_address)
    time_begin_request = time.time()
    try:
        connect.request(method, url, params, headers)
    except:
        self.setting.log_error('%s %s' % (url, 'connection error'))
        status = 404
        data = None
        time_request = time.time() - time_begin_request
    else:
        response = connect.getresponse()
        time_request = time.time() - time_begin_request
        status = response.status
        self.setting.log_info('%s %s' % (url, response.status))
        if response.status == 302:
            location = response.getheader('location')
            self.setting.log_warn('%s %s %s' % (url, response.status, location))
            data = self.get_method(location, None)
        else:
            data = response.read()
        cookie = response.getheader('Set-Cookie')
        if self.setting.use_cookie:
            if cookie:
                cookie_name, cookie_data, cookie_path = self.__convert_raw_cookie__(cookie)
                self.add_cookie(cookie_name, cookie_data, cookie_path)
    resolve_answer = (status, url, time_request, data)
    if self.kraken_response:
        self.kraken_response(resolve_answer)
    self.busy = False
    return resolve_answer
def testAuth(self):
    # CNY-981
    try:
        contentServer = rephelp.HTTPServerController(authRequester())
        baseUrl = contentServer.url()
        # test with user:pass
        contentURL = 'http://*****:*****@%s' % httplib.urlsplit(baseUrl)[1]
        name = 'foo.tar.gz'
        url = contentURL + '/' + name
        cached = lookaside.fetchURL(self.cfg, url, name)
        f = open(cached, 'r')
        self.assertEqual(f.read(), 'Hello, world!\n')
        # test with no password given
        contentURL = 'http://user@%s' % httplib.urlsplit(baseUrl)[1]
        name = 'foo2.tar.gz'
        url = contentURL + '/' + name
        cached = lookaside.fetchURL(self.cfg, url, name)
        f = open(cached, 'r')
        self.assertEqual(f.read(), 'Hello, world 2!\n')
        # test with no password at all
        name = 'foo3.tar.gz'
        url = baseUrl + '/' + name
        cached = self.logCheck(
            lookaside.fetchURL, (self.cfg, url, name),
            ['error: error downloading http://localhost:[0-9]*//foo3.tar.gz: HTTP Error 401: Unauthorized'],
            regExp=True)
        self.assertEqual(cached, None)

        # test ftp with user:pass
        def fakeOpen(od, req, *args, **kw):
            self.req = req
            import StringIO
            s = 'baz file contents'
            r = StringIO.StringIO(s)
            r.headers = {'contentLength': len(s)}
            return r
        import urllib2
        self.mock(urllib2.OpenerDirector, 'open', fakeOpen)
        url = 'ftp://*****:*****@foo.com/bar/baz.tgz'
        name = 'baz.tgz'
        cached = lookaside.fetchURL(self.cfg, url, name)
        self.assertEqual(url, self.req.get_full_url())
        self.assertEqual(open(cached).read(), 'baz file contents')
    finally:
        contentServer.kill()
def Check_URL(parameters, curdir, form, user_info=None):
    """Returns (True, status, reason) if the url is valid or (False, status, reason) if different."""
    try:
        f = open("%s/%s" % (curdir, parameters['url']), "r")
        url = f.read().replace("\n", " ")
        f.close()
    except:
        url = "The URL is needed"
    common_errors_list = [400, 404, 500]
    url_tuple = urlsplit(url)
    if not url_tuple[0]:
        url = "http://" + url
        url_tuple = urlsplit(url)
    if not url_tuple[0] and not url_tuple[1]:
        #return (False, 000, "Not Valid")
        raise InvenioWebSubmitFunctionStop(CFG_INVALID_URL % (url,))
    # HTTPConnection had the timeout parameter introduced in python 2.6
    # for the older versions we have to get and set the default timeout
    # In order to use a custom timeout pass it as an extra argument to this function
    #old_timeout = getdefaulttimeout()
    #setdefaulttimeout(timeout)
    conn = HTTPConnection(url_tuple[1])
    #setdefaulttimeout(old_timeout)
    try:
        conn.request("GET", url_tuple[2])
    except:
        #return (False, 000, "Not Valid")
        raise InvenioWebSubmitFunctionStop(CFG_INVALID_URL % (url,))
    response = conn.getresponse()
    status = response.status
    reason = response.reason
    if str(status).startswith('1') or str(status).startswith('2') or str(status).startswith('3'):
        #return (True, status, reason)
        return ""
    elif str(status).startswith('4') or str(status).startswith('5'):
        if status in common_errors_list:
            #return (False, status, reason)
            raise InvenioWebSubmitFunctionStop(CFG_INVALID_URL % (url,))
        else:
            #return (True, status, reason)
            return ""
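# Hedged sketch (not part of the original function): Check_URL keys off the first
# digit of the HTTP status via string prefixes; an equivalent check using integer
# division, keeping the same "only the listed 4xx/5xx codes fail" behaviour.
def _status_is_acceptable(status, common_errors_list=(400, 404, 500)):
    if status // 100 in (1, 2, 3):
        return True
    return status not in common_errors_list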
def __init__(self, base_url, auth, account=None, cloudspace=None, proxy=None):
    log.info('%s.init(base_url: %s)' % (self.__name__, base_url))
    url_parts = httplib.urlsplit(base_url)
    self.host = url_parts.hostname
    self.scheme = url_parts.scheme
    self.auth = auth
    self.account = account
    self.cloudspace = cloudspace
    self.proxy = proxy
    self.conn = None
    self.redirect_attempt = 0
    self.cache = {}
    try:
        with open(CACHE_FILE) as cache_file:
            log.info('%s.init(): reading API cache file %s' % (self.__name__, CACHE_FILE))
            self.cache = json.load(cache_file)
    except Exception:
        log.info('%s.init(): cannot read cache' % self.__name__)
def httpRequest(self, method, url, data={}):
    try:
        _urld = httplib.urlsplit(url)
        conn = httplib.HTTPConnection(_urld.netloc, 80, True, 3)
        conn.connect()
        data = urllib.urlencode(data)
        if method == 'get':
            conn.putrequest("GET", url, None)
            conn.putheader("Content-Length", '0')
        elif method == 'post':
            conn.putrequest("POST", url)
            conn.putheader("Content-Length", str(len(data)))
            conn.putheader("Content-Type", "application/x-www-form-urlencoded")
        conn.putheader("Connection", "close")
        conn.endheaders()
        if len(data) > 0:
            conn.send(data)
        f = conn.getresponse()
        self.httpBody = f.read().encode('gbk')
        f.close()
        conn.close()
    except:
        traceback.print_exc(sys.stderr)
        self.httpBody = ''
    return self.httpBody
def httpRequest(self, method, url, data={}):
    try:
        _url = httplib.urlsplit(url)
        #logging.info("method %s type %s type netloc %s %s\n" % (method, type(_url), type(_url.netloc), _url.netloc))
        try:
            _server, _port = _url.netloc.split(':')
        except:
            _server = _url.netloc
            _port = 80
        _conn = httplib.HTTPConnection(_server, _port, True, 3)
        _conn.connect()
        data = urllib.urlencode(data)
        #logging.info("data %s\n" % (data))
        if method == 'get':
            _conn.putrequest("GET", url, None)
            _conn.putheader("Content-Length", '0')
        elif method == 'post':
            _conn.putrequest("POST", url)
            _conn.putheader("Content-Length", str(len(data)))
            _conn.putheader("Content-Type", "application/x-www-form-urlencoded")
        _conn.putheader("Connection", "close")
        _conn.endheaders()
        if len(data) > 0:
            _conn.send(data)
        f = _conn.getresponse()
        self.httpBody = f.read().decode('utf8').encode('gbk')
        f.close()
        _conn.close()
        #logging.info("response %s" % (str(self.httpBody)))
    except:
        traceback.print_exc(sys.stderr)
        self.httpBody = ''
    return self.httpBody
def __init__(self, url):
    Publisher.__init__(self)
    #: Indicates which method should be called.
    self.withNode = False
    self.url = url
    (self.scheme, self.netloc, self.path, self.query, self.frag) = httplib.urlsplit(url)
    self.headers = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
def http_post(url, data=None, data_type='x-www-form-urlencoded', user_agent=''):
    res = None
    urlInfo = httplib.urlsplit(url)
    print(url)
    if url.find('https://') > -1:
        conn = httplib.HTTPSConnection(urlInfo.netloc, timeout=5)
    else:
        conn = httplib.HTTPConnection(urlInfo.netloc, timeout=5)
    try:
        conn.connect()
        if data:
            conn.putrequest("POST", urlInfo.path)
            conn.putheader("Content-Length", len(data))
            conn.putheader("Content-Type", "application/%s" % data_type)
            if url.find('gfan.com') > 0:
                conn.putheader("User-Agent", user_agent)
        else:
            conn.putrequest("GET", url)
            conn.putheader("Content-Length", 0)
        conn.putheader("Connection", "close")
        conn.endheaders()
        if data:
            conn.send(data)
        response = conn.getresponse()
        if response:
            res = response.read()
            response.close()
        return res
    except Exception, ex:
        raise ex
def render(self, context):
    """
    Example output:
        tag:zena.cz,2004-05-27:/12/10245
            2004-05-27 ... publication date
            12         ... content type
            10245      ... publishable ID

    If self.hashed_output is set, whole tag is hashed by SHA1.
    """
    publishable = template.Variable(self.var_name).resolve(context)
    if publishable.publish_from:
        pub_date = publishable.publish_from.strftime('%Y-%m-%d')
    else:
        pub_date = '1970-01-01'
    url = publishable.get_absolute_url(domain=True)
    url_splitted = urlsplit(url)
    out = 'tag:%s,%s:/%d/%d' % (
        url_splitted.netloc,
        pub_date,
        publishable.content_type_id,
        publishable.pk
    )
    if self.hashed_output:
        h = hashlib.sha1(out)
        out = h.hexdigest()
    return out
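# Hedged standalone sketch (assumed values, not from the original project): how the
# tag URI built by render() is assembled from the URL's netloc, the publish date and
# the two ids, matching the docstring's example output.
import hashlib
from urlparse import urlsplit

def make_tag_uri(absolute_url, pub_date, content_type_id, pk, hashed=False):
    out = 'tag:%s,%s:/%d/%d' % (urlsplit(absolute_url).netloc, pub_date,
                                content_type_id, pk)
    return hashlib.sha1(out).hexdigest() if hashed else out

# make_tag_uri('http://zena.cz/12/10245', '2004-05-27', 12, 10245)
# -> 'tag:zena.cz,2004-05-27:/12/10245'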
def make_http_request(method, url, headers, body, timeout, acceptable_statuses, attempts):
    scheme, netloc, path, query, fragment = urlsplit(url)
    if scheme == 'https':
        Connection = HTTPSConnection
    else:
        Connection = HTTPConnection
    for attempt in xrange(attempts):
        try:
            with Timeout(timeout):
                conn = Connection(netloc)
                conn.request(method, '%s?%s' % (path, query), body, headers)
                resp = conn.getresponse()
                if resp.status in acceptable_statuses or \
                        resp.status // 100 in acceptable_statuses:
                    return resp
        except Exception as e:
            if attempt >= attempts - 1:
                raise e
        except Timeout:
            if attempt >= attempts - 1:
                raise RuntimeError('Request to %s timed out.' % (url))
    raise RuntimeError('HTTP request failed: %s %s' % (method, url))
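# Hedged usage sketch for make_http_request() above; the URL, headers and status
# tuple are assumptions, and the call relies on the surrounding module providing
# Timeout, HTTPConnection and HTTPSConnection as the function expects.
resp = make_http_request(
    'GET', 'http://example.com/health?verbose=1',
    headers={'Accept': 'application/json'}, body=None,
    timeout=5, acceptable_statuses=(200, 2), attempts=3)
print resp.status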
def on_renamed(self, pathname, new_pathname):
    relpath = os.path.relpath(pathname, self.src)
    newrelpath = os.path.relpath(new_pathname, self.src)
    for dst in self.dsts:
        retry = 0
        url = httplib.urlsplit(os.path.join(dst, relpath))
        http = httplib.HTTPConnection(url.netloc, timeout=60)
        dst_path = url.path
        new_dst_path = os.path.join(dst, newrelpath)
        success = False
        while not success and retry <= max_retry:
            try:
                http.request(
                    "POST", urllib.quote(dst_path),
                    urllib.quote('operation=rename&destination=%s' % (new_dst_path)))
                resp = http.getresponse()
                if resp.status != 500:
                    success = True
                else:
                    retry += 1
            except Exception, e:
                success = False
                retry += 1
        http.close()
def __init__(self, baseurl, encoding="xml", verbose=False, username=None, password=None):
    if not baseurl.endswith("/"):
        baseurl += "/"
    self.baseurl = httplib.urlsplit(baseurl)
    self.verbose = verbose
    self.username = username
    self.password = password
    if encoding == "json":
        self.encoding = json
        self.extension = ".json"
        self.content_type = "application/json"
    elif encoding == "xml":
        self.encoding = apixml
        self.extension = ".xml"
        self.content_type = "application/xml"
    else:
        raise ValueError("Unsupported encoding: %s" % encoding)
def proxy_douban_image(douban_url):
    if not app.config['PROXY_DOUBAN_IMAGE']:
        return douban_url
    splited = httplib.urlsplit(douban_url)
    if splited:
        return '/douban_images{path}'.format(path=splited.path)
    return douban_url
def __init__(self, method, url, headers=None, data=None, files=None, debug=False,
             cookies=None, auto_redirect=False):
    assert url.startswith('http')
    url = util.utf8(url)
    self.url = url
    self.method = method
    self.data = data or {}
    self.files = files
    self.body = None
    self.auto_redirect = auto_redirect
    cookies = cookies or {}
    for name, value in cookies.items():
        cookie_manager.set_cookie(name, value)
    _split_url = httplib.urlsplit(url)
    self.host = _split_url.netloc
    self.uri = _split_url.path
    if _split_url.query:
        self.uri += '?' + _split_url.query
    if _split_url.scheme == 'https':
        Connection = httplib.HTTPSConnection
    else:
        Connection = httplib.HTTPConnection
    self.__conn = Connection(host=self.host)
    self.__conn.set_debuglevel(debug and logging.DEBUG or 0)
    self.headers = headers or {}
    self.generate_header(headers)
def parse_url(self, url):
    ''' parse url and convert it to filesystem path: $BASE_DIR/domain/page_id/

    list (omitting host http://api.springnote.com/)
     * /pages.json                        => /default/
     * /pages.json?domain=jangxyz         => /jangxyz/
     * /pages/563954/revisions.json       => /default/563954/revisions/
     * /pages/563954/attachments.json     => /default/563954/attachments/
     * /pages/563954/comments.json        => /default/563954/comments/
    get
     * /pages/563954.json?domain=jangxyz      => /jangxyz/563954/563954.json
     * /pages/563954/revisions/29685883.json  => /default/563954/revisions/29685883.json
     * /pages/563954/attachments/559756.json  => /default/563954/attachments/559756.json
    '''
    # extract path and query from url
    _scheme, _netloc, path, query, _fragment = httplib.urlsplit(url)
    domain = self.query_dict(query).get('domain')
    # build path
    path, _sep, format = path.partition('.')
    id = (path.split('/', 3) + [None] * 3)[2]
    resource_dict = {}
    l = filter(None, path.split('/'))
    for resource, r_id in zip(l[::2], l[1::2] + [None]):
        resource_dict[resource] = r_id or True
    resource_dict.pop('pages')
    return self.build_path(id, domain, format, **resource_dict)
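# Hedged standalone sketch (assumed helper name): the same decomposition that
# parse_url() performs, shown on one of the docstring's examples.
from urlparse import urlsplit, parse_qs

def split_springnote_path(url):
    _scheme, _netloc, path, query, _frag = urlsplit(url)
    domain = (parse_qs(query).get('domain') or ['default'])[0]
    path, _sep, format = path.partition('.')
    segments = filter(None, path.split('/'))
    return domain, format, segments

# split_springnote_path('/pages/563954/revisions.json?domain=jangxyz')
# -> ('jangxyz', 'json', ['pages', '563954', 'revisions'])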
def unmeta(url, res):
    """ Finds any meta redirects in a httplib.response object
        that has text/html as content-type.

    Args:
       url (str): The url to follow one redirect
       res (httplib.response): a http.response object

    Returns:
       (str). The resolved url
    """
    if res and (res.getheader('Content-type') or "").startswith('text/html'):
        size = 65535
        if res.getheader('Content-Length'):
            try:
                tmp = int(res.getheader('Content-length'))
                if tmp < 65535:
                    size = tmp
            except:
                print "wrong content-length:", res.getheader('Content-length')
        root = parse(StringIO(res.read(size)))
        for x in root.xpath('//meta[@http-equiv="refresh"]'):
            newurl = x.get('content').split(';')
            if len(newurl) > 1:
                newurl = newurl[1].strip()[4:]
                parts = httplib.urlsplit(urllib.unquote_plus(newurl))
                if parts.scheme and parts.netloc:
                    url = newurl
    return weedparams(url)
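# Hedged sketch (assumed input): how the content attribute of a
# <meta http-equiv="refresh"> tag is split into delay and target, mirroring the
# newurl handling in unmeta() above.
import urllib

content = '5;URL=http%3A%2F%2Fexample.com%2Fnext'
parts = content.split(';')
if len(parts) > 1:
    target = parts[1].strip()[4:]      # drop the leading 'URL='
    print urllib.unquote_plus(target)  # -> http://example.com/next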
def __init__(self, method, url, headers=None, data=None, files=None, debug=False):
    assert url.startswith('http')
    self.url = url
    self.method = method
    self.data = data or {}
    self.files = files
    self.body = None
    _split_url = httplib.urlsplit(url)
    self.host = _split_url.netloc
    self.uri = _split_url.path
    if _split_url.query:
        self.uri += '?' + _split_url.query
    if _split_url.scheme == 'https':
        Connection = httplib.HTTPSConnection
    else:
        Connection = httplib.HTTPConnection
    self.__conn = Connection(host=self.host)
    self.__conn.set_debuglevel(debug and logging.DEBUG or 0)
    self.headers = {}
    self.generate_header(headers)
def httpPost(url, data):
    res = None
    try:
        urlInfo = httplib.urlsplit(url)
        print urlInfo.path, urlInfo.port
        conn = httplib.HTTPConnection(urlInfo.netloc)
        conn.connect()
        if data:
            conn.putrequest("POST", urlInfo.path)
            conn.putheader("Content-Length", len(data))
            conn.putheader("Content-Type", "application/x-www-form-urlencoded")
        else:
            conn.putrequest("GET", urlInfo.path, None)
            conn.putheader("Content-Length", 0)
        conn.putheader("Connection", "close")
        conn.endheaders()
        if data:
            conn.send(data)
        response = conn.getresponse()
        if response:
            res = response.read()
            response.close()
        conn.close()
        return res
    except Exception, ex:
        raise ex
def unshorten(url, cache=None, ua=None, **kwargs):
    """ resolves all HTTP/META redirects and optionally caches them
        in any object supporting a __getitem__, __setitem__ interface

    Args:
       url (str): The url to follow one redirect
       cache (PersistentCryptoDict): an optional PersistentCryptoDict instance
       ua (fn): A function returning a User Agent string (optional), the default is googlebot.
       **kwargs (dict): optional proxy args for urlclean.httpresolve (default: localhost:8118)

    Returns:
       (str). The final cleaned url.
    """
    prev = None
    origurl = url
    seen = []
    while url != prev:
        # abort recursions
        if url in seen:
            return ""
        seen.append(url)
        if cache:
            cached = cache[url]
            if cached:
                return cached
        url = weedparams(url)
        # expand relative urls
        us = httplib.urlsplit(url)
        if us.scheme == '' and us.netloc == '':
            url = urljoin(prev, url)
        elif us.scheme == '':
            url = "%s:%s" % (httplib.urlsplit(prev).scheme, url)
        prev = url
        url, root = httpresolve(url, ua=ua, **kwargs)
        url = unmeta(url, root)
        for p in plugins.modules:
            try:
                url = p.convert(url)
            except:
                pass  # ignore plugin failures
    if cache:
        cache[origurl] = url
    return url
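# Hedged usage sketch for unshorten() above; the short link is an assumption, and a
# real cache would need __getitem__ to return None on a miss (per the docstring)
# rather than raising, so it is omitted here.
final = unshorten('http://bit.ly/example')
print final  # fully resolved, cleaned URL, or '' if a redirect loop was detected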
def split_clc_url(self, clc_url):
    """
    Splits a cloud controller endpoint url.
    """
    parts = httplib.urlsplit(clc_url)
    is_secure = parts.scheme == 'https'
    ip, port = parts.netloc.split(':')
    return {'ip': ip, 'port': int(port), 'is_secure': is_secure}
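# Hedged standalone sketch of the same split using the stdlib urlsplit directly;
# like split_clc_url(), it assumes the endpoint always carries an explicit port.
from urlparse import urlsplit

parts = urlsplit('https://clc.example.com:8773/services/Eucalyptus')
ip, port = parts.netloc.split(':')
print ip, int(port), parts.scheme == 'https'
# -> clc.example.com 8773 True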
def setInfo(self, d):
    self.blogid = d[u"blogid"]
    self.blogname = d[u"blogName"]
    self.headers["Referer"] = d[u"url"]
    opener.addheader("Referer", d[u"url"])
    self.url = d[u"url"] + "api/"
    split = httplib.urlsplit(self.url)
    self.host = split[1]
    self.path = split[2]
def APIRequest(method, url, data=None, encode=default_encoding, decode=default_encoding,
               content_type=None, expected_type=None, get_response=False):
    if encode == "json":
        data = json.dumps(data)
    elif encode == "xml":
        data = pyxml.dumps(data)
    if content_type == None:
        content_type = deduce_content_type(encode)
    surl = httplib.urlsplit(url)
    if encode and not url.endswith("." + encode):
        url = surl.path + "." + encode
    else:
        url = surl.path
    if surl.query:
        url += "?" + surl.query
    print >> sys.stderr, method, surl.geturl().replace(surl.path, url)
    conn = httplib.HTTPConnection(surl.hostname, surl.port)
    conn.request(method, url, body=data, headers={"Content-Type": content_type})
    r = conn.getresponse()
    if expected_type == None:
        expected_type = deduce_content_type(decode)
    # TODO: enable this test once it is supported.
    # assert expected_type in r.getheader("Content-Type"), "received %s instead of %s" % (
    #     r.getheader("Content-Type"), expected_type)
    recv = r.read()
    try:
        if decode == "json":
            recv = json.loads(recv)
        elif decode == "xml":
            recv = pyxml.loads(recv)
    except:
        pass
    print >> sys.stderr, r.status, r.reason
    assert 200 <= r.status < 300, recv
    return (recv, r) if get_response else recv
def dorequest(self, timeout=TIMEOUT, HttpMethod="POST", parsexsams=True):
    """
    Sends the request to the database node and returns a result.Result instance.
    The request uses 'POST' requests by default. If the request fails or if stated
    in the parameter 'HttpMethod', 'GET' requests will be performed.

    The returned result will be parsed by default and the model defined in
    'specmodel' will be populated by default (parsexsams = True).
    """
    self.xml = None
    #self.get_xml(self.Source.Requesturl)
    url = self.baseurl + self.querypath
    urlobj = urlsplit(url)
    if urlobj.scheme == 'https':
        conn = HTTPSConnection(urlobj.netloc, timeout=timeout)
    else:
        conn = HTTPConnection(urlobj.netloc, timeout=timeout)
    conn.putrequest(HttpMethod, urlobj.path + "?" + urlobj.query)
    conn.endheaders()
    try:
        res = conn.getresponse()
    except socket.timeout:
        # error handling has to be included
        self.status = 408
        self.reason = "Socket timeout"
        raise TimeOutError
    self.status = res.status
    self.reason = res.reason
    if not parsexsams:
        if res.status == 200:
            result = r.Result()
            result.Content = res.read()
        elif res.status == 400 and HttpMethod == 'POST':
            # Try to use http-method: GET
            result = self.dorequest(HttpMethod='GET', parsexsams=parsexsams)
        else:
            result = None
    else:
        if res.status == 200:
            self.xml = res.read()
            result = r.Result()
            result.Xml = self.xml
            result.populate_model()
        elif res.status == 400 and HttpMethod == 'POST':
            # Try to use http-method: GET
            result = self.dorequest(HttpMethod='GET', parsexsams=parsexsams)
        else:
            result = None
    return result
def __init__(self, uri, api):
    #logging.info("init.uri: %s" % uri)
    self.api = api
    self.uri = uri
    self.scheme, self.host, self.url, z1, z2 = httplib.urlsplit(self.api.base_url + self.uri)
    self.id = None
    self.conn = None
    self.headers = {'User-Agent': USER_AGENT}
    self.attrs = {}
    self._errors = {}
def start(self, url, maxdepth=3):
    if not url.startswith('http'):
        raise ValueError('Invalid URL %s; URL must start with either http:// or https://' % repr(url))
    self.maxdepth = maxdepth
    self.scope = urlsplit(url)
    lc = self._exec_start_plugins()
    if lc == CrawlerLifeCycle.END:
        return
    self.queue_crawl(url)
    self._crawl()
def unshortenURL(url):
    """ unshortenURL https://github.com/cudeso/expandurl"""
    url_ua = config['urls_ua']
    urls_timeout = config['urls_timeout']
    currenturl = url.strip()
    previousurl = None
    while currenturl != previousurl:
        try:
            httprequest = httplib.urlsplit(currenturl)
            scheme = httprequest.scheme.lower()
            netloc = httprequest.netloc.lower()
            previousurl = currenturl
            if scheme == 'http':
                conn = httplib.HTTPConnection(netloc, timeout=5)
                req = currenturl[7 + len(netloc):]
                location = "%s://%s" % (scheme, netloc)
            elif scheme == 'https':
                conn = httplib.HTTPSConnection(netloc, timeout=5)
                req = currenturl[8 + len(netloc):]
                location = "%s://%s" % (scheme, netloc)
            conn.request("HEAD", req, None, {
                'User-Agent': url_ua,
                'Accept': '*/*',
            })
            res = conn.getresponse()
            if res.status in [301, 304]:
                currenturl = res.getheader('Location')
                httprequest_redirect = httplib.urlsplit(currenturl)
                if httprequest_redirect.scheme.lower() != 'http' and \
                        httprequest_redirect.scheme.lower() != 'https':
                    # currenturl does not contain http(s)
                    currenturl = "%s://%s%s" % (scheme, netloc, currenturl)
        except:
            currenturl = url
    return currenturl
def fix_destination(hostname, request):
    # fix data request for rename and symlink
    userinput = parseinput(urllib.unquote(request.data), operation='',
                           destination='', modification_time='', access_time='')
    #if userinput.operation in ['rename', 'symlink']:
    if userinput.operation in ['rename']:
        dest = httplib.urlsplit(userinput.destination)
        userinput.destination = "http://%s%s" % (hostname, dest.path)
    request.data = ''
    for item in userinput.__dict__:
        if userinput.__dict__[item] == '':
            continue
        request.data += "%s=%s&" % (item, userinput.__dict__[item])
    request.data = urllib.quote(request.data)
def __init__(self, url):
    scheme, netloc, path, query, fragment = httplib.urlsplit(url)
    self.scheme = scheme
    self.netloc = netloc
    self.path = path
    # If there is only one item, extract from list
    query = cgi.parse_qs(query)
    for k, v in query.items():
        if len(v) == 1:
            query[k] = v[0]
    self.query = query
    self.fragment = fragment
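# Hedged sketch (assumed query string): the flattening done in __init__ above, where
# single-element lists returned by cgi.parse_qs are collapsed to plain strings.
import cgi

query = cgi.parse_qs('a=1&b=2&b=3')
for k, v in query.items():
    if len(v) == 1:
        query[k] = v[0]
print query  # -> {'a': '1', 'b': ['2', '3']}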
def dotransform(request, response):
    domain = request.value
    exclude = set()
    for i in range(0, config['bingsubdomains/maxrecursion']):
        q = ' '.join(['site:%s' % domain] + map(lambda x: '-site:%s' % x, exclude))
        results = searchweb(q)
        for r in results:
            domains = [urlsplit(d).netloc for d in findall('<web:Url>(.+?)</web:Url>', r)]
            for d in domains:
                if d not in exclude and d != domain:
                    exclude.add(d)
                    response += DNSName(d)
    return response
def on_modified(self, pathname, data):
    relpath = os.path.relpath(pathname, self.src)
    for dst in self.dsts:
        retry = 0
        url = httplib.urlsplit(os.path.join(dst, relpath))
        http = httplib.HTTPConnection(url.netloc, timeout=60)
        dst_path = url.path
        if os.path.islink(pathname):
            success = False
            rel_origpath = os.path.relpath(os.path.realpath(pathname), self.src)
            dst_origpath = os.path.join(dst, rel_origpath)
            while not success and retry <= max_retry:
                try:
                    http.request(
                        "POST", urllib.quote(dst_path),
                        urllib.quote('operation=symlink&destination=%s' % (dst_origpath)))
                    resp = http.getresponse()
                    if resp.status != 500:
                        success = True
                    else:
                        retry += 1
                except Exception, e:
                    success = False
                    retry += 1
        elif os.path.isdir(pathname):
            success = False
            if not dst_path.endswith('/'):
                dst_path = dst_path + '/'
            while not success and retry <= max_retry:
                try:
                    http.request("PUT", urllib.quote(dst_path))
                    resp = http.getresponse()
                    if resp.status == 404:
                        # use force directory creation
                        http.request("PUT", urllib.quote(dst_path) + "?force_create_dir=1")
                        resp = http.getresponse()
                    if resp.status != 500:
                        success = True
                    else:
                        retry += 1
                except Exception, e:
                    success = False
                    retry += 1
def http_post(url, data=None, data_type='x-www-form-urlencoded', user_agent='', timeout_param=5):
    res = None
    urlInfo = httplib.urlsplit(url)
    uri = '%s?%s' % (urlInfo.path, urlInfo.query)
    if url.find('https://') > -1:
        conn = httplib.HTTPSConnection(urlInfo.netloc, timeout=timeout_param)
    else:
        conn = httplib.HTTPConnection(urlInfo.netloc, timeout=timeout_param)
    # if True:
    try:
        conn.connect()
        if data:
            if isinstance(data, unicode):
                data = data.encode('utf-8')
            conn.putrequest("POST", uri)
            conn.putheader("Content-Length", len(data))
            conn.putheader("Content-Type", "application/%s" % data_type)
            if user_agent != '':
                conn.putheader("User-Agent", user_agent)
        else:
            conn.putrequest("GET", uri)
            conn.putheader("Content-Length", 0)
            conn.putheader("Content-Type", "application/x-www-form-urlencoded")
        conn.putheader("Connection", "close")
        conn.endheaders()
        if data:
            conn.send(data)
        response = conn.getresponse()
        if response:
            res = response.read()
            response.close()
        print '-' * 20, response.status, res
        return (response.status, res)
    except socket.timeout:
        raise HttpTimeOut('Connect %s time out' % url)
    except Exception, ex:
        raise ex
def getFile(file_url):
    if file_url.startswith('http'):
        http, host, path = httplib.urlsplit(file_url)[:3]
        if http == 'https':
            conn = httplib.HTTPSConnection(host)
        else:
            conn = httplib.HTTPConnection(host)
        conn.request("GET", path)
        response = conn.getresponse()
        return response.read().strip()
    else:
        return open(file_url).read().strip()
def http_post(url, data='', data_type='x-www-form-urlencoded', user_agent='',
              timeout_param=5, timeout=5, headers=[], cookie=''):
    res = None
    urlInfo = httplib.urlsplit(url)
    uri = ('%s?%s' % (urlInfo.path, urlInfo.query)) if urlInfo.query else urlInfo.path
    if url.find('https://') > -1:
        conn = httplib.HTTPSConnection(urlInfo.netloc, timeout=timeout or timeout_param)
    else:
        conn = httplib.HTTPConnection(urlInfo.netloc, timeout=timeout or timeout_param)
    try:
        conn.connect()
        if data:
            if isinstance(data, unicode):
                data = data.encode('utf-8')
            conn.putrequest("POST", uri)
        else:
            conn.putrequest("GET", uri)
        for k, v in headers:
            conn.putheader(k, v)
        if cookie:
            if isinstance(cookie, dict):
                cookie = ' '.join(['%s=%s;' % (urllib2.quote(str(k)), urllib2.quote(str(v)))
                                   for k, v in cookie.iteritems()])
            conn.putheader("Cookie", cookie)
        conn.putheader("Content-Length", len(data))
        conn.putheader("Content-Type", "application/%s" % data_type)
        if user_agent != '':
            conn.putheader("User-Agent", user_agent)
        conn.putheader("Connection", "close")
        conn.endheaders()
        if data:
            conn.send(data)
        response = conn.getresponse()
        if response:
            res = response.read()
            response.close()
        return res
    except socket.timeout:
        raise HttpTimeOut('Connect %s time out' % url)
    except Exception, ex:
        raise ex
def __init__(self, url, autoflush=False):
    '''Instantiates a new `PostTarget`.

    :Parameters:
        url : string
            A URL with the address to post to.
        autoflush : bool [default is ``False``]
            ``True`` to post every string written; ``False`` to only post
            when `flush` is called.
    '''
    self._url = url
    self._connection = HTTPConnection(urlsplit(url)[1], timeout=10)
    self._autoflush = autoflush
    self._body = ''
def socket_post(url, data='', timeout_param=5, timeout=5, bufsize=8192):
    # {
    #     "req_type": 5000,
    #     "content": [0],
    #     "player_ids": []
    # }
    conn = res = None
    try:
        urlInfo = httplib.urlsplit(url)
        host, port = urlInfo.netloc.split(':')
        port = int(port)
        conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        conn.settimeout(timeout or timeout_param)
        conn.connect((host, port))
        # qs = urlparse.parse_qs(data)
        # For an unknown reason (possibly a length limit) long payloads were
        # getting truncated, so the parsing approach was changed.
        qs = {}
        for one in data.split('&'):
            lis = one.split('=')
            qs[lis[0]] = eval(lis[1])
        data = {}
        data['player_ids'] = qs.get('player_id', [])
        # data['req_type'] = int(qs.get('req_type', [0])[0])
        # data['content'] = json.loads(qs.get('content')[0])
        data['req_type'] = qs.get('req_type', [0])
        data['content'] = qs.get('content')
        data = json.dumps(data)
        data = data + '\n'
        print '==> send data: ', repr(data)
        conn.send(data)
        res = ""
        for i in range(50):
            if res and res[-1] == "\n":
                break
            _res = conn.recv(bufsize)
            res += _res
        return res
    except socket.timeout:
        raise SocketTimeOut('Connect %s time out' % url)
    except Exception, ex:
        print ex
        raise ex
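# Hedged sketch (assumed field values): the newline-terminated JSON line that
# socket_post() writes to the socket; the trailing '\n' is what the recv loop
# above waits for to detect the end of the reply.
import json

payload = {'player_ids': [1001, 1002], 'req_type': 5000, 'content': [0]}
line = json.dumps(payload) + '\n'
print repr(line)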
def savefile(url):
    fpath = httplib.urlsplit(url)[2]
    fname = fpath.rsplit("/", 1)[1]
    spath = "files"
    if fname.find(".") != -1:
        ext = fname.rsplit(".", 1)[1]
        if ext.lower() in ["gif", "jpg", "jpeg", "png", "bmp", "tiff", "svg"]:
            spath = "images"
    spath += os.path.sep + fname
    try:
        opener.retrieve(url, filename=OUTPUT_DIR + spath)
    except IOError, err:
        fdlog = open("logfile.txt", "a+")
        fdlog.write("Fetching " + url + " raised\n")
        fdlog.write(err.__str__())
        fdlog.write("\n")
        fdlog.close()
def _notify(self, this, host, proto='http'):
    if proto == 'http':
        conn = httplib.HTTPConnection(host.url)
    elif proto == 'https':
        conn = httplib.HTTPSConnection(host.url)
    params = urllib.urlencode({'id': this.id})
    conn.request("POST", reverse('sync'), params)
    response = conn.getresponse()
    if response.status == httplib.MOVED_PERMANENTLY:
        host_val = httplib.urlsplit(response.getheader('location'))
        try:
            host = self.get(url=host_val.netloc)
        except ObjectDoesNotExist:
            logger.warning("Host %s not registered" % host_val.netloc)
            return False
        return self._notify(this, host, proto=host_val.scheme)
    return True