def open_http(url, data=None):
    """Use HTTP protocol.

    *url* is either a selector string, or a ``(host, selector)`` pair when
    the request is routed through a proxy.  When *data* is given the request
    is a form-encoded POST, otherwise a GET.  Returns the prepared
    connection object; raises IOError when no host can be determined.
    """
    user_passwd = None
    proxy_passwd = None
    if isinstance(url, str):
        # Plain URL: split off optional "user:pass@" credentials.
        host, selector = splithost(url)
        if host:
            user_passwd, host = splituser(host)
            host = urllib.parse.unquote(host)
        realhost = host
    else:
        # (host, selector) pair: the request goes via a proxy.
        host, selector = url
        # check whether the proxy contains authorization information
        proxy_passwd, host = splituser(host)
        # now we proceed with the url we want to obtain
        urltype, rest = urllib.parse.splittype(selector)
        url = rest
        user_passwd = None
        if urltype.lower() != 'http':
            realhost = None
        else:
            realhost, rest = splithost(rest)
            if realhost:
                user_passwd, realhost = splituser(realhost)
            if user_passwd:
                selector = "%s://%s%s" % (urltype, realhost, rest)
            # NOTE(review): on Python 3, proxy_bypass lives in
            # urllib.request, not urllib — confirm this module aliases it.
            if urllib.proxy_bypass(realhost):
                host = realhost
        # print "proxy via http:", host, selector
    if not host:
        raise IOError('http error', 'no host given')
    if proxy_passwd:
        import base64
        # NOTE(review): b64encode expects bytes on Python 3; a str here
        # would raise TypeError — verify the credential type upstream.
        proxy_auth = base64.b64encode(proxy_passwd).strip()
    else:
        proxy_auth = None
    if user_passwd:
        import base64
        auth = base64.b64encode(user_passwd).strip()
    else:
        auth = None
    c = FakeHTTPConnection(host)
    if data is not None:
        # POST: form-encoded body with an explicit length header.
        c.putrequest('POST', selector)
        c.putheader('Content-Type', 'application/x-www-form-urlencoded')
        c.putheader('Content-Length', '%d' % len(data))
    else:
        c.putrequest('GET', selector)
    if proxy_auth:
        c.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
    if auth:
        c.putheader('Authorization', 'Basic %s' % auth)
    if realhost:
        c.putheader('Host', realhost)
    # Forward the default headers configured on URLopener.
    for args in URLopener().addheaders:
        c.putheader(*args)
    c.endheaders()
    return c
def post(self, query):
    """POST the OFX *query* body to the institution's URL and return the
    decoded response text.

    Headers are written one at a time because Discover requires a
    particular header ordering.
    """
    i = self.institution
    logging.debug('posting data to %s' % i.url)
    logging.debug('---- request ----')
    logging.debug(query)
    garbage, path = splittype(i.url)
    host, selector = splithost(path)
    h = HTTPSConnection(host, timeout=60)
    # Discover requires a particular ordering of headers, so send the
    # request step by step.
    h.putrequest('POST', selector, skip_host=True,
                 skip_accept_encoding=True)
    h.putheader('Content-Type', 'application/x-ofx')
    h.putheader('Host', host)
    h.putheader('Content-Length', len(query))
    h.putheader('Connection', 'Keep-Alive')
    if self.accept:
        h.putheader('Accept', self.accept)
    if self.user_agent:
        h.putheader('User-Agent', self.user_agent)
    # endheaders() also sends the request body.
    h.endheaders(query.encode())
    res = h.getresponse()
    # Drop any non-ASCII bytes rather than failing on decode.
    response = res.read().decode('ascii', 'ignore')
    logging.debug('---- response ----')
    logging.debug(res.__dict__)
    logging.debug(response)
    res.close()
    return response
def here(modal, string):
    """Detect "click here"-style anchors whose link host differs from *modal*.

    Scans *string* for ``<a ...>...</a>`` tags.  Anchors containing one of
    the ban words ('here'/'click' in various cases) have their first
    embedded http(s) URL extracted; if that URL's host is not a substring
    of *modal*, the sample is flagged.

    :param modal: the expected (dominant) domain of the message
    :param string: raw HTML to scan
    :return: ``(here_num, stand_host)`` as strings — '1' when a banned
        anchor points off-domain, plus the host that was seen last
        (off-domain hosts take precedence)
    """
    ban_words = ['here', 'click', 'Here', 'Click', 'CLICK', 'HERE']
    here_num = 0
    stand_host = modal
    stand_host_1 = ''
    urls = re.findall(r'<[Aa].*?href=.*?</[Aa]>', string, re.S)
    for url in urls:
        # BUG FIX: the ban flag must be recomputed for every anchor; the
        # original set a loop-external flag once and never reset it, so
        # every anchor after the first match was treated as banned.
        if not any(word in url for word in ban_words):
            continue
        http_url = re.findall(
            r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
            str(url))
        if http_url:
            proto, rest = splittype(http_url[0])
            host, rest = splithost(rest)
            host, port = splitport(host)
            stand_host = host
            if host not in modal:
                # Link host differs from the expected domain.
                stand_host_1 = host
                here_num = 1
    if stand_host_1:
        stand_host = stand_host_1
    return str(here_num), str(stand_host)
def __init__(self, username=None, password=None, serverurl=None):
    """XML-RPC transport reaching the server over plain HTTP or a UNIX
    domain socket, selected by the *serverurl* scheme.

    Raises ValueError for any scheme other than ``http://``/``unix://``.
    """
    xmlrpclib.Transport.__init__(self)
    self.username = username
    self.password = password
    self.verbose = False
    self.serverurl = serverurl
    if serverurl.startswith('http://'):
        # NOTE(review): `type` shadows the builtin here.
        type, uri = urllib.splittype(serverurl)
        host, path = urllib.splithost(uri)
        host, port = urllib.splitport(host)
        if port is None:
            port = 80
        else:
            port = int(port)
        # Bind host/port as defaults so the factory carries no free vars.
        def get_connection(host=host, port=port):
            return httplib.HTTPConnection(host, port)
        self._get_connection = get_connection
    elif serverurl.startswith('unix://'):
        def get_connection(serverurl=serverurl):
            # we use 'localhost' here because domain names must be
            # < 64 chars (or we'd use the serverurl filename)
            conn = UnixStreamHTTPConnection('localhost')
            conn.socketfile = serverurl[7:]
            return conn
        self._get_connection = get_connection
    else:
        raise ValueError('Unknown protocol for serverurl %s' % serverurl)
def work(self, task):
    """Start an asynchronous HTTP request for *task*.

    ``task[0]`` is the URL; the parsed path and host are appended to
    *task* for later use by ``onSend``.  ``task[2]`` appears to be a
    callback invoked with ``None`` on timeout — TODO confirm.  On any
    failure the task is re-queued and the socket closed.
    """
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        pro_, rest = splittype(task[0])
        host, rest = splithost(rest)
        host, port = splitport(host)
        task.append(rest)
        task.append(host)
        # Non-blocking connect: connect_ex returns immediately;
        # readiness is signalled through the write-fd callback below.
        sock.setblocking(0)
        sock.connect_ex((host, int(port) if port else 80))

        def timeout_cb():
            # Give up on the request: deregister, close, notify caller.
            if not sock._closed:
                KBEngine.deregisterWriteFileDescriptor(sock.fileno())
                sock.close()
            if task and task[2]:
                task[2](None)

        self._write_timer[sock.fileno()] = self.add_timer(
            REQUEST_TIMEOUT, timeout_cb)
        KBEngine.registerWriteFileDescriptor(
            sock.fileno(), Functor(self.onSend, task, sock))
    except:
        # NOTE(review): bare except also swallows KeyboardInterrupt /
        # SystemExit — consider `except Exception`.
        self._tasks.append(task)
        self.logsError()
        if not sock._closed:
            sock.close()
def __init__(self, server_url, proxy=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
             ca_cert=None, client_cert=None, client_cert_pass=None):
    """Initialize the transport.

    :param server_url: endpoint URL; its scheme decides http vs https
    :param proxy: explicit proxy URL; when omitted the ``https_proxy`` /
        ``http_proxy`` environment variables are consulted
    :param timeout: socket timeout
    :param ca_cert: CA certificate (stored on the instance)
    :param client_cert: optional (certfile, keyfile) pair
    :param client_cert_pass: passphrase for the client key
    """
    xmlrpclib.Transport.__init__(self)
    client_cert = client_cert or (None, None)
    self.disable_ssl_validation = False
    self.scheme = urllib.splittype(server_url)[0]
    self.https = (self.scheme == 'https')
    self.proxy = None
    self.timeout = timeout
    self._certfile, self._keyfile = client_cert
    self.ca_cert = ca_cert
    self.client_cert_pass = client_cert_pass
    # pull system proxy if no proxy is forced
    if not proxy:
        if self.https:
            proxy = os.environ.get('https_proxy', None)
        else:
            proxy = os.environ.get('http_proxy', None)
    if proxy:
        scheme, proxy_url = urllib.splittype(proxy)
        self.proxy = urllib.splithost(proxy_url)[0]
        # re-check if we need to support https
        self.https = (scheme == 'https')
def open_local_file(self, url):
    """Use local file.

    Serves a local-file URL, returning an addinfourl wrapping the opened
    file plus synthesized Content-Type/Length/Last-modified headers.
    Raises IOError when the file is missing or the URL names a host that
    is not this machine.
    """
    import mimetypes, mimetools, email.Utils
    # NOTE(review): both branches import the same name — the fallback is
    # pointless as written (likely a mangled Py2->Py3 port of
    # cStringIO/StringIO).
    try:
        from io import StringIO
    except ImportError:
        from io import StringIO
    host, file = splithost(url)
    localname = url2pathname(file)
    try:
        stats = os.stat(localname)
    except OSError as e:
        raise IOError(e.errno, e.strerror, e.filename)
    size = stats.st_size
    modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
    mtype = mimetypes.guess_type(url)[0]
    headers = mimetools.Message(
        StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified)))
    if not host:
        # No host part: serve the path directly.
        urlfile = file
        if file[:1] == '/':
            urlfile = 'file://' + file
        return addinfourl(open(localname, 'rb'), headers, urlfile)
    host, port = splitport(host)
    # A host is acceptable only when it carries no explicit port and
    # resolves to this machine.
    if not port \
       and socket.gethostbyname(host) in (localhost(), thishost()):
        urlfile = file
        if file[:1] == '/':
            urlfile = 'file://' + file
        return addinfourl(open(localname, 'rb'), headers, urlfile)
    raise IOError('local file error', 'not on local host')
def get_info_by_url(url):
    """Decompose *url* into its pieces.

    :param url: an absolute URL such as ``http://host:port/path``
    :return: ``(protocol, host, path, port)`` where *port* is a string
        and defaults to ``'80'`` when the URL carries none
    """
    scheme, remainder = parse.splittype(url)
    netloc, path = parse.splithost(remainder)
    netloc, port = parse.splitport(netloc)
    return scheme, netloc, path, port if port is not None else '80'
def handleData(self, response):
    """Extract chapter URLs/titles from *response* and return a list of
    Chapter objects; each chapter's content is initialised to a failure
    placeholder string.
    """
    s = response.text
    chapters_dict = dict()
    chapters = []
    queue_out = Queue()
    # Each field is extracted either by regex or by XPath, depending on
    # which rule table defines it.
    for field in chapter_fields:
        if self.re_rule.get(field):
            chapters_dict[field] = getRe(s, self.re_rule[field])
        elif self.xpath_rule.get(field):
            chapters_dict[field] = self.getXpath(s, self.xpath_rule[field])
    urls = chapters_dict['url']
    # Root-relative links ("/x" but not protocol-relative "//x") are
    # resolved against the site's menu URL.
    if urls[0] != '' and urls[0][0] == '/' and urls[0][1] != '/':
        menu_url = WEB_SETTINGS[self.web]['menu'].format('')
        proto, rest = splittype(menu_url)
        host, rest = splithost(rest)
        chapters_dict['url'] = [
            proto + '://' + host + url[:] for url in urls
        ]
    print(chapters_dict)
    # self.req().createChapter(self.web, chapters_dict['url'], queue_out)
    for i in range(len(chapters_dict['url'])):
        chapter = Chapter()
        chapter.url = chapters_dict['url'][i].replace(' ', '')
        chapter.title = chapters_dict['title'][i].replace(' ', '')
        # Placeholder text ("this chapter failed to download").
        chapter.content = '该章节下载失败'
        chapters.append(chapter)
    # self.dContent(chapters, queue_out)
    # Ui_MainWindow.tabWidget.get_ResultWidget('空').label.setText('啊哈哈')
    return chapters
def find_modal(list):
    """Return the most frequent hostname among the http(s) URLs in *list*.

    Each entry is scanned for an embedded http(s) URL; the first match's
    host is collected.  Entries without an extractable URL are skipped.

    :param list: iterable of strings (note: shadows the builtin name,
        kept for interface compatibility)
    :return: the dominant host, or ``'-'`` when no URL was found
    """
    domain_list = []
    for url in list:
        http_url = re.findall(
            r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
            str(url))
        if len(http_url) > 0:
            proto, rest = splittype(http_url[0])
            host, rest = splithost(rest)
            host, port = splitport(host)
            domain_list.append(host)
        # BUG FIX: the original assigned an unused local (host_ip_num) in
        # the else branch — dead code, removed.
    word_counts = collections.Counter(domain_list)
    # Most common hostname wins; '-' when nothing was collected.
    top_one = word_counts.most_common(1)
    if top_one:
        return top_one[0][0]
    return '-'
def retry_proxy_https_basic_auth(self, url, realm, data=None):
    """Retry *url* through the https proxy with basic-auth credentials
    for *realm* embedded into the stored proxy URL.

    Returns None when no credentials are available.
    """
    host, selector = splithost(url)
    newurl = 'https://' + host + selector
    proxy = self.proxies['https']
    urltype, proxyhost = splittype(proxy)
    proxyhost, proxyselector = splithost(proxyhost)
    # Strip any credentials already embedded in the proxy host.
    i = proxyhost.find('@') + 1
    proxyhost = proxyhost[i:]
    user, passwd = self.get_user_passwd(proxyhost, realm, i)
    if not (user or passwd):
        return None
    # Re-embed the (percent-quoted) credentials and retry the URL.
    proxyhost = quote(user, safe='') + ':' + quote(
        passwd, safe='') + '@' + proxyhost
    self.proxies['https'] = 'https://' + proxyhost + proxyselector
    if data is None:
        return self.open(newurl)
    else:
        return self.open(newurl, data)
def __init__(self, url, headers=None):
    """Minimal request object: stores *url* and *headers* and derives the
    origin host, type (scheme) and unquoted host from the URL.
    """
    self.url = url
    self.headers = headers
    # cookielib consults the request host for cookie policy decisions.
    self.origin_req_host = cookielib.request_host(self)
    self.type, r = splittype(url)
    self.host, r = splithost(r)
    if self.host:
        self.host = unquote(self.host)
def _do_post(self, query, extra_headers=[]):
    """ Do a POST to the Institution.

    :param query: Body content to POST (OFX Query)
    :type query: str
    :param extra_headers: Extra headers to send with the request, as a list
        of (Name, Value) header 2-tuples.
    :type extra_headers: list
    :return: 2-tuple of (HTTPResponse, str response body)
    :rtype: tuple
    """
    # NOTE: the mutable default for extra_headers is safe here because
    # the list is only read, never mutated.
    i = self.institution
    logging.debug('posting data to %s' % i.url)
    garbage, path = splittype(i.url)
    host, selector = splithost(path)
    try:
        h = HTTPSConnection(host, timeout=60)
        h.connect()
    except ssl.SSLError as ex:
        # Some institutions only speak TLSv1: retry once with an explicit
        # TLSv1 context when the default handshake is rejected.
        if (ex.reason == "UNSUPPORTED_PROTOCOL"):
            h = HTTPSConnection(host, timeout=60,
                                context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
            h.connect()
        else:
            raise
    # Discover requires a particular ordering of headers, so send the
    # request step by step.
    h.putrequest('POST', selector, skip_host=True,
                 skip_accept_encoding=True)
    headers = [('Content-Type', 'application/x-ofx'),
               ('Host', host),
               ('Content-Length', len(query)),
               ('Connection', 'Keep-Alive')]
    if self.accept:
        headers.append(('Accept', self.accept))
    if self.user_agent:
        headers.append(('User-Agent', self.user_agent))
    for ehname, ehval in extra_headers:
        headers.append((ehname, ehval))
    logging.debug('---- request headers ----')
    for hname, hval in headers:
        logging.debug('%s: %s', hname, hval)
        h.putheader(hname, hval)
    logging.debug('---- request body (query) ----')
    logging.debug(query)
    h.endheaders(query.encode())
    res = h.getresponse()
    # Drop any non-ASCII bytes rather than failing on decode.
    response = res.read().decode('ascii', 'ignore')
    logging.debug('---- response ----')
    logging.debug(res.__dict__)
    logging.debug('Headers: %s', res.getheaders())
    logging.debug(response)
    res.close()
    return res, response
def get_parser_from_url(url):
    """Return the parser registered in PARSER whose key occurs in the
    URL's host, or None when no key matches.
    """
    global PARSER
    _, remainder = splittype(url)
    host, _ = splithost(remainder)
    for key, parser in PARSER.items():
        if key in host:
            return parser
    return None
def _do_post(self, query, extra_headers=[]):
    """ Do a POST to the Institution.

    :param query: Body content to POST (OFX Query)
    :type query: str
    :param extra_headers: Extra headers to send with the request, as a list
        of (Name, Value) header 2-tuples.
    :type extra_headers: list
    :return: 2-tuple of (HTTPResponse, str response body)
    :rtype: tuple
    """
    # NOTE(review): this method is duplicated elsewhere in this file —
    # consider consolidating the two copies.
    i = self.institution
    logging.debug('posting data to %s' % i.url)
    garbage, path = splittype(i.url)
    host, selector = splithost(path)
    try:
        h = HTTPSConnection(host, timeout=60)
        h.connect()
    except ssl.SSLError as ex:
        # Retry once with an explicit TLSv1 context for servers that
        # reject the default handshake.
        if (ex.reason == "UNSUPPORTED_PROTOCOL"):
            h = HTTPSConnection(host, timeout=60,
                                context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
            h.connect()
        else:
            raise
    # Discover requires a particular ordering of headers, so send the
    # request step by step.
    h.putrequest('POST', selector, skip_host=True,
                 skip_accept_encoding=True)
    headers = [
        ('Content-Type', 'application/x-ofx'),
        ('Host', host),
        ('Content-Length', len(query)),
        ('Connection', 'Keep-Alive')
    ]
    if self.accept:
        headers.append(('Accept', self.accept))
    if self.user_agent:
        headers.append(('User-Agent', self.user_agent))
    for ehname, ehval in extra_headers:
        headers.append((ehname, ehval))
    logging.debug('---- request headers ----')
    for hname, hval in headers:
        logging.debug('%s: %s', hname, hval)
        h.putheader(hname, hval)
    logging.debug('---- request body (query) ----')
    logging.debug(query)
    h.endheaders(query.encode())
    res = h.getresponse()
    # Drop any non-ASCII bytes rather than failing on decode.
    response = res.read().decode('ascii', 'ignore')
    logging.debug('---- response ----')
    logging.debug(res.__dict__)
    logging.debug('Headers: %s', res.getheaders())
    logging.debug(response)
    res.close()
    return res, response
def _fix_url(self, url):
    """Return *url* as an absolute URL.

    Relative URLs are resolved against the scheme and host of
    ``self.pre_url``; an empty string is returned unchanged and URLs that
    already start with ``http``/``https`` are passed through untouched.
    """
    if url != "":
        # BUG FIX: the original used `"http" not in url`, a substring
        # test that wrongly treated relative URLs merely *containing*
        # "http" (e.g. in a query string) as already absolute.
        if not url.startswith("http"):
            scheme, remainder = parse.splittype(self.pre_url)
            host, _path = parse.splithost(remainder)
            prefix = scheme + "://" + host
            url = prefix + url if url.startswith("/") else prefix + "/" + url
    return url
def __init__(self, uri, transport=None, encoding=None, verbose=0, version=None, headers=None, history=None, config=jsonrpclib.config.DEFAULT, context=None, timeout=None): """ Sets up the server proxy :param uri: Request URI :param transport: Custom transport handler :param encoding: Specified encoding :param verbose: Log verbosity level :param version: JSON-RPC specification version :param headers: Custom additional headers for each request :param history: History object (for tests) :param config: A JSONRPClib Config instance :param context: The optional SSLContext to use """ # Store the configuration self._config = config self.__version = version or config.version schema, uri = splittype(uri) if schema not in ('http', 'https'): _logger.error("jsonrpclib only support http(s) URIs, not %s", schema) raise IOError('Unsupported JSON-RPC protocol.') self.__host, self.__handler = splithost(uri) if not self.__handler: # Not sure if this is in the JSON spec? self.__handler = '/' if transport is None: if schema == 'https': transport = SafeTransport(config=config, context=context, timeout=timeout) else: transport = Transport(config=config, timeout=timeout) self.__transport = transport self.__encoding = encoding self.__verbose = verbose self.__history = history # Global custom headers are injected into Transport self.__transport.push_headers(headers or {})
def domain_name(list):
    """Count the distinct hostnames among the URLs in *list*.

    :param list: iterable of URL strings (name kept for compatibility)
    :return: number of unique hosts
    """
    seen = set()
    for link in list:
        _, remainder = splittype(link)
        netloc, _ = splithost(remainder)
        netloc, _ = splitport(netloc)
        seen.add(netloc)
    return len(seen)
def parsePage(spider, url, response):
    """Parse a crawled article page using the spider's regex/XPath property
    rules, optionally rewrite embedded image links to the mirror host, and
    persist the result (only the WordPress XML-RPC path is implemented).
    """
    html = response.content
    selector = etree.HTML(html)
    html = html.decode('utf-8')
    propertys = json.loads(spider.args['PagePropertyRegularExpression'])
    for key in propertys:
        item = propertys[key]
        if item.startswith('$'):
            # A leading '$' marks a regex rule; group(1) is the value.
            p1 = r'%s' % item[1:]
            pattern = re.compile(p1)
            match = pattern.search(html)
            if match:
                propertys[key] = match.group(1)
            # Special-case the article body: strip scripts/anchors and
            # collect embedded images.
            if key == 'content_raw':
                contentselector = etree.HTML(propertys[key])
                etree.strip_elements(contentselector, 'script')
                etree.strip_tags(contentselector, 'a')
                propertys[key] = etree.tostring(contentselector).decode(
                    'utf-8')
                if spider.args['DownLoadImg'] == 1:
                    for imgsrc in contentselector.xpath("//img/@src"):
                        if imgsrc is not None and len(imgsrc) > 0:
                            # Queue the image for download and point the
                            # article at the mirror host.
                            cache.rpush('link-img', imgsrc)
                            proto, rest = splittype(imgsrc)
                            res, rest = splithost(rest)
                            propertys[key] = propertys[key].replace(
                                imgsrc,
                                imgsrc.replace(res, 'img.zyai.top'))
                            logging.info(
                                'push a img link to queue %s .' % imgsrc)
        else:
            # Otherwise the rule is an XPath expression.
            item = selector.xpath(item)[0]
            propertys[key] = item
    dataPersistenceType = spider.args['DataPersistenceType']
    if dataPersistenceType == 'WPRPC':
        # SECURITY(review): hard-coded endpoint and credentials — move
        # these to configuration/secret storage.
        wp = Client('http://tech.cocopass.com/xmlrpc.php', 'admin',
                    '19841204')
        """ 发表博文 """
        post = WordPressPost()
        post.title = propertys['title'].encode('utf-8')
        post.content = propertys['content_raw'].encode('utf-8')
        post.post_status = 'publish'
        post.terms_names = {'post_tag': [post.title], 'category': ['爱好']}
        wp.call(NewPost(post))
        logging.info('successfully post one article: %s .' %
                     propertys['title'])
    elif dataPersistenceType == 'MYSQL':
        # Not implemented yet.
        pass
def load(self, url):
    """Parse *url* and store protocol, path, host and port attributes.

    The port is left as the string from the URL when explicit; otherwise
    it defaults to 80 for http and 443 for https.
    """
    self.url = url
    scheme, remainder = urllib_parse.splittype(url)
    netloc, path = urllib_parse.splithost(remainder)
    hostname, port = urllib_parse.splitport(netloc)
    self.protocol = scheme
    self.path = path
    self.host = hostname
    self.port = port
    if not self.port:
        # Fall back to the scheme's default port.
        if scheme == 'http':
            self.port = 80
        elif scheme == 'https':
            self.port = 443
def url_split(url):
    """Decompose *url* into a ``(scheme, host, port, document)`` tuple.

    The hostname is lowercased and the port falls back to the scheme's
    default from ``default_ports``.  The URL is assumed to be a
    syntactically correct URI (e.g. contains no whitespace).
    """
    scheme, remainder = parse.splittype(url)
    hostname, document = parse.splithost(remainder)
    port = default_ports.get(scheme, 0)
    if hostname:
        hostname, port = splitport(hostname.lower(), port=port)
    return scheme, hostname, port, document
def retry_https_basic_auth(self, url, realm, data=None):
    """Retry an https request with basic-auth credentials for *realm*
    embedded as ``user:pass@host`` in the URL.

    Returns None when no credentials are available.
    """
    host, selector = splithost(url)
    # Strip any credentials already embedded in the host.
    i = host.find('@') + 1
    host = host[i:]
    user, passwd = self.get_user_passwd(host, realm, i)
    if not (user or passwd):
        return None
    # Re-embed the (percent-quoted) credentials and retry.
    host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
    newurl = 'https://' + host + selector
    if data is None:
        return self.open(newurl)
    else:
        return self.open(newurl, data)
def getp():
    """Flask view: fetch the page named by the ``path`` query parameter
    and return its text.

    SECURITY(review): the URL comes straight from the request — this is
    an open-proxy / SSRF vector; validate or whitelist the target.
    """
    path = request.args.get('path')
    resp = requests.get(path, headers=headers, timeout=5)
    proto, rest = UrlPase.splittype(resp.url)
    host, rest = UrlPase.splithost(rest)
    # These two hosts are known to need GBK decoding.
    if host == 'm.zwdu.com' or host == 'm.biqubao.com':
        resp.encoding = "GBK"
    else:
        resp.encoding = "utf-8"
    content = resp.text
    return content
def __init__( self, uri, transport=None, encoding=None, verbose=0, version=None, headers=None, history=None, config=jsonrpclib.config.DEFAULT, ): """ Sets up the server proxy :param uri: Request URI :param transport: Custom transport handler :param encoding: Specified encoding :param verbose: Log verbosity level :param version: JSON-RPC specification version :param headers: Custom additional headers for each request :param history: History object (for tests) :param config: A JSONRPClib Config instance """ # Store the configuration self._config = config if not version: version = config.version self.__version = version schema, uri = splittype(uri) if schema not in ("http", "https"): raise IOError("Unsupported JSON-RPC protocol.") self.__host, self.__handler = splithost(uri) if not self.__handler: # Not sure if this is in the JSON spec? self.__handler = "/" if transport is None: if schema == "https": transport = SafeTransport(config=config) else: transport = Transport(config=config) self.__transport = transport self.__encoding = encoding self.__verbose = verbose self.__history = history # Global custom headers are injected into Transport self.__transport.push_headers(headers or {})
def load(self, url):
    """Parse *url* into protocol/host/port/path attributes.

    The port is stored as an int; it defaults to 80 for http and 443 for
    https, and stays None for other schemes without an explicit port.
    """
    self.url = url
    scheme, remainder = splittype(self.url)
    netloc, self.path = splithost(remainder)
    hostname, port = splitport(netloc)
    self.protocol = scheme
    self.host = hostname
    self.port = None if port is None else int(port)
    if not self.port:
        # Fall back to the scheme's default port.
        if scheme == 'http':
            self.port = 80
        elif scheme == 'https':
            self.port = 443
def all_port(list):
    """Return 1 when any URL in *list* carries an explicit port, else 0.

    :param list: iterable of URL strings (name kept for compatibility)
    """
    explicit = 0
    for link in list:
        _, remainder = splittype(link)
        netloc, _ = splithost(remainder)
        _, port = splitport(netloc)
        if port is not None:
            explicit += 1
    return 1 if explicit > 0 else 0
def __setattr__(self, key, value):
    """Intercept attribute writes: assigning to ``url`` re-derives the
    protocol, path, host and port attributes from the new value.
    """
    object.__setattr__(self, key, value)
    if key == 'url':
        # The derived assignments below re-enter __setattr__, but only
        # the 'url' key triggers re-parsing, so there is no recursion.
        self.protocol, s1 = urllib_parse.splittype(self.url)
        if s1:
            s2, self.path = urllib_parse.splithost(s1)
            if s2:
                self.host, self.port = urllib_parse.splitport(s2)
            if not getattr(self, 'port', None):
                # Default ports per scheme.
                if self.protocol == 'http':
                    self.port = 80
                elif self.protocol == 'https':
                    self.port = 443
def fetchurl(url, query=None):
    """Fetch *url* over plain HTTP and return the response body.

    :param url: absolute ``http://`` URL; must not already contain a
        query string when *query* is given
    :param query: optional mapping/sequence urlencoded onto the URL
    :raises RuntimeError: for non-http URLs
    :raises RequestError: when the server does not answer 200 OK
    """
    if query is not None:
        assert '?' not in url, ("Either include query in url"
                                "or pass as parameter, but not both")
        url += '?' + urlencode(query)
    proto, tail = splittype(url)
    if proto != 'http':
        # BUG FIX: the message used to read "Unsupported protocol HTTP",
        # i.e. it named the only protocol that IS supported.
        raise RuntimeError("Unsupported protocol %r, only http is supported"
                           % proto)
    host, tail = splithost(tail)
    cli = HTTPClient(host)
    resp = cli.request(tail, headers={'Host': host})
    if resp.status.endswith('200 OK'):
        return resp.body
    raise RequestError(resp.status, resp)
def __setattr__(self, key, value):
    """Intercept attribute writes: assigning to ``url`` re-derives the
    protocol, path, host and port attributes (port as an int).
    """
    object.__setattr__(self, key, value)
    if key == 'url':
        # The derived assignments below re-enter __setattr__, but only
        # the 'url' key triggers re-parsing, so there is no recursion.
        self.protocol, s1 = splittype(self.url)
        if s1:
            s2, self.path = splithost(s1)
            if s2:
                self.host, port = splitport(s2)
                self.port = int(port) if port is not None else None
            if not getattr(self, 'port', None):
                # Default ports per scheme.
                if self.protocol == 'http':
                    self.port = 80
                elif self.protocol == 'https':
                    self.port = 443
def process_request(self, headers, web, url):
    """Populate *headers* in place: base headers, a random User-Agent,
    and Host/Referer derived from *url* and the *web* site's settings.
    """
    for key, value in base_heard.items():
        headers[key] = value
    headers['User-Agent'] = self.ua.random
    proto, rest = splittype(url)
    host, rest = splithost(rest)
    # host = WEB_SETTINGS[web]['host']
    # NOTE(review): splithost() never returns a host containing '/', so
    # both branches below are effectively no-ops — confirm intent.
    if host[-1] == '/':
        host = host.split('/')[-2]
    else:
        host = host.split('/')[-1]
    headers['Host'] = host
    headers['Referer'] = WEB_SETTINGS[web]['search'].format('')
    headers['Upgrade-Insecure-Requests'] = '1'
def fetchurl(url, query=None):
    """Fetch *url* over plain HTTP (resolving the host through the hub's
    DNS resolver) and return the response body.

    :param url: absolute ``http://`` URL; must not already contain a
        query string when *query* is given
    :param query: optional mapping/sequence urlencoded onto the URL
    :raises RuntimeError: for non-http URLs
    :raises RequestError: when the server does not answer 200 OK
    """
    if query is not None:
        assert '?' not in url, ("Either include query in url"
                                "or pass as parameter, but not both")
        url += '?' + urlencode(query)
    proto, tail = splittype(url)
    if proto != 'http':
        # BUG FIX: the message used to read "Unsupported protocol HTTP",
        # i.e. it named the only protocol that IS supported.
        raise RuntimeError("Unsupported protocol %r, only http is supported"
                           % proto)
    host, tail = splithost(tail)
    # Resolve the host ourselves; the Host header still carries the name.
    ip = gethub().dns_resolver.gethostbyname(host)
    cli = HTTPClient(ip)
    resp = cli.request(tail, headers={'Host': host})
    if resp.status.endswith('200 OK'):
        return resp.body
    raise RequestError(resp.status, resp)
def stat_page(self):
    """HEAD the page named by ``self.get_url()`` and return its
    Last-Modified header parsed via ``self._parse_http_dt``.
    """
    from urllib.parse import splittype, splithost
    from http.client import HTTPConnection
    url = self.get_url()
    # 20 == logging.INFO
    self.log(20, 'Statting page {!r} at {!r}.'.format(self.name, url))
    (_, dp) = splittype(url)
    (host, path) = splithost(dp)
    conn = HTTPConnection(host)
    conn.request('HEAD', path)
    res = conn.getresponse()
    # NOTE(review): getheader() returns None when the header is absent —
    # confirm _parse_http_dt tolerates that.
    lmt_raw = res.getheader('last-modified')
    lm_dts = self._parse_http_dt(lmt_raw)
    return lm_dts
def __init__(self, uri, transport=None, encoding=None, verbose=False,
             allow_none=False, use_datetime=False):
    """Proxy for an XML-RPC server reached over SCGI.

    :param uri: ``scgi://host[:port]/path`` endpoint
    :raises IOError: when *uri* does not use the ``scgi`` scheme
    """
    type, uri = urlparser.splittype(uri)
    # BUG FIX: `('scgi')` is just the string 'scgi', so the original
    # `type not in ('scgi')` was a SUBSTRING test that also accepted
    # schemes like 'sc' or 'gi'.  A one-element tuple makes it an exact
    # scheme check.
    if type not in ('scgi',):
        raise IOError('unsupported XML-RPC protocol')
    self.__host, self.__handler = urlparser.splithost(uri)
    if not self.__handler:
        self.__handler = '/'
    if transport is None:
        transport = SCGITransport(use_datetime=use_datetime)
    self.__transport = transport
    self.__encoding = encoding
    self.__verbose = verbose
    self.__allow_none = allow_none
def index(request):
    """Record a search URL: map its host to a known Source and upsert a
    SearchUrl row.  Returns HTTP 400 when the host matches no source.
    """
    # print(request)
    url = request.GET.get('url', None)
    host = parse.splithost(parse.splittype(url)[1])[0]
    logger.info('host is {}'.format(host))
    source = None
    # First Source whose name is a substring of the host wins.
    for tag in Source:
        if tag.name in host:
            source = tag.value
            break
    if source is None:
        return HttpResponseBadRequest('Unknown url {}'.format(url))
    # Update the database (create the row when it does not exist yet).
    item, create_ = SearchUrl.objects.get_or_create(
        request_url=url, defaults=dict(source=source))
    return HttpResponse('ok')
def request(self, uri, method="GET", body='', headers=None,
            redirections=httplib2.DEFAULT_MAX_REDIRECTS,
            connection_type=None):
    """Issue an OAuth-signed HTTP request via httplib2.

    Form-encoded POST bodies are signed and re-serialized; for GET the
    OAuth parameters are folded into the URL; for everything else they
    travel in the Authorization header.
    """
    DEFAULT_POST_CONTENT_TYPE = 'application/x-www-form-urlencoded'
    if not isinstance(headers, dict):
        headers = {}
    if method == "POST":
        headers['Content-Type'] = headers.get('Content-Type',
                                              DEFAULT_POST_CONTENT_TYPE)
    is_form_encoded = \
        headers.get('Content-Type') == 'application/x-www-form-urlencoded'
    if is_form_encoded and body:
        parameters = parse_qs(body)
    else:
        parameters = None
    req = Request.from_consumer_and_token(self.consumer,
                                          token=self.token,
                                          http_method=method,
                                          http_url=uri,
                                          parameters=parameters,
                                          body=body,
                                          is_form_encoded=is_form_encoded)
    req.sign_request(self.method, self.consumer, self.token)
    schema, rest = splittype(uri)
    if rest.startswith('//'):
        hierpart = '//'
    else:
        hierpart = ''
    host, rest = splithost(rest)
    # The realm is the URL's origin (scheme://host).
    realm = schema + ':' + hierpart + host
    if is_form_encoded:
        body = req.to_postdata()
    elif method == "GET":
        uri = req.to_url()
    else:
        headers.update(req.to_header(realm=realm))
    return httplib2.Http.request(self, uri, method=method, body=body,
                                 headers=headers,
                                 redirections=redirections,
                                 connection_type=connection_type)
def __init__(self, uri, transport=None, encoding=None, verbose=0, version=None, headers=None, history=None, config=jsonrpclib.config.DEFAULT, context=None): """ Sets up the server proxy :param uri: Request URI :param transport: Custom transport handler :param encoding: Specified encoding :param verbose: Log verbosity level :param version: JSON-RPC specification version :param headers: Custom additional headers for each request :param history: History object (for tests) :param config: A JSONRPClib Config instance :param context: The optional SSLContext to use """ # Store the configuration self._config = config self.__version = version or config.version schema, uri = splittype(uri) if schema not in ('http', 'https'): _logger.error("jsonrpclib only support http(s) URIs, not %s", schema) raise IOError('Unsupported JSON-RPC protocol.') self.__host, self.__handler = splithost(uri) if not self.__handler: # Not sure if this is in the JSON spec? self.__handler = '/' if transport is None: if schema == 'https': transport = SafeTransport(config=config, context=context) else: transport = Transport(config=config) self.__transport = transport self.__encoding = encoding self.__verbose = verbose self.__history = history # Global custom headers are injected into Transport self.__transport.push_headers(headers or {})
def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
    """Record the real host/port for the eventual CONNECT to the proxy,
    then delegate to M2Crypto's putrequest.

    Raises ValueError for URLs without a scheme, or with a scheme whose
    default port is not in ``self._ports``.
    """
    # putrequest is called before connect, so can interpret url and get
    # real host/port to be used to make CONNECT request to proxy
    proto, rest = splittype(url)
    if proto is None:
        raise ValueError("unknown URL type: %s" % url)
    # get host
    host, rest = splithost(rest)
    # try to get port
    host, port = splitport(host)
    # if port is not defined try to get from proto
    if port is None:
        try:
            port = self._ports[proto]
        except KeyError:
            raise ValueError("unknown protocol for: %s" % url)
    self._real_host = host
    self._real_port = int(port)
    M2Crypto.httpslib.HTTPSConnection.putrequest(self, method, url,
                                                 skip_host,
                                                 skip_accept_encoding)
def proxy_open(self, req, proxy, type):
    """urllib2 proxy-handler hook: route *req* through *proxy*, adding a
    Proxy-Authorization header when credentials are embedded in the
    proxy URL as ``user:pass@host``.
    """
    orig_type = req.get_type()
    type, r_type = splittype(proxy)
    host, XXX = splithost(r_type)
    if '@' in host:
        user_pass, host = host.split('@', 1)
        # NOTE(review): base64.encodestring was removed in Python 3.9
        # (use encodebytes) — this code is Python 2 era.
        user_pass = base64.encodestring(unquote(user_pass)).strip()
        req.add_header('Proxy-Authorization', 'Basic '+user_pass)
    host = unquote(host)
    req.set_proxy(host, type)
    if orig_type == type:
        # let other handlers take care of it
        # XXX this only makes sense if the proxy is before the
        # other handlers
        return None
    else:
        # need to start over, because the other handlers don't
        # grok the proxy's URL type
        return self.parent.open(req)
def __init__(self, uri, transport=None, encoding=None, verbose=None,
             allow_none=0):
    """JSON-RPC server proxy for http(s) endpoints.

    Raises IOError for any other scheme; the handler path defaults to
    '/RPC2' when the URI carries none.
    """
    utype, uri = splittype(uri)
    if utype not in ("http", "https"):
        raise IOError("Unsupported JSONRPC protocol")
    self.__host, self.__handler = splithost(uri)
    if not self.__handler:
        self.__handler = "/RPC2"
    if transport is None:
        # Choose the TLS-capable transport for https.
        if utype == "https":
            transport = SafeTransport()
        else:
            transport = Transport()
    self.__transport = transport
    self.__encoding = encoding
    self.__verbose = verbose
    self.__allow_none = allow_none
def post(self, query):
    """POST the OFX *query* to the institution's URL and return the
    response body decoded as ASCII (non-ASCII bytes are dropped).
    """
    i = self.institution
    logging.debug('posting data to %s' % i.url)
    logging.debug('---- request ----')
    logging.debug(query)
    garbage, path = splittype(i.url)
    host, selector = splithost(path)
    h = HTTPSConnection(host)
    h.request('POST', selector, query, {
        "Content-type": "application/x-ofx",
        "Accept": "*/*, application/x-ofx"
    })
    res = h.getresponse()
    response = res.read().decode('ascii', 'ignore')
    logging.debug('---- response ----')
    logging.debug(res.__dict__)
    logging.debug(response)
    res.close()
    return response
def __init__(self, uri, basepath=None):
    """Resolve *uri* — a ``data:`` URI, ``file:`` URL, http(s) URL, or a
    local filesystem path — into mimetype/data/file attributes, resolving
    relative references against *basepath*.
    """
    self.basepath = basepath
    self.mimetype = None
    self.file = None
    self.data = None
    self.uri = None
    self.local = None
    self.tmp_file = None
    uri = uri or str()
    if type(uri) != str:
        uri = uri.decode("utf-8")
    log.debug("FileObject %r, Basepath: %r", uri, basepath)
    # Data URI
    if uri.startswith("data:"):
        m = _rx_datauri.match(uri)
        self.mimetype = m.group("mime")
        self.data = base64.b64decode(m.group("data").encode("utf-8"))
    else:
        # Check if we have an external scheme
        if basepath and not urlparse.urlparse(uri).scheme:
            urlParts = urlparse.urlparse(basepath)
        else:
            urlParts = urlparse.urlparse(uri)
        log.debug("URLParts: {}".format((urlParts, urlParts.scheme)))
        if urlParts.scheme == 'file':
            if basepath and uri.startswith('/'):
                uri = urlparse.urljoin(basepath, uri[1:])
            urlResponse = urllib2.urlopen(uri)
            self.mimetype = urlResponse.info().get(
                "Content-Type", '').split(";")[0]
            self.uri = urlResponse.geturl()
            self.file = urlResponse
        # Drive letters have len==1 but we are looking
        # for things like http:
        elif urlParts.scheme in ('http', 'https'):
            log.debug("Sending request for {} with httplib".format(uri))
            # External data
            if basepath:
                uri = urlparse.urljoin(basepath, uri)
            log.debug("Uri parsed: {}".format(uri))
            #path = urlparse.urlsplit(url)[2]
            #mimetype = getMimeType(path)
            # Using HTTPLIB
            server, path = urlparse.splithost(uri[uri.find("//"):])
            if uri.startswith("https://"):
                conn = httplib.HTTPSConnection(server)
            else:
                conn = httplib.HTTPConnection(server)
            conn.request("GET", path)
            r1 = conn.getresponse()
            # log.debug("HTTP %r %r %r %r", server, path, uri, r1)
            if (r1.status, r1.reason) == (200, "OK"):
                self.mimetype = r1.getheader(
                    "Content-Type", '').split(";")[0]
                self.uri = uri
                log.debug("here")
                # Transparently decompress gzip-encoded responses.
                if r1.getheader("content-encoding") == "gzip":
                    import gzip
                    self.file = gzip.GzipFile(
                        mode="rb", fileobj=six.StringIO(r1.read()))
                else:
                    self.file = r1
            else:
                log.debug("Received non-200 status: {}".format(
                    (r1.status, r1.reason)))
                # Fall back to urllib2, which follows redirects.
                try:
                    urlResponse = urllib2.urlopen(uri)
                except urllib2.HTTPError as e:
                    log.error("Could not process uri: {}".format(e))
                    return
                self.mimetype = urlResponse.info().get(
                    "Content-Type", '').split(";")[0]
                self.uri = urlResponse.geturl()
                self.file = urlResponse
        else:
            log.debug("Unrecognized scheme, assuming local file path")
            # Local data
            if basepath:
                uri = os.path.normpath(os.path.join(basepath, uri))
            if os.path.isfile(uri):
                self.uri = uri
                self.local = uri
                self.setMimeTypeByName(uri)
                # Text files are opened in text mode, everything else
                # in binary mode.
                if self.mimetype.startswith('text'):
                    self.file = open(uri, "r")  # removed bytes... lets hope it goes ok :/
                else:
                    self.file = open(uri, "rb")  # removed bytes... lets hope it goes ok :/
def get_host(self):
    """Return the request's host, lazily derived from the parsed URL
    remainder and cached on ``self.host``; percent-escapes are unquoted.
    """
    if self.host is None:
        # The double-underscore attributes are name-mangled against the
        # defining class.
        self.host, self.__r_host = splithost(self.__r_type)
        if self.host:
            self.host = unquote(self.host)
    return self.host
def __init__(self, uri, transport=None, encoding=None, verbose=0, version=None, headers=None, history=None, config=jsonrpclib.config.DEFAULT, context=None): """ Sets up the server proxy :param uri: Request URI :param transport: Custom transport handler :param encoding: Specified encoding :param verbose: Log verbosity level :param version: JSON-RPC specification version :param headers: Custom additional headers for each request :param history: History object (for tests) :param config: A JSONRPClib Config instance :param context: The optional SSLContext to use """ # Store the configuration self._config = config self.__version = version or config.version schema, uri = splittype(uri) use_unix = False if schema.startswith("unix+"): schema = schema[len("unix+"):] use_unix = True if schema not in ('http', 'https'): _logger.error("jsonrpclib only support http(s) URIs, not %s", schema) raise IOError('Unsupported JSON-RPC protocol.') self.__host, self.__handler = splithost(uri) if use_unix: unix_path = self.__handler self.__handler = '/' elif not self.__handler: # Not sure if this is in the JSON spec? self.__handler = '/' if transport is None: if use_unix: if schema == "http": # In Unix mode, we use the path part of the URL (handler) # as the path to the socket file transport = UnixTransport( config=config, path=unix_path ) elif schema == 'https': transport = SafeTransport(config=config, context=context) else: transport = Transport(config=config) if transport is None: raise IOError( "Unhandled combination: UNIX={}, protocol={}" .format(use_unix, schema) ) self.__transport = transport self.__encoding = encoding self.__verbose = verbose self.__history = history # Global custom headers are injected into Transport self.__transport.push_headers(headers or {})