def __init__(self, server_url, proxy=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
             ca_cert=None, client_cert=None, client_cert_pass=None):
    """Initialize the transport class."""
    xmlrpclib.Transport.__init__(self)
    client_cert = client_cert or (None, None)
    self.disable_ssl_validation = False
    self.scheme = urllib.splittype(server_url)[0]
    self.https = (self.scheme == 'https')
    self.proxy = None
    self.timeout = timeout
    self._certfile, self._keyfile = client_cert
    self.ca_cert = ca_cert
    self.client_cert_pass = client_cert_pass
    # pull system proxy if no proxy is forced
    if not proxy:
        if self.https:
            proxy = os.environ.get('https_proxy', None)
        else:
            proxy = os.environ.get('http_proxy', None)
    if proxy:
        scheme, proxy_url = urllib.splittype(proxy)
        self.proxy = urllib.splithost(proxy_url)[0]
        # re-check if we need to support https
        self.https = (scheme == 'https')
def start(self, ssh, url, port=16789, rpcport=6789, cmd=None, mpcactor=None, vcactor=None) -> bool:
    """Start a node.

    :param ssh: ssh connection object
    :param url: the url from the configuration file
    :param port: p2p port
    :param rpcport: rpc port or ws port
    :param cmd: start command
    :param mpcactor: mpc account, without the 0x prefix
    :param vcactor: vc account, without the 0x prefix
    :return: bool
    """
    if not cmd:
        base_ws = '--identity "platon" --verbosity 4 --debug --ws --wsorigins "*" --txpool.nolocals --wsapi "db,eth,net,web3,miner,admin,personal" --wsaddr 0.0.0.0'
        base_http = '--identity "platon" --verbosity 4 --debug --rpc --txpool.nolocals --rpcapi "db,eth,net,web3,miner,admin,personal" --rpcaddr 0.0.0.0'
        base_ws += ' --syncmode "{}"'.format(self.syncmode)
        base_http += ' --syncmode "{}"'.format(self.syncmode)
        if self.net_type:
            base_http = base_http + " --" + self.net_type
            base_ws = base_ws + " --" + self.net_type
        if parse.splittype(url)[0] == "ws":
            if mpcactor:
                cmd = '''nohup {}/node-{}/platon {} --datadir {}/node-{}/data --port {} --wsport {} --mpc --mpc.actor {} > {}/node-{}/nohup.out 2>&1 &'''.format(
                    self.deploy_path, port, base_ws, self.deploy_path, port, port, rpcport,
                    "0x" + str(mpcactor), self.deploy_path, port)
            elif vcactor:
                cmd = '''nohup {}/node-{}/platon {} --datadir {}/node-{}/data --port {} --wsport {} --vc --vc.actor {} --vc.password 88888888 > {}/node-{}/nohup.out 2>&1 &'''.format(
                    self.deploy_path, port, base_ws, self.deploy_path, port, port, rpcport,
                    "0x" + str(vcactor), self.deploy_path, port)
            else:
                cmd = '''nohup {}/node-{}/platon {} --datadir {}/node-{}/data --port {} --wsport {} > {}/node-{}/nohup.out 2>&1 &'''.format(
                    self.deploy_path, port, base_ws, self.deploy_path, port, port, rpcport,
                    self.deploy_path, port)
        elif parse.splittype(url)[0] == "http":
            if mpcactor:
                cmd = '''nohup {}/node-{}/platon {} --datadir {}/node-{}/data --port {} --rpcport {} --mpc --mpc.actor {} > {}/node-{}/nohup.out 2>&1 &'''.format(
                    self.deploy_path, port, base_http, self.deploy_path, port, port, rpcport,
                    "0x" + str(mpcactor), self.deploy_path, port)
            elif vcactor:
                cmd = '''nohup {}/node-{}/platon {} --datadir {}/node-{}/data --port {} --rpcport {} --vc --vc.actor {} --vc.password 88888888 > {}/node-{}/nohup.out 2>&1 &'''.format(
                    self.deploy_path, port, base_http, self.deploy_path, port, port, rpcport,
                    "0x" + str(vcactor), self.deploy_path, port)
            else:
                cmd = '''nohup {}/node-{}/platon {} --datadir {}/node-{}/data --port {} --rpcport {} > {}/node-{}/nohup.out 2>&1 &'''.format(
                    self.deploy_path, port, base_http, self.deploy_path, port, port, rpcport,
                    self.deploy_path, port)
        else:
            raise Exception("Invalid url connection type")
    self.run_ssh(ssh, cmd)
    result = self.run_ssh(
        ssh, "ps -ef|grep platon|grep %s|grep -v grep|awk {'print $2'}" % str(rpcport))
    if not result:
        return False
    else:
        return True
def post(self, query):
    i = self.institution
    logging.debug('posting data to %s' % i.url)
    logging.debug('---- request ----')
    logging.debug(query)
    garbage, path = splittype(i.url)
    host, selector = splithost(path)
    h = HTTPSConnection(host, timeout=60)
    # Discover requires a particular ordering of headers, so send the
    # request step by step.
    h.putrequest('POST', selector, skip_host=True, skip_accept_encoding=True)
    h.putheader('Content-Type', 'application/x-ofx')
    h.putheader('Host', host)
    h.putheader('Content-Length', len(query))
    h.putheader('Connection', 'Keep-Alive')
    if self.accept:
        h.putheader('Accept', self.accept)
    if self.user_agent:
        h.putheader('User-Agent', self.user_agent)
    h.endheaders(query.encode())
    res = h.getresponse()
    response = res.read().decode('ascii', 'ignore')
    logging.debug('---- response ----')
    logging.debug(res.__dict__)
    logging.debug(response)
    res.close()
    return response
def here(modal, string):
    ban_words = ['here', 'click', 'Here', 'Click', 'CLICK', 'HERE']
    here_num = 0
    ban_flag = 0
    stand_host = modal
    stand_host_1 = ''
    urls = re.findall(r'<[Aa].*?href=.*?</[Aa]>', string, re.S)
    for url in urls:
        for word in ban_words:
            if word in url:  # a "click"/"here" keyword was found
                ban_flag = 1
                break
        if ban_flag == 1:
            http_url = re.findall(
                r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
                str(url))
            if len(http_url) > 0:
                first_url = http_url[0]
                proto, rest = splittype(first_url)
                host, rest = splithost(rest)
                host, port = splitport(host)
                stand_host = host
                # print(host)
                if host in modal:  # same domain -> 0
                    pass
                else:
                    stand_host_1 = host
                    here_num = 1  # different domain -> 1
    if stand_host_1:
        stand_host = stand_host_1
    return str(here_num), str(stand_host)
def load_timestream(file_path):
    """Load a time stream from either a text file, HDF5 file, or URL

    The argument "file_path" can be one of the following:

    1. A path to a text file;
    2. A path to an HDF5 file;
    3. An URL pointing to the JSON record of a test;
    4. An URL pointing to an HDF5 file.

    Return a pair consisting of a dictionary containing the metadata
    and a Timestream object."""
    if isinstance(file_path, Path) or (not urlparse.splittype(file_path)[0]):
        # Local path
        ext = os.path.splitext(file_path)[1]
        if ext.lower() == ".txt":
            return None, load_text_file(file_path)
        else:
            return load_hdf5_file(file_path)
    else:
        # URL
        url = file_path
        req = urlreq.urlopen(url)
        content_type = req.info().get_content_type()
        save_to_cache = False
        result = None

        # We are *forced* to create a named temporary file and close it
        # before reading, because h5py does not support reading from
        # file-like objects like BytesIO or an already opened TemporaryFile
        with NamedTemporaryFile(suffix="h5", delete=False) as h5_file:
            h5_file_name = h5_file.name
            if content_type == "application/json":
                metadata = json.loads(req.read().decode("utf-8"))
                file_path = default_hdf5_file_name(metadata)
                if file_path.is_file():
                    log.info(f'Test {url} found in cache "{file_path}"')
                    _, result = load_timestream(file_path)
                else:
                    save_to_cache = True
                    download_test(url, metadata, h5_file)
            elif content_type == "application/hdf5":
                copyfileobj(req, h5_file)
            else:
                raise ValueError('unknown content type: "{0}"'.format(content_type))

        if result is None:
            result = load_hdf5_file(h5_file_name)[1]
            if save_to_cache:
                log.info(f'Copy file downloaded from {url} to cache "{file_path}"')
                copyfile(src=h5_file_name, dst=file_path)

        os.remove(h5_file_name)
        return metadata, result
def work(self, task):
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        pro_, rest = splittype(task[0])
        host, rest = splithost(rest)
        host, port = splitport(host)
        task.append(rest)
        task.append(host)
        sock.setblocking(0)
        sock.connect_ex((host, int(port) if port else 80))

        def timeout_cb():
            if not sock._closed:
                KBEngine.deregisterWriteFileDescriptor(sock.fileno())
                sock.close()
            if task and task[2]:
                task[2](None)

        self._write_timer[sock.fileno()] = self.add_timer(REQUEST_TIMEOUT, timeout_cb)
        KBEngine.registerWriteFileDescriptor(
            sock.fileno(), Functor(self.onSend, task, sock))
    except:
        self._tasks.append(task)
        self.logsError()
        if not sock._closed:
            sock.close()
def __init__(self, uri, username=None, password=None, verify=False, sp=None, sp_kwargs=None):
    self.uri = uri  #: From X{__init__(self, url)}
    self.username = username
    self.password = password
    self.schema = urlparser.splittype(uri)[0]
    if sp:
        self.sp = sp
    elif self.schema in ['http', 'https']:
        self.sp = HTTPServerProxy
    elif self.schema == 'scgi':
        self.sp = SCGIServerProxy
    else:
        raise NotImplementedError()
    self.sp_kwargs = sp_kwargs or {}
    self.torrents = []  #: List of L{Torrent} instances
    self._rpc_methods = []  #: List of rTorrent RPC methods
    self._torrent_cache = []
    self._client_version_tuple = ()
    if verify is True:
        self._verify_conn()
def __init__(self, username=None, password=None, serverurl=None):
    xmlrpclib.Transport.__init__(self)
    self.username = username
    self.password = password
    self.verbose = False
    self.serverurl = serverurl
    if serverurl.startswith('http://'):
        type, uri = urllib.splittype(serverurl)
        host, path = urllib.splithost(uri)
        host, port = urllib.splitport(host)
        if port is None:
            port = 80
        else:
            port = int(port)

        def get_connection(host=host, port=port):
            return httplib.HTTPConnection(host, port)

        self._get_connection = get_connection
    elif serverurl.startswith('unix://'):
        def get_connection(serverurl=serverurl):
            # we use 'localhost' here because domain names must be
            # < 64 chars (or we'd use the serverurl filename)
            conn = UnixStreamHTTPConnection('localhost')
            conn.socketfile = serverurl[7:]
            return conn

        self._get_connection = get_connection
    else:
        raise ValueError('Unknown protocol for serverurl %s' % serverurl)
def __init__(self, timeout=ExistDB.DEFAULT_TIMEOUT, session=None, url=None,
             encoding='UTF-8', *args, **kwargs):
    # if default timeout is requested, use the global socket default
    if timeout is ExistDB.DEFAULT_TIMEOUT:
        timeout = socket.getdefaulttimeout()
    xmlrpc.client.Transport.__init__(self, *args, **kwargs)
    self.timeout = timeout
    # NOTE: assumes that if basic auth is needed, it is set
    # on the session that is passed in
    if session:
        self.session = session
    else:
        self.session = requests.Session()
    self.session.headers.update({
        'User-Agent': self.user_agent,
        'Content-Type': 'application/xml'
    })
    # determine whether https is needed based on the url
    if url is not None:
        self.use_https = (splittype(url)[0] == 'https')
    self.encoding = encoding
def _get_site_meta(self, article):
    """
    Extract additional metadata about the article and its source, beyond
    what newspaper does for us by default.

    I think in the future it would be good to extend newspaper's content
    extractor with a more sophisticated one that does some of the below,
    and more.
    """
    source_url = article.source_url
    proto, url = [p.strip('/') for p in urlparse.splittype(source_url)]
    name = article.meta_site_name
    if not name:
        for xpath in META_SITE_NAME_EX:
            name = article.extractor.get_meta_content(article.clean_doc, xpath)
            if name:
                break
        else:
            self.log.warning(f'{article.url} did not have a meta_site_name')
            name = tldextract.extract(source_url).domain.capitalize()
    favicon = article.meta_favicon
    if favicon:
        if favicon[:2] == '//':
            # protocol-relative URL
            favicon = f'{proto}:{favicon}'
        elif favicon[0] == '/':
            # relative URL to site base
            favicon = source_url + favicon
    return {'url': url, 'name': name, 'icon_url': favicon}
def download(self, url, localpath, username=None, passwd=None, overwrite=True):
    '''Download a url to a file or a directory, supported protocols: http, https, ftp, file

    @param url: URL to download from
    @type url: string
    @param localpath: filename or directory to download the url to, pass - to return data
    @type localpath: string
    @param username: username for the url if it requires authentication
    @type username: string
    @param passwd: password for the url if it requires authentication
    @type passwd: string
    '''
    if not url:
        raise ValueError('URL can not be None or empty string')
    if not localpath:
        raise ValueError('Local path to download the url to can not be None or empty string')
    filename = ''
    if localpath == '-':
        filename = '-'
    if j.sal.fs.isDir(localpath):
        filename = j.sal.fs.joinPaths(localpath, j.sal.fs.getBaseName(url))
    else:
        if j.sal.fs.isDir(j.sal.fs.getDirName(localpath)):
            filename = localpath
        else:
            raise ValueError('Local path is an invalid path')
    self.logger.debug('Downloading url %s to local path %s' % (url, filename))
    from urllib.request import FancyURLopener
    from urllib.parse import splittype

    class myURLOpener(FancyURLopener):
        # read a URL, with automatic HTTP authentication
        def __init__(self, user, passwd):
            self._user = user
            self._passwd = passwd
            self._promptcalled = False
            FancyURLopener.__init__(self)

        def prompt_user_passwd(self, host, realm):
            if not self._user or not self._passwd:
                raise j.exceptions.RuntimeError('Server requested authentication but nothing was given')
            if not self._promptcalled:
                self._promptcalled = True
                return self._user, self._passwd
            raise j.exceptions.RuntimeError(
                'Could not authenticate with the given authentication user:%s and password:%s' % (username, passwd))

    urlopener = myURLOpener(username, passwd)
    if not j.sal.fs.exists(filename):
        overwrite = True
    if overwrite:
        if username and passwd and splittype(url)[0] == 'ftp':
            url = url.split('://')[0] + '://%s:%s@' % (username, passwd) + url.split('://')[1]
        if filename != '-':
            urlopener.retrieve(url, filename, None, None)
            self.logger.debug('URL %s is downloaded to local path %s' % (url, filename))
            return
        else:
            return urlopener.open(url).read()
    return print("!!! File already exists, did not overwrite")
def find_modal(list):
    domain_list = []
    for url in list:
        http_url = re.findall(
            r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
            str(url))
        if len(http_url) > 0:
            first_url = http_url[0]
            proto, rest = splittype(first_url)
            host, rest = splithost(rest)
            host, port = splitport(host)
            domain_list.append(host)
            # print(host)
        else:
            host_ip_num = 0
    word_counts = collections.Counter(domain_list)
    # the most frequently occurring domain
    top_one = word_counts.most_common(1)
    if len(top_one) > 0:
        modal = top_one[0][0]
        # print(modal)
    else:
        modal = '-'
    return modal
def get_info_by_url(url):
    protocol, rest = parse.splittype(url)
    host, path = parse.splithost(rest)
    host, port = parse.splitport(host)
    if port is None:
        port = '80'
    return protocol, host, path, port
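# A minimal usage sketch for get_info_by_url above (hypothetical URL; assumes the
# same "from urllib import parse" import used by the function). Note that the
# legacy splitport helper returns the port as a string, hence '8080' below.
if __name__ == "__main__":
    protocol, host, path, port = get_info_by_url("http://example.com:8080/index.html")
    print(protocol, host, path, port)  # -> http example.com /index.html 8080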
def handleData(self, response):
    s = response.text
    chapters_dict = dict()
    chapters = []
    queue_out = Queue()
    for field in chapter_fields:
        if self.re_rule.get(field):
            chapters_dict[field] = getRe(s, self.re_rule[field])
        elif self.xpath_rule.get(field):
            chapters_dict[field] = self.getXpath(s, self.xpath_rule[field])
    urls = chapters_dict['url']
    if urls[0] != '' and urls[0][0] == '/' and urls[0][1] != '/':
        menu_url = WEB_SETTINGS[self.web]['menu'].format('')
        proto, rest = splittype(menu_url)
        host, rest = splithost(rest)
        chapters_dict['url'] = [proto + '://' + host + url[:] for url in urls]
    print(chapters_dict)
    # self.req().createChapter(self.web, chapters_dict['url'], queue_out)
    for i in range(len(chapters_dict['url'])):
        chapter = Chapter()
        chapter.url = chapters_dict['url'][i].replace(' ', '')
        chapter.title = chapters_dict['title'][i].replace(' ', '')
        chapter.content = 'Failed to download this chapter'
        chapters.append(chapter)
    # self.dContent(chapters, queue_out)
    # Ui_MainWindow.tabWidget.get_ResultWidget('空').label.setText('啊哈哈')
    return chapters
def get_sha_and_start_download(self, download_result):
    res = download_result[self.new_download_url]
    checksum = res.buffer.getvalue().decode('utf-8').split()[0]
    url = self.new_download_url.replace('sums.php', 'download.php') + '&r=1'
    if not self.https:
        self.new_download_url = 'https://' + parse.splittype(self.new_download_url)[1]
    self.check_data_and_start_download(url, checksum)
def get_source_meta(article):
    """
    Extract additional metadata about the article and its source, beyond
    what newspaper does for us by default.

    I think in the future it would be good to extend newspaper's content
    extractor with a more sophisticated one that does some of the below,
    and more.
    """
    source_url = article.source_url
    key = urlparse.splittype(source_url)[1].strip('/')
    name = article.meta_site_name
    if not name:
        for xpath in META_SITE_NAME_EX:
            name = article.extractor.get_meta_content(article.clean_doc, xpath)
            if name:
                break
        else:
            warnings.warn(f'{article.url} did not have a meta_site_name')
            name = tldextract.extract(source_url).domain.capitalize()
    publish_date = article.publish_date
    if not publish_date:
        warnings.warn(f'{article.url} did not have a publish_date')
    else:
        publish_date = publish_date.astimezone(timezone.utc).isoformat()
    favicon = article.meta_favicon
    if favicon and favicon[0] == '/':
        # relative URL to site base
        favicon = source_url + favicon
    return (key, name, publish_date, favicon)
def __init__(self, url, headers=None):
    self.url = url
    self.headers = headers
    self.origin_req_host = cookielib.request_host(self)
    self.type, r = splittype(url)
    self.host, r = splithost(r)
    if self.host:
        self.host = unquote(self.host)
def _do_post(self, query, extra_headers=[]):
    """
    Do a POST to the Institution.

    :param query: Body content to POST (OFX Query)
    :type query: str
    :param extra_headers: Extra headers to send with the request, as a list
      of (Name, Value) header 2-tuples.
    :type extra_headers: list
    :return: 2-tuple of (HTTPResponse, str response body)
    :rtype: tuple
    """
    i = self.institution
    logging.debug('posting data to %s' % i.url)
    garbage, path = splittype(i.url)
    host, selector = splithost(path)
    try:
        h = HTTPSConnection(host, timeout=60)
        h.connect()
    except ssl.SSLError as ex:
        if ex.reason == "UNSUPPORTED_PROTOCOL":
            h = HTTPSConnection(host, timeout=60,
                                context=ssl.SSLContext(ssl.PROTOCOL_TLSv1))
            h.connect()
        else:
            raise
    # Discover requires a particular ordering of headers, so send the
    # request step by step.
    h.putrequest('POST', selector, skip_host=True, skip_accept_encoding=True)
    headers = [
        ('Content-Type', 'application/x-ofx'),
        ('Host', host),
        ('Content-Length', len(query)),
        ('Connection', 'Keep-Alive')
    ]
    if self.accept:
        headers.append(('Accept', self.accept))
    if self.user_agent:
        headers.append(('User-Agent', self.user_agent))
    for ehname, ehval in extra_headers:
        headers.append((ehname, ehval))
    logging.debug('---- request headers ----')
    for hname, hval in headers:
        logging.debug('%s: %s', hname, hval)
        h.putheader(hname, hval)
    logging.debug('---- request body (query) ----')
    logging.debug(query)
    h.endheaders(query.encode())
    res = h.getresponse()
    response = res.read().decode('ascii', 'ignore')
    logging.debug('---- response ----')
    logging.debug(res.__dict__)
    logging.debug('Headers: %s', res.getheaders())
    logging.debug(response)
    res.close()
    return res, response
def get_parser_from_url(url):
    global PARSER
    protocol, s1 = splittype(url)
    host, path = splithost(s1)
    for i, j in PARSER.items():
        if i in host:
            return j
    return None
def __init__(self, uri, transport=None, encoding=None, verbose=0,
             version=None, headers=None, history=None,
             config=jsonrpclib.config.DEFAULT, context=None, timeout=None):
    """
    Sets up the server proxy

    :param uri: Request URI
    :param transport: Custom transport handler
    :param encoding: Specified encoding
    :param verbose: Log verbosity level
    :param version: JSON-RPC specification version
    :param headers: Custom additional headers for each request
    :param history: History object (for tests)
    :param config: A JSONRPClib Config instance
    :param context: The optional SSLContext to use
    """
    # Store the configuration
    self._config = config
    self.__version = version or config.version

    schema, uri = splittype(uri)
    if schema not in ('http', 'https'):
        _logger.error("jsonrpclib only supports http(s) URIs, not %s", schema)
        raise IOError('Unsupported JSON-RPC protocol.')

    self.__host, self.__handler = splithost(uri)
    if not self.__handler:
        # Not sure if this is in the JSON spec?
        self.__handler = '/'

    if transport is None:
        if schema == 'https':
            transport = SafeTransport(config=config, context=context, timeout=timeout)
        else:
            transport = Transport(config=config, timeout=timeout)
    self.__transport = transport

    self.__encoding = encoding
    self.__verbose = verbose
    self.__history = history

    # Global custom headers are injected into Transport
    self.__transport.push_headers(headers or {})
def parsePage(spider, url, response):
    html = response.content
    selector = etree.HTML(html)
    html = html.decode('utf-8')
    propertys = json.loads(spider.args['PagePropertyRegularExpression'])
    for key in propertys:
        item = propertys[key]
        if item.startswith('$'):
            p1 = r'%s' % item[1:]
            pattern = re.compile(p1)
            match = pattern.search(html)
            if match:
                propertys[key] = match.group(1)
            # special handling of the article content: extract the images
            if key == 'content_raw':
                contentselector = etree.HTML(propertys[key])
                etree.strip_elements(contentselector, 'script')
                etree.strip_tags(contentselector, 'a')
                propertys[key] = etree.tostring(contentselector).decode('utf-8')
                if spider.args['DownLoadImg'] == 1:
                    for imgsrc in contentselector.xpath("//img/@src"):
                        if imgsrc is not None and len(imgsrc) > 0:
                            cache.rpush('link-img', imgsrc)
                            proto, rest = splittype(imgsrc)
                            res, rest = splithost(rest)
                            propertys[key] = propertys[key].replace(
                                imgsrc, imgsrc.replace(res, 'img.zyai.top'))
                            logging.info('push a img link to queue %s .' % imgsrc)
        else:
            item = selector.xpath(item)[0]
            propertys[key] = item
    dataPersistenceType = spider.args['DataPersistenceType']
    if dataPersistenceType == 'WPRPC':
        wp = Client('http://tech.cocopass.com/xmlrpc.php', 'admin', '19841204')
        """ Publish the blog post """
        post = WordPressPost()
        post.title = propertys['title'].encode('utf-8')
        post.content = propertys['content_raw'].encode('utf-8')
        post.post_status = 'publish'
        post.terms_names = {'post_tag': [post.title], 'category': ['爱好']}
        wp.call(NewPost(post))
        logging.info('successfully post one article: %s .' % propertys['title'])
    elif dataPersistenceType == 'MYSQL':
        pass
def get_db_engine():
    if settings.DB_URI is None:
        raise EnvironmentError('The "DB_URI" variable must be configured!')
    _typ, _ = splittype(settings.DB_URI)
    if _typ.startswith('sqlite'):
        # Only needed for SQLite; other databases do not need it.
        # SQLite only allows a single thread to communicate with it.
        return create_engine(settings.DB_URI,
                             connect_args={"check_same_thread": False})
    else:
        return create_engine(settings.DB_URI)
def _fix_url(self, url):
    if url != "":
        if "http" not in url:
            t, other = parse.splittype(self.pre_url)
            host, path = parse.splithost(other)
            if url[0] != "/":
                url = t + "://" + host + "/" + url
            else:
                url = t + "://" + host + url
    return url
def domain_name(list):
    domain_name_list = []
    for url in list:
        proto, rest = splittype(url)
        host, rest = splithost(rest)
        host, port = splitport(host)
        if host not in domain_name_list:
            domain_name_list.append(host)
        else:
            pass
    return len(domain_name_list)
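# A quick usage sketch for domain_name above (hypothetical URLs; assumes
# splittype/splithost/splitport are imported from urllib.parse as elsewhere
# in this collection). It counts distinct hosts across a list of URLs.
urls = [
    "http://example.com/a",
    "http://example.com/b",
    "https://other.example.org/",
]
print(domain_name(urls))  # -> 2 (example.com and other.example.org)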
def parse_download_link(self, line, in_download):
    """Parse Eclipse download links"""
    if self.download_keyword in line and self.bits in line and 'linux' in line:
        in_download = True
    else:
        in_download = False
    if in_download:
        p = re.search(r"href='(http://www\.eclipse\.org\/downloads/download\.php\?file=.*\.tar\.gz)'", line)
        with suppress(AttributeError):
            self.new_download_url = p.group(1).replace('download.php', 'sums.php').replace('http://', 'https://')
            # "is" compared string identity by accident; use equality instead
            self.https = parse.splittype(self.new_download_url)[0] == "https"
    return ((None, None), in_download)
def add_proxies():
    # request session proxies
    if app.PROXY_SETTING:
        log.debug(u"Using global proxy: " + app.PROXY_SETTING)
        scheme, address = splittype(app.PROXY_SETTING)
        address = app.PROXY_SETTING if scheme else 'http://' + app.PROXY_SETTING
        return {
            "http": address,
            "https": address,
        }
    else:
        return None
def url_split(url):
    """Split url in a tuple (scheme, hostname, port, document) where
    hostname is always lowercased.
    Precondition: url is syntactically correct URI (eg has no whitespace)
    """
    scheme, netloc = parse.splittype(url)
    host, document = parse.splithost(netloc)
    port = default_ports.get(scheme, 0)
    if host:
        host = host.lower()
        host, port = splitport(host, port=port)
    return scheme, host, port, document
def getp():
    path = request.args.get('path')
    resp = requests.get(path, headers=headers, timeout=5)
    proto, rest = UrlPase.splittype(resp.url)
    host, rest = UrlPase.splithost(rest)
    if host == 'm.zwdu.com' or host == 'm.biqubao.com':
        resp.encoding = "GBK"
    else:
        resp.encoding = "utf-8"
    content = resp.text
    return content
def load(self, url):
    self.url = url
    self.protocol, s1 = urllib_parse.splittype(self.url)
    s2, self.path = urllib_parse.splithost(s1)
    self.host, self.port = urllib_parse.splitport(s2)
    if not self.port:
        if self.protocol == 'http':
            self.port = 80
        elif self.protocol == 'https':
            self.port = 443
def __init__(self, uri, transport=None, encoding=None, verbose=0,
             version=None, headers=None, history=None,
             config=jsonrpclib.config.DEFAULT):
    """
    Sets up the server proxy

    :param uri: Request URI
    :param transport: Custom transport handler
    :param encoding: Specified encoding
    :param verbose: Log verbosity level
    :param version: JSON-RPC specification version
    :param headers: Custom additional headers for each request
    :param history: History object (for tests)
    :param config: A JSONRPClib Config instance
    """
    # Store the configuration
    self._config = config
    if not version:
        version = config.version
    self.__version = version

    schema, uri = splittype(uri)
    if schema not in ("http", "https"):
        raise IOError("Unsupported JSON-RPC protocol.")

    self.__host, self.__handler = splithost(uri)
    if not self.__handler:
        # Not sure if this is in the JSON spec?
        self.__handler = "/"

    if transport is None:
        if schema == "https":
            transport = SafeTransport(config=config)
        else:
            transport = Transport(config=config)
    self.__transport = transport

    self.__encoding = encoding
    self.__verbose = verbose
    self.__history = history

    # Global custom headers are injected into Transport
    self.__transport.push_headers(headers or {})
def load(self, url):
    self.url = url
    self.protocol, s1 = splittype(self.url)
    s2, self.path = splithost(s1)
    self.host, port = splitport(s2)
    self.port = int(port) if port is not None else None
    if not self.port:
        if self.protocol == 'http':
            self.port = 80
        elif self.protocol == 'https':
            self.port = 443
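# A brief usage sketch for the load() method above, assuming it belongs to a
# simple holder object (the "Target" class here is hypothetical) and that
# splittype/splithost/splitport are the legacy helpers from urllib.parse.
class Target:
    pass

t = Target()
load(t, "https://api.example.com/v1/items")  # call the function above explicitly
print(t.protocol, t.host, t.port, t.path)    # -> https api.example.com 443 /v1/items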
def _load_workflow(self, fname_snapshot):
    stype = splittype(fname_snapshot)[0]
    if stype == "odbc":
        import pyodbc
        addr = fname_snapshot[7:]
        parsed = addr.split('&')
        try:
            odbc, table, id_, log_id = parsed[:4]
        except TypeError:
            self.warning("Invalid ODBC source format. Here is the "
                         "template: odbc://<odbc data source spec>&"
                         "<table>&<id>&<log id>[&<optional name>]\n"
                         "<table> and <log id> may be empty (\"veles\" and"
                         " <id> value will be used).")
            return None
        if not table:
            table = "veles"
        if not log_id:
            log_id = id_
        if len(parsed) > 4:
            if len(parsed) > 5:
                self.warning("Invalid ODBC source format")
                return None
            name = parsed[-1]
        else:
            name = None
        try:
            return SnapshotterToDB.import_(odbc, table, id_, log_id, name)
        except pyodbc.Error as e:
            self.warning(
                "Failed to load the snapshot from ODBC source: %s", e)
            return None
    elif stype in ("http", "https"):
        try:
            self.info("Downloading %s...", fname_snapshot)
            fname_snapshot = self.snapshot_file_name = wget.download(
                fname_snapshot, root.common.dirs.snapshots)
            print()
            sys.stdout.flush()
        except:
            self.exception("Failed to fetch the snapshot at \"%s\"",
                           fname_snapshot)
            return None
    try:
        return SnapshotterToFile.import_(fname_snapshot)
    except FileNotFoundError:
        if fname_snapshot.strip() != "":
            self.warning("Workflow snapshot %s does not exist", fname_snapshot)
        return None
def fetchurl(url, query=None):
    if query is not None:
        assert '?' not in url, ("Either include query in url "
                                "or pass as parameter, but not both")
        url += '?' + urlencode(query)
    proto, tail = splittype(url)
    if proto != 'http':
        raise RuntimeError("Unsupported protocol (only HTTP is supported)")
    host, tail = splithost(tail)
    cli = HTTPClient(host)
    resp = cli.request(tail, headers={'Host': host})
    if resp.status.endswith('200 OK'):
        return resp.body
    raise RequestError(resp.status, resp)
def stat_page(self):
    from urllib.parse import splittype, splithost
    from http.client import HTTPConnection
    url = self.get_url()
    self.log(20, 'Statting page {!r} at {!r}.'.format(self.name, url))
    (_, dp) = splittype(url)
    (host, path) = splithost(dp)
    conn = HTTPConnection(host)
    conn.request('HEAD', path)
    res = conn.getresponse()
    lmt_raw = res.getheader('last-modified')
    lm_dts = self._parse_http_dt(lmt_raw)
    return lm_dts
def __init__(self, uri, transport=None, encoding=None, verbose=False,
             allow_none=False, use_datetime=False):
    type, uri = urlparser.splittype(uri)
    # ('scgi') is just the string 'scgi'; a one-element tuple needs a trailing comma
    if type not in ('scgi',):
        raise IOError('unsupported XML-RPC protocol')
    self.__host, self.__handler = urlparser.splithost(uri)
    if not self.__handler:
        self.__handler = '/'

    if transport is None:
        transport = SCGITransport(use_datetime=use_datetime)
    self.__transport = transport

    self.__encoding = encoding
    self.__verbose = verbose
    self.__allow_none = allow_none
def request(self, uri, method="GET", body='', headers=None, redirections=httplib2.DEFAULT_MAX_REDIRECTS, connection_type=None): DEFAULT_POST_CONTENT_TYPE = 'application/x-www-form-urlencoded' if not isinstance(headers, dict): headers = {} if method == "POST": headers['Content-Type'] = headers.get('Content-Type', DEFAULT_POST_CONTENT_TYPE) is_form_encoded = \ headers.get('Content-Type') == 'application/x-www-form-urlencoded' if is_form_encoded and body: parameters = parse_qs(body) else: parameters = None req = Request.from_consumer_and_token(self.consumer, token=self.token, http_method=method, http_url=uri, parameters=parameters, body=body, is_form_encoded=is_form_encoded) req.sign_request(self.method, self.consumer, self.token) schema, rest = splittype(uri) if rest.startswith('//'): hierpart = '//' else: hierpart = '' host, rest = splithost(rest) realm = schema + ':' + hierpart + host if is_form_encoded: body = req.to_postdata() elif method == "GET": uri = req.to_url() else: headers.update(req.to_header(realm=realm)) return httplib2.Http.request(self, uri, method=method, body=body, headers=headers, redirections=redirections, connection_type=connection_type)
def __init__(self, uri, transport=None, encoding=None, verbose=0,
             version=None, headers=None, history=None,
             config=jsonrpclib.config.DEFAULT, context=None):
    """
    Sets up the server proxy

    :param uri: Request URI
    :param transport: Custom transport handler
    :param encoding: Specified encoding
    :param verbose: Log verbosity level
    :param version: JSON-RPC specification version
    :param headers: Custom additional headers for each request
    :param history: History object (for tests)
    :param config: A JSONRPClib Config instance
    :param context: The optional SSLContext to use
    """
    # Store the configuration
    self._config = config
    self.__version = version or config.version

    schema, uri = splittype(uri)
    if schema not in ('http', 'https'):
        _logger.error("jsonrpclib only supports http(s) URIs, not %s", schema)
        raise IOError('Unsupported JSON-RPC protocol.')

    self.__host, self.__handler = splithost(uri)
    if not self.__handler:
        # Not sure if this is in the JSON spec?
        self.__handler = '/'

    if transport is None:
        if schema == 'https':
            transport = SafeTransport(config=config, context=context)
        else:
            transport = Transport(config=config)
    self.__transport = transport

    self.__encoding = encoding
    self.__verbose = verbose
    self.__history = history

    # Global custom headers are injected into Transport
    self.__transport.push_headers(headers or {})
def proxy_open(self, req, proxy, type):
    orig_type = req.get_type()
    type, r_type = splittype(proxy)
    host, XXX = splithost(r_type)
    if '@' in host:
        user_pass, host = host.split('@', 1)
        user_pass = base64.encodestring(unquote(user_pass)).strip()
        req.add_header('Proxy-Authorization', 'Basic ' + user_pass)
    host = unquote(host)
    req.set_proxy(host, type)
    if orig_type == type:
        # let other handlers take care of it
        # XXX this only makes sense if the proxy is before the
        # other handlers
        return None
    else:
        # need to start over, because the other handlers don't
        # grok the proxy's URL type
        return self.parent.open(req)
def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0):
    # putrequest is called before connect, so can interpret url and get
    # real host/port to be used to make CONNECT request to proxy
    proto, rest = splittype(url)
    if proto is None:
        raise ValueError("unknown URL type: %s" % url)
    # get host
    host, rest = splithost(rest)
    # try to get port
    host, port = splitport(host)
    # if port is not defined try to get from proto
    if port is None:
        try:
            port = self._ports[proto]
        except KeyError:
            raise ValueError("unknown protocol for: %s" % url)
    self._real_host = host
    self._real_port = int(port)
    M2Crypto.httpslib.HTTPSConnection.putrequest(self, method, url,
                                                 skip_host, skip_accept_encoding)
def __init__(self, uri, transport=None, encoding=None, verbose=None, allow_none=0):
    utype, uri = splittype(uri)
    if utype not in ("http", "https"):
        raise IOError("Unsupported JSONRPC protocol")
    self.__host, self.__handler = splithost(uri)
    if not self.__handler:
        self.__handler = "/RPC2"
    if transport is None:
        if utype == "https":
            transport = SafeTransport()
        else:
            transport = Transport()
    self.__transport = transport
    self.__encoding = encoding
    self.__verbose = verbose
    self.__allow_none = allow_none
def post(self, query):
    i = self.institution
    logging.debug('posting data to %s' % i.url)
    logging.debug('---- request ----')
    logging.debug(query)
    garbage, path = splittype(i.url)
    host, selector = splithost(path)
    h = HTTPSConnection(host)
    h.request('POST', selector, query,
              {"Content-type": "application/x-ofx",
               "Accept": "*/*, application/x-ofx"})
    res = h.getresponse()
    response = res.read().decode('ascii', 'ignore')
    logging.debug('---- response ----')
    logging.debug(res.__dict__)
    logging.debug(response)
    res.close()
    return response
def parse_proxy(proxy):
    """_parse_proxy from urllib"""
    scheme, r_scheme = splittype(proxy)
    if not r_scheme.startswith("/"):
        # authority
        scheme = None
        authority = proxy
    else:
        # URL
        if not r_scheme.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
        # and 3.3.), path is empty or starts with '/'
        end = r_scheme.find("/", 2)
        if end == -1:
            end = None
        authority = r_scheme[2:end]
    userinfo, hostport = splituser(authority)
    if userinfo is not None:
        user, password = splitpasswd(userinfo)
    else:
        user = password = None
    return scheme, user, password, hostport
def __init__(self, uri, transport=None, encoding=None, verbose=0,
             version=None, headers=None, history=None,
             config=jsonrpclib.config.DEFAULT, context=None):
    """
    Sets up the server proxy

    :param uri: Request URI
    :param transport: Custom transport handler
    :param encoding: Specified encoding
    :param verbose: Log verbosity level
    :param version: JSON-RPC specification version
    :param headers: Custom additional headers for each request
    :param history: History object (for tests)
    :param config: A JSONRPClib Config instance
    :param context: The optional SSLContext to use
    """
    # Store the configuration
    self._config = config
    self.__version = version or config.version

    schema, uri = splittype(uri)
    use_unix = False
    if schema.startswith("unix+"):
        schema = schema[len("unix+"):]
        use_unix = True

    if schema not in ('http', 'https'):
        _logger.error("jsonrpclib only supports http(s) URIs, not %s", schema)
        raise IOError('Unsupported JSON-RPC protocol.')

    self.__host, self.__handler = splithost(uri)
    if use_unix:
        unix_path = self.__handler
        self.__handler = '/'
    elif not self.__handler:
        # Not sure if this is in the JSON spec?
        self.__handler = '/'

    if transport is None:
        if use_unix:
            if schema == "http":
                # In Unix mode, we use the path part of the URL (handler)
                # as the path to the socket file
                transport = UnixTransport(
                    config=config, path=unix_path
                )
        elif schema == 'https':
            transport = SafeTransport(config=config, context=context)
        else:
            transport = Transport(config=config)

        if transport is None:
            raise IOError(
                "Unhandled combination: UNIX={}, protocol={}"
                .format(use_unix, schema)
            )

    self.__transport = transport

    self.__encoding = encoding
    self.__verbose = verbose
    self.__history = history

    # Global custom headers are injected into Transport
    self.__transport.push_headers(headers or {})
def _parse_proxy(proxy):
    """Return (scheme, user, password, host/port) given a URL or an authority.

    If a URL is supplied, it must have an authority (host:port) component.
    According to RFC 3986, having an authority component means the URL must
    have two slashes after the scheme:

    >>> _parse_proxy('file:/ftp.example.com/')
    Traceback (most recent call last):
    ValueError: proxy URL with no authority: 'file:/ftp.example.com/'

    The first three items of the returned tuple may be None.

    Examples of authority parsing:

    >>> _parse_proxy('proxy.example.com')
    (None, None, None, 'proxy.example.com')
    >>> _parse_proxy('proxy.example.com:3128')
    (None, None, None, 'proxy.example.com:3128')

    The authority component may optionally include userinfo
    (assumed to be username:password):

    >>> _parse_proxy('joe:password@proxy.example.com')
    (None, 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('joe:password@proxy.example.com:3128')
    (None, 'joe', 'password', 'proxy.example.com:3128')

    Same examples, but with URLs instead:

    >>> _parse_proxy('http://proxy.example.com/')
    ('http', None, None, 'proxy.example.com')
    >>> _parse_proxy('http://proxy.example.com:3128/')
    ('http', None, None, 'proxy.example.com:3128')
    >>> _parse_proxy('http://joe:password@proxy.example.com/')
    ('http', 'joe', 'password', 'proxy.example.com')
    >>> _parse_proxy('http://joe:password@proxy.example.com:3128')
    ('http', 'joe', 'password', 'proxy.example.com:3128')

    Everything after the authority is ignored:

    >>> _parse_proxy('ftp://joe:password@proxy.example.com/rubbish:3128')
    ('ftp', 'joe', 'password', 'proxy.example.com')

    Test for no trailing '/' case:

    >>> _parse_proxy('http://joe:password@proxy.example.com')
    ('http', 'joe', 'password', 'proxy.example.com')
    """
    scheme, r_scheme = splittype(proxy)
    if not r_scheme.startswith("/"):
        # authority
        scheme = None
        authority = proxy
    else:
        # URL
        if not r_scheme.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
        # and 3.3.), path is empty or starts with '/'
        end = r_scheme.find("/", 2)
        if end == -1:
            end = None
        authority = r_scheme[2:end]
    userinfo, hostport = splituser(authority)
    if userinfo is not None:
        user, password = splitpasswd(userinfo)
    else:
        user = password = None
    return scheme, user, password, hostport
def get_type(self):
    if self.type is None:
        self.type, self.__r_type = splittype(self.__original)
        if self.type is None:
            raise ValueError("unknown url type: %s" % self.__original)
    return self.type