def is_in_page(driver, url):
    meta_a = urlparse(url)
    meta_b = urlparse(getattr(driver, 'current_url', ''))
    return all([
        meta_a.netloc == meta_b.netloc,
        meta_a.path.rstrip('/') == meta_b.path.rstrip('/'),
    ])
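# A minimal usage sketch for is_in_page (the stub driver below is
# hypothetical): trailing-slash differences are ignored, so these match.
class _FakeDriver:
    current_url = "https://example.com/search/"

assert is_in_page(_FakeDriver(), "https://example.com/search")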
def with_input(self, url, path):
    u = urlparse(process_url(url))
    if u.scheme not in self.file_store.supported:
        raise ValueError("Unsupported scheme - must be one of %s" %
                         (self.file_store.supported))
    if u.scheme == "file" and self.file_store.scheme != "file":
        raise ValueError("please upload your input file to the file store")
    u = urlparse(path)
    if u.scheme not in ["file", ""]:
        raise ValueError("runtime path must be defined as a local path")
    if u.scheme == "file":
        path = os.path.join(u.netloc, u.path)
    if path.startswith("./"):
        pass
    elif not os.path.isabs(path):
        raise ValueError(
            "runtime path must be an absolute path or start with './'")
    # Escape the dot so only a literal "./" prefix is stripped.
    self.input_files.append(
        tes.Input(
            path=os.path.join("/tmp/tesseract", re.sub(r"^\./", "", path)),
            url=url,
            type="FILE"))
def test_if_orcid_endpoint_url(self, service, auth_endpoint):
    url = service.create_authorization_url('SCOPE', 'URL')
    scheme, netloc, path, _, _, _ = urlparse(url)
    o_scheme, o_netloc, o_path, _, _, _ = urlparse(auth_endpoint)
    assert scheme == o_scheme
    assert netloc == o_netloc
    assert path == o_path
def __attrs_post_init__(self):
    u = urlparse(self.url)
    self.scheme = u.scheme
    self.bucket = u.netloc
    if self.scheme == "file":
        self.path = re.sub("/$", "", u.path)
    else:
        self.path = re.sub("^/|/$", "", u.path)
    if self.scheme != "file":
        self.provider = get_driver(
            lookup_provider(self.scheme, self.region))
        if self.key is None and self.secret is None:
            self.key, self.secret = lookup_credentials(self.scheme)
        # Openstack Swift
        if self.scheme == "swift":
            try:
                auth_url = urlparse(os.environ['OS_AUTH_URL'])
                self.swift_force_auth_url = "%s://%s" % (auth_url.scheme,
                                                         auth_url.netloc)
                self.swift_tenant_name = os.environ['OS_TENANT_NAME']
            except Exception:
                raise ValueError(
                    "OS_AUTH_URL and/or OS_TENANT_NAME were not found")
            self.driver = self.provider(
                key=self.key,
                secret=self.secret,
                ex_force_auth_url=self.swift_force_auth_url,
                ex_tenant_name=self.swift_tenant_name,
                ex_force_auth_version=self.swift_force_auth_version,
            )
        # Google Storage or Amazon S3
        else:
            if self.region is None:
                self.region = lookup_region(self.scheme)
            if self.project is None:
                self.project = lookup_project(self.scheme)
            self.driver = self.provider(key=self.key,
                                        secret=self.secret,
                                        region=self.region,
                                        project=self.project)
    self.key = None
    self.secret = None
    self.__create_store()
def _endpoint_from_url(self, url):
    url_path = urlparse(url).path
    base_url_path_parts = urlparse(self.api.base_url).path.split("/")
    if len(base_url_path_parts) > 2:
        # There are some extra directories in the path; remove them from the URL
        extra_path = "/".join(base_url_path_parts[:-1])
        url_path = url_path[len(extra_path):]
    split_url_path = url_path.split("/")
    if split_url_path[2] == "plugins":
        # Keep plugins in app path
        app = "/".join(split_url_path[2:4])
        name = split_url_path[4]
    else:
        app, name = split_url_path[2:4]
    return getattr(pynautobot.core.app.App(self.api, app), name)
def resolve_link(self, snow_record, field_to_resolve, **kparams):
    """
    Get the info from the link and return a SnowRecord.
    """
    try:
        link = snow_record.links()[field_to_resolve]
    except KeyError:
        return SnowRecord.NotFound(
            self, snow_record._table_name,
            "Could not find field %s in record" % field_to_resolve,
            [snow_record, field_to_resolve, self])
    if kparams:
        # Append '?' if the link has no query string yet, otherwise '&'.
        link += ('&', '?')[urlparse(link).query == '']
        link += '&'.join("%s=%s" % (key, val)
                         for (key, val) in kparams.items())
    linked_response = self.req("get", link)  # retry here...
    rjson = linked_response.json()
    rtablename = SnowRecord.tablename_from_link(link)
    # We could mutate the record in place:
    #     setattr(snow_record, field_to_resolve, linked)
    # but it is better not to, so just return a new record instead.
    if "result" in rjson:
        linked = SnowRecord(self, rtablename, **rjson["result"])
    else:
        linked = SnowRecord.NotFound(
            self, rtablename,
            "Could not resolve link %s" % link,
            [rjson, rtablename, link, self])
    return linked
def __validate_url(self, attribute, value):
    u = urlparse(value)
    if u.scheme == "file" and u.netloc != "":
        raise ValueError("invalid url")
    if u.scheme not in self.supported:
        raise ValueError("Unsupported scheme - must be one of %s" %
                         (self.supported))
def __check_url(self, attribute, value):
    u = urlparse(value)
    if u.scheme not in ["http", "https"]:
        raise ValueError(
            "Unsupported URL scheme - must be one of %s" % (["http", "https"])
        )
def __init__(self, url='http://localhost:8069', timeout=120, version=None):
    try:
        if timeout is not None:
            timeout = float(timeout)
    except (ValueError, TypeError):
        raise ValueError("The timeout must be a float")
    parse_result = urlparse(url)
    self._root_url = url
    self._host = parse_result.hostname
    self._port = parse_result.port or DEFAULT_PORTS.get(parse_result.scheme)
    self._protocol = parse_result.scheme
    self._env = None
    self._login = None
    self._password = None
    self._db = DB(self)
    self._report = Report(self)
    # Instantiate the server connector
    try:
        self._client = Client(self._root_url, timeout=timeout,
                              version=version)
    except ConnectorError as exc:
        raise error.InternalError(exc.message)
    # Dictionary of configuration options
    self._config = tools.Config(self, {
        'auto_commit': True,
        'auto_context': True,
        'timeout': timeout
    })
def parse_request_uri(self):
    properties = self.parse_request_properties()
    zone = properties.get("zone", "")
    port = str(self.config.port)
    endpoint = "".join(
        [self.config.protocol, "://", self.config.host, ":", port])
    if zone != "":
        endpoint = "".join([
            self.config.protocol, "://", zone, ".", self.config.host, ":",
            port
        ])
    request_uri = self.operation["URI"]
    if len(properties):
        for (k, v) in properties.items():
            endpoint = endpoint.replace("<%s>" % k, v)
            request_uri = request_uri.replace("<%s>" % k, v)
    parsed_uri = endpoint + request_uri
    parsed_params = self.parse_request_params()
    if len(parsed_params):
        scheme, netloc, path, params, req_query, fragment = urlparse(
            parsed_uri, allow_fragments=False)
        query = [req_query]
        for (k, v) in parsed_params.items():
            query.append("%s=%s" % (k, v))
        if not req_query:
            query.pop(0)
        parsed_uri = urlunparse(
            (scheme, netloc, path, params, "", fragment)) + "?" + "&".join(
                sorted(query))
    return parsed_uri
def load_url(url, model_dir=None, map_location=None):
    r"""Loads the Torch serialized object at the given URL.

    If the object is already present in `model_dir`, it's deserialized and
    returned. The filename part of the URL should follow the naming
    convention ``filename-<sha256>.ext`` where ``<sha256>`` is the first
    eight or more digits of the SHA256 hash of the contents of the file.
    The hash is used to ensure unique names and to verify the contents of
    the file.

    The default value of `model_dir` is ``$TORCH_HOME/models`` where
    ``$TORCH_HOME`` defaults to ``~/.torch``. The default directory can be
    overridden with the ``$TORCH_MODEL_ZOO`` environment variable.

    Args:
        url (string): URL of the object to download
        model_dir (string, optional): directory in which to save the object
        map_location (optional): a function or a dict specifying how to
            remap storage locations (see torch.load)

    Example:
        >>> state_dict = torch.utils.model_zoo.load_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth')
    """
    if model_dir is None:
        torch_home = os.path.expanduser(os.getenv('TORCH_HOME', '~/.torch'))
        model_dir = os.getenv('TORCH_MODEL_ZOO',
                              os.path.join(torch_home, 'models'))
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    parts = urlparse(url)
    filename = os.path.basename(parts.path)
    cached_file = os.path.join(model_dir, filename)
    if not os.path.exists(cached_file):
        sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
        hash_prefix = HASH_REGEX.search(filename).group(1)
        _download_url_to_file(url, cached_file, hash_prefix)
    return torch.load(cached_file, map_location=map_location)
def test_url_optional_state_with_spaces(self, orcid_with_params):
    url = orcid_with_params.create_authorization_url(
        'SCOPE', 'URL', state='STATE WITH SPACES'
    )
    _, _, _, _, query, _ = urlparse(url)
    assert 'state=STATE+WITH+SPACES' in query
def load_from_url(url, save_dir='facelet_bank'):
    parts = urlparse(url)
    filename = os.path.basename(parts.path)
    cached_file = os.path.join(app_config.MODEL_DIR, save_dir, filename)
    if not os.path.exists(cached_file):
        sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
        _download_url_to_file(url, cached_file)
    return torch.load(cached_file)
def __init__(self, docker_daemon: dict, dispatch_queue: Queue):
    self.url = urlparse(docker_daemon.get('host'))
    self._client_session = self.__configure_docker_client(docker_daemon)
    self._docker_info = self._client_session.version()
    self._dispatch_queue = dispatch_queue
    self._handler = ProcessHandler()
    super(Officer, self).__init__()
def test_if_url_contains_optional_state(self, orcid_with_params):
    url = orcid_with_params.create_authorization_url(
        'SCOPE', 'URL', state='STATE'
    )
    _, _, _, _, query, _ = urlparse(url)
    assert 'state=STATE' in query
def wait(self, url):
    domain = urlparse(url).netloc
    last_accessed = self.domains.get(domain)
    if self.delay > 0 and last_accessed is not None:
        sleep_secs = self.delay - (datetime.datetime.now() -
                                   last_accessed).seconds
        if sleep_secs > 0:
            # This domain was accessed recently, so sleep before hitting it again.
            time.sleep(sleep_secs)
    self.domains[domain] = datetime.datetime.now()
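# Hedged usage sketch: `Throttle` is a hypothetical owner of wait(); any
# object with a `delay` (in seconds) and a `domains` dict behaves the same.
throttle = Throttle(delay=2)
throttle.wait("http://example.com/page1")   # first hit, no sleep
throttle.wait("http://example.com/page2")   # same domain, sleeps up to 2s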
def process_url(value):
    value = strconv(value)
    u = urlparse(value)
    if u.scheme == "":
        url = "file://" + os.path.abspath(value)
    elif u.scheme == "file" and u.netloc != "":
        url = "file://" + os.path.abspath(os.path.join(u.netloc, u.path))
    else:
        url = value
    return url
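# A sketch of process_url's normalization (output paths depend on the
# current working directory): bare local paths gain a file:// scheme,
# while URLs with a recognized scheme pass through untouched.
print(process_url("data/input.txt"))   # e.g. file:///cwd/data/input.txt
print(process_url("s3://bucket/key"))  # unchanged: s3://bucket/key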
def find_href_src(html_str):
    '''Extract every link on the page and deduplicate them with sets.'''
    # Skip javascript: pseudo-links and any link carrying a fragment.
    href = set(
        i for i in re.findall(r'href\s*=[\'\"]([^\'\">]*)[\'\"][^>]*>', html_str)
        if 'javascript' not in i and urlparse(i).fragment == '')
    # strip('url()') peels the characters of a CSS url(...) wrapper off both ends.
    src = set(
        a.strip('url()')
        for a in re.findall(r'src=\s*[\"\']([^\"\'>]+)[\'\"]', html_str))
    return href | src
def _log_request(self, url, method, response, duration):
    """Log request, store traceback/response data and update counts."""
    domain = urlparse(url).netloc
    if not self._check_domain(domain):
        return
    m_init = 'monitor_requests/__init__.py'
    tb_list = [f for f in traceback.format_stack() if m_init not in f]
    if self._check_mocked(tb_list):
        return
    self.data.log(url, domain, method, response, tb_list, duration)
def download_model(url, model_dir, hash_prefix, progress=True):
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    parts = urlparse(url)
    filename = os.path.basename(parts.path)
    cached_file = os.path.join(model_dir, filename)
    if not os.path.exists(cached_file):
        sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
        _download_url_to_file(url, cached_file, hash_prefix,
                              progress=progress)
    unzip_file(cached_file, os.path.join(model_dir, filename.split('.')[0]))
def step_impl(context):
    current_acl = bucket.get_acl()
    if is_changed is True:
        # Filter instead of removing while iterating, which skips elements.
        current_acl["acl"] = [
            x for x in current_acl["acl"]
            if x["grantee"]["type"] != "group"
        ]
        bucket.put_acl(current_acl["acl"])
    result = urlparse(context.output)
    object_key = result.path
    assert_that(object_key.split("/")[-1].strip()
                ).is_equal_to(context.input[0]["name"])
def is_url(url):
    """Check if provided string is a url.

    Args:
        url (str): url to check

    Returns:
        bool: True if arg url is a valid url
    """
    scheme = requtil.urlparse(str(url)).scheme
    return scheme in ('http', 'https',)
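# Usage sketch: only http and https schemes are treated as URLs here.
assert is_url("https://example.com")
assert not is_url("/local/path")
assert not is_url("ftp://example.com")  # ftp is deliberately excluded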
def get_sas_parts(cls, sas_uri):
    """
    Parse a SAS URL into component parts
    """
    parsed = urlparse(sas_uri)
    account_name, service, endpoint_suffix = parsed.hostname.split(".", 2)
    if service != "blob":
        raise ValueError("expected a blob service SAS URI")
    query = parsed.query
    path = parsed.path.split("/")[1:]
    container = path[0]
    blob_name = "/".join(path[1:])
    return account_name, endpoint_suffix, container, blob_name, query
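# Usage sketch for get_sas_parts with a made-up SAS URI; the owning class
# name `SasClient` is assumed. The hostname must follow the
# <account>.blob.<endpoint-suffix> convention.
sas = ("https://myaccount.blob.core.windows.net/mycontainer/dir/file.txt"
       "?sv=2020-08-04&sig=abc")
account, suffix, container, blob, query = SasClient.get_sas_parts(sas)
# account == "myaccount", suffix == "core.windows.net",
# container == "mycontainer", blob == "dir/file.txt"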
def __init__(self, docker_daemon: dict, brokerage_queue: Queue,
             dispatch_queue: Queue, inventory, inventory_event: Event):
    self.url = urlparse(docker_daemon.get('host'))
    self._client_session = self.__configure_docker_client(docker_daemon)
    self._docker_info = self._client_session.version()
    self._brokerage_queue = brokerage_queue
    self._dispatch_queue = dispatch_queue
    self._handler = ProcessHandler()
    self._inventory_event = inventory_event
    self._inventory = inventory
    self._start_time = int(time.time())
    super(Inspector, self).__init__()
def make_response(self):
    """Handle m3u8 video files.

    The m3u8 file fetched from the original address lacks the domain in its
    segment URLs, so the video cannot play as-is; prepend the domain here.
    """
    logger.info(f"M3U8Handler is processing: {self.raw_url}")
    _, data_iter = self.get_stream()
    url_info = urlparse(self.raw_url)
    domain = f"{url_info.scheme}://{url_info.hostname}/"
    # Read one chunk from the iterator; an m3u8 file is normally well under
    # 512 KB, so a single read is enough.
    text = next(data_iter).decode('utf-8')
    text = re.sub(r'\n(\d+?.+?ts)', fr'\n{domain}\1', text)
    return Response(text, status=200)
def get_url_filename(url):
    """Parse filename from url

    Args:
        url (str): url to parse

    Returns:
        str: filename of url
    """
    path = requtil.urlparse(url).path
    file_name = Path(path).name
    return file_name
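# Usage sketch: the filename comes from the URL path; query strings are ignored.
assert get_url_filename("https://example.com/files/report.pdf?dl=1") == "report.pdf"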
def parse_request_headers(self):
    parsed_headers = dict()
    if "Headers" in self.operation:
        for (k, v) in self.operation["Headers"].items():
            if v != "" and v is not None:
                if k[:5].lower() == "x-qs-":
                    k = k.lower()
                    if is_python2:
                        parsed_headers[k] = quote(
                            unicode(v).encode("utf-8"))
                    elif is_python3:
                        parsed_headers[k] = quote(str(v))
                else:
                    parsed_headers[k] = v

        # Handle header Date
        if is_python2:
            parsed_headers["Date"] = self.operation["Headers"].get(
                "Date",
                strftime("%a, %d %b %Y %H:%M:%S GMT".encode("utf-8"),
                         gmtime()))
        elif is_python3:
            parsed_headers["Date"] = self.operation["Headers"].get(
                "Date", strftime("%a, %d %b %Y %H:%M:%S GMT", gmtime()))

        # Handle header User-Agent
        parsed_headers["User-Agent"] = (
            "qingstor-sdk-python/{sdk_version} "
            "(Python v{python_version}; {system})").format(
                sdk_version=__version__,
                python_version=platform.python_version(),
                system=sys.platform)

        # Handle header Content-Type
        parsed_body, is_json = self.parse_request_body()
        filename = urlparse(self.parse_request_uri()).path
        parsed_headers["Content-Type"] = self.operation["Headers"].get(
            "Content-Type") or mimetypes.guess_type(filename)[0]
        if is_json:
            parsed_headers["Content-Type"] = "application/json"
        if parsed_headers["Content-Type"] is None:
            parsed_headers["Content-Type"] = "application/octet-stream"

        # Handle specific API
        if "API" in self.operation:
            if self.operation["API"] == "DeleteMultipleObjects":
                md5obj = hashlib.md5()
                md5obj.update(parsed_body.encode())
                parsed_headers["Content-MD5"] = base64.b64encode(
                    md5obj.digest()).decode()

    return parsed_headers
def detect(resource):
    if isinstance(resource, UploadIO):
        return resource.file_ext
    elif 'filename' in dir(resource):
        path = resource.filename
    elif isinstance(resource, FileIO):
        path = resource.name
    elif isinstance(resource, Result):
        path = resource.file.filename
    else:
        path = utils.urlparse(resource).path
    return splitext(path)[1][1:].lower()
def _configure_endpoint(self, endpoint):
    if endpoint is None:
        endpoint = DEFAULT_REGISTRY
    alias = self.config.get_registry_alias(endpoint)
    if alias:
        endpoint = alias
    if not re.match("https?://", endpoint):
        if endpoint.startswith("localhost"):
            scheme = "http://"
        else:
            scheme = "https://"
        endpoint = scheme + endpoint
    return urlparse(endpoint + DEFAULT_PREFIX)
def get_token(self):
    url = "https://www.zhipin.com/job_detail"
    params = {"query": self.query, "city": self.city, "position": ""}
    ret = session.get(url, headers=headers, params=params,
                      allow_redirects=False)
    print(ret.status_code)
    print(ret.content.decode())
    print(ret.headers)
    location = ret.headers.get("location")
    seed = unquote(location)
    path = urlparse(seed).query
    params = path.split("&")
    print(params)
    item = {}
    for p in params:
        p_list = p.split("=")
        item[p_list[0]] = p_list[1]
    print(item)
    self.seed = item.get('seed') + "="
    self.ts = item.get('ts')
    self.name = item.get('name')
    jscode = f"""let seed = "{self.seed}", ts = {self.ts};"""
    ret = getencpassword(session, "./ABC.z.js", jscode)
    print(ret)
    o_cookie = {
        "__zp_stoken__": quote(ret),
        # "_l": quote(f"""
        # l=/job_detail/?query=爬虫&city=101200100&industry=&position=&r=https://www.zhipin.com/web/common/security-check.html?seed={self.seed}&name={self.name}&ts={self.ts}&callbackUrl=/job_detail/?query=爬虫&city=101200100&industry=&position=&srcReferer=https://www.zhipin.com/web/common/security-check.html?seed={self.seed}&name={self.name}&ts={self.ts}&callbackUrl=/job_detail/?query=爬虫&city=101200100&industry=&position=&srcReferer=https://www.zhipin.com/web/common/security-check.html?seed={self.seed}&name={self.name}&ts={self.ts}&callbackUrl=/job_detail/?query=爬虫&city={self.city}&industry=&position=&srcReferer=https://www.zhipin.com/wuhan/&friend_source=0
        # """),
        "_a": "77323247.1586174956..1586174956.1.1.1.1",
        "_c": f"{self.ts}",
        "_g": "-",
        # "Hm_lvt_194df3105ad7148dcf2b98a91b5e727a": "1586159333,1586160680,1586163030,1586163673",
        # "Hm_lpvt_194df3105ad7148dcf2b98a91b5e727a": "1586168065"
    }
    cookie = cookiejar_from_dict(o_cookie)
    session.cookies = cookie
    url = "https://www.zhipin.com/" + location
    # dict.update() returns None, so update the headers first, then pass them.
    headers.update({
        "referer": ("https://www.zhipin.com/job_detail/?query="
                    f"{self.query}&city=100010000&industry=&position=")
    })
    ret = session.get(url, headers=headers)
    print(ret)
def sign(self):
    self.req.headers["Authorization"] = "".join([
        "QS ", self.access_key_id, ":", self.get_authorization()
    ])
    self.logger.debug(self.req.headers["Authorization"])
    prepared = self.req.prepare()
    scheme, netloc, path, params, query, fragment = urlparse(
        prepared.url, allow_fragments=False
    )
    path = quote(unquote(path))
    prepared.url = urlunparse(
        (scheme, netloc, path, params, query, fragment)
    )
    return prepared
def load_url(url, model_dir=None, map_location=None, progress=True):
    if model_dir is None:
        torch_home = os.path.expanduser(os.getenv('TORCH_HOME', '~/.torch'))
        model_dir = os.getenv('TORCH_MODEL_ZOO',
                              os.path.join(torch_home, 'models'))
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    parts = urlparse(url)
    filename = os.path.basename(parts.path)
    cached_file = os.path.join(model_dir, filename)
    if not os.path.exists(cached_file):
        sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
        hash_prefix = HASH_REGEX.search(filename).group(1)
        _download_url_to_file(url, cached_file, hash_prefix,
                              progress=progress)
    return torch.load(cached_file, map_location=map_location)
def url2local(url):
    '''Convert a URL into a local filename; links without an extension get
    the module-level `suffix` appended.
    '''
    # Take the path component of the URL
    temp = urlparse(url).path
    # Split it on slashes
    temp = re.split('/', temp)
    # Join the parts into a local path
    local_name = os.path.join(*temp).strip('/')
    # Get the extension
    ext = os.path.splitext(local_name)[1]
    # If there is none, append the configured suffix
    if ext == '':
        local_name += suffix
    return local_name
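# Usage sketch, assuming the module-level `suffix` is ".html": URL paths map
# to relative local names, and extension-less links gain the suffix.
suffix = ".html"
assert url2local("http://example.com/img/x.png") == os.path.join("img", "x.png")
assert url2local("http://example.com/a/b/page") == os.path.join("a", "b", "page") + ".html"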
def step_impl(context):
    if is_changed is True:
        bucket.put_acl([{
            "grantee": {
                "type": "group",
                "name": "QS_ALL_USERS"
            },
            "permission": "READ"
        }])
    result = urlparse(context.output)
    params = result.query
    object_key = result.path
    assert_that(params).contains("signature")
    assert_that(params).contains("access_key_id")
    assert_that(params).contains("expires")
    assert_that(object_key.split("/")[-1]).is_equal_to(
        context.input[0]["name"])
def tls_config_from_options(options):
    tls = options.get('--tls', False)
    ca_cert = options.get('--tlscacert')
    cert = options.get('--tlscert')
    key = options.get('--tlskey')
    verify = options.get('--tlsverify')
    hostname = urlparse(options.get('--host') or '').hostname

    advanced_opts = any([ca_cert, cert, key, verify])

    if tls is True and not advanced_opts:
        return True
    elif advanced_opts:
        client_cert = None
        if cert or key:
            client_cert = (cert, key)
        return TLSConfig(
            client_cert=client_cert, verify=verify, ca_cert=ca_cert,
            assert_hostname=(
                hostname or not options.get('--skip-hostname-check', False)
            )
        )
    else:
        return None
def test_if_url(self, orcid_with_params):
    url = orcid_with_params.create_authorization_url('SCOPE', 'URL')
    scheme, netloc, _, _, _, _ = urlparse(url)
    assert scheme != ''
    assert netloc != ''
def test_if_url_contains_scope(self, orcid_with_params):
    url = orcid_with_params.create_authorization_url('SCOPE', 'URL')
    _, _, _, _, query, _ = urlparse(url)
    assert 'scope=SCOPE' in query
def test_if_url_contains_redirect_uri(self, orcid_with_params):
    url = orcid_with_params.create_authorization_url('SCOPE', 'URL')
    _, _, _, _, query, _ = urlparse(url)
    assert 'redirect_uri=URL' in query
def handle_jscode_source(self, source):
    # This is where we get the box and session IDs from.
    script_url = urlparse(source)
    script_query = parse_qs(script_url.query)
    self.params["sid"] = script_query["sid"][0]
    self.params["bid"] = script_query["b"][0]
def __init__(self, url):
    self.base_url = urlparse(url).netloc
    self.url = 'http://%s' % self.base_url
    self.session = requests.Session()
    self.session.headers.update({'Accept': 'application/json'})
def kwick_login(self, kwick_username, kwick_password):
    """
    On success:
    {'loggedIn': True,
     'session_id': 'a hash',
     'session_name': 'a2K3j8G1',
     'userid': XXXXXXX}

    The name of the session is always the same.
    """
    url = '/login'
    data = dict(
        kwick_username=kwick_username,
        kwick_password=kwick_password
    )
    r = self.request(url, data, mobile=True, quirk=False)
    if r:
        self.superapi_session.cookies.set(
            'a2K3j8G1',
            self.mobile_session.cookies['a2K3j8G1'],
            domain=urlparse(self.superapi_host).netloc)
    return self.request(url, data)