Example #1
 def validate_ticket(self, ticket, request):
     service_name = self.service_name
     ticket_name = self.ticket_name
     this_url = self.get_url(request)
     p = urlparse.urlparse(this_url)
     qs_map = urlparse.parse_qs(p.query)
     if ticket_name in qs_map:
         del qs_map[ticket_name]
     param_str = urlencode(qs_map, doseq=True)
     p = urlparse.ParseResult(*tuple(p[:4] + (param_str, ) + p[5:]))
     service_url = urlparse.urlunparse(p)
     params = {
         service_name: service_url,
         ticket_name: ticket,
     }
     param_str = urlencode(params, doseq=True)
     p = urlparse.urlparse(self.cas_info['service_validate_url'])
     p = urlparse.ParseResult(*tuple(p[:4] + (param_str, ) + p[5:]))
     service_validate_url = urlparse.urlunparse(p)
     self.log("Requesting service-validate URL => '{0}' ...".format(
         service_validate_url))
     http_client = HTTPClient(self.cas_agent)
     d = http_client.get(service_validate_url)
     d.addCallback(treq.content)
     d.addCallback(self.parse_sv_results, service_url, ticket, request)
     return d
Example #2
    def setUp(self):
        super(TestClientHTTPBasicAuth, self).setUp()
        conf = ceilometer_service.prepare_service(argv=[], config_files=[])
        self.CONF = self.useFixture(config_fixture.Config(conf)).conf
        self.parsed_url = urlparse.urlparse(
            'http://127.0.0.1:8080/controller/nb/v2?container_name=default&'
            'container_name=egg&auth=%s&user=admin&password=admin_pass&'
            'scheme=%s' % (self.auth_way, self.scheme))
        self.params = urlparse.parse_qs(self.parsed_url.query)
        self.endpoint = urlparse.urlunparse(
            urlparse.ParseResult(self.scheme, self.parsed_url.netloc,
                                 self.parsed_url.path, None, None, None))
        odl_params = {
            'auth': self.params.get('auth')[0],
            'user': self.params.get('user')[0],
            'password': self.params.get('password')[0]
        }
        self.client = client.Client(self.CONF, self.endpoint, odl_params)

        self.resp = mock.MagicMock()
        self.get = mock.patch('requests.get', return_value=self.resp).start()

        self.resp.raw.version = 1.1
        self.resp.status_code = 200
        self.resp.reason = 'OK'
        self.resp.headers = {}
        self.resp.content = 'dummy'
Example #3
def wbsearchentities(
    search,
    language='en',
    strictlanguage=False,
    type_='item',
    limit=7,
    continue_=0,
    props=(),
):
    """Wrap Wikibase API wbsearchentities module: https://www.wikidata.org/w/api.php?action=help&modules=wbsearchentities"""
    query = {
        'action': 'wbsearchentities',
        'search': search,
        'language': language,
        'type': type_,
        'limit': limit,
        'continue': continue_,
        'format': 'json'
    }
    if strictlanguage:
        query['strictlanguage'] = True
    if any(props):
        query['props'] = sep_join(props)
    url = parse.urlunparse(
        parse.ParseResult(scheme='https',
                          netloc='www.wikidata.org',
                          path='/w/api.php',
                          params='',
                          query=parse.urlencode(query),
                          fragment=''))
    response = requests.get(url)
    print(f'[Status {response.status_code}]: {parse.unquote(response.url)}',
          file=sys.stderr)
    if response.status_code == 200:
        return response.json()
Example #4
    def get_sample_data(self, meter_name, parse_url, params, cache):

        extractor = self._get_extractor(meter_name)
        if extractor is None:
            # Getting this meter is not implemented in this driver, or the
            # OpenDaylight REST API has no endpoint to retrieve it.
            return None

        iter = self._get_iter(meter_name)
        if iter is None:
            # Getting this meter is not implemented in this driver, or the
            # OpenDaylight REST API has no endpoint to retrieve it.
            return None

        parts = urlparse.ParseResult(
            params.get('scheme', ['http'])[0], parse_url.netloc,
            parse_url.path, None, None, None)
        endpoint = urlparse.urlunparse(parts)

        data = self._prepare_cache(endpoint, params, cache)

        samples = []
        for name, value in six.iteritems(data):
            for sample in iter(extractor, value):
                if sample is not None:
                    # set controller name and container name
                    # to resource_metadata
                    sample[2]['controller'] = 'OpenDaylight'
                    sample[2]['container'] = name

                    samples.append(sample + (None, ))

        return samples
Example #5
def validate_indico_url(url):
    """Construct the indico URL needed to get required timetable contents.
    
    e.g. https://indico.cern.ch/event/662485/timetable/?view=standard
    
    Parameters
    ----------
    url : str
        Base URL to use
    
    Returns
    -------
    str
        Correctly formatted URL
    """
    url_info = urlparse.urlparse(url)
    path_parts = url_info.path.split("/")
    start_ind = path_parts.index("event")
    parts = path_parts[start_ind:start_ind + 2]
    parts.append("timetable")
    new_path = "/".join([""] + parts + [""])
    new_url_info = urlparse.ParseResult(scheme=url_info.scheme,
                                        netloc=url_info.netloc,
                                        path=new_path,
                                        params='',
                                        query='view=standard',
                                        fragment='')
    return new_url_info.geturl()
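A quick sanity check of the function above (the event ID is the one from the docstring): any path segments after the event ID are replaced by the timetable view.

url = validate_indico_url("https://indico.cern.ch/event/662485/contributions/")
print(url)  # https://indico.cern.ch/event/662485/timetable/?view=standard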
Example #6
def enforce_domain_https():
    url = parse.urlparse(request.url)
    is_http = 'prematch.org' in url.netloc.lower() and url.scheme == 'http'
    if 'appspot' in url.netloc or is_http:
        newurl = parse.ParseResult('https', 'prematch.org', url.path,
                                   url.params, url.query, url.fragment)
        return redirect(newurl.geturl(), code=301)
Example #7
def detect_urls_in_document(document: str, url: str) -> List[str]:

    url_parts = parse.urlparse(url)
    soup = BeautifulSoup(document, "html.parser")
    links = list(soup.select("a"))

    # Handle each <a> link found in the document.
    # For relative references, fall back to the current URL's scheme and netloc.
    urls = []
    for item in links:

        href = item.get("href", None)

        if not href:
            continue

        parts = parse.urlparse("{}".format(href))
        if not parts.path:
            continue

        # Keep scheme-less (relative) links so the fallback below can fill them in.
        if parts.scheme and parts.scheme not in ('http', 'https'):
            continue

        updated_parts = parse.ParseResult(
            scheme=parts.scheme if parts.scheme else url_parts.scheme,
            netloc=parts.netloc if parts.netloc else url_parts.netloc,
            path=parts.path,
            query=parts.query,
            params=parts.params,
            fragment=parts.fragment)
        urls.append(parse.urlunparse(updated_parts))

    return urls
Example #8
    def test_novnc_bad_token(self):
        """Test accessing novnc console with bad token

        Do the WebSockify HTTP Request to novnc proxy with a bad token,
        the novnc proxy should reject the connection and closed it.
        """
        if self.use_get_remote_console:
            body = self.client.get_remote_console(
                self.server['id'], console_type='novnc',
                protocol='vnc')['remote_console']
        else:
            body = self.client.get_vnc_console(self.server['id'],
                                               type='novnc')['console']
        self.assertEqual('novnc', body['type'])
        # Do the WebSockify HTTP Request to novncproxy with a bad token
        parts = urlparse.urlparse(body['url'])
        qparams = urlparse.parse_qs(parts.query)
        if 'path' in qparams:
            qparams['path'] = urlparse.unquote(qparams['path'][0]).replace(
                'token=', 'token=bad')
        elif 'token' in qparams:
            qparams['token'] = 'bad' + qparams['token'][0]
        new_query = urlparse.urlencode(qparams)
        new_parts = urlparse.ParseResult(parts.scheme, parts.netloc,
                                         parts.path, parts.params, new_query,
                                         parts.fragment)
        url = urlparse.urlunparse(new_parts)
        self._websocket = compute.create_websocket(url)
        # Make sure the novncproxy rejected the connection and closed it
        data = self._websocket.receive_frame()
        self.assertTrue(
            data is None or not data,
            "The novnc proxy actually sent us some data, but we "
            "expected it to close the connection.")
Example #9
def imdb_key_words(imdb_url):
    return_tuple_kw_votes = []
    page_url_parts = url_parse.urlparse(imdb_url)
    print(page_url_parts)
    new_url = url_parse.ParseResult(scheme=page_url_parts.scheme,
                                    netloc=page_url_parts.netloc,
                                    path=page_url_parts.path + "/keywords",
                                    params='',
                                    query='',
                                    fragment='')

    keywords_url = url_parse.urlunparse(new_url)
    print(keywords_url)
    page = requests.get(keywords_url)
    soup = BeautifulSoup(page.content, 'html.parser')
    keyword_list = soup.findAll("td", {"class": "soda sodavote"})
    print(keyword_list)
    for keyword_entry in keyword_list:
        keyword = (keyword_entry.find('div', {
            "class": "sodatext"
        }).find('a').text)
        vote_text = keyword_entry.find('div', {
            'class': 'interesting-count-text'
        }).find('a').text
        m = re.match(" ([0-9]+)", vote_text)
        #print(vote_text)
        if m and int(m.group(1)) > 0:
            return_tuple_kw_votes.append((keyword, int(m.group(1))))
            print(keyword, int(m.group(1)))

        #print(keyword_entry.a.text)
        #print(len(movie_desc))
    return return_tuple_kw_votes
Example #10
def force_https(parsed):
    return parse.ParseResult(scheme="https",
                             netloc=parsed.netloc,
                             path=parsed.path,
                             params=parsed.params,
                             query=parsed.query,
                             fragment=parsed.fragment)
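A minimal usage sketch, assuming `from urllib import parse` as in the snippet:

from urllib import parse

u = parse.urlparse("http://example.com/a/b?x=1#top")
print(force_https(u).geturl())  # https://example.com/a/b?x=1#top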
Example #11
def download_chapters(chapter_hrefs: List[Chapter], base_directory: str):
    processing_list = []
    for chapter in chapter_hrefs:
        target = os.path.join(base_directory, chapter.directory)
        logger.info(f"Making directory {target}")
        os.makedirs(target, exist_ok=True)
        for index, image in enumerate(chapter.images_hrefs, start=1):
            logger.debug(f"Processing {image}")
            parsed = parse.urlparse(image)
            safe_url = parse.ParseResult(
                scheme=parsed.scheme if parsed.scheme else "http",
                netloc=parsed.netloc,
                path=parsed.path,
                params=parsed.params,
                query=parsed.query,
                fragment=parsed.fragment)
            match = re.search("^.*(?P<extension>\\.[a-zA-Z]+)$",
                              parsed.path.strip())
            extension = match.group("extension") if match else ""
            filepath = os.path.join(base_directory, chapter.directory,
                                    str(index).rjust(3, '0') + extension)
            processing_list.append((filepath, safe_url.geturl(), index))
    with ThreadPoolExecutor(max_workers=image_downloading_workers) as executor:
        futures = []
        for (filepath, url, index) in processing_list:
            futures.append(
                executor.submit(download_image, filepath, url, index))
        for future in futures:
            future.result()
Example #12
def urlparse(url: str,
             scheme: str = '',
             allow_fragments: bool = True) -> 'ParseResult':
    """Wrapper function for :func:`urllib.parse.urlparse`.

    Args:
        url: URL to be parsed
        scheme: URL scheme
        allow_fragments: if allow fragments

    Returns:
        The parse result.

    Note:
        The function suppressed possible errors when calling
        :func:`urllib.parse.urlparse`. If any, it will return
        ``urllib.parse.ParseResult(scheme=scheme, netloc='', path=url, params='', query='', fragment='')``
        directly.

    """
    with contextlib.suppress(ValueError):
        return urllib_parse.urlparse(url,
                                     scheme,
                                     allow_fragments=allow_fragments)
    return urllib_parse.ParseResult(scheme=scheme,
                                    netloc='',
                                    path=url,
                                    params='',
                                    query='',
                                    fragment='')
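A small sketch of the fallback described in the note: an unbalanced IPv6 bracket makes urllib.parse.urlparse raise ValueError, so the wrapper returns the raw URL in ``path``.

result = urlparse('http://[invalid')
print(result.path)  # http://[invalid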
Example #13
def strip_query(parsed):
    return parse.ParseResult(scheme=parsed.scheme,
                             netloc=parsed.netloc,
                             path=parsed.path,
                             params=parsed.params,
                             query=None,
                             fragment=parsed.fragment)
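Usage sketch: the query is dropped while path and fragment survive (urlunparse treats the None query as empty).

u = parse.urlparse("https://example.com/page?track=1#sec")
print(strip_query(u).geturl())  # https://example.com/page#sec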
Example #14
def trail_request(method,
                  path,
                  parameters: typing.Dict = {},
                  body: typing.Any = None):
    url = parse.ParseResult(scheme='http',
                            netloc='localhost:8080',
                            path=path,
                            query=parse.urlencode(parameters),
                            params='',
                            fragment='')

    req = request.Request(
        parse.urlunparse(url),
        headers={
            'accept': 'application/json',
            'Content-Type': 'application/json'
        },
        method=method,
        data=None if body is None else json.dumps(body).encode())

    with request.urlopen(req) as f:
        if f.status == 204:
            response = []
        else:
            response = json.load(f)

        if f.status >= 400:
            raise RuntimeError(f'{response["message"]}, '
                               f'reasons: {response["reasons"]}')

    return response
Example #15
def default_pfnunparse(pfnDict):
    """
    Create PFN URI from pfnDict

    :param dict pfnDict:
    """

    try:
        if not isinstance(pfnDict, dict):
            return S_ERROR("pfnunparse: wrong type for pfnDict argument, expected a dict, got %s" % type(pfnDict))
        allDict = dict.fromkeys(["Protocol", "Host", "Port", "Path", "FileName", "Options"], "")
        allDict.update(pfnDict)

        scheme = allDict["Protocol"]

        netloc = allDict["Host"]
        if allDict["Port"]:
            netloc += ":%s" % allDict["Port"]

        path = os.path.join(allDict["Path"], allDict["FileName"])
        query = allDict["Options"]

        pr = parse.ParseResult(scheme=scheme, netloc=netloc, path=path, params="", query=query, fragment="")

        pfn = pr.geturl()

        return S_OK(pfn)

    except Exception as e:  # pylint: disable=broad-except
        errStr = "Pfn.default_pfnunparse: Exception while unparsing pfn: %s" % pfnDict
        gLogger.exception(errStr, lException=e)
        return S_ERROR(errStr)
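A hedged usage sketch: S_OK/S_ERROR are DIRAC's result wrappers, so on success the URL comes back under the "Value" key.

res = default_pfnunparse({"Protocol": "srm", "Host": "se.example.org", "Port": 8443,
                          "Path": "/dpm/data", "FileName": "file.txt"})
print(res["Value"])  # srm://se.example.org:8443/dpm/data/file.txt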
Example #16
def get_NPR_URL(date_s, program_id, NPR_API_key):
    """
    get the NPR API tag for a specific NPR program.
    
    :param datetime.date date_s: the date of the program, formatted internally as "YYYY-MM-DD".
    :param int program_id: the NPR program's integer ID.
    :param str NPR_API_key: the NPR API key.
    :returns: a :py:class:`str` of the exploded URL for REST API calls to the NPR API server.
    :rtype: str
    
    .. note::

       no methods call this function any more, instead using the :py:mod:`requests` module's cleaner, higher-level functionality of REST API commands.
    """
    nprApiDate = date_s.strftime('%Y-%m-%d')
    result = urlparse.ParseResult(scheme='https',
                                  netloc='api.npr.org',
                                  path='/query',
                                  params='',
                                  query=urlencode({
                                      'id': program_id,
                                      'date': nprApiDate,
                                      'dateType': 'story',
                                      'output': 'NPRML',
                                      'apiKey': NPR_API_key
                                  }),
                                  fragment='')
    return result.geturl()
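A sketch of a call, with a placeholder program ID and API key; note that date_s must be a datetime.date, since the function calls strftime on it.

import datetime
print(get_NPR_URL(datetime.date(2020, 1, 15), 2, 'MY_API_KEY'))
# https://api.npr.org/query?id=2&date=2020-01-15&dateType=story&output=NPRML&apiKey=MY_API_KEY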
Example #17
    def url_set_params(url: str, new_params: Optional[Dict[str,
                                                           List[str]]]) -> str:
        """Sets params within a URL to a given mapping

        Arguments:
            url {str} -- URL which is parsed and changed
            new_params {Dict[str, List[str]]} -- new params to set for the URL

        Raises:
            ValueError: No URL specified

        Returns:
            {str} -- new modified URL
        """
        if not url:
            raise ValueError("need url to set params")
        if new_params is None:
            new_params = {}

        parse_result = parse.urlparse(url)
        query_params_encoded = parse.urlencode(new_params, True)

        parse_result = parse.ParseResult(
            parse_result.scheme,
            parse_result.netloc,
            parse_result.path,
            parse_result.params,
            query_params_encoded,  # change query params
            parse_result.fragment)

        new_url: str = parse.urlunparse(parse_result)

        return new_url
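Usage sketch: the method takes multi-valued params, matching parse_qs output.

print(url_set_params("https://example.com/search?q=old", {"q": ["new"], "page": ["2"]}))
# https://example.com/search?q=new&page=2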
Example #18
def subtitle_page_scrape(s_url, meta_data):
    page_url_parts = url_parse.urlparse(s_url)
    page = requests.get(s_url)
    soup = BeautifulSoup(page.content, 'html.parser')
    movie_desc = soup.find("div", {"class":"movie-desc"})

    print(movie_desc.get_text())

    meta_data.PlotSummary = movie_desc.text
    meta_data.ImdbUrl = _get_imdb_url(soup)

    year = soup.find("div", attrs={"id": "circle-score-year", "class": "circle", "data-info":"year"})
    meta_data.Year = year.get("data-text")

    subtitles_table = soup.find('tbody')
    
    for subtitle_entry in subtitles_table.find_all('tr'):
        sub_lang = subtitle_entry.find('span', {"class":"sub-lang"})
        if sub_lang.get_text().lower() == "english":
            download_cell = subtitle_entry.find('td', {"class":"download-cell"})
            download_path = download_cell.a.get('href').split('/')[-1] + ".zip"
            new_url = url_parse.ParseResult(scheme='https', 
                                        netloc="yifysubtitles.org", 
                                        path="/subtitle/" + download_path, 
                                        params='', query='', fragment='')
            subtitle_url = url_parse.urlunparse(new_url)
            print(subtitle_url)
            meta_data.SubtitleUrl = subtitle_url
            break
    return
Example #19
def construct_unique_key(baseurl, params):
    ''' constructs a key that is guaranteed to uniquely and
    repeatably identify an API request by its baseurl and params

    AUTOGRADER NOTES: To correctly test this using the autograder, use an underscore ("_")
    to join your baseurl with the params and all the key-value pairs from params
    E.g., baseurl_key1_value1

    Parameters
    ----------
    baseurl: string
        The URL for the API endpoint
    params: dict
        A dictionary of param:value pairs

    Returns
    -------
    string
        the unique key as a string
    '''

    u = urlparse.urlparse(baseurl)
    orig_params = dict(urlparse.parse_qsl(u.query))
    orig_params.update(params)
    newquery = urlencode(orig_params)
    new_url = urlparse.ParseResult(u.scheme, u.netloc, u.path, u.params,
                                   newquery, u.fragment).geturl()
    a = hashlib.md5()
    a.update(new_url.encode('utf-8'))
    return a.digest().hex()
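A usage sketch with a hypothetical endpoint; query params already in the base URL are merged with `params` before hashing, so the key is stable for equivalent requests.

key = construct_unique_key("https://api.example.com/data?fmt=json", {"q": "cats"})
print(len(key))  # 32 (hex MD5 digest)
print(key == construct_unique_key("https://api.example.com/data?fmt=json", {"q": "cats"}))  # True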
Example #20
def d2url(d):
    "Breaks down a URL into a dictionary, including decoding query string"
    if type(d.get("query")) is dict:
        d = d.copy()
        d["query"] = urls.urlencode(d["query"])
    parts = urls.ParseResult(**d)
    return urls.urlunparse(parts)
Example #21
    def _parse_my_resource(resource):

        parse_url = netutils.urlsplit(resource)

        params = urlparse.parse_qs(parse_url.query)
        parts = urlparse.ParseResult(parse_url.scheme, parse_url.netloc,
                                     parse_url.path, None, None, None)
        return parts, params
Example #22
def check_prefix(url):
    p = urlparse.urlparse(url, 'https')
    netloc = p.netloc or p.path
    path = p.path if p.netloc else ''

    p = urlparse.ParseResult('https', netloc, path, *p[3:])
    url = p.geturl()
    return url
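Usage sketch: a bare host is promoted into the netloc and the scheme is forced to https.

print(check_prefix("example.com/docs"))    # https://example.com/docs
print(check_prefix("http://example.com"))  # https://example.com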
Example #23
def trim_youtu_be(parsed):
    # rewrite to youtube.com internally
    parsed = parse.ParseResult(scheme='https',
                               netloc='youtube.com',
                               path='/watch',
                               params=None,
                               query="v=%s" % parsed.path[1:],
                               fragment=None)
    return trim_youtube_com(parsed)
Example #24
 def __init__(self, tcpdir, pointer):
     http.Message.__init__(self, tcpdir, pointer, dpkt_http.Request)
     # get query string. its the URL after the first '?'
     uri = urlparse.urlparse(self.msg.uri)
     self.host = self.msg.headers['host'] if 'host' in self.msg.headers else ''
     fullurl = urlparse.ParseResult('http', self.host, uri.path, uri.params, uri.query, uri.fragment)
     self.fullurl = fullurl.geturl()
     self.url, frag = urlparse.urldefrag(self.fullurl)
     self.query = urlparse.parse_qs(uri.query, keep_blank_values=True)
Example #25
def _cronitor_url(key, command):
    return parse.urlunparse(parse.ParseResult(
        scheme='https',
        netloc='cronitor.link',
        path='{}/{}'.format(key, command),
        params='',
        query=parse.urlencode({'host': 'http://localhost:6011'}),
        fragment=''
    ))
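Usage sketch with a placeholder monitor key; the host parameter is percent-encoded into the query, and urlunparse supplies the leading slash for the path.

print(_cronitor_url('d3x0', 'run'))
# https://cronitor.link/d3x0/run?host=http%3A%2F%2Flocalhost%3A6011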
Example #26
 def _get_post_url(self) -> str:
     parsed_url = parse.ParseResult(
         scheme=self.protocol,
         netloc=f"{self.server}:{self.port}",
         params="",
         path="/write",
         query=f"db={self.database}&precision=s",
         fragment="",
     )
     return parse.urlunparse(parsed_url)
Example #27
 def storage_url(self) -> str:
     store_url = parse.quote_plus(self._wrapped_storage.storage_url)
     parsed_url = parse.ParseResult(
         scheme="debug",
         path=str(pathlib.Path(self._path).absolute()),
         netloc="",
         params="",
         query=f"storage={store_url}",
         fragment="")
     return parse.urlunparse(parsed_url)
Example #28
def safeUrl(url: str) -> str:
    from urllib import parse
    pr = parse.urlparse(url)
    pr2 = parse.ParseResult(scheme=pr.scheme,
                            netloc=pr.netloc,
                            path=parse.quote(pr.path, '/%'),
                            params=pr.params,
                            query=pr.query,
                            fragment=pr.fragment)
    return parse.urlunparse(pr2)
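Usage sketch: only the path is percent-quoted ('/' and '%' stay literal); query and fragment pass through untouched.

print(safeUrl("https://example.com/a b/ü?x=1"))
# https://example.com/a%20b/%C3%BC?x=1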
Example #29
def strip_www(parsed):
    if parsed.netloc[:4] != "www.":
        return parsed

    return parse.ParseResult(scheme=parsed.scheme,
                             netloc=parsed.netloc[4:],
                             path=parsed.path,
                             params=parsed.params,
                             query=parsed.query,
                             fragment=parsed.fragment)
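Usage sketch; hosts without the "www." prefix pass through unchanged.

u = parse.urlparse("https://www.example.com/page")
print(strip_www(u).geturl())  # https://example.com/page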
Example #30
def trim_i_imgur(parsed):
    # should end with an image extension
    if parsed.path[-4:-3] != ".":
        return parsed
    return parse.ParseResult(scheme='http',
                             netloc='imgur.com',
                             path=parsed.path[1:-4],
                             params=None,
                             query=None,
                             fragment=None)