def __init__(self, *args, **kwargs):
    # Resolve the SignalWire space URL from kwargs or the environment.
    if 'signalwire_space_url' in kwargs:
        signalwire_space_url = kwargs.pop('signalwire_space_url', "api.signalwire.com")
    else:
        signalwire_space_url = os.environ['SIGNALWIRE_SPACE_URL']
    # Normalize the URL (scheme-less values land in .path) and force https.
    p = urlparse(signalwire_space_url, 'http')
    netloc = p.netloc or p.path
    path = p.path if p.netloc else ''
    p = ParseResult('https', netloc, path, *p[3:])
    super(Client, self).__init__(*args, **kwargs)
    # Point the Twilio-compatible API and Fax clients at the SignalWire space.
    self._api = TwilioApi(self)
    self._api.base_url = p.geturl()
    TwilioFax.__init__ = patched_fax_init
    TwilioV1.__init__ = patched_fax_v1_init
    self._fax = TwilioFax(self)
    self._fax.base_url = p.geturl()
    # Monkey-patch the Twilio helper classes for SignalWire compatibility.
    TwilioRestException.__str__ = patched_str
    AccountInstance.__init__ = patched_accountinstance_init
    LocalInstance.__init__ = patched_localinstance_init
    TollFreeInstance.__init__ = patched_tollfreeinstance_init
    ApplicationInstance.__init__ = patched_applicationinstance_init
    IncomingPhoneNumberInstance.__init__ = patched_incomingphonenumberinstance_init
    RecordingInstance.__init__ = patched_recordinginstance_init
    TranscriptionInstance.__init__ = patched_transcriptioninstance_init
def _import_ts(parsed_source: ParseResult,  # pylint: disable=too-many-locals,too-many-arguments
               parsed_destination: ParseResult,
               source_grid: Grid,
               customer_id: str,
               force: bool,  # Copy even if the data is identical
               merge_ts: bool,  # Merge current TS with the new period of TS
               envs: Dict[str, str],
               use_thread: bool = True):
    # Now it's time to upload the referenced time series.
    source_url = parsed_source.geturl()
    source_home = source_url[0:source_url.rfind('/') + 1]
    destination_url = parsed_destination.geturl()
    destination_home = destination_url[0:destination_url.rfind('/') + 1]
    requests = []
    for row in source_grid:
        if "hisURI" in row:
            source_time_serie = source_home + row["hisURI"]
            destination_time_serie = destination_home + row["hisURI"]
            if use_thread:
                requests.append((urlparse(source_time_serie),
                                 urlparse(destination_time_serie),
                                 customer_id,
                                 True, False, force, True,
                                 envs))
            else:
                _update_grid_on_s3(urlparse(source_time_serie),
                                   urlparse(destination_time_serie),
                                   customer_id=customer_id,
                                   compare_grid=True,
                                   update_time_series=False,
                                   force=force,
                                   merge_ts=True,
                                   envs=envs)
    if requests:
        with ThreadPool(processes=_POOL_SIZE) as pool:
            pool.starmap(_update_grid_on_s3, requests)
def __Download(self, link: ParseResult, dst: str) -> int:
    """
    __Download(link: ParseResult, dst: str) -> int

    Download the driver binary itself.

    :param link: download link
    :param dst: destination path and file name
    :return: downloaded size in bytes
    """
    size = int(urlopen(link.geturl()).info().get('Content-Length', -1))
    bar = tqdm(total=size, initial=0, unit='B', unit_scale=True,
               desc=os.path.basename(dst), ncols=70)
    req = requests.get(url=link.geturl(), stream=True)
    with open(dst, 'ab') as f:
        for chunk in req.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
                bar.update(len(chunk))
    bar.close()
    return size
def _periodic_refresh_versions(self, parsed_uri: ParseResult, first_time: bool) -> None:
    """ Refresh the list of versions """
    # Refresh at a rounded period, so all cloud instances refresh their data
    # at the same time.
    now = datetime.utcnow().replace(tzinfo=pytz.UTC)
    next_time = now.replace(minute=0, second=0) + timedelta(
        minutes=(now.minute + self._periodic_refresh)
        // self._periodic_refresh * self._periodic_refresh)
    assert next_time > now
    if parsed_uri.scheme == "s3":
        assert BOTO3_AVAILABLE, "Use 'pip install boto3'"
        start_of_current_period = \
            (next_time - timedelta(minutes=self._periodic_refresh)).replace(tzinfo=pytz.UTC)
        s3_client = self._s3()
        s3_obj_version = s3_client.list_object_versions(
            Bucket=parsed_uri.netloc, Prefix=parsed_uri.path[1:])
        if "Versions" in s3_obj_version:
            obj_versions = [(v["LastModified"], v["VersionId"])
                            for v in s3_obj_version["Versions"]]
        else:
            meta = s3_client.get_object(Bucket=parsed_uri.netloc, Key=parsed_uri.path[1:])
            obj_versions = [(meta["LastModified"], meta["VersionId"])]
        obj_versions = sorted(obj_versions, key=lambda x: x[0], reverse=True)
        self._lock.acquire()  # pylint: disable=consider-using-with
        all_versions = self._versions.get(parsed_uri.geturl(), OrderedDict())
        concurrency = self._function_concurrency()
        for date_version, version_id in obj_versions:
            if date_version not in all_versions:
                # Skip versions published during the current period. Then all
                # AWS instances see the same data and wait for the end of the
                # current period to refresh. Otherwise two different versions
                # could be served if a new AWS Lambda instance were created
                # after an updated version.
                if not first_time or concurrency <= 1 or date_version < start_of_current_period:
                    all_versions[date_version] = version_id  # Add a slot
                else:
                    log.warning(
                        "Ignoring version '%s' until the next period, "
                        "so that all Lambda instances stay synchronized.",
                        version_id)
        self._versions[parsed_uri.geturl()] = all_versions  # LRU and versions
        self._lock.release()
    else:
        self._versions[parsed_uri.geturl()] = {
            datetime(1, 1, 1, tzinfo=pytz.UTC): "direct_file"
        }
    if self._periodic_refresh:
        partial_refresh = functools.partial(
            self._periodic_refresh_versions, parsed_uri, False)
        self._timer = threading.Timer((next_time - now).seconds, partial_refresh)
        self._timer.daemon = True
        self._timer.start()
def format_url(self, url):
    parsed_url = urlparse(url, 'http')
    site_location = parsed_url.netloc or parsed_url.path
    path = parsed_url.path if parsed_url.netloc else ''
    site_location = site_location.replace('www.', '')
    parsed_url = ParseResult('http', site_location, path, *parsed_url[3:])
    if not validators.url(parsed_url.geturl()):
        raise ValueError("Invalid URL: %s" % url)
    return parsed_url.geturl(), site_location
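# A hypothetical doctest-style sketch of the normalization performed by
# format_url above (the instance holding the method is not shown in this
# snippet; outputs assume the `validators` package accepts the results):
#
# >>> self.format_url('http://www.google.pl/page')
# ('http://google.pl/page', 'google.pl')
# >>> self.format_url('www.google.pl')
# ('http://google.pl', 'google.pl')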
def _open_url(url: ParseResult) -> None:
    """Opens a URL."""
    opener = config.commands.open.command
    try:
        url_str: str = url.geturl() if url.scheme else str(RelPath(url.geturl()).path)
        LOGGER.debug('Opening "%s" with %s.', url_str, opener)
        with open(os.devnull, "w", encoding="utf-8") as devnull:
            subprocess.Popen(  # pylint: disable=consider-using-with
                [opener, url_str],
                stdout=devnull, stderr=devnull, stdin=devnull, close_fds=True)
    except FileNotFoundError as err:
        LOGGER.error(err)
def fetch_api_description(url: ParseResult, credentials: Optional[Credentials],
                          insecure: bool):
    """Fetch the API description from the remote MAAS instance."""
    session = bones.SessionAPI.fromURL(
        url.geturl(), credentials=credentials, insecure=insecure)
    return session.description
def add():
    if 'logged_in' in session and session['logged_in']:
        link = request.form['link'].lower()
        dest = request.form['dest']
        # Normalize the destination URL and force https.
        p = urlparse(dest, 'https')
        netloc = p.netloc or p.path
        path = p.path if p.netloc else ''
        p = ParseResult('https', netloc, path, *p[3:])
        dest = p.geturl()
        # fullmatch so a valid prefix cannot smuggle in trailing characters
        if re.fullmatch(r'[a-z0-9-]{1,30}', link) is not None and \
                Shortlink.query.get(link) is None and \
                validators.url(dest):
            shortlink = Shortlink(link=link, dest=dest)
            db.session.add(shortlink)
            db.session.commit()
            session['message'] = 'Added!'
            return redirect(url_for('admin'))
        else:
            session['message'] = 'There was an issue. Please check your input.'
            return redirect(url_for('admin'))
    else:
        return '403 Forbidden', 403
def __init__(self, srctype, origurl, hstspreloadchecker, domain=None,
             autoExitFilename=None):  # {{{
    self.links = []
    self.alerts = set()
    self.srctype = srctype
    self.domain = domain
    logging.debug("LoginPageChecker set domain to {}".format(self.domain))
    self.HSTSPreloadListChecker = hstspreloadchecker
    self.autoExitFilename = autoExitFilename
    # Top-level URLs that have no path (e.g. http://test.com) should be
    # converted to end in / (http://test.com/); otherwise things get messed up.
    # WebKit translates http://test.com to http://test.com/ automagically,
    # which messes up the redirect chain, so perform this translation even
    # before WebKit does it.
    urlparts = urlparse(origurl)
    if urlparts.path == "":
        urlparts = ParseResult(urlparts[0], urlparts[1], "/",
                               urlparts[3], urlparts[4], urlparts[5])
        origurl = urlparts.geturl()
    self.origurl = origurl
    self.pwFields = {}
    self.url = None
    self.initclick = None
    self.preclicks = []
    self.resultFlag = False
    self.redirectPageResources = {}
    self.mainRedirectChain = []
def _get_service_url(self):
    parsed_url = urlparse(self.base_url)
    encoded_get_args = self.extra_queryparams
    _service_url = ParseResult(parsed_url.scheme, parsed_url.netloc,
                               parsed_url.path, parsed_url.params,
                               encoded_get_args, parsed_url.fragment)
    return _service_url.geturl()
def format_url(my_url, scheme='https', return_type="url"):
    """Takes an input string to a valid URL format

    Args:
        my_url (string): URL-like string
        scheme (string): scheme to force on the result; must be in URL_SCHEMES
        return_type (string): 'url' for a string, 'parse_result' for a ParseResult

    Returns:
        string: Properly formatted URL
    """
    if scheme not in URL_SCHEMES:
        raise ValueError(f"'scheme' arg must be in one of {URL_SCHEMES}")
    if return_type not in URL_RETURN_TYPES:
        raise ValueError(
            f"'return_type' arg must be one of {URL_RETURN_TYPES}")
    p = urlparse(my_url, scheme=scheme)
    netloc = p.netloc or p.path
    path = p.path if p.netloc else ''
    p = ParseResult(scheme, netloc, path, *p[3:])
    url = p.geturl()
    if not is_valid_url(url):
        raise InvalidURL
    if return_type == 'parse_result':
        return p
    return url
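# Hypothetical calls to format_url above (assumes 'https' and 'http' are both
# in URL_SCHEMES and that is_valid_url accepts the results):
#
# >>> format_url('example.com/path')
# 'https://example.com/path'
# >>> format_url('example.com', scheme='http', return_type='parse_result')
# ParseResult(scheme='http', netloc='example.com', path='', params='', query='', fragment='')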
def __all_url() -> str:
    """Method returns the URL used to fetch all data from the Johns Hopkins server

    Returns:
        str: URL for getting all Covid data
    """
    url = urlparse("https://services1.arcgis.com")
    path = "/0MSEUqKaxRlEPj5g/arcgis/rest/services/ncov_cases/FeatureServer/2/query"
    query = {}
    query["f"] = "json"
    query["where"] = "Confirmed > 0"
    query["returnGeometry"] = "false"
    query["spatialRel"] = "esriSpatialRelIntersects"
    query["outFields"] = "*"
    query["orderByFields"] = "Confirmed desc"
    query["resultOffset"] = "0"
    query["resultRecordCount"] = "200"
    query["cacheHint"] = "true"
    url = ParseResult(
        scheme=url.scheme,
        netloc=url.hostname,
        path=path,
        query=urlencode(query),
        params=url.params,
        fragment=url.fragment,
    )
    return url.geturl()
def __country_url(object_id: str) -> str:
    """Method formats and encodes the URL for a specific country's Covid information

    Args:
        object_id (str): OBJECTID of the country record to query

    Returns:
        str: Formatted, encoded URL for the requested country
    """
    url = urlparse("https://services1.arcgis.com")
    path = "/0MSEUqKaxRlEPj5g/arcgis/rest/services/ncov_cases/FeatureServer/2/query"
    query = {}
    query["f"] = "json"
    query["where"] = f"OBJECTID = {object_id}"
    query["returnGeometry"] = "false"
    query["spatialRel"] = "esriSpatialRelIntersects"
    query["outFields"] = "*"
    query["resultOffset"] = "0"
    query["resultRecordCount"] = "1"
    query["cacheHint"] = "true"
    url = ParseResult(
        scheme=url.scheme,
        netloc=url.hostname,
        path=path,
        query=urlencode(query),
        params=url.params,
        fragment=url.fragment,
    )
    return url.geturl()
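# The two builders above share one pattern: assemble the query string with
# urlencode() and rebuild the URL through ParseResult. A minimal, self-contained
# sketch of that pattern (the path and query values here are placeholders, not
# the real arcgis ones):
from urllib.parse import ParseResult, urlencode, urlparse

base = urlparse("https://services1.arcgis.com")
sketch = ParseResult(
    scheme=base.scheme,
    netloc=base.hostname,
    path="/example/FeatureServer/2/query",
    params=base.params,
    query=urlencode({"f": "json", "where": "Confirmed > 0"}),
    fragment=base.fragment,
)
print(sketch.geturl())
# https://services1.arcgis.com/example/FeatureServer/2/query?f=json&where=Confirmed+%3E+0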
def fetch_resource_by_uuid(gatekeeper_endpoint: ParseResult, workspace_dir: str,
                           kind: Kind, uuid: str) -> Dict[str, Any]:
    """Fetch a resource by its uuid.

    Return `None` if nothing is found. Raise a RuntimeError exception when an
    error is detected within the gatekeeper's API."""
    url = urljoin(gatekeeper_endpoint.geturl(),
                  os.path.join(_get_path_from_kind(kind), uuid))
    _LOGGER.info('Fetching a resource by uuid at %s', url)
    auth = 'Bearer ' + _get_workspace_token(workspace_dir)
    res_resp = requests.get(url, headers={
        'content-type': 'application/json',
        'Authorization': auth
    })
    try:
        res_resp.raise_for_status()
    except requests.exceptions.HTTPError as exc_notfound:
        _LOGGER.exception(
            'Failed to retrieve a resource at %s '
            '(status code = %d)', res_resp.url, res_resp.status_code)
        if exc_notfound.response.status_code == 404:
            return None
        raise
    tmp = res_resp.json()
    if not isinstance(tmp, dict) or len(tmp) <= 0:
        exc = RuntimeError('The returned json is malformed: {}'.format(tmp))
        _LOGGER.error('Error while fetching a resource using an uuid: %s', exc)
        raise exc
    _LOGGER.info('Succeeded in retrieving the resource %s (status code = %d)',
                 res_resp.url, res_resp.status_code)
    if kind.name in tmp:
        # the resource is boxed
        tmp = tmp[kind.name]
    return tmp
async def async_setup_entry(hass, entry, async_add_entities):
    """Add solarlog entry."""
    host_entry = entry.data[CONF_HOST]
    device_name = entry.title

    url = urlparse(host_entry, "http")
    netloc = url.netloc or url.path
    path = url.path if url.netloc else ""
    url = ParseResult("http", netloc, path, *url[3:])
    host = url.geturl()

    try:
        api = await hass.async_add_executor_job(SolarLog, host)
        _LOGGER.debug("Connected to Solar-Log device, setting up entries")
    except (OSError, HTTPError, Timeout):
        _LOGGER.error(
            "Could not connect to Solar-Log device at %s, check host ip address", host
        )
        return

    # Create the solarlog data service which will retrieve and update the data.
    data = await hass.async_add_executor_job(SolarlogData, hass, api, host)

    # Create a new sensor for each sensor type.
    entities = []
    for sensor_key in SENSOR_TYPES:
        sensor = SolarlogSensor(entry.entry_id, device_name, sensor_key, data)
        entities.append(sensor)
    async_add_entities(entities, True)
    return True
def _fetch_resource_by_uuid(gatekeeper_endpoint: ParseResult, path: str,
                            uuid: str) -> Dict[str, Any]:
    """Fetch a resource by its uuid.

    Return `None` if nothing is found. Raise a RuntimeError exception when an
    error is detected within the gatekeeper's API."""
    url = urljoin(gatekeeper_endpoint.geturl(), os.path.join(path, uuid))
    _LOGGER.info('Fetching a resource by uuid at %s', url)
    res_resp = requests.get(url, headers={'content-type': 'application/json'})
    try:
        res_resp.raise_for_status()
    except requests.exceptions.HTTPError as exc_notfound:
        _LOGGER.exception(
            'Failed to retrieve a resource at %s '
            '(status code = %d)', res_resp.url, res_resp.status_code)
        if exc_notfound.response.status_code == 404:
            return None
        raise
    tmp = res_resp.json()
    if not isinstance(tmp, dict) or len(tmp) <= 0:
        exc = RuntimeError('The returned json is malformed: {}'.format(tmp))
        _LOGGER.error(exc)
        raise exc
    _LOGGER.info('Succeeded in retrieving the resource %s (status code = %d)',
                 res_resp.url, res_resp.status_code)
    return tmp
def get_past_league_data(league_name):
    data_links = scrape_data_links()
    try:
        ninja_url = ParseResult(
            scheme="https",
            netloc="poe.ninja",
            path=data_links[league_name],
            params="",
            query="",
            fragment="",
        )
        with HTMLSession() as session:
            response = session.get(ninja_url.geturl())
            # renamed from `zip` to avoid shadowing the builtin
            zip_file = zipfile.ZipFile(BytesIO(response.content))
            zipped_contents = {}
            for info in zip_file.infolist():
                zipped_contents[info.filename.split(".")[-2]] = pd.read_csv(
                    StringIO(zip_file.read(info.filename).decode()), sep=";"
                )
            return zipped_contents
    except KeyError:
        raise KeyError(
            f"{league_name} not a valid league name, try: {data_links.keys()}"
        )
def urlparams(url_, fragment=None, query_dict=None, **query):
    """
    Add a fragment and/or query parameters to a URL.

    New query params will be appended to existing parameters, except
    duplicate names, which will be replaced.
    """
    url_ = urlparse(url_)
    fragment = fragment if fragment is not None else url_.fragment

    q = url_.query
    new_query_dict = (QueryDict(smart_bytes(q), mutable=True) if q
                      else QueryDict('', mutable=True))
    if query_dict:
        for k, l in query_dict.lists():
            new_query_dict[k] = None  # Replace, don't append.
            for v in l:
                new_query_dict.appendlist(k, v)

    for k, v in query.items():
        # Replace, don't append.
        if isinstance(v, list):
            new_query_dict.setlist(k, v)
        else:
            new_query_dict[k] = v

    query_string = urlencode([(k, v) for k, l in new_query_dict.lists()
                              for v in l if v is not None])
    new = ParseResult(url_.scheme, url_.netloc, url_.path, url_.params,
                      query_string, fragment)
    return new.geturl()
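# A hypothetical call to urlparams above (requires Django for QueryDict and
# smart_bytes; parameter order follows insertion order):
#
# >>> urlparams('https://example.com/page?a=1', fragment='top', b=2)
# 'https://example.com/page?a=1&b=2#top'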
def fetch_gitlab_versions(url: ParseResult) -> List[Version]:
    match = GITLAB_API.match(url.geturl())
    if not match:
        return []
    domain = match.group("domain")
    project_id = match.group("project_id")
    gitlab_url = f"https://{domain}/api/v4/projects/{project_id}/repository/tags"
    info(f"fetch {gitlab_url}")
    resp = urllib.request.urlopen(gitlab_url)
    json_tags = json.loads(resp.read())
    if len(json_tags) == 0:
        raise VersionError("No git tags found")
    releases = []
    tags = []
    for tag in json_tags:
        name = tag["name"]
        assert isinstance(name, str)
        if tag["release"]:
            # TODO: does gitlab have prereleases?
            releases.append(Version(name))
        else:
            tags.append(Version(name))
    # if no release is found, use the latest tag
    if releases == []:
        return tags
    return releases
def __Select_Driver(self, drivers_link: ParseResult) -> ResultSet:
    """
    __Select_Driver(drivers_link: ParseResult) -> ResultSet

    List the drivers available for a specific version.

    :param drivers_link: link to the driver listing for a specific version
    :return: ResultSet
    """
    html = requests.get(url=drivers_link.geturl())
    for _ in range(3):  # retry up to three times
        if html.status_code == 200:
            break
        html = requests.get(url=drivers_link.geturl())
    html.encoding = html.apparent_encoding
    soup = BeautifulSoup(html.text, 'html.parser')
    return soup.pre.select(selector='a')
def get_url(parent_url, url):
    parsed_parent_url = urlparse(parent_url)
    parsed_url = urlparse(url)
    scheme = parsed_url.scheme
    netloc = parsed_url.netloc
    path = parsed_url.path
    if len(scheme) == 0:
        scheme = parsed_parent_url.scheme
    if len(netloc) == 0:
        netloc = parsed_parent_url.netloc
    if path == '/':
        path = ''
    parsed_url = ParseResult(scheme=scheme, netloc=netloc, path=path,
                             params=parsed_url.params, query=parsed_url.query,
                             fragment='')
    return parsed_url.geturl()
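# Hypothetical resolutions with get_url above:
#
# >>> get_url('https://example.com/a/b', '/c?x=1')
# 'https://example.com/c?x=1'
# >>> get_url('https://example.com/a', 'https://other.org/')
# 'https://other.org'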
def proc_addr(addr: str) -> Tuple[str, str, int]:
    p = urlparse(addr, 'http')
    netloc = p.netloc or p.path
    if netloc.startswith(':'):
        netloc = 'localhost' + netloc
    path = p.path if p.netloc else ''
    p = ParseResult('http', netloc, path, '', '', '')
    return (p.geturl(), p.hostname, p.port)
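# Hypothetical inputs for proc_addr above (a bare-port address is why the
# ':'-prefix check exists; outputs assume CPython's urlparse behavior for
# scheme-less host:port strings):
#
# >>> proc_addr(':8080')
# ('http://localhost:8080', 'localhost', 8080)
# >>> proc_addr('example.com:9000')
# ('http://example.com:9000', 'example.com', 9000)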
def modify_site_name(site):
    if not site:
        return
    site = urlparse(site, 'http')
    netloc = site.netloc or site.path
    path = site.path if site.netloc else ''
    site = ParseResult(site.scheme, netloc, path, *site[3:])
    return site.geturl()
def __init__(self, baseUrl: ParseResult):
    self.baseUrl = baseUrl
    self.queueManager = QueueManager(baseUrl.geturl())
    self.reqHeaders = deepcopy(config.headers)
    self.reqHeaders['User-Agent'] = config.userAgents["Google"]
    self.supportedPlatforms = []
    for platform in config.supportedPlatforms:
        self.supportedPlatforms.append(
            config.SocialPlatform(**config.supportedPlatforms[platform]))
def get_redirect_url(self):
    parsed = urlparse(self.service)
    query = parse_qs(parsed.query)
    query['ticket'] = [self.ticket]
    query = [((k, v) if len(v) > 1 else (k, v[0]))
             for k, v in query.items()]
    parsed = ParseResult(parsed.scheme, parsed.netloc, parsed.path,
                         parsed.params, urlencode(query), parsed.fragment)
    return parsed.geturl()
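# A hedged sketch of the redirect built above: with self.service set to
# 'https://app.example.com/login?next=%2F' and self.ticket 'ST-123' (both
# hypothetical values), the method would return
# 'https://app.example.com/login?next=%2F&ticket=ST-123'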
def convert_url(website_link: str) -> str:
    # Prepend http schema to url if needed
    p = urlparse(website_link, 'http')
    netloc = p.netloc or p.path
    path = p.path if p.netloc else ''
    # if not netloc.startswith('www.'):
    #     netloc = 'www.' + netloc
    p = ParseResult(p.scheme, netloc, path, *p[3:])
    return p.geturl()
def service_url(self):
    parsed_url = urlparse(self.base_url)
    encoded_get_args = self.extra_queryparams
    service_url = ParseResult(parsed_url.scheme, parsed_url.netloc,
                              parsed_url.path, parsed_url.params,
                              encoded_get_args, parsed_url.fragment)
    service_url = service_url.geturl() if not self.proxy_base else urljoin(
        settings.SITEURL, reverse('service_proxy', args=[self.id]))
    return service_url
def format_url(url):
    """
    Formats url by adding 'http://' if necessary and deleting 'www.'
    :param url: url to article or domain
    :return: formatted url
    e.g. the following urls:
    'http://www.google.pl/', 'google.pl/', 'www.google.pl/',
    'http://google.pl/', 'https://www.google.pl/'
    will all be formatted to: http://google.pl/
    """
    parsed_url = urlparse(url, 'http')
    netloc = parsed_url.netloc or parsed_url.path
    path = parsed_url.path if parsed_url.netloc else ''
    netloc = netloc.replace('www.', '')
    parsed_url = ParseResult('http', netloc, path, *parsed_url[3:])
    if not validators.url(parsed_url.geturl()):
        raise ValueError('Provided url=' + url + ' is not valid')
    return parsed_url.geturl()
def go_to_page(self, page):
    u = urlparse(self.br.geturl())
    data = dict(parse_qsl(u.query))
    if page == 0:
        del data['page']
    else:
        data['page'] = page
    res = ParseResult(scheme=u.scheme, netloc=u.hostname, path=u.path,
                      params=u.params, query=urlencode(data),
                      fragment=u.fragment)
    self.br.open(res.geturl())
def createURL(category):
    global queryURL
    parsedResult = ParseResult(scheme='https', netloc=baseURL, path='',
                               params='', query=("q=%s" % category),
                               fragment='')
    queryURL = parsedResult.geturl()
def url_parse(self, site):
    """Parse the url into a valid url."""
    parse = list(
        urlparse(site if urlparse(site).scheme else '%s%s' % ('//', site)))
    parse[0] = b'https'
    for index, value in enumerate(parse):
        parse[index] = value.decode('utf-8') if isinstance(value, bytes) \
            else value
    parse = ParseResult(*parse)
    return parse.geturl()
def __init__(self, username=None, password=None, site=None, email=None):
    self.username = username
    self.password = password
    parse = list(
        urlparse(site if urlparse(site).scheme else '%s%s' % ('//', site)))
    parse[0] = b'https'
    for index, value in enumerate(parse):
        parse[index] = value.decode('utf-8') if isinstance(value, bytes) \
            else value
    parse = ParseResult(*parse)
    self.url = parse.geturl()
    self.email = email
def parse(self, url, respar):
    res = urlparse(url)
    nl = res.path
    # print(res)
    if respar.path.rfind("/") >= 0:
        nl = respar.path[0:respar.path.rfind("/") + 1] + res.path
    res2 = ParseResult(scheme=res.scheme or respar.scheme,
                       netloc=res.netloc or respar.netloc,
                       path=nl, params='', query='', fragment='')
    url = res2.geturl()
    # print("parse", url, res2, respar)
    return (url, res2)
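# A hypothetical resolution with parse above, where respar is the
# already-parsed parent URL:
#
# >>> parent = urlparse('https://example.com/dir/index.html')
# >>> self.parse('page2.html', parent)[0]
# 'https://example.com/dir/page2.html'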
def download_key_http(address, port):
    url = ParseResult(
        scheme='http',
        # This seems to work well enough with both IPv6 and IPv4
        netloc="[[%s]]:%d" % (address, port),
        path='/',
        params='',
        query='',
        fragment='')
    log.debug("Starting HTTP request")
    data = requests.get(url.geturl(), timeout=5).content
    log.debug("finished downloading %d bytes", len(data))
    return data
def handle_authcode(request, client, redirection_uri, state=None):
    parts = urlparse(redirection_uri.uri)
    qparams = dict(parse_qsl(parts.query))

    user_id = authenticated_userid(request)
    auth_code = Oauth2Code(client, user_id)
    db.add(auth_code)
    db.flush()

    qparams['code'] = auth_code.authcode
    if state:
        qparams['state'] = state
    parts = ParseResult(parts.scheme, parts.netloc, parts.path,
                        parts.params, urlencode(qparams), '')
    return HTTPFound(location=parts.geturl())
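# A hedged sketch of the location built above: if redirection_uri.uri were
# 'https://client.example/cb?foo=1' and the generated authcode and state were
# 'abc' and 'xyz' (all hypothetical values), the redirect would point to
# 'https://client.example/cb?foo=1&code=abc&state=xyz' (any fragment is dropped).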
def fetch_resource(gatekeeper_endpoint: ParseResult, workspace_dir: str,
                   kind: Kind, vendor: str, name: str,
                   version: str) -> Tuple[str, Dict[str, Any]]:
    """Fetch a resource and return the Json as a dictionary.

    Return `None` if nothing is found. Raise a RuntimeError exception when an
    error is detected within the gatekeeper's API."""
    url = urljoin(gatekeeper_endpoint.geturl(), _get_path_from_kind(kind))
    _LOGGER.info('Fetching a %s resource by name at %s', kind, url)
    query_params_raw = {'vendor': vendor,  # Dict[Str, Str]
                        'name': name,
                        'version': version}
    # We force the order of the query's parameters to lower the impact on tests
    # when a key is added or removed
    query_params = collections.OrderedDict(sorted(query_params_raw.items()))
    auth = 'Bearer ' + _get_workspace_token(workspace_dir)
    res_resp = requests.get(url, params=query_params,
                            headers={'content-type': 'application/json',
                                     'Authorization': auth})
    try:
        res_resp.raise_for_status()
    except requests.exceptions.HTTPError:
        _LOGGER.exception('Failed to retrieve a resource at %s '
                          '(status code = %d)',
                          res_resp.url, res_resp.status_code)
        # REMARK: if nothing is found, the API returns an empty [] and not 404
        raise
    tmp = res_resp.json()
    if not isinstance(tmp, list):
        exc = RuntimeError('The returned json is not boxed by a list')
        _LOGGER.error('The GK API must return a list of resources: %s', exc)
        raise exc
    _LOGGER.info('Succeeded in retrieving the resource %s (status code = %d): %s',
                 res_resp.url, res_resp.status_code, tmp[:20])
    for elt in tmp:
        this_uuid = elt.get('uuid')  # .get(): the warning below expects a missing key
        if not this_uuid:
            _LOGGER.warning('Ignoring an element without id or uuid: %s', elt)
            continue
        if kind.name in elt:
            # the resource is boxed
            elt = elt[kind.name]
        if all([elt['vendor'] == vendor,
                elt['name'] == name,
                elt['version'] == version]):
            return (this_uuid, elt)
    return None
def get_direct_url(url, headers):
    """Gets the zip direct download link from the project download page"""
    direct_download_url = href_from_link_text(url, headers,
                                              'Problems Downloading')
    parsed_download_url = urlparse(direct_download_url)
    if parsed_download_url.scheme not in ['http', 'https']:
        # url is relative, and is missing the scheme and netloc
        parsed_parent_url = urlparse(url)
        parsed_download_url = ParseResult(parsed_parent_url.scheme,
                                          parsed_parent_url.netloc,
                                          parsed_download_url.path,
                                          parsed_download_url.params,
                                          parsed_download_url.query,
                                          parsed_download_url.fragment)
        direct_download_url = parsed_download_url.geturl()
    direct_download_url = href_from_link_text(direct_download_url, headers,
                                              'direct link')
    return direct_download_url
def __init__(self, parsed: ParseResult = None):
    self.parsed = parsed
    if parsed is None:
        self.kind = TaskCreationType.Empty
        self.url = None
        return

    path = parsed.path
    scheme = parsed.scheme
    url = parsed.geturl()

    # subtaskInfo is set by Model
    self.subtaskInfo = []
    self.url = url
    self.path = None
    self.kind = None

    if path.endswith(".torrent"):
        if scheme == "":
            self.kind = TaskCreationType.LocalTorrent
            return
        elif scheme in ("http", "https", "ftp"):
            self.kind = TaskCreationType.RemoteTorrent
            return

    if path.endswith(".metalink") or path.endswith(".meta4"):
        if scheme in ("http", "https", "ftp"):
            self.kind = TaskCreationType.MetaLink
    elif scheme == "ed2k":
        self.kind = TaskCreationType.Emule
    elif scheme == "magnet":
        self.kind = TaskCreationType.Magnet
    elif scheme in ("http", "https", "ftp"):
        self.kind = TaskCreationType.Normal
def batch_raw_query(prometheus_endpoint: ParseResult, start_timestamp: int,
                    end_timestamp: int, step: datetime.timedelta, query: str,
                    maxpts=11000) -> Iterable[bytes]:
    """Retrieve metrics from a Prometheus database"""
    sstep = '{}s'.format(int(step.total_seconds()))
    url = urljoin(prometheus_endpoint.geturl(), 'api/v1/query_range')

    def sub(sub_start, sub_end):
        """Fetch one sub-range of the full query window."""
        payload = [('start', sub_start), ('end', sub_end),
                   ('step', sstep), ('query', query)]
        req = requests.get(url, params=payload)
        return req.content

    delta = end_timestamp - start_timestamp
    batch_size = min(delta // int(step.total_seconds()), maxpts)  # type: int
    for limits in _create_batches(start_timestamp, end_timestamp, batch_size):
        sub_start, sub_end = limits
        yield sub(sub_start, sub_end)
class Url(object):
    """An object based representation of a Url."""

    @property
    def scheme(self):
        return self._parts.scheme

    @property
    def netloc(self):
        return self._parts.netloc

    @property
    def hostname(self):
        return self._parts.hostname

    @property
    def subdomain(self):
        """Returns the subdomain for the URL.

        With thanks:
        http://stackoverflow.com/questions/1189128/regex-to-extract-subdomain-from-url
        """
        regex = r'(?:http[s]*\:\/\/)*(.*?)\.(?=[^\/]*\..{2,5})'
        hostname = self.hostname
        if not hostname:
            return None
        matches = re.match(regex, hostname)
        return matches.group(1) if matches else None

    @property
    def port(self):
        return self._parts.port

    @property
    def path(self):
        return self._parts.path

    def path_index(self, index=0):
        try:
            split_path = self.path.strip('/').split('/')
            return split_path[index]
        except IndexError:  # narrowed from a bare except
            return None

    @property
    def params(self):
        return self._parts.params

    @property
    def query(self):
        return self._parts.query

    @property
    def fragment(self):
        return self._parts.fragment

    @property
    def username(self):
        return self._parts.username

    @property
    def password(self):
        return self._parts.password

    def __init__(self, url):
        """Initialize the url object.

        Create a new Url object from either a well formed url string,
        a dict of key/values, or a ParseResult.

        Args:
            url (mixed): The value to generate the url from.
        """
        if isinstance(url, ParseResult):
            self._parts = url
        elif isinstance(url, dict):
            if 'hostname' in url and 'netloc' not in url:
                netloc = url.pop('hostname')
                if 'port' in url:
                    netloc += ':' + str(url.pop('port'))
                url['netloc'] = netloc
            if 'scheme' not in url:
                url['scheme'] = 'http'
            if 'username' in url:
                url['netloc'] = '{0}:{1}@{2}'.format(
                    url.pop('username'),
                    url.pop('password', ''),
                    url['netloc'])
            if 'params' not in url:
                url['params'] = None
            if 'fragment' not in url:
                url['fragment'] = None
            if 'path' not in url:
                url['path'] = '/'
            if 'query' not in url:
                url['query'] = None
            self._parts = ParseResult(**url)
        elif isinstance(url, str):
            self._parts = urlparse(url)

    def assemble(self):
        return self._parts.geturl()

    def __str__(self):
        return self.assemble()
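# Hypothetical constructions with the Url class above:
#
# >>> str(Url({'hostname': 'example.com', 'port': 8080, 'path': '/a'}))
# 'http://example.com:8080/a'
# >>> Url('https://blog.example.com/post').subdomain
# 'blog'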
def __init__(self, username, password,
             site='https://tutor-qa.openstax.org', email=None,
             email_username=None, email_password=None,
             driver_type='chrome', capabilities=None, pasta_user=None,
             wait_time=DEFAULT_WAIT_TIME, opera_driver='',
             existing_driver=None, **kwargs):
    """
    Base user constructor.

    username (string): website username
    password (string): website password
    site (string): website URL
    driver_type (string): web browser type
    pasta_user (PastaSauce): optional API access for saucelabs
    capabilities (dict): browser settings; copy object to avoid overwrite
        Defaults:
            DesiredCapabilities.ANDROID.copy()
            DesiredCapabilities.CHROME.copy()
            DesiredCapabilities.EDGE.copy()
            DesiredCapabilities.FIREFOX.copy()
            DesiredCapabilities.HTMLUNIT.copy()
            DesiredCapabilities.HTMLUNITWITHJS.copy()
            DesiredCapabilities.INTERNETEXPLORER.copy()
            DesiredCapabilities.IPAD.copy()
            DesiredCapabilities.IPHONE.copy()
            DesiredCapabilities.OPERA.copy()
            DesiredCapabilities.PHANTOMJS.copy()
            DesiredCapabilities.SAFARI.copy()
        Keys:
            platform
            browserName
            version
            javascriptEnabled
    wait_time (int): standard time, in seconds, to wait for Selenium commands
    opera_driver (string): Chromium location
    """
    self.username = username
    self.password = password
    parse = list(
        urlparse(site if urlparse(site).scheme else '%s%s' % ('//', site)))
    parse[0] = b'https'
    for index, value in enumerate(parse):
        parse[index] = value.decode('utf-8') if isinstance(value, bytes) \
            else value
    parse = ParseResult(*parse)
    self.url = parse.geturl()
    self.email = email
    self.email_username = email_username
    self.email_password = email_password
    super(User, self).__init__(driver_type=driver_type,
                               capabilities=capabilities,
                               pasta_user=pasta_user,
                               wait_time=wait_time,
                               opera_driver=opera_driver,
                               existing_driver=existing_driver,
                               **kwargs)