Example #1
    def _get_all(self, url):
        """
        Page through results the way GitHub implements pagination.
        Returns:
            A list of event dictionaries.
        """

        # Build up results
        results = []

        # start a counter and seed the first request URL
        page_num = 1
        next_url = url
        while True:
            print("Working on page {}".format(page_num))

            # get this page
            res = requests.get(url=next_url)
            res.raise_for_status()

            # add result to results
            res_list = json.loads(res.text)
            assert isinstance(res_list, list)

            # drop results in our list
            results += res_list

            # get next page url
            link_text = res.headers.get('Link', None)
            if link_text is None:
                print("Only one page of results found")
                break

            # Grab the URL of the next page; GitHub omits the 'next'
            # relation on the final page, so stop when it is absent
            next_urls = [
                x['url'] for x in parse_header_links(link_text)
                if x['rel'] == 'next'
            ]
            if not next_urls:
                break
            next_url = next_urls[0]

            page_num += 1

        return results
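For reference, parse_header_links (importable as requests.utils.parse_header_links) turns a raw Link header into a list of dicts, each with a 'url' key plus any link parameters such as 'rel'. A minimal sketch with a made-up header in the GitHub pagination style used above:

from requests.utils import parse_header_links

# Hypothetical Link header; not taken from any of the projects quoted here
link_header = (
    '<https://api.example.test/items?page=2>; rel="next", '
    '<https://api.example.test/items?page=4>; rel="last"'
)

# Yields one dict per link:
# [{'url': 'https://api.example.test/items?page=2', 'rel': 'next'},
#  {'url': 'https://api.example.test/items?page=4', 'rel': 'last'}]
for link in parse_header_links(link_header):
    print(link['rel'], link['url'])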
Example #2
    def parse_result_ok(self,
                        xml_content: str,
                        headers: Optional[Dict] = None) -> Dict[str, Any]:
        """Given an xml content as string, returns a deposit dict."""
        link_header = headers.get("Link", "") if headers else ""
        links = parse_header_links(link_header)
        data = ElementTree.fromstring(xml_content)
        total_result = data.findtext("swh:count", "0",
                                     namespaces=NAMESPACES).strip()
        keys = [
            "id",
            "reception_date",
            "complete_date",
            "external_id",
            "swhid",
            "status",
            "status_detail",
            "swhid_context",
            "origin_url",
        ]
        entries = data.findall("atom:entry", namespaces=NAMESPACES)
        deposits_d = [{
            key: deposit.findtext(f"swh:{key}", namespaces=NAMESPACES)
            for key in keys
            if deposit.find(f"swh:{key}", namespaces=NAMESPACES) is not None
        } for deposit in entries]

        return {
            "count": total_result,
            "deposits": deposits_d,
            **{entry["rel"]: entry["url"]
               for entry in links},
        }
Example #3
def iter_api_v3(endpoint, params=None, page_size=DEFAULT_PAGE_SIZE, wrap=None):
    """
    Helper function to iterate over API results (v3 version of the API)
    """
    effective_params = urlencode(dict(params or {}, page=page_size))
    url = f'{endpoint}?{effective_params}'
    while True:
        token = APICredentials.get_access_token()
        headers = {'Authorization': f'Bearer {token}'}
        resp = requests.get(url, headers=headers)
        resp.raise_for_status()
        json_resp = resp.json()
        for result in json_resp:
            if wrap:
                result = wrap(result)
            yield result

        # No "Link" to iterate
        if 'Link' not in resp.headers:
            break

        # Parse link and see if there's a "next" element
        links = parse_header_links(resp.headers['Link'])
        for link in links:
            if link.get('rel') == 'next':
                url = link['url']
                break
        else:
            break
Example #4
    def get_cursor_param(self) -> str:
        """Get cursor param from response header links.

        Example: Finding the next page link
        'https://XXX.com/cloudapi/1.0.0/edgeGateways/{gateway-id}/nat/rules?cursor=abcde'
        would return 'abcde'

        :return: cursor param
        :rtype: str
        """  # noqa: E501
        last_response_headers = self.get_last_response_headers()
        if not last_response_headers:
            return ''

        # Find link corresponding to the next page
        unparsed_links = last_response_headers[ResponseKeys.LINK]
        parsed_links = requests_utils.parse_header_links(unparsed_links)
        for link in parsed_links:
            if link[ResponseKeys.REL] == 'nextPage':
                # Parse cursor param
                cursor_url = link[ResponseKeys.URL]
                parsed_result: parse.ParseResult = parse.urlparse(cursor_url)
                parsed_query_map = parse.parse_qs(parsed_result.query)

                # The parse_qs function maps each query key to a list,
                # so we assume there is at most one cursor param and get that
                # element if the list is not empty
                cursor_list = parsed_query_map.get('cursor')
                if cursor_list:
                    return cursor_list[0]
                else:
                    return ''
        return ''
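To make the comment about parse_qs above concrete: it maps every query key to a list of values, which is why the code takes the first element of cursor_list. A small sketch with a hypothetical URL:

from urllib import parse

# Hypothetical next-page URL of the shape described in the docstring above
parsed = parse.urlparse('https://XXX.com/api/things?cursor=abcde')
print(parse.parse_qs(parsed.query))  # {'cursor': ['abcde']}, hence cursor_list[0]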
Example #5
    def post(self, request, pk, *args, **kwargs):
        subscription = get_object_or_404(Subscription, pk=pk)

        if subscription.secret:
            signature = request.META.get('HTTP_X_HUB_SIGNATURE', None)
            if signature is None:
                logger.debug("Ignoring payload for subscription {0}, missing "
                             "signature".format(subscription.pk))
                return HttpResponse('')

            hasher = hmac.new(subscription.secret.encode('utf-8'),
                              request.body,
                              hashlib.sha1)
            digest = 'sha1=%s' % hasher.hexdigest()
            if signature != digest:
                logger.debug("Mismatching signature for subscription {0}: "
                             "got {1}, expected {2}".format(subscription.pk,
                                                            signature,
                                                            digest))
                return HttpResponse('')

        self.links = None
        if 'HTTP_LINK' in request.META:
            self.links = parse_header_links(request.META['HTTP_LINK'])
        updated.send(sender=subscription, notification=request.body,
                     request=request, links=self.links)
        self.subscription = subscription
        self.handle_subscription()
        return HttpResponse('')
Example #6
def rewrite_links(link_header):
    """Rewrite Link header Github API endpoints to our own.

    <https://api.github.com/repositories/17839063/iss...&page=2>; rel="next",
    <https://api.github.com/repositories/17839063/iss...&page=4>; rel="last"

    is transformed into

    </api/issues?per_page=50&page=2>; rel="next",
    </api/issues?per_page=50&page=4>; rel="last" etc.
    """
    header_link_data = parse_header_links(link_header)
    for data in header_link_data:
        uri = data['url']
        uri_tuple = urllib.parse.urlsplit(uri)
        path = uri_tuple.path
        query = uri_tuple.query
        if path.startswith('/repositories/'):
            # drop the '/repositories/<id>/' prefix and keep the rest,
            # e.g. 'issues/398/comments'
            path = path[len('/repositories/'):].split('/', 1)[1]
        elif path.startswith('/search/issues'):
            path = 'issues/search'
        api_path = '{}{}'.format('/api/', path)
        data['url'] = urllib.parse.urlunsplit(('', '', api_path, query, ''))
    return format_link_header(header_link_data)
Example #7
 def get_all_devices(self, page=1):
     r, h = self.client.devices.get_devices(page=page, _request_options=self.uauth).result()
     for i in parse_header_links(h.headers["link"]):
         if i["rel"] == "next":
             page = int(dict(urlparse.parse_qs(urlparse.urlsplit(i["url"]).query))["page"][0])
             return r + self.get_all_devices(page=page)
     else:
         return r
Example #8
 def get_next_url(headers):
     links = headers.get("Link")
     if links is not None:
         for link in parse_header_links(links):
             if link['rel'] == 'next':
                 return link["url"]
     else:
         return links
Example #9
 def url(self, response=None):
     try:
         link = response.headers['Link']
         #last element in list contains next link
         u = parse_header_links(link)[-1]['url']
         return u
     except (KeyError, AttributeError):
         return self.__url
Example #10
def test_get_container(baseurl):
    r = requests.get(baseurl)

    assert 200 == r.status_code
    assert "text/turtle" in r.headers['content-type']
    links = parse_header_links(r.headers['link'])
    types = [l['url'] for l in links if (l['rel'] == 'type')]
    assert "http://www.w3.org/ns/ldp#BasicContainer" in types
    assert "http://www.w3.org/ns/ldp#Resource" in types
Example #11
def calc_tag(ctx, tag, ids, reset_tags, **filters):
    """
    Tag calculations. Use the parameters to limit the list to certain subsets of calculations
    """

    # filter out filters not specified
    params = {k: v for k, v in filters.items() if v is not None}

    if params and ids:
        raise click.UsageError("can't specify both specific IDs and filters")

    if params:
        click.echo("Fetching calculations...", err=True)
        # make sure that all calculations are found
        params['hide_tags'] = ['dummy-tag-here']
        req = ctx.obj['session'].get(ctx.obj['calc_url'], params=params)
        req.raise_for_status()
        ids = [c['id'] for c in req.json()]

        if req.links.get('last'):
            while True:
                try:
                    next_link = [
                        l['url']
                        for l in parse_header_links(req.headers['Link'])
                        if l['rel'] == 'next'
                    ][0]
                except IndexError:
                    break

                req = ctx.obj['session'].get(next_link, params=params)
                req.raise_for_status()
                ids += [c['id'] for c in req.json()]

    if params and ids:  # if selected by params and the search returned something
        click.confirm(
            "Are you sure you want to tag {} calculations with '{}'?".format(
                len(ids), tag),
            abort=True)

    for cid in ids:
        click.echo("Setting tag '{}' for calculation {}".format(tag, cid),
                   err=True)
        req = ctx.obj['session'].get(ctx.obj['calc_url'] + '/{}'.format(cid))
        req.raise_for_status()
        calc = req.json()

        metadata = calc['metadata']

        if 'tags' not in metadata or reset_tags:
            metadata['tags'] = []

        metadata['tags'].append(tag)

        req = ctx.obj['session'].patch(calc['_links']['self'],
                                       json={'metadata': metadata})
        req.raise_for_status()
Example #12
def sanitize_link(link_header):
    """Remove any oauth tokens from the Link header from GitHub.

    See also rewrite_links.
    """
    header_link_data = parse_header_links(link_header)
    for data in header_link_data:
        data['url'] = remove_oauth(data['url'])
    return format_link_header(header_link_data)
Example #13
 def getAllDevices(self, page=1, sort=None, has_group=None):
     r, h = self.client.devices.get_devices(page=page, sort=sort, has_group=has_group,
                                            Authorization="foo").result()
     for i in parse_header_links(h.headers["link"]):
         if i["rel"] == "next":
             page = int(dict(urlparse.parse_qs(urlparse.urlsplit(i["url"]).query))["page"][0])
             return r + self.getAllDevices(page=page, sort=sort)
     else:
         return r
Example #14
 def get_devices(self, page=1, status=None, auth=None):
     if auth is None:
         auth=self.uauth
     r, h = self.client.devices.get_devices(page=page, status=status, _request_options={"headers": auth}).result()
     for i in parse_header_links(h.headers["link"]):
         if i["rel"] == "next":
             page = int(dict(urlparse.parse_qs(urlparse.urlsplit(i["url"]).query))["page"][0])
             return r + self.get_devices(page=page, auth=auth)
     else:
         return r
Example #15
def send_webmention(source_url,
                    target_url,
                    config=LinkbackConfig(),
                    resp_content=None,
                    resp_headers=None):
    try:
        if resp_content is None:
            resp_content, resp_headers = requests_get_with_max_size(
                target_url, config)
        # WebMention server autodiscovery:
        server_uri = None
        link_header = resp_headers.get('Link')
        if link_header:
            try:
                server_uri = next(
                    lh.get('url') for lh in parse_header_links(link_header)
                    if lh.get('url') and lh.get('rel') in WEBMENTION_POSS_REL)
            except StopIteration:
                pass
        if not server_uri and resp_headers.get('Content-Type',
                                               '').startswith('text/html'):
            # As a fallback, we try parsing the HTML, looking for <link> elements
            doc_soup = BeautifulSoup(
                resp_content, BS4_HTML_PARSER
            )  # HTML parsing could be factored out of both methods
            for link in doc_soup.find_all(rel=WEBMENTION_POSS_REL, href=True):
                if link.get('href'):
                    server_uri = link.get('href')
        if not server_uri:
            return False
        LOGGER.debug("WebMention URI detected: %s", server_uri)
        server_uri = urljoin(target_url, server_uri)
        # Performing WebMention request:
        response = requests.post(server_uri,
                                 headers={'User-Agent': config.user_agent},
                                 timeout=config.timeout,
                                 data={
                                     'source': source_url,
                                     'target': target_url
                                 },
                                 verify=config.cert_verify)
        response.raise_for_status()
        LOGGER.info(
            "WebMention notification sent for URL %s, endpoint response: %s",
            target_url, response.text)
        return True
    except (ConnectionError, HTTPError, RequestException, SSLError) as error:
        LOGGER.error("Failed to send WebMention for link url %s: [%s] %s",
                     target_url, error.__class__.__name__, error)
        return False
    except Exception:  # unexpected exception => we display the stacktrace:
        LOGGER.exception("Failed to send WebMention for link url %s",
                         target_url)
        return False
Example #16
    async def getter(url):
        async with session.get(url) as response:
            if response.status == codes.ok:
                for person in await response.json():
                    yield person

                links = parse_header_links(response.headers.get('link', ''))
                for link in links:
                    if link['rel'] == 'next':
                        async for person in getter(link['url']):
                            yield person
Example #17
    def links_to_dict(header):
        links_dict = {}

        if header:
            header_links = parse_header_links(header)

            for link in header_links:
                key = link.get("rel") or link.get("url")
                links_dict[key] = link

        return links_dict
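A hedged usage sketch for the helper above (treated here as a plain function; its indentation suggests it actually lives inside a class or another function), with a hypothetical header:

# links_to_dict keys each parsed link by its 'rel' value,
# falling back to the URL when no rel parameter is present.
header = '<https://api.example.test/items?page=2>; rel="next"'
print(links_to_dict(header))
# {'next': {'url': 'https://api.example.test/items?page=2', 'rel': 'next'}}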
Example #18
    def has_next(self, response=None):
        if response is None:
            return True
        try:
            link = response.headers['Link']
            # last element in list contains the next link
            rel = parse_header_links(link)[-1]['rel']
        except KeyError:
            return False

        return rel == 'next'
Example #19
 def get_next_url(self, headers):
     links = headers.get("Link")
     if links is not None:
         self.assertEqual(headers['X-OpenAPI-Pagination'], 'true')
         self.assertEqual(headers['X-OpenAPI-Paginated-Content-Key'],
                          'results')
         for link in parse_header_links(links):
             if link['rel'] == 'next':
                 return link["url"]
     else:
         self.assertEqual(headers['X-OpenAPI-Pagination'], 'false')
         return links
Example #20
def fetch_all_starred_repos():
    url = 'user/starred'
    while True:
        current_app.logger.debug('Fetching %s...', url)
        resp = github.get(url)
        yield from resp.data
        link = resp._resp.headers.get('Link', '')
        for i in parse_header_links(link):
            if i['rel'] == 'next':
                url = i['url']
                break
        else:
            return
Example #21
 def _get_links(self, response, relation_type):
     """
     Retrieves all Link URIs of relation_type from the response.
     :param requests.Response response: The requests HTTP response.
     :param str relation_type: The relation type to filter by.
     """
     # Can't use response.links directly because it drops multiple links
     # of the same relation type, which is possible in RFC8555 responses.
     if 'Link' not in response.headers:
         return []
     links = parse_header_links(response.headers['Link'])
     return [l['url'] for l in links
             if 'rel' in l and 'url' in l and l['rel'] == relation_type]
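The comment above about response.links dropping duplicates follows from how requests builds that property: parsed links are keyed by their rel value (or url), so two links with the same relation type collapse into a single entry, while parse_header_links keeps them all. A small sketch with a made-up header:

from requests.utils import parse_header_links

# Hypothetical header carrying two links of the same relation type
link_header = ('<https://acme.example.test/dir-1>; rel="index", '
               '<https://acme.example.test/dir-2>; rel="index"')

links = parse_header_links(link_header)
print(len(links))  # 2 -- both links survive

# Roughly what requests.Response.links does: keyed by rel, so only one survives
by_rel = {link.get('rel') or link.get('url'): link for link in links}
print(len(by_rel))  # 1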
Example #22
def return_urit_and_urir(link_headers):

    header_links = parse_header_links(link_headers)

    for item in header_links:

        if item['rel'] == 'timemap':
            urit = item['url']

        if 'original' in item['rel']:
            # handles original latest-version and similar
            urir = item['url']

    return (urit, urir)
Example #23
    def get_topic_hub_url(link_header):
        """
        Uses the parse_header_links method in requests to parse link
        headers and return the topic and hub urls.
        """

        links = parse_header_links(link_header)
        topic = ""
        hub_url = ""
        for link in links:
            if link.get('rel') == 'self':
                topic = link.get('url')
            elif link.get('rel') == 'hub':
                hub_url = link.get('url')
        return (topic, hub_url)
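A hedged usage sketch for the helper above (called here as a plain function; its indentation suggests it is defined inside a class), using a made-up WebSub-style discovery header:

# Hypothetical Link header advertising a hub and the topic ('self') URL
link_header = ('<https://hub.example.test/>; rel="hub", '
               '<https://blog.example.test/feed>; rel="self"')

topic, hub_url = get_topic_hub_url(link_header)
# topic   -> 'https://blog.example.test/feed'
# hub_url -> 'https://hub.example.test/'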
Example #25
def get_header_links(r: Response, rel=None):  # for REST API pagination
    try:
        rels = parse_header_links(r.headers.get("link"))
        if rel is None:
            return rels
        for d in rels:
            currel = d.get("rel", None)
            if currel == rel:
                return d.get("url", None)
        return None
    except Exception:
        if rel is None:
            return []
        else:
            return None
Example #26
    async def refresh(self):
        header_links = self.headers.get('link')
        if not header_links:
            raise StopAsyncIteration
        links = request_utils.parse_header_links(header_links)
        next_url = None
        for link in links:
            if link.get('rel') == 'next':
                next_url = link['url']
                break
        else:
            raise StopAsyncIteration

        async with self.session.get(next_url) as response:
            self.headers = response.headers
            self.events = await response.json()
Example #27
    def fetch_collection_paging_response(self, codes, replica: str,
                                         per_page: int):
        """
        GET /collections and iterate through the paging responses containing all of a user's collections.

        If fetch_all is not True, this will return as soon as it gets one successful 206 paging reply.
        """
        url = UrlBuilder().set(path="/v1/collections/")
        url.add_query("replica", replica)
        url.add_query("per_page", str(per_page))
        resp_obj = self.assertGetResponse(
            str(url), codes, headers=get_auth_header(authorized=True))

        if codes == requests.codes.bad_request:
            return True

        link_header = resp_obj.response.headers.get('Link')
        paging_response = False

        while link_header:
            # Make sure we're getting the expected response status code
            self.assertEqual(resp_obj.response.status_code,
                             requests.codes.partial)
            paging_response = True
            link = parse_header_links(link_header)[0]
            self.assertEqual(link['rel'], 'next')
            parsed = urlsplit(link['url'])
            url = UrlBuilder().set(path=parsed.path,
                                   query=parse_qsl(parsed.query),
                                   fragment=parsed.fragment)
            self.assertEqual(resp_obj.response.headers['X-OpenAPI-Pagination'],
                             'true')
            self.assertEqual(
                resp_obj.response.headers['X-OpenAPI-Paginated-Content-Key'],
                'collections')
            resp_obj = self.assertGetResponse(
                str(url),
                expected_code=codes,
                headers=get_auth_header(authorized=True))
            link_header = resp_obj.response.headers.get('Link')

        self.assertEqual(resp_obj.response.headers['X-OpenAPI-Pagination'],
                         'false')
        self.assertEqual(resp_obj.response.status_code, requests.codes.ok)
        return paging_response
Example #28
 def getAllDevices(self,
                   page=1,
                   sort=None,
                   has_group=None,
                   JWT="foo.bar.baz"):
     if not JWT.startswith("Bearer "):
         JWT = "Bearer " + JWT
     r, h = self.client.Management_API.List_Device_Inventories(
         page=page, sort=sort, has_group=has_group,
         Authorization=JWT).result()
     for i in parse_header_links(h.headers["link"]):
         if i["rel"] == "next":
             page = int(
                 dict(urlparse.parse_qs(urlparse.urlsplit(
                     i["url"]).query))["page"][0])
             return r + self.getAllDevices(page=page, sort=sort)
     else:
         return r
Example #29
    def _parse(self, header: str):
        if not header or not isinstance(header, str):
            return

        header = header.strip()

        links = parse_header_links(header)
        for link in links:
            try:
                rel = link['rel']
                url = link['url']
            except KeyError:
                # ignore links not having relationship and url
                continue
            else:
                try:
                    self.__dict__[rel] = url
                except KeyError:
                    # ignore links we do not support
                    continue
Example #30
 def get_account_course_count(self, term_id=None):
     """
     does a request to the account courses endpoint using "1" as the
     per_page value. the response will contain a Link header which we can parse
     to get the total course count. Note: HEAD requests work with local
     dev instances of canvas but seem to be blocked by canvas cloud
     :param term_id:
     :return: integer
     """
     params = { 'per_page': 1 }
     if term_id is not None:
         params['enrollment_term_id'] = 'sis_term_id:%s' % term_id
     resp = self.make_request('GET', self.account_courses.path, params=params)
     # parse the pagination urls canvas inserts in the response Link: header
     page_links = parse_header_links(resp.headers['link'])
     # parse the url marked 'last' to get total number of pages, i.e. courses
     last_link = next((x for x in page_links if x['rel'] == 'last'), None)
     url_parts = urlparse(last_link['url'])
     page_params = QueryDict(url_parts.query, encoding='utf-8')
     return int(page_params['page'])
Example #31
def _scroll_results(api_client, url):
    """Iterates through pages of results, and returns them all."""
    results = []

    while True:
        rv = check_api_get_responses(api_client, url, status_code=200)

        results.extend(rv.data)

        if "Link" in rv:
            for link in parse_header_links(rv["Link"]):
                if link["rel"] == "next":
                    # Found link to next page of results
                    url = link["url"]
                    break
            else:
                # No link with 'rel=next'
                break
        else:
            # No Link header
            break

    return results
Example #32
def test_parse_header_links(value, expected):
    assert parse_header_links(value) == expected
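The test above presumably sits under a parametrize decorator that the snippet does not show; a hedged sketch of how value/expected pairs could be supplied (the cases below are illustrative, not the project's own):

import pytest
from requests.utils import parse_header_links

@pytest.mark.parametrize("value, expected", [
    ('<http://example.test/page/2>; rel="next"',
     [{"url": "http://example.test/page/2", "rel": "next"}]),
    ('<http://example.test/a>; rel="next", <http://example.test/b>; rel="last"',
     [{"url": "http://example.test/a", "rel": "next"},
      {"url": "http://example.test/b", "rel": "last"}]),
])
def test_parse_header_links(value, expected):
    assert parse_header_links(value) == expected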
Example #33
def merakiRequest(p_apiKey,
                  p_httpVerb,
                  p_endpoint,
                  p_additionalHeaders=None,
                  p_queryItems=None,
                  p_requestBody=None,
                  p_verbose=False,
                  p_retry=0):
    #returns success, errors, responseHeaders, responseBody

    if p_retry > API_MAX_RETRIES:
        if (p_verbose):
            print("ERROR: Reached max retries")
        return False, None, None, None

    bearerString = "Bearer " + p_apiKey
    headers = {"Authorization": bearerString}
    if not p_additionalHeaders is None:
        headers.update(p_additionalHeaders)

    query = ""
    if not p_queryItems is None:
        query = "?" + urlencode(p_queryItems)
    url = API_BASE_URL + p_endpoint + query

    verb = p_httpVerb.upper()

    session = NoRebuildAuthSession()

    try:
        if (p_verbose):
            print(verb, url)
        if verb == "GET":
            r = session.get(url,
                            headers=headers,
                            timeout=(API_CONNECT_TIMEOUT,
                                     API_TRANSMIT_TIMEOUT))
        elif verb == "PUT":
            if not p_requestBody is None:
                if (p_verbose):
                    print("body", p_requestBody)
                r = session.put(url,
                                headers=headers,
                                json=p_requestBody,
                                timeout=(API_CONNECT_TIMEOUT,
                                         API_TRANSMIT_TIMEOUT))
        elif verb == "POST":
            if not p_requestBody is None:
                if (p_verbose):
                    print("body", p_requestBody)
                r = session.post(url,
                                 headers=headers,
                                 json=p_requestBody,
                                 timeout=(API_CONNECT_TIMEOUT,
                                          API_TRANSMIT_TIMEOUT))
        elif verb == "DELETE":
            r = session.delete(url,
                               headers=headers,
                               timeout=(API_CONNECT_TIMEOUT,
                                        API_TRANSMIT_TIMEOUT))
        else:
            return False, None, None, None
    except:
        return False, None, None, None

    if (p_verbose):
        print(r.status_code)

    success = r.status_code in range(200, 299)
    errors = None
    responseHeaders = None
    responseBody = None

    if r.status_code == API_STATUS_RATE_LIMIT:
        if (p_verbose):
            print("Hit max request rate. Retrying %s after %s seconds" %
                  (p_retry + 1, r.headers["Retry-After"]))
        time.sleep(int(r.headers["Retry-After"]))
        success, errors, responseHeaders, responseBody = merakiRequest(
            p_apiKey, p_httpVerb, p_endpoint, p_additionalHeaders,
            p_queryItems, p_requestBody, p_verbose, p_retry + 1)
        return success, errors, responseHeaders, responseBody

    try:
        rjson = r.json()
    except:
        rjson = None

    if not rjson is None:
        if "errors" in rjson:
            errors = rjson["errors"]
            if (p_verbose):
                print(errors)
        else:
            responseBody = rjson

    if "Link" in r.headers:
        parsedLinks = utils.parse_header_links(r.headers["Link"])
        for link in parsedLinks:
            if link["rel"] == "next":
                if (p_verbose):
                    print("Next page:", link["url"])
                splitLink = link["url"].split("/api/v1")
                success, errors, responseHeaders, nextBody = merakiRequest(
                    p_apiKey,
                    p_httpVerb,
                    splitLink[1],
                    p_additionalHeaders=p_additionalHeaders,
                    p_requestBody=p_requestBody,
                    p_verbose=p_verbose)
                if success:
                    if not responseBody is None:
                        responseBody.append(nextBody)
                else:
                    responseBody = None

    return success, errors, responseHeaders, responseBody