def _get_all(self, url):
    """Page through results the way GitHub implements pagination.

    Returns:
        A list of event dictionaries.
    """
    results = []
    page_num = 1
    next_url = url
    while True:
        print("Working on page {}".format(page_num))
        # get this page
        res = requests.get(url=next_url)
        res.raise_for_status()
        # add this page's results to our list
        res_list = json.loads(res.text)
        assert isinstance(res_list, list)
        results += res_list
        # look for a "next" link; GitHub omits it on the last page
        link_text = res.headers.get('Link', None)
        if link_text is None:
            print("Only one page of results found")
            break
        next_urls = [
            x['url'] for x in parse_header_links(link_text)
            if x['rel'] == 'next'
        ]
        if not next_urls:
            break
        next_url = next_urls[0]
        page_num += 1
    return results

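# A minimal, self-contained sketch (not part of the project code above) showing
# the shape of the dicts that requests' parse_header_links() produces for a
# GitHub-style pagination header; the URLs are made up for illustration.
from requests.utils import parse_header_links

sample_link_header = (
    '<https://api.github.com/user/repos?page=2>; rel="next", '
    '<https://api.github.com/user/repos?page=5>; rel="last"'
)
for link in parse_header_links(sample_link_header):
    # Each entry is a dict such as {'url': 'https://...', 'rel': 'next'}
    print(link['rel'], link['url'])
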
def parse_result_ok(
    self, xml_content: str, headers: Optional[Dict] = None
) -> Dict[str, Any]:
    """Given an xml content as string, returns a deposit dict."""
    link_header = headers.get("Link", "") if headers else ""
    links = parse_header_links(link_header)
    data = ElementTree.fromstring(xml_content)
    total_result = data.findtext("swh:count", "0", namespaces=NAMESPACES).strip()
    keys = [
        "id",
        "reception_date",
        "complete_date",
        "external_id",
        "swhid",
        "status",
        "status_detail",
        "swhid_context",
        "origin_url",
    ]
    entries = data.findall("atom:entry", namespaces=NAMESPACES)
    deposits_d = [
        {
            key: deposit.findtext(f"swh:{key}", namespaces=NAMESPACES)
            for key in keys
            if deposit.find(f"swh:{key}", namespaces=NAMESPACES) is not None
        }
        for deposit in entries
    ]
    return {
        "count": total_result,
        "deposits": deposits_d,
        **{entry["rel"]: entry["url"] for entry in links},
    }

def iter_api_v3(endpoint, params=None, page_size=DEFAULT_PAGE_SIZE, wrap=None):
    """Helper function to iterate over API results (v3 version of the API)."""
    effective_params = urlencode(dict(params or {}, page=page_size))
    url = f'{endpoint}?{effective_params}'
    while True:
        token = APICredentials.get_access_token()
        headers = {'Authorization': f'Bearer {token}'}
        resp = requests.get(url, headers=headers)
        resp.raise_for_status()
        json_resp = resp.json()
        for result in json_resp:
            if wrap:
                result = wrap(result)
            yield result
        # No "Link" header means there is nothing left to iterate
        if 'Link' not in resp.headers:
            break
        # Parse the Link header and see if there's a "next" element
        links = parse_header_links(resp.headers['Link'])
        for link in links:
            if link.get('rel') == 'next':
                url = link['url']
                break
        else:
            break

def get_cursor_param(self) -> str:
    """Get cursor param from response header links.

    Example: Finding the next page link
    'https://XXX.com/cloudapi/1.0.0/edgeGateways/{gateway-id}/nat/rules?cursor=abcde'
    would return 'abcde'

    :return: cursor param

    :rtype: str
    """  # noqa: E501
    last_response_headers = self.get_last_response_headers()
    if not last_response_headers:
        return ''
    # Find the link corresponding to the next page
    unparsed_links = last_response_headers[ResponseKeys.LINK]
    parsed_links = requests_utils.parse_header_links(unparsed_links)
    for link in parsed_links:
        if link[ResponseKeys.REL] == 'nextPage':
            # Parse cursor param
            cursor_url = link[ResponseKeys.URL]
            parsed_result: parse.ParseResult = parse.urlparse(cursor_url)
            parsed_query_map = parse.parse_qs(parsed_result.query)
            # The parse_qs function maps each query key to a list, so we
            # assume there is at most one cursor param and get that element
            # if the list is not empty
            cursor_list = parsed_query_map.get('cursor')
            if cursor_list:
                return cursor_list[0]
            return ''
    return ''

def post(self, request, pk, *args, **kwargs):
    subscription = get_object_or_404(Subscription, pk=pk)

    if subscription.secret:
        signature = request.META.get('HTTP_X_HUB_SIGNATURE', None)
        if signature is None:
            logger.debug("Ignoring payload for subscription {0}, missing "
                         "signature".format(subscription.pk))
            return HttpResponse('')

        hasher = hmac.new(subscription.secret.encode('utf-8'),
                          request.body, hashlib.sha1)
        digest = 'sha1=%s' % hasher.hexdigest()
        if signature != digest:
            logger.debug("Mismatching signature for subscription {0}: "
                         "got {1}, expected {2}".format(subscription.pk,
                                                        signature, digest))
            return HttpResponse('')

    self.links = None
    if 'HTTP_LINK' in request.META:
        self.links = parse_header_links(request.META['HTTP_LINK'])

    updated.send(sender=subscription, notification=request.body,
                 request=request, links=self.links)

    self.subscription = subscription
    self.handle_subscription()
    return HttpResponse('')

def rewrite_links(link_header):
    """Rewrite Link header GitHub API endpoints to our own.

    <https://api.github.com/repositories/17839063/iss...&page=2>; rel="next",
    <https://api.github.com/repositories/17839063/iss...&page=4>; rel="last"

    is transformed into

    </api/issues?per_page=50&page=2>; rel="next",
    </api/issues?per_page=50&page=4>; rel="last"

    etc.
    """
    header_link_data = parse_header_links(link_header)
    for data in header_link_data:
        uri = data['url']
        uri_tuple = urllib.parse.urlsplit(uri)
        path = uri_tuple.path
        query = uri_tuple.query
        if path.startswith('/repositories/'):
            # strip '/repositories/' and take the second element of
            # ['17839063', 'issues/398/comments']
            path = path.lstrip('/repositories/').split('/', 1)[1]
        elif path.startswith('/search/issues'):
            path = 'issues/search'
        api_path = '{}{}'.format('/api/', path)
        data['url'] = urllib.parse.urlunsplit(('', '', api_path, query, ''))
    return format_link_header(header_link_data)

def get_all_devices(self, page=1):
    r, h = self.client.devices.get_devices(
        page=page, _request_options=self.uauth).result()
    for i in parse_header_links(h.headers["link"]):
        if i["rel"] == "next":
            page = int(dict(urlparse.parse_qs(
                urlparse.urlsplit(i["url"]).query))["page"][0])
            return r + self.get_all_devices(page=page)
        else:
            return r

def get_next_url(headers):
    links = headers.get("Link")
    if links is not None:
        for link in parse_header_links(links):
            if link['rel'] == 'next':
                return link["url"]
    else:
        return links

def url(self, response=None):
    try:
        link = response.headers['Link']
        # last element in list contains next link
        u = parse_header_links(link)[-1]['url']
        return u
    except (KeyError, AttributeError):
        return self.__url

def test_get_container(baseurl):
    r = requests.get(baseurl)
    assert 200 == r.status_code
    assert "text/turtle" in r.headers['content-type']
    links = parse_header_links(r.headers['link'])
    types = [l['url'] for l in links if (l['rel'] == 'type')]
    assert "http://www.w3.org/ns/ldp#BasicContainer" in types
    assert "http://www.w3.org/ns/ldp#Resource" in types

def calc_tag(ctx, tag, ids, reset_tags, **filters):
    """
    Tag calculations.
    Use the parameters to limit the list to certain subsets of calculations
    """
    # filter out filters not specified
    params = {k: v for k, v in filters.items() if v is not None}

    if params and ids:
        raise click.UsageError("can't specify both specific IDs and filters")

    if params:
        click.echo("Fetching calculations...", err=True)
        params['hide_tags'] = ['dummy-tag-here']  # make sure that all calculations are found
        req = ctx.obj['session'].get(ctx.obj['calc_url'], params=params)
        req.raise_for_status()
        ids = [c['id'] for c in req.json()]

        if req.links['last']:
            while True:
                try:
                    next_link = [
                        l['url']
                        for l in parse_header_links(req.headers['Link'])
                        if l['rel'] == 'next'
                    ][0]
                except IndexError:
                    break

                req = ctx.obj['session'].get(next_link, params=params)
                req.raise_for_status()
                ids += [c['id'] for c in req.json()]

    if params and ids:  # if selected by params and the search returned something
        click.confirm(
            "Are you sure you want to tag {} calculations with '{}'?".format(
                len(ids), tag),
            abort=True)

    for cid in ids:
        click.echo("Setting tag '{}' for calculation {}".format(tag, cid),
                   err=True)
        req = ctx.obj['session'].get(ctx.obj['calc_url'] + '/{}'.format(cid))
        req.raise_for_status()
        calc = req.json()

        metadata = calc['metadata']
        if 'tags' not in metadata or reset_tags:
            metadata['tags'] = []
        metadata['tags'].append(tag)

        req = ctx.obj['session'].patch(calc['_links']['self'],
                                       json={'metadata': metadata})
        req.raise_for_status()

def sanitize_link(link_header):
    """Remove any oauth tokens from the Link header from GitHub.

    See also rewrite_links.
    """
    header_link_data = parse_header_links(link_header)
    for data in header_link_data:
        data['url'] = remove_oauth(data['url'])
    return format_link_header(header_link_data)

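# Both rewrite_links and sanitize_link above return through format_link_header(),
# which is not included in these snippets. A minimal sketch of what such a helper
# could look like (an assumption for illustration, not the project's actual code):
def format_link_header(link_data):
    # Serialize parsed link dicts back into an RFC 5988 style Link header value,
    # e.g. '<.../issues?page=2>; rel="next", <.../issues?page=4>; rel="last"'
    return ', '.join('<{url}>; rel="{rel}"'.format(**link) for link in link_data)
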
def getAllDevices(self, page=1, sort=None, has_group=None):
    r, h = self.client.devices.get_devices(page=page,
                                           sort=sort,
                                           has_group=has_group,
                                           Authorization="foo").result()
    for i in parse_header_links(h.headers["link"]):
        if i["rel"] == "next":
            page = int(dict(urlparse.parse_qs(
                urlparse.urlsplit(i["url"]).query))["page"][0])
            return r + self.getAllDevices(page=page, sort=sort)
        else:
            return r

def get_devices(self, page=1, status=None, auth=None):
    if auth is None:
        auth = self.uauth
    r, h = self.client.devices.get_devices(
        page=page, status=status,
        _request_options={"headers": auth}).result()
    for i in parse_header_links(h.headers["link"]):
        if i["rel"] == "next":
            page = int(dict(urlparse.parse_qs(
                urlparse.urlsplit(i["url"]).query))["page"][0])
            return r + self.get_devices(page=page, auth=auth)
        else:
            return r

def send_webmention(source_url, target_url, config=LinkbackConfig(),
                    resp_content=None, resp_headers=None):
    try:
        if resp_content is None:
            resp_content, resp_headers = requests_get_with_max_size(
                target_url, config)
        # WebMention server autodiscovery:
        server_uri = None
        link_header = resp_headers.get('Link')
        if link_header:
            try:
                server_uri = next(
                    lh.get('url') for lh in parse_header_links(link_header)
                    if lh.get('url') and lh.get('rel') in WEBMENTION_POSS_REL)
            except StopIteration:
                pass
        if not server_uri and resp_headers.get('Content-Type',
                                               '').startswith('text/html'):
            # As a fallback, we try parsing the HTML, looking for <link> elements
            doc_soup = BeautifulSoup(
                resp_content, BS4_HTML_PARSER
            )  # HTML parsing could be factored out of both methods
            for link in doc_soup.find_all(rel=WEBMENTION_POSS_REL, href=True):
                if link.get('href'):
                    server_uri = link.get('href')
        if not server_uri:
            return False
        LOGGER.debug("WebMention URI detected: %s", server_uri)
        server_uri = urljoin(target_url, server_uri)
        # Performing WebMention request:
        response = requests.post(server_uri,
                                 headers={'User-Agent': config.user_agent},
                                 timeout=config.timeout,
                                 data={
                                     'source': source_url,
                                     'target': target_url
                                 },
                                 verify=config.cert_verify)
        response.raise_for_status()
        LOGGER.info(
            "WebMention notification sent for URL %s, endpoint response: %s",
            target_url, response.text)
        return True
    except (ConnectionError, HTTPError, RequestException, SSLError) as error:
        LOGGER.error("Failed to send WebMention for link url %s: [%s] %s",
                     target_url, error.__class__.__name__, error)
        return False
    except Exception:
        # unexpected exception => we display the stacktrace:
        LOGGER.exception("Failed to send WebMention for link url %s", target_url)
        return False

async def getter(url):
    async with session.get(url) as response:
        if response.status == codes.ok:
            for person in await response.json():
                yield person
            links = parse_header_links(response.headers.get('link', ''))
            for link in links:
                if link['rel'] == 'next':
                    async for person in getter(link['url']):
                        yield person

def links_to_dict(header):
    links_dict = {}
    if header:
        header_links = parse_header_links(header)
        for link in header_links:
            key = link.get("rel") or link.get("url")
            links_dict[key] = link
    return links_dict

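# A small usage sketch for links_to_dict above (example data only), assuming
# parse_header_links has been imported from requests.utils:
example_header = (
    '<https://api.example.com/items?page=2>; rel="next", '
    '<https://api.example.com/items?page=9>; rel="last"'
)
# Produces {'next': {'url': ..., 'rel': 'next'}, 'last': {'url': ..., 'rel': 'last'}}
print(links_to_dict(example_header))
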
def has_next(self, response=None):
    try:
        link = response.headers['Link']
        # last element in list contains next link
        rel = parse_header_links(link)[-1]['rel']
    except KeyError:
        return False
    if (response is None) or (rel == 'next'):
        return True
    else:
        return False

def get_next_url(self, headers):
    links = headers.get("Link")
    if links is not None:
        self.assertEqual(headers['X-OpenAPI-Pagination'], 'true')
        self.assertEqual(headers['X-OpenAPI-Paginated-Content-Key'], 'results')
        for link in parse_header_links(links):
            if link['rel'] == 'next':
                return link["url"]
    else:
        self.assertEqual(headers['X-OpenAPI-Pagination'], 'false')
        return links

def fetch_all_starred_repos():
    url = 'user/starred'
    while True:
        current_app.logger.debug('Fetching %s...', url)
        resp = github.get(url)
        yield from resp.data
        link = resp._resp.headers.get('Link', '')
        for i in parse_header_links(link):
            if i['rel'] == 'next':
                url = i['url']
                break
        else:
            return

def _get_links(self, response, relation_type):
    """
    Retrieves all Link URIs of relation_type from the response.

    :param requests.Response response: The requests HTTP response.
    :param str relation_type: The relation type to filter by.
    """
    # Can't use response.links directly because it drops multiple links
    # of the same relation type, which is possible in RFC8555 responses.
    if 'Link' not in response.headers:
        return []
    links = parse_header_links(response.headers['Link'])
    return [l['url'] for l in links
            if 'rel' in l and 'url' in l and l['rel'] == relation_type]

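# A standalone sketch (example data only) of why the method above avoids
# response.links: requests keys that property by rel, so duplicate relation
# types collapse, while parse_header_links keeps every entry.
import requests
from requests.utils import parse_header_links

resp = requests.models.Response()
resp.headers['Link'] = (
    '<https://example.org/acme/cert/1>; rel="alternate", '
    '<https://example.org/acme/cert/2>; rel="alternate"'
)
print(len(resp.links))                                # 1 -- duplicate rel is dropped
print(len(parse_header_links(resp.headers['Link'])))  # 2 -- both links preserved
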
def return_urit_and_urir(link_headers):
    header_links = parse_header_links(link_headers)
    for item in header_links:
        if item['rel'] == 'timemap':
            urit = item['url']
        # handles "original latest-version" and similar
        if 'original' in item['rel']:
            urir = item['url']
    return (urit, urir)

def get_topic_hub_url(link_header):
    """
    Uses the parse_header_links method in requests to parse link headers
    and return the topic and hub urls.
    """
    links = parse_header_links(link_header)
    topic = ""
    hub_url = ""
    for link in links:
        if link.get('rel') == 'self':
            topic = link.get('url')
        elif link.get('rel') == 'hub':
            hub_url = link.get('url')
    return (topic, hub_url)

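# A usage sketch for get_topic_hub_url above, with a made-up WebSub-style Link
# header; it assumes parse_header_links has been imported from requests.utils.
websub_header = (
    '<https://example.com/feed.xml>; rel="self", '
    '<https://hub.example.com/>; rel="hub"'
)
topic, hub_url = get_topic_hub_url(websub_header)
# topic == 'https://example.com/feed.xml', hub_url == 'https://hub.example.com/'
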
def get_header_links(r: Response, rel=None):
    # for REST API pagination
    try:
        rels = parse_header_links(r.headers.get("link"))
        if rel is None:
            return rels
        for d in rels:
            currel = d.get("rel", None)
            if currel == rel:
                return d.get("url", None)
        return None
    except Exception:
        if rel is None:
            return []
        else:
            return None

async def refresh(self):
    header_links = self.headers.get('link')
    if not header_links:
        raise StopAsyncIteration
    links = request_utils.parse_header_links(header_links)
    next_url = None
    for link in links:
        if link.get('rel') == 'next':
            next_url = link['url']
            break
    else:
        raise StopAsyncIteration
    async with self.session.get(next_url) as response:
        self.headers = response.headers
        self.events = await response.json()

def fetch_collection_paging_response(self, codes, replica: str, per_page: int):
    """
    GET /collections and iterate through the paging responses containing all
    of a user's collections.

    If fetch_all is not True, this will return as soon as it gets one
    successful 206 paging reply.
    """
    url = UrlBuilder().set(path="/v1/collections/")
    url.add_query("replica", replica)
    url.add_query("per_page", str(per_page))

    resp_obj = self.assertGetResponse(str(url),
                                      codes,
                                      headers=get_auth_header(authorized=True))
    if codes == requests.codes.bad_request:
        return True

    link_header = resp_obj.response.headers.get('Link')
    paging_response = False
    while link_header:
        # Make sure we're getting the expected response status code
        self.assertEqual(resp_obj.response.status_code, requests.codes.partial)
        paging_response = True

        link = parse_header_links(link_header)[0]
        self.assertEquals(link['rel'], 'next')
        parsed = urlsplit(link['url'])
        url = UrlBuilder().set(path=parsed.path,
                               query=parse_qsl(parsed.query),
                               fragment=parsed.fragment)

        self.assertEqual(resp_obj.response.headers['X-OpenAPI-Pagination'], 'true')
        self.assertEqual(
            resp_obj.response.headers['X-OpenAPI-Paginated-Content-Key'],
            'collections')

        resp_obj = self.assertGetResponse(str(url),
                                          expected_code=codes,
                                          headers=get_auth_header(authorized=True))
        link_header = resp_obj.response.headers.get('Link')

    self.assertEqual(resp_obj.response.headers['X-OpenAPI-Pagination'], 'false')
    self.assertEqual(resp_obj.response.status_code, requests.codes.ok)
    return paging_response

def getAllDevices(self, page=1, sort=None, has_group=None, JWT="foo.bar.baz"):
    if not JWT.startswith("Bearer "):
        JWT = "Bearer " + JWT
    r, h = self.client.Management_API.List_Device_Inventories(
        page=page, sort=sort, has_group=has_group,
        Authorization=JWT).result()
    for i in parse_header_links(h.headers["link"]):
        if i["rel"] == "next":
            page = int(dict(urlparse.parse_qs(
                urlparse.urlsplit(i["url"]).query))["page"][0])
            return r + self.getAllDevices(page=page, sort=sort)
        else:
            return r

def _parse(self, header: str):
    if not header or not isinstance(header, str):
        return
    header = header.strip()
    links = parse_header_links(header)
    for link in links:
        try:
            rel = link['rel']
            url = link['url']
        except KeyError:
            # ignore links not having relationship and url
            continue
        else:
            try:
                self.__dict__[rel] = url
            except KeyError:
                # ignore links we do not support
                continue

def get_account_course_count(self, term_id=None):
    """
    Makes a request to the account courses endpoint using "1" as the per_page
    value. The response will contain a Link header which we can parse to get
    the total course count.

    Note: HEAD requests work with local dev instances of canvas but seem to be
    blocked by canvas cloud.

    :param term_id:
    :return: integer
    """
    params = {
        'per_page': 1
    }
    if term_id is not None:
        params['enrollment_term_id'] = 'sis_term_id:%s' % term_id

    resp = self.make_request('GET', self.account_courses.path, params=params)

    # parse the pagination urls canvas inserts in the response Link: header
    page_links = parse_header_links(resp.headers['link'])

    # parse the url marked 'last' to get total number of pages, i.e. courses
    last_link = next((x for x in page_links if x['rel'] == 'last'), None)
    url_parts = urlparse(last_link['url'])
    page_params = QueryDict(url_parts.query, encoding='utf-8')
    return int(page_params['page'])

def _scroll_results(api_client, url):
    """Iterates through pages of results, and returns them all."""
    results = []
    while True:
        rv = check_api_get_responses(api_client, url, status_code=200)
        results.extend(rv.data)
        if "Link" in rv:
            for link in parse_header_links(rv["Link"]):
                if link["rel"] == "next":
                    # Found link to next page of results
                    url = link["url"]
                    break
            else:
                # No link with 'rel=next'
                break
        else:
            # No Link header
            break
    return results

def test_parse_header_links(value, expected):
    assert parse_header_links(value) == expected

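# test_parse_header_links above is presumably driven by parametrized fixtures.
# A hedged sketch of what such (value, expected) pairs could look like, using
# made-up URLs rather than the project's actual test data:
import pytest
from requests.utils import parse_header_links

@pytest.mark.parametrize("value, expected", [
    ('<http://example.com/page/2>; rel="next"',
     [{"url": "http://example.com/page/2", "rel": "next"}]),
    ('<http://example.com/page/2>; rel="next", <http://example.com/page/7>; rel="last"',
     [{"url": "http://example.com/page/2", "rel": "next"},
      {"url": "http://example.com/page/7", "rel": "last"}]),
])
def test_parse_header_links_examples(value, expected):
    assert parse_header_links(value) == expected
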
def merakiRequest(p_apiKey, p_httpVerb, p_endpoint, p_additionalHeaders=None,
                  p_queryItems=None, p_requestBody=None, p_verbose=False,
                  p_retry=0):
    # returns success, errors, responseHeaders, responseBody
    if p_retry > API_MAX_RETRIES:
        if p_verbose:
            print("ERROR: Reached max retries")
        return False, None, None, None

    bearerString = "Bearer " + p_apiKey
    headers = {"Authorization": bearerString}
    if p_additionalHeaders is not None:
        headers.update(p_additionalHeaders)

    query = ""
    if p_queryItems is not None:
        query = "?" + urlencode(p_queryItems)
    url = API_BASE_URL + p_endpoint + query
    verb = p_httpVerb.upper()

    session = NoRebuildAuthSession()

    try:
        if p_verbose:
            print(verb, url)
        if verb == "GET":
            r = session.get(url, headers=headers,
                            timeout=(API_CONNECT_TIMEOUT, API_TRANSMIT_TIMEOUT))
        elif verb == "PUT":
            if p_requestBody is not None:
                if p_verbose:
                    print("body", p_requestBody)
                r = session.put(url, headers=headers, json=p_requestBody,
                                timeout=(API_CONNECT_TIMEOUT, API_TRANSMIT_TIMEOUT))
        elif verb == "POST":
            if p_requestBody is not None:
                if p_verbose:
                    print("body", p_requestBody)
                r = session.post(url, headers=headers, json=p_requestBody,
                                 timeout=(API_CONNECT_TIMEOUT, API_TRANSMIT_TIMEOUT))
        elif verb == "DELETE":
            r = session.delete(url, headers=headers,
                               timeout=(API_CONNECT_TIMEOUT, API_TRANSMIT_TIMEOUT))
        else:
            return False, None, None, None
    except:
        return False, None, None, None

    if p_verbose:
        print(r.status_code)

    success = r.status_code in range(200, 299)
    errors = None
    responseHeaders = None
    responseBody = None

    if r.status_code == API_STATUS_RATE_LIMIT:
        if p_verbose:
            print("Hit max request rate. Retrying %s after %s seconds" % (
                p_retry + 1, r.headers["Retry-After"]))
        time.sleep(int(r.headers["Retry-After"]))
        success, errors, responseHeaders, responseBody = merakiRequest(
            p_apiKey, p_httpVerb, p_endpoint, p_additionalHeaders,
            p_queryItems, p_requestBody, p_verbose, p_retry + 1)
        return success, errors, responseHeaders, responseBody

    try:
        rjson = r.json()
    except:
        rjson = None

    if rjson is not None:
        if "errors" in rjson:
            errors = rjson["errors"]
            if p_verbose:
                print(errors)
        else:
            responseBody = rjson

    if "Link" in r.headers:
        parsedLinks = utils.parse_header_links(r.headers["Link"])
        for link in parsedLinks:
            if link["rel"] == "next":
                if p_verbose:
                    print("Next page:", link["url"])
                splitLink = link["url"].split("/api/v1")
                success, errors, responseHeaders, nextBody = merakiRequest(
                    p_apiKey, p_httpVerb, splitLink[1],
                    p_additionalHeaders=p_additionalHeaders,
                    p_requestBody=p_requestBody,
                    p_verbose=p_verbose)
                if success:
                    if responseBody is not None:
                        responseBody.append(nextBody)
                else:
                    responseBody = None

    return success, errors, responseHeaders, responseBody