def test_index_pagination(self):
    """50 users: first page holds 20 items and rel="next" leads to page 2."""
    mixer.cycle(50).blend(User, last_ip=faker.ipv4())
    first = self.client.get('/api/users')
    self.assertEqual(int(first.headers['X-Total-Count']), 50)
    self.assertEqual(len(first.json['items']), 20)
    # Follow the rel="next" link advertised in the Link header.
    rel_next = parse(first.headers['Link']).links_by_attr_pairs([('rel', 'next')])[0]
    second = self.client.get(rel_next.href)
    self.assertEqual(int(second.headers['X-Total-Count']), 50)
    self.assertEqual(len(second.json['items']), 20)
    rel_self = parse(second.headers['Link']).links_by_attr_pairs([('rel', 'self')])[0]
    self.assertTrue('page=2' in rel_self.href)
def parse(cls, identifier: str, reference: DtsReference, resolver: "HttpDtsResolver", response: "Response"):
    """Build a navigable document object from a DTS HTTP response.

    Parses the response body as XML and the ``Link`` header into
    navigation references (next/prev/up/first/last/collection).

    :param identifier: resource identifier of the document
    :param reference: DTS reference this document was resolved from
    :param resolver: resolver used for follow-up requests
    :param response: HTTP response carrying the document and Link header
    :return: the populated instance
    """
    o = cls(identifier=identifier, reference=reference, resolver=resolver,
            resource=xmlparser(response.text))
    links = link_header.parse(response.headers.get("Link", ""))
    # Map each rel to the query parameters of its target URI.
    links = {
        link.rel: parse_qs(urlparse(link.href).query)
        for link in links.links
    }
    if links.get("next"):
        o._next_id = o._dict_to_ref(links.get("next"))
    if links.get("prev"):
        o._prev_id = o._dict_to_ref(links.get("prev"))
    # Bug fix: the two guards below previously tested links.get("parent")
    # while reading the "up"/"last" entries, so _parent could be set from a
    # missing rel (crashing in _dict_to_ref) and _last_id was gated on the
    # wrong key entirely. Each guard now tests the key it reads.
    if links.get("up"):
        o._parent = o._dict_to_ref(links.get("up"))
    if links.get("first"):
        o._first_id = o._dict_to_ref(links.get("first"))
    if links.get("last"):
        o._last_id = o._dict_to_ref(links.get("last"))
    if links.get("collection"):
        o._collection = o._dict_to_ref(links.get("collection"))
    return o
def term_move_to_root_test(api, client, sample_taxonomy):
    """Moving term a/aa to the taxonomy root relocates it and leaves a 301 tombstone."""
    move = client.post('/api/2.0/taxonomies/test/a/aa',
                       headers={'Destination': '/',
                                'Content-Type': 'application/vnd.move'})
    assert move.status_code == 200
    expected = {
        'links': {
            'self': 'http://localhost/api/2.0/taxonomies/test/aa',
        },
        'title': 'AA',
    }
    assert json.loads(move.data) == expected
    # The term is now reachable at the root...
    assert json.loads(client.get('/api/2.0/taxonomies/test/aa').data) == expected
    # ...and the old location redirects with an obsoleted_by link.
    old = client.get('/api/2.0/taxonomies/test/a/aa')
    assert old.status_code == 301
    assert old.headers['Location'] == 'http://localhost/api/2.0/taxonomies/test/aa'
    assert links2dict(parse(old.headers['Link'])) == {
        'self': 'http://localhost/api/2.0/taxonomies/test/a/aa',
        'obsoleted_by': 'http://localhost/api/2.0/taxonomies/test/aa',
    }
    assert old.json == {
        'links': {
            'obsoleted_by': 'http://localhost/api/2.0/taxonomies/test/aa',
            'self': 'http://localhost/api/2.0/taxonomies/test/a/aa',
        },
        'status': 'moved',
    }
def parse(self, response):
    """Parse a APS record into a HEP record.

    Attempts to parse an XML JATS full text first, if available, and
    falls back to parsing JSON if such is not available. Follows
    rel="next" Link headers to paginate through the whole result set.
    """
    payload = json.loads(response.body_as_unicode())
    for article in payload['data']:
        doi = get_value(article, 'identifiers.doi', default='')
        if not doi:
            continue
        request = Request(
            url='{}/{}'.format(self.aps_base_url, doi),
            headers={'Accept': 'text/xml'},
            callback=self._parse_jats,
            errback=self._parse_json_on_failure,
        )
        # Stash the JSON article so the fallback path can still build a record.
        request.meta['json_article'] = article
        request.meta['original_response'] = response
        yield request
    # Pagination support. Will yield until no more "next" pages are found.
    if 'Link' in response.headers:
        rel_next = link_header.parse(
            response.headers['Link']).links_by_attr_pairs([('rel', 'next')])
        if rel_next:
            yield Request(rel_next[0].href)
def next(self):
    """Return the next cached item, fetching another page when the cache runs dry.

    :raises StopIteration: when the server reports no further data.
    """
    if not self.cache:
        # The previous fetch saw no Link header, so there is nothing left.
        if self.next_is_stop:
            raise StopIteration
        response = self.entity.get(params=self.params)
        payload = response.json()
        # Any reported error aborts iteration with a descriptive message.
        if "errors" in payload:
            err = payload["errors"][0]
            message = "{}: {}".format(err["code"], err["message"])
            if "detail" in err and "name" in err["detail"]:
                message += " ({})".format(err["detail"]["name"])
            raise self.entity.not_found_error(message)
        if payload[self.entity.response_key] is None:
            raise StopIteration
        self.cache = collections.deque(payload[self.entity.response_key])
        if "Link" in response.headers:
            # The Link header carries exactly one URL: the next page to fetch.
            parsed = link_header.parse(response.headers["Link"]).links
            assert len(parsed) == 1
            self.entity.relative_url = parsed.pop().href
        else:
            self.next_is_stop = True
    return self.cache.popleft()
def parse(self, response):
    """Parse a APS JSON file into a HEP record."""
    # Each entry of the APS response's 'data' array is mapped onto one
    # HEPRecord item; pagination is handled after the loop.
    aps_response = json.loads(response.body_as_unicode())
    for article in aps_response['data']:
        record = HEPLoader(item=HEPRecord(), response=response)
        record.add_value('dois', get_nested(article, 'identifiers', 'doi'))
        record.add_value('page_nr', str(article.get('numPages', '')))
        record.add_value('abstract', get_nested(article, 'abstract', 'value'))
        record.add_value('title', get_nested(article, 'title', 'value'))
        # record.add_value('subtitle', '')
        authors, collaborations = self._get_authors_and_collab(article)
        record.add_value('authors', authors)
        record.add_value('collaborations', collaborations)
        # record.add_value('free_keywords', free_keywords)
        # record.add_value('classification_numbers', classification_numbers)
        record.add_value('journal_title', get_nested(article, 'journal', 'abbreviatedName'))
        record.add_value('journal_issue', get_nested(article, 'issue', 'number'))
        record.add_value('journal_volume', get_nested(article, 'volume', 'number'))
        # record.add_value('journal_artid', )
        published_date = article.get('date', '')
        # NOTE(review): int(published_date[:4]) raises ValueError when 'date'
        # is absent/empty — presumably APS always provides it; confirm.
        record.add_value('journal_year', int(published_date[:4]))
        record.add_value('date_published', published_date)
        record.add_value('field_categories', [
            {
                'term': term.get('label'),
                'scheme': 'APS',
                'source': '',
            } for term in get_nested(
                article, 'classificationSchemes', 'subjectAreas'
            )
        ])
        # NOTE(review): [0] assumes at least one copyright holder — a later
        # variant of this spider guards this lookup; confirm it cannot be empty.
        record.add_value('copyright_holder', get_nested(article, 'rights', 'copyrightHolders')[0]['name'])
        record.add_value('copyright_year', str(get_nested(article, 'rights', 'copyrightYear')))
        record.add_value('copyright_statement', get_nested(article, 'rights', 'rightsStatement'))
        record.add_value('copyright_material', 'Article')
        license = get_license(
            license_url=get_nested(article, 'rights', 'licenses')[0]['url']
        )
        record.add_value('license', license)
        record.add_value('collections', ['HEP', 'Citeable', 'Published'])
        yield record.load_item()
    # Pagination support. Will yield until no more "next" pages are found
    if 'Link' in response.headers:
        links = link_header.parse(response.headers['Link'])
        next = links.links_by_attr_pairs([('rel', 'next')])
        if next:
            next_url = next[0].href
            yield Request(next_url)
async def _discovery_directory_uri(self, host, blacklist=frozenset()):
    """Discover the RD's registration resource via /.well-known/core.

    :param host: base URI of the resource directory host
    :param blacklist: addresses that must not be used even if advertised
        (default changed from the mutable ``set()`` to an immutable
        ``frozenset()`` — same semantics, no shared-default pitfall)
    :return: the first non-blacklisted rt=core.rd target URI
    :raises _UnrecoverableError: when no usable registration interface exists
    """
    lookup_uri = urljoin(host, '/.well-known/core?rt=core.rd')
    try:
        # FIXME: this should be able to deal with multicasts
        response = await self._context.request(
            Message(code=GET, uri=lookup_uri, accept=40)
        ).response_raising
        links = link_header.parse(response.payload.decode('utf8'))
    except (UnicodeDecodeError, link_header.ParseException):
        # (`as e` was unused; the bare re-raise keeps the traceback.)
        self.log.error("Error parsing the RD's self description")
        raise
    addresses = [link.get_target(response.get_request_uri())
                 for link in links.links
                 if 'core.rd' in " ".join(link.rt).split(" ")]
    unfiltered_count = len(addresses)
    addresses = [a for a in addresses if a not in blacklist]
    if not addresses:
        if unfiltered_count:
            raise self._UnrecoverableError("All discovered Directory Resources are blacklisted")
        raise self._UnrecoverableError("No registration interface found in RD's response")
    if len(addresses) > 1:
        # logging's .warn is a deprecated alias of .warning
        self.log.warning("More than one registration interface found,"
                         " picking the first")
    return addresses[0]
async def _discovery_directory_uri(self, host, blacklist=frozenset()):
    """Discover the RD's registration resource via /.well-known/core.

    :param host: base URI of the resource directory host
    :param blacklist: addresses that must not be used even if advertised
        (default changed from the mutable ``set()`` to an immutable
        ``frozenset()`` — same semantics, no shared-default pitfall)
    :return: the first non-blacklisted rt=core.rd target URI
    :raises _UnrecoverableError: when no usable registration interface exists
    """
    lookup_uri = urljoin(host, '/.well-known/core?rt=core.rd')
    try:
        # FIXME: this should be able to deal with multicasts
        response = await self._context.request(
            Message(code=GET, uri=lookup_uri, accept=40)).response_raising
        links = link_header.parse(response.payload.decode('utf8'))
    except (UnicodeDecodeError, link_header.ParseException):
        # (`as e` was unused; the bare re-raise keeps the traceback.)
        self.log.error("Error parsing the RD's self description")
        raise
    addresses = [
        link.get_target(response.get_request_uri())
        for link in links.links
        if 'core.rd' in " ".join(link.rt).split(" ")
    ]
    unfiltered_count = len(addresses)
    addresses = [a for a in addresses if a not in blacklist]
    if not addresses:
        if unfiltered_count:
            raise self._UnrecoverableError(
                "All discovered Directory Resources are blacklisted")
        raise self._UnrecoverableError(
            "No registration interface found in RD's response")
    if len(addresses) > 1:
        # logging's .warn is a deprecated alias of .warning
        self.log.warning("More than one registration interface found,"
                         " picking the first")
    return addresses[0]
async def test_discovery(self):
    """The RD must advertise exactly one resource for each expected rt."""
    uri = 'coap://%s/.well-known/core?rt=core.rd*' % self.rd_netloc
    response = await self.client.request(
        aiocoap.Message(code=aiocoap.GET, uri=uri)).response
    self.assertEqual(response.code, aiocoap.CONTENT,
                     "RD discovery did not give content")
    links = link_header.parse(response.payload.decode('utf8'))
    expected_rts = ('core.rd', 'core.rd-lookup-ep', 'core.rd-lookup-res',
                    'core.rd-lookup-gp', 'core.rd-group')
    for rt in expected_rts:
        matches = [x for x in links.links if x.rt == [rt]]
        self.assertEqual(len(matches), 1,
                         "Not exactly one entry of rt=%s found" % rt)
def test_servicelist_to_corelf():
    '''Service.to_corelf should give a Link object'''
    slist = [services.Service(**case['service'])
             for case in EXAMPLE_SERVICES.values()]
    links = link_header.parse(
        str(services.servicelist_to_corelf(slist, '/uri/base')))
    # Every link must map to exactly one service under /uri/base.
    remaining = {('uri', 'base') + (srv.name, ) for srv in slist}
    for link in links.links:
        parts = tuple(link.href.strip('/').split('/'))
        assert parts in remaining
        remaining.remove(parts)
    assert len(remaining) == 0
def link_format_from_message(message):
    """Decode a CoAP message payload declared as application/link-format.

    :raises error.UnsupportedMediaType: for any other content format
    :raises error.BadRequest: when the payload cannot be decoded/parsed
    """
    linkformat_cf = aiocoap.numbers.media_types_rev['application/link-format']
    try:
        if message.opt.content_format != linkformat_cf:
            # FIXME this should support json/cbor too
            raise error.UnsupportedMediaType()
        return link_header.parse(message.payload.decode('utf8'))
    except (UnicodeDecodeError, link_header.ParseException):
        raise error.BadRequest()
async def test_discovery(self):
    """The RD must advertise exactly one resource for each implemented rt."""
    uri = 'coap://%s/.well-known/core?rt=core.rd*' % self.rd_netloc
    response = await self.client.request(
        aiocoap.Message(code=aiocoap.GET, uri=uri)).response
    self.assertEqual(response.code, aiocoap.CONTENT,
                     "RD discovery did not give content")
    links = link_header.parse(response.payload.decode('utf8'))
    # Not checking for presence of group resources: not implemented here
    for rt in ('core.rd', 'core.rd-lookup-ep', 'core.rd-lookup-res'):
        matches = [x for x in links.links if x.rt == [rt]]
        self.assertEqual(len(matches), 1,
                         "Not exactly one entry of rt=%s found" % rt)
def parse(self, response):
    """Parse a APS JSON file into a HEP record."""
    # Each entry of the APS response's 'data' array becomes one HEPRecord
    # item; rel="next" pagination is handled after the loop.
    aps_response = json.loads(response.body_as_unicode())
    for article in aps_response['data']:
        record = HEPLoader(item=HEPRecord(), response=response)
        record.add_value('dois', get_nested(article, 'identifiers', 'doi'))
        record.add_value('page_nr', str(article.get('numPages', '')))
        record.add_value('abstract', get_nested(article, 'abstract', 'value'))
        record.add_value('title', get_nested(article, 'title', 'value'))
        # record.add_value('subtitle', '')
        authors, collaborations = self._get_authors_and_collab(article)
        record.add_value('authors', authors)
        record.add_value('collaborations', collaborations)
        # record.add_value('free_keywords', free_keywords)
        # record.add_value('classification_numbers', classification_numbers)
        record.add_value('journal_title',
                         get_nested(article, 'journal', 'abbreviatedName'))
        record.add_value('journal_issue', get_nested(article, 'issue', 'number'))
        record.add_value('journal_volume',
                         get_nested(article, 'volume', 'number'))
        # record.add_value('journal_artid', )
        published_date = article.get('date', '')
        # NOTE(review): int(published_date[:4]) raises ValueError when 'date'
        # is absent/empty — presumably APS always provides it; confirm.
        record.add_value('journal_year', int(published_date[:4]))
        record.add_value('date_published', published_date)
        # NOTE(review): [0] assumes at least one copyright holder — a later
        # variant of this spider guards this lookup; confirm it cannot be empty.
        record.add_value(
            'copyright_holder',
            get_nested(article, 'rights', 'copyrightHolders')[0]['name'])
        record.add_value(
            'copyright_year',
            str(get_nested(article, 'rights', 'copyrightYear')))
        record.add_value('copyright_statement',
                         get_nested(article, 'rights', 'rightsStatement'))
        record.add_value('copyright_material', 'Article')
        license = get_license(license_url=get_nested(
            article, 'rights', 'licenses')[0]['url'])
        record.add_value('license', license)
        record.add_value('collections', ['HEP', 'Citeable', 'Published'])
        yield record.load_item()
    # Pagination support. Will yield until no more "next" pages are found
    if 'Link' in response.headers:
        links = link_header.parse(response.headers['Link'])
        next = links.links_by_attr_pairs([('rel', 'next')])
        if next:
            next_url = next[0].href
            yield Request(next_url)
def test_coap_wkc(coap_server_setup): #pylint: disable=redefined-outer-name
    """Test that the CoAP server provides the expected resource links in /.well-known/core"""
    request = aiocoap.Message(code=Code.GET)
    request.opt.uri_path = URI_PATH_WKC
    request.opt.accept = aiocoap.numbers.media_types_rev['application/link-format']
    response = yield from coap_server_setup.coap_server.site.render(request)
    assert isinstance(response, aiocoap.Message)
    links = link_header.parse(response.payload.decode('utf-8'))
    # Build the advertised path tuples once, then check each expectation.
    advertised = [tuple(link[0][1:].split('/')) for link in links.to_py()]
    for resource in EXPECTED_ROOT_RESOURCES:
        assert resource in advertised
def test_bookmarks_return_link_last(self):
    """With 25 bookmarks the Link header must carry rel="last" → page 2."""
    for _ in range(25):
        self.add_bookmark()
    res = self.app.get('/api/v1/bookmarks', headers=[self.user1])
    self.assertTrue('Link' in res.headers)
    parsed = parse_link_in_header(lh.parse(res.headers['Link']).to_py())
    expected = [
        parse_url('http://localhost/api/v1/bookmarks?page=2'),
        [['rel', 'last']]
    ]
    self.assertIn(expected, parsed)
def test_coap_wkc(coap_server_setup): #pylint: disable=redefined-outer-name
    """Test that the CoAP server provides the expected resource links in /.well-known/core"""
    request = aiocoap.Message(code=Code.GET)
    request.opt.uri_path = URI_PATH_WKC
    request.opt.accept = aiocoap.numbers.media_types_rev['application/link-format']
    response = yield from coap_server_setup.coap_server.site.render(request)
    assert isinstance(response, aiocoap.Message)
    parsed = link_header.parse(response.payload.decode('utf-8'))
    # Compare against the set of path tuples the server advertised.
    advertised = [tuple(entry[0][1:].split('/')) for entry in parsed.to_py()]
    for resource in EXPECTED_ROOT_RESOURCES:
        assert resource in advertised
def test_servicelist_to_corelf():
    '''Service.to_corelf should give a Link object'''
    slist = [services.Service(**case['service'])
             for case in EXAMPLE_SERVICES.values()]
    corelf = str(services.servicelist_to_corelf(slist, '/uri/base'))
    links = link_header.parse(corelf)
    # Each advertised href must correspond to exactly one service name.
    remaining = {('uri', 'base') + (srv.name, ) for srv in slist}
    for link in links.links:
        path = tuple(link.href.strip('/').split('/'))
        assert path in remaining
        remaining.remove(path)
    assert len(remaining) == 0
async def _get_endpoint(self, rt):
    """Return the URI for a given rt in the configured RD"""
    # The discovery result is cached on first use.
    if not hasattr(self, '_endpoints'):
        request = aiocoap.Message(
            code=aiocoap.GET,
            uri='coap://%s/.well-known/core?rt=core.rd*' % self.rd_netloc)
        response = await self.client.request(request).response
        parsed = link_header.parse(response.payload.decode('utf8'))
        base_uri = response.get_request_uri()
        self._endpoints = {entry.rt[0]: entry.get_target(base_uri)
                           for entry in parsed.links}
    return self._endpoints[rt]
def _get_endpoint(self, rt):
    """Return the URI for a given rt in the configured RD"""
    # The discovery result is cached on first use.
    if not hasattr(self, '_endpoints'):
        request = aiocoap.Message(
            code=aiocoap.GET,
            uri='coap://%s/.well-known/core?rt=core.rd*' % self.rd_netloc)
        response = yield from self.client.request(request).response
        parsed = link_header.parse(response.payload.decode('utf8'))
        base_uri = response.get_request_uri()
        self._endpoints = {entry.rt[0]: entry.get_target(base_uri)
                           for entry in parsed.links}
    return self._endpoints[rt]
def _recursive_gh_get(href, items):
    """Get a list of GitHub objects, following rel="next" pagination.

    See https://developer.github.com/v3/guides/traversing-with-pagination/

    Extends *items* in place with the JSON body of every page. The name is
    historical: the implementation is now iterative so that collections with
    very many pages cannot exhaust Python's recursion limit.

    :param href: URL of the first page
    :param items: list that collects the decoded objects (mutated in place)
    """
    url = href
    while url:
        response = _request('GET', url)
        response.raise_for_status()
        items.extend(response.json())
        url = None
        if "link" in response.headers:
            links = link_header.parse(response.headers["link"])
            rels = {link.rel: link.href for link in links.links}
            url = rels.get("next")
def test_coap_service_list_corelf(coap_server_filled): #pylint: disable=redefined-outer-name
    """Test CoAP service query"""
    coap_server = coap_server_filled.coap_server
    directory = coap_server_filled.directory_spy.real
    numitems = len(directory.service_list())
    content_format = aiocoap.numbers.media_types_rev['application/link-format']
    request = aiocoap.Message(code=Code.GET, payload=''.encode('utf-8'))
    request.opt.accept = content_format
    request.opt.uri_path = URI_PATH_SERVICE
    response = yield from coap_server.site.render(request)
    assert isinstance(response, aiocoap.Message)
    assert response.code in (Code.CONTENT, )
    assert response.opt.content_format == content_format
    parsed = link_header.parse(response.payload.decode('utf-8'))
    # Index the advertised links by their path tuple for direct lookup.
    by_path = {tuple(link.href.strip('/').split('/')): link
               for link in parsed.links}
    assert len(parsed.links) == numitems
    for service in directory.service_list():
        assert URI_PATH_SERVICE + (service.name, ) in by_path
def add_link_list(self, rawdata, identifier):
    # Render a parsed link-format document as a subtree in the UI:
    # one source node per discovery, one child per link, one grand-child
    # per link attribute. Any previous subtree for this identifier is
    # removed first so rediscovery replaces rather than duplicates.
    self.remove_link_list(identifier)
    # Markup [ref=...] makes the label clickable; the bound handler
    # receives the press.
    source_txt = '[ref=node]' + identifier + '[/ref]'
    source_node = self.tree_view.add_node(NodeLabel(text=source_txt, controller=self, identifier=identifier))
    source_node.bind(on_ref_press=source_node.open_node_label_popup)
    # Remember the raw payload alongside the node for later reuse.
    self.discoveries[identifier] = (source_node, rawdata)
    link_list = link_header.parse(rawdata)
    for link in link_list.links:
        link_txt = '[ref=world]' + link.href + '[/ref]'
        link_node = self.tree_view.add_node(NodeLabel(text=link_txt, controller=self), source_node)
        # Attach the parsed link so the press handler can act on it.
        link_node.link = link
        link_node.bind(on_ref_press=self.choose_link)
        for attribute in link.attr_pairs:
            # attr_pairs entries are (name, value); value may be None for
            # flag-style attributes, which get a name-only label.
            if attribute[1] is not None:
                attr_txt = "[color=00ffff][b]" + attribute[0] + " = [/b][/color][color=0066ff]" + attribute[1] + "[/color]"
            else:
                attr_txt = "[color=00ffff][b]" + attribute[0] + "[/b][/color]"
            self.tree_view.add_node(NodeLabel(text=attr_txt, controller=self), link_node)
def test_create_taxonomy(app, client, tax_url):
    """Creating taxonomy 'test' returns 201 and the listing then contains it."""
    print(client.get(tax_url).data)
    assert client.get(tax_url).json == []
    resp = client.put(tax_url + 'test', data='{}',
                      content_type='application/json')
    assert resp.status_code == 201
    # NOTE(review): parsed but never asserted against; kept so the Link
    # header is at least exercised for parseability — confirm intent.
    created_link = parse(resp.headers['Link']).links_by_attr_pairs([
        ('rel', 'self')
    ])[0].href
    assert client.get(tax_url).json == [{
        'code': 'test',
        'links': {
            'self': 'http://127.0.0.1:5000/2.0/taxonomies/test/'
        }
    }]
async def get_absolute_url(self, url, is_paginated=False) -> tuple:
    """GET *url* and return (json_body, rate-limit info, pagination links).

    :param url: absolute URL to fetch
    :param is_paginated: when True, request self._items_per_page items per page
    :raises HttpException: for any 4xx/5xx response status
    """
    with timeout(self._timeout):
        params = {}
        if is_paginated:
            params['per_page'] = self._items_per_page
        async with self._client.get(url, params=params) as response:
            if response.status >= 400:
                raise HttpException(response.status, url)
            self._last_limits = {
                'limit': response.headers.get('X-RateLimit-Limit'),
                # Bug fix: this previously re-read X-RateLimit-Limit, so the
                # reported remaining budget never changed.
                'remaining': response.headers.get('X-RateLimit-Remaining')
            }
            link_header_value = response.headers.get('link')
            links_dict = {}
            if link_header_value:
                links = link_header.parse(link_header_value)
                links_dict = {link.rel: link.href for link in links.links}
            return await response.json(), self._last_limits, links_dict
def parse_pagination(headers):
    """
    Parses headers to create a pagination objects

    :param headers: HTTP Headers
    :type headers: dict
    :return: Navigation object for pagination
    :rtype: _Navigation
    """
    # Bug fix: parse_qs must be fed only the query string. Running it on the
    # full href produced keys like "http://host/path?page", so the "page"
    # lookup always missed. Defaulting to [None] (instead of None) also makes
    # the [0] indexing below safe when a link has no "page" parameter.
    links = {
        link.rel: parse_qs(urlparse(link.href).query).get("page", [None])
        for link in link_header.parse(headers.get("Link", "")).links
    }
    return _Navigation(
        links.get("previous", [None])[0],
        links.get("next", [None])[0],
        links.get("last", [None])[0],
        links.get("current", [None])[0],
        links.get("first", [None])[0])
async def get_absolute_url(self, url, is_paginated=False) -> tuple:
    """GET *url* and return (json_body, rate-limit info, pagination links).

    :param url: absolute URL to fetch
    :param is_paginated: when True, request self._items_per_page items per page
    :raises HttpException: for any 4xx/5xx response status
    """
    with timeout(self._timeout):
        params = {}
        if is_paginated:
            params['per_page'] = self._items_per_page
        async with self._client.get(
                url, params=params) as response:
            if response.status >= 400:
                raise HttpException(response.status, url)
            self._last_limits = {
                'limit': response.headers.get('X-RateLimit-Limit'),
                # Bug fix: this previously re-read X-RateLimit-Limit, so the
                # reported remaining budget never changed.
                'remaining': response.headers.get('X-RateLimit-Remaining')
            }
            link_header_value = response.headers.get('link')
            links_dict = {}
            if link_header_value:
                links = link_header.parse(link_header_value)
                links_dict = {link.rel: link.href for link in links.links}
            return await response.json(), self._last_limits, links_dict
def download_labels(user, project):
    """Download all labels of a GitHub repository, following pagination.

    :param user: repository owner
    :param project: repository name
    :return: list of label dicts; on failure logs the error and returns
        whatever was collected so far (best effort, as before)
    """
    assert user is not None
    assert project is not None
    labels = []
    url = 'https://api.github.com/repos/{}/{}/labels'.format(user, project)
    try:
        while url:
            page = giturlopen(url)
            labels += json.loads(page.read().decode())
            headers = dict(page.getheaders())
            url = None
            if 'Link' in headers:
                for link in link_header.parse(headers['Link']).links:
                    # Bug fix: `'rel' in link` raised TypeError because
                    # link_header.Link is not a container; test the
                    # attribute instead.
                    if getattr(link, 'rel', None) == 'next':
                        url = link.href
                        # Jittered pause between pages to be gentle on the API.
                        sleep(max(0, gauss(1, 1 / 3)))
                        break
    except Exception as e:
        stderr.write('E: cannot load {} because of\n {}\n'.format(url, e))
        logging.error(e)
    return labels
def test_get_items_last_page():
    """Page 7 at 3 per page: 2 items, first/prev/last links, no rel="next"."""
    per_page = 3
    response = client.get("/drivers?page=7&per_page={}".format(per_page))
    links = link_header.parse(response.headers["Link"])
    body = response.json()
    assert response.status_code == HTTPStatus.OK
    assert len(body) == 2
    expected_pages = {"first": "1", "prev": "6", "last": "7"}
    for rel, page in expected_pages.items():
        assert_link_match(links, rel, base_path, {
            "page": [page],
            "per_page": [str(per_page)]
        })
    assert_no_link(links, "next")
def test_coap_service_list_corelf(coap_server_filled): #pylint: disable=redefined-outer-name
    """Test CoAP service query"""
    server = coap_server_filled.coap_server
    directory = coap_server_filled.directory_spy.real
    expected_count = len(directory.service_list())
    content_format = aiocoap.numbers.media_types_rev['application/link-format']
    request = aiocoap.Message(code=Code.GET, payload=''.encode('utf-8'))
    request.opt.accept = content_format
    request.opt.uri_path = URI_PATH_SERVICE
    response = yield from server.site.render(request)
    assert isinstance(response, aiocoap.Message)
    assert response.code in (Code.CONTENT, )
    assert response.opt.content_format == content_format
    parsed = link_header.parse(response.payload.decode('utf-8'))
    # Index advertised links by their path tuple for direct membership tests.
    by_path = {
        tuple(link.href.strip('/').split('/')): link
        for link in parsed.links
    }
    assert len(parsed.links) == expected_count
    for service in directory.service_list():
        assert URI_PATH_SERVICE + (service.name, ) in by_path
def download_issues(user, project):
    """Download all issues of a GitHub repository (state=all), following pagination.

    :param user: repository owner
    :param project: repository name
    :return: list of issue dicts; on failure logs the error and returns
        whatever was collected so far (best effort, as before)
    """
    assert user is not None
    assert project is not None
    issues = []
    url = 'https://api.github.com/repos/{}/{}/issues?state=all&since=2013-01-01T00:01:00Z'.format(
        user, project)
    try:
        while url:
            print("Url : ", url)
            # logging.info(url)
            page = giturlopen(url)
            issues += json.loads(page.read().decode())
            headers = dict(page.getheaders())
            url = None
            if 'Link' in headers:
                for link in link_header.parse(headers['Link']).links:
                    # Bug fix: `'rel' in link` raised TypeError because
                    # link_header.Link is not a container; test the
                    # attribute instead.
                    if getattr(link, 'rel', None) == 'next':
                        url = link.href
                        # Jittered pause between pages to be gentle on the API.
                        sleep(max(0, gauss(1, 1 / 3)))
                        break
    except Exception as e:
        stderr.write('E: cannot load {} because of\n {}\n'.format(url, e))
        logging.error(e)
    return issues
def _obtain_registration_address(self):
    """Find the RD's registration resource (rt=core.rd) via /.well-known/core.

    :return: the first advertised rt=core.rd target URI
    :raises _SilentError: when the RD advertises no registration interface
    """
    lookup_uri = urljoin(self.rd, '/.well-known/core?rt=core.rd')
    try:
        response = yield from self.context.request(
            Message(code=GET, uri=lookup_uri, accept=40)).response_raising
        links = link_header.parse(response.payload.decode('utf8'))
    except (UnicodeDecodeError, link_header.ParseException):
        # (`as e` was unused; the bare re-raise keeps the traceback.)
        self.log.error("Error parsing the RD's self description")
        raise
    addresses = [
        link.get_target(lookup_uri) for link in links.links
        if 'core.rd' in " ".join(link.rt).split(" ")
    ]
    if not addresses:
        self.log.error("No registration interface found in RD's response")
        raise _SilentError()
    if len(addresses) > 1:
        # logging's .warn is a deprecated alias of .warning
        self.log.warning("More than one registration interface found,"
                         " picking the first")
    return addresses[0]
def parse(linkformat):
    """Parse a link-format string into the local LinkFormat/Link classes."""
    parsed = link_header.parse(linkformat)
    # Re-brand the generic link_header objects as the local subclasses so
    # callers get the extended behavior without copying any data.
    parsed.__class__ = LinkFormat
    for entry in parsed.links:
        entry.__class__ = Link
    return parsed
def process_link_format(self, button):
    """Hand the parsed link-format payload to the nodes screen and close the popup."""
    parsed = link_header.parse(self.response.payload)
    nodes_screen = self.controller.screen_manager.get_screen('nodes')
    nodes_screen.add_link_list(parsed, self.request_uri)
    self.controller.popup.dismiss()
def parse(self, response):
    """Parse a APS JSON file into a HEP record."""
    # Each entry of the APS response's 'data' array becomes one HEPRecord
    # item; rel="next" pagination is handled after the loop.
    aps_response = json.loads(response.body_as_unicode())
    for article in aps_response['data']:
        record = HEPLoader(item=HEPRecord(), response=response)
        dois = get_nested(article, 'identifiers', 'doi')
        record.add_value('dois', dois)
        # Unknown article types fall back to 'other' and are logged for
        # later mapping review.
        journal_doctype = self.article_type_mapping.get(
            article.get('articleType'), 'other')
        if journal_doctype == 'other':
            logger.warning(
                'Journal_doctype is %s for article %s. Do we need other mapping for this?'
                % (journal_doctype, dois))
        record.add_value('journal_doctype', journal_doctype)
        # Optional fields are only added when present.
        page_nr = article.get('numPages')
        if page_nr is not None:
            record.add_value('page_nr', page_nr)
        # NOTE(review): assumes get_nested returns a string here (the
        # 'arXiv:' prefix is stripped) — confirm it cannot return None.
        arxiv = get_nested(article, 'identifiers',
                           'arxiv').replace('arXiv:', '')
        if not arxiv:
            logger.warning('No arxiv eprints found for article %s.' % dois)
        else:
            record.add_value('arxiv_eprints', {'value': arxiv})
        record.add_value('abstract', get_nested(article, 'abstract', 'value'))
        record.add_value('title', get_nested(article, 'title', 'value'))
        authors, collaborations = self._get_authors_and_collab(
            article, dois)
        record.add_value('authors', authors)
        record.add_value('collaborations', collaborations)
        record.add_value('journal_title',
                         get_nested(article, 'journal', 'name'))
        record.add_value('journal_issue',
                         get_nested(article, 'issue', 'number'))
        record.add_value('journal_volume',
                         get_nested(article, 'volume', 'number'))
        # 'date' is accessed directly: a missing date raises KeyError here.
        published_date = article['date']
        record.add_value('journal_year', int(published_date[:4]))
        record.add_value('date_published', published_date)
        record.add_value('field_categories', [{
            'term': term.get('label'),
            'scheme': 'APS',
            'source': '',
        } for term in get_nested(article, 'classificationSchemes',
                                 'subjectAreas')])
        # Copyright holder is guarded: APS records may lack one.
        copyright_holders = get_nested(article, 'rights', 'copyrightHolders')
        if copyright_holders:
            record.add_value('copyright_holder',
                             copyright_holders[0]['name'])
        record.add_value(
            'copyright_year',
            str(get_nested(article, 'rights', 'copyrightYear')))
        record.add_value('copyright_statement',
                         get_nested(article, 'rights', 'rightsStatement'))
        license = get_license(license_url=get_nested(
            article, 'rights', 'licenses')[0]['url'])
        record.add_value('license', license)
        record.add_value('collections', ['HEP', 'Citeable', 'Published'])
        yield record.load_item()
    # Pagination support. Will yield until no more "next" pages are found
    if 'Link' in response.headers:
        links = link_header.parse(response.headers['Link'])
        next = links.links_by_attr_pairs([('rel', 'next')])
        if next:
            next_url = next[0].href
            yield Request(next_url)