def parse_list(self, response):
    """Yield one request for every URL listed under ``packagesPerMonth`` in the JSON response.

    :param response: a response whose JSON body has a ``packagesPerMonth`` list of URLs
    :returns: a generator of the objects returned by ``self.build_request``
    """
    for package_url in response.json()['packagesPerMonth']:
        # URL looks like https://www.zppa.org.zm/ocds/services/recordpackage/getrecordpackage/2016/7
        # NOTE(review): presumably the formatter names the file after the trailing year/month
        # path components with a "zip" extension -- confirm against join() and components().
        file_name_formatter = join(components(-2), extension='zip')
        yield self.build_request(package_url, formatter=file_name_formatter)
def parse_list(self, response):
    """Yield one request per URL under ``packagesPerMonth``, re-pointed at the requesting host.

    The network location (host and port) of each listed URL is replaced by the one
    from the URL that was requested to obtain this response.

    :param response: a response whose JSON body has a ``packagesPerMonth`` list of URLs
    :returns: a generator of the objects returned by ``self.build_request``
    """
    package_urls = response.json()['packagesPerMonth']
    requested_netloc = urlsplit(response.request.url).netloc

    for package_url in package_urls:
        # URL looks like
        # http://malta-demo-server.eurodyn.com/ocds/services/recordpackage/getrecordpackage/2020/1
        rewritten_url = urlsplit(package_url)._replace(netloc=requested_netloc).geturl()
        yield self.build_request(rewritten_url, formatter=join(components(-2), extension='zip'))
def parse_data(self, response):
    """Yield one releases request per tag for every procurement plan in the response.

    The JSON body is read as ``data -> data -> [entity] -> procurement_plans -> [plan]``;
    each plan contributes its ``financial_year`` and ``pde_id`` to the request URL.

    :param response: a response whose JSON body has the structure described above
    :returns: a generator of the objects returned by ``self.build_request``
    """
    pattern = 'https://gpp.ppda.go.ug/adminapi/public/api/open-data/v1/releases/{}?fy={}&pde={}'
    release_tags = ('planning', 'tender', 'award', 'contract')

    payload = response.json()
    for entity in payload['data']['data']:
        for plan in entity['procurement_plans']:
            for release_tag in release_tags:
                url = pattern.format(release_tag, plan['financial_year'], plan['pde_id'])
                yield self.build_request(
                    url,
                    formatter=join(components(-1), parameters('fy', 'pde')),
                )
def parse_list(self, response):
    """Yield a request per listed item, then a request for the next page of the list.

    :param response: a response whose JSON body is either empty or has a ``data`` list
        (of objects with an ``ocid``) and an ``offset`` value
    :returns: a generator of the objects returned by ``self.build_request``
    """
    page = response.json()

    # The last page returns an empty JSON object.
    if not page:
        return

    for entry in page['data']:
        # NOTE(review): offset=None presumably drops the "offset" query parameter, leaving a
        # base URL to which the OCID is appended -- confirm against replace_parameters().
        base_url = replace_parameters(response.request.url, offset=None)
        record_url = base_url + entry['ocid']
        yield self.build_request(record_url, formatter=components(-2))

    # Request the same list again, with the offset reported by this page.
    next_page_url = replace_parameters(response.request.url, offset=page['offset'])
    yield self.build_request(
        next_page_url,
        formatter=join(components(-1), parameters('offset')),
        callback=self.parse_list,
    )
def parse_list(self, response):
    """Yield a request for each item that has a ``uri``, then follow ``next_page_url``.

    When ``self.sample`` is truthy, iteration stops after the first request is yielded
    and the next page is not requested.

    :param response: a response whose text is a JSON object with a ``data`` list
        (of objects with a ``uri``) and, optionally, a ``next_page_url``
    :returns: a generator of the objects returned by ``self.build_request``
    """
    listing = json.loads(response.text)

    for entry in listing['data']:
        item_url = entry['uri']
        if not item_url:
            continue

        yield self.build_request(item_url, formatter=components(-1))

        # Leaving the loop with ``break`` also skips the ``else`` clause below,
        # so no further page is requested in sample mode.
        if self.sample:
            break
    else:
        following_page = listing.get('next_page_url')
        if following_page:
            yield self.build_request(
                following_page,
                formatter=join(components(-1), parameters('page')),
                callback=self.parse_list,
            )
def parse_list(self, response):
    """Yield a request for each item that has a ``uri``, then follow ``next_page_url``.

    :param response: a response whose JSON body has a ``data`` list (of objects with a
        ``uri``) and, optionally, a ``next_page_url``
    :returns: a generator of the objects returned by ``self.build_request``
    """
    data = response.json()

    for item in data['data']:
        url = item['uri']
        # Items without a URI are skipped.
        if url:
            yield self.build_request(url, self.get_formatter())

    # The loop has no ``break``, so a ``for ... else`` clause would always run.
    # Pagination is therefore written as plain code after the loop.
    next_page_url = data.get('next_page_url')
    if next_page_url:
        yield self.build_request(
            next_page_url,
            formatter=join(self.get_formatter(), parameters('page')),
            callback=self.parse_list,
        )
def test_join(url, extension, expected):
    """Check that ``components(-1)`` joined with ``parameters('page')`` maps ``url`` to ``expected``."""
    formatter = join(components(-1), parameters('page'), extension=extension)

    assert formatter(url) == expected