def main():
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a', '--all', action='store_true', default=False,
                      help='Fetch all records, rather than a small extract')
    (options, args) = parser.parse_args()
    BASE = 'https://buyandsell.gc.ca'
    folder = os.path.dirname(os.path.realpath(__file__))
    if options.all:
        folder += '/all'
        # No new data is being published, so just get the four
        # years that exist.
        urls = []
        for i in range(13, 17):
            url = '%s/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-%s-%s.json' \
                % (BASE, i, i + 1)
            urls.append(url)
    else:
        folder += '/sample'
        urls = ['%s/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-15-16.json' % BASE]
    for url in urls:
        print("Fetching releases for %s" % url)
        data = common.getUrlAndRetry(url, folder)
        common.writeReleases(data['releases'], folder, data, url)
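# Every scraper below leans on the same small `common` helper module for
# fetching and persisting packages. The real module is not reproduced here;
# this is a minimal sketch reconstructed from the call sites - the names
# match, but the bodies, retry policy and file layout are assumptions, not
# the actual implementation.
import json
import os
import time

import requests


def getUrlAndRetry(url, folder, retries=3, delay=5):
    # Fetch a URL as JSON, retrying on failure; callers treat a None
    # return as a permanently failed page and skip it.
    for attempt in range(retries):
        try:
            response = requests.get(url, timeout=60)
            response.raise_for_status()
            return response.json()
        except (requests.RequestException, ValueError):
            time.sleep(delay)
    return None


def writeReleases(releases, folder, package, url, kind='releases'):
    # Write each release to its own JSON file and return the list written,
    # so callers can use len() of the result to detect an empty page.
    os.makedirs(folder, exist_ok=True)
    for release in releases:
        name = '%s.json' % release['id'].replace('/', '_')
        with open(os.path.join(folder, name), 'w') as f:
            json.dump(release, f)
    return releases


def writeFile(name, folder, data, url, kind='releases'):
    # Write a whole package to disk under the given file name.
    os.makedirs(folder, exist_ok=True)
    with open(os.path.join(folder, name), 'w') as f:
        json.dump(data, f)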
def main():
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a', '--all', action='store_true', default=False,
                      help='Fetch all records, rather than a small extract')
    (options, args) = parser.parse_args()
    url = 'http://datos.gob.mx/busca/api/3/action/'
    url += 'package_search?q=organization:inai&rows=500'
    folder = os.path.dirname(os.path.realpath(__file__))
    release_packages = getReleasePackages(url)
    if options.all:
        folder += '/all'
    else:
        folder += '/sample'
    for url in release_packages:
        print('fetching %s' % url)
        data = common.getUrlAndRetry(url, folder)
        # Some downloads are a list of packages, others a single package;
        # only a list can be sliced down to a sample.
        if isinstance(data, list):
            if not options.all:
                data = data[:10]
            for d in data:
                common.writeReleases(d['releases'], folder, d, url)
        else:
            common.writeReleases(data['releases'], folder, data, url)
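# getReleasePackages() is defined elsewhere in these scripts; a plausible
# sketch is below. It queries the CKAN package_search endpoint used above
# and pulls the download URL out of each resource - the exact traversal of
# the CKAN response is an assumption.
import requests


def getReleasePackages(url):
    data = requests.get(url).json()
    return [resource['url']
            for package in data['result']['results']
            for resource in package['resources']]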
def main():
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a', '--all', action='store_true', default=False,
                      help='Fetch all records, rather than a small extract')
    (options, args) = parser.parse_args()
    BASE = 'http://moldova-ocds.yipl.com.np'
    folder = os.path.dirname(os.path.realpath(__file__))
    if options.all:
        folder += '/all'
        url = '%s/multiple-file-api/releases.json' % BASE
        year_urls = fetchYears(url)
        for url in year_urls:
            print('fetching %s' % url)
            data = common.getUrlAndRetry(url, folder)
            common.writeReleases(data['releases'], folder, data, url)
    else:
        folder += '/sample'
        url = '%s/ocds-api/year/2017' % BASE
        print('fetching %s' % url)
        data = common.getUrlAndRetry(url, folder)
        common.writeReleases(data['releases'], folder, data, url)
def main():
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a', '--all', action='store_true', default=False,
                      help='Fetch all records, rather than a small extract')
    parser.add_option('-p', '--page', type="int", default=1,
                      help='Fetch records from the given page')
    parser.add_option('-R', '--resume', action='store_true', default=False,
                      help='Continue from the last page (in page.n)')
    parser.add_option('-b', '--bigquery', action='store_true', default=False,
                      help='Fetch records in bigquery format')
    (options, args) = parser.parse_args()
    if options.resume:
        with open("page.n", 'r') as n:
            page = int(n.read())
    else:
        page = options.page
    url = 'https://api.colombiacompra.gov.co/releases/?page=' + str(page)
    folder = os.path.dirname(os.path.realpath(__file__))
    if options.all:
        folder += '/all'
        packages = 1
        next_url = url
        while next_url:
            print('fetching %s' % next_url)
            data = common.getUrlAndRetry(next_url, folder)
            current_page = int(next_url.split('page=')[1])
            if data is None:
                # This page could not be fetched; skip to the next one.
                next_url = next_url.replace('page=' + str(current_page),
                                            'page=' + str(current_page + 1))
                continue
            if options.bigquery:
                common.writeReleases(data['releases'], folder, data, next_url)
            else:
                common.writeFile('%s.json' % str(packages), folder, data,
                                 next_url)
            next_url = data['links']['next']
            # Record the page just fetched so --resume can pick up from here.
            with open("page.n", 'w') as n:
                n.write(str(current_page))
            packages += 1
    else:
        folder += '/sample'
        print('fetching %s' % url)
        data = common.getUrlAndRetry(url, folder)
        common.writeReleases(data['releases'], folder, data, url)
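# A small, self-contained illustration of the fallback above: when a page
# fails permanently (getUrlAndRetry returns None), the scraper cannot follow
# links.next, so it bumps the page= query parameter by hand and moves on.
def next_page_url(url):
    current_page = int(url.split('page=')[1])
    return url.replace('page=%d' % current_page,
                       'page=%d' % (current_page + 1))


assert next_page_url('https://api.colombiacompra.gov.co/releases/?page=7') \
    == 'https://api.colombiacompra.gov.co/releases/?page=8'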
def fetchReleases(data, folder, url):
    print('Fetching %s' % url)
    for r in data['releases']:
        # These releases are lacking IDs - set the ID to the OCID
        # (which is unique).
        r['id'] = r['ocid']
    common.writeReleases(data['releases'], folder, data, url)
def main():
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a', '--all', action='store_true', default=False,
                      help='Fetch all records, rather than a small extract')
    (options, args) = parser.parse_args()
    url = 'https://tenders.nsw.gov.au'
    url += '/?event=public.api.%s.search&ResultsPerPage=1000'
    folder = os.path.dirname(os.path.realpath(__file__))
    if options.all:
        folder += '/all'
        release_types = ['planning', 'tender', 'contract']
        for r in release_types:
            next_url = url % r
            while next_url:
                print('fetching', next_url)
                data = common.getUrlAndRetry(next_url, folder)
                common.writeReleases(data['releases'], folder, data, next_url)
                if 'next' in data['links']:
                    next_url = data['links']['next']
                else:
                    next_url = None
    else:
        folder += '/sample'
        next_url = url % 'planning'
        print('fetching', next_url)
        data = common.getUrlAndRetry(next_url, folder)
        common.writeReleases(data['releases'], folder, data, next_url)
def fetchRecord(record_id, folder, get_releases, page=0):
    '''
    Given a record ID, construct the package URL and save locally.
    '''
    url = 'https://www.contrataciones.gov.py:443/'
    url += 'datos/api/v2/doc/ocds/record-package/%s' % record_id
    print("Fetching record %s ID: %s > %s" % (page, record_id, url))
    data = common.getUrlAndRetry(url, folder)
    if data:
        try:
            common.writeReleases(
                [data['records'][0]['compiledRelease']],
                folder, data, url, 'records')
            if get_releases and 'packages' in data:
                for package_url in data['packages']:
                    # Rewrite the release URL - they are published
                    # in an incorrect format.
                    release_url = package_url\
                        .replace('/datos/id/', '/datos/api/v2/doc/ocds/')\
                        .replace('.json', '')
                    print('fetching %s' % release_url)
                    release_data = common.getUrlAndRetry(release_url, folder)
                    if release_data and 'releases' in release_data:
                        common.writeReleases(
                            release_data['releases'], folder,
                            release_data, release_url)
        except KeyError:
            err = 'No compiledRelease, skipping this one: %s \n' % url
            print(err)
            with open('%s/errors.txt' % folder, 'a') as errors:
                errors.write(err)
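# A worked example of the URL rewrite above. The path tail here is
# hypothetical (real release URLs come from the record package itself), but
# the shape is the one the code corrects: published links point at
# /datos/id/ and end in .json, while the working API lives under
# /datos/api/v2/doc/ocds/ with no extension.
published = ('https://www.contrataciones.gov.py:443'
             '/datos/id/release-package/123456.json')
fixed = published.replace('/datos/id/', '/datos/api/v2/doc/ocds/') \
    .replace('.json', '')
assert fixed == ('https://www.contrataciones.gov.py:443'
                 '/datos/api/v2/doc/ocds/release-package/123456')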
def main():
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a', '--all', action='store_true', default=False,
                      help='Fetch all records, rather than a small extract')
    (options, args) = parser.parse_args()
    url = 'https://ville.montreal.qc.ca/vuesurlescontrats/api/releases.json'
    folder = os.path.dirname(os.path.realpath(__file__))
    more_releases = True
    if options.all:
        folder += '/all'
        offset = 0
        while more_releases:
            next_url = url + '?limit=10000&offset=%s' % offset
            print('fetching %s' % next_url)
            data = common.getUrlAndRetry(next_url, folder)
            # writeReleases returns the releases written; an empty page
            # means we have paged past the last release.
            more_releases = len(
                common.writeReleases(data['releases'], folder, data,
                                     next_url))
            offset += 10000
    else:
        folder += '/sample'
        url += '?limit=10'
        print('fetching %s' % url)
        data = common.getUrlAndRetry(url, folder)
        common.writeReleases(data['releases'], folder, data, url)
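# The loop above has no page count to consult: it walks offsets in steps of
# 10000 and stops on the first empty page. A compact sketch of that
# termination pattern, where fetch_page is a hypothetical stand-in for the
# API call:
def fetch_all(fetch_page, limit=10000):
    releases, offset = [], 0
    while True:
        page = fetch_page(limit, offset)   # returns a list of releases
        if not page:
            break
        releases.extend(page)
        offset += limit
    return releases


assert fetch_all(lambda limit, offset: list(range(offset, offset + limit))
                 if offset < 20000 else []) == list(range(20000))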
def main():
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a', '--all', action='store_true', default=False,
                      help='Fetch all records, rather than a small extract')
    parser.add_option('-R', '--resume', action='store_true', default=False,
                      help='Continue from the last page (in page.n)')
    parser.add_option('-p', '--page', action='store', type="int", default=1,
                      help='Start from page n of the results')
    (options, args) = parser.parse_args()
    if options.resume:
        with open("page.n", 'r') as n:
            page = int(n.read())
    else:
        page = options.page
    tags = ['planning', 'tender', 'award', 'contract']
    for tag in tags:
        folder = os.path.dirname(os.path.realpath(__file__))
        base = 'http://gpp.ppda.go.ug'
        url = '%s/api/v1/releases?tag=%s&page=%s' % (base, tag, 1)
        if options.all:
            folder += '/all/' + tag
            r = requests.get(url)
            data = r.json()
            num_pages = data['pagination']['last_page']
            print('%s pages to retrieve' % num_pages)
            for i in range(page, num_pages + 1):
                url = '%s/api/v1/releases?tag=%s&page=%s' % (base, tag, i)
                print('fetching %s' % url)
                data = common.getUrlAndRetry(url, folder)
                if data is None:
                    continue
                for release in data['releases']:
                    common.writeReleases([release], folder, data, url)
                with open("page.n", 'w') as n:
                    n.write(str(i))
            # Reset the bookmark so the next tag starts from page 1.
            with open("page.n", 'w') as n:
                n.write("1")
            page = 1
        else:
            folder += '/sample/' + tag
            print('fetching %s' % url)
            data = common.getUrlAndRetry(url, folder)
            for release in data['releases'][:10]:
                common.writeReleases([release], folder, data, url)
def main():
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a', '--all', action='store_true', default=False,
                      help='Fetch all records, rather than a small extract')
    parser.add_option('-R', '--resume', action='store_true', default=False,
                      help='Continue from the last page (in page.n)')
    parser.add_option('-p', '--page', action='store', type="int", default=1,
                      help='Start from page n of the results')
    (options, args) = parser.parse_args()
    if options.resume:
        with open("page.n", 'r') as n:
            page = int(n.read())
    else:
        page = options.page
    folder = os.path.dirname(os.path.realpath(__file__))
    BASE = 'https://www.contractsfinder.service.gov.uk'
    url = '%s/Published/Notices/OCDS/Search?order=asc&page=%s' % (BASE, 1)
    if options.all:
        folder += '/all'
        r = requests.get(url)
        data = r.json()
        num_pages = data['maxPage']
        print('%s pages to retrieve' % num_pages)
        for i in range(page, num_pages + 1):
            url = '%s/Published/Notices/OCDS/Search?order=asc&page=%s' % \
                (BASE, i)
            print('fetching %s' % url)
            data = common.getUrlAndRetry(url, folder)
            for result in data['results']:
                common.writeReleases(result['releases'], folder, result, url)
            with open("page.n", 'w') as n:
                n.write(str(i))
        with open("page.n", 'w') as n:
            n.write("1")
    else:
        folder += '/sample'
        print('fetching %s' % url)
        data = common.getUrlAndRetry(url, folder)
        for result in data['results'][:10]:
            common.writeReleases(result['releases'], folder, result, url)
def main():
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a', '--all', action='store_true', default=False,
                      help='Fetch all records, rather than a small extract')
    parser.add_option('-b', '--bigquery', action='store_true', default=False,
                      help='Save the data in big query format')
    parser.add_option('-r', '--releases', action='store_true', default=False,
                      help='Fetch individual releases')
    (options, args) = parser.parse_args()
    url = 'https://contratacionesabiertas.jalisco.gob.mx/OCApi/2017/contracts'
    r = requests.get(url)
    data = r.json()
    package_urls = [d['URIContract'] for d in data]
    folder = os.path.dirname(os.path.realpath(__file__))
    count = 1
    if options.all:
        folder += '/all'
    else:
        folder += '/sample'
        package_urls = package_urls[:10]
    for url in package_urls:
        print('fetching', url)
        data = common.getUrlAndRetry(url, folder)
        if options.bigquery:
            common.writeReleases([data['records'][0]['compiledRelease']],
                                 folder, data, url, 'records')
        else:
            common.writeFile('%s.json' % data['uri'].split('/')[-1],
                             folder, data, url, 'records')
        if options.releases:
            for release_url in data['packages']:
                if count > 10 and not options.all:
                    break
                print('fetching', release_url)
                release = common.getUrlAndRetry(release_url, folder)
                count += 1
                if options.bigquery:
                    common.writeReleases(release['releases'], folder,
                                         release, release_url)
                else:
                    common.writeFile(
                        '%s.json' % release_url.split('/')[-1],
                        folder, release, release_url)
def main():
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a', '--all', action='store_true', default=False,
                      help='Fetch all records, rather than a small extract')
    parser.add_option(
        '-R', '--resume', action='store_true', default=False,
        help='Continue from last page (in page.n) for when download broken')
    parser.add_option('-p', '--page', action='store', type="int", default=1,
                      help='Start from page n of the results')
    (options, args) = parser.parse_args()
    if options.resume:
        with open("page.n", 'r') as n:
            page = int(n.read())
    else:
        page = options.page
    BASE = 'http://ocds.prozorro.openprocurement.io'
    folder = os.path.dirname(os.path.realpath(__file__))
    if options.all:
        folder += '/all'
        urls = []
        r = requests.get(BASE)
        d = pq(r.content)
        links = d('.container ol li a')
        latest_url = "%s/%s" % (BASE, links[-2].attrib['href'])
        if 'with_extensions' in latest_url:
            r = requests.get(latest_url)
            d = pq(r.content)
            links = d('.container ol li a')
            for link in links:
                release_url = "%s/%s" % (latest_url, link.attrib['href'])
                urls.append(release_url)
        else:
            print('Latest URL does not contain extensions - check page')
    else:
        folder += '/sample'
        urls = [
            '%s/merged_with_extensions_2017-06-23/release-0000001.json' % BASE
        ]
    print('%s release packages to fetch' % len(urls))
    for url in urls:
        package_num = int(url.split('-')[-1].replace('.json', ''))
        if package_num < page:
            continue
        print('fetching %s' % url)
        data = common.getUrlAndRetry(url, folder)
        common.writeReleases(data['releases'], folder, data, url)
        with open("page.n", 'w') as n:
            n.write(str(package_num))
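# How the resume bookmark above is derived: each package file is named
# release-NNNNNNN.json, so its zero-padded tail doubles as a page number
# that can be compared against the value saved in page.n.
assert int('release-0000123.json'.split('-')[-1].replace('.json', '')) == 123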
def main():
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a', '--all', action='store_true', default=False,
                      help='Fetch all records, rather than a small extract')
    (options, args) = parser.parse_args()
    url = 'http://nigeriaoc.org/downloadSelected'
    folder = os.path.dirname(os.path.realpath(__file__))
    if options.all:
        folder += '/all'
    else:
        folder += '/sample'
    data = common.getUrlAndRetry(url, folder)
    common.writeReleases(data['releases'], folder, data, url)
def main():
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a', '--all', action='store_true', default=False,
                      help='Fetch all records, rather than a small extract')
    (options, args) = parser.parse_args()
    url = 'http://www.contratosabiertos.cdmx.gob.mx/api/contratos/todos'
    r = requests.get(url)
    data = r.json()
    package_urls = [d['uri'] for d in data]
    folder = os.path.dirname(os.path.realpath(__file__))
    if options.all:
        folder += '/all'
    else:
        folder += '/sample'
        package_urls = package_urls[:4]
    for url in package_urls:
        print('fetching', url)
        data = common.getUrlAndRetry(url, folder)
        common.writeReleases(data['releases'], folder, data, url)
def main():
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a', '--all', action='store_true', default=False,
                      help='Fetch all records, rather than a small extract')
    (options, args) = parser.parse_args()
    url = 'https://api.colombiacompra.gov.co/releases/'
    folder = os.path.dirname(os.path.realpath(__file__))
    if options.all:
        folder += '/all'
        next_url = url
        while next_url:
            print('fetching %s' % next_url)
            data = common.getUrlAndRetry(next_url, folder)
            common.writeReleases(data['releases'], folder, data, next_url)
            next_url = data['links']['next']
    else:
        folder += '/sample'
        print('fetching %s' % url)
        data = common.getUrlAndRetry(url, folder)
        common.writeReleases(data['releases'], folder, data, url)
def main():
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a', '--all', action='store_true', default=False,
                      help='Fetch all records, rather than a small extract')
    (options, args) = parser.parse_args()
    url = 'http://datos.gob.mx/busca/api/3/action/'
    url += 'package_search?q=organization:contrataciones-abiertas&rows=500'
    folder = os.path.dirname(os.path.realpath(__file__))
    release_packages = getReleasePackages(url)
    if options.all:
        folder += '/all'
    else:
        folder += '/sample'
    for url in release_packages:
        print('fetching %s' % url)
        urllib.request.urlretrieve(url, "file.zip")
        with zipfile.ZipFile("file.zip", "r") as zip_ref:
            zip_ref.extractall(os.getcwd())
        # The bulk file is encoded as Latin-1, so decode it when opening the
        # file; json.load() in Python 3 does not take an encoding argument.
        bulk_path = os.path.join(os.getcwd(),
                                 'contratacionesabiertas_bulk.json')
        with open(bulk_path, encoding='latin1') as data_file:
            data = json.load(data_file)
        if isinstance(data, list):
            if not options.all:
                data = data[:10]
            for d in data:
                common.writeReleases([d['records'][0]['compiledRelease']],
                                     folder, d, url, 'records')
        else:
            common.writeReleases(data['releases'], folder, data, url)
        # Only the first package is processed.
        break