Exemple #1
0
def main():
    """Fetch OCDS release packages from buyandsell.gc.ca and save them locally."""
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a',
                      '--all',
                      action='store_true',
                      default=False,
                      help='Fetch all records, rather than a small extract')
    (options, args) = parser.parse_args()
    BASE = 'https://buyandsell.gc.ca'
    folder = os.path.dirname(os.path.realpath(__file__))
    if options.all:
        folder += '/all'
        # Publication has stopped; only fiscal years 13-14 through 16-17 exist.
        urls = [
            '%s/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-%s-%s.json'
            % (BASE, year, year + 1)
            for year in range(13, 17)
        ]
    else:
        folder += '/sample'
        urls = ['%s/cds/public/ocds/tpsgc-pwgsc_ocds_EF-FY-15-16.json' % BASE]
    for url in urls:
        print("Fetching releases for %s" % url)
        data = common.getUrlAndRetry(url, folder)
        common.writeReleases(data['releases'], folder, data, url)
Exemple #2
0
def main():
    """Fetch INAI release packages listed by the datos.gob.mx CKAN API.

    Saves either every package (--all) or a small extract locally.
    """
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a',
                      '--all',
                      action='store_true',
                      default=False,
                      help='Fetch all records, rather than a small extract')
    (options, args) = parser.parse_args()
    url = 'http://datos.gob.mx/busca/api/3/action/'
    url += 'package_search?q=organization:inai&rows=500'
    folder = os.path.dirname(os.path.realpath(__file__))
    release_packages = getReleasePackages(url)
    if options.all:
        folder += '/all'
    else:
        folder += '/sample'
    for url in release_packages:
        print('fetching %s' % url)
        data = common.getUrlAndRetry(url, folder)
        if isinstance(data, list):
            # BUGFIX: only a list can be sliced - the old code sliced before
            # the type check, raising TypeError whenever a sample run got a
            # single package (dict) back.
            if not options.all:
                data = data[:10]
            for d in data:
                common.writeReleases(d['releases'], folder, d, url)
        else:
            common.writeReleases(data['releases'], folder, data, url)
Exemple #3
0
def main():
    """Fetch OCDS releases from the Moldova multiple-file API.

    With --all, walk the per-year release listing; otherwise fetch the
    2017 year file as a sample.
    """
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a',
                      '--all',
                      action='store_true',
                      default=False,
                      help='Fetch all records, rather than a small extract')
    (options, args) = parser.parse_args()
    BASE = 'http://moldova-ocds.yipl.com.np'
    folder = os.path.dirname(os.path.realpath(__file__))
    if options.all:
        folder += '/all'
        url = '%s/multiple-file-api/releases.json' % BASE
        year_urls = fetchYears(url)
        for url in year_urls:
            print('fetching %s' % url)
            data = common.getUrlAndRetry(url, folder)
            common.writeReleases(data['releases'], folder, data, url)
    else:
        # BUGFIX: the sample run previously wrote into '/all', mixing the
        # extract with full downloads; every sibling scraper uses '/sample'.
        folder += '/sample'
        url = '%s/ocds-api/year/2017' % BASE
        print('fetching %s' % url)
        data = common.getUrlAndRetry(url, folder)
        common.writeReleases(data['releases'], folder, data, url)
Exemple #4
0
def main():
    """Fetch OCDS release pages from api.colombiacompra.gov.co.

    Supports starting at an arbitrary page (--page), resuming from the
    last completed page stored in page.n (--resume), and two output
    formats (--bigquery writes release rows, otherwise whole packages).
    """
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a',
                      '--all',
                      action='store_true',
                      default=False,
                      help='Fetch all records, rather than a small extract')
    parser.add_option('-p',
                      '--page',
                      type="int",
                      default=1,
                      help='Fetch records from the given page')
    parser.add_option('-R',
                      '--resume',
                      action='store_true',
                      default=False,
                      help='Continue from the last page (in page.n)')
    parser.add_option('-b',
                      '--bigquery',
                      action='store_true',
                      default=False,
                      help='Fetch records in bigquery format')

    (options, args) = parser.parse_args()

    if options.resume:
        with open("page.n", 'r') as n:
            page = int(n.read())
    else:
        page = options.page

    url = 'https://api.colombiacompra.gov.co/releases/?page=' + str(page)
    folder = os.path.dirname(os.path.realpath(__file__))
    if options.all:
        folder += '/all'
        packages = 1
        next_url = url
        while next_url:
            print('fetching %s' % next_url)
            current_page = int(next_url.split('page=')[1])
            data = common.getUrlAndRetry(next_url, folder)
            if data is None:
                # Skip a broken page by bumping the page number in the URL.
                next_url = next_url.replace('page=' + str(current_page),
                                            'page=' + str(current_page + 1))
                continue
            if options.bigquery:
                common.writeReleases(data['releases'], folder, data, next_url)
            else:
                common.writeFile('%s.json' % str(packages), folder, data,
                                 next_url)
            next_url = data['links']['next']
            # BUGFIX: record the page number just fetched, not the package
            # counter - page.n is read back as a page number by --resume.
            with open("page.n", 'w') as n:
                n.write(str(current_page))
            packages = packages + 1
    else:
        folder += '/sample'
        print('fetching %s' % url)
        data = common.getUrlAndRetry(url, folder)
        common.writeReleases(data['releases'], folder, data, url)
Exemple #5
0
def fetchReleases(data, folder, url):
    """Write out the releases from one package, patching in missing IDs."""
    print('Fetching %s' % url)
    releases = data['releases']
    # The publisher omits release IDs; fall back to the OCID, which is unique.
    for release in releases:
        release['id'] = release['ocid']
    common.writeReleases(releases, folder, data, url)
Exemple #6
0
def main():
    """Fetch OCDS releases from the NSW tenders API, following pagination."""
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a',
                      '--all',
                      action='store_true',
                      default=False,
                      help='Fetch all records, rather than a small extract')
    (options, args) = parser.parse_args()
    url = 'https://tenders.nsw.gov.au'
    url += '/?event=public.api.%s.search&ResultsPerPage=1000'
    folder = os.path.dirname(os.path.realpath(__file__))
    if options.all:
        folder += '/all'
        for release_type in ('planning', 'tender', 'contract'):
            next_url = url % release_type
            # Follow the pagination link until the API stops supplying one.
            while next_url:
                print('fetching', next_url)
                data = common.getUrlAndRetry(next_url, folder)
                common.writeReleases(data['releases'], folder, data, next_url)
                next_url = data['links'].get('next')
    else:
        folder += '/sample'
        next_url = url % 'planning'
        print('fetching', next_url)
        data = common.getUrlAndRetry(next_url, folder)
        common.writeReleases(data['releases'], folder, data, next_url)
Exemple #7
0
def fetchRecord(record_id, folder, get_releases, page=0):
    '''
    Given a record ID, construct the package URL and save locally.

    When get_releases is true, also fetch each release package listed
    under 'packages', rewriting each URL into the documented API form.
    Records lacking a compiledRelease are logged to errors.txt.
    '''
    url = 'https://www.contrataciones.gov.py:443/'
    url += 'datos/api/v2/doc/ocds/record-package/%s' % record_id
    print("Fetching record %s ID: %s > %s" % (page, record_id, url))
    data = common.getUrlAndRetry(url, folder)
    if data:
        try:
            common.writeReleases(
                [data['records'][0]['compiledRelease']], folder, data, url, 'records')
            if get_releases and 'packages' in data:
                # BUGFIX: don't reuse 'url' as the loop variable - it
                # clobbered the record URL reported in the KeyError handler.
                for package_url in data['packages']:
                    # Rewrite the release URL - they are published
                    # in an incorrect format.
                    release_url = package_url\
                        .replace('/datos/id/', '/datos/api/v2/doc/ocds/')\
                        .replace('.json', '')
                    print('fetching %s' % release_url)
                    release_data = common.getUrlAndRetry(release_url, folder)
                    if release_data and 'releases' in release_data:
                        common.writeReleases(
                            release_data['releases'], folder,
                            release_data, release_url)
        except KeyError:
            err = 'No compiledReleases, skipping this one: %s \n' % url
            print(err)
            with open('%s/errors.txt' % folder, 'a') as errors:
                errors.write(err)
Exemple #8
0
def main():
    """Fetch OCDS releases from Montreal's contracts API, paging by offset."""
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a',
                      '--all',
                      action='store_true',
                      default=False,
                      help='Fetch all records, rather than a small extract')
    (options, args) = parser.parse_args()
    url = 'https://ville.montreal.qc.ca/vuesurlescontrats/api/releases.json'
    folder = os.path.dirname(os.path.realpath(__file__))
    more_releases = True
    if options.all:
        folder += '/all'
        offset = 0
        # Page through the API 10000 releases at a time; stop once a page
        # yields no written releases.
        while more_releases:
            next_url = url + '?limit=10000&offset=%s' % offset
            print('fetching %s' % next_url)
            data = common.getUrlAndRetry(next_url, folder)
            written = common.writeReleases(data['releases'], folder, data,
                                           next_url)
            more_releases = len(written)
            offset += 10000
    else:
        folder += '/sample'
        url += '?limit=10'
        print('fetching %s' % url)
        data = common.getUrlAndRetry(url, folder)
        common.writeReleases(data['releases'], folder, data, url)
Exemple #9
0
def main():
    """Fetch OCDS releases from Uganda's GPP portal, one tag at a time.

    For each release tag, page through the API. Progress is written to
    page.n so --resume can continue after a broken download.
    """
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a',
                      '--all',
                      action='store_true',
                      default=False,
                      help='Fetch all records, rather than a small extract')
    parser.add_option('-R',
                      '--resume',
                      action='store_true',
                      default=False,
                      help='Continue from the last page (in page.n)')
    parser.add_option('-p',
                      '--page',
                      action='store',
                      type="int",
                      default=1,
                      help='Start from page n of the results')

    (options, args) = parser.parse_args()

    if options.resume:
        with open("page.n", 'r') as n:
            page = int(n.read())
    else:
        page = options.page

    # NOTE(review): 'page' is read once but applied to every tag below, so
    # resuming restarts each tag's pagination from the same page number -
    # confirm this is the intended resume semantics.
    tags = ['planning', 'tender', 'award', 'contract']
    for tag in tags:
        folder = os.path.dirname(os.path.realpath(__file__))
        base = 'http://gpp.ppda.go.ug'
        url = '%s/api/v1/releases?tag=%s&page=%s' % (base, tag, 1)
        if options.all:
            folder += '/all/' + tag
            # The first request is only used to discover the page count.
            r = requests.get(url)
            data = r.json()
            num_pages = data['pagination']['last_page']
            print('%s pages to retrieve' % num_pages)
            for i in range(page, num_pages + 1):
                url = '%s/api/v1/releases?tag=%s&page=%s' % \
                    (base, tag, i)
                print('fetching %s' % url)
                data = common.getUrlAndRetry(url, folder)
                if data is None:
                    # Skip pages that failed to download even after retries.
                    continue
                for r in data['releases']:
                    common.writeReleases([r], folder, data, url)
                # Record the last completed page for --resume.
                with open("page.n", 'w') as n:
                    n.write(str(i))
            # Reset the marker once this tag finishes cleanly.
            with open("page.n", 'w') as n:
                n.write("1")
        else:
            folder += '/sample/' + tag
            print('fetching %s' % url)
            data = common.getUrlAndRetry(url, folder)
            # Sample mode: only the first ten releases of page 1.
            for r in data['releases'][:10]:
                common.writeReleases([r], folder, data, url)
Exemple #10
0
def main():
    """Fetch OCDS release pages from UK Contracts Finder.

    Supports starting at an arbitrary page (--page) and resuming from
    the last completed page stored in page.n (--resume).
    """
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a',
                      '--all',
                      action='store_true',
                      default=False,
                      help='Fetch all records, rather than a small extract')
    parser.add_option('-R',
                      '--resume',
                      action='store_true',
                      default=False,
                      help='Continue from the last page (in page.n)')
    parser.add_option('-p',
                      '--page',
                      action='store',
                      type="int",
                      default=1,
                      help='Start from page n of the results')

    (options, args) = parser.parse_args()

    if options.resume:
        with open("page.n", 'r') as n:
            # BUGFIX: the file holds text; range(page, ...) below needs an
            # int, and the old str value raised TypeError on resume.
            page = int(n.read())
    else:
        page = options.page
    folder = os.path.dirname(os.path.realpath(__file__))

    BASE = 'https://www.contractsfinder.service.gov.uk'
    url = '%s/Published/Notices/OCDS/Search?order=asc&page=%s' % (BASE, 1)
    if options.all:
        folder += '/all'
        # The first request is only used to discover the page count.
        r = requests.get(url)
        data = r.json()
        num_pages = data['maxPage']
        print('%s pages to retrieve' % num_pages)
        for i in range(page, num_pages + 1):
            url = '%s/Published/Notices/OCDS/Search?order=asc&page=%s' % \
                (BASE, i)
            print('fetching %s' % url)
            data = common.getUrlAndRetry(url, folder)
            for r in data['results']:
                common.writeReleases(r['releases'], folder, r, url)
            # Record the last completed page for --resume.
            with open("page.n", 'w') as n:
                n.write(str(i))
        # Reset the marker once the full run finishes cleanly.
        with open("page.n", 'w') as n:
            n.write("1")
    else:
        folder += '/sample'
        print('fetching %s' % url)
        data = common.getUrlAndRetry(url, folder)
        for r in data['results'][:10]:
            common.writeReleases(r['releases'], folder, r, url)
Exemple #11
0
def main():
    """Fetch Jalisco contract record packages, and optionally their releases.

    Lists contract URIs from the OCApi endpoint, then saves each record
    package either as flattened compiledRelease rows (--bigquery) or as a
    raw JSON file; --releases additionally fetches each linked release
    package.
    """
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a',
                      '--all',
                      action='store_true',
                      default=False,
                      help='Fetch all records, rather than a small extract')
    parser.add_option('-b',
                      '--bigquery',
                      action='store_true',
                      default=False,
                      help='Save the data in big query format')
    parser.add_option('-r',
                      '--releases',
                      action='store_true',
                      default=False,
                      help='Fetch individual releases')
    (options, args) = parser.parse_args()
    url = 'https://contratacionesabiertas.jalisco.gob.mx/OCApi/2017/contracts'
    r = requests.get(url)
    data = r.json()
    package_urls = [d['URIContract'] for d in data]
    folder = os.path.dirname(os.path.realpath(__file__))
    # Counts release packages fetched across ALL contracts, so the sample
    # cap of 10 below applies globally, not per contract.
    count = 1
    if options.all:
        folder += '/all'
    else:
        folder += '/sample'
        package_urls = package_urls[:10]
    for url in package_urls:
        print('fetching', url)
        data = common.getUrlAndRetry(url, folder)
        if options.bigquery:
            # BigQuery format: flatten to the record's compiledRelease.
            common.writeReleases([data['records'][0]['compiledRelease']],
                                 folder, data, url, 'records')
        else:
            # Otherwise save the whole package, named after the URI tail.
            common.writeFile('%s.json' % str(data['uri'].split('/')[-1]),
                             folder, data, url, 'records')
        if options.releases:
            for release_url in data['packages']:
                if count > 10 and not options.all:
                    break
                print('fetching', release_url)
                release = common.getUrlAndRetry(release_url, folder)
                count = count + 1
                if options.bigquery:
                    common.writeReleases(release['releases'], folder, release,
                                         release_url)
                else:
                    common.writeFile(
                        '%s.json' % str(release_url.split('/')[-1]), folder,
                        release, release_url)
Exemple #12
0
def main():
    """Fetch Prozorro OCDS release packages scraped from a directory listing.

    Scrapes the index page for the latest dump directory, collects its
    release files, and downloads each one; page.n records the number of
    the last package written so --resume can skip ahead.
    """
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a', '--all', action='store_true', default=False,
                      help='Fetch all records, rather than a small extract')
    parser.add_option(
        '-R', '--resume', action='store_true', default=False,
        help='Continue from last page (in page.n) for when download broken')
    parser.add_option('-p', '--page', action='store', type="int", default=1,
                      help='Start from page n of the results')
    (options, args) = parser.parse_args()
    if options.resume:
        with open("page.n", 'r') as n:
            # BUGFIX: the file holds text; 'package_num < page' below
            # compares against an int, and str < int raises TypeError.
            page = int(n.read())
    else:
        page = options.page
    BASE = 'http://ocds.prozorro.openprocurement.io'
    folder = os.path.dirname(os.path.realpath(__file__))
    if options.all:
        folder += '/all'
        urls = []
        r = requests.get(BASE)
        d = pq(r.content)
        # Directory-listing links; the second-to-last entry is the most
        # recent dump directory.
        links = d('.container ol li a')
        latest_url = "%s/%s" % (BASE, links[-2].attrib['href'])
        if 'with_extensions' in latest_url:
            r = requests.get(latest_url)
            d = pq(r.content)
            links = d('.container ol li a')
            for l in links:
                release_url = "%s/%s" % (latest_url, l.attrib['href'])
                urls.append(release_url)
        else:
            print('Latest URL does not contain extensions - check page')
    else:
        folder += '/sample'
        urls = [
            '%s/merged_with_extensions_2017-06-23/release-0000001.json' % BASE
        ]
    print('%s release packages to fetch' % len(urls))
    for url in urls:
        # File names look like 'release-0000123.json'.
        package_num = int(url.split('-')[-1].replace('.json', ''))
        if package_num < page:
            continue
        print('fetching %s' % url)
        data = common.getUrlAndRetry(url, folder)
        common.writeReleases(
            data['releases'], folder, data, url)
        # Record progress for --resume.
        with open("page.n", 'w') as n:
            n.write(str(package_num))
Exemple #13
0
def main():
    """Fetch the OCDS release package published at nigeriaoc.org."""
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a',
                      '--all',
                      action='store_true',
                      default=False,
                      help='Fetch all records, rather than a small extract')
    (options, args) = parser.parse_args()
    url = 'http://nigeriaoc.org/downloadSelected'
    folder = os.path.dirname(os.path.realpath(__file__))
    if options.all:
        folder += '/all'
    else:
        folder += '/sample'
    # BUGFIX: the endpoint was previously fetched twice - once via
    # requests.get() with the parsed response discarded, then again here.
    # A single retried fetch is sufficient and halves the load on the host.
    data = common.getUrlAndRetry(url, folder)
    common.writeReleases(data['releases'], folder, data, url)
Exemple #14
0
def main():
    """Fetch Mexico City contract release packages and save them locally."""
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a',
                      '--all',
                      action='store_true',
                      default=False,
                      help='Fetch all records, rather than a small extract')
    (options, args) = parser.parse_args()
    url = 'http://www.contratosabiertos.cdmx.gob.mx/api/contratos/todos'
    r = requests.get(url)
    data = r.json()
    package_urls = [contract['uri'] for contract in data]
    folder = os.path.dirname(os.path.realpath(__file__))
    if options.all:
        folder += '/all'
    else:
        # Keep the sample small: just the first four packages.
        folder += '/sample'
        package_urls = package_urls[:4]
    for package_url in package_urls:
        print('fetching', package_url)
        package = common.getUrlAndRetry(package_url, folder)
        common.writeReleases(package['releases'], folder, package, package_url)
Exemple #15
0
def main():
    """Fetch OCDS releases from api.colombiacompra.gov.co, following links."""
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a',
                      '--all',
                      action='store_true',
                      default=False,
                      help='Fetch all records, rather than a small extract')
    (options, args) = parser.parse_args()
    url = 'https://api.colombiacompra.gov.co/releases/'
    folder = os.path.dirname(os.path.realpath(__file__))
    if not options.all:
        # Sample mode: a single page is enough.
        folder += '/sample'
        print('fetching %s' % url)
        data = common.getUrlAndRetry(url, folder)
        common.writeReleases(data['releases'], folder, data, url)
        return
    folder += '/all'
    next_url = url
    # Walk the paginated feed until the 'next' link is empty.
    while next_url:
        print('fetching %s' % next_url)
        data = common.getUrlAndRetry(next_url, folder)
        common.writeReleases(data['releases'], folder, data, next_url)
        next_url = data['links']['next']
Exemple #16
0
def main():
    """Download and extract the contrataciones-abiertas bulk OCDS dump.

    Looks up the package URL via the datos.gob.mx CKAN API, downloads the
    zipped dump, extracts it into the working directory, and writes the
    contained records/releases locally.
    """
    usage = 'Usage: %prog [ --all --cont ]'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('-a',
                      '--all',
                      action='store_true',
                      default=False,
                      help='Fetch all records, rather than a small extract')
    (options, args) = parser.parse_args()
    url = 'http://datos.gob.mx/busca/api/3/action/'
    url += 'package_search?q=organization:contrataciones-abiertas&rows=500'
    folder = os.path.dirname(os.path.realpath(__file__))
    release_packages = getReleasePackages(url)
    if options.all:
        folder += '/all'
    else:
        folder += '/sample'
    for url in release_packages:
        print('fetching %s' % url)
        urllib.request.urlretrieve(url, "file.zip")
        # The context manager closes the archive; the old explicit close()
        # calls inside the with-blocks were redundant.
        with zipfile.ZipFile("file.zip", "r") as zip_ref:
            zip_ref.extractall(os.getcwd())
        # BUGFIX: json.load() no longer accepts an 'encoding' argument
        # (removed in Python 3.9); decode the file as latin-1 when opening.
        with open(os.getcwd() + '/contratacionesabiertas_bulk.json',
                  encoding='latin1') as data_file:
            data = json.load(data_file)
            if isinstance(data, list):
                # BUGFIX: only a list can be sliced - slicing before the
                # type check raised TypeError on a single-package (dict)
                # payload.
                if not options.all:
                    data = data[:10]
                for d in data:
                    common.writeReleases([d['records'][0]['compiledRelease']],
                                         folder, d, url, 'records')
            else:
                common.writeReleases(data['releases'], folder, data, url)
        break