def datasets(softwares=('ckan', 'socrata')):
    """Yield every dataset of every catalog for the given portal softwares.

    For each name in *softwares*, the catalogs come from
    ``read.catalogs(software)`` and the datasets of one catalog come from
    the function of the same name on ``read`` (``getattr(read, software)``).
    Each dataset is tagged in place with ``'catalog'`` and ``'software'``
    keys before it is yielded.

    Catalogs whose ``SOCRATA_FIX`` entry is ``None`` are skipped; catalogs
    with no entry at all are kept.

    Args:
        softwares: Iterable of software names; each must be the name of a
            reader function on ``read``.

    Yields:
        The dataset mappings produced by the reader, with the two extra
        keys set.
    """
    for software in softwares:
        for catalog in read.catalogs(software):
            # The non-None default keeps "no entry" distinguishable from
            # "entry explicitly set to None"; only the latter is skipped.
            if SOCRATA_FIX.get(catalog, 'this is a string, not None') is None:
                continue
            for dataset in getattr(read, software)(catalog):
                dataset['catalog'] = catalog
                dataset['software'] = software
                yield dataset
def get_links(softwares=('ckan', 'socrata'), db_path='/tmp/open-data.sqlite'):
    """Check every catalog and upsert the resulting rows into ``links``.

    Opens a DumpTruck database at *db_path*, makes sure the ``links`` table
    and a unique index on ``(software, catalog, identifier)`` exist, then
    upserts each row produced by ``_check_catalog(software, catalog)``.

    Catalogs whose ``SOCRATA_FIX`` entry is ``None`` are skipped; catalogs
    with no entry at all are processed.

    Args:
        softwares: Iterable of software names passed to ``read.catalogs``
            and ``_check_catalog``.
        db_path: Path of the database file handed to ``DumpTruck``.

    Raises:
        Whatever ``_check_catalog`` or the database calls raise. The
        ``downloads/<software>/<catalog>`` path is printed first so the
        failing catalog can be identified.
    """
    dt = DumpTruck(db_path)

    # Sample row handed to create_table: text for the three key columns, an
    # integer status code and dict values for headers/error.
    # NOTE(review): presumably DumpTruck infers column types from these
    # sample values -- confirm against the DumpTruck docs.
    dummyrow = {
        'software': 'blah',
        'catalog': 'blah',
        'identifier': 'blah',
        'status_code': 234,
        'headers': {'a': 'b'},
        'error': {'a': 'b'},
    }
    dt.create_table(dummyrow, 'links', if_not_exists=True)
    dt.create_index(['software', 'catalog', 'identifier'], 'links',
                    if_not_exists=True, unique=True)

    for software in softwares:
        for catalog in read.catalogs(software):
            # The non-None default keeps "no entry" distinguishable from
            # "entry explicitly set to None"; only the latter is skipped.
            if SOCRATA_FIX.get(catalog, 'this is a string, not None') is None:
                continue
            try:
                for row in _check_catalog(software, catalog):
                    dt.upsert(row, 'links')
            except BaseException:
                # Deliberately broad (covers Ctrl-C too): report which
                # catalog was in progress, then let the error propagate.
                print(os.path.join('downloads', software, catalog))
                raise