def _build_templates(self):
    """
    Implement build-templates command.

    Reads CSV rows from the input files given on the command line
    (``self.args[1:-1]``) and writes one or more ``.xls`` workbooks per
    organization into the output path (the last command-line argument),
    starting a new numbered file for an organization once
    ``SPLIT_XLS_ROWS`` rows have been written.
    """
    lc = LocalCKAN()
    output_files = {}      # org_id -> open workbook, or None once closed/failed
    next_row = {}          # org_id -> next sheet row index to write
    output_counter = {}    # org_id -> count of files produced so far
    output_path = self.args[2:][-1]
    dataset_types = get_dataset_types(self.command_name)
    table = get_chromo(dataset_types[0])

    def close_write_file(org_id):
        # Save and release the workbook for org_id; the file name carries
        # the per-org output counter so split files don't collide.
        book = output_files[org_id]
        if not book:
            return
        book.save(os.path.join(
            output_path,
            org_id + "-" + str(output_counter[org_id]) + ".xls"))
        output_files[org_id] = None

    def out_file(org_id):
        # Return (book, row) for the next row of org_id, opening a new
        # workbook when none exists yet or the current one is full.
        # Returns (None, None)/(None, row) when the org can't be resolved.
        if org_id in output_files:
            next_row[org_id] += 1
            # need to start a new file?
            if next_row[org_id] > SPLIT_XLS_ROWS:
                close_write_file(org_id)
            else:
                return output_files[org_id], next_row[org_id]
        try:
            org = lc.action.organization_show(
                id=org_id,
                include_data_batch=False)
        except NotFound:
            # BUG FIX: logging.error treats extra positional args as
            # %-format values for the message string; the original call
            # logging.error("org id", org_id, "not found") raised a
            # formatting error instead of logging. Use a format string.
            logging.error("org id %s not found", org_id)
            output_files[org_id] = None
            next_row[org_id] = 0
            return None, None
        book = excel_template(dataset_types[0], org)
        output_files[org_id] = book
        output_counter[org_id] = output_counter.get(org_id, 0) + 1
        # Resume writing after the rows the template already contains.
        next_row[org_id] = len(book.get_sheet(0).get_rows())
        return book, next_row[org_id]

    def add_row(book, row, d):
        # Write one CSV record d across the columns defined by the chromo
        # table for this dataset type.
        sheet = book.get_sheet(0)
        for i, f in enumerate(table["fields"]):
            sheet.write(row, i, d[f["datastore_id"]])

    for f in self.args[1:-1]:
        for d in DictReader(open(f, "rb")):
            book, row = out_file(d["organization"])
            if not book:
                continue
            add_row(book, row, d)
    # Flush any workbooks still open once all input files are consumed.
    for org_id in output_files:
        close_write_file(org_id)
def data_batch(org_id, lc, target_dataset):
    """
    Yield batches of datastore records for one organization.

    :param org_id: id of the organization of interest
    :ptype org_id: str
    :param lc: local CKAN client
    :ptype lc: obj
    :param target_dataset: name of target dataset (e.g., 'ati', 'pd', etc.)
    :ptype target_dataset: str

    :return generates batches of dataset dict records
    :rtype batch of dataset dict records
    """
    # Locate the dataset type whose geno points at target_dataset;
    # nothing to yield when no type matches.
    matched_type = None
    for candidate in get_dataset_types():
        if get_geno(candidate).get('target_dataset') == target_dataset:
            matched_type = candidate
            break
    if matched_type is None:
        return

    # rows=2 is enough to detect the "more than one package" anomaly.
    found = lc.action.package_search(
        q="type:{0:s} owner_org:{1:s}".format(matched_type, org_id),
        rows=2)['results']
    if not found:
        return
    if len(found) != 1:
        sys.stderr.write('1 record expected for %s %s, found %d'
            % (matched_type, org_id, len(found)))

    package = found[0]
    for resource in package['resources']:
        # Page through the datastore in BATCH_SIZE chunks until exhausted.
        offset = 0
        while True:
            response = lc.action.datastore_search(
                resource_id=resource['id'],
                limit=BATCH_SIZE,
                offset=offset)
            batch = response['records']
            if not batch:
                break
            offset += len(batch)
            yield batch
def _show(self, dataset_type, org_name):
    """
    Display some information about the status of recombinant datasets.

    :param dataset_type: limit output to this type; falsy means all types
    :param org_name: limit output to this org; falsy means all orgs
    """
    # Narrow orgs/types to the requested ones, or enumerate everything.
    orgs = [org_name] if org_name else self._get_orgs()
    types = [dataset_type] if dataset_type else get_dataset_types()
    for dtype in types:
        # Python 2 print statement; title may be non-ASCII, hence encode.
        print u'{geno[title]} ({dtype})'.format(
            geno=get_geno(dtype), dtype=dtype).encode('utf-8')
        packages = self._get_packages(dtype, orgs)
        # Per-package detail only when a single type was requested.
        if dataset_type:
            for p in packages:
                print p['owner_org']
                if 'error' in p:
                    print ' *** {p[error]}'.format(p=p)
                elif not p['metadata_correct']:
                    print ' ! metadata needs to be updated'
                for r in p['resources']:
                    # trailing comma: stay on the same line for the status
                    print ' - id:{r[id]} {r[name]}'.format(r=r),
                    if 'error' in r:
                        print ' *** {r[error]}'.format(r=r)
                    else:
                        print 'rows:{r[datastore_rows]}'.format(r=r)
                    # NOTE(review): nesting reconstructed from a collapsed
                    # source line — confirm these checks sit at resource level.
                    if not r['datastore_correct']:
                        print ' ! datastore needs to be updated'
                    if not r['metadata_correct']:
                        print ' ! metadata needs to be updated'
        # Summary: expect exactly one record per org for this type.
        if len(packages) != len(orgs):
            print(' > %d orgs but %d records found' % (
                len(orgs), len(packages)))
        else:
            print(' > %d datasets found' % (len(packages), ))
        need_update = sum(1 for p in packages if not p['all_correct'])
        if need_update:
            print(' --> %d need to be updated' % need_update)
def _show(self, dataset_type, org_name):
    """
    Display some information about the status of recombinant datasets.

    :param dataset_type: limit output to this type; falsy means all types
    :param org_name: limit output to this org; falsy means all orgs
    """
    # Narrow orgs/types to the requested ones, or enumerate everything.
    orgs = [org_name] if org_name else self._get_orgs()
    types = [dataset_type] if dataset_type else get_dataset_types()
    for dtype in types:
        # Python 2 print statement; title may be non-ASCII, hence encode.
        print u'{geno[title]} ({dtype})'.format(
            geno=get_geno(dtype), dtype=dtype).encode('utf-8')
        packages = self._get_packages(dtype, orgs)
        # Per-package detail only when a single type was requested.
        if dataset_type:
            for p in packages:
                print p['owner_org']
                if 'error' in p:
                    print ' *** {p[error]}'.format(p=p)
                elif not p['metadata_correct']:
                    print ' ! metadata needs to be updated'
                for r in p['resources']:
                    # trailing comma: stay on the same line for the status
                    print ' - id:{r[id]} {r[name]}'.format(r=r),
                    if 'error' in r:
                        print ' *** {r[error]}'.format(r=r)
                    else:
                        print 'rows:{r[datastore_rows]}'.format(r=r)
                    # NOTE(review): nesting reconstructed from a collapsed
                    # source line — confirm these checks sit at resource level.
                    if not r['datastore_correct']:
                        print ' ! datastore needs to be updated'
                    if not r['metadata_correct']:
                        print ' ! metadata needs to be updated'
        # Summary: expect exactly one record per org for this type.
        if len(packages) != len(orgs):
            print (' > %d orgs but %d records found' % (
                len(orgs), len(packages)))
        else:
            print (' > %d datasets found' % (len(packages),))
        need_update = sum(1 for p in packages if not p['all_correct'])
        if need_update:
            print (' --> %d need to be updated' % need_update)
def _expand_dataset_types(self, dataset_types):
    """
    Resolve the dataset types to operate on.

    When the --all-types option was given, ignore the supplied list and
    return every known dataset type; otherwise pass the list through.
    """
    if not self.options.all_types:
        return dataset_types
    return get_dataset_types()
def package_types(self):
    """
    Report the CKAN package types handled by this plugin: every
    recombinant dataset type defined in the tables module.
    """
    dataset_types = tables.get_dataset_types()
    return dataset_types
def _expand_dataset_types(self, dataset_types):
    """
    Return every known dataset type when --all-types was requested,
    otherwise the types that were explicitly supplied.
    """
    return get_dataset_types() if self.options.all_types else dataset_types
def package_types(self):
    """
    List the package types this plugin owns — one per recombinant
    dataset type from the tables module.
    """
    types = tables.get_dataset_types()
    return types
def recombinant_get_types():
    """
    Template helper: expose the list of recombinant dataset types.
    """
    types = get_dataset_types()
    return types
def recombinant_get_types():
    """
    Template helper returning every recombinant dataset type.
    """
    all_types = get_dataset_types()
    return all_types