Example #1
    def _build_templates(self):
        """
        Implement build-templates command
        """
        lc = LocalCKAN()
        output_files = {}
        next_row = {}
        output_counter = {}
        output_path = self.args[2:][-1]
        dataset_types = get_dataset_types(self.command_name)
        table = get_chromo(dataset_types[0])

        def close_write_file(org_id):
            book = output_files[org_id]
            if not book:
                return
            book.save(os.path.join(
                output_path,
                org_id + "-" + str(output_counter[org_id]) + ".xls"))
            output_files[org_id] = None

        def out_file(org_id):
            if org_id in output_files:
                next_row[org_id] += 1
                # roll over to a new file once the row limit is reached
                if next_row[org_id] > SPLIT_XLS_ROWS:
                    close_write_file(org_id)
                    # fall through to create the next numbered workbook
                else:
                    return output_files[org_id], next_row[org_id]
            try:
                org = lc.action.organization_show(id=org_id, include_data_batch=False)
            except NotFound:
                logging.error("org id %s not found", org_id)
                output_files[org_id] = None
                next_row[org_id] = 0
                return None, None
            book = excel_template(dataset_types[0], org)
            output_files[org_id] = book
            output_counter[org_id] = output_counter.get(org_id, 0) + 1
            next_row[org_id] = len(book.get_sheet(0).get_rows())
            return book, next_row[org_id]

        def add_row(book, row, d):
            sheet = book.get_sheet(0)
            for i, f in enumerate(table["fields"]):
                sheet.write(row, i, d[f["datastore_id"]])

        for f in self.args[1:-1]:
            for d in DictReader(open(f, "rb")):
                book, row = out_file(d["organization"])
                if not book:
                    continue
                add_row(book, row, d)

        for org_id in output_files:
            close_write_file(org_id)
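
The out_file()/close_write_file() pair above implements a per-organization rollover: rows accumulate until SPLIT_XLS_ROWS is reached, then the workbook is saved under an incrementing counter and a fresh one is opened for the next row. A minimal self-contained sketch of the same pattern, using plain text files instead of xlwt workbooks (SPLIT_ROWS and the 'org-a' key are made up for illustration):

# Sketch of the rollover pattern above; not part of the extension.
SPLIT_ROWS = 3       # stand-in for SPLIT_XLS_ROWS

open_files = {}      # key -> currently open file object
counters = {}        # key -> number of files created so far
rows_written = {}    # key -> rows written to the current file

def write_row(key, row):
    if key not in open_files or rows_written[key] >= SPLIT_ROWS:
        if key in open_files:
            open_files[key].close()
        counters[key] = counters.get(key, 0) + 1
        open_files[key] = open('%s-%d.csv' % (key, counters[key]), 'w')
        rows_written[key] = 0
    open_files[key].write(','.join(row) + '\n')
    rows_written[key] += 1

for i in range(7):
    write_row('org-a', ['row', str(i)])  # yields org-a-1.csv .. org-a-3.csv
for f in open_files.values():
    f.close()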
import sys

def data_batch(org_id, lc, target_dataset):
    """
    Generator of dataset dicts for organization with name org

    :param org_id: the id for the organization of interest
    :ptype org_id: str
    :param lc: local CKAN
    :ptype lc: obj
    :param target_dataset: name of target dataset (e.g., 'ati', 'pd', etc.)
    :ptype target_dataset: str

    :return generates batches of dataset dict records
    :rtype batch of dataset dict records
    """
    dataset_types = get_dataset_types()
    for dataset_type in dataset_types:
        geno = get_geno(dataset_type)
        if geno.get('target_dataset') == target_dataset:
            break
    else:
        return

    result = lc.action.package_search(
        q="type:{0:s} owner_org:{1:s}".format(dataset_type, org_id),
        rows=2)['results']
        
    if not result:
        return
    if len(result) != 1:
        sys.stderr.write('1 record expected for %s %s, found %d\n' %
                         (dataset_type, org_id, len(result)))

    dataset = result[0]
    for resource in dataset['resources']:
        offset = 0
        while True:
            rval = lc.action.datastore_search(
                resource_id=resource['id'],
                limit=BATCH_SIZE,
                offset=offset)
            records = rval['records']
            if not records:
                break
            offset += len(records)
            yield records
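
A usage sketch for data_batch(): it yields lists of records, one list per datastore_search page, so a consumer iterates over batches rather than single rows. The organization id and target dataset below are example values only:

from ckanapi import LocalCKAN  # as used in the first snippet above

lc = LocalCKAN()
total = 0
for records in data_batch('tbs-sct', lc, 'ati'):
    # each batch is a list of up to BATCH_SIZE datastore records
    total += len(records)
print('%d records fetched' % total)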
    def _show(self, dataset_type, org_name):
        """
        Display some information about the status of recombinant datasets
        """
        orgs = [org_name] if org_name else self._get_orgs()
        types = [dataset_type] if dataset_type else get_dataset_types()

        for dtype in types:
            print u'{geno[title]} ({dtype})'.format(
                geno=get_geno(dtype), dtype=dtype).encode('utf-8')

            packages = self._get_packages(dtype, orgs)
            if dataset_type:
                for p in packages:
                    print p['owner_org']
                    if 'error' in p:
                        print '  *** {p[error]}'.format(p=p)
                    elif not p['metadata_correct']:
                        print '  ! metadata needs to be updated'
                    for r in p['resources']:
                        print ' - id:{r[id]} {r[name]}'.format(r=r),
                        if 'error' in r:
                            print '    *** {r[error]}'.format(r=r)
                        else:
                            print 'rows:{r[datastore_rows]}'.format(r=r)
                            if not r['datastore_correct']:
                                print '   ! datastore needs to be updated'
                            if not r['metadata_correct']:
                                print '   ! metadata needs to be updated'

            if len(packages) != len(orgs):
                print(' > %d orgs but %d records found' %
                      (len(orgs), len(packages)))
            else:
                print(' > %d datasets found' % (len(packages), ))
            need_update = sum(1 for p in packages if not p['all_correct'])
            if need_update:
                print(' --> %d need to be updated' % need_update)
 def _expand_dataset_types(self, dataset_types):
     if self.options.all_types:
         return get_dataset_types()
     return dataset_types
Example #6
 def package_types(self):
     return tables.get_dataset_types()
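
For context: package_types() matches the hook that CKAN's IDatasetForm plugin interface calls to learn which dataset types a plugin handles. A sketch of how a wrapper like this is typically wired up; the plugin class name and the tables import path are assumptions, not taken from the source:

import ckan.plugins as p
from ckanext.recombinant import tables  # assumed module path

class RecombinantLikePlugin(p.SingletonPlugin):
    # hypothetical plugin class, shown only to situate package_types()
    p.implements(p.IDatasetForm, inherit=True)

    def package_types(self):
        return tables.get_dataset_types()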
Example #10
def recombinant_get_types():
    return get_dataset_types()
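
recombinant_get_types() reads like a template helper. A sketch, under the assumption that it is exposed through CKAN's ITemplateHelpers interface so templates can call it as h.recombinant_get_types():

import ckan.plugins as p

class RecombinantHelpersPlugin(p.SingletonPlugin):
    # hypothetical plugin class used only for illustration
    p.implements(p.ITemplateHelpers)

    def get_helpers(self):
        # templates can then call h.recombinant_get_types()
        return {'recombinant_get_types': recombinant_get_types}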