Example #1
    def run(cls, config_ini_or_ckan_url, dataset_names):
        ckan = common.get_ckanapi(config_ini_or_ckan_url)

        stats = Stats()
        for dataset_name in dataset_names:
            try:
                ckan.call_action('dataset_delete',
                                 {'id': dataset_name})
                print stats.add('Deleted (or was already deleted)', dataset_name)
            except (KeyboardInterrupt, SystemExit):
                raise
            except Exception, e:
                if 'CKANAPIError' in str(e):
                    print e
                    print 'Not calling API correctly - aborting'
                    sys.exit(1)
                print stats.add('Error %s' % type(e).__name__,
                                '%s %s' % (dataset_name, e))
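Both examples fetch a ckanapi client via common.get_ckanapi and then call the delete action once per dataset name. As a minimal standalone sketch (not from the original source), the same call can be made directly with the ckanapi library; the URL, API key and dataset name below are placeholders, and core CKAN exposes the action as package_delete (the snippet above relies on dataset_delete being accepted as an alias):

import ckanapi

# placeholders -- point these at your own CKAN instance
ckan = ckanapi.RemoteCKAN('https://data.example.org', apikey='my-api-key')
try:
    ckan.call_action('package_delete', {'id': 'some-dataset-name'})
    print('Deleted (or was already deleted)')
except ckanapi.NotFound:
    print('Dataset not found')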
Example #2
    def run(cls, config_ini_or_ckan_url, dataset_names):
        ckan = common.get_ckanapi(config_ini_or_ckan_url)

        stats = Stats()
        for dataset_name in dataset_names:
            dataset_name = common.name_stripped_of_url(dataset_name)
            try:
                ckan.call_action('dataset_delete', {'id': dataset_name})
                print stats.add('Deleted (or was already deleted)',
                                dataset_name)
            except (KeyboardInterrupt, SystemExit):
                raise
            except Exception, e:
                if 'CKANAPIError' in str(e):
                    print e
                    print 'Not calling API correctly - aborting'
                    sys.exit(1)
                print stats.add('Error %s' % type(e).__name__,
                                '%s %s' % (dataset_name, e))
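Example #2 differs from Example #1 only in that each argument is first passed through common.name_stripped_of_url, so a full dataset URL can be supplied instead of a bare name. That helper is not shown; a hypothetical equivalent, assuming it simply keeps the last path segment, would be:

def name_stripped_of_url(name_or_url):
    # hypothetical re-implementation: turn e.g.
    # 'https://data.gov.uk/dataset/some-name' into 'some-name',
    # and pass a bare name through unchanged
    return name_or_url.rstrip('/').split('/')[-1]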
def main(source, source_type, destination,
         save_relevant_datasets_json,
         write,
         dataset_filter=None, res_url_filter=None):

    if source_type == 'json':
        all_datasets = get_datasets_from_json(source)
    elif source_type == 'jsonl':
        all_datasets = get_datasets_from_jsonl(source)
    else:
        all_datasets = get_datasets_from_ckan(source)

    datasets = []  # legacy ones
    revamped_datasets = []  # ones created on 3rd October 2016 launch
    revamped_datasets_by_org = {}
    revamped_resources = {}
    csv_out_rows = []
    csv_corrected_rows = []
    try:
        # find all the legacy organogram datasets
        all_datasets = list(all_datasets)  # since we need to iterate it twice
        for dataset in all_datasets:

            if dataset_filter and dataset['name'] != dataset_filter:
                continue
            if res_url_filter and \
                res_url_filter not in [r['url'] for r in dataset['resources']]:
                continue

            # check it is an organogram dataset
            dataset_str = repr(dataset).lower()
            if 'rganog' not in dataset_str \
                    and 'roles and salaries' not in dataset_str \
                    and 'pay and post' not in dataset_str \
                    and 'posts and pay' not in dataset_str \
                    and 'organisation chart' not in dataset_str \
                    and 'organization chart' not in dataset_str \
                    and 'org chart' not in dataset_str:
                stats_datasets.add('Ignored - not organograms',
                                   dataset['name'])
                continue
            if dataset['name'] in (
                    'eastbourne-borough-council-public-toilets',
                    'staff-organograms-and-pay-government-offices',
                    ) \
                    or dataset['id'] in (
                        '47f69ebb-9939-419f-880d-1b976676cb0e',
                    ):
                stats_datasets.add('Ignored - not organograms',
                                   dataset['name'])
                continue
            if asbool(dataset.get('unpublished')):
                stats_datasets.add('Ignored - unpublished',
                                   dataset['name'])
                continue
            extras = dict((extra['key'], extra['value'])
                          for extra in dataset['extras'])
            if extras.get('import_source') == 'organograms_v2':
                continue
            if extras.get('import_source') == 'harvest':
                stats_datasets.add('Ignored - harvested so can\'t edit it',
                                   dataset['name'])
                continue

            # legacy dataset
            datasets.append(dataset)

        # find the revamped organogram datasets
        for dataset in all_datasets:
            extras = dict((extra['key'], extra['value'])
                          for extra in dataset['extras'])
            if extras.get('import_source') != 'organograms_v2':
                continue

            org_id = dataset['owner_org']
            revamped_datasets.append(dataset)
            assert org_id not in revamped_datasets_by_org, org_id
            revamped_datasets_by_org[org_id] = dataset
            for res in dataset['resources']:
                date = date_to_year_month(res['date'])
                revamped_resources[(org_id, date)] = res
            continue

        if save_relevant_datasets_json:
            filename = 'datasets_organograms.json'
            if not (dataset_filter or res_url_filter):
                output = json.dumps(
                    datasets + revamped_datasets,
                    indent=4, separators=(',', ': '),  # pretty print
                    )
                with open(filename, 'wb') as f:
                    f.write(output)
                print 'Written %s' % filename
            else:
                print 'Not written %s because you filtered by a ' \
                    'dataset/resource' % filename

        all_resource_ids_to_delete = defaultdict(list)  # dataset_name: res_id_list
        dataset_names_to_delete = set()
        for dataset in datasets:
            org_id = dataset['owner_org']

            # save csv as it has been
            save_csv_rows(csv_out_rows, dataset, None, None)

            original_dataset = copy.deepcopy(dataset)
            delete_dataset = False

            dataset_to_merge_to = \
                get_dataset_to_merge_to(dataset, revamped_datasets_by_org)

            # detect dates
            for res in dataset['resources']:
                if res_url_filter and res['url'] != res_url_filter:
                    continue
                stats = timeseries_convert.add_date_to_resource(
                    res, dataset=dataset)

            # resource corrections
            resources_to_delete = []
            for res in dataset['resources']:
                if res_url_filter and res['url'] != res_url_filter:
                    continue
                resource_corrections(res, dataset, extras,
                                     revamped_resources,
                                     revamped_datasets_by_org,
                                     dataset_to_merge_to,
                                     org_id,
                                     resources_to_delete,
                                     stats_res)
            for res in resources_to_delete:
                dataset['resources'].remove(res)
            if not dataset['resources']:
                delete_dataset = True
            elif resources_to_delete and not dataset_to_merge_to:
                all_resource_ids_to_delete[dataset['name']].extend(
                    res['id'] for res in resources_to_delete)
            org_id = dataset['owner_org']  # it might have changed

            for res in dataset['resources']:
                if res_url_filter and res['url'] != res_url_filter:
                    continue
                if res.get('resource_type') != 'documentation' and not res.get('date'):
                    stats_dates.add('Missing date', dataset['name'])
                    break
            else:
                stats_dates.add('Ok dates', dataset['name'])

            # record changes
            if delete_dataset:
                stats_datasets.add('Delete dataset - no resources', dataset['name'])
                dataset_names_to_delete.add(dataset['name'])
                continue
            elif original_dataset != dataset:
                stats_datasets.add('Updated dataset', dataset['name'])
                has_changed = True
            else:
                stats_datasets.add('Unchanged dataset', dataset['name'])
                has_changed = False

            if dataset_to_merge_to:
                stats_merge.add('Merge', dataset_to_merge_to)
            else:
                stats_merge.add('No merge', dataset['name'])

            # save csv with corrections
            save_csv_rows(csv_corrected_rows, dataset, has_changed, dataset_to_merge_to)

    except:
        traceback.print_exc()
        import pdb; pdb.set_trace()

    stats_merge.report_value_limit = 500
    stats_res.report_value_limit = 500
    print '\nDatasets\n', stats_datasets
    print '\nDataset merges\n', stats_merge
    print '\nDates\n', stats_dates
    print '\nResources\n', stats_res

    # save csvs
    if dataset_filter or res_url_filter:
        for row in csv_corrected_rows:
            if res_url_filter and row['res_url'] != res_url_filter:
                continue
            pprint(row)
        print 'Not written csv because you specified a particular dataset'
    else:
        headers = [
            'name', 'org_title', 'org_id', 'notes',
            'res_id', 'res_name', 'res_url', 'res_format',
            'res_date', 'res_type',
            'has_changed',
            'merge_to_dataset',
            ]
        for csv_rows, out_filename in (
                (csv_out_rows, 'organogram_legacy_datasets.csv'),
                (csv_corrected_rows, 'organogram_legacy_datasets_corrected.csv'),
                ):
            with open(out_filename, 'wb') as csv_write_file:
                csv_writer = unicodecsv.DictWriter(csv_write_file,
                                                   fieldnames=headers,
                                                   encoding='utf-8')
                csv_writer.writeheader()
                for row in sorted(csv_rows, key=lambda r: r['res_url']):
                    csv_writer.writerow(row)
            print 'Written', out_filename

    # group merges by the revamped_dataset
    resources_to_merge = defaultdict(list)  # revamped_dataset_name: resource_list
    resources_to_update = defaultdict(list)  # dataset_name: resource_list
    for row in csv_corrected_rows:
        if row['has_changed'] is False:
            continue
        res = dict(
            id=row['res_id'],
            description=row['res_name'],  # description is required
            url=row['res_url'],
            format=row['res_format'],
            date=row['res_date'],
            resource_type=row['res_type'])
        if row['merge_to_dataset']:
            res['id'] = None  # ignore the id
            resources_to_merge[row['merge_to_dataset']].append(res)
            # also delete the merged dataset
            if row['name'] not in dataset_names_to_delete:
                dataset_names_to_delete.add(row['name'])
        else:
            resources_to_update[row['name']].append(res)

    # write changes - merges etc
    try:
        if destination:
            if write:
                write_caveat = ''
            else:
                write_caveat = ' (NOP without --write)'
            print 'Writing changes to datasets' + write_caveat
            stats_write_res = Stats()
            stats_write_dataset = Stats()
            ckan = common.get_ckanapi(destination)
            import ckanapi

            print 'Updating datasets'
            for dataset_name, res_list in resources_to_update.iteritems():
                dataset = ckan.action.package_show(id=dataset_name)
                resources_by_id = dict((r['id'], r) for r in dataset['resources'])
                dataset_changed = False
                for res in res_list:
                    res_ref = '%s-%s' % (dataset_name, res_list.index(res))
                    res_to_update = resources_by_id.get(res['id'])
                    if res_to_update:
                        res_changed = False
                        for key in res.keys():
                            if res[key] != res_to_update.get(key):
                                res_to_update[key] = res[key]
                                dataset_changed = True
                                res_changed = True
                        if res_changed:
                            stats_write_res.add(
                                'update - ok' + write_caveat, res_ref)
                        else:
                            stats_write_res.add(
                                'update - not needed', res_ref)
                    else:
                        stats_write_res.add(
                            'update - could not find resource id', dataset_name)
                if dataset_changed:
                    if write:
                        ckan.action.package_update(**dataset)
                    stats_write_dataset.add(
                        'Update done' + write_caveat, dataset_name)
                else:
                    stats_write_dataset.add(
                        'Update not needed', dataset_name)

            print 'Merging datasets'
            for revamped_dataset_name, res_list in \
                    resources_to_merge.iteritems():
                try:
                    dataset = ckan.action.package_show(id=revamped_dataset_name)
                except ckanapi.NotFound:
                    stats_write_dataset.add(
                        'Merge - dataset not found', revamped_dataset_name)
                    continue
                existing_res_urls = set(r['url'] for r in dataset['resources'])
                dataset_changed = False
                for res in res_list:
                    res_ref = '%s-%s' % (revamped_dataset_name, res_list.index(res))
                    if res['url'] in existing_res_urls:
                        stats_write_res.add(
                            'merge - no change - resource URL already there',
                            res_ref)
                    else:
                        dataset_changed = True
                        res['description'] += ' (from legacy dataset)'
                        dataset['resources'].append(res)
                        stats_write_res.add(
                            'merge - add' + write_caveat, res_ref)
                if dataset_changed:
                    if write:
                        ckan.action.package_update(**dataset)
                    stats_write_dataset.add(
                        'Merge done' + write_caveat, revamped_dataset_name)
                else:
                    stats_write_dataset.add('Merge not needed', revamped_dataset_name)

            print 'Deleting resources'
            for dataset_name, res_id_list in \
                    all_resource_ids_to_delete.iteritems():
                if dataset_name in dataset_names_to_delete:
                    stats_write_dataset.add(
                        'Delete resources not needed as deleting dataset later',
                        dataset_name)
                    continue
                try:
                    dataset = ckan.action.package_show(id=dataset_name)
                except ckanapi.NotFound:
                    stats_write_dataset.add(
                        'Delete res - dataset not found', dataset_name)
                    continue
                existing_resources = \
                    dict((r['id'], r) for r in dataset['resources'])
                dataset_changed = False
                for res_id in res_id_list:
                    res_ref = '%s-%s' % (dataset_name, res_id_list.index(res_id))
                    existing_resource = existing_resources.get(res_id)
                    if existing_resource:
                        dataset_changed = True
                        dataset['resources'].remove(existing_resource)
                        stats_write_res.add(
                            'delete res - done' + write_caveat, res_ref)
                    else:
                        stats_write_res.add(
                            'delete res - could not find res id', res_ref)
                if dataset_changed:
                    if write:
                        ckan.action.package_update(**dataset)
                    stats_write_dataset.add(
                        'Delete res done' + write_caveat, dataset_name)
                else:
                    stats_write_dataset.add(
                        'Delete res not needed', dataset_name)

            print 'Deleting datasets'
            for dataset_name in dataset_names_to_delete:
                try:
                    dataset = ckan.action.package_show(id=dataset_name)
                except ckanapi.NotFound:
                    stats_write_dataset.add(
                        'Delete dataset - not found', dataset_name)
                else:
                    if write:
                        ckan.action.package_delete(id=dataset_name)
                    stats_write_dataset.add(
                        'Delete dataset - done' + write_caveat, dataset_name)

            print '\nResources\n', stats_write_res
            print '\nDatasets\n', stats_write_dataset
        else:
            print 'Not written changes to datasets'
    except:
        traceback.print_exc()
        import pdb; pdb.set_trace()
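main() is clearly meant to be driven from the command line (the ' (NOP without --write)' caveat implies a --write flag), but the argument parsing is not shown. A hedged argparse wrapper consistent with the signature might look like this; the option names are assumptions, not the original script's interface:

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(
        description='Tidy up legacy organogram datasets')
    parser.add_argument('source',
                        help='CKAN URL/config, or a JSON/JSONL dump')
    parser.add_argument('--source-type', choices=['json', 'jsonl', 'ckan'],
                        default='ckan')
    parser.add_argument('--destination', help='CKAN to write changes to')
    parser.add_argument('--save-relevant-datasets-json', action='store_true')
    parser.add_argument('--write', action='store_true',
                        help='actually write changes (default is a dry run)')
    parser.add_argument('--dataset', dest='dataset_filter')
    parser.add_argument('--res-url', dest='res_url_filter')
    args = parser.parse_args()
    main(args.source, args.source_type, args.destination,
         args.save_relevant_datasets_json, args.write,
         dataset_filter=args.dataset_filter,
         res_url_filter=args.res_url_filter)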
def main(source,
         source_type,
         destination,
         save_relevant_datasets_json,
         write,
         dataset_filter=None,
         res_url_filter=None):

    if source_type == 'json':
        all_datasets = get_datasets_from_json(source)
    elif source_type == 'jsonl':
        all_datasets = get_datasets_from_jsonl(source)
    else:
        all_datasets = get_datasets_from_ckan(source)

    datasets = []  # legacy ones
    revamped_datasets = []  # ones created on 3rd October 2016 launch
    revamped_datasets_by_org = {}
    revamped_resources = {}
    csv_out_rows = []
    csv_corrected_rows = []
    try:
        # find all the legacy organogram datasets
        all_datasets = list(all_datasets)  # since we need to iterate it twice
        for dataset in all_datasets:

            if dataset_filter and dataset['name'] != dataset_filter:
                continue
            if res_url_filter and \
                res_url_filter not in [r['url'] for r in dataset['resources']]:
                continue

            # check it is an organogram dataset
            dataset_str = repr(dataset).lower()
            if 'rganog' not in dataset_str \
                    and 'roles and salaries' not in dataset_str \
                    and 'pay and post' not in dataset_str \
                    and 'posts and pay' not in dataset_str \
                    and 'organisation chart' not in dataset_str \
                    and 'organization chart' not in dataset_str \
                    and 'org chart' not in dataset_str:
                stats_datasets.add('Ignored - not organograms',
                                   dataset['name'])
                continue
            if dataset['name'] in (
                    'eastbourne-borough-council-public-toilets',
                    'staff-organograms-and-pay-government-offices',
                    ) \
                    or dataset['id'] in (
                        '47f69ebb-9939-419f-880d-1b976676cb0e',
                    ):
                stats_datasets.add('Ignored - not organograms',
                                   dataset['name'])
                continue
            if asbool(dataset.get('unpublished')):
                stats_datasets.add('Ignored - unpublished', dataset['name'])
                continue
            extras = dict(
                (extra['key'], extra['value']) for extra in dataset['extras'])
            if extras.get('import_source') == 'organograms_v2':
                continue
            if extras.get('import_source') == 'harvest':
                stats_datasets.add('Ignored - harvested so can\'t edit it',
                                   dataset['name'])
                continue

            # legacy dataset
            datasets.append(dataset)

        # find the revamped organogram datasets
        for dataset in all_datasets:
            extras = dict(
                (extra['key'], extra['value']) for extra in dataset['extras'])
            if extras.get('import_source') != 'organograms_v2':
                continue

            org_id = dataset['owner_org']
            revamped_datasets.append(dataset)
            assert org_id not in revamped_datasets_by_org, org_id
            revamped_datasets_by_org[org_id] = dataset
            for res in dataset['resources']:
                date = date_to_year_month(res['date'])
                revamped_resources[(org_id, date)] = res
            continue

        if save_relevant_datasets_json:
            filename = 'datasets_organograms.json'
            if not (dataset_filter or res_url_filter):
                output = json.dumps(
                    datasets + revamped_datasets,
                    indent=4,
                    separators=(',', ': '),  # pretty print
                )
                with open(filename, 'wb') as f:
                    f.write(output)
                print 'Written %s' % filename
            else:
                print 'Not written %s because you filtered by a ' \
                    'dataset/resource' % filename

        all_resource_ids_to_delete = defaultdict(
            list)  # dataset_name: res_id_list
        dataset_names_to_delete = set()
        for dataset in datasets:
            org_id = dataset['owner_org']

            # save csv as it has been
            save_csv_rows(csv_out_rows, dataset, None, None)

            original_dataset = copy.deepcopy(dataset)
            delete_dataset = False

            dataset_to_merge_to = \
                get_dataset_to_merge_to(dataset, revamped_datasets_by_org)

            # detect dates
            for res in dataset['resources']:
                if res_url_filter and res['url'] != res_url_filter:
                    continue
                stats = timeseries_convert.add_date_to_resource(
                    res, dataset=dataset)

            # resource corrections
            resources_to_delete = []
            for res in dataset['resources']:
                if res_url_filter and res['url'] != res_url_filter:
                    continue
                resource_corrections(res, dataset, extras, revamped_resources,
                                     revamped_datasets_by_org,
                                     dataset_to_merge_to, org_id,
                                     resources_to_delete, stats_res)
            for res in resources_to_delete:
                dataset['resources'].remove(res)
            if not dataset['resources']:
                delete_dataset = True
            elif resources_to_delete and not dataset_to_merge_to:
                all_resource_ids_to_delete[dataset['name']].extend(
                    res['id'] for res in resources_to_delete)
            org_id = dataset['owner_org']  # it might have changed

            for res in dataset['resources']:
                if res_url_filter and res['url'] != res_url_filter:
                    continue
                if res.get('resource_type') != 'documentation' and not res.get(
                        'date'):
                    stats_dates.add('Missing date', dataset['name'])
                    break
            else:
                stats_dates.add('Ok dates', dataset['name'])

            # record changes
            if delete_dataset:
                stats_datasets.add('Delete dataset - no resources',
                                   dataset['name'])
                dataset_names_to_delete.add(dataset['name'])
                continue
            elif original_dataset != dataset:
                stats_datasets.add('Updated dataset', dataset['name'])
                has_changed = True
            else:
                stats_datasets.add('Unchanged dataset', dataset['name'])
                has_changed = False

            if dataset_to_merge_to:
                stats_merge.add('Merge', dataset_to_merge_to)
            else:
                stats_merge.add('No merge', dataset['name'])

            # save csv with corrections
            save_csv_rows(csv_corrected_rows, dataset, has_changed,
                          dataset_to_merge_to)

    except:
        traceback.print_exc()
        import pdb
        pdb.set_trace()

    stats_merge.report_value_limit = 500
    stats_res.report_value_limit = 500
    print '\nDatasets\n', stats_datasets
    print '\nDataset merges\n', stats_merge
    print '\nDates\n', stats_dates
    print '\nResources\n', stats_res

    # save csvs
    if dataset_filter or res_url_filter:
        for row in csv_corrected_rows:
            if res_url_filter and row['res_url'] != res_url_filter:
                continue
            pprint(row)
        print 'Not written csv because you specified a particular dataset'
    else:
        headers = [
            'name',
            'org_title',
            'org_id',
            'notes',
            'res_id',
            'res_name',
            'res_url',
            'res_format',
            'res_date',
            'res_type',
            'has_changed',
            'merge_to_dataset',
        ]
        for csv_rows, out_filename in (
            (csv_out_rows, 'organogram_legacy_datasets.csv'),
            (csv_corrected_rows, 'organogram_legacy_datasets_corrected.csv'),
        ):
            with open(out_filename, 'wb') as csv_write_file:
                csv_writer = unicodecsv.DictWriter(csv_write_file,
                                                   fieldnames=headers,
                                                   encoding='utf-8')
                csv_writer.writeheader()
                for row in sorted(csv_rows, key=lambda r: r['res_url']):
                    csv_writer.writerow(row)
            print 'Written', out_filename

    # group merges by the revamped_dataset
    resources_to_merge = defaultdict(
        list)  # revamped_dataset_name: resource_list
    resources_to_update = defaultdict(list)  # dataset_name: resource_list
    for row in csv_corrected_rows:
        if row['has_changed'] is False:
            continue
        res = dict(
            id=row['res_id'],
            description=row['res_name'],  # description is required
            url=row['res_url'],
            format=row['res_format'],
            date=row['res_date'],
            resource_type=row['res_type'])
        if row['merge_to_dataset']:
            res['id'] = None  # ignore the id
            resources_to_merge[row['merge_to_dataset']].append(res)
            # also delete the merged dataset
            if row['name'] not in dataset_names_to_delete:
                dataset_names_to_delete.add(row['name'])
        else:
            resources_to_update[row['name']].append(res)

    # write changes - merges etc
    try:
        if destination:
            if write:
                write_caveat = ''
            else:
                write_caveat = ' (NOP without --write)'
            print 'Writing changes to datasets' + write_caveat
            stats_write_res = Stats()
            stats_write_dataset = Stats()
            ckan = common.get_ckanapi(destination)
            import ckanapi

            print 'Updating datasets'
            for dataset_name, res_list in resources_to_update.iteritems():
                dataset = ckan.action.package_show(id=dataset_name)
                resources_by_id = dict(
                    (r['id'], r) for r in dataset['resources'])
                dataset_changed = False
                for res in res_list:
                    res_ref = '%s-%s' % (dataset_name, res_list.index(res))
                    res_to_update = resources_by_id.get(res['id'])
                    if res_to_update:
                        res_changed = False
                        for key in res.keys():
                            if res[key] != res_to_update.get(key):
                                res_to_update[key] = res[key]
                                dataset_changed = True
                                res_changed = True
                        if res_changed:
                            stats_write_res.add('update - ok' + write_caveat,
                                                res_ref)
                        else:
                            stats_write_res.add('update - not needed', res_ref)
                    else:
                        stats_write_res.add(
                            'update - could not find resource id',
                            dataset_name)
                if dataset_changed:
                    if write:
                        ckan.action.package_update(**dataset)
                    stats_write_dataset.add('Update done' + write_caveat,
                                            dataset_name)
                else:
                    stats_write_dataset.add('Update not needed', dataset_name)

            print 'Merging datasets'
            for revamped_dataset_name, res_list in \
                    resources_to_merge.iteritems():
                try:
                    dataset = ckan.action.package_show(
                        id=revamped_dataset_name)
                except ckanapi.NotFound:
                    stats_write_dataset.add('Merge - dataset not found',
                                            revamped_dataset_name)
                    continue
                existing_res_urls = set(r['url'] for r in dataset['resources'])
                dataset_changed = False
                for res in res_list:
                    res_ref = '%s-%s' % (revamped_dataset_name,
                                         res_list.index(res))
                    if res['url'] in existing_res_urls:
                        stats_write_res.add(
                            'merge - no change - resource URL already there',
                            res_ref)
                    else:
                        dataset_changed = True
                        res['description'] += ' (from legacy dataset)'
                        dataset['resources'].append(res)
                        stats_write_res.add('merge - add' + write_caveat,
                                            res_ref)
                if dataset_changed:
                    if write:
                        ckan.action.package_update(**dataset)
                    stats_write_dataset.add('Merge done' + write_caveat,
                                            revamped_dataset_name)
                else:
                    stats_write_dataset.add('Merge not needed',
                                            revamped_dataset_name)

            print 'Deleting resources'
            for dataset_name, res_id_list in \
                    all_resource_ids_to_delete.iteritems():
                if dataset_name in dataset_names_to_delete:
                    stats_write_dataset.add(
                        'Delete resources not needed as deleting dataset later',
                        dataset_name)
                    continue
                try:
                    dataset = ckan.action.package_show(id=dataset_name)
                except ckanapi.NotFound:
                    stats_write_dataset.add('Delete res - dataset not found',
                                            dataset_name)
                    continue
                existing_resources = \
                    dict((r['id'], r) for r in dataset['resources'])
                dataset_changed = False
                for res_id in res_id_list:
                    res_ref = '%s-%s' % (dataset_name,
                                         res_id_list.index(res_id))
                    existing_resource = existing_resources.get(res_id)
                    if existing_resource:
                        dataset_changed = True
                        dataset['resources'].remove(existing_resource)
                        stats_write_res.add('delete res - done' + write_caveat,
                                            res_ref)
                    else:
                        stats_write_res.add(
                            'delete res - could not find res id', res_ref)
                if dataset_changed:
                    if write:
                        ckan.action.package_update(**dataset)
                    stats_write_dataset.add('Delete res done' + write_caveat,
                                            dataset_name)
                else:
                    stats_write_dataset.add('Delete res not needed',
                                            dataset_name)

            print 'Deleting datasets'
            for dataset_name in dataset_names_to_delete:
                try:
                    dataset = ckan.action.package_show(id=dataset_name)
                except ckanapi.NotFound:
                    stats_write_dataset.add('Delete dataset - not found',
                                            dataset_name)
                else:
                    if write:
                        ckan.action.package_delete(id=dataset_name)
                    stats_write_dataset.add(
                        'Delete dataset - done' + write_caveat, dataset_name)

            print '\nResources\n', stats_write_res
            print '\nDatasets\n', stats_write_dataset
        else:
            print 'Not written changes to datasets'
    except:
        traceback.print_exc()
        import pdb
        pdb.set_trace()
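Both main() variants follow the same dry-run pattern: every mutating CKAN call is guarded by the write flag, and the Stats labels carry the ' (NOP without --write)' caveat so a dry run reports exactly what would have changed. Stripped to its essentials (the names below are illustrative, not from the source), the pattern is:

def record_update(ckan, dataset, write, stats):
    # dry-run guard: only touch the remote CKAN when --write was given,
    # but log the would-be change either way
    caveat = '' if write else ' (NOP without --write)'
    if write:
        ckan.action.package_update(**dataset)
    stats.add('Update done' + caveat, dataset['name'])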