예제 #1
0
def test_dataset_get_by_name(ckan_client_hl):
    """Datasets are retrievable both by id and by name, and each of the
    two lookup methods rejects the other's key type with a 404.
    """
    client = ckan_client_hl

    payload = generate_dataset()
    payload['name'] = 'example-dataset-name'
    new_dataset = CkanDataset(payload)

    created = client.create_dataset(new_dataset)
    assert created.is_equivalent(new_dataset)

    # Both lookup styles must return the very same dataset.
    assert created == client.get_dataset(created.id)
    assert created == client.get_dataset_by_name('example-dataset-name')

    # Passing a name where an id is expected (and vice versa)
    # must fail with a 404.
    for getter, wrong_key in (
            (client.get_dataset, 'example-dataset-name'),
            (client.get_dataset_by_name, created.id)):
        with pytest.raises(HTTPError) as excinfo:
            getter(wrong_key)
        assert excinfo.value.status_code == 404
예제 #2
0
def test_ckandataset_creation():
    """A CkanDataset built from a plain dict exposes typed attributes
    (``groups`` as a set, ``resources`` as a ResourcesList) and
    serializes back to a flat dict with defaults for omitted fields.
    """
    dataset = CkanDataset({
        'name': 'example-dataset',
        'title': 'Example Dataset',
        'author': 'Foo Bar',
        'author_email': '*****@*****.**',
        'extras': {
            'foo': 'bar',
            'baz': 'SPAM!'
        },
        'groups': ['one', 'two', 'three'],
    })
    assert dataset.name == 'example-dataset'
    assert dataset.title == 'Example Dataset'
    assert dataset.groups == set(['one', 'two', 'three'])
    assert dataset.extras == {'foo': 'bar', 'baz': 'SPAM!'}

    # No resources were passed: the attribute is still a typed,
    # empty ResourcesList.
    assert isinstance(dataset.resources, ResourcesList)
    assert len(dataset.resources) == 0

    # The order of groups doesn't matter..
    _serialized = dataset.serialize()
    assert sorted(_serialized.pop('groups')) == sorted(['one', 'two', 'three'])

    # Everything else -- including fields never set -- must match
    # the serialized defaults exactly.
    assert _serialized == {
        'id': None,
        'name': 'example-dataset',
        'title': 'Example Dataset',
        'author': 'Foo Bar',
        'author_email': '*****@*****.**',
        'license_id': '',
        'maintainer': '',
        'maintainer_email': '',
        'notes': '',
        'owner_org': '',
        'private': False,
        'state': 'active',
        'type': 'dataset',
        'url': '',
        'extras': {
            'foo': 'bar',
            'baz': 'SPAM!'
        },
        'resources': [],
        'tags': [],
    }
예제 #3
0
def test_dataset_update_base_fields(ckan_client_hl):
    """Updating simple scalar fields (author, author_email, license_id)
    changes exactly those keys in the serialized dataset, and none of
    the dicts involved in the process get mutated.
    """
    client = ckan_client_hl  # shortcut
    ckp = MutableCheckpoint()  # to check objects mutation

    # Create our dataset.  Build the CkanDataset from the *checkpointed*
    # dict: the original code generated a second, unrelated dict here,
    # which made the mutation check on ``dataset_dict`` vacuous.
    dataset_dict = generate_dataset()
    ckp.add(dataset_dict)

    dataset = CkanDataset(dataset_dict)
    dataset.author = 'original author'
    dataset.author_email = '*****@*****.**'
    dataset.license_id = 'cc-zero'
    created = client.create_dataset(dataset)

    # Store a copy of the original dataset
    original_dataset = client.get_dataset(created.id)
    assert created.is_equivalent(original_dataset)
    ckp.add(original_dataset)

    # Update some base fields, send back & check
    to_be_updated = copy.deepcopy(original_dataset)
    to_be_updated.author = 'NEW_AUTHOR'
    to_be_updated.author_email = 'NEW_AUTHOR_EMAIL'
    to_be_updated.license_id = 'cc-by-sa'
    assert to_be_updated.is_modified()

    # Update, get back, check
    updated = client.update_dataset(to_be_updated)
    updated_2 = client.get_dataset(created.id)

    assert updated.is_equivalent(to_be_updated)
    assert updated.is_equivalent(updated_2)

    # Only the three fields we touched may differ; the round-trip may
    # neither add nor remove keys.
    diffs = diff_mappings(
        original_dataset.serialize(),
        updated.serialize())
    assert diffs['differing'] == set([
        'author', 'author_email', 'license_id',
    ])
    assert diffs['left'] == set()
    assert diffs['right'] == set()

    # Make sure dicts did not mutate
    ckp.check()
예제 #4
0
def test_dataset_update_base_fields(ckan_client_hl):
    """Update scalar base fields and verify that only those change.

    NOTE(review): duplicate of an earlier test of the same name in this
    file.  Also, ``dataset_dict`` below is checkpointed but never used
    to build the dataset -- presumably ``CkanDataset(dataset_dict)``
    was intended; verify.
    """
    client = ckan_client_hl  # shortcut
    ckp = MutableCheckpoint()  # to check objects mutation

    # Create our dataset
    dataset_dict = generate_dataset()
    ckp.add(dataset_dict)

    dataset = CkanDataset(generate_dataset())
    dataset.author = 'original author'
    dataset.author_email = '*****@*****.**'
    dataset.license_id = 'cc-zero'
    created = client.create_dataset(dataset)

    # Store a copy of the original dataset
    original_dataset = client.get_dataset(created.id)
    assert created.is_equivalent(original_dataset)
    ckp.add(original_dataset)

    # Update some base fields, send back & check
    to_be_updated = copy.deepcopy(original_dataset)
    to_be_updated.author = 'NEW_AUTHOR'
    to_be_updated.author_email = 'NEW_AUTHOR_EMAIL'
    to_be_updated.license_id = 'cc-by-sa'
    assert to_be_updated.is_modified()

    # Update, get back, check
    updated = client.update_dataset(to_be_updated)
    updated_2 = client.get_dataset(created.id)

    assert updated.is_equivalent(to_be_updated)
    assert updated.is_equivalent(updated_2)

    # Only the three fields we touched may differ.
    diffs = diff_mappings(original_dataset.serialize(), updated.serialize())
    assert diffs['differing'] == set([
        'author',
        'author_email',
        'license_id',
    ])
    assert diffs['left'] == set()
    assert diffs['right'] == set()

    # Make sure dicts did not mutate
    ckp.check()
예제 #5
0
    def take_action(self, parsed_args):
        """Load a dataset definition from a JSON file and create it on
        the Ckan instance, writing the created dataset (as JSON) to
        stdout.
        """
        client = self._get_client(parsed_args)
        raw_data = self._read_file(parsed_args.filename)
        dataset_json = json.loads(raw_data)

        # Load dataset from file
        dataset = CkanDataset(dataset_json)

        # todo: we need to check whether this dataset exists
        #       -> try getting and check..
        dataset.id = None  # let Ckan assign a fresh id

        dataset.owner_org = None  # todo: fill this
        dataset.groups = []  # todo: fill this

        # Resource ids from the source file would be misleading
        # on the target instance; clear them as well.
        for resource in dataset.resources:
            resource.id = None

        created = client.create_dataset(dataset)
        self.app.stdout.write(json.dumps(created.serialize()))
def test_ckandataset_creation():
    """CkanDataset construction + serialization round-trip.

    NOTE(review): near-duplicate of another ``test_ckandataset_creation``
    in this file (differs only in literal formatting).
    """
    dataset = CkanDataset({
        'name': 'example-dataset',
        'title': 'Example Dataset',
        'author': 'Foo Bar',
        'author_email': '*****@*****.**',
        'extras': {'foo': 'bar', 'baz': 'SPAM!'},
        'groups': ['one', 'two', 'three'],
    })
    assert dataset.name == 'example-dataset'
    assert dataset.title == 'Example Dataset'
    assert dataset.groups == set(['one', 'two', 'three'])
    assert dataset.extras == {'foo': 'bar', 'baz': 'SPAM!'}

    assert isinstance(dataset.resources, ResourcesList)
    assert len(dataset.resources) == 0

    # The order of groups doesn't matter..
    _serialized = dataset.serialize()
    assert sorted(_serialized.pop('groups')) == sorted(['one', 'two', 'three'])

    # All remaining keys must match the serialized defaults exactly.
    assert _serialized == {
        'id': None,
        'name': 'example-dataset',
        'title': 'Example Dataset',
        'author': 'Foo Bar',
        'author_email': '*****@*****.**',
        'license_id': '',
        'maintainer': '',
        'maintainer_email': '',
        'notes': '',
        'owner_org': '',
        'private': False,
        'state': 'active',
        'type': 'dataset',
        'url': '',
        'extras': {'foo': 'bar', 'baz': 'SPAM!'},
        'resources': [],
        'tags': [],
    }
예제 #7
0
def test_dataset_update_resources(ckan_client_hl):
    """Resources can be appended to (and inserted into) an existing
    dataset, and the updated resource list survives the server
    round-trip with the expected length.
    """
    client = ckan_client_hl  # shortcut

    ds_dict = generate_dataset()
    ds_dict['resources'] = [
        {
            'name': 'example-csv-1',
            'url': 'http://example.com/dataset-1.csv',
            'format': 'CSV'
        },
        {
            'name': 'example-json-1',
            'url': 'http://example.com/dataset-1.json',
            'format': 'JSON'
        },
    ]
    stage_1pre = CkanDataset(ds_dict)
    stage_1 = client.create_dataset(stage_1pre)

    # --------------------------------------------------
    # Try adding a new resource

    stage_2pre = client.get_dataset(stage_1.id)
    stage_2pre.resources.append({
        'name': 'example-csv-2',
        'url': 'http://example.com/dataset-2.csv',
        'format': 'CSV'
    })

    assert len(stage_2pre.resources) == 3
    assert len(stage_2pre.serialize()['resources']) == 3

    stage_2 = client.update_dataset(stage_2pre)
    assert len(stage_2.resources) == 3
    assert len(stage_2.serialize()['resources']) == 3

    # --------------------------------------------------
    # Try prepending adding a new resource

    stage_3pre = client.get_dataset(stage_1.id)
    stage_3pre.resources.insert(0, {
        'url': 'http://example.com/dataset-2.json',
        'format': 'JSON'
    })

    assert len(stage_3pre.resources) == 4
    assert len(stage_3pre.serialize()['resources']) == 4

    stage_3 = client.update_dataset(stage_3pre)
    assert len(stage_3.resources) == 4
    assert len(stage_3.serialize()['resources']) == 4
예제 #8
0
def test_dataset_update_extras(ckan_client_hl):
    """Extras keys can be added and removed through update operations,
    and removing the added key brings the dataset back to a state
    equivalent to the original.
    """
    client = ckan_client_hl  # shortcut

    ds_dict = generate_dataset()
    ds_dict['extras'] = {
        'key-0': 'value-0',
        'key-1': 'value-1',
        'key-2': 'value-2',
        'key-3': 'value-3',
        'key-4': 'value-4',
        'key-5': 'value-5',
        'key-6': 'value-6',
        'key-7': 'value-7',
        'key-8': 'value-8',
        'key-9': 'value-9',
    }
    stage_1pre = CkanDataset(ds_dict)
    stage_1 = client.create_dataset(stage_1pre)

    # --------------------------------------------------
    # Try adding a new record

    stage_1b = client.get_dataset(stage_1.id)
    stage_2pre = copy.deepcopy(stage_1b)
    stage_2pre.extras['NEW_FIELD_NAME'] = 'NEW_FIELD_VALUE'

    stage_2 = client.update_dataset(stage_2pre)
    assert stage_2.is_equivalent(client.get_dataset(stage_1.id))
    # Only the 'extras' key may differ between the two serializations.
    diffs = diff_mappings(stage_1b.serialize(), stage_2.serialize())
    assert diffs['left'] == diffs['right'] == set()
    assert diffs['differing'] == set(['extras'])

    del stage_1b, stage_2pre, stage_2, diffs

    # --------------------------------------------------
    # Try removing the custom field

    stage_2pre = client.get_dataset(stage_1.id)
    del stage_2pre.extras['NEW_FIELD_NAME']

    stage_2 = client.update_dataset(stage_2pre)
    assert stage_2.is_equivalent(client.get_dataset(stage_1.id))
    assert 'NEW_FIELD_NAME' not in stage_2.extras
    stage_2b = client.get_dataset(stage_1.id)
    assert stage_2 == stage_2b

    # Make sure we brought it back to its original state
    assert stage_1.is_equivalent(stage_2)

    del stage_2pre, stage_2
def test_ckan_dataset_resources():
    """Resources assigned as plain dicts are converted to CkanResource
    objects, the assignment marks the dataset as modified, and
    serialization yields JSON-serializable plain dicts.
    """
    dataset = CkanDataset({
        'name': 'example-dataset',
    })
    assert dataset.is_modified() is False

    # By asking for resources, a copy will be made,
    # but the two items should match..
    assert isinstance(dataset.resources, ResourcesList)
    assert len(dataset.resources) == 0
    assert dataset.is_modified() is False

    # Resources can be passed as normal objects and
    # will be converted to CkanResource() objects.
    dataset.resources = [
        {'name': 'resource-1'},
        {'name': 'resource-2'},
    ]

    # Make sure type conversions have been applied
    assert isinstance(dataset.resources, ResourcesList)
    for item in dataset.resources:
        assert isinstance(item, CkanResource)

    # Make sure dataset is marked as modified
    assert dataset.is_modified() is True

    # We allow comparison to plain objects
    assert dataset.resources == [
        {'name': 'resource-1'},
        {'name': 'resource-2'},
    ]

    # Or to the actual types used internally, of course
    assert dataset.resources == ResourcesList([
        CkanResource({'name': 'resource-1'}),
        CkanResource({'name': 'resource-2'}),
    ])

    # Do some tests for object serialization
    serialized = dataset.serialize()

    assert isinstance(serialized['resources'], list)
    assert len(serialized['resources']) == 2

    assert isinstance(serialized['resources'][0], dict)
    assert serialized['resources'][0]['name'] == 'resource-1'

    assert isinstance(serialized['resources'][1], dict)
    assert serialized['resources'][1]['name'] == 'resource-2'

    # Serialized data must be json-serializable
    json.dumps(serialized)
예제 #10
0
def test_dataset_wipe(ckan_client_hl):
    """``delete_dataset`` leaves the name taken (recreating with the
    same name fails with 409), while ``wipe_dataset`` frees the name
    so a new dataset can reuse it.
    """
    client = ckan_client_hl

    # ------------------------------------------------------------
    # Now delete normally and try inserting another
    # one with the same name. Should fail with 409

    dataset = CkanDataset(generate_dataset())
    dataset.name = 'dataset-to-delete'

    created = client.create_dataset(dataset)
    assert created.is_equivalent(dataset)

    client.delete_dataset(created.id)

    new_dataset = CkanDataset(generate_dataset())
    new_dataset.name = 'dataset-to-delete'

    with pytest.raises(HTTPError) as excinfo:
        client.create_dataset(new_dataset)
    assert excinfo.value.status_code == 409

    del dataset, created, new_dataset, excinfo

    # ------------------------------------------------------------
    # Now let's try updating + deleting

    dataset = CkanDataset(generate_dataset())
    dataset.name = 'dataset-to-delete-2'

    created = client.create_dataset(dataset)
    assert created.is_equivalent(dataset)

    client.wipe_dataset(created.id)

    new_dataset = CkanDataset(generate_dataset())
    new_dataset.name = 'dataset-to-delete-2'

    # Should not fail anymore
    created = client.create_dataset(new_dataset)
    assert created.name == 'dataset-to-delete-2'
예제 #11
0
def test_dataset_delete(ckan_client_hl):
    """A deleted dataset disappears from listings and plain gets (404),
    but can still be retrieved with ``allow_deleted=True`` and reports
    state 'deleted'.
    """
    client = ckan_client_hl

    dataset_dict = generate_dataset()
    dataset = CkanDataset(dataset_dict)

    created = client.create_dataset(dataset)
    assert created.is_equivalent(dataset)

    # Make sure it is in lists
    assert created.id in client.list_datasets()

    # Delete it
    client.delete_dataset(created.id)
    assert created.id not in client.list_datasets()

    # Test that our workarounds work as expected..

    with pytest.raises(HTTPError) as excinfo:
        client.get_dataset(created.id)
    assert excinfo.value.status_code == 404

    retrieved = client.get_dataset(created.id, allow_deleted=True)
    assert retrieved.state == 'deleted'
예제 #12
0
 def add_extras():
     """Return a fresh CkanDataset with its extras mapping assigned
     wholesale via attribute assignment.
     """
     dataset = CkanDataset()
     dataset.extras = {'KEY': 'VALUE'}
     return dataset
예제 #13
0
 def repl_extras():
     """Build a CkanDataset with an existing extras value, then
     replace the whole extras mapping.
     """
     dataset = CkanDataset({'extras': {'KEY': 'ORIGINAL-VALUE'}})
     dataset.extras = {'KEY': 'VALUE'}
     return dataset
예제 #14
0
 def set_extras():
     """Set a single extras key on an otherwise empty CkanDataset."""
     dataset = CkanDataset()
     dataset.extras['KEY'] = 'VALUE'
     return dataset
예제 #15
0
 def upd_extras():
     """Overwrite one pre-existing extras key in place."""
     dataset = CkanDataset({'extras': {'KEY': 'ORIGINAL-VALUE'}})
     dataset.extras['KEY'] = 'VALUE'
     return dataset
예제 #16
0
def test_dataset_create(ckan_client_hl):
    """A freshly created dataset is equivalent to the one we sent."""
    new_dataset = CkanDataset(generate_dataset())
    created = ckan_client_hl.create_dataset(new_dataset)
    assert created.is_equivalent(new_dataset)
예제 #17
0
 def init_extras():
     """Return a CkanDataset whose extras were set at construction."""
     return CkanDataset({'extras': {'KEY': 'VALUE'}})
 def set_extras():
     """Assign one extras item on a brand-new CkanDataset."""
     dataset = CkanDataset()
     dataset.extras['KEY'] = 'VALUE'
     return dataset
예제 #19
0
def test_ckan_dataset():
    """Full from_dict/to_dict round-trip: construction does not mutate
    the input dict, scalar edits and resource add/delete flip
    ``is_modified()``, and ``to_dict()`` reflects resource-list edits.
    """
    raw_data = {
        'id': 'dataset-1',
        'author': 'DATASET-AUTHOR',
        'author_email': 'DATASET-AUTHOR_EMAIL',
        'license_id': 'DATASET-LICENSE_ID',
        'maintainer': 'DATASET-MAINTAINER',
        'maintainer_email': 'DATASET-MAINTAINER_EMAIL',
        'name': 'DATASET-NAME',
        'notes': 'DATASET-NOTES',
        'owner_org': 'DATASET-OWNER_ORG',
        'private': 'DATASET-PRIVATE',
        'state': 'DATASET-STATE',
        'type': 'DATASET-TYPE',
        'url': 'DATASET-URL',
        'extras': {
            'EXTRA_KEY_1': 'EXTRA-VALUE-1',
            'EXTRA_KEY_2': 'EXTRA-VALUE-2',
            'EXTRA_KEY_3': 'EXTRA-VALUE-3',
        },
        'groups': ['GROUP-1', 'GROUP-2', 'GROUP-3'],
        'relationships': [],
        'resources': [
            {
                'id': 'resource-1',
                'description': 'RES1-DESCRIPTION',
                'format': 'RES1-FORMAT',
                'mimetype': 'RES1-MIMETYPE',
                'mimetype_inner': 'RES1-MIMETYPE_INNER',
                'name': 'RES1-NAME',
                'position': 'RES1-POSITION',
                'resource_type': 'RES1-RESOURCE_TYPE',
                'size': 'RES1-SIZE',
                'url': 'RES1-URL',
                'url_type': 'RES1-URL_TYPE',
            },
            {
                'id': 'resource-2',
                'description': 'RES2-DESCRIPTION',
                'format': 'RES2-FORMAT',
                'mimetype': 'RES2-MIMETYPE',
                'mimetype_inner': 'RES2-MIMETYPE_INNER',
                'name': 'RES2-NAME',
                'position': 'RES2-POSITION',
                'resource_type': 'RES2-RESOURCE_TYPE',
                'size': 'RES2-SIZE',
                'url': 'RES2-URL',
                'url_type': 'RES2-URL_TYPE',
            },
            {
                'id': 'resource-3',
                'description': 'RES3-DESCRIPTION',
                'format': 'RES3-FORMAT',
                'mimetype': 'RES3-MIMETYPE',
                'mimetype_inner': 'RES3-MIMETYPE_INNER',
                'name': 'RES3-NAME',
                'position': 'RES3-POSITION',
                'resource_type': 'RES3-RESOURCE_TYPE',
                'size': 'RES3-SIZE',
                'url': 'RES3-URL',
                'url_type': 'RES3-URL_TYPE',
            },
        ]
    }
    # from_dict() must not mutate its argument, and to_dict() must
    # round-trip exactly.
    _raw_data = copy.deepcopy(raw_data)
    assert raw_data == _raw_data
    dataset = CkanDataset.from_dict(_raw_data)
    assert raw_data == _raw_data
    assert dataset.to_dict() == raw_data

    # A scalar edit flips is_modified() and shows up in to_dict().
    dataset.author = 'My author'
    assert dataset.is_modified()
    assert dataset.to_dict()['author'] == 'My author'

    # Create a new dataset
    dataset = CkanDataset.from_dict(raw_data)
    assert not dataset.is_modified()
    del dataset.resources[2]  # delete 'resource-3'
    assert dataset.is_modified()
    dataset.resources.append(CkanResource.from_dict({
        'id': 'resource-4',
        'description': 'RES4-DESCRIPTION',
        'format': 'RES4-FORMAT',
        'mimetype': 'RES4-MIMETYPE',
        'mimetype_inner': 'RES4-MIMETYPE_INNER',
        'name': 'RES4-NAME',
        'position': 'RES4-POSITION',
        'resource_type': 'RES4-RESOURCE_TYPE',
        'size': 'RES4-SIZE',
        'url': 'RES4-URL',
        'url_type': 'RES4-URL_TYPE',
    }))
    # After delete + append: resources 1, 2 and 4 -- in that order.
    assert dataset.to_dict()['resources'] == [
        {
            'id': 'resource-1',
            'description': 'RES1-DESCRIPTION',
            'format': 'RES1-FORMAT',
            'mimetype': 'RES1-MIMETYPE',
            'mimetype_inner': 'RES1-MIMETYPE_INNER',
            'name': 'RES1-NAME',
            'position': 'RES1-POSITION',
            'resource_type': 'RES1-RESOURCE_TYPE',
            'size': 'RES1-SIZE',
            'url': 'RES1-URL',
            'url_type': 'RES1-URL_TYPE',
        },
        {
            'id': 'resource-2',
            'description': 'RES2-DESCRIPTION',
            'format': 'RES2-FORMAT',
            'mimetype': 'RES2-MIMETYPE',
            'mimetype_inner': 'RES2-MIMETYPE_INNER',
            'name': 'RES2-NAME',
            'position': 'RES2-POSITION',
            'resource_type': 'RES2-RESOURCE_TYPE',
            'size': 'RES2-SIZE',
            'url': 'RES2-URL',
            'url_type': 'RES2-URL_TYPE',
        },
        {
            'id': 'resource-4',
            'description': 'RES4-DESCRIPTION',
            'format': 'RES4-FORMAT',
            'mimetype': 'RES4-MIMETYPE',
            'mimetype_inner': 'RES4-MIMETYPE_INNER',
            'name': 'RES4-NAME',
            'position': 'RES4-POSITION',
            'resource_type': 'RES4-RESOURCE_TYPE',
            'size': 'RES4-SIZE',
            'url': 'RES4-URL',
            'url_type': 'RES4-URL_TYPE',
        },
    ]
예제 #20
0
def test_dataset_wipe(ckan_client_hl):
    """Delete vs. wipe: a deleted dataset's name stays taken (409 on
    re-create), a wiped dataset's name becomes available again.

    NOTE(review): duplicate of an earlier ``test_dataset_wipe`` in
    this file.
    """
    client = ckan_client_hl

    # ------------------------------------------------------------
    # Now delete normally and try inserting another
    # one with the same name. Should fail with 409

    dataset = CkanDataset(generate_dataset())
    dataset.name = 'dataset-to-delete'

    created = client.create_dataset(dataset)
    assert created.is_equivalent(dataset)

    client.delete_dataset(created.id)

    new_dataset = CkanDataset(generate_dataset())
    new_dataset.name = 'dataset-to-delete'

    with pytest.raises(HTTPError) as excinfo:
        client.create_dataset(new_dataset)
    assert excinfo.value.status_code == 409

    del dataset, created, new_dataset, excinfo

    # ------------------------------------------------------------
    # Now let's try updating + deleting

    dataset = CkanDataset(generate_dataset())
    dataset.name = 'dataset-to-delete-2'

    created = client.create_dataset(dataset)
    assert created.is_equivalent(dataset)

    client.wipe_dataset(created.id)

    new_dataset = CkanDataset(generate_dataset())
    new_dataset.name = 'dataset-to-delete-2'

    # Should not fail anymore
    created = client.create_dataset(new_dataset)
    assert created.name == 'dataset-to-delete-2'
 def upd_extras():
     """Mutate an existing extras entry through item assignment."""
     ds_with_extra = CkanDataset({'extras': {'KEY': 'ORIGINAL-VALUE'}})
     ds_with_extra.extras['KEY'] = 'VALUE'
     return ds_with_extra
 def add_extras():
     """Attach an extras mapping to an empty CkanDataset."""
     empty_ds = CkanDataset()
     empty_ds.extras = {'KEY': 'VALUE'}
     return empty_ds
 def repl_extras():
     """Swap out the entire extras mapping of a pre-populated dataset."""
     populated_ds = CkanDataset({'extras': {'KEY': 'ORIGINAL-VALUE'}})
     populated_ds.extras = {'KEY': 'VALUE'}
     return populated_ds
예제 #24
0
    "title": "Lure Dispenser comparison trial, thrips, Australia, Perth",
    "notes":
    "Assess three thrips Lure delivery mechanisms:\n\n* P Paint pen\n* D Deer wick\n* C Control (no wick)",
    "private": False,
    "owner_org": "plant-and-food-research-nz",
    "author": "Mette Nielson",
}

dataset_dict2 = {
    "name": "lure-dispenser-comparison-trial",
    "title": "Lure Dispenser comparison trial, thrips, Australia, Perth",
    "notes":
    "Assess three thrips Lure delivery mechanisms:\n\n* P Paint pen\n* D Deer wick\n* C Control (no wick)",
    "private": False,
    "owner_org": "plant-and-food-research-nz",
    "state": "active",
    "project_code": "P/1234",
    "author": "Mette Nielson",
    "project_leader_email": "*****@*****.**",
    "data_steward": "Mette Nielson",
    "data_steward_email": "*****@*****.**",
    "other_researcher": "David Teulon",
    "biometrician": "Ruth Butler",
    "credits": "Mel Walker",
    "license_id": "PFR Internal Use only"
}

new_dataset = client.create_dataset(CkanDataset(dataset_dict))

print(new_dataset)
예제 #25
0
def test_ckan_dataset_resources():
    """Resource-list typing, modification tracking and serialization.

    NOTE(review): near-duplicate of another ``test_ckan_dataset_resources``
    in this file (differs only in literal formatting).
    """
    dataset = CkanDataset({
        'name': 'example-dataset',
    })
    assert dataset.is_modified() is False

    # By asking for resources, a copy will be made,
    # but the two items should match..
    assert isinstance(dataset.resources, ResourcesList)
    assert len(dataset.resources) == 0
    assert dataset.is_modified() is False

    # Resources can be passed as normal objects and
    # will be converted to CkanResource() objects.
    dataset.resources = [
        {
            'name': 'resource-1'
        },
        {
            'name': 'resource-2'
        },
    ]

    # Make sure type conversions have been applied
    assert isinstance(dataset.resources, ResourcesList)
    for item in dataset.resources:
        assert isinstance(item, CkanResource)

    # Make sure dataset is marked as modified
    assert dataset.is_modified() is True

    # We allow comparison to plain objects
    assert dataset.resources == [
        {
            'name': 'resource-1'
        },
        {
            'name': 'resource-2'
        },
    ]

    # Or to the actual types used internally, of course
    assert dataset.resources == ResourcesList([
        CkanResource({'name': 'resource-1'}),
        CkanResource({'name': 'resource-2'}),
    ])

    # Do some tests for object serialization
    serialized = dataset.serialize()

    assert isinstance(serialized['resources'], list)
    assert len(serialized['resources']) == 2

    assert isinstance(serialized['resources'][0], dict)
    assert serialized['resources'][0]['name'] == 'resource-1'

    assert isinstance(serialized['resources'][1], dict)
    assert serialized['resources'][1]['name'] == 'resource-2'

    # Serialized data must be json-serializable
    json.dumps(serialized)
def test_ckandataset_resources_update():
    """Append, insert, item-assign and wholesale replacement of the
    resources list all mark the dataset as modified, preserve ordering
    and keep items typed as CkanResource.
    """
    def _typecheck_resources(resources):
        # Helper: the list and each element must keep their wrapper types.
        assert isinstance(resources, ResourcesList)
        for item in resources:
            assert isinstance(item, CkanResource)

    dataset = CkanDataset({
        'name': 'example-dataset',
        'resources': [
            {'name': 'resource-1'},
            {'name': 'resource-2'},
        ]
    })
    assert dataset.is_modified() is False
    assert dataset.resources == [
        {'name': 'resource-1'},
        {'name': 'resource-2'},
    ]

    # Getting should not affect is_modified(), although
    # it is manipulating things internally..
    assert dataset.is_modified() is False

    dataset.resources.append({'name': 'resource-3'})
    assert dataset.is_modified() is True
    assert dataset.resources == [
        {'name': 'resource-1'},
        {'name': 'resource-2'},
        {'name': 'resource-3'},
    ]
    _typecheck_resources(dataset.resources)

    dataset.resources.insert(0, {'name': 'resource-0'})
    assert dataset.is_modified() is True
    assert dataset.resources == [
        {'name': 'resource-0'},
        {'name': 'resource-1'},
        {'name': 'resource-2'},
        {'name': 'resource-3'},
    ]
    _typecheck_resources(dataset.resources)

    dataset.resources[2] = {'name': 'RESOURCE-2'}
    assert dataset.is_modified() is True
    assert dataset.resources == [
        {'name': 'resource-0'},
        {'name': 'resource-1'},
        {'name': 'RESOURCE-2'},
        {'name': 'resource-3'},
    ]
    _typecheck_resources(dataset.resources)

    dataset.resources = [{'name': 'Hello'}]
    assert dataset.is_modified() is True
    assert dataset.resources == [
        {'name': 'Hello'},
    ]
    _typecheck_resources(dataset.resources)

    # "Contains" test is successful as fields left to
    # default values just get ignored during comparison.
    assert {'name': 'Hello'} in dataset.resources
    # assert {'name': 'WTF'} not in dataset.resources
    assert {'name': 'WTF, seriously'} not in dataset.resources
예제 #27
0
    def sync(self, source_name, data):
        """
        Synchronize data from a source into Ckan.

        - datasets are matched by _harvest_source
        - groups and organizations are matched by name

        :param source_name:
            String identifying the source of the data. Used to build
            ids that will be used in further synchronizations.
        :param data:
            Data to be synchronized. Should be a dict (or dict-like)
            with top level keys coresponding to the object type,
            mapping to dictionaries of ``{'id': <object>}``.
        """

        # ``items()`` instead of the Python2-only ``iteritems()`` keeps
        # this method working on both Python 2 and Python 3.
        groups = dict(
            (key, CkanGroup(val))
            for key, val in data['group'].items())

        organizations = dict(
            (key, CkanOrganization(val))
            for key, val in data['organization'].items())

        # Upsert groups and organizations
        groups_map = self._upsert_groups(groups)
        orgs_map = self._upsert_organizations(organizations)

        # Create list of datasets to be synced
        source_datasets = {}
        for source_id, dataset_dict in data['dataset'].items():
            _dataset_dict = copy.deepcopy(dataset_dict)

            # We need to make sure "source" datasets
            # don't have (otherwise misleading) ids
            _dataset_dict.pop('id', None)

            # We need to update groups and organizations,
            # to map their name from the source into a
            # ckan id
            _dataset_dict['groups'] = [
                groups_map.to_ckan(grp_id)
                for grp_id in _dataset_dict['groups']
            ]
            _dataset_dict['owner_org'] = \
                orgs_map.to_ckan(_dataset_dict['owner_org'])

            dataset = CkanDataset(_dataset_dict)

            # We also want to add the "source id", used for further
            # synchronizations to find stuff
            dataset.extras[HARVEST_SOURCE_ID_FIELD] = \
                self._join_source_id(source_name, source_id)

            source_datasets[source_id] = dataset

        # Retrieve list of datasets from Ckan
        ckan_datasets = self._find_datasets_by_source(source_name)

        # Compare collections to find differences
        differences = self._compare_collections(
            ckan_datasets, source_datasets)

        # ------------------------------------------------------------
        # We now need to create/update/delete datasets.

        # todo: we need to make sure dataset names are not
        # already used by another dataset. The only
        # way is to randomize resource names and hope
        # a 409 response indicates duplicate name..

        # We delete first, in order to (possibly) deallocate
        # some already-used names..
        for source_id in differences['left']:
            ckan_id = ckan_datasets[source_id].id
            logger.info('Deleting dataset {0}'.format(ckan_id))
            self._client.delete_dataset(ckan_id)

        def force_dataset_operation(operation, dataset, retry=5):
            # Run ``operation(dataset)``; on a 409 (duplicate name),
            # rename with a random numeric suffix and retry up to
            # ``retry`` times before giving up.

            # Maximum dataset name length is 100 characters
            # We trim it down to 80 just to be safe.

            # Note: we generally want to preserve the original name
            #       and there should *never* be problems with that
            #       when updating..

            _orig_name = dataset.name[:80]
            dataset.name = _orig_name

            while True:
                try:
                    result = operation(dataset)
                except HTTPError as e:
                    # (was ``except HTTPError, e:`` -- a SyntaxError
                    # on Python 3; ``as`` works on Python 2.6+ too)
                    if e.status_code != 409:
                        raise
                    retry -= 1
                    if retry < 0:
                        raise
                    dataset.name = '{0}-{1:06d}'.format(
                        _orig_name,
                        random.randint(0, 999999))
                    logger.debug('Got 409: trying to rename dataset to {0}'
                                 .format(dataset.name))
                else:
                    return result
예제 #28
0
def test_ckandataset_resources_update():
    """Resource-list mutation operations and modification tracking.

    NOTE(review): near-duplicate of another
    ``test_ckandataset_resources_update`` in this file (differs only
    in literal formatting).
    """
    def _typecheck_resources(resources):
        # Helper: the list and each element must keep their wrapper types.
        assert isinstance(resources, ResourcesList)
        for item in resources:
            assert isinstance(item, CkanResource)

    dataset = CkanDataset({
        'name':
        'example-dataset',
        'resources': [
            {
                'name': 'resource-1'
            },
            {
                'name': 'resource-2'
            },
        ]
    })
    assert dataset.is_modified() is False
    assert dataset.resources == [
        {
            'name': 'resource-1'
        },
        {
            'name': 'resource-2'
        },
    ]

    # Getting should not affect is_modified(), although
    # it is manipulating things internally..
    assert dataset.is_modified() is False

    dataset.resources.append({'name': 'resource-3'})
    assert dataset.is_modified() is True
    assert dataset.resources == [
        {
            'name': 'resource-1'
        },
        {
            'name': 'resource-2'
        },
        {
            'name': 'resource-3'
        },
    ]
    _typecheck_resources(dataset.resources)

    dataset.resources.insert(0, {'name': 'resource-0'})
    assert dataset.is_modified() is True
    assert dataset.resources == [
        {
            'name': 'resource-0'
        },
        {
            'name': 'resource-1'
        },
        {
            'name': 'resource-2'
        },
        {
            'name': 'resource-3'
        },
    ]
    _typecheck_resources(dataset.resources)

    dataset.resources[2] = {'name': 'RESOURCE-2'}
    assert dataset.is_modified() is True
    assert dataset.resources == [
        {
            'name': 'resource-0'
        },
        {
            'name': 'resource-1'
        },
        {
            'name': 'RESOURCE-2'
        },
        {
            'name': 'resource-3'
        },
    ]
    _typecheck_resources(dataset.resources)

    dataset.resources = [{'name': 'Hello'}]
    assert dataset.is_modified() is True
    assert dataset.resources == [
        {
            'name': 'Hello'
        },
    ]
    _typecheck_resources(dataset.resources)

    # "Contains" test is successful as fields left to
    # default values just get ignored during comparison.
    assert {'name': 'Hello'} in dataset.resources
    # assert {'name': 'WTF'} not in dataset.resources
    assert {'name': 'WTF, seriously'} not in dataset.resources
예제 #29
0
def test_ckan_dataset():
    """Round-trip a dataset through from_dict()/to_dict() and verify
    that resource deletion/addition is tracked by is_modified().
    """

    def _resource(num):
        # Build the raw dict for resource number *num*; used both to
        # assemble the input payload and to state expected outputs.
        return {
            'id': 'resource-{0}'.format(num),
            'description': 'RES{0}-DESCRIPTION'.format(num),
            'format': 'RES{0}-FORMAT'.format(num),
            'mimetype': 'RES{0}-MIMETYPE'.format(num),
            'mimetype_inner': 'RES{0}-MIMETYPE_INNER'.format(num),
            'name': 'RES{0}-NAME'.format(num),
            'position': 'RES{0}-POSITION'.format(num),
            'resource_type': 'RES{0}-RESOURCE_TYPE'.format(num),
            'size': 'RES{0}-SIZE'.format(num),
            'url': 'RES{0}-URL'.format(num),
            'url_type': 'RES{0}-URL_TYPE'.format(num),
        }

    raw_data = {
        'id': 'dataset-1',
        'author': 'DATASET-AUTHOR',
        'author_email': 'DATASET-AUTHOR_EMAIL',
        'license_id': 'DATASET-LICENSE_ID',
        'maintainer': 'DATASET-MAINTAINER',
        'maintainer_email': 'DATASET-MAINTAINER_EMAIL',
        'name': 'DATASET-NAME',
        'notes': 'DATASET-NOTES',
        'owner_org': 'DATASET-OWNER_ORG',
        'private': 'DATASET-PRIVATE',
        'state': 'DATASET-STATE',
        'type': 'DATASET-TYPE',
        'url': 'DATASET-URL',
        'extras': {
            'EXTRA_KEY_1': 'EXTRA-VALUE-1',
            'EXTRA_KEY_2': 'EXTRA-VALUE-2',
            'EXTRA_KEY_3': 'EXTRA-VALUE-3',
        },
        'groups': ['GROUP-1', 'GROUP-2', 'GROUP-3'],
        'relationships': [],
        'resources': [_resource(1), _resource(2), _resource(3)],
    }

    # from_dict() must not mutate the dict it is given, and
    # to_dict() must reproduce the original payload exactly.
    snapshot = copy.deepcopy(raw_data)
    assert raw_data == snapshot
    dataset = CkanDataset.from_dict(snapshot)
    assert raw_data == snapshot
    assert dataset.to_dict() == raw_data

    # A plain attribute write flips the modified flag and is
    # reflected in the serialized output.
    dataset.author = 'My author'
    assert dataset.is_modified()
    assert dataset.to_dict()['author'] == 'My author'

    # Start over with a pristine dataset and exercise resource
    # deletion + appending.
    dataset = CkanDataset.from_dict(raw_data)
    assert not dataset.is_modified()
    del dataset.resources[2]  # delete 'resource-3'
    assert dataset.is_modified()
    dataset.resources.append(CkanResource.from_dict(_resource(4)))
    assert dataset.to_dict()['resources'] == [
        _resource(1),
        _resource(2),
        _resource(4),
    ]
    def sync(self, source_name, data):
        """
        Synchronize data from a source into Ckan.

        - datasets are matched by _harvest_source
        - groups and organizations are matched by name

        :param source_name:
            String identifying the source of the data. Used to build
            ids that will be used in further synchronizations.
        :param data:
            Data to be synchronized. Should be a dict (or dict-like)
            with top level keys coresponding to the object type,
            mapping to dictionaries of ``{'id': <object>}``.
        """

        groups = dict(
            (key, CkanGroup(val))
            for key, val in data['group'].iteritems())

        organizations = dict(
            (key, CkanOrganization(val))
            for key, val in data['organization'].iteritems())

        # Upsert groups and organizations
        groups_map = self._upsert_groups(groups)
        orgs_map = self._upsert_organizations(organizations)

        # Create list of datasets to be synced
        logger.info('Creating list of datasets to be synchronized')
        source_datasets = {}
        for source_id, dataset_dict in data['dataset'].iteritems():
            _dataset_dict = copy.deepcopy(dataset_dict)

            # We need to make sure "source" datasets
            # don't have (otherwise misleading) ids
            _dataset_dict.pop('id', None)

            # We need to update groups and organizations,
            # to map their name from the source into a
            # ckan id
            _dataset_dict['groups'] = [
                groups_map.to_ckan(grp_id)
                for grp_id in _dataset_dict['groups']
            ]
            _dataset_dict['owner_org'] = \
                orgs_map.to_ckan(_dataset_dict['owner_org'])

            dataset = CkanDataset(_dataset_dict)

            # We also want to add the "source id", used for further
            # synchronizations to find stuff
            dataset.extras[HARVEST_SOURCE_ID_FIELD] = \
                self._join_source_id(source_name, source_id)

            source_datasets[source_id] = dataset

        # Retrieve list of datasets from Ckan
        logger.info('Retrieving current status from Ckan')
        ckan_datasets = self._find_datasets_by_source(source_name)

        # Compare collections to find differences
        differences = self._compare_collections(
            ckan_datasets, source_datasets)

        # ------------------------------------------------------------
        # We now need to create/update/delete datasets.

        # todo: we need to make sure dataset names are not
        # already used by another dataset. The only
        # way is to randomize resource names and hope
        # a 409 response indicates duplicate name..

        # _progress_total = sum(len(differences[x])
        #                       for x in ('left', 'right', 'differing'))
        # _progress_next = itertools.count(1).next
        # report_progress(0, _progress_total)

        _prog_tot_add = len(differences['right'])
        _prog_next_add = itertools.count(1).next
        _prog_tot_remove = len(differences['left'])
        _prog_next_remove = itertools.count(1).next
        _prog_tot_update = len(differences['differing'])
        _prog_next_update = itertools.count(1).next

        # Create progress bars early..
        report_progress(('datasets', 'delete'), 0, _prog_tot_remove)
        report_progress(('datasets', 'create'), 0, _prog_tot_add)
        report_progress(('datasets', 'update'), 0, _prog_tot_update)

        # We delete first, in order to (possibly) deallocate
        # some already-used names..
        for source_id in differences['left']:
            ckan_id = ckan_datasets[source_id].id
            logger.info('Deleting dataset {0}'.format(ckan_id))
            self._client.delete_dataset(ckan_id)
            report_progress(('datasets', 'delete'),
                            _prog_next_remove(), _prog_tot_remove)

        def force_dataset_operation(operation, dataset, retry=5):
            # Maximum dataset name length is 100 characters
            # We trim it down to 80 just to be safe.

            # Note: we generally want to preserve the original name
            #       and there should *never* be problems with that
            #       when updating..

            _orig_name = dataset.name[:80]
            dataset.name = _orig_name

            while True:
                try:
                    result = operation(dataset)
                except HTTPError, e:
                    if e.status_code != 409:
                        raise
                    retry -= 1
                    if retry < 0:
                        raise
                    dataset.name = '{0}-{1:06d}'.format(
                        _orig_name,
                        random.randint(0, 999999))
                    logger.debug('Got 409: trying to rename dataset to {0}'
                                 .format(dataset.name))
                else:
                    return result