Ejemplo n.º 1
0
    def test_data_dictionary(self, _, __):
        responses.add(
            responses.GET,
            'https://example.com/data.csv',
            body='Date,Price\n1/6/2017,4.00\n2/6/2017,4.12'
        )
        responses.add_passthru('http://127.0.0.1:8983/solr')
        dataset = factories.Dataset(resources=[{
            'url': 'https://example.com/data.csv',
            'format': 'csv',
            }])

        update_zip(dataset['id'])

        dataset = helpers.call_action(u'package_show', id=dataset['id'])
        zip_resources = [res for res in dataset['resources']
                         if res['name'] == u'All resource data']
        assert_equal(len(zip_resources), 1)
        zip_resource = zip_resources[0]
        assert_equal(zip_resource['url_type'], 'upload')

        uploader = ckan.lib.uploader.get_resource_uploader(zip_resource)
        filepath = uploader.get_path(zip_resource[u'id'])
        csv_filename_in_zip = '{}.csv'.format(dataset['resources'][0]['id'])
        with fake_open(filepath, 'rb') as f:
            with zipfile.ZipFile(f) as zip_:
                assert_equal(zip_.namelist(),
                             [csv_filename_in_zip, 'datapackage.json'])
                datapackage_json = zip_.read('datapackage.json')
                assert datapackage_json.startswith('{\n  "description"')
                datapackage = json.loads(datapackage_json)
                eq(datapackage['resources'][0][u'schema'],
                   {'fields': [{'type': 'string', 'name': u'Date'},
                               {'type': 'string', 'name': u'Price'}]})
Ejemplo n.º 2
0
    def test_uploaded_resource(self, _):
        responses.add_passthru('http://127.0.0.1:8983/solr')
        csv_content = u'Test,csv'
        responses.add(
            responses.GET,
            re.compile(r'http://test.ckan.net/dataset/.*/download/.*'),
            body=csv_content
        )
        dataset = factories.Dataset()
        # add a resource which is an uploaded file
        with tempfile.NamedTemporaryFile() as fp:
            fp.write(csv_content)
            fp.seek(0)
            registry = ckanapi.LocalCKAN()
            resource = dict(
                package_id=dataset[u'id'],
                url=u'dummy-value',
                upload=fp,
                name=u'Rainfall',
                format=u'CSV'
            )
            registry.action.resource_create(**resource)

        update_zip(dataset['id'])

        dataset = helpers.call_action(u'package_show', id=dataset['id'])
        zip_resources = [res for res in dataset['resources']
                         if res['name'] == u'All resource data']
        zip_resource = zip_resources[0]
        uploader = ckan.lib.uploader.get_resource_uploader(zip_resource)
        filepath = uploader.get_path(zip_resource[u'id'])
        csv_filename_in_zip = u'rainfall.csv'
        with fake_open(filepath, 'rb') as f:
            with zipfile.ZipFile(f) as zip_:
                # Check uploaded file
                assert_equal(zip_.namelist(),
                             [csv_filename_in_zip, 'datapackage.json'])
                assert_equal(zip_.read(csv_filename_in_zip), 'Test,csv')
                # Check datapackage.json
                datapackage_json = zip_.read('datapackage.json')
                datapackage = json.loads(datapackage_json)
                eq(datapackage[u'resources'], [{
                    u'format': u'CSV',
                    u'name': u'rainfall',
                    u'path': csv_filename_in_zip,
                    u'sources': [{u'path': dataset['resources'][0]['url'],
                                  u'title': u'Rainfall'}],
                    u'title': u'Rainfall',
                    }])
Ejemplo n.º 3
0
    def test_update_twice_skipping_second_time(self, _):
        # i.e. testing skip_if_no_changes=False
        responses.add(
            responses.GET,
            'https://example.com/data.csv',
            body='a,b,c'
        )
        responses.add_passthru('http://127.0.0.1:8983/solr')
        dataset = factories.Dataset(resources=[{
            'url': 'https://example.com/data.csv',
            'format': 'csv',
            }])

        update_zip(dataset['id'])
        with mock.patch('ckanext.downloadall.tasks.write_zip') as write_zip_:
            update_zip(dataset['id'], skip_if_no_changes=True)
            # nothings changed, so it shouldn't rewrite the zip
            assert not write_zip_.called
Ejemplo n.º 4
0
    def test_dont_skip_if_no_changes(self, _):
        # i.e. testing skip_if_no_changes=False
        responses.add(
            responses.GET,
            'https://example.com/data.csv',
            body='a,b,c'
        )
        responses.add_passthru('http://127.0.0.1:8983/solr')
        dataset = factories.Dataset(resources=[{
            'url': 'https://example.com/data.csv',
            'format': 'csv',
            }])

        update_zip(dataset['id'])
        with mock.patch('ckanext.downloadall.tasks.write_zip') as write_zip_:
            update_zip(dataset['id'], skip_if_no_changes=False)
            # ensure zip would be rewritten in this case - not letting it skip
            assert write_zip_.called
Ejemplo n.º 5
0
    def test_deleting_resource_causes_zip_to_update(self, _):
        responses.add(
            responses.GET,
            'https://example.com/data.csv',
            body='a,b,c'
        )
        responses.add_passthru('http://127.0.0.1:8983/solr')
        dataset = factories.Dataset(resources=[{
            'url': 'https://example.com/data.csv',
            'format': 'csv',
            }])

        update_zip(dataset['id'])
        dataset = helpers.call_action(u'package_patch', id=dataset['id'],
                                      resources=[])
        with mock.patch('ckanext.downloadall.tasks.write_zip') as write_zip_:
            update_zip(dataset['id'], skip_if_no_changes=True)
            # ensure zip would be rewritten in this case - not letting it skip
            assert write_zip_.called
Ejemplo n.º 6
0
    def test_simple(self, _):
        responses.add(
            responses.GET,
            'https://example.com/data.csv',
            body='a,b,c'
        )
        responses.add_passthru('http://127.0.0.1:8983/solr')
        dataset = factories.Dataset(resources=[{
            'url': 'https://example.com/data.csv',
            'format': 'csv',
            }])

        update_zip(dataset['id'])

        dataset = helpers.call_action(u'package_show', id=dataset['id'])
        zip_resources = [res for res in dataset['resources']
                         if res['name'] == u'All resource data']
        assert_equal(len(zip_resources), 1)
        zip_resource = zip_resources[0]
        assert_equal(zip_resource['url_type'], 'upload')

        uploader = ckan.lib.uploader.get_resource_uploader(zip_resource)
        filepath = uploader.get_path(zip_resource[u'id'])
        csv_filename_in_zip = '{}.csv'.format(dataset['resources'][0]['id'])
        with fake_open(filepath, 'rb') as f:
            with zipfile.ZipFile(f) as zip_:
                assert_equal(zip_.namelist(),
                             [csv_filename_in_zip, 'datapackage.json'])
                assert_equal(zip_.read(csv_filename_in_zip), 'a,b,c')
                datapackage_json = zip_.read('datapackage.json')
                assert datapackage_json.startswith('{\n  "description"')
                datapackage = json.loads(datapackage_json)
                eq(datapackage[u'name'][:12], u'test_dataset')
                eq(datapackage[u'title'], u'Test Dataset')
                eq(datapackage[u'description'], u'Just another test dataset.')
                eq(datapackage[u'resources'], [{
                    u'format': u'CSV',
                    u'name': dataset['resources'][0]['id'],
                    u'path': csv_filename_in_zip,
                    u'sources': [{u'path': u'https://example.com/data.csv',
                                  u'title': None}],
                    }])
Ejemplo n.º 7
0
    def test_update_twice(self, _):
        responses.add(
            responses.GET,
            'https://example.com/data.csv',
            body='a,b,c'
        )
        responses.add_passthru('http://127.0.0.1:8983/solr')
        dataset = factories.Dataset(resources=[{
            'url': 'https://example.com/data.csv',
            'format': 'csv',
            }])

        update_zip(dataset['id'])
        update_zip(dataset['id'], skip_if_no_changes=False)

        # ensure a second zip hasn't been added
        dataset = helpers.call_action(u'package_show', id=dataset['id'])
        zip_resources = [res for res in dataset['resources']
                         if res['name'] == u'All resource data']
        assert_equal(len(zip_resources), 1)
Ejemplo n.º 8
0
    def test_resource_url_with_404_error(self, _):
        responses.add_passthru('http://127.0.0.1:8983/solr')
        responses.add(
            responses.GET,
            'https://example.com/data.csv',
            status=404
        )
        dataset = factories.Dataset(resources=[{
            'url': 'https://example.com/data.csv',
            'name': 'rainfall',
            'format': 'csv',
            }])

        update_zip(dataset['id'])

        dataset = helpers.call_action(u'package_show', id=dataset['id'])
        zip_resources = [res for res in dataset['resources']
                         if res['name'] == u'All resource data']
        zip_resource = zip_resources[0]
        uploader = ckan.lib.uploader.get_resource_uploader(zip_resource)
        filepath = uploader.get_path(zip_resource[u'id'])
        with fake_open(filepath, 'rb') as f:
            with zipfile.ZipFile(f) as zip_:
                # Zip doesn't contain the data, just the json file
                assert_equal(zip_.namelist(),
                             ['datapackage.json'])
                # Check datapackage.json
                datapackage_json = zip_.read('datapackage.json')
                datapackage = json.loads(datapackage_json)
                eq(datapackage[u'resources'], [{
                    u'format': u'CSV',
                    u'name': u'rainfall',
                    # path is to the URL - an 'external resource'
                    u'path': 'https://example.com/data.csv',
                    u'title': u'rainfall',
                    }])