def test_job_pass_validation_options_string(self, mock_open):
    """Run the validation job with ``validation_options`` given as a string.

    A CSV resource is created with its options serialized as JSON text
    (``headers`` set to 3 and ``#`` listed in ``skip_rows``), the job is
    run with ``io.open`` patched to serve the same CSV, and the stored
    report is expected to be valid.

    ``mock_open`` is not used in the body; presumably it is supplied by a
    patch decorator outside this view.
    """
    # Rows are written with explicit newline escapes so the layout cannot
    # be lost to reformatting: two empty rows, the header on row 3 (to
    # match "headers": 3), a '#' comment row and one data row.
    # NOTE(review): the row breaks were reconstructed from the options
    # below -- confirm against the intended fixture.
    invalid_csv = '\n\na;b;c\n#comment\n1;2;3\n'

    validation_options = '''{ "headers": 3, "skip_rows": ["#"] }'''

    invalid_file = StringIO.StringIO()
    invalid_file.write(invalid_csv)
    mock_upload = MockFieldStorage(invalid_file, 'invalid.csv')

    resource = factories.Resource(
        format='csv',
        upload=mock_upload,
        validation_options=validation_options)

    # The job's call to io.open is answered with an in-memory stream.
    invalid_stream = io.BufferedReader(io.BytesIO(invalid_csv))
    with mock.patch('io.open', return_value=invalid_stream):
        run_validation_job(resource)

    validation = Session.query(Validation).filter(
        Validation.resource_id == resource['id']).one()

    assert_equals(validation.report['valid'], True)
def test_validation_fails_no_validation_object_stored(
        self, mock_open, app):
    """Check that a rejected ``resource_update`` stores no Validation row.

    An existing resource is updated with an upload while ``io.open`` is
    patched to serve ``INVALID_CSV``; the action must raise
    ``t.ValidationError`` and the number of Validation rows must be the
    same afterwards as it was before the call.

    ``mock_open`` and ``app`` are not used in the body; presumably they
    are supplied by a decorator/fixture outside this view.
    """
    dataset = factories.Dataset(
        resources=[{'url': 'https://example.com/data.csv'}])

    invalid_file = StringIO.StringIO()
    invalid_file.write(INVALID_CSV)
    mock_upload = MockFieldStorage(invalid_file, 'invalid.csv')

    invalid_stream = io.BufferedReader(io.BytesIO(INVALID_CSV))

    # Measure relative to the current table contents rather than assuming
    # the table is empty, so rows left by other tests cannot fail this one.
    validation_count_before = model.Session.query(Validation).count()

    with mock.patch('io.open', return_value=invalid_stream):
        with assert_raises(t.ValidationError):
            call_action(
                'resource_update',
                id=dataset['resources'][0]['id'],
                format='CSV',
                upload=mock_upload)

    validation_count_after = model.Session.query(Validation).count()

    assert_equals(validation_count_after, validation_count_before)
def test_job_local_paths_are_hidden(self, mock_open):
    """Check the ``source`` and first warning of a stored upload report.

    A CSV with one header line and 1010 data lines is uploaded and
    validated with ``io.open`` patched.  The first table's ``source`` in
    the stored report must start with ``http`` and end with the uploaded
    file name, and the first warning must be the 1000-row limit message.

    ``mock_open`` is not used in the body; presumably it is supplied by a
    patch decorator outside this view.
    """
    header_line = 'id,type\n'
    data_line = '1,a,\n'
    csv_text = header_line + data_line * 1010

    upload_buffer = StringIO.StringIO()
    upload_buffer.write(csv_text)
    resource = factories.Resource(
        format='csv',
        upload=MockFieldStorage(upload_buffer, 'invalid.csv'))

    # Serve the same text from memory when the job calls io.open.
    patched_stream = io.BufferedReader(io.BytesIO(csv_text))
    with mock.patch('io.open', return_value=patched_stream):
        run_validation_job(resource)

    matching = Session.query(Validation).filter(
        Validation.resource_id == resource['id'])
    report = matching.one().report

    source = report['tables'][0]['source']
    assert source.startswith('http')
    assert source.endswith('invalid.csv')

    assert_equals(
        report['warnings'][0],
        'Table inspection has reached 1000 row(s) limit')
def test_validation_fails_on_upload(self, mock_open, app):
    """Check the error raised when ``resource_update`` gets invalid data.

    With ``io.open`` patched to serve ``INVALID_CSV``, updating the
    resource must raise ``t.ValidationError`` whose ``error_dict`` has a
    ``validation`` key and whose text names the ``missing-value`` error
    and its row/column message.

    ``mock_open`` and ``app`` are not used in the body; presumably they
    are supplied by a decorator/fixture outside this view.
    """
    dataset = factories.Dataset(
        resources=[{'url': 'https://example.com/data.csv'}])
    resource_id = dataset['resources'][0]['id']

    upload_buffer = StringIO.StringIO()
    upload_buffer.write(INVALID_CSV)
    upload = MockFieldStorage(upload_buffer, 'invalid.csv')

    patched_stream = io.BufferedReader(io.BytesIO(INVALID_CSV))

    with mock.patch('io.open', return_value=patched_stream), \
            assert_raises(t.ValidationError) as raised:
        call_action(
            'resource_update',
            id=resource_id,
            format='CSV',
            upload=upload)

    error = raised.exception
    assert 'validation' in error.error_dict
    assert 'missing-value' in str(error)
    assert 'Row 2 has a missing value in column 4' in str(error)
def test_job_pass_validation_options(self, mock_open):
    """Run the validation job with ``validation_options`` given as a dict.

    A CSV resource is created with ``headers`` set to 3 and ``#`` listed
    in ``skip_rows``, the job is run with ``io.open`` patched to serve
    the same CSV bytes, and the stored report is expected to be valid.

    ``mock_open`` is not used in the body; presumably it is supplied by a
    patch decorator outside this view.
    """
    # Rows are written with explicit newline escapes so the layout cannot
    # be lost to reformatting: two empty rows, the header on row 3 (to
    # match 'headers': 3), a '#' comment row and one data row.
    # NOTE(review): the row breaks were reconstructed from the options
    # below -- confirm against the intended fixture.
    invalid_csv = b'\n\na,b,c\n#comment\n1,2,3\n'

    validation_options = {'headers': 3, 'skip_rows': ['#']}

    invalid_file = io.BytesIO(invalid_csv)
    mock_upload = MockFieldStorage(invalid_file, 'invalid.csv')

    resource = factories.Resource(
        format='csv',
        upload=mock_upload,
        validation_options=validation_options)

    # The job's call to io.open is answered with an in-memory stream.
    invalid_stream = io.BufferedReader(io.BytesIO(invalid_csv))
    with mock.patch('io.open', return_value=invalid_stream):
        run_validation_job(resource)

    validation = Session.query(Validation).filter(
        Validation.resource_id == resource['id']).one()

    assert validation.report['valid']
def test_schema_upload_field(self, mock_open, app):
    """Check that an uploaded schema file ends up in ``resource['schema']``.

    A resource is created with a JSON schema passed as ``schema_upload``.
    The returned resource must carry the parsed schema under ``schema``
    and must contain neither ``schema_upload`` nor ``schema_url``.

    ``mock_open`` and ``app`` are not used in the body; presumably they
    are supplied by a decorator/fixture outside this view.
    """
    schema_text = '{"fields":[{"name":"category"}]}'
    upload = MockFieldStorage(StringIO.StringIO(schema_text), 'schema.json')

    package = factories.Dataset()

    created = call_action(
        'resource_create',
        package_id=package['id'],
        url='http://example.com/file.csv',
        schema_upload=upload)

    expected_schema = {'fields': [{'name': 'category'}]}
    assert_equals(created['schema'], expected_schema)

    # The transient input fields must not appear in the returned resource.
    assert 'schema_upload' not in created
    assert 'schema_url' not in created
def test_validation_passes_on_upload(self, mock_open, app):
    """Check that ``resource_create`` with valid data reports success.

    With ``io.open`` patched to serve ``VALID_CSV``, the created resource
    must have ``validation_status`` equal to ``success`` and include a
    ``validation_timestamp``.

    ``mock_open`` and ``app`` are not used in the body; presumably they
    are supplied by a decorator/fixture outside this view.
    """
    upload_buffer = StringIO.StringIO()
    upload_buffer.write(VALID_CSV)
    upload = MockFieldStorage(upload_buffer, 'invalid.csv')

    package = factories.Dataset()

    patched_stream = io.BufferedReader(io.BytesIO(VALID_CSV))
    with mock.patch('io.open', return_value=patched_stream):
        created = call_action(
            'resource_create',
            package_id=package['id'],
            format='CSV',
            upload=upload)

    assert_equals(created['validation_status'], 'success')
    assert 'validation_timestamp' in created
def test_resource_form_fields_are_persisted(self, mock_open):
    """Check that editing a resource keeps its validation fields.

    A resource is created with ``validation_status`` and
    ``validation_timestamp`` set, then edited through a POST to the
    resource edit URL with a new name, description and URL.  Afterwards
    the dataset's first resource must still have the ``success`` status
    and a timestamp, together with the new description.

    ``mock_open`` is not used in the body; presumably it is supplied by a
    patch decorator outside this view.
    """
    form_upload = ('upload', 'valid.csv', VALID_CSV)
    api_upload = MockFieldStorage(io.BytesIO(VALID_CSV), filename='data.csv')
    create_stream = io.BufferedReader(io.BytesIO(VALID_CSV))

    dataset = Dataset()
    with mock.patch('io.open', return_value=create_stream):
        created = call_action(
            'resource_create',
            package_id=dataset['id'],
            validation_status='success',
            validation_timestamp=datetime.datetime.now().isoformat(),
            upload=api_upload,
            url='data.csv')

    # Read the resource back and check the starting state.
    resource = call_action('resource_show', id=created['id'])
    assert 'validation_status' in resource
    assert resource['validation_status'] == 'success'
    assert not resource.get('description')

    resource_id = resource['id']
    edit_url = EDIT_RESOURCE_URL.format(dataset['id'], resource['id'])
    form_values = {
        'name': 'test_resource_form_fields_are_persisted',
        'description': 'test desc',
        'url': 'https://example.com/data.csv'
    }

    # A new stream is built for the second patch of io.open.
    edit_stream = io.BufferedReader(io.BytesIO(VALID_CSV))
    with mock.patch('io.open', return_value=edit_stream):
        _post(self.app, edit_url, form_values,
              resource_id=resource_id, upload=[form_upload])

    refreshed = call_action('package_show', id=dataset['id'])
    saved = refreshed['resources'][0]

    assert saved['validation_status'] == 'success'
    assert 'validation_timestamp' in saved
    assert saved['description'] == 'test desc'
def test_validation_passes_on_upload(self, mock_open, app):
    """Check that ``resource_update`` with valid data reports success.

    An existing resource is updated with an upload while ``io.open`` is
    patched to serve ``VALID_CSV``; the returned resource must have
    ``validation_status`` equal to ``success`` and include a
    ``validation_timestamp``.

    ``mock_open`` and ``app`` are not used in the body; presumably they
    are supplied by a decorator/fixture outside this view.
    """
    dataset = factories.Dataset(
        resources=[{'url': 'https://example.com/data.csv'}])

    # Upload the same valid data that the patched io.open serves, so the
    # outcome does not depend on which of the two copies gets read.
    valid_file = StringIO.StringIO()
    valid_file.write(VALID_CSV)
    mock_upload = MockFieldStorage(valid_file, 'valid.csv')

    valid_stream = io.BufferedReader(io.BytesIO(VALID_CSV))

    with mock.patch('io.open', return_value=valid_stream):
        resource = call_action(
            'resource_update',
            id=dataset['resources'][0]['id'],
            format='CSV',
            upload=mock_upload)

    assert_equals(resource['validation_status'], 'success')
    assert 'validation_timestamp' in resource
def test_validation_fails_no_validation_object_stored(self, mock_open):
    """Check that a rejected ``resource_create`` stores no Validation row.

    With ``io.open`` patched to serve ``INVALID_CSV``, creating the
    resource must raise ``t.ValidationError`` and leave the number of
    Validation rows unchanged.

    ``mock_open`` is not used in the body; presumably it is supplied by a
    patch decorator outside this view.
    """
    upload_buffer = StringIO.StringIO()
    upload_buffer.write(INVALID_CSV)
    upload = MockFieldStorage(upload_buffer, 'invalid.csv')

    package = factories.Dataset()

    patched_stream = io.BufferedReader(io.BytesIO(INVALID_CSV))

    rows_before = model.Session.query(Validation).count()

    with mock.patch('io.open', return_value=patched_stream), \
            assert_raises(t.ValidationError):
        call_action(
            'resource_create',
            package_id=package['id'],
            format='CSV',
            upload=upload)

    rows_after = model.Session.query(Validation).count()

    assert_equals(rows_after, rows_before)