def test_format_field_not_recognized(self):
    # No sniffed format is available and the format field holds the value
    # 'ZAR': the score must be 1 and the reason must mention every
    # detection step that failed.
    set_sniffed_format(None)
    resource = self._test_resource(format='ZAR')
    result = resource_score(resource, log)
    assert result['openness_score'] == 1, result
    reason = result['openness_score_reason']
    expected_fragments = (
        'not recognized from its contents',
        'Could not determine a file extension in the URL',
        'Format field "ZAR" does not correspond to a known format',
    )
    for fragment in expected_fragments:
        assert fragment in reason, result
def test_no_format_clues(self):
    # Neither the sniffed contents nor the format field (None) give a
    # format: the score must be 1 and the reason must list each missing clue.
    set_sniffed_format(None)
    resource = self._test_resource(format=None)
    result = resource_score(resource, log)
    assert result['openness_score'] == 1, result
    reason = result['openness_score_reason']
    expected_fragments = (
        'not recognized from its contents',
        'Could not determine a file extension in the URL',
        'Format field is blank',
    )
    for fragment in expected_fragments:
        assert fragment in reason, result
def test_format_field_not_recognized(self):
    # No sniffed format is available and the format field holds the value
    # 'ZAR': the score must be 1 and the reason must mention every
    # detection step that failed.
    set_sniffed_format(None)
    resource = self._test_resource(format='ZAR')
    result = resource_score(resource, log)
    assert result['openness_score'] == 1, result
    reason = result['openness_score_reason']
    expected_fragments = (
        'not recognized from its contents',
        'Could not determine a file extension in the URL',
        'Format field "ZAR" does not correspond to a known format',
    )
    for fragment in expected_fragments:
        assert fragment in reason, result
def test_not_available_any_more(self):
    # A cache of the data still exists from the previous run, but this
    # time, the archiver found the file gave a 404.
    resource = self._test_resource(license_id=None, format=None)

    # The record of the previous (successful) run of QA.
    previous_qa = qa_model.QA.create(resource.id)
    previous_qa.format = 'CSV'
    model.Session.add(previous_qa)
    model.Session.commit()

    # Archival record: the cache file is still present, but the latest
    # download attempt is recorded as a failure.
    record = Archival.get_for_resource(resource.id)
    record.cache_filepath = __file__
    record.status_id = Status.by_text('Download error')
    record.reason = 'Server returned 404 error'
    record.last_success = datetime.datetime(2008, 10, 1)
    record.first_failure = datetime.datetime(2008, 10, 2)
    record.failure_count = 1
    record.is_broken = True

    result = resource_score(resource)
    assert result['openness_score'] == 0, result
    # The format recorded by the previous QA run is still reported.
    assert_equal(result['format'], 'CSV')
    # in preference it should report that it is not available
    expected_reason = (
        'File could not be downloaded. '
        'Reason: Download error. '
        'Error details: Server returned 404 error. '
        'Attempted on 10/10/2008. '
        'This URL last worked on: 01/10/2008.'
    )
    assert_equal(result['openness_score_reason'], expected_reason)
def test_url_with_temporary_fetch_error_not_scored(self, url):
    # self.fake_resource is updated in place with the supplied url
    # (presumably injected by a decorator that is not visible here).
    # Expect score 0 and one of the two accepted "service unavailable"
    # reason strings.
    resource = self.fake_resource
    resource['url'] = url
    result = resource_score(self.fake_context, resource)
    assert result['openness_score'] == 0, result
    accepted_reasons = (
        'Service unavailable',
        'Server returned error: Service unavailable',
    )
    assert result['openness_score_reason'] in accepted_reasons, result
def test_url_with_permanent_fetch_error_scores_zero(self, url):
    # self.fake_resource is updated in place with the supplied url
    # (presumably injected by a decorator that is not visible here).
    # Expect score 0 and one of the two accepted "unobtainable" reasons.
    resource = self.fake_resource
    resource['url'] = url
    result = resource_score(self.fake_context, resource)
    assert result['openness_score'] == 0, result
    accepted_reasons = (
        'URL unobtainable',
        'URL unobtainable: Server returned HTTP 404',
    )
    assert result['openness_score_reason'] in accepted_reasons, result
def test_machine_readable_formats_score_two(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; expect score 2 with the reason 'machine readable format'.
    resource = self.fake_resource
    resource['url'] = url
    outcome = resource_score(self.fake_context, resource)
    score = outcome['openness_score']
    assert score == 2, outcome
    reason = outcome['openness_score_reason']
    assert reason == 'machine readable format', outcome
def test_open_standard_formats_score_three(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; expect score 3 with the reason 'open and standardized format'.
    resource = self.fake_resource
    resource['url'] = url
    outcome = resource_score(self.fake_context, resource)
    score = outcome['openness_score']
    assert score == 3, outcome
    reason = outcome['openness_score_reason']
    assert reason == 'open and standardized format', outcome
def test_url_with_temporary_fetch_error_not_scored(self, url):
    # self.fake_resource is updated in place with the supplied url
    # (presumably injected by a decorator that is not visible here).
    # Expect score 0 and one of the two accepted "service unavailable"
    # reason strings.
    resource = self.fake_resource
    resource['url'] = url
    result = resource_score(self.fake_context, resource)
    assert result['openness_score'] == 0, result
    accepted_reasons = (
        'Service unavailable',
        'Server returned error: Service unavailable',
    )
    assert result['openness_score_reason'] in accepted_reasons, result
def test_not_available_any_more(self):
    # A cache of the data still exists from the previous run, but this
    # time, the archiver found the file gave a 404.
    resource = self._test_resource(license_id=None, format=None)

    # The record of the previous (successful) run of QA.
    previous_qa = qa_model.QA.create(resource.id)
    previous_qa.format = 'CSV'
    model.Session.add(previous_qa)
    model.Session.commit()

    # Archival record: the cache file is still present, but the latest
    # download attempt is recorded as a failure.
    record = Archival.get_for_resource(resource.id)
    record.cache_filepath = __file__
    record.status_id = Status.by_text('Download error')
    record.reason = 'Server returned 404 error'
    record.last_success = datetime.datetime(2008, 10, 1)
    record.first_failure = datetime.datetime(2008, 10, 2)
    record.failure_count = 1
    record.is_broken = True

    result = resource_score(resource, log)
    assert result['openness_score'] == 0, result
    # The format recorded by the previous QA run is still reported.
    assert_equal(result['format'], 'CSV')
    # in preference it should report that it is not available
    expected_reason = (
        'File could not be downloaded. '
        'Reason: Download error. '
        'Error details: Server returned 404 error. '
        'Attempted on 10/10/2008. '
        'This URL last worked on: 01/10/2008.'
    )
    assert_equal(result['openness_score_reason'], expected_reason)
def test_ontological_formats_score_four(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; expect score 4 with the reason 'ontologically represented'.
    resource = self.fake_resource
    resource['url'] = url
    outcome = resource_score(self.fake_context, resource)
    score = outcome['openness_score']
    assert score == 4, outcome
    reason = outcome['openness_score_reason']
    assert reason == 'ontologically represented', outcome
def test_url_pointing_to_html_page_scores_one(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; expect score 1 with the reason 'obtainable via web page'.
    resource = self.fake_resource
    resource['url'] = url
    outcome = resource_score(self.fake_context, resource)
    score = outcome['openness_score']
    assert score == 1, outcome
    reason = outcome['openness_score_reason']
    assert reason == 'obtainable via web page', outcome
def test_url_with_unknown_content_type_scores_one(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; expect the reason 'unrecognised content type'.
    # NOTE(review): the method name says "scores_one" but the assertion
    # below expects a score of 0 - confirm which of the two is intended.
    resource = self.fake_resource
    resource['url'] = url
    outcome = resource_score(self.fake_context, resource)
    score = outcome['openness_score']
    assert score == 0, outcome
    reason = outcome['openness_score_reason']
    assert reason == 'unrecognised content type', outcome
def test_no_format_clues(self):
    # Neither the sniffed contents nor the format field (None) give a
    # format: the score must be 1 and the reason must list each missing clue.
    set_sniffed_format(None)
    resource = self._test_resource(format=None)
    result = resource_score(resource, log)
    assert result['openness_score'] == 1, result
    reason = result['openness_score_reason']
    expected_fragments = (
        'not recognized from its contents',
        'Could not determine a file extension in the URL',
        'Format field is blank',
    )
    for fragment in expected_fragments:
        assert fragment in reason, result
def test_url_with_permanent_fetch_error_scores_zero(self, url):
    # self.fake_resource is updated in place with the supplied url
    # (presumably injected by a decorator that is not visible here).
    # Expect score 0 and one of the two accepted "unobtainable" reasons.
    resource = self.fake_resource
    resource['url'] = url
    result = resource_score(self.fake_context, resource)
    assert result['openness_score'] == 0, result
    accepted_reasons = (
        'URL unobtainable',
        'URL unobtainable: Server returned HTTP 404',
    )
    assert result['openness_score_reason'] in accepted_reasons, result
def test_content_type_with_charset_still_recognized(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; expect score 1 with the reason 'obtainable via web page'.
    resource = self.fake_resource
    resource['url'] = url
    outcome = resource_score(self.fake_context, resource)
    score = outcome['openness_score']
    assert score == 1, outcome
    reason = outcome['openness_score_reason']
    assert reason == 'obtainable via web page', outcome
def test_by_sniff_csv(self):
    # With the sniffed format set to 'CSV', the default test resource must
    # score 3, report format 'CSV', explain that in the reason and carry
    # today's archival timestamp.
    set_sniffed_format('CSV')
    resource = self._test_resource()
    result = resource_score(resource, log)
    assert result['openness_score'] == 3, result
    reason = result['openness_score_reason']
    assert 'Content of file appeared to be format "CSV"' in reason, result
    detected_format = result['format']
    assert detected_format == 'CSV', result
    timestamp = result['archival_timestamp']
    assert timestamp == TODAY_STR, result
def test_by_sniff_csv(self):
    # With the sniffed format set to 'CSV', the default test resource must
    # score 3, report format 'CSV', explain that in the reason and carry
    # today's archival timestamp.
    set_sniffed_format('CSV')
    resource = self._test_resource()
    result = resource_score(resource, log)
    assert result['openness_score'] == 3, result
    reason = result['openness_score_reason']
    assert 'Content of file appeared to be format "CSV"' in reason, result
    detected_format = result['format']
    assert detected_format == 'CSV', result
    timestamp = result['archival_timestamp']
    assert timestamp == TODAY_STR, result
def test_by_extension(self):
    # Nothing is sniffed, so the format has to come from the '.xls' URL
    # extension: expect score 2, format 'XLS' and a reason describing both
    # the failed sniff and the extension match.
    set_sniffed_format(None)
    resource = self._test_resource('http://site.com/filename.xls')
    result = resource_score(resource, log)
    assert result['openness_score'] == 2, result
    assert result['archival_timestamp'] == TODAY_STR, result
    assert_equal(result['format'], 'XLS')
    reason = result['openness_score_reason']
    expected_fragments = (
        'not recognized from its contents',
        'extension "xls" relates to format "XLS"',
    )
    for fragment in expected_fragments:
        assert fragment in reason, result
def test_by_format_field(self):
    # Nothing is sniffed, so the format has to come from the format field
    # ('XLS'): expect score 2, format 'XLS' and a reason mentioning the
    # failed sniff, the missing URL extension and the format field.
    set_sniffed_format(None)
    resource = self._test_resource(format='XLS')
    result = resource_score(resource, log)
    assert result['openness_score'] == 2, result
    assert_equal(result['format'], 'XLS')
    reason = result['openness_score_reason']
    expected_fragments = (
        'not recognized from its contents',
        'Could not determine a file extension in the URL',
        'Format field "XLS"',
    )
    for fragment in expected_fragments:
        assert fragment in reason, result
def test_by_extension(self):
    # Nothing is sniffed, so the format has to come from the '.xls' URL
    # extension: expect score 2, format 'XLS' and a reason describing both
    # the failed sniff and the extension match.
    set_sniffed_format(None)
    resource = self._test_resource('http://site.com/filename.xls')
    result = resource_score(resource, log)
    assert result['openness_score'] == 2, result
    assert result['archival_timestamp'] == TODAY_STR, result
    assert_equal(result['format'], 'XLS')
    reason = result['openness_score_reason']
    expected_fragments = (
        'not recognized from its contents',
        'extension "xls" relates to format "XLS"',
    )
    for fragment in expected_fragments:
        assert fragment in reason, result
def test_extension_not_recognized(self):
    # Nothing is sniffed and the URL ends in the unknown extension '.zar':
    # expect score 1 and a reason mentioning both failed detection steps.
    set_sniffed_format(None)
    resource = _test_resource('http://site.com/filename.zar')
    result = resource_score(resource)
    assert result['openness_score'] == 1, result
    reason = result['openness_score_reason']
    expected_fragments = (
        'not recognized from its contents',
        'URL extension "zar" is an unknown format',
    )
    for fragment in expected_fragments:
        assert fragment in reason, result
def test_by_format_field(self):
    # Nothing is sniffed, so the format has to come from the format field
    # ('XLS'): expect score 2, format 'XLS' and a reason mentioning the
    # failed sniff, the missing URL extension and the format field.
    set_sniffed_format(None)
    resource = self._test_resource(format='XLS')
    result = resource_score(resource, log)
    assert result['openness_score'] == 2, result
    assert_equal(result['format'], 'XLS')
    reason = result['openness_score_reason']
    expected_fragments = (
        'not recognized from its contents',
        'Could not determine a file extension in the URL',
        'Format field "XLS"',
    )
    for fragment in expected_fragments:
        assert fragment in reason, result
def test_archiver_ran_but_not_cached(self):
    # The resource is created with cached=False and a blank format field.
    # Expect score 1, no detected format, today's archival timestamp and a
    # reason that lists every step that could not determine the format.
    resource = self._test_resource(cached=False, format=None)
    result = resource_score(resource, log)
    # falls back on previous QA data detailing failed attempts
    assert result['openness_score'] == 1, result
    # None is a singleton, so compare by identity rather than with '=='.
    assert result['format'] is None, result
    assert result['archival_timestamp'] == TODAY_STR, result
    reason = result['openness_score_reason']
    expected_fragments = (
        'This file had not been downloaded at the time of scoring it.',
        'Could not determine a file extension in the URL.',
        'Format field is blank.',
        'Could not understand the file format, therefore score is 1.',
    )
    for fragment in expected_fragments:
        assert fragment in reason, result
def test_archiver_ran_but_not_cached(self):
    # The resource is created with cached=False and a blank format field.
    # Expect score 1, no detected format, today's archival timestamp and a
    # reason that lists every step that could not determine the format.
    resource = self._test_resource(cached=False, format=None)
    result = resource_score(resource, log)
    # falls back on previous QA data detailing failed attempts
    assert result['openness_score'] == 1, result
    # None is a singleton, so compare by identity rather than with '=='.
    assert result['format'] is None, result
    assert result['archival_timestamp'] == TODAY_STR, result
    reason = result['openness_score_reason']
    expected_fragments = (
        'This file had not been downloaded at the time of scoring it.',
        'Could not determine a file extension in the URL.',
        'Format field is blank.',
        'Could not understand the file format, therefore score is 1.',
    )
    for fragment in expected_fragments:
        assert fragment in reason, result
def test_not_available_and_not_open(self):
    # The resource has no licence, no format and no cached copy, and its
    # archival record describes 16 failed downloads with no success ever.
    resource = self._test_resource(license_id=None, format=None, cached=False)

    record = Archival.get_for_resource(resource.id)
    record.status_id = Status.by_text('Download error')
    record.reason = 'Server returned 500 error'
    record.last_success = None
    record.first_failure = datetime.datetime(2008, 10, 1, 6, 30)
    record.failure_count = 16
    record.is_broken = True
    model.Session.commit()

    result = resource_score(resource, log)
    assert result['openness_score'] == 0, result
    assert_equal(result['format'], None)
    # in preference it should report that it is not available
    expected_reason = (
        'File could not be downloaded. '
        'Reason: Download error. '
        'Error details: Server returned 500 error. '
        'Attempted on 10/10/2008. '
        'Tried 16 times since 01/10/2008. '
        'This URL has not worked in the history of this tool.'
    )
    assert_equal(result['openness_score_reason'], expected_reason)
def test_not_available_and_not_open(self):
    # The resource has no licence, no format and no cached copy, and its
    # archival record describes 16 failed downloads with no success ever.
    resource = self._test_resource(license_id=None, format=None, cached=False)

    record = Archival.get_for_resource(resource.id)
    record.status_id = Status.by_text('Download error')
    record.reason = 'Server returned 500 error'
    record.last_success = None
    record.first_failure = datetime.datetime(2008, 10, 1, 6, 30)
    record.failure_count = 16
    record.is_broken = True
    model.Session.commit()

    result = resource_score(resource, log)
    assert result['openness_score'] == 0, result
    assert_equal(result['format'], None)
    # in preference it should report that it is not available
    expected_reason = (
        'File could not be downloaded. '
        'Reason: Download error. '
        'Error details: Server returned 500 error. '
        'Attempted on 10/10/2008. '
        'Tried 16 times since 01/10/2008. '
        'This URL has not worked in the history of this tool.'
    )
    assert_equal(result['openness_score_reason'], expected_reason)
def test_url_pointing_to_html_page_scores_one(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; expect score 1 with the reason "obtainable via web page".
    resource = self.fake_resource
    resource["url"] = url
    outcome = resource_score(self.fake_context, resource)
    score = outcome["openness_score"]
    assert score == 1, outcome
    reason = outcome["openness_score_reason"]
    assert reason == "obtainable via web page", outcome
def test_by_format_field_excel(self):
    # Nothing is sniffed; a format field of 'Excel' must be reported as
    # the format 'XLS'.
    set_sniffed_format(None)
    resource = self._test_resource(format='Excel')
    result = resource_score(resource, log)
    detected_format = result['format']
    assert_equal(detected_format, 'XLS')
def test_by_format_field_excel(self):
    # Nothing is sniffed; a format field of 'Excel' must be reported as
    # the format 'XLS'. The test is skipped when
    # check_ckan_version(max_version='2.4.99') is true (presumably CKAN
    # 2.4.x and earlier).
    set_sniffed_format(None)
    ckan_too_old = check_ckan_version(max_version='2.4.99')
    if ckan_too_old:
        raise SkipTest
    resource = _test_resource(format='Excel')
    result = resource_score(resource)
    detected_format = result['format']
    assert_equal(detected_format, 'XLS')
def test_available_but_not_open(self):
    # The content is sniffed as CSV but the resource has no licence
    # (license_id=None): expect score 0, format 'CSV' and a reason that
    # mentions the licence is not open.
    set_sniffed_format('CSV')
    resource = self._test_resource(license_id=None)
    result = resource_score(resource, log)
    assert result['openness_score'] == 0, result
    assert_equal(result['format'], 'CSV')
    reason = result['openness_score_reason']
    assert 'License not open' in reason, result
def test_url_pointing_to_html_page_scores_one(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; expect score 1 with the reason 'obtainable via web page'.
    resource = self.fake_resource
    resource['url'] = url
    outcome = resource_score(self.fake_context, resource)
    score = outcome['openness_score']
    assert score == 1, outcome
    reason = outcome['openness_score_reason']
    assert reason == 'obtainable via web page', outcome
def test_url_with_permanent_fetch_error_scores_zero(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; expect score 0 with the reason "URL unobtainable".
    resource = self.fake_resource
    resource["url"] = url
    outcome = resource_score(self.fake_context, resource)
    score = outcome["openness_score"]
    assert score == 0, outcome
    reason = outcome["openness_score_reason"]
    assert reason == "URL unobtainable", outcome
def test_url_with_temporary_fetch_error_not_scored(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; expect score 0 with the reason "Service unavailable".
    resource = self.fake_resource
    resource["url"] = url
    outcome = resource_score(self.fake_context, resource)
    score = outcome["openness_score"]
    assert score == 0, outcome
    reason = outcome["openness_score_reason"]
    assert reason == "Service unavailable", outcome
def test_open_standard_formats_score_three(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; expect score 3 with the reason 'open and standardized format'.
    resource = self.fake_resource
    resource['url'] = url
    outcome = resource_score(self.fake_context, resource)
    score = outcome['openness_score']
    assert score == 3, outcome
    reason = outcome['openness_score_reason']
    assert reason == 'open and standardized format', outcome
def test_machine_readable_formats_score_two(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; expect score 2 with the reason 'machine readable format'.
    resource = self.fake_resource
    resource['url'] = url
    outcome = resource_score(self.fake_context, resource)
    score = outcome['openness_score']
    assert score == 2, outcome
    reason = outcome['openness_score_reason']
    assert reason == 'machine readable format', outcome
def test_temporary_failure_increments_failure_count(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; the result must report a failure count of exactly 1.
    resource = self.fake_resource
    resource['url'] = url
    outcome = resource_score(self.fake_context, resource)
    failure_count = outcome['openness_score_failure_count']
    assert failure_count == 1, outcome
def test_ontological_formats_score_four(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; expect score 4 with the reason 'ontologically represented'.
    resource = self.fake_resource
    resource['url'] = url
    outcome = resource_score(self.fake_context, resource)
    score = outcome['openness_score']
    assert score == 4, outcome
    reason = outcome['openness_score_reason']
    assert reason == 'ontologically represented', outcome
def test_url_with_content(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; only the score (3) is checked.
    resource = self.fake_resource
    resource['url'] = url
    outcome = resource_score(self.fake_context, resource)
    score = outcome['openness_score']
    assert score == 3, outcome
def test_open_standard_formats_score_three(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; expect score 3 with the reason "open and standardized format".
    resource = self.fake_resource
    resource["url"] = url
    outcome = resource_score(self.fake_context, resource)
    score = outcome["openness_score"]
    assert score == 3, outcome
    reason = outcome["openness_score_reason"]
    assert reason == "open and standardized format", outcome
def test_by_format_field_excel(self):
    # Nothing is sniffed; a format field of 'Excel' must be reported as
    # the format 'XLS'. The test is skipped when
    # p.toolkit.check_ckan_version(max_version='2.4.99') is true
    # (presumably CKAN 2.4.x and earlier).
    set_sniffed_format(None)
    ckan_too_old = p.toolkit.check_ckan_version(max_version='2.4.99')
    if ckan_too_old:
        raise SkipTest
    resource = self._test_resource(format='Excel')
    result = resource_score(resource)
    detected_format = result['format']
    assert_equal(detected_format, 'XLS')
def test_url_with_unknown_content_type_scores_one(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; expect the reason "unrecognised content type".
    # NOTE(review): the method name says "scores_one" but the assertion
    # below expects a score of 0 - confirm which of the two is intended.
    resource = self.fake_resource
    resource["url"] = url
    outcome = resource_score(self.fake_context, resource)
    score = outcome["openness_score"]
    assert score == 0, outcome
    reason = outcome["openness_score_reason"]
    assert reason == "unrecognised content type", outcome
def test_content_type_with_charset_still_recognized(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; expect score 1 with the reason 'obtainable via web page'.
    resource = self.fake_resource
    resource['url'] = url
    outcome = resource_score(self.fake_context, resource)
    score = outcome['openness_score']
    assert score == 1, outcome
    reason = outcome['openness_score_reason']
    assert reason == 'obtainable via web page', outcome
def test_extension_not_recognized(self):
    # Nothing is sniffed and the URL ends in the unknown extension '.zar':
    # expect score 1 and a reason mentioning both failed detection steps.
    set_sniffed_format(None)
    resource = self._test_resource('http://site.com/filename.zar')
    result = resource_score(resource, log)
    assert result['openness_score'] == 1, result
    reason = result['openness_score_reason']
    expected_fragments = (
        'not recognized from its contents',
        'URL extension "zar" is an unknown format',
    )
    for fragment in expected_fragments:
        assert fragment in reason, result
def test_temporary_failure_increments_failure_count(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; the result must report a failure count of exactly 1.
    resource = self.fake_resource
    resource['url'] = url
    outcome = resource_score(self.fake_context, resource)
    failure_count = outcome['openness_score_failure_count']
    assert failure_count == 1, outcome
def test_by_format_field_excel(self):
    # Nothing is sniffed; a format field of 'Excel' must be reported as
    # the format 'XLS'.
    set_sniffed_format(None)
    resource = self._test_resource(format='Excel')
    result = resource_score(resource, log)
    detected_format = result['format']
    assert_equal(detected_format, 'XLS')
def test_url_with_content(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; only the score (3) is checked.
    resource = self.fake_resource
    resource['url'] = url
    outcome = resource_score(self.fake_context, resource)
    score = outcome['openness_score']
    assert score == 3, outcome
def test_ontological_formats_score_four(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; expect score 4 with the reason "ontologically represented".
    resource = self.fake_resource
    resource["url"] = url
    outcome = resource_score(self.fake_context, resource)
    score = outcome["openness_score"]
    assert score == 4, outcome
    reason = outcome["openness_score_reason"]
    assert reason == "ontologically represented", outcome
def test_available_but_not_open(self):
    # The content is sniffed as CSV but the resource has no licence
    # (license_id=None): expect score 0, format 'CSV' and a reason that
    # mentions the licence is not open.
    set_sniffed_format('CSV')
    resource = self._test_resource(license_id=None)
    result = resource_score(resource, log)
    assert result['openness_score'] == 0, result
    assert_equal(result['format'], 'CSV')
    reason = result['openness_score_reason']
    assert 'License not open' in reason, result
def test_machine_readable_formats_score_two(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; expect score 2 with the reason "machine readable format".
    resource = self.fake_resource
    resource["url"] = url
    outcome = resource_score(self.fake_context, resource)
    score = outcome["openness_score"]
    assert score == 2, outcome
    reason = outcome["openness_score_reason"]
    assert reason == "machine readable format", outcome
def test_url_with_unknown_content_type_scores_one(self, url):
    # self.fake_resource is updated in place with the supplied url, then
    # scored; expect the reason 'unrecognised content type'.
    # NOTE(review): the method name says "scores_one" but the assertion
    # below expects a score of 0 - confirm which of the two is intended.
    resource = self.fake_resource
    resource['url'] = url
    outcome = resource_score(self.fake_context, resource)
    score = outcome['openness_score']
    assert score == 0, outcome
    reason = outcome['openness_score_reason']
    assert reason == 'unrecognised content type', outcome