Example #1
0
 def test_format_field_not_recognized(self):
     # Sniffed format is None and the format field is 'ZAR': expect a
     # score of 1 and a reason mentioning each of the fragments below.
     set_sniffed_format(None)
     resource = self._test_resource(format='ZAR')
     scored = resource_score(resource, log)
     assert scored['openness_score'] == 1, scored
     reason = scored['openness_score_reason']
     for fragment in (
             'not recognized from its contents',
             'Could not determine a file extension in the URL',
             'Format field "ZAR" does not correspond to a known format'):
         assert fragment in reason, scored
Example #2
0
 def test_no_format_clues(self):
     # Sniffed format is None and the format field is None: expect a
     # score of 1 and a reason mentioning each of the fragments below.
     set_sniffed_format(None)
     resource = self._test_resource(format=None)
     scored = resource_score(resource, log)
     assert scored['openness_score'] == 1, scored
     reason = scored['openness_score_reason']
     for fragment in (
             'not recognized from its contents',
             'Could not determine a file extension in the URL',
             'Format field is blank'):
         assert fragment in reason, scored
Example #3
0
 def test_format_field_not_recognized(self):
     # With nothing sniffed and a format field of 'ZAR', the score
     # should be 1 and the reason should contain all three messages.
     set_sniffed_format(None)
     resource = self._test_resource(format='ZAR')
     outcome = resource_score(resource, log)
     assert outcome['openness_score'] == 1, outcome
     reason = outcome['openness_score_reason']
     assert 'not recognized from its contents' in reason, outcome
     assert 'Could not determine a file extension in the URL' in reason, outcome
     assert 'Format field "ZAR" does not correspond to a known format' in reason, outcome
Example #4
0
 def test_not_available_any_more(self):
     # Scenario: a QA record with format 'CSV' already exists for the
     # resource and the archival still points at a cache file, but the
     # archival is marked broken with a 'Download error' status and a 404
     # reason. Expect score 0, format 'CSV', and a reason that reports the
     # download failure.
     resource = self._test_resource(license_id=None, format=None)
     # The record of the previous (successful) run of QA.
     previous_qa = qa_model.QA.create(resource.id)
     previous_qa.format = 'CSV'
     model.Session.add(previous_qa)
     model.Session.commit()
     # cache still exists from the previous run, but this time, the archiver
     # found the file gave a 404.
     archive_record = Archival.get_for_resource(resource.id)
     archive_record.cache_filepath = __file__
     archive_record.status_id = Status.by_text('Download error')
     archive_record.reason = 'Server returned 404 error'
     archive_record.last_success = datetime.datetime(2008, 10, 1)
     archive_record.first_failure = datetime.datetime(2008, 10, 2)
     archive_record.failure_count = 1
     archive_record.is_broken = True
     scored = resource_score(resource)
     assert scored['openness_score'] == 0, scored
     assert_equal(scored['format'], 'CSV')
     # in preference it should report that it is not available
     expected_reason = (
         'File could not be downloaded. Reason: Download error. '
         'Error details: Server returned 404 error. '
         'Attempted on 10/10/2008. This URL last worked on: 01/10/2008.'
     )
     assert_equal(scored['openness_score_reason'], expected_reason)
Example #5
0
 def test_url_with_temporary_fetch_error_not_scored(self, url):
     # Score self.fake_resource at ``url``; expect score 0 and one of
     # the two accepted 'Service unavailable' reason strings.
     resource = self.fake_resource
     resource['url'] = url
     scored = resource_score(self.fake_context, resource)
     assert scored['openness_score'] == 0, scored
     accepted_reasons = (
         'Service unavailable',
         'Server returned error: Service unavailable',
     )
     assert scored['openness_score_reason'] in accepted_reasons, scored
Example #6
0
 def test_url_with_permanent_fetch_error_scores_zero(self, url):
     # Score self.fake_resource at ``url``; expect score 0 and one of
     # the two accepted 'URL unobtainable' reason strings.
     resource = self.fake_resource
     resource['url'] = url
     scored = resource_score(self.fake_context, resource)
     assert scored['openness_score'] == 0, scored
     accepted_reasons = (
         'URL unobtainable',
         'URL unobtainable: Server returned HTTP 404',
     )
     assert scored['openness_score_reason'] in accepted_reasons, scored
Example #7
0
 def test_machine_readable_formats_score_two(self, url):
     # Score self.fake_resource at ``url``; expect score 2 with the
     # 'machine readable format' reason.
     resource = self.fake_resource
     resource['url'] = url
     scored = resource_score(self.fake_context, resource)
     assert scored['openness_score'] == 2, scored
     reason = scored['openness_score_reason']
     assert reason == 'machine readable format', scored
Example #8
0
 def test_open_standard_formats_score_three(self, url):
     # Score self.fake_resource at ``url``; expect score 3 with the
     # 'open and standardized format' reason.
     resource = self.fake_resource
     resource['url'] = url
     scored = resource_score(self.fake_context, resource)
     assert scored['openness_score'] == 3, scored
     reason = scored['openness_score_reason']
     assert reason == 'open and standardized format', scored
Example #9
0
 def test_url_with_temporary_fetch_error_not_scored(self, url):
     # The resource at ``url`` should score 0, with the reason being
     # either the short or the long 'Service unavailable' message.
     fake = self.fake_resource
     fake['url'] = url
     outcome = resource_score(self.fake_context, fake)
     assert outcome['openness_score'] == 0, outcome
     reason = outcome['openness_score_reason']
     assert reason in ('Service unavailable',
                       'Server returned error: Service unavailable'), outcome
Example #10
0
 def test_not_available_any_more(self):
     # Scenario: a QA record with format 'CSV' already exists for the
     # resource and the archival still points at a cache file, but the
     # archival is marked broken with a 'Download error' status and a 404
     # reason. Expect score 0, format 'CSV', and a reason that reports the
     # download failure.
     resource = self._test_resource(license_id=None, format=None)
     # The record of the previous (successful) run of QA.
     previous_qa = qa_model.QA.create(resource.id)
     previous_qa.format = 'CSV'
     model.Session.add(previous_qa)
     model.Session.commit()
     # cache still exists from the previous run, but this time, the archiver
     # found the file gave a 404.
     archive_record = Archival.get_for_resource(resource.id)
     archive_record.cache_filepath = __file__
     archive_record.status_id = Status.by_text('Download error')
     archive_record.reason = 'Server returned 404 error'
     archive_record.last_success = datetime.datetime(2008, 10, 1)
     archive_record.first_failure = datetime.datetime(2008, 10, 2)
     archive_record.failure_count = 1
     archive_record.is_broken = True
     scored = resource_score(resource, log)
     assert scored['openness_score'] == 0, scored
     assert_equal(scored['format'], 'CSV')
     # in preference it should report that it is not available
     expected_reason = (
         'File could not be downloaded. Reason: Download error. '
         'Error details: Server returned 404 error. '
         'Attempted on 10/10/2008. This URL last worked on: 01/10/2008.'
     )
     assert_equal(scored['openness_score_reason'], expected_reason)
Example #11
0
 def test_ontological_formats_score_four(self, url):
     # Score self.fake_resource at ``url``; expect score 4 with the
     # 'ontologically represented' reason.
     resource = self.fake_resource
     resource['url'] = url
     scored = resource_score(self.fake_context, resource)
     assert scored['openness_score'] == 4, scored
     reason = scored['openness_score_reason']
     assert reason == 'ontologically represented', scored
Example #12
0
 def test_url_pointing_to_html_page_scores_one(self, url):
     # Score self.fake_resource at ``url``; expect score 1 with the
     # 'obtainable via web page' reason.
     resource = self.fake_resource
     resource['url'] = url
     scored = resource_score(self.fake_context, resource)
     assert scored['openness_score'] == 1, scored
     reason = scored['openness_score_reason']
     assert reason == 'obtainable via web page', scored
Example #13
0
 def test_url_with_unknown_content_type_scores_one(self, url):
     # Score self.fake_resource at ``url``; expect score 0 with the
     # 'unrecognised content type' reason.
     # NOTE(review): the method name says "scores_one" but the assertion
     # expects 0 - confirm which is intended.
     resource = self.fake_resource
     resource['url'] = url
     scored = resource_score(self.fake_context, resource)
     assert scored['openness_score'] == 0, scored
     reason = scored['openness_score_reason']
     assert reason == 'unrecognised content type', scored
Example #14
0
 def test_no_format_clues(self):
     # With nothing sniffed and a format field of None, the score should
     # be 1 and the reason should contain all three messages.
     set_sniffed_format(None)
     resource = self._test_resource(format=None)
     outcome = resource_score(resource, log)
     assert outcome['openness_score'] == 1, outcome
     reason = outcome['openness_score_reason']
     assert 'not recognized from its contents' in reason, outcome
     assert 'Could not determine a file extension in the URL' in reason, outcome
     assert 'Format field is blank' in reason, outcome
Example #15
0
 def test_url_with_permanent_fetch_error_scores_zero(self, url):
     # The resource at ``url`` should score 0, with the reason being
     # either the short or the long 'URL unobtainable' message.
     fake = self.fake_resource
     fake['url'] = url
     outcome = resource_score(self.fake_context, fake)
     assert outcome['openness_score'] == 0, outcome
     reason = outcome['openness_score_reason']
     assert reason in ('URL unobtainable',
                       'URL unobtainable: Server returned HTTP 404'), outcome
Example #16
0
 def test_content_type_with_charset_still_recognized(self, url):
     # Score self.fake_resource at ``url``; expect score 1 with the
     # 'obtainable via web page' reason.
     resource = self.fake_resource
     resource['url'] = url
     scored = resource_score(self.fake_context, resource)
     assert scored['openness_score'] == 1, scored
     reason = scored['openness_score_reason']
     assert reason == 'obtainable via web page', scored
Example #17
0
 def test_by_sniff_csv(self):
     # With the sniffed format set to 'CSV', expect score 3, a reason
     # citing the file contents, format 'CSV' and an archival timestamp
     # equal to TODAY_STR.
     set_sniffed_format('CSV')
     resource = self._test_resource()
     scored = resource_score(resource, log)
     assert scored['openness_score'] == 3, scored
     reason = scored['openness_score_reason']
     assert 'Content of file appeared to be format "CSV"' in reason, scored
     assert scored['format'] == 'CSV', scored
     assert scored['archival_timestamp'] == TODAY_STR, scored
Example #18
0
 def test_by_sniff_csv(self):
     # Sniffed format 'CSV' should give score 3, a reason quoting the
     # detected format, format 'CSV' and a timestamp of TODAY_STR.
     set_sniffed_format('CSV')
     resource = self._test_resource()
     outcome = resource_score(resource, log)
     assert outcome['openness_score'] == 3, outcome
     reason = outcome['openness_score_reason']
     assert 'Content of file appeared to be format "CSV"' in reason, outcome
     assert outcome['format'] == 'CSV', outcome
     assert outcome['archival_timestamp'] == TODAY_STR, outcome
Example #19
0
 def test_by_extension(self):
     # Nothing sniffed, URL ending in '.xls': expect score 2, timestamp
     # TODAY_STR, format 'XLS' and a reason mentioning the extension.
     set_sniffed_format(None)
     resource = self._test_resource('http://site.com/filename.xls')
     scored = resource_score(resource, log)
     assert scored['openness_score'] == 2, scored
     assert scored['archival_timestamp'] == TODAY_STR, scored
     assert_equal(scored['format'], 'XLS')
     reason = scored['openness_score_reason']
     assert 'not recognized from its contents' in reason, scored
     assert 'extension "xls" relates to format "XLS"' in reason, scored
Example #20
0
 def test_by_format_field(self):
     # Nothing sniffed, format field 'XLS': expect score 2, format 'XLS'
     # and a reason containing the three messages asserted below.
     set_sniffed_format(None)
     resource = self._test_resource(format='XLS')
     scored = resource_score(resource, log)
     assert scored['openness_score'] == 2, scored
     assert_equal(scored['format'], 'XLS')
     reason = scored['openness_score_reason']
     assert 'not recognized from its contents' in reason, scored
     assert 'Could not determine a file extension in the URL' in reason, scored
     assert 'Format field "XLS"' in reason, scored
Example #21
0
 def test_by_extension(self):
     # A '.xls' URL with no sniffed format should score 2, report format
     # 'XLS' and timestamp TODAY_STR, and explain the extension match.
     set_sniffed_format(None)
     resource = self._test_resource('http://site.com/filename.xls')
     outcome = resource_score(resource, log)
     assert outcome['openness_score'] == 2, outcome
     assert outcome['archival_timestamp'] == TODAY_STR, outcome
     assert_equal(outcome['format'], 'XLS')
     reason = outcome['openness_score_reason']
     assert 'not recognized from its contents' in reason, outcome
     assert 'extension "xls" relates to format "XLS"' in reason, outcome
Example #22
0
 def test_extension_not_recognized(self):
     # Nothing sniffed, URL ending in '.zar': expect score 1 and a reason
     # stating that the extension is an unknown format.
     set_sniffed_format(None)
     resource = _test_resource('http://site.com/filename.zar')
     scored = resource_score(resource)
     assert scored['openness_score'] == 1, scored
     reason = scored['openness_score_reason']
     assert 'not recognized from its contents' in reason, scored
     assert 'URL extension "zar" is an unknown format' in reason, scored
Example #23
0
 def test_by_format_field(self):
     # A format field of 'XLS' with no sniffed format should score 2,
     # report format 'XLS', and list the messages asserted below.
     set_sniffed_format(None)
     resource = self._test_resource(format='XLS')
     outcome = resource_score(resource, log)
     assert outcome['openness_score'] == 2, outcome
     assert_equal(outcome['format'], 'XLS')
     reason = outcome['openness_score_reason']
     assert 'not recognized from its contents' in reason, outcome
     assert 'Could not determine a file extension in the URL' in reason, outcome
     assert 'Format field "XLS"' in reason, outcome
Example #24
0
 def test_archiver_ran_but_not_cached(self):
     # Resource created with cached=False and format=None: expect score 1,
     # a format of None, timestamp TODAY_STR and a reason containing each
     # of the messages asserted below.
     result = resource_score(self._test_resource(cached=False, format=None), log)
     # falls back on previous QA data detailing failed attempts
     assert result['openness_score'] == 1, result
     # None is a singleton, so test identity rather than equality (PEP 8).
     assert result['format'] is None, result
     assert result['archival_timestamp'] == TODAY_STR, result
     assert 'This file had not been downloaded at the time of scoring it.' in result['openness_score_reason'], result
     assert 'Could not determine a file extension in the URL.' in result['openness_score_reason'], result
     assert 'Format field is blank.' in result['openness_score_reason'], result
     assert 'Could not understand the file format, therefore score is 1.' in result['openness_score_reason'], result
Example #25
0
 def test_archiver_ran_but_not_cached(self):
     # Resource created with cached=False and format=None: expect score 1,
     # a format of None, timestamp TODAY_STR and a reason containing each
     # of the messages asserted below.
     result = resource_score(self._test_resource(cached=False, format=None), log)
     # falls back on previous QA data detailing failed attempts
     assert result['openness_score'] == 1, result
     # None is a singleton, so test identity rather than equality (PEP 8).
     assert result['format'] is None, result
     assert result['archival_timestamp'] == TODAY_STR, result
     reason = result['openness_score_reason']
     assert 'This file had not been downloaded at the time of scoring it.' in reason, result
     assert 'Could not determine a file extension in the URL.' in reason, result
     assert 'Format field is blank.' in reason, result
     assert 'Could not understand the file format, therefore score is 1.' in reason, result
Example #26
0
 def test_not_available_and_not_open(self):
     # Resource created with license_id=None, format=None and cached=False
     # whose archival is marked broken with a 'Download error' status, 16
     # failures and no last success. Expect score 0, a format of None and
     # the download failure as the reason.
     resource = self._test_resource(license_id=None, format=None, cached=False)
     archive_record = Archival.get_for_resource(resource.id)
     archive_record.status_id = Status.by_text('Download error')
     archive_record.reason = 'Server returned 500 error'
     archive_record.last_success = None
     archive_record.first_failure = datetime.datetime(2008, 10, 1, 6, 30)
     archive_record.failure_count = 16
     archive_record.is_broken = True
     model.Session.commit()
     scored = resource_score(resource, log)
     assert scored['openness_score'] == 0, scored
     assert_equal(scored['format'], None)
     # in preference it should report that it is not available
     expected_reason = (
         'File could not be downloaded. Reason: Download error. '
         'Error details: Server returned 500 error. '
         'Attempted on 10/10/2008. Tried 16 times since 01/10/2008. '
         'This URL has not worked in the history of this tool.'
     )
     assert_equal(scored['openness_score_reason'], expected_reason)
Example #27
0
 def test_not_available_and_not_open(self):
     # Resource created with license_id=None, format=None and cached=False
     # whose archival is marked broken with a 'Download error' status, 16
     # failures and no last success. Expect score 0, a format of None and
     # the download failure as the reason.
     resource = self._test_resource(license_id=None, format=None, cached=False)
     archive_record = Archival.get_for_resource(resource.id)
     archive_record.status_id = Status.by_text('Download error')
     archive_record.reason = 'Server returned 500 error'
     archive_record.last_success = None
     archive_record.first_failure = datetime.datetime(2008, 10, 1, 6, 30)
     archive_record.failure_count = 16
     archive_record.is_broken = True
     model.Session.commit()
     outcome = resource_score(resource, log)
     assert outcome['openness_score'] == 0, outcome
     assert_equal(outcome['format'], None)
     # in preference it should report that it is not available
     expected_reason = (
         'File could not be downloaded. Reason: Download error. '
         'Error details: Server returned 500 error. '
         'Attempted on 10/10/2008. Tried 16 times since 01/10/2008. '
         'This URL has not worked in the history of this tool.'
     )
     assert_equal(outcome['openness_score_reason'], expected_reason)
Example #28
0
 def test_url_pointing_to_html_page_scores_one(self, url):
     # Score self.fake_resource at ``url``; expect score 1 with the
     # "obtainable via web page" reason.
     resource = self.fake_resource
     resource["url"] = url
     scored = resource_score(self.fake_context, resource)
     assert scored["openness_score"] == 1, scored
     reason = scored["openness_score_reason"]
     assert reason == "obtainable via web page", scored
Example #29
0
 def test_by_format_field_excel(self):
     # Nothing sniffed, format field 'Excel': the reported format should
     # be 'XLS'.
     set_sniffed_format(None)
     resource = self._test_resource(format='Excel')
     scored = resource_score(resource, log)
     assert_equal(scored['format'], 'XLS')
Example #30
0
 def test_by_format_field_excel(self):
     # Skipped when check_ckan_version(max_version='2.4.99') is true;
     # otherwise a format field of 'Excel' should be reported as 'XLS'.
     set_sniffed_format(None)
     should_skip = check_ckan_version(max_version='2.4.99')
     if should_skip:
         raise SkipTest
     resource = _test_resource(format='Excel')
     scored = resource_score(resource)
     assert_equal(scored['format'], 'XLS')
Example #31
0
 def test_available_but_not_open(self):
     # Sniffed format 'CSV' but license_id=None: expect score 0, format
     # 'CSV' and a reason containing 'License not open'.
     set_sniffed_format('CSV')
     resource = self._test_resource(license_id=None)
     scored = resource_score(resource, log)
     assert scored['openness_score'] == 0, scored
     assert_equal(scored['format'], 'CSV')
     reason = scored['openness_score_reason']
     assert 'License not open' in reason, scored
Example #32
0
 def test_url_pointing_to_html_page_scores_one(self, url):
     # The resource at ``url`` should score 1 and give the reason
     # 'obtainable via web page'.
     fake = self.fake_resource
     fake['url'] = url
     outcome = resource_score(self.fake_context, fake)
     assert outcome['openness_score'] == 1, outcome
     reason = outcome['openness_score_reason']
     assert reason == 'obtainable via web page', outcome
Example #33
0
 def test_url_with_permanent_fetch_error_scores_zero(self, url):
     # Score self.fake_resource at ``url``; expect score 0 with the
     # "URL unobtainable" reason.
     resource = self.fake_resource
     resource["url"] = url
     scored = resource_score(self.fake_context, resource)
     assert scored["openness_score"] == 0, scored
     reason = scored["openness_score_reason"]
     assert reason == "URL unobtainable", scored
Example #34
0
 def test_url_with_temporary_fetch_error_not_scored(self, url):
     # Score self.fake_resource at ``url``; expect score 0 with the
     # "Service unavailable" reason.
     resource = self.fake_resource
     resource["url"] = url
     scored = resource_score(self.fake_context, resource)
     assert scored["openness_score"] == 0, scored
     reason = scored["openness_score_reason"]
     assert reason == "Service unavailable", scored
Example #35
0
 def test_open_standard_formats_score_three(self, url):
     # The resource at ``url`` should score 3 and give the reason
     # 'open and standardized format'.
     fake = self.fake_resource
     fake['url'] = url
     outcome = resource_score(self.fake_context, fake)
     assert outcome['openness_score'] == 3, outcome
     reason = outcome['openness_score_reason']
     assert reason == 'open and standardized format', outcome
Example #36
0
 def test_machine_readable_formats_score_two(self, url):
     # The resource at ``url`` should score 2 and give the reason
     # 'machine readable format'.
     fake = self.fake_resource
     fake['url'] = url
     outcome = resource_score(self.fake_context, fake)
     assert outcome['openness_score'] == 2, outcome
     reason = outcome['openness_score_reason']
     assert reason == 'machine readable format', outcome
Example #37
0
 def test_temporary_failure_increments_failure_count(self, url):
     # Score self.fake_resource at ``url`` and expect the result to carry
     # an 'openness_score_failure_count' of 1.
     resource = self.fake_resource
     resource['url'] = url
     scored = resource_score(self.fake_context, resource)
     failure_count = scored['openness_score_failure_count']
     assert failure_count == 1, scored
Example #38
0
 def test_ontological_formats_score_four(self, url):
     # The resource at ``url`` should score 4 and give the reason
     # 'ontologically represented'.
     fake = self.fake_resource
     fake['url'] = url
     outcome = resource_score(self.fake_context, fake)
     assert outcome['openness_score'] == 4, outcome
     reason = outcome['openness_score_reason']
     assert reason == 'ontologically represented', outcome
Example #39
0
 def test_url_with_content(self, url):
     # Score self.fake_resource at ``url`` and expect a score of 3.
     resource = self.fake_resource
     resource['url'] = url
     scored = resource_score(self.fake_context, resource)
     openness = scored['openness_score']
     assert openness == 3, scored
Example #40
0
 def test_open_standard_formats_score_three(self, url):
     # Score self.fake_resource at ``url``; expect score 3 with the
     # "open and standardized format" reason.
     resource = self.fake_resource
     resource["url"] = url
     scored = resource_score(self.fake_context, resource)
     assert scored["openness_score"] == 3, scored
     reason = scored["openness_score_reason"]
     assert reason == "open and standardized format", scored
Example #41
0
 def test_by_format_field_excel(self):
     # Skipped when p.toolkit.check_ckan_version(max_version='2.4.99') is
     # true; otherwise a format field of 'Excel' should be reported as
     # 'XLS'.
     set_sniffed_format(None)
     should_skip = p.toolkit.check_ckan_version(max_version='2.4.99')
     if should_skip:
         raise SkipTest
     resource = self._test_resource(format='Excel')
     scored = resource_score(resource)
     assert_equal(scored['format'], 'XLS')
Example #42
0
 def test_url_with_unknown_content_type_scores_one(self, url):
     # Score self.fake_resource at ``url``; expect score 0 with the
     # "unrecognised content type" reason.
     # NOTE(review): the method name says "scores_one" but the assertion
     # expects 0 - confirm which is intended.
     resource = self.fake_resource
     resource["url"] = url
     scored = resource_score(self.fake_context, resource)
     assert scored["openness_score"] == 0, scored
     reason = scored["openness_score_reason"]
     assert reason == "unrecognised content type", scored
Example #43
0
 def test_content_type_with_charset_still_recognized(self, url):
     # The resource at ``url`` should score 1 and give the reason
     # 'obtainable via web page'.
     fake = self.fake_resource
     fake['url'] = url
     outcome = resource_score(self.fake_context, fake)
     assert outcome['openness_score'] == 1, outcome
     reason = outcome['openness_score_reason']
     assert reason == 'obtainable via web page', outcome
Example #44
0
 def test_extension_not_recognized(self):
     # Nothing sniffed, URL ending in '.zar': expect score 1 and a reason
     # stating that the extension is an unknown format.
     set_sniffed_format(None)
     resource = self._test_resource('http://site.com/filename.zar')
     scored = resource_score(resource, log)
     assert scored['openness_score'] == 1, scored
     reason = scored['openness_score_reason']
     assert 'not recognized from its contents' in reason, scored
     assert 'URL extension "zar" is an unknown format' in reason, scored
Example #45
0
 def test_temporary_failure_increments_failure_count(self, url):
     # After scoring the resource at ``url`` the result's
     # 'openness_score_failure_count' should be 1.
     fake = self.fake_resource
     fake['url'] = url
     outcome = resource_score(self.fake_context, fake)
     failures = outcome['openness_score_failure_count']
     assert failures == 1, outcome
Example #46
0
 def test_by_format_field_excel(self):
     # A format field of 'Excel' with no sniffed format should be
     # reported as format 'XLS'.
     set_sniffed_format(None)
     resource = self._test_resource(format='Excel')
     outcome = resource_score(resource, log)
     assert_equal(outcome['format'], 'XLS')
Example #47
0
 def test_url_with_content(self, url):
     # The resource at ``url`` should score 3.
     fake = self.fake_resource
     fake['url'] = url
     outcome = resource_score(self.fake_context, fake)
     score = outcome['openness_score']
     assert score == 3, outcome
Example #48
0
 def test_ontological_formats_score_four(self, url):
     # Score self.fake_resource at ``url``; expect score 4 with the
     # "ontologically represented" reason.
     resource = self.fake_resource
     resource["url"] = url
     scored = resource_score(self.fake_context, resource)
     assert scored["openness_score"] == 4, scored
     reason = scored["openness_score_reason"]
     assert reason == "ontologically represented", scored
Example #49
0
 def test_available_but_not_open(self):
     # A resource sniffed as 'CSV' but created with license_id=None should
     # score 0, keep format 'CSV', and mention 'License not open'.
     set_sniffed_format('CSV')
     resource = self._test_resource(license_id=None)
     outcome = resource_score(resource, log)
     assert outcome['openness_score'] == 0, outcome
     assert_equal(outcome['format'], 'CSV')
     reason = outcome['openness_score_reason']
     assert 'License not open' in reason, outcome
Example #50
0
 def test_machine_readable_formats_score_two(self, url):
     # Score self.fake_resource at ``url``; expect score 2 with the
     # "machine readable format" reason.
     resource = self.fake_resource
     resource["url"] = url
     scored = resource_score(self.fake_context, resource)
     assert scored["openness_score"] == 2, scored
     reason = scored["openness_score_reason"]
     assert reason == "machine readable format", scored
Example #51
0
 def test_url_with_unknown_content_type_scores_one(self, url):
     # The resource at ``url`` should score 0 and give the reason
     # 'unrecognised content type'.
     # NOTE(review): the method name says "scores_one" but the assertion
     # expects 0 - confirm which is intended.
     fake = self.fake_resource
     fake['url'] = url
     outcome = resource_score(self.fake_context, fake)
     assert outcome['openness_score'] == 0, outcome
     reason = outcome['openness_score_reason']
     assert reason == 'unrecognised content type', outcome