Ejemplo n.º 1
0
 def test_format_field_not_recognized(self):
     """Expect score 1 when the only clue is the unknown format field 'ZAR'.

     The sniffed format is set to None and the resource is created with
     ``format='ZAR'``; the reason text must mention every failed step.
     """
     set_sniffed_format(None)
     resource = self._test_resource(format='ZAR')
     scored = resource_score(resource, log)
     assert scored['openness_score'] == 1, scored
     expected_fragments = (
         'not recognized from its contents',
         'Could not determine a file extension in the URL',
         'Format field "ZAR" does not correspond to a known format',
     )
     for fragment in expected_fragments:
         assert fragment in scored['openness_score_reason'], scored
Ejemplo n.º 2
0
 def test_no_format_clues(self):
     """Expect score 1 when the resource is created with ``format=None``.

     The sniffed format is set to None as well; the reason text must
     mention the contents, the URL extension and the blank format field.
     """
     set_sniffed_format(None)
     resource = self._test_resource(format=None)
     scored = resource_score(resource, log)
     assert scored['openness_score'] == 1, scored
     expected_fragments = (
         'not recognized from its contents',
         'Could not determine a file extension in the URL',
         'Format field is blank',
     )
     for fragment in expected_fragments:
         assert fragment in scored['openness_score_reason'], scored
Ejemplo n.º 3
0
 def test_format_field_not_recognized(self):
     """Score a resource whose format field 'ZAR' is not a known format.

     With the sniffed format set to None, the expected score is 1 and the
     reason has to report each detection step that failed.
     """
     set_sniffed_format(None)
     scored = resource_score(self._test_resource(format='ZAR'), log)
     assert scored['openness_score'] == 1, scored
     reason_checks = [
         'not recognized from its contents',
         'Could not determine a file extension in the URL',
         'Format field "ZAR" does not correspond to a known format',
     ]
     for expected_text in reason_checks:
         assert expected_text in scored['openness_score_reason'], scored
Ejemplo n.º 4
0
 def test_not_available_any_more(self):
     """Expect score 0 when the latest archival attempt recorded a 404.

     A QA record with format 'CSV' is committed first. The archival record
     is then changed to a 'Download error' status while ``cache_filepath``
     still points at a file. The test expects score 0, format 'CSV' and a
     reason describing the failed download.
     """
     resource = self._test_resource(license_id=None, format=None)

     # The record of the previous (successful) run of QA.
     previous_qa = qa_model.QA.create(resource.id)
     previous_qa.format = 'CSV'
     model.Session.add(previous_qa)
     model.Session.commit()

     # A cache of the data still exists from the previous run, but this
     # time the archiver found the file gave a 404.
     archive_record = Archival.get_for_resource(resource.id)
     archive_record.cache_filepath = __file__
     archive_record.status_id = Status.by_text('Download error')
     archive_record.reason = 'Server returned 404 error'
     archive_record.last_success = datetime.datetime(year=2008, month=10, day=1)
     archive_record.first_failure = datetime.datetime(year=2008, month=10, day=2)
     archive_record.failure_count = 1
     archive_record.is_broken = True

     expected_reason = (
         'File could not be downloaded. Reason: Download error. '
         'Error details: Server returned 404 error. '
         'Attempted on 10/10/2008. '
         'This URL last worked on: 01/10/2008.'
     )
     scored = resource_score(resource)
     assert scored['openness_score'] == 0, scored
     assert_equal(scored['format'], 'CSV')
     # In preference it should report that the file is not available.
     assert_equal(scored['openness_score_reason'], expected_reason)
Ejemplo n.º 5
0
 def test_url_with_temporary_fetch_error_not_scored(self, url):
     """Expect score 0 and a 'Service unavailable' reason for ``url``.

     ``url`` is written into the object obtained from ``self.fake_resource``
     before scoring. Either of two reason wordings is accepted.
     """
     resource = self.fake_resource
     resource['url'] = url
     scored = resource_score(self.fake_context, resource)
     assert scored['openness_score'] == 0, scored
     accepted_reasons = (
         'Service unavailable',
         'Server returned error: Service unavailable',
     )
     assert scored['openness_score_reason'] in accepted_reasons, scored
Ejemplo n.º 6
0
 def test_url_with_permanent_fetch_error_scores_zero(self, url):
     """Expect score 0 and a 'URL unobtainable' reason for ``url``.

     ``url`` is written into the object obtained from ``self.fake_resource``
     before scoring. Either of two reason wordings is accepted.
     """
     resource = self.fake_resource
     resource['url'] = url
     scored = resource_score(self.fake_context, resource)
     assert scored['openness_score'] == 0, scored
     accepted_reasons = (
         'URL unobtainable',
         'URL unobtainable: Server returned HTTP 404',
     )
     assert scored['openness_score_reason'] in accepted_reasons, scored
Ejemplo n.º 7
0
 def test_machine_readable_formats_score_two(self, url):
     """Expect score 2 with reason 'machine readable format' for ``url``.

     ``url`` is written into the object obtained from ``self.fake_resource``
     before that object is scored.
     """
     resource = self.fake_resource
     resource['url'] = url
     scored = resource_score(self.fake_context, resource)
     assert scored['openness_score'] == 2, scored
     reason = scored['openness_score_reason']
     assert reason == 'machine readable format', scored
Ejemplo n.º 8
0
 def test_open_standard_formats_score_three(self, url):
     """Expect score 3 with reason 'open and standardized format' for ``url``.

     ``url`` is written into the object obtained from ``self.fake_resource``
     before that object is scored.
     """
     resource = self.fake_resource
     resource['url'] = url
     scored = resource_score(self.fake_context, resource)
     assert scored['openness_score'] == 3, scored
     reason = scored['openness_score_reason']
     assert reason == 'open and standardized format', scored
Ejemplo n.º 9
0
 def test_url_with_temporary_fetch_error_not_scored(self, url):
     """Score the fake resource at ``url`` and expect a temporary failure.

     The expected score is 0; the reason may be either the short or the
     'Server returned error: ...' form of 'Service unavailable'.
     """
     fake = self.fake_resource
     fake['url'] = url
     outcome = resource_score(self.fake_context, fake)
     assert outcome['openness_score'] == 0, outcome
     allowed = (
         'Service unavailable',
         'Server returned error: Service unavailable',
     )
     assert outcome['openness_score_reason'] in allowed, outcome
Ejemplo n.º 10
0
 def test_not_available_any_more(self):
     """Expect score 0 when the latest archival attempt recorded a 404.

     A QA record with format 'CSV' is committed first. The archival record
     is then changed to a 'Download error' status while ``cache_filepath``
     still points at a file. The test expects score 0, format 'CSV' and a
     reason describing the failed download.
     """
     resource = self._test_resource(license_id=None, format=None)

     # The record of the previous (successful) run of QA.
     previous_qa = qa_model.QA.create(resource.id)
     previous_qa.format = 'CSV'
     model.Session.add(previous_qa)
     model.Session.commit()

     # A cache of the data still exists from the previous run, but this
     # time the archiver found the file gave a 404.
     archive_record = Archival.get_for_resource(resource.id)
     archive_record.cache_filepath = __file__
     archive_record.status_id = Status.by_text('Download error')
     archive_record.reason = 'Server returned 404 error'
     archive_record.last_success = datetime.datetime(year=2008, month=10, day=1)
     archive_record.first_failure = datetime.datetime(year=2008, month=10, day=2)
     archive_record.failure_count = 1
     archive_record.is_broken = True

     expected_reason = (
         'File could not be downloaded. Reason: Download error. '
         'Error details: Server returned 404 error. '
         'Attempted on 10/10/2008. '
         'This URL last worked on: 01/10/2008.'
     )
     scored = resource_score(resource, log)
     assert scored['openness_score'] == 0, scored
     assert_equal(scored['format'], 'CSV')
     # In preference it should report that the file is not available.
     assert_equal(scored['openness_score_reason'], expected_reason)
Ejemplo n.º 11
0
 def test_ontological_formats_score_four(self, url):
     """Expect score 4 with reason 'ontologically represented' for ``url``.

     ``url`` is written into the object obtained from ``self.fake_resource``
     before that object is scored.
     """
     resource = self.fake_resource
     resource['url'] = url
     scored = resource_score(self.fake_context, resource)
     assert scored['openness_score'] == 4, scored
     reason = scored['openness_score_reason']
     assert reason == 'ontologically represented', scored
Ejemplo n.º 12
0
 def test_url_pointing_to_html_page_scores_one(self, url):
     """Expect score 1 with reason 'obtainable via web page' for ``url``.

     ``url`` is written into the object obtained from ``self.fake_resource``
     before that object is scored.
     """
     resource = self.fake_resource
     resource['url'] = url
     scored = resource_score(self.fake_context, resource)
     assert scored['openness_score'] == 1, scored
     reason = scored['openness_score_reason']
     assert reason == 'obtainable via web page', scored
Ejemplo n.º 13
0
 def test_url_with_unknown_content_type_scores_one(self, url):
     """Expect score 0 with reason 'unrecognised content type' for ``url``.

     ``url`` is written into the object obtained from ``self.fake_resource``
     before that object is scored.

     NOTE(review): the method name says "scores_one" but the assertion
     expects 0 -- confirm which of the two is intended.
     """
     resource = self.fake_resource
     resource['url'] = url
     scored = resource_score(self.fake_context, resource)
     assert scored['openness_score'] == 0, scored
     reason = scored['openness_score_reason']
     assert reason == 'unrecognised content type', scored
Ejemplo n.º 14
0
 def test_no_format_clues(self):
     """Score a resource that offers no usable hint about its format.

     The sniffed format is set to None and the resource is created with
     ``format=None``; the expected score is 1 and the reason must list all
     three unsuccessful checks.
     """
     set_sniffed_format(None)
     scored = resource_score(self._test_resource(format=None), log)
     assert scored['openness_score'] == 1, scored
     reason_checks = [
         'not recognized from its contents',
         'Could not determine a file extension in the URL',
         'Format field is blank',
     ]
     for expected_text in reason_checks:
         assert expected_text in scored['openness_score_reason'], scored
Ejemplo n.º 15
0
 def test_url_with_permanent_fetch_error_scores_zero(self, url):
     """Score the fake resource at ``url`` and expect a permanent failure.

     The expected score is 0; the reason may be 'URL unobtainable' with or
     without the 'Server returned HTTP 404' suffix.
     """
     fake = self.fake_resource
     fake['url'] = url
     outcome = resource_score(self.fake_context, fake)
     assert outcome['openness_score'] == 0, outcome
     allowed = (
         'URL unobtainable',
         'URL unobtainable: Server returned HTTP 404',
     )
     assert outcome['openness_score_reason'] in allowed, outcome
Ejemplo n.º 16
0
 def test_content_type_with_charset_still_recognized(self, url):
     """Expect score 1 with reason 'obtainable via web page' for ``url``.

     ``url`` is written into the object obtained from ``self.fake_resource``
     before that object is scored.
     """
     resource = self.fake_resource
     resource['url'] = url
     scored = resource_score(self.fake_context, resource)
     assert scored['openness_score'] == 1, scored
     reason = scored['openness_score_reason']
     assert reason == 'obtainable via web page', scored
Ejemplo n.º 17
0
 def test_by_sniff_csv(self):
     """Expect score 3 and format 'CSV' when the sniffed format is 'CSV'.

     The reason must mention the sniffed format and the archival timestamp
     must equal ``TODAY_STR``.
     """
     set_sniffed_format('CSV')
     resource = self._test_resource()
     scored = resource_score(resource, log)
     assert scored['openness_score'] == 3, scored
     reason = scored['openness_score_reason']
     assert 'Content of file appeared to be format "CSV"' in reason, scored
     assert scored['format'] == 'CSV', scored
     assert scored['archival_timestamp'] == TODAY_STR, scored
Ejemplo n.º 18
0
 def test_by_sniff_csv(self):
     """Score a default test resource after setting the sniffed format to 'CSV'.

     Expected: score 3, a reason naming the "CSV" content format, format
     'CSV' and an archival timestamp equal to ``TODAY_STR``.
     """
     set_sniffed_format('CSV')
     outcome = resource_score(self._test_resource(), log)
     assert outcome['openness_score'] == 3, outcome
     sniff_message = 'Content of file appeared to be format "CSV"'
     assert sniff_message in outcome['openness_score_reason'], outcome
     assert outcome['format'] == 'CSV', outcome
     assert outcome['archival_timestamp'] == TODAY_STR, outcome
Ejemplo n.º 19
0
 def test_by_extension(self):
     """Expect score 2 and format 'XLS' for a URL ending in '.xls'.

     The sniffed format is set to None, so the reason must say the contents
     were not recognized and that the extension maps to "XLS".
     """
     set_sniffed_format(None)
     resource = self._test_resource('http://site.com/filename.xls')
     scored = resource_score(resource, log)
     assert scored['openness_score'] == 2, scored
     assert scored['archival_timestamp'] == TODAY_STR, scored
     assert_equal(scored['format'], 'XLS')
     expected_fragments = (
         'not recognized from its contents',
         'extension "xls" relates to format "XLS"',
     )
     for fragment in expected_fragments:
         assert fragment in scored['openness_score_reason'], scored
Ejemplo n.º 20
0
 def test_by_format_field(self):
     """Expect score 2 and format 'XLS' when the format field is 'XLS'.

     The sniffed format is set to None; the reason must mention the failed
     content and extension checks as well as the format field.
     """
     set_sniffed_format(None)
     resource = self._test_resource(format='XLS')
     scored = resource_score(resource, log)
     assert scored['openness_score'] == 2, scored
     assert_equal(scored['format'], 'XLS')
     expected_fragments = (
         'not recognized from its contents',
         'Could not determine a file extension in the URL',
         'Format field "XLS"',
     )
     for fragment in expected_fragments:
         assert fragment in scored['openness_score_reason'], scored
Ejemplo n.º 21
0
 def test_by_extension(self):
     """Score a resource whose URL is 'http://site.com/filename.xls'.

     With the sniffed format set to None, the expected score is 2, the
     archival timestamp equals ``TODAY_STR`` and the format is 'XLS'.
     """
     set_sniffed_format(None)
     outcome = resource_score(
         self._test_resource('http://site.com/filename.xls'), log)
     assert outcome['openness_score'] == 2, outcome
     assert outcome['archival_timestamp'] == TODAY_STR, outcome
     assert_equal(outcome['format'], 'XLS')
     reason_checks = [
         'not recognized from its contents',
         'extension "xls" relates to format "XLS"',
     ]
     for expected_text in reason_checks:
         assert expected_text in outcome['openness_score_reason'], outcome
Ejemplo n.º 22
0
 def test_extension_not_recognized(self):
     """Expect score 1 for a URL whose extension 'zar' is unknown.

     The sniffed format is set to None; the reason must mention both the
     unrecognized contents and the unknown URL extension.
     """
     set_sniffed_format(None)
     resource = _test_resource('http://site.com/filename.zar')
     scored = resource_score(resource)
     assert scored['openness_score'] == 1, scored
     expected_fragments = (
         'not recognized from its contents',
         'URL extension "zar" is an unknown format',
     )
     for fragment in expected_fragments:
         assert fragment in scored['openness_score_reason'], scored
Ejemplo n.º 23
0
 def test_by_format_field(self):
     """Score a resource created with ``format='XLS'`` and no sniffed format.

     Expected: score 2, format 'XLS', and a reason that reports the content
     check, the URL extension check and the format field.
     """
     set_sniffed_format(None)
     outcome = resource_score(self._test_resource(format='XLS'), log)
     assert outcome['openness_score'] == 2, outcome
     assert_equal(outcome['format'], 'XLS')
     reason_checks = [
         'not recognized from its contents',
         'Could not determine a file extension in the URL',
         'Format field "XLS"',
     ]
     for expected_text in reason_checks:
         assert expected_text in outcome['openness_score_reason'], outcome
Ejemplo n.º 24
0
 def test_archiver_ran_but_not_cached(self):
     """Score a resource created with ``cached=False`` and ``format=None``.

     Expected: score 1, no detected format, an archival timestamp equal to
     ``TODAY_STR`` and a reason that lists every failed detection step.
     """
     result = resource_score(self._test_resource(cached=False, format=None), log)
     # falls back on previous QA data detailing failed attempts
     assert result['openness_score'] == 1, result
     # Identity check: ``== None`` could be satisfied by a custom ``__eq__``.
     assert result['format'] is None, result
     assert result['archival_timestamp'] == TODAY_STR, result
     reason = result['openness_score_reason']
     assert 'This file had not been downloaded at the time of scoring it.' in reason, result
     assert 'Could not determine a file extension in the URL.' in reason, result
     assert 'Format field is blank.' in reason, result
     assert 'Could not understand the file format, therefore score is 1.' in reason, result
Ejemplo n.º 25
0
 def test_archiver_ran_but_not_cached(self):
     """Score a resource created with ``cached=False`` and ``format=None``.

     Expected: score 1, no detected format, an archival timestamp equal to
     ``TODAY_STR`` and a reason that lists every failed detection step.
     """
     result = resource_score(self._test_resource(cached=False, format=None), log)
     # falls back on previous QA data detailing failed attempts
     assert result['openness_score'] == 1, result
     # Identity check: ``== None`` could be satisfied by a custom ``__eq__``.
     assert result['format'] is None, result
     assert result['archival_timestamp'] == TODAY_STR, result
     reason = result['openness_score_reason']
     assert 'This file had not been downloaded at the time of scoring it.' in reason, result
     assert 'Could not determine a file extension in the URL.' in reason, result
     assert 'Format field is blank.' in reason, result
     assert 'Could not understand the file format, therefore score is 1.' in reason, result
Ejemplo n.º 26
0
 def test_not_available_and_not_open(self):
     """Expect score 0 for an uncached, unlicensed resource that never downloaded.

     The archival record is set to a 'Download error' status with 16
     failures and no last success, then committed. The test expects score
     0, format None and a reason describing the download failure.
     """
     resource = self._test_resource(license_id=None, format=None, cached=False)

     archive_record = Archival.get_for_resource(resource.id)
     archive_record.status_id = Status.by_text('Download error')
     archive_record.reason = 'Server returned 500 error'
     archive_record.last_success = None
     archive_record.first_failure = datetime.datetime(
         year=2008, month=10, day=1, hour=6, minute=30)
     archive_record.failure_count = 16
     archive_record.is_broken = True
     model.Session.commit()

     expected_reason = (
         'File could not be downloaded. Reason: Download error. '
         'Error details: Server returned 500 error. '
         'Attempted on 10/10/2008. '
         'Tried 16 times since 01/10/2008. '
         'This URL has not worked in the history of this tool.'
     )
     scored = resource_score(resource, log)
     assert scored['openness_score'] == 0, scored
     assert_equal(scored['format'], None)
     # In preference it should report that the file is not available.
     assert_equal(scored['openness_score_reason'], expected_reason)
Ejemplo n.º 27
0
 def test_not_available_and_not_open(self):
     """Expect score 0 for an uncached, unlicensed resource that never downloaded.

     The archival record is set to a 'Download error' status with 16
     failures and no last success, then committed. The test expects score
     0, format None and a reason describing the download failure.
     """
     resource = self._test_resource(license_id=None, format=None, cached=False)

     archive_record = Archival.get_for_resource(resource.id)
     archive_record.status_id = Status.by_text('Download error')
     archive_record.reason = 'Server returned 500 error'
     archive_record.last_success = None
     archive_record.first_failure = datetime.datetime(
         year=2008, month=10, day=1, hour=6, minute=30)
     archive_record.failure_count = 16
     archive_record.is_broken = True
     model.Session.commit()

     expected_reason = (
         'File could not be downloaded. Reason: Download error. '
         'Error details: Server returned 500 error. '
         'Attempted on 10/10/2008. '
         'Tried 16 times since 01/10/2008. '
         'This URL has not worked in the history of this tool.'
     )
     scored = resource_score(resource, log)
     assert scored['openness_score'] == 0, scored
     assert_equal(scored['format'], None)
     # In preference it should report that the file is not available.
     assert_equal(scored['openness_score_reason'], expected_reason)
Ejemplo n.º 28
0
 def test_url_pointing_to_html_page_scores_one(self, url):
     """Expect score 1 with reason "obtainable via web page" for ``url``.

     ``url`` is written into the object obtained from ``self.fake_resource``
     before that object is scored.
     """
     resource = self.fake_resource
     resource["url"] = url
     scored = resource_score(self.fake_context, resource)
     assert scored["openness_score"] == 1, scored
     reason = scored["openness_score_reason"]
     assert reason == "obtainable via web page", scored
Ejemplo n.º 29
0
 def test_by_format_field_excel(self):
     """Expect the format field 'Excel' to be reported as format 'XLS'.

     The sniffed format is set to None before scoring.
     """
     set_sniffed_format(None)
     resource = self._test_resource(format='Excel')
     scored = resource_score(resource, log)
     assert_equal(scored['format'], 'XLS')
Ejemplo n.º 30
0
 def test_by_format_field_excel(self):
     """Expect the format field 'Excel' to be reported as format 'XLS'.

     The sniffed format is set to None first. The test raises ``SkipTest``
     when ``check_ckan_version(max_version='2.4.99')`` is truthy.
     """
     set_sniffed_format(None)
     version_too_old = check_ckan_version(max_version='2.4.99')
     if version_too_old:
         raise SkipTest
     resource = _test_resource(format='Excel')
     scored = resource_score(resource)
     assert_equal(scored['format'], 'XLS')
Ejemplo n.º 31
0
 def test_available_but_not_open(self):
     """Expect score 0 for a resource created with ``license_id=None``.

     The sniffed format is set to 'CSV', so the format is still expected to
     be 'CSV'; the reason must contain 'License not open'.
     """
     set_sniffed_format('CSV')
     resource = self._test_resource(license_id=None)
     scored = resource_score(resource, log)
     assert scored['openness_score'] == 0, scored
     assert_equal(scored['format'], 'CSV')
     reason = scored['openness_score_reason']
     assert 'License not open' in reason, scored
Ejemplo n.º 32
0
 def test_url_pointing_to_html_page_scores_one(self, url):
     """Score the fake resource at ``url``; a web page is expected to score 1.

     The reason must equal 'obtainable via web page'.
     """
     fake = self.fake_resource
     fake['url'] = url
     outcome = resource_score(self.fake_context, fake)
     assert outcome['openness_score'] == 1, outcome
     expected_reason = 'obtainable via web page'
     assert outcome['openness_score_reason'] == expected_reason, outcome
Ejemplo n.º 33
0
 def test_url_with_permanent_fetch_error_scores_zero(self, url):
     """Expect score 0 with reason "URL unobtainable" for ``url``.

     ``url`` is written into the object obtained from ``self.fake_resource``
     before that object is scored.
     """
     resource = self.fake_resource
     resource["url"] = url
     scored = resource_score(self.fake_context, resource)
     assert scored["openness_score"] == 0, scored
     reason = scored["openness_score_reason"]
     assert reason == "URL unobtainable", scored
Ejemplo n.º 34
0
 def test_url_with_temporary_fetch_error_not_scored(self, url):
     """Expect score 0 with reason "Service unavailable" for ``url``.

     ``url`` is written into the object obtained from ``self.fake_resource``
     before that object is scored.
     """
     resource = self.fake_resource
     resource["url"] = url
     scored = resource_score(self.fake_context, resource)
     assert scored["openness_score"] == 0, scored
     reason = scored["openness_score_reason"]
     assert reason == "Service unavailable", scored
Ejemplo n.º 35
0
 def test_open_standard_formats_score_three(self, url):
     """Score the fake resource at ``url``; an open standard format scores 3.

     The reason must equal 'open and standardized format'.
     """
     fake = self.fake_resource
     fake['url'] = url
     outcome = resource_score(self.fake_context, fake)
     assert outcome['openness_score'] == 3, outcome
     expected_reason = 'open and standardized format'
     assert outcome['openness_score_reason'] == expected_reason, outcome
Ejemplo n.º 36
0
 def test_machine_readable_formats_score_two(self, url):
     """Score the fake resource at ``url``; a machine-readable format scores 2.

     The reason must equal 'machine readable format'.
     """
     fake = self.fake_resource
     fake['url'] = url
     outcome = resource_score(self.fake_context, fake)
     assert outcome['openness_score'] == 2, outcome
     expected_reason = 'machine readable format'
     assert outcome['openness_score_reason'] == expected_reason, outcome
Ejemplo n.º 37
0
 def test_temporary_failure_increments_failure_count(self, url):
     """Expect ``openness_score_failure_count`` to be 1 after scoring ``url``.

     ``url`` is written into the object obtained from ``self.fake_resource``
     before that object is scored.
     """
     resource = self.fake_resource
     resource['url'] = url
     scored = resource_score(self.fake_context, resource)
     failure_count = scored['openness_score_failure_count']
     assert failure_count == 1, scored
Ejemplo n.º 38
0
 def test_ontological_formats_score_four(self, url):
     """Score the fake resource at ``url``; an ontological format scores 4.

     The reason must equal 'ontologically represented'.
     """
     fake = self.fake_resource
     fake['url'] = url
     outcome = resource_score(self.fake_context, fake)
     assert outcome['openness_score'] == 4, outcome
     expected_reason = 'ontologically represented'
     assert outcome['openness_score_reason'] == expected_reason, outcome
Ejemplo n.º 39
0
 def test_url_with_content(self, url):
     """Expect score 3 after scoring the fake resource at ``url``.

     ``url`` is written into the object obtained from ``self.fake_resource``
     before that object is scored.
     """
     resource = self.fake_resource
     resource['url'] = url
     scored = resource_score(self.fake_context, resource)
     score = scored['openness_score']
     assert score == 3, scored
Ejemplo n.º 40
0
 def test_open_standard_formats_score_three(self, url):
     """Expect score 3 with reason "open and standardized format" for ``url``.

     ``url`` is written into the object obtained from ``self.fake_resource``
     before that object is scored.
     """
     resource = self.fake_resource
     resource["url"] = url
     scored = resource_score(self.fake_context, resource)
     assert scored["openness_score"] == 3, scored
     reason = scored["openness_score_reason"]
     assert reason == "open and standardized format", scored
Ejemplo n.º 41
0
 def test_by_format_field_excel(self):
     """Expect the format field 'Excel' to be reported as format 'XLS'.

     The sniffed format is set to None first. The test raises ``SkipTest``
     when ``p.toolkit.check_ckan_version(max_version='2.4.99')`` is truthy.
     """
     set_sniffed_format(None)
     version_too_old = p.toolkit.check_ckan_version(max_version='2.4.99')
     if version_too_old:
         raise SkipTest
     resource = self._test_resource(format='Excel')
     scored = resource_score(resource)
     assert_equal(scored['format'], 'XLS')
Ejemplo n.º 42
0
 def test_url_with_unknown_content_type_scores_one(self, url):
     """Expect score 0 with reason "unrecognised content type" for ``url``.

     ``url`` is written into the object obtained from ``self.fake_resource``
     before that object is scored.

     NOTE(review): the method name says "scores_one" but the assertion
     expects 0 -- confirm which of the two is intended.
     """
     resource = self.fake_resource
     resource["url"] = url
     scored = resource_score(self.fake_context, resource)
     assert scored["openness_score"] == 0, scored
     reason = scored["openness_score_reason"]
     assert reason == "unrecognised content type", scored
Ejemplo n.º 43
0
 def test_content_type_with_charset_still_recognized(self, url):
     """Score the fake resource at ``url`` and expect the web-page result.

     Expected: score 1 with reason 'obtainable via web page'.
     """
     fake = self.fake_resource
     fake['url'] = url
     outcome = resource_score(self.fake_context, fake)
     assert outcome['openness_score'] == 1, outcome
     expected_reason = 'obtainable via web page'
     assert outcome['openness_score_reason'] == expected_reason, outcome
Ejemplo n.º 44
0
 def test_extension_not_recognized(self):
     """Expect score 1 for a URL whose extension 'zar' is unknown.

     The sniffed format is set to None; the reason must mention both the
     unrecognized contents and the unknown URL extension.
     """
     set_sniffed_format(None)
     resource = self._test_resource('http://site.com/filename.zar')
     scored = resource_score(resource, log)
     assert scored['openness_score'] == 1, scored
     expected_fragments = (
         'not recognized from its contents',
         'URL extension "zar" is an unknown format',
     )
     for fragment in expected_fragments:
         assert fragment in scored['openness_score_reason'], scored
Ejemplo n.º 45
0
 def test_temporary_failure_increments_failure_count(self, url):
     """Score the fake resource at ``url`` and check the failure counter.

     The result's ``openness_score_failure_count`` is expected to be 1.
     """
     fake = self.fake_resource
     fake['url'] = url
     outcome = resource_score(self.fake_context, fake)
     recorded_failures = outcome['openness_score_failure_count']
     assert recorded_failures == 1, outcome
Ejemplo n.º 46
0
 def test_by_format_field_excel(self):
     """Score a resource created with ``format='Excel'``; expect format 'XLS'.

     The sniffed format is set to None before scoring.
     """
     set_sniffed_format(None)
     excel_resource = self._test_resource(format='Excel')
     outcome = resource_score(excel_resource, log)
     assert_equal(outcome['format'], 'XLS')
Ejemplo n.º 47
0
 def test_url_with_content(self, url):
     """Score the fake resource at ``url``; the expected score is 3."""
     fake = self.fake_resource
     fake['url'] = url
     outcome = resource_score(self.fake_context, fake)
     openness = outcome['openness_score']
     assert openness == 3, outcome
Ejemplo n.º 48
0
 def test_ontological_formats_score_four(self, url):
     """Expect score 4 with reason "ontologically represented" for ``url``.

     ``url`` is written into the object obtained from ``self.fake_resource``
     before that object is scored.
     """
     resource = self.fake_resource
     resource["url"] = url
     scored = resource_score(self.fake_context, resource)
     assert scored["openness_score"] == 4, scored
     reason = scored["openness_score_reason"]
     assert reason == "ontologically represented", scored
Ejemplo n.º 49
0
 def test_available_but_not_open(self):
     """Score a resource created with ``license_id=None`` and sniffed format 'CSV'.

     Expected: score 0, format 'CSV', and a reason containing
     'License not open'.
     """
     set_sniffed_format('CSV')
     outcome = resource_score(self._test_resource(license_id=None), log)
     assert outcome['openness_score'] == 0, outcome
     assert_equal(outcome['format'], 'CSV')
     licence_message = 'License not open'
     assert licence_message in outcome['openness_score_reason'], outcome
Ejemplo n.º 50
0
 def test_machine_readable_formats_score_two(self, url):
     """Expect score 2 with reason "machine readable format" for ``url``.

     ``url`` is written into the object obtained from ``self.fake_resource``
     before that object is scored.
     """
     resource = self.fake_resource
     resource["url"] = url
     scored = resource_score(self.fake_context, resource)
     assert scored["openness_score"] == 2, scored
     reason = scored["openness_score_reason"]
     assert reason == "machine readable format", scored
Ejemplo n.º 51
0
 def test_url_with_unknown_content_type_scores_one(self, url):
     """Score the fake resource at ``url`` and expect an unrecognised type.

     Expected: score 0 with reason 'unrecognised content type'.

     NOTE(review): the method name says "scores_one" but the assertion
     expects 0 -- confirm which of the two is intended.
     """
     fake = self.fake_resource
     fake['url'] = url
     outcome = resource_score(self.fake_context, fake)
     assert outcome['openness_score'] == 0, outcome
     expected_reason = 'unrecognised content type'
     assert outcome['openness_score_reason'] == expected_reason, outcome