def test_first_file(self):
    fd = FilenameDeduplicator()
    assert_equals(fd.deduplicate('index.html'), 'index.html')

def test_reset(self):
    fd = FilenameDeduplicator()
    assert_equals(fd.deduplicate('index.html'), 'index.html')
    fd.reset()
    assert_equals(fd.deduplicate('index.html'), 'index.html')

def test_unicode_filename(self):
    fd = FilenameDeduplicator()
    assert_equals(fd.deduplicate(u'\xa3'), u'\xa3')
    assert_equals(fd.deduplicate(u'\xa3'), u'\xa31')

def test_second_file(self):
    fd = FilenameDeduplicator()
    assert_equals(fd.deduplicate('index.html'), 'index.html')
    assert_equals(fd.deduplicate('robots.txt'), 'robots.txt')

def test_no_extension(self):
    fd = FilenameDeduplicator()
    assert_equals(fd.deduplicate('index'), 'index')
    assert_equals(fd.deduplicate('index'), 'index1')
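# For context, a minimal sketch of a FilenameDeduplicator consistent with the
# tests above. The class name comes from the source; its internals (the `seen`
# dict and the counter-suffix format) are assumptions for illustration only,
# not the project's actual implementation: the first occurrence of a name is
# returned unchanged, and later repeats get an incrementing counter appended.
class FilenameDeduplicator(object):
    def __init__(self):
        self.seen = {}  # filename -> number of times it has been handed out

    def reset(self):
        # Forget all previously seen filenames.
        self.seen = {}

    def deduplicate(self, filename):
        count = self.seen.get(filename, 0)
        self.seen[filename] = count + 1
        if count == 0:
            return filename
        return u'{0}{1}'.format(filename, count)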
fd = FilenameDeduplicator()
for res in pkg['resources']:
    archival = Archival.get_for_resource(res['id'])
    if archival and archival.cache_filepath:
        # We have archived it, and we have a path.
        _, resource_id, filename = archival.cache_filepath.rsplit('/', 2)
        cache_filepath = archival.cache_filepath
    else:
        # Try and work out the filename from the URL.
        try:
            _, filename = res['url'].rsplit('/', 1)
        except ValueError:
            filename = res['id']
        cache_filepath = ''
    filename = fd.deduplicate(filename)
    resource_json = {
        'url': res['url'],
        'path': u'data/{0}'.format(filename),
        'cache_filepath': cache_filepath,
        'description': res['description']
    }
    resource_json['has_data'], resource_json['detected_format'] = \
        resource_has_data(res)
    # If we have archived the data, but the link was broken
    # then record the reason.
    if archival and archival.is_broken:
        resource_json['reason'] = archival.reason
    format = datapackage_format(res['format'])
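# Illustrative only (the URL below is hypothetical, not from the project): when
# a resource has no archived copy, the fallback above keeps the last path
# segment of the URL, e.g.
#   res['url'] = 'http://example.com/files/report.csv'
#   res['url'].rsplit('/', 1)  ->  ['http://example.com/files', 'report.csv']
# so the resource is written with path u'data/report.csv' and an empty
# cache_filepath.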
fd = FilenameDeduplicator() for res in pkg["resources"]: archival = Archival.get_for_resource(res["id"]) if archival and archival.cache_filepath: # We have archived it, and we have a path. _, resource_id, filename = archival.cache_filepath.rsplit("/", 2) cache_filepath = archival.cache_filepath else: # Try and work out the filename from the URL. try: _, filename = res["url"].rsplit("/", 1) except ValueError: filename = res["id"] cache_filepath = "" filename = fd.deduplicate(filename) resource_json = { "url": res["url"], "path": u"data/{0}".format(filename), "cache_filepath": cache_filepath, "description": res["description"], } resource_json["has_data"], resource_json["detected_format"] = resource_has_data(res) # If we have archived the data, but the link was broken # then record the reason. if archival and archival.is_broken: resource_json["reason"] = archival.reason format = datapackage_format(res["format"]) if format: