def generator(filenames: iter, count=0) -> [int, Resource]:
    """
    Yield ``(count, resource)`` pairs for every file in ``filenames`` that passes the resource gate.

    ``self`` is not a parameter of this function; it is resolved from the enclosing scope.

    Each entry is converted to an absolute path and then handled by kind:

    - a path that does not exist is logged and skipped;
    - a directory is expanded with ``self.walk_directories`` and processed recursively, the running
      count being carried into and back out of the recursion;
    - a regular file that passes the gate is turned into a ``Resource``, yielded, and afterwards
      reported to the observers as ``created_resource``;
    - a regular file that fails the gate is reported to the observers as ``rejected_file``;
    - anything else is logged and skipped.

    :param filenames: iterable of paths; entries that are not ``str`` are logged and converted with ``str()``
    :param count: number of resources already yielded before this call
    :return: generator of ``(count, resource)`` tuples, ``count`` being the running total
    """
    passes_gate = self.resource_gate()
    for filename in filenames:
        if not isinstance(filename, str):
            # Hand the value to the logger as an argument instead of formatting it eagerly:
            # '"..." % filename' raises TypeError when the offending value is a tuple.
            LOG.warning("Not a string: %s", filename)
            filename = str(filename)

        file = os.path.abspath(filename)
        if not os.path.exists(file):
            LOG.warning("File does not exist: %s", file)
        elif os.path.isdir(file):
            for cr, rsc in generator(self.walk_directories(file), count=count):
                yield cr, rsc
                # Adopt the count reached inside the recursion so numbering continues seamlessly.
                count = cr
        elif os.path.isfile(file):
            if passes_gate(file):
                count += 1
                path = os.path.relpath(file, self.para.resource_dir)
                uri = self.para.url_prefix + defaults.sanitize_url_path(path)
                stat = os.stat(file)
                # NOTE(review): st_ctime is the metadata-change time on Unix and the creation time on
                # Windows; confirm that st_mtime is not what lastmod is meant to carry.
                resource = Resource(uri=uri, length=stat.st_size,
                                    lastmod=defaults.w3c_datetime(stat.st_ctime),
                                    md5=defaults.md5_for_file(file),
                                    mime_type=defaults.mime_type(file))
                yield count, resource
                # Observers are informed only after the consumer has resumed the generator.
                self.observers_inform(self, ExecutorEvent.created_resource, resource=resource,
                                      count=count, file=file)
            else:
                self.observers_inform(self, ExecutorEvent.rejected_file, file=file)
        else:
            LOG.warning("Not a regular file: %s", file)
def uri_from_path(self, path):
    """
    ``derived`` :samp:`Build the url that corresponds to a path, taken relative to {resource_dir}`

    :param str path: the path for which the url is wanted
    :return: ``url_prefix`` followed by the sanitized form of ``path`` relative to ``resource_dir``
    """
    relative = os.path.relpath(path, self.resource_dir)
    url_path = defaults.sanitize_url_path(relative)
    return self.url_prefix + url_path
def capabilitylist_url(self) -> str:
    """
    ``derived`` :samp:`The url of the current capabilitylist`

    The url is derived from the location of 'capabilitylist.xml' in the metadata directory,
    expressed relative to ``resource_dir`` and appended to ``url_prefix``.

    :return: url of the current capabilitylist
    """
    relative = os.path.relpath(self.abs_metadata_path("capabilitylist.xml"), self.resource_dir)
    return self.url_prefix + defaults.sanitize_url_path(relative)
def description_url(self):
    """
    ``derived`` :samp:`The url of the current description`

    When ``has_wellknown_at_root`` is set, the url is built from the scheme and network location of
    ``url_prefix`` with the well-known path appended. Otherwise it refers to a file in the metadata
    directory, expressed relative to ``resource_dir`` and appended to ``url_prefix``.

    :return: url of the current description

    See also: :func:`has_wellknown_at_root`
    """
    if not self.has_wellknown_at_root:
        local = self.abs_metadata_path(WELL_KNOWN_URL)
        relative = os.path.relpath(local, self.resource_dir)
        return self.url_prefix + defaults.sanitize_url_path(relative)

    # Keep only scheme and network location of the prefix; query and fragment stay empty.
    parts = urllib.parse.urlsplit(self.url_prefix)
    return urllib.parse.urlunsplit((parts.scheme, parts.netloc, WELL_KNOWN_URL, "", ""))
def create_index(self, sitemap_data_iter: iter):
    """
    Build and finish a resourcelist index when more than one sitemap is given.

    Nothing happens for zero or one sitemap. Otherwise a ``ResourceList`` flagged as sitemapindex is
    filled with one ``Resource`` per sitemap. For each sitemap whose ``document_saved`` is true,
    ``update_rel_index`` is called with the index url and the path of that sitemap. Finally the index
    is passed to ``finish_sitemap`` with ``-1`` as first argument.

    :param sitemap_data_iter: iterable of sitemap data objects exposing ``uri``, ``doc_start``,
        ``doc_end``, ``document_saved`` and ``path``. Any iterable is accepted, generators included.
    """
    # Materialize once: a generator has no len() and could not be traversed a second time.
    sitemap_data_list = list(sitemap_data_iter)
    if len(sitemap_data_list) <= 1:
        return

    resourcelist_index = ResourceList()
    resourcelist_index.sitemapindex = True
    resourcelist_index.md_at = self.date_start_processing
    resourcelist_index.md_completed = self.date_end_processing

    index_path = self.para.abs_metadata_path("resourcelist-index.xml")
    rel_index_path = os.path.relpath(index_path, self.para.resource_dir)
    index_url = self.para.url_prefix + defaults.sanitize_url_path(rel_index_path)
    resourcelist_index.link_set(rel="up", href=self.para.capabilitylist_url())

    for sitemap_data in sitemap_data_list:
        resourcelist_index.add(Resource(uri=sitemap_data.uri,
                                        md_at=sitemap_data.doc_start,
                                        md_completed=sitemap_data.doc_end))
        if sitemap_data.document_saved:
            self.update_rel_index(index_url, sitemap_data.path)

    self.finish_sitemap(-1, resourcelist_index)
def test_sanitize_url_path(self):
    """Forward-slash paths come back unchanged; backslash separators come back as forward slashes."""
    # assertEqual instead of assertEquals: the alias is deprecated and absent from Python 3.12 onwards.
    self.assertEqual("foo/bar/baz.txt", defaults.sanitize_url_path("foo/bar/baz.txt"))
    self.assertEqual("foo/bar/baz.txt", defaults.sanitize_url_path("foo\\bar\\baz.txt"))