Beispiel #1
0
        def generator(filenames: iter, count=0) -> [int, Resource]:
            """
            Yield a ``(count, resource)`` pair for each file that passes the resource gate.

            Entries that are directories are expanded with ``self.walk_directories`` and
            processed recursively; the running count is carried across the recursion.
            Entries that do not exist or are not regular files are logged and skipped.
            Observers are informed of every created resource and every rejected file.

            :param filenames: iterable of file or directory names; entries that are not
                strings are logged and converted with ``str``
            :param int count: number of resources counted so far
            :return: generator of ``(count, resource)`` tuples
            """
            passes_gate = self.resource_gate()
            for filename in filenames:
                if not isinstance(filename, str):
                    # Hand the value to the logger as an argument: "%s" % value
                    # raises TypeError when the value is a tuple, which is
                    # exactly the kind of input this branch exists for.
                    LOG.warning("Not a string: %s", filename)
                    filename = str(filename)

                file = os.path.abspath(filename)
                if not os.path.exists(file):
                    LOG.warning("File does not exist: %s", file)
                elif os.path.isdir(file):
                    for cr, rsc in generator(self.walk_directories(file), count=count):
                        yield cr, rsc
                        # Keep the local count in step with the nested generator.
                        count = cr
                elif os.path.isfile(file):
                    if passes_gate(file):
                        count += 1
                        path = os.path.relpath(file, self.para.resource_dir)
                        uri = self.para.url_prefix + defaults.sanitize_url_path(path)
                        stat = os.stat(file)
                        # NOTE(review): st_ctime is the metadata-change time on Unix and
                        # the creation time on Windows; confirm that st_mtime is not
                        # what lastmod is meant to carry.
                        resource = Resource(uri=uri, length=stat.st_size,
                                            lastmod=defaults.w3c_datetime(stat.st_ctime),
                                            md5=defaults.md5_for_file(file),
                                            mime_type=defaults.mime_type(file))
                        yield count, resource
                        # Observers are informed only once the consumer asks for
                        # the next item, i.e. after it has received this resource.
                        self.observers_inform(self, ExecutorEvent.created_resource, resource=resource,
                                              count=count, file=file)
                    else:
                        self.observers_inform(self, ExecutorEvent.rejected_file, file=file)
                else:
                    LOG.warning("Not a regular file: %s", file)
Beispiel #2
0
    def uri_from_path(self, path):
        """
        ``derived`` :samp:`Url for a path, resolved relative to {resource_dir}`

        :param str path: the path for which the url is wanted
        :return: ``url_prefix`` followed by the sanitized path relative to ``resource_dir``
        """
        relative = os.path.relpath(path, start=self.resource_dir)
        prefix = self.url_prefix
        return prefix + defaults.sanitize_url_path(relative)
Beispiel #3
0
    def capabilitylist_url(self) -> str:
        """
        ``derived`` :samp:`Url of the current capabilitylist`

        The url is built from the location of 'capabilitylist.xml' in the metadata
        directory, taken relative to ``resource_dir`` and appended to ``url_prefix``.

        :return: url of the current capabilitylist
        """
        relative = os.path.relpath(self.abs_metadata_path("capabilitylist.xml"),
                                   start=self.resource_dir)
        prefix = self.url_prefix
        return prefix + defaults.sanitize_url_path(relative)
Beispiel #4
0
    def description_url(self):
        """
        ``derived`` :samp:`Url of the current description`

        When ``has_wellknown_at_root`` is set, the url is the scheme and network
        location of ``url_prefix`` combined with the well-known path. Otherwise it
        is the url of the well-known file inside the metadata directory, taken
        relative to ``resource_dir``.

        :return: url of the current description

        See also: :func:`has_wellknown_at_root`
        """
        if not self.has_wellknown_at_root:
            metadata_file = self.abs_metadata_path(WELL_KNOWN_URL)
            relative = os.path.relpath(metadata_file, start=self.resource_dir)
            return self.url_prefix + defaults.sanitize_url_path(relative)

        # Keep only scheme and network location; path, query and fragment are reset.
        parts = urllib.parse.urlsplit(self.url_prefix)
        return urllib.parse.urlunsplit((parts.scheme, parts.netloc, WELL_KNOWN_URL, "", ""))
Beispiel #5
0
    def create_index(self, sitemap_data_iter: iter):
        """
        Build a resourcelist index when more than one sitemap is given.

        With zero or one sitemap nothing is done. Otherwise a ``ResourceList``
        flagged as sitemap index is filled with one entry per sitemap and linked
        "up" to the capabilitylist url. For every sitemap whose ``document_saved``
        is true, ``self.update_rel_index`` is called with the index url and the
        sitemap's path. The index is finally handed to ``self.finish_sitemap``.

        :param sitemap_data_iter: iterable of sitemap data objects; any iterable
            is accepted, including one-shot iterators and generators
        """
        # Materialize once: the argument is sized and then iterated, which a
        # generator or other one-shot iterator cannot support (len() would
        # raise TypeError).
        sitemap_data_list = list(sitemap_data_iter)
        if len(sitemap_data_list) > 1:
            resourcelist_index = ResourceList()
            resourcelist_index.sitemapindex = True
            resourcelist_index.md_at = self.date_start_processing
            resourcelist_index.md_completed = self.date_end_processing
            index_path = self.para.abs_metadata_path("resourcelist-index.xml")
            rel_index_path = os.path.relpath(index_path,
                                             self.para.resource_dir)
            index_url = self.para.url_prefix + defaults.sanitize_url_path(
                rel_index_path)
            resourcelist_index.link_set(rel="up",
                                        href=self.para.capabilitylist_url())

            for sitemap_data in sitemap_data_list:
                resourcelist_index.add(
                    Resource(uri=sitemap_data.uri,
                             md_at=sitemap_data.doc_start,
                             md_completed=sitemap_data.doc_end))
                if sitemap_data.document_saved:
                    self.update_rel_index(index_url, sitemap_data.path)

            self.finish_sitemap(-1, resourcelist_index)
Beispiel #6
0
 def test_sanitize_url_path(self):
     """Forward-slash paths stay unchanged; backslash separators become forward slashes."""
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest in Python 3.12.
     self.assertEqual("foo/bar/baz.txt",
                      defaults.sanitize_url_path("foo/bar/baz.txt"))
     self.assertEqual("foo/bar/baz.txt",
                      defaults.sanitize_url_path("foo\\bar\\baz.txt"))