Exemple #1
0
    def getMetadata(self, key,
                    removeallentities=False,
                    doreplacements=True):
        """
        Return the metadata entry ``key`` formatted for output.

        key -- name of the metadata entry.
        removeallentities -- when true, the result is passed through
            removeAllEntities() (for list entries the flag is forwarded
            to getList() instead).
        doreplacements -- when true, doReplacements() is applied to the
            result.

        Returns None when isValidMetaEntry(key) is false, and the
        ``default_value_<key>`` config setting when the key is valid but
        not present in self.metadata.
        """
        if not self.isValidMetaEntry(key):
            return None

        if self.isList(key):
            # NOTE(review): list items are always fetched with
            # doreplacements=True, even when the caller passed
            # doreplacements=False -- confirm this is intended.
            value = self.join_list(key,self.getList(key, removeallentities, doreplacements=True))
            if doreplacements:
                value = self.doReplacements(value,key+"_LIST")
            return value

        if key not in self.metadata:
            return self.getConfig("default_value_"+key)

        value = self.metadata[key]
        if value:
            if key == "numWords":
                value = commaGroups(value)
            elif key == "numChapters":
                value = commaGroups("%d"%value)
            elif key == "dateCreated":
                # == on purpose: `key in ("dateCreated")` is a substring
                # test against a plain string, so keys such as "date"
                # would wrongly be treated as dates.
                value = value.strftime(self.getConfig(key+"_format","%Y-%m-%d %H:%M:%S"))
            elif key in ("datePublished","dateUpdated"):
                value = value.strftime(self.getConfig(key+"_format","%Y-%m-%d"))

        if doreplacements:
            value = self.doReplacements(value,key)
        if removeallentities and value is not None:
            return removeAllEntities(value)
        return value
Exemple #2
0
    def getMetadata(self, key, removeallentities=False, doreplacements=True):
        """
        Return the metadata entry ``key`` formatted for output, with caching.

        Results are memoised in self.processed_metadata_cache per key and
        per (removeallentities, doreplacements) combination.

        key -- name of the metadata entry.
        removeallentities -- when true, the result is passed through
            removeAllEntities() (for list entries the flag is forwarded
            to getList() instead).
        doreplacements -- when true, doReplacements() is applied to the
            result.

        Returns None (uncached) when isValidMetaEntry(key) is false, and
        the ``default_value_<key>`` config setting when the key is valid
        but not present in self.metadata.
        """
        if not self.isValidMetaEntry(key):
            return None

        # check for a cached value to speed processing
        cache_key = (removeallentities, doreplacements)
        if key in self.processed_metadata_cache and cache_key in self.processed_metadata_cache[key]:
            return self.processed_metadata_cache[key][cache_key]

        value = None
        if self.isList(key):
            # NOTE(review): list items are always fetched with
            # doreplacements=True, even when the caller passed
            # doreplacements=False -- confirm this is intended.
            value = self.join_list(key, self.getList(key, removeallentities, doreplacements=True))
            if doreplacements:
                value = self.doReplacements(value, key + "_LIST")
        elif key in self.metadata:
            value = self.metadata[key]
            if value:
                if key in ("numWords", "numChapters"):
                    value = commaGroups(unicode(value))
                # == on purpose: `key in ("dateCreated")` is a substring
                # test against a plain string, so keys such as "date"
                # would wrongly be treated as dates.
                if key == "dateCreated":
                    value = value.strftime(self.getConfig(key + "_format", "%Y-%m-%d %H:%M:%S"))
                if key in ("datePublished", "dateUpdated"):
                    value = value.strftime(self.getConfig(key + "_format", "%Y-%m-%d"))
                # any other date-like entry is only formatted when a
                # <key>_format setting exists.
                if isinstance(value, (datetime.date, datetime.datetime, datetime.time)) and self.hasConfig(
                    key + "_format"
                ):
                    value = value.strftime(self.getConfig(key + "_format"))

                if (
                    key == "title"
                    and (self.chapter_first or self.chapter_last)
                    and self.getConfig("title_chapter_range_pattern")
                ):
                    # decorate the title with the selected chapter range.
                    first = self.chapter_first or "1"
                    last = self.chapter_last or self.getMetadata("numChapters")
                    templ = string.Template(self.getConfig("title_chapter_range_pattern"))
                    value = templ.substitute({"title": value, "first": commaGroups(first), "last": commaGroups(last)})

            if doreplacements:
                value = self.doReplacements(value, key)
            if removeallentities and value is not None:
                value = removeAllEntities(value)
        else:
            value = self.getConfig("default_value_" + key)

        # save a cached value to speed processing
        self.processed_metadata_cache.setdefault(key, {})[cache_key] = value

        return value
Exemple #3
0
    def getMetadata(self, key,
                    removeallentities=False,
                    doreplacements=True):
        """
        Return the metadata entry ``key`` formatted for output, with caching.

        Results -- including the None produced for invalid keys -- are
        memoised in self.processed_metadata_cache per key and per
        (removeallentities, doreplacements) combination.

        key -- name of the metadata entry.
        removeallentities -- when true, the result is passed through
            removeAllEntities() (for list entries the flag is forwarded
            to getList() instead).
        doreplacements -- when true, doReplacements() is applied to the
            result.

        Returns None when isValidMetaEntry(key) is false, and the
        ``default_value_<key>`` config setting when the key is valid but
        not present in self.metadata.
        """
        # check for a cached value to speed processing
        cache_key = (removeallentities,doreplacements)
        if key in self.processed_metadata_cache \
                and cache_key in self.processed_metadata_cache[key]:
            return self.processed_metadata_cache[key][cache_key]

        value = None
        if not self.isValidMetaEntry(key):
            pass # cache not valid entry, too.
        elif self.isList(key):
            # NOTE(review): list items are always fetched with
            # doreplacements=True, even when the caller passed
            # doreplacements=False -- confirm this is intended.
            value = self.join_list(key,self.getList(key, removeallentities, doreplacements=True))
            if doreplacements:
                value = self.doReplacements(value,key+"_LIST")
        elif key in self.metadata:
            value = self.metadata[key]
            if value:
                if key in ("numWords","numChapters"):
                    value = commaGroups(unicode(value))
                # == on purpose: `key in ("dateCreated")` is a substring
                # test against a plain string, so keys such as "date"
                # would wrongly be treated as dates.
                if key == "dateCreated":
                    value = value.strftime(self.getConfig(key+"_format","%Y-%m-%d %H:%M:%S"))
                if key in ("datePublished","dateUpdated"):
                    value = value.strftime(self.getConfig(key+"_format","%Y-%m-%d"))
                # any other date-like entry is only formatted when a
                # <key>_format setting exists.
                if isinstance(value, (datetime.date, datetime.datetime, datetime.time)) and self.hasConfig(key+"_format"):
                    value = value.strftime(self.getConfig(key+"_format"))

                if key == "title" and (self.chapter_first or self.chapter_last) and self.getConfig("title_chapter_range_pattern"):
                    # decorate the title with the selected chapter range.
                    first = self.chapter_first or "1"
                    last = self.chapter_last or self.getMetadata("numChapters")
                    templ = string.Template(self.getConfig("title_chapter_range_pattern"))
                    value = templ.substitute({'title':value,
                                              'first':commaGroups(first),
                                              'last':commaGroups(last)})

            if doreplacements:
                value = self.doReplacements(value,key)
            if removeallentities and value is not None:
                value = removeAllEntities(value)
        else:
            value = self.getConfig("default_value_"+key)

        # save a cached value to speed processing
        self.processed_metadata_cache.setdefault(key, {})[cache_key] = value

        return value
Exemple #4
0
 def get_filename_safe_metadata(self,pattern=None):
     """
     Return a dict of every metadata entry made safe for use in filenames.

     pattern -- regex whose matches are replaced with '_'; when not
         given, the output_filename_safepattern config setting (or its
         built-in default) is compiled with re_compile().

     Each value has its entities removed before the pattern is applied,
     except 'formatext', which is copied through untouched.
     """
     all_metadata = self.getAllMetadata()

     unsafe_chars = pattern
     if not unsafe_chars:
         configured = self.getConfig("output_filename_safepattern",
                                     r"(^\.|/\.|[^a-zA-Z0-9_\. \[\]\(\)&'-]+)")
         unsafe_chars = re_compile(configured, "output_filename_safepattern")

     safe_values = {}
     for name in all_metadata:
         entry = self.getMetadata(name)
         # don't do file extension--we set it anyway.
         if name != 'formatext':
             entry = re.sub(unsafe_chars, '_', removeAllEntities(entry))
         safe_values[name] = entry
     return safe_values
Exemple #5
0
 def get_filename_safe_metadata(self,pattern=None):
     """
     Build a filename-safe copy of all metadata values.

     pattern -- regex used to find unsafe character runs, each of which
         becomes '_'.  Falls back to the output_filename_safepattern
         config setting, compiled through re_compile(), when omitted.

     'formatext' is returned as-is; every other value goes through
     removeAllEntities() and then the pattern substitution.
     """
     if pattern:
         safe_re = pattern
     else:
         safe_re = re_compile(self.getConfig("output_filename_safepattern",
                                             r"(^\.|/\.|[^a-zA-Z0-9_\. \[\]\(\)&'-]+)"),
                              "output_filename_safepattern")

     def _clean(meta_key):
         # don't do file extension--we set it anyway.
         if meta_key == 'formatext':
             return self.getMetadata(meta_key)
         return re.sub(safe_re, '_', removeAllEntities(self.getMetadata(meta_key)))

     cleaned = {}
     for meta_key in self.getAllMetadata().keys():
         cleaned[meta_key] = _clean(meta_key)
     return cleaned
Exemple #6
0
    def formatFileName(self,template,allowunsafefilename=True):
        """
        Expand a ${name}-style filename template with this story's metadata.

        template -- string.Template source; a falsy value selects the
            built-in "${title}-${siteabbrev}_${storyId}${formatext}".
        allowunsafefilename -- when false, every value has its entities
            removed and each match of the output_filename_safepattern
            config regex replaced with '_' before substitution.

        Returns the expanded name encoded as UTF-8.
        """
        metadata = self.getAllMetadata()

        if allowunsafefilename:
            substitutions = metadata
        else:
            unsafe_chars = re_compile(self.getConfig("output_filename_safepattern",r"[^a-zA-Z0-9_\. \[\]\(\)&'-]+"),"output_filename_safepattern")
            substitutions = {}
            for name in metadata:
                plain = removeAllEntities(self.getMetadata(name))
                substitutions[name] = re.sub(unsafe_chars,'_', plain)

        # fall back default:
        name_template = template or "${title}-${siteabbrev}_${storyId}${formatext}"
        return string.Template(name_template).substitute(substitutions).encode('utf8')
Exemple #7
0
    def getMetadata(self, key, removeallentities=False, doreplacements=True):
        """
        Return the metadata entry ``key`` formatted for output.

        key -- name of the metadata entry.
        removeallentities -- when true, the result is passed through
            removeAllEntities() (for list entries the flag is forwarded
            to getList() instead).
        doreplacements -- when true, doReplacements() is applied to the
            result.

        Returns None when isValidMetaEntry(key) is false, and the
        ``default_value_<key>`` config setting when the key is valid but
        not present in self.metadata.
        """
        if not self.isValidMetaEntry(key):
            return None

        if self.isList(key):
            # NOTE(review): list items are always fetched with
            # doreplacements=True, even when the caller passed
            # doreplacements=False -- confirm this is intended.
            value = self.join_list(
                key, self.getList(key, removeallentities, doreplacements=True))
            if doreplacements:
                value = self.doReplacements(value, key + "_LIST")
            return value

        if key not in self.metadata:
            return self.getConfig("default_value_" + key)

        value = self.metadata[key]
        if value:
            if key == "numWords":
                value = commaGroups(value)
            elif key == "numChapters":
                value = commaGroups("%d" % value)
            elif key == "dateCreated":
                # == on purpose: `key in ("dateCreated")` is a substring
                # test against a plain string, so keys such as "date"
                # would wrongly be treated as dates.
                value = value.strftime(
                    self.getConfig(key + "_format", "%Y-%m-%d %H:%M:%S"))
            elif key in ("datePublished", "dateUpdated"):
                value = value.strftime(
                    self.getConfig(key + "_format", "%Y-%m-%d"))
            elif key == "title" and (
                    self.chapter_first or self.chapter_last
            ) and self.getConfig("title_chapter_range_pattern"):
                # decorate the title with the selected chapter range.
                first = self.chapter_first or "1"
                last = self.chapter_last or self.getMetadata("numChapters")
                templ = string.Template(
                    self.getConfig("title_chapter_range_pattern"))
                value = templ.substitute({
                    'title': value,
                    'first': commaGroups(first),
                    'last': commaGroups(last)
                })

        if doreplacements:
            value = self.doReplacements(value, key)
        if removeallentities and value is not None:
            return removeAllEntities(value)
        return value
Exemple #8
0
    def formatFileName(self,template,allowunsafefilename=True):
        """
        Expand a ${name}-style filename template with this story's metadata.

        template -- string.Template source; a falsy value selects the
            built-in "${title}-${siteabbrev}_${storyId}${formatext}".
        allowunsafefilename -- when false, every value except 'formatext'
            has its entities removed and each match of the
            output_filename_safepattern config regex replaced with '_'
            before substitution.

        Returns the expanded name encoded as UTF-8.
        """
        metadata = self.getAllMetadata()

        if allowunsafefilename:
            substitutions = metadata
        else:
            unsafe_chars = re_compile(self.getConfig("output_filename_safepattern",r"(^\.|/\.|[^a-zA-Z0-9_\. \[\]\(\)&'-]+)"),"output_filename_safepattern")
            substitutions = {}
            for name in metadata:
                entry = self.getMetadata(name)
                # don't do file extension--we set it anyway.
                if name != 'formatext':
                    entry = re.sub(unsafe_chars,'_', removeAllEntities(entry))
                substitutions[name] = entry

        # fall back default:
        name_template = template or "${title}-${siteabbrev}_${storyId}${formatext}"
        return string.Template(name_template).substitute(substitutions).encode('utf8')
Exemple #9
0
    def formatFileName(self, template, allowunsafefilename=True):
        """
        Render the output filename from a string.Template pattern.

        template -- pattern with ${entry} placeholders; when falsy the
            default "${title}-${siteabbrev}_${storyId}${formatext}" is used.
        allowunsafefilename -- when true the metadata values are
            substituted unchanged; otherwise each value other than
            'formatext' is stripped of entities and has every match of
            the output_filename_safepattern config regex replaced by '_'.

        Returns the rendered name as UTF-8 encoded bytes.
        """
        substitutions = all_values = self.getAllMetadata()

        if not allowunsafefilename:
            safe_re = re_compile(
                self.getConfig("output_filename_safepattern", r"(^\.|/\.|[^a-zA-Z0-9_\. \[\]\(\)&'-]+)"),
                "output_filename_safepattern",
            )
            substitutions = {}
            for meta_key in all_values.keys():
                if meta_key != "formatext":
                    substitutions[meta_key] = re.sub(safe_re, "_", removeAllEntities(self.getMetadata(meta_key)))
                else:
                    # don't do file extension--we set it anyway.
                    substitutions[meta_key] = self.getMetadata(meta_key)

        if template:
            pattern_text = template
        else:
            # fall back default:
            pattern_text = "${title}-${siteabbrev}_${storyId}${formatext}"

        rendered = string.Template(pattern_text).substitute(substitutions)
        return rendered.encode("utf8")
Exemple #10
0
    def getMetadata(self, key,
                    removeallentities=False,
                    doreplacements=True):
        """
        Return the metadata entry ``key`` formatted for output.

        key -- name of the metadata entry.
        removeallentities -- when true, the result is passed through
            removeAllEntities() (for list entries the flag is forwarded
            to getList() instead).
        doreplacements -- when true, doReplacements() is applied to the
            result.

        Returns None when isValidMetaEntry(key) is false, and the
        ``default_value_<key>`` config setting when the key is valid but
        not present in self.metadata.
        """
        if not self.isValidMetaEntry(key):
            return None

        if self.isList(key):
            # NOTE(review): list items are always fetched with
            # doreplacements=True, even when the caller passed
            # doreplacements=False -- confirm this is intended.
            value = self.join_list(key,self.getList(key, removeallentities, doreplacements=True))
            if doreplacements:
                value = self.doReplacements(value,key+"_LIST")
            return value

        if key not in self.metadata:
            return self.getConfig("default_value_"+key)

        value = self.metadata[key]
        if value:
            if key == "numWords":
                value = commaGroups(value)
            if key == "numChapters":
                value = commaGroups("%d"%value)
            # == on purpose: `key in ("dateCreated")` is a substring test
            # against a plain string, so keys such as "date" would
            # wrongly be treated as dates.
            if key == "dateCreated":
                value = value.strftime(self.getConfig(key+"_format","%Y-%m-%d %H:%M:%S"))
            if key in ("datePublished","dateUpdated"):
                value = value.strftime(self.getConfig(key+"_format","%Y-%m-%d"))

            if key == "title" and (self.chapter_first or self.chapter_last) and self.getConfig("title_chapter_range_pattern"):
                # decorate the title with the selected chapter range.
                first = self.chapter_first or "1"
                last = self.chapter_last or self.getMetadata("numChapters")
                templ = string.Template(self.getConfig("title_chapter_range_pattern"))
                value = templ.substitute({'title':value,
                                          'first':commaGroups(first),
                                          'last':commaGroups(last)})

        if doreplacements:
            value = self.doReplacements(value,key)
        if removeallentities and value is not None:
            return removeAllEntities(value)
        return value
Exemple #11
0
    def getAllMetadata(self,
                       removeallentities=False,
                       doreplacements=True,
                       keeplists=False):
        '''
        Collect every entry named by getValidMetaList() into a dict.

        Values are strings as produced by getMetadata(); with
        keeplists=True, list entries are returned as lists from
        getList() instead.

        Before collecting, this refreshes derived entries through
        setMetadata(): authorHTML, seriesHTML (when a series or seriesUrl
        is set) and <entry>HTML for each name in the
        make_linkhtml_entries config list.  It also extends the
        extratags list with the configured extratags.
        '''
        anchor = "<a class='%slink' href='%s'>%s</a>"

        # special handling for authors/authorUrls
        if not self.isList('author'):
            author_url = self.getMetadata('authorUrl', removeallentities,
                                          doreplacements)
            author_name = self.getMetadata('author', removeallentities,
                                           doreplacements)
            self.setMetadata('authorHTML',
                             anchor % ('author', author_url, author_name))
        else:
            # more than one author, assume multiple authorUrl too.
            author_links = []
            for idx, author_name in enumerate(self.getList('author')):
                # authors without a matching authorUrl get no URL.
                if idx < len(self.getList('authorUrl')):
                    author_url = self.getList('authorUrl')[idx]
                else:
                    author_url = None
                # make sure doreplacements & removeallentities are honored.
                if doreplacements:
                    author_url = self.doReplacements(author_url, 'authorUrl')
                    author_name = self.doReplacements(author_name, 'author')
                if removeallentities:
                    author_url = removeAllEntities(author_url)
                    author_name = removeAllEntities(author_name)

                author_links.append(
                    anchor % ('author', author_url, author_name))
            joined_authors = self.join_list("join_string_authorHTML",
                                            author_links)
            self.setMetadata('authorHTML', joined_authors)

        self.extendList("extratags", self.getConfigList("extratags"))

        if self.getMetadataRaw('seriesUrl'):
            series_url = self.getMetadata('seriesUrl', removeallentities,
                                          doreplacements)
            series_name = self.getMetadata('series', removeallentities,
                                           doreplacements)
            self.setMetadata('seriesHTML',
                             anchor % ('series', series_url, series_name))
        elif self.getMetadataRaw('series'):
            # no URL to link to: store the raw series value unlinked.
            self.setMetadata('seriesHTML', self.getMetadataRaw('series'))

        for entry in self.getConfigList('make_linkhtml_entries'):
            # Assuming list, because it has to be site specific and
            # they are all lists.  Bail if the entry's Url list is not
            # the same length.
            if len(self.getList(entry + 'Url')) != len(self.getList(entry)):
                continue
            entry_links = []
            for idx, label in enumerate(self.getList(entry)):
                target = self.getList(entry + 'Url')[idx]
                # make sure doreplacements & removeallentities are honored.
                if doreplacements:
                    target = self.doReplacements(target, entry + 'Url')
                    label = self.doReplacements(label, entry)
                if removeallentities:
                    target = removeAllEntities(target)
                    label = removeAllEntities(label)

                entry_links.append(anchor % (entry, target, label))
            joined_links = self.join_list("join_string_" + entry + "HTML",
                                          entry_links)
            self.setMetadata(entry + 'HTML', joined_links)

        collected = {}
        for name in self.getValidMetaList():
            if self.isList(name) and keeplists:
                fetch = self.getList
            else:
                fetch = self.getMetadata
            collected[name] = fetch(name, removeallentities, doreplacements)

        return collected
Exemple #12
0
    def getAllMetadata(self, removeallentities=False, doreplacements=True, keeplists=False):
        """
        Return a dict holding every entry named by getValidMetaList().

        Single values and list values both come back as strings from
        getMetadata(), unless keeplists=True, in which case list entries
        are returned as lists from getList().

        Side effects: authorHTML, seriesHTML (when a series or seriesUrl
        is set) and <entry>HTML for each make_linkhtml_entries config
        name are rebuilt via setMetadata(), and the extratags list is
        extended with the configured extratags.
        """
        link_fmt = "<a class='%slink' href='%s'>%s</a>"

        # special handling for authors/authorUrls
        if self.isList("author"):  # more than one author, assume multiple authorUrl too.
            links = []
            for pos, name in enumerate(self.getList("author")):
                # authors without a matching authorUrl get no URL.
                href = None if len(self.getList("authorUrl")) <= pos else self.getList("authorUrl")[pos]
                # make sure doreplacements & removeallentities are honored.
                if doreplacements:
                    href = self.doReplacements(href, "authorUrl")
                    name = self.doReplacements(name, "author")
                if removeallentities:
                    href = removeAllEntities(href)
                    name = removeAllEntities(name)
                links.append(link_fmt % ("author", href, name))
            self.setMetadata("authorHTML", self.join_list("join_string_authorHTML", links))
        else:
            href = self.getMetadata("authorUrl", removeallentities, doreplacements)
            name = self.getMetadata("author", removeallentities, doreplacements)
            self.setMetadata("authorHTML", link_fmt % ("author", href, name))

        self.extendList("extratags", self.getConfigList("extratags"))

        if self.getMetadataRaw("seriesUrl"):
            href = self.getMetadata("seriesUrl", removeallentities, doreplacements)
            name = self.getMetadata("series", removeallentities, doreplacements)
            self.setMetadata("seriesHTML", link_fmt % ("series", href, name))
        elif self.getMetadataRaw("series"):
            # no URL to link to: store the raw series value unlinked.
            self.setMetadata("seriesHTML", self.getMetadataRaw("series"))

        for meta_key in self.getConfigList("make_linkhtml_entries"):
            # Assuming list, because it has to be site specific and
            # they are all lists.  Bail if the matching Url list is not
            # the same length.
            if len(self.getList(meta_key + "Url")) != len(self.getList(meta_key)):
                continue
            links = []
            for pos, name in enumerate(self.getList(meta_key)):
                href = self.getList(meta_key + "Url")[pos]
                # make sure doreplacements & removeallentities are honored.
                if doreplacements:
                    href = self.doReplacements(href, meta_key + "Url")
                    name = self.doReplacements(name, meta_key)
                if removeallentities:
                    href = removeAllEntities(href)
                    name = removeAllEntities(name)
                links.append(link_fmt % (meta_key, href, name))
            self.setMetadata(meta_key + "HTML", self.join_list("join_string_" + meta_key + "HTML", links))

        result = {}
        for meta_key in self.getValidMetaList():
            as_list = self.isList(meta_key) and keeplists
            if as_list:
                result[meta_key] = self.getList(meta_key, removeallentities, doreplacements)
            else:
                result[meta_key] = self.getMetadata(meta_key, removeallentities, doreplacements)

        return result