def getMetadata(self, key, removeallentities=False, doreplacements=True):
    """
    Return the display form of the metadata entry *key*.

    key -- metadata entry name.  Names rejected by isValidMetaEntry()
        yield None.
    removeallentities -- when true, scalar results are passed through
        removeAllEntities(); for list entries the flag is forwarded to
        getList().
    doreplacements -- when true, the result is passed through
        doReplacements().

    List entries are joined with join_list().  Scalar entries with a
    true value get comma grouping (numWords, numChapters) or strftime()
    formatting (dateCreated, datePublished, dateUpdated; the format is
    read from the "<key>_format" setting).  A valid key with no stored
    value returns the "default_value_<key>" setting.
    """
    if not self.isValidMetaEntry(key):
        return None

    if self.isList(key):
        # NOTE(review): getList() is always called with doreplacements=True,
        # whatever the caller passed -- confirm this is intended.
        value = self.join_list(
            key, self.getList(key, removeallentities, doreplacements=True))
        if doreplacements:
            value = self.doReplacements(value, key + "_LIST")
        return value

    if key not in self.metadata:
        return self.getConfig("default_value_" + key)

    value = self.metadata[key]
    if value:
        if key == "numWords":
            value = commaGroups(value)
        elif key == "numChapters":
            value = commaGroups("%d" % value)
        elif key == "dateCreated":
            # Compare for equality: `key in ("dateCreated")` is a substring
            # test against a plain string and also matches e.g. "date".
            value = value.strftime(
                self.getConfig(key + "_format", "%Y-%m-%d %H:%M:%S"))
        elif key in ("datePublished", "dateUpdated"):
            value = value.strftime(self.getConfig(key + "_format", "%Y-%m-%d"))

    # Replacements and entity removal also apply to empty/false values.
    if doreplacements:
        value = self.doReplacements(value, key)
    if removeallentities and value is not None:
        return removeAllEntities(value)
    return value
def getMetadata(self, key, removeallentities=False, doreplacements=True):
    """
    Return the display form of the metadata entry *key*, with caching.

    key -- metadata entry name.  Names rejected by isValidMetaEntry()
        yield None and are not cached.
    removeallentities -- when true, scalar results are passed through
        removeAllEntities(); for list entries the flag is forwarded to
        getList().
    doreplacements -- when true, the result is passed through
        doReplacements().

    Results are stored in self.processed_metadata_cache[key] under the
    (removeallentities, doreplacements) pair and served from there on
    later calls.
    NOTE(review): nothing in this method invalidates the cache;
    presumably the setters elsewhere do -- confirm.
    """
    if not self.isValidMetaEntry(key):
        return None

    # check for a cached value to speed processing
    cache_key = (removeallentities, doreplacements)
    if (key in self.processed_metadata_cache
            and cache_key in self.processed_metadata_cache[key]):
        return self.processed_metadata_cache[key][cache_key]

    value = None
    if self.isList(key):
        # NOTE(review): getList() is always called with doreplacements=True,
        # whatever the caller passed -- confirm this is intended.
        value = self.join_list(
            key, self.getList(key, removeallentities, doreplacements=True))
        if doreplacements:
            value = self.doReplacements(value, key + "_LIST")
    elif key in self.metadata:
        value = self.metadata[key]
        if value:
            if key in ("numWords", "numChapters"):
                value = commaGroups(unicode(value))
            # Compare for equality: `key in ("dateCreated")` is a substring
            # test against a plain string and also matches e.g. "date".
            if key == "dateCreated":
                value = value.strftime(
                    self.getConfig(key + "_format", "%Y-%m-%d %H:%M:%S"))
            if key in ("datePublished", "dateUpdated"):
                value = value.strftime(
                    self.getConfig(key + "_format", "%Y-%m-%d"))
            # Any other date/time value is formatted only when a
            # "<key>_format" setting exists; the keys handled above are
            # already strings by now.
            if (isinstance(value, (datetime.date, datetime.datetime, datetime.time))
                    and self.hasConfig(key + "_format")):
                value = value.strftime(self.getConfig(key + "_format"))
            if (key == "title"
                    and (self.chapter_first or self.chapter_last)
                    and self.getConfig("title_chapter_range_pattern")):
                first = self.chapter_first or "1"
                last = self.chapter_last or self.getMetadata("numChapters")
                templ = string.Template(
                    self.getConfig("title_chapter_range_pattern"))
                value = templ.substitute({"title": value,
                                          "first": commaGroups(first),
                                          "last": commaGroups(last)})

        # Replacements and entity removal also apply to empty/false values.
        if doreplacements:
            value = self.doReplacements(value, key)
        if removeallentities and value is not None:
            value = removeAllEntities(value)
    else:
        value = self.getConfig("default_value_" + key)

    # save a cached value to speed processing
    self.processed_metadata_cache.setdefault(key, {})[cache_key] = value
    return value
def getMetadata(self, key, removeallentities=False, doreplacements=True):
    """
    Return the display form of the metadata entry *key*, with caching.

    key -- metadata entry name.  Names rejected by isValidMetaEntry()
        yield None (and that None is cached too).
    removeallentities -- when true, scalar results are passed through
        removeAllEntities(); for list entries the flag is forwarded to
        getList().
    doreplacements -- when true, the result is passed through
        doReplacements().

    The cache in self.processed_metadata_cache[key] is consulted before
    anything else, keyed by the (removeallentities, doreplacements)
    pair; every computed result is stored there before returning.
    NOTE(review): nothing in this method invalidates the cache;
    presumably the setters elsewhere do -- confirm.
    """
    # check for a cached value to speed processing
    cache_key = (removeallentities, doreplacements)
    if (key in self.processed_metadata_cache
            and cache_key in self.processed_metadata_cache[key]):
        return self.processed_metadata_cache[key][cache_key]

    value = None
    if not self.isValidMetaEntry(key):
        # Fall through so the None result for an invalid entry is cached.
        pass
    elif self.isList(key):
        # NOTE(review): getList() is always called with doreplacements=True,
        # whatever the caller passed -- confirm this is intended.
        value = self.join_list(
            key, self.getList(key, removeallentities, doreplacements=True))
        if doreplacements:
            value = self.doReplacements(value, key + "_LIST")
    elif key in self.metadata:
        value = self.metadata[key]
        if value:
            if key in ("numWords", "numChapters"):
                value = commaGroups(unicode(value))
            # Compare for equality: `key in ("dateCreated")` is a substring
            # test against a plain string and also matches e.g. "date".
            if key == "dateCreated":
                value = value.strftime(
                    self.getConfig(key + "_format", "%Y-%m-%d %H:%M:%S"))
            if key in ("datePublished", "dateUpdated"):
                value = value.strftime(
                    self.getConfig(key + "_format", "%Y-%m-%d"))
            # Any other date/time value is formatted only when a
            # "<key>_format" setting exists; the keys handled above are
            # already strings by now.
            if (isinstance(value, (datetime.date, datetime.datetime, datetime.time))
                    and self.hasConfig(key + "_format")):
                value = value.strftime(self.getConfig(key + "_format"))
            if (key == "title"
                    and (self.chapter_first or self.chapter_last)
                    and self.getConfig("title_chapter_range_pattern")):
                first = self.chapter_first or "1"
                last = self.chapter_last or self.getMetadata("numChapters")
                templ = string.Template(
                    self.getConfig("title_chapter_range_pattern"))
                value = templ.substitute({'title': value,
                                          'first': commaGroups(first),
                                          'last': commaGroups(last)})

        # Replacements and entity removal also apply to empty/false values.
        if doreplacements:
            value = self.doReplacements(value, key)
        if removeallentities and value is not None:
            value = removeAllEntities(value)
    else:
        value = self.getConfig("default_value_" + key)

    # save a cached value to speed processing
    self.processed_metadata_cache.setdefault(key, {})[cache_key] = value
    return value
def get_filename_safe_metadata(self, pattern=None):
    """
    Return every metadata entry with filename-unsafe text replaced.

    pattern -- optional regular expression describing what to replace.
        When omitted, it is built with re_compile() from the
        "output_filename_safepattern" setting, falling back to a
        built-in expression.

    Each value has its entities stripped with removeAllEntities() and
    every match of the pattern swapped for '_'.  The 'formatext' entry
    is copied through untouched.
    """
    source = self.getAllMetadata()

    if pattern:
        safe_pattern = pattern
    else:
        default_expr = r"(^\.|/\.|[^a-zA-Z0-9_\. \[\]\(\)&'-]+)"
        safe_pattern = re_compile(
            self.getConfig("output_filename_safepattern", default_expr),
            "output_filename_safepattern")

    cleaned = {}
    for name in source.keys():
        entry = self.getMetadata(name)
        if name != 'formatext':
            # The file extension is set by us anyway, so only the other
            # entries are sanitised.
            entry = re.sub(safe_pattern, '_', removeAllEntities(entry))
        cleaned[name] = entry
    return cleaned
def formatFileName(self, template, allowunsafefilename=True):
    """
    Build the output file name by filling *template* with metadata.

    template -- string.Template text; a false value selects the
        default "${title}-${siteabbrev}_${storyId}${formatext}".
    allowunsafefilename -- when true, the getAllMetadata() values are
        substituted as they are.  Otherwise each value has its entities
        stripped with removeAllEntities() and every match of the
        "output_filename_safepattern" expression replaced by '_'.

    Returns the substituted name encoded as UTF-8.
    """
    metadata = self.getAllMetadata()

    # fall back default:
    if not template:
        template = "${title}-${siteabbrev}_${storyId}${formatext}"

    if allowunsafefilename:
        substitutions = metadata
    else:
        unsafe = re_compile(
            self.getConfig("output_filename_safepattern",
                           r"[^a-zA-Z0-9_\. \[\]\(\)&'-]+"),
            "output_filename_safepattern")
        substitutions = {}
        for name in metadata.keys():
            entry = removeAllEntities(self.getMetadata(name))
            substitutions[name] = re.sub(unsafe, '_', entry)

    filename = string.Template(template).substitute(substitutions)
    return filename.encode('utf8')
def getMetadata(self, key, removeallentities=False, doreplacements=True):
    """
    Return the display form of the metadata entry *key*.

    key -- metadata entry name.  Names rejected by isValidMetaEntry()
        yield None.
    removeallentities -- when true, scalar results are passed through
        removeAllEntities(); for list entries the flag is forwarded to
        getList().
    doreplacements -- when true, the result is passed through
        doReplacements().

    List entries are joined with join_list().  Scalar entries with a
    true value get comma grouping (numWords, numChapters), strftime()
    formatting (dateCreated, datePublished, dateUpdated; format read
    from the "<key>_format" setting), or, for the title when a chapter
    range is set, expansion of the "title_chapter_range_pattern"
    template.  A valid key with no stored value returns the
    "default_value_<key>" setting.
    """
    if not self.isValidMetaEntry(key):
        return None

    if self.isList(key):
        # NOTE(review): getList() is always called with doreplacements=True,
        # whatever the caller passed -- confirm this is intended.
        value = self.join_list(
            key, self.getList(key, removeallentities, doreplacements=True))
        if doreplacements:
            value = self.doReplacements(value, key + "_LIST")
        return value

    if key not in self.metadata:
        return self.getConfig("default_value_" + key)

    value = self.metadata[key]
    if value:
        if key == "numWords":
            value = commaGroups(value)
        elif key == "numChapters":
            value = commaGroups("%d" % value)
        elif key == "dateCreated":
            # Compare for equality: `key in ("dateCreated")` is a substring
            # test against a plain string and also matches e.g. "date".
            value = value.strftime(
                self.getConfig(key + "_format", "%Y-%m-%d %H:%M:%S"))
        elif key in ("datePublished", "dateUpdated"):
            value = value.strftime(
                self.getConfig(key + "_format", "%Y-%m-%d"))
        elif (key == "title"
              and (self.chapter_first or self.chapter_last)
              and self.getConfig("title_chapter_range_pattern")):
            # Only part of the story was requested: decorate the title
            # with the chapter range.
            first = self.chapter_first or "1"
            last = self.chapter_last or self.getMetadata("numChapters")
            templ = string.Template(
                self.getConfig("title_chapter_range_pattern"))
            value = templ.substitute({
                'title': value,
                'first': commaGroups(first),
                'last': commaGroups(last)
            })

    # Replacements and entity removal also apply to empty/false values.
    if doreplacements:
        value = self.doReplacements(value, key)
    if removeallentities and value is not None:
        return removeAllEntities(value)
    return value
def formatFileName(self, template, allowunsafefilename=True):
    """
    Build the output file name by filling *template* with metadata.

    template -- string.Template text; a false value selects the
        default "${title}-${siteabbrev}_${storyId}${formatext}".
    allowunsafefilename -- when true, the getAllMetadata() values are
        substituted as they are; otherwise the sanitised values from
        get_filename_safe_metadata() are used.

    Returns the substituted name encoded as UTF-8.
    """
    # fall back default:
    if not template:
        template = "${title}-${siteabbrev}_${storyId}${formatext}"

    if allowunsafefilename:
        values = self.getAllMetadata()
    else:
        # Same pattern and 'formatext' exemption, kept in one place.
        values = self.get_filename_safe_metadata()

    return string.Template(template).substitute(values).encode('utf8')
def formatFileName(self, template, allowunsafefilename=True):
    """
    Build the output file name by filling *template* with metadata.

    template -- string.Template text; a false value selects the
        default "${title}-${siteabbrev}_${storyId}${formatext}".
    allowunsafefilename -- when true, the getAllMetadata() values are
        substituted as they are; otherwise the sanitised values from
        get_filename_safe_metadata() are used.

    Returns the substituted name encoded as UTF-8.
    """
    # fall back default:
    if not template:
        template = "${title}-${siteabbrev}_${storyId}${formatext}"

    if allowunsafefilename:
        values = self.getAllMetadata()
    else:
        # Same pattern and "formatext" exemption, kept in one place.
        values = self.get_filename_safe_metadata()

    return string.Template(template).substitute(values).encode("utf8")
def getMetadata(self, key, removeallentities=False, doreplacements=True):
    """
    Return the display form of the metadata entry *key*.

    key -- metadata entry name.  Names rejected by isValidMetaEntry()
        yield None.
    removeallentities -- when true, scalar results are passed through
        removeAllEntities(); for list entries the flag is forwarded to
        getList().
    doreplacements -- when true, the result is passed through
        doReplacements().

    List entries are joined with join_list().  Scalar entries with a
    true value get comma grouping (numWords, numChapters), strftime()
    formatting (dateCreated, datePublished, dateUpdated; format read
    from the "<key>_format" setting), or, for the title when a chapter
    range is set, expansion of the "title_chapter_range_pattern"
    template.  A valid key with no stored value returns the
    "default_value_<key>" setting.
    """
    if not self.isValidMetaEntry(key):
        return None

    if self.isList(key):
        # NOTE(review): getList() is always called with doreplacements=True,
        # whatever the caller passed -- confirm this is intended.
        value = self.join_list(key, self.getList(key, removeallentities, doreplacements=True))
        if doreplacements:
            value = self.doReplacements(value, key+"_LIST")
        return value

    if key not in self.metadata:
        return self.getConfig("default_value_"+key)

    value = self.metadata[key]
    if value:
        if key == "numWords":
            value = commaGroups(value)
        elif key == "numChapters":
            value = commaGroups("%d" % value)
        elif key == "dateCreated":
            # Compare for equality: `key in ("dateCreated")` is a substring
            # test against a plain string and also matches e.g. "date".
            value = value.strftime(self.getConfig(key+"_format", "%Y-%m-%d %H:%M:%S"))
        elif key in ("datePublished", "dateUpdated"):
            value = value.strftime(self.getConfig(key+"_format", "%Y-%m-%d"))
        elif (key == "title"
              and (self.chapter_first or self.chapter_last)
              and self.getConfig("title_chapter_range_pattern")):
            # Only part of the story was requested: decorate the title
            # with the chapter range.
            first = self.chapter_first or "1"
            last = self.chapter_last or self.getMetadata("numChapters")
            templ = string.Template(self.getConfig("title_chapter_range_pattern"))
            value = templ.substitute({'title': value,
                                      'first': commaGroups(first),
                                      'last': commaGroups(last)})

    # Replacements and entity removal also apply to empty/false values.
    if doreplacements:
        value = self.doReplacements(value, key)
    if removeallentities and value is not None:
        return removeAllEntities(value)
    return value
def getAllMetadata(self,
                   removeallentities=False,
                   doreplacements=True,
                   keeplists=False):
    '''
    All single value *and* list value metadata as strings (unless
    keeplists=True, then keep lists).

    removeallentities, doreplacements -- forwarded to getMetadata() /
        getList(), and applied to the pieces of the generated links.
    keeplists -- when true, list entries are returned via getList()
        instead of being joined by getMetadata().

    Side effects: sets the authorHTML entry, sets seriesHTML when a raw
    seriesUrl or series value exists, sets "<name>HTML" for each name in
    the make_linkhtml_entries setting, and extends the extratags list
    from the extratags setting.

    Returns a dict with one item per name from getValidMetaList().
    '''
    allmetadata = {}

    # special handling for authors/authorUrls
    linkhtml = "<a class='%slink' href='%s'>%s</a>"
    if self.isList('author'):
        # more than one author, assume multiple authorUrl too.
        # Both lists are fetched once rather than on every iteration.
        authors = self.getList('author')
        authorurls = self.getList('authorUrl')
        htmllist = []
        for i, auth in enumerate(authors):
            # Authors beyond the end of the URL list get no URL.
            # NOTE(review): None then flows into doReplacements() /
            # removeAllEntities() and the href -- confirm that is wanted.
            aurl = authorurls[i] if i < len(authorurls) else None
            # make sure doreplacements & removeallentities are honored.
            if doreplacements:
                aurl = self.doReplacements(aurl, 'authorUrl')
                auth = self.doReplacements(auth, 'author')
            if removeallentities:
                aurl = removeAllEntities(aurl)
                auth = removeAllEntities(auth)
            htmllist.append(linkhtml % ('author', aurl, auth))
        self.setMetadata(
            'authorHTML',
            self.join_list("join_string_authorHTML", htmllist))
    else:
        self.setMetadata(
            'authorHTML', linkhtml %
            ('author',
             self.getMetadata('authorUrl', removeallentities, doreplacements),
             self.getMetadata('author', removeallentities, doreplacements)))

    self.extendList("extratags", self.getConfigList("extratags"))

    if self.getMetadataRaw('seriesUrl'):
        self.setMetadata(
            'seriesHTML', linkhtml %
            ('series',
             self.getMetadata('seriesUrl', removeallentities, doreplacements),
             self.getMetadata('series', removeallentities, doreplacements)))
    elif self.getMetadataRaw('series'):
        self.setMetadata('seriesHTML', self.getMetadataRaw('series'))

    for k in self.getConfigList('make_linkhtml_entries'):
        # Assuming list, because it has to be site specific and
        # they are all lists.  Bail if kUrl list not the same
        # length.
        urls = self.getList(k + 'Url')
        names = self.getList(k)
        if len(urls) != len(names):
            continue
        htmllist = []
        for url, v in zip(urls, names):
            # make sure doreplacements & removeallentities are honored.
            if doreplacements:
                url = self.doReplacements(url, k + 'Url')
                v = self.doReplacements(v, k)
            if removeallentities:
                url = removeAllEntities(url)
                v = removeAllEntities(v)
            htmllist.append(linkhtml % (k, url, v))
        self.setMetadata(
            k + 'HTML',
            self.join_list("join_string_" + k + "HTML", htmllist))

    for k in self.getValidMetaList():
        if self.isList(k) and keeplists:
            allmetadata[k] = self.getList(k, removeallentities,
                                          doreplacements)
        else:
            allmetadata[k] = self.getMetadata(k, removeallentities,
                                              doreplacements)

    return allmetadata
def getAllMetadata(self, removeallentities=False, doreplacements=True, keeplists=False):
    """
    All single value *and* list value metadata as strings (unless
    keeplists=True, then keep lists).

    removeallentities, doreplacements -- forwarded to getMetadata() /
        getList(), and applied to the pieces of the generated links.
    keeplists -- when true, list entries are returned via getList()
        instead of being joined by getMetadata().

    Side effects: sets the authorHTML entry, sets seriesHTML when a raw
    seriesUrl or series value exists, sets "<name>HTML" for each name in
    the make_linkhtml_entries setting, and extends the extratags list
    from the extratags setting.

    Returns a dict with one item per name from getValidMetaList().
    """
    allmetadata = {}

    # special handling for authors/authorUrls
    linkhtml = "<a class='%slink' href='%s'>%s</a>"
    if self.isList("author"):
        # more than one author, assume multiple authorUrl too.
        # Both lists are fetched once rather than on every iteration.
        authors = self.getList("author")
        authorurls = self.getList("authorUrl")
        htmllist = []
        for i, auth in enumerate(authors):
            # Authors beyond the end of the URL list get no URL.
            # NOTE(review): None then flows into doReplacements() /
            # removeAllEntities() and the href -- confirm that is wanted.
            aurl = authorurls[i] if i < len(authorurls) else None
            # make sure doreplacements & removeallentities are honored.
            if doreplacements:
                aurl = self.doReplacements(aurl, "authorUrl")
                auth = self.doReplacements(auth, "author")
            if removeallentities:
                aurl = removeAllEntities(aurl)
                auth = removeAllEntities(auth)
            htmllist.append(linkhtml % ("author", aurl, auth))
        self.setMetadata("authorHTML", self.join_list("join_string_authorHTML", htmllist))
    else:
        self.setMetadata(
            "authorHTML",
            linkhtml
            % (
                "author",
                self.getMetadata("authorUrl", removeallentities, doreplacements),
                self.getMetadata("author", removeallentities, doreplacements),
            ),
        )

    self.extendList("extratags", self.getConfigList("extratags"))

    if self.getMetadataRaw("seriesUrl"):
        self.setMetadata(
            "seriesHTML",
            linkhtml
            % (
                "series",
                self.getMetadata("seriesUrl", removeallentities, doreplacements),
                self.getMetadata("series", removeallentities, doreplacements),
            ),
        )
    elif self.getMetadataRaw("series"):
        self.setMetadata("seriesHTML", self.getMetadataRaw("series"))

    for k in self.getConfigList("make_linkhtml_entries"):
        # Assuming list, because it has to be site specific and
        # they are all lists.  Bail if kUrl list not the same
        # length.
        urls = self.getList(k + "Url")
        names = self.getList(k)
        if len(urls) != len(names):
            continue
        htmllist = []
        for url, v in zip(urls, names):
            # make sure doreplacements & removeallentities are honored.
            if doreplacements:
                url = self.doReplacements(url, k + "Url")
                v = self.doReplacements(v, k)
            if removeallentities:
                url = removeAllEntities(url)
                v = removeAllEntities(v)
            htmllist.append(linkhtml % (k, url, v))
        self.setMetadata(k + "HTML", self.join_list("join_string_" + k + "HTML", htmllist))

    for k in self.getValidMetaList():
        if self.isList(k) and keeplists:
            allmetadata[k] = self.getList(k, removeallentities, doreplacements)
        else:
            allmetadata[k] = self.getMetadata(k, removeallentities, doreplacements)

    return allmetadata