def _prepareDocument(self, record):
    """Stamp a record with a publication date and a fresh id.

    ``record`` may be a string, in which case it is wrapped in an
    ``XMLFragment``, or an already-built document object, which is used
    as-is (and modified in place).

    If the document has no ``pubDate`` element anywhere, one is appended
    to the root element. Its text is the current local time in ISO 8601
    form with the local UTC offset (e.g. ``2004-03-01T12:30:00-07:00``)
    and its ``seconds`` attribute holds the raw ``time.time()`` value.

    The root element's ``id`` attribute is always set from
    ``self.getNextID()``.

    Returns a ``(doc, recordID)`` tuple.
    """
    doc = XMLFragment(record) if isinstance(record, str) else record
    root = doc.getRootElement()

    # Add the date if it's not already there.
    if len(doc.xpathEval("//pubDate")) == 0:
        date = time.time()
        local = time.localtime(date)

        # Work out the real offset of local time from UTC instead of
        # assuming a fixed "-07:00". time.timezone / time.altzone are
        # expressed in seconds *west* of UTC, hence the sign flip.
        if local.tm_isdst > 0 and time.daylight:
            offset = -time.altzone
        else:
            offset = -time.timezone
        sign = "+" if offset >= 0 else "-"
        hours, minutes = divmod(abs(offset) // 60, 60)

        # ISO8601 Date
        # TODO this still publishes local time (with its offset) rather
        # than GMT. Just not sure how to convert the time using XSL-T
        # otherwise.
        stamp = time.strftime("%Y-%m-%dT%H:%M:%S", local)
        pubDate = root.newChild(
            None, "pubDate", "%s%s%02d:%02d" % (stamp, sign, hours, minutes)
        )
        pubDate.setProp("seconds", str(date))

    # Set the id for the post
    # TODO: maybe this would be better as meta-data or should at least
    # be in a namespace
    # This call will also mark the database as modified
    # TODO: see if there's a better way.
    recordID = self.getNextID()
    root.setProp("id", str(recordID))

    return (doc, recordID)
def _checkBlacklist(self, content, message):
    """Run ``content`` and any URLs embedded in it through ``_checkURL``.

    ``content`` is first checked as a plain string. Only if that check
    returns 0 (treated here as "no match") is ``content`` parsed as an
    XML fragment, and every ``href`` attribute value found in it is
    passed to ``self._checkURL`` together with ``message``.

    Always returns ``None``; whatever ``_checkURL`` does on a match is
    defined outside this method.
    """
    # Check for a direct string, we're just interested in the hostname so
    # we need to clean it up.
    if self._checkURL(content, message) != 0:
        return

    # No match on a plain string so we need to try parsing as XML to
    # see there are any embedded URLs
    try:
        contentDoc = XMLFragment("<wrap>" + content + "</wrap>")
        for url in contentDoc.xpathEval("//@href"):
            self._checkURL(url.content, message)
    except Exception:
        # if this fails then we don't worry because it will get rejected
        # anyway. Only ordinary errors are ignored; KeyboardInterrupt and
        # SystemExit are allowed to propagate.
        # NOTE(review): the _checkURL calls above are also covered by this
        # handler, so an Exception raised by _checkURL for an embedded URL
        # is silently dropped - confirm that is intended.
        pass
def queryDocument(self, content, xpath):
    """Evaluate ``xpath`` against ``content`` and return the matches as XML.

    ``content`` is parsed into an ``XMLFragment`` and the expression is
    evaluated against it. Each matching node is unlinked from the source
    document, has its namespaces reconciled against that document, and is
    re-parented under a new ``results`` element in a fresh fragment.

    Returns the serialised result fragment, or the literal string
    ``<?xml version="1.0"?><results/>`` when nothing matched.
    """
    doc = XMLFragment(content)
    result = XMLFragment()
    matches = doc.xpathEval(xpath)

    if len(matches) == 0:
        return '<?xml version="1.0"?><results/>'

    root = result.getFragment().newChild(None, "results", None)
    for item in matches:
        # Detach the node from the source tree before moving it across.
        item.unlinkNode()
        item.reconciliateNs(doc.getDocument())
        root.addChild(item)

    return result.serialize()