def __init__(self, cache, key, fun, timedelta=timedelta(hours=24), **args):
    """Bind a cached computation: *fun* (called with **args) whose result
    is stored in *cache* under the digest of *key*, refreshed after
    *timedelta* (default 24h).

    NOTE(review): the ``timedelta`` parameter shadows the imported
    ``datetime.timedelta``; the default is evaluated at def-time, so it
    works, but the keyword name is kept only for caller compatibility.
    """
    # How stale a cached value may get before recomputation.
    self.__maxTimeDelta = timedelta
    # Key is digested so arbitrary unicode keys map to safe cache keys.
    self.__key = stringToDigest(unicode(key))
    self.__cache = cache
    # Deferred computation and its keyword arguments.
    self.__fun = fun
    self.__args = args
    # Guards concurrent recomputation.
    # NOTE(review): attribute name kept as-is ("__mutext" — presumably a
    # typo for "mutex"); other methods may reference it.
    self.__mutext = threading.Lock()
def addToQueue(self, url, edition, topic):
    """Queue *url* for download unless it is already pending or fetched.

    The url is expected to be a redirect wrapper containing "&url=";
    the real target is extracted and unquoted before queuing.
    Returns True if the url was enqueued, False if skipped.
    Raises ValueError (from str.index) if "&url=" is absent.
    """
    # Output directory is keyed by language (edition) or by topic,
    # depending on configuration.
    klassDir = os.path.join(self.__dir, edition if self.__langAsKlass else topic)
    if not os.path.exists(klassDir):
        os.makedirs(klassDir)
    # NOTE(review): the dedup digest is computed from the *wrapper* URL,
    # before the real target is extracted below — presumably deliberate
    # (wrapper is unique per article), but confirm.
    uuid = stringToDigest(url)
    # Strip everything up to and including "&url=" (5 chars), then
    # percent-decode to recover the real target URL.
    idx = url.index("&url=") + 5
    url = url[idx:]
    url = urllib.unquote(url)
    htmlFilename = os.path.join(klassDir, uuid + ".html")
    txtFilename = os.path.join(klassDir, uuid + ".txt")
    # Already queued, or final .txt output already produced: skip.
    if uuid in self.__pending or os.path.exists(txtFilename):
        return False
    # NOTE(review): the txt check here is redundant — we returned above
    # whenever the .txt exists — so this condition is effectively
    # "not os.path.exists(htmlFilename) or True".
    if not os.path.exists(htmlFilename) or not os.path.exists(txtFilename):
        self.__pending.append(uuid)
        self.__queue.put({"url": url, "html": htmlFilename, "txt": txtFilename, "uuid": uuid})
        # NOTE(review): log path always uses *topic*, even when the file
        # itself is stored under *edition* (langAsKlass) — verify intended.
        self.__logFile.write(os.path.join(topic, uuid + ".html") + " : " + url + "\n")
        return True
    return False
def getRealUrlDigest(self):
    """Digest of the real (resolved) URL."""
    realUrl = self.getRealUrl()
    return stringToDigest(realUrl)
def getUrlExpDigest(self):
    """Digest of the expanded URL."""
    expandedUrl = self.getExpandedUrl()
    return stringToDigest(expandedUrl)
def getCachedWebpageFilename(self, urlAddress):
    """Return the path of the cached HTML file for *urlAddress*.

    The file lives under the "htmls/" subdirectory of the inlined
    webpage directory, named by the url's digest.
    """
    digest = stringToDigest(urlAddress)
    relativeName = "htmls/" + digest + ".html"
    return os.path.join(self.__inlinedWebpageDir, relativeName)
def setStatus(self, url, status):
    """Record *status* for *url*, keyed by the url's digest.

    The original url is stored alongside the status so it can be
    recovered from the digest-keyed entry.
    """
    entry = {"status": status, "url": url}
    self.__cacheStatus[stringToDigest(url)] = entry
def getStaus(self, url):
    """Return the status entry for *url* ({"status": ..., "url": ...}).

    Raises KeyError if no status was recorded; use hasStatus() first.
    NOTE(review): method name looks like a typo for "getStatus" but is
    kept — renaming would break existing callers.
    """
    return self.__cacheStatus[stringToDigest(url)]
def hasStatus(self, url):
    """Return True if a status entry has been recorded for *url*."""
    # dict.has_key() is deprecated in Python 2 and removed in Python 3;
    # the `in` operator is the exact equivalent membership test.
    return stringToDigest(url) in self.__cacheStatus