class urldownloader:
    """Download URLs through one shared HTTP session and store them as files.

    Every URL handled by ``saveAssetFile`` is recorded in ``dictAssetFiles``
    so that a later request for the same URL reuses the stored file instead
    of downloading it again.
    """

    def __init__(self):
        self.logger = logging.getLogger(LogHelper.LoggerName)
        # Not referenced by any method of this class; kept so external code
        # that reads the attribute keeps working.
        self.dictPools = {}
        # url -> AssetEntry(directory, filename) for every URL already handled.
        self.dictAssetFiles = {}
        # Not referenced by any method of this class; kept for compatibility.
        self.assetFileIndex = 0
        # One session shared by all downloads made through this instance.
        self.session = requests.session()

    def get_attrib(self, node, name, default=None):
        """Return attribute ``name`` of ``node``, or ``default``.

        ``default`` is returned when ``node`` is None or when ``name`` is not
        present in ``node.attrib``.
        """
        if node is None:
            return default
        return node.attrib.get(name, default)

    def set_attrib(self, node, name, value):
        """Set ``node.attrib[name]`` to ``value``; do nothing if ``node`` is None."""
        if node is None:
            return
        node.attrib[name] = value

    def saveToFile(self, filename, data):
        """Write ``data`` to ``filename`` in binary mode.

        Best effort: any error is logged with its traceback and swallowed, so
        the caller is not told whether the write succeeded.
        """
        try:
            with open(filename, mode='wb') as handle:
                handle.write(data)
        except Exception as ex:
            self.logger.exception(
                LogHelper.getExceptionMsg(ex, "unable to save file: %s" % (filename)))

    def saveTextToFile(self, filename, data):
        """Write ``data`` to ``filename`` in text mode.

        Best effort: any error is logged with its traceback and swallowed, so
        the caller is not told whether the write succeeded.
        """
        try:
            with open(filename, mode='w') as handle:
                handle.write(data)
        except Exception as ex:
            self.logger.exception(
                LogHelper.getExceptionMsg(ex, "unable to save file: %s" % filename))

    def saveAssetFile(self, url, assetDir):
        """Download ``url`` into ``assetDir`` and remember where it was stored.

        If ``url`` was handled before, nothing is downloaded: the stored file
        is copied into ``assetDir`` when it lives in a different directory.

        Otherwise the URL is fetched with the shared session. A response with
        status 200 has its content written to disk; a failed request or any
        other status produces an empty placeholder file. In both cases the
        URL is recorded in ``dictAssetFiles``.

        Returns:
            True when the server answered 200 and the content was passed to
            ``saveToFile`` (which swallows write errors, so True does not
            guarantee the file is on disk); False when the request failed or
            the status was not 200; None when ``url`` had already been
            handled.
        """
        if url in self.dictAssetFiles:
            assetEntry = self.dictAssetFiles[url]
            if assetEntry.path != assetDir:
                copyfile(os.path.join(assetEntry.path, assetEntry.filename),
                         os.path.join(assetDir, assetEntry.filename))
            # NOTE(review): the cache-hit path has always returned None rather
            # than a bool; kept as-is so existing callers see the same value.
            return None

        response = None
        try:
            response = self.session.get(url)
        except Exception as ex:
            # Deliberately broad: a failed download must not abort the caller,
            # it just ends up as an empty placeholder file below.
            self.logger.warning("url download error: %s", url)
            self.logger.warning("\t %s", str(ex))

        assetFilename = FileHelper.getValidFilename(url)
        filename = os.path.join(assetDir, assetFilename)

        result = False
        if response is not None and response.status_code == 200:
            self.saveToFile(filename, response.content)
            result = True
        else:
            # Leave an empty file so the recorded entry still points at
            # something that exists.
            self.saveTextToFile(filename, "")

        self.dictAssetFiles[url] = AssetEntry(assetDir, assetFilename)
        return result