예제 #1
0
class urldownloader:
    """Download URL assets into local directories and remember what was saved.

    Every URL handled by ``saveAssetFile`` is recorded in ``dictAssetFiles``,
    so a later request for the same URL reuses the file already on disk
    instead of issuing another HTTP request.
    """

    def __init__(self):
        """Create the logger, the shared HTTP session and the empty asset cache."""
        self.logger = logging.getLogger(LogHelper.LoggerName)
        # Not used by any method of this class; kept so that existing
        # attribute access from outside keeps working.
        self.dictPools = {}
        # Maps url -> AssetEntry(directory, filename) for every handled URL.
        self.dictAssetFiles = {}
        # Not used by any method of this class; kept for compatibility.
        self.assetFileIndex = 0
        # A single requests session shared by all downloads.
        self.session = requests.session()

    def get_attrib(self, node, name, default=None):
        """Return attribute ``name`` of ``node``, or ``default``.

        ``default`` is returned when ``node`` is None or when ``name`` is not
        a key of ``node.attrib``.
        """
        if node is None:
            return default
        return node.attrib.get(name, default)

    def set_attrib(self, node, name, value):
        """Set attribute ``name`` of ``node`` to ``value``; no-op if ``node`` is None."""
        if node is None:
            return
        node.attrib[name] = value

    def saveToFile(self, filename, data):
        """Write binary ``data`` to ``filename``, replacing any existing file.

        Best effort: a failure is logged with its traceback and not re-raised.
        """
        try:
            # The context manager closes the file even when write() fails.
            with open(filename, mode='wb') as out:
                out.write(data)
        except Exception as ex:
            self.logger.exception(LogHelper.getExceptionMsg(ex, "unable to save file: %s" % (filename)))

    def saveTextToFile(self, filename, data):
        """Write text ``data`` to ``filename``, replacing any existing file.

        The file is opened in text mode with the platform default encoding.
        Best effort: a failure is logged with its traceback and not re-raised.
        """
        try:
            with open(filename, mode='w') as out:
                out.write(data)
        except Exception as ex:
            self.logger.exception(LogHelper.getExceptionMsg(ex, "unable to save file: %s" % filename))

    def saveAssetFile(self, url, assetDir):
        """Fetch ``url`` into ``assetDir``, or reuse the file from an earlier call.

        The target file name is derived from the URL with
        ``FileHelper.getValidFilename``.

        Returns:
            True  -- the URL was fetched with HTTP status 200 and saved.
            False -- the request raised or returned another status; an empty
                     placeholder file is written and the URL is still cached,
                     so it is not retried by later calls.
            None  -- the URL was already handled; the existing file is copied
                     into ``assetDir`` when it was stored in another directory.
        """
        if url in self.dictAssetFiles:
            assetEntry = self.dictAssetFiles[url]
            if assetEntry.path != assetDir:
                copyfile(os.path.join(assetEntry.path, assetEntry.filename),
                         os.path.join(assetDir, assetEntry.filename))
            return None

        response = None
        try:
            response = self.session.get(url)
        except Exception as ex:
            # Deliberately broad: any download failure falls through to the
            # empty-placeholder branch below instead of aborting the caller.
            self.logger.warning("url download error: %s", url)
            self.logger.warning("\t %s", str(ex))

        assetFilename = FileHelper.getValidFilename(url)
        filename = os.path.join(assetDir, assetFilename)

        # Identity check on purpose: the truth value of a requests Response
        # reflects its status, so only "is not None" tells whether the
        # request produced a response at all.
        result = response is not None and response.status_code == 200
        if result:
            self.saveToFile(filename, response.content)
        else:
            self.saveTextToFile(filename, "")

        self.dictAssetFiles[url] = AssetEntry(assetDir, assetFilename)
        return result