def putHeader(self, iHeader):
    """Store ``iHeader`` as this object's header list.

    Args:
        iHeader: A ``list`` (or ``list`` subclass) of header entries.

    Returns:
        None. When ``iHeader`` is not a list, an error is logged and the
        previously stored header is left untouched.
    """
    # isinstance() rather than an exact type comparison, so list subclasses
    # are accepted as well.
    if not isinstance(iHeader, list):
        gLogger.error("Expect a list but input type is: {}".format(
            type(iHeader)))
        return
    self.__mHeader = iHeader
def putParams(self, iParams):
    """URL-encode ``iParams`` and store the resulting query string.

    Args:
        iParams: A ``dict`` (or ``dict`` subclass) mapping parameter names to
            values.

    Returns:
        None. When ``iParams`` is not a dict, an error is logged and the
        previously stored parameters are left untouched.
    """
    # isinstance() rather than an exact type comparison, so dict subclasses
    # (e.g. OrderedDict) are accepted as well.
    if not isinstance(iParams, dict):
        gLogger.error("Expect a dict but input type is: {}".format(
            type(iParams)))
        return
    self.__mParams = urllib.parse.urlencode(iParams)
def addArgs(self, iArgs):
    """Queue one set of positional arguments for a later run.

    Args:
        iArgs: A ``tuple`` (or ``tuple`` subclass such as a namedtuple) of
            positional arguments.

    Returns:
        None. When ``iArgs`` is not a tuple, an error is logged and nothing
        is queued.
    """
    # isinstance() rather than an exact type comparison, so tuple subclasses
    # are accepted as well.
    if not isinstance(iArgs, tuple):
        gLogger.error("Expect a tuple but input type is: {}".format(
            type(iArgs)))
        return
    # Each entry is stored as an (args, kwargs) pair; no keyword arguments
    # are supplied here, hence the None.
    self.__mFuncVar.append((iArgs, None))
def setRunner(self, iRunner):
    """Remember ``iRunner`` as the callable to execute.

    Args:
        iRunner: Any callable object.

    Returns:
        None. A non-callable argument is reported through the logger and the
        currently stored runner is kept.
    """
    if callable(iRunner):
        self.__mRunner = iRunner
        return
    lActualType = type(iRunner)
    gLogger.error(
        "Expect a function but input type is: {}".format(lActualType))
def _getResourceUrl(self, iRawData): if not type(iRawData) is dict: gLogger.error("Expect a dict but input type is: {}".format( type(iRawData))) return return iRawData["bigImage"] or iRawData["centerImage"] or iRawData[ "smallImage"]
def loadResourceMetadata(self):
    """Fill the in-memory metadata index from the configured metadata file.

    The file path is read from the ``dpm`` / ``metadata_file`` configuration
    entry. Every non-blank line is parsed as one JSON document and stored in
    ``self.__mResourceMetadata`` under its ``uuid``. Records whose ``uuid``
    is missing or empty are logged and skipped. Nothing happens when the
    file does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Resolve the path once instead of querying the configuration twice.
    lMetadataFile = gConfigFileWrapper.getStr('dpm', 'metadata_file')
    if not os.path.exists(lMetadataFile):
        return
    with open(lMetadataFile, 'r') as in_file:
        # Iterate the file lazily rather than materializing every line.
        for lLine in in_file:
            # A blank line (e.g. a trailing newline) is not a record.
            if not lLine.strip():
                continue
            lJLine = json.loads(lLine)
            # .get() so a record lacking the key is reported below instead
            # of raising KeyError.
            lUuid = lJLine.get('uuid')
            if lUuid:
                self.__mResourceMetadata[lUuid] = lJLine
            else:
                gLogger.error(
                    "Resource {} does not have uuid".format(lJLine))
def run(self):
    """Submit one work request per queued argument set and wait on the pool.

    The run is refused (an error is logged, nothing is submitted) when the
    number of queued argument sets differs from the expected count
    ``self.__mNum``. Otherwise requests are built with
    ``threadpool.makeRequests``, handed to ``self.__mPool`` one by one, and
    ``wait()`` is called on the pool.
    """
    if len(self.__mFuncVar) != self.__mNum:
        lMessage = (
            "The number of FuncVar is not match. Expect {} but now is {}.")
        gLogger.error(lMessage.format(self.__mNum, len(self.__mFuncVar)))
        return
    lMessage = "Start to run {} in multi threads. Nums: {}. FuncVars: {}."
    gLogger.debug(
        lMessage.format(self.__mRunner, self.__mNum, self.__mFuncVar))
    lPool = self.__mPool
    for lRequest in threadpool.makeRequests(self.__mRunner,
                                            self.__mFuncVar):
        lPool.putRequest(lRequest)
    self.__mPool.wait()
def __getResouce(self):
    """Download the images listed on the current page that are not stored yet.

    Sends the list query for ``self.__mPage`` / ``self.__mCategory`` through
    a ``CPostRequest``. When the response is a 200 with a ``text/plain``
    content type and a non-empty ``rows`` array, each row is handed to
    ``__fetchRowImage``. Every failure is logged; nothing is returned.
    """
    lPostRequest = CPostRequest('download dpm list post')
    lPostRequest.putUrl(gConfigFileWrapper.getStr('dpm', 'query_list_url'))
    lPostRequest.putFields({
        'page': self.__mPage,
        'authorizeStatus': 'false',
        'hasImage': 'false',
        'cateList': self.__mCategory,
        'ranNum': 0,
    })
    lPostRequest.putHeader([self.__Referer])
    lResponseCode = lPostRequest.performRequest()
    lResponseHeader = lPostRequest.getResponseHeader()
    if (lResponseCode != 200
            or 'text/plain' not in lResponseHeader['content-type']):
        gLogger.error(
            "Fetch page {}'s dpm list failed. Response code: {}. Response header: {}"
            .format(self.__mPage, lResponseCode, lResponseHeader))
        return

    lResponsebody = lPostRequest.getResponseBody()
    lResult = json.loads(lResponsebody.decode('utf-8'))
    # .get() so a payload without "rows" is reported as unexpected below
    # instead of raising KeyError.
    lRowsData = lResult.get("rows") if isinstance(lResult, dict) else None
    if not lRowsData:
        gLogger.warn(
            "The page {}'s dpm list return result is not expected. Response body: {}"
            .format(self.__mPage, lResponsebody))
        return

    # One request object is reused for every image on the page; the helper
    # resets it before each use.
    lGetRequest = CGetRequest('dpm detail get')
    for lRowData in lRowsData:
        self.__fetchRowImage(lGetRequest, lRowData)


def __fetchRowImage(self, iGetRequest, iRowData):
    """Fetch and store the image of one list row, logging any failure.

    Args:
        iGetRequest: Reusable request object; it is cleaned before use.
        iRowData: The row's metadata dict. On success it gains a ``page``
            entry and is recorded via ``gMetadataDpm.insertResource``.
    """
    lUuid = iRowData.get("uuid")
    if not lUuid:
        gLogger.error(
            "Resource metadata info in page {} is not correct. Row data: {}."
            .format(self.__mPage, iRowData))
        return
    if gMetadataDpm.isExistedResource(lUuid):
        gLogger.warn("Resource {} in page {} has been existed. Skip it".format(
            lUuid, self.__mPage))
        return

    # The literal string "None" stands in for absent names so that the file
    # name and log messages below stay well-formed.
    lResourceName = iRowData.get("name") or "None"
    lResourceDynastyName = iRowData.get("dynastyName") or "None"
    gLogger.debug("Start to process {} {}".format(lResourceName,
                                                  lResourceDynastyName))

    lImageUrl = self._getResourceUrl(iRowData)
    if not lImageUrl:
        gLogger.warn("The resource {}-{} in page {} does not have image".format(
            lResourceDynastyName, lResourceName, self.__mPage))
        return

    iGetRequest.cleanData()
    iGetRequest.putUrl(
        gConfigFileWrapper.getStr('dpm',
                                  'image_source_url').format(lImageUrl))
    iGetRequest.putHeader([self.__Referer])
    lResponseCode = iGetRequest.performRequest()
    if lResponseCode != 200:
        gLogger.error(
            "Fetch resource {}-{} in page {} failed. Response code: {}."
            .format(lResourceDynastyName, lResourceName, self.__mPage,
                    lResponseCode))
        return

    lResponseHeader = iGetRequest.getResponseHeader()
    lFormat = self._getResourceFormat(lResponseHeader['content-type'])
    if not lFormat:
        gLogger.error(
            "Fetch resource {}-{} in page {} failed. Response header: {}"
            .format(lResourceDynastyName, lResourceName, self.__mPage,
                    lResponseHeader))
        return

    lResponsebody = iGetRequest.getResponseBody()
    lOutputFile = "{}-{}-{}".format(lResourceDynastyName, lResourceName,
                                    lUuid)
    # Strip characters that are not allowed in file names. The backslash is
    # escaped explicitly: inside a character class "\|" is only a pipe.
    lOutputFile = re.sub(r'[<>:"/\\|?*]', '', lOutputFile)
    lOutputFile = "{}/{}.{}".format(self.__mSavePath, lOutputFile, lFormat)
    with open(lOutputFile, 'wb+') as out_file:
        out_file.write(lResponsebody)

    # Write metadata to database
    iRowData["page"] = self.__mPage
    gMetadataDpm.insertResource(lUuid, iRowData)
def insertResource(self, iUuid, iData):
    """Record ``iData`` under ``iUuid`` unless that uuid is already stored.

    Args:
        iUuid: Key identifying the resource.
        iData: Metadata to store for the resource.

    Returns:
        None. When ``isExistedResource(iUuid)`` is true, an error is logged
        and the stored entry is left unchanged.
    """
    lAlreadyStored = self.isExistedResource(iUuid)
    if not lAlreadyStored:
        self.__mResourceMetadata[iUuid] = iData
        return
    gLogger.error("The resource {} has existed".format(iUuid))