예제 #1
0
파일: get_request.py 프로젝트: sosohu/dpm
    def putHeader(self, iHeader):
        """Store the header list to be sent with the request.

        Args:
            iHeader: A list (or list subclass) of header entries. The value
                is stored as-is, without copying.

        Returns:
            None. When ``iHeader`` is not a list an error is logged and the
            previously stored header is left untouched.
        """
        # isinstance() rather than an exact type() comparison so that list
        # subclasses are accepted as well.
        if not isinstance(iHeader, list):
            gLogger.error("Expect a list but input type is: {}".format(
                type(iHeader)))
            return

        self.__mHeader = iHeader
예제 #2
0
파일: get_request.py 프로젝트: sosohu/dpm
    def putParams(self, iParams):
        """Encode the query parameters and store the resulting string.

        Args:
            iParams: A dict (or dict subclass) of parameter names to values.
                It is passed through ``urllib.parse.urlencode`` and the
                encoded string is what gets stored.

        Returns:
            None. When ``iParams`` is not a dict an error is logged and the
            previously stored parameters are left untouched.
        """
        # isinstance() rather than an exact type() comparison so that dict
        # subclasses (e.g. collections.OrderedDict) are accepted as well.
        if not isinstance(iParams, dict):
            gLogger.error("Expect a dict but input type is: {}".format(
                type(iParams)))
            return

        self.__mParams = urllib.parse.urlencode(iParams)
예제 #3
0
    def addArgs(self, iArgs):
        """Queue one set of positional arguments for a later run.

        Args:
            iArgs: A tuple (or tuple subclass, e.g. a namedtuple) of
                positional arguments. It is appended to the pending list as
                the pair ``(iArgs, None)``.

        Returns:
            None. When ``iArgs`` is not a tuple an error is logged and
            nothing is queued.
        """
        # isinstance() rather than an exact type() comparison so that tuple
        # subclasses are accepted as well.
        if not isinstance(iArgs, tuple):
            gLogger.error("Expect a tuple but input type is: {}".format(
                type(iArgs)))
            return

        self.__mFuncVar.append((iArgs, None))
예제 #4
0
    def setRunner(self, iRunner):
        """Remember the callable that will be used as the runner.

        Args:
            iRunner: Any callable object. Non-callable input is rejected
                with an error log and the current runner is kept.
        """
        if callable(iRunner):
            self.__mRunner = iRunner
            return

        lActualType = type(iRunner)
        gLogger.error(
            "Expect a function but input type is: {}".format(lActualType))
예제 #5
0
    def _getResourceUrl(self, iRawData):
        """Pick the image reference to use for one resource row.

        The keys are tried in the order ``"bigImage"``, ``"centerImage"``,
        ``"smallImage"`` and the first truthy value wins.

        Args:
            iRawData: A dict (or dict subclass) describing one resource row.

        Returns:
            The first truthy image value. When none is truthy, the value of
            the last key tried is returned, or ``None`` if that key is
            absent. ``None`` is also returned, after logging an error, when
            ``iRawData`` is not a dict.
        """
        if not isinstance(iRawData, dict):
            gLogger.error("Expect a dict but input type is: {}".format(
                type(iRawData)))
            return

        # .get() so that a row lacking one of the image keys falls through to
        # the next candidate instead of raising KeyError.
        return (iRawData.get("bigImage") or iRawData.get("centerImage")
                or iRawData.get("smallImage"))
예제 #6
0
파일: metadata_dpm.py 프로젝트: sosohu/dpm
 def loadResourceMetadata(self):
     """Load resource metadata records from the configured metadata file.

     The file path comes from the 'dpm' / 'metadata_file' configuration
     entry. Each non-blank line is parsed as one JSON document and stored
     in the in-memory metadata mapping under its 'uuid' value. Nothing is
     loaded when the file does not exist.

     Loading stops at the first record whose 'uuid' is missing or empty;
     an error is logged and records already loaded are kept.
     """
     # Resolve the path once instead of querying the configuration twice.
     lMetadataPath = gConfigFileWrapper.getStr('dpm', 'metadata_file')
     if not os.path.exists(lMetadataPath):
         return

     with open(lMetadataPath, 'r') as in_file:
         # Iterate the file lazily rather than reading every line up front.
         for lLine in in_file:
             # A blank line (e.g. a trailing newline) is not a JSON document
             # and would make json.loads raise.
             if not lLine.strip():
                 continue

             lJLine = json.loads(lLine)
             # .get() so that a record without the key reaches the error
             # branch below instead of raising KeyError.
             lUuid = lJLine.get('uuid')
             if not lUuid:
                 gLogger.error("Resource {} does not have uuid".format(lJLine))
                 return

             self.__mResourceMetadata[lUuid] = lJLine
예제 #7
0
    def run(self):
        """Submit every queued argument set to the pool and wait for them.

        The run is refused, with an error log, when the number of queued
        argument sets differs from the expected count. Otherwise one work
        request per queued entry is built via ``threadpool.makeRequests``,
        handed to the pool, and the call blocks on the pool's ``wait()``.
        """
        lExpected = self.__mNum
        lQueued = len(self.__mFuncVar)
        if lExpected != lQueued:
            lMessage = "The number of FuncVar is not match. Expect {} but now is {}."
            gLogger.error(lMessage.format(lExpected, lQueued))
            return

        lMessage = "Start to run {} in multi threads. Nums: {}. FuncVars: {}."
        gLogger.debug(
            lMessage.format(self.__mRunner, self.__mNum, self.__mFuncVar))

        lWorkItems = threadpool.makeRequests(self.__mRunner, self.__mFuncVar)
        for lWorkItem in lWorkItems:
            self.__mPool.putRequest(lWorkItem)
        self.__mPool.wait()
예제 #8
0
    def __getResouce(self):
        """Fetch one page of the resource list and download each new image.

        A POST carrying the current page and category is sent to the
        configured 'query_list_url'. For every returned row that has a uuid
        not yet known to ``gMetadataDpm``, an image reference is resolved
        with ``_getResourceUrl``, fetched through the configured
        'image_source_url' template, written below the save path and the
        row is recorded in ``gMetadataDpm``.

        Every failure (bad list response, missing uuid, missing image,
        non-200 download, unrecognised content type) is logged and the
        affected row or page is skipped. Nothing is returned.
        """
        lPostRequest = CPostRequest('download dpm list post')
        lPostRequest.putUrl(gConfigFileWrapper.getStr('dpm', 'query_list_url'))

        lFields = {}
        lFields['page'] = self.__mPage
        lFields['authorizeStatus'] = 'false'
        lFields['hasImage'] = 'false'
        lFields['cateList'] = self.__mCategory
        lFields['ranNum'] = 0
        lPostRequest.putFields(lFields)
        lPostRequest.putHeader([self.__Referer])

        lResponseCode = lPostRequest.performRequest()

        lResponseHeader = lPostRequest.getResponseHeader()
        if lResponseCode == 200 and 'text/plain' in lResponseHeader[
                'content-type']:
            lResponsebody = lPostRequest.getResponseBody()
            lResult = json.loads(lResponsebody.decode('utf-8'))

            if lResult and lResult["rows"] and len(lResult["rows"]) > 0:
                lRowsData = lResult["rows"]

                # One GET request object is reused for all rows; it is reset
                # with cleanData() before each download.
                lGetRequest = CGetRequest('dpm detail get')
                for lRowData in lRowsData:
                    lUuid = lRowData["uuid"]
                    if not lUuid:
                        gLogger.error(
                            "Resource metadata info in page {} is not correct. Row data: {}."
                            .format(self.__mPage, lRowData))
                        continue

                    # Rows already recorded in the metadata store are skipped.
                    if gMetadataDpm.isExistedResource(lUuid):
                        gLogger.warn(
                            "Resource {} in page {} has been existed. Skip it".
                            format(lUuid, self.__mPage))
                        continue

                    lResourceName = lRowData["name"] or "None"
                    lResourceDynastyName = lRowData["dynastyName"] or "None"

                    gLogger.debug("Start to process {} {}".format(
                        lResourceName, lResourceDynastyName))

                    lImageUrl = self._getResourceUrl(lRowData)
                    if lImageUrl:
                        lGetRequest.cleanData()
                        lGetRequest.putUrl(
                            gConfigFileWrapper.getStr(
                                'dpm', 'image_source_url').format(lImageUrl))
                        lGetRequest.putHeader([self.__Referer])

                        lResponseCode = lGetRequest.performRequest()

                        if lResponseCode == 200:
                            lResponseHeader = lGetRequest.getResponseHeader()
                            lFormat = self._getResourceFormat(
                                lResponseHeader['content-type'])
                            if lFormat:
                                lResponsebody = lGetRequest.getResponseBody()
                                lOutputFile = "{}-{}-{}".format(
                                    lResourceDynastyName, lResourceName, lUuid)
                                # Drop characters from the file name
                                # (presumably the set that is reserved in
                                # file names). The backslash has to be
                                # doubled inside the character class: a
                                # single one merely escapes the following
                                # '|' and leaves literal backslashes in.
                                lOutputFile = re.sub(r'[<>:"/\\|?*]', '',
                                                     lOutputFile)
                                lOutputFile = "{}/{}.{}".format(
                                    self.__mSavePath, lOutputFile, lFormat)
                                with open(lOutputFile, 'wb+') as out_file:
                                    out_file.write(lResponsebody)
                                    # Write metadata to database
                                    lRowData["page"] = self.__mPage
                                    gMetadataDpm.insertResource(
                                        lUuid, lRowData)
                            else:
                                gLogger.error(
                                    "Fetch resource {}-{} in page {} failed. Response header: {}"
                                    .format(lResourceDynastyName,
                                            lResourceName, self.__mPage,
                                            lResponseHeader))
                        else:
                            gLogger.error(
                                "Fetch resource {}-{} in page {} failed. Response code: {}."
                                .format(lResourceDynastyName, lResourceName,
                                        self.__mPage, lResponseCode))
                    else:
                        gLogger.warn(
                            "The resource {}-{} in page {} does not have image"
                            .format(lResourceDynastyName, lResourceName,
                                    self.__mPage))

                del lGetRequest
            else:
                gLogger.warn(
                    "The page {}'s dpm list return result is not expected. Response body: {}"
                    .format(self.__mPage, lResponsebody))
        else:
            gLogger.error(
                "Fetch page {}'s dpm list failed. Response code: {}. Response header: {}"
                .format(self.__mPage, lResponseCode, lResponseHeader))

        del lPostRequest
예제 #9
0
파일: metadata_dpm.py 프로젝트: sosohu/dpm
    def insertResource(self, iUuid, iData):
        """Record the metadata of a resource that is not yet known.

        Args:
            iUuid: Key under which the record is stored.
            iData: The metadata record; stored as-is.

        An already present ``iUuid`` is left unchanged and an error is
        logged instead.
        """
        lAlreadyKnown = self.isExistedResource(iUuid)
        if not lAlreadyKnown:
            self.__mResourceMetadata[iUuid] = iData
            return

        gLogger.error("The resource {} has existed".format(iUuid))