def __init__(self, config = configuration.Configuration()):
        self.config = config
        self.clusterConfigPaths = config.clusterConfigPaths
        self.logger = logging.getLogger()
        self.swStat = 0   # 0: mean all agg are on service   swStat = 1: Means clusterConfigPaths[swStat -1 ] is on services

        hadoop_client_path = 'hadoop_client/bin'
        hadoopToolPath = os.path.join(os.path.dirname(__file__),hadoop_client_path)
        self.hadoopWrapper = HadoopTool(hadoopToolPath)
Beispiel #2
0
    def __init__(self, configuration = configuration.Configuration()):
        self.config = configuration
        hadoop_client_path = 'hadoop_client/bin'
        self.doneFile =  configuration.doneFile
        self.copyingFile =configuration.copyingFile

        hadoopToolPath = os.path.join(os.path.dirname(__file__),hadoop_client_path)
        self.hadoopRoot = configuration.hadoopDumpRoot
        self.panguRoot = configuration.panguDataRoot

        self.hadoopWrapper = HadoopTool(hadoopToolPath)
        self.pangWrapper   = PanguWrapper()
        path = 'dump%s' % time.strftime('%Y-%m-%d',time.localtime())
        self.panguRoot = util.joinPanguPath(self.panguRoot, path)
        self.eagleLogFile = configuration.eagleLogFile
        self.jobIds = None
        self.logger = logging.getLogger()

        self.searchSystem=SearchSystem(self.config)
    def __init__(self, configRootPath, config = configuration.Configuration()):
        self.configRootPath = configRootPath
        print "XXX", configRootPath
        self.yishanConf   = YishanConfig(configRootPath)
        self.h2p          = Hdfs2Pangu(configRootPath)
        self.puCon        = PanguWrapper()
        self.nvwaAddr     = self.yishanConf.destnuwa
        self.configuration = config
        self.panguPort    = '10240'
        self.idxPathRoot  = 'pangu://' + self.nvwaAddr + ':' + self.panguPort + self.configuration.panguDataRoot 

        self.services = self.configuration.services
        self.ha2ToolPath = self.configuration.ha2ToolPath
        self.comboMap = self.configuration.comboMap

        hadoop_client_path = 'hadoop_client/bin'
        hadoopToolPath = os.path.join(os.path.dirname(__file__),hadoop_client_path)

        self.runtimeServices = []
        self.buildServices = []

        self.hdpIncrSrc     = config.hadoopIncrRoot
        self.pgIncrDest4YG  = config.panguIncrRoot
        self.hdpDoneDir = os.path.join(self.hdpIncrSrc,'done')
        self.pgDoneDir  = 'pangu://' + self.nvwaAddr + ':' + self.panguPort + os.path.join(self.pgIncrDest4YG,'done/')
        self.pgIncrDest = 'pangu://' + self.nvwaAddr + ':' + self.panguPort + self.pgIncrDest4YG
        self.hadoopWrapper = HadoopTool(hadoopToolPath)

        for serviceName in self.services.keys():
            configPath = configRootPath + '/' + serviceName + '_config'
            self.runtimeServices.append(RuntimeService(self.idxPathRoot,configPath,config,serviceName))

            for clusterName in self.configuration.services[serviceName]:
                self.buildServices.append(BuildService(self.idxPathRoot, configPath, config, serviceName, clusterName))

        self.logger = logging.getLogger()
class SearchCluster:
    def __init__(self, configRootPath, config = configuration.Configuration()):
        self.configRootPath = configRootPath
        print "XXX", configRootPath
        self.yishanConf   = YishanConfig(configRootPath)
        self.h2p          = Hdfs2Pangu(configRootPath)
        self.puCon        = PanguWrapper()
        self.nvwaAddr     = self.yishanConf.destnuwa
        self.configuration = config
        self.panguPort    = '10240'
        self.idxPathRoot  = 'pangu://' + self.nvwaAddr + ':' + self.panguPort + self.configuration.panguDataRoot 

        self.services = self.configuration.services
        self.ha2ToolPath = self.configuration.ha2ToolPath
        self.comboMap = self.configuration.comboMap

        hadoop_client_path = 'hadoop_client/bin'
        hadoopToolPath = os.path.join(os.path.dirname(__file__),hadoop_client_path)

        self.runtimeServices = []
        self.buildServices = []

        self.hdpIncrSrc     = config.hadoopIncrRoot
        self.pgIncrDest4YG  = config.panguIncrRoot
        self.hdpDoneDir = os.path.join(self.hdpIncrSrc,'done')
        self.pgDoneDir  = 'pangu://' + self.nvwaAddr + ':' + self.panguPort + os.path.join(self.pgIncrDest4YG,'done/')
        self.pgIncrDest = 'pangu://' + self.nvwaAddr + ':' + self.panguPort + self.pgIncrDest4YG
        self.hadoopWrapper = HadoopTool(hadoopToolPath)

        for serviceName in self.services.keys():
            configPath = configRootPath + '/' + serviceName + '_config'
            self.runtimeServices.append(RuntimeService(self.idxPathRoot,configPath,config,serviceName))

            for clusterName in self.configuration.services[serviceName]:
                self.buildServices.append(BuildService(self.idxPathRoot, configPath, config, serviceName, clusterName))

        self.logger = logging.getLogger()

        
    def getDirName(self, dir):
        p = re.compile('.*(\d{10}_\d{10})')
        if p.match(dir):
            dir_patern = p.search(dir).groups(1)[0]
            return True, dir_patern
        else:
            return False, ''

    def moveHdp2Done(self, dirs):
        self.logger.info("begin to move hadoop to done directory")

        if not self.hadoopWrapper.isPathExists(self.hdpDoneDir):
            print 'hdpDoneDir [%s] does not exists, create it' % self.hdpDoneDir
            self.hadoopWrapper.mkdir(self.hdpDoneDir)

        for dir in dirs:
            getDirNameRet, dirName = self.getDirName(dir)
            path = os.path.join(self.hdpIncrSrc + dirName)
            self.logger.debug(("move hadoop to done, [%s] => [%s]") % (path, self.hdpDoneDir))
            mv_ret = self.hadoopWrapper.move(path, self.hdpDoneDir)
            if not mv_ret:
                self.logger.error(("move hadoop to done dictionary faild. from [%s] to [%s]") % (dir, self.hdpDoneDir))
                return False

        return True

    def movePg2Done(self, dirs):
        self.logger.info("begin to move pangu to done dictionary")

        if self.puCon.listPanguDirectory(self.pgDoneDir) == None:
            print 'pgDoneDir [%s] does not exists,created it' % self.pgDoneDir
            self.puCon.makeDir(self.pgDoneDir)

        for dir in dirs:
            getDirNameRet, dirName = self.getDirName(dir)
            path = self.pgIncrDest + dirName
            dest = self.pgDoneDir + dirName
            self.logger.debug(("move pangu to done, [%s] => [%s]") % (path, self.pgDoneDir))
            mv_ret = self.puCon.mvDir(path, dest)
            if not mv_ret:
                self.logger.error(("move pangu to done dictionary faild. from [%s] to [%s]") % (path, self.pgDoneDir))
                return False

        return True

    def h2pCopy(self, src, dest, timeout = 0):
        if not self.h2p.copyData(src, dest, timeout):
            return False

        dest2 = ''
        if dest[-1] == '/':
            dest2 = dest
        else:
            dest2 = dest + '/'

        if self.puCon.listPanguDirectory('pangu://' + self.nvwaAddr + ':' + self.panguPort + dest2) == None:
            return False
        return True

    def copyIncrData(self, ckIncrList):
        timeout = 1200

        copied_paterns = []
        for dir_name in ckIncrList:
            info="Incr Copy OK!"
            getNameRet, dir_patern = self.getDirName(dir_name)
            if not getNameRet:
                continue

            for loop in range(3):
                self.logger.info(("copy increase data. try [%d]") % loop)

                copy_dest = os.path.join(self.pgIncrDest4YG,dir_patern)
                self.puCon.removeDir(copy_dest)
                if self.h2pCopy(dir_name, copy_dest,timeout):
                    break

            if loop > 3:
                #copy fail
                self.logger.error(("copy increase data fail. from [%s] to [%s]") % (dir_name, os.path.join(self.pgIncrDest4YG,dir_patern)))
                return error.ERROR_COPY_INCR_DATA, copied_paterns
            
            self.logger.info(("copy increase data success. from [%s] to [%s]") % (dir_name, os.path.join(self.pgIncrDest4YG,dir_patern)))
            copied_dir = os.path.join(self.pgIncrDest4YG,dir_patern)
            copied_paterns.append(copied_dir)

            for key in self.configuration.comboMap.keys():
                dump_dir='dump_' + self.configuration.comboMap[key]
                lst_res = self.puCon.listPanguDirectory(self.pgIncrDest + dir_patern + '/' + dump_dir + '/')
                if lst_res == None:
                    continue
                if len(lst_res) <= 0:
                    self.logger.info(("the cluster increase directory is empty. [%s]") % (self.pgIncrDest + dir_patern + '/' + dump_dir + '/'))
                    self.puCon.removeDir(self.pgIncrDest + dir_patern + '/' + dump_dir + '/')
                    continue
                
                for fn in lst_res:
                    if fn.find('/') != -1:
                        self.puCon.removeDir(self.pgIncrDest + dir_patern + '/' + dump_dir + '/' + fn )
                    if fn.find('part-m') == -1:
                        self.puCon.removeFile(self.pgIncrDest + dir_patern + '/' + dump_dir + '/' + fn )

        return error.NO_ERROR, copied_paterns


    def buildIncr(self, incr_data):
        if(len(incr_data) == 0):
            self.logger.info("No Data for build")
            return error.ERROR_NO_INCR_DATA

        for buildService in self.buildServices:
            buildService.setNoData(False)
            pathlist = []
            for path in incr_data:
                pgPath = 'pangu://' + self.nvwaAddr + ':' + self.panguPort + path
                self.logger.debug(('path [%s]') % (path))
                self.logger.debug(('path join [%s] + [%s] => [%s]') % (pgPath, self.comboMap[buildService.clusterName],os.path.join(pgPath, 'dump_' + self.comboMap[buildService.clusterName])))
                tmppath = os.path.join(pgPath, 'dump_' + self.comboMap[buildService.clusterName] + '/')
                self.logger.debug(('tmppath: [%s]') % (tmppath))
                lst_tmp = self.puCon.listPanguDirectory(tmppath)
                if lst_tmp != None and len(lst_tmp) > 0:
                    pathlist.append(tmppath)

            self.logger.info(('start build [%s] increase') % buildService.clusterName)
            self.logger.info(('the file patern list:[%s]') % pathlist)
            if (len(pathlist) == 0):
                self.logger.info(('file patern list is null, clusterName:[%s]') % buildService.clusterName)
                buildService.setNoData(True)
                continue

            if buildService.buildIncrIndex(pathlist, 300) != error.NO_ERROR:
                return error.ERROR_BUILD_INCR_INDEX

        for buildService in self.buildServices:
            if buildService.waitBuildIncrIndex(3600) != error.NO_ERROR:
                return error.ERROR_BUILD_INCR_INDEX
            
        return error.NO_ERROR

    def copyIndex(self):
        self.logger.info('Copy Begining ...')

        for service, clusters in self.configuration.services.items():
            print "clusters is:", self.configuration.services[service]

        hdp_dat_dir = self.configuration.hadoopDumpRoot
        pg_tmp_dir  = self.configuration.panguDataRoot + 'tmpdata'

        retry_time = self.configuration.idxCopyRetryTimes
        while retry_time > 0:
            if self.puCon.listPanguDirectory(self.idxPathRoot + 'tmpdata/' ) != None:
                self.puCon.removeDir(self.idxPathRoot + 'tmpdata/')

            h2pCopy = self.h2p.copyData(hdp_dat_dir,pg_tmp_dir)
            if h2pCopy:
                break;
            retry_time-=1
            self.logger.info("Copy failed from [%s] to [%s], last retry [%d]" , hdp_dat_dir, pg_tmp_dir, retry_time)
            if retry_time <= 0:
                self.logger.error("Copy failed from [%s] to [%s]" , hdp_dat_dir, pg_tmp_dir)
                return error.ERROR_COPY_INDEX

        for rs in self.runtimeServices:
            for cluster in self.configuration.services[rs.serviceName]:
                serv_src_path    = self.idxPathRoot + 'tmpdata/' + self.configuration.comboMap[cluster][0]
                serv_dest_path   = self.idxPathRoot  + \
                    rs.serviceName + '/runtimedata/' + cluster + '/' + cluster 

                max_gen_ver = self.checkRuntimedataDir(serv_dest_path)
                next_ver = max_gen_ver + 1
                serv_dest_path   = serv_dest_path + '/generation_%d' % next_ver
                self.logger.info("from [%s] to [%s]. Begining" , serv_src_path, serv_dest_path)
                # move index data from tmpdata
                if not self.puCon.mvDir(serv_src_path,serv_dest_path):
                    self.logger.error("Move failed from [%s] to [%s]" , serv_src_path, serv_dest_path)
                    return error.ERROR_COPY_INDEX
                
            self.logger.info('copy index for [%s] done ' % rs.serviceName)

        return error.NO_ERROR

    def checkRuntimedataDir(self,destDir):
        max_ver   = 0
        all_ver   = []
        dst_dir   = destDir
        p=re.compile('generation_([0-9]*)')

        if self.puCon.listPanguDirectory(dst_dir) == None:
            self.puCon.makeDir(dst_dir)
            return max_ver

        for line in self.puCon.listPanguDirectory(dst_dir):
            if p.match(line):
                pat_num = p.search(line).groups(1)[0]
                all_ver.append(int(pat_num))

        if len(all_ver) != 0:
            max_ver = max(all_ver)
        
        return max_ver

    def checkConfigurationDir(self,destDir):
        timeout = 10
        if self.puCon.listPanguDirectory(destDir, timeout) == None:
            self.puCon.makeDir(destDir,timeout)
        return True
        

    def switchIndex(self):
        self.logger.info('switch index for [%s]' % self.configRootPath)
        self.logger.info('get old serving config and index version...')
        
        
        # for rs in self.runtimeServices:
            # self.logger.info('switching index for [%s]' % rs.serviceName)
        

        #stop all Service
        if not self.stopAllService():
            return error.ERROR_STOP_SERVICE

        # deploy configuration
        if not self.deployConfiguration():
            return error.ERROR_DEPLOY_CONFIGURATION

        #start service
        ret = self.startAllService()
        return ret

    def stopAllService(self):
        for runtimeService in self.runtimeServices:
            if not runtimeService.stopService():
                self.logger.error('stop all service error: [%s]' % self.configRootPath)
                return False
        return True

    def startAllService(self):
        for runtimeService in self.runtimeServices:
            status = runtimeService.startService()
            if  status != error.NO_ERROR:
                return status
        if not self.warmupAllService():
            return error.ERROR_LOAD_PARTITION_FAILED
        if not self.checkAllService():
            return error.ERROR_LOAD_PARTITION_FAILED
        return error.NO_ERROR


    def warmupAllService(self):
        clusterWarmer = ClusterWarmer(self.configRootPath,self.configuration)
        threadNum = self.configuration.abenchThreadNum
        timeLen = self.configuration.abenchTimeLen
        if not clusterWarmer.startAbench(threadNum, timeLen):
            return False
        return True



    def deployConfiguration(self):
        self.logger.info('delployConfigurations begining')
        for rs in self.runtimeServices:
            if not rs.deployConfiguration():
                self.logger.error('deploy configuration error: [%s]' % rs.serviceName)
                return False
        return True

    def serviceCheckup(self):
        self.logger.info('service Checking up')
        for rs in self.runtimeServices:
            if not rs.getServiceStatusOk():
                rs.serviceStart()
                self.logger.error('deploy configuration error: [%s]' % rs.serviceName)
                return False

    def switchIndexing(self):
        self.logger.info('switch index begining')
        for rs in self.runtimeServices:
            if not rs.switchIndexing():
                self.logger.error('switch index error: [%s]' % rs.serviceName)
                return False
        return True

    def checkAllService(self):
        self.logger.info('checking AllService: [%s]')
        for runtimeService in self.runtimeServices:
            if not runtimeService.checkStartCorrect():
                return False
        return True


    def warmupAllService(self):
        clusterWarmer = ClusterWarmer(self.configRootPath)
        threadNum = self.configuration.abenchThreadNum
        timeLen = self.configuration.abenchTimeLen
        if not clusterWarmer.startAbench(threadNum, timeLen):
            return False
        return True
Beispiel #5
0
class Controller:
    def __init__(self, configuration = configuration.Configuration()):
        self.config = configuration
        hadoop_client_path = 'hadoop_client/bin'
        self.doneFile =  configuration.doneFile
        self.copyingFile =configuration.copyingFile

        hadoopToolPath = os.path.join(os.path.dirname(__file__),hadoop_client_path)
        self.hadoopRoot = configuration.hadoopDumpRoot
        self.panguRoot = configuration.panguDataRoot

        self.hadoopWrapper = HadoopTool(hadoopToolPath)
        self.pangWrapper   = PanguWrapper()
        path = 'dump%s' % time.strftime('%Y-%m-%d',time.localtime())
        self.panguRoot = util.joinPanguPath(self.panguRoot, path)
        self.eagleLogFile = configuration.eagleLogFile
        self.jobIds = None
        self.logger = logging.getLogger()

        self.searchSystem=SearchSystem(self.config)

    def printLog(self, info):
        fobj = open(self.eagleLogFile, 'a')
        info = info + ' ' + time.asctime()
        fobj.write(info)
        fobj.write('\n')
        fobj.close
        print info

    def detectBuild(self):
        breakfn  = "/tmp/nowait"
        sigfn    = self.doneFile
        ret      = True
        info     = 'got signal file from hadoop'
        cmdTimeout = 10
        buildTimeOut = self.config.indexBuildTimeout

        startTime = time.time()

        if os.path.exists(breakfn):
            self.logger.info("INFO: remain ",breakfn, "got removed")
            os.remove(breakfn)

        try:
            while (not self.hadoopWrapper.isPathExists(sigfn,cmdTimeout)):
                time.sleep(5)
                if time.time()  > buildTimeOut + startTime:
                    info = 'Critical - detectBuild timeout !!'
                    self.logger.error(info)
                    self.printLog(info)
                    return False
                
                if os.path.exists(breakfn):
                    info = "Got break files: " + breakfn + " QUIT"
                    ret = False
                    break
        except Exception,e :
            info = str(e)
            ret =  False
        self.logger.info(info)
        return ret
class SearchSystem:
    def __init__(self, config = configuration.Configuration()):
        self.config = config
        self.clusterConfigPaths = config.clusterConfigPaths
        self.logger = logging.getLogger()
        self.swStat = 0   # 0: mean all agg are on service   swStat = 1: Means clusterConfigPaths[swStat -1 ] is on services

        hadoop_client_path = 'hadoop_client/bin'
        hadoopToolPath = os.path.join(os.path.dirname(__file__),hadoop_client_path)
        self.hadoopWrapper = HadoopTool(hadoopToolPath)

    def checkBeforeCopy(self):
        incr_dirs = self.hadoopWrapper.listDirectory(self.config.hadoopIncrRoot)
        if len(incr_dirs) != 0:
            p = re.compile('.*(\d{10}_\d{10})')
            remove_list = []
            for dir in incr_dirs:
                if not p.match(dir):
                    remove_list.append(dir)

            for i in remove_list:
                incr_dirs.remove(i)

            return True, incr_dirs
        return True, incr_dirs
        

    #copy increase data to all SearchCluster and start build job in this service.
    def buildSearchClusters(self):
        (ckIncrRet, ckIncrList) = self.checkBeforeCopy()
        if not ckIncrRet:
            self.logger.info('don\'t find increase data.')
            return error.ERROR_NO_INCR_DATA
        
        self.logger.info(('get increase directory list: [%s]') % (ckIncrList))
        for clusterConfigPath in self.clusterConfigPaths:
            self.logger.info('switch cluster for: [%s]' % clusterConfigPath)

            searchCluster  = SearchCluster(clusterConfigPath, self.config)
            copyRet, copyIncrData   = searchCluster.copyIncrData(ckIncrList)

            if copyRet != error.NO_ERROR:
                self.logger.debug('Copy incr data failed')
                return error.ERROR_BUILD_INCR_INDEX

            self.logger.info(('Copy increase data sucess, the file patern [%s]' % copyIncrData))
            buildIndexRet = searchCluster.buildIncr(copyIncrData)
            if buildIndexRet != error.NO_ERROR:
                self.logger.debug('build increase index failed:[%s]' % clusterConfigPath)
                return buildIndexRet

        if not searchCluster.movePg2Done(ckIncrList):
            self.logger.error('move pangu to done dictionary fail')
            return error.ERROR_INCR_MOVE_PG

        self.logger.debug('begin to move hadoop to done')
        if not searchCluster.moveHdp2Done(ckIncrList):
            self.logger.error('move hadoop to done dictionary fail')
            return error.ERROR_INCR_MOVE_HDP

        return error.NO_ERROR

    #copy index to all SearchCluster and start service in this service.
    def updateSearchClusters(self):
        switchAggregatorTimeout = self.config.switchAggregatorTimeout
        aggregatorWrapper = AggregatorWrapper(self.config)
        
        for clusterConfigPath in self.clusterConfigPaths:
            self.logger.info('switch cluster for: [%s]' % clusterConfigPath)
            # fix switch path info
            # if not aggregatorWrapper.stop(index, switchAggregatorTimeout):    
                # self.logger.debug('switch aggregator failed: [%s]' % clusterConfigPath)
                # return error.ERROR_SWITCH_PROXY_ERROR

            searchCluster  = SearchCluster(clusterConfigPath, self.config)

            if self.config.noCopy == clusterConfigPath or self.config.noCopy == 'all':
                self.logger.info('[%s] running without Copy' % self.config.noCopy )
                copyIndexRet = error.NO_ERROR
            else:
                copyIndexRet   = searchCluster.copyIndex()

            if copyIndexRet   != error.NO_ERROR:
                self.logger.debug('Copy index failed')
                return copyIndexRet

            switchIndexRet = searchCluster.switchIndex()

            if switchIndexRet != error.NO_ERROR:
                self.logger.debug('switch cluster failed:[%s]' % clusterConfigPath)
                return switchIndexRet

        return error.NO_ERROR
        
        # #switch aggregator
        # self.logger.info('recover aggregator...')
        # if not aggregatorWrapper.recover(switchAggregatorTimeout):
            # return error.ERROR_SWITCH_PROXY_ERROR

        # self.logger.info('switch index for clusters completely')
        # return error.NO_ERROR


    def GetAllSearchClusters(self):
        for clusterConfigPath in self.clusterConfigPaths:
            print clusterConfigPath
        return