def rerun(tag, hqlList, dirNameList, fileNameList, maxFileSize, serialNoWidth, checker, checkerFieldSeparator,
          startDate, endDate):
    """
    Manual re-run of the vgop reloader for every record date in [startDate, endDate].

    startDate and endDate are billing dates (day or month granularity). One
    FsReloader is built and run per record date; the process exits with -1 as
    soon as one of them reports failure.

    :param tag: tag handed to the reloader
    :param hqlList: HQL templates; every '%s' is replaced by the record date
    :param dirNameList: directory names joined onto the configured re-run load path
    :param fileNameList: file name templates, %-formatted with the record date
    :param maxFileSize: forwarded unchanged to FsReloader
    :param serialNoWidth: forwarded unchanged to FsReloader
    :param checker: checker file name, or '' for no checker
    :param checkerFieldSeparator: forwarded unchanged to FsReloader
    :param startDate: first record date (inclusive)
    :param endDate: last record date (inclusive)
    """
    logger.info('Running vgop reloader: [startDate=%s] [endDate=%s]' % (startDate, endDate))

    currentDate = startDate
    # NOTE(review): the loop relies on the dates being orderable with <= (presumably
    # same-format date strings) - confirm against TimeUtils.
    while currentDate <= endDate:
        rerunRoot = conf.get('vgopReloader', 'rerun.load.path')
        sourceDirs = [os.path.join(rerunRoot, name) for name in dirNameList]
        logger.info('Running vgop reloader ... [recordDate=%s]' % currentDate)

        # Built as a dict (in the original keyword order) and expanded into the constructor.
        reloaderArgs = {
            'tag': tag,
            'loadCmd': conf.get('coreHiveLoader', 'shell.load.cmd'),
            'recordDate': currentDate,
            'hqlList': [template.replace('%s', currentDate) for template in hqlList],
            'loadPathList': sourceDirs,
            'fileNameList': [template % currentDate for template in fileNameList],
            'separator': conf.get('vgopReloader', 'field.separator', '|'),
            'isAddRowIndex': False,
            'parallel': conf.getint('vgopReloader', 'reload.parallel'),
            'retryTimes': conf.getint('vgopReloader', 'retry.times'),
            'maxFileSize': maxFileSize,
            'serialNoWidth': serialNoWidth,
            # An empty checker name means "no checker": pass an empty path through.
            'checkerPath': '' if checker == '' else os.path.join(
                conf.get('vgopReloader', 'checkers.path'), checker),
            'checkerFieldSeparator': checkerFieldSeparator,
            'bakupPathList': [],
            # A manual re-run passes no tags history path and no operation time.
            'tagsHistoryPath': '',
            'operationTime': None,
        }
        succeeded = FsReloader(**reloaderArgs).run()
        if not succeeded:
            exit(-1)

        logger.info("Run vgop reloader success: [date=%s]" % currentDate)
        currentDate = TimeUtils.timedelta(currentDate, 1)

    logger.info("Run all vgop reloader success")
def main():
    """
    Run the tags loader once, configured from the 'basic' config section.

    Exits the process with -1 when the loader reports failure.
    """
    logger.info("Running tags loader ...")

    loaderArgs = {
        'hdfsPath': conf.get('basic', 'hdfs.tags.path'),
        'fsPath': conf.get('basic', 'fs.tags.path'),
        'duration': conf.getint('basic', 'sync.duration'),
    }
    loader = TagsLoader(**loaderArgs)

    succeeded = loader.run()
    if not succeeded:
        exit(-1)
def run(tag, detailTableList, hqlList, sqlList, dtype, deltaList):
    """
    Scheduled execution of the web reloader.

    Runs the tag detector first. When a tag is detected, one WebReloader is
    built and run for every entry of deltaList; the process exits with -1 as
    soon as one of them reports failure. When nothing is detected, only a log
    line is written.

    :param tag: tag to detect and to hand to the reloader
    :param detailTableList: entries of the form '<connection>:<table>'; the
        table part is upper-cased
    :param hqlList: HQL statements; those containing '%s' are %-formatted with
        the record date
    :param sqlList: SQL statements; those containing '%s' are %-formatted with
        the record date
    :param dtype: passed to toRecordDate together with each delta
    :param deltaList: one reload is performed per delta
        (NOTE(review): presumably an offset applied to the detected date -
        confirm against toRecordDate)
    """
    logger.info("Running tag detector ...")
    tagsHistoryPath = conf.get('webReloader', 'tags.history.path')
    detector = TagDetector(
        tag=tag,
        duration=conf.getint('basic', 'sync.duration'),
        tagsSetPath=conf.get('basic', 'fs.tags.path'),
        tagsHistoryPath=tagsHistoryPath,
        times=conf.getint('tagDetector', 'detect.times'),
        interval=conf.getint('tagDetector', 'detect.interval'))
    detectResult = detector.detect()
    logger.info('Detect result: %s' % str(detectResult))

    if not detectResult.hasDetected:
        logger.info("No need to run web reloader because of no tag detected")
        return

    # Everything below up to the loop is independent of the delta, so it is
    # computed once instead of once per iteration.
    connectionList = []
    tableList = []
    for detailTable in detailTableList:
        parts = detailTable.split(':')
        connectionList.append(parts[0])
        tableList.append(parts[1].upper())

    loadPath = conf.get('webReloader', 'load.path')
    loadPathList = [os.path.join(loadPath, table) for table in tableList]
    fileNamePattern = conf.get('webReloader', 'file.name.pattern')
    bakupPath = conf.get('webReloader', 'bakup.path')

    for delta in deltaList:
        # Record (call-detail) date for this delta.
        recordDate = toRecordDate(detectResult.minTagsSetTimeDate, dtype, delta)

        fileNameList = [fileNamePattern.format(table=table, date=recordDate) for table in tableList]
        bakupPathList = [os.path.join(bakupPath, table, recordDate) for table in tableList]

        logger.info('Running web reloader ... [recordDate=%s]' % recordDate)
        reloader = WebReloader(
            tag=tag,
            loadCmd=conf.get('coreHiveLoader', 'java.load.cmd'),
            recordDate=recordDate,
            hqlList=[hql % recordDate if '%s' in hql else hql for hql in hqlList],
            loadPathList=loadPathList,
            fileNameList=fileNameList,
            separator=conf.get('webReloader', 'field.separator', '|'),
            isAddRowIndex=False,
            parallel=conf.getint('webReloader', 'reload.parallel'),
            retryTimes=conf.getint('webReloader', 'retry.times'),
            bakupPathList=bakupPathList,
            connectionList=connectionList,
            sqlList=[sql % recordDate if '%s' in sql else sql for sql in sqlList],
            tagsHistoryPath=tagsHistoryPath,
            operationTime=detectResult.minTagsSetTime)
        if not reloader.run():
            exit(-1)
def validate(params):
    """
    Validate the positional parameters of the vgop reloader.

    Parameter layout, by index, as used by the checks below:
        0  tag (must not be empty)
        1  HQLs, 2 dir names, 3 file names - the number of '&' in each must match
        4  maxFileSize, 5 serialNoWidth - both must consist of digits only
        6  checker file name; '' means no checker, otherwise the file must
           exist under the configured 'checkers.path'
        7  checker field separator (must not be empty)
        8  cycle type 'DAY'/'MONTH' (case-insensitive) when 9 params are given,
           or startDate when 10 params are given
        9  endDate (only when 10 params are given)

    Every failed check is logged through ``logger.error``.

    :param params: sequence of 8, 9 or 10 strings
    :return: True when every check passes, False otherwise
    """
    if len(params) not in (8, 9, 10):
        logger.error("Wrong params numbers: [params=%s]" % str(params))
        return False

    if params[0] == '':
        logger.error("Tag cannot be empty")
        return False

    hqlSeparators = params[1].count('&')
    if hqlSeparators != params[2].count('&'):
        logger.error(
            "Hql's number is not equal to dir name's number: [hqls=%s] [dirNames=%s]" % (params[1], params[2]))
        return False
    if hqlSeparators != params[3].count('&'):
        logger.error(
            "Hql's number is not equal to file name's number: [hqls=%s] [fileNames=%s]" % (params[1], params[3]))
        return False

    # Both values must be numeric: reject as soon as EITHER one is not.
    # (The previous `and` only rejected when both were invalid at once.)
    if not params[4].isdigit() or not params[5].isdigit():
        logger.error(
            "Param maxFileSize and serialNoWidth must be non-negative integer: [maxFileSize=%s] [serialNoWidth=%s]"
            % (params[4], params[5]))
        return False

    if params[6] != '':
        checkersPath = conf.get('vgopReloader', 'checkers.path')
        if not os.path.isfile(os.path.join(checkersPath, params[6])):
            logger.error("No such checker found in '%s': [checkerName=%s]" % (checkersPath, params[6]))
            return False

    if params[7] == '':
        logger.error("Checker field separator cannot be empty")
        return False

    if len(params) == 9 and params[8].upper() not in ('DAY', 'MONTH'):
        logger.error("Cycle type must be 'DAY' or 'MONTH': [type=%s]" % params[8])
        return False

    if len(params) == 10 and not TimeUtils.isComparable(params[8], params[9]):
        logger.error("Date format is invalid: [startDate=%s] [endDate=%s]" % (params[8], params[9]))
        return False

    logger.info("Params validation success")
    return True
def main(tag, historyPath, duration=None):
    """
    Detect whether the tag has been newly and validly generated within the last `duration` days.

    :param tag: tag to look for
    :param historyPath: tags history path handed to the detector
    :param duration: number of days to look back; when None, the value of
        'sync.duration' in the 'basic' config section is used
    """
    if duration is None:
        duration = conf.getint('basic', 'sync.duration')

    logger.info(
        "Running tag detector [tag=%s] [historyPath=%s] [duration=%s] ..." % (tag, historyPath, duration))

    detectorArgs = {
        'tag': tag,
        'duration': duration,
        'tagsSetPath': conf.get('basic', 'fs.tags.path'),
        'tagsHistoryPath': historyPath,
        'times': conf.getint('tagDetector', 'detect.times'),
        'interval': conf.getint('tagDetector', 'detect.interval'),
    }
    outcome = TagDetector(**detectorArgs).detect()
    logger.info(outcome)
def run(tag, hqlList, dirNameList, fileNameList, maxFileSize, serialNoWidth, checker, checkerFieldSeparator,
        dtype='DAY'):
    """
    Scheduled execution of the vgop reloader.

    Runs the tag detector first. When a tag is detected, the record date is
    derived from the detected date and one FsReloader is built and run; the
    process exits with -1 if it reports failure. When nothing is detected,
    only a log line is written.

    :param tag: tag to detect and to hand to the reloader
    :param hqlList: HQL templates; every '%s' is replaced by the record date
    :param dirNameList: directory names joined onto the configured load path
    :param fileNameList: file name templates, %-formatted with the record date
    :param maxFileSize: forwarded unchanged to FsReloader
    :param serialNoWidth: forwarded unchanged to FsReloader
    :param checker: checker file name, or '' for no checker
    :param checkerFieldSeparator: forwarded unchanged to FsReloader
    :param dtype: passed to toRecordDate; defaults to 'DAY'
    """
    logger.info("Running tag detector ...")
    tagsHistoryPath = conf.get('vgopReloader', 'tags.history.path')
    detector = TagDetector(
        tag=tag,
        duration=conf.getint('basic', 'sync.duration'),
        tagsSetPath=conf.get('basic', 'fs.tags.path'),
        tagsHistoryPath=tagsHistoryPath,
        times=conf.getint('tagDetector', 'detect.times'),
        interval=conf.getint('tagDetector', 'detect.interval'))
    detectResult = detector.detect()
    logger.info('Detect result: %s' % str(detectResult))

    if not detectResult.hasDetected:
        # This is the vgop reloader; the message used to say "web reloader"
        # (copied from the web script), which was misleading in the logs.
        logger.info("No need to run vgop reloader because of no tag detected")
        return

    recordDate = toRecordDate(detectResult.minTagsSetTimeDate, dtype)
    loadPath = conf.get('vgopReloader', 'load.path')
    loadPathList = [os.path.join(loadPath, dirName) for dirName in dirNameList]

    logger.info('Running vgop reloader ... [recordDate=%s]' % recordDate)
    reloader = FsReloader(
        tag=tag,
        loadCmd=conf.get('coreHiveLoader', 'shell.load.cmd'),
        recordDate=recordDate,
        hqlList=[hql.replace('%s', recordDate) for hql in hqlList],
        loadPathList=loadPathList,
        fileNameList=[fn % recordDate for fn in fileNameList],
        separator=conf.get('vgopReloader', 'field.separator', '|'),
        isAddRowIndex=False,
        parallel=conf.getint('vgopReloader', 'reload.parallel'),
        retryTimes=conf.getint('vgopReloader', 'retry.times'),
        maxFileSize=maxFileSize,
        serialNoWidth=serialNoWidth,
        # An empty checker name means "no checker": pass an empty path through.
        checkerPath=os.path.join(conf.get('vgopReloader', 'checkers.path'), checker) if checker != '' else '',
        checkerFieldSeparator=checkerFieldSeparator,
        bakupPathList=[],
        tagsHistoryPath=tagsHistoryPath,
        operationTime=detectResult.minTagsSetTime)
    if not reloader.run():
        exit(-1)
def rerun(tag, detailTableList, hqlList, sqlList, startDate, endDate):
    """
    Manual re-run of the web reloader for every record date in [startDate, endDate].

    startDate and endDate are billing dates (day or month granularity). One
    WebReloader is built and run per record date; the process exits with -1 as
    soon as one of them reports failure.

    :param tag: tag handed to the reloader
    :param detailTableList: entries of the form '<connection>:<table>'; the
        table part is upper-cased
    :param hqlList: HQL statements; those containing '%s' are %-formatted with
        the record date
    :param sqlList: SQL statements; those containing '%s' are %-formatted with
        the record date
    :param startDate: first record date (inclusive)
    :param endDate: last record date (inclusive)
    """
    logger.info('Running web reloader: [startDate=%s] [endDate=%s]' % (startDate, endDate))

    currentDate = startDate
    # NOTE(review): the loop relies on the dates being orderable with <= (presumably
    # same-format date strings) - confirm against TimeUtils.
    while currentDate <= endDate:
        logger.info("Running web reloader: [date=%s]" % currentDate)

        def bindDate(statement):
            # Only statements that carry a '%s' placeholder are formatted.
            if '%s' in statement:
                return statement % currentDate
            return statement

        connections = [entry.split(':')[0] for entry in detailTableList]
        tables = [entry.split(':')[1].upper() for entry in detailTableList]

        rerunRoot = conf.get('webReloader', 'rerun.load.path')
        sourceDirs = [os.path.join(rerunRoot, name) for name in tables]

        namePattern = conf.get('webReloader', 'file.name.pattern')
        fileNames = [namePattern.format(table=name, date=currentDate) for name in tables]

        backupRoot = conf.get('webReloader', 'bakup.path')
        backupDirs = [os.path.join(backupRoot, name, currentDate) for name in tables]

        # Built as a dict (in the original keyword order) and expanded into the constructor.
        reloaderArgs = {
            'tag': tag,
            'loadCmd': conf.get('coreHiveLoader', 'java.load.cmd'),
            'recordDate': currentDate,
            'hqlList': [bindDate(hql) for hql in hqlList],
            'loadPathList': sourceDirs,
            'fileNameList': fileNames,
            'separator': conf.get('webReloader', 'field.separator', '|'),
            'isAddRowIndex': False,
            'parallel': conf.getint('webReloader', 'reload.parallel'),
            'retryTimes': conf.getint('webReloader', 'retry.times'),
            'bakupPathList': backupDirs,
            'connectionList': connections,
            'sqlList': [bindDate(sql) for sql in sqlList],
            'tagsHistoryPath': conf.get('webReloader', 'tags.history.path'),
            # A manual re-run has no detected tag time to record.
            'operationTime': None,
        }
        succeeded = WebReloader(**reloaderArgs).run()
        if not succeeded:
            exit(-1)

        logger.info("Run web reloader success: [date=%s]" % currentDate)
        currentDate = TimeUtils.timedelta(currentDate, 1)

    logger.info("Run all web reloader success")
logger = logging.getLogger('stdout')


def main():
    """
    Download the tags of the last `duration` days on HDFS, together with
    their generation times, to the local file system.

    Exits the process with -1 when the loader reports failure.
    """
    logger.info("Running tags loader ...")

    loaderArgs = {
        'hdfsPath': conf.get('basic', 'hdfs.tags.path'),
        'fsPath': conf.get('basic', 'fs.tags.path'),
        'duration': conf.getint('basic', 'sync.duration'),
    }
    succeeded = TagsLoader(**loaderArgs).run()
    if not succeeded:
        exit(-1)


if __name__ == '__main__':
    logging.config.fileConfig(conf.get('basic', 'log.conf.path'))

    # main() is run through TimeLimitExecutor with the configured timeout.
    timeout = conf.getint('tagsLoader', 'run.timeout')
    exitCode = TimeLimitExecutor(timeout, main).execute()

    if exitCode == 0:
        logger.info("Execute tags load success")
    else:
        # Per the messages below, None is reported as a timeout and any other
        # non-zero code as a failure; both end the process with -1.
        if exitCode is None:
            failure = "Execute tags load timeout, and it has been killed"
        else:
            failure = "Execute tags load failed: [exitCode=%s]" % str(exitCode)
        logger.error(failure)
        exit(-1)
historyTime = self.__getTimeFromTagsHistory() logger.info("Get tag operation history time: %s" % str(historyTime)) while self.__times > 0: self.__times -= 1 minTagsSetTime, minTagsSetTimeDate = self.__getTimeFromTagsSet(historyTime) logger.info("Get minTagsSetTime = %s, minTagsSetTimeDate = %s, remainTimes = %s" % (str(minTagsSetTime), str(minTagsSetTimeDate), str(self.__times))) if minTagsSetTime is None: time.sleep(self.__interval) continue return TagDetectResult(self.__tag, True, minTagsSetTime, minTagsSetTimeDate) return TagDetectResult(self.__tag, False) if __name__ == '__main__': import logging.config from src.parser import conf logging.config.fileConfig(conf.get('basic', 'log.conf.path')) logger = logging.getLogger('stdout') tagDetector = TagDetector( tag='10000', duration=conf.getint('basic', 'sync.duration'), tagsSetPath=conf.get('basic', 'fs.tags.path'), tagsHistoryPath=conf.get('webReloader', 'tags.history.path'), times=conf.getint('tagDetector', 'detect.times'), interval=conf.getint('tagDetector', 'detect.interval')) logger.info("Begin detect ...") logger.info(tagDetector.detect())