コード例 #1
0
def rerun(tag, hqlList, dirNameList, fileNameList, maxFileSize, serialNoWidth,
          checker, checkerFieldSeparator, startDate, endDate):
    """ Manually re-run the vgop reloader for each record date in a range.

    startDate and endDate are bill times (day or month). The loop starts at
    startDate, advances with TimeUtils.timedelta(date, 1) and stops once the
    date compares greater than endDate. The process exits with -1 as soon as
    one reload reports failure.
    """

    logger.info('Running vgop reloader: [startDate=%s] [endDate=%s]' % (startDate, endDate))

    currentDate = startDate
    while currentDate <= endDate:
        # One target directory per dir name, all under the re-run load path.
        rerunRoot = conf.get('vgopReloader', 'rerun.load.path')
        targetDirs = []
        for dirName in dirNameList:
            targetDirs.append(os.path.join(rerunRoot, dirName))

        logger.info('Running vgop reloader ... [recordDate=%s]' % currentDate)

        loadCmd = conf.get('coreHiveLoader', 'shell.load.cmd')
        # Put the current record date into every HQL statement and file name.
        datedHqls = [hql.replace('%s', currentDate) for hql in hqlList]
        datedFileNames = [pattern % currentDate for pattern in fileNameList]
        fieldSeparator = conf.get('vgopReloader', 'field.separator', '|')
        parallelism = conf.getint('vgopReloader', 'reload.parallel')
        retries = conf.getint('vgopReloader', 'retry.times')

        # Only resolve a checker path when a checker name was given;
        # otherwise an empty string is passed through.
        if checker != '':
            checkerScript = os.path.join(conf.get('vgopReloader', 'checkers.path'), checker)
        else:
            checkerScript = ''

        reloader = FsReloader(
            tag=tag,
            loadCmd=loadCmd,
            recordDate=currentDate,
            hqlList=datedHqls,
            loadPathList=targetDirs,
            fileNameList=datedFileNames,
            separator=fieldSeparator,
            isAddRowIndex=False,
            parallel=parallelism,
            retryTimes=retries,
            maxFileSize=maxFileSize,
            serialNoWidth=serialNoWidth,
            checkerPath=checkerScript,
            checkerFieldSeparator=checkerFieldSeparator,
            bakupPathList=[],
            tagsHistoryPath='',
            operationTime=None)

        succeeded = reloader.run()
        if not succeeded:
            exit(-1)
        logger.info("Run vgop reloader success: [date=%s]" % currentDate)
        currentDate = TimeUtils.timedelta(currentDate, 1)

    logger.info("Run all vgop reloader success")
コード例 #2
0
def main():
    """ Run the tags loader built from the 'basic' config section.

    Exits the process with -1 when the loader's run() reports failure.
    """
    logger.info("Running tags loader ...")

    hdfsTagsPath = conf.get('basic', 'hdfs.tags.path')
    fsTagsPath = conf.get('basic', 'fs.tags.path')
    syncDuration = conf.getint('basic', 'sync.duration')

    loader = TagsLoader(hdfsPath=hdfsTagsPath,
                        fsPath=fsTagsPath,
                        duration=syncDuration)
    if loader.run():
        return
    exit(-1)
コード例 #3
0
ファイル: webReload.py プロジェクト: deadendif/hiveloader
def run(tag, detailTableList, hqlList, sqlList, dtype, deltaList):
    """ Scheduled execution: detect the tag, then run the web reloader.

    When the detector reports a detection, one WebReloader is run for each
    delta in deltaList; the process exits with -1 on the first failed run.
    Each entry of detailTableList is read as 'connection:table'.
    """

    logger.info("Running tag detector ...")
    syncDuration = conf.getint('basic', 'sync.duration')
    tagsSetPath = conf.get('basic', 'fs.tags.path')
    historyPath = conf.get('webReloader', 'tags.history.path')
    detectTimes = conf.getint('tagDetector', 'detect.times')
    detectInterval = conf.getint('tagDetector', 'detect.interval')
    detector = TagDetector(
        tag=tag,
        duration=syncDuration,
        tagsSetPath=tagsSetPath,
        tagsHistoryPath=historyPath,
        times=detectTimes,
        interval=detectInterval)
    detectResult = detector.detect()
    logger.info('Detect result: %s' % str(detectResult))

    if not detectResult.hasDetected:
        logger.info("No need to run web reloader because of no tag detected")
        return

    for delta in deltaList:
        # Record (bill) time for this delta.
        recordDate = toRecordDate(detectResult.minTagsSetTimeDate, dtype, delta)

        # Split every 'connection:table' entry into its two parts.
        connectionList = []
        tableList = []
        for entry in detailTableList:
            parts = entry.split(':')
            connectionList.append(parts[0])
            tableList.append(parts[1].upper())

        loadRoot = conf.get('webReloader', 'load.path')
        loadPathList = [os.path.join(loadRoot, name) for name in tableList]

        namePattern = conf.get('webReloader', 'file.name.pattern')
        fileNameList = [namePattern.format(table=name, date=recordDate) for name in tableList]

        bakupRoot = conf.get('webReloader', 'bakup.path')
        bakupPathList = [os.path.join(bakupRoot, name, recordDate) for name in tableList]

        logger.info('Running web reloader ... [recordDate=%s]' % recordDate)

        loadCmd = conf.get('coreHiveLoader', 'java.load.cmd')
        # Only statements containing a '%s' placeholder get the date applied.
        datedHqls = [hql % recordDate if '%s' in hql else hql for hql in hqlList]
        fieldSeparator = conf.get('webReloader', 'field.separator', '|')
        parallelism = conf.getint('webReloader', 'reload.parallel')
        retries = conf.getint('webReloader', 'retry.times')
        datedSqls = [sql % recordDate if '%s' in sql else sql for sql in sqlList]

        reloader = WebReloader(
            tag=tag,
            loadCmd=loadCmd,
            recordDate=recordDate,
            hqlList=datedHqls,
            loadPathList=loadPathList,
            fileNameList=fileNameList,
            separator=fieldSeparator,
            isAddRowIndex=False,
            parallel=parallelism,
            retryTimes=retries,
            bakupPathList=bakupPathList,
            connectionList=connectionList,
            sqlList=datedSqls,
            tagsHistoryPath=conf.get('webReloader', 'tags.history.path'),
            operationTime=detectResult.minTagsSetTime)
        succeeded = reloader.run()
        if not succeeded:
            exit(-1)
コード例 #4
0
ファイル: validate.py プロジェクト: deadendif/hiveloader
def validate(params):
    """ Validate the positional parameter list before a reload is started.

    Layout of params, as read by the checks below:
        params[0]  tag; must not be empty
        params[1]  hqls, params[2] dir names, params[3] file names; the
                   three strings must contain the same number of '&'
        params[4]  maxFileSize and params[5] serialNoWidth; each must
                   consist of digits only
        params[6]  checker name; '' skips the checker checks, otherwise the
                   file must exist under the configured 'checkers.path' and
                   params[7] (checker field separator) must not be empty
        params[8]  cycle type 'DAY' or 'MONTH' (case-insensitive) when 9
                   params are given
        params[8], params[9]  startDate and endDate when 10 params are
                   given; they must satisfy TimeUtils.isComparable

    Returns True when every check passes; otherwise the first failed check
    is logged as an error and False is returned.
    """
    if len(params) not in [8, 9, 10]:
        logger.error("Wrong params numbers: [params=%s]" % str(params))
        return False

    if params[0] == '':
        logger.error("Tag cannot be empty")
        return False

    if params[1].count('&') != params[2].count('&'):
        logger.error(
            "Hql's number is not equal to dir name's number: [hqls=%s] [dirNames=%s]"
            % (params[1], params[2]))
        return False

    if params[1].count('&') != params[3].count('&'):
        logger.error(
            "Hql's number is not equal to file name's number: [hqls=%s] [fileNames=%s]"
            % (params[1], params[3]))
        return False

    # Both values must be numeric. The previous condition joined the two
    # negated checks with `and`, so it only rejected the input when BOTH
    # values were invalid and let a single bad value through.
    if not (params[4].isdigit() and params[5].isdigit()):
        logger.error(
            "Param maxFileSize and serialNoWidth must be non-negative integer: [maxFileSize=%s] [serialNoWidth=%s]"
            % (params[4], params[5]))
        return False

    if params[6] != '':
        checkersPath = conf.get('vgopReloader', 'checkers.path')
        if not os.path.isfile(os.path.join(checkersPath, params[6])):
            logger.error("No such checker found in '%s': [checkerName=%s]" %
                         (checkersPath, params[6]))
            return False

        # The separator is only required when a checker is configured.
        if params[7] == '':
            logger.error("Checker field separator cannot be empty")
            return False

    if len(params) == 9 and params[8].upper() not in ['DAY', 'MONTH']:
        logger.error("Cycle type must be 'DAY' or 'MONTH': [type=%s]" %
                     params[8])
        return False

    if len(params) == 10 and not TimeUtils.isComparable(params[8], params[9]):
        logger.error("Date format is invalid: [startDate=%s] [endDate=%s]" %
                     (params[8], params[9]))
        return False

    logger.info("Params validation success")
    return True
コード例 #5
0
ファイル: tagDetect.py プロジェクト: deadendif/hiveloader
def main(tag, historyPath, duration=None):
    """ Detect whether the tag was newly and validly generated within the
    last `duration` days, and log the detection result.

    When duration is None it is read from the 'sync.duration' option of the
    'basic' config section.
    """
    if duration is None:
        duration = conf.getint('basic', 'sync.duration')

    logger.info(
        "Running tag detector [tag=%s] [historyPath=%s] [duration=%s] ..." %
        (tag, historyPath, duration))

    tagsSetPath = conf.get('basic', 'fs.tags.path')
    detectTimes = conf.getint('tagDetector', 'detect.times')
    detectInterval = conf.getint('tagDetector', 'detect.interval')
    detector = TagDetector(tag=tag,
                           duration=duration,
                           tagsSetPath=tagsSetPath,
                           tagsHistoryPath=historyPath,
                           times=detectTimes,
                           interval=detectInterval)
    outcome = detector.detect()
    logger.info(outcome)
コード例 #6
0
def run(tag, hqlList, dirNameList, fileNameList, maxFileSize, serialNoWidth,
        checker, checkerFieldSeparator, dtype='DAY'):
    """ Scheduled execution: detect the tag, then run the vgop reloader.

    A TagDetector is built from the configuration and queried once. When it
    reports a detection, the record date is derived with toRecordDate() from
    the detected date and dtype, and a single FsReloader is run for it; the
    process exits with -1 when that run fails. When nothing is detected the
    function only logs that fact.
    """

    logger.info("Running tag detector ...")
    detector = TagDetector(
        tag=tag,
        duration=conf.getint('basic', 'sync.duration'),
        tagsSetPath=conf.get('basic', 'fs.tags.path'),
        tagsHistoryPath=conf.get('vgopReloader', 'tags.history.path'),
        times=conf.getint('tagDetector', 'detect.times'),
        interval=conf.getint('tagDetector', 'detect.interval'))
    detectResult = detector.detect()
    logger.info('Detect result: %s' % str(detectResult))

    if detectResult.hasDetected:
        recordDate = toRecordDate(detectResult.minTagsSetTimeDate, dtype)

        # One target directory per dir name, all under the load path.
        loadPath = conf.get('vgopReloader', 'load.path')
        loadPathList = [os.path.join(loadPath, dirName) for dirName in dirNameList]

        logger.info('Running vgop reloader ... [recordDate=%s]' % recordDate)
        reloader = FsReloader(
            tag=tag,
            loadCmd=conf.get('coreHiveLoader', 'shell.load.cmd'),
            recordDate=recordDate,
            hqlList=[hql.replace('%s', recordDate) for hql in hqlList],
            loadPathList=loadPathList,
            fileNameList=[fn % recordDate for fn in fileNameList],
            separator=conf.get('vgopReloader', 'field.separator', '|'),
            isAddRowIndex=False,
            parallel=conf.getint('vgopReloader', 'reload.parallel'),
            retryTimes=conf.getint('vgopReloader', 'retry.times'),
            maxFileSize=maxFileSize,
            serialNoWidth=serialNoWidth,
            # Only resolve a checker path when a checker name was given.
            checkerPath=os.path.join(conf.get('vgopReloader', 'checkers.path'), checker) if checker != '' else '',
            checkerFieldSeparator=checkerFieldSeparator,
            bakupPathList=[],
            tagsHistoryPath=conf.get('vgopReloader', 'tags.history.path'),
            operationTime=detectResult.minTagsSetTime)
        if not reloader.run():
            exit(-1)
    else:
        # This function drives the vgop reloader; the message previously
        # named the web reloader, which made the log misleading.
        logger.info("No need to run vgop reloader because of no tag detected")
コード例 #7
0
ファイル: webReload.py プロジェクト: deadendif/hiveloader
def rerun(tag, detailTableList, hqlList, sqlList, startDate, endDate):
    """ Manually re-run the web reloader for each record date in a range.

    startDate and endDate are bill times (day or month). The loop starts at
    startDate, advances with TimeUtils.timedelta(date, 1) and stops once the
    date compares greater than endDate. The process exits with -1 as soon as
    one reload reports failure. Each entry of detailTableList is read as
    'connection:table'.
    """

    logger.info('Running web reloader: [startDate=%s] [endDate=%s]' % (startDate, endDate))

    currentDate = startDate
    while currentDate <= endDate:
        logger.info("Running web reloader: [date=%s]" % currentDate)

        # Split every 'connection:table' entry into its two parts.
        connectionList = []
        tableList = []
        for entry in detailTableList:
            parts = entry.split(':')
            connectionList.append(parts[0])
            tableList.append(parts[1].upper())

        rerunRoot = conf.get('webReloader', 'rerun.load.path')
        loadPathList = [os.path.join(rerunRoot, name) for name in tableList]

        namePattern = conf.get('webReloader', 'file.name.pattern')
        fileNameList = [namePattern.format(table=name, date=currentDate) for name in tableList]

        bakupRoot = conf.get('webReloader', 'bakup.path')
        bakupPathList = [os.path.join(bakupRoot, name, currentDate) for name in tableList]

        loadCmd = conf.get('coreHiveLoader', 'java.load.cmd')
        # Only statements containing a '%s' placeholder get the date applied.
        datedHqls = [hql % currentDate if '%s' in hql else hql for hql in hqlList]
        fieldSeparator = conf.get('webReloader', 'field.separator', '|')
        parallelism = conf.getint('webReloader', 'reload.parallel')
        retries = conf.getint('webReloader', 'retry.times')
        datedSqls = [sql % currentDate if '%s' in sql else sql for sql in sqlList]
        historyPath = conf.get('webReloader', 'tags.history.path')

        reloader = WebReloader(
            tag=tag,
            loadCmd=loadCmd,
            recordDate=currentDate,
            hqlList=datedHqls,
            loadPathList=loadPathList,
            fileNameList=fileNameList,
            separator=fieldSeparator,
            isAddRowIndex=False,
            parallel=parallelism,
            retryTimes=retries,
            bakupPathList=bakupPathList,
            connectionList=connectionList,
            sqlList=datedSqls,
            tagsHistoryPath=historyPath,
            operationTime=None)

        succeeded = reloader.run()
        if not succeeded:
            exit(-1)
        logger.info("Run web reloader success: [date=%s]" % currentDate)
        currentDate = TimeUtils.timedelta(currentDate, 1)

    logger.info("Run all web reloader success")
コード例 #8
0
# Module-level logger named 'stdout', used by main() and the script entry
# point below.
logger = logging.getLogger('stdout')
# English rendering of the note below: download the tags generated on HDFS
# within the last `duration` days, together with their generation times, to
# the local side.
"""
将HDFS上近duration天的Tag及其生成时间下载到本地
"""


def main():
    """ Build a TagsLoader from the 'basic' config section and run it.

    The process exits with -1 when run() reports failure.
    """
    logger.info("Running tags loader ...")

    sourcePath = conf.get('basic', 'hdfs.tags.path')
    localPath = conf.get('basic', 'fs.tags.path')
    days = conf.getint('basic', 'sync.duration')
    loader = TagsLoader(hdfsPath=sourcePath, fsPath=localPath, duration=days)

    succeeded = loader.run()
    if not succeeded:
        exit(-1)


if __name__ == '__main__':
    # Script entry point: configure logging, then run main() through a
    # TimeLimitExecutor built with the configured 'run.timeout' value.
    logConfPath = conf.get('basic', 'log.conf.path')
    logging.config.fileConfig(logConfPath)

    runTimeout = conf.getint('tagsLoader', 'run.timeout')
    executor = TimeLimitExecutor(runTimeout, main)
    exitCode = executor.execute()

    if exitCode == 0:
        logger.info("Execute tags load success")
    else:
        # A None exit code is reported as a timeout, anything else non-zero
        # as a failure; both end the process with -1.
        if exitCode is None:
            logger.error("Execute tags load timeout, and it has been killed")
        else:
            logger.error("Execute tags load failed: [exitCode=%s]" % str(exitCode))
        exit(-1)
コード例 #9
0
ファイル: TagDetector.py プロジェクト: deadendif/hiveloader
        historyTime = self.__getTimeFromTagsHistory()
        logger.info("Get tag operation history time: %s" % str(historyTime))
        while self.__times > 0:
            self.__times -= 1
            minTagsSetTime, minTagsSetTimeDate = self.__getTimeFromTagsSet(historyTime)
            logger.info("Get minTagsSetTime = %s, minTagsSetTimeDate = %s, remainTimes = %s" %
                        (str(minTagsSetTime), str(minTagsSetTimeDate), str(self.__times)))
            if minTagsSetTime is None:
                time.sleep(self.__interval)
                continue
            return TagDetectResult(self.__tag, True, minTagsSetTime, minTagsSetTimeDate)
        return TagDetectResult(self.__tag, False)


if __name__ == '__main__':
    # Manual run: configure logging, build a TagDetector for the fixed tag
    # '10000' from the configuration, and log what detect() returns.
    import logging.config
    from src.parser import conf

    logConfPath = conf.get('basic', 'log.conf.path')
    logging.config.fileConfig(logConfPath)
    logger = logging.getLogger('stdout')

    detectorOptions = dict(
        tag='10000',
        duration=conf.getint('basic', 'sync.duration'),
        tagsSetPath=conf.get('basic', 'fs.tags.path'),
        tagsHistoryPath=conf.get('webReloader', 'tags.history.path'),
        times=conf.getint('tagDetector', 'detect.times'),
        interval=conf.getint('tagDetector', 'detect.interval'))
    tagDetector = TagDetector(**detectorOptions)

    logger.info("Begin detect ...")
    detectOutcome = tagDetector.detect()
    logger.info(detectOutcome)