Exemple #1
0
def load_file(date_arg):
    """Load the prepared click-stream files into the day's Hive partition.

    ``check_hive_file()`` is consulted first. When it reports a problem, the
    failure is printed and logged and nothing is loaded. Otherwise the ``dt``
    partition for ``date_arg`` is dropped from ``HIVE_DB``.``HIVE_TABLE`` and
    every path in ``FILE_HIVE_LIST`` is loaded into that partition, stopping
    at the first load whose status is ``False``.

    Args:
        date_arg: Day being processed. Dashes are stripped to build the
            partition value (the original notes give "2016-11-30" ->
            "20161130" as an example).

    Returns:
        ``False`` as soon as one file fails to load. Every other path,
        including the fully successful one, falls through and returns
        ``None``.
    """
    # HIVE_DB, HIVE_TABLE and FILE_HIVE_LIST are module-level settings that
    # are only read here, so no ``global`` declaration is required.
    if not check_hive_file():
        # Step 5.3 in the original notes: the expected files are missing.
        failure = 'click stream file parse fail: file count not equal WORKERS'
        print('%s %s' % (MyTime.get_local_time(), '-------------- ' + failure))
        log.error(failure)
        # MyAlarm.send_mail_sms(failure)  # alarm hook, disabled in the original
        return

    client = MyHiveBin.HiveBin()
    day_key = date_arg.replace('-', '')

    # Step 5.4 in the original notes: drop the partition of the processed day
    # before loading. (The notes mention HIVE_DB='to8to_rawdata' and
    # HIVE_TABLE='clickstream'; the actual values are set outside this block.)
    client.drop_partition(HIVE_DB, HIVE_TABLE, 'dt', day_key)

    target_partition = {'dt': day_key}
    for path in FILE_HIVE_LIST:
        begin_msg = 'load file ' + path + ' into hive begin'
        print('%s %s' % (MyTime.get_local_time(), '-------------- ' + begin_msg))
        log.info(begin_msg)

        # Only an explicit ``False`` status counts as a failed load.
        if client.load_file(HIVE_DB, HIVE_TABLE, path, target_partition) is False:
            # Step 5.5 in the original notes: file present but load failed.
            fail_msg = 'Load file ' + path + ' into hive status:fail; Click stream parse exit'
            log.error(fail_msg)
            # MyAlarm.send_mail_sms(fail_msg)  # alarm hook, disabled in the original
            return False
Exemple #2
0
def load_file(date, today):
    """Wait for the day's tar archive, unpack it and load each file into Hive.

    The ``xxxx-xx-xx`` placeholder in the module-level ``tar_src`` and
    ``tar_des`` is replaced with ``date``. Both globals are reassigned, so the
    placeholder is gone after the first call. The function then polls every
    ``time_rate`` seconds:

    * archive present, ``file_modify_stat`` and ``MyTool.tar_file`` both
      truthy: every extracted file is converted with ``dos2unix`` and loaded
      with ``load_file_single_overwrite`` into ``to8to_rawdata``;
    * archive absent and the deadline derived from ``latest_time``/``today``
      has passed: a critical log entry and an alarm are emitted.

    Compared with the previous version, the directory is created with
    ``os.makedirs`` and ``dos2unix`` is run through ``subprocess.call`` with
    an argument list. Paths containing spaces or shell metacharacters are
    therefore handled correctly, and the conversion has always finished
    before the Hive load starts.

    Args:
        date: Date string substituted into the archive and target paths.
        today: Date string substituted into ``latest_time`` to build the
            deadline.

    Returns:
        ``True`` once the archive has been unpacked and loaded, ``False``
        when the archive is still missing after the deadline.
    """
    global tar_src, tar_des
    import subprocess  # local import: only this function needs it

    tar_src = tar_src.replace('xxxx-xx-xx', date)
    tar_des = tar_des.replace('xxxx-xx-xx', date)
    print(tar_src)
    latest_time_stamp = MyTime.datetime_timestamp(
        latest_time.replace('xxxx-xx-xx', today))

    if not os.path.exists(tar_des):
        try:
            os.makedirs(tar_des)
        except OSError as err:
            # The old `mkdir -p` shell call never aborted the job on failure;
            # stay best-effort but leave a trace in the log.
            log.info('create dir ' + tar_des + ' fail: ' + str(err))

    # NOTE(review): the deadline is only checked while the archive is absent.
    # If it exists but never passes file_modify_stat / tar_file, this loop
    # keeps polling forever. Confirm whether that is intended.
    while True:
        if os.path.exists(tar_src):
            if file_modify_stat(tar_src) and MyTool.tar_file(tar_src, tar_des):
                for root, dirs, files in os.walk(tar_des):
                    for d in dirs:
                        print(os.path.join(root, d))
                    for f in files:
                        path = os.path.join(root, f)
                        try:
                            # Argument list: no shell parsing of the path,
                            # and call() returns only after dos2unix exits.
                            subprocess.call(['/usr/bin/dos2unix', path])
                        except OSError as err:
                            # A missing binary did not stop the load before
                            # either; record it and carry on.
                            log.info('dos2unix ' + path + ' fail: ' + str(err))
                        hive = MyHiveBin.HiveBin()
                        hive.load_file_single_overwrite('to8to_rawdata', path)
                log.info('Mysql actual kefu yuyue to hive status:ok')
                break
        else:
            log.info('Mysql actual tar file not exists')
            now_time_stamp = MyTime.datetime_timestamp(MyTime.get_local_time())
            if now_time_stamp > latest_time_stamp:
                log.critical('not find ' + tar_src)
                MyAlarm.send_mail_sms('Get Mysql actual tar file status:fail!')
                return False

        time.sleep(time_rate)
    return True
Exemple #3
0
                log.error(log_str)
                #MyAlarm.send_mail_sms(log_str)                                         # 5.5 假如文件存在  但没导成功,将发送告警
                return False


def main(date_arg):
    """Run the click-stream job for one day.

    Steps, in order:
      1. ``set_file_path(date_arg)`` (step 3.2 in the original notes: sets
         the file paths and builds the file-name list).
      2. Try to create the directory ``FILE_HIVE_PATH``; any exception is
         printed and otherwise ignored (step 3.3).
      3. ``check_file()`` (step 3.4: checks that the JSON log file has been
         produced). When it returns a falsy value nothing further happens.
      4. ``get_file_size(FILE_JSON_NAME)`` followed by ``click_stream()``
         (steps 3.5 and 3.6), with progress printed and logged around them.

    Args:
        date_arg: Passed unchanged to ``set_file_path``.

    Returns:
        ``None`` in every case.
    """
    # FILE_JSON_NAME and FILE_HIVE_PATH are module-level values that are only
    # read here (presumably assigned by set_file_path; verify against it).
    set_file_path(date_arg)

    try:
        os.mkdir(FILE_HIVE_PATH)
    except Exception as ex:
        # Best-effort: report the problem (e.g. directory already there) and
        # keep going, exactly as before.
        print(str(ex))

    print('%s %s' % (MyTime.get_local_time(),
                     '-------------- tar click stream file begin'))
    log.info('tar click stream file begin')

    if not check_file():
        return

    print('%s %s' % (MyTime.get_local_time(),
                     '-------------- tar click stream file success, then process work'))
    log.info('tar click stream file success')
    log.info('click stream to8to process work')

    # The original author notes that this size lookup is somewhat redundant.
    get_file_size(FILE_JSON_NAME)
    click_stream()

    print('%s %s' % (MyTime.get_local_time(), '-------------- process success'))
    log.info('click stream to8to process work success')


if __name__ == '__main__':