Beispiel #1
0
def process_main():
    """Dispatch stdin lines to a worker pool in fixed-size batches.

    Reads ``process_num`` (pool size) and ``bath_num`` (lines per batch)
    from the ``dota_server`` section of ``ServerConf``. If either option
    is missing or is not an integer, the error is logged and the process
    exits with status -1.

    Each full batch of lines is submitted to ``work_func`` through
    ``apply_async``; a trailing partial batch is submitted as well. The
    pool is then closed and joined.
    """
    try:
        section = "dota_server"
        worker_count = int(ServerConf.get(section, "process_num"))
        batch_size = int(ServerConf.get(section, "bath_num"))
    except (ConfigParser.NoOptionError, ConfigParser.NoSectionError,
            ValueError) as err:
        pydotalog.error("initConfig fail:%s", str(err))
        sys.exit(-1)

    workers = multiprocessing.Pool(processes=worker_count)
    pending = []
    for raw_line in sys.stdin:
        pending.append(raw_line)
        if len(pending) >= batch_size:
            workers.apply_async(work_func, (pending, ))
            # Start a fresh list; the submitted one now belongs to the task.
            pending = []
            gc.collect()

    # Submit whatever is left over after the last full batch.
    if pending:
        workers.apply_async(work_func, (pending, ))

    workers.close()
    workers.join()
def get_file_list(topic_name_tmp, date, list_num):
    """List the received files for one topic/hour and split them into groups.

    :param topic_name_tmp: topic name; used as the key into the
        ``recv_file_name`` mapping of the ``dota_server`` config section
    :param date: time string; characters 0-3 are the year, 4-5 the month,
        6-7 the day and 8-9 the hour
    :param list_num: number of groups to produce
    :return: list of exactly ``list_num`` lists holding the lines printed
        by ``ls`` in shuffled order; groups are empty when nothing was
        listed or when ``ls`` failed
    """
    recv_path = ServerConf["output_path"]["recv"]
    file_name_dict = ServerConf["dota_server"]["recv_file_name"]
    if recv_path == "" or file_name_dict == "":
        pydotalog.error("initConfig fail:get_file_list")
        sys.exit(-1)

    year = date[0:4]
    month = date[4:6]
    day = date[6:8]
    hour = date[8:10]
    file_name = file_name_dict[topic_name_tmp] % (year, month, day, hour)

    # The "dau_ott_3111" topic reads its input from the "ott_pv" directory.
    if topic_name_tmp == "dau_ott_3111":
        topic_recv_name = "ott_pv"
    else:
        topic_recv_name = topic_name_tmp
    recv_file_path = "%s/%s/%s/%s/%s" % (recv_path, year, month,
                                         topic_recv_name, file_name)

    # NOTE(review): the path is interpolated into a shell command line.
    # Presumably this relies on the shell expanding wildcards contained in
    # the configured file name pattern -- confirm before replacing it with
    # glob/os.listdir or quoting the argument.
    (sta, li) = commands.getstatusoutput('ls ' + recv_file_path)
    if sta != 0:
        # On failure the captured output is an error message, not a file
        # list; never hand it to the workers as if it were a path.
        pydotalog.error("list recv files failed[%s]: %s", recv_file_path, li)
        file_list = []
    else:
        # Drop empty entries (empty output would otherwise yield ['']).
        file_list = [name for name in li.split('\n') if name]

    random.shuffle(file_list)

    # Ceiling division with integer arithmetic: group size needed so that
    # list_num groups cover every file.
    mean = (len(file_list) + list_num - 1) // list_num

    recv_file_list = []
    for i in range(list_num):
        recv_file_list.append(file_list[(i * mean):((i + 1) * mean)])
    return recv_file_list
def work_func(topic, run_time, input_file_list, postfix):
    """Format every line of the given input files and write the results.

    :param topic: topic name, passed to ``get_class_name`` and to every
        ``write_to_file`` call
    :param run_time: run time string, passed to ``get_class_name`` and to
        every ``write_to_file`` call
    :param input_file_list: iterable of input file paths to process
    :param postfix: suffix forwarded to ``write_to_file``

    ``processFormat`` returns a sequence whose first element is a status:

    * ``-1`` -- the result is written as ``'orig_err'`` and the raw line is
      not written as ``'orig'``
    * ``-2`` -- the result is written as ``'des_err'``
    * ``0``  -- the result is written as ``'des'``
    * ``99`` -- nothing is written here

    For every status other than ``-1`` and ``99`` the raw line is also
    written as ``'orig'``, except for the topic
    ``mpp_vv_mobile_211_20151012_live``.

    Exits with status -1 when ``get_class_name`` reports an error.
    """
    (rt, format_client) = get_class_name(topic, run_time)

    if rt != 0:
        pydotalog.error(format_client)
        sys.exit(-1)

    format_client.write_to_file = write_to_file

    try:
        for input_file in input_file_list:
            with open(input_file) as f:
                for line in f:
                    res = format_client.processFormat(line)
                    status = res[0]
                    if status == -1:
                        write_to_file(res[1], topic, res[2], run_time,
                                      'orig_err', postfix)
                        continue
                    if status == 99:
                        # Batch-reported records write themselves to file;
                        # they are not written by this program.
                        continue
                    if status == -2:
                        write_to_file(res[1], topic, res[2], run_time,
                                      'des_err', postfix)
                    elif status == 0:
                        write_to_file(res[1], topic, res[2], run_time,
                                      'des', postfix)

                    # Use this function's own arguments rather than the
                    # module-level names set by the script entry point, so
                    # the worker does not depend on inherited globals.
                    if topic != "mpp_vv_mobile_211_20151012_live":
                        write_to_file(line, topic, res[2], run_time,
                                      'orig', postfix)
    finally:
        # Close the output files even when a file fails part-way through.
        close_files()
Beispiel #4
0
def CheckLiveTime(log_time, live_info=None):
    """
    :summary: Check whether the log time falls inside the period of one of
              the given activities, and return that activity's id and
              camera id.
    :param log_time: timestamp of the log record to check (anything
                     ``int()`` accepts)
    :param live_info: list of activity records for the log; each record is
                      a sequence whose items 0 and 1 are returned on a
                      match and whose items 3 and 4 are the start and end
                      time as "%Y-%m-%d %H:%M:%S" strings. ``None`` is
                      treated as an empty list.
    :return: (errnum, item 0, item 1) -- errnum 0: valid; -1: malformed
             record or time; -2: the time is not inside any period
    """
    time_format = "%Y-%m-%d %H:%M:%S"

    # A None default avoids sharing one mutable list between calls.
    for live_info_one in live_info or ():
        try:
            startTime = int(time.mktime(
                time.strptime(live_info_one[3], time_format)))
            endTime = int(time.mktime(
                time.strptime(live_info_one[4], time_format)))

            if startTime <= int(log_time) <= endTime:
                return 0, live_info_one[0], live_info_one[1]
        except (IndexError, KeyError, TypeError, ValueError) as e:
            # IndexError covers records that are too short; TypeError
            # covers non-string times or a non-numeric log_time object.
            pydotalog.error("check time failed: %s", str(e))
            return -1, 0, 0

    pydotalog.error("%s is not a valid time !", str(log_time))
    return -2, 0, 0
Beispiel #5
0
def __genOutputFileName(log_time, topic, start_time, data_type):
    """
    Build the output file path from the log time, topic, suffix and data
    type, creating the parent directory when it does not exist yet.

    :param log_time: log time of the record to write, hour precision
    :type log_time: basestring 201511012200 (12 characters)
    :param topic: kafka topic name
    :type topic: basestring
    :param start_time: file name suffix; the time of the recv file
    :type start_time: basestring 201511012300 (12 characters)
    :param data_type: kind of data being written; also the key looked up
        in the ``output_path`` config section
    :type data_type: basestring
    :return: absolute file path, or "" when the configured output path is
        empty or the directory cannot be created
    """
    output_path = ServerConf["output_path"][data_type]
    if output_path == "":
        pydotalog.error("initConfig fail: no key[output_path or %s]",
                        data_type)
        return ""

    if "err" in data_type:
        base_name = 'err_{0}_{1}_{2}_{3}'.format(log_time, topic, start_time,
                                                 data_type)
    elif "des" == data_type:
        # The first matching marker wins; the order is significant.
        if "_pv" in topic:
            raw_tag = "pvrawdata"
        elif "mglive" in topic:
            raw_tag = "mgliverawdata"
        elif "dau_" in topic:
            raw_tag = "daurawdata"
        else:
            raw_tag = "playrawdata"
        base_name = '{0}_{1}_{2}_{3}'.format(log_time, raw_tag, topic,
                                             start_time)
    else:
        base_name = '{0}_{1}_{2}'.format(log_time, topic, start_time)

    # Layout: <output_path>/<year>/<month>/<file name>
    file_str = os.path.abspath('{0}/{1}/{2}/{3}'.format(
        str(output_path), log_time[0:4], log_time[4:6], base_name))

    dir_name = os.path.dirname(file_str)
    if not os.path.isdir(dir_name):
        try:
            os.makedirs(dir_name)
        except OSError as e:
            # Another process may have created the directory between the
            # check and makedirs; that is not a failure.
            if not os.path.isdir(dir_name):
                pydotalog.error("make dir failed:[%s]", str(e))
                return ""
    return file_str
Beispiel #6
0
def __check_time(log_time):
    """
    :summary: Check whether log_time looks like a plausible time value.
    :param log_time: 12-character time string, e.g. 201510101000
    :return: True when log_time has exactly 12 characters and its integer
             value lies strictly between 201000000000 and 205000000000;
             False otherwise (a non-numeric value is also logged)
    """
    if len(log_time) == 12:
        try:
            numeric_time = int(log_time)
        except ValueError:
            pydotalog.error("log_time[%s] is overtime", str(log_time))
            return False
        return 201000000000 < numeric_time < 205000000000
    return False
def process_main(topic_name_tmp, date):
    """Process the received files of one topic with a pool of workers.

    Reads ``process_num`` from the ``dota_server`` config section, asks
    ``get_file_list`` to split the input files into that many groups and
    submits one ``work_func`` task per group. Each task gets the suffix
    ``_<group index>``. The pool is then closed and joined.

    :param topic_name_tmp: topic name forwarded to ``get_file_list`` and
        ``work_func``
    :param date: time string forwarded to ``get_file_list`` and
        ``work_func``

    Logs an error and exits with status -1 when ``process_num`` is
    missing, is not an integer, or is not positive.
    """
    # int() never returns "", so the value has to be validated around the
    # conversion itself rather than by comparing the result with "".
    try:
        processNum = int(ServerConf["dota_server"]["process_num"])
    except (KeyError, TypeError, ValueError) as e:
        pydotalog.error("initConfig fail:process_num %s", str(e))
        sys.exit(-1)

    if processNum <= 0:
        pydotalog.error("initConfig fail:process_num")
        sys.exit(-1)

    file_list = get_file_list(topic_name_tmp, date, processNum)

    pool = multiprocessing.Pool(processes=processNum)
    for i in range(processNum):
        pool.apply_async(work_func, args=(topic_name_tmp, date, file_list[i],
                                          '_%s' % str(i)))

    pool.close()
    pool.join()
Beispiel #8
0
def __output_to_files(log_time, line, topic, start_time, data_type):
    """Append one line to the output file selected by the arguments.

    The target path comes from ``__genOutputFileName``. Opened files are
    kept in the module-level ``file_list`` mapping, keyed by path, so each
    path is opened only once.

    :return: True when the line was written; False when no path could be
        generated or the file could not be opened
    """
    target_path = __genOutputFileName(log_time, topic, start_time, data_type)
    if target_path == "":
        return False

    if target_path not in file_list:
        try:
            file_list[target_path] = open(target_path, 'w')
        except IOError as err:
            pydotalog.error("IOError: %s", str(err))
            return False

    out_handle = file_list[target_path]
    # Normalise the line so it ends with exactly one newline.
    out_handle.write(line.strip('\n') + '\n')
    return True
Beispiel #9
0
def LoadLiveMeizi(start_time):
    """
    :summary: Load the live media-asset information.
    :param start_time: time the asset information is loaded for, a
                       12-character string such as 201511031700
    :return: dict mapping the source id (CSV column 0) to a list of
             [activityid, cameraid, timeid, startTime, endTime] records
             taken from CSV columns 2, 5, 7, 8 and 9

    Logs an error and exits with status -1 when the CSV file
    ``<output_path>/<start_time>_live.csv`` does not exist. Lines with too
    few columns are logged and skipped.
    """
    Meizi_info = {}

    output_path = ServerConf["meizi"]["output_path"]

    csv_path = output_path + '/' + start_time + '_live.csv'
    if not os.path.exists(csv_path):
        pydotalog.error("live_meizi info file is not exists!")
        sys.exit(-1)

    with open(csv_path) as fp:
        for i, line in enumerate(fp):
            record = line.strip().split(',')
            try:
                sourceid = record[0]
                entry = [record[2], record[5], record[7], record[8],
                         record[9]]
            except IndexError:
                # Indexing a list that is too short raises IndexError, not
                # KeyError; skip the malformed line instead of crashing.
                pydotalog.error("line[%d]: live meizi index err!", i)
                continue
            Meizi_info.setdefault(sourceid, []).append(entry)
    return Meizi_info
    file_list = get_file_list(topic_name_tmp, date, processNum)

    pool = multiprocessing.Pool(processes=processNum)
    # print time.time()
    for i in range(processNum):
        pool.apply_async(work_func, args=(topic_name_tmp, date, file_list[i], '_%s' % str(i)))

    pool.close()
    pool.join()
    # print time.time()
    # print "Sub-process(es) done."


if __name__ == "__main__":
    # Script entry point. Expects exactly two command-line arguments:
    # the topic name and a 12-character start time.
    log_dir = os.path.join(os.path.dirname(__file__), "../log/")
    pydotalog.init_logger(log_dir + "/pydota_run.log")

    # Reject a wrong argument count up front.
    if len(sys.argv) != 3:
        pydotalog.error('arg is topic_name date')
        sys.exit(-1)

    (di, topic_name, start_time) = sys.argv

    if len(start_time) != 12:
        pydotalog.error("topic:%s start_time format error, start_time: %s", topic_name, start_time)
        sys.exit(-1)

    process_main(topic_name, start_time)
Beispiel #11
0
        if topic_name != "mpp_vv_mobile_211_20151012_live":
            write_to_file(line, topic_name, res[2], start_time, 'orig')

    close_files()


if __name__ == "__main__":
    # Script entry point. Expects exactly two command-line arguments:
    # the topic name and a 12-character start time.
    log_dir = os.path.join(os.path.dirname(__file__), "../log/")
    pydotalog.init_logger(log_dir + "/pydota_run.log")

    # Reject a wrong argument count up front.
    if len(sys.argv) != 3:
        pydotalog.error('arg is topic_name date')
        sys.exit(-1)

    (di, topic_name, start_time) = sys.argv

    # Resolve the formatter for this topic first; on failure the second
    # returned value is what gets logged.
    (rt, format_client) = get_class_name(topic_name, start_time)
    if rt != 0:
        pydotalog.error(format_client)
        sys.exit(-1)

    if len(start_time) != 12:
        pydotalog.error(
            "topic:[%s] start_time format error, start_time: %s",
            topic_name, start_time)
        sys.exit(-1)

    process_main()