Ejemplo n.º 1
0
def main(
    timestamp,
    src_file_format,
    errlog_path_format,
    filename_format,
    devuser_path_format="/data1/logs/transformsaaslogs/devuserlog/%(yyyymmdd)s/%(hhmm)s.log"
):
    """Transform one source log file and hand every resulting record to LogStore.

    Args:
        timestamp: Unix time in seconds. It is formatted in local time to build
            all paths, and passed to ``Transform`` multiplied by 1000.
        src_file_format: %-style template for the input file; receives the
            ``yyyymmdd`` and ``yyyymmddhhmm`` keys.
        errlog_path_format: %-style template for the error log; receives the
            ``yyyymmdd`` and ``hhmm`` keys.
        filename_format: %-style template for regular output; receives the
            ``yyyymmdd``, ``hhmm`` and ``datatype`` keys.
        devuser_path_format: %-style template for records flagged
            ``isdevuser``; receives the ``yyyymmdd`` and ``hhmm`` keys.

    A line whose transformation fails is not fatal: its traceback and the raw
    line are written as a JSON list to the error log and counted. A one-line
    summary is printed at the end when at least one line failed.
    """
    import traceback

    transform = Transform(timestamp=timestamp * 1000)  # milliseconds
    local_time = time.localtime(timestamp)
    yyyymmdd = time.strftime('%Y%m%d', local_time)
    yyyymmddhhmm = time.strftime('%Y%m%d%H%M', local_time)
    hhmm = time.strftime('%H%M', local_time)
    src_file = src_file_format % {
        "yyyymmdd": yyyymmdd,
        "yyyymmddhhmm": yyyymmddhhmm
    }
    errlog_path = errlog_path_format % {"yyyymmdd": yyyymmdd, "hhmm": hhmm}
    devuser_path = devuser_path_format % {"yyyymmdd": yyyymmdd, "hhmm": hhmm}
    print(src_file)
    errlognum = 0
    for line in JHOpen().readLines(src_file):
        if not line:
            continue
        try:
            for item in transform.transform(line):
                datatype = item['jhd_datatype']
                item['jhd_userkey'] = item['jhd_userkey'].strip()
                filename = filename_format % {
                    "yyyymmdd": yyyymmdd,
                    "hhmm": hhmm,
                    'datatype': datatype
                }
                # Keep non-ASCII characters readable in the output instead of
                # \uXXXX escapes. On Python 2 the dumped value may already be a
                # byte string, in which case encoding fails and the value is
                # used as it is.
                try:
                    line_out = json.dumps(item,
                                          ensure_ascii=False).encode('utf-8')
                except UnicodeError:
                    line_out = json.dumps(item, ensure_ascii=False)
                # The equality test is kept on purpose: values such as None or
                # "" are not equal to False and therefore still go to the
                # devuser file, exactly as before.
                if item.get("isdevuser", False) == False:  # noqa: E712
                    LogStore(filename, line_out)
                else:
                    LogStore(devuser_path, line_out)
        except Exception:
            # Best effort per line: record the failure and carry on, but let
            # KeyboardInterrupt / SystemExit propagate.
            errinfo = traceback.format_exception(*sys.exc_info())
            errinfo.append(line)
            LogStore(errlog_path,
                     json.dumps([part.strip() for part in errinfo]))
            errlognum += 1
    if errlognum:
        print("\t".join([
            "@" + yyyymmddhhmm,
            "errlognum: %d" % errlognum,
            "err info: %s" % errlog_path
        ]))
        print("".join(["endline", '-' * 10]))
    LogStore.finished(iszip=True)
Ejemplo n.º 2
0
def main(
    timestamp,
    src_logpath_format="/data1/nginxlogs/jhsaaslogs_h5/access_jhlogs.%(yyyymmddhhmm)s",
    errlog_path_format="/data1/logs/transformh5/err/%(yyyymmdd)s/%(hhmm)s.err",
    filename_format="/data1/logs/transformh5/%(datatype)s/%(yyyymmdd)s/%(hhmm)s.log"
):
    """Transform one access-log file and hand each record to LogStore.

    Args:
        timestamp: Unix time in seconds; formatted in local time to build the
            source, error and output paths.
        src_logpath_format: %-style template for the input file; receives the
            ``yyyymmddhhmm`` and ``yyyymmdd`` keys. A path ending in ``.gz`` is
            opened with ``gzip.open``.
        errlog_path_format: %-style template for the error log; receives the
            ``yyyymmddhhmm``, ``yyyymmdd`` and ``hhmm`` keys.
        filename_format: %-style template for output; receives the
            ``datatype`` (the record's ``appkey``), ``yyyymmdd`` and ``hhmm``
            keys.

    A line that raises while being transformed or stored is written to the
    error log together with the exception text; processing then continues.
    """
    local_time = time.localtime(timestamp)
    yyyymmddhhmm = time.strftime('%Y%m%d%H%M', local_time)
    yyyymmdd = time.strftime('%Y%m%d', local_time)
    hhmm = time.strftime('%H%M', local_time)
    src_logpath = src_logpath_format % {
        "yyyymmddhhmm": yyyymmddhhmm,
        "yyyymmdd": yyyymmdd
    }
    errlog_path = errlog_path_format % {
        "yyyymmddhhmm": yyyymmddhhmm,
        "yyyymmdd": yyyymmdd,
        "hhmm": hhmm
    }
    print(src_logpath)
    opener = gzip.open if src_logpath.endswith(".gz") else open
    src_logpath_file = opener(src_logpath)
    # try/finally guarantees the handle is released even if a line blows up
    # outside the per-line handler or Transform cannot be constructed.
    try:
        # Fall back to a default-constructed Transform when construction with
        # a timestamp fails.
        try:
            transform = Transform(timestamp=timestamp)
        except Exception:
            transform = Transform()
        for line in src_logpath_file:
            try:
                data = transform.transform(line)
                if not data:
                    continue
                filename = filename_format % {
                    "datatype": data["appkey"],
                    "yyyymmdd": yyyymmdd,
                    "hhmm": hhmm
                }
                LogStore(filename, json.dumps(data))
            except Exception as e:
                LogStore(errlog_path, "%s, %s" % (e, line.strip()))
    finally:
        src_logpath_file.close()
Ejemplo n.º 3
0
def collectFiles(timestamp=None, tm=None, remote_dir_format="", local_dir_part_format="", local_dir_format="", datatypeList=None, is_store=False):
    """Copy per-datatype files from every remote host and merge them locally.

    For each datatype, one file per host in the module-level
    ``remote_host_list`` is fetched with ``scp``. The fetched parts are then
    concatenated into ``local_dir`` and removed.

    Args:
        timestamp: Unix time in seconds used to derive ``yyyymmdd`` / ``hhmm``.
            Defaults to ten minutes before the call.
        tm: Optional dict whose ``yyyymmdd`` / ``hhmm`` entries, when present
            and not equal to 0, override the values derived from ``timestamp``.
        remote_dir_format: %-style template of the remote path; receives the
            ``yyyymmdd``, ``hhmm`` and ``datatype`` keys.
        local_dir_part_format: %-style template of the per-host local file;
            receives the same keys plus ``part`` (the host index).
        local_dir_format: %-style template of the merged local file.
        datatypeList: Iterable of datatype names to collect.
        is_store: When true, a failed ``scp`` is neither retried nor logged.
    """
    import traceback

    # The default used to be computed once at import time; compute it per call.
    if timestamp is None:
        timestamp = time.time() - 10 * 60
    tm = tm or {}
    datatypeList = datatypeList or []

    yyyymmdd = tm.get('yyyymmdd', 0)
    if yyyymmdd == 0:
        yyyymmdd = time.strftime("%Y%m%d", time.localtime(timestamp))
    hhmm = tm.get('hhmm', 0)
    if hhmm == 0:
        hhmm = time.strftime("%H%M", time.localtime(timestamp))
    log_path = "/data1/logs/collector/collectorinfo.%(yyyymmdd)s" % {"yyyymmdd": yyyymmdd}

    for datatype in datatypeList:
        fields = {'yyyymmdd': yyyymmdd, 'hhmm': hhmm, 'datatype': datatype}
        remote_dir = remote_dir_format % fields
        local_dir = local_dir_format % fields
        if not os.path.exists(local_dir):
            # Make sure the directory that will hold the merged file exists.
            os.system("mkdir -p %s" % os.path.dirname(local_dir))
        souce_files = []
        datatype_started = time.time()
        for part, remote_host in enumerate(remote_host_list):
            host_started = time.time()
            try:
                local_dir_part = local_dir_part_format % dict(fields, part=part)
                cmd = "scp root@%(remote_host)s:%(remote_dir)s %(local_dir_part)s" % \
                      {'remote_host': remote_host, 'remote_dir': remote_dir, 'local_dir_part': local_dir_part}
                print(cmd)
                result = os.system(cmd)
                if not is_store and result != 0:
                    # One retry after a short pause; record a second failure.
                    time.sleep(2)
                    result = os.system(cmd)
                    if result != 0:
                        LogStore(log_path, "\t".join([time.strftime("%Y-%m-%d+%H:%M:%S"), "faild_0", "#", datatype, cmd]), mode="a+")
                if os.path.exists(local_dir_part):
                    souce_files.append(local_dir_part)
            except Exception:
                # A failure on one host must not stop the remaining hosts.
                traceback.print_exc()
            print(" ".join(["remote_host: ", str(remote_host), " %s " % datatype, " cost ", str(time.time() - host_started)]))
        if os.path.exists(local_dir):
            # Start from a clean target, because the merge below appends.
            os.system("rm -f %(local_dir)s" % {"local_dir": local_dir})
            print("remove file %s" % local_dir)
        if souce_files:
            souce_files_str = " ".join(souce_files).strip()
            os.system("cat %(souce_files)s >> %(local_dir)s && rm -rf %(souce_files)s" %
                      {'souce_files': souce_files_str, 'local_dir': local_dir})
        print("%s  cost  %s" % (datatype, time.time() - datatype_started))
    LogStore.finished(iszip=True)
Ejemplo n.º 4
0
def main(
    timestamp,
    src_logpath_format="/data1/nginxlogs/jhlogs/access_jhlogs.%(yyyymmddhhmm)s"
):
    """Transform one access-log file and hand each record to LogStore.

    ``errlog_path_format`` and ``filename_format`` are not parameters; they
    are read from module scope.

    Args:
        timestamp: Unix time in seconds; formatted in local time to build the
            source, error and output paths.
        src_logpath_format: %-style template for the input file; receives the
            ``yyyymmddhhmm`` and ``yyyymmdd`` keys. A path ending in ``.gz`` is
            opened with ``gzip.open``.

    A line that raises while being transformed or stored is written to the
    error log together with the exception text; processing then continues.
    """
    local_time = time.localtime(timestamp)
    yyyymmddhhmm = time.strftime('%Y%m%d%H%M', local_time)
    yyyymmdd = time.strftime('%Y%m%d', local_time)
    hhmm = time.strftime('%H%M', local_time)
    src_logpath = src_logpath_format % {
        "yyyymmddhhmm": yyyymmddhhmm,
        "yyyymmdd": yyyymmdd
    }
    errlog_path = errlog_path_format % {
        "yyyymmddhhmm": yyyymmddhhmm,
        "yyyymmdd": yyyymmdd,
        "hhmm": hhmm
    }
    transform = Transform()
    opener = gzip.open if src_logpath.endswith(".gz") else open
    src_logpath_file = opener(src_logpath)
    # try/finally guarantees the handle is released whatever happens below.
    try:
        for line in src_logpath_file:
            try:
                for log in transform.transform(line):
                    filename = filename_format % {
                        "yyyymmdd": yyyymmdd,
                        "hhmm": hhmm,
                        "datatype": log["jhd_datatype"]
                    }
                    LogStore(filename, json.dumps(log))
            except Exception as e:
                LogStore(errlog_path, "%s, %s" % (e, line))
    finally:
        src_logpath_file.close()
Ejemplo n.º 5
0
def main(timestamp, src_file_format=src_file_format):
    """Transform every line of one source file and pass the records to LogStore.

    ``errlog_path_format`` and ``filename_format`` are read from module scope.

    Args:
        timestamp: Unix time in seconds; formatted in local time to build the
            source, error and output paths.
        src_file_format: %-style template for the input file; receives the
            ``yyyymmdd`` and ``yyyymmddhhmm`` keys.

    A line that raises is written to the error log together with the
    exception text, and the loop moves on to the next line.
    """
    converter = Transform()
    moment = time.localtime(timestamp)
    stamp_fields = {
        "yyyymmdd": time.strftime('%Y%m%d', moment),
        "hhmm": time.strftime('%H%M', moment),
    }
    source_path = src_file_format % {
        "yyyymmdd": stamp_fields["yyyymmdd"],
        "yyyymmddhhmm": time.strftime('%Y%m%d%H%M', moment),
    }
    error_path = errlog_path_format % stamp_fields
    with open(source_path) as source:
        for raw in source:
            try:
                for record in converter.transform(raw):
                    # The output file depends on the record's own datatype.
                    target = filename_format % dict(
                        stamp_fields, datatype=record['jhd_datatype'])
                    LogStore(target, json.dumps(record, ensure_ascii=False))
            except Exception as err:
                LogStore(error_path, "%s, %s" % (err, raw))
Ejemplo n.º 6
0
                filename = filename_format % {
                    "yyyymmdd": yyyymmdd,
                    "hhmm": hhmm,
                    "datatype": datatype
                }
                log_line = json.dumps(log)
                LogStore(filename, log_line)
        except Exception, e:
            import traceback
            print traceback.print_exc()
            # print line
            LogStore(errlog_path, "%s, %s" % (e, line.strip()))

    if not src_logpath.endswith(".gz"):
        src_logpath_file.close()
    LogStore.finished(iszip=False)


if __name__ == "__main__":
    if 'normal' in sys.argv:
        timestamp = int(time.time() - 60 * 5)
        main(timestamp)

    if 'guagua' in sys.argv:
        timestamp = int(time.time() - 60 * 5)
        main(timestamp,
             src_logpath_format=
             "/data1/nginxlogs/guagua/access_guagua.%(yyyymmddhhmm)s")

    if 'store' in sys.argv:
        # startstamp = time.mktime(time.strptime('20160501+000100', '%Y%m%d+%H%M%S'))
Ejemplo n.º 7
0
    errlog_path = errlog_path_format % {"yyyymmdd": yyyymmdd, "hhmm": hhmm}
    with open(src_file) as f:
        for line in f:
            try:
                for item in transform.transform(line):
                    datatype = item['jhd_datatype']
                    filename = filename_format % {
                        "yyyymmdd": yyyymmdd,
                        "hhmm": hhmm,
                        'datatype': datatype
                    }
                    line_out = json.dumps(item, ensure_ascii=False)
                    LogStore(filename, line_out)
            except Exception, e:
                LogStore(errlog_path, "%s, %s" % (e, line))
    LogStore.finished()


# Script entry point: actions are selected by keywords found anywhere in
# sys.argv, so several of them may run in one invocation.
if __name__ == "__main__":
    if 'normal' in sys.argv:
        # Run main() with a timestamp five minutes in the past.
        timestamp = int(time.time() - 60 * 5)
        main(timestamp)

    if 'store' in sys.argv:
        # Hard-coded window, interpreted in local time by time.mktime:
        # 2016-08-12 00:00:00 up to 2016-08-12 10:30:00.
        # The commented-out assignments are alternative window boundaries.
        # startstamp = time.mktime(time.strptime('20160501+000100', '%Y%m%d+%H%M%S'))
        # endstamp = time.mktime(time.strptime('20160602+000000', '%Y%m%d+%H%M%S'))
        startstamp = time.mktime(
            time.strptime('20160812+000000', '%Y%m%d+%H%M%S'))
        # startstamp = time.mktime(time.strptime('20160808+000000', '%Y%m%d+%H%M%S'))
        endstamp = time.mktime(
            time.strptime('20160812+103000', '%Y%m%d+%H%M%S'))
Ejemplo n.º 8
0
            # data["ip"] = ip
            if not data:
                continue
            datatype = data["appkey"]
            filename = filename_format % {
                "datatype": datatype,
                "yyyymmdd": yyyymmdd,
                "hhmm": hhmm
            }
            LogStore(filename, json.dumps(data))
        except Exception, e:
            LogStore(errlog_path, "%s, %s" % (e, line.strip()))

    if not src_logpath.endswith(".gz"):
        src_logpath_file.close()
    LogStore.finished(iszip=True)


if __name__ == "__main__":
    if 'transform_h5' in sys.argv:
        src_logpath_format = "/data1/nginxlogs/jhsaaslogs_h5/access_jhlogs.%(yyyymmddhhmm)s"
        errlog_path_format = "/data1/logs/transformh5/err/%(yyyymmdd)s/%(hhmm)s.err"
        filename_format = "/data1/logs/transformh5/%(datatype)s/%(yyyymmdd)s/%(hhmm)s.log"
        timestamp = int(time.time() - 60 * 5)
        main(timestamp,
             src_logpath_format=src_logpath_format,
             errlog_path_format=errlog_path_format,
             filename_format=filename_format)

    if 'store' in sys.argv:
        # startstamp = time.mktime(time.strptime('20160501+000100', '%Y%m%d+%H%M%S'))
Ejemplo n.º 9
0
def collect(delay=None,
            remote_dir_format="",
            local_dir_part_format="",
            local_dir_format="",
            datatypes=None,
            remote_ips=None):
    '''
    Copy per-datatype files from every remote host and merge them locally.

    :param delay: Unix time in seconds identifying the files to pull;
        defaults to ten minutes before the call
    :param remote_dir_format: %-style template of the path on the remote
        server (keys: yyyymmdd, hhmm, datatype)
    :param local_dir_part_format: %-style template of the per-host local
        file (keys: yyyymmdd, hhmm, datatype, part)
    :param local_dir_format: %-style template of the merged local file
        (keys: yyyymmdd, hhmm, datatype)
    :param datatypes: iterable of datatype names to collect
    :param remote_ips: iterable of remote hosts to pull from; the position
        of a host in it is used as the ``part`` number
    :return: None
    '''
    # The default used to be computed once at import time; compute it per call.
    if delay is None:
        delay = time.time() - 10 * 60
    datatypes = datatypes or []
    remote_ips = remote_ips or []

    # Derive the date parts from the requested time.
    local_time = time.localtime(delay)
    yyyymmdd = time.strftime("%Y%m%d", local_time)
    hhmm = time.strftime("%H%M", local_time)
    log_path = "/data1/logs/collector/collectorinfo.%(yyyymmdd)s" % {
        "yyyymmdd": yyyymmdd
    }
    # Fetch the matching file for every datatype in turn.
    for datatype in datatypes:
        # Path on the remote server.
        remote_dir = remote_dir_format % {
            'yyyymmdd': yyyymmdd,
            'hhmm': hhmm,
            'datatype': datatype
        }
        # Local paths drop everything up to the first "/" for datatypes
        # that contain "_log".
        if "_log" in datatype:
            local_datatype = datatype[datatype.find("/") + 1:]
        else:
            local_datatype = datatype
        local_fields = {
            'yyyymmdd': yyyymmdd,
            'hhmm': hhmm,
            'datatype': local_datatype
        }
        local_dir = local_dir_format % local_fields
        # Make sure the directory that will hold the merged file exists.
        if not os.path.exists(local_dir):
            os.system("mkdir -p %s" % os.path.dirname(local_dir))

        # Per-host files that were fetched successfully.
        souce_files = []
        # The part index now comes from remote_ips itself. It used to be
        # bounded by the unrelated global remote_host_list, which could
        # silently drop hosts.
        for part, remote_host in enumerate(remote_ips):
            local_dir_part = local_dir_part_format % dict(local_fields,
                                                          part=part)
            # scp the remote file.
            cmd = "scp root@%(remote_host)s:%(remote_dir)s %(local_dir_part)s" % \
                  {'remote_host': remote_host, 'remote_dir': remote_dir, 'local_dir_part': local_dir_part}
            # Retry once on failure, then record the failure.
            result = os.system(cmd)
            if result != 0:
                result = os.system(cmd)
                if result != 0:
                    LogStore(log_path,
                             "\t".join([
                                 time.strftime("%Y-%m-%d+%H:%M:%S"),
                                 "faild_0", "#", datatype, cmd
                             ]),
                             mode="a+")
            if os.path.exists(local_dir_part):
                souce_files.append(local_dir_part)
        if os.path.exists(local_dir):
            # Start from a clean target, because the merge below appends.
            os.system("rm -f %(local_dir)s" % {"local_dir": local_dir})
            print("remove file %s" % local_dir)
        if souce_files:
            souce_files_str = " ".join(souce_files).strip()
            os.system("cat %(souce_files)s >> %(local_dir)s && rm -f %(souce_files)s" %
                      {'souce_files': souce_files_str, 'local_dir': local_dir})
    LogStore.finished()