Exemple #1
0
def uniq_history(app, gameid=None):
    """Rewrite every history CSV with duplicate rows removed.

    Rows count as duplicates when they share the same values in the first
    two fields of the file's field list. Each file is overwritten in place
    without a header row.

    Args:
        app: Application object; ``app.history`` is the history base directory.
        gameid: A single game id to process. When ``None``, the game ids are
            taken from ``util.get_gameid_from_history(app.history)``.
    """
    games = util.get_gameid_from_history(app.history) if gameid is None else [gameid]

    from bi.unit.login import LoginUnit
    from bi.unit.role import RoleUnit
    from bi.unit.payment import PaymentUnit

    # (csv file name, column names) for every history file that is rewritten.
    history_files = (
        (LoginUnit.HISTORY_LOGIN_CSV, LoginUnit.HISTORY_LOGIN_FIELDS),
        (LoginUnit.HISTORY_MAC_CSV, LoginUnit.HISTORY_MAC_FIELDS),
        (RoleUnit.HISTORY_CSV, RoleUnit.HISTORY_FIELDS),
        (PaymentUnit.HISTORY_CSV, PaymentUnit.HISTORY_FIELDS),
    )
    log.info("drop_history, gameids: %s", games)
    for game in games:
        # Process the csv files of this game.
        for csv_name, columns in history_files:
            csv_path = os.path.join(app.history, game, csv_name)
            frame = util.read_csv(csv_path, names=columns)
            # The first two columns form the uniqueness key.
            deduped = frame.drop_duplicates(subset=columns[:2])
            log.info("process csv path: %s, pre: %s, now: %s", csv_path, len(frame), len(deduped))
            deduped.to_csv(csv_path, header=False, index=False, mode='w', encoding='utf8')
Exemple #2
0
def clear(app, gameid=None):
    """Wipe a game's stored data.

    1. Drop the per-game database tables.
    2. Remove the game's files under ``clean/`` and ``history/``.

    Args:
        app: Application object; ``app.log``, ``app.config``, ``app.history``
            and ``app.work_dir`` are read.
        gameid: Game id to wipe. When ``None``, tables are dropped for every
            game id returned by ``util.get_gameid_from_history`` and the whole
            content of ``clean/`` and ``history/`` is removed.
    """
    log_init(app.realtive_path(app.log, "clear.log"))
    config.init(app.config)
    models = [ddl.AllRole, ddl.AllUser, ddl.AllAdvice, ddl.AllPayUser, ddl.PayMent, ddl.Login, ddl.Consume,
              ddl.RoleNew, ddl.RoleLogin, ddl.Levelup, ddl.Online, ddl.Mission]
    if gameid is None:
        games = util.get_gameid_from_history(app.history)
    else:
        games = [gameid]
    for game in games:
        for model in models:
            try:
                model.drop(game)
            except Exception as e:
                # Best effort: a failed drop is logged and the remaining
                # tables are still processed.
                log.error("DDL delete error: %s", e)
    with cd(app.work_dir):
        if gameid:
            # The -name pattern is quoted so the shell hands it to find
            # verbatim instead of expanding it against the current directory.
            # -delete replaces "| xargs rm", which invokes rm with no
            # operands (and fails) when nothing matches; -type f keeps the
            # old "rm" semantics of only removing files.
            run("find clean/ -type f -name '{0}_*.csv' -delete".format(gameid))
            run("rm -rf history/{0}".format(gameid))
            run("rm -rf history/all_role/{0}".format(gameid))
        else:
            run("rm -rf clean/* history/*")
Exemple #3
0
def clear_filter_openid(app, ds, gameid=None, **kwargs):
    """Remove the payment data of test accounts for the day ``ds``.

    Two places are cleaned:

    * database: rows of the payment table whose timestamp falls inside ``ds``
      and whose ``openid`` belongs to a test account of the game;
    * csv: rows with a test-account ``openid`` in the files under
      ``<work_dir>/clean/<ds>/consume`` and ``<work_dir>/clean/<ds>/pay_orders``
      (files are rewritten in place without a header).

    Args:
        app: Application object; ``app.history`` and ``app.work_dir`` are read.
        ds: Day to clean, in whatever form ``util.todate`` accepts.
        gameid: Restrict the cleanup to one game. When ``None`` every game id
            returned by ``util.get_gameid_from_history`` is processed.
        **kwargs: Forwarded to ``ensure`` together with the confirmation prompt.
    """
    models = [ddl.PayMent]
    if gameid is None:
        games = util.get_gameid_from_history(app.history)
    else:
        games = [gameid]
    # Used below as a mapping: game id -> iterable of test-account openids.
    game_openids = BIManage.filter_openid(app.history)
    ensure(kwargs, "clear games:{0};ds:{1}, Please yes/no:".format(games, ds))
    ds_date = util.todate(ds)
    clear_time_f = util.timestamp(ds_date)
    # Exclusive upper bound of the delete window
    # (presumably the start of the following day; verify util.date_delta).
    clear_time_t = util.date_delta(ds_date)[1]
    log.info("clear filter openid, gameids: %s", games)
    # Clean the database.
    for game in games:
        # Test accounts registered for this game id.
        filter_openids = game_openids.get(game)
        # No test accounts for this game: nothing to delete.
        if not filter_openids:
            continue
        openids = ",".join("'%s'" % openid for openid in filter_openids)
        # The SQL is assembled by hand.
        # NOTE(review): openids are interpolated into the statement unescaped;
        # this is only safe while the openid list comes from trusted data.
        for model in models:
            table = model.table_name(game)
            timestamp_column = model_timesamp(model)
            try:
                sql = "delete from {0} where {1} >= {2} and {1} < {3} and openid in ({4})"
                sql = sql.format(table, timestamp_column, clear_time_f, clear_time_t, openids)
                log.info(sql)
                model.execute(sql)
            except Exception as e:
                log.error("DDL delete error: %s", e)

    # The set of openids to strip from the csv files does not depend on the
    # individual file, so it is computed once.
    if gameid:
        csv_openids = game_openids.get(gameid)
    else:
        # Every csv is processed, so the per-game lists are flattened into one
        # list; passing the dict's values() directly would hand ``isin`` a
        # collection of lists rather than a collection of openids.
        csv_openids = [openid
                       for game_ids in game_openids.values() if game_ids
                       for openid in game_ids]
    # No test accounts at all: no csv needs rewriting.
    if not csv_openids:
        return

    clear_dirs = [("consume", ddl.Consume), ("pay_orders", ddl.PayMent)]
    clean_dir = os.path.join(app.work_dir, "clean", ds)
    # Clean the csv files.
    with cd(clean_dir):
        for clear_dir, model in clear_dirs:
            csv_dir = os.path.join(clean_dir, clear_dir)
            for csv_name in os.listdir(csv_dir):
                # A single gameid was requested: skip csv files of other games.
                # NOTE(review): a plain prefix test also matches longer game
                # ids sharing the prefix (e.g. "1" vs "10_..."); confirm the
                # file naming scheme before tightening it.
                if gameid and not csv_name.startswith(gameid):
                    continue
                path = os.path.join(csv_dir, csv_name)
                df = util.read_csv(path, names=model.FIELDS, dtype=model.Dtype)
                df2 = df[~df["openid"].isin(csv_openids)]
                log.info("process csv path: %s, pre: %s, now: %s", path, len(df), len(df2))
                df2.to_csv(path, header=False, index=False, mode='w', encoding='utf8')
Exemple #4
0
def clear_history(app, ds, gameid=None, **kwargs):
    """Trim history data at ``ds`` before a backfill.

    For each game, every history CSV keeps only the rows whose last field is
    below the timestamp of ``ds``, and rows at or after that timestamp are
    deleted from the AllUser / AllAdvice / AllPayUser tables.

    Args:
        app: Application object; ``app.history`` is the history base directory.
        ds: Cut-off day, in whatever form ``util.todate`` accepts.
        gameid: A single game id to process. When ``None``, the game ids are
            taken from ``util.get_gameid_from_history(app.history)``.
        **kwargs: Forwarded to ``ensure`` together with the confirmation prompt.
    """
    games = util.get_gameid_from_history(app.history) if gameid is None else [gameid]
    ensure(kwargs, "clear games:{0} date:{1},Please yes/no:".format(games, ds))
    from bi.unit.login import LoginUnit
    from bi.unit.role import RoleUnit
    from bi.unit.payment import PaymentUnit

    # (csv file name, column names) for every history file that is trimmed.
    history_files = (
        (LoginUnit.HISTORY_LOGIN_CSV, LoginUnit.HISTORY_LOGIN_FIELDS),
        (LoginUnit.HISTORY_MAC_CSV, LoginUnit.HISTORY_MAC_FIELDS),
        (RoleUnit.HISTORY_CSV, RoleUnit.HISTORY_FIELDS),
        (PaymentUnit.HISTORY_CSV, PaymentUnit.HISTORY_FIELDS),
    )
    tables = (ddl.AllUser, ddl.AllAdvice, ddl.AllPayUser)
    cutoff = util.timestamp(util.todate(ds))
    log.info("clear_history, gameids: %s, end_ds: %s", games, ds)
    for game in games:
        # Process the csv files.
        for csv_name, columns in history_files:
            csv_path = os.path.join(app.history, game, csv_name)
            frame = util.read_csv(csv_path, names=columns)
            # The last column is used as the time column.
            time_column = columns[-1]
            keep_mask = frame[time_column] < cutoff
            kept = frame[keep_mask]
            log.info("process csv path: %s, pre: %s, now: %s", csv_path, len(frame), len(kept))
            kept.to_csv(csv_path, header=False, index=False, mode='w', encoding='utf8')

        # Process the database.
        for model in tables:
            table_name = model.table_name(game)
            time_field = model_timesamp(model)
            try:
                sql = "delete from {0} where {1} >= {2}"
                sql = sql.format(table_name, time_field, cutoff)
                log.info(sql)
                model.execute(sql)
            except Exception as err:
                log.error("DDL delete error: %s, sql: %s", err, sql)
Exemple #5
0
def clear(app, gameid=None, **kwargs):
    """Wipe a game's stored data.

    1. Drop the per-game database tables.
    2. Remove the game's files under ``clean/`` and ``history/``, both in
       ``app.work_dir`` and, when it exists, in the sibling
       ``<work_dir>-xiaohao`` directory holding the consume data.

    Args:
        app: Application object; ``app.history`` and ``app.work_dir`` are read.
        gameid: Game id to wipe. When ``None``, tables are dropped for every
            game id returned by ``util.get_gameid_from_history`` and the whole
            content of ``clean/`` and ``history/`` is removed.
        **kwargs: Forwarded to ``ensure`` together with the confirmation prompt.
    """
    models = [ddl.AllUser, ddl.AllAdvice, ddl.AllPayUser, ddl.PayMent, ddl.Login, ddl.Consume,
              ddl.RoleNew, ddl.RoleLogin, ddl.Levelup, ddl.Online, ddl.Mission]
    if gameid is None:
        games = util.get_gameid_from_history(app.history)
    else:
        games = [gameid]
    ensure(kwargs, "clear games:{0},Please yes/no:".format(games))
    log.info("clear, gameids: %s", games)
    for game in games:
        for model in models:
            try:
                model.drop(game)
            except Exception as e:
                # Best effort: a failed drop is logged and the remaining
                # tables are still processed.
                log.error("DDL delete error: %s", e)
    with cd(app.work_dir):
        if gameid:
            # The -name pattern is quoted so the shell hands it to find
            # verbatim instead of expanding it against the current directory.
            # -delete replaces "| xargs rm", which invokes rm with no
            # operands (and fails) when nothing matches; -type f keeps the
            # old "rm" semantics of only removing files.
            run("find clean/ -type f -name '{0}_*.csv' -delete".format(gameid))
            run("rm -rf history/{0}".format(gameid))
            run("rm -rf history/all_role/{0}".format(gameid))
        else:
            run("rm -rf clean/* history/*")

    # Try to clean the consume data as well.
    # The sibling directory is "<work_dir>-xiaohao". Joining the parent with
    # the basename (rather than with the full work_dir) also gives the right
    # path when work_dir is relative.
    work_dir = app.work_dir
    xiaohao = os.path.join(os.path.dirname(work_dir), os.path.basename(work_dir) + "-xiaohao")
    if not os.path.exists(xiaohao):
        return
    log.info("clear consume, base dir:%s", xiaohao)
    with cd(xiaohao):
        if gameid:
            run("find clean/ -type f -name '{0}_*.csv' -delete".format(gameid))
            run("rm -rf history/{0}".format(gameid))
        else:
            run("rm -rf clean/* history/*")
Exemple #6
0
def fix(app, start, end, **kwargs):
    """Prepare the days ``start``..``end`` for a re-run.

    Example, assuming today is the 13th:
    ``python manage.py fix 2015-12-11 2015-12-12`` (re-run the csv of these
    two days); only the logs of the 11th-12th are repaired.

    1. Empty ``clean`` for the 11th-12th.
    2. Delete the MySQL rows of the 11th-12th.
    3. Repair the raw data:
       the directory of the 11th also contains lines of the 10th, which are
       removed from it; lines of the 12th that ended up in the directory of
       the 13th are appended to the 12th
       (13/0000.log -> 12/<last log file>.log).
    4. ex.py cron.py (not invoked here; presumably run by the operator
       afterwards).

    When ``start == end`` the function returns right after repairing the
    start directory, i.e. steps touching the end day and ``clean`` are
    skipped.

    Args:
        app: Application object; ``app.history``, ``app.work_dir`` and
            ``app.data`` are read.
        start: First day of the range, in whatever form ``util.todate`` accepts.
        end: Last day of the range (inclusive), same form as ``start``.
        **kwargs: Forwarded to ``ensure`` together with the confirmation prompt.
    """
    models = [ddl.AllRole, ddl.AllUser, ddl.AllAdvice, ddl.AllPayUser, ddl.PayMent, ddl.Login, ddl.Consume,
              ddl.RoleNew, ddl.RoleLogin, ddl.Levelup, ddl.Online, ddl.Mission]

    # Tables filtered by their ``ds`` / ``gameid`` columns instead of a
    # timestamp column.
    ds_models = [ddl.RealtimeIncomeNewer, ddl.RealtimeOnline, ddl.RealtimeRegister]

    models.extend(ds_models)
    games = util.get_gameid_from_history(app.history)
    ensure(kwargs, "clear games:{0} start:{1}, end:{2} Please yes/no:".format(games, start, end))
    # Step 2: delete the database rows.
    timestamp_day = util.timestamp(util.todate(start))
    tomorrow = util.date_delta(util.todate(end), 1)
    for game in games:
        for model in models:
            table = model.table_name(game)
            try:
                if model not in ds_models:
                    # Fall back to the first "*_time" field when the model
                    # declares no TIMESTAMP. A list comprehension is used
                    # because filter() is not subscriptable on Python 3; an
                    # empty match still raises IndexError, handled below.
                    timestamp_columns = model.TIMESTAMP or [
                        field for field in model.FIELDS if field.endswith("_time")][0]
                    sql = "delete from {0} where {1} >= {2} and {1} <{3}"
                    sql = sql.format(table, timestamp_columns, timestamp_day, tomorrow[1])
                else:
                    sql = "delete from {0} where gameid = {1} and ds >= '{2}' and ds <='{3}'"
                    sql = sql.format(table, game, start, end)
                log.info(sql)
                model.execute(sql)
            except Exception as e:
                # Best effort: log and continue with the next table.
                log.error("DDL delete error: %s", e)

    with cd(app.work_dir):

        # Handle the start day: keep only the lines mentioning that day.
        day = util.todate(start)
        directory = os.path.join(app.data, str(day))
        files = os.listdir(directory)
        fs = sorted(files)
        day_bak = os.path.join(app.data, "{0}_startbak".format(str(day)))
        if not os.path.exists(day_bak):
            os.makedirs(day_bak)
        else:
            run("rm -rf {0}/*".format(day_bak))
        for f in fs:
            run("cat {0}/{1} | grep {2} > {3}/{1}".format(directory, f, day, day_bak))
        # The untouched directory is kept as "<day>_bu"; the filtered copy
        # takes its place.
        run("mv {0} {1}/{2}_bu".format(directory, app.data, day))
        run("mv {0} {1}/{2}".format(day_bak, app.data, day))

        if start == end:
            log.info("process if start == end is return start: %s end : %s", start, end)
            return

        # Handle the end day: pull its lines out of the following day's files.
        endday = util.todate(end)
        endtomorrow = util.todate(end) + datetime.timedelta(days=1)
        enddirectory = os.path.join(app.data, str(endtomorrow))
        daydirectory = os.path.join(app.data, str(endday))
        files = os.listdir(enddirectory)
        fs = sorted(files)
        end_bak = os.path.join(app.data, "{0}_endbak".format(str(endtomorrow)))
        if not os.path.exists(end_bak):
            os.makedirs(end_bak)
        else:
            run("rm -rf {0}/*".format(end_bak))
        # All matching lines go into one file named after the last (sorted)
        # file of the following day, which is then appended to the file of the
        # same name in the end day's directory. fs[-1] raises IndexError when
        # the following day's directory is empty.
        run("cat {0}/* | grep {1} > {2}/{3}".format(enddirectory, end, end_bak, fs[-1]))
        run("cat {0}/{2} >> {1}/{2}".format(end_bak, daydirectory, fs[-1]))

        # Step 1: drop the clean directory of every day in the range.
        for i in range((util.todate(end) - util.todate(start)).days + 1):
            day = util.todate(start) + datetime.timedelta(days=i)
            run("rm -rf clean/{0}".format(str(day)))
Exemple #7
0
def fix(app, start, end):
    """Prepare the days ``start``..``end`` for a re-run.

    Example, assuming today is the 13th:
    ``python manage.py fix 2015-12-11 2015-12-12`` (re-run the csv of these
    two days); only the logs of the 11th-12th are repaired.

    1. Empty ``clean`` for the 11th-12th.
    2. Delete the MySQL rows of the 11th-12th.
    3. Repair the raw data:
       the directory of the 11th also contains lines of the 10th, which are
       removed from it; lines of the 12th that ended up in the directory of
       the 13th are appended to the 12th
       (13/0000.log -> 12/<last log file>.log).
    4. ex.py cron.py (not invoked here; presumably run by the operator
       afterwards).

    Args:
        app: Application object; ``app.log``, ``app.config``, ``app.history``,
            ``app.work_dir`` and ``app.data`` are read.
        start: First day of the range, in whatever form ``util.todate`` accepts.
        end: Last day of the range (inclusive), same form as ``start``.
    """
    log_init(app.realtive_path(app.log, "repeat.log"))
    config.init(app.config)
    models = [ddl.AllRole, ddl.AllUser, ddl.AllAdvice, ddl.AllPayUser, ddl.PayMent, ddl.Login, ddl.Consume,
              ddl.RoleNew, ddl.RoleLogin, ddl.Levelup, ddl.Online, ddl.Mission]
    games = util.get_gameid_from_history(app.history)

    # Step 2: delete the database rows.
    timestamp_day = util.timestamp(util.todate(start))
    tomorrow = util.date_delta(util.todate(end), 1)
    for game in games:
        for model in models:
            table = model.table_name(game)
            try:
                # Fall back to the first "*_time" field when the model
                # declares no TIMESTAMP. A list comprehension is used because
                # filter() is not subscriptable on Python 3; an empty match
                # still raises IndexError, handled below.
                timestamp_columns = model.TIMESTAMP or [
                    field for field in model.FIELDS if field.endswith("_time")][0]
                sql = "delete from {0} where {1} >= {2} and {1} <{3}"
                sql = sql.format(table, timestamp_columns, timestamp_day, tomorrow[1])
                model.execute(sql)
            except Exception as e:
                # Best effort: log and continue with the next table.
                log.error("DDL delete error: %s", e)

    with cd(app.work_dir):

        # Handle the start day: keep only the lines mentioning that day.
        day = util.todate(start)
        directory = os.path.join(app.data, str(day))
        files = os.listdir(directory)
        fs = sorted(files)
        day_bak = os.path.join(app.data, "{0}_startbak".format(str(day)))
        if not os.path.exists(day_bak):
            os.makedirs(day_bak)
        else:
            run("rm -rf {0}/*".format(day_bak))
        for f in fs:
            run("cat {0}/{1} | grep {2} > {3}/{1}".format(directory, f, day, day_bak))
        # The untouched directory is kept as "<day>_bu"; the filtered copy
        # takes its place.
        run("mv {0} {1}/{2}_bu".format(directory, app.data, day))
        run("mv {0} {1}/{2}".format(day_bak, app.data, day))

        # Handle the end day: pull its lines out of the following day's files.
        endday = util.todate(end)
        endtomorrow = util.todate(end) + datetime.timedelta(days=1)
        enddirectory = os.path.join(app.data, str(endtomorrow))
        daydirectory = os.path.join(app.data, str(endday))
        files = os.listdir(enddirectory)
        fs = sorted(files)
        end_bak = os.path.join(app.data, "{0}_endbak".format(str(endtomorrow)))
        if not os.path.exists(end_bak):
            os.makedirs(end_bak)
        else:
            run("rm -rf {0}/*".format(end_bak))
        # All matching lines go into one file named after the last (sorted)
        # file of the following day, which is then appended to the file of the
        # same name in the end day's directory. fs[-1] raises IndexError when
        # the following day's directory is empty.
        run("cat {0}/* | grep {1} > {2}/{3}".format(enddirectory, end, end_bak, fs[-1]))
        run("cat {0}/{2} >> {1}/{2}".format(end_bak, daydirectory, fs[-1]))

        # Step 1: drop the clean directory of every day in the range.
        for i in range((util.todate(end) - util.todate(start)).days + 1):
            day = util.todate(start) + datetime.timedelta(days=i)
            run("rm -rf clean/{0}".format(str(day)))