def uniq_history(app, gameid=None):
    """Remove duplicated rows from the per-game history CSV files.

    Every history file is read, de-duplicated on its first two columns and
    written back to the same path without a header row.

    :param app: application object; ``app.history`` is used as the root
        directory of the history files.
    :param gameid: a single game id to process. When ``None`` the game ids
        come from ``util.get_gameid_from_history(app.history)``.
    """
    games = util.get_gameid_from_history(app.history) if gameid is None else [gameid]

    from bi.unit.login import LoginUnit
    from bi.unit.role import RoleUnit
    from bi.unit.payment import PaymentUnit

    # (file name, column names) for each history file kept per game.
    history_specs = (
        (LoginUnit.HISTORY_LOGIN_CSV, LoginUnit.HISTORY_LOGIN_FIELDS),
        (LoginUnit.HISTORY_MAC_CSV, LoginUnit.HISTORY_MAC_FIELDS),
        (RoleUnit.HISTORY_CSV, RoleUnit.HISTORY_FIELDS),
        (PaymentUnit.HISTORY_CSV, PaymentUnit.HISTORY_FIELDS),
    )

    log.info("drop_history, gameids: %s", games)
    for game in games:
        # Process the CSV files of this game.
        for filename, columns in history_specs:
            csv_path = os.path.join(app.history, game, filename)
            frame = util.read_csv(csv_path, names=columns)
            # The first two columns form the uniqueness key.
            key_columns = columns[:2]
            deduped = frame.drop_duplicates(subset=key_columns)
            log.info("process csv path: %s, pre: %s, now: %s",
                     csv_path, len(frame), len(deduped))
            deduped.to_csv(csv_path, header=False, index=False,
                           mode='w', encoding='utf8')
def clear(app, gameid=None):
    """Wipe the stored data of a game (full reset).

    1. Drop the per-game database table of every model listed below.
    2. Remove the game's cleaned CSV files and history directories, or
       everything under ``clean/`` and ``history/`` when no ``gameid`` is
       given.

    :param app: application object; ``app.log``, ``app.config``,
        ``app.history`` and ``app.work_dir`` are read here.
    :param gameid: a single game id to clear. When ``None`` the game ids
        come from ``util.get_gameid_from_history(app.history)``.
    """
    log_init(app.realtive_path(app.log, "clear.log"))
    config.init(app.config)

    models = [ddl.AllRole, ddl.AllUser, ddl.AllAdvice, ddl.AllPayUser,
              ddl.PayMent, ddl.Login, ddl.Consume, ddl.RoleNew,
              ddl.RoleLogin, ddl.Levelup, ddl.Online, ddl.Mission]

    if gameid is None:
        games = util.get_gameid_from_history(app.history)
    else:
        games = [gameid]

    for game in games:
        for model in models:
            # Best effort: a failing drop is logged and the rest continue.
            try:
                model.drop(game)
            except Exception as e:
                log.error("DDL delete error: %s", e)

    with cd(app.work_dir):
        if gameid:
            # The -name pattern is quoted so the shell cannot expand the
            # glob before find sees it. "-exec rm -f {} +" replaces
            # "| xargs rm", which fails when nothing matches and breaks on
            # file names containing whitespace.
            run("find clean/ -name '{0}_*.csv' -exec rm -f {{}} +".format(gameid))
            run("rm -rf history/{0}".format(gameid))
            run("rm -rf history/all_role/{0}".format(gameid))
        else:
            run("rm -rf clean/* history/*")
def clear_filter_openid(app, ds, gameid=None, **kwargs):
    """Remove the payment data of test accounts for one day.

    For the day ``ds`` this deletes the test accounts' rows from the
    payment table of every selected game, then rewrites the day's cleaned
    ``consume`` and ``pay_orders`` CSV files without those accounts.

    :param app: application object; ``app.history`` and ``app.work_dir``
        are read here.
    :param ds: the day to clean; passed to ``util.todate`` and used as the
        name of the directory under ``clean/``.
    :param gameid: a single game id to process. When ``None`` the game ids
        come from ``util.get_gameid_from_history(app.history)``.
    :param kwargs: forwarded to ``ensure`` together with the confirmation
        prompt (presumably carries a "skip the prompt" flag -- confirm
        against ``ensure``).
    """
    models = [ddl.PayMent]
    if gameid is None:
        games = util.get_gameid_from_history(app.history)
    else:
        games = [gameid]

    # Mapping of game id -> collection of test-account openids.
    game_openids = BIManage.filter_openid(app.history)
    ensure(kwargs, "clear games:{0};ds:{1}, Please yes/no:".format(games, ds))

    ds_date = util.todate(ds)
    clear_time_f = util.timestamp(ds_date)
    clear_time_t = util.date_delta(ds_date)[1]
    log.info("clear filter openid, gameids: %s", games)

    # Clean the database.
    for game in games:
        # Test accounts registered for this game id.
        filter_openids = game_openids.get(game)
        # No test accounts for this game: nothing to delete.
        if not filter_openids:
            continue
        openids = ",".join("'%s'" % openid for openid in filter_openids)
        # The SQL is assembled by hand.
        # NOTE(review): values are interpolated into the statement; this
        # relies on the openids in the history data being trustworthy.
        for model in models:
            table = model.table_name(game)
            timestamp_column = model_timesamp(model)
            try:
                sql = "delete from {0} where {1} >= {2} and {1} < {3} and openid in ({4})"
                sql = sql.format(table, timestamp_column, clear_time_f,
                                 clear_time_t, openids)
                log.info(sql)
                model.execute(sql)
            except Exception as e:
                log.error("DDL delete error: %s", e)

    # The set of openids to strip from the CSV files does not depend on the
    # file being processed, so it is computed once.
    if gameid:
        csv_openids = game_openids.get(gameid)
    else:
        # Every CSV file is processed. The mapping's values are themselves
        # collections of openids, so they are flattened into one list;
        # passing the nested values to isin() would never match an openid.
        csv_openids = [openid
                       for per_game in game_openids.values() if per_game
                       for openid in per_game]
    # No test accounts at all: the CSV files need no rewriting.
    if not csv_openids:
        return

    clear_dirs = [("consume", ddl.Consume), ("pay_orders", ddl.PayMent)]
    clean_dir = os.path.join(app.work_dir, "clean", ds)
    # Clean the CSV files.
    with cd(clean_dir):
        for clear_dir, model in clear_dirs:
            csv_dir = os.path.join(clean_dir, clear_dir)
            for csv in os.listdir(csv_dir):
                # Only this game's files are touched. Files are named
                # "<gameid>_*.csv", so the separator is part of the prefix;
                # otherwise game "10" would also match "100_...".
                if gameid and not csv.startswith(gameid + "_"):
                    continue
                path = os.path.join(csv_dir, csv)
                df = util.read_csv(path, names=model.FIELDS, dtype=model.Dtype)
                df2 = df[~df["openid"].isin(csv_openids)]
                log.info("process csv path: %s, pre: %s, now: %s",
                         path, len(df), len(df2))
                df2.to_csv(path, header=False, index=False,
                           mode='w', encoding='utf8')
def clear_history(app, ds, gameid=None, **kwargs):
    """Trim history back to ``ds`` before re-importing data.

    Rows whose time column is at or after the timestamp of ``ds`` are
    removed from every history CSV file and from the ``AllUser``,
    ``AllAdvice`` and ``AllPayUser`` tables of each selected game.

    :param app: application object; ``app.history`` is read here.
    :param ds: cut-off day, converted with ``util.todate`` and
        ``util.timestamp``.
    :param gameid: a single game id to process. When ``None`` the game ids
        come from ``util.get_gameid_from_history(app.history)``.
    :param kwargs: forwarded to ``ensure`` together with the confirmation
        prompt.
    """
    games = util.get_gameid_from_history(app.history) if gameid is None else [gameid]
    ensure(kwargs, "clear games:{0} date:{1},Please yes/no:".format(games, ds))

    from bi.unit.login import LoginUnit
    from bi.unit.role import RoleUnit
    from bi.unit.payment import PaymentUnit

    # (file name, column names) for each history file kept per game.
    history_specs = (
        (LoginUnit.HISTORY_LOGIN_CSV, LoginUnit.HISTORY_LOGIN_FIELDS),
        (LoginUnit.HISTORY_MAC_CSV, LoginUnit.HISTORY_MAC_FIELDS),
        (RoleUnit.HISTORY_CSV, RoleUnit.HISTORY_FIELDS),
        (PaymentUnit.HISTORY_CSV, PaymentUnit.HISTORY_FIELDS),
    )
    models = (ddl.AllUser, ddl.AllAdvice, ddl.AllPayUser)
    clear_time = util.timestamp(util.todate(ds))

    log.info("clear_history, gameids: %s, end_ds: %s", games, ds)
    for game in games:
        # Process the CSV files.
        for filename, columns in history_specs:
            csv_path = os.path.join(app.history, game, filename)
            frame = util.read_csv(csv_path, names=columns)
            # The last column holds the time value the cut-off applies to.
            time_column = columns[-1]
            keep_mask = frame[time_column] < clear_time
            kept = frame[keep_mask]
            log.info("process csv path: %s, pre: %s, now: %s",
                     csv_path, len(frame), len(kept))
            kept.to_csv(csv_path, header=False, index=False,
                        mode='w', encoding='utf8')

        # Process the database.
        for model in models:
            table = model.table_name(game)
            timestamp_column = model_timesamp(model)
            try:
                # Assigned before formatting so the except branch can
                # always log it.
                sql = "delete from {0} where {1} >= {2}"
                sql = sql.format(table, timestamp_column, clear_time)
                log.info(sql)
                model.execute(sql)
            except Exception as e:
                log.error("DDL delete error: %s, sql: %s", e, sql)
def clear(app, gameid=None, **kwargs):
    """Wipe the stored data of a game (full reset).

    1. Drop the per-game database table of every model listed below.
    2. Remove the game's cleaned CSV files and history directories, or
       everything under ``clean/`` and ``history/`` when no ``gameid`` is
       given.
    3. Do the same file clean-up in the sibling ``<work_dir>-xiaohao``
       directory, if that directory exists.

    :param app: application object; ``app.history`` and ``app.work_dir``
        are read here.
    :param gameid: a single game id to clear. When ``None`` the game ids
        come from ``util.get_gameid_from_history(app.history)``.
    :param kwargs: forwarded to ``ensure`` together with the confirmation
        prompt.
    """
    models = [ddl.AllUser, ddl.AllAdvice, ddl.AllPayUser, ddl.PayMent,
              ddl.Login, ddl.Consume, ddl.RoleNew, ddl.RoleLogin,
              ddl.Levelup, ddl.Online, ddl.Mission]

    if gameid is None:
        games = util.get_gameid_from_history(app.history)
    else:
        games = [gameid]

    ensure(kwargs, "clear games:{0},Please yes/no:".format(games))
    log.info("clear, gameids: %s", games)

    for game in games:
        for model in models:
            # Best effort: a failing drop is logged and the rest continue.
            try:
                model.drop(game)
            except Exception as e:
                log.error("DDL delete error: %s", e)

    with cd(app.work_dir):
        if gameid:
            # The -name pattern is quoted so the shell cannot expand the
            # glob before find sees it. "-exec rm -f {} +" replaces
            # "| xargs rm", which fails when nothing matches and breaks on
            # file names containing whitespace.
            run("find clean/ -name '{0}_*.csv' -exec rm -f {{}} +".format(gameid))
            run("rm -rf history/{0}".format(gameid))
            run("rm -rf history/all_role/{0}".format(gameid))
        else:
            run("rm -rf clean/* history/*")

    # Try to clean the consumption ("xiaohao") data as well. It lives in a
    # sibling directory named "<work_dir>-xiaohao". basename() is used so
    # the path is also right when work_dir is relative; for an absolute
    # work_dir the result is unchanged.
    xiaohao = os.path.join(os.path.dirname(app.work_dir),
                           os.path.basename(app.work_dir) + "-xiaohao")
    if not os.path.exists(xiaohao):
        return

    log.info("clear consume, base dir:%s", xiaohao)
    with cd(xiaohao):
        if gameid:
            run("find clean/ -name '{0}_*.csv' -exec rm -f {{}} +".format(gameid))
            run("rm -rf history/{0}".format(gameid))
        else:
            run("rm -rf clean/* history/*")
def fix(app, start, end, **kwargs):
    """Prepare the days ``start``..``end`` for a re-run of the import.

    Example from the original notes: suppose today is the 13th and
    ``python manage.py fix 2015-12-11 2015-12-12`` is run to regenerate
    the CSVs of those two days. Only the logs of the 11th-12th are
    repaired:

    1. Empty ``clean/`` for the 11th-12th.
    2. Delete the database rows of the 11th-12th.
    3. Repair the raw data. The 11th's data directory also contains lines
       of the 10th, which are dropped. Part of the 12th's data sits in the
       13th's directory and is appended to the 12th
       (``13/0000.log -> 12/<largest log>.log``).
    4. ``ex.py`` / ``cron.py`` (listed in the notes; not invoked here).

    :param app: application object; ``app.history``, ``app.work_dir`` and
        ``app.data`` are read here.
    :param start: first day of the range, as accepted by ``util.todate``.
    :param end: last day of the range, as accepted by ``util.todate``.
    :param kwargs: forwarded to ``ensure`` together with the confirmation
        prompt.
    """
    models = [ddl.AllRole, ddl.AllUser, ddl.AllAdvice, ddl.AllPayUser,
              ddl.PayMent, ddl.Login, ddl.Consume, ddl.RoleNew,
              ddl.RoleLogin, ddl.Levelup, ddl.Online, ddl.Mission]
    # These models are filtered by gameid and a "ds" date column rather
    # than by a timestamp column.
    ds_models = [ddl.RealtimeIncomeNewer, ddl.RealtimeOnline,
                 ddl.RealtimeRegister]
    models.extend(ds_models)

    games = util.get_gameid_from_history(app.history)
    ensure(kwargs, "clear games:{0} start:{1}, end:{2} Please yes/no:".format(
        games, start, end))

    # Step 2: delete the database rows.
    timestamp_day = util.timestamp(util.todate(start))
    tomorrow = util.date_delta(util.todate(end), 1)
    for game in games:
        for model in models:
            table = model.table_name(game)
            try:
                if model not in ds_models:
                    # Fall back to the first "*_time" field. A list
                    # comprehension is used because filter() returns a
                    # non-subscriptable iterator on Python 3; that
                    # TypeError would be swallowed below and the delete
                    # silently skipped.
                    timestamp_columns = model.TIMESTAMP or [
                        field for field in model.FIELDS
                        if field.endswith("_time")
                    ][0]
                    sql = "delete from {0} where {1} >= {2} and {1} <{3}"
                    sql = sql.format(table, timestamp_columns,
                                     timestamp_day, tomorrow[1])
                else:
                    sql = "delete from {0} where gameid = {1} and ds >= '{2}' and ds <='{3}'"
                    sql = sql.format(table, game, start, end)
                log.info(sql)
                model.execute(sql)
            except Exception as e:
                log.error("DDL delete error: %s", e)

    with cd(app.work_dir):
        # Handle the start day: keep only the lines that mention it.
        day = util.todate(start)
        directory = os.path.join(app.data, str(day))
        files = os.listdir(directory)
        fs = sorted(files)
        day_bak = os.path.join(app.data, "{0}_startbak".format(str(day)))
        if not os.path.exists(day_bak):
            os.makedirs(day_bak)
        else:
            run("rm -rf {0}/*".format(day_bak))
        for f in fs:
            run("cat {0}/{1} | grep {2} > {3}/{1}".format(directory, f, day, day_bak))
        # Keep the untouched directory as "<day>_bu" and put the filtered
        # copy in its place.
        run("mv {0} {1}/{2}_bu".format(directory, app.data, day))
        run("mv {0} {1}/{2}".format(day_bak, app.data, day))

        if start == end:
            # NOTE(review): this return also skips the removal of
            # clean/<day> at the end of the function -- confirm intended.
            log.info("process if start == end is return start: %s end : %s", start, end)
            return

        # Handle the end day: pull its lines out of the next day's files.
        endday = util.todate(end)
        endtomorrow = util.todate(end) + datetime.timedelta(days=1)
        enddirectory = os.path.join(app.data, str(endtomorrow))
        daydirectory = os.path.join(app.data, str(endday))
        files = os.listdir(enddirectory)
        fs = sorted(files)
        end_bak = os.path.join(app.data, "{0}_endbak".format(str(endtomorrow)))
        if not os.path.exists(end_bak):
            os.makedirs(end_bak)
        else:
            run("rm -rf {0}/*".format(end_bak))
        # The collected lines are stored under the name of the last file
        # (in sorted order) of the next day and appended to the file of
        # the same name in the end day's directory.
        run("cat {0}/* | grep {1} > {2}/{3}".format(enddirectory, end, end_bak, fs[-1]))
        run("cat {0}/{2} >> {1}/{2}".format(end_bak, daydirectory, fs[-1]))

        # Step 1: drop the cleaned output of every day in the range.
        for i in range((util.todate(end) - util.todate(start)).days + 1):
            day = util.todate(start) + datetime.timedelta(days=i)
            run("rm -rf clean/{0}".format(str(day)))
def fix(app, start, end):
    """Prepare the days ``start``..``end`` for a re-run of the import.

    Example from the original notes: suppose today is the 13th and
    ``python manage.py fix 2015-12-11 2015-12-12`` is run to regenerate
    the CSVs of those two days. Only the logs of the 11th-12th are
    repaired:

    1. Empty ``clean/`` for the 11th-12th.
    2. Delete the database rows of the 11th-12th.
    3. Repair the raw data. The 11th's data directory also contains lines
       of the 10th, which are dropped. Part of the 12th's data sits in the
       13th's directory and is appended to the 12th
       (``13/0000.log -> 12/<largest log>.log``).
    4. ``ex.py`` / ``cron.py`` (listed in the notes; not invoked here).

    :param app: application object; ``app.log``, ``app.config``,
        ``app.history``, ``app.work_dir`` and ``app.data`` are read here.
    :param start: first day of the range, as accepted by ``util.todate``.
    :param end: last day of the range, as accepted by ``util.todate``.
    """
    log_init(app.realtive_path(app.log, "repeat.log"))
    config.init(app.config)

    models = [ddl.AllRole, ddl.AllUser, ddl.AllAdvice, ddl.AllPayUser,
              ddl.PayMent, ddl.Login, ddl.Consume, ddl.RoleNew,
              ddl.RoleLogin, ddl.Levelup, ddl.Online, ddl.Mission]
    games = util.get_gameid_from_history(app.history)

    # Step 2: delete the database rows.
    timestamp_day = util.timestamp(util.todate(start))
    tomorrow = util.date_delta(util.todate(end), 1)
    for game in games:
        for model in models:
            table = model.table_name(game)
            try:
                # Fall back to the first "*_time" field. A list
                # comprehension is used because filter() returns a
                # non-subscriptable iterator on Python 3; that TypeError
                # would be swallowed below and the delete silently skipped.
                timestamp_columns = model.TIMESTAMP or [
                    field for field in model.FIELDS
                    if field.endswith("_time")
                ][0]
                sql = "delete from {0} where {1} >= {2} and {1} <{3}"
                sql = sql.format(table, timestamp_columns, timestamp_day,
                                 tomorrow[1])
                model.execute(sql)
            except Exception as e:
                log.error("DDL delete error: %s", e)

    with cd(app.work_dir):
        # Handle the start day: keep only the lines that mention it.
        day = util.todate(start)
        directory = os.path.join(app.data, str(day))
        files = os.listdir(directory)
        fs = sorted(files)
        day_bak = os.path.join(app.data, "{0}_startbak".format(str(day)))
        if not os.path.exists(day_bak):
            os.makedirs(day_bak)
        else:
            run("rm -rf {0}/*".format(day_bak))
        for f in fs:
            run("cat {0}/{1} | grep {2} > {3}/{1}".format(directory, f, day, day_bak))
        # Keep the untouched directory as "<day>_bu" and put the filtered
        # copy in its place.
        run("mv {0} {1}/{2}_bu".format(directory, app.data, day))
        run("mv {0} {1}/{2}".format(day_bak, app.data, day))

        # Handle the end day: pull its lines out of the next day's files.
        endday = util.todate(end)
        endtomorrow = util.todate(end) + datetime.timedelta(days=1)
        enddirectory = os.path.join(app.data, str(endtomorrow))
        daydirectory = os.path.join(app.data, str(endday))
        files = os.listdir(enddirectory)
        fs = sorted(files)
        end_bak = os.path.join(app.data, "{0}_endbak".format(str(endtomorrow)))
        if not os.path.exists(end_bak):
            os.makedirs(end_bak)
        else:
            run("rm -rf {0}/*".format(end_bak))
        # The collected lines are stored under the name of the last file
        # (in sorted order) of the next day and appended to the file of
        # the same name in the end day's directory.
        run("cat {0}/* | grep {1} > {2}/{3}".format(enddirectory, end, end_bak, fs[-1]))
        run("cat {0}/{2} >> {1}/{2}".format(end_bak, daydirectory, fs[-1]))

        # Step 1: drop the cleaned output of every day in the range.
        for i in range((util.todate(end) - util.todate(start)).days + 1):
            day = util.todate(start) + datetime.timedelta(days=i)
            run("rm -rf clean/{0}".format(str(day)))