def read(self, table, features=None):
    """Load user profiles from `table` as a labeled DataFrame.

    Paying users (purchase_times > 0) are labeled purchase = 1, non-paying
    users purchase = 0; only users with active_days > 1 are included.

    Arguments:
        table {str} -- source table name (trusted; interpolated into SQL)

    Keyword Arguments:
        features {list|None} -- columns to select; None selects all columns
            (default: {None})

    Returns:
        pandas.DataFrame -- selected columns plus a 'purchase' label column
    """
    conn = MysqlConnection(config.dbhost, config.dbuser, config.dbpassword, config.dbname)
    # 'is None' instead of '== None' (identity test for the None singleton).
    fs = '*' if features is None else ",".join(features)
    sql = "select " + fs + " from " + table + " where purchase_times > 0 and active_days > 1"
    result_pay = conn.query(sql)
    pay_num = len(result_pay)
    df_pay = pd.DataFrame(result_pay)
    df_pay["purchase"] = 1
    sql = "select " + fs + " from " + table + " where purchase_times = 0 and active_days > 1"
    result_no_pay = conn.query(sql)
    # Optionally down-sample the non-paying majority class to a fixed
    # multiple of the paying-user count.
    if self.proportion > 0:
        result_no_pay = random.sample(result_no_pay, self.proportion * pay_num)
    df_no_pay = pd.DataFrame(result_no_pay)
    df_no_pay["purchase"] = 0
    df = pd.concat([df_pay, df_no_pay])
    conn.close()
    return df
def cdf_of_lifetime():
    """Plot the CDF of user lifetime for paying vs. non-paying users and
    save the figure under the slot/tongji figure directory."""
    # Plain tuple cursor (not a dict cursor), so each row is a 1-tuple.
    conn = MysqlConnection(config.dbhost, config.dbuser, config.dbpassword, config.dbname, cursorclass=pymysql.cursors.Cursor)
    path = file_util.get_figure_path("slot", "tongji")
    sql = "select lifetime from slot_churn_profile where purchase_times > 0"
    result = conn.query(sql)
    # NOTE(review): zip(result) wraps each 1-tuple row in another tuple;
    # other functions in this file transpose with zip(*result). Confirm
    # other_util.cdf really expects this nested shape.
    result_pay = list(zip(result))
    sql = "select lifetime from slot_churn_profile where purchase_times = 0"
    result = conn.query(sql)
    result_not_pay = list(zip(result))
    dis_pay = other_util.cdf(result_pay)
    dis_not_pay = other_util.cdf(result_not_pay)
    # dis_*[0] = lifetime values (days), dis_*[1] = cumulative probability.
    plt.plot(dis_pay[0], dis_pay[1], label="pay")
    plt.plot(dis_not_pay[0], dis_not_pay[1], label="not_pay")
    plt.title("CDF of lifetime")
    plt.gca().set_xlabel("days")
    plt.gca().set_ylabel("cdf")
    plt.legend(loc="lower right")
    plt.savefig(os.path.join(path, "cdf_of_lifetime_pay_and_nopay"))
    plt.show()
    # NOTE(review): conn is never closed here — confirm whether the
    # connection is intentionally left open.
def level7DayLeft(levels):
    """Plot, per level, the 7-day retention count over time.

    For each requested level, queries the daily 7-day retention numbers
    and draws one date-vs-retention figure saved under 7DayLeft_level/.

    Arguments:
        levels {list} -- levels to plot, one figure per level
    """
    # Fix: config.dbpassward -> config.dbpassword, matching the attribute
    # every other MysqlConnection call in this file uses.
    connection = MysqlConnection(config.dbhost, config.dbuser, config.dbpassword, config.dbname)
    sql = "select date,user_7day from log_level_left_s_wja_1 where level = %s"
    for level in levels:
        result = connection.query(sql, level)
        # Transpose row tuples into (dates, counts) columns.
        result = list(zip(*result))
        dates_num = result[0]
        number = result[1]
        dates = [str(x) for x in dates_num]
        plt.gca().xaxis.set_major_formatter(
            mdate.DateFormatter('%Y-%m-%d'))  # date label display format
        plt.gca().xaxis.set_major_locator(mdate.AutoDateLocator())
        plt.xticks(pd.date_range(dates[0], dates[-1], freq='5d'))  # tick every 5 days
        plt.xticks(rotation=90)
        plt.plot(dates, number, 'r--')
        path = utils.figure_path("7DayLeft_level")
        plt.savefig(os.path.join(path, str(level)))
        plt.show()
        plt.cla()
    connection.close()
def dauAndDnu_Bar():
    """Stacked bar chart of DAU, split into new users (dnu) and old users."""
    db = MysqlConnection(config.dbhost, config.dbuser, config.dbpassword, config.dbname)
    sql = "select date, login_count, register_count from log_return_s_wja_1_percent where channel_id = %s"
    rows = db.query(sql, -2)
    dates = [str(r['date']) for r in rows]
    dau = [r['login_count'] for r in rows]
    dnu = [r['register_count'] for r in rows]
    figure = plt.gcf()
    figure.set_size_inches(18, 9)
    axis = plt.gca()
    axis.xaxis.set_major_formatter(mdate.DateFormatter('%Y-%m-%d'))  # date label display format
    axis.xaxis.set_major_locator(mdate.AutoDateLocator())
    plt.xticks(pd.date_range(dates[0], dates[-1], freq='5d'))  # tick every 5 days
    plt.xticks(rotation=90)
    # Old users: the part of DAU that did not register that day.
    old_user = [active - fresh for active, fresh in zip(dau, dnu)]
    plt.bar(dates, old_user, width=0.35, label="old_user")
    plt.bar(dates, dnu, bottom=old_user, width=0.35, label="dnu")
    plt.legend(loc='upper right')
    path = file_util.get_figure_path("dau and dnu test")
    plt.savefig(os.path.join(path, "dau and dnu bar"), dpi=100)
    #plt.show()
    db.close()
def save_to_mysql(profiles):  # Save churn profiles to MySQL.
    """Insert one row per user into slot_churn_profile.

    Column names come from the profile dicts themselves (trusted input);
    values are passed as query parameters.

    Arguments:
        profiles {dict} -- {uid: {column: value, ...}, ...}
    """
    conn = MysqlConnection(config.dbhost, config.dbuser, config.dbpassword, config.dbname)
    try:
        for uid, profile in profiles.items():
            cols = ["uid"] + list(profile.keys())
            values = [uid] + list(profile.values())
            columns = "(" + ", ".join(cols) + ")"
            val_format = "(" + ", ".join(["%s"] * len(values)) + ")"
            sql = "insert into slot_churn_profile " + columns + " values " + val_format
            conn.query(sql, values)
    finally:
        # Fix: the original never closed the connection.
        conn.close()
def date7DayLeft(dates):
    """Print and plot per-level 7-day retention for each given date.

    For every date, draws one figure with level on the x axis and the
    7-day retention count on the y axis.

    Arguments:
        dates {list} -- dates to plot, one figure per date
    """
    # Fix: config.dbpassward -> config.dbpassword, matching the attribute
    # every other MysqlConnection call in this file uses.
    connection = MysqlConnection(config.dbhost, config.dbuser, config.dbpassword, config.dbname)
    sql = "select * from log_level_left_s_wja_1 where date = %s"
    for date in dates:
        print("------------", date, "--------------")
        result = connection.query(sql, date)
        level = []
        number = []
        for record in result:
            # record[1] = level, record[2] = 7-day retention count
            print(record[1], record[2])
            level.append(record[1])
            number.append(record[2])
        plt.plot(level, number, 'ro-')
        plt.show()
    connection.close()
def levelTotal(start, end):
    """Bar chart of the total 7-day churn count per level.

    Arguments:
        start {int} -- first level (inclusive)
        end {int} -- last level (exclusive — Python slice semantics)
    """
    # Fix: config.dbpassward -> config.dbpassword, matching the attribute
    # every other MysqlConnection call in this file uses.
    connection = MysqlConnection(config.dbhost, config.dbuser, config.dbpassword, config.dbname)
    path = utils.figure_path("level_left")
    sql = "select * from log_level_left_total"
    result = connection.query(sql)
    result = list(zip(*result))  # transpose rows into columns
    level = result[0][start:end]
    user_7day = result[1][start:end]
    plt.grid(True)
    plt.bar(level, user_7day)
    plt.gca().set_xlabel('level')
    plt.gca().set_ylabel('user_7day')
    plt.savefig(
        os.path.join(path, "level_left_total_" + str(start) + "_" + str(end)))
    plt.show()
    connection.close()
def output_to_mysql(self):
    """Insert every profile in self.profiles into slot_purchase_profile_2018,
    skipping per-machine feature columns."""
    conn = MysqlConnection(config.dbhost, config.dbuser, config.dbpassword, config.dbname)
    try:
        for uid, profile in self.profiles.items():
            cols = ["uid"]
            values = [uid]
            for col, val in profile.items():
                # Per-machine features are not persisted to this table.
                if col.startswith("machine"):
                    continue
                cols.append(col)
                values.append(val)
            columns = "(" + ", ".join(cols) + ")"
            val_format = "(" + ", ".join(["%s"] * len(values)) + ")"
            sql = "insert into slot_purchase_profile_2018 " + columns + " values " + val_format
            conn.query(sql, values)
    finally:
        # Fix: the original never closed the connection.
        conn.close()
def get_uid_2_vector(self, argv=None):  # Build feature vectors from raw data.
    """Build labeled feature vectors for paying and non-paying users.

    Fills self.pay_vectors / self.no_pay_vectors with one row per user:
    column 0 is the pay label (1 / 0), the remaining columns are the
    features named in argv, converted via UserData.transSqlValue.

    Keyword Arguments:
        argv {list|None} -- feature names; falls back to UserData.features
            when None or empty (default: {None})
    """
    # Fix: mutable default argument ([]) replaced with a None sentinel;
    # empty/None both fall back to the full feature list, as before.
    if not argv:
        argv = UserData.features
    conn = MysqlConnection(config.dbhost, config.dbuser, config.dbpassword, config.dbname)
    sql = "select * from slot_user_profile_tmp where purchase_times = 0"
    result_no_pay = conn.query(sql)
    sql = "select * from slot_user_profile_tmp where purchase_times > 0"
    result_pay = conn.query(sql)
    conn.close()  # fix: the original leaked the connection
    len_no_pay = len(result_no_pay)
    len_pay = len(result_pay)
    # Column 0 carries the label, the remaining columns the features.
    self.pay_vectors = np.zeros((len_pay, len(argv) + 1))
    self.pay_vectors[:, 0] = 1
    self.no_pay_vectors = np.zeros((len_no_pay, len(argv) + 1))
    self.no_pay_vectors[:, 0] = 0
    for row, value in enumerate(result_no_pay):
        for i, v_name in enumerate(argv, start=1):
            self.no_pay_vectors[row][i] = UserData.transSqlValue(
                value[v_name], v_name)
    for row, value in enumerate(result_pay):
        for i, v_name in enumerate(argv, start=1):
            self.pay_vectors[row][i] = UserData.transSqlValue(
                value[v_name], v_name)
def dnuOfChannelID_Percent(channels=[-1]):
    """Plot each channel's share of daily new users over time.

    Keyword Arguments:
        channels {list} -- channels to plot (default: {[-1]})
    """
    # NOTE(review): mutable default argument — never mutated here, but a
    # None sentinel would be the safer idiom.
    connection = MysqlConnection(config.dbhost, config.dbuser, config.dbpassword, config.dbname)
    sql = "select date,register_count from log_return_s_wja_1_percent where channel_id = %s"
    # channel_id = -2 rows hold the all-channel totals (the denominator).
    result = connection.query(sql, -2)
    # result = list(zip(*result))
    dates = [str(x['date']) for x in result]
    total_register = [x['register_count'] for x in result]
    # total_register = result[1]
    sql = "select date,register_count from log_return_s_wja_1_percent where date = %s and channel_id = %s"
    for channel in channels:
        path = file_util.get_figure_path()
        register_count = []
        # One query per date: this channel's registrations / total registrations.
        for i in range(len(dates)):
            result = connection.query(sql, [dates[i], channel])
            if len(result):
                register_count.append(result[0]['register_count'] / total_register[i])
            else:
                # No row for this channel/date: treat as zero share.
                register_count.append(0)
        fig = plt.gcf()
        fig.set_size_inches(18, 9)
        plt.gca().xaxis.set_major_formatter(
            mdate.DateFormatter('%Y-%m-%d'))  # date label display format
        plt.gca().xaxis.set_major_locator(mdate.AutoDateLocator())
        plt.xticks(pd.date_range(dates[0], dates[-1], freq='5d'))  # tick every 5 days
        plt.xticks(rotation=90)
        plt.plot(dates, register_count, label=channel)
        plt.legend(loc='upper right')
        #plt.savefig(os.path.join(path,"dnu_percent_" + str(channel)),dpi=100)
        #plt.show()
        plt.cla()
    connection.close()
def sim():
    """Print the pairwise cosine similarity of every item pair in user_item."""
    # Fix: arguments were passed as (host, dbname, user, password); every
    # other MysqlConnection call in this file uses (host, user, password, dbname).
    connection = MysqlConnection(config.dbhost, config.dbuser, config.dbpassword, config.dbname)
    sql = "select * from user_item"
    result = connection.query(sql)
    # Drop the leading uid column: one rating row per user.
    R_u = [x[1:] for x in result]
    R_u_centralized = list(map(centralized, R_u))
    # Transpose so each entry is one item's rating column.
    #items = np.array(list(zip(*R_u_centralized)))
    items = np.array(list(zip(*R_u)))
    for i in range(len(items)):
        for j in range(i + 1, len(items)):
            sim_ij = sim_cosin(items[i], items[j])
            print(sim_ij)
    connection.close()
def dayReturn(days=list(range(2, 31))):
    """Plot the n-day retention percentage over time (2 <= n <= 30).

    For each n, draws one date-vs-retention curve plus a histogram of the
    same percentages, both saved under return_day_test/.

    Keyword Arguments:
        days {list} -- which n-day retentions to plot (default: {list(range(2,31))})
    """
    # NOTE(review): mutable default argument — never mutated here, but a
    # None sentinel would be the safer idiom.
    connection = MysqlConnection(config.dbhost, config.dbuser, config.dbpassword, config.dbname)
    for day in days:
        sql = "select date, " + str(
            day) + "day from log_return_s_wja_1_percent where channel_id = %s"
        # channel_id = -2 selects the pre-aggregated all-channel rows.
        result = connection.query(sql, -2)
        return_percent = [x[str(day) + 'day'] for x in result]
        dates = [str(x['date']) for x in result]
        fig = plt.gcf()
        fig.set_size_inches(18, 9)
        plt.gca().xaxis.set_major_formatter(
            mdate.DateFormatter('%Y-%m-%d'))  # date label display format
        plt.gca().xaxis.set_major_locator(mdate.AutoDateLocator())
        plt.xticks(pd.date_range(dates[0], dates[-1], freq='5d'))  # tick every 5 days
        plt.xticks(rotation=90)
        plt.plot(dates, return_percent, 'r-o', label=str(day) + "day return")
        plt.gca().set_xlabel('date')
        plt.gca().set_ylabel('return percent(%)')
        plt.legend(loc="upper right")
        plt.grid(True)
        path = file_util.get_figure_path("return_day_test")
        plt.savefig(os.path.join(path, str(day) + "day"))
        #plt.show()
        plt.cla()
        # Histogram of the same retention percentages, in a smaller figure.
        fig.set_size_inches(10, 5)
        plt.hist(return_percent)
        path = file_util.get_figure_path("return_day_test", "hist")
        plt.savefig(os.path.join(path, str(day) + "day"))
        #plt.show()
    connection.close()
def pdf_of_lifetime():
    """Bar chart approximating the PDF of user lifetime (first bin dropped)."""
    conn = MysqlConnection(config.dbhost, config.dbuser, config.dbpassword,
                           config.dbname, cursorclass=pymysql.cursors.Cursor)
    sql = "select lifetime from slot_churn_profile"
    lifetimes = np.array(list(zip(*conn.query(sql))))
    total = len(lifetimes[0])
    # Empirical distribution: unique lifetime -> relative frequency.
    uniques, counts = np.unique(lifetimes, return_counts=True)
    distribution = dict(zip(uniques, counts))
    x = list(distribution.keys())
    y = [c / total for c in distribution.values()]
    plt.title("PDF of lifetime")
    plt.bar(x[1:], y[1:], width=1, color="firebrick")
    plt.gca().set_xlabel("days")
    plt.gca().set_ylabel("count")
    plt.show()
def relativeLevelLeft(start, end):
    """Plot the relative churn curve over a level range.

    Each level's 7-day churn count is divided by the mean churn of up to
    the five preceding levels that have data.

    Arguments:
        start {int} -- first level (inclusive)
        end {int} -- last level (inclusive)
    """
    # Fix: config.dbpassward -> config.dbpassword, matching the attribute
    # every other MysqlConnection call in this file uses.
    connection = MysqlConnection(config.dbhost, config.dbuser, config.dbpassword, config.dbname)
    path = utils.figure_path("level_left")
    sql = "select * from log_level_left_total"
    result = connection.query(sql)
    result = list(zip(*result))  # transpose rows into columns
    level = result[0]
    user_7day = result[1]
    left_total = dict(zip(level, user_7day))
    relative_left = []
    for i in range(start, end + 1):
        pre = 0
        n = 0
        # Sum the churn of up to five preceding levels that have data.
        for j in range(i - 5, i):
            if j in left_total:
                n += 1
                pre = pre + left_total[j]
        if pre != 0:
            relative_left.append(left_total[i] / (pre / n))
        else:
            # No preceding data (or all zero): plot 0 to avoid dividing by zero.
            relative_left.append(0)
    x = list(range(start, end + 1))
    plt.gca().set_xlabel('level')
    plt.gca().set_ylabel('relative left rate')
    plt.plot(x, relative_left)
    plt.grid(True)
    plt.savefig(
        os.path.join(path, "relative_level_left" + str(start) + "_" + str(end)))
    plt.show()
    connection.close()
def read_positive(self, table, features):
    """Load feature rows for paying users (label 1) from `table`.

    Arguments:
        table {str} -- source table name (trusted; interpolated into SQL)
        features {list} -- column names to extract from each row

    Returns:
        (numpy.ndarray, numpy.ndarray) -- feature matrix and all-ones labels
    """
    conn = MysqlConnection(config.dbhost, config.dbuser, config.dbpassword, config.dbname)
    sql = "select * from " + table + " where purchase_times > 0 and active_days > 1"
    rows = conn.query(sql)
    samples = [[row[feature] for feature in features] for row in rows]
    labels = [1] * len(rows)
    conn.close()
    return np.array(samples), np.array(labels)
def dnuOfChannelId(channels=[-2]):
    """Stacked bar chart of daily new users (dnu) per channel.

    Keyword Arguments:
        channels {list} -- channel ids to plot; -2 means all channels
            (default: {[-2]})
    """
    connection = MysqlConnection(config.dbhost, config.dbuser, config.dbpassword, config.dbname)
    sql = "select date, register_count from log_return_s_wja_1_percent where channel_id = %s"
    bottom = None  # running per-date totals of the channels already drawn
    for channel in channels:
        result = connection.query(sql, channel)
        dates = [str(x['date']) for x in result]
        register_count = [x['register_count'] for x in result]
        fig = plt.gcf()
        fig.set_size_inches(18, 9)
        plt.gca().xaxis.set_major_formatter(
            mdate.DateFormatter('%Y-%m-%d'))  # date label display format
        plt.gca().xaxis.set_major_locator(mdate.AutoDateLocator())
        plt.xticks(pd.date_range(dates[0], dates[-1], freq='5d'))  # tick every 5 days
        plt.xticks(rotation=90)
        if bottom is None:
            plt.bar(dates, register_count, width=0.35, label=channel)
            bottom = list(register_count)
        else:
            # Fix: the original passed the previous *channel id* (an int) as
            # `bottom`; stacking requires the accumulated counts of the bars
            # already drawn.
            # NOTE(review): assumes every channel returns the same date range.
            plt.bar(dates, register_count, bottom=bottom, width=0.35, label=channel)
            bottom = [b + c for b, c in zip(bottom, register_count)]
    plt.legend(loc='upper right')
    path = file_util.get_figure_path("dau and dnu")
    #plt.savefig(os.path.join(path,"dnu of differente channel"))
    plt.show()
    connection.close()
def dauAndDnu():
    """Line chart of daily active users, new users, and returning (old) users."""
    db = MysqlConnection(config.dbhost, config.dbuser, config.dbpassword, config.dbname)
    sql = "select date, login_count, register_count from log_return_s_wja_1_percent where channel_id = %s"
    rows = db.query(sql, -2)
    dates = [str(r['date']) for r in rows]
    axis = plt.gca()
    axis.xaxis.set_major_formatter(mdate.DateFormatter('%Y-%m-%d'))  # date label display format
    axis.xaxis.set_major_locator(mdate.AutoDateLocator())
    plt.xticks(pd.date_range(dates[0], dates[-1], freq='5d'))  # tick every 5 days
    plt.xticks(rotation=90)
    dau = [r['login_count'] for r in rows]
    dnu = [r['register_count'] for r in rows]
    # Old users: the part of DAU that did not register that day.
    old_user = [active - fresh for active, fresh in zip(dau, dnu)]
    plt.plot(dates, dau, 'r-o', label="dau")
    plt.plot(dates, dnu, 'b-o', label="dnu")
    plt.plot(dates, old_user, 'g-o', label="old_user")
    figure = plt.gcf()
    figure.set_size_inches(19, 9)
    plt.gca().set_xlabel('date')
    plt.gca().set_ylabel('user')
    plt.legend(loc='upper right')
    path = file_util.get_figure_path("dau and dnu test")
    #plt.savefig(os.path.join(path,"dau and dnu"),dpi=100)
    plt.show()
    db.close()
#plt.savefig(os.path.join(path,"dnu_percent_" + str(channel)),dpi=100) #plt.show() plt.cla() connection.close() if __name__ == '__main__': game_id = sys.argv[1] connection = MysqlConnection(config.dbhost, config.dbuser, config.dbpassword, config.dbname) result_path = file_util.get_result_path("return", game_id) for file in os.listdir(result_path): full_file = os.path.join(result_path, file) if os.path.isfile(full_file): file_name_split = file.split('_') channel = file_name_split[1] locale = file_name_split[-1] with open(full_file, 'r') as f: popt = f.read().split() result_table = db_util.get_result_table("return", game_id) sql = "delete from " + result_table + " where channel = %s and locale = %s" connection.query(sql, (channel, locale)) sql = "insert into " + result_table + "(channel, locale, a, b, c) values (%s, %s, %s, %s, %s)" connection.query(sql, (channel, locale, popt[0], popt[1], popt[2])) connection.close()
profile_file = os.path.join(os.path.dirname(__file__), "data", "user_profiles_tmp") if not os.path.exists(profile_file): parser.parse_log() profiles = parser.profiles with open(profile_file, 'wb') as f: pickle.dump(profiles, f) else: with open(profile_file, 'rb') as f: profiles = pickle.load(f) return profiles if __name__ == "__main__": profiles = get_profile() conn = MysqlConnection(config.dbhost, config.dbuser, config.dbpassword, config.dbname) for uid, profile in profiles.items(): columns = "(uid" val_format = "(%s" values = [uid] for col, val in profile.items(): columns += (", " + col) val_format += (", " + "%s") values.append(val) columns += ")" val_format += ")" sql = "insert into slot_user_profile_tmp " + columns + " values " + val_format conn.query(sql, values)
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import config
from utils import *
from MysqlConnection import MysqlConnection
import pickle

# Push the pickled item-item relation result for one game into its
# item-item table, one update per (item, related-item) pair.
game_id = sys.argv[1]
connection = MysqlConnection(config.dbhost, config.dbuser, config.dbpassword, config.dbname)
result_path = file_util.get_result_path("item_used", game_id)
with open(os.path.join(result_path, "result"), 'rb') as f:
    item_item_relation = pickle.load(f)
item_item_table = db_util.get_item_item_table(game_id)
for k, v in item_item_relation.items():
    for vk, vv in v.items():
        # Table and column names come from project helpers / pickled keys
        # (trusted input); the values are parameterized.
        sql = "update " + item_item_table + " set item_" + vk + " = %s where item_id = %s"
        connection.query(sql, (float(vv), k))
# Fix: the original never closed the connection.
connection.close()
def read_log(date_start, date_end, game_id, data_version = -2, server_id = -1):
    """Aggregate per-stage statistics from stage logs and attach 7-day churn.

    For each stage seen in the logs: the number of unique users, and the
    average total play time from a user's first play up to (and including)
    the first pass, summed over that user's attempts.

    Arguments:
        date_start / date_end -- date range of log files to read
        game_id -- game whose stage logs are read

    Keyword Arguments:
        data_version {int} -- keep only records with this data version;
            -2 keeps all (default: {-2})
        server_id {int} -- accepted but ignored: get_log_files is called
            with a hard-coded server_id=-1 (NOTE(review): likely a bug —
            confirm the intended behavior) (default: {-1})

    Returns:
        dict -- {stage_id: {'user_count', 'time_avg', 'users', 'level_left'}}
    """
    files = file_util.get_log_files(date_start, date_end, "stage", game_id, server_id = -1)
    level_info = {}  # {stage_id: {'user_count':, 'time_avg':, 'users':[]}}
    # play_time[stage_id][uid] accumulates one user's attempt durations from
    # first play until first pass; play_time[stage_id]['all'] collects the
    # per-user totals once the stage is passed.
    play_time = {}
    for file in files:
        with open(file, 'r') as f:
            for line in f.readlines():
                line = line.split()
                stage_id = int(line[StageFormat.STAGE_ID.value])
                _data_version = int(line[StageFormat.DATA_VERSION.value])
                if data_version != -2 and _data_version != data_version:
                    continue
                if not stage_id in level_info:
                    level_info[stage_id] = {'user_count':0, 'time_avg':0, 'users':[]}
                    play_time[stage_id] = {"all":[]}
                uid = int(line[StageFormat.UID.value])
                first_play = int(line[StageFormat.FIRST_PLAY.value])
                first_pass = int(line[StageFormat.FIRST_PASS.value])
                # Count each user at most once per stage.
                if not uid in level_info[stage_id]['users']:
                    level_info[stage_id]['users'].append(uid)
                    level_info[stage_id]['user_count'] += 1
                # Track play time only from the first-play record onwards.
                if first_play or uid in play_time[stage_id]:
                    if first_play:
                        play_time[stage_id][uid] = []
                    play_time[stage_id][uid].append(int(line[StageFormat.PLAY_TIME.value]))
                if first_pass:
                    # Stage passed: fold the user's attempt times into 'all'
                    # and stop tracking that user.
                    if uid in play_time[stage_id]:
                        play_time[stage_id]['all'].append(np.sum(play_time[stage_id][uid]))
                        play_time[stage_id].pop(uid)
    for stage_id in level_info.keys():
        # Average first-play-to-first-pass time; 0 when nobody passed.
        level_info[stage_id]['time_avg'] = np.average(play_time[stage_id]['all']) if play_time[stage_id]['all'] != [] else 0
    connection = MysqlConnection(config.dbhost,config.dbuser,config.dbpassword,config.dbname)
    level_left_table = "log_level_left_s_100712_1"
    sql = "select level, sum(user_7day) as sum from " + level_left_table + " where date >= %s and date <= %s group by level"
    result = connection.query(sql, (date_start, date_end))
    level_left = {}
    for x in result:
        level_left[x['level']] = int(x['sum'])
    for level in level_info.keys():
        # 7-day churn expressed as a percentage of this stage's unique users.
        level_info[level]["level_left"] = level_left[level]/level_info[level]['user_count'] * 100 if level in level_left else 0
    return level_info
def calculateTotalReturn(date_start, date_end, game_id):
    """Aggregate daily retention into percentages and dump them to temp files.

    Writes, per date, files named channel_<id>_locale_<id> containing the 29
    retention percentages (2day..30day): once totalled over everything, once
    grouped by channel, once grouped by locale. These files feed the later
    curve-fitting step.

    Arguments:
        date_start {int} -- first date to aggregate
        date_end {int} -- last date to aggregate
        game_id -- game whose temp log directory receives the files
    """
    conn = MysqlConnection(config.dbhost, config.dbuser, config.dbpassword, config.dbname)
    # Build "sum(2day),sum(3day),...,sum(30day)" once for all three queries.
    days = ""
    for i in range(2, 31):
        days += "sum(" + str(i) + "day),"
    days = days[:-1]
    # Total retention (all channels, all locales).
    sql = "select date,sum(login_count),sum(register_count)," + days + " from log_return_s_wja_1 where date >= %s and date <= %s group by date "
    result = conn.query(sql, [date_start, date_end])
    for i in range(len(result)):
        if result[i]['sum(register_count)'] != 0:
            # Convert absolute retention counts into percentages of registrations.
            for j in range(2, 31):
                result[i]["sum(" + str(j) + "day)"] = result[i]["sum(" + str(
                    j) + "day)"] / result[i]['sum(register_count)'] * 100
            # NOTE(review): indentation reconstructed from mangled source —
            # the write below is assumed to run only when registrations > 0.
            values = list(result[i].values())
            temp_log_path = file_util.get_log_tmp_path("return", str(game_id),
                                                       str(result[i]['date']))
            # The last 29 dict values are the 2day..30day percentages
            # (relies on dict preserving the SELECT column order).
            with open(os.path.join(temp_log_path, "channel_-2_locale_-2"),
                      'w') as f:
                for x in values[-29:]:
                    f.write(str(x) + " ")
                f.write("\n")
    # Per-channel retention.
    sql = "select date,sum(login_count),sum(register_count),channel_id, " + days + " from log_return_s_wja_1 where date >= %s and date <= %s group by date, channel_id"
    result = conn.query(sql, [date_start, date_end])
    for i in range(len(result)):
        if result[i]['sum(register_count)'] != 0:
            for j in range(2, 31):
                result[i]["sum(" + str(j) + "day)"] = result[i]["sum(" + str(
                    j) + "day)"] / result[i]['sum(register_count)'] * 100
            values = list(result[i].values())
            temp_log_path = file_util.get_log_tmp_path("return", str(game_id),
                                                       str(result[i]['date']))
            with open(
                    os.path.join(
                        temp_log_path,
                        "channel_" + str(result[i]['channel_id']) + "_locale_-2"),
                    'w') as f:
                for x in values[-29:]:
                    f.write(str(x) + " ")
                f.write("\n")
    # Per-locale retention.
    sql = "select date,sum(login_count),sum(register_count),locale, " + days + " from log_return_s_wja_1 where date >= %s and date <= %s group by date, locale"
    result = conn.query(sql, [date_start, date_end])
    for i in range(len(result)):
        if result[i]['sum(register_count)'] != 0:
            for j in range(2, 31):
                result[i]["sum(" + str(j) + "day)"] = result[i]["sum(" + str(
                    j) + "day)"] / result[i]['sum(register_count)'] * 100
            values = list(result[i].values())
            temp_log_path = file_util.get_log_tmp_path("return", str(game_id),
                                                       str(result[i]['date']))
            with open(
                    os.path.join(temp_log_path,
                                 "channel_-2_locale_" + str(result[i]['locale'])),
                    'w') as f:
                for x in values[-29:]:
                    f.write(str(x) + " ")
                f.write("\n")