def wrap(result_dict): # 先处理从Excel读取的记录,然后导入数据库 insert_list = [] for app_name, value_list in result_dict.items(): # 单位千 mau = value_list[0] * 1000 dau = value_list[1] * 1000 # 单位次,分钟 per_capita_daily_startup_counts, per_capita_daily_use_time, app_name_en, target_date, sector = value_list[2:] per_startup_use_time = per_capita_daily_use_time / per_capita_daily_startup_counts total_startup_counts = dau * per_capita_daily_startup_counts * days_num total_use_time = (per_startup_use_time * total_startup_counts) / 60 insert_list.append([ app_name, app_name_en, sector, target_date, dau, per_capita_daily_startup_counts, per_startup_use_time, mau, per_capita_daily_use_time, total_startup_counts, total_use_time, 'TrustData', str(TODAY), '30', 'Mobile', 'APP', 'iOS + Android']) sql = ( 'INSERT INTO app(app_name, app_name_en, sector, starting_date, daily_active_user, per_capita_daily_startup_counts, ' ' per_startup_use_time, active_users, per_capita_daily_use_time, total_startup_counts , total_use_time,' ' source, createddate, date_type, app_type, data_type, os) ' ' VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)') upsert_batch(sql, insert_list)
def main(filename): # 读取文件 file_dict = {} with open(filename, 'r', encoding='utf8') as a_file: for line in a_file: fields = line.strip().split('\t') app_id, sector, sub_sector, starting_date = fields[0], fields[4], fields[5], fields[7] if app_id in file_dict: if file_dict[app_id][0] < starting_date: file_dict[app_id] = [starting_date, sector, sub_sector] else: file_dict[app_id] = [starting_date, sector, sub_sector] # 查询数据库 sql = ( 'SELECT app_id, app_name, COUNT(DISTINCT sector) AS num ' ' FROM app WHERE source="QuestMobile" GROUP BY app_id') data = query(sql) update_list = [] for row in data: app_id = row['app_id'] if row['num'] > 1 and app_id in file_dict: update_list.append([file_dict[app_id][1], file_dict[app_id][2], app_id]) update_sql = ('UPDATE app SET sector=%s, sub_sector=%s WHERE app_id=%s') print(len(update_list)) upsert_batch(update_sql, update_list)
def file_to_db(path, pattern=None): # 查询已经存在的司机 driver_data = query(DRIVER_SELECT_SQL) driver_set = set(map(lambda row: row["driver_id"], driver_data)) # 查询本月已经存在的司机 driver_month_data = query(DRIVER_MONTH_SELECT_SQL % (CURRENT_MONTH)) driver_month_set = set(map(lambda row: row["driver_id"], driver_month_data)) for file_name in list_files(path, pattern): print(file_name) with open(file_name, "r", encoding="utf8") as a_file: detail_dict, driver_dict, retention_dict = {}, {}, {} for line in a_file: fields = line.strip().split("\t") length = len(fields) if 9 < len(fields) < 13: # 读取字段值,格式转换 driver_id, driver_name, license, photo_url, level, longitude, latitude, \ order_num_str = fields[:8] capture_dtm, city = fields[-2:] order_num = int(order_num_str) flag_dt = capture_dtm.split(" ")[0] city = city[:2] + re.sub(CITY_RE, "", city[2:]) # 确定新司机 if driver_id not in driver_set: driver_dict[driver_id] = [driver_id, driver_name, license, photo_url, level, flag_dt] driver_set.add(driver_id) # 确定本月出现司机 # if driver_id not in retention_dict: # retention_dict[driver_id] = [driver_id, city, CURRENT_MONTH] if driver_id not in driver_month_set: retention_dict[driver_id] = [driver_id, city, CURRENT_MONTH] driver_month_set.add(driver_id) # 详细 if driver_id in detail_dict: detail_dict[driver_id][2] = min(detail_dict[driver_id][2], order_num) detail_dict[driver_id][3] = max(detail_dict[driver_id][3], order_num) detail_dict[driver_id][4] = min(detail_dict[driver_id][4], capture_dtm) detail_dict[driver_id][5] = max(detail_dict[driver_id][5], capture_dtm) else: detail_dict[driver_id] = [driver_id, city, order_num, order_num, capture_dtm, capture_dtm, flag_dt] upsert_batch(NEW_DRIVER_SQL, list(driver_dict.values())) print("导入: " + str(len(driver_dict)) + " 个新司机") upsert_batch(NEW_RETENTION_SQL, list(retention_dict.values())) print("导入: " + str(len(retention_dict)) + " 个本月出现司机") upsert_batch(NEW_DETAIL_SQL, list(detail_dict.values())) print("导入: " + str(len(detail_dict)) + " details信息")
def update_history(filename): # 先读取文件 file_dict = {} min_starting_date = None with open(filename, "r", encoding="utf8") as a_file: date_set = set() for line in a_file: fields = line.strip().split("\t") app_id, os, starting_date = fields[0], OS_MAPPING[fields[6]], fields[7] date_set.add(starting_date) key = app_id + "|-|" + os + "|-|" + starting_date # 单位转换 # os fields[6] = OS_MAPPING[fields[6]] # down_user fields[8] = float(fields[8]) * 10 # active_users fields[10] = float(fields[10]) * 10 # install_user fields[12] = float(fields[12]) * 10 # remove_user fields[14] = float(fields[14]) * 10 # daily_use_time fields[16] = float(fields[16]) / 6 # daily_startup_counts fields[17] = float(fields[17]) * 10 # daily_active_user fields[18] = float(fields[18]) * 10 # total_use_time fields[20] = float(fields[20]) / 6 # total_startup_counts fields[24] = float(fields[24]) * 10 file_dict[key] = fields min_starting_date = min(date_set) # 读取数据库,找出已存在的应用 sql = ( "SELECT DISTINCT app_id, os, starting_date, active_users, app_name FROM app WHERE source='QuestMobile'" " AND starting_date >= '%s'") % (min_starting_date) data = query(sql) db_dict = {} for row in data: app_id, os, starting_date, active_users, app_name = row['app_id'], row['os'], \ row['starting_date'], row['active_users'], row['app_name'] key = app_id + "|-|" + os + "|-|" + str(starting_date) db_dict[key] = [active_users, app_name] insert_list, update_list = [], [] for key in file_dict.keys(): if key in db_dict: db_mau = db_dict[key][0] file_mau = file_dict[key][10] diff = file_mau - db_mau # 找出有变化的行 if diff >= 1: # print(key + '\t' + db_dict[key][1] + '\t' + str(db_dict[key][0]) + '\t' + str(file_dict[key][10])) temp_list = list(file_dict[key]) del temp_list[0], temp_list[5], temp_list[5] temp_list.extend(key.split("|-|")) update_list.append(temp_list) else: file_dict[key].extend(["QuestMobile", "Mobile", "APP", str(TODAY)]) insert_list.append(file_dict[key]) # 插入/更新数据库 insert_sql = ( "INSERT INTO app(app_id, app_name, company, app_name_en, sector," " sub_sector, os, starting_date, down_user, down_user_permeability, active_users, " " active_users_penetration, install_user, install_user_permeability, remove_user, " " remove_user_permeability, daily_use_time, daily_startup_counts, daily_active_user," " daily_active_user_penetration, total_use_time, total_use_time_share, " " per_capita_use_time, per_startup_use_time, total_startup_counts, " " total_startup_counts_share, per_capita_startup_counts, source, app_type, data_type, createddate)" " VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s," " %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)") update_sql = ( "UPDATE app SET" " app_name=%s, company=%s, app_name_en=%s, sector=%s, sub_sector=%s," " down_user=%s, down_user_permeability=%s, active_users=%s," " active_users_penetration=%s, install_user=%s, install_user_permeability=%s," " remove_user=%s, remove_user_permeability=%s, daily_use_time=%s, " " daily_startup_counts=%s, daily_active_user=%s, daily_active_user_penetration=%s," " total_use_time=%s, total_use_time_share=%s, per_capita_use_time=%s," " per_startup_use_time=%s, total_startup_counts=%s, total_startup_counts_share=%s," " per_capita_startup_counts=%s " " WHERE app_id=%s AND os=%s AND source='QuestMobile' AND app_type='Mobile' AND starting_date=%s") print("插入数据条数: " + str(len(insert_list))) print("更新数据条数: " + str(len(update_list))) upsert_batch(insert_sql, insert_list) upsert_batch(update_sql, update_list)
def update_history(filename): # 先读取文件 file_dict = {} min_starting_date = None with open(filename, "r", encoding="utf8") as a_file: date_set = set() for line in a_file: fields = line.strip().split("\t") app_id, os, starting_date = fields[0], fields[6], fields[7] date_set.add(starting_date) key = app_id + "|-|" + OS_MAPPING[os] + "|-|" + starting_date # 单位转换 fields[6] = OS_MAPPING[fields[6]] # down_user fields[8] = float(fields[8]) * 10 # active_users fields[10] = float(fields[10]) * 10 # install_user fields[12] = float(fields[12]) * 10 # remove_user fields[14] = float(fields[14]) * 10 # daily_use_time fields[16] = float(fields[16]) / 6 # daily_startup_counts fields[17] = float(fields[17]) * 10 # daily_active_user fields[18] = float(fields[18]) * 10 # total_use_time fields[20] = float(fields[20]) / 6 # total_startup_counts fields[24] = float(fields[24]) * 10 file_dict[key] = fields min_starting_date = min(date_set) # 读取数据库,找出已存在的应用 sql = ( "SELECT app_id, os, starting_date, active_users FROM app WHERE source='QuestMobile'" " AND starting_date >= '%s' GROUP BY app_id, os, starting_date") % (min_starting_date) data = query(sql) # key_set = set([ # row["app_id"] + "|-|" + row["os"] + "|-|" + str(row["starting_date"]) for row in data]) db_dict = {row["app_id"] + "|-|" + row["os"] + "|-|" + str(row["starting_date"]): row['active_users'] for row in data} insert_list, update_list = [], [] for key in file_dict.keys(): if key in db_dict.keys(): if math.fabs(file_dict[key][10] - db_dict[key]) >= 1: temp_list = list(file_dict[key]) # 注意删除之后,后续的会排上来 del temp_list[0], temp_list[5], temp_list[5] temp_list.extend(key.split("|-|")) update_list.append(temp_list) else: file_dict[key].extend(["QuestMobile", "Mobile", "APP", str(TODAY)]) insert_list.append(file_dict[key]) # 插入/更新数据库 insert_sql = ( "INSERT INTO app(app_id, app_name, company, app_name_en, sector," " sub_sector, os, starting_date, down_user, down_user_permeability, active_users, " " active_users_penetration, install_user, install_user_permeability, remove_user, " " remove_user_permeability, daily_use_time, daily_startup_counts, daily_active_user," " daily_active_user_penetration, total_use_time, total_use_time_share, " " per_capita_use_time, per_startup_use_time, total_startup_counts, " " total_startup_counts_share, per_capita_startup_counts, source, app_type, data_type, createddate)" " VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s," " %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)") update_sql = ( "UPDATE app SET" " app_name=%s, company=%s, app_name_en=%s, sector=%s, sub_sector=%s," " down_user=%s, down_user_permeability=%s, active_users=%s," " active_users_penetration=%s, install_user=%s, install_user_permeability=%s," " remove_user=%s, remove_user_permeability=%s, daily_use_time=%s, " " daily_startup_counts=%s, daily_active_user=%s, daily_active_user_penetration=%s," " total_use_time=%s, total_use_time_share=%s, per_capita_use_time=%s," " per_startup_use_time=%s, total_startup_counts=%s, total_startup_counts_share=%s," " per_capita_startup_counts=%s " " WHERE app_id=%s AND os=%s AND source='QuestMobile' AND app_type='Mobile' AND starting_date=%s") print("插入数据条数: " + str(len(insert_list))) print("更新数据条数: " + str(len(update_list))) upsert_batch(insert_sql, insert_list) upsert_batch(update_sql, update_list)