def get_new_data(createddate, city, qtype):
    """Print, per category, how many ``qtype`` values are new on a date.

    Two queries run against ``vdian_final`` for ``city``: one for rows whose
    ``createddate`` is strictly before the given date, one for rows exactly
    on it.  For every key of ``CATEGORY_LIST`` the function prints the number
    of ``qtype`` values found in the second result but not in the first.

    Args:
        createddate: Date value interpolated into the SQL as a quoted string.
        city: City value interpolated into the SQL as a quoted string.
        qtype: Column name to select and group by; also the key read from
            each result row.

    Note:
        ``CATEGORY_LIST`` has to contain ``"Total"`` and every ``one_level``
        value the queries return, otherwise a ``KeyError`` is raised.  The
        SQL is assembled by string interpolation, so the arguments must come
        from a trusted source.
    """

    def group_by_category(rows):
        # One (initially empty) set per category, in CATEGORY_LIST order.
        grouped = OrderedDict((name, set()) for name in CATEGORY_LIST)
        for record in rows:
            value, category = record[qtype], record["one_level"]
            grouped["Total"].add(value)
            grouped[category].add(value)
        return grouped

    old_sql = ("SELECT %s, one_level FROM vdian_final "
               " WHERE createddate < '%s' AND city = '%s' GROUP BY %s") % (
                   qtype, createddate, city, qtype)
    print(old_sql)
    old_dict = group_by_category(query(old_sql))

    new_sql = ("SELECT %s, one_level FROM vdian_final "
               " WHERE createddate = '%s' AND city = '%s' GROUP BY %s") % (
                   qtype, createddate, city, qtype)
    new_dict = group_by_category(query(new_sql))

    # Print the results: values present on the date but never seen before.
    # (The reverse difference, old - new, would give the values that
    # disappeared.)
    for key, seen_before in old_dict.items():
        print(len(new_dict[key] - seen_before))
def unify_entrytime(entrytime):
    """Replace implausibly early ``entrytime`` values in ``58_ayi``.

    Selects up to five distinct uids whose ``entrytime`` is earlier than the
    given threshold.  For each uid, the first non-empty ``entrytime`` (in
    ascending order) that is not earlier than "2014-08-01" is chosen as the
    replacement.  All ``[new_entrytime, uid]`` pairs are then handed to
    ``update_batch`` in one call.

    Args:
        entrytime: Threshold interpolated into the selection SQL.

    Note:
        A uid with no entrytime on or after "2014-08-01" is left untouched.
    """
    candidate_sql = (
        "SELECT DISTINCT uid FROM 58_ayi WHERE entrytime< '%s'"
        " ORDER BY entrytime LIMIT 5") % (entrytime)
    candidates = query(candidate_sql)
    update_sql = "UPDATE 58_ayi SET entrytime = %s WHERE uid = %s"

    pending = []
    for candidate in candidates:
        uid = candidate["uid"]
        history_sql = ("SELECT entrytime FROM 58_ayi WHERE uid= %s ORDER BY entrytime") % (uid,)
        history = query(history_sql)
        # First entrytime for this uid that is set and not before 2014-08-01.
        replacement = next(
            (
                str(record["entrytime"])
                for record in history
                if record["entrytime"] and str(record["entrytime"]) >= "2014-08-01"
            ),
            None,
        )
        if replacement is not None:
            pending.append([replacement, uid])

    print("Update list Size: " + str(len(pending)))
    update_batch(update_sql, pending)
def get_new_staff():
    """Print the number of Meijia/Meijie/Xiufu staff first seen this month.

    Staff captured from "2000-01-01" up to (not including) ``current_month``
    form the baseline.  Staff captured in ``[current_month, next_month)``
    whose uid is not in the baseline are counted as new, in total and per
    city tier (first tier, second tier, everything else).
    """
    sql = (
        'SELECT DISTINCT uid, city FROM 58_others WHERE capturedate >= "%s"'
        ' AND capturedate < "%s" AND TYPE IN ("Meijie", "Meijia", "Xiufu")'
    )

    def tier_slot(city):
        # Slot 1 / 2 / 3 = first tier / second tier / any other city.
        if city in FIRST_TIER_CITIES_PY:
            return 1
        if city in SECOND_TIER_CITIES_PY:
            return 2
        return 3

    # Slot 0 always holds the overall set; slots 1-3 hold the tier sets.
    baseline_rows = query(sql % ("2000-01-01", current_month))
    old_list = [set() for _ in range(4)]
    for record in baseline_rows:
        uid, city = record["uid"], record["city"]
        old_list[0].add(uid)
        old_list[tier_slot(city)].add(uid)

    current_rows = query(sql % (current_month, next_month))
    new_list = [set() for _ in range(4)]
    for record in current_rows:
        uid, city = record["uid"], record["city"]
        if uid in old_list[0]:
            continue
        new_list[0].add(uid)
        new_list[tier_slot(city)].add(uid)

    # Print the results.
    print("*****************Meijia/Meijie/Xiufu 新员工*******************")
    print("Total: %s\n一线城市: %s\n二线城市: %s\n三线城市: %s"
          % tuple(len(group) for group in new_list))
def retention_handler(start, end):
    """Print retention ratios for the cohort that entered in one month.

    The cohort is every uid returned by ``SQL`` filtered on ``entrytime`` in
    ``[start, start + 1 month)``.  A uid is retained when it also appears in
    the result filtered on ``capturedate`` in ``[end, end + 1 month)``.
    One ratio is printed per slot of ``CITY_LIST``; slot 0 collects every
    uid regardless of city.  A slot with an empty cohort prints ``0.0``.

    Args:
        start: First day of the cohort month (must support
            ``+ relativedelta``).
        end: First day of the month in which retention is measured.

    Raises:
        ValueError: If a row's city is not in ``CITY_LIST``.
    """
    start_next = start + relativedelta(months=1)
    end_next = end + relativedelta(months=1)

    cohort_rows = query(SQL.format("entrytime", str(start), str(start_next)))
    cohort = [set() for _ in range(CITY_LEN)]
    for record in cohort_rows:
        uid = record["uid"]
        slot = CITY_LIST.index(record["city"])
        cohort[0].add(uid)
        cohort[slot].add(uid)

    # Staff seen again in the measured month.
    later_rows = query(SQL.format("capturedate", str(end), str(end_next)))
    retained = [set() for _ in range(CITY_LEN)]
    for record in later_rows:
        uid = record["uid"]
        slot = CITY_LIST.index(record["city"])
        for position in (0, slot):
            if uid in cohort[position]:
                retained[position].add(uid)

    # Print the results: retention ratio per slot.
    for position in range(CITY_LEN):
        cohort_size = len(cohort[position])
        if cohort_size:
            print(float(len(retained[position])) / float(cohort_size))
        else:
            print(0.0)
def _handler(start):
    """Print the share of one month's staff that is still seen this month.

    The start cohort is every uid captured in ``[start, start + 1 month)``
    for the Meijie/Meijia/Xiufu types.  The printed value is the fraction of
    that cohort that is also captured in ``[current_month, next_month)``.

    Args:
        start: First day of the cohort month (must support
            ``+ relativedelta``).

    Note:
        An empty start cohort prints ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    start_next = start + relativedelta(months=1)
    sql = (
        'SELECT DISTINCT uid, city FROM 58_others WHERE capturedate >= "%s"'
        ' AND capturedate < "%s" AND TYPE IN ("Meijie", "Meijia", "Xiufu")'
    )
    start_data = query(sql % (start, start_next))
    start_set = {row["uid"] for row in start_data}

    end_data = query(sql % (current_month, next_month))
    end_set = {row["uid"] for row in end_data if row["uid"] in start_set}

    # Guard the division: a month with no captured staff has no ratio.
    if start_set:
        print(len(end_set) / len(start_set))
    else:
        print(0.0)
def file_to_db(path, pattern=None):
    """Load tab-separated driver capture files into the database.

    For every file returned by ``list_files(path, pattern)`` the function
    collects three batches and upserts them once the file has been read:

    * drivers whose id has not been seen before (``NEW_DRIVER_SQL``),
    * drivers not yet recorded for ``CURRENT_MONTH``
      (``NEW_RETENTION_SQL``),
    * one detail row per driver with the min/max order number and min/max
      capture timestamp found in the file (``NEW_DETAIL_SQL``).

    Only lines with 10, 11 or 12 tab-separated fields are processed.

    Args:
        path: Passed through to ``list_files``.
        pattern: Passed through to ``list_files``.

    Raises:
        ValueError: If the order-number field of an accepted line is not an
            integer.
    """
    # Drivers that already exist.
    known_drivers = {row["driver_id"] for row in query(DRIVER_SELECT_SQL)}
    # Drivers already recorded for the current month.
    month_rows = query(DRIVER_MONTH_SELECT_SQL % (CURRENT_MONTH))
    month_drivers = {row["driver_id"] for row in month_rows}

    for file_name in list_files(path, pattern):
        print(file_name)
        with open(file_name, "r", encoding="utf8") as a_file:
            detail_dict, driver_dict, retention_dict = {}, {}, {}
            for line in a_file:
                fields = line.strip().split("\t")
                if not 9 < len(fields) < 13:
                    continue

                # Read the field values and convert formats.
                (driver_id, driver_name, license_no, photo_url, level,
                 _longitude, _latitude, order_num_str) = fields[:8]
                capture_dtm, city = fields[-2:]
                order_num = int(order_num_str)
                flag_dt = capture_dtm.split(" ")[0]
                # The first two characters of the city are kept verbatim;
                # CITY_RE matches are removed from the remainder only.
                city = city[:2] + re.sub(CITY_RE, "", city[2:])

                # New driver.
                if driver_id not in known_drivers:
                    driver_dict[driver_id] = [
                        driver_id, driver_name, license_no, photo_url, level, flag_dt]
                    known_drivers.add(driver_id)

                # Driver seen for the first time this month.
                if driver_id not in month_drivers:
                    retention_dict[driver_id] = [driver_id, city, CURRENT_MONTH]
                    month_drivers.add(driver_id)

                # Per-driver detail: widen the order / timestamp ranges.
                detail = detail_dict.get(driver_id)
                if detail is None:
                    detail_dict[driver_id] = [
                        driver_id, city, order_num, order_num,
                        capture_dtm, capture_dtm, flag_dt]
                else:
                    detail[2] = min(detail[2], order_num)
                    detail[3] = max(detail[3], order_num)
                    detail[4] = min(detail[4], capture_dtm)
                    detail[5] = max(detail[5], capture_dtm)

        upsert_batch(NEW_DRIVER_SQL, list(driver_dict.values()))
        print("导入: " + str(len(driver_dict)) + " 个新司机")
        upsert_batch(NEW_RETENTION_SQL, list(retention_dict.values()))
        print("导入: " + str(len(retention_dict)) + " 个本月出现司机")
        upsert_batch(NEW_DETAIL_SQL, list(detail_dict.values()))
        print("导入: " + str(len(detail_dict)) + " details信息")
def main(filename):
    """Reset sector/sub_sector of apps that carry more than one sector.

    The tab-separated file supplies, per app id, the sector and sub-sector
    of the row with the greatest ``starting_date`` (compared as strings).
    Every QuestMobile app in the ``app`` table that has more than one
    distinct sector and is present in the file gets updated to those values.

    Args:
        filename: Path of the UTF-8, tab-separated input file.  Columns 0,
            4, 5 and 7 are read as app id, sector, sub-sector and starting
            date.

    Raises:
        IndexError: If a line has fewer than eight fields.
    """
    # Read the file, keeping the most recent entry per app id.
    latest = {}
    with open(filename, 'r', encoding='utf8') as a_file:
        for line in a_file:
            fields = line.strip().split('\t')
            app_id, sector, sub_sector, starting_date = (
                fields[0], fields[4], fields[5], fields[7])
            current = latest.get(app_id)
            if current is None or current[0] < starting_date:
                latest[app_id] = [starting_date, sector, sub_sector]

    # Query the database.
    sql = (
        'SELECT app_id, app_name, COUNT(DISTINCT sector) AS num '
        ' FROM app WHERE source="QuestMobile" GROUP BY app_id')
    data = query(sql)

    update_list = []
    for row in data:
        app_id = row['app_id']
        if row['num'] <= 1 or app_id not in latest:
            continue
        _, sector, sub_sector = latest[app_id]
        update_list.append([sector, sub_sector, app_id])

    update_sql = ('UPDATE app SET sector=%s, sub_sector=%s WHERE app_id=%s')
    print(len(update_list))
    upsert_batch(update_sql, update_list)
def get_top_gmv(a_date, city):
    """Print the combined GMV of the 100 highest-grossing shops.

    GMV per row is ``sales_price * sales_volume``.  It is summed per
    ``shop_name`` over all ``vdian_final`` rows for the given date and city,
    and the sum of the 100 largest shop totals is printed.

    Args:
        a_date: Date value interpolated into the SQL as a quoted string.
        city: City value interpolated into the SQL as a quoted string.
    """
    sql = ("SELECT * FROM vdian_final WHERE createddate = '%s' and city='%s'"
           " ORDER BY sales_volume DESC") % (a_date, city)
    print(sql)
    data = query(sql)

    # Overall totals per shop (a per-category breakdown is not computed).
    shop_totals = {}
    for row in data:
        shop_name = row["shop_name"]
        row_gmv = row["sales_price"] * row["sales_volume"]
        if shop_name in shop_totals:
            shop_totals[shop_name] += row_gmv
        else:
            shop_totals[shop_name] = row_gmv

    # Print the result: overall top 100.
    print(sum(heapq.nlargest(100, shop_totals.values())))
def get_fake_data(a_date, city):
    """Print counts and GMV of outlier items for one date and city.

    GMV per row is ``sales_price * sales_volume``.  Three independent
    criteria are evaluated for every ``vdian_final`` row:

    * GMV above 100000: GMV is accumulated and the item id recorded,
    * ``collection`` equal to 0: the item id is recorded,
    * ``sales_price`` above 10000: GMV is accumulated and the item id
      recorded.

    Args:
        a_date: Date value interpolated into the SQL as a quoted string.
        city: City value interpolated into the SQL as a quoted string.
    """
    sql = ("SELECT one_level, sales_price, sales_volume, shop_name, item_id, collection"
           " FROM vdian_final WHERE createddate = '%s' and city='%s'") % (a_date, city)
    print(sql)
    data = query(sql)

    no_collection_items = set()
    high_gmv_total = 0.0
    high_gmv_items = set()
    high_price_gmv_total = 0.0
    high_price_items = set()

    for row in data:
        price = row["sales_price"]
        item_id = row["item_id"]
        gmv = price * row["sales_volume"]
        if gmv > 100000:
            high_gmv_total += gmv
            high_gmv_items.add(item_id)
        if row["collection"] == 0:
            no_collection_items.add(item_id)
        if price > 10000.0:
            high_price_gmv_total += gmv
            high_price_items.add(item_id)

    # Print the results.
    print("No collection: ", len(no_collection_items))
    # A bare ``print`` is a no-op expression in Python 3; call it so the
    # separating blank line is actually emitted.
    print()
    print("GMV: ", high_gmv_total)
    print("SKU gt 10w: ", len(high_gmv_items))
    print("GMV Price gt 1w: ", high_price_gmv_total)
    print("SKU gt 1w: ", len(high_price_items))
def get_driver_and_rides():
    """Print driver and ride counts for the current month by city tier.

    Reads ``didi_details`` rows whose ``capture_dtm_min`` falls in
    ``[current_month, next_month)``, grouped by driver and city.  A group's
    ride count is ``MAX(order_max) - MIN(order_max)``.  Three lines are
    printed, each with the first-tier and second-tier value: number of
    groups, number of groups with a non-zero ride count, and total rides.
    Cities in neither ``LEVEL_ONE`` nor ``LEVEL_TWO`` are ignored.
    """
    sql = (
        "SELECT driver_id, city, MIN(order_max), MAX(order_max), MIN(capture_dtm_min), MAX(capture_dtm_max)"
        " FROM didi_details WHERE capture_dtm_min >= '%s'"
        " AND capture_dtm_min < '%s' GROUP BY driver_id, city") % (
            str(current_month), str(next_month))
    data = query(sql)

    tiers = ("一线城市", "二线城市")
    driver_nums = dict.fromkeys(tiers, 0)
    driver_has_rides = dict.fromkeys(tiers, 0)
    rides_nums = dict.fromkeys(tiers, 0)

    for row in data:
        city = row["city"]
        order = row["MAX(order_max)"] - row["MIN(order_max)"]
        if city in LEVEL_ONE:
            tier = "一线城市"
        elif city in LEVEL_TWO:
            tier = "二线城市"
        else:
            continue
        driver_nums[tier] += 1
        if order:
            driver_has_rides[tier] += 1
            rides_nums[tier] += order

    # Print the results.
    for label, counts in (
        ("Active Drivers", driver_nums),
        ("Drivers With New Rides", driver_has_rides),
        ("New Rides", rides_nums),
    ):
        print((label + ": {0}, {1}").format(counts["一线城市"], counts["二线城市"]))
def orders_matrix(flag=None):
    """Print a cohort-by-quarter matrix of order counts from ``jd``.

    One tab-separated line is printed per cohort ``i`` in
    ``range(QUARTER_NUM)``.  Column ``quarter_dict[quarter]`` counts the rows
    whose upper-cased ``user_num`` belongs to ``new_user_list[i]``.

    Args:
        flag: When falsy, every row is counted except those whose status is
            "没有下单" (active but no order placed).  Otherwise a container
            of categories; only rows whose category is in it are counted.
    """
    sql = (
        "SELECT user_num, order_no, category, price, mobile, b2c, status, quarter FROM jd"
        " WHERE b2c = 'N'"
        " ORDER BY quarter")
    data = query(sql)

    for i in range(QUARTER_NUM):
        # Rows are not de-duplicated by order number: each record counts once.
        order_inner_list = [0] * QUARTER_NUM
        cohort = new_user_list[i]
        for row in data:
            user_num = row["user_num"].upper()
            category, quarter, status = row["category"], row["quarter"], row["status"]
            if quarter not in quarter_dict or user_num not in cohort:
                continue
            index = quarter_dict.get(quarter)
            if not flag:
                # Skip "active but no order" rows.  The previous form,
                # ``+= 1 if ... else None``, raised TypeError on such rows.
                if status != "没有下单":
                    order_inner_list[index] += 1
            elif category in flag:
                order_inner_list[index] += 1
        # Print the results.
        print(*order_inner_list, sep="\t")
def gmv_matrix_percentage(flag):
    """Print a cohort-by-quarter matrix of GMV shares from ``jd``.

    For each cohort ``i`` and quarter column, the printed value is the GMV
    of a subset of the cohort's rows divided by the cohort's total GMV in
    that quarter (``0`` when the total is zero).

    Args:
        flag: ``"Category"`` selects rows whose category is "3C Digital" or
            "Home Appliance"; ``"Mobile"`` selects rows with ``mobile`` equal
            to "Y".  Any other value leaves the subset empty.
    """
    sql = (
        "SELECT user_num, order_no, category, price, mobile, b2c, quarter FROM jd"
        " WHERE b2c = 'N'"
        " GROUP BY user_num, order_no, quarter ORDER BY quarter")
    data = query(sql)

    for cohort_index in range(QUARTER_NUM):
        totals = [0.0] * QUARTER_NUM
        subset_totals = [0.0] * QUARTER_NUM
        for row in data:
            user_num = row["user_num"].upper()
            quarter = row["quarter"]
            if quarter not in quarter_dict:
                continue
            if user_num not in new_user_list[cohort_index]:
                continue
            column = quarter_dict.get(quarter)
            amount = row["price"] * 1.0
            totals[column] += amount

            if flag == "Category":
                in_subset = row["category"] in ["3C Digital", "Home Appliance"]
            elif flag == "Mobile":
                in_subset = row["mobile"] == "Y"
            else:
                in_subset = False
            if in_subset:
                subset_totals[column] += amount

        # Print the results.
        ratios = [
            str(float(part) / whole if whole else 0)
            for part, whole in zip(subset_totals, totals)
        ]
        print("\t".join(ratios))
def gmv_matrix(flag):
    """Print a cohort-by-quarter matrix of GMV from ``taobao``.

    One tab-separated line is printed per cohort ``i``.  A row whose quarter
    is a key of ``quarter_dict`` adds its price to that quarter's column when
    the upper-cased id belongs to ``new_user_list[i]``.  A row whose quarter
    is not in ``quarter_dict`` is added to column 0 of the first cohort only.

    Args:
        flag: When falsy, all rows qualify; otherwise only rows whose
            ``is_tmall`` equals ``flag``.
    """
    sql = "SELECT id, quarter, price, is_tmall FROM taobao"
    data = query(sql)

    for cohort_index in range(0, QUARTER_NUM):
        gmv_inner_list = [0.0] * QUARTER_NUM
        for row in data:
            user_id = row["id"].upper()
            quarter, price, is_tmall = row["quarter"], row["price"], row["is_tmall"]

            if quarter in quarter_dict:
                if user_id not in new_user_list[cohort_index]:
                    continue
                column = quarter_dict.get(quarter)
            elif cohort_index == 0:
                column = 0
            else:
                continue

            # No flag means Tmall and Taobao are both included; otherwise
            # only the requested platform is.
            if not flag or is_tmall == flag:
                gmv_inner_list[column] += float(price) * 1.0

        # Print the results.
        print("\t".join(str(value) for value in gmv_inner_list))
def user_matrix(flag):
    """Print a cohort-by-quarter matrix of active user counts.

    One tab-separated line is printed per cohort ``i``.  For rows whose
    quarter is a key of ``quarter_dict``, the upper-cased id is added to that
    quarter's set when it belongs to ``new_user_list[i]``.  On the diagonal
    (cohort index equals quarter index) the platform filter is not applied.
    Rows whose quarter is not in ``quarter_dict`` are added to column 0 of
    the first cohort only.

    Args:
        flag: When falsy, all rows qualify off the diagonal; otherwise only
            rows whose ``is_tmall`` equals ``flag``.
    """
    sql = ("SELECT id, quarter, is_tmall FROM taobao")
    data = query(sql)

    for cohort_index in range(0, QUARTER_NUM):
        active_user_list = [set() for _ in range(QUARTER_NUM)]
        for row in data:
            quarter, user_id, is_tmall = row["quarter"], row["id"].upper(), row["is_tmall"]
            if quarter in quarter_dict:
                column = quarter_dict.get(quarter)
                if user_id not in new_user_list[cohort_index]:
                    continue
                # The diagonal always holds the complete set of new users,
                # whatever the Tmall/Taobao split; elsewhere the flag
                # filter applies.
                on_diagonal = cohort_index == column
                if on_diagonal or not flag or is_tmall == flag:
                    active_user_list[column].add(user_id)
            elif cohort_index == 0:
                active_user_list[0].add(user_id)

        # Print the results.
        print("\t".join(str(len(users)) for users in active_user_list))
def orders_matrix(flag):
    """Print a cohort-by-quarter matrix of order counts from ``taobao``.

    Only rows with a NULL ``createddate`` are read.  One tab-separated line
    is printed per cohort ``i``.  A row whose quarter is a key of
    ``quarter_dict`` counts towards that quarter's column when the
    upper-cased id belongs to ``new_user_list[i]``.  A row whose quarter is
    not in ``quarter_dict`` counts towards column 0 of the first cohort only.

    Args:
        flag: When falsy, all rows qualify; otherwise only rows whose
            ``is_tmall`` equals ``flag``.
    """
    sql = "SELECT id, quarter, is_tmall FROM taobao WHERE createddate is Null"
    data = query(sql)

    for cohort_index in range(0, QUARTER_NUM):
        order_inner_list = [0] * QUARTER_NUM
        for row in data:
            user_id = row["id"].upper()
            quarter, is_tmall = row["quarter"], row["is_tmall"]

            if quarter in quarter_dict:
                if user_id not in new_user_list[cohort_index]:
                    continue
                column = quarter_dict.get(quarter)
            elif cohort_index == 0:
                column = 0
            else:
                continue

            # No flag means Tmall and Taobao are both included; otherwise
            # only the requested platform is.
            if not flag or is_tmall == flag:
                order_inner_list[column] += 1

        # Print the results.
        print("\t".join(str(count) for count in order_inner_list))
def get_fulltime_ayi():
    """Print the full-time share and head count of staff per city slot.

    Reads the rows of ``SQL`` filtered on ``capturedate`` in
    ``[current_month, next_month)``.  A uid is full-time when its
    ``worktime`` contains "全职" and does not contain "半".  Slot 0
    collects every uid; the remaining slots follow ``CITY_LIST``.  Two
    sections are printed: the full-time ratio per slot (``0.0`` for an empty
    slot) and the number of distinct uids per slot.

    Raises:
        ValueError: If a row's city is not in ``CITY_LIST``.
    """
    data = query(SQL.format("capturedate", str(current_month), str(next_month)))

    # All staff / full-time staff, one set per slot.
    all_list = [set() for _ in range(CITY_LEN)]
    fulltime_list = [set() for _ in range(CITY_LEN)]

    for row in data:
        uid, worktime = row["uid"], row["worktime"]
        slots = (0, CITY_LIST.index(row["city"]))
        for slot in slots:
            all_list[slot].add(uid)
        if "全职" in worktime and "半" not in worktime:
            for slot in slots:
                fulltime_list[slot].add(uid)

    print("*******************全职阿姨占比*******************")
    for everyone, fulltime in zip(all_list, fulltime_list):
        if everyone:
            print(float(len(fulltime)) / float(len(everyone)))
        else:
            print(0.0)

    # Total head count per slot.
    print("********************全部阿姨数量******************")
    for everyone in all_list:
        print(len(everyone))
def get_accumu_cars():
    """Print cumulative record counts and covered cities per city tier.

    Reads ``Haoche`` rows created before ``next_month`` (grouped by
    ``code``).  Each row counts towards its city's tier: ``LEVEL_ONE``,
    ``LEVEL_TWO``, or the third tier for everything else.  A city is
    "covered" when at least one of its rows has an ``updatedAt`` that, as a
    string, is not less than ``str(current_month)``.
    """
    print("**************************Accumu Cars**************************")
    sql = (
        "SELECT city, updatedAt FROM Haoche WHERE createdAt < '%s'"
        " GROUP BY code") % (str(next_month))
    data = query(sql)

    accumu_dict = {"一线城市": 0, "二线城市": 0, "三线城市": 0}
    city_dict = {"一线城市": set(), "二线城市": set(), "三线城市": set()}

    for row in data:
        city, updated_at = row["city"], row["updatedAt"]
        if city in LEVEL_ONE:
            tier = "一线城市"
        elif city in LEVEL_TWO:
            tier = "二线城市"
        else:
            tier = "三线城市"
        accumu_dict[tier] += 1
        if str(updated_at) >= str(current_month):
            city_dict[tier].add(city)

    # Print the results.
    for tier, total in accumu_dict.items():
        print(tier + "累计房源:" + str(total))
        print(tier + "覆盖城市数: " + str(len(city_dict[tier])))
def main(type):
    """Print per-cohort GMV and platform shares for one category.

    For each of seven cohorts, rows of ``ecommerce`` are kept when the first
    four characters of ``quarter`` map to an index in ``quarter_dict``, the
    upper-cased ``uni_code`` is in ``new_user_list[cohort]`` and the category
    equals ``type``.  Five lines are printed per cohort, one value per
    quarter index: total GMV, then the share of each platform column.  Cells
    whose total is zero are printed empty.

    Args:
        type: Category to report on.
    """
    sql = (
        "SELECT platform, UPPER(uni_code) AS uni_code, category, quarter, price, b2c, status"
        " FROM ecommerce ORDER BY quarter")
    data = query(sql)

    for cohort_index in range(0, 7):
        # Columns: 0 total, 1 Tmall, 2 Taobao, 3 JD1P, 4 JD2P.
        inner_list = [[0.0] * 5 for _ in range(0, 7)]
        for row in data:
            platform, price = row["platform"], row["price"]
            quarter_index = quarter_dict.get(row["quarter"][:4])
            if quarter_index is None:
                continue
            if row["uni_code"] not in new_user_list[cohort_index]:
                continue
            if row["category"] != type:
                continue

            column = None
            if platform == "Taobao":
                # Taobao rows only count once the trade has succeeded; the
                # b2c marker decides between the Tmall and Taobao columns.
                if row["status"] == "交易成功":
                    column = {"Y": 1, "N": 2}.get(row["b2c"])
            elif platform == "JD自营":
                column = 3
            elif platform == "JD PoP":
                column = 4

            if column is not None:
                bucket = inner_list[quarter_index]
                bucket[0] += price
                bucket[column] += price

        # Print formatted result: totals first, then one share line per
        # platform column.
        print("\t".join(
            str(bucket[0] if bucket[0] else "") for bucket in inner_list))
        for column in range(1, 5):
            print("\t".join(
                str(bucket[column] / bucket[0] if bucket[0] else "")
                for bucket in inner_list))
def set_data(start, end):
    """Load ``cheyipai_b2b`` rows for a capture window into global ``DATA``.

    Args:
        start: Inclusive lower bound for ``capture_dtm``, interpolated into
            the SQL as a quoted string.
        end: Exclusive upper bound for ``capture_dtm``, interpolated the
            same way.
    """
    global DATA
    window_sql = (
        "SELECT car_id, final_offer, root_name"
        " FROM cheyipai_b2b WHERE capture_dtm >= '%s' AND capture_dtm < '%s'"
        " GROUP BY car_Id") % (start, end)
    # Rebinds the module-level name; other functions read the rows from it.
    DATA = query(window_sql)
def get_accumulative_data():
    """Print net GMV per category over the whole ``taobao`` table.

    Rows are bucketed by the position of their category in
    ``category_list`` (positions 0-15 only).  Gross counters include every
    row; net counters include only rows whose status is "交易成功".  Only
    the net GMV per bucket is printed, one value per line; the other
    counters are kept for alternative outputs.

    Note:
        Rows whose category is not in ``category_list`` are skipped.
        ``list.index`` raises ``ValueError`` rather than returning -1, so
        the former ``index >= 0`` test could never filter them out.
    """
    sql = "SELECT id, price, category, status FROM taobao"
    data = query(sql)

    order_gross = [0] * 16
    order_net = [0] * 16
    gmv_gross = [0.0] * 16
    gmv_net = [0.0] * 16

    for row in data:
        price, category, status = float(row["price"]), row["category"], row["status"]
        try:
            index = category_list.index(category)
        except ValueError:
            # Unknown category: no bucket to add it to.
            continue
        if index > 15:
            continue
        order_gross[index] += 1
        gmv_gross[index] += price
        if status == "交易成功":
            order_net[index] += 1
            gmv_net[index] += price

    # Print the results: net GMV.  order_gross, order_net and gmv_gross can
    # be printed the same way.
    print("\n".join(map(str, gmv_net)))
def single_month(a_year, a_month):
    """Print driver and ride figures for one calendar month by city tier.

    Reads ``didi_result`` rows whose ``capture_dtm_min`` falls in the given
    month, grouped by driver and city.  A group's ride count is
    ``MAX(order_max) - MIN(order_max)``.  Three lines are printed, each with
    the first-tier and second-tier value: distinct drivers, distinct drivers
    with a non-zero ride count, and total rides.  Cities in neither
    ``LEVEL_ONE`` nor ``LEVEL_TWO`` are ignored.

    Args:
        a_year: Calendar year of the month to report.
        a_month: Calendar month (1-12).
    """
    start = date(a_year, a_month, 1)
    end = start + relativedelta(months=1)
    sql = ("SELECT driver_id, city, MIN(order_max), MAX(order_max), MIN(capture_dtm_min), MAX(capture_dtm_max)"
           " FROM didi_result WHERE capture_dtm_min >= '%s'"
           " AND capture_dtm_min < '%s' GROUP BY driver_id, city") % (str(start), str(end))
    data = query(sql)

    # Index 0 -> tier-1 cities, index 1 -> tier-2 cities.
    driver_nums = [set(), set()]
    driver_has_rides = [set(), set()]
    rides_nums = [0, 0]

    for row in data:
        driver_id, city = row["driver_id"], row["city"]
        order = row["MAX(order_max)"] - row["MIN(order_max)"]
        if city in LEVEL_ONE:
            tier = 0
        elif city in LEVEL_TWO:
            tier = 1
        else:
            continue
        driver_nums[tier].add(driver_id)
        if order:
            driver_has_rides[tier].add(driver_id)
            rides_nums[tier] += order

    # Print the results.
    print("Active Drivers: {0}, {1}".format(*map(len, driver_nums)))
    print("Drivers With New Rides: {0}, {1}".format(*map(len, driver_has_rides)))
    print("New Rides: {0}, {1}".format(*rides_nums))
def get_anmo():
    """Print the number of AnMo staff by city tier and the city coverage.

    Reads ``58_others`` rows of type "AnMo" captured in
    ``[current_month, next_month)``, grouped by name and city.  Prints the
    number of distinct names, the number of (name, city) rows per tier, and
    the number of distinct cities per tier.  Cities in neither
    ``FIRST_TIER_CITIES_PY`` nor ``SECOND_TIER_CITIES_PY`` count as third
    tier.
    """
    sql = (
        'SELECT name, city FROM 58_others WHERE capturedate>="%s" AND capturedate<"%s"'
        ' AND TYPE="AnMo" GROUP BY name, city' % (current_month, next_month)
    )
    data = query(sql)

    distinct_names = set()
    staff_per_tier = [0, 0, 0]
    cities_per_tier = [set(), set(), set()]

    for row in data:
        name, city = row["name"], row["city"]
        distinct_names.add(name)
        if city in FIRST_TIER_CITIES_PY:
            tier = 0
        elif city in SECOND_TIER_CITIES_PY:
            tier = 1
        else:
            tier = 2
        staff_per_tier[tier] += 1
        cities_per_tier[tier].add(city)

    # Print the results.
    print("*****************按摩*******************")
    print("Total: %s" % len(distinct_names))
    print("一线城市: %s\n二线城市: %s\n三线城市: %s" % tuple(staff_per_tier))
    print("Tier1 cities: %s\nTier2 cities: %s\nTier3 cities: %s"
          % tuple(len(cities) for cities in cities_per_tier))
def get_income_data():
    """Print the average order value for each of six income brackets.

    For bracket ``x`` in ``range(6)``, rows of ``taobao`` whose quarter is in
    ``quarter_list`` and whose ``income`` equals ``x + 1`` are selected.  The
    printed value is their summed price divided by their count.

    Note:
        A bracket with no matching rows prints ``0`` instead of raising
        ``ZeroDivisionError``.  The per-quarter accumulators are not printed;
        they are kept for alternative reports (income share of GMV overall
        and per quarter, average order value per quarter).
    """
    sql = "SELECT income, price, quarter FROM taobao"
    data = query(sql)

    for x in range(0, 6):
        # Subtotals.
        gmv_list, income_gmv_list = 0.0, 0.0
        quarter_gmv_list, quarter_income_gmv_list = [0.0] * 28, [0.0] * 28
        # Order counts used for the average order value.
        quarter_order_list = [0] * 28
        quarter_total = 0

        for row in data:
            income, price, quarter = row["income"], float(row["price"]), row["quarter"]
            if quarter not in quarter_list:
                continue
            quarter_index = quarter_list.index(quarter)
            quarter_gmv_list[quarter_index] += price
            gmv_list += price
            if income == (x + 1):
                quarter_income_gmv_list[quarter_index] += price
                income_gmv_list += price
                quarter_order_list[quarter_index] += 1
                quarter_total += 1

        # Print the result: average order value for the bracket.  Guard the
        # division because a bracket may have no orders at all.
        print(income_gmv_list / quarter_total if quarter_total else 0)
def get_mobile_penetration():
    """Print per-quarter order counts and GMV for two row subsets of ``jd``.

    Rows whose quarter is in ``quarter_list`` are bucketed by the quarter's
    position.  Two subsets are tracked independently: rows with ``mobile``
    equal to "Y" and rows with ``gender`` equal to 2.  For each subset GMV
    sums ``price`` over all rows, while the order count skips rows whose
    status is "没有下单".  Four tab-separated lines are printed, each
    preceded by a separator: mobile orders, mobile GMV, then the same two
    for the gender subset.
    """
    sql = "SELECT price, quarter, mobile, gender, status FROM jd WHERE b2c = 'N'"
    data = query(sql)

    quarter_mobile_order = [0] * QUARTER_NUM
    quarter_mobile_gmv = [0.0] * QUARTER_NUM
    quarter_female_order = [0] * QUARTER_NUM
    quarter_female_gmv = [0.0] * QUARTER_NUM

    for row in data:
        quarter = row["quarter"]
        if quarter not in quarter_list:
            continue
        index = quarter_list.index(quarter)
        price = row["price"]
        placed_order = row["status"] != "没有下单"

        if row["mobile"] == "Y":
            quarter_mobile_gmv[index] += price
            if placed_order:
                quarter_mobile_order[index] += 1
        if row["gender"] == 2:
            quarter_female_gmv[index] += price
            if placed_order:
                quarter_female_order[index] += 1

    # Print the results.
    for series in (
        quarter_mobile_order,
        quarter_mobile_gmv,
        quarter_female_order,
        quarter_female_gmv,
    ):
        print("******************************************")
        print(*series, sep="\t")
def get_quarterly_data():
    """Print net GMV per quarter for each of the first 16 categories.

    For every category ``category_list[0..15]`` one tab-separated line is
    printed.  Only rows whose quarter is in ``quarter_list`` and whose
    status is not "没有下单" contribute; a row adds its price to its
    quarter's column when its category matches.  Order counts and the set of
    ordering users per quarter are gathered as well but not printed.
    """
    sql = "SELECT user_num, price, category, status, quarter FROM jd WHERE b2c = 'N'"
    data = query(sql)

    for x in range(0, 16):
        outer_category = category_list[x]
        order_category_inner_list = [0] * QUARTER_NUM
        gmv_category_inner_list = [0.0] * QUARTER_NUM
        people_orderd = [set() for _ in range(0, QUARTER_NUM)]

        for row in data:
            user_num = row["user_num"].upper()
            price = float(row["price"])
            quarter = row["quarter"]
            if quarter not in quarter_list:
                continue
            if row["status"] == "没有下单":
                continue
            quarter_index = quarter_list.index(quarter)
            people_orderd[quarter_index].add(user_num)
            if outer_category == row["category"]:
                order_category_inner_list[quarter_index] += 1
                gmv_category_inner_list[quarter_index] += price

        # Print the results: net GMV.  Net order counts and the number of
        # people who ordered are available in the other two lists.
        print(*gmv_category_inner_list, sep="\t")
def fetch_data(starting_date):
    """Collect MAU, DAU, usage time and sector per app for one period.

    Reads ``questmobile_merge`` rows for ``starting_date`` and keeps only
    apps whose ``dau`` is at least 100.  For those, four dicts keyed by app
    name are filled: mau, dau, time, and ``"<sector>-<sub_sector>"``.

    Args:
        starting_date: Value interpolated into the SQL as a quoted string.

    Returns:
        ``Wrapper(mau_dict, dau_dict, time_dict, sector_dict)``.
    """
    sql = (
        "SELECT app_name, sector, sub_sector, mau, dau,"
        " time FROM questmobile_merge WHERE starting_date = '%s'") % (starting_date)
    data = query(sql)

    mau_dict, dau_dict, time_dict, sector_dict = {}, {}, {}, {}
    for row in data:
        # Units per the original note: mau and dau in thousands, time in
        # thousand hours -- so the threshold of 100 is a DAU of 100,000.
        dau = row["dau"]
        if not dau >= 100:
            continue
        app_name = row["app_name"]
        dau_dict[app_name] = dau
        mau_dict[app_name] = row["mau"]
        time_dict[app_name] = row["time"]
        sector_dict[app_name] = row["sector"] + "-" + row["sub_sector"]

    return Wrapper(mau_dict, dau_dict, time_dict, sector_dict)
def merge(start, end):
    """Sum QuestMobile app metrics per app and period into one table.

    Reads ``app`` rows (source "QuestMobile", data type "APP") whose
    ``starting_date`` lies in ``[start, end]``.  Rows sharing app name and
    starting date are merged by summing active users, daily active users and
    total use time; sector and sub-sector come from the first row seen.  The
    merged rows are inserted into ``questmobile_merge``.

    Args:
        start: Inclusive lower bound for ``starting_date``.
        end: Inclusive upper bound for ``starting_date``.
    """
    sql = (
        "SELECT app_name, sector, sub_sector, os, active_users, daily_active_user,"
        " total_use_time, starting_date FROM app WHERE source = 'QuestMobile' AND data_type='APP'"
        " AND starting_date >= '%s' AND starting_date <= '%s'") % (start, end)
    data = query(sql)

    # key -> [app_name, sector, sub_sector, starting_date, mau, dau, time]
    data_dict = {}
    for row in data:
        app_name, starting_date = row["app_name"], row["starting_date"]
        key = app_name + str(starting_date)
        merged = data_dict.get(key)
        if merged is None:
            data_dict[key] = [
                app_name, row["sector"], row["sub_sector"], starting_date,
                row["active_users"], row["daily_active_user"], row["total_use_time"]]
        else:
            merged[4] += row["active_users"]
            merged[5] += row["daily_active_user"]
            merged[6] += row["total_use_time"]

    insert_sql = (
        "INSERT INTO questmobile_merge(app_name, sector, sub_sector, starting_date,"
        " mau, dau, time) VALUES(%s, %s, %s, %s, %s, %s, %s)")
    insert_batch(insert_sql, list(data_dict.values()))
def merge_pro_vc():
    """Insert rows of the QuestMobile export file that the DB lacks.

    A row is identified by ``"<app_id>-<os>-<starting_date>"``.  Every line
    of the tab-separated file named by the module-level ``filename`` whose
    key is not already present in ``app`` (source "QuestMobile") is
    converted and inserted.

    Conversion of a new row:
        * the literal "undefined" becomes 0 in every column,
        * columns 8, 10, 16 and 18 are multiplied by 10,
        * columns 13 and 21 are divided by 6,
        * columns 9, 11, 14, 17, 19 and 22 are stored as strings,
        * ``str(TODAY)``, "QuestMobile", "Mobile" and "APP" are appended.

    Raises:
        IndexError: If a new line has fewer than 23 fields.
        ValueError: If a numeric column cannot be parsed as a float.
    """
    query_sql = (
        "SELECT app_id, starting_date, os FROM app WHERE source='QuestMobile'")
    old_data = query(query_sql)
    key_set = set()
    for row in old_data:
        app_id, starting_date, os_name = row["app_id"], row["starting_date"], row["os"]
        key_set.add(app_id + "-" + os_name + "-" + str(starting_date))

    # Column groups that share a conversion.
    # NOTE(review): the x10 and /6 factors look like unit conversions --
    # confirm against the export format.
    times_ten = (8, 10, 16, 18)    # user and startup counts
    div_by_six = (13, 21)          # total / daily use time
    as_text = (9, 11, 14, 17, 19, 22)  # penetration and share columns

    insert_list = []
    with open(filename, "r", encoding="utf8") as a_file:
        for line in a_file:
            # Drop the line terminator so the last column does not carry a
            # trailing "\n" into the database.
            fields = line.rstrip("\r\n").split("\t")
            print(fields)
            key = fields[0] + "-" + fields[6] + "-" + fields[7]
            if key in key_set:
                continue
            print(key)
            # Set undefined to 0.
            fields = [x if x != "undefined" else 0 for x in fields]
            for position in times_ten:
                fields[position] = float(fields[position]) * 10.0
            for position in div_by_six:
                fields[position] = float(fields[position]) / 6.0
            for position in as_text:
                fields[position] = str(fields[position])
            fields.extend([str(TODAY), "QuestMobile", "Mobile", "APP"])
            insert_list.append(fields)

    insert_sql = (
        "INSERT INTO app(app_id, app_name, company, app_name_en, sector, sub_sector,"
        " os, starting_date, active_users, active_users_penetration, total_startup_counts,"
        " total_startup_counts_share, per_capita_startup_counts, total_use_time, "
        " total_use_time_share, per_capita_use_time, daily_active_user, "
        " daily_active_user_penetration, daily_startup_counts, daily_startup_counts_share,"
        " per_capita_daily_startup_counts, daily_use_time, daily_use_time_share, "
        " per_capita_daily_use_time, createddate, source, app_type, data_type) "
        " VALUES(%s, %s, %s, %s, %s, %s, %s,"
        " %s, %s, %s, %s, %s, %s, %s,"
        " %s, %s, %s, %s, %s, %s, %s,"
        " %s, %s, %s, %s, %s, %s, %s)")
    # The statement and rows were previously built but never executed.
    insert_batch(insert_sql, insert_list)
def estimate_entrytime(entrytime):
    """Estimate and store an entry time for staff with an early entrytime.

    For every distinct uid in ``58_ayi`` whose ``entrytime`` is earlier than
    the threshold, the record with the smallest ``entrytime`` is read and
    the estimate ``capturedate - servicecount days`` is passed to
    ``update_entrytime``.

    Args:
        entrytime: Threshold interpolated into the selection SQL as a
            quoted string.
    """
    uid_sql = (
        "SELECT DISTINCT uid FROM 58_ayi WHERE entrytime<'%s' ORDER BY entrytime") % (entrytime)
    uid_rows = query(uid_sql)

    for uid_row in uid_rows:
        uid = uid_row["uid"]
        detail_sql = (
            "SELECT capturedate, servicecount FROM 58_ayi WHERE uid='%s'"
            " ORDER BY entrytime LIMIT 1") % (uid,)
        detail_rows = query(detail_sql)
        for detail in detail_rows:
            capturedate = detail["capturedate"]
            servicecount = detail["servicecount"]
            # One day is subtracted per recorded service.
            estimated = capturedate - timedelta(days=servicecount)
            print("********************", capturedate, servicecount, estimated)
            update_entrytime(uid, estimated)
def main(source, created_dt):
    """Print one summary ``Wrapper`` per app found in ``top_growth_apps``.

    Rows for ``source`` created on or before ``created_dt`` are read.  For
    each distinct app a ``Wrapper`` is created with three zero counters and
    six lists of ``DATE_LEN`` empty strings.  Every row then increments the
    counter for its type and stores ``growth`` and ``new_data`` at the
    position of its starting date in ``DATE_LIST``: "DAU" fills the
    ``dau_*`` attributes, "MAU" the ``mau_*`` attributes and "Installation"
    the ``time_*`` attributes.  Rows of any other type are ignored.

    Args:
        source: Value interpolated into the SQL as a quoted string.
        created_dt: Upper bound (inclusive) for ``created_dt``.

    Raises:
        ValueError: If a row's starting date is not in ``DATE_LIST``.
    """
    sql = (
        "SELECT app_name, sector, sub_sector, type, growth, new_data, old_data,"
        " starting_date FROM top_growth_apps WHERE source= '%s' and created_dt <= '%s' "
        " ORDER BY starting_date, type, growth DESC, app_name"
    )
    print(sql)
    data = query(sql % (source, created_dt))

    app_set = {row["app_name"] for row in data}

    # Initialize one wrapper per app.
    wrappers = [
        Wrapper(
            app,
            0,
            0,
            0,
            [""] * DATE_LEN,
            [""] * DATE_LEN,
            [""] * DATE_LEN,
            [""] * DATE_LEN,
            [""] * DATE_LEN,
            [""] * DATE_LEN,
        )
        for app in app_set
    ]
    # Index the wrappers by name so each row is matched with one lookup
    # instead of a scan over all wrappers.  setdefault keeps the first
    # wrapper for a name, matching a first-match scan.
    wrapper_by_name = {}
    for wrapper in wrappers:
        wrapper_by_name.setdefault(wrapper.app_name, wrapper)

    # Calculate counts of DAU, MAU, Time.
    for row in data:
        wrapper = wrapper_by_name.get(row["app_name"])
        if wrapper is None:
            continue
        metric = row["type"]
        growth = row["growth"]
        new_data = row["new_data"]
        index = DATE_LIST.index(str(row["starting_date"]))
        if metric == "DAU":
            wrapper.dau_num += 1
            wrapper.dau_growth[index] = growth
            wrapper.dau[index] = new_data
        elif metric == "MAU":
            wrapper.mau_num += 1
            wrapper.mau_growth[index] = growth
            wrapper.mau[index] = new_data
        elif metric == "Installation":
            wrapper.time_num += 1
            wrapper.time_growth[index] = growth
            wrapper.time[index] = new_data

    for wrapper in wrappers:
        print(wrapper)