Example #1
0
def get_new_data(createddate, city, qtype):
    """Print, per category, how many ``qtype`` values first appear on a date.

    Two result sets are read from ``vdian_final`` for ``city``: rows dated
    strictly before ``createddate`` and rows dated exactly ``createddate``.
    Each set is bucketed by ``one_level`` plus an aggregate ``"Total"``
    bucket.  For every entry of ``CATEGORY_LIST`` the number of values seen
    on ``createddate`` but never earlier is printed, one count per line.

    Args:
        createddate: Date literal interpolated into the SQL text.
        city: City literal interpolated into the SQL text.
        qtype: Column that is selected and grouped by; also the row key
            read back from each result row.
    """
    def bucket_by_category(rows):
        # One set per known category.  A one_level value (or "Total") that
        # is missing from CATEGORY_LIST raises KeyError.
        buckets = OrderedDict((category, set()) for category in CATEGORY_LIST)
        for record in rows:
            value, category = record[qtype], record["one_level"]
            buckets["Total"].add(value)
            buckets[category].add(value)
        return buckets

    sql_args = (qtype, createddate, city, qtype)

    earlier_sql = (
        "SELECT %s, one_level FROM vdian_final "
        " WHERE createddate < '%s' AND city = '%s' GROUP BY %s") % sql_args
    print(earlier_sql)
    earlier = bucket_by_category(query(earlier_sql))

    same_day_sql = (
        "SELECT %s, one_level FROM vdian_final "
        " WHERE createddate = '%s' AND city = '%s' GROUP BY %s") % sql_args
    same_day = bucket_by_category(query(same_day_sql))

    # Print the result: values present on the date but not before it.
    # (Swapping the operands would instead count values that disappeared.)
    for category, seen_before in earlier.items():
        print(len(same_day[category] - seen_before))
def unify_entrytime(entrytime):
    """Queue entry-time corrections for uids with suspiciously early rows.

    Up to five distinct uids that have a ``58_ayi`` row with an entrytime
    earlier than ``entrytime`` are selected.  For each of them the rows are
    scanned in ascending entrytime order and the first value that is
    non-empty and, compared as text, not earlier than "2014-08-01" is
    chosen.  All chosen ``[entrytime, uid]`` pairs are then handed to
    ``update_batch`` in a single call; uids without such a value are left
    untouched.

    Args:
        entrytime: Upper bound (exclusive) interpolated into the SQL that
            picks the candidate uids.
    """
    earliest_valid = "2014-08-01"

    candidates = query((
        "SELECT DISTINCT uid FROM 58_ayi WHERE entrytime< '%s'"
        " ORDER BY entrytime LIMIT 5") % (entrytime))

    pending = []
    for candidate in candidates:
        uid = candidate["uid"]
        history = query(
            ("SELECT entrytime FROM 58_ayi WHERE uid= %s ORDER BY entrytime") % (uid,))

        for entry in history:
            found = entry["entrytime"]
            if not found:
                continue
            found_text = str(found)
            # Use this uid's first non-empty entrytime that is on or after
            # the earliest valid date, then move on to the next uid.
            if found_text >= earliest_valid:
                pending.append([found_text, uid])
                break

    print("Update list Size: " + str(len(pending)))
    update_batch("UPDATE 58_ayi SET entrytime = %s WHERE uid = %s", pending)
def get_new_staff():
    """Print how many Meijia/Meijie/Xiufu staff are new in the current month.

    A uid counts as new when it appears in ``58_others`` between the
    module-level ``current_month`` (inclusive) and ``next_month`` (exclusive)
    but in no row captured from "2000-01-01" up to ``current_month``.
    Four counts are printed: the total, then first-tier, second-tier and
    remaining cities.
    """
    sql = (
        'SELECT DISTINCT uid, city FROM 58_others WHERE capturedate >= "%s"'
        ' AND capturedate < "%s" AND TYPE IN ("Meijie", "Meijia", "Xiufu")'
    )

    # Only the set of previously seen uids is needed; the earlier rows do
    # not have to be split by city tier.
    known_uids = {row["uid"] for row in query(sql % ("2000-01-01", current_month))}

    # Index 0 holds every new uid, indexes 1-3 hold them per city tier.
    new_list = [set() for _ in range(4)]
    for row in query(sql % (current_month, next_month)):
        uid, city = row["uid"], row["city"]
        if uid in known_uids:
            continue
        if city in FIRST_TIER_CITIES_PY:
            tier = 1
        elif city in SECOND_TIER_CITIES_PY:
            tier = 2
        else:
            tier = 3
        new_list[0].add(uid)
        new_list[tier].add(uid)

    # Print the result
    print("*****************Meijia/Meijie/Xiufu 新员工*******************")
    print("Total: %s\n一线城市: %s\n二线城市: %s\n三线城市: %s"
          % tuple(len(group) for group in new_list))
Example #4
0
def retention_handler(start, end):
    """Print one retention ratio per ``CITY_LIST`` slot.

    The cohort is every uid returned by ``SQL`` filtered on ``entrytime``
    within the month beginning at ``start``.  A uid is retained when it is
    also returned by ``SQL`` filtered on ``capturedate`` within the month
    beginning at ``end``.  Slot 0 aggregates all cities (every uid is added
    there); the other slots are positions in ``CITY_LIST``.  A slot with an
    empty cohort prints ``0.0``.

    Args:
        start: First day of the cohort month (supports ``+ relativedelta``).
        end: First day of the month in which the cohort is looked up again.
    """
    cohort_until = start + relativedelta(months=1)
    window_until = end + relativedelta(months=1)

    cohort = [set() for _ in range(CITY_LEN)]
    for record in query(SQL.format("entrytime", str(start), str(cohort_until))):
        uid = record["uid"]
        slot = CITY_LIST.index(record["city"])
        cohort[0].add(uid)
        cohort[slot].add(uid)

    # Staff that show up again in the later month
    retained = [set() for _ in range(CITY_LEN)]
    for record in query(SQL.format("capturedate", str(end), str(window_until))):
        uid = record["uid"]
        slot = CITY_LIST.index(record["city"])
        if uid not in cohort[0]:
            continue
        retained[0].add(uid)
        # Only count the uid for a city when it joined the cohort there.
        if uid in cohort[slot]:
            retained[slot].add(uid)

    # Print the retention ratio of every slot
    for joined, kept in zip(cohort, retained):
        print(float(len(kept)) / float(len(joined)) if joined else 0.0)
    def _handler(start):
        """Print the share of one month's staff that is still seen this month.

        The cohort is every Meijie/Meijia/Xiufu uid captured in the month
        beginning at ``start``; the printed value is the fraction of that
        cohort that is also captured between the module-level
        ``current_month`` and ``next_month``.  An empty cohort prints
        ``0.0`` instead of raising ZeroDivisionError.

        Args:
            start: First day of the cohort month (supports
                ``+ relativedelta``).
        """
        start_next = start + relativedelta(months=1)
        sql = (
            'SELECT DISTINCT uid, city FROM 58_others WHERE capturedate >= "%s"'
            ' AND capturedate < "%s" AND TYPE IN ("Meijie", "Meijia", "Xiufu")'
        )
        start_set = {row["uid"] for row in query(sql % (start, start_next))}
        if not start_set:
            # Nothing to retain: report zero rather than dividing by zero.
            print(0.0)
            return

        end_data = query(sql % (current_month, next_month))
        end_set = {row["uid"] for row in end_data if row["uid"] in start_set}
        # float() keeps this a true division on any interpreter version.
        print(float(len(end_set)) / len(start_set))
def file_to_db(path, pattern=None):
    """Load tab-separated driver dumps into three tables, file by file.

    For each file returned by ``list_files(path, pattern)`` every line that
    splits into 10 to 12 tab-separated fields is processed:

    * a driver id not yet known (in the database or in an earlier line)
      becomes a new-driver row;
    * a driver id not yet recorded for ``CURRENT_MONTH`` becomes a monthly
      row;
    * one detail row per driver and file tracks the minimum/maximum order
      count and capture timestamp; city and flag date come from the first
      line seen for that driver.

    After each file the three batches are upserted and their sizes printed.

    Args:
        path: Location handed to ``list_files``.
        pattern: Optional filter handed to ``list_files``.
    """
    # Drivers that already exist
    driver_set = set(row["driver_id"] for row in query(DRIVER_SELECT_SQL))
    # Drivers already recorded for the current month
    driver_month_set = set(
        row["driver_id"]
        for row in query(DRIVER_MONTH_SELECT_SQL % (CURRENT_MONTH)))

    for file_name in list_files(path, pattern):
        print(file_name)
        with open(file_name, "r", encoding="utf8") as a_file:
            detail_dict, driver_dict, retention_dict = {}, {}, {}
            for line in a_file:
                fields = line.strip().split("\t")
                if not 9 < len(fields) < 13:
                    continue

                # Read the field values and convert formats
                (driver_id, driver_name, license_no, photo_url, level,
                 _longitude, _latitude, order_num_str) = fields[:8]
                capture_dtm, city = fields[-2:]
                order_num = int(order_num_str)
                flag_dt = capture_dtm.split(" ")[0]
                # Keep the first two characters, strip CITY_RE matches
                # from the remainder.
                city = city[:2] + re.sub(CITY_RE, "", city[2:])

                # New driver
                if driver_id not in driver_set:
                    driver_dict[driver_id] = [
                        driver_id, driver_name, license_no, photo_url, level,
                        flag_dt]
                    driver_set.add(driver_id)

                # Driver seen for the first time this month
                if driver_id not in driver_month_set:
                    retention_dict[driver_id] = [driver_id, city, CURRENT_MONTH]
                    driver_month_set.add(driver_id)

                # Details: widen the min/max ranges of an existing entry
                detail = detail_dict.get(driver_id)
                if detail is None:
                    detail_dict[driver_id] = [
                        driver_id, city, order_num, order_num,
                        capture_dtm, capture_dtm, flag_dt]
                else:
                    detail[2] = min(detail[2], order_num)
                    detail[3] = max(detail[3], order_num)
                    detail[4] = min(detail[4], capture_dtm)
                    detail[5] = max(detail[5], capture_dtm)

            upsert_batch(NEW_DRIVER_SQL, list(driver_dict.values()))
            print("导入: " + str(len(driver_dict)) + " 个新司机")
            upsert_batch(NEW_RETENTION_SQL, list(retention_dict.values()))
            print("导入: " + str(len(retention_dict)) + " 个本月出现司机")
            upsert_batch(NEW_DETAIL_SQL, list(detail_dict.values()))
            print("导入: " + str(len(detail_dict)) + " details信息")
def main(filename):
    """Re-assign sector data for apps that have conflicting sectors.

    The tab-separated file is read first; per app id (column 0) the sector
    (column 4) and sub-sector (column 5) of the line with the greatest
    starting date (column 7, compared as text) are remembered.  Every
    QuestMobile app that has more than one distinct sector in the ``app``
    table and also occurs in the file is then updated to the remembered
    sector and sub-sector.  The number of updates is printed.

    Args:
        filename: Path of the UTF-8, tab-separated input file.
    """
    # Read the file
    latest = {}
    with open(filename, 'r', encoding='utf8') as a_file:
        for line in a_file:
            columns = line.strip().split('\t')
            app_id, sector, sub_sector, starting_date = (
                columns[0], columns[4], columns[5], columns[7])
            known = latest.get(app_id)
            if known is None or known[0] < starting_date:
                latest[app_id] = [starting_date, sector, sub_sector]

    # Query the database
    sql = (
        'SELECT app_id, app_name, COUNT(DISTINCT sector) AS num '
        ' FROM app WHERE source="QuestMobile" GROUP BY app_id')

    update_list = []
    for row in query(sql):
        app_id = row['app_id']
        entry = latest.get(app_id) if row['num'] > 1 else None
        if entry is not None:
            update_list.append([entry[1], entry[2], app_id])

    print(len(update_list))
    upsert_batch(
        'UPDATE app SET sector=%s, sub_sector=%s WHERE app_id=%s', update_list)
Example #8
0
def get_top_gmv(a_date, city):
    """Print the combined GMV of the 100 highest-grossing shops.

    GMV is ``sales_price * sales_volume`` summed per ``shop_name`` over the
    ``vdian_final`` rows of the given date and city.  The SQL text is
    printed before it is executed.

    Args:
        a_date: Date literal interpolated into the SQL text.
        city: City literal interpolated into the SQL text.
    """
    sql = ("SELECT * FROM vdian_final WHERE createddate = '%s' and city='%s'"
           " ORDER BY sales_volume DESC") % (a_date, city)
    print(sql)

    # GMV per shop across all categories
    gmv_by_shop = {}
    for row in query(sql):
        shop_name = row["shop_name"]
        gmv = row["sales_price"] * row["sales_volume"]
        try:
            gmv_by_shop[shop_name] += gmv
        except KeyError:
            gmv_by_shop[shop_name] = gmv

    # Print the result: total over the top 100 shops
    print(sum(heapq.nlargest(100, gmv_by_shop.values())))
Example #9
0
def get_fake_data(a_date, city):
    """Print indicators of implausibly large listings for a date and city.

    For every ``vdian_final`` row, GMV is ``sales_price * sales_volume``.
    Reported figures:

    * number of items with GMV above 100000 and ``collection == 0``;
    * summed GMV and number of items with GMV above 100000;
    * summed GMV and number of items priced above 10000.

    The SQL text is printed before it is executed.

    Args:
        a_date: Date literal interpolated into the SQL text.
        city: City literal interpolated into the SQL text.
    """
    sql = ("SELECT one_level, sales_price, sales_volume, shop_name, item_id, collection"
           " FROM vdian_final WHERE createddate = '%s' and city='%s'") % (a_date, city)
    print(sql)

    uncollected_items = set()
    large_gmv_total = 0.0
    large_gmv_items = set()
    high_price_gmv_total = 0.0
    high_price_items = set()

    for row in query(sql):
        price, item_id = row["sales_price"], row["item_id"]
        gmv = price * row["sales_volume"]

        if gmv > 100000:
            large_gmv_total += gmv
            large_gmv_items.add(item_id)
            if row["collection"] == 0:
                uncollected_items.add(item_id)
        if price > 10000.0:
            high_price_gmv_total += gmv
            high_price_items.add(item_id)

    # Print the result
    print("No collection: ", len(uncollected_items))
    # The original bare `print` is a no-op under Python 3; call it so the
    # intended blank separator line is actually emitted.
    print()
    print("GMV: ", large_gmv_total)
    print("SKU gt 10w: ", len(large_gmv_items))
    print("GMV Price gt 1w: ", high_price_gmv_total)
    print("SKU gt 1w: ", len(high_price_items))
Example #10
0
def get_driver_and_rides():
    """Print driver and ride counts for tier-1 and tier-2 cities.

    Rows come from ``didi_details`` grouped by driver and city for the
    period between the module-level ``current_month`` and ``next_month``.
    Per row, the number of new rides is ``MAX(order_max) - MIN(order_max)``.
    Three lines are printed: rows per tier, rows with at least one new ride
    per tier, and the new rides per tier.  Rows whose city is in neither
    ``LEVEL_ONE`` nor ``LEVEL_TWO`` are ignored.
    """
    sql = (
        "SELECT driver_id, city, MIN(order_max), MAX(order_max), MIN(capture_dtm_min), MAX(capture_dtm_max)"
        " FROM didi_details WHERE capture_dtm_min >= '%s'"
        " AND capture_dtm_min < '%s' GROUP BY driver_id, city") % (str(current_month), str(next_month))

    tier_one, tier_two = "一线城市", "二线城市"
    driver_nums = {tier_one: 0, tier_two: 0}
    driver_has_rides = {tier_one: 0, tier_two: 0}
    rides_nums = {tier_one: 0, tier_two: 0}

    for row in query(sql):
        city = row["city"]
        new_rides = row["MAX(order_max)"] - row["MIN(order_max)"]
        if city in LEVEL_ONE:
            tier = tier_one
        elif city in LEVEL_TWO:
            tier = tier_two
        else:
            continue

        driver_nums[tier] += 1
        if new_rides:
            driver_has_rides[tier] += 1
            rides_nums[tier] += new_rides

    # Print the result
    print("Active Drivers: {0}, {1}".format(driver_nums[tier_one], driver_nums[tier_two]))
    print("Drivers With New Rides: {0}, {1}".format(
        driver_has_rides[tier_one], driver_has_rides[tier_two]))
    print("New Rides: {0}, {1}".format(rides_nums[tier_one], rides_nums[tier_two]))
Example #11
0
def orders_matrix(flag=None):
    """Print a cohort-by-quarter matrix of order counts from the ``jd`` table.

    One tab-separated row is printed per cohort ``new_user_list[i]``; each
    column is a quarter position taken from ``quarter_dict``.  Records are
    not de-duplicated by order number: every matching record counts once.

    Args:
        flag: Optional container of categories.  When empty or ``None``,
            every record counts except those whose status is "没有下单"
            (active without placing an order).  When given, a record counts
            if its category is in ``flag``; the status is not checked in
            that case.
    """
    sql = (
        "SELECT user_num, order_no, category, price, mobile, b2c, status, quarter FROM jd"
        "  WHERE b2c = 'N'"
        " ORDER BY quarter")
    data = query(sql)

    for i in range(QUARTER_NUM):
        order_inner_list = [0] * QUARTER_NUM
        for row in data:
            quarter = row["quarter"]
            if quarter not in quarter_dict:
                continue
            if row["user_num"].upper() not in new_user_list[i]:
                continue

            if flag:
                counted = row["category"] in flag
            else:
                # Skip "no order" records.  The original added None for
                # them, which raised TypeError.
                counted = row["status"] != "没有下单"
            if counted:
                order_inner_list[quarter_dict[quarter]] += 1
        # Print the result
        print(*order_inner_list, sep="\t")
Example #12
0
def gmv_matrix_percentage(flag):
    """Print, per cohort and quarter, the share of GMV from a subset of orders.

    One tab-separated row is printed per cohort ``new_user_list[i]``.  Each
    cell is subset GMV divided by total GMV for that quarter, or ``0`` when
    the total is zero.

    Args:
        flag: ``"Category"`` selects orders whose category is "3C Digital"
            or "Home Appliance"; ``"Mobile"`` selects orders with
            ``mobile == "Y"``.  Any other value leaves the subset empty.
    """
    sql = (
        "SELECT user_num, order_no, category, price, mobile, b2c, quarter FROM jd"
        "  WHERE b2c = 'N'"
        " GROUP BY user_num, order_no, quarter ORDER BY quarter")
    data = query(sql)

    for i in range(QUARTER_NUM):
        totals = [0.0] * QUARTER_NUM
        subset = [0.0] * QUARTER_NUM
        for row in data:
            user_num, category, price, quarter, mobile = (
                row["user_num"].upper(), row["category"], row["price"],
                row["quarter"], row["mobile"])

            if quarter not in quarter_dict or user_num not in new_user_list[i]:
                continue
            slot = quarter_dict.get(quarter)
            amount = price * 1.0
            totals[slot] += amount

            if flag == "Category":
                selected = category in ("3C Digital", "Home Appliance")
            elif flag == "Mobile":
                selected = mobile == "Y"
            else:
                selected = False
            if selected:
                subset[slot] += amount

        # Print the result
        cells = []
        for part, whole in zip(subset, totals):
            cells.append(str(float(part) / whole if whole else 0))
        print("\t".join(cells))
Example #13
0
def gmv_matrix(flag):
    """Print a cohort-by-quarter matrix of GMV from the ``taobao`` table.

    One tab-separated row is printed per cohort ``new_user_list[i]``.  A
    record with a quarter known to ``quarter_dict`` is added to that
    quarter's column when its user belongs to the cohort.  A record with an
    unknown quarter is added to column 0 of the first cohort only.

    Args:
        flag: When empty, every record counts; otherwise only records whose
            ``is_tmall`` equals ``flag`` count.
    """
    data = query("SELECT id, quarter, price, is_tmall FROM taobao")

    for cohort_no in range(QUARTER_NUM):
        gmv_row = [0.0] * QUARTER_NUM
        for row in data:
            user_id, quarter, price, is_tmall = (
                row["id"].upper(), row["quarter"], row["price"], row["is_tmall"])

            if quarter in quarter_dict:
                if user_id not in new_user_list[cohort_no]:
                    continue
                slot = quarter_dict.get(quarter)
            elif cohort_no == 0:
                slot = 0
            else:
                continue

            # No flag: Tmall and Taobao both count.  Otherwise the record
            # must match the requested platform.
            if not flag or is_tmall == flag:
                gmv_row[slot] += float(price)
        # Print the result
        print("\t".join(str(amount) for amount in gmv_row))
Example #14
0
def user_matrix(flag):
    """Print a cohort-by-quarter matrix of active user counts (``taobao``).

    One tab-separated row is printed per cohort ``new_user_list[i]``; each
    cell is the number of distinct upper-cased ids.  On the diagonal (cohort
    index equals quarter index) users are counted regardless of ``flag``.
    Users of records with a quarter unknown to ``quarter_dict`` are counted
    in column 0 of the first cohort, also regardless of ``flag``.

    Args:
        flag: When empty, every record counts; otherwise off-diagonal cells
            only count records whose ``is_tmall`` equals ``flag``.
    """
    data = query("SELECT id, quarter, is_tmall FROM taobao")

    for cohort_no in range(QUARTER_NUM):
        active = [set() for _ in range(QUARTER_NUM)]
        for row in data:
            quarter, user_id, is_tmall = (
                row["quarter"], row["id"].upper(), row["is_tmall"])

            if quarter not in quarter_dict:
                if cohort_no == 0:
                    active[0].add(user_id)
                continue
            if user_id not in new_user_list[cohort_no]:
                continue

            slot = quarter_dict.get(quarter)
            # The diagonal always holds the full set of new users, no
            # matter which platform was requested.
            if cohort_no == slot or not flag or is_tmall == flag:
                active[slot].add(user_id)
        # Print the result
        print("\t".join(str(len(users)) for users in active))
Example #15
0
def orders_matrix(flag):
    """Print a cohort-by-quarter matrix of order counts (``taobao``).

    Only rows whose ``createddate`` is NULL are read.  One tab-separated row
    is printed per cohort ``new_user_list[i]``.  A record with a quarter
    known to ``quarter_dict`` counts towards that quarter when its user
    belongs to the cohort.  A record with an unknown quarter counts towards
    column 0 of the first cohort only.

    Args:
        flag: When empty, every record counts; otherwise only records whose
            ``is_tmall`` equals ``flag`` count.
    """
    data = query("SELECT id, quarter, is_tmall FROM taobao WHERE createddate is Null")

    for cohort_no in range(QUARTER_NUM):
        order_row = [0] * QUARTER_NUM
        for row in data:
            user_id, quarter, is_tmall = (
                row["id"].upper(), row["quarter"], row["is_tmall"])

            if quarter in quarter_dict:
                if user_id not in new_user_list[cohort_no]:
                    continue
                slot = quarter_dict.get(quarter)
            elif cohort_no == 0:
                slot = 0
            else:
                continue

            # No flag: Tmall and Taobao both count.  Otherwise the record
            # must match the requested platform.
            if not flag or is_tmall == flag:
                order_row[slot] += 1
        # Print the result
        print("\t".join(str(count) for count in order_row))
Example #16
0
def get_fulltime_ayi():
    """Print the full-time share and the head count per ``CITY_LIST`` slot.

    Rows come from ``SQL`` filtered on ``capturedate`` between the
    module-level ``current_month`` and ``next_month``.  A uid is full-time
    when its ``worktime`` contains "全职" and does not contain "半".
    Slot 0 aggregates every city.  First the ratios are printed (``0.0``
    for an empty slot), then the total head counts.
    """
    rows = query(SQL.format("capturedate", str(current_month), str(next_month)))

    # Everyone / full-time only, one set per slot
    everyone = [set() for _ in range(CITY_LEN)]
    full_timers = [set() for _ in range(CITY_LEN)]
    for row in rows:
        uid, city, worktime = row["uid"], row["city"], row["worktime"]
        slot = CITY_LIST.index(city)
        for bucket in (everyone[0], everyone[slot]):
            bucket.add(uid)
        if "半" in worktime or "全职" not in worktime:
            continue
        for bucket in (full_timers[0], full_timers[slot]):
            bucket.add(uid)

    print("*******************全职阿姨占比*******************")
    for staff, full_time in zip(everyone, full_timers):
        print(float(len(full_time)) / float(len(staff)) if staff else 0.0)

    # Total head count per slot
    print("********************全部阿姨数量******************")
    for staff in everyone:
        print(len(staff))
Example #17
0
def get_accumu_cars():
    """Print cumulative listing counts and covered cities per city tier.

    Rows of ``Haoche`` created before the module-level ``next_month`` are
    grouped by ``code``.  Every row counts towards its tier (``LEVEL_ONE``,
    ``LEVEL_TWO``, otherwise the third tier).  A city counts as covered
    when a row's ``updatedAt``, compared as text, is not earlier than
    ``current_month``.
    """
    print("**************************Accumu Cars**************************")
    sql = (
        "SELECT city, updatedAt FROM Haoche WHERE createdAt < '%s'"
        " GROUP BY code") % (str(next_month))

    first, second, third = "一线城市", "二线城市", "三线城市"
    accumu_dict = {first: 0, second: 0, third: 0}
    city_dict = {first: set(), second: set(), third: set()}

    for row in query(sql):
        city, updated_at = row["city"], row["updatedAt"]
        if city in LEVEL_ONE:
            tier = first
        elif city in LEVEL_TWO:
            tier = second
        else:
            tier = third

        accumu_dict[tier] += 1
        if str(updated_at) >= str(current_month):
            city_dict[tier].add(city)

    # Print the result
    for tier, total in accumu_dict.items():
        print(tier + "累计房源:" + str(total))
        print(tier + "覆盖城市数: " + str(len(city_dict[tier])))
Example #18
0
def main(type):
    """Print per-cohort platform GMV breakdowns for one category.

    For each of seven cohorts ``new_user_list[x]`` a 7x5 table is filled
    from the ``ecommerce`` rows of the given category.  The row is the
    position ``quarter_dict`` assigns to the first four characters of the
    quarter; the columns are 0 total, 1 Tmall, 2 Taobao, 3 JD first-party
    and 4 JD PoP.  Taobao-platform rows only count when the status is
    "交易成功".  Five lines are printed per cohort: the totals, then each
    platform's share of the total.  Cells whose total is zero are empty.

    Args:
        type: Category that a row must match to be counted.
    """
    sql = (
        "SELECT platform, UPPER(uni_code) AS uni_code, category, quarter, price, b2c, status"
        " FROM ecommerce ORDER BY quarter")
    data = query(sql)

    for cohort_no in range(0, 7):
        # 0 total, 1 Tmall, 2 Taobao, 3 JD1P, 4 JD2P
        table = [[0.0] * 5 for _ in range(0, 7)]
        for row in data:
            slot = quarter_dict.get(row["quarter"][:4])
            if slot is None:
                continue
            if row["uni_code"] not in new_user_list[cohort_no]:
                continue
            if not row["category"] == type:
                continue

            platform = row["platform"]
            column = None
            if platform == "Taobao":
                if row["status"] == "交易成功":
                    if row["b2c"] == "Y":
                        column = 1
                    elif row["b2c"] == "N":
                        column = 2
            elif platform == "JD自营":
                column = 3
            elif platform == "JD PoP":
                column = 4
            if column is None:
                continue

            price = row["price"]
            table[slot][0] += price
            table[slot][column] += price

        # print formatted result
        for column in range(0, 5):
            cells = []
            for quarter_totals in table:
                total = quarter_totals[0]
                if not total:
                    cell = ""
                elif column == 0:
                    cell = total
                else:
                    cell = quarter_totals[column] / total
                cells.append(str(cell))
            print("\t".join(cells))
Example #19
0
def set_data(start, end):
    """Load one capture window of ``cheyipai_b2b`` into the global ``DATA``.

    Args:
        start: Inclusive lower bound for ``capture_dtm``, interpolated into
            the SQL text.
        end: Exclusive upper bound for ``capture_dtm``, interpolated into
            the SQL text.
    """
    global DATA
    window = (start, end)
    DATA = query((
        "SELECT car_id, final_offer, root_name"
        " FROM cheyipai_b2b WHERE capture_dtm >= '%s' AND capture_dtm < '%s'"
        " GROUP BY car_Id") % window)
Example #20
0
def get_accumulative_data():
    """Print net GMV per category from the ``taobao`` table, one per line.

    The first 16 entries of ``category_list`` define the output positions.
    Gross figures include every row of a tracked category; net figures only
    include rows whose status is "交易成功".  Rows whose category is not
    among those 16 entries are skipped.  Only net GMV is printed; the other
    three series are accumulated so the final print can be switched.
    """
    bucket_count = 16
    # Position of each tracked category.  setdefault keeps the first
    # occurrence, which is the position list.index would report.
    positions = {}
    for position, name in enumerate(category_list[:bucket_count]):
        positions.setdefault(name, position)

    order_gross = [0] * bucket_count
    order_net = [0] * bucket_count
    gmv_gross = [0.0] * bucket_count
    gmv_net = [0.0] * bucket_count

    for row in query("SELECT id, price, category, status FROM taobao"):
        index = positions.get(row["category"])
        if index is None:
            # list.index raised ValueError here for an unknown category;
            # the old range check could never catch that case.
            continue

        price = float(row["price"])
        order_gross[index] += 1
        gmv_gross[index] += price
        if row["status"] == "交易成功":
            order_net[index] += 1
            gmv_net[index] += price

    # Print the result: GMV net
    print("\n".join(str(value) for value in gmv_net))
Example #21
0
def single_month(a_year, a_month):
    """Print driver and ride counts for one calendar month.

    Rows come from ``didi_result`` grouped by driver and city for the month
    starting on the first day of ``a_month``.  Per row, the number of new
    rides is ``MAX(order_max) - MIN(order_max)``.  Three lines are printed,
    each with the tier-1 value followed by the tier-2 value: distinct
    drivers, distinct drivers with at least one new ride, and new rides.
    Rows whose city is in neither ``LEVEL_ONE`` nor ``LEVEL_TWO`` are
    ignored.

    Args:
        a_year: Calendar year of the month to report.
        a_month: Calendar month (1-12) to report.
    """
    start = date(a_year, a_month, 1)
    end = start + relativedelta(months=1)
    sql = ("SELECT driver_id, city, MIN(order_max), MAX(order_max), MIN(capture_dtm_min), MAX(capture_dtm_max)"
           " FROM didi_result WHERE capture_dtm_min >= '%s'"
           " AND capture_dtm_min < '%s' GROUP BY driver_id, city") % (str(start), str(end))

    # Position 0 is the tier-1 figure, position 1 the tier-2 figure.
    drivers = [set(), set()]
    drivers_with_rides = [set(), set()]
    rides = [0, 0]

    for row in query(sql):
        driver_id, city = row["driver_id"], row["city"]
        new_rides = row["MAX(order_max)"] - row["MIN(order_max)"]
        if city in LEVEL_ONE:
            tier = 0
        elif city in LEVEL_TWO:
            tier = 1
        else:
            continue

        drivers[tier].add(driver_id)
        if new_rides:
            drivers_with_rides[tier].add(driver_id)
            rides[tier] += new_rides

    # Print the result
    print("Active Drivers: {0}, {1}".format(*[len(group) for group in drivers]))
    print("Drivers With New Rides: {0}, {1}".format(
        *[len(group) for group in drivers_with_rides]))
    print("New Rides: {0}, {1}".format(*rides))
Example #22
0
def get_anmo():
    """Print AnMo staff counts and city coverage per city tier.

    Rows are the distinct (name, city) pairs of type "AnMo" captured
    between the module-level ``current_month`` and ``next_month``.  The
    total is the number of distinct names.  The per-tier staff figures
    count rows; the per-tier city figures count distinct cities.
    """
    sql = (
        'SELECT name, city FROM 58_others WHERE capturedate>="%s" AND capturedate<"%s"'
        ' AND TYPE="AnMo" GROUP BY name, city' % (current_month, next_month)
    )

    names = set()
    staff_per_tier = [0, 0, 0]
    cities_per_tier = [set(), set(), set()]
    for row in query(sql):
        name, city = row["name"], row["city"]
        names.add(name)
        if city in FIRST_TIER_CITIES_PY:
            tier = 0
        elif city in SECOND_TIER_CITIES_PY:
            tier = 1
        else:
            tier = 2
        staff_per_tier[tier] += 1
        cities_per_tier[tier].add(city)

    # Print the result
    print("*****************按摩*******************")
    print("Total: %s" % len(names))
    print("一线城市: %s\n二线城市: %s\n三线城市: %s" % tuple(staff_per_tier))
    print("Tier1 cities: %s\nTier2 cities: %s\nTier3 cities: %s"
          % tuple(len(cities) for cities in cities_per_tier))
Example #23
0
def get_income_data():
    """Print the average order value for income levels 1 to 6.

    Only ``taobao`` rows whose quarter is listed in ``quarter_list`` are
    considered.  For each income level, in ascending order, one line is
    printed: the summed price divided by the number of rows of that level,
    or ``0.0`` when the level has no rows.
    """
    levels = range(1, 7)
    gmv_by_level = dict.fromkeys(levels, 0.0)
    orders_by_level = dict.fromkeys(levels, 0)

    # A single pass over the rows replaces one full scan per income level.
    for row in query("SELECT income, price, quarter FROM taobao"):
        if row["quarter"] not in quarter_list:
            continue
        income = row["income"]
        if income in gmv_by_level:
            gmv_by_level[income] += float(row["price"])
            orders_by_level[income] += 1

    # Print the result: AOV subtotal per income level
    for level in levels:
        orders = orders_by_level[level]
        # A level without any order used to raise ZeroDivisionError.
        print(gmv_by_level[level] / orders if orders else 0.0)
Example #24
0
def get_mobile_penetration():
    """Print quarterly order and GMV series for mobile and female buyers.

    Only ``jd`` rows with ``b2c = 'N'`` and a quarter listed in
    ``quarter_list`` are considered.  GMV sums the price of every matching
    row; order counts leave out rows whose status is "没有下单".  Four
    tab-separated series are printed, each preceded by a separator line:
    mobile orders, mobile GMV, orders with ``gender == 2``, and GMV with
    ``gender == 2``.
    """
    sql = "SELECT price, quarter, mobile, gender, status FROM jd WHERE b2c = 'N'"

    quarter_mobile_order = [0] * QUARTER_NUM
    quarter_mobile_gmv = [0.0] * QUARTER_NUM
    quarter_female_order = [0] * QUARTER_NUM
    quarter_female_gmv = [0.0] * QUARTER_NUM

    for row in query(sql):
        price, quarter, mobile, gender, status = (
            row["price"], row["quarter"], row["mobile"], row["gender"],
            row["status"])
        if quarter not in quarter_list:
            continue

        slot = quarter_list.index(quarter)
        placed_order = status != "没有下单"
        if mobile == "Y":
            quarter_mobile_gmv[slot] += price
            if placed_order:
                quarter_mobile_order[slot] += 1
        if gender == 2:
            quarter_female_gmv[slot] += price
            if placed_order:
                quarter_female_order[slot] += 1

    # Print the result
    separator = "******************************************"
    for series in (quarter_mobile_order, quarter_mobile_gmv,
                   quarter_female_order, quarter_female_gmv):
        print(separator)
        print(*series, sep="\t")
Example #25
0
def get_quarterly_data():
    """Print net GMV per quarter for each of the first 16 categories.

    Only ``jd`` rows with ``b2c = 'N'``, a quarter listed in
    ``quarter_list`` and a status other than "没有下单" are counted.  One
    tab-separated row is printed per category.  Net order counts and the
    sets of ordering users are accumulated as well, but are not printed.
    """
    data = query("SELECT user_num, price, category, status, quarter FROM jd WHERE b2c = 'N'")

    for position in range(0, 16):
        wanted = category_list[position]
        net_orders = [0] * QUARTER_NUM
        net_gmv = [0.0] * QUARTER_NUM
        buyers = [set() for _ in range(0, QUARTER_NUM)]

        for row in data:
            user_num, price, category, quarter, status = (
                row["user_num"].upper(), float(row["price"]), row["category"],
                row["quarter"], row["status"])
            if quarter not in quarter_list:
                continue
            if status == "没有下单":
                continue

            slot = quarter_list.index(quarter)
            buyers[slot].add(user_num)
            if wanted == category:
                net_orders[slot] += 1
                net_gmv[slot] += price

        # Print the result: net GMV (net_orders / buyers are alternatives)
        print(*net_gmv, sep="\t")
def fetch_data(starting_date):
    """Collect per-app metrics for one starting date into a ``Wrapper``.

    Only apps whose ``dau`` is at least 100 are kept.  According to the
    original notes, MAU and DAU are stored in thousands and time in
    thousand hours, so the threshold stands for 100k daily users.

    Args:
        starting_date: Date literal interpolated into the SQL text.

    Returns:
        ``Wrapper(mau, dau, time, sector)``, where each argument is a dict
        keyed by app name; the sector value is "<sector>-<sub_sector>".
    """
    sql = (
        "SELECT app_name, sector, sub_sector, mau, dau,"
        " time FROM questmobile_merge WHERE starting_date = '%s'") % (starting_date)

    mau_by_app, dau_by_app, time_by_app, sector_by_app = {}, {}, {}, {}
    for row in query(sql):
        app_name, mau, dau, use_time, sector, sub_sector = (
            row["app_name"], row["mau"], row["dau"], row["time"],
            row["sector"], row["sub_sector"])

        # The same DAU threshold gates every metric of the app.
        if not dau >= 100:
            continue
        dau_by_app[app_name] = dau
        mau_by_app[app_name] = mau
        time_by_app[app_name] = use_time
        sector_by_app[app_name] = sector + "-" + sub_sector
    return Wrapper(mau_by_app, dau_by_app, time_by_app, sector_by_app)
def merge(start, end):
    """Sum QuestMobile app metrics per app and date into ``questmobile_merge``.

    Rows of ``app`` with source "QuestMobile" and data type "APP" whose
    starting date lies within ``[start, end]`` are grouped by app name and
    starting date.  Active users, daily active users and total use time are
    summed within a group; sector and sub-sector come from the first row of
    the group.  The merged rows are inserted in one batch.

    Args:
        start: Inclusive lower bound for ``starting_date`` (SQL literal).
        end: Inclusive upper bound for ``starting_date`` (SQL literal).
    """
    sql = (
        "SELECT app_name, sector, sub_sector, os, active_users, daily_active_user,"
        " total_use_time, starting_date FROM app WHERE source = 'QuestMobile' AND data_type='APP'"
        " AND starting_date >= '%s' AND starting_date <= '%s'") % (start, end)

    # key -> [app_name, sector, sub_sector, starting_date, mau, dau, time]
    merged = {}
    for row in query(sql):
        app_name, starting_date = row["app_name"], row["starting_date"]
        sector, sub_sector = row["sector"], row["sub_sector"]
        metrics = (
            row["active_users"], row["daily_active_user"], row["total_use_time"])

        key = app_name + str(starting_date)
        record = merged.get(key)
        if record is None:
            merged[key] = [app_name, sector, sub_sector, starting_date] + list(metrics)
            continue
        for offset, amount in enumerate(metrics, 4):
            record[offset] += amount

    insert_sql = (
        "INSERT INTO questmobile_merge(app_name, sector, sub_sector, starting_date,"
        " mau, dau, time) VALUES(%s, %s, %s, %s, %s, %s, %s)")
    insert_batch(insert_sql, list(merged.values()))
def merge_pro_vc():
    """Prepare QuestMobile rows from a tab-separated file that ``app`` lacks.

    The keys ``<app_id>-<os>-<starting_date>`` of all QuestMobile rows already
    in the ``app`` table are loaded first. Each line of the file named by the
    module-level ``filename`` is then split on tabs; a line whose key (columns
    0, 6 and 7) is not yet known is converted and collected:

    * every ``"undefined"`` value becomes ``0``;
    * columns 8, 10, 16, 18 (user / startup counts) are multiplied by 10;
    * columns 13, 21 (use times) are divided by 6;
    * columns 9, 11, 14, 17, 19, 22 (penetration / share values) become strings;
    * ``TODAY``, ``"QuestMobile"``, ``"Mobile"`` and ``"APP"`` are appended.

    The line terminator is removed before splitting so the last column does
    not carry a trailing newline, and blank lines are skipped.

    :raises IndexError: if a non-blank line has fewer columns than expected.
    :raises ValueError: if a numeric column cannot be parsed as a float.
    """
    query_sql = (
        "SELECT app_id, starting_date, os FROM app WHERE source='QuestMobile'")
    old_data = query(query_sql)

    # Keys of the rows that are already stored.
    key_set = set()
    for row in old_data:
        app_id, starting_date, os_name = row["app_id"], row["starting_date"], row["os"]
        key_set.add(app_id + "-" + os_name + "-" + str(starting_date))

    # Column positions follow the order of the INSERT column list below.
    times_ten_columns = (8, 10, 16, 18)
    divide_by_six_columns = (13, 21)
    text_columns = (9, 11, 14, 17, 19, 22)

    insert_list = []
    with open(filename, "r", encoding="utf8") as a_file:
        for line in a_file:
            # Remove only the line terminator: trailing tabs delimit empty
            # columns and must survive.
            line = line.rstrip("\r\n")
            if not line:
                continue

            fields = line.split("\t")
            print(fields)
            key = fields[0] + "-" + fields[6] + "-" + fields[7]
            if key in key_set:
                continue
            print(key)

            # Set undefined to 0
            fields = [0 if value == "undefined" else value for value in fields]
            for index in times_ten_columns:
                fields[index] = float(fields[index]) * 10.0
            for index in divide_by_six_columns:
                fields[index] = float(fields[index]) / 6.0
            for index in text_columns:
                fields[index] = str(fields[index])

            fields.extend([str(TODAY), "QuestMobile", "Mobile", "APP"])
            insert_list.append(fields)

    # NOTE(review): insert_list and insert_sql are built, but nothing in this
    # function executes the statement - confirm whether a batch insert is
    # expected to follow.
    insert_sql = (
        "INSERT INTO app(app_id, app_name, company, app_name_en, sector, sub_sector,"
        " os, starting_date, active_users, active_users_penetration, total_startup_counts,"
        " total_startup_counts_share, per_capita_startup_counts, total_use_time, "
        " total_use_time_share, per_capita_use_time, daily_active_user, "
        " daily_active_user_penetration, daily_startup_counts, daily_startup_counts_share,"
        " per_capita_daily_startup_counts, daily_use_time,  daily_use_time_share, "
        " per_capita_daily_use_time, createddate, source, app_type, data_type) "
        " VALUES(%s, %s, %s, %s, %s, %s, %s,"
        " %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")
Example #29
0
def estimate_entrytime(entrytime):
    """Re-estimate the entry time of every uid stored with an early entrytime.

    All distinct uids of ``58_ayi`` whose ``entrytime`` is before *entrytime*
    are fetched. For each uid the first row (ordered by ``entrytime``)
    provides ``capturedate`` and ``servicecount``; the estimate is
    ``capturedate`` minus ``servicecount`` days. The estimate is printed and
    handed to ``update_entrytime``.

    :param entrytime: cut-off value interpolated into the SQL comparison.
    """
    uid_sql = (
        "SELECT DISTINCT uid FROM 58_ayi WHERE entrytime<\'%s\' ORDER BY entrytime") % entrytime
    detail_template = (
        "SELECT capturedate, servicecount FROM 58_ayi WHERE uid=\'%s\'"
        " ORDER BY entrytime LIMIT 1")

    for uid_row in query(uid_sql):
        uid = uid_row["uid"]
        details = query(detail_template % (uid,))

        for detail in details:
            captured_on = detail["capturedate"]
            service_days = detail["servicecount"]
            estimated = captured_on - timedelta(days=service_days)
            print("********************", captured_on, service_days, estimated)
            update_entrytime(uid, estimated)
def main(source, created_dt):
    """Build and print one growth summary ``Wrapper`` per app.

    Rows of ``top_growth_apps`` for *source* with ``created_dt`` not later
    than *created_dt* are loaded. One ``Wrapper`` is created per distinct app
    name, with three zero counters and six lists of ``DATE_LEN`` empty
    strings. Every row then updates the wrapper of its app at the position of
    its ``starting_date`` in ``DATE_LIST``:

    * ``"DAU"`` rows set ``dau_growth`` / ``dau`` and bump ``dau_num``;
    * ``"MAU"`` rows set ``mau_growth`` / ``mau`` and bump ``mau_num``;
    * ``"Installation"`` rows set ``time_growth`` / ``time`` and bump
      ``time_num``;
    * rows of any other type change nothing.

    :param source: value interpolated into the ``source`` filter.
    :param created_dt: value interpolated into the ``created_dt`` upper bound.
    :raises ValueError: from ``DATE_LIST.index`` when a row's starting date
        (as a string) is not in ``DATE_LIST``.
    """
    sql = (
        "SELECT app_name, sector, sub_sector, type, growth, new_data, old_data,"
        " starting_date FROM top_growth_apps WHERE source= '%s' and created_dt <= '%s' "
        " ORDER BY starting_date, type, growth DESC, app_name"
    )
    # Prints the statement template (placeholders not yet filled in).
    print(sql)
    data = query(sql % (source, created_dt))

    date_set, app_set = set(), set()
    for row in data:
        date_set.add(row["starting_date"])
        app_set.add(row["app_name"])

    # initialize wrapper
    wrappers = []
    for app in app_set:
        wrappers.append(
            Wrapper(
                app,
                0,
                0,
                0,
                [""] * DATE_LEN,
                [""] * DATE_LEN,
                [""] * DATE_LEN,
                [""] * DATE_LEN,
                [""] * DATE_LEN,
                [""] * DATE_LEN,
            )
        )

    # Index the wrappers by app name once so each row is resolved with a
    # single lookup instead of a scan over all wrappers. setdefault keeps the
    # first wrapper for a name, i.e. the one a front-to-back scan would find.
    wrapper_by_app = {}
    for wrapper in wrappers:
        wrapper_by_app.setdefault(wrapper.app_name, wrapper)

    # calculate counts of DAU, MAU, Time
    for row in data:
        app_name = row["app_name"]
        data_type = row["type"]
        starting_date = str(row["starting_date"])
        growth = row["growth"]
        new_data = row["new_data"]

        wrapper = wrapper_by_app.get(app_name)
        if wrapper is None:
            continue

        index = DATE_LIST.index(starting_date)
        if data_type == "DAU":
            wrapper.dau_num += 1
            wrapper.dau_growth[index] = growth
            wrapper.dau[index] = new_data
        elif data_type == "MAU":
            wrapper.mau_num += 1
            wrapper.mau_growth[index] = growth
            wrapper.mau[index] = new_data
        elif data_type == "Installation":
            wrapper.time_num += 1
            wrapper.time_growth[index] = growth
            wrapper.time[index] = new_data
    for wrapper in wrappers:
        print(wrapper)