def unify_entrytime(entrytime):
    '''
    Some entrytime are "2014-05-01", "0000-00-00" or "1970-01-01".
    Set all these entrytime to "2014-08-01"
    '''
    sql = (
        "SELECT DISTINCT uid FROM 58_ayi WHERE entrytime< '%s'"
        " ORDER BY entrytime LIMIT 5") % (entrytime)
    data = query(sql)

    update_sql = "UPDATE 58_ayi SET entrytime = %s WHERE uid = %s"
    update_list = []
    for row in data:
        uid = row["uid"]
        sql2 = ("SELECT entrytime FROM 58_ayi WHERE uid= %s ORDER BY entrytime") % (uid,)
        data2 = query(sql2)

        for row2 in data2:
            entrytime = row2["entrytime"]
            # 更新为该uid不为空且大于“2014-08-01”的entrytime
            if entrytime and str(entrytime) >= "2014-08-01":
                update_list.append([str(entrytime), uid])
                # update_entrytime(uid, str(entrytime))
                break
    print("Update list Size: " + str(len(update_list)))
    update_batch(update_sql, update_list)
Example #2
0
def update_register_dt():
    sql = ("SELECT driver_id, MIN(flag_dt) AS flag_dt FROM didi_result"
        " GROUP BY driver_id")
    data = query(sql)

    refer_dict = {}
    for row in data:
        refer_dict[row["driver_id"]] = row["flag_dt"]

    # 查找register_dt为空的记录
    sql_null = "SELECT DISTINCT driver_id FROM didi_result WHERE register_dt IS NULL"
    data = query(sql_null)
    update_list = []
    for row in data:
        driver_id = row["driver_id"]
        update_list.append([refer_dict[driver_id], driver_id])

    update_sql = ("UPDATE didi_result SET register_dt = %s"
        " WHERE driver_id = %s")
    update_batch(update_sql, update_list)
def yxp_update(fpath):
    # 把整个数据库表装载进内存
    sql = ("SELECT url, is_sellable, price, update_dt, sold_dt FROM youxinpai2")
    data = query(sql)

    db_dict = {}
    for row in data:
        url, is_sellable, price, update_dt, sold_dt = row["url"], row["is_sellable"], row["price"], \
            row["update_dt"], row["sold_dt"]
        db_dict[url] = [is_sellable, price, update_dt, sold_dt]

    # 遍历list文件更新内存中的数据
    for file_name in list_files(fpath):
        print(file_name)
        with open(file_name, "r", encoding="utf8") as a_file:
            for line in a_file:
                fields = line.split("\t")
                if len(fields) == 11:
                    url, price, is_sellable, capture_dt = fields[0], fields[8], fields[9], \
                        fields[10].split(" ")[0]
                    if url in db_dict:
                        # "未出售" --> "已出售"
                        if db_dict[url][0] != "已出售" and is_sellable == "已出售":
                            db_dict[url] = ["已出售", price, capture_dt, capture_dt]
                        # None or "未出售" --> None or "未出售"
                        elif is_sellable != "已出售":
                            db_dict[url][1] = price
                            db_dict[url][2] = capture_dt
    # 更新数据库
    update_sql = (
        "UPDATE youxinpai2 SET is_sellable = %s, price = %s, update_dt=%s, sold_dt=%s"
        " WHERE url = %s")
    update_list = []
    for key, value in db_dict.items():
        value.append(key)
        update_list.append(value)
    print(len(update_list))
    update_batch(update_sql, update_list)
    print(u"本次出售状态更新完毕")
def deduplicate_entrytime(start, end):
    '''
    Some cleaners have over one entrytime, that's abnormal.
    This method will update all entrytime to it's earliest one.
    '''
    sql = (
        "SELECT DISTINCT uid, entrytime FROM 58_ayi"
        " WHERE capturedate >= \'%s\' and capturedate <= \'%s\' ORDER BY entrytime") % (start, end,)
    data = query(sql)

    a_dict, a_list, b_list = {}, [], []
    for row in data:
        uid, entrytime = row["uid"], row["entrytime"]
        if uid not in a_dict:
            a_dict[uid] = entrytime
            a_list.append([entrytime, uid])
        else:
            b_list.append(uid)
    sql = "UPDATE 58_ayi SET entrytime =%s WHERE uid = %s"
    print(len(a_list))
    # print(a_list)
    update_batch(sql, a_list)