def unify_entrytime(entrytime): ''' Some entrytime are "2014-05-01", "0000-00-00" or "1970-01-01". Set all these entrytime to "2014-08-01" ''' sql = ( "SELECT DISTINCT uid FROM 58_ayi WHERE entrytime< '%s'" " ORDER BY entrytime LIMIT 5") % (entrytime) data = query(sql) update_sql = "UPDATE 58_ayi SET entrytime = %s WHERE uid = %s" update_list = [] for row in data: uid = row["uid"] sql2 = ("SELECT entrytime FROM 58_ayi WHERE uid= %s ORDER BY entrytime") % (uid,) data2 = query(sql2) for row2 in data2: entrytime = row2["entrytime"] # 更新为该uid不为空且大于“2014-08-01”的entrytime if entrytime and str(entrytime) >= "2014-08-01": update_list.append([str(entrytime), uid]) # update_entrytime(uid, str(entrytime)) break print("Update list Size: " + str(len(update_list))) update_batch(update_sql, update_list)
def update_register_dt(): sql = ("SELECT driver_id, MIN(flag_dt) AS flag_dt FROM didi_result" " GROUP BY driver_id") data = query(sql) refer_dict = {} for row in data: refer_dict[row["driver_id"]] = row["flag_dt"] # 查找register_dt为空的记录 sql_null = "SELECT DISTINCT driver_id FROM didi_result WHERE register_dt IS NULL" data = query(sql_null) update_list = [] for row in data: driver_id = row["driver_id"] update_list.append([refer_dict[driver_id], driver_id]) update_sql = ("UPDATE didi_result SET register_dt = %s" " WHERE driver_id = %s") update_batch(update_sql, update_list)
def yxp_update(fpath): # 把整个数据库表装载进内存 sql = ("SELECT url, is_sellable, price, update_dt, sold_dt FROM youxinpai2") data = query(sql) db_dict = {} for row in data: url, is_sellable, price, update_dt, sold_dt = row["url"], row["is_sellable"], row["price"], \ row["update_dt"], row["sold_dt"] db_dict[url] = [is_sellable, price, update_dt, sold_dt] # 遍历list文件更新内存中的数据 for file_name in list_files(fpath): print(file_name) with open(file_name, "r", encoding="utf8") as a_file: for line in a_file: fields = line.split("\t") if len(fields) == 11: url, price, is_sellable, capture_dt = fields[0], fields[8], fields[9], \ fields[10].split(" ")[0] if url in db_dict: # "未出售" --> "已出售" if db_dict[url][0] != "已出售" and is_sellable == "已出售": db_dict[url] = ["已出售", price, capture_dt, capture_dt] # None or "未出售" --> None or "未出售" elif is_sellable != "已出售": db_dict[url][1] = price db_dict[url][2] = capture_dt # 更新数据库 update_sql = ( "UPDATE youxinpai2 SET is_sellable = %s, price = %s, update_dt=%s, sold_dt=%s" " WHERE url = %s") update_list = [] for key, value in db_dict.items(): value.append(key) update_list.append(value) print(len(update_list)) update_batch(update_sql, update_list) print(u"本次出售状态更新完毕")
def deduplicate_entrytime(start, end): ''' Some cleaners have over one entrytime, that's abnormal. This method will update all entrytime to it's earliest one. ''' sql = ( "SELECT DISTINCT uid, entrytime FROM 58_ayi" " WHERE capturedate >= \'%s\' and capturedate <= \'%s\' ORDER BY entrytime") % (start, end,) data = query(sql) a_dict, a_list, b_list = {}, [], [] for row in data: uid, entrytime = row["uid"], row["entrytime"] if uid not in a_dict: a_dict[uid] = entrytime a_list.append([entrytime, uid]) else: b_list.append(uid) sql = "UPDATE 58_ayi SET entrytime =%s WHERE uid = %s" print(len(a_list)) # print(a_list) update_batch(sql, a_list)