def task():
    """Merge qyer outlet records into existing chat_shopping rows.

    Builds two lookup tables from ``chat_shopping`` (exact Chinese name and
    normalized English name), then walks ``qyer_outlets_new`` and, for each
    matched row, merges the candidate names via ``get_name`` and writes the
    result back through ``update_outlets``.
    """
    name_dict = {}
    en_dict = {}
    conn = poi_ori_pool.connection()
    cursor = conn.cursor(cursor=DictCursor)
    cursor.execute('''SELECT id, name, name_en FROM chat_shopping;''')
    for line in cursor.fetchall():
        name_dict[line['name']] = line['id']
        en_dict[get_similar_word(line['name_en'])] = line['id']
    cursor.close()
    conn.close()

    conn = poi_ori_pool.connection()
    cursor = conn.cursor(cursor=DictCursor)
    cursor.execute('''SELECT * FROM qyer_outlets_new WHERE city_id IS NOT NULL AND city_id != 'NULL';''')
    for line in cursor.fetchall():
        name = line['name']
        name_en = get_similar_word(line['name_en'])
        # Prefer an exact Chinese-name match, fall back to the normalized
        # English name.  (The two original branches were byte-identical
        # except for the uid lookup — deduplicated here.)
        if (name in name_dict) and line['name'] != '':
            uid = name_dict.get(name, '')
        elif (name_en in en_dict) and line['name_en'] != '':
            uid = en_dict.get(name_en, '')
        else:
            continue
        u_name, u_name_en, o_name, o_name_en = get_shop_info(uid)
        final_name = get_name(
            [(line['name'], 'qyer'), (line['name_en'], 'qyer'),
             (u_name, 'daodao'), (u_name_en, 'daodao'),
             (o_name, 'online'), (o_name_en, 'online')])
        update_outlets(cid=line['city_id'], name=final_name, uid=uid)
        print(uid, final_name, line['city_id'])
    cursor.close()
    conn.close()
def get_shop_info(uid):
    """Return ``[name, name_en, online_name, online_name_en]`` for *uid*.

    The first pair comes from the poi_ori ``chat_shopping`` row, the second
    from the online (``tag_id = 9``) base_data row; each pair falls back to
    ``('', '')`` when the row is missing.
    """
    conn = poi_ori_pool.connection()
    cursor = conn.cursor()
    cursor.execute('''SELECT name, name_en FROM chat_shopping WHERE id=%s;''',
                   (uid,))
    # fix: fetchone() returns None when no row matches; extending the
    # result list with None raised TypeError in the original.
    _res_1 = cursor.fetchone() or ('', '')
    cursor.close()
    conn.close()

    conn = base_data_pool.connection()
    cursor = conn.cursor()
    _res = cursor.execute(
        '''SELECT name, name_en FROM chat_shopping WHERE tag_id = 9 AND id=%s;''',
        (uid,))
    if _res:
        _res_2 = cursor.fetchone() or ('', '')
    else:
        _res_2 = ('', '')
    cursor.close()
    conn.close()

    return list(_res_1) + list(_res_2)
def init_white_list():
    """Build and return a ``{qyer_url_id: source_row_id}`` lookup table.

    Scans every qyer-sourced row of ``attr`` and extracts the poi id from its
    url; rows whose url does not match are logged and skipped.
    """
    _dict = {}
    # Raw string: '\s'/'\S' are invalid str escapes (SyntaxWarning on modern
    # Python) when the pattern is not raw.  Compiled once instead of per row.
    _url_pattern = re.compile(r'place.qyer.com/poi/([\s\S]+)/')
    conn = poi_ori_pool.connection()
    cursor = conn.cursor(cursor=DictCursor)
    cursor.execute('''SELECT id, url FROM attr WHERE source = 'qyer';''')
    _count = 0
    for line in cursor.fetchall():
        _count += 1
        if _count % 2000 == 0:
            logger.debug("[init dict][count: {}]".format(_count))
        try:
            _url_id = _url_pattern.findall(line['url'])[0]
            _dict[_url_id] = line['id']
        except Exception as exc:
            logger.exception(msg="[init qyer id dict error]", exc_info=exc)
    cursor.close()
    conn.close()
    logger.debug("[init dict][count: {}]".format(_count))
    return _dict
def get_id_map_info(table_name):
    """Collect ``(new_map_info, unid)`` pairs for *table_name* and push them
    through ``update_db`` in batches of 1000.

    Joins ``attr_unid`` with ``ServicePlatform.<table_name>`` restricted to
    daodao supplement-field ids.
    """
    connection = poi_ori_pool.connection()
    cur = connection.cursor(cursor=DictCursor)
    cur.execute("""SELECT attr_unid.id, attr_unid.source, attr_unid.source_id,
        attr_unid.map_info AS old_map_info,
        ServicePlatform.{0}.map_info AS new_map_info,
        ServicePlatform.{0}.address
        FROM attr_unid
        JOIN ServicePlatform.{0}
        ON attr_unid.source = ServicePlatform.{0}.source
        AND attr_unid.source_id = ServicePlatform.{0}.id
        WHERE ServicePlatform.{0}.id IN
        (SELECT DISTINCT sid FROM ServicePlatform.supplement_field
        WHERE SOURCE = 'daodao' AND TABLE_NAME = '{0}');""".format(table_name))
    pairs = []
    for row in cur.fetchall():
        pairs.append((row['new_map_info'], row['id']))
        if len(pairs) % 1000 == 0:
            logger.debug("[now data][count: {}]".format(len(pairs)))
    logger.debug("[now data][count: {}]".format(len(pairs)))
    for batch in chunks(pairs, 1000):
        update_db(batch)
    cur.close()
    connection.close()
def init_white_list_data():
    """Load the full source rows for every (source, id) pair currently in
    ``white_list`` into the module-level ``white_list_data`` cache."""
    global white_list
    global white_list_data
    pairs = []
    for group in white_list:
        pairs.extend(group)
    if not pairs:
        return
    # get whole source data
    started = time.time()
    conn = poi_ori_pool.connection()
    cursor = conn.cursor(cursor=DictCursor)
    in_clause = ','.join("('{}', '{}')".format(*pair) for pair in pairs)
    sql = '''SELECT * FROM {} WHERE (source, id) IN ({});'''.format(
        poi_type, in_clause)
    cursor.execute(sql)
    logger.debug('[query][sql: {}][takes: {}]'.format(sql,
                                                      time.time() - started))
    for row in cursor.fetchall():
        white_list_data[(row['source'], row['id'])] = row
    cursor.close()
    conn.close()
def update_sql(data):
    """Delete the ``chat_attraction`` rows whose ids appear in *data*.

    :param data: iterable of 1-tuples ``(id,)`` suitable for executemany.
    """
    sql = '''DELETE FROM chat_attraction WHERE id=%s;'''
    conn = poi_ori_pool.connection()
    cursor = conn.cursor()
    _res = cursor.executemany(sql, data)
    # fix: the DELETE was never committed; every other writer in this
    # module calls conn.commit() before closing.
    conn.commit()
    cursor.close()
    conn.close()
    logger.info("[total: {}][execute: {}]".format(len(data), _res))
def update_already_merge_city(poi_type, cid):
    """Mark city *cid* as merged for *poi_type* and return the affected
    row count."""
    connection = poi_ori_pool.connection()
    cur = connection.cursor()
    affected = cur.execute(
        '''REPLACE INTO already_merged_city VALUES (%s,%s)''',
        (poi_type, cid))
    connection.commit()
    cur.close()
    connection.close()
    return affected
def init_already_merged_city(poi_type):
    """Return the list of city ids already merged for *poi_type*."""
    _conn = poi_ori_pool.connection()
    _cursor = _conn.cursor()
    # fix: use a bound parameter instead of string formatting — consistent
    # with update_already_merge_city and injection-safe.
    _cursor.execute(
        '''SELECT * FROM already_merged_city WHERE type=%s;''', (poi_type,))
    # Column 1 holds the city id.
    res = [row[1] for row in _cursor.fetchall()]
    _cursor.close()
    _conn.close()
    return res
def insert_white_list(data):
    """Bulk-upsert ``(type, md5, info)`` rows into the white_list table."""
    connection = poi_ori_pool.connection()
    cur = connection.cursor()
    inserted = cur.executemany(
        '''REPLACE INTO white_list (type, md5, info) VALUES (%s, %s, %s)''',
        data)
    connection.commit()
    logger.debug("[insert white list][total: {}][insert: {}]".format(
        len(data), inserted))
    cur.close()
    connection.close()
def update(data):
    """Bulk-update beentocount/plantocount for chat_attraction rows.

    :param data: iterable of ``(beentocount, plantocount, id)`` tuples.
    """
    connection = poi_ori_pool.connection()
    cur = connection.cursor()
    affected = cur.executemany(
        '''UPDATE chat_attraction SET beentocount = %s, plantocount = %s WHERE id = %s;''',
        data)
    connection.commit()
    cur.close()
    connection.close()
    print("[total: {}][execute: {}]".format(len(data), affected))
def insert_fake_uid(uid, sid, city_id):
    """Insert one qyer ``(uid, sid, city_id)`` mapping into attr_unid_1220."""
    sql = '''INSERT INTO attr_unid_1220 (`id`, `source`, `source_id`, `city_id`) VALUES (%s, %s, %s, %s);'''
    conn = poi_ori_pool.connection()
    cursor = conn.cursor()
    try:
        _res = cursor.execute(sql, (uid, 'qyer', sid, city_id))
    except Exception:
        print(sql)
        # fix: re-raise the original exception instead of a bare
        # Exception() that discards the cause and traceback.
        raise
    conn.commit()
    cursor.close()
    conn.close()
    logger.info("[total: {}][execute: {}]".format(1, _res))
def update_sql(data):
    """Update counts for one chat_attraction row.

    :param data: tuple ``(beentocount, plantocount, commentcount, id)``.

    NOTE(review): this shadows an earlier ``update_sql`` definition in the
    same module — confirm only one of them is actually used.
    """
    sql = '''UPDATE chat_attraction SET beentocount = %s, plantocount = %s, commentcount = %s WHERE id = %s;'''
    conn = poi_ori_pool.connection()
    cursor = conn.cursor()
    try:
        _res = cursor.execute(sql, data)
    except Exception:
        print(sql)
        # fix: re-raise the original exception instead of a bare
        # Exception() that discards the cause and traceback.
        raise
    conn.commit()
    cursor.close()
    conn.close()
    logger.info("[total: {}][execute: {}]".format(1, _res))
def update_outlets(uid, name, cid):
    """Write the merged outlet *name*/*cid* back for shop *uid*, both in the
    spider base-data table and in the poi_ori table (tagged as outlet)."""
    conn = spider_data_base_data_pool.connection()
    cursor = conn.cursor()
    cursor.execute(
        '''UPDATE chat_shopping SET name = %s, tag_id = 9, city_id = %s WHERE id = %s;''',
        (name, cid, uid))
    # fix: the UPDATEs were never committed before close; sibling writers in
    # this module all commit explicitly (assumes autocommit is off — matches
    # the rest of the file).
    conn.commit()
    cursor.close()
    conn.close()
    conn = poi_ori_pool.connection()
    cursor = conn.cursor()
    cursor.execute(
        '''UPDATE chat_shopping SET name = %s, norm_tagid = '奥特莱斯', norm_tagid_en = 'Outlet', city_id = %s WHERE id = %s;''',
        (name, cid, uid))
    conn.commit()
    cursor.close()
    conn.close()
def insert_unknown_keywords(_type, _keyword_or_keywords):
    """Store one keyword or a collection of keywords of the given type.

    Each keyword is deduplicated via its ``encode`` hash (INSERT IGNORE).
    Unsupported argument types are logged and ignored.
    """
    conn = poi_ori_pool.connection()
    cursor = conn.cursor()
    sql = '''INSERT IGNORE INTO unknown_keywords (`type`, `key_hash`, `keywords`) VALUES (%s, %s, %s);'''
    if isinstance(_keyword_or_keywords, str):
        keywords = [_keyword_or_keywords]
    elif isinstance(_keyword_or_keywords, (list, set, tuple)):
        keywords = list(_keyword_or_keywords)
    else:
        keywords = []
        logger.debug(
            "[unknown _keyword_or_keywords type: {}][_type: {}][_keyword_or_keywords: {}]"
            .format(type(_keyword_or_keywords), _type, _keyword_or_keywords))
    for word in keywords:
        cursor.execute(sql, (_type, encode(word), word))
    conn.commit()
    cursor.close()
    conn.close()
def get_qyer_grade_ranking():
    """Return ``{unid: (grade, ranking)}`` for qyer attractions.

    Falsy grades become ``-1.0`` and falsy rankings become ``-1``.
    """
    result = {}
    conn = poi_ori_pool.connection()
    cursor = conn.cursor()
    cursor.execute('''SELECT attr_unid.id, attr.grade, attr.ranking FROM attr JOIN attr_unid ON attr_unid.source = attr.source AND attr_unid.source_id = attr.id WHERE attr.source = 'qyer';''')
    for uid, grade, ranking in cursor.fetchall():
        result[uid] = (grade if grade else -1.0,
                       ranking if ranking else -1)
    cursor.close()
    conn.close()
    return result
def get_id():
    """Backfill beentocount/plantocount for qyer-sourced attractions whose
    counts are still the literal placeholder string '{}'.

    Fresh counts are looked up per qyer source id via generate_cross_dict()
    and pushed through update() in batches of 1000.  Source ids missing from
    the cross dict and uids that received real counts are printed at the end.
    """
    cross_dict = generate_cross_dict()
    _e_sid_set = set()  # source ids absent from cross_dict
    _e_uid_set = set()  # uids that got at least one non-placeholder count
    # NOTE: the '{}' below are literal SQL strings, not format placeholders —
    # this sql string is never .format()ed.
    sql = '''SELECT chat_attraction.id, chat_attraction.name, chat_attraction.name_en, attr_unid.source, attr_unid.source_id FROM chat_attraction, attr_unid WHERE beentocount = '{}' AND plantocount = '{}' AND data_source LIKE '%qyer%' AND attr_unid.source = 'qyer' AND chat_attraction.id = attr_unid.id;'''
    conn = poi_ori_pool.connection()
    cursor = conn.cursor()
    cursor.execute(sql)
    data = []
    for uid, name, name_en, source, sid in cursor.fetchall():
        _tmp_res = cross_dict.get(sid)
        if _tmp_res:
            new_beentocounts, new_plantocounts = _tmp_res
            if new_beentocounts == '{}' and new_plantocounts == '{}':
                # Both counts still placeholders — the row is appended and
                # rewritten anyway (falls through to data.append below).
                pass
            else:
                _e_uid_set.add(uid)
        else:
            print('[error sid: {}]'.format(sid))
            _e_sid_set.add(sid)
            continue
        data.append((new_beentocounts, new_plantocounts, uid))
        # Flush a full batch of 1000 updates.
        if len(data) == 1000:
            update(data=data)
            data = []
    # Flush the final partial batch.
    if data:
        update(data=data)
    cursor.close()
    conn.close()
    print(_e_sid_set)
    print(_e_uid_set)
def filter_data_already_online(_type, _mioji_id, error):
    """Record that online poi *_mioji_id* of *_type* was filtered, together
    with its current online status.

    :param _type: one of 'attr', 'rest', 'shop'.
    :raises TypeError: for any other *_type*.
    :returns: ``False`` when the poi is missing or has no status, else
        ``None`` after (best-effort) recording the filter event.
    """
    if _type == 'attr':
        table_name = 'chat_attraction'
    elif _type == 'rest':
        table_name = 'chat_restaurant'
    elif _type == 'shop':
        table_name = 'chat_shopping'
    else:
        raise TypeError("Unknown Type: {}".format(_type))
    _status = None
    try:
        conn = base_data_pool.connection()
        cursor = conn.cursor()
        # fix: bind the id as a parameter instead of interpolating it into
        # the SQL string (table_name comes from the fixed map above).
        sql = '''SELECT status_test FROM {} WHERE id=%s;'''.format(table_name)
        _res = cursor.execute(sql, (_mioji_id,))
        if not _res:
            return False
        _status = cursor.fetchone()[0]
        logger.debug("[type: {}][status: {}]".format(_type, _status))
        cursor.close()
        conn.close()
    except Exception as exc:
        logger.exception(msg="[get online poi status error]", exc_info=exc)
    if not _status:
        return False
    try:
        conn = poi_ori_pool.connection()
        cursor = conn.cursor()
        sql = '''INSERT IGNORE INTO filter_data_already_online (type, mioji_id, error, status) VALUES (%s, %s, %s, %s);'''
        cursor.execute(sql, (_type, _mioji_id, error, _status))
        conn.commit()
        cursor.close()
        conn.close()
    except Exception as exc:
        logger.exception(msg="[insert filter data already online error]",
                         exc_info=exc)
def update_each_tag_id():
    """Recompute tag_id/tagB for every row of *task_table* from its raw
    '|'-separated norm_tagid string, writing back to base_data in batches
    of 1000 through the same cursor.
    """
    tag_id = tag2id()  # mapping: tag name -> tag id
    conn = poi_ori_pool.connection()
    cursor = conn.cursor()
    cursor.execute('''SELECT id,norm_tagid FROM {};'''.format(task_table))
    data = []
    _count = 0
    for _id, _tag_id in cursor.fetchall():
        if is_legal(_tag_id):
            # Map each tag name to its id; unknown names map to None and
            # are dropped by the is_legal filter below.
            tag_id_set = set()
            for each in _tag_id.split('|'):
                tag_id_set.add(tag_id.get(each))
            small_tag = ('|'.join(filter(lambda x: is_legal(x), tag_id_set)))
            big_tag = get_tag(small_tag)
            data.append((small_tag, big_tag, _id))
            _count += 1
            # Flush a full batch of 1000 rows.
            if len(data) % 1000 == 0:
                logger.debug("[mk data][poi_type: {}][len: {}]".format(
                    poi_type, _count))
                res = cursor.executemany(
                    'update base_data.{} set tag_id=%s, tagB=%s where id=%s'.
                    format(task_table), data)
                data = []
                logger.debug(
                    "[update tag id][table_name: {}][update count: {}]".format(
                        task_table, res))
    # Flush the final partial batch (may be empty).
    res = cursor.executemany(
        'update base_data.{} set tag_id=%s, tagB=%s where id=%s'.format(
            task_table), data)
    logger.debug("[update tag id][table_name: {}][update count: {}]".format(
        task_table, res))
    logger.debug("[mk data finished][poi_type: {}][len: {}]".format(
        poi_type, _count))
    # Single commit covers all batches above.
    conn.commit()
    cursor.close()
    conn.close()
def get_max_id():
    """Return the larger of the max online id and the max merged (unid) id.

    Raises when neither table yields an id, since merging cannot proceed
    without a starting id.
    """
    global poi_type
    global online_table_name
    global data_source_table
    conn = base_data_pool.connection()
    cursor = conn.cursor()
    cursor.execute('''SELECT max(id) FROM {};'''.format(online_table_name))
    _id_online = cursor.fetchone()[0]
    conn.close()
    conn = poi_ori_pool.connection()
    cursor = conn.cursor()
    cursor.execute('''SELECT max(id) FROM {}_unid;'''.format(poi_type))
    _id_merged = cursor.fetchone()[0]
    conn.close()
    candidates = [each for each in (_id_online, _id_merged) if each]
    if not candidates:
        raise Exception("无法获取最大 id ,不能进行融合")
    return max(candidates)
def init_white_list():
    """Populate the module-level ``white_list`` with (source, id) pairs
    parsed from the white_list table; no-op when already loaded.

    NOTE: shadows an earlier ``init_white_list`` definition in this module.
    """
    global white_list
    if white_list:
        return
    conn = poi_ori_pool.connection()
    cursor = conn.cursor(cursor=DictCursor)
    cursor.execute('''SELECT info FROM white_list WHERE type = %s;''',
                   (poi_type, ))
    pairs = set()
    for row in cursor.fetchall():
        if not row:
            continue
        info = json.loads(row['info'])
        for src in ('qyer', 'daodao'):
            if src in info:
                pairs.add((src, str(info[src])))
    white_list.append(pairs)
    cursor.close()
    conn.close()
def get_poi_union_info(cid):
    """Return ``[(mioji_id, city_id, union_info)]`` for every merged poi in
    city *cid*; ``union_info`` packs 'source|source_id' pairs joined with
    the '|_||_|' separator."""
    conn = poi_ori_pool.connection()
    # All city ids used for merging.
    logger.info("[get city task][cid: {}]".format(cid))
    cursor = conn.cursor(cursor=DictCursor)
    sql = '''SELECT id, city_id, group_concat(concat(source, '|', source_id) SEPARATOR '|_||_|') AS union_info FROM {}_unid WHERE city_id='{}' GROUP BY id'''.format(
        poi_type, cid)
    started = time.time()
    cursor.execute(sql)
    logger.debug('[query][sql: {}][takes: {}]'.format(sql,
                                                      time.time() - started))
    city_poi = [(row['id'], row['city_id'], row['union_info'])
                for row in cursor.fetchall()]
    cursor.close()
    conn.close()
    return city_poi
def get_poi_dict(city_id):
    """Collect all poi info for one city from three databases.

    :param city_id: city mioji id
    :returns: tuple ``(_poi_dict, _online_official_data,
        _online_nonofficial_data)`` where ``_poi_dict`` maps
        ``(source, source_id)`` to the raw source row (white-list rows
        merged in last), and the other two map mioji id to the online row
        (official vs non-official, split on the ``official`` column).
    """
    global white_list_data
    _poi_dict = defaultdict(dict)
    _online_official_data = {}
    _online_nonofficial_data = {}
    # Pass 1: whole data-process result for the city.
    conn = data_process_pool.connection()
    cursor = conn.cursor(cursor=DictCursor)
    sql = '''SELECT * FROM {} WHERE city_id='{}';'''.format(
        data_process_table_name, city_id)
    _t = time.time()
    cursor.execute(sql)
    logger.debug('[query][sql: {}][takes: {}]'.format(sql, time.time() - _t))
    for each in cursor.fetchall():
        if int(each["official"]) == 0:
            _online_nonofficial_data[each['id']] = each
        else:
            _online_official_data[each['id']] = each
    cursor.close()
    conn.close()
    # Pass 2: base data overlays the data-process rows (tag_id dropped so
    # the data-process value wins).
    # NOTE(review): this queries data_process_table_name against the
    # base_data pool — confirm the table name is shared across both DBs;
    # also assumes every base-data id already exists in pass 1 (KeyError
    # otherwise).
    conn = base_data_pool.connection()
    cursor = conn.cursor(cursor=DictCursor)
    sql = '''SELECT * FROM {} WHERE city_id='{}';'''.format(
        data_process_table_name, city_id)
    _t = time.time()
    cursor.execute(sql)
    logger.debug('[query][sql: {}][takes: {}]'.format(sql, time.time() - _t))
    for each in cursor.fetchall():
        each.pop('tag_id')
        if int(each["official"]) == 0:
            _online_nonofficial_data[each['id']].update(each)
        else:
            _online_official_data[each['id']].update(each)
    cursor.close()
    conn.close()
    # Pass 3: whole raw source data for the city.
    conn = poi_ori_pool.connection()
    cursor = conn.cursor(cursor=DictCursor)
    sql = "select * from {} where city_id='{}'".format(poi_type, city_id)
    _t = time.time()
    cursor.execute(sql)
    logger.debug('[query][sql: {}][takes: {}]'.format(sql, time.time() - _t))
    for line in cursor.fetchall():
        source = line['source']
        source_id = line['id']
        _poi_dict[(source, source_id)] = line
    cursor.close()
    conn.close()
    # White-list rows override/extend whatever was loaded above.
    for k, v in white_list_data.items():
        _poi_dict[k] = v
    return _poi_dict, _online_official_data, _online_nonofficial_data
def insert_poi_unid(merged_dict, cid_or_geohash):
    """Materialize merged poi groups into the ``<poi_type>_unid`` table.

    For every ``uid -> {(source, sid), ...}`` entry in *merged_dict*, fetch
    each member's display fields (name, map_info, grade, …) from the online
    table, the raw source table, and the white-list rows, normalize illegal
    values, and REPLACE the rows into ``{poi_type}_unid``.

    :param merged_dict: mapping of merged uid -> set of (source, source_id)
    :param cid_or_geohash: city id used both for the lookup queries and as
        the city_id written into the unid rows
    """
    global white_list
    global online_table_name
    global data_source_table
    start = time.time()
    # Resolve city name, country name and city map_info.
    _dev_conn = base_data_pool.connection()
    _dev_cursor = _dev_conn.cursor()
    _dev_cursor.execute('''SELECT city.id AS cid, city.name AS city_name, country.name AS country_name, city.map_info AS map_info FROM city JOIN country ON city.country_id = country.mid WHERE city.id = {};'''.format(cid_or_geohash))
    cid, city_name, country, city_map_info = _dev_cursor.fetchone()
    _dev_cursor.close()
    _dev_conn.close()
    # total_data maps (source, sid) -> (name, name_en, map_info, grade,
    # star, ranking, address, url); online rows use -1 for star and '' for
    # url since the online table has no such columns.
    total_data = {}
    _dev_conn = base_data_pool.connection()
    _dev_cursor = _dev_conn.cursor()
    try:
        _t = time.time()
        sql = '''SELECT id, name, name_en, map_info, grade, -1, ranking, address, '' FROM {} WHERE city_id='{}';'''.format(
            online_table_name, cid_or_geohash)
        _dev_cursor.execute(sql)
        logger.debug('[query][sql: {}][takes: {}]'.format(
            sql, time.time() - _t))
    except Exception as exc:
        logger.exception("[sql exc][sql: {}]".format(sql), exc_info=exc)
    for line in _dev_cursor.fetchall():
        total_data[('online', line[0])] = line[1:]
    _dev_cursor.close()
    _dev_conn.close()
    # Raw source rows for the same city.
    _data_conn = poi_ori_pool.connection()
    _data_cursor = _data_conn.cursor()
    try:
        _t = time.time()
        sql = '''SELECT source, id, name, name_en, map_info, grade, star, ranking, address, url FROM {} WHERE city_id='{}';'''.format(
            data_source_table, cid_or_geohash)
        _data_cursor.execute(sql)
        logger.debug('[query][sql: {}][takes: {}]'.format(
            sql, time.time() - _t))
    except Exception as exc:
        logger.exception("[sql exc][sql: {}]".format(sql), exc_info=exc)
    for line in _data_cursor.fetchall():
        total_data[(line[0], line[1])] = line[2:]
    _data_cursor.close()
    _data_conn.close()
    # White-listed (source, id) pairs may live outside this city — fetch
    # them explicitly so lookups below cannot miss.
    if white_list:
        _s_sid = []
        for _each in white_list:
            _s_sid.extend(_each)
        _ori_conn = poi_ori_pool.connection()
        _ori_cursor = _ori_conn.cursor()
        try:
            _t = time.time()
            query_sql = '''SELECT source, id, name, name_en, map_info, grade, star, ranking, address, url FROM {} WHERE (source, id) IN ({});'''.format(
                data_source_table,
                ','.join(map(lambda x: "('{}', '{}')".format(*x), _s_sid)))
            _ori_cursor.execute(query_sql)
            # NOTE(review): these two log lines reference `sql` from the
            # previous query, not `query_sql` — looks like a copy-paste
            # slip; confirm before relying on these log messages.
            logger.debug('[query][sql: {}][takes: {}]'.format(
                sql, time.time() - _t))
        except Exception as exc:
            logger.exception("[sql exc][sql: {}]".format(sql), exc_info=exc)
        for line in _ori_cursor.fetchall():
            total_data[(line[0], line[1])] = line[2:]
        _ori_cursor.close()
        _ori_conn.close()
    # Build the rows to insert, normalizing illegal values to '' / -1.0.
    data = []
    for uid, s_sid_set in merged_dict.items():
        for source, sid in s_sid_set:
            name, name_en, map_info, grade, star, ranking, address, url = total_data[
                (source, sid)]
            if not is_legal(name):
                name = ''
            if not is_legal(name_en):
                name_en = ''
            if not is_legal(grade):
                grade = -1.0
            if not is_legal(star):
                star = -1.0
            if not is_legal(ranking):
                ranking = -1.0
            if not is_legal(address):
                address = ''
            if not is_legal(url):
                url = ''
            data.append(
                (uid, cid, city_name, country, city_map_info, source, sid,
                 name, name_en, map_info, grade, star, ranking, address, url))
    # Bulk REPLACE into the per-type unid table.
    _final_conn = poi_ori_pool.connection()
    _final_cursor = _final_conn.cursor()
    try:
        _t = time.time()
        sql = '''REPLACE INTO {}_unid (id, city_id, city_name, country_name, city_map_info, source, source_id, name, name_en, map_info, grade, star, ranking, address, url) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);'''.format(
            poi_type)
        _final_cursor.executemany(sql, data)
        logger.debug('[query][sql: {}][takes: {}]'.format(
            sql, time.time() - _t))
    except Exception as exc:
        logger.exception("[insert unid table error]", exc_info=exc)
    _final_conn.commit()
    _final_cursor.close()
    _final_conn.close()
    logger.info(
        "[finish prepare data][city: {}][line_count: {}][takes: {}]".format(
            cid_or_geohash, len(data), time.time() - start))
def poi_merged_report(poi_type):
    """Produce a per-city merge-quality report for *poi_type*.

    For every city, counts total vs. valid rows (valid = has a name, a
    city_id and a parseable map_info), plus daodao-specific counts, and
    writes the result to dated .xlsx and .csv files.
    """
    init_global_name(poi_type)
    conn = poi_ori_pool.connection()
    cursor = conn.cursor(cursor=DictCursor)
    get_column_sql = "SELECT column_name FROM information_schema.columns WHERE table_schema = DATABASE() AND table_name='{}'".format(
        table_name)
    cursor.execute(get_column_sql)
    datas = cursor.fetchall()
    column_list = []  # NOTE(review): collected but never used below
    filter_info = []  # cities with at least one invalid row
    error_data_id = []  # ids of every invalid row encountered
    for data in datas:
        column_list.append(data['column_name'])
    # Report columns are Chinese labels (runtime strings — left as-is).
    report_data = pandas.DataFrame(columns=[
        'ID', '城市名', '国家', '城市 grade', '开发库中{}个数'.format(poi_name),
        'test 上线{}个数'.format(poi_name), '上线{}占比'.format(
            poi_name), '上线{}有图占比'.format(poi_name), '开发库中 daodao {}个数'.format(
                poi_name), '上线 daodao 个数', '上线 daodao {}占比'.format(poi_name)
    ])
    report_count = 0
    for cid, city_name, city_name_en, country, grade in init_city_dict():
        get_sql = "select * from {} where city_id = '{}'".format(
            table_name, cid)
        cursor.execute(get_sql)
        datas = cursor.fetchall()
        name_null_fail_count = 0
        city_null_fail_count = 0
        first_img_null_fail_count = 0  # never incremented — image check absent
        norm_tag_null_fail_count = 0   # never incremented — tag check absent
        map_fail_count = 0
        img_succeed_count = 0          # never incremented — image check absent
        daodao_count = 0
        daodao_succeed_count = 0
        success_count = 0
        for data in datas:
            # daodao statistics.
            if 'daodao' in data['data_source']:
                daodao_count += 1
            # Validity checks — each failure records the id and skips the row.
            # name: both Chinese and English names must be non-placeholder.
            if data['name'].lower() in (
                    '', 'null', '0') and data['name_en'] in ('', 'null', '0'):
                name_null_fail_count += 1
                error_data_id.append(data['id'])
                continue
            # city
            if data['city_id'] in ('', 'null', '0'):
                city_null_fail_count += 1
                error_data_id.append(data['id'])
                continue
            # map_info must parse as "lat,lng" floats.
            try:
                lat = float(data['map_info'].strip().split(',')[0])
                lgt = float(data['map_info'].strip().split(',')[1])
            except Exception:
                map_fail_count += 1
                error_data_id.append(data['id'])
                continue
            if 'daodao' in data['data_source']:
                daodao_succeed_count += 1
            success_count += 1
        if success_count != len(datas):
            filter_info.append((cid, city_name, country))
        # '无穷大' ("infinity") marks a ratio with a zero denominator.
        report_data.loc[report_count] = [
            cid, city_name, country, grade, len(datas), success_count,
            "{0:04f}%".format(100 * success_count / float(len(datas)))
            if len(datas) != 0 else '无穷大',
            "{0:04f}%".format(100 * img_succeed_count / float(success_count))
            if success_count != 0 else '无穷大', daodao_count,
            daodao_succeed_count,
            "{0:04f}%".format(100 * daodao_succeed_count /
                              float(daodao_count))
            if daodao_count != 0 else '无穷大'
        ]
        report_count += 1
        logger.debug(' '.join([cid, city_name, city_name_en, country, grade])
                     + '\tall:' + str(len(datas)) + "\tget:" +
                     str(success_count) + '\tname_null:' +
                     str(name_null_fail_count) + '\tcity_null:' +
                     str(city_null_fail_count) + '\timg_null:' +
                     str(first_img_null_fail_count) + '\tmap:' +
                     str(map_fail_count) + '\tnorm_tag:' +
                     str(norm_tag_null_fail_count))
    for each in filter_info:
        logger.debug("[data_filter: {}]".format(each))
    logger.debug("[error data][id: {}]".format(error_data_id))
    pandas.DataFrame(report_data).to_excel(datetime.datetime.now().strftime(
        '{}_report_%Y_%m_%d.xlsx'.format(poi_name)))
    pandas.DataFrame(report_data).to_csv(datetime.datetime.now().strftime(
        '{}_report_%Y_%m_%d.csv'.format(poi_name)))
def poi_insert_data(cid, _poi_type):
    """Fuse multi-source POI data for one city and upsert it into the
    data-process table.

    High-level flow per POI union record:
      1. collect field values from the online official / non-official rows
         and from every crawled source listed in ``union_info``;
      2. pick final field values by source priority (names get special
         Chinese/Latin handling; official data is never overwritten);
      3. normalise tags / open time, and filter out unusable rows (no
         mergeable data, empty names, closed-down POIs, bad coordinates,
         too far from the city centre, non-shopping rows for 'shop', ...);
      4. upsert the merged rows in batches of 300 through ``dataset``.

    :param cid: city id whose POIs should be merged
    :param _poi_type: POI category ('attr' / 'rest' / 'shop'); only passed
        to ``init_global_name``.  NOTE(review): the body afterwards reads
        the module-level global ``poi_type`` — presumably set by
        ``init_global_name`` — TODO confirm.
    """
    init_global_name(_poi_type)
    # Dead reference SQL kept verbatim by the original author (string
    # statement used as a block comment):
    ''' 数据最终入库表
    if _poi_type == 'attr':
        sql = 'replace into chat_attraction(`id`,`name`,`name_en`,`data_source`,`city_id`,' \
              '`map_info`,`address`,`star`,`plantocount`,`beentocount`,`real_ranking`,' \
              '`grade`,`commentcount`,`tagid`,`norm_tagid`,`norm_tagid_en`,`url`,`website_url`,`phone`,`introduction`,' \
              '`open`, `open_desc`,`recommend_lv`,`prize`,`traveler_choice`, `alias`, ' \
              '`image`, `ori_grade`,`nearCity`, `ranking`,`rcmd_open`,`add_info`,`address_en`,`event_mark`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,' \
              '%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,-1,"","","","")'
    elif _poi_type == 'rest':
        sql = 'replace into chat_restaurant(`id`,`name`,`name_en`,' \
              '`source`,`city_id`,`map_info`,`address`,`real_ranking`,' \
              '`grade`,`res_url`,`telphone`,`introduction`,`open_time`,`open_time_desc`,`prize`,' \
              '`traveler_choice`,`review_num`,`price`,`price_level`,`cuisines`, ' \
              '`image_urls`,`tagid`,`norm_tagid`,`norm_tagid_en`,`nearCity`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,' \
              '%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
    elif _poi_type == 'shop':
        sql = 'replace into ' \
              'chat_shopping(`id`,`name`,`name_en`,`data_source`,`city_id`,' \
              '`map_info`,`address`,`star`,`plantocount`,`beentocount`,' \
              '`real_ranking`,`grade`,`commentcount`,`tagid`,`norm_tagid`,`norm_tagid_en`,`url`,`website_url`,' \
              '`phone`,`introduction`,`open`,`open_desc`,`recommend_lv`,`prize`,' \
              '`traveler_choice`,`image`,`nearCity`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,' \
              '%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'
    else:
        raise TypeError("Unknown Type: {}".format(poi_type))
    '''
    conn = poi_ori_pool.connection()
    # for task_dict in get_task(cid):
    count = 0
    data = []
    # Fetch all POI information needed for the merge
    _info_dict, _online_official_data, _online_nonofficial_data = get_poi_dict(
        cid)
    _city_poi = get_poi_union_info(cid)
    # Start merging, one union record at a time
    for miaoji_id, city_id, union_info in _city_poi:
        # Per-record merge state: field name -> {source: value}
        data_dict = defaultdict(dict)
        # True once any crawled (non-online) source contributes data
        other_source = False
        # Whether online official / non-official rows exist for this POI
        has_official = False
        has_nonofficial = False
        # Fetch the online-environment rows
        o_official_data = _online_official_data.get(miaoji_id, None)
        o_nonofficial_data = _online_nonofficial_data.get(miaoji_id, None)
        # Update the official flags
        if o_official_data is not None:
            has_official = True
        if o_nonofficial_data is not None:
            has_nonofficial = True
        # Seed the merge dict from the online rows, field by field
        for each_name in (json_name_list + norm_name_list + others_name_list):
            data_dict[each_name] = {}

            # Pulls one field out of an online row into data_dict.
            # Closes over `each_name`; called immediately below, so the
            # late-binding of the loop variable is safe here.
            def get_data(src_dict, is_official=False):
                # Map the online column name back to the source field name
                if each_name in online2source:
                    source_name = online2source[each_name]
                else:
                    source_name = each_name
                if each_name in json_name_list:
                    # JSON fields: parse and keep only whitelisted sources
                    if source_name in src_dict:
                        try:
                            _res = json.loads(src_dict[source_name])
                            if isinstance(_res, dict):
                                data_dict[each_name] = {
                                    k: v
                                    for k, v in _res.items()
                                    if k in available_source
                                }
                            else:
                                pass
                        except Exception:
                            pass
                else:
                    # Scalar fields: file under a pseudo-source key
                    data_dict[each_name][
                        'mioji_official' if is_official else
                        'mioji_nonofficial'] = src_dict.get(source_name, {})

            if o_official_data is not None:
                get_data(o_official_data, is_official=True)
            if o_nonofficial_data is not None:
                get_data(o_nonofficial_data, is_official=False)
        # Walk every source|id pair to merge and build the per-source dicts
        for s_sid in union_info.split('|_||_|'):
            source, source_id = s_sid.split('|')
            # todo add online handling: load data first, then update
            # todo use online base data to update the data-process fields
            # No usable merge id information
            if not source_id or not source:
                continue
            # Skip sources we do not merge
            if source not in available_source:
                logger.debug("[not available source: {}]".format(source))
                continue
            # No merge data fetched for this source/id
            poi_info = _info_dict[(source, source_id)]
            if poi_info == {}:
                continue
            other_source = True
            # Record each legal field value under its source key
            for each_name in (json_name_list + norm_name_list +
                              others_name_list):
                if is_legal(poi_info[each_name]):
                    if isinstance(poi_info[each_name], str):
                        # Convert traditional -> simplified Chinese
                        data_dict[each_name][source] = tradition2simple(
                            poi_info[each_name]).decode()
                    else:
                        data_dict[each_name][source] = poi_info[each_name]
        # Backfill any fields that received no data at all
        for each_name in (json_name_list + norm_name_list + others_name_list):
            if each_name not in data_dict:
                data_dict[each_name] = {}
        # Nothing mergeable from either online rows or crawled sources
        if not o_official_data and not o_nonofficial_data and not other_source:
            if 'online' in union_info:
                filter_data_already_online(poi_type, miaoji_id, "没有可供融合的数据")
            logger.debug('[union_info: {}]'.format(union_info))
            continue
        new_data_dict = {}

        # Priority lookup for the (preferably Chinese) display name.
        def get_name_by_priority():
            # Standard priority: Chinese from the name field
            name_tmp = get_key.get_key_by_priority_or_default(
                data_dict['name'], norm_name, '',
                special_filter=check_chinese)
            # Chinese found in the English-name field
            if not name_tmp:
                name_tmp = get_key.get_key_by_priority_or_default(
                    data_dict['name_en'], norm_name, '',
                    special_filter=check_chinese)
            # Latin from the English-name field
            if not name_tmp:
                name_tmp = get_key.get_key_by_priority_or_default(
                    data_dict['name_en'], norm_name, '',
                    special_filter=check_latin)
            # Latin from the Chinese-name field
            if not name_tmp:
                name_tmp = get_key.get_key_by_priority_or_default(
                    data_dict['name'], norm_name, '',
                    special_filter=check_latin)
            return name_tmp

        # Priority lookup for the Latin (English) name.
        def get_name_en_by_priority():
            # From the English-name field of the merged data
            name_en_tmp = get_key.get_key_by_priority_or_default(
                data_dict['name_en'], norm_name, '',
                special_filter=check_latin)
            if not name_en_tmp:
                # NOTE(review): the fallback result is discarded — this call
                # should almost certainly be assigned to name_en_tmp; as
                # written the function returns '' whenever the first lookup
                # fails. Left unchanged here; flagging for a fix.
                get_key.get_key_by_priority_or_default(
                    data_dict['name'], norm_name, '',
                    special_filter=check_latin)
            return name_en_tmp

        for norm_name in norm_name_list:
            # name / name_en are special-cased; everything else is a plain
            # priority pick.
            if norm_name == 'name':
                if has_official:
                    # official == 1 rows never get their name updated
                    new_data_dict['name'] = data_dict['name']['mioji_official']
                elif has_nonofficial:
                    # official == 0: keep the name if it is already Chinese
                    if any([
                            toolbox.Common.is_chinese(c)
                            for c in data_dict['name']['mioji_nonofficial']
                    ]):
                        new_data_dict['name'] = data_dict['name'][
                            'mioji_nonofficial']
                    else:
                        new_data_dict['name'] = get_name_by_priority()
                else:
                    # Standard priority pick
                    new_data_dict['name'] = get_name_by_priority()
            elif norm_name == 'name_en':
                # official == 1 keeps its English name; otherwise pick by priority
                if has_official:
                    new_data_dict['name_en'] = data_dict['name_en'][
                        'mioji_official']
                else:
                    new_data_dict['name_en'] = get_name_en_by_priority()
            else:
                new_data_dict[
                    norm_name] = get_key.get_key_by_priority_or_default(
                        data_dict[norm_name], norm_name, '')
        # daodao URL normalisation: point at the .cn mirror
        if 'daodao' in data_dict['url']:
            data_dict['url']['daodao'] = data_dict['url']['daodao'].replace(
                'www.tripadvisor.com.hk', 'www.tripadvisor.cn')
        # Restaurants: derive tagid from cuisines
        if poi_type == 'rest':
            data_dict['tagid'] = copy.deepcopy(data_dict['cuisines'])
            new_data_dict['tagid'] = json.dumps({
                k: v
                for k, v in data_dict['tagid'].items() if k in final_source
            })
        for json_name in json_name_list:
            new_data_dict[json_name] = json.dumps({
                k: v
                for k, v in data_dict[json_name].items() if k in final_source
            })
        new_data_dict['phone'] = new_data_dict['phone'].replace('电话号码:',
                                                                '').strip()
        # --- data manipulation part ---
        # ori_grade: merge online official + non-official + crawled grades
        tmp_ori_grade = {}
        if has_official:
            try:
                tmp_ori_grade.update(json.loads(o_official_data['ori_grade']))
            except Exception as exc:
                logger.exception(msg="[load ori grade error]", exc_info=exc)
        if has_nonofficial:
            try:
                tmp_ori_grade.update(
                    json.loads(o_nonofficial_data['ori_grade']))
            except Exception as exc:
                logger.exception(msg="[load ori grade error]", exc_info=exc)
        tmp_ori_grade.update({k: v for k, v in data_dict['grade'].items()})
        new_data_dict['ori_grade'] = json.dumps(
            {k: v for k, v in tmp_ori_grade.items() if k in final_source})
        # data_source column: the list of contributing sources
        source = '|'.join(
            map(lambda x: x.split('|')[0], union_info.split('|_||_|')))
        # alias: every collected name that differs from the chosen ones
        alias = '|'.join(
            filter(
                lambda x: x != new_data_dict['name'] and x != new_data_dict[
                    'name_en'],
                set(
                    list(data_dict['name'].values()) +
                    list(data_dict['name_en'].values()))))
        # open time
        final_open_time_desc = get_key.get_key_by_priority_or_default(
            data_dict['opentime'], 'opentime',
            special_filter=add_open_time_filter)
        if final_open_time_desc:
            norm_open_time = fix_daodao_open_time(final_open_time_desc)
        else:
            norm_open_time = ''
        # normalised tags
        # todo change make qyer and other can be used
        unknown_tag = set()
        if 'daodao' in data_dict['tagid']:
            try:
                daodao_tagid, daodao_tagid_en, _unknown_tag = get_norm_tag(
                    data_dict['tagid']['daodao'], poi_type)
                unknown_tag.update(_unknown_tag)
            except Exception:
                daodao_tagid, daodao_tagid_en = '', ''
        else:
            daodao_tagid, daodao_tagid_en = '', ''
        # # rest tag
        # if 'daodao' in data_dict['tagid']:
        #     try:
        #         daodao_rest_tagid, daodao_rest_tagid_en, _ = get_norm_tag(data_dict['tagid']['daodao'],
        #                                                                   'rest')
        #     except Exception:
        #         daodao_rest_tagid, daodao_rest_tagid_en = '', ''
        # else:
        #     daodao_rest_tagid, daodao_rest_tagid_en = '', ''
        # shop tag
        if 'daodao' in data_dict['tagid']:
            try:
                daodao_shop_tagid, daodao_shop_tagid_en, _ = get_norm_tag(
                    data_dict['tagid']['daodao'], 'shop')
            except Exception:
                daodao_shop_tagid, daodao_shop_tagid_en = '', ''
        else:
            daodao_shop_tagid, daodao_shop_tagid_en = '', ''
        if 'qyer' in data_dict['tagid']:
            try:
                qyer_tagid, qyer_tagid_en, _unknown_tag = get_norm_tag(
                    data_dict['tagid']['qyer'], poi_type)
                unknown_tag.update(_unknown_tag)
            except Exception:
                qyer_tagid, qyer_tagid_en = '', ''
        else:
            qyer_tagid, qyer_tagid_en = '', ''
        # # rest tag
        # if 'qyer' in data_dict['tagid']:
        #     try:
        #         qyer_rest_tagid, qyer_rest_tagid_en, _ = get_norm_tag(data_dict['tagid']['qyer'], 'rest')
        #     except Exception:
        #         qyer_rest_tagid, qyer_rest_tagid_en = '', ''
        # else:
        #     qyer_rest_tagid, qyer_rest_tagid_en = '', ''
        # shop tag
        if 'qyer' in data_dict['tagid']:
            try:
                qyer_shop_tagid, qyer_shop_tagid_en, _ = get_norm_tag(
                    data_dict['tagid']['qyer'], 'shop')
            except Exception:
                qyer_shop_tagid, qyer_shop_tagid_en = '', ''
        else:
            qyer_shop_tagid, qyer_shop_tagid_en = '', ''
        l_norm_tag = []
        l_norm_tag_en = []
        l_norm_tag.extend(daodao_tagid.split('|'))
        l_norm_tag_en.extend(daodao_tagid_en.split('|'))
        l_norm_tag.extend(qyer_tagid.split('|'))
        l_norm_tag_en.extend(qyer_tagid_en.split('|'))
        l_other_norm_tag = []
        l_other_norm_tag.extend(daodao_shop_tagid.split('|'))
        l_other_norm_tag.extend(qyer_shop_tagid.split('|'))
        # Drop empty and duplicate tags
        norm_tag = '|'.join(filter(lambda x: is_legal(x), set(l_norm_tag)))
        norm_tag_en = '|'.join(
            filter(lambda x: is_legal(x), set(l_norm_tag_en)))
        other_tag = '|'.join(
            filter(lambda x: is_legal(x), set(l_other_norm_tag)))
        # --- persistence part ---
        # From here on data_dict holds the *final* per-field values
        data_dict = new_data_dict
        # Filter by name: both names empty -> drop
        if data_dict['name'].lower() in (
                '', 'null',
                '0') and data_dict['name_en'].lower() in ('', 'null', '0'):
            if 'online' in union_info:
                filter_data_already_online(poi_type, miaoji_id, "中英文名为空")
            logger.debug("[filter by name][name: {}][name_en: {}]".format(
                data_dict['name'], data_dict['name_en']))
            continue
        # "停业" == permanently closed
        if '停业' in data_dict['name'] or '停业' in data_dict['name_en']:
            if 'online' in union_info:
                filter_data_already_online(poi_type, miaoji_id, "停业 POI")
            logger.debug(
                "[filter by name with close business][name: {}][name_en: {}]".
                format(data_dict['name'], data_dict['name_en']))
            continue
        # This logic was too naive and has been removed, 23333333
        # # name vs name_en check
        # if data_dict['name'] != data_dict['name_en']:
        #     if data_dict['name_en'] in data_dict['name']:
        #         data_dict['name'] = data_dict['name'].replace(data_dict['name_en'], '')
        # phone: drop the "add a phone number" placeholder
        if data_dict['phone'] in ('+ 新增電話號碼', '+ 新增电话号码'):
            data_dict['phone'] = ''
        # Restaurants: map the textual price level to a number
        if poi_type == 'rest':
            data_dict['price_level'] = W2N.get(
                data_dict.get('price_level', ''), '0')
        # nearCity column
        nearby_city = get_nearby_city(poi_city_id=city_id,
                                      poi_map_info=data_dict['map_info'])
        # --- cleaning part ---
        # Drop rows whose coordinates do not parse as "lng,lat"
        try:
            lng, lat = data_dict['map_info'].split(',')
            lng = float(lng)
            lat = float(lat)
            data_dict['map_info'] = '{},{}'.format(lng, lat)
        except Exception as exc:
            logger.exception(msg="[map_info filter error][data: {}]".format(
                data_dict['map_info']), exc_info=exc)
            continue
        # Strip boilerplate from the name ("Is this your business?")
        data_dict['name'] = data_dict['name'].replace('这是您的企业吗?', '').strip()
        if data_dict['name_en'] in data_dict[
                'name'] and data_dict['name_en'] != data_dict['name']:
            # NOTE(review): str.replace returns a new string; this result is
            # discarded, so the English name is never actually stripped from
            # the Chinese name. Left unchanged; flagging for a fix.
            data_dict['name'].replace(data_dict['name_en'], '')
        # --- field fix-ups ---
        # address
        if data_dict['address'].lower() in ('null', '0'):
            data_dict['address'] = ''
        # open time: fall back to per-type defaults
        if norm_open_time.lower() in ('', 'null', '0'):
            if poi_type in ('attr', 'rest'):
                norm_open_time = '<*><*><00:00-23:55><SURE>'
            else:
                norm_open_time = '<*><*><08:00-20:00><SURE>'
        # Persist unrecognised tags for later review
        if unknown_tag:
            insert_unknown_keywords('{}_tag'.format(poi_type), unknown_tag)
            logger.debug("[unknown tag][tags: {}]".format(unknown_tag))
        # Drop POIs too far from the city centre
        result = poi_is_too_far(city_id, poi_map_info=data_dict['map_info'])
        if not result:
            if 'online' in union_info:
                filter_data_already_online(poi_type, miaoji_id, "距城市中心距离过远")
            logger.debug(
                "[poi filter by poi city distance][cid: {}][city_map: {}][poi_map_info: {}][distance: {}]"
                .format(city_id, result.city_map, data_dict['map_info'],
                        result.dist))
            continue
        # Phones longer than 55 chars do not fit the column — blank them
        if len(data_dict['phone']) > 55:
            logger.debug(
                "[phone length too long][poi_id: {}][len: {}][phone: {}]".
                format(miaoji_id, len(data_dict['phone']),
                       data_dict['phone']))
            data_dict['phone'] = ''
        if poi_type == 'attr':
            per_data = {
                'id': miaoji_id,
                'name': data_dict['name'],
                'name_en': data_dict['name_en'],
                'data_source': source,
                'city_id': city_id,
                'map_info': data_dict['map_info'],
                'address': data_dict['address'],
                'star': data_dict['star'],
                'plantocount': data_dict['plantocounts'],
                'beentocount': data_dict['beentocounts'],
                'real_ranking': data_dict['ranking'],
                # 'grade': data_dict['grade'],
                'commentcount': data_dict['commentcounts'],
                'tagid': data_dict['tagid'],
                'norm_tagid': norm_tag,
                'norm_tagid_en': norm_tag_en,
                'website_url': data_dict['site'],
                'phone': data_dict['phone'],
                'open': norm_open_time,
                'open_desc': data_dict['opentime'],
                'recommend_lv': data_dict['recommend_lv'],
                'prize': data_dict['prize'],
                'traveler_choice': data_dict['traveler_choice'],
                'alias': alias,
                'image': data_dict['imgurl'],
                'ori_grade': data_dict['ori_grade'],
                'nearCity': nearby_city
            }
            # Fields not updated when official == 1; updated for
            # non-official and newly added rows
            if not has_official:
                per_data.update({
                    'introduction': data_dict['introduction'],
                    'url': data_dict['url'],
                })
            if not has_official and not has_nonofficial:
                per_data.update({
                    # Explicit update rule: only set when not merged before
                    'ranking': -1.0,
                    'rcmd_open': '',
                    'add_info': '',
                    'address_en': '',
                    'event_mark': '',
                    'grade': -1.0,
                    # Explicit update rule: status only set on first merge
                    'status_online': 'Open',
                    'status_test': 'Open'
                })
            # Attraction "tour" (游览) tag clean-up
            try:
                tagid_data = json.loads(data_dict['tagid'])
                if 'daodao' in tagid_data:
                    if is_legal(tagid_data['daodao']):
                        if '游览' in tagid_data['daodao']:
                            if 'online' in union_info:
                                # These rows would normally be deleted, but some
                                # online shopping rows still need updates, so
                                # existing online rows are kept and merged; only
                                # brand-new rows must never get the tour tag.
                                # filter_data_already_online(poi_type, miaoji_id, "tag 中包含游览被过滤")
                                pass
                            else:
                                logger.debug("[tour filter][data: {}]".format(
                                    tagid_data['daodao']))
                                continue
            except Exception as exc:
                logger.exception(msg="[tour filter error]", exc_info=exc)
            if norm_tag == '' and other_tag != '':
                # Attraction rows that carry only shopping tags are dropped
                if 'online' in union_info:
                    # Same special case as above: keep rows already online,
                    # never add new shopping-tagged rows as attractions.
                    # filter_data_already_online(poi_type, miaoji_id, "景点类中存在购物数据被过滤")
                    pass
                else:
                    continue
            data.append(per_data)
        elif poi_type == 'rest':
            # NOTE(review): `key` is not defined anywhere in this scope — the
            # 'rest' branch would raise NameError at runtime (it probably
            # should be city_id). Left unchanged; flagging for a fix.
            data.append(
                (miaoji_id, data_dict['name'], data_dict['name_en'], source,
                 key, data_dict['map_info'], data_dict['address'],
                 data_dict['ranking'], data_dict['grade'], data_dict['url'],
                 data_dict['phone'], data_dict['introduction'],
                 norm_open_time, data_dict['opentime'], data_dict['prize'],
                 data_dict['traveler_choice'], data_dict['commentcounts'],
                 data_dict['price'], data_dict['price_level'],
                 data_dict['cuisines'], data_dict['imgurl'],
                 data_dict['tagid'], norm_tag, norm_tag_en, nearby_city))
        elif poi_type == 'shop':
            per_data = {
                'id': miaoji_id,
                'name': data_dict['name'],
                'name_en': data_dict['name_en'],
                'data_source': source,
                'city_id': city_id,
                'map_info': data_dict['map_info'],
                'address': data_dict['address'],
                'star': data_dict['star'],
                'plantocount': data_dict['plantocounts'],
                'beentocount': data_dict['beentocounts'],
                'real_ranking': data_dict['ranking'],
                # 'grade': data_dict['grade'],
                'commentcount': data_dict['commentcounts'],
                'tagid': data_dict['tagid'],
                'norm_tagid': norm_tag,
                'norm_tagid_en': norm_tag_en,
                'website_url': data_dict['site'],
                'phone': data_dict['phone'],
                'open': norm_open_time,
                'open_desc': data_dict['opentime'],
                'recommend_lv': data_dict['recommend_lv'],
                'prize': data_dict['prize'],
                'traveler_choice': data_dict['traveler_choice'],
                'image': data_dict['imgurl'],
                'nearCity': nearby_city
            }
            # Fields not updated when official == 1; updated for
            # non-official and newly added rows
            if not has_official:
                per_data.update({
                    'introduction': data_dict['introduction'],
                    'url': data_dict['url'],
                })
            if not has_official and not has_nonofficial:
                per_data.update({
                    # Defaults required for the insert to succeed
                    'ranking': -1.0,
                    'rcmd_open': '',
                    'image_list': '',
                    'grade': -1.0,
                    # Explicit update rule: status only set on first merge
                    'status_online': 'Open',
                    'status_test': 'Open'
                })
            shopping_tag = [
                '礼品与特产商店', '大型购物中心', '农贸市场', '跳蚤市场与街边市场', '古董店', '百货商场',
                '厂家直营店', '购物'
            ]
            important_shopping_tag = [
                '礼品与特产商店', '大型购物中心', '百货商场', '厂家直营店', '购物'
            ]
            # Shopping-data filter, based on normalised tags
            tag_list = norm_tag.split('|')
            if not all([tag.strip() in shopping_tag for tag in tag_list]):
                if not any([
                        tag.strip() in important_shopping_tag
                        for tag in tag_list
                ]):
                    if 'online' in union_info:
                        filter_data_already_online(poi_type, miaoji_id,
                                                   "非购物数据被过滤")
                    continue
            data.append(per_data)
        else:
            raise TypeError("Unknown Type: {}".format(poi_type))
        # Flush the batch every 300 records
        if count % 300 == 0:
            db = dataset.connect(
                "mysql+pymysql://mioji_admin:[email protected]/poi_merge?charset=utf8"
            )
            table = db[data_process_table_name]
            _insert = 0
            logger.debug("Total: {}".format(count))
            _t = time.time()
            for d in data:
                _res = table.upsert(d, keys=['id'])
                if _res:
                    _insert += 1
            logger.debug(
                '[data upsert][count: {}][insert: {}][takes: {}]'.format(
                    count, _insert, time.time() - _t))
            logger.debug("[city_id: {}][insert_count_this_times: {}]".format(
                cid, _insert))
            db.commit()
            data = []
        count += 1
    logger.debug("[city_id: {}][total: {}]".format(cid, count))
    # Flush the final partial batch
    _insert = 0
    db = dataset.connect(
        "mysql+pymysql://mioji_admin:[email protected]/poi_merge?charset=utf8"
    )
    table = db[data_process_table_name]
    for d in data:
        _res = table.upsert(d, keys=['id'])
        if _res:
            _insert += 1
    logger.debug("Insert: {}".format(_insert))
    db.commit()
    logger.debug("Insert: {}".format(_insert))
    conn.close()
    update_already_merge_city("{}_data".format(poi_type), cid)
def mk_base_data_final(_poi_type):
    """Copy a merged POI table into the ``base_data`` schema.

    Maps the POI type to its table (chat_attraction / chat_restaurant /
    chat_shopping), truncates the ``base_data`` copy for attractions and
    shopping, then bulk-copies the rows with ``REPLACE INTO ... SELECT``.

    :param _poi_type: one of 'attr', 'rest', 'shop'
    :raises TypeError: for any other value
    """
    if _poi_type == 'attr':
        table_name = 'chat_attraction'
    elif _poi_type == 'rest':
        table_name = 'chat_restaurant'
    elif _poi_type == 'shop':
        table_name = 'chat_shopping'
    else:
        raise TypeError("Unknown Type: {}".format(_poi_type))
    conn = poi_ori_pool.connection()
    cursor = conn.cursor()
    if _poi_type == 'attr':
        cursor.execute('''TRUNCATE base_data.{};'''.format(table_name))
        conn.commit()
        sql = '''REPLACE INTO base_data.{0} SELECT id, name, alias, name_en, name_en_alias, map_info, city_id, grade, ranking, '', address, introduction, open, ticket, intensity, rcmd_intensity, first_image, image_list, level, hot_level, url, tagB, disable, utime, nearCity, official, status_online, status_test FROM {0};'''.format(table_name)
    elif _poi_type == 'shop':
        cursor.execute('''TRUNCATE base_data.{};'''.format(table_name))
        conn.commit()
        sql = '''REPLACE INTO base_data.{0} SELECT id, name, name_en, map_info, city_id, address, grade, ranking, '', introduction, open, intensity, rcmd_intensity, level, first_image, image_list, hot_level, url, tagB, disable, utime, nearCity, 0, 'Open', 'Open' FROM {0};'''.format(table_name)
    else:
        # 'rest' is the only remaining value (anything else raised above).
        # NOTE(review): unlike attr/shop, the restaurant base_data table is
        # NOT truncated before the copy — confirm this asymmetry is intended.
        sql = '''REPLACE INTO base_data.{0} SELECT id, name, name_en, city_id, map_info, address, telphone, introduction, ranking, grade, max_price, min_price, price_level, open_time, '', 0, michelin_star, level, first_image, image_list, hot_level, url, tagB, disable, utime, nearCity, 0, status_online, status_test FROM {0};'''.format(table_name)
    # res is the affected-row count reported by the driver
    res = cursor.execute(sql)
    # fix: the REPLACE itself was never committed (only the TRUNCATEs were),
    # so on non-autocommit pooled connections the copy could be rolled back
    # when the connection was returned to the pool.
    conn.commit()
    cursor.close()
    conn.close()
    logger.debug(
        "[replace base data table][table: {}][replace_count: {}]".format(
            table_name, res))