def update_lnglat():
    """Convert stored GCJ-02 coordinates to BD-09 and refresh geohashes.

    Reads all rows from ``meituan_tenantinfo_1``, rewrites each row's
    longitude/latitude via ``coord_transform.gcj02_to_bd09`` and recomputes
    the geohash, then recreates ``meituan_tenantinfo`` and bulk-appends the
    converted frame into it via SQLAlchemy.
    """
    conn, cur = connect_mysql()
    sql = """SELECT `business_id`, `name`, `address`, `telephone`, `month_saled`, `shop_announcement`, `latitude`, `longitude`, `geohash`, `avg_rating`, `business_url`, `photo_url`, `float_minimum_order_amount`, `float_delivery_fee`, `delivery_consume_time`, `work_time`, `md5`, `mt_poi_id`, `minus` FROM new_hudong_db.meituan_tenantinfo_1 """
    items = pd.read_sql(sql, conn, index_col='business_id')
    cur.close()
    conn.close()
    count = len(items)
    # enumerate from 1 so the progress line runs 1..count instead of 0..count-1.
    for no, i in enumerate(items.index, start=1):
        print('正在整合第%s/%s条数据' % (no, count))
        # gcj02_to_bd09 returns (lng, lat); reuse the converted values directly
        # instead of re-reading (and re-parsing) them from the DataFrame.
        lng, lat = coord_transform.gcj02_to_bd09(
            float(items.loc[i, 'longitude']), float(items.loc[i, 'latitude']))
        items.loc[i, 'longitude'] = lng
        items.loc[i, 'latitude'] = lat
        # geohash.encode expects (lat, lng) — note the swapped argument order.
        items.loc[i, 'geohash'] = geohash.encode(lat, lng)
    # Recreate the destination table, then append the whole frame in one shot.
    get_tenant_info.create_table()
    engine = create_engine(
        "mysql+mysqldb://%s:%s@%s/%s?charset=utf8mb4"
        % (config.user, config.passwd, config.ip, config.db))
    items.to_sql('meituan_tenantinfo', con=engine, if_exists='append',
                 index=True, index_label=None)
    return
def save_to_mysql(items):
    """Bulk-insert tenant rows into ``meituan_tenantinfo``.

    :param items: iterable of 18-tuples matching the column list below.

    Logs (rather than raises) on failure; always closes the cursor and
    connection, and prints the elapsed time.
    """
    start = time.time()
    conn, cur = connect_mysql()
    try:
        cur.execute("set names utf8mb4")
        cur.executemany("""
            INSERT INTO `new_hudong_db`.`meituan_tenantinfo`
            (`business_id`, `name`, `address`, `telephone`, `month_saled`,
             `shop_announcement`, `latitude`, `longitude`, `geohash`,
             `avg_rating`, `business_url`, `photo_url`,
             `float_minimum_order_amount`, `float_delivery_fee`,
             `delivery_consume_time`, `work_time`, `md5`, `mt_poi_id`)
            VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
            """, items)
        cur.connection.commit()
    except Exception as e:
        # FIX: the original logger.error('保存到mysql出错', e) passed the
        # exception as an unused %-format arg, so the error was never logged.
        logger.error('保存到mysql出错: %s', e)
    finally:
        # FIX: close in finally so the connection is not leaked on error.
        cur.close()
        conn.close()
    print(time.time() - start)
def update_minus(minus_dic):
    """Merge per-business discount info into ``meituan_tenantinfo``.

    :param minus_dic: mapping of business_id -> minus (discount) string.

    Loads the full tenant table into a DataFrame, overwrites the ``minus``
    column for each key in ``minus_dic``, recreates the table and appends
    the updated frame back via SQLAlchemy.
    """
    conn, cur = connect_mysql()
    engine = create_engine("mysql+mysqldb://%s:%s@%s/%s?charset=utf8mb4"
                           % (config.user, config.passwd, config.ip, config.db))
    sql = """SELECT `business_id`, `name`, `address`, `telephone`, `month_saled`, `shop_announcement`, `latitude`, `longitude`, `geohash`, `avg_rating`, `business_url`, `photo_url`, `float_minimum_order_amount`, `float_delivery_fee`, `delivery_consume_time`, `work_time`, `md5`, `mt_poi_id`, `minus` FROM new_hudong_db.meituan_tenantinfo """
    frame = pd.read_sql(sql, conn, index_col='business_id')
    cur.close()
    conn.close()
    total = len(minus_dic)
    for idx, (business_id, minus_value) in enumerate(minus_dic.items()):
        frame.loc[business_id, 'minus'] = minus_value
        logger.info('正在整合第%s/%s条数据' % (idx, total))
    get_tenant_info.create_table()
    logger.info('开始存入数据库。')
    frame.to_sql('meituan_tenantinfo', con=engine, if_exists='append',
                 index=True, index_label=None)
    return
def save_to_mysql(items):
    """Insert goods rows into ``meituan_goodsinfo`` in batches of 10000.

    :param items: sequence of 11-tuples matching the column list below.

    Commits after every full batch and logs progress; errors are logged
    rather than raised; cursor and connection are always closed.
    """
    conn, cur = connect_mysql()
    cur.execute("set names utf8mb4")
    logger.info("正在插入商品的数据,共有%s条" % len(items))
    start = time.time()
    insert_sql = """
        INSERT INTO `new_hudong_db`.`meituan_goodsinfo`
        (`food_id`, `business_id`, `price`, `name`, `image_path`,
         `month_saled`, `description`, `specs_value`, `specs_features`,
         `md5`, `mt_poi_id`)
        VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
        """
    try:
        count = 0
        item_li = []
        for item in items:
            item_li.append(item)
            count += 1
            if count % 10000 == 0:
                cur.executemany(insert_sql, item_li)
                conn.commit()
                print('已插入%s/%s条数据, 本次插入耗时%sS'
                      % (count, len(items), time.time() - start))
                start = time.time()
                item_li.clear()
        # BUG FIX: the original never inserted the final partial batch —
        # any trailing len(items) % 10000 rows were silently dropped.
        if item_li:
            cur.executemany(insert_sql, item_li)
        cur.connection.commit()
    except Exception as e:
        # FIX: use a %s placeholder so the exception text is actually logged.
        logger.error('保存到mysql出错: %s', e)
    finally:
        cur.close()
        conn.close()
def create_table():
    """Drop and recreate the ``meituan_goodsinfo`` table.

    DROP and CREATE are issued as two separate execute() calls: stacking
    both statements in one call requires the connector's multi-statement
    mode, which MySQLdb/PyMySQL disable by default.
    """
    conn, cur = connect_mysql()
    cur.execute('set names utf8')
    cur.execute("DROP TABLE IF EXISTS `meituan_goodsinfo`;")
    cur.execute("""
        CREATE TABLE `meituan_goodsinfo` (
          `id` bigint(20) NOT NULL AUTO_INCREMENT,
          `food_id` varchar(30) DEFAULT NULL,
          `business_id` varchar(30) DEFAULT NULL,
          `price` varchar(20) DEFAULT NULL,
          `name` varchar(150) DEFAULT NULL,
          `image_path` varchar(150) DEFAULT NULL,
          `month_saled` int(10) DEFAULT '0',
          `description` text DEFAULT NULL,
          `specs_value` text DEFAULT NULL,
          `specs_features` text DEFAULT NULL,
          `md5` varchar(50) DEFAULT NULL,
          `mt_poi_id` varchar(50) DEFAULT NULL,
          PRIMARY KEY (`id`),
          UNIQUE KEY `food_id` (`food_id`),
          INDEX (`business_id`)
        ) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4;
        """)
    cur.close()
    conn.close()
def create_table():
    """Drop and recreate the ``meituan_tenantinfo`` table.

    DROP and CREATE are issued as two separate execute() calls: stacking
    both statements in one call requires the connector's multi-statement
    mode, which MySQLdb/PyMySQL disable by default.
    """
    conn, cur = connect_mysql()
    cur.execute('set names utf8')
    cur.execute("DROP TABLE IF EXISTS `meituan_tenantinfo`;")
    cur.execute("""
        CREATE TABLE `meituan_tenantinfo` (
          `Id` BIGINT(20) PRIMARY KEY AUTO_INCREMENT,
          `business_id` VARCHAR(50) DEFAULT NULL,
          `name` VARCHAR(155) DEFAULT NULL,
          `address` VARCHAR(200) DEFAULT NULL,
          `telephone` VARCHAR(100) DEFAULT NULL,
          `month_saled` INT(50) DEFAULT NULL,
          `shop_announcement` TEXT DEFAULT NULL,
          `latitude` VARCHAR(50) DEFAULT NULL,
          `longitude` VARCHAR(50) DEFAULT NULL,
          `geohash` VARCHAR(20) DEFAULT NULL,
          `avg_rating` VARCHAR(50) DEFAULT NULL,
          `business_url` VARCHAR(255) DEFAULT NULL,
          `photo_url` VARCHAR(255) DEFAULT NULL,
          `float_minimum_order_amount` VARCHAR(50) DEFAULT NULL,
          `float_delivery_fee` VARCHAR(50) DEFAULT NULL,
          `minus` VARCHAR(255) DEFAULT NULL,
          `delivery_consume_time` VARCHAR(100) DEFAULT NULL,
          `work_time` VARCHAR(100) DEFAULT NULL,
          `md5` VARCHAR(100) DEFAULT NULL,
          `mt_poi_id` VARCHAR(50) DEFAULT NULL,
          UNIQUE KEY `mt_poi_id` (`mt_poi_id`),
          INDEX(`business_id`),
          INDEX(`name`)
        ) ENGINE=InnoDB AUTO_INCREMENT=31150 DEFAULT CHARSET=utf8mb4;
        """)
    cur.close()
    conn.close()
def get_lnglat():
    """
    Get longitude and latitude pairs from MySQL, optionally filtered by
    the province / city / region configured in ``config``.

    :return: tuple of (latitude, longitude) rows from meituan_validlnglat
             where sign=0.
    """
    conn, cur = connect_mysql()
    province = config.province
    city = config.city
    region = config.region
    sql = """
        SELECT latitude,longitude
        FROM meituan_validlnglat
        WHERE sign=0
        """
    # FIX: build the LIKE filters with driver-side parameters instead of
    # string concatenation (the old code was injection-prone and broke on
    # values containing quotes).
    params = []
    if province != '':
        sql += " and province like %s"
        params.append('%' + province + '%')
    if city != '':
        sql += " and city like %s"
        params.append('%' + city + '%')
    if region != '':
        sql += " and region like %s"
        params.append('%' + region + '%')
    if params:
        cur.execute(sql, params)
    else:
        cur.execute(sql)
    lnglats = cur.fetchall()
    # FIX: close the cursor before its connection.
    cur.close()
    conn.close()
    return lnglats
def init_url():
    """Reset ``business_url`` to NULL for every row in meituan_tenantinfo."""
    conn, cur = connect_db.connect_mysql()
    try:
        cur.execute('set names utf8')
        cur.execute('''UPDATE `meituan_tenantinfo` SET `business_url`=null;''')
        logger.info('初始化url成功')
        conn.commit()
        logger.info('初始化url commit 成功')
    finally:
        # FIX: close in finally so a failed UPDATE does not leak the connection.
        cur.close()
        conn.close()
def save_to_mysql(items):
    """Bulk-insert (tenant_id, mt_poi_id, name) rows into meituan_tenantid.

    :param items: iterable of 3-tuples. Errors are logged, not raised.
    """
    conn, cur = connect_mysql()
    try:
        cur.execute("set names utf8")
        cur.executemany("INSERT INTO meituan_tenantid(tenant_id, mt_poi_id, name) "
                        "VALUES (%s,%s,%s)", items)
        cur.connection.commit()
    except Exception as e:
        # FIX: the original logger.error('保存到mysql出错', e) passed the
        # exception as an unused %-format arg, so it was never logged.
        logger.error('保存到mysql出错: %s', e)
    finally:
        # FIX: close in finally so the connection is not leaked on error.
        cur.close()
        conn.close()
def monitor():
    """Watchdog loop: repeatedly sample a random stored business_url and
    re-fetch it from the site; after max_faild_times consecutive failures,
    trigger a full URL update and a sync to the test database.

    Runs forever — this function never returns under normal operation.
    """
    conn, cur = connect_db.connect_mysql()
    # Picks ONE random row that has a non-null business_url, using the
    # rand()*max(id) join trick for a cheap random sample.
    sql = """ select t1.longitude, t1.latitude, t1.name, t1.business_url from meituan_tenantinfo as t1 join (select rand()*(select max(id) from meituan_tenantinfo) as id ) as t2 on t1.id>t2.id where business_url is not null limit 1; """
    # NOTE(review): starts at 1 (not 0), so the very first failure already
    # counts toward the max_faild_times threshold — confirm this is intended.
    faild_times = 1
    while True:
        # data = pd.read_sql(sql, conn, index_col='business_id')
        cur.execute(sql)
        data = cur.fetchone()
        if not data:
            # NOTE(review): this `continue` skips the sleep below, so an
            # empty table spins in a tight query loop.
            continue
        # data = (longitude, latitude, name, stored_business_url)
        if (get_url(float(data[0]), float(data[1]), data[2])) == data[3]:
            # Freshly fetched URL matches the stored one: reset the counter.
            logger.info('未改变')
            faild_times = 0
        else:
            logger.info('无法获取url')
            if faild_times >= max_faild_times:
                # Too many consecutive mismatches/failures: assume the URL
                # scheme changed and rebuild everything.
                logger.info('url已变化,开始更新')
                cur.close()
                conn.close()
                api_is_updated(0)
                start = time.time()
                update_url_queue_server.start_update()
                logger.info('主数据库更新完毕,更新至测试数据库。')
                sync_formal_test_db.sync_db()
                logger.info('更新完成,耗时%s分' % ((time.time()-start)/60))
                faild_times = 0
                api_is_updated(1)
                # Reconnect: the old connection was closed before the update.
                conn, cur = connect_db.connect_mysql()
            else:
                faild_times += 1
        time.sleep(1)
    # NOTE(review): unreachable — the `while True` above has no break.
    cur.close()
    conn.close()
def get_tenant_ids():
    """Return all tenant_id rows whose mt_poi_id is non-zero.

    :return: tuple of 1-tuples, one per matching tenant_id.
    """
    conn, cur = connect_mysql()
    query = """ SELECT tenant_id FROM meituan_tenantid WHERE mt_poi_id != 0 """
    cur.execute(query)
    rows = cur.fetchall()
    conn.close()
    cur.close()
    return rows
def create_table():
    """Create the meituan_tenant_id table if it does not already exist."""
    conn, cur = connect_mysql()
    cur.execute('set names utf8')
    ddl = """ CREATE TABLE IF NOT EXISTS meituan_tenant_id (id BIGINT PRIMARY KEY AUTO_INCREMENT, tenant_id VARCHAR(30), mt_poi_id VARCHAR(30),name VARCHAR(200)) """
    try:
        cur.execute(ddl)
    except Exception as e:
        print('表已经存在', e)
    cur.close()
    conn.close()
def get_datas():
    """Return longitude/latitude/name keyed by business_id for every tenant
    that also appears in test_merge_tenantinfo.

    :return: pandas DataFrame indexed by business_id.
    """
    conn, cur = connect_db.connect_mysql()
    query = """ SELECT business_id, longitude, latitude, name FROM meituan_tenantinfo AS mt WHERE mt.business_id IN (SELECT M_business_id FROM test_merge_tenantinfo) """
    frame = pd.read_sql(query, conn, index_col='business_id')
    cur.close()
    conn.close()
    return frame
def create_table():
    """Drop and recreate the ``meituan_tenantid`` table.

    DROP and CREATE are issued separately: combining both statements in one
    execute() requires the connector's multi-statement mode, which
    MySQLdb/PyMySQL disable by default.
    """
    conn, cur = connect_mysql()
    cur.execute('set names utf8')
    try:
        cur.execute("DROP TABLE IF EXISTS `meituan_tenantid`;")
        cur.execute("""
            CREATE TABLE `meituan_tenantid`
            (id BIGINT PRIMARY KEY AUTO_INCREMENT, tenant_id VARCHAR(30),
             mt_poi_id VARCHAR(30), name VARCHAR(200), UNIQUE(mt_poi_id),
             INDEX (tenant_id), INDEX (mt_poi_id))
            """)
    except Exception as e:
        # FIX: the original logger.info('表已经存在', e) passed the exception
        # as an unused %-format arg; give it a placeholder so it is logged.
        logger.info('表已经存在: %s', e)
    cur.close()
    conn.close()
def get_mt_poi_ids():
    """Return a {tenant_id: mt_poi_id} mapping for all rows in meituan_tenantid."""
    conn, cur = connect_mysql()
    cur.execute(""" SELECT tenant_id, mt_poi_id FROM meituan_tenantid """)
    rows = cur.fetchall()
    conn.close()
    cur.close()
    # dict() on (key, value) pairs is equivalent to the manual build loop:
    # later duplicates of a tenant_id overwrite earlier ones either way.
    return dict(rows)
def get_mt_poi_ids():
    """Return a {tenant_id: mt_poi_id} mapping for all rows in meituan_tenantid.

    Cleaned up: removed the dead commented-out UPDATE/LIMIT experiments and
    replaced the manual dict-building loop with a comprehension.
    """
    conn, cur = connect_mysql()
    sql = """ SELECT tenant_id, mt_poi_id FROM meituan_tenantid """
    cur.execute(sql)
    tenant_ids = cur.fetchall()
    conn.close()
    cur.close()
    return {tenant_id: mt_poi_id for tenant_id, mt_poi_id in tenant_ids}
def get_tenant_ids():
    # NOTE(review): this whole function appears to be debug scaffolding —
    # it appends LIMIT 1, ignores the fetched result, and returns a
    # hard-coded tenant id. Confirm before relying on it in production.
    conn, cur = connect_mysql()
    sql = """ SELECT tenant_id FROM meituan_tenant_id """
    # sql = """
    # SELECT latitude,longitude
    # FROM meituan_tenantinfo
    # """
    sql += 'LIMIT 1'
    cur.execute(sql)
    tenant_ids = cur.fetchall()
    conn.close()
    cur.close()
    # The real query result is discarded; a fixed test id is returned instead.
    # return tenant_ids
    return [["353189028939398"]]
def save_to_mysql(items):
    """Insert (tenant_id, mt_poi_id, name) rows one at a time into
    meituan_tenant_id, skipping tenant_ids that already exist.

    :param items: iterable of 3-tuples.

    Prints the number of skipped duplicates at the end.
    """
    count_faild = 0
    conn, cur = connect_mysql()
    try:
        cur.execute("set names utf8")
        # SECURITY FIX: the original interpolated values into the SQL with
        # Python %-formatting ("VALUES('%s','%s','%s')"), which breaks on
        # quotes in names and is SQL-injection-prone. Use driver parameters.
        sql = ("INSERT INTO meituan_tenant_id(tenant_id, mt_poi_id, name) "
               "VALUES(%s,%s,%s)")
        for item in items:
            print(sql, (item[0], item[1], item[2]))
            if not is_exist(item[0]):
                cur.execute(sql, (item[0], item[1], item[2]))
                cur.connection.commit()
            else:
                count_faild += 1
    except Exception as e:
        print('\033[31;1m保存到mysql出错%s\033[0m' % e)
    finally:
        cur.close()
        conn.close()
    print('重复数据量:%s' % count_faild)
def sync_db():
    """Copy meituan_tenantinfo from the master database into the test database.

    Refuses to run unless the configured user is the master-DB writer
    ('pywrite'), to avoid syncing from the wrong source.
    """
    if config.user != 'pywrite':
        logger.error('所操作的数据库不是主数据库,不应同步至测试数据库!')
        return
    conn, cur = connect_mysql()
    logger.info('读取主数据库数据')
    frame = pd.read_sql("""SELECT * FROM new_hudong_db.meituan_tenantinfo""",
                        conn, index_col='business_id')
    logger.info('读取完成')
    create_table()
    # NOTE(review): test-DB credentials are hard-coded here — consider
    # moving them into config.
    test_engine = create_engine("mysql+mysqldb://%s:%s@%s/%s?charset=utf8mb4"
                                % ('root', 'hudongdata', '120.77.8.6', 'hd'))
    logger.info('将数据存入测试数据库')
    frame.to_sql('meituan_tenantinfo', con=test_engine, if_exists='append',
                 index=True, index_label=None)
    logger.info('存入完成')
    cur.close()
    conn.close()
import os
import threading
import time
from url_queue import Queue
from downloader import Downloader
import copy
import logging
import asyncio
import get_tenant_food
import get_tenant_info
# import objgraph

# Module-level crawler state: a shared work queue and its downloader.
queue = Queue()
downloader = Downloader(queue)
# NOTE(review): connect_mysql is not imported above — presumably provided
# elsewhere in this module/file; verify the import exists.
conn, cur = connect_mysql()
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger('get_tenant_id')
# Base POST payload for the tenant-listing endpoint; page_index is paged over.
post_data = {
    "page_index": 0,
}
headers = {
    'referer': 'http://i.waimai.meituan.com/home?lat=22.544102&lng=113.947104',
}
# w_latlng cookie pins the crawl location (micro-degrees lat,lng).
cookies = {'w_latlng': '22555969,113893232'}
# Directory where fetched tenant-id JSON responses are stored.
dir_name = 'json_id'
# Seconds to wait between requests (rate limiting).
request_delay = 2.5
done_count = 0