예제 #1
0
def update_lnglat():
    conn, cur = connect_mysql()

    sql = """SELECT `business_id`, `name`, `address`,
                `telephone`, `month_saled`, `shop_announcement`, `latitude`, `longitude`, `geohash`,
                `avg_rating`, `business_url`, `photo_url`, `float_minimum_order_amount`, `float_delivery_fee`,
                `delivery_consume_time`, `work_time`, `md5`, `mt_poi_id`, `minus` FROM new_hudong_db.meituan_tenantinfo_1
                """
    items = pd.read_sql(sql, conn, index_col='business_id')
    cur.close()
    conn.close()

    count = len(items)
    no = 0
    for i in items.index:
        print('正在整合第%s/%s条数据' % (no, count))
        items.loc[i, 'longitude'], items.loc[i, 'latitude']\
            = coord_transform.gcj02_to_bd09(float(items.loc[i, 'longitude']), float(items.loc[i, 'latitude']))
        items.loc[i, 'geohash'] = geohash.encode(float(items.loc[i, 'latitude']), float(items.loc[i, 'longitude']))
        no += 1

    # for i, j in minus_dic.items():
    #     items.loc[i, 'minus'] = j
    #     logger.info('正在整合第%s/%s条数据' % (minus_num, minus_count))
    #     minus_num += 1

    # items = items.drop_duplicates(['mt_poi_id'])
    # get_tenant_info.create_table()
    # logger.info('开始存入数据库。')

    get_tenant_info.create_table()
    engine = create_engine(
        "mysql+mysqldb://%s:%s@%s/%s?charset=utf8mb4" % (config.user, config.passwd, config.ip, config.db))
    items.to_sql('meituan_tenantinfo', con=engine, if_exists='append', index=True, index_label=None)
    return
예제 #2
0
def save_to_mysql(items):
    start = time.time()
    conn, cur = connect_mysql()
    try:
        cur.execute("set names utf8mb4")
        # cur.executemany("INSERT INTO meituan_tenantid(tenant_id, mt_poi_id, name) VALUES (%s,%s,%s)", items)
        cur.executemany("""
                        INSERT INTO `new_hudong_db`.`meituan_tenantinfo` (`business_id`, `name`, `address`, 
                        `telephone`, `month_saled`, `shop_announcement`, `latitude`, `longitude`, `geohash`, 
                        `avg_rating`, `business_url`, `photo_url`, `float_minimum_order_amount`, `float_delivery_fee`, 
                        `delivery_consume_time`, `work_time`, `md5`, `mt_poi_id`) 
                        VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
                        """, items)
        # ON DUPLICATE KEY UPDATE
        # `business_id`=%s,
        # `month_saled`=%s,
        # `shop_announcement`=%s,
        # `avg_rating`=%s,
        # `business_url`=%s,
        # `photo_url`=%s,
        # `float_minimum_order_amount`=%s,
        # `float_delivery_fee`=%s,
        # `delivery_consume_time`=%s,
        # `work_time`=%s,
        # `md5`=%s ;
        cur.connection.commit()
    except Exception as e:
        logger.error('保存到mysql出错', e)
    cur.close()
    conn.close()
    print(time.time() - start)
예제 #3
0
def update_minus(minus_dic):
    conn, cur = connect_mysql()
    engine = create_engine("mysql+mysqldb://%s:%s@%s/%s?charset=utf8mb4" %
                           (config.user, config.passwd, config.ip, config.db))

    sql = """SELECT `business_id`, `name`, `address`,
                `telephone`, `month_saled`, `shop_announcement`, `latitude`, `longitude`, `geohash`,
                `avg_rating`, `business_url`, `photo_url`, `float_minimum_order_amount`, `float_delivery_fee`,
                `delivery_consume_time`, `work_time`, `md5`, `mt_poi_id`, `minus` FROM new_hudong_db.meituan_tenantinfo
                """
    items = pd.read_sql(sql, conn, index_col='business_id')
    cur.close()
    conn.close()

    minus_num = 0
    minus_count = len(minus_dic)
    for i, j in minus_dic.items():
        items.loc[i, 'minus'] = j
        logger.info('正在整合第%s/%s条数据' % (minus_num, minus_count))
        minus_num += 1

    get_tenant_info.create_table()
    logger.info('开始存入数据库。')
    items.to_sql('meituan_tenantinfo',
                 con=engine,
                 if_exists='append',
                 index=True,
                 index_label=None)
    return
예제 #4
0
def save_to_mysql(items):
    conn, cur = connect_mysql()
    cur.execute("set names utf8mb4")
    logger.info("正在插入商品的数据,共有%s条" % len(items))
    start = time.time()
    try:
        count = 0
        item_li = []
        for item in items:
            item_li.append(item)
            count += 1
            if count % 10000 == 0:
                cur.executemany(
                    """
                        INSERT INTO `new_hudong_db`.`meituan_goodsinfo` (`food_id`, `business_id`, `price`, 
                        `name`, `image_path`, `month_saled`, `description`, `specs_value`, `specs_features`, `md5`, 
                        `mt_poi_id`) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
                        """, item_li)
                conn.commit()
                print('已插入%s/%s条数据, 本次插入耗时%sS' %
                      (count, len(items), time.time() - start))
                start = time.time()
                item_li.clear()
        cur.connection.commit()
    except Exception as e:
        logger.error('保存到mysql出错', e)
    cur.close()
    conn.close()
예제 #5
0
def create_table():
    conn, cur = connect_mysql()
    cur.execute('set names utf8')
    cur.execute("""
            DROP TABLE IF EXISTS `meituan_goodsinfo`;
            CREATE TABLE `meituan_goodsinfo` (
              `id` bigint(20) NOT NULL AUTO_INCREMENT,
              `food_id` varchar(30) DEFAULT NULL,
              `business_id` varchar(30) DEFAULT NULL,
              `price` varchar(20) DEFAULT NULL,
              `name` varchar(150) DEFAULT NULL,
              `image_path` varchar(150) DEFAULT NULL,
              `month_saled` int(10) DEFAULT '0',
              `description` text DEFAULT NULL,
              `specs_value` text DEFAULT NULL,
              `specs_features` text DEFAULT NULL,
              `md5` varchar(50) DEFAULT NULL,
              `mt_poi_id` varchar(50) DEFAULT NULL,
              PRIMARY KEY (`id`),
              UNIQUE KEY `food_id` (`food_id`),
              INDEX (`business_id`)
            ) ENGINE=MyISAM DEFAULT CHARSET=utf8mb4;
            
            """)
    cur.close()
    conn.close()
예제 #6
0
def create_table():
    conn, cur = connect_mysql()
    cur.execute('set names utf8')
    cur.execute("""
            DROP TABLE IF EXISTS `meituan_tenantinfo`;
            CREATE TABLE `meituan_tenantinfo` (
              `Id` BIGINT(20) PRIMARY KEY  AUTO_INCREMENT,
              `business_id` VARCHAR(50) DEFAULT NULL,
              `name` VARCHAR(155) DEFAULT NULL,
              `address` VARCHAR(200) DEFAULT NULL,
              `telephone` VARCHAR(100) DEFAULT NULL,
              `month_saled` INT(50) DEFAULT NULL,
              `shop_announcement` TEXT DEFAULT NULL,
              `latitude` VARCHAR(50) DEFAULT NULL,
              `longitude` VARCHAR(50) DEFAULT NULL,
              `geohash` VARCHAR(20) DEFAULT NULL,
              `avg_rating` VARCHAR(50) DEFAULT NULL,
              `business_url` VARCHAR(255) DEFAULT NULL,
              `photo_url` VARCHAR(255) DEFAULT NULL,
              `float_minimum_order_amount` VARCHAR(50) DEFAULT NULL,
              `float_delivery_fee` VARCHAR(50) DEFAULT NULL,
              `minus` VARCHAR(255) DEFAULT NULL,
              `delivery_consume_time` VARCHAR(100) DEFAULT NULL,
              `work_time` VARCHAR(100) DEFAULT NULL,
              `md5` VARCHAR(100) DEFAULT NULL,
              `mt_poi_id` VARCHAR(50) DEFAULT NULL,
              UNIQUE KEY `mt_poi_id` (`mt_poi_id`),
              INDEX(`business_id`),
              INDEX(`name`)
            ) ENGINE=InnoDB AUTO_INCREMENT=31150 DEFAULT CHARSET=utf8mb4;
            """)
    cur.close()
    conn.close()
예제 #7
0
def get_lnglat():
    """
    Get longtitude and latitude from mysqlDB
    :return:
    """
    conn, cur = connect_mysql()
    province = config.province
    city = config.city
    region = config.region
    sql = """
              SELECT latitude,longitude
              FROM meituan_validlnglat
              WHERE sign=0
              """
    if province != '':
        sql += " and province like '%" + province + "%'"
    if city != '':
        sql += " and city like '%" + city + "%'"
    if region != '':
        sql += " and region like '%" + region + "%'"
    # sql = """
    #           SELECT latitude,longitude
    #           FROM meituan_tenantinfo
    #           """

    # sql += 'LIMIT 1'

    cur.execute(sql)
    lnglats = cur.fetchall()
    conn.close()
    cur.close()
    return lnglats
def init_url():
    conn, cur = connect_db.connect_mysql()
    cur.execute('set names utf8')
    cur.execute('''UPDATE `meituan_tenantinfo`
                   SET `business_url`=null;''')
    logger.info('初始化url成功')
    conn.commit()
    logger.info('初始化url commit 成功')
    cur.close()
    conn.close()
예제 #9
0
def save_to_mysql(items):
    conn, cur = connect_mysql()
    try:
        cur.execute("set names utf8")
        cur.executemany("INSERT INTO meituan_tenantid(tenant_id, mt_poi_id, name) "
                        "VALUES (%s,%s,%s)", items)
        cur.connection.commit()
    except Exception as e:
        logger.error('保存到mysql出错', e)
    cur.close()
    conn.close()
예제 #10
0
def monitor():
    conn, cur = connect_db.connect_mysql()

    sql = """
        select t1.longitude, t1.latitude, t1.name, t1.business_url
        from meituan_tenantinfo as t1 join (select rand()*(select max(id) 
        from meituan_tenantinfo) as id ) as t2 on t1.id>t2.id 
        where business_url is not null
        limit 1;
          """
    faild_times = 1
    while True:
        # data = pd.read_sql(sql, conn, index_col='business_id')
        cur.execute(sql)
        data = cur.fetchone()
        if not data:
            continue
        if (get_url(float(data[0]), float(data[1]), data[2])) == data[3]:
            logger.info('未改变')
            faild_times = 0
        else:
            logger.info('无法获取url')
            if faild_times >= max_faild_times:
                logger.info('url已变化,开始更新')
                cur.close()
                conn.close()
                api_is_updated(0)
                start = time.time()
                update_url_queue_server.start_update()
                logger.info('主数据库更新完毕,更新至测试数据库。')
                sync_formal_test_db.sync_db()
                logger.info('更新完成,耗时%s分' % ((time.time()-start)/60))
                faild_times = 0
                api_is_updated(1)
                conn, cur = connect_db.connect_mysql()
            else:
                faild_times += 1
        time.sleep(1)

    cur.close()
    conn.close()
예제 #11
0
def get_tenant_ids():
    conn, cur = connect_mysql()
    sql = """
              SELECT tenant_id
              FROM meituan_tenantid
              WHERE mt_poi_id != 0
              """
    cur.execute(sql)
    tenant_ids = cur.fetchall()
    conn.close()
    cur.close()
    return tenant_ids
예제 #12
0
def create_table():
    conn, cur = connect_mysql()
    cur.execute('set names utf8')
    try:
        cur.execute("""
            CREATE TABLE IF NOT EXISTS meituan_tenant_id
            (id BIGINT PRIMARY KEY AUTO_INCREMENT, tenant_id VARCHAR(30),
            mt_poi_id VARCHAR(30),name VARCHAR(200))
            """)
    except Exception as e:
        print('表已经存在', e)
    cur.close()
    conn.close()
def get_datas():
    conn, cur = connect_db.connect_mysql()

    sql = """
            SELECT business_id, longitude, latitude, name FROM meituan_tenantinfo AS mt
            WHERE mt.business_id IN (SELECT M_business_id FROM test_merge_tenantinfo)
            """
    datas = pd.read_sql(sql, conn, index_col='business_id')

    cur.close()
    conn.close()
    # server.stop()
    return datas
예제 #14
0
def create_table():
    conn, cur = connect_mysql()
    cur.execute('set names utf8')
    try:
        cur.execute("""
            DROP TABLE IF EXISTS `meituan_tenantid`;
            CREATE TABLE `meituan_tenantid`
            (id BIGINT PRIMARY KEY AUTO_INCREMENT, tenant_id VARCHAR(30),
            mt_poi_id VARCHAR(30),name VARCHAR(200),UNIQUE(mt_poi_id), INDEX (tenant_id), INDEX (mt_poi_id))
            """)
    except Exception as e:
        logger.info('表已经存在', e)
    cur.close()
    conn.close()
예제 #15
0
def get_mt_poi_ids():
    conn, cur = connect_mysql()
    sql = """
              SELECT tenant_id, mt_poi_id
              FROM meituan_tenantid
              """
    cur.execute(sql)
    tenant_ids = cur.fetchall()
    conn.close()
    cur.close()
    id_dic = dict()
    for i, j in tenant_ids:
        id_dic[i] = j
    return id_dic
예제 #16
0
def get_mt_poi_ids():
    conn, cur = connect_mysql()
    sql = """
              SELECT tenant_id, mt_poi_id
              FROM meituan_tenantid
              """
    # sql += 'LIMIT 1'
    cur.execute(sql)
    tenant_ids = cur.fetchall()
    # cur.executemany("UPDATE `new_hudong_db`. `meituan_tenantinfo` SET `mt_poi_id` = %s "
    #                 "WHERE `business_id` = %s and `mt_poi_id` = '';",
    #                 tenant_ids)
    # cur.connection.commit()
    conn.close()
    cur.close()
    id_dic = dict()
    for i, j in tenant_ids:
        id_dic[i] = j
    return id_dic
예제 #17
0
def get_tenant_ids():
    conn, cur = connect_mysql()
    sql = """
              SELECT tenant_id
              FROM meituan_tenant_id
              """
    # sql = """
    #           SELECT latitude,longitude
    #           FROM meituan_tenantinfo
    #           """

    sql += 'LIMIT 1'

    cur.execute(sql)
    tenant_ids = cur.fetchall()
    conn.close()
    cur.close()
    # return tenant_ids
    return [["353189028939398"]]
예제 #18
0
def save_to_mysql(items):
    count_faild = 0
    conn, cur = connect_mysql()
    try:
        cur.execute("set names utf8")
        # cur.executemany("INSERT INTO meituan_tenant_id(tenant_id, mt_poi_id, name) VALUES (%s,%s,%s)", items)
        for item in items:
            sql = "INSERT INTO meituan_tenant_id(tenant_id, mt_poi_id, name) VALUES('%s','%s','%s')" % \
                  (item[0], item[1], item[2])
            print(sql)
            if not is_exist(item[0]):
                cur.execute(sql)
                cur.connection.commit()
            else:
                count_faild += 1
    except Exception as e:
        print('\033[31;1m保存到mysql出错%s\033[0m' % e)
    finally:
        cur.close()
        conn.close()
    print('重复数据量:%s' % count_faild)
예제 #19
0
def sync_db():
    if config.user != 'pywrite':
        logger.error('所操作的数据库不是主数据库,不应同步至测试数据库!')
        return

    conn, cur = connect_mysql()
    sql = """SELECT * FROM new_hudong_db.meituan_tenantinfo"""
    logger.info('读取主数据库数据')
    items = pd.read_sql(sql, conn, index_col='business_id')
    logger.info('读取完成')

    create_table()
    engine = create_engine("mysql+mysqldb://%s:%s@%s/%s?charset=utf8mb4" %
                           ('root', 'hudongdata', '120.77.8.6', 'hd'))
    logger.info('将数据存入测试数据库')
    items.to_sql('meituan_tenantinfo',
                 con=engine,
                 if_exists='append',
                 index=True,
                 index_label=None)
    logger.info('存入完成')

    cur.close()
    conn.close()
예제 #20
0
import os
import threading
import time
from url_queue import Queue
from downloader import Downloader
import copy
import logging
import asyncio
import get_tenant_food
import get_tenant_info
# import objgraph

queue = Queue()
downloader = Downloader(queue)

conn, cur = connect_mysql()

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger('get_tenant_id')

post_data = {
    "page_index": 0,
}
headers = {
    'referer': 'http://i.waimai.meituan.com/home?lat=22.544102&lng=113.947104',
}
cookies = {'w_latlng': '22555969,113893232'}

dir_name = 'json_id'
request_delay = 2.5
done_count = 0