Beispiel #1
0
def get_chaos_menu():
    """Find the first shop whose parsed menu contains the probe dish name.

    Reads ``../data/res.out_json_offline`` with one JSON object per line.
    Each object maps a platform tag (``eleme``, ``baidu`` or ``meituan``) to
    a shop id. For every pair, the shop's ``menu`` column is fetched and
    parsed with the parser registered for that platform. As soon as the
    probe dish name shows up among the parsed food names, the tag and id are
    printed and the process terminates through ``sys.exit()``.

    The running line count is printed every 2000 input lines. Keys that are
    not one of the three platform tags, and ids without a matching row, are
    skipped.
    """
    # The table name always comes from the fixed mapping below; only the id
    # originates from the input file, so it is passed as a bound parameter.
    sql = "select menu from `{}` where id=%s"
    mysql_obj = get_mysql_obj(os.sep.join([conf_dir, 'db.conf']), 'mysql_waimai')
    cursor = mysql_obj['cursor']

    tb_dic = {
        'eleme': 'eleme_shop',
        'baidu': 'baidu_waimai_shop',
        'meituan': 'meituan_waimai_shop',
    }
    parser_dic = {
        'baidu': BdWmParser(),
        'meituan': MtWmParser(),
        'eleme': ElemeParser(),
    }
    target_food = u'𠧧鸭腿'

    # The context manager closes the file even when sys.exit() fires below.
    with open('../data/res.out_json_offline', 'r') as src_file:
        for num, line in enumerate(src_file, 1):
            if num % 2000 == 0:
                print(num)
            dic = json.loads(line.strip())
            for tag, _id in dic.items():
                if tag not in tb_dic:
                    continue
                cursor.execute(sql.format(tb_dic[tag]), (_id,))
                res = cursor.fetchone()
                if not res:
                    continue
                parser = parser_dic[tag]
                ori_ls = parser.parse_one_menu(res['menu'])
                if target_food in set(parser.get_all_food(ori_ls)):
                    print(tag, _id)
                    sys.exit()
Beispiel #2
0
def parse_specific_menu():
    """Parse and print the menus of the shops named on the first stdin line.

    The first line of ``sys.stdin`` must be a JSON object mapping a platform
    name (``baidu``, ``meituan`` or ``eleme``) to a shop id. For every pair
    the shop's ``menu`` column is fetched, parsed with the parser registered
    for that platform's table, and the parsed result is printed as UTF-8
    encoded JSON.

    Only the first input line is processed: the function calls
    ``sys.exit()`` once that line has been handled. Keys that are not a
    known platform name, and ids without a matching row, are skipped.
    """
    src_tb_dic = {
        'baidu': 'baidu_waimai_shop',
        'meituan': 'meituan_waimai_shop',
        'eleme': 'eleme_shop',
    }
    tb_parser_dic = {
        'baidu_waimai_shop': BdWmParser(),
        'meituan_waimai_shop': MtWmParser(),
        'eleme_shop': ElemeParser(),
    }

    mysql_obj = get_mysql_obj(os.sep.join([conf_dir, 'db.conf']), 'mysql_online')
    cursor = mysql_obj['cursor']

    # The table name comes from the fixed mapping above; the id comes from
    # stdin and is therefore passed as a bound parameter.
    sql = "select menu from `{}` where id=%s limit 1"

    for line in sys.stdin:
        shop_ids = json.loads(line.strip())
        for src, _id in shop_ids.items():
            if src not in src_tb_dic:
                continue
            src_tb = src_tb_dic[src]
            parser = tb_parser_dic[src_tb]
            cursor.execute(sql.format(src_tb), (_id,))
            # Kept in its own name so the mapping being iterated is not
            # shadowed by the fetched row.
            row = cursor.fetchone()
            if not row:
                continue
            menu_ls = parser.parse_one_menu(row.get('menu', ''))
            print(json.dumps(menu_ls, ensure_ascii=False).encode('utf8'))
        # Deliberately stop after the first input line.
        sys.exit()
Beispiel #3
0
def menu_cluster(fn='merged_shop.json'):
    '''
    Cluster the dishes of each merged shop.

    input: fn string, name of the merged-shop file. One merged shop per
           line: {"baidu":'', "eleme":'', "meituan":'', "id":''}
    output: prints one JSON object per shop of the form
            {"id": <merged shop id>, "foods": {<cluster label>: [food, ...]}}
            where every food dict is tagged with ``__source`` and ``__id``.

    Keys other than the three platform tags (for example "id") are not
    looked up in the database. A line without an "id" key raises KeyError.
    '''
    # The table name comes from the fixed mapping below; the id comes from
    # the input file and is passed as a bound parameter.
    sql = "select * from `{}` where id=%s"
    mysql_obj = get_mysql_obj(os.sep.join([conf_dir, 'db.conf']),
                              'mysql_online')
    cursor = mysql_obj['cursor']

    feat_gen = FeatureGenerator()
    cluster_obj = SimpleCluster()

    tb_dic = {
        'eleme': 'eleme_shop',
        'baidu': 'baidu_waimai_shop',
        'meituan': 'meituan_waimai_shop'
    }
    with open(fn, 'r') as merged_file:
        for line in merged_file:
            dic = json.loads(line.strip())

            # Collect the features of every dish of every platform shop that
            # belongs to this merged shop.
            feat_ls = []
            for tag, _id in dic.items():
                if tag not in tb_dic:
                    continue
                cursor.execute(sql.format(tb_dic[tag]), (_id,))
                res = cursor.fetchone()
                feat_ls.extend(
                    feat_gen.generate_feature_with_food_dic(res, tag))

            label_ls = cluster_obj.cluster(feat_ls)

            # Group the food dicts by cluster label, keeping first-seen
            # label order.
            foods_by_label = OrderedDict()
            for feat, label in zip(feat_ls, label_ls):
                # The first three feature fields are the source tag, the
                # platform shop id and the food dict.
                src, shop_id, food_dic = feat[:3]
                food_dic['__source'] = src
                food_dic['__id'] = shop_id
                foods_by_label.setdefault(label, []).append(food_dic)

            res_dic = {'id': dic['id'], 'foods': foods_by_label}
            print(json.dumps(res_dic, ensure_ascii=False).encode('utf8'))
Beispiel #4
0
def menu_cluster(fn='../menu_fusion/res_dic.json'):
    '''
    Cluster the dishes of each merged shop (debug variant).

    input: fn string, name of the merged-shop file. One merged shop per
           line: {"baidu":'', "eleme":'', "meituan":'', "id":''}
    output: for each shop, prints one tab-separated line per dish (the
            feature fields followed by the cluster label), ordered by the
            label as a string, then an empty line.

    Only the first 10 shops of the file are processed. Keys other than the
    three platform tags (for example "id") are not looked up in the
    database.
    '''
    # The table name comes from the fixed mapping below; the id comes from
    # the input file and is passed as a bound parameter.
    sql = "select * from `{}` where id=%s"
    mysql_obj = get_mysql_obj(os.sep.join([conf_dir, 'db.conf']), 'mysql_online')
    cursor = mysql_obj['cursor']

    feat_gen = FeatureGenerator()
    cluster_obj = SimpleCluster()

    tb_dic = {
        'eleme': 'eleme_shop',
        'baidu': 'baidu_waimai_shop',
        'meituan': 'meituan_waimai_shop',
    }
    with open(fn, 'r') as merged_file:
        for num, line in enumerate(merged_file, 1):
            dic = json.loads(line.strip())

            feat_ls = []
            for tag, _id in dic.items():
                if tag not in tb_dic:
                    continue
                tb_name = tb_dic[tag]
                cursor.execute(sql.format(tb_name), (_id,))
                res = cursor.fetchone()
                feat_ls.extend(
                    feat_gen.generate_feature_with_food_dic(res, tb_name))

            label_ls = cluster_obj.cluster(feat_ls)

            out_ls = []
            for feat, label in zip(feat_ls, label_ls):
                # NOTE(review): the encode step looks written for Python 2,
                # where non-``str`` fields are ``unicode`` - confirm before
                # running under Python 3.
                row = [
                    field if isinstance(field, str) else field.encode('utf8')
                    for field in feat
                ]
                row.append(str(label))
                out_ls.append(row)

            # Labels are compared as strings, so the order is lexicographic;
            # the sort is stable, so dishes keep their order within a label.
            out_ls.sort(key=lambda row: row[-1])
            for row in out_ls:
                print('\t'.join(row))
            print('')

            if num == 10:
                break
Beispiel #5
0
def get_unlabeled_data():
    '''
    Dump the dishes of sampled shops as unlabeled rows.

    input: sys.stdin, tab-separated lines with exactly eight fields:
           id cnt name type city meituan_id eleme_id baidu_id
    output: sys.stdout, tab-separated, one line per dish:
            id name type city food_name platform_shop_id table_name
            followed by an empty line after each input shop.

    Lines that do not have exactly eight fields and ids that were already
    seen are skipped. Processing stops once 50 distinct ids have been
    handled. Platform ids that are empty or have no matching row are
    skipped.
    '''
    # The table name comes from the fixed list below; the id comes from
    # stdin and is passed as a bound parameter.
    sql = "select * from `{}` where id=%s"
    mysql_obj = get_mysql_obj(os.sep.join([conf_dir, 'db.conf']), 'mysql_online')
    cursor = mysql_obj['cursor']

    # Table/parser pairs, in the same order as the meituan, eleme and baidu
    # id columns of the input.
    platforms = [
        ('meituan_waimai_shop', MtWmParser()),
        ('eleme_shop', ElemeParser()),
        ('baidu_waimai_shop', BdWmParser()),
    ]

    id_set = set()
    for line in sys.stdin:
        ls = line.strip().split('\t')
        try:
            _id, _cnt, name, typ, city, m_id, e_id, b_id = ls
        except ValueError:
            # Wrong number of fields: not a valid sample row.
            continue
        if _id in id_set:
            continue
        id_set.add(_id)

        for a_id, (tb, parser) in zip([m_id, e_id, b_id], platforms):
            if not a_id:
                continue
            cursor.execute(sql.format(tb), (a_id,))
            row = cursor.fetchone()
            if not row:
                continue
            for food in parser.get_all_food_from_menu(row.get('menu', '')):
                str_ls = [_id, name, typ, city, food.encode('utf8'), a_id, tb]
                print('\t'.join(str_ls))
        print('')

        if len(id_set) >= 50:
            break
Beispiel #6
0
def get_sample_shop_from_sample_file():
    '''
    Get the ids of the same merchant in the different apps from a sample file.
    Input: sys.stdin
    720e49b2f4c6991ff4b3b6500fd815ba        1        印象柳螺柳州螺蛳粉•匠心制造        waimai
    meituan_id        cnt brand_name        type
    Output: sys.stdout
    meituan_id        cnt brand_name        meituan_id  eleme_id  baidu_id
    '''
    sql = "select * from `std_shop` where id='{}' limit 1"
    mysql_obj = get_mysql_obj(os.sep.join([conf_dir, 'db.conf']), 'mysql_waimai')
    conn = mysql_obj['conn']
    cursor = mysql_obj['cursor']
    for line in sys.stdin: