Example #1
0
def sphinx():
    """Run one fixed probe query against the search server and summarise it.

    Builds a hard-coded query description, submits it through the search
    client, prints the elapsed wall-clock time and increments the
    module-level ``num`` counter.

    Returns:
        A tuple ``(status, time, total_found, warning)`` taken from the first
        result set.  When the client returns no result sets, or a field is
        missing, the defaults ``-1``, ``0``, ``0`` and ``''`` are used.

    NOTE(review): this function is itself named ``sphinx`` and calls
    ``sphinx(host, port)`` below.  As shown here that name resolves to this
    very function; presumably the client class is imported under the same
    name elsewhere in the project -- confirm and rename one of them.
    """
    global num
    started = int(time.time())

    item = {
        'querymod': 'SPH_MATCH_EXTENDED2',
        'pageSize': 1,
        'page': 1,
        'intType': {'status': '1', 'isimg': '1'},
        'intRange': {
            'province': '101101,101111,0',
            'city': '101101101,109102101,0',
        },
        'orderBy': 'SPH_SORT_EXPR|FLOOR(log2(@weight))+mem_level*6+isimg*10+IDIV(pub_time,2592000)',
        'weight': {'title': 8, 'com_name': 4, 'keyword': 2},
        'index': 'IDX_pro_info_dist',
    }

    s = sphinx('183.60.177.157', 9501)
    s.initQuery(item)
    rs = s.RunQueries()

    if rs:
        res = rs[0]
    else:
        # No result sets: show what came back and fall through with an empty
        # record so the defaults below apply instead of hitting a NameError.
        print(rs)
        res = {}

    print('useritme:', time.time() - started)

    num += 1

    status = res['status'] if 'status' in res else -1
    _time = res['time'] if 'time' in res else 0
    total_found = res['total_found'] if 'total_found' in res else 0
    warning = res['warning'] if 'warning' in res else ''
    return (status, _time, total_found, warning)
Example #2
0
def sphinx2redis(cnfkey):
    """Collect key/value statistics from the search engine as configured.

    Looks up the business configuration for ``cnfkey`` and, for every entry
    under ``'prama'`` whose source is of the ``sphinx`` kind, runs the entry's
    query expression once, pretty-prints the raw result and prints the
    ``{key attribute: value attribute}`` mapping built from the matches.

    Returns:
        ``sconf.err_handle.biznum_not_config`` when no configuration exists,
        ``sconf.sphinx_index_not_found`` when a source has no host entry,
        otherwise ``None``.
    """
    # Load the business configuration.
    bizcnf = get_cnf_val(cnfkey, sconf.BIZ)
    if not bizcnf:
        return sconf.err_handle.biznum_not_config
    rdb = rediswrap.get_redis('cache')

    for entry in bizcnf['prama']:
        parts = entry['source'].split('.')
        if parts[1] != 'sphinx':
            continue

        host_info = get_host_by_data(entry['source'])
        if not host_info:
            return sconf.sphinx_index_not_found

        client = sphinx(host_info['host'], host_info['port'])
        # The index name is the third dotted component of the source.
        entry['expression']['index'] = parts[2]

        client.initQuery(entry['expression'])
        rs = client.RunQueries()
        pprint(rs)

        if not (rs and rs[0]['status'] == 0):
            print(client._error)
            continue

        first = rs[0]
        total_found = first['total_found']
        # Keep only matches that carry both configured attributes.
        collected = {
            match['attrs'][entry['key']]: match['attrs'][entry['value']]
            for match in first['matches']
            if entry['key'] in match['attrs'] and entry['value'] in match['attrs']
        }
        if collected:
            print(collected)
Example #3
0
def search(kw):
    """Submit a search and return the hits together with their database rows.

    Args:
        kw: Query description.  ``kw['index']`` is resolved through
            ``sconf.get_search_info`` and then overwritten with the resolved
            index name before the query is submitted.  The optional
            ``kw['fields']`` selects the columns to load: either a ready-made
            string (default ``'*'``) or a list/tuple of column names, to which
            ``'id'`` is added if missing.

    Returns:
        ``(-1, error)`` when the search client returns no result sets,
        otherwise ``(0, result)``.  On a successful query ``result`` holds
        ``total_found``, ``total`` and ``time``, plus ``data`` (rows in match
        order) when rows were loaded.  When the first result set reports a
        non-zero status, ``result`` holds ``warning`` and ``error`` instead.
    """
    index = kw.get('index')
    host_info = sconf.get_search_info(index)
    kw['index'] = host_info['index']
    sp = sphinx(host_info['info']['host'], host_info['info']['port'])

    sp.initQuery(kw)
    rs = sp.RunQueries()
    if not rs:
        return -1, sp._error

    head = rs[0]
    result = {}
    if head['status'] != 0:
        result['warning'] = head['warning']
        result['error'] = head['error']
        return 0, result

    result['total_found'] = head['total_found']
    result['total'] = head['total']
    result['time'] = head['time']

    ids = [str(row['id']) for row in head['matches'] if 'id' in row]
    if ids:
        fields = kw.get('fields', '*')
        if isinstance(fields, (tuple, list)):
            # Work on a copy: tuples have no append(), and the caller's list
            # must not grow an extra 'id' entry as a side effect.
            fields = list(fields)
            if 'id' not in fields:
                fields.append('id')
            fields = ','.join(fields)

        dbinf = {
            'dbname': host_info['dbname'],
            'table': host_info['table'],
            'ids': ",".join(ids),
            'fields': fields,
        }

        res, desc = dbhandle.getbyid(dbinf)
        if res == 0 and desc:
            # Re-order the database rows to follow the search ranking.
            rows = {str(row['id']): row for row in desc}
            result['data'] = [rows[k] for k in ids if k in rows]
    return 0, result
				
	
Example #4
0
def get_stat_data(name, info):
    """Collect statistics from the search engine and push them over HTTP.

    For every entry in ``info['history_from']`` whose source is of the
    ``sphinx`` kind, the entry's query expression is run page by page.  Each
    page of matches is turned into ``{key_prefix + key: [value, timestamp]}``
    and POSTed as JSON (together with ``name`` as ``gkey``) to the stats
    endpoint; the endpoint's response is printed.

    Args:
        name: Group key sent along with every batch.
        info: Configuration holding the ``'history_from'`` list.

    Returns:
        ``[-1, message]`` when a source has no host entry, otherwise ``None``.

    NOTE(review): ``expression['page']`` is advanced in place on the
    configuration dict, so a later call sees the last page reached here --
    confirm whether the configuration is reloaded between calls.
    """
    # Submission goes through a JSON HTTP POST.
    http = HttpWrap()
    http.set_header('Content-type', 'application/json')
    url = "http://192.168.10.126:1985/api/set"

    for itm in info['history_from']:
        source = itm['source'].split('.')
        if source[1] != 'sphinx':
            continue

        host_info = get_host_by_data(itm['source'])
        if not host_info:
            # Report the entry being processed (the original referenced an
            # undefined name here and crashed instead of returning).
            return [-1, "key erro %s not in sysconfig." % itm['source']]

        sp = sphinx(host_info['host'], host_info['port'])
        expression = itm['expression']
        expression['index'] = source[2]

        while True:
            sp.initQuery(expression)
            rs = sp.RunQueries()
            if not (rs and rs[0]['status'] == 0):
                print(sp._error)
                break

            total_found = rs[0]['total_found']
            _items = {}
            for row in rs[0]['matches']:
                _items["%s%s" % (itm['key_prefix'], row['attrs'][itm['key']])] = [
                    row['attrs'][itm['value']],
                    utils.timestamp(0, 'd'),
                ]
            if _items:
                data = json.dumps({'gkey': name, 'data': _items})
                _rs = http.request(url, "POST", data)
                print(http.read(_rs))

            # Stop once every hit has been covered.  Checking after the fetch
            # also ends the loop when nothing was found at all, which
            # previously repeated the same query forever.
            if expression['pageSize'] * expression['page'] >= total_found:
                break
            expression['page'] += 1
Example #5
0
def proxy(kw):
    """Forward a batch of raw query descriptions to the search engine.

    ``kw['param']`` (removed from ``kw``) is a list of query dicts, e.g.::

        param=[
            {'querymod': 'SPH_MATCH_EXTENDED2',
             'pageSize': 0,
             'page': 1,
             'intType': {'r_tag_id': '101', 'status': '1', 'isimg': '1'},
             'orderBy': 'SPH_SORT_EXPR|FLOOR(log2(@weight))+mem_level*6+isimg*10+IDIV(pub_time,2592000)',
             'groupBy': 'com_id,SPH_GROUPBY_ATTR,isimg desc pub_time desc',
             'weight': {'title': 8, 'com_name': 4, 'keyword': 2},
             'index': 'IDX_pro_info_dist'}
        ]

    The search server is chosen from the ``index`` of the first query only.

    Returns:
        ``err_handle.sphinx_param_not_set`` when no queries were supplied,
        ``err_handle.sphinx_index_not_found`` when the first query has no
        index or the index has no host entry, otherwise ``[0, results]``.
    """
    queries = kw.pop("param", [])

    # The main parameter must be present.
    if not queries:
        return err_handle.sphinx_param_not_set

    # The first query decides which index (and therefore server) is used.
    first = queries[0]
    if 'index' not in first:
        return err_handle.sphinx_index_not_found

    # Make sure a search server is registered for that index.
    host_info = get_host_by_data("base.sphinx.%s" % first['index'])
    if not host_info:
        return err_handle.sphinx_index_not_found

    # Connect, queue every query, then run them together.
    client = sphinx(host_info['host'], host_info['port'])
    for query in queries:
        client.initQuery(query)

    return [0, client.RunQueries()]
Example #6
0
def prodlist(kw):
    """Return, for each requested tag id, the database rows of its search hits.

    test:
     curl -l -H "Content-Type: application/json" -X POST -d  '{"biznum":"tags.corplist","tag_id":[101]}'     http://192.168.10.126:6000/biz_tags/prodlist

    Args:
        kw: Request parameters.  ``biznum`` selects the business
            configuration; ``tag_id`` is a list of tag ids whose length in
            digits (3, 6 or 9) selects the filter attribute; ``pageSize`` and
            ``page`` optionally override the configured defaults.

    Returns:
        ``[-6, message]`` when ``biznum`` is missing or a tag id has an
        unsupported length, a ``sconf`` error value when the configuration or
        search host is missing, otherwise ``[0, result]`` where ``result``
        holds one row list per result set that produced rows (or a single
        empty list when the search returned nothing).
    """
    tag_id = kw.get('tag_id', [])
    biznum = kw.get('biznum', '')

    if not biznum:
        return [-6, 'parameter biznum not set.']

    # Load the business configuration.
    bizcnf = get_cnf_val(biznum, sconf.BIZ)
    if not bizcnf:
        return sconf.err_handle.biznum_not_config

    search_cnf = bizcnf['search']
    defaults = search_cnf['expression']
    pageSize = kw.get('pageSize', defaults['pageSize'])
    page = kw.get('page', defaults['page'])

    # Connect to the search engine.
    host_info = get_host_by_data(search_cnf['source'])
    if not host_info:
        return sconf.sphinx_index_not_found

    # NOTE(review): "_tag_id" for 9-digit tags is kept from the original;
    # the r_/c_ naming suggests it may be meant to be "tag_id" -- confirm
    # against the index schema.
    field_by_length = {3: "r_tag_id", 6: "c_tag_id", 9: "_tag_id"}
    tag_filters = []
    for tid in tag_id:
        field = field_by_length.get(len(str(tid)))
        if field is None:
            # Previously this crashed with an unbound name or silently reused
            # the previous tag's attribute.
            return [-6, 'invalid tag_id %s: expected 3, 6 or 9 digits.' % (tid,)]
        tag_filters.append((field, tid))

    sp = sphinx(host_info['host'], host_info['port'])

    # Build the query from a copy so that per-request overrides and tag
    # filters do not leak into the shared configuration.
    base = dict(defaults)
    base['index'] = search_cnf['source'].split('.')[-1]
    base['pageSize'] = pageSize
    base['page'] = page

    st = int(time.time())
    for field, tid in tag_filters:
        # One independent query per tag; filters of earlier tags must not
        # carry over into later ones.
        query = dict(base)
        query['intType'] = dict(base['intType'])
        query['intType'][field] = str(tid)
        sp.initQuery(query)
    rs = sp.RunQueries()
    print("search_time:", time.time() - st)

    result = []
    if rs:
        # Connect to the database.
        data_cnf = bizcnf['data']
        dbinfo = get_host_by_data(data_cnf['source'])
        dbinfo['dbname'] = data_cnf['source'].split('.')[-1]
        db = dbclass(dbinfo)
        db.connect()
        for row in rs:
            sql_item = data_cnf.copy()
            ids = [str(k['id']) for k in row['matches']]
            print("ids:", ids)
            sql_item['where'] = sql_item['where'] % ",".join(ids)
            res, desc = db.query(sql_item)
            if res == 0 and desc:
                result.append(desc)
    else:
        result.append([])
    return [0, result]
Example #7
0
def sphinx2redis_ex(cnfkey):
    """Count search hits per tag and cache the non-zero counts in redis.

    Reads tag ids from the ``pro_tag`` table, runs one single-row search per
    tag against ``IDX_com_corp_dist`` filtered on the attribute selected by
    the tag id's length (3, 6 or 9 digits), stores each positive
    ``total_found`` in the ``stat2cach.tags_corp`` hash and finally prints
    per-length totals.  Tags of any other length are reported and skipped.

    Returns:
        ``sconf.err_handle.biznum_not_config`` when no configuration exists
        for ``cnfkey``; otherwise ``None``.
    """
    # Load the business configuration.
    bizcnf = get_cnf_val(cnfkey, sconf.BIZ)
    if not bizcnf:
        return sconf.err_handle.biznum_not_config

    rdb = rediswrap.get_redis('cache')
    dbinfo = get_host_by_data("base.mysql.biz72_product")
    dbinfo['dbname'] = "biz72_product"
    db = mysqlwrap.dbclass(dbinfo)
    db.connect()

    sql = "select tag_id from pro_tag where tag_id>100 and tag_id<200"
    res, desc = db.query(sql)
    if res != 0:
        # A non-zero status means the query failed; there is nothing usable
        # to iterate over.
        print("query failed:", res, desc)
        return None
    desc = desc or []
    print("rows:", len(desc))

    source = "base.sphinx.IDX_com_corp_dist"
    host_info = get_host_by_data(source)
    sp = sphinx(host_info['host'], host_info['port'])

    field_by_length = {3: "r_com", 6: "c_com", 9: "com"}
    # Per-length result maps (tag_id -> total_found), reported at the end.
    itm1 = {}
    itm2 = {}
    itm3 = {}
    totals_by_length = {3: itm1, 6: itm2, 9: itm3}

    st = int(time.time())
    for x, row in enumerate(desc, 1):
        if x % 100 == 0:
            print(x, time.time() - st)

        tag = row['tag_id']
        tag_len = len(str(tag))
        field = field_by_length.get(tag_len)
        if field is None:
            # Previously this crashed with an unbound name or silently reused
            # the previous row's attribute.
            print("skip tag_id", tag, "unsupported length", tag_len)
            continue

        # Fresh filter per tag so attributes from earlier rows do not
        # accumulate in the query.
        prama = {
            "querymod": "SPH_MATCH_EXTENDED2",
            "pageSize": 1,
            "page": 1,
            "intType": {field: str(tag)},
            "index": "IDX_com_corp_dist",
        }
        sp.initQuery(prama)
        rs = sp.RunQueries()

        if rs and rs[0]['status'] == 0:
            total_found = rs[0]['total_found']
            print("tag_id", tag, "total_found", total_found)
            if total_found > 0:
                rdb.hset('stat2cach.tags_corp', tag, total_found)
            totals_by_length[tag_len][tag] = total_found
        else:
            print(sp._error)

    print("time:", time.time() - st)
    print("itm1", "len:", len(itm1), "sum:", sum(itm1.values()))
    print("itm2", "len:", len(itm2), "sum:", sum(itm2.values()))
    print("itm3", "len:", len(itm3), "sum:", sum(itm3.values()))