def sphinx():
    """Run one hard-coded test query against a Sphinx server and time it.

    Builds a fixed query for the ``IDX_pro_info_dist`` index, sends it to the
    server at 183.60.177.157:9501, prints the elapsed time and increments the
    module-level counter ``num``.

    Returns:
        tuple: ``(status, time, total_found, warning)`` read from the first
        result.  A field that is missing from the result -- or every field,
        when the server returned no result at all -- falls back to ``-1``,
        ``0``, ``0`` and ``''`` respectively.

    NOTE(review): this function has the same name as the client class it
    instantiates below.  If both live in the same module namespace the call
    ``sphinx(host, port)`` resolves to this zero-argument function and fails
    -- confirm how the client class is imported.
    """
    global num
    # The start time is truncated to whole seconds, so the printed duration
    # can be up to one second too long.
    started = int(time.time())

    # An optional 'keyw' entry (an extended-syntax keyword expression) can be
    # added to this query; it is not sent by default.
    item = {
        'querymod': 'SPH_MATCH_EXTENDED2',
        'pageSize': 1,
        'page': 1,
        'intType': {
            'status': '1',
            'isimg': '1',
        },
        'intRange': {
            'province': '101101,101111,0',
            'city': '101101101,109102101,0',
        },
        'orderBy': 'SPH_SORT_EXPR|FLOOR(log2(@weight))+mem_level*6+isimg*10+IDIV(pub_time,2592000)',
        'weight': {'title': 8, 'com_name': 4, 'keyword': 2},
        'index': 'IDX_pro_info_dist',
    }

    client = sphinx('183.60.177.157', 9501)
    client.initQuery(item)
    rs = client.RunQueries()
    if rs:
        res = rs[0]
    else:
        # No result at all: report it and fall back to the defaults below
        # instead of failing on an unbound ``res``.
        res = {}
        print(rs)
    print('useritme:', time.time() - started)
    num += 1

    status = res.get('status', -1)
    _time = res.get('time', 0)
    total_found = res.get('total_found', 0)
    warning = res.get('warning', '')
    return (status, _time, total_found, warning)
def sphinx2redis(cnfkey):
    """Collect statistics from Sphinx as described by a business config.

    Loads the business configuration stored under ``cnfkey`` and walks its
    ``'prama'`` list.  Every entry whose source names a ``sphinx`` backend is
    queried once; the result is pretty-printed and the key -> value pairs
    built from the match attributes named by ``itm['key']`` and
    ``itm['value']`` are printed.  Writing the pairs to Redis is currently
    disabled.

    Returns:
        An error object from ``sconf`` when the business configuration or the
        search host cannot be resolved; otherwise ``None``.
    """
    # Load the business configuration.
    bizcnf = get_cnf_val(cnfkey, sconf.BIZ)
    if not bizcnf:
        return sconf.err_handle.biznum_not_config

    # Only needed by the (disabled) Redis write further down.
    rdb = rediswrap.get_redis('cache')

    for itm in bizcnf['prama']:
        source_parts = itm['source'].split('.')
        if source_parts[1] != 'sphinx':
            continue

        host_info = get_host_by_data(itm['source'])
        if not host_info:
            # NOTE(review): the other error path goes through
            # ``sconf.err_handle`` -- confirm this attribute exists on sconf.
            return sconf.sphinx_index_not_found

        sp = sphinx(host_info['host'], host_info['port'])
        query = itm['expression']
        query['index'] = source_parts[2]
        total_found = 0

        # Paging is switched off: each source is queried exactly once.
        sp.initQuery(query)
        rs = sp.RunQueries()
        pprint(rs)

        if not (rs and rs[0]['status'] == 0):
            print(sp._error)
            continue

        total_found = rs[0]['total_found']
        # Keep only the matches that carry both configured attributes.
        pairs = {
            match['attrs'][itm['key']]: match['attrs'][itm['value']]
            for match in rs[0]['matches']
            if itm['key'] in match['attrs'] and itm['value'] in match['attrs']
        }
        if pairs:
            print(pairs)
            # Disabled: res = rdb.mset(cnfkey, pairs)
def search(kw):
    """Run a search query and load the matching rows from the database.

    Args:
        kw: query description handed to the search client.  ``kw['index']``
            is a logical index name; it is replaced *in place* by the
            physical index name returned by ``sconf.get_search_info``.
            The optional ``kw['fields']`` selects the columns to load: a
            ready-made string, or a list/tuple of column names (``'id'`` is
            added automatically).  Defaults to ``'*'``.

    Returns:
        tuple: ``(-1, error)`` when the search engine returned nothing,
        otherwise ``(0, result)``.  For a successful query ``result`` holds
        ``total_found``, ``total``, ``time`` and -- when rows could be
        loaded -- ``data``, ordered like the search matches.  For a failed
        query it holds ``warning`` and ``error``.
    """
    index = kw.get('index')
    host_info = sconf.get_search_info(index)
    kw['index'] = host_info['index']

    sp = sphinx(host_info['info']['host'], host_info['info']['port'])
    sp.initQuery(kw)
    rs = sp.RunQueries()
    if not rs:
        return -1, sp._error

    first = rs[0]
    result = {}
    if first['status'] != 0:
        result['warning'] = first['warning']
        result['error'] = first['error']
        return 0, result

    result['total_found'] = first['total_found']
    result['total'] = first['total']
    result['time'] = first['time']

    ids = [str(row['id']) for row in first['matches'] if 'id' in row]
    if not ids:
        return 0, result

    fields = kw.get('fields', '*')
    if isinstance(fields, (tuple, list)):
        # Work on a copy: a tuple has no append(), and the caller's list
        # must not grow a stray 'id' entry as a side effect.
        fields = list(fields)
        if 'id' not in fields:
            # 'id' is required to map the loaded rows back to the matches.
            fields.append('id')
        fields = ','.join(fields)

    dbinf = {
        'dbname': host_info['dbname'],
        'table': host_info['table'],
        'ids': ",".join(ids),
        'fields': fields,
    }
    res, desc = dbhandle.getbyid(dbinf)
    if res == 0 and desc:
        rows = {str(row['id']): row for row in desc}
        # Preserve the ranking order delivered by the search engine.
        result['data'] = [rows[k] for k in ids if k in rows]
    return 0, result
def get_stat_data(name, info):
    """Collect statistics from Sphinx and POST them to the stat service.

    For every entry of ``info['history_from']`` whose source names a
    ``sphinx`` backend, the entry's query is run page by page.  Each page of
    matches is turned into ``{key_prefix + key: [value, timestamp]}`` and
    posted as JSON to the stat service together with ``name`` as ``gkey``.

    Args:
        name: group key sent along with every batch.
        info: configuration dict; only ``info['history_from']`` is read.
            The configured query expression is copied before use, so the
            page counter advanced here does not leak into later calls.

    Returns:
        ``[-1, message]`` when a source has no host configured, otherwise
        ``None``.
    """
    # Results are submitted over HTTP.
    http = HttpWrap()
    http.set_header('Content-type', 'application/json')
    url = "http://192.168.10.126:1985/api/set"

    for itm in info['history_from']:
        source = itm['source'].split('.')
        if source[1] != 'sphinx':
            continue

        host_info = get_host_by_data(itm['source'])
        if not host_info:
            # Report the source of the current entry.
            return [-1, "key erro %s not in sysconfig." % itm['source']]

        sp = sphinx(host_info['host'], host_info['port'])
        # Shallow copy: 'index' and 'page' are modified below.
        expression = dict(itm['expression'])
        expression['index'] = source[2]

        while True:
            sp.initQuery(expression)
            rs = sp.RunQueries()
            if not (rs and rs[0]['status'] == 0):
                print(sp._error)
                break

            total_found = rs[0]['total_found']
            matches = rs[0]['matches']
            _items = {}
            for row in matches:
                key = "%s%s" % (itm['key_prefix'], row['attrs'][itm['key']])
                _items[key] = [row['attrs'][itm['value']], utils.timestamp(0, 'd')]
            if _items:
                data = json.dumps({'gkey': name, 'data': _items})
                _rs = http.request(url, "POST", data)
                reply = http.read(_rs)
                print(reply)

            # Stop once the last page was fetched.  This also covers an
            # empty result set (total_found == 0), which must not be
            # re-queried forever.
            if not matches or expression['pageSize'] * expression['page'] >= total_found:
                break
            expression['page'] += 1
def proxy(kw):
    """Forward a batch of raw query descriptions to the search engine.

    ``kw['param']`` is a list of query dicts; it is removed from ``kw`` by
    this call.  The ``index`` of the first query selects the search server;
    all queries are then sent to that server in one batch.

    Example::

        param = [
            {'querymod': 'SPH_MATCH_EXTENDED2',
             'pageSize': 0,
             'page': 1,
             'intType': {'r_tag_id': '101', 'status': '1', 'isimg': '1'},
             'orderBy': 'SPH_SORT_EXPR|FLOOR(log2(@weight))+mem_level*6+isimg*10+IDIV(pub_time,2592000)',
             'groupBy': 'com_id,SPH_GROUPBY_ATTR,isimg desc pub_time desc',
             'weight': {'title': 8, 'com_name': 4, 'keyword': 2},
             'index': 'IDX_pro_info_dist'}
        ]

    Returns:
        An ``err_handle`` error object when ``param`` is missing/empty, the
        first query has no ``index``, or no server is configured for that
        index; otherwise ``[0, results]`` with whatever the client's
        ``RunQueries()`` returned.
    """
    queries = kw.pop("param", [])

    # The main parameter must be present.
    if not queries:
        return err_handle.sphinx_param_not_set

    # The first query decides which index (and therefore server) is used.
    first_query = queries[0]
    if 'index' not in first_query:
        return err_handle.sphinx_index_not_found

    # A search server must be configured for that index.
    host_info = get_host_by_data("base.sphinx.%s" % first_query['index'])
    if not host_info:
        return err_handle.sphinx_index_not_found

    # Connect, queue every query, then run them as one batch.
    sp = sphinx(host_info['host'], host_info['port'])
    for query in queries:
        sp.initQuery(query)
    return [0, sp.RunQueries()]
def prodlist(kw):
    """List products per tag: search by tag, then load the rows from the DB.

    test:
    curl -l -H "Content-Type: application/json" -X POST -d '{"biznum":"tags.corplist","tag_id":[101]}' http://192.168.10.126:6000/biz_tags/prodlist

    Args:
        kw: request dict.  ``biznum`` (required) names the business
            configuration; ``tag_id`` is a list of tag ids; ``pageSize`` and
            ``page`` optionally override the configured values.

    Returns:
        ``[-6, message]`` for a missing ``biznum`` or a tag id whose length
        is not 3, 6 or 9 digits; an error object from ``sconf`` when the
        business configuration or search host is missing; otherwise
        ``[0, result]`` where ``result`` holds one list of rows per search
        result (an empty list when nothing was found or loading failed).
    """
    tag_ids = kw.get('tag_id', [])
    biznum = kw.get('biznum', '')
    if not biznum:
        return [-6, 'parameter biznum not set.']

    # Load the business configuration.
    bizcnf = get_cnf_val(biznum, sconf.BIZ)
    if not bizcnf:
        return sconf.err_handle.biznum_not_config

    search_cnf = bizcnf['search']
    # Work on a copy so per-request paging and tag filters never leak into
    # the shared configuration (and from there into later requests).
    base_expression = dict(search_cnf['expression'])
    base_expression['pageSize'] = kw.get('pageSize', base_expression['pageSize'])
    base_expression['page'] = kw.get('page', base_expression['page'])
    base_expression['index'] = search_cnf['source'].split('.')[-1]

    # Connect to the search engine.
    host_info = get_host_by_data(search_cnf['source'])
    if not host_info:
        # NOTE(review): the other error path goes through
        # ``sconf.err_handle`` -- confirm this attribute exists on sconf.
        return sconf.sphinx_index_not_found
    sp = sphinx(host_info['host'], host_info['port'])

    # The number of digits of a tag id selects the attribute to filter on.
    # NOTE(review): "_tag_id" for 9-digit ids is kept as found -- confirm it
    # matches the attribute name in the index.
    field_by_len = {3: "r_tag_id", 6: "c_tag_id", 9: "_tag_id"}

    st = int(time.time())
    for tid in tag_ids:
        field = field_by_len.get(len(str(tid)))
        if field is None:
            return [-6, 'parameter tag_id invalid: %s' % (tid,)]
        # One independent query per tag, each with exactly one tag filter.
        expression = dict(base_expression)
        int_type = dict(base_expression.get('intType') or {})
        int_type[field] = str(tid)
        expression['intType'] = int_type
        sp.initQuery(expression)
    rs = sp.RunQueries()
    print("search_time:", time.time() - st)

    result = []
    if rs:
        # Connect to the database.
        dbinfo = get_host_by_data(bizcnf['data']['source'])
        dbinfo['dbname'] = bizcnf['data']['source'].split('.')[-1]
        db = dbclass(dbinfo)
        db.connect()
        for row in rs:
            # A failed query may carry no 'matches' at all.
            matches = row['matches'] if 'matches' in row else []
            ids = [str(match['id']) for match in matches]
            print("ids:", ids)
            if not ids:
                # Nothing to load; do not format the SQL template with an
                # empty id list.
                result.append([])
                continue
            sql_item = bizcnf['data'].copy()
            sql_item['where'] = sql_item['where'] % ",".join(ids)
            res, desc = db.query(sql_item)
            result.append(desc if res == 0 and desc else [])
    return [0, result]
def sphinx2redis_ex(cnfkey):
    """Count companies per tag in Sphinx and cache the counts in Redis.

    Reads the tag ids from table ``pro_tag`` (``tag_id`` between 100 and
    200, exclusive), asks the ``IDX_com_corp_dist`` index how many documents
    carry each tag and stores every positive count in the Redis hash
    ``stat2cach.tags_corp``.  Progress, timing and per-level totals are
    printed.

    Args:
        cnfkey: business configuration key; the configuration only has to
            exist, its content is not used here.

    Returns:
        ``sconf.err_handle.biznum_not_config`` when the business
        configuration is missing, otherwise ``None``.
    """
    # Load the business configuration.
    bizcnf = get_cnf_val(cnfkey, sconf.BIZ)
    if not bizcnf:
        return sconf.err_handle.biznum_not_config

    rdb = rediswrap.get_redis('cache')

    dbinfo = get_host_by_data("base.mysql.biz72_product")
    dbinfo['dbname'] = "biz72_product"
    db = mysqlwrap.dbclass(dbinfo)
    db.connect()
    sql = "select tag_id from pro_tag where tag_id>100 and tag_id<200"
    res, desc = db.query(sql)
    if res != 0 or not desc:
        # A failed or empty query leaves nothing to process.
        desc = []
    print("rows:", len(desc))

    host_info = get_host_by_data("base.sphinx.IDX_com_corp_dist")
    sp = sphinx(host_info['host'], host_info['port'])

    # The number of digits of a tag id selects the attribute to filter on.
    field_by_len = {3: "r_com", 6: "c_com", 9: "com"}
    # Positive counts per tag id, grouped by tag-id length.
    found_by_len = {3: {}, 6: {}, 9: {}}

    st = int(time.time())
    for x, row in enumerate(desc, 1):
        if x % 100 == 0:
            print(x, time.time() - st)

        tag_id = row['tag_id']
        tag_len = len(str(tag_id))
        field = field_by_len.get(tag_len)
        if field is None:
            # Unknown tag level: there is no attribute to filter on.
            print("skip tag_id", tag_id)
            continue

        # A fresh query per tag, so a filter set for one tag can never
        # narrow the count of another.
        prama = {
            "querymod": "SPH_MATCH_EXTENDED2",
            "pageSize": 1,
            "page": 1,
            "intType": {field: str(tag_id)},
            "index": "IDX_com_corp_dist",
        }
        sp.initQuery(prama)
        rs = sp.RunQueries()
        if not (rs and rs[0]['status'] == 0):
            print(sp._error)
            continue

        total_found = rs[0]['total_found']
        print("tag_id", tag_id, "total_found", total_found)
        if total_found > 0:
            # NOTE(review): only positive counts are cached and tallied --
            # confirm the tallies were meant to exclude zero counts.
            rdb.hset('stat2cach.tags_corp', tag_id, total_found)
            found_by_len[tag_len][tag_id] = total_found

    print("time:", time.time() - st)
    itm1, itm2, itm3 = found_by_len[3], found_by_len[6], found_by_len[9]
    print("itm1", "len:", len(itm1), "sum:", sum(itm1.values()))
    print("itm2", "len:", len(itm2), "sum:", sum(itm2.values()))
    print("itm3", "len:", len(itm3), "sum:", sum(itm3.values()))