def es_delete():
    """Delete one document from an Elasticsearch index.

    Reads the JSON body of the current request:

    * ``index_name`` -- index to delete from; falls back to
      ``'product_name_index_smart'`` when absent or empty.
    * ``sku`` -- forwarded to ``delete_one`` as its third argument.

    Returns:
        ``res_failure(...)`` when the body is not a JSON object or when
        ``req_params_required`` reports something for ``index_name``/``sku``;
        otherwise ``res_success(...)`` wrapping the ``delete_one`` result.
    """
    req_json = request.get_json()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('post req params : %s' % str(req_json))

    # Presumably ``request`` is Flask's request proxy, whose get_json() can
    # return None (e.g. non-JSON content type); without this guard the
    # ``.get`` calls below would raise AttributeError.
    if not isinstance(req_json, dict):
        return res_failure('es delete failure, because a JSON object body is necessary')

    not_required = req_params_required(['index_name', 'sku'], req_json)
    if not_required:
        return res_failure('es delete failure, because %s is necessary' % not_required)

    # NOTE(review): 'index_name' is both validated as required above and
    # given a default here -- confirm which of the two is intended.
    index_name = req_json.get('index_name') or 'product_name_index_smart'
    data = delete_one(index_name, 'fulltext', req_json.get("sku"))
    return res_success(data, 'es delete success')
def _es_search_int(req_json, key, default):
    """Return ``int(req_json[key])``, or ``default`` when the key is absent or None."""
    raw = req_json.get(key)
    if raw is None:
        return default
    return int(raw)


def es_search():
    """Run a keyword search against an Elasticsearch index.

    Reads the JSON body of the current request:

    * ``index_name`` -- index to search; falls back to
      ``'product_name_index_smart'`` when absent or empty.
    * ``keyword`` -- search text forwarded to ``collapse_search``.
    * ``page_no`` -- integer page index, default 0.
    * ``page_size`` -- integer page size, default 10000 (0 also selects the
      default, as before).
    * ``sort_by`` -- optional field name forwarded to ``collapse_search``.
    * ``desc`` -- integer flag, default 1; an explicit 0 is honoured.

    Returns:
        ``res_failure(...)`` when the body is not a JSON object, when
        ``req_params_required`` reports something for
        ``index_name``/``keyword``, or when a numeric parameter cannot be
        parsed; otherwise ``res_success(...)`` wrapping the
        ``collapse_search`` result.
    """
    req_json = request.get_json()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('post req params : %s' % str(req_json))

    # Presumably ``request`` is Flask's request proxy, whose get_json() can
    # return None; guard so the ``.get`` calls below cannot raise.
    if not isinstance(req_json, dict):
        return res_failure('es search failure, because a JSON object body is necessary')

    # Validate before parsing so a missing parameter yields the intended
    # failure response rather than an exception from int().
    not_required = req_params_required(['index_name', 'keyword'], req_json)
    if not_required:
        return res_failure('es search failure, because %s is necessary' % not_required)

    try:
        page_no = _es_search_int(req_json, 'page_no', 0)
        # ``or 10000`` keeps the old "0 means default" rule for the page size.
        page_size = _es_search_int(req_json, 'page_size', 10000) or 10000
        # No ``or 1`` here: the old ``int(...) or 1`` silently turned an
        # explicit desc=0 into 1, making ascending order unreachable.
        desc = _es_search_int(req_json, 'desc', 1)
    except (TypeError, ValueError):
        return res_failure('es search failure, because page_no, page_size and desc must be integers')

    # NOTE(review): 'index_name' is both validated as required above and
    # given a default here -- confirm which of the two is intended.
    index_name = req_json.get('index_name') or 'product_name_index_smart'
    # NOTE(review): collapse_search as defined in this file already returns a
    # JSON string of res_success/res_failure, so the payload ends up wrapped
    # twice -- confirm that clients expect this.
    data = collapse_search(index_name, "fulltext", req_json.get('keyword'),
                           page_size, page_no, req_json.get('sort_by'), desc)
    return res_success(data, 'es search success')
def es_update():
    """Insert or update one product document in an Elasticsearch index.

    Reads the JSON body of the current request:

    * ``index_name`` -- target index; falls back to
      ``'product_name_index_smart'`` when absent or empty.
    * ``sku`` -- forwarded to ``update_one`` as its third argument and stored
      in the document.
    * ``spu`` -- optional; defaults to ``"TmpSKU"`` followed by the sku.
    * ``product_name``, ``product_no``, ``ts_score`` -- stored in the document.
    * ``price``, ``sales_volume`` -- checked by ``req_params_required`` but
      not stored (see the review note in the body).

    Returns:
        ``res_failure(...)`` when the body is not a JSON object or when
        ``req_params_required`` reports something; otherwise
        ``res_success(...)`` wrapping the ``update_one`` result.
    """
    req_json = request.get_json()
    logger.info('Params:' + json.dumps(req_json))
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('post req params : %s' % str(req_json))

    # Presumably ``request`` is Flask's request proxy, whose get_json() can
    # return None; guard so the ``.get`` calls below cannot raise.
    if not isinstance(req_json, dict):
        return res_failure('es update failure, because a JSON object body is necessary')

    # Validate first: building the default spu used to run before this check
    # and raised TypeError on a missing sku instead of reporting it.
    not_required = req_params_required(
        ['index_name', 'sku', 'product_name', 'product_no', 'price', 'sales_volume'],
        req_json)
    if not_required:
        return res_failure('es update failure, because %s is necessary' % not_required)

    # NOTE(review): 'index_name' is both validated as required above and
    # given a default here -- confirm which of the two is intended.
    index_name = req_json.get('index_name') or 'product_name_index_smart'
    sku = req_json.get('sku')
    # %-formatting also accepts a numeric sku, where "TmpSKU" + sku raised.
    spu = req_json.get('spu') or "TmpSKU%s" % (sku,)

    # NOTE(review): 'price' and 'sales_volume' are required above but were
    # never written to the document in the original code either -- confirm
    # whether they should be indexed.
    data_dict = {
        "sku": sku,
        "spu": spu,
        "product_name": req_json.get("product_name"),
        "product_no": req_json.get("product_no"),
        "ts_score": req_json.get("ts_score"),
    }
    data = update_one(index_name, 'fulltext', sku, data_dict)
    return res_success(data, 'es update success')
def es_update_index():
    """Create the product index layout and bulk-load documents into it.

    Reads ``index_name`` from the JSON body of the current request, falling
    back to ``'product_name_index_smart'`` when absent or empty. It then calls
    ``create_table`` with the field list below and ``batch_insert_es`` with a
    client connected to ``http://localhost:9200/``.

    Returns:
        ``res_failure(...)`` when the body is not a JSON object or when
        ``req_params_required`` reports something for ``index_name``;
        otherwise ``res_success(...)`` wrapping the value returned by
        ``batch_insert_es``.
    """
    req_json = request.get_json()
    logger.info('Params:' + json.dumps(req_json))
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('post req params : %s' % str(req_json))

    # Presumably ``request`` is Flask's request proxy, whose get_json() can
    # return None; guard so the ``.get`` call below cannot raise.
    if not isinstance(req_json, dict):
        return res_failure('es update index failure, because a JSON object body is necessary')

    not_required = req_params_required(['index_name'], req_json)
    if not_required:
        return res_failure('es update index failure, because %s is necessary' % not_required)

    # NOTE(review): 'index_name' is both validated as required above and
    # given a default here -- confirm which of the two is intended.
    index_name = req_json.get('index_name') or 'product_name_index_smart'
    type_name = 'fulltext'

    # NOTE(review): the host is hard-coded here while collapse_search reads a
    # module-level ``es_domain``; the timeout value is kept as-is -- confirm
    # its unit against the Elasticsearch client in use.
    es = Elasticsearch(hosts=['http://localhost:9200/'], timeout=5000)

    create_table(index_name, type_name, [
        ['product_name', 'text'],
        ['product_no', 'keyword'],
        ['ts_score', 'keyword'],
        ['sku', 'keyword'],
        ['spu', 'keyword'],
    ])
    count = batch_insert_es(es, index_name, type_name)
    return res_success(count, 'es update index success')
def collapse_search(index_name, type_name, keyword, result_max_size, result_from, sort_by, desc):
    """Search ``product_name`` for ``keyword`` and return one page of hits.

    Args:
        index_name: index segment of the ``_search`` URL.
        type_name: type segment of the ``_search`` URL.
        keyword: text matched against the ``product_name`` field.
        result_max_size: page size (converted with ``int``).
        result_from: zero-based page number (converted with ``int``).
        sort_by: name of a ``_source`` field to re-sort the page by; the
            values are coerced to ``float`` and missing/None values count as
            0.0. When falsy, the order returned by Elasticsearch is kept.
        desc: truthy integer for descending order, 0 for ascending.

    Returns:
        A JSON string: ``res_success(<list of _source dicts, each with an
        added '_score' key>, "es query success")`` when the request succeeds
        and yields hits, otherwise ``res_failure("es query failure")``. An
        empty result set is reported as failure, as in the original code.

    Raises:
        ValueError/TypeError: if the page parameters, ``desc`` or a
            ``sort_by`` value cannot be converted to a number.
    """
    page_size = int(result_max_size)
    page_no = int(result_from)

    data_dict = {
        "query": {"match": {"product_name": keyword}},
        "size": page_size,
        "highlight": {
            "pre_tags": ["<tag1>", "<tag2>"],
            "post_tags": ["</tag1>", "</tag2>"],
            "fields": {
                "product_name": {},
                "sku": {},
                "spu": {},
                "product_no": {},
                "ts_score": {},
                "sales_volume": {},
                "price": {}
            }
        },
        # Collapse de-duplicates by spu: for each spu only the best-scoring
        # document is returned. Reference: https://elasticsearch.cn/article/132
        "collapse": {"field": "spu"},
        # Elasticsearch's "from" is a hit offset, not a page index. The old
        # code sent the page number here and then sliced the (at most
        # page_size) hits by page again, so every page after 0 came back
        # empty. Paging is now done once, by Elasticsearch.
        "from": page_no * page_size
    }
    failure = json.dumps(res_failure("es query failure"))

    response = requests.post("%s%s/%s/_search" % (es_domain, index_name, type_name), json=data_dict)
    if response.status_code != 200:
        return failure

    res_obj = response.json()
    if 'error' in res_obj:
        return failure

    hits = res_obj['hits'] and res_obj['hits']['hits']
    if not hits:
        return failure

    if sort_by:
        # Normalise the sort field so every hit compares as a float. Skipped
        # when no sort field is given: the old code then wrote a ``None`` key
        # into every returned document.
        for item in hits:
            source = item['_source']
            value = source.get(sort_by)
            source[sort_by] = 0.0 if value is None else float(value)
        # key/reverse works on Python 2 and 3 and, like the former cmp
        # function, keeps equal elements in their original order.
        hits.sort(key=lambda item: item['_source'][sort_by], reverse=bool(int(desc)))

    seen_spus = set()
    ans_data = []
    for item in hits:
        source = item['_source']
        spu = source['spu']
        if spu in seen_spus:
            continue
        seen_spus.add(spu)
        source['_score'] = item['_score']
        ans_data.append(source)

    return json.dumps(res_success(ans_data, "es query success"))