def test23():
    '''
    Seed two biz-test Solr collections with sample documents built around
    the test keyword used in the literals below.

    Two documents are added to ``biztest_robot_news``; two more are sent to
    ``biztest_topics`` with ``set`` field updates for ``title`` and
    ``content``.

    :return: None
    '''
    from add_data_to_solr.cy_solr_local.solr_base import SolrHelper, SolrCloud, ZooKeeper
    from general_utils.solr_utils import SolrQuery

    # Not used below; kept as a reference list of the biz-test collections.
    alltables = ["biztest_hospital_search",
                 "biztest_main_doctors",
                 "biztest_personal_doctors",
                 "biztest_robot_news",
                 "biztest_problem",
                 "biztest_dialog",
                 "biztest_full_problem",
                 "biztest_drug",
                 "biztest_topics",
                 "biztest_pedia"]

    zk_hosts = "rd1:2181,rd2:2181"

    # robot news
    news_rows = (('9000', '辅舒良_test_news1'), ('9001', '辅舒良_test_news2'))
    news_docs = [
        {'id': doc_id, 'title': title, 'title_tag': ['辅舒良']}
        for doc_id, title in news_rows
    ]
    solr_news = SolrCloud(ZooKeeper(zk_hosts), "biztest_robot_news")
    solr_news.add(news_docs)

    # topics
    topic_rows = (('510', '辅舒良_test_news1'), ('511', '辅舒良_test_news2'))
    topic_docs = [
        {'id': doc_id, 'title': title, 'content': '辅舒良'}
        for doc_id, title in topic_rows
    ]
    solr_topics = SolrCloud(ZooKeeper(zk_hosts), "biztest_topics")
    solr_topics.add(topic_docs, fieldUpdates={"title": "set", "content": "set"})
def add_clinic():
    '''
    Index every row of ``medicaldb_clinic`` into the
    ``simple_medical_entity`` Solr collection.

    Each row becomes one document with id ``clinic_<id>``, type ``clinic``,
    a millisecond timestamp, and the lower-cased, space-stripped name (plus
    the abbreviation when there is one) in both ``name`` and
    ``name_string``.

    :return: None
    '''
    # medicaldb_clinic
    sql = 'select id,name,abbr from medicaldb_clinic;'
    id_prefix = "clinic_"
    rows = get_medical_entity_handler(False).do_one(sql)

    docs = []
    for clinic_id, raw_name, raw_abbr in rows:
        name = raw_name.lower().replace(" ", "")
        # abbr is optional: treat a NULL column like an empty string instead
        # of crashing the whole import on None.lower().
        abbr = (raw_abbr or "").lower().replace(" ", "")
        name_list = [name, abbr] if abbr else [name]
        docs.append(
            {
                "id": id_prefix + str(clinic_id),
                "name": name_list,
                "name_string": name_list,
                "type": "clinic",
                "timestamp": int(time.time() * 1000)
            }
        )

    solr = SolrCloud(ZooKeeper("md4:2181"), "simple_medical_entity")
    add_all(docs, solr)
def add_symptom():
    '''
    Index every row of ``medicaldb_newsymptoms`` into the
    ``simple_medical_entity`` Solr collection.

    Each row becomes one document with id ``symptom_<id>``, type
    ``symptom`` and a millisecond timestamp.  ``name`` and ``name_string``
    hold the de-duplicated, non-empty set of: the name, the abbreviation,
    and every ``|``-separated alias (all lower-cased with spaces removed).
    The order of the names is not preserved because they pass through a set.

    :return: None
    '''
    # medicaldb_newsymptoms
    sql = "select id,name,abbr,alias from medicaldb_newsymptoms;"
    id_prefix = "symptom_"
    rows = get_medical_entity_handler(False).do_one(sql)

    docs = []
    for symptom_id, raw_name, raw_abbr, raw_alias in rows:
        name = raw_name.lower().replace(" ", "")
        # abbr / alias are optional: treat a NULL column like an empty
        # string instead of crashing the whole import on None.lower().
        abbr = (raw_abbr or "").lower().replace(" ", "")
        alias = (raw_alias or "").lower().replace(" ", "")

        # Empty abbr / alias pieces only contribute "" which is dropped below.
        name_set = set([name, abbr] + alias.split('|'))
        name_set.discard("")
        name_list = list(name_set)

        docs.append(
            {
                "id": id_prefix + str(symptom_id),
                "name": name_list,
                "name_string": name_list,
                "type": "symptom",
                "timestamp": int(time.time() * 1000)
            }
        )

    solr = SolrCloud(ZooKeeper("md4:2181"), "simple_medical_entity")
    add_all(docs, solr)
def add_bdpart():
    '''
    Index every row of ``medicaldb_bodypart`` into the
    ``simple_medical_entity`` Solr collection.

    Document fields:
        id          : "bodypart_" followed by the row id
        name        : list with the normalized name, plus the abbreviation
                      when there is one
        name_string : same list as ``name``
        type        : "bodypart"
        timestamp   : current time in milliseconds

    :return: None
    '''
    sql = 'select id,name,abbr from medicaldb_bodypart;'
    id_prefix = "bodypart_"
    rows = get_medical_entity_handler(False).do_one(sql)

    docs = []
    for part_id, raw_name, raw_abbr in rows:
        name = raw_name.lower().replace(" ", "")
        # abbr is optional: treat a NULL column like an empty string instead
        # of crashing the whole import on None.lower().
        abbr = (raw_abbr or "").lower().replace(" ", "")
        name_list = [name, abbr] if abbr else [name]
        docs.append(
            {
                "id": id_prefix + str(part_id),
                "name": name_list,
                "name_string": name_list,
                "type": "bodypart",
                "timestamp": int(time.time() * 1000)
            }
        )

    solr = SolrCloud(ZooKeeper("md4:2181"), "simple_medical_entity")
    add_all(docs, solr)
def test25():
    '''
    Put some data into the biz-test Solr: set a fixed ``diseases`` list on
    the ``biztest_full_problem`` documents of one doctor.

    Looks up at most 200 document ids filtered by a hard-coded
    ``doctor_id``, prints how many were found, then sends a ``set`` field
    update of ``diseases`` for each of them.

    :return: None
    '''
    from add_data_to_solr.cy_solr_local.solr_base import SolrHelper, SolrCloud, ZooKeeper
    from general_utils.solr_utils import SolrQuery

    table = "biztest_full_problem"
    solr = SolrCloud(ZooKeeper("rd1:2181,rd2:2181"), table)

    # Collect the ids of the documents to update.
    query = SolrQuery()
    query.set("q", "*:*")
    query.add("fq", "doctor_id:clinic_web_c383b3a7e6db1f1d")
    query.set("fl", ["id"])
    query.set("rows", 200)
    ids = [hit["id"] for hit in solr.search(**query.get_query_dict())]
    print(len(ids))

    docs = [
        {"id": problem_id, "diseases": ["痔疮", "皮炎", "高血压", "吃的太多"]}
        for problem_id in ids
    ]
    solr.add(docs, fieldUpdates={"diseases": "set"})
def test21():
    '''
    Try to connect to each of the biz-test Solr collections.

    For every collection a header line is printed, then the ``id`` of up to
    10 documents returned by a match-all query.  Any exception raised while
    connecting or searching is printed and the loop moves on to the next
    collection, so one broken collection does not hide the others.

    :return: None
    '''
    from add_data_to_solr.cy_solr_local.solr_base import SolrHelper, SolrCloud, ZooKeeper
    from general_utils.solr_utils import SolrQuery

    tables = [
        "biztest_hospital_search",
        "biztest_main_doctors",
        "biztest_personal_doctors",
        "biztest_robot_news",
        "biztest_problem",
        "biztest_dialog",
        "biztest_full_problem",
        "biztest_drug",
        "biztest_topics",
        "biztest_pedia"
    ]
    for table in tables:
        # Single formatted string: prints the same text under the Python 2
        # print statement and the Python 3 print function.
        print('tablename %s %s' % (table, '=' * 30))
        try:
            solr = SolrCloud(ZooKeeper("rd1:2181,rd2:2181"), table)
            solr_query = SolrQuery()
            solr_query.set('q', '*:*')
            solr_query.set('fl', ['*', 'score'])
            solr_query.set('rows', 10)
            for item in solr.search(**solr_query.get_query_dict()):
                print(item.get('id'))
        except Exception as e:
            # Deliberately broad: this is a best-effort connectivity probe.
            print(e)
def test27():
    '''
    Print the score-like fields of a few hard-coded documents from the
    ``main_doctors`` collection.

    For each id the document is fetched with an ``id:<id>`` filter query.
    Ids with no matching document, or whose document has no ``name`` field,
    are skipped.  Otherwise a separator, the id with its ``name2`` value,
    and every field whose name contains ``score``, ``rate`` or ``star`` are
    printed.

    :return: None
    '''
    from add_data_to_solr.cy_solr_local.solr_base import SolrHelper, SolrCloud, ZooKeeper
    from general_utils.solr_utils import SolrQuery

    solr = SolrCloud(ZooKeeper("md7:2181,md8:2181,md9:2181"), "main_doctors")
    doctor_ids = (
        "clinic_web_98f32ad0d4461af8",
        "clinic_web_63da2e8135fabfb1",
        "clinic_zhongyike_zhouyuchun",
        "clinic_web_7ff76f1118d806e6",
        "4647c810af1ee0850bf2",
    )
    for doctor_id in doctor_ids:
        sq = SolrQuery()
        sq.set('q', '*:*')
        sq.add('fq', 'id:%s' % doctor_id)
        sq.set('rows', 1)
        sq.set('fl', ['*'])

        hits = list(solr.search(**sq.get_query_dict()))
        if not hits:
            # Unknown id: skip it instead of raising IndexError and
            # aborting the remaining lookups.
            continue
        res = hits[0]
        if 'name' not in res:
            continue

        print('=' * 30)
        print('%s %s' % (doctor_id, res.get('name2', '')))
        for key in res:
            if 'score' in key or 'rate' in key or 'star' in key:
                print('%s %s' % (key, res[key]))
''' 将前一天的活跃用户的推荐的医生话题离线计算后,存入线上solr的user_topn_topics表 ''' import sys import time import json import random from add_data_to_solr.cy_solr_local.solr_base import SolrCloud, ZooKeeper from add_data_to_solr.manager.add_utils import add_all from general_utils.time_utils import timestamp2date from recommend.daily_scripts.utils import get_parti_solr_filename solr = SolrCloud(ZooKeeper("md7:2181,md8:2181,md9:2181"), "user_topn_topics") def test_insert(): ''' id:string date|uid topic_ids: string json.dumps([1,2,3]) :return: ''' date = timestamp2date(time.time() - 86400.0) topic_ids = [1, 2, 3, 5555, 666] docs = [] uids = set() for i in range(10000): uid = random.randint(1, 99999999)
# encoding=utf8
'''
Check whether the data imported into Solr each day was imported successfully.
'''
import time

import happybase

from general_utils.time_utils import timestamp2datetime
from general_utils.solr_utils import SolrQuery
from add_data_to_solr.cy_solr_local.solr_base import SolrCloud, ZooKeeper

# ZooKeeper hosts shared by every online collection below.
_ONLINE_ZK_HOSTS = "md7:2181,md8:2181,md9:2181"


def _online_collection(name):
    '''Return a SolrCloud client for collection ``name`` on the online ZooKeeper hosts.'''
    # A fresh ZooKeeper object per client, exactly as when written inline.
    return SolrCloud(ZooKeeper(_ONLINE_ZK_HOSTS), name)


solr_nat_online = _online_collection("news_and_topic")
# solr_nat_biz = SolrCloud(ZooKeeper("rd1:2181,rd2:2181"), "news_and_topic")
solr_utt_online = _online_collection("user_topn_topics")
solr_utn_online = _online_collection("user_topn_news")

# Tables related to chunyu_search, all online.
solr_main_doctors = _online_collection("main_doctors")  # updated every 30 min
solr_full_problem = _online_collection("full_problem")  # daily update finishes shortly after 3 a.m.
solr_robot_news = _online_collection("robot_news")  # daily update finishes shortly after 3 a.m.
solr_hospital_search = _online_collection("hospital_search")  # daily update finishes shortly after 3 a.m.
solr_pedia = _online_collection("pedia")  # daily update finishes shortly after 1 a.m.
solr_topics = _online_collection("topics")  # daily update finishes shortly after 1 a.m.
solr_problem_v2 = _online_collection("problem_v2")  # daily update finishes shortly after 5 a.m.
task1 将很多(一个月)活动用户的推荐news id存入solr task2 每日任务:将前一天活动用户的数据更新了 ''' import os import sys import time import json import random from add_data_to_solr.cy_solr_local.solr_base import SolrCloud, ZooKeeper from add_data_to_solr.manager.add_utils import add_all from general_utils.time_utils import timestamp2date, ensure_m_timestamp from recommend.daily_scripts.utils import get_parti_solr_filename solr = SolrCloud(ZooKeeper("md7:2181,md8:2181,md9:2181"), "user_topn_news") solr_test = SolrCloud(ZooKeeper("rd1:2181,rd2:2181"), "user_topn_news") def test_insert(): ''' id:string date|uid topic_ids: string json.dumps([1,2,3]) :return: ''' # date = timestamp2date(time.time() - 86400.0) topic_ids = [1, 2, 3, 5555, 666] docs = [] uids = set() for i in range(10000):