def test25():
    '''
    Put some sample data into the solr "biz" cluster.

    Fetches the ids of up to 200 documents of one doctor from the
    ``biztest_full_problem`` collection, prints how many were found, and
    then submits one update per id that carries a fixed list of sample
    ``diseases`` values.
    :return: None
    '''
    from add_data_to_solr.cy_solr_local.solr_base import SolrHelper, SolrCloud, ZooKeeper
    from general_utils.solr_utils import SolrQuery

    collection = "biztest_full_problem"
    solr = SolrCloud(ZooKeeper("rd1:2181,rd2:2181"), collection)

    # Match everything, narrowed to a single doctor, returning only the id field.
    query = SolrQuery()
    query.set("q", "*:*")
    query.add("fq", "doctor_id:clinic_web_c383b3a7e6db1f1d")
    query.set("fl", ["id"])
    query.set("rows", 200)

    doc_ids = []
    for hit in solr.search(**query.get_query_dict()):
        doc_ids.append(hit["id"])
    print(len(doc_ids))

    # One partial document per id; every document gets its own list object.
    updates = [
        {"id": doc_id, "diseases": ["痔疮", "皮炎", "高血压", "吃的太多"]}
        for doc_id in doc_ids
    ]

    # fieldUpdates is forwarded as-is; presumably it requests an atomic
    # "set" of the diseases field -- confirm against SolrCloud.add.
    solr.add(updates, fieldUpdates={"diseases": "set"})
def test21():
    '''
    Try connecting to each of the test solr collections.

    For every ``biztest_*`` collection a match-all query limited to 10 rows
    is run and the id of each returned document is printed.  Any exception
    raised while handling one collection is printed and the loop carries on
    with the next collection.
    :return: None
    '''
    from add_data_to_solr.cy_solr_local.solr_base import SolrHelper, SolrCloud, ZooKeeper
    from general_utils.solr_utils import SolrQuery

    tables = (
        "biztest_hospital_search",
        "biztest_main_doctors",
        "biztest_personal_doctors",
        "biztest_robot_news",
        "biztest_problem",
        "biztest_dialog",
        "biztest_full_problem",
        "biztest_drug",
        "biztest_topics",
        "biztest_pedia",
    )
    separator = '=' * 30

    for table in tables:
        # Single pre-formatted string: prints the same text as the
        # three-item print statement on Python 2 and is valid on Python 3.
        print('tablename %s %s' % (table, separator))
        try:
            solr = SolrCloud(ZooKeeper("rd1:2181,rd2:2181"), table)

            probe = SolrQuery()
            probe.set('q', '*:*')
            probe.set('fl', ['*', 'score'])
            probe.set('rows', 10)

            hits = solr.search(**probe.get_query_dict())
            for hit in hits:
                print(hit.get('id'))
        except Exception as err:
            # Deliberately best-effort: report the failure and move on.
            print(err)
def test23():
    '''
    Put documents for one test keyword into the solr "testbiz" collections.

    Two documents are added to ``biztest_robot_news`` (with ``title`` and
    ``title_tag``) and two to ``biztest_topics`` (with ``title`` and
    ``content``).  The keyword is the Chinese term that appears in the
    document literals below.
    :return: None
    '''
    from add_data_to_solr.cy_solr_local.solr_base import SolrCloud, ZooKeeper

    # Both collections are reached through the same ZooKeeper ensemble.
    zk_hosts = "rd1:2181,rd2:2181"

    # robot news
    news_docs = [
        {'id': '9000', 'title': '辅舒良_test_news1', 'title_tag': ['辅舒良']},
        {'id': '9001', 'title': '辅舒良_test_news2', 'title_tag': ['辅舒良']},
    ]
    solr_news = SolrCloud(ZooKeeper(zk_hosts), "biztest_robot_news")
    solr_news.add(news_docs)

    # topics
    topic_docs = [
        {'id': '510', 'title': '辅舒良_test_news1', 'content': '辅舒良'},
        {'id': '511', 'title': '辅舒良_test_news2', 'content': '辅舒良'},
    ]
    solr_topics = SolrCloud(ZooKeeper(zk_hosts), "biztest_topics")
    # fieldUpdates is forwarded as-is; presumably it requests atomic "set"
    # updates of title and content -- confirm against SolrCloud.add.
    solr_topics.add(topic_docs, fieldUpdates={"title": "set", "content": "set"})
def add_clinic():
    '''
    Index every row of ``medicaldb_clinic`` into ``simple_medical_entity``.

    Each row (id, name, abbr) becomes one document with:
      id          : "clinic_" + the row id
      name        : list holding the normalized name and, when present and
                    different from the name, the normalized abbreviation
      name_string : the same list as ``name``
      type        : "clinic"
      timestamp   : current time in milliseconds

    Normalization lower-cases the value and removes spaces.  The documents
    are handed to ``add_all`` together with the target collection.
    :return: None
    '''
    # medicaldb_clinic
    sql = 'select id,name,abbr from medicaldb_clinic;'
    id_prefix = "clinic_"
    rows = get_medical_entity_handler(False).do_one(sql)

    def normalize(value):
        # A NULL column presumably arrives as None; treat it as an empty
        # string instead of failing on None.lower().
        return (value or "").lower().replace(" ", "")

    docs = []
    for row in rows:
        entity_id = row[0]
        name = normalize(row[1])
        abbr = normalize(row[2])

        name_list = [name]
        # Skip an empty abbreviation, and one that merely repeats the name,
        # so the multi-valued field carries no duplicate entry.
        if abbr and abbr != name:
            name_list.append(abbr)

        docs.append(
            {
                "id": id_prefix + str(entity_id),
                "name": name_list,
                "name_string": name_list,
                "type": "clinic",
                "timestamp": int(time.time() * 1000)
            }
        )

    solr = SolrCloud(ZooKeeper("md4:2181"), "simple_medical_entity")
    add_all(docs, solr)
def add_symptom():
    '''
    Index every row of ``medicaldb_newsymptoms`` into ``simple_medical_entity``.

    Each row (id, name, abbr, alias) becomes one document with:
      id          : "symptom_" + the row id
      name        : de-duplicated list of the normalized name, abbreviation
                    and aliases (``alias`` is split on "|"); empty entries
                    are dropped
      name_string : the same list as ``name``
      type        : "symptom"
      timestamp   : current time in milliseconds

    Normalization lower-cases the value and removes spaces.  The documents
    are handed to ``add_all`` together with the target collection.
    :return: None
    '''
    # medicaldb_newsymptoms
    sql = "select id,name,abbr,alias from medicaldb_newsymptoms;"
    id_prefix = "symptom_"
    rows = get_medical_entity_handler(False).do_one(sql)

    def normalize(value):
        # A NULL column presumably arrives as None; treat it as an empty
        # string instead of failing on None.lower().
        return (value or "").lower().replace(" ", "")

    docs = []
    for row in rows:
        entity_id = row[0]
        name = normalize(row[1])
        abbr = normalize(row[2])
        alias = normalize(row[3])

        # Keep the first occurrence of every non-empty name so the result is
        # in a stable order (name, abbreviation, aliases) rather than the
        # arbitrary order a set would give.
        name_list = []
        for candidate in [name, abbr] + alias.split('|'):
            if candidate and candidate not in name_list:
                name_list.append(candidate)

        docs.append(
            {
                "id": id_prefix + str(entity_id),
                "name": name_list,
                "name_string": name_list,
                "type": "symptom",
                "timestamp": int(time.time() * 1000)
            }
        )

    solr = SolrCloud(ZooKeeper("md4:2181"), "simple_medical_entity")
    add_all(docs, solr)
def add_bdpart():
    '''
    Index every row of ``medicaldb_bodypart`` into ``simple_medical_entity``.

    Each row (id, name, abbr) becomes one document with:
      id          : "bodypart_" + the row id, e.g. "bodypart_1231231"
      name        : list holding the normalized name (for example the word
                    for "eye") and, when present and different from the
                    name, the normalized abbreviation
      name_string : the same list as ``name``
      type        : "bodypart"
      timestamp   : current time in milliseconds

    Normalization lower-cases the value and removes spaces.  The documents
    are handed to ``add_all`` together with the target collection.
    :return: None
    '''

    sql = 'select id,name,abbr from medicaldb_bodypart;'
    id_prefix = "bodypart_"
    rows = get_medical_entity_handler(False).do_one(sql)

    def normalize(value):
        # A NULL column presumably arrives as None; treat it as an empty
        # string instead of failing on None.lower().
        return (value or "").lower().replace(" ", "")

    docs = []
    for row in rows:
        entity_id = row[0]
        name = normalize(row[1])
        abbr = normalize(row[2])

        name_list = [name]
        # Skip an empty abbreviation, and one that merely repeats the name,
        # so the multi-valued field carries no duplicate entry.
        if abbr and abbr != name:
            name_list.append(abbr)

        docs.append(
            {
                "id": id_prefix + str(entity_id),
                "name": name_list,
                "name_string": name_list,
                "type": "bodypart",
                "timestamp": int(time.time() * 1000)
            }
        )

    solr = SolrCloud(ZooKeeper("md4:2181"), "simple_medical_entity")
    add_all(docs, solr)
def test27():
    '''
    Print the score-like fields of a few hand-picked ``main_doctors`` documents.

    For each hard-coded document id the document is fetched by an
    ``id:<value>`` filter query.  Ids that match no document, and documents
    without a ``name`` field, are skipped.  For the others a separator line,
    the id with the document's ``name2`` value, and every field whose key
    contains "score", "rate" or "star" are printed.
    :return: None
    '''
    from add_data_to_solr.cy_solr_local.solr_base import SolrCloud, ZooKeeper
    from general_utils.solr_utils import SolrQuery

    solr = SolrCloud(ZooKeeper("md7:2181,md8:2181,md9:2181"), "main_doctors")
    doctor_ids = (
        "clinic_web_98f32ad0d4461af8",
        "clinic_web_63da2e8135fabfb1",
        "clinic_zhongyike_zhouyuchun",
        "clinic_web_7ff76f1118d806e6",
        "4647c810af1ee0850bf2",
    )
    interesting = ('score', 'rate', 'star')

    for doctor_id in doctor_ids:
        sq = SolrQuery()
        sq.set('q', '*:*')
        sq.add('fq', 'id:%s' % doctor_id)
        sq.set('rows', 1)
        sq.set('fl', ['*'])

        hits = list(solr.search(**sq.get_query_dict()))
        if not hits:
            # No document for this id: skip it instead of failing with an
            # IndexError that would abort the remaining ids.
            continue
        res = hits[0]
        if 'name' not in res:
            continue

        # Single pre-formatted strings print the same text on Python 2 as the
        # multi-item print statement and are also valid on Python 3.
        print('=' * 30)
        print('%s %s' % (doctor_id, res.get('name2', '')))
        for key in res:
            if any(tag in key for tag in interesting):
                print('%s %s' % (key, res[key]))
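'''
After the recommended doctor topics for the previous day's active users have
been computed offline, store them in the user_topn_topics table of the online solr.
'''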

import sys
import time
import json
import random

from add_data_to_solr.cy_solr_local.solr_base import SolrCloud, ZooKeeper
from add_data_to_solr.manager.add_utils import add_all

from general_utils.time_utils import timestamp2date
from recommend.daily_scripts.utils import get_parti_solr_filename

# Module-level handle to the "user_topn_topics" collection, reached through
# the md7/md8/md9 ZooKeeper ensemble; created when the module is loaded.
solr = SolrCloud(ZooKeeper("md7:2181,md8:2181,md9:2181"), "user_topn_topics")


def test_insert():
    '''
    Test helper around inserting documents (per its name).

    Document fields, as noted by the original author:
      id        : string of the form ``date|uid``
      topic_ids : string holding a JSON-encoded list, e.g. json.dumps([1,2,3])
    :return:
    '''
    # Date of the moment 86400 seconds (one day) before now, as converted by
    # timestamp2date.
    date = timestamp2date(time.time() - 86400.0)

    # Fixed sample topic ids.
    topic_ids = [1, 2, 3, 5555, 666]
    docs = []
    uids = set()
    for i in range(10000):
        # Random user id between 1 and 99999999, both ends included.
        uid = random.randint(1, 99999999)
# Example #9
# 0
# encoding=utf8


'''
Check whether the data that is imported into solr every day was imported successfully.
'''

import time
import happybase

from general_utils.time_utils import timestamp2datetime
from general_utils.solr_utils import SolrQuery

from add_data_to_solr.cy_solr_local.solr_base import SolrCloud, ZooKeeper

# ZooKeeper ensemble shared by every collection handle below.
_ONLINE_ZK_HOSTS = "md7:2181,md8:2181,md9:2181"


def _online_collection(name):
    '''Return a SolrCloud handle for collection ``name``, using a fresh ZooKeeper object.'''
    return SolrCloud(ZooKeeper(_ONLINE_ZK_HOSTS), name)


solr_nat_online = _online_collection("news_and_topic")
# solr_nat_biz = SolrCloud(ZooKeeper("rd1:2181,rd2:2181"), "news_and_topic")

solr_utt_online = _online_collection("user_topn_topics")

solr_utn_online = _online_collection("user_topn_news")

# Tables related to chunyu_search, all online
solr_main_doctors = _online_collection("main_doctors")  # updated every 30 min
solr_full_problem = _online_collection("full_problem")  # daily update finishes shortly after 3 a.m.
solr_robot_news = _online_collection("robot_news")  # daily update finishes shortly after 3 a.m.
solr_hospital_search = _online_collection("hospital_search")  # daily update finishes shortly after 3 a.m.
solr_pedia = _online_collection("pedia")  # daily update finishes shortly after 1 a.m.
solr_topics = _online_collection("topics")  # daily update finishes shortly after 1 a.m.
solr_problem_v2 = _online_collection("problem_v2")  # daily update finishes shortly after 5 a.m.
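'''
task1: store the recommended news ids of many (one month's worth of) active users in solr
task2: daily job: update the data of the previous day's active users
'''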
import os
import sys
import time
import json
import random

from add_data_to_solr.cy_solr_local.solr_base import SolrCloud, ZooKeeper
from add_data_to_solr.manager.add_utils import add_all

from general_utils.time_utils import timestamp2date, ensure_m_timestamp
from recommend.daily_scripts.utils import get_parti_solr_filename

# Handle to the "user_topn_news" collection, reached through the md7/md8/md9
# ZooKeeper ensemble.
solr = SolrCloud(ZooKeeper("md7:2181,md8:2181,md9:2181"), "user_topn_news")
# Handle to the collection of the same name reached through the rd1/rd2
# ZooKeeper ensemble (the test side, going by the variable name).
solr_test = SolrCloud(ZooKeeper("rd1:2181,rd2:2181"), "user_topn_news")


def test_insert():
    '''
    id:string date|uid
    topic_ids: string json.dumps([1,2,3])
    :return:
    '''
    # date = timestamp2date(time.time() - 86400.0)

    topic_ids = [1, 2, 3, 5555, 666]
    docs = []
    uids = set()
    for i in range(10000):