def test_insert():
    '''
    Build a batch of synthetic topic documents and pass them to add_all.

    Up to 10000 random uids in [1, 99999999] are drawn; a uid that was
    already drawn is skipped, so the batch may hold fewer than 10000 docs.

    Document fields:
      id:        string "<date>|<uid>", where <date> is what timestamp2date
                 returns for the time 86400 seconds before now
      topic_ids: string, json.dumps of the list [1, 2, 3, 5555, 666]
      timestamp: int, current time in milliseconds
    :return: None
    '''
    day = timestamp2date(time.time() - 86400.0)
    # The topic list never changes, so it is serialized a single time.
    encoded_topics = json.dumps([1, 2, 3, 5555, 666])

    seen_uids = set()
    batch = []
    for _ in range(10000):
        candidate = random.randint(1, 99999999)
        if candidate not in seen_uids:
            seen_uids.add(candidate)
            batch.append({
                'id': day + '|' + str(candidate),
                'topic_ids': encoded_topics,
                'timestamp': int(time.time() * 1000),
            })
    add_all(batch, solr)
def add_clinic():
    '''
    Turn every row of medicaldb_clinic into a document and pass the batch
    to add_all.

    Each row supplies (id, name, abbr). name and abbr are lower-cased and
    have their space characters removed; abbr is only kept when the result
    is non-empty.

    Document fields:
      id:          "clinic_<id>"
      name:        list of normalized names ([name] or [name, abbr])
      name_string: the same list object as name
      type:        "clinic"
      timestamp:   int, current time in milliseconds
    :return: None
    '''
    def _squash(text):
        # Lower-case and drop plain space characters only.
        return text.lower().replace(" ", "")

    rows = get_medical_entity_handler(False).do_one(
        'select id,name,abbr from medicaldb_clinic;')

    batch = []
    for row in rows:
        clinic_id = row[0]
        full_name = _squash(row[1])
        short_name = _squash(row[2])

        names = [full_name]
        if short_name:
            names.append(short_name)

        batch.append({
            "id": "clinic_" + str(clinic_id),
            "name": names,
            "name_string": names,
            "type": "clinic",
            "timestamp": int(time.time() * 1000)
        })

    # NOTE(review): second argument is presumably the collection name -- confirm.
    zookeeper = ZooKeeper("md4:2181")
    target = SolrCloud(zookeeper, "simple_medical_entity")
    add_all(batch, target)
def add_symptom():
    '''
    Turn every row of medicaldb_newsymptoms into a document and pass the
    batch to add_all.

    Each row supplies (id, name, abbr, alias). All three text columns are
    lower-cased and have their space characters removed. alias is split on
    '|' into several names. The collected names are de-duplicated through a
    set and the empty string is dropped, so the order of the final list is
    whatever the set yields.

    Document fields:
      id:          "symptom_<id>"
      name:        list of distinct, non-empty normalized names
      name_string: the same list object as name
      type:        "symptom"
      timestamp:   int, current time in milliseconds
    :return: None
    '''
    query = "select id,name,abbr,alias from medicaldb_newsymptoms;"
    prefix = "symptom_"
    rows = get_medical_entity_handler(False).do_one(query)

    batch = []
    for row in rows:
        symptom_id = row[0]
        primary = row[1].lower().replace(" ", "")
        short_form = row[2].lower().replace(" ", "")
        aliases = row[3].lower().replace(" ", "")

        candidates = (
            [primary]
            + ([short_form] if short_form else [])
            + (aliases.split('|') if aliases else [])
        )

        # De-duplicate, then remove the empty name that an empty primary
        # name or an alias string such as "a||b" can produce.
        distinct = set(candidates)
        distinct.discard("")
        names = list(distinct)

        batch.append({
            "id": prefix + str(symptom_id),
            "name": names,
            "name_string": names,
            "type": "symptom",
            "timestamp": int(time.time() * 1000)
        })

    # NOTE(review): second argument is presumably the collection name -- confirm.
    target = SolrCloud(ZooKeeper("md4:2181"), "simple_medical_entity")
    add_all(batch, target)
def add_bdpart():
    '''
    Turn every row of medicaldb_bodypart into a document and pass the batch
    to add_all.

    Each row supplies (id, name, abbr). name and abbr are lower-cased and
    have their space characters removed; abbr is only kept when the result
    is non-empty.

    fields:
     id : "bodypart_<id>", e.g. "bodypart_1231231"
     name : list of normalized names ([name] or [name, abbr])
     name_string : the same list object as name
     type : "bodypart"
     timestamp : int, current time in milliseconds
    :return: None
    '''
    query = 'select id,name,abbr from medicaldb_bodypart;'
    prefix = "bodypart_"
    records = get_medical_entity_handler(False).do_one(query)

    batch = []
    for record in records:
        part_id = record[0]
        full_name = record[1].lower().replace(" ", "")
        short_name = record[2].lower().replace(" ", "")

        if short_name:
            names = [full_name, short_name]
        else:
            names = [full_name]

        document = {
            "id": prefix + str(part_id),
            "name": names,
            "name_string": names,
            "type": "bodypart",
            "timestamp": int(time.time() * 1000)
        }
        batch.append(document)

    # NOTE(review): second argument is presumably the collection name -- confirm.
    zookeeper = ZooKeeper("md4:2181")
    add_all(batch, SolrCloud(zookeeper, "simple_medical_entity"))
def from_file(file_name):
    '''
    Read records from a file and pass the resulting documents to add_all.

    Every line of the file is parsed as one JSON object that must contain
    'uid' and 'ids'. When a record has no 'last_event_time' key, the current
    time is used in its place.

    Document fields:
      id:              str(uid)
      news_ids:        string, json.dumps of the record's 'ids'
      last_event_time: ensure_m_timestamp of the record's value (or of now)
      timestamp:       ensure_m_timestamp of now
    :param file_name: path of the file to read, one JSON object per line
    :return: None
    '''
    # Phase 1: parse the whole file before any document is built.
    with open(file_name, 'r') as source:
        records = [json.loads(line.strip('\n')) for line in source]

    # Phase 2: convert each parsed record into a document.
    batch = []
    for record in records:
        current = time.time()
        doc_id = str(record['uid'])
        encoded_ids = json.dumps(record['ids'])
        event_time = ensure_m_timestamp(record.get('last_event_time', current))
        batch.append({
            'id': doc_id,
            'news_ids': encoded_ids,
            'last_event_time': event_time,
            'timestamp': ensure_m_timestamp(current),
        })

    add_all(batch, solr)
def add_a_part_kernel(file_name):
    '''
    Read one file of JSON-lines records and pass the documents to add_all.

    Every line is parsed as a JSON object that must contain 'uid', 'ids' and
    'last_event_time'. The file name is printed before reading starts.

    Document fields:
      id:              str(uid)
      news_ids:        string, json.dumps of the record's 'ids'
      timestamp:       int, current time in milliseconds
      last_event_time: int, the record's value multiplied by 1000
    :param file_name: path of the file to read, one JSON object per line
    :return: None
    '''
    print('file name = %s' % file_name)

    batch = []
    with open(file_name, 'r') as source:
        for line in source:
            record = json.loads(line.strip('\n'))
            uid = record['uid']
            ids = record['ids']
            # The input value is in seconds; the document stores milliseconds.
            event_ms = int(record['last_event_time'] * 1000)

            document = {
                'id': str(uid),
                'news_ids': json.dumps(ids),
                'timestamp': int(time.time() * 1000),
                'last_event_time': event_ms,
            }
            batch.append(document)
    add_all(batch, solr)
def add_a_part(part):
    '''
    Read the 'topic' file of one partition and pass its documents to add_all.

    The file path comes from get_parti_solr_filename(part, 'topic'). Each
    line is a JSON object such as
        {"topics": [], "key": "20180101|135495695"}
    Records with an empty 'topics' list are not filtered out.

    Document fields:
      id:        the record's 'key'
      topic_ids: string, json.dumps of the record's 'topics'
      timestamp: int, current time in milliseconds
    :param part: partition identifier forwarded to get_parti_solr_filename
    :return: None
    '''
    path = get_parti_solr_filename(part, 'topic')

    batch = []
    with open(path, 'r') as source:
        for line in source:
            record = json.loads(line.strip('\n'))
            doc_id = record['key']
            topic_list = record['topics']
            batch.append({
                'id': doc_id,
                'topic_ids': json.dumps(topic_list),
                'timestamp': int(time.time() * 1000),
            })
    add_all(batch, solr)