def test_insert():
    """Insert up to 10k synthetic topic documents for smoke-testing Solr.

    Document layout:
        id        : "<yesterday's date>|<random uid>"
        topic_ids : JSON-encoded list of topic ids
        timestamp : current time in milliseconds
    """
    yesterday = timestamp2date(time.time() - 86400.0)
    encoded_topics = json.dumps([1, 2, 3, 5555, 666])
    seen_uids = set()
    docs = []
    for _ in range(10000):
        uid = random.randint(1, 99999999)
        if uid in seen_uids:
            # collisions are simply skipped, so fewer than 10k docs may result
            continue
        seen_uids.add(uid)
        docs.append({
            'id': '%s|%d' % (yesterday, uid),
            'topic_ids': encoded_topics,
            'timestamp': int(time.time() * 1000),
        })
    add_all(docs, solr)
def add_clinic():
    """Index every medicaldb_clinic row into the simple_medical_entity collection."""
    rows = get_medical_entity_handler(False).do_one(
        'select id,name,abbr from medicaldb_clinic;')
    docs = []
    for clinic_id, raw_name, raw_abbr in rows:
        name = raw_name.lower().replace(" ", "")
        abbr = raw_abbr.lower().replace(" ", "")
        # abbreviation is optional; include it only when non-empty
        names = [name, abbr] if abbr else [name]
        docs.append({
            "id": "clinic_" + str(clinic_id),
            "name": names,
            "name_string": names,
            "type": "clinic",
            "timestamp": int(time.time() * 1000),
        })
    cloud = SolrCloud(ZooKeeper("md4:2181"), "simple_medical_entity")
    add_all(docs, cloud)
def add_symptom():
    """Index medicaldb_newsymptoms rows (name, abbr, and '|'-separated aliases)."""
    rows = get_medical_entity_handler(False).do_one(
        "select id,name,abbr,alias from medicaldb_newsymptoms;")
    docs = []
    for symptom_id, raw_name, raw_abbr, raw_alias in rows:
        name = raw_name.lower().replace(" ", "")
        abbr = raw_abbr.lower().replace(" ", "")
        alias = raw_alias.lower().replace(" ", "")
        # collect all searchable spellings, deduplicated
        candidates = {name}
        if abbr:
            candidates.add(abbr)
        if alias:
            candidates.update(alias.split('|'))
        candidates.discard("")  # blank alias segments produce empty strings
        names = list(candidates)
        docs.append({
            "id": "symptom_" + str(symptom_id),
            "name": names,
            "name_string": names,
            "type": "symptom",
            "timestamp": int(time.time() * 1000),
        })
    cloud = SolrCloud(ZooKeeper("md4:2181"), "simple_medical_entity")
    add_all(docs, cloud)
def add_bdpart():
    """Index every medicaldb_bodypart row into the simple_medical_entity collection.

    Document fields:
        id   : "bodypart_<row id>"
        name : list of spellings, e.g. ["眼睛"]
        type : "bodypart"
    """
    rows = get_medical_entity_handler(False).do_one(
        'select id,name,abbr from medicaldb_bodypart;')
    docs = []
    for part_id, raw_name, raw_abbr in rows:
        name = raw_name.lower().replace(" ", "")
        abbr = raw_abbr.lower().replace(" ", "")
        # abbreviation is optional; include it only when non-empty
        names = [name, abbr] if abbr else [name]
        docs.append({
            "id": "bodypart_" + str(part_id),
            "name": names,
            "name_string": names,
            "type": "bodypart",
            "timestamp": int(time.time() * 1000),
        })
    cloud = SolrCloud(ZooKeeper("md4:2181"), "simple_medical_entity")
    add_all(docs, cloud)
def from_file(file_name):
    """Read JSON-lines records from *file_name* and index them into Solr.

    Each line must contain 'uid' and 'ids'; a missing 'last_event_time'
    defaults to the current time.

    :param file_name: path to a JSON-lines input file
    :return: None
    """
    docs = []
    with open(file_name, 'r') as fh:
        for line in fh:
            record = json.loads(line.strip('\n'))
            now = time.time()
            docs.append({
                'id': str(record['uid']),
                'news_ids': json.dumps(record['ids']),
                'last_event_time': ensure_m_timestamp(record.get('last_event_time', now)),
                'timestamp': ensure_m_timestamp(now),
            })
    add_all(docs, solr)
def add_a_part_kernel(file_name):
    """Index one JSON-lines partition file into Solr.

    Each record carries 'uid', 'ids', and 'last_event_time' (in seconds,
    converted to milliseconds for the index).
    """
    print('file name = %s' % file_name)
    docs = []
    with open(file_name, 'r') as fh:
        for line in fh:
            record = json.loads(line.strip('\n'))
            docs.append({
                'id': str(record['uid']),
                'news_ids': json.dumps(record['ids']),
                'timestamp': int(time.time() * 1000),
                # source value is seconds; the index stores milliseconds
                'last_event_time': int(record['last_event_time'] * 1000),
            })
    add_all(docs, solr)
def add_a_part(part):
    """Index the topic partition file for *part* into Solr.

    Input lines look like: {"topics": [...], "key": "20180101|135495695"}.
    Records with empty topic lists are indexed as-is.
    """
    docs = []
    with open(get_parti_solr_filename(part, 'topic'), 'r') as fh:
        for line in fh:
            record = json.loads(line.strip('\n'))
            docs.append({
                'id': record['key'],
                'topic_ids': json.dumps(record['topics']),
                'timestamp': int(time.time() * 1000),
            })
    add_all(docs, solr)