if count!=0: for r in get_results(): print "** " * 10 print r['_id'] print r['user'] print r['text'] print r['timestamp'] print r['terms'] print 'hits: %s' % count else: print 'no results' ''' query_dict = {'_id': 3617761840458230} count, get_results = s.search( query=query_dict, fields=['text', 'timestamp', 'user', 'terms', '_id']) print count for r in get_results(): print "** " * 10 print r['_id'] print r['user'] print r['text'] print r['timestamp'] print r['terms'] """ print 'query2:' #根据user_id查询 count, get_results = s.search(query={'_id': 2171696122}, fields=['text', 'timestamp', 'user', 'terms', '_id']) print count for r in get_results(): print "** " * 10
def save_weibos(excel_name, topic, child_topic_list, w_limit):
    """Load top-weighted weibo texts per child topic from an Excel workbook,
    enrich them with records fetched from a Xapian index, and persist the
    result to the OpinionTestWeibos table (replacing any existing rows).

    Parameters:
        excel_name       -- path of the xlrd-readable workbook; one sheet per
                            child topic, named by the topic's integer id.
                            Column 0 holds the weight, column 1 the text,
                            rows sorted by weight descending.
        topic            -- topic identifier stored with each DB row.
        child_topic_list -- iterable of child-topic ids (sheet names).
        w_limit          -- max number of weibo rows taken per sheet.

    Side effects: queries Xapian, deletes/inserts OpinionTestWeibos rows and
    commits the session once per child topic.
    """
    data = xlrd.open_workbook(excel_name)
    weibos_dict = {}
    for i in child_topic_list:
        weibos_dict[i] = []
        table_weibos = data.sheet_by_name(str(int(i)))
        n_row_weibos = table_weibos.nrows
        # BUG FIX: the original read the undefined name `n_row_weibo` here,
        # raising NameError whenever a sheet had <= w_limit rows.
        if n_row_weibos <= w_limit:
            n_rows = n_row_weibos
        else:
            n_rows = w_limit
        # Rows are already sorted by weight (descending), so taking the
        # first n_rows yields the top-weighted entries.
        for j in range(n_rows):
            line = table_weibos.row_values(j)
            weibo_text = line[1]
            weibo_weight = line[0]
            try:
                # TODO(review): ideally this should append the full weibo
                # record (username etc.), not just (text, weight).
                weibos_dict[i].append((weibo_text, weibo_weight))
            except KeyError:
                # Defensive only: weibos_dict[i] is initialised above, so
                # this branch is normally unreachable.
                weibos_dict[i] = [(weibo_text, weibo_weight)]
    # Fetch concrete weibo data from Xapian -- test configuration only
    # (hard-coded path and timestamp window).
    s = XapianSearch(path='/home/ubuntu3/huxiaoqian/case/20140724/20140804/',
                     name='master_timeline_weibo',
                     schema_version='5')
    begin_ts = 1378050300
    end_ts = 1378051200
    query_dict = {
        'timestamp': {'$gt': begin_ts, '$lt': end_ts},
        'message_type': 2
    }
    weibos_dict_new = {}
    scount, weibo_results = s.search(query=query_dict, fields=fields_list)
    # Walk the result stream, assigning 5 weibos (j = 0..4) to each of the
    # child-topic buckets i = 0..10; stop after bucket 10 is filled.
    i = 0
    j = 0
    for weibo in weibo_results():
        if i == 11:
            break
        # Replace the indexed text with the excel-derived text for bucket i.
        weibo['text'] = weibos_dict[str(i)][j][0]
        # Resolve username, profile image and weibo URL for display.
        username, profileimage = getuserinfo(weibo['user'])
        weibo['username'] = username
        weibo['profile_image_url'] = profileimage
        weibo['timestamp'] = ts2date(weibo['timestamp'])
        weibo['weibo_link'] = weiboinfo2url(weibo['user'], weibo['_id'])
        weight = weibos_dict[str(i)][j][1]
        try:
            weibos_dict_new[i].append((weibo, weight))
        except KeyError:
            weibos_dict_new[i] = [(weibo, weight)]
        if j == 4:
            j = 0
            i += 1
        else:
            j += 1
    # Persist: replace any existing row for (topic, child_topic), then insert.
    for i in range(len(child_topic_list)):
        item = OpinionTestWeibos(topic, i, json.dumps(weibos_dict_new[i]))
        item_exist = db.session.query(OpinionTestWeibos).filter(
            OpinionTestWeibos.topic == topic,
            OpinionTestWeibos.child_topic == i).first()
        if item_exist:
            db.session.delete(item_exist)
        db.session.add(item)
        db.session.commit()
def save_weibos(excel_name, topic, child_topic_list, w_limit):
    """Load top-weighted weibo texts per child topic from an Excel workbook,
    enrich them with records fetched from a Xapian index, and persist the
    result to the OpinionTestWeibos table (replacing any existing rows).

    Parameters:
        excel_name       -- path of the xlrd-readable workbook; one sheet per
                            child topic, named by the topic's integer id.
                            Column 0 holds the weight, column 1 the text,
                            rows sorted by weight descending.
        topic            -- topic identifier stored with each DB row.
        child_topic_list -- iterable of child-topic ids (sheet names).
        w_limit          -- max number of weibo rows taken per sheet.

    Side effects: queries Xapian, deletes/inserts OpinionTestWeibos rows and
    commits the session once per child topic.
    """
    data = xlrd.open_workbook(excel_name)
    weibos_dict = {}
    for i in child_topic_list:
        weibos_dict[i] = []
        table_weibos = data.sheet_by_name(str(int(i)))
        n_row_weibos = table_weibos.nrows
        # BUG FIX: the original read the undefined name `n_row_weibo` here,
        # raising NameError whenever a sheet had <= w_limit rows.
        if n_row_weibos <= w_limit:
            n_rows = n_row_weibos
        else:
            n_rows = w_limit
        # Rows are already sorted by weight (descending), so taking the
        # first n_rows yields the top-weighted entries.
        for j in range(n_rows):
            line = table_weibos.row_values(j)
            weibo_text = line[1]
            weibo_weight = line[0]
            try:
                # TODO(review): ideally this should append the full weibo
                # record (username etc.), not just (text, weight).
                weibos_dict[i].append((weibo_text, weibo_weight))
            except KeyError:
                # Defensive only: weibos_dict[i] is initialised above, so
                # this branch is normally unreachable.
                weibos_dict[i] = [(weibo_text, weibo_weight)]
    # Fetch concrete weibo data from Xapian -- test configuration only
    # (hard-coded path and timestamp window).
    s = XapianSearch(path='/home/ubuntu3/huxiaoqian/case/20140724/20140804/',
                     name='master_timeline_weibo',
                     schema_version='5')
    begin_ts = 1378050300
    end_ts = 1378051200
    query_dict = {
        'timestamp': {'$gt': begin_ts, '$lt': end_ts},
        'message_type': 2
    }
    weibos_dict_new = {}
    scount, weibo_results = s.search(query=query_dict, fields=fields_list)
    # Walk the result stream, assigning 5 weibos (j = 0..4) to each of the
    # child-topic buckets i = 0..10; stop after bucket 10 is filled.
    i = 0
    j = 0
    for weibo in weibo_results():
        if i == 11:
            break
        # Replace the indexed text with the excel-derived text for bucket i.
        weibo['text'] = weibos_dict[str(i)][j][0]
        # Resolve username, profile image and weibo URL for display.
        username, profileimage = getuserinfo(weibo['user'])
        weibo['username'] = username
        weibo['profile_image_url'] = profileimage
        weibo['timestamp'] = ts2date(weibo['timestamp'])
        weibo['weibo_link'] = weiboinfo2url(weibo['user'], weibo['_id'])
        weight = weibos_dict[str(i)][j][1]
        try:
            weibos_dict_new[i].append((weibo, weight))
        except KeyError:
            weibos_dict_new[i] = [(weibo, weight)]
        if j == 4:
            j = 0
            i += 1
        else:
            j += 1
    # Persist: replace any existing row for (topic, child_topic), then insert.
    for i in range(len(child_topic_list)):
        item = OpinionTestWeibos(topic, i, json.dumps(weibos_dict_new[i]))
        item_exist = db.session.query(OpinionTestWeibos).filter(
            OpinionTestWeibos.topic == topic,
            OpinionTestWeibos.child_topic == i).first()
        if item_exist:
            db.session.delete(item_exist)
        db.session.add(item)
        db.session.commit()
print "** " * 10 print r['_id'] print r['user'] print r['text'] print r['timestamp'] print r['terms'] print 'hits: %s' % count else: print 'no results' ''' query_dict={ '_id': 3617761840458230 } count, get_results = s.search(query=query_dict, fields=['text', 'timestamp', 'user', 'terms', '_id']) print count for r in get_results(): print "** " * 10 print r['_id'] print r['user'] print r['text'] print r['timestamp'] print r['terms'] """