Ejemplo n.º 1
0
if count!=0:
    for r in get_results():
        print "** " * 10
        print r['_id']
        print r['user']
        print r['text']
        print r['timestamp']
        print r['terms']

    print 'hits: %s' % count
else:
    print 'no results'

'''
query_dict = {'_id': 3617761840458230}
count, get_results = s.search(
    query=query_dict, fields=['text', 'timestamp', 'user', 'terms', '_id'])
print count
for r in get_results():
    print "** " * 10
    print r['_id']
    print r['user']
    print r['text']
    print r['timestamp']
    print r['terms']
"""
print 'query2:'
#根据user_id查询
count, get_results = s.search(query={'_id': 2171696122}, fields=['text', 'timestamp', 'user', 'terms', '_id'])
print count
for r in get_results():
    print "** " * 10
Ejemplo n.º 2
0
def save_weibos(excel_name, topic, child_topic_list,
                w_limit):  # look up related weibo content by text
    """Collect the top-weighted weibo rows per child topic from an Excel
    workbook, enrich them with user info via a Xapian search, and persist
    one OpinionTestWeibos record per child topic.

    Args:
        excel_name: path to an xlrd-readable workbook with one sheet per
            child topic; each row is [weight, text], sorted by weight
            descending.
        topic: parent topic identifier stored with each record.
        child_topic_list: iterable of child-topic ids (used as sheet names).
        w_limit: maximum number of weibo rows to keep per child topic.
    """
    data = xlrd.open_workbook(excel_name)
    weibos_dict = {}
    for i in child_topic_list:
        weibos_dict[i] = []
        table_weibos = data.sheet_by_name(str(int(i)))
        n_row_weibos = table_weibos.nrows
        # Rows are already sorted by weight descending, so taking the
        # first w_limit rows keeps the heaviest ones.
        # FIX: original read undefined `n_row_weibo` (NameError typo).
        n_rows = min(n_row_weibos, w_limit)
        for j in range(n_rows):
            line = table_weibos.row_values(j)
            weibo_text = line[1]
            weibo_weight = line[0]
            # weibos_dict[i] is always initialised above, so the old
            # try/except KeyError fallback was dead code.
            # TODO: ideally append the full weibo record (username etc.),
            # not just (text, weight).
            weibos_dict[i].append((weibo_text, weibo_weight))
    # Fetch concrete weibo data (test use only).
    s = XapianSearch(path='/home/ubuntu3/huxiaoqian/case/20140724/20140804/',
                     name='master_timeline_weibo',
                     schema_version='5')
    begin_ts = 1378050300
    end_ts = 1378051200
    query_dict = {
        'timestamp': {
            '$gt': begin_ts,
            '$lt': end_ts
        },
        'message_type': 2
    }
    weibos_dict_new = {}
    scount, weibo_results = s.search(query=query_dict, fields=fields_list)
    i = 0  # child-topic index, advanced after every 5 weibos
    j = 0  # row index within the current child topic (0..4)
    for weibo in weibo_results():
        if i == 11:
            break
        weibo['text'] = weibos_dict[str(i)][j][0]
        # Attach username, profile image and weibo URL.
        username, profileimage = getuserinfo(weibo['user'])
        weibo['username'] = username
        weibo['profile_image_url'] = profileimage
        weibo['timestamp'] = ts2date(weibo['timestamp'])
        weibo['weibo_link'] = weiboinfo2url(weibo['user'], weibo['_id'])
        weight = weibos_dict[str(i)][j][1]
        weibos_dict_new.setdefault(i, []).append((weibo, weight))
        if j == 4:
            j = 0
            i += 1
        else:
            j += 1
    # Replace any existing record for (topic, child_topic), then save.
    for i in range(len(child_topic_list)):
        item = OpinionTestWeibos(topic, i, json.dumps(weibos_dict_new[i]))
        item_exist = db.session.query(OpinionTestWeibos).filter(
            OpinionTestWeibos.topic == topic,
            OpinionTestWeibos.child_topic == i).first()
        if item_exist:
            db.session.delete(item_exist)
        db.session.add(item)
    db.session.commit()
Ejemplo n.º 3
0
def save_weibos(excel_name, topic, child_topic_list, w_limit):  # look up related weibo content by text
    """Collect the top-weighted weibo rows per child topic from an Excel
    workbook, enrich them with user info via a Xapian search, and persist
    one OpinionTestWeibos record per child topic.

    Args:
        excel_name: path to an xlrd-readable workbook with one sheet per
            child topic; each row is [weight, text], sorted by weight
            descending.
        topic: parent topic identifier stored with each record.
        child_topic_list: iterable of child-topic ids (used as sheet names).
        w_limit: maximum number of weibo rows to keep per child topic.
    """
    data = xlrd.open_workbook(excel_name)
    weibos_dict = {}
    for i in child_topic_list:
        weibos_dict[i] = []
        table_weibos = data.sheet_by_name(str(int(i)))
        n_row_weibos = table_weibos.nrows
        # Rows are already sorted by weight descending, so taking the
        # first w_limit rows keeps the heaviest ones.
        # FIX: original read undefined `n_row_weibo` (NameError typo).
        n_rows = min(n_row_weibos, w_limit)
        for j in range(n_rows):
            line = table_weibos.row_values(j)
            weibo_text = line[1]
            weibo_weight = line[0]
            # weibos_dict[i] is always initialised above, so the old
            # try/except KeyError fallback was dead code.
            # TODO: ideally append the full weibo record (username etc.),
            # not just (text, weight).
            weibos_dict[i].append((weibo_text, weibo_weight))
    # Fetch concrete weibo data (test use only).
    s = XapianSearch(path='/home/ubuntu3/huxiaoqian/case/20140724/20140804/',
                     name='master_timeline_weibo',
                     schema_version='5')
    begin_ts = 1378050300
    end_ts = 1378051200
    query_dict = {
        'timestamp': {'$gt': begin_ts, '$lt': end_ts},
        'message_type': 2
    }
    weibos_dict_new = {}
    scount, weibo_results = s.search(query=query_dict, fields=fields_list)
    i = 0  # child-topic index, advanced after every 5 weibos
    j = 0  # row index within the current child topic (0..4)
    for weibo in weibo_results():
        if i == 11:
            break
        weibo['text'] = weibos_dict[str(i)][j][0]
        # Attach username, profile image and weibo URL.
        username, profileimage = getuserinfo(weibo['user'])
        weibo['username'] = username
        weibo['profile_image_url'] = profileimage
        weibo['timestamp'] = ts2date(weibo['timestamp'])
        weibo['weibo_link'] = weiboinfo2url(weibo['user'], weibo['_id'])
        weight = weibos_dict[str(i)][j][1]
        weibos_dict_new.setdefault(i, []).append((weibo, weight))
        if j == 4:
            j = 0
            i += 1
        else:
            j += 1
    # Replace any existing record for (topic, child_topic), then save.
    for i in range(len(child_topic_list)):
        item = OpinionTestWeibos(topic, i, json.dumps(weibos_dict_new[i]))
        item_exist = db.session.query(OpinionTestWeibos).filter(
            OpinionTestWeibos.topic == topic,
            OpinionTestWeibos.child_topic == i).first()
        if item_exist:
            db.session.delete(item_exist)
        db.session.add(item)
    db.session.commit()
Ejemplo n.º 4
0
        print "** " * 10
        print r['_id']
        print r['user']
        print r['text']
        print r['timestamp']
        print r['terms']

    print 'hits: %s' % count
else:
    print 'no results'

'''
query_dict={
    '_id': 3617761840458230
    }
count, get_results = s.search(query=query_dict, fields=['text', 'timestamp', 'user', 'terms', '_id'])
print count
for r in get_results():
    print "** " * 10
    print r['_id']
    print r['user']
    print r['text']
    print r['timestamp']
    print r['terms'] 





    
"""