Esempio n. 1
0
def subject_weibo2news(item):
    """Convert a weibo record into a news-shaped dict.

    Every name in ``NULL_FIELDS`` is first set to ``None``; the weibo fields
    present in ``item`` are then copied over under their news names:

    - ``timestamp``      -> ``timestamp``, ``date`` (via ``ts2date``) and
      ``datetime`` (via ``ts2datetime``)
    - ``_id``            -> ``_id`` and ``id``
    - ``reposts_count``  -> ``replies``
    - ``comments_count`` -> ``same_news_num``
    - ``name``           -> ``news_author`` and ``user_name``
    - ``user``           -> ``user_id``
    - ``text``           -> ``title`` and ``content168``
    - ``weibourl``       -> ``showurl``

    Any other key in ``item`` is ignored.

    The title is the first ``【...】`` span of the text, brackets included,
    and ``content168`` is the text with every occurrence of that title
    removed.  When the text contains no such span, ``title`` is ``None`` and
    ``content168`` is the unmodified text (this case used to raise
    ``AttributeError``).

    :param item: mapping exposing ``iteritems()`` that holds the weibo fields.
    :returns: a new dict with the news fields described above.
    """
    news = dict()
    for field in NULL_FIELDS:
        news[field] = None

    for k, v in item.iteritems():
        if k == 'timestamp':
            news['timestamp'] = v
            news['date'] = ts2date(v)
            news['datetime'] = ts2datetime(v)
        elif k == '_id':
            news['_id'] = v
            news['id'] = v
        elif k == 'reposts_count':
            news['replies'] = v
        elif k == 'comments_count':
            news['same_news_num'] = v
        elif k == 'name':
            news['news_author'] = v
            news['user_name'] = v
        elif k == 'user':
            news['user_id'] = v
        elif k == 'text':
            # re.search returns None when the text has no bracketed title, so
            # the match must be checked before it is dereferenced.
            matched = re.search(r'【(.*?)】', str(v))
            if matched is None:
                news['title'] = None
                news['content168'] = v
            else:
                # group(0) is the whole match, i.e. the title with brackets.
                news['title'] = matched.group(0)
                news['content168'] = v.replace(news['title'], '')
        elif k == 'weibourl':
            news['showurl'] = v

    return news
Esempio n. 2
0
def subject_weibo2news(item):
    """Convert a weibo record into a news-shaped dict.

    Every name in ``NULL_FIELDS`` is first set to ``None``; the weibo fields
    present in ``item`` are then copied over under their news names:

    - ``timestamp``      -> ``timestamp``, ``date`` (via ``ts2date``) and
      ``datetime`` (via ``ts2datetime``)
    - ``_id``            -> ``_id`` and ``id``
    - ``reposts_count``  -> ``replies``
    - ``comments_count`` -> ``same_news_num``
    - ``name``           -> ``news_author`` and ``user_name``
    - ``user``           -> ``user_id``
    - ``text``           -> ``title`` and ``content168``
    - ``weibourl``       -> ``showurl``

    Any other key in ``item`` is ignored.

    The title is the first ``【...】`` span of the text, brackets included,
    and ``content168`` is the text with every occurrence of that title
    removed.  When the text contains no such span, ``title`` is ``None`` and
    ``content168`` is the unmodified text (this case used to raise
    ``AttributeError``).

    :param item: mapping exposing ``iteritems()`` that holds the weibo fields.
    :returns: a new dict with the news fields described above.
    """
    news = dict()
    for field in NULL_FIELDS:
        news[field] = None

    for k, v in item.iteritems():
        if k == 'timestamp':
            news['timestamp'] = v
            news['date'] = ts2date(v)
            news['datetime'] = ts2datetime(v)
        elif k == '_id':
            news['_id'] = v
            news['id'] = v
        elif k == 'reposts_count':
            news['replies'] = v
        elif k == 'comments_count':
            news['same_news_num'] = v
        elif k == 'name':
            news['news_author'] = v
            news['user_name'] = v
        elif k == 'user':
            news['user_id'] = v
        elif k == 'text':
            # re.search returns None when the text has no bracketed title, so
            # the match must be checked before it is dereferenced.
            matched = re.search(r'【(.*?)】', str(v))
            if matched is None:
                news['title'] = None
                news['content168'] = v
            else:
                # group(0) is the whole match, i.e. the title with brackets.
                news['title'] = matched.group(0)
                news['content168'] = v.replace(news['title'], '')
        elif k == 'weibourl':
            news['showurl'] = v

    return news
Esempio n. 3
0
def object_weibo2comment(item):
    """Convert a weibo record into a comment-shaped dict.

    The result starts with every name in ``NULL_FIELDS`` mapped to ``None``
    and ``news_id`` fixed to ``'weibo'``.  Recognised keys of ``item`` are
    then copied over; unrecognised keys are ignored.

    :param item: mapping exposing ``iteritems()`` that holds the weibo fields.
    :returns: a new dict with the comment fields.
    """
    # Keys stored under the same name in the comment.
    same_name = ('reposts_count', 'comments_count', 'attitudes_count')
    # Keys stored under a different name in the comment.
    renamed = {
        'name': 'user_name',
        'weibourl': 'comment_source',
        'text': 'content168',
    }

    comment = dict.fromkeys(NULL_FIELDS)
    comment['news_id'] = 'weibo'

    for key, value in item.iteritems():
        if key == 'timestamp':
            # The raw timestamp is kept alongside its two derived forms.
            comment['timestamp'] = value
            comment['date'] = ts2date(value)
            comment['datetime'] = ts2datetime(value)
        elif key == '_id':
            comment['_id'] = value
            comment['id'] = value
        elif key in same_name:
            comment[key] = value
        elif key in renamed:
            comment[renamed[key]] = value

    return comment
Esempio n. 4
0
def object_weibo2comment(item):
    """Convert a weibo record into a comment-shaped dict.

    The result starts with every name in ``NULL_FIELDS`` mapped to ``None``
    and ``news_id`` fixed to ``'weibo'``.  Recognised keys of ``item`` are
    then copied over; unrecognised keys are ignored.

    :param item: mapping exposing ``iteritems()`` that holds the weibo fields.
    :returns: a new dict with the comment fields.
    """
    # Keys stored under the same name in the comment.
    same_name = ('reposts_count', 'comments_count', 'attitudes_count')
    # Keys stored under a different name in the comment.
    renamed = {
        'name': 'user_name',
        'weibourl': 'comment_source',
        'text': 'content168',
    }

    comment = dict.fromkeys(NULL_FIELDS)
    comment['news_id'] = 'weibo'

    for key, value in item.iteritems():
        if key == 'timestamp':
            # The raw timestamp is kept alongside its two derived forms.
            comment['timestamp'] = value
            comment['date'] = ts2date(value)
            comment['datetime'] = ts2datetime(value)
        elif key == '_id':
            comment['_id'] = value
            comment['id'] = value
        elif key in same_name:
            comment[key] = value
        elif key in renamed:
            comment[renamed[key]] = value

    return comment
Esempio n. 5
0
def personal_weibo_count_false(uid):
    total_days = 89
    today = datetime.datetime.today()
    now_ts = time.mktime(datetime.datetime(today.year, today.month, today.day, 2, 0).timetuple())
    now_ts = int(now_ts)
    during = 24 * 3600
    time_arr = []
    post_arr = []
    repost_arr = []
    fipost_arr = []

    m = request.args.get('m')

    if request.args.get('interval'):
        total_days =  int(request.args.get('interval')) - 1

    for i in range(total_days-1, -1, -1):
        end_ts = now_ts - i * during
        begin_ts = end_ts - during 

        repost_query_dict = {
            'timestamp': {
                '$gt': begin_ts,
                '$lt': end_ts
            },
            'user': int(uid),
            'retweeted_status': '1'
        }

        post_query_dict = {
            'timestamp': {
                '$gt': begin_ts,
                '$lt': end_ts
            },
            'user': int(uid)
        }

        if m == 'test':
            post_count = xapian_search_weibo_test.search(query=post_query_dict, count_only=True)
            repost_count = xapian_search_weibo_test.search(query=repost_query_dict, count_only=True)
            fipost_count = post_count - repost_count
        else:
            post_count = xapian_search_weibo.search(query=post_query_dict, count_only=True)
            repost_count = xapian_search_weibo.search(query=repost_query_dict, count_only=True)
            fipost_count = post_count - repost_count

        post_arr.append(post_count)
        fipost_arr.append(fipost_count)
        repost_arr.append(repost_count)
        time_arr.append(ts2date(end_ts).isoformat())
    print 'sum count: ', sum(post_arr)

    return json.dumps({'time': time_arr, 'count': post_arr, 'repost': repost_arr, 'fipost': fipost_arr})
Esempio n. 6
0
def personal_weibo_count(uid):
    total_days = 89
    today = datetime.datetime.today()
    now_ts = time.mktime(datetime.datetime(today.year, today.month, today.day, 2, 0).timetuple())
    now_ts = int(now_ts)
    during = 24 * 3600
    time_arr = []
    post_arr = []
    repost_arr = []
    fipost_arr = []

    m = request.args.get('m')

    if request.args.get('interval'):
        total_days =  int(request.args.get('interval')) - 1

    for i in range(total_days-1, -1, -1):
        end_ts = now_ts - i * during
        begin_ts = end_ts - during 

        query_dict = {
            'timestamp': {
                '$gt': begin_ts,
                '$lt': end_ts
            },
            'user': int(uid),
        }

        if m == 'test':
            count, get_results = xapian_search_weibo_test.search(query=query_dict, fields=['retweeted_status'])
        else:
            count, get_results = xapian_search_weibo.search(query=query_dict, fields=['retweeted_status'])
        post_count = 0
        fipost_count = 0
        repost_count = 0
        for r in get_results():
            if r['retweeted_status']:
                repost_count += 1
            else:
                fipost_count += 1
            post_count += 1

        post_arr.append(post_count)
        fipost_arr.append(fipost_count)
        repost_arr.append(repost_count)
        time_arr.append(ts2date(end_ts).isoformat())
    print 'sum count: ', sum(post_arr)

    return json.dumps({'time': time_arr, 'count': post_arr, 'repost': repost_arr, 'fipost': fipost_arr})
Esempio n. 7
0
def transform(item):
    """Build a comment-shaped dict from a weibo record.

    Every source key is read with ``item[...]``, so a missing key raises
    ``KeyError``.  ``first_in``, ``last_modify`` and ``news_content`` are
    always ``None`` and ``news_id`` is always ``'weibo'``.

    :param item: mapping with the keys ``reposts_count``, ``weibourl``,
        ``timestamp``, ``text``, ``attitudes_count``, ``comments_count``,
        ``geo``, ``_id`` and ``name``.
    :returns: a new dict with the comment fields.
    """
    # Entries are listed in the order the fields are read from ``item``.
    return {
        'reposts_count': item['reposts_count'],
        'user_comment_url': item['weibourl'],
        'comment_source': item['weibourl'],
        'first_in': None,
        'last_modify': None,
        'timestamp': item['timestamp'],
        'content168': item['text'],
        'datetime': ts2datetime(item['timestamp']),
        'news_id': 'weibo',
        'attitudes_count': item['attitudes_count'],
        'news_content': None,
        'comments_count': item['comments_count'],
        'location': item['geo'],
        'date': ts2date(item['timestamp']),
        '_id': item['_id'],
        'id': item['_id'],
        'user_name': item['name'],
    }
Esempio n. 8
0
def transform(item):
    """Build a comment-shaped dict from a weibo record.

    Every source key is read with ``item[...]``, so a missing key raises
    ``KeyError``.  ``first_in``, ``last_modify`` and ``news_content`` are
    always ``None`` and ``news_id`` is always ``'weibo'``.

    :param item: mapping with the keys ``reposts_count``, ``weibourl``,
        ``timestamp``, ``text``, ``attitudes_count``, ``comments_count``,
        ``geo``, ``_id`` and ``name``.
    :returns: a new dict with the comment fields.
    """
    # Entries are listed in the order the fields are read from ``item``.
    return {
        'reposts_count': item['reposts_count'],
        'user_comment_url': item['weibourl'],
        'comment_source': item['weibourl'],
        'first_in': None,
        'last_modify': None,
        'timestamp': item['timestamp'],
        'content168': item['text'],
        'datetime': ts2datetime(item['timestamp']),
        'news_id': 'weibo',
        'attitudes_count': item['attitudes_count'],
        'news_content': None,
        'comments_count': item['comments_count'],
        'location': item['geo'],
        'date': ts2date(item['timestamp']),
        '_id': item['_id'],
        'id': item['_id'],
        'user_name': item['name'],
    }