def subject_weibo2news(item):
    """Convert a weibo record into a news-style dict.

    Every key listed in ``NULL_FIELDS`` is first initialised to ``None``.
    Recognised weibo keys are then copied under their news names:

    * ``timestamp``      -> ``timestamp``, plus ``date`` (``ts2date``) and
      ``datetime`` (``ts2datetime``)
    * ``_id``            -> ``_id`` and ``id``
    * ``reposts_count``  -> ``replies``
    * ``comments_count`` -> ``same_news_num``
    * ``name``           -> ``news_author`` and ``user_name``
    * ``user``           -> ``user_id``
    * ``text``           -> ``title`` (first ``【...】`` headline, brackets
      included) and ``content168`` (the text with that headline removed)
    * ``weibourl``       -> ``showurl``

    When the text carries no ``【...】`` headline, ``title`` is set to ``None``
    and ``content168`` receives the text unchanged (previously this case
    raised ``AttributeError`` because the failed match was dereferenced).

    :param item: mapping describing one weibo; iterated with ``iteritems()``.
    :return: the populated news dict.
    """
    news = dict()
    for field in NULL_FIELDS:
        news[field] = None

    for k, v in item.iteritems():
        if k == 'timestamp':
            news['timestamp'] = v
            news['date'] = ts2date(v)
            news['datetime'] = ts2datetime(v)
        elif k == '_id':
            news['_id'] = v
            news['id'] = v
        elif k == 'reposts_count':
            news['replies'] = v
        elif k == 'comments_count':
            news['same_news_num'] = v
        elif k == 'name':
            news['news_author'] = v
            news['user_name'] = v
        elif k == 'user':
            news['user_id'] = v
        elif k == 'text':
            # NOTE(review): str() is applied before matching, as before;
            # presumably the text is a byte string here -- confirm.
            headline = re.search(r'【(.*?)】', str(v))
            if headline is None:
                # No bracketed headline: keep the whole text as the content.
                news['title'] = None
                news['content168'] = v
            else:
                # The full match already includes both brackets.
                news['title'] = headline.group(0)
                news['content168'] = v.replace(news['title'], '')
        elif k == 'weibourl':
            news['showurl'] = v

    return news
def object_weibo2comment(item):
    """Build a comment-style dict from a weibo record.

    The result starts with every ``NULL_FIELDS`` key set to ``None`` and
    ``news_id`` fixed to ``'weibo'``.  Recognised keys of *item* are then
    copied across:

    * ``timestamp`` -> ``timestamp``, ``date`` (``ts2date``) and ``datetime``
      (``ts2datetime``)
    * ``_id`` -> ``_id`` and ``id``
    * ``reposts_count`` / ``comments_count`` / ``attitudes_count`` -> same key
    * ``name`` -> ``user_name``
    * ``weibourl`` -> ``comment_source``
    * ``text`` -> ``content168``

    NOTE(review): this module defines ``object_weibo2comment`` a second time
    further down with the same logic; the later definition is the one that
    stays bound to the name.

    :param item: mapping describing one weibo; iterated with ``iteritems()``.
    :return: the populated comment dict.
    """
    comment = dict.fromkeys(NULL_FIELDS)
    comment['news_id'] = 'weibo'

    for key, value in item.iteritems():
        if key == 'timestamp':
            comment['timestamp'] = value
            comment['date'] = ts2date(value)
            comment['datetime'] = ts2datetime(value)
        elif key == '_id':
            comment['_id'] = comment['id'] = value
        elif key in ('reposts_count', 'comments_count', 'attitudes_count'):
            # Counters keep their original key.
            comment[key] = value
        elif key == 'name':
            comment['user_name'] = value
        elif key == 'weibourl':
            comment['comment_source'] = value
        elif key == 'text':
            comment['content168'] = value

    return comment
def object_weibo2comment(item):
    """Translate one weibo record into the comment dict layout.

    All ``NULL_FIELDS`` keys are pre-filled with ``None`` and ``news_id`` is
    set to ``'weibo'``.  The weibo's ``timestamp`` is stored together with its
    ``ts2date`` / ``ts2datetime`` renderings (``date`` / ``datetime``); the
    three counters (``reposts_count``, ``comments_count``,
    ``attitudes_count``) are copied under their own key; the remaining
    recognised keys are renamed according to the table below.  Unrecognised
    keys are ignored.

    NOTE(review): an earlier definition of ``object_weibo2comment`` with the
    same logic exists in this module; this later one replaces it.

    :param item: mapping describing one weibo; iterated with ``iteritems()``.
    :return: the populated comment dict.
    """
    # Source keys copied verbatim under the same name.
    copied_as_is = frozenset(['reposts_count', 'comments_count',
                              'attitudes_count'])
    # Source key -> destination key(s) in the comment dict.
    renamed = {
        '_id': ('_id', 'id'),
        'name': ('user_name',),
        'weibourl': ('comment_source',),
        'text': ('content168',),
    }

    comment = {}
    for null_key in NULL_FIELDS:
        comment[null_key] = None
    comment['news_id'] = 'weibo'

    for src, val in item.iteritems():
        if src == 'timestamp':
            comment['timestamp'] = val
            comment['date'] = ts2date(val)
            comment['datetime'] = ts2datetime(val)
        elif src in copied_as_is:
            comment[src] = val
        else:
            for dst in renamed.get(src, ()):
                comment[dst] = val

    return comment
def personal_weibo_count_false(uid):
    """Return per-day post / repost counts for a user as a JSON string.

    Counting is done by the search backend (``count_only=True``): one query
    for all posts of the user in the window and one restricted to
    ``retweeted_status == '1'``; original posts are the difference.

    Windows are 24 hours long.  The most recent one ends at 02:00 of the
    current day (converted with ``time.mktime``) and each earlier one ends
    24 hours before the next.  By default 89 windows are produced; when the
    ``interval`` request argument is present, ``int(interval) - 1`` windows
    are produced instead.  The ``m`` request argument selects the backend:
    ``'test'`` uses ``xapian_search_weibo_test``, anything else
    ``xapian_search_weibo``.

    :param uid: user id; converted with ``int()`` for the query.
    :return: JSON object with the parallel lists ``time`` (ISO dates of the
        window ends), ``count`` (all posts), ``repost`` and ``fipost``
        (posts minus reposts), oldest window first.
    """
    day_seconds = 24 * 3600

    # Anchor: today at 02:00:00, as an integer timestamp.
    anchor = datetime.datetime.today().replace(
        hour=2, minute=0, second=0, microsecond=0)
    now_ts = int(time.mktime(anchor.timetuple()))

    mode = request.args.get('m')
    interval = request.args.get('interval')
    total_days = int(interval) - 1 if interval else 89

    if mode == 'test':
        searcher = xapian_search_weibo_test
    else:
        searcher = xapian_search_weibo

    time_arr, post_arr, repost_arr, fipost_arr = [], [], [], []

    # Walk from the oldest window to the most recent one.
    for days_back in reversed(range(total_days)):
        end_ts = now_ts - days_back * day_seconds
        begin_ts = end_ts - day_seconds
        user = int(uid)

        all_query = {
            'timestamp': {'$gt': begin_ts, '$lt': end_ts},
            'user': user,
        }
        repost_query = {
            'timestamp': {'$gt': begin_ts, '$lt': end_ts},
            'user': user,
            'retweeted_status': '1',
        }

        day_total = searcher.search(query=all_query, count_only=True)
        day_reposts = searcher.search(query=repost_query, count_only=True)

        time_arr.append(ts2date(end_ts).isoformat())
        post_arr.append(day_total)
        repost_arr.append(day_reposts)
        fipost_arr.append(day_total - day_reposts)

    # Single parenthesised argument: emits the same line whether ``print`` is
    # parsed as a statement or as a function (label, separator space, total).
    print('sum count: ' + ' ' + str(sum(post_arr)))
    return json.dumps({'time': time_arr, 'count': post_arr, 'repost': repost_arr, 'fipost': fipost_arr})
def personal_weibo_count(uid):
    """Return per-day post / repost counts for a user as a JSON string.

    For every daily window the user's weibos are fetched from the search
    backend with only the ``retweeted_status`` field, then classified
    locally: a truthy ``retweeted_status`` counts as a repost, anything else
    as an original post.  The count returned by the backend is not used.

    Windows are 24 hours long.  The most recent one ends at 02:00 of the
    current day (converted with ``time.mktime``) and each earlier one ends
    24 hours before the next.  By default 89 windows are produced; when the
    ``interval`` request argument is present, ``int(interval) - 1`` windows
    are produced instead.  The ``m`` request argument selects the backend:
    ``'test'`` uses ``xapian_search_weibo_test``, anything else
    ``xapian_search_weibo``.

    :param uid: user id; converted with ``int()`` for the query.
    :return: JSON object with the parallel lists ``time`` (ISO dates of the
        window ends), ``count`` (all posts), ``repost`` and ``fipost``
        (non-reposts), oldest window first.
    """
    day_seconds = 24 * 3600

    # Anchor: today at 02:00:00, as an integer timestamp.
    anchor = datetime.datetime.today().replace(
        hour=2, minute=0, second=0, microsecond=0)
    now_ts = int(time.mktime(anchor.timetuple()))

    mode = request.args.get('m')
    interval = request.args.get('interval')
    total_days = int(interval) - 1 if interval else 89

    if mode == 'test':
        searcher = xapian_search_weibo_test
    else:
        searcher = xapian_search_weibo

    time_arr, post_arr, repost_arr, fipost_arr = [], [], [], []

    # Walk from the oldest window to the most recent one.
    for days_back in reversed(range(total_days)):
        end_ts = now_ts - days_back * day_seconds
        begin_ts = end_ts - day_seconds
        query = {
            'timestamp': {'$gt': begin_ts, '$lt': end_ts},
            'user': int(uid),
        }

        _count, get_results = searcher.search(
            query=query, fields=['retweeted_status'])

        # One flag per hit: True when the hit is a repost.
        repost_flags = [bool(hit['retweeted_status']) for hit in get_results()]
        day_total = len(repost_flags)
        day_reposts = sum(repost_flags)

        time_arr.append(ts2date(end_ts).isoformat())
        post_arr.append(day_total)
        repost_arr.append(day_reposts)
        fipost_arr.append(day_total - day_reposts)

    # Single parenthesised argument: emits the same line whether ``print`` is
    # parsed as a statement or as a function (label, separator space, total).
    print('sum count: ' + ' ' + str(sum(post_arr)))
    return json.dumps({'time': time_arr, 'count': post_arr, 'repost': repost_arr, 'fipost': fipost_arr})
def transform(item):
    """Map a weibo record onto the comment dict layout.

    Unlike a tolerant copy, every source key read here is mandatory: a
    missing ``reposts_count``, ``weibourl``, ``timestamp``, ``text``,
    ``attitudes_count``, ``comments_count``, ``geo``, ``_id`` or ``name``
    raises ``KeyError``.  ``first_in``, ``last_modify`` and ``news_content``
    are always ``None`` and ``news_id`` is always ``'weibo'``.

    :param item: mapping describing one weibo, indexed by key.
    :return: new dict in the comment layout.
    """
    # Entries are listed in the order the source keys were originally read,
    # so a missing key is reported in the same order as before.
    return {
        'reposts_count': item['reposts_count'],
        'user_comment_url': item['weibourl'],
        'comment_source': item['weibourl'],
        'first_in': None,
        'last_modify': None,
        'timestamp': item['timestamp'],
        'content168': item['text'],
        'datetime': ts2datetime(item['timestamp']),
        'news_id': 'weibo',
        'attitudes_count': item['attitudes_count'],
        'news_content': None,
        'comments_count': item['comments_count'],
        'location': item['geo'],
        'date': ts2date(item['timestamp']),
        '_id': item['_id'],
        'id': item['_id'],
        'user_name': item['name'],
    }