def process_old(nday=1):
    """Legacy sync of per-category edit stats into Elasticsearch.

    Pulls news/video stats for the day `nday` days back from MySQL, then
    writes one document per category to the edit_info index. Python 2 only
    (uses print statements); superseded by the logger-based process().
    """
    mysql_tool.connectdb(host="47.96.238.205", database="taozuiredian-news")
    mysql_tool.querydb("SET NAMES utf8mb4")
    data_news = edit_info.get_edit_news_info(nday)
    data_news = edit_info.get_hot_news_info(nday, data_news)
    data_news = edit_info.get_hot_list_info(nday, data_news)
    data_news = edit_info.get_news_effective_reading(nday, data_news)
    data_video = edit_info.get_edit_video_info(nday)
    data_video = edit_info.get_video_effective_reading(nday, data_video)
    mysql_tool.closedb()
    for data in [data_news, data_video]:
        for k, v in data.items():
            # Document id: <YYYY-MM-DD>_<channel>_<category id>.
            tid = time_tool.get_someday_str(-nday)
            url = edit_info.URL_ELASTICSEARCH_EDIT_INFO + \
                "/" + tid + "_" + v["channel"] + "_" + str(k)
            r = requests.get(url, headers=edit_info.JSON_HEADER,
                             timeout=(30, 120))
            if 200 != r.status_code:
                print r.status_code, r.reason, r.json()
            v['@timestamp'] = time_tool.get_someday_es_format(-nday)
            r = requests.post(url, headers=edit_info.JSON_HEADER,
                              data=json.dumps(v), timeout=(30, 120))
            if 200 != r.status_code and 201 != r.status_code:
                print r.status_code, r.reason, r.json()
            else:
                # NOTE(review): this inspects the POST response, but only the
                # earlier GET response would carry "_source"; looks like
                # leftover debugging — confirm before reusing this code.
                r_json = r.json()
                if "_source" in r_json.keys():
                    td = r_json["_source"]
                    if 'effective_reading' in td.keys():
                        # td['effective_reading'] = v['effective_reading']
                        del td['effective_reading']
                        print td
def process(nday=1):
    """Compute task statistics for the day `nday` days back and index them.

    Each task document gets a timestamp, derived "no child" deltas, an
    optional per-DAU ratio, and is POSTed to the task_info ES index.
    """
    mysql_tool.connectdb()
    mysql_tool.querydb("SET NAMES utf8mb4")
    task_stats = get_task_info(nday)
    mysql_tool.closedb()
    # Daily active user count, used to normalise completion totals.
    dau, _ = active.get_user_device_count(active.get_query(-nday))
    for task_id, doc in task_stats.items():
        doc['@timestamp'] = time_tool.get_someday_es_format(-nday)
        has_parent_split = 'num' in doc and 'num_for_parent' in doc
        if task_id != 0 and has_parent_split:
            # Non-parent figures = totals minus the parents' share.
            doc['num_for_no_child'] = doc['num'] - doc['num_for_parent']
            doc['n_for_no_child'] = doc['n'] - doc['n_for_parent']
            if dau > 0:
                doc['num_dau'] = float(doc['num']) / dau
        day_str = time_tool.get_someday_str(-nday)
        target = URL_ELASTICSEARCH_TASK_INFO + "/" + day_str + "_" + str(task_id)
        resp = requests.post(target, headers=JSON_HEADER,
                             data=json.dumps(doc), timeout=(10, 120))
        if resp.status_code not in (200, 201):
            logger.error(
                "request task_info index failed, status_code:%d, reason:%s",
                resp.status_code, resp.reason)
def get_news_category(nday=1, news_id_arr=None):
    """Group the given news ids by category.

    :param nday: look-back offset in days; "hot" news are those with
        hot_at before the start of day ``-nday + 1``.
    :param news_id_arr: iterable of news ids to classify (default: empty).
        Previously a mutable default argument (``[]``); fixed with a None
        sentinel.
    :return: dict category_id -> list of news ids, including the synthetic
        define.HOT_NEWS_CATEGORY_ID entry and the hot-list categories
        added by get_list_news_category().
    """
    news_id_arr = [] if news_id_arr is None else news_id_arr
    day = time_tool.get_someday_str(-nday + 1)
    list_news = {}
    sql = "select id, category_id from news where id in (%s)"
    # Query in chunks of 1000 ids to keep the IN clause bounded.
    for i in range(0, len(news_id_arr), 1000):
        id_arr = [str(nid) for nid in news_id_arr[i:i + 1000]]
        sql_use = sql % (",".join(id_arr))
        rt = mysql_tool.querydb(
            sql_use, logger,
            "select id, category_id from news for %d news" % len(id_arr))
        for v in rt:
            news_id = int(v[0])
            category_id = int(v[1])
            if category_id in list_news.keys():
                list_news[category_id].append(news_id)
            else:
                list_news[category_id] = [news_id]
    # Synthetic category: everything promoted to "hot" before `day`.
    list_news[define.HOT_NEWS_CATEGORY_ID] = []
    sql = "select id from news where hot_at < \"" + day + "\" and id in (%s)"
    for i in range(0, len(news_id_arr), 1000):
        id_arr = [str(nid) for nid in news_id_arr[i:i + 1000]]
        sql_use = sql % (",".join(id_arr))
        rt = mysql_tool.querydb(
            sql_use, logger,
            "select hot id from news for %d news" % len(id_arr))
        for v in rt:
            list_news[define.HOT_NEWS_CATEGORY_ID].append(int(v[0]))
    list_news = get_list_news_category(list_news)
    return list_news
def get_edit_video_info(nday=1):
    """Build per-category video publishing/pv stats for the day `nday` days back.

    :return: dict category_id -> document, including the synthetic TOTAL_ID
        roll-up entry and the HOT_VIDEO_CATEGORY_ID entry.
    """
    data = {}
    day1 = time_tool.get_someday_str(-nday)
    day2 = time_tool.get_someday_str(-nday + 1)
    tommorow = time_tool.get_someday_str(1)  # sic: "tomorrow"
    # Video category id -> display name.
    video_categories = {}
    sql = "select id, name from video_categories"
    rt = mysql_tool.querydb(sql, logger, sql)
    for v in rt:
        video_categories[int(v[0])] = v[1]
    # Videos newly published on the target day, per category.
    video_new_count = {}
    sql = "select category_id, count(*) from videos where published_at >= \"%s\" and published_at < \"%s\" group by category_id" % (day1, day2)
    handle_sql_hash_total(video_new_count, sql, logger, TOTAL_ID)
    # Cumulative pv per category.
    sql = "select category_id, sum(real_pv) from videos group by category_id"
    rt = mysql_tool.querydb(sql, logger, sql)
    total = 0
    for v in rt:
        k = int(v[0])
        total += int(v[1])
        data[k] = {}
        data[k]['channel_name'] = u"视频"
        data[k]['channel'] = "video"
        data[k]['category_id'] = k
        data[k]['pv_total'] = int(v[1])
        set_hash(video_categories, data[k], k, 'category_name')
        set_hash(video_new_count, data[k], k, 'new_published_count')
    # Roll-up entry across all categories.
    data[TOTAL_ID] = {}
    data[TOTAL_ID]['channel_name'] = u"视频"
    data[TOTAL_ID]['channel'] = "video"
    data[TOTAL_ID]['category_name'] = TOTAL_SHOW
    data[TOTAL_ID]['category_id'] = TOTAL_ID
    data[TOTAL_ID]['pv_total'] = total
    set_hash(video_new_count, data[TOTAL_ID], TOTAL_ID, 'new_published_count')
    # Videos promoted to "hot" on the target day.
    data[HOT_VIDEO_CATEGORY_ID] = {}
    sql = "select count(*) from videos where hot_at >= \"%s\" and hot_at < \"%s\" and published_at < \"%s\"" % (day1, day2, day2)
    rt = mysql_tool.querydb(sql, logger, sql)
    for v in rt:
        data[HOT_VIDEO_CATEGORY_ID]['new_published_count'] = int(v[0])
    # Cumulative pv of all hot videos.
    sql = "select sum(real_pv) from videos where hot_at < \"%s\"" % tommorow
    rt = mysql_tool.querydb(sql, logger, sql)
    for v in rt:
        data[HOT_VIDEO_CATEGORY_ID]['channel_name'] = u"视频"
        data[HOT_VIDEO_CATEGORY_ID]['channel'] = "video"
        data[HOT_VIDEO_CATEGORY_ID]['category_id'] = HOT_VIDEO_CATEGORY_ID
        data[HOT_VIDEO_CATEGORY_ID]['pv_total'] = int(v[0])
        data[HOT_VIDEO_CATEGORY_ID]['category_name'] = HOT_SHOW
    return data
def get_edit_video_info(user_arr=None, user_type="new", ver=None, nday=1):
    """Build per-category video effective-reading stats for a user cohort.

    :param user_arr: cohort user ids (unused here; kept for signature
        parity with get_edit_news_info).
    :param user_type: cohort label stored in each document (e.g. "new").
    :param ver: dict video_id -> effective-reading count for the cohort.
    :param nday: look-back offset; hot videos are those with hot_at before
        the start of day ``-nday + 1``.
    :return: dict category_id -> document dict.
    """
    # Fix mutable default arguments (previously [] / {}).
    user_arr = [] if user_arr is None else user_arr
    ver = {} if ver is None else ver
    data = {}
    day2 = time_tool.get_someday_str(-nday + 1)
    # Video category id -> display name.
    video_categories = {}
    sql = "select id, name from video_categories"
    rt = mysql_tool.querydb(sql, logger, sql)
    for v in rt:
        video_categories[int(v[0])] = v[1]
    video_id_arr = [str(video_id) for video_id in ver.keys()]
    # Effective readings per regular category (chunked IN clause).
    sql = "select id, category_id from videos where id in (%s)"
    for i in range(0, len(video_id_arr), 1000):
        id_arr = video_id_arr[i:i + 1000]
        sql_use = sql % (",".join(id_arr))
        rt = mysql_tool.querydb(
            sql_use, logger,
            "select id, category_id from videos for %d video" % len(id_arr))
        for v in rt:
            video_id = int(v[0])
            category_id = int(v[1])
            if category_id in data.keys():
                data[category_id]["effective_reading"] += ver[video_id]
            else:
                set_for_one_category(
                    data, category_id, user_type, u"视频", "video",
                    video_categories)
                data[category_id]["effective_reading"] = ver[video_id]
    # Effective readings for the synthetic "hot" category.
    sql = "select id from videos where id in (%s) and hot_at < \"" + \
        day2 + "\""
    for i in range(0, len(video_id_arr), 1000):
        id_arr = video_id_arr[i:i + 1000]
        sql_use = sql % (",".join(id_arr))
        rt = mysql_tool.querydb(
            sql_use, logger,
            "select id from videos for %d video" % len(id_arr))
        for v in rt:
            video_id = int(v[0])
            if edit_info.HOT_VIDEO_CATEGORY_ID in data.keys():
                data[edit_info.HOT_VIDEO_CATEGORY_ID]['effective_reading'] += ver[video_id]
            else:
                data[edit_info.HOT_VIDEO_CATEGORY_ID] = {}
                data[edit_info.HOT_VIDEO_CATEGORY_ID]['effective_reading'] = ver[video_id]
                # BUG FIX: was hard-coded to "new", silently ignoring the
                # user_type parameter; use the cohort label passed in.
                data[edit_info.HOT_VIDEO_CATEGORY_ID]['user_type'] = user_type
                data[edit_info.HOT_VIDEO_CATEGORY_ID]['channel_name'] = u"视频"
                data[edit_info.HOT_VIDEO_CATEGORY_ID]['channel'] = "video"
                data[edit_info.HOT_VIDEO_CATEGORY_ID]['category_id'] = edit_info.HOT_VIDEO_CATEGORY_ID
                data[edit_info.HOT_VIDEO_CATEGORY_ID]['category_name'] = edit_info.HOT_SHOW
    return data
def process(nday=1):
    """Sync per-category edit stats for day -nday into the edit_info ES index.

    Collects news/video stats from MySQL, merges in pv/uv numbers, computes
    a day-over-day pv delta against the previous day's ES document, derives
    ratio fields, and POSTs each document back to the index.
    """
    data_pv_uv = edit_pv_uv.process(nday)
    mysql_tool.connectdb(host="47.96.238.205", database="taozuiredian-news")
    mysql_tool.querydb("SET NAMES utf8mb4")
    data_news = get_edit_news_info(nday)
    data_news = get_hot_list_info(nday, data_news)
    data_news = get_hot_news_info(nday, data_news)
    data_news = get_news_effective_reading(nday, data_news)
    data_video = get_edit_video_info(nday)
    data_video = get_video_effective_reading(nday, data_video)
    mysql_tool.closedb()
    # Fold the pv/uv metrics into the matching news documents.
    for k, v in data_pv_uv.items():
        if k in data_news.keys():
            for k_, v_ in v.items():
                data_news[k][k_] = v_
    for data in [data_news, data_video]:
        for k, v in data.items():
            # Fetch the previous day's document to compute the pv delta.
            yid = time_tool.get_someday_str(-nday - 1)
            url = URL_ELASTICSEARCH_EDIT_INFO + \
                "/" + yid + "_" + v["channel"] + "_" + str(k)
            r = requests.get(url, headers=JSON_HEADER, timeout=(30, 120))
            if 200 != r.status_code:
                logger.error("request edit_info index failed, status_code:%d, reason:%s, k:%s",
                             r.status_code, r.reason, k)
            else:
                r_json = r.json()
                if "_source" in r_json.keys():
                    yd = r_json["_source"]
                    if "pv_total" in yd.keys():
                        # Daily pv = cumulative pv today minus yesterday's.
                        v["pv"] = v["pv_total"] - yd["pv_total"]
            # Derived ratio fields via set_pv_ (defined elsewhere —
            # presumably guards missing keys / zero divisors; confirm).
            set_pv_(v, "pv", "dau_count", "pv_dau")
            set_pv_(v, "pv", "new_published_count", "pv_published")
            set_pv_(v, "effective_reading", "pv", "reading_pv")
            set_pv_(v, "comments_count", "pv", "comments_pv")
            set_pv_(v, "zan_count", "pv", "zan_pv")
            set_pv_(v, "like_count", "pv", "like_pv")
            set_pv_(v, "share_count", "pv", "share_pv")
            v['@timestamp'] = time_tool.get_someday_es_format(-nday)
            set_pv_(v, "show_all_pv_uv", "new_published_count",
                    "show_all_pv_uv_new_published")
            _id = time_tool.get_someday_str(-nday)
            url = URL_ELASTICSEARCH_EDIT_INFO + \
                "/" + _id + "_" + v['channel'] + "_" + str(k)
            r = requests.post(url, headers=JSON_HEADER, data=json.dumps(v),
                              timeout=(30, 120))
            if 200 != r.status_code and 201 != r.status_code:
                logger.error("request edit index failed, status_code:%d, reason:%s, %s, %s",
                             r.status_code, json.dumps(r.json()), url,
                             json.dumps(v))
def get_news_content(news_id=1):
    """Fetch the body text of one news article, or "" when not found."""
    sql = "select content from news_details where news_id = %d" % news_id
    rows = mysql_tool.querydb(sql, logger, sql)
    first = next(iter(rows), None)
    return first[0] if first is not None else ""
def handle_sql_hash_total(mhash=None, sql="", lg=logger, hk=1):
    """Run a (key, count) aggregation query and record results plus a total.

    :param mhash: dict filled in place with int(key) -> int(count).
        Previously a mutable default argument (``{}``) shared between
        calls — a latent accumulation bug; fixed with a None sentinel.
    :param sql: aggregation SQL returning (key, count) rows.
    :param lg: logger passed through to querydb.
    :param hk: key under which the sum of all counts is stored.
    :return: the filled dict (also mutated in place for existing callers).
    """
    mhash = {} if mhash is None else mhash
    rt = mysql_tool.querydb(sql, lg, sql)
    total = 0
    for v in rt:
        total += int(v[1])
        mhash[int(v[0])] = int(v[1])
    mhash[hk] = total
    return mhash
def get_video(user_channel={}, nday=1):
    """Count effective video readings per acquisition channel for a user set.

    :param user_channel: dict channel -> list of user ids (as strings).
    :param nday: look-back offset in days.
    :return: dict channel -> {"num_video": reads, "num_user": readers},
        plus an "all_channel" roll-up summed over all channels.
    """
    day_start = time_tool.get_someday_str(-nday)
    day_end = time_tool.get_someday_str(-nday + 1)
    stats = {"all_channel": {"num_video": 0, "num_user": 0}}
    query_tpl = "select count(user_id), count(distinct user_id) from video_effective_readings where user_id in (%s) and created_at >= \"%s\" and created_at < \"%s\" and effective = 1"
    for channel, members in user_channel.items():
        stats[channel] = {"num_video": 0, "num_user": 0}
        # IN-clause chunks of at most 1000 ids.
        for start in range(0, len(members), 1000):
            chunk = members[start:start + 1000]
            rt = mysql_tool.querydb(
                query_tpl % (",".join(chunk), day_start, day_end),
                logger, "get video count for %d users" % len(chunk))
            if len(rt) > 0 and rt[0][0] is not None:
                stats[channel]["num_video"] += int(rt[0][0])
                stats[channel]["num_user"] += int(rt[0][1])
        stats["all_channel"]["num_video"] += stats[channel]["num_video"]
        stats["all_channel"]["num_user"] += stats[channel]["num_user"]
    return stats
def get_edit_news_info(user_arr=[], user_type="new", ner={}, nday=1):
    """Build per-category news stats for a user cohort.

    :param user_arr: cohort user ids (as strings) for the comment / like /
        zan queries.
    :param user_type: cohort label stored in each document.
    :param ner: dict news_id -> effective-reading count for the cohort.
    :param nday: look-back offset in days.
    :return: dict category_id -> document dict.

    NOTE(review): user_arr/ner are mutable default arguments; they are only
    read here, but callers should always pass them explicitly.
    """
    data = {}
    day1 = time_tool.get_someday_str(-nday)
    day2 = time_tool.get_someday_str(-nday + 1)
    # News category id -> display name.
    news_categories = {}
    sql = "select id, name from news_categories"
    rt = mysql_tool.querydb(sql, logger, sql)
    for v in rt:
        news_categories[int(v[0])] = v[1]
    news_id_arr = [str(news_id) for news_id in ner.keys()]
    # Effective readings per regular category (chunked IN clause).
    sql = "select id, category_id from news where id in (%s)"
    for i in range(0, len(news_id_arr), 1000):
        id_arr = news_id_arr[i:i + 1000]
        sql_use = sql % (",".join(id_arr))
        rt = mysql_tool.querydb(
            sql_use, logger,
            "select id, category_id from news for %d news" % len(id_arr))
        for v in rt:
            news_id = int(v[0])
            category_id = int(v[1])
            if category_id in data.keys():
                data[category_id]["effective_reading"] += ner[news_id]
            else:
                set_for_one_category(
                    data, category_id, user_type, u"资讯", "news",
                    news_categories)
                data[category_id]["effective_reading"] = ner[news_id]
    # Comment counts per category.
    sql = "select n.category_id, count(*) from news_comments as nc join news as n on (nc.news_id = n.id) where nc.user_id in (%s) and nc.created_at >= \"" + \
        day1 + "\" and nc.created_at < \"" + day2 + "\" group by n.category_id"
    set_common_edit_info(sql, user_arr, data, "select from news_comments",
                         "comments_count", user_type, "news", u"资讯",
                         news_categories)
    # Favourite (like) counts per category.
    sql = "select n.category_id, count(*) from user_like_news as uln join news as n on(uln.news_id = n.id) where uln.user_id in (%s) and uln.created_at >= \"" + \
        day1 + "\" and uln.created_at < \"" + day2 + "\" group by n.category_id"
    set_common_edit_info(sql, user_arr, data, "select from user_like_news",
                         "like_count", user_type, "news", u"资讯",
                         news_categories)
    # Upvote (zan) counts per category.
    sql = "select n.category_id, count(*) from user_zan_news_comments as uznc join news_comments as nc on (uznc.comment_id = nc.id) join news as n on(nc.news_id = n.id) where uznc.user_id in (%s) and uznc.created_at >= \"" + \
        day1 + "\" and uznc.created_at < \"" + day2 + "\" group by n.category_id"
    set_common_edit_info(sql, user_arr, data,
                         "select from user_zan_news_comments", "zan_count",
                         user_type, "news", u"资讯", news_categories)
    return data
def process(nday=1):
    """Index per-category edit stats split by user cohort (new vs not_new).

    Computes documents for the day's NEW users and POSTs them to the
    edit_user_info index; then derives the not_new cohort from the
    corresponding all-user edit_info document and POSTs that as well.
    """
    new_user_arr = get_new_user_arr(nday)
    mysql_tool.connectdb(host="47.96.238.205", database="taozuiredian-news")
    mysql_tool.querydb("SET NAMES utf8mb4")
    ner = get_news_effective_readings(new_user_arr, nday)
    data_news = get_edit_news_info(new_user_arr, "new", ner, nday)
    data_news = get_hot_news_info(new_user_arr, ner, data_news, nday)
    data_news = get_hot_list_info(new_user_arr, ner, data_news, nday)
    ver = get_video_effective_readings(new_user_arr, nday)
    data_video = get_edit_video_info(new_user_arr, "new", ver, nday)
    mysql_tool.closedb()
    for data in [data_news, data_video]:
        for k, v in data.items():
            v['@timestamp'] = time_tool.get_someday_es_format(-nday)
            _id = time_tool.get_someday_str(-nday)
            # Document id: <day>_<user_type>_<channel>_<category id>.
            url = URL_ELASTICSEARCH_EDIT_USER_INFO + \
                "/" + _id + "_" + v['user_type'] + \
                "_" + v['channel'] + "_" + str(k)
            r = requests.post(url, headers=JSON_HEADER, data=json.dumps(v),
                              timeout=(30, 120))
            if 200 != r.status_code and 201 != r.status_code:
                logger.error("request edit index failed, status_code:%d, reason:%s, %s, %s",
                             r.status_code, json.dumps(r.json()), url,
                             json.dumps(v))
            # Fetch the all-user document to derive the not_new cohort.
            url_edit = edit_info.URL_ELASTICSEARCH_EDIT_INFO + \
                "/" + _id + "_" + v['channel'] + "_" + str(k)
            r = requests.get(url_edit, headers=JSON_HEADER, timeout=(30, 120))
            if 200 != r.status_code:
                logger.error("request edit_user_info failed, status_code:%d, reason:%s",
                             r.status_code, json.dumps(r.json()))
            else:
                r_json = r.json()
                if "_source" in r_json.keys():
                    info = r_json["_source"]
                    v_ = copy.deepcopy(v)
                    v_["user_type"] = "not_new"
                    # set_a_key (defined elsewhere) presumably derives the
                    # not_new metric from the all-user and new-user values
                    # — confirm its semantics before relying on this.
                    set_a_key(v, info, v_, "effective_reading")
                    set_a_key(v, info, v_, "comments_count")
                    set_a_key(v, info, v_, "like_count")
                    set_a_key(v, info, v_, "zan_count")
                    url_ = URL_ELASTICSEARCH_EDIT_USER_INFO + \
                        "/" + _id + "_not_new_" + v['channel'] + "_" + str(k)
                    r = requests.post(url_, headers=JSON_HEADER,
                                      data=json.dumps(v_), timeout=(30, 120))
                    if 200 != r.status_code and 201 != r.status_code:
                        logger.error("request edit index failed, status_code:%d, reason:%s",
                                     r.status_code, json.dumps(r.json()))
def get_video_effective_reading(nday=1, data_video={}):
    """Attach effective-reading counts for the target day to data_video.

    Uses a dedicated connection (mysql_tool.sql_obj) for the readings query;
    the follow-up category lookups go through the module-level connection.

    NOTE(review): data_video is a mutable default argument and is mutated in
    place; callers must always pass it explicitly. Assumes
    data_video[TOTAL_ID] already exists (created by get_edit_video_info).
    """
    # Effective readings per video for the target day.
    day1 = time_tool.get_someday_str(-nday)
    day2 = time_tool.get_someday_str(-nday + 1)
    ver = {}
    sql = "select video_id, count(*) from video_effective_readings where created_at >= \"%s\" and created_at < \"%s\" and effective != 0 group by video_id" % (day1, day2)
    sob = mysql_tool.sql_obj()
    rt = sob.querydb(sql, logger, sql)
    total = 0
    for v in rt:
        ver[int(v[0])] = int(v[1])
        total += int(v[1])
    sob.closedb()
    data_video[TOTAL_ID]['effective_reading'] = total
    video_id_arr = [str(video_id) for video_id in ver.keys()]
    # Effective readings per regular category (chunked IN clause).
    sql = "select id, category_id from videos where id in (%s)"
    for i in range(0, len(video_id_arr), 1000):
        id_arr = video_id_arr[i:i + 1000]
        sql_use = sql % (",".join(id_arr))
        rt = mysql_tool.querydb(
            sql_use, logger,
            "select id, category_id from videos for %d videos" % len(id_arr))
        for v in rt:
            video_id = int(v[0])
            category_id = int(v[1])
            if category_id in data_video.keys():
                if 'effective_reading' in data_video[category_id].keys():
                    data_video[category_id]['effective_reading'] += ver[video_id]
                else:
                    data_video[category_id]['effective_reading'] = ver[video_id]
    # Effective readings for the hot-video category.
    if HOT_VIDEO_CATEGORY_ID in data_video.keys():
        sql = "select id from videos where id in (%s) and hot_at < \"" + \
            day2 + "\""
        for i in range(0, len(video_id_arr), 1000):
            id_arr = video_id_arr[i:i + 1000]
            sql_use = sql % (",".join(id_arr))
            rt = mysql_tool.querydb(
                sql_use, logger,
                "select id from hot videos for %d videos" % len(id_arr))
            for v in rt:
                video_id = int(v[0])
                if 'effective_reading' in data_video[HOT_VIDEO_CATEGORY_ID].keys():
                    data_video[HOT_VIDEO_CATEGORY_ID]['effective_reading'] += ver[video_id]
                else:
                    data_video[HOT_VIDEO_CATEGORY_ID]['effective_reading'] = ver[video_id]
    return data_video
def get_task_info(nday=1):
    """Collect per-task completion stats for the day `nday` days back.

    :return: dict task_id -> document with overall counts ('num'/'n'),
        counts restricted to masters ('num_for_parent'/'n_for_parent'),
        plus a synthetic entry under key 0 holding the average number of
        newbie tasks done by users who registered that day.
    """
    data = {}
    day1 = time_tool.get_someday_str(-nday)
    day2 = time_tool.get_someday_str(-nday + 1)
    # Task id -> task name.
    sql_task_name = "select id, name from user_tasks"
    rt = mysql_tool.querydb(sql_task_name, logger, sql_task_name)
    for v in rt:
        data[v[0]] = {}
        data[v[0]]['task_id'] = int(v[0])
        data[v[0]]['task_name'] = v[1]
    # Completions and distinct users per task on the target day.
    sql_task_num = "select task_id, sum(today_count), count(distinct user_id) from user_task_day_records where day = \"%s\" and today_count !=0 group by task_id" % day1
    rt = mysql_tool.querydb(sql_task_num, logger, sql_task_num)
    for v in rt:
        if v[0] in data.keys():
            data[v[0]]['num'] = int(v[1])
            data[v[0]]['n'] = int(v[2])
    # Same, restricted to users who are masters (appear as a parent_id).
    sql_task_parent_num = "select task_id, sum(today_count), count(distinct user_id) from user_task_day_records where user_id in (select distinct parent_id from users where bind_parent_at < \"%s\") and day = \"%s\" and today_count !=0 group by task_id" % (
        day2, day1)
    rt = mysql_tool.querydb(sql_task_parent_num, logger, sql_task_parent_num)
    for v in rt:
        if v[0] in data.keys():
            data[v[0]]['num_for_parent'] = int(v[1])
            data[v[0]]['n_for_parent'] = int(v[2])
    # Average newbie-task completions (fixed task-id list) for users who
    # registered on the target day.
    sql = "select sum(today_count), count(distinct user_id) from user_task_day_records where day = \"%s\" and user_id in (select id from users where registered_at >= \"%s\" and registered_at < \"%s\") and task_id in(1,2,3,22,4,5,29,30,31,33)" % (
        day1, day1, day2)
    rta = mysql_tool.querydb(sql, logger, sql)
    data[0] = {}
    data[0]['task_id'] = 0
    data[0]['task_name'] = u"今日新增注册用户完成新手任务平均个数"
    if rta[0][1] > 0:
        data[0]['num'] = round(float(rta[0][0]) / float(rta[0][1]), 2)
    # Same average, restricted to new registrations who are also masters.
    sql = "select sum(today_count), count(distinct user_id) from user_task_day_records where day = \"%s\" and user_id in (select id from users where id in (select distinct parent_id from users where bind_parent_at >= \"%s\" and bind_parent_at < \"%s\") and registered_at >= \"%s\" and registered_at < \"%s\") and task_id in(1,2,3,22,4,5,29,30,31,33)" % (
        day1, day1, day2, day1, day2)
    rtp = mysql_tool.querydb(sql, logger, sql)
    if rtp[0][1] > 0:
        data[0]['num_for_parent'] = round(
            float(rtp[0][0]) / float(rtp[0][1]), 2)
    # Non-master average = (all - masters), guarded against zero users.
    if rta[0][1] - rtp[0][1] > 0:
        data[0]['num_for_no_child'] = round(
            float(rta[0][0] - rtp[0][0]) / float(rta[0][1] - rtp[0][1]), 2)
    return data
def get_child(nday=1):
    """Per-channel count of apprentices bound on the target day to masters
    who registered that same day; "all_channel" holds the overall total."""
    day_start = time_tool.get_someday_str(-nday)
    day_end = time_tool.get_someday_str(-nday + 1)
    result = {}
    sql = "select us.channel, count(us.channel) from users as u left join user_statistics as us on (u.id = us.user_id) where u.parent_id in (select id from users where registered_at >= \"%s\" and registered_at < \"%s\") and u.bind_parent_at >= \"%s\" and u.bind_parent_at < \"%s\" group by us.channel" % (
        day_start, day_end, day_start, day_end)
    for row in mysql_tool.querydb(sql, logger, sql):
        result[row[0]] = int(row[1])
    sql_all = "select count(*) from users where parent_id in (select id from users where registered_at >= \"%s\" and registered_at < \"%s\") and bind_parent_at >= \"%s\" and bind_parent_at < \"%s\"" % (
        day_start, day_end, day_start, day_end)
    rt_all = mysql_tool.querydb(sql_all, logger, sql_all)
    if len(rt_all) > 0:
        result["all_channel"] = int(rt_all[0][0])
    return result
def get_hot_list_info(nday=1, data=None):
    """Add one stats document per hot-list table to `data`.

    For each hot-list table (daily / 7-day / all-time) the document gets
    new_published_count, comments_count, like_count, zan_count and the
    cumulative pv of the listed news, for the day `nday` days back.

    :param nday: look-back offset in days.
    :param data: dict to extend (fresh dict if omitted). Previously a
        mutable default argument that was mutated, so results leaked
        across calls — fixed with a None sentinel.
    :return: the extended dict.
    """
    data = {} if data is None else data
    day1 = time_tool.get_someday_str(-nday)
    day2 = time_tool.get_someday_str(-nday + 1)
    for table, category_id, category_name in [
            ("news_hot_lists", NEWS_HOT_LISTS, NEWS_HOT_LISTS_SHOW),
            ("news_hot_seven_day_lists", NEWS_HOT_SEVEN, NEWS_HOT_SEVEN_SHOW),
            ("news_hot_total_lists", NEWS_HOT_TOTAL, NEWS_HOT_TOTAL_SHOW)]:
        data[category_id] = {}
        # Entries added to the list on the target day.
        sql = "select count(*) from %s where created_at >= \"%s\" and created_at < \"%s\"" % (
            table, day1, day2)
        rt = mysql_tool.querydb(sql, logger, sql)
        for v in rt:
            data[category_id]['new_published_count'] = int(v[0])
        # Comments on listed news made on the target day.
        sql = "select count(*) from news_comments as nc join news as n on(nc.news_id = n.id) where n.id in (select news_id from %s) and nc.created_at >= \"%s\" and nc.created_at < \"%s\"" % (table, day1, day2)
        rt = mysql_tool.querydb(sql, logger, sql)
        for v in rt:
            data[category_id]['comments_count'] = int(v[0])
        # Favourites (likes) of listed news made on the target day.
        sql = "select count(*) from user_like_news as uln join news as n on(uln.news_id = n.id) where n.id in (select news_id from %s) and uln.created_at >= \"%s\" and uln.created_at < \"%s\"" % (table, day1, day2)
        rt = mysql_tool.querydb(sql, logger, sql)
        for v in rt:
            data[category_id]['like_count'] = int(v[0])
        # Upvotes (zan) on comments of listed news made on the target day.
        sql = "select count(*) from user_zan_news_comments as uznc join news_comments as nc on (uznc.comment_id = nc.id) join news as n on(nc.news_id = n.id) where n.id in (select news_id from %s) and uznc.created_at >= \"%s\" and uznc.created_at < \"%s\"" % (table, day1, day2)
        rt = mysql_tool.querydb(sql, logger, sql)
        for v in rt:
            data[category_id]['zan_count'] = int(v[0])
        # Cumulative pv of everything currently on the list.
        sql = "select sum(real_pv) from news where id in (select news_id from %s)" % table
        rt = mysql_tool.querydb(sql, logger, sql)
        for v in rt:
            data[category_id]['channel_name'] = u"资讯"
            data[category_id]['channel'] = "news"
            data[category_id]['category_id'] = category_id
            data[category_id]['pv_total'] = int(v[0])
            data[category_id]['category_name'] = category_name
    return data
def set_common_hot_info(sql, user_arr=None, data_hot=None, fk="", message=""):
    """Run a chunked count query over user ids and accumulate into data_hot[fk].

    :param sql: SQL template with a single %s placeholder for the id list.
    :param user_arr: user ids (as strings), queried in chunks of 1000.
        Previously a mutable default argument; fixed with a None sentinel.
    :param data_hot: dict mutated in place; the count from each chunk is
        added to data_hot[fk]. Previously a mutable default argument that
        was mutated — a latent cross-call accumulation bug; fixed.
    :param fk: field key to accumulate under.
    :param message: log label passed to querydb.
    """
    user_arr = [] if user_arr is None else user_arr
    data_hot = {} if data_hot is None else data_hot
    for i in range(0, len(user_arr), 1000):
        user_to_sql = user_arr[i:i + 1000]
        sql_use = sql % (",".join(user_to_sql))
        rt = mysql_tool.querydb(sql_use, logger, message)
        for v in rt:
            if fk in data_hot.keys():
                data_hot[fk] += int(v[0])
            else:
                data_hot[fk] = int(v[0])
def get_hot_list_info(user_arr=[], ner={}, data={}, nday=1):
    """Add cohort stats documents for the three hot-list tables to `data`.

    :param user_arr: cohort user ids (as strings).
    :param ner: dict news_id -> effective-reading count for the cohort.
    :param data: dict extended in place with one document per hot list.
    :param nday: look-back offset in days.
    :return: the extended dict.

    NOTE(review): user_arr/ner/data are mutable default arguments and
    `data` is mutated; callers must always pass them explicitly.
    """
    day1 = time_tool.get_someday_str(-nday)
    day2 = time_tool.get_someday_str(-nday + 1)
    for table, category_id, category_name in [
            ("news_hot_lists", edit_info.NEWS_HOT_LISTS,
             edit_info.NEWS_HOT_LISTS_SHOW),
            ("news_hot_seven_day_lists", edit_info.NEWS_HOT_SEVEN,
             edit_info.NEWS_HOT_SEVEN_SHOW),
            ("news_hot_total_lists", edit_info.NEWS_HOT_TOTAL,
             edit_info.NEWS_HOT_TOTAL_SHOW)]:
        data[category_id] = {}
        data[category_id]['user_type'] = "new"
        data[category_id]['channel_name'] = u"资讯"
        data[category_id]['channel'] = "news"
        data[category_id]['category_id'] = category_id
        data[category_id]['category_name'] = category_name
        # Effective readings of listed news by the cohort (chunked IN).
        news_id_arr = [str(news_id) for news_id in ner.keys()]
        sql = "select news_id from " + table + " where news_id in (%s)"
        for i in range(0, len(news_id_arr), 1000):
            id_arr = news_id_arr[i:i + 1000]
            sql_use = sql % (",".join(id_arr))
            rt = mysql_tool.querydb(
                sql_use, logger,
                "select news_id from %s for %d news" % (table, len(id_arr)))
            for v in rt:
                news_id = int(v[0])
                if 'effective_reading' in data[category_id].keys():
                    data[category_id]['effective_reading'] += ner[news_id]
                else:
                    data[category_id]['effective_reading'] = ner[news_id]
        # Comment counts.
        sql = "select count(*) from news_comments as nc join news as n on(nc.news_id = n.id) where n.id in (select news_id from " + \
            table + ") and nc.created_at >= \"" + day1 + \
            "\" and nc.created_at < \"" + day2 + "\" and nc.user_id in (%s)"
        set_common_hot_info(
            sql, user_arr, data[category_id], 'comments_count',
            "select news_comments")
        # Favourite (like) counts.
        sql = "select count(*) from user_like_news as uln join news as n on(uln.news_id = n.id) where n.id in (select news_id from " + \
            table + ") and uln.created_at >= \"" + day1 + \
            "\" and uln.created_at < \"" + day2 + "\" and uln.user_id in (%s)"
        set_common_hot_info(
            sql, user_arr, data[category_id], 'like_count',
            "select user_like_newsews_comments")
        # Upvote (zan) counts.
        sql = "select count(*) from user_zan_news_comments as uznc join news_comments as nc on (uznc.comment_id = nc.id) join news as n on(nc.news_id = n.id) where n.id in (select news_id from " + \
            table + ") and uznc.created_at >= \"" + day1 + \
            "\" and uznc.created_at < \"" + \
            day2 + "\" and uznc.user_id in (%s)"
        set_common_hot_info(
            sql, user_arr, data[category_id], 'zan_count',
            "select user_zan_news_comments")
    return data
def process(nday=0):
    """Index the day's news (title plus body) into the news search engine."""
    mysql_tool.connectdb(host="10.100.100.198", user="******",
                         password="******", database="taozuiredian-news")
    mysql_tool.querydb("SET NAMES utf8mb4")
    articles = get_news(nday)
    for news_id, doc in articles.items():
        doc['content'] = get_news_content(news_id)
        # Skip articles whose body could not be fetched.
        if doc['content'] == "":
            continue
        endpoint = config.URL_NEWS_SEARCH + "/" + str(news_id)
        resp = requests.post(endpoint, headers=config.JSON_HEADER,
                             data=json.dumps(doc), timeout=(30, 120))
        if resp.status_code not in (200, 201):
            logger.error(
                "post news_research index error, status_code:%s, reason:%s",
                resp.status_code, resp.reason)
    mysql_tool.closedb()
def get_video(nday=1):
    """Effective video readings by users registered on the target day.

    :return: two dicts keyed by channel — total reading events and distinct
        readers — each with an "all_channel" roll-up.
    """
    readings = {}
    readers = {}
    day_start = time_tool.get_someday_str(-nday)
    day_end = time_tool.get_someday_str(-nday + 1)
    sql = "select us.channel, count(vr.user_id), count(distinct u.id) from users as u left join user_statistics as us on (u.id = us.user_id) right join video_effective_readings as vr on (u.id = vr.user_id) where u.registered_at >= \"%s\" and u.registered_at < \"%s\" and vr.created_at >= \"%s\" and vr.created_at < \"%s\" and vr.effective = 1 group by us.channel" % (
        day_start, day_end, day_start, day_end)
    for row in mysql_tool.querydb(sql, logger, sql):
        readings[row[0]] = int(row[1])
        readers[row[0]] = int(row[2])
    sql_all = "select count(vr.user_id), count(distinct u.id) from users as u right join video_effective_readings as vr on (u.id = vr.user_id) where u.registered_at >= \"%s\" and u.registered_at < \"%s\" and vr.created_at >= \"%s\" and vr.created_at < \"%s\" and vr.effective = 1" % (
        day_start, day_end, day_start, day_end)
    rt_all = mysql_tool.querydb(sql_all, logger, sql_all)
    if len(rt_all) > 0:
        readings["all_channel"] = int(rt_all[0][0])
        readers["all_channel"] = int(rt_all[0][1])
    logger.debug(readings)
    return readings, readers
def get_list_news_category(list_news=None):
    """Add each hot-list table's news ids to list_news under its category.

    :param list_news: dict category_id -> list of news ids, extended in
        place. Previously a mutable default argument that was mutated —
        entries leaked across calls; fixed with a None sentinel.
    :return: the extended dict.
    """
    list_news = {} if list_news is None else list_news
    for table, category_id in [
            ("news_hot_lists", define.NEWS_HOT_LISTS),
            ("news_hot_seven_day_lists", define.NEWS_HOT_SEVEN),
            ("news_hot_total_lists", define.NEWS_HOT_TOTAL)]:
        list_news[category_id] = []
        sql = "select news_id from %s" % table
        rt = mysql_tool.querydb(sql, logger, sql)
        for v in rt:
            list_news[category_id].append(int(v[0]))
    return list_news
def get_task(nday=1):
    """Task completions by users registered on the target day, per channel.

    Only self-completed records count (user_id = from_user_id).
    :return: two dicts keyed by channel — completion totals and distinct
        users — each with an "all_channel" roll-up.
    """
    totals = {}
    user_counts = {}
    day_start = time_tool.get_someday_str(-nday)
    day_end = time_tool.get_someday_str(-nday + 1)
    sql = "select us.channel, sum(ut.today_count), count(distinct u.id) from users as u left join user_statistics as us on (u.id = us.user_id) right join user_task_day_records as ut on (u.id = ut.user_id and u.id = ut.from_user_id) where u.registered_at >= \"%s\" and u.registered_at < \"%s\" and ut.day = \"%s\" group by us.channel" % (
        day_start, day_end, day_start)
    for row in mysql_tool.querydb(sql, logger, sql):
        totals[row[0]] = int(row[1])
        user_counts[row[0]] = int(row[2])
    sql_all = "select sum(ut.today_count), count(distinct u.id) from users as u right join user_task_day_records as ut on (u.id = ut.user_id and u.id = ut.from_user_id) where u.registered_at >= \"%s\" and u.registered_at < \"%s\" and ut.day = \"%s\"" % (
        day_start, day_end, day_start)
    rt_all = mysql_tool.querydb(sql_all, logger, sql_all)
    if len(rt_all) > 0:
        totals["all_channel"] = int(rt_all[0][0])
        user_counts["all_channel"] = int(rt_all[0][1])
    logger.debug(totals)
    return totals, user_counts
def process(nday=1):
    """Local/debug variant of the edit_info sync: posts to localhost:8200.

    Only the news branch is active; the hot-list / effective-reading /
    video steps are commented out.
    """
    mysql_tool.connectdb(host="47.96.238.205", database="taozuiredian-news")
    mysql_tool.querydb("SET NAMES utf8mb4")
    data_news = edit_info.get_edit_news_info(nday)
    data_news = edit_info.get_hot_news_info(nday, data_news)
    # data_news = edit_info.get_hot_list_info(nday, data_news)
    # data_news = edit_info.get_news_effective_reading(nday, data_news)
    # data_video = edit_info.get_edit_video_info(nday)
    # data_video = edit_info.get_video_effective_reading(nday, data_video)
    mysql_tool.closedb()
    for data in [data_news]:
        for k, v in data.items():
            _id = time_tool.get_someday_str(-nday)
            v['@timestamp'] = time_tool.get_someday_es_format(-nday)
            # Hard-coded local ES endpoint (debug only).
            url = "http://localhost:8200/edit_info/doc" + \
                "/" + _id + "_" + v['channel'] + "_" + str(k)
            r = requests.post(url, headers=edit_info.JSON_HEADER,
                              data=json.dumps(v), timeout=(30, 120))
            if 200 != r.status_code and 201 != r.status_code:
                logger.error("request edit index failed, status_code:%d, reason:%s, %s, %s",
                             r.status_code, json.dumps(r.json()), url,
                             json.dumps(v))
def get_hot_list_pv_info(nday=1):
    """Cumulative real_pv of news currently on the 7-day and all-time hot lists.

    `nday` is accepted for signature parity with the other collectors but
    is not used by the queries.
    """
    pv_by_category = {}
    hot_tables = (("news_hot_seven_day_lists", define.NEWS_HOT_SEVEN),
                  ("news_hot_total_lists", define.NEWS_HOT_TOTAL))
    for table, category_id in hot_tables:
        sql = "select sum(real_pv) from news where id in (select news_id from %s)" % table
        for row in mysql_tool.querydb(sql, logger, sql):
            pv_by_category[category_id] = int(row[0])
    return pv_by_category
def get_news(nday=1):
    """Return search documents for news published on the target day.

    :return: dict news_id -> {'@timestamp', 'news_id', 'title'}.
    """
    day_start = time_tool.get_someday_str(-nday)
    day_end = time_tool.get_someday_str(-nday + 1)
    sql = "select id, published_at, title from news where published_at >= \"%s\" and published_at < \"%s\"" % (
        day_start, day_end)
    articles = {}
    for row in mysql_tool.querydb(sql, logger, sql):
        nid = int(row[0])
        articles[nid] = {
            # MySQL datetime -> ES format; assumes +08:00 local time.
            '@timestamp': str(row[1]).replace(" ", "T") + "+08:00",
            'news_id': nid,
            'title': row[2],
        }
    return articles
def get_user_channel(user_arr=[]):
    """Group the given user ids (as strings) by their acquisition channel."""
    channels = {}
    sql = "select channel, user_id from user_statistics where user_id in (%s)"
    # Query in chunks of 1000 ids to keep the IN clause bounded.
    for start in range(0, len(user_arr), 1000):
        chunk = user_arr[start:start + 1000]
        rows = mysql_tool.querydb(
            sql % (",".join(chunk)), logger,
            "get user channel for %d users" % len(chunk))
        for row in rows:
            channels.setdefault(row[0], []).append(str(row[1]))
    return channels
def get_hot_news_info(user_arr=None, ner=None, data=None, nday=1):
    """Add the hot-news cohort stats document to `data`.

    Builds the HOT_NEWS_CATEGORY_ID document (effective readings, comments,
    likes, zan) for the given "new user" cohort on the target day.

    :param user_arr: cohort user ids (as strings). Previously a mutable
        default argument; fixed with a None sentinel.
    :param ner: dict news_id -> effective-reading count for the cohort.
    :param data: dict extended in place (fresh dict if omitted); previously
        a mutated mutable default argument — fixed.
    :param nday: look-back offset in days.
    :return: the extended dict.
    """
    user_arr = [] if user_arr is None else user_arr
    ner = {} if ner is None else ner
    data = {} if data is None else data
    day1 = time_tool.get_someday_str(-nday)
    day2 = time_tool.get_someday_str(-nday + 1)
    hot = {}
    data[edit_info.HOT_NEWS_CATEGORY_ID] = hot
    hot['user_type'] = "new"
    # BUG FIX: was a byte-string literal ("资讯"); use a unicode literal for
    # consistency with every other channel_name in this codebase.
    hot['channel_name'] = u"资讯"
    hot['channel'] = "news"
    hot['category_id'] = edit_info.HOT_NEWS_CATEGORY_ID
    hot['category_name'] = edit_info.HOT_SHOW
    # Effective readings of hot news by the cohort (chunked IN clause).
    news_id_arr = [str(news_id) for news_id in ner.keys()]
    sql = "select id from news where id in (%s) and hot_at < \"" + day2 + "\""
    for i in range(0, len(news_id_arr), 1000):
        id_arr = news_id_arr[i:i + 1000]
        sql_use = sql % (",".join(id_arr))
        rt = mysql_tool.querydb(
            sql_use, logger, "select id from news for %d news" % len(id_arr))
        for v in rt:
            news_id = int(v[0])
            if 'effective_reading' in hot.keys():
                hot['effective_reading'] += ner[news_id]
            else:
                hot['effective_reading'] = ner[news_id]
    # Comments on hot news by the cohort.
    sql = "select count(*) from news_comments as nc join news as n on(nc.news_id = n.id) where n.id in (select id from news where hot_at < \"" + \
        day2 + "\") and nc.created_at >= \"" + day1 + \
        "\" and nc.created_at < \"" + day2 + "\" and nc.user_id in (%s)"
    set_common_hot_info(
        sql, user_arr, hot, 'comments_count', "select from news_comments")
    # Favourites (likes) of hot news by the cohort.
    sql = "select count(*) from user_like_news as uln join news as n on(uln.news_id = n.id) where n.id in (select id from news where hot_at < \"" + \
        day2 + "\") and uln.created_at >= \"" + day1 + \
        "\" and uln.created_at < \"" + day2 + "\" and uln.user_id in (%s)"
    set_common_hot_info(
        sql, user_arr, hot, 'like_count', "select from user_like_news")
    # Upvotes (zan) on hot-news comments by the cohort.
    sql = "select count(*) from user_zan_news_comments as uznc join news_comments as nc on (uznc.comment_id = nc.id) join news as n on(nc.news_id = n.id) where n.id in (select id from news where hot_at < \"" + \
        day2 + "\") and uznc.created_at >= \"" + day1 + \
        "\" and uznc.created_at < \"" + day2 + "\" and uznc.user_id in (%s)"
    set_common_hot_info(
        sql, user_arr, hot, 'zan_count', "select from user_zan_news_comments")
    return data
def set_common_edit_info(sql="", user_arr=None, data=None, message="",
                         fkey="", user_type="", channel="", channel_name="",
                         categories=None):
    """Chunked (category, count) aggregation accumulated into data[k][fkey].

    :param sql: SQL template with one %s placeholder for the user-id list,
        returning (category_id, count) rows.
    :param user_arr: cohort user ids (as strings), queried in chunks of
        1000. Previously a mutable default argument; fixed.
    :param data: dict category_id -> document, mutated in place; a missing
        category is initialised via set_for_one_category. Previously a
        mutated mutable default argument — a latent cross-call
        accumulation bug; fixed with a None sentinel.
    :param message: log label passed to querydb.
    :param fkey: field key to accumulate under.
    :param user_type / channel / channel_name / categories: forwarded to
        set_for_one_category when a new category document is created.
    """
    user_arr = [] if user_arr is None else user_arr
    data = {} if data is None else data
    categories = {} if categories is None else categories
    for i in range(0, len(user_arr), 1000):
        user_to_sql = user_arr[i:i + 1000]
        sql_use = sql % (",".join(user_to_sql))
        rt = mysql_tool.querydb(sql_use, logger, message)
        for v in rt:
            k = int(v[0])
            if k in data.keys():
                if fkey in data[k].keys():
                    data[k][fkey] += int(v[1])
                else:
                    data[k][fkey] = int(v[1])
            else:
                set_for_one_category(
                    data, k, user_type, channel_name, channel, categories)
                data[k][fkey] = int(v[1])
def get_parent_info(nday=1):
    """Count masters (users referenced as a parent_id) per VIP level.

    Counts parents bound before the start of day ``-nday + 1``.

    :param nday: look-back offset in days.
    :return: dict str(level) -> {'num': count}.
    """
    data = {}
    # Removed an unused `day1` computation present in the original.
    day2 = time_tool.get_someday_str(-nday + 1)
    sql_vip_parent_count = "select level, count(id) from users where id in (select distinct parent_id from users where bind_parent_at < \"%s\") group by level" % day2
    rt = mysql_tool.querydb(sql_vip_parent_count, logger, sql_vip_parent_count)
    for v in rt:
        # `level` is unique per grouped row, so a plain assignment suffices
        # (the original's key-exists branch was redundant).
        data[str(v[0])] = {'num': int(v[1])}
    return data
def get_child(user_channel={}, nday=1):
    """Count apprentices bound on the target day, grouped by master channel.

    :param user_channel: dict channel -> list of master user ids (strings).
    :param nday: look-back offset in days.
    :return: dict channel -> count, with an "all_channel" roll-up.
    """
    day_start = time_tool.get_someday_str(-nday)
    day_end = time_tool.get_someday_str(-nday + 1)
    counts = {"all_channel": 0}
    sql = "select count(us.channel) from users as u left join user_statistics as us on (u.id = us.user_id) where u.parent_id in (%s) and u.bind_parent_at >= \"%s\" and u.bind_parent_at < \"%s\""
    for channel, members in user_channel.items():
        counts[channel] = 0
        # IN-clause chunks of at most 1000 ids.
        for start in range(0, len(members), 1000):
            chunk = members[start:start + 1000]
            rt = mysql_tool.querydb(
                sql % (",".join(chunk), day_start, day_end),
                logger, "get child count for %d users" % len(chunk))
            if len(rt) > 0 and rt[0][0] is not None:
                counts[channel] += int(rt[0][0])
        counts["all_channel"] += counts[channel]
    return counts
def get_active_child(nday=1):
    """Distinct active apprentices per channel for the target day.

    :return: dict channel -> distinct count, plus "all_channel" for the
        overall distinct total.
    """
    per_channel = {}
    all_child = []
    user_arr = active_user_info.get_user_arr(nday)
    sql = "select channel, user_id from user_statistics where user_id in (select id from users where id in (%s) and parent_id != 0)"
    # IN-clause chunks of at most 1000 ids.
    for start in range(0, len(user_arr), 1000):
        chunk = user_arr[start:start + 1000]
        for row in mysql_tool.querydb(sql % (",".join(chunk))):
            all_child.append(row[1])
            per_channel.setdefault(row[0], []).append(row[1])
    active_child = {}
    for channel, ids in per_channel.items():
        active_child[channel] = len(set(ids))
    active_child["all_channel"] = len(set(all_child))
    return active_child