def process(nday=1):
    mysql_tool.connectdb()
    mysql_tool.querydb("SET NAMES utf8mb4")
    data = get_task_info(nday)
    mysql_tool.closedb()
    dau, _ = active.get_user_device_count(active.get_query(-nday))
    for k, v in data.items():
        v['@timestamp'] = time_tool.get_someday_es_format(-nday)
        if k != 0 and 'num' in v.keys() and 'num_for_parent' in v.keys():
            v['num_for_no_child'] = v['num'] - v['num_for_parent']
            v['n_for_no_child'] = v['n'] - v['n_for_parent']
            if dau > 0:
                v['num_dau'] = float(v['num']) / dau
        _id = time_tool.get_someday_str(-nday)
        url = URL_ELASTICSEARCH_TASK_INFO + "/" + _id + "_" + str(k)
        r = requests.post(url, headers=JSON_HEADER,
                          data=json.dumps(v), timeout=(10, 120))
        if 200 != r.status_code and 201 != r.status_code:
            logger.error(
                "request task_info index failed, status_code:%d, reason:%s",
                r.status_code, r.reason)
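# The "POST the doc, then log on non-2xx" block above recurs in every job in
# this section. A minimal sketch of a shared helper it could be factored
# into; the name post_doc is an assumption, and it relies on the same
# requests/json/JSON_HEADER/logger globals the surrounding functions use.
def post_doc(url, doc, timeout=(10, 120)):
    r = requests.post(url, headers=JSON_HEADER,
                      data=json.dumps(doc), timeout=timeout)
    if r.status_code not in (200, 201):
        logger.error("index request failed, status_code:%d, reason:%s, url:%s",
                     r.status_code, r.reason, url)
    return r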
def process_old(nday=1):
    mysql_tool.connectdb(host="47.96.238.205", database="taozuiredian-news")
    mysql_tool.querydb("SET NAMES utf8mb4")
    data_news = edit_info.get_edit_news_info(nday)
    data_news = edit_info.get_hot_news_info(nday, data_news)
    data_news = edit_info.get_hot_list_info(nday, data_news)
    data_news = edit_info.get_news_effective_reading(nday, data_news)
    data_video = edit_info.get_edit_video_info(nday)
    data_video = edit_info.get_video_effective_reading(nday, data_video)
    mysql_tool.closedb()
    for data in [data_news, data_video]:
        for k, v in data.items():
            tid = time_tool.get_someday_str(-nday)
            url = edit_info.URL_ELASTICSEARCH_EDIT_INFO + \
                "/" + tid + "_" + v["channel"] + "_" + str(k)
            r = requests.get(url, headers=edit_info.JSON_HEADER,
                             timeout=(30, 120))
            if 200 != r.status_code:
                print r.status_code, r.reason, r.json()
                v['@timestamp'] = time_tool.get_someday_es_format(-nday)
                r = requests.post(url, headers=edit_info.JSON_HEADER,
                                  data=json.dumps(v), timeout=(30, 120))
                if 200 != r.status_code and 201 != r.status_code:
                    print r.status_code, r.reason, r.json()
            else:
                r_json = r.json()
                if "_source" in r_json.keys():
                    td = r_json["_source"]
                    if 'effective_reading' in td.keys():
                        # td['effective_reading'] = v['effective_reading']
                        del td['effective_reading']
                        print td
def process(nday=1):
    mysql_tool.connectdb(host="47.96.238.205", database="taozuiredian-news")
    data = {}
    news_id_arr = get_news(nday=nday)
    news_category = get_news_category(nday=nday, news_id_arr=news_id_arr)
    for func, k in [(get_puv4news_show_all, "show_all"),
                    (get_puv4news_page_click, "page_click")]:
        for category_id, news_arr in news_category.items():
            if category_id not in data.keys():
                data[category_id] = {}
            pv, uv = func(nday=nday, news_id_arr=news_arr)
            data[category_id][k + "_pv"] = pv
            data[category_id][k + "_uv"] = uv
            if uv > 0 and k == "show_all":
                data[category_id][k + "_pv_uv"] = round(float(pv) / uv, 2)
        pv, uv = func(nday=nday)
        if define.TOTAL_ID not in data.keys():
            data[define.TOTAL_ID] = {}
        data[define.TOTAL_ID][k + "_pv"] = pv
        data[define.TOTAL_ID][k + "_uv"] = uv
        if uv > 0 and k == "show_all":
            data[define.TOTAL_ID][k + "_pv_uv"] = round(float(pv) / uv, 2)
    mysql_tool.closedb()
    return data
def process(nday=1):
    mysql_tool.connectdb(host="47.96.238.205", database="taozuiredian-news")
    list_pv_total = get_hot_list_pv_info(nday)
    mysql_tool.closedb()
    for k, v in list_pv_total.items():
        # All news on the overall and trending lists is refreshed at 1 a.m.,
        # which invalidates the pv_total computed for these two lists shortly
        # after midnight, so it has to be recomputed here.
        _id = time_tool.get_someday_str(-nday)
        url = config.URL_EDIT_INFO + "/" + _id + "_news_" + str(k)
        r = requests.get(url, headers=config.JSON_HEADER, timeout=(30, 120))
        if 200 != r.status_code:
            logger.error(
                "request edit_info index failed, status_code:%d, reason:%s, k:%s",
                r.status_code, r.reason, k)
        else:
            r_json = r.json()
            if "_source" in r_json.keys():
                yd = r_json["_source"]
                yd['pv_total'] = v
                yd['@timestamp'] = time_tool.get_someday_es_format(-nday)
                url = config.URL_EDIT_INFO + "/" + _id + "_news_" + str(k)
                r = requests.post(url, headers=config.JSON_HEADER,
                                  data=json.dumps(yd), timeout=(30, 120))
                if 200 != r.status_code and 201 != r.status_code:
                    logger.error(
                        "request edit index failed, status_code:%d, reason:%s, %s, %s",
                        r.status_code, json.dumps(r.json()), url, json.dumps(v))
def process(nday=1):
    data_pv_uv = edit_pv_uv.process(nday)
    mysql_tool.connectdb(host="47.96.238.205", database="taozuiredian-news")
    mysql_tool.querydb("SET NAMES utf8mb4")
    data_news = get_edit_news_info(nday)
    data_news = get_hot_list_info(nday, data_news)
    data_news = get_hot_news_info(nday, data_news)
    data_news = get_news_effective_reading(nday, data_news)
    data_video = get_edit_video_info(nday)
    data_video = get_video_effective_reading(nday, data_video)
    mysql_tool.closedb()
    for k, v in data_pv_uv.items():
        if k in data_news.keys():
            for k_, v_ in v.items():
                data_news[k][k_] = v_
    for data in [data_news, data_video]:
        for k, v in data.items():
            # Fetch the previous day's document to compute today's pv delta.
            yid = time_tool.get_someday_str(-nday - 1)
            url = URL_ELASTICSEARCH_EDIT_INFO + \
                "/" + yid + "_" + v["channel"] + "_" + str(k)
            r = requests.get(url, headers=JSON_HEADER, timeout=(30, 120))
            if 200 != r.status_code:
                logger.error("request edit_info index failed, status_code:%d, reason:%s, k:%s",
                             r.status_code, r.reason, k)
            else:
                r_json = r.json()
                if "_source" in r_json.keys():
                    yd = r_json["_source"]
                    if "pv_total" in yd.keys():
                        v["pv"] = v["pv_total"] - yd["pv_total"]
            set_pv_(v, "pv", "dau_count", "pv_dau")
            set_pv_(v, "pv", "new_published_count", "pv_published")
            set_pv_(v, "effective_reading", "pv", "reading_pv")
            set_pv_(v, "comments_count", "pv", "comments_pv")
            set_pv_(v, "zan_count", "pv", "zan_pv")
            set_pv_(v, "like_count", "pv", "like_pv")
            set_pv_(v, "share_count", "pv", "share_pv")
            v['@timestamp'] = time_tool.get_someday_es_format(-nday)
            set_pv_(v, "show_all_pv_uv", "new_published_count",
                    "show_all_pv_uv_new_published")
            _id = time_tool.get_someday_str(-nday)
            url = URL_ELASTICSEARCH_EDIT_INFO + \
                "/" + _id + "_" + v['channel'] + "_" + str(k)
            r = requests.post(url, headers=JSON_HEADER,
                              data=json.dumps(v), timeout=(30, 120))
            if 200 != r.status_code and 201 != r.status_code:
                logger.error("request edit index failed, status_code:%d, reason:%s, %s, %s",
                             r.status_code, json.dumps(r.json()), url, json.dumps(v))
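# set_pv_ is defined elsewhere in the module; judging by its call sites above
# (e.g. set_pv_(v, "pv", "dau_count", "pv_dau")), it most likely stores the
# rounded ratio of two existing fields under a new key. A hedged sketch of
# that shape, guarding against missing keys and division by zero:
def set_pv_(v, numer_key, denom_key, target_key):
    if numer_key in v and denom_key in v and v[denom_key] > 0:
        v[target_key] = round(float(v[numer_key]) / v[denom_key], 2)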
def process(nday=1):
    mysql_tool.connectdb()
    data = get_cash_user(nday)
    mysql_tool.closedb()
    data['@timestamp'] = time_tool.get_someday_es_format(-nday)
    _id = time_tool.get_someday_str(-nday)
    url = URL_ELASTICSEARCH_CASH_INFO + "/" + _id
    r = requests.post(url, headers=JSON_HEADER,
                      data=json.dumps(data), timeout=(10, 20))
    if 200 != r.status_code and 201 != r.status_code:
        logger.error(
            "request cash_info index failed, status_code:%d, reason:%s",
            r.status_code, r.reason)
def process(nday=1):
    mysql_tool.connectdb()
    data = get_parent_info(nday)
    mysql_tool.closedb()
    for k, v in data.items():
        # Fetch yesterday's master count per VIP level; new masters per VIP
        # level = today's count - yesterday's count.
        yid = time_tool.get_someday_str(-nday - 1)
        url = URL_ELASTICSEARCH_PARENT_VIP_INFO + "/" + yid + "_" + k
        r = requests.get(url, headers=JSON_HEADER, timeout=(30, 120))
        if 200 != r.status_code:
            logger.error(
                "request parent_info index failed, status_code:%d, reason:%s, k:%s",
                r.status_code, r.reason, k)
        else:
            r_json = r.json()
            if "_source" in r_json.keys():
                yd = r_json["_source"]
                if "num" in yd.keys():
                    v["num_new"] = v["num"] - yd["num"]
        v['@timestamp'] = time_tool.get_someday_es_format(-nday)
        v["vip"] = k
        _id = time_tool.get_someday_str(-nday)
        url = URL_ELASTICSEARCH_PARENT_VIP_INFO + "/" + _id + "_" + k
        r = requests.post(url, headers=JSON_HEADER,
                          data=json.dumps(v), timeout=(30, 120))
        if 200 != r.status_code and 201 != r.status_code:
            logger.error(
                "request parent_info index failed, status_code:%d, reason:%s",
                r.status_code, r.reason)
    data_num_user = {}
    data_num_user['@timestamp'] = time_tool.get_someday_es_format(-nday)
    user_per_channel = nui.get_new_user(nday=nday)
    data_num_user["num_user_new"] = user_per_channel["all_channel"]
    data_num_user["vip"] = ""
    url = URL_ELASTICSEARCH_PARENT_VIP_INFO + "/" + time_tool.get_someday_str(
        -nday)
    r = requests.post(url, headers=JSON_HEADER,
                      data=json.dumps(data_num_user), timeout=(30, 120))
    if 200 != r.status_code and 201 != r.status_code:
        logger.error(
            "request parent_info index failed, status_code:%d, reason:%s",
            r.status_code, r.reason)
def process(nday=1):
    new_user_arr = get_new_user_arr(nday)
    mysql_tool.connectdb(host="47.96.238.205", database="taozuiredian-news")
    mysql_tool.querydb("SET NAMES utf8mb4")
    ner = get_news_effective_readings(new_user_arr, nday)
    data_news = get_edit_news_info(new_user_arr, "new", ner, nday)
    data_news = get_hot_news_info(new_user_arr, ner, data_news, nday)
    data_news = get_hot_list_info(new_user_arr, ner, data_news, nday)
    ver = get_video_effective_readings(new_user_arr, nday)
    data_video = get_edit_video_info(new_user_arr, "new", ver, nday)
    mysql_tool.closedb()
    for data in [data_news, data_video]:
        for k, v in data.items():
            v['@timestamp'] = time_tool.get_someday_es_format(-nday)
            _id = time_tool.get_someday_str(-nday)
            url = URL_ELASTICSEARCH_EDIT_USER_INFO + \
                "/" + _id + "_" + v['user_type'] + \
                "_" + v['channel'] + "_" + str(k)
            r = requests.post(url, headers=JSON_HEADER,
                              data=json.dumps(v), timeout=(30, 120))
            if 200 != r.status_code and 201 != r.status_code:
                logger.error("request edit index failed, status_code:%d, reason:%s, %s, %s",
                             r.status_code, json.dumps(r.json()), url, json.dumps(v))
            url_edit = edit_info.URL_ELASTICSEARCH_EDIT_INFO + \
                "/" + _id + "_" + v['channel'] + "_" + str(k)
            r = requests.get(url_edit, headers=JSON_HEADER, timeout=(30, 120))
            if 200 != r.status_code:
                logger.error("request edit_info index failed, status_code:%d, reason:%s",
                             r.status_code, json.dumps(r.json()))
            else:
                r_json = r.json()
                if "_source" in r_json.keys():
                    info = r_json["_source"]
                    v_ = copy.deepcopy(v)
                    v_["user_type"] = "not_new"
                    set_a_key(v, info, v_, "effective_reading")
                    set_a_key(v, info, v_, "comments_count")
                    set_a_key(v, info, v_, "like_count")
                    set_a_key(v, info, v_, "zan_count")
                    url_ = URL_ELASTICSEARCH_EDIT_USER_INFO + \
                        "/" + _id + "_not_new_" + v['channel'] + "_" + str(k)
                    r = requests.post(url_, headers=JSON_HEADER,
                                      data=json.dumps(v_), timeout=(30, 120))
                    if 200 != r.status_code and 201 != r.status_code:
                        logger.error("request edit index failed, status_code:%d, reason:%s",
                                     r.status_code, json.dumps(r.json()))
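# set_a_key is defined elsewhere; from the context above (deriving the
# "not_new" document from the channel total and the new-user figures), it
# presumably stores the total minus the new-user value. A hedged sketch:
def set_a_key(v, info, v_, key):
    if key in info and key in v:
        v_[key] = info[key] - v[key]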
def process(nday=1):
    mysql_tool.connectdb()
    data = get_parent_info(nday)
    mysql_tool.closedb()
    for k, v in data.items():
        # Fetch yesterday's total income and rebate (tribute) income per
        # master to compute today's figures.
        yid = time_tool.get_someday_str(-nday - 1)
        url = URL_ELASTICSEARCH_PARENT_INFO + "/" + yid + "_" + k
        r = requests.get(url, headers=JSON_HEADER, timeout=(10, 20))
        if 200 != r.status_code:
            logger.error(
                "request parent_info index failed, status_code:%d, reason:%s, k:%s",
                r.status_code, r.reason, k)
        else:
            r_json = r.json()
            if "_source" in r_json.keys():
                yd = r_json["_source"]
                # if "total_income" in v.keys():
                #     ydti = yd["total_income"] if "total_income" in yd.keys() else 0
                #     v["today_income"] = v["total_income"] - ydti
                if "total_rebate" in v.keys():
                    ydtr = yd["total_rebate"] if "total_rebate" in yd.keys() else 0
                    v["today_rebate"] = v["total_rebate"] - ydtr
                    if "num_active_child" in v.keys() and v["num_active_child"] > 0:
                        # average rebate = today's rebate income / number of
                        # active apprentices; see 后台数据维度(3).xlsx for details
                        v["average_rebate_per_child"] = round(
                            v["today_rebate"] / float(v["num_active_child"]), 2)
        v['@timestamp'] = time_tool.get_someday_es_format(-nday)
        v["channel"] = k
        _id = time_tool.get_someday_str(-nday)
        url = URL_ELASTICSEARCH_PARENT_INFO + "/" + _id + "_" + k
        r = requests.post(url, headers=JSON_HEADER,
                          data=json.dumps(v), timeout=(10, 20))
        if 200 != r.status_code and 201 != r.status_code:
            logger.error(
                "request parent_info index failed, status_code:%d, reason:%s",
                r.status_code, r.reason)
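# The "GET yesterday's document and diff against it" pattern above also
# appears in the edit_info, hot-list and parent_vip jobs. A minimal sketch of
# a shared helper, assuming the same requests/JSON_HEADER/logger globals; it
# returns the _source dict, or None when the document is missing.
def fetch_yesterday_source(url):
    r = requests.get(url, headers=JSON_HEADER, timeout=(10, 20))
    if r.status_code != 200:
        logger.error("fetch failed, status_code:%d, reason:%s, url:%s",
                     r.status_code, r.reason, url)
        return None
    return r.json().get("_source")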
def process(nday=0):
    mysql_tool.connectdb(host="10.100.100.198", user="******",
                         password="******", database="taozuiredian-news")
    mysql_tool.querydb("SET NAMES utf8mb4")
    news = get_news(nday)
    for k, v in news.items():
        v['content'] = get_news_content(k)
        if v['content'] == "":
            continue
        url = config.URL_NEWS_SEARCH + "/" + str(k)
        r = requests.post(url, headers=config.JSON_HEADER,
                          data=json.dumps(v), timeout=(30, 120))
        if 200 != r.status_code and 201 != r.status_code:
            logger.error(
                "post news_search index error, status_code:%s, reason:%s",
                r.status_code, r.reason)
    mysql_tool.closedb()
def process(nday=1):
    mysql_tool.connectdb(host="47.96.238.205", database="taozuiredian-news")
    mysql_tool.querydb("SET NAMES utf8mb4")
    data_news = edit_info.get_edit_news_info(nday)
    data_news = edit_info.get_hot_news_info(nday, data_news)
    # data_news = edit_info.get_hot_list_info(nday, data_news)
    # data_news = edit_info.get_news_effective_reading(nday, data_news)
    # data_video = edit_info.get_edit_video_info(nday)
    # data_video = edit_info.get_video_effective_reading(nday, data_video)
    mysql_tool.closedb()
    for data in [data_news]:
        for k, v in data.items():
            _id = time_tool.get_someday_str(-nday)
            v['@timestamp'] = time_tool.get_someday_es_format(-nday)
            url = "http://localhost:8200/edit_info/doc" + \
                "/" + _id + "_" + v['channel'] + "_" + str(k)
            r = requests.post(url, headers=edit_info.JSON_HEADER,
                              data=json.dumps(v), timeout=(30, 120))
            if 200 != r.status_code and 201 != r.status_code:
                logger.error("request edit index failed, status_code:%d, reason:%s, %s, %s",
                             r.status_code, json.dumps(r.json()), url, json.dumps(v))
def process(nday=1):
    mysql_tool.connectdb()
    device_arr = get_device_arr(nday)
    device_per_channel = get_device_channel(device_arr)
    user_arr = get_user_arr(nday)
    # user_per_channel = get_user_channel_(user_arr)
    # get_user_channel_ returns channel info that does not always match
    # get_user_channel; get_user_channel reads from the DB, and the DB info
    # should take priority.
    user_channel = get_user_channel(user_arr)
    user_channel_stay = copy.deepcopy(user_channel)
    user_channel_stay["all_channel"] = user_arr
    app_stay, app_stay_first = get_app_stay(
        get_query_app_stay(), user_channel_stay, nday)
    task_per_channel = get_task(user_channel, nday=nday)
    reading_per_channel = get_reading(user_channel, nday=nday)
    video_per_channel = get_video(user_channel, nday=nday)
    child_per_channel = get_child(user_channel, nday=nday)
    mysql_tool.closedb()
    for k, v in user_channel_stay.items():
        len_v = len(v)
        data = {}
        data["channel"] = k
        data["@timestamp"] = time_tool.get_someday_es_format(-nday)
        data["num_user"] = len_v
        if k in device_per_channel.keys():
            data["num_device"] = device_per_channel[k]
        if k in task_per_channel.keys() and task_per_channel[k]["num_user"] > 0:
            data["num_task_average"] = round(
                task_per_channel[k]["num_task"] /
                float(task_per_channel[k]["num_user"]), 2)
        if k in reading_per_channel.keys():
            data["num_read"] = reading_per_channel[k]["num_read"]
            data["num_read_user"] = reading_per_channel[k]["num_user"]
            if reading_per_channel[k]["num_user"] > 0:
                data["num_read_average"] = round(
                    data["num_read"] / float(data["num_read_user"]), 2)
        if k in video_per_channel.keys():
            data["num_video"] = video_per_channel[k]["num_video"]
            data["num_video_user"] = video_per_channel[k]["num_user"]
            if data["num_video_user"] > 0:
                data["num_video_average"] = round(
                    data["num_video"] / float(data["num_video_user"]), 2)
                data["num_video_average_f"] = data["num_video_average"]
        if k in child_per_channel.keys():
            data["num_child"] = child_per_channel[k]
        if k in app_stay.keys():
            data["app_stay"] = int(app_stay[k])
            if data["app_stay"] > 60:
                data["app_stay_show"] = str(
                    data["app_stay"] / 60) + u"分" + str(
                    data["app_stay"] % 60) + u"秒"
            else:
                data["app_stay_show"] = str(data["app_stay"]) + u"秒"
        if k in app_stay_first.keys():
            data["app_stay_first"] = int(app_stay_first[k])
            if data["app_stay_first"] > 60:
                data["app_stay_first_show"] = str(
                    data["app_stay_first"] / 60) + u"分" + str(
                    data["app_stay_first"] % 60) + u"秒"
            else:
                data["app_stay_first_show"] = str(
                    data["app_stay_first"]) + u"秒"
        url = URL_ELASTICSEARCH_ACTIVE_USER_INFO + "/" + \
            time_tool.get_someday_str(-nday) + "_" + k
        r = requests.post(url, headers=JSON_HEADER,
                          data=json.dumps(data), timeout=(30, 60))
        if 200 != r.status_code and 201 != r.status_code:
            logger.error("request active_user_info index failed, status_code:%d, reason:%s",
                         r.status_code, r.reason)
def process(nday=1):
    user_per_channel = get_new_user(nday=nday)
    device_per_channel = get_new_device(nday=nday)
    app_stay, app_stay_first = get_app_stay(
        get_query_app_stay(), get_query_user(), nday)
    mysql_tool.connectdb()
    task_per_channel, task_user_per_channel = get_task(nday=nday)
    reading_per_channel, reading_user_per_channel = get_reading(nday=nday)
    video_per_channel, video_user_per_channel = get_video(nday=nday)
    child_per_channel = get_child(nday=nday)
    mysql_tool.closedb()
    for k, v in user_per_channel.items():
        data = {}
        data["channel"] = k
        data["@timestamp"] = time_tool.get_someday_es_format(-nday)
        data["num_user"] = v
        if k in device_per_channel.keys():
            data["num_device"] = device_per_channel[k]
        if k in task_per_channel.keys():
            data["num_task"] = task_per_channel[k]
        if k in task_user_per_channel.keys():
            data["num_task_user"] = task_user_per_channel[k]
            if data["num_task_user"] > 0:
                data["num_task_average"] = round(
                    data["num_task"] / float(data["num_task_user"]), 2)
        if k in reading_per_channel.keys():
            data["num_read"] = reading_per_channel[k]
        if k in reading_user_per_channel.keys():
            data["num_read_user"] = reading_user_per_channel[k]
            if data["num_read_user"] > 0:
                data["num_read_average"] = round(
                    data["num_read"] / float(data["num_read_user"]), 2)
        if k in video_per_channel.keys():
            data["num_video"] = video_per_channel[k]
        if k in video_user_per_channel.keys():
            data["num_video_user"] = video_user_per_channel[k]
            if data["num_video_user"] > 0:
                data["num_video_average"] = round(
                    data["num_video"] / float(data["num_video_user"]), 2)
        if k in child_per_channel.keys():
            data["num_child"] = child_per_channel[k]
        if k in app_stay.keys():
            data["app_stay"] = int(app_stay[k])
            if data["app_stay"] > 60:
                data["app_stay_show"] = str(
                    data["app_stay"] / 60) + u"分" + str(
                    data["app_stay"] % 60) + u"秒"
            else:
                data["app_stay_show"] = str(data["app_stay"]) + u"秒"
        if k in app_stay_first.keys():
            data["app_stay_first"] = int(app_stay_first[k])
            if data["app_stay_first"] > 60:
                data["app_stay_first_show"] = str(
                    data["app_stay_first"] / 60) + u"分" + str(
                    data["app_stay_first"] % 60) + u"秒"
            else:
                data["app_stay_first_show"] = str(
                    data["app_stay_first"]) + u"秒"
        url = URL_ELASTICSEARCH_NEW_USER_INFO + "/" + \
            time_tool.get_someday_str(-nday) + "_" + k
        r = requests.post(url, headers=JSON_HEADER,
                          data=json.dumps(data), timeout=(60, 120))
        if 200 != r.status_code and 201 != r.status_code:
            logger.error(
                "request new_user_info index failed, status_code:%d, reason:%s",
                r.status_code, r.reason)
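# The minutes/seconds display strings are built the same way in both the
# active-user and new-user jobs above. A minimal sketch of a shared formatter
# (hypothetical helper name); // preserves the integer division the original
# Python 2 code relies on.
def format_stay(seconds):
    if seconds > 60:
        return str(seconds // 60) + u"分" + str(seconds % 60) + u"秒"
    return str(seconds) + u"秒"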