Example #1
0
def process(nday=1):
    """Post the per-key task data for one day to URL_ELASTICSEARCH_TASK_INFO.

    The data comes from ``get_task_info(nday)``; derived fields are added to
    each entry before it is posted under the id ``<day>_<key>``.

    Args:
        nday: Day offset; it is passed negated to the ``time_tool`` helpers
            and to ``active.get_query``.
    """
    mysql_tool.connectdb()
    try:
        mysql_tool.querydb("SET NAMES utf8mb4")
        data = get_task_info(nday)
    finally:
        # Release the connection even when a query raises.
        mysql_tool.closedb()

    dau, _ = active.get_user_device_count(active.get_query(-nday))

    # Both values depend only on nday, so compute them once.
    timestamp = time_tool.get_someday_es_format(-nday)
    day_id = time_tool.get_someday_str(-nday)

    for k, v in data.items():
        v['@timestamp'] = timestamp
        if k != 0 and 'num' in v and 'num_for_parent' in v:
            v['num_for_no_child'] = v['num'] - v['num_for_parent']
            # 'n' / 'n_for_parent' are not covered by the check above, so
            # guard them separately instead of risking a KeyError.
            if 'n' in v and 'n_for_parent' in v:
                v['n_for_no_child'] = v['n'] - v['n_for_parent']
            if dau > 0:
                v['num_dau'] = float(v['num']) / dau
        url = URL_ELASTICSEARCH_TASK_INFO + "/" + day_id + "_" + str(k)
        r = requests.post(url,
                          headers=JSON_HEADER,
                          data=json.dumps(v),
                          timeout=(10, 120))
        if r.status_code not in (200, 201):
            logger.error(
                "request task_info index failed, status_code:%d, reason:%s",
                r.status_code, r.reason)
Example #2
0
def process_old(nday=1):
    """Check the edit_info documents for one day and create the missing ones.

    For every news/video entry the document ``<day>_<channel>_<key>`` is
    fetched. If the GET does not return 200 the entry is posted; otherwise
    the stored ``_source`` is printed with ``effective_reading`` removed.
    Nothing is written back in that second case.

    Args:
        nday: Day offset; it is passed negated to the ``time_tool`` helpers.
    """
    mysql_tool.connectdb(host="47.96.238.205", database="taozuiredian-news")
    try:
        mysql_tool.querydb("SET NAMES utf8mb4")

        data_news = edit_info.get_edit_news_info(nday)
        data_news = edit_info.get_hot_news_info(nday, data_news)
        data_news = edit_info.get_hot_list_info(nday, data_news)
        data_news = edit_info.get_news_effective_reading(nday, data_news)
        data_video = edit_info.get_edit_video_info(nday)
        data_video = edit_info.get_video_effective_reading(nday, data_video)
    finally:
        # Release the connection even when a query raises.
        mysql_tool.closedb()

    tid = time_tool.get_someday_str(-nday)
    for data in [data_news, data_video]:
        for k, v in data.items():
            url = edit_info.URL_ELASTICSEARCH_EDIT_INFO + \
                "/" + tid + "_" + v["channel"] + "_" + str(k)
            r = requests.get(url, headers=edit_info.JSON_HEADER, timeout=(30, 120))
            if 200 != r.status_code:
                # r.text is used instead of r.json(): an error body is not
                # guaranteed to be JSON and json() would raise on it.
                print("%s %s %s" % (r.status_code, r.reason, r.text))
                v['@timestamp'] = time_tool.get_someday_es_format(-nday)
                r = requests.post(url, headers=edit_info.JSON_HEADER,
                                  data=json.dumps(v), timeout=(30, 120))
                if r.status_code not in (200, 201):
                    print("%s %s %s" % (r.status_code, r.reason, r.text))
                continue

            r_json = r.json()
            if "_source" not in r_json:
                # Nothing to show; the original fell through to a print of an
                # unbound (or stale, from a previous iteration) variable.
                continue
            td = r_json["_source"]
            # td['effective_reading'] = v['effective_reading']
            td.pop('effective_reading', None)
            print(td)
Example #3
0
def process(nday=1):
    """Collect PV/UV figures per news category plus an overall total.

    Args:
        nday: Day offset forwarded to the query helpers.

    Returns:
        dict: Maps each category id (and ``define.TOTAL_ID``) to a dict with
        the keys ``show_all_pv``, ``show_all_uv``, ``page_click_pv``,
        ``page_click_uv`` and, when the show_all UV is positive,
        ``show_all_pv_uv`` (PV / UV rounded to two decimals).
    """
    def _store(bucket, prefix, pv, uv):
        # Write one (pv, uv) pair into bucket; the ratio is only kept for
        # the "show_all" metric and only when uv is positive.
        bucket[prefix + "_pv"] = pv
        bucket[prefix + "_uv"] = uv
        if uv > 0 and prefix == "show_all":
            bucket[prefix + "_pv_uv"] = round(float(pv) / uv, 2)

    mysql_tool.connectdb(host="47.96.238.205", database="taozuiredian-news")
    try:
        data = {}
        news_id_arr = get_news(nday=nday)
        news_category = get_news_category(nday=nday, news_id_arr=news_id_arr)
        for func, k in [(get_puv4news_show_all, "show_all"),
                        (get_puv4news_page_click, "page_click")]:
            for category_id, news_arr in news_category.items():
                bucket = data.setdefault(category_id, {})
                pv, uv = func(nday=nday, news_id_arr=news_arr)
                _store(bucket, k, pv, uv)
            # Called without news_id_arr to get the overall figures.
            pv, uv = func(nday=nday)
            _store(data.setdefault(define.TOTAL_ID, {}), k, pv, uv)
    finally:
        # Release the connection even when a query raises.
        mysql_tool.closedb()

    return data
Example #4
0
def process(nday=1):
    """Rewrite ``pv_total`` of the stored hot-list edit_info documents.

    For each key returned by ``get_hot_list_pv_info(nday)`` the document
    ``<day>_news_<key>`` is fetched, its ``pv_total`` and ``@timestamp`` are
    replaced, and the document is posted back to the same URL.

    Args:
        nday: Day offset; it is passed negated to the ``time_tool`` helpers.
    """
    mysql_tool.connectdb(host="47.96.238.205", database="taozuiredian-news")
    try:
        list_pv_total = get_hot_list_pv_info(nday)
    finally:
        # Release the connection even when the query raises.
        mysql_tool.closedb()

    # Both values depend only on nday, so compute them once.
    _id = time_tool.get_someday_str(-nday)
    timestamp = time_tool.get_someday_es_format(-nday)

    for k, v in list_pv_total.items():
        # All news in the overall chart and the rising chart is refreshed at
        # 1 a.m., so the pv_total computed for these two charts in the early
        # morning becomes stale and has to be computed once more.
        url = config.URL_EDIT_INFO + "/" + _id + "_news_" + str(k)
        r = requests.get(url, headers=config.JSON_HEADER, timeout=(30, 120))
        if 200 != r.status_code:
            logger.error(
                "request edit_info index failed, status_code:%d, reason:%s, k:%s",
                r.status_code, r.reason, k)
            continue

        r_json = r.json()
        if "_source" not in r_json:
            continue
        yd = r_json["_source"]
        yd['pv_total'] = v
        yd['@timestamp'] = timestamp
        # The same document URL is used for reading and for writing back.
        r = requests.post(url,
                          headers=config.JSON_HEADER,
                          data=json.dumps(yd),
                          timeout=(30, 120))
        if r.status_code not in (200, 201):
            # r.text instead of json.dumps(r.json()): the error body is not
            # guaranteed to be JSON and json() would raise while logging.
            logger.error(
                "request edit index failed, status_code:%d, reason:%s, %s, %s",
                r.status_code, r.text, url,
                json.dumps(v))
Example #5
0
def process(nday=1):
    """Build the news/video edit_info documents for one day and post them.

    The PV/UV figures from ``edit_pv_uv.process`` are merged into the news
    entries. For every entry the previous day's document is fetched to derive
    the daily ``pv`` from the two ``pv_total`` values, then the entry is
    posted under ``<day>_<channel>_<key>``.

    Args:
        nday: Day offset; it is passed negated to the ``time_tool`` helpers.
    """
    data_pv_uv = edit_pv_uv.process(nday)

    mysql_tool.connectdb(host="47.96.238.205", database="taozuiredian-news")
    try:
        mysql_tool.querydb("SET NAMES utf8mb4")
        data_news = get_edit_news_info(nday)
        data_news = get_hot_list_info(nday, data_news)
        data_news = get_hot_news_info(nday, data_news)
        data_news = get_news_effective_reading(nday, data_news)
        data_video = get_edit_video_info(nday)
        data_video = get_video_effective_reading(nday, data_video)
    finally:
        # Release the connection even when a query raises.
        mysql_tool.closedb()

    # Copy the PV/UV fields into the news entries that share the same key.
    for k, v in data_pv_uv.items():
        if k in data_news:
            data_news[k].update(v)

    # (numerator key, denominator key, result key) triples handed to set_pv_
    # once the daily pv is known.
    pv_ratios = [
        ("pv", "dau_count", "pv_dau"),
        ("pv", "new_published_count", "pv_published"),
        ("effective_reading", "pv", "reading_pv"),
        ("comments_count", "pv", "comments_pv"),
        ("zan_count", "pv", "zan_pv"),
        ("like_count", "pv", "like_pv"),
        ("share_count", "pv", "share_pv"),
    ]

    # These values depend only on nday, so compute them once.
    yid = time_tool.get_someday_str(-nday - 1)  # the day before
    _id = time_tool.get_someday_str(-nday)
    timestamp = time_tool.get_someday_es_format(-nday)

    for data in [data_news, data_video]:
        for k, v in data.items():
            doc_suffix = "_" + v["channel"] + "_" + str(k)
            url = URL_ELASTICSEARCH_EDIT_INFO + "/" + yid + doc_suffix
            r = requests.get(url, headers=JSON_HEADER, timeout=(30, 120))
            if 200 != r.status_code:
                logger.error("request edit_info index failed, status_code:%d, reason:%s, k:%s",
                             r.status_code, r.reason, k)
            else:
                r_json = r.json()
                yd = r_json["_source"] if "_source" in r_json else {}
                # Require pv_total on both sides; the original only checked
                # yesterday's document and could raise KeyError on v.
                if "pv_total" in yd and "pv_total" in v:
                    v["pv"] = v["pv_total"] - yd["pv_total"]
                    for numerator, denominator, target in pv_ratios:
                        set_pv_(v, numerator, denominator, target)
            v['@timestamp'] = timestamp
            set_pv_(v, "show_all_pv_uv", "new_published_count", "show_all_pv_uv_new_published")
            url = URL_ELASTICSEARCH_EDIT_INFO + "/" + _id + doc_suffix
            r = requests.post(url, headers=JSON_HEADER,
                              data=json.dumps(v), timeout=(30, 120))
            if r.status_code not in (200, 201):
                # r.text instead of json.dumps(r.json()): the error body is
                # not guaranteed to be JSON.
                logger.error("request edit index failed, status_code:%d, reason:%s, %s, %s",
                             r.status_code, r.text, url, json.dumps(v))
Example #6
0
def process(nday=1):
    """Post the result of ``get_cash_user(nday)`` to URL_ELASTICSEARCH_CASH_INFO.

    The document id is the day string for ``nday`` days back.

    Args:
        nday: Day offset; it is passed negated to the ``time_tool`` helpers.
    """
    mysql_tool.connectdb()
    try:
        data = get_cash_user(nday)
    finally:
        # Release the connection even when the query raises.
        mysql_tool.closedb()

    data['@timestamp'] = time_tool.get_someday_es_format(-nday)
    url = URL_ELASTICSEARCH_CASH_INFO + "/" + time_tool.get_someday_str(-nday)
    r = requests.post(url,
                      headers=JSON_HEADER,
                      data=json.dumps(data),
                      timeout=(10, 20))
    if r.status_code not in (200, 201):
        # The message used to name active_user_info, which is not the index
        # this request targets.
        logger.error(
            "request cash_info index failed, status_code:%d, reason:%s",
            r.status_code, r.reason)
Example #7
0
def process(nday=1):
    """Post the per-VIP parent data and the new-user count for one day.

    Each entry of ``get_parent_info(nday)`` is posted under ``<day>_<key>``;
    ``num_new`` is added when the previous day's document carries ``num``.
    Afterwards one extra document with id ``<day>`` and an empty ``vip``
    field is posted, holding the "all_channel" value of
    ``nui.get_new_user``.

    Args:
        nday: Day offset; it is passed negated to the ``time_tool`` helpers.
    """
    mysql_tool.connectdb()
    try:
        data = get_parent_info(nday)
    finally:
        # Release the connection even when the query raises.
        mysql_tool.closedb()

    # These values depend only on nday, so compute them once.
    yid = time_tool.get_someday_str(-nday - 1)
    _id = time_tool.get_someday_str(-nday)
    timestamp = time_tool.get_someday_es_format(-nday)

    for k, v in data.items():
        # Fetch the previous day's number of masters per VIP level:
        # new masters per VIP level = today's count - yesterday's count.
        # str(k) keeps the URL building safe for non-string keys.
        url = URL_ELASTICSEARCH_PARENT_VIP_INFO + "/" + yid + "_" + str(k)
        r = requests.get(url, headers=JSON_HEADER, timeout=(30, 120))
        if 200 != r.status_code:
            logger.error(
                "request parent_info index failed, status_code:%d, reason:%s, k:%s",
                r.status_code, r.reason, k)
        else:
            r_json = r.json()
            if "_source" in r_json:
                yd = r_json["_source"]
                if "num" in yd:
                    v["num_new"] = v["num"] - yd["num"]
        v['@timestamp'] = timestamp
        v["vip"] = k
        url = URL_ELASTICSEARCH_PARENT_VIP_INFO + "/" + _id + "_" + str(k)
        r = requests.post(url,
                          headers=JSON_HEADER,
                          data=json.dumps(v),
                          timeout=(30, 120))
        if r.status_code not in (200, 201):
            logger.error(
                "request parent_info index failed, status_code:%d, reason:%s",
                r.status_code, r.reason)

    user_per_channel = nui.get_new_user(nday=nday)
    data_num_user = {
        '@timestamp': timestamp,
        "num_user_new": user_per_channel["all_channel"],
        "vip": "",
    }
    url = URL_ELASTICSEARCH_PARENT_VIP_INFO + "/" + _id
    r = requests.post(url,
                      headers=JSON_HEADER,
                      data=json.dumps(data_num_user),
                      timeout=(30, 120))
    if r.status_code not in (200, 201):
        logger.error(
            "request parent_info index failed, status_code:%d, reason:%s",
            r.status_code, r.reason)
Example #8
0
def process(nday=1):
    """Post edit statistics for new users and derive the "not_new" ones.

    Every news/video entry is posted to URL_ELASTICSEARCH_EDIT_USER_INFO.
    When the matching edit_info document exists, a deep copy of the entry is
    marked ``user_type = "not_new"``, passed through ``set_a_key`` for four
    counters, and posted under the ``_not_new_`` id.

    Args:
        nday: Day offset; it is passed negated to the ``time_tool`` helpers.
    """
    new_user_arr = get_new_user_arr(nday)
    mysql_tool.connectdb(host="47.96.238.205", database="taozuiredian-news")
    try:
        mysql_tool.querydb("SET NAMES utf8mb4")
        ner = get_news_effective_readings(new_user_arr, nday)
        data_news = get_edit_news_info(new_user_arr, "new", ner, nday)
        data_news = get_hot_news_info(new_user_arr, ner, data_news, nday)
        data_news = get_hot_list_info(new_user_arr, ner, data_news, nday)
        ver = get_video_effective_readings(new_user_arr, nday)
        data_video = get_edit_video_info(new_user_arr, "new", ver, nday)
    finally:
        # Release the connection even when a query raises.
        mysql_tool.closedb()

    # Both values depend only on nday, so compute them once.
    timestamp = time_tool.get_someday_es_format(-nday)
    _id = time_tool.get_someday_str(-nday)
    counter_keys = ("effective_reading", "comments_count",
                    "like_count", "zan_count")

    for data in [data_news, data_video]:
        for k, v in data.items():
            v['@timestamp'] = timestamp
            doc_suffix = v['channel'] + "_" + str(k)

            url = URL_ELASTICSEARCH_EDIT_USER_INFO + \
                "/" + _id + "_" + v['user_type'] + "_" + doc_suffix
            r = requests.post(url, headers=JSON_HEADER,
                              data=json.dumps(v), timeout=(30, 120))
            if r.status_code not in (200, 201):
                # r.text instead of json.dumps(r.json()) here and below: an
                # error body is not guaranteed to be JSON.
                logger.error("request edit index failed, status_code:%d, reason:%s, %s, %s",
                             r.status_code, r.text, url, json.dumps(v))

            url_edit = edit_info.URL_ELASTICSEARCH_EDIT_INFO + \
                "/" + _id + "_" + doc_suffix
            r = requests.get(url_edit, headers=JSON_HEADER, timeout=(30, 120))
            if 200 != r.status_code:
                logger.error("request edit_user_info failed, status_code:%d, reason:%s",
                             r.status_code, r.text)
                continue

            r_json = r.json()
            if "_source" not in r_json:
                continue
            info = r_json["_source"]
            v_ = copy.deepcopy(v)
            v_["user_type"] = "not_new"
            for key in counter_keys:
                set_a_key(v, info, v_, key)
            url_ = URL_ELASTICSEARCH_EDIT_USER_INFO + \
                "/" + _id + "_not_new_" + doc_suffix
            r = requests.post(url_, headers=JSON_HEADER,
                              data=json.dumps(v_), timeout=(30, 120))
            if r.status_code not in (200, 201):
                logger.error("request edit index failed, status_code:%d, reason:%s",
                             r.status_code, r.text)
Example #9
0
def process(nday=1):
    """Post the per-channel parent data for one day to URL_ELASTICSEARCH_PARENT_INFO.

    For every entry of ``get_parent_info(nday)`` the previous day's document
    is fetched to derive ``today_rebate`` and ``average_rebate_per_child``,
    then the entry is posted under ``<day>_<key>``.

    Args:
        nday: Day offset; it is passed negated to the ``time_tool`` helpers.
    """
    mysql_tool.connectdb()
    try:
        data = get_parent_info(nday)
    finally:
        # Release the connection even when the query raises.
        mysql_tool.closedb()

    # These values depend only on nday, so compute them once.
    yid = time_tool.get_someday_str(-nday - 1)
    _id = time_tool.get_someday_str(-nday)
    timestamp = time_tool.get_someday_es_format(-nday)

    for k, v in data.items():
        # Fetch the previous day's total income and rebate (tribute) income
        # of the masters in order to compute the current day's income.
        # str(k) keeps the URL building safe for non-string keys.
        url = URL_ELASTICSEARCH_PARENT_INFO + "/" + yid + "_" + str(k)
        r = requests.get(url, headers=JSON_HEADER, timeout=(10, 20))
        if 200 != r.status_code:
            logger.error(
                "request parent_info index failed, status_code:%d, reason:%s, k:%s",
                r.status_code, r.reason, k)
        else:
            r_json = r.json()
            if "_source" in r_json:
                yd = r_json["_source"]
                # if "total_income" in v.keys():
                #     ydti = yd["total_income"] if "total_income" in yd.keys() else 0
                #     v["today_income"] = v["total_income"] - ydti
                if "total_rebate" in v:
                    # A missing value in yesterday's document counts as 0.
                    v["today_rebate"] = v["total_rebate"] - yd.get("total_rebate", 0)
                    if "num_active_child" in v and v["num_active_child"] > 0:
                        # average rebate = today's rebate income / number of
                        # active apprentices; see the spreadsheet
                        # "Backend data dimensions (3).xlsx".
                        v["average_rebate_per_child"] = round(
                            v["today_rebate"] / float(v["num_active_child"]),
                            2)
        v['@timestamp'] = timestamp
        v["channel"] = k
        url = URL_ELASTICSEARCH_PARENT_INFO + "/" + _id + "_" + str(k)
        r = requests.post(url,
                          headers=JSON_HEADER,
                          data=json.dumps(v),
                          timeout=(10, 20))
        if r.status_code not in (200, 201):
            logger.error(
                "request parent_info index failed, status_code:%d, reason:%s",
                r.status_code, r.reason)
Example #10
0
def process(nday=0):
    """Post every news item that has content to config.URL_NEWS_SEARCH.

    The items come from ``get_news(nday)``; the content of each one is looked
    up with ``get_news_content`` and items with empty content are skipped.

    Args:
        nday: Day offset forwarded to ``get_news``.
    """
    mysql_tool.connectdb(host="10.100.100.198",
                         user="******",
                         password="******",
                         database="taozuiredian-news")
    try:
        mysql_tool.querydb("SET NAMES utf8mb4")

        news = get_news(nday)
        for k, v in news.items():
            # The connection stays open for the whole loop because the
            # content lookup runs once per item.
            v['content'] = get_news_content(k)
            if v['content'] == "":
                continue
            url = config.URL_NEWS_SEARCH + "/" + str(k)
            r = requests.post(url, headers=config.JSON_HEADER,
                              data=json.dumps(v), timeout=(30, 120))
            if r.status_code not in (200, 201):
                logger.error(
                    "post news_research index error, status_code:%s, reason:%s", r.status_code, r.reason)
    finally:
        # Release the connection even when a query or a request raises.
        mysql_tool.closedb()
0
def process(nday=1):
    """Post the news edit_info documents for one day to a local endpoint.

    Only the edit-news and hot-news steps run; the remaining steps are
    commented out below. Each entry is posted to
    ``http://localhost:8200/edit_info/doc/<day>_<channel>_<key>``.

    Args:
        nday: Day offset; it is passed negated to the ``time_tool`` helpers.
    """
    mysql_tool.connectdb(host="47.96.238.205", database="taozuiredian-news")
    try:
        mysql_tool.querydb("SET NAMES utf8mb4")
        data_news = edit_info.get_edit_news_info(nday)
        data_news = edit_info.get_hot_news_info(nday, data_news)
        # data_news = edit_info.get_hot_list_info(nday, data_news)
        # data_news = edit_info.get_news_effective_reading(nday, data_news)
        # data_video = edit_info.get_edit_video_info(nday)
        # data_video = edit_info.get_video_effective_reading(nday, data_video)
    finally:
        # Release the connection even when a query raises.
        mysql_tool.closedb()

    # Both values depend only on nday, so compute them once.
    _id = time_tool.get_someday_str(-nday)
    timestamp = time_tool.get_someday_es_format(-nday)

    for k, v in data_news.items():
        v['@timestamp'] = timestamp
        url = "http://localhost:8200/edit_info/doc" + \
            "/" + _id + "_" + v['channel'] + "_" + str(k)
        r = requests.post(url, headers=edit_info.JSON_HEADER,
                          data=json.dumps(v), timeout=(30, 120))
        if r.status_code not in (200, 201):
            # r.text instead of json.dumps(r.json()): the error body is not
            # guaranteed to be JSON.
            logger.error("request edit index failed, status_code:%d, reason:%s, %s, %s",
                         r.status_code, r.text, url, json.dumps(v))
Example #12
0
def process(nday=1):
    """Post per-channel active-user statistics for one day.

    One document per channel (plus "all_channel") is built from the helper
    results and posted to URL_ELASTICSEARCH_ACTIVE_USER_INFO under
    ``<day>_<channel>``.

    Args:
        nday: Day offset; it is passed negated to the ``time_tool`` helpers.
    """
    def _stay_text(seconds):
        # Render a duration in seconds as "<m>分<s>秒", or "<s>秒" when it is
        # not above 60. Floor division keeps the minutes an integer on
        # Python 3 as well as on Python 2.
        if seconds > 60:
            return str(seconds // 60) + u"分" + str(seconds % 60) + u"秒"
        return str(seconds) + u"秒"

    mysql_tool.connectdb()
    try:
        device_arr = get_device_arr(nday)
        device_per_channel = get_device_channel(device_arr)
        user_arr = get_user_arr(nday)
        # user_per_channel = get_user_channel_(user_arr) returns channel info
        # that differs from get_user_channel. get_user_channel reads from the
        # db, and the db info should be used first.
        user_channel = get_user_channel(user_arr)
        user_channel_stay = copy.deepcopy(user_channel)
        user_channel_stay["all_channel"] = user_arr
        app_stay, app_stay_first = get_app_stay(
            get_query_app_stay(), user_channel_stay, nday)
        task_per_channel = get_task(user_channel, nday=nday)
        reading_per_channel = get_reading(user_channel, nday=nday)
        video_per_channel = get_video(user_channel, nday=nday)
        child_per_channel = get_child(user_channel, nday=nday)
    finally:
        # Release the connection even when a query raises.
        mysql_tool.closedb()

    # Both values depend only on nday, so compute them once.
    timestamp = time_tool.get_someday_es_format(-nday)
    day_id = time_tool.get_someday_str(-nday)

    for k, v in user_channel_stay.items():
        data = {
            "channel": k,
            "@timestamp": timestamp,
            "num_user": len(v),
        }
        if k in device_per_channel:
            data["num_device"] = device_per_channel[k]
        if k in task_per_channel and task_per_channel[k]["num_user"] > 0:
            data["num_task_average"] = round(
                task_per_channel[k]["num_task"] / float(task_per_channel[k]["num_user"]), 2)
        if k in reading_per_channel:
            data["num_read"] = reading_per_channel[k]["num_read"]
            data["num_read_user"] = reading_per_channel[k]["num_user"]
            if data["num_read_user"] > 0:
                data["num_read_average"] = round(
                    data["num_read"] / float(data["num_read_user"]), 2)
        if k in video_per_channel:
            data["num_video"] = video_per_channel[k]["num_video"]
            data["num_video_user"] = video_per_channel[k]["num_user"]
            if data["num_video_user"] > 0:
                data["num_video_average"] = round(
                    data["num_video"] / float(data["num_video_user"]), 2)
                data["num_video_average_f"] = data["num_video_average"]
        if k in child_per_channel:
            data["num_child"] = child_per_channel[k]
        if k in app_stay:
            data["app_stay"] = int(app_stay[k])
            data["app_stay_show"] = _stay_text(data["app_stay"])
        if k in app_stay_first:
            data["app_stay_first"] = int(app_stay_first[k])
            data["app_stay_first_show"] = _stay_text(data["app_stay_first"])
        url = URL_ELASTICSEARCH_ACTIVE_USER_INFO + "/" + day_id + "_" + k
        r = requests.post(url, headers=JSON_HEADER,
                          data=json.dumps(data), timeout=(30, 60))
        if r.status_code not in (200, 201):
            logger.error("request active_user_info index failed, status_code:%d, reason:%s",
                         r.status_code, r.reason)
Example #13
0
def process(nday=1):
    """Post per-channel new-user statistics for one day.

    One document per channel of ``get_new_user`` is built from the helper
    results and posted to URL_ELASTICSEARCH_NEW_USER_INFO under
    ``<day>_<channel>``.

    Args:
        nday: Day offset; it is passed negated to the ``time_tool`` helpers.
    """
    def _stay_text(seconds):
        # Render a duration in seconds as "<m>分<s>秒", or "<s>秒" when it is
        # not above 60. Floor division keeps the minutes an integer on
        # Python 3 as well as on Python 2.
        if seconds > 60:
            return str(seconds // 60) + u"分" + str(seconds % 60) + u"秒"
        return str(seconds) + u"秒"

    def _add_average(doc, total_key, user_key, average_key):
        # Store total / users rounded to two decimals. The total comes from
        # a different map than the user count, so it may be absent.
        if doc[user_key] > 0 and total_key in doc:
            doc[average_key] = round(doc[total_key] / float(doc[user_key]), 2)

    user_per_channel = get_new_user(nday=nday)
    device_per_channel = get_new_device(nday=nday)
    app_stay, app_stay_first = get_app_stay(get_query_app_stay(),
                                            get_query_user(), nday)
    mysql_tool.connectdb()
    try:
        task_per_channel, task_user_per_channel = get_task(nday=nday)
        reading_per_channel, reading_user_per_channel = get_reading(nday=nday)
        video_per_channel, video_user_per_channel = get_video(nday=nday)
        child_per_channel = get_child(nday=nday)
    finally:
        # Release the connection even when a query raises.
        mysql_tool.closedb()

    # Both values depend only on nday, so compute them once.
    timestamp = time_tool.get_someday_es_format(-nday)
    day_id = time_tool.get_someday_str(-nday)

    for k, v in user_per_channel.items():
        data = {
            "channel": k,
            "@timestamp": timestamp,
            "num_user": v,
        }
        if k in device_per_channel:
            data["num_device"] = device_per_channel[k]
        if k in task_per_channel:
            data["num_task"] = task_per_channel[k]
        if k in task_user_per_channel:
            data["num_task_user"] = task_user_per_channel[k]
            _add_average(data, "num_task", "num_task_user", "num_task_average")
        if k in reading_per_channel:
            data["num_read"] = reading_per_channel[k]
        if k in reading_user_per_channel:
            data["num_read_user"] = reading_user_per_channel[k]
            _add_average(data, "num_read", "num_read_user", "num_read_average")
        if k in video_per_channel:
            data["num_video"] = video_per_channel[k]
        if k in video_user_per_channel:
            data["num_video_user"] = video_user_per_channel[k]
            _add_average(data, "num_video", "num_video_user", "num_video_average")
        if k in child_per_channel:
            data["num_child"] = child_per_channel[k]
        if k in app_stay:
            data["app_stay"] = int(app_stay[k])
            data["app_stay_show"] = _stay_text(data["app_stay"])
        if k in app_stay_first:
            data["app_stay_first"] = int(app_stay_first[k])
            data["app_stay_first_show"] = _stay_text(data["app_stay_first"])
        url = URL_ELASTICSEARCH_NEW_USER_INFO + "/" + day_id + "_" + k
        r = requests.post(url,
                          headers=JSON_HEADER,
                          data=json.dumps(data),
                          timeout=(60, 120))
        if r.status_code not in (200, 201):
            logger.error(
                "request new_user_info index failed, status_code:%d, reason:%s",
                r.status_code, r.reason)