Ejemplo n.º 1
0
def get_video(user_channel=None, nday=1):
    """Count effective video readings per channel for a single day.

    Args:
        user_channel: mapping of channel key -> list of user-id strings;
            defaults to an empty mapping (previously a mutable default
            argument shared across calls).
        nday: how many days back to look (1 = yesterday).

    Returns:
        dict keyed by channel, each entry holding "num_video" (reading
        count) and "num_user" (distinct readers), plus an "all_channel"
        aggregate over every channel.
    """
    if user_channel is None:
        user_channel = {}
    data = {"all_channel": {"num_video": 0, "num_user": 0}}
    day1 = time_tool.get_someday_str(-nday)
    day2 = time_tool.get_someday_str(-nday+1)
    # NOTE(review): SQL is built by string interpolation — assumes user ids
    # are internally generated numeric strings; verify before wider use.
    sql = "select count(user_id), count(distinct user_id) from video_effective_readings where user_id in (%s) and created_at >= \"%s\" and created_at < \"%s\" and effective = 1"

    for k, user_arr in user_channel.items():
        data[k] = {"num_video": 0, "num_user": 0}
        # query in chunks of 1000 ids to keep the IN clause bounded
        for i in range(0, len(user_arr), 1000):
            user_to_sql = user_arr[i:i+1000]
            sql_use = sql % (",".join(user_to_sql), day1, day2)
            rt = mysql_tool.querydb(
                sql_use, logger, "get video count for %d users" % len(user_to_sql))
            # count() over an empty set can still yield a row; guard None
            if len(rt) > 0 and rt[0][0] is not None:
                data[k]["num_video"] += int(rt[0][0])
                data[k]["num_user"] += int(rt[0][1])
        data["all_channel"]["num_video"] += data[k]["num_video"]
        data["all_channel"]["num_user"] += data[k]["num_user"]

    return data
Ejemplo n.º 2
0
def get_edit_video_info(nday=1):
    """Collect per-category video statistics for the target day.

    Builds a dict keyed by category id containing channel metadata, total
    pv, category name and newly-published counts, plus aggregate entries
    under TOTAL_ID and HOT_VIDEO_CATEGORY_ID.

    Args:
        nday: how many days back to look (1 = yesterday).

    Returns:
        dict mapping category id -> stats dict.
    """
    data = {}
    day1 = time_tool.get_someday_str(-nday)
    day2 = time_tool.get_someday_str(-nday+1)
    tomorrow = time_tool.get_someday_str(1)

    # fetch video category names (id -> name)
    video_categories = {}
    sql = "select id, name from video_categories"
    rt = mysql_tool.querydb(sql, logger, sql)
    for v in rt:
        video_categories[int(v[0])] = v[1]

    # newly published videos per category
    video_new_count = {}
    sql = "select category_id, count(*) from videos where published_at >= \"%s\" and published_at < \"%s\" group by category_id" % (day1, day2)
    handle_sql_hash_total(video_new_count, sql, logger, TOTAL_ID)

    # cumulative pv per category; sum(real_pv) is SQL NULL when every
    # real_pv in the group is NULL, so guard before int() — same pattern
    # used elsewhere in this file for aggregate results
    sql = "select category_id, sum(real_pv) from videos group by category_id"
    rt = mysql_tool.querydb(sql, logger, sql)
    total = 0
    for v in rt:
        if v[1] is None:
            continue
        k = int(v[0])
        total += int(v[1])
        data[k] = {}
        data[k]['channel_name'] = u"视频"
        data[k]['channel'] = "video"
        data[k]['category_id'] = k
        data[k]['pv_total'] = int(v[1])
        set_hash(video_categories, data[k], k, 'category_name')
        set_hash(video_new_count, data[k], k, 'new_published_count')
    data[TOTAL_ID] = {}
    data[TOTAL_ID]['channel_name'] = u"视频"
    data[TOTAL_ID]['channel'] = "video"
    data[TOTAL_ID]['category_name'] = TOTAL_SHOW
    data[TOTAL_ID]['category_id'] = TOTAL_ID
    data[TOTAL_ID]['pv_total'] = total
    set_hash(video_new_count, data[TOTAL_ID], TOTAL_ID, 'new_published_count')

    # videos promoted to the hot list during the day
    data[HOT_VIDEO_CATEGORY_ID] = {}
    sql = "select count(*) from videos where hot_at >= \"%s\" and hot_at < \"%s\" and published_at < \"%s\"" % (day1, day2, day2)
    rt = mysql_tool.querydb(sql, logger, sql)
    for v in rt:
        data[HOT_VIDEO_CATEGORY_ID]['new_published_count'] = int(v[0])

    # cumulative pv of hot videos; guard the NULL sum as above
    sql = "select sum(real_pv) from videos where hot_at < \"%s\"" % tomorrow
    rt = mysql_tool.querydb(sql, logger, sql)
    for v in rt:
        if v[0] is None:
            continue
        data[HOT_VIDEO_CATEGORY_ID]['channel_name'] = u"视频"
        data[HOT_VIDEO_CATEGORY_ID]['channel'] = "video"
        data[HOT_VIDEO_CATEGORY_ID]['category_id'] = HOT_VIDEO_CATEGORY_ID
        data[HOT_VIDEO_CATEGORY_ID]['pv_total'] = int(v[0])
        data[HOT_VIDEO_CATEGORY_ID]['category_name'] = HOT_SHOW

    return data
Ejemplo n.º 3
0
def get_news_effective_readings(user_arr=None, nday=1):
    """Fetch effective news readings for the given users on the target day.

    Args:
        user_arr: list of user-id strings; defaults to empty (previously a
            mutable default argument).
        nday: how many days back to look (1 = yesterday).

    Returns:
        Whatever get_effective_reading produces — presumably a mapping of
        news_id -> reading count; TODO confirm against that helper.
    """
    if user_arr is None:
        user_arr = []
    day1 = time_tool.get_someday_str(-nday)
    day2 = time_tool.get_someday_str(-nday+1)
    sql = "select news_id, count(*) from news_effective_readings where user_id in (%s) and created_at >= \"" + \
        day1 + "\" and created_at < \"" + day2 + \
        "\" and effective != 0 group by news_id"
    ner = get_effective_reading(
        sql, user_arr, "select from news_effective_readings")
    return ner
Ejemplo n.º 4
0
def get_hot_list_info(user_arr=None, ner=None, data=None, nday=1):
    """Aggregate stats for the three news hot lists into data.

    For each hot-list table, records effective readings, comment, favorite
    and upvote counts under the list's category id.

    Args:
        user_arr: list of user-id strings to scope the counts to; defaults
            to empty (previously a mutable default argument).
        ner: mapping news_id -> effective reading count; defaults to empty.
        data: result dict extended in place (a fresh dict when omitted;
            previously a shared mutable default argument).
        nday: how many days back to look (1 = yesterday).

    Returns:
        The data dict with one entry per hot list.
    """
    if user_arr is None:
        user_arr = []
    if ner is None:
        ner = {}
    if data is None:
        data = {}
    day1 = time_tool.get_someday_str(-nday)
    day2 = time_tool.get_someday_str(-nday+1)
    for table, category_id, category_name in [
            ("news_hot_lists", edit_info.NEWS_HOT_LISTS,
             edit_info.NEWS_HOT_LISTS_SHOW),
            ("news_hot_seven_day_lists", edit_info.NEWS_HOT_SEVEN,
             edit_info.NEWS_HOT_SEVEN_SHOW),
            ("news_hot_total_lists", edit_info.NEWS_HOT_TOTAL, edit_info.NEWS_HOT_TOTAL_SHOW)]:
        data[category_id] = {}
        data[category_id]['user_type'] = "new"
        data[category_id]['channel_name'] = u"资讯"
        data[category_id]['channel'] = "news"
        data[category_id]['category_id'] = category_id
        data[category_id]['category_name'] = category_name

        # effective readings: sum the counts of ner ids found in this list,
        # querying in chunks of 1000 ids to keep the IN clause bounded
        news_id_arr = [str(news_id) for news_id in ner.keys()]
        sql = "select news_id from " + table + " where news_id in (%s)"
        for i in range(0, len(news_id_arr), 1000):
            id_arr = news_id_arr[i:i+1000]
            sql_use = sql % (",".join(id_arr))
            rt = mysql_tool.querydb(
                sql_use, logger, "select news_id from %s for %d news" % (table, len(id_arr)))
            for v in rt:
                news_id = int(v[0])
                if 'effective_reading' in data[category_id].keys():
                    data[category_id]['effective_reading'] += ner[news_id]
                else:
                    data[category_id]['effective_reading'] = ner[news_id]

        # comment count for news on this list
        sql = "select count(*) from news_comments as nc join news as n on(nc.news_id = n.id) where n.id in (select news_id from " + \
            table + ") and nc.created_at >= \"" + day1 + \
            "\" and nc.created_at < \"" + day2 + "\" and nc.user_id in (%s)"
        set_common_hot_info(
            sql, user_arr, data[category_id], 'comments_count', "select news_comments")

        # favorite (like) count for news on this list
        # (log label fixed; was the garbled "select user_like_newsews_comments")
        sql = "select count(*) from user_like_news as uln join news as n on(uln.news_id = n.id) where n.id in (select news_id from " + \
            table + ") and uln.created_at >= \"" + day1 + \
            "\" and uln.created_at < \"" + day2 + "\" and uln.user_id in (%s)"
        set_common_hot_info(
            sql, user_arr, data[category_id], 'like_count', "select user_like_news")

        # upvote (zan) count for comments on news on this list
        sql = "select count(*) from user_zan_news_comments as uznc join news_comments as nc on (uznc.comment_id = nc.id) join news as n on(nc.news_id = n.id) where n.id in (select news_id from " + \
            table + ") and uznc.created_at >= \"" + day1 + \
            "\" and uznc.created_at < \"" + \
            day2 + "\" and uznc.user_id in (%s)"
        set_common_hot_info(
            sql, user_arr, data[category_id], 'zan_count', "select user_zan_news_comments")

    return data
Ejemplo n.º 5
0
def get_edit_video_info(user_arr=None, user_type="new", ver=None, nday=1):
    """Build per-category effective-reading stats for videos.

    Args:
        user_arr: unused in this function; kept only for signature
            compatibility with the sibling get_edit_news_info (previously a
            mutable default argument).
        user_type: label stored on each regular category entry.
        ver: mapping video_id -> effective reading count; defaults to empty
            (previously a mutable default argument).
        nday: how many days back to look (1 = yesterday).

    Returns:
        dict mapping category id -> stats dict with 'effective_reading'.
    """
    if user_arr is None:
        user_arr = []
    if ver is None:
        ver = {}
    data = {}
    # day1 was computed in the original but never used; removed
    day2 = time_tool.get_someday_str(-nday+1)

    # fetch video category names (id -> name)
    video_categories = {}
    sql = "select id, name from video_categories"
    rt = mysql_tool.querydb(sql, logger, sql)
    for v in rt:
        video_categories[int(v[0])] = v[1]

    video_id_arr = [str(video_id) for video_id in ver.keys()]
    # effective readings for regular categories, chunked by 1000 ids
    sql = "select id, category_id from videos where id in (%s)"
    for i in range(0, len(video_id_arr), 1000):
        id_arr = video_id_arr[i:i+1000]
        sql_use = sql % (",".join(id_arr))
        rt = mysql_tool.querydb(
            sql_use, logger, "select id, category_id from videos for %d video" % len(id_arr))
        for v in rt:
            video_id = int(v[0])
            category_id = int(v[1])
            if category_id in data.keys():
                data[category_id]["effective_reading"] += ver[video_id]
            else:
                set_for_one_category(
                    data, category_id, user_type, u"视频", "video", video_categories)
                data[category_id]["effective_reading"] = ver[video_id]

    # effective readings for videos already on the hot list before day2
    sql = "select id from videos where id in (%s) and hot_at < \"" + \
        day2 + "\""
    for i in range(0, len(video_id_arr), 1000):
        id_arr = video_id_arr[i:i+1000]
        sql_use = sql % (",".join(id_arr))
        rt = mysql_tool.querydb(
            sql_use, logger, "select id from videos for %d video" % len(id_arr))
        for v in rt:
            video_id = int(v[0])
            if edit_info.HOT_VIDEO_CATEGORY_ID in data.keys():
                data[edit_info.HOT_VIDEO_CATEGORY_ID]['effective_reading'] += ver[video_id]
            else:
                data[edit_info.HOT_VIDEO_CATEGORY_ID] = {}
                data[edit_info.HOT_VIDEO_CATEGORY_ID]['effective_reading'] = ver[video_id]
                data[edit_info.HOT_VIDEO_CATEGORY_ID]['user_type'] = "new"
                data[edit_info.HOT_VIDEO_CATEGORY_ID]['channel_name'] = u"视频"
                data[edit_info.HOT_VIDEO_CATEGORY_ID]['channel'] = "video"
                data[edit_info.HOT_VIDEO_CATEGORY_ID]['category_id'] = edit_info.HOT_VIDEO_CATEGORY_ID
                data[edit_info.HOT_VIDEO_CATEGORY_ID]['category_name'] = edit_info.HOT_SHOW

    return data
Ejemplo n.º 6
0
def process(nday=1):
    """Compute daily edit stats for news and video and index them into ES.

    Merges pv/uv numbers into the news stats, derives day-over-day pv from
    the previous day's indexed document, and posts one document per
    channel/category id.

    Args:
        nday: how many days back to process (1 = yesterday).
    """
    data_pv_uv = edit_pv_uv.process(nday)

    # NOTE(review): DB host/name are hard-coded here; consider moving them
    # to configuration.
    mysql_tool.connectdb(host="47.96.238.205", database="taozuiredian-news")
    mysql_tool.querydb("SET NAMES utf8mb4")
    data_news = get_edit_news_info(nday)
    data_news = get_hot_list_info(nday, data_news)
    data_news = get_hot_news_info(nday, data_news)
    data_news = get_news_effective_reading(nday, data_news)
    data_video = get_edit_video_info(nday)
    data_video = get_video_effective_reading(nday, data_video)
    mysql_tool.closedb()

    # fold pv/uv metrics into the matching news category entries
    for k, v in data_pv_uv.items():
        if k in data_news.keys():
            for k_, v_ in v.items():
                data_news[k][k_] = v_

    for data in [data_news, data_video]:
        for k, v in data.items():
            # Fetch the previous day's document so the daily pv delta can
            # be derived from cumulative totals.
            yid = time_tool.get_someday_str(-nday-1)
            url = URL_ELASTICSEARCH_EDIT_INFO + \
                "/" + yid + "_" + v["channel"] + "_" + str(k)
            r = requests.get(url, headers=JSON_HEADER, timeout=(30, 120))
            if 200 != r.status_code:
                logger.error("request edit_info index failed, status_code:%d, reason:%s, k:%s",
                             r.status_code, r.reason, k)
            else:
                r_json = r.json()
                if "_source" in r_json.keys():
                    yd = r_json["_source"]
                    if "pv_total" in yd.keys():
                        # daily pv = today's cumulative pv - yesterday's
                        v["pv"] = v["pv_total"] - yd["pv_total"]
                        set_pv_(v, "pv", "dau_count", "pv_dau")
                        set_pv_(v, "pv", "new_published_count", "pv_published")
                        set_pv_(v, "effective_reading", "pv", "reading_pv")
                        set_pv_(v, "comments_count", "pv", "comments_pv")
                        set_pv_(v, "zan_count", "pv", "zan_pv")
                        set_pv_(v, "like_count", "pv", "like_pv")
                        set_pv_(v, "share_count", "pv", "share_pv")
            v['@timestamp'] = time_tool.get_someday_es_format(-nday)
            set_pv_(v, "show_all_pv_uv", "new_published_count", "show_all_pv_uv_new_published")
            # index the (possibly enriched) document under today's id
            _id = time_tool.get_someday_str(-nday)
            url = URL_ELASTICSEARCH_EDIT_INFO + \
                "/" + _id + "_" + v['channel'] + "_" + str(k)
            r = requests.post(url, headers=JSON_HEADER,
                              data=json.dumps(v), timeout=(30, 120))
            if 200 != r.status_code and 201 != r.status_code:
                logger.error("request edit index failed, status_code:%d, reason:%s, %s, %s",
                             r.status_code, json.dumps(r.json()), url, json.dumps(v))
Ejemplo n.º 7
0
def get_news(nday=1):
    """Fetch news published on the target day, keyed by news id.

    Args:
        nday: how many days back to look (1 = yesterday).

    Returns:
        dict mapping news_id -> {'@timestamp', 'news_id', 'title'}.
    """
    start_day = time_tool.get_someday_str(-nday)
    end_day = time_tool.get_someday_str(-nday+1)
    sql = "select id, published_at, title from news where published_at >= \"%s\" and published_at < \"%s\"" % (
        start_day, end_day)
    rows = mysql_tool.querydb(sql, logger, sql)
    news = {}
    for row in rows:
        nid = int(row[0])
        news[nid] = {
            '@timestamp': str(row[1]).replace(" ", "T") + "+08:00",
            'news_id': nid,
            'title': row[2],
        }
    return news
Ejemplo n.º 8
0
def process(nday=1):
    """Recompute pv_total for hot-list documents already indexed in ES.

    Args:
        nday: how many days back to process (1 = yesterday).
    """
    mysql_tool.connectdb(host="47.96.238.205", database="taozuiredian-news")
    list_pv_total = get_hot_list_pv_info(nday)
    mysql_tool.closedb()

    for k, v in list_pv_total.items():
        # All news on the "total" and "rising" hot lists is refreshed at
        # 1 AM, which invalidates the pv_total computed for these lists
        # just after midnight — so it must be recomputed here.
        _id = time_tool.get_someday_str(-nday)
        url = config.URL_EDIT_INFO + "/" + _id + "_news_" + str(k)
        r = requests.get(url, headers=config.JSON_HEADER, timeout=(30, 120))
        if 200 != r.status_code:
            logger.error(
                "request edit_info index failed, status_code:%d, reason:%s, k:%s",
                r.status_code, r.reason, k)
        else:
            r_json = r.json()
            if "_source" in r_json.keys():
                # patch the existing document with the fresh pv_total
                yd = r_json["_source"]
                yd['pv_total'] = v
                yd['@timestamp'] = time_tool.get_someday_es_format(-nday)
                url = config.URL_EDIT_INFO + "/" + _id + "_news_" + str(k)
                r = requests.post(url,
                                  headers=config.JSON_HEADER,
                                  data=json.dumps(yd),
                                  timeout=(30, 120))
                if 200 != r.status_code and 201 != r.status_code:
                    logger.error(
                        "request edit index failed, status_code:%d, reason:%s, %s, %s",
                        r.status_code, json.dumps(r.json()), url,
                        json.dumps(v))
Ejemplo n.º 9
0
def process_old(nday=1):
    """Legacy (Python 2) daily edit-stats job.

    Computes news/video stats and, for each entry, creates the ES document
    when the GET fails, or inspects the existing document when it succeeds.

    Args:
        nday: how many days back to process (1 = yesterday).
    """
    mysql_tool.connectdb(host="47.96.238.205", database="taozuiredian-news")
    mysql_tool.querydb("SET NAMES utf8mb4")

    data_news = edit_info.get_edit_news_info(nday)
    data_news = edit_info.get_hot_news_info(nday, data_news)
    data_news = edit_info.get_hot_list_info(nday, data_news)
    data_news = edit_info.get_news_effective_reading(nday, data_news)
    data_video = edit_info.get_edit_video_info(nday)
    data_video = edit_info.get_video_effective_reading(nday, data_video)

    mysql_tool.closedb()

    for data in [data_news, data_video]:
        for k , v in data.items():
            tid = time_tool.get_someday_str(-nday)
            url = edit_info.URL_ELASTICSEARCH_EDIT_INFO + \
                "/" + tid + "_" + v["channel"] + "_" + str(k)
            r = requests.get(url, headers=edit_info.JSON_HEADER, timeout=(30, 120))
            if 200 != r.status_code:
                # document missing (or GET failed): create it
                print r.status_code, r.reason, r.json()
                v['@timestamp'] = time_tool.get_someday_es_format(-nday)
                r = requests.post(url, headers=edit_info.JSON_HEADER,
                                  data=json.dumps(v), timeout=(30, 120))
                if 200 != r.status_code and 201 != r.status_code:
                    print r.status_code, r.reason, r.json()
            else:
                r_json = r.json()
                if "_source" in r_json.keys():
                    td = r_json["_source"]
                    if 'effective_reading' in td.keys():
                        # td['effective_reading'] = v['effective_reading']
                        del td['effective_reading']
                # NOTE(review): td is unbound when "_source" is absent from
                # the response — this line would raise NameError then.
                print td
Ejemplo n.º 10
0
def get_news_category(nday=1, news_id_arr=None):
    """Group news ids by category, with an extra hot-news bucket.

    Args:
        nday: how many days back to look (1 = yesterday).
        news_id_arr: news ids to classify; defaults to empty (previously a
            mutable default argument).

    Returns:
        Result of get_list_news_category applied to the mapping of
        category id -> list of news ids (HOT_NEWS_CATEGORY_ID holds ids of
        news already hot before the day boundary).
    """
    if news_id_arr is None:
        news_id_arr = []
    day = time_tool.get_someday_str(-nday + 1)
    list_news = {}
    # regular categories, chunked by 1000 ids to bound the IN clause
    sql = "select id, category_id from news where id in (%s)"
    for i in range(0, len(news_id_arr), 1000):
        id_arr = [str(nid) for nid in news_id_arr[i:i + 1000]]
        sql_use = sql % (",".join(id_arr))
        rt = mysql_tool.querydb(
            sql_use, logger,
            "select id, category_id from news for %d news" % len(id_arr))
        for v in rt:
            news_id = int(v[0])
            category_id = int(v[1])
            list_news.setdefault(category_id, []).append(news_id)
    # hot-news bucket: ids whose hot_at predates the day boundary
    list_news[define.HOT_NEWS_CATEGORY_ID] = []
    sql = "select id from news where hot_at < \"" + day + "\" and id in (%s)"
    for i in range(0, len(news_id_arr), 1000):
        id_arr = [str(nid) for nid in news_id_arr[i:i + 1000]]
        sql_use = sql % (",".join(id_arr))
        rt = mysql_tool.querydb(
            sql_use, logger,
            "select hot id from news for %d news" % len(id_arr))
        for v in rt:
            list_news[define.HOT_NEWS_CATEGORY_ID].append(int(v[0]))

    list_news = get_list_news_category(list_news)
    return list_news
Ejemplo n.º 11
0
def process(nday=1):
    """Collect the day's task stats and index one ES document per task."""
    mysql_tool.connectdb()
    mysql_tool.querydb("SET NAMES utf8mb4")
    data = get_task_info(nday)
    mysql_tool.closedb()

    dau, _ = active.get_user_device_count(active.get_query(-nday))

    # loop-invariant values: document date id and timestamp
    day_id = time_tool.get_someday_str(-nday)
    stamp = time_tool.get_someday_es_format(-nday)
    for task_id, record in data.items():
        record['@timestamp'] = stamp
        if task_id != 0 and 'num' in record and 'num_for_parent' in record:
            # derive the "no child" counts from totals minus parent counts
            record['num_for_no_child'] = record['num'] - record['num_for_parent']
            record['n_for_no_child'] = record['n'] - record['n_for_parent']
            if dau > 0:
                record['num_dau'] = float(record['num']) / dau
        url = URL_ELASTICSEARCH_TASK_INFO + "/" + day_id + "_" + str(task_id)
        resp = requests.post(url,
                             headers=JSON_HEADER,
                             data=json.dumps(record),
                             timeout=(10, 120))
        if resp.status_code not in (200, 201):
            logger.error(
                "request task_info index failed, status_code:%d, reason:%s",
                resp.status_code, resp.reason)
Ejemplo n.º 12
0
def process(nday=1):
    """Compute per-vip parent ("master") stats and index them into ES.

    For each vip level, derives the number of new parents by diffing
    against the previous day's indexed count, posts the per-vip document,
    then indexes the day's overall new-user count under an empty vip key.

    Args:
        nday: how many days back to process (1 = yesterday).
    """
    mysql_tool.connectdb()
    data = get_parent_info(nday)
    mysql_tool.closedb()

    for k, v in data.items():
        # Fetch the previous day's per-vip parent count; new parents per
        # vip = today's count - yesterday's count.
        yid = time_tool.get_someday_str(-nday - 1)
        url = URL_ELASTICSEARCH_PARENT_VIP_INFO + "/" + yid + "_" + k
        r = requests.get(url, headers=JSON_HEADER, timeout=(30, 120))
        if 200 != r.status_code:
            logger.error(
                "request parent_info index failed, status_code:%d, reason:%s, k:%s",
                r.status_code, r.reason, k)
        else:
            r_json = r.json()
            if "_source" in r_json.keys():
                yd = r_json["_source"]
                if "num" in yd.keys():
                    v["num_new"] = v["num"] - yd["num"]
        v['@timestamp'] = time_tool.get_someday_es_format(-nday)
        v["vip"] = k
        _id = time_tool.get_someday_str(-nday)
        url = URL_ELASTICSEARCH_PARENT_VIP_INFO + "/" + _id + "_" + k
        r = requests.post(url,
                          headers=JSON_HEADER,
                          data=json.dumps(v),
                          timeout=(30, 120))
        if 200 != r.status_code and 201 != r.status_code:
            logger.error(
                "request parent_info index failed, status_code:%d, reason:%s",
                r.status_code, r.reason)
    # index the day's overall new-user count under an empty vip key
    data_num_user = {}
    data_num_user['@timestamp'] = time_tool.get_someday_es_format(-nday)
    user_per_channel = nui.get_new_user(nday=nday)
    data_num_user["num_user_new"] = user_per_channel["all_channel"]
    data_num_user["vip"] = ""
    url = URL_ELASTICSEARCH_PARENT_VIP_INFO + "/" + time_tool.get_someday_str(
        -nday)
    r = requests.post(url,
                      headers=JSON_HEADER,
                      data=json.dumps(data_num_user),
                      timeout=(30, 120))
    if 200 != r.status_code and 201 != r.status_code:
        logger.error(
            "request parent_info index failed, status_code:%d, reason:%s",
            r.status_code, r.reason)
Ejemplo n.º 13
0
def get_edit_news_info(user_arr=None, user_type="new", ner=None, nday=1):
    """Build per-category news stats: effective reads, comments, likes, zans.

    Args:
        user_arr: list of user-id strings to scope counts to; defaults to
            empty (previously a mutable default argument).
        user_type: label stored on each category entry.
        ner: mapping news_id -> effective reading count; defaults to empty
            (previously a mutable default argument).
        nday: how many days back to look (1 = yesterday).

    Returns:
        dict mapping category id -> stats dict.
    """
    if user_arr is None:
        user_arr = []
    if ner is None:
        ner = {}
    data = {}
    day1 = time_tool.get_someday_str(-nday)
    day2 = time_tool.get_someday_str(-nday+1)

    # fetch news category names (id -> name)
    news_categories = {}
    sql = "select id, name from news_categories"
    rt = mysql_tool.querydb(sql, logger, sql)
    for v in rt:
        news_categories[int(v[0])] = v[1]

    news_id_arr = [str(news_id) for news_id in ner.keys()]
    # effective readings for regular categories, chunked by 1000 ids
    sql = "select id, category_id from news where id in (%s)"
    for i in range(0, len(news_id_arr), 1000):
        id_arr = news_id_arr[i:i+1000]
        sql_use = sql % (",".join(id_arr))
        rt = mysql_tool.querydb(
            sql_use, logger, "select id, category_id from news for %d news" % len(id_arr))
        for v in rt:
            news_id = int(v[0])
            category_id = int(v[1])
            if category_id in data.keys():
                data[category_id]["effective_reading"] += ner[news_id]
            else:
                set_for_one_category(
                    data, category_id, user_type, u"资讯", "news", news_categories)
                data[category_id]["effective_reading"] = ner[news_id]

    # comment counts per category
    sql = "select n.category_id, count(*) from news_comments as nc join news as n on (nc.news_id = n.id) where nc.user_id in (%s) and nc.created_at >= \"" + \
        day1 + "\" and nc.created_at < \"" + day2 + "\" group by n.category_id"
    set_common_edit_info(sql, user_arr, data, "select from news_comments",
                         "comments_count", user_type, "news", u"资讯", news_categories)
    # favorite (like) counts per category
    sql = "select n.category_id, count(*) from user_like_news as uln join news as n on(uln.news_id = n.id) where uln.user_id in (%s) and uln.created_at >= \"" + \
        day1 + "\" and uln.created_at < \"" + day2 + "\" group by n.category_id"
    set_common_edit_info(sql, user_arr, data, "select from user_like_news",
                         "like_count", user_type, "news", u"资讯", news_categories)
    # upvote (zan) counts per category
    sql = "select n.category_id, count(*) from user_zan_news_comments as uznc join news_comments as nc on (uznc.comment_id = nc.id) join news as n on(nc.news_id = n.id) where uznc.user_id in (%s) and uznc.created_at >= \"" + \
        day1 + "\" and uznc.created_at < \"" + day2 + "\" group by n.category_id"
    set_common_edit_info(sql, user_arr, data, "select from user_zan_news_comments",
                         "zan_count", user_type, "news", u"资讯", news_categories)

    return data
Ejemplo n.º 14
0
def get_hot_news_info(user_arr=None, ner=None, data=None, nday=1):
    """Aggregate hot-news stats under edit_info.HOT_NEWS_CATEGORY_ID.

    Args:
        user_arr: list of user-id strings to scope counts to; defaults to
            empty (previously a mutable default argument).
        ner: mapping news_id -> effective reading count; defaults to empty.
        data: result dict extended in place (a fresh dict when omitted;
            previously a shared mutable default argument).
        nday: how many days back to look (1 = yesterday).

    Returns:
        The data dict with the hot-news entry filled in.
    """
    if user_arr is None:
        user_arr = []
    if ner is None:
        ner = {}
    if data is None:
        data = {}
    day1 = time_tool.get_someday_str(-nday)
    day2 = time_tool.get_someday_str(-nday+1)
    data[edit_info.HOT_NEWS_CATEGORY_ID] = {}
    data[edit_info.HOT_NEWS_CATEGORY_ID]['user_type'] = "new"
    # unicode literal for consistency with the sibling functions; the
    # original byte-string "资讯" would break json.dumps under Python 2
    data[edit_info.HOT_NEWS_CATEGORY_ID]['channel_name'] = u"资讯"
    data[edit_info.HOT_NEWS_CATEGORY_ID]['channel'] = "news"
    data[edit_info.HOT_NEWS_CATEGORY_ID]['category_id'] = edit_info.HOT_NEWS_CATEGORY_ID
    data[edit_info.HOT_NEWS_CATEGORY_ID]['category_name'] = edit_info.HOT_SHOW

    # effective readings of news already hot before day2, chunked by 1000
    news_id_arr = [str(news_id) for news_id in ner.keys()]
    sql = "select id from news where id in (%s) and hot_at < \"" + day2 + "\""
    for i in range(0, len(news_id_arr), 1000):
        id_arr = news_id_arr[i:i+1000]
        sql_use = sql % (",".join(id_arr))
        rt = mysql_tool.querydb(
            sql_use, logger, "select id from news for %d news" % len(id_arr))
        for v in rt:
            news_id = int(v[0])
            if 'effective_reading' in data[edit_info.HOT_NEWS_CATEGORY_ID].keys():
                data[edit_info.HOT_NEWS_CATEGORY_ID]['effective_reading'] += ner[news_id]
            else:
                data[edit_info.HOT_NEWS_CATEGORY_ID]['effective_reading'] = ner[news_id]

    # comment count on hot news
    sql = "select count(*) from news_comments as nc join news as n on(nc.news_id = n.id) where n.id in (select id from news where hot_at < \"" + \
        day2 + "\") and nc.created_at >= \"" + day1 + \
        "\" and nc.created_at < \"" + day2 + "\" and nc.user_id in (%s)"
    set_common_hot_info(
        sql, user_arr, data[edit_info.HOT_NEWS_CATEGORY_ID], 'comments_count', "select from news_comments")

    # favorite (like) count on hot news
    sql = "select count(*) from user_like_news as uln join news as n on(uln.news_id = n.id) where n.id in (select id from news where hot_at < \"" + \
        day2 + "\") and uln.created_at >= \"" + day1 + \
        "\" and uln.created_at < \"" + day2 + "\" and uln.user_id in (%s)"
    set_common_hot_info(
        sql, user_arr, data[edit_info.HOT_NEWS_CATEGORY_ID], 'like_count', "select from user_like_news")

    # upvote (zan) count on hot-news comments
    sql = "select count(*) from user_zan_news_comments as uznc join news_comments as nc on (uznc.comment_id = nc.id) join news as n on(nc.news_id = n.id) where n.id in (select id from news where hot_at < \"" + \
        day2 + "\") and uznc.created_at >= \"" + day1 + \
        "\" and uznc.created_at < \"" + day2 + "\" and uznc.user_id in (%s)"
    set_common_hot_info(sql, user_arr, data[edit_info.HOT_NEWS_CATEGORY_ID],
                        'zan_count', "select from user_zan_news_comments")

    return data
Ejemplo n.º 15
0
def get_task_info(nday=1):
    """Collect per-task completion stats for the target day.

    Args:
        nday: how many days back to look (1 = yesterday).

    Returns:
        dict keyed by task id: task name, completion count ('num'),
        distinct-user count ('n'), parent-user variants, and under key 0
        the average number of beginner tasks completed by users registered
        that day.
    """
    data = {}
    day1 = time_tool.get_someday_str(-nday)
    day2 = time_tool.get_someday_str(-nday + 1)

    # task id -> task name
    sql_task_name = "select id, name from user_tasks"
    rt = mysql_tool.querydb(sql_task_name, logger, sql_task_name)
    for v in rt:
        data[v[0]] = {}
        data[v[0]]['task_id'] = int(v[0])
        data[v[0]]['task_name'] = v[1]

    # completions and distinct users per task for the day
    sql_task_num = "select task_id, sum(today_count), count(distinct user_id) from user_task_day_records where day = \"%s\" and today_count !=0 group by task_id" % day1
    rt = mysql_tool.querydb(sql_task_num, logger, sql_task_num)
    for v in rt:
        if v[0] in data.keys():
            data[v[0]]['num'] = int(v[1])
            data[v[0]]['n'] = int(v[2])

    # same, restricted to users who are parents ("masters")
    sql_task_parent_num = "select task_id, sum(today_count), count(distinct user_id) from user_task_day_records where user_id in (select distinct parent_id from users where bind_parent_at < \"%s\") and day = \"%s\" and today_count !=0 group by task_id" % (
        day2, day1)
    rt = mysql_tool.querydb(sql_task_parent_num, logger, sql_task_parent_num)
    for v in rt:
        if v[0] in data.keys():
            data[v[0]]['num_for_parent'] = int(v[1])
            data[v[0]]['n_for_parent'] = int(v[2])

    # average beginner-task completions for users registered that day
    # NOTE(review): rta/rtp are indexed without a length check — this
    # assumes the aggregate query always returns one row; confirm against
    # querydb's contract.
    sql = "select sum(today_count), count(distinct user_id) from user_task_day_records where day = \"%s\" and user_id in (select id from users where registered_at >= \"%s\" and registered_at < \"%s\") and task_id in(1,2,3,22,4,5,29,30,31,33)" % (
        day1, day1, day2)
    rta = mysql_tool.querydb(sql, logger, sql)
    data[0] = {}
    data[0]['task_id'] = 0
    data[0]['task_name'] = u"今日新增注册用户完成新手任务平均个数"
    if rta[0][1] > 0:
        data[0]['num'] = round(float(rta[0][0]) / float(rta[0][1]), 2)
    # same average, restricted to new users who are themselves parents
    sql = "select sum(today_count), count(distinct user_id) from user_task_day_records where day = \"%s\" and user_id in (select id from users where id in (select distinct parent_id from users where bind_parent_at >= \"%s\" and bind_parent_at < \"%s\") and registered_at >= \"%s\" and registered_at < \"%s\") and task_id in(1,2,3,22,4,5,29,30,31,33)" % (
        day1, day1, day2, day1, day2)
    rtp = mysql_tool.querydb(sql, logger, sql)
    if rtp[0][1] > 0:
        data[0]['num_for_parent'] = round(
            float(rtp[0][0]) / float(rtp[0][1]), 2)
    if rta[0][1] - rtp[0][1] > 0:
        data[0]['num_for_no_child'] = round(
            float(rta[0][0] - rtp[0][0]) / float(rta[0][1] - rtp[0][1]), 2)

    return data
Ejemplo n.º 16
0
def get_video_effective_reading(nday=1, data_video=None):
    """Fill per-category video effective-reading counts into data_video.

    Args:
        nday: how many days back to look (1 = yesterday).
        data_video: dict of category stats to extend in place (a fresh
            dict when omitted; previously a shared mutable default
            argument).

    Returns:
        The data_video dict with 'effective_reading' filled in per
        category (and for TOTAL_ID / HOT_VIDEO_CATEGORY_ID).
    """
    if data_video is None:
        data_video = {}
    day1 = time_tool.get_someday_str(-nday)
    day2 = time_tool.get_someday_str(-nday+1)
    ver = {}
    sql = "select video_id, count(*) from video_effective_readings where created_at >= \"%s\" and created_at < \"%s\" and effective != 0 group by video_id" % (day1, day2)
    sob = mysql_tool.sql_obj()
    try:
        rt = sob.querydb(sql, logger, sql)
    finally:
        # ensure the ad-hoc connection is released even if the query raises
        sob.closedb()
    total = 0
    for v in rt:
        ver[int(v[0])] = int(v[1])
        total += int(v[1])
    # create the TOTAL_ID entry if the caller did not pre-populate it
    # (the original raised KeyError when data_video lacked TOTAL_ID)
    data_video.setdefault(TOTAL_ID, {})['effective_reading'] = total
    video_id_arr = [str(video_id) for video_id in ver.keys()]
    # effective readings for regular categories, chunked by 1000 ids
    sql = "select id, category_id from videos where id in (%s)"
    for i in range(0, len(video_id_arr), 1000):
        id_arr = video_id_arr[i:i+1000]
        sql_use = sql % (",".join(id_arr))
        rt = mysql_tool.querydb(
            sql_use, logger, "select id, category_id from videos for %d videos" % len(id_arr))
        for v in rt:
            video_id = int(v[0])
            category_id = int(v[1])
            if category_id in data_video.keys():
                if 'effective_reading' in data_video[category_id].keys():
                    data_video[category_id]['effective_reading'] += ver[video_id]
                else:
                    data_video[category_id]['effective_reading'] = ver[video_id]
    # effective readings for hot videos (only when the entry already exists)
    if HOT_VIDEO_CATEGORY_ID in data_video.keys():
        sql = "select id from videos where id in (%s) and hot_at < \"" + \
            day2 + "\""
        for i in range(0, len(video_id_arr), 1000):
            id_arr = video_id_arr[i:i+1000]
            sql_use = sql % (",".join(id_arr))
            rt = mysql_tool.querydb(
                sql_use, logger, "select id from hot videos for %d videos" % len(id_arr))
            for v in rt:
                video_id = int(v[0])
                if 'effective_reading' in data_video[HOT_VIDEO_CATEGORY_ID].keys():
                    data_video[HOT_VIDEO_CATEGORY_ID]['effective_reading'] += ver[video_id]
                else:
                    data_video[HOT_VIDEO_CATEGORY_ID]['effective_reading'] = ver[video_id]
    return data_video
Ejemplo n.º 17
0
def get_parent_info(nday=1):
    """Count parent users ("masters") per vip level as of the target day.

    Args:
        nday: how many days back to look (1 = yesterday).

    Returns:
        dict keyed by vip level (as str) with {'num': count}.
    """
    data = {}
    # day1 was computed in the original but never used; removed
    day2 = time_tool.get_someday_str(-nday + 1)

    # parents bound before the day boundary, grouped by vip level
    sql_vip_parent_count = "select level, count(id) from users where id in (select distinct parent_id from users where bind_parent_at < \"%s\") group by level" % day2
    rt = mysql_tool.querydb(sql_vip_parent_count, logger, sql_vip_parent_count)
    for v in rt:
        # group by level yields one row per level, so the original
        # if/else branches were identical; setdefault covers both
        data.setdefault(str(v[0]), {})['num'] = int(v[1])

    return data
Ejemplo n.º 18
0
def get_child(nday=1):
    """Count children bound that day to newly registered parents, per channel.

    Args:
        nday: how many days back to look (1 = yesterday).

    Returns:
        dict mapping channel -> bind count, plus an "all_channel" total.
    """
    start = time_tool.get_someday_str(-nday)
    end = time_tool.get_someday_str(-nday + 1)
    data = {}

    sql = "select us.channel, count(us.channel) from users as u left join user_statistics as us on (u.id = us.user_id) where u.parent_id in (select id from users where registered_at >= \"%s\" and registered_at < \"%s\") and u.bind_parent_at >= \"%s\" and u.bind_parent_at < \"%s\" group by us.channel" % (
        start, end, start, end)
    for row in mysql_tool.querydb(sql, logger, sql):
        data[row[0]] = int(row[1])

    sql_all = "select count(*) from users where parent_id in (select id from users where registered_at >= \"%s\" and registered_at < \"%s\") and bind_parent_at >= \"%s\" and bind_parent_at < \"%s\"" % (
        start, end, start, end)
    rows = mysql_tool.querydb(sql_all, logger, sql_all)
    if rows:
        data["all_channel"] = int(rows[0][0])

    return data
Ejemplo n.º 19
0
def get_hot_list_info(nday=1, data=None):
    """Collect daily stats for the three news hot-list tables.

    For each of the daily / seven-day / all-time hot lists, records new list
    entries, comments, likes, zan (upvotes) and the cumulative pv of the
    listed news for the stats day.

    Args:
        nday: how many days back the stats day is (default 1 = yesterday).
        data: optional dict to extend; a fresh dict is created when omitted.
              (Was a mutable default ``data={}``, which leaked state across
              calls because the function mutates it.)

    Returns:
        `data`, with one entry per hot-list category id.
    """
    if data is None:
        data = {}
    day1 = time_tool.get_someday_str(-nday)
    day2 = time_tool.get_someday_str(-nday+1)

    for table, category_id, category_name in [("news_hot_lists", NEWS_HOT_LISTS, NEWS_HOT_LISTS_SHOW),
                                              ("news_hot_seven_day_lists",
                                               NEWS_HOT_SEVEN, NEWS_HOT_SEVEN_SHOW),
                                              ("news_hot_total_lists", NEWS_HOT_TOTAL, NEWS_HOT_TOTAL_SHOW)]:
        data[category_id] = {}
        # New list entries created on the stats day.
        sql = "select count(*) from %s where created_at >= \"%s\" and created_at < \"%s\"" % (
            table, day1, day2)
        rt = mysql_tool.querydb(sql, logger, sql)
        for v in rt:
            data[category_id]['new_published_count'] = int(v[0])

        # Comments posted on listed news during the stats day.
        sql = "select count(*) from news_comments as nc join news as n on(nc.news_id = n.id) where n.id in (select news_id from %s) and nc.created_at >= \"%s\" and nc.created_at < \"%s\"" % (table, day1, day2)
        rt = mysql_tool.querydb(sql, logger, sql)
        for v in rt:
            data[category_id]['comments_count'] = int(v[0])

        # Likes (favorites) of listed news during the stats day.
        sql = "select count(*) from user_like_news as uln join news as n on(uln.news_id = n.id) where n.id in (select news_id from %s) and uln.created_at >= \"%s\" and uln.created_at < \"%s\"" % (table, day1, day2)
        rt = mysql_tool.querydb(sql, logger, sql)
        for v in rt:
            data[category_id]['like_count'] = int(v[0])

        # Zan (comment upvotes) on listed news during the stats day.
        sql = "select count(*) from user_zan_news_comments as uznc join news_comments as nc on (uznc.comment_id = nc.id) join news as n on(nc.news_id = n.id) where n.id in (select news_id from %s) and uznc.created_at >= \"%s\" and uznc.created_at < \"%s\"" % (table, day1, day2)
        rt = mysql_tool.querydb(sql, logger, sql)
        for v in rt:
            data[category_id]['zan_count'] = int(v[0])

        # Cumulative pv of all news currently on the list.
        sql = "select sum(real_pv) from news where id in (select news_id from %s)" % table
        rt = mysql_tool.querydb(sql, logger, sql)
        for v in rt:
            data[category_id]['channel_name'] = u"资讯"
            data[category_id]['channel'] = "news"
            data[category_id]['category_id'] = category_id
            # SUM() is NULL (not 0) when the list is empty; avoid int(None).
            data[category_id]['pv_total'] = int(v[0]) if v[0] is not None else 0
            data[category_id]['category_name'] = category_name

    return data
Ejemplo n.º 20
0
def get_child(user_channel=None, nday=1):
    """Count apprentices bound on the stats day, grouped by the channel key
    of their masters.

    Args:
        user_channel: mapping of channel -> list of parent user-id strings.
            Defaults to an empty mapping. (Was a mutable default
            ``user_channel={}`` — a shared-object pitfall even when only
            read; replaced with the None sentinel idiom.)
        nday: how many days back the stats day is (default 1 = yesterday).

    Returns:
        dict of channel -> bound-apprentice count, plus an "all_channel" sum.
    """
    if user_channel is None:
        user_channel = {}
    data = {}
    data["all_channel"] = 0
    day1 = time_tool.get_someday_str(-nday)
    day2 = time_tool.get_someday_str(-nday+1)
    sql = "select count(us.channel) from users as u left join user_statistics as us on (u.id = us.user_id) where u.parent_id in (%s) and u.bind_parent_at >= \"%s\" and u.bind_parent_at < \"%s\""

    for k, user_arr in user_channel.items():
        data[k] = 0
        # Chunk the id list so the IN (...) clause stays bounded.
        for i in range(0, len(user_arr), 1000):
            user_to_sql = user_arr[i:i+1000]
            sql_use = sql % (",".join(user_to_sql), day1, day2)
            rt = mysql_tool.querydb(
                sql_use, logger, "get child count for %d users" % len(user_to_sql))
            if len(rt) > 0 and rt[0][0] is not None:
                data[k] += int(rt[0][0])
        data["all_channel"] += data[k]

    return data
Ejemplo n.º 21
0
def process(nday=1):
    """Build per-channel parent (master) stats for the stats day and index
    them into Elasticsearch.

    Reads aggregates from MySQL, then for each channel fetches the previous
    day's ES document to turn cumulative totals into per-day deltas
    (today_rebate, average_rebate_per_child) before POSTing the day's doc.

    Args:
        nday: how many days back the stats day is (default 1 = yesterday).
    """
    mysql_tool.connectdb()
    data = get_parent_info(nday)
    mysql_tool.closedb()

    for k, v in data.items():
        # Fetch the previous day's totals for this channel so today's income
        # and tribute (rebate) can be computed as a day-over-day difference.
        yid = time_tool.get_someday_str(-nday - 1)
        url = URL_ELASTICSEARCH_PARENT_INFO + "/" + yid + "_" + k
        r = requests.get(url, headers=JSON_HEADER, timeout=(10, 20))
        if 200 != r.status_code:
            # Missing previous-day doc is logged but not fatal: the day's
            # absolute figures are still indexed below.
            logger.error(
                "request parent_info index failed, status_code:%d, reason:%s, k:%s",
                r.status_code, r.reason, k)
        else:
            r_json = r.json()
            if "_source" in r_json.keys():
                yd = r_json["_source"]
                # if "total_income" in v.keys():
                #     ydti = yd["total_income"] if "total_income" in yd.keys() else 0
                #     v["today_income"] = v["total_income"] - ydti
                if "total_rebate" in v.keys():
                    ydtr = yd["total_rebate"] if "total_rebate" in yd.keys(
                    ) else 0
                    v["today_rebate"] = v["total_rebate"] - ydtr
                    if "num_active_child" in v.keys(
                    ) and v["num_active_child"] > 0:
                        # average rebate = today's rebate / active apprentices
                        # (see the backend data-dimensions spec, sheet 3).
                        v["average_rebate_per_child"] = round(
                            v["today_rebate"] / float(v["num_active_child"]),
                            2)
        # Index the (possibly delta-enriched) document under <day>_<channel>.
        v['@timestamp'] = time_tool.get_someday_es_format(-nday)
        v["channel"] = k
        _id = time_tool.get_someday_str(-nday)
        url = URL_ELASTICSEARCH_PARENT_INFO + "/" + _id + "_" + k
        r = requests.post(url,
                          headers=JSON_HEADER,
                          data=json.dumps(v),
                          timeout=(10, 20))
        if 200 != r.status_code and 201 != r.status_code:
            logger.error(
                "request parent_info index failed, status_code:%d, reason:%s",
                r.status_code, r.reason)
Ejemplo n.º 22
0
def get_task(nday=1):
    """Per-channel task-completion stats for users who registered on the
    stats day.

    Args:
        nday: how many days back the stats day is (default 1 = yesterday).

    Returns:
        (data, data_user_num): per-channel task counts and distinct-user
        counts, each also carrying an "all_channel" total when available.
    """
    data = {}
    data_user_num = {}
    day1 = time_tool.get_someday_str(-nday)
    day2 = time_tool.get_someday_str(-nday + 1)
    # Tasks completed for oneself (user_id = from_user_id) on the stats day,
    # by that day's new users, grouped by channel.
    sql = "select us.channel, sum(ut.today_count), count(distinct u.id) from users as u left join user_statistics as us on (u.id = us.user_id) right join user_task_day_records as ut on (u.id = ut.user_id and u.id = ut.from_user_id) where u.registered_at >= \"%s\" and u.registered_at < \"%s\" and ut.day = \"%s\" group by us.channel" % (
        day1, day2, day1)
    rt = mysql_tool.querydb(sql, logger, sql)
    for v in rt:
        data[v[0]] = int(v[1])
        data_user_num[v[0]] = int(v[2])

    sql_all = "select sum(ut.today_count), count(distinct u.id) from users as u right join user_task_day_records as ut on (u.id = ut.user_id and u.id = ut.from_user_id) where u.registered_at >= \"%s\" and u.registered_at < \"%s\" and ut.day = \"%s\"" % (
        day1, day2, day1)
    rt_all = mysql_tool.querydb(sql_all, logger, sql_all)
    # SUM() is NULL (not 0) when no rows match; guard before int() as the
    # sibling chunked queries in this file already do.
    if len(rt_all) > 0 and rt_all[0][0] is not None:
        data["all_channel"] = int(rt_all[0][0])
        data_user_num["all_channel"] = int(rt_all[0][1])
    logger.debug(data)

    return data, data_user_num
Ejemplo n.º 23
0
def process(nday=1):
    """Verify yesterday's stats documents exist in Elasticsearch and email
    an alert listing any URLs whose document is missing or not found.

    Args:
        nday: how many days back the stats day is (default 1 = yesterday).
    """
    day_id = time_tool.get_someday_str(-nday)
    prev_day_id = time_tool.get_someday_str(-nday - 1)
    failed_url = []
    for path in check_url:
        # "remain" (retention) documents are keyed one day earlier than the
        # rest of the stats documents.
        if "remain" in path:
            full_url = CHECK_DOMAIN + path % prev_day_id
        else:
            full_url = CHECK_DOMAIN + path % day_id
        resp = requests.get(full_url, headers=JSON_HEADER, timeout=(30, 120))
        if resp.status_code != 200:
            failed_url.append(full_url)
        elif resp.json()["found"] == False:
            failed_url.append(full_url)
    if failed_url:
        body = u"下面数据更新失败<br>" + "<br>".join(failed_url)
        myEmail.send_email("*****@*****.**",
                           body,
                           subject=u"淘热点数据统计监测")
Ejemplo n.º 24
0
def get_video(nday=1):
    """Per-channel effective video views on the stats day by users who
    registered that same day.

    Args:
        nday: how many days back the stats day is (default 1 = yesterday).

    Returns:
        (view_counts, viewer_counts): dicts keyed by channel, each also
        carrying an "all_channel" total.
    """
    day1 = time_tool.get_someday_str(-nday)
    day2 = time_tool.get_someday_str(-nday + 1)
    view_counts = {}
    viewer_counts = {}

    # Per-channel: effective readings on the stats day by that day's new users.
    sql = "select us.channel, count(vr.user_id), count(distinct u.id) from users as u left join user_statistics as us on (u.id = us.user_id) right join video_effective_readings as vr on (u.id = vr.user_id) where u.registered_at >= \"%s\" and u.registered_at < \"%s\" and vr.created_at >= \"%s\" and vr.created_at < \"%s\" and vr.effective = 1 group by us.channel" % (day1, day2, day1, day2)
    for channel, num_views, num_users in mysql_tool.querydb(sql, logger, sql):
        view_counts[channel] = int(num_views)
        viewer_counts[channel] = int(num_users)

    # Same query without the grouping for the overall total.
    sql_all = "select count(vr.user_id), count(distinct u.id) from users as u right join video_effective_readings as vr on (u.id = vr.user_id) where u.registered_at >= \"%s\" and u.registered_at < \"%s\" and vr.created_at >= \"%s\" and vr.created_at < \"%s\" and vr.effective = 1" % (day1, day2, day1, day2)
    rows = mysql_tool.querydb(sql_all, logger, sql_all)
    if rows:
        view_counts["all_channel"] = int(rows[0][0])
        viewer_counts["all_channel"] = int(rows[0][1])
    logger.debug(view_counts)

    return view_counts, viewer_counts
Ejemplo n.º 25
0
def process(nday=1):
    """Compute the day's cash-user stats and index them into Elasticsearch.

    Args:
        nday: how many days back the stats day is (default 1 = yesterday).
    """
    mysql_tool.connectdb()
    payload = get_cash_user(nday)
    mysql_tool.closedb()

    payload['@timestamp'] = time_tool.get_someday_es_format(-nday)
    doc_id = time_tool.get_someday_str(-nday)
    resp = requests.post(URL_ELASTICSEARCH_CASH_INFO + "/" + doc_id,
                         headers=JSON_HEADER,
                         data=json.dumps(payload),
                         timeout=(10, 20))
    if resp.status_code not in (200, 201):
        logger.error(
            "request active_user_info index failed, status_code:%d, reason:%s",
            resp.status_code, resp.reason)
Ejemplo n.º 26
0
def process(nday=1):
    """Build edit-desk stats split by new vs. not-new users and index both
    variants into Elasticsearch.

    Gathers news and video stats restricted to users who registered on the
    stats day, posts them as "new"-user documents, then subtracts them from
    the overall edit_info documents (fetched from ES) to derive and post the
    "not_new" counterpart.

    Args:
        nday: how many days back the stats day is (default 1 = yesterday).
    """
    new_user_arr = get_new_user_arr(nday)
    mysql_tool.connectdb(host="47.96.238.205", database="taozuiredian-news")
    # Ensure full-width/emoji-safe character handling for this session.
    mysql_tool.querydb("SET NAMES utf8mb4")
    ner = get_news_effective_readings(new_user_arr, nday)
    data_news = get_edit_news_info(new_user_arr, "new", ner, nday)
    data_news = get_hot_news_info(new_user_arr, ner, data_news, nday)
    data_news = get_hot_list_info(new_user_arr, ner, data_news, nday)
    ver = get_video_effective_readings(new_user_arr, nday)
    data_video = get_edit_video_info(new_user_arr, "new", ver, nday)
    mysql_tool.closedb()
    for data in [data_news, data_video]:
        for k, v in data.items():
            # Index the "new"-user document under <day>_<type>_<channel>_<category>.
            v['@timestamp'] = time_tool.get_someday_es_format(-nday)
            _id = time_tool.get_someday_str(-nday)
            url = URL_ELASTICSEARCH_EDIT_USER_INFO + \
                "/" + _id + "_" + v['user_type'] + \
                "_" + v['channel'] + "_" + str(k)
            r = requests.post(url, headers=JSON_HEADER,
                              data=json.dumps(v), timeout=(30, 120))
            if 200 != r.status_code and 201 != r.status_code:
                logger.error("request edit index failed, status_code:%d, reason:%s, %s, %s",
                             r.status_code, json.dumps(r.json()), url, json.dumps(v))
            # Fetch the all-users edit_info doc for the same key so the
            # not-new share can be derived as (all - new).
            url_edit = edit_info.URL_ELASTICSEARCH_EDIT_INFO + \
                "/" + _id + "_" + v['channel'] + "_" + str(k)
            r = requests.get(url_edit, headers=JSON_HEADER, timeout=(30, 120))
            if 200 != r.status_code:
                logger.error("request edit_user_info failed, status_code:%d, reason:%s",
                             r.status_code, json.dumps(r.json()))
            else:
                r_json = r.json()
                if "_source" in r_json.keys():
                    info = r_json["_source"]
                    # Copy so the "new" document v is left untouched while
                    # set_a_key rewrites the per-metric differences into v_.
                    v_ = copy.deepcopy(v)
                    v_["user_type"] = "not_new"
                    set_a_key(v, info, v_, "effective_reading")
                    set_a_key(v, info, v_, "comments_count")
                    set_a_key(v, info, v_, "like_count")
                    set_a_key(v, info, v_, "zan_count")
                    url_ = URL_ELASTICSEARCH_EDIT_USER_INFO + \
                        "/" + _id + "_not_new_" + v['channel'] + "_" + str(k)
                    r = requests.post(url_, headers=JSON_HEADER,
                                      data=json.dumps(v_), timeout=(30, 120))
                    if 200 != r.status_code and 201 != r.status_code:
                        logger.error("request edit index failed, status_code:%d, reason:%s",
                                     r.status_code, json.dumps(r.json()))
Ejemplo n.º 27
0
def process(nday=1):
    """Build edit-desk news stats for the stats day and post them to the
    local edit_info Elasticsearch index.

    Args:
        nday: how many days back the stats day is (default 1 = yesterday).
    """
    mysql_tool.connectdb(host="47.96.238.205", database="taozuiredian-news")
    mysql_tool.querydb("SET NAMES utf8mb4")
    data_news = edit_info.get_edit_news_info(nday)
    data_news = edit_info.get_hot_news_info(nday, data_news)
    # data_news = edit_info.get_hot_list_info(nday, data_news)
    # data_news = edit_info.get_news_effective_reading(nday, data_news)
    # data_video = edit_info.get_edit_video_info(nday)
    # data_video = edit_info.get_video_effective_reading(nday, data_video)
    mysql_tool.closedb()

    for data in [data_news]:
        for category_id, doc in data.items():
            doc_id = time_tool.get_someday_str(-nday)
            doc['@timestamp'] = time_tool.get_someday_es_format(-nday)
            url = ("http://localhost:8200/edit_info/doc" +
                   "/" + doc_id + "_" + doc['channel'] + "_" + str(category_id))
            resp = requests.post(url, headers=edit_info.JSON_HEADER,
                                 data=json.dumps(doc), timeout=(30, 120))
            if resp.status_code not in (200, 201):
                logger.error("request edit index failed, status_code:%d, reason:%s, %s, %s",
                             resp.status_code, json.dumps(resp.json()), url, json.dumps(doc))
Ejemplo n.º 28
0
def get_task(user_channel=None, nday=1):
    """Task-completion counts on the stats day for the given users, grouped
    by channel.

    Args:
        user_channel: mapping of channel -> list of user-id strings.
            Defaults to an empty mapping. (Was a mutable default
            ``user_channel={}`` — a shared-object pitfall; replaced with the
            None sentinel idiom.)
        nday: how many days back the stats day is (default 1 = yesterday).

    Returns:
        dict of channel -> {'num_task', 'num_user'}, plus an "all_channel"
        entry accumulating all channels.
    """
    if user_channel is None:
        user_channel = {}
    data = {}
    data["all_channel"] = {}
    data["all_channel"]["num_task"] = 0
    data["all_channel"]["num_user"] = 0
    day_str = time_tool.get_someday_str(-nday)
    # Only self-completed tasks count (user_id = from_user_id).
    sql = "select sum(today_count), count(distinct user_id) from user_task_day_records where user_id in (%s) and user_id = from_user_id and day = \"%s\""

    for k, user_arr in user_channel.items():
        data[k] = {}
        data[k]["num_task"] = 0
        data[k]["num_user"] = 0
        # Chunk the id list so the IN (...) clause stays bounded.
        for i in range(0, len(user_arr), 1000):
            user_to_sql = user_arr[i:i+1000]
            sql_use = sql % (",".join(user_to_sql), day_str)
            rt = mysql_tool.querydb(
                sql_use, logger, "get task count for %d users" % len(user_to_sql))
            # SUM() is NULL when nothing matched the chunk.
            if len(rt) > 0 and rt[0][0] is not None:
                data[k]["num_task"] += int(rt[0][0])
                data[k]["num_user"] += int(rt[0][1])
        data["all_channel"]["num_task"] += data[k]["num_task"]
        data["all_channel"]["num_user"] += data[k]["num_user"]

    return data
Ejemplo n.º 29
0
def get_news_effective_reading(nday=1, data_news=None):
    """Aggregate effective-reading counts for the stats day into `data_news`.

    Computes the overall total, then attributes per-news readings to the
    hot-list categories, the regular news categories, and the hot-news
    pseudo-category — each only when its bucket already exists in
    `data_news`.

    Args:
        nday: how many days back the stats day is (default 1 = yesterday).
        data_news: optional dict of category stats to extend; a fresh dict
            is created when omitted. (Was a mutable default
            ``data_news={}``, which leaked state across calls because the
            function mutates it.)

    Returns:
        `data_news` with 'effective_reading' filled in per category.
    """
    if data_news is None:
        data_news = {}
    day1 = time_tool.get_someday_str(-nday)
    day2 = time_tool.get_someday_str(-nday+1)
    # news_id -> effective reading count for the stats day.
    ner = {}
    sql = "select news_id, count(*) from news_effective_readings where created_at >= \"%s\" and created_at < \"%s\" and effective != 0 group by news_id" % (day1, day2)
    sob = mysql_tool.sql_obj()
    rt = sob.querydb(sql, logger, sql)
    total = 0
    for v in rt:
        ner[int(v[0])] = int(v[1])
        total += int(v[1])
    sob.closedb()
    # Grand total; create the bucket if the caller has not initialised it
    # (previously this raised KeyError when TOTAL_ID was absent).
    data_news.setdefault(TOTAL_ID, {})['effective_reading'] = total
    news_id_arr = [str(news_id) for news_id in ner.keys()]
    # Effective readings attributed to each hot-list table.
    for table, category_id in [("news_hot_seven_day_lists", NEWS_HOT_SEVEN),
                               ("news_hot_lists", NEWS_HOT_LISTS),
                               ("news_hot_total_lists", NEWS_HOT_TOTAL)]:
        if category_id in data_news.keys():
            sql = "select news_id from %s where news_id in (%s)"
            # Chunk the id list so the IN (...) clause stays bounded.
            for i in range(0, len(news_id_arr), 1000):
                id_arr = news_id_arr[i:i+1000]
                sql_use = sql % (table, ",".join(id_arr))
                rt = mysql_tool.querydb(
                    sql_use, logger, "select news_id for %d %s news" % (len(id_arr), table))
                for v in rt:
                    news_id = int(v[0])
                    if 'effective_reading' in data_news[category_id].keys():
                        data_news[category_id]['effective_reading'] += ner[news_id]
                    else:
                        data_news[category_id]['effective_reading'] = ner[news_id]
    # Effective readings per regular news category.
    sql = "select id, category_id from news where id in (%s)"
    for i in range(0, len(news_id_arr), 1000):
        id_arr = news_id_arr[i:i+1000]
        sql_use = sql % (",".join(id_arr))
        rt = mysql_tool.querydb(
            sql_use, logger, "select id, category_id from news for %d news" % len(id_arr))
        for v in rt:
            news_id = int(v[0])
            category_id = int(v[1])
            if category_id in data_news.keys():
                if 'effective_reading' in data_news[category_id].keys():
                    data_news[category_id]['effective_reading'] += ner[news_id]
                else:
                    data_news[category_id]['effective_reading'] = ner[news_id]
    # Effective readings for news marked hot before the end of the stats day.
    if HOT_NEWS_CATEGORY_ID in data_news.keys():
        sql = "select id from news where id in (%s) and hot_at < \"" + \
            day2 + "\""
        for i in range(0, len(news_id_arr), 1000):
            id_arr = news_id_arr[i:i+1000]
            sql_use = sql % (",".join(id_arr))
            rt = mysql_tool.querydb(
                sql_use, logger, "select id from news for %d news" % len(id_arr))
            for v in rt:
                news_id = int(v[0])
                if 'effective_reading' in data_news[HOT_NEWS_CATEGORY_ID].keys():
                    data_news[HOT_NEWS_CATEGORY_ID]['effective_reading'] += ner[news_id]
                else:
                    data_news[HOT_NEWS_CATEGORY_ID]['effective_reading'] = ner[news_id]

    return data_news
Ejemplo n.º 30
0
def get_hot_news_info(nday=1, data=None):
    """Collect daily stats for the "hot news" pseudo-category.

    Records publishes, the old-source share, the average crawl-to-publish
    interval, picked counts, comments, likes, zan (upvotes) and cumulative
    pv, all keyed under HOT_NEWS_CATEGORY_ID.

    Args:
        nday: how many days back the stats day is (default 1 = yesterday).
        data: optional dict to extend; a fresh dict is created when omitted.
              (Was a mutable default ``data={}``, which leaked state across
              calls because the function mutates it.)

    Returns:
        `data`, with data[HOT_NEWS_CATEGORY_ID] populated.
    """
    if data is None:
        data = {}
    day0 = time_tool.get_someday_str(-nday-1)
    day1 = time_tool.get_someday_str(-nday)
    day2 = time_tool.get_someday_str(-nday+1)
    tommorow = time_tool.get_someday_str(1)

    # Hot news published on the stats day.
    hot_new_published_count = 0
    data[HOT_NEWS_CATEGORY_ID] = {}
    sql = "select count(*) from news where hot_at >= \"%s\" and hot_at < \"%s\" and published_at < \"%s\"" % (day1, day2, day2)
    rt = mysql_tool.querydb(sql, logger, sql)
    for v in rt:
        data[HOT_NEWS_CATEGORY_ID]['new_published_count'] = int(v[0])
        hot_new_published_count = int(v[0])

    # Share of today's hot news whose raw source was crawled before today.
    sql = "select count(*) from news as n join news_raws as nr on (n.raw_id = nr.id) where n.hot_at >= \"%s\" and n.hot_at < \"%s\" and n.published_at < \"%s\" and nr.created_at < \"%s\"" % (day1, day2, day2, day1)
    rt = mysql_tool.querydb(sql, logger, sql)
    for v in rt:
        if hot_new_published_count > 0:
            data[HOT_NEWS_CATEGORY_ID]['old_published_percentage_f'] = float(
                v[0])/hot_new_published_count

    # Average interval (seconds) between crawl and publish for today's hot news.
    sql = "select avg(timestampdiff(second, nr.created_at, n.published_at)) from news as n join news_raws as nr on (n.raw_id = nr.id) where n.hot_at >= \"%s\" and n.hot_at < \"%s\" and n.published_at < \"%s\"" % (day1, day2, day2)
    rt = mysql_tool.querydb(sql, logger, sql)
    for v in rt:
        # AVG() is NULL when no rows matched; skip rather than crash on int(None).
        if v[0] is not None:
            data[HOT_NEWS_CATEGORY_ID]['interval_pub_crawl'] = int(v[0])
            data[HOT_NEWS_CATEGORY_ID]['interval_pub_crawl_show'] = transfer_seconds2time(
                int(v[0]))

    # Hot news items picked (auto-release flagged) on the stats day.
    sql = "select count(*) from news_raw_auto_releases where created_at >= \"%s\" and created_at < \"%s\" and is_hot !=0" % (day1, day2)
    rt = mysql_tool.querydb(sql, logger, sql)
    for v in rt:
        data[HOT_NEWS_CATEGORY_ID]['new_choosed_count'] = int(v[0])

    # Hot news items picked on the previous day.
    sql = "select count(*) from news_raw_auto_releases where created_at >= \"%s\" and created_at < \"%s\" and is_hot !=0" % (day0, day1)
    rt = mysql_tool.querydb(sql, logger, sql)
    for v in rt:
        data[HOT_NEWS_CATEGORY_ID]['yd_choosed_count'] = int(v[0])

    # Comments on hot news during the stats day.
    sql = "select count(*) from news_comments as nc join news as n on(nc.news_id = n.id) where n.id in (select id from news where hot_at < \"%s\") and nc.created_at >= \"%s\" and nc.created_at < \"%s\"" % (day2, day1, day2)
    rt = mysql_tool.querydb(sql, logger, sql)
    for v in rt:
        data[HOT_NEWS_CATEGORY_ID]['comments_count'] = int(v[0])

    # Likes (favorites) of hot news during the stats day.
    sql = "select count(*) from user_like_news as uln join news as n on(uln.news_id = n.id) where n.id in (select id from news where hot_at < \"%s\") and uln.created_at >= \"%s\" and uln.created_at < \"%s\"" % (day2, day1, day2)
    rt = mysql_tool.querydb(sql, logger, sql)
    for v in rt:
        data[HOT_NEWS_CATEGORY_ID]['like_count'] = int(v[0])

    # Zan (comment upvotes) on hot news during the stats day.
    sql = "select count(*) from user_zan_news_comments as uznc join news_comments as nc on (uznc.comment_id = nc.id) join news as n on(nc.news_id = n.id) where n.id in (select id from news where hot_at < \"%s\") and uznc.created_at >= \"%s\" and uznc.created_at < \"%s\"" % (day2, day1, day2)
    rt = mysql_tool.querydb(sql, logger, sql)
    for v in rt:
        data[HOT_NEWS_CATEGORY_ID]['zan_count'] = int(v[0])

    # Cumulative pv of all hot news, plus the category metadata.
    sql = "select sum(real_pv) from news where hot_at < \"%s\"" % tommorow
    rt = mysql_tool.querydb(sql, logger, sql)
    for v in rt:
        data[HOT_NEWS_CATEGORY_ID]['channel_name'] = u"资讯"
        data[HOT_NEWS_CATEGORY_ID]['channel'] = "news"
        data[HOT_NEWS_CATEGORY_ID]['category_id'] = HOT_NEWS_CATEGORY_ID
        # SUM() is NULL (not 0) when no hot news exists; avoid int(None).
        data[HOT_NEWS_CATEGORY_ID]['pv_total'] = int(v[0]) if v[0] is not None else 0
        data[HOT_NEWS_CATEGORY_ID]['category_name'] = HOT_SHOW

    return data