Esempio n. 1
0
def user_stats_data(uid, start=None, end=None):
    """
    Assemble the statistics dictionary for one user over an optional time window.

    The result starts from ``public_user_data(uid)`` and is extended with
    reading counts, per-category trend values and "most viewed" lists computed
    from the ``user_history`` collection and from the user's own sheets.

    :param uid: int or something cast-able to int
    :param start: datetime or None.  Passed unchanged to ``UserHistory.timeclause``
        for the history queries; for the sheet-creation filter, a falsy value
        falls back to 2017-12-01.
    :param end: datetime or None.  Passed unchanged to ``UserHistory.timeclause``
        for the history queries; for the sheet-creation filter, a falsy value
        falls back to ``datetime.now()``.
    :return: dict -- ``public_user_data(uid)`` plus the keys ``sheetsRead``,
        ``textsRead``, ``categoriesRead``, ``totalSheets``, ``publicSheets``,
        ``popularSheets`` (at most 3, most viewed first), ``sheetsThisPeriod``,
        ``mostViewedRefs`` (at most 10) and ``mostViewedSheets``.
    """
    from sefaria.model.category import TOP_CATEGORIES
    from sefaria.model.trend import TrendSet, read_in_category_key, reverse_read_in_category_key
    from sefaria.sheets import user_sheets, sheet_list

    uid = int(uid)

    # todo: needs more thought.  UserHistory.timeclause handles the Nones, but later usages in this method aren't so graceful.
    # The time clause is built from the raw arguments *before* the defaults
    # below are applied, so the history queries see the caller's original values.
    timeclause = UserHistory.timeclause(start, end)
    end = end or datetime.now()
    start = start or datetime(2017, 12, 1)  # start of Sefaria epoch

    def ranked_view_counts(match, group_field):
        """Group matching user_history records in the window by `group_field`; most viewed first."""
        query = dict(match)
        query.update(timeclause)
        groups = db.user_history.aggregate([
            {"$match": query},
            {"$group": {
                "_id": group_field,
                "cnt": {"$sum": 1}}},
        ])
        return sorted(groups, key=lambda o: o["cnt"], reverse=True)

    # All of user's sheets
    usheets = user_sheets(uid)["sheets"]
    usheet_ids = [s["id"] for s in usheets]

    # Views of the user's sheets in this period (no uid filter: any viewer counts)
    own_sheet_views = ranked_view_counts(
        {"is_sheet": True, "sheet_id": {"$in": usheet_ids}},
        "$sheet_id",
    )
    most_popular_sheet_ids = [g["_id"] for g in own_sheet_views[:3]]
    # Walk the ranked ids (not usheets) so the result keeps the popularity order.
    most_popular_sheets = []
    for sheet_id in most_popular_sheet_ids:
        most_popular_sheets.extend(s for s in usheets if s["id"] == sheet_id)

    sheets_this_period = [
        s for s in usheets
        if start <= datetime.strptime(s["created"], "%Y-%m-%dT%H:%M:%S.%f") <= end
    ]

    # Refs I viewed: only refs seen more than once.  The substring test drops
    # every ref containing "Genesis 1" (so also e.g. "Genesis 10:2").
    # NOTE(review): presumably this filters out a default landing text -- confirm.
    refs_viewed = ranked_view_counts(
        {"uid": uid, "secondary": False, "is_sheet": False},
        "$ref",
    )
    most_viewed_refs = [
        g["_id"] for g in refs_viewed
        if g["cnt"] > 1 and "Genesis 1" not in g["_id"]
    ][:10]

    # Sheets I viewed: seen more than once and not one of my own sheets
    sheets_viewed = ranked_view_counts(
        {"uid": uid, "secondary": False, "is_sheet": True},
        "$sheet_id",
    )
    most_viewed_sheets_ids = [
        g["_id"] for g in sheets_viewed
        if g["cnt"] > 1 and g["_id"] not in usheet_ids
    ][:10]
    most_viewed_sheets = sheet_list({"id": {"$in": most_viewed_sheets_ids}})

    # Construct returned data
    d = public_user_data(uid)

    sheetsReadQuery = {"is_sheet": True, "secondary": False, "uid": uid}
    sheetsReadQuery.update(timeclause)
    d["sheetsRead"] = UserHistorySet(sheetsReadQuery).count()

    textsReadQuery = {"is_sheet": False, "secondary": False, "uid": uid}
    textsReadQuery.update(timeclause)
    d["textsRead"] = UserHistorySet(textsReadQuery).count()

    # `$in` needs a real list: on Python 3 a bare map() is a lazy iterator and
    # cannot be encoded into the query document.
    category_keys = list(map(read_in_category_key, TOP_CATEGORIES))
    d["categoriesRead"] = {
        reverse_read_in_category_key(t.name): t.value
        for t in TrendSet({"uid": uid, "name": {"$in": category_keys}})
    }
    d["totalSheets"] = len(usheets)
    d["publicSheets"] = len([s for s in usheets if s["status"] == "public"])
    d["popularSheets"] = most_popular_sheets
    d["sheetsThisPeriod"] = len(sheets_this_period)
    d["mostViewedRefs"] = most_viewed_refs
    d["mostViewedSheets"] = most_viewed_sheets
    return d
Esempio n. 2
0
def user_stats_data(uid):
    """
    Assemble per-period statistics for one user.

    The result starts from ``user_profile.public_user_data(uid)``.  For every
    daterange in ``active_dateranges`` a sub-dictionary is stored under
    ``daterange.key`` with the keys ``sheetsRead``, ``textsRead``,
    ``categoriesRead``, ``totalSheets``, ``publicSheets``, ``popularSheets``
    (at most 3, most viewed first), ``sheetsThisPeriod``, ``mostViewedRefs``
    (at most 9) and ``mostViewedSheets`` (at most 3).

    :param uid: int or something cast-able to int
    :return: dict -- the public user data plus one statistics dict per daterange
    """
    from sefaria.model.category import TOP_CATEGORIES
    from sefaria.model.story import Story
    from sefaria.sheets import user_sheets

    uid = int(uid)
    user_stats_dict = user_profile.public_user_data(uid)

    # All of user's sheets
    usheets = user_sheets(uid)["sheets"]
    usheet_ids = [s["id"] for s in usheets]

    # Values that do not depend on the daterange are computed once, up front.
    total_sheets = len(usheets)
    public_sheets = len([s for s in usheets if s["status"] == "public"])
    sheet_creation_dates = [datetime.strptime(s["created"], "%Y-%m-%dT%H:%M:%S.%f") for s in usheets]
    category_keys = list(map(read_in_category_key, TOP_CATEGORIES))

    def ranked_view_counts(daterange, match, group_field):
        """Group matching user_history records in `daterange` by `group_field`; most viewed first."""
        groups = db.user_history.aggregate([
            {"$match": daterange.update_match(match)},
            {"$group": {
                "_id": group_field,
                "cnt": {"$sum": 1}}},  # Using $num_times_read isn't reliable.  It counts book views, but not text views.
        ])
        return sorted(groups, key=lambda o: o["cnt"], reverse=True)

    for daterange in active_dateranges:
        # Views of the user's sheets in this period, by anyone but the user
        own_sheet_views = ranked_view_counts(
            daterange,
            {"is_sheet": True, "sheet_id": {"$in": usheet_ids}, "uid": {"$ne": uid}},
            "$sheet_id",
        )
        most_popular_sheet_ids = [g["_id"] for g in own_sheet_views[:3]]
        # Walk the ranked ids (not usheets) so the result keeps the popularity order.
        most_popular_sheets = []
        for sheet_id in most_popular_sheet_ids:
            most_popular_sheets.extend(s for s in usheets if s["id"] == sheet_id)

        sheets_this_period = sum(1 for created in sheet_creation_dates if daterange.contains(created))

        # Refs I viewed: only refs seen more than once.  The substring test drops
        # every ref containing "Genesis 1" (so also e.g. "Genesis 10:2").
        # NOTE(review): presumably this filters out a default landing text -- confirm.
        refs_viewed = ranked_view_counts(
            daterange,
            {"uid": uid, "secondary": False, "is_sheet": False},
            "$ref",
        )
        most_viewed_trefs = [
            g["_id"] for g in refs_viewed
            if g["cnt"] > 1 and "Genesis 1" not in g["_id"]
        ][:9]
        most_viewed_refs = [text.Ref(r) for r in most_viewed_trefs]
        most_viewed_ref_dicts = [{"en": r.normal(), "he": r.he_normal(), "book": r.index.title} for r in most_viewed_refs]

        # Sheets I viewed: seen more than once and not one of my own sheets
        sheets_viewed = ranked_view_counts(
            daterange,
            {"uid": uid, "secondary": False, "is_sheet": True},
            "$sheet_id",
        )
        most_viewed_sheets_ids = [
            g["_id"] for g in sheets_viewed
            if g["cnt"] > 1 and g["_id"] not in usheet_ids
        ][:3]

        # Falsy metadata results are dropped before publisher data is merged in.
        most_viewed_sheets = [Story.sheet_metadata(i, return_id=True) for i in most_viewed_sheets_ids]
        most_viewed_sheets = [a for a in most_viewed_sheets if a]

        for sheet_dict in most_viewed_sheets:
            sheet_dict.update(Story.publisher_metadata(sheet_dict["publisher_id"]))

        # Construct returned data
        user_stats_dict[daterange.key] = {
            "sheetsRead": user_profile.UserHistorySet(daterange.update_match({"is_sheet": True, "secondary": False, "uid": uid})).hits(),
            "textsRead": user_profile.UserHistorySet(daterange.update_match({"is_sheet": False, "secondary": False, "uid": uid})).hits(),
            "categoriesRead": {
                reverse_read_in_category_key(t.name): t.value
                for t in TrendSet({"uid": uid, "period": daterange.key, "name": {"$in": category_keys}})
            },
            "totalSheets": total_sheets,
            "publicSheets": public_sheets,
            "popularSheets": most_popular_sheets,
            "sheetsThisPeriod": sheets_this_period,
            "mostViewedRefs": most_viewed_ref_dicts,
            "mostViewedSheets": most_viewed_sheets
        }

    return user_stats_dict