def user_stats_data(uid, start=None, end=None): """ :param uid: int or something cast-able to int :param start: datetime :param end: datetime :return: """ from sefaria.model.category import TOP_CATEGORIES from sefaria.model.trend import Trend, TrendSet, read_in_category_key, reverse_read_in_category_key from sefaria.sheets import user_sheets, sheet_list uid = int(uid) # todo: needs more thought. UserHistory.timeclause handles the Nones, but later usages in this method aren't so graceful. timeclause = UserHistory.timeclause(start, end) end = end or datetime.now() start = start or datetime(2017, 12, 1) # start of Sefaria epoch # All of user's sheets usheets = user_sheets(uid)["sheets"] usheet_ids = [s["id"] for s in usheets] # Sheet views in this period match_clause = { "is_sheet": True, "sheet_id": {"$in": usheet_ids}, } match_clause.update(timeclause) usheet_views = db.user_history.aggregate([ {"$match": match_clause}, {"$group": { "_id": "$sheet_id", "cnt": {"$sum": 1}}}, ]) most_popular_sheet_ids = [s["_id"] for s in sorted(usheet_views, key=lambda o: o["cnt"], reverse=True)[:3]] most_popular_sheets = [s for s in usheets if s["id"] in most_popular_sheet_ids] sheets_this_period = [s for s in usheets if start <= datetime.strptime(s["created"], "%Y-%m-%dT%H:%M:%S.%f") <= end] # Refs I viewed match_clause = { "uid": uid, "secondary": False, "is_sheet": False } match_clause.update(timeclause) refs_viewed = db.user_history.aggregate([ {"$match": match_clause}, {"$group": { "_id": "$ref", "cnt": {"$sum": 1}}}, ]) most_viewed_refs = [s["_id"] for s in sorted(refs_viewed, key=lambda o: o["cnt"], reverse=True) if s["cnt"] > 1 and "Genesis 1" not in s["_id"]][:10] # Sheets I viewed match_clause = { "uid": uid, "secondary": False, "is_sheet": True } match_clause.update(timeclause) sheets_viewed = db.user_history.aggregate([ {"$match": match_clause}, {"$group": { "_id": "$sheet_id", "cnt": {"$sum": 1}}}, ]) most_viewed_sheets_ids = [s["_id"] for s in sorted(sheets_viewed, key=lambda o: o["cnt"], reverse=True) if s["cnt"] > 1 and s["_id"] not in usheet_ids][:10] most_viewed_sheets = sheet_list({"id": {"$in":most_viewed_sheets_ids}}) # Construct returned data d = public_user_data(uid) sheetsReadQuery = {"is_sheet": True, "secondary": False, "uid": uid} sheetsReadQuery.update(timeclause) d["sheetsRead"] = UserHistorySet(sheetsReadQuery).count() textsReadQuery = {"is_sheet": False, "secondary": False, "uid": uid} textsReadQuery.update(timeclause) d["textsRead"] = UserHistorySet(textsReadQuery).count() d["categoriesRead"] = {reverse_read_in_category_key(t.name): t.value for t in TrendSet({"uid":uid, "name": {"$in": map(read_in_category_key, TOP_CATEGORIES)}})} d["totalSheets"] = len(usheets) d["publicSheets"] = len([s for s in usheets if s["status"] == "public"]) d["popularSheets"] = most_popular_sheets d["sheetsThisPeriod"] = len(sheets_this_period) d["mostViewedRefs"] = most_viewed_refs d["mostViewedSheets"] = most_viewed_sheets return d
def user_stats_data(uid): """ :param uid: int or something cast-able to int :param start: datetime :param end: datetime :return: """ from sefaria.model.category import TOP_CATEGORIES from sefaria.model.story import Story from sefaria.sheets import user_sheets uid = int(uid) user_stats_dict = user_profile.public_user_data(uid) # All of user's sheets usheets = user_sheets(uid)["sheets"] usheet_ids = [s["id"] for s in usheets] for daterange in active_dateranges: # Sheet views in this period usheet_views = db.user_history.aggregate([ {"$match": daterange.update_match({ "is_sheet": True, "sheet_id": {"$in": usheet_ids}, "uid": {"$ne": uid} })}, {"$group": { "_id": "$sheet_id", "cnt": {"$sum": 1}}}, ]) most_popular_sheet_ids = [s["_id"] for s in sorted(usheet_views, key=lambda o: o["cnt"], reverse=True)[:3]] most_popular_sheets = [] for sheet_id in most_popular_sheet_ids: most_popular_sheets += [s for s in usheets if s["id"] == sheet_id] sheets_this_period = [s for s in usheets if daterange.contains(datetime.strptime(s["created"], "%Y-%m-%dT%H:%M:%S.%f"))] # Refs I viewed refs_viewed = db.user_history.aggregate([ {"$match": daterange.update_match({ "uid": uid, "secondary": False, "is_sheet": False })}, {"$group": { "_id": "$ref", "cnt": {"$sum": 1}}}, # Using $num_times_read isn't reliable. It counts book views, but not text views. ]) most_viewed_trefs = [s["_id"] for s in sorted(refs_viewed, key=lambda o: o["cnt"], reverse=True) if s["cnt"] > 1 and "Genesis 1" not in s["_id"]][:9] most_viewed_refs = [text.Ref(r) for r in most_viewed_trefs] most_viewed_ref_dicts = [{"en": r.normal(), "he": r.he_normal(), "book": r.index.title} for r in most_viewed_refs] # Sheets I viewed sheets_viewed = db.user_history.aggregate([ {"$match": daterange.update_match({ "uid": uid, "secondary": False, "is_sheet": True })}, {"$group": { "_id": "$sheet_id", "cnt": {"$sum": 1}}}, ]) most_viewed_sheets_ids = [s["_id"] for s in sorted(sheets_viewed, key=lambda o: o["cnt"], reverse=True) if s["cnt"] > 1 and s["_id"] not in usheet_ids][:3] most_viewed_sheets = [Story.sheet_metadata(i, return_id=True) for i in most_viewed_sheets_ids] most_viewed_sheets = [a for a in most_viewed_sheets if a] for sheet_dict in most_viewed_sheets: sheet_dict.update(Story.publisher_metadata(sheet_dict["publisher_id"])) # Construct returned data user_stats_dict[daterange.key] = { "sheetsRead": user_profile.UserHistorySet(daterange.update_match({"is_sheet": True, "secondary": False, "uid": uid})).hits(), "textsRead": user_profile.UserHistorySet(daterange.update_match({"is_sheet": False, "secondary": False, "uid": uid})).hits(), "categoriesRead": {reverse_read_in_category_key(t.name): t.value for t in TrendSet({"uid":uid, "period": daterange.key, "name": {"$in": list(map(read_in_category_key, TOP_CATEGORIES))}})}, "totalSheets": len(usheets), "publicSheets": len([s for s in usheets if s["status"] == "public"]), "popularSheets": most_popular_sheets, "sheetsThisPeriod": len(sheets_this_period), "mostViewedRefs": most_viewed_ref_dicts, "mostViewedSheets": most_viewed_sheets } return user_stats_dict