def format_note_object_for_client(note):
    """
    Serialize `note` into the dict shape the reader client expects.

    The keys mirror those of link objects, because the client currently
    handles notes and links together.
    """
    padded_oref = Ref(note.ref).padded_ref()
    owner_profile = public_user_data(note.owner)

    return {
        "category": "Notes",
        "type": "note",
        "owner": note.owner,
        "_id": str(note._id),
        "anchorRef": note.ref,
        # Last section of the padded ref is used as the anchor position.
        "anchorVerse": padded_oref.sections[-1],
        # Optional attributes fall back to neutral defaults.
        "anchorText": getattr(note, "anchorText", ""),
        "public": getattr(note, "public", False),
        "commentator": user_link(note.owner),
        "text": note.text,
        "title": getattr(note, "title", ""),
        "ownerName": owner_profile["name"],
        "ownerProfileUrl": owner_profile["profileUrl"],
        "ownerImageUrl": owner_profile["imageUrl"],
    }
def index_sheet(index_name, id):
    """
    Index source sheet with 'id' into the search index `index_name`.

    Looks the sheet up in `db.sheets`, builds a search document from it and
    the owner's public user data, and sends it to `es.index`. On success the
    module-level `doc_count` is incremented.

    Returns True if the sheet was indexed; False if no sheet has this id or
    if indexing raised (the error is printed, not propagated).
    """
    global doc_count

    sheet = db.sheets.find_one({"id": id})
    if not sheet:
        return False

    pud = public_user_data(sheet["owner"])
    doc = {
        "title": sheet["title"],
        "content": make_sheet_text(sheet, pud),
        "owner_id": sheet["owner"],
        "owner_name": pud["name"],
        "owner_image": pud["imageUrl"],
        "profile_url": pud["profileUrl"],
        "version": "Source Sheet by " + user_link(sheet["owner"]),
        "tags": ",".join(sheet.get("tags", [])),
        "sheetId": id,
    }
    try:
        es.index(index_name, 'sheet', doc, id)
        doc_count += 1
        return True
    except Exception as e:
        # Best effort: one failing sheet is reported and skipped rather than
        # aborting the caller. Single-argument print() is valid on both
        # Python 2 and Python 3.
        print("Error indexing sheet %d" % id)
        print(e)
        return False
def index_sheet(index_name, id):
    """
    Index source sheet with 'id' into the search index `index_name`.

    Looks the sheet up in `db.sheets`, builds a search document from it and
    the owner's public user data, and creates it via `es_client.create`. On
    success the module-level `doc_count` is incremented.

    Returns True if the sheet was indexed; False if no sheet has this id or
    if building/creating the document raised (the error is printed, not
    propagated).
    """
    global doc_count

    sheet = db.sheets.find_one({"id": id})
    if not sheet:
        return False

    pud = public_user_data(sheet["owner"])
    try:
        doc = {
            "title": strip_tags(sheet["title"]),
            "content": make_sheet_text(sheet, pud),
            "owner_id": sheet["owner"],
            "owner_name": pud["name"],
            "owner_image": pud["imageUrl"],
            "profile_url": pud["profileUrl"],
            "version": "Source Sheet by " + user_link(sheet["owner"]),
            "tags": sheet.get("tags", []),
            "sheetId": id,
            "summary": sheet.get("summary", None),
            "group": sheet.get("group", None),
            "datePublished": sheet.get("datePublished", None),
            "dateCreated": sheet.get("dateCreated", None),
            # Must come from the sheet's own "dateModified" field, not
            # "dateCreated", or the index never reflects later edits.
            "dateModified": sheet.get("dateModified", None),
            "views": sheet.get("views", 0),
        }
        es_client.create(index=index_name, doc_type='sheet', id=id, body=doc)
        doc_count += 1
        return True
    except Exception as e:
        # Best effort: one failing sheet is reported and skipped rather than
        # aborting the caller. Single-argument print() is valid on both
        # Python 2 and Python 3.
        print("Error indexing sheet %d" % id)
        print(e)
        return False
def index_sheet(index_name, id):
    """
    Index source sheet with 'id' into the search index `index_name`.

    Looks the sheet up in `db.sheets`, builds a search document from it and
    the owner's public user data, and sends it to `es.index`. On success the
    module-level `doc_count` is incremented.

    Returns False if no sheet has this id; otherwise returns None, whether
    or not indexing succeeded (a failure is printed, not propagated).
    """
    global doc_count

    sheet = db.sheets.find_one({"id": id})
    if not sheet:
        return False

    pud = public_user_data(sheet["owner"])
    doc = {
        "title": sheet["title"],
        "content": make_sheet_text(sheet, pud),
        "owner_id": sheet["owner"],
        "owner_name": pud["name"],
        "owner_image": pud["imageUrl"],
        "profile_url": pud["profileUrl"],
        "version": "Source Sheet by " + user_link(sheet["owner"]),
        "tags": ",".join(sheet.get("tags", [])),
        "sheetId": id,
    }
    try:
        es.index(index_name, 'sheet', doc, id)
        doc_count += 1
    except Exception as e:
        # Best effort: one failing sheet is reported and skipped rather than
        # aborting the caller. Single-argument print() is valid on both
        # Python 2 and Python 3.
        print("Error indexing sheet %d" % id)
        print(e)
def annotate_user_links(sources):
    """
    Add a "userLink" entry to every source that has an "addedBy" field.

    The value of "addedBy" (a UID) is passed to `user_link`. Sources are
    updated in place and the same `sources` object is returned.
    """
    for entry in sources:
        if "addedBy" not in entry:
            continue
        entry["userLink"] = user_link(entry["addedBy"])
    return sources
def get_sheets_for_ref(tref, pad=True, context=1):
    """
    Returns a list of public sheets that include `tref`, formatted as needed
    for the Client Sidebar.

    :param tref: textual reference, parsed with `model.Ref`.
    :param pad: if truthy, the ref is replaced by its `padded_ref()`.
    :param context: if truthy, the ref is widened with `context_ref(context)`.
    :return: list of dicts, one per matching source ref per sheet, in the
        order returned by the query (sorted by "views" descending).
    """
    oref = model.Ref(tref)
    if pad:
        oref = oref.padded_ref()
    if context:
        oref = oref.context_ref(context)

    ref_re = oref.regex()
    ref_clauses = [{"sources.ref": {"$regex": r}} for r in oref.regex(as_list=True)]
    sheets = db.sheets.find(
        {"$or": ref_clauses, "status": "public"},
        {"id": 1, "title": 1, "owner": 1, "sources.ref": 1, "views": 1},
    ).sort([["views", -1]])

    results = []
    for sheet in sheets:
        # Collect the source refs of this sheet that match and parse.
        matches = []
        for source in sheet.get("sources", []):
            if "ref" not in source or not regex.match(ref_re, source["ref"]):
                continue
            try:
                matches.append(model.Ref(source["ref"]))
            except InputError:
                continue
        if not matches:
            continue

        # Everything below depends only on the sheet, so compute it once per
        # sheet instead of once per matched ref.
        owner_data = public_user_data(sheet["owner"])
        commentator = user_link(sheet["owner"])
        title = strip_tags(sheet["title"])
        sheet_link = "<a class='sheetLink' href='/sheets/%d'>%s</a>" % (sheet["id"], title)
        sheet_url = "/sheets/" + str(sheet["id"])
        sheet_oid = str(sheet["_id"])
        # A sheet without a "views" field must not abort the whole listing.
        views = sheet.get("views", 0)

        for match in matches:
            results.append({
                "category": "Sheets",
                "type": "sheet",
                "owner": sheet["owner"],
                "_id": sheet_oid,
                "anchorRef": match.normal(),
                "anchorVerse": match.sections[-1] if len(match.sections) else 1,
                "public": True,
                "commentator": commentator,  # legacy, used in S1
                "text": sheet_link,  # legacy, used in S1
                "title": title,
                "sheetUrl": sheet_url,
                "ownerName": owner_data["name"],
                "ownerProfileUrl": owner_data["profileUrl"],
                "ownerImageUrl": owner_data["imageUrl"],
                "views": views,
            })

    return results
def get_reviews(tref, lang, version):
    """
    Returns a list of reviews pertaining to ref/lang/version.

    Queries `db.history` for "review" records whose "ref" is either exactly
    the normalized `tref` or starts with it followed by ":", sorted by
    "date" descending. Each record gets its "_id" stringified and a
    "userLink" built from its "user" field.
    """
    import re  # local import so this function does not rely on file-level imports

    tref = model.Ref(tref).normal()
    # Escape the ref so regex metacharacters in a title are matched literally
    # instead of being interpreted as pattern syntax.
    escaped = re.escape(tref)
    refRe = '^%s$|^%s:' % (escaped, escaped)
    cursor = db.history.find({
        "ref": {"$regex": refRe},
        "language": lang,
        "version": version,
        "rev_type": "review",
    }).sort([["date", -1]])

    reviews = []
    for r in cursor:
        r["_id"] = str(r["_id"])
        r["userLink"] = user_link(r["user"])
        reviews.append(r)

    return reviews
def index_sheet(index_name, id):
    """
    Add the source sheet identified by 'id' to the search index `index_name`.

    The search document combines the sheet's own fields with the owner's
    public user data, its English tag names, its resolvable topics and the
    names of listed collections that contain it. A successful create bumps
    the module-level `doc_count`.

    Returns True on success, False when the sheet does not exist or when
    building/creating the document raises (the error is printed).
    """
    global doc_count

    sheet = db.sheets.find_one({"id": id})
    if not sheet:
        return False

    pud = public_user_data(sheet["owner"])
    tags = [term["en"] for term in make_sheet_tags(sheet)]

    # Keep only topics whose slug resolves to a Topic object.
    resolved = (Topic.init(entry['slug']) for entry in sheet.get('topics', []))
    topics = [topic for topic in resolved if topic]

    collection_names = [coll.name for coll in CollectionSet({"sheets": id, "listed": True})]

    try:
        doc = {
            "title": strip_tags(sheet["title"]),
            "content": make_sheet_text(sheet, pud),
            "owner_id": sheet["owner"],
            "owner_name": pud["name"],
            "owner_image": pud["imageUrl"],
            "profile_url": pud["profileUrl"],
            "version": "Source Sheet by " + user_link(sheet["owner"]),
            "tags": tags,
            "topic_slugs": [topic.slug for topic in topics],
            "topics_en": [topic.get_primary_title('en') for topic in topics],
            "topics_he": [topic.get_primary_title('he') for topic in topics],
            "sheetId": id,
            "summary": sheet.get("summary", None),
            "collections": collection_names,
            "datePublished": sheet.get("datePublished", None),
            "dateCreated": sheet.get("dateCreated", None),
            "dateModified": sheet.get("dateModified", None),
            "views": sheet.get("views", 0),
        }
        es_client.create(index=index_name, doc_type='sheet', id=id, body=doc)
        doc_count += 1
        return True
    except Exception as e:
        # Failures are reported and swallowed; the caller only sees False.
        print("Error indexing sheet %d" % id)
        print(e)
        return False
def index_sheet(index_name, id):
    """
    Index source sheet with 'id' into the search index `index_name`.

    Looks the sheet up in `db.sheets`, builds a search document from it, the
    owner's public user data and the tag terms from `make_sheet_tags`, and
    creates it via `es_client.create`. On success the module-level
    `doc_count` is incremented.

    Returns True if the sheet was indexed; False if no sheet has this id or
    if building/creating the document raised (the error is printed, not
    propagated).
    """
    global doc_count

    sheet = db.sheets.find_one({"id": id})
    if not sheet:
        return False

    pud = public_user_data(sheet["owner"])
    tag_terms_simple = make_sheet_tags(sheet)
    tags = [t["en"] for t in tag_terms_simple]
    # Each entry pairs the English and Hebrew tag, joined by "|||".
    tags_he_and_en = [u"{}|||{}".format(t["en"], t["he"]) for t in tag_terms_simple]
    try:
        doc = {
            "title": strip_tags(sheet["title"]),
            "content": make_sheet_text(sheet, pud),
            "owner_id": sheet["owner"],
            "owner_name": pud["name"],
            "owner_image": pud["imageUrl"],
            "profile_url": pud["profileUrl"],
            "version": "Source Sheet by " + user_link(sheet["owner"]),
            "tags": tags,
            "tags_he_and_en": tags_he_and_en,
            "sheetId": id,
            "summary": sheet.get("summary", None),
            "group": sheet.get("group", None),
            "datePublished": sheet.get("datePublished", None),
            "dateCreated": sheet.get("dateCreated", None),
            # Must come from the sheet's own "dateModified" field, not
            # "dateCreated", or the index never reflects later edits.
            "dateModified": sheet.get("dateModified", None),
            "views": sheet.get("views", 0),
        }
        es_client.create(index=index_name, doc_type='sheet', id=id, body=doc)
        doc_count += 1
        return True
    except Exception as e:
        # Best effort: one failing sheet is reported and skipped rather than
        # aborting the caller. Single-argument print() is valid on both
        # Python 2 and Python 3.
        print("Error indexing sheet %d" % id)
        print(e)
        return False
def get_reviews(tref, lang, version):
    """
    Returns a list of reviews pertaining to ref/lang/version.

    Queries `db.history` for "review" records whose "ref" is either exactly
    the normalized `tref` or starts with it followed by ":", sorted by
    "date" descending. Each record gets its "_id" stringified and a
    "userLink" built from its "user" field.
    """
    import re  # local import so this function does not rely on file-level imports

    tref = model.Ref(tref).normal()
    # Escape the ref so regex metacharacters in a title are matched literally
    # instead of being interpreted as pattern syntax.
    literal_ref = re.escape(tref)
    refRe = '^%s$|^%s:' % (literal_ref, literal_ref)
    query = {
        "ref": {"$regex": refRe},
        "language": lang,
        "version": version,
        "rev_type": "review",
    }
    cursor = db.history.find(query).sort([["date", -1]])

    reviews = []
    for r in cursor:
        r["_id"] = str(r["_id"])
        r["userLink"] = user_link(r["user"])
        reviews.append(r)

    return reviews
def index_sheet(id):
    """
    Index source sheet with 'id' into the 'sefaria' search index.

    Looks the sheet up in `db.sheets`, builds a search document and sends it
    to `es.index`. On success the module-level `doc_count` is incremented.

    Returns False if no sheet has this id; otherwise returns None, whether
    or not indexing succeeded (a failure is printed, not propagated).
    """
    global doc_count

    sheet = db.sheets.find_one({"id": id})
    if not sheet:
        return False

    doc = {
        "title": sheet["title"],
        "content": make_sheet_text(sheet),
        "version": "Source Sheet by " + user_link(sheet["owner"]),
        "tags": ",".join(sheet.get("tags", [])),
        "sheetId": id,
    }
    try:
        es.index('sefaria', 'sheet', doc, id)
        doc_count += 1
    except Exception as e:
        # Best effort: one failing sheet is reported and skipped rather than
        # aborting the caller. Single-argument print() is valid on both
        # Python 2 and Python 3.
        print("Error indexing sheet %d" % id)
        print(e)
def get_sheets_for_ref(tref, uid=None):
    """
    Returns a list of sheets that include ref, formatted as needed for the
    Client Sidebar.

    If `uid` is present return that user's sheets, otherwise return public
    sheets.

    :param tref: textual reference, parsed with `model.Ref`.
    :param uid: optional owner id used to restrict the query.
    :return: list of dicts, one per overlapping source ref per sheet, in the
        order returned by the query (sorted by "views" descending).
    """
    oref = model.Ref(tref)
    # perform initial search with context to catch ranges that include a segment ref
    regex_list = oref.context_ref().regex(as_list=True)
    ref_clauses = [{"sources.ref": {"$regex": r}} for r in regex_list]
    query = {"$or": ref_clauses}
    if uid:
        query["owner"] = uid
    else:
        query["status"] = "public"

    projection = {
        "id": 1, "title": 1, "owner": 1, "sources.ref": 1,
        "views": 1, "tags": 1, "status": 1,
    }
    sheets = db.sheets.find(query, projection).sort([["views", -1]])

    results = []
    for sheet in sheets:
        # The query used a widened ref, so keep only sources that parse and
        # actually overlap the requested ref.
        matches = []
        for source in sheet.get("sources", []):
            if "ref" not in source:
                continue
            try:
                match = model.Ref(source["ref"])
                if not oref.overlaps(match):
                    continue
            except InputError:
                continue
            matches.append(match)
        if not matches:
            continue

        # Everything below depends only on the sheet, so compute it once per
        # sheet instead of once per matched ref.
        owner_data = public_user_data(sheet["owner"])
        commentator = user_link(sheet["owner"])
        title = strip_tags(sheet["title"])
        sheet_link = "<a class='sheetLink' href='/sheets/%d'>%s</a>" % (sheet["id"], title)
        sheet_url = "/sheets/" + str(sheet["id"])
        sheet_oid = str(sheet["_id"])
        # A sheet without a "views" field must not abort the whole listing.
        views = sheet.get("views", 0)

        for match in matches:
            results.append({
                "owner": sheet["owner"],
                "_id": sheet_oid,
                "anchorRef": match.normal(),
                "anchorVerse": match.sections[-1] if len(match.sections) else 1,
                "public": sheet["status"] == "public",
                "text": sheet_link,  # legacy, used in S1
                "title": title,
                "sheetUrl": sheet_url,
                "ownerName": owner_data["name"],
                "ownerProfileUrl": owner_data["profileUrl"],
                "ownerImageUrl": owner_data["imageUrl"],
                "status": sheet["status"],
                "views": views,
                "tags": sheet.get("tags", []),
                "commentator": commentator,  # legacy, used in S1
                "category": "Sheets",  # ditto
                "type": "sheet",  # ditto
            })

    return results
def sheet_spam_dashboard(request):
    """
    Render the dashboard of sheets that look like spam.

    Only non-POST requests are served; a POST gets a JSON error payload.

    The optional GET parameter "date" (format YYYY-MM-DD) sets the cutoff;
    when absent or empty the cutoff is 30 days before now. Candidate sheets
    are those created after the cutoff, owned by a user id greater than that
    of the earliest user who joined since the cutoff, with no refs, not yet
    reviewed, and containing an `href` matched by the pattern below in one
    of their free-text source fields.
    """
    from django.contrib.auth.models import User

    if request.method == 'POST':
        return jsonResponse({"error": "Unsupported Method: {}".format(request.method)})

    date = request.GET.get("date", None)
    if date:
        date = datetime.strptime(date, '%Y-%m-%d')
    else:
        # Covers both a missing and an empty "date" parameter. Re-reading
        # request.GET here would hand back "" for "?date=" and break the
        # strftime calls below.
        date = datetime.now() - timedelta(days=30)

    sheets_list = []
    try:
        earliest_new_user_id = User.objects.filter(date_joined__gte=date).order_by('date_joined')[0].id
    except IndexError:
        # Nobody joined since the cutoff, so there are no new-user sheets to list.
        earliest_new_user_id = None

    if earliest_new_user_id is not None:
        regex = r'.*(?!href=[\'"](\/|http(s)?:\/\/(www\.)?sefaria).+[\'"])(href).*'
        sheets = db.sheets.find({
            "sources.ref": {"$exists": False},
            "dateCreated": {"$gt": date.strftime("%Y-%m-%dT%H:%M:%S.%f")},
            "owner": {"$gt": earliest_new_user_id},
            "includedRefs": {"$size": 0},
            "reviewed": {"$ne": True},
            "$or": [
                {"sources.outsideText": {"$regex": regex}},
                {"sources.comment": {"$regex": regex}},
                {"sources.outsideBiText.en": {"$regex": regex}},
                {"sources.outsideBiText.he": {"$regex": regex}},
            ],
        })
        sheets_list = [
            {
                "id": sheet["id"],
                "title": strip_tags(sheet["title"]),
                "owner": user_link(sheet["owner"]),
            }
            for sheet in sheets
        ]

    return render_template(request, 'spam_dashboard.html', None, {
        "title": "Potential Spam Sheets since %s" % date.strftime("%Y-%m-%d"),
        "sheets": sheets_list,
        "type": "sheet",
    })
def spam_dashboard(request):
    """
    Review dashboard for sheets that look like spam.

    POST: reads "spam_sheets[]" and "reviewed_sheets[]" (lists of integer
    sheet ids). Reviewed sheets are flagged `reviewed`; owners of spam sheets
    have their accounts set inactive; spam sheets are deleted. Renders a
    summary of what was done.

    Any other method: lists candidate sheets. The optional GET parameter
    "date" (format YYYY-MM-DD) sets the cutoff; when absent or empty the
    cutoff is 30 days before now. Candidates are sheets created after the
    cutoff, owned by a user id greater than that of the earliest user who
    joined since the cutoff, with no refs, not yet reviewed, and containing
    an `href` matched by the pattern below in a free-text source field.
    """
    from django.contrib.auth.models import User

    if request.method == 'POST':
        spam_sheet_ids = list(map(int, request.POST.getlist("spam_sheets[]", [])))
        reviewed_sheet_ids = list(map(int, request.POST.getlist("reviewed_sheets[]", [])))

        db.sheets.update_many({"id": {"$in": reviewed_sheet_ids}}, {"$set": {"reviewed": True}})

        spammers = db.sheets.find({"id": {"$in": spam_sheet_ids}}, {"owner": 1}).distinct("owner")
        deactivated = 0
        for spammer in spammers:
            try:
                spammer_account = User.objects.get(id=spammer)
                spammer_account.is_active = False
                spammer_account.save()
            except Exception:
                # Best effort: an owner that cannot be loaded or saved must
                # not stop the rest of the clean-up.
                continue
            deactivated += 1

        db.sheets.delete_many({"id": {"$in": spam_sheet_ids}})

        return render(request, 'spam_dashboard.html', {
            "deleted_sheets": len(spam_sheet_ids),
            "sheet_ids": spam_sheet_ids,
            "reviewed_sheets": len(reviewed_sheet_ids),
            # Count only the accounts that were actually deactivated.
            "spammers_deactivated": deactivated,
        })

    date = request.GET.get("date", None)
    if date:
        date = datetime.strptime(date, '%Y-%m-%d')
    else:
        # Covers both a missing and an empty "date" parameter. Re-reading
        # request.GET here would hand back "" for "?date=" and break the
        # strftime calls below.
        date = datetime.now() - timedelta(days=30)

    sheets_list = []
    try:
        # Explicit ordering makes "earliest" deterministic.
        earliest_new_user_id = User.objects.filter(date_joined__gte=date).order_by('date_joined')[0].id
    except IndexError:
        # Nobody joined since the cutoff, so there are no new-user sheets to list.
        earliest_new_user_id = None

    if earliest_new_user_id is not None:
        regex = r'.*(?!href=[\'"](\/|http(s)?:\/\/(www\.)?sefaria).+[\'"])(href).*'
        sheets = db.sheets.find({
            "sources.ref": {"$exists": False},
            "dateCreated": {"$gt": date.strftime("%Y-%m-%dT%H:%M:%S.%f")},
            "owner": {"$gt": earliest_new_user_id},
            "includedRefs": {"$size": 0},
            "reviewed": {"$ne": True},
            "$or": [
                {"sources.outsideText": {"$regex": regex}},
                {"sources.comment": {"$regex": regex}},
                {"sources.outsideBiText.en": {"$regex": regex}},
                {"sources.outsideBiText.he": {"$regex": regex}},
            ],
        })
        sheets_list = [
            {
                "id": sheet["id"],
                "title": strip_tags(sheet["title"]),
                "owner": user_link(sheet["owner"]),
            }
            for sheet in sheets
        ]

    return render(request, 'spam_dashboard.html', {
        "title": "Potential Spam Sheets since %s" % date.strftime("%Y-%m-%d"),
        "sheets": sheets_list,
    })