def sheet_to_dict(sheet):
    """
    Build a JSON-serializable dict from the Mongo sheet document `sheet`,
    augmented with the owner's public profile data for client display.
    """
    owner_profile = public_user_data(sheet["owner"])
    modified_date = dateutil.parser.parse(sheet["dateModified"]).strftime("%m/%d/%Y")
    return {
        "id": sheet["id"],
        "title": strip_tags(sheet["title"]) if "title" in sheet else "Untitled Sheet",
        "status": sheet["status"],
        "author": sheet["owner"],
        "ownerName": owner_profile["name"],
        "ownerImageUrl": owner_profile["imageUrl"],
        "views": sheet["views"],
        "modified": modified_date,
        "tags": sheet.get("tags", []),
    }
def index_sheet(index_name, id):
    """
    Index the source sheet `id` into the search index `index_name`.

    :param index_name: name of the target search index.
    :param id: integer sheet id.
    :return: True on success, False when the sheet is missing or indexing fails.
    Side effect: increments the module-level `doc_count` on success.
    """
    sheet = db.sheets.find_one({"id": id})
    if not sheet:
        return False

    pud = public_user_data(sheet["owner"])
    doc = {
        "title": strip_tags(sheet["title"]),
        "content": make_sheet_text(sheet, pud),
        "owner_id": sheet["owner"],
        "owner_name": pud["name"],
        "owner_image": pud["imageUrl"],
        "profile_url": pud["profileUrl"],
        "version": "Source Sheet by " + user_link(sheet["owner"]),
        "tags": ",".join(sheet.get("tags", [])),
        "sheetId": id,
    }
    try:
        es.index(index_name, 'sheet', doc, id)
        global doc_count
        doc_count += 1
        return True
    # Fixed: Python-2 `except Exception, e` / print statements are syntax
    # errors under Python 3 and inconsistent with this file's Py3 blocks.
    except Exception as e:
        print("Error indexing sheet %d" % id)
        print(e)
        return False
def sheet_to_dict(sheet):
    """
    Return a JSON-serializable dict of the Mongo sheet document `sheet`,
    annotated with owner profile info that is useful to the client.
    """
    owner = public_user_data(sheet["owner"])
    topics = sheet.get("topics", [])
    return {
        "id": sheet["id"],
        "title": strip_tags(sheet["title"]) if "title" in sheet else "Untitled Sheet",
        "status": sheet["status"],
        "author": sheet["owner"],
        "ownerName": owner["name"],
        "ownerImageUrl": owner["imageUrl"],
        "views": sheet["views"],
        "group": sheet.get("group", None),
        "modified": dateutil.parser.parse(sheet["dateModified"]).strftime("%m/%d/%Y"),
        "created": sheet.get("dateCreated", None),
        "topics": add_langs_to_topics(topics),
        # `tags` duplicates topic names for backwards compatibility with mobile
        "tags": [topic['asTyped'] for topic in topics],
        "options": sheet.get("options", []),
    }
def word_frequency_for_text(title, lang="en"):
    """
    Return a list of (word, count) tuples for occurrences of words inside
    the text `title`, ordered by descending count.

    :param title: title of the text to analyze.
    :param lang: language version to load ("en" by default).
    """
    from collections import Counter
    from sefaria.export import make_text, prepare_merged_text_for_export
    from sefaria.utils.util import strip_tags

    text = make_text(prepare_merged_text_for_export(title, lang=lang))
    text = strip_tags(text).lower()
    # Keep only lowercase ASCII letters and spaces; this also removes all
    # punctuation, so the old translate() pass was a redundant no-op.
    text = re.sub(r'[^a-z ]', " ", text)
    # Fixed: split() with no argument collapses whitespace runs and drops
    # the empty strings that split(" ") used to count as "words".
    counts = Counter(text.split())
    # most_common() sorts by descending count, matching the old
    # sorted(..., key=lambda x: -x[1]) behavior.
    return counts.most_common()
def index_sheet(index_name, id):
    """
    Index the source sheet `id` into the search index `index_name`.

    :param index_name: name of the target search index.
    :param id: integer sheet id.
    :return: True on success, False when the sheet is missing or indexing fails.
    Side effect: increments the module-level `doc_count` on success.
    """
    sheet = db.sheets.find_one({"id": id})
    if not sheet:
        return False
    pud = public_user_data(sheet["owner"])
    try:
        doc = {
            "title": strip_tags(sheet["title"]),
            "content": make_sheet_text(sheet, pud),
            "owner_id": sheet["owner"],
            "owner_name": pud["name"],
            "owner_image": pud["imageUrl"],
            "profile_url": pud["profileUrl"],
            "version": "Source Sheet by " + user_link(sheet["owner"]),
            "tags": sheet.get("tags", []),
            "sheetId": id,
            "summary": sheet.get("summary", None),
            "group": sheet.get("group", None),
            "datePublished": sheet.get("datePublished", None),
            "dateCreated": sheet.get("dateCreated", None),
            # Fixed: previously read "dateCreated" here, so the indexed
            # modification date was always the creation date (compare the
            # correct variant elsewhere in this file).
            "dateModified": sheet.get("dateModified", None),
            "views": sheet.get("views", 0)
        }
        es_client.create(index=index_name, doc_type='sheet', id=id, body=doc)
        global doc_count
        doc_count += 1
        return True
    # Fixed: Python-2 except/print syntax, inconsistent with the Py3
    # blocks in this file.
    except Exception as e:
        print("Error indexing sheet %d" % id)
        print(e)
        return False
def count_sources(sources, sheet_id):
    """
    Recursively tally statistics about `sources` (belonging to sheet
    `sheet_id`) into the module-level counters.
    """
    global refs, texts, categories
    global sources_count, comments_count, outside_count, untrans_count
    global untrans_texts, untrans_categories, untrans_refs
    global fragments, fragments_count

    for source in sources:
        if "ref" in source:
            sources_count += 1
            pRef = parse_ref(source["ref"])
            if "error" in pRef:
                continue
            refs[source["ref"]] += 1
            texts[pRef["book"]] += 1
            categories[pRef["categories"][0]] += 1
            if not is_ref_translated(source["ref"]):
                untrans_categories[pRef["categories"][0]] += 1
                untrans_texts[pRef["book"]] += 1
                untrans_refs[source["ref"]] += 1
                untrans_count += 1
                # Flag sheets carrying a substantial English fragment for an
                # otherwise untranslated ref.
                english = strip_tags(source.get("text", {}).get("en", ""))
                if len(english) > 25:
                    fragments[source["ref"]].append(sheet_id)
                    fragments_count += 1
            if "subsources" in source:
                count_sources(source["subsources"], sheet_id)
        elif "comment" in source:
            comments_count += 1
        elif "outsideText" in source or "outsideBiText" in source:
            outside_count += 1
def is_hebrew(self):
    """Returns True if this sheet appears to be in Hebrew according to its title"""
    from sefaria.utils.hebrew import is_hebrew
    import regex
    stripped_title = strip_tags(self.title)
    # A sheet counts as Hebrew when its title contains Hebrew characters
    # and no Latin letters.
    has_latin = regex.search(u"[a-z|A-Z]", stripped_title)
    return is_hebrew(stripped_title) and not has_latin
def count_sources(self, sources, tags, sheet_id):
    """
    Tally statistics for each entry of `sources` (from sheet `sheet_id`,
    tagged with `tags`) into this object's counters. Any error while
    processing a single source silently skips that source.
    """
    for source in sources:
        try:
            if "ref" in source and source["ref"] is not None:
                self.sources_count += 1
                oref = Ref(source["ref"]).padded_ref()
                normal = oref.normal()
                top_category = oref.index.categories[0]
                self.refs[normal] += 1
                self.texts[oref.book] += 1
                self.categories[top_category] += 1
                self.refs_by_category[top_category][normal] += 1
                for tag in tags:
                    self.refs_by_tag[tag][normal] += 1
                # Treat any failure to determine translation status as
                # "untranslated".
                try:
                    is_translated = oref.is_text_translated()
                except:
                    is_translated = False
                if not is_translated:
                    self.untrans_categories[top_category] += 1
                    self.untrans_texts[oref.book] += 1
                    self.untrans_refs[source["ref"]] += 1
                    self.untrans_count += 1
                    # Record sheets carrying substantial English for an
                    # untranslated ref.
                    english = strip_tags(source.get("text", {}).get("en", ""))
                    if len(english) > 25:
                        self.fragments[source["ref"]].append(sheet_id)
                        self.fragments_count += 1
            elif "comment" in source:
                self.comments_count += 1
            elif "outsideText" in source or "outsideBiText" in source:
                self.outside_count += 1
        except:
            continue
def make_tag_list(include_sheets=False):
    """
    Return an alphabetized list of public sheet tags. With
    `include_sheets`, each tag also carries its sheets, sorted by views
    descending.
    """
    tags = {}
    projection = {"tags": 1, "title": 1, "id": 1, "views": 1} if include_sheets else {"tags": 1}
    for sheet in db.sheets.find({"status": "public"}, projection):
        for tag in sheet.get("tags", []):
            entry = tags.setdefault(tag, {"tag": tag, "count": 0, "sheets": []})
            if include_sheets:
                entry["sheets"].append({
                    "title": strip_tags(sheet["title"]),
                    "id": sheet["id"],
                    "views": sheet["views"],
                })
            entry["count"] += 1
    results = []
    for entry in tags.values():
        entry["sheets"] = sorted(entry["sheets"], key=lambda s: -s["views"])
        results.append(entry)
    return sorted(results, key=lambda entry: entry["tag"])
def export_to_drive(request, credential, sheet_id):
    """ Export a sheet to Google Drive. """
    http = credential.authorize(httplib2.Http())
    service = build('drive', 'v3', http=http, cache_discovery=False)

    sheet = get_sheet(sheet_id)
    if 'error' in sheet:
        return jsonResponse({'error': {'message': sheet["error"]}})

    # Drive converts the uploaded HTML into a native Google Doc.
    file_metadata = {
        'name': strip_tags(sheet['title'].strip()),
        'mimeType': 'application/vnd.google-apps.document'
    }
    html = bytes(sheet_to_html_string(sheet), "utf8")
    media = MediaIoBaseUpload(BytesIO(html), mimetype='text/html', resumable=True)
    created = service.files().create(
        body=file_metadata, media_body=media, fields='webViewLink').execute()
    return jsonResponse(created)
def sheet_spam_dashboard(request):
    """
    Admin view: list recently created sheets that look like link spam.
    GET only; POST is rejected. `date` query param (YYYY-MM-DD) bounds the
    search window, defaulting to the last 30 days.
    """
    from django.contrib.auth.models import User

    if request.method == 'POST':
        return jsonResponse({"error": "Unsupported Method: {}".format(request.method)})

    else:
        date = request.GET.get("date", None)
        if date:
            date = datetime.strptime(date, '%Y-%m-%d')
        else:
            # `date` is known to be absent on this branch, so .get()
            # returns the default: 30 days ago.
            date = request.GET.get("date", datetime.now() - timedelta(days=30))

        # Only consider sheets owned by accounts created after `date`.
        earliest_new_user_id = User.objects.filter(date_joined__gte=date).order_by('date_joined')[0].id

        # NOTE(review): lookahead is placed after '.*' — the intent appears
        # to be "contains an href not pointing at sefaria.org"; verify the
        # pattern actually filters as intended.
        regex = r'.*(?!href=[\'"](\/|http(s)?:\/\/(www\.)?sefaria).+[\'"])(href).*'

        # Unreviewed, ref-less sheets by new users whose free-text fields
        # match the external-link pattern.
        sheets = db.sheets.find({"sources.ref": {"$exists": False}, "dateCreated": {"$gt": date.strftime("%Y-%m-%dT%H:%M:%S.%f")}, "owner": {"$gt": earliest_new_user_id}, "includedRefs": {"$size": 0}, "reviewed": {"$ne": True}, "$or": [{"sources.outsideText": {"$regex": regex}}, {"sources.comment": {"$regex": regex}}, {"sources.outsideBiText.en": {"$regex": regex}}, {"sources.outsideBiText.he": {"$regex": regex}}]})

        sheets_list = []

        for sheet in sheets:
            sheets_list.append({"id": sheet["id"], "title": strip_tags(sheet["title"]), "owner": user_link(sheet["owner"])})

        return render_template(request, 'spam_dashboard.html', None, {
            "title": "Potential Spam Sheets since %s" % date.strftime("%Y-%m-%d"),
            "sheets": sheets_list,
            "type": "sheet",
        })
def make_sheet_list_by_tag():
    """
    Return an alphabetized list of tags; each tag carries a count and its
    listed sheets sorted by views descending.
    """
    tags = {}
    for sheet in db.sheets.find({"status": {"$in": LISTED_SHEETS}}):
        for tag in sheet.get("tags", []):
            entry = tags.setdefault(tag, {"tag": tag, "count": 0, "sheets": []})
            entry["sheets"].append({
                "title": strip_tags(sheet["title"]),
                "id": sheet["id"],
                "views": sheet["views"],
            })
            entry["count"] += 1
    results = []
    for entry in tags.values():
        entry["sheets"] = sorted(entry["sheets"], key=lambda s: -s["views"])
        results.append(entry)
    return sorted(results, key=lambda entry: entry["tag"])
def export_to_drive(request, credential, sheet_id):
    """ Export a sheet to Google Drive. """
    http = credential.authorize(httplib2.Http())
    service = build('drive', 'v3', http=http)

    sheet = get_sheet(sheet_id)
    if 'error' in sheet:
        return jsonResponse({'error': {'message': sheet["error"]}})

    # Drive converts the uploaded HTML into a native Google Doc.
    file_metadata = {
        'name': strip_tags(sheet['title'].strip()),
        'mimeType': 'application/vnd.google-apps.document'
    }
    html_string = sheet_to_html_string(sheet)
    media = MediaIoBaseUpload(StringIO(html_string), mimetype='text/html', resumable=True)
    created = service.files().create(
        body=file_metadata, media_body=media, fields='webViewLink').execute()
    return jsonResponse(created)
def _sheet_metadata(sheet_id, return_id=False):
    """
    Return a dict of display metadata (title, summary, publisher id) for
    `sheet_id`, or None when the sheet has no metadata. With `return_id`,
    the sheet id is included as well.
    """
    from sefaria.sheets import get_sheet_metadata
    metadata = get_sheet_metadata(sheet_id)
    if not metadata:
        return None
    result = {
        "sheet_title": strip_tags(metadata["title"]),
        "sheet_summary": strip_tags(metadata["summary"]) if "summary" in metadata else "",
        "publisher_id": metadata["owner"],
    }
    if return_id:
        result["sheet_id"] = sheet_id
    return result
def test():
    """
    Print title + URL for each of the 10,000 newest sheets whose language
    detection comes back as "some hebrew".
    """
    recent_sheets = db.sheets.find({}, sort=[["_id", -1]], limit=10000)
    for sheet in recent_sheets:
        if get_sheet_language(sheet) == "some hebrew":
            clean_title = strip_tags(sheet["title"]).replace("\n", "")
            print("{}\thttps://www.sefaria.org/sheets/{}".format(clean_title, sheet["id"]))
def get_sheet_language(sheet):
    """
    Return "hebrew" or "english" — the language we believe `sheet` is
    written in, judged from its title.
    """
    raw_title = sheet.get("title", "")
    title = strip_tags(raw_title).replace("(Copy)", "").replace("\n", " ")
    if is_hebrew(title, heb_only=True):
        return "hebrew"
    return "english"
def get_sheets_for_ref(tref, pad=True, context=1):
    """
    Return a list of listed sheets that include `tref`, formatted as
    needed for the client sidebar.
    """
    oref = model.Ref(tref)
    if pad:
        oref = oref.padded_ref()
    if context:
        oref = oref.context_ref(context)
    ref_re = oref.regex()

    results = []
    sheets = db.sheets.find(
        {"included_refs": {"$regex": ref_re}, "status": {"$in": LISTED_SHEETS}},
        {"id": 1, "title": 1, "owner": 1, "included_refs": 1})
    for sheet in sheets:
        # A single sheet may contain several refs that match the query.
        matched_orefs = [model.Ref(r) for r in sheet["included_refs"] if regex.match(ref_re, r)]
        for match in matched_orefs:
            results.append({
                "category": "Sheets",
                "type": "sheet",
                "owner": sheet["owner"],
                "_id": str(sheet["_id"]),
                "anchorRef": match.normal(),
                "anchorVerse": match.sections[-1],
                "public": True,
                "commentator": user_link(sheet["owner"]),
                "text": "<a class='sheetLink' href='/sheets/%d'>%s</a>" % (sheet["id"], strip_tags(sheet["title"])),
            })
    return results
def spam_dashboard(request):
    """
    Admin dashboard for handling spam sheets.

    POST: delete the selected spam sheets, deactivate their owners'
    accounts, and mark other reviewed sheets so they stop appearing.
    GET: list potential spam sheets created since `date` (YYYY-MM-DD query
    param, defaulting to the last 30 days).
    """
    from django.contrib.auth.models import User

    if request.method == 'POST':
        spam_sheet_ids = list(map(int, request.POST.getlist("spam_sheets[]", [])))
        reviewed_sheet_ids = list(map(int, request.POST.getlist("reviewed_sheets[]", [])))

        # Reviewed sheets are excluded from future dashboard queries.
        db.sheets.update_many({"id": {"$in": reviewed_sheet_ids}}, {"$set": {"reviewed": True}})

        # Distinct owners of the sheets flagged as spam.
        spammers = db.sheets.find({"id": {"$in": spam_sheet_ids}}, {"owner": 1}).distinct("owner")

        for spammer in spammers:
            try:
                spammer_account = User.objects.get(id=spammer)
                spammer_account.is_active = False
                spammer_account.save()
            except:
                # Account may already be gone; continue with the rest.
                continue

        db.sheets.delete_many({"id": {"$in": spam_sheet_ids}})

        return render_template(request, 'spam_dashboard.html', None, {
            "deleted_sheets": len(spam_sheet_ids),
            "sheet_ids": spam_sheet_ids,
            "reviewed_sheets": len(reviewed_sheet_ids),
            "spammers_deactivated": len(spammers)
        })

    else:
        date = request.GET.get("date", None)
        if date:
            date = datetime.strptime(date, '%Y-%m-%d')
        else:
            # `date` is known to be absent on this branch, so .get()
            # returns the default: 30 days ago.
            date = request.GET.get("date", datetime.now() - timedelta(days=30))

        # NOTE(review): unlike the sibling dashboards, this filter has no
        # order_by('date_joined') before indexing [0] — confirm the default
        # ordering yields the earliest new user.
        earliest_new_user_id = User.objects.filter(date_joined__gte=date)[0].id

        # NOTE(review): lookahead placed after '.*' — intent appears to be
        # "contains an href not pointing at sefaria.org"; verify the
        # pattern actually filters as intended.
        regex = r'.*(?!href=[\'"](\/|http(s)?:\/\/(www\.)?sefaria).+[\'"])(href).*'

        # Unreviewed, ref-less sheets by new users whose free-text fields
        # match the external-link pattern.
        sheets = db.sheets.find({"sources.ref": {"$exists": False}, "dateCreated": {"$gt": date.strftime("%Y-%m-%dT%H:%M:%S.%f")}, "owner": {"$gt": earliest_new_user_id}, "includedRefs": {"$size": 0}, "reviewed": {"$ne": True}, "$or": [{"sources.outsideText": {"$regex": regex}}, {"sources.comment": {"$regex": regex}}, {"sources.outsideBiText.en": {"$regex": regex}}, {"sources.outsideBiText.he": {"$regex": regex}}]})

        sheets_list = []

        for sheet in sheets:
            sheets_list.append({"id": sheet["id"], "title": strip_tags(sheet["title"]), "owner": user_link(sheet["owner"])})

        return render_template(request, 'spam_dashboard.html', None, {
            "title": "Potential Spam Sheets since %s" % date.strftime("%Y-%m-%d"),
            "sheets": sheets_list,
        })
def get_sheets_for_ref(tref, pad=True, context=1):
    """
    Return a list of public sheets that include `tref`, formatted as
    needed for the client sidebar, sorted by views descending.

    :param tref: textual ref string.
    :param pad: pad the ref before matching.
    :param context: number of context levels to expand the ref by.
    """
    oref = model.Ref(tref)
    if pad:
        oref = oref.padded_ref()
    if context:
        oref = oref.context_ref(context)
    ref_re = oref.regex()

    results = []

    regex_list = oref.regex(as_list=True)
    # One $regex clause per alternative form of the ref.
    ref_clauses = [{"sources.ref": {"$regex": r}} for r in regex_list]
    sheets = db.sheets.find({"$or": ref_clauses, "status": "public"},
        {"id": 1, "title": 1, "owner": 1, "sources.ref": 1, "views": 1}).sort([["views", -1]])
    for sheet in sheets:
        # Collect every source ref on the sheet, then keep those that
        # actually match the queried ref (a sheet can match repeatedly).
        matched_refs = []
        if "sources" in sheet:
            for source in sheet["sources"]:
                if "ref" in source:
                    matched_refs.append(source["ref"])
        matched_refs = [r for r in matched_refs if regex.match(ref_re, r)]
        for match in matched_refs:
            try:
                match = model.Ref(match)
            except InputError:
                # Skip refs that no longer parse.
                continue
            ownerData = public_user_data(sheet["owner"])
            com = {
                "category": "Sheets",
                "type": "sheet",
                "owner": sheet["owner"],
                "_id": str(sheet["_id"]),
                "anchorRef": match.normal(),
                "anchorVerse": match.sections[-1] if len(match.sections) else 1,
                "public": True,
                "commentator": user_link(sheet["owner"]),  # legacy, used in S1
                "text": "<a class='sheetLink' href='/sheets/%d'>%s</a>" % (sheet["id"], strip_tags(sheet["title"])),  # legacy, used in S1
                "title": strip_tags(sheet["title"]),
                "sheetUrl": "/sheets/" + str(sheet["id"]),
                "ownerName": ownerData["name"],
                "ownerProfileUrl": ownerData["profileUrl"],
                "ownerImageUrl": ownerData["imageUrl"],
                "views": sheet["views"]
            }
            results.append(com)
    return results
def get_sheets_for_ref(tref, pad=True, context=1):
    """
    Return a list of public sheets that include `tref`, formatted for the
    client sidebar.
    """
    oref = model.Ref(tref)
    if pad:
        oref = oref.padded_ref()
    if context:
        oref = oref.context_ref(context)
    ref_re = oref.regex()

    regex_list = oref.regex(as_list=True)
    ref_clauses = [{"included_refs": {"$regex": r}} for r in regex_list]
    sheets = db.sheets.find(
        {"$or": ref_clauses, "status": "public"},
        {"id": 1, "title": 1, "owner": 1, "included_refs": 1})

    results = []
    for sheet in sheets:
        # One sheet can match the queried ref several times.
        for raw_ref in sheet["included_refs"]:
            if not regex.match(ref_re, raw_ref):
                continue
            try:
                match = model.Ref(raw_ref)
            except InputError:
                continue
            results.append({
                "category": "Sheets",
                "type": "sheet",
                "owner": sheet["owner"],
                "_id": str(sheet["_id"]),
                "anchorRef": match.normal(),
                "anchorVerse": match.sections[-1] if len(match.sections) else 1,
                "public": True,
                "commentator": user_link(sheet["owner"]),
                "text": "<a class='sheetLink' href='/sheets/%d'>%s</a>" % (sheet["id"], strip_tags(sheet["title"])),
            })
    return results
def index_sheet(index_name, id):
    """
    Index the source sheet `id` into the search index `index_name`.

    Returns True on success, False when the sheet is missing or indexing
    raises. Increments the module-level `doc_count` on success.
    """
    sheet = db.sheets.find_one({"id": id})
    if not sheet:
        return False

    pud = public_user_data(sheet["owner"])
    tag_terms_simple = make_sheet_tags(sheet)
    tags = [term["en"] for term in tag_terms_simple]

    # Resolve topic slugs to Topic objects, skipping slugs that no longer
    # resolve.
    topics = []
    for topic_dict in sheet.get('topics', []):
        topic_obj = Topic.init(topic_dict['slug'])
        if topic_obj:
            topics.append(topic_obj)

    listed_collections = CollectionSet({"sheets": id, "listed": True})
    collection_names = [collection.name for collection in listed_collections]
    try:
        doc = {
            "title": strip_tags(sheet["title"]),
            "content": make_sheet_text(sheet, pud),
            "owner_id": sheet["owner"],
            "owner_name": pud["name"],
            "owner_image": pud["imageUrl"],
            "profile_url": pud["profileUrl"],
            "version": "Source Sheet by " + user_link(sheet["owner"]),
            "tags": tags,
            "topic_slugs": [topic.slug for topic in topics],
            "topics_en": [topic.get_primary_title('en') for topic in topics],
            "topics_he": [topic.get_primary_title('he') for topic in topics],
            "sheetId": id,
            "summary": sheet.get("summary", None),
            "collections": collection_names,
            "datePublished": sheet.get("datePublished", None),
            "dateCreated": sheet.get("dateCreated", None),
            "dateModified": sheet.get("dateModified", None),
            "views": sheet.get("views", 0)
        }
        es_client.create(index=index_name, doc_type='sheet', id=id, body=doc)
        global doc_count
        doc_count += 1
        return True
    except Exception as e:
        print("Error indexing sheet %d" % id)
        print(e)
        return False
def sheet_to_dict(sheet):
    """
    Return a JSON-serializable dict of the Mongo sheet document `sheet`,
    annotated with owner profile info that is useful to the client.
    """
    owner = public_user_data(sheet["owner"])
    return {
        "id": sheet["id"],
        "title": strip_tags(sheet["title"]) if "title" in sheet else "Untitled Sheet",
        "status": sheet["status"],
        "author": sheet["owner"],
        "ownerName": owner["name"],
        "ownerImageUrl": owner["imageUrl"],
        "views": sheet["views"],
        "modified": dateutil.parser.parse(sheet["dateModified"]).strftime("%m/%d/%Y"),
        "tags": sheet.get("tags", []),
        "options": sheet.get("options", []),
    }
def profile_spam_dashboard(request):
    """
    Admin view: list recently created user profiles that look like spam.
    GET only; POST is rejected. `date` query param (YYYY-MM-DD) bounds the
    search window, defaulting to the last 30 days.
    """
    from django.contrib.auth.models import User

    if request.method == 'POST':
        return jsonResponse({"error": "Unsupported Method: {}".format(request.method)})

    else:
        date = request.GET.get("date", None)
        if date:
            date = datetime.strptime(date, '%Y-%m-%d')
        else:
            # `date` is known to be absent on this branch, so .get()
            # returns the default: 30 days ago.
            date = request.GET.get("date", datetime.now() - timedelta(days=30))

        # Only consider accounts created after `date`.
        earliest_new_user_id = User.objects.filter(date_joined__gte=date).order_by('date_joined')[0].id

        # NOTE(review): lookahead placed after '.*' — intent appears to be
        # "contains an href not pointing at sefaria.org"; verify the
        # pattern actually filters as intended.
        regex = r'.*(?!href=[\'"](\/|http(s)?:\/\/(www\.)?sefaria).+[\'"])(href).*'

        # New, unreviewed profiles that either filled both website and bio,
        # or whose bio matches the external-link pattern.
        users_to_check = db.profiles.find(
            {'$or': [
                {'website': {"$ne": ""}, 'bio': {"$ne": ""}, "id": {"$gt": earliest_new_user_id}, "reviewed": {"$ne": True}},
                {'bio': {"$regex": regex}, "id": {"$gt": earliest_new_user_id}, "reviewed": {"$ne": True}}
            ]
            })
        profiles_list = []
        for user in users_to_check:
            # Users with little site activity are the likely spammers.
            history_count = db.user_history.find({'uid': user['id']}).count()
            if history_count < 10:
                profiles_list.append({"id": user["id"], "slug": user["slug"], "bio": strip_tags(user["bio"][0:250]), "website": user["website"][0:50]})

        return render_template(request, 'spam_dashboard.html', None, {
            "title": "Potential Spam Profiles since %s" % date.strftime("%Y-%m-%d"),
            "profiles": profiles_list,
            "type": "profile",
        })
def source_text(source):
    """
    Recursively flatten a source dictionary (including its subsources)
    into plain text.
    """
    fields = [
        source.get("customTitle", ""),
        source.get("ref", ""),
        source.get("text", {"he": ""}).get("he", ""),
        source.get("text", {"en": ""}).get("en", ""),
        source.get("comment", ""),
        source.get("outside", ""),
    ]
    text = " ".join(strip_tags(field) for field in fields)
    for sub in source.get("subsources", []):
        text += source_text(sub)
    return text
def untagged_sheets(request):
    """
    Admin page listing public sheets with no tags, 100 per page, paged via
    the `page` query param.
    """
    page = int(request.GET.get("page", 0))
    page_size = 100
    sheets = db.sheets.find(
        {"status": "public", "tags": []},
        {"id": 1, "title": 1}
    ).limit(page_size).skip(page_size * page)

    items = [
        "<li><a href='/sheets/%d' target='_blank'>%s</a></li>" % (sheet["id"], strip_tags(sheet["title"]))
        for sheet in sheets
    ]
    html = "".join(items)
    html += u"<br><a href='/admin/untagged-sheets?page=%d'>More ›</a>" % (page + 1)
    return HttpResponse("<html><h1>Untagged Public Sheets</h1><ul>" + html + "</ul></html>")
def index_sheet(index_name, id):
    """
    Index the source sheet `id` into the search index `index_name`.

    :param index_name: name of the target search index.
    :param id: integer sheet id.
    :return: True on success, False when the sheet is missing or indexing fails.
    Side effect: increments the module-level `doc_count` on success.
    """
    sheet = db.sheets.find_one({"id": id})
    if not sheet:
        return False

    pud = public_user_data(sheet["owner"])
    tag_terms_simple = make_sheet_tags(sheet)
    tags = [t["en"] for t in tag_terms_simple]
    # Combined "en|||he" form used for bilingual tag search.
    tags_he_and_en = [u"{}|||{}".format(t["en"], t["he"]) for t in tag_terms_simple]
    try:
        doc = {
            "title": strip_tags(sheet["title"]),
            "content": make_sheet_text(sheet, pud),
            "owner_id": sheet["owner"],
            "owner_name": pud["name"],
            "owner_image": pud["imageUrl"],
            "profile_url": pud["profileUrl"],
            "version": "Source Sheet by " + user_link(sheet["owner"]),
            "tags": tags,
            "tags_he_and_en": tags_he_and_en,
            "sheetId": id,
            "summary": sheet.get("summary", None),
            "group": sheet.get("group", None),
            "datePublished": sheet.get("datePublished", None),
            "dateCreated": sheet.get("dateCreated", None),
            # Fixed: previously read "dateCreated" here, so the indexed
            # modification date was always the creation date.
            "dateModified": sheet.get("dateModified", None),
            "views": sheet.get("views", 0)
        }
        es_client.create(index=index_name, doc_type='sheet', id=id, body=doc)
        global doc_count
        doc_count += 1
        return True
    # Fixed: Python-2 except/print syntax, inconsistent with the Py3
    # blocks in this file.
    except Exception as e:
        print("Error indexing sheet %d" % id)
        print(e)
        return False
def make_sheet_list_by_tag():
    """
    Return an alphabetized list of tags, each carrying a count and its
    listed sheets sorted by views descending.
    """
    tags = {}
    for sheet in db.sheets.find({"status": {"$in": LISTED_SHEETS}}):
        for tag in sheet.get("tags", []):
            if tag not in tags:
                tags[tag] = {"tag": tag, "count": 0, "sheets": []}
            tags[tag]["sheets"].append({
                "title": strip_tags(sheet["title"]),
                "id": sheet["id"],
                "views": sheet["views"],
            })
            tags[tag]["count"] += 1

    results = []
    for entry in tags.values():
        entry["sheets"] = sorted(entry["sheets"], key=lambda s: -s["views"])
        results.append(entry)
    return sorted(results, key=lambda entry: entry["tag"])
def refine_ref_by_text(ref, text):
    """
    Return a ref string that refines `ref` by locating `text` within the
    Hebrew text stored in the library; returns `ref` unchanged when no
    refinement can be made.
    """
    try:
        oref = model.Ref(ref).section_ref()
    except:
        return ref
    needle = strip_tags(text).strip().replace("\n", "")
    hay = model.TextChunk(oref, lang="he").text

    start, end = None, None
    for idx, segment in enumerate(hay):
        if not isinstance(segment, basestring):
            # TODO handle this case
            # happens with spanning ref like "Shabbat 3a-3b"
            return ref
        if needle in segment:
            # Exact containment pins both ends to this segment.
            start, end = idx + 1, idx + 1
            break
        if not start and string_overlap(segment, needle):
            start = idx + 1
        elif string_overlap(needle, segment):
            end = idx + 1
            break

    if start and end:
        if start == end:
            ref = "%s:%d" % (oref.normal(), start)
        else:
            ref = "%s:%d-%d" % (oref.normal(), start, end)
    return ref
def refine_ref_by_text(ref, text):
    """
    Return a ref string that refines `ref` by comparing `text` against the
    Hebrew text stored in the library; `ref` is returned unchanged when no
    refinement is possible.
    """
    try:
        oref = model.Ref(ref).section_ref()
    except:
        return ref
    needle = strip_tags(text).strip().replace("\n", "")
    hay = model.TextChunk(oref, lang="he").text

    start, end = None, None
    for idx, segment in enumerate(hay):
        if not isinstance(segment, basestring):
            # TODO handle this case
            # happens with spanning ref like "Shabbat 3a-3b"
            return ref
        if needle in segment:
            # Exact containment pins both ends to this segment.
            start, end = idx + 1, idx + 1
            break
        if not start and string_overlap(segment, needle):
            start = idx + 1
        elif string_overlap(needle, segment):
            end = idx + 1
            break

    if start and end:
        if start == end:
            ref = "%s:%d" % (oref.normal(), start)
        else:
            ref = "%s:%d-%d" % (oref.normal(), start, end)
    return ref
def count_sources(sources, sheet_id):
    """
    Recursively tally statistics about `sources` (from sheet `sheet_id`)
    into the module-level counters.
    """
    global refs, texts, categories
    global sources_count, comments_count, outside_count, untrans_count
    global untrans_texts, untrans_categories, untrans_refs
    global fragments, fragments_count

    for source in sources:
        if "ref" in source and source["ref"] is not None:
            sources_count += 1
            try:
                oref = model.Ref(source["ref"]).padded_ref()
            except InputError:
                continue
            refs[source["ref"]] += 1
            texts[oref.book] += 1
            categories[oref.index.categories[0]] += 1
            if not model.Ref(source["ref"]).is_text_translated():
                untrans_categories[oref.index.categories[0]] += 1
                untrans_texts[oref.book] += 1
                untrans_refs[source["ref"]] += 1
                untrans_count += 1
                # Flag sheets carrying a substantial English fragment for
                # an otherwise untranslated ref.
                english = strip_tags(source.get("text", {}).get("en", ""))
                if len(english) > 25:
                    fragments[source["ref"]].append(sheet_id)
                    fragments_count += 1
            if "subsources" in source:
                count_sources(source["subsources"], sheet_id)
        elif "comment" in source:
            comments_count += 1
        elif "outsideText" in source or "outsideBiText" in source:
            outside_count += 1
def count_sources(sources, sheet_id):
    """
    Recursively accumulate statistics about `sources` (from sheet
    `sheet_id`) into the module-level counters.
    """
    global refs, texts, categories
    global sources_count, comments_count, outside_count, untrans_count
    global untrans_texts, untrans_categories, untrans_refs
    global fragments, fragments_count

    for source in sources:
        if "ref" in source and source["ref"] is not None:
            sources_count += 1
            try:
                oref = Ref(source["ref"]).padded_ref()
            except InputError:
                continue
            refs[source["ref"]] += 1
            texts[oref.book] += 1
            categories[oref.index.categories[0]] += 1
            if not Ref(source["ref"]).is_text_translated():
                untrans_categories[oref.index.categories[0]] += 1
                untrans_texts[oref.book] += 1
                untrans_refs[source["ref"]] += 1
                untrans_count += 1
                # Flag sheets carrying a substantial English fragment for
                # an otherwise untranslated ref.
                english = strip_tags(source.get("text", {}).get("en", ""))
                if len(english) > 25:
                    fragments[source["ref"]].append(sheet_id)
                    fragments_count += 1
            if "subsources" in source:
                count_sources(source["subsources"], sheet_id)
        elif "comment" in source:
            comments_count += 1
        elif "outsideText" in source or "outsideBiText" in source:
            outside_count += 1
def get_sheets_for_ref(tref, uid=None):
    """
    Return a list of sheets that include `tref`, formatted as needed for
    the client sidebar, sorted by views descending.

    :param tref: textual ref string.
    :param uid: if present, return that user's sheets; otherwise public sheets.
    """
    oref = model.Ref(tref)
    # perform initial search with context to catch ranges that include a segment ref
    regex_list = oref.context_ref().regex(as_list=True)
    ref_clauses = [{"includedRefs": {"$regex": r}} for r in regex_list]
    query = {"$or": ref_clauses}
    if uid:
        query["owner"] = uid
    else:
        query["status"] = "public"
    sheetsObj = db.sheets.find(query,
        {"id": 1, "title": 1, "owner": 1, "viaOwner": 1, "via": 1, "dateCreated": 1, "includedRefs": 1, "views": 1, "tags": 1, "status": 1, "summary": 1, "attribution": 1, "assigner_id": 1, "likes": 1, "group": 1, "options": 1}).sort([["views", -1]])
    sheets = list((s for s in sheetsObj))
    user_ids = list(set([s["owner"] for s in sheets]))
    # Merge owner data from Django (names, email) and Mongo (slug),
    # keyed by user id.
    django_user_profiles = User.objects.filter(id__in=user_ids).values('email', 'first_name', 'last_name', 'id')
    user_profiles = {item['id']: item for item in django_user_profiles}
    mongo_user_profiles = list(db.profiles.find({"id": {"$in": user_ids}}, {"id": 1, "slug": 1}))
    mongo_user_profiles = {item['id']: item for item in mongo_user_profiles}
    for profile in user_profiles:
        user_profiles[profile]["slug"] = mongo_user_profiles[profile]["slug"]
    ref_re = "(" + '|'.join(regex_list) + ")"
    results = []
    for sheet in sheets:
        # Cheap title prefix check first, then the full regex match.
        potential_matches = [r for r in sheet["includedRefs"] if r.startswith(oref.index.title)]
        matched_refs = [r for r in potential_matches if regex.match(ref_re, r)]

        for match in matched_refs:
            try:
                match = model.Ref(match)
            except InputError:
                continue
            # Placeholder owner used when the account no longer exists.
            ownerData = user_profiles.get(sheet["owner"], {'first_name': u'Ploni', 'last_name': u'Almoni', 'email': u'*****@*****.**', 'slug': 'Ploni-Almoni', 'id': None})

            default_image = "https://www.sefaria.org/static/img/profile-default.png"
            gravatar_base = "https://www.gravatar.com/avatar/" + hashlib.md5(ownerData["email"].lower()).hexdigest() + "?"
            gravatar_url_small = gravatar_base + urllib.urlencode({'d': default_image, 's': str(80)})

            # Annotate the sheet dict in place with assigner / via-owner /
            # group display data where present.
            if "assigner_id" in sheet:
                asignerData = public_user_data(sheet["assigner_id"])
                sheet["assignerName"] = asignerData["name"]
                sheet["assignerProfileUrl"] = asignerData["profileUrl"]
            if "viaOwner" in sheet:
                viaOwnerData = public_user_data(sheet["viaOwner"])
                sheet["viaOwnerName"] = viaOwnerData["name"]
                sheet["viaOwnerProfileUrl"] = viaOwnerData["profileUrl"]
            if "group" in sheet:
                group = Group().load({"name": sheet["group"]})
                try:
                    sheet["groupLogo"] = group.imageUrl
                except:
                    sheet["groupLogo"] = None

            sheet_data = {
                "owner": sheet["owner"],
                "_id": str(sheet["_id"]),
                "id": str(sheet["id"]),
                "anchorRef": match.normal(),
                "anchorVerse": match.sections[-1] if len(match.sections) else 1,
                "public": sheet["status"] == "public",
                "title": strip_tags(sheet["title"]),
                "sheetUrl": "/sheets/" + str(sheet["id"]),
                "options": sheet["options"],
                "naturalDateCreated": naturaltime(datetime.strptime(sheet["dateCreated"], "%Y-%m-%dT%H:%M:%S.%f")),
                "groupLogo": sheet.get("groupLogo", None),
                "ownerName": ownerData["first_name"] + " " + ownerData["last_name"],
                "via": sheet.get("via", None),
                "viaOwnerName": sheet.get("viaOwnerName", None),
                "assignerName": sheet.get("assignerName", None),
                "viaOwnerProfileUrl": sheet.get("viaOwnerProfileUrl", None),
                "assignerProfileUrl": sheet.get("assignerProfileUrl", None),
                "ownerProfileUrl": "/profile/" + ownerData["slug"],
                "ownerImageUrl": gravatar_url_small,
                "status": sheet["status"],
                "views": sheet["views"],
                "tags": sheet.get("tags", []),
                "likes": sheet.get("likes", []),
                "summary": sheet.get("summary", None),
                "attribution": sheet.get("attribution", None),
                "category": "Sheets",  # ditto
                "type": "sheet",  # ditto
            }
            results.append(sheet_data)
    return results
def untagged_sheets(request):
    """
    Admin page listing public sheets with no tags, paginated 100 per page
    via the `page` query param.
    """
    page = int(request.GET.get("page", 0))
    page_size = 100
    sheets = db.sheets.find(
        {"status": "public", "tags": []},
        {"id": 1, "title": 1}
    ).limit(page_size).skip(page_size * page)

    links = [
        "<li><a href='/sheets/%d' target='_blank'>%s</a></li>" % (sheet["id"], strip_tags(sheet["title"]))
        for sheet in sheets
    ]
    html = "".join(links)
    html += u"<br><a href='/admin/untagged-sheets?page=%d'>More ›</a>" % (page + 1)
    return HttpResponse("<html><h1>Untagged Public Sheets</h1><ul>" + html + "</ul></html>")
def get_sheets_for_ref(tref, pad=True, context=1):
    """
    Return a list of public sheets including `tref`, formatted for the
    client sidebar.
    """
    oref = model.Ref(tref)
    if pad:
        oref = oref.padded_ref()
    if context:
        oref = oref.context_ref(context)
    ref_re = oref.regex()

    regex_list = oref.regex(as_list=True)
    ref_clauses = [{"included_refs": {"$regex": r}} for r in regex_list]
    sheets = db.sheets.find(
        {"$or": ref_clauses, "status": "public"},
        {"id": 1, "title": 1, "owner": 1, "included_refs": 1})

    results = []
    for sheet in sheets:
        # A single sheet may match the queried ref more than once.
        for raw_ref in sheet["included_refs"]:
            if not regex.match(ref_re, raw_ref):
                continue
            try:
                match = model.Ref(raw_ref)
            except InputError:
                continue
            results.append({
                "category": "Sheets",
                "type": "sheet",
                "owner": sheet["owner"],
                "_id": str(sheet["_id"]),
                "anchorRef": match.normal(),
                "anchorVerse": match.sections[-1],
                "public": True,
                "commentator": user_link(sheet["owner"]),
                "text": "<a class='sheetLink' href='/sheets/%d'>%s</a>" % (sheet["id"], strip_tags(sheet["title"])),
            })
    return results
def get_sheets_for_ref(tref, uid=None, in_collection=None):
    """
    Returns a list of sheets that include ref,
    formating as need for the Client Sidebar.
    If `uid` is present return user sheets, otherwise return public sheets.
    If `in_collection` (list of slugs) is present, only return sheets in one of the listed collections.
    """
    oref = model.Ref(tref)
    # perform initial search with context to catch ranges that include a segment ref
    segment_refs = [r.normal() for r in oref.all_segment_refs()]

    query = {"expandedRefs": {"$in": segment_refs}}
    if uid:
        query["owner"] = uid
    else:
        query["status"] = "public"

    if in_collection:
        collections = CollectionSet({"slug": {"$in": in_collection}})
        sheets_list = [collection.sheets for collection in collections]
        sheets_ids = [sheet for sublist in sheets_list for sheet in sublist]
        query["id"] = {"$in": sheets_ids}

    sheetsObj = db.sheets.find(query,
        {"id": 1, "title": 1, "owner": 1, "viaOwner": 1, "via": 1, "dateCreated": 1,
         "includedRefs": 1, "expandedRefs": 1, "views": 1, "topics": 1, "status": 1,
         "summary": 1, "attribution": 1, "assigner_id": 1, "likes": 1,
         "displayedCollection": 1, "options": 1}).sort([["views", -1]])
    sheetsObj.hint("expandedRefs_1")
    sheets = [s for s in sheetsObj]
    user_ids = list({s["owner"] for s in sheets})
    django_user_profiles = User.objects.filter(id__in=user_ids).values('email', 'first_name', 'last_name', 'id')
    user_profiles = {item['id']: item for item in django_user_profiles}
    mongo_user_profiles = list(db.profiles.find({"id": {"$in": user_ids}},
                                                {"id": 1, "slug": 1, "profile_pic_url_small": 1}))
    mongo_user_profiles = {item['id']: item for item in mongo_user_profiles}
    for profile_id in user_profiles:
        # Merge mongo profile data into the Django profile record.  A user may
        # have a Django account but no mongo profile document; previously this
        # was handled with bare `except:` clauses that also masked unrelated
        # errors -- explicit .get() fallbacks preserve the same defaults.
        mongo_profile = mongo_user_profiles.get(profile_id, {})
        user_profiles[profile_id]["slug"] = mongo_profile.get("slug", "/")
        user_profiles[profile_id]["profile_pic_url_small"] = mongo_profile.get("profile_pic_url_small", "")

    results = []
    for sheet in sheets:
        anchor_ref_list, anchor_ref_expanded_list = oref.get_all_anchor_refs(
            segment_refs, sheet.get("includedRefs", []), sheet.get("expandedRefs", []))
        # Fall back to a placeholder owner when the sheet owner has no profile.
        ownerData = user_profiles.get(sheet["owner"], {
            'first_name': 'Ploni', 'last_name': 'Almoni', 'email': '*****@*****.**',
            'slug': 'Ploni-Almoni', 'id': None, 'profile_pic_url_small': ''})

        if "assigner_id" in sheet:
            asignerData = public_user_data(sheet["assigner_id"])
            sheet["assignerName"] = asignerData["name"]
            sheet["assignerProfileUrl"] = asignerData["profileUrl"]
        if "viaOwner" in sheet:
            viaOwnerData = public_user_data(sheet["viaOwner"])
            sheet["viaOwnerName"] = viaOwnerData["name"]
            sheet["viaOwnerProfileUrl"] = viaOwnerData["profileUrl"]
        if "displayedCollection" in sheet:
            collection = Collection().load({"slug": sheet["displayedCollection"]})
            sheet["collectionTOC"] = getattr(collection, "toc", None)
        topics = add_langs_to_topics(sheet.get("topics", []))
        # Emit one result row per anchor ref occurrence within the sheet.
        for anchor_ref, anchor_ref_expanded in zip(anchor_ref_list, anchor_ref_expanded_list):
            sheet_data = {
                "owner": sheet["owner"],
                "_id": str(sheet["_id"]),
                "id": str(sheet["id"]),
                "public": sheet["status"] == "public",
                "title": strip_tags(sheet["title"]),
                "sheetUrl": "/sheets/" + str(sheet["id"]),
                "anchorRef": anchor_ref.normal(),
                "anchorRefExpanded": [r.normal() for r in anchor_ref_expanded],
                "options": sheet["options"],
                "collectionTOC": sheet.get("collectionTOC", None),
                "ownerName": ownerData["first_name"] + " " + ownerData["last_name"],
                "via": sheet.get("via", None),
                "viaOwnerName": sheet.get("viaOwnerName", None),
                "assignerName": sheet.get("assignerName", None),
                "viaOwnerProfileUrl": sheet.get("viaOwnerProfileUrl", None),
                "assignerProfileUrl": sheet.get("assignerProfileUrl", None),
                "ownerProfileUrl": "/profile/" + ownerData["slug"],
                "ownerImageUrl": ownerData.get('profile_pic_url_small', ''),
                "status": sheet["status"],
                "views": sheet["views"],
                "topics": topics,
                "likes": sheet.get("likes", []),
                "summary": sheet.get("summary", None),
                "attribution": sheet.get("attribution", None),
                "is_featured": sheet.get("is_featured", False),
                "category": "Sheets",  # ditto
                "type": "sheet",  # ditto
            }
            results.append(sheet_data)

    return results
def get_sheets_for_ref(tref, uid=None, in_group=None):
    """
    Returns a list of sheets that include ref,
    formating as need for the Client Sidebar.
    If `uid` is present return user sheets, otherwise return public sheets.
    If `in_group` (list) is present, only return sheets in one of the listed groups.
    """
    oref = model.Ref(tref)
    # perform initial search with context to catch ranges that include a segment ref
    segment_refs = [r.normal() for r in oref.all_segment_refs()]

    query = {"expandedRefs": {"$in": segment_refs}}
    if uid:
        query["owner"] = uid
    else:
        query["status"] = "public"
    if in_group:
        query["group"] = {"$in": in_group}

    sheetsObj = db.sheets.find(query,
        {"id": 1, "title": 1, "owner": 1, "viaOwner": 1, "via": 1, "dateCreated": 1,
         "includedRefs": 1, "expandedRefs": 1, "views": 1, "topics": 1, "status": 1,
         "summary": 1, "attribution": 1, "assigner_id": 1, "likes": 1,
         "group": 1, "options": 1}).sort([["views", -1]])
    sheetsObj.hint("expandedRefs_1")
    sheets = [s for s in sheetsObj]
    user_ids = list({s["owner"] for s in sheets})
    django_user_profiles = User.objects.filter(id__in=user_ids).values('email', 'first_name', 'last_name', 'id')
    user_profiles = {item['id']: item for item in django_user_profiles}
    mongo_user_profiles = list(db.profiles.find({"id": {"$in": user_ids}},
                                                {"id": 1, "slug": 1, "profile_pic_url_small": 1}))
    mongo_user_profiles = {item['id']: item for item in mongo_user_profiles}
    for profile_id in user_profiles:
        # Merge mongo profile data into the Django profile record.  A user may
        # have a Django account but no mongo profile document; previously this
        # was handled with bare `except:` clauses that also masked unrelated
        # errors -- explicit .get() fallbacks preserve the same defaults.
        mongo_profile = mongo_user_profiles.get(profile_id, {})
        user_profiles[profile_id]["slug"] = mongo_profile.get("slug", "/")
        user_profiles[profile_id]["profile_pic_url_small"] = mongo_profile.get("profile_pic_url_small", "")

    results = []
    for sheet in sheets:
        anchor_ref_list, anchor_ref_expanded_list = oref.get_all_anchor_refs(
            segment_refs, sheet.get("includedRefs", []), sheet.get("expandedRefs", []))
        # Fall back to a placeholder owner when the sheet owner has no profile.
        ownerData = user_profiles.get(sheet["owner"], {
            'first_name': 'Ploni', 'last_name': 'Almoni', 'email': '*****@*****.**',
            'slug': 'Ploni-Almoni', 'id': None, 'profile_pic_url_small': ''})

        if len(ownerData.get('profile_pic_url_small', '')) == 0:
            # No uploaded profile picture: fall back to a Gravatar URL keyed on
            # the md5 of the lowercased email.
            default_image = "https://www.sefaria.org/static/img/profile-default.png"
            gravatar_base = "https://www.gravatar.com/avatar/" + hashlib.md5(
                ownerData["email"].lower().encode('utf8')).hexdigest() + "?"
            gravatar_url_small = gravatar_base + urllib.parse.urlencode({'d': default_image, 's': str(80)})
            ownerData['profile_pic_url_small'] = gravatar_url_small

        if "assigner_id" in sheet:
            asignerData = public_user_data(sheet["assigner_id"])
            sheet["assignerName"] = asignerData["name"]
            sheet["assignerProfileUrl"] = asignerData["profileUrl"]
        if "viaOwner" in sheet:
            viaOwnerData = public_user_data(sheet["viaOwner"])
            sheet["viaOwnerName"] = viaOwnerData["name"]
            sheet["viaOwnerProfileUrl"] = viaOwnerData["profileUrl"]
        if "group" in sheet:
            group = Group().load({"name": sheet["group"]})
            sheet["groupLogo"] = getattr(group, "imageUrl", None)
            sheet["groupTOC"] = getattr(group, "toc", None)

        natural_date_created = naturaltime(datetime.strptime(sheet["dateCreated"], "%Y-%m-%dT%H:%M:%S.%f"))
        topics = add_langs_to_topics(sheet.get("topics", []))
        # Emit one result row per anchor ref occurrence within the sheet.
        for anchor_ref, anchor_ref_expanded in zip(anchor_ref_list, anchor_ref_expanded_list):
            sheet_data = {
                "owner": sheet["owner"],
                "_id": str(sheet["_id"]),
                "id": str(sheet["id"]),
                "public": sheet["status"] == "public",
                "title": strip_tags(sheet["title"]),
                "sheetUrl": "/sheets/" + str(sheet["id"]),
                "anchorRef": anchor_ref.normal(),
                "anchorRefExpanded": [r.normal() for r in anchor_ref_expanded],
                "options": sheet["options"],
                "naturalDateCreated": natural_date_created,
                "group": sheet.get("group", None),
                "groupLogo": sheet.get("groupLogo", None),
                "groupTOC": sheet.get("groupTOC", None),
                "ownerName": ownerData["first_name"] + " " + ownerData["last_name"],
                "via": sheet.get("via", None),
                "viaOwnerName": sheet.get("viaOwnerName", None),
                "assignerName": sheet.get("assignerName", None),
                "viaOwnerProfileUrl": sheet.get("viaOwnerProfileUrl", None),
                "assignerProfileUrl": sheet.get("assignerProfileUrl", None),
                "ownerProfileUrl": "/profile/" + ownerData["slug"],
                "ownerImageUrl": ownerData.get('profile_pic_url_small', ''),
                "status": sheet["status"],
                "views": sheet["views"],
                "topics": topics,
                "likes": sheet.get("likes", []),
                "summary": sheet.get("summary", None),
                "attribution": sheet.get("attribution", None),
                "is_featured": sheet.get("is_featured", False),
                "category": "Sheets",  # ditto
                "type": "sheet",  # ditto
            }
            results.append(sheet_data)

    return results
def get_sheets_for_ref(tref, uid=None):
    """
    Returns a list of sheets that include ref,
    formating as need for the Client Sidebar.
    If `uid` is present return user sheets, otherwise return public sheets.
    """
    oref = model.Ref(tref)
    # perform initial search with context to catch ranges that include a segment ref
    regex_list = oref.context_ref().regex(as_list=True)
    ref_clauses = [{"includedRefs": {"$regex": r}} for r in regex_list]
    query = {"$or": ref_clauses}
    if uid:
        query["owner"] = uid
    else:
        query["status"] = "public"
    sheetsObj = db.sheets.find(query,
        {"id": 1, "title": 1, "owner": 1, "viaOwner": 1, "via": 1, "dateCreated": 1,
         "includedRefs": 1, "views": 1, "tags": 1, "status": 1, "summary": 1,
         "attribution": 1, "assigner_id": 1, "likes": 1, "options": 1}).sort([["views", -1]])
    sheets = list((s for s in sheetsObj))
    user_ids = list(set([s["owner"] for s in sheets]))
    django_user_profiles = User.objects.filter(id__in=user_ids).values('email', 'first_name', 'last_name', 'id')
    user_profiles = {item['id']: item for item in django_user_profiles}
    mongo_user_profiles = list(db.profiles.find({"id": {"$in": user_ids}}, {"id": 1, "slug": 1}))
    mongo_user_profiles = {item['id']: item for item in mongo_user_profiles}
    for profile in user_profiles:
        # A user may have a Django account but no mongo profile document; the
        # previous unguarded lookup raised KeyError for such users.  Fall back
        # to "/", matching the other versions of this function in the file.
        user_profiles[profile]["slug"] = mongo_user_profiles.get(profile, {}).get("slug", "/")

    ref_re = "(" + '|'.join(regex_list) + ")"
    results = []
    for sheet in sheets:
        # Cheap prefix filter before running the regex over every stored ref.
        potential_matches = [r for r in sheet["includedRefs"] if r.startswith(oref.index.title)]
        matched_refs = [r for r in potential_matches if regex.match(ref_re, r)]

        for match in matched_refs:
            try:
                match = model.Ref(match)
            except InputError:
                # Skip stored refs that no longer parse
                continue
            # Fall back to a placeholder owner when the sheet owner has no profile.
            ownerData = user_profiles.get(sheet["owner"], {
                'first_name': u'Ploni', 'last_name': u'Almoni', 'email': u'*****@*****.**',
                'slug': 'Ploni-Almoni', 'id': None})

            # Gravatar URL keyed on the md5 of the lowercased email.
            default_image = "https://www.sefaria.org/static/img/profile-default.png"
            gravatar_base = "https://www.gravatar.com/avatar/" + hashlib.md5(ownerData["email"].lower()).hexdigest() + "?"
            gravatar_url_small = gravatar_base + urllib.urlencode({'d': default_image, 's': str(80)})

            if "assigner_id" in sheet:
                asignerData = public_user_data(sheet["assigner_id"])
                sheet["assignerName"] = asignerData["name"]
                sheet["assignerProfileUrl"] = asignerData["profileUrl"]
            if "viaOwner" in sheet:
                viaOwnerData = public_user_data(sheet["viaOwner"])
                sheet["viaOwnerName"] = viaOwnerData["name"]
                sheet["viaOwnerProfileUrl"] = viaOwnerData["profileUrl"]

            sheet_data = {
                "owner": sheet["owner"],
                "_id": str(sheet["_id"]),
                "id": str(sheet["id"]),
                "anchorRef": match.normal(),
                "anchorVerse": match.sections[-1] if len(match.sections) else 1,
                "public": sheet["status"] == "public",
                "title": strip_tags(sheet["title"]),
                "sheetUrl": "/sheets/" + str(sheet["id"]),
                "options": sheet["options"],
                "naturalDateCreated": naturaltime(datetime.strptime(sheet["dateCreated"], "%Y-%m-%dT%H:%M:%S.%f")),
                "ownerName": ownerData["first_name"] + " " + ownerData["last_name"],
                "via": sheet.get("via", None),
                "viaOwnerName": sheet.get("viaOwnerName", None),
                "assignerName": sheet.get("assignerName", None),
                "viaOwnerProfileUrl": sheet.get("viaOwnerProfileUrl", None),
                "assignerProfileUrl": sheet.get("assignerProfileUrl", None),
                "ownerProfileUrl": "/profile/" + ownerData["slug"],
                "ownerImageUrl": gravatar_url_small,
                "status": sheet["status"],
                "views": sheet["views"],
                "tags": sheet.get("tags", []),
                "likes": sheet.get("likes", []),
                "summary": sheet.get("summary", None),
                "attribution": sheet.get("attribution", None),
                "category": "Sheets",  # ditto
                "type": "sheet",  # ditto
            }
            results.append(sheet_data)

    return results
"profile_url", ]) for row in reader: email = row[8] profile = UserProfile(email=email) if not profile.date_joined: writer.writerow(row + ["false", 0, 0, "", "", "", ""]) continue has_account = "true" sheets_count = db.sheets.find({"owner": profile.id}).count() public_sheets_count = db.sheets.find({"owner": profile.id, "status": "public"}).count() sheet_views = sum([sheet["views"] for sheet in sheet_list(query={"owner": profile.id})]) bio = strip_tags(profile.bio) organization = profile.organization position = profile.position jewish_education = ". ".join(profile.jewish_education) profile_url = "https://www.sefaria.org/profile/%s" % profile.slug new_row = row + [ has_account, sheets_count, public_sheets_count, sheet_views, bio, organization, position, jewish_education, profile_url,
def annotate_sheet(n, sheet_id):
    """Attach the sheet's title (tags stripped) and summary to notification `n`'s content."""
    metadata = get_sheet_metadata(id=sheet_id)
    content = n["content"]
    content["sheet_title"] = strip_tags(metadata["title"], remove_new_lines=True)
    content["summary"] = metadata["summary"]
def get_sheets_for_ref(tref, pad=True, context=1):
    """
    Returns a list of sheets that include ref,
    formating as need for the Client Sidebar.

    :param tref: textual ref to search for.
    :param pad: if True, pad the ref to its default depth first.
    :param context: number of context levels to expand the ref by (0 disables).
    :return: list of dicts, one per matching ref occurrence per listed sheet.
    """
    #tref = norm_ref(tref, pad=pad, context=context)
    #ref_re = make_ref_re(tref)
    oref = model.Ref(tref)
    if pad:
        oref = oref.padded_ref()
    if context:
        oref = oref.context_ref(context)
    ref_re = oref.regex()
    results = []
    sheets = db.sheets.find({"included_refs": {"$regex": ref_re}, "status": {"$in": LISTED_SHEETS}},
                            {"id": 1, "title": 1, "owner": 1, "included_refs": 1})
    for sheet in sheets:
        # Check for multiple matching refs within this sheet
        matched_orefs = []
        for r in sheet["included_refs"]:
            if not regex.match(ref_re, r):
                continue
            try:
                matched_orefs.append(model.Ref(r))
            except InputError:
                # Stored refs can go stale; previously an unhandled InputError
                # here aborted the whole lookup.  Skip them instead, matching
                # the other versions of this function in the file.
                continue
        for match in matched_orefs:
            com = {}
            com["category"] = "Sheets"
            com["type"] = "sheet"
            com["owner"] = sheet["owner"]
            com["_id"] = str(sheet["_id"])
            com["anchorRef"] = match.normal()
            # Guard against book-level refs with no sections (was an IndexError).
            com["anchorVerse"] = match.sections[-1] if len(match.sections) else 1
            com["public"] = True
            com["commentator"] = user_link(sheet["owner"])
            com["text"] = "<a class='sheetLink' href='/sheets/%d'>%s</a>" % (sheet["id"], strip_tags(sheet["title"]))
            results.append(com)
    return results
def get_sheets_for_ref(tref, uid=None):
    """
    Returns a list of sheets that include ref,
    formating as need for the Client Sidebar.
    If `uid` is present return user sheets, otherwise return public sheets.
    """
    oref = model.Ref(tref)
    # perform initial search with context to catch ranges that include a segment ref
    regex_list = oref.context_ref().regex(as_list=True)
    query = {"$or": [{"sources.ref": {"$regex": r}} for r in regex_list]}
    if uid:
        query["owner"] = uid
    else:
        query["status"] = "public"
    cursor = db.sheets.find(query,
                            {"id": 1, "title": 1, "owner": 1, "sources.ref": 1,
                             "views": 1, "tags": 1, "status": 1}).sort([["views", -1]])

    results = []
    for sheet in cursor:
        # Every source ref in the sheet is a candidate anchor.
        candidate_refs = [source["ref"] for source in sheet.get("sources", []) if "ref" in source]
        for ref_string in candidate_refs:
            try:
                anchor = model.Ref(ref_string)
                if not oref.overlaps(anchor):
                    continue
            except InputError:
                # Skip stored refs that no longer parse.
                continue
            owner_data = public_user_data(sheet["owner"])
            results.append({
                "owner": sheet["owner"],
                "_id": str(sheet["_id"]),
                "anchorRef": anchor.normal(),
                "anchorVerse": anchor.sections[-1] if len(anchor.sections) else 1,
                "public": sheet["status"] == "public",
                "text": "<a class='sheetLink' href='/sheets/%d'>%s</a>" % (sheet["id"], strip_tags(sheet["title"])),  # legacy, used in S1
                "title": strip_tags(sheet["title"]),
                "sheetUrl": "/sheets/" + str(sheet["id"]),
                "ownerName": owner_data["name"],
                "ownerProfileUrl": owner_data["profileUrl"],
                "ownerImageUrl": owner_data["imageUrl"],
                "status": sheet["status"],
                "views": sheet["views"],
                "tags": sheet.get("tags", []),
                "commentator": user_link(sheet["owner"]),  # legacy, used in S1
                "category": "Sheets",  # ditto
                "type": "sheet",  # ditto
            })
    return results