Example #1
0
def sheet_to_dict(sheet):
    """
    Convert the Mongo document `sheet` into a JSON serializable dictionary.
    The owner's public profile name and image URL are added for the client.
    """
    profile = public_user_data(sheet["owner"])

    # Populate the result one key at a time.
    sheet_dict = {}
    sheet_dict["id"] = sheet["id"]
    if "title" in sheet:
        sheet_dict["title"] = strip_tags(sheet["title"])
    else:
        sheet_dict["title"] = "Untitled Sheet"
    sheet_dict["status"] = sheet["status"]
    sheet_dict["author"] = sheet["owner"]
    sheet_dict["ownerName"] = profile["name"]
    sheet_dict["ownerImageUrl"] = profile["imageUrl"]
    sheet_dict["views"] = sheet["views"]

    # The stored modification date is re-rendered as month/day/year.
    modified = dateutil.parser.parse(sheet["dateModified"])
    sheet_dict["modified"] = modified.strftime("%m/%d/%Y")

    if "tags" in sheet:
        sheet_dict["tags"] = sheet["tags"]
    else:
        sheet_dict["tags"] = []
    return sheet_dict
Example #2
0
def index_sheet(index_name, id):
    """
    Index source sheet with 'id'.

    Looks the sheet up in `db.sheets`, builds a search document from it and
    from the owner's public profile, and passes it to `es.index`.

    Returns True when the document was indexed, and False when no sheet has
    this id or when building / indexing the document raised.
    """
    global doc_count

    sheet = db.sheets.find_one({"id": id})
    if not sheet:
        return False

    pud = public_user_data(sheet["owner"])
    try:
        # The document is built inside the try so that a malformed sheet
        # (for example one without a "title") is reported and skipped like
        # any other indexing failure instead of raising out of the indexer.
        doc = {
            "title": strip_tags(sheet["title"]),
            "content": make_sheet_text(sheet, pud),
            "owner_id": sheet["owner"],
            "owner_name": pud["name"],
            "owner_image": pud["imageUrl"],
            "profile_url": pud["profileUrl"],
            "version": "Source Sheet by " + user_link(sheet["owner"]),
            "tags": ",".join(sheet.get("tags", [])),
            "sheetId": id,
        }
        es.index(index_name, 'sheet', doc, id)
        doc_count += 1
        return True
    except Exception as e:
        # Single-argument print() calls behave the same on Python 2 and 3.
        print("Error indexing sheet %d" % id)
        print(e)
        return False
Example #3
0
def sheet_to_dict(sheet):
    """
    Convert the Mongo document `sheet` into a JSON serializable dictionary.
    The owner's public profile name and image URL are added for the client.
    """
    profile = public_user_data(sheet["owner"])

    # Values are computed in the same order in which they appear in the result.
    sheet_id = sheet["id"]
    if "title" in sheet:
        title = strip_tags(sheet["title"])
    else:
        title = "Untitled Sheet"
    status = sheet["status"]
    author = sheet["owner"]
    owner_name = profile["name"]
    owner_image_url = profile["imageUrl"]
    views = sheet["views"]
    group = sheet.get("group", None)
    modified = dateutil.parser.parse(sheet["dateModified"]).strftime("%m/%d/%Y")
    created = sheet.get("dateCreated", None)
    topics = add_langs_to_topics(sheet.get("topics", []))
    # for backwards compatibility with mobile
    tags = [topic['asTyped'] for topic in sheet.get("topics", [])]
    options = sheet["options"] if "options" in sheet else []

    return {
        "id": sheet_id,
        "title": title,
        "status": status,
        "author": author,
        "ownerName": owner_name,
        "ownerImageUrl": owner_image_url,
        "views": views,
        "group": group,
        "modified": modified,
        "created": created,
        "topics": topics,
        "tags": tags,
        "options": options,
    }
Example #4
0
def word_frequency_for_text(title, lang="en"):
    """
    Returns an ordered list of word/count tuples for occurrences of words inside the
    text `title`.

    The merged `lang` text of `title` is stripped of tags, lowercased and
    reduced to the letters a-z; every other character acts as a word
    separator. The list is sorted by descending count, with equally frequent
    words kept in order of first appearance.
    """
    from collections import Counter
    from sefaria.export import make_text, prepare_merged_text_for_export
    from sefaria.utils.util import strip_tags

    text = make_text(prepare_merged_text_for_export(title, lang=lang))
    text = strip_tags(text).lower()

    # Everything that is not a-z (digits, punctuation, newlines, non-Latin
    # letters) becomes a space, so no separate punctuation pass is needed.
    text = re.sub(r'[^a-z ]', " ", text)

    # split() with no argument collapses runs of spaces and ignores leading
    # and trailing ones, so the empty string is never counted as a word.
    words = text.split()

    return Counter(words).most_common()
def index_sheet(index_name, id):
    """
    Index source sheet with 'id'.

    Looks the sheet up in `db.sheets`, builds a search document from it and
    from the owner's public profile, and passes it to `es_client.create`.

    Returns True when the document was created, and False when no sheet has
    this id or when building / indexing the document raised.
    """
    global doc_count

    sheet = db.sheets.find_one({"id": id})
    if not sheet:
        return False

    pud = public_user_data(sheet["owner"])
    try:
        doc = {
            "title": strip_tags(sheet["title"]),
            "content": make_sheet_text(sheet, pud),
            "owner_id": sheet["owner"],
            "owner_name": pud["name"],
            "owner_image": pud["imageUrl"],
            "profile_url": pud["profileUrl"],
            "version": "Source Sheet by " + user_link(sheet["owner"]),
            "tags": sheet.get("tags", []),
            "sheetId": id,
            "summary": sheet.get("summary", None),
            "group": sheet.get("group", None),
            "datePublished": sheet.get("datePublished", None),
            "dateCreated": sheet.get("dateCreated", None),
            # Read the sheet's own modification date rather than its creation date.
            "dateModified": sheet.get("dateModified", None),
            "views": sheet.get("views", 0)
        }
        es_client.create(index=index_name, doc_type='sheet', id=id, body=doc)
        doc_count += 1
        return True
    except Exception as e:
        # Single-argument print() calls behave the same on Python 2 and 3.
        print("Error indexing sheet %d" % id)
        print(e)
        return False
Example #6
0
def count_sources(sources, sheet_id):
	"""
	Tally statistics about `sources`, a list of source dicts belonging to
	sheet `sheet_id`, into the module-level counters.

	Entries with a "ref" are counted and parsed with `parse_ref`. Refs that
	parse without an "error" are tallied by ref, book and first category, and
	again under the untrans_* counters when `is_ref_translated` is false. An
	untranslated source with more than 25 characters of tag-stripped English
	text records `sheet_id` in `fragments`. "subsources" of a parsed ref are
	counted recursively. Entries without a "ref" are counted as comments or
	as outside texts.
	"""
	global refs, texts, categories
	global sources_count, comments_count, outside_count, untrans_count
	global untrans_texts, untrans_categories, untrans_refs
	global fragments, fragments_count

	for source in sources:
		# Entries without a ref are either comments or outside texts.
		if "ref" not in source:
			if "comment" in source:
				comments_count += 1
			elif "outsideText" in source or "outsideBiText" in source:
				outside_count += 1
			continue

		sources_count += 1
		ref = source["ref"]
		parsed = parse_ref(ref)
		if "error" in parsed:
			continue

		refs[ref] += 1
		texts[parsed["book"]] += 1
		categories[parsed["categories"][0]] += 1

		if not is_ref_translated(ref):
			untrans_categories[parsed["categories"][0]] += 1
			untrans_texts[parsed["book"]] += 1
			untrans_refs[ref] += 1
			untrans_count += 1

			english = strip_tags(source.get("text", {}).get("en", ""))
			if len(english) > 25:
				fragments[ref].append(sheet_id)
				fragments_count += 1

		if "subsources" in source:
			count_sources(source["subsources"], sheet_id)
	def is_hebrew(self):
		"""Returns True if this sheet appears to be in Hebrew according to its title"""
		from sefaria.utils.hebrew import is_hebrew
		import regex
		title = strip_tags(self.title)
		# Consider a sheet Hebrew if its title contains Hebrew character but no English characters.
		# Inside a character class "|" is a literal pipe rather than alternation, so it is
		# left out of the class; otherwise a Hebrew title containing "|" would be rejected.
		return is_hebrew(title) and not regex.search(u"[a-zA-Z]", title)
Example #8
0
	def count_sources(self, sources, tags, sheet_id):
		"""
		Tally statistics for each entry of `sources` onto the counters held on `self`.

		Entries with a non-None "ref" are resolved to a padded Ref and counted
		by normalized ref, by book and by first index category, and once per
		tag in `tags`. When the ref is not translated (or the check itself
		raises) the untrans_* counters are updated, and `sheet_id` is recorded
		in `self.fragments` if the source carries more than 25 characters of
		tag-stripped English text. Other entries are counted as comments or as
		outside texts. An entry that raises is skipped.
		"""
		for s in sources:
			try:
				if "ref" in s and s["ref"] is not None:
					self.sources_count += 1
					oref = Ref(s["ref"]).padded_ref()
					# Computed once and reused by every counter below.
					normal = oref.normal()
					self.refs[normal] += 1
					self.texts[oref.book] += 1
					category = oref.index.categories[0]
					self.categories[category] += 1
					self.refs_by_category[category][normal] += 1
					for tag in tags:
						self.refs_by_tag[tag][normal] += 1

					try:
						is_translated = oref.is_text_translated()
					except Exception:
						# A failing translation check is treated as "not translated".
						is_translated = False
					if not is_translated:
						self.untrans_categories[category] += 1
						self.untrans_texts[oref.book] += 1
						self.untrans_refs[s["ref"]] += 1
						self.untrans_count += 1

						en = strip_tags(s.get("text", {}).get("en", ""))
						if len(en) > 25:
							self.fragments[s["ref"]].append(sheet_id)
							self.fragments_count += 1

				elif "comment" in s:
					self.comments_count += 1

				elif "outsideText" in s or "outsideBiText" in s:
					self.outside_count += 1
			except Exception:
				# Best effort: skip an entry that cannot be processed. Catching
				# Exception (not everything) lets KeyboardInterrupt and
				# SystemExit still stop a long-running count.
				continue
Example #9
0
def make_tag_list(include_sheets=False):
	"""
	Build the list of tags used on public sheets, alphabetized by tag.

	Each entry is a dict with "tag", "count" and "sheets". The "sheets" list
	is only filled in (title/id/views dicts, ordered by descending views)
	when `include_sheets` is True; otherwise it stays empty.
	"""
	if include_sheets:
		projection = {"tags": 1, "title": 1, "id": 1, "views": 1}
	else:
		projection = {"tags": 1}

	by_tag = {}
	for sheet in db.sheets.find({"status": "public"}, projection):
		for tag in sheet.get("tags", []):
			entry = by_tag.setdefault(tag, {"tag": tag, "count": 0, "sheets": []})
			if include_sheets:
				entry["sheets"].append({
					"title": strip_tags(sheet["title"]),
					"id": sheet["id"],
					"views": sheet["views"],
				})
			entry["count"] += 1

	# Most viewed sheets first within each tag.
	for entry in by_tag.values():
		entry["sheets"].sort(key=lambda x: -x["views"])

	return sorted(by_tag.values(), key=lambda x: x["tag"])
Example #10
0
def export_to_drive(request, credential, sheet_id):
    """
    Export a sheet to Google Drive.

    Builds a Drive v3 service authorized with `credential`, uploads the
    sheet's HTML as a Google document named after the tag-stripped sheet
    title, and returns the create() result (requested field: webViewLink) as
    a JSON response. If `get_sheet` reports an error, that error is returned
    as JSON instead.
    """
    authorized_http = credential.authorize(httplib2.Http())
    drive = build('drive', 'v3', http=authorized_http, cache_discovery=False)

    sheet = get_sheet(sheet_id)
    if 'error' in sheet:
        error_payload = {'error': {'message': sheet["error"]}}
        return jsonResponse(error_payload)

    document_name = strip_tags(sheet['title'].strip())
    html_bytes = bytes(sheet_to_html_string(sheet), "utf8")
    upload = MediaIoBaseUpload(BytesIO(html_bytes),
                               mimetype='text/html',
                               resumable=True)

    create_request = drive.files().create(
        body={
            'name': document_name,
            'mimeType': 'application/vnd.google-apps.document'
        },
        media_body=upload,
        fields='webViewLink')

    return jsonResponse(create_request.execute())
Example #11
0
def make_tag_list(include_sheets=False):
	"""
	Build the list of tags used on public sheets, alphabetized by tag.

	Each entry is a dict with "tag", "count" and "sheets". The "sheets" list
	is only filled in (title/id/views dicts, ordered by descending views)
	when `include_sheets` is True; otherwise it stays empty.
	"""
	if include_sheets:
		projection = {"tags": 1, "title": 1, "id": 1, "views": 1}
	else:
		projection = {"tags": 1}

	by_tag = {}
	for sheet in db.sheets.find({"status": "public"}, projection):
		for tag in sheet.get("tags", []):
			entry = by_tag.setdefault(tag, {"tag": tag, "count": 0, "sheets": []})
			if include_sheets:
				entry["sheets"].append({
					"title": strip_tags(sheet["title"]),
					"id": sheet["id"],
					"views": sheet["views"],
				})
			entry["count"] += 1

	# Most viewed sheets first within each tag.
	for entry in by_tag.values():
		entry["sheets"].sort(key=lambda x: -x["views"])

	return sorted(by_tag.values(), key=lambda x: x["tag"])
Example #12
0
 def is_hebrew(self):
     """Returns True if this sheet appears to be in Hebrew according to its title"""
     from sefaria.utils.hebrew import is_hebrew
     import regex
     title = strip_tags(self.title)
     # Consider a sheet Hebrew if its title contains Hebrew character but no English characters.
     # Inside a character class "|" is a literal pipe rather than alternation, so it is
     # left out of the class; otherwise a Hebrew title containing "|" would be rejected.
     return is_hebrew(title) and not regex.search(u"[a-zA-Z]", title)
Example #13
0
def sheet_spam_dashboard(request):
    """
    Render 'spam_dashboard.html' with sheets that look like potential spam.

    GET only; a POST gets a JSON "Unsupported Method" error. The optional
    `date` query parameter (YYYY-MM-DD) sets the cut-off and defaults to 30
    days ago. Listed sheets have no "sources.ref", were created after the
    cut-off, have an owner id greater than that of the earliest user who
    joined since the cut-off, have empty "includedRefs", are not marked
    reviewed, and have outside text / comment content matching the link
    pattern below.
    """
    from django.contrib.auth.models import User

    if request.method == 'POST':
        return jsonResponse({"error": "Unsupported Method: {}".format(request.method)})

    date = request.GET.get("date", None)
    if date:
        date = datetime.strptime(date, '%Y-%m-%d')
    else:
        # Covers both a missing and an empty ?date= parameter. Reading the
        # parameter again here would return the empty string and later fail
        # on date.strftime().
        date = datetime.now() - timedelta(days=30)

    sheets_list = []

    # first() returns None instead of raising IndexError when nobody joined
    # since `date`; in that case there is nothing to list.
    earliest_new_user = User.objects.filter(date_joined__gte=date).order_by('date_joined').first()
    if earliest_new_user is not None:
        # NOTE(review): this pattern looks intended to flag links that do not
        # point at sefaria - confirm before relying on it.
        link_regex = r'.*(?!href=[\'"](\/|http(s)?:\/\/(www\.)?sefaria).+[\'"])(href).*'
        spam_query = {
            "sources.ref": {"$exists": False},
            "dateCreated": {"$gt": date.strftime("%Y-%m-%dT%H:%M:%S.%f")},
            "owner": {"$gt": earliest_new_user.id},
            "includedRefs": {"$size": 0},
            "reviewed": {"$ne": True},
            "$or": [
                {"sources.outsideText": {"$regex": link_regex}},
                {"sources.comment": {"$regex": link_regex}},
                {"sources.outsideBiText.en": {"$regex": link_regex}},
                {"sources.outsideBiText.he": {"$regex": link_regex}},
            ],
        }
        for sheet in db.sheets.find(spam_query):
            sheets_list.append({
                "id": sheet["id"],
                "title": strip_tags(sheet["title"]),
                "owner": user_link(sheet["owner"]),
            })

    return render_template(request, 'spam_dashboard.html', None, {
        "title": "Potential Spam Sheets since %s" % date.strftime("%Y-%m-%d"),
        "sheets": sheets_list,
        "type": "sheet",
    })
Example #14
0
def make_sheet_list_by_tag():
    """
    Group every sheet whose status is in LISTED_SHEETS under each of its tags.

    Returns a list of {"tag", "count", "sheets"} dicts alphabetized by tag;
    each "sheets" list holds title/id/views dicts ordered by descending views.
    """
    by_tag = {}
    for sheet in db.sheets.find({"status": {"$in": LISTED_SHEETS}}):
        for tag in sheet.get("tags", []):
            entry = by_tag.setdefault(tag, {"tag": tag, "count": 0, "sheets": []})
            entry["sheets"].append({
                "title": strip_tags(sheet["title"]),
                "id": sheet["id"],
                "views": sheet["views"]
            })
            entry["count"] += 1

    # Most viewed sheets first within each tag.
    for entry in by_tag.values():
        entry["sheets"].sort(key=lambda x: -x["views"])

    return sorted(by_tag.values(), key=lambda x: x["tag"])
Example #15
0
def export_to_drive(request, credential, sheet_id):
	"""
	Export a sheet to Google Drive.

	Builds a Drive v3 service authorized with `credential`, uploads the
	sheet's HTML as a Google document named after the tag-stripped sheet
	title, and returns the create() result (requested field: webViewLink) as
	a JSON response. If `get_sheet` reports an error, that error is returned
	as JSON instead.
	"""
	authorized_http = credential.authorize(httplib2.Http())
	drive = build('drive', 'v3', http=authorized_http)

	sheet = get_sheet(sheet_id)
	if 'error' in sheet:
		error_payload = {'error': {'message': sheet["error"]}}
		return jsonResponse(error_payload)

	document_name = strip_tags(sheet['title'].strip())
	html_string = sheet_to_html_string(sheet)
	upload = MediaIoBaseUpload(
		StringIO(html_string),
		mimetype='text/html',
		resumable=True)

	create_request = drive.files().create(
		body={
			'name': document_name,
			'mimeType': 'application/vnd.google-apps.document'
		},
		media_body=upload,
		fields='webViewLink')

	return jsonResponse(create_request.execute())
Example #16
0
    def _sheet_metadata(sheet_id, return_id=False):
        """
        Return a dict with the tag-stripped title and summary of sheet
        `sheet_id` plus its owner as "publisher_id", or None when
        `get_sheet_metadata` returns nothing for it. The summary is "" when
        the metadata has none. With `return_id` the dict also carries
        "sheet_id".
        """
        from sefaria.sheets import get_sheet_metadata

        metadata = get_sheet_metadata(sheet_id)
        if not metadata:
            return None

        info = {"sheet_title": strip_tags(metadata["title"])}
        if "summary" in metadata:
            info["sheet_summary"] = strip_tags(metadata["summary"])
        else:
            info["sheet_summary"] = ""
        info["publisher_id"] = metadata["owner"]

        if return_id:
            info["sheet_id"] = sheet_id
        return info
Example #17
0
def test():
    """
    Print the title and URL of every sheet, among up to 10000 sheets taken in
    descending `_id` order, whose detected language is "some hebrew".
    """
    cursor = db.sheets.find({}, sort=[["_id", -1]], limit=10000)

    for sheet in cursor:
        if get_sheet_language(sheet) != "some hebrew":
            continue
        # Newlines are removed so each sheet prints on a single line.
        title = strip_tags(sheet["title"]).replace("\n", "")
        line = "{}\thttps://www.sefaria.org/sheets/{}".format(title, sheet["id"])
        print(line)
Example #18
0
def get_sheet_language(sheet):
    """
    Guess the language `sheet` is written in from its title alone.
    Returns "hebrew" when `is_hebrew(title, heb_only=True)` is true for the
    cleaned-up title, and "english" otherwise.
    """
    title = strip_tags(sheet.get("title", ""))
    # Drop the "(Copy)" marker and flatten newlines before testing the title.
    title = title.replace("(Copy)", "")
    title = title.replace("\n", " ")

    if is_hebrew(title, heb_only=True):
        return "hebrew"
    return "english"
Example #19
0
def get_sheets_for_ref(tref, pad=True, context=1):
    """
    Returns a list of sheets that include ref,
    formatted as needed for the Client Sidebar.

    `tref` is turned into a Ref, optionally padded (`pad`) and widened to its
    context (`context`). Sheets whose status is in LISTED_SHEETS and whose
    "included_refs" match are fetched, and one dict is returned for each
    matching ref of each sheet.
    """
    oref = model.Ref(tref)
    if pad:
        oref = oref.padded_ref()
    if context:
        oref = oref.context_ref(context)

    ref_re = oref.regex()

    results = []
    sheets = db.sheets.find(
        {
            "included_refs": {"$regex": ref_re},
            "status": {"$in": LISTED_SHEETS},
        },
        {"id": 1, "title": 1, "owner": 1, "included_refs": 1})
    for sheet in sheets:
        # Check for multiple matching refs within this sheet
        matched_orefs = [
            model.Ref(r) for r in sheet["included_refs"]
            if regex.match(ref_re, r)
        ]
        if not matched_orefs:
            continue

        # These values are the same for every match of this sheet, so they
        # are computed once per sheet rather than once per match.
        owner = sheet["owner"]
        sheet_oid = str(sheet["_id"])
        commentator = user_link(owner)
        text = "<a class='sheetLink' href='/sheets/%d'>%s</a>" % (
            sheet["id"], strip_tags(sheet["title"]))

        for match in matched_orefs:
            results.append({
                "category": "Sheets",
                "type": "sheet",
                "owner": owner,
                "_id": sheet_oid,
                "anchorRef": match.normal(),
                # A ref with no sections would raise IndexError on [-1];
                # fall back to 1 for it.
                "anchorVerse": match.sections[-1] if len(match.sections) else 1,
                "public": True,
                "commentator": commentator,
                "text": text,
            })

    return results
Example #20
0
def spam_dashboard(request):
    """
    Review and act on sheets that look like potential spam.

    POST: marks the sheets in `reviewed_sheets[]` as reviewed, deactivates
    the owners of the sheets in `spam_sheets[]`, deletes those sheets and
    renders a summary.

    GET: renders the sheets to review. The optional `date` query parameter
    (YYYY-MM-DD) sets the cut-off and defaults to 30 days ago. Listed sheets
    have no "sources.ref", were created after the cut-off, have an owner id
    greater than that of the earliest user who joined since the cut-off, have
    empty "includedRefs", are not marked reviewed, and have outside text /
    comment content matching the link pattern below.
    """
    from django.contrib.auth.models import User

    if request.method == 'POST':

        spam_sheet_ids = list(map(int, request.POST.getlist("spam_sheets[]", [])))
        reviewed_sheet_ids = list(map(int, request.POST.getlist("reviewed_sheets[]", [])))

        db.sheets.update_many({"id": {"$in": reviewed_sheet_ids}}, {"$set": {"reviewed": True}})

        spammers = db.sheets.find({"id": {"$in": spam_sheet_ids}}, {"owner": 1}).distinct("owner")

        for spammer in spammers:
            try:
                spammer_account = User.objects.get(id=spammer)
                spammer_account.is_active = False
                spammer_account.save()
            except Exception:
                # Best effort: an owner that cannot be loaded or saved is
                # skipped. Catching Exception (not everything) keeps
                # KeyboardInterrupt and SystemExit working.
                continue

        db.sheets.delete_many({"id": {"$in": spam_sheet_ids}})

        return render_template(request, 'spam_dashboard.html', None, {
            "deleted_sheets": len(spam_sheet_ids),
            "sheet_ids": spam_sheet_ids,
            "reviewed_sheets": len(reviewed_sheet_ids),
            "spammers_deactivated": len(spammers)
        })

    date = request.GET.get("date", None)
    if date:
        date = datetime.strptime(date, '%Y-%m-%d')
    else:
        # Covers both a missing and an empty ?date= parameter. Reading the
        # parameter again here would return the empty string and later fail
        # on date.strftime().
        date = datetime.now() - timedelta(days=30)

    sheets_list = []

    # Order by join date so that the first row really is the earliest new
    # user; first() returns None instead of raising IndexError when nobody
    # joined since `date`, in which case there is nothing to list.
    earliest_new_user = User.objects.filter(date_joined__gte=date).order_by('date_joined').first()
    if earliest_new_user is not None:
        # NOTE(review): this pattern looks intended to flag links that do not
        # point at sefaria - confirm before relying on it.
        link_regex = r'.*(?!href=[\'"](\/|http(s)?:\/\/(www\.)?sefaria).+[\'"])(href).*'
        spam_query = {
            "sources.ref": {"$exists": False},
            "dateCreated": {"$gt": date.strftime("%Y-%m-%dT%H:%M:%S.%f")},
            "owner": {"$gt": earliest_new_user.id},
            "includedRefs": {"$size": 0},
            "reviewed": {"$ne": True},
            "$or": [
                {"sources.outsideText": {"$regex": link_regex}},
                {"sources.comment": {"$regex": link_regex}},
                {"sources.outsideBiText.en": {"$regex": link_regex}},
                {"sources.outsideBiText.he": {"$regex": link_regex}},
            ],
        }
        for sheet in db.sheets.find(spam_query):
            sheets_list.append({
                "id": sheet["id"],
                "title": strip_tags(sheet["title"]),
                "owner": user_link(sheet["owner"]),
            })

    return render_template(request, 'spam_dashboard.html', None, {
        "title": "Potential Spam Sheets since %s" % date.strftime("%Y-%m-%d"),
        "sheets": sheets_list,
    })
Example #21
0
def get_sheets_for_ref(tref, pad=True, context=1):
	"""
	Returns a list of sheets that include ref,
	formatted as needed for the Client Sidebar.

	`tref` is turned into a Ref, optionally padded (`pad`) and widened to its
	context (`context`). Public sheets with a matching "sources.ref" are
	fetched, most viewed first, and one dict is returned for each matching
	ref of each sheet. Matching refs that raise InputError are skipped.
	"""
	oref = model.Ref(tref)
	if pad:
		oref = oref.padded_ref()
	if context:
		oref = oref.context_ref(context)

	ref_re = oref.regex()

	results = []

	regex_list = oref.regex(as_list=True)
	ref_clauses = [{"sources.ref": {"$regex": r}} for r in regex_list]
	sheets = db.sheets.find({"$or": ref_clauses, "status": "public"},
		{"id": 1, "title": 1, "owner": 1, "sources.ref": 1, "views": 1}).sort([["views", -1]])
	for sheet in sheets:
		source_refs = [source["ref"] for source in sheet.get("sources", []) if "ref" in source]

		matched_orefs = []
		for r in source_refs:
			if not regex.match(ref_re, r):
				continue
			try:
				matched_orefs.append(model.Ref(r))
			except InputError:
				continue
		if not matched_orefs:
			continue

		# These values are the same for every match of this sheet, so they
		# are computed once per sheet rather than once per match.
		owner = sheet["owner"]
		ownerData = public_user_data(owner)
		sheet_oid = str(sheet["_id"])
		commentator = user_link(owner)
		title = strip_tags(sheet["title"])
		text = "<a class='sheetLink' href='/sheets/%d'>%s</a>" % (sheet["id"], title)
		sheet_url = "/sheets/" + str(sheet["id"])
		views = sheet["views"]

		for match in matched_orefs:
			results.append({
				"category":        "Sheets",
				"type":            "sheet",
				"owner":           owner,
				"_id":             sheet_oid,
				"anchorRef":       match.normal(),
				"anchorVerse":     match.sections[-1] if len(match.sections) else 1,
				"public":          True,
				"commentator":     commentator, # legacy, used in S1
				"text":            text, # legacy, used in S1
				"title":           title,
				"sheetUrl":        sheet_url,
				"ownerName":       ownerData["name"],
				"ownerProfileUrl": ownerData["profileUrl"],
				"ownerImageUrl":   ownerData["imageUrl"],
				"views":           views
			})

	return results
Example #22
0
def get_sheets_for_ref(tref, pad=True, context=1):
	"""
	Returns a list of sheets that include ref,
	formatted as needed for the Client Sidebar.

	`tref` is turned into a Ref, optionally padded (`pad`) and widened to its
	context (`context`). Public sheets with a matching "sources.ref" are
	fetched, most viewed first, and one dict is returned for each matching
	ref of each sheet. Matching refs that raise InputError are skipped.
	"""
	oref = model.Ref(tref)
	if pad:
		oref = oref.padded_ref()
	if context:
		oref = oref.context_ref(context)

	ref_re = oref.regex()

	results = []

	regex_list = oref.regex(as_list=True)
	ref_clauses = [{"sources.ref": {"$regex": r}} for r in regex_list]
	sheets = db.sheets.find({"$or": ref_clauses, "status": "public"},
		{"id": 1, "title": 1, "owner": 1, "sources.ref": 1, "views": 1}).sort([["views", -1]])
	for sheet in sheets:
		source_refs = [source["ref"] for source in sheet.get("sources", []) if "ref" in source]

		matched_orefs = []
		for r in source_refs:
			if not regex.match(ref_re, r):
				continue
			try:
				matched_orefs.append(model.Ref(r))
			except InputError:
				continue
		if not matched_orefs:
			continue

		# These values are the same for every match of this sheet, so they
		# are computed once per sheet rather than once per match.
		owner = sheet["owner"]
		ownerData = public_user_data(owner)
		sheet_oid = str(sheet["_id"])
		commentator = user_link(owner)
		title = strip_tags(sheet["title"])
		text = "<a class='sheetLink' href='/sheets/%d'>%s</a>" % (sheet["id"], title)
		sheet_url = "/sheets/" + str(sheet["id"])
		views = sheet["views"]

		for match in matched_orefs:
			results.append({
				"category":        "Sheets",
				"type":            "sheet",
				"owner":           owner,
				"_id":             sheet_oid,
				"anchorRef":       match.normal(),
				"anchorVerse":     match.sections[-1] if len(match.sections) else 1,
				"public":          True,
				"commentator":     commentator, # legacy, used in S1
				"text":            text, # legacy, used in S1
				"title":           title,
				"sheetUrl":        sheet_url,
				"ownerName":       ownerData["name"],
				"ownerProfileUrl": ownerData["profileUrl"],
				"ownerImageUrl":   ownerData["imageUrl"],
				"views":           views
			})

	return results
Example #23
0
def get_sheets_for_ref(tref, pad=True, context=1):
    """
    Returns a list of sheets that include ref,
    formatted as needed for the Client Sidebar.

    `tref` is turned into a Ref, optionally padded (`pad`) and widened to its
    context (`context`). Public sheets whose "included_refs" match are
    fetched, and one dict is returned for each matching ref of each sheet.
    Matching refs that raise InputError are skipped.
    """
    oref = model.Ref(tref)
    if pad:
        oref = oref.padded_ref()
    if context:
        oref = oref.context_ref(context)

    ref_re = oref.regex()

    results = []

    regex_list = oref.regex(as_list=True)
    ref_clauses = [{"included_refs": {"$regex": r}} for r in regex_list]
    sheets = db.sheets.find(
        {"$or": ref_clauses, "status": "public"},
        {"id": 1, "title": 1, "owner": 1, "included_refs": 1})
    for sheet in sheets:
        # Check for multiple matching refs within this sheet
        matched_orefs = []
        for r in sheet["included_refs"]:
            if not regex.match(ref_re, r):
                continue
            try:
                matched_orefs.append(model.Ref(r))
            except InputError:
                continue
        if not matched_orefs:
            continue

        # These values are the same for every match of this sheet, so they
        # are computed once per sheet rather than once per match.
        owner = sheet["owner"]
        sheet_oid = str(sheet["_id"])
        commentator = user_link(owner)
        text = "<a class='sheetLink' href='/sheets/%d'>%s</a>" % (
            sheet["id"], strip_tags(sheet["title"]))

        for match in matched_orefs:
            results.append({
                "category": "Sheets",
                "type": "sheet",
                "owner": owner,
                "_id": sheet_oid,
                "anchorRef": match.normal(),
                "anchorVerse": match.sections[-1] if len(match.sections) else 1,
                "public": True,
                "commentator": commentator,
                "text": text,
            })

    return results
Example #24
0
def index_sheet(index_name, id):
    """
    Index source sheet with 'id'.

    Builds a search document from the sheet, its owner's public profile, its
    tags, its topics and the names of the listed collections that contain it,
    then passes the document to `es_client.create`.

    Returns True when the document was created, and False when no sheet has
    this id or when building / indexing the document raised.
    """
    global doc_count

    sheet = db.sheets.find_one({"id": id})
    if not sheet:
        return False

    pud = public_user_data(sheet["owner"])
    tags = [term["en"] for term in make_sheet_tags(sheet)]

    # Keep only the topics that Topic.init can resolve.
    topics = []
    for sheet_topic in sheet.get('topics', []):
        topic_obj = Topic.init(sheet_topic['slug'])
        if topic_obj:
            topics.append(topic_obj)

    collection_names = [c.name for c in CollectionSet({"sheets": id, "listed": True})]

    try:
        doc = {
            "title": strip_tags(sheet["title"]),
            "content": make_sheet_text(sheet, pud),
            "owner_id": sheet["owner"],
            "owner_name": pud["name"],
            "owner_image": pud["imageUrl"],
            "profile_url": pud["profileUrl"],
            "version": "Source Sheet by " + user_link(sheet["owner"]),
            "tags": tags,
            "topic_slugs": [topic.slug for topic in topics],
            "topics_en": [topic.get_primary_title('en') for topic in topics],
            "topics_he": [topic.get_primary_title('he') for topic in topics],
            "sheetId": id,
            "summary": sheet.get("summary", None),
            "collections": collection_names,
            "datePublished": sheet.get("datePublished", None),
            "dateCreated": sheet.get("dateCreated", None),
            "dateModified": sheet.get("dateModified", None),
            "views": sheet.get("views", 0)
        }
        es_client.create(index=index_name, doc_type='sheet', id=id, body=doc)
        doc_count += 1
    except Exception as e:
        print("Error indexing sheet %d" % id)
        print(e)
        return False
    return True
Example #25
0
def sheet_to_dict(sheet):
	"""
	Convert the Mongo document `sheet` into a JSON serializable dictionary.
	The owner's public profile name and image URL are added for the client.
	"""
	profile = public_user_data(sheet["owner"])

	# Populate the result one key at a time.
	sheet_dict = {}
	sheet_dict["id"] = sheet["id"]
	if "title" in sheet:
		sheet_dict["title"] = strip_tags(sheet["title"])
	else:
		sheet_dict["title"] = "Untitled Sheet"
	sheet_dict["status"] = sheet["status"]
	sheet_dict["author"] = sheet["owner"]
	sheet_dict["ownerName"] = profile["name"]
	sheet_dict["ownerImageUrl"] = profile["imageUrl"]
	sheet_dict["views"] = sheet["views"]

	# The stored modification date is re-rendered as month/day/year.
	modified = dateutil.parser.parse(sheet["dateModified"])
	sheet_dict["modified"] = modified.strftime("%m/%d/%Y")

	# Optional list fields default to an empty list.
	for optional_key in ("tags", "options"):
		if optional_key in sheet:
			sheet_dict[optional_key] = sheet[optional_key]
		else:
			sheet_dict[optional_key] = []
	return sheet_dict
Example #26
0
def profile_spam_dashboard(request):
    """
    Render 'spam_dashboard.html' with user profiles that look like potential spam.

    GET only; a POST gets a JSON "Unsupported Method" error. The optional
    `date` query parameter (YYYY-MM-DD) sets the cut-off and defaults to 30
    days ago. Candidate profiles are unreviewed, have an id greater than that
    of the earliest user who joined since the cut-off, and either have both a
    website and a bio that are not the empty string or have a bio matching
    the link pattern below. Of those, only profiles with fewer than 10
    `user_history` records are listed.
    """
    from django.contrib.auth.models import User

    if request.method == 'POST':
        return jsonResponse({"error": "Unsupported Method: {}".format(request.method)})

    date = request.GET.get("date", None)
    if date:
        date = datetime.strptime(date, '%Y-%m-%d')
    else:
        # Covers both a missing and an empty ?date= parameter. Reading the
        # parameter again here would return the empty string and later fail
        # on date.strftime().
        date = datetime.now() - timedelta(days=30)

    profiles_list = []

    # first() returns None instead of raising IndexError when nobody joined
    # since `date`; in that case there is nothing to list.
    earliest_new_user = User.objects.filter(date_joined__gte=date).order_by('date_joined').first()
    if earliest_new_user is not None:
        earliest_new_user_id = earliest_new_user.id

        # NOTE(review): this pattern looks intended to flag links that do not
        # point at sefaria - confirm before relying on it.
        link_regex = r'.*(?!href=[\'"](\/|http(s)?:\/\/(www\.)?sefaria).+[\'"])(href).*'

        users_to_check = db.profiles.find({
            '$or': [
                {'website': {"$ne": ""}, 'bio': {"$ne": ""}, "id": {"$gt": earliest_new_user_id},
                 "reviewed": {"$ne": True}},
                {'bio': {"$regex": link_regex}, "id": {"$gt": earliest_new_user_id},
                 "reviewed": {"$ne": True}},
            ]
        })

        for user in users_to_check:
            history_count = db.user_history.find({'uid': user['id']}).count()
            if history_count < 10:
                # {"$ne": ""} also matches profiles where the field is
                # missing or null, so fall back to "" before slicing.
                bio = user.get("bio") or ""
                website = user.get("website") or ""
                profiles_list.append({
                    "id": user["id"],
                    "slug": user["slug"],
                    "bio": strip_tags(bio[0:250]),
                    "website": website[0:50],
                })

    return render_template(request, 'spam_dashboard.html', None, {
        "title": "Potential Spam Profiles since %s" % date.strftime("%Y-%m-%d"),
        "profiles": profiles_list,
        "type": "profile",
    })
Example #27
0
def source_text(source):
    """
    Recursive function to translate a source dictionary into text.

    Joins the tag-stripped custom title, ref, Hebrew text, English text,
    comment and "outside" value of `source` with single spaces, followed by
    the text of each entry of `source["subsources"]` when present.
    """
    content = [
        source.get("customTitle", ""),
        source.get("ref", ""),
        source.get("text", {"he": ""}).get("he", ""),
        source.get("text", {"en": ""}).get("en", ""),
        source.get("comment", ""),
        source.get("outside", ""),
        ]
    parts = [" ".join(strip_tags(c) for c in content)]

    if "subsources" in source:
        parts.extend(source_text(s) for s in source["subsources"])

    # Join with a space so that the last word of one source is not fused
    # with the first word of the following subsource.
    return " ".join(parts)
Example #28
0
def untagged_sheets(request):
    """
    Return an HTML page listing public sheets whose "tags" is an empty list,
    100 per page. The zero-based `page` query parameter selects the page, and
    a "More" link points at the next one.
    """
    page = int(request.GET.get("page", 0))
    page_size = 100

    query = {"status": "public", "tags": []}
    fields = {"id": 1, "title": 1}
    cursor = db.sheets.find(query, fields).limit(page_size).skip(page_size * page)

    items = [
        "<li><a href='/sheets/%d' target='_blank'>%s</a></li>" % (
            sheet["id"], strip_tags(sheet["title"]))
        for sheet in cursor
    ]
    html = "".join(items)
    html += u"<br><a href='/admin/untagged-sheets?page=%d'>More ›</a>" % (
        page + 1)

    body = "<html><h1>Untagged Public Sheets</h1><ul>" + html + "</ul></html>"
    return HttpResponse(body)
Example #29
0
def source_text(source):
    """
    Recursive function to translate a source dictionary into text.

    Joins the tag-stripped custom title, ref, Hebrew text, English text,
    comment and "outside" value of `source` with single spaces, followed by
    the text of each entry of `source["subsources"]` when present.
    """
    content = [
        source.get("customTitle", ""),
        source.get("ref", ""),
        source.get("text", {"he": ""}).get("he", ""),
        source.get("text", {"en": ""}).get("en", ""),
        source.get("comment", ""),
        source.get("outside", ""),
        ]
    parts = [" ".join(strip_tags(c) for c in content)]

    if "subsources" in source:
        parts.extend(source_text(s) for s in source["subsources"])

    # Join with a space so that the last word of one source is not fused
    # with the first word of the following subsource.
    return " ".join(parts)
Example #30
0
def index_sheet(index_name, id):
    """
    Index source sheet with 'id'.

    Builds a search document from the sheet, its owner's public profile and
    its tags (English only, and as "en|||he" pairs), then passes the document
    to `es_client.create`.

    Returns True when the document was created, and False when no sheet has
    this id or when building / indexing the document raised.
    """
    global doc_count

    sheet = db.sheets.find_one({"id": id})
    if not sheet:
        return False

    pud = public_user_data(sheet["owner"])
    tag_terms_simple = make_sheet_tags(sheet)
    tags = [t["en"] for t in tag_terms_simple]
    tags_he_and_en = [
        u"{}|||{}".format(t["en"], t["he"]) for t in tag_terms_simple
    ]
    try:
        doc = {
            "title": strip_tags(sheet["title"]),
            "content": make_sheet_text(sheet, pud),
            "owner_id": sheet["owner"],
            "owner_name": pud["name"],
            "owner_image": pud["imageUrl"],
            "profile_url": pud["profileUrl"],
            "version": "Source Sheet by " + user_link(sheet["owner"]),
            "tags": tags,
            "tags_he_and_en": tags_he_and_en,
            "sheetId": id,
            "summary": sheet.get("summary", None),
            "group": sheet.get("group", None),
            "datePublished": sheet.get("datePublished", None),
            "dateCreated": sheet.get("dateCreated", None),
            # Read the sheet's own modification date rather than its creation date.
            "dateModified": sheet.get("dateModified", None),
            "views": sheet.get("views", 0)
        }
        es_client.create(index=index_name, doc_type='sheet', id=id, body=doc)
        doc_count += 1
        return True
    except Exception as e:
        # Single-argument print() calls behave the same on Python 2 and 3.
        print("Error indexing sheet %d" % id)
        print(e)
        return False
Example #31
0
def make_sheet_list_by_tag():
	"""
	Group every sheet whose status is in LISTED_SHEETS under each of its tags.

	Returns a list of {"tag", "count", "sheets"} dicts alphabetized by tag;
	each "sheets" list holds title/id/views dicts ordered by descending views.
	"""
	by_tag = {}
	for sheet in db.sheets.find({"status": {"$in": LISTED_SHEETS }}):
		for tag in sheet.get("tags", []):
			entry = by_tag.setdefault(tag, {"tag": tag, "count": 0, "sheets": []})
			entry["sheets"].append({
				"title": strip_tags(sheet["title"]),
				"id": sheet["id"],
				"views": sheet["views"],
			})
			entry["count"] += 1

	# Most viewed sheets first within each tag.
	for entry in by_tag.values():
		entry["sheets"].sort(key=lambda x: -x["views"])

	return sorted(by_tag.values(), key=lambda x: x["tag"])
Example #32
0
def refine_ref_by_text(ref, text):
    """
    Returns a ref (string) which refines 'ref' (string) by comparing 'text' (string),
    to the hebrew text stored in the Library.

    The section containing `ref` is scanned segment by segment. If `text`
    (tags stripped, newlines removed) is found inside one segment, or is
    found to overlap a run of segments, the returned ref points at that
    segment or segment range. In every other case `ref` is returned
    unchanged.
    """
    try:
        oref = model.Ref(ref).section_ref()
    except Exception:
        # Ref could not be resolved, so there is nothing to refine against.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        return ref
    needle = strip_tags(text).strip().replace("\n", "")
    hay = model.TextChunk(oref, lang="he").text

    start, end = None, None
    for n, segment in enumerate(hay):
        if not isinstance(segment, basestring):
            # TODO handle this case
            # happens with spanning ref like "Shabbat 3a-3b"
            return ref

        if needle in segment:
            # Whole text sits inside a single segment (segments are 1-based).
            start, end = n + 1, n + 1
            break

        # NOTE(review): presumably string_overlap(a, b) tests whether the end
        # of `a` overlaps the beginning of `b` -- confirm against its definition.
        if not start and string_overlap(segment, needle):
            start = n + 1
        elif string_overlap(needle, segment):
            end = n + 1
            break

    if start and end:
        if start == end:
            return "%s:%d" % (oref.normal(), start)
        return "%s:%d-%d" % (oref.normal(), start, end)

    return ref
Example #33
0
def refine_ref_by_text(ref, text):
    """
    Returns a ref (string) which refines 'ref' (string) by comparing 'text' (string),
    to the hebrew text stored in the Library.

    Each segment of the section containing `ref` is compared with `text`
    (tags stripped, newlines removed). When the text is contained in one
    segment, or overlaps a run of segments, a ref to that segment or segment
    range is returned; otherwise `ref` comes back unchanged.
    """
    try:
        oref = model.Ref(ref).section_ref()
    except Exception:
        # An unresolvable ref cannot be refined. Catching `Exception` rather
        # than using a bare except keeps KeyboardInterrupt/SystemExit propagating.
        return ref
    needle = strip_tags(text).strip().replace("\n", "")
    hay = model.TextChunk(oref, lang="he").text

    start, end = None, None
    for n, segment in enumerate(hay):
        if not isinstance(segment, basestring):
            # TODO handle this case
            # happens with spanning ref like "Shabbat 3a-3b"
            return ref

        if needle in segment:
            # Exact containment: start and end are the same 1-based segment.
            start, end = n + 1, n + 1
            break

        # NOTE(review): string_overlap's exact semantics are defined elsewhere;
        # it is used here to detect where the text begins and where it ends.
        if not start and string_overlap(segment, needle):
            start = n + 1
        elif string_overlap(needle, segment):
            end = n + 1
            break

    if start and end:
        if start == end:
            return "%s:%d" % (oref.normal(), start)
        return "%s:%d-%d" % (oref.normal(), start, end)

    return ref
Example #34
0
def count_sources(sources, sheet_id):
    """
    Update the module-level tallies for every entry in `sources`.

    Entries with a non-None "ref" are counted as sources (and, when the ref
    resolves, tallied per ref, per book and per top-level category, with a
    separate set of tallies for refs whose text is not translated); their
    "subsources" are processed recursively. Other entries are counted as
    comments or outside texts. `sheet_id` is recorded against untranslated
    refs whose sheet carries more than 25 characters of English text.

    NOTE(review): the keyed tallies are incremented/appended without
    initialisation, so they are presumably defaultdict-like -- confirm.
    """
    global refs, texts, categories
    global sources_count, comments_count, outside_count, untrans_count
    global untrans_texts, untrans_categories, untrans_refs
    global fragments, fragments_count

    for source in sources:
        if "ref" not in source or source["ref"] is None:
            # Not a text source: at most a comment or an outside text.
            if "comment" in source:
                comments_count += 1
            elif "outsideText" in source or "outsideBiText" in source:
                outside_count += 1
            continue

        sources_count += 1
        tref = source["ref"]
        try:
            oref = model.Ref(tref).padded_ref()
        except InputError:
            # Unparseable ref: counted as a source above, nothing more.
            continue

        refs[tref] += 1
        texts[oref.book] += 1
        categories[oref.index.categories[0]] += 1

        if not model.Ref(tref).is_text_translated():
            untrans_categories[oref.index.categories[0]] += 1
            untrans_texts[oref.book] += 1
            untrans_refs[tref] += 1
            untrans_count += 1

            english = strip_tags(source.get("text", {}).get("en", ""))
            if len(english) > 25:
                fragments[tref].append(sheet_id)
                fragments_count += 1

        if "subsources" in source:
            count_sources(source["subsources"], sheet_id)
def count_sources(sources, sheet_id):
    """
    Fold the entries of `sources` into the module-level statistics.

    A source with a non-None "ref" bumps the source count and, if the ref
    parses, the per-ref / per-book / per-category tallies; untranslated refs
    get their own tallies, and `sheet_id` is logged as a fragment when the
    source holds more than 25 characters of English. "subsources" are
    handled recursively. Entries without a ref count as comments or as
    outside texts.

    NOTE(review): the keyed tallies are presumably defaultdict-like, since
    they are updated without being initialised here -- confirm.
    """
    global refs, texts, categories
    global sources_count, comments_count, outside_count, untrans_count
    global untrans_texts, untrans_categories, untrans_refs
    global fragments, fragments_count

    for entry in sources:
        has_ref = "ref" in entry and entry["ref"] is not None

        if not has_ref:
            if "comment" in entry:
                comments_count += 1
            elif "outsideText" in entry or "outsideBiText" in entry:
                outside_count += 1
            continue

        sources_count += 1
        ref_string = entry["ref"]
        try:
            oref = Ref(ref_string).padded_ref()
        except InputError:
            # Skip refs that cannot be parsed (they still count as sources).
            continue

        refs[ref_string] += 1
        texts[oref.book] += 1
        categories[oref.index.categories[0]] += 1

        translated = Ref(ref_string).is_text_translated()
        if not translated:
            untrans_categories[oref.index.categories[0]] += 1
            untrans_texts[oref.book] += 1
            untrans_refs[ref_string] += 1
            untrans_count += 1

            english_text = strip_tags(entry.get("text", {}).get("en", ""))
            if len(english_text) > 25:
                fragments[ref_string].append(sheet_id)
                fragments_count += 1

        if "subsources" in entry:
            count_sources(entry["subsources"], sheet_id)
Example #36
0
def get_sheets_for_ref(tref, uid=None):
    """
    Returns a list of sheets that include ref,
    formatted as needed for the Client Sidebar.
    If `uid` is present return user sheets, otherwise return public sheets.

    One result dict is produced per (sheet, matching included ref) pair;
    sheets are visited in descending order of views.
    """
    oref = model.Ref(tref)
    # perform initial search with context to catch ranges that include a segment ref
    regex_list = oref.context_ref().regex(as_list=True)
    query = {"$or": [{"includedRefs": {"$regex": r}} for r in regex_list]}
    if uid:
        query["owner"] = uid
    else:
        query["status"] = "public"
    projection = {
        "id": 1,
        "title": 1,
        "owner": 1,
        "viaOwner": 1,
        "via": 1,
        "dateCreated": 1,
        "includedRefs": 1,
        "views": 1,
        "tags": 1,
        "status": 1,
        "summary": 1,
        "attribution": 1,
        "assigner_id": 1,
        "likes": 1,
        "group": 1,
        "options": 1,
    }
    sheets = list(db.sheets.find(query, projection).sort([["views", -1]]))

    # Load the owners of all matching sheets in two bulk queries.
    user_ids = list(set(s["owner"] for s in sheets))
    django_user_profiles = User.objects.filter(id__in=user_ids).values(
        'email', 'first_name', 'last_name', 'id')
    user_profiles = {item['id']: item for item in django_user_profiles}
    mongo_user_profiles = {
        item['id']: item
        for item in db.profiles.find({"id": {"$in": user_ids}},
                                     {"id": 1, "slug": 1})
    }
    for user_id, profile in user_profiles.items():
        # A Django user may lack a Mongo profile (or a slug); fall back to "/"
        # rather than failing the whole request with a KeyError.
        profile["slug"] = mongo_user_profiles.get(user_id, {}).get("slug", "/")

    ref_re = "(" + '|'.join(regex_list) + ")"
    index_title = oref.index.title
    default_image = "https://www.sefaria.org/static/img/profile-default.png"
    results = []
    for sheet in sheets:
        # Resolve the matching refs first so that sheets without a usable
        # match trigger none of the per-sheet lookups below.
        anchors = []
        for included in sheet["includedRefs"]:
            if not included.startswith(index_title):
                continue
            if not regex.match(ref_re, included):
                continue
            try:
                anchors.append(model.Ref(included))
            except InputError:
                continue
        if not anchors:
            continue

        # Everything below depends only on the sheet, so compute it once
        # per sheet instead of once per matching ref.
        ownerData = user_profiles.get(
            sheet["owner"], {
                'first_name': u'Ploni',
                'last_name': u'Almoni',
                'email': u'*****@*****.**',
                'slug': 'Ploni-Almoni',
                'id': None
            })

        gravatar_base = "https://www.gravatar.com/avatar/" + hashlib.md5(
            ownerData["email"].lower()).hexdigest() + "?"
        gravatar_url_small = gravatar_base + urllib.urlencode({
            'd': default_image,
            's': str(80)
        })

        if "assigner_id" in sheet:
            asignerData = public_user_data(sheet["assigner_id"])
            sheet["assignerName"] = asignerData["name"]
            sheet["assignerProfileUrl"] = asignerData["profileUrl"]
        if "viaOwner" in sheet:
            viaOwnerData = public_user_data(sheet["viaOwner"])
            sheet["viaOwnerName"] = viaOwnerData["name"]
            sheet["viaOwnerProfileUrl"] = viaOwnerData["profileUrl"]

        if "group" in sheet:
            group = Group().load({"name": sheet["group"]})
            # No logo when the loaded group has no imageUrl attribute
            # (presumably also when load() found nothing -- confirm).
            sheet["groupLogo"] = getattr(group, "imageUrl", None)

        natural_date_created = naturaltime(
            datetime.strptime(sheet["dateCreated"], "%Y-%m-%dT%H:%M:%S.%f"))
        title = strip_tags(sheet["title"])

        for match in anchors:
            results.append({
                "owner": sheet["owner"],
                "_id": str(sheet["_id"]),
                "id": str(sheet["id"]),
                "anchorRef": match.normal(),
                "anchorVerse": match.sections[-1] if len(match.sections) else 1,
                "public": sheet["status"] == "public",
                "title": title,
                "sheetUrl": "/sheets/" + str(sheet["id"]),
                "options": sheet["options"],
                "naturalDateCreated": natural_date_created,
                "groupLogo": sheet.get("groupLogo", None),
                "ownerName":
                ownerData["first_name"] + " " + ownerData["last_name"],
                "via": sheet.get("via", None),
                "viaOwnerName": sheet.get("viaOwnerName", None),
                "assignerName": sheet.get("assignerName", None),
                "viaOwnerProfileUrl": sheet.get("viaOwnerProfileUrl", None),
                "assignerProfileUrl": sheet.get("assignerProfileUrl", None),
                "ownerProfileUrl": "/profile/" + ownerData["slug"],
                "ownerImageUrl": gravatar_url_small,
                "status": sheet["status"],
                "views": sheet["views"],
                "tags": sheet.get("tags", []),
                "likes": sheet.get("likes", []),
                "summary": sheet.get("summary", None),
                "attribution": sheet.get("attribution", None),
                "category": "Sheets",  # ditto
                "type": "sheet",  # ditto
            })

    return results
Example #37
0
def untagged_sheets(request):
    """
    Render one page (100 sheets) of public sheets matching `tags == []` as a
    minimal HTML list of links, followed by a link to the next page.

    The zero-based page number is read from the `page` GET parameter.
    """
    page = int(request.GET.get("page", 0))
    page_size = 100
    cursor = db.sheets.find(
        {"status": "public", "tags": []},
        {"id": 1, "title": 1},
    ).limit(page_size).skip(page_size * page)

    items = "".join([
        "<li><a href='/sheets/%d' target='_blank'>%s</a></li>" % (doc["id"], strip_tags(doc["title"]))
        for doc in cursor
    ])
    more_link = u"<br><a href='/admin/untagged-sheets?page=%d'>More ›</a>" % (page + 1)
    body = items + more_link

    return HttpResponse("<html><h1>Untagged Public Sheets</h1><ul>" + body + "</ul></html>")
Example #38
0
def get_sheets_for_ref(tref, pad=True, context=1):
    """
    Returns a list of sheets that include ref,
    formatted as needed for the Client Sidebar.

    `pad` and `context` widen the ref (via `padded_ref()` and
    `context_ref(context)`) before it is matched. Only public sheets are
    searched; one result dict is produced per matching ref of each sheet.
    """
    oref = model.Ref(tref)
    if pad:
        oref = oref.padded_ref()
    if context:
        oref = oref.context_ref(context)

    ref_re = oref.regex()
    regex_list = oref.regex(as_list=True)
    ref_clauses = [{"included_refs": {"$regex": r}} for r in regex_list]
    sheets = db.sheets.find(
        {"$or": ref_clauses, "status": "public"},
        {"id": 1, "title": 1, "owner": 1, "included_refs": 1})

    results = []
    for sheet in sheets:
        # Check for multiple matching refs within this sheet
        for included in sheet["included_refs"]:
            if not regex.match(ref_re, included):
                continue
            try:
                match = model.Ref(included)
            except InputError:
                continue
            results.append({
                "category": "Sheets",
                "type": "sheet",
                "owner": sheet["owner"],
                "_id": str(sheet["_id"]),
                "anchorRef": match.normal(),
                # A ref without sections has no last section to anchor on;
                # fall back to 1 instead of raising IndexError.
                "anchorVerse": match.sections[-1] if len(match.sections) else 1,
                "public": True,
                "commentator": user_link(sheet["owner"]),
                "text": "<a class='sheetLink' href='/sheets/%d'>%s</a>" % (sheet["id"], strip_tags(sheet["title"])),
            })

    return results
Example #39
0
def get_sheets_for_ref(tref, uid=None, in_collection=None):
    """
    Returns a list of sheets that include ref,
    formatted as needed for the Client Sidebar.
    If `uid` is present return user sheets, otherwise return public sheets.
    If `in_collection` (list of slugs) is present, only return sheets in one of the listed collections.

    One result dict is produced per anchor ref reported by
    `oref.get_all_anchor_refs` for each sheet; sheets are visited in
    descending order of views.
    """
    oref = model.Ref(tref)
    # perform initial search with context to catch ranges that include a segment ref
    segment_refs = [r.normal() for r in oref.all_segment_refs()]
    query = {"expandedRefs": {"$in": segment_refs}}
    if uid:
        query["owner"] = uid
    else:
        query["status"] = "public"
    if in_collection:
        collections = CollectionSet({"slug": {"$in": in_collection}})
        sheets_ids = [
            sheet_id for collection in collections
            for sheet_id in collection.sheets
        ]
        query["id"] = {"$in": sheets_ids}

    projection = {
        "id": 1,
        "title": 1,
        "owner": 1,
        "viaOwner": 1,
        "via": 1,
        "dateCreated": 1,
        "includedRefs": 1,
        "expandedRefs": 1,
        "views": 1,
        "topics": 1,
        "status": 1,
        "summary": 1,
        "attribution": 1,
        "assigner_id": 1,
        "likes": 1,
        "displayedCollection": 1,
        "options": 1,
        # Read below; without it the projection drops the field and every
        # sheet would be reported as not featured.
        "is_featured": 1,
    }
    sheetsObj = db.sheets.find(query, projection).sort([["views", -1]])
    sheetsObj.hint("expandedRefs_1")
    sheets = list(sheetsObj)

    # Load the owners of all matching sheets in two bulk queries.
    user_ids = list({s["owner"] for s in sheets})
    django_user_profiles = User.objects.filter(id__in=user_ids).values(
        'email', 'first_name', 'last_name', 'id')
    user_profiles = {item['id']: item for item in django_user_profiles}
    mongo_user_profiles = {
        item['id']: item
        for item in db.profiles.find(
            {"id": {"$in": user_ids}},
            {"id": 1, "slug": 1, "profile_pic_url_small": 1})
    }
    for user_id, profile in user_profiles.items():
        # A Django user may have no Mongo profile, or one lacking these
        # fields; use the same fallbacks either way.
        mongo_profile = mongo_user_profiles.get(user_id, {})
        profile["slug"] = mongo_profile.get("slug", "/")
        profile["profile_pic_url_small"] = mongo_profile.get(
            "profile_pic_url_small", "")

    results = []
    for sheet in sheets:
        anchor_ref_list, anchor_ref_expanded_list = oref.get_all_anchor_refs(
            segment_refs, sheet.get("includedRefs", []),
            sheet.get("expandedRefs", []))
        ownerData = user_profiles.get(
            sheet["owner"], {
                'first_name': 'Ploni',
                'last_name': 'Almoni',
                'email': '*****@*****.**',
                'slug': 'Ploni-Almoni',
                'id': None,
                'profile_pic_url_small': ''
            })

        if "assigner_id" in sheet:
            asignerData = public_user_data(sheet["assigner_id"])
            sheet["assignerName"] = asignerData["name"]
            sheet["assignerProfileUrl"] = asignerData["profileUrl"]
        if "viaOwner" in sheet:
            viaOwnerData = public_user_data(sheet["viaOwner"])
            sheet["viaOwnerName"] = viaOwnerData["name"]
            sheet["viaOwnerProfileUrl"] = viaOwnerData["profileUrl"]

        if "displayedCollection" in sheet:
            collection = Collection().load(
                {"slug": sheet["displayedCollection"]})
            sheet["collectionTOC"] = getattr(collection, "toc", None)
        topics = add_langs_to_topics(sheet.get("topics", []))
        for anchor_ref, anchor_ref_expanded in zip(anchor_ref_list,
                                                   anchor_ref_expanded_list):
            sheet_data = {
                "owner": sheet["owner"],
                "_id": str(sheet["_id"]),
                "id": str(sheet["id"]),
                "public": sheet["status"] == "public",
                "title": strip_tags(sheet["title"]),
                "sheetUrl": "/sheets/" + str(sheet["id"]),
                "anchorRef": anchor_ref.normal(),
                "anchorRefExpanded": [r.normal() for r in anchor_ref_expanded],
                "options": sheet["options"],
                "collectionTOC": sheet.get("collectionTOC", None),
                "ownerName":
                ownerData["first_name"] + " " + ownerData["last_name"],
                "via": sheet.get("via", None),
                "viaOwnerName": sheet.get("viaOwnerName", None),
                "assignerName": sheet.get("assignerName", None),
                "viaOwnerProfileUrl": sheet.get("viaOwnerProfileUrl", None),
                "assignerProfileUrl": sheet.get("assignerProfileUrl", None),
                "ownerProfileUrl": "/profile/" + ownerData["slug"],
                "ownerImageUrl": ownerData.get('profile_pic_url_small', ''),
                "status": sheet["status"],
                "views": sheet["views"],
                "topics": topics,
                "likes": sheet.get("likes", []),
                "summary": sheet.get("summary", None),
                "attribution": sheet.get("attribution", None),
                "is_featured": sheet.get("is_featured", False),
                "category": "Sheets",  # ditto
                "type": "sheet",  # ditto
            }

            results.append(sheet_data)
    return results
Example #40
0
def get_sheets_for_ref(tref, uid=None, in_group=None):
    """
    Returns a list of sheets that include ref,
    formatted as needed for the Client Sidebar.
    If `uid` is present return user sheets, otherwise return public sheets.
    If `in_group` (list) is present, only return sheets in one of the listed groups.

    One result dict is produced per anchor ref reported by
    `oref.get_all_anchor_refs` for each sheet; sheets are visited in
    descending order of views.
    """
    oref = model.Ref(tref)
    # perform initial search with context to catch ranges that include a segment ref
    segment_refs = [r.normal() for r in oref.all_segment_refs()]
    query = {"expandedRefs": {"$in": segment_refs}}
    if uid:
        query["owner"] = uid
    else:
        query["status"] = "public"
    if in_group:
        query["group"] = {"$in": in_group}

    projection = {
        "id": 1,
        "title": 1,
        "owner": 1,
        "viaOwner": 1,
        "via": 1,
        "dateCreated": 1,
        "includedRefs": 1,
        "expandedRefs": 1,
        "views": 1,
        "topics": 1,
        "status": 1,
        "summary": 1,
        "attribution": 1,
        "assigner_id": 1,
        "likes": 1,
        "group": 1,
        "options": 1,
        # Read below; without it the projection drops the field and every
        # sheet would be reported as not featured.
        "is_featured": 1,
    }
    sheetsObj = db.sheets.find(query, projection).sort([["views", -1]])
    sheetsObj.hint("expandedRefs_1")
    sheets = list(sheetsObj)

    # Load the owners of all matching sheets in two bulk queries.
    user_ids = list({s["owner"] for s in sheets})
    django_user_profiles = User.objects.filter(id__in=user_ids).values(
        'email', 'first_name', 'last_name', 'id')
    user_profiles = {item['id']: item for item in django_user_profiles}
    mongo_user_profiles = {
        item['id']: item
        for item in db.profiles.find(
            {"id": {"$in": user_ids}},
            {"id": 1, "slug": 1, "profile_pic_url_small": 1})
    }
    for user_id, profile in user_profiles.items():
        # A Django user may have no Mongo profile, or one lacking these
        # fields; use the same fallbacks either way.
        mongo_profile = mongo_user_profiles.get(user_id, {})
        profile["slug"] = mongo_profile.get("slug", "/")
        profile["profile_pic_url_small"] = mongo_profile.get(
            "profile_pic_url_small", "")

    default_image = "https://www.sefaria.org/static/img/profile-default.png"
    results = []
    for sheet in sheets:
        anchor_ref_list, anchor_ref_expanded_list = oref.get_all_anchor_refs(
            segment_refs, sheet.get("includedRefs", []),
            sheet.get("expandedRefs", []))
        ownerData = user_profiles.get(
            sheet["owner"], {
                'first_name': 'Ploni',
                'last_name': 'Almoni',
                'email': '*****@*****.**',
                'slug': 'Ploni-Almoni',
                'id': None,
                'profile_pic_url_small': ''
            })
        if len(ownerData.get('profile_pic_url_small', '')) == 0:
            # No stored picture: build a Gravatar URL from the owner's email.
            gravatar_base = "https://www.gravatar.com/avatar/" + hashlib.md5(
                ownerData["email"].lower().encode('utf8')).hexdigest() + "?"
            gravatar_url_small = gravatar_base + urllib.parse.urlencode(
                {
                    'd': default_image,
                    's': str(80)
                })
            ownerData['profile_pic_url_small'] = gravatar_url_small

        if "assigner_id" in sheet:
            asignerData = public_user_data(sheet["assigner_id"])
            sheet["assignerName"] = asignerData["name"]
            sheet["assignerProfileUrl"] = asignerData["profileUrl"]
        if "viaOwner" in sheet:
            viaOwnerData = public_user_data(sheet["viaOwner"])
            sheet["viaOwnerName"] = viaOwnerData["name"]
            sheet["viaOwnerProfileUrl"] = viaOwnerData["profileUrl"]

        if "group" in sheet:
            group = Group().load({"name": sheet["group"]})
            sheet["groupLogo"] = getattr(group, "imageUrl", None)
            sheet["groupTOC"] = getattr(group, "toc", None)
        natural_date_created = naturaltime(
            datetime.strptime(sheet["dateCreated"], "%Y-%m-%dT%H:%M:%S.%f"))
        topics = add_langs_to_topics(sheet.get("topics", []))
        for anchor_ref, anchor_ref_expanded in zip(anchor_ref_list,
                                                   anchor_ref_expanded_list):
            sheet_data = {
                "owner": sheet["owner"],
                "_id": str(sheet["_id"]),
                "id": str(sheet["id"]),
                "public": sheet["status"] == "public",
                "title": strip_tags(sheet["title"]),
                "sheetUrl": "/sheets/" + str(sheet["id"]),
                "anchorRef": anchor_ref.normal(),
                "anchorRefExpanded": [r.normal() for r in anchor_ref_expanded],
                "options": sheet["options"],
                "naturalDateCreated": natural_date_created,
                "group": sheet.get("group", None),
                "groupLogo": sheet.get("groupLogo", None),
                "groupTOC": sheet.get("groupTOC", None),
                "ownerName":
                ownerData["first_name"] + " " + ownerData["last_name"],
                "via": sheet.get("via", None),
                "viaOwnerName": sheet.get("viaOwnerName", None),
                "assignerName": sheet.get("assignerName", None),
                "viaOwnerProfileUrl": sheet.get("viaOwnerProfileUrl", None),
                "assignerProfileUrl": sheet.get("assignerProfileUrl", None),
                "ownerProfileUrl": "/profile/" + ownerData["slug"],
                "ownerImageUrl": ownerData.get('profile_pic_url_small', ''),
                "status": sheet["status"],
                "views": sheet["views"],
                "topics": topics,
                "likes": sheet.get("likes", []),
                "summary": sheet.get("summary", None),
                "attribution": sheet.get("attribution", None),
                "is_featured": sheet.get("is_featured", False),
                "category": "Sheets",  # ditto
                "type": "sheet",  # ditto
            }

            results.append(sheet_data)
    return results
Example #41
0
def get_sheets_for_ref(tref, uid=None):
    """
    Returns a list of sheets that include ref,
    formatted as needed for the Client Sidebar.
    If `uid` is present return user sheets, otherwise return public sheets.

    One result dict is produced per (sheet, matching included ref) pair;
    sheets are visited in descending order of views.
    """
    oref = model.Ref(tref)
    # perform initial search with context to catch ranges that include a segment ref
    regex_list = oref.context_ref().regex(as_list=True)
    query = {"$or": [{"includedRefs": {"$regex": r}} for r in regex_list]}
    if uid:
        query["owner"] = uid
    else:
        query["status"] = "public"
    projection = {
        "id": 1, "title": 1, "owner": 1, "viaOwner": 1, "via": 1,
        "dateCreated": 1, "includedRefs": 1, "views": 1, "tags": 1,
        "status": 1, "summary": 1, "attribution": 1, "assigner_id": 1,
        "likes": 1, "options": 1,
    }
    sheets = list(db.sheets.find(query, projection).sort([["views", -1]]))

    # Load the owners of all matching sheets in two bulk queries.
    user_ids = list(set(s["owner"] for s in sheets))
    django_user_profiles = User.objects.filter(id__in=user_ids).values('email', 'first_name', 'last_name', 'id')
    user_profiles = {item['id']: item for item in django_user_profiles}
    mongo_user_profiles = {
        item['id']: item
        for item in db.profiles.find({"id": {"$in": user_ids}}, {"id": 1, "slug": 1})
    }
    for user_id, profile in user_profiles.items():
        # A Django user may lack a Mongo profile (or a slug); fall back to "/"
        # rather than failing the whole request with a KeyError.
        profile["slug"] = mongo_user_profiles.get(user_id, {}).get("slug", "/")

    ref_re = "(" + '|'.join(regex_list) + ")"
    index_title = oref.index.title
    default_image = "https://www.sefaria.org/static/img/profile-default.png"
    results = []
    for sheet in sheets:
        # Resolve the matching refs first so that sheets without a usable
        # match trigger none of the per-sheet lookups below.
        anchors = []
        for included in sheet["includedRefs"]:
            if not included.startswith(index_title):
                continue
            if not regex.match(ref_re, included):
                continue
            try:
                anchors.append(model.Ref(included))
            except InputError:
                continue
        if not anchors:
            continue

        # Everything below depends only on the sheet, so compute it once
        # per sheet instead of once per matching ref.
        ownerData = user_profiles.get(sheet["owner"], {'first_name': u'Ploni', 'last_name': u'Almoni', 'email': u'*****@*****.**', 'slug': 'Ploni-Almoni', 'id': None})

        gravatar_base = "https://www.gravatar.com/avatar/" + hashlib.md5(ownerData["email"].lower()).hexdigest() + "?"
        gravatar_url_small = gravatar_base + urllib.urlencode({'d': default_image, 's': str(80)})

        if "assigner_id" in sheet:
            asignerData = public_user_data(sheet["assigner_id"])
            sheet["assignerName"] = asignerData["name"]
            sheet["assignerProfileUrl"] = asignerData["profileUrl"]
        if "viaOwner" in sheet:
            viaOwnerData = public_user_data(sheet["viaOwner"])
            sheet["viaOwnerName"] = viaOwnerData["name"]
            sheet["viaOwnerProfileUrl"] = viaOwnerData["profileUrl"]

        natural_date_created = naturaltime(datetime.strptime(sheet["dateCreated"], "%Y-%m-%dT%H:%M:%S.%f"))
        title = strip_tags(sheet["title"])

        for match in anchors:
            results.append({
                "owner": sheet["owner"],
                "_id": str(sheet["_id"]),
                "id": str(sheet["id"]),
                "anchorRef": match.normal(),
                "anchorVerse": match.sections[-1] if len(match.sections) else 1,
                "public": sheet["status"] == "public",
                "title": title,
                "sheetUrl": "/sheets/" + str(sheet["id"]),
                "options": sheet["options"],
                "naturalDateCreated": natural_date_created,
                "ownerName": ownerData["first_name"] + " " + ownerData["last_name"],
                "via": sheet.get("via", None),
                "viaOwnerName": sheet.get("viaOwnerName", None),
                "assignerName": sheet.get("assignerName", None),
                "viaOwnerProfileUrl": sheet.get("viaOwnerProfileUrl", None),
                "assignerProfileUrl": sheet.get("assignerProfileUrl", None),
                "ownerProfileUrl": "/profile/" + ownerData["slug"],
                "ownerImageUrl": gravatar_url_small,
                "status": sheet["status"],
                "views": sheet["views"],
                "tags": sheet.get("tags", []),
                "likes": sheet.get("likes", []),
                "summary": sheet.get("summary", None),
                "attribution": sheet.get("attribution", None),
                "category": "Sheets",  # ditto
                "type": "sheet",  # ditto
            })

    return results
Example #42
0
                                     "profile_url",
                                   ])

    for row in reader:
      email = row[8]
      profile = UserProfile(email=email)
      
      if not profile.date_joined:
        writer.writerow(row + ["false", 0, 0, "", "", "", ""])
        continue

      has_account         = "true"
      sheets_count        = db.sheets.find({"owner": profile.id}).count()
      public_sheets_count = db.sheets.find({"owner": profile.id, "status": "public"}).count()
      sheet_views         = sum([sheet["views"] for sheet in sheet_list(query={"owner": profile.id})])
      bio                 = strip_tags(profile.bio)
      organization        = profile.organization
      position            = profile.position
      jewish_education    = ". ".join(profile.jewish_education)
      profile_url         = "https://www.sefaria.org/profile/%s" % profile.slug

      new_row = row + [
                        has_account, 
                        sheets_count, 
                        public_sheets_count,
                        sheet_views,
                        bio,
                        organization, 
                        position, 
                        jewish_education,
                        profile_url,
 def annotate_sheet(n, sheet_id):
     """
     Copy the title and summary of sheet `sheet_id` into `n["content"]`.

     The title is stored under "sheet_title" after tag stripping (with
     newline removal requested); the summary is stored under "summary".
     """
     metadata = get_sheet_metadata(id=sheet_id)
     clean_title = strip_tags(metadata["title"], remove_new_lines=True)
     n["content"]["sheet_title"] = clean_title
     summary = metadata["summary"]
     n["content"]["summary"] = summary
Example #44
0
def get_sheets_for_ref(tref, pad=True, context=1):
    """
    Returns a list of sheets that include ref,
    formatted as needed for the Client Sidebar.

    `pad` and `context` widen the ref (via `padded_ref()` and
    `context_ref(context)`) before it is matched. Sheets whose status is in
    LISTED_SHEETS are searched; one result dict is produced per matching ref
    of each sheet.
    """
    oref = model.Ref(tref)
    if pad:
        oref = oref.padded_ref()
    if context:
        oref = oref.context_ref(context)

    ref_re = oref.regex()

    results = []
    sheets = db.sheets.find(
        {"included_refs": {"$regex": ref_re}, "status": {"$in": LISTED_SHEETS}},
        {"id": 1, "title": 1, "owner": 1, "included_refs": 1})
    for sheet in sheets:
        # Check for multiple matching refs within this sheet
        matched_orefs = [model.Ref(r) for r in sheet["included_refs"] if regex.match(ref_re, r)]
        for match in matched_orefs:
            results.append({
                "category": "Sheets",
                "type": "sheet",
                "owner": sheet["owner"],
                "_id": str(sheet["_id"]),
                "anchorRef": match.normal(),
                # A ref without sections has no last section to anchor on;
                # fall back to 1 instead of raising IndexError.
                "anchorVerse": match.sections[-1] if len(match.sections) else 1,
                "public": True,
                "commentator": user_link(sheet["owner"]),
                "text": "<a class='sheetLink' href='/sheets/%d'>%s</a>" % (sheet["id"], strip_tags(sheet["title"])),
            })

    return results
Example #45
0
def get_sheets_for_ref(tref, uid=None):
    """
    Returns a list of sheets that include ref,
    formatted as needed for the Client Sidebar.
    If `uid` is present return user sheets, otherwise return public sheets.

    Sheets are queried by regex against `sources.ref` and sorted by view
    count, descending. Each source ref of a returned sheet is then parsed and
    kept only if it overlaps `tref`; source refs that raise InputError are
    skipped. One dict is returned per overlapping source ref, so a sheet may
    appear more than once.
    """
    oref = model.Ref(tref)
    # perform initial search with context to catch ranges that include a segment ref
    regex_list = oref.context_ref().regex(as_list=True)
    ref_clauses = [{"sources.ref": {"$regex": r}} for r in regex_list]
    query = {"$or": ref_clauses}
    if uid:
        query["owner"] = uid
    else:
        query["status"] = "public"
    projection = {
        "id": 1,
        "title": 1,
        "owner": 1,
        "sources.ref": 1,
        "views": 1,
        "tags": 1,
        "status": 1,
    }
    sheets = db.sheets.find(query, projection).sort([["views", -1]])

    results = []
    for sheet in sheets:
        # Collect the source refs of this sheet that actually overlap `oref`;
        # the regex query above is deliberately broader than an overlap test.
        anchors = []
        for source in sheet.get("sources", []):
            if "ref" not in source:
                continue
            try:
                source_oref = model.Ref(source["ref"])
                if oref.overlaps(source_oref):
                    anchors.append(source_oref)
            except InputError:
                continue
        if not anchors:
            continue

        # Everything below depends only on the sheet, so look it up once per
        # sheet rather than once per overlapping source ref.
        owner_data = public_user_data(sheet["owner"])
        title = strip_tags(sheet["title"])
        sheet_link = "<a class='sheetLink' href='/sheets/%d'>%s</a>" % (sheet["id"], title)
        commentator = user_link(sheet["owner"])

        for anchor in anchors:
            sheet_data = {
                "owner": sheet["owner"],
                "_id": str(sheet["_id"]),
                "anchorRef": anchor.normal(),
                # refs without sections have no last section; anchor on 1
                "anchorVerse": anchor.sections[-1] if len(anchor.sections) else 1,
                "public": sheet["status"] == "public",
                "text": sheet_link,  # legacy, used in S1
                "title": title,
                "sheetUrl": "/sheets/" + str(sheet["id"]),
                "ownerName": owner_data["name"],
                "ownerProfileUrl": owner_data["profileUrl"],
                "ownerImageUrl": owner_data["imageUrl"],
                "status": sheet["status"],
                "views": sheet["views"],
                "tags": sheet.get("tags", []),
                "commentator": commentator,  # legacy, used in S1
                "category": "Sheets",  # ditto
                "type": "sheet",  # ditto
            }

            results.append(sheet_data)

    return results