def get_or_make_summary_node(summary, nodes): """ Returns the node in 'summary' that is named by the list of categories in 'nodes', creates the node if it doesn't exist. Used recursively on sub-summaries. """ if len(nodes) == 1: # Basecase, only need to search through on level for node in summary: if node.get("category") == nodes[0]: return node["contents"] # we didn't find it, so let's add it summary.append({ "category": nodes[0], "heCategory": hebrew_term(nodes[0]), "contents": [] }) return summary[-1]["contents"] # Look for the first category, or add it, then recur for node in summary: if node.get("category") == nodes[0]: return get_or_make_summary_node(node["contents"], nodes[1:]) summary.append({ "category": nodes[0], "heCategory": hebrew_term(nodes[0]), "contents": [] }) return get_or_make_summary_node(summary[-1]["contents"], nodes[1:])
def get_or_make_summary_node(summary, nodes, contents_only=True, make_if_not_found=True):
    """
    Returns the node in 'summary' that is named by the list of categories in 'nodes'.
    If make_if_not_found is True, creates the node if it doesn't exist.
    Used recursively on sub-summaries.
    """
    if len(nodes) == 1:
        # Base case, only need to search through one level
        for node in summary:
            if node.get("category") == nodes[0]:
                return node["contents"] if contents_only else node
        # we didn't find it, so let's add it
        if make_if_not_found:
            summary.append({"category": nodes[0], "heCategory": hebrew_term(nodes[0]), "contents": []})
            return summary[-1]["contents"] if contents_only else summary[-1]
        else:
            return None

    # Look for the first category, or add it, then recur
    for node in summary:
        if node.get("category") == nodes[0]:
            return get_or_make_summary_node(node["contents"], nodes[1:], contents_only=contents_only, make_if_not_found=make_if_not_found)
    if make_if_not_found:
        summary.append({"category": nodes[0], "heCategory": hebrew_term(nodes[0]), "contents": []})
        return get_or_make_summary_node(summary[-1]["contents"], nodes[1:], contents_only=contents_only, make_if_not_found=make_if_not_found)
    else:
        return None
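# Usage sketch for the flag-taking version above (hypothetical category path;
# assumes hebrew_term is in scope, exactly as the function itself does).
toc = []
# Creates the nested "Tanakh" -> "Torah" nodes and returns the inner contents list.
torah_contents = get_or_make_summary_node(toc, ["Tanakh", "Torah"])
# A second lookup finds the existing node instead of duplicating it.
assert get_or_make_summary_node(toc, ["Tanakh", "Torah"], make_if_not_found=False) is torah_contents
# contents_only=False returns the full node dict, including its "heCategory".
torah_node = get_or_make_summary_node(toc, ["Tanakh", "Torah"], contents_only=False)
# An unknown path with make_if_not_found=False returns None instead of mutating the summary.
assert get_or_make_summary_node(toc, ["Tanakh", "Writings"], make_if_not_found=False) is None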
def format_link_object_for_client(link, with_text, ref, pos=None):
    """
    :param link: Link object
    :param ref: Ref object of the source of the link
    :param pos: Optional position of the Ref in the Link. If not passed, it will be derived from the first two arguments.
    :return: Dict
    """
    com = {}

    # The text we're asked to get links to
    anchorRef = Ref(link.refs[pos])

    # The link we found to anchorRef
    linkRef = Ref(link.refs[(pos + 1) % 2])

    com["_id"] = str(link._id)
    com['index_title'] = linkRef.index.title
    com["category"] = linkRef.primary_category  # usually the index's categories[0] or "Commentary".
    com["type"] = link.type
    com["ref"] = linkRef.tref
    com["anchorRef"] = anchorRef.normal()
    com["sourceRef"] = linkRef.normal()
    com["sourceHeRef"] = linkRef.he_normal()
    com["anchorVerse"] = anchorRef.sections[-1] if len(anchorRef.sections) else 0
    com["anchorText"] = getattr(link, "anchorText", "")

    # Pad out the sections list, so that comparisons between comment numbers are apples-to-apples
    lsections = linkRef.sections[:] + [0] * (linkRef.index_node.depth - len(linkRef.sections))

    # Build a decimal comment number based on the last two digits of the section array
    com["commentaryNum"] = lsections[-1] if len(lsections) == 1 \
        else float('{0}.{1:04d}'.format(*lsections[-2:])) if len(lsections) > 1 else 0

    if with_text:
        text = TextFamily(linkRef, context=0, commentary=False)
        com["text"] = text.text if isinstance(text.text, basestring) else JaggedTextArray(text.text).flatten_to_array()
        com["he"] = text.he if isinstance(text.he, basestring) else JaggedTextArray(text.he).flatten_to_array()

    # If the link is commentary, strip redundant info (e.g. "Rashi on Genesis 4:2" -> "Rashi").
    # This is now simpler, and there is explicit data on the index record for it.
    if com["type"] == "commentary":
        com["linkGroupTitle"] = {
            'en': getattr(linkRef.index, 'collective_title', linkRef.index.title),
            'he': hebrew_term(getattr(linkRef.index, 'collective_title', linkRef.index.get_title("he")))
        }
        com["commentator"] = getattr(linkRef.index, 'collective_title', linkRef.index.title)  # TODO: deprecate
        com["heCommentator"] = hebrew_term(getattr(linkRef.index, 'collective_title', linkRef.index.get_title("he")))  # TODO: deprecate
    else:
        com["linkGroupTitle"] = {'en': linkRef.index.title, 'he': linkRef.index.get_title("he")}
        com["commentator"] = linkRef.index.title  # TODO: deprecate
        com["heCommentator"] = linkRef.index.get_title("he")  # TODO: deprecate

    if com["type"] != "commentary" and com["category"] == "Commentary":
        com["category"] = "Quoting Commentary"  # add a fix here for quoting commentary appearing together with commentary in s2 panels

    if linkRef.index_node.primary_title("he"):
        com["heTitle"] = linkRef.index_node.primary_title("he")

    return com
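# Worked sketch of the "commentaryNum" padding above, with hypothetical section
# numbers: a depth-3 commentary ref cited only to [4, 2] is padded to [4, 2, 0],
# and its last two entries become the sortable float 2.0 (i.e. "2.0000"), so it
# compares cleanly against a fully cited sibling like [4, 2, 7] -> 2.0007.
lsections = [4, 2] + [0] * (3 - 2)                             # -> [4, 2, 0]
commentary_num = float('{0}.{1:04d}'.format(*lsections[-2:]))  # -> 2.0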
def upload_index(full_text, upload=False):
    """
    :param full_text: Data structure from parse_text()
    :param upload: must be set to True, otherwise the function does nothing
    """
    if not upload:
        return
    books = [u'Genesis', u'Exodus', u'Leviticus', u'Numbers', u'Deuteronomy']

    # create index record
    record = SchemaNode()
    record.add_title('Chizkuni', 'en', primary=True)
    record.add_title(u'חזקוני', 'he', primary=True)
    record.key = 'Chizkuni'

    # add nodes
    for book in books:
        node = JaggedArrayNode()
        node.add_title(book, 'en', primary=True)
        node.add_title(hebrew_term(book), 'he', primary=True)
        node.key = book
        node.depth = 3
        node.addressTypes = ['Integer', 'Integer', 'Integer']
        node.sectionNames = ['Chapter', 'Verse', 'Comment']
        record.append(node)
    record.validate()

    index = {
        "title": "Chizkuni",
        "categories": ["Commentary2", "Tanach", "Chizkuni"],
        "schema": record.serialize()
    }
    post_index(index)
def build_index():
    books = library.get_indexes_in_category('Torah')

    # create index record
    record = SchemaNode()
    record.add_title('Baal HaTurim', 'en', primary=True)
    record.add_title(u'בעל הטורים', 'he', primary=True)
    record.key = 'Baal HaTurim'

    # add nodes
    for book in books:
        node = JaggedArrayNode()
        node.add_title(book, 'en', primary=True)
        node.add_title(hebrew_term(book), 'he', primary=True)
        node.key = book
        node.depth = 3
        node.addressTypes = ['Integer', 'Integer', 'Integer']
        node.sectionNames = ['Chapter', 'Verse', 'Comment']
        node.toc_zoom = 2
        record.append(node)
    record.validate()

    index = {
        "title": "Baal HaTurim",
        "categories": ["Commentary2", "Torah", "Baal HaTurim"],
        "schema": record.serialize()
    }
    return index
def build_index(section_names):
    record = SchemaNode()
    record.add_title(u'Noda BeYehuda', 'en', True)
    record.add_title(u'נודע ביהודה', 'he', True)
    record.key = u'Noda BeYehuda'

    for section in section_names:
        node = JaggedArrayNode()
        node.add_title(section, 'en', True)
        node.add_title(hebrew_term(section), 'he', True)
        node.key = section
        node.depth = 2
        node.addressTypes = ['Integer', 'Integer']
        node.sectionNames = ['Teshuva', 'Paragraph']
        record.append(node)
    record.validate()

    index = {
        'title': 'Noda BeYehuda',
        'categories': ['Responsa'],
        'schema': record.serialize()
    }
    return index
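# Usage sketch with hypothetical section names (the real list comes from the
# parsed source files); assumes the same post_index helper used by upload_index above.
index = build_index([u'Orach Chaim', u'Yoreh Deah', u'Even HaEzer', u'Choshen Mishpat'])
post_index(index)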
def build_index():
    books = library.get_indexes_in_category('Torah')

    # create index record
    record = SchemaNode()
    record.add_title('Siftei Hakhamim', 'en', primary=True)
    record.add_title(u'שפתי חכמים', 'he', primary=True)
    record.key = 'Siftei Hakhamim'

    # add nodes
    for book in books:
        node = JaggedArrayNode()
        node.add_title(book, 'en', primary=True)
        node.add_title(hebrew_term(book), 'he', primary=True)
        node.key = book
        node.depth = 3
        node.addressTypes = ['Integer', 'Integer', 'Integer']
        node.sectionNames = ['Chapter', 'Verse', 'Comment']
        node.toc_zoom = 2
        record.append(node)
    record.validate()

    index = {
        "title": "Siftei Hakhamim",
        "categories": ["Commentary2", "Torah", "Rashi"],
        "schema": record.serialize()
    }
    return index
def upload_footnote_index():
    """
    Footnotes are uploaded as a Commentary2 index - a schema with each book as a depth-2 JaggedArray
    """
    books = library.get_indexes_in_category('Tanach')

    # create index record
    record = SchemaNode()
    record.add_title('JPS 1985 Footnotes', 'en', primary=True)
    record.add_title(u'הערות שוליים תרגום 1985 של JPS', 'he', primary=True)
    record.key = 'JPS 1985 Footnotes'

    # add nodes
    for book in books:
        node = JaggedArrayNode()
        node.add_title(book, 'en', primary=True)
        node.add_title(hebrew_term(book), 'he', primary=True)
        node.key = book
        node.depth = 2
        node.addressTypes = ['Integer', 'Integer']
        node.sectionNames = ['Chapter', 'Footnote']
        record.append(node)
    record.validate()

    index = {
        "title": "JPS 1985 Footnotes",
        "categories": ["Commentary2", "Tanach", "JPS"],
        "schema": record.serialize()
    }
    functions.post_index(index)
def create_daf_connection_story(cls, **kwargs):
    # todo: use recommendation engine

    daf_ref = daf_yomi_ref()
    connection_link, connection_ref = random_connection_to(daf_ref)
    if not connection_ref:
        return

    category = connection_ref.index.categories[0]

    mustHave = []
    if not connection_ref.is_text_translated():
        mustHave += ["readsHebrew"]

    if category == "Talmud":
        title = {"en": "Related Passage", "he": "סוגיה קשורה"}
    else:
        title = {"en": category + " on the Daf", "he": hebrew_term(category) + " " + "על הדף"}

    try:
        cls.generate_story(
            refs=[connection_link.ref_opposite(connection_ref).normal(), connection_ref.normal()],
            title=title,
            lead={'en': 'Daf Yomi', 'he': "דף יומי"},
            mustHave=mustHave,
            **kwargs
        ).save()
    except AttributeError:
        # connection_link.ref_opposite(connection_ref).normal() err's out ... why?
        return
def _create_parasha_verse_commentator_story(parasha_obj, mustHave=None, **kwargs):
    from sefaria.utils.calendars import make_parashah_response_from_calendar_entry
    from . import ref_data

    mustHave = mustHave or []
    cal = make_parashah_response_from_calendar_entry(parasha_obj)[0]
    parasha_ref = text.Ref(parasha_obj["ref"])
    top_ref = ref_data.RefDataSet.from_ref(parasha_ref).nth_ref(iteration)

    commentary_ref = random_commentary_on(top_ref)
    if not commentary_ref:
        return
    if not commentary_ref.is_text_translated():
        mustHave += ["readsHebrew"]

    commentator = commentary_ref.index.collective_title

    cls.generate_story(
        refs=[top_ref.normal(), commentary_ref.normal()],
        title={"en": commentator + " on " + cal["displayValue"]["en"],
               "he": hebrew_term(commentator) + " על " + cal["displayValue"]["he"]},
        lead={"en": "Weekly Torah Portion", "he": 'פרשת השבוע'},
        mustHave=mustHave,
        **kwargs
    ).save()
def _data_object(cls, **kwargs):
    normal_topics = [text.Term.normalize(topic) for topic in kwargs.get("topics")]
    # todo: handle possibility of Hebrew terms trending.
    return {
        "topics": [{"en": topic, "he": hebrew_term(topic)} for topic in normal_topics],
        "title": kwargs.get("title", {"en": "Trending Recently", "he": u"פופולרי"}),
        "lead": kwargs.get("lead", {"en": "Topics", "he": u"נושאים"})
    }
def generate_topic_story(cls, topic, **kwargs):
    t = text.Term.normalize(topic)
    return cls.generate_story(
        sheet_ids=cls._get_topic_sheet_ids(topic),
        title={"en": t, "he": hebrew_term(t)},
        **kwargs
    )
def build_index(parser):
    assert isinstance(parser, PeneiDavid)

    root = SchemaNode()
    root.add_title('Penei David', 'en', primary=True)
    root.add_title(u'פני דוד', 'he', primary=True)
    root.key = 'Penei David'

    title_node = JaggedArrayNode()
    title_node.add_title('Title Page', 'en', primary=True)
    title_node.add_title(u'עמוד שער', 'he', primary=True)
    title_node.key = 'Title Page'
    title_node.depth = 1
    title_node.addressTypes = ['Integer']
    title_node.sectionNames = ["Paragraph"]
    root.append(title_node)

    # add book nodes
    for book in parser.book_names:
        book_node = SchemaNode()
        book_node.add_title(book, 'en', primary=True)
        book_node.add_title(hebrew_term(book), 'he', primary=True)
        book_node.key = book

        # add parsha nodes
        for parsha in parser.parsha_by_book[book]:
            parsha_node = JaggedArrayNode()
            parsha_node.add_title(parsha, 'en', primary=True)
            parsha_node.add_title(parser.parsha_names_translated[parsha], 'he', primary=True)
            parsha_node.key = parsha
            parsha_node.depth = 2
            parsha_node.addressTypes = ['Integer', 'Integer']
            parsha_node.sectionNames = ['Comment', 'Paragraph']
            book_node.append(parsha_node)
        root.append(book_node)
    root.validate()

    index = {
        "title": "Penei David",
        "categories": ["Commentary2", "Torah", "Penei David"],
        "schema": root.serialize()
    }
    return index
def _data_object(cls, **kwargs):
    t = kwargs.get("topic")
    trefs = kwargs.get("refs")
    num = kwargs.get("num", 2)

    t = text.Term.normalize(t)
    if not trefs:
        from . import topic
        topic_manager = topic.get_topics()
        topic_obj = topic_manager.get(t)
        trefs = [pair[0] for pair in topic_obj.sources[:num]]

    normal_refs = [text.Ref(ref).normal() for ref in trefs]
    d = {
        "title": {"en": t, "he": hebrew_term(t)},
        "refs": normal_refs
    }
    return d
def _create_parasha_verse_connection_story(parasha_obj, mustHave=None, **kwargs):
    from sefaria.utils.calendars import make_parashah_response_from_calendar_entry
    from . import ref_data

    mustHave = mustHave or []
    cal = make_parashah_response_from_calendar_entry(parasha_obj)[0]
    parasha_ref = text.Ref(parasha_obj["ref"])
    top_ref = ref_data.RefDataSet.from_ref(parasha_ref).nth_ref(iteration)

    connection_refs = [l.ref_opposite(top_ref) for l in filter(lambda x: x.type != "commentary", top_ref.linkset())]
    connection_ref = None
    while connection_ref is None:
        connection_ref = random.choice(connection_refs)
        category = connection_ref.index.categories[0]
        if category == "Tanakh" or category == "Reference":  # Quoting commentary isn't best for this
            connection_ref = None
            continue

    if not connection_ref.is_text_translated():
        mustHave += ["readsHebrew"]

    cls.generate_story(
        refs=[top_ref.normal(), connection_ref.normal()],
        title={"en": category + " on " + cal["displayValue"]["en"],
               "he": hebrew_term(category) + u" על " + cal["displayValue"]["he"]},
        lead={"en": "Weekly Torah Portion", "he": u'פרשת השבוע'},
        mustHave=mustHave,
        **kwargs
    ).save()
def construct_index():
    names = node_names()
    en_parasha = get_parsha_dict()

    root = SchemaNode()
    root.add_title('Sefat Emet', 'en', primary=True)
    root.add_title(u'שפת אמת', 'he', primary=True)
    root.key = 'Sefat Emet'

    for book in names.keys():
        book_node = SchemaNode()
        book_node.add_title(book, 'en', primary=True)
        book_node.add_title(hebrew_term(book), 'he', primary=True)
        book_node.key = book

        for parasha in names[book].keys():
            parsha_node = SchemaNode()
            parsha_node.add_title(en_parasha[parasha], 'en', primary=True)
            parsha_node.add_title(parasha, 'he', primary=True)
            parsha_node.key = en_parasha[parasha]

            for year in names[book][parasha]:
                year_node = JaggedArrayNode()
                civil_year = get_civil_year(year, book)
                year_node.add_title(civil_year, 'en', primary=True)
                year_node.add_title(fix_hebrew_years(year), 'he', primary=True)
                year_node.key = civil_year
                year_node.depth = 1
                year_node.addressTypes = ['Integer']
                year_node.sectionNames = ['Paragraph']
                parsha_node.append(year_node)
            book_node.append(parsha_node)
        root.append(book_node)
    root.validate()

    index = {
        'title': 'Sefat Emet',
        'categories': ['Chasidut'],
        'schema': root.serialize()
    }
    return index
def construct_index():
    names = node_names()
    en_parasha = get_parsha_dict()

    root = SchemaNode()
    root.add_title('Sefat Emet', 'en', primary=True)
    root.add_title(u'שפת אמת', 'he', primary=True)
    root.key = 'Sefat Emet'

    for book in names.keys():
        book_node = SchemaNode()
        book_node.add_title(book, 'en', primary=True)
        book_node.add_title(hebrew_term(book), 'he', primary=True)
        book_node.key = book

        for parasha in names[book].keys():
            parsha_node = JaggedArrayNode()
            p_names = [p.contents()['name'] for p in TermSet({'scheme': "Parasha"})]
            if en_parasha[parasha] in p_names:
                parsha_node.add_shared_term(en_parasha[parasha])
                parsha_node.key = en_parasha[parasha]
            else:
                parsha_node.add_primary_titles(en_parasha[parasha], parasha)
            parsha_node.add_structure(['Section', 'Comment'])
            book_node.append(parsha_node)
        root.append(book_node)
    root.validate()

    index = {
        'title': 'Sefat Emet',
        'categories': ['Chasidut'],
        'schema': root.serialize()
    }
    return index
def format_link_object_for_client(link, with_text, ref, pos=None):
    """
    :param link: Link object
    :param ref: Ref object of the source of the link
    :param pos: Position of the Ref in the Link. If not passed, it will be derived from the first two arguments.
    :return: Dict
    """
    com = {}

    # The text we're asked to get links to
    anchorTref = link.refs[pos]
    anchorRef = Ref(anchorTref)
    anchorTrefExpanded = getattr(link, "expandedRefs{}".format(pos))

    # The link we found to anchorRef
    linkPos = (pos + 1) % 2
    linkTref = link.refs[linkPos]
    linkRef = Ref(linkTref)
    langs = getattr(link, "availableLangs", [[], []])
    linkLangs = langs[linkPos]

    com["_id"] = str(link._id)
    com['index_title'] = linkRef.index.title
    com["category"] = linkRef.primary_category  # usually the index's categories[0] or "Commentary".
    com["type"] = link.type
    com["ref"] = linkTref
    com["anchorRef"] = anchorTref
    com["anchorRefExpanded"] = anchorTrefExpanded
    com["sourceRef"] = linkTref
    com["sourceHeRef"] = linkRef.he_normal()
    com["anchorVerse"] = anchorRef.sections[-1] if len(anchorRef.sections) else 0
    com["sourceHasEn"] = "en" in linkLangs
    # com["anchorText"] = getattr(link, "anchorText", "")  # not currently used
    if getattr(link, "inline_reference", None):
        com["inline_reference"] = getattr(link, "inline_reference", None)
    if getattr(link, "highlightedWords", None):
        com["highlightedWords"] = getattr(link, "highlightedWords", None)

    compDate = getattr(linkRef.index, "compDate", None)
    if compDate:
        com["compDate"] = int(compDate)
        try:
            com["errorMargin"] = int(getattr(linkRef.index, "errorMargin", 0))
        except ValueError:
            com["errorMargin"] = 0

    # Pad out the sections list, so that comparisons between comment numbers are apples-to-apples
    lsections = linkRef.sections[:] + [0] * (linkRef.index_node.depth - len(linkRef.sections))

    # Build a decimal comment number based on the last two digits of the section array
    com["commentaryNum"] = lsections[-1] if len(lsections) == 1 \
        else float('{0}.{1:04d}'.format(*lsections[-2:])) if len(lsections) > 1 else 0

    if with_text:
        text = TextFamily(linkRef, context=0, commentary=False)
        com["text"] = text.text if isinstance(text.text, basestring) else JaggedTextArray(text.text).flatten_to_array()
        com["he"] = text.he if isinstance(text.he, basestring) else JaggedTextArray(text.he).flatten_to_array()

    # If the link is commentary, strip redundant info (e.g. "Rashi on Genesis 4:2" -> "Rashi").
    # This is now simpler, and there is explicit data on the index record for it.
    if com["type"] == "commentary":
        com["collectiveTitle"] = {
            'en': getattr(linkRef.index, 'collective_title', linkRef.index.title),
            'he': hebrew_term(getattr(linkRef.index, 'collective_title', linkRef.index.get_title("he")))
        }
    else:
        com["collectiveTitle"] = {'en': linkRef.index.title, 'he': linkRef.index.get_title("he")}

    if com["type"] != "commentary" and com["category"] == "Commentary":
        com["category"] = "Quoting Commentary"

    if com["category"] == "Modern Works" and getattr(linkRef.index, "dependence", None) == "Commentary":
        # print "Transforming " + linkRef.normal()
        com["category"] = "Modern Commentary"
        com["collectiveTitle"] = {
            'en': getattr(linkRef.index, 'collective_title', linkRef.index.title),
            'he': hebrew_term(getattr(linkRef.index, 'collective_title', linkRef.index.get_title("he")))
        }

    if linkRef.index_node.primary_title("he"):
        com["heTitle"] = linkRef.index_node.primary_title("he")

    return com
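# Usage sketch (comments only, since a real Link loaded from the database is
# required for fields like _id and expandedRefs0; the ref below is hypothetical):
#
#   anchor = Ref("Genesis 1:1")
#   client_link = format_link_object_for_client(link, with_text=True, ref=anchor, pos=0)
#
# The returned dict carries the client-facing fields built above, e.g.
# "sourceRef", "collectiveTitle", "sourceHasEn" and the sortable "commentaryNum".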