def create_link_cluster(refs, user, link_type="", attrs=None, exception_pairs=None, exception_range = None): total = 0 for i, ref in enumerate(refs): for j in range(i + 1, len(refs)): ref_strings = [refs[i].normal(), refs[j].normal()] # If this link matches an exception pair, skip it. if all([any([r.startswith(name) for r in ref_strings]) for pair in exception_pairs for name in pair]): continue # If this link matches an exception range, skip it. if refs[i].section_ref() == refs[j].section_ref(): continue d = { "refs": ref_strings, "type": link_type } if attrs: d.update(attrs) try: tracker.add(user, Link, d) print u"Created {} - {}".format(d["refs"][0], d["refs"][1]) total += 1 except Exception as e: print u"Exception: {}".format(e) return total
def add_links_from_text(ref, lang, text, text_id, user, **kwargs):
    """
    Scan a text for explicit references to other texts and automatically add new links between
    ref and the mentioned text.

    text["text"] may be a list of segments, an individual segment, or None.

    Lev - added return on 13 July 2014

    :param ref: string ref of the citing text; extended with ":<n>" per segment on recursion.
    :param lang: language code passed to library.get_refs_in_string.
    :param text: list of segments, a single segment string, or None/empty.
    :param text_id: oid of the source text, stored on each generated link.
    :param user: user id passed through to tracker.add / tracker.delete.
    :return: list of link dicts that were newly created.
    """
    if not text:
        # Nothing to scan.
        return []
    elif isinstance(text, list):
        # Recur on each segment, appending a 1-based segment number to the ref.
        links = []
        for i in range(len(text)):
            subtext = text[i]
            single = add_links_from_text("%s:%d" % (ref, i + 1), lang, subtext, text_id, user, **kwargs)
            links += single
        return links
    elif isinstance(text, basestring):
        # Links previously auto-generated from this text, fetched up front so
        # links created below do not contaminate the stale-link sweep.
        existingLinks = LinkSet({
            "refs": ref,
            "auto": True,
            "generated_by": "add_links_from_text",
            "source_text_oid": text_id
        }).array()  # Added the array here to force population, so that new links don't end up in this set

        found = []  # The normal refs of the links found in this text
        links = []  # New link objects created by this processes
        refs = library.get_refs_in_string(text, lang)
        for oref in refs:
            link = {
                "refs": [ref, oref.normal()],
                "type": "",
                "auto": True,
                "generated_by": "add_links_from_text",
                "source_text_oid": text_id
            }
            found += [oref.normal()]  # Keep this here, since tracker.add will throw an error if the link exists
            try:
                tracker.add(user, Link, link, **kwargs)
                links += [link]
            except InputError as e:
                # Duplicate or invalid link; it still counts as "found" above.
                pass

        # Remove existing links that are no longer supported by the text
        for exLink in existingLinks:
            for r in exLink.refs:
                if r == ref:  # current base ref
                    continue
                if r not in found:
                    tracker.delete(user, Link, exLink._id)
                # NOTE(review): only the first non-base ref of each link is
                # examined before breaking — confirm this is intentional.
                break
        return links
def create_link_cluster(refs, user, link_type="", attrs=None, exception_pairs=None): total = 0 for i, ref in enumerate(refs): for j in range(i + 1, len(refs)): ref_strings = [refs[i].normal(), refs[j].normal()] # If this link matches an exception pair, skip it. if all([ any([r.startswith(name) for r in ref_strings]) for pair in exception_pairs for name in pair ]): continue d = {"refs": ref_strings, "type": link_type} if attrs: d.update(attrs) try: tracker.add(user, Link, d) print u"Created {} - {}".format(d["refs"][0], d["refs"][1]) total += 1 except Exception as e: print u"Exception: {}".format(e) return total
def add_links_from_text(ref, lang, text, text_id, user, **kwargs):
    """
    Scan a text for explicit references to other texts and automatically add new links between
    ref and the mentioned text.

    text["text"] may be a list of segments, an individual segment, or None.

    Returns a list of links added.

    :param ref: string ref of the citing text; sub-refs are derived via Ref.subrefs on recursion.
    :param lang: language code passed to library.get_refs_in_string.
    :param text: list of segments, a single segment string, or None/empty.
    :param text_id: oid of the source text, stored on each generated link.
    :param user: user id passed through to tracker.add / tracker.delete.
    """
    if not text:
        # Nothing to scan.
        return []
    elif isinstance(text, list):
        # Recur on each segment using properly derived sub-refs.
        oref = Ref(ref)
        subrefs = oref.subrefs(len(text))
        links = []
        for i in range(len(text)):
            single = add_links_from_text(subrefs[i].normal(), lang, text[i], text_id, user, **kwargs)
            links += single
        return links
    elif isinstance(text, basestring):
        # Links previously auto-generated from this text, fetched up front so
        # links created below do not contaminate the stale-link sweep.
        existingLinks = LinkSet({
            "refs": ref,
            "auto": True,
            "generated_by": "add_links_from_text",
            "source_text_oid": text_id
        }).array()  # Added the array here to force population, so that new links don't end up in this set

        found = []  # The normal refs of the links found in this text
        links = []  # New link objects created by this processes
        refs = library.get_refs_in_string(text, lang)
        for oref in refs:
            link = {  # Note -- ref of the citing text is in the first position
                "refs": [ref, oref.normal()],
                "type": "",
                "auto": True,
                "generated_by": "add_links_from_text",
                "source_text_oid": text_id
            }
            found += [oref.normal()]  # Keep this here, since tracker.add will throw an error if the link exists
            try:
                tracker.add(user, Link, link, **kwargs)
                links += [link]
            except InputError as e:
                # Duplicate or invalid link; it still counts as "found" above.
                pass

        # Remove existing links that are no longer supported by the text
        for exLink in existingLinks:
            for r in exLink.refs:
                if r == ref:  # current base ref
                    continue
                if r not in found:
                    tracker.delete(user, Link, exLink._id)
                # NOTE(review): only the first non-base ref of each link is
                # examined before breaking — confirm this is intentional.
                break
        return links
def create_link_cluster(refs, user, link_type="", attrs=None): for i, ref in enumerate(refs): for j in range(i + 1, len(refs)): d = { "refs": [refs[i].normal(), refs[j].normal()], "type": link_type } if attrs: d.update(attrs) try: tracker.add(user, Link, d) print u"Created {} - {}".format(d["refs"][0], d["refs"][1]) except Exception as e: print u"Exception: {}".format(e)
def _save_link(self, tref, base_tref, **kwargs):
    """
    Persist a link of this linker's type between base_tref and tref.

    Saves directly when no user is set on the instance, otherwise records the
    change through tracker.add.  An already-existing (duplicate) link is
    silently ignored.

    :return: tref, unchanged.
    """
    link_doc = {
        "refs": [base_tref, tref],
        "type": self._link_type,
        "anchorText": "",
        "auto": self._auto,
        "generated_by": self._generated_by_string
    }
    try:
        if self._user:
            tracker.add(self._user, Link, link_doc, **kwargs)
        else:
            Link(link_doc).save()
    except DuplicateRecordError as e:
        pass  # link already exists; nothing to do
    return tref
def func_to_profile():
    """
    Load link records from links.json and time how long adding them all takes.

    Each link's second ref is truncated to its section level (text before the
    first ":") before being added.  Failures on individual links are swallowed
    so one bad record does not abort the timing run.  Prints elapsed seconds.
    """
    with open("links.json", 'r') as fp:
        links = json.load(fp)
    before = time.time()
    for our_link in links:  # enumerate index was unused; iterate directly
        try:
            # Truncate the second ref to its section (drop the segment address).
            our_link["refs"][1] = our_link["refs"][1].split(":")[0]
            add(1, Link, our_link)
        except Exception:
            pass  # best-effort profiling: skip links that fail to save
    after = time.time()
    print(after - before)
def add_links_from_text(ref, text, text_id, user, **kwargs):
    """
    Scan a text for explicit references to other texts and automatically add new links between
    ref and the mentioned text.

    text["text"] may be a list of segments, an individual segment, or None.

    Lev - added return on 13 July 2014

    :param ref: string ref of the citing text; extended with ":<n>" per segment on recursion.
    :param text: dict containing a "text" key (list of segments or a single string).
    :param text_id: oid of the source text, stored on each generated link.
    :param user: user id passed through to tracker.add.
    :return: list of link dicts that were newly created.
    """
    if not text or "text" not in text:
        # Nothing to scan.
        return []
    elif isinstance(text["text"], list):
        # Recur on each segment, appending a 1-based segment number to the ref.
        links = []
        for i in range(len(text["text"])):
            # Deep-copy so each recursive call sees its own "text" value
            # without mutating the caller's dict.
            subtext = copy.deepcopy(text)
            subtext["text"] = text["text"][i]
            single = add_links_from_text("%s:%d" % (ref, i + 1), subtext, text_id, user, **kwargs)
            links += single
        return links
    elif isinstance(text["text"], basestring):
        links = []
        matches = get_refs_in_string(text["text"])
        for mref in matches:
            link = {
                "refs": [ref, mref],
                "type": "",
                "auto": True,
                "generated_by": "add_links_from_text",
                "source_text_oid": text_id
            }
            try:
                tracker.add(user, model.Link, link, **kwargs)
                links += [link]
            except InputError as e:
                # Duplicate or invalid link; skip it.
                pass
        return links
def add_links_from_text(oref, lang, text, text_id, user, **kwargs):
    """
    Scan a text for explicit references to other texts and automatically add new links between
    ref and the mentioned text.

    text["text"] may be a list of segments, an individual segment, or None.

    The set of no longer supported links (`existingLinks` - `found`) is deleted.
    If Varnish is used, all linked refs, old and new, are refreshed

    Returns `links` - the list of links added.

    :param oref: Ref object of the citing text; sub-refs are derived via subrefs on recursion.
    :param lang: language code passed to library.get_refs_in_string.
    :param text: list of segments, a single segment string, or None/empty.
    :param text_id: oid of the source text, stored on each generated link.
    :param user: user id passed through to tracker.add / tracker.delete.
    """
    if not text:
        # Nothing to scan.
        return []
    elif isinstance(text, list):
        # Recur on each segment using properly derived sub-refs.
        subrefs = oref.subrefs(len(text))
        links = []
        for i in range(len(text)):
            single = add_links_from_text(subrefs[i], lang, text[i], text_id, user, **kwargs)
            links += single
        return links
    elif isinstance(text, basestring):
        """
        Keeps three lists:
        * existingLinks - The links that existed before the text was rescanned
        * found - The links found in this scan of the text
        * links - The new links added in this scan of the text

        The set of no longer supported links (`existingLinks` - `found`) is deleted.
        The set of all links (`existingLinks` + `Links`) is refreshed in Varnish.
        """
        existingLinks = LinkSet({
            "refs": oref.normal(),
            "auto": True,
            "generated_by": "add_links_from_text",
            "source_text_oid": text_id
        }).array()  # Added the array here to force population, so that new links don't end up in this set

        found = []  # The normal refs of the links found in this text
        links = []  # New link objects created by this processes
        refs = library.get_refs_in_string(text, lang)
        for linked_oref in refs:
            link = {  # Note -- ref of the citing text is in the first position
                "refs": [oref.normal(), linked_oref.normal()],
                "type": "",
                "auto": True,
                "generated_by": "add_links_from_text",
                "source_text_oid": text_id
            }
            found += [linked_oref.normal()]  # Keep this here, since tracker.add will throw an error if the link exists
            try:
                tracker.add(user, Link, link, **kwargs)
                links += [link]
                if USE_VARNISH:
                    invalidate_ref(linked_oref)
            except InputError as e:
                # Duplicate or invalid link; it still counts as "found" above.
                pass

        # Remove existing links that are no longer supported by the text
        for exLink in existingLinks:
            for r in exLink.refs:
                if r == oref.normal():  # current base ref
                    continue
                if USE_VARNISH:
                    invalidate_ref(Ref(r))
                if r not in found:
                    tracker.delete(user, Link, exLink._id)
                # NOTE(review): only the first non-base ref of each link is
                # examined before breaking — confirm this is intentional.
                break
        return links
def add_commentary_links(tref, user, **kwargs):
    """
    Automatically add links for each comment in the commentary text denoted by 'ref'.
    E.g., for the ref 'Sforno on Kohelet 3:2', automatically set links for
    Kohelet 3:2 <-> Sforno on Kohelet 3:2:1, Kohelet 3:2 <-> Sforno on Kohelet 3:2:2, etc.
    for each segment of text (comment) that is in 'Sforno on Kohelet 3:2'.

    :param tref: string ref of the commentary text.
    :param user: user id passed through to tracker.add.
    """
    text = get_text(tref, commentary=0, context=0, pad=False)
    tref = model.Ref(tref).normal()
    # The base (commented-on) text is whatever follows " on " in the ref.
    book = tref[tref.find(" on ") + 4:]

    if len(text["sections"]) == len(text["sectionNames"]):
        # this is a single comment, trim the last section number (comment) from ref
        book = book[0:book.rfind(":")]
        link = {
            "refs": [book, tref],
            "type": "commentary",
            "anchorText": "",
            "auto": True,
            "generated_by": "add_commentary_links"
        }
        try:
            tracker.add(user, model.Link, link, **kwargs)
        except DuplicateRecordError as e:
            pass  # link already exists
    elif len(text["sections"]) == (len(text["sectionNames"]) - 1):
        # This means that the text (and it's corresponding ref) being posted has the amount of sections like the parent text
        # (the text being commented on) so this is single group of comments on the lowest unit of the parent text.
        # and we simply iterate and create a link for each existing one to point to the same unit of parent text
        length = max(len(text["text"]), len(text["he"]))
        for i in range(length):
            link = {
                "refs": [book, tref + ":" + str(i + 1)],
                "type": "commentary",
                "anchorText": "",
                "auto": True,
                "generated_by": "add_commentary_links"
            }
            try:
                tracker.add(user, model.Link, link, **kwargs)
            except DuplicateRecordError as e:
                pass  # link already exists
    elif len(text["sections"]) > 0:
        # any other case where the posted ref sections do not match the length of the parent texts sections
        # this is a larger group of comments meaning it needs to be further broken down
        # in order to be able to match the commentary to the basic parent text units,
        # recur on each section
        length = max(len(text["text"]), len(text["he"]))
        for i in range(length):
            add_commentary_links("%s:%d" % (tref, i + 1), user)
    else:
        # This is a special case of the above, where the sections length is 0 and that means this is
        # a whole text that has been posted. For this we need a better way than get_text() to get the correct length of
        # highest order section counts.
        # We use the counts document for that.
        text_counts = counts.count_texts(tref)
        length = len(text_counts["counts"])
        for i in range(length):
            add_commentary_links("%s:%d" % (tref, i+1), user)
def add_commentary_links(tref, user, **kwargs):
    """
    Automatically add links for each comment in the commentary text denoted by 'tref'.
    E.g., for the ref 'Sforno on Kohelet 3:2', automatically set links for
    Kohelet 3:2 <-> Sforno on Kohelet 3:2:1, Kohelet 3:2 <-> Sforno on Kohelet 3:2:2, etc.
    for each segment of text (comment) that is in 'Sforno on Kohelet 3:2'.

    :param tref: string ref of the commentary text.
    :param user: user id passed through to tracker.add.
    """
    text = TextFamily(Ref(tref), commentary=0, context=0, pad=False).contents()
    tref = Ref(tref).normal()
    # The base (commented-on) text is whatever follows " on " in the ref.
    book = tref[tref.find(" on ") + 4:]

    if len(text["sections"]) == len(text["sectionNames"]):
        # this is a single comment, trim the last section number (comment) from ref
        book = book[0:book.rfind(":")]
        link = {
            "refs": [book, tref],
            "type": "commentary",
            "anchorText": "",
            "auto": True,
            "generated_by": "add_commentary_links"
        }
        try:
            tracker.add(user, Link, link, **kwargs)
        except DuplicateRecordError as e:
            pass  # link already exists
    elif len(text["sections"]) == (len(text["sectionNames"]) - 1):
        # This means that the text (and it's corresponding ref) being posted has the amount of sections like the parent text
        # (the text being commented on) so this is single group of comments on the lowest unit of the parent text.
        # and we simply iterate and create a link for each existing one to point to the same unit of parent text
        length = max(len(text["text"]), len(text["he"]))
        for i in range(length):
            link = {
                "refs": [book, tref + ":" + str(i + 1)],
                "type": "commentary",
                "anchorText": "",
                "auto": True,
                "generated_by": "add_commentary_links"
            }
            try:
                tracker.add(user, Link, link, **kwargs)
            except DuplicateRecordError as e:
                pass  # link already exists
    elif len(text["sections"]) > 0:
        # any other case where the posted ref sections do not match the length of the parent texts sections
        # this is a larger group of comments meaning it needs to be further broken down
        # in order to be able to match the commentary to the basic parent text units,
        # recur on each section
        length = max(len(text["text"]), len(text["he"]))
        for i in range(length):
            add_commentary_links("%s:%d" % (tref, i + 1), user)
    else:
        # This is a special case of the above, where the sections length is 0 and that means this is
        # a whole text that has been posted. For this we need a better way than get_text() to get the correct length of
        # highest order section counts.
        # We use the counts document for that.
        #text_counts = counts.count_texts(tref)
        #length = len(text_counts["counts"])
        sn = StateNode(tref)
        length = sn.ja('all').length()
        for i in range(length):
            add_commentary_links("%s:%d" % (tref, i + 1), user)
is_new_perek = not lastrow or row[1] != lastrow[1] is_new_mesechet = not lastrow or row[0] != lastrow[0] # Add link mishnaRef = Ref("{} {}:{}-{}".format(row[0], row[1], row[2], row[3])) mishnahInTalmudRef = Ref("{} {}:{}-{}:{}".format( row[0], row[4], row[5], row[6], row[7])) print mishnaRef.normal() + " ... " + mishnahInTalmudRef.normal() if live: try: tracker.add( 28, Link, { "refs": [mishnaRef.normal(), mishnahInTalmudRef.normal()], "auto": True, "generated_by": "mishnah_map", "type": "Mishnah in Talmud" }) except DuplicateRecordError as e: print e # Try highlighting hadran. Note that the last hadran gets highlighted outside of the loop """ 13 non standard Hadrans (code is catching all of them, but they should be checked) Hadrans in parens, joined at end of other line: Sukkah 20b:29 Sukkah 29b:5
# Link each Rashi comment that matches one of `patterns` to the corresponding
# Onkelos section.  NOTE(review): `books`, `patterns`, and `total` are defined
# outside this chunk — confirm `total` is initialized to 0 before this loop.
for book in books:
    rashi_book = "Rashi on " + book
    onkelos_book = "Onkelos " + book
    i = library.get_index(rashi_book)
    assert isinstance(i, CommentaryIndex)
    all_rashis = i.all_segment_refs()
    # Loop through all of the Rashis
    for rashi_ref in all_rashis:
        rashi = strip_nikkud(TextChunk(rashi_ref, "he", "On Your Way").text)
        # If it matches the pattern
        for pat in patterns:
            if pat in rashi:
                # Map the Rashi's section ref onto the parallel Onkelos ref by
                # swapping the book-title prefix.
                onkelos_ref = Ref(rashi_ref.section_ref().normal().replace(rashi_book, onkelos_book))
                d = {
                    "refs": [rashi_ref.normal(), onkelos_ref.normal()],
                    "type": "reference",
                    "auto": True,
                    "generated_by": "Rashi - Onkelos Linker"
                }
                tracker.add(28, Link, d)
                print u"{}\t{}\t{}".format(rashi_ref.normal(), pat, rashi.strip())
                total += 1
                break  # one link per Rashi; stop at the first matching pattern

print "\nLinks: {}".format(total)
# Link each Rashi comment on the Torah that matches one of `patterns` to the
# corresponding Onkelos section, reporting a running tally.
# NOTE(review): `patterns` is defined outside this chunk.
books = ["Genesis", "Exodus", "Leviticus", "Numbers", "Deuteronomy"]
total = 0  # number of links created
for book in books:
    rashi_book = "Rashi on " + book
    onkelos_book = "Onkelos " + book
    i = library.get_index(rashi_book)
    assert isinstance(i, CommentaryIndex)
    all_rashis = i.all_segment_refs()
    # Loop through all of the Rashis
    for rashi_ref in all_rashis:
        rashi = strip_nikkud(TextChunk(rashi_ref, "he", "On Your Way").text)
        # If it matches the pattern
        for pat in patterns:
            if pat in rashi:
                # Map the Rashi's section ref onto the parallel Onkelos ref by
                # swapping the book-title prefix.
                onkelos_ref = Ref(rashi_ref.section_ref().normal().replace(rashi_book, onkelos_book))
                d = {
                    "refs": [rashi_ref.normal(), onkelos_ref.normal()],
                    "type": "reference",
                    "auto": True,
                    "generated_by": "Rashi - Onkelos Linker",
                }
                tracker.add(28, Link, d)
                print u"{}\t{}\t{}".format(rashi_ref.normal(), pat, rashi.strip())
                total += 1
                break  # one link per Rashi; stop at the first matching pattern

print "\nLinks: {}".format(total)
def add_commentary_links(oref, user, text=None, **kwargs):
    #//TODO: commentary refactor, also many other lines can be made better
    """
    Automatically add links for each comment in the commentary text denoted by 'tref'.
    E.g., for the ref 'Sforno on Kohelet 3:2', automatically set links for
    Kohelet 3:2 <-> Sforno on Kohelet 3:2:1, Kohelet 3:2 <-> Sforno on Kohelet 3:2:2, etc.
    for each segment of text (comment) that is in 'Sforno on Kohelet 3:2'.

    :param oref: Ref of the commentary text (must be a commentary ref).
    :param user: user id passed through to tracker.add.
    :param text: optional pre-fetched TextFamily contents dict; fetched if omitted.
    :return: list of normal ref strings for which links were recorded.
    """
    assert oref.is_commentary()
    tref = oref.normal()
    # The base (commented-on) text is whatever follows " on " in the ref.
    base_tref = tref[tref.find(" on ") + 4:]
    base_oref = Ref(base_tref)
    found_links = []

    # This is a special case, where the sections length is 0 and that means this is
    # a whole text or complex text node that has been posted. So we get each leaf node
    if not oref.sections:
        vs = StateNode(tref).versionState
        if not vs.is_new_state:
            vs.refresh()  # Needed when saving multiple nodes in a complex text. This may be moderately inefficient.
        content_nodes = oref.index_node.get_leaf_nodes()
        for r in content_nodes:
            cn_oref = r.ref()
            text = TextFamily(cn_oref, commentary=0, context=0, pad=False).contents()
            length = cn_oref.get_state_ja().length()
            for i, sr in enumerate(cn_oref.subrefs(length)):
                # Build a minimal per-section contents dict for the recursive call.
                stext = {"sections": sr.sections,
                         "sectionNames": text['sectionNames'],
                         "text": text["text"][i] if i < len(text["text"]) else "",
                         "he": text["he"][i] if i < len(text["he"]) else ""
                         }
                found_links += add_commentary_links(sr, user, stext, **kwargs)
    else:
        if not text:
            try:
                text = TextFamily(oref, commentary=0, context=0, pad=False).contents()
            except AssertionError:
                logger.warning(u"Structure node passed to add_commentary_links: {}".format(oref.normal()))
                return

        if len(text["sectionNames"]) > len(text["sections"]) > 0:
            # any other case where the posted ref sections do not match the length of the parent texts sections
            # this is a larger group of comments meaning it needs to be further broken down
            # in order to be able to match the commentary to the basic parent text units,
            # recur on each section
            length = max(len(text["text"]), len(text["he"]))
            for i, r in enumerate(oref.subrefs(length)):
                # Build a minimal per-section contents dict for the recursive call.
                stext = {"sections": r.sections,
                         "sectionNames": text['sectionNames'],
                         "text": text["text"][i] if i < len(text["text"]) else "",
                         "he": text["he"][i] if i < len(text["he"]) else ""
                         }
                found_links += add_commentary_links(r, user, stext, **kwargs)

        # this is a single comment, trim the last section number (comment) from ref
        elif len(text["sections"]) == len(text["sectionNames"]):
            if len(text['he']) or len(text['text']):  #only if there is actually text
                base_tref = base_tref[0:base_tref.rfind(":")]
                link = {
                    "refs": [base_tref, tref],
                    "type": "commentary",
                    "anchorText": "",
                    "auto": True,
                    "generated_by": "add_commentary_links"
                }
                found_links += [tref]
                try:
                    tracker.add(user, Link, link, **kwargs)
                except DuplicateRecordError as e:
                    pass  # link already exists
    return found_links
def add_commentary_links(oref, user, **kwargs):
    """
    Automatically add links for each comment in the commentary text denoted by 'tref'.
    E.g., for the ref 'Sforno on Kohelet 3:2', automatically set links for
    Kohelet 3:2 <-> Sforno on Kohelet 3:2:1, Kohelet 3:2 <-> Sforno on Kohelet 3:2:2, etc.
    for each segment of text (comment) that is in 'Sforno on Kohelet 3:2'.

    :param oref: Ref of the commentary text (must be a commentary ref).
    :param user: user id passed through to tracker.add.
    """
    try:
        text = TextFamily(oref, commentary=0, context=0, pad=False).contents()
    except AssertionError:
        logger.warning(u"Structure node passed to add_commentary_links: {}".format(oref.normal()))
        return
    assert oref.is_commentary()
    tref = oref.normal()
    # The base (commented-on) text is whatever follows " on " in the ref.
    base_tref = tref[tref.find(" on ") + 4:]

    if len(text["sections"]) == len(text["sectionNames"]):
        # this is a single comment, trim the last section number (comment) from ref
        base_tref = base_tref[0:base_tref.rfind(":")]
        link = {
            "refs": [base_tref, tref],
            "type": "commentary",
            "anchorText": "",
            "auto": True,
            "generated_by": "add_commentary_links"
        }
        try:
            tracker.add(user, Link, link, **kwargs)
        except DuplicateRecordError as e:
            pass  # link already exists
    elif len(text["sections"]) == (len(text["sectionNames"]) - 1):
        # This means that the text (and it's corresponding ref) being posted has the amount of sections like the parent text
        # (the text being commented on) so this is single group of comments on the lowest unit of the parent text.
        # and we simply iterate and create a link for each existing one to point to the same unit of parent text
        length = max(len(text["text"]), len(text["he"]))
        for i in range(length):
            link = {
                "refs": [base_tref, tref + ":" + str(i + 1)],
                "type": "commentary",
                "anchorText": "",
                "auto": True,
                "generated_by": "add_commentary_links"
            }
            try:
                tracker.add(user, Link, link, **kwargs)
            except DuplicateRecordError as e:
                pass  # link already exists
    elif len(text["sections"]) > 0:
        # any other case where the posted ref sections do not match the length of the parent texts sections
        # this is a larger group of comments meaning it needs to be further broken down
        # in order to be able to match the commentary to the basic parent text units,
        # recur on each section
        length = max(len(text["text"]), len(text["he"]))
        for r in oref.subrefs(length):
            add_commentary_links(r, user, **kwargs)
    else:
        # This is a special case of the above, where the sections length is 0 and that means this is
        # a whole text that has been posted. For this we need a better way than get_text() to get the correct length of
        # highest order section counts.
        # We use the counts document for that.
        #text_counts = counts.count_texts(tref)
        #length = len(text_counts["counts"])
        sn = StateNode(tref)
        if not sn.versionState.is_new_state:
            sn.versionState.refresh()  # Needed when saving multiple nodes in a complex text. This may be moderately inefficient.
            sn = StateNode(tref)  # re-read state after the refresh
        length = sn.ja('all').length()
        for r in oref.subrefs(length):
            add_commentary_links(r, user, **kwargs)

    if USE_VARNISH:
        # Refresh the cached pages for both sides of the new links.
        invalidate_ref(oref)
        invalidate_ref(Ref(base_tref))
def import_from_csv(filename, action="status", category="all"):
    """
    Read index records from a spreadsheet CSV and report on or save them.

    :param filename: path to the CSV file; row layout is positional (see column
        indexes below) — assumes the sheet's export format, TODO confirm.
    :param action: "status" to report, "post"/"update" to save via tracker,
        "hebrew" to report missing Hebrew titles.
    :param category: restrict "post"/"update" to indexes whose first two
        categories include this value; "all" saves everything.
    """
    existing_titles = []
    with open(filename, 'rb') as csvfile:
        rows = csv.reader(csvfile)
        header = rows.next()  # skip the header row
        for text in rows:
            if not len(text[2]) or not len(text[9]):
                # Require a primary titl and something set in "ready to upload" field
                continue
            new_index = {
                "title": text[2].strip(),
                "sectionNames": [s.strip() for s in text[8].split(",")],
                "categories": [s.strip() for s in text[7].split(", ")],
                "titleVariants": [text[2].strip()] + [s.strip() for s in text[6].split(", ")],
            }
            if len(text[3]):
                new_index["heTitle"] = text[3].strip()
            if len(text[4]):
                new_index["transliteratedTitle"] = text[4].strip()
                new_index["titleVariants"] += [new_index["transliteratedTitle"]]
            # Drop empty variant strings produced by the splits above.
            new_index["titleVariants"] = [v for v in new_index["titleVariants"] if v]
            if len(text[10]):
                new_index["length"] = int(text[10])
            if len(text[12]):
                # Only import the last order field for now
                new_index["order"] = [map(int, text[12].split(","))[-1]]

            existing = db.index.find_one({"titleVariants": new_index["title"]})

            if action == "status":
                # Print information about texts listed
                if not existing:
                    print "NEW - " + new_index["title"]
                if existing:
                    if new_index["title"] == existing["title"]:
                        print "EXISTING - " + new_index["title"]
                    else:
                        print "EXISTING (title change) - " + new_index["title"]
                    existing_titles.append(existing["title"])
                validation = texts.validate_index(new_index)
                if "error" in validation:
                    print "*** %s" % validation["error"]

            # Add texts if their category is specified in command line
            if action in ("post", "update") and category:
                if category == "all" or category in new_index["categories"][:2]:
                    print "Saving %s" % new_index["title"]
                    if action == "update":
                        # TOOD remove any fields that have empty values like []
                        # before updating - don't overwrite with nothing
                        new_index.update(existing)
                    tracker.add(1, sefaria.model.index.Index, new_index)

            if action == "hebrew" and existing:
                if "heTitle" not in existing:
                    print "Missing Hebrew: %s" % (existing["title"])
                existing_titles.append(existing["title"])

    if action == "status":
        # Report indexes in the DB that never appeared on the sheet.
        indexes = db.index.find()
        for i in indexes:
            if i["title"] not in existing_titles:
                print "NOT ON SHEET - %s" % i["title"]

    if action == "hebrew":
        indexes = db.index.find()
        for i in indexes:
            if "heTitle" not in i and i["title"] not in existing_titles:
                print "Still no Hebrew: %s" % i["title"]

    if action in ("post", "update"):
        # Rebuild derived data after saving new indexes.
        summaries.update_summaries()
        summaries.save_toc_to_db()
def add_links_from_text(oref, lang, text, text_id, user, **kwargs):
    """
    Scan a text for explicit references to other texts and automatically add new links between
    ref and the mentioned text.

    text["text"] may be a list of segments, an individual segment, or None.

    The set of no longer supported links (`existingLinks` - `found`) is deleted.
    If Varnish is used, all linked refs, old and new, are refreshed

    Returns `links` - the list of links added.

    :param oref: Ref object of the citing text; sub-refs are derived via subrefs on recursion.
    :param lang: language code passed to library.get_refs_in_string.
    :param text: list of segments, a single segment string, or None/empty.
    :param text_id: oid of the source text, stored on each generated link.
    :param user: user id passed through to tracker.add / tracker.delete.
    """
    if not text:
        # Nothing to scan.
        return []
    elif isinstance(text, list):
        # Recur on each segment using properly derived sub-refs.
        subrefs = oref.subrefs(len(text))
        links = []
        for i in range(len(text)):
            single = add_links_from_text(subrefs[i], lang, text[i], text_id, user, **kwargs)
            links += single
        return links
    elif isinstance(text, basestring):
        """
        Keeps three lists:
        * existingLinks - The links that existed before the text was rescanned
        * found - The links found in this scan of the text
        * links - The new links added in this scan of the text

        The set of no longer supported links (`existingLinks` - `found`) is deleted.
        The set of all links (`existingLinks` + `Links`) is refreshed in Varnish.
        """
        existingLinks = LinkSet({
            "refs": oref.normal(),
            "auto": True,
            "generated_by": "add_links_from_text",
            "source_text_oid": text_id
        }).array( )  # Added the array here to force population, so that new links don't end up in this set

        found = []  # The normal refs of the links found in this text
        links = []  # New link objects created by this processes
        refs = library.get_refs_in_string(text, lang)
        for linked_oref in refs:
            link = {  # Note -- ref of the citing text is in the first position
                "refs": [oref.normal(), linked_oref.normal()],
                "type": "",
                "auto": True,
                "generated_by": "add_links_from_text",
                "source_text_oid": text_id
            }
            found += [linked_oref.normal()]  # Keep this here, since tracker.add will throw an error if the link exists
            try:
                tracker.add(user, Link, link, **kwargs)
                links += [link]
                if USE_VARNISH:
                    invalidate_ref(linked_oref)
            except InputError as e:
                # Duplicate or invalid link; it still counts as "found" above.
                pass

        # Remove existing links that are no longer supported by the text
        for exLink in existingLinks:
            for r in exLink.refs:
                if r == oref.normal():  # current base ref
                    continue
                if USE_VARNISH:
                    invalidate_ref(Ref(r))
                if r not in found:
                    tracker.delete(user, Link, exLink._id)
                # NOTE(review): only the first non-base ref of each link is
                # examined before breaking — confirm this is intentional.
                break
        return links
def import_from_csv(filename, action="status", category="all"):
    """
    Read index records from a spreadsheet CSV and report on or save them.

    :param filename: path to the CSV file; row layout is positional (see column
        indexes below) — assumes the sheet's export format, TODO confirm.
    :param action: "status" to report, "post"/"update" to save via tracker,
        "hebrew" to report missing Hebrew titles.
    :param category: restrict "post"/"update" to indexes whose first two
        categories include this value; "all" saves everything.
    """
    existing_titles = []
    with open(filename, 'rb') as csvfile:
        rows = csv.reader(csvfile)
        header = rows.next()  # skip the header row
        for text in rows:
            if not len(text[2]) or not len(text[9]):
                # Require a primary titl and something set in "ready to upload" field
                continue
            new_index = {
                "title": text[2].strip(),
                "sectionNames": [s.strip() for s in text[8].split(",")],
                "categories": [s.strip() for s in text[7].split(", ")],
                "titleVariants": [text[2].strip()] + [s.strip() for s in text[6].split(", ")],
            }
            if len(text[3]):
                new_index["heTitle"] = text[3].strip()
            if len(text[4]):
                new_index["transliteratedTitle"] = text[4].strip()
                new_index["titleVariants"] += [new_index["transliteratedTitle"]]
            # Drop empty variant strings produced by the splits above.
            new_index["titleVariants"] = [v for v in new_index["titleVariants"] if v]
            if len(text[10]):
                new_index["length"] = int(text[10])
            if len(text[12]):
                # Only import the last order field for now
                new_index["order"] = [map(int, text[12].split(","))[-1]]

            existing = db.index.find_one({"titleVariants": new_index["title"]})

            if action == "status":
                # Print information about texts listed
                if not existing:
                    print "NEW - " + new_index["title"]
                if existing:
                    if new_index["title"] == existing["title"]:
                        print "EXISTING - " + new_index["title"]
                    else:
                        print "EXISTING (title change) - " + new_index["title"]
                    existing_titles.append(existing["title"])
                validation = texts.validate_index(new_index)
                if "error" in validation:
                    print "*** %s" % validation["error"]

            # Add texts if their category is specified in command line
            if action in ("post", "update") and category:
                if category == "all" or category in new_index["categories"][:2]:
                    print "Saving %s" % new_index["title"]
                    if action == "update":
                        # TOOD remove any fields that have empty values like []
                        # before updating - don't overwrite with nothing
                        new_index.update(existing)
                    tracker.add(1, sefaria.model.index.Index, new_index)

            if action == "hebrew" and existing:
                if "heTitle" not in existing:
                    print "Missing Hebrew: %s" % (existing["title"])
                existing_titles.append(existing["title"])

    if action == "status":
        # Report indexes in the DB that never appeared on the sheet.
        indexes = db.index.find()
        for i in indexes:
            if i["title"] not in existing_titles:
                print "NOT ON SHEET - %s" % i["title"]

    if action == "hebrew":
        indexes = db.index.find()
        for i in indexes:
            if "heTitle" not in i and i["title"] not in existing_titles:
                print "Still no Hebrew: %s" % i["title"]

    if action in ("post", "update"):
        # Rebuild the table of contents after saving new indexes.
        from sefaria.model import library
        library.rebuild_toc()
def add_commentary_links(oref, user, text=None, **kwargs):
    #//TODO: commentary refactor, also many other lines can be made better
    """
    Automatically add links for each comment in the commentary text denoted by 'tref'.
    E.g., for the ref 'Sforno on Kohelet 3:2', automatically set links for
    Kohelet 3:2 <-> Sforno on Kohelet 3:2:1, Kohelet 3:2 <-> Sforno on Kohelet 3:2:2, etc.
    for each segment of text (comment) that is in 'Sforno on Kohelet 3:2'.

    :param oref: Ref of the commentary text (must be a commentary ref).
    :param user: user id passed through to tracker.add.
    :param text: optional pre-fetched TextFamily contents dict; fetched if omitted.
    :return: list of normal ref strings for which links were recorded.
    """
    assert oref.is_commentary()
    tref = oref.normal()
    # The base (commented-on) text is whatever follows " on " in the ref.
    base_tref = tref[tref.find(" on ") + 4:]
    base_oref = Ref(base_tref)
    found_links = []

    # This is a special case, where the sections length is 0 and that means this is
    # a whole text or complex text node that has been posted. So we get each leaf node
    if not oref.sections:
        vs = StateNode(tref).versionState
        if not vs.is_new_state:
            vs.refresh( )  # Needed when saving multiple nodes in a complex text. This may be moderately inefficient.
        content_nodes = oref.index_node.get_leaf_nodes()
        for r in content_nodes:
            cn_oref = r.ref()
            text = TextFamily(cn_oref, commentary=0, context=0, pad=False).contents()
            length = cn_oref.get_state_ja().length()
            for i, sr in enumerate(cn_oref.subrefs(length)):
                # Build a minimal per-section contents dict for the recursive call.
                stext = {
                    "sections": sr.sections,
                    "sectionNames": text['sectionNames'],
                    "text": text["text"][i] if i < len(text["text"]) else "",
                    "he": text["he"][i] if i < len(text["he"]) else ""
                }
                found_links += add_commentary_links(sr, user, stext, **kwargs)
    else:
        if not text:
            try:
                text = TextFamily(oref, commentary=0, context=0, pad=False).contents()
            except AssertionError:
                logger.warning(
                    u"Structure node passed to add_commentary_links: {}".format(oref.normal()))
                return

        if len(text["sectionNames"]) > len(text["sections"]) > 0:
            # any other case where the posted ref sections do not match the length of the parent texts sections
            # this is a larger group of comments meaning it needs to be further broken down
            # in order to be able to match the commentary to the basic parent text units,
            # recur on each section
            length = max(len(text["text"]), len(text["he"]))
            for i, r in enumerate(oref.subrefs(length)):
                # Build a minimal per-section contents dict for the recursive call.
                stext = {
                    "sections": r.sections,
                    "sectionNames": text['sectionNames'],
                    "text": text["text"][i] if i < len(text["text"]) else "",
                    "he": text["he"][i] if i < len(text["he"]) else ""
                }
                found_links += add_commentary_links(r, user, stext, **kwargs)

        # this is a single comment, trim the last section number (comment) from ref
        elif len(text["sections"]) == len(text["sectionNames"]):
            if len(text['he']) or len(text['text']):  #only if there is actually text
                base_tref = base_tref[0:base_tref.rfind(":")]
                link = {
                    "refs": [base_tref, tref],
                    "type": "commentary",
                    "anchorText": "",
                    "auto": True,
                    "generated_by": "add_commentary_links"
                }
                found_links += [tref]
                try:
                    tracker.add(user, Link, link, **kwargs)
                except DuplicateRecordError as e:
                    pass  # link already exists
    return found_links
next(csvfile) for row in csv.reader(csvfile): is_new_perek = not lastrow or row[1] != lastrow[1] is_new_mesechet = not lastrow or row[0] != lastrow[0] # Add link mishnaRef = Ref("{} {}:{}-{}".format(row[0], row[1], row[2], row[3])) mishnahInTalmudRef = Ref("{} {}:{}-{}:{}".format(row[0], row[4], row[5], row[6], row[7])) print mishnaRef.normal() + " ... " + mishnahInTalmudRef.normal() if live: try: tracker.add(28, Link, { "refs": [mishnaRef.normal(), mishnahInTalmudRef.normal()], "auto": True, "generated_by": "mishnah_map", "type": "Mishnah in Talmud" }) except DuplicateRecordError as e: print e # Try highlighting hadran. Note that the last hadran gets highlighted outside of the loop """ 13 non standard Hadrans (code is catching all of them, but they should be checked) Hadrans in parens, joined at end of other line: Sukkah 20b:29 Sukkah 29b:5 In parens on it's own line: Sukkah 42b:12