def _clean_sheets(self):
    """Rewrite outdated refs in every sheet listed in ``self._sheets_to_update``.

    Each sheet is loaded from ``db.sheets`` by its ``id``. Every source (and,
    recursively, every entry under ``"subsources"``) whose ``"ref"`` is
    flagged by ``self._needs_rewrite`` is replaced in place with the
    normalized result of ``self._rewrite``. A sheet that changed is reported
    when ``self._report`` is truthy and written with ``save_sheet`` when
    ``self._save`` is truthy.

    Sheets that cannot be loaded are reported and skipped.
    """

    def rewrite_source(source):
        """Rewrite ``source`` (and its subsources) in place.

        Returns True when at least one ref in the subtree was changed.
        """
        needs_save = False
        if "ref" in source:
            try:
                ref = Ref(source["ref"])
            except InputError:
                print("Error: In _clean_sheets.rewrite_source: failed to instantiate Ref {}".format(source["ref"]))
            else:
                is_commentary = ref.is_commentary()
                if self._needs_rewrite(ref, is_commentary):
                    # Compute the replacement once; it is used both for the
                    # report line and for the stored value.
                    new_tref = self._rewrite(ref, is_commentary).normal()
                    if self._report:
                        print("Sheet refs - rewriting {} to {}".format(ref.normal(), new_tref))
                    needs_save = True
                    source["ref"] = new_tref
        if "subsources" in source:
            for subsource in source["subsources"]:
                # Call first, then OR: every subsource must be visited even
                # after one of them has already requested a save.
                needs_save = rewrite_source(subsource) or needs_save
        return needs_save

    for sid in self._sheets_to_update:
        sheet = db.sheets.find_one({"id": sid})
        if not sheet:
            print("Likely error - can't load sheet {}".format(sid))
            # Nothing to rewrite; falling through would index into None.
            continue

        needs_save = False
        for source in sheet["sources"]:
            if rewrite_source(source):
                needs_save = True

        if needs_save:
            if self._report:
                print("Saving modified sheet #{}".format(sheet["id"]))
            if self._save:
                # NOTE(review): presumably keeps save_sheet from treating this
                # as a stale/conflicting edit -- confirm against save_sheet.
                sheet["lastModified"] = sheet["dateModified"]
                save_sheet(sheet, sheet["owner"])
def clean_sheets(sheets_to_update):
    """Rewrite outdated refs in the sheets whose ids are in ``sheets_to_update``.

    Each sheet is loaded from ``db.sheets`` by its ``id``. Every source (and,
    recursively, every entry under ``"subsources"``) whose ``"ref"`` parses as
    a ``Ref`` and is flagged by ``needs_rewrite`` is replaced in place with
    ``rewriter(ref)``. ``needs_rewrite`` and ``rewriter`` are taken from the
    enclosing scope. Modified sheets are written with
    ``save_sheet(..., search_override=True)``.

    Sheets that cannot be loaded are reported and skipped.
    """

    def rewrite_source(source):
        """Rewrite ``source`` (and its subsources) in place.

        Returns True when at least one ref in the subtree was changed.
        """
        requires_save = False
        if "ref" in source:
            try:
                # Instantiated only to validate the ref string; the object
                # itself is not needed.
                Ref(source["ref"])
            except (InputError, ValueError):
                print("Error: In clean_sheets.rewrite_source: failed to instantiate Ref {}".format(source["ref"]))
            else:
                if needs_rewrite(source['ref']):
                    requires_save = True
                    source["ref"] = rewriter(source['ref'])
        if "subsources" in source:
            for subsource in source["subsources"]:
                # Call first, then OR: every subsource must be visited even
                # after one of them has already requested a save.
                requires_save = rewrite_source(subsource) or requires_save
        return requires_save

    for sid in sheets_to_update:
        sheet = db.sheets.find_one({"id": sid})
        if not sheet:
            print("Likely error - can't load sheet {}".format(sid))
            # Nothing to rewrite; falling through would index into None.
            continue

        needs_save = False
        for source in sheet["sources"]:
            if rewrite_source(source):
                needs_save = True

        if needs_save:
            # NOTE(review): presumably keeps save_sheet from treating this as
            # a stale/conflicting edit -- confirm against save_sheet.
            sheet["lastModified"] = sheet["dateModified"]
            save_sheet(sheet, sheet["owner"], search_override=True)
def clean_sheets(sheets_to_update):
    """Rewrite outdated refs in the sheets whose ids are in ``sheets_to_update``.

    Each sheet is loaded from ``db.sheets`` by its ``id``. Every source (and,
    recursively, every entry under ``"subsources"``) whose ``"ref"`` parses as
    a ``Ref`` and is flagged by ``needs_rewrite`` is replaced in place with
    ``rewriter(ref)``. ``needs_rewrite`` and ``rewriter`` are taken from the
    enclosing scope. Modified sheets are written with
    ``save_sheet(..., search_override=True)``.

    Sheets that cannot be loaded are reported and skipped.
    """

    def rewrite_source(source):
        """Rewrite ``source`` (and its subsources) in place.

        Returns True when at least one ref in the subtree was changed.
        """
        requires_save = False
        if "ref" in source:
            try:
                # Instantiated only to validate the ref string; the object
                # itself is not needed.
                Ref(source["ref"])
            except (InputError, ValueError):
                print("Error: In clean_sheets.rewrite_source: failed to instantiate Ref {}".format(source["ref"]))
            else:
                if needs_rewrite(source['ref']):
                    requires_save = True
                    source["ref"] = rewriter(source['ref'])
        if "subsources" in source:
            for subsource in source["subsources"]:
                # Call first, then OR: every subsource must be visited even
                # after one of them has already requested a save.
                requires_save = rewrite_source(subsource) or requires_save
        return requires_save

    for sid in sheets_to_update:
        sheet = db.sheets.find_one({"id": sid})
        if not sheet:
            print("Likely error - can't load sheet {}".format(sid))
            # Nothing to rewrite; falling through would index into None.
            continue

        needs_save = False
        for source in sheet["sources"]:
            if rewrite_source(source):
                needs_save = True

        if needs_save:
            # NOTE(review): presumably keeps save_sheet from treating this as
            # a stale/conflicting edit -- confirm against save_sheet.
            sheet["lastModified"] = sheet["dateModified"]
            save_sheet(sheet, sheet["owner"], search_override=True)
def save_top_sources_by_tag(self):
    """Save a sheet with one source per tag for the first 50 ``self.top_tags``.

    Each source uses the first ref recorded for the tag in
    ``self.sorted_refs_by_tag`` and carries the tag name as its title. The
    sheet title is stamped with the current month and year, and the sheet is
    saved through ``save_sheet`` with ``1`` as the second argument.
    """
    month_label = datetime.now().strftime("%B %Y")

    tag_sources = []
    for entry in self.top_tags[:50]:
        tag_name = entry["tag"]
        leading_ref = self.sorted_refs_by_tag[tag_name][0][0]
        tag_sources.append({"ref": leading_ref, "title": tag_name})

    sheet = {
        "title": "Top Sources by Tag - %s" % month_label,
        "sources": tag_sources,
        "options": {"numbered": 0, "divineNames": "noSub"},
    }
    save_sheet(sheet, 1)
def save_top_sources_by_category(self):
    """Save a sheet with one source per key of ``self.sorted_refs_by_category``.

    Each source uses the first ref recorded for the category and carries the
    category name as its title. The sheet title is stamped with the current
    month and year, and the sheet is saved through ``save_sheet`` with ``1``
    as the second argument.
    """
    month_label = datetime.now().strftime("%B %Y")

    category_sources = []
    for category in self.sorted_refs_by_category:
        leading_ref = self.sorted_refs_by_category[category][0][0]
        category_sources.append({"ref": leading_ref, "title": category})

    sheet = {
        "title": "Top Sources by Category - %s" % month_label,
        "sources": category_sources,
        "options": {"numbered": 0, "divineNames": "noSub"},
    }
    save_sheet(sheet, 1)
def save_top_sources_sheet(self):
    """Save a numbered sheet of the first ``self.show_count`` ``self.sorted_refs``.

    Only the first element of each ``sorted_refs`` entry is used as the
    source ref. The sheet title is stamped with the current month and year,
    and the sheet is saved through ``save_sheet`` with ``1`` as the second
    argument.
    """
    month_label = datetime.now().strftime("%B %Y")

    leading_entries = self.sorted_refs[:self.show_count]
    ref_sources = []
    for entry in leading_entries:
        ref_sources.append({"ref": entry[0]})

    sheet = {
        "title": "Top Sources in All Source Sheets - %s" % month_label,
        "sources": ref_sources,
        "options": {"numbered": 1, "divineNames": "noSub"},
    }
    save_sheet(sheet, 1)
def _clean_sheets(self):
    """Rewrite outdated refs in every sheet listed in ``self._sheets_to_update``.

    Each sheet is loaded from ``db.sheets`` by its ``id``. Every source (and,
    recursively, every entry under ``"subsources"``) whose ``"ref"`` is
    flagged by ``self._needs_rewrite`` is replaced in place with the
    normalized result of ``self._rewrite``. A sheet that changed is reported
    when ``self._report`` is truthy and written with ``save_sheet`` when
    ``self._save`` is truthy.

    Sheets that cannot be loaded are reported and skipped.
    """

    def rewrite_source(source):
        """Rewrite ``source`` (and its subsources) in place.

        Returns True when at least one ref in the subtree was changed.
        """
        needs_save = False
        if "ref" in source:
            try:
                ref = Ref(source["ref"])
            except InputError:
                print("Error: In _clean_sheets.rewrite_source: failed to instantiate Ref {}".format(source["ref"]))
            else:
                is_commentary = ref.is_commentary()
                if self._needs_rewrite(ref, is_commentary):
                    # Compute the replacement once; it is used both for the
                    # report line and for the stored value.
                    new_tref = self._rewrite(ref, is_commentary).normal()
                    if self._report:
                        print("Sheet refs - rewriting {} to {}".format(ref.normal(), new_tref))
                    needs_save = True
                    source["ref"] = new_tref
        if "subsources" in source:
            for subsource in source["subsources"]:
                # Call first, then OR: every subsource must be visited even
                # after one of them has already requested a save.
                needs_save = rewrite_source(subsource) or needs_save
        return needs_save

    for sid in self._sheets_to_update:
        sheet = db.sheets.find_one({"id": sid})
        if not sheet:
            print("Likely error - can't load sheet {}".format(sid))
            # Nothing to rewrite; falling through would index into None.
            continue

        needs_save = False
        for source in sheet["sources"]:
            if rewrite_source(source):
                needs_save = True

        if needs_save:
            if self._report:
                print("Saving modified sheet #{}".format(sheet["id"]))
            if self._save:
                # NOTE(review): presumably keeps save_sheet from treating this
                # as a stale/conflicting edit -- confirm against save_sheet.
                sheet["lastModified"] = sheet["dateModified"]
                save_sheet(sheet, sheet["owner"])
def save_top_for_category(self, cat, collapse=False):
    """Save a "Top Sources in <cat>" sheet for a single top-level category.

    The sheet opens with a comment listing the ten most used books of the
    category, continues with up to twenty of the category's top refs, and
    closes with a comment listing every matching book.

    :param cat: category name; a book matches when it is the first entry of
        the book index's ``categories`` and "Commentary" is not among them.
    :param collapse: when truthy, the first fifty refs are passed through
        ``self.collapse_ref_counts`` before the top twenty are taken.
    """

    def as_ordered_list(items):
        # Render the given strings as an HTML ordered list.
        return "<ol><li>" + "</li><li>".join(items) + "</li></ol>"

    book_labels = []
    for book in self.sorted_books:
        index = library.get_index(book[0])
        if index.categories[0] != cat or "Commentary" in index.categories:
            continue
        book_labels.append("{} ({:,})".format(book[0], book[1]))

    short_listing = as_ordered_list(book_labels[:10])
    sources = [{"comment": "Most frequently used tractates (full list below):<br>%s" % short_listing}]

    candidates = self.sorted_refs_by_category[cat][:50]
    if collapse:
        chosen = self.collapse_ref_counts(candidates)[:20]
    else:
        chosen = candidates[:20]
    for entry in chosen:
        sources.append({"ref": entry[0]})

    full_listing = as_ordered_list(book_labels)
    sources.append({"comment": "Most frequently used tractates: %s" % full_listing})

    sheet = {
        "title": "Top Sources in %s - %s" % (cat, datetime.now().strftime("%B %Y")),
        "sources": sources,
        "options": {"numbered": 1, "divineNames": "noSub"},
    }
    save_sheet(sheet, 1)
def clean_sheets(sheets_to_update):
    """Rewrite outdated refs in the sheets whose ids are in ``sheets_to_update``.

    Each sheet is loaded from ``db.sheets`` by its ``id``. Every source (and,
    recursively, every entry under ``"subsources"``) whose ``"ref"`` is
    flagged by ``needs_rewrite`` is replaced in place with ``rewriter(ref)``.
    ``needs_rewrite`` and ``rewriter`` are taken from the enclosing scope;
    ``InputError`` / ``ValueError`` raised by either is printed and the
    source is left as it was. Modified sheets are written with
    ``save_sheet(..., search_override=True)``.

    Sheets that cannot be loaded are reported and skipped.
    """

    def rewrite_source(source):
        """Rewrite ``source`` (and its subsources) in place.

        Returns True when a rewrite was attempted anywhere in the subtree.
        """
        requires_save = False
        if "ref" in source:
            original_tref = source["ref"]
            try:
                rewrite = needs_rewrite(source["ref"])
            except (InputError, ValueError) as e:
                print('needs_rewrite method threw exception:', source["ref"], e)
                rewrite = False
            if rewrite:
                requires_save = True
                try:
                    source["ref"] = rewriter(source['ref'])
                except (InputError, ValueError) as e:
                    print('rewriter threw exception:', source["ref"], e)
                # Flag rewrites that produced something Ref cannot parse; the
                # new value is still kept, only reported.
                if source["ref"] != original_tref and not Ref.is_ref(source["ref"]):
                    print('rewiter created an invalid Ref:', source["ref"])
        if "subsources" in source:
            for subsource in source["subsources"]:
                # Call first, then OR: every subsource must be visited even
                # after one of them has already requested a save.
                requires_save = rewrite_source(subsource) or requires_save
        return requires_save

    for sid in sheets_to_update:
        sheet = db.sheets.find_one({"id": sid})
        if not sheet:
            print("Likely error - can't load sheet {}".format(sid))
            # Nothing to rewrite; falling through would index into None.
            continue

        needs_save = False
        for source in sheet["sources"]:
            if rewrite_source(source):
                needs_save = True

        if needs_save:
            # NOTE(review): presumably keeps save_sheet from treating this as
            # a stale/conflicting edit -- confirm against save_sheet.
            sheet["lastModified"] = sheet["dateModified"]
            save_sheet(sheet, sheet["owner"], search_override=True)
# Console report: one "name: count" line per entry, limited to show_count
# entries per section. Each ranking is a sequence of (label, count) pairs.
for heading, ranking in (
    ("Top Categories", sorted_categories),
    ("Top Untranslated Sources", sorted_untrans_refs),
    ("Top Untranslated Texts", sorted_untrans_texts),
    ("Top Untranslated Categories", sorted_untrans_categories),
):
    print("\n******* %s ********\n" % heading)
    for item in ranking[:show_count]:
        print("%s: %d" % (item[0], item[1]))

# Fragments carry a collection in item[1]; report its size rather than a count.
print("\n******* Top Fragments ********\n")
for item in sorted_fragments[:show_count]:
    print("%s: %d" % (item[0], len(item[1])))

# Only save when asked to. The previous test, `action == "savesheet" or "both"`,
# was always true because the non-empty string "both" is truthy on its own.
if action in ("savesheet", "both"):
    sheet = {
        "title": "Top Sources in All Source Sheets",
        "sources": [{"ref": ref[0]} for ref in sorted_refs[:show_count]],
        "options": {"numbered": 1, "divineNames": "noSub"},
    }
    save_sheet(sheet, 1)
print "%s: %d" % (item[0], item[1]) print "\n******* Top Untranslated Sources ********\n" for item in sorted_untrans_refs[:show_count]: print "%s: %d" % (item[0], item[1]) print "\n******* Top Untranslated Texts ********\n" for item in sorted_untrans_texts[:show_count]: print "%s: %d" % (item[0], item[1]) print "\n******* Top Untranslated Categories ********\n" for item in sorted_untrans_categories[:show_count]: print "%s: %d" % (item[0], item[1]) print "\n******* Top Fragments ********\n" for item in sorted_fragments[:show_count]: print "%s: %d" % (item[0], len(item[1])) if action == "savesheet": sheet = { "title": "Top Sources in All Source Sheets", "sources": [{ "ref": ref[0] } for ref in sorted_refs[:show_count]], "options": { "numbered": 1, "divineNames": "noSub" } } save_sheet(sheet, 1)
def save(self):
    """Save ``self.sheet`` for its owner and record the id of the saved sheet.

    The sheet is written through ``save_sheet`` using ``self.sheet["owner"]``,
    the returned sheet's ``"id"`` is stored on ``self.id``, ``self.post_save()``
    is invoked, and a confirmation line is printed.
    """
    owner = self.sheet["owner"]
    saved = save_sheet(self.sheet, owner)
    self.id = saved["id"]
    self.post_save()
    message = "Saved sheet {}".format(saved["id"])
    print(message)
""" uid = 15872 # Mike Feuer live = True sheets = [] current_doc_number = None current_sheet = None with open("Jerusalem Anthology - Sheet1.tsv") as tsv: next(tsv) next(tsv) for l in csv.reader(tsv, dialect="excel-tab"): if l[8] != current_doc_number: current_doc_number = l[8] if current_sheet and live: sheets.append(save_sheet(current_sheet, uid)) current_sheet = { "title": "Jerusalem Anthology - {}".format(l[9]), "sources": [], "status": "public", "options": {"numbered": 0, "divineNames": "noSub"}, "generatedBy": "Sefaria Jerusalem Anthology", "promptedToPublish": datetime.now().isoformat(), "tags": ["Jerusalem"] } try: ref = Ref(l[14]) if u"Tanach" in ref.index.categories: en = TextChunk(ref, "en").ja().flatten_to_string() else:
def setup_module():
    """Create the throwaway "Delete Me" index and the fixtures that hang off it.

    Any leftover "Delete Me" index (with its links and notes) is removed
    first. The function then builds a two-part schema with one alternate
    structure, saves two English versions, fills the "Schema Test" version
    with text, adds five links and one note, refreshes the version state,
    saves a test sheet (storing its id in the module-level ``TEST_SHEET_ID``)
    and finally calls ``create_simple_text()``.
    """
    global TEST_SHEET_ID

    print('Creating Dummy Index')

    # ensure dummy index was properly deleted
    leftover = Index().load({'title': 'Delete Me'})
    if leftover is not None:
        LinkSet(Ref("Delete Me")).delete()
        NoteSet({"ref": {"$regex": "Delete Me.*"}}).delete()
        leftover.delete()

    # Build an index with some nodes
    root = SchemaNode()
    root.add_title('Delete Me', 'en', primary=True)
    root.add_title('תמחק אותי', 'he', primary=True)
    root.key = 'Delete Me'

    first_part = JaggedArrayNode()
    first_part.add_title('Part1', 'en', primary=True)
    first_part.add_title("Partone", 'en')
    first_part.add_title('חלק 1', 'he', primary=True)
    first_part.sectionNames = ['Chapter', 'Verse']
    first_part.addressTypes = ['Integer', 'Integer']
    first_part.depth = 2
    first_part.key = 'Part1'
    root.append(first_part)

    second_part = JaggedArrayNode()
    second_part.add_title('Part2', 'en', primary=True)
    second_part.add_title('חלק 2', 'he', primary=True)
    second_part.sectionNames = ['Section', 'Segment']
    second_part.addressTypes = ['Integer', 'Integer']
    second_part.depth = 2
    second_part.key = 'Part2'
    root.append(second_part)

    root.validate()

    alt = ArrayMapNode()
    alt.depth = 0
    alt.wholeRef = 'Delete Me, Part1 1:2-3:1'
    alt.add_title('Something', 'en', True)
    alt.add_title('משהו', 'he', True)

    index = Index({
        'schema': root.serialize(),
        'title': 'Delete Me',
        'categories': ['Tanakh'],
        'alt_structs': {'alt': alt.serialize()},
    })
    index.save()

    # add some text: first the version that receives content, then an empty one
    for version_title in ("Schema Test", "Schema Test Blank"):
        Version({
            "language": "en",
            "title": "Delete Me",
            "versionSource": "http://foobar.com",
            "versionTitle": version_title,
            "chapter": root.create_skeleton(),
        }).save()

    part_texts = (
        ('Delete Me, Part1', [['Part1 part1', 'Part1'], ['Part1'], ['Part1', '', 'part1']]),
        ('Delete Me, Part2', [['Part2 part2', 'Part2'], ['Part2'], ['Part2', '', 'part2']]),
    )
    for part_ref, part_text in part_texts:
        chunk = TextChunk(Ref(part_ref), 'en', 'Schema Test')
        chunk.text = part_text
        chunk.save()

    # add some links
    linked_refs = (
        ['Delete Me, Part1 1:1', 'Shabbat 2a:5'],
        ['Delete Me, Part1 2:1', 'Delete Me, Part2 2:1'],
        ['Delete Me, Part1 3', 'Shabbat 2a:5'],
        ['Delete Me, Part2 1:1', 'Shabbat 2a:5'],
        ['Delete Me, Part2 3', 'Shabbat 2a:5'],
    )
    for ref_pair in linked_refs:
        Link({'refs': ref_pair, 'type': 'None'}).save()

    # add a note
    Note({
        'owner': 23432,
        'public': False,
        'text': 'Some very important text',
        'type': 'note',
        'ref': 'Delete Me, Part1 1:1',
    }).save()

    VersionState("Delete Me").refresh()

    sheet_refs = ['Delete Me, Part1 1:1', 'MigrateBook 1:1', 'MigrateBook 4:1']
    saved_sheet = save_sheet(create_test_sheet(sheet_refs), 1)
    TEST_SHEET_ID = saved_sheet['id']

    # set up the simple text
    create_simple_text()
    print('End of test setup')
""" uid = 15872 # Mike Feuer live = True sheets = [] current_doc_number = None current_sheet = None with open("Jerusalem Anthology - Sheet1.tsv") as tsv: next(tsv) next(tsv) for l in csv.reader(tsv, dialect="excel-tab"): if l[8] != current_doc_number: current_doc_number = l[8] if current_sheet and live: sheets.append(save_sheet(current_sheet, uid)) current_sheet = { "title": "Jerusalem Anthology - {}".format(l[9]), "sources": [], "status": "public", "options": { "numbered": 0, "divineNames": "noSub" }, "generatedBy": "Sefaria Jerusalem Anthology", "promptedToPublish": datetime.now().isoformat(), "tags": ["Jerusalem"] } try: ref = Ref(l[14])