def set_titles_from_spreadsheet(dir_path, dry_run=False):
    """Fix adhyAya titles from the kumbhakoNa worksheet, then devanAgarIfy them.

    :param dir_path: directory containing the adhyAya md files.
    :param dry_run: if True, only log what would be done.
    """
    adhyaaya_files = mahaabhaarata.get_adhyaaya_md_files(dir_path)
    MdFile.fix_titles(
        md_files=adhyaaya_files,
        spreadhsheet_id="1sNH1AWhhoa5VATqMdLbF652s7srTG0Raa6K-sCwDR-8",
        worksheet_name="कुम्भकोणाध्यायाः", id_column="क्रमाङ्कम्",
        title_column="अन्तिमशीर्षिका",
        md_file_to_id=mahaabhaarata.get_adhyaaya_id, dry_run=dry_run)
    # Re-list the files so the devanAgarIfication pass sees the state left by fix_titles.
    MdFile.devanaagarify_titles(
        md_files=mahaabhaarata.get_adhyaaya_md_files(dir_path), dry_run=dry_run)
def dump_text(base_dir):
    """Scrape vAjasaneyi (kANva) saMhitA chapters from vedicheritage.gov.in into md files.

    One file per adhyAya is written under base_dir as NN.md; pre-existing files are skipped.
    Missing pages are tolerated with a warning.
    """
    unit_info_file = os.path.join(os.path.dirname(text_data.__file__), "vedaH/vAjasaneyi/samhitA.json")
    for kaanda_index in text_data.get_subunit_list(json_file=unit_info_file, unit_path_list=[]):
        logging.info("adhyAya %d", kaanda_index)
        outfile_path = os.path.join(base_dir, "%02d.md" % (kaanda_index))
        if os.path.exists(outfile_path):
            # FIX: lazy %-style args instead of eager string concatenation,
            # consistent with the other logging calls in this function.
            logging.info("Skipping %s", outfile_path)
            continue
        url = "http://vedicheritage.gov.in/samhitas/yajurveda/shukla-yajurveda/vajasaneyi-kanva-samhita-chapter-%02d/" % (
            kaanda_index)
        logging.info("url %s to %s", url, outfile_path)
        browser.get(url=url)
        try:
            text = browser.find_element_by_id("videotext").text
            text = text.replace("\n", " \n")
            title = "%02d" % kaanda_index
            title = sanscript.transliterate(title, sanscript.HK, sanscript.DEVANAGARI)
            md_file = MdFile(file_path=outfile_path)
            md_file.dump_to_file(metadata={"title": title}, md=text, dry_run=False)
        except NoSuchElementException:
            # Some chapters are simply absent on the site; log and continue.
            logging.warning("Page missing! %s ", url)
def dump_devanaagarii(source_html, dest_file):
    """Convert a GRETIL IAST html file into a devanAgarI md file at dest_file.

    The GRETIL home-page url line splits the English intro from the IAST body.
    Skips (with a warning) if dest_file already exists.
    """
    if os.path.exists(dest_file):
        logging.warning("Skipping %s as it exists", dest_file)
        return
    logging.info("Processing %s to %s", source_html, dest_file)
    with codecs.open(source_html, "r", 'utf-8') as file_in:
        soup = BeautifulSoup(file_in.read(), 'lxml')
    metadata = {"title": soup.title.text.strip()}
    lines = soup.text.split("\n")
    # Everything before the GRETIL marker line is the English intro; everything after is IAST text.
    is_not_gretil_marker = lambda x: x.strip() != "http://gretil.sub.uni-goettingen.de/gretil.htm"
    english_lines = itertools.takewhile(is_not_gretil_marker, lines)
    intro = "\n\n## Intro\n%s" % (" \n".join(english_lines))
    iast_lines = list(itertools.dropwhile(is_not_gretil_marker, lines))[1:]
    text = " \n".join(iast_lines)
    # Collapse long runs of blank-ish lines into a single paragraph break.
    text = regex.sub("( \n){3,}", "\n\n", text)
    text = sanscript.transliterate(data=text, _from=sanscript.IAST, _to=sanscript.DEVANAGARI)
    text = "%s\n\n## पाठः\n%s" % (intro, text)
    out_file = MdFile(file_path=dest_file, frontmatter_type="toml")
    out_file.dump_to_file(metadata=metadata, md=text, dry_run=False)
def dump_text(base_dir):
    """Scrape shaunaka-saMhitA sUkta-s from vedicheritage.gov.in into base_dir/KK/SSS.md files.

    Pre-existing files are skipped. Missing pages are tolerated with a warning
    (consistent with the vAjasaneyi scraper) instead of aborting the whole run.
    """
    unit_info_file = os.path.join(os.path.dirname(text_data.__file__), "vedaH/shaunaka/samhitA.json")
    for kaanda_index in text_data.get_subunit_list(json_file=unit_info_file, unit_path_list=[]):
        subunit_list = text_data.get_subunit_list(
            json_file=unit_info_file, unit_path_list=[kaanda_index])
        for subunit_index in subunit_list:
            logging.info("kaanDa %d adhyaaya %d", kaanda_index, subunit_index)
            outfile_path = os.path.join(base_dir, "%02d" % (kaanda_index), "%03d.md" % subunit_index)
            if os.path.exists(outfile_path):
                # FIX: lazy %-style args instead of eager string concatenation.
                logging.info("Skipping %s", outfile_path)
                continue
            url = "http://vedicheritage.gov.in/samhitas/atharvaveda-samhitas/shaunaka-samhita/kanda-%02d-sukta-%03d/" % (
                kaanda_index, subunit_index)
            logging.info("url %s to %s", url, outfile_path)
            browser.get(url=url)
            try:
                text = browser.find_element_by_id("videotext").text
            except NoSuchElementException:
                # FIX: previously an absent page raised and killed the entire scrape;
                # now we warn and move on, matching the sibling vAjasaneyi dump_text.
                logging.warning("Page missing! %s ", url)
                continue
            text = text.replace("\n", " \n")
            title_tags = browser.find_elements_by_css_selector(
                "#videotext strong")
            title = "%03d" % subunit_index
            if len(title_tags) > 0:
                title = "%03d %s" % (subunit_index, title_tags[0].text)
            title = sanscript.transliterate(title, sanscript.HK, sanscript.DEVANAGARI)
            md_file = MdFile(file_path=outfile_path)
            md_file.dump_to_file(metadata={"title": title}, md=text, dry_run=False)
def dump_markdown(src_file, dest_file):
    """Write src_file's text to dest_file as md with TOML frontmatter.

    The title is the itxtitle metadata field transliterated OPTITRANS → devanAgarI.
    """
    logging.info("Processing %s to %s", src_file, dest_file)
    metadata = get_metadata(src_file=src_file)
    metadata["title"] = sanscript.transliterate(
        data=metadata["itxtitle"], _from=sanscript.OPTITRANS, _to=sanscript.DEVANAGARI)
    out_file = MdFile(file_path=dest_file, frontmatter_type=MdFile.TOML)
    out_file.dump_to_file(metadata=metadata, md=get_text(src_file=src_file), dry_run=False)
def fix_paths(dir_path, dry_run=False):
    """Move post files named YYYY-MM-… into dir_path/YYYY/MM/, then fix index files.

    :param dry_run: if True, only log the intended moves.
    """
    for file_path in glob.glob(os.path.join(dir_path, '**/2*.md'), recursive=True):
        base_name = os.path.basename(file_path)
        # File names are of the form YYYY-MM-DD_slug.md; bucket by year and month.
        (year_str, month_str) = base_name.split("-")[:2]
        dest_path = os.path.join(dir_path, year_str, month_str, base_name)
        logging.info("Move %s to %s", file_path, dest_path)
        if not dry_run:
            os.makedirs(name=os.path.dirname(dest_path), exist_ok=True)
            shutil.move(src=file_path, dst=dest_path)
    MdFile.fix_index_files(dir_path=dir_path, dry_run=dry_run)
def dump_all_texts(dest_dir, overwrite=False):
    """Dump every adishila unicode text linked from the index page into dest_dir.

    :param overwrite: if False, files that already exist are skipped.
    """
    soup = scraping.get_soup(url="https://adishila.com/unicodetxt-htm/")
    links = soup.select("div.wp-block-group a")
    for link in links:
        # FIX: log before the (slow) fetch — previously "Getting" was logged only
        # after the page had already been downloaded, which was misleading.
        # The fetch itself cannot be deferred: the destination file name depends
        # on the title, which is only known after fetching.
        logging.info("Getting %s", link["href"])
        (title, text) = get_text(link["href"])
        filename = file_helper.clean_file_path("%s.md" % title)
        dest_path = os.path.join(dest_dir, filename)
        if not overwrite and os.path.exists(dest_path):
            logging.warning("Skipping %s since it exists", dest_path)
            continue
        md_file = MdFile(file_path=dest_path, frontmatter_type=MdFile.TOML)
        md_file.dump_to_file(metadata={"title": title}, md=text, dry_run=False)
def get_adhyaaya_md_files(md_file_path):
    """Return md files under md_file_path whose base names contain a 3-digit (adhyAya) number."""
    def has_adhyaaya_number(path):
        # Equivalent to len(findall(...)) > 0: true iff a 3-digit run occurs in the name.
        return regex.search(r"\d\d\d", os.path.basename(path)) is not None

    return MdFile.get_md_files_from_path(
        dir_path=md_file_path, file_pattern="**/*.md",
        file_name_filter=has_adhyaaya_number)
def scrape_post_markdown(url, dir_path, dry_run):
    """Scrape a wordpress post into an md file under dir_path; skip if it already exists.

    The file name is derived from the post's date-path, eg
    https://example.wordpress.com/2020/06/08/some-slug/ → 2020-06-08_some-slug.md.
    """
    url_path = urlsplit(url=url).path.strip()
    file_name = regex.sub("/(....)/(..)/(..)/(.+)/", r"\1-\2-\3_\4.md", url_path)
    file_path = file_helper.clean_file_path(file_path=os.path.join(dir_path, file_name))
    if os.path.exists(file_path):
        logging.warning("Skipping %s : exists", file_name)
        return
    (title, post_html) = get_post_html(url=url)
    logging.info("Dumping %s to %s with title %s.", url, file_path, title)
    md_file = MdFile(file_path=file_path, frontmatter_type=MdFile.TOML)
    md_file.import_content_with_pandoc(
        metadata={"title": title}, content=post_html,
        source_format="html", dry_run=dry_run)
def dump_text(start_url, out_path, base_url="https://sa.wikisource.org/", transliteration_source=None, dry_run=False):
    """Dump a wikisource page series starting at start_url into out_path.

    After dumping, file names are set from titles and index files are fixed.
    """
    def next_url_getter(soup):
        # Follow the "forward" link in the series header to the next page.
        return souper.next_url_from_soup_css(
            soup=soup, css="div.gen_header_forelink a", base_url=base_url)

    def html_fixer(soup):
        souper.tag_replacer(soup=soup, css_selector="big", tag_name="h2")
        souper.tag_replacer(soup=soup, css_selector="table", tag_name="div")
        souper.tag_replacer(soup=soup, css_selector="tbody", tag_name="div")
        souper.tag_replacer(soup=soup, css_selector="span[style*=\"font-weight:bold;\"]", tag_name="b")
        souper.tag_remover(soup=soup, css_selector=".noprint")

    def title_maker(soup, title_prefix):
        title = souper.title_from_element(soup=soup, title_css_selector="h1", title_prefix=title_prefix)
        # Drop the "parent-page/" part of wikisource titles.
        return regex.sub(" .+/", " ", title).strip()

    def dumper(url, outfile_path, title_prefix, dry_run):
        return souper.dump_text_from_element(
            url=url, outfile_path=outfile_path, text_css_selector="div.mw-parser-output",
            title_maker=title_maker, title_prefix=title_prefix,
            html_fixer=html_fixer, dry_run=dry_run)

    souper.dump_series(start_url=start_url, out_path=out_path, dumper=dumper,
                       next_url_getter=next_url_getter, dry_run=dry_run)
    MdFile.set_filenames_from_titles(
        dir_path=out_path, dry_run=dry_run, transliteration_source=transliteration_source)
    MdFile.fix_index_files(
        dir_path=out_path, dry_run=dry_run, transliteration_target=transliteration_source)
def process_catalog_page_selenium(url, out_dir):
    """Scrape one catalog page: dump its unicode-transliteration text plus metadata to an md file.

    Skips pages with no transliteration link, and files that already exist.
    """
    logging.info("Processing catalog %s", url)
    browser.get(url=url)
    text_links = browser.find_elements_by_link_text("View in Unicode transliteration")
    if len(text_links) == 0:
        logging.warning("%s does not have text", url)
        return
    catalog_body = browser.find_element_by_css_selector(".catalog_record_body")
    metadata = get_front_matter(catalog_body.get_attribute('innerHTML'))
    logging.info(metadata)
    dest_file_path = get_file_path(out_dir=out_dir, title_iast=metadata["title_iast"],
                                   author_iast=metadata.get("author_iast", ""),
                                   catalog_number=metadata.get("Catalog number", ""))
    if os.path.exists(dest_file_path):
        logging.warning("Skipping %s - already exists.", dest_file_path)
        # BUG FIX: previously there was no return here, so the "skipped" file
        # was re-fetched and overwritten anyway.
        return
    text_url = text_links[0].get_attribute("href")
    # Renamed from `file` so as not to shadow the builtin.
    md_file = MdFile(file_path=dest_file_path, frontmatter_type="toml")
    text = get_text(url=text_url)
    text = text.replace("\n", " \n")
    md_file.dump_to_file(metadata=metadata, md=text, dry_run=False)
def transform():
    """Assemble per-sUkta md files (samhitA, padapATha, sAyaNa-bhAShya) from per-Rk json files."""
    json_paths = glob.glob(
        "/home/vvasuki/sanskrit/raw_etexts/veda/Rg/shakala/saMhitA/sAyaNabhAShyam/*/*/*.json",
        recursive=True)
    suukta_id_to_md = {}
    for json_path in sorted(json_paths):
        # FIX: explicit utf-8 — the json files contain devanAgarI; codecs.open
        # without an encoding behaves like plain open and depends on the locale.
        with codecs.open(json_path, "r", 'utf-8') as fp:
            rk = json.load(fp)
        suukta_id = "%02d/%03d" % (int(rk["classification"]["mandala"]), int(rk["classification"]["sukta"]))
        suukta_md = suukta_id_to_md.get(suukta_id, "")
        # Strip html tags from the bhAShya text.
        bhaashya = regex.sub("<.+?>", "", rk["sayanaBhashya"])
        rk_number = sanscript.transliterate(
            "%02d" % int(rk["classification"]["rik"]), sanscript.IAST, sanscript.DEVANAGARI)
        attribute_str = "%s। %s। %s।" % (rk["attribute"]["devata"], rk["attribute"]["rishi"], rk["attribute"]["chandas"])
        # "lines" may be a bare string or a list of strings; normalize to a list.
        padapaatha_lines = rk["padapaatha"]["lines"]
        if isinstance(padapaatha_lines, str):
            padapaatha_lines = [padapaatha_lines]
        samhita_lines = rk["samhitaAux"]["lines"]
        if isinstance(samhita_lines, str):
            samhita_lines = [samhita_lines]
        rk_md = "%s\n\n%s %s॥\n\n%s\n\n%s" % (
            attribute_str, " \n".join(samhita_lines), rk_number,
            " \n".join(padapaatha_lines), bhaashya)
        suukta_md += "\n\n%s" % rk_md
        if bhaashya == "":
            logging.warning("No bhAShya for %s", rk["id"])
        suukta_id_to_md[suukta_id] = suukta_md
    # FIX: iterate items() instead of keys() + repeated lookups.
    for suukta_id, suukta_md in suukta_id_to_md.items():
        # NOTE(review): dest_dir looks like a module-level constant — confirm it is defined.
        dest_path = os.path.join(dest_dir, suukta_id + ".md")
        md_file = MdFile(file_path=dest_path)
        title = sanscript.transliterate(
            suukta_id.split("/")[-1], sanscript.IAST, sanscript.DEVANAGARI)
        md_file.dump_to_file(metadata={"title": title}, md=suukta_md, dry_run=False)
def test_panchanga_chennai_2019():
    """Regression-test the md dump for Chennai 2019 against a checked-in golden file.

    If the golden file is absent (deliberately deleted), it is regenerated instead of failing.
    """
    panchaanga_2019 = Panchaanga.read_from_file(
        filename=os.path.join(TEST_DATA_PATH, 'Chennai-2019.json'))
    # We dump to md.txt rather than md to avoid slow checks on intellij ide.
    orig_md_file = os.path.join(TEST_DATA_PATH, 'Chennai-2019-devanagari.md.txt')
    current_md_output = os.path.join(TEST_DATA_PATH, 'Chennai-2019-devanagari.md.txt.local')
    # FIX: make_md is expensive and was previously computed twice; hoist it.
    md_content = md.make_md(panchaanga=panchaanga_2019)
    md_file = MdFile(file_path=current_md_output)
    md_file.dump_to_file(metadata={"title": str(2019)}, md=md_content, dry_run=False)
    if not os.path.exists(orig_md_file):
        logging.warning(
            "%s not present. Assuming that it was deliberately deleted to update test files.",
            orig_md_file)
        md_file = MdFile(file_path=orig_md_file)
        md_file.dump_to_file(metadata={"title": str(2019)}, md=md_content, dry_run=False)
    with open(orig_md_file) as orig_tex, open(current_md_output) as current_tex:
        assert current_tex.read() == orig_tex.read()
def dump_summary(year, city, script=xsanscript.DEVANAGARI, computation_system=ComputationSystem.MULTI_NEW_MOON_SIDEREAL_MONTH_ADHIKA__CHITRA_180):
    """Dump a summary table (.toml) and a summary md page for the given city and year."""
    year_type = era.ERA_GREGORIAN
    logging.info("Generating summary panchaanga for %s year %d (%s), with computation system %s ",
                 city.name, year, year_type, str(computation_system))
    panchaanga = annual.get_panchaanga_for_year(
        city=city, year=year, computation_system=computation_system,
        year_type=year_type, allow_precomputed=True)
    year_table = to_table_dict(panchaanga=panchaanga)
    out_path = get_canonical_path(
        city=panchaanga.city.name, computation_system_str=str(panchaanga.computation_system),
        year=year, year_type=year_type)
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    with codecs.open(out_path + ".toml", "w") as fp:
        toml.dump(year_table, fp)
    # NOTE(review): output_dir appears to be a module-level global — confirm it is set.
    MdFile.fix_index_files(dir_path=output_dir, transliteration_target=None, dry_run=False)
    computation_params = get_computation_parameters_md(panchaanga=panchaanga, scripts=[script])
    out_path_md = out_path + "_summary.md"
    # FIX: renamed from `md` — that name shadowed the md module used by sibling
    # functions in this file and invited subtle breakage on later edits.
    summary_md = """## Intro\n%s\n\n## Table <div class="spreadsheet" src="../%s.toml" fullHeightWithRowsPerScreen=4> </div>""" % (computation_params, str(year))
    md_file = MdFile(file_path=out_path_md)
    md_file.dump_to_file(metadata={"title": "%d Summary" % (year)}, md=summary_md, dry_run=False)
from doc_curation.md_helper import MdFile

# Reset root-logger handlers so that basicConfig below takes effect
# even if logging was configured earlier.
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)
logging.basicConfig(
    level=logging.DEBUG,
    format="%(levelname)s:%(asctime)s:%(module)s:%(lineno)d %(message)s")


def set_titles_from_spreadsheet(dir_path, dry_run=False):
    """Fix adhyAya titles from the kumbhakoNa worksheet, then devanAgarIfy them."""
    MdFile.fix_titles(
        md_files=mahaabhaarata.get_adhyaaya_md_files(dir_path),
        spreadhsheet_id="1sNH1AWhhoa5VATqMdLbF652s7srTG0Raa6K-sCwDR-8",
        worksheet_name="कुम्भकोणाध्यायाः", id_column="क्रमाङ्कम्",
        title_column="अन्तिमशीर्षिका",
        md_file_to_id=mahaabhaarata.get_adhyaaya_id, dry_run=dry_run)
    MdFile.devanaagarify_titles(
        md_files=mahaabhaarata.get_adhyaaya_md_files(dir_path), dry_run=dry_run)


def get_upaakhyaana_and_titles_from_path(dir_path, file_pattern="**/*.md"):
    """Print a tab-separated (upAkhyAna, title) row for every md file under dir_path."""
    md_files = MdFile.get_md_files_from_path(dir_path=dir_path, file_pattern=file_pattern)
    titles = [md_file.get_title() for md_file in md_files]
    upaakhyaanas = [md_file.get_upaakhyaana() for md_file in md_files]
    for row in zip(upaakhyaanas, titles):
        print("\t".join(str(cell) for cell in row))


dir_path = "/home/vvasuki/vvasuki-git/kAvya/content/TIkA/padyam/purANam/mahAbhAratam/03-vana-parva/"

# set_titles_from_filenames(dir_path=dir_path, dry_run=True)
# get_upaakhyaana_and_titles_from_path(dir_path=dir_path)
MdFile.fix_index_files(dir_path=dir_path, dry_run=False)
# set_titles_from_spreadsheet(dir_path=dir_path, dry_run=False)
# MdFile.fix_index_files(dir_path="/home/vvasuki/vvasuki-git/jyotiSham/content/history/equinoctial_records.md", dry_run=False, transliteration_target=None) # # MdFile.apply_function(dir_path="/home/vvasuki/sanskrit/raw_etexts/veda/atharva/paippalAda/", fn=MdFile.set_title_from_filename, transliteration_target=sanscript.DEVANAGARI, dry_run=False) # MdFile.apply_function(dir_path="/home/vvasuki/vvasuki-git/pALi/content", fn=MdFile.ensure_ordinal_in_title, transliteration_target=sanscript.DEVANAGARI, dry_run=False) # MdFile.set_titles_from_filenames(dir_path="/home/vvasuki/vvasuki-git/notes-hugo/content/history/history_of_the_indian_people", transliteration_target=None, dry_run=False) # MdFile.apply_function(fn=MdFile.prepend_file_index_to_title, dir_path="/home/vvasuki/hindutva/hindutva-hugo/content/main/books/vivekAnanda", dry_run=False) # doc_curation.clear_bad_chars(file_path="/home/vvasuki/sanskrit/raw_etexts/mImAMsA/mImAMsA-naya-manjarI.md", dry_run=False) MdFile.apply_function( fn=MdFile.split_to_bits, dir_path= "/home/vvasuki/vvasuki-git/notes-hugo/content/skills/clothing/extremities.md", frontmatter_type=MdFile.TOML, dry_run=False, source_script=sanscript.DEVANAGARI, indexed_title_pattern=None) # MdFile.apply_function(fn=MdFile.split_to_bits, dir_path="/home/vvasuki/sanskrit/raw_etexts/veda/atharva/paippalAda", dry_run=False, source_script=sanscript.DEVANAGARI) # MdFile.apply_function(fn=MdFile.split_to_bits, dir_path="", dry_run=False, source_script=None, indexed_title_pattern=None) # MdFile(file_path="",frontmatter_type=MdFile.TOML).split_to_bits(dry_run=False, source_script=None, indexed_title_pattern=None) # , indexed_title_pattern=None # md_helper.import_md_recursive(source_dir="/home/vvasuki/Downloads/peterFreund", file_extension="txt") # file_helper.copy_file_tree(source_dir="/home/vvasuki/Downloads/peterFreund", dest_dir="/home/vvasuki/sanskrit/raw_etexts/mixed/peterFreund", file_pattern="**/*.md") # 
# Fix lazy anusvAra in the given file; per the kwarg names, padAnta positions are
# presumably left untouched (ignore_padaanta=True) and y/r/l contexts omitted
# (omit_yrl=True) — TODO confirm against MdFile.fix_lazy_anusvaara.
MdFile(file_path="/home/vvasuki/sanskrit/raw_etexts/vedAntam/dvaitam/mAdhvam/TikA-tippaNi/chidgagana_tika.md",frontmatter_type=MdFile.TOML).fix_lazy_anusvaara(dry_run=False, ignore_padaanta=True, omit_yrl=True)
# MdFile.apply_function(fn=MdFile.fix_lazy_anusvaara, dir_path="/home/vvasuki/sanskrit/raw_etexts", file_name_filter=None, start_file="/home/vvasuki/sanskrit/raw_etexts/vyAkaraNam/aShTAdhyAyI_central-repo/vAsu/pada-1.1/1.1.1.md", dry_run=False, ignore_padaanta=True, omit_yrl=True)
import logging

from curation_projects import raamaayana
from doc_curation.md_helper import MdFile

# Remove all handlers associated with the root logger object,
# so that basicConfig below takes effect even if logging was already configured.
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)
logging.basicConfig(
    level=logging.DEBUG,
    format="%(levelname)s:%(asctime)s:%(module)s:%(lineno)d %(message)s")

md_file_path = "/home/vvasuki/vvasuki-git/kAvya/content/TIkA/padyam/purANam/rAmAyaNam/AndhrapAThaH"

# MdFile.fix_index_files(dir_path=md_file_path, dry_run=False)
# MdFile.fix_titles(
#     md_files=raamaayana.get_adhyaaya_md_files(md_file_path),
#     spreadhsheet_id="1xqVBhDwRzcEL7HlCJhxmnG1aOFFk6B8gGZ4GuBZynf8",
#     worksheet_name="शीर्षिकाः", id_column="id", title_column="अन्तिमशीर्षिका", md_file_to_id=raamaayana.get_adhyaaya_id, dry_run=False
# )

# Active operation: convert titles of all adhyAya md files to devanAgarI.
MdFile.devanaagarify_titles(
    md_files=raamaayana.get_adhyaaya_md_files(md_file_path), dry_run=False)
def get_upaakhyaana_and_titles_from_path(dir_path, file_pattern="**/*.md"):
    """Print a tab-separated (upAkhyAna, title) row for every md file under dir_path."""
    md_files = MdFile.get_md_files_from_path(dir_path=dir_path, file_pattern=file_pattern)
    # Gather all titles first, then all upAkhyAna-s, matching the original call order.
    titles = [f.get_title() for f in md_files]
    upaakhyaanas = [f.get_upaakhyaana() for f in md_files]
    for upaakhyaana, title in zip(upaakhyaanas, titles):
        print("%s\t%s" % (upaakhyaana, title))
# (continuation of a call whose opening lies above this chunk — left untouched)
heading_class="subhead")


def makedirs():
    """Build the tipiTaka directory tree described by dirs.txt under raw_etexts."""
    tree = dir_helper.tree_from_file(
        "/home/vvasuki/sanskrit-coders/doc_curation/curation_projects/tipiTikA/dirs.txt"
    )
    tree.root.set_ordinals()
    logging.info(tree.root)
    tree.root.regularize_keys()
    logging.info(tree.root)
    tree.root.make_dirs(base_dir="/home/vvasuki/paali-bhaasaa/raw_etexts/", dry_run=False)


def dump_files():
    """Fetch every url listed in links.txt into the tipiTaka raw_etexts directory."""
    LINK_FILE_PATH = "/home/vvasuki/sanskrit-coders/doc_curation/curation_projects/tipiTikA/links.txt"
    with open(LINK_FILE_PATH) as linkfile:
        for line in linkfile.readlines():
            get_file(outdir="/home/vvasuki/paali-bhaasaa/raw_etexts/tipiTaka", url=line.strip())


# MdFile.fix_index_files(dir_path="/home/vvasuki/paali-bhaasaa/raw_etexts/", dry_run=False)
# MdFile.fix_title_numbering_in_path(dir_path="/home/vvasuki/paali-bhaasaa/raw_etexts/", dry_run=False)
# MdFile.fix_title_numbering_in_path(dir_path="/home/vvasuki/vvasuki-git/pALi/content/01_tipiTaka", dry_run=False)

# Active operation: rename files under the directory below from their (devanAgarI) titles.
MdFile.set_filenames_from_titles(
    dir_path=
    "/home/vvasuki/vvasuki-git/tipiTaka/content/01_mUlam/02_suttapiTaka/04_anguttaranikAyo",
    transliteration_source=sanscript.DEVANAGARI, dry_run=False)
def dump_ics_md_pair(panchaanga, period_str):
    """Write the panchaanga as an .ics calendar plus a companion md file, then
    split a copy of that md into per-bit files under a *_monthly/ directory.

    :param period_str: of the form "<year_type>/<year>", split on "/" below.
    """
    ics_calendar = ics.compute_calendar(panchaanga)
    (year_type, year) = period_str.split("/")
    year = int(year)
    out_path = get_canonical_path(city=panchaanga.city.name, computation_system_str=str(panchaanga.computation_system), year=year, year_type=year_type)
    output_file_ics = os.path.join(out_path + ".ics")
    ics.write_to_file(ics_calendar, output_file_ics)
    # The md file sits next to the ics file and links to it in its intro.
    md_file = MdFile(file_path=output_file_ics.replace(".ics", ".md"), frontmatter_type=MdFile.YAML)
    intro = "## 00 Intro\n### Related files\n- [ics](../%s)\n" % str(os.path.basename(output_file_ics))
    md_content = "%s\n%s" % (intro, md.make_md(panchaanga=panchaanga))
    md_file.dump_to_file(metadata={"title": year}, md=md_content, dry_run=False)
    # Scratch copy that will be split into bits below.
    monthly_file_path = md_file.file_path.replace(".md", "_monthly.md")
    monthly_dir = monthly_file_path.replace(".md", "/")
    # Remove stale output from a previous run before re-splitting.
    shutil.rmtree(path=monthly_dir, ignore_errors=True)
    logging.info("%s exists? %s", monthly_dir, os.path.exists(monthly_dir))
    logging.info("Copying to %s", monthly_file_path)
    shutil.copy(md_file.file_path, monthly_file_path)
    monthly_md_file = MdFile(file_path=monthly_file_path)
    monthly_md_file.set_title_from_filename(dry_run=False, transliteration_target=None)
    # Split the copied file, then split each resulting file in monthly_dir once more
    # (presumably file → months → sub-sections; confirm against MdFile.split_to_bits).
    monthly_md_file.split_to_bits(source_script=None, dry_run=False, indexed_title_pattern=None)
    MdFile.apply_function(fn=MdFile.split_to_bits, dir_path=monthly_dir, frontmatter_type=MdFile.TOML, source_script=None, dry_run=False, indexed_title_pattern=None)
    logging.info("%s exists? %s", monthly_dir, os.path.exists(monthly_dir))
    # NOTE(review): output_dir appears to be a module-level global — confirm it is defined.
    MdFile.fix_index_files(dir_path=output_dir, transliteration_target=None, dry_run=False)
from doc_curation.md_helper import MdFile # MdFile.fix_index_files(dir_path="/home/vvasuki/vvasuki-git/kAvya/content/TIkA/padyam/kathAsaritsAgaraH/", dry_run=False) MdFile.set_titles_from_filenames( dir_path= "/home/vvasuki/vvasuki-git/kAvya/content/TIkA/padyam/kathAsaritsAgaraH/12/", dry_run=False)
# wordpress.fix_paths(dir_path="/home/vvasuki/sanskrit/raw_etexts_english/blogs/manasataramgini", dry_run=False) # wordpress.fix_paths(dir_path="/home/vvasuki/vvasuki-git/notes-hugo/content/history/paganology/Aryan/indo-iranian/indo-aryan/persons/brAhma/sage-bloodlines/bhRguH/dvitIyajanmani_bhRguH/chyavanaH/ApnavAna/aurvaH/jamadagniH/MT_charitram/", dry_run=False) # MdFile.apply_function(fn=MdFile.transliterate_content, dir_path="/home/vvasuki/sanskrit/raw_etexts/mixed/sarit-markdown", source_scheme=sanscript.IAST) # # MdFile.fix_index_files(dir_path="/home/vvasuki/sanskrit/raw_etexts/veda/atharva", dry_run=False, transliteration_target=None) # # MdFile.apply_function(dir_path="/home/vvasuki/vvasuki-git/vedAH/content/sAma/kauthumam", fn=MdFile.set_title_from_filename, transliteration_target=sanscript.DEVANAGARI, dry_run=False) # MdFile.apply_function(dir_path="/home/vvasuki/vvasuki-git/pALi/content", fn=MdFile.ensure_ordinal_in_title, transliteration_target=sanscript.DEVANAGARI, dry_run=False) # MdFile.set_titles_from_filenames(dir_path="/home/vvasuki/vvasuki-git/notes-hugo/content/history/history_of_the_indian_people", transliteration_target=None, dry_run=False) MdFile.set_filenames_from_titles( dir_path="/home/vvasuki/vvasuki-git/sanskrit/content/vyAkaraNam/whitney", transliteration_source=sanscript.DEVANAGARI, dry_run=False) # MdFile.apply_function(fn=MdFile.prepend_file_index_to_title, dir_path="/home/vvasuki/hindutva/hindutva-hugo/content/main/books/vivekAnanda", dry_run=False) # doc_curation.clear_bad_chars(file_path="/home/vvasuki/sanskrit/raw_etexts/mImAMsA/mImAMsA-naya-manjarI.md", dry_run=False) # MdFile.apply_function(fn=MdFile.split_to_bits, dir_path="/home/vvasuki/vvasuki-git/saMskAra/content/kalpe_svamatam/social-cultivation/violence/animal-protection.md", frontmatter_type=MdFile.TOML, dry_run=False, source_script=sanscript.DEVANAGARI) # MdFile.apply_function(fn=MdFile.split_to_bits, 
dir_path="/home/vvasuki/sanskrit/raw_etexts/veda/sAma/brAhmaNam/chandogya_brahmana", dry_run=False) # MdFile.apply_function(fn=MdFile.split_to_bits, dir_path="/home/vvasuki/vvasuki-git/saMskAra/content/kalpe_svamatam/social-cultivation/violence/animal-sacrifice.md", dry_run=False, source_script=None, indexed_title_pattern=None) # MdFile(file_path="",frontmatter_type=MdFile.TOML).split_to_bits(dry_run=False, source_script=None, indexed_title_pattern=None) # , indexed_title_pattern=None # md_helper.import_md_recursive(source_dir="/home/vvasuki/Downloads/peterFreund", file_extension="txt")