def dump_ics_md_pair(panchaanga, period_str):
    """Write the ICS calendar for a panchaanga together with its markdown files.

    Relative to the canonical output path ``<out>`` this produces
    ``<out>.ics``, ``<out>.md``, a copy ``<out>_monthly.md`` and the directory
    ``<out>_monthly/`` obtained by splitting that copy into bits. Index files
    under the module-level ``output_dir`` are fixed at the end.

    Args:
        panchaanga: Panchaanga object; its ``city.name`` and
            ``computation_system`` are used to pick the output path.
        period_str: String of the form ``"<year_type>/<year>"``.

    Raises:
        ValueError: If ``period_str`` does not split into exactly two parts
            on ``"/"``, or the year part is not an integer.
    """
    ics_calendar = ics.compute_calendar(panchaanga)
    year_type, year = period_str.split("/")
    year = int(year)
    out_path = get_canonical_path(
        city=panchaanga.city.name,
        computation_system_str=str(panchaanga.computation_system),
        year=year,
        year_type=year_type)

    output_file_ics = out_path + ".ics"
    ics.write_to_file(ics_calendar, output_file_ics)

    # Derive sibling paths from the stem rather than with str.replace():
    # replace() rewrites *every* occurrence, so a directory component that
    # happens to contain ".ics" or ".md" would silently corrupt the path.
    md_file = MdFile(file_path=out_path + ".md", frontmatter_type=MdFile.YAML)
    intro = "## 00 Intro\n### Related files\n- [ics](../%s)\n" % str(os.path.basename(output_file_ics))
    md_content = "%s\n%s" % (intro, md.make_md(panchaanga=panchaanga))
    md_file.dump_to_file(metadata={"title": year}, md=md_content, dry_run=False)

    md_stem = os.path.splitext(md_file.file_path)[0]
    monthly_file_path = md_stem + "_monthly.md"
    monthly_dir = md_stem + "_monthly/"

    # Start from a clean directory so bits from an earlier run do not linger.
    shutil.rmtree(path=monthly_dir, ignore_errors=True)
    logging.info("%s exists? %s", monthly_dir, os.path.exists(monthly_dir))
    logging.info("Copying to %s", monthly_file_path)
    shutil.copy(md_file.file_path, monthly_file_path)

    monthly_md_file = MdFile(file_path=monthly_file_path)
    monthly_md_file.set_title_from_filename(dry_run=False, transliteration_target=None)
    # First split the monthly copy itself, then split each resulting file again.
    monthly_md_file.split_to_bits(source_script=None, dry_run=False, indexed_title_pattern=None)
    MdFile.apply_function(
        fn=MdFile.split_to_bits,
        dir_path=monthly_dir,
        frontmatter_type=MdFile.TOML,
        source_script=None,
        dry_run=False,
        indexed_title_pattern=None)
    logging.info("%s exists? %s", monthly_dir, os.path.exists(monthly_dir))

    MdFile.fix_index_files(dir_path=output_dir, transliteration_target=None, dry_run=False)
def fix_paths(dir_path, dry_run=False):
    """Move markdown files named ``<year>-<month>-...`` into ``<year>/<month>/``.

    Every ``2*.md`` file found anywhere under ``dir_path`` is moved to
    ``dir_path/<year>/<month>/<basename>``, where year and month are the first
    two ``-``-separated parts of the file name. Index files under ``dir_path``
    are fixed afterwards.

    Args:
        dir_path: Root directory that is searched and that receives the
            ``<year>/<month>`` folders.
        dry_run: When true, only log the intended moves; the value is also
            forwarded to ``MdFile.fix_index_files``.
    """
    files = glob.glob(os.path.join(dir_path, '**/2*.md'), recursive=True)
    for file_path in files:
        base_name = os.path.basename(file_path)
        name_parts = base_name.split("-")
        if len(name_parts) < 2:
            # The glob also matches names such as "2021.md"; these carry no
            # month part, so skip them instead of failing with IndexError
            # after some files have already been moved.
            logging.warning("Skipping %s: no '-' separated year and month in its name", file_path)
            continue
        year_str, month_str = name_parts[0], name_parts[1]
        dest_path = os.path.join(dir_path, year_str, month_str, base_name)
        logging.info("Move %s to %s", file_path, dest_path)
        if not dry_run:
            os.makedirs(name=os.path.dirname(dest_path), exist_ok=True)
            shutil.move(src=file_path, dst=dest_path)
    MdFile.fix_index_files(dir_path=dir_path, dry_run=dry_run)
def dump_summary(year, city, script=xsanscript.DEVANAGARI, computation_system=ComputationSystem.MULTI_NEW_MOON_SIDEREAL_MONTH_ADHIKA__CHITRA_180):
    """Write a TOML table and a summary markdown page for one Gregorian year.

    Writes ``<out>.toml`` (the table built by ``to_table_dict``) and
    ``<out>_summary.md`` (computation parameters plus a spreadsheet element
    pointing at the TOML file), where ``<out>`` is the canonical output path.

    Args:
        year: Gregorian year as an integer (it is formatted with ``%d``).
        city: City object; ``city.name`` is used for logging.
        script: Script passed to ``get_computation_parameters_md``.
        computation_system: Computation system used to obtain the panchaanga.
    """
    year_type = era.ERA_GREGORIAN
    logging.info("Generating summary panchaanga for %s year %d (%s), with computation system %s ", city.name, year, year_type, str(computation_system))
    panchaanga = annual.get_panchaanga_for_year(
        city=city,
        year=year,
        computation_system=computation_system,
        year_type=year_type,
        allow_precomputed=True)
    year_table = to_table_dict(panchaanga=panchaanga)

    out_path = get_canonical_path(
        city=panchaanga.city.name,
        computation_system_str=str(panchaanga.computation_system),
        year=year,
        year_type=year_type)
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    # Write UTF-8 explicitly: without an encoding the file is opened with the
    # locale's default, which can fail (or produce non-UTF-8 TOML) when the
    # table contains non-ASCII text.
    with open(out_path + ".toml", "w", encoding="utf-8") as fp:
        toml.dump(year_table, fp)
    MdFile.fix_index_files(dir_path=output_dir, transliteration_target=None, dry_run=False)

    computation_params = get_computation_parameters_md(panchaanga=panchaanga, scripts=[script])
    out_path_md = out_path + "_summary.md"
    # NOTE(review): "## Table" and the <div> sit on one line in this literal
    # as seen here - confirm that no line break was intended between them.
    summary_md = (
        "## Intro\n%s\n\n## Table "
        '<div class="spreadsheet" src="../%s.toml" fullHeightWithRowsPerScreen=4> </div>'
    ) % (computation_params, str(year))
    md_file = MdFile(file_path=out_path_md)
    md_file.dump_to_file(metadata={"title": "%d Summary" % (year)}, md=summary_md, dry_run=False)
def dump_text(start_url, out_path, base_url="https://sa.wikisource.org/", transliteration_source=None, dry_run=False):
    """Dump a series of linked pages starting at ``start_url`` into ``out_path``.

    Pages are followed through the ``div.gen_header_forelink a`` link, each is
    dumped via ``souper.dump_text_from_element``, and afterwards file names are
    set from titles and index files are fixed under ``out_path``.

    Args:
        start_url: URL of the first page of the series.
        out_path: Directory that receives the dumped files.
        base_url: Base URL handed to the next-link lookup.
        transliteration_source: Forwarded to ``MdFile.set_filenames_from_titles``
            and, as ``transliteration_target``, to ``MdFile.fix_index_files``.
        dry_run: Forwarded to every helper that accepts it.
    """

    def next_url_getter(soup):
        # Locate the link to the following page of the series.
        return souper.next_url_from_soup_css(
            soup=soup, css="div.gen_header_forelink a", base_url=base_url)

    def html_fixer(soup):
        # Swap some tags for others and drop ".noprint" elements.
        replacements = (
            ("big", "h2"),
            ("table", "div"),
            ("tbody", "div"),
            ("span[style*=\"font-weight:bold;\"]", "b"),
        )
        for selector, new_tag in replacements:
            souper.tag_replacer(soup=soup, css_selector=selector, tag_name=new_tag)
        souper.tag_remover(soup=soup, css_selector=".noprint")

    def title_maker(soup, title_prefix):
        # Take the <h1> title, then collapse everything from the first space
        # up to the last "/" into a single space.
        raw_title = souper.title_from_element(
            soup=soup, title_css_selector="h1", title_prefix=title_prefix)
        return regex.sub(" .+/", " ", raw_title).strip()

    def dumper(url, outfile_path, title_prefix, dry_run):
        # Per-page dump of the parser output element.
        return souper.dump_text_from_element(
            url=url,
            outfile_path=outfile_path,
            text_css_selector="div.mw-parser-output",
            title_maker=title_maker,
            title_prefix=title_prefix,
            html_fixer=html_fixer,
            dry_run=dry_run)

    souper.dump_series(
        start_url=start_url,
        out_path=out_path,
        dumper=dumper,
        next_url_getter=next_url_getter,
        dry_run=dry_run)
    MdFile.set_filenames_from_titles(
        dir_path=out_path, dry_run=dry_run, transliteration_source=transliteration_source)
    MdFile.fix_index_files(
        dir_path=out_path, dry_run=dry_run, transliteration_target=transliteration_source)
from doc_curation.md_helper import MdFile

# Remove all handlers associated with the root logger object.
for handler in logging.root.handlers[:]:
    logging.root.removeHandler(handler)
logging.basicConfig(
    level=logging.DEBUG,
    format="%(levelname)s:%(asctime)s:%(module)s:%(lineno)d %(message)s")


def set_titles_from_spreadsheet(dir_path, dry_run=False):
    """Fix adhyaaya file titles from a spreadsheet, then devanaagarify them.

    Args:
        dir_path: Directory handed to ``mahaabhaarata.get_adhyaaya_md_files``.
        dry_run: Forwarded to both ``MdFile`` calls.
    """
    adhyaaya_files = mahaabhaarata.get_adhyaaya_md_files(dir_path)
    MdFile.fix_titles(
        md_files=adhyaaya_files,
        # The keyword is spelled this way in the call being made; keep it.
        spreadhsheet_id="1sNH1AWhhoa5VATqMdLbF652s7srTG0Raa6K-sCwDR-8",
        worksheet_name="कुम्भकोणाध्यायाः",
        id_column="क्रमाङ्कम्",
        title_column="अन्तिमशीर्षिका",
        md_file_to_id=mahaabhaarata.get_adhyaaya_id,
        dry_run=dry_run
    )
    # Fetch the file list again rather than reusing the one above.
    MdFile.devanaagarify_titles(
        md_files=mahaabhaarata.get_adhyaaya_md_files(dir_path), dry_run=dry_run)


def get_upaakhyaana_and_titles_from_path(dir_path, file_pattern="**/*.md"):
    """Print one tab-separated ``upaakhyaana<TAB>title`` line per markdown file.

    Args:
        dir_path: Directory searched for markdown files.
        file_pattern: Pattern forwarded to ``MdFile.get_md_files_from_path``.
    """
    md_files = MdFile.get_md_files_from_path(dir_path=dir_path, file_pattern=file_pattern)
    # Collect all titles first, then all upaakhyaanas, before printing.
    titles = [md_file.get_title() for md_file in md_files]
    upaakhyaanas = [md_file.get_upaakhyaana() for md_file in md_files]
    for upaakhyaana, title in zip(upaakhyaanas, titles):
        print("\t".join(map(str, (upaakhyaana, title))))


dir_path = "/home/vvasuki/vvasuki-git/kAvya/content/TIkA/padyam/purANam/mahAbhAratam/03-vana-parva/"
# set_titles_from_filenames(dir_path=dir_path, dry_run=True)
# get_upaakhyaana_and_titles_from_path(dir_path=dir_path)
MdFile.fix_index_files(dir_path=dir_path, dry_run=False)
# set_titles_from_spreadsheet(dir_path=dir_path, dry_run=False)