def main(argv=None):
    """Command-line entry point: run ``merge_html_p`` over a text stream.

    The whole input is read through ``may_open_stdin`` (``-i`` path),
    transformed by ``merge_html_p`` and written through ``may_open_stdout``
    (``-o`` path) using mode ``'xt'``.

    :param argv: argument list for ``argparse``; ``None`` lets argparse fall
        back to ``sys.argv[1:]``.
    :return: ``0`` once the output has been written.
    """
    import argparse
    from seed.io.may_open import may_open_stdout, may_open_stdin

    class Globals:
        output_file_encoding = 'utf8'
        input_file_encoding = 'utf8'

    # The default help formatter re-wraps the epilog, so only the words
    # matter here, not the line breaks.
    parser = argparse.ArgumentParser(
        description='merge html <p>',
        epilog='''
    <p></p> is the true seperator
    "<p>abc</p>
    <p>def</p>"
    ==>>
    "<p>abcdef</p>"
''')
    add_argument = parser.add_argument
    add_argument('-i', '--input_file', type=str, default=None,
                 help='the input file')
    add_argument('-ie', '--input_encoding', type=str,
                 default=Globals.input_file_encoding,
                 help='the encoding of input file')
    add_argument('-o', '--output_file', type=str, default=None,
                 help='the output file')
    add_argument('-oe', '--output_encoding', type=str,
                 default=Globals.output_file_encoding,
                 help='the encoding of output file')

    args = parser.parse_args(argv)

    with may_open_stdin(args.input_file, 'rt',
                        encoding=args.input_encoding) as fin:
        pseudo_htm = fin.read()
    txt = merge_html_p(pseudo_htm)

    if args.output_file is not None:
        # Try the output encoding first: an unencodable text raises here,
        # before the output file gets opened in 'xt' mode.
        txt.encode(args.output_encoding)

    with may_open_stdout(args.output_file, 'xt',
                         encoding=args.output_encoding) as fout:
        fout.write(txt)
    # The statements that used to follow this return (a second open/extract
    # pass calling extract_fb_opf_items) could never run and were removed.
    return 0
def main(args=None, /):
    """Command-line entry point for ``show_factor_uint``.

    Parses ``--uint_le`` / ``--coprime_le`` (both required ints) plus the
    output options, then calls ``show_factor_uint`` with the opened output.

    :param args: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    bounds_help = ('factor uint in [1..uint_le] only which coprime to '
                   '[2..coprime_le]')

    cli = argparse.ArgumentParser(
        description='factor_uint',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    for flag in ('--uint_le', '--coprime_le'):
        cli.add_argument(flag, type=int, required=True, help=bounds_help)
    cli.add_argument('-o', '--output', type=str, default=None,
                     help='output file path')
    cli.add_argument('-e', '--encoding', type=str, default='utf8',
                     help='output file encoding')
    cli.add_argument('-f', '--force', action='store_true', default=False,
                     help='open mode for output file')

    opts = cli.parse_args(args)

    # --force selects mode 'wt'; otherwise 'xt' is passed on.
    open_mode = 'wt' if opts.force else 'xt'
    with may_open_stdout(opts.output, open_mode,
                         encoding=opts.encoding) as fout:
        show_factor_uint(fout=fout,
                         uint_le=opts.uint_le,
                         coprime_le=opts.coprime_le)
def main(args=None):
    """Dump ``doublesize2max_clique_srcs_dsts_pairs`` as a python source file.

    The value is looked up by name in this module's globals and written as
    ``<name> = \\`` followed by its ``pprint`` rendering. Without ``-o`` the
    target is ``this_folder / '<name>.py'``.

    NOTE(review): ``this_folder`` and ``this_file_name`` are not defined in
    this function; presumably module-level names - confirm.

    :param args: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    from pprint import pprint
    from seed.io.may_open import may_open_stdout  # may_open_stdin

    cli = argparse.ArgumentParser(
        description='make max_cliques from simple_decomposed_chars3980',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument('-o', '--output', type=str, default=None,
                     help='output file path')
    cli.add_argument('-oe', '--output_encoding', type=str, default='utf8',
                     help='output file encoding')
    cli.add_argument('-f', '--force', action='store_true', default=False,
                     help='open mode for output file')
    opts = cli.parse_args(args)

    open_mode = 'wt' if opts.force else 'xt'

    var_name = 'doublesize2max_clique_srcs_dsts_pairs'
    payload = globals()[var_name]

    target = opts.output
    if target is None:
        target = this_folder / f'{var_name}.py'

    with may_open_stdout(target, open_mode,
                         encoding=opts.output_encoding) as fout:
        header_lines = (
            f'#{var_name} generated by {this_file_name}',
            f'{var_name} = \\',
        )
        for header_line in header_lines:
            print(header_line, file=fout)
        pprint(payload, stream=fout)
def on_subcmd__cmp_branch_dir(sf, subcmd_name, parsed_args):
    """Handle the ``cmp_branch_dir`` sub-command.

    Calls ``type(sf)._main4subcmds.on_dir_cmp__relative`` with the paths and
    options taken from ``parsed_args`` and writes the outcome with
    ``write__file4result_of_dir_cmp__relative__extended``.

    :param sf: object whose type provides ``_main4subcmds``.
    :param subcmd_name: not used in this body.
    :param parsed_args: namespace produced by the argument parser.
    :return: ``None``.
    """
    #see:[location4setting4the_two_kwargs]
    #    always_tribool_as_is_or_not_same_file
    #bug:PP = _pairs__str2rpath
    #    see:str2rpath_encoding_pair
    # NOTE(review): reconstructed from a whitespace-mangled source; the
    # assignment below is assumed to be live code - confirm.
    PP = tuple

    kwargs4MkIsSameFile = {
        'always_tribool_as_is_or_not_same_file': ast.literal_eval(
            parsed_args.always_tribool_as_is_or_not_same_file),
        'size_hash0_eq_as_same_file':
            parsed_args.size_hash0_eq_as_same_file,
        'mtime_eq_as_same_file': parsed_args.mtime_eq_as_same_file,
        'imay_max_size_threshold4cmp_content':
            parsed_args.imay_max_size_threshold4cmp_content,
        # the settings below are fixed here, not taken from the command line
        'size_eq_as_same_file': False,
        'hash_eq_as_same_file': False,
        'mtime_ne_as_not_same_file': False,
        '_block_size': BLOCK_SIZE,
    }
    kwargs4dir_cmp__relative = {'ignore_basename': None, 'max_depth': None}

    compare = type(sf)._main4subcmds.on_dir_cmp__relative
    lhs_branch_idx4old, result_of_dir_cmp__relative = compare(
        lhs_repository_extra_cache_root_dir_path=(
            parsed_args.lhs_repository_extra_cache_root_dir_path),
        lhs_repository_root_dir_path=parsed_args.lhs_repository_root_dir_path,
        lhs_branch_name=parsed_args.lhs_branch_name,
        rhs_real_fsys_root_dir_path=parsed_args.rhs_real_fsys_root_dir_path,
        rhs_ignorefile_relative_path_encoding_pairs=PP(
            parsed_args.rhs_ignorefile_relative_path_encoding_pairs),
        kwargs4MkIsSameFile=kwargs4MkIsSameFile,
        kwargs4dir_cmp__relative=kwargs4dir_cmp__relative,
    )

    out_path = parsed_args.output
    from seed.io.may_open import may_open_stdin, may_open_stdout
    out_encoding = parsed_args.encoding
    out_mode = 'wt' if parsed_args.force else 'xt'

    with may_open_stdout(out_path, out_mode, encoding=out_encoding) as fout:
        #see:[location4fmt_of_file4result_of_dir_cmp__relative__extended]
        write__file4result_of_dir_cmp__relative__extended(
            fout,
            parsed_args.lhs_branch_name,
            lhs_branch_idx4old,
            PP(parsed_args.rhs_ignorefile_relative_path_encoding_pairs),
            result_of_dir_cmp__relative,
        )
    return
def main(args=None):
    """Read the source text with ``read_中华字经`` and dump it as python source.

    Defaults come from ``Global``: without ``-i`` the input is
    ``Global.ifname`` next to this file, without ``-o`` the output is
    ``Global.ofname`` next to this file. The parsed value must have
    ``Global.size`` entries (checked with ``assert``).

    :param args: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    from pathlib import PurePath as Path
    from pprint import pprint
    from seed.io.may_open import may_open_stdout  # may_open_stdin

    here = Path(__file__)
    script_dir = here.parent
    script_name = here.name

    cli = argparse.ArgumentParser(
        description=f'read “{Global.ifname}”',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument('-i', '--input', type=str, default=None,
                     help='input file path')
    cli.add_argument('-o', '--output', type=str, default=None,
                     help='output file path')
    cli.add_argument('-ie', '--input_encoding', type=str,
                     default=Global.iencoding, help='input file encoding')
    cli.add_argument('-oe', '--output_encoding', type=str,
                     default=Global.oencoding, help='output file encoding')
    cli.add_argument('-f', '--force', action='store_true', default=False,
                     help='open mode for output file')
    opts = cli.parse_args(args)

    open_mode = 'wt' if opts.force else 'xt'

    if opts.input is None:
        src_path = script_dir / Global.ifname
    else:
        src_path = opts.input
    with open(src_path, 'rt', encoding=opts.input_encoding) as fin:
        chars_3980 = read_中华字经(fin)
    assert len(chars_3980) == Global.size

    dst_path = opts.output
    if dst_path is None:
        dst_path = script_dir / Global.ofname

    with may_open_stdout(dst_path, open_mode,
                         encoding=opts.output_encoding) as fout:
        print(f'#{Global.var_name} generated by {script_name}', file=fout)
        print(f'{Global.var_name} = \\', file=fout)
        pprint(chars_3980, stream=fout)
def main(argv=None):
    """Command-line entry point: pass one of the ``Data`` tables to ``do_output``.

    ``-fun`` selects ``Data.fun_chars_data``; otherwise ``Data.grams_data``
    is used.

    :param argv: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    from seed.io.may_open import may_open_stdout

    cli = argparse.ArgumentParser(description='show the 8 trigrams')
    cli.add_argument('-o', '--output', type=str, default=None,
                     help='output file path')
    cli.add_argument('-e', '--encoding', type=str, default='utf8',
                     help='input/output file encoding')
    cli.add_argument('-f', '--force', action='store_true', default=False,
                     help='open mode for output file')
    cli.add_argument('-fun', '--output_fun_chars', action='store_true',
                     default=False,
                     help='output fun_chars instead of grams')
    opts = cli.parse_args(argv)

    open_mode = 'wt' if opts.force else 'xt'

    # Only the selected attribute of Data is read.
    if opts.output_fun_chars:
        table = Data.fun_chars_data
    else:
        table = Data.grams_data

    with may_open_stdout(opts.output, open_mode,
                         encoding=opts.encoding) as fout:
        do_output(fout, table)
def main(args=None):
    """Command-line entry point: write the bytes from ``mk_windows_spec_lnk``.

    The link bytes are built from ``--spec_addr`` before the output is
    opened, then written in binary mode (``'xb'``, or ``'wb'`` with
    ``--force``).

    :param args: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    cli = argparse.ArgumentParser(
        description='make Windows special address link file',
        epilog=f'spec_addr example: {windows_firewall_spec_addr__str!r}',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument('-a', '--spec_addr', type=str, required=True,
                     help='spec_addr; for example, see epilog')
    cli.add_argument('-o', '--output', type=str, default=None,
                     help='output file path')
    cli.add_argument('-f', '--force', action='store_true', default=False,
                     help='open mode for output file')
    opts = cli.parse_args(args)

    if opts.force:
        binary_mode = 'wb'
    else:
        binary_mode = 'xb'

    payload = mk_windows_spec_lnk(opts.spec_addr)

    with may_open_stdout(opts.output, binary_mode, encoding=None) as fout:
        fout.write(payload)
def main(args=None, /):
    """Command-line entry point: call ``eval_then_show`` for each ``-i`` value.

    ``-i`` takes zero or more strings (described by its help text as python
    expressions); each is passed, in order, to ``eval_then_show`` together
    with the opened output.

    NOTE(review): the description says the expressions are evaluated, so the
    command line is presumably trusted input - confirm.

    :param args: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    cli = argparse.ArgumentParser(
        description='python eval then hex then print',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument('-i', '--input', type=str, nargs='*', default=[],
                     help='input python expression')
    cli.add_argument('-o', '--output', type=str, default=None,
                     help='output file path')
    cli.add_argument('-e', '--encoding', type=str, default='utf8',
                     help='input/output file encoding')
    cli.add_argument('-f', '--force', action='store_true', default=False,
                     help='open mode for output file')
    opts = cli.parse_args(args)

    open_mode = 'wt' if opts.force else 'xt'
    expressions = opts.input

    with may_open_stdout(opts.output, open_mode,
                         encoding=opts.encoding) as fout:
        for expression in expressions:
            eval_then_show(expression, fout=fout)
def main(argv=None):
    """Command-line entry point: run ``extract_360doc_com`` on a page.

    The page comes either from ``--url`` (via ``open_webpage``) or from
    ``--input`` (via ``may_open_stdin``). If reading the input as text
    raises ``UnicodeError``, the named file is re-opened in binary mode and
    extraction is retried. The result is written through
    ``may_open_stdout``.

    :param argv: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    :raises ValueError: when both ``--input`` and ``--url`` are given.
    :raises UnicodeError: when text decoding fails and there is no input
        file name to re-open in binary mode.
    :raises SystemExit: always on success, through ``parser.exit(0)``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='extract first content_div of article on 360doc.com')
    parser.add_argument('-e', '--encoding', type=str, default='utf8',
                        help='input/output file encoding')
    parser.add_argument('-i', '--input', type=str, default=None,
                        help='input file path')
    parser.add_argument('-o', '--output', type=str, default=None,
                        help='output file path')
    parser.add_argument('-f', '--force', action='store_true', default=False,
                        help='open mode for output file')
    parser.add_argument('-url', '--url', type=str, default=None,
                        help='input webpage url')

    args = parser.parse_args(argv)
    encoding = args.encoding
    omode = 'wt' if args.force else 'xt'

    if args.input is not None and args.url is not None:
        raise ValueError('input both file and url at same time')

    if args.url is not None:
        with open_webpage(args.url) as fin:
            content_div = extract_360doc_com(fin)
    else:
        may_ifname = args.input
        try:
            # open as text file
            with may_open_stdin(may_ifname, 'rt', encoding=encoding) as fin:
                content_div = extract_360doc_com(fin)
        except UnicodeError:
            if may_ifname is None:
                # No file name to re-open: surface the decoding error itself.
                # (This used to be an assert, which -O strips.)
                raise
            # open as binary file
            with open(may_ifname, 'rb') as fin:
                content_div = extract_360doc_com(fin)

    with may_open_stdout(args.output, omode, encoding=encoding) as fout:
        fout.write(content_div)

    # parser.exit() raises SystemExit, so nothing after it can run.
    parser.exit(0)
def main(argv=None):
    """Command-line entry point: encrypt or decrypt text with ``aCrypt``.

    ``cmd`` chooses the direction. When encrypting, the input is read as
    ``ascii`` and the output written as ``utf8``; when decrypting the two
    encodings are swapped.

    :param argv: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    :raises TypeError: when ``psw`` contains a character that is not in
        ``aCrypt.char2idxP``.
    :raises SystemExit: always on success, through ``parser.exit(0)``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    encodingI = 'ascii'
    encodingO = 'utf8'

    parser = argparse.ArgumentParser(
        description='simple encrypt ascii text',
        epilog=r'only " " and "\n" are allowed, other control/whitespace should not occur in input text',
    )
    parser.add_argument('cmd', type=str, choices='encrypt decrypt'.split(),
                        help='encrypt/decrypt - treat input as cleartext/ciphertext')
    parser.add_argument('psw', type=str,
                        help='password: regex = [0-9a-f]*')
    parser.add_argument('-i', '--input', type=str, default=None,
                        help='input file path')
    parser.add_argument('-o', '--output', type=str, default=None,
                        help='output file path')
    parser.add_argument('-f', '--force', action='store_true', default=False,
                        help='open mode for output file')

    args = parser.parse_args(argv)
    psw = args.psw
    if not all(ch in aCrypt.char2idxP for ch in psw):
        # Same exception type as before, now with a message. The password
        # itself is deliberately not echoed.
        raise TypeError(
            'psw contains characters that are not in aCrypt.char2idxP')

    omode = 'wt' if args.force else 'xt'
    does_encrypt = args.cmd == 'encrypt'
    iencoding = encodingI if does_encrypt else encodingO
    oencoding = encodingO if does_encrypt else encodingI

    with may_open_stdin(args.input, 'rt', encoding=iencoding) as fin:
        input_text = ''.join(fin)

    if does_encrypt:
        output_text = aCrypt.encrypt(psw, input_text)
    else:
        output_text = aCrypt.decrypt(psw, input_text)

    with may_open_stdout(args.output, omode, encoding=oencoding) as fout:
        fout.write(output_text)

    # parser.exit() raises SystemExit, so nothing after it can run.
    parser.exit(0)
def main(args=None):
    """Command-line entry point: run one named routine with the output stream.

    ``--do`` must be the ``__name__`` of one of the routines listed below;
    the chosen routine is called as ``routine(fout=fout)``.

    :param args: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    # Dispatch table keyed by function name. Looking the callable up here,
    # rather than via globals(), guarantees the function that runs is the
    # one offered as a choice.
    routines = {
        routine.__name__: routine
        for routine in (
            ls笔顺码字符范围__closed_rngs,
            cmp3,
            prepare_for_汉字粗拆分,
            prepare_for_2汉字粗拆分2,
        )
    }

    parser = argparse.ArgumentParser(
        description="汉字相关字符范围",
        epilog="",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('-do', '--do', type=str, required=True,
                        choices=list(routines),
                        # was the copy-pasted 'input file path'
                        help='name of the routine to run')
    parser.add_argument('-o', '--output', type=str, default=None,
                        help='output file path')
    parser.add_argument('-e', '--encoding', type=str, default='utf8',
                        help='input/output file encoding')
    parser.add_argument('-f', '--force', action='store_true', default=False,
                        help='open mode for output file')

    args = parser.parse_args(args)
    omode = 'wt' if args.force else 'xt'
    routine = routines[args.do]

    with may_open_stdout(args.output, omode, encoding=args.encoding) as fout:
        routine(fout=fout)
def on_subcmd__get_branch_size(sf, subcmd_name, parsed_args):
    """Handle the ``get_branch_size`` sub-command.

    Obtains a value from ``type(sf)._main4subcmds.on_get_branch_size`` and
    prints it to the output selected by ``parsed_args``.

    :param sf: object whose type provides ``_main4subcmds``.
    :param subcmd_name: not used in this body.
    :param parsed_args: namespace produced by the argument parser.
    :return: ``None``.
    """
    query = type(sf)._main4subcmds.on_get_branch_size
    branch_size = query(
        lhs_repository_extra_cache_root_dir_path=(
            parsed_args.lhs_repository_extra_cache_root_dir_path),
        lhs_repository_root_dir_path=parsed_args.lhs_repository_root_dir_path,
        lhs_branch_name=parsed_args.lhs_branch_name,
    )

    out_path = parsed_args.output
    from seed.io.may_open import may_open_stdin, may_open_stdout
    out_encoding = parsed_args.encoding
    if parsed_args.force:
        out_mode = 'wt'
    else:
        out_mode = 'xt'

    with may_open_stdout(out_path, out_mode, encoding=out_encoding) as fout:
        print(branch_size, file=fout)
    return
def main(argv=None):
    """Command-line entry point for ``lower_file``.

    Opens input and output with ``newline=''`` and calls
    ``lower_file(fout, fin, upper=..., sep=...)``.

    :param argv: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    :raises SystemExit: always on success, through ``parser.exit(0)``.
    """
    import argparse
    import sys

    cli = argparse.ArgumentParser(
        description='translate characters from lower to upper case')
    cli.add_argument('-i', '--input', type=str, default=None,
                     help='input file name')
    cli.add_argument('-o', '--output', type=str, default=None,
                     help='output file name')
    cli.add_argument('-e', '--encoding', type=str, default='utf8',
                     help='encoding of input/output file')
    cli.add_argument('-f', '--force', action='store_true', default=False,
                     help='open mode for output file')
    cli.add_argument('-s', '--sep', type=str, default=None,
                     help='seperator string of output file which has 2 columes')
    cli.add_argument('-u', '--upper', action='store_true', default=False,
                     help='lower2upper instead of upper2lower')
    opts = cli.parse_args(argv)

    charset = opts.encoding
    write_mode = 'wt' if opts.force else 'xt'

    # Input is opened first, then output - same order as a combined
    # ``with a, b:`` statement.
    with may_open_stdin(opts.input, 'rt', encoding=charset,
                        newline='') as fin:
        with may_open_stdout(opts.output, write_mode, encoding=charset,
                             newline='') as fout:
            lower_file(fout, fin, upper=opts.upper, sep=opts.sep)

    cli.exit(0)
    return 0  # not reached: exit() raises SystemExit
def main(argv=None):
    """Command-line entry point: parse a torrent file and print the result.

    ``parse_torrent`` is called with the ``input`` path and the input
    encoding; its result is written with ``pprint`` when ``-pp`` is given
    and with ``print`` otherwise.

    :param argv: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``. Previously this parameter was ignored and
        ``sys.argv`` was always parsed.
    :raises SystemExit: always on success, through ``parser.exit(0)``.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description='convert torrent file to dict',
        epilog=epilog_str,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('input', type=str,
                        help='path to the input torrent file')
    parser.add_argument('-ie', '--input_encoding', type=str, default='utf8',
                        help='encoding to strings in input file')
    parser.add_argument('-pp', '--pretty_print', action='store_true',
                        default=False, help='pretty print output')
    parser.add_argument('-o', '--output', type=str, default=None,
                        help='output file path')
    parser.add_argument('-oe', '--output_encoding', type=str, default='utf8',
                        help='output file encoding')
    parser.add_argument('--mode',
                        choices='exclusive append overwrite'.split(),
                        default='exclusive',
                        help='mode for open output file')

    # Bug fix: honour the caller-supplied argument list.
    args = parser.parse_args(argv)

    result = parse_torrent(args.input, encoding=args.input_encoding)

    if args.pretty_print:
        def xprint(obj, *, file):
            pprint(obj, stream=file)
    else:
        def xprint(obj, *, file):
            print(obj, file=file)

    # Map the user-facing mode name onto the open() mode letter.
    mode = {'exclusive': 'x', 'append': 'a', 'overwrite': 'w'}[args.mode]
    with may_open_stdout(args.output, mode + 't',
                         encoding=args.output_encoding) as fout:
        xprint(result, file=fout)

    # parser.exit() raises SystemExit, so nothing after it can run.
    parser.exit(0)
def main(args=None):
    """Command-line entry point: translate a byte stream through ``TABLE``.

    Reads blocks of ``BLOCK_SIZE`` bytes, applies ``bytes.translate(TABLE)``
    and writes them out. With ``-a``, an input name and no ``-o``, the
    output name is the input name with ``.inv`` removed if present, or
    appended otherwise.

    :param args: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    cli = argparse.ArgumentParser(
        description='invert bytes of file',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument('-i', '--input', type=str, default=None,
                     help='input file path')
    cli.add_argument('-o', '--output', type=str, default=None,
                     help='output file path')
    cli.add_argument('-f', '--force', action='store_true', default=False,
                     help='open mode for output file')
    cli.add_argument('-a', '--auto_name_output', action='store_true',
                     default=False,
                     help='add/remove ".inv" to input fname for output fname')
    opts = cli.parse_args(args)

    write_mode = 'wb' if opts.force else 'xb'
    src_name = opts.input
    dst_name = opts.output

    if opts.auto_name_output and dst_name is None and src_name is not None:
        suffix = '.inv'
        if not src_name.endswith(suffix):
            dst_name = src_name + suffix
        else:
            # keep everything *before* the suffix (slice ends at -len)
            dst_name = src_name[:-len(suffix)]

    with may_open_stdin(src_name, 'rb', encoding=None) as fin, \
            may_open_stdout(dst_name, write_mode, encoding=None) as fout:
        while True:
            chunk = fin.read(BLOCK_SIZE)
            if not chunk:
                break
            fout.write(chunk.translate(TABLE))
def main(sf):
    """Process every html path yielded by ``sf.iter_usrdata_htmlpath_pairs``.

    For each ``(usrdata, path)`` pair, in order:
    ``sf.pre_break`` may stop the loop, ``sf.skip`` may skip the entry,
    otherwise the file is read, built into an object via ``sf._ops.建`` and
    handed to ``sf.output``; finally ``sf.post_break`` may stop the loop.
    The opened output stream is stored on ``sf.fout`` for the hooks.

    :param sf: the object providing the hooks and settings read here.
    """
    ops = sf._ops
    oprint = sf.oprint  # read as before; not used in this body

    #with open(sf.may_ofname, sf.omode, encoding=sf.oencoding) as fout:
    with may_open_stdout(sf.may_ofname, sf.omode,
                         encoding=sf.oencoding) as out:
        sf.fout = out
        pairs = sf.iter_usrdata_htmlpath_pairs()
        for idx, (usrdata, path) in enumerate(pairs):
            html_fname = Path(path)
            hook_args = (idx, usrdata, html_fname)

            if sf.pre_break(*hook_args):
                break
            if sf.skip(*hook_args):
                continue

            html_doc = html_fname.read_text(encoding=sf.iencoding)
            built = ops.建(html_doc, markup_lang=MarkupLang.HTML)
            sf.output(ops, built, *hook_args)

            if sf.post_break(*hook_args):
                break
def main(argv=None):
    """Command-line entry point: print the sorted character string of a text.

    The input goes through ``file2all_char_set`` and
    ``chars2sorted_char_string``; with ``-u`` the result is additionally
    passed through ``repr_string_as_unicode`` before being printed.

    :param argv: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    import sys

    cli = argparse.ArgumentParser(
        description='sort and unique chars in text.')
    cli.add_argument('-e', '--encoding', type=str, default='utf8',
                     help='input/output file encoding')
    cli.add_argument('-f', '--force', action='store_true', default=False,
                     help='open mode for output file')
    cli.add_argument('-i', '--input', type=str,
                     help='path to the input novel text file')
    cli.add_argument('-o', '--output', type=str,
                     help='path to the output file')
    cli.add_argument('-u', '--repr_as_unicode', action='store_true',
                     default=False,
                     help='output char in format \\UXXXXXXXX')
    opts = cli.parse_args(argv)

    charset = opts.encoding
    write_mode = 'wt' if opts.force else 'xt'

    with may_open_stdin(opts.input, 'rt', encoding=charset) as fin:
        char_set = file2all_char_set(fin)

    rendered = chars2sorted_char_string(char_set)
    if opts.repr_as_unicode:
        rendered = repr_string_as_unicode(rendered)

    with may_open_stdout(opts.output, write_mode, encoding=charset) as fout:
        print(rendered, file=fout)
def main(argv=None):
    """Command-line entry point for ``parse_ex``.

    Reads the whole input, calls ``parse_ex`` on it and writes the first two
    items of the returned triple (head, then tail) to the output. The third
    item is unpacked but not used here.

    :param argv: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    cli = argparse.ArgumentParser(
        description='parse pythoncoded_rules_in_str',
        epilog=example_doc,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument('-i', '--input', type=str, default=None,
                     help='input file path')
    cli.add_argument('-o', '--output', type=str, default=None,
                     help='output file path')
    cli.add_argument('-e', '--encoding', type=str, default='utf8',
                     help='input/output file encoding')
    cli.add_argument('-f', '--force', action='store_true', default=False,
                     help='open mode for output file')
    opts = cli.parse_args(argv)

    charset = opts.encoding
    write_mode = 'wt' if opts.force else 'xt'

    with may_open_stdin(opts.input, 'rt', encoding=charset) as fin:
        rules_text = fin.read()

    head_str, tail_str, name2count = parse_ex(
        rules_text,
        name2count=None,
        the_input_parameter_name='p',
        with_class_keyword=False,
    )

    with may_open_stdout(opts.output, write_mode, encoding=charset) as fout:
        for part in (head_str, tail_str):
            fout.write(part)
def main(argv=None):
    """Command-line entry point for ``extract_fb_opf_items``.

    Opens the output (mode ``'xt'``) first, then the input, and calls
    ``extract_fb_opf_items(fout, fin)``.

    :param argv: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    :return: ``0``.
    """
    import argparse
    import sys
    from seed.io.may_open import may_open_stdout, may_open_stdin

    default_in_encoding = 'utf8'
    default_out_encoding = 'utf8'

    # The default help formatter re-wraps the epilog, so its line breaks
    # carry no meaning.
    cli = argparse.ArgumentParser(
        description='extract epub/OPS/fb.opf',
        epilog='''
    extract epub/OPS/fb.opf::manifest.item.href
    where item["media-type"]=="application/xhtml+xml"
''')
    cli.add_argument('-i', '--input_file', type=str, default=None,
                     help='the input file')
    cli.add_argument('-ie', '--input_encoding', type=str,
                     default=default_in_encoding,
                     help='the encoding of input file')
    cli.add_argument('-o', '--output_file', type=str, default=None,
                     help='the output file')
    cli.add_argument('-oe', '--output_encoding', type=str,
                     default=default_out_encoding,
                     help='the encoding of output file')
    opts = cli.parse_args(argv)

    with may_open_stdout(opts.output_file, 'xt',
                         encoding=opts.output_encoding) as fout:
        with may_open_stdin(opts.input_file, 'rt',
                            encoding=opts.input_encoding) as fin:
            extract_fb_opf_items(fout, fin)

    #parser.exit(0)
    return 0
def main(args=None, /):
    """Command-line entry point for the pseudo-symmetric-hanzi parser.

    Feeds the input stream to
    ``parser4pseudo_symmetric_hz_from_completed_chars_3980`` and prints a
    header line carrying ``len(result)`` followed by the result itself.

    :param args: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    cli = argparse.ArgumentParser(
        description='简繁对称字-middle-parse',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument('-i', '--input', type=str, default=None,
                     help='input file path')
    cli.add_argument('-o', '--output', type=str, default=None,
                     help='output file path')
    cli.add_argument('-e', '--encoding', type=str, default='utf8',
                     help='input/output file encoding')
    cli.add_argument('-f', '--force', action='store_true', default=False,
                     help='open mode for output file')
    opts = cli.parse_args(args)

    charset = opts.encoding
    write_mode = 'wt' if opts.force else 'xt'

    with may_open_stdin(opts.input, 'rt', encoding=charset) as fin:
        parsed = parser4pseudo_symmetric_hz_from_completed_chars_3980(fin)

    with may_open_stdout(opts.output, write_mode, encoding=charset) as fout:
        banner = f'###parser4pseudo_symmetric_hz_from_completed_chars_3980:total={len(parsed)}###'
        print(banner, file=fout)
        print(parsed, file=fout)
def main(args=None):
    """Dump ``sm2ym_num_graph__for_good_hanzis`` as a python source file.

    The value is looked up by name in this module's globals and written as
    ``<name> = \\`` followed by its ``pprint`` rendering. Without ``-o`` the
    target is ``'<name>.py'`` in the folder of this file.

    :param args: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    from pathlib import PurePath as Path
    from pprint import pprint
    from seed.io.may_open import may_open_stdout  # may_open_stdin

    here = Path(__file__)
    script_dir = here.parent
    script_name = here.name

    cli = argparse.ArgumentParser(
        description='make sm2ym_num_graph__for_good_hanzis from sm2ym2num2good_hanzis',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument('-o', '--output', type=str, default=None,
                     help='output file path')
    cli.add_argument('-oe', '--output_encoding', type=str, default='utf8',
                     help='output file encoding')
    cli.add_argument('-f', '--force', action='store_true', default=False,
                     help='open mode for output file')
    opts = cli.parse_args(args)

    write_mode = 'wt' if opts.force else 'xt'

    var_name = 'sm2ym_num_graph__for_good_hanzis'
    graph = globals()[var_name]

    target = opts.output
    if target is None:
        target = script_dir / f'{var_name}.py'

    with may_open_stdout(target, write_mode,
                         encoding=opts.output_encoding) as fout:
        print(f'#{var_name} generated by {script_name}', file=fout)
        print(f'{var_name} = \\', file=fout)
        pprint(graph, stream=fout)
def main(argv=None):
    """Command-line entry point: merge novel lines into paragraphs.

    The input lines go through
    ``novel_merge_lines_into_paragraph__pattern`` with the patterns from
    ``GlobalArgsExample``. The result is fully collected while the input is
    still open, then every line from ``merge_case_line_pairs`` is printed.

    :param argv: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    import sys

    cli = argparse.ArgumentParser(
        description='merge lines into paragraph for novel text')
    cli.add_argument('-i', '--input', type=str, default=None,
                     help='input file name')
    cli.add_argument('-o', '--output', type=str, default=None,
                     help='output file name')
    cli.add_argument('-e', '--encoding', type=str, default='utf8',
                     help='encoding of input/output file')
    cli.add_argument('-f', '--force', action='store_true', default=False,
                     help='open mode for output file')
    opts = cli.parse_args(argv)

    charset = opts.encoding
    write_mode = 'wt' if opts.force else 'xt'

    with may_open_stdin(opts.input, 'rt', encoding=charset) as fin:
        # list() is applied inside the ``with`` so that a lazy result is
        # consumed before the input is closed.
        case_line_pairs = list(novel_merge_lines_into_paragraph__pattern(
            GlobalArgsExample.not_merge_pattern,
            GlobalArgsExample.transparent_pattern,
            GlobalArgsExample.case_pattern_pairs,
            iter(fin),
        ))

    with may_open_stdout(opts.output, write_mode, encoding=charset) as fout:
        for merged_line in merge_case_line_pairs(case_line_pairs):
            print(merged_line, file=fout)
def main(argv=None):
    """Command-line entry point: pass the input text through ``escape``.

    The whole input is joined into one string, ``escape(text, quote)`` is
    applied with ``quote`` taken from ``-q``, and the result is written out.

    :param argv: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    :raises SystemExit: always on success, through ``parser.exit(0)``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    cli = argparse.ArgumentParser(
        description='simple encape text to html',
        epilog=r'''only "<>&" and "\"\'" if quote=True will be escaped.''',
    )
    cli.add_argument('-e', '--encoding', type=str, default='utf8',
                     help='input/output file encoding')
    cli.add_argument('-i', '--input', type=str, default=None,
                     help='input file path')
    cli.add_argument('-o', '--output', type=str, default=None,
                     help='output file path')
    cli.add_argument('-f', '--force', action='store_true', default=False,
                     help='open mode for output file')
    cli.add_argument('-q', '--quote', action='store_true', default=False,
                     help=r'''escape "\"\'" too; otherwise only "<>&" be escaped''')
    opts = cli.parse_args(argv)

    charset = opts.encoding
    write_mode = 'wt' if opts.force else 'xt'

    with may_open_stdin(opts.input, 'rt', encoding=charset) as fin:
        raw_text = ''.join(fin)

    escaped_text = escape(raw_text, opts.quote)

    with may_open_stdout(opts.output, write_mode, encoding=charset) as fout:
        fout.write(escaped_text)

    cli.exit(0)
    return 0  # not reached: exit() raises SystemExit
def main(argv=None):
    """Command-line entry point: print every path matched by glob patterns.

    Each positional pattern is expanded with ``iglob`` (``recursive`` taken
    from ``-r``), in the order given, and every match is printed on its own
    line to the output opened in mode ``'xt'``.

    :param argv: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    :return: ``None``.
    """
    import argparse
    import sys

    cli = argparse.ArgumentParser(description='list paths')
    cli.add_argument('glob_patterns', type=str, nargs='+',
                     metavar='GLOB_PATTERN',
                     help='glob_patterns for list paths')
    cli.add_argument('-r', '--recursive', action='store_true', default=False,
                     help='recursive search')
    cli.add_argument('-o', '--output_file', type=str, default=None,
                     help='the output file')
    cli.add_argument('-oe', '--output_encoding', type=str,
                     default=Globals.output_file_encoding,
                     help='the encoding of output file')
    opts = cli.parse_args(argv)

    with may_open_stdout(opts.output_file, 'xt',
                         encoding=opts.output_encoding) as fout:
        # Lazy, in pattern order: each pattern is expanded only when reached.
        matches = (
            path
            for pattern in opts.glob_patterns
            for path in iglob(pattern, recursive=opts.recursive)
        )
        for path in matches:
            print(path, file=fout)

    #parser.exit()
    return
def main(args=None):
    """Command-line entry point: pretty-print html/xml with BeautifulSoup.

    Parses the input with ``BeautifulSoup(fin, 'lxml')``, calls
    ``prettify()`` and post-processes the text with
    ``replace_indent_spaces(indent_width, text)`` before writing it out.

    :param args: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    cli = argparse.ArgumentParser(
        description='format html/xml by well indent',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument('-i', '--input', type=str, default=None,
                     help='input file path')
    cli.add_argument('-o', '--output', type=str, default=None,
                     help='output file path')
    cli.add_argument('-e', '--encoding', type=str, default='utf8',
                     help='input/output file encoding')
    cli.add_argument('-f', '--force', action='store_true', default=False,
                     help='open mode for output file')
    cli.add_argument('-iw', '--indent_width', type=int, default=1,
                     help='the number of indent spaces for children')
    opts = cli.parse_args(args)

    charset = opts.encoding
    write_mode = 'wt' if opts.force else 'xt'

    with may_open_stdin(opts.input, 'rt', encoding=charset) as fin:
        soup = BeautifulSoup(fin, 'lxml')

    pretty = replace_indent_spaces(opts.indent_width, soup.prettify())

    with may_open_stdout(opts.output, write_mode, encoding=charset) as fout:
        fout.write(pretty)
def main(args=None, /):
    """Command-line entry point: run one entry of ``main_routines``.

    The positional ``case`` must be a key of ``main_routines``; the selected
    routine is called with the opened output stream as its only argument.

    :param args: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    cli = argparse.ArgumentParser(
        description='繁简字信息相关',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    cli.add_argument('case', type=str, choices=sorted(main_routines),
                     help='choose one main_routine')
    cli.add_argument('-o', '--output', type=str, default=None,
                     help='output file path')
    cli.add_argument('-e', '--encoding', type=str, default='utf8',
                     help='input/output file encoding')
    cli.add_argument('-f', '--force', action='store_true', default=False,
                     help='open mode for output file')
    opts = cli.parse_args(args)

    write_mode = 'wt' if opts.force else 'xt'
    routine = main_routines[opts.case]

    with may_open_stdout(opts.output, write_mode,
                         encoding=opts.encoding) as fout:
        routine(fout)
parser.add_argument('-o', '--output', type=str, default=None , help='output file path') parser.add_argument('-oe', '--output_encoding', type=str , default='utf8' , help='output file encoding') parser.add_argument('-f', '--force', action='store_true' , default = False , help='open mode for output file') args = parser.parse_args(args) oencoding = args.output_encoding omode = 'wt' if args.force else 'xt' nm = args.name4target case = args.post_process post_process = case2post_process[case] xxx = case2name2xxx[case][nm] may_ofname = args.output with may_open_stdout(may_ofname, omode, encoding=oencoding) as fout: post_process(nm, xxx, fout=fout) if __name__ == "__main__": main()
def main(argv=None):
    """Command-line entry point: apply an extractor to every listed path.

    ``extractor`` is the qualified name of a callable, resolved with
    ``import_object``. Command-line arguments this parser does not know are
    turned into extra positional/keyword arguments by ``parse_unknown_args``.
    Each input line (minus its trailing newline) is a path, and the
    extractor is called as ``extractor(fout, path, *extra, **extra_kw)``.

    :param argv: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    :return: ``None``.
    """
    import argparse

    # Raw string: the example below contains ``\d`` and ``\.``, which are
    # invalid escape sequences in a normal literal. The resulting text is
    # unchanged, and the default formatter re-wraps it anyway.
    parser = argparse.ArgumentParser(
        description='extract info from files',
        allow_abbrev=False,
        epilog=r'''
NOTE:
    extract_data_cmd -oe gbk -o ./1+2.txt -ie ascii -i ./paths.txt nn_ns.filedir._extractor_example.main --encoding=utf8
        where:
            -ie ascii
                the encoding of input file which contains paths to files from which data were extracted.
                arg of this program
            --encodinga utf8
                the encoding of all files from which data were extracted.
                arg of nn_ns.filedir._extractor_example
    glob_cmd ./*.html | line_filter_cmd chapter(\d+)\.html --group_names 1 --INT_GROUP | sort_lines_cmd --line_type=KEY_LINE | extract_data_cmd -oe gbk -o ./1+2.txt nn_ns.filedir._extractor_example.main --encoding=utf8
''')
    add_argument = parser.add_argument
    add_argument('extractor', type=str,
                 help='fullname of a python function: e.g. math.log2')
    add_argument('-i', '--input_file', type=str, default=None,
                 help='the input file which contains paths')
    add_argument('-ie', '--input_encoding', type=str,
                 default=Globals.input_file_encoding,
                 help='the encoding of input file')
    add_argument('-o', '--output_file', type=str, default=None,
                 help='the output file')
    add_argument('-oe', '--output_encoding', type=str,
                 default=Globals.output_file_encoding,
                 help='the encoding of output file')

    args, unknown_args = parser.parse_known_args(argv)
    _args, _kwargs = parse_unknown_args(unknown_args)
    _kwargs = dict(_kwargs)

    # extractor :: (fout, input_fname, *args, **kwargs) -> None
    extractor = import_object(args.extractor)

    with may_open_stdout(args.output_file, 'xt',
                         encoding=args.output_encoding) as fout, \
            may_open_stdin(args.input_file, 'rt',
                           encoding=args.input_encoding) as fin:
        for line in fin:
            # drop exactly one trailing newline; the rest of the line is
            # the path, verbatim
            if line[-1:] == '\n':
                line = line[:-1]
            extractor(fout, line, *_args, **_kwargs)

    #parser.exit()
    return
def main(argv=None):
    """Command-line entry point: extract text from ctext.org / ctext.cn pages.

    Exactly one source is used:

    * ``--url`` alone: a single web page;
    * ``--url`` with ``--range FIRST LAST``: the pages for every index in
      ``FIRST..LAST`` (both ends included), optionally preceded by a book
      title fetched from ``{url}{book_title_at}``;
    * otherwise ``--input`` (or standard input when it is omitted): a saved
      page, read as text and, on a decoding error, re-read as binary.

    The output consists of an optional ``[book]:`` line followed by a
    ``[chapter{i}]:`` line and the extracted text for each page.

    Parameters:
        argv: argument list to parse; ``None`` lets argparse read
            ``sys.argv[1:]``.

    Returns:
        0 on success.

    Raises:
        ValueError: when both ``--input`` and ``--url`` are given.
        UnicodeError: when standard input cannot be decoded (there is no
            file to reopen in binary mode).
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='extract text on ctext.org or ctext.cn')
    parser.add_argument('-e', '--encoding', type=str, default='utf8',
                        help='input/output file encoding')
    parser.add_argument('-i', '--input', type=str, default=None,
                        help='input file path')
    parser.add_argument('-o', '--output', type=str, default=None,
                        help='output file path')
    parser.add_argument('-f', '--force', action='store_true', default=False,
                        help='open mode for output file')
    parser.add_argument('--append', action='store_true', default=False,
                        help='open mode for output file')
    parser.add_argument('-V', '--verbose', action='store_true', default=False,
                        help='show path/url that opened')
    parser.add_argument('-url', '--url', type=str, default=None,
                        help='input webpage url')
    parser.add_argument(
        '-rng', '--range', type=int, default=None, nargs=2,
        help='input webpage url range (first, last); {url}/{i} for i in range')
    parser.add_argument(
        '-ifmt', '--index_format', type=str, default='{}',
        help='index python format for webpage url; base_url/{fmt}')
    parser.add_argument('--timeout', type=int, default=10,
                        help='timeout for urllib')
    parser.add_argument('--time_sep', type=int, default=1,
                        help='time space between two downloads')
    parser.add_argument('--without_book_title', action='store_true',
                        default=False, help='not show book_title')
    parser.add_argument(
        '--book_title_at', type=str, default=None,
        help='extended url for book_title; {base_url}{book_title_at}')

    args = parser.parse_args(argv)
    encoding = args.encoding

    # --append wins over --force; without either the output must not exist.
    omode = 'wt' if args.force else 'xt'
    if args.append:
        omode = 'at'

    if args.input is not None and args.url is not None:
        raise ValueError('input both file and url at same time')

    # result :: (may_book_title, begin, iterable of (title, txt))
    if args.url is not None:
        if args.range is None:
            title, txt = extract_ctext_org__url(
                args.url, verbose=args.verbose, timeout=args.timeout)
            may_book_title = None
            begin = 0
            result = (may_book_title, begin, [(title, txt)])
        else:
            first, last = args.range
            begin, end = first, last + 1  # the range on the CLI is inclusive
            rng = range(begin, end)
            base_url = args.url
            index_format = args.index_format
            it = iter_extract_ctext_org__url_rng(
                base_url, rng, index_format,
                verbose=args.verbose, timeout=args.timeout,
                time_sep=args.time_sep)

            if args.without_book_title:
                may_book_title = None
            else:
                if args.book_title_at is None:
                    book_title_url = base_url
                else:
                    book_title_url = f'{base_url}{args.book_title_at}'
                book_title, _ = extract_ctext_org__url(
                    book_title_url, verbose=args.verbose,
                    timeout=args.timeout)
                may_book_title = book_title
            # Not materialised into a list: pages are consumed one at a time
            # while the output is being written.
            result = (may_book_title, begin, iter(it))
    else:
        may_ifname = args.input
        try:
            # open as text file
            with may_open_stdin(may_ifname, 'rt', encoding=encoding) as fin:
                title, txt = extract_ctext_org(
                    fin, verbose=args.verbose, timeout=args.timeout)
        except UnicodeError:
            if may_ifname is None:
                # Standard input cannot be reopened in binary mode, so the
                # decoding error is the real failure: let it propagate.
                raise
            # open as binary file
            with open(may_ifname, 'rb') as fin:
                title, txt = extract_ctext_org(
                    fin, verbose=args.verbose, timeout=args.timeout)
        may_book_title = None
        begin = 0
        result = (may_book_title, begin, [(title, txt)])

    may_ofname = args.output
    with may_open_stdout(may_ofname, omode, encoding=encoding) as fout:
        may_book_title, begin, title_txt_pairs = result
        # Only emit the header when a book title was actually obtained;
        # single-page and file inputs have none.
        if may_book_title is not None:
            print(f'[book]:{may_book_title}', file=fout)
        for i, (title, txt) in enumerate(title_txt_pairs, begin):
            print(f'[chapter{i}]:{title}', file=fout)
            print(txt, file=fout)

    # Return normally instead of calling parser.exit(0), so that callers
    # importing this function get the status code rather than SystemExit.
    return 0
def main(args=None):
    """Command-line entry point: threshold a grey image into a binary image.

    The input image is read with ``imageio.imread``; ``--as_grey`` is passed
    to it as ``as_gray``.  The threshold is the linear combination
    ``A*min + B*mean + C*max`` of the image's pixel statistics, with
    ``A B C`` taken from ``--threshold_coeff``.  The thresholded image is
    passed through ``binary_image2text_grey_image`` and written row by row,
    one ``b'\\n'``-terminated byte string per row.

    Parameters:
        args: argument list to parse; ``None`` lets argparse read
            ``sys.argv[1:]``.

    Raises:
        Exception: when the loaded image is rejected by ``is_grey_image``.
    """
    import argparse
    from seed.io.may_open import may_open_stdout
    #import skimage.io as image_io
    import imageio as image_io
    read_image = image_io.imread
    del image_io

    parser = argparse.ArgumentParser(
        description='convert grey_image to binary_image',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-i', '--input', type=str, required=True,
                        help='input file path')
    parser.add_argument('-o', '--output', type=str, default=None,
                        help='output file path')
    parser.add_argument('-f', '--force', action='store_true', default=False,
                        help='open mode for output file')
    parser.add_argument('-F', '--flip', action='store_true', default=False,
                        help='flip final binary_image')
    parser.add_argument('-G', '--as_grey', action='store_true', default=False,
                        help='convert color_image to grey_image first')
    parser.add_argument(
        '-T', '--threshold_coeff', type=float, required=True, nargs=3,
        help='threshold_coeff A B C; threshold = A*min+B*mean+C*max')
    # Disabled option, kept for reference:
    #   parser.add_argument('-out_txt', '--output_as_text', action='store_true'
    #                       , default = False
    #                       , help='output binary_image as text')

    args = parser.parse_args(args)
    # Output is written as bytes, so no text encoding applies.
    encoding = None
    omode = 'wb' if args.force else 'xb'

    ifname = args.input
    image = read_image(ifname, as_gray=args.as_grey)
    if not is_grey_image(image):
        raise Exception('not grey_image')
    grey_image = image

    A, B, C = args.threshold_coeff
    threshold = (A * grey_image.min()
                 + B * grey_image.mean()
                 + C * grey_image.max())
    binary_image = grey_image2binary_image(grey_image, threshold,
                                           negative=args.flip)
    text_grey_image = binary_image2text_grey_image(binary_image)

    may_ofname = args.output
    with may_open_stdout(may_ofname, omode, encoding=encoding) as fout:
        for byte_row in grey_image2iter_byte_rows(text_grey_image):
            fout.write(byte_row)
            fout.write(b'\n')