Esempio n. 1
0
def main(argv=None):
    """Command-line entry point: run ``merge_html_p`` over a text input.

    The whole input is read as text, handed to ``merge_html_p`` and the
    returned text is written to the output.

    :param argv: argument list for ``argparse``; ``None`` makes argparse
        fall back to ``sys.argv[1:]``.
    :return: ``0`` once the output has been written.
    """
    import argparse
    from seed.io.may_open import may_open_stdout, may_open_stdin

    class Globals:
        # default values for the -oe / -ie options
        output_file_encoding = 'utf8'
        input_file_encoding = 'utf8'

    ###################
    parser = argparse.ArgumentParser(description='merge html <p>'
        , epilog='''
    <p></p> is the true seperator
    "<p>abc</p> <p>def</p>" ==>> "<p>abcdef</p>"

''')
    add_argument = parser.add_argument

    add_argument('-i', '--input_file', type=str
        , default=None
        , help='the input file')
    add_argument('-ie', '--input_encoding', type=str
        , default = Globals.input_file_encoding
        , help='the encoding of input file')
    add_argument('-o', '--output_file', type=str
        , default=None
        , help='the output file')
    add_argument('-oe', '--output_encoding', type=str
        , default = Globals.output_file_encoding
        , help='the encoding of output file')

    args = parser.parse_args(argv)
    # NOTE(review): may_open_stdin/may_open_stdout are project helpers;
    # presumably they use stdin/stdout when the path is None -- confirm.
    with may_open_stdin(args.input_file, 'rt'
            , encoding=args.input_encoding) as fin:
        pseudo_htm = fin.read()
    txt = merge_html_p(pseudo_htm)
    if args.output_file is not None:
        # Trial-encode first: an unencodable text (or an unknown codec)
        # raises here, before the output file gets opened.
        txt.encode(args.output_encoding)

    # mode 'xt' is passed through unchanged (no --force option here)
    with may_open_stdout(args.output_file, 'xt'
            , encoding=args.output_encoding) as fout:
        fout.write(txt)
    return 0
def main(args=None, /):
    """Command-line front end for ``show_factor_uint``.

    Parses ``--uint_le`` and ``--coprime_le`` (both required integers) plus
    the output options, then calls ``show_factor_uint`` with the opened
    output stream.

    :param args: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    # both numeric options share the same help text
    range_help = 'factor uint in [1..uint_le] only which coprime to [2..coprime_le]'

    parser = argparse.ArgumentParser(
        description='factor_uint',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('--uint_le', type=int, required=True, help=range_help)
    parser.add_argument('--coprime_le', type=int, required=True, help=range_help)
    parser.add_argument(
        '-o', '--output', type=str, default=None, help='output file path')
    parser.add_argument(
        '-e', '--encoding', type=str, default='utf8',
        help='output file encoding')
    parser.add_argument(
        '-f', '--force', action='store_true', default=False,
        help='open mode for output file')

    opts = parser.parse_args(args)
    # --force selects 'wt', otherwise 'xt' is handed to the opener
    if opts.force:
        omode = 'wt'
    else:
        omode = 'xt'

    with may_open_stdout(opts.output, omode, encoding=opts.encoding) as fout:
        show_factor_uint(
            fout=fout,
            uint_le=opts.uint_le,
            coprime_le=opts.coprime_le,
        )
Esempio n. 3
0
def main(args=None):
    """Dump the module-level ``doublesize2max_clique_srcs_dsts_pairs`` as Python source.

    The value is looked up in ``globals()`` by name and pretty-printed after
    a ``<name> = \\`` line, so the output is an assignment statement.  When
    no ``--output`` is given the target is ``<var_name>.py`` inside
    ``this_folder`` (a module-level name).

    :param args: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    from seed.io.may_open import may_open_stdout # may_open_stdin
    from pprint import pprint

    parser = argparse.ArgumentParser(
        description='make max_cliques from simple_decomposed_chars3980',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        '-o', '--output', type=str, default=None, help='output file path')
    parser.add_argument(
        '-oe', '--output_encoding', type=str, default='utf8',
        help='output file encoding')
    parser.add_argument(
        '-f', '--force', action='store_true', default=False,
        help='open mode for output file')

    opts = parser.parse_args(args)
    open_mode = 'wt' if opts.force else 'xt'

    var_name = 'doublesize2max_clique_srcs_dsts_pairs'
    value = globals()[var_name]

    target = opts.output
    if target is None:
        # default output lives next to this module
        target = this_folder / f'{var_name}.py'

    with may_open_stdout(target, open_mode, encoding=opts.output_encoding) as fout:
        header = f'#{var_name} generated by {this_file_name}'
        print(header, file=fout)
        print(f'{var_name} = \\', file=fout)
        pprint(value, stream=fout)
Esempio n. 4
0
    def on_subcmd__cmp_branch_dir(sf, subcmd_name, parsed_args):
        """Handle the ``cmp_branch_dir`` sub-command.

        Forwards the parsed options to
        ``type(sf)._main4subcmds.on_dir_cmp__relative`` and writes the
        returned comparison result with
        ``write__file4result_of_dir_cmp__relative__extended``.

        :param subcmd_name: not used by this handler.
        :param parsed_args: namespace carrying the lhs/rhs paths, the
            same-file options and ``output`` / ``encoding`` / ``force``.
        :return: ``None``.
        """
        #see:[location4setting4the_two_kwargs]
        #always_tribool_as_is_or_not_same_file
        #bug:PP = _pairs__str2rpath
        #   see:str2rpath_encoding_pair
        #
        PP = tuple

        # the tribool option arrives as text and is parsed as a Python literal
        always_tribool = ast.literal_eval(
            parsed_args.always_tribool_as_is_or_not_same_file)
        kwargs4MkIsSameFile = dict(
            always_tribool_as_is_or_not_same_file=always_tribool,
            size_hash0_eq_as_same_file=parsed_args.size_hash0_eq_as_same_file,
            mtime_eq_as_same_file=parsed_args.mtime_eq_as_same_file,
            imay_max_size_threshold4cmp_content=parsed_args.imay_max_size_threshold4cmp_content,
            ###########
            # fixed settings, not exposed on the command line
            size_eq_as_same_file=False,
            hash_eq_as_same_file=False,
            mtime_ne_as_not_same_file=False,
            _block_size=BLOCK_SIZE,
        )
        kwargs4dir_cmp__relative = dict(ignore_basename=None, max_depth=None)

        dir_cmp = type(sf)._main4subcmds.on_dir_cmp__relative
        (lhs_branch_idx4old, result_of_dir_cmp__relative) = dir_cmp(
            lhs_repository_extra_cache_root_dir_path=parsed_args.lhs_repository_extra_cache_root_dir_path,
            lhs_repository_root_dir_path=parsed_args.lhs_repository_root_dir_path,
            lhs_branch_name=parsed_args.lhs_branch_name,
            rhs_real_fsys_root_dir_path=parsed_args.rhs_real_fsys_root_dir_path,
            rhs_ignorefile_relative_path_encoding_pairs=PP(parsed_args.rhs_ignorefile_relative_path_encoding_pairs),
            kwargs4MkIsSameFile=kwargs4MkIsSameFile,
            kwargs4dir_cmp__relative=kwargs4dir_cmp__relative,
        )

        may_ofname = parsed_args.output
        from seed.io.may_open import may_open_stdin, may_open_stdout
        encoding = parsed_args.encoding
        if parsed_args.force:
            omode = 'wt'
        else:
            omode = 'xt'
        with may_open_stdout(may_ofname, omode, encoding=encoding) as fout:
            #see:[location4fmt_of_file4result_of_dir_cmp__relative__extended]
            write__file4result_of_dir_cmp__relative__extended(
                fout,
                parsed_args.lhs_branch_name,
                lhs_branch_idx4old,
                PP(parsed_args.rhs_ignorefile_relative_path_encoding_pairs),
                result_of_dir_cmp__relative,
            )
        return
def main(args=None):
    """Read the input with ``read_中华字经`` and dump the result as Python source.

    Defaults (file names, encodings, expected size, variable name) come
    from the module-level ``Global`` object; default paths are resolved
    relative to the folder of this module.

    :param args: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    :raises AssertionError: when the number of items read differs from
        ``Global.size``.
    """
    import argparse
    from seed.io.may_open import may_open_stdout  # may_open_stdin
    from pprint import pprint
    from pathlib import PurePath as Path

    this_file = Path(__file__)
    this_folder = this_file.parent
    this_file_name = this_file.name

    parser = argparse.ArgumentParser(
        description=f'read “{Global.ifname}”',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        '-i', '--input', type=str, default=None, help='input file path')
    parser.add_argument(
        '-o', '--output', type=str, default=None, help='output file path')
    parser.add_argument(
        '-ie', '--input_encoding', type=str, default=Global.iencoding,
        help='input file encoding')
    parser.add_argument(
        '-oe', '--output_encoding', type=str, default=Global.oencoding,
        help='output file encoding')
    parser.add_argument(
        '-f', '--force', action='store_true', default=False,
        help='open mode for output file')

    opts = parser.parse_args(args)
    open_mode = 'wt' if opts.force else 'xt'

    # input: explicit path, else the default file beside this module
    ifname = this_folder / Global.ifname if opts.input is None else opts.input
    with open(ifname, 'rt', encoding=opts.input_encoding) as fin:
        chars_3980 = read_中华字经(fin)
    assert len(chars_3980) == Global.size

    # output: explicit path, else the default file beside this module
    target = opts.output
    if target is None:
        target = this_folder / Global.ofname
    with may_open_stdout(target, open_mode, encoding=opts.output_encoding) as fout:
        print(f'#{Global.var_name} generated by {this_file_name}', file=fout)
        print(f'{Global.var_name} = \\', file=fout)
        pprint(chars_3980, stream=fout)
Esempio n. 6
0
def main(argv=None):
    """Write either ``Data.grams_data`` or ``Data.fun_chars_data`` via ``do_output``.

    :param argv: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    from seed.io.may_open import may_open_stdout

    parser = argparse.ArgumentParser(description='show the 8 trigrams')
    parser.add_argument(
        '-o', '--output', type=str, default=None, help='output file path')
    parser.add_argument(
        '-e', '--encoding', type=str, default='utf8',
        help='input/output file encoding')
    parser.add_argument(
        '-f', '--force', action='store_true', default=False,
        help='open mode for output file')
    parser.add_argument(
        '-fun', '--output_fun_chars', action='store_true', default=False,
        help='output fun_chars instead of grams')

    opts = parser.parse_args(argv)
    open_mode = 'wt' if opts.force else 'xt'

    # -fun switches the data set that gets written
    if opts.output_fun_chars:
        data = Data.fun_chars_data
    else:
        data = Data.grams_data

    with may_open_stdout(opts.output, open_mode, encoding=opts.encoding) as fout:
        do_output(fout, data)
def main(args=None):
    """Build link-file bytes with ``mk_windows_spec_lnk`` and write them out.

    The output is opened in binary mode (``'wb'`` with ``--force``,
    otherwise ``'xb'``) and without an encoding.

    :param args: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='make Windows special address link file',
        epilog=f'spec_addr example: {windows_firewall_spec_addr__str!r}',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        '-a', '--spec_addr', type=str, required=True,
        help='spec_addr; for example, see epilog')
    parser.add_argument(
        '-o', '--output', type=str, default=None, help='output file path')
    parser.add_argument(
        '-f', '--force', action='store_true', default=False,
        help='open mode for output file')

    opts = parser.parse_args(args)
    if opts.force:
        open_mode = 'wb'
    else:
        open_mode = 'xb'

    # build the payload before touching the output file
    payload = mk_windows_spec_lnk(opts.spec_addr)
    with may_open_stdout(opts.output, open_mode, encoding=None) as fout:
        fout.write(payload)
Esempio n. 8
0
def main(args=None, /):
    """Pass every ``-i`` expression to ``eval_then_show`` with the output stream.

    NOTE(review): judging by the description string the helper evaluates
    the expressions as Python code, so they must come from a trusted
    source -- confirm against ``eval_then_show``.

    :param args: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='python eval then hex then print',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        '-i', '--input', type=str, nargs='*', default=[],
        help='input python expression')
    parser.add_argument(
        '-o', '--output', type=str, default=None, help='output file path')
    parser.add_argument(
        '-e', '--encoding', type=str, default='utf8',
        help='input/output file encoding')
    parser.add_argument(
        '-f', '--force', action='store_true', default=False,
        help='open mode for output file')

    opts = parser.parse_args(args)
    open_mode = 'wt' if opts.force else 'xt'

    # expressions are taken from the command line, not from a file
    expressions = opts.input
    with may_open_stdout(opts.output, open_mode, encoding=opts.encoding) as fout:
        for expression in expressions:
            eval_then_show(expression, fout=fout)
def main(argv=None):
    """Extract the content div of a 360doc.com article and write it out.

    The page comes either from ``--url`` (opened with ``open_webpage``) or
    from ``--input``.  The input is first opened as text; if that raises
    ``UnicodeError`` and a file path was given, the file is reopened in
    binary mode and extraction is retried.

    :param argv: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    :raises ValueError: when both ``--input`` and ``--url`` are given.
    :raises UnicodeError: when decoding fails and no input path was given,
        so there is no file to reopen in binary mode.
    :raises SystemExit: always on success, through ``parser.exit(0)``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='extract first content_div of article on 360doc.com'
        )
    parser.add_argument('-e', '--encoding', type=str, default='utf8', help='input/output file encoding')
    parser.add_argument('-i', '--input', type=str, default = None
                        , help='input file path')
    parser.add_argument('-o', '--output', type=str, default = None
                        , help='output file path')
    parser.add_argument('-f', '--force', action='store_true'
                        , default = False
                        , help='open mode for output file')
    parser.add_argument('-url', '--url', type=str, default = None
                        , help='input webpage url')

    args = parser.parse_args(argv)
    encoding = args.encoding
    omode = 'wt' if args.force else 'xt'

    if args.input is not None and args.url is not None:
        raise ValueError('input both file and url at same time')
    if args.url is not None:
        with open_webpage(args.url) as fin:
            content_div = extract_360doc_com(fin)
    else:
        may_ifname = args.input
        try:
            # open as text file
            with may_open_stdin(may_ifname, 'rt', encoding=encoding) as fin:
                content_div = extract_360doc_com(fin)
        except UnicodeError:
            if may_ifname is None:
                # No path to reopen in binary mode: surface the original
                # decode error instead of an AssertionError (which would
                # also vanish under ``python -O``).
                raise
            # open as binary file
            with open(may_ifname, 'rb') as fin:
                content_div = extract_360doc_com(fin)

    may_ofname = args.output
    with may_open_stdout(may_ofname, omode, encoding=encoding) as fout:
        fout.write(content_div)

    # parser.exit() raises SystemExit; the return below is only reached if
    # exit has been overridden.
    parser.exit(0)
    return 0
Esempio n. 10
0
def main(argv=None):
    """Encrypt or decrypt text with ``aCrypt`` from the command line.

    For ``encrypt`` the input is read as ascii and the output written as
    utf8; for ``decrypt`` the two encodings are swapped.

    :param argv: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    :raises TypeError: when ``psw`` contains a character that is not in
        ``aCrypt.char2idxP``.
    :raises SystemExit: always on success, through ``parser.exit(0)``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    encodingI = 'ascii'
    encodingO = 'utf8'

    parser = argparse.ArgumentParser(
        description='simple encrypt ascii text'
        , epilog=r'only " " and "\n" are allowed, other control/whitespace should not occur in input text'
        #, formatter_class=argparse.RawDescriptionHelpFormatter
        )
    parser.add_argument('cmd', type=str, choices='encrypt decrypt'.split()
                        , help='encrypt/decrypt - treat input as cleartext/ciphertext')
    parser.add_argument('psw', type=str
                        , help='password: regex = [0-9a-f]*')
    parser.add_argument('-i', '--input', type=str, default = None
                        , help='input file path')
    parser.add_argument('-o', '--output', type=str, default = None
                        , help='output file path')
    parser.add_argument('-f', '--force', action='store_true'
                        , default = False
                        , help='open mode for output file')

    args = parser.parse_args(argv)
    psw = args.psw
    if not all(ch in aCrypt.char2idxP for ch in psw):
        # Same exception type as before, now with a message; the password
        # itself is deliberately not echoed.
        raise TypeError('psw contains characters outside the allowed password alphabet')
    omode = 'wt' if args.force else 'xt'
    does_encrypt = args.cmd == 'encrypt'
    # cleartext side uses encodingI, ciphertext side uses encodingO
    iencoding = encodingI if does_encrypt else encodingO
    oencoding = encodingO if does_encrypt else encodingI

    may_ifname = args.input
    with may_open_stdin(may_ifname, 'rt', encoding=iencoding) as fin:
        input_text = ''.join(fin)

    if does_encrypt:
        output_text = aCrypt.encrypt(psw, input_text)
    else:
        output_text = aCrypt.decrypt(psw, input_text)

    may_ofname = args.output
    with may_open_stdout(may_ofname, omode, encoding=oencoding) as fout:
        fout.write(output_text)

    # parser.exit() raises SystemExit; the return below is only reached if
    # exit has been overridden.
    parser.exit(0)
    return 0
def main(args=None):
    """Run one of a fixed set of module-level functions, chosen with ``--do``.

    The selectable names are the ``__name__`` values of the listed
    functions; the chosen name is looked up in ``globals()`` and called
    with the opened output stream as ``fout``.

    :param args: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout
    choices = [
        ls笔顺码字符范围__closed_rngs, cmp3, prepare_for_汉字粗拆分, prepare_for_2汉字粗拆分2
    ]
    choices = [f.__name__ for f in choices]

    parser = argparse.ArgumentParser(
        description="汉字相关字符范围",
        epilog="",
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument(
        '-do',
        '--do',
        type=str,
        required=True,
        choices=choices,
        # the previous help text was a copy of the input-path help
        help='name of the function to run')
    parser.add_argument('-o',
                        '--output',
                        type=str,
                        default=None,
                        help='output file path')
    parser.add_argument('-e',
                        '--encoding',
                        type=str,
                        default='utf8',
                        help='input/output file encoding')
    parser.add_argument('-f',
                        '--force',
                        action='store_true',
                        default=False,
                        help='open mode for output file')

    args = parser.parse_args(args)
    encoding = args.encoding
    omode = 'wt' if args.force else 'xt'
    do = args.do
    # argparse already restricted ``do`` to ``choices``
    f = globals()[do]

    may_ofname = args.output
    with may_open_stdout(may_ofname, omode, encoding=encoding) as fout:
        f(fout=fout)
Esempio n. 12
0
    def on_subcmd__get_branch_size(sf, subcmd_name, parsed_args):
        """Handle the ``get_branch_size`` sub-command.

        Calls ``type(sf)._main4subcmds.on_get_branch_size`` with the lhs
        repository options and prints the returned value to the output.

        :param subcmd_name: not used by this handler.
        :param parsed_args: namespace carrying the lhs repository options
            and ``output`` / ``encoding`` / ``force``.
        :return: ``None``.
        """
        get_branch_size = type(sf)._main4subcmds.on_get_branch_size
        lhs_branch_sz = get_branch_size(
            lhs_repository_extra_cache_root_dir_path=parsed_args.lhs_repository_extra_cache_root_dir_path,
            lhs_repository_root_dir_path=parsed_args.lhs_repository_root_dir_path,
            lhs_branch_name=parsed_args.lhs_branch_name,
        )

        may_ofname = parsed_args.output
        from seed.io.may_open import may_open_stdin, may_open_stdout
        encoding = parsed_args.encoding
        if parsed_args.force:
            omode = 'wt'
        else:
            omode = 'xt'
        with may_open_stdout(may_ofname, omode, encoding=encoding) as fout:
            print(lhs_branch_sz, file=fout)
        return
Esempio n. 13
0
def main(argv=None):
    """Command-line front end for ``lower_file``.

    Input and output are opened with ``newline=''`` and the same encoding,
    then handed to ``lower_file`` together with the ``--upper`` and
    ``--sep`` options.

    :param argv: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    :raises SystemExit: always on success, through ``parser.exit(0)``.
    """
    import argparse, sys

    parser = argparse.ArgumentParser(
        description='translate characters from lower to upper case',
    )
    # --- common I/O options ---
    parser.add_argument(
        '-i', '--input', type=str, default=None, help='input file name')
    parser.add_argument(
        '-o', '--output', type=str, default=None, help='output file name')
    parser.add_argument(
        '-e', '--encoding', type=str, default='utf8',
        help='encoding of input/output file')
    parser.add_argument(
        '-f', '--force', action='store_true', default=False,
        help='open mode for output file')
    # --- options forwarded to lower_file ---
    parser.add_argument(
        '-s', '--sep', type=str, default=None,
        help='seperator string of output file which has 2 columes')
    parser.add_argument(
        '-u', '--upper', action='store_true', default=False,
        help='lower2upper instead of upper2lower')

    opts = parser.parse_args(argv)
    enc = opts.encoding
    if opts.force:
        open_mode = 'wt'
    else:
        open_mode = 'xt'

    with may_open_stdin(opts.input, 'rt', encoding=enc, newline='') as fin, \
            may_open_stdout(opts.output, open_mode, encoding=enc, newline='') as fout:
        lower_file(fout, fin, upper=opts.upper, sep=opts.sep)

    # parser.exit() raises SystemExit; the return below is only reached if
    # exit has been overridden.
    parser.exit(0)
    return 0
Esempio n. 14
0
def main(argv=None):
    """Parse a torrent file with ``parse_torrent`` and print the result.

    :param argv: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.  (Previously this parameter was ignored because
        ``parse_args()`` was called without it.)
    :raises SystemExit: always on success, through ``parser.exit(0)``.
    """
    import argparse

    parser = argparse.ArgumentParser(
                description='convert torrent file to dict'
                , epilog=epilog_str
                , formatter_class=argparse.RawDescriptionHelpFormatter
                )
    parser.add_argument('input', type=str
                        , help='path to the input torrent file')
    parser.add_argument('-ie', '--input_encoding', type=str
                        , default='utf8'
                        , help='encoding to strings in input file')
    parser.add_argument('-pp', '--pretty_print', action='store_true'
                        , default=False
                        , help='pretty print output')

    parser.add_argument('-o', '--output', type=str
                        , default=None
                        , help='output file path')
    parser.add_argument('-oe', '--output_encoding', type=str
                        , default='utf8'
                        , help='output file encoding')
    parser.add_argument('--mode', choices='exclusive append overwrite'.split()
                        , default='exclusive'
                        , help='mode for open output file')

    # honour the caller-supplied argument list
    args = parser.parse_args(argv)
    iencoding = args.input_encoding
    result = parse_torrent(args.input, encoding=iencoding)

    if args.pretty_print:
        # pprint
        def xprint(obj, *, file):
            pprint(obj, stream=file)
    else:
        # print
        def xprint(obj, *, file):
            print(obj, file=file)

    oencoding = args.output_encoding
    # map the --mode word to the first letter of the open() mode
    mode = {'exclusive':'x', 'append':'a', 'overwrite':'w'}[args.mode]
    with may_open_stdout(args.output, mode+'t', encoding=oencoding) as fout:
        xprint(result, file=fout)

    # parser.exit() raises SystemExit; the return below is only reached if
    # exit has been overridden.
    parser.exit(0)
    return 0
Esempio n. 15
0
def main(args=None):
    """Copy input to output, passing every block through ``bytes.translate(TABLE)``.

    Both streams are opened in binary mode and the data is processed in
    chunks of ``BLOCK_SIZE``.  With ``--auto_name_output`` (and an input
    path but no output path) the output name is the input name with a
    trailing ``.inv`` removed, or with ``.inv`` appended if it was absent.

    :param args: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='invert bytes of file',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        '-i', '--input', type=str, default=None, help='input file path')
    parser.add_argument(
        '-o', '--output', type=str, default=None, help='output file path')
    parser.add_argument(
        '-f', '--force', action='store_true', default=False,
        help='open mode for output file')
    parser.add_argument(
        '-a', '--auto_name_output', action='store_true', default=False,
        help='add/remove ".inv" to input fname for output fname')

    opts = parser.parse_args(args)
    open_mode = 'wb' if opts.force else 'xb'

    src_name = opts.input
    dst_name = opts.output
    if opts.auto_name_output and dst_name is None and src_name is not None:
        suffix = '.inv'
        # toggle the suffix: strip it when present, append it otherwise
        #bug:dst_name = src_name[-len(suffix):]
        dst_name = (src_name[:-len(suffix)] if src_name.endswith(suffix)
                    else src_name + suffix)

    with may_open_stdin(src_name, 'rb', encoding=None) as fin, \
            may_open_stdout(dst_name, open_mode, encoding=None) as fout:
        while True:
            chunk = fin.read(BLOCK_SIZE)
            if not chunk:
                break
            fout.write(chunk.translate(TABLE))
 def main(sf):
     """Walk the (usrdata, path) pairs and emit output for each HTML file.

     The output stream is opened once and stored on ``sf.fout``.  For every
     pair the hooks run in the order ``pre_break`` -> ``skip`` -> ``output``
     -> ``post_break``; either ``*_break`` hook returning true stops the
     loop, ``skip`` returning true moves on to the next pair.
     """
     bs4_ops = sf._ops
     oprint = sf.oprint
     with may_open_stdout(sf.may_ofname, sf.omode, encoding=sf.oencoding) as fout:
         sf.fout = fout
         pairs = sf.iter_usrdata_htmlpath_pairs()
         for idx, (usrdata, path) in enumerate(pairs):
             html_path = Path(path)
             if sf.pre_break(idx, usrdata, html_path):
                 break
             if sf.skip(idx, usrdata, html_path):
                 continue
             # read and parse the document only for pairs that are not skipped
             markup = html_path.read_text(encoding=sf.iencoding)
             parsed = bs4_ops.建(markup, markup_lang=MarkupLang.HTML)
             sf.output(bs4_ops, parsed, idx, usrdata, html_path)
             if sf.post_break(idx, usrdata, html_path):
                 break
Esempio n. 17
0
def main(argv=None):
    """Collect the characters of a text and print them as one sorted string.

    The input is fed to ``file2all_char_set``, the result to
    ``chars2sorted_char_string``; with ``--repr_as_unicode`` the string is
    additionally passed through ``repr_string_as_unicode`` before printing.

    :param argv: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description='sort and unique chars in text.',
    )
    parser.add_argument(
        '-e', '--encoding', type=str, default='utf8',
        help='input/output file encoding')
    parser.add_argument(
        '-f', '--force', action='store_true', default=False,
        help='open mode for output file')
    parser.add_argument(
        '-i', '--input', type=str,
        help='path to the input novel text file')
    parser.add_argument(
        '-o', '--output', type=str,
        help='path to the output file')
    parser.add_argument(
        '-u', '--repr_as_unicode', action='store_true', default=False,
        help='output char in format \\UXXXXXXXX')

    opts = parser.parse_args(argv)
    enc = opts.encoding
    if opts.force:
        open_mode = 'wt'
    else:
        open_mode = 'xt'

    with may_open_stdin(opts.input, 'rt', encoding=enc) as fin:
        char_set = file2all_char_set(fin)

    text = chars2sorted_char_string(char_set)
    if opts.repr_as_unicode:
        text = repr_string_as_unicode(text)

    with may_open_stdout(opts.output, open_mode, encoding=enc) as fout:
        print(text, file=fout)
Esempio n. 18
0
def main(argv=None):
    """Run ``parse_ex`` on the input text and write the generated head and tail.

    ``parse_ex`` is called with ``name2count=None``,
    ``the_input_parameter_name='p'`` and ``with_class_keyword=False``; its
    result is unpacked into three values, of which the first two are
    written to the output in that order.

    :param argv: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='parse pythoncoded_rules_in_str',
        epilog=example_doc,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        '-i', '--input', type=str, default=None, help='input file path')
    parser.add_argument(
        '-o', '--output', type=str, default=None, help='output file path')
    parser.add_argument(
        '-e', '--encoding', type=str, default='utf8',
        help='input/output file encoding')
    parser.add_argument(
        '-f', '--force', action='store_true', default=False,
        help='open mode for output file')

    opts = parser.parse_args(argv)
    enc = opts.encoding
    open_mode = 'wt' if opts.force else 'xt'

    with may_open_stdin(opts.input, 'rt', encoding=enc) as fin:
        rules_text = fin.read()

    # the third element (name2count) is not used here
    head_str, tail_str, _name2count = parse_ex(
        rules_text,
        name2count=None,
        the_input_parameter_name='p',
        with_class_keyword=False,
    )

    with may_open_stdout(opts.output, open_mode, encoding=enc) as fout:
        for part in (head_str, tail_str):
            fout.write(part)
def main(argv=None):
    """Command-line front end for ``extract_fb_opf_items``.

    Opens the input and the output and calls
    ``extract_fb_opf_items(fout, fin)``.

    :param argv: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    :return: ``0`` once extraction has finished.
    """
    import argparse
    from seed.io.may_open import may_open_stdout, may_open_stdin

    class Globals:
        # default values for the -oe / -ie options
        output_file_encoding = 'utf8'
        input_file_encoding = 'utf8'

    ###################
    parser = argparse.ArgumentParser(description='extract epub/OPS/fb.opf',
                                     epilog='''
    extract epub/OPS/fb.opf::manifest.item.href
        where item["media-type"]=="application/xhtml+xml"
''')
    add_argument = parser.add_argument

    add_argument('-i',
                 '--input_file',
                 type=str,
                 default=None,
                 help='the input file')
    add_argument('-ie',
                 '--input_encoding',
                 type=str,
                 default=Globals.input_file_encoding,
                 help='the encoding of input file')
    add_argument('-o',
                 '--output_file',
                 type=str,
                 default=None,
                 help='the output file')
    add_argument('-oe',
                 '--output_encoding',
                 type=str,
                 default=Globals.output_file_encoding,
                 help='the encoding of output file')

    args = parser.parse_args(argv)
    # Open the input first: with mode 'xt' an output opened before a failing
    # input open would be left behind as an empty file and would make the
    # next run fail on the already-existing path.
    with may_open_stdin(args.input_file, 'rt'
            , encoding=args.input_encoding) as fin\
        , may_open_stdout(args.output_file, 'xt'
            , encoding=args.output_encoding) as fout:
        extract_fb_opf_items(fout, fin)

    return 0
def main(args=None, /):
    """Run ``parser4pseudo_symmetric_hz_from_completed_chars_3980`` on the input.

    The result is printed after a header line that reports ``len()`` of
    the result.

    :param args: argument list for ``argparse``; ``None`` means
        ``sys.argv[1:]``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='简繁对称字-middle-parse',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        '-i', '--input', type=str, default=None, help='input file path')
    parser.add_argument(
        '-o', '--output', type=str, default=None, help='output file path')
    parser.add_argument(
        '-e', '--encoding', type=str, default='utf8',
        help='input/output file encoding')
    parser.add_argument(
        '-f', '--force', action='store_true', default=False,
        help='open mode for output file')

    opts = parser.parse_args(args)
    enc = opts.encoding
    if opts.force:
        open_mode = 'wt'
    else:
        open_mode = 'xt'

    with may_open_stdin(opts.input, 'rt', encoding=enc) as fin:
        s = parser4pseudo_symmetric_hz_from_completed_chars_3980(fin)

    with may_open_stdout(opts.output, open_mode, encoding=enc) as fout:
        header = f'###parser4pseudo_symmetric_hz_from_completed_chars_3980:total={len(s)}###'
        print(header, file=fout)
        print(s, file=fout)
def main(args=None):
    """Dump the module-level ``sm2ym_num_graph__for_good_hanzis`` as Python source.

    The value is looked up in ``globals()`` and pretty-printed after a
    ``<name> = \\`` line, so the output is an assignment statement.
    When ``-o`` is not given, the target is ``<name>.py`` next to this file.

    args: sequence of command-line strings; ``None`` lets argparse use
        ``sys.argv[1:]``.
    """
    import argparse
    from seed.io.may_open import may_open_stdout
    from pprint import pprint
    from pathlib import PurePath

    script_path = PurePath(__file__)
    script_dir, script_name = script_path.parent, script_path.name

    cli = argparse.ArgumentParser(
        description='make sm2ym_num_graph__for_good_hanzis from sm2ym2num2good_hanzis'
        , epilog=''
        , formatter_class=argparse.RawDescriptionHelpFormatter
        )
    add_option = cli.add_argument
    add_option('-o', '--output', type=str, default=None
               , help='output file path')
    add_option('-oe', '--output_encoding', type=str, default='utf8'
               , help='output file encoding')
    add_option('-f', '--force', action='store_true', default=False
               , help='open mode for output file')

    opts = cli.parse_args(args)
    # 'xt' is exclusive-creation mode; --force switches to plain 'wt'.
    write_mode = 'wt' if opts.force else 'xt'
    target_name = 'sm2ym_num_graph__for_good_hanzis'
    graph = globals()[target_name]

    # Default destination: a sibling module named after the variable.
    if opts.output is None:
        destination = script_dir / f'{target_name}.py'
    else:
        destination = opts.output

    with may_open_stdout(destination, write_mode,
                         encoding=opts.output_encoding) as fout:
        print(f'#{target_name} generated by {script_name}', file=fout)
        # Trailing backslash continues the assignment onto the pprint output.
        print(f'{target_name} = \\', file=fout)
        pprint(graph, stream=fout)
def main(argv=None):
    """Command-line entry point: merge the lines of a novel text into paragraphs.

    The input lines are classified by
    ``novel_merge_lines_into_paragraph__pattern`` using the patterns stored on
    ``GlobalArgsExample``; the resulting pairs are passed to
    ``merge_case_line_pairs`` and every item it yields is printed on its own
    line.

    argv: sequence of command-line strings; ``None`` lets argparse use
        ``sys.argv[1:]``.
    """
    import argparse
    import sys

    cli = argparse.ArgumentParser(
        description='merge lines into paragraph for novel text')
    add_option = cli.add_argument
    add_option('-i', '--input', type=str, default=None
               , help='input file name')
    add_option('-o', '--output', type=str, default=None
               , help='output file name')
    add_option('-e', '--encoding', type=str, default='utf8'
               , help='encoding of input/output file')
    add_option('-f', '--force', action='store_true', default=False
               , help='open mode for output file')

    opts = cli.parse_args(argv)
    text_encoding = opts.encoding
    # 'xt' is exclusive-creation mode; --force switches to plain 'wt'.
    write_mode = 'wt' if opts.force else 'xt'

    with may_open_stdin(opts.input, 'rt', encoding=text_encoding) as fin:
        # Build the list while the input is still open, in case the helper
        # consumes the line iterator lazily.
        pairs = list(
            novel_merge_lines_into_paragraph__pattern(
                GlobalArgsExample.not_merge_pattern,
                GlobalArgsExample.transparent_pattern,
                GlobalArgsExample.case_pattern_pairs,
                iter(fin),
            )
        )

    with may_open_stdout(opts.output, write_mode,
                         encoding=text_encoding) as fout:
        for merged in merge_case_line_pairs(pairs):
            print(merged, file=fout)
Esempio n. 23
0
def main(argv=None):
    """Command-line entry point: escape HTML special characters in a text.

    Reads the whole input, passes it through ``escape(text, quote)`` and
    writes the result to the output.

    argv: sequence of command-line strings; ``None`` lets argparse use
        ``sys.argv[1:]``.

    Returns 0 on success, so the function can be called from other code or
    wrapped as ``sys.exit(main())``.  (It previously called
    ``parser.exit(0)``, which raised ``SystemExit`` and left the final
    ``return`` unreachable.)
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout


    parser = argparse.ArgumentParser(
        description='simple encape text to html'
        , epilog=r'''only "<>&" and "\"\'" if quote=True will be escaped.'''
        #, formatter_class=argparse.RawDescriptionHelpFormatter
        )
    parser.add_argument('-e', '--encoding', type=str, default='utf8', help='input/output file encoding')
    parser.add_argument('-i', '--input', type=str, default = None
                        , help='input file path')
    parser.add_argument('-o', '--output', type=str, default = None
                        , help='output file path')
    parser.add_argument('-f', '--force', action='store_true'
                        , default = False
                        , help='open mode for output file')
    parser.add_argument('-q', '--quote', action='store_true'
                        , default = False
                        , help=r'''escape "\"\'" too; otherwise only "<>&" be escaped''')



    args = parser.parse_args(argv)
    encoding = args.encoding
    # 'xt' is exclusive-creation mode; --force switches to plain 'wt'.
    omode = 'wt' if args.force else 'xt'

    may_ifname = args.input
    with may_open_stdin(may_ifname, 'rt', encoding=encoding) as fin:
        input_text = fin.read()

    output_text = escape(input_text, args.quote)

    may_ofname = args.output
    with may_open_stdout(may_ofname, omode, encoding=encoding) as fout:
        fout.write(output_text)

    return 0
Esempio n. 24
0
def main(argv=None):
    '''Command-line entry point: print every path matched by the glob patterns.

    Each positional GLOB_PATTERN is expanded with ``iglob`` (honouring
    ``--recursive``) and every resulting path is printed on its own line to
    the output opened through ``may_open_stdout`` in 'xt' mode.

    argv: sequence of command-line strings; ``None`` lets argparse use
        ``sys.argv[1:]``.
    '''
    import argparse
    import sys

    cli = argparse.ArgumentParser(description='list paths')
    cli.add_argument('glob_patterns', type=str, nargs='+'
                     , metavar='GLOB_PATTERN'
                     , help='glob_patterns for list paths')
    cli.add_argument('-r', '--recursive', action='store_true'
                     , default=False
                     , help='recursive search')
    cli.add_argument('-o', '--output_file', type=str
                     , default=None
                     , help='the output file')
    cli.add_argument('-oe', '--output_encoding', type=str
                     , default=Globals.output_file_encoding
                     , help='the encoding of output file')

    opts = cli.parse_args(argv)
    with may_open_stdout(opts.output_file, 'xt'
            , encoding=opts.output_encoding) as fout:
        # Patterns are expanded lazily, one after another, in the given order.
        matches = (
            found
            for pattern in opts.glob_patterns
            for found in iglob(pattern, recursive=opts.recursive)
        )
        for found in matches:
            print(found, file=fout)

    return None
Esempio n. 25
0
def main(args=None):
    """Command-line entry point: re-indent an html/xml document.

    The input is parsed with ``BeautifulSoup(fin, 'lxml')``, rendered with
    ``prettify()``, and the result is passed through
    ``replace_indent_spaces(indent_width, text)`` before being written out.

    args: sequence of command-line strings; ``None`` lets argparse use
        ``sys.argv[1:]``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    cli = argparse.ArgumentParser(
        description='format html/xml by well indent',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter)
    # (flags, settings) pairs, registered in the order shown by --help.
    option_table = (
        (('-i', '--input'),
         dict(type=str, default=None, help='input file path')),
        (('-o', '--output'),
         dict(type=str, default=None, help='output file path')),
        (('-e', '--encoding'),
         dict(type=str, default='utf8', help='input/output file encoding')),
        (('-f', '--force'),
         dict(action='store_true', default=False,
              help='open mode for output file')),
        (('-iw', '--indent_width'),
         dict(type=int, default=1,
              help='the number of indent spaces for children')),
    )
    for flags, settings in option_table:
        cli.add_argument(*flags, **settings)

    opts = cli.parse_args(args)
    # 'xt' is exclusive-creation mode; --force switches to plain 'wt'.
    if opts.force:
        write_mode = 'wt'
    else:
        write_mode = 'xt'

    with may_open_stdin(opts.input, 'rt', encoding=opts.encoding) as fin:
        document = BeautifulSoup(fin, 'lxml')

    pretty = document.prettify()
    formatted = replace_indent_spaces(opts.indent_width, pretty)

    with may_open_stdout(opts.output, write_mode,
                         encoding=opts.encoding) as fout:
        fout.write(formatted)
def main(args=None, /):
    """Command-line entry point: run one routine from ``main_routines``.

    The positional ``case`` argument must be a key of the module-level
    ``main_routines`` mapping; the selected routine is called with the opened
    output stream as its only argument.

    args: sequence of command-line strings; ``None`` lets argparse use
        ``sys.argv[1:]``.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    cli = argparse.ArgumentParser(
        description='繁简字信息相关'
        , epilog=''
        , formatter_class=argparse.RawDescriptionHelpFormatter
        )
    add_option = cli.add_argument
    # Offer the routine names in sorted order.
    add_option('case', type=str, choices=sorted(main_routines)
               , help='choose one main_routine')
    add_option('-o', '--output', type=str, default=None
               , help='output file path')
    add_option('-e', '--encoding', type=str, default='utf8'
               , help='input/output file encoding')
    add_option('-f', '--force', action='store_true', default=False
               , help='open mode for output file')

    opts = cli.parse_args(args)
    # 'xt' is exclusive-creation mode; --force switches to plain 'wt'.
    write_mode = 'wt' if opts.force else 'xt'

    selected_routine = main_routines[opts.case]
    with may_open_stdout(opts.output, write_mode,
                         encoding=opts.encoding) as fout:
        selected_routine(fout)
Esempio n. 27
0
    parser.add_argument('-o', '--output', type=str, default=None
                        , help='output file path')
    parser.add_argument('-oe', '--output_encoding', type=str
                        , default='utf8'
                        , help='output file encoding')
    parser.add_argument('-f', '--force', action='store_true'
                        , default = False
                        , help='open mode for output file')

    args = parser.parse_args(args)
    oencoding = args.output_encoding
    omode = 'wt' if args.force else 'xt'

    nm = args.name4target
    case = args.post_process
    post_process = case2post_process[case]
    xxx = case2name2xxx[case][nm]


    may_ofname = args.output
    with may_open_stdout(may_ofname, omode, encoding=oencoding) as fout:
        post_process(nm, xxx, fout=fout)

# Script guard: call main() only when this file is run directly, not when it
# is imported as a module.
if __name__ == "__main__":
    main()





Esempio n. 28
0
def main(argv=None):
    '''Command-line entry point: run an extractor function over a list of paths.

    The positional ``extractor`` argument is the qualified name of a Python
    callable, resolved with ``import_object``.  Every line of the input names
    one path; the extractor is called once per path as
    ``extractor(fout, path, *extra_args, **extra_kwargs)`` where the extra
    arguments come from the command-line options this parser does not know
    (converted by ``parse_unknown_args``).

    argv: sequence of command-line strings; ``None`` lets argparse use
        ``sys.argv[1:]``.
    '''
    import argparse

    # The epilog is a raw string: it contains a regex example with
    # backslashes, which are invalid escape sequences in a normal literal.
    parser = argparse.ArgumentParser(description='extract info from files',
                                     allow_abbrev=False,
                                     epilog=r'''
    NOTE:
        extract_data_cmd -oe gbk -o ./1+2.txt -ie ascii -i ./paths.txt nn_ns.filedir._extractor_example.main --encoding=utf8
          where:
            -ie ascii
                the encoding of input file which contains paths to files from which data were extracted.
                arg of this program
            --encoding utf8
                the encoding of all files from which data were extracted.
                arg of nn_ns.filedir._extractor_example

        glob_cmd ./*.html | line_filter_cmd chapter(\d+)\.html --group_names 1 --INT_GROUP | sort_lines_cmd --line_type=KEY_LINE | extract_data_cmd -oe gbk -o ./1+2.txt nn_ns.filedir._extractor_example.main --encoding=utf8
''')
    add_argument = parser.add_argument

    add_argument('extractor',
                 type=str,
                 help='fullname of a python function: e.g. math.log2')

    add_argument('-i',
                 '--input_file',
                 type=str,
                 default=None,
                 help='the input file which contains paths')
    add_argument('-ie',
                 '--input_encoding',
                 type=str,
                 default=Globals.input_file_encoding,
                 help='the encoding of input file')
    add_argument('-o',
                 '--output_file',
                 type=str,
                 default=None,
                 help='the output file')
    add_argument('-oe',
                 '--output_encoding',
                 type=str,
                 default=Globals.output_file_encoding,
                 help='the encoding of output file')

    # Options unknown to this parser are forwarded to the extractor.
    args, unknown_args = parser.parse_known_args(argv)
    _args, _kwargs = parse_unknown_args(unknown_args)
    _kwargs = dict(_kwargs)

    extractor_qname = args.extractor
    extractor = import_object(extractor_qname)
    # extractor :: (fout, input_fname, *args, **kwargs) -> None

    with may_open_stdout(args.output_file, 'xt'
            , encoding=args.output_encoding) as fout\
        , may_open_stdin(args.input_file, 'rt'
            , encoding=args.input_encoding) as fin:
        for line in fin:
            # Drop only the line terminator; other whitespace is part of the path.
            path = line[:-1] if line.endswith('\n') else line
            extractor(fout, path, *_args, **_kwargs)

    #parser.exit()
    return
def main(argv=None):
    """Command-line entry point: extract text from ctext.org / ctext.cn pages.

    Input modes (``--input`` and ``--url`` are mutually exclusive):
      * ``--url`` alone: a single page via ``extract_ctext_org__url``;
      * ``--url`` with ``--range FIRST LAST``: pages FIRST..LAST (inclusive)
        via ``iter_extract_ctext_org__url_rng``; unless
        ``--without_book_title`` is given, the book title is fetched from the
        base url extended by ``--book_title_at``;
      * otherwise: the ``--input`` source via ``extract_ctext_org``, opened
        as text first and re-opened as bytes when decoding fails.

    Output: an optional ``[book]:<title>`` line, then for every chapter a
    ``[chapter<i>]:<title>`` line followed by its text.

    argv: sequence of command-line strings; ``None`` lets argparse use
        ``sys.argv[1:]``.

    Returns 0 on success, so the function can be called from other code or
    wrapped as ``sys.exit(main())``.  (It previously called
    ``parser.exit(0)``, which raised ``SystemExit`` and left the final
    ``return`` unreachable.)

    Raises ValueError when both ``--input`` and ``--url`` are given.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='extract text on ctext.org or ctext.cn')
    parser.add_argument('-e',
                        '--encoding',
                        type=str,
                        default='utf8',
                        help='input/output file encoding')
    parser.add_argument('-i',
                        '--input',
                        type=str,
                        default=None,
                        help='input file path')
    parser.add_argument('-o',
                        '--output',
                        type=str,
                        default=None,
                        help='output file path')
    parser.add_argument('-f',
                        '--force',
                        action='store_true',
                        default=False,
                        help='open mode for output file')
    parser.add_argument('--append',
                        action='store_true',
                        default=False,
                        help='open mode for output file')
    parser.add_argument('-V',
                        '--verbose',
                        action='store_true',
                        default=False,
                        help='show path/url that opened')
    parser.add_argument('-url',
                        '--url',
                        type=str,
                        default=None,
                        help='input webpage url')
    parser.add_argument(
        '-rng',
        '--range',
        type=int,
        default=None,
        nargs=2,
        help='input webpage url range (first, last); {url}/{i} for i in range')
    parser.add_argument(
        '-ifmt',
        '--index_format',
        type=str,
        default='{}',
        help='index python format for webpage url; base_url/{fmt}')
    parser.add_argument('--timeout',
                        type=int,
                        default=10,
                        help='timeout for urllib')
    parser.add_argument('--time_sep',
                        type=int,
                        default=1,
                        help='time space between two downloads')
    parser.add_argument('--without_book_title',
                        action='store_true',
                        default=False,
                        help='not show book_title')
    parser.add_argument(
        '--book_title_at',
        type=str,
        default=None,
        help='extended url for book_title; {base_url}{book_title_at}')

    args = parser.parse_args(argv)
    encoding = args.encoding
    # Default is exclusive creation; --force overwrites; --append wins over both.
    omode = 'wt' if args.force else 'xt'
    if args.append:
        omode = 'at'

    if args.input is not None and args.url is not None:
        raise ValueError('input both file and url at same time')

    # Each branch builds: result :: (may_book_title, begin, [(title, txt)])
    if args.url is not None:
        if args.range is None:
            # Single web page.
            title, txt = extract_ctext_org__url(args.url,
                                                verbose=args.verbose,
                                                timeout=args.timeout)
            may_book_title = None
            begin = 0
            result = (may_book_title, begin, [(title, txt)])
        else:
            # Inclusive range of web pages below the base url.
            first, last = args.range
            begin, end = first, last + 1
            rng = range(begin, end)

            base_url = args.url
            index_format = args.index_format
            it = iter_extract_ctext_org__url_rng(base_url,
                                                 rng,
                                                 index_format,
                                                 verbose=args.verbose,
                                                 timeout=args.timeout,
                                                 time_sep=args.time_sep)
            if args.without_book_title:
                may_book_title = None
            else:
                if args.book_title_at is None:
                    book_title_url = base_url
                else:
                    book_title_url = f'{base_url}{args.book_title_at}'

                book_title, _ = extract_ctext_org__url(book_title_url,
                                                       verbose=args.verbose,
                                                       timeout=args.timeout)
                may_book_title = book_title

            # Keep the chapters as an iterator so they are consumed one by
            # one while the output is being written.
            result = (may_book_title, begin, iter(it))
    else:
        may_ifname = args.input
        try:
            # open as text file
            with may_open_stdin(may_ifname, 'rt', encoding=encoding) as fin:
                title, txt = extract_ctext_org(fin,
                                               verbose=args.verbose,
                                               timeout=args.timeout)
        except UnicodeError:
            if may_ifname is None:
                # No named file that could be re-opened in binary mode:
                # surface the decoding error itself.
                raise
            # open as binary file
            with open(may_ifname, 'rb') as fin:
                title, txt = extract_ctext_org(fin,
                                               verbose=args.verbose,
                                               timeout=args.timeout)
        may_book_title = None
        begin = 0
        result = (may_book_title, begin, [(title, txt)])

    may_ofname = args.output
    with may_open_stdout(may_ofname, omode, encoding=encoding) as fout:

        def fprint(*values, **kwargs):
            print(*values, file=fout, **kwargs)

        may_book_title, begin, title_txt_pairs = result
        # NOTE(review): for single-page and file input may_book_title is None,
        # so this prints "[book]:None" -- confirm that this is intended.
        if not args.without_book_title:
            fprint(f'[book]:{may_book_title}')
        for i, (title, txt) in enumerate(title_txt_pairs, begin):
            fprint(f'[chapter{i}]:{title}')
            fprint(txt)

    return 0
Esempio n. 30
0
def main(args=None):
    """Command-line entry point: convert a grey image into a binary image.

    The image is read with ``imageio.imread``; it must satisfy
    ``is_grey_image``.  The threshold is computed from the three
    ``--threshold_coeff`` values as ``A*min + B*mean + C*max`` of the image,
    the image is binarised with ``grey_image2binary_image`` (``--flip``
    is passed as ``negative``), converted with
    ``binary_image2text_grey_image`` and written row by row, each row
    followed by a newline byte.

    args: sequence of command-line strings; ``None`` lets argparse use
        ``sys.argv[1:]``.

    Raises Exception when the loaded image is not a grey image.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout
    #import skimage.io as image_io
    import imageio as image_io
    read_image = image_io.imread
    del image_io

    parser = argparse.ArgumentParser(
        description='convert grey_image to binary_image',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-i',
                        '--input',
                        type=str,
                        required=True,
                        help='input file path')
    parser.add_argument('-o',
                        '--output',
                        type=str,
                        default=None,
                        help='output file path')
    parser.add_argument('-f',
                        '--force',
                        action='store_true',
                        default=False,
                        help='open mode for output file')
    parser.add_argument('-F',
                        '--flip',
                        action='store_true',
                        default=False,
                        help='flip final binary_image')
    parser.add_argument('-G',
                        '--as_grey',
                        action='store_true',
                        default=False,
                        help='convert color_image to grey_image first')
    # The help text states the formula actually computed below (C*max).
    parser.add_argument(
        '-T',
        '--threshold_coeff',
        type=float,
        required=True,
        nargs=3,
        help='threshold_coeff A B C; threshold = A*min+B*mean+C*max')
    # Disabled option, kept for reference:
    #   -out_txt / --output_as_text (store_true): output binary_image as text

    args = parser.parse_args(args)
    # Binary output: no text encoding; 'xb' is exclusive creation, --force
    # switches to plain 'wb'.
    encoding = None
    omode = 'wb' if args.force else 'xb'

    ifname = args.input
    image = read_image(ifname, as_gray=args.as_grey)
    if not is_grey_image(image): raise Exception('not grey_image')
    grey_image = image

    A, B, C = args.threshold_coeff
    threshold = (A * grey_image.min()
                 + B * grey_image.mean()
                 + C * grey_image.max())
    binary_image = grey_image2binary_image(grey_image,
                                           threshold,
                                           negative=args.flip)
    text_grey_image = binary_image2text_grey_image(binary_image)

    may_ofname = args.output
    with may_open_stdout(may_ofname, omode, encoding=encoding) as fout:
        for byte_row in grey_image2iter_byte_rows(text_grey_image):
            fout.write(byte_row)
            fout.write(b'\n')