def main(argv=None):
    """CLI entry: merge adjacent html <p> elements of the input.

    Reads the whole input (file or stdin), runs merge_html_p() on it and
    writes the result to the output (file or stdout).
    Returns 0 on success.
    """
    import argparse, sys
    from seed.io.may_open import may_open_stdout, may_open_stdin

    class Globals:
        # default encodings for -oe / -ie
        output_file_encoding = 'utf8'
        input_file_encoding = 'utf8'

    ###################
    parser = argparse.ArgumentParser(
        description='merge html <p>',
        epilog='''
<p></p> is the true seperator
"<p>abc</p> <p>def</p>" ==>> "<p>abcdef</p>"
''')
    add_argument = parser.add_argument
    add_argument('-i', '--input_file', type=str, default=None,
                 help='the input file')
    add_argument('-ie', '--input_encoding', type=str,
                 default=Globals.input_file_encoding,
                 help='the encoding of input file')
    add_argument('-o', '--output_file', type=str, default=None,
                 help='the output file')
    add_argument('-oe', '--output_encoding', type=str,
                 default=Globals.output_file_encoding,
                 help='the encoding of output file')
    args = parser.parse_args(argv)

    with may_open_stdin(args.input_file, 'rt',
                        encoding=args.input_encoding) as fin:
        pseudo_htm = fin.read()
    txt = merge_html_p(pseudo_htm)
    if args.output_file is not None:
        # try output_encoding: fail early, before the ('xt') output file
        # is created, if txt cannot be encoded
        txt.encode(args.output_encoding)
    with may_open_stdout(args.output_file, 'xt',
                         encoding=args.output_encoding) as fout:
        fout.write(txt)
    #parser.exit(0)
    return 0
    # NOTE(fix): removed unreachable copy-paste residue that followed the
    # return above (a second with-block calling extract_fb_opf_items,
    # pasted from the fb.opf tool).
def main(args=None, /):
    """CLI entry: create a new python module file from template4module.

    The template lives in python3_src/useful.txt between the two marker
    lines; every "xxx.yyy" inside it is replaced by the dotted module
    name derived from the output path relative to Globals.this_pkg_root.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='make new python module using template4module in python3_src/useful.txt',
        epilog=r'''
template from:
    view ../../python3_src/useful.txt
between:
    #[[[[[template4module:begin
    #]]]]]template4module:end
replace "xxx.yyy" to "pkg.module"
#''',
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-i', '--input', type=str,
                        default=Globals.path4useful_txt,
                        help='input file path for template4module')
    parser.add_argument('-o', '--output', type=str, default=None,
                        required=True,
                        help='output file path for target module')
    parser.add_argument('-e', '--encoding', type=str, default='utf8',
                        help='input/output file encoding')
    parser.add_argument('-f', '--force', action='store_true', default=False,
                        help='open mode for output file')
    args = parser.parse_args(args)

    encoding = args.encoding
    omode = 'wt' if args.force else 'xt'

    # read the whole useful.txt (or stdin)
    with may_open_stdin(args.input, 'rt', encoding=encoding) as fin:
        whole_txt = fin.read()

    begin_marker = '#[[[[[template4module:begin\n'
    end_marker = '#]]]]]template4module:end\n'
    substr4replace = r'xxx.yyy'
    # slice out the template body between the two marker lines
    lo = whole_txt.index(begin_marker) + len(begin_marker)
    hi = whole_txt.index(end_marker, lo)
    template4module = whole_txt[lo:hi]

    path4target_module = Path(args.output)
    if not '.py' == path4target_module.suffix:
        raise ValueError
    path4target_module = path4target_module.resolve()
    rpath = path4target_module.relative_to(Globals.this_pkg_root)
    posix_str = rpath.as_posix()
    assert posix_str.endswith('.py')
    posix_str = posix_str[:-3]
    if '.' in posix_str:
        raise ValueError(posix_str)
    module_qname = posix_str.replace('/', '.')

    # validate the dotted name: nonempty identifier parts only
    attrs = module_qname.split('.')
    if not attrs:
        raise ValueError
    if not all(attrs):
        raise ValueError
    if not all(attr.isidentifier() for attr in attrs):
        raise ValueError

    txt4output = template4module.replace(substr4replace, module_qname)
    with open(path4target_module, omode, encoding=encoding) as fout:
        fout.write(txt4output)
def main(argv=None):
    """CLI entry: extract the first content_div of a 360doc.com article.

    Input is either a url (-url) or a file/stdin (-i); a file that fails
    text decoding is retried in binary mode.  Returns 0 on success.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='extract first content_div of article on 360doc.com')
    parser.add_argument('-e', '--encoding', type=str, default='utf8',
                        help='input/output file encoding')
    parser.add_argument('-i', '--input', type=str, default=None,
                        help='input file path')
    parser.add_argument('-o', '--output', type=str, default=None,
                        help='output file path')
    parser.add_argument('-f', '--force', action='store_true', default=False,
                        help='open mode for output file')
    parser.add_argument('-url', '--url', type=str, default=None,
                        help='input webpage url')
    args = parser.parse_args(argv)

    encoding = args.encoding
    omode = 'wt' if args.force else 'xt'
    if args.input is not None and args.url is not None:
        raise ValueError('input both file and url at same time')

    if args.url is not None:
        with open_webpage(args.url) as fin:
            content_div = extract_360doc_com(fin)
    else:
        may_ifname = args.input
        try:
            # open as text file
            with may_open_stdin(may_ifname, 'rt', encoding=encoding) as fin:
                content_div = extract_360doc_com(fin)
        except UnicodeError:
            # stdin cannot raise here before any read with a path of None;
            # only a real file triggers the binary retry
            assert may_ifname is not None
            ifname = may_ifname
            # open as binary file
            with open(ifname, 'rb') as fin:
                content_div = extract_360doc_com(fin)

    # NOTE(fix): removed a dead `if 0:` debug block that printed offsets
    # of the first non-ascii char of content_div and returned early.
    may_ofname = args.output
    with may_open_stdout(may_ofname, omode, encoding=encoding) as fout:
        fout.write(content_div)
    parser.exit(0)
    return 0
def fs(may_ifnames, fout):
    """For each input file, split every line into whitespace-separated
    tokens and feed them to f(); lines that f() rejects are copied to
    fout unchanged.

    NOTE(review): relies on names from the enclosing module scope —
    `may_open_stdin`, `encoding` and `f`; confirm they are defined there.
    """
    for may_ifname in may_ifnames:
        with may_open_stdin(may_ifname, 'rt', encoding=encoding) as fin:
            for line in fin:
                hexdigits_ls = line.split()
                try:
                    f(hexdigits_ls, fout)
                except Exception:
                    # not convertible: pass the raw line through
                    # (it keeps its own '\n', hence end='')
                    print(line, file=fout, end='')
                    continue
def main(argv=None):
    """CLI entry: simple symmetric encryption of ascii text via aCrypt.

    encrypt: ascii cleartext in, utf8 ciphertext out;
    decrypt: utf8 ciphertext in, ascii cleartext out.
    Returns 0 on success; raises TypeError on a bad password.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    encodingI = 'ascii'
    encodingO = 'utf8'
    parser = argparse.ArgumentParser(
        description='simple encrypt ascii text',
        epilog=r'only " " and "\n" are allowed, other control/whitespace should not occur in input text'
        #, formatter_class=argparse.RawDescriptionHelpFormatter
        )
    #parser.add_argument('-e', '--encoding', type=str, default='utf8', help='input/output file encoding')
    parser.add_argument('cmd', type=str, choices='encrypt decrypt'.split(),
                        help='encrypt/decrypt - treat input as cleartext/ciphertext')
    parser.add_argument('psw', type=str,
                        help='password: regex = [0-9a-f]*')
    parser.add_argument('-i', '--input', type=str, default=None,
                        help='input file path')
    parser.add_argument('-o', '--output', type=str, default=None,
                        help='output file path')
    parser.add_argument('-f', '--force', action='store_true', default=False,
                        help='open mode for output file')
    args = parser.parse_args(argv)

    psw = args.psw
    # password must consist only of chars aCrypt knows
    if not all(ch in aCrypt.char2idxP for ch in psw):
        raise TypeError
    omode = 'wt' if args.force else 'xt'

    does_encrypt = args.cmd == 'encrypt'
    # cleartext side is ascii, ciphertext side is utf8
    iencoding = encodingI if does_encrypt else encodingO
    oencoding = encodingO if does_encrypt else encodingI

    with may_open_stdin(args.input, 'rt', encoding=iencoding) as fin:
        input_text = ''.join(fin)
    if does_encrypt:
        output_text = aCrypt.encrypt(psw, input_text)
    else:
        output_text = aCrypt.decrypt(psw, input_text)
    with may_open_stdout(args.output, omode, encoding=oencoding) as fout:
        fout.write(output_text)
    parser.exit(0)
    return 0
def main(argv=None):
    """CLI entry: translate characters between lower and upper case.

    Delegates to lower_file(); with --upper it maps lower->upper,
    otherwise upper->lower.  --sep is forwarded for 2-column output.
    Returns 0 on success.
    """
    import argparse, sys
    # FIX(consistency): sibling commands import the may_open helpers
    # locally; this one used them without importing them anywhere visible.
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='translate characters from lower to upper case')
    parser.add_argument('-i', '--input', type=str, default=None,
                        help='input file name')
    parser.add_argument('-o', '--output', type=str, default=None,
                        help='output file name')
    parser.add_argument('-e', '--encoding', type=str, default='utf8',
                        help='encoding of input/output file')
    parser.add_argument('-f', '--force', action='store_true', default=False,
                        help='open mode for output file')
    parser.add_argument('-s', '--sep', type=str, default=None,
                        help='seperator string of output file which has 2 columes')
    parser.add_argument('-u', '--upper', action='store_true', default=False,
                        help='lower2upper instead of upper2lower')
    args = parser.parse_args(argv)

    encoding = args.encoding
    omode = 'wt' if args.force else 'xt'
    may_ifname = args.input
    may_ofname = args.output
    # newline='' keeps original line endings untranslated on both sides
    with may_open_stdin(may_ifname, 'rt', encoding=encoding, newline='')\
            as fin\
            , may_open_stdout(may_ofname, omode, encoding=encoding, newline='')\
            as fout:
        lower_file(fout, fin, upper=args.upper, sep=args.sep)
    parser.exit(0)
    return 0
def main(args=None):
    """CLI entry: bitwise-invert every byte of a file.

    With --auto_name_output and no explicit -o, the output name is
    derived from the input name by toggling a ".inv" suffix.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='invert bytes of file',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-i', '--input', type=str, default=None,
                        help='input file path')
    parser.add_argument('-o', '--output', type=str, default=None,
                        help='output file path')
    parser.add_argument('-f', '--force', action='store_true', default=False,
                        help='open mode for output file')
    parser.add_argument('-a', '--auto_name_output', action='store_true',
                        default=False,
                        help='add/remove ".inv" to input fname for output fname')
    args = parser.parse_args(args)

    omode = 'wb' if args.force else 'xb'
    may_ifname = args.input
    may_ofname = args.output
    if (args.auto_name_output
            and may_ofname is None
            and may_ifname is not None):
        ext = '.inv'
        if may_ifname.endswith(ext):
            #bug:may_ofname = may_ifname[-len(ext):]
            may_ofname = may_ifname[:-len(ext)]
        else:
            may_ofname = may_ifname + ext

    with may_open_stdin(may_ifname, 'rb', encoding=None) as fin:
        with may_open_stdout(may_ofname, omode, encoding=None) as fout:
            # stream in fixed-size chunks; TABLE maps each byte to its
            # inverse.  NOTE(review): BLOCK_SIZE/TABLE come from module
            # scope — confirm they are defined there.
            while 1:
                block = fin.read(BLOCK_SIZE)
                if not block:
                    break
                fout.write(block.translate(TABLE))
def on_subcmd__update_branch(sf, subcmd_name, parsed_args):
    """Handle the update_branch subcommand.

    Reads the extended dir-cmp result file named by parsed_args.input,
    checks its branch name matches the command line, then forwards all
    pieces to type(sf)._main4subcmds.on_update_lhs_branch().
    """
    parsed_args.input  # fail fast if the attribute is missing
    from seed.io.may_open import may_open_stdin, may_open_stdout
    may_ifname = parsed_args.input
    encoding = parsed_args.encoding
    with may_open_stdin(may_ifname, 'rt', encoding=encoding) as fin:
        #see:[location4fmt_of_file4result_of_dir_cmp__relative__extended]
        (lhs_branch_name,
         lhs_branch_idx4old,
         rhs_ignorefile_relative_path_encoding_pairs,
         result_of_dir_cmp__relative,
         ) = read__file4result_of_dir_cmp__relative__extended(fin)
    #options_update_branch
    if lhs_branch_name != parsed_args.lhs_branch_name:
        raise ValueError
    #if lhs_branch_idx4old != parsed_args.lhs_branch_idx: raise ValueError
    type(sf)._main4subcmds.on_update_lhs_branch(
        lhs_repository_extra_cache_root_dir_path=parsed_args.lhs_repository_extra_cache_root_dir_path,
        lhs_repository_root_dir_path=parsed_args.lhs_repository_root_dir_path,
        lhs_branch_name=lhs_branch_name,
        lhs_branch_idx4old=lhs_branch_idx4old,
        rhs_real_fsys_root_dir_path=parsed_args.rhs_real_fsys_root_dir_path,
        rhs_ignorefile_relative_path_encoding_pairs=rhs_ignorefile_relative_path_encoding_pairs,
        result_of_dir_cmp__relative=result_of_dir_cmp__relative,
        lcp_threshold=parsed_args.lcp_threshold)
    return
def main(argv=None):
    """CLI entry: print the sorted set of distinct characters of a text.

    With --repr_as_unicode each char is rendered as \\UXXXXXXXX escapes.
    """
    import argparse
    import sys
    # FIX(consistency): sibling commands import the may_open helpers
    # locally; this one used them without importing them anywhere visible.
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='sort and unique chars in text.')
    parser.add_argument('-e', '--encoding', type=str, default='utf8',
                        help='input/output file encoding')
    parser.add_argument('-f', '--force', action='store_true', default=False,
                        help='open mode for output file')
    parser.add_argument('-i', '--input', type=str,
                        help='path to the input novel text file')
    parser.add_argument('-o', '--output', type=str,
                        help='path to the output file')
    parser.add_argument('-u', '--repr_as_unicode', action='store_true',
                        default=False,
                        help='output char in format \\UXXXXXXXX')
    args = parser.parse_args(argv)

    encoding = args.encoding
    omode = 'wt' if args.force else 'xt'
    may_ifname = args.input
    with may_open_stdin(may_ifname, 'rt', encoding=encoding) as fin:
        #all_chars = file2unique_sorted_char_string(fin)
        all_chars = file2all_char_set(fin)
    s = chars2sorted_char_string(all_chars)
    if args.repr_as_unicode:
        s = repr_string_as_unicode(s)
    may_ofname = args.output
    with may_open_stdout(may_ofname, omode, encoding=encoding) as fout:
        print(s, file=fout)
def main(argv=None):
    """CLI entry: parse pythoncoded_rules_in_str into python code.

    Reads the rules text, runs parse_ex() and writes head+tail code to
    the output.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='parse pythoncoded_rules_in_str',
        epilog=example_doc,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-i', '--input', type=str, default=None,
                        help='input file path')
    parser.add_argument('-o', '--output', type=str, default=None,
                        help='output file path')
    parser.add_argument('-e', '--encoding', type=str, default='utf8',
                        help='input/output file encoding')
    parser.add_argument('-f', '--force', action='store_true', default=False,
                        help='open mode for output file')
    args = parser.parse_args(argv)

    encoding = args.encoding
    omode = 'wt' if args.force else 'xt'

    with may_open_stdin(args.input, 'rt', encoding=encoding) as fin:
        pythoncoded_rules_in_str = fin.read()
    python_code_str_ex = parse_ex(pythoncoded_rules_in_str,
                                  name2count=None,
                                  the_input_parameter_name='p',
                                  with_class_keyword=False)
    head_str, tail_str, name2count = python_code_str_ex
    with may_open_stdout(args.output, omode, encoding=encoding) as fout:
        fout.write(head_str)
        fout.write(tail_str)
def main(argv=None):
    """CLI entry: list xhtml item hrefs of an epub OPS/fb.opf manifest.

    Delegates the actual extraction to extract_fb_opf_items().
    Returns 0 on success.
    """
    import argparse, sys
    from seed.io.may_open import may_open_stdout, may_open_stdin

    class Globals:
        # default encodings for -oe / -ie
        output_file_encoding = 'utf8'
        input_file_encoding = 'utf8'

    ###################
    parser = argparse.ArgumentParser(
        description='extract epub/OPS/fb.opf',
        epilog='''
extract epub/OPS/fb.opf::manifest.item.href
    where item["media-type"]=="application/xhtml+xml"
''')
    add_argument = parser.add_argument
    add_argument('-i', '--input_file', type=str, default=None,
                 help='the input file')
    add_argument('-ie', '--input_encoding', type=str,
                 default=Globals.input_file_encoding,
                 help='the encoding of input file')
    add_argument('-o', '--output_file', type=str, default=None,
                 help='the output file')
    add_argument('-oe', '--output_encoding', type=str,
                 default=Globals.output_file_encoding,
                 help='the encoding of output file')
    args = parser.parse_args(argv)

    with may_open_stdout(args.output_file, 'xt',
                         encoding=args.output_encoding) as fout\
            , may_open_stdin(args.input_file, 'rt',
                             encoding=args.input_encoding) as fin:
        extract_fb_opf_items(fout, fin)
    #parser.exit(0)
    return 0
def main(args=None, /):
    """CLI entry: 简繁对称字 middle-parse.

    Runs the 3980-char parser over the input and prints a total-count
    header line followed by the result.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='简繁对称字-middle-parse',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-i', '--input', type=str, default=None,
                        help='input file path')
    parser.add_argument('-o', '--output', type=str, default=None,
                        help='output file path')
    parser.add_argument('-e', '--encoding', type=str, default='utf8',
                        help='input/output file encoding')
    parser.add_argument('-f', '--force', action='store_true', default=False,
                        help='open mode for output file')
    args = parser.parse_args(args)

    encoding = args.encoding
    omode = 'wt' if args.force else 'xt'
    with may_open_stdin(args.input, 'rt', encoding=encoding) as fin:
        s = parser4pseudo_symmetric_hz_from_completed_chars_3980(fin)
    with may_open_stdout(args.output, omode, encoding=encoding) as fout:
        print(
            f'###parser4pseudo_symmetric_hz_from_completed_chars_3980:total={len(s)}###',
            file=fout)
        print(s, file=fout)
def main(argv=None):
    """CLI entry: merge wrapped lines back into paragraphs (novel text).

    Classification patterns come from GlobalArgsExample; the classified
    (case, line) pairs are merged by merge_case_line_pairs().
    """
    import argparse, sys
    # FIX(consistency): sibling commands import the may_open helpers
    # locally; this one used them without importing them anywhere visible.
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='merge lines into paragraph for novel text')
    parser.add_argument('-i', '--input', type=str, default=None,
                        help='input file name')
    parser.add_argument('-o', '--output', type=str, default=None,
                        help='output file name')
    parser.add_argument('-e', '--encoding', type=str, default='utf8',
                        help='encoding of input/output file')
    parser.add_argument('-f', '--force', action='store_true', default=False,
                        help='open mode for output file')
    args = parser.parse_args(argv)

    encoding = args.encoding
    omode = 'wt' if args.force else 'xt'
    may_ifname = args.input
    with may_open_stdin(may_ifname, 'rt', encoding=encoding) as fin:
        case_line_pairs = novel_merge_lines_into_paragraph__pattern(
            GlobalArgsExample.not_merge_pattern,
            GlobalArgsExample.transparent_pattern,
            GlobalArgsExample.case_pattern_pairs,
            iter(fin))
        # materialize while fin is still open (the result is lazy)
        case_line_pairs = list(case_line_pairs)
    #print(case_line_pairs)
    may_ofname = args.output
    with may_open_stdout(may_ofname, omode, encoding=encoding) as fout:
        for line in merge_case_line_pairs(case_line_pairs):
            print(line, file=fout)
def main(argv=None):
    """CLI entry: html-escape text; --quote also escapes quote chars.

    Returns 0 on success.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='simple encape text to html',
        epilog=r'''only "<>&" and "\"\'" if quote=True will be escaped.'''
        #, formatter_class=argparse.RawDescriptionHelpFormatter
        )
    parser.add_argument('-e', '--encoding', type=str, default='utf8',
                        help='input/output file encoding')
    parser.add_argument('-i', '--input', type=str, default=None,
                        help='input file path')
    parser.add_argument('-o', '--output', type=str, default=None,
                        help='output file path')
    parser.add_argument('-f', '--force', action='store_true', default=False,
                        help='open mode for output file')
    parser.add_argument('-q', '--quote', action='store_true', default=False,
                        help=r'''escape "\"\'" too; otherwise only "<>&" be escaped''')
    args = parser.parse_args(argv)

    encoding = args.encoding
    omode = 'wt' if args.force else 'xt'
    with may_open_stdin(args.input, 'rt', encoding=encoding) as fin:
        input_text = ''.join(fin)
    output_text = escape(input_text, args.quote)
    with may_open_stdout(args.output, omode, encoding=encoding) as fout:
        fout.write(output_text)
    parser.exit(0)
    return 0
def main(args=None):
    """CLI entry: pretty-print html/xml with a configurable indent width.

    Parses with BeautifulSoup(lxml), prettifies, then rescales the
    indentation via replace_indent_spaces().
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='format html/xml by well indent',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-i', '--input', type=str, default=None,
                        help='input file path')
    parser.add_argument('-o', '--output', type=str, default=None,
                        help='output file path')
    parser.add_argument('-e', '--encoding', type=str, default='utf8',
                        help='input/output file encoding')
    parser.add_argument('-f', '--force', action='store_true', default=False,
                        help='open mode for output file')
    parser.add_argument('-iw', '--indent_width', type=int, default=1,
                        help='the number of indent spaces for children')
    args = parser.parse_args(args)

    encoding = args.encoding
    omode = 'wt' if args.force else 'xt'
    with may_open_stdin(args.input, 'rt', encoding=encoding) as fin:
        soup = BeautifulSoup(fin, 'lxml')
    txt = replace_indent_spaces(args.indent_width, soup.prettify())
    with may_open_stdout(args.output, omode, encoding=encoding) as fout:
        fout.write(txt)
def main(args=None):
    """CLI entry: split a text file into many files, cutting at lines
    that match a head/tail separator regex; delegates to cut_text()."""
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='cut text file into many text files by detect head/tail line for each output files',
        epilog=r"""
usiing python.re.match
#not search/fullmatch
for search: r".*?{pattern}"
for fullmatch: r"^{pattern}$"
#""",
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-i', '--input', type=str, default=None,
                        help='input file path')
    parser.add_argument('-re', '--sep_line_regex', type=str, required=True,
                        help='python regex pattern for input line')
    parser.add_argument('-cs', '--sep_line_case', choices='head tail'.split(),
                        required=True,
                        help='input line which match sep_line_regex is head/tail')
    parser.add_argument('-od', '--output_dir', type=str, required=True,
                        help='output dir path')
    parser.add_argument('-ofmt', '--output_file_name_fmt', type=str,
                        default='{0}.txt',
                        help="python str format of output file name; {0} for number_offset; eg, -ofmt '{0:0>4}.txt'")
    # FIX: was type=str with default=0 — a value given on the command line
    # stayed a string while the default was an int; the offset is numeric,
    # so parse it as int.
    parser.add_argument('-oi', '--output_file_name_number_offset', type=int,
                        default=0,
                        help='number_offset of output file name')
    parser.add_argument('-n', '--max_sep_lines_per_ofile', type=int,
                        required=True,
                        help='max number of sep_lines per output_file')
    parser.add_argument('-e', '--encoding', type=str, default='utf8',
                        help='input/output file encoding')
    parser.add_argument('-f', '--force', action='store_true', default=False,
                        help='open mode for output file')
    args = parser.parse_args(args)

    encoding = args.encoding
    # (unused local `omode` removed; cut_text receives force= directly)
    may_ifname = args.input
    with may_open_stdin(may_ifname, 'rt', encoding=encoding) as fin:
        cut_text(fin,
                 force=args.force,
                 oencoding=encoding,
                 odir=args.output_dir,
                 ofname_number_offset=args.output_file_name_number_offset,
                 ofname_fmt=args.output_file_name_fmt,
                 sep_line_regex=re.compile(args.sep_line_regex),
                 sep_line_case=args.sep_line_case,
                 max_sep_lines_per_ofile=args.max_sep_lines_per_ofile)
def main(args=None):
    """CLI entry: count identifiers.

    Counts identifiers in one file/stdin, or — with -g — in every file
    matching the glob under the <input> folder, then prints the tally.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='count identifiers',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-i', '--input', type=str, default=None,
                        help='input file path')
    parser.add_argument('-g', '--glob_pattern', type=str, default=None,
                        help='treat <input> as folder path')
    parser.add_argument('-o', '--output', type=str, default=None,
                        help='output file path')
    parser.add_argument('-e', '--encoding', type=str, default='utf8',
                        help='input/output file encoding')
    parser.add_argument('-f', '--force', action='store_true', default=False,
                        help='open mode for output file')
    args = parser.parse_args(args)

    encoding = args.encoding
    omode = 'wt' if args.force else 'xt'

    counter = {}  # filled by feed(), read by lst()

    def consume(fin):
        for line in fin:
            feed(counter, line)

    may_glob_pattern = args.glob_pattern
    if may_glob_pattern is None:
        # single input file (or stdin)
        with may_open_stdin(args.input, 'rt', encoding=encoding) as fin:
            consume(fin)
    else:
        glob_pattern = may_glob_pattern
        may_root = args.input
        root = '.' if not may_root else may_root
        for path in iter_files(root, glob_pattern):
            try:
                with open(path, 'rt', encoding=encoding) as fin:
                    consume(fin)
            except UnicodeDecodeError:
                # report and skip files not in the expected encoding
                print_err(path)
                continue
            except:
                # report the offending path, then re-raise
                print_err(path)
                raise

    ls = lst(counter)
    with may_open_stdout(args.output, omode, encoding=encoding) as fout:
        show(fout, ls)
def main(argv=None):
    """CLI entry: extract text from ctext.org / ctext.cn.

    Input is either a saved page (-i / stdin) or a live url (-url),
    optionally a whole range of chapter sub-pages (-rng), driven by the
    book's subcontents listing.  Extracted (title, txt) pairs are cached
    in --cache_fname; captcha images go to --captcha_image_db_fname.
    Output format:
        [book]:{book_title}      (unless --without_book_title)
        [chapter{i}]:{title}
        {txt}
    Returns 0 on success.
    """
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='extract text on ctext.org or ctext.cn')
    parser.add_argument('-e', '--encoding', type=str, default='utf8',
                        help='input/output file encoding')
    parser.add_argument('-i', '--input', type=str, default=None,
                        help='input file path')
    parser.add_argument('-o', '--output', type=str, default=None,
                        help='output file path')
    parser.add_argument('-f', '--force', action='store_true', default=False,
                        help='open mode for output file')
    parser.add_argument('--append', action='store_true', default=False,
                        help='open mode for output file')
    parser.add_argument('-V', '--verbose', action='store_true', default=False,
                        help='show path/url that opened')
    parser.add_argument('-url', '--url', type=str, default=None,
                        help='input webpage url')
    parser.add_argument('-rng', '--range', type=int, default=None, nargs=2,
                        help='input webpage url range (first, last); {url}/{i} for i in range')
    parser.add_argument('-ifmt', '--index_format', type=str, default='{}',
                        help='index python format for webpage url; base_url/{fmt}')
    parser.add_argument('--timeout', type=int, default=10,
                        help='timeout for urllib')
    parser.add_argument('--time_sep', type=int, default=1,
                        help='time space between two downloads')
    parser.add_argument('--without_book_title', action='store_true',
                        default=False,
                        help='not show book_title')
    parser.add_argument('--book_title_at', type=str, default=None,
                        help='extended url for book_title; {base_url}{book_title_at}')
    parser.add_argument('--cache_fname', type=str, required=True,
                        help='cache file name; to store middle extract data; Map url (title, txt)')
    parser.add_argument('--captcha_image_db_fname', type=str, required=True,
                        help='cache file name; to store (correct or wrong) captcha string and its image bytes; Map "{i}_{captcha}" (correct, image_bytes)')
    args = parser.parse_args(argv)

    encoding = args.encoding
    omode = 'wt' if args.force else 'xt'
    if args.append:
        omode = 'at'
    if args.input is not None and args.url is not None:
        raise ValueError('input both file and url at same time')

    if args.url is not None:
        self = ExtractCTextOrg(
            cache_fname=args.cache_fname,
            captcha_image_db_fname=args.captcha_image_db_fname)
        if args.range is None:
            # single page
            title, txt = self.cached_extract_ctext_org__url(
                args.url, referrer=None, verbose=args.verbose,
                subcontents=False, timeout=args.timeout)
            may_book_title = None
            begin = 0
            result = (may_book_title, begin, [(title, txt)])
        # NOTE(fix): an unreachable `elif 0:` branch (an older url-range
        # implementation via unordered_iter_extract_ctext_org__url_rng)
        # was removed here.
        else:
            # chapter range driven by the book's subcontents listing
            first, last = args.range
            if (first, last) == (0, 0):
                # (0, 0) means "all chapters"
                first, last = 1, None
                begin, end = None, None
            else:
                assert first >= 1
                begin, end = first - 1, last
            base_url = args.url
            if args.book_title_at is None:
                book_title_url = base_url
            else:
                book_title_url = f'{base_url}{args.book_title_at}'
            ((book_title, book_url), subtitle_url_pairs
             ) = self.cached_extract_ctext_org__url(
                book_title_url, referrer=None, verbose=args.verbose,
                subcontents=True, timeout=args.timeout)
            if args.without_book_title:
                may_book_title = None
            else:
                may_book_title = book_title
            subtitle_url_pairs = subtitle_url_pairs[begin:end]
            referrer_url_pairs = [
                (book_url, sub_url)
                for subtitle, sub_url in subtitle_url_pairs]
            # download into the cache in arbitrary order ...
            it = self.unordered_iter_extract_ctext_org__referrer_url_pairs(
                referrer_url_pairs, verbose=args.verbose,
                timeout=args.timeout, time_sep=args.time_sep)
            for _ in it:
                pass

            # ... then replay the chapters in order from the cache
            def tmp__ordered_iter_extract_ctext_org__url_rng__cache_only():
                for (referrer, url), (subtitle, _) in zip(
                        referrer_url_pairs, subtitle_url_pairs):
                    title, txt = self.cache[url]
                    #yield title, txt
                    yield subtitle, txt
            it = tmp__ordered_iter_extract_ctext_org__url_rng__cache_only()
            begin = first
            #result = (may_book_title, begin, list(it))
            result = (may_book_title, begin, iter(it))
    else:
        self = ExtractCTextOrgBase()
        may_ifname = args.input
        try:
            # open as text file
            with may_open_stdin(may_ifname, 'rt', encoding=encoding) as fin:
                title, txt = self.extract_ctext_org__text(
                    fin, verbose=args.verbose, timeout=args.timeout)
        except UnicodeError:
            assert may_ifname is not None
            ifname = may_ifname
            # open as binary file
            with open(ifname, 'rb') as fin:
                title, txt = self.extract_ctext_org__text(
                    fin, verbose=args.verbose, timeout=args.timeout)
        may_book_title = None
        begin = 0
        result = (may_book_title, begin, [(title, txt)])

    #result :: (may_book_title, begin, [(title, txt)])
    may_ofname = args.output
    with may_open_stdout(may_ofname, omode, encoding=encoding) as fout:
        # FIX: parameter renamed from *args to *items so the argparse
        # namespace `args` is not shadowed inside the helper
        def fprint(*items, **kwargs):
            print(*items, file=fout, **kwargs)
        may_book_title, begin, title_txt_pairs = result
        if not args.without_book_title:
            fprint(f'[book]:{may_book_title}')
        for i, (title, txt) in enumerate(title_txt_pairs, begin):
            fprint(f'[chapter{i}]:{title}')
            fprint(txt)
    if hasattr(self, 'close'):
        self.close()
    parser.exit(0)
    return 0
def main(argv=None):
    '''CLI entry: run an extractor over every path listed in the input.

    The positional argument names a python callable (e.g. math.log2)
    imported by qualified name; unrecognized command-line options are
    parsed into (args, kwargs) and forwarded to it for every path.
    '''
    import argparse, sys
    parser = argparse.ArgumentParser(
        description='extract info from files',
        allow_abbrev=False,
        epilog='''
NOTE:
    extract_data_cmd -oe gbk -o ./1+2.txt -ie ascii -i ./paths.txt nn_ns.filedir._extractor_example.main --encoding=utf8
where:
    -ie ascii
        the encoding of input file
        which contains paths to files from which data were extracted.
        arg of this program
    --encodinga utf8
        the encoding of all files from which data were extracted.
        arg of nn_ns.filedir._extractor_example
glob_cmd ./*.html | line_filter_cmd chapter(\\d+)\\.html --group_names 1 --INT_GROUP | sort_lines_cmd --line_type=KEY_LINE | extract_data_cmd -oe gbk -o ./1+2.txt nn_ns.filedir._extractor_example.main --encoding=utf8
''')
    add_argument = parser.add_argument
    add_argument('extractor', type=str,
                 help='fullname of a python function: e.g. math.log2')
    add_argument('-i', '--input_file', type=str, default=None,
                 help='the input file which contains paths')
    add_argument('-ie', '--input_encoding', type=str,
                 default=Globals.input_file_encoding,
                 help='the encoding of input file')
    add_argument('-o', '--output_file', type=str, default=None,
                 help='the output file')
    add_argument('-oe', '--output_encoding', type=str,
                 default=Globals.output_file_encoding,
                 help='the encoding of output file')
    args, unknown_args = parser.parse_known_args(argv)
    _args, _kwargs = parse_unknown_args(unknown_args)
    _kwargs = dict(_kwargs)

    extractor = import_object(args.extractor)
    # extractor :: (fout, input_fname, **kwargs) -> None
    with may_open_stdout(args.output_file, 'xt',
                         encoding=args.output_encoding) as fout\
            , may_open_stdin(args.input_file, 'rt',
                             encoding=args.input_encoding) as fin:
        for line in fin:
            # strip at most one trailing newline; the remainder is a path
            if line[-1:] == '\n':
                line = line[:-1]
            path = line
            extractor(fout, path, *_args, **_kwargs)
    #parser.exit()
    return
def main(argv=None):
    '''CLI entry: filter lines by regexes into ordered output slots.

    Each input line is matched (re.search) against the given patterns in
    order and goes into the slot of the first matching pattern;
    non-matching lines are dropped.  With --group_names each kept line is
    output as repr(((groups...), line)).  Finally all slots are written
    out in pattern order.
    '''
    import argparse, sys
    parser = argparse.ArgumentParser(description='filter lines')
    add_argument = parser.add_argument
    add_argument('regex_patterns', type=str, nargs='+',
                 metavar='REGEX_PATTERN',
                 help='regular expressions; if line match i-th regex, then put it into the i-th output slot; finally, all slots will be chained together')
    add_argument('--group_names', type=str, nargs='*', metavar='NAME',
                 help='if group_names was set, then snap that named group of regex; each line will be output as ((groups...), repr(line))')
    '''#group name can not startswith digit...
    add_argument('--INT_NAME', action='store_true'
        , default=False
        , help='a group name is treated as integer if possible')
    '''
    add_argument('--INT_GROUP', action='store_true', default=False,
                 help='a named group is treated as integer if possible')
    add_argument('-i', '--input_file', type=str, default=None,
                 help='the input file')
    add_argument('-ie', '--input_encoding', type=str,
                 default=Globals.input_file_encoding,
                 help='the encoding of input file')
    add_argument('-o', '--output_file', type=str, default=None,
                 help='the output file')
    add_argument('-oe', '--output_encoding', type=str,
                 default=Globals.output_file_encoding,
                 help='the encoding of output file')
    args = parser.parse_args(argv)

    re_objs = list(map(re.compile, args.regex_patterns))

    def to_int_if_possible(s):
        try:
            return int(s)
        except:
            return s

    def to_ints_if_possible(strs):
        return list(map(to_int_if_possible, strs))

    def get_groups(m, group_names):
        # m.group(*group_names) will error if len <= 1
        return tuple(map(m.group, group_names))

    slots = [[] for _ in range(len(re_objs))]
    if args.group_names:  # and args.INT_NAME:
        args.group_names = to_ints_if_possible(args.group_names)

    with may_open_stdout(args.output_file, 'xt',
                         encoding=args.output_encoding) as fout\
            , may_open_stdin(args.input_file, 'rt',
                             encoding=args.input_encoding) as fin:
        EOF = False
        for line in fin:
            if EOF:
                # FIX: was `raise logic-error`, a NameError at runtime
                raise RuntimeError('logic-error: line after EOF')
            if line == '':  # EOF?
                EOF = True
                continue
            if line[-1:] == '\n':
                line = line[:-1]
            for i, rex in enumerate(re_objs):
                #m = rex.match(line)
                m = rex.search(line)
                if m:
                    break
            else:
                # no match - drop this line
                continue
            if args.group_names:
                # named_groups
                groups = get_groups(m, args.group_names)
                if args.INT_GROUP:
                    # convert to int
                    groups = to_ints_if_possible(groups)
                    groups = tuple(groups)
                out_line = repr((groups, line))
            else:
                out_line = line  # no repr!!
            #print(out_line, file=fout)
            slots[i].append(out_line)
        for out_line in chain.from_iterable(slots):
            print(out_line, file=fout)
    #parser.exit()
    return
def main(argv=None):
    '''CLI entry point: stable sort of input lines.

    --line_type=RAW_LINE : lines are compared and written out as-is.
    --line_type=KEY_LINE : each input line is "repr((keys, org_line))"
        (the output format of the companion filter script); lines are
        sorted by the evaluated tuple and only org_line is written out.

    :param argv: argument list for argparse (None => sys.argv[1:])
    '''
    import argparse, sys

    parser = argparse.ArgumentParser(description='stable sort lines')
    add_argument = parser.add_argument
    add_argument('--line_type', choices='RAW_LINE KEY_LINE'.split()
        , default=RAW_LINE
        , help='type of input lines')
    add_argument('--unique', action='store_true'
        , default=False
        , help='remove duplicate lines')
    add_argument('--reverse', action='store_true'
        , default=False
        , help='stable sort by reverse ordering')
    add_argument('-i', '--input_file', type=str
        , default=None
        , help='the input file')
    add_argument('-ie', '--input_encoding', type=str
        , default=Globals.input_file_encoding
        , help='the encoding of input file')
    add_argument('-o', '--output_file', type=str
        , default=None
        , help='the output file')
    add_argument('-oe', '--output_encoding', type=str
        , default=Globals.output_file_encoding
        , help='the encoding of output file')

    args = parser.parse_args(argv)

    # argparse `choices` guarantees exactly one of the two branches is taken
    if args.line_type == RAW_LINE:
        def input_line2val(input_line):
            # the line itself is the sort key
            val = org_line = input_line
            return val
        def val2org_line(val):
            return val
    elif args.line_type == KEY_LINE:
        def input_line2val(input_line):
            # input_line is "repr((keys, org_line))" with the newline stripped
            assert input_line[-1:] != '\n'
            try:
                keys, org_line = ast.literal_eval(input_line)
            except Exception:
                # show the offending line before propagating (was a bare except)
                print(input_line)
                print(repr(input_line))
                raise
            assert type(keys) is tuple
            return keys, org_line
        def val2org_line(val):
            keys, org_line = val
            return org_line

    vals = []  # [org_line] | [(keys, org_line)]
    with may_open_stdout(args.output_file, 'xt'
            , encoding=args.output_encoding) as fout\
        , may_open_stdin(args.input_file, 'rt'
            , encoding=args.input_encoding) as fin:
        EOF = False
        for line in fin:
            #print(repr(line))
            #bug:
            #   xxx.bat xxx | this_cmd
            #   xxx.bat should be:
            #       @xxx_cmd args
            #       @yyy_cmd args
            if EOF:
                # was "raise logic - error": a NameError in disguise;
                # make the intended sanity check explicit instead
                raise RuntimeError('logic error: line seen after EOF mark')
            if line == '':  # EOF? (defensive; text-file iteration should not yield '')
                EOF = True
                continue
            if line[-1:] == '\n':
                line = line[:-1]
            input_line = line
            vals.append(input_line2val(input_line))
        # list.sort is stable, as the script name promises
        vals.sort(reverse=args.reverse)
        if args.unique:
            # vals is sorted, so groupby collapses adjacent duplicates
            vals = [unique_line for unique_line, _ in groupby(vals)]
        for val in vals:
            org_line = val2org_line(val)
            print(org_line, file=fout)
    #parser.exit()
    return
def main(argv=None):
    '''CLI entry point: extract text on ctext.org or ctext.cn.

    Input is either a saved HTML file (-i), a single page url (-url), or a
    url plus an index range (-url + -rng) where page i is "{url}/{fmt(i)}".
    Output is "[book]:{title}" (unless --without_book_title) followed by
    "[chapter{i}]:{title}" / text blocks.

    :param argv: argument list for argparse (None => sys.argv[1:])
    '''
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description='extract text on ctext.org or ctext.cn')
    parser.add_argument('-e', '--encoding', type=str, default='utf8',
        help='input/output file encoding')
    parser.add_argument('-i', '--input', type=str, default=None,
        help='input file path')
    parser.add_argument('-o', '--output', type=str, default=None,
        help='output file path')
    parser.add_argument('-f', '--force', action='store_true', default=False,
        help='open mode for output file')
    parser.add_argument('--append', action='store_true', default=False,
        help='open mode for output file')
    parser.add_argument('-V', '--verbose', action='store_true', default=False,
        help='show path/url that opened')
    parser.add_argument('-url', '--url', type=str, default=None,
        help='input webpage url')
    parser.add_argument(
        '-rng', '--range', type=int, default=None, nargs=2,
        help='input webpage url range (first, last); {url}/{i} for i in range')
    parser.add_argument(
        '-ifmt', '--index_format', type=str, default='{}',
        help='index python format for webpage url; base_url/{fmt}')
    parser.add_argument('--timeout', type=int, default=10,
        help='timeout for urllib')
    parser.add_argument('--time_sep', type=int, default=1,
        help='time space between two downloads')
    parser.add_argument('--without_book_title', action='store_true', default=False,
        help='not show book_title')
    parser.add_argument(
        '--book_title_at', type=str, default=None,
        help='extended url for book_title; {base_url}{book_title_at}')

    args = parser.parse_args(argv)
    encoding = args.encoding
    omode = 'wt' if args.force else 'xt'
    if args.append:
        omode = 'at'  # --append overrides --force
    if args.input is not None and args.url is not None:
        raise ValueError('input both file and url at same time')

    if args.url is not None:
        if args.range is None:
            # single page download
            title, txt = extract_ctext_org__url(args.url,
                verbose=args.verbose, timeout=args.timeout)
            may_book_title = None
            begin = 0
            result = (may_book_title, begin, [(title, txt)])
        else:
            # download pages first..last inclusive
            first, last = args.range
            begin, end = first, last + 1
            rng = range(begin, end)
            base_url = args.url
            index_format = args.index_format
            it = iter_extract_ctext_org__url_rng(base_url, rng, index_format,
                verbose=args.verbose, timeout=args.timeout,
                time_sep=args.time_sep)
            if args.without_book_title:
                may_book_title = None
            else:
                if args.book_title_at is None:
                    book_title_url = base_url
                else:
                    book_title_url = f'{base_url}{args.book_title_at}'
                book_title, _ = extract_ctext_org__url(book_title_url,
                    verbose=args.verbose, timeout=args.timeout)
                may_book_title = book_title
            # (removed dead no-op "begin = begin")
            #result = (may_book_title, begin, list(it))
            result = (may_book_title, begin, iter(it))  # lazy: download while writing
    else:
        may_ifname = args.input
        try:
            # open as text file
            with may_open_stdin(may_ifname, 'rt', encoding=encoding) as fin:
                title, txt = extract_ctext_org(fin,
                    verbose=args.verbose, timeout=args.timeout)
        except UnicodeError:
            # stdin cannot be reopened, so this fallback needs a real path
            assert may_ifname is not None
            ifname = may_ifname
            # open as binary file
            with open(ifname, 'rb') as fin:
                title, txt = extract_ctext_org(fin,
                    verbose=args.verbose, timeout=args.timeout)
        may_book_title = None
        begin = 0
        result = (may_book_title, begin, [(title, txt)])

    #result :: (may_book_title, begin, [(title, txt)])
    may_ofname = args.output
    with may_open_stdout(may_ofname, omode, encoding=encoding) as fout:
        def fprint(*args, **kwargs):
            print(*args, file=fout, **kwargs)
        may_book_title, begin, title_txt_pairs = result
        if not args.without_book_title:
            # NOTE(review): in the single-page/file branches may_book_title is
            # None, so this prints "[book]:None" — confirm that is intended
            fprint(f'[book]:{may_book_title}')
        for i, (title, txt) in enumerate(title_txt_pairs, begin):
            fprint(f'[chapter{i}]:{title}')
            fprint(txt)
    parser.exit(0)
    return 0
def main(args=None, /):
    '''CLI entry point: parse unicode::UCD::PropList.txt.

    Reads PropList.txt, parses it into a property_name -> uint-rngs mapping,
    and writes either the sorted property names (--show_property_names_only)
    or a literal-text rendering of the whole parsed result (decimal, or hex
    with --hex).

    :param args: argument list for argparse (None => sys.argv[1:])
    '''
    from pprint import pprint
    import argparse
    from seed.io.may_open import may_open_stdin, may_open_stdout
    #pprint signature for reference:
    #pprint(object, stream=None, indent=1, width=80, depth=None, *, compact=False, sort_dicts=True)

    parser = argparse.ArgumentParser(
        description='parse unicode::UCD::PropList.txt',
        epilog='',
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--show_property_names_only', action='store_true',
        default=False, help='output property_names without uint-rngs')
    parser.add_argument('--hex', action='store_true',
        default=False, help='output uint-rngs in hex/radix<16>')
    parser.add_argument('-i', '--input', type=str, default=None,
        help='input file path for unicode::UCD::PropList.txt')
    parser.add_argument('-o', '--output', type=str, default=None,
        help='output file path')
    parser.add_argument('-e', '--encoding', type=str, default='utf8',
        help='input/output file encoding')
    parser.add_argument('-f', '--force', action='store_true', default=False,
        help='open mode for output file')

    args = parser.parse_args(args)
    encoding = args.encoding
    omode = 'wt' if args.force else 'xt'

    may_ifname = args.input
    with may_open_stdin(may_ifname, 'rt', encoding=encoding) as fin:
        #lines = [*fin]
        lines = iter(fin)
        # (was a duplicated "parsed_result = parsed_result = ..." assignment)
        parsed_result = parse__PropList_txt(lines, result_readonly=False)
    repr_result = parsed_result2literal_text(parsed_result,
        decimal_vs_hex=args.hex)

    may_ofname = args.output
    with may_open_stdout(may_ofname, omode, encoding=encoding) as fout:
        # deliberately shadow builtin print with a file-bound version
        print = mk_fprint(fout)
        if args.show_property_names_only:
            attr2rngs = parsed_result
            for property_name in sorted(attr2rngs):
                print(property_name)
        else:
            #pprint(parsed_result, stream=fout, indent='')
            # stable_repr output (removed a stray bare "stable_repr" token)
            print(repr_result)
def main(argv=None): import argparse import sys parser = argparse.ArgumentParser( description='find(and replace) non gbk char in novel text.') parser.add_argument('-e', '--encoding', type=str, default='utf8', help='input/output file encoding') parser.add_argument('-f', '--force', action='store_true', default=False, help='open mode for output file') parser.add_argument('-i', '--input', type=str, help='path to the input novel text file') parser.add_argument('-o', '--output', type=str, help='path to the output file') parser.add_argument( '-r', '--replace', action='store_true', default=False, help='output replaced file instead of sorted non-gbk chars') args = parser.parse_args(argv) encoding = args.encoding omode = 'wt' if args.force else 'xt' may_ifname = args.input #all_chars = set() with may_open_stdin(may_ifname, 'rt', encoding=encoding) as fin: if not args.replace: all_chars = file2all_char_set(fin) else: txt = fin.read() all_chars = set(txt) chars = find_nonGBK_chars(all_chars) #chars.update(ch for ch in all_chars if not ch.isprintable()) #chars.update(ch for ch in all_chars if ord(ch) < 0x100 and (ord(ch) > 0x7F or not ch.isalnum())) if not chars: print('no nonGBK chars at all', file=sys.stderr) return assert chars if not args.replace: s = repr_string_as_unicode(sorted(chars)) else: txt #bug:pattern = '[' + '|'.join(map(repr_char_as_unicode, chars)) + ']' #pattern = '[' + ''.join(map(repr_char_as_unicode, chars)) + ']' pattern = make_chars_pattern(chars) def replace(m): char = m.group(0) return repr_char_as_unicode(char) # \Uxxxxxxxx ; no "" s = re.sub(pattern, replace, txt) may_ofname = args.output with may_open_stdout(may_ofname, omode, encoding=encoding) as fout: print(s, file=fout) '''
def main(args=None):
    '''Command-line driver: post-process charset_filter.py output to report
    subset relationships among the CJK parts of character encodings.

    Writes the relation table to --output, the derived result to --output2,
    and finally shows the extended analysis keyed by a fixed list of names.

    :param args: argument list for argparse (None => sys.argv[1:])
    '''
    import argparse
    from pprint import pprint
    from seed.io.may_open import may_open_stdin, may_open_stdout

    parser = argparse.ArgumentParser(
        description="find out relationship of cjk part of encodings (postprocess of charset_filter.py)"
        , epilog=""
        , formatter_class=argparse.RawDescriptionHelpFormatter
        )
    parser.add_argument('-e', '--encoding', type=str
        , default='utf8'
        , help='input/output file encoding')
    parser.add_argument('-i', '--input', type=str, default=None
        , help='input file path')
    parser.add_argument('-o', '--output', type=str, default=None
        , help='output file path')
    parser.add_argument('-f', '--force', action='store_true'
        , default=False
        , help='open mode for output file')
    parser.add_argument('-o2', '--output2', type=str, default=None
        , help='output2 file path')
    parser.add_argument('-f2', '--force2', action='store_true'
        , default=False
        , help='open mode for output2 file')

    opts = parser.parse_args(args)
    enc = opts.encoding
    out_mode = 'wt' if opts.force else 'xt'
    out_mode2 = 'wt' if opts.force2 else 'xt'

    # slurp the charset_filter.py output (file or stdin)
    with may_open_stdin(opts.input, 'rt', encoding=enc) as in_stream:
        whole_text = in_stream.read()

    # encoding -> cjk part rngs, then the pairwise subset relation and the
    # two derived analyses built on top of it
    enc2cjk_part_rngs = txt2cjk_d(whole_text)
    relation_table = subset_relation_of_encoding2cjk_part_rngs(enc2cjk_part_rngs)
    more_result = handle_encoding2relation2encodings(relation_table)
    more_result2 = handle_more_result(more_result, enc2cjk_part_rngs)

    # first output: the raw relation table
    with may_open_stdout(opts.output, out_mode, encoding=enc) as out_stream:
        pprint(relation_table, stream=out_stream)
    # second output: the derived result
    with may_open_stdout(opts.output2, out_mode2, encoding=enc) as out_stream2:
        pprint(more_result, stream=out_stream2)

    # fixed key order for the extended analysis display
    ks = r"""
        encoding_cjk_part_eq_classes__txt
        std_cjk_eq_encoding_lt_pairs__txt
        std_cjk_eq_encoding_atomic_lt_pairs__txt
        atomic_buttomup__txt
        atomic_topdown__txt
        std_cjk_eq_encoding2cjk_part_size__txt
        snd_buttoms__txt
        snd_buttom_subset2nonempty_common_rngs__txt
        snd_buttom_subsets_with_empty_common_rngs__txt
        """.split()
    show_more_result2(ks, more_result2)