Example #1
0
def _CLI_for_git_diff(sin, sout, serr, argv, fx):
    """experiment with our thing"""
    # (The docstring above is passed to the help screen as its description,
    # so it is left as-is and the real documentation lives in comments.)
    #
    # Parse `argv` for one positional `path` (plus -h/--help), ask the git
    # VCS adapter for the diff of that path, and write every patch line to
    # `sout`.
    #
    # Returns: `es` from the parser when parsing yields no values; the
    # result of writing the help screen when help is requested; the
    # adapter's `rc` when it is truthy; otherwise the monitor's
    # `returncode`. `sin` and `fx` are accepted but not used here.

    # Reversed copy of argv with the program name popped off the end
    bash_argv = list(reversed(argv))
    prog_name = bash_argv.pop()

    from script_lib.cheap_arg_parse import formals_via_definitions
    foz = formals_via_definitions((
        ('-h', '--help', 'this screen'),
        ('path', 'some path'),
    ), lambda: prog_name)
    vals, es = foz.terminal_parse(serr, bash_argv)
    if vals is None:
        return es
    if vals.get('help'):
        return foz.write_help_into(sout, _CLI_for_git_diff.__doc__)
    path = vals.pop('path')

    from script_lib.magnetics.error_monitor_via_stderr import \
        func as monitor_via_stderr
    mon = monitor_via_stderr(serr)

    from kiss_rdb.vcs_adapters.git import git_diff
    rc, patch_lines = git_diff(path, mon.listener)
    if rc:
        return rc

    # Track emptiness while writing: testing the truthiness of `patch_lines`
    # afterwards is only meaningful for sized containers (an iterator or
    # generator is always truthy, so the notice would never be shown).
    wrote_any = False
    for line in patch_lines:
        sout.write(line)
        wrote_any = True
    if not wrote_any:
        serr.write("(no diff lines)\n")
    return mon.returncode
Example #2
0
def _CLI_for_all(stdin, stdout, stderr, argv):
    raise RuntimeError('hello')  # #open [#882.1] producer script not cover

    def do_CLI_for_all(sin, sout, serr, htd, rscr):
        mon = rscr().monitor
        _cli = _CLI_Client(mon.listener, sout, serr, argv)
        return _do_CLI_for_all(_cli, mon, sin, sout, serr, htd)

    do_CLI_for_all.__doc__ = _doc

    from script_lib.cheap_arg_parse import cheap_arg_parse as func
    formals = (_same_option(), _help_option())
    func(do_CLI_for_all, stdin, stdout, stderr, argv, formals)
Example #3
0
def CLI(sin, sout, serr, argv):
    """
    A custom client for developing with and troubleshooting the data we
    have in the database about document history.

    (We want this tooling to stay close to the API functions we expose for
    clients.)
    """

    # (The docstring above is handed to the parser as the description, so
    # it is left untouched.)
    #
    # Branch-level dispatch: ask the parser which of `_commands()` the
    # `argv` selects. When nothing is resolved, give back the `rc` the
    # parser produced. Otherwise call the resolved function with the three
    # streams, its own argv and a fresh `_ExternalFunctions(serr)`.
    from script_lib.cheap_arg_parse import \
        prepare_tail_call_for_branch as prepare_tail_call

    resolved, rc = prepare_tail_call(
        serr, argv, _commands(), lambda: CLI.__doc__)
    if not resolved:
        return rc

    command, child_argv = resolved
    external_functions = _ExternalFunctions(serr)
    return command(sin, sout, serr, child_argv, external_functions)
def _CLI(stdin, stdout, stderr, argv):
    """Run `_do_CLI` through the argument parser, gated on interactivity.

    Returns `_exitstatus_for_failure` when `require_interactive` gives a
    falsy result for this invocation; otherwise returns whatever
    `cheap_arg_parse` returns.
    """
    from script_lib.cheap_arg_parse import \
        require_interactive as is_interactive, \
        cheap_arg_parse as parse_and_run

    if is_interactive(stderr, stdin, argv):
        return parse_and_run(
            _do_CLI, stdin, stdout, stderr, argv, _formals())
    return _exitstatus_for_failure
def _main(opened, do_field_names, listener):
    import kiss_rdb.storage_adapters_.markdown_table as sa
    from kiss_rdb import collection_via_storage_adapter_and_path as func
    coll = func(sa, opened, listener)
    with coll.open_schema_and_entity_traversal(listener) as (sch, ents):
        for x in _do_this(sch, ents, do_field_names):
            yield x
def _build_traversal_stream(listener, url, cached_path=None):

    if not _validate_url(url, listener):
        return

    from data_pipes.format_adapters.html.script_common import \
        cached_document_via as func
    doc = func(cached_path, url, 'html document', listener)
    if doc is None:
        return

    from bs4 import BeautifulSoup
    with open(doc.cache_path) as fh:
        soup = BeautifulSoup(fh, 'html.parser')

    toc, = soup.select('#docsToc')

    div, = _filter('div', toc)

    items = _direct_children(div)

    itr = iter(items)  # be explicit

    yield _object_via_first_anchor_tag_ONCE_ONLY(next(itr), url)

    for item in itr:
        name = item.name
        if 'a' == name:
            for dct in _objects_via_anchor_tag_up_top(item):
                yield dct
            continue
        assert ('div' == name)
        for dct in _objects_via_div_tag(item):
            yield dct
def _exit_code_via_producer_script(ps):
    """Run the CLI function derived from producer script `ps`.

    The CLI function is run through `cheap_arg_parse` against the real
    process streams and `sys.argv`; whatever the parser returns is
    returned.
    """
    for_sync_option = (
        '-s', '--for-sync', 'show the traveral stream mapped thru etc')
    help_option = ('-h', '--help', 'this screen')

    cli_function = __CLI_function_via_producer_script(ps)

    import sys
    from script_lib.cheap_arg_parse import cheap_arg_parse
    return cheap_arg_parse(
        cli_function,
        sys.stdin, sys.stdout, sys.stderr, sys.argv,
        (for_sync_option, help_option),
        description_valueser=lambda: {'url': _url})
Example #8
0
 def resolve_the_document_soup():
     # Load the document soup for the local HTML file and store it on
     # `self.soup`; when no soup comes back, raise what `stop()` returns.
     #
     # NOTE(review): `html_on_fs`, `listener`, `stop` and `self` all come
     # from an enclosing scope that is not visible here. `stop` is called
     # with no arguments -- confirm its definition accepts that.
     from data_pipes.format_adapters.html.script_common import \
         soup_via_locators_
     document_soup = soup_via_locators_(
         url='<was some url>',
         html_document_path=html_on_fs,
         listener=listener)
     if document_soup:
         self.soup = document_soup
         return
     raise stop()
 def __enter__(self):
     """Open the cached markdown document and return the `_main` stream.

     The opened file is remembered on `self._close_this` (reset to None
     first, so that nothing stale is left there if a later step fails).
     """
     self._close_this = None
     from data_pipes.format_adapters.html.script_common \
         import cached_document_via
     doc = cached_document_via(
         self._markdown_path, self._raw_url, 'markdown', self._listener)
     # ..
     # NOTE(review): `doc` is used without a None check -- confirm that
     # `cached_document_via` cannot return None on this path.
     self._close_this = fh = open(doc.cache_path)
     return _main(fh, self._do_field_names, self._listener)
Example #10
0
def _do_CLI_for_report(sin, sout, serr, argv, report_module):
    """Run the single report `report_module` as a command-line program.

    The report module's own docstring is used as the docstring of the
    function handed to `cheap_arg_parse`. Returns whatever the parser
    returns.
    """

    def do_CLI(sin, sout, serr, htd, rscr):
        # (no docstring here: `__doc__` is assigned right below)
        monitor = rscr().monitor
        client = _CLI_Client(monitor.listener, sout, serr, argv)
        _run_these_reports_for_CLI(client, htd, (report_module, ))
        return monitor.exitstatus

    do_CLI.__doc__ = report_module.__doc__

    from script_lib.cheap_arg_parse import cheap_arg_parse
    return cheap_arg_parse(
        do_CLI, sin, sout, serr, argv, (_same_option(), _help_option()))
def _dict_via_cels_via(far_field_names):
    """Build the cells-to-dictionary function for `far_field_names`.

    Three fields get special instructions: 'name' is passed through
    unchanged, 'parses' is renamed to 'grammar', and 'updated' is split
    into 'updated' and 'version' by `updated_and_version_via_string`. Every
    other cel is passed through unchanged as well.
    """
    instructions = {
        'name': ('string_via_cel', lambda s: s),
        'parses': ('rename_to', 'grammar'),
        'updated': (
            'split_to',
            ('updated', 'version'),
            updated_and_version_via_string,
        ),
    }

    from data_pipes.magnetics.flat_map_horizontal_via_definition import \
        dictionary_via_cells_via_definition
    return dictionary_via_cells_via_definition(
        unsanitized_far_field_names=far_field_names,
        special_field_instructions=instructions,
        string_via_cel=lambda s: s)
def _CLI(stdin, stdout, stderr, argv):
    """Run `_do_CLI` through `cheap_arg_parse` and return its result.

    The example URL is supplied lazily, under the key 'eg_url', as the
    description values.
    """
    def description_values():
        return {'eg_url': _eg_url}

    from script_lib.cheap_arg_parse import cheap_arg_parse
    return cheap_arg_parse(
        _do_CLI, stdin, stdout, stderr, argv, _formals(),
        description_valueser=description_values)
Example #13
0
def _CLI(sin, sout, serr, ARGV):
    """Run `_do_CLI` through `cheap_arg_parse` and return its result.

    Two formals are declared: the help option and one more whose first
    element is `_moniker`. The moniker is also supplied lazily, under the
    key 'moniker', as the description values.
    """
    help_option = ('-h', '--help', 'this screen')
    moniker_formal = (_moniker, 'ohai i am thing 1')

    def description_values():
        return {'moniker': _moniker}

    from script_lib.cheap_arg_parse import cheap_arg_parse
    return cheap_arg_parse(
        _do_CLI, sin, sout, serr, ARGV, (help_option, moniker_formal),
        description_valueser=description_values)
Example #14
0
 def produce_monitor():
     # Build a fresh error monitor around `serr` (which comes from an
     # enclosing scope that is not visible here).
     from script_lib.magnetics.error_monitor_via_stderr import \
         func as monitor_via_stderr
     monitor = monitor_via_stderr(serr)
     return monitor
def open_traversal_stream(listener, url, cache_path=None):
    """Return a context manager whose `as` value is the traversal stream.

    The stream from `_build_traversal_stream` is wrapped in
    `contextlib.nullcontext`, so leaving the `with` block performs no
    cleanup.
    """
    from contextlib import nullcontext
    stream = _build_traversal_stream(listener, url, cache_path)
    return nullcontext(stream)
Example #16
0
def statistitican_via_collection_path(coll_path):
    """Build the "statistician" for the collection at `coll_path`.

    Connects to the document-history database for the collection, reads
    the two stored "mean and std" singleton texts, and returns a
    `_Statistician` carrying those four floats, the database itself, and
    the `document_commits_via_title` function defined below.
    """

    def document_commits_via_title(vendor_document_title):
        """(before #history-B.4 we could get the history in one commit
        with a JOIN. but now (to accommodate rigged documents) we do it in
        two which is fine.)

        Result is None when the title is unknown or has no commits on
        record; otherwise it is a generator of `_DocumentCommit`.
        """
        cursor = execute(
            'SELECT notecard_based_document_ID, just_kidding_document_type '
            'FROM notecard_based_document '
            'WHERE document_title_from_vendor=?', (vendor_document_title, ))

        # Maybe we have no record of this document at all (strange)
        only_row = cursor.fetchone()
        if only_row is None:
            return
        assert cursor.fetchone() is None  # the title must be unique

        docu_ID, typ = only_row

        if 'docu_type_common' == typ:
            return for_notecard_based_document(docu_ID)
        assert 'docu_type_rigged' == typ
        return for_rigged_document(vendor_document_title)  # ick/meh

    def for_rigged_document(vendor_document_title):
        cursor = execute(
            'SELECT RDC.* '
            'FROM rigged_document_commit AS RDC '
            'JOIN rigged_document AS RD USING (rigged_document_ID) '
            'WHERE RD.document_title_from_vendor=? '
            'ORDER BY datetime(RDC.normal_datetime) ',
            (vendor_document_title, ))

        # (we want to make it be commit-graph order not chrono order,
        #  but not badly enough to do it knowing that it's not covered)

        return commits_via_cursor(
            cursor, lambda row: RD_commit_record(*row), 'docu_type_rigged')

    def for_notecard_based_document(docu_ID):
        cursor = execute(
            'SELECT NBDC.* FROM notecard_based_document_commit as NBDC '
            'WHERE NBDC.notecard_based_document_ID=? '
            'ORDER BY datetime(NBDC.normal_datetime)', (docu_ID, ))

        return commits_via_cursor(
            cursor, lambda row: NB_commit_rec_via_row(row),
            'docu_type_common')

    def commits_via_cursor(cursor, record_via_row, typ):
        # Shared tail of the two query functions above: rows become mutable
        # [datetime, verb, record] triples, which become document commits.

        def mutable_threes():
            # Keep fetching until `fetchone` reports the end with None
            for row in iter(cursor.fetchone, None):
                rec = record_via_row(row)
                when = datetime_via_record(rec)
                yield [when, 'edit', rec]

        scn = _scanner_via_iterator(mutable_threes())

        # If there are no commits in the database for this docu, strange
        if scn.empty:
            return

        return docu_CIs_via_threes_scanner(scn, typ)

    def docu_CIs_via_threes_scanner(scn, typ):
        # The first triple on the scanner has its verb overwritten in place
        # (this is why the triples are lists and not tuples).
        scn.peek[1] = 'create'  # meh

        while True:
            yield _DocumentCommit(*scn.next(), typ)
            if scn.empty:
                return

    # Datetime via record
    def datetime_via_record(rec):
        rec.tzinfo  # hi  (touched only so that a record lacking it fails)
        return strptime(rec.normal_datetime, '%Y-%m-%d %H:%M:%S')

    from datetime import datetime as _datetime
    strptime = _datetime.strptime

    # Connect to database
    from pho.document_history_._model import \
        database_via_collection_path_ as database_via
    # (it's a sibling file to us but we are an entrypoint file)

    db = database_via(coll_path)
    assert db

    # Prepare statistics
    sing = db.singleton_text

    def mean_and_std_via_key(key):
        # The value is stored as two floats separated by one space
        stored = sing.get(key)
        if stored is None:
            xx(f"Did you generate the statistics? Not found: {key!r}")
        mean_s, std_s = stored.split(' ')
        return float(mean_s), float(std_s)

    mean, std = mean_and_std_via_key('mean_and_std')
    mean_for_rigged, std_for_rigged = \
        mean_and_std_via_key('mean_and_std_for_rigged')

    from pho.document_history_._model import \
        RiggedDocumentCommitRecord_ as RD_commit_record

    NB_commit_rec_via_row = \
        db.notecard_based_document_commit_table.NBD_CI_via_row_

    execute = db.conn.execute

    # == BEGIN meh
    from dataclasses import dataclass as _dataclass
    from collections import namedtuple as _nt

    @_dataclass
    class _Statistician:
        mean: float
        std: float
        mean_for_rigged: float
        std_for_rigged: float
        document_commits_via_title: callable
        db: object

    _DocumentCommit = _nt('_DocumentCommit',
                          ('datetime', 'verb', 'record', 'document_type'))

    # == END

    return _Statistician(
        mean=mean,
        std=std,
        mean_for_rigged=mean_for_rigged,
        std_for_rigged=std_for_rigged,
        document_commits_via_title=document_commits_via_title,
        db=db)
Example #17
0
def _scanner_via_iterator(itr):
    assert hasattr(itr, '__next__')
    from text_lib.magnetics.scanner_via import scanner_via_iterator as func
    return func(itr)
    dct['label'] = span.string

    s = a['href']
    if False:
        pass
    else:
        dct['url'] = s


def _direct_children(node):  # #cp
    """Return what `_filter` gives for the '*' selector applied to `node`."""
    any_tag = '*'  # omit strings
    return _filter(any_tag, node)


def _filter(sel, el):
    """Apply selector `sel` to `el` by delegating to `sv.filter`.

    NOTE(review): `sv` is bound elsewhere in the file -- presumably the
    soupsieve module; confirm.
    """
    matches = sv.filter(sel, el)
    return matches


def _ps_lib():
    """Import the HTML `script_common` module on demand and return it."""
    import data_pipes.format_adapters.html.script_common as script_common
    return script_common


if __name__ == '__main__':
    # Script entry point: run `_my_CLI` through the argument parser against
    # the real process streams and argv, and use its result as the exit
    # status of the process.
    formals = _formals()
    kwargs = {'description_valueser': lambda: {'_this_one_url': _url}}
    import sys as o
    from script_lib.cheap_arg_parse import cheap_arg_parse as func
    # `sys.exit`, not the `exit` builtin: the latter is injected by the
    # `site` module for interactive use and is missing under `python -S`.
    o.exit(func(_my_CLI, o.stdin, o.stdout, o.stderr, o.argv, formals, **kwargs))

# #born
Example #19
0
def _command_zizzy(sin, sout, serr, argv, efx):
    """ See things for the document indicated by <title> """
    # (The docstring above is what `docer` hands to the parser, so it is
    # left as-is and the real documentation lives in comments.)
    #
    # With the `list` value set: write one "<EID> <title>" line to `sout`
    # per row of `notecard_based_document` and return 0.
    #
    # Otherwise: write one line to `sout` per commit on record for the
    # titled document, then a count to `serr`, and return 0. Returns 123
    # when the statistician has nothing for the title, or `rc` from the
    # parser when parsing yields no values. `sin` is not used.

    def docer():
        return _command_zizzy.__doc__

    defns = _formals_for_zizzy(efx)

    from script_lib.cheap_arg_parse import \
        prepare_tail_call_for_terminal as func

    fv, rc = func(serr, argv, defns, docer)
    if fv is None:
        return rc
    _foz, vals = fv

    coll_path = vals.pop(_coll_path_key)
    doc_title = vals.pop('title')
    do_list = vals.pop('list', False)
    assert not vals  # every parsed value must have been consumed above

    # mon = efx.produce_monitor()
    # listener = mon.listener

    stato = statistitican_via_collection_path(coll_path)

    if do_list:
        c = stato.db.conn.execute(
            'SELECT '
            'head_notecard_EID, document_title_from_vendor '
            'FROM notecard_based_document '
            'ORDER BY head_notecard_EID DESC')
        for (eid, title) in c:
            sout.write(''.join((eid, ' ', title, '\n')))
        return 0

    itr = stato.document_commits_via_title(doc_title)
    if itr is None:
        serr.write(f"(none found for {doc_title!r})\n")
        return 123

    count = 0  # stays 0 when the iterator turns out to be empty
    for count, o in enumerate(itr, 1):  # o = document commit
        rec = o.record
        nli = rec.number_of_lines_inserted
        nld = rec.number_of_lines_deleted

        nlis = '+' + str(nli) if nli else ''
        nlds = '-' + str(nld) if nld else ''

        pcs = [
            o.datetime.isoformat(),
            '%7s' % (o.verb, ),
            " %4s  %4s" % (nlis, nlds),
            '  ',
            rec.SHA[:8],
        ]

        # The document type is a field of the document commit itself (the
        # fourth member of the tuple), so read it from `o`, not from the
        # commit record it wraps.
        if 'docu_type_common' == o.document_type:
            n = rec.number_of_notecards
            if 1 != n:
                pcs.append(f"in {n} notecards")
        else:
            assert 'docu_type_rigged' == o.document_type

        sout.write(' '.join(pcs))
        sout.write('\n')

    serr.write(f"(seen {count} things)\n")
    return 0
Example #20
0
def CLI_(sin, sout, serr, argv, svcser):
    """Write the text reached by up to three nested selectors in an HTML file.

    The first selector must match exactly one tag in the document. The
    optional second selector is applied within that tag and must match at
    least one tag. The optional third selector is applied within each of
    those tags; wherever it matches exactly once, the stripped text of the
    match is written to `sout` on its own line.

    Progress and failure messages are written to `serr`. `sin` is not used.

    Returns `es` from the parser when parsing yields no values, the result
    of writing the help screen when help is requested, 7 when more than
    three selectors are given, and otherwise the monitor's `returncode`.
    """
    prog_name = (bash_argv := list(reversed(argv))).pop()
    from script_lib.cheap_arg_parse import formals_via_definitions as func
    foz = func(_formals(), lambda: prog_name)
    vals, es = foz.terminal_parse(serr, bash_argv)
    if vals is None:
        return es
    if vals.get('help'):
        return foz.write_help_into(serr, _doc)

    html_on_fs = vals.pop('local_html_file_on_your_hard_drive_lol')
    sels = vals.pop('selector')
    assert not vals  # every parsed value must have been consumed above

    # Peel off up to three selectors; anything left over is an error
    stack = list(reversed(sels))
    first_selector = stack.pop()
    second_selector, third_selector = None, None
    if stack:
        second_selector = stack.pop()
    if stack:
        third_selector = stack.pop()
    if stack:
        leng = 3 + len(stack)
        serr.write(f"Max 3 selectors (had {leng})\n")
        serr.write(foz.invite_line)
        return 7

    def main():
        resolve_the_document_soup()
        resolve_the_first_selector()
        maybe_resolve_the_second_selector()
        maybe_resolve_the_third_selector()

    def maybe_resolve_the_third_selector():
        if not third_selector:
            return

        success_count, failure_count = 0, 0

        for count, tag in enumerate(self.tags, 1):
            tags = tag.select(third_selector)
            leng = len(tags)

            # If we have 316 items, we don't want to repeat the same error
            # msg 316 times. We do a relatively simple statistical analysis
            # of the number of failures to decide how much is too much.

            if 0 == leng:
                failure_count += 1
                serr.write(
                    f"in item {count}, no match for '{third_selector}'\n")
                if 0 == success_count:
                    if 3 == failure_count:
                        serr.write("stopping because encountered 3 failures before any successes\n")  # noqa: E501
                        raise stop_exception()
                elif (3 * success_count) < failure_count:
                    serr.write("stoping because more than 3x as many failures as successes\n")  # noqa: E501
                    raise stop_exception()
                continue
            success_count += 1  # we will count match multiple as success

            if 1 < leng:
                serr.write(f"(item {count} matched third selector {leng} times ('{third_selector}'))\n")  # noqa: E501
                continue

            only_tag, = tags
            sout.write(only_tag.text.strip())
            sout.write('\n')

    def maybe_resolve_the_second_selector():
        if not second_selector:
            return

        tags = self.tag.select(second_selector)
        leng = len(tags)

        if 0 == leng:
            stop(f"didn't find any tags that matched '{second_selector}'")

        serr.write(f"(found {leng} tag(s) matching second selector ('{second_selector}'))\n")  # noqa: E501
        self.tags = tags

    def resolve_the_first_selector():
        tags = self.soup.select(first_selector)
        leng = len(tags)

        if 0 == leng:
            stop(f"didn't find any tags that matched '{first_selector}'")

        if 1 < leng:
            # Exactly one match is required (zero was rejected just above)
            stop(f"expected 1 had {leng} tags matching that selector")

        serr.write(
            f"(found 1 tag matching first selector ('{first_selector}'))\n")
        self.tag, = tags

    def resolve_the_document_soup():
        from data_pipes.format_adapters.html.script_common import \
            soup_via_locators_ as func
        soup = func(url='<was some url>',
                    html_document_path=html_on_fs,
                    listener=listener)
        if not soup:
            # Raise the exception class directly: `stop` requires a message
            # argument, so calling it bare would be a TypeError that escapes
            # the handler below. (Presumably the listener has already been
            # told what went wrong -- confirm.)
            raise stop_exception()
        self.soup = soup

    # The `main` function object doubles as the bag of shared state
    # (`soup`, `tag`, `tags`) for the steps above.
    self = main  # #watch-the-world-burn

    def stop(msg):
        serr.write(msg)  # meh
        serr.write('\n')
        raise stop_exception()

    class stop_exception(RuntimeError):
        pass

    mon = svcser().produce_monitor()
    listener = mon.listener
    try:
        main()
    except stop_exception:
        # NOTE(review): `stop(msg)` writes straight to `serr` rather than
        # through the listener, so such failures may not be reflected in
        # `mon.returncode` -- confirm.
        pass
    return mon.returncode
Example #21
0
 def cli(sin, sout, serr, argv, rscser):
     # Adapter: accept the five-argument calling convention (insisting that
     # no resourcer is passed) and delegate to `func` with `children`.
     # Both of those names come from an enclosing scope not visible here.
     assert rscser is None
     outcome = func(sin, sout, serr, argv, children)
     return outcome