def _CLI_for_git_diff(sin, sout, serr, argv, fx):
    """experiment with our thing"""

    # (The docstring above is passed to the help screen below, so its text
    # is left exactly as-is.)

    # argv is reversed so the program name can be popped off the end; the
    # remainder (still reversed) is what gets handed to the parser.
    stack = list(reversed(argv))
    prog_name = stack.pop()

    from script_lib.cheap_arg_parse import formals_via_definitions
    definitions = (
        ('-h', '--help', 'this screen'),
        ('path', 'some path'),
    )
    foz = formals_via_definitions(definitions, lambda: prog_name)

    vals, es = foz.terminal_parse(serr, stack)
    if vals is None:
        return es

    if vals.get('help'):
        return foz.write_help_into(sout, _CLI_for_git_diff.__doc__)

    path = vals.pop('path')

    from script_lib.magnetics.error_monitor_via_stderr import \
        func as monitor_via_stderr
    mon = monitor_via_stderr(serr)

    from kiss_rdb.vcs_adapters.git import git_diff
    rc, patch_lines = git_diff(path, mon.listener)
    if rc:
        return rc

    for line in patch_lines:
        sout.write(line)

    # NOTE(review): this check only fires if `patch_lines` is a sized
    # collection; a lazy iterator would always be truthy here - confirm.
    if not patch_lines:
        serr.write("(no diff lines)\n")

    return mon.returncode
def _CLI_for_all(stdin, stdout, stderr, argv): raise RuntimeError('hello') # #open [#882.1] producer script not cover def do_CLI_for_all(sin, sout, serr, htd, rscr): mon = rscr().monitor _cli = _CLI_Client(mon.listener, sout, serr, argv) return _do_CLI_for_all(_cli, mon, sin, sout, serr, htd) do_CLI_for_all.__doc__ = _doc from script_lib.cheap_arg_parse import cheap_arg_parse as func formals = (_same_option(), _help_option()) func(do_CLI_for_all, stdin, stdout, stderr, argv, formals)
def CLI(sin, sout, serr, argv):
    """ A custom client for developing with and troubleshooting the data we have in the database about document history. (We want this tooling to stay close to the API functions we expose for clients.) """  # noqa: E501

    # (The docstring is handed to the argument parser via `describe` below,
    # so its text is left untouched.)

    def describe():
        return CLI.__doc__

    from script_lib.cheap_arg_parse import prepare_tail_call_for_branch
    func_argv, rc = prepare_tail_call_for_branch(
        serr, argv, _commands(), describe)

    # A false-ish first element means there is nothing to call; `rc` is
    # then what we return.
    if not func_argv:
        return rc

    command_function, child_argv = func_argv
    external_functions = _ExternalFunctions(serr)
    return command_function(sin, sout, serr, child_argv, external_functions)
def _CLI(stdin, stdout, stderr, argv):
    # Run `_do_CLI` through the argument parser, but only when
    # `require_interactive` approves of the given stdin/argv.
    from script_lib.cheap_arg_parse import \
        cheap_arg_parse, require_interactive

    if require_interactive(stderr, stdin, argv):
        return cheap_arg_parse(
            _do_CLI, stdin, stdout, stderr, argv, _formals())

    return _exitstatus_for_failure
def _main(opened, do_field_names, listener):
    # Generator: build a collection over `opened` with the markdown-table
    # storage adapter, then yield whatever `_do_this` produces while the
    # schema-and-entity traversal is open.
    import kiss_rdb.storage_adapters_.markdown_table as storage_adapter
    from kiss_rdb import collection_via_storage_adapter_and_path

    coll = collection_via_storage_adapter_and_path(
        storage_adapter, opened, listener)

    with coll.open_schema_and_entity_traversal(listener) as (schema, entities):
        yield from _do_this(schema, entities, do_field_names)
def _build_traversal_stream(listener, url, cached_path=None):
    # Generator over the objects found under the `#docsToc` element of the
    # (cached) HTML document for `url`. Yields nothing when the URL does
    # not validate or the document cannot be produced.
    if not _validate_url(url, listener):
        return

    from data_pipes.format_adapters.html.script_common import \
        cached_document_via
    doc = cached_document_via(cached_path, url, 'html document', listener)
    if doc is None:
        return

    from bs4 import BeautifulSoup
    with open(doc.cache_path) as fh:
        soup = BeautifulSoup(fh, 'html.parser')

    # Each unpacking below insists on exactly one match
    toc, = soup.select('#docsToc')
    div, = _filter('div', toc)

    children = iter(_direct_children(div))  # be explicit

    # The first child gets its own one-off treatment
    yield _object_via_first_anchor_tag_ONCE_ONLY(next(children), url)

    for child in children:
        tag_name = child.name
        if 'a' == tag_name:
            yield from _objects_via_anchor_tag_up_top(child)
        else:
            assert ('div' == tag_name)
            yield from _objects_via_div_tag(child)
def _exit_code_via_producer_script(ps):
    # Wrap producer script `ps` in a CLI function and run it against the
    # real process streams and argv, returning the parser's result.

    def description_values():
        return {'url': _url}

    formals = (
        ('-s', '--for-sync', 'show the traveral stream mapped thru etc'),
        ('-h', '--help', 'this screen'),
    )
    CLI_function = __CLI_function_via_producer_script(ps)

    import sys
    from script_lib.cheap_arg_parse import cheap_arg_parse
    return cheap_arg_parse(
        CLI_function, sys.stdin, sys.stdout, sys.stderr, sys.argv, formals,
        description_valueser=description_values)
def resolve_the_document_soup():
    # Closure: `self`, `html_on_fs`, `listener` and `stop` all come from an
    # enclosing scope. On success the soup is stored on `self.soup`.
    from data_pipes.format_adapters.html.script_common import \
        soup_via_locators_

    soup = soup_via_locators_(
        url='<was some url>',
        html_document_path=html_on_fs,
        listener=listener)

    if soup:
        self.soup = soup
        return

    # NOTE(review): `stop` is not defined in this block. If it is the
    # one-argument `stop(msg)` helper seen in `CLI_`, calling it with no
    # argument raises TypeError rather than stopping cleanly - confirm.
    raise stop()
def __enter__(self):
    # Obtain the cached markdown document, open it, remember the open
    # handle on `self._close_this`, and return the `_main` generator over it.
    self._close_this = None

    from data_pipes.format_adapters.html.script_common import \
        cached_document_via
    doc = cached_document_via(
        self._markdown_path, self._raw_url, 'markdown', self._listener)
    # ..

    fh = open(doc.cache_path)
    self._close_this = fh
    return _main(fh, self._do_field_names, self._listener)
def _do_CLI_for_report(sin, sout, serr, argv, report_module):
    # Run the single report in `report_module` as its own CLI. The report
    # module's docstring becomes the docstring of the function handed to
    # the argument parser.

    def do_CLI(sin, sout, serr, htd, rscr):
        # (`argv` and `report_module` are taken from the enclosing call)
        monitor = rscr().monitor
        client = _CLI_Client(monitor.listener, sout, serr, argv)
        _run_these_reports_for_CLI(client, htd, (report_module, ))
        return monitor.exitstatus

    do_CLI.__doc__ = report_module.__doc__

    from script_lib.cheap_arg_parse import cheap_arg_parse
    return cheap_arg_parse(
        do_CLI, sin, sout, serr, argv,
        (_same_option(), _help_option()))
def _dict_via_cels_via(far_field_names):
    # Configure `dictionary_via_cells_via_definition` for these field
    # names: 'name' passes through unchanged, 'parses' is renamed to
    # 'grammar', and 'updated' is split into ('updated', 'version').
    splitter = updated_and_version_via_string

    from data_pipes.magnetics.flat_map_horizontal_via_definition import \
        dictionary_via_cells_via_definition

    instructions = {
        'name': ('string_via_cel', lambda s: s),
        'parses': ('rename_to', 'grammar'),
        'updated': ('split_to', ('updated', 'version'), splitter),
    }

    return dictionary_via_cells_via_definition(
        unsanitized_far_field_names=far_field_names,
        special_field_instructions=instructions,
        string_via_cel=lambda s: s)
def _CLI(stdin, stdout, stderr, argv):
    # Run `_do_CLI` through the argument parser, supplying `_eg_url` under
    # the key 'eg_url' via the `description_valueser` callback.

    def description_values():
        return {'eg_url': _eg_url}

    from script_lib.cheap_arg_parse import cheap_arg_parse
    return cheap_arg_parse(
        _do_CLI, stdin, stdout, stderr, argv, _formals(),
        description_valueser=description_values)
def _CLI(sin, sout, serr, ARGV):
    # Run `_do_CLI` through the argument parser with a help option and one
    # positional whose name is the value of `_moniker`.
    formals = (
        ('-h', '--help', 'this screen'),
        (_moniker, 'ohai i am thing 1'),
    )

    def description_values():
        return {'moniker': _moniker}

    from script_lib.cheap_arg_parse import cheap_arg_parse
    return cheap_arg_parse(
        _do_CLI, sin, sout, serr, ARGV, formals,
        description_valueser=description_values)
def produce_monitor():
    # Closure: builds an error monitor around the enclosing scope's `serr`
    from script_lib.magnetics.error_monitor_via_stderr import \
        func as monitor_via_stderr
    return monitor_via_stderr(serr)
def open_traversal_stream(listener, url, cache_path=None):
    # Give the traversal stream a context-manager face: entering yields the
    # generator itself and exiting does nothing.
    from contextlib import nullcontext
    stream = _build_traversal_stream(listener, url, cache_path)
    return nullcontext(stream)
def statistitican_via_collection_path(coll_path):
    """Build a `_Statistician` for the collection at `coll_path`.

    The result carries the two (mean, std) pairs read from the database's
    singleton text, the database itself, and a function that produces the
    commits of a document given its vendor title.
    """

    def document_commits_via_title(vendor_document_title):
        """Return an iterator of `_DocumentCommit`, or None if none found.

        (Before #history-B.4 the history came from one query with a JOIN.
        To accommodate rigged documents it is now done in two queries.)
        """
        cursor = execute(
            'SELECT notecard_based_document_ID, just_kidding_document_type '
            'FROM notecard_based_document '
            'WHERE document_title_from_vendor=?',
            (vendor_document_title, ))

        # Maybe we have no record of this document at all (strange)
        row = cursor.fetchone()
        if row is None:
            return

        # The title is expected to match at most one document
        assert cursor.fetchone() is None

        docu_ID, typ = row
        if 'docu_type_common' == typ:
            return for_notecard_based_document(docu_ID)
        assert 'docu_type_rigged' == typ
        return for_rigged_document(vendor_document_title)  # ick/meh

    def for_rigged_document(vendor_document_title):
        cursor = execute(
            'SELECT RDC.* '
            'FROM rigged_document_commit AS RDC '
            'JOIN rigged_document AS RD USING (rigged_document_ID) '
            'WHERE RD.document_title_from_vendor=? '
            'ORDER BY datetime(RDC.normal_datetime) ',
            (vendor_document_title, ))

        # (we want to make it be commit-graph order not chrono order,
        # but not badly enough to do it knowing that it's not covered)

        threes = mutable_threes(cursor, lambda row: RD_commit_record(*row))
        return docu_CIs_via_threes(threes, 'docu_type_rigged')

    def for_notecard_based_document(docu_ID):
        cursor = execute(
            'SELECT NBDC.* FROM notecard_based_document_commit as NBDC '
            'WHERE NBDC.notecard_based_document_ID=? '
            'ORDER BY datetime(NBDC.normal_datetime)',
            (docu_ID, ))

        threes = mutable_threes(cursor, NB_commit_rec_via_row)
        return docu_CIs_via_threes(threes, 'docu_type_common')

    def mutable_threes(cursor, record_via_row):
        # One mutable [datetime, verb, record] list per row. Lists (not
        # tuples) because the verb of the first one is overwritten later.
        while row := cursor.fetchone():
            rec = record_via_row(row)
            yield [datetime_via_record(rec), 'edit', rec]

    def docu_CIs_via_threes(threes, typ):
        scn = _scanner_via_iterator(threes)

        # If there are no commits in the database for this docu, strange
        if scn.empty:
            return None
        return docu_CIs_via_threes_scanner(scn, typ)

    def docu_CIs_via_threes_scanner(scn, typ):
        scn.peek[1] = 'create'  # meh: the first commit is the creation
        more = True
        while more:
            yield _DocumentCommit(*scn.next(), typ)
            more = not scn.empty

    # Datetime via record

    def datetime_via_record(rec):
        rec.tzinfo  # hi
        return strptime(rec.normal_datetime, '%Y-%m-%d %H:%M:%S')

    from datetime import datetime
    strptime = datetime.strptime

    # Connect to database

    from pho.document_history_._model import \
        database_via_collection_path_
    # (it's a sibling file to us but we are an entrypoint file)

    db = database_via_collection_path_(coll_path)
    assert db

    # Prepare statistics

    sing = db.singleton_text

    def mean_and_std_via_key(k):
        # The stored value is two floats separated by a single space
        two_as_string = sing.get(k)
        if two_as_string is None:
            xx(f"Did you generate the statistics? Not found: {k!r}")
        mean_s, std_s = two_as_string.split(' ')
        return float(mean_s), float(std_s)

    mean, std = mean_and_std_via_key('mean_and_std')
    mean_for_rigged, std_for_rigged = \
        mean_and_std_via_key('mean_and_std_for_rigged')

    from pho.document_history_._model import \
        RiggedDocumentCommitRecord_ as RD_commit_record

    NB_commit_rec_via_row = \
        db.notecard_based_document_commit_table.NBD_CI_via_row_

    execute = db.conn.execute

    # == BEGIN meh

    from dataclasses import dataclass
    from collections import namedtuple

    @dataclass
    class _Statistician:
        mean: float
        std: float
        mean_for_rigged: float
        std_for_rigged: float
        document_commits_via_title: callable
        db: object

    _DocumentCommit = namedtuple(
        '_DocumentCommit',
        ('datetime', 'verb', 'record', 'document_type'))

    # == END

    return _Statistician(
        mean=mean,
        std=std,
        mean_for_rigged=mean_for_rigged,
        std_for_rigged=std_for_rigged,
        document_commits_via_title=document_commits_via_title,
        db=db)
def _scanner_via_iterator(itr): assert hasattr(itr, '__next__') from text_lib.magnetics.scanner_via import scanner_via_iterator as func return func(itr)
# NOTE(review): the statements down to the `else` branch are the tail of a
# definition whose header lies outside this view; they are kept verbatim and
# their original nesting could not be determined from here.
dct['label'] = span.string
s = a['href']
if False:
    pass
else:
    dct['url'] = s


def _direct_children(node):  # #cp
    # Every element matched by '*' under `node`, via `_filter`
    return _filter('*', node)  # omit strings


def _filter(sel, el):
    # Thin wrapper over `sv.filter` (`sv` is defined outside this view)
    return sv.filter(sel, el)


def _ps_lib():
    # Import lazily and return the shared HTML script library module
    import data_pipes.format_adapters.html.script_common as lib
    return lib


if __name__ == '__main__':
    # Script entry point: run `_my_CLI` against the real process streams and
    # exit with whatever the argument parser returns.
    formals = _formals()
    kwargs = {'description_valueser': lambda: {'_this_one_url': _url}}
    import sys as o
    from script_lib.cheap_arg_parse import cheap_arg_parse as func
    exit(func(_my_CLI, o.stdin, o.stdout, o.stderr, o.argv, formals, **kwargs))

# #born
def _command_zizzy(sin, sout, serr, argv, efx):
    """ See things for the document indicated by <title> """

    # (The docstring is handed to the argument parser below, so its text is
    # left untouched.)

    definitions = _formals_for_zizzy(efx)

    from script_lib.cheap_arg_parse import prepare_tail_call_for_terminal
    fv, rc = prepare_tail_call_for_terminal(
        serr, argv, definitions, lambda: _command_zizzy.__doc__)
    if fv is None:
        return rc

    _, vals = fv
    coll_path = vals.pop(_coll_path_key)
    doc_title = vals.pop('title')
    do_list = vals.pop('list', False)
    assert not vals  # every parsed value must have been consumed

    # mon = efx.produce_monitor()
    # listener = mon.listener

    stato = statistitican_via_collection_path(coll_path)

    # List mode: one line per notecard-based document, then done
    if do_list:
        rows = stato.db.conn.execute(
            'SELECT '
            'head_notecard_EID, document_title_from_vendor '
            'FROM notecard_based_document '
            'ORDER BY head_notecard_EID DESC')
        for eid, title in rows:
            sout.write(''.join((eid, ' ', title, '\n')))
        return 0

    commits = stato.document_commits_via_title(doc_title)
    if commits is None:
        serr.write(f"(none found for {doc_title!r})\n")
        return 123

    # One output line per document commit
    count = 0
    for count, commit in enumerate(commits, 1):
        rec = commit.record
        inserted = rec.number_of_lines_inserted
        deleted = rec.number_of_lines_deleted
        pieces = [
            commit.datetime.isoformat(),
            '%7s' % (commit.verb, ),
            " %4s %4s" % (
                f'+{inserted}' if inserted else '',
                f'-{deleted}' if deleted else ''),
            ' ',
            rec.SHA[:8],
        ]

        # NOTE(review): the type is read from the record here, while the
        # commit tuple built in `statistitican_via_collection_path` carries
        # its own `document_type` field - confirm the record has one too.
        if 'docu_type_common' == rec.document_type:
            n = rec.number_of_notecards
            if 1 != n:
                pieces.append(f"in {n} notecards")
        else:
            assert 'docu_type_rigged' == rec.document_type

        sout.write(' '.join(pieces))
        sout.write('\n')

    serr.write(f"(seen {count} things)\n")
    return 0
def CLI_(sin, sout, serr, argv, svcser):
    # Apply up to three CSS selectors, one after another, to a local HTML
    # file, reporting progress to `serr`. The stripped text of each
    # third-selector match is written to `sout`.
    #
    # Returns the parser's result when parsing ends early, 7 when more than
    # three selectors are given, and otherwise the monitor's return code.

    # argv is reversed so the program name can be popped off the end; the
    # remainder (still reversed) goes to the parser.
    prog_name = (bash_argv := list(reversed(argv))).pop()

    from script_lib.cheap_arg_parse import formals_via_definitions as func
    foz = func(_formals(), lambda: prog_name)
    vals, es = foz.terminal_parse(serr, bash_argv)
    if vals is None:
        return es

    if vals.get('help'):
        return foz.write_help_into(serr, _doc)

    html_on_fs = vals.pop('local_html_file_on_your_hard_drive_lol')
    sels = vals.pop('selector')
    assert not vals  # every parsed value must have been consumed

    # Peel the selectors off one at a time; anything beyond three is an error
    stack = list(reversed(sels))
    first_selector = stack.pop()
    second_selector, third_selector = None, None
    if stack:
        second_selector = stack.pop()
    if stack:
        third_selector = stack.pop()
    if stack:
        leng = 3 + len(stack)
        serr.write(f"Max 3 selectors (had {leng})\n")
        serr.write(foz.invite_line)
        return 7

    def main():
        resolve_the_document_soup()
        resolve_the_first_selector()
        maybe_resolve_the_second_selector()
        maybe_resolve_the_third_selector()

    def maybe_resolve_the_third_selector():
        if not third_selector:
            return

        count, success_count, failure_count = 0, 0, 0
        for tag in self.tags:
            count += 1
            tags = tag.select(third_selector)
            leng = len(tags)

            # If we have 316 items, we don't want to repeat the same error
            # msg 316 times. We do a relatively simple statistical analysis
            # of the number of failures to decide how much is too much
            # failure.

            if 0 == leng:
                failure_count += 1
                serr.write(
                    f"in item {count}, no match for '{third_selector}'\n"
                )  # noqa: E501
                if 0 == success_count:
                    if 3 == failure_count:
                        serr.write(
                            "stopping because encountered 3 failures before any successes\n"
                        )  # noqa: E501
                        raise stop_exception()
                elif (3 * success_count) < failure_count:
                    serr.write(
                        "stoping because more than 3x as many failures as successes\n"
                    )  # noqa: E501
                    raise stop_exception()
                continue

            success_count += 1  # we will count match multiple as success

            if 1 < leng:
                serr.write(
                    f"(item {count} matched third selector {leng} times ('{third_selector}'))\n"
                )  # noqa: E501
                continue

            tag, = tags  # OVERWRITE PARENT TAG VARIABLE
            use_text = tag.text.strip()
            sout.write(use_text)
            sout.write('\n')

    def maybe_resolve_the_second_selector():
        if not second_selector:
            return
        tags = self.tag.select(second_selector)
        leng = len(tags)
        if 0 == leng:
            stop(f"didn't find any tags that matched '{second_selector}'")
        serr.write(
            f"(found {leng} tag(s) matching second selector ('{second_selector}'))\n"
        )  # noqa: E501
        self.tags = tags

    def resolve_the_first_selector():
        # The first selector must match exactly one tag
        tags = self.soup.select(first_selector)
        leng = len(tags)
        if 0 == leng:
            stop(f"didn't find any tags that matched '{first_selector}'")
        if 1 < leng:
            stop(f"expected 1 had {leng} tags matching that selector")
        serr.write(
            f"(found 1 tag matching first selector ('{first_selector}'))\n"
        )  # noqa: E501
        self.tag, = tags

    def resolve_the_document_soup():
        from data_pipes.format_adapters.html.script_common import \
            soup_via_locators_ as func
        soup = func(url='<was some url>',
                    html_document_path=html_on_fs, listener=listener)
        if not soup:
            # Stop without a message of our own. (`stop` needs a message
            # argument, so calling it bare would raise TypeError instead.)
            raise stop_exception()
        self.soup = soup

    # The `main` function object doubles as the attribute bag shared by the
    # steps above (`self.soup`, `self.tag`, `self.tags`).
    self = main  # #watch-the-world-burn

    def stop(msg):
        serr.write(msg)  # meh
        serr.write('\n')
        raise stop_exception()

    class stop_exception(RuntimeError):
        pass

    mon = svcser().produce_monitor()
    listener = mon.listener

    try:
        main()
    except stop_exception:
        pass  # already reported; the monitor carries the outcome

    return mon.returncode
def cli(sin, sout, serr, argv, rscser):
    # Adapter to the five-argument CLI signature: this one takes no
    # resourcer, and the last argument passed along is `children` instead.
    # (`func` and `children` are defined outside this block.)
    assert rscser is None
    result = func(sin, sout, serr, argv, children)
    return result