예제 #1
0
def curses_main(stdscr, cmd_autostart_plotting, cmd_autostart_archiving, cfg):
    log = Log()

    if cmd_autostart_plotting is not None:
        plotting_active = cmd_autostart_plotting
    else:
        plotting_active = cfg.commands.interactive.autostart_plotting

    archiving_configured = cfg.archiving is not None

    if not archiving_configured:
        archiving_active = False
    elif cmd_autostart_archiving is not None:
        archiving_active = cmd_autostart_archiving
    else:
        archiving_active = cfg.commands.interactive.autostart_archiving

    plotting_status = '<startup>'  # todo rename these msg?
    archiving_status = '<startup>'

    stdscr.nodelay(True)  # make getch() non-blocking
    stdscr.timeout(2000)

    # Create windows.  We'll size them in the main loop when we have their content.
    header_win = curses.newwin(1, 1, 1, 0)
    log_win = curses.newwin(1, 1, 1, 0)
    jobs_win = curses.newwin(1, 1, 1, 0)
    dirs_win = curses.newwin(1, 1, 1, 0)

    jobs = Job.get_running_jobs(cfg.logging.plots)
    last_refresh = None

    pressed_key = ''  # For debugging

    archdir_freebytes = None
    aging_reason = None

    while True:

        # A full refresh scans for and reads info for running jobs from
        # scratch (i.e., reread their logfiles).  Otherwise we'll only
        # initialize new jobs, and mostly rely on cached info.
        do_full_refresh = False
        elapsed = 0  # Time since last refresh, or zero if no prev. refresh
        if last_refresh is None:
            do_full_refresh = True
        else:
            elapsed = (datetime.datetime.now() - last_refresh).total_seconds()
            do_full_refresh = elapsed >= cfg.scheduling.polling_time_s

        if not do_full_refresh:
            jobs = Job.get_running_jobs(cfg.logging.plots, cached_jobs=jobs)

        else:
            last_refresh = datetime.datetime.now()
            jobs = Job.get_running_jobs(cfg.logging.plots)

            if plotting_active:
                (started,
                 msg) = manager.maybe_start_new_plot(cfg.directories,
                                                     cfg.scheduling,
                                                     cfg.plotting, cfg.logging)
                if (started):
                    if aging_reason is not None:
                        log.log(aging_reason)
                        aging_reason = None
                    log.log(msg)
                    plotting_status = '<just started job>'
                    jobs = Job.get_running_jobs(cfg.logging.plots,
                                                cached_jobs=jobs)
                else:
                    # If a plot is delayed for any reason other than stagger, log it
                    if msg.find("stagger") < 0:
                        aging_reason = msg
                    plotting_status = msg

            if archiving_configured:
                if archiving_active:
                    archiving_status, log_messages = archive.spawn_archive_process(
                        cfg.directories, cfg.archiving, cfg.logging, jobs)
                    for log_message in log_messages:
                        log.log(log_message)

                archdir_freebytes, log_messages = archive.get_archdir_freebytes(
                    cfg.archiving)
                for log_message in log_messages:
                    log.log(log_message)

        # Get terminal size.  Recommended method is stdscr.getmaxyx(), but this
        # does not seem to work on some systems.  It may be a bug in Python
        # curses, maybe having to do with registering sigwinch handlers in
        # multithreaded environments.  See e.g.
        #     https://stackoverflow.com/questions/33906183#33906270
        # Alternative option is to call out to `stty size`.  For now, we
        # support both strategies, selected by a config option.
        # TODO: also try shutil.get_terminal_size()
        n_rows: int
        n_cols: int
        if cfg.user_interface.use_stty_size:
            completed_process = subprocess.run(['stty', 'size'],
                                               check=True,
                                               encoding='utf-8',
                                               stdout=subprocess.PIPE)
            elements = completed_process.stdout.split()
            (n_rows, n_cols) = [int(v) for v in elements]
        else:
            (n_rows, n_cols) = map(int, stdscr.getmaxyx())

        stdscr.clear()
        stdscr.resize(n_rows, n_cols)
        curses.resize_term(n_rows, n_cols)

        #
        # Obtain and measure content
        #

        # Directory prefixes, for abbreviation
        tmp_prefix = os.path.commonpath(cfg.directories.tmp)
        dst_dir = cfg.directories.get_dst_directories()
        dst_prefix = os.path.commonpath(dst_dir)
        if archiving_configured:
            archive_directories = archdir_freebytes.keys()
            if len(archive_directories) == 0:
                arch_prefix = ''
            else:
                arch_prefix = os.path.commonpath(archive_directories)

        n_tmpdirs = len(cfg.directories.tmp)

        # Directory reports.
        tmp_report = reporting.tmp_dir_report(jobs, cfg.directories,
                                              cfg.scheduling, n_cols, 0,
                                              n_tmpdirs, tmp_prefix)
        dst_report = reporting.dst_dir_report(jobs, dst_dir, n_cols,
                                              dst_prefix)
        if archiving_configured:
            arch_report = reporting.arch_dir_report(archdir_freebytes, n_cols,
                                                    arch_prefix)
            if not arch_report:
                arch_report = '<no archive dir info>'
        else:
            arch_report = '<archiving not configured>'

        #
        # Layout
        #

        tmp_h = len(tmp_report.splitlines())
        tmp_w = len(max(tmp_report.splitlines(), key=len)) + 1
        dst_h = len(dst_report.splitlines())
        dst_w = len(max(dst_report.splitlines(), key=len)) + 1
        arch_h = len(arch_report.splitlines()) + 1
        arch_w = n_cols

        header_h = 3
        dirs_h = max(tmp_h, dst_h) + arch_h
        remainder = n_rows - (header_h + dirs_h)
        jobs_h = max(5, math.floor(remainder * 0.6))
        logs_h = n_rows - (header_h + jobs_h + dirs_h)

        header_pos = 0
        jobs_pos = header_pos + header_h
        stdscr.resize(n_rows, n_cols)
        dirs_pos = jobs_pos + jobs_h
        logscreen_pos = dirs_pos + dirs_h

        linecap = n_cols - 1
        logs_h = n_rows - (header_h + jobs_h + dirs_h)

        try:
            header_win = curses.newwin(header_h, n_cols, header_pos, 0)
            log_win = curses.newwin(logs_h, n_cols, logscreen_pos, 0)
            jobs_win = curses.newwin(jobs_h, n_cols, jobs_pos, 0)
            dirs_win = curses.newwin(dirs_h, n_cols, dirs_pos, 0)
        except Exception:
            raise Exception(
                'Failed to initialize curses windows, try a larger '
                'terminal window.')

        #
        # Write
        #

        # Header
        header_win.addnstr(0, 0, 'Plotman', linecap, curses.A_BOLD)
        timestamp = datetime.datetime.now().strftime("%H:%M:%S")
        refresh_msg = "now" if do_full_refresh else f"{int(elapsed)}s/{cfg.scheduling.polling_time_s}"
        header_win.addnstr(f" {timestamp} (refresh {refresh_msg})", linecap)
        header_win.addnstr('  |  <P>lotting: ', linecap, curses.A_BOLD)
        header_win.addnstr(
            plotting_status_msg(plotting_active, plotting_status), linecap)
        header_win.addnstr(' <A>rchival: ', linecap, curses.A_BOLD)
        header_win.addnstr(
            archiving_status_msg(archiving_configured, archiving_active,
                                 archiving_status), linecap)

        # Oneliner progress display
        header_win.addnstr(1, 0, 'Jobs (%d): ' % len(jobs), linecap)
        header_win.addnstr('[' + reporting.job_viz(jobs) + ']', linecap)

        # These are useful for debugging.
        # header_win.addnstr('  term size: (%d, %d)' % (n_rows, n_cols), linecap)  # Debuggin
        # if pressed_key:
        # header_win.addnstr(' (keypress %s)' % str(pressed_key), linecap)
        header_win.addnstr(2, 0, 'Prefixes:', linecap, curses.A_BOLD)
        header_win.addnstr('  tmp=', linecap, curses.A_BOLD)
        header_win.addnstr(tmp_prefix, linecap)
        header_win.addnstr('  dst=', linecap, curses.A_BOLD)
        header_win.addnstr(dst_prefix, linecap)
        if archiving_configured:
            header_win.addnstr('  archive=', linecap, curses.A_BOLD)
            header_win.addnstr(arch_prefix, linecap)
        header_win.addnstr(' (remote)', linecap)

        # Jobs
        jobs_win.addstr(
            0, 0,
            reporting.status_report(jobs, n_cols, jobs_h, tmp_prefix,
                                    dst_prefix))
        jobs_win.chgat(0, 0, curses.A_REVERSE)

        # Dirs
        tmpwin_dstwin_gutter = 6

        maxtd_h = max([tmp_h, dst_h])

        tmpwin = curses.newwin(tmp_h, tmp_w, dirs_pos + int(maxtd_h - tmp_h),
                               0)
        tmpwin.addstr(tmp_report)
        tmpwin.chgat(0, 0, curses.A_REVERSE)

        dstwin = curses.newwin(dst_h, dst_w, dirs_pos + int(
            (maxtd_h - dst_h) / 2), tmp_w + tmpwin_dstwin_gutter)
        dstwin.addstr(dst_report)
        dstwin.chgat(0, 0, curses.A_REVERSE)

        archwin = curses.newwin(arch_h, arch_w, dirs_pos + maxtd_h, 0)
        archwin.addstr(0, 0, 'Archive dirs free space', curses.A_REVERSE)
        archwin.addstr(1, 0, arch_report)

        # Log.  Could use a pad here instead of managing scrolling ourselves, but
        # this seems easier.
        log_win.addnstr(
            0, 0,
            ('Log: %d (<up>/<down>/<end> to scroll)\n' % log.get_cur_pos()),
            linecap, curses.A_REVERSE)
        for i, logline in enumerate(log.cur_slice(logs_h - 1)):
            log_win.addnstr(i + 1, 0, logline, linecap)

        stdscr.noutrefresh()
        header_win.noutrefresh()
        jobs_win.noutrefresh()
        tmpwin.noutrefresh()
        dstwin.noutrefresh()
        archwin.noutrefresh()
        log_win.noutrefresh()
        curses.doupdate()

        try:
            key = stdscr.getch()
        except KeyboardInterrupt:
            key = ord('q')

        if key == curses.KEY_UP:
            log.shift_slice(-1)
            pressed_key = 'up'
        elif key == curses.KEY_DOWN:
            log.shift_slice(1)
            pressed_key = 'dwn'
        elif key == curses.KEY_END:
            log.shift_slice_to_end()
            pressed_key = 'end'
        elif key == ord('p'):
            plotting_active = not plotting_active
            pressed_key = 'p'
        elif key == ord('a'):
            archiving_active = not archiving_active
            pressed_key = 'a'
        elif key == ord('q'):
            break
        else:
            pressed_key = key
예제 #2
0
파일: plotman.py 프로젝트: graemes/plotman
def main() -> None:
    random.seed()

    pm_parser = PlotmanArgParser()
    args = pm_parser.parse_args()

    if args.cmd == 'version':
        import pkg_resources
        print(pkg_resources.get_distribution('plotman'))
        return

    elif args.cmd == 'config':
        config_file_path = configuration.get_path()
        if args.config_subcommand == 'path':
            if os.path.isfile(config_file_path):
                print(config_file_path)
                return
            print(
                f"No 'plotman.yaml' file exists at expected location: '{config_file_path}'"
            )
            print(
                f"To generate a default config file, run: 'plotman config generate'"
            )
            return
        if args.config_subcommand == 'generate':
            if os.path.isfile(config_file_path):
                overwrite = None
                while overwrite not in {"y", "n"}:
                    overwrite = input(
                        f"A 'plotman.yaml' file already exists at the default location: '{config_file_path}' \n\n"
                        "\tInput 'y' to overwrite existing file, or 'n' to exit without overwrite."
                    ).lower()
                    if overwrite == 'n':
                        print("\nExited without overrwriting file")
                        return

            # Copy the default plotman.yaml (packaged in plotman/resources/) to the user's config file path,
            # creating the parent plotman file/directory if it does not yet exist
            with importlib.resources.path(plotman_resources,
                                          "plotman.yaml") as default_config:
                config_dir = os.path.dirname(config_file_path)

                os.makedirs(config_dir, exist_ok=True)
                copyfile(default_config, config_file_path)
                print(f"\nWrote default plotman.yaml to: {config_file_path}")
                return

        if not args.config_subcommand:
            print("No action requested, add 'generate' or 'path'.")
            return

    config_path = configuration.get_path()
    config_text = configuration.read_configuration_text(config_path)
    preset_target_definitions_text = importlib.resources.read_text(
        plotman_resources,
        "target_definitions.yaml",
    )

    cfg = configuration.get_validated_configs(config_text, config_path,
                                              preset_target_definitions_text)

    with cfg.setup():
        root_logger = logging.getLogger()
        root_handler = logging.handlers.RotatingFileHandler(
            backupCount=10,
            encoding='utf-8',
            filename=cfg.logging.application,
            maxBytes=10_000_000,
        )
        root_formatter = Iso8601Formatter(fmt='%(asctime)s: %(message)s')
        root_handler.setFormatter(root_formatter)
        root_logger.addHandler(root_handler)
        root_logger.setLevel(logging.INFO)
        root_logger.info('Start root logger')

        disk_space_logger = logging.getLogger("disk_space")
        disk_space_logger.propagate = False
        disk_space_handler = logging.handlers.RotatingFileHandler(
            backupCount=10,
            encoding='utf-8',
            filename=cfg.logging.disk_spaces,
            maxBytes=10_000_000,
        )
        disk_space_formatter = Iso8601Formatter(fmt='%(asctime)s: %(message)s')
        disk_space_handler.setFormatter(disk_space_formatter)
        disk_space_logger.addHandler(disk_space_handler)
        disk_space_logger.setLevel(logging.INFO)
        disk_space_logger.info('Start disk space logger')

        #
        # Stay alive, spawning plot jobs
        #
        if args.cmd == 'plot':
            print('...starting plot loop')
            while True:
                (started,
                 msg) = manager.maybe_start_new_plot(cfg.directories,
                                                     cfg.scheduling,
                                                     cfg.plotting, cfg.logging)

                # TODO: report this via a channel that can be polled on demand, so we don't spam the console
                if started:
                    print('%s' % (msg))
                else:
                    print('...sleeping %d s: %s' %
                          (cfg.scheduling.polling_time_s, msg))
                root_logger.info('[plot] %s', msg)

                time.sleep(cfg.scheduling.polling_time_s)

        #
        # Analysis of completed jobs
        #
        elif args.cmd == 'analyze':

            analyzer.analyze(args.logfile, args.clipterminals, args.bytmp,
                             args.bybitfield)

        #
        # Exports log metadata to CSV
        #
        elif args.cmd == 'export':
            logfilenames = glob.glob(
                os.path.join(cfg.logging.plots, '*.plot.log'))
            if args.save_to is None:
                csv_exporter.generate(logfilenames=logfilenames,
                                      file=sys.stdout)
            else:
                with open(args.save_to, 'w', encoding='utf-8') as file:
                    csv_exporter.generate(logfilenames=logfilenames, file=file)

        else:
            jobs = Job.get_running_jobs(cfg.logging.plots)

            # Status report
            if args.cmd == 'status':
                if args.json:
                    # convert jobs list into json
                    result = reporting.json_report(jobs)
                else:
                    result = "{0}\n\n{1}\n\nUpdated at: {2}".format(
                        reporting.status_report(jobs, get_term_width()),
                        reporting.summary(jobs),
                        datetime.datetime.today().strftime("%c"),
                    )
                print(result)

            # Prometheus report
            if args.cmd == 'prometheus':
                print(reporting.prometheus_report(jobs))

            # Directories report
            elif args.cmd == 'dirs':
                print(
                    reporting.dirs_report(jobs, cfg.directories, cfg.archiving,
                                          cfg.scheduling, get_term_width()))

            elif args.cmd == 'interactive':
                interactive.run_interactive(
                    cfg=cfg,
                    autostart_plotting=args.autostart_plotting,
                    autostart_archiving=args.autostart_archiving,
                )

            # Start running archival
            elif args.cmd == 'archive':
                if cfg.archiving is None:
                    start_msg = 'archiving not configured but is required for this command'
                    print(start_msg)
                    root_logger.info('[archive] %s', start_msg)
                else:
                    start_msg = '...starting archive loop'
                    print(start_msg)
                    root_logger.info('[archive] %s', start_msg)
                    firstit = True
                    while True:
                        if not firstit:
                            print('Sleeping %d s until next iteration...' %
                                  (cfg.scheduling.polling_time_s))
                            time.sleep(cfg.scheduling.polling_time_s)
                            jobs = Job.get_running_jobs(cfg.logging.plots)
                        firstit = False

                        archiving_status, log_messages = archive.spawn_archive_process(
                            cfg.directories, cfg.archiving, cfg.logging, jobs)
                        if log_messages:
                            for log_message in log_messages:
                                print(log_message)
                                root_logger.info('[archive] %s', log_message)
                        else:
                            root_logger.info('[archive] %s', archiving_status)

            # Debugging: show the destination drive usage schedule
            elif args.cmd == 'dsched':
                for (d, ph) in manager.dstdirs_to_furthest_phase(jobs).items():
                    print('  %s : %s' % (d, str(ph)))

            #
            # Job control commands
            #
            elif args.cmd in [
                    'details', 'logs', 'files', 'kill', 'suspend', 'resume'
            ]:
                print(args)

                selected = []

                # TODO: clean up treatment of wildcard
                if args.idprefix[0] == 'all':
                    selected = jobs
                else:
                    # TODO: allow multiple idprefixes, not just take the first
                    selected = manager.select_jobs_by_partial_id(
                        jobs, args.idprefix[0])
                    if (len(selected) == 0):
                        print('Error: %s matched no jobs.' % args.idprefix[0])
                    elif len(selected) > 1:
                        print('Error: "%s" matched multiple jobs:' %
                              args.idprefix[0])
                        for j in selected:
                            print('  %s' % j.plot_id)
                        selected = []

                for job in selected:
                    if args.cmd == 'details':
                        print(job.status_str_long())

                    elif args.cmd == 'logs':
                        job.print_logs(args.follow)

                    elif args.cmd == 'files':
                        temp_files = job.get_temp_files()
                        for f in temp_files:
                            print('  %s' % f)

                    elif args.cmd == 'kill':
                        # First suspend so job doesn't create new files
                        print('Pausing PID %d, plot id %s' %
                              (job.proc.pid, job.plot_id))
                        job.suspend()

                        temp_files = job.get_temp_files()

                        print('Will kill pid %d, plot id %s' %
                              (job.proc.pid, job.plot_id))
                        print('Will delete %d temp files' % len(temp_files))

                        if args.force:
                            conf = 'y'
                        else:
                            conf = input('Are you sure? ("y" to confirm): ')

                        if (conf != 'y'):
                            print(
                                'Canceled.  If you wish to resume the job, do so manually.'
                            )
                        else:
                            print('killing...')

                            job.cancel()

                            print('cleaning up temp files...')

                            for f in temp_files:
                                os.remove(f)

                    elif args.cmd == 'suspend':
                        print('Suspending ' + job.plot_id)
                        job.suspend()
                    elif args.cmd == 'resume':
                        print('Resuming ' + job.plot_id)
                        job.resume()
예제 #3
0
def main():
    random.seed()

    pm_parser = PlotmanArgParser()
    args = pm_parser.parse_args()

    if args.cmd == 'version':
        import pkg_resources
        print(pkg_resources.get_distribution('plotman'))
        return

    elif args.cmd == 'config':
        config_file_path = configuration.get_path()
        if args.config_subcommand == 'path':
            if os.path.isfile(config_file_path):
                print(config_file_path)
                return
            print(
                f"No 'plotman.yaml' file exists at expected location: '{config_file_path}'"
            )
            print(
                f"To generate a default config file, run: 'plotman config generate'"
            )
            return 1
        if args.config_subcommand == 'generate':
            if os.path.isfile(config_file_path):
                overwrite = None
                while overwrite not in {"y", "n"}:
                    overwrite = input(
                        f"A 'plotman.yaml' file already exists at the default location: '{config_file_path}' \n\n"
                        "\tInput 'y' to overwrite existing file, or 'n' to exit without overwrite."
                    ).lower()
                    if overwrite == 'n':
                        print("\nExited without overrwriting file")
                        return

            # Copy the default plotman.yaml (packaged in plotman/resources/) to the user's config file path,
            # creating the parent plotman file/directory if it does not yet exist
            with importlib.resources.path(plotman_resources,
                                          "plotman.yaml") as default_config:
                config_dir = os.path.dirname(config_file_path)

                os.makedirs(config_dir, exist_ok=True)
                copyfile(default_config, config_file_path)
                print(f"\nWrote default plotman.yaml to: {config_file_path}")
                return

        if not args.config_subcommand:
            print("No action requested, add 'generate' or 'path'.")
            return

    config_path = configuration.get_path()
    config_text = configuration.read_configuration_text(config_path)
    cfg = configuration.get_validated_configs(config_text, config_path)

    #
    # Stay alive, spawning plot jobs
    #
    if args.cmd == 'plot':
        print('...starting plot loop')
        while True:
            wait_reason = manager.maybe_start_new_plot(cfg.directories,
                                                       cfg.scheduling,
                                                       cfg.plotting)

            # TODO: report this via a channel that can be polled on demand, so we don't spam the console
            if wait_reason:
                print('...sleeping %d s: %s' %
                      (cfg.scheduling.polling_time_s, wait_reason))

            time.sleep(cfg.scheduling.polling_time_s)

    #
    # Analysis of completed jobs
    #
    elif args.cmd == 'analyze':

        analyzer.analyze(args.logfile, args.clipterminals, args.bytmp,
                         args.bybitfield)

    else:
        jobs = Job.get_running_jobs(cfg.directories.log)

        # Status report
        if args.cmd == 'status':
            print(reporting.status_report(jobs, get_term_width()))

        # Directories report
        elif args.cmd == 'dirs':
            print(
                reporting.dirs_report(jobs, cfg.directories, cfg.scheduling,
                                      get_term_width()))

        elif args.cmd == 'interactive':
            interactive.run_interactive()

        # Start running archival
        elif args.cmd == 'archive':
            print('...starting archive loop')
            firstit = True
            while True:
                if not firstit:
                    print('Sleeping 60s until next iteration...')
                    time.sleep(60)
                    jobs = Job.get_running_jobs(cfg.directories.log)
                firstit = False

                archiving_status, log_message = archive.spawn_archive_process(
                    cfg.directories, jobs)
                if log_message:
                    print(log_message)

        # Debugging: show the destination drive usage schedule
        elif args.cmd == 'dsched':
            for (d, ph) in manager.dstdirs_to_furthest_phase(jobs).items():
                print('  %s : %s' % (d, str(ph)))

        #
        # Job control commands
        #
        elif args.cmd in ['details', 'files', 'kill', 'suspend', 'resume']:
            print(args)

            selected = []

            # TODO: clean up treatment of wildcard
            if args.idprefix[0] == 'all':
                selected = jobs
            else:
                # TODO: allow multiple idprefixes, not just take the first
                selected = manager.select_jobs_by_partial_id(
                    jobs, args.idprefix[0])
                if (len(selected) == 0):
                    print('Error: %s matched no jobs.' % args.idprefix[0])
                elif len(selected) > 1:
                    print('Error: "%s" matched multiple jobs:' %
                          args.idprefix[0])
                    for j in selected:
                        print('  %s' % j.plot_id)
                    selected = []

            for job in selected:
                if args.cmd == 'details':
                    print(job.status_str_long())

                elif args.cmd == 'files':
                    temp_files = job.get_temp_files()
                    for f in temp_files:
                        print('  %s' % f)

                elif args.cmd == 'kill':
                    # First suspend so job doesn't create new files
                    print('Pausing PID %d, plot id %s' %
                          (job.proc.pid, job.plot_id))
                    job.suspend()

                    temp_files = job.get_temp_files()
                    print('Will kill pid %d, plot id %s' %
                          (job.proc.pid, job.plot_id))
                    print('Will delete %d temp files' % len(temp_files))
                    conf = input('Are you sure? ("y" to confirm): ')
                    if (conf != 'y'):
                        print(
                            'canceled.  If you wish to resume the job, do so manually.'
                        )
                    else:
                        print('killing...')
                        job.cancel()
                        print('cleaing up temp files...')
                        for f in temp_files:
                            os.remove(f)

                elif args.cmd == 'suspend':
                    print('Suspending ' + job.plot_id)
                    job.suspend()
                elif args.cmd == 'resume':
                    print('Resuming ' + job.plot_id)
                    job.resume()