def dst_dir_report(jobs, dstdirs, width, prefix=''):
    '''Render a borderless text table summarizing each dst directory.

    Columns: abbreviated path, plot count, GB free, inbound job phases,
    and the archive priority computed from the eldest inbound phase.

    Args:
        jobs: list of running jobs (used to derive per-dir inbound phases).
        dstdirs: iterable of dst directory paths to report on.
        width: maximum rendered table width, in characters.
        prefix: common path prefix stripped by abbr_path() for display.

    Returns:
        The drawn table as a string.
    '''
    tab = tt.Texttable()
    dir2oldphase = manager.dstdirs_to_furthest_phase(jobs)
    # NOTE(review): the original also computed dstdirs_to_youngest_phase()
    # into a local that was never read; removed as dead code.
    headings = ['dst', 'plots', 'GBfree', 'inbnd phases', 'pri']
    tab.header(headings)
    tab.set_cols_dtype('t' * len(headings))
    for d in sorted(dstdirs):
        # TODO: This logic is replicated in archive.py's priority computation,
        # maybe by moving more of the logic in to directory.py
        eldest_ph = dir2oldphase.get(d, job.Phase(0, 0))
        phases = job.job_phases_for_dstdir(d, jobs)
        dir_plots = plot_util.list_k32_plots(d)
        gb_free = int(plot_util.df_b(d) / plot_util.GB)
        n_plots = len(dir_plots)
        priority = archive.compute_priority(eldest_ph, gb_free, n_plots)
        row = [abbr_path(d, prefix), n_plots, gb_free,
               phases_str(phases, 5), priority]
        tab.add_row(row)
    tab.set_max_width(width)
    # The original called set_deco(BORDER | HEADER) and then immediately
    # overrode it with set_deco(0); only the final (borderless) setting
    # ever took effect, so keep just that one.
    tab.set_deco(0)  # No borders
    return tab.draw()
def archive(dir_cfg, all_jobs):
    '''Configure one archive job.  Needs to know all jobs so it can avoid IO
    contention on the plotting dstdir drives.  Returns either (False,
    <reason>) if we should not execute an archive job or (True, <cmd>) with
    the archive command if we should.'''
    if dir_cfg.archive is None:
        return (False, "No 'archive' settings declared in plotman.yaml")

    dir2ph = manager.dstdirs_to_furthest_phase(all_jobs)

    # Scan every dst dir, keeping the first plot from whichever dir scores
    # the highest archive priority (ties go to the later dir in the list).
    chosen_plot = None
    best_priority = -100000000
    for dstdir in dir_cfg.dst:
        plots_here = plot_util.list_k32_plots(dstdir)
        phase = dir2ph.get(dstdir, (0, 0))
        free_gb = plot_util.df_b(dstdir) / plot_util.GB
        pri = compute_priority(phase, free_gb, len(plots_here))
        if plots_here and pri >= best_priority:
            best_priority = pri
            chosen_plot = plots_here[0]

    if not chosen_plot:
        return (False, 'No plots found')

    # Fixed archive destination (see rsync_dest for how it is combined
    # with the archive settings).
    archdir = '/volume1/chia_plots'
    bwlimit = dir_cfg.archive.rsyncd_bwlimit
    throttle_arg = ('--bwlimit=%d' % bwlimit) if bwlimit else ''
    cmd = ('rsync %s --remove-source-files -P %s %s' %
           (throttle_arg, chosen_plot, rsync_dest(dir_cfg.archive, archdir)))
    return (True, cmd)
def test_dstdirs_to_furthest_phase():
    # /plots2 has three inbound jobs; the furthest phase (3, 1) must win.
    jobs = [
        job_w_dstdir_phase('/plots1', (1, 5)),
        job_w_dstdir_phase('/plots2', (1, 1)),
        job_w_dstdir_phase('/plots2', (3, 1)),
        job_w_dstdir_phase('/plots2', (2, 1)),
        job_w_dstdir_phase('/plots3', (4, 1)),
    ]
    expected = {
        '/plots1': (1, 5),
        '/plots2': (3, 1),
        '/plots3': (4, 1),
    }
    assert manager.dstdirs_to_furthest_phase(jobs) == expected
def archive(dir_cfg, all_jobs):
    '''Configure one archive job.  Needs to know all jobs so it can avoid IO
    contention on the plotting dstdir drives.  Returns either (False,
    <reason>) if we should not execute an archive job or (True, <cmd>) with
    the archive command if we should.'''
    if dir_cfg.archive is None:
        return (False, "No 'archive' settings declared in plotman.yaml")

    dir2ph = manager.dstdirs_to_furthest_phase(all_jobs)
    best_priority = -100000000
    chosen_plot = None
    # Pick the first plot from the dst dir with the highest archive priority.
    for d in dir_cfg.dst:
        ph = dir2ph.get(d, (0, 0))
        dir_plots = plot_util.list_k32_plots(d)
        gb_free = plot_util.df_b(d) / plot_util.GB
        n_plots = len(dir_plots)
        priority = compute_priority(ph, gb_free, n_plots)
        if priority >= best_priority and dir_plots:
            best_priority = priority
            chosen_plot = dir_plots[0]

    if not chosen_plot:
        return (False, 'No plots found')

    # TODO: sanity check that archive machine is available
    # TODO: filter drives mounted RO

    #
    # Pick first archive dir with sufficient space
    #
    archdir_freebytes = get_archdir_freebytes(dir_cfg.archive)
    if not archdir_freebytes:
        return (False, 'No free archive dirs found.')

    archdir = ''
    # Require 20% headroom over the nominal k32 plot size before
    # considering an archive dir eligible.
    available = [(d, space) for (d, space) in archdir_freebytes.items()
                 if space > 1.2 * plot_util.get_k32_plotsize()]
    if len(available) > 0:
        # Clamp the configured index so it always selects a valid entry.
        index = min(dir_cfg.archive.index, len(available) - 1)
        (archdir, freespace) = sorted(available)[index]

    if not archdir:
        return (False, 'No archive directories found with enough free space')

    # NOTE(review): the original also built a "Found %s with ~%d GB free"
    # message here that was never used anywhere; removed as dead code.
    bwlimit = dir_cfg.archive.rsyncd_bwlimit
    throttle_arg = ('--bwlimit=%d' % bwlimit) if bwlimit else ''
    # NOTE(review): '[email protected]' below looks like a mangled ssh cipher
    # argument (e.g. an @openssh.com cipher name) — verify against the
    # upstream source; left byte-identical here.
    cmd = ('rsync %s --remove-source-files -P -e "ssh -T -c [email protected] -o Compression=no -x" %s %s' %
           (throttle_arg, chosen_plot, rsync_dest(dir_cfg.archive, archdir)))
    return (True, cmd)
def test_dstdirs_to_furthest_phase() -> None:
    # /plots2 carries three jobs; its furthest phase, Phase(3, 1), must win.
    dir_phase_pairs = [
        ('/plots1', job.Phase(1, 5)),
        ('/plots2', job.Phase(1, 1)),
        ('/plots2', job.Phase(3, 1)),
        ('/plots2', job.Phase(2, 1)),
        ('/plots3', job.Phase(4, 1)),
    ]
    all_jobs = [job_w_dstdir_phase(d, ph) for (d, ph) in dir_phase_pairs]
    expected = {
        '/plots1': job.Phase(1, 5),
        '/plots2': job.Phase(3, 1),
        '/plots3': job.Phase(4, 1),
    }
    assert manager.dstdirs_to_furthest_phase(all_jobs) == expected
def main() -> None:
    '''Plotman CLI entry point: parse arguments and dispatch the command.

    Handles config management ('version', 'config'), the long-running
    'plot' and 'archive' loops, one-shot reports ('status', 'prometheus',
    'dirs', 'dsched', 'analyze', 'export', 'interactive'), and per-job
    control commands ('details', 'logs', 'files', 'kill', 'suspend',
    'resume').
    '''
    random.seed()
    pm_parser = PlotmanArgParser()
    args = pm_parser.parse_args()

    if args.cmd == 'version':
        import pkg_resources
        print(pkg_resources.get_distribution('plotman'))
        return

    elif args.cmd == 'config':
        config_file_path = configuration.get_path()
        if args.config_subcommand == 'path':
            if os.path.isfile(config_file_path):
                print(config_file_path)
                return
            print(
                f"No 'plotman.yaml' file exists at expected location: '{config_file_path}'"
            )
            # No placeholders in this message, so a plain string (the
            # original used a needless f-string prefix).
            print(
                "To generate a default config file, run: 'plotman config generate'"
            )
            return
        if args.config_subcommand == 'generate':
            if os.path.isfile(config_file_path):
                overwrite = None
                while overwrite not in {"y", "n"}:
                    overwrite = input(
                        f"A 'plotman.yaml' file already exists at the default location: '{config_file_path}' \n\n"
                        "\tInput 'y' to overwrite existing file, or 'n' to exit without overwrite."
                    ).lower()
                if overwrite == 'n':
                    # Typo fix: was "overrwriting".
                    print("\nExited without overwriting file")
                    return

            # Copy the default plotman.yaml (packaged in plotman/resources/)
            # to the user's config file path, creating the parent plotman
            # file/directory if it does not yet exist
            with importlib.resources.path(plotman_resources, "plotman.yaml") as default_config:
                config_dir = os.path.dirname(config_file_path)
                os.makedirs(config_dir, exist_ok=True)
                copyfile(default_config, config_file_path)
                print(f"\nWrote default plotman.yaml to: {config_file_path}")
            return

        if not args.config_subcommand:
            print("No action requested, add 'generate' or 'path'.")
        return

    config_path = configuration.get_path()
    config_text = configuration.read_configuration_text(config_path)
    preset_target_definitions_text = importlib.resources.read_text(
        plotman_resources,
        "target_definitions.yaml",
    )
    cfg = configuration.get_validated_configs(config_text, config_path,
                                              preset_target_definitions_text)

    with cfg.setup():
        # Application-wide rotating log file.
        root_logger = logging.getLogger()
        root_handler = logging.handlers.RotatingFileHandler(
            backupCount=10,
            encoding='utf-8',
            filename=cfg.logging.application,
            maxBytes=10_000_000,
        )
        root_formatter = Iso8601Formatter(fmt='%(asctime)s: %(message)s')
        root_handler.setFormatter(root_formatter)
        root_logger.addHandler(root_handler)
        root_logger.setLevel(logging.INFO)
        root_logger.info('Start root logger')

        # Separate, non-propagating logger for disk-space samples.
        disk_space_logger = logging.getLogger("disk_space")
        disk_space_logger.propagate = False
        disk_space_handler = logging.handlers.RotatingFileHandler(
            backupCount=10,
            encoding='utf-8',
            filename=cfg.logging.disk_spaces,
            maxBytes=10_000_000,
        )
        disk_space_formatter = Iso8601Formatter(fmt='%(asctime)s: %(message)s')
        disk_space_handler.setFormatter(disk_space_formatter)
        disk_space_logger.addHandler(disk_space_handler)
        disk_space_logger.setLevel(logging.INFO)
        disk_space_logger.info('Start disk space logger')

        #
        # Stay alive, spawning plot jobs
        #
        if args.cmd == 'plot':
            print('...starting plot loop')
            while True:
                (started, msg) = manager.maybe_start_new_plot(
                    cfg.directories, cfg.scheduling, cfg.plotting, cfg.logging)

                # TODO: report this via a channel that can be polled on demand, so we don't spam the console
                if started:
                    print('%s' % (msg))
                else:
                    print('...sleeping %d s: %s' %
                          (cfg.scheduling.polling_time_s, msg))
                    root_logger.info('[plot] %s', msg)
                    time.sleep(cfg.scheduling.polling_time_s)

        #
        # Analysis of completed jobs
        #
        elif args.cmd == 'analyze':
            analyzer.analyze(args.logfile, args.clipterminals,
                             args.bytmp, args.bybitfield)

        #
        # Exports log metadata to CSV
        #
        elif args.cmd == 'export':
            logfilenames = glob.glob(
                os.path.join(cfg.logging.plots, '*.plot.log'))
            if args.save_to is None:
                csv_exporter.generate(logfilenames=logfilenames, file=sys.stdout)
            else:
                with open(args.save_to, 'w', encoding='utf-8') as file:
                    csv_exporter.generate(logfilenames=logfilenames, file=file)

        else:
            jobs = Job.get_running_jobs(cfg.logging.plots)

            # Status report
            if args.cmd == 'status':
                if args.json:
                    # convert jobs list into json
                    result = reporting.json_report(jobs)
                else:
                    result = "{0}\n\n{1}\n\nUpdated at: {2}".format(
                        reporting.status_report(jobs, get_term_width()),
                        reporting.summary(jobs),
                        datetime.datetime.today().strftime("%c"),
                    )
                print(result)

            # Prometheus report
            # (was a bare 'if'; 'elif' matches the sibling branches and the
            # commands are mutually exclusive, so behavior is unchanged)
            elif args.cmd == 'prometheus':
                print(reporting.prometheus_report(jobs))

            # Directories report
            elif args.cmd == 'dirs':
                print(
                    reporting.dirs_report(jobs, cfg.directories, cfg.archiving,
                                          cfg.scheduling, get_term_width()))

            elif args.cmd == 'interactive':
                interactive.run_interactive(
                    cfg=cfg,
                    autostart_plotting=args.autostart_plotting,
                    autostart_archiving=args.autostart_archiving,
                )

            # Start running archival
            elif args.cmd == 'archive':
                if cfg.archiving is None:
                    start_msg = 'archiving not configured but is required for this command'
                    print(start_msg)
                    root_logger.info('[archive] %s', start_msg)
                else:
                    start_msg = '...starting archive loop'
                    print(start_msg)
                    root_logger.info('[archive] %s', start_msg)
                    firstit = True
                    while True:
                        if not firstit:
                            print('Sleeping %d s until next iteration...' %
                                  (cfg.scheduling.polling_time_s))
                            time.sleep(cfg.scheduling.polling_time_s)
                            jobs = Job.get_running_jobs(cfg.logging.plots)
                        firstit = False

                        archiving_status, log_messages = archive.spawn_archive_process(
                            cfg.directories, cfg.archiving, cfg.logging, jobs)
                        if log_messages:
                            for log_message in log_messages:
                                print(log_message)
                                root_logger.info('[archive] %s', log_message)
                        else:
                            root_logger.info('[archive] %s', archiving_status)

            # Debugging: show the destination drive usage schedule
            elif args.cmd == 'dsched':
                for (d, ph) in manager.dstdirs_to_furthest_phase(jobs).items():
                    print(' %s : %s' % (d, str(ph)))

            #
            # Job control commands
            #
            elif args.cmd in [
                'details', 'logs', 'files', 'kill', 'suspend', 'resume'
            ]:
                print(args)
                selected = []

                # TODO: clean up treatment of wildcard
                if args.idprefix[0] == 'all':
                    selected = jobs
                else:
                    # TODO: allow multiple idprefixes, not just take the first
                    selected = manager.select_jobs_by_partial_id(
                        jobs, args.idprefix[0])
                    if (len(selected) == 0):
                        print('Error: %s matched no jobs.' % args.idprefix[0])
                    elif len(selected) > 1:
                        print('Error: "%s" matched multiple jobs:' % args.idprefix[0])
                        for j in selected:
                            print(' %s' % j.plot_id)
                        selected = []

                # Renamed from 'job' to avoid shadowing the job module.
                for selected_job in selected:
                    if args.cmd == 'details':
                        print(selected_job.status_str_long())

                    elif args.cmd == 'logs':
                        selected_job.print_logs(args.follow)

                    elif args.cmd == 'files':
                        temp_files = selected_job.get_temp_files()
                        for f in temp_files:
                            print(' %s' % f)

                    elif args.cmd == 'kill':
                        # First suspend so job doesn't create new files
                        print('Pausing PID %d, plot id %s' %
                              (selected_job.proc.pid, selected_job.plot_id))
                        selected_job.suspend()

                        temp_files = selected_job.get_temp_files()
                        print('Will kill pid %d, plot id %s' %
                              (selected_job.proc.pid, selected_job.plot_id))
                        print('Will delete %d temp files' % len(temp_files))
                        if args.force:
                            conf = 'y'
                        else:
                            conf = input('Are you sure? ("y" to confirm): ')
                        if (conf != 'y'):
                            print(
                                'Canceled. If you wish to resume the job, do so manually.'
                            )
                        else:
                            print('killing...')
                            selected_job.cancel()
                            print('cleaning up temp files...')
                            for f in temp_files:
                                os.remove(f)

                    elif args.cmd == 'suspend':
                        print('Suspending ' + selected_job.plot_id)
                        selected_job.suspend()

                    elif args.cmd == 'resume':
                        print('Resuming ' + selected_job.plot_id)
                        selected_job.resume()
def archive(
    dir_cfg: configuration.Directories,
    # Annotation fixed to Optional: the body explicitly handles arch_cfg
    # being None, so the old bare 'configuration.Archiving' was wrong.
    arch_cfg: typing.Optional[configuration.Archiving],
    all_jobs: typing.List[job.Job],
) -> typing.Tuple[bool, typing.Optional[typing.Union[typing.Dict[str, object], str]], typing.List[str]]:
    '''Configure one archive job.  Needs to know all jobs so it can avoid IO
    contention on the plotting dstdir drives.  Returns either (False,
    <reason>, <log messages>) if we should not execute an archive job or
    (True, <subprocess arguments>, <log messages>) if we should.'''
    log_messages: typing.List[str] = []
    if arch_cfg is None:
        return (False, "No 'archive' settings declared in plotman.yaml", log_messages)

    dir2ph = manager.dstdirs_to_furthest_phase(all_jobs)
    best_priority = -100000000
    chosen_plot = None
    dst_dir = dir_cfg.get_dst_directories()

    # Pick the first plot from the dst dir with the highest archive priority.
    for d in dst_dir:
        ph = dir2ph.get(d, job.Phase(0, 0))
        dir_plots = plot_util.list_plots(d)
        gb_free = plot_util.df_b(d) / plot_util.GB
        n_plots = len(dir_plots)
        priority = compute_priority(ph, gb_free, n_plots)
        if priority >= best_priority and dir_plots:
            best_priority = priority
            chosen_plot = dir_plots[0]

    if not chosen_plot:
        return (False, 'No plots found', log_messages)

    # TODO: sanity check that archive machine is available
    # TODO: filter drives mounted RO

    #
    # Pick first archive dir with sufficient space
    #
    archdir_freebytes, freebytes_log_messages = get_archdir_freebytes(arch_cfg)
    log_messages.extend(freebytes_log_messages)
    if not archdir_freebytes:
        return (False, 'No free archive dirs found.', log_messages)

    archdir = ''
    chosen_plot_size = os.stat(chosen_plot).st_size
    # 10MB is big enough to outsize filesystem block sizes hopefully, but small
    # enough to make this a pretty tight corner for people to get stuck in.
    free_space_margin = 10_000_000
    available = [(d, space) for (d, space) in archdir_freebytes.items()
                 if space > (chosen_plot_size + free_space_margin)]
    if len(available) > 0:
        # Wrap the configured index so any value selects a valid entry.
        index = arch_cfg.index % len(available)
        # NOTE(review): the original also unpacked the free-space value into
        # an unused local; only the directory is needed.
        (archdir, _) = sorted(available)[index]

    if not archdir:
        return (False, 'No archive directories found with enough free space', log_messages)

    env = arch_cfg.environment(
        source=chosen_plot,
        destination=archdir,
    )
    subprocess_arguments: typing.Dict[str, object] = {
        'args': arch_cfg.target_definition().transfer_path,
        'env': {**os.environ, **env},
    }
    return (True, subprocess_arguments, log_messages)
def main():
    '''Plotman CLI entry point (legacy variant): parse args and dispatch.

    Handles config management ('version', 'config'), the long-running
    'plot' and 'archive' loops, one-shot reports ('status', 'dirs',
    'dsched', 'analyze', 'interactive'), and per-job control commands
    ('details', 'files', 'kill', 'suspend', 'resume').
    '''
    random.seed()
    pm_parser = PlotmanArgParser()
    args = pm_parser.parse_args()

    if args.cmd == 'version':
        import pkg_resources
        print(pkg_resources.get_distribution('plotman'))
        return

    elif args.cmd == 'config':
        config_file_path = configuration.get_path()
        if args.config_subcommand == 'path':
            if os.path.isfile(config_file_path):
                print(config_file_path)
                return
            print(
                f"No 'plotman.yaml' file exists at expected location: '{config_file_path}'"
            )
            # No placeholders in this message, so a plain string (the
            # original used a needless f-string prefix).
            print(
                "To generate a default config file, run: 'plotman config generate'"
            )
            return 1
        if args.config_subcommand == 'generate':
            if os.path.isfile(config_file_path):
                overwrite = None
                while overwrite not in {"y", "n"}:
                    overwrite = input(
                        f"A 'plotman.yaml' file already exists at the default location: '{config_file_path}' \n\n"
                        "\tInput 'y' to overwrite existing file, or 'n' to exit without overwrite."
                    ).lower()
                if overwrite == 'n':
                    # Typo fix: was "overrwriting".
                    print("\nExited without overwriting file")
                    return

            # Copy the default plotman.yaml (packaged in plotman/resources/)
            # to the user's config file path, creating the parent plotman
            # file/directory if it does not yet exist
            with importlib.resources.path(plotman_resources, "plotman.yaml") as default_config:
                config_dir = os.path.dirname(config_file_path)
                os.makedirs(config_dir, exist_ok=True)
                copyfile(default_config, config_file_path)
                print(f"\nWrote default plotman.yaml to: {config_file_path}")
            return

        if not args.config_subcommand:
            print("No action requested, add 'generate' or 'path'.")
        return

    config_path = configuration.get_path()
    config_text = configuration.read_configuration_text(config_path)
    cfg = configuration.get_validated_configs(config_text, config_path)

    #
    # Stay alive, spawning plot jobs
    #
    if args.cmd == 'plot':
        print('...starting plot loop')
        while True:
            wait_reason = manager.maybe_start_new_plot(
                cfg.directories, cfg.scheduling, cfg.plotting)

            # TODO: report this via a channel that can be polled on demand, so we don't spam the console
            if wait_reason:
                print('...sleeping %d s: %s' %
                      (cfg.scheduling.polling_time_s, wait_reason))
            time.sleep(cfg.scheduling.polling_time_s)

    #
    # Analysis of completed jobs
    #
    elif args.cmd == 'analyze':
        analyzer.analyze(args.logfile, args.clipterminals,
                         args.bytmp, args.bybitfield)

    else:
        jobs = Job.get_running_jobs(cfg.directories.log)

        # Status report
        if args.cmd == 'status':
            print(reporting.status_report(jobs, get_term_width()))

        # Directories report
        elif args.cmd == 'dirs':
            print(
                reporting.dirs_report(jobs, cfg.directories, cfg.scheduling,
                                      get_term_width()))

        elif args.cmd == 'interactive':
            interactive.run_interactive()

        # Start running archival
        elif args.cmd == 'archive':
            print('...starting archive loop')
            firstit = True
            while True:
                if not firstit:
                    # TODO(review): hardcoded 60s; the newer main() uses
                    # cfg.scheduling.polling_time_s here.
                    print('Sleeping 60s until next iteration...')
                    time.sleep(60)
                    jobs = Job.get_running_jobs(cfg.directories.log)
                firstit = False
                archiving_status, log_message = archive.spawn_archive_process(
                    cfg.directories, jobs)
                if log_message:
                    print(log_message)

        # Debugging: show the destination drive usage schedule
        elif args.cmd == 'dsched':
            for (d, ph) in manager.dstdirs_to_furthest_phase(jobs).items():
                print(' %s : %s' % (d, str(ph)))

        #
        # Job control commands
        #
        elif args.cmd in ['details', 'files', 'kill', 'suspend', 'resume']:
            print(args)
            selected = []

            # TODO: clean up treatment of wildcard
            if args.idprefix[0] == 'all':
                selected = jobs
            else:
                # TODO: allow multiple idprefixes, not just take the first
                selected = manager.select_jobs_by_partial_id(
                    jobs, args.idprefix[0])
                if (len(selected) == 0):
                    print('Error: %s matched no jobs.' % args.idprefix[0])
                elif len(selected) > 1:
                    print('Error: "%s" matched multiple jobs:' % args.idprefix[0])
                    for j in selected:
                        print(' %s' % j.plot_id)
                    selected = []

            # Renamed from 'job' to avoid shadowing the job module.
            for selected_job in selected:
                if args.cmd == 'details':
                    print(selected_job.status_str_long())

                elif args.cmd == 'files':
                    temp_files = selected_job.get_temp_files()
                    for f in temp_files:
                        print(' %s' % f)

                elif args.cmd == 'kill':
                    # First suspend so job doesn't create new files
                    print('Pausing PID %d, plot id %s' %
                          (selected_job.proc.pid, selected_job.plot_id))
                    selected_job.suspend()

                    temp_files = selected_job.get_temp_files()
                    print('Will kill pid %d, plot id %s' %
                          (selected_job.proc.pid, selected_job.plot_id))
                    print('Will delete %d temp files' % len(temp_files))
                    conf = input('Are you sure? ("y" to confirm): ')
                    if (conf != 'y'):
                        # Typo/capitalization fixes: was "canceled." —
                        # matches the newer main()'s message.
                        print(
                            'Canceled. If you wish to resume the job, do so manually.'
                        )
                    else:
                        print('killing...')
                        selected_job.cancel()
                        # Typo fix: was "cleaing up temp files...".
                        print('cleaning up temp files...')
                        for f in temp_files:
                            os.remove(f)

                elif args.cmd == 'suspend':
                    print('Suspending ' + selected_job.plot_id)
                    selected_job.suspend()

                elif args.cmd == 'resume':
                    print('Resuming ' + selected_job.plot_id)
                    selected_job.resume()