def dst_dir_report(jobs, dstdirs, width, prefix=''):
    '''Render a borderless text table with one row per destination directory.

    Each row shows the abbreviated directory path, the number of k32 plots
    found in it, its free space (plot_util.df_b scaled by plot_util.GB and
    truncated to an int), the phases of jobs targeting it, and the archive
    priority computed from those values.

    jobs    -- jobs used to look up per-directory phases
    dstdirs -- destination directories to report on (emitted in sorted order)
    width   -- maximum table width passed to the table renderer
    prefix  -- path prefix handed to abbr_path when shortening each directory

    Returns the drawn table as a string.'''
    tab = tt.Texttable()
    dir2oldphase = manager.dstdirs_to_furthest_phase(jobs)
    headings = ['dst', 'plots', 'GBfree', 'inbnd phases', 'pri']
    tab.header(headings)
    tab.set_cols_dtype('t' * len(headings))

    for d in sorted(dstdirs):
        # TODO: This logic is replicated in archive.py's priority computation;
        # maybe deduplicate by moving more of the logic in to directory.py
        eldest_ph = dir2oldphase.get(d, job.Phase(0, 0))
        phases = job.job_phases_for_dstdir(d, jobs)

        dir_plots = plot_util.list_k32_plots(d)
        gb_free = int(plot_util.df_b(d) / plot_util.GB)
        n_plots = len(dir_plots)
        priority = archive.compute_priority(eldest_ph, gb_free, n_plots)
        row = [abbr_path(d, prefix), n_plots, gb_free,
               phases_str(phases, 5), priority]
        tab.add_row(row)

    tab.set_max_width(width)
    # A single set_deco call is enough: any earlier decoration setting would
    # simply be overwritten by this one.
    tab.set_deco(0)  # No borders
    return tab.draw()
def archive(dir_cfg, all_jobs):
    '''Decide on a single archive transfer and build its rsync command.

    The full job list is required so that the furthest phase of the jobs
    writing to each dst dir can be fed into the priority computation, which
    is how IO contention on the plotting dstdir drives is avoided.

    Returns (False, <reason>) when no archive job should be run, or
    (True, <cmd>) carrying the archive command when one should.'''
    archive_cfg = dir_cfg.archive
    if archive_cfg is None:
        return (False, "No 'archive' settings declared in plotman.yaml")

    furthest_phase_by_dir = manager.dstdirs_to_furthest_phase(all_jobs)

    # Walk every dst dir; among those that actually hold plots, the last one
    # with the highest priority wins (ties go to the later directory).
    top_priority = -100000000
    selected_plot = None
    for dstdir in dir_cfg.dst:
        phase = furthest_phase_by_dir.get(dstdir, (0, 0))
        plots = plot_util.list_k32_plots(dstdir)
        free_gb = plot_util.df_b(dstdir) / plot_util.GB
        score = compute_priority(phase, free_gb, len(plots))
        if score >= top_priority and plots:
            top_priority = score
            selected_plot = plots[0]

    if not selected_plot:
        return (False, 'No plots found')

    archdir = '/volume1/chia_plots'

    # Only pass a bandwidth limit to rsync when one is configured.
    bwlimit = archive_cfg.rsyncd_bwlimit
    if bwlimit:
        throttle_arg = '--bwlimit=%d' % bwlimit
    else:
        throttle_arg = ''

    destination = rsync_dest(archive_cfg, archdir)
    cmd = 'rsync %s --remove-source-files -P %s %s' % (
        throttle_arg, selected_plot, destination)
    return (True, cmd)
def archive(dir_cfg, all_jobs):
    '''Configure one archive job.  Needs to know all jobs so it can avoid IO
    contention on the plotting dstdir drives.

    The plot to move is the first plot of the dst dir with the highest
    priority (ties go to the later directory).  The target is picked from the
    archive dirs whose free space exceeds 1.2x the k32 plot size, sorted by
    name and indexed by dir_cfg.archive.index (clamped to the last candidate).

    Returns either (False, <reason>) if we should not execute an archive job
    or (True, <cmd>) with the archive command if we should.'''
    if dir_cfg.archive is None:
        return (False, "No 'archive' settings declared in plotman.yaml")

    dir2ph = manager.dstdirs_to_furthest_phase(all_jobs)
    best_priority = -100000000
    chosen_plot = None

    for d in dir_cfg.dst:
        ph = dir2ph.get(d, (0, 0))
        dir_plots = plot_util.list_k32_plots(d)
        gb_free = plot_util.df_b(d) / plot_util.GB
        n_plots = len(dir_plots)
        priority = compute_priority(ph, gb_free, n_plots)
        if priority >= best_priority and dir_plots:
            best_priority = priority
            chosen_plot = dir_plots[0]

    if not chosen_plot:
        return (False, 'No plots found')

    # TODO: sanity check that archive machine is available
    # TODO: filter drives mounted RO

    #
    # Pick first archive dir with sufficient space
    #
    archdir_freebytes = get_archdir_freebytes(dir_cfg.archive)
    if not archdir_freebytes:
        return (False, 'No free archive dirs found.')

    # The threshold does not depend on the directory, so compute it once.
    min_free_bytes = 1.2 * plot_util.get_k32_plotsize()
    available = [(d, space) for (d, space) in archdir_freebytes.items()
                 if space > min_free_bytes]

    archdir = ''
    if available:
        index = min(dir_cfg.archive.index, len(available) - 1)
        archdir, _ = sorted(available)[index]
    if not archdir:
        return (False, 'No archive directories found with enough free space')

    bwlimit = dir_cfg.archive.rsyncd_bwlimit
    throttle_arg = ('--bwlimit=%d' % bwlimit) if bwlimit else ''
    # The ssh cipher must be a real OpenSSH cipher name; a mangled value makes
    # ssh reject the option and the transfer never starts.
    cmd = ('rsync %s --remove-source-files -P -e "ssh -T -c aes128-gcm@openssh.com -o Compression=no -x" %s %s' %
           (throttle_arg, chosen_plot, rsync_dest(dir_cfg.archive, archdir)))

    return (True, cmd)
def archive(
    dir_cfg: configuration.Directories,
    arch_cfg: typing.Optional[configuration.Archiving],
    all_jobs: typing.List[job.Job],
) -> typing.Tuple[bool, typing.Optional[typing.Union[typing.Dict[str, object], str]], typing.List[str]]:
    '''Configure one archive job.  Needs to know all jobs so it can avoid IO
    contention on the plotting dstdir drives.

    The plot to move is the first plot of the dst dir with the highest
    priority (ties go to the later directory).  The target is picked from the
    archive dirs with room for that plot plus a small margin, sorted by name
    and indexed by arch_cfg.index modulo the number of candidates.

    Returns a 3-tuple whose last element is always the list of log messages
    collected along the way:
      (False, <reason>, log_messages) if we should not execute an archive job
      (True, <subprocess arguments>, log_messages) if we should, where the
        subprocess arguments are a dict with 'args' (the transfer script
        path) and 'env' (os.environ overlaid with the archive environment).

    Raises OSError (from os.stat) if the chosen plot cannot be stat'ed.'''
    log_messages: typing.List[str] = []
    if arch_cfg is None:
        return (False, "No 'archive' settings declared in plotman.yaml", log_messages)

    dir2ph = manager.dstdirs_to_furthest_phase(all_jobs)
    best_priority = -100000000
    chosen_plot = None
    dst_dir = dir_cfg.get_dst_directories()
    for d in dst_dir:
        ph = dir2ph.get(d, job.Phase(0, 0))
        dir_plots = plot_util.list_plots(d)
        gb_free = plot_util.df_b(d) / plot_util.GB
        n_plots = len(dir_plots)
        priority = compute_priority(ph, gb_free, n_plots)
        if priority >= best_priority and dir_plots:
            best_priority = priority
            chosen_plot = dir_plots[0]

    if not chosen_plot:
        return (False, 'No plots found', log_messages)

    # TODO: sanity check that archive machine is available
    # TODO: filter drives mounted RO

    #
    # Pick first archive dir with sufficient space
    #
    archdir_freebytes, freebytes_log_messages = get_archdir_freebytes(arch_cfg)
    log_messages.extend(freebytes_log_messages)
    if not archdir_freebytes:
        return (False, 'No free archive dirs found.', log_messages)

    archdir = ''
    chosen_plot_size = os.stat(chosen_plot).st_size
    # 10MB is big enough to outsize filesystem block sizes hopefully, but small
    # enough to make this a pretty tight corner for people to get stuck in.
    free_space_margin = 10_000_000
    required_bytes = chosen_plot_size + free_space_margin
    available = [(d, space) for (d, space) in archdir_freebytes.items()
                 if space > required_bytes]
    if available:
        # Wrap the configured index so it always lands on a valid candidate.
        index = arch_cfg.index % len(available)
        archdir, _ = sorted(available)[index]

    if not archdir:
        return (False, 'No archive directories found with enough free space', log_messages)

    env = arch_cfg.environment(
        source=chosen_plot,
        destination=archdir,
    )
    subprocess_arguments: typing.Dict[str, object] = {
        'args': arch_cfg.target_definition().transfer_path,
        'env': {**os.environ, **env},
    }

    return (True, subprocess_arguments, log_messages)