def get_num_conformers(inchi_key: str) -> int:
    """
    Counts the conformer files that belong to the molecule with the given
    InChIKey.

    The count is the number of files inside the ``unopt_pdbs`` folder (which
    sits next to the workflow .params file) whose names start with the
    InChIKey and end in ``.pdb``.

    :param inchi_key: the InChIKey of the molecule
    :return: the number of conformers
    """
    workflow_dir = upsearch(WORKFLOW_PARAMS_FILENAME).parent
    pdb_pattern = "{}*.pdb".format(inchi_key)
    matching_pdbs = glob(str(workflow_dir / "unopt_pdbs" / pdb_pattern))
    return len(matching_pdbs)
def update_workflow_params(**kwargs) -> None:
    """
    Updates the specified workflow parameters in the .params file with the
    specified values.

    Only keys that already exist in the .params file are updated; keyword
    arguments naming unknown parameters are ignored. The file is rewritten as
    JSON (indent of 4) even when no value changed.

    :param kwargs: a dict of parameters and new values
    :return: None
    :raises TypeError: if a new value cannot be serialized to JSON; the
        existing .params file is left untouched in that case
    """
    workflow_params_file = upsearch(WORKFLOW_PARAMS_FILENAME)
    workflow_params = load_workflow_params()

    for key, value in kwargs.items():
        if key in workflow_params:
            workflow_params[key] = value

    # Serialize BEFORE opening the file: opening with "w" truncates it, so a
    # serialization failure after the open would wipe the stored parameters.
    serialized_params = json.dumps(workflow_params, indent=4)

    with workflow_params_file.open("w") as f:
        f.write(serialized_params)
def load_workflow_params() -> dict:
    """
    Loads the high level workflow details stored in the .params file found in
    the main directory of a workflow.

    These details include the path to the workflow configuration file (which
    describes all of the steps), the configuration ID, and the number of
    conformers in the workflow.

    :return: a dict of workflow configuration details
    :raises FileNotFoundError: if .params file is not found
    """
    try:
        params_path = upsearch(WORKFLOW_PARAMS_FILENAME)
    except FileNotFoundError:
        # replace the search error with a hint aimed at the command-line user
        raise FileNotFoundError(
            "Unable to find .params file; ensure that you are in a workflow directory."
        )

    with params_path.open() as params_stream:
        return json.load(params_stream)
def __init__(self,
             step_id: str,
             wave_id: int,
             attempt_restart: bool = False,
             flow_config: FlowConfig = None,
             workflow_dir: Path = None):
    """
    Builds a FlowRunner, the object responsible for setting up and
    submitting workflow steps (input files, submission scripts, and
    submission to the Slurm queue).

    :param step_id: the ID of the current workflow step
    :param wave_id: the current wave ID
    :param attempt_restart: if True, the specified step and wave ID will
        attempt to be restarted
    :param flow_config: a workflow configuration object; when omitted, it is
        built from the ``config_file`` and ``config_id`` entries of the
        workflow .params file
    :param workflow_dir: the main directory of the workflow; when omitted,
        the directory containing the workflow .params file is used
    """
    # resolve the workflow configuration
    if flow_config is not None:
        self.flow_config = flow_config
    else:
        params = flow_utils.load_workflow_params()
        self.flow_config = FlowConfig(params["config_file"],
                                      params["config_id"])

    # resolve the main workflow directory
    if workflow_dir is not None:
        self.workflow_dir = workflow_dir
    else:
        self.workflow_dir = upsearch(
            flow_utils.WORKFLOW_PARAMS_FILENAME).parent

    # identity of the step/wave being run
    self.attempt_restart = attempt_restart
    self.current_step_id = step_id
    self.current_wave_id = wave_id

    # step configuration and the directories derived from it
    self.current_step_config = self.flow_config.get_step(step_id)
    self.current_step_dir = self.workflow_dir / self.current_step_id
    wave_dir_name = "wave_{}_calcs".format(wave_id)
    self.current_wave_dir = self.current_step_dir / wave_dir_name
    self.step_program = self.flow_config.get_step(step_id)["program"]
def begin_step(step_id: str = None,
               show_progress: bool = False,
               wave_id: int = 1,
               attempt_restart: bool = False,
               do_not_track: bool = False) -> None:
    """
    Starts running the specified workflow step.

    When the step is the initial step of the workflow, the wave ID is 1 and
    no restart is requested, the workflow is registered with the FlowTracker
    (unless ``do_not_track`` is set) and the progress display is switched on.
    In every other case the tracked progress of the workflow is updated
    instead. A ``ValueError`` raised while registering the workflow is
    reported on the command line and the process exits with status 1.

    :param step_id: the ID of the step to start running; defaults to the
        initial step of the flow configuration
    :param show_progress: displays command-line progress bar if True, no
        progress bar otherwise
    :param wave_id: the ID of the wave to submit
    :param attempt_restart: if True, restarts the specified wave, otherwise
        submits a new wave
    :param do_not_track: if True, does not track the workflow in the
        tracked_workflows.csv file
    :return: None
    :raises AttributeError: if ``step_id`` is not a step of the flow
        configuration
    """
    # locate the workflow .params file
    workflow_params_file = upsearch(
        WORKFLOW_PARAMS_FILENAME,
        message="Please execute this script in a workflow directory.")

    # pull the configuration details out of the .params file
    workflow_params = load_workflow_params()
    workflow_main_dir = workflow_params_file.parent
    workflow_id = workflow_main_dir.name

    config_file = Path(workflow_params["config_file"])
    config_id = workflow_params["config_id"]
    flow_config = FlowConfig(config_file, config_id)

    # default to the initial step, otherwise make sure the step exists
    if step_id is None:
        step_id = flow_config.get_initial_step_id()
    elif step_id not in flow_config.get_step_ids():
        raise AttributeError(
            "Flow config defined in {} does not have a step '{}'".format(
                config_file, step_id))

    is_first_submission = (flow_config.get_initial_step_id() == step_id
                           and wave_id == 1
                           and not attempt_restart)

    if not is_first_submission:
        FlowTracker.update_progress(workflow_id)
    else:
        # first step of a fresh workflow: register it with the tracker
        if not do_not_track:
            try:
                FlowTracker.track_new_flow(
                    config_file=config_file,
                    config_id=config_id,
                    workflow_main_dir=workflow_main_dir)
            except ValueError as e:
                do_not_track_msg = (
                    "Note: if you would like to avoid tracking this workflow,"
                    " add the --do_not_track flag when you run 'pyflow begin'")
                print("Workflow error: {}\n{}".format(e, do_not_track_msg))
                sys.exit(1)

        # NOTE(review): progress display is forced on for every first
        # submission, tracked or not -- confirm this nesting matches intent.
        show_progress = True

    # set up and start running the workflow step
    flow_runner = FlowRunner(flow_config=flow_config,
                             wave_id=wave_id,
                             step_id=step_id,
                             workflow_dir=workflow_main_dir,
                             attempt_restart=attempt_restart)
    flow_runner.run(show_progress=show_progress)
def check_progress(verbose: bool = True) -> float:
    """
    Checks the progress of the current workflow directory and prints a progress
    report to the command line (if ``verbose == True``).

    For every step of the flow configuration the number of completed, failed,
    incomplete (expected minus completed) and running calculations is
    determined by counting output files across all ``wave_*_calcs`` folders of
    the step. A calculation counts as running when its output file in a wave
    folder was modified less than five minutes ago.

    :param verbose: if True, prints progress report to command line
    :return: the overall completion rate as a percentage (0-100, rounded to
        one decimal place): completed calculations divided by expected
        calculations; 0.0 when no calculations are expected
    :raises FileNotFoundError: if no workflow directory can be found
    """

    def format_percentage(total: int, percentage: float) -> str:
        """Formats total count and percentage into a string"""
        percentage_str = "({}%)".format(round(percentage * 100, 1))
        return "{0:<3} {1:>8}".format(total, percentage_str)

    def rate(count: int, total: int) -> float:
        """Returns count / total, or 0.0 when total is zero"""
        return count / total if total else 0.0

    # ensure user is in a workflow directory
    try:
        workflow_params_file = upsearch(WORKFLOW_PARAMS_FILENAME)
    except FileNotFoundError:
        msg = "Unable to find workflow directory."
        raise FileNotFoundError(msg)
    workflow_dir = workflow_params_file.parent

    # imported here rather than at module level, as in the original code
    from pyflow.flow.flow_config import FlowConfig
    from pyflow.flow.flow_runner import FlowRunner

    workflow_params = load_workflow_params()
    config_file = workflow_params["config_file"]
    config_id = workflow_params["config_id"]

    results_header = [
        "Step ID", "Completed", "Incomplete", "Running", "Failed"
    ]
    result_rows = []

    config = FlowConfig(config_file, config_id)

    # NOTE(review): "*0.pdb" presumably selects one file per molecule (its
    # conformer 0); it would also match conformer indices ending in 0, such
    # as 10 -- confirm against the naming scheme of unopt_pdbs.
    num_molecules = len(glob(str(workflow_dir / "unopt_pdbs" / "*0.pdb")))
    num_structures = len(glob(str(workflow_dir / "unopt_pdbs" / "*.pdb")))

    running_window_seconds = 5 * 60

    total_num_completed = 0
    total_num_calcs = 0
    for step_id in config.get_step_ids():
        step_config = config.get_step(step_id)

        # the wildcard makes every glob below span all waves of the step
        step_dir = workflow_dir / step_id / "wave_*_calcs"
        completed_dir = step_dir / "completed"
        failed_dir = step_dir / "failed"
        output_file_ext = FlowRunner.PROGRAM_OUTFILE_EXTENSIONS[
            step_config["program"]]
        output_pattern = "*.{}".format(output_file_ext)

        # conformer steps run once per structure, others once per molecule
        if step_config["conformers"]:
            num_jobs = num_structures
        else:
            num_jobs = num_molecules
        total_num_calcs += num_jobs

        num_completed = len(glob(str(completed_dir / output_pattern)))
        total_num_completed += num_completed

        num_failed = len(glob(str(failed_dir / output_pattern)))
        num_incomplete = num_jobs - num_completed

        # total_seconds() covers the whole elapsed time; the ``seconds``
        # attribute drops full days, which made day-old files look recent
        now = datetime.now()
        num_running = 0
        for f in glob(str(step_dir / output_pattern)):
            mtime = datetime.fromtimestamp(os.path.getmtime(f))
            if (now - mtime).total_seconds() < running_window_seconds:
                num_running += 1

        if verbose:
            result_rows.append({
                "Step ID": step_id,
                "Completed": format_percentage(
                    num_completed, rate(num_completed, num_jobs)),
                "Incomplete": format_percentage(
                    num_incomplete, rate(num_incomplete, num_jobs)),
                "Running": format_percentage(
                    num_running, rate(num_running, num_jobs)),
                "Failed": format_percentage(
                    num_failed, rate(num_failed, num_jobs))
            })

    total_completion_rate = round(
        100 * rate(total_num_completed, total_num_calcs), 1)

    if verbose:
        # build the table in one go; DataFrame.append no longer exists in
        # pandas 2.x
        results_table = pd.DataFrame(result_rows, columns=results_header)

        current_time_str = "[{}]".format(
            datetime.now().strftime("%b %d %Y %X"))
        print("\nProgress report for workflow '{}' {}".format(
            workflow_dir.name, current_time_str))
        print("Num. Molecules: {} ({})".format(num_molecules, num_structures))
        print(
            tabulate(results_table,
                     headers="keys",
                     tablefmt='psql',
                     showindex=False))
        print("Overall completion rate: {}/{} ({}%)".format(
            total_num_completed, total_num_calcs, total_completion_rate))

    return total_completion_rate