コード例 #1
0
ファイル: flow_utils.py プロジェクト: kuriba/PyFlow
def get_num_conformers(inchi_key: str) -> int:
    """
    Counts the conformer PDB files stored for a molecule.

    The workflow directory is located by searching upwards for the workflow
    .params file; the count is the number of files in its ``unopt_pdbs``
    folder whose names start with the given InChIKey and end in ``.pdb``.

    :param inchi_key: the InChIKey of the molecule
    :return: the number of matching conformer files
    """
    workflow_dir = upsearch(WORKFLOW_PARAMS_FILENAME).parent

    # glob pattern matching every PDB file that starts with the InChIKey
    conformer_pattern = workflow_dir / "unopt_pdbs" / "{}*.pdb".format(inchi_key)

    return len(glob(str(conformer_pattern)))
コード例 #2
0
ファイル: flow_utils.py プロジェクト: kuriba/PyFlow
def update_workflow_params(**kwargs) -> None:
    """
    Updates the specified workflow parameters in the .params file with the
    specified values.

    Only parameters that already exist in the .params file are updated;
    keyword arguments naming unknown parameters are silently ignored.

    :param kwargs: a dict of parameters and new values
    :return: None
    """
    workflow_params_file = upsearch(WORKFLOW_PARAMS_FILENAME)
    workflow_params = load_workflow_params()

    workflow_params.update(
        {k: v for k, v in kwargs.items() if k in workflow_params})

    # Serialize BEFORE opening the file: opening with "w" truncates it, so a
    # value that cannot be serialized would otherwise wipe the existing
    # .params file and leave it empty.
    serialized_params = json.dumps(workflow_params, indent=4)

    with workflow_params_file.open("w") as f:
        f.write(serialized_params)
コード例 #3
0
ファイル: flow_utils.py プロジェクト: kuriba/PyFlow
def load_workflow_params() -> dict:
    """
    Reads the .params file of the current workflow and returns its contents.

    The .params file is located by searching upwards for it and is parsed as
    JSON. It holds high level workflow details such as the path to the
    workflow configuration file, the configuration ID, and the number of
    conformers in the workflow.

    :return: a dict of workflow configuration details
    :raises FileNotFoundError: if .params file is not found
    """
    try:
        params_path = upsearch(WORKFLOW_PARAMS_FILENAME)
    except FileNotFoundError:
        # re-raise with a message that tells the user how to fix the problem
        raise FileNotFoundError(
            "Unable to find .params file; ensure that you are in a workflow directory."
        )

    with params_path.open() as params_stream:
        return json.load(params_stream)
コード例 #4
0
    def __init__(self,
                 step_id: str,
                 wave_id: int,
                 attempt_restart: bool = False,
                 flow_config: FlowConfig = None,
                 workflow_dir: Path = None):
        """
        Builds a FlowRunner, the object responsible for setting up and
        submitting workflow steps (input files, submission scripts, and
        submission to the Slurm queue).

        When ``flow_config`` or ``workflow_dir`` is omitted, the value is
        derived from the workflow .params file instead.

        :param step_id: the ID of the current workflow step
        :param wave_id: the current wave ID
        :param attempt_restart: if True, the specified step and wave ID will attempt to be restarted
        :param flow_config: a workflow configuration object
        :param workflow_dir: the main directory of the workflow
        """
        # workflow configuration: use the given one or load it from .params
        if flow_config is not None:
            self.flow_config = flow_config
        else:
            params = flow_utils.load_workflow_params()
            self.flow_config = FlowConfig(params["config_file"],
                                          params["config_id"])

        # workflow directory: use the given one or the folder holding .params
        if workflow_dir is not None:
            self.workflow_dir = workflow_dir
        else:
            self.workflow_dir = upsearch(
                flow_utils.WORKFLOW_PARAMS_FILENAME).parent

        self.attempt_restart = attempt_restart
        self.current_step_id = step_id
        self.current_wave_id = wave_id

        # per-step configuration and the directories the step works in
        self.current_step_config = self.flow_config.get_step(step_id)
        self.current_step_dir = self.workflow_dir / self.current_step_id
        wave_dir_name = "wave_{}_calcs".format(wave_id)
        self.current_wave_dir = self.current_step_dir / wave_dir_name
        self.step_program = self.flow_config.get_step(step_id)["program"]
コード例 #5
0
def begin_step(step_id: str = None,
               show_progress: bool = False,
               wave_id: int = 1,
               attempt_restart: bool = False,
               do_not_track: bool = False) -> None:
    """
    Launches the given workflow step for the workflow that contains the
    current directory.

    When the step is the initial step, the wave is the first wave and no
    restart is requested, the workflow is registered with the FlowTracker
    (unless ``do_not_track`` is set) and the progress bar is always shown.
    In every other case the tracked progress of the workflow is updated
    before the step is run.

    :param step_id: the ID of the step to start running; defaults to the initial step
    :param show_progress: displays command-line progress bar if True, no progress bar otherwise
    :param wave_id: the ID of the wave to submit
    :param attempt_restart: if True, restarts the specified wave, otherwise submits a new wave
    :param do_not_track: if True, does not track the workflow in the tracked_workflows.csv file
    :return: None
    :raises AttributeError: if ``step_id`` is not a step of the workflow configuration
    """
    # locate the .params file of the enclosing workflow
    params_path = upsearch(
        WORKFLOW_PARAMS_FILENAME,
        message="Please execute this script in a workflow directory.")

    # build the workflow configuration described by the .params file
    params = load_workflow_params()
    workflow_main_dir = params_path.parent
    workflow_id = workflow_main_dir.name
    config_file = Path(params["config_file"])
    config_id = params["config_id"]
    flow_config = FlowConfig(config_file, config_id)

    # resolve the step to run and make sure it exists
    if step_id is None:
        step_id = flow_config.get_initial_step_id()
    elif step_id not in flow_config.get_step_ids():
        raise AttributeError(
            "Flow config defined in {} does not have a step '{}'".format(
                config_file, step_id))

    # a "fresh start" is the very first submission of the workflow
    is_fresh_start = (flow_config.get_initial_step_id() == step_id
                      and wave_id == 1
                      and not attempt_restart)

    if is_fresh_start:
        if not do_not_track:
            try:
                FlowTracker.track_new_flow(config_file=config_file,
                                           config_id=config_id,
                                           workflow_main_dir=workflow_main_dir)
            except ValueError as err:
                hint = ("Note: if you would like to avoid tracking this workflow,"
                        " add the --do_not_track flag when you run 'pyflow begin'")
                print("Workflow error: {}\n{}".format(err, hint))
                sys.exit(1)
        # the first submission always displays the progress bar
        show_progress = True
    else:
        FlowTracker.update_progress(workflow_id)

    # hand over to the runner, which sets up and submits the step
    runner = FlowRunner(step_id=step_id,
                        wave_id=wave_id,
                        attempt_restart=attempt_restart,
                        flow_config=flow_config,
                        workflow_dir=workflow_main_dir)
    runner.run(show_progress=show_progress)
コード例 #6
0
    def check_progress(verbose: bool = True) -> float:
        """
        Checks the progress of the current workflow directory and prints a progress
        report to the command line (if ``verbose == True``). Returns the overall
        completion rate of the workflow as a percentage (0-100, rounded to one
        decimal place), calculated as the quotient of the total number of completed
        calculations and the total number of expected calculations.

        A calculation counts as "running" when its output file directly inside a
        wave directory was modified less than five minutes ago. When there are no
        expected calculations (e.g. no PDB files or no steps), all rates are
        reported as 0 instead of raising ``ZeroDivisionError``.

        :param verbose: if True, prints progress report to command line
        :return: the percentage of completed calculations for the current workflow directory
        :raises FileNotFoundError: if the workflow .params file cannot be found
        """
        def format_percentage(total: int, percentage: float) -> str:
            """Formats total count and percentage into a string"""
            percentage_str = "({}%)".format(round(percentage * 100, 1))
            return "{0:<3} {1:>8}".format(total, percentage_str)

        def safe_rate(count: int, out_of: int) -> float:
            """Returns count / out_of, or 0.0 when out_of is zero"""
            return count / out_of if out_of else 0.0

        # output files untouched for longer than this are not considered running
        running_threshold_seconds = 5 * 60

        # ensure user is in a workflow directory
        try:
            workflow_params_file = upsearch(WORKFLOW_PARAMS_FILENAME)
            workflow_dir = workflow_params_file.parent
        except FileNotFoundError:
            msg = "Unable to find workflow directory."
            raise FileNotFoundError(msg)

        # NOTE(review): imported locally, presumably to avoid a circular import
        from pyflow.flow.flow_config import FlowConfig
        from pyflow.flow.flow_runner import FlowRunner

        workflow_params = load_workflow_params()
        config_file = workflow_params["config_file"]
        config_id = workflow_params["config_id"]

        results_header = [
            "Step ID", "Completed", "Incomplete", "Running", "Failed"
        ]
        # rows of the report; only filled when verbose
        result_rows = []

        config = FlowConfig(config_file, config_id)
        # files ending in "0.pdb" are counted once per molecule (presumably the
        # first conformer of each molecule -- confirm against the naming scheme)
        num_molecules = len(glob(str(workflow_dir / "unopt_pdbs" / "*0.pdb")))
        num_structures = len(glob(str(workflow_dir / "unopt_pdbs" / "*.pdb")))
        total_num_completed = 0
        total_num_calcs = 0
        for step_id in config.get_step_ids():
            step_config = config.get_step(step_id)

            # glob pattern covering every wave directory of the step
            step_dir = workflow_dir / step_id / "wave_*_calcs"
            completed_dir = step_dir / "completed"
            failed_dir = step_dir / "failed"
            output_file_ext = FlowRunner.PROGRAM_OUTFILE_EXTENSIONS[
                step_config["program"]]
            output_file_pattern = "*.{}".format(output_file_ext)

            # steps run on conformers expect one job per structure,
            # other steps expect one job per molecule
            if step_config["conformers"]:
                num_jobs = num_structures
            else:
                num_jobs = num_molecules
            total_num_calcs += num_jobs

            num_completed = len(glob(str(completed_dir / output_file_pattern)))
            total_num_completed += num_completed

            num_failed = len(glob(str(failed_dir / output_file_pattern)))
            num_incomplete = num_jobs - num_completed

            # total_seconds() is required here: timedelta.seconds drops the
            # days component, which would flag files modified days ago as running
            now = datetime.now()
            num_running = 0
            for f in glob(str(step_dir / output_file_pattern)):
                mtime = datetime.fromtimestamp(os.path.getmtime(f))
                if (now - mtime).total_seconds() < running_threshold_seconds:
                    num_running += 1

            if verbose:
                result_rows.append({
                    "Step ID":
                    step_id,
                    "Completed":
                    format_percentage(num_completed,
                                      safe_rate(num_completed, num_jobs)),
                    "Incomplete":
                    format_percentage(num_incomplete,
                                      safe_rate(num_incomplete, num_jobs)),
                    "Running":
                    format_percentage(num_running,
                                      safe_rate(num_running, num_jobs)),
                    "Failed":
                    format_percentage(num_failed,
                                      safe_rate(num_failed, num_jobs))
                })

        total_completion_rate = round(
            100 * safe_rate(total_num_completed, total_num_calcs), 1)

        if verbose:
            # build the table in one go; DataFrame.append no longer exists in
            # pandas >= 2.0
            results_table = pd.DataFrame(result_rows, columns=results_header)

            current_time_str = "[{}]".format(
                datetime.now().strftime("%b %d %Y %X"))
            print("\nProgress report for workflow '{}' {}".format(
                workflow_dir.name, current_time_str))
            print("Num. Molecules: {} ({})".format(num_molecules,
                                                   num_structures))
            print(
                tabulate(results_table,
                         headers="keys",
                         tablefmt='psql',
                         showindex=False))
            print("Overall completion rate: {}/{} ({}%)".format(
                total_num_completed, total_num_calcs, total_completion_rate))

        return total_completion_rate