Beispiel #1
0
def setup_dirs(save_location: str, workflow_name: str, config_file: str,
               config_id: str) -> None:
    """
    Creates the directory skeleton for a new workflow run.

    The main workflow directory is ``save_location / workflow_name``. Inside it,
    one subdirectory is created for every step ID reported by the config selected
    through ``config_file`` and ``config_id``, plus an ``unopt_pdbs`` directory.
    Finally, a JSON params file (named by ``WORKFLOW_PARAMS_FILENAME``) is written
    to the main directory, recording the resolved path of the config file, the
    config ID, and an initial ``num_waves`` of 1.

    :param save_location: the location to setup the new workflow
    :param workflow_name: the name of the workflow
    :param config_file: the config file for the workflow
    :param config_id: the config ID for the workflow
    :return: None
    :raises FileExistsError: if the specified workflow directory already exists
    """

    workflow_root = Path(save_location) / workflow_name

    # instantiate the config first, so that a config error surfaces before any
    # directory has been created
    flow_config = FlowConfig(config_file=config_file, config_id=config_id)

    # the main directory must not exist yet
    try:
        workflow_root.mkdir()
    except FileExistsError:
        raise FileExistsError("The directory {} already exists".format(
            workflow_root.as_posix()))

    # one subdirectory per workflow step, followed by the directory that holds
    # the initial, unoptimized PDB files
    for subdir_name in [*flow_config.get_step_ids(), "unopt_pdbs"]:
        (workflow_root / subdir_name).mkdir()

    # record config filename and config ID in the workflow's params file
    params_json = json.dumps(
        {
            "config_file": str(Path(config_file).resolve()),
            "config_id": str(config_id),
            "num_waves": 1
        },
        indent=4)

    with (workflow_root / WORKFLOW_PARAMS_FILENAME).open("w") as params_handle:
        params_handle.write(params_json)

    print("Successfully set up workflow directory '{}'".format(workflow_name))
Beispiel #2
0
def begin_step(step_id: str = None,
               show_progress: bool = False,
               wave_id: int = 1,
               attempt_restart: bool = False,
               do_not_track: bool = False) -> None:
    """
    Starts running the specified workflow step.

    The workflow is located by searching for the workflow params file; the
    directory containing that file is treated as the workflow's main directory.
    When ``step_id`` is omitted, the flow config's initial step is used.

    If the step is the initial step, ``wave_id`` is 1 and no restart is requested,
    the workflow is registered with the flow tracker (unless ``do_not_track`` is
    set) and the progress bar is always shown. In every other case the tracker's
    progress for this workflow is updated instead.

    :param step_id: the ID of the step to start running
    :param show_progress: displays command-line progress bar if True, no progress bar otherwise
    :param wave_id: the ID of the wave to submit
    :param attempt_restart: if True, restarts the specified wave, otherwise submits a new wave
    :param do_not_track: if True, does not track the workflow in the tracked_workflows.csv file
    :return: None
    :raises AttributeError: if ``step_id`` is not one of the config's step IDs
    """

    # locate the workflow params file; its parent is the workflow's main directory
    params_path = upsearch(
        WORKFLOW_PARAMS_FILENAME,
        message="Please execute this script in a workflow directory.")

    # pull config_file and config_id out of the params file
    params = load_workflow_params()
    workflow_main_dir = params_path.parent
    config_file = Path(params["config_file"])
    config_id = params["config_id"]
    flow_config = FlowConfig(config_file, config_id)

    # default to the initial step, otherwise insist on a known step
    if step_id is None:
        step_id = flow_config.get_initial_step_id()
    elif step_id not in flow_config.get_step_ids():
        raise AttributeError(
            "Flow config defined in {} does not have a step '{}'".format(
                config_file, step_id))

    # a "first submission" is wave 1 of the initial step, not a restart
    is_first_submission = (flow_config.get_initial_step_id() == step_id
                           and wave_id == 1 and not attempt_restart)

    if not is_first_submission:
        FlowTracker.update_progress(workflow_main_dir.name)
    else:
        if not do_not_track:
            try:
                FlowTracker.track_new_flow(config_file=config_file,
                                           config_id=config_id,
                                           workflow_main_dir=workflow_main_dir)
            except ValueError as tracking_error:
                hint = ("Note: if you would like to avoid tracking this workflow,"
                        " add the --do_not_track flag when you run 'pyflow begin'")
                print("Workflow error: {}\n{}".format(tracking_error, hint))
                sys.exit(1)
        # the first submission always shows progress, whatever the caller passed
        show_progress = True

    # build the runner and kick off the step
    FlowRunner(flow_config=flow_config,
               wave_id=wave_id,
               step_id=step_id,
               workflow_dir=workflow_main_dir,
               attempt_restart=attempt_restart).run(show_progress=show_progress)
Beispiel #3
0
    def check_progress(verbose: bool = True) -> float:
        """
        Checks the progress of the current workflow directory and prints a progress
        report to the command line (if ``verbose == True``).

        For every step in the workflow's config, output files are counted in the
        ``completed`` and ``failed`` subdirectories of all ``wave_*_calcs``
        directories of that step. An output file that sits directly inside a
        ``wave_*_calcs`` directory and was modified within the last five minutes
        is counted as running. The expected number of jobs per step is the number
        of ``*.pdb`` files in ``unopt_pdbs`` when the step's ``conformers`` setting
        is truthy, and the number of ``*0.pdb`` files otherwise.

        Steps with zero expected jobs report all rates as 0, and a workflow with
        zero expected calculations reports an overall completion of 0.0.

        :param verbose: if True, prints progress report to command line
        :return: the overall completion percentage (0-100, rounded to one decimal
            place), i.e. 100 * completed calculations / expected calculations
        :raises FileNotFoundError: if no workflow directory can be found
        """
        # an output file touched more recently than this is considered "running"
        running_window_seconds = 5 * 60

        def format_percentage(total: int, percentage: float) -> str:
            """Formats total count and percentage into a string"""
            percentage_str = "({}%)".format(round(percentage * 100, 1))
            return "{0:<3} {1:>8}".format(total, percentage_str)

        def safe_ratio(count: int, total: int) -> float:
            """Returns count / total, or 0.0 when total is zero"""
            return count / total if total else 0.0

        def recently_modified(path: str) -> bool:
            """Returns True if the file was modified within the running window"""
            try:
                mtime = datetime.fromtimestamp(os.path.getmtime(path))
            except OSError:
                # the file disappeared between glob() and stat() (e.g. a job
                # moved it to completed/failed); it is no longer running here
                return False
            # total_seconds() includes the days component; timedelta.seconds
            # alone wraps every 24 hours and would misreport old files
            age = datetime.now() - mtime
            return age.total_seconds() < running_window_seconds

        # ensure user is in a workflow directory
        try:
            workflow_params_file = upsearch(WORKFLOW_PARAMS_FILENAME)
            workflow_dir = workflow_params_file.parent
        except FileNotFoundError as err:
            msg = "Unable to find workflow directory."
            raise FileNotFoundError(msg) from err

        from pyflow.flow.flow_config import FlowConfig
        from pyflow.flow.flow_runner import FlowRunner

        workflow_params = load_workflow_params()
        config_file = workflow_params["config_file"]
        config_id = workflow_params["config_id"]

        results_header = [
            "Step ID", "Completed", "Incomplete", "Running", "Failed"
        ]
        # rows are collected in a list and turned into a DataFrame once, since
        # DataFrame.append is not available in pandas >= 2.0
        result_rows = []

        config = FlowConfig(config_file, config_id)
        # NOTE(review): "*0.pdb" presumably selects one file per molecule (the
        # first conformer) -- confirm against the PDB naming scheme
        num_molecules = len(glob(str(workflow_dir / "unopt_pdbs" / "*0.pdb")))
        num_structures = len(glob(str(workflow_dir / "unopt_pdbs" / "*.pdb")))
        total_num_completed = 0
        total_num_calcs = 0
        for step_id in config.get_step_ids():
            step_config = config.get_step(step_id)

            # glob pattern matching the calculation directories of every wave
            step_dir = workflow_dir / step_id / "wave_*_calcs"
            completed_dir = step_dir / "completed"
            failed_dir = step_dir / "failed"
            output_file_ext = FlowRunner.PROGRAM_OUTFILE_EXTENSIONS[
                step_config["program"]]
            output_pattern = "*.{}".format(output_file_ext)

            if step_config["conformers"]:
                num_jobs = num_structures
            else:
                num_jobs = num_molecules
            total_num_calcs += num_jobs

            num_completed = len(glob(str(completed_dir / output_pattern)))
            total_num_completed += num_completed
            num_failed = len(glob(str(failed_dir / output_pattern)))
            num_incomplete = num_jobs - num_completed
            num_running = sum(
                1 for f in glob(str(step_dir / output_pattern))
                if recently_modified(f))

            if verbose:
                result_rows.append({
                    "Step ID":
                    step_id,
                    "Completed":
                    format_percentage(num_completed,
                                      safe_ratio(num_completed, num_jobs)),
                    "Incomplete":
                    format_percentage(num_incomplete,
                                      safe_ratio(num_incomplete, num_jobs)),
                    "Running":
                    format_percentage(num_running,
                                      safe_ratio(num_running, num_jobs)),
                    "Failed":
                    format_percentage(num_failed,
                                      safe_ratio(num_failed, num_jobs))
                })

        total_completion_rate = round(
            100 * safe_ratio(total_num_completed, total_num_calcs), 1)

        if verbose:
            results_table = pd.DataFrame(result_rows, columns=results_header)
            current_time_str = "[{}]".format(
                datetime.now().strftime("%b %d %Y %X"))
            print("\nProgress report for workflow '{}' {}".format(
                workflow_dir.name, current_time_str))
            print("Num. Molecules: {} ({})".format(num_molecules,
                                                   num_structures))
            print(
                tabulate(results_table,
                         headers="keys",
                         tablefmt='psql',
                         showindex=False))
            print("Overall completion rate: {}/{} ({}%)".format(
                total_num_completed, total_num_calcs, total_completion_rate))

        return total_completion_rate