# Example #1
def main():
    """
    Configure inputs for one model version ID from command-line arguments:
    sets up the folder structure, pickles the inputs object, and writes
    the settings json for use by later stages.

    If you're doing a drill, then only get input data from locations
    that will be used for the drilling for parent-children.
    """
    args = get_args()
    logging.basicConfig(level=LEVELS[args.loglevel])

    LOG.info(
        f"Configuring inputs for model version ID {args.model_version_id}.")
    LOG.debug(f"Arguments: {args}.")

    # Build the per-model-version context (directory tree, connections).
    context = Context(
        model_version_id=args.model_version_id,
        make=args.make,
        configure_application=args.configure,
    )

    # Pull the raw settings json for this model version, then parse it.
    parameter_json = settings_json_from_model_version_id(
        model_version_id=args.model_version_id,
        conn_def=context.model_connection,
    )
    settings = load_settings(settings_json=parameter_json)

    # Fetch raw inputs and massage them into dismod-ready form.
    measurement_inputs = MeasurementInputsFromSettings(settings=settings)
    measurement_inputs.get_raw_inputs()
    measurement_inputs.configure_inputs_for_dismod(settings=settings)

    context.write_inputs(inputs=measurement_inputs, settings=parameter_json)
# Example #2
def configure_inputs(model_version_id: int,
                     make: bool,
                     configure: bool,
                     midpoint: bool = False,
                     test_dir: Optional[str] = None,
                     json_file: Optional[str] = None) -> None:
    """
    Grabs the inputs for a specific model version ID, sets up the folder
    structure, and pickles the inputs object plus writes the settings json
    for use later on. Also uploads CSMR to the database attached to the model version,
    if applicable.

    Optionally use a json file for settings instead of a model version ID's json file.

    Parameters
    ----------
    model_version_id
        The model version ID to configure inputs for
    make
        Whether or not to make the directory structure for the model version ID
    configure
        Configure the application for the IHME cluster, otherwise will use the
        test_dir for the directory tree instead.
    midpoint
        Passed through to ``configure_inputs_for_dismod``; whether to apply
        midpoint handling when configuring the inputs.
    test_dir
        A test directory to use rather than the directory specified by the
        model version context in the IHME file system.
    json_file
        An optional filepath pointing to a different json than is attached to the
        model_version_id. Will use this instead for settings.
    """
    LOG.info(f"Configuring inputs for model version ID {model_version_id}.")

    context = Context(model_version_id=model_version_id,
                      make=make,
                      configure_application=configure,
                      root_directory=test_dir)
    if json_file:
        LOG.info(f"Reading settings from file: {json_file}")
        # Use a distinct name for the handle; the original re-bound the
        # json_file parameter to the open file object, shadowing it.
        with open(json_file, 'r') as settings_fh:
            parameter_json = json.load(settings_fh)
    else:
        parameter_json = settings_json_from_model_version_id(
            model_version_id=model_version_id,
            conn_def=context.model_connection)

    settings = load_settings(settings_json=parameter_json)

    inputs = MeasurementInputsFromSettings(settings=settings)
    inputs.get_raw_inputs()
    inputs.configure_inputs_for_dismod(settings=settings, midpoint=midpoint)

    # Only upload CSMR when raw CSMR data was actually pulled for this model.
    if not inputs.csmr.raw.empty:
        LOG.info("Uploading CSMR to t3 table.")
        inputs.csmr.attach_to_model_version_in_db(
            model_version_id=model_version_id,
            conn_def=context.model_connection)

    context.write_inputs(inputs=inputs, settings=parameter_json)
# Example #3
# File: run.py — Project: ihmeuw/cascade-at
def run(model_version_id: int,
        jobmon: bool = True,
        make: bool = True,
        n_sim: int = 10,
        n_pool: int = 10,
        addl_workflow_args: Optional[str] = None,
        skip_configure: bool = False,
        json_file: Optional[str] = None,
        test_dir: Optional[str] = None,
        execute_dag: bool = True) -> None:
    """
    Runs the whole cascade or drill for a model version (whichever one is specified
    in the model version settings).

    Creates a cascade command and a bunch of cascade operations based
    on the model version settings. More information on this structure
    is in :ref:`executor`.

    Parameters
    ----------
    model_version_id
        The model version to run
    jobmon
        Whether or not to use Jobmon. If not using Jobmon, executes
        the commands in sequence in this session.
    make
        Whether or not to make the directory structure for the databases, inputs, and outputs.
    n_sim
        Number of simulations to do going down the cascade
    n_pool
        Number of pool workers to use for the simulations
    addl_workflow_args
        Additional workflow args to add to the jobmon workflow name
        so that it is unique if you're testing
    skip_configure
        Skip configuring the inputs (e.g. when they have already been
        configured for this model version).
    json_file
        An optional settings json filepath to use instead of the settings
        attached to the model version in the database.
    test_dir
        A test directory to use rather than the directory specified by the
        model version context in the IHME file system.
    execute_dag
        Whether to execute the dag after writing its commands to disk.
        If False, returns after the command file is written.
    """
    LOG.info(f"Starting model for {model_version_id}.")

    context = Context(model_version_id=model_version_id,
                      make=make,
                      configure_application=not skip_configure,
                      root_directory=test_dir)
    context.update_status(status='Submitted')

    if json_file:
        with open(json_file) as fn:
            LOG.info(f"Reading settings from {json_file}")
            parameter_json = json.loads(fn.read())
        settings = load_settings(parameter_json)
        # Save the json file as it is used throughout the cascade
        LOG.info(f"Replacing {context.settings_file}")
        context.write_inputs(settings=parameter_json)
    else:
        settings = settings_from_model_version_id(
            model_version_id=model_version_id,
            conn_def=context.model_connection)
    dag = LocationDAG(location_set_version_id=settings.location_set_version_id,
                      gbd_round_id=settings.gbd_round_id)

    if settings.model.drill == 'drill':
        cascade_command = Drill(
            model_version_id=model_version_id,
            drill_parent_location_id=settings.model.drill_location_start,
            drill_sex=settings.model.drill_sex,
            n_sim=n_sim,
            n_pool=n_pool,
            skip_configure=skip_configure,
        )
    elif settings.model.drill == 'cascade':

        # Only use drill_location_start / drill_sex when they are actually
        # set to ints; otherwise fall back to None.
        location_start = None
        sex = None

        if isinstance(settings.model.drill_location_start, int):
            location_start = settings.model.drill_location_start
        if isinstance(settings.model.drill_sex, int):
            sex = settings.model.drill_sex

        cascade_command = TraditionalCascade(
            model_version_id=model_version_id,
            split_sex=settings.model.split_sex == 'most_detailed',
            dag=dag,
            n_sim=n_sim,
            n_pool=n_pool,
            # Bug fix: the guarded local above was computed but never used;
            # the original passed settings.model.drill_location_start raw.
            location_start=location_start,
            sex=sex,
            skip_configure=skip_configure,
        )
    else:
        raise NotImplementedError(
            f"The drill/cascade setting {settings.model.drill} is not implemented."
        )

    dag_cmds_path = (context.inputs_dir / 'dag_commands.txt')
    LOG.info(f"Writing cascade dag commands to {dag_cmds_path}.")
    dag_cmds_path.write_text('\n'.join(cascade_command.get_commands()))

    if not execute_dag:
        return

    if jobmon:
        LOG.info("Configuring jobmon.")
        wf = jobmon_workflow_from_cascade_command(
            cc=cascade_command,
            context=context,
            addl_workflow_args=addl_workflow_args)
        wf_run = wf.run(seconds_until_timeout=60 * 60 * 24 * 3, resume=True)
        if wf_run.status != 'D':
            context.update_status(status='Failed')
            raise RuntimeError("Jobmon workflow failed.")
    else:
        LOG.info("Running without jobmon.")
        for c in cascade_command.get_commands():
            LOG.info(f"Running {c}")
            process = subprocess.run(c,
                                     shell=True,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
            if process.returncode:
                context.update_status(status='Failed')
                # Bug fix: added the missing space before the stderr text
                # in the implicitly-concatenated f-strings.
                raise RuntimeError(f"Command {c} failed with error "
                                   f"{process.stderr.decode()}")
            # Bug fix: these prints were dedented outside the loop, so only
            # the last command's output was shown, and `process` was unbound
            # when there were no commands at all.
            if process.stderr:
                print(process.stderr.decode())
            if process.stdout:
                print(process.stdout.decode())

    context.update_status(status='Complete')