Ejemplo n.º 1
0
def main():
    """Run the OptimalPSI step from start to finish.

    Parses the step arguments, configures logging for the OptimalPSI step,
    builds an ``OptimalPSI`` task (with ``cutoff=100``) and invokes its
    processing methods in a fixed order, then logs the best psi stored in
    the task's pickled outputs.
    """
    args = get_step_args()
    setup_logging(model_version_id=args.model_version_id,
                  step_id=STEP_IDS['OptimalPSI'])

    logger.info("Initiating Optimal Psi")
    task_kwargs = dict(
        model_version_id=args.model_version_id,
        db_connection=args.db_connection,
        debug_mode=args.debug_mode,
        old_covariates_mvid=args.old_covariates_mvid,
        cores=args.cores,
        cutoff=100,
    )
    psi_task = OptimalPSI(**task_kwargs)
    psi_task.alerts.alert("Beginning the optimal psi process")

    # Methods are called one after another, in exactly this order.
    pipeline = (
        'database_wipe',
        'get_best_psi',
        'calculate_weights',
        'make_temp_ensemble_preds',
        'calculate_in_sample_rmse',
        'calculate_in_sample_trend',
        'determine_submodel_ids',
        'delete_predictions',
        'return_submodel_labels',
        'submodel_pv',
        'write_submodel_covariates',
        'save_outputs',
    )
    for method_name in pipeline:
        getattr(psi_task, method_name)()

    best_psi = psi_task.pickled_outputs['model_pv']['best_psi']
    logger.info(f"Finished finding optimal psi: {best_psi}")
    psi_task.alerts.alert("Finished with optimal psi")
Ejemplo n.º 2
0
def main():
    """Run the InputData step and refresh the job metadata JSON.

    Parses the step arguments, configures logging for the InputData step,
    constructs a ``ModelTask`` for that step, and then merges the result of
    ``inspect_all_inputs(t.model_metadata)`` into the JSON file at
    ``t.model_paths.JOB_METADATA``.
    """
    args = get_step_args()
    setup_logging(model_version_id=args.model_version_id,
                  step_id=STEP_IDS['InputData'])

    # This is the InputData step; the previous message ("Initiating the
    # knockout generation.") was copied from the knockout step and did not
    # match the completion messages below.
    logger.info("Initiating input creation.")
    # NOTE(review): make_inputs=False although the surrounding messages talk
    # about creating inputs -- confirm this flag is intended.
    t = ModelTask(model_version_id=args.model_version_id,
                  db_connection=args.db_connection,
                  old_covariates_mvid=args.old_covariates_mvid,
                  debug_mode=args.debug_mode,
                  cores=args.cores,
                  step_id=STEP_IDS['InputData'],
                  make_inputs=False,
                  make_ko=False)
    logger.info("Finished with input creation.")
    t.alerts.alert("Done creating inputs.")

    logger.info("Updating job metadata parameters.")
    with open(t.model_paths.JOB_METADATA, 'r') as json_file:
        logger.info("Reading inputs json.")
        inputs_info = json.load(json_file)
        logger.info(f"{inputs_info}")

    # Merge the inspected inputs into the existing metadata before the file
    # is rewritten in full.
    inputs_info.update(inspect_all_inputs(t.model_metadata))
    with open(t.model_paths.JOB_METADATA, 'w') as outfile:
        logger.info("Writing inputs json.")
        logger.info(f"{inputs_info}")
        json.dump(inputs_info, outfile)
Ejemplo n.º 3
0
def main():
    """Run the GPRDraws step: build the task, make draws and save outputs."""
    args = get_step_args()
    setup_logging(model_version_id=args.model_version_id,
                  step_id=STEP_IDS['GPRDraws'])
    ne.set_num_threads(1)
    logger.info("Initiating GPR draws.")

    task_kwargs = {
        'model_version_id': args.model_version_id,
        'db_connection': args.db_connection,
        'debug_mode': args.debug_mode,
        'old_covariates_mvid': args.old_covariates_mvid,
        'cores': args.cores,
    }
    draws_task = GPRDraws(**task_kwargs)
    notify = draws_task.alerts

    notify.alert("Creating GPR draws.")
    draws_task.make_draws()
    draws_task.save_outputs()
    notify.alert("Done creating GPR draws.")
Ejemplo n.º 4
0
def main():
    """Run the ApplySpacetimeSmoothing step and save its outputs."""
    args = get_step_args()
    setup_logging(model_version_id=args.model_version_id,
                  step_id=STEP_IDS['ApplySpacetimeSmoothing'])

    logger.info("Initiating spacetime smoothing.")
    task_kwargs = {
        'model_version_id': args.model_version_id,
        'db_connection': args.db_connection,
        'debug_mode': args.debug_mode,
        'old_covariates_mvid': args.old_covariates_mvid,
        'cores': args.cores,
    }
    smoother = ApplySpacetimeSmoothing(**task_kwargs)

    smoother.alerts.alert("Initiating spacetime smoothing")
    smoother.apply_spacetime_smoothing()
    smoother.save_outputs()
    smoother.alerts.alert("Done with spacetime smoothing")
Ejemplo n.º 5
0
def main():
    """Run the ApplyGPSmoothing step and save its outputs."""
    args = get_step_args()
    setup_logging(model_version_id=args.model_version_id,
                  step_id=STEP_IDS['ApplyGPSmoothing'])

    logger.info("Initiating GPR smoothing.")
    ne.set_num_threads(1)

    task_kwargs = dict(
        model_version_id=args.model_version_id,
        db_connection=args.db_connection,
        debug_mode=args.debug_mode,
        old_covariates_mvid=args.old_covariates_mvid,
        cores=args.cores,
    )
    gp_task = ApplyGPSmoothing(**task_kwargs)

    gp_task.alerts.alert("Applying GP Smoothing")
    gp_task.apply_gp_smoothing()
    gp_task.save_outputs()
    gp_task.alerts.alert("Done with GP Smoothing")
Ejemplo n.º 6
0
def main():
    """Run the CovariateSelection step for the outcome given in the args.

    The logging step id is the CovariateSelection step id followed by an
    underscore and ``args.outcome``.
    """
    args = get_step_args()
    step_label = f"{STEP_IDS['CovariateSelection']!s}_{args.outcome}"
    setup_logging(model_version_id=args.model_version_id,
                  step_id=step_label)

    logger.info("Initiating Covariate Selection")
    task_kwargs = dict(
        model_version_id=args.model_version_id,
        db_connection=args.db_connection,
        debug_mode=args.debug_mode,
        old_covariates_mvid=args.old_covariates_mvid,
        cores=args.cores,
        outcome=args.outcome,
    )
    selector = CovariateSelection(**task_kwargs)
    selector.launch_covariate_selection()

    outcome = args.outcome
    logger.info(f"Finished with Covariate Selection for {outcome}")
    selector.alerts.alert(f"Finished with covariate selection for {outcome}")
Ejemplo n.º 7
0
def main():
    """Run the LinearModelBuilds step, then save the response list and outputs."""
    args = get_step_args()
    setup_logging(model_version_id=args.model_version_id,
                  step_id=STEP_IDS['LinearModelBuilds'])

    logger.info("Initiating linear model builds")
    task_kwargs = {
        'model_version_id': args.model_version_id,
        'db_connection': args.db_connection,
        'debug_mode': args.debug_mode,
        'old_covariates_mvid': args.old_covariates_mvid,
        'cores': args.cores,
    }
    builder = LinearModelBuilds(**task_kwargs)

    builder.launch_linear_model_builds()
    builder.save_response_list()
    builder.save_outputs()

    logger.info("Finished with Linear Model Builds")
    builder.alerts.alert("Finished with linear model builds")
Ejemplo n.º 8
0
def main():
    """Run the Diagnostics step: table diagnostics, saved tables, then plots."""
    args = get_step_args()
    setup_logging(model_version_id=args.model_version_id,
                  step_id=STEP_IDS['Diagnostics'])

    logger.info("Initiating model diagnostics.")
    task_kwargs = dict(
        model_version_id=args.model_version_id,
        db_connection=args.db_connection,
        debug_mode=args.debug_mode,
        old_covariates_mvid=args.old_covariates_mvid,
        cores=args.cores,
    )
    diagnostics_task = Diagnostics(**task_kwargs)
    notify = diagnostics_task.alerts

    notify.alert("Making diagnostics for modeler")
    diagnostics_task.get_table_diagnostics()
    diagnostics_task.save_tables()
    diagnostics_task.get_plot_diagnostics()
    notify.alert("Done making diagnostics for modeler")
Ejemplo n.º 9
0
def main():
    """Run the GenerateKnockouts step.

    Constructs a ``ModelTask`` for the GenerateKnockouts step with
    ``make_inputs=False`` and ``make_ko=True``; no further methods are
    called on the task besides sending the completion alert.
    """
    args = get_step_args()
    knockout_step = STEP_IDS['GenerateKnockouts']
    setup_logging(model_version_id=args.model_version_id,
                  step_id=knockout_step)

    logger.info("Initiating the knockout generation.")
    task_kwargs = dict(
        model_version_id=args.model_version_id,
        db_connection=args.db_connection,
        old_covariates_mvid=args.old_covariates_mvid,
        debug_mode=args.debug_mode,
        cores=args.cores,
        step_id=STEP_IDS['GenerateKnockouts'],
        make_inputs=False,
        make_ko=True,
    )
    knockout_task = ModelTask(**task_kwargs)

    knockout_task.alerts.alert("Done creating knockouts.")
    logger.info("Finished with knockout creation.")
Ejemplo n.º 10
0
def main():
    """Run the WriteResults step by calling its methods in a fixed order."""
    args = get_step_args()
    setup_logging(model_version_id=args.model_version_id,
                  step_id=STEP_IDS['WriteResults'])

    logger.info("Initiating writing results.")
    task_kwargs = {
        'model_version_id': args.model_version_id,
        'db_connection': args.db_connection,
        'debug_mode': args.debug_mode,
        'old_covariates_mvid': args.old_covariates_mvid,
        'cores': args.cores,
    }
    writer = WriteResults(**task_kwargs)

    # Methods are called one after another, in exactly this order.
    pipeline = (
        'database_wipe',
        'read_draws',
        'aggregate_draws',
        'get_full_envelope',
        'write_submodel_means',
        'write_model_mean',
        'create_global_table',
    )
    for method_name in pipeline:
        getattr(writer, method_name)()
Ejemplo n.º 11
0
def main():
    """Run the Email step: create the email body and text, then send it."""
    args = get_step_args()
    setup_logging(model_version_id=args.model_version_id,
                  step_id=STEP_IDS['Email'])

    logger.info("Initiating modeler email.")
    task_kwargs = {
        'model_version_id': args.model_version_id,
        'db_connection': args.db_connection,
        'debug_mode': args.debug_mode,
        'old_covariates_mvid': args.old_covariates_mvid,
        'cores': args.cores,
    }
    mailer = Email(**task_kwargs)
    notify = mailer.alerts

    notify.alert("Creating the email to send to modelers")
    mailer.create_email_body()
    mailer.create_email_text()
    mailer.send_email()
    notify.alert("Check your email! :)")
Ejemplo n.º 12
0
def main():
    """Run the ReadSpacetimeModels step.

    Reads models from the ``'spacetime'`` JSON path into the
    ``'st_models_linear'`` output, calls ``delete_mixed_model_parameters()``
    on that output and saves the task outputs.
    """
    args = get_step_args()
    setup_logging(model_version_id=args.model_version_id,
                  step_id=STEP_IDS['ReadSpacetimeModels'])

    logger.info("Initiating reading spacetime models.")
    task_kwargs = dict(
        model_version_id=args.model_version_id,
        db_connection=args.db_connection,
        debug_mode=args.debug_mode,
        old_covariates_mvid=args.old_covariates_mvid,
        cores=args.cores,
        step_id=STEP_IDS['ReadSpacetimeModels'],
    )
    reader = ReadModels(**task_kwargs)
    reader.alerts.alert("Reading spacetime models")

    output_name = 'st_models_linear'
    spacetime_json = reader.model_paths.JSON_FILES['spacetime']
    reader.read_models(input_json_path=spacetime_json,
                       output_object_name=output_name)
    reader.pickled_outputs[output_name].delete_mixed_model_parameters()
    reader.save_outputs()

    reader.alerts.alert("Done reading spacetime models")
Ejemplo n.º 13
0
def main():
    """Run the LinearPV step.

    Calls ``calculate_submodel_pv`` with ``'linear_models_linear'`` as the
    input object and ``'linear_models'`` as the output prefix, then saves
    the task outputs.
    """
    args = get_step_args()
    setup_logging(model_version_id=args.model_version_id,
                  step_id=STEP_IDS['LinearPV'])

    logger.info("Initiating Linear Model PV.")
    task_kwargs = dict(
        model_version_id=args.model_version_id,
        db_connection=args.db_connection,
        debug_mode=args.debug_mode,
        old_covariates_mvid=args.old_covariates_mvid,
        cores=args.cores,
        step_id=STEP_IDS['LinearPV'],
    )
    pv_task = PV(**task_kwargs)

    start_message = (
        "Calculating submodel predictive validity for linear models.")
    pv_task.alerts.alert(start_message)

    pv_task.calculate_submodel_pv(input_object_name='linear_models_linear',
                                  output_object_prefix='linear_models')
    pv_task.save_outputs()

    done_message = (
        "Done calculating submodel predictive validity for linear models.")
    pv_task.alerts.alert(done_message)
Ejemplo n.º 14
0
def main():
    """Run the EnsemblePredictions step by calling its methods in order."""
    args = get_step_args()
    setup_logging(model_version_id=args.model_version_id,
                  step_id=STEP_IDS['EnsemblePredictions'])

    ne.set_num_threads(1)
    logger.info("Initiating calculate coverage.")

    task_kwargs = {
        'model_version_id': args.model_version_id,
        'db_connection': args.db_connection,
        'debug_mode': args.debug_mode,
        'old_covariates_mvid': args.old_covariates_mvid,
        'cores': args.cores,
    }
    ensemble = EnsemblePredictions(**task_kwargs)
    ensemble.alerts.alert("Ensemble-ing all the predictions!")

    # Methods are called one after another, in exactly this order.
    pipeline = (
        'calculate_oos_coverage',
        'calculate_is_coverage',
        'write_pv',
        'add_draws_to_df',
        'write_draws',
        'save_outputs',
    )
    for method_name in pipeline:
        getattr(ensemble, method_name)()

    ensemble.alerts.alert("Done with ensemble-ing the predictions.")
Ejemplo n.º 15
0
def _fail_model_and_exit(alerts, model_version_id, db_connection, exit_status,
                         log_message, alert_message):
    """Report a failed workflow, set model status 7 and exit the process.

    Logs ``log_message``, sends ``alert_message`` through ``alerts``, calls
    ``change_model_status`` with ``status=7`` (the status this script sets
    whenever a workflow returns a non-zero exit status) and finally calls
    ``sys.exit(exit_status)``.
    """
    logger.info(log_message)
    alerts.alert(alert_message)
    change_model_status(model_version_id=model_version_id,
                        status=7,
                        db_connection=db_connection)
    sys.exit(exit_status)


def main():
    """Build and run the three workflows that make up a full model run.

    Steps, in order:

    1. Parse arguments, set up logging and alert the branch / log directory.
    2. Write the job metadata JSON and run the input-data workflow.
    3. Run the knockouts workflow, which also runs covariate selection
       unless ``old_covariates_mvid`` is given (its covariate files are
       copied instead).
    4. Update the job metadata with submodel information and run the
       ensemble workflow (linear and spacetime models through to the email).
    5. On success set the model status to 1 and clean up files; on any
       workflow failure set the status to 7 and exit with that workflow's
       exit status.

    Raises:
        RuntimeError: if the "no select" covariate files exist for both
            ``ln_rate`` and ``lt_cf``.
    """

    args = get_args()

    model_version_id = args.model_version_id
    db_connection = args.db_connection
    old_covariates_mvid = args.old_covariates_mvid
    debug_mode = args.debug_mode
    additional_resources = args.step_resources
    # The resources arrive as JSON with '^' standing in for double quotes.
    additional_resources = json.loads(additional_resources.replace('^', '"'))

    branch = log_branch(model_version_id, db_connection)
    log_directory = setup_logging(model_version_id, step_id=None)

    alerts = ModelerAlert(model_version_id=model_version_id,
                          db_connection=db_connection)
    alerts.alert(f"This model is running on branch {branch}")
    alerts.alert(f"You can find your logs here {log_directory}")

    # NOTE(review): wf_0 and wf_1 are created with identical name and
    # description -- confirm this is intended and not a copy-paste slip.
    wf_0 = CODEmWorkflow(name=f'cod_{model_version_id}_inputs',
                         description=f'cod_{model_version_id}_cov_ko',
                         resume=True,
                         reset_running_jobs=True)
    wf_1 = CODEmWorkflow(name=f'cod_{model_version_id}_inputs',
                         description=f'cod_{model_version_id}_cov_ko',
                         resume=True,
                         reset_running_jobs=True)
    wf_2 = CODEmWorkflow(name=f'cod_{model_version_id}_ensemble',
                         description=f'cod_{model_version_id}_ensemble',
                         resume=True,
                         reset_running_jobs=True)

    step_generator = StepTaskGenerator(
        model_version_id=model_version_id,
        db_connection=db_connection,
        old_covariates_mvid=old_covariates_mvid,
        debug_mode=debug_mode,
        additional_resources=additional_resources)

    model_parameters = get_model_parameters(model_version_id=model_version_id,
                                            db_connection=db_connection,
                                            update=True)

    paths = ModelPaths(model_version_id=model_version_id,
                       acause=model_parameters['acause'])

    old_model_version = ModelPaths(model_version_id=old_covariates_mvid,
                                   acause=model_parameters['acause'])

    # ------------------------------------------------------------------
    # Job metadata + input data workflow
    # ------------------------------------------------------------------
    logger.info(f"Saving the job parameters to {paths.JOB_METADATA}.")
    if os.path.isfile(paths.JOB_METADATA):
        # Existing metadata is kept and overlaid with the current parameters.
        with open(paths.JOB_METADATA, 'r') as json_file:
            logger.info("Reading inputs json.")
            inputs_info = json.load(json_file)
            inputs_info.update(inspect_parameters(parameters=model_parameters))
            logger.info(f"{inputs_info}")
    else:
        inputs_info = inspect_parameters(parameters=model_parameters)

    with open(paths.JOB_METADATA, 'w') as outfile:
        json.dump(inputs_info, outfile)

    input_data = step_generator.generate(step_id=STEP_IDS['InputData'],
                                         inputs_info=inputs_info)

    wf_0.add_task(input_data)
    try:
        logger.info("Running input data workflow.")
        exit_status = wf_0.run()
        if exit_status:
            _fail_model_and_exit(
                alerts, model_version_id, db_connection, exit_status,
                log_message=(
                    f"Input data workflow returned exit status {exit_status}"),
                alert_message=(
                    "The input data workflow failed. Please submit a ticket!"))
        else:
            logger.info("Input data workflow finished successfully.")
            alerts.alert("Input data workflow finished successfully.")
    except WorkflowAlreadyComplete:
        logger.info("Workflow already complete for input data. Skipping.")

    # ------------------------------------------------------------------
    # Knockouts (+ covariate selection) workflow
    # ------------------------------------------------------------------
    logger.info("Reading job metadata.")
    with open(paths.JOB_METADATA, 'r') as json_file:
        inputs_info = json.load(json_file)

    generate_knockouts = step_generator.generate(
        step_id=STEP_IDS['GenerateKnockouts'],
        inputs_info=inputs_info,
        resource_scales={
            'm_mem_free': 0.5,
            'max_runtime_seconds': 0.1
        })
    wf_1.add_task(generate_knockouts)

    if old_covariates_mvid:
        logger.info(f'Using {old_covariates_mvid} for old covariates.')
        alerts.alert(f"Skipping: using {old_covariates_mvid}s "
                     f"covariates for this models covariates instead")
        for outcome in ('ln_rate', 'lt_cf'):
            # Pass an argument list instead of a shell string so paths with
            # spaces or shell metacharacters reach cp verbatim.
            return_code = subprocess.call([
                'cp',
                old_model_version.COVARIATE_FILES[outcome],
                paths.COVARIATE_FILES[outcome],
            ])
            if return_code:
                # Previously a failed copy went unnoticed; keep going as
                # before but leave a trace in the log.
                logger.warning(
                    f"Copying the {outcome} covariate file from model "
                    f"version {old_covariates_mvid} exited with status "
                    f"{return_code}.")
    else:
        logger.info("Need to run covariate selection.")
        covariate_selection_tasks = [
            step_generator.generate(step_id=STEP_IDS['CovariateSelection'],
                                    inputs_info=inputs_info,
                                    additional_args={'outcome': outcome},
                                    resource_scales={
                                        'm_mem_free': 1,
                                        'max_runtime_seconds': 1
                                    })
            for outcome in ['ln_rate', 'lt_cf']
        ]
        wf_1.add_tasks(covariate_selection_tasks)

    try:
        logger.info("Running covariate selection + knockouts workflow.")
        exit_status = wf_1.run()
        if exit_status:
            _fail_model_and_exit(
                alerts, model_version_id, db_connection, exit_status,
                log_message=(
                    f"Covariate selection + knockouts returned exit status {exit_status}"
                ),
                alert_message=(
                    "The covariate selection workflow failed. Please submit a ticket!"
                ))
        else:
            logger.info(
                "The covariate selection + knockouts workflow was successful.")
            alerts.alert(
                "The covariate selection + knockouts workflow was successful.")
    except WorkflowAlreadyComplete:
        logger.info(
            "Workflow already complete for covariate selection + knockouts. Skipping."
        )

    # Both "no select" files present means neither outcome got covariates.
    if (os.path.isfile(paths.COVARIATE_FILES_NO_SELECT['ln_rate'])) and \
            (os.path.isfile(paths.COVARIATE_FILES_NO_SELECT['lt_cf'])):
        # The trailing space after "Please" was missing, which rendered the
        # message as "...Pleasetry different covariates."
        logger.info(
            "No covariates selected for either ln_rate or lt_cf. Please "
            "try different covariates.")
        alerts.alert("No covariates were selected for this model. "
                     "Please relaunch the model with different covariates!")
        raise RuntimeError("There were no covariates selected at all.")

    logger.info("Updating job metadata parameters.")
    with open(paths.JOB_METADATA, 'r') as json_file:
        logger.info("Reading inputs after covariate selection json.")
        inputs_info = json.load(json_file)
        logger.info(f"{inputs_info}")
    inputs_info.update(inspect_submodels(paths))
    with open(paths.JOB_METADATA, 'w') as outfile:
        logger.info("Writing inputs after covariate selection json.")
        logger.info(f"{inputs_info}")
        json.dump(inputs_info, outfile)

    # Reload the metadata that was just written.
    logger.info("Reading job metadata.")
    with open(paths.JOB_METADATA, 'r') as json_file:
        inputs_info = json.load(json_file)

    # ------------------------------------------------------------------
    # Ensemble workflow: task graph definition
    # ------------------------------------------------------------------
    linear_model_builds = step_generator.generate(
        step_id=STEP_IDS['LinearModelBuilds'],
        inputs_info=inputs_info,
        resource_scales={
            'm_mem_free': 1,
            'max_runtime_seconds': 1
        })
    read_linear_models = step_generator.generate(
        step_id=STEP_IDS['ReadLinearModels'],
        inputs_info=inputs_info,
        upstream_tasks=[linear_model_builds],
        resource_scales={
            'm_mem_free': 0.3,
            'max_runtime_seconds': 0.25
        })
    linear_pv = step_generator.generate(step_id=STEP_IDS['LinearPV'],
                                        inputs_info=inputs_info,
                                        upstream_tasks=[read_linear_models],
                                        resource_scales={
                                            'm_mem_free': 0.3,
                                            'max_runtime_seconds': 0.25
                                        })
    read_spacetime_models = step_generator.generate(
        step_id=STEP_IDS['ReadSpacetimeModels'],
        inputs_info=inputs_info,
        upstream_tasks=[linear_model_builds],
        resource_scales={
            'm_mem_free': 0.3,
            'max_runtime_seconds': 0.25
        })
    apply_spacetime_smoothing = step_generator.generate(
        step_id=STEP_IDS['ApplySpacetimeSmoothing'],
        inputs_info=inputs_info,
        upstream_tasks=[read_spacetime_models],
        resource_scales={
            'm_mem_free': 1,
            'max_runtime_seconds': 0.5
        })
    apply_gp_smoothing = step_generator.generate(
        step_id=STEP_IDS['ApplyGPSmoothing'],
        inputs_info=inputs_info,
        upstream_tasks=[apply_spacetime_smoothing],
        resource_scales={
            'm_mem_free': 1,
            'max_runtime_seconds': 0.5
        })
    spacetime_pv = step_generator.generate(step_id=STEP_IDS['SpacetimePV'],
                                           inputs_info=inputs_info,
                                           upstream_tasks=[apply_gp_smoothing],
                                           resource_scales={
                                               'm_mem_free': 0.3,
                                               'max_runtime_seconds': 0.25
                                           })
    # Optimal psi waits on both predictive-validity branches.
    optimal_psi = step_generator.generate(
        step_id=STEP_IDS['OptimalPSI'],
        inputs_info=inputs_info,
        upstream_tasks=[linear_pv, spacetime_pv],
        resource_scales={
            'm_mem_free': 0.3,
            'max_runtime_seconds': 0.25
        })
    linear_draws = step_generator.generate(step_id=STEP_IDS['LinearDraws'],
                                           inputs_info=inputs_info,
                                           upstream_tasks=[optimal_psi],
                                           resource_scales={
                                               'm_mem_free': 0.3,
                                               'max_runtime_seconds': 0.5
                                           })
    gpr_draws = step_generator.generate(step_id=STEP_IDS['GPRDraws'],
                                        inputs_info=inputs_info,
                                        upstream_tasks=[optimal_psi],
                                        resource_scales={
                                            'm_mem_free': 0.3,
                                            'max_runtime_seconds': 0.5
                                        })
    ensemble_predictions = step_generator.generate(
        step_id=STEP_IDS['EnsemblePredictions'],
        inputs_info=inputs_info,
        upstream_tasks=[optimal_psi, linear_draws, gpr_draws],
        resource_scales={
            'm_mem_free': 0.3,
            'max_runtime_seconds': 0.25
        })
    write_results = step_generator.generate(
        step_id=STEP_IDS['WriteResults'],
        inputs_info=inputs_info,
        upstream_tasks=[ensemble_predictions],
        resource_scales={
            'm_mem_free': 0.3,
            'max_runtime_seconds': 0.25
        })
    diagnostics = step_generator.generate(
        step_id=STEP_IDS['Diagnostics'],
        inputs_info=inputs_info,
        upstream_tasks=[ensemble_predictions],
        resource_scales={
            'm_mem_free': 0.3,
            'max_runtime_seconds': 0.25
        })
    email = step_generator.generate(
        step_id=STEP_IDS['Email'],
        inputs_info=inputs_info,
        upstream_tasks=[write_results, diagnostics],
        resource_scales={
            'm_mem_free': 0.3,
            'max_runtime_seconds': 0.25
        })

    wf_2.add_tasks([
        linear_model_builds, read_linear_models, linear_pv, linear_draws,
        read_spacetime_models, apply_spacetime_smoothing, apply_gp_smoothing,
        spacetime_pv, gpr_draws, optimal_psi, ensemble_predictions,
        write_results, diagnostics, email
    ])
    alerts.alert("Initiating the ensemble workflow.")
    logger.info("Running the rest of the ensemble workflow.")
    # NOTE(review): unlike the two workflows above, this run is not guarded
    # against WorkflowAlreadyComplete -- confirm that is intended.
    exit_status = wf_2.run()

    if exit_status:
        _fail_model_and_exit(
            alerts, model_version_id, db_connection, exit_status,
            log_message=(
                f"The ensemble workflow failed, returning exit status {exit_status}"
            ),
            alert_message=(
                "The ensemble workflow failed. Please submit a ticket!"))
    else:
        logger.info("The ensemble workflow successfully completed.")
        alerts.alert("The model has successful completed!!")
        logger.info(
            f"Changing model version {model_version_id} to complete in the database."
        )
        change_model_status(model_version_id=model_version_id,
                            status=1,
                            db_connection=db_connection)
        logger.info("Cleaning up files...")
        cleanup_files(model_version_id=model_version_id,
                      acause=model_parameters['acause'])
        logger.info("Finished.")