コード例 #1
0
ファイル: query.py プロジェクト: zhouxm4/ihme-modeling
def all_submodel_covs(model_version_id, db_connection):
    """
    :param model_version_id: integer representing a codem model version id
    :return: Pandas Data frame with submodels, covariates, and other info
    """
    submodels = get_submodels(model_version_id, db_connection)
    covs = pd.concat([
        submodel_covs(x, db_connection) for x in submodels.submodel_version_id
    ],
                     axis=0).reset_index(drop=True)
    df = covs.merge(submodels, how="left")
    df = df.sort_values(["rank", "covariate_name_short"])
    call = '''
    SELECT submodel_type_id, submodel_type_name FROM cod.submodel_type;
    '''
    df2 = db_connect.query(call, db_connection)
    df = df.merge(df2, how="left")
    call = '''
    SELECT submodel_dep_id, submodel_dep_name FROM cod.submodel_dep;
    '''
    df2 = db_connect.query(call, db_connection)
    df = df.merge(df2, how="left")
    df.drop(["submodel_type_id", "submodel_dep_id"], inplace=True, axis=1)
    df = df.sort_values(["rank", "covariate_name_short"])
    df["approximate_draws"] = np.round(df.weight.values * 1000.)
    return df
コード例 #2
0
ファイル: query.py プロジェクト: zhouxm4/ihme-modeling
def submodel_covs(submodel_version_id, db_connection):
    """
    :param submodel_version_id: integer representing a codem submodel version id
    :return: Pandas data frame with information on submodel covariates

    Given a submodel version id returns the covariates that were used in the
    construction of that model.

    Note: this function was re-worked with the database migrations for covariates
    so that it can pull covariate names via the covariate database but
    submodel version information from the cod database.
    """
    call = '''
        SELECT covariate_model_version_id AS ids
        FROM cod.submodel_version_covariate
        WHERE submodel_version_id = {}
        '''.format(submodel_version_id)
    covs = db_connect.query(call, db_connection).ids.values.tolist()
    df = db_connect.query(
        covNameQueryStr.format(", ".join(str(x) for x in covs)),
        'modeling-covariates-db')
    df.rename(columns={'stub': 'covariate_name_short'}, inplace=True)
    df.drop('covariate_model_id', axis=1, inplace=True)
    df['submodel_version_id'] = submodel_version_id
    return df
    return df
コード例 #3
0
ファイル: query.py プロジェクト: zhouxm4/ihme-modeling
def getModelParams(model_version_id, db_connection, update=False):
    '''
    integer -> dictionary

    Given an integer that indicates a valid model version id  the function will
    return a dictionary with keys indicating the model parameters start age,
    end age, sex, start year, cause, and whether to run covariate selection or
    not. "update" indicates whether during the querying process the database
    should be updated to running during the querying process, default is False.
    True should be used when running CODEm.
    '''
    call = "SELECT * FROM cod.model_version WHERE model_version_id = {0}"
    df = db_connect.query(call.format(model_version_id), db_connection)
    model = {k: df[k][0] for k in df.columns}
    model["start_year"] = 1980
    call = "SELECT acause FROM shared.cause WHERE cause_id = {0}"
    aC = db_connect.query(call.format(model["cause_id"]),
                          db_connection)["acause"][0]
    model["acause"] = aC
    call = "SELECT gbd_round FROM shared.gbd_round WHERE gbd_round_id = {0}"
    gbd_round = int(
        db_connect.query(call.format(model["gbd_round_id"]),
                         db_connection)["gbd_round"][0])
    model["gbd_round"] = gbd_round
    call = """UPDATE cod.model_version SET status = 0
              WHERE model_version_id = {0}"""
    if update:
        db_connect.query(call.format(model_version_id), db_connection)
    return model
コード例 #4
0
    def time_stamp(self, procedure):
        '''
        (self, str) -> None

        Write to the database when a particular part of the procedure has
        started so that it can be read by the CodViz tool. This enables users
        to see what stage a model is currently at (i.e. covariate selection,
        linear model building, space time smoothing, gaussian process, etc.).
        '''
        phrase = "%s started." % procedure
        call = QS.status_write.format(self.model_version_id, phrase)
        db_connect.query(call, self.db_connection)
        self.add_warning(phrase)
コード例 #5
0
ファイル: query.py プロジェクト: zhouxm4/ihme-modeling
def write_submodel(model_version_id, submodel_type_id, submodel_dep_id, weight,
                   rank, db_connection):
    '''
    (int, int, int, float, int) -> int

    Write a submodel to the table and get the id back
    '''
    call = QS.submodel_query_str.format(model_version_id, submodel_type_id,
                                        submodel_dep_id, weight, rank)
    db_connect.query(call, db_connection)
    call = QS.submodel_get_id.format(model_version_id, rank)
    submodel_id_df = db_connect.query(call, db_connection)
    submodel_id = submodel_id_df["submodel_version_id"][0]
    return submodel_id
コード例 #6
0
ファイル: query.py プロジェクト: zhouxm4/ihme-modeling
def codQuery(cause_id, sex, start_year, start_age, end_age,
             location_set_version_id, db_connection, model_version_id,
             gbd_round_id, outlier_save):
    '''
    strings indicating model parameters -> Pandas Data Frame

    Given a list of model parameters will query from the COD database and
    return a pandas data frame. The data frame contains the base variables
    used in the CODEm process.

    Also will call the outlier stored procedure in the database to save model outliers if
    outlier_save is marked as True.
    '''
    call = QS.codQueryStr.format(c=cause_id,
                                 s=sex,
                                 sy=start_year,
                                 sa=start_age,
                                 ea=end_age,
                                 loc_set_id=location_set_version_id)
    df = db_connect.query(call, db_connection)

    if outlier_save == True:
        outlierSPROC(model_version_id, gbd_round_id, db_connection)

    df['national'] = df['national'].map(lambda x: x == 1).astype(int)
    return df
コード例 #7
0
ファイル: query.py プロジェクト: zhouxm4/ihme-modeling
def get_submodel_summary(model_version_id, db_connection):
    '''
    (int) -> data_frame

    Retrieves the summary submodel rank table for a particular model.
    '''
    call = QS.submodel_summary_query.format(model_version_id)
    df = db_connect.query(call, db_connection)
    return df
コード例 #8
0
ファイル: query.py プロジェクト: zhouxm4/ihme-modeling
def covMetaData(model_version_id, db_connection):
    '''
    integer -> Pandas data frame

    Given an integer that represents a valid model ID number, will
    return a pandas data frame which contains the covariate model ID's
    for that model as well as the metadata needed for covariate selection.
    '''
    call = QS.metaQueryStr.format(model_version_id)
    df = db_connect.query(call, db_connection)
    return df
コード例 #9
0
ファイル: query.py プロジェクト: zhouxm4/ihme-modeling
def get_submodels(model_version_id, db_connection):
    """
    :param model_version_id: integer representing a codem model version id
    :return: Pandas Data frame with submodels and corresponding information
    """
    call = '''
    SELECT submodel_version_id, rank, weight, submodel_type_id, submodel_dep_id
    FROM cod.submodel_version
    WHERE model_version_id = {model_version_id}
    '''.format(model_version_id=model_version_id)
    df = db_connect.query(call, db_connection)
    return df
コード例 #10
0
ファイル: query.py プロジェクト: zhouxm4/ihme-modeling
def covMetaData(model_version_id, db_connection):
    '''
    integer -> Pandas data frame

    Given an integer that represents a valid model ID number, will
    return a pandas data frame which contains the covariate model ID's
    for that model as well as the metadata needed for covariate selection.

    Note: this was re-worked to call the covariate prior information from the cod database (either dev or prod)
    and then covariate names from modeling-covariate-db because the model versions must be merged with many tables
    only available in the covariate database.
    '''
    df = db_connect.query(QS.metaQueryStr.format(mvid=model_version_id),
                          db_connection)
    models = df.covariate_model_id.values.tolist()
    names = db_connect.query(
        QS.covNameQueryStr.format(", ".join(str(x) for x in models)),
        'DATABASE')

    df = df.merge(names, on='covariate_model_id')
    return df
コード例 #11
0
ファイル: input.py プロジェクト: zhouxm4/ihme-modeling
    def get_modeler(self):
        '''
        (self) -> string

        Look up the modeler for the cause being modeled in the cod.modeler table
        '''
        call = """ SELECT m.username
            FROM cod.modeler m
                INNER JOIN shared.cause c
                ON m.cause_id = c.cause_id
            WHERE c.acause = '{acause}' AND m.gbd_round_id = {gbd}
            """.format(acause=self.acause, gbd=self.gbd_round_id)
        modeler = db_connect.query(call, self.db_connection)
        return modeler.ix[0, 0].split(', ')
コード例 #12
0
ファイル: query.py プロジェクト: zhouxm4/ihme-modeling
def codQuery(cause_id, sex, start_year, start_age, end_age,
             location_set_version_id, db_connection):
    '''
    Strings indicating model parameters -> Pandas Data Frame

    Given a list of model parameters will query from the COD database and
    return a pandas data frame. The data frame contains the base variables
    used in the CODEm process.
    '''
    call = QS.codQueryStr.format(c=cause_id, s=sex, sy=start_year, sa=start_age,
                                 ea=end_age, loc_set_id=location_set_version_id)
    df = db_connect.query(call, db_connection)
    df['national'] = df['national'].map(lambda x: x == 1).astype(int)
    return df
コード例 #13
0
ファイル: query.py プロジェクト: zhouxm4/ihme-modeling
def covQuery(covID, location_set_version_id):
    '''
    integer -> Pandas data frame

    Given an integer which represents a valid covariate ID will return a data
    frame which contains a unique value for each country, year, age group.
    This data may be aggregated in some form as well.
    '''
    call = QS.cvQueryStr.format(mvid=covID, loc_set_id=location_set_version_id)
    df = db_connect.query(call, 'DATABASE')
    if df.shape[0] == 0:
        err_log = "There appears to be an error with covariate id {0}"
        sys.stderr.write(err_log.format(covID))
        sys.exit()
    df = df.rename(columns={"mean_value": df["name"][0]})
    return df
コード例 #14
0
ファイル: query.py プロジェクト: zhouxm4/ihme-modeling
def locQuery(location_set_version_id, db_connection):
    '''
    list -> Pandas Data Frame

    Given a list of country ID numbers will query from the mortality database
    and return a pandas data frame. The data frame contains columns for
    location, super region and region ID.
    '''
    call = QS.locQueryStr.format(loc_set_ver_id=location_set_version_id)
    df = db_connect.query(call, db_connection)
    df["path_to_top_parent"] = \
        df["path_to_top_parent"].map(lambda x: ",".join((x[2:]).split(",")[:3]))
    arr = np.array(list(df.path_to_top_parent.map(lambda x: x.split(","))))
    df2 = pd.DataFrame(arr.astype(int),
                       columns=["super_region", "region", "country_id"])
    return pd.concat([df["location_id"], df2], axis=1)
コード例 #15
0
ファイル: query.py プロジェクト: zhouxm4/ihme-modeling
def createAgeDF(db_connection):
    '''
    None -> Pandas data frame

    Creates a Pandas data frame with two columns, all the age groups currently
    used in analysis at IHME as noted by the data base as well as a column with
    the code used for the aggregate group.
    '''
    # this age_group_set_id is currently specific to gbd 2016
    call = """
        SELECT age_group_id as all_ages
        FROM shared.age_group_set_list
        WHERE age_group_set_id = 12 AND is_estimate = 1;
    """
    ageDF = db_connect.query(call, db_connection)
    ageDF['age'] = 22
    return ageDF
コード例 #16
0
ファイル: query.py プロジェクト: zhouxm4/ihme-modeling
def acause_from_id(model_version_id, db_connection):
    """
    Given a valid model version id returns the acause associated with it.

    :param model_version_id: int
        valid model version id
    :return: str
        string representing an acause
    """
    call = '''
    SELECT
    acause
    FROM
    shared.cause
    WHERE
    cause_id = (SELECT cause_id
                FROM cod.model_version
                WHERE model_version_id = {})
    '''.format(model_version_id)
    acause = db_connect.query(call, db_connection)["acause"][0]
    return acause
コード例 #17
0
ファイル: Ensemble.py プロジェクト: zhouxm4/ihme-modeling
    def submodel_rank(self):
        """
        Pandas data frame

        Returns a dataframe of the submodels for the model version id,
        ranked by weight.
        """
        call = '''SELECT
            submodel_version_id,
            submodel_type_id,
            submodel_dep_id,
            weight,
            rank
        FROM
            cod.submodel_version
        WHERE
            model_version_id = {}
        '''
        call = call.format(self.model_version_id)
        df = db_connect.query(call, self.db_connection)
        return df.sort("rank")
コード例 #18
0
ファイル: query.py プロジェクト: zhouxm4/ihme-modeling
def submodel_covs(submodel_version_id, db_connection):
    """
    :param submodel_version_id: integer representing a codem submodel version id
    :return: Pandas data frame with information on submodel covariates

    Given a submodel version id returns the covariates that were used in the
    construction of that model.
    """
    call = '''
    SELECT covariate_name_short FROM shared.covariate
    WHERE covariate_id IN (
        SELECT covariate_id FROM covariate.data_version
        WHERE data_version_id IN (
                SELECT data_version_id FROM covariate.model_version
                WHERE model_version_id IN (
                    SELECT covariate_model_version_id
                    FROM cod.submodel_version_covariate
                    WHERE submodel_version_id={submodel_version_id})))
    '''.format(submodel_version_id=submodel_version_id)
    df = db_connect.query(call, db_connection)
    df["submodel_version_id"] = submodel_version_id
    return df
コード例 #19
0
ファイル: query.py プロジェクト: zhouxm4/ihme-modeling
def get_codem_run_time(model_version_id, db_connection):
    call = QS.codem_run_time.format(model_version_id=model_version_id)
    df = db_connect.query(call, db_connection)
    return float(df["time"][0])
コード例 #20
0
ファイル: query.py プロジェクト: zhouxm4/ihme-modeling
def write_submodel_covariate(submodel_id, list_of_covariate_ids,
                             db_connection):
    for cov in list_of_covariate_ids:
        call = QS.submodel_cov_write_str.format(submodel_id, cov)
        db_connect.query(call, db_connection)
コード例 #21
0
ファイル: query.py プロジェクト: zhouxm4/ihme-modeling
def write_model_pv(tag, value, model_version_id, db_connection):
    call = QS.pv_write.format(tag, value, model_version_id)
    db_connect.query(call, db_connection)
コード例 #22
0
ファイル: query.py プロジェクト: zhouxm4/ihme-modeling
def model_version_type_id(model_version_id, db_connection):
    call = QS.model_version_type_id.format(mvid=model_version_id)
    m_type_id = db_connect.query(call,
                                 db_connection)["model_version_type_id"][0]
    return m_type_id
コード例 #23
0
ファイル: query.py プロジェクト: zhouxm4/ihme-modeling
def model_date(model_version_id, db_connection):
    call = QS.model_date.format(mvid=model_version_id)
    date = db_connect.query(call, db_connection)["model_date"][0]
    return date
コード例 #24
0
ファイル: query.py プロジェクト: zhouxm4/ihme-modeling
def cause_name_from_id(cause_id, db_connection):
    call = QS.cause_name.format(cause_id=cause_id)
    cause_name = db_connect.query(call, db_connection)["cause_name"][0]
    return cause_name