Python query Examples, db_connect.query Python Examples

Example #1

0

Show file

File: query.py Project: zhouxm4/ihme-modeling

def all_submodel_covs(model_version_id, db_connection):
    """
    :param model_version_id: integer representing a codem model version id
    :return: Pandas Data frame with submodels, covariates, and other info
    """
    submodels = get_submodels(model_version_id, db_connection)
    covs = pd.concat([
        submodel_covs(x, db_connection) for x in submodels.submodel_version_id
    ],
                     axis=0).reset_index(drop=True)
    df = covs.merge(submodels, how="left")
    df = df.sort_values(["rank", "covariate_name_short"])
    call = '''
    SELECT submodel_type_id, submodel_type_name FROM cod.submodel_type;
    '''
    df2 = db_connect.query(call, db_connection)
    df = df.merge(df2, how="left")
    call = '''
    SELECT submodel_dep_id, submodel_dep_name FROM cod.submodel_dep;
    '''
    df2 = db_connect.query(call, db_connection)
    df = df.merge(df2, how="left")
    df.drop(["submodel_type_id", "submodel_dep_id"], inplace=True, axis=1)
    df = df.sort_values(["rank", "covariate_name_short"])
    df["approximate_draws"] = np.round(df.weight.values * 1000.)
    return df

Example #2

0

Show file

File: query.py Project: zhouxm4/ihme-modeling

def submodel_covs(submodel_version_id, db_connection):
    """
    :param submodel_version_id: integer representing a codem submodel version id
    :return: Pandas data frame with information on submodel covariates

    Given a submodel version id returns the covariates that were used in the
    construction of that model.

    Note: this function was re-worked with the database migrations for covariates
    so that it can pull covariate names via the covariate database but
    submodel version information from the cod database.
    """
    call = '''
        SELECT covariate_model_version_id AS ids
        FROM cod.submodel_version_covariate
        WHERE submodel_version_id = {}
        '''.format(submodel_version_id)
    covs = db_connect.query(call, db_connection).ids.values.tolist()
    df = db_connect.query(
        covNameQueryStr.format(", ".join(str(x) for x in covs)),
        'modeling-covariates-db')
    df.rename(columns={'stub': 'covariate_name_short'}, inplace=True)
    df.drop('covariate_model_id', axis=1, inplace=True)
    df['submodel_version_id'] = submodel_version_id
    return df
    return df

Example #3

0

Show file

File: query.py Project: zhouxm4/ihme-modeling

def getModelParams(model_version_id, db_connection, update=False):
    '''
    integer -> dictionary

    Given an integer that indicates a valid model version id  the function will
    return a dictionary with keys indicating the model parameters start age,
    end age, sex, start year, cause, and whether to run covariate selection or
    not. "update" indicates whether during the querying process the database
    should be updated to running during the querying process, default is False.
    True should be used when running CODEm.
    '''
    call = "SELECT * FROM cod.model_version WHERE model_version_id = {0}"
    df = db_connect.query(call.format(model_version_id), db_connection)
    model = {k: df[k][0] for k in df.columns}
    model["start_year"] = 1980
    call = "SELECT acause FROM shared.cause WHERE cause_id = {0}"
    aC = db_connect.query(call.format(model["cause_id"]),
                          db_connection)["acause"][0]
    model["acause"] = aC
    call = "SELECT gbd_round FROM shared.gbd_round WHERE gbd_round_id = {0}"
    gbd_round = int(
        db_connect.query(call.format(model["gbd_round_id"]),
                         db_connection)["gbd_round"][0])
    model["gbd_round"] = gbd_round
    call = """UPDATE cod.model_version SET status = 0
              WHERE model_version_id = {0}"""
    if update:
        db_connect.query(call.format(model_version_id), db_connection)
    return model

Example #4

0

Show file

    def time_stamp(self, procedure):
        '''
        (self, str) -> None

        Write to the database when a particular part of the procedure has
        started so that it can be read by the CodViz tool. This enables users
        to see what stage a model is currently at (i.e. covariate selection,
        linear model building, space time smoothing, gaussian process, etc.).
        '''
        phrase = "%s started." % procedure
        call = QS.status_write.format(self.model_version_id, phrase)
        db_connect.query(call, self.db_connection)
        self.add_warning(phrase)

Example #5

0

Show file

File: query.py Project: zhouxm4/ihme-modeling

def write_submodel(model_version_id, submodel_type_id, submodel_dep_id, weight,
                   rank, db_connection):
    '''
    (int, int, int, float, int) -> int

    Write a submodel to the table and get the id back
    '''
    call = QS.submodel_query_str.format(model_version_id, submodel_type_id,
                                        submodel_dep_id, weight, rank)
    db_connect.query(call, db_connection)
    call = QS.submodel_get_id.format(model_version_id, rank)
    submodel_id_df = db_connect.query(call, db_connection)
    submodel_id = submodel_id_df["submodel_version_id"][0]
    return submodel_id

Example #6

0

Show file

File: query.py Project: zhouxm4/ihme-modeling

def codQuery(cause_id, sex, start_year, start_age, end_age,
             location_set_version_id, db_connection, model_version_id,
             gbd_round_id, outlier_save):
    '''
    strings indicating model parameters -> Pandas Data Frame

    Given a list of model parameters will query from the COD database and
    return a pandas data frame. The data frame contains the base variables
    used in the CODEm process.

    Also will call the outlier stored procedure in the database to save model outliers if
    outlier_save is marked as True.
    '''
    call = QS.codQueryStr.format(c=cause_id,
                                 s=sex,
                                 sy=start_year,
                                 sa=start_age,
                                 ea=end_age,
                                 loc_set_id=location_set_version_id)
    df = db_connect.query(call, db_connection)

    if outlier_save == True:
        outlierSPROC(model_version_id, gbd_round_id, db_connection)

    df['national'] = df['national'].map(lambda x: x == 1).astype(int)
    return df

Example #7

0

Show file

File: query.py Project: zhouxm4/ihme-modeling

def get_submodel_summary(model_version_id, db_connection):
    '''
    (int) -> data_frame

    Retrieves the summary submodel rank table for a particular model.
    '''
    call = QS.submodel_summary_query.format(model_version_id)
    df = db_connect.query(call, db_connection)
    return df

Example #8

0

Show file

File: query.py Project: zhouxm4/ihme-modeling

def covMetaData(model_version_id, db_connection):
    '''
    integer -> Pandas data frame

    Given an integer that represents a valid model ID number, will
    return a pandas data frame which contains the covariate model ID's
    for that model as well as the metadata needed for covariate selection.
    '''
    call = QS.metaQueryStr.format(model_version_id)
    df = db_connect.query(call, db_connection)
    return df

Example #9

0

Show file

File: query.py Project: zhouxm4/ihme-modeling

def get_submodels(model_version_id, db_connection):
    """
    :param model_version_id: integer representing a codem model version id
    :return: Pandas Data frame with submodels and corresponding information
    """
    call = '''
    SELECT submodel_version_id, rank, weight, submodel_type_id, submodel_dep_id
    FROM cod.submodel_version
    WHERE model_version_id = {model_version_id}
    '''.format(model_version_id=model_version_id)
    df = db_connect.query(call, db_connection)
    return df

Example #10

0

Show file

File: query.py Project: zhouxm4/ihme-modeling

def covMetaData(model_version_id, db_connection):
    '''
    integer -> Pandas data frame

    Given an integer that represents a valid model ID number, will
    return a pandas data frame which contains the covariate model ID's
    for that model as well as the metadata needed for covariate selection.

    Note: this was re-worked to call the covariate prior information from the cod database (either dev or prod)
    and then covariate names from modeling-covariate-db because the model versions must be merged with many tables
    only available in the covariate database.
    '''
    df = db_connect.query(QS.metaQueryStr.format(mvid=model_version_id),
                          db_connection)
    models = df.covariate_model_id.values.tolist()
    names = db_connect.query(
        QS.covNameQueryStr.format(", ".join(str(x) for x in models)),
        'DATABASE')

    df = df.merge(names, on='covariate_model_id')
    return df

Example #11

0

Show file

File: input.py Project: zhouxm4/ihme-modeling

    def get_modeler(self):
        '''
        (self) -> string

        Look up the modeler for the cause being modeled in the cod.modeler table
        '''
        call = """ SELECT m.username
            FROM cod.modeler m
                INNER JOIN shared.cause c
                ON m.cause_id = c.cause_id
            WHERE c.acause = '{acause}' AND m.gbd_round_id = {gbd}
            """.format(acause=self.acause, gbd=self.gbd_round_id)
        modeler = db_connect.query(call, self.db_connection)
        return modeler.ix[0, 0].split(', ')

Example #12

0

Show file

File: query.py Project: zhouxm4/ihme-modeling

def codQuery(cause_id, sex, start_year, start_age, end_age,
             location_set_version_id, db_connection):
    '''
    Strings indicating model parameters -> Pandas Data Frame

    Given a list of model parameters will query from the COD database and
    return a pandas data frame. The data frame contains the base variables
    used in the CODEm process.
    '''
    call = QS.codQueryStr.format(c=cause_id, s=sex, sy=start_year, sa=start_age,
                                 ea=end_age, loc_set_id=location_set_version_id)
    df = db_connect.query(call, db_connection)
    df['national'] = df['national'].map(lambda x: x == 1).astype(int)
    return df

Example #13

0

Show file

File: query.py Project: zhouxm4/ihme-modeling

def covQuery(covID, location_set_version_id):
    '''
    integer -> Pandas data frame

    Given an integer which represents a valid covariate ID will return a data
    frame which contains a unique value for each country, year, age group.
    This data may be aggregated in some form as well.
    '''
    call = QS.cvQueryStr.format(mvid=covID, loc_set_id=location_set_version_id)
    df = db_connect.query(call, 'DATABASE')
    if df.shape[0] == 0:
        err_log = "There appears to be an error with covariate id {0}"
        sys.stderr.write(err_log.format(covID))
        sys.exit()
    df = df.rename(columns={"mean_value": df["name"][0]})
    return df

Example #14

0

Show file

File: query.py Project: zhouxm4/ihme-modeling

def locQuery(location_set_version_id, db_connection):
    '''
    list -> Pandas Data Frame

    Given a list of country ID numbers will query from the mortality database
    and return a pandas data frame. The data frame contains columns for
    location, super region and region ID.
    '''
    call = QS.locQueryStr.format(loc_set_ver_id=location_set_version_id)
    df = db_connect.query(call, db_connection)
    df["path_to_top_parent"] = \
        df["path_to_top_parent"].map(lambda x: ",".join((x[2:]).split(",")[:3]))
    arr = np.array(list(df.path_to_top_parent.map(lambda x: x.split(","))))
    df2 = pd.DataFrame(arr.astype(int),
                       columns=["super_region", "region", "country_id"])
    return pd.concat([df["location_id"], df2], axis=1)

Example #15

0

Show file

File: query.py Project: zhouxm4/ihme-modeling

def createAgeDF(db_connection):
    '''
    None -> Pandas data frame

    Creates a Pandas data frame with two columns, all the age groups currently
    used in analysis at IHME as noted by the data base as well as a column with
    the code used for the aggregate group.
    '''
    # this age_group_set_id is currently specific to gbd 2016
    call = """
        SELECT age_group_id as all_ages
        FROM shared.age_group_set_list
        WHERE age_group_set_id = 12 AND is_estimate = 1;
    """
    ageDF = db_connect.query(call, db_connection)
    ageDF['age'] = 22
    return ageDF

Example #16

0

Show file

File: query.py Project: zhouxm4/ihme-modeling

def acause_from_id(model_version_id, db_connection):
    """
    Given a valid model version id returns the acause associated with it.

    :param model_version_id: int
        valid model version id
    :return: str
        string representing an acause
    """
    call = '''
    SELECT
    acause
    FROM
    shared.cause
    WHERE
    cause_id = (SELECT cause_id
                FROM cod.model_version
                WHERE model_version_id = {})
    '''.format(model_version_id)
    acause = db_connect.query(call, db_connection)["acause"][0]
    return acause

Example #17

0

Show file

File: Ensemble.py Project: zhouxm4/ihme-modeling

    def submodel_rank(self):
        """
        Pandas data frame

        Returns a dataframe of the submodels for the model version id,
        ranked by weight.
        """
        call = '''SELECT
            submodel_version_id,
            submodel_type_id,
            submodel_dep_id,
            weight,
            rank
        FROM
            cod.submodel_version
        WHERE
            model_version_id = {}
        '''
        call = call.format(self.model_version_id)
        df = db_connect.query(call, self.db_connection)
        return df.sort("rank")

Example #18

0

Show file

File: query.py Project: zhouxm4/ihme-modeling

def submodel_covs(submodel_version_id, db_connection):
    """
    :param submodel_version_id: integer representing a codem submodel version id
    :return: Pandas data frame with information on submodel covariates

    Given a submodel version id returns the covariates that were used in the
    construction of that model.
    """
    call = '''
    SELECT covariate_name_short FROM shared.covariate
    WHERE covariate_id IN (
        SELECT covariate_id FROM covariate.data_version
        WHERE data_version_id IN (
                SELECT data_version_id FROM covariate.model_version
                WHERE model_version_id IN (
                    SELECT covariate_model_version_id
                    FROM cod.submodel_version_covariate
                    WHERE submodel_version_id={submodel_version_id})))
    '''.format(submodel_version_id=submodel_version_id)
    df = db_connect.query(call, db_connection)
    df["submodel_version_id"] = submodel_version_id
    return df

Example #19

0

Show file

File: query.py Project: zhouxm4/ihme-modeling

def get_codem_run_time(model_version_id, db_connection):
    call = QS.codem_run_time.format(model_version_id=model_version_id)
    df = db_connect.query(call, db_connection)
    return float(df["time"][0])

Example #20

0

Show file

File: query.py Project: zhouxm4/ihme-modeling

def write_submodel_covariate(submodel_id, list_of_covariate_ids,
                             db_connection):
    for cov in list_of_covariate_ids:
        call = QS.submodel_cov_write_str.format(submodel_id, cov)
        db_connect.query(call, db_connection)

Example #21

0

Show file

File: query.py Project: zhouxm4/ihme-modeling

def write_model_pv(tag, value, model_version_id, db_connection):
    call = QS.pv_write.format(tag, value, model_version_id)
    db_connect.query(call, db_connection)

Example #22

0

Show file

File: query.py Project: zhouxm4/ihme-modeling

def model_version_type_id(model_version_id, db_connection):
    call = QS.model_version_type_id.format(mvid=model_version_id)
    m_type_id = db_connect.query(call,
                                 db_connection)["model_version_type_id"][0]
    return m_type_id

Example #23

0

Show file

File: query.py Project: zhouxm4/ihme-modeling

def model_date(model_version_id, db_connection):
    call = QS.model_date.format(mvid=model_version_id)
    date = db_connect.query(call, db_connection)["model_date"][0]
    return date

Example #24

0

Show file

File: query.py Project: zhouxm4/ihme-modeling

def cause_name_from_id(cause_id, db_connection):
    call = QS.cause_name.format(cause_id=cause_id)
    cause_name = db_connect.query(call, db_connection)["cause_name"][0]
    return cause_name