Ejemplo n.º 1
0
def alter_col_types():
    """
    Alter column types of the table to better suit the data.

    For example, convert the character-represented-dates to type DATE.
    """
    con, cur = cursor_connect(db_dsn)
    try:
        # Get column names so you can index the 2th and 3th columns
        sql = "SELECT * FROM {0} LIMIT 0;".format(TABLE_NAME)
        cur.execute(sql)
        colnames = [desc[0] for desc in cur.description]
        cols = (colnames[1], colnames[2])  # DO-Birth and DO-Death
        for col in cols:
            sql = """
            ALTER TABLE {0} ALTER COLUMN {1} TYPE DATE
            USING to_date({1}, 'YYYYMMDD');
            """.format(TABLE_NAME, col)
            cur.execute(sql)
    except psycopg2.Error:
        raise
    else:
        con.commit()
        cur.close()
        con.close()
Ejemplo n.º 2
0
def disease_frequency(col):
    """
    Get the states in descending order of the percentage of disease claims,
    where disease corresponds to the column name.

    Parameters
    ----------
    col : str, unicode
        A column name.

    Returns
    -------
    json
        A labeled JSON object with the state and percent disease claims out
        of all of that state's claims.

    Examples
    --------
    /api/v1/freq/depression
    /api/v1/freq/diabetes
    """
    disease = []
    accepted_cols = (
        "end_stage_renal_disease",
        "alzheimers_related_senile",
        "heart_failure",
        "chronic_kidney",
        "cancer",
        "chronic_obstructive_pulmonary",
        "depression",
        "diabetes",
        "ischemic_heart",
        "osteoporosis",
        "rheumatoid_osteo_arthritis",
        "stroke_ischemic_attack",
    )
    # Strip the user input to alpha characters only
    cleaned_col = re.sub('\W+', '', col)
    try:
        if cleaned_col not in accepted_cols:
            return json_error(403,
                              "column '{0}' is not allowed".format(cleaned_col))
        con, cur = cursor_connect(db_dsn, psycopg2.extras.DictCursor)
        query = """
        SELECT state, {1}/claims::float AS frequency FROM (SELECT
        LHS.state AS state, {1}, claims FROM (SELECT state, count(*) AS
        claims FROM {0} GROUP BY state order by claims desc)
        AS LHS LEFT JOIN (SELECT state, count(*) AS {1} FROM
        {0} WHERE {1}='true' GROUP BY state) AS RHS
        ON LHS.state=RHS.state) AS outer_q
        ORDER by frequency DESC;""".format(TABLE_NAME, cleaned_col)
        cur.execute(query)
        result = cur.fetchall()
        for row in result:
            freq = {row['state']: row['frequency']}
            disease.append(freq)
    except Exception as e:
        return jsonify({'error': e.message})
    return jsonify(state_depression=disease)
Ejemplo n.º 3
0
def index():
    """
    Main page with no JSON API, just a short message about number of rows
    available.

    Returns
    -------
    str
        A short message saying hello and then displaying the number of rows
        available to query.
    """
    num_rows = 0  # Default value
    try:
        con, cur = cursor_connect(db_dsn)
        sql = "SELECT COUNT(*) FROM {0}".format(TABLE_NAME)
        cur.execute(sql)
        result = cur.fetchone()
        num_rows = int(result[0])
    except (psycopg2.Error, ValueError) as e:
        num_rows = 0
    finally:
        html = """
        <html>
        <body>
        <div>
            <p>Hello World! I can access {0:,d} rows of data!</p>
            <p>The data is from the 2010 Medicare synthetic claims summary.</p>
            <p>Number of claims by sex:
                <a href="/api/v1/count/sex">/api/v1/count/sex</a>
            </p>
            <p>Number of cancer claims:
                <a href="/api/v1/count/cancer">/api/v1/count/cancer</a>
            </p>
            <p>Average inpatient reimbursement amount:
                <a href="/api/v1/average/inpatient_reimbursement">
                    /api/v1/average/inpatient_reimbursement</a>
            </p>
            <p>Average outpatient reimbursement amount:
                <a href="/api/v1/average/outpatient_reimbursement">
                    /api/v1/average/outpatient_reimbursement</a>
            </p>
            <p>Average beneficiary responsibility:
                <a href="/api/v1/average/beneficiary_responsibility">
                    /api/v1/average/beneficiary_responsibility</a>
            </p>
            <p>Get frequency of depression claims by state:
                <a href="/api/v1/freq/depression">
                    /api/v1/freq/depression</a>
            </p>
            <p>Get frequency of cancer claims by state:
                <a href="/api/v1/freq/cancer">
                    /api/v1/freq/cancer</a>
            </p>
        </div>
        </body>
        </html>
        """.format(num_rows)
        return html
Ejemplo n.º 4
0
def drop_table():
    """
    Drop the table specified by TABLE_NAME.
    """
    con, cur = cursor_connect(db_dsn)
    try:
        sql = "DROP TABLE IF EXISTS {0};".format(TABLE_NAME)
        cur.execute(sql)
    except psycopg2.Error:
        raise
    else:
        con.commit()
        cur.close()
        con.close()
Ejemplo n.º 5
0
def get_average(col):
    """
    Get the average value from a column.

    Parameters
    ----------
    col : str, unicode
        The name of a column to get the average of.

    Returns
    -------
    json
        A labeled value containing the column name as key and the average of
        that column as the value, as the value for key 'average'.
    """
    avg = {}
    # Only allow average value computation on certain (numeric) columns
    accepted_cols = (
        "inpatient_reimbursement",
        "inpatient_beneficiary_responsibility",
        "inpatient_primary_payer_reimbursement",
        "outpatient_reimbursement",
        "outpatient_beneficiary_responsibility",
        "outpatient_primary_payer_reimbursement",
        "carrier_reimbursement",
        "beneficiary_responsibility",
        "primary_payer_reimbursement",
        "part_a_coverage_months",
        "part_b_coverage_months",
        "hmo_coverage_months",
        "part_d_coverage_months",
    )
    # Strip the user input to alpha characters only
    cleaned_col = re.sub('\W+', '', col)
    try:
        if cleaned_col not in accepted_cols:
            return json_error(403,
                              "column '{0}' is not allowed".format(cleaned_col))
        con, cur = cursor_connect(db_dsn, psycopg2.extras.DictCursor)
        query = "SELECT AVG({0}) FROM {1};".format(cleaned_col, TABLE_NAME)
        cur.execute(query, (cleaned_col, ))
        result = cur.fetchall()
        for row in result:
            avg[cleaned_col] = round(row['avg'], 2)
    except Exception as e:
        return jsonify({'error': e.message})
    return jsonify({'average': avg})
Ejemplo n.º 6
0
def verify_data_load():
    """
    Verify that all the data was loaded into the DB.
    """
    con, cur = cursor_connect(db_dsn)
    try:
        sql = "SELECT COUNT(*) FROM {0}".format(TABLE_NAME)
        cur.execute(sql)
        result = cur.fetchone()
        num_rows = result[0]
    except psycopg2.Error:
        raise
    else:
        cur.close()
        con.close()
        expected_row_count = 2255098
        if num_rows != expected_row_count:
            raise AssertionError("{0} rows in DB. Should be {1}".format(
                                 num_rows, expected_row_count))
        print("Data load complete.")
Ejemplo n.º 7
0
def load_csv(csv_file):
    """
    Load data from a CSV file or file-like object into the database.

    Parameters
    ----------
    csv_file : str, unicode
        A file of file-like object returned from download_zip(). The file must
        have both `read()` and `readline()` methods.

    """
    con, cur = cursor_connect(db_dsn)
    try:
        with open(csv_file, 'r') as f:
            cur.copy_from(f, TABLE_NAME, sep=',', null='')
    except psycopg2.Error:
        raise
    else:
        con.commit()
        cur.close()
        con.close()
Ejemplo n.º 8
0
def get_counts(col):
    """
    Get counts of distinct values in the available columns.

    Parameters
    ----------
    col : str, unicode
        The name of a column to get the average of.

    Returns
    -------
    json
        A labeled JSON object with corresponding counts.

    Examples
    --------
    /api/v1/count/race
    /api/v1/count/cancer
    """
    count = {}
    cleaned_col = re.sub('\W+', '', col)
    try:
        if cleaned_col == 'id':
            return json_error(403,
                              "column '{0}' is not allowed".format(cleaned_col))
        con, cur = cursor_connect(db_dsn, psycopg2.extras.DictCursor)
        query = """
        SELECT {0}, COUNT(*) AS num FROM {1}
        GROUP BY {0};""".format(cleaned_col, TABLE_NAME)
        cur.execute(query, (cleaned_col, ))
        result = cur.fetchall()
        for row in result:
            label = row[cleaned_col]
            count[label] = row['num']
    except Exception as e:
        return jsonify({'error': e.message})
    return jsonify(count)
Ejemplo n.º 9
0
def create_table():
    """
    Create the table given by TABLE_NAME.
    """
    con, cur = cursor_connect(db_dsn)
    # Create new column types, like factors in R, to hold sex and race.
    new_types = [
        ("CREATE TYPE sex AS ENUM ('male', 'female');",),
        ("CREATE TYPE race as ENUM ('white', 'black', 'others', 'hispanic');",),
    ]
    for i, val in enumerate(new_types):
        cmd = val[0]
        try:
            cur.execute(cmd)
        except psycopg2.ProgrammingError as e:
            # If the types already exist just continue on
            if "already exists" in e.message:
                con, cur = cursor_connect(db_dsn)  # Re-create the connection
            else:
                cur.close()
                con.close()
                raise
    try:
        sql = ("CREATE TABLE {0} ("
               "id CHAR(16) UNIQUE, "
               "dob CHAR(8), "  # These are converted to DATE later
               "dod CHAR(8), "  # These are converted to DATE later
               "sex sex, "
               "race race, "
               "end_stage_renal_disease BOOLEAN, "
               "state VARCHAR(4), "
               "county_code INT, "
               "part_a_coverage_months INT, "
               "part_b_coverage_months INT, "
               "hmo_coverage_months INT, "
               "part_d_coverage_months INT, "
               "alzheimers_related_senile BOOLEAN, "
               "heart_failure BOOLEAN, "
               "chronic_kidney BOOLEAN, "
               "cancer BOOLEAN, "
               "chronic_obstructive_pulmonary BOOLEAN, "
               "depression BOOLEAN, "
               "diabetes BOOLEAN, "
               "ischemic_heart BOOLEAN, "
               "osteoporosis BOOLEAN, "
               "rheumatoid_osteo_arthritis BOOLEAN, "
               "stroke_ischemic_attack BOOLEAN, "
               "inpatient_reimbursement INT, "
               "inpatient_beneficiary_responsibility INT, "
               "inpatient_primary_payer_reimbursement INT, "
               "outpatient_reimbursement INT, "
               "outpatient_beneficiary_responsibility INT, "
               "outpatient_primary_payer_reimbursement INT, "
               "carrier_reimbursement INT, "
               "beneficiary_responsibility INT, "
               "primary_payer_reimbursement INT"
               ");".format(TABLE_NAME))
        cur.execute(sql)
    except psycopg2.Error:
        raise
    else:
        con.commit()
        cur.close()
        con.close()