def alter_col_types(): """ Alter column types of the table to better suit the data. For example, convert the character-represented-dates to type DATE. """ con, cur = cursor_connect(db_dsn) try: # Get column names so you can index the 2th and 3th columns sql = "SELECT * FROM {0} LIMIT 0;".format(TABLE_NAME) cur.execute(sql) colnames = [desc[0] for desc in cur.description] cols = (colnames[1], colnames[2]) # DO-Birth and DO-Death for col in cols: sql = """ ALTER TABLE {0} ALTER COLUMN {1} TYPE DATE USING to_date({1}, 'YYYYMMDD'); """.format(TABLE_NAME, col) cur.execute(sql) except psycopg2.Error: raise else: con.commit() cur.close() con.close()
def disease_frequency(col): """ Get the states in descending order of the percentage of disease claims, where disease corresponds to the column name. Parameters ---------- col : str, unicode A column name. Returns ------- json A labeled JSON object with the state and percent disease claims out of all of that state's claims. Examples -------- /api/v1/freq/depression /api/v1/freq/diabetes """ disease = [] accepted_cols = ( "end_stage_renal_disease", "alzheimers_related_senile", "heart_failure", "chronic_kidney", "cancer", "chronic_obstructive_pulmonary", "depression", "diabetes", "ischemic_heart", "osteoporosis", "rheumatoid_osteo_arthritis", "stroke_ischemic_attack", ) # Strip the user input to alpha characters only cleaned_col = re.sub('\W+', '', col) try: if cleaned_col not in accepted_cols: return json_error(403, "column '{0}' is not allowed".format(cleaned_col)) con, cur = cursor_connect(db_dsn, psycopg2.extras.DictCursor) query = """ SELECT state, {1}/claims::float AS frequency FROM (SELECT LHS.state AS state, {1}, claims FROM (SELECT state, count(*) AS claims FROM {0} GROUP BY state order by claims desc) AS LHS LEFT JOIN (SELECT state, count(*) AS {1} FROM {0} WHERE {1}='true' GROUP BY state) AS RHS ON LHS.state=RHS.state) AS outer_q ORDER by frequency DESC;""".format(TABLE_NAME, cleaned_col) cur.execute(query) result = cur.fetchall() for row in result: freq = {row['state']: row['frequency']} disease.append(freq) except Exception as e: return jsonify({'error': e.message}) return jsonify(state_depression=disease)
def index(): """ Main page with no JSON API, just a short message about number of rows available. Returns ------- str A short message saying hello and then displaying the number of rows available to query. """ num_rows = 0 # Default value try: con, cur = cursor_connect(db_dsn) sql = "SELECT COUNT(*) FROM {0}".format(TABLE_NAME) cur.execute(sql) result = cur.fetchone() num_rows = int(result[0]) except (psycopg2.Error, ValueError) as e: num_rows = 0 finally: html = """ <html> <body> <div> <p>Hello World! I can access {0:,d} rows of data!</p> <p>The data is from the 2010 Medicare synthetic claims summary.</p> <p>Number of claims by sex: <a href="/api/v1/count/sex">/api/v1/count/sex</a> </p> <p>Number of cancer claims: <a href="/api/v1/count/cancer">/api/v1/count/cancer</a> </p> <p>Average inpatient reimbursement amount: <a href="/api/v1/average/inpatient_reimbursement"> /api/v1/average/inpatient_reimbursement</a> </p> <p>Average outpatient reimbursement amount: <a href="/api/v1/average/outpatient_reimbursement"> /api/v1/average/outpatient_reimbursement</a> </p> <p>Average beneficiary responsibility: <a href="/api/v1/average/beneficiary_responsibility"> /api/v1/average/beneficiary_responsibility</a> </p> <p>Get frequency of depression claims by state: <a href="/api/v1/freq/depression"> /api/v1/freq/depression</a> </p> <p>Get frequency of cancer claims by state: <a href="/api/v1/freq/cancer"> /api/v1/freq/cancer</a> </p> </div> </body> </html> """.format(num_rows) return html
def drop_table(): """ Drop the table specified by TABLE_NAME. """ con, cur = cursor_connect(db_dsn) try: sql = "DROP TABLE IF EXISTS {0};".format(TABLE_NAME) cur.execute(sql) except psycopg2.Error: raise else: con.commit() cur.close() con.close()
def get_average(col): """ Get the average value from a column. Parameters ---------- col : str, unicode The name of a column to get the average of. Returns ------- json A labeled value containing the column name as key and the average of that column as the value, as the value for key 'average'. """ avg = {} # Only allow average value computation on certain (numeric) columns accepted_cols = ( "inpatient_reimbursement", "inpatient_beneficiary_responsibility", "inpatient_primary_payer_reimbursement", "outpatient_reimbursement", "outpatient_beneficiary_responsibility", "outpatient_primary_payer_reimbursement", "carrier_reimbursement", "beneficiary_responsibility", "primary_payer_reimbursement", "part_a_coverage_months", "part_b_coverage_months", "hmo_coverage_months", "part_d_coverage_months", ) # Strip the user input to alpha characters only cleaned_col = re.sub('\W+', '', col) try: if cleaned_col not in accepted_cols: return json_error(403, "column '{0}' is not allowed".format(cleaned_col)) con, cur = cursor_connect(db_dsn, psycopg2.extras.DictCursor) query = "SELECT AVG({0}) FROM {1};".format(cleaned_col, TABLE_NAME) cur.execute(query, (cleaned_col, )) result = cur.fetchall() for row in result: avg[cleaned_col] = round(row['avg'], 2) except Exception as e: return jsonify({'error': e.message}) return jsonify({'average': avg})
def verify_data_load(): """ Verify that all the data was loaded into the DB. """ con, cur = cursor_connect(db_dsn) try: sql = "SELECT COUNT(*) FROM {0}".format(TABLE_NAME) cur.execute(sql) result = cur.fetchone() num_rows = result[0] except psycopg2.Error: raise else: cur.close() con.close() expected_row_count = 2255098 if num_rows != expected_row_count: raise AssertionError("{0} rows in DB. Should be {1}".format( num_rows, expected_row_count)) print("Data load complete.")
def load_csv(csv_file): """ Load data from a CSV file or file-like object into the database. Parameters ---------- csv_file : str, unicode A file of file-like object returned from download_zip(). The file must have both `read()` and `readline()` methods. """ con, cur = cursor_connect(db_dsn) try: with open(csv_file, 'r') as f: cur.copy_from(f, TABLE_NAME, sep=',', null='') except psycopg2.Error: raise else: con.commit() cur.close() con.close()
def get_counts(col): """ Get counts of distinct values in the available columns. Parameters ---------- col : str, unicode The name of a column to get the average of. Returns ------- json A labeled JSON object with corresponding counts. Examples -------- /api/v1/count/race /api/v1/count/cancer """ count = {} cleaned_col = re.sub('\W+', '', col) try: if cleaned_col == 'id': return json_error(403, "column '{0}' is not allowed".format(cleaned_col)) con, cur = cursor_connect(db_dsn, psycopg2.extras.DictCursor) query = """ SELECT {0}, COUNT(*) AS num FROM {1} GROUP BY {0};""".format(cleaned_col, TABLE_NAME) cur.execute(query, (cleaned_col, )) result = cur.fetchall() for row in result: label = row[cleaned_col] count[label] = row['num'] except Exception as e: return jsonify({'error': e.message}) return jsonify(count)
def create_table(): """ Create the table given by TABLE_NAME. """ con, cur = cursor_connect(db_dsn) # Create new column types, like factors in R, to hold sex and race. new_types = [ ("CREATE TYPE sex AS ENUM ('male', 'female');",), ("CREATE TYPE race as ENUM ('white', 'black', 'others', 'hispanic');",), ] for i, val in enumerate(new_types): cmd = val[0] try: cur.execute(cmd) except psycopg2.ProgrammingError as e: # If the types already exist just continue on if "already exists" in e.message: con, cur = cursor_connect(db_dsn) # Re-create the connection else: cur.close() con.close() raise try: sql = ("CREATE TABLE {0} (" "id CHAR(16) UNIQUE, " "dob CHAR(8), " # These are converted to DATE later "dod CHAR(8), " # These are converted to DATE later "sex sex, " "race race, " "end_stage_renal_disease BOOLEAN, " "state VARCHAR(4), " "county_code INT, " "part_a_coverage_months INT, " "part_b_coverage_months INT, " "hmo_coverage_months INT, " "part_d_coverage_months INT, " "alzheimers_related_senile BOOLEAN, " "heart_failure BOOLEAN, " "chronic_kidney BOOLEAN, " "cancer BOOLEAN, " "chronic_obstructive_pulmonary BOOLEAN, " "depression BOOLEAN, " "diabetes BOOLEAN, " "ischemic_heart BOOLEAN, " "osteoporosis BOOLEAN, " "rheumatoid_osteo_arthritis BOOLEAN, " "stroke_ischemic_attack BOOLEAN, " "inpatient_reimbursement INT, " "inpatient_beneficiary_responsibility INT, " "inpatient_primary_payer_reimbursement INT, " "outpatient_reimbursement INT, " "outpatient_beneficiary_responsibility INT, " "outpatient_primary_payer_reimbursement INT, " "carrier_reimbursement INT, " "beneficiary_responsibility INT, " "primary_payer_reimbursement INT" ");".format(TABLE_NAME)) cur.execute(sql) except psycopg2.Error: raise else: con.commit() cur.close() con.close()