def get_course_ces_data(course_list, start_year, end_year, cur,
                        tbl='vw1_course_summaries_fixed', schema='ces'):
    """Return a dataframe of CES data for the courses in ``course_list``.

    The query selects the rounded GTS/OSI measures, the six individual GTS
    items, reliability, coordinator and response counts from ``schema.tbl``
    for every course whose code is in ``course_list`` and whose year lies in
    ``[start_year, end_year]`` (both bounds inclusive). Rows are ordered by
    course code, year and semester.

    Args:
        course_list: course codes; rendered into the ``IN`` clause by
            ``list_to_text`` (presumably as a parenthesised SQL list -
            TODO confirm against that helper).
        start_year: first year to include.
        end_year: last year to include.
        cur: database cursor passed through to
            ``db_extract_query_to_dataframe``.
        tbl: table or view to read from.
        schema: schema containing ``tbl``.

    Returns:
        Whatever ``db_extract_query_to_dataframe`` returns for the query
        (a dataframe, per the helper's name).
    """
    template = (
        ' SELECT \n'
        " year, semester, level, \n"
        " course_code, \n"
        " course_code_ces, \n"
        ' reliability, round(gts, 1) AS gts, round(gts_mean, 1) AS gts_mean, \n'
        ' round(osi, 1) AS osi, round(osi_mean, 1) AS osi_mean, \n'
        ' round(gts1, 1) AS gts1, round(gts2, 1) AS gts2, round(gts3, 1) AS gts3, \n'
        ' round(gts4, 1) AS gts4, round(gts5, 1) AS gts5, round(gts6, 1) AS gts6, \n'
        ' course_coordinator, population, osi_count, gts_count \n'
        ' FROM {0}.{1} \n'
        " WHERE course_code IN {2} \n"
        " AND year >= {3} \n"
        " AND year <= {4} \n"
        " ORDER BY course_code, year, semester; \n"
    )
    sql = template.format(schema, tbl, list_to_text(course_list), start_year, end_year)
    return db_extract_query_to_dataframe(sql, cur, print_messages=False)
def get_course_enhancement_list(year, semester, cur, schema='course_enhancement'):
    """Return a dataframe of the courses undergoing enhancement.

    The SQL is produced by ``qry_course_enhancement_list`` for the given
    ``year`` and ``semester`` against the ``vw100_courses`` view in
    ``schema`` and is executed on ``cur``.

    Args:
        year: year of the course offerings.
        semester: semester of the course offerings.
        cur: database cursor used to run the query.
        schema: schema holding the ``vw100_courses`` view.

    Returns:
        The result of ``db_extract_query_to_dataframe`` for that query.
    """
    return db_extract_query_to_dataframe(
        qry_course_enhancement_list(year, semester, 'vw100_courses', schema),
        cur,
        print_messages=False,
    )
def line_trace_course_fr_diff(course_name, colour, semester=None, dash_type=None,
                              showlegend=True):
    """Build a markers-only Plotly scatter trace of ``fr_diff`` for a course.

    The history is fetched with the query from ``qry_location_diff_history``
    using the module-level ``postgres_cur``. When ``semester`` is 1 or 2 only
    the rows of that semester are plotted; any other value keeps all rows.

    Args:
        course_name: course to plot; also used as the legend text.
        colour: colour applied to the legend text, line and markers.
        semester: 1 or 2 to restrict the rows, otherwise no filtering.
        dash_type: dash style handed to the line definition.
        showlegend: whether the trace appears in the legend.

    Returns:
        A ``go.Scatter`` trace.
    """
    history = db_extract_query_to_dataframe(
        qry_location_diff_history(course_name), postgres_cur, print_messages=False)

    # Only semesters 1 and 2 trigger filtering.
    for sem_no in (1, 2):
        if semester == sem_no:
            history = history.loc[history['semester'] == sem_no]
            break

    # Fixed x positions 0.5, 1.5, ... 5.5.
    # NOTE(review): six positions are assumed regardless of how many rows the
    # query returned - confirm the data always matches.
    x_positions = [n + 0.5 for n in range(6)]

    legend_label = '<span style="color: {1}"> {0} </span>'.format(course_name, colour)
    line_style = go.Line(width=3, color=colour, dash=dash_type)
    marker_style = go.Marker(color=colour, size=10, symbol='diamond')

    return go.Scatter(
        x=x_positions,
        y=history['fr_diff'].tolist(),
        name=legend_label,
        text=None,
        textfont={'size': 14, 'color': colour},
        line=line_style,
        marker=marker_style,
        connectgaps=False,
        mode='markers',
        showlegend=showlegend,
        textposition='bottom right',
    )
def get_prg_ces_data(program_list, start_year, end_year, cur,
                     tbl='vw135_program', schema='ces'):
    """Return a dataframe of CES data for the programs in ``program_list``.

    Selects population, reliability and the rounded GTS/OSI measures
    (including the six individual GTS items) from ``schema.tbl`` for every
    program whose code is in ``program_list`` and whose year lies in
    ``[start_year, end_year]`` (inclusive). Rows are ordered by year and
    semester.

    Args:
        program_list: program codes; rendered into the ``IN`` clause by
            ``list_to_text``.
        start_year: first year to include.
        end_year: last year to include.
        cur: database cursor used to run the query.
        tbl: table or view to read from.
        schema: schema containing ``tbl``.

    Returns:
        The result of ``db_extract_query_to_dataframe`` for the query.
    """
    template = (
        ' SELECT \n'
        " year, semester, level, \n"
        " program_code, \n"
        ' population::int, reliability, \n'
        ' round(gts::numeric, 1) AS gts, round(gts_mean::numeric, 1) AS gts_mean, \n'
        ' round(osi::numeric, 1) AS osi, round(osi_mean::numeric, 1) AS osi_mean, \n'
        ' round(gts1::numeric, 1) AS gts1, round(gts2::numeric, 1) AS gts2, round(gts3::numeric, 1) AS gts3, \n'
        ' round(gts4::numeric, 1) AS gts4, round(gts5::numeric, 1) AS gts5, round(gts6::numeric, 1) AS gts6 \n'
        ' FROM {0}.{1} \n'
        " WHERE program_code IN {2} \n"
        " AND year >= {3} \n"
        " AND year <= {4} \n"
        " ORDER BY year, semester; \n"
    )
    sql = template.format(schema, tbl, list_to_text(program_list), start_year, end_year)
    return db_extract_query_to_dataframe(sql, cur, print_messages=False)
def get_course_teacher_data(year, semester, cur,
                            tbl='vw001_course_teacher', schema='sim_ces'):
    """Return a dataframe of the SIM CES course/teacher data for one semester.

    The SQL comes from ``qry_course_teacher_data`` for ``year``, ``semester``,
    ``tbl`` and ``schema`` and is executed on ``cur``.

    Args:
        year: year of the survey data.
        semester: semester of the survey data.
        cur: database cursor used to run the query.
        tbl: table or view to read from.
        schema: schema containing ``tbl``.

    Returns:
        The result of ``db_extract_query_to_dataframe`` for that query.
    """
    return db_extract_query_to_dataframe(
        qry_course_teacher_data(year, semester, tbl, schema),
        cur,
        print_messages=False,
    )
def get_school_data(start_year, postgres_cur):
    """Return all rows of ``ces.vw146_school_bus_for_graph`` from a given year.

    Rows with ``year >= start_year`` are returned, ordered by school code,
    level, year and semester.

    Args:
        start_year: earliest year to include.
        postgres_cur: database cursor used to run the query.

    Returns:
        The result of ``db_extract_query_to_dataframe`` for the query.
    """
    template = (
        ' SELECT * \n'
        ' FROM ces.vw146_school_bus_for_graph \n'
        " WHERE year >= {}"
        " ORDER BY school_code, level, year, semester"
        "; \n"
    )
    return db_extract_query_to_dataframe(
        template.format(start_year), postgres_cur, print_messages=False)
def get_course_comments(year, semester, cur,
                        tbl='vw001_course_teacher_comments', schema='sim_ces'):
    """Return a dataframe of SIM CES comments for one year and semester.

    Every column of ``schema.tbl`` is selected for the rows matching ``year``
    and ``semester``.

    Args:
        year: year of the comments.
        semester: semester of the comments.
        cur: database cursor used to run the query.
        tbl: table or view to read from.
        schema: schema containing ``tbl``.

    Returns:
        The result of ``db_extract_query_to_dataframe`` for the query.
    """
    template = (
        " SELECT * \n"
        " FROM {0}.{1} \n"
        " WHERE year = {2} AND semester = {3} \n"
    )
    sql = template.format(schema, tbl, year, semester)
    return db_extract_query_to_dataframe(sql, cur, print_messages=False)
def get_course_program_ces_data(course_list, program_list, start_year, end_year, cur,
                                tbl='vw115_course_program', schema='ces'):
    """Return course-by-program CES data joined with program/school/college details.

    The inner query reads the rounded GTS/OSI measures and counts from
    ``schema.tbl`` for courses in ``course_list`` with a year in
    ``[start_year, end_year]`` (inclusive). It is left-joined to the
    ``lookups`` tables for the program name, school (short name and colour)
    and college (short name and colour), then restricted to programs in
    ``program_list``. Programs whose college is not ``'BUS'`` get the school
    code ``'Not CoB'`` and the school colour ``'#FAC800'``. Rows are ordered
    by course code, year and semester.

    Args:
        course_list: course codes for the inner ``IN`` clause
            (rendered by ``list_to_text``).
        program_list: program codes for the outer ``IN`` clause
            (rendered by ``list_to_text``).
        start_year: first year to include.
        end_year: last year to include.
        cur: database cursor used to run the query.
        tbl: course/program table or view to read from.
        schema: schema containing ``tbl``.

    Returns:
        The result of ``db_extract_query_to_dataframe`` for the query.
    """
    template = (
        ' SELECT \n'
        ' crse_prg.*, \n'
        ' pd.program_name, \n'
        " CASE WHEN pd.college = 'BUS' THEN pd.school_code ELSE 'Not CoB' END AS school_code, \n"
        " COALESCE(bsd.school_name_short, 'Not CoB') AS school_name_short, \n"
        " CASE WHEN pd.college = 'BUS' THEN bsd.html ELSE '#FAC800' END AS school_colour, \n"
        " pd.college, \n"
        " col.college_name_short, \n"
        " col.html AS college_colour \n "
        # Course/program CES measures for the requested courses and years.
        ' FROM ( \n'
        ' SELECT \n'
        " year, semester, level, \n"
        " course_code, course_code_ces, program_code, \n"
        ' reliability, \n'
        ' round(gts, 1) AS gts, round(gts_mean, 1) AS gts_mean, \n'
        ' round(osi, 1) AS osi, round(osi_mean, 1) AS osi_mean, \n'
        ' population::int, osi_count, gts_count \n'
        ' FROM {0}.{1} \n'
        " WHERE course_code IN {2} \n"
        " AND year >= {3} \n"
        " AND year <= {4} \n"
        " ) crse_prg \n"
        # Program details.
        " LEFT JOIN ( \n"
        " SELECT program_code, program_name, school_code, college \n"
        " FROM lookups.tbl_program_details \n"
        " ) pd ON (crse_prg.program_code = pd.program_code) \n"
        # School details with their HTML colour.
        " LEFT JOIN ( \n"
        " SELECT sd.school_code, sd.school_name_short, sc.html \n"
        " FROM (SELECT school_code, school_name_short, colour FROM lookups.tbl_bus_school_details) sd \n"
        " LEFT JOIN (SELECT colour_name, html FROM lookups.tbl_rmit_colours) sc \n"
        " ON sc.colour_name = sd.colour \n"
        " ) bsd ON (pd.school_code=bsd.school_code)\n"
        # College details with their HTML colour.
        " LEFT JOIN ( \n"
        " SELECT cd.college_code, cd.college_name, cd.college_name_short, rc.html \n"
        " FROM lookups.tbl_rmit_college_details cd, lookups.tbl_rmit_colours rc \n"
        " WHERE rc.colour_name = cd.colour \n"
        " ) col ON (pd.college = col.college_code) \n"
        " WHERE crse_prg.program_code IN {5} \n"
        " ORDER BY course_code, year, semester; \n"
    )
    sql = template.format(
        schema,
        tbl,
        list_to_text(course_list),
        start_year,
        end_year,
        list_to_text(program_list),
    )
    return db_extract_query_to_dataframe(sql, cur, print_messages=False)
def line_trace_course_fr(course_name, colour, location=None, degree_type=None,
                         semester=None, dash_type=None, show_course_name=True,
                         showlegend=True):
    """Build a lines+markers Plotly trace of the fail rate history of a course.

    The history is fetched with the query from ``qry_course_history`` using
    the module-level ``postgres_cur``. When ``semester`` is 1 or 2 only the
    rows of that semester are plotted; any other value keeps all rows.

    Args:
        course_name: course whose ``fail_rate`` values are plotted.
        colour: colour of the legend text, line and markers.
        location: location passed to the query and shown in the legend;
            ``'VIETNAM'`` uses its own x positions.
        degree_type: degree type passed to the query.
        semester: 1 or 2 to restrict the rows, otherwise no filtering.
        dash_type: dash style handed to the line definition.
        show_course_name: when equal to ``True`` the legend reads
            "course (location)", otherwise only the location is shown.
        showlegend: whether the trace appears in the legend.

    Returns:
        A ``go.Scatter`` trace.
    """
    history = db_extract_query_to_dataframe(
        qry_course_history(location=location, course_name=course_name,
                           degree_type=degree_type),
        postgres_cur,
        print_messages=False,
    )

    # Only semesters 1 and 2 trigger filtering.
    for sem_no in (1, 2):
        if semester == sem_no:
            history = history.loc[history['semester'] == sem_no]
            break

    if location == 'VIETNAM':
        # Three points per year, a quarter apart: 0.5, 0.75, 1.0, 1.5, 1.75, 2.0
        x_positions = [j + 0.5 + i / 4.0 for j in range(0, 2) for i in range(0, 3)]
    else:
        # Two points per year: 0.5, 1.0, ... 3.0
        x_positions = [i / 2.0 for i in range(1, 7)]

    # Equality with True (not truthiness) is kept as in the original contract.
    if show_course_name == True:  # noqa: E712
        legend_label = '<span style="color: {0}">{2} ({1}) </span>'.format(
            colour, location, course_name)
    else:
        legend_label = '<span style="color: {0}">{1} </span>'.format(colour, location)

    line_style = go.Line(width=3, color=colour, dash=dash_type)
    marker_style = go.Marker(color=colour, size=8, symbol='diamond')

    return go.Scatter(
        x=x_positions,
        y=history['fail_rate'].tolist(),
        name=legend_label,
        text=None,
        textfont={'size': 14, 'color': colour},
        line=line_style,
        marker=marker_style,
        connectgaps=True,
        mode='lines+markers',
        showlegend=showlegend,
        textposition='bottom right',
    )
def get_ce_courses(year, semester):
    """Return the course-enhancement evaluation rows for one semester.

    Reads the pre/post/delta GTS and OSI values and the school GTS targets
    from ``course_enhancement.vw205_ce_evaluation`` for rows flagged
    ``ce = True`` in the given ``year`` and ``semester``. The query runs on
    the module-level ``cur`` (it is not a parameter of this function).

    NOTE(review): another ``get_ce_courses`` with a different signature is
    defined later in this file; if both live in the same module the later
    definition replaces this one - confirm which is intended.

    Args:
        year: year of the course offerings.
        semester: semester of the course offerings.

    Returns:
        The result of ``db_extract_query_to_dataframe`` for the query.
    """
    template = (
        ' SELECT level, course_code_ces, \n'
        ' gts_pre, \n'
        ' gts_post, \n'
        ' gts_delta, \n'
        ' osi_pre, \n'
        ' osi_post, \n'
        ' osi_delta, \n'
        ' school_pre_gts_target,\n'
        ' school_post_gts_target \n'
        ' FROM course_enhancement.vw205_ce_evaluation \n'
        ' WHERE ce = True \n'
        ' AND year = {} \n'
        ' AND semester = {} \n'
    )
    return db_extract_query_to_dataframe(template.format(year, semester), cur)
def get_ce_courses(year, semester, table='vw204_ce_evaluation', ce=True):
    """Return distinct course-enhancement evaluation rows for one semester.

    Reads the pre/post/delta GTS and OSI values from
    ``course_enhancement.<table>`` for the given ``year`` and ``semester``,
    keeping only rows where both deltas are non-null and where the ``la``
    column equals ``ce``. Rows are ordered by ``gts_delta``. The query runs
    on the module-level ``cur`` (it is not a parameter of this function).

    Args:
        year: year of the course offerings.
        semester: semester of the course offerings.
        table: view in the ``course_enhancement`` schema to read from.
        ce: value the ``la`` column is compared with (formatted directly
            into the SQL text).

    Returns:
        The result of ``db_extract_query_to_dataframe`` for the query.
    """
    template = (
        ' SELECT DISTINCT level, course_code_ces, \n'
        ' gts_pre, \n'
        ' gts_post, \n'
        ' gts_delta, \n'
        ' osi_pre, \n'
        ' osi_post, \n'
        ' osi_delta \n'
        ' FROM course_enhancement.{0} \n'
        ' WHERE la = {1} \n'
        ' AND year = {2} \n'
        ' AND semester = {3} \n'
        ' AND gts_delta IS NOT NULL \n'
        ' AND osi_delta IS NOT NULL \n'
        ' ORDER BY gts_delta \n'
    )
    sql = template.format(table, ce, year, semester)
    return db_extract_query_to_dataframe(sql, cur)
' population, gts, osi \n' \ ' FROM ces.vw146_school_bus_for_graph \n' \ " WHERE \n" \ " year >= {0} AND year <= {1} \n" \ " UNION \n" \ ' SELECT \n' \ ' year, semester, level, college_name_short, colour, colour_html, \n' \ ' population, gts, osi \n' \ ' FROM ces.vw157_college_for_graph \n' \ " WHERE \n" \ " year >= {0} AND year <= {1} \n" \ " ORDER BY year, semester, level, school_name_short \n" \ "".format(start_year, end_year) df_schools_data = db_extract_query_to_dataframe(qry, postgres_cur, print_messages=False) # print(tabulate(df_schools_data, headers='keys')) ''' create_CoB_graph( df_schools_data, measure='gts', start_year=2015, end_year=2019, semester=1, height=600, width=1100, background='#FFFFFF') create_CoB_graph( df_schools_data, measure='osi',
def get_prg_crse_data(program_list, cur):
    """Return the plan/course structure rows for the given programs.

    Selects every column of ``programs.tbl_plan_course_structure`` for the
    programs whose code is in ``program_list``.

    Args:
        program_list: program codes; rendered into the ``IN`` clause by
            ``list_to_text``.
        cur: database cursor used to run the query.

    Returns:
        The result of ``db_extract_query_to_dataframe`` for the query.
    """
    template = (
        " SELECT * \n"
        " FROM programs.tbl_plan_course_structure \n"
        " WHERE program_code IN {} "
    )
    sql = template.format(list_to_text(program_list))
    return db_extract_query_to_dataframe(sql, cur, print_messages=False)
def create_course_graph(course_name, degree, semester=None,
                        folder='C:\\Peter\\CoB\\CES\\2018_Semester_1\\',
                        height=600, width=1100):
    """Plot the fail rate of a course per location and write it to an HTML file.

    The distinct locations of the course (excluding ``'UPH'`` and
    ``'VIETNAM'``) are read from ``projects.tbl_common_core_fr_cleaned`` via
    the module-level ``postgres_cur``. One trace per location is built with
    ``line_trace_course_fr``, and the figure is written with
    ``plotly.offline.plot`` to ``<folder><course_name lower-cased>_<degree>_graph.html``.
    The file name is printed afterwards.

    Args:
        course_name: course to plot (inserted into the SQL text and the title).
        degree: degree type (inserted into the SQL text and the title).
        semester: forwarded to ``line_trace_course_fr``.
        folder: output directory prefix; concatenated with the file name as is,
            so it must end with a path separator.
        height: figure height in pixels.
        width: figure width in pixels.

    Returns:
        The ``go.Figure`` that was plotted.
    """
    # The x axis is fixed to these five semester labels.
    x = [i / 2.0 for i in range(1, 6)]
    xlabels = ['2016 S1', '2016 S2', '2017 S1', '2017 S2', '2018 S1']
    graph_title = '{} ({}) Fail Rate'.format(course_name, degree)
    colours = [rc.RMIT_Green, rc.RMIT_DarkBlue, rc.RMIT_Red, rc.RMIT_Arctic]

    # NOTE(review): the values are interpolated straight into the SQL text; a
    # course name containing a quote would break the statement.
    qry = ' SELECT DISTINCT location \n' \
          ' FROM projects.tbl_common_core_fr_cleaned \n' \
          " WHERE degree_type = '{}' \n" \
          " AND course_name = '{}' " \
          " AND location <> 'UPH' \n" \
          " AND location <> 'VIETNAM' \n" \
          ' ORDER BY location' \
          ''.format(degree, course_name)
    df_locs = db_extract_query_to_dataframe(qry, postgres_cur, print_messages=False)

    # Pick colours by position rather than by dataframe index label and wrap
    # around the palette, so more than len(colours) locations (or a
    # non 0..n-1 index) no longer raises IndexError.
    traces = [
        line_trace_course_fr(course_name,
                             colours[position % len(colours)],
                             location=location,
                             degree_type="('{}')".format(degree),
                             semester=semester,
                             dash_type=None,
                             show_course_name=False)
        for position, location in enumerate(df_locs['location'])
    ]

    fig = go.Figure(
        data=traces,
        layout=go.Layout(
            title=graph_title,
            titlefont={'size': 16, },
            showlegend=True,
            legend=dict(
                font=dict(size=12),
                orientation="h",
            ),
            xaxis=dict(
                range=[0, 5],
                tickvals=x,
                tickfont={'size': 12},
                showgrid=False,
                ticktext=xlabels,
                ticks='outside',
                tick0=1,
                dtick=1,
                ticklen=5,
                zeroline=True,
                gridcolor='#FFFFFF',
                zerolinewidth=2,
            ),
            yaxis=dict(
                title='Fail Rate (%)',
                titlefont={'size': 14},
                range=[0, 40],
                ticklen=5,
                tickfont={'size': 12},
                zeroline=True,
                zerolinewidth=2,
                layer="below traces",
            ),
            width=width,
            height=height,
            hovermode='closest',
            margin=dict(b=40, l=60, r=5, t=40),
            hidesources=True,
        ))

    filename = folder + '{}_{}_graph.html'.format(course_name.lower(), degree)
    plotly.offline.plot(fig, filename=filename)
    print(filename)
    return fig
con_string = "host='{0}' " \ "dbname='{1}' " \ "user='******' " \ "password='******' " \ "".format(postgres_host, postgres_dbname, postgres_user, postgres_pw) postgres_con, postgres_cur = connect_to_postgres_db(con_string) '''-------------------------------------------- Get Data -------------------------------------''' qry = ' SELECT * \n' \ ' FROM ces.vw157_college_for_graph \n' \ " WHERE year >= {1} AND level = '{0}' \n" \ " ORDER BY college, year, semester \n" \ "; \n".format(acad_career, start_year) df_college = db_extract_query_to_dataframe(qry, postgres_cur, print_messages=False) print(tabulate(df_college, headers='keys')) def line_graph_colleges_measure(df1, level, measure='gts', start_year=2015, end_year=2018, height=400, width=800): # all traces for plotly traces = []
# header image image_filename = 'C:\\Peter\\CoB\\logos\\L&T_Transparent_200.png' # replace with your own image logo = base64.b64encode(open(image_filename, 'rb').read()) '''------------------------Get Data-----------------------''' qry = ' SELECT \n' \ ' year, semester, level, school_name_short, colour, colour_html, \n' \ ' course_code_ces, population, osi_count, reliability, gts, osi \n' \ ' FROM ces.vw2_course_summaries_fixed \n' \ " WHERE \n" \ " year >= {0} AND year <= {1} \n" \ "".format(start_year, end_year) df_course_data = db_extract_query_to_dataframe(qry, postgres_cur, print_messages=False) qry = " SELECT \n" \ " year, semester, school_name_short, level, colour_html, \n" \ " count(*) AS courses, \n" \ " sum(CASE WHEN reliability='G' THEN 1 ELSE 0 END) AS g_count,\n" \ " sum(CASE WHEN reliability='S' THEN 1 ELSE 0 END) AS s_count, \n" \ " sum(CASE WHEN reliability='N' THEN 1 ELSE 0 END) AS n_count, \n" \ " round(avg(osi), 1) AS osi, round(avg(gts),1) as gts \n" \ " FROM ces.vw2_course_summaries_fixed \n " \ " WHERE \n" \ " year >= {0} AND year <= {1} \n" \ " GROUP BY year, semester, school_name_short, level, colour_html \n" \ " UNION \n" \ " SELECT \n" \ " year, semester, 'CoB' AS school_name_short, level, '#000000' AS colour_html, \n" \
fig = {'data': data, 'layout': layout, } plotly.offline.plot(fig, filename='C:\\Peter\\CoB\\CES Response Rates\\graphs\\rdr.html') return #qry1 = qry_vw_changes(schema='ces_responses', table='vw_changes_2') #df_courses = db_extract_query_to_dataframe(qry1, cur, print_messages=False) qry2 = qry_vw_changes(schema='ces_responses', table='vw_agg_changes') df_all = db_extract_query_to_dataframe(qry2, cur, print_messages=False) print(qry2) #print(tabulate(df_courses, headers='keys')) #print(tabulate(df_courses, headers='keys')) print('\n\n') print(qry2) print(tabulate(df_all, headers='keys')) #fig = graphRRbar(df_all) fig = graphDRRbar(df_all)
def get_data(cur, tbl, schema='ces'):
    """Return every row and column of ``schema.tbl`` as a dataframe.

    Args:
        cur: database cursor used to run the query.
        tbl: table or view to read.
        schema: schema containing ``tbl``.

    Returns:
        The result of ``db_extract_query_to_dataframe`` for the query.
    """
    template = (
        ' SELECT *\n'
        ' FROM {0}.{1} \n'
    )
    return db_extract_query_to_dataframe(
        template.format(schema, tbl), cur, print_messages=False)
def get_table(cur, schema, tbl):
    """Return all values of the table ``schema.tbl`` as a dataframe.

    Args:
        cur: database cursor used to run the query.
        schema: schema containing ``tbl``.
        tbl: table or view to read.

    Returns:
        The result of ``db_extract_query_to_dataframe`` for the query.
    """
    select_all = 'SELECT * FROM {}.{}'.format(schema, tbl)
    return db_extract_query_to_dataframe(select_all, cur, print_messages=False)
def create_ce_comparison_chart(cur, width=800, height=600, display=False, save=False,
                               start_year=2017, end_year=2019,
                               show_title=True, show_annotations=True,
                               show_ylabel=True, show_pval=True,
                               table='vw204_ce_evaluation'):
    """Bar chart comparing the mean GTS change of enhanced vs. other courses.

    Reads ``gts_delta`` and the ``la`` flag (exposed as ``ce`` = 1/0) from
    ``course_enhancement.<table>`` for years in ``[start_year, end_year]``.
    For every semester (1 and 2) of every year in that range, one bar is
    drawn for the enhanced courses (``ce == 1``) and one for the others
    (``ce == 0``). Bar height is the mean ``gts_delta`` and the error bar is
    the standard error of the mean. With the default years the bars cover
    2017 S1 to 2019 S2.

    Args:
        cur: database cursor used to run the query.
        width: figure width in pixels.
        height: figure height in pixels.
        display: when true, write and open the chart with ``plotly.offline.plot``.
        save: when true, save a PNG via ``plotly.plotly.image.save_as``.
        start_year: first year (inclusive) queried and plotted.
        end_year: last year (inclusive) queried and plotted.
        show_title: whether the chart gets its title.
        show_annotations: whether the explanatory annotation is added.
        show_ylabel: whether the y axis gets its title.
        show_pval: when true, the p-value of an independent two-sample t-test
            (enhanced vs. other) is appended to each x label.
        table: view in the ``course_enhancement`` schema to read from.

    Returns:
        The ``go.Figure`` that was built.
    """
    qry = " SELECT year, semester, course_code_ces, gts_delta::numeric, " \
          " CASE WHEN la=true THEN 1 ELSE 0 END AS ce" \
          " FROM course_enhancement.{} \n" \
          " WHERE gts_pre IS NOT NULL AND gts_post IS NOT NULL \n" \
          " AND year >= {} AND year <= {} \n" \
          "".format(table, start_year, end_year)
    df1 = db_extract_query_to_dataframe(qry, cur, print_messages=False)
    df1['gts_delta'] = df1['gts_delta'].astype(float)

    # Tick labels carry an extra p-value line when show_pval is set, which is
    # paired with the larger font. Bound before the loop so it exists even for
    # an empty year range.
    xtick_size = 14 if show_pval else 10

    pval = []
    ms_mean = []
    nms_mean = []
    ms_sem = []
    nms_sem = []
    labels = []
    # Plot exactly the semesters that were queried instead of a fixed
    # 2017-2019 list.
    for year in range(int(start_year), int(end_year) + 1):
        for semester in (1, 2):
            in_semester = (df1['year'] == year) & (df1['semester'] == semester)
            enhanced = df1.loc[in_semester & (df1['ce'] == 1), 'gts_delta']
            other = df1.loc[in_semester & (df1['ce'] == 0), 'gts_delta']

            if show_pval:
                # Run the t-test once and reuse the p-value for list and label.
                p_value = scipystats.ttest_ind(enhanced, other)[1]
                pval.append(p_value)
                labels.append('{} S{}<br>'
                              'p-val={}'.format(year, semester, '%.3f' % p_value))
            else:
                labels.append('{} S{}'.format(year, semester))

            ms_mean.append(np.mean(enhanced))
            nms_mean.append(np.mean(other))
            ms_sem.append(scipystats.sem(enhanced))
            nms_sem.append(scipystats.sem(other))

    print(ms_mean)
    print(nms_mean)
    print(pval)

    # NOTE(review): the two traces format their bar text with different
    # precision ('%.1f' vs '%.2f'); kept as found - confirm whether intended.
    trace1 = go.Bar(
        x=labels,
        y=nms_mean,
        text=['%.1f' % val for val in nms_mean],
        textposition='outside',
        name='Other',
        marker=dict(
            color=rc.RMIT_Red),
        error_y=dict(
            type='data',
            array=nms_sem,
            visible=True,
            color=rc.RMIT_Green
        )
    )
    trace2 = go.Bar(
        x=labels,
        y=ms_mean,
        text=['%.2f' % val for val in ms_mean],
        textposition='outside',
        name='Enhanced',
        marker=dict(
            color=rc.RMIT_Black),
        error_y=dict(
            type='data',
            array=ms_sem,
            visible=True,
            color=rc.RMIT_Green
        )
    )
    data = [trace2, trace1]

    title = 'Mean Change in Course GTS' if show_title else None
    ylabel = 'Change in GTS' if show_ylabel else None

    layout = go.Layout(
        title=title,
        titlefont=dict(size=24),
        showlegend=True,
        width=width,
        height=height,
        margin=dict(b=20, l=20, r=10, t=10),
        hidesources=True,
        xaxis=dict(
            tickfont=dict(
                size=xtick_size,
            )
        ),
        plot_bgcolor=rc.RMIT_White,
        yaxis=dict(
            title=ylabel,
            titlefont=dict(
                size=16,)
        ),
        legend=dict(
            orientation="v",
            x=0.6,
            y=0.95)
    )

    if show_annotations:
        # NOTE(review): the last sentence names a fixed semester (2019 S1)
        # regardless of the plotted range.
        annotations = [dict(x=0.6, y=1,
                            text='The "Change in course GTS" is between:<br>'
                                 ' 1. The average GTS of the two previous offerings of the course; and<br>'
                                 ' 2. The average GTS of the labeled and next offering of the course<br>'
                                 ' There is no next offering data for 2019 S1',
                            font=dict(size=12),
                            xref='paper',
                            yref='paper',
                            showarrow=False
                            ),
                       ]
        layout['annotations'] = annotations

    fig = go.Figure(data=data, layout=layout)

    if display:
        plotly.offline.plot(
            fig,
            filename='H:\\Projects\\CoB\\CES\\Course Enhancement\\CE_vs_NCE_2019S2.html'
        )
    if save:
        plotly.plotly.image.save_as(
            fig,
            filename='H:\\Projects\\CoB\\CES\\Course Enhancement\\CE_vs_NCE_2019S2.png'
        )
    return fig
def create_ce_growth(cur, width=800, height=600, start_year=2017, end_year=2019,
                     show_title=True, show_annotations=True, show_ylabel=True):
    """Bar chart of the number of distinct enhanced courses per semester.

    Counts the distinct ``course_code_ces`` values per year and semester in
    ``course_enhancement.tbl_courses`` where ``cob_engagement`` is true, the
    level is not ``'VN'`` and the year lies in ``[start_year, end_year]``.
    One bar is drawn for each semester (1 and 2) of each year in that range.
    Semesters the query returns no row for are shown as 0.

    Args:
        cur: database cursor used to run the query.
        width: figure width in pixels.
        height: figure height in pixels.
        start_year: first year (inclusive) queried and plotted.
        end_year: last year (inclusive) queried and plotted.
        show_title: whether the chart gets its title.
        show_annotations: whether the annotation text is added.
        show_ylabel: whether the y axis gets its title.

    Returns:
        The ``go.Figure`` that was built.
    """
    qry = " SELECT year, semester, count(DISTINCT course_code_ces) AS count \n " \
          " FROM course_enhancement.tbl_courses \n " \
          " WHERE " \
          " cob_engagement = True \n" \
          " AND level != 'VN' \n" \
          " AND year >= {} AND year <= {}" \
          " GROUP BY year, semester \n" \
          "".format(start_year, end_year)
    df1 = db_extract_query_to_dataframe(qry, cur, print_messages=False)

    # The query has no ORDER BY and may omit semesters without courses, so
    # counts are matched to their label by (year, semester) instead of by
    # row position.
    count_by_period = {
        (int(row_year), int(row_semester)): int(row_count)
        for row_year, row_semester, row_count
        in zip(df1['year'], df1['semester'], df1['count'])
    }
    periods = [(year, semester)
               for year in range(int(start_year), int(end_year) + 1)
               for semester in (1, 2)]
    labels = ['{} S{}'.format(year, semester) for year, semester in periods]
    course_counts = [count_by_period.get(period, 0) for period in periods]

    trace1 = go.Bar(
        x=labels,
        y=course_counts,
        name='No. of Courses',
        textposition='inside',
        marker=dict(
            color=rc.RMIT_Black),
    )
    data = [trace1]

    title = 'Growth in Course Enhancement' if show_title else None
    # The bars are course counts; the previous 'Change in GTS' axis title did
    # not describe this chart.
    ylabel = 'No. of Courses' if show_ylabel else None

    layout = go.Layout(
        title=title,
        titlefont=dict(size=24),
        showlegend=True,
        width=width,
        height=height,
        margin=dict(b=20, l=25, r=10, t=10),
        hidesources=True,
        xaxis=dict(
            tickfont=dict(
                size=10,
            )
        ),
        plot_bgcolor=rc.RMIT_White,
        yaxis=dict(
            title=ylabel,
            titlefont=dict(
                size=16,
            )
        ),
        legend=dict(
            orientation="v",
            x=0.01,
            y=0.90)
    )

    if show_annotations:
        # NOTE(review): this text explains "Change in course GTS", which is
        # not what this chart plots - it looks carried over from the GTS
        # comparison chart. Kept unchanged pending confirmation of the
        # intended wording.
        annotations = [dict(x=0.6, y=1,
                            text='The "Change in course GTS" is between:<br>'
                                 ' 1. The average GTS of the two previous offerings of the course; and<br>'
                                 ' 2. The average GTS of the labeled and next offering of the course<br>'
                                 ' There is no next offering data for 2019 S1',
                            font=dict(size=12),
                            xref='paper',
                            yref='paper',
                            showarrow=False
                            ),
                       ]
        layout['annotations'] = annotations

    fig = go.Figure(data=data, layout=layout)
    return fig