def raw_mr_apa_table_word(mod_raw_data_df, output_df):
    """Write the multiple-regression results as an APA-style Word table.

    The columns "Variable", "B", "95% CI B", "beta", "t" and
    "adjusted_pvalues" are taken from ``output_df``; "B", "beta" and "t" are
    formatted to two decimals, the p values are passed through
    ``helper_funcs.pvalue_formatting``, and the result is laid out in a new
    document that is handed to ``helper_funcs.savefile``.

    Args:
        mod_raw_data_df: Not used by this function; kept so that all table
            writers share the same signature.
        output_df: DataFrame holding the columns listed above plus "R2adj".
            The row labelled 0 is treated as the constant term.
    """
    mod_output_df = output_df[[
        "Variable", "B", "95% CI B", "beta", "t", "adjusted_pvalues"
    ]]

    # Silence the chained-assignment warning only while the selection is being
    # edited; the previous pandas setting is restored on exit instead of being
    # left switched off for the rest of the process.
    with pd.option_context("mode.chained_assignment", None):
        mod_output_df[["B", "beta", "t"]] = mod_output_df[[
            "B", "beta", "t"
        ]].applymap(lambda x: "{:.2f}".format(x))
        mod_output_df["adjusted_pvalues"] = mod_output_df[
            "adjusted_pvalues"].map(helper_funcs.pvalue_formatting)
        mod_output_df.rename(columns={
            "95% CI B": "95% CI",
            "adjusted_pvalues": "p"
        }, inplace=True)
        # removes the beta value for constant as it is always 0
        mod_output_df.loc[0, "beta"] = ""

    doc = Document()
    table_rows_len = len(mod_output_df) + 1  # one extra row for the header
    table_cols_len = len(mod_output_df.columns)
    table = doc.add_table(rows=table_rows_len, cols=table_cols_len)

    for ind, var in enumerate(mod_output_df.columns):
        table.cell(row_idx=0, col_idx=ind).text = var
    for row in range(1, table_rows_len):
        for col in range(table_cols_len):
            table.cell(row_idx=row,
                       col_idx=col).text = mod_output_df.iloc[row - 1, col]

    # Header row: italic labels with a rule above and below.
    for cell in table.rows[0].cells:
        helper_funcs.word_style(cell, italic=True)
        helper_funcs.set_cell_border(cell,
                                     top=global_vars.border_APA_word,
                                     bottom=global_vars.border_APA_word)
    # Closing rule under the last row of the table.
    for cell in table.rows[table_rows_len - 1].cells:
        helper_funcs.set_cell_border(cell, bottom=global_vars.border_APA_word)

    for row in range(table_rows_len):
        for cell in table.rows[row].cells:
            cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
            cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    doc = helper_funcs.set_autofit(doc)

    doc.add_paragraph("R squared adjusted = {:.2f}".format(
        output_df["R2adj"][0]))
    doc.add_paragraph(
        "Dependent Variable: {DV}".format(DV=global_vars.raw_mr_outcomevar))
    helper_funcs.add_correction_message_word(doc)

    helper_funcs.savefile(doc=doc)
def raw_mr_apa_table_excel(mod_raw_data_df, output_df):
    """Write the multiple-regression results as an APA-style Excel table.

    The columns "Variable", "B", "95% CI B", "beta", "t" and
    "adjusted_pvalues" are taken from ``output_df``; "B", "beta" and "t" are
    formatted to two decimals, the p values are passed through
    ``helper_funcs.pvalue_formatting``, and the result is written to a new
    workbook that is handed to ``helper_funcs.savefile``.

    Args:
        mod_raw_data_df: Not used by this function; kept so that all table
            writers share the same signature.
        output_df: DataFrame holding the columns listed above plus "R2adj".
            The row labelled 0 is treated as the constant term.
    """
    mod_output_df = output_df[[
        "Variable", "B", "95% CI B", "beta", "t", "adjusted_pvalues"
    ]]

    # Silence the chained-assignment warning only while the selection is being
    # edited; the previous pandas setting is restored on exit instead of being
    # left switched off for the rest of the process.
    with pd.option_context("mode.chained_assignment", None):
        mod_output_df[["B", "beta", "t"]] = mod_output_df[[
            "B", "beta", "t"
        ]].applymap(lambda x: "{:.2f}".format(x))
        mod_output_df["adjusted_pvalues"] = mod_output_df[
            "adjusted_pvalues"].map(helper_funcs.pvalue_formatting)
        mod_output_df.rename(columns={
            "95% CI B": "95% CI",
            "adjusted_pvalues": "p"
        }, inplace=True)
        # removes the beta value for constant as it is always 0
        mod_output_df.loc[0, "beta"] = ""

    wb = Workbook()
    ws = wb.active

    ws.append(list(mod_output_df.columns))
    for row in dataframe_to_rows(mod_output_df, index=False, header=False):
        ws.append(row)

    last_row = len(mod_output_df) + 1  # header occupies worksheet row 1

    # Header row: header font with a rule above and below.
    for cell in ws[1]:
        cell.font = global_vars.font_header
        cell.border = Border(top=global_vars.border_APA,
                             bottom=global_vars.border_APA)
    # Last row only carries the closing rule. Applied after the header so the
    # outcome for an empty table matches the previous behaviour.
    for cell in ws[last_row]:
        cell.border = Border(bottom=global_vars.border_APA)

    for row in range(1, last_row + 1):
        for cell in ws[row]:
            cell.alignment = global_vars.alignment_center

    table_notes = [
        "R squared adjusted = {:.2f}".format(output_df["R2adj"][0]),
        "Dependent Variable: {DV}".format(DV=global_vars.raw_mr_outcomevar)
    ]
    helper_funcs.add_table_notes(ws, table_notes)

    helper_funcs.savefile(wb=wb)
def spss_mr_apa_table_word(mod_raw_data_df, output_df):
    """Write an SPSS multiple-regression table as an APA-style Word table.

    Every column of ``output_df`` (after the edits described below) becomes a
    column of the Word table. The dependent-variable label is read from the
    last row of the first column of ``mod_raw_data_df``. The finished document
    is handed to ``helper_funcs.savefile``.

    Side effects:
        ``output_df`` is modified in place: the "pvalues" column is dropped,
        "adjusted_pvalues" is passed through
        ``helper_funcs.pvalue_formatting`` and renamed to "p".

    Args:
        mod_raw_data_df: DataFrame whose first column ends with a text cell of
            the form "<prefix>: <dependent variable>".
        output_df: DataFrame with at least the columns "pvalues",
            "adjusted_pvalues" and "95% CI".
    """
    output_df.drop(columns=["pvalues"], inplace=True)

    # Silence the chained-assignment warning only for these edits; the previous
    # pandas setting is restored on exit instead of being left switched off.
    with pd.option_context("mode.chained_assignment", None):
        output_df["adjusted_pvalues"] = output_df["adjusted_pvalues"].map(
            helper_funcs.pvalue_formatting)
        output_df.rename(columns={"adjusted_pvalues": "p"}, inplace=True)

    doc = Document()
    table_rows_len = len(output_df) + 1  # one extra row for the header
    table_cols_len = len(output_df.columns)
    table = doc.add_table(rows=table_rows_len, cols=table_cols_len)

    for ind, var in enumerate(output_df.columns):
        table.cell(row_idx=0, col_idx=ind).text = var
    for row in range(1, table_rows_len):
        for col in range(table_cols_len):
            table.cell(row_idx=row,
                       col_idx=col).text = output_df.iloc[row - 1, col]

    for cell in table.rows[0].cells:
        helper_funcs.word_style(cell, italic=True)

    for row in range(table_rows_len):
        for cell in table.rows[row].cells:
            cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
            cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    for cell in table.rows[0].cells:
        helper_funcs.set_cell_border(cell,
                                     top=global_vars.border_APA_word,
                                     bottom=global_vars.border_APA_word)
    for cell in table.rows[table_rows_len - 1].cells:
        helper_funcs.set_cell_border(cell, bottom=global_vars.border_APA_word)

    doc = helper_funcs.set_autofit(doc)

    # Last cell of the first column, taken by position so it does not depend
    # on the index labels.
    DV_cell = mod_raw_data_df.iloc[-1, 0]
    # Keep what follows the colon and the single character after it. Without a
    # colon the whole text is used rather than losing its first character.
    _, colon, after_colon = DV_cell.partition(":")
    DV = after_colon[1:] if colon else DV_cell

    doc.add_paragraph("R squared adjusted = X.XX")
    doc.add_paragraph("Dependent Variable: {}".format(DV))
    if output_df["95% CI"][0] == "[,]":
        doc.add_paragraph(
            "Confidence intervals were not found in the SPSS table. Please add them to your SPSS table and re-run the program or add them manually."
        )
    helper_funcs.add_correction_message_word(doc)

    helper_funcs.savefile(doc=doc)
def spss_mr_apa_table_excel(mod_raw_data_df, output_df):
    """Write an SPSS multiple-regression table as an APA-style Excel table.

    Every column of ``output_df`` (after the edits described below) becomes a
    column of the worksheet. The dependent-variable label is read from the
    last row of the first column of ``mod_raw_data_df``. The finished workbook
    is handed to ``helper_funcs.savefile``.

    Side effects:
        ``output_df`` is modified in place: the "pvalues" column is dropped,
        "adjusted_pvalues" is passed through
        ``helper_funcs.pvalue_formatting`` and renamed to "p".

    Args:
        mod_raw_data_df: DataFrame whose first column ends with a text cell of
            the form "<prefix>: <dependent variable>".
        output_df: DataFrame with at least the columns "pvalues",
            "adjusted_pvalues" and "95% CI".
    """
    output_df.drop(columns=["pvalues"], inplace=True)

    # Silence the chained-assignment warning only for these edits; the previous
    # pandas setting is restored on exit instead of being left switched off.
    with pd.option_context("mode.chained_assignment", None):
        output_df["adjusted_pvalues"] = output_df["adjusted_pvalues"].map(
            helper_funcs.pvalue_formatting)
        output_df.rename(columns={"adjusted_pvalues": "p"}, inplace=True)

    wb = Workbook()
    ws = wb.active

    for row in dataframe_to_rows(output_df, index=False, header=True):
        ws.append(row)

    last_row = len(output_df) + 1  # header occupies worksheet row 1

    for cell in ws[1]:
        cell.font = global_vars.font_header

    for row in range(1, last_row + 1):
        for cell in ws[row]:
            cell.alignment = global_vars.alignment_center

    for cell in ws[1]:
        cell.border = Border(bottom=global_vars.border_APA,
                             top=global_vars.border_APA)
    for cell in ws[last_row]:
        cell.border = Border(bottom=global_vars.border_APA)

    # Last cell of the first column, taken by position so it does not depend
    # on the index labels.
    DV_cell = mod_raw_data_df.iloc[-1, 0]
    # Keep what follows the colon and the single character after it. Without a
    # colon the whole text is used rather than losing its first character.
    _, colon, after_colon = DV_cell.partition(":")
    DV = after_colon[1:] if colon else DV_cell

    table_notes = [
        "R squared adjusted = X.XX", "Dependent variable: {}".format(DV)
    ]
    if output_df["95% CI"][0] == "[,]":
        table_notes.append(
            "Confidence intervals were not found in the SPSS table. Please add them to your SPSS table and re-run the program or add them manually."
        )
    helper_funcs.add_table_notes(ws, table_notes)

    helper_funcs.savefile(wb=wb)
def raw_pairttest_apa_table_excel(mod_raw_data_df, output_df):
    """Write paired-samples t-test results as an APA-style Excel table.

    The sheet gets a two-row header ("Time 1" and "Time 2" each span an
    "M"/"SD" pair; the remaining labels span both header rows), followed by
    one row per row of the selected data. The workbook is handed to
    ``helper_funcs.savefile``.

    Side effects:
        Drops the "pvalues" column from ``output_df`` in place and leaves
        ``pd.options.mode.chained_assignment`` set to "warn".

    Args:
        mod_raw_data_df: Not used by this function.
        output_df: DataFrame with the t-test columns selected below, including
            one named after ``global_vars.effect_size_choice``.
    """
    output_df.drop(columns=["pvalues"], inplace=True)

    effect_size = global_vars.effect_size_choice
    selected_columns = [
        "Variable", "Time1_Mean", "Time1_SD", "Time2_Mean", "Time2_SD",
        "Degrees of Freedom", "t", effect_size, "adjusted_pvalues"
    ]
    apa_table_df = output_df[selected_columns]

    pd.options.mode.chained_assignment = None
    # Everything between the label column and the p values is shown with two
    # decimals.
    two_decimal_cols = list(apa_table_df.columns)[1:-1]
    apa_table_df[two_decimal_cols] = apa_table_df[two_decimal_cols].applymap(
        "{:.2f}".format)
    apa_table_df["adjusted_pvalues"] = apa_table_df["adjusted_pvalues"].map(
        helper_funcs.pvalue_formatting)
    pd.options.mode.chained_assignment = "warn"

    wb = Workbook()
    ws = wb.active

    # (column, label, merged range, apply header font)
    top_headers = (
        (1, "Variable", "A1:A2", True),
        (2, "Time 1", "B1:C1", False),
        (4, "Time 2", "D1:E1", False),
        (6, "df", "F1:F2", True),
        (7, "t", "G1:G2", True),
        (8, effect_size, "H1:H2", True),
        (9, "p", "I1:I2", True),
    )
    for column, label, merged_range, use_header_font in top_headers:
        ws.cell(row=1, column=column).value = label
        ws.merge_cells(merged_range)
        if use_header_font:
            ws.cell(row=1, column=column).font = global_vars.font_header

    # Second header row: "M" / "SD" under each of the two time points.
    for first_column in (2, 4):
        for offset, label in enumerate(("M", "SD")):
            sub_header = ws.cell(row=2, column=first_column + offset)
            sub_header.value = label
            sub_header.font = global_vars.font_header

    for data_row in dataframe_to_rows(apa_table_df, index=False,
                                      header=False):
        ws.append(data_row)

    last_row = len(apa_table_df) + 2  # two header rows precede the data

    for row_number in range(1, last_row + 1):
        for cell in ws[row_number]:
            cell.alignment = global_vars.alignment_center

    for cell in ws[2] + ws[last_row]:
        cell.border = Border(bottom=global_vars.border_APA)
    for cell in ws[1]:
        cell.border = Border(top=global_vars.border_APA)

    # With no effect size requested, the placeholder column is removed again.
    if effect_size == "None":
        ws.delete_cols(8)

    helper_funcs.add_table_notes(ws, [])
    helper_funcs.savefile(wb=wb)
def raw_pairttest_apa_table_word(mod_raw_data_df, output_df):
    """Write paired-samples t-test results as an APA-style Word table.

    The table gets a two-row header ("Time 1" and "Time 2" each span an
    "M"/"SD" pair; the remaining labels span both header rows), followed by
    one row per row of the selected data. The document is handed to
    ``helper_funcs.savefile``.

    Side effects:
        Drops the "pvalues" column from ``output_df`` in place and leaves
        ``pd.options.mode.chained_assignment`` set to "warn".

    Args:
        mod_raw_data_df: Not used by this function.
        output_df: DataFrame with the t-test columns selected below, including
            one named after ``global_vars.effect_size_choice``.
    """
    output_df.drop(columns=["pvalues"], inplace=True)

    effect_size = global_vars.effect_size_choice
    selected_columns = [
        "Variable", "Time1_Mean", "Time1_SD", "Time2_Mean", "Time2_SD",
        "Degrees of Freedom", "t", effect_size, "adjusted_pvalues"
    ]
    apa_table_df = output_df[selected_columns]

    pd.options.mode.chained_assignment = None
    # Everything between the label column and the p values is shown with two
    # decimals.
    two_decimal_cols = list(apa_table_df.columns)[1:-1]
    apa_table_df[two_decimal_cols] = apa_table_df[two_decimal_cols].applymap(
        "{:.2f}".format)
    apa_table_df["adjusted_pvalues"] = apa_table_df["adjusted_pvalues"].map(
        helper_funcs.pvalue_formatting)
    pd.options.mode.chained_assignment = "warn"

    doc = Document()
    table_rows_len = len(apa_table_df) + 2  # two header rows
    table_cols_len = len(apa_table_df.columns)
    table = doc.add_table(rows=table_rows_len, cols=table_cols_len)

    # (column, label, (row, col) of the cell merged into it, merge?, italic?)
    # The effect-size header is left unmerged when no effect size was chosen:
    # otherwise cant remove with delete columns below if merged; see
    # helper_funcs.delete_columns_word func notes
    top_headers = (
        (0, "Variable", (1, 0), True, True),
        (1, "Time 1", (0, 2), True, False),
        (3, "Time 2", (0, 4), True, False),
        (5, "df", (1, 5), True, True),
        (6, "t", (1, 6), True, True),
        (7, effect_size, (1, 7), effect_size != "None", True),
        (8, "p", (1, 8), True, True),
    )
    for column, label, partner, do_merge, italic in top_headers:
        table.cell(row_idx=0, col_idx=column).text = label
        if do_merge:
            partner_row, partner_col = partner
            table.cell(row_idx=0, col_idx=column).merge(
                table.cell(row_idx=partner_row, col_idx=partner_col))
        if italic:
            helper_funcs.word_style(table.cell(row_idx=0, col_idx=column),
                                    italic=True)

    # Second header row: "M" / "SD" under each of the two time points.
    for first_column in (1, 3):
        for offset, label in enumerate(("M", "SD")):
            sub_header = table.cell(row_idx=1, col_idx=first_column + offset)
            sub_header.text = label
            helper_funcs.word_style(table.cell(row_idx=1,
                                               col_idx=first_column + offset),
                                    italic=True)

    for df_row in range(len(apa_table_df)):
        for column in range(table_cols_len):
            table.cell(row_idx=df_row + 2,
                       col_idx=column).text = apa_table_df.iloc[df_row,
                                                                column]

    for row_number in range(table_rows_len):
        for cell in table.rows[row_number].cells:
            cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
            cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Rules: above the header, between header and data (drawn as the top edge
    # of the first data row) and below the last row.
    for cell in table.rows[0].cells:
        helper_funcs.set_cell_border(cell, top=global_vars.border_APA_word)
    for cell in table.rows[2].cells:
        helper_funcs.set_cell_border(cell, top=global_vars.border_APA_word)
    for cell in table.rows[table_rows_len - 1].cells:
        helper_funcs.set_cell_border(cell, bottom=global_vars.border_APA_word)

    if effect_size == "None":
        helper_funcs.delete_columns_word(table, [7])

    doc = helper_funcs.set_autofit(doc)

    helper_funcs.add_correction_message_word(doc)

    helper_funcs.savefile(doc=doc)
def spss_corr_apa_table_excel(mod_raw_data_df, output_df):
    """Write SPSS correlation results as an APA-style Excel matrix.

    Variable names come from the columns of ``mod_raw_data_df`` (from the
    third column on) and the coefficient label from its cell at row 0,
    column 1. Depending on ``global_vars.corr_table_triangle`` the upper
    triangle, the lower triangle or the full matrix is filled. The workbook
    is handed to ``helper_funcs.savefile``.

    Args:
        mod_raw_data_df: DataFrame providing the variable names and the
            correlation label as described above.
        output_df: DataFrame with one row per variable pair, with columns
            "var1", "var2", "adjusted_pvalues" and one named after the
            correlation label.
    """
    # could very easily use the summ_corr_apa_table function here as I pass
    # identical data - separate is preferred, however, as might be
    # updated/adjusted in the future
    correlation_label = mod_raw_data_df.iloc[0, 1]
    variables_list = list(mod_raw_data_df.columns)[2:]

    triangle = global_vars.corr_table_triangle
    if triangle == "Upper triangle":
        header_cols = variables_list[1:]
        header_rows = variables_list[:-1]
    elif triangle == "Lower triangle":
        header_cols = variables_list[:-1]
        header_rows = variables_list[1:]
    elif triangle == "Both":
        header_cols, header_rows = variables_list, variables_list

    wb = Workbook()
    ws = wb.active

    ws.append([""] + header_cols)
    for row_number, var in enumerate(header_rows, start=2):
        ws.cell(row=row_number, column=1).value = var

    # For the lower triangle the outer index walks the header row (columns)
    # and the inner index walks the first column (rows); otherwise the roles
    # are swapped.
    is_lower = triangle == "Lower triangle"
    inner_start = 2
    for outer in range(2, len(header_rows) + 2):
        if is_lower:
            outer_var = ws.cell(row=1, column=outer).value
        else:
            outer_var = ws.cell(row=outer, column=1).value

        for inner in range(inner_start, len(header_cols) + 2):
            if is_lower:
                inner_var = ws.cell(row=inner, column=1).value
            else:
                inner_var = ws.cell(row=1, column=inner).value

            if outer_var == inner_var:
                ws.cell(row=outer, column=inner).value = 1
                continue

            # here query method is not preferred as it is not only slower as
            # it is much smaller dataset but also cannot refer to two
            # different variables (colname and val)
            same_order = (output_df["var1"] == outer_var) & (
                output_df["var2"] == inner_var)
            swapped = (output_df["var1"] == inner_var) & (
                output_df["var2"] == outer_var)
            pair = output_df[same_order | swapped].iloc[0]
            formatted = helper_funcs.correlations_format_val(
                pair[correlation_label], pair["adjusted_pvalues"])

            if is_lower:
                ws.cell(row=inner, column=outer).value = formatted
            else:
                ws.cell(row=outer, column=inner).value = formatted

        # Triangles start one cell further in on every pass.
        if triangle != "Both":
            inner_start += 1

    last_row = len(header_rows) + 1

    for row_number in range(1, last_row + 1):
        for cell in ws[row_number]:
            cell.alignment = global_vars.alignment_center

    for cell in ws[1]:
        cell.font = global_vars.font_header
    for row_number in range(2, last_row + 1):
        ws.cell(row=row_number, column=1).font = global_vars.font_header

    for cell in ws[last_row]:
        cell.border = Border(bottom=global_vars.border_APA)
    for cell in ws[1]:
        cell.border = Border(top=global_vars.border_APA,
                             bottom=global_vars.border_APA)

    table_notes = [
        "**p < 0.01",
        "*p < {}".format(global_vars.alpha_threshold),
        "Correlation Coefficient used: {}".format(correlation_label),
    ]
    helper_funcs.add_table_notes(ws, table_notes)

    helper_funcs.savefile(wb=wb)
def spss_corr_apa_table_word(mod_raw_data_df, output_df):
    """Write SPSS correlation results as an APA-style Word matrix.

    Variable names come from the columns of ``mod_raw_data_df`` (from the
    third column on) and the coefficient label from its cell at row 0,
    column 1. Every variable column is followed by a narrow companion column
    that receives the part of the formatted value after an underscore (the
    significance signs). Depending on ``global_vars.corr_table_triangle`` the
    upper triangle, the lower triangle or the full matrix is filled. The
    document is handed to ``helper_funcs.savefile``.

    Args:
        mod_raw_data_df: DataFrame providing the variable names and the
            correlation label as described above.
        output_df: DataFrame with one row per variable pair, with columns
            "var1", "var2", "adjusted_pvalues" and one named after the
            correlation label.
    """
    # could very easily use the summ_corr_apa_table function here as I pass
    # identical data - separate is preferred, however, as might be
    # updated/adjusted in the future
    correlation_label = mod_raw_data_df.iloc[0, 1]
    variables_list = list(mod_raw_data_df.columns)[2:]

    triangle = global_vars.corr_table_triangle
    if triangle == "Upper triangle":
        header_cols = variables_list[1:]
        header_rows = variables_list[:-1]
    elif triangle == "Lower triangle":
        header_cols = variables_list[:-1]
        header_rows = variables_list[1:]
    elif triangle == "Both":
        # independent copies, so the insertions below leave the row headers
        # untouched
        header_cols = variables_list[:]
        header_rows = variables_list[:]

    # this adds an empty column where the significance signs will be placed
    # for better presentation
    for position in range(1, len(header_cols) * 2, 2):
        header_cols.insert(position, "")

    doc = Document()
    table_rows_len = len(header_rows) + 1
    table_cols_len = len(header_cols) + 1
    table = doc.add_table(rows=table_rows_len, cols=table_cols_len)

    for col_idx, label in enumerate([""] + header_cols):
        table.cell(row_idx=0, col_idx=col_idx).text = label
    for row_idx, label in enumerate([""] + header_rows):
        table.cell(row_idx=row_idx, col_idx=0).text = label

    def write_correlation(row_idx, col_idx, first_var, second_var):
        # Look the pair up in either order and write the value plus its
        # significance signs into the cell and the companion cell to its right.
        # here query method is not preferred as it is not only slower as it is
        # much smaller dataset but also cannot refer to two different
        # variables (colname and val)
        same_order = (output_df["var1"] == first_var) & (
            output_df["var2"] == second_var)
        swapped = (output_df["var1"] == second_var) & (
            output_df["var2"] == first_var)
        pair = output_df[same_order | swapped].iloc[0]
        formatted = helper_funcs.correlations_format_val(
            pair[correlation_label], pair["adjusted_pvalues"])
        if "_" in formatted:
            value, signs = formatted.split("_")
        else:
            value, signs = formatted, ""
        table.cell(row_idx=row_idx, col_idx=col_idx).text = value
        table.cell(row_idx=row_idx, col_idx=col_idx + 1).text = signs

    if triangle == "Upper triangle":
        first_col = 1
        for row_idx in range(1, table_rows_len):
            row_var = table.cell(row_idx=row_idx, col_idx=0).text
            for col_idx in range(first_col, table_cols_len):
                col_var = table.cell(row_idx=0, col_idx=col_idx).text
                # blank headers mark the columns designed for the
                # significance signs
                if col_var != "" and row_var != "":
                    write_correlation(row_idx, col_idx, row_var, col_var)
            first_col += 2
    elif triangle == "Lower triangle":
        first_row = 1
        for col_idx in range(1, table_cols_len):
            col_var = table.cell(row_idx=0, col_idx=col_idx).text
            if col_var == "":
                continue
            for row_idx in range(first_row, table_rows_len):
                row_var = table.cell(row_idx=row_idx, col_idx=0).text
                write_correlation(row_idx, col_idx, col_var, row_var)
            first_row += 1
    elif triangle == "Both":
        for col_idx in range(1, table_cols_len):
            col_var = table.cell(row_idx=0, col_idx=col_idx).text
            if col_var == "":
                continue
            for row_idx in range(1, table_rows_len):
                row_var = table.cell(row_idx=row_idx, col_idx=0).text
                if row_var == "":
                    continue
                if col_var == row_var:
                    table.cell(row_idx=row_idx, col_idx=col_idx).text = "1"
                else:
                    write_correlation(row_idx, col_idx, col_var, row_var)

    for cell in table.rows[0].cells:
        helper_funcs.set_cell_border(cell,
                                     top=global_vars.border_APA_word,
                                     bottom=global_vars.border_APA_word)
    for cell in table.rows[table_rows_len - 1].cells:
        helper_funcs.set_cell_border(cell, bottom=global_vars.border_APA_word)

    for cell in table.rows[0].cells:
        helper_funcs.word_style(cell, italic=True)
    for cell in table.columns[0].cells:
        helper_funcs.word_style(cell, italic=True)

    # Each variable header spans its value column and the signs column.
    for col_idx in range(1, table_cols_len, 2):
        table.cell(row_idx=0,
                   col_idx=col_idx).merge(table.cell(row_idx=0,
                                                     col_idx=col_idx + 1))
        # merging cells adds a paragraph break at the end; this gets rid of it
        merged_text = table.cell(row_idx=0, col_idx=col_idx).text
        table.cell(row_idx=0, col_idx=col_idx).text = merged_text[:-1]

    for cell in table.rows[0].cells:
        helper_funcs.word_style(cell, italic=True)
        cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER
    for cell in table.columns[0].cells:
        helper_funcs.word_style(cell, italic=True)
        cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    for row_idx in range(table_rows_len):
        for cell in table.rows[row_idx].cells:
            cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
            spacing = cell.paragraphs[0].paragraph_format
            spacing.space_after = Inches(0)
            spacing.space_before = Inches(0)
            spacing.line_spacing = 1

    # Values are right-aligned and their signs left-aligned, with negative
    # indents on both sides of the shared cell edge.
    for row_idx in range(1, table_rows_len):
        for col_idx in range(1, table_cols_len, 2):
            value_par = table.cell(row_idx=row_idx,
                                   col_idx=col_idx).paragraphs[0]
            value_par.paragraph_format.right_indent = Inches(-0.08)
            signs_par = table.cell(row_idx=row_idx,
                                   col_idx=col_idx + 1).paragraphs[0]
            signs_par.paragraph_format.left_indent = Inches(-0.06)
            table.cell(
                row_idx=row_idx,
                col_idx=col_idx).paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.RIGHT
            table.cell(
                row_idx=row_idx, col_idx=col_idx +
                1).paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.LEFT

    doc = helper_funcs.set_autofit(doc)

    para = doc.add_paragraph("** ")
    para.add_run("p").italic = True
    para.add_run(" < 0.01")
    para.add_run().add_break()
    para.add_run("* ")
    para.add_run("p").italic = True
    para.add_run(" < {}".format(global_vars.alpha_threshold))
    doc.add_paragraph(
        "Correlation Coefficient used: {}".format(correlation_label))
    helper_funcs.add_correction_message_word(doc)

    helper_funcs.savefile(doc=doc)
def raw_corr_apa_table_noCIs_excel(mod_raw_data_df, output_df):
    """Write raw-data correlations (no confidence intervals) to Excel.

    Variable names are the columns of ``mod_raw_data_df``. Depending on
    ``global_vars.corr_table_triangle`` the upper triangle, the lower
    triangle or the full matrix is filled. The workbook is handed to
    ``helper_funcs.savefile``.

    Args:
        mod_raw_data_df: DataFrame whose column names are the variables.
        output_df: DataFrame with one row per variable pair, with columns
            "Variable1", "Variable2", "Correlation_Coefficient" and
            "adjusted_pvalues".
    """
    # local var and .columns preferred to minimize use of global scope
    variables_list = list(mod_raw_data_df.columns)

    wb = Workbook()
    ws = wb.active

    triangle = global_vars.corr_table_triangle
    if triangle == "Upper triangle":
        header_cols = variables_list[1:]
        header_rows = variables_list[:-1]
    elif triangle == "Lower triangle":
        header_cols = variables_list[:-1]
        header_rows = variables_list[1:]
    elif triangle == "Both":
        header_cols = variables_list[:]
        header_rows = variables_list[:]

    ws.append([""] + header_cols)
    for row_number, var in enumerate(header_rows, start=2):
        ws.cell(row=row_number, column=1).value = var

    # For the lower triangle the outer index walks the header row (columns)
    # and the inner index walks the first column (rows); otherwise the roles
    # are swapped.
    is_lower = triangle == "Lower triangle"
    inner_start = 2
    for outer in range(2, len(header_rows) + 2):
        if is_lower:
            outer_var = ws.cell(row=1, column=outer).value
        else:
            outer_var = ws.cell(row=outer, column=1).value

        for inner in range(inner_start, len(header_cols) + 2):
            if is_lower:
                inner_var = ws.cell(row=inner, column=1).value
            else:
                inner_var = ws.cell(row=1, column=inner).value

            if outer_var == inner_var:
                ws.cell(row=outer, column=inner).value = 1
                continue

            # here query method is not preferred as it is not only slower as
            # it is much smaller dataset but also cannot refer to two
            # different variables (colname and val)
            same_order = (output_df["Variable1"] == outer_var) & (
                output_df["Variable2"] == inner_var)
            swapped = (output_df["Variable1"] == inner_var) & (
                output_df["Variable2"] == outer_var)
            pair = output_df[same_order | swapped].iloc[0]
            formatted = helper_funcs.correlations_format_val(
                pair["Correlation_Coefficient"], pair["adjusted_pvalues"])

            if is_lower:
                ws.cell(row=inner, column=outer).value = formatted
            else:
                ws.cell(row=outer, column=inner).value = formatted

        # Triangles start one cell further in on every pass.
        if triangle != "Both":
            inner_start += 1

    last_row = len(header_rows) + 1

    for row_number in range(1, last_row + 1):
        for cell in ws[row_number]:
            cell.alignment = global_vars.alignment_center

    # Header row and first column share the header font.
    for cell in ws[1]:
        cell.font = global_vars.font_header
    for cell in ws["A"]:
        cell.font = global_vars.font_header

    for cell in ws[1]:
        cell.border = Border(top=global_vars.border_APA,
                             bottom=global_vars.border_APA)
    for cell in ws[last_row]:
        cell.border = Border(bottom=global_vars.border_APA)

    # The displayed name is the master_dict key whose value equals the
    # selected correlation type.
    corr_names = list(global_vars.master_dict.keys())
    corr_codes = list(global_vars.master_dict.values())
    table_notes = [
        "Correlation coefficient used: {}".format(
            corr_names[corr_codes.index(global_vars.raw_corr_type)]),
        "**p < 0.01",
        "*p < {}".format(global_vars.alpha_threshold),
    ]
    helper_funcs.add_table_notes(ws, table_notes)

    helper_funcs.savefile(wb=wb)
def raw_corr_apa_table_withCIs_word(mod_raw_data_df, output_df):
    """Write raw-data correlations with confidence intervals to Word.

    Variable names are the columns of ``mod_raw_data_df``. Every variable
    takes two table columns (value and significance signs) and two table rows
    (value and confidence interval). Depending on
    ``global_vars.corr_table_triangle`` the upper triangle, the lower
    triangle or the full matrix is filled. The document is handed to
    ``helper_funcs.savefile``.

    Args:
        mod_raw_data_df: DataFrame whose column names are the variables.
        output_df: DataFrame with one row per variable pair, with columns
            "Variable1", "Variable2", "Correlation_Coefficient",
            "adjusted_pvalues", "CI_low" and "CI_high".
    """
    variables_list = list(mod_raw_data_df.columns)

    triangle = global_vars.corr_table_triangle
    if triangle == "Upper triangle":
        header_cols = variables_list[1:]
        header_rows = variables_list[:-1]
    elif triangle == "Lower triangle":
        header_cols = variables_list[:-1]
        header_rows = variables_list[1:]
    elif triangle == "Both":
        header_cols = list(variables_list)
        header_rows = list(variables_list)

    # this adds an empty column where the significance signs will be placed
    # for better presentation, then adds empty rows for the CIs
    header_cols = [label for var in header_cols for label in (var, "")]
    header_rows = [label for var in header_rows for label in (var, "")]

    doc = Document()
    table_rows_len = len(header_rows) + 1
    table_cols_len = len(header_cols) + 1
    table = doc.add_table(rows=table_rows_len, cols=table_cols_len)

    for ind, var in enumerate([""] + header_cols):
        table.cell(row_idx=0, col_idx=ind).text = var
    for ind, var in enumerate([""] + header_rows):
        table.cell(row_idx=ind, col_idx=0).text = var

    def write_correlation(row_idx, col_idx, first_var, second_var):
        # Look the pair up in either order, then write the value, its
        # significance signs (cell to the right) and the CI (cell below).
        # here query method is not preferred as it is not only slower as it is
        # much smaller dataset but also cannot refer to two different
        # variables (colname and val)
        df_filtered = output_df[
            ((output_df["Variable1"] == first_var) &
             (output_df["Variable2"] == second_var)) |
            ((output_df["Variable1"] == second_var) &
             (output_df["Variable2"] == first_var))].iloc[0]
        ci_low = helper_funcs.correlations_format_val(df_filtered["CI_low"])
        ci_high = helper_funcs.correlations_format_val(df_filtered["CI_high"])
        r = helper_funcs.correlations_format_val(
            df_filtered["Correlation_Coefficient"],
            df_filtered["adjusted_pvalues"])
        if "_" in r:
            r, sign = r.split("_")
        else:
            sign = ""
        table.cell(row_idx=row_idx, col_idx=col_idx).text = r
        table.cell(row_idx=row_idx, col_idx=col_idx + 1).text = sign
        table.cell(row_idx=row_idx + 1,
                   col_idx=col_idx).text = "[" + ci_low + ", " + ci_high + "]"

    # Value cells sit on odd rows and odd columns, hence the step of 2.
    if triangle == "Upper triangle":
        first_col = 1
        for row_idx in range(1, table_rows_len, 2):
            row_var = table.cell(row_idx=row_idx, col_idx=0).text
            for col_idx in range(first_col, table_cols_len, 2):
                col_var = table.cell(row_idx=0, col_idx=col_idx).text
                write_correlation(row_idx, col_idx, row_var, col_var)
            first_col += 2
    elif triangle == "Lower triangle":
        first_row = 1
        for col_idx in range(1, table_cols_len, 2):
            col_var = table.cell(row_idx=0, col_idx=col_idx).text
            for row_idx in range(first_row, table_rows_len, 2):
                row_var = table.cell(row_idx=row_idx, col_idx=0).text
                write_correlation(row_idx, col_idx, col_var, row_var)
            first_row += 2
    elif triangle == "Both":
        for col_idx in range(1, table_cols_len, 2):
            col_var = table.cell(row_idx=0, col_idx=col_idx).text
            for row_idx in range(1, table_rows_len, 2):
                row_var = table.cell(row_idx=row_idx, col_idx=0).text
                if col_var == row_var:
                    table.cell(row_idx=row_idx, col_idx=col_idx).text = "1"
                else:
                    write_correlation(row_idx, col_idx, col_var, row_var)

    for cell in table.rows[0].cells:
        helper_funcs.set_cell_border(cell,
                                     top=global_vars.border_APA_word,
                                     bottom=global_vars.border_APA_word)
    for cell in table.rows[table_rows_len - 1].cells:
        helper_funcs.set_cell_border(cell, bottom=global_vars.border_APA_word)

    # Each column header spans its value column and the signs column.
    for i in range(1, table_cols_len, 2):
        table.cell(row_idx=0, col_idx=i).merge(
            table.cell(row_idx=0, col_idx=i + 1))
        # merging cells adds a paragraph break at the end; this gets rid of it
        table.cell(row_idx=0, col_idx=i).text = table.cell(row_idx=0,
                                                           col_idx=i).text[:-1]
    # Each row header spans its value row and the CI row.
    for i in range(1, table_rows_len, 2):
        table.cell(row_idx=i, col_idx=0).merge(
            table.cell(row_idx=i + 1, col_idx=0))
        # merging cells adds a paragraph break at the end; this gets rid of it
        table.cell(row_idx=i, col_idx=0).text = table.cell(row_idx=i,
                                                           col_idx=0).text[:-1]

    # CI rows: the interval spans the value and signs columns and is centred.
    for row in range(2, table_rows_len, 2):
        for col in range(1, table_cols_len, 2):
            table.cell(row_idx=row, col_idx=col).merge(
                table.cell(row_idx=row, col_idx=col + 1))
            ci_cell = table.cell(row_idx=row, col_idx=col)
            # Alignment is a paragraph property. The earlier assignment to
            # ``cell.alignment`` only created an unused attribute on the cell
            # object and never centred the text.
            ci_cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER
            helper_funcs.word_style(ci_cell, size=9)

    for cell in table.rows[0].cells:
        helper_funcs.word_style(cell, italic=True)
        cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER
        cell.vertical_alignment = WD_ALIGN_VERTICAL.TOP
    for cell in table.columns[0].cells:
        helper_funcs.word_style(cell, italic=True)
        cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Everything right of the row-header column: centred vertically, no
    # paragraph spacing.
    for row in range(table_rows_len):
        for col in range(1, table_cols_len):
            cell = table.cell(row_idx=row, col_idx=col)
            cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
            cell.paragraphs[0].paragraph_format.space_after = Inches(0)
            cell.paragraphs[0].paragraph_format.space_before = Inches(0)
            cell.paragraphs[0].paragraph_format.line_spacing = 1

    # Value rows: values are right-aligned and their signs left-aligned, with
    # negative indents on both sides of the shared cell edge.
    for row in range(1, table_rows_len, 2):
        for col in range(1, table_cols_len, 2):
            value_cell = table.cell(row_idx=row, col_idx=col)
            sign_cell = table.cell(row_idx=row, col_idx=col + 1)
            value_cell.paragraphs[0].paragraph_format.right_indent = Inches(
                -0.08)
            sign_cell.paragraphs[0].paragraph_format.left_indent = Inches(
                -0.06)
            value_cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.RIGHT
            sign_cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.LEFT

    doc = helper_funcs.set_autofit(doc)

    para = doc.add_paragraph("** ")
    para.add_run("p").italic = True
    para.add_run(" < 0.01")
    para.add_run().add_break()
    para.add_run("* ")
    para.add_run("p").italic = True
    para.add_run(" < {}".format(global_vars.alpha_threshold))
    # The displayed name is the master_dict key whose value equals the
    # selected correlation type.
    doc.add_paragraph("Correlation coefficient used: {}".format(
        list(global_vars.master_dict.keys())[list(
            global_vars.master_dict.values()).index(
                global_vars.raw_corr_type)]))
    helper_funcs.add_correction_message_word(doc)

    helper_funcs.savefile(doc=doc)
def _corr_ci_cell_texts(output_df, first_var, second_var):
    """Return the two cell texts (coefficient, "[low, high]") for one pair.

    The pair is matched in either order against the "Variable1" and
    "Variable2" columns of output_df and the first matching row is used.
    Raises IndexError when output_df has no row for the pair.
    """
    # A boolean mask is used instead of DataFrame.query: the frame is small
    # and the lookup has to refer to both a column name and a value.
    pair_mask = (
        ((output_df["Variable1"] == first_var)
         & (output_df["Variable2"] == second_var))
        | ((output_df["Variable1"] == second_var)
           & (output_df["Variable2"] == first_var))
    )
    match = output_df[pair_mask].iloc[0]
    r_text = helper_funcs.correlations_format_val(
        match["Correlation_Coefficient"], match["adjusted_pvalues"])
    ci_low = helper_funcs.correlations_format_val(match["CI_low"])
    ci_high = helper_funcs.correlations_format_val(match["CI_high"])
    return r_text, "[" + ci_low + ", " + ci_high + "]"


def raw_corr_apa_table_withCIs_excel(mod_raw_data_df, output_df):
    """Build and save the Excel correlation table with confidence intervals.

    Every variable occupies two worksheet rows: the first holds the formatted
    correlation coefficient and the second, in a smaller font, the
    "[low, high]" confidence interval.  Which cells are filled depends on
    global_vars.corr_table_triangle ("Upper triangle", "Lower triangle" or
    "Both"); with "Both" the diagonal is written as "1" and merged over its
    two rows.

    Args:
        mod_raw_data_df: data frame whose column order defines the variable
            order of the table.
        output_df: results frame read through the columns "Variable1",
            "Variable2", "Correlation_Coefficient", "adjusted_pvalues",
            "CI_low" and "CI_high".

    Raises:
        IndexError: if output_df has no row for a required variable pair.
    """
    # local var and .columns preferred to minimize use of global scope
    variables_list = list(mod_raw_data_df.columns)
    triangle = global_vars.corr_table_triangle

    wb = Workbook()
    ws = wb.active

    if triangle == "Upper triangle":
        header_cols = variables_list[1:]
        header_rows = variables_list[:-1]
    elif triangle == "Lower triangle":
        header_cols = variables_list[:-1]
        header_rows = variables_list[1:]
    elif triangle == "Both":
        header_cols = list(variables_list)
        header_rows = list(variables_list)

    # Follow every row label with an empty one: that extra row holds the CI.
    header_rows = [label for var in header_rows for label in (var, "")]

    ws.append([""] + header_cols)
    for offset, label in enumerate(header_rows):
        ws.cell(row=offset + 2, column=1).value = label

    last_row = len(header_rows) + 1
    last_col = len(header_cols) + 1

    if triangle == "Upper triangle":
        first_col = 2
        for row in range(2, last_row + 1, 2):
            row_var = ws.cell(row=row, column=1).value
            for col in range(first_col, last_col + 1):
                col_var = ws.cell(row=1, column=col).value
                r_text, ci_text = _corr_ci_cell_texts(output_df, row_var,
                                                      col_var)
                ws.cell(row=row, column=col).value = r_text
                ws.cell(row=row + 1, column=col).value = ci_text
            # each following row starts one column further to the right
            first_col += 1
    elif triangle == "Lower triangle":
        first_row = 2
        for col in range(2, last_col + 1):
            col_var = ws.cell(row=1, column=col).value
            for row in range(first_row, last_row + 1, 2):
                row_var = ws.cell(row=row, column=1).value
                r_text, ci_text = _corr_ci_cell_texts(output_df, col_var,
                                                      row_var)
                ws.cell(row=row, column=col).value = r_text
                ws.cell(row=row + 1, column=col).value = ci_text
            # each following column starts one variable (two rows) lower
            first_row += 2
    elif triangle == "Both":
        for col in range(2, last_col + 1):
            col_var = ws.cell(row=1, column=col).value
            for row in range(2, last_row + 1, 2):
                row_var = ws.cell(row=row, column=1).value
                if col_var == row_var:
                    ws.cell(row=row, column=col).value = "1"
                else:
                    r_text, ci_text = _corr_ci_cell_texts(
                        output_df, col_var, row_var)
                    ws.cell(row=row, column=col).value = r_text
                    ws.cell(row=row + 1, column=col).value = ci_text

    # A row label spans its coefficient row and its CI row.
    for row in range(2, last_row, 2):
        ws.merge_cells(start_row=row,
                       start_column=1,
                       end_row=row + 1,
                       end_column=1)
    if triangle == "Both":
        # The diagonal has no CI, so its two rows are merged into one cell.
        for col in range(2, last_col + 1):
            ws.merge_cells(start_row=col * 2 - 2,
                           start_column=col,
                           end_row=col * 2 - 1,
                           end_column=col)

    for row in range(1, last_row + 1):
        for cell in ws[row]:
            cell.alignment = global_vars.alignment_center

    for cell in ws[1] + ws["A"]:
        cell.font = global_vars.font_header

    # CI rows (every second row, starting at row 3) use a smaller font.
    for row in range(3, last_row + 1, 2):
        for cell in ws[row]:
            cell.font = Font(size=9)

    for cell in ws[1]:
        cell.border = Border(top=global_vars.border_APA,
                             bottom=global_vars.border_APA)
    for cell in ws[last_row]:
        cell.border = Border(bottom=global_vars.border_APA)

    # Reverse lookup: the master_dict key whose value is the selected
    # correlation type.
    table_notes = [
        "Correlation coefficient used: {}".format(
            list(global_vars.master_dict.keys())[list(
                global_vars.master_dict.values()).index(
                    global_vars.raw_corr_type)])
    ]
    table_notes.append("**p < 0.01")
    table_notes.append("*p < {}".format(global_vars.alpha_threshold))
    helper_funcs.add_table_notes(ws, table_notes)

    helper_funcs.savefile(wb=wb)
def summ_corr_apa_table_excel(mod_raw_data_df, output_df):
    """Build and save the Excel correlation table for summary-statistics input.

    The variables are the unique values of the column named by
    global_vars.summ_corr_varOne, followed by the values of
    global_vars.summ_corr_varTwo that do not occur in the first column.
    Both groups keep their order of first appearance, so the table layout is
    the same on every run.  Which cells are filled depends on
    global_vars.corr_table_triangle ("Upper triangle", "Lower triangle" or
    "Both"); with "Both" the diagonal is written as 1.

    Args:
        mod_raw_data_df: not used by this function.
        output_df: results frame holding the two variable columns, the
            coefficient column named by global_vars.summ_corr_coeff and
            "adjusted_pvalues".

    Raises:
        IndexError: if output_df has no row for a required variable pair.
    """
    var_one_col = global_vars.summ_corr_varOne
    var_two_col = global_vars.summ_corr_varTwo
    triangle = global_vars.corr_table_triangle

    # Variables from the first column keep the user's order.  Only variables
    # from the second column that are NOT in the first are appended, in order
    # of appearance; a set difference here would give an arbitrary order.
    first_vars = list(output_df[var_one_col].unique())
    known_vars = set(first_vars)
    extra_vars = [
        var for var in output_df[var_two_col].unique()
        if var not in known_vars
    ]
    variables_list = first_vars + extra_vars

    if triangle == "Upper triangle":
        header_cols = variables_list[1:]
        header_rows = variables_list[:-1]
    elif triangle == "Lower triangle":
        header_cols = variables_list[:-1]
        header_rows = variables_list[1:]
    elif triangle == "Both":
        header_cols, header_rows = variables_list, variables_list

    wb = Workbook()
    ws = wb.active

    ws.append([""] + header_cols)
    for ind, var in enumerate(header_rows):
        ws.cell(row=ind + 2, column=1).value = var

    # For "Upper triangle" and "Both" the outer loop walks rows and the inner
    # loop walks columns; for "Lower triangle" it is the other way round.
    # header_rows and header_cols always have the same length.
    rows_are_outer = triangle in ("Upper triangle", "Both")

    inside_loop_ind_start = 2
    for outside_loop_ind in range(2, len(header_rows) + 2):
        if rows_are_outer:
            outside_loop_var = ws.cell(row=outside_loop_ind, column=1).value
        else:
            outside_loop_var = ws.cell(row=1, column=outside_loop_ind).value

        for inside_loop_ind in range(inside_loop_ind_start,
                                     len(header_cols) + 2):
            if rows_are_outer:
                inside_loop_var = ws.cell(row=1, column=inside_loop_ind).value
            else:
                inside_loop_var = ws.cell(row=inside_loop_ind, column=1).value

            if outside_loop_var == inside_loop_var:
                # diagonal; the header lists make this reachable only for "Both"
                ws.cell(row=outside_loop_ind, column=inside_loop_ind).value = 1
                continue

            # A boolean mask is used instead of DataFrame.query: the frame is
            # small and the lookup refers to both a column name and a value.
            pair_mask = (
                ((output_df[var_one_col] == outside_loop_var)
                 & (output_df[var_two_col] == inside_loop_var))
                | ((output_df[var_one_col] == inside_loop_var)
                   & (output_df[var_two_col] == outside_loop_var))
            )
            df_filtered = output_df[pair_mask].iloc[0]

            r = df_filtered[global_vars.summ_corr_coeff]
            p = df_filtered["adjusted_pvalues"]
            r = helper_funcs.correlations_format_val(r, p)

            if rows_are_outer:
                ws.cell(row=outside_loop_ind, column=inside_loop_ind).value = r
            else:
                ws.cell(row=inside_loop_ind, column=outside_loop_ind).value = r

        # a triangle gets one cell narrower with every outer step
        if triangle != "Both":
            inside_loop_ind_start += 1

    for row in range(1, len(header_rows) + 2):
        for cell in ws[row]:
            cell.alignment = global_vars.alignment_center

    for cell in ws[1]:
        cell.font = global_vars.font_header
    for row in range(2, len(header_rows) + 2):
        ws.cell(row=row, column=1).font = global_vars.font_header

    for cell in ws[len(header_rows) + 1]:
        cell.border = Border(bottom=global_vars.border_APA)
    for cell in ws[1]:
        cell.border = Border(top=global_vars.border_APA,
                             bottom=global_vars.border_APA)

    table_notes = [
        "**p < 0.01", "*p < {}".format(global_vars.alpha_threshold)
    ]
    helper_funcs.add_table_notes(ws, table_notes)

    helper_funcs.savefile(wb=wb)
def spss_indttest_apa_table_word(mod_raw_data_df, output_df):
    """Build and save the APA-style Word table for an SPSS independent t-test.

    The table has a two-row header: "Variable", three M/SD column pairs
    (all, group one, group two), "df", "t", the chosen effect size and "p".
    Group sizes come from global_vars.spss_indttest_nOne / nTwo; a value of
    -1 is rendered as "?".  The effect-size column is removed again when
    global_vars.effect_size_choice is "None".

    Note: output_df is modified in place. The "pvalues" column is dropped,
    the columns at positions 7 up to (excluding) the last one are formatted
    to two decimals, and "adjusted_pvalues" is replaced by its formatted
    text.

    Args:
        mod_raw_data_df: not used by this function.
        output_df: results frame; its column order defines the table columns.
    """
    # very similar to the raw-data t-test Word table, but kept separate as it
    # might be updated/adjusted in the future
    output_df.drop(columns=["pvalues"], inplace=True)

    # SettingWithCopyWarning is suppressed only for the two assignments below
    pd.options.mode.chained_assignment = None
    stat_columns = list(output_df.columns)[7:-1]
    output_df[stat_columns] = output_df[stat_columns].applymap(
        lambda x: "{:.2f}".format(x))
    output_df["adjusted_pvalues"] = output_df["adjusted_pvalues"].map(
        helper_funcs.pvalue_formatting)
    pd.options.mode.chained_assignment = "warn"

    doc = Document()
    table_rows_len = len(output_df) + 2
    table_cols_len = len(output_df.columns)
    table = doc.add_table(rows=table_rows_len, cols=table_cols_len)

    table.cell(row_idx=0, col_idx=0).text = "Variable"
    table.cell(row_idx=0, col_idx=0).merge(table.cell(row_idx=1, col_idx=0))
    helper_funcs.word_style(table.cell(row_idx=0, col_idx=0), italic=True)

    table.cell(row_idx=0, col_idx=1).text = "All, n=?"
    table.cell(row_idx=0, col_idx=1).merge(table.cell(row_idx=0, col_idx=2))

    if global_vars.spss_indttest_nOne != -1:
        n1_label = global_vars.spss_indttest_nOne
    else:
        n1_label = "?"
    table.cell(row_idx=0, col_idx=3).text = "{g}, n={n}".format(
        g=global_vars.spss_indttest_groupOneLabel, n=n1_label)
    table.cell(row_idx=0, col_idx=3).merge(table.cell(row_idx=0, col_idx=4))

    # the second group's label depends on the second group's own n
    if global_vars.spss_indttest_nTwo != -1:
        n2_label = global_vars.spss_indttest_nTwo
    else:
        n2_label = "?"
    table.cell(row_idx=0, col_idx=5).text = "{g}, n={n}".format(
        g=global_vars.spss_indttest_groupTwoLabel, n=n2_label)
    table.cell(row_idx=0, col_idx=5).merge(table.cell(row_idx=0, col_idx=6))

    table.cell(row_idx=0, col_idx=7).text = "df"
    table.cell(row_idx=0, col_idx=7).merge(table.cell(row_idx=1, col_idx=7))
    helper_funcs.word_style(table.cell(row_idx=0, col_idx=7), italic=True)

    table.cell(row_idx=0, col_idx=8).text = "t"
    table.cell(row_idx=0, col_idx=8).merge(table.cell(row_idx=1, col_idx=8))
    helper_funcs.word_style(table.cell(row_idx=0, col_idx=8), italic=True)

    table.cell(row_idx=0, col_idx=9).text = global_vars.effect_size_choice
    # otherwise the column can't be removed with delete_columns_word below if
    # merged; see helper_funcs.delete_columns_word func notes
    if global_vars.effect_size_choice != "None":
        table.cell(row_idx=0,
                   col_idx=9).merge(table.cell(row_idx=1, col_idx=9))
        helper_funcs.word_style(table.cell(row_idx=0, col_idx=9),
                                italic=True)

    table.cell(row_idx=0, col_idx=10).text = "p"
    table.cell(row_idx=0, col_idx=10).merge(table.cell(row_idx=1, col_idx=10))
    helper_funcs.word_style(table.cell(row_idx=0, col_idx=10), italic=True)

    # second header row: an M / SD pair under each of the three groups
    for col in range(1, 6, 2):
        table.cell(row_idx=1, col_idx=col).text = "M"
        helper_funcs.word_style(table.cell(row_idx=1, col_idx=col),
                                italic=True)
        table.cell(row_idx=1, col_idx=col + 1).text = "SD"
        helper_funcs.word_style(table.cell(row_idx=1, col_idx=col + 1),
                                italic=True)

    # body rows start below the two header rows
    for row in range(2, table_rows_len):
        for col in range(0, table_cols_len):
            table.cell(row_idx=row,
                       col_idx=col).text = output_df.iloc[row - 2, col]

    for row in range(0, table_rows_len):
        for cell in table.rows[row].cells:
            cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
            cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    # borders: above the header, above the first body row, below the last row
    for cell in table.rows[0].cells:
        helper_funcs.set_cell_border(cell, top=global_vars.border_APA_word)
    for cell in table.rows[2].cells:
        helper_funcs.set_cell_border(cell, top=global_vars.border_APA_word)
    for cell in table.rows[table_rows_len - 1].cells:
        helper_funcs.set_cell_border(cell, bottom=global_vars.border_APA_word)

    if global_vars.effect_size_choice == "None":
        helper_funcs.delete_columns_word(table, [9])

    doc = helper_funcs.set_autofit(doc)

    doc.add_paragraph(
        "Means and Standard Deviations cannot be read from the SPSS table. Please add them yourself or remove those columns if they are not needed."
    )
    helper_funcs.add_correction_message_word(doc)

    helper_funcs.savefile(doc=doc)
def spss_indttest_apa_table_excel(mod_raw_data_df, output_df):
    """Build and save the APA-style Excel table for an SPSS independent t-test.

    The sheet has a two-row header: "Variable", three M/SD column pairs
    (all, group one, group two), "df", "t", the chosen effect size and "p".
    Group sizes come from global_vars.spss_indttest_nOne / nTwo; a value of
    -1 is rendered as "?".  Column 10 (effect size) is deleted again when
    global_vars.effect_size_choice is "None".

    Note: output_df is modified in place. The "pvalues" column is dropped,
    the columns at positions 7 up to (excluding) the last one are formatted
    to two decimals, and "adjusted_pvalues" is replaced by its formatted
    text.

    Args:
        mod_raw_data_df: not used by this function.
        output_df: results frame; its rows are appended below the header.
    """
    output_df.drop(columns=["pvalues"], inplace=True)

    # SettingWithCopyWarning is suppressed only for the two assignments below
    pd.options.mode.chained_assignment = None
    stat_columns = list(output_df.columns)[7:-1]
    output_df[stat_columns] = output_df[stat_columns].applymap(
        lambda x: "{:.2f}".format(x))
    output_df["adjusted_pvalues"] = output_df["adjusted_pvalues"].map(
        helper_funcs.pvalue_formatting)
    pd.options.mode.chained_assignment = "warn"

    wb = Workbook()
    ws = wb.active

    ws.cell(row=1, column=1).value = "Variable"
    ws.merge_cells('A1:A2')
    ws.cell(row=1, column=1).font = global_vars.font_header

    ws.cell(row=1, column=2).value = "All, n=?"
    ws.merge_cells('B1:C1')

    if global_vars.spss_indttest_nOne != -1:
        n1_label = global_vars.spss_indttest_nOne
    else:
        n1_label = "?"
    ws.cell(row=1, column=4).value = "{g}, n={n}".format(
        g=global_vars.spss_indttest_groupOneLabel, n=n1_label)
    ws.merge_cells('D1:E1')

    # the second group's label depends on the second group's own n
    if global_vars.spss_indttest_nTwo != -1:
        n2_label = global_vars.spss_indttest_nTwo
    else:
        n2_label = "?"
    ws.cell(row=1, column=6).value = "{g}, n={n}".format(
        g=global_vars.spss_indttest_groupTwoLabel, n=n2_label)
    ws.merge_cells('F1:G1')

    ws.cell(row=1, column=8).value = "df"
    ws.merge_cells('H1:H2')
    ws.cell(row=1, column=8).font = global_vars.font_header

    ws.cell(row=1, column=9).value = "t"
    ws.merge_cells('I1:I2')
    ws.cell(row=1, column=9).font = global_vars.font_header

    ws.cell(row=1, column=10).value = global_vars.effect_size_choice
    ws.merge_cells('J1:J2')
    ws.cell(row=1, column=10).font = global_vars.font_header

    ws.cell(row=1, column=11).value = "p"
    ws.merge_cells('K1:K2')
    ws.cell(row=1, column=11).font = global_vars.font_header

    # second header row: an M / SD pair under each of the three groups
    for col in range(2, 7, 2):
        ws.cell(row=2, column=col).value = "M"
        ws.cell(row=2, column=col).font = global_vars.font_header
        ws.cell(row=2, column=col + 1).value = "SD"
        ws.cell(row=2, column=col + 1).font = global_vars.font_header

    for row in dataframe_to_rows(output_df, index=False, header=False):
        ws.append(row)

    # two header rows plus one row per result
    for row in range(1, len(output_df) + 3):
        for cell in ws[row]:
            cell.alignment = global_vars.alignment_center

    for cell in ws[1]:
        cell.border = Border(top=global_vars.border_APA)
    for cell in ws[2] + ws[len(output_df) + 2]:
        cell.border = Border(bottom=global_vars.border_APA)

    if global_vars.effect_size_choice == "None":
        ws.delete_cols(10)

    table_notes = [
        "Means and Standard Deviations cannot be read from the SPSS table. Please add them yourself or remove those columns if they are not needed."
    ]
    helper_funcs.add_table_notes(ws, table_notes)

    helper_funcs.savefile(wb=wb)
def raw_indttest_apa_table_excel(mod_raw_data_df, output_df):
    """Build and save the APA-style Excel table for a raw-data independent t-test.

    Selects the mean/SD columns for all cases and for both group levels plus
    df, t, the chosen effect size and the adjusted p values from output_df,
    formats them as text and writes them below a two-row header.  The group
    sizes shown in the header are read from row 0 of output_df at column
    positions 1, 4 and 7.  Column 10 (effect size) is deleted again when
    global_vars.effect_size_choice is "None".

    Args:
        mod_raw_data_df: not used by this function.
        output_df: results frame of the t-test.
    """
    level_one = global_vars.raw_indttest_grouplevel1
    level_two = global_vars.raw_indttest_grouplevel2
    effect_size = global_vars.effect_size_choice

    wanted_columns = [
        "Variable",
        "All_Mean",
        "All_SD",
        level_one + "_Mean",
        level_one + "_SD",
        level_two + "_Mean",
        level_two + "_SD",
        "Degrees_of_Freedom",
        "t",
        effect_size,
        "adjusted_pvalues",
    ]
    apa_table_df = output_df[wanted_columns]

    # the two assignments below are correct, so pandas' SettingWithCopyWarning
    # is switched off just for them
    pd.options.mode.chained_assignment = None
    two_decimal_columns = list(apa_table_df.columns)[1:-1]
    apa_table_df[two_decimal_columns] = apa_table_df[
        two_decimal_columns].applymap(lambda x: "{:.2f}".format(x))
    apa_table_df["adjusted_pvalues"] = apa_table_df["adjusted_pvalues"].map(
        helper_funcs.pvalue_formatting)
    pd.options.mode.chained_assignment = "warn"

    wb = Workbook()
    ws = wb.active

    # first header row: (column, text, merged range, gets the header font)
    top_header = [
        (1, "Variable", 'A1:A2', True),
        (2, "All, n={}".format(output_df.iloc[0, 1]), 'B1:C1', False),
        (4, "{g}, n={n}".format(g=level_one, n=output_df.iloc[0, 4]),
         'D1:E1', False),
        (6, "{g}, n={n}".format(g=level_two, n=output_df.iloc[0, 7]),
         'F1:G1', False),
        (8, "df", 'H1:H2', True),
        (9, "t", 'I1:I2', True),
        (10, effect_size, 'J1:J2', True),
        (11, "p", 'K1:K2', True),
    ]
    for column, text, merged_range, use_header_font in top_header:
        ws.cell(row=1, column=column).value = text
        ws.merge_cells(merged_range)
        if use_header_font:
            ws.cell(row=1, column=column).font = global_vars.font_header

    # second header row: an M / SD pair under each of the three groups
    for pair_start in (2, 4, 6):
        for column, text in ((pair_start, "M"), (pair_start + 1, "SD")):
            ws.cell(row=2, column=column).value = text
            ws.cell(row=2, column=column).font = global_vars.font_header

    for record in dataframe_to_rows(apa_table_df, index=False, header=False):
        ws.append(record)

    # two header rows plus one row per result
    last_row = len(apa_table_df) + 2
    for row in range(1, last_row + 1):
        for cell in ws[row]:
            cell.alignment = global_vars.alignment_center

    for cell in ws[1]:
        cell.border = Border(top=global_vars.border_APA)
    second_header_cells = ws[2]
    last_row_cells = ws[last_row]
    for cell in second_header_cells:
        cell.border = Border(bottom=global_vars.border_APA)
    for cell in last_row_cells:
        cell.border = Border(bottom=global_vars.border_APA)

    if effect_size == "None":
        ws.delete_cols(10)

    helper_funcs.add_table_notes(ws, [])

    helper_funcs.savefile(wb=wb)
def raw_indttest_apa_table_word(mod_raw_data_df, output_df):
    """Build and save the APA-style Word table for a raw-data independent t-test.

    Selects the mean/SD columns for all cases and for both group levels plus
    df, t, the chosen effect size and the adjusted p values from output_df,
    formats them as text and writes them below a two-row header.  The group
    sizes shown in the header are read from row 0 of output_df at column
    positions 1, 4 and 7.  The effect-size column is removed again when
    global_vars.effect_size_choice is "None".

    Args:
        mod_raw_data_df: not used by this function.
        output_df: results frame of the t-test.
    """
    level_one = global_vars.raw_indttest_grouplevel1
    level_two = global_vars.raw_indttest_grouplevel2
    effect_size = global_vars.effect_size_choice

    wanted_columns = [
        "Variable",
        "All_Mean",
        "All_SD",
        level_one + "_Mean",
        level_one + "_SD",
        level_two + "_Mean",
        level_two + "_SD",
        "Degrees_of_Freedom",
        "t",
        effect_size,
        "adjusted_pvalues",
    ]
    apa_table_df = output_df[wanted_columns]

    # the two assignments below are correct, so pandas' SettingWithCopyWarning
    # is switched off just for them
    pd.options.mode.chained_assignment = None
    two_decimal_columns = list(apa_table_df.columns)[1:-1]
    apa_table_df[two_decimal_columns] = apa_table_df[
        two_decimal_columns].applymap(lambda x: "{:.2f}".format(x))
    apa_table_df["adjusted_pvalues"] = apa_table_df["adjusted_pvalues"].map(
        helper_funcs.pvalue_formatting)
    pd.options.mode.chained_assignment = "warn"

    doc = Document()
    table_rows_len = len(apa_table_df) + 2
    table_cols_len = len(apa_table_df.columns)
    table = doc.add_table(rows=table_rows_len, cols=table_cols_len)

    # first header row, in the original left-to-right order:
    # (column, text, (row, column) of the cell it merges with, italic)
    top_header = [
        (0, "Variable", (1, 0), True),
        (1, "All, n={}".format(output_df.iloc[0, 1]), (0, 2), False),
        (3, "{g}, n={n}".format(g=level_one, n=output_df.iloc[0, 4]),
         (0, 4), False),
        (5, "{g}, n={n}".format(g=level_two, n=output_df.iloc[0, 7]),
         (0, 6), False),
        (7, "df", (1, 7), True),
        (8, "t", (1, 8), True),
        (9, effect_size, (1, 9), True),
        (10, "p", (1, 10), True),
    ]
    for column, text, partner, italic in top_header:
        table.cell(row_idx=0, col_idx=column).text = text
        # an unused effect-size column stays unmerged, otherwise it can't be
        # removed with delete_columns_word below; see
        # helper_funcs.delete_columns_word func notes
        if column == 9 and effect_size == "None":
            continue
        partner_row, partner_col = partner
        table.cell(row_idx=0, col_idx=column).merge(
            table.cell(row_idx=partner_row, col_idx=partner_col))
        if italic:
            helper_funcs.word_style(table.cell(row_idx=0, col_idx=column),
                                    italic=True)

    # second header row: an M / SD pair under each of the three groups
    for pair_start in (1, 3, 5):
        for column, text in ((pair_start, "M"), (pair_start + 1, "SD")):
            table.cell(row_idx=1, col_idx=column).text = text
            helper_funcs.word_style(table.cell(row_idx=1, col_idx=column),
                                    italic=True)

    # body rows start below the two header rows
    for table_row in range(2, table_rows_len):
        data_row = table_row - 2
        for column in range(table_cols_len):
            table.cell(row_idx=table_row,
                       col_idx=column).text = apa_table_df.iloc[data_row,
                                                                column]

    for row_index in range(table_rows_len):
        for cell in table.rows[row_index].cells:
            cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
            cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    # borders: above the header, above the first body row, below the last row
    for bordered_row in (0, 2):
        for cell in table.rows[bordered_row].cells:
            helper_funcs.set_cell_border(cell,
                                         top=global_vars.border_APA_word)
    for cell in table.rows[table_rows_len - 1].cells:
        helper_funcs.set_cell_border(cell, bottom=global_vars.border_APA_word)

    if effect_size == "None":
        helper_funcs.delete_columns_word(table, [9])

    doc = helper_funcs.set_autofit(doc)

    helper_funcs.add_correction_message_word(doc)

    helper_funcs.savefile(doc=doc)