Exemple #1
0
def raw_mr_apa_table_word(mod_raw_data_df, output_df):
    """Write the multiple regression results (raw-data input) as an APA-style Word table.

    The table has one header row plus one row per row of ``output_df`` with
    the columns Variable, B, 95% CI, beta, t and p. Two paragraphs (adjusted
    R squared and the dependent variable) and the correction message are
    added below the table before the document is passed to
    ``helper_funcs.savefile``.

    Args:
        mod_raw_data_df: Not used by this function.
        output_df: DataFrame providing the columns "Variable", "B",
            "95% CI B", "beta", "t", "adjusted_pvalues" and "R2adj".
            It is not modified.
    """
    # Explicit copy: the frame is reformatted below and must never alias
    # the caller's output_df.
    mod_output_df = output_df[[
        "Variable", "B", "95% CI B", "beta", "t", "adjusted_pvalues"
    ]].copy()

    # Silence the chained-assignment warning only while reformatting; the
    # context manager restores the previous setting on exit, so the option
    # is no longer left disabled for the rest of the program.
    with pd.option_context("mode.chained_assignment", None):
        mod_output_df[["B", "beta", "t"]] = mod_output_df[[
            "B", "beta", "t"
        ]].applymap(lambda x: "{:.2f}".format(x))

        mod_output_df["adjusted_pvalues"] = mod_output_df[
            "adjusted_pvalues"].map(helper_funcs.pvalue_formatting)

        mod_output_df.rename(columns={
            "95% CI B": "95% CI",
            "adjusted_pvalues": "p"
        },
                             inplace=True)
        # removes the beta value for constant as it is always 0
        mod_output_df.loc[0, "beta"] = ""

    doc = Document()
    table_rows_len = len(mod_output_df) + 1  # +1 for the header row
    table_cols_len = len(mod_output_df.columns)
    table = doc.add_table(rows=table_rows_len, cols=table_cols_len)

    # Header row: column names.
    for ind, var in enumerate(mod_output_df.columns):
        table.cell(row_idx=0, col_idx=ind).text = var

    # Body rows: table row N holds DataFrame row N - 1.
    for row in range(1, table_rows_len):
        for col in range(0, table_cols_len):
            table.cell(row_idx=row,
                       col_idx=col).text = mod_output_df.iloc[row - 1, col]

    # Italic header with a line above and below it.
    for cell in table.rows[0].cells:
        helper_funcs.word_style(cell, italic=True)
        helper_funcs.set_cell_border(cell,
                                     top=global_vars.border_APA_word,
                                     bottom=global_vars.border_APA_word)
    # Closing line under the last row.
    for cell in table.rows[table_rows_len - 1].cells:
        helper_funcs.set_cell_border(cell, bottom=global_vars.border_APA_word)

    for row in range(0, table_rows_len):
        for cell in table.rows[row].cells:
            cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
            cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    doc = helper_funcs.set_autofit(doc)

    doc.add_paragraph("R squared adjusted = {R}".format(
        R="{:.2f}".format(output_df["R2adj"][0])))
    doc.add_paragraph(
        "Dependent Variable: {DV}".format(DV=global_vars.raw_mr_outcomevar))
    helper_funcs.add_correction_message_word(doc)

    helper_funcs.savefile(doc=doc)
Exemple #2
0
def raw_mr_apa_table_excel(mod_raw_data_df, output_df):
    """Write the multiple regression results (raw-data input) as an APA-style Excel table.

    The worksheet gets a header row followed by one row per row of
    ``output_df`` with the columns Variable, B, 95% CI, beta, t and p. The
    adjusted R squared and the dependent variable are passed on as table
    notes, and the workbook is handed to ``helper_funcs.savefile``.

    Args:
        mod_raw_data_df: Not used by this function.
        output_df: DataFrame providing the columns "Variable", "B",
            "95% CI B", "beta", "t", "adjusted_pvalues" and "R2adj".
            It is not modified.
    """
    # Explicit copy: the frame is reformatted below and must never alias
    # the caller's output_df.
    mod_output_df = output_df[[
        "Variable", "B", "95% CI B", "beta", "t", "adjusted_pvalues"
    ]].copy()

    # Silence the chained-assignment warning only while reformatting; the
    # context manager restores the previous setting on exit, so the option
    # is no longer left disabled for the rest of the program.
    with pd.option_context("mode.chained_assignment", None):
        mod_output_df[["B", "beta", "t"]] = mod_output_df[[
            "B", "beta", "t"
        ]].applymap(lambda x: "{:.2f}".format(x))

        mod_output_df["adjusted_pvalues"] = mod_output_df[
            "adjusted_pvalues"].map(helper_funcs.pvalue_formatting)

        mod_output_df.rename(columns={
            "95% CI B": "95% CI",
            "adjusted_pvalues": "p"
        },
                             inplace=True)
        # removes the beta value for constant as it is always 0
        mod_output_df.loc[0, "beta"] = ""

    wb = Workbook()
    ws = wb.active

    ws.append(list(mod_output_df.columns))

    for row in dataframe_to_rows(mod_output_df, index=False, header=False):
        ws.append(row)

    last_row = len(mod_output_df) + 1  # header row + data rows

    # Header: header font with a line above and below it.
    for cell in ws[1]:
        cell.font = global_vars.font_header
        cell.border = Border(top=global_vars.border_APA,
                             bottom=global_vars.border_APA)
    # Closing line under the last row. Previously this row first received a
    # top+bottom border that was immediately overwritten by this one.
    for cell in ws[last_row]:
        cell.border = Border(bottom=global_vars.border_APA)

    for row in range(1, last_row + 1):
        for cell in ws[row]:
            cell.alignment = global_vars.alignment_center

    table_notes = [
        "R squared adjusted = {R}".format(
            R="{:.2f}".format(output_df["R2adj"][0])),
        "Dependent Variable: {DV}".format(DV=global_vars.raw_mr_outcomevar)
    ]
    helper_funcs.add_table_notes(ws, table_notes)

    helper_funcs.savefile(wb=wb)
Exemple #3
0
def spss_mr_apa_table_word(mod_raw_data_df, output_df):
    """Write the multiple regression results (SPSS input) as an APA-style Word table.

    Every column of ``output_df`` except "pvalues" becomes a table column,
    with "adjusted_pvalues" formatted and shown as "p". Below the table a
    placeholder for the adjusted R squared, the dependent variable and, when
    the first "95% CI" entry is "[,]", a hint about missing confidence
    intervals are added, followed by the correction message.

    Args:
        mod_raw_data_df: DataFrame of the SPSS table. The entry with label
            ``len - 1`` of its first column is read as the dependent-variable
            footnote; the text after the first ":" is used as the name.
        output_df: DataFrame with at least the columns "pvalues",
            "adjusted_pvalues" and "95% CI". NOTE: it is modified in place
            ("pvalues" is dropped, "adjusted_pvalues" is formatted and
            renamed to "p").
    """
    output_df.drop(columns=["pvalues"], inplace=True)
    # Silence the chained-assignment warning only for these assignments; the
    # context manager restores the previous setting on exit, so the option
    # is no longer left disabled for the rest of the program.
    with pd.option_context("mode.chained_assignment", None):
        output_df["adjusted_pvalues"] = output_df["adjusted_pvalues"].map(
            helper_funcs.pvalue_formatting)
        output_df.rename(columns={"adjusted_pvalues": "p"}, inplace=True)

    doc = Document()

    table_rows_len = len(output_df) + 1  # +1 for the header row
    table_cols_len = len(output_df.columns)
    table = doc.add_table(rows=table_rows_len, cols=table_cols_len)

    # Header row: column names.
    for ind, var in enumerate(output_df.columns):
        table.cell(row_idx=0, col_idx=ind).text = var

    # Body rows: table row N holds DataFrame row N - 1.
    for row in range(1, table_rows_len):
        for col in range(0, table_cols_len):
            table.cell(row_idx=row, col_idx=col).text = output_df.iloc[row - 1,
                                                                       col]

    for cell in table.rows[0].cells:
        helper_funcs.word_style(cell, italic=True)

    for row in range(0, table_rows_len):
        for cell in table.rows[row].cells:
            cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
            cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Line above and below the header, closing line under the last row.
    for cell in table.rows[0].cells:
        helper_funcs.set_cell_border(cell,
                                     top=global_vars.border_APA_word,
                                     bottom=global_vars.border_APA_word)
    for cell in table.rows[table_rows_len - 1].cells:
        helper_funcs.set_cell_border(cell, bottom=global_vars.border_APA_word)

    doc = helper_funcs.set_autofit(doc)

    first_column = mod_raw_data_df[mod_raw_data_df.columns[0]]
    DV_cell = first_column[len(first_column) - 1]
    # Keep what follows ": ". The previous find(":") + 2 slice silently cut
    # off the first character whenever the cell contained no colon.
    _, colon, after_colon = DV_cell.partition(":")
    DV = after_colon[1:] if colon else DV_cell
    doc.add_paragraph("R squared adjusted = X.XX")
    doc.add_paragraph("Dependent Variable: {}".format(DV))
    if output_df["95% CI"][0] == "[,]":
        doc.add_paragraph(
            "Confidence intervals were not found in the SPSS table. Please add them to your SPSS table and re-run the program or add them manually."
        )
    helper_funcs.add_correction_message_word(doc)

    helper_funcs.savefile(doc=doc)
Exemple #4
0
def spss_mr_apa_table_excel(mod_raw_data_df, output_df):
    """Write the multiple regression results (SPSS input) as an APA-style Excel table.

    Every column of ``output_df`` except "pvalues" becomes a worksheet
    column, with "adjusted_pvalues" formatted and shown as "p". A placeholder
    for the adjusted R squared, the dependent variable and, when the first
    "95% CI" entry is "[,]", a hint about missing confidence intervals are
    passed on as table notes.

    Args:
        mod_raw_data_df: DataFrame of the SPSS table. The entry with label
            ``len - 1`` of its first column is read as the dependent-variable
            footnote; the text after the first ":" is used as the name.
        output_df: DataFrame with at least the columns "pvalues",
            "adjusted_pvalues" and "95% CI". NOTE: it is modified in place
            ("pvalues" is dropped, "adjusted_pvalues" is formatted and
            renamed to "p").
    """
    output_df.drop(columns=["pvalues"], inplace=True)
    # Silence the chained-assignment warning only for these assignments; the
    # context manager restores the previous setting on exit, so the option
    # is no longer left disabled for the rest of the program.
    with pd.option_context("mode.chained_assignment", None):
        output_df["adjusted_pvalues"] = output_df["adjusted_pvalues"].map(
            helper_funcs.pvalue_formatting)
        output_df.rename(columns={"adjusted_pvalues": "p"}, inplace=True)

    wb = Workbook()
    ws = wb.active

    for row in dataframe_to_rows(output_df, index=False, header=True):
        ws.append(row)

    last_row = len(output_df) + 1  # header row + data rows

    for cell in ws[1]:
        cell.font = global_vars.font_header

    for row in range(1, last_row + 1):
        for cell in ws[row]:
            cell.alignment = global_vars.alignment_center

    # Line above and below the header, closing line under the last row.
    for cell in ws[1]:
        cell.border = Border(bottom=global_vars.border_APA,
                             top=global_vars.border_APA)

    for cell in ws[last_row]:
        cell.border = Border(bottom=global_vars.border_APA)

    first_column = mod_raw_data_df[mod_raw_data_df.columns[0]]
    DV_cell = first_column[len(first_column) - 1]
    # Keep what follows ": ". The previous find(":") + 2 slice silently cut
    # off the first character whenever the cell contained no colon.
    _, colon, after_colon = DV_cell.partition(":")
    DV = after_colon[1:] if colon else DV_cell
    table_notes = [
        "R squared adjusted = X.XX", "Dependent variable: {}".format(DV)
    ]
    if output_df["95% CI"][0] == "[,]":
        table_notes.append(
            "Confidence intervals were not found in the SPSS table. Please add them to your SPSS table and re-run the program or add them manually."
        )

    helper_funcs.add_table_notes(ws, table_notes)

    helper_funcs.savefile(wb=wb)
Exemple #5
0
def raw_pairttest_apa_table_excel(mod_raw_data_df, output_df):
    """Write the paired-samples t-test results (raw-data input) as an APA-style Excel table.

    The worksheet has a two-row header ("Time 1" / "Time 2" each spanning an
    M and an SD column; the remaining headers span both rows) followed by
    one row per row of ``output_df``. When the chosen effect size is "None"
    the effect size column (column 8) is deleted again at the end.

    Args:
        mod_raw_data_df: Not used by this function.
        output_df: DataFrame with the columns "pvalues", "Variable",
            "Time1_Mean", "Time1_SD", "Time2_Mean", "Time2_SD",
            "Degrees of Freedom", "t", "adjusted_pvalues" and a column named
            after ``global_vars.effect_size_choice``. NOTE: "pvalues" is
            dropped from it in place.
    """
    effect_size = global_vars.effect_size_choice
    header_font = global_vars.font_header

    output_df.drop(columns=["pvalues"], inplace=True)
    apa_table_df = output_df[[
        "Variable", "Time1_Mean", "Time1_SD", "Time2_Mean", "Time2_SD",
        "Degrees of Freedom", "t", effect_size, "adjusted_pvalues"
    ]]

    # Everything between the variable name and the p value is shown with two
    # decimals; the chained-assignment warning is muted while reformatting.
    pd.options.mode.chained_assignment = None
    two_decimal_cols = list(apa_table_df.columns)[1:-1]
    apa_table_df[two_decimal_cols] = apa_table_df[two_decimal_cols].applymap(
        "{:.2f}".format)
    apa_table_df["adjusted_pvalues"] = apa_table_df["adjusted_pvalues"].map(
        helper_funcs.pvalue_formatting)
    pd.options.mode.chained_assignment = "warn"

    wb = Workbook()
    ws = wb.active

    # (column, caption, merged range, gets the header font)
    top_header = (
        (1, "Variable", "A1:A2", True),
        (2, "Time 1", "B1:C1", False),
        (4, "Time 2", "D1:E1", False),
        (6, "df", "F1:F2", True),
        (7, "t", "G1:G2", True),
        (8, effect_size, "H1:H2", True),
        (9, "p", "I1:I2", True),
    )
    for column, caption, merged_range, use_header_font in top_header:
        ws.cell(row=1, column=column).value = caption
        ws.merge_cells(merged_range)
        if use_header_font:
            ws.cell(row=1, column=column).font = header_font

    # Second header row: an M / SD pair under "Time 1" and under "Time 2".
    for mean_col in (2, 4):
        for offset, caption in enumerate(("M", "SD")):
            sub_header = ws.cell(row=2, column=mean_col + offset)
            sub_header.value = caption
            sub_header.font = header_font

    for data_row in dataframe_to_rows(apa_table_df,
                                      index=False,
                                      header=False):
        ws.append(data_row)

    last_row = len(apa_table_df) + 2  # two header rows + data rows

    for row_number in range(1, last_row + 1):
        for cell in ws[row_number]:
            cell.alignment = global_vars.alignment_center

    # Line under the header block and under the last row, line above the
    # first header row.
    for row_number in (2, last_row):
        for cell in ws[row_number]:
            cell.border = Border(bottom=global_vars.border_APA)

    for cell in ws[1]:
        cell.border = Border(top=global_vars.border_APA)

    if effect_size == "None":
        ws.delete_cols(8)

    helper_funcs.add_table_notes(ws, [])

    helper_funcs.savefile(wb=wb)
Exemple #6
0
def raw_pairttest_apa_table_word(mod_raw_data_df, output_df):
    """Write the paired-samples t-test results (raw-data input) as an APA-style Word table.

    The table has a two-row header ("Time 1" / "Time 2" each spanning an M
    and an SD column; the remaining headers span both rows) followed by one
    row per row of ``output_df``. When the chosen effect size is "None" the
    effect size column (index 7) is removed again at the end.

    Args:
        mod_raw_data_df: Not used by this function.
        output_df: DataFrame with the columns "pvalues", "Variable",
            "Time1_Mean", "Time1_SD", "Time2_Mean", "Time2_SD",
            "Degrees of Freedom", "t", "adjusted_pvalues" and a column named
            after ``global_vars.effect_size_choice``. NOTE: "pvalues" is
            dropped from it in place.
    """
    effect_size = global_vars.effect_size_choice

    output_df.drop(columns=["pvalues"], inplace=True)
    apa_table_df = output_df[[
        "Variable", "Time1_Mean", "Time1_SD", "Time2_Mean", "Time2_SD",
        "Degrees of Freedom", "t", effect_size, "adjusted_pvalues"
    ]]

    # Everything between the variable name and the p value is shown with two
    # decimals; the chained-assignment warning is muted while reformatting.
    pd.options.mode.chained_assignment = None
    two_decimal_cols = list(apa_table_df.columns)[1:-1]
    apa_table_df[two_decimal_cols] = apa_table_df[two_decimal_cols].applymap(
        "{:.2f}".format)
    apa_table_df["adjusted_pvalues"] = apa_table_df["adjusted_pvalues"].map(
        helper_funcs.pvalue_formatting)
    pd.options.mode.chained_assignment = "warn"

    doc = Document()
    table_rows_len = len(apa_table_df) + 2  # two header rows + data rows
    table_cols_len = len(apa_table_df.columns)
    table = doc.add_table(rows=table_rows_len, cols=table_cols_len)

    # (column, caption, (row, col) of the cell to merge with or None, italic)
    # The effect size header is left unmerged when it is going to be deleted:
    # otherwise cant remove with delete columns below if merged; see
    # helper_funcs.delete_columns_word func notes
    top_header = (
        (0, "Variable", (1, 0), True),
        (1, "Time 1", (0, 2), False),
        (3, "Time 2", (0, 4), False),
        (5, "df", (1, 5), True),
        (6, "t", (1, 6), True),
        (7, effect_size, (1, 7) if effect_size != "None" else None, True),
        (8, "p", (1, 8), True),
    )
    for column, caption, merge_target, italic in top_header:
        table.cell(row_idx=0, col_idx=column).text = caption
        if merge_target is not None:
            target_row, target_col = merge_target
            table.cell(row_idx=0, col_idx=column).merge(
                table.cell(row_idx=target_row, col_idx=target_col))
        if italic:
            helper_funcs.word_style(table.cell(row_idx=0, col_idx=column),
                                    italic=True)

    # Second header row: an M / SD pair under "Time 1" and under "Time 2".
    for mean_col in (1, 3):
        for offset, caption in enumerate(("M", "SD")):
            table.cell(row_idx=1, col_idx=mean_col + offset).text = caption
            helper_funcs.word_style(table.cell(row_idx=1,
                                               col_idx=mean_col + offset),
                                    italic=True)

    # Body rows: table row N holds DataFrame row N - 2.
    for table_row in range(2, table_rows_len):
        for column in range(table_cols_len):
            value = apa_table_df.iloc[table_row - 2, column]
            table.cell(row_idx=table_row, col_idx=column).text = value

    for table_row in range(table_rows_len):
        for cell in table.rows[table_row].cells:
            cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
            cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Line above the header, line above the first data row and closing line
    # under the last row.
    for bordered_row in (0, 2):
        for cell in table.rows[bordered_row].cells:
            helper_funcs.set_cell_border(cell,
                                         top=global_vars.border_APA_word)
    for cell in table.rows[table_rows_len - 1].cells:
        helper_funcs.set_cell_border(cell, bottom=global_vars.border_APA_word)

    if effect_size == "None":
        helper_funcs.delete_columns_word(table, [7])

    doc = helper_funcs.set_autofit(doc)

    helper_funcs.add_correction_message_word(doc)

    helper_funcs.savefile(doc=doc)
def spss_corr_apa_table_excel(mod_raw_data_df, output_df):
    """Write the SPSS correlation results as an APA-style Excel matrix.

    Depending on ``global_vars.corr_table_triangle`` the upper triangle, the
    lower triangle or the full matrix is filled. Each off-diagonal cell
    holds the coefficient of the variable pair as formatted by
    ``helper_funcs.correlations_format_val``; diagonal cells hold 1.

    Args:
        mod_raw_data_df: DataFrame of the SPSS table; cell [0, 1] is used as
            the correlation label and the column names from the third column
            on are used as the variable names.
        output_df: DataFrame with the columns "var1", "var2",
            "adjusted_pvalues" and a column named after the correlation
            label, holding one row per variable pair.
    """
    # could very easily use the summ_corr_apa_table function here as identical data is passed - separate is preferred, however, as it might be updated/adjusted in the future
    correlation_label = mod_raw_data_df.iloc[0, 1]
    variables_list = list(mod_raw_data_df.columns)[2:]
    triangle = global_vars.corr_table_triangle

    if triangle == "Upper triangle":
        header_cols, header_rows = variables_list[1:], variables_list[:-1]
    elif triangle == "Lower triangle":
        header_cols, header_rows = variables_list[:-1], variables_list[1:]
    elif triangle == "Both":
        header_cols = header_rows = variables_list

    wb = Workbook()
    ws = wb.active

    ws.append([""] + header_cols)
    for offset, row_label in enumerate(header_rows):
        ws.cell(row=offset + 2, column=1).value = row_label

    # For the lower triangle the outer loop walks the header columns and the
    # inner loop the row labels; otherwise it is the other way round.
    lower_only = triangle == "Lower triangle"
    last_row = len(header_rows) + 1
    last_col = len(header_cols) + 1

    first_inner = 2
    for outer in range(2, last_row + 1):
        if lower_only:
            outer_var = ws.cell(row=1, column=outer).value
        else:
            outer_var = ws.cell(row=outer, column=1).value

        for inner in range(first_inner, last_col + 1):
            if lower_only:
                inner_var = ws.cell(row=inner, column=1).value
            else:
                inner_var = ws.cell(row=1, column=inner).value

            if outer_var == inner_var:
                ws.cell(row=outer, column=inner).value = 1
                continue

            # here query method is not preferred as it is not only slower as it is much smaller dataset but also cannot refer to two different variables (colname and val)
            same_order = ((output_df["var1"] == outer_var) &
                          (output_df["var2"] == inner_var))
            swapped_order = ((output_df["var1"] == inner_var) &
                             (output_df["var2"] == outer_var))
            pair_row = output_df[same_order | swapped_order].iloc[0]
            formatted = helper_funcs.correlations_format_val(
                pair_row[correlation_label], pair_row["adjusted_pvalues"])

            if lower_only:
                ws.cell(row=inner, column=outer).value = formatted
            else:
                ws.cell(row=outer, column=inner).value = formatted

        # Shift the start so that only one triangle gets filled.
        if triangle != "Both":
            first_inner += 1

    for row_number in range(1, last_row + 1):
        for cell in ws[row_number]:
            cell.alignment = global_vars.alignment_center

    # Header font for the header row and for the row labels.
    for cell in ws[1]:
        cell.font = global_vars.font_header
    for row_number in range(2, last_row + 1):
        ws.cell(row=row_number, column=1).font = global_vars.font_header

    # Closing line under the last row, line above and below the header.
    for cell in ws[last_row]:
        cell.border = Border(bottom=global_vars.border_APA)
    for cell in ws[1]:
        cell.border = Border(top=global_vars.border_APA,
                             bottom=global_vars.border_APA)

    table_notes = [
        "**p < 0.01",
        "*p < {}".format(global_vars.alpha_threshold),
        "Correlation Coefficient used: {}".format(correlation_label),
    ]
    helper_funcs.add_table_notes(ws, table_notes)

    helper_funcs.savefile(wb=wb)
def spss_corr_apa_table_word(mod_raw_data_df, output_df):
    """Write the SPSS correlation results as an APA-style Word matrix.

    Every variable in the header gets two table columns: one for the
    formatted coefficient and one to its right for the significance sign, so
    that the numbers line up. Depending on
    ``global_vars.corr_table_triangle`` the upper triangle, the lower
    triangle or the full matrix (with "1" on the diagonal) is filled.

    Args:
        mod_raw_data_df: DataFrame of the SPSS table; cell [0, 1] is used as
            the correlation label and the column names from the third column
            on are used as the variable names.
        output_df: DataFrame with the columns "var1", "var2",
            "adjusted_pvalues" and a column named after the correlation
            label, holding one row per variable pair.

    Raises:
        IndexError: If a required variable pair has no row in ``output_df``.
    """

    def _coefficient_and_sign(first_var, second_var):
        # Look up the pair (in either order) and return (coefficient, sign).
        # here query method is not preferred as it is not only slower as it is much smaller dataset but also cannot refer to two different variables (colname and val)
        pair_row = output_df[((output_df["var1"] == first_var) &
                              (output_df["var2"] == second_var)) |
                             ((output_df["var1"] == second_var) &
                              (output_df["var2"] == first_var))].iloc[0]
        formatted = helper_funcs.correlations_format_val(
            pair_row[correlation_label], pair_row["adjusted_pvalues"])
        # The formatted value is split at "_" into value and sign when a "_"
        # is present; otherwise there is no sign to show.
        if "_" in formatted:
            coefficient, sign = formatted.split("_")
            return coefficient, sign
        return formatted, ""

    # could very easily use the summ_corr_apa_table function here as identical data is passed - separate is preferred, however, as it might be updated/adjusted in the future
    correlation_label = mod_raw_data_df.iloc[0, 1]
    variables_list = list(mod_raw_data_df.columns)[2:]
    triangle = global_vars.corr_table_triangle

    if triangle == "Upper triangle":
        header_cols = variables_list[1:]
        header_rows = variables_list[:-1]
    elif triangle == "Lower triangle":
        header_cols = variables_list[:-1]
        header_rows = variables_list[1:]
    elif triangle == "Both":
        header_cols = list(variables_list)
        header_rows = list(variables_list)
    # this adds an empty column after every variable where the significance
    # signs will be placed for better presentation
    header_cols = [entry for var in header_cols for entry in (var, "")]

    doc = Document()
    table_rows_len = len(header_rows) + 1
    table_cols_len = len(header_cols) + 1
    table = doc.add_table(rows=table_rows_len, cols=table_cols_len)

    for ind, var in enumerate([""] + header_cols):
        table.cell(row_idx=0, col_idx=ind).text = var
    for ind, var in enumerate([""] + header_rows):
        table.cell(row_idx=ind, col_idx=0).text = var

    if triangle == "Upper triangle":
        # Row by row; each row starts one variable (two columns) further
        # to the right than the previous one.
        first_col = 1
        for row_ind in range(1, table_rows_len):
            row_var = table.cell(row_idx=row_ind, col_idx=0).text
            for col_ind in range(first_col, table_cols_len):
                col_var = table.cell(row_idx=0, col_idx=col_ind).text
                # this allows to skip the columns designed for the significance signs
                if col_var == "" or row_var == "":
                    continue
                r, sign = _coefficient_and_sign(row_var, col_var)
                table.cell(row_idx=row_ind, col_idx=col_ind).text = r
                table.cell(row_idx=row_ind, col_idx=col_ind + 1).text = sign
            first_col += 2
    elif triangle == "Lower triangle":
        # Column by column; each variable column starts one row further
        # down than the previous one.
        first_row = 1
        for col_ind in range(1, table_cols_len):
            col_var = table.cell(row_idx=0, col_idx=col_ind).text
            if col_var == "":  # significance sign column
                continue
            for row_ind in range(first_row, table_rows_len):
                row_var = table.cell(row_idx=row_ind, col_idx=0).text
                r, sign = _coefficient_and_sign(col_var, row_var)
                table.cell(row_idx=row_ind, col_idx=col_ind).text = r
                table.cell(row_idx=row_ind, col_idx=col_ind + 1).text = sign
            first_row += 1
    elif triangle == "Both":
        for col_ind in range(1, table_cols_len):
            col_var = table.cell(row_idx=0, col_idx=col_ind).text
            if col_var == "":  # significance sign column
                continue
            for row_ind in range(1, table_rows_len):
                row_var = table.cell(row_idx=row_ind, col_idx=0).text
                if row_var == "":
                    continue
                if col_var == row_var:
                    table.cell(row_idx=row_ind, col_idx=col_ind).text = "1"
                    continue
                r, sign = _coefficient_and_sign(col_var, row_var)
                table.cell(row_idx=row_ind, col_idx=col_ind).text = r
                table.cell(row_idx=row_ind, col_idx=col_ind + 1).text = sign

    # Line above and below the header, closing line under the last row.
    for cell in table.rows[0].cells:
        helper_funcs.set_cell_border(cell,
                                     top=global_vars.border_APA_word,
                                     bottom=global_vars.border_APA_word)
    for cell in table.rows[table_rows_len - 1].cells:
        helper_funcs.set_cell_border(cell, bottom=global_vars.border_APA_word)

    for cell in table.rows[0].cells:
        helper_funcs.word_style(cell, italic=True)
    for cell in table.columns[0].cells:
        helper_funcs.word_style(cell, italic=True)

    # Each variable header spans its value column and its sign column.
    for i in range(1, table_cols_len, 2):
        table.cell(row_idx=0,
                   col_idx=i).merge(table.cell(row_idx=0, col_idx=i + 1))
        # merging cells adds a paragraph break at the end; this gets rid of it
        table.cell(row_idx=0, col_idx=i).text = table.cell(
            row_idx=0, col_idx=i).text[:-1]

    # The header texts were re-assigned above, so the styling is applied
    # once more together with the centering.
    for cell in table.rows[0].cells:
        helper_funcs.word_style(cell, italic=True)
        cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER
    for cell in table.columns[0].cells:
        helper_funcs.word_style(cell, italic=True)
        cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    for row in range(0, table_rows_len):
        for cell in table.rows[row].cells:
            cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
            cell.paragraphs[0].paragraph_format.space_after = Inches(0)
            cell.paragraphs[0].paragraph_format.space_before = Inches(0)
            cell.paragraphs[0].paragraph_format.line_spacing = 1

    # Pull each value (right-aligned) and its sign (left-aligned) together
    # with small negative indents.
    for row in range(1, table_rows_len):
        for col in range(1, table_cols_len, 2):
            value_paragraph = table.cell(row_idx=row,
                                         col_idx=col).paragraphs[0]
            value_paragraph.paragraph_format.right_indent = Inches(-0.08)
            sign_paragraph = table.cell(row_idx=row,
                                        col_idx=col + 1).paragraphs[0]
            sign_paragraph.paragraph_format.left_indent = Inches(-0.06)
            table.cell(
                row_idx=row,
                col_idx=col).paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.RIGHT
            table.cell(row_idx=row, col_idx=col +
                       1).paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.LEFT

    doc = helper_funcs.set_autofit(doc)

    para = doc.add_paragraph("** ")
    para.add_run("p").italic = True
    para.add_run(" < 0.01")
    para.add_run().add_break()
    para.add_run("* ")
    para.add_run("p").italic = True
    para.add_run(" < {}".format(global_vars.alpha_threshold))
    doc.add_paragraph(
        "Correlation Coefficient used: {}".format(correlation_label))
    helper_funcs.add_correction_message_word(doc)

    helper_funcs.savefile(doc=doc)
Exemple #9
0
def raw_corr_apa_table_noCIs_excel(mod_raw_data_df, output_df):
    """Write the raw-data correlation results (no confidence intervals) as an APA-style Excel matrix.

    Depending on ``global_vars.corr_table_triangle`` the upper triangle, the
    lower triangle or the full matrix is filled. Each off-diagonal cell
    holds the coefficient of the variable pair as formatted by
    ``helper_funcs.correlations_format_val``; diagonal cells hold 1.

    Args:
        mod_raw_data_df: DataFrame whose column names are used as the
            variable names.
        output_df: DataFrame with the columns "Variable1", "Variable2",
            "Correlation_Coefficient" and "adjusted_pvalues", holding one
            row per variable pair.
    """
    # local var and .columns preferred to minimize use of global scope
    variables_list = list(mod_raw_data_df.columns)
    triangle = global_vars.corr_table_triangle

    wb = Workbook()
    ws = wb.active

    if triangle == "Upper triangle":
        header_cols, header_rows = variables_list[1:], variables_list[:-1]
    elif triangle == "Lower triangle":
        header_cols, header_rows = variables_list[:-1], variables_list[1:]
    elif triangle == "Both":
        header_cols = list(variables_list)
        header_rows = list(variables_list)

    ws.append([""] + header_cols)
    for offset, row_label in enumerate(header_rows):
        ws.cell(row=offset + 2, column=1).value = row_label

    # For the lower triangle the outer loop walks the header columns and the
    # inner loop the row labels; otherwise it is the other way round.
    lower_only = triangle == "Lower triangle"
    last_row = len(header_rows) + 1
    last_col = len(header_cols) + 1

    first_inner = 2
    for outer in range(2, last_row + 1):
        if lower_only:
            outer_var = ws.cell(row=1, column=outer).value
        else:
            outer_var = ws.cell(row=outer, column=1).value

        for inner in range(first_inner, last_col + 1):
            if lower_only:
                inner_var = ws.cell(row=inner, column=1).value
            else:
                inner_var = ws.cell(row=1, column=inner).value

            if outer_var == inner_var:
                ws.cell(row=outer, column=inner).value = 1
                continue

            # here query method is not preferred as it is not only slower as it is much smaller dataset but also cannot refer to two different variables (colname and val)
            same_order = ((output_df["Variable1"] == outer_var) &
                          (output_df["Variable2"] == inner_var))
            swapped_order = ((output_df["Variable1"] == inner_var) &
                             (output_df["Variable2"] == outer_var))
            pair_row = output_df[same_order | swapped_order].iloc[0]
            formatted = helper_funcs.correlations_format_val(
                pair_row["Correlation_Coefficient"],
                pair_row["adjusted_pvalues"])

            if lower_only:
                ws.cell(row=inner, column=outer).value = formatted
            else:
                ws.cell(row=outer, column=inner).value = formatted

        # Shift the start so that only one triangle gets filled.
        if triangle != "Both":
            first_inner += 1

    for row_number in range(1, last_row + 1):
        for cell in ws[row_number]:
            cell.alignment = global_vars.alignment_center

    # Header font for the header row and for the label column.
    for cell in ws[1] + ws["A"]:
        cell.font = global_vars.font_header

    # Line above and below the header, closing line under the last row.
    for cell in ws[1]:
        cell.border = Border(top=global_vars.border_APA,
                             bottom=global_vars.border_APA)
    for cell in ws[last_row]:
        cell.border = Border(bottom=global_vars.border_APA)

    # Reverse lookup: the master_dict key whose value is the chosen
    # correlation type is shown in the note.
    corr_type_keys = list(global_vars.master_dict.keys())
    corr_type_values = list(global_vars.master_dict.values())
    corr_type_name = corr_type_keys[corr_type_values.index(
        global_vars.raw_corr_type)]

    table_notes = [
        "Correlation coefficient used: {}".format(corr_type_name),
        "**p < 0.01",
        "*p < {}".format(global_vars.alpha_threshold),
    ]
    helper_funcs.add_table_notes(ws, table_notes)

    helper_funcs.savefile(wb=wb)
Exemple #10
0
def _ci_word_interleave_blanks(names):
    """Return a new list holding every entry of ``names`` followed by ""."""
    padded = []
    for name in names:
        padded.extend((name, ""))
    return padded


def _ci_word_pair_row(output_df, first_var, second_var):
    """Return the first ``output_df`` row for the variable pair, in either order.

    Raises IndexError when ``output_df`` has no row for the pair.
    """
    # Boolean masks are used instead of DataFrame.query (original author's
    # note: query is slower on a dataset this small and is awkward when both
    # the column name and the value are variables).
    forward = ((output_df["Variable1"] == first_var) &
               (output_df["Variable2"] == second_var))
    backward = ((output_df["Variable1"] == second_var) &
                (output_df["Variable2"] == first_var))
    return output_df[forward | backward].iloc[0]


def _ci_word_coefficient_cells(triangle, table_rows_len, table_cols_len):
    """Yield the (row_idx, col_idx) of every coefficient cell to be filled.

    Coefficients live on odd rows and odd columns; the even row below holds
    the CI and the even column to the right holds the significance marker.
    """
    if triangle == "Upper triangle":
        for row in range(1, table_rows_len, 2):
            # the first filled column moves right by one variable per row
            for col in range(row, table_cols_len, 2):
                yield row, col
    elif triangle == "Lower triangle":
        for col in range(1, table_cols_len, 2):
            # the first filled row moves down by one variable per column
            for row in range(col, table_rows_len, 2):
                yield row, col
    else:
        for col in range(1, table_cols_len, 2):
            for row in range(1, table_rows_len, 2):
                yield row, col


def raw_corr_apa_table_withCIs_word(mod_raw_data_df, output_df):
    """Build a Word APA correlation table with confidence intervals and save it.

    Args:
        mod_raw_data_df: DataFrame whose column names (in order) are the
            variables shown in the table.
        output_df: DataFrame with one row per variable pair; the columns
            "Variable1", "Variable2", "Correlation_Coefficient",
            "adjusted_pvalues", "CI_low" and "CI_high" are read.

    Raises:
        ValueError: if ``global_vars.corr_table_triangle`` is not one of
            "Upper triangle", "Lower triangle" or "Both".
        IndexError: if ``output_df`` has no row for a required pair.

    The layout is controlled by ``global_vars.corr_table_triangle``. Every
    variable occupies two table rows (coefficient, then CI) and two table
    columns (coefficient, then significance marker).
    """
    variables_list = list(mod_raw_data_df.columns)
    triangle = global_vars.corr_table_triangle

    if triangle == "Upper triangle":
        col_vars, row_vars = variables_list[1:], variables_list[:-1]
    elif triangle == "Lower triangle":
        col_vars, row_vars = variables_list[:-1], variables_list[1:]
    elif triangle == "Both":
        col_vars, row_vars = list(variables_list), list(variables_list)
    else:
        raise ValueError(
            "Unknown corr_table_triangle value: {!r}".format(triangle))

    # an empty column after each variable holds the significance signs and an
    # empty row after each variable holds the CIs
    header_cols = _ci_word_interleave_blanks(col_vars)
    header_rows = _ci_word_interleave_blanks(row_vars)

    doc = Document()
    table_rows_len = len(header_rows) + 1
    table_cols_len = len(header_cols) + 1
    table = doc.add_table(rows=table_rows_len, cols=table_cols_len)

    for ind, var in enumerate([""] + header_cols):
        table.cell(row_idx=0, col_idx=ind).text = var
    for ind, var in enumerate([""] + header_rows):
        table.cell(row_idx=ind, col_idx=0).text = var

    for row, col in _ci_word_coefficient_cells(triangle, table_rows_len,
                                               table_cols_len):
        # table index k corresponds to header list index k - 1 because of the
        # blank top-left corner cell
        row_var = header_rows[row - 1]
        col_var = header_cols[col - 1]

        if triangle == "Both" and row_var == col_var:
            table.cell(row_idx=row, col_idx=col).text = "1"
            continue

        pair = _ci_word_pair_row(output_df, row_var, col_var)
        ci_low = helper_funcs.correlations_format_val(pair["CI_low"])
        ci_high = helper_funcs.correlations_format_val(pair["CI_high"])
        r = helper_funcs.correlations_format_val(
            pair["Correlation_Coefficient"], pair["adjusted_pvalues"])
        # the formatted value may carry a marker after "_" (presumably the
        # significance asterisks); it is shown in the neighbouring column
        if "_" in r:
            r, sign = r.split("_")
        else:
            sign = ""

        table.cell(row_idx=row, col_idx=col).text = r
        table.cell(row_idx=row, col_idx=col + 1).text = sign
        table.cell(row_idx=row + 1,
                   col_idx=col).text = "[" + ci_low + ", " + ci_high + "]"

    for cell in table.rows[0].cells:
        helper_funcs.set_cell_border(cell,
                                     top=global_vars.border_APA_word,
                                     bottom=global_vars.border_APA_word)
    for cell in table.rows[table_rows_len - 1].cells:
        helper_funcs.set_cell_border(cell, bottom=global_vars.border_APA_word)

    # each column header spans the coefficient column and its sign column
    for i in range(1, table_cols_len, 2):
        table.cell(row_idx=0,
                   col_idx=i).merge(table.cell(row_idx=0, col_idx=i + 1))
        merged = table.cell(row_idx=0, col_idx=i)
        # merging cells adds a paragraph break at the end; this gets rid of it
        merged.text = merged.text[:-1]
    # each row header spans the coefficient row and its CI row
    for i in range(1, table_rows_len, 2):
        table.cell(row_idx=i,
                   col_idx=0).merge(table.cell(row_idx=i + 1, col_idx=0))
        merged = table.cell(row_idx=i, col_idx=0)
        # merging cells adds a paragraph break at the end; this gets rid of it
        merged.text = merged.text[:-1]
    # each CI cell spans the coefficient column and its sign column
    for row in range(2, table_rows_len, 2):
        for col in range(1, table_cols_len, 2):
            table.cell(row_idx=row, col_idx=col).merge(
                table.cell(row_idx=row, col_idx=col + 1))
            ci_cell = table.cell(row_idx=row, col_idx=col)
            helper_funcs.word_style(ci_cell, size=9)
            # alignment is a paragraph property; assigning it on the cell
            # object itself has no effect on the document
            ci_cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    for cell in table.rows[0].cells:
        helper_funcs.word_style(cell, italic=True)
        cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER
        cell.vertical_alignment = WD_ALIGN_VERTICAL.TOP
    for cell in table.columns[0].cells:
        helper_funcs.word_style(cell, italic=True)
        cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    # NOTE: this also re-centres the header row vertically for every column
    # except the first, overriding the TOP alignment set just above
    for row in range(0, table_rows_len):
        for col in range(1, table_cols_len):
            cell = table.cell(row_idx=row, col_idx=col)
            cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
            paragraph_format = cell.paragraphs[0].paragraph_format
            paragraph_format.space_after = Inches(0)
            paragraph_format.space_before = Inches(0)
            paragraph_format.line_spacing = 1

    # pull each coefficient and its sign towards their shared border so they
    # read as a single value
    for row in range(1, table_rows_len, 2):
        for col in range(1, table_cols_len, 2):
            coeff_par = table.cell(row_idx=row, col_idx=col).paragraphs[0]
            sign_par = table.cell(row_idx=row, col_idx=col + 1).paragraphs[0]
            coeff_par.paragraph_format.right_indent = Inches(-0.08)
            sign_par.paragraph_format.left_indent = Inches(-0.06)
            coeff_par.alignment = WD_ALIGN_PARAGRAPH.RIGHT
            sign_par.alignment = WD_ALIGN_PARAGRAPH.LEFT

    doc = helper_funcs.set_autofit(doc)

    para = doc.add_paragraph("** ")
    para.add_run("p").italic = True
    para.add_run(" < 0.01")
    para.add_run().add_break()
    para.add_run("* ")
    para.add_run("p").italic = True
    para.add_run(" < {}".format(global_vars.alpha_threshold))

    # reverse lookup: the master_dict key whose value is the chosen corr type
    corr_labels = list(global_vars.master_dict.keys())
    corr_values = list(global_vars.master_dict.values())
    doc.add_paragraph("Correlation coefficient used: {}".format(
        corr_labels[corr_values.index(global_vars.raw_corr_type)]))
    helper_funcs.add_correction_message_word(doc)

    helper_funcs.savefile(doc=doc)
Exemple #11
0
def _ci_excel_pair_row(output_df, first_var, second_var):
    """Return the first ``output_df`` row for the variable pair, in either order.

    Raises IndexError when ``output_df`` has no row for the pair.
    """
    # Boolean masks are used instead of DataFrame.query (original author's
    # note: query is slower on a dataset this small and is awkward when both
    # the column name and the value are variables).
    forward = ((output_df["Variable1"] == first_var) &
               (output_df["Variable2"] == second_var))
    backward = ((output_df["Variable1"] == second_var) &
                (output_df["Variable2"] == first_var))
    return output_df[forward | backward].iloc[0]


def _ci_excel_coefficient_cells(triangle, n_header_rows, n_header_cols):
    """Yield the (row, column) of every coefficient cell to be filled.

    Worksheet coordinates are 1-based; row 1 and column 1 hold the headers,
    coefficients sit on even rows and each CI on the odd row below.
    """
    row_stop = n_header_rows + 2
    col_stop = n_header_cols + 2
    if triangle == "Upper triangle":
        for offset, row in enumerate(range(2, row_stop, 2)):
            # the first filled column moves right by one per variable row
            for col in range(2 + offset, col_stop):
                yield row, col
    elif triangle == "Lower triangle":
        for offset, col in enumerate(range(2, col_stop)):
            # the first filled row moves down by one variable per column
            for row in range(2 + 2 * offset, row_stop, 2):
                yield row, col
    else:
        for col in range(2, col_stop):
            for row in range(2, row_stop, 2):
                yield row, col


def raw_corr_apa_table_withCIs_excel(mod_raw_data_df, output_df):
    """Build an Excel APA correlation table with confidence intervals and save it.

    Args:
        mod_raw_data_df: DataFrame whose column names (in order) are the
            variables shown in the table.
        output_df: DataFrame with one row per variable pair; the columns
            "Variable1", "Variable2", "Correlation_Coefficient",
            "adjusted_pvalues", "CI_low" and "CI_high" are read.

    Raises:
        ValueError: if ``global_vars.corr_table_triangle`` is not one of
            "Upper triangle", "Lower triangle" or "Both".
        IndexError: if ``output_df`` has no row for a required pair.

    Every variable occupies two worksheet rows: the coefficient row and the
    CI row below it.
    """
    # local var and .columns preferred to minimize use of global scope
    variables_list = list(mod_raw_data_df.columns)
    triangle = global_vars.corr_table_triangle

    if triangle == "Upper triangle":
        header_cols, row_vars = variables_list[1:], variables_list[:-1]
    elif triangle == "Lower triangle":
        header_cols, row_vars = variables_list[:-1], variables_list[1:]
    elif triangle == "Both":
        header_cols, row_vars = list(variables_list), list(variables_list)
    else:
        raise ValueError(
            "Unknown corr_table_triangle value: {!r}".format(triangle))

    # an empty row after each variable is where its CIs will be written
    header_rows = []
    for var in row_vars:
        header_rows.extend((var, ""))

    wb = Workbook()
    ws = wb.active

    ws.append([""] + header_cols)
    for ind, var in enumerate(header_rows):
        ws.cell(row=ind + 2, column=1).value = var

    for row, col in _ci_excel_coefficient_cells(triangle, len(header_rows),
                                                len(header_cols)):
        # worksheet index k corresponds to header list index k - 2 (1-based
        # coordinates plus the header row/column)
        row_var = header_rows[row - 2]
        col_var = header_cols[col - 2]

        if triangle == "Both" and row_var == col_var:
            ws.cell(row=row, column=col).value = "1"
            continue

        pair = _ci_excel_pair_row(output_df, row_var, col_var)
        r = helper_funcs.correlations_format_val(
            pair["Correlation_Coefficient"], pair["adjusted_pvalues"])
        ci_low = helper_funcs.correlations_format_val(pair["CI_low"])
        ci_high = helper_funcs.correlations_format_val(pair["CI_high"])

        ws.cell(row=row, column=col).value = r
        ws.cell(row=row + 1,
                column=col).value = "[" + ci_low + ", " + ci_high + "]"

    # each row header spans its coefficient row and CI row
    for row in range(2, len(header_rows) + 1, 2):
        ws.merge_cells(start_row=row,
                       start_column=1,
                       end_row=row + 1,
                       end_column=1)
    if triangle == "Both":
        # the diagonal "1" has no CI, so it spans both rows of its variable
        for col in range(2, len(header_cols) + 2):
            ws.merge_cells(start_row=col * 2 - 2,
                           start_column=col,
                           end_row=col * 2 - 1,
                           end_column=col)

    for row in range(1, len(header_rows) + 2):
        for cell in ws[row]:
            cell.alignment = global_vars.alignment_center

    for cell in ws[1] + ws["A"]:
        cell.font = global_vars.font_header

    # CI rows use a smaller font
    for row in range(3, len(header_rows) + 2, 2):
        for cell in ws[row]:
            cell.font = Font(size=9)

    for cell in ws[1]:
        cell.border = Border(top=global_vars.border_APA,
                             bottom=global_vars.border_APA)
    for cell in ws[len(header_rows) + 1]:
        cell.border = Border(bottom=global_vars.border_APA)

    # reverse lookup: the master_dict key whose value is the chosen corr type
    corr_labels = list(global_vars.master_dict.keys())
    corr_values = list(global_vars.master_dict.values())
    table_notes = [
        "Correlation coefficient used: {}".format(
            corr_labels[corr_values.index(global_vars.raw_corr_type)]),
        "**p < 0.01",
        "*p < {}".format(global_vars.alpha_threshold),
    ]
    helper_funcs.add_table_notes(ws, table_notes)

    helper_funcs.savefile(wb=wb)
Exemple #12
0
def summ_corr_apa_table_excel(mod_raw_data_df, output_df):
    """Build an Excel APA correlation table from summary statistics and save it.

    Args:
        mod_raw_data_df: not used by this function.
        output_df: DataFrame with one row per variable pair. The columns named
            by ``global_vars.summ_corr_varOne``, ``global_vars.summ_corr_varTwo``
            and ``global_vars.summ_corr_coeff`` plus "adjusted_pvalues" are read.

    Raises:
        ValueError: if ``global_vars.corr_table_triangle`` is not one of
            "Upper triangle", "Lower triangle" or "Both".
        IndexError: if ``output_df`` has no row for a required pair.
    """
    var_one_col = global_vars.summ_corr_varOne
    var_two_col = global_vars.summ_corr_varTwo
    triangle = global_vars.corr_table_triangle

    # unique vars in var1 in the user's order, followed ONLY by the vars from
    # col 2 that are NOT in var 1. Order of first appearance is kept for both
    # parts; a set difference would make the table layout depend on hash order.
    first_vars = list(output_df[var_one_col].unique())
    seen = set(first_vars)
    extra_vars = [
        var for var in output_df[var_two_col].unique() if var not in seen
    ]
    variables_list = first_vars + extra_vars

    if triangle == "Upper triangle":
        header_cols = variables_list[1:]
        header_rows = variables_list[:-1]
    elif triangle == "Lower triangle":
        header_cols = variables_list[:-1]
        header_rows = variables_list[1:]
    elif triangle == "Both":
        header_cols, header_rows = variables_list, variables_list
    else:
        raise ValueError(
            "Unknown corr_table_triangle value: {!r}".format(triangle))

    wb = Workbook()
    ws = wb.active

    ws.append([""] + header_cols)
    for ind, var in enumerate(header_rows):
        ws.cell(row=ind + 2, column=1).value = var

    # For "Upper triangle"/"Both" the outer index walks rows and the inner
    # index walks columns; for "Lower triangle" the roles are swapped.
    is_lower = triangle == "Lower triangle"
    inside_loop_ind_start = 2
    for outside_loop_ind in range(2, len(header_rows) + 2):
        # worksheet index k corresponds to header list index k - 2
        if is_lower:
            outside_loop_var = header_cols[outside_loop_ind - 2]
        else:
            outside_loop_var = header_rows[outside_loop_ind - 2]

        for inside_loop_ind in range(inside_loop_ind_start,
                                     len(header_cols) + 2):
            if is_lower:
                inside_loop_var = header_rows[inside_loop_ind - 2]
            else:
                inside_loop_var = header_cols[inside_loop_ind - 2]

            if outside_loop_var == inside_loop_var:
                ws.cell(row=outside_loop_ind, column=inside_loop_ind).value = 1
                continue

            # Boolean masks are used instead of DataFrame.query (original
            # author's note: query is slower on a dataset this small and is
            # awkward when both the column name and the value are variables).
            forward = ((output_df[var_one_col] == outside_loop_var) &
                       (output_df[var_two_col] == inside_loop_var))
            backward = ((output_df[var_one_col] == inside_loop_var) &
                        (output_df[var_two_col] == outside_loop_var))
            df_filtered = output_df[forward | backward].iloc[0]
            r = helper_funcs.correlations_format_val(
                df_filtered[global_vars.summ_corr_coeff],
                df_filtered["adjusted_pvalues"])

            if is_lower:
                ws.cell(row=inside_loop_ind, column=outside_loop_ind).value = r
            else:
                ws.cell(row=outside_loop_ind, column=inside_loop_ind).value = r

        # a triangle starts one cell later on every pass; the full matrix
        # always starts from the first cell
        if triangle != "Both":
            inside_loop_ind_start += 1

    for row in range(1, len(header_rows) + 2):
        for cell in ws[row]:
            cell.alignment = global_vars.alignment_center

    for cell in ws[1]:
        cell.font = global_vars.font_header

    for row in range(2, len(header_rows) + 2):
        ws.cell(row=row, column=1).font = global_vars.font_header

    for cell in ws[len(header_rows) + 1]:
        cell.border = Border(bottom=global_vars.border_APA)

    for cell in ws[1]:
        cell.border = Border(top=global_vars.border_APA,
                             bottom=global_vars.border_APA)

    table_notes = ["**p < 0.01", "*p < {}".format(global_vars.alpha_threshold)]
    helper_funcs.add_table_notes(ws, table_notes)

    helper_funcs.savefile(wb=wb)
def spss_indttest_apa_table_word(mod_raw_data_df, output_df):
    """Build a Word APA table for an SPSS independent-samples t-test and save it.

    Args:
        mod_raw_data_df: not used by this function.
        output_df: DataFrame with one row per tested variable. It is modified
            in place: the "pvalues" column is dropped, the columns from
            position 7 up to (not including) the last one are formatted to two
            decimals, and "adjusted_pvalues" is formatted with
            ``helper_funcs.pvalue_formatting``. After that every cell is
            written to the table as text. The header layout assumes 11 columns
            remain after the drop - TODO confirm against the caller.

    Group labels, group sizes and the effect-size label come from
    ``global_vars``; a group size of -1 is rendered as "?".
    """
    # very similar to raw_corr_indttest_apa_table func but separate as might be updated/adjusted in the future
    output_df.drop(columns=["pvalues"], inplace=True)

    # the SettingWithCopyWarning is suppressed while the columns are
    # reformatted; the finally clause re-enables it even if formatting fails
    pd.options.mode.chained_assignment = None
    try:
        two_decimal_cols = list(output_df.columns)[7:-1]
        output_df[two_decimal_cols] = output_df[two_decimal_cols].applymap(
            lambda x: "{:.2f}".format(x))

        output_df["adjusted_pvalues"] = output_df["adjusted_pvalues"].map(
            helper_funcs.pvalue_formatting)
    finally:
        pd.options.mode.chained_assignment = "warn"

    doc = Document()
    # two header rows: group labels on top, M / SD underneath
    table_rows_len = len(output_df) + 2
    table_cols_len = len(output_df.columns)
    table = doc.add_table(rows=table_rows_len, cols=table_cols_len)

    table.cell(row_idx=0, col_idx=0).text = "Variable"
    table.cell(row_idx=0, col_idx=0).merge(table.cell(row_idx=1, col_idx=0))
    helper_funcs.word_style(table.cell(row_idx=0, col_idx=0), italic=True)

    table.cell(row_idx=0, col_idx=1).text = "All, n=?"
    table.cell(row_idx=0, col_idx=1).merge(table.cell(row_idx=0, col_idx=2))

    if global_vars.spss_indttest_nOne != -1:
        n1_label = global_vars.spss_indttest_nOne
    else:
        n1_label = "?"
    table.cell(row_idx=0, col_idx=3).text = "{g}, n={n}".format(
        g=global_vars.spss_indttest_groupOneLabel, n=n1_label)
    table.cell(row_idx=0, col_idx=3).merge(table.cell(row_idx=0, col_idx=4))

    # the second group's label depends on the second group's own size
    if global_vars.spss_indttest_nTwo != -1:
        n2_label = global_vars.spss_indttest_nTwo
    else:
        n2_label = "?"
    table.cell(row_idx=0, col_idx=5).text = "{g}, n={n}".format(
        g=global_vars.spss_indttest_groupTwoLabel, n=n2_label)
    table.cell(row_idx=0, col_idx=5).merge(table.cell(row_idx=0, col_idx=6))

    table.cell(row_idx=0, col_idx=7).text = "df"
    table.cell(row_idx=0, col_idx=7).merge(table.cell(row_idx=1, col_idx=7))
    helper_funcs.word_style(table.cell(row_idx=0, col_idx=7), italic=True)

    table.cell(row_idx=0, col_idx=8).text = "t"
    table.cell(row_idx=0, col_idx=8).merge(table.cell(row_idx=1, col_idx=8))
    helper_funcs.word_style(table.cell(row_idx=0, col_idx=8), italic=True)

    table.cell(row_idx=0, col_idx=9).text = global_vars.effect_size_choice
    if global_vars.effect_size_choice != "None":  # otherwise cant remove with delete columns below if merged; see helper_funcs.delete_columns_word func notes
        table.cell(row_idx=0, col_idx=9).merge(table.cell(row_idx=1,
                                                          col_idx=9))
    helper_funcs.word_style(table.cell(row_idx=0, col_idx=9), italic=True)

    table.cell(row_idx=0, col_idx=10).text = "p"
    table.cell(row_idx=0, col_idx=10).merge(table.cell(row_idx=1, col_idx=10))
    helper_funcs.word_style(table.cell(row_idx=0, col_idx=10), italic=True)

    # M / SD sub-headers under "All", group one and group two
    for col in range(1, 6, 2):
        table.cell(row_idx=1, col_idx=col).text = "M"
        helper_funcs.word_style(table.cell(row_idx=1, col_idx=col),
                                italic=True)
        table.cell(row_idx=1, col_idx=col + 1).text = "SD"
        helper_funcs.word_style(table.cell(row_idx=1, col_idx=col + 1),
                                italic=True)

    # data rows start below the two header rows
    for row in range(2, table_rows_len):
        for col in range(0, table_cols_len):
            table.cell(row_idx=row, col_idx=col).text = output_df.iloc[row - 2,
                                                                       col]

    for row in range(0, table_rows_len):
        for cell in table.rows[row].cells:
            cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
            cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    for cell in table.rows[0].cells:
        helper_funcs.set_cell_border(cell, top=global_vars.border_APA_word)
    for cell in table.rows[2].cells:
        helper_funcs.set_cell_border(cell, top=global_vars.border_APA_word)
    for cell in table.rows[table_rows_len - 1].cells:
        helper_funcs.set_cell_border(cell, bottom=global_vars.border_APA_word)

    if global_vars.effect_size_choice == "None":
        helper_funcs.delete_columns_word(table, [9])

    doc = helper_funcs.set_autofit(doc)

    doc.add_paragraph(
        "Means and Standard Deviations cannot be read from the SPSS table. Please add them yourself or remove those columns if they are not needed."
    )
    helper_funcs.add_correction_message_word(doc)

    helper_funcs.savefile(doc=doc)
def spss_indttest_apa_table_excel(mod_raw_data_df, output_df):
    """Build an Excel APA table for an SPSS independent-samples t-test and save it.

    Args:
        mod_raw_data_df: not used by this function.
        output_df: DataFrame with one row per tested variable. It is modified
            in place: the "pvalues" column is dropped, the columns from
            position 7 up to (not including) the last one are formatted to two
            decimals, and "adjusted_pvalues" is formatted with
            ``helper_funcs.pvalue_formatting``. The rows are then appended
            under the two header rows. The header layout assumes 11 columns
            remain after the drop - TODO confirm against the caller.

    Group labels, group sizes and the effect-size label come from
    ``global_vars``; a group size of -1 is rendered as "?".
    """
    output_df.drop(columns=["pvalues"], inplace=True)

    # the SettingWithCopyWarning is suppressed while the columns are
    # reformatted; the finally clause re-enables it even if formatting fails
    pd.options.mode.chained_assignment = None
    try:
        two_decimal_cols = list(output_df.columns)[7:-1]
        output_df[two_decimal_cols] = output_df[two_decimal_cols].applymap(
            lambda x: "{:.2f}".format(x))

        output_df["adjusted_pvalues"] = output_df["adjusted_pvalues"].map(
            helper_funcs.pvalue_formatting)
    finally:
        pd.options.mode.chained_assignment = "warn"

    wb = Workbook()
    ws = wb.active

    ws.cell(row=1, column=1).value = "Variable"
    ws.merge_cells('A1:A2')
    ws.cell(row=1, column=1).font = global_vars.font_header

    ws.cell(row=1, column=2).value = "All, n=?"
    ws.merge_cells('B1:C1')

    if global_vars.spss_indttest_nOne != -1:
        n1_label = global_vars.spss_indttest_nOne
    else:
        n1_label = "?"
    ws.cell(row=1, column=4).value = "{g}, n={n}".format(
        g=global_vars.spss_indttest_groupOneLabel, n=n1_label)
    ws.merge_cells('D1:E1')

    # the second group's label depends on the second group's own size
    if global_vars.spss_indttest_nTwo != -1:
        n2_label = global_vars.spss_indttest_nTwo
    else:
        n2_label = "?"
    ws.cell(row=1, column=6).value = "{g}, n={n}".format(
        g=global_vars.spss_indttest_groupTwoLabel, n=n2_label)
    ws.merge_cells('F1:G1')

    ws.cell(row=1, column=8).value = "df"
    ws.merge_cells('H1:H2')
    ws.cell(row=1, column=8).font = global_vars.font_header

    ws.cell(row=1, column=9).value = "t"
    ws.merge_cells('I1:I2')
    ws.cell(row=1, column=9).font = global_vars.font_header

    ws.cell(row=1, column=10).value = global_vars.effect_size_choice
    ws.merge_cells('J1:J2')
    ws.cell(row=1, column=10).font = global_vars.font_header

    ws.cell(row=1, column=11).value = "p"
    ws.merge_cells('K1:K2')
    ws.cell(row=1, column=11).font = global_vars.font_header

    # M / SD sub-headers under "All", group one and group two
    for col in range(2, 7, 2):
        ws.cell(row=2, column=col).value = "M"
        ws.cell(row=2, column=col).font = global_vars.font_header
        ws.cell(row=2, column=col + 1).value = "SD"
        ws.cell(row=2, column=col + 1).font = global_vars.font_header

    # data rows follow the two header rows
    for row in dataframe_to_rows(output_df, index=False, header=False):
        ws.append(row)

    for row in range(1, len(output_df) + 3):
        for cell in ws[row]:
            cell.alignment = global_vars.alignment_center

    for cell in ws[1]:
        cell.border = Border(top=global_vars.border_APA)
    # bottom rule under the header block and under the last data row
    for cell in ws[2] + ws[len(output_df) + 2]:
        cell.border = Border(bottom=global_vars.border_APA)

    # column 10 is the effect-size column
    if global_vars.effect_size_choice == "None":
        ws.delete_cols(10)

    table_notes = [
        "Means and Standard Deviations cannot be read from the SPSS table. Please add them yourself or remove those columns if they are not needed."
    ]
    helper_funcs.add_table_notes(ws, table_notes)

    helper_funcs.savefile(wb=wb)
Exemple #15
0
def raw_indttest_apa_table_excel(mod_raw_data_df, output_df):
    """Build an Excel APA table for a raw-data independent t-test and save it.

    Args:
        mod_raw_data_df: not used by this function.
        output_df: DataFrame with one row per tested variable. The columns
            "Variable", "All_Mean", "All_SD", "<level>_Mean" / "<level>_SD"
            for both group levels, "Degrees_of_Freedom", "t", the column named
            by ``global_vars.effect_size_choice`` and "adjusted_pvalues" are
            selected; the group sizes are read from the first row at column
            positions 1, 4 and 7.
    """
    level_one = global_vars.raw_indttest_grouplevel1
    level_two = global_vars.raw_indttest_grouplevel2

    selected_columns = [
        "Variable",
        "All_Mean",
        "All_SD",
        level_one + "_Mean",
        level_one + "_SD",
        level_two + "_Mean",
        level_two + "_SD",
        "Degrees_of_Freedom",
        "t",
        global_vars.effect_size_choice,
        "adjusted_pvalues",
    ]
    apa_table_df = output_df[selected_columns]

    # the two operations below are correct so the SettingWithCopyWarning pandas error is suppressed temporarily
    pd.options.mode.chained_assignment = None
    two_decimal_cols = list(apa_table_df.columns)[1:-1]
    apa_table_df[two_decimal_cols] = apa_table_df[two_decimal_cols].applymap(
        "{:.2f}".format)
    apa_table_df["adjusted_pvalues"] = apa_table_df["adjusted_pvalues"].map(
        helper_funcs.pvalue_formatting)
    pd.options.mode.chained_assignment = "warn"

    wb = Workbook()
    ws = wb.active

    # (column, header text, merged range, whether the header font is applied)
    top_headers = (
        (1, "Variable", 'A1:A2', True),
        (2, "All, n={}".format(output_df.iloc[0, 1]), 'B1:C1', False),
        (4, "{g}, n={n}".format(g=level_one, n=output_df.iloc[0, 4]),
         'D1:E1', False),
        (6, "{g}, n={n}".format(g=level_two, n=output_df.iloc[0, 7]),
         'F1:G1', False),
        (8, "df", 'H1:H2', True),
        (9, "t", 'I1:I2', True),
        (10, global_vars.effect_size_choice, 'J1:J2', True),
        (11, "p", 'K1:K2', True),
    )
    for column, label, merged_range, use_header_font in top_headers:
        ws.cell(row=1, column=column).value = label
        ws.merge_cells(merged_range)
        if use_header_font:
            ws.cell(row=1, column=column).font = global_vars.font_header

    # M / SD sub-headers under "All" and under each group level
    for first_col in (2, 4, 6):
        for shift, label in enumerate(("M", "SD")):
            sub_header = ws.cell(row=2, column=first_col + shift)
            sub_header.value = label
            sub_header.font = global_vars.font_header

    # data rows follow the two header rows
    for data_row in dataframe_to_rows(apa_table_df, index=False,
                                      header=False):
        ws.append(data_row)

    last_row = len(apa_table_df) + 2
    for row_number in range(1, last_row + 1):
        for cell in ws[row_number]:
            cell.alignment = global_vars.alignment_center

    for cell in ws[1]:
        cell.border = Border(top=global_vars.border_APA)
    # bottom rule under the header block and under the last data row
    for cell in ws[2] + ws[last_row]:
        cell.border = Border(bottom=global_vars.border_APA)

    # column 10 is the effect-size column
    if global_vars.effect_size_choice == "None":
        ws.delete_cols(10)

    helper_funcs.add_table_notes(ws, [])

    helper_funcs.savefile(wb=wb)
Exemple #16
0
def raw_indttest_apa_table_word(mod_raw_data_df, output_df):
    """Build an APA-style Word table for independent-samples t-test results.

    The table has a two-row header (group captions spanning an "M"/"SD"
    pair each, plus "Variable", "df", "t", the effect size and "p" spanning
    both header rows) followed by one row per row of ``output_df``. The
    finished document is handed to ``helper_funcs.savefile``.

    Args:
        mod_raw_data_df: Unused by this function.
        output_df: DataFrame with the result columns selected below
            ("Variable", "All_Mean", "All_SD", the per-group "_Mean"/"_SD"
            columns, "Degrees_of_Freedom", "t", the chosen effect size
            column and "adjusted_pvalues"). The values printed after "n="
            in the header are read positionally from its first row
            (columns 1, 4 and 7).

    Returns:
        None.
    """
    level1 = global_vars.raw_indttest_grouplevel1
    level2 = global_vars.raw_indttest_grouplevel2
    effect_size = global_vars.effect_size_choice
    has_effect_size = effect_size != "None"

    # An explicit copy makes the selection independent of output_df, so the
    # column assignments below cannot raise SettingWithCopyWarning and the
    # process-wide pandas option no longer has to be switched off and on.
    apa_table_df = output_df[[
        "Variable", "All_Mean", "All_SD",
        level1 + "_Mean", level1 + "_SD",
        level2 + "_Mean", level2 + "_SD",
        "Degrees_of_Freedom", "t", effect_size, "adjusted_pvalues"
    ]].copy()

    # Every column between "Variable" and the p-values is shown with two
    # decimals. Series.map is used per column because DataFrame.applymap is
    # deprecated in recent pandas releases.
    for column in list(apa_table_df.columns)[1:-1]:
        apa_table_df[column] = apa_table_df[column].map("{:.2f}".format)
    apa_table_df["adjusted_pvalues"] = apa_table_df["adjusted_pvalues"].map(
        helper_funcs.pvalue_formatting)

    doc = Document()
    table_rows_len = len(apa_table_df) + 2  # two header rows + data rows
    table_cols_len = len(apa_table_df.columns)
    table = doc.add_table(rows=table_rows_len, cols=table_cols_len)

    def fill_header(col, text, merge_with=None, italic=False):
        # Write a caption into header row 0, optionally merge it with the
        # cell at (row, col) ``merge_with`` and optionally italicise it.
        # The cell is re-fetched after merging, as the original code did.
        table.cell(row_idx=0, col_idx=col).text = text
        if merge_with is not None:
            table.cell(row_idx=0, col_idx=col).merge(
                table.cell(row_idx=merge_with[0], col_idx=merge_with[1]))
        if italic:
            helper_funcs.word_style(table.cell(row_idx=0, col_idx=col),
                                    italic=True)

    fill_header(0, "Variable", merge_with=(1, 0), italic=True)
    fill_header(1, "All, n={}".format(output_df.iloc[0, 1]),
                merge_with=(0, 2))
    fill_header(3, "{g}, n={n}".format(g=level1, n=output_df.iloc[0, 4]),
                merge_with=(0, 4))
    fill_header(5, "{g}, n={n}".format(g=level2, n=output_df.iloc[0, 7]),
                merge_with=(0, 6))
    fill_header(7, "df", merge_with=(1, 7), italic=True)
    fill_header(8, "t", merge_with=(1, 8), italic=True)
    # The effect size column is only merged when it is kept: a merged
    # column cannot be removed by helper_funcs.delete_columns_word below
    # (see the notes on that helper).
    fill_header(9, effect_size,
                merge_with=(1, 9) if has_effect_size else None,
                italic=True)
    fill_header(10, "p", merge_with=(1, 10), italic=True)

    # Second header row: an "M"/"SD" pair under each of the three captions.
    for first_col in range(1, 6, 2):
        for offset, label in enumerate(("M", "SD")):
            sub_header = table.cell(row_idx=1, col_idx=first_col + offset)
            sub_header.text = label
            helper_funcs.word_style(sub_header, italic=True)

    # Data rows start below the two header rows. str() is a no-op for the
    # already formatted columns and guards against non-string labels.
    data_rows = apa_table_df.itertuples(index=False, name=None)
    for row_idx, values in enumerate(data_rows, start=2):
        for col_idx, value in enumerate(values):
            table.cell(row_idx=row_idx, col_idx=col_idx).text = str(value)

    for table_row in table.rows:
        for cell in table_row.cells:
            cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
            cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER

    # Horizontal rules: above the header, above the first data row and
    # below the last row.
    for cell in table.rows[0].cells:
        helper_funcs.set_cell_border(cell, top=global_vars.border_APA_word)
    for cell in table.rows[2].cells:
        helper_funcs.set_cell_border(cell, top=global_vars.border_APA_word)
    for cell in table.rows[table_rows_len - 1].cells:
        helper_funcs.set_cell_border(cell, bottom=global_vars.border_APA_word)

    if not has_effect_size:
        helper_funcs.delete_columns_word(table, [9])

    doc = helper_funcs.set_autofit(doc)

    helper_funcs.add_correction_message_word(doc)

    helper_funcs.savefile(doc=doc)