Python modify_raw_data_df 예제들, decision_funcs.modify_raw_data_df Python 예제들

예제 #1

0

파일 보기

def spss_indttest_tests_nonDefaultGroupLabels():
    """Compare SPSS independent t-test output against the stored table, using custom group labels.

    Sets the SPSS "indttest" configuration on ``global_vars`` (labels
    "MyG1"/"MyG2", n=50 per group, Cohen's d), runs the input workbook through
    ``decision_funcs.modify_raw_data_df`` and ``decision_funcs.generate_output_df``
    and asserts that the result equals the expected workbook.
    """
    # Test configuration set on the shared global_vars module.
    global_vars.input_type = "spss"
    global_vars.spss_test = "indttest"
    global_vars.spss_indttest_nOne = 50
    global_vars.spss_indttest_nTwo = 50
    global_vars.spss_indttest_groupOneLabel = "MyG1"
    global_vars.spss_indttest_groupTwoLabel = "MyG2"
    global_vars.effect_size_choice = "Cohen's d"

    # Actual: push the input workbook through the two functions under test.
    source_path = os.path.join(
        global_vars.TESTS_MAIN_FUNCS_INPUT_DATAFRAMES_FOLDER,
        "spss_indttest_tests.xlsx",
    )
    produced = decision_funcs.generate_output_df(
        decision_funcs.modify_raw_data_df(pd.read_excel(source_path))
    )

    # Expected: keep_default_na=False makes the few empty cells (columns
    # reserved for means and SDs) load as empty strings rather than np.nan.
    reference_path = os.path.join(
        global_vars.TESTS_MAIN_FUNCS_OUTPUT_DATAFRAMES_FOLDER,
        "spss_indttest_tests_nonDefaultGroupLabels.xlsx",
    )
    reference = pd.read_excel(reference_path, keep_default_na=False)

    # NOTE(review): check_less_precise is deprecated in pandas >= 1.1 (rtol/atol replace it).
    pd.testing.assert_frame_equal(produced, reference, check_less_precise=3)

예제 #2

0

파일 보기

def raw_mr_tests_withMissingData():
    """Compare raw-data multiple regression output against the stored table when data are missing.

    Sets the raw "mr" configuration on ``global_vars`` (outcome "var1",
    predictors "var2" and "var3"), runs the input workbook through
    ``decision_funcs.modify_raw_data_df`` and ``decision_funcs.generate_output_df``
    and asserts that the result equals the expected workbook.
    """
    # Test configuration set on the shared global_vars module.
    global_vars.input_type = "raw"
    global_vars.raw_test = "mr"
    global_vars.raw_mr_outcomevar = "var1"
    global_vars.raw_mr_predictors = ["var2", "var3"]

    # Actual: push the input workbook through the two functions under test.
    source_path = os.path.join(
        global_vars.TESTS_MAIN_FUNCS_INPUT_DATAFRAMES_FOLDER,
        "raw_mr_tests_withMissingData.xlsx",
    )
    produced = decision_funcs.generate_output_df(
        decision_funcs.modify_raw_data_df(pd.read_excel(source_path))
    )

    # Expected table as stored on disk.
    reference_path = os.path.join(
        global_vars.TESTS_MAIN_FUNCS_OUTPUT_DATAFRAMES_FOLDER,
        "raw_mr_tests_withMissingData.xlsx",
    )
    reference = pd.read_excel(reference_path)

    # Copy two first-row values from the actual frame into the expected one:
    # they are the same, but the check_less_precise argument mishandles them.
    # see - https://github.com/pandas-dev/pandas/issues/25068
    for column in ("beta", "Std Err beta"):
        reference.loc[0, column] = produced.loc[0, column]

    # NOTE(review): check_less_precise is deprecated in pandas >= 1.1 (rtol/atol replace it).
    pd.testing.assert_frame_equal(produced, reference, check_less_precise=3)

예제 #3

0

파일 보기

def spss_indttest_tests_defaultGroupLabels():
    """Compare SPSS independent t-test output against the stored table, using the default group labels.

    Sets the SPSS "indttest" configuration on ``global_vars`` (labels
    "Group1"/"Group2", n=50 per group, Cohen's d), runs the input workbook
    through ``decision_funcs.modify_raw_data_df`` and
    ``decision_funcs.generate_output_df`` and asserts that the result equals
    the expected workbook.
    """
    # Test configuration set on the shared global_vars module.
    global_vars.input_type = "spss"
    global_vars.spss_test = "indttest"
    global_vars.spss_indttest_nOne = 50
    global_vars.spss_indttest_nTwo = 50
    global_vars.spss_indttest_groupOneLabel = "Group1"
    global_vars.spss_indttest_groupTwoLabel = "Group2"
    global_vars.effect_size_choice = "Cohen's d"

    # Actual: push the input workbook through the two functions under test.
    source_path = os.path.join(
        global_vars.TESTS_MAIN_FUNCS_INPUT_DATAFRAMES_FOLDER,
        "spss_indttest_tests.xlsx",
    )
    produced = decision_funcs.generate_output_df(
        decision_funcs.modify_raw_data_df(pd.read_excel(source_path))
    )

    # Expected: read with keep_default_na=False so default NaN markers are
    # not converted to NaN on load.
    reference_path = os.path.join(
        global_vars.TESTS_MAIN_FUNCS_OUTPUT_DATAFRAMES_FOLDER,
        "spss_indttest_tests_defaultGroupLabels.xlsx",
    )
    reference = pd.read_excel(reference_path, keep_default_na=False)

    # NOTE(review): check_less_precise is deprecated in pandas >= 1.1 (rtol/atol replace it).
    pd.testing.assert_frame_equal(produced, reference, check_less_precise=3)

예제 #4

0

파일 보기

파일: tests_summ_indttest.py 프로젝트: nikbpetrov/mufos

def summ_indttest_tests_noEqualVarCol_hedgesg():
    """Compare summary-statistics t-test output (no equal-variance column, Hedge's g) against the stored table.

    Sets the "summ_indttest" column mapping on ``global_vars`` with an empty
    ``summ_indttest_equal_var``, runs the input workbook through
    ``decision_funcs.modify_raw_data_df`` and ``decision_funcs.generate_output_df``
    and asserts that the result equals the expected workbook.
    """
    # Test configuration set on the shared global_vars module.
    global_vars.input_type = "summ_indttest"
    global_vars.summ_indttest_var = "Variable"
    global_vars.summ_indttest_meanOne = "Mean1"
    global_vars.summ_indttest_sdOne = "SD1"
    global_vars.summ_indttest_nOne = "N1"
    global_vars.summ_indttest_meanTwo = "Mean2"
    global_vars.summ_indttest_sdTwo = "SD2"
    global_vars.summ_indttest_nTwo = "N2"
    global_vars.summ_indttest_equal_var = ""
    global_vars.effect_size_choice = "Hedge's g"

    # Actual: push the input workbook through the two functions under test.
    source_path = os.path.join(
        global_vars.TESTS_MAIN_FUNCS_INPUT_DATAFRAMES_FOLDER,
        "summ_indttest_tests_noEqualVarCol.xlsx",
    )
    produced = decision_funcs.generate_output_df(
        decision_funcs.modify_raw_data_df(pd.read_excel(source_path))
    )

    # Expected table as stored on disk.
    reference_path = os.path.join(
        global_vars.TESTS_MAIN_FUNCS_OUTPUT_DATAFRAMES_FOLDER,
        "summ_indttest_tests_noEqualVarCol_hedgesg.xlsx",
    )
    reference = pd.read_excel(reference_path)

    # NOTE(review): check_less_precise is deprecated in pandas >= 1.1 (rtol/atol replace it).
    pd.testing.assert_frame_equal(produced, reference, check_less_precise=3)

예제 #5

0

파일 보기

파일: tests_spss_mr.py 프로젝트: nikbpetrov/mufos

def spss_mr_tests_minimalStats():
    """Compare SPSS multiple regression output (minimal statistics) against the stored table.

    Sets the SPSS "mr" configuration on ``global_vars``, runs the input
    workbook through ``decision_funcs.modify_raw_data_df`` and
    ``decision_funcs.generate_output_df`` and asserts that the result equals
    the expected workbook after a few dtype adjustments.
    """
    # Test configuration set on the shared global_vars module.
    global_vars.input_type = "spss"
    global_vars.spss_test = "mr"

    # Actual: push the input workbook through the two functions under test.
    source_path = os.path.join(
        global_vars.TESTS_MAIN_FUNCS_INPUT_DATAFRAMES_FOLDER,
        "spss_mr_tests_minimalStats.xlsx",
    )
    produced = decision_funcs.generate_output_df(
        decision_funcs.modify_raw_data_df(pd.read_excel(source_path))
    )

    # Expected: read everything as object because the data formatting is done
    # inside the output_df function.
    reference_path = os.path.join(
        global_vars.TESTS_MAIN_FUNCS_OUTPUT_DATAFRAMES_FOLDER,
        "spss_mr_tests_minimalStats.xlsx",
    )
    reference = pd.read_excel(reference_path, dtype=object)

    # The cell at row 0, column 3 is read as NaN; copy the (empty) value from
    # the actual frame so the two agree.
    reference.iloc[0, 3] = produced.iloc[0, 3]
    # pvalues are compared as float (a later change): they are updated later in
    # the multitest correction function and formatted in the tables functions.
    reference["pvalues"] = reference["pvalues"].astype(float)

    # NOTE(review): check_less_precise is deprecated in pandas >= 1.1 (rtol/atol replace it).
    pd.testing.assert_frame_equal(produced, reference, check_less_precise=3)

예제 #6

0

파일 보기

def spss_correlations_tests_pearson():
    """Compare SPSS Pearson correlation output against the stored table.

    Sets the SPSS "corr" configuration on ``global_vars``, runs the input
    workbook through ``decision_funcs.modify_raw_data_df`` and
    ``decision_funcs.generate_output_df`` and asserts that the result equals
    the expected workbook.
    """
    # Test configuration set on the shared global_vars module.
    global_vars.input_type = "spss"
    global_vars.spss_test = "corr"

    # Actual: push the input workbook through the two functions under test.
    source_path = os.path.join(
        global_vars.TESTS_MAIN_FUNCS_INPUT_DATAFRAMES_FOLDER,
        "spss_correlations_tests_pearson.xlsx",
    )
    produced = decision_funcs.generate_output_df(
        decision_funcs.modify_raw_data_df(pd.read_excel(source_path))
    )

    # Expected table as stored on disk.
    reference_path = os.path.join(
        global_vars.TESTS_MAIN_FUNCS_OUTPUT_DATAFRAMES_FOLDER,
        "spss_correlations_tests_pearson.xlsx",
    )
    reference = pd.read_excel(reference_path)

    # NOTE(review): check_less_precise is deprecated in pandas >= 1.1 (rtol/atol replace it).
    pd.testing.assert_frame_equal(produced, reference, check_less_precise=3)

예제 #7

0

파일 보기

def raw_correlations_tests_pearson_withMissingData():
    """Compare raw-data Pearson correlation output against the stored table when data are missing.

    Sets the raw "corr" configuration on ``global_vars`` for variables
    "var1".."var8", runs the input workbook through
    ``decision_funcs.modify_raw_data_df`` and ``decision_funcs.generate_output_df``
    and asserts that the result equals the expected workbook.
    """
    # Test configuration set on the shared global_vars module.
    global_vars.input_type = "raw"
    global_vars.raw_test = "corr"
    global_vars.raw_corr_type = "pearson"
    global_vars.raw_corr_vars = [
        "var1",
        "var2",
        "var3",
        "var4",
        "var5",
        "var6",
        "var7",
        "var8",
    ]

    # Actual: push the input workbook through the two functions under test.
    source_path = os.path.join(
        global_vars.TESTS_MAIN_FUNCS_INPUT_DATAFRAMES_FOLDER,
        "raw_correlations_tests_withMissingData.xlsx",
    )
    produced = decision_funcs.generate_output_df(
        decision_funcs.modify_raw_data_df(pd.read_excel(source_path))
    )

    # Expected table as stored on disk.
    reference_path = os.path.join(
        global_vars.TESTS_MAIN_FUNCS_OUTPUT_DATAFRAMES_FOLDER,
        "raw_correlations_tests_pearson_withMissingData.xlsx",
    )
    reference = pd.read_excel(reference_path)

    # NOTE(review): check_less_precise is deprecated in pandas >= 1.1 (rtol/atol replace it).
    pd.testing.assert_frame_equal(produced, reference, check_less_precise=3)

예제 #8

0

파일 보기

def raw_indttest_tests_noEffectSize():
    """Compare raw-data independent t-test output (no effect size) against the stored table.

    Sets the raw "indttest" configuration on ``global_vars`` (grouping column
    "Group" with levels "Group1"/"Group2", three dependent variables, effect
    size "None"), runs the input workbook through
    ``decision_funcs.modify_raw_data_df`` and ``decision_funcs.generate_output_df``
    and asserts that the result equals the expected workbook.
    """
    # Test configuration set on the shared global_vars module.
    global_vars.input_type = "raw"
    global_vars.raw_test = "indttest"
    global_vars.raw_indttest_groupvar = "Group"
    global_vars.raw_indttest_grouplevel1 = "Group1"
    global_vars.raw_indttest_grouplevel2 = "Group2"
    global_vars.raw_indttest_dv = ["var1", "var2", "var3"]
    global_vars.effect_size_choice = "None"

    # Actual: push the input workbook through the two functions under test.
    source_path = os.path.join(
        global_vars.TESTS_MAIN_FUNCS_INPUT_DATAFRAMES_FOLDER,
        "raw_indttest_tests.xlsx",
    )
    produced = decision_funcs.generate_output_df(
        decision_funcs.modify_raw_data_df(pd.read_excel(source_path))
    )

    # Expected table as stored on disk.
    reference_path = os.path.join(
        global_vars.TESTS_MAIN_FUNCS_OUTPUT_DATAFRAMES_FOLDER,
        "raw_indttest_tests_noEffectSize.xlsx",
    )
    reference = pd.read_excel(reference_path)

    # Copy one p-value from the actual frame into the expected one: the values
    # are the same, but the check_less_precise argument mishandles them.
    # see - https://github.com/pandas-dev/pandas/issues/25068
    reference.loc[1, "pvalues"] = produced.loc[1, "pvalues"]

    # NOTE(review): check_less_precise is deprecated in pandas >= 1.1 (rtol/atol replace it).
    pd.testing.assert_frame_equal(produced, reference, check_less_precise=3)

예제 #9

0

파일 보기

파일: tests_raw_pairttest.py 프로젝트: nikbpetrov/mufos

def raw_pairttest_tests_unevenGroups_noEffectSize():
    """Compare raw-data paired t-test output (uneven groups, no effect size) against the stored table.

    Sets the raw "pairttest" configuration on ``global_vars`` for the pairs
    Bar1/Bar2 and Foo1/Foo2, runs the input workbook through
    ``decision_funcs.modify_raw_data_df`` and ``decision_funcs.generate_output_df``
    and asserts that the result equals the expected workbook.
    """
    # Test configuration set on the shared global_vars module.
    global_vars.input_type = "raw"
    global_vars.raw_test = "pairttest"
    global_vars.raw_pairttest_var_pairs = [['Bar1', 'Bar2'], ['Foo1', 'Foo2']]
    global_vars.effect_size_choice = "None"

    # Actual: push the input workbook through the two functions under test.
    source_path = os.path.join(
        global_vars.TESTS_MAIN_FUNCS_INPUT_DATAFRAMES_FOLDER,
        "raw_pairttest_tests_unevenGroups.xlsx",
    )
    produced = decision_funcs.generate_output_df(
        decision_funcs.modify_raw_data_df(pd.read_excel(source_path))
    )

    # Expected table as stored on disk.
    reference_path = os.path.join(
        global_vars.TESTS_MAIN_FUNCS_OUTPUT_DATAFRAMES_FOLDER,
        "raw_pairttest_tests_unevenGroups_noEffectSize.xlsx",
    )
    reference = pd.read_excel(reference_path)

    # Degrees of freedom are float in the actual frame because they are looked
    # up from the researchpy output dataframe, which defaults to float.
    dof_column = "Degrees of Freedom"
    reference[dof_column] = reference[dof_column].astype("float")

    # NOTE(review): check_less_precise is deprecated in pandas >= 1.1 (rtol/atol replace it).
    pd.testing.assert_frame_equal(produced, reference, check_less_precise=3)

예제 #10

0

파일 보기

def raw_indttest_tests_hedgesg():
    """Compare raw-data independent t-test output (Hedge's g) against the stored table.

    Sets the raw "indttest" configuration on ``global_vars`` (grouping column
    "Group" with levels "Group1"/"Group2", three dependent variables,
    Hedge's g), runs the input workbook through
    ``decision_funcs.modify_raw_data_df`` and ``decision_funcs.generate_output_df``
    and asserts that the result equals the expected workbook at two-digit
    precision.
    """
    # Test configuration set on the shared global_vars module.
    global_vars.input_type = "raw"
    global_vars.raw_test = "indttest"
    global_vars.raw_indttest_groupvar = "Group"
    global_vars.raw_indttest_grouplevel1 = "Group1"
    global_vars.raw_indttest_grouplevel2 = "Group2"
    global_vars.raw_indttest_dv = ["var1", "var2", "var3"]
    global_vars.effect_size_choice = "Hedge's g"

    # Actual: push the input workbook through the two functions under test.
    source_path = os.path.join(
        global_vars.TESTS_MAIN_FUNCS_INPUT_DATAFRAMES_FOLDER,
        "raw_indttest_tests.xlsx",
    )
    produced = decision_funcs.generate_output_df(
        decision_funcs.modify_raw_data_df(pd.read_excel(source_path))
    )

    # Expected table as stored on disk.
    reference_path = os.path.join(
        global_vars.TESTS_MAIN_FUNCS_OUTPUT_DATAFRAMES_FOLDER,
        "raw_indttest_tests_hedgesg.xlsx",
    )
    reference = pd.read_excel(reference_path)

    # Copy one p-value from the actual frame into the expected one: the values
    # are the same, but the check_less_precise argument mishandles them.
    # see - https://github.com/pandas-dev/pandas/issues/25068
    reference.loc[1, "pvalues"] = produced.loc[1, "pvalues"]

    # Precision is only 2 digits here: insignificant differences in the effect
    # sizes (slightly different numbers used in the formulas) would otherwise
    # fail the comparison.
    # NOTE(review): check_less_precise is deprecated in pandas >= 1.1 (rtol/atol replace it).
    pd.testing.assert_frame_equal(produced, reference, check_less_precise=2)

예제 #11

0

파일 보기

파일: tests_APAtables.py 프로젝트: nikbpetrov/mufos

def apa_outputs_test():
    """Regenerate every APA table listed in ``apa_table_tests.csv`` and compare it with its expected file.

    Each CSV row holds, by column position, the ``global_vars`` settings for
    one test case plus the test name (column 39) and test id (column 40).
    For every row the settings are applied, the ``decision_funcs`` pipeline
    (``get_raw_data_df`` -> ``modify_raw_data_df`` -> ``generate_output_df`` ->
    ``multitest_correction`` -> ``save_output``) is run, and the saved file is
    compared with the expected table of the same name.

    Raises:
        Exception: if the XML of a generated Word document differs from the
            expected one, or if any public attribute of any non-empty expected
            Excel cell differs from the matching cell of the generated workbook.
    """
    # Local import: closing() guarantees the read-only workbooks are closed.
    from contextlib import closing

    df = pd.read_csv(os.path.join(global_vars.unit_tests_directory, "APA_tables", "apa_table_tests.csv"), keep_default_na=False)

    def str_to_arr(x, pairttest=False):
        """Turn a stringified list such as "['a', 'b']" into a list of strings.

        Brackets, single quotes and spaces are stripped before splitting on
        commas. With ``pairttest=True`` the items are grouped into consecutive
        chunks of two.
        """
        for unwanted in ("[", "]", "'", " "):
            x = x.replace(unwanted, "")
        x = x.split(",")
        if pairttest:
            x = [x[i:i + 2] for i in range(0, len(x), 2)]
        return x

    for row_ind in range(len(df)):
        row = df.loc[row_ind]
        # Columns are addressed by position. .iloc is used explicitly because
        # integer keys on a string-labelled Series rely on a deprecated
        # positional fallback of Series.__getitem__.
        cell = row.iloc

        global_vars.input_path_and_filename = os.path.join(TESTS_APA_TABLES_INPUT_DATAFRAMES_FOLDER, cell[0])
        global_vars.alpha_threshold = cell[1]
        # Column 2 is not read; the real output filename is set further below.
        global_vars.output_filename = None
        global_vars.output_filetype = cell[3]
        global_vars.input_type = cell[4]
        global_vars.raw_test = cell[5]
        global_vars.raw_corr_type = cell[6]
        global_vars.raw_corr_vars = str_to_arr(cell[7])
        global_vars.raw_corr_include_CI = cell[8]
        global_vars.raw_mr_outcomevar = cell[9]
        global_vars.raw_mr_predictors = str_to_arr(cell[10])
        global_vars.raw_indttest_groupvar = cell[11]
        global_vars.raw_indttest_grouplevel1 = cell[12]
        global_vars.raw_indttest_grouplevel2 = cell[13]
        global_vars.raw_indttest_dv = str_to_arr(cell[14])
        global_vars.raw_pairttest_var_pairs = str_to_arr(cell[15], pairttest=True)
        global_vars.summ_corr_varOne = cell[16]
        global_vars.summ_corr_varTwo = cell[17]
        global_vars.summ_corr_coeff = cell[18]
        global_vars.summ_corr_pvalues = cell[19]
        global_vars.summ_indttest_var = cell[20]
        global_vars.summ_indttest_meanOne = cell[21]
        global_vars.summ_indttest_sdOne = cell[22]
        global_vars.summ_indttest_nOne = cell[23]
        global_vars.summ_indttest_meanTwo = cell[24]
        global_vars.summ_indttest_sdTwo = cell[25]
        global_vars.summ_indttest_nTwo = cell[26]
        global_vars.summ_indttest_equal_var = cell[27]
        global_vars.spss_test = cell[28]
        global_vars.spss_indttest_nOne = cell[29]
        global_vars.spss_indttest_nTwo = cell[30]
        global_vars.spss_indttest_groupOneLabel = cell[31]
        global_vars.spss_indttest_groupTwoLabel = cell[32]
        global_vars.spss_pairttest_n = cell[33]
        global_vars.pvalues_col = cell[34]
        global_vars.effect_size_choice = cell[35]
        global_vars.correction_type = cell[36]
        global_vars.non_numeric_input_raise_errors = cell[37]
        global_vars.corr_table_triangle = cell[38]
        test_name = cell[39]
        test_id = cell[40]

        # Shared "<id>_<name>" stem of the generated and the expected file.
        table_stem = str(test_id) + "_" + test_name
        global_vars.output_filename = os.path.join(TESTS_APA_TABLES_OUTPUT_TABLES_FOLDER, table_stem)

        raw_data_df = decision_funcs.get_raw_data_df()
        mod_raw_data_df = decision_funcs.modify_raw_data_df(raw_data_df)
        output_df = decision_funcs.generate_output_df(mod_raw_data_df)
        output_df = decision_funcs.multitest_correction(output_df)
        decision_funcs.save_output(mod_raw_data_df, output_df)

        if global_vars.output_filetype == "Word":
            expected_table = Document(os.path.join(TEST_APA_TABLES_EXPECTED_TABLES_FOLDER, table_stem + ".docx"))
            output_table = Document(os.path.join(TESTS_APA_TABLES_OUTPUT_TABLES_FOLDER, table_stem + ".docx"))
            if expected_table.element.xml != output_table.element.xml:
                raise Exception("Problem when comparing word xml. Test ID: {}".format(test_id))
        elif global_vars.output_filetype == "Excel":
            expected_path = os.path.join(TEST_APA_TABLES_EXPECTED_TABLES_FOLDER, table_stem + ".xlsx")
            output_path = os.path.join(TESTS_APA_TABLES_OUTPUT_TABLES_FOLDER, table_stem + ".xlsx")
            # Read-only workbooks keep their file open until close() is called,
            # so both are closed here even when a mismatch raises.
            with closing(load_workbook(expected_path, read_only=True)) as expected_table, \
                    closing(load_workbook(output_path, read_only=True)) as output_table:
                expected_table_ws = expected_table.active
                output_table_ws = output_table.active

                # get all public cell attributes (except the parent back-reference);
                # then loop through each cell and compare each cell's attributes
                cell_attrs = [attr for attr in dir(expected_table_ws["A1"]) if not attr.startswith("_") and attr != "parent"]
                for ws_row in expected_table_ws.iter_rows(min_row=1, max_row=expected_table_ws.max_row, max_col=expected_table_ws.max_column):
                    for expected_table_cell in ws_row:
                        # Cells of class EmptyCell are skipped.
                        if type(expected_table_cell).__name__ == "EmptyCell":
                            continue
                        output_table_cell = output_table_ws[expected_table_cell.coordinate]
                        for attr in cell_attrs:
                            if getattr(expected_table_cell, attr) != getattr(output_table_cell, attr):
                                raise Exception("Problem when comparing excel cells. Test ID {id}. Problematic cell's coordinates: {c}. Problematic attribute: {a}".format(id=test_id, c=expected_table_cell.coordinate, a=attr))