def test__missing_ld_pair_col__throws_error(self, valid_basic_pargs, temp_test_dir):
    """Expect a RuntimeError when an underscore-named LD scores column is missing.

    The first column whose name contains an underscore is removed from a
    one-row copy of the LD scores file, and the resulting error message must
    mention both halves of that column's name.
    """
    pargs = copy.copy(valid_basic_pargs)

    # Load a single row of the original LD scores file (enough to get its columns)
    ld_frame = pd.read_csv(pargs.ld_scores[0], sep=None, engine='python',
                           nrows=1, comment="#")

    # Choose the first column with an underscore in its name and split the name
    underscore_cols = [name for name in ld_frame.columns if "_" in name]
    removed_col = underscore_cols[0]
    first_part, second_part = removed_col.split("_")

    # Write the file back out without that column and point the args at it
    trimmed_frame = ld_frame.drop(columns=[removed_col])
    broken_path = os.path.join(temp_test_dir, 'missing_pair_col_ldscores.txt')
    trimmed_frame.to_csv(broken_path, sep="\t", index=False)
    pargs.ld_scores = [broken_path]

    with pytest.raises(RuntimeError) as ex_info:
        mama.validate_inputs(pargs, {})

    error_text = str(ex_info.value)
    assert first_part in error_text
    assert second_part in error_text
def test__invalid_frequency_filter_range__throws_error(self, valid_basic_pargs):
    """Expect a RuntimeError when the lower frequency bound exceeds the upper.

    Both bound values must show up in the error message.
    """
    pargs = copy.copy(valid_basic_pargs)

    # Deliberately inverted range: minimum frequency above maximum frequency
    pargs.freq_bounds = [1.0, 0.0]

    with pytest.raises(RuntimeError) as ex_info:
        mama.validate_inputs(pargs, {})

    error_text = str(ex_info.value)
    assert str(pargs.freq_bounds[0]) in error_text
    assert str(pargs.freq_bounds[1]) in error_text
def test__freq_filter_flag__expected_results(self, min_f, max_f, valid_basic_pargs):
    """Check the frequency filter that validate_inputs builds from freq_bounds.

    The filter function is applied to frequencies just below, at, and just
    above each bound (plus the midpoint).  Exactly two entries are expected
    to be flagged: the first (just below ``min_f``) and the last (just above
    ``max_f``).  Both bounds must also appear in the filter description.
    """
    n = copy.copy(valid_basic_pargs)

    # Set the frequency filter bounds (plain attribute assignment; the name
    # is a constant, so setattr is unnecessary)
    n.freq_bounds = [min_f, max_f]

    # Small offset used to probe either side of each bound
    eps = 0.00001
    freq_data = [
        min_f - eps,
        min_f,
        min_f + eps,
        0.5 * (min_f + max_f),
        max_f - eps,
        max_f,
        max_f + eps,
    ]
    df = pd.DataFrame({ss.FREQ_COL: freq_data})

    result = mama.validate_inputs(n, dict())

    assert mama.FREQ_FILTER in result[mama.FILTER_MAP]
    freq_filter_func, freq_filter_desc = result[mama.FILTER_MAP][mama.FREQ_FILTER]

    filt_results = freq_filter_func(df)

    # Only the two out-of-range values (first and last entries) are flagged
    assert filt_results.sum() == 2
    assert filt_results[0]
    assert filt_results[len(filt_results) - 1]

    assert str(min_f) in freq_filter_desc
    assert str(max_f) in freq_filter_desc
def test__specify_std_units__expected_results(self, valid_basic_pargs):
    """Check that enabling use_standardized_units is reflected in the result."""
    pargs = copy.copy(valid_basic_pargs)

    # Turn on the standardized units flag
    pargs.use_standardized_units = True

    outcome = mama.validate_inputs(pargs, {})

    std_units_enabled = outcome['use_standardized_units']
    assert std_units_enabled
def test__filter_removal_flags__expected_results(self, farg, filter_name, valid_basic_pargs):
    """Check that setting attribute ``farg`` to True drops ``filter_name``.

    After validation, ``filter_name`` must not be a key of the filter map.
    """
    pargs = copy.copy(valid_basic_pargs)

    # Switch on the flag that should remove the indicated filter
    setattr(pargs, farg, True)

    outcome = mama.validate_inputs(pargs, {})

    active_filters = outcome[mama.FILTER_MAP]
    assert filter_name not in active_filters
def test__specify_reg_out__expected_results(self, valid_basic_pargs):
    """Check the regression output filename string when out_reg_coef is set.

    The value must be truthy and contain both the ``out`` argument and the
    LD coefficient suffix.
    """
    pargs = copy.copy(valid_basic_pargs)

    # Turn on the regression coefficient output flag
    pargs.out_reg_coef = True

    outcome = mama.validate_inputs(pargs, {})

    reg_fstr = outcome[mama.REG_FILENAME_FSTR]
    assert reg_fstr
    assert pargs.out in reg_fstr
    assert mama.LD_COEF_SUFFIX in reg_fstr
def test__specify_harm_out__expected_results(self, valid_basic_pargs):
    """Check the harmonized output filename string when out_harmonized is set.

    The value must be truthy and contain both the ``out`` argument and the
    harmonized suffix.
    """
    pargs = copy.copy(valid_basic_pargs)

    # Turn on the harmonized output flag
    pargs.out_harmonized = True

    outcome = mama.validate_inputs(pargs, {})

    harm_fstr = outcome[mama.HARM_FILENAME_FSTR]
    assert harm_fstr
    assert pargs.out in harm_fstr
    assert mama.HARMONIZED_SUFFIX in harm_fstr
def test__reg_coef_file_opts__expected_results(self, valid_basic_pargs):
    """Check handling of an LD coefficient matrix supplied via reg_ld_coef.

    A matrix loaded with ``mama.input_np_matrix`` is passed in; the result
    must hold a 9x9 ndarray equal to the outer product of the reference
    vector with itself, with the SE^2 option at OFFDIAG_ZERO and the
    intercept option at ALL_FREE.
    """
    pargs = copy.copy(valid_basic_pargs)

    # Reference vector: the stored matrix is expected to equal its outer product
    base_vec = np.array([1.0, 2.0, 3.0] * 3)
    expected_matrix = np.outer(base_vec, base_vec)

    # Load the coefficient matrix from the test data directory
    relative_path = os.path.join(data_directory, "coef_mat/sym_mat_1.coef")
    coef_path = os.path.abspath(relative_path)
    pargs.reg_ld_coef = mama.input_np_matrix(coef_path)

    outcome = mama.validate_inputs(pargs, {})

    ld_coef = outcome[mama.REG_LD_COEF_OPT]
    assert isinstance(ld_coef, np.ndarray)
    assert ld_coef.shape == (9, 9)
    assert np.allclose(ld_coef, expected_matrix)

    assert outcome[mama.REG_SE2_COEF_OPT] == mama.MAMA_REG_OPT_OFFDIAG_ZERO
    assert outcome[mama.REG_INT_COEF_OPT] == mama.MAMA_REG_OPT_ALL_FREE
def test__reg_coef_nonfile_opts__expected_results(self, ld_se2_int_tuples, valid_basic_pargs):
    """Check the regression options produced by non-file coefficient flags.

    Each tuple in ``ld_se2_int_tuples`` supplies an attribute name at index 0
    and the expected option value at index 1, for the LD, SE^2 and intercept
    options in that order.
    """
    pargs = copy.copy(valid_basic_pargs)
    scale = np.random.rand()

    # Apply every flag; "reg_ld_set_corr" carries the scale factor, the rest are booleans
    for flag_tuple in ld_se2_int_tuples:
        flag_name = flag_tuple[0]
        if flag_name == "reg_ld_set_corr":
            flag_value = scale
        else:
            flag_value = True
        setattr(pargs, flag_name, flag_value)

    outcome = mama.validate_inputs(pargs, {})

    expected_ld_opt = ld_se2_int_tuples[0][1]
    assert outcome[mama.REG_LD_COEF_OPT] == expected_ld_opt
    assert outcome[mama.REG_SE2_COEF_OPT] == ld_se2_int_tuples[1][1]
    assert outcome[mama.REG_INT_COEF_OPT] == ld_se2_int_tuples[2][1]

    # The scale factor is only checked when the LD option is the set-correlation one
    if expected_ld_opt == mama.MAMA_REG_OPT_SET_CORR:
        assert outcome[mama.REG_LD_COEF_SCALE_COEF] == scale
def test__add_and_replace_re__expected_results(self, valid_basic_pargs):
    """Check the regex map after one "add" and one "replace" expression are set.

    The added expression is expected to be joined to the default one with
    "|", while the replacing expression is expected to stand alone.
    """
    pargs = copy.copy(valid_basic_pargs)

    # Expression to add for one column
    appended_col = ss.CHR_COL
    appended_re = 'XYZ'
    append_arg = mama.to_arg(mama.MAMA_RE_ADD_FLAGS[appended_col])
    setattr(pargs, append_arg, appended_re)

    # Expression to replace for another column
    swapped_col = ss.A1_COL
    swapped_re = '.*A1.*'
    swap_arg = mama.to_arg(mama.MAMA_RE_REPLACE_FLAGS[swapped_col])
    setattr(pargs, swap_arg, swapped_re)

    outcome = mama.validate_inputs(pargs, {})
    regex_map = outcome[mama.RE_MAP]

    expected_appended = "|".join((mama.MAMA_RE_EXPR_MAP[appended_col], appended_re))
    assert regex_map[appended_col] == expected_appended
    assert regex_map[swapped_col] == swapped_re
def test__happy_path__expected_results(self, valid_basic_pargs, temp_test_dir):
    """Check the result of validate_inputs for the unmodified basic arguments.

    Covers the output directory and prefix, the ancestry count, the regex and
    filter maps, the summary-statistics map size, the optional output
    filename strings, the regression options, and finally that every
    attribute of the parsed arguments is echoed in the result.
    """
    outcome = mama.validate_inputs(valid_basic_pargs, {})

    ancestry_count = len(outcome[mama.ANCESTRIES])

    # Output location and basic structure
    assert outcome[mama.OUT_DIR] == temp_test_dir
    assert outcome[mama.OUT_PREFIX] == 'test_prefix'
    assert ancestry_count > 0
    assert outcome[mama.RE_MAP] == mama.MAMA_RE_EXPR_MAP
    assert outcome[mama.FILTER_MAP] == mama.MAMA_STD_FILTERS

    expected_sumstats_count = len(valid_basic_pargs.sumstats)
    assert len(outcome[mama.SUMSTATS_MAP]) == expected_sumstats_count

    # Optional outputs are not requested here
    assert not outcome[mama.HARM_FILENAME_FSTR]
    assert not outcome[mama.REG_FILENAME_FSTR]

    # Regression options
    assert outcome[mama.REG_LD_COEF_OPT] == mama.MAMA_REG_OPT_ALL_FREE
    assert outcome[mama.REG_SE2_COEF_OPT] == mama.MAMA_REG_OPT_OFFDIAG_ZERO
    assert outcome[mama.REG_INT_COEF_OPT] == mama.MAMA_REG_OPT_ALL_FREE

    assert not outcome['use_standardized_units']

    # Every attribute on the parsed arguments must appear unchanged in the result
    for attr_name, attr_value in vars(valid_basic_pargs).items():
        assert attr_value == outcome[attr_name]