def genotypearray_df():
    """Build a DataFrame from ``plink_test_small`` with two extra columns.

    The dataset is read with ``io.from_plink`` using ``max_variants=20`` and
    ``swap_alleles=True``.  Two columns are then appended:

    * ``"num"``  -- the float ``1.0`` in every row
    * ``"bool"`` -- ``True`` for rows whose position is a multiple of 3,
      ``False`` otherwise
    """
    DATA_DIR = Path(__file__).parent.parent / "data" / "plink"
    source_path = DATA_DIR / "plink_test_small"
    df = io.from_plink(source_path, max_variants=20, swap_alleles=True)

    row_count = len(df)
    df["num"] = [1.0] * row_count
    df["bool"] = [position % 3 == 0 for position in range(row_count)]
    return df
def test_round_trip_sim(tmp_path):
    """Write simulated case/control data with ``io.to_plink`` and read it back.

    The reloaded frame must equal the frame that was written, and writing
    must leave the in-memory genotype data untouched.
    """
    out_dir = tmp_path / "test"
    out_dir.mkdir()
    output = str(out_dir / "test")

    data = sim.BAMS().generate_case_control()
    # Snapshot taken before saving, used below to detect side effects
    original = data.copy()

    io.to_plink(
        data,
        output,
        phenotype_name="Outcome",
        phenotype_case="Case",
        phenotype_control="Control",
    )

    # Load data and reset index to extract phenotype and get original data format back
    reloaded = io.from_plink(output, categorical_phenotype=True)
    reloaded = reloaded.reset_index(level=-1)
    reloaded = reloaded.reset_index(drop=True)
    reloaded.columns = data.columns  # Correct column names

    # Categorical order may be different, so it is not compared
    assert_frame_equal(data, reloaded, check_categorical=False)

    # Ensure there were no side effects
    alleles_before = original["SNP1"].array.allele_idxs
    alleles_after = data["SNP1"].array.allele_idxs
    assert_array_equal(alleles_before, alleles_after)
def test_round_trip_real(tmp_path):
    """Load ``plink_test_small``, write it with ``io.to_plink``, and reload it.

    The frame read from the original files (``max_variants=100``) must equal
    the frame read back from the newly written files.
    """
    out_dir = tmp_path / "test"
    out_dir.mkdir()
    output = str(out_dir / "test")

    # Load data
    source_path = DATA_DIR / "plink_test_small"
    loaded = io.from_plink(
        str(source_path), categorical_phenotype=True, max_variants=100
    )

    # Save data
    io.to_plink(loaded, output)

    # Reload data (output is already a str)
    reloaded = io.from_plink(output, categorical_phenotype=True)

    # Compare with the index moved into columns on both sides
    expected = loaded.reset_index()
    actual = reloaded.reset_index()
    assert_frame_equal(expected, actual, check_categorical=False)
def plink_small_20_swap():
    """Return ``plink_test_small`` read with ``max_variants=20`` and ``swap_alleles=True``."""
    source_path = DATA_DIR / "plink" / "plink_test_small"
    return io.from_plink(source_path, max_variants=20, swap_alleles=True)
def plink_small_20():
    """Return ``plink_test_small`` read with ``max_variants=20``."""
    source_path = DATA_DIR / "plink" / "plink_test_small"
    return io.from_plink(source_path, max_variants=20)
def test_loaded_medium():
    """Check that ``plink_test_medium`` loads with the expected shape."""
    source_path = DATA_DIR / "plink_test_medium"
    loaded = io.from_plink(source_path)

    expected_shape = (600, 45100)
    assert loaded.shape == expected_shape
def test_small():
    """Check that ``plink_test_small`` loads with the expected shape.

    The dataset is read with ``categorical_phenotype=True``.
    """
    source_path = DATA_DIR / "plink_test_small"
    loaded = io.from_plink(source_path, categorical_phenotype=True)

    expected_shape = (150, 3020)
    assert loaded.shape == expected_shape