Exemple #1
0
def genotypearray_df():
    """Build a test frame from the small plink dataset plus two extra columns.

    Calls ``io.from_plink`` on ``data/plink/plink_test_small`` (resolved two
    directories above this file) with ``max_variants=20`` and
    ``swap_alleles=True``, then adds:

    * ``"num"``: the constant ``1.0`` in every row.
    * ``"bool"``: ``True`` at row positions divisible by 3, ``False`` elsewhere.

    Returns:
        The object returned by ``io.from_plink`` with the two columns added.
    """
    data_dir = Path(__file__).parent.parent / "data" / "plink"
    # Deliberately not called ``input`` so the builtin is not shadowed.
    plink_path = data_dir / "plink_test_small"
    df = io.from_plink(plink_path, max_variants=20, swap_alleles=True)
    n_rows = len(df)
    df["num"] = [1.0] * n_rows
    df["bool"] = [position % 3 == 0 for position in range(n_rows)]
    return df
def test_round_trip_sim(tmp_path):
    """Write simulated case-control data to plink files and check the reload matches."""
    out_dir = tmp_path / "test"
    out_dir.mkdir()
    output = str(out_dir / "test")

    data = sim.BAMS().generate_case_control()
    # Snapshot taken before saving; compared against ``data`` at the end.
    original = data.copy()

    io.to_plink(
        data,
        output,
        phenotype_name="Outcome",
        phenotype_case="Case",
        phenotype_control="Control",
    )

    # Reload, move the last index level back into a column, then discard the
    # remaining index so the frame lines up with the simulated one.
    reloaded = io.from_plink(output, categorical_phenotype=True)
    reloaded = reloaded.reset_index(level=-1)
    reloaded = reloaded.reset_index(drop=True)
    # Column names are overwritten with the simulated frame's names.
    reloaded.columns = data.columns

    # check_categorical=False: the categorical ordering is allowed to differ.
    assert_frame_equal(data, reloaded, check_categorical=False)

    # Saving must not have modified the frame that was passed in.
    allele_idxs_before = original["SNP1"].array.allele_idxs
    allele_idxs_after = data["SNP1"].array.allele_idxs
    assert_array_equal(allele_idxs_before, allele_idxs_after)
def test_round_trip_real(tmp_path):
    """Read the small plink dataset, write it back out, and check the reload matches."""
    target_dir = tmp_path / "test"
    target_dir.mkdir()
    output = str(target_dir / "test")

    # First load, from the dataset under DATA_DIR.
    source_path = DATA_DIR / "plink_test_small"
    first_load = io.from_plink(
        str(source_path), categorical_phenotype=True, max_variants=100
    )

    # Write to the temporary location and read it back.
    io.to_plink(first_load, output)
    second_load = io.from_plink(str(output), categorical_phenotype=True)

    # Both frames are compared with their index reset into columns.
    expected = first_load.reset_index()
    actual = second_load.reset_index()
    assert_frame_equal(expected, actual, check_categorical=False)
Exemple #4
0
def plink_small_20_swap():
    """Return ``io.from_plink`` output for the small dataset with ``max_variants=20`` and ``swap_alleles=True``."""
    return io.from_plink(
        DATA_DIR / "plink" / "plink_test_small",
        max_variants=20,
        swap_alleles=True,
    )
Exemple #5
0
def plink_small_20():
    """Return ``io.from_plink`` output for the small dataset with ``max_variants=20``."""
    return io.from_plink(
        DATA_DIR / "plink" / "plink_test_small",
        max_variants=20,
    )
def test_loaded_medium():
    """Check the shape of the medium dataset after loading."""
    medium_path = DATA_DIR / "plink_test_medium"
    loaded = io.from_plink(medium_path)
    expected_shape = (600, 45100)
    assert loaded.shape == expected_shape
def test_small():
    """Check the shape of the small dataset loaded with a categorical phenotype."""
    small_path = DATA_DIR / "plink_test_small"
    loaded = io.from_plink(small_path, categorical_phenotype=True)
    expected_shape = (150, 3020)
    assert loaded.shape == expected_shape