Python pca 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: sgkit.stats.pca

메소드/함수: pca

hotexamples.com에서의 예제들: 9

Python pca - 9개의 예제를 찾았습니다. 오픈소스 프로젝트에서 추출한 Python sgkit.stats.pca.pca의 실제 사용 예제 중 가장 높은 평가를 받은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

def test_pca__raise_on_missing_data(sample_dataset, sentinel):
    """Check that ``pca.pca`` raises ``ValueError`` for sentinel-filled counts.

    Every alternate allele count that is not equal to 1 is replaced by
    ``sentinel`` before the dataset is handed to ``pca.pca``.
    """
    counts = sample_dataset["call_alternate_allele_count"]
    # Keep entries equal to 1; overwrite all other entries with the sentinel.
    masked_counts = counts.where(counts == 1, sentinel)
    ds_with_sentinel = sample_dataset.assign(
        call_alternate_allele_count=masked_counts
    )
    expected_message = "Input data cannot contain missing values"
    with pytest.raises(ValueError, match=expected_message):
        pca.pca(ds_with_sentinel, n_components=2)

예제 #2

파일 보기

def test_pca__default_allele_counts_with_index(sample_dataset):
    """Run ``pca.pca`` on an indexed dataset lacking the allele count variable.

    The ``call_alternate_allele_count`` variable is dropped and the
    ``variants`` dimension is indexed by contig and position; the test
    passes if computing the result does not raise.
    """
    without_counts = sample_dataset.drop_vars("call_alternate_allele_count")
    indexed = without_counts.set_index(
        {"variants": ("variant_contig", "variant_position")}
    )
    result = pca.pca(indexed, n_components=2, merge=False)
    result.compute()

예제 #3

파일 보기

파일: test_pca.py 프로젝트: timothymillar/sgkit

def test_pca__lazy_evaluation(shape, chunks, algorithm):
    """Ensure that all new variables are backed by lazy (dask) arrays.

    The combination of the "tsqr" algorithm with all-positive chunk sizes
    is not exercised; the test returns without checking anything for it.
    """
    if algorithm != "tsqr" or not all(size > 0 for size in chunks):
        simulated = simulate_dataset(*shape, chunks=chunks)  # type: ignore[misc]
        result = pca.pca(
            simulated, n_components=2, algorithm=algorithm, merge=False
        )
        for name in result:
            assert isinstance(result[name].data, da.Array)

예제 #4

파일 보기

def test_pca__array_backend(backend, algorithm):
    """Ensure that calculation succeeds regardless of array input backend.

    The allele count variable of a simulated dataset is rewrapped through
    ``backend.asarray`` before running ``pca.pca``; every resulting
    variable is then computed.
    """
    counts_key = "call_alternate_allele_count"
    simulated = simulate_dataset(25, 5)
    counts = simulated[counts_key]
    # Swap the underlying data for the backend's array type.
    simulated[counts_key] = counts.copy(data=backend.asarray(counts))
    result = pca.pca(
        simulated, n_components=2, algorithm=algorithm, merge=False
    )
    for name in result:
        result[name].compute()

예제 #5

파일 보기

파일: test_pca.py 프로젝트: timothymillar/sgkit

def test_pca__tsqr_allel_comparison(shape, chunks, n_components):
    """Validate the chunked, non-random implementation against scikit-allel.

    The scikit-allel side is run on the allele counts cast to float32, with
    the "patterson" scaler and ``randomized=False``. Both projections must
    be float32 before the results are compared.
    """
    projection_key = "sample_pca_projection"
    simulated = simulate_dataset(*shape, chunks=chunks)  # type: ignore[misc]
    sgkit_result = pca.pca(
        simulated, n_components=n_components, algorithm="tsqr"
    )
    counts = simulated["call_alternate_allele_count"].values.astype("float32")
    allel_result = allel_pca(
        counts,
        n_components=n_components,
        scaler="patterson",
        randomized=False,
    )
    assert sgkit_result[projection_key].values.dtype == np.float32
    assert allel_result[projection_key].values.dtype == np.float32
    validate_allel_comparison(sgkit_result, allel_result)

예제 #6

파일 보기

파일: test_pca.py 프로젝트: timothymillar/sgkit

def test_pca__stability(stability_test_result, chunks, algorithm):
    """Compare PCA output for a given chunking against a reference result.

    Results should be stable across algorithms, and sign flips must not
    occur when chunking changes. The "tsqr" algorithm with all-positive
    chunk sizes is not exercised.
    """
    tsqr_with_positive_chunks = algorithm == "tsqr" and all(
        size > 0 for size in chunks
    )
    if tsqr_with_positive_chunks:
        return
    shape, reference = stability_test_result
    simulated = simulate_dataset(*shape, chunks=chunks, n_cohort=3)  # type: ignore[misc]
    pca_options = {
        "n_components": 2,
        "algorithm": algorithm,
        "n_iter": 6,
        "random_state": 0,
        "merge": False,
    }
    observed = pca.pca(simulated, **pca_options)
    # Chunking is expected to shift results slightly, whereas a sign flip
    # would move them by far more than the 1e-5 tolerance used here.
    xr.testing.assert_allclose(reference, observed, atol=1e-5)

예제 #7

파일 보기

파일: test_pca.py 프로젝트: timothymillar/sgkit

def test_pca__randomized_allel_comparison(shape, chunks, n_components):
    """Validate the chunked, randomized implementation against scikit-allel.

    Randomized validation requires more data, more structure, and fewer
    components in order for results to be equal within the same tolerance
    as deterministic SVD. Both sides use five iterations and a random
    state of 0, and both projections must be float32 before comparison.
    """
    projection_key = "sample_pca_projection"
    simulated = simulate_dataset(*shape, chunks=chunks, n_cohort=3)  # type: ignore[misc]
    sgkit_result = pca.pca(
        simulated,
        n_components=n_components,
        algorithm="randomized",
        n_iter=5,
        random_state=0,
    )
    counts = simulated["call_alternate_allele_count"].values.astype("float32")
    allel_result = allel_pca(
        counts,
        n_components=n_components,
        scaler="patterson",
        randomized=True,
        iterated_power=5,
        random_state=0,
    )
    assert sgkit_result[projection_key].values.dtype == np.float32
    assert allel_result[projection_key].values.dtype == np.float32
    validate_allel_comparison(sgkit_result, allel_result)

예제 #8

파일 보기

파일: test_pca.py 프로젝트: timothymillar/sgkit

def stability_test_result(request):
    """Produce the reference PCA result for the requested dataset shape.

    The shape is read from ``request.param``; the dataset is simulated with
    ``chunks=(-1, -1)`` and three cohorts, then reduced with the "tsqr"
    algorithm. Returns a ``(shape, result)`` tuple.
    """
    dims = request.param
    simulated = simulate_dataset(*dims, chunks=(-1, -1), n_cohort=3)  # type: ignore[misc]
    reference = pca.pca(
        simulated, n_components=2, algorithm="tsqr", merge=False
    )
    return dims, reference

예제 #9

파일 보기

파일: test_pca.py 프로젝트: timothymillar/sgkit

def test_pca__default_allele_counts(sample_dataset):
    """Run ``pca.pca`` on a dataset lacking the allele count variable.

    The ``call_alternate_allele_count`` variable is dropped beforehand; the
    test passes if computing the result does not raise.
    """
    without_counts = sample_dataset.drop_vars("call_alternate_allele_count")
    result = pca.pca(without_counts, n_components=2, merge=False)
    result.compute()