Ejemplo n.º 1
0
def test_display_genotypes__duplicate_variant_ids():
    """Check the text rendering when two variants share the same ID.

    The expected table labels its rows 0, 1, 2 rather than with the
    assigned variant IDs.
    """
    dataset = simulate_genotype_call_dataset(n_variant=3, n_sample=3, seed=0)
    # Assign variant IDs in which "V1" appears twice, plus an all-False mask.
    duplicated_ids = np.array(["V0", "V1", "V1"])
    id_mask = np.array([False, False, False])
    dataset["variant_id"] = (["variants"], duplicated_ids)
    dataset["variant_id_mask"] = (["variants"], id_mask)
    expected = """\
        samples    S0   S1   S2
        variants               
        0         0/0  1/0  1/0
        1         0/1  1/0  0/1
        2         0/0  1/0  1/1"""  # noqa: W291
    rendered = str(display_genotypes(dataset))
    assert rendered == dedent(expected)
Ejemplo n.º 2
0
def test_display_genotypes__truncated_rows():
    """Check the text rendering when only the variant rows are truncated.

    With 10 variants and ``max_variants=4`` the expected table keeps the
    first two and last two rows around a ``...`` row, while all 10 sample
    columns are shown.
    """
    dataset = simulate_genotype_call_dataset(
        n_variant=10, n_sample=10, seed=0
    )
    expected = """\
        samples    S0   S1   S2   S3   S4   S5   S6   S7   S8   S9
        variants                                                  
        0         0/0  1/0  1/0  0/1  1/0  0/1  0/0  1/0  1/1  0/0
        1         1/0  0/1  1/0  1/1  1/1  1/0  1/0  0/0  1/0  1/1
        ...       ...  ...  ...  ...  ...  ...  ...  ...  ...  ...
        8         0/1  0/0  1/0  0/1  0/1  1/0  1/0  0/1  1/0  1/0
        9         1/1  0/1  1/0  0/1  1/0  1/1  0/1  1/0  1/1  1/0

        [10 rows x 10 columns]"""  # noqa: W291
    rendered = display_genotypes(dataset, max_variants=4, max_samples=10)
    assert str(rendered) == dedent(expected)
Ejemplo n.º 3
0
def test_display_genotypes():
    """Check the text and HTML renderings of a small 3x3 dataset.

    The text form must match exactly; the HTML table only needs to be
    contained in the output of ``_repr_html_()``.
    """
    dataset = simulate_genotype_call_dataset(
        n_variant=3, n_sample=3, seed=0
    )
    rendered = display_genotypes(dataset)

    # Plain-text form: compared for exact equality after dedenting.
    expected = """\
        samples    S0   S1   S2
        variants               
        0         0/0  1/0  1/0
        1         0/1  1/0  0/1
        2         0/0  1/0  1/1"""  # noqa: W291
    assert str(rendered) == dedent(expected)

    # HTML form: compared by substring containment.
    expected_html = """<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th>samples</th>
      <th>S0</th>
      <th>S1</th>
      <th>S2</th>
    </tr>
    <tr>
      <th>variants</th>
      <th></th>
      <th></th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>0/0</td>
      <td>1/0</td>
      <td>1/0</td>
    </tr>
    <tr>
      <th>1</th>
      <td>0/1</td>
      <td>1/0</td>
      <td>0/1</td>
    </tr>
    <tr>
      <th>2</th>
      <td>0/0</td>
      <td>1/0</td>
      <td>1/1</td>
    </tr>
  </tbody>
</table>""".strip()
    html = rendered._repr_html_()
    assert expected_html in html
Ejemplo n.º 4
0
def test_display_genotypes__large():
    """Check the truncated text rendering of a 100000 x 1000 dataset.

    With ``max_variants=4`` and ``max_samples=4`` the expected table shows
    two rows and two columns at each edge, and the footer reports the full
    dimensions.
    """
    dataset = simulate_genotype_call_dataset(
        n_variant=100_000, n_sample=1000, seed=0
    )
    expected = """\
        samples    S0   S1  ... S998 S999
        variants            ...          
        0         0/0  1/0  ...  0/1  1/1
        1         1/1  1/1  ...  0/1  1/1
        ...       ...  ...  ...  ...  ...
        99998     0/1  1/1  ...  1/0  0/1
        99999     1/0  1/0  ...  1/0  1/0

        [100000 rows x 1000 columns]"""  # noqa: W291
    rendered = display_genotypes(dataset, max_variants=4, max_samples=4)
    assert str(rendered) == dedent(expected)
Ejemplo n.º 5
0
def test_create_genotype_call_dataset():
    """Build a dataset from explicit arrays and verify its contents.

    Checks that the four expected dimensions exist, that the ``source`` and
    ``contigs`` attributes are set, that every input array round-trips
    (with ``call_genotype_mask`` equal to ``call_genotype < 0``), and that
    the text rendering matches. In the expected rendering, phased calls are
    joined with ``|``, unphased calls with ``/``, and ``-1`` alleles are
    shown as ``.``.
    """
    variant_contig_names = ["chr1"]
    variant_contig = np.array([0, 0], dtype="i1")
    variant_position = np.array([1000, 2000], dtype="i4")
    variant_allele = np.array([["A", "C"], ["G", "A"]], dtype="S1")
    variant_id = np.array(["rs1", "rs2"], dtype=str)
    sample_id = np.array(["sample_1", "sample_2", "sample_3"], dtype=str)
    call_genotype = np.array(
        [
            [[0, 0], [0, 1], [1, 0]],
            [[-1, 0], [0, -1], [-1, -1]],
        ],
        dtype="i1",
    )
    call_genotype_phased = np.array(
        [
            [True, True, False],
            [True, False, False],
        ],
        dtype=bool,
    )
    dataset = create_genotype_call_dataset(
        variant_contig_names=variant_contig_names,
        variant_contig=variant_contig,
        variant_position=variant_position,
        variant_allele=variant_allele,
        sample_id=sample_id,
        call_genotype=call_genotype,
        call_genotype_phased=call_genotype_phased,
        variant_id=variant_id,
    )

    # Every expected dimension must be present.
    for dim in (DIM_VARIANT, DIM_SAMPLE, DIM_PLOIDY, DIM_ALLELE):
        assert dim in dataset.dims

    assert dataset.attrs["source"] == f"sgkit-{__version__}"
    assert dataset.attrs["contigs"] == variant_contig_names

    # Each stored variable must equal the array it was created from.
    expected_variables = [
        ("variant_contig", variant_contig),
        ("variant_position", variant_position),
        ("variant_allele", variant_allele),
        ("variant_id", variant_id),
        ("sample_id", sample_id),
        ("call_genotype", call_genotype),
        ("call_genotype_mask", call_genotype < 0),
        ("call_genotype_phased", call_genotype_phased),
    ]
    for name, expected in expected_variables:
        assert_array_equal(dataset[name], expected)

    rendered = display_genotypes(dataset)
    expected_text = """
samples  sample_1 sample_2 sample_3
variants                           
rs1           0|0      0|1      1/0
rs2           .|0      0/.      ./.
""".strip()  # noqa: W291
    assert str(rendered) == expected_text
Ejemplo n.º 6
0
def test_display_genotypes__truncated_columns():
    """Check the text rendering when only the sample columns are truncated.

    With 10 samples and ``max_samples=4`` the expected table keeps the first
    two and last two columns around a ``...`` column, while all 10 variant
    rows are shown.
    """
    dataset = simulate_genotype_call_dataset(
        n_variant=10, n_sample=10, seed=0
    )
    expected = """\
        samples    S0   S1  ...   S8   S9
        variants            ...          
        0         0/0  1/0  ...  1/1  0/0
        1         1/0  0/1  ...  1/0  1/1
        2         1/1  1/1  ...  0/0  1/0
        3         0/1  0/0  ...  1/0  0/0
        4         0/1  0/0  ...  0/0  1/1
        5         1/1  1/0  ...  0/0  1/0
        6         1/1  0/0  ...  1/0  0/1
        7         1/0  0/1  ...  0/1  0/0
        8         0/1  0/0  ...  1/0  1/0
        9         1/1  0/1  ...  1/1  1/0

        [10 rows x 10 columns]"""  # noqa: W291
    rendered = display_genotypes(dataset, max_variants=10, max_samples=4)
    assert str(rendered) == dedent(expected)
Ejemplo n.º 7
0
def test_display_genotypes__truncated_rows_and_columns():
    """Check text and HTML renderings truncated along both axes.

    With a 10x10 dataset and both limits set to 4, the expected output
    keeps two rows and two columns at each edge with ``...`` fillers, and
    reports the full 10 x 10 dimensions. The text form must match exactly;
    the HTML only needs to be contained in ``_repr_html_()``.
    """
    dataset = simulate_genotype_call_dataset(
        n_variant=10, n_sample=10, seed=0
    )
    rendered = display_genotypes(dataset, max_variants=4, max_samples=4)

    # Plain-text form: compared for exact equality after dedenting.
    expected = """\
        samples    S0   S1  ...   S8   S9
        variants            ...          
        0         0/0  1/0  ...  1/1  0/0
        1         1/0  0/1  ...  1/0  1/1
        ...       ...  ...  ...  ...  ...
        8         0/1  0/0  ...  1/0  1/0
        9         1/1  0/1  ...  1/1  1/0

        [10 rows x 10 columns]"""  # noqa: W291
    assert str(rendered) == dedent(expected)

    # HTML form: compared by substring containment.
    expected_html = """<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th>samples</th>
      <th>S0</th>
      <th>S1</th>
      <th>...</th>
      <th>S8</th>
      <th>S9</th>
    </tr>
    <tr>
      <th>variants</th>
      <th></th>
      <th></th>
      <th></th>
      <th></th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>0/0</td>
      <td>1/0</td>
      <td>...</td>
      <td>1/1</td>
      <td>0/0</td>
    </tr>
    <tr>
      <th>1</th>
      <td>1/0</td>
      <td>0/1</td>
      <td>...</td>
      <td>1/0</td>
      <td>1/1</td>
    </tr>
    <tr>
      <th>...</th>
      <td>...</td>
      <td>...</td>
      <td>...</td>
      <td>...</td>
      <td>...</td>
    </tr>
    <tr>
      <th>8</th>
      <td>0/1</td>
      <td>0/0</td>
      <td>...</td>
      <td>1/0</td>
      <td>1/0</td>
    </tr>
    <tr>
      <th>9</th>
      <td>1/1</td>
      <td>0/1</td>
      <td>...</td>
      <td>1/1</td>
      <td>1/0</td>
    </tr>
  </tbody>
</table>
<p>10 rows x 10 columns</p>""".strip()
    html = rendered._repr_html_()
    assert expected_html in html