def conn():
    """Build a file-backed database populated from the snpeff example VCF.

    Returns:
        The connection returned by ``sql.get_sql_connection`` for a fresh
        temporary ``.db`` file, after ``examples/test.snpeff.vcf`` has been
        imported into it. The temporary file itself is not removed here.
    """
    import os

    # A real file is required rather than ":memory:" because SqlThread opens
    # the file.
    # mkstemp() returns (descriptor, path); only the path is needed, so the
    # descriptor is closed right away instead of being leaked.
    descriptor, tempdb = tempfile.mkstemp(suffix=".db")
    os.close(descriptor)

    conn = sql.get_sql_connection(tempdb)
    # Close the VCF once the import call has returned.
    with open("examples/test.snpeff.vcf") as vcf_file:
        importer.import_reader(conn, VcfReader(vcf_file, "snpeff"))
    return conn
def test_import_pedfile():
    """Test import of samples from .tfam PED file.

    Imports the snpeff example VCF into an in-memory database, applies the
    pedigree file, and checks that exactly the two conforming samples are
    stored with the expected family, parent, sex and phenotype values.
    """
    conn = sqlite3.connect(":memory:")

    # Close the VCF as soon as the import is finished instead of leaking it.
    with open("examples/test.snpeff.vcf") as vcf_file:
        import_reader(conn, VcfReader(vcf_file, "snpeff"))
    import_pedfile(conn, "examples/test.snpeff.pedigree.tfam")

    # dict(row) needs rows addressable by column name; set the row factory
    # explicitly rather than relying on an earlier call having done it.
    conn.row_factory = sqlite3.Row
    samples = [dict(row) for row in conn.execute("SELECT * FROM samples")]
    # Everything needed is now in plain dicts; release the connection.
    conn.close()
    print("Found samples:", samples)

    expected_first_sample = {
        "id": 1,
        "name": "NORMAL",
        "family_id": "fam",
        "father_id": 2,
        "mother_id": 1,
        "sex": 2,
        "phenotype": 1,
    }
    expected_second_sample = {
        "id": 2,
        "name": "TUMOR",
        "family_id": "fam",
        "father_id": 0,
        "mother_id": 0,
        "sex": 1,
        "phenotype": 2,
    }

    # The third sample of the pedigree file is not conforming and is expected
    # to be left out.
    assert len(samples) == 2

    assert expected_first_sample in samples
    assert expected_second_sample in samples
Beispiel #3
0
def conn():
    """Return a connection to a temporary database holding the example VCF.

    Returns:
        The connection returned by ``sql.get_sql_connection`` for a newly
        created temporary ``.db`` file, with ``examples/test.snpeff.vcf``
        imported. The temporary file is left on disk.
    """
    import os

    # A real file is required to make this work.
    # mkstemp() hands back an already-open descriptor along with the path;
    # close it immediately so it is not leaked.
    descriptor, tempdb = tempfile.mkstemp(suffix=".db")
    os.close(descriptor)

    conn = sql.get_sql_connection(tempdb)
    # The VCF handle is only needed while the import runs.
    with open("examples/test.snpeff.vcf") as vcf_file:
        importer.import_reader(conn, VcfReader(vcf_file, "snpeff"))
    return conn
Beispiel #4
0
def create_reader(filepath):
    """Context manager that wraps the given file and yields a suitable reader.

    The file type is detected from the path suffixes and, for VCF files, the
    annotation format is detected with ``detect_vcf_annotation``.

    Filetypes and annotations parsers supported:

        - vcf.gz: snpeff, vep
        - vcf: snpeff, vep
        - csv, tsv, txt: vep

    NOTE(review): this is a generator function; it presumably relies on a
    ``contextlib.contextmanager`` decorator applied where it is defined.

    Args:
        filepath: Path of the file to read.

    Yields:
        A ``VcfReader`` or ``CsvReader`` bound to the opened file. The file is
        closed when the generator is resumed or an exception is thrown into
        it at the yield point.

    Raises:
        Exception: If no parser matches the file suffixes.
    """
    path = pathlib.Path(filepath)
    suffixes = path.suffixes

    LOGGER.debug(
        "create_reader: PATH suffix %s, is_gz_file: %s",
        suffixes,
        cm.is_gz_file(filepath),
    )

    if ".vcf" in suffixes:
        annotation_detected = detect_vcf_annotation(filepath)
        # Gzipped VCF files are handed to the reader in binary mode, plain
        # VCF files in text mode.
        mode = "rb" if ".gz" in suffixes else "r"
        # The with-block guarantees the file is closed even when the consumer
        # raises while using the reader.
        with open(filepath, mode) as device:
            yield VcfReader(device, annotation_parser=annotation_detected)
        return

    if {".tsv", ".csv", ".txt"} & set(suffixes):
        with open(filepath, "r") as device:
            yield CsvReader(device)
        return

    raise Exception("create_reader:: Could not choose parser for this file.")
def test_import_and_create_counting():
    """Check the genotype counters computed when importing with a pedigree.

    Imports the snpeff example VCF together with its pedigree file into an
    in-memory database, then verifies the phenotype of the two samples and
    the per-variant hom/het/ref counters (global, control and case).
    """
    pedfile = "examples/test.snpeff.pedigree.tfam"

    conn = sqlite3.connect(":memory:")

    # Keep the VCF open only while the import generator is being consumed.
    with open("examples/test.snpeff.vcf") as vcf_file:
        reader = VcfReader(vcf_file, "snpeff")
        for _, msg in async_import_reader(conn, reader, pedfile):
            print(msg)

    # Rows are accessed by column name below; set the row factory explicitly
    # rather than relying on an earlier call having done it.
    conn.row_factory = sqlite3.Row

    samples = list(sql.get_samples(conn))

    assert samples[0]["phenotype"] == 1
    assert samples[1]["phenotype"] == 2

    for record in conn.execute(
            """SELECT count_hom, count_het, count_ref, control_count_hom,control_count_het, control_count_ref,
        case_count_hom,case_count_het, case_count_ref  FROM variants"""):
        print(dict(record))
        assert record["control_count_ref"] == 1
        assert record["case_count_het"] == 1
        assert record["count_hom"] == 0
        assert record["count_het"] == 1
Beispiel #6
0
def conn():
    """Return an in-memory database populated from the snpeff example VCF.

    Returns:
        The connection returned by ``sql.get_sql_connection(":memory:")``
        after ``examples/test.snpeff.vcf`` has been imported into it.
    """
    conn = sql.get_sql_connection(":memory:")
    # Close the VCF once the import call has returned instead of leaking it.
    with open("examples/test.snpeff.vcf") as vcf_file:
        import_reader(conn, VcfReader(vcf_file, "snpeff"))
    return conn
Beispiel #7
0
import pytest
import sqlite3
from collections import OrderedDict

# Custom imports

from cutevariant.core.reader.abstractreader import nullify

from cutevariant.core.reader import VcfReader, FakeReader
from cutevariant.core.reader import BedReader
from cutevariant.core.reader import check_variant_schema, check_field_schema
from cutevariant.core import sql

# Reader instances used to parametrize the tests below: one FakeReader and
# three VcfReader objects (no annotation parser, "vep", "snpeff").
# The example files are opened at import time, when this list is built.
READERS = [
    FakeReader(),
    VcfReader(open("examples/test.vcf")),
    VcfReader(open("examples/test.vep.vcf"), "vep"),
    VcfReader(open("examples/test.snpeff.vcf"), "snpeff"),
]


@pytest.mark.parametrize("reader",
                         READERS,
                         ids=[str(i.__class__.__name__) for i in READERS])
def test_fields(reader):
    fields = tuple(reader.get_fields())
    field_names = [f["name"] for f in fields]

    # test mandatory fields name
    assert "chr" in field_names
    assert "pos" in field_names