Ejemplo n.º 1
0
def test_attach(tmp_path):
    """Join a main-database table against an indexed copy in an attached file.

    A database is created at ``tmp_path/test.gsql`` and filled through
    ``_fill_exons`` with ``gri=False`` (presumably skipping index creation
    -- confirm against the helper). A second file is then attached as schema
    ``db2``, ``exons`` is copied into ``db2.exons2``, a genomic range index
    is created on the copy, and a reference assembly is stored in ``db2``.
    The test asserts the row count of the cross-schema overlap join and that
    more than 24 reference sequences are readable from ``db2``.
    """
    main_path = str(tmp_path / "test.gsql")
    attached_path = str(tmp_path / "test_attached.gsql")

    con = genomicsqlite.connect(main_path, unsafe_load=True)
    _fill_exons(con, gri=False)
    con.commit()

    # Attach the second file under the schema name "db2".
    con.executescript(
        genomicsqlite.attach_sql(con, attached_path, "db2", unsafe_load=True)
    )

    # Copy the exons into the attached schema, then index the copy.
    con.executescript("CREATE TABLE db2.exons2 AS SELECT * FROM exons")
    gri_script = genomicsqlite.create_genomic_range_index_sql(
        con, "db2.exons2", "rid", "beg", "end"
    )
    con.executescript(gri_script)

    # Store the reference assembly inside the attached schema as well.
    con.executescript(
        genomicsqlite.put_reference_assembly_sql(
            con, "GRCh38_no_alt_analysis_set", schema="db2"
        )
    )

    # For every row of exons, look up overlapping rows of db2.exons2 through
    # the generated rowid subquery, excluding the row's own id.
    overlap_rowids = genomicsqlite.genomic_range_rowids_sql(
        con, "db2.exons2", "exons.rid", "exons.beg", "exons.end"
    )
    join_sql = "".join(
        [
            "SELECT exons.id, db2.exons2.id FROM exons LEFT JOIN db2.exons2 ON db2.exons2._rowid_ IN\n",
            overlap_rowids,
            " AND exons.id != db2.exons2.id ORDER BY exons.id, db2.exons2.id",
        ]
    )
    joined_rows = list(con.execute(join_sql))
    assert len(joined_rows) == 5191

    db2_refseqs = genomicsqlite.get_reference_sequences_by_name(
        con, schema="db2"
    )
    assert len(db2_refseqs) > 24
Ejemplo n.º 2
0
def test_join():
    """Compare an index-assisted overlap join with an unindexed control join.

    Runs once for each value of ``len_gri`` that is forwarded to
    ``_fill_exons`` when populating ``exons2``. Each pass checks that the
    query plan mentions the expected index range constraint exactly twice,
    that the join yields 5191 rows of which 5 have no ``exons2`` match, and
    that the result equals the ``NOT INDEXED`` control query.
    """
    # Fragment expected in the EXPLAIN QUERY PLAN detail column for each
    # index range scan.
    gri_constraint = "((_gri_rid,_gri_lvl,_gri_beg)>(?,?,?) AND (_gri_rid,_gri_lvl,_gri_beg)<(?,?,?))"
    join_head = "SELECT exons.id, exons2.id FROM exons LEFT JOIN exons2 ON exons2._rowid_ IN\n"
    join_tail = " AND exons.id != exons2.id ORDER BY exons.id, exons2.id"
    control_sql = (
        "SELECT exons.id, exons2.id FROM exons LEFT JOIN exons2 NOT INDEXED ON NOT (exons2.end < exons.beg OR exons2.beg > exons.end)"
        + join_tail
    )

    for len_gri in (False, True):
        con = sqlite3.connect(":memory:")
        assembly_script = genomicsqlite.put_reference_assembly_sql(
            con, "GRCh38_no_alt_analysis_set"
        )
        con.executescript(assembly_script)
        _fill_exons(con, table="exons")
        _fill_exons(con, floor=2, table="exons2", len_gri=len_gri)
        con.commit()

        overlap_rowids = genomicsqlite.genomic_range_rowids_sql(
            con, "exons2", "exons.rid", "exons.beg", "exons.end"
        )
        query = join_head + overlap_rowids + join_tail
        print(query)

        # Column 3 of each EXPLAIN QUERY PLAN row holds the step description.
        plan_details = [
            step[3] for step in con.execute("EXPLAIN QUERY PLAN " + query)
        ]
        for detail in plan_details:
            print(detail)
        indexed = sum(1 for detail in plan_details if gri_constraint in detail)
        assert indexed == 2

        results = con.execute(query).fetchall()
        assert len(results) == 5191
        # Rows whose right-hand id is NULL had no overlapping partner.
        unmatched = sum(1 for row in results if row[1] is None)
        assert unmatched == 5

        expected = con.execute(control_sql).fetchall()
        assert results == expected
Ejemplo n.º 3
0
def test_refseq():
    """Exercise the reference-assembly script and the reference-sequence lookups.

    Checks the shape of the SQL produced by ``put_reference_assembly_sql``
    (195 ``INSERT INTO`` lines plus 2 other lines), executes it, and then
    verifies that the by-rid and by-name lookups agree with each other and
    carry the expected chr17 metadata. Finally it runs a parameterized range
    query joined against ``_gri_refseq`` and checks the number of hits.
    """
    con = sqlite3.connect(":memory:")

    create_assembly = genomicsqlite.put_reference_assembly_sql(
        con, "GRCh38_no_alt_analysis_set"
    )

    # Split the script into INSERT lines and everything else in one pass.
    insert_lines = []
    other_lines = []
    for script_line in create_assembly.strip().split("\n"):
        if "INSERT INTO" in script_line:
            insert_lines.append(script_line)
        else:
            other_lines.append(script_line)

    print("\n".join(insert_lines[:24]))
    assert len(insert_lines) == 195
    print("\n".join(other_lines))
    assert len(other_lines) == 2
    con.executescript(create_assembly)

    _fill_exons(con, floor=2)
    con.commit()

    by_rid = genomicsqlite.get_reference_sequences_by_rid(con)
    by_name = genomicsqlite.get_reference_sequences_by_name(con)
    for seq in by_rid.values():
        # Both lookups must resolve to the same record.
        assert by_rid[seq.rid] == seq
        assert by_name[seq.name] == seq
        if seq.name != "chr17":
            continue
        assert seq.rid == 17
        assert seq.length == 83257441
        assert seq.assembly == "GRCh38_no_alt_analysis_set"
        assert seq.refget_id == "f9a0fb01553adb183568e3eb9d8626db"
    assert len(by_rid) == 195

    # With only the table name given, the rowid subquery is driven by the
    # three positional parameters bound at execution time below.
    rowids_sql = genomicsqlite.genomic_range_rowids_sql(con, "exons")
    query = (
        "SELECT _gri_refseq._gri_rid, rid, beg, end, id FROM exons, _gri_refseq WHERE exons.rid = gri_refseq_name AND exons._rowid_ IN "
        + rowids_sql
    )
    print("\n" + query)
    between_count = sum(1 for sql_line in query.split("\n") if "BETWEEN" in sql_line)
    assert between_count == 2

    hits = con.execute(query, ("chr17", 43115725, 43125370)).fetchall()
    assert len(hits) == 56
Ejemplo n.º 4
0
def test_connect(tmp_path):
    """Write a database file, reopen it read-only, and run an overlap join.

    The file at ``tmp_path/test.gsql`` is populated through a connection
    opened with ``unsafe_load=True``. That handle is discarded and the file
    is reopened with ``read_only=True``; the overlap self-join issued through
    the second handle must return 5191 rows.
    """
    dbfile = str(tmp_path / "test.gsql")

    # Phase 1: create and populate the database.
    writer = genomicsqlite.connect(dbfile, unsafe_load=True)
    assembly_script = genomicsqlite.put_reference_assembly_sql(
        writer, "GRCh38_no_alt_analysis_set"
    )
    writer.executescript(assembly_script)
    _fill_exons(writer)
    writer.commit()
    # Drop the writer handle before reopening the file read-only.
    del writer

    # Phase 2: reopen read-only and query.
    reader = genomicsqlite.connect(dbfile, read_only=True)
    overlap_rowids = genomicsqlite.genomic_range_rowids_sql(
        reader, "exons", "exons2.rid", "exons2.beg", "exons2.end"
    )
    join_sql = "".join(
        [
            "WITH exons2 AS (SELECT * from exons) SELECT exons.id, exons2.id FROM exons2 LEFT JOIN exons ON exons._rowid_ IN\n",
            overlap_rowids,
            " AND exons.id != exons2.id ORDER BY exons.id, exons2.id",
        ]
    )
    joined_rows = list(reader.execute(join_sql))
    assert len(joined_rows) == 5191