def test_digest():
    """Exercise ``bioframe.digest`` on the two-record test FASTA.

    The test is skipped when the ``Bio`` package cannot be imported.
    """
    pytest.importorskip("Bio")

    fasta_path = testdir + "/test_data/test.fa"
    records = bioframe.load_fasta(fasta_path)
    assert len(records) == 2

    # test.fa contains no HindIII sites, so the row count must stay at one
    # fragment per record.
    hindiii_frags = bioframe.digest(records, "HindIII")
    assert hindiii_frags.shape == (2, 3)

    # chrTEST2 carries a single DpnII site, which adds one more row.
    dpnii_frags = bioframe.digest(records, "DpnII")
    assert dpnii_frags.shape == (3, 3)

    # That DpnII site sits at position 3 of chrTEST2, so the first chrTEST2
    # interval (second row overall) has to end at 3.
    first_chrtest2_frag = bioframe.digest(records, "DpnII").iloc[1]
    assert first_chrtest2_frag.end == 3
Esempio n. 2
0
def digest(chromsizes_path, fasta_path, enzyme_name):
    """Digest a FASTA file with a restriction enzyme and print the fragments.

    The fragment table returned by ``bioframe.digest`` is written to standard
    output as tab-separated text, with a header row and without the index.

    Parameters
    ----------
    chromsizes_path
        Path handed to ``bioframe.read_chromsizes`` (with ``all_names=True``).
        The resulting index is used only to validate the FASTA contents.
    fasta_path
        Path handed to ``bioframe.load_fasta`` (``engine="pyfaidx"``,
        ``as_raw=True``).
    enzyme_name
        Enzyme name forwarded unchanged to ``bioframe.digest``.

    Raises
    ------
    ValueError
        If any chromosome in the chromsizes index is missing from the loaded
        FASTA records.
    """
    import bioframe

    chromsizes = bioframe.read_chromsizes(chromsizes_path, all_names=True)
    fasta_records = bioframe.load_fasta(fasta_path,
                                        engine="pyfaidx",
                                        as_raw=True)

    # Refuse to continue unless every listed chromosome has a FASTA record.
    if not chromsizes.index.isin(fasta_records).all():
        raise ValueError("Some chromosomes mentioned in {}"
                         " are not found in {}".format(chromsizes_path,
                                                       fasta_path))

    frags = bioframe.digest(fasta_records, enzyme_name)

    # to_csv() already terminates the last row with a newline; suppress the
    # one print() would add so the output does not end with a blank line.
    print(frags.to_csv(sep="\t", index=False), end="")