def test_digest():
    """Check the fragment table returned by ``bioframe.digest`` on test.fa."""
    # Skip the test when the "Bio" module cannot be imported.
    pytest.importorskip("Bio")

    records = bioframe.load_fasta(testdir + "/test_data/test.fa")
    assert len(records) == 2

    # test.fa has no HindIII sites, so the row count must stay at one
    # row per FASTA record.
    hindiii_frags = bioframe.digest(records, "HindIII")
    assert hindiii_frags.shape == (2, 3)

    # chrTEST2 carries a single DpnII site, which adds one extra row.
    dpnii_frags = bioframe.digest(records, "DpnII")
    assert dpnii_frags.shape == (3, 3)

    # That DpnII site sits at position 3 of chrTEST2, so the first
    # chrTEST2 interval must end there.
    first_chrtest2_frag = dpnii_frags.iloc[1]
    assert first_chrtest2_frag.end == 3
def digest(chromsizes_path, fasta_path, enzyme_name):
    """Digest FASTA sequences with an enzyme and print the fragments as TSV.

    Parameters
    ----------
    chromsizes_path
        Path handed to ``bioframe.read_chromsizes``; used only to validate
        that every listed chromosome is present in the FASTA records.
    fasta_path
        Path handed to ``bioframe.load_fasta``.
    enzyme_name
        Enzyme name forwarded unchanged to ``bioframe.digest``.

    Raises
    ------
    ValueError
        If any chromosome named in the chromsizes index is missing from
        the loaded FASTA records.
    """
    import bioframe

    chromsizes = bioframe.read_chromsizes(chromsizes_path, all_names=True)
    fasta_records = bioframe.load_fasta(fasta_path, engine="pyfaidx", as_raw=True)

    all_present = chromsizes.index.isin(fasta_records).all()
    if not all_present:
        raise ValueError(
            f"Some chromosomes mentioned in {chromsizes_path}"
            f" are not found in {fasta_path}"
        )

    frags = bioframe.digest(fasta_records, enzyme_name)
    # to_csv() already terminates the last row; suppress print's own newline
    # so the TSV on stdout does not end with a spurious blank line.
    print(frags.to_csv(sep="\t", index=False), end="")