Python BedDataSet Examples

Programming Language: Python

Namespace/Package Name: peaksql

Method/Function: BedDataSet

Examples at hotexamples.com: 10

Python BedDataSet - 10 examples found. These are the top-rated real-world Python examples of peaksql.BedDataSet extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

Show file

 def test_306_BedDataSet_array_from_query(self):
     """Check the boolean mask that ``array_from_query`` builds for a window.

     Two query rows, ``(0, 15, 25)`` and ``(0, 5, 13)``, are evaluated with
     the bounds 10 and 20. The expected mask is True at the first three and
     the last five of its ten positions.
     """
     window_start, window_end = 10, 20
     # NOTE(review): rows look like (chromosome, start, end) -- confirm
     intervals = [(0, 15, 25), (0, 5, 13)]
     expected = np.array(
         [[True, True, True, False, False, True, True, True, True, True]]
     )

     dataset = peaksql.BedDataSet(DATABASE_BED, seq_length=10, stride=10)
     observed = dataset.array_from_query(intervals, window_start, window_end)
     assert np.all(observed == expected)

Example #2

Show file

    def test_310_BedDataSet_random_pos_distribution(self):
        """Random positions should be spread evenly over the chromosomes.

        The per-chromosome counts are recovered by differencing
        ``dataset.cumsum``; each count must lie between 24.5% and 25.5% of
        the requested number of positions.
        """
        total_positions = 100_000
        dataset = peaksql.BedDataSet(
            DATABASE_BED, seq_length=10, nr_rand_pos=total_positions
        )

        # chromosomes are of equal size, so each is expected to receive an
        # equal share of the positions
        shifted = np.roll(dataset.cumsum, shift=1)
        # the first difference wraps around (first minus last), so skip it
        per_chromosome = (dataset.cumsum - shifted)[1:]
        for n_positions in per_chromosome:
            assert 0.245 <= n_positions / total_positions <= 0.255

Example #3

Show file

    def test_302_BedDataSet_stride_sequences(self):
        """Every strided sample must equal one of the expected windows.

        The reference DNA is cut into consecutive, non-overlapping windows of
        ``seq_length`` bases and one-hot encoded with
        ``peaksql.util.sequence_to_onehot``. Each sequence yielded by the
        dataset has to match at least one of those encoded windows.
        """
        seq_length = 10
        dataset = peaksql.BedDataSet(
            DATABASE_BED, seq_length=seq_length, stride=seq_length
        )

        all_dna = ("AAAACCCCGGGGTTTTAAACCCGGGTTTAACCGGTTACGT" +
                   "TTTTGGGGCCCCAAAATTTGGGCCCAAATTGGCCAATGCA" +
                   "ATGCGTAGCTGATCGATGCTAGCTAGCTAGCTAGCTAAAA" +
                   "ATGGTGAATGTGAGTAGTGATGATGAGTGTAGTGAGGGGG")

        dna_strided = [
            all_dna[start:start + seq_length]
            for start in range(0, len(all_dna), seq_length)
        ]
        dna_onehot = [
            peaksql.util.sequence_to_onehot(dna) for dna in dna_strided
        ]
        for seq, _label in dataset:
            # Builtin any() instead of np.sum over a generator: NumPy
            # deprecated generator input to np.sum, and any() stops at the
            # first matching window.
            assert any(np.all(seq == candidate) for candidate in dna_onehot)

Example #4

Show file

 def test_309_BedDataSet_random_pos_sequences(self):
     """Every randomly positioned sample must occur in a reference chromosome.

     All windows of ``seq_length`` bases are cut from the one-hot encoded
     reference chromosomes, and each sequence yielded by the dataset has to
     equal at least one of them.
     """
     seq_length = 10
     dataset = peaksql.BedDataSet(DATABASE_BED,
                                  seq_length=seq_length,
                                  nr_rand_pos=20)
     all_dna = [
         "AAAACCCCGGGGTTTTAAACCCGGGTTTAACCGGTTACGT",
         "TTTTGGGGCCCCAAAATTTGGGCCCAAATTGGCCAATGCA",
         "ATGCGTAGCTGATCGATGCTAGCTAGCTAGCTAGCTAAAA",
         "ATGGTGAATGTGAGTAGTGATGATGAGTGTAGTGAGGGGG",
     ]
     dna_onehot = [peaksql.util.sequence_to_onehot(dna) for dna in all_dna]

     # Build every valid window once. The "+ 1" keeps the last full window
     # (start 30 of a 40-base chromosome), which a range(0, 30) scan would
     # skip and so could reject a legitimate sample.
     windows = [
         onehot[start:start + seq_length]
         for dna, onehot in zip(all_dna, dna_onehot)
         for start in range(len(dna) - seq_length + 1)
     ]
     for seq, _label in dataset:
         assert any(np.all(seq == window) for window in windows)

Example #5

Show file

File: test_04_dataloader_integration.py Project: vanheeringen-lab/peaksql

 def test_402_Integration_PyTorch_DataLoader(self):
     """A BedDataSet wrapped in a PyTorch ``DataLoader`` yields full batches.

     Every batch must hold ``batch_size`` sequences of shape
     ``(seq_length, 4)`` and ``batch_size`` labels of shape ``(1,)``.
     """
     batch_size = 10
     seq_length = 3
     dataset = peaksql.BedDataSet(
         DATABASE_BED, nr_rand_pos=100, seq_length=seq_length
     )
     loader = DataLoader(dataset, batch_size=batch_size)
     for seq_batch, label_batch in loader:
         assert tuple(seq_batch.shape) == (batch_size, seq_length, 4)
         assert tuple(label_batch.shape) == (batch_size, 1)

Example #6

Show file

File: test_04_dataloader_integration.py Project: vanheeringen-lab/peaksql

 def test_401_iterable(self):
     """Iterating a BedDataSet yields (sequence, label) pairs of fixed shape.

     With ``seq_length=3`` each sequence must have shape ``(3, 4)`` and each
     label shape ``(1,)``.
     """
     seq_length = 3
     expected_seq_shape = (seq_length, 4)
     expected_label_shape = (1,)

     dataset = peaksql.BedDataSet(
         DATABASE_BED, nr_rand_pos=100, seq_length=seq_length
     )
     for sequence, target in dataset:
         assert sequence.shape == expected_seq_shape
         assert target.shape == expected_label_shape

Example #7

Show file

 def test_308_BedDataSet_random_pos_length(self):
     """The dataset length must equal the requested ``nr_rand_pos``."""
     requested_positions = 20
     dataset = peaksql.BedDataSet(
         DATABASE_BED, seq_length=10, nr_rand_pos=requested_positions
     )
     assert len(dataset) == requested_positions

Example #8

Show file

 def test_305_Bed_label_fraction(self):
     """Check the labels from ``fraction`` with ``ratio`` set to 0.4.

     ``self.positions`` is a fixture defined outside this method; the
     element-wise comparison must hold for all four expected labels.
     """
     expected_labels = [False, True, False, True]

     dataset = peaksql.BedDataSet(DATABASE_BED, seq_length=10, stride=10)
     dataset.ratio = 0.4
     labels = dataset.fraction(self.positions)
     assert all(labels == expected_labels)

Example #9

Show file

 def test_304_Bed_label_all(self):
     """Check the labels returned by ``all`` for the ``self.positions`` fixture.

     Only the last of the four expected labels is True.
     """
     expected_labels = [False, False, False, True]

     dataset = peaksql.BedDataSet(DATABASE_BED, seq_length=10, stride=10)
     labels = dataset.all(self.positions)
     assert all(labels == expected_labels)

Example #10

Show file

 def test_301_BedDataSet_stride_length(self):
     """With ``seq_length`` and ``stride`` both 10 the dataset has 16 samples."""
     window = 10
     expected_length = 16
     dataset = peaksql.BedDataSet(
         DATABASE_BED, seq_length=window, stride=window
     )
     assert len(dataset) == expected_length