Esempio n. 1
0
    def test_normal_sequence(self):
        """Check that the sequence from ``random_seq(1000)`` costs less than 15.

        Also prints the statistics of a fixed reference sequence so the
        output can be inspected by eye; nothing is asserted about it.
        """
        # Positional arguments shared by both DNAStats instances below.
        window_args = (14, 20, 20)

        random_dna = random_seq(1000)
        random_stats = DNAStats(random_dna, *window_args)
        print(random_dna)
        print(random_stats())
        print(random_stats.cost())
        assert random_stats.cost() < 15

        # Fixed sequence, printed for manual inspection only.
        reference_dna = (
            "CGAGACCACTCGGGACTTCCGGCCATAGCGTACCGTTTTGTGACAAA"
            "ACCCCCACTCGAACGTGAGAAAACCCTTCTCTTCATGTAATTCCCCCACA"
            "GTCCGCGGGTCGGTCAAACCTGGATAAGGTAAAGACTAATATCTAAACCT"
            "GCTGGAGAGTCGAACCGCGGTCTTAGGCCCACGCAGAGTGTATGTTA"
            "TTCGTCTGCCGCTATATCGGTCAACACTAGTTGACGGATAGGAATGTTGG"
            "ATTAACGCGTCTCCAACGCTGGGATACCCTCGCAAAATTTTCCCGAT"
            "ACTATCCGGAATCTCTAACGCCGTTGGTTTGGGCTCCCAACCACCCGTG"
            "AACTTCTAACACGAGAATCACCGCTGGAGCGCGCGCCTTCTCTCAATT"
            "TACCTGAGCTTTCGCTTCCTACTTAGCAGAATCGTGAACCTAAATTTTA"
            "GCAGCTTCAAGTCAGTTACGCTCGACACTTCCGATTCCAGGTAAAATA"
            "ACCACTTCTAAGGTTCGTGACTGGTTCTCTATTCAACGCACGCGGTGCCC"
            "TCGCGGGTCCTCTGCTGCCGGGAAGCACATGATTGCCAGCTTGTTAA"
            "ACAACACAAGGTGGCCAATCTCAAACTCGCATAAGCCCTGTTTTTTCTTG"
            "CAAGCTGCAACCGAGCATTCCTTCAGTCAGTGGTGGTTTTTCAAAAC"
            "TATTCCTATGGGTGCTGACACGTGTGTAATTGTTTTCTACTATCTCTCG"
            "GTTTATAGCGTAGTTGCCGAGGCTATTGAGTCTCCTTTGCTAATAGCT"
            "AAGGTGGAAATTTTTTTTTTTTTGAACCGGGTGAATATACTTGATACAT"
            "CAATAGCCCCTAGCGTATTGTACCCGTCACGGGCTCAAATACTCTGCC"
            "CAGGGCGATACCATGGAAGTTCTCGTAACATACAATGGATCTGGGCCGT"
            "CATCGCTTGATGCTCTAGAAGAAAAAGCAGAGACCGGCCATTACCGCG"
            "TCAACTAACACGCCTCAGGCCGGGGTTAACACTAGGTGTGT"
        )
        reference_stats = DNAStats(reference_dna, *window_args)
        print(reference_stats())
Esempio n. 2
0
def test_copy():
    """Check that ``DNAStats.copy`` returns a distinct object with equal cost.

    The copy is taken over the full range (``slice(None, None)``), so the
    cost over ``(1, 1000)`` is expected to match that of the original.
    """
    seq = random_seq(1000)
    stats = DNAStats(seq, 14, 20, 20)
    stats2 = stats.copy(slice(None, None))

    # The copy must be a new object, not an alias of the original.
    assert stats is not stats2

    # Compare the copy against the original. The earlier form of this
    # assertion compared ``stats`` with itself and therefore could never fail.
    assert stats2.cost(1, 1000) == stats.cost(1, 1000)

    print(stats)
    print(stats2)
Esempio n. 3
0
def test_slice(index):
    """Check the length of ``stats[:index]`` for the supplied ``index``.

    When ``index`` is None the slice must be as long as the original;
    otherwise its length must equal ``index``.
    """
    full_stats = DNAStats(random_seq(1000), 14, 20, 20)
    head_stats = full_stats[slice(None, index)]

    assert len(head_stats) == (len(full_stats) if index is None else index)

    # Costs are printed for manual inspection only.
    print(full_stats.cost())
    print(head_stats.cost())
Esempio n. 4
0
def find_best_partitions(
    stats: DNAStats,
    threshold: int,
    i=None,
    j=None,
    step_size: int = 100,
    delta: Optional[int] = None,
    partitions=None,
):
    """Recursively collect partition points for the range ``(i, j)``.

    If ``stats.cost(i, j)`` is already below ``threshold`` nothing is added.
    Otherwise a partition point ``p`` is requested from
    ``find_fast_opt_partition``, recorded, and each side ``(i, p)`` and
    ``(p, j)`` whose cost exceeds ``threshold`` is partitioned in turn.

    Args:
        stats: Object providing ``cost(i, j)``.
        threshold: Cost limit that decides whether a range is split further.
        i: Start of the range, forwarded to ``stats.cost`` (may be None).
        j: End of the range, forwarded to ``stats.cost`` (may be None).
        step_size: Forwarded to ``find_fast_opt_partition``.
        delta: Forwarded to ``find_fast_opt_partition``.
        partitions: Existing list to extend. It is mutated in place; a new
            list is created when None.

    Returns:
        The list of partition points, in the order they were found.
    """
    if partitions is None:
        partitions = []

    if stats.cost(i, j) < threshold:
        return partitions

    # Only the partition index is needed; the second returned value is unused.
    p, _ = find_fast_opt_partition(
        stats, i=i, j=j, step_size=step_size, delta=delta
    )

    # Stop when no partition was found or when it was already recorded
    # (the latter also guards against endless recursion).
    if p is None or p in partitions:
        return partitions
    partitions.append(p)

    left_cost = stats.cost(i, p)
    right_cost = stats.cost(p, j)

    # Forward step_size and delta so sub-ranges are searched with the same
    # settings the caller asked for, rather than falling back to the defaults.
    if left_cost > threshold:
        find_best_partitions(
            stats,
            threshold=threshold,
            i=i,
            j=p,
            step_size=step_size,
            delta=delta,
            partitions=partitions,
        )
    if right_cost > threshold:
        find_best_partitions(
            stats,
            threshold=threshold,
            i=p,
            j=j,
            step_size=step_size,
            delta=delta,
            partitions=partitions,
        )

    return partitions
Esempio n. 5
0
    def test_hairpins(self, length__kmer):
        """Check that an embedded reverse-complement pair is reported as a hairpin.

        ``length__kmer`` is a pair: the length of the hairpin arm to embed
        and the k-mer size passed to ``DNAStats`` as its fourth argument.
        """
        arm_length, hairpin_kmer = length__kmer
        arm = random_seq(arm_length)

        # Build: flank + arm + spacer + revcomp(arm) + flank.
        left_flank = random_seq(1000)
        spacer = random_seq(500)
        arm_rc = revcomp(arm)
        right_flank = random_seq(1000)
        seq = left_flank + arm + spacer + arm_rc + right_flank

        # Look for hairpins of size hairpin_kmer.
        stats = DNAStats(seq, 20, 20, hairpin_kmer)

        assert stats()["n_repeats"] == 0
        assert stats()["n_hairpins"] > 0

        # Costs are printed for manual inspection only.
        print(stats.cost(1000, 1500))
        print(stats.cost(None, None))
Esempio n. 6
0
def cached_stats_cost(stats: DNAStats, i: int, j: int):
    """Return ``stats.cost(i, j)``.

    NOTE(review): the name suggests memoization, but no caching is visible
    in this block -- presumably a caching decorator is applied elsewhere;
    confirm before relying on it.
    """
    cost = stats.cost(i, j)
    return cost
Esempio n. 7
0
 def test_case_insensitive(self):
     """Check that lower- and upper-case input give the same cost."""
     base_seq = random_seq(1000)
     lower_stats = DNAStats(base_seq.lower(), 20, 20, 20)
     upper_stats = DNAStats(base_seq.upper(), 20, 20, 20)

     # Printed for manual inspection only.
     print(lower_stats.cost())

     assert lower_stats.cost() == upper_stats.cost()