Exemple #1
0
def main():
    """Print the answers to the shortest-common-superstring questions.

    Questions 1-2 call ``utils.scs`` on six 3-mers and print the length of
    the returned superstring and the second returned value (the number of
    distinct shortest superstrings, going by the variable name).

    Questions 3-5 read ``ads1_week4_reads.fq`` and call
    ``utils.greedy_scs`` with ``k`` descending from 100 to 2 until the
    result is exactly 15894 characters long, then print its A count, its
    T count and the sequence itself. If no ``k`` produces that length,
    nothing further is printed.
    """
    # question 1: What is the length of the shortest common superstring of the following strings?
    shortest_string, num_strings = utils.scs(['CCT', 'CTT', 'TGC', 'TGG', 'GAT', 'ATT'])
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(len(shortest_string))
    # question 2: How many different shortest common superstrings are there for the input strings given in the previous question?
    print(num_strings)

    # question 3 and 4:

    unknown_virus_seq, _ = utils.readFastq('ads1_week4_reads.fq')
    # NOTE(review): k is presumably the minimum overlap length used by
    # greedy_scs -- confirm against utils.
    for k in range(100, 1, -1):
        genome = utils.greedy_scs(unknown_virus_seq, k)
        # Only an assembly of exactly this length is accepted.
        if len(genome) == 15894:
            # q3: How many As are there in the full, assembled genome?
            print(genome.count('A'))
            # q4: How many Ts are there in the full, assembled genome?
            print(genome.count('T'))
            # q5: final genome that we can search for in the BLAST database
            print(genome)
            break
Exemple #2
0
def main():
    """Run and time the edit-distance and overlap-graph questions.

    Reads the genome excerpt ``chr1.GRCh38.excerpt.fasta``, prints the
    result of ``edit_distance_alt`` for two patterns against it, then
    reads ``ERR266411_1.for_asm.fastq`` and prints both values returned by
    ``overlap_graph(seqs, 30)``. After each step the elapsed time is
    printed in the form ``>> <seconds>s``.
    """
    # time.clock() was removed in Python 3.8; use perf_counter when the
    # interpreter has it and fall back to clock() on older versions.
    timer = time.perf_counter if hasattr(time, 'perf_counter') else time.clock

    chr1 = utils.readGenome('chr1.GRCh38.excerpt.fasta')
    #Question 1
    start = timer()
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(edit_distance_alt('GCTGATCGATCGTACG', chr1))
    end = timer()
    print(">> %.2gs" % (end - start))
    #Question 2
    start = timer()
    print(edit_distance_alt('GATTTACCAGATTGAG', chr1))
    end = timer()
    print(">> %.2gs" % (end - start))
    #Questions 3 and 4
    start = timer()
    seqs, _ = utils.readFastq('ERR266411_1.for_asm.fastq')
    # NOTE(review): 30 is presumably the minimum overlap length -- confirm
    # against overlap_graph's definition.
    edges, suffixes = overlap_graph(seqs, 30)
    print(edges)
    print(suffixes)
    end = timer()
    print(">> %.2gs" % (end - start))
Exemple #3
0
def main():
    """Run and time the edit-distance and overlap-graph questions.

    Reads the genome excerpt ``chr1.GRCh38.excerpt.fasta``, prints the
    result of ``edit_distance_alt`` for two patterns against it, then
    reads ``ERR266411_1.for_asm.fastq`` and prints both values returned by
    ``overlap_graph(seqs, 30)``. After each step the elapsed time is
    printed in the form ``>> <seconds>s``.
    """
    # time.clock() no longer exists on Python 3.8+, so pick perf_counter
    # when available and only fall back to clock() on old interpreters.
    if hasattr(time, "perf_counter"):
        timer = time.perf_counter
    else:
        timer = time.clock

    chr1 = utils.readGenome("chr1.GRCh38.excerpt.fasta")
    # Question 1
    start = timer()
    # Single-argument print(...) prints identically on Python 2 and 3.
    print(edit_distance_alt("GCTGATCGATCGTACG", chr1))
    end = timer()
    print(">> %.2gs" % (end - start))
    # Question 2
    start = timer()
    print(edit_distance_alt("GATTTACCAGATTGAG", chr1))
    end = timer()
    print(">> %.2gs" % (end - start))
    # Questions 3 and 4
    start = timer()
    seqs, _ = utils.readFastq("ERR266411_1.for_asm.fastq")
    # NOTE(review): 30 looks like a minimum overlap length -- verify
    # against overlap_graph's definition.
    edges, suffixes = overlap_graph(seqs, 30)
    print(edges)
    print(suffixes)
    end = timer()
    print(">> %.2gs" % (end - start))
Exemple #4
0
def main():
    """Print the answers to the shortest-common-superstring questions.

    First calls ``utils.scs`` on six 3-mers and prints the length of the
    returned superstring followed by the second returned value (the count
    of distinct shortest superstrings, judging by the variable name).

    Then reads ``ads1_week4_reads.fq`` and tries ``utils.greedy_scs`` with
    ``k`` going from 100 down to 2. The first result that is exactly 15894
    characters long has its A count, T count and full sequence printed,
    and the search stops. Nothing more is printed if no ``k`` matches.
    """
    # question 1: What is the length of the shortest common superstring of the following strings?
    shortest_string, num_strings = utils.scs(
        ['CCT', 'CTT', 'TGC', 'TGG', 'GAT', 'ATT'])
    # Single-argument print(...) prints identically on Python 2 and 3.
    print(len(shortest_string))
    # question 2: How many different shortest common superstrings are there for the input strings given in the previous question?
    print(num_strings)

    # question 3 and 4:

    unknown_virus_seq, _ = utils.readFastq('ads1_week4_reads.fq')
    # NOTE(review): k is presumably greedy_scs's minimum overlap length --
    # confirm against utils.
    for k in range(100, 1, -1):
        genome = utils.greedy_scs(unknown_virus_seq, k)
        # Accept only an assembly of exactly this length.
        if len(genome) == 15894:
            # q3: How many As are there in the full, assembled genome?
            print(genome.count('A'))
            # q4: How many Ts are there in the full, assembled genome?
            print(genome.count('T'))
            # q5: final genome that we can search for in the BLAST database
            print(genome)
            break
from utils import readFastq
from w4 import scs

# Load the reads; the second value returned by readFastq is not used here.
seq, _ = readFastq('ads1_week4_reads.fq')

# Combine all reads with scs and report the result's length.
combined = scs(seq)
print(len(combined))

# Report how many times each of these two bases occurs, A first, then T.
for base in ('A', 'T'):
    print(combined.count(base))