Ejemplo n.º 1
0
 def test_single_contig_1(self):
     """Expect exactly one contig, 'ATCGCTGATT', from a fixed read table.

     The read table is passed through ``cs.consensus_sequence`` and the
     result through ``cs.compute_new_contigs``; the output list must equal
     the expected list.
     """
     # presumably each entry is [contig index, position, weight] -- confirm
     # against cs.consensus_sequence.
     read_table = {
         'ATCG': [[0, 0, 0.5]],
         'TCGC': [[0, 1, 0.5]],
         'CCCT': [[0, 2, 0.5]],
         'GCTG': [[0, 3, 0.5]],
         'TTCA': [[0, 4, 0.5]],
         'TGAT': [[0, 5, 0.5]],
         'GATT': [[0, 6, 0.5]],
     }
     expected = ['ATCGCTGATT']
     rebuilt = cs.compute_new_contigs(cs.consensus_sequence(read_table))
     self.assertEqual(expected, rebuilt)
Ejemplo n.º 2
0
 def test_single_contig_2(self):
     """Expect the first computed contig to be one of four accepted sequences.

     The read table is passed through ``cs.consensus_sequence`` and
     ``cs.compute_new_contigs``; only the first returned contig is checked.
     """
     # Any of these sequences is accepted as the first contig.
     contigs_2 = ['ATCGCTGATT', 'ATCCCTCATT', 'ATCGCTCATT', 'ATCCCTGATT']
     reads_2 = {
         'ATCC': [[0, 0, 0.5]],
         'TCGC': [[0, 1, 0.5]],
         'CCCT': [[0, 2, 0.5]],
         'GCTC': [[0, 3, 0.5]],
         'TTCA': [[0, 4, 0.5]],
         'TGAT': [[0, 5, 0.5]],
         'GATT': [[0, 6, 0.5]],
     }
     freq = cs.consensus_sequence(reads_2)
     # assertIn reports the offending contig on failure, unlike
     # assertTrue(x in y), which only says "False is not true".
     self.assertIn(cs.compute_new_contigs(freq)[0], contigs_2)
Ejemplo n.º 3
0
 def test_multiple_contigs_1(self):
     """Expect two specific contigs from reads assigned to indices 0 and 1.

     The read table is passed through ``cs.consensus_sequence`` and the
     result through ``cs.compute_new_contigs``; the output list must equal
     ``['ATCGCTGATT', 'TTTACGATGC']``.
     """
     read_table = {
         # Entries whose first element is 0.
         'ATCG': [[0, 0, 0.5]],
         'TCGC': [[0, 1, 0.5]],
         'CCCT': [[0, 2, 0.5]],
         'GCTG': [[0, 3, 0.5]],
         'TTCA': [[0, 4, 0.5]],
         'TGAT': [[0, 5, 0.5]],
         'GATT': [[0, 6, 0.5]],
         # Entries whose first element is 1.
         'TTAA': [[1, 0, 0.5]],
         'TTAC': [[1, 1, 0.5]],
         'TGCG': [[1, 2, 0.5]],
         'ACGA': [[1, 3, 0.5]],
         'AGAT': [[1, 4, 0.5]],
         'GTTG': [[1, 5, 0.5]],
         'AAGC': [[1, 6, 0.5]],
     }
     expected = ['ATCGCTGATT', 'TTTACGATGC']
     rebuilt = cs.compute_new_contigs(cs.consensus_sequence(read_table))
     self.assertEqual(expected, rebuilt)
Ejemplo n.º 4
0
 def test_multiple_contigs_2(self):
     """Expect the first two computed contigs to be among accepted sequences.

     The read table is passed through ``cs.consensus_sequence`` and
     ``cs.compute_new_contigs``; the first two returned contigs must each
     appear in ``contigs_4``.
     """
     contigs_4 = [
         'ATCGCTGATT', 'ATCCCTCATT', 'ATCGCTCATT', 'ATCCCTGATT',
         'TTTACGATGC', 'TTTAAGATGC',
     ]
     reads_4 = {
         'ATCG': [[0, 0, 0.5]],
         'TCGC': [[0, 1, 0.5]],
         'CCCT': [[0, 2, 0.5]],
         'GCTG': [[0, 3, 0.5]],
         'TTCA': [[0, 4, 0.5]],
         'TGAT': [[0, 5, 0.5]],
         'GATT': [[0, 6, 0.5]],
         'TTAA': [[1, 0, 0.5], [1, 1, 0.5]],
         'TGCG': [[1, 2, 0.5]],
         'CCGA': [[1, 3, 0.5]],
         'AGAT': [[1, 4, 0.5]],
         'GTTG': [[1, 5, 0.5]],
         'AAGC': [[1, 6, 0.5]],
     }
     freq = cs.consensus_sequence(reads_4)
     # Compute once so both checked entries come from the same result.
     new_contigs = cs.compute_new_contigs(freq)
     # Separate assertIn calls name the contig that failed.
     self.assertIn(new_contigs[0], contigs_4)
     self.assertIn(new_contigs[1], contigs_4)
 def test_single_contig_2(self):
     """Expect the first computed contig to be one of four accepted sequences.

     The read table is passed through ``cs.consensus_sequence`` and
     ``cs.compute_new_contigs``; only the first returned contig is checked.
     """
     # Any of these sequences is accepted as the first contig.
     contigs_2 = ["ATCGCTGATT", "ATCCCTCATT", "ATCGCTCATT", "ATCCCTGATT"]
     reads_2 = {
         "ATCC": [[0, 0, 0.5]],
         "TCGC": [[0, 1, 0.5]],
         "CCCT": [[0, 2, 0.5]],
         "GCTC": [[0, 3, 0.5]],
         "TTCA": [[0, 4, 0.5]],
         "TGAT": [[0, 5, 0.5]],
         "GATT": [[0, 6, 0.5]],
     }
     freq = cs.consensus_sequence(reads_2)
     # assertIn reports the offending contig on failure, unlike
     # assertTrue(x in y), which only says "False is not true".
     self.assertIn(cs.compute_new_contigs(freq)[0], contigs_2)
 def test_single_contig_1(self):
     """Expect exactly one contig, "ATCGCTGATT", from a fixed read table.

     The read table is passed through ``cs.consensus_sequence`` and the
     result through ``cs.compute_new_contigs``; the output list must equal
     the expected list.
     """
     expected = ["ATCGCTGATT"]
     # presumably each entry is [contig index, position, weight] -- confirm
     # against cs.consensus_sequence.
     read_table = {
         "ATCG": [[0, 0, 0.5]], "TCGC": [[0, 1, 0.5]],
         "CCCT": [[0, 2, 0.5]], "GCTG": [[0, 3, 0.5]],
         "TTCA": [[0, 4, 0.5]], "TGAT": [[0, 5, 0.5]],
         "GATT": [[0, 6, 0.5]],
     }
     rebuilt = cs.compute_new_contigs(cs.consensus_sequence(read_table))
     self.assertEqual(expected, rebuilt)
 def test_multiple_contigs_2(self):
     """Expect the first two computed contigs to be among accepted sequences.

     The read table is passed through ``cs.consensus_sequence`` and
     ``cs.compute_new_contigs``; the first two returned contigs must each
     appear in ``contigs_4``.
     """
     contigs_4 = [
         "ATCGCTGATT",
         "ATCCCTCATT",
         "ATCGCTCATT",
         "ATCCCTGATT",
         "TTTACGATGC",
         "TTTAAGATGC",
     ]
     reads_4 = {
         "ATCG": [[0, 0, 0.5]],
         "TCGC": [[0, 1, 0.5]],
         "CCCT": [[0, 2, 0.5]],
         "GCTG": [[0, 3, 0.5]],
         "TTCA": [[0, 4, 0.5]],
         "TGAT": [[0, 5, 0.5]],
         "GATT": [[0, 6, 0.5]],
         "TTAA": [[1, 0, 0.5], [1, 1, 0.5]],
         "TGCG": [[1, 2, 0.5]],
         "CCGA": [[1, 3, 0.5]],
         "AGAT": [[1, 4, 0.5]],
         "GTTG": [[1, 5, 0.5]],
         "AAGC": [[1, 6, 0.5]],
     }
     freq = cs.consensus_sequence(reads_4)
     # Compute once so both checked entries come from the same result.
     new_contigs = cs.compute_new_contigs(freq)
     # Separate assertIn calls name the contig that failed.
     self.assertIn(new_contigs[0], contigs_4)
     self.assertIn(new_contigs[1], contigs_4)
 def test_multiple_contigs_1(self):
     """Expect two specific contigs from reads assigned to indices 0 and 1.

     The read table is passed through ``cs.consensus_sequence`` and the
     result through ``cs.compute_new_contigs``; the output list must equal
     ``["ATCGCTGATT", "TTTACGATGC"]``.
     """
     expected = ["ATCGCTGATT", "TTTACGATGC"]
     read_table = {
         # Entries whose first element is 0.
         "ATCG": [[0, 0, 0.5]], "TCGC": [[0, 1, 0.5]],
         "CCCT": [[0, 2, 0.5]], "GCTG": [[0, 3, 0.5]],
         "TTCA": [[0, 4, 0.5]], "TGAT": [[0, 5, 0.5]],
         "GATT": [[0, 6, 0.5]],
         # Entries whose first element is 1.
         "TTAA": [[1, 0, 0.5]], "TTAC": [[1, 1, 0.5]],
         "TGCG": [[1, 2, 0.5]], "ACGA": [[1, 3, 0.5]],
         "AGAT": [[1, 4, 0.5]], "GTTG": [[1, 5, 0.5]],
         "AAGC": [[1, 6, 0.5]],
     }
     rebuilt = cs.compute_new_contigs(cs.consensus_sequence(read_table))
     self.assertEqual(expected, rebuilt)
Ejemplo n.º 9
0
def _main():
    """Run the iterative map / consensus / merge loop and log each step.

    Command-line arguments:
        sys.argv[1]: path of the input file handed to ``init._process``.
        sys.argv[2]: output directory; receives ``contig.txt``,
            ``likelihood.txt`` and one ``reads_trial_<k>.txt`` per iteration
            plus a final one numbered ``NUM_ITERS``.

    All files are opened with ``with`` so they are closed even if a step
    raises.
    """

    def write_reads(trial, reads):
        """Dump every entry of ``reads`` to ``reads_trial_<trial>.txt``."""
        path = sys.argv[2] + '/reads_trial_' + str(trial) + '.txt'
        with open(path, 'w') as reads_file:
            for read in reads:
                for entry in reads[read]:
                    # NOTE(review): entry[3] is written as both the first and
                    # the last field, exactly as before; confirm whether one
                    # of them was meant to be a different index.
                    fields = (entry[3], entry[0], entry[1], entry[3])
                    reads_file.write(','.join(str(v) for v in fields) + '\n')

    def write_contig_row(out, step, label, row_contigs):
        """Write one tab-separated row: step, label, then every contig."""
        out.write('%s\t%s\t' % (str(step), label))
        for c in row_contigs:
            out.write('%s\t' % (str(c)))
        out.write('\n')

    # Open the input file and build the reads dictionary.
    # NOTE(review): assumes init._process finishes reading the handle before
    # it returns (reads_dict is used as a plain mapping below) -- confirm.
    with open(sys.argv[1], 'r') as f:
        reads_dict = init._process(f)

    # Get contigs from first consensus sequence
    contigs = cs.run_consensus(reads_dict)

    # Set initial parameters
    likelihood = 0
    likelihood_new = 0

    with open(sys.argv[2] + '/contig.txt', 'w+') as contig_file, \
            open(sys.argv[2] + '/likelihood.txt', 'w+') as ll_file:
        for i in range(NUM_ITERS):
            # --- File writes ---
            write_contig_row(contig_file, i, 'start', contigs)
            contig_file.flush()
            ll_file.write(
                '%s\t%s\t%s\n' %
                (str(i), str(likelihood), str(len(contigs))))
            ll_file.flush()
            write_reads(i, reads_dict)

            # --- Computation of algorithm ---
            # Update likelihood
            likelihood = likelihood_new
            # Map reads
            reads_dict = rm.run(reads_dict, contigs)
            # Run Consensus Sequence
            contigs = cs.run_consensus(reads_dict)
            # Record the contigs as they stand right before the merge step.
            write_contig_row(contig_file, i, 'merge', contigs)
            # Run merge
            # TODO: how do we know if a merge has happened... do we need to?
            contigs, reads_dict = mc.run_merge(contigs, reads_dict)
            # Get new likelihood
            likelihood_new = ll._likelihood(reads_dict, contigs)

        # --- Final file writes ---
        # Use NUM_ITERS rather than the loop variable: it equals i + 1 after
        # a non-empty loop and stays defined when NUM_ITERS is 0.
        write_reads(NUM_ITERS, reads_dict)
        for c in contigs:
            # NOTE(review): the step label is hard-coded to 1000 regardless
            # of NUM_ITERS; kept unchanged.
            contig_file.write('1000\tend\t%s\n' % (str(c)))
        # NOTE(review): this writes `likelihood`, so the most recently
        # computed `likelihood_new` is never written; kept unchanged --
        # confirm that this is intended.
        ll_file.write(
            '%s\t%s\t%s\n' %
            (str(NUM_ITERS), str(likelihood), str(len(contigs))))
        ll_file.flush()
def _main():
    """Run the iterative map / consensus / merge loop and log each step.

    Command-line arguments:
        sys.argv[1]: path of the input file handed to ``init._process``.
        sys.argv[2]: output directory; receives ``contig.txt``,
            ``likelihood.txt`` and one ``reads_trial_<k>.txt`` per iteration
            plus a final one numbered ``NUM_ITERS``.

    All files are opened with ``with`` so they are closed even if a step
    raises.
    """

    def write_reads(trial, reads):
        """Dump every entry of ``reads`` to ``reads_trial_<trial>.txt``."""
        path = sys.argv[2] + '/reads_trial_' + str(trial) + '.txt'
        with open(path, 'w') as reads_file:
            for read in reads:
                for entry in reads[read]:
                    # NOTE(review): entry[3] is written as both the first and
                    # the last field, exactly as before; confirm whether one
                    # of them was meant to be a different index.
                    fields = (entry[3], entry[0], entry[1], entry[3])
                    reads_file.write(','.join(str(v) for v in fields) + '\n')

    def write_contig_row(out, step, label, row_contigs):
        """Write one tab-separated row: step, label, then every contig."""
        out.write('%s\t%s\t' % (str(step), label))
        for c in row_contigs:
            out.write('%s\t' % (str(c)))
        out.write('\n')

    # Open the input file and build the reads dictionary.
    # NOTE(review): assumes init._process finishes reading the handle before
    # it returns (reads_dict is used as a plain mapping below) -- confirm.
    with open(sys.argv[1], 'r') as f:
        reads_dict = init._process(f)

    # Get contigs from first consensus sequence
    contigs = cs.run_consensus(reads_dict)

    # Set initial parameters
    likelihood = 0
    likelihood_new = 0

    with open(sys.argv[2] + '/contig.txt', 'w+') as contig_file, \
            open(sys.argv[2] + '/likelihood.txt', 'w+') as ll_file:
        for i in range(NUM_ITERS):
            # --- File writes ---
            write_contig_row(contig_file, i, 'start', contigs)
            contig_file.flush()
            ll_file.write('%s\t%s\t%s\n' % (str(i), str(likelihood), str(len(contigs))))
            ll_file.flush()
            write_reads(i, reads_dict)

            # --- Computation of algorithm ---
            # Update likelihood
            likelihood = likelihood_new
            # Map reads
            reads_dict = rm.run(reads_dict, contigs)
            # Run Consensus Sequence
            contigs = cs.run_consensus(reads_dict)
            # Record the contigs as they stand right before the merge step.
            write_contig_row(contig_file, i, 'merge', contigs)
            # Run merge
            # TODO: how do we know if a merge has happened... do we need to?
            contigs, reads_dict = mc.run_merge(contigs, reads_dict)
            # Get new likelihood
            likelihood_new = ll._likelihood(reads_dict, contigs)

        # --- Final file writes ---
        # Use NUM_ITERS rather than the loop variable: it equals i + 1 after
        # a non-empty loop and stays defined when NUM_ITERS is 0.
        write_reads(NUM_ITERS, reads_dict)
        for c in contigs:
            # NOTE(review): the step label is hard-coded to 1000 regardless
            # of NUM_ITERS; kept unchanged.
            contig_file.write('1000\tend\t%s\n' % (str(c)))
        # NOTE(review): this writes `likelihood`, so the most recently
        # computed `likelihood_new` is never written; kept unchanged --
        # confirm that this is intended.
        ll_file.write('%s\t%s\t%s\n' % (str(NUM_ITERS), str(likelihood), str(len(contigs))))
        ll_file.flush()