def test_single_contig_1(self):
    """Check that a single contig is rebuilt exactly from its mapped 4-mers.

    Seven reads are all mapped to contig 0; the contig list returned by
    ``cs.compute_new_contigs`` must equal ``['ATCGCTGATT']``.
    """
    expected = ['ATCGCTGATT']
    kmers = ['ATCG', 'TCGC', 'CCCT', 'GCTG', 'TTCA', 'TGAT', 'GATT']
    # Each read carries one mapping entry [0, index, 0.5]; presumably
    # [contig id, offset, weight] -- confirm against cs.consensus_sequence.
    reads = {kmer: [[0, idx, 0.5]] for idx, kmer in enumerate(kmers)}
    frequencies = cs.consensus_sequence(reads)
    self.assertEqual(expected, cs.compute_new_contigs(frequencies))
def test_single_contig_2(self):
    """Check that an ambiguous single contig resolves to an accepted variant.

    The first contig returned by ``cs.compute_new_contigs`` must be one of
    the four candidate sequences listed in ``contigs_2``.
    """
    contigs_2 = ['ATCGCTGATT', 'ATCCCTCATT', 'ATCGCTCATT', 'ATCCCTGATT']
    reads_2 = {
        'ATCC': [[0, 0, 0.5]],
        'TCGC': [[0, 1, 0.5]],
        'CCCT': [[0, 2, 0.5]],
        'GCTC': [[0, 3, 0.5]],
        'TTCA': [[0, 4, 0.5]],
        'TGAT': [[0, 5, 0.5]],
        'GATT': [[0, 6, 0.5]],
    }
    freq = cs.consensus_sequence(reads_2)
    # assertIn reports the offending contig on failure, unlike
    # assertTrue(x in y) which only says "False is not true".
    self.assertIn(cs.compute_new_contigs(freq)[0], contigs_2)
def test_multiple_contigs_1(self):
    """Check that two contigs are rebuilt exactly from their mapped 4-mers.

    Seven reads are mapped to contig 0 and seven to contig 1; the contig
    list returned by ``cs.compute_new_contigs`` must equal
    ``['ATCGCTGATT', 'TTTACGATGC']``.
    """
    expected = ['ATCGCTGATT', 'TTTACGATGC']
    kmers_by_contig = [
        ['ATCG', 'TCGC', 'CCCT', 'GCTG', 'TTCA', 'TGAT', 'GATT'],
        ['TTAA', 'TTAC', 'TGCG', 'ACGA', 'AGAT', 'GTTG', 'AAGC'],
    ]
    # Each read carries one mapping entry [contig_id, index, 0.5]; presumably
    # [contig id, offset, weight] -- confirm against cs.consensus_sequence.
    reads = {}
    for contig_id, kmers in enumerate(kmers_by_contig):
        for idx, kmer in enumerate(kmers):
            reads[kmer] = [[contig_id, idx, 0.5]]
    frequencies = cs.consensus_sequence(reads)
    self.assertEqual(expected, cs.compute_new_contigs(frequencies))
def test_multiple_contigs_2(self):
    """Check that two ambiguous contigs each resolve to an accepted variant.

    The first two contigs returned by ``cs.compute_new_contigs`` must each
    be one of the candidate sequences listed in ``contigs_4``.
    """
    contigs_4 = [
        'ATCGCTGATT', 'ATCCCTCATT', 'ATCGCTCATT', 'ATCCCTGATT',
        'TTTACGATGC', 'TTTAAGATGC',
    ]
    reads_4 = {
        'ATCG': [[0, 0, 0.5]],
        'TCGC': [[0, 1, 0.5]],
        'CCCT': [[0, 2, 0.5]],
        'GCTG': [[0, 3, 0.5]],
        'TTCA': [[0, 4, 0.5]],
        'TGAT': [[0, 5, 0.5]],
        'GATT': [[0, 6, 0.5]],
        # This read has two mapping entries on contig 1.
        'TTAA': [[1, 0, 0.5], [1, 1, 0.5]],
        'TGCG': [[1, 2, 0.5]],
        'CCGA': [[1, 3, 0.5]],
        'AGAT': [[1, 4, 0.5]],
        'GTTG': [[1, 5, 0.5]],
        'AAGC': [[1, 6, 0.5]],
    }
    freq = cs.consensus_sequence(reads_4)
    # Compute the contigs once so both checks inspect the same result, and
    # use assertIn so a failure names the unexpected contig.
    new_contigs = cs.compute_new_contigs(freq)
    self.assertIn(new_contigs[0], contigs_4)
    self.assertIn(new_contigs[1], contigs_4)
def test_single_contig_2(self):
    """Check that an ambiguous single contig resolves to an accepted variant.

    The first contig returned by ``cs.compute_new_contigs`` must be one of
    the four candidate sequences listed in ``contigs_2``.
    """
    contigs_2 = ["ATCGCTGATT", "ATCCCTCATT", "ATCGCTCATT", "ATCCCTGATT"]
    reads_2 = {
        "ATCC": [[0, 0, 0.5]],
        "TCGC": [[0, 1, 0.5]],
        "CCCT": [[0, 2, 0.5]],
        "GCTC": [[0, 3, 0.5]],
        "TTCA": [[0, 4, 0.5]],
        "TGAT": [[0, 5, 0.5]],
        "GATT": [[0, 6, 0.5]],
    }
    freq = cs.consensus_sequence(reads_2)
    # assertIn reports the offending contig on failure, unlike
    # assertTrue(x in y) which only says "False is not true".
    self.assertIn(cs.compute_new_contigs(freq)[0], contigs_2)
def test_single_contig_1(self):
    """Verify exact reconstruction of one contig from reads mapped to it.

    All seven reads map to contig 0, and ``cs.compute_new_contigs`` is
    expected to return exactly ``["ATCGCTGATT"]``.
    """
    wanted = ["ATCGCTGATT"]
    ordered_reads = ["ATCG", "TCGC", "CCCT", "GCTG", "TTCA", "TGAT", "GATT"]
    # One mapping entry [0, n, 0.5] per read, n counting up from 0;
    # presumably [contig id, offset, weight] -- TODO confirm against cs.
    mapped = {}
    for n, read in enumerate(ordered_reads):
        mapped[read] = [[0, n, 0.5]]
    counts = cs.consensus_sequence(mapped)
    rebuilt = cs.compute_new_contigs(counts)
    self.assertEqual(wanted, rebuilt)
def test_multiple_contigs_2(self):
    """Check that two ambiguous contigs each resolve to an accepted variant.

    The first two contigs returned by ``cs.compute_new_contigs`` must each
    be one of the candidate sequences listed in ``contigs_4``.
    """
    contigs_4 = [
        "ATCGCTGATT",
        "ATCCCTCATT",
        "ATCGCTCATT",
        "ATCCCTGATT",
        "TTTACGATGC",
        "TTTAAGATGC",
    ]
    reads_4 = {
        "ATCG": [[0, 0, 0.5]],
        "TCGC": [[0, 1, 0.5]],
        "CCCT": [[0, 2, 0.5]],
        "GCTG": [[0, 3, 0.5]],
        "TTCA": [[0, 4, 0.5]],
        "TGAT": [[0, 5, 0.5]],
        "GATT": [[0, 6, 0.5]],
        # This read has two mapping entries on contig 1.
        "TTAA": [[1, 0, 0.5], [1, 1, 0.5]],
        "TGCG": [[1, 2, 0.5]],
        "CCGA": [[1, 3, 0.5]],
        "AGAT": [[1, 4, 0.5]],
        "GTTG": [[1, 5, 0.5]],
        "AAGC": [[1, 6, 0.5]],
    }
    freq = cs.consensus_sequence(reads_4)
    # Compute the contigs once so both checks inspect the same result, and
    # use assertIn so a failure names the unexpected contig.
    new_contigs = cs.compute_new_contigs(freq)
    self.assertIn(new_contigs[0], contigs_4)
    self.assertIn(new_contigs[1], contigs_4)
def test_multiple_contigs_1(self):
    """Verify exact reconstruction of two contigs from their mapped reads.

    Seven reads map to contig 0 and seven to contig 1, and
    ``cs.compute_new_contigs`` is expected to return exactly
    ``["ATCGCTGATT", "TTTACGATGC"]``.
    """
    wanted = ["ATCGCTGATT", "TTTACGATGC"]
    first_reads = ["ATCG", "TCGC", "CCCT", "GCTG", "TTCA", "TGAT", "GATT"]
    second_reads = ["TTAA", "TTAC", "TGCG", "ACGA", "AGAT", "GTTG", "AAGC"]
    # One mapping entry [contig, n, 0.5] per read, n counting up from 0;
    # presumably [contig id, offset, weight] -- TODO confirm against cs.
    mapped = {read: [[0, n, 0.5]] for n, read in enumerate(first_reads)}
    mapped.update({read: [[1, n, 0.5]] for n, read in enumerate(second_reads)})
    counts = cs.consensus_sequence(mapped)
    rebuilt = cs.compute_new_contigs(counts)
    self.assertEqual(wanted, rebuilt)
def _main():
    """Run the iterative map / consensus / merge loop and log each step.

    Command-line arguments:
        sys.argv[1]: path of the reads input, handed to ``init._process``.
        sys.argv[2]: output directory (must already exist). Receives
            ``contig.txt``, ``likelihood.txt`` and ``reads_trial_<k>.txt``
            for ``k`` in ``0..NUM_ITERS``.

    Each of the ``NUM_ITERS`` iterations logs the current contigs,
    likelihood and reads, then runs ``rm.run``, ``cs.run_consensus``,
    ``mc.run_merge`` and ``ll._likelihood`` in that order. After the loop
    a final reads snapshot, the final contigs and a closing likelihood row
    are written.

    Raises:
        IndexError: if fewer than two command-line arguments were given.
    """
    reads_path, out_dir = sys.argv[1], sys.argv[2]

    def dump_reads(trial, reads):
        """Write one CSV line per mapping entry to reads_trial_<trial>.txt."""
        path = out_dir + '/reads_trial_' + str(trial) + '.txt'
        with open(path, 'w') as reads_file:
            for read in reads:
                for entry in reads[read]:
                    # NOTE(review): entry[3] is emitted both first and last
                    # and entry[2] is never written -- kept as in the
                    # original output format; confirm this is intended.
                    reads_file.write(
                        '%s,%s,%s,%s\n'
                        % (entry[3], entry[0], entry[1], entry[3]))

    # Open the input and build the reads dictionary; the handle is closed
    # as soon as parsing is done.
    with open(reads_path, 'r') as f:
        reads_dict = init._process(f)

    # Get contigs from first consensus sequence
    contigs = cs.run_consensus(reads_dict)

    with open(out_dir + '/contig.txt', 'w+') as contig_file, \
            open(out_dir + '/likelihood.txt', 'w+') as ll_file:
        # Set initial parameters
        likelihood = 0
        likelihood_new = 0

        for i in range(NUM_ITERS):
            # ---- File writes: state at the start of iteration i ----
            contig_file.write('%s\tstart\t' % i)
            for c in contigs:
                contig_file.write('%s\t' % str(c))
            contig_file.write('\n')
            contig_file.flush()

            # NOTE(review): `likelihood` is refreshed from `likelihood_new`
            # only after this row is written, so logged values trail the
            # computed ones; behaviour preserved from the original.
            ll_file.write('%s\t%s\t%s\n' % (i, likelihood, len(contigs)))
            ll_file.flush()

            dump_reads(i, reads_dict)

            # ---- Computation ----
            # Update likelihood
            likelihood = likelihood_new
            # Map reads
            reads_dict = rm.run(reads_dict, contigs)
            # Run consensus sequence
            contigs = cs.run_consensus(reads_dict)

            # Log the contigs as they stand right before merging
            contig_file.write('%s\tmerge\t' % i)
            for c in contigs:
                contig_file.write('%s\t' % str(c))
            contig_file.write('\n')

            # Run merge
            # how do we know if a merge has happened..do we need to know?
            contigs, reads_dict = mc.run_merge(contigs, reads_dict)

            # Get new likelihood
            likelihood_new = ll._likelihood(reads_dict, contigs)

        # ---- Final file writes ----
        # NUM_ITERS equals the original `i + 1` after a non-empty loop and,
        # unlike the loop variable, is still defined when NUM_ITERS == 0.
        dump_reads(NUM_ITERS, reads_dict)

        # NOTE(review): the iteration column of the end rows is the literal
        # 1000, not NUM_ITERS; kept to preserve the file format.
        for c in contigs:
            contig_file.write('1000\tend\t%s\n' % str(c))
        ll_file.write(
            '%s\t%s\t%s\n' % (NUM_ITERS, likelihood, len(contigs)))
        ll_file.flush()
def _main():
    """Run the iterative map / consensus / merge loop and log each step.

    Command-line arguments:
        sys.argv[1]: path of the reads input, handed to ``init._process``.
        sys.argv[2]: output directory (must already exist). Receives
            ``contig.txt``, ``likelihood.txt`` and ``reads_trial_<k>.txt``
            for ``k`` in ``0..NUM_ITERS``.

    Each of the ``NUM_ITERS`` iterations logs the current contigs,
    likelihood and reads, then runs ``rm.run``, ``cs.run_consensus``,
    ``mc.run_merge`` and ``ll._likelihood`` in that order. After the loop
    a final reads snapshot, the final contigs and a closing likelihood row
    are written.

    Raises:
        IndexError: if fewer than two command-line arguments were given.
    """
    reads_path, out_dir = sys.argv[1], sys.argv[2]

    def dump_reads(trial, reads):
        """Write one CSV line per mapping entry to reads_trial_<trial>.txt."""
        path = out_dir + '/reads_trial_' + str(trial) + '.txt'
        with open(path, 'w') as reads_file:
            for read in reads:
                for entry in reads[read]:
                    # NOTE(review): entry[3] is emitted both first and last
                    # and entry[2] is never written -- kept as in the
                    # original output format; confirm this is intended.
                    reads_file.write(
                        '%s,%s,%s,%s\n'
                        % (entry[3], entry[0], entry[1], entry[3]))

    # Open the input and build the reads dictionary; the handle is closed
    # as soon as parsing is done.
    with open(reads_path, 'r') as f:
        reads_dict = init._process(f)

    # Get contigs from first consensus sequence
    contigs = cs.run_consensus(reads_dict)

    with open(out_dir + '/contig.txt', 'w+') as contig_file, \
            open(out_dir + '/likelihood.txt', 'w+') as ll_file:
        # Set initial parameters
        likelihood = 0
        likelihood_new = 0

        for i in range(NUM_ITERS):
            # ---- File writes: state at the start of iteration i ----
            contig_file.write('%s\tstart\t' % i)
            for c in contigs:
                contig_file.write('%s\t' % str(c))
            contig_file.write('\n')
            contig_file.flush()

            # NOTE(review): `likelihood` is refreshed from `likelihood_new`
            # only after this row is written, so logged values trail the
            # computed ones; behaviour preserved from the original.
            ll_file.write('%s\t%s\t%s\n' % (i, likelihood, len(contigs)))
            ll_file.flush()

            dump_reads(i, reads_dict)

            # ---- Computation ----
            # Update likelihood
            likelihood = likelihood_new
            # Map reads
            reads_dict = rm.run(reads_dict, contigs)
            # Run consensus sequence
            contigs = cs.run_consensus(reads_dict)

            # Log the contigs as they stand right before merging
            contig_file.write('%s\tmerge\t' % i)
            for c in contigs:
                contig_file.write('%s\t' % str(c))
            contig_file.write('\n')

            # Run merge
            # how do we know if a merge has happened..do we need to know?
            contigs, reads_dict = mc.run_merge(contigs, reads_dict)

            # Get new likelihood
            likelihood_new = ll._likelihood(reads_dict, contigs)

        # ---- Final file writes ----
        # NUM_ITERS equals the original `i + 1` after a non-empty loop and,
        # unlike the loop variable, is still defined when NUM_ITERS == 0.
        dump_reads(NUM_ITERS, reads_dict)

        # NOTE(review): the iteration column of the end rows is the literal
        # 1000, not NUM_ITERS; kept to preserve the file format.
        for c in contigs:
            contig_file.write('1000\tend\t%s\n' % str(c))
        ll_file.write(
            '%s\t%s\t%s\n' % (NUM_ITERS, likelihood, len(contigs)))
        ll_file.flush()