def test_blast_get_words(sub_matrix, query_seq, blast_words):
    """Check that ``Blast.get_words`` returns the expected words.

    A ``Blast`` object is built from ``sub_matrix`` and stored in the
    module-level ``blast`` global.  ``get_words`` is then called with
    ``sequence=query_seq`` and ``T=13``; its result must have the same
    length and the same set of elements as ``blast_words``.

    An exception raised in any step is converted into an assertion
    failure carrying a message specific to that step.
    """
    global blast

    # Step 1: build the object under test.
    try:
        blast = Blast(sub_matrix)
        created = True
    except Exception:
        blast = None
        created = False
    assert created, 'Error while creating Blast.'
    assert blast is not None, 'Blast initialization failed.'

    # Step 2: compute the words for the query sequence.
    try:
        words = blast.get_words(sequence=query_seq, T=13)
        computed = True
    except Exception:
        computed = False
    assert computed, 'Error in Blast.get_words(sequence).'

    # Step 3: compare with the reference, ignoring order.
    try:
        same_length = (len(blast_words) == len(words))
        same_content = (set(blast_words) == set(words))
        compared = True
    except Exception:
        compared = False
    assert compared, 'Error while comparing Blast.get_words(sequence) output.'

    assert same_length and same_content, 'Incorrect words returned for sequence.'
def add_hybridization_sites(self):
    """
    Smoke-run of Blast.add_hybridization_sites on the first data_16 target.

    Computes the optimal primers pairs of the target, builds a Blast object
    from them, calls add_hybridization_sites() and prints the length of the
    target's all_primers_pairs.  Nothing is asserted.
    """
    first_target = get_data_from_csv(data_16)[0]
    first_target.compute_optimal_primers_pairs()
    # Captured before the Blast run; only used for the final print.
    pairs = first_target.all_primers_pairs
    runner = Blast(first_target.all_primers_pairs)
    runner.add_hybridization_sites()
    print(len(pairs))
def test_get_unique_primers(self):
    """
    Tests that get_unique_primers returns each primer only once and that
    no primer has been forgotten.

    Uses the first target of data_1: both returned lists must be free of
    duplicates, and the left/right primer of every primers pair of the
    target must appear in the corresponding list.
    """
    target = get_data_from_csv(data_1)[0]
    target.compute_optimal_primers_pairs()
    blast = Blast(target.all_primers_pairs)
    u_l_primers, u_r_primers = blast.get_unique_primers()

    # A list without duplicates has as many items as its set.
    # assertEqual reports both lengths on failure.
    self.assertEqual(len(u_l_primers), len(set(u_l_primers)))
    self.assertEqual(len(u_r_primers), len(set(u_r_primers)))

    # assertIn reports the missing primer and the container on failure.
    for primers_pair in target.all_primers_pairs:
        self.assertIn(primers_pair.right_primer, u_r_primers)
        self.assertIn(primers_pair.left_primer, u_l_primers)
def test_run_blast(self):
    """
    Tests if blast hits are correct with respect to our constraints.

    run_blast is called for the first unique left primer and then for the
    first unique right primer.  After each run, every line of the file at
    blast_output_file_path is split on whitespace and must satisfy:

      * field 2 equals 100;
      * the next-to-last field is below 1;
      * field 3 divided by field 7 is at least 0.8.

    NOTE(review): the fields are presumably percent identity, e-value and
    alignment length / query length -- confirm against the output format
    configured in run_blast.
    """
    target = get_data_from_csv(data_1)[0]
    target.compute_optimal_primers_pairs()
    blast = Blast(target.all_primers_pairs)
    u_l_primers, u_r_primers = blast.get_unique_primers()

    # Same checks for both sides; left is run and verified before right.
    for unique_primers in (u_l_primers, u_r_primers):
        blast.run_blast(unique_primers[0])
        with open(blast_output_file_path, "r") as o:
            for line in o:
                blast_result = line.split()
                self.assertEqual(float(blast_result[2]), 100.)
                self.assertLess(float(blast_result[-2]), 1)
                self.assertGreaterEqual(
                    int(blast_result[3]) / float(blast_result[7]), 0.8)
def main():
    """Write the expression matrix to output.txt with translated ids.

    Reads "blastp.outfmt6" through Blast and "diffExpr.P1e-3_C2.matrix"
    through Matrix.  For each entry of matrix.expressions one line is
    written to "output.txt": the sp_id mapped to the entry's transcript
    (or the transcript itself when no good-match hit maps it), a tab, and
    tuple_to_string(entry).
    """
    # Open files and create their objects
    blast = Blast("blastp.outfmt6")
    matrix = Matrix("diffExpr.P1e-3_C2.matrix")

    # Map transcript_id -> sp_id, keeping only hits flagged as good matches
    blast_dict = {}
    for hit in blast.blast_hit_list:
        if hit.hit_good_match():
            transcript_id = hit.transcript_id
            blast_dict[transcript_id] = hit.sp_id

    # Look-up and output file
    with open("output.txt", "w") as output:
        for info in matrix.expressions:
            # Fall back to the transcript name when there is no mapping.
            label = blast_dict.get(info.transcript, info.transcript)
            output.write(label + "\t" + tuple_to_string(info) + "\n")
def test_search_hybridization_sites_lr_rl(self):
    """
    Tests the detection of lr or rl hybridisation.

    With the parsed blast output of the first unique left and right
    primers of the first data_16 target, no lr/rl hybridisation is
    expected.  After the "chr20" sites of the right primer are replaced
    by [[50283620, 50283640, 1]], one must be detected.
    """
    first_target = get_data_from_csv(data_16)[0]
    first_target.compute_optimal_primers_pairs()
    runner = Blast(first_target.all_primers_pairs)
    lefts, rights = runner.get_unique_primers()

    left = lefts[0]
    runner.parse_blast_output(left)
    right = rights[0]
    runner.parse_blast_output(right)

    detected = runner.search_hybridization_sites_lr_rl(
        left.primer_hybridization_sites,
        right.primer_hybridization_sites)
    self.assertFalse(detected)

    # Overwrite the right primer's chr20 sites with a single site.
    right.primer_hybridization_sites["chr20"] = [[50283620, 50283640, 1]]

    detected = runner.search_hybridization_sites_lr_rl(
        left.primer_hybridization_sites,
        right.primer_hybridization_sites)
    self.assertTrue(detected)
self.group[keep].append(edge) self.seqlist[edge['sname']] = keep self.seqlist[edge['qname']] = keep self.group[discard] = None return len(self.group[sgroup]) # ================================================================================================== # main/test # ================================================================================================== if __name__ == '__main__': infile = sys.argv[1] sys.stderr.write('Blast search: {}\n'.format(infile)) blast = Blast(file=sys.argv[1]) # fmt = 'qname qlen qbegin qend sname slen sbegin send alignlen score evalue stitle' fmt = 'qname sname id alignlen mismatch gapopen qbeg qend sbeg send evalue bit_score' nfields = blast.setFormat(fmt) record = [] record_n = 0 sidx = {} qidx = {} threshold = 1e-5 # read the search and store all matches over a threshold while blast.next(): # print(blast.line) # print('query:{}\tsubject:{}'.format(blast.qname, blast.sname))
from turtle import *
import random
import math
from blast import Blast
from shoot import Shoot
from tal_maya_ball_blast import Ball
import time

# Turn off turtle's automatic screen updates (n=0, delay=0).
tracer(0, 0)

frame = 1
shots = []
running = True
blast_1 = Blast(-200, -300, 70)


def movearound():
    """Move blast_1 after the mouse pointer and fire on every 10th frame.

    blast_1 is sent to y = -300 at an x derived from the pointer's x
    position minus twice ``screen_width``.  When the module-level ``frame``
    is a multiple of 10, a ``Shoot(x, y, 10)`` is created at blast_1's x and
    at blast_1's y plus ``blast_1.r - 0.5``, and appended to ``shots``.
    """
    blast_1.goto(getcanvas().winfo_pointerx() - screen_width * 2, -300)
    x = blast_1.pos()[0]
    y = blast_1.pos()[1] + blast_1.r - 0.5
    if frame % 10 == 0:
        shoot = Shoot(x, y, 10)
        shots.append(shoot)


# Half of the canvas dimensions; true division makes these floats.
screen_width = getcanvas().winfo_width() / 2
screen_height = getcanvas().winfo_height() / 2

first_r = 100
first_y = screen_height - first_r - 10
# random.randint needs integer bounds: float bounds raise ValueError when
# non-integral (odd canvas width) and TypeError on Python 3.12+.
# int() truncates toward zero, so both bounds stay inside the float range.
first_x = random.randint(int(-screen_width + first_r + 3),
                         int(screen_width - first_r - 3))
def test_search_hybridization_site_rr_ll(self):
    """
    Tests the detection of rr or ll hybridisation.

    For the first unique left primer, then the first unique right primer,
    of the first data_16 target: after parsing the blast output no rr/ll
    hybridisation is expected; after appending one extra site to the
    primer's "chr20" (left) or "chr11" (right) sites, one must be detected.

    The original note states that is_primers_compatible is tested as well;
    no direct call to it is visible in this test.
    """
    first_target = get_data_from_csv(data_16)[0]
    first_target.compute_optimal_primers_pairs()
    runner = Blast(first_target.all_primers_pairs)
    lefts, rights = runner.get_unique_primers()

    # Left primer: clean at first, flagged once a chr20 site is added.
    left = lefts[0]
    runner.parse_blast_output(left)
    self.assertFalse(runner.search_hybridization_site_rr_ll(left))
    extra_left_site = [50283620, 50283640, 1]
    left.primer_hybridization_sites["chr20"].append(extra_left_site)
    self.assertTrue(runner.search_hybridization_site_rr_ll(left))

    # Right primer: clean at first, flagged once a chr11 site is added.
    right = rights[0]
    runner.parse_blast_output(right)
    self.assertFalse(runner.search_hybridization_site_rr_ll(right))
    extra_right_site = [60672820, 60672805, -1]
    right.primer_hybridization_sites["chr11"].append(extra_right_site)
    self.assertTrue(runner.search_hybridization_site_rr_ll(right))
"""================================================================================================= Single linkage clustering for all-against-all blast search Michael Gribskov 12 April 2021 =================================================================================================""" import sys from blast import Blast from cluster.single_linkage import SingleLinkage # -------------------------------------------------------------------------------------------------- # main program # -------------------------------------------------------------------------------------------------- if __name__ == '__main__': blast = Blast(sys.argv[1]) fmt = 'qname sname id alignlen mismatch gapopen qbeg qend sbeg send evalue bit_score' nfields = blast.setFormat(fmt) out = open('clusters80.out', 'w') cluster = SingleLinkage() cluster.set_keys(['sname', 'qname', 'evalue', 'id']) cluster.labels = ['sname', 'qname'] nseq = 0 while blast.readTabular(): nseq += 1 b = { 'sname': blast.sname, 'qname': blast.qname, 'evalue': float(blast.evalue),