def query_subject_example(scope="module"):
    """Build an example pair of contig regions for use in tests.

    The ``scope`` argument is not read anywhere in the body.

    :return: a two-item list ordered ``[query, subject]``
    """
    subject_context = Context(9795, True)
    subject_region = ContigRegion(
        911,
        757,
        subject_context,
        name="subjectexample",
        forward=False,
        filename="templates/pRIAS (CC15).gb",
    )

    query_context = Context(22240, True)
    query_region = ContigRegion(
        1,
        155,
        query_context,
        name="queryexample",
        forward=True,
        filename="templates/pRasdfasdfaIAS (CC15).gb",
    )

    # Note the order: the query comes first, then the subject.
    return [query_region, subject_region]
def test_subquery():
    """Check the lengths of sub-queries cut out of a contig's query region."""
    query_context = Context(10000, True)
    subject_context = Context(5000, True)
    query_region = ContigRegion(100, 300, context=query_context, forward=True)
    subject_region = ContigRegion(500, 700, context=subject_context, forward=True)
    contig = Contig(query_region, subject_region, "BLAST")

    # Both spans are inclusive: 200..250 -> 51 positions, 220..300 -> 81.
    inner_span = contig.sub_query(200, 250)
    trailing_span = contig.sub_query(220, 300)

    assert len(inner_span) == 51
    assert len(trailing_span) == 81
def test_gap_cost_both_extendable():
    """Sweep the right contig along the query and record the gap cost.

    For each (left extendable, right extendable) scenario the right contig's
    query start is moved from 1 to 999, ``Assembly._gap_cost`` is evaluated
    against a fixed left contig, and the (distance, cost) pairs are added to a
    scatter plot. The code that finalises and shows the figure sits behind a
    constant-false guard, so no window is opened.
    """
    subject = ContigRegion(1, 100, Context(9795, True))
    query_context = Context(20000, True)
    query = ContigRegion(1, 100, query_context)
    left = BlastContig(query, subject, "test")

    from matplotlib import pyplot as plt

    # (left extendable, right extendable, plot colour)
    scenarios = [
        (True, True, "blue"),
        (False, True, "green"),
        (False, False, "red"),
    ]

    for left_extendable, right_extendable, color in scenarios:
        distances = []
        costs = []

        left.lp_extendable = left_extendable
        left.rp_extendable = left_extendable

        for offset in range(1, 1000):
            right_query = ContigRegion(offset, offset + 99, query_context)
            right = BlastContig(right_query, subject, "test")
            right.lp_extendable = right_extendable
            right.rp_extendable = right_extendable

            gap_cost = Assembly._gap_cost(left, right)

            left_end = left.query.end
            right_start = right.query.start
            distance = right_start - left_end
            print(
                f"{left_end} {right_start} Distance: {distance}, Gap_cost: {gap_cost}"
            )

            distances.append(distance)
            costs.append(gap_cost)

        plt.scatter(
            distances,
            costs,
            s=1.0,
            c=[color] * len(costs),
            label=f"{left_extendable + right_extendable}",
        )

    # Plot display is deliberately disabled by the constant-false guard.
    if False:
        plt.ylim(0, 500)
        plt.xlabel("Distance (bp)")
        plt.ylabel("Gap Cost ($)")
        plt.legend(loc=2)
        plt.show()
def test_contig_schema():
    """Dump a contig through ``ContigSchema`` and check the serialized fields."""
    query_context = Context(10000, True)
    subject_context = Context(5100, True)
    query_region = ContigRegion(100, 300, context=query_context, forward=True)
    subject_region = ContigRegion(500, 700, context=subject_context, forward=True)
    contig = Contig(query_region, subject_region, "BLAST", data1=5)

    data = schemas.ContigSchema().dump(contig)

    # Query region and its context
    query_dump = data['query']
    assert query_dump['start'] == 100
    assert query_dump['end'] == 300
    assert query_dump['context']['length'] == 10000

    # Subject region and its context
    subject_dump = data['subject']
    assert subject_dump['start'] == 500
    assert subject_dump['end'] == 700
    assert subject_dump['context']['length'] == 5100

    # Contig-level fields; extra keyword arguments end up under 'metadata'
    assert data['quality']
    assert data['metadata'] == {'data1': 5}
    assert data['alignment_length'] == 201
def test_contig_region_with_circular_context(circular_context):
    """Test instantiation of a contig region using a circular context."""
    options = dict(
        name="example",
        forward=False,
        sequence="AGTCAGAGTCAG",
        filename="somesequence2.gb",
    )
    region = ContigRegion(1, 100, circular_context, **options)

    # Coordinates and topology
    assert region.start == 1
    assert region.end == 100
    assert region.circular

    # Pass-through attributes; forward=False maps to the reverse direction
    assert region.name == "example"
    assert region.direction == Region.REVERSE
    assert region.sequence == "AGTCAGAGTCAG"
    assert region.filename == "somesequence2.gb"
def test_contig_region_with_linear_context(linear_context):
    """Test instantiation of a contig region using a linear context."""
    options = dict(
        name="example",
        forward=True,
        sequence="AGTCAG",
        filename="somesequence.gb",
    )
    region = ContigRegion(1, 100, linear_context, **options)

    # Coordinates and topology
    assert region.start == 1
    assert region.end == 100
    assert not region.circular

    # Pass-through attributes; forward=True maps to the forward direction
    assert region.name == "example"
    assert region.direction == Region.FORWARD
    assert region.sequence == "AGTCAG"
    assert region.filename == "somesequence.gb"
def test_divide_contigs():
    """Check how many contigs ``divide_contig`` yields with and without self."""
    query_context = Context(10000, True)
    subject_context = Context(8000, True)
    query_region = ContigRegion(1000, 3000, context=query_context, forward=True)
    subject_region = ContigRegion(5000, 7000, context=subject_context, forward=True)
    contig = Contig(query_region, subject_region, "BLAST")

    # First without the original contig, then with it (one extra result).
    for include_self, expected_count in ((False, 12), (True, 13)):
        # Fresh list literals are passed on every call.
        pieces = contig.divide_contig(
            [1100, 1200],
            [2000, 2200, 2300],
            include_self=include_self,
        )
        assert len(pieces) == expected_count
def _overlap_range(left, right):
    """Compute the overlap range between a left and a right contig.

    :param left: contig providing ``query`` and ``end_extendable``
    :param right: contig providing ``query`` and ``start_extendable``
    :return: ``(min_overlap, max_overlap, gap_span)``, or ``None`` when
        ``ContigRegion.get_gap_span`` returns ``None`` for the two queries
    """
    gap_span = ContigRegion.get_gap_span(left.query, right.query)
    if gap_span is None:
        return None

    # primers are 60bp, 20bp are annealed, leaving 40bp
    max_primer_extension = 40.0

    # Each extendable end contributes one primer extension to the upper bound.
    extension = (
        max_primer_extension * left.end_extendable
        + max_primer_extension * right.start_extendable
    )

    # Overlap is the negated gap span; extension only widens the maximum.
    min_overlap = -1.0 * gap_span
    max_overlap = min_overlap + extension
    return (min_overlap, max_overlap, gap_span)
def test_fuse():
    """Exercise ``Contig.fuse`` on adjacent contigs.

    Covers plain fusion, concatenation of sequences when both contigs carry
    them, and the two error cases (sequences on only one contig, and query
    regions from different contexts).
    """
    ctx_long = Context(10000, True)
    ctx_short = Context(5100, True)

    query_a = ContigRegion(100, 300, context=ctx_long, forward=True)
    query_b = ContigRegion(301, 600, context=ctx_long, forward=True)
    # Same coordinates as query_b but bound to the other context.
    query_c = ContigRegion(301, 600, context=ctx_short, forward=True)

    subject_a = ContigRegion(500, 700, context=ctx_short, forward=True)
    subject_b = ContigRegion(701, 1000, context=ctx_short, forward=True)
    subject_c = ContigRegion(701, 1000, context=ctx_short, forward=True)

    # Fusing extends the first contig's query end to the second's end.
    contig1 = Contig(query_a, subject_a, "BLAST")
    contig2 = Contig(query_b, subject_b, "BLAST")
    contig1.fuse(contig2)
    assert contig1.query.end == 600

    # When both contigs carry sequences, fusing concatenates them.
    contig1 = Contig(query_a, subject_a, "BLAST")
    contig2 = Contig(query_b, subject_b, "BLAST")
    left_query_seq = "A" * contig1.query.length
    left_subject_seq = "B" * contig1.subject.length
    right_query_seq = "C" * contig2.query.length
    right_subject_seq = "D" * contig2.subject.length
    contig1.query.sequence = left_query_seq
    contig1.subject.sequence = left_subject_seq
    contig2.query.sequence = right_query_seq
    contig2.subject.sequence = right_subject_seq
    contig1.fuse(contig2)
    assert contig1.query.sequence == left_query_seq + right_query_seq
    assert contig1.subject.sequence == left_subject_seq + right_subject_seq

    # Raises an error if one contig has sequences and the other does not.
    contig1 = Contig(query_a, subject_a, "BLAST")
    contig2 = Contig(query_b, subject_b, "BLAST")
    contig1.query.sequence = "AGTCTGAGCTGTCGTGATAGTGCTGA"
    contig1.subject.sequence = "AGTYAGYCHYAYHCYHYSCHYHAYCY"
    with pytest.raises(ContigError):
        contig1.fuse(contig2)

    # Raises an error if the query regions come from different contexts.
    # contig1 is rebuilt here as in the sections above but is not used below.
    contig1 = Contig(query_a, subject_a, "BLAST")
    contig2 = Contig(query_b, subject_b, "BLAST")
    contig3 = Contig(query_c, subject_c, "BLAST")
    with pytest.raises(ContigError):
        contig3.fuse(contig2)
def perfect_matches(self, rc=True):
    """
    Pseudo-blast for finding perfect sequence matches (i.e. primers).

    Every sequence returned by ``self.concate_db_to_fsa()`` is searched for in
    the query, once as given and once as its reverse complement. ``n``/``N``
    in either the query or a sequence is turned into the regex wildcard
    ``.``, and matching is done on lowercased text. Sequences whose reverse
    complement raises ``KeyError`` are skipped entirely.

    :param rc: currently not read by this method; reverse-complement matches
        are always searched
    :return: a ``ContigContainer`` holding one ``Contig.TYPE_PRIMER`` contig
        per match
    """
    # Get primer sequences (defined in db); only the sequences are used here
    _, seqs, _ = self.concate_db_to_fsa()
    contig_container = ContigContainer()

    # Get the query sequence, with N as a wildcard, lowercased
    query_record = open_sequence(self.query)[0]
    query_seq_str = re.sub('[nN]', '.', str(query_record.seq)).lower()

    for seq in seqs:
        raw_seq = str(seq.seq)
        try:
            raw_rc_seq = dna_reverse_complement(raw_seq)
        except KeyError:
            continue

        fwd_pattern = re.sub('[nN]', '.', raw_seq).lower()
        rc_pattern = re.sub('[nN]', '.', raw_rc_seq).lower()

        # Forward matches are collected first, then reverse-complement ones.
        for pattern, is_forward in ((fwd_pattern, True), (rc_pattern, False)):
            for hit in re.finditer(pattern, query_seq_str):
                seq_len = len(seq)
                if is_forward:
                    subject_start, subject_end = ContigRegion.START_INDEX, seq_len
                else:
                    # Reverse hits run from the end of the sequence to its start
                    subject_start, subject_end = seq_len, ContigRegion.START_INDEX

                # The subject always carries the forward pattern as its sequence
                subject = ContigRegion(
                    seq.id,
                    subject_start,
                    subject_end,
                    seq_len,
                    False,
                    is_forward,
                    sequence=fwd_pattern,
                )
                # Regex spans are 0-based, end-exclusive; shift to START_INDEX,
                # end-inclusive
                query = ContigRegion(
                    self.query,
                    hit.start() + ContigRegion.START_INDEX,
                    hit.end() + ContigRegion.START_INDEX - 1,
                    self.query_length,
                    self.query_circular,
                    True,
                )
                contig_container.contigs.append(
                    Contig(query, subject, Contig.TYPE_PRIMER)
                )

    return contig_container