def query_subject_example(scope="module"):
    """Build one example query region and one example subject region.

    The subject is constructed first, then the query.

    :param scope: not read by the body; kept for interface compatibility.
    :return: a two-element list ``[query, subject]``.
    """
    subject_context = Context(9795, True)
    subject = ContigRegion(
        911,
        757,
        subject_context,
        name="subjectexample",
        forward=False,
        filename="templates/pRIAS (CC15).gb",
    )

    query_context = Context(22240, True)
    query = ContigRegion(
        1,
        155,
        query_context,
        name="queryexample",
        forward=True,
        filename="templates/pRasdfasdfaIAS (CC15).gb",
    )

    return [query, subject]
def test_subquery():
    """Check the lengths of two sub-queries taken from one contig."""
    query_context = Context(10000, True)
    subject_context = Context(5000, True)
    query_region = ContigRegion(100, 300, context=query_context, forward=True)
    subject_region = ContigRegion(500, 700, context=subject_context, forward=True)

    contig = Contig(query_region, subject_region, "BLAST")

    # Both sub-queries are created before either length is checked.
    first = contig.sub_query(200, 250)
    second = contig.sub_query(220, 300)

    # The asserted lengths count both endpoints (250 - 200 + 1, 300 - 220 + 1).
    assert len(first) == 51
    assert len(second) == 81
# Example #3
# 0
def test_gap_cost_both_extendable():
    """Sweep a right-hand contig along the query and record the gap cost.

    For each combination of extendable flags, ``Assembly._gap_cost`` is
    evaluated against a fixed left contig while the right contig's query
    start moves from 1 to 999. Each result is printed and added to a scatter
    plot. The test makes no assertions, and the code that would display the
    figure is switched off.
    """
    subject = ContigRegion(1, 100, Context(9795, True))
    query_context = Context(20000, True)
    query = ContigRegion(1, 100, query_context)

    left = BlastContig(query, subject, "test")

    from matplotlib import pyplot as plt

    scenarios = [
        (True, True, "blue"),
        (False, True, "green"),
        (False, False, "red"),
    ]

    for left_extendable, right_extendable, color in scenarios:
        distances = []
        costs = []

        # Both ends of the left contig share the same flag.
        left.lp_extendable = left_extendable
        left.rp_extendable = left_extendable

        for offset in range(1, 1000):
            right_query = ContigRegion(offset, offset + 99, query_context)
            right = BlastContig(right_query, subject, "test")
            right.lp_extendable = right_extendable
            right.rp_extendable = right_extendable

            gap_cost = Assembly._gap_cost(left, right)

            # The distance is measured from the regions as stored on the contigs.
            left_end = left.query.end
            right_start = right.query.start
            distance = right_start - left_end
            print(
                f"{left_end} {right_start} Distance: {distance}, Gap_cost: {gap_cost}"
            )

            distances.append(distance)
            costs.append(gap_cost)

        plt.scatter(distances,
                    costs,
                    s=1.0,
                    c=[color] * len(costs),
                    label=f"{left_extendable + right_extendable}")

    # Plot display is deliberately disabled; flip the flag to inspect the figure.
    show_plot = False
    if show_plot:
        plt.ylim(0, 500)
        plt.xlabel("Distance (bp)")
        plt.ylabel("Gap Cost ($)")
        plt.legend(loc=2)
        plt.show()
def test_contig_schema():
    """Dump a contig through ``schemas.ContigSchema`` and check the output."""
    query_context = Context(10000, True)
    subject_context = Context(5100, True)
    query_region = ContigRegion(100, 300, context=query_context, forward=True)
    subject_region = ContigRegion(500, 700, context=subject_context, forward=True)
    contig = Contig(query_region, subject_region, "BLAST", data1=5)

    data = schemas.ContigSchema().dump(contig)

    # Query region fields
    assert data['query']['start'] == 100
    assert data['query']['end'] == 300
    assert data['query']['context']['length'] == 10000

    # Subject region fields
    assert data['subject']['start'] == 500
    assert data['subject']['end'] == 700
    assert data['subject']['context']['length'] == 5100

    # Contig-level fields; the extra keyword ``data1`` is expected in metadata.
    assert data['quality']
    assert data['metadata'] == {'data1': 5}
    assert data['alignment_length'] == 201
def test_contig_region_with_circular_context(circular_context):
    """Test instantiation of a contig region using a circular context."""
    region = ContigRegion(
        1,
        100,
        circular_context,
        name="example",
        forward=False,
        sequence="AGTCAGAGTCAG",
        filename="somesequence2.gb",
    )

    # Coordinates
    assert region.start == 1
    assert region.end == 100

    # Topology and metadata; forward=False is expected to map to Region.REVERSE.
    assert region.circular
    assert region.name == "example"
    assert region.direction == Region.REVERSE
    assert region.sequence == "AGTCAGAGTCAG"
    assert region.filename == "somesequence2.gb"
def test_contig_region_with_linear_context(linear_context):
    """Test instantiation of a contig region using a linear context."""
    region = ContigRegion(
        1,
        100,
        linear_context,
        name="example",
        forward=True,
        sequence="AGTCAG",
        filename="somesequence.gb",
    )

    # Coordinates
    assert region.start == 1
    assert region.end == 100

    # Topology and metadata; forward=True is expected to map to Region.FORWARD.
    assert not region.circular
    assert region.name == "example"
    assert region.direction == Region.FORWARD
    assert region.sequence == "AGTCAG"
    assert region.filename == "somesequence.gb"
def test_divide_contigs():
    """Check how many contigs ``divide_contig`` returns, with and without self."""
    query_context = Context(10000, True)
    subject_context = Context(8000, True)
    query_region = ContigRegion(1000, 3000, context=query_context, forward=True)
    subject_region = ContigRegion(5000, 7000, context=subject_context, forward=True)

    contig = Contig(query_region, subject_region, "BLAST")

    # First without the original contig (12 expected), then with it (13 expected).
    # Fresh position lists are passed on every call.
    for include_self, expected_count in ((False, 12), (True, 13)):
        pieces = contig.divide_contig(
            [1100, 1200],
            [2000, 2200, 2300],
            include_self=include_self)
        assert len(pieces) == expected_count
    def _overlap_range(left, right):
        """Return the overlap range between a left and a right contig.

        :param left: contig providing ``query`` and ``end_extendable``.
        :param right: contig providing ``query`` and ``start_extendable``.
        :return: ``(min_overlap, max_overlap, gap_span)``, or ``None`` when
            ``ContigRegion.get_gap_span`` returns ``None`` for the two queries.
        """
        span = ContigRegion.get_gap_span(left.query, right.query)
        if span is None:
            return None

        # primers are 60bp, 20bp are annealed, leaving 40bp
        primer_overhang = 40.0

        # Each extendable flag contributes one overhang to the reachable extension.
        reach = primer_overhang * left.end_extendable
        reach += primer_overhang * right.start_extendable

        lower = -1.0 * span
        upper = -1.0 * span + reach
        return (lower, upper, span)
def test_fuse():
    """Exercise ``Contig.fuse`` in four scenarios.

    1. Fusing two adjacent contigs extends the query end.
    2. When both contigs carry sequences, the fused sequences are concatenated.
    3. When only one contig carries sequences, ``ContigError`` is raised.
    4. When the query contexts differ, ``ContigError`` is raised.
    """
    query_context = Context(10000, True)
    subject_context = Context(5100, True)
    q1 = ContigRegion(100, 300, context=query_context, forward=True)
    q2 = ContigRegion(301, 600, context=query_context, forward=True)
    # q3 has the same coordinates as q2 but is built on the other context.
    q3 = ContigRegion(301, 600, context=subject_context, forward=True)
    s1 = ContigRegion(500, 700, context=subject_context, forward=True)
    s2 = ContigRegion(701, 1000, context=subject_context, forward=True)
    s3 = ContigRegion(701, 1000, context=subject_context, forward=True)

    def adjacent_pair():
        """Build a fresh (q1/s1, q2/s2) contig pair, in that order."""
        return Contig(q1, s1, "BLAST"), Contig(q2, s2, "BLAST")

    # Scenario 1: the fused contig's query end is asserted to be 600.
    head, tail = adjacent_pair()
    head.fuse(tail)
    assert head.query.end == 600

    # Scenario 2: sequences on both contigs are joined head-then-tail.
    head, tail = adjacent_pair()
    head_query_seq = "A" * head.query.length
    head_subject_seq = "B" * head.subject.length
    tail_query_seq = "C" * tail.query.length
    tail_subject_seq = "D" * tail.subject.length
    head.query.sequence = head_query_seq
    head.subject.sequence = head_subject_seq
    tail.query.sequence = tail_query_seq
    tail.subject.sequence = tail_subject_seq

    head.fuse(tail)
    assert head.query.sequence == head_query_seq + tail_query_seq
    assert head.subject.sequence == head_subject_seq + tail_subject_seq

    # Scenario 3: raises an error if one has sequence and the other does not.
    head, tail = adjacent_pair()
    head.query.sequence = "AGTCTGAGCTGTCGTGATAGTGCTGA"
    head.subject.sequence = "AGTYAGYCHYAYHCYHYSCHYHAYCY"

    with pytest.raises(ContigError):
        head.fuse(tail)

    # Scenario 4: raises an error if the query contexts differ.
    head, tail = adjacent_pair()
    foreign = Contig(q3, s3, "BLAST")
    with pytest.raises(ContigError):
        foreign.fuse(tail)
    def perfect_matches(self, rc=True):
        """
        Pseudo-blast for finding perfect sequence matches (i.e. primers)

        Every database sequence and its reverse complement is used as a
        regular-expression pattern and searched for in the query sequence.
        Before searching, ``n``/``N`` characters are replaced with ``.`` and
        everything is lowercased; a ``.`` in a pattern matches any character.
        Each match yields one ``Contig`` of type ``Contig.TYPE_PRIMER``.

        :param rc: NOTE(review): this flag is never read; reverse-complement
            matches are always searched. Confirm the intended behaviour
            before wiring it in.
        :return: a ``ContigContainer`` whose ``contigs`` list holds one contig
            per match; for each sequence, forward-strand matches come before
            reverse-complement matches.
        """
        # Get primer sequences (defined in db); the other two values are unused.
        _, seqs, _ = self.concate_db_to_fsa()

        contig_container = ContigContainer()

        # Get the query sequence
        query_seq_str = str(open_sequence(self.query)[0].seq)
        query_seq_str = re.sub('[nN]', '.', query_seq_str).lower()

        for seq in seqs:
            seq_str = str(seq.seq)
            try:
                rc_seq_str = dna_reverse_complement(seq_str)
            except KeyError:
                # Skip sequences for which the reverse complement lookup fails.
                continue
            seq_str = re.sub('[nN]', '.', seq_str).lower()
            rc_seq_str = re.sub('[nN]', '.', rc_seq_str).lower()

            # (pattern, subject start, subject end, subject is forward).
            # The reverse-complement search swaps the subject's start and end.
            searches = (
                (seq_str, ContigRegion.START_INDEX, len(seq), True),
                (rc_seq_str, len(seq), ContigRegion.START_INDEX, False),
            )
            for pattern, subject_start, subject_end, forward in searches:
                for match in re.finditer(pattern, query_seq_str):
                    # The subject always stores the forward-strand sequence.
                    subject = ContigRegion(
                        seq.id,
                        subject_start,
                        subject_end,
                        len(seq),
                        False,
                        forward,
                        sequence=seq_str,
                    )

                    # match.end() is exclusive, hence the -1 for an inclusive end.
                    query = ContigRegion(
                        self.query,
                        match.start() + ContigRegion.START_INDEX,
                        match.end() + ContigRegion.START_INDEX - 1,
                        self.query_length,
                        self.query_circular,
                        True,
                    )

                    contig_container.contigs.append(
                        Contig(query, subject, Contig.TYPE_PRIMER)
                    )
        return contig_container