Ejemplo n.º 1
0
    def test_globalxx_simple(self):
        """Check both sorted globalxx alignments of GAACT against GAT."""
        aligns = pairwise2.align.globalxx("GAACT", "GAT")
        self.assertEqual(len(aligns), 2)
        expected = (
            "GAACT\n|||||\nG-A-T\n  Score=3\n",
            "GAACT\n|||||\nGA--T\n  Score=3\n",
        )
        for found, wanted in zip(sorted(aligns), expected):
            self.assertEqual(pairwise2.format_alignment(*found), wanted)
Ejemplo n.º 2
0
def align_and_mutate(ref_seq, work_seq, mut_set):
    """Transfer mutations numbered on ``ref_seq`` onto ``work_seq``.

    The two sequences are globally aligned with BLOSUM62, and each key of
    ``mut_set`` (a 0-based position in the ungapped reference) is translated
    to its alignment column. The residue at that column of the aligned work
    sequence is then replaced by the mapped value.

    Args:
        ref_seq: object whose ``.seq`` is the reference sequence.
        work_seq: object whose ``.seq`` is the sequence to mutate.
        mut_set: mapping of reference position -> replacement residue.

    Returns:
        The mutated work sequence as a string with gap characters removed,
        taken from the first alignment only. Returns ``None`` implicitly when
        the aligner yields no alignment.

    Side effects:
        Prints progress and the formatted alignment. Calls ``exit()`` when a
        mutation position falls on a gap in the work sequence.
    """
    # Global BLOSUM62 alignment; -5 and -1 are the gap penalties handed to
    # globalds (so gaps ARE penalised here).
    matrix = matlist.blosum62
    for aln in pairwise2.align.globalds(ref_seq.seq, work_seq.seq, matrix, -5, -1):
        ref_aln_seq = aln[0]
        work_aln_seq = aln[1]

        # Walk the aligned reference, counting only real residues, to turn
        # reference positions into alignment columns.
        mapped_mut_set = {}
        ref_position = -1
        for aln_position, aa in enumerate(ref_aln_seq):
            if aa == "-":
                continue
            ref_position += 1
            if ref_position in mut_set:
                mapped_mut_set[aln_position] = mut_set[ref_position]
        pprint(mapped_mut_set)

        new_seq = work_aln_seq.tomutable()
        for pos in sorted(mapped_mut_set):
            if work_aln_seq[pos] == '-':
                print("Error, mutation position " + str(pos) + "(" + ref_aln_seq[pos]+"->"+str(mapped_mut_set[pos]) + ") aligned to a gap!")
                print(pairwise2.format_alignment(*aln))
                exit()
            new_seq[pos] = mapped_mut_set[pos]
            print("making mutation at position "+str(pos)+": "+work_aln_seq[pos]+"->"+new_seq[pos]+" (canonincal mutation is "+ref_aln_seq[pos]+"->"+mapped_mut_set[pos]+")")
        print(pairwise2.format_alignment(*aln))

        # Only the first alignment is used: return from inside the loop.
        return str(new_seq).replace('-', '')
Ejemplo n.º 3
0
    def test_penalize_end_gaps(self):
        """Check the three sorted globalxs alignments with penalize_end_gaps=0."""
        aligns = pairwise2.align.globalxs("GACT", "GT", -0.2, -0.8, penalize_end_gaps=0)
        self.assertEqual(len(aligns), 3)
        expected = (
            "GACT\n||||\n--GT\n  Score=1\n",
            "GACT\n||||\nG--T\n  Score=1\n",
            "GACT\n||||\nGT--\n  Score=1\n",
        )
        for found, wanted in zip(sorted(aligns), expected):
            self.assertEqual(pairwise2.format_alignment(*found), wanted)
Ejemplo n.º 4
0
    def test_separate_gap_penalties_with_extension(self):
        """Check the three sorted localxd alignments for list inputs."""
        aligns = pairwise2.align.localxd(
            list("GAAT"), list("GTCCT"), -0.1, 0, -0.1, -0.1, gap_char=["-"]
        )
        self.assertEqual(len(aligns), 3)
        target_row = "['G', 'T', 'C', 'C', 'T']"
        expected = (
            "['G', '-', 'A', 'A', 'T']\n|||||\n" + target_row + "\n  Score=1.9\n",
            "['G', 'A', '-', 'A', 'T']\n|||||\n" + target_row + "\n  Score=1.9\n",
            "['G', 'A', 'A', '-', 'T']\n|||||\n" + target_row + "\n  Score=1.9\n",
        )
        for found, wanted in zip(sorted(aligns), expected):
            self.assertEqual(pairwise2.format_alignment(*found), wanted)
Ejemplo n.º 5
0
    def test_align_one_char2(self):
        """Check both sorted localxs alignments of a single 'c' against abcce."""
        aligns = pairwise2.align.localxs("abcce", "c", -0.3, -0.1)
        self.assertEqual(len(aligns), 2)
        expected = (
            "abcce\n   |\n---c-\n  Score=1\n",
            "abcce\n  |\n--c--\n  Score=1\n",
        )
        for found, wanted in zip(sorted(aligns), expected):
            self.assertEqual(pairwise2.format_alignment(*found), wanted)
Ejemplo n.º 6
0
    def test_extend_penalty2(self):
        """Check both sorted globalxs alignments of GACT/GT with penalties -0.2, -1.5."""
        aligns = pairwise2.align.globalxs("GACT", "GT", -0.2, -1.5)
        self.assertEqual(len(aligns), 2)
        expected = (
            "GACT\n||||\n-G-T\n  Score=0.6\n",
            "GACT\n||||\nG-T-\n  Score=0.6\n",
        )
        for found, wanted in zip(sorted(aligns), expected):
            self.assertEqual(pairwise2.format_alignment(*found), wanted)
    def test_penalize_end_gaps2(self):
        """Repeat the end-gap check with force_generic=True (same result expected)."""
        aligns = pairwise2.align.globalxs(
            "GACT", "GT", -0.8, -0.2, penalize_end_gaps=0, force_generic=True
        )
        self.assertEqual(len(aligns), 3)
        expected = (
            "GACT\n||||\n--GT\n  Score=1\n",
            "GACT\n||||\nG--T\n  Score=1\n",
            "GACT\n||||\nGT--\n  Score=1\n",
        )
        for found, wanted in zip(sorted(aligns), expected):
            self.assertEqual(pairwise2.format_alignment(*found), wanted)
Ejemplo n.º 8
0
    def test_separate_gap_penalties1(self):
        """Check both sorted localxd alignments of GAT against GTCT."""
        aligns = pairwise2.align.localxd("GAT", "GTCT", -0.3, 0, -0.8, 0)
        self.assertEqual(len(aligns), 2)
        expected = (
            "G-AT\n||||\nGTCT\n  Score=1.7\n",
            "GA-T\n||||\nGTCT\n  Score=1.7\n",
        )
        for found, wanted in zip(sorted(aligns), expected):
            self.assertEqual(pairwise2.format_alignment(*found), wanted)
Ejemplo n.º 9
0
    def test_localxs(self):
        """Check the first two sorted localxs alignments of AxBx against zABz."""
        ordered = sorted(pairwise2.align.localxs("AxBx", "zABz", -0.1, 0))
        # Index explicitly (no zip) so a missing alignment still raises.
        first = pairwise2.format_alignment(*ordered[0])
        self.assertEqual(first, "-AxBx\n |||\nzA-Bz\n  Score=1.9\n")
        second = pairwise2.format_alignment(*ordered[1])
        self.assertEqual(second, "-AxBx\n ||||\nzA-Bz\n  Score=1.9\n")
Ejemplo n.º 10
0
    def test_match_score_open_penalty1(self):
        """Check both sorted globalms alignments of AA against A."""
        aligns = pairwise2.align.globalms("AA", "A", 2.0, -1, -0.1, 0)
        self.assertEqual(len(aligns), 2)
        expected = (
            "AA\n||\n-A\n  Score=1.9\n",
            "AA\n||\nA-\n  Score=1.9\n",
        )
        for found, wanted in zip(sorted(aligns), expected):
            self.assertEqual(pairwise2.format_alignment(*found), wanted)
Ejemplo n.º 11
0
    def test_match_score_open_penalty2(self):
        """Check both sorted globalms alignments of GAA against GA."""
        aligns = pairwise2.align.globalms("GAA", "GA", 1.5, 0, -0.1, 0)
        self.assertEqual(len(aligns), 2)
        expected = (
            "GAA\n|||\nG-A\n  Score=2.9\n",
            "GAA\n|||\nGA-\n  Score=2.9\n",
        )
        for found, wanted in zip(sorted(aligns), expected):
            self.assertEqual(pairwise2.format_alignment(*found), wanted)
    def test_separate_gap_penalties_with_extension(self):
        """Check localxd with separate gap penalties on list input."""
        aligns = pairwise2.align.localxd(
            list("GAAT"), list("GTCCT"), -0.1, 0, -0.1, -0.1, gap_char=["-"]
        )
        self.assertEqual(len(aligns), 3)
        # Each row of the formatted output ends with a trailing space, so the
        # expected text is spelled with explicit "\n" escapes.
        expected = (
            "G - A A T \n|   . . | \nG T C C T \n  Score=1.9\n",
            "G A - A T \n| .   . | \nG T C C T \n  Score=1.9\n",
            "G A A - T \n| . .   | \nG T C C T \n  Score=1.9\n",
        )
        for found, wanted in zip(sorted(aligns), expected):
            self.assertEqual(pairwise2.format_alignment(*found), wanted)
    def test_penalize_end_gaps(self):
        """Check globalxs output when end-gap penalties are turned off."""
        aligns = pairwise2.align.globalxs("GACT", "GT", -0.8, -0.2,
                                          penalize_end_gaps=0)
        self.assertEqual(len(aligns), 3)
        # The last match row ends with two spaces; escapes keep that visible.
        expected = (
            "GACT\n  .|\n--GT\n  Score=1\n",
            "GACT\n|  |\nG--T\n  Score=1\n",
            "GACT\n|.  \nGT--\n  Score=1\n",
        )
        for found, wanted in zip(sorted(aligns), expected):
            self.assertEqual(pairwise2.format_alignment(*found), wanted)
Ejemplo n.º 14
0
    def test_match_dictionary1(self):
        """Check both sorted localds alignments scored with self.match_dict."""
        aligns = pairwise2.align.localds("ATAT", "ATT", self.match_dict, -0.5, 0)
        self.assertEqual(len(aligns), 2)
        expected = (
            "ATAT\n||||\nAT-T\n  Score=3\n",
            "ATAT\n|||\nATT-\n  Score=3\n",
        )
        for found, wanted in zip(sorted(aligns), expected):
            self.assertEqual(pairwise2.format_alignment(*found), wanted)
Ejemplo n.º 15
0
def align():
    """Align every pair of entries in the module-level ``seqs`` and print the result.

    Pairs are visited as (x, y) with y >= x, so each sequence is also aligned
    against itself. For every pair the number of alignments returned by
    ``globaldx`` (BLOSUM62 scoring) is printed, followed by the first
    alignment in formatted form.

    Raises:
        IndexError: if ``globaldx`` returns no alignment for a pair.
    """
    matrix = MatrixInfo.blosum62
    total = len(seqs)
    for x in range(total):
        for y in range(x, total):
            found = pairwise2.align.globaldx(seqs[x].seq, seqs[y].seq, matrix)
            # print() call form: the original statement form is a SyntaxError
            # on Python 3.
            print(len(found))
            print(pairwise2.format_alignment(*found[0]))
Ejemplo n.º 16
0
def align_chain_to_seq(sequence,chain,verbose=False):
    """Globally align a database sequence to the residues present in a chain.

    The chain is split into polypeptides by ``build_polypeptides``; their
    sequences are concatenated and aligned against ``sequence.seq`` with
    position-dependent gap penalties.

    Args:
        sequence: object whose ``.seq`` provides ``tostring()``.
        chain: passed unchanged to ``build_polypeptides``.
        verbose: when true, write every formatted alignment to ``__stderr``.

    Returns:
        Whatever ``__PW.align.globalxc`` returns for the two sequences.
    """
    polypeptides = build_polypeptides(chain)

    # Sequence text of each contiguous fragment, then all of them joined.
    fragment_seqs = [fragment.get_sequence().tostring() for fragment in polypeptides]
    ATOM_joined_seq = ''.join(fragment_seqs)

    # Cumulative fragment lengths (with a leading 0) give the offsets in the
    # joined sequence where one fragment ends and the next begins.
    fragment_lengths = [0] + [len(fragment) for fragment in polypeptides]
    breaks = set(S.cumsum(fragment_lengths))

    def nogaps(x, y):
        # Heavy penalty: gaps passed as the first gap function are discouraged
        # everywhere.
        return -2000 - 200 * y

    def specificgaps(x, y):
        # Cheap gaps only at fragment boundaries; heavy penalty elsewhere.
        if x not in breaks:
            return (-2000 - 200 * y)
        return (0 - y)

    alignments = __PW.align.globalxc(
        sequence.seq.tostring(), ATOM_joined_seq, nogaps, specificgaps
    )

    if verbose:
        for candidate in alignments:
            __stderr.write(__PW.format_alignment(*candidate))
            __stderr.write('\n')

    return alignments
Ejemplo n.º 17
0
    def test_gap_here_only_2(self):
        """Force a poor alignment through position-specific gap costs.

        Gaps are made very expensive where one would normally be expected and
        cheap at another position, so the aligner must pick the worse layout.
        """
        seq1 = "AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA"
        seq2 = "AABBBAAAACCCCAAAABBBAA"
        breaks = [0, 3, len(seq2)]

        def nogaps(x, y):
            # Gap function passed first: always very expensive.
            return -2000 - y

        def specificgaps(x, y):
            # Gap function passed second: cheap only at the allowed positions.
            if x in breaks:
                return -2 - y
            return -2000 - y

        alignments = pairwise2.align.globalmc(seq1, seq2, 1, -1, nogaps, specificgaps)
        self.assertEqual(len(alignments), 1)
        self.assertEqual(
            pairwise2.format_alignment(*alignments[0]),
            "AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA\n"
            "||||||||||||||||||||||||||||||||||||\n"
            "--AAB----------BBAAAACCCCAAAABBBAA--\n"
            "  Score=-10\n",
        )
Ejemplo n.º 18
0
def _pretty_print_align(align1, align2, score, begin, end):
    """Print summary figures for one alignment, followed by the alignment itself.

    Args:
        align1, align2: the two aligned (gapped) sequences.
        score: alignment score; printed as a percentage of ``len(align1)``.
        begin, end: passed through to ``pairwise2.format_alignment``.

    Raises:
        ZeroDivisionError: if ``align1`` is empty.
    """
    s = pairwise2.format_alignment(align1, align2, score, begin, end)
    # One-byte-per-character arrays so the columns can be compared elementwise.
    a1 = n.fromstring(align1, dtype='S1')
    a2 = n.fromstring(align2, dtype='S1')
    print("Identity: %.2f Alignment length: %s" % (float(score)/len(align1)*100, len(align1)))
    print("(a1 == a2).sum() = ", (a1 == a2).sum())
    # The original used a trailing comma to suppress the newline; end="" is
    # the print() equivalent.
    print(s, end="")
Ejemplo n.º 19
0
def main(argv):
    """Locally align the first two lines of a file and print the aligned region.

    Args:
        argv: argument list; ``argv[1]`` is the path of a text file whose
            first two lines are the sequences.

    The sequences are aligned with ``localds`` using the PAM250 matrix and
    gap penalties -5 / -5. Every alignment is printed; the aligned region
    (``begin:end``) of the first one is then re-scored with
    ``score_pairwise`` and printed with its length.

    Raises:
        IndexError: if no alignment is produced.
    """
    with open(argv[1], "r") as fstream:
        sequence1 = fstream.readline().rstrip()
        sequence2 = fstream.readline().rstrip()

    print(sequence1)
    print("-")
    print(sequence2)
    print("")
    alignments = []
    matrix = MatrixInfo.pam250

    for a in pairwise2.align.localds(sequence1, sequence2, matrix, -5, -5):
        print(format_alignment(*a))
        alignments.append(a)

    # Keep only the first alignment: (seqA, seqB, score, begin, end).
    best = alignments[0]
    begin, end = best[3], best[4]
    seq1 = best[0][begin:end]
    seq2 = best[1][begin:end]
    currentScore = score_pairwise(seq1, seq2, matrix, -5, -5)
    print(currentScore)
    print(seq1)
    print(len(seq1))
    print(seq2)
    print(len(seq2))
    def test_gap_here_only_2(self):
        """Force a poor alignment through position-specific gap costs.

        Gaps are very expensive where one would normally be expected and
        cheap at another position.
        """
        seq1 = "AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA"
        seq2 = "AABBBAAAACCCCAAAABBBAA"

        def no_gaps(x, y):
            """Return a very high cost for any gap (first gap function)."""
            return -2000 - y

        def specific_gaps(x, y):
            """Return a low gap cost only at the allowed positions."""
            allowed = [0, 3, len(seq2)]
            if x in allowed:
                return -2 - y
            return -2000 - y

        alignments = pairwise2.align.globalmc(seq1, seq2, 1, -1, no_gaps,
                                              specific_gaps)
        self.assertEqual(len(alignments), 1)
        self.assertEqual(
            pairwise2.format_alignment(*alignments[0]),
            "AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA\n"
            "||||||||||||||||||||||||||||||||||||\n"
            "--AAB----------BBAAAACCCCAAAABBBAA--\n"
            "  Score=-10\n",
        )
    def test_gap_here_only_1(self):
        """Check globalmc when cheap gaps are allowed at positions 0, 11 and the end."""
        seq1 = "AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA"
        seq2 = "AABBBAAAACCCCAAAABBBAA"

        def no_gaps(x, y):
            """Return a very high cost for any gap (first gap function)."""
            return -2000 - y

        def specific_gaps(x, y):
            """Return a low gap cost only at the allowed positions."""
            allowed = [0, 11, len(seq2)]
            if x in allowed:
                return -2 - y
            return -2000 - y

        alignments = pairwise2.align.globalmc(seq1, seq2, 1, -1, no_gaps,
                                              specific_gaps)
        self.assertEqual(len(alignments), 1)
        self.assertEqual(
            pairwise2.format_alignment(*alignments[0]),
            "AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA\n"
            "||||||||||||||||||||||||||||||||||||\n"
            "--AABBBAAAACC----------CCAAAABBBAA--\n"
            "  Score=2\n",
        )
    def test_localms(self):
        """Check the two different local alignments returned by localms."""
        found = pairwise2.align.localms("xxxABCDxxx", "zzzABzzCDz", 1,
                                        -0.5, -3, -1)
        aligns = sorted(found)
        # Index explicitly so a missing alignment still raises.
        first = pairwise2.format_alignment(*aligns[0])
        self.assertEqual(
            first, "--xxxABCDxxx\n       ||\nzzzABzzCDz--\n  Score=2\n"
        )
        second = pairwise2.format_alignment(*aligns[1])
        self.assertEqual(second, "xxxABCDxxx\n   ||\nzzzABzzCDz\n  Score=2\n")
 def test_blosum62(self):
     """Spot-check three blosum62 entries, then every localds alignment."""
     known_scores = (
         (('K', 'Q'), 1),
         (('A', 'A'), 4),
         (('H', 'H'), 8),
     )
     for pair, value in known_scores:
         self.assertEqual(value, blosum62[pair])
     wanted = "VKAHGKKV\n |||\nFQAHCAGV\n  Score=13\n"
     found = pairwise2.align.localds('VKAHGKKV', 'FQAHCAGV', blosum62, -4, -4)
     for candidate in found:
         self.assertEqual(pairwise2.format_alignment(*candidate), wanted)
Ejemplo n.º 24
0
def seqdist(s1, s2, mismatchpen=-.5, gapopenpen=-.25, gapextendpen=-.05):
    """Globally align two sequences and return the first alignment's score.

    The alignment uses ``globalms`` with a match score of 1 and the given
    mismatch / gap-open / gap-extend penalties. The first alignment is
    printed in formatted form before its score (element 2) is returned.
    """
    from Bio.pairwise2 import align, format_alignment
    candidates = align.globalms(
        s1, s2, 1, mismatchpen, gapopenpen, gapextendpen)
    best = next(iter(candidates))
    print(format_alignment(*best))
    return best[2]
Ejemplo n.º 25
0
    def test_align_one_char3(self):
        """Check the single globalxs alignment of 'c' against abcde."""
        aligns = pairwise2.align.globalxs("abcde", "c", -0.3, -0.1)
        self.assertEqual(len(aligns), 1)
        rendered = pairwise2.format_alignment(*aligns[0])
        self.assertEqual(rendered, "abcde\n|||||\n--c--\n  Score=0.2\n")
Ejemplo n.º 26
0
    def test_separate_gap_penalties2(self):
        """Check the single localxd alignment of GAT against GTCT."""
        aligns = pairwise2.align.localxd("GAT", "GTCT", -0.5, 0, -0.2, 0)
        self.assertEqual(len(aligns), 1)
        rendered = pairwise2.format_alignment(*aligns[0])
        self.assertEqual(rendered, "GAT--\n|||\nG-TCT\n  Score=1.8\n")
Ejemplo n.º 27
0
    def test_match_dictionary3(self):
        """Check the single localds alignment of ATT against ATAT."""
        aligns = pairwise2.align.localds("ATT", "ATAT", self.match_dict, -1, 0)
        self.assertEqual(len(aligns), 1)
        rendered = pairwise2.format_alignment(*aligns[0])
        self.assertEqual(rendered, "ATT-\n|||\nATAT\n  Score=3\n")
Ejemplo n.º 28
0
    def test_match_score_open_penalty4(self):
        """Check the single globalms alignment of GCT against GATA."""
        aligns = pairwise2.align.globalms("GCT", "GATA", 1, -2, -0.1, 0)
        self.assertEqual(len(aligns), 1)
        rendered = pairwise2.format_alignment(*aligns[0])
        self.assertEqual(rendered, "GCT-\n||||\nGATA\n  Score=-0.1\n")
Ejemplo n.º 29
0
    def test_penalize_extend_when_opening(self):
        """Check globalxs output with penalize_extend_when_opening=1."""
        aligns = pairwise2.align.globalxs(
            "GACT", "GT", -0.2, -1.5, penalize_extend_when_opening=1
        )
        self.assertEqual(len(aligns), 1)
        rendered = pairwise2.format_alignment(*aligns[0])
        self.assertEqual(rendered, "GACT\n||||\nG--T\n  Score=-1.2\n")
Ejemplo n.º 30
0
    def test_extend_penalty1(self):
        """Check the single globalxs alignment of GACT/GT with penalties -0.2, -0.5."""
        aligns = pairwise2.align.globalxs("GACT", "GT", -0.2, -0.5)
        self.assertEqual(len(aligns), 1)
        rendered = pairwise2.format_alignment(*aligns[0])
        self.assertEqual(rendered, "GACT\n||||\nG--T\n  Score=1.3\n")
Ejemplo n.º 31
0
def findMaxAlignmentForProteins(protein_records, output_file,
                                carrier_proteomes, mild_carrier_proteomes,
                                non_carrier_proteomes):
    """Write, per protein record, its best alignment against every proteome.

    For each record three sections are written to ``output_file`` (carrier,
    mild carrier, non carrier). Each section lists one line per proteome ID
    with the formatted result of ``computeMaxAlignment(record, proteome)``.

    Args:
        protein_records: iterable of objects with an ``id`` string.
        output_file: writable text stream.
        carrier_proteomes, mild_carrier_proteomes, non_carrier_proteomes:
            mappings of proteome ID -> proteome.
    """
    # Section header text paired with the proteome mapping it reports on,
    # in output order.
    sections = (
        ("\t Carrier alignments :  \n", carrier_proteomes),
        ("\t mild carrier alignments : \n", mild_carrier_proteomes),
        ("\t Non Carrier alignments : \n", non_carrier_proteomes),
    )

    emit = output_file.write
    emit("\n")
    emit("optimal alignments ---------------------------------- \n")
    for record in protein_records:
        emit(record.id + " : " + "\n")
        emit("\n")
        for heading, proteomes in sections:
            emit(heading)
            for proteomeID in proteomes:
                alignment = computeMaxAlignment(record, proteomes[proteomeID])
                emit("\t \t -" + proteomeID + " : \t" +
                     format_alignment(alignment) + "\n")
            emit("\n \n")

    return
Ejemplo n.º 32
0
    def test_match_score_open_penalty3(self):
        """Check the single globalxs alignment of GAACT against GAT."""
        aligns = pairwise2.align.globalxs("GAACT", "GAT", -0.1, 0)
        self.assertEqual(len(aligns), 1)
        rendered = pairwise2.format_alignment(*aligns[0])
        self.assertEqual(rendered, "GAACT\n||  |\nGA--T\n  Score=2.9\n")
Ejemplo n.º 33
0
    def test_match_score_open_penalty4(self):
        """Check the single globalms alignment of GCT against GATA."""
        aligns = pairwise2.align.globalms("GCT", "GATA", 1, -2, -0.1, 0)
        self.assertEqual(len(aligns), 1)
        rendered = pairwise2.format_alignment(*aligns[0])
        # The match row ends with one trailing space.
        self.assertEqual(rendered, "GC-T-\n|  | \nG-ATA\n  Score=1.7\n")
Ejemplo n.º 34
0
def aligner(new_file,SSR_list):
    """Align each FASTA record with the entry of ``SSR_list`` at the same index.

    Records are read from 'AutoJobber_Logs/SSR_Containing_Genes.fa' through
    ``fasta_iter``. Each is globally aligned (BLOSUM62, gap open -10, gap
    extend -0.5, end gaps not penalised, one alignment only) against the
    corresponding ``SSR_list`` entry, and the formatted alignments are
    written to ``new_file``, which is closed at the end.
    """
    records = fasta_iter('AutoJobber_Logs/SSR_Containing_Genes.fa')
    # The running index pairs the n-th record with the n-th SSR entry.
    for index, (headerStr, seq) in enumerate(records):
        alignments = pairwise2.align.globalds(
            SSR_list[index], seq, blosum62, -10, -0.5,
            penalize_end_gaps=False, one_alignment_only=True,
        )
        for candidate in alignments:
            new_file.write(pairwise2.format_alignment(*candidate))
    new_file.close()
Ejemplo n.º 35
0
    def test_separate_gap_penalties2(self):
        """Check the single localxd alignment of GAT against GTCT."""
        aligns = pairwise2.align.localxd("GAT", "GTCT", -0.5, 0, -0.2, 0)
        self.assertEqual(len(aligns), 1)
        rendered = pairwise2.format_alignment(*aligns[0])
        self.assertEqual(rendered, "1 GAT\n  | |\n1 G-T\n  Score=1.8\n")
Ejemplo n.º 36
0
    def test_match_dictionary3(self):
        """Check the single localds alignment of ATT against ATAT."""
        aligns = pairwise2.align.localds("ATT", "ATAT", self.match_dict, -1, 0)
        self.assertEqual(len(aligns), 1)
        rendered = pairwise2.format_alignment(*aligns[0])
        self.assertEqual(rendered, "1 ATT\n  ||.\n1 ATA\n  Score=3\n")
Ejemplo n.º 37
0
    def test_align_one_char1(self):
        """Check localxs on sequences that share exactly one character."""
        aligns = pairwise2.align.localxs("abcde", "c", -0.3, -0.1)
        self.assertEqual(len(aligns), 1)
        rendered = pairwise2.format_alignment(*aligns[0])
        self.assertEqual(rendered, "3 c\n  |\n1 c\n  Score=1\n")
def local_similarity(s1, s2):
    """Return the best windowed local-alignment score between two sequences.

    The shorter sequence is aligned (``localxs``, gap penalties -10 / -10)
    against overlapping windows of the longer one; each window is twice the
    length of the shorter sequence and windows start one such length apart.
    The integer part of the first alignment's score is taken per window and
    the maximum is returned.

    Whenever the result beats ``local_similarity.best_score`` (a function
    attribute, treated as 0 if it has not been set yet) the attribute is
    updated and the alignment is printed.

    Returns:
        int: the highest window score, or 0 when nothing aligns or the
        shorter sequence is empty.
    """
    s1, s2 = sorted((s1, s2), key=len)
    read_length = len(s1)
    if read_length == 0:
        # range() rejects a step of 0, and an empty read cannot match anything.
        return 0

    score = 0
    for start_loc in range(0, len(s2), read_length):
        window = s2[start_loc:start_loc + 2 * read_length]
        results = pairwise2.align.localxs(s1, window, -10, -10)
        if not results:
            # No local alignment was found in this window.
            continue
        new_score = int(results[0][2])
        if new_score > score:
            score = new_score
            if score > getattr(local_similarity, "best_score", 0):
                local_similarity.best_score = score
                print('new best alignment: ')
                print(pairwise2.format_alignment(*results[0]))
    return score
Ejemplo n.º 39
0
    def test_penalize_extend_when_opening(self):
        """Check globalxs output with penalize_extend_when_opening=1."""
        aligns = pairwise2.align.globalxs(
            "GACT", "GT", -0.2, -1.5, penalize_extend_when_opening=1
        )
        self.assertEqual(len(aligns), 1)
        rendered = pairwise2.format_alignment(*aligns[0])
        self.assertEqual(rendered, "GACT\n||||\nG--T\n  Score=-1.2\n")
Ejemplo n.º 40
0
    def test_align_one_char3(self):
        """Check the global (globalxs) alignment of 'c' against abcde."""
        aligns = pairwise2.align.globalxs("abcde", "c", -0.3, -0.1)
        self.assertEqual(len(aligns), 1)
        rendered = pairwise2.format_alignment(*aligns[0])
        # The match row is padded with two trailing spaces.
        self.assertEqual(rendered, "abcde\n  |  \n--c--\n  Score=0.2\n")
Ejemplo n.º 41
0
    def test_extend_penalty1(self):
        """Check the single globalxs alignment of GACT/GT with penalties -0.5, -0.2."""
        aligns = pairwise2.align.globalxs("GACT", "GT", -0.5, -0.2)
        self.assertEqual(len(aligns), 1)
        rendered = pairwise2.format_alignment(*aligns[0])
        self.assertEqual(rendered, "GACT\n|  |\nG--T\n  Score=1.3\n")
Ejemplo n.º 42
0
    def test_localxs_generic(self):
        """Check localxs through the generic code path (force_generic=True)."""
        found = pairwise2.align.localxs("AxBx", "zABz", -0.1, 0,
                                        force_generic=True)
        aligns = sorted(found)
        # Since Biopython 1.74 only one alignment is expected here, because
        # leading and trailing 'zero-extensions' are disallowed.
        self.assertEqual(len(aligns), 1)
        rendered = pairwise2.format_alignment(*aligns[0])
        self.assertEqual(rendered, "1 AxB\n  | |\n2 A-B\n  Score=1.9\n")
Ejemplo n.º 43
0
def format_alignment(mol1: Molecule, mol2: Molecule):
    """Locally align two molecules and render the alignment in 60-column rows.

    The sequences are aligned with ``align.localds`` (BLOSUM62, gap open -12,
    gap extend -4). The first alignment is formatted with
    ``pairwise2.format_alignment`` and its three rows (sequence 1, match row,
    sequence 2) are re-wrapped into blocks of at most 60 columns.

    Args:
        mol1, mol2: molecules providing ``seq``, ``dbname`` and (``mol1``
            only) ``name``.

    Returns:
        dict: ``{'text': header + wrapped alignment, 'identity': fraction}``
        where ``identity`` is the share of compared columns whose characters
        are equal (0.0 when no column is compared).

    Raises:
        IndexError: if ``align.localds`` returns no alignment.
    """
    width = 60
    alignment = align.localds(mol1.seq, mol2.seq, blosum62, -12, -4)

    rows = pairwise2.format_alignment(*alignment[0]).split('\n')
    seq_mol1, result_raw, seq_mol2 = rows[0], rows[1], rows[2]

    # Count identical / differing columns, skipping the digits and spaces of
    # the position prefix that format_alignment may put before the residues.
    count = 0
    errors = 0
    for res1, res2 in zip(seq_mol1, seq_mol2):
        if res1.isnumeric() or res1 == ' ':
            continue
        if res1 == res2:
            count += 1
        else:
            errors += 1

    # Slice-based wrapping also emits the final, shorter-than-60 block, which
    # the original accumulation loop silently dropped.
    body = ''.join(
        f"{seq_mol1[start:start + width]}\n"
        f"{result_raw[start:start + width]}\n"
        f"{seq_mol2[start:start + width]}\n\n"
        for start in range(0, len(seq_mol1), width)
    )

    compared = count + errors
    identity = count / compared if compared else 0.0
    header = "< %s - %s | %s | %.1f%%\n" % (mol1.dbname, mol2.dbname,
                                            mol1.name, identity * 100)
    text = header + body

    return {'text': text, 'identity': identity}
Ejemplo n.º 44
0
    def __init__(self, reference_sequence, other_sequence):
        """Globally align two sequences and record how their positions map.

        After construction the instance holds:

        - ``reference_matched_positions`` / ``other_matched_positions``:
          equally long lists with the positions of columns where both
          sequences have a residue.
        - ``ref_to_other_positions``: reference position -> other position,
          for reference residues at or after the first residue of the other
          sequence.
        - ``other_to_ref_positions``: other position -> reference position,
          for other residues at or after the first reference residue.
        """
        matrix = matlist.blosum62
        self.reference_sequence = reference_sequence
        self.other_sequence = other_sequence
        # Same settings as the EMBOSS Needle defaults on the EBI website:
        # BLOSUM62, gap open -10, gap extend -0.5.
        alignment = pairwise2.align.globalds(
            reference_sequence, other_sequence, matrix,
            -10.0, -0.5, one_alignment_only=True,
        )
        print('alignment')
        print(format_alignment(*alignment[0]))
        best = alignment[0]
        ref_alignment, other_alignment = best[0], best[1]
        assert (len(ref_alignment) == len(other_alignment))

        self.ref_to_other_positions = {}
        self.other_to_ref_positions = {}
        self.reference_matched_positions = []
        self.other_matched_positions = []

        ref_position = 0
        other_position = 0
        ref_started = False
        other_started = False
        for column in range(len(ref_alignment)):
            ref_has_residue = ref_alignment[column] != '-'
            other_has_residue = other_alignment[column] != '-'
            # A sequence has "started" once its first residue has been seen.
            ref_started = ref_started or ref_has_residue
            other_started = other_started or other_has_residue

            # Remember the reference position before it is advanced below;
            # the other -> reference mapping uses this pre-increment value.
            old_ref_position = ref_position

            # Both residues present implies both sequences have started.
            if ref_has_residue and other_has_residue:
                self.reference_matched_positions.append(ref_position)
                self.other_matched_positions.append(other_position)

            if ref_has_residue:
                if other_started:
                    self.ref_to_other_positions[ref_position] = other_position
                ref_position += 1
            if other_has_residue:
                if ref_started:
                    self.other_to_ref_positions[other_position] = old_ref_position
                other_position += 1
Ejemplo n.º 45
0
 def align_l(self):
     """Locally align two sequences and write all alignments to output.txt.

     The sequences come from the loaded FASTA data when
     ``self.var_fromfile.get()`` equals 1, otherwise from the two input
     fields (in which case ``self.sequences`` is reset to an empty list).
     """
     use_file = self.var_fromfile.get() == 1
     if not use_file:
         self.sequences = []
         seq1 = self.field_S1.get()
         seq2 = self.field_S2.get()
     else:
         self.load_fasta()
         seq1 = self.sequences[0][1]
         seq2 = self.sequences[1][1]
     alignments = pairwise2.align.localxx(seq1, seq2)
     with open('output.txt', 'w') as report:
         report.write("SEQUENCE ALIGNMENT TOOL OUTPUT - LOCAL ALIGNMENT\n")
         for candidate in alignments:
             report.write(format_alignment(*candidate))
             report.write("\n")
def balign(first_seq, second_seq, op_gap=-5, ext_gap=-0.5):
    """Globally align two sequences with BLOSUM62 and return the first hit.

    Args:
        first_seq: First sequence passed to ``pairwise2.align.globalds``.
        second_seq: Second sequence passed to ``pairwise2.align.globalds``.
        op_gap: Gap-open score (default -5).
        ext_gap: Gap-extension score (default -0.5).

    Returns:
        The first alignment in the list returned by ``globalds``. It is
        also printed in formatted form before being returned.
    """
    scoring = matlist.blosum62
    candidates = pairwise2.align.globalds(
        first_seq, second_seq, scoring, op_gap, ext_gap
    )

    # Only the first alignment in the returned list is reported.
    best = candidates[0]
    print(pairwise2.format_alignment(*best))
    return best
Ejemplo n.º 47
0
 def get_pairwise2_localds_result(self,
                                  asequence,
                                  bsequence,
                                  matrx=blosum62,
                                  gap_open_penalty=10,
                                  extension_penalty=1):
     """Locally align two sequences and return the formatted rows.

     Both inputs are upper-cased and stripped of spaces before alignment.
     The penalties are given as positive numbers and negated before
     being handed to ``pairwise2.align.localds``.

     Returns:
         A 4-tuple: the first three lines of the formatted first
         alignment with every digit character removed, followed by the
         complete formatted text.
     """

     def _drop_digits(text):
         # Removes the digit characters from one formatted line.
         return ''.join(ch for ch in text if not ch.isdigit())

     cleaned_a = asequence.upper().replace(" ", "")
     cleaned_b = bsequence.upper().replace(" ", "")
     hits = pairwise2.align.localds(cleaned_a, cleaned_b, matrx,
                                    -gap_open_penalty, -extension_penalty)
     alignments_result = pairwise2.format_alignment(*hits[0])
     rows = alignments_result.split("\n")
     return (_drop_digits(rows[0]), _drop_digits(rows[1]),
             _drop_digits(rows[2]), alignments_result)
Ejemplo n.º 48
0
def alignment_pairwise2(fuzzy_list,string, Dict):
    '''Align ``string`` against the matched window of every dictionary value.

    Every second entry of ``fuzzy_list`` (indices 1, 3, 5, ...) is turned
    into text and parsed by splitting on "," and "=": field 0 gives the
    start, field 1 the end and field 3 the quoted matched text.
    (presumably these are match objects whose text form looks like
    ``Match(start=.., end=.., dist=.., matched='..')`` -- verify against
    the caller.)

    For each ``Dict`` value containing the matched text, the slice
    ``value[start:end]`` is locally aligned to ``string`` and the result
    is stored as "<key>\\n<formatted alignment>".

    Returns the list of those strings (empty when nothing matched).
    '''
    formatted_hits = []
    for idx in range(1, len(fuzzy_list), 2):
        # Stringify and split once; the three fields are read from it.
        fields = str(fuzzy_list[idx]).split(",")
        window_start = int(fields[0].split("=")[1])
        window_end = int(fields[1].split("=")[1])
        matched_text = fields[3].split("=")[1].strip("'").split("'")[0]

        for name, text in Dict.items():
            if matched_text not in text:
                continue
            window = text[window_start:window_end]
            for hit in pairwise2.align.localms(window, string, 1, 0, -1, -0.5,
                                               one_alignment_only=True):
                formatted_hits.append(name + "\n" + format_alignment(*hit))

    return formatted_hits
Ejemplo n.º 49
0
 def align_to_ref(self,
                  ref_seq,
                  query_seq,
                  ident_score=4,
                  sim_score=2,
                  gap_open=-2,
                  gap_ext=-.5,
                  verbose=False):
     """Locally align ``query_seq`` to ``ref_seq`` and return the score.

     The four scoring arguments are forwarded, in order, as the match,
     mismatch, gap-open and gap-extend values of
     ``pairwise2.align.localms``. Only the first alignment is used.

     With ``verbose`` set, the formatted alignment, its score, and
     ``self.aligned_seq_len`` next to the length of the aligned query
     row are printed.
     """
     top_hit = pairwise2.align.localms(ref_seq, query_seq, ident_score,
                                       sim_score, gap_open, gap_ext)[0]
     score = top_hit[2]
     if not verbose:
         return score

     print(format_alignment(*top_hit))
     print(score)
     print(self.aligned_seq_len, len(top_hit[1]))
     return score
Ejemplo n.º 50
0
    def test_gap_here_only_1(self):
        seq1 = "AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA"
        seq2 = "AABBBAAAACCCCAAAABBBAA"
        breaks = [0, 11, len(seq2)]
        # Very expensive to open a gap in seq1:
        nogaps = lambda x, y: -2000 - y
        # Very expensive to open a gap in seq2 unless it is in one of the allowed positions
        specificgaps = lambda x, y: (-2 -y) if x in breaks else (-2000 - y)
        alignments = pairwise2.align.globalmc(seq1, seq2, 1, -1, nogaps, specificgaps)
        self.assertEqual(len(alignments), 1)
        formatted = pairwise2.format_alignment(*alignments[0])
        self.assertEqual(formatted, """\
AAAABBBAAAACCCCCCCCCCCCCCAAAABBBAAAA
||||||||||||||||||||||||||||||||||||
--AABBBAAAACC----------CCAAAABBBAA--
  Score=2
""")
Ejemplo n.º 51
0
def getGlobalAlign(seq1, seq2):
    """Print all global (Needleman-Wunsch) alignments of two sequences.

    The alignments from ``pairwise2.align.globalxx`` are printed three
    ways: the whole list, each raw alignment, then each alignment
    formatted. ``BIYellow``, ``BICyan`` and ``White`` are printed around
    the sections (presumably terminal colour codes -- defined elsewhere).
    """
    header = BIYellow + "GLOBAL ALIGNMENTS (Needleman-Wunsch algorithm)" + White
    print(header)
    hits = pairwise2.align.globalxx(seq1, seq2)
    print(hits)

    for raw_hit in hits:
        print(raw_hit)

    print(BICyan)
    from Bio.pairwise2 import format_alignment

    for pretty in (format_alignment(*hit) for hit in hits):
        print(pretty)

    print(White)
Ejemplo n.º 52
0
def mainloop():
    """Repeatedly prompt for two sequences and report alignment and GC stats.

    Each round reads two sequences from standard input, prints the first
    formatted global alignment, then the total GC content (``GC``) and
    the per-part GC content (``GC123``) of each sequence. After the
    'next prot' prompt is acknowledged the console is cleared with the
    shell command ``cls`` and the next round starts.

    The rounds run in a loop rather than by self-recursion, so a long
    session no longer grows the call stack until ``RecursionError``.
    The function never returns on its own.
    """
    while True:
        seq = input("Sequence 1:")
        seq2 = input("Sequence 2:")
        alignments = pairwise2.align.globalxx(seq, seq2)
        print(format_alignment(*alignments[0]))
        print("No1.Total GC content:")
        print(GC(seq))
        print("No1.GC by parts:")
        print(GC123(seq))
        print("No2.Total GC content:")
        print(GC(seq2))
        print("No2.GC by parts:")
        print(GC123(seq2))
        input('next prot')
        os.system('cls')
Ejemplo n.º 53
0
    def _check_seq(self):
        """Raise ``CheckError`` if ``self.seq`` differs from the protocol sequence.

        The protocol sequence is the single ``'product_seqs'`` synthesis
        attribute. If either sequence cannot be obtained (``QueryError``
        or ``ValueError``) the check is skipped silently.

        Raises:
            CheckError: when the two sequences differ. The error carries
                the global alignments of the two sequences, and its
                ``info`` renders the first of them.
        """
        from Bio import pairwise2
        from Bio.pairwise2 import format_alignment

        try:
            primary_seq = self.seq
            protocol_seq = one(self.get_synthesis_attr('product_seqs'))
        except (QueryError, ValueError):
            # Best effort: nothing to compare, so nothing to report.
            pass
        else:
            if primary_seq != protocol_seq:
                alignments = pairwise2.align.globalxx(primary_seq,
                                                      protocol_seq)
                err = CheckError(culprit=self, alignments=alignments)
                err.brief = "sequence doesn't match construction"
                # format_alignment takes the alignment fields as separate
                # positional arguments, so the tuple must be unpacked.
                err.info = lambda e: format_alignment(*e.alignments[0])
                raise err
Ejemplo n.º 54
0
def result_local(request):
    """Render the local alignments of the two sequences posted in the form.

    The POST fields ``tool1`` and ``tool2`` (both defaulting to
    ``'default'``) have their spaces removed, are wrapped in ``Seq`` and
    aligned locally. All formatted alignments are concatenated and passed
    to the ``mysite/result_align.html`` template under the key ``res``.
    """
    # Imported here so the block brings its own dependency into scope.
    from Bio import pairwise2

    cleaned_a = request.POST.get('tool1', 'default').replace(" ", "")
    cleaned_b = request.POST.get('tool2', 'default').replace(" ", "")
    dna6a = Seq(cleaned_a)
    dna6b = Seq(cleaned_b)

    # The original never defined `alignments` (NameError) and left both Seq
    # objects unused.
    # NOTE(review): localxx is inferred from the function name -- confirm
    # the intended scoring scheme.
    alignments = pairwise2.align.localxx(dna6a, dna6b)

    stri = "".join(str(format_alignment(*a)) for a in alignments)
    params = {'res': stri}
    return render(request, 'mysite/result_align.html', params)
Ejemplo n.º 55
0
def getLocalAlign(seq1, seq2):
    """Print all local (Smith-Waterman) alignments of two sequences.

    The alignments from ``pairwise2.align.localxx`` are printed three
    ways: the whole list, each raw alignment, then each alignment
    formatted. ``BIYellow``, ``BICyan`` and ``White`` are printed around
    the sections (presumably terminal colour codes -- defined elsewhere).
    """
    header = BIYellow + "LOCAL ALIGNMENTS (Smith-Waterman algorithm)" + White
    print(header)

    hits = pairwise2.align.localxx(seq1, seq2)
    print(hits)

    for raw_hit in hits:
        print(raw_hit)

    print(BICyan)
    from Bio.pairwise2 import format_alignment

    for pretty in (format_alignment(*hit) for hit in hits):
        print(pretty)

    print(White)
Ejemplo n.º 56
0
def align(s1, s2, test=False, psm=2, pmm=0.5, pgo=-3, pge=-1):
    """
    Creates pairwise local alignment between sequences.
    Get the visualization and alignment scores.
    :param s1: sequence 1
    :param s2: sequence 2
    :param test: if True, print the raw alignments and the formatted
        first alignment
    :param psm: match score
    :param pmm: mismatch score
    :param pgo: gap-open penalty
    :param pge: gap-extend penalty
    :return: tuple of (match line of the first formatted alignment with
        spaces replaced by '-', score of that alignment); ``(nan, nan)``
        when no alignment is found

    Both sequences are upper-cased before alignment. If any of the four
    scoring parameters is None, ``localxx`` is used instead of
    ``localms``.

    REF: http://biopython.org/DIST/docs/api/Bio.pairwise2-module.html
    The match parameters are:

    CODE  DESCRIPTION
    x     No parameters. Identical characters have score of 1, otherwise 0.
    m     A match score is the score of identical chars, otherwise mismatch
          score.
    d     A dictionary returns the score of any pair of characters.
    c     A callback function returns scores.
    The gap penalty parameters are:

    CODE  DESCRIPTION
    x     No gap penalties.
    s     Same open and extend gap penalties for both sequences.
    d     The sequences have different open and extend gap penalties.
    c     A callback function returns the gap penalties.
    """
    from Bio import pairwise2
    if any(p is None for p in (psm, pmm, pgo, pge)):
        alignments = pairwise2.align.localxx(s1.upper(), s2.upper())
    else:
        alignments = pairwise2.align.localms(s1.upper(), s2.upper(), psm, pmm,
                                             pgo, pge)
    if test:
        print(alignments)

    if not alignments:
        # Previously the nan placeholder reached `.replace` and raised
        # AttributeError; report "no alignment" as nan values instead.
        return np.nan, np.nan

    # Only the first alignment is reported.
    first = alignments[0]
    alignstr = pairwise2.format_alignment(*first)
    if test:
        print(alignstr)
    # The second line of the formatted text is the match-symbol row.
    alignsymb = alignstr.split('\n')[1]
    return alignsymb.replace(' ', '-'), first[2]
Ejemplo n.º 57
0
    def test_localxs_2(self):
        """Test localxs with ``full_sequences=True``."""
        # The match row ends in a space, so the expected text is spelled
        # out line by line instead of in a triple-quoted block.
        expected = (
            "-AxBx\n"
            " | | \n"
            "zA-Bz\n"
            "  Score=1.9\n"
        )
        aligns = sorted(pairwise2.align.localxs("AxBx", "zABz", -0.1, 0))
        # From Biopython 1.74 on this should only give one alignment, since
        # we disallow leading and trailing 'zero-extensions'
        self.assertEqual(len(aligns), 1)
        seq1, seq2, score, begin, end = aligns[0]
        formatted = pairwise2.format_alignment(
            seq1, seq2, score, begin, end, full_sequences=True
        )
        self.assertEqual(formatted, expected)
Ejemplo n.º 58
0
def main():
    """Load the FASTA records, tabulate them and align the first two.

    Reads ``../pdm2_neurogenic.fa``, printing each record id as it is
    read, then builds and prints a DataFrame with one row per record:

    - ``id``: record id
    - ``seq``: record sequence
    - ``length``: length of the sequence
    - ``func``: last character of the id, with '-' mapped to 0 and
      '+' mapped to 1
    - ``species``: list of the four-character groups that follow
      "MEMB" in the id
    - ``GC_ratio``: ``GC(seq)`` divided by 100

    Finally prints the first formatted ``globalxx`` alignment of the
    sequences in rows 0 and 1.
    """
    fasta_path = '../pdm2_neurogenic.fa'

    print("Reading in Original Sequences:")
    print("Completed Sequences by ID:\n")
    records = []
    for record in SeqIO.parse(fasta_path, 'fasta'):
        print(record.id, "\n")
        records.append(record)
    print("Completed.")

    df_sequences = pd.DataFrame({
        "id": [rec.id for rec in records],
        "seq": [rec.seq for rec in records],
        "length": [len(rec.seq) for rec in records],
    })
    id_column = df_sequences['id']
    df_sequences['func'] = id_column.str[-1].replace({'-': 0, '+': 1})
    df_sequences['species'] = id_column.str.findall('MEMB(....)')

    # GC is divided by 100 to store a ratio.
    df_sequences["GC_ratio"] = df_sequences["seq"].apply(GC) / 100.0
    print(df_sequences)

    # globalxx: matching pairs score 1, everything else scores 0.
    first_seq = df_sequences["seq"].iloc[0]
    second_seq = df_sequences["seq"].iloc[1]
    alignment_0_1 = pairwise2.align.globalxx(first_seq, second_seq)
    print(pairwise2.format_alignment(*alignment_0_1[0]))
Ejemplo n.º 59
0
def get_alignment(Rb, Ru):
    """
    Return the formatted global alignment of the CA-atom residue sequences
    of two structures.

    For each of ``Rb`` and ``Ru`` the "CA" atoms are selected, the
    matching subset is extracted and its ``resname`` column is converted
    with ``convert_res``. The two results are aligned with
    ``pairwise2.align.globalxx`` and the LAST alignment in the returned
    list is formatted and returned as text.
    """
    structures = (Rb, Ru)

    # Each stage runs on Rb first, then Ru, in the same order as before.
    ca_indices = [
        s.atomselect("*", "*", "CA", get_index=True)[1] for s in structures
    ]
    ca_subsets = [
        s.get_subset(idxs) for s, idxs in zip(structures, ca_indices)
    ]
    Rb_seq, Ru_seq = [convert_res(sub.data["resname"]) for sub in ca_subsets]

    candidates = pairwise2.align.globalxx(Rb_seq, Ru_seq)
    return format_alignment(*candidates[-1])
Ejemplo n.º 60
0
def findBestAlignment(seq, query, dna=False, offset=0, show=False):
    """Locate ``query`` in ``seq`` by local alignment.

    Protein mode (``dna`` false) replaces '*' with 'X' in ``seq`` and
    scores with BLOSUM62 and gap penalties of -100/-100; DNA mode scores
    match 1, mismatch -2 and gaps -2/-2.

    Returns:
        ``(start, end, gapped)`` with 1-based positions shifted by
        ``offset``, for the first alignment having the highest score.
        ``gapped`` tells whether the aligned ``seq`` row contains '-'.
        When there are no alignments, ``(-1, -1, True)`` is returned.
    """
    if dna:
        alignments = align.localms(seq, query, 1, -2, -2, -2)
    else:
        alignments = align.localds(seq.replace('*', 'X'), query,
                                   matlist.blosum62, -100, -100)

    scores = [aln[2] for aln in alignments]
    if not scores:
        return -1, -1, True

    top = alignments[scores.index(max(scores))]
    if show:
        print(format_alignment(*top))
        print(top)

    aligned_seq = top[0]
    begin_col, end_col = top[-2], top[-1]

    # FR4 start is where both sequences start to align with each other,
    # including leading mismatches (which may be due to mutations):
    #     0123456
    # eg: GGGGACGTACGTACGT
    #           ||||||||||
    #     ----CAGTACGTACGT
    # although the alignment starts at pos 6, FR4 is taken to start at pos 4
    start = extend5align(top) + offset + 1  # 1-based start
    end = int(offset + end_col)  # 1-based end

    # Subtract the '-' characters that exist only in the aligned row, not
    # in the actual sequence:
    # eg: -GGGACGTACGTACGT
    #      |||||||||||||||
    #     GGGACAGTACGTACGT
    # should start at 1, not 2, because the leading '-' is not in seq.
    gapped = '-' in aligned_seq
    if gapped:
        start -= aligned_seq[:(begin_col + 1)].count('-')
        end -= aligned_seq[:(end_col + 1)].count('-')
    return start, end, gapped  # 1-based