Beispiel #1
0
def run_align_aligner(data):
    """Run a global alignment on every entry of ``data`` and discard the results.

    Items 0 and 1 of each entry are handed to ``aligner`` together with
    ``method='global'``, ``gap_open=-1``, ``gap_extend=-1`` and the
    module-level ``MY_MATRIX``. Nothing is returned.

    Args:
        data: iterable of indexable entries; only positions 0 and 1 of each
            entry are read (presumably the two sequences to align --
            confirm against the caller).
    """
    for pair in data:
        aligner(pair[0], pair[1],
                method='global', gap_open=-1, gap_extend=-1,
                matrix=MY_MATRIX)
Beispiel #2
0
 def test_score(self):
     """Check the aligned pair returned for AGEBANAN vs. ACEBAN.

     ``aligner`` is called with ``self.method``, the "BLOSUM62" matrix,
     ``gap_open=-2`` and ``gap_extend=-1``; the result must equal the
     expected tuple of aligned strings.
     """
     expected = ('AGEBANAN', 'ACEBAN--')
     result = aligner("AGEBANAN", "ACEBAN",
                      gap_extend=-1, gap_open=-2,
                      matrix="BLOSUM62", method=self.method)
     assert result == expected
Beispiel #3
0
	def gen_pic(self):
		"""Write one numbered ``.bmp`` file under ``self.path`` per entry of ``self.info``.

		Each entry of ``self.info`` unpacks to ``(offset, width, height)``; the
		offset is not used here. File ``<n>.bmp`` (numbering starts at 1)
		receives a header produced by ``add_header.header_info`` followed by
		the payload ``self.data[n - 1]``.
		"""
		for index, (_offset, width, height) in enumerate(self.info, 1):
			target = os.path.join(self.path, str(index) + ".bmp")
			with open(target, "wb") as bmp_out:
				header = add_header.header_info(bmp_out)
				header.gen_header(width, height)
				payload = self.data[index - 1]
				if width % 4 != 0:
					# Width is not a multiple of 4: hand the payload to the
					# align helper instead of writing it verbatim
					# (presumably it pads each row -- TODO confirm).
					writer = align.aligner(payload, bmp_out)
					writer.align_write_raw_data(width, height)
				else:
					bmp_out.write(payload)
def build_alignments(structure_to_sequences,
                     pickle_path="./pickled_intermediates"):
    """Align each structure's canonical sequence with its structure-based sequence.

    For every entry, "U" is replaced by "C" in both sequences and they are
    aligned with ``aligner(..., method="glocal")``; the first returned
    alignment is kept. If that alignment reports more than 5 mismatches it
    is replaced by the first result of a global alignment run with
    ``gap_open=-5`` and ``gap_extend=-1``. The collected alignments are
    pickled to ``<pickle_path>/alignments.pickle`` and returned.

    Args:
        structure_to_sequences (dict, required. See build_sequence_dict):
            maps a structure identifier to a mapping with "canonical" and
            "structure" sequence strings.
        pickle_path (str, optional): directory that receives
            ``alignments.pickle``. It is created if it does not exist yet.

    Returns:
        dict: structure identifier -> the alignment chosen for it.

    Raises:
        Exception: any error raised by the global re-alignment is re-raised
            unchanged after two explanatory notes have been printed.
    """
    import os  # local import so the block does not depend on file-level imports

    alignments = {}
    for structure, sequences in structure_to_sequences.items():
        print(f"Aligning {structure}...")
        fasta_seq = sequences["canonical"].replace("U", "C")
        structure_seq = sequences['structure'].replace("U", "C")
        alignment = aligner(fasta_seq, structure_seq, method="glocal")[0]

        # If there are more than 5 mismatches, there may be a real gap.
        # Retry as a global alignment with explicit gap penalties.
        if alignment.n_mismatches > 5:
            try:
                alignment = aligner(fasta_seq,
                                    structure_seq,
                                    method="global",
                                    gap_open=-5,
                                    gap_extend=-1)[0]
            except Exception:
                # Add context for the operator, then let the original error
                # propagate untouched.
                print(
                    "[NOTE] An exception is about to raise due to a failed global alignment."
                )
                print(
                    f"[NOTE] {structure} had more than 5 mismatches with a semi-global",
                    "alignment, but failed to generate any global alignments with gap_open = -5 and gap_extend = -1"
                )
                raise

        alignments[structure] = alignment
        print(alignment)

    # Make sure the output directory exists; otherwise all the alignment work
    # above would be lost to a FileNotFoundError on the first run.
    if pickle_path:
        os.makedirs(pickle_path, exist_ok=True)
    with open(f"{pickle_path}/alignments.pickle", "wb") as f:
        pickle.dump(alignments, f)

    return alignments
Beispiel #5
0
 def test_all(self):
     """Align CELECANTH and PELICAN in both argument orders with ``self.method``.

     In either order the shorter sequence is expected back with two
     trailing gap characters and the longer one unchanged.
     """
     long_seq, short_seq = "CELECANTH", "PELICAN"
     padded_short = 'PELICAN--'

     forward = aligner(long_seq, short_seq, method=self.method)
     assert forward == (long_seq, padded_short), forward

     backward = aligner(short_seq, long_seq, method=self.method)
     assert backward == (padded_short, long_seq)
Beispiel #6
0
 def test_it(self):
     """Check a 'global_cfe' alignment of a short sequence inside a long one.

     The expected result leaves the long sequence with a single internal
     gap and pads the short sequence with gaps on both ends.
     """
     long_seq = 'AAAAAAAAAAAAACCTGCGCCCCAAAAAAAAAAAAAAAAAAAA'
     short_seq = 'CCTGCGCACCCC'
     expected = (
         'AAAAAAAAAAAAACCTGCGC-CCCAAAAAAAAAAAAAAAAAAAA',
         '-------------CCTGCGCACCCC-------------------',
     )
     outcome = aligner(long_seq, short_seq, method='global_cfe')
     assert outcome == expected
Beispiel #7
0
 def test_score(self):
     """Verify the AGEBANAN / ACEBAN alignment under explicit scoring options.

     The call uses ``gap_extend=-1``, ``gap_open=-2``, the "BLOSUM62"
     matrix and ``self.method``; the returned value must equal the
     expected pair of aligned strings.
     """
     seq_a, seq_b = "AGEBANAN", "ACEBAN"
     scoring = dict(gap_extend=-1, gap_open=-2, matrix="BLOSUM62",
                    method=self.method)
     aligned = aligner(seq_a, seq_b, **scoring)
     assert aligned == ('AGEBANAN', 'ACEBAN--')
Beispiel #8
0
#!/usr/bin/env python
#-*- coding:utf8 -*-

import add_header
import align

# Rewrite every ./res/<n>.bmp listed in ./res/index.txt as <n>.out.bmp in the
# current directory, prefixing it with a freshly generated header.
with open('./res/index.txt', 'r') as fh:
	lines = list(fh)

for i, line in enumerate(lines, 1):
	path = './res/'
	filename = str(i)
	with open(path + filename + '.bmp', 'rb') as img_in_fh:
		# Each index line holds three whitespace-separated integers:
		# width, height and offset (offset is parsed but not used below).
		width, height, offset = map(int, line.split())
		with open(filename + '.out.bmp', 'wb') as img_out_fh:
			head_info_obj = add_header.header_info(img_out_fh)
			head_info_obj.gen_header(width, height)
			if width % 4 != 0:
				# Width is not a multiple of 4: let the align helper copy
				# the data (presumably padding each row -- TODO confirm).
				align_obj = align.aligner(img_in_fh, img_out_fh)
				align_obj.align_write(width, height)
			else:
				# Width is a multiple of 4: copy the input through unchanged.
				raw = img_in_fh.read()
				img_out_fh.write(raw)
Beispiel #9
0
    def test_all(self):
        """Check ``aligner`` output for many small sequence pairs.

        The cases are grouped by the ``method`` argument ('global',
        'global_cfe', 'local', 'glocal') followed by a block of older cases.
        Each call is unpacked into two values ``a`` and ``b`` which are
        compared, element by element, with the expected aligned sequences;
        '-' is the gap character in the expected output.
        """
        # global
        a, b = aligner('WW', 'WEW', method='global')
        assert list(a) == ['W', '-', 'W']
        assert list(b) == ['W', 'E', 'W']
        # Same expected alignment with a much larger gap_open magnitude.
        a, b = aligner('WW', 'WEW', method='global', gap_open=-100)
        assert list(a) == ['W', '-', 'W']
        assert list(b) == ['W', 'E', 'W']
        a, b = aligner('A', 'A', method='global', gap_open=-7)
        assert list(a) == ['A']
        assert list(b) == ['A']
        a, b = aligner('R', 'K', method='global', gap_open=-7)
        assert list(a) == ['R']
        assert list(b) == ['K']
        a, b = aligner('R', 'AR', method='global', gap_open=-7)
        assert list(a) == ['-', 'R'], (a, b)
        assert list(b) == ['A', 'R']
        a, b = aligner('AR', 'R', method='global', gap_open=-7)
        assert list(a) == ['A', 'R']
        assert list(b) == ['-', 'R']
        # 'AR' vs 'RA': gap_open=-7 is expected to give an ungapped result,
        # gap_open=-3 a gapped one.
        a, b = aligner('AR', 'RA', method='global', gap_open=-7)
        assert list(a) == ['A', 'R']
        assert list(b) == ['R', 'A']
        a, b = aligner('AR', 'RA', method='global', gap_open=-3)
        assert list(a) == ['A', 'R', '-']
        assert list(b) == ['-', 'R', 'A']
        a, b = aligner('RAR', 'RR', method='global', gap_open=-3)
        assert list(b) == ['R', '-', 'R']
        assert list(a) == ['R', 'A', 'R']
        a, b = aligner('RAR', 'RR', method='global', gap_open=-10)
        assert list(b) == ['R', '-', 'R']
        assert list(a) == ['R', 'A', 'R']
        a, b = aligner('RAAR', 'RR', method='global', gap_open=-5)
        assert list(a) == ['R', 'A', 'A', 'R']
        assert list(b) == ['R', '-', '-', 'R']
        # 'RLR' vs 'RER': ungapped at gap_open=-9, gapped at gap_open=-1.
        a, b = aligner('RLR', 'RER', method='global', gap_open=-9)
        assert list(a) == ['R', 'L', 'R']
        assert list(b) == ['R', 'E', 'R']
        a, b = aligner('RLR', 'RER', method='global', gap_open=-1)
        assert list(a) == ['R', 'L', '-', 'R']
        assert list(b) == ['R', '-', 'E', 'R']
        a, b = aligner('RLR', 'REER', method='global', gap_open=-1)
        assert list(a) == ['R', 'L', '-', '-', 'R']
        assert list(b) == ['R', '-', 'E', 'E', 'R']
        a, b = aligner('AGEBAM', 'AGEBAMAM', method='global', gap_open=-6)
        assert list(a) == ['A', 'G', 'E', 'B', 'A', 'M', '-', '-']
        assert list(b) == ['A', 'G', 'E', 'B', 'A', 'M', 'A', 'M']
        # Fractional gap_open value.
        a, b = aligner('CPELIRKNCANTH',
                       'PREKRLICAN',
                       method='global',
                       gap_open=-0.5)
        assert list(a) == [
            'C', 'P', '-', 'E', '-', '-', 'L', 'I', 'R', 'K', 'N', 'C', 'A',
            'N', 'T', 'H'
        ]
        assert list(b) == [
            '-', 'P', 'R', 'E', 'K', 'R', 'L', 'I', '-', '-', '-', 'C', 'A',
            'N', '-', '-'
        ]
        # NOTE(review): the expected output for gap_open=-6. is commented out,
        # so this call only checks that aligner returns two values without
        # raising.
        a, b = aligner('CPEL', 'PREK', method='global', gap_open=-6.)
        # assert list(a) == ['C', 'P', 'E', 'L']
        # assert list(b) == ['P', 'R', 'E', 'K']
        a, b = aligner('CPEL', 'PREK', method='global', gap_open=-5)
        assert list(a) == ['C', 'P', '-', 'E', 'L']
        assert list(b) == ['-', 'P', 'R', 'E', 'K']
        a, b = aligner('RLRR', 'RRER', method='global', gap_open=-1)
        assert list(a) == ['R', 'L', 'R', '-', 'R']
        assert list(b) == ['R', '-', 'R', 'E', 'R']
        a, b = aligner('TAAT', 'TAATTC', method='global', matrix='DNA')
        assert list(a) == ['T', 'A', 'A', 'T', '-', '-']
        assert list(b) == ['T', 'A', 'A', 'T', 'T', 'C']
        # global_cfe
        a, b = aligner('TAAT', 'TAATTC', method='global_cfe', matrix='DNA')
        assert list(a) == ['T', 'A', 'A', 'T', '-', '-']
        assert list(b) == ['T', 'A', 'A', 'T', 'T', 'C']
        a, b = aligner('TCTAAT', 'TAAT', method='global_cfe', matrix='DNA')
        assert list(b) == ['-', '-', 'T', 'A', 'A', 'T']
        assert list(a) == ['T', 'C', 'T', 'A', 'A', 'T']
        # local: only the matching region is expected back for both inputs
        a, b = aligner('TCTAAT', 'TAAT', method='local', matrix='DNA')
        assert list(b) == ['T', 'A', 'A', 'T']
        assert list(a) == ['T', 'A', 'A', 'T']
        a, b = aligner('TCTAAT', 'TAATCT', method='local', matrix='DNA')
        assert list(b) == ['T', 'A', 'A', 'T']
        assert list(a) == ['T', 'A', 'A', 'T']
        # glocal
        a, b = aligner('AAATAATAAA', 'TAAT', method='glocal', matrix='DNA')
        assert list(b) == ['T', 'A', 'A', 'T']
        assert list(a) == ['T', 'A', 'A', 'T']
        a, b = aligner('AAATAATAAA',
                       'TATAT',
                       method='glocal',
                       gap_open=-1,
                       matrix='DNA')
        assert list(a) == ['T', 'A', '-', 'A', 'T']
        assert list(b) == ['T', 'A', 'T', 'A', 'T']

        # The next two cases use the same pair of sequences in swapped
        # argument order.
        a, b = aligner('TATATAAA',
                       'CCTATAT',
                       method='glocal',
                       gap_open=-1,
                       matrix='DNA')
        assert (a, b) == ('--TATAT', 'CCTATAT'), (a, b)

        a, b = aligner('CCTATAT',
                       'TATATAAA',
                       method='glocal',
                       gap_open=-1,
                       matrix='DNA')
        assert list(b) == ['-', '-', 'T', 'A', 'T', 'A', 'T']
        assert list(a) == ['C', 'C', 'T', 'A', 'T', 'A', 'T']
        # old (presumably cases carried over from an earlier test suite --
        # TODO confirm); they mix 'local', 'global' and 'global_cfe' calls
        a, b = aligner('A', 'A', method='local')
        assert list(a) == ['A']
        assert list(b) == ['A']
        a, b = aligner('RA', 'AR', method='local')
        assert list(a) == ['R']
        assert list(b) == ['R']
        a, b = aligner('RRR', 'RR', method='local')
        assert list(a) == ['R', 'R']
        assert list(b) == ['R', 'R']
        a, b = aligner('WR', 'WRR', method='global')
        assert list(b) == ['W', 'R', 'R']
        assert list(a) == ['W', 'R', '-']
        a, b = aligner('PYNCHAN', 'YNCH', method='local')
        assert list(a) == ['Y', 'N', 'C', 'H']
        assert list(b) == ['Y', 'N', 'C', 'H']
        a, b = aligner('AIP', 'AP', method='local')
        assert list(a) == ['P']
        assert list(b) == ['P']
        a, b = aligner('AIP', 'AP', method='global')
        assert list(a) == ['A', 'I', 'P']
        assert list(b) == ['A', '-', 'P']
        a, b = aligner('PAA', 'PA', method='local')
        assert list(a) == ['P', 'A']
        assert list(b) == ['P', 'A']
        a, b = aligner('PAA', 'PA', method='global')
        assert list(a) == ['P', 'A', 'A']
        assert list(b) == ['P', 'A', '-']
        a, b = aligner('PAA', 'PA', method='global_cfe')
        assert list(a) == ['P', 'A', 'A']
        assert list(b) == ['P', 'A', '-']
        # Explicit gap_open and gap_extend together with the 'DNA' matrix.
        a, b = aligner('TAATTC',
                       'TAAT',
                       method='global',
                       matrix='DNA',
                       gap_open=-10,
                       gap_extend=-1)
        assert list(a) == ['T', 'A', 'A', 'T', 'T', 'C']
        assert list(b) == ['T', 'A', 'A', 'T', '-', '-']
    def test_all(self):
        """Run ``aligner`` on a series of short sequence pairs and check the output.

        Cases are grouped by ``method`` ('global', 'global_cfe', 'local',
        'glocal'), followed by a block of older cases. Every call is unpacked
        into ``a`` and ``b``, which are compared with the expected aligned
        sequences; '-' is the gap character in the expected output.
        """
        # global
        a, b = aligner('WW','WEW', method= 'global')
        assert list(a) == ['W', '-', 'W']
        assert list(b) == ['W', 'E', 'W']
        # Same expectation with gap_open=-100.
        a, b = aligner('WW','WEW', method= 'global', gap_open=-100)
        assert list(a) == ['W', '-', 'W']
        assert list(b) == ['W', 'E', 'W']
        a,b = aligner('A', 'A', method='global', gap_open=-7)
        assert list(a) == ['A']
        assert list(b) == ['A']
        a,b = aligner('R','K', method='global', gap_open=-7)
        assert list(a) == ['R']
        assert list(b) == ['K']
        a,b = aligner('R','AR', method='global', gap_open=-7)
        assert list(a) == ['-','R'], (a, b)
        assert list(b) == ['A','R']
        a,b = aligner('AR','R', method='global', gap_open=-7)
        assert list(a) == ['A','R']
        assert list(b) == ['-','R']
        # 'AR' vs 'RA': ungapped expected at gap_open=-7, gapped at -3.
        a,b = aligner('AR','RA', method='global', gap_open=-7)
        assert list(a) == ['A','R']
        assert list(b) == ['R','A']
        a,b = aligner('AR','RA', method='global', gap_open=-3)
        assert list(a) == ['A', 'R', '-']
        assert list(b) == ['-', 'R', 'A']
        a,b = aligner('RAR','RR', method='global', gap_open=-3)
        assert list(b) == ['R', '-', 'R']
        assert list(a) == ['R', 'A', 'R']
        a,b = aligner('RAR','RR', method='global', gap_open=-10)
        assert list(b) == ['R', '-', 'R']
        assert list(a) == ['R', 'A', 'R']
        a,b = aligner('RAAR','RR', method='global', gap_open=-5)
        assert list(a) == ['R', 'A', 'A', 'R']
        assert list(b) == ['R', '-', '-', 'R']
        # 'RLR' vs 'RER': ungapped expected at gap_open=-9, gapped at -1.
        a,b = aligner('RLR','RER', method='global', gap_open=-9)
        assert list(a) == ['R', 'L', 'R']
        assert list(b) == ['R', 'E', 'R']
        a,b = aligner('RLR','RER', method='global', gap_open=-1)
        assert list(a) == ['R', 'L', '-', 'R']
        assert list(b) == ['R', '-', 'E', 'R']
        a,b = aligner('RLR','REER', method='global', gap_open=-1)
        assert list(a) == ['R', 'L', '-', '-', 'R']
        assert list(b) == ['R', '-', 'E', 'E', 'R']
        a, b = aligner('AGEBAM', 'AGEBAMAM', method='global', gap_open=-6)
        assert list(a) == ['A', 'G', 'E', 'B', 'A', 'M', '-', '-']
        assert list(b) == ['A', 'G', 'E', 'B', 'A', 'M', 'A', 'M']
        # Fractional gap_open value.
        a, b= aligner('CPELIRKNCANTH', 'PREKRLICAN', method='global', gap_open=-0.5)
        assert list(a) == ['C', 'P', '-', 'E', '-', '-', 'L', 'I', 'R', 'K', 'N', 'C', 'A', 'N', 'T', 'H']
        assert list(b) == ['-', 'P', 'R', 'E', 'K', 'R', 'L', 'I', '-', '-', '-', 'C', 'A', 'N', '-', '-']
        # NOTE(review): the expected output for gap_open=-6. is commented out,
        # so this call only checks that aligner returns two values without
        # raising.
        a, b= aligner('CPEL', 'PREK', method='global', gap_open=-6.)
        # assert list(a) == ['C', 'P', 'E', 'L']
        # assert list(b) == ['P', 'R', 'E', 'K']
        a, b= aligner('CPEL', 'PREK', method='global', gap_open=-5)
        assert list(a) == ['C', 'P', '-','E', 'L']
        assert list(b) == ['-','P', 'R', 'E', 'K']
        a,b = aligner('RLRR','RRER', method='global', gap_open=-1)
        assert list(a) == ['R', 'L', 'R', '-', 'R']
        assert list(b) == ['R', '-', 'R', 'E', 'R']
        a, b = aligner('TAAT', 'TAATTC', method='global', matrix='DNA')
        assert list(a) == ['T', 'A', 'A', 'T', '-', '-']
        assert list(b) == ['T', 'A', 'A', 'T', 'T', 'C']
        # global_cfe
        a, b = aligner('TAAT', 'TAATTC', method='global_cfe', matrix='DNA')
        assert list(a) == ['T', 'A', 'A', 'T', '-', '-']
        assert list(b) == ['T', 'A', 'A', 'T', 'T', 'C']
        a, b = aligner('TCTAAT', 'TAAT', method='global_cfe', matrix='DNA')
        assert list(b) == ['-', '-','T', 'A', 'A', 'T' ]
        assert list(a) == ['T', 'C','T', 'A', 'A', 'T' ]
        # local: only the matching region is expected back for both inputs
        a, b = aligner('TCTAAT', 'TAAT', method='local', matrix='DNA')
        assert list(b) == ['T', 'A', 'A', 'T' ]
        assert list(a) == ['T', 'A', 'A', 'T' ]
        a, b = aligner('TCTAAT', 'TAATCT', method='local', matrix='DNA')
        assert list(b) == ['T', 'A', 'A', 'T' ]
        assert list(a) == ['T', 'A', 'A', 'T' ]
        # glocal
        a, b = aligner('AAATAATAAA', 'TAAT', method='glocal', matrix='DNA')
        assert list(b) == ['T', 'A', 'A', 'T' ]
        assert list(a) == ['T', 'A', 'A', 'T' ]
        a, b = aligner('AAATAATAAA', 'TATAT', method='glocal', gap_open=-1, matrix='DNA')
        assert list(a) == ['T', 'A', '-', 'A', 'T' ]
        assert list(b) == ['T', 'A', 'T', 'A', 'T' ]

        # The next two cases use the same pair of sequences in swapped
        # argument order.
        a, b = aligner('TATATAAA', 'CCTATAT', method='glocal', gap_open=-1, matrix='DNA')
        assert (a, b) == ('--TATAT', 'CCTATAT' ), (a, b)

        a, b = aligner('CCTATAT', 'TATATAAA',method='glocal', gap_open=-1, matrix='DNA')
        assert list(b) == ['-', '-', 'T', 'A', 'T', 'A', 'T' ]
        assert list(a) == ['C', 'C', 'T', 'A', 'T', 'A', 'T' ]
        # old (presumably cases carried over from an earlier test suite --
        # TODO confirm); they mix 'local', 'global' and 'global_cfe' calls
        a, b = aligner('A', 'A', method ='local')
        assert list(a) == ['A']
        assert list(b) == ['A']
        a, b = aligner('RA', 'AR', method ='local')
        assert list(a) == ['R']
        assert list(b) == ['R']
        a, b = aligner('RRR', 'RR', method ='local')
        assert list(a) == ['R', 'R']
        assert list(b) == ['R', 'R']
        a, b = aligner('WR', 'WRR', method ='global')
        assert list(b) == ['W', 'R', 'R']
        assert list(a) == ['W', 'R', '-']
        a,b = aligner('PYNCHAN', 'YNCH', method='local')
        assert list(a) == ['Y', 'N', 'C', 'H']
        assert list(b) == ['Y', 'N', 'C', 'H']
        a, b = aligner('AIP', 'AP', method='local')
        assert list(a) == ['P']
        assert list(b) == ['P']
        a, b = aligner('AIP', 'AP', method='global')
        assert list(a) == ['A','I','P']
        assert list(b) == ['A','-','P']
        a, b = aligner('PAA', 'PA', method='local')
        assert list(a) == ['P','A']
        assert list(b) == ['P','A']
        a, b = aligner('PAA', 'PA', method='global')
        assert list(a) == ['P','A','A']
        assert list(b) == ['P','A','-']
        a, b = aligner('PAA', 'PA', method='global_cfe')
        assert list(a) == ['P','A','A']
        assert list(b) == ['P','A','-']
        # Explicit gap_open and gap_extend together with the 'DNA' matrix.
        a, b = aligner('TAATTC', 'TAAT', method='global', matrix='DNA', gap_open=-10, gap_extend=-1)
        assert list(a) == ['T', 'A', 'A', 'T', 'T', 'C']
        assert list(b) == ['T', 'A', 'A', 'T', '-', '-']
Beispiel #11
0
 def test_all(self):
     """Check CELECANTH / PELICAN alignments for both argument orders.

     Uses ``self.method``; whichever position PELICAN is passed in, it is
     expected back padded with two trailing gap characters.
     """
     coelacanth = "CELECANTH"
     pelican = "PELICAN"
     pelican_padded = 'PELICAN--'

     first = aligner(coelacanth, pelican, method=self.method)
     assert first == (coelacanth, pelican_padded), first

     second = aligner(pelican, coelacanth, method=self.method)
     assert second == (pelican_padded, coelacanth)
Beispiel #12
0
 def test_it(self):
     """Check the 'global_cfe' alignment of a short motif against a long sequence.

     The long sequence is expected back with one internal gap, and the
     short one with gap padding on both ends.
     """
     reference = 'AAAAAAAAAAAAACCTGCGCCCCAAAAAAAAAAAAAAAAAAAA'
     motif = 'CCTGCGCACCCC'
     want = (
         'AAAAAAAAAAAAACCTGCGC-CCCAAAAAAAAAAAAAAAAAAAA',
         '-------------CCTGCGCACCCC-------------------',
     )
     got = aligner(reference, motif, method='global_cfe')
     assert got == want