def run_align_aligner(data):
    """Run a global alignment over every sequence pair in ``data``.

    Items ``[0]`` and ``[1]`` of each element are handed to ``aligner`` as
    the two sequences, using ``MY_MATRIX`` with gap_open=-1 and
    gap_extend=-1.  The alignment results are discarded.

    NOTE(review): looks like a benchmark/timing driver -- confirm.
    """
    for pair in data:
        aligner(
            pair[0],
            pair[1],
            method='global',
            gap_open=-1,
            gap_extend=-1,
            matrix=MY_MATRIX,
        )
def test_score(self):
    """Align two short sequences under BLOSUM62 and check the gapped result.

    The alignment method comes from ``self.method``; gaps cost -2 to open
    and -1 to extend.
    """
    query = "AGEBANAN"
    target = "ACEBAN"
    aligned = aligner(
        query,
        target,
        gap_extend=-1,
        gap_open=-2,
        matrix="BLOSUM62",
        method=self.method,
    )
    # The shorter sequence is expected to be padded with trailing gaps.
    assert aligned == ('AGEBANAN', 'ACEBAN--')
def gen_pic(self):
    """Write one ``<n>.bmp`` file under ``self.path`` per entry of ``self.info``.

    Every entry of ``self.info`` is an ``(offset, width, height)`` triple and
    entry number ``n`` (counted from 1) is paired with ``self.data[n - 1]``.
    A header is generated for each file via ``add_header.header_info``; the
    pixel payload is then written either verbatim or through
    ``align.aligner``.
    """
    for index, (offset, width, height) in enumerate(self.info, 1):
        bmp_path = os.path.join(self.path, "{}.bmp".format(index))
        with open(bmp_path, "wb") as img_out_fh:
            add_header.header_info(img_out_fh).gen_header(width, height)
            payload = self.data[index - 1]
            if width % 4 != 0:
                # Width is not a multiple of 4: let the aligner emit the
                # payload (presumably padding rows to a 4-byte boundary --
                # TODO confirm against align.aligner).
                writer = align.aligner(payload, img_out_fh)
                writer.align_write_raw_data(width, height)
            else:
                # Width already a multiple of 4: payload is written as-is.
                img_out_fh.write(payload)
def build_alignments(structure_to_sequences, pickle_path="./pickled_intermediates"):
    """Align the canonical and structure-based sequences of every structure.

    For each structure a semi-global ("glocal") alignment is attempted first.
    If that alignment reports more than 5 mismatches, a global alignment with
    a lower gap penalty (gap_open=-5, gap_extend=-1) replaces it.  The
    resulting mapping is pickled to ``<pickle_path>/alignments.pickle`` and
    also returned.

    Args:
        structure_to_sequences (dict, required. See build_sequence_dict):
            maps a structure identifier to a mapping with the keys
            ``"canonical"`` and ``"structure"``, each holding a sequence
            string.
        pickle_path (str): directory that receives ``alignments.pickle``.
            It is created (including parents) when it does not exist.

    Returns:
        dict: structure identifier -> first alignment returned by ``aligner``.

    Raises:
        Exception: whatever the fallback global alignment raises is re-raised
            after two explanatory notes have been printed.
    """
    import os  # local import: only needed to prepare the output directory

    # Create the output directory before the alignment work starts, so a
    # missing directory cannot discard the results at the final write.
    os.makedirs(pickle_path, exist_ok=True)

    alignments = {}
    for structure, sequences in structure_to_sequences.items():
        print(f"Aligning {structure}...")
        # "U" is replaced by "C" in both sequences before aligning
        # (presumably selenocysteine -> cysteine so the scoring matrix can
        # handle it -- TODO confirm).
        fasta_seq = sequences["canonical"].replace("U", "C")
        structure_seq = sequences['structure'].replace("U", "C")
        alignment = aligner(fasta_seq, structure_seq, method="glocal")[0]

        # If there are more than 5 mismatches, there may be a real gap.
        # Try a lower gap penalty
        if alignment.n_mismatches > 5:
            try:
                alignment = aligner(
                    fasta_seq,
                    structure_seq,
                    method="global",
                    gap_open=-5,
                    gap_extend=-1,
                )[0]
            except Exception:
                # Not swallowed: add context for the user, then re-raise.
                print(
                    "[NOTE] An exception is about to raise due to a failed global alignment."
                )
                print(
                    f"[NOTE] {structure} had more than 5 mismatches with a semi-global",
                    "alignment, but failed to generate any global alignments with gap_open = -5 and gap_extend = -1"
                )
                raise

        alignments[structure] = alignment
        print(alignment)

    with open(f"{pickle_path}/alignments.pickle", "wb") as f:
        pickle.dump(alignments, f)
    return alignments
def test_all(self):
    """Align CELECANTH against PELICAN in both argument orders.

    Uses ``self.method`` as the alignment method; in both orders the shorter
    sequence is expected to come back padded with two trailing gaps.
    """
    forward = aligner("CELECANTH", "PELICAN", method=self.method)
    assert forward == ('CELECANTH', 'PELICAN--'), forward

    swapped = aligner("PELICAN", "CELECANTH", method=self.method)
    assert swapped == ('PELICAN--', 'CELECANTH')
def test_it(self):
    """Check a ``global_cfe`` alignment of a short motif inside a long sequence.

    The expected result leaves the short sequence gapped at both ends and
    opens a single gap in the long sequence.
    """
    long_seq = 'AAAAAAAAAAAAACCTGCGCCCCAAAAAAAAAAAAAAAAAAAA'
    short_seq = 'CCTGCGCACCCC'
    expected = (
        'AAAAAAAAAAAAACCTGCGC-CCCAAAAAAAAAAAAAAAAAAAA',
        '-------------CCTGCGCACCCC-------------------',
    )
    aligned = aligner(long_seq, short_seq, method='global_cfe')
    assert aligned == expected
#!/usr/bin/env python
#-*- coding:utf8 -*-
# Rebuild BMP files from the images listed in ./res/index.txt.
#
# Line n of the index (counted from 1) holds three whitespace-separated
# integers -- width, height, offset -- describing ./res/<n>.bmp.  For each
# entry a header is generated into <n>.out.bmp (in the current working
# directory) and the image payload is appended, either verbatim or through
# align.aligner when the width is not a multiple of 4.
import add_header
import align

with open('./res/index.txt', 'r') as index_fh:
    index_rows = index_fh.readlines()

for number, row in enumerate(index_rows, 1):
    source_dir = './res/'
    stem = str(number)
    with open(source_dir + stem + '.bmp', 'rb') as img_in_fh:
        width_txt, height_txt, offset_txt = row.split()
        width = int(width_txt)
        height = int(height_txt)
        offset = int(offset_txt)  # parsed from the index but not used below
        with open(stem + '.out.bmp', 'wb') as img_out_fh:
            add_header.header_info(img_out_fh).gen_header(width, height)
            if width % 4 != 0:
                # Width is not a multiple of 4: the aligner copies the data
                # (presumably padding each row -- TODO confirm).
                copier = align.aligner(img_in_fh, img_out_fh)
                copier.align_write(width, height)
            else:
                # Width already a multiple of 4: copy the payload unchanged.
                img_out_fh.write(img_in_fh.read())
def test_all(self):
    """Exercise ``aligner`` with the global, global_cfe, local and glocal methods.

    Expected alignments are written as gapped strings and expanded with
    ``list()`` before being compared with the aligned sequences.
    """
    # --- global ---
    seq_a, seq_b = aligner('WW', 'WEW', method='global')
    assert list(seq_a) == list('W-W')
    assert list(seq_b) == list('WEW')

    seq_a, seq_b = aligner('WW', 'WEW', method='global', gap_open=-100)
    assert list(seq_a) == list('W-W')
    assert list(seq_b) == list('WEW')

    seq_a, seq_b = aligner('A', 'A', method='global', gap_open=-7)
    assert list(seq_a) == list('A')
    assert list(seq_b) == list('A')

    seq_a, seq_b = aligner('R', 'K', method='global', gap_open=-7)
    assert list(seq_a) == list('R')
    assert list(seq_b) == list('K')

    seq_a, seq_b = aligner('R', 'AR', method='global', gap_open=-7)
    assert list(seq_a) == list('-R'), (seq_a, seq_b)
    assert list(seq_b) == list('AR')

    seq_a, seq_b = aligner('AR', 'R', method='global', gap_open=-7)
    assert list(seq_a) == list('AR')
    assert list(seq_b) == list('-R')

    seq_a, seq_b = aligner('AR', 'RA', method='global', gap_open=-7)
    assert list(seq_a) == list('AR')
    assert list(seq_b) == list('RA')

    seq_a, seq_b = aligner('AR', 'RA', method='global', gap_open=-3)
    assert list(seq_a) == list('AR-')
    assert list(seq_b) == list('-RA')

    seq_a, seq_b = aligner('RAR', 'RR', method='global', gap_open=-3)
    assert list(seq_b) == list('R-R')
    assert list(seq_a) == list('RAR')

    seq_a, seq_b = aligner('RAR', 'RR', method='global', gap_open=-10)
    assert list(seq_b) == list('R-R')
    assert list(seq_a) == list('RAR')

    seq_a, seq_b = aligner('RAAR', 'RR', method='global', gap_open=-5)
    assert list(seq_a) == list('RAAR')
    assert list(seq_b) == list('R--R')

    seq_a, seq_b = aligner('RLR', 'RER', method='global', gap_open=-9)
    assert list(seq_a) == list('RLR')
    assert list(seq_b) == list('RER')

    seq_a, seq_b = aligner('RLR', 'RER', method='global', gap_open=-1)
    assert list(seq_a) == list('RL-R')
    assert list(seq_b) == list('R-ER')

    seq_a, seq_b = aligner('RLR', 'REER', method='global', gap_open=-1)
    assert list(seq_a) == list('RL--R')
    assert list(seq_b) == list('R-EER')

    seq_a, seq_b = aligner('AGEBAM', 'AGEBAMAM', method='global', gap_open=-6)
    assert list(seq_a) == list('AGEBAM--')
    assert list(seq_b) == list('AGEBAMAM')

    seq_a, seq_b = aligner('CPELIRKNCANTH', 'PREKRLICAN',
                           method='global', gap_open=-0.5)
    assert list(seq_a) == list('CP-E--LIRKNCANTH')
    assert list(seq_b) == list('-PREKRLI---CAN--')

    # Result deliberately left unchecked: the ungapped expectation
    # (CPEL / PREK) is disabled in this test.
    seq_a, seq_b = aligner('CPEL', 'PREK', method='global', gap_open=-6.0)

    seq_a, seq_b = aligner('CPEL', 'PREK', method='global', gap_open=-5)
    assert list(seq_a) == list('CP-EL')
    assert list(seq_b) == list('-PREK')

    seq_a, seq_b = aligner('RLRR', 'RRER', method='global', gap_open=-1)
    assert list(seq_a) == list('RLR-R')
    assert list(seq_b) == list('R-RER')

    seq_a, seq_b = aligner('TAAT', 'TAATTC', method='global', matrix='DNA')
    assert list(seq_a) == list('TAAT--')
    assert list(seq_b) == list('TAATTC')

    # --- global_cfe ---
    seq_a, seq_b = aligner('TAAT', 'TAATTC', method='global_cfe', matrix='DNA')
    assert list(seq_a) == list('TAAT--')
    assert list(seq_b) == list('TAATTC')

    seq_a, seq_b = aligner('TCTAAT', 'TAAT', method='global_cfe', matrix='DNA')
    assert list(seq_b) == list('--TAAT')
    assert list(seq_a) == list('TCTAAT')

    # --- local ---
    seq_a, seq_b = aligner('TCTAAT', 'TAAT', method='local', matrix='DNA')
    assert list(seq_b) == list('TAAT')
    assert list(seq_a) == list('TAAT')

    seq_a, seq_b = aligner('TCTAAT', 'TAATCT', method='local', matrix='DNA')
    assert list(seq_b) == list('TAAT')
    assert list(seq_a) == list('TAAT')

    # --- glocal ---
    seq_a, seq_b = aligner('AAATAATAAA', 'TAAT', method='glocal', matrix='DNA')
    assert list(seq_b) == list('TAAT')
    assert list(seq_a) == list('TAAT')

    seq_a, seq_b = aligner('AAATAATAAA', 'TATAT', method='glocal',
                           gap_open=-1, matrix='DNA')
    assert list(seq_a) == list('TA-AT')
    assert list(seq_b) == list('TATAT')

    seq_a, seq_b = aligner('TATATAAA', 'CCTATAT', method='glocal',
                           gap_open=-1, matrix='DNA')
    # Compared as plain strings here, not as character lists.
    assert (seq_a, seq_b) == ('--TATAT', 'CCTATAT'), (seq_a, seq_b)

    seq_a, seq_b = aligner('CCTATAT', 'TATATAAA', method='glocal',
                           gap_open=-1, matrix='DNA')
    assert list(seq_b) == list('--TATAT')
    assert list(seq_a) == list('CCTATAT')

    # --- old ---
    seq_a, seq_b = aligner('A', 'A', method='local')
    assert list(seq_a) == list('A')
    assert list(seq_b) == list('A')

    seq_a, seq_b = aligner('RA', 'AR', method='local')
    assert list(seq_a) == list('R')
    assert list(seq_b) == list('R')

    seq_a, seq_b = aligner('RRR', 'RR', method='local')
    assert list(seq_a) == list('RR')
    assert list(seq_b) == list('RR')

    seq_a, seq_b = aligner('WR', 'WRR', method='global')
    assert list(seq_b) == list('WRR')
    assert list(seq_a) == list('WR-')

    seq_a, seq_b = aligner('PYNCHAN', 'YNCH', method='local')
    assert list(seq_a) == list('YNCH')
    assert list(seq_b) == list('YNCH')

    seq_a, seq_b = aligner('AIP', 'AP', method='local')
    assert list(seq_a) == list('P')
    assert list(seq_b) == list('P')

    seq_a, seq_b = aligner('AIP', 'AP', method='global')
    assert list(seq_a) == list('AIP')
    assert list(seq_b) == list('A-P')

    seq_a, seq_b = aligner('PAA', 'PA', method='local')
    assert list(seq_a) == list('PA')
    assert list(seq_b) == list('PA')

    seq_a, seq_b = aligner('PAA', 'PA', method='global')
    assert list(seq_a) == list('PAA')
    assert list(seq_b) == list('PA-')

    seq_a, seq_b = aligner('PAA', 'PA', method='global_cfe')
    assert list(seq_a) == list('PAA')
    assert list(seq_b) == list('PA-')

    seq_a, seq_b = aligner('TAATTC', 'TAAT', method='global', matrix='DNA',
                           gap_open=-10, gap_extend=-1)
    assert list(seq_a) == list('TAATTC')
    assert list(seq_b) == list('TAAT--')
def test_all(self):
    """Run ``aligner`` through its global, global_cfe, local and glocal modes.

    Each case unpacks the two aligned sequences and compares them, character
    by character, with the expected gapped strings.
    """
    # global
    first, second = aligner("WW", "WEW", method="global")
    assert list(first) == list("W-W")
    assert list(second) == list("WEW")

    first, second = aligner("WW", "WEW", method="global", gap_open=-100)
    assert list(first) == list("W-W")
    assert list(second) == list("WEW")

    first, second = aligner("A", "A", method="global", gap_open=-7)
    assert list(first) == list("A")
    assert list(second) == list("A")

    first, second = aligner("R", "K", method="global", gap_open=-7)
    assert list(first) == list("R")
    assert list(second) == list("K")

    first, second = aligner("R", "AR", method="global", gap_open=-7)
    assert list(first) == list("-R"), (first, second)
    assert list(second) == list("AR")

    first, second = aligner("AR", "R", method="global", gap_open=-7)
    assert list(first) == list("AR")
    assert list(second) == list("-R")

    first, second = aligner("AR", "RA", method="global", gap_open=-7)
    assert list(first) == list("AR")
    assert list(second) == list("RA")

    first, second = aligner("AR", "RA", method="global", gap_open=-3)
    assert list(first) == list("AR-")
    assert list(second) == list("-RA")

    first, second = aligner("RAR", "RR", method="global", gap_open=-3)
    assert list(second) == list("R-R")
    assert list(first) == list("RAR")

    first, second = aligner("RAR", "RR", method="global", gap_open=-10)
    assert list(second) == list("R-R")
    assert list(first) == list("RAR")

    first, second = aligner("RAAR", "RR", method="global", gap_open=-5)
    assert list(first) == list("RAAR")
    assert list(second) == list("R--R")

    first, second = aligner("RLR", "RER", method="global", gap_open=-9)
    assert list(first) == list("RLR")
    assert list(second) == list("RER")

    first, second = aligner("RLR", "RER", method="global", gap_open=-1)
    assert list(first) == list("RL-R")
    assert list(second) == list("R-ER")

    first, second = aligner("RLR", "REER", method="global", gap_open=-1)
    assert list(first) == list("RL--R")
    assert list(second) == list("R-EER")

    first, second = aligner("AGEBAM", "AGEBAMAM", method="global", gap_open=-6)
    assert list(first) == list("AGEBAM--")
    assert list(second) == list("AGEBAMAM")

    first, second = aligner(
        "CPELIRKNCANTH", "PREKRLICAN", method="global", gap_open=-0.5
    )
    assert list(first) == list("CP-E--LIRKNCANTH")
    assert list(second) == list("-PREKRLI---CAN--")

    # The outcome of this call is not asserted: the checks for an ungapped
    # CPEL / PREK result are disabled in this test.
    first, second = aligner("CPEL", "PREK", method="global", gap_open=-6.0)

    first, second = aligner("CPEL", "PREK", method="global", gap_open=-5)
    assert list(first) == list("CP-EL")
    assert list(second) == list("-PREK")

    first, second = aligner("RLRR", "RRER", method="global", gap_open=-1)
    assert list(first) == list("RLR-R")
    assert list(second) == list("R-RER")

    first, second = aligner("TAAT", "TAATTC", method="global", matrix="DNA")
    assert list(first) == list("TAAT--")
    assert list(second) == list("TAATTC")

    # global_cfe
    first, second = aligner("TAAT", "TAATTC", method="global_cfe", matrix="DNA")
    assert list(first) == list("TAAT--")
    assert list(second) == list("TAATTC")

    first, second = aligner("TCTAAT", "TAAT", method="global_cfe", matrix="DNA")
    assert list(second) == list("--TAAT")
    assert list(first) == list("TCTAAT")

    # local
    first, second = aligner("TCTAAT", "TAAT", method="local", matrix="DNA")
    assert list(second) == list("TAAT")
    assert list(first) == list("TAAT")

    first, second = aligner("TCTAAT", "TAATCT", method="local", matrix="DNA")
    assert list(second) == list("TAAT")
    assert list(first) == list("TAAT")

    # glocal
    first, second = aligner("AAATAATAAA", "TAAT", method="glocal", matrix="DNA")
    assert list(second) == list("TAAT")
    assert list(first) == list("TAAT")

    first, second = aligner(
        "AAATAATAAA", "TATAT", method="glocal", gap_open=-1, matrix="DNA"
    )
    assert list(first) == list("TA-AT")
    assert list(second) == list("TATAT")

    first, second = aligner(
        "TATATAAA", "CCTATAT", method="glocal", gap_open=-1, matrix="DNA"
    )
    # This case compares the raw strings rather than character lists.
    assert (first, second) == ("--TATAT", "CCTATAT"), (first, second)

    first, second = aligner(
        "CCTATAT", "TATATAAA", method="glocal", gap_open=-1, matrix="DNA"
    )
    assert list(second) == list("--TATAT")
    assert list(first) == list("CCTATAT")

    # old
    first, second = aligner("A", "A", method="local")
    assert list(first) == list("A")
    assert list(second) == list("A")

    first, second = aligner("RA", "AR", method="local")
    assert list(first) == list("R")
    assert list(second) == list("R")

    first, second = aligner("RRR", "RR", method="local")
    assert list(first) == list("RR")
    assert list(second) == list("RR")

    first, second = aligner("WR", "WRR", method="global")
    assert list(second) == list("WRR")
    assert list(first) == list("WR-")

    first, second = aligner("PYNCHAN", "YNCH", method="local")
    assert list(first) == list("YNCH")
    assert list(second) == list("YNCH")

    first, second = aligner("AIP", "AP", method="local")
    assert list(first) == list("P")
    assert list(second) == list("P")

    first, second = aligner("AIP", "AP", method="global")
    assert list(first) == list("AIP")
    assert list(second) == list("A-P")

    first, second = aligner("PAA", "PA", method="local")
    assert list(first) == list("PA")
    assert list(second) == list("PA")

    first, second = aligner("PAA", "PA", method="global")
    assert list(first) == list("PAA")
    assert list(second) == list("PA-")

    first, second = aligner("PAA", "PA", method="global_cfe")
    assert list(first) == list("PAA")
    assert list(second) == list("PA-")

    first, second = aligner(
        "TAATTC", "TAAT", method="global", matrix="DNA",
        gap_open=-10, gap_extend=-1,
    )
    assert list(first) == list("TAATTC")
    assert list(second) == list("TAAT--")