def nw_bio_mat(seq1, seq2, cost_mat, key):
    """Align two sequences with Biopython's ``PairwiseAligner``.

    Args:
        seq1: First sequence passed to ``PairwiseAligner.align``.
        seq2: Second sequence passed to ``PairwiseAligner.align``.
        cost_mat: Flat, indexable collection of scores.  Entry
            ``i * len(key) + j`` is the score for pairing ``key[i]`` with
            ``key[j]``; the entry at index ``len(key) ** 2`` is used as the
            gap score.
        key: Ordered alphabet, one symbol per position.

    Returns:
        A list holding one ``[row_for_seq1, row_for_seq2, score]`` entry per
        alignment produced by the aligner.  The score is truncated to ``int``.
    """
    size = len(key)
    aligner = PairwiseAligner(alphabet=key)

    # Unflatten the row-major cost table into {(symbol_a, symbol_b): score}.
    pair_scores = {
        (row_symbol, col_symbol): cost_mat[i * size + j]
        for i, row_symbol in enumerate(key)
        for j, col_symbol in enumerate(key)
    }
    aligner.substitution_matrix = substitution_matrices.Array(data=pair_scores)
    # The gap score sits directly after the size * size substitution scores.
    aligner.gap_score = cost_mat[size * size]

    return [
        [*_gapped_rows(alignment), int(alignment.score)]
        for alignment in aligner.align(seq1, seq2)
    ]


def _gapped_rows(alignment):
    """Return the two gapped sequence rows of a pairwise alignment."""
    try:
        # Preferred: integer indexing yields the gapped row itself, so the
        # result does not depend on how the alignment happens to be printed
        # (row labels, coordinates, wrapping of long alignments).
        return alignment[0], alignment[1]
    except (TypeError, NotImplementedError):
        # Alignment objects without integer indexing: fall back to the
        # printed form, which is expected to be
        # "<row 1>\n<match line>\n<row 2>\n".
        printed = str(alignment).split("\n")
        return printed[0], printed[2]
# Command-line options: FASTA input, substitution matrix name and gap scores.
parser.add_argument('-f', help='name of the fasta file', required=True)
parser.add_argument(
    '-s',
    help='name of subsitution matrix from BioPython',
    required=True,
    choices=MatrixInfo.available_matrices,
)
parser.add_argument('-go', help='gap opening score', type=float, required=True)
parser.add_argument('-ge', help='gap extension score', type=float, required=True)
args = parser.parse_args()

# Parse fasta file
seqs = list(SeqIO.parse(args.f, 'fasta'))

# Get substitution matrix
substitution_matrix = getattr(MatrixInfo, args.s)

# Pairwise alignment
aligner = PairwiseAligner()
aligner.open_gap_score, aligner.extend_gap_score = args.go, args.ge
aligner.substitution_matrix = substitution_matrix


# Align sequences and build matrix
def similarity_matrix(seqs, n=None):
    """Return the matrix of all-against-all alignment scores for ``seqs``.

    Args:
        seqs: Sequence of records; the ``.seq`` attribute of every item is
            scored against the ``.seq`` of every item (itself included)
            with the module-level ``aligner``.
        n: Side length of the square result.  Defaults to ``len(seqs)``.

    Returns:
        An ``n`` x ``n`` NumPy array whose ``[i][j]`` entry is the score of
        aligning ``seqs[i].seq`` with ``seqs[j].seq``.
    """
    # Resolve the size at call time.  A ``len(seqs)`` default in the
    # signature would be evaluated once, when the function is defined, and
    # would silently mis-size the matrix for any other list passed in later.
    if n is None:
        n = len(seqs)
    scores = np.zeros([n, n])
    for i, first in enumerate(seqs):
        for j, second in enumerate(seqs):
            # Only the score is needed, so skip building alignment objects.
            scores[i][j] = aligner.score(first.seq, second.seq)
    return scores


m = similarity_matrix(seqs)


def print_matrix(m):
    for i in m:
        row = ""
args.sub_matrix) aligners['global'].substitution_matrix = sub_matrix if not args.open_gap_score: args.open_gap_score = -11 if not args.extend_gap_score: args.extend_gap_score = -1 aligners['global'].open_gap_score = args.open_gap_score aligners['global'].extend_gap_score = args.extend_gap_score if args.sim_algo == 'smith-waterman': aligners['local'] = PairwiseAligner() aligners['local'].mode = 'local' if args.seq_type in ('dna', 'rna'): aligners['local'].match = args.match_score aligners['local'].mismatch = args.mismatch_score else: aligners['local'].substitution_matrix = sub_matrix aligners['local'].open_gap_score = args.open_gap_score aligners['local'].extend_gap_score = args.extend_gap_score # Karlin-Altschul parameter values if args.seq_type in ('dna', 'rna'): if ((args.match_score, args.mismatch_score) in KA_PARAMS['na'] and (abs(args.open_gap_score), abs(args.extend_gap_score)) in KA_PARAMS['na'][(args.match_score, args.mismatch_score)]): args.ka_gapped_l = KA_PARAMS['na'][(args.match_score, args.mismatch_score)][( abs(args.open_gap_score), abs(args.extend_gap_score))][0] args.ka_gapped_k = KA_PARAMS['na'][(args.match_score, args.mismatch_score)][( abs(args.open_gap_score),