def bioPython_default_local_aligner(a, b):
    """Align two FASTA records with a ``PairwiseAligner`` in local mode.

    The aligner is configured with match 2, mismatch -3, gap opening -7 and
    gap extension -2.  The two inputs are read from
    ``./resource/fasta<a>.fasta`` and ``./resource/fasta<b>.fasta``, where
    ``a`` and ``b`` are converted with ``str`` and spliced into the file name.

    Args:
        a: Value identifying the first FASTA file.
        b: Value identifying the second FASTA file.

    Note:
        As written, the result of ``aligner.align`` is bound to a local name
        and is not returned.
    """
    aligner = PairwiseAligner()
    aligner.mode = 'local'

    # Scoring scheme, applied in the same order as plain attribute assignment.
    scoring = (
        ('match_score', 2),
        ('mismatch_score', -3),
        ('open_gap_score', -7),
        ('extend_gap_score', -2),
    )
    for attribute, value in scoring:
        setattr(aligner, attribute, value)

    # Build each path and read its record one after the other.
    records = []
    for identifier in (a, b):
        fasta_path = ''.join(('./resource/fasta', str(identifier), '.fasta'))
        records.append(SeqIO.read(fasta_path, 'fasta'))
    first_record, second_record = records

    alignments = aligner.align(first_record.seq, second_record.seq)
type=str, required=True)
# The line above is the tail of an add_argument call whose start is not part of
# this excerpt.  `args.infile` and `args.outfile` are used below; their option
# definitions are not visible here either.

# Path of the reference FASTA file.
parser.add_argument('-r', '--reference', help='Reference to be aligned to', type=str, required=True)
# Name written into the header line of the output file.
parser.add_argument('-n', '--seq_name', help='Name of the aligned sequence', type=str, required=True)
args = parser.parse_args()

# Global-mode aligner: match 1, mismatch 0, gap opening -2, gap extension -1.
aligner = PairwiseAligner()
aligner.mode = 'global'
aligner.match_score = 1
aligner.mismatch_score = 0
aligner.open_gap_score = -2
aligner.extend_gap_score = -1

# Read the reference record, upper-case its sequence, replace every '-' with
# 'N', and store the result back on the record as a plain str.
ref = SeqIO.read(args.reference, "fasta")
ref.seq = str(ref.seq.upper()).replace('-', 'N')
# Record to be aligned against the reference.
cons = SeqIO.read(args.infile, "fasta")
# Reference is passed first, the input sequence second.
aln = aligner.align(ref.seq, cons.seq)

with open(args.outfile, 'w') as out:
    # print() joins its arguments with a space, so the header line is written
    # as "> <seq_name>".
    print(">", args.seq_name, file=out)
    # Writes the third line (index 2) of the text form of the first alignment.
    # NOTE(review): presumably this is the aligned input-sequence row; that
    # depends on how the installed Biopython renders an alignment as text -
    # confirm against the version in use.
    print(str(aln[0]).strip().split('\n')[2], file=out)
# Command-line interface: FASTA file, substitution-matrix name and the two gap
# scores are all required options.
parser = argparse.ArgumentParser(description='Computes a pairwise similarity matrix from a fasta file.')
parser.add_argument('-f',help='name of the fasta file',required=True)
# Accepted values are restricted to the names in MatrixInfo.available_matrices.
parser.add_argument('-s',help='name of subsitution matrix from BioPython',required=True,choices=MatrixInfo.available_matrices)
parser.add_argument('-go',help='gap opening score',type=float,required=True)
parser.add_argument('-ge',help='gap extension score',type=float,required=True)
args = parser.parse_args()

# Parse the FASTA file into a list of records.
seqs = list(SeqIO.parse(args.f,'fasta'))

# Look up the substitution matrix on MatrixInfo by the name given with -s.
substitution_matrix = getattr(MatrixInfo,args.s)

# Pairwise aligner configured with the requested gap scores and matrix.
# The alignment mode is not set here, so the library default applies.
aligner = PairwiseAligner()
aligner.open_gap_score, aligner.extend_gap_score = args.go, args.ge
aligner.substitution_matrix = substitution_matrix

# Align sequences and build matrix
# NOTE(review): the default `n=len(seqs)` is evaluated once, when this `def`
# statement runs, against the module-level `seqs` defined above.
def similarity_matrix(seqs,n=len(seqs)):
    """Return an n-by-n array of alignment scores between records in `seqs`.

    Every ordered pair (i, j) is aligned with the module-level `aligner`,
    including i == j and both (i, j) and (j, i).  The array is allocated with
    size `n`, while the loops run over `len(seqs)`.

    NOTE(review): if a caller passes an `n` that differs from `len(seqs)` the
    two sizes disagree - confirm this is never done.
    """
    # This local name shadows the function name inside the function body.
    similarity_matrix = np.zeros([n,n])
    for i in range(len(seqs)):
        for j in range(len(seqs)):
            alignment = aligner.align(seqs[i].seq,seqs[j].seq)
            # Score reported by the object that aligner.align returned.
            similarity_matrix[i][j] = alignment.score
    return similarity_matrix

m = similarity_matrix(seqs)

# Iterates over the rows of `m`; the loop body lies beyond this excerpt.
def print_matrix(m):
    for i in m:
# Excerpt from a larger routine: the enclosing scope and the creation of
# `aligners`, `sub_matrix` and `args` are not visible here.  The nesting below
# is reconstructed from the statement syntax - confirm against the full file.
aligners['global'].substitution_matrix = sub_matrix
# Fall back to -11 / -1 when a gap score is falsy.
# NOTE(review): an explicit 0 is also falsy and is replaced - confirm intended.
if not args.open_gap_score:
    args.open_gap_score = -11
if not args.extend_gap_score:
    args.extend_gap_score = -1
aligners['global'].open_gap_score = args.open_gap_score
aligners['global'].extend_gap_score = args.extend_gap_score

# A second, local-mode aligner is created only for the 'smith-waterman' option.
if args.sim_algo == 'smith-waterman':
    aligners['local'] = PairwiseAligner()
    aligners['local'].mode = 'local'
    if args.seq_type in ('dna', 'rna'):
        # NOTE(review): these assign `.match` / `.mismatch`; current Biopython
        # documents the attributes as `match_score` / `mismatch_score` -
        # confirm which names the installed version accepts.
        aligners['local'].match = args.match_score
        aligners['local'].mismatch = args.mismatch_score
    else:
        aligners['local'].substitution_matrix = sub_matrix
    aligners['local'].open_gap_score = args.open_gap_score
    aligners['local'].extend_gap_score = args.extend_gap_score

# Karlin-Altschul parameter values
# KA_PARAMS['na'] is indexed first by (match, mismatch) and then by the
# absolute (open, extend) gap scores; element 0 is stored as `ka_gapped_l` and
# element 1 as `ka_gapped_k`.  Nothing is assigned here when either key is
# missing; any fallback handling lies outside this excerpt.
if args.seq_type in ('dna', 'rna'):
    if ((args.match_score, args.mismatch_score) in KA_PARAMS['na'] and
            (abs(args.open_gap_score), abs(args.extend_gap_score)) in
            KA_PARAMS['na'][(args.match_score, args.mismatch_score)]):
        args.ka_gapped_l = KA_PARAMS['na'][(args.match_score, args.mismatch_score)][(
            abs(args.open_gap_score), abs(args.extend_gap_score))][0]
        args.ka_gapped_k = KA_PARAMS['na'][(args.match_score, args.mismatch_score)][(
            abs(args.open_gap_score), abs(args.extend_gap_score))][1]