Beispiel #1
0
def nw_bio_mat(seq1, seq2, cost_mat, key):
    """Align two sequences using scores unpacked from a flat cost list.

    ``cost_mat`` is read as a flat sequence: its first ``len(key) ** 2``
    entries are the substitution scores in row-major order (rows and
    columns both follow the order of ``key``), and the entry immediately
    after them is used as the gap score.

    Returns a list holding one ``[first_line, third_line, score]`` entry
    per alignment, where the two lines are taken from the printed form of
    the alignment and the score is converted with ``int``.
    """
    aligner = PairwiseAligner(alphabet=key)

    # Unpack the flat cost list into a {(row_symbol, col_symbol): score} map.
    size = len(key)
    pair_scores = {
        (row_symbol, col_symbol): cost_mat[row * size + col]
        for row, row_symbol in enumerate(key)
        for col, col_symbol in enumerate(key)
    }
    aligner.substitution_matrix = substitution_matrices.Array(data=pair_scores)

    # The gap score sits directly behind the size * size substitution block.
    aligner.gap_score = cost_mat[size ** 2]

    alignments = aligner.align(seq1, seq2)
    formated_alignments = []
    for index in range(len(alignments)):
        alignment = alignments[index]
        text_rows = str(alignment).split("\n")
        # Rows 0 and 2 of the printed alignment are kept; row 1 is dropped.
        formated_alignments.append(
            [text_rows[0], text_rows[2], int(alignment.score)]
        )

    return formated_alignments
Beispiel #2
0
# Command-line options: input fasta file, substitution matrix name and the
# two gap scores. All four are required.
parser.add_argument('-f',help='name of the fasta file',required=True)
# Only names listed in MatrixInfo.available_matrices are accepted for -s.
parser.add_argument('-s',help='name of subsitution matrix from BioPython',required=True,choices=MatrixInfo.available_matrices)
parser.add_argument('-go',help='gap opening score',type=float,required=True)
parser.add_argument('-ge',help='gap extension score',type=float,required=True)
args = parser.parse_args()

# Parse the fasta file; the result is materialised as a list so it can be
# indexed and measured with len() further down.
seqs = list(SeqIO.parse(args.f,'fasta'))

# Look up the requested substitution matrix by name on MatrixInfo.
substitution_matrix = getattr(MatrixInfo,args.s)

# Configure the pairwise aligner with the gap scores and the matrix chosen
# on the command line.
aligner = PairwiseAligner()
aligner.open_gap_score, aligner.extend_gap_score  = args.go, args.ge
aligner.substitution_matrix = substitution_matrix

# Align sequences and build matrix
def similarity_matrix(seqs, n=None):
    """Build the all-against-all alignment score matrix for ``seqs``.

    Every ordered pair ``(i, j)`` of entries is aligned with the
    module-level ``aligner`` and the resulting score is stored at
    position ``[i][j]``.

    Args:
        seqs: Indexable collection whose items expose a ``.seq`` attribute.
        n: Side length of the square result. Defaults to ``len(seqs)``.
            A larger value leaves the extra rows and columns at zero; a
            smaller one makes the assignment fail with ``IndexError``.

    Returns:
        An ``n`` x ``n`` array (created with ``np.zeros``) of scores.
    """
    # Resolve the default at call time. A ``len(seqs)`` default written in
    # the signature is evaluated once, at definition time, against the
    # global ``seqs`` rather than against the argument passed in.
    if n is None:
        n = len(seqs)

    scores = np.zeros([n, n])
    for i, first in enumerate(seqs):
        for j, second in enumerate(seqs):
            scores[i][j] = aligner.align(first.seq, second.seq).score
    return scores

# Score every entry of `seqs` against every other entry (and itself).
m = similarity_matrix(seqs)

def print_matrix(m):
	for i in m:
		row = ""
                         args.sub_matrix)
    aligners['global'].substitution_matrix = sub_matrix
    if not args.open_gap_score:
        args.open_gap_score = -11
    if not args.extend_gap_score:
        args.extend_gap_score = -1
# Apply the gap penalties from the parsed arguments to the global aligner.
aligners['global'].open_gap_score = args.open_gap_score
aligners['global'].extend_gap_score = args.extend_gap_score
# When Smith-Waterman is selected, a second aligner in 'local' mode is
# registered next to the global one.
if args.sim_algo == 'smith-waterman':
    aligners['local'] = PairwiseAligner()
    aligners['local'].mode = 'local'
    # DNA/RNA input is scored with plain match/mismatch values; every other
    # sequence type uses the substitution matrix instead.
    if args.seq_type in ('dna', 'rna'):
        # NOTE(review): recent Biopython releases name these attributes
        # `match_score` / `mismatch_score`; confirm that `match` /
        # `mismatch` are valid for the Biopython version this targets.
        aligners['local'].match = args.match_score
        aligners['local'].mismatch = args.mismatch_score
    else:
        aligners['local'].substitution_matrix = sub_matrix
    # The local aligner gets the same gap penalties as the global one.
    aligners['local'].open_gap_score = args.open_gap_score
    aligners['local'].extend_gap_score = args.extend_gap_score

# Karlin-Altschul parameter values
if args.seq_type in ('dna', 'rna'):
    if ((args.match_score, args.mismatch_score) in KA_PARAMS['na']
            and (abs(args.open_gap_score), abs(args.extend_gap_score))
            in KA_PARAMS['na'][(args.match_score, args.mismatch_score)]):
        args.ka_gapped_l = KA_PARAMS['na'][(args.match_score,
                                            args.mismatch_score)][(
                                                abs(args.open_gap_score),
                                                abs(args.extend_gap_score))][0]
        args.ka_gapped_k = KA_PARAMS['na'][(args.match_score,
                                            args.mismatch_score)][(
                                                abs(args.open_gap_score),