Python PairwiseAligner.PairwiseAligner 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: Bio.Align

클래스/타입: PairwiseAligner

메소드/함수: PairwiseAligner

hotexamples.com에서의 예제들: 13

Python PairwiseAligner.PairwiseAligner - 13개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 Bio.Align.PairwiseAligner.PairwiseAligner에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

PairwiseAligner(13)

mode(8)

align(7)

match_score(6)

mismatch_score(6)

open_gap_score(4)

substitution_matrix(3)

extend_gap_score(3)

internal_open_gap_score(2)

gap_score(2)

internal_extend_gap_score(2)

mismatch(1)

match(1)

query_internal_gap_score(1)

query_left_gap_score(1)

query_right_gap_score(1)

score(1)

target_internal_gap_score(1)

target_left_gap_score(1)

target_right_gap_score(1)

예제 #1

파일 보기

파일: test_pairwise_alignment_map.py 프로젝트: svalqui/biopython

 def setUp(self):
     aligner = PairwiseAligner()
     aligner.internal_open_gap_score = -1
     aligner.internal_extend_gap_score = -0.0
     aligner.match_score = +1
     aligner.mismatch_score = -1
     aligner.mode = "local"
     self.aligner = aligner

예제 #2

파일 보기

파일: seq.py 프로젝트: iTaxoTools/TaxI2

def make_aligner() -> PairwiseAligner:
    aligner = PairwiseAligner(match_score=MATCH_SCORE,
                              mismatch_score=MISMATCH_SCORE,
                              end_open_gap_score=END_GAP_PENALTY,
                              end_extend_gap_score=END_GAP_EXTEND_PENALTY,
                              internal_open_gap_score=GAP_PENALTY,
                              internal_extend_gap_score=GAP_EXTEND_PENALTY)
    return aligner

예제 #3

파일 보기

def bioPython_default_local_aligner(a, b):
    aligner = PairwiseAligner()
    aligner.mode = 'local'
    aligner.match_score = 2
    aligner.mismatch_score = -3
    aligner.open_gap_score = -7
    aligner.extend_gap_score = -2

    sequence1 = SeqIO.read('./resource/fasta' + str(a) + '.fasta', 'fasta')
    sequence2 = SeqIO.read('./resource/fasta' + str(b) + '.fasta', 'fasta')
    alignments = aligner.align(sequence1.seq, sequence2.seq)

예제 #4

파일 보기

def nw_bio(seq1, seq2, cost_table):
    aligner = PairwiseAligner(alphabet=list(set(seq1 + seq2)))
    aligner.match_score = cost_table[0]
    aligner.mismatch_score = cost_table[1]
    aligner.gap_score = cost_table[2]
    alignments = aligner.align(seq1, seq2)
    formated_alignments = []

    for i in range(len(alignments)):
        als = str(alignments[i]).split("\n")
        formated_alignments.append([als[0], als[2], int(alignments[i].score)])

    return formated_alignments

예제 #5

파일 보기

파일: SharedFunctions.py 프로젝트: Swart-lab/bleties

def get_clusters_from_seqlist(seqlist, dist_threshold=0.05):
    """Cluster a list of sequences by a distance identity threshold

    Parameters
    ----------
    seqlist : list
        list of sequences as str
    dist_threshold : float
        Max distance value to retain, branches above this length in the 
        hierarchical clustering tree will be cut.

    Returns
    -------
    list
        list of lists - input sequences now grouped by cluster
    list
        list of int - cluster memberships of the originally input list
    """
    if len(seqlist) == 1:
        # Skip alignment if there is only one sequence
        return([seqlist], [0])
    else:
        aligner = PairwiseAligner()
        aligner.mode = "local"

        # Convert sequence list to distance matrix
        distmatrix = []
        for seq1 in seqlist:
            row = []
            for seq2 in seqlist:
                maxlen = max([len(seq1), len(seq2)])
                # Take percentage identity of pairwise alignment score (match base
                # +1, all other operations +0) over the longer sequence in pair
                idval = aligner.align(seq1, seq2).score / maxlen
                distval = 1 - idval  # convert to distance fraction
                row.append(distval)
            distmatrix.append(row)
        # Hierarchical clustering from the distance matrix
        htree = treecluster(data=None, distancematrix=array(distmatrix))
        # Find number of branches with length longer than threshold, and add 1
        # to get number of cuts
        cuts = 1 + len([htree[i].distance for i in range(len(htree))
                        if htree[i].distance > dist_threshold])
        clust_ids = list(htree.cut(cuts))
        clust_seqs_dict = defaultdict(list)
        for i in range(len(seqlist)):
            clust_seqs_dict[clust_ids[i]] += [seqlist[i]]
        # Convert dict of lists to list of lists
        clust_seqs = [clust_seqs_dict[i] for i in clust_seqs_dict]
        return(clust_seqs, clust_ids)

예제 #6

파일 보기

def nw_bio_mat(seq1, seq2, cost_mat, key):
    aligner = PairwiseAligner(alphabet=key)
    matrix = {}
    for i in range(len(key)):
        for j in range(0, len(key)):
            matrix[(key[i], key[j])] = cost_mat[i * len(key) + j]
    aligner.substitution_matrix = substitution_matrices.Array(data=matrix)
    aligner.gap_score = cost_mat[len(key)**2]
    alignments = aligner.align(seq1, seq2)
    formated_alignments = []

    for i in range(len(alignments)):
        als = str(alignments[i]).split("\n")
        formated_alignments.append([als[0], als[2], int(alignments[i].score)])

    return formated_alignments

예제 #7

파일 보기

def create_aligner() -> PairwiseAligner:
    """
    Creates an aligner that can be used to search for proteins.
    """
    aligner = PairwiseAligner(mode="local")

    # By default we want matches and penalize mismatches.
    aligner.mismatch_score = -1
    aligner.match_score = 1

    # left or right gaps shouldn't count negatively due to the local search.
    aligner.query_left_gap_score = 0
    aligner.query_right_gap_score = 0
    aligner.target_right_gap_score = 0
    aligner.target_left_gap_score = 0

    # Gaps in the middle should count negatively to narrow down the search space.
    aligner.query_internal_gap_score = -1
    aligner.target_internal_gap_score = -1

    return aligner

예제 #8

파일 보기

파일: tree_building.py 프로젝트: bels4/Classification_sandbox

    def pairwise(self, potential_parent):
        """
        Парное выравнивание последовательности листа на потенциальных родителей и сохранение скора

        Args:
            potential_parent (str): последовательность потенциального родителя
        """
        aligner = PairwiseAligner()

        # используем локальное выравнивание
        aligner.mode = 'local'

        # заменим дефолтные символы пропуска на другие
        seq = self.seq.replace('-', '')
        potential_parent_undersores = potential_parent.replace('-', '')

        # выравнивание
        score = aligner.score(seq, potential_parent_undersores)

        # сохраняем скор
        self.parent_scores[potential_parent] = score

예제 #9

파일 보기

파일: __init__.py 프로젝트: shuichiro-makigaki/agora

 def _remove_missing_res(self, record: SeqRecord, pdb: Path):
     structure = PDBParser().get_structure(record.id, pdb)
     sequence = ''.join([
         str(_.get_sequence())
         for _ in CaPPBuilder().build_peptides(structure, aa_only=False)
     ])
     path = PairwiseAligner().align(record.seq.ungap('-'), sequence)[0].path
     gaps = []
     for i, _ in enumerate(path[:-1]):
         if path[i][1] == path[i + 1][1]:
             gaps.append((path[i][0], path[i + 1][0]))
     gaps = list(reversed(gaps))
     mut = record.seq.tomutable()
     for gap in gaps:
         i = 0
         for k, res in enumerate(mut):
             if res == '-':
                 continue
             if gap[0] <= i < gap[1]:
                 mut[k] = '-'
             i += 1
     record.seq = mut.toseq()
     return record

예제 #10

파일 보기

파일: test_pairwise_alignment_map.py 프로젝트: svalqui/biopython

def perform_randomized_tests(n=1000):
    """Perform randomized tests and compare to pslMap.

    Run this function to perform 8 x n mappings for alignments of randomly
    generated sequences, get the alignment in PSL format, and compare the
    result to that of pslMap.
    """
    aligner = PairwiseAligner()
    aligner.internal_open_gap_score = -1
    aligner.internal_extend_gap_score = -0.0
    aligner.match_score = +1
    aligner.mismatch_score = -1
    aligner.mode = "local"
    for i in range(n):
        nBlocks1 = random.randint(1, 10)
        nBlocks2 = random.randint(1, 10)
        test_random(aligner, nBlocks1, nBlocks2, "+", "+")
        test_random(aligner, nBlocks1, nBlocks2, "+", "-")
        test_random(aligner, nBlocks1, nBlocks2, "-", "+")
        test_random(aligner, nBlocks1, nBlocks2, "-", "-")
        test_random_sequences("+", "+")
        test_random_sequences("+", "-")
        test_random_sequences("-", "+")
        test_random_sequences("-", "-")

예제 #11

파일 보기

                    type=str,
                    required=True)

parser.add_argument('-r',
                    '--reference',
                    help='Reference to be aligned to',
                    type=str,
                    required=True)

parser.add_argument('-n',
                    '--seq_name',
                    help='Name of the aligned sequence',
                    type=str,
                    required=True)

args = parser.parse_args()

aligner = PairwiseAligner()
aligner.mode = 'global'
aligner.match_score = 1
aligner.mismatch_score = 0
aligner.open_gap_score = -2
aligner.extend_gap_score = -1

ref = SeqIO.read(args.reference, "fasta")
ref.seq = str(ref.seq.upper()).replace('-', 'N')
cons = SeqIO.read(args.infile, "fasta")
aln = aligner.align(ref.seq, cons.seq)
with open(args.outfile, 'w') as out:
    print(">", args.seq_name, file=out)
    print(str(aln[0]).strip().split('\n')[2], file=out)

예제 #12

파일 보기

try:
    GAP_PENALTY = scores_dict['gap penalty']
    GAP_EXTEND_PENALTY = scores_dict['gap extend penalty']
    END_GAP_PENALTY = scores_dict['end gap penalty']
    END_GAP_EXTEND_PENALTY = scores_dict['end gap extend penalty']
    MATCH_SCORE = scores_dict['match score']
    MISMATCH_SCORE = scores_dict['mismatch score']
except KeyError as ex:
    raise ValueError(f"'{ex.args[0]}' is missing in data/scores.tab") from ex

score_matrix = np.empty((16, 16))

for i, j in itertools.product(range(0, 16), range(0, 16)):
    score_matrix[i, j] = MATCH_SCORE if i & j else MISMATCH_SCORE

aligner = PairwiseAligner(substitution_matrix=score_matrix, end_open_gap_score=END_GAP_PENALTY,
                          end_extend_gap_score=END_GAP_EXTEND_PENALTY, internal_open_gap_score=GAP_PENALTY, internal_extend_gap_score=GAP_EXTEND_PENALTY)

seq_read_dict = {
    '-':0,
    'A':1,
    'C':2,
    'G':4,
    'T':8,
    'R':5,
    'Y':10,
    'S':6,
    'W':9,
    'K':12,
    'M':3,
    'B':14,
    'D':13,

예제 #13

파일 보기

파일: fuzzy_kmer_seq_aligner.py 프로젝트: efreebern/fuzzy-kmer-seq-aligner

args = parser.parse_args()

for file in (args.query_seq, args.target_seq):
    if not path.isfile(file):
        parser.error("File %s doesn't exist" % file)

# alphabet
if args.seq_type == 'dna':
    args.seq_abc = IUPACAmbiguousDNA()
elif args.seq_type == 'rna':
    args.seq_abc = IUPACAmbiguousRNA()
else:
    args.seq_abc = ExtendedIUPACProtein()

# Aligners setup
aligners = {'global': PairwiseAligner(), 'local': None}
aligners['global'].mode = 'global'
if args.seq_type in ('dna', 'rna'):
    aligners['global'].match = args.match_score
    aligners['global'].mismatch = args.mismatch_score
    if not args.open_gap_score:
        args.open_gap_score = -5
    if not args.extend_gap_score:
        args.extend_gap_score = -2
else:
    sub_matrix = getattr(import_module('Bio.SubsMat.MatrixInfo'),
                         args.sub_matrix)
    aligners['global'].substitution_matrix = sub_matrix
    if not args.open_gap_score:
        args.open_gap_score = -11
    if not args.extend_gap_score: