Esempio n. 1
0
    def _align_clusters(config, one, two, cutoff=0.3):
        """Constructs a cluster alignment using the given configuration.

        Every gene of every locus in `one` is aligned against every gene of
        every locus in `two`; pairs whose identity reaches `cutoff` are
        recorded as links on the returned Alignment.

        Args:
            config: Mapping of PairwiseAligner attribute names to values. The
                optional "substitution_matrix" key names the matrix to load
                (BLOSUM62 when absent or unknown). The mapping is not
                modified.
            one: Query cluster; must expose `name` and `loci`.
            two: Target cluster; must expose `name` and `loci`.
            cutoff: Minimum identity for a gene pair to be linked.

        Returns:
            The Alignment holding all links that passed the cutoff.
        """
        LOG.info("%s vs %s", one.name, two.name)

        # Work on a copy: popping from the caller's dict would silently drop
        # the matrix choice for every later call that shares this config.
        settings = dict(config)

        aligner = Align.PairwiseAligner()
        matrix = settings.pop("substitution_matrix", "BLOSUM62")
        if matrix not in substitution_matrices.load():
            LOG.warning(
                "Invalid substitution matrix (%s), defaulting to BLOSUM62",
                matrix)
            matrix = "BLOSUM62"
        aligner.substitution_matrix = substitution_matrices.load(matrix)
        for k, v in settings.items():
            setattr(aligner, k, v)

        alignment = Alignment(query=one, target=two)
        for locusA, locusB in product(one.loci, two.loci):
            for geneA, geneB in product(locusA.genes, locusB.genes):
                # Genes without a protein translation cannot be aligned.
                if not geneA.translation or not geneB.translation:
                    continue
                aln = aligner.align(geneA.translation, geneB.translation)
                identity, similarity = compute_identity(aln[0])
                if identity < cutoff:
                    continue
                alignment.add_link(geneA, geneB, identity, similarity)
        return alignment
Esempio n. 2
0
    def _align_clusters(config, one, two, cutoff=0.3):
        """Constructs a cluster alignment using the given configuration.

        Every gene of every locus in `one` is aligned against every gene of
        every locus in `two`; pairs whose identity reaches `cutoff` are
        recorded as links on the returned Alignment.

        Args:
            config: Mapping of PairwiseAligner attribute names to values. The
                optional "substitution_matrix" key names the matrix to load
                (BLOSUM62 when absent or unknown). The mapping is not
                modified.
            one: Query cluster; must expose `name` and `loci`.
            two: Target cluster; must expose `name` and `loci`.
            cutoff: Minimum identity for a gene pair to be linked.

        Returns:
            The Alignment holding all links that passed the cutoff.
        """
        LOG.info("%s vs %s", one.name, two.name)

        # Work on a copy: popping from the caller's dict would silently drop
        # the matrix choice for every later call that shares this config.
        settings = dict(config)

        aligner = Align.PairwiseAligner()

        # Select the substitution matrix.
        # Defaults to BLOSUM62 when none or invalid matrix specified.
        matrix = settings.pop("substitution_matrix", "BLOSUM62")
        if matrix not in substitution_matrices.load():
            LOG.warning(
                "Invalid substitution matrix '(%s)', defaulting to BLOSUM62",
                matrix)
            matrix = "BLOSUM62"
        aligner.substitution_matrix = substitution_matrices.load(matrix)

        # ValueError is thrown during sequence alignment when a letter
        # in the sequence is not found in the substitution matrix.
        # Extended IUPAC codes (BXZJUO) are added to mitigate this.
        extend_matrix_alphabet(aligner.substitution_matrix, codes='BXZJUO')

        for k, v in settings.items():
            setattr(aligner, k, v)

        alignment = Alignment(query=one, target=two)
        for locusA, locusB in product(one.loci, two.loci):
            for geneA, geneB in product(locusA.genes, locusB.genes):
                # Genes without a protein translation cannot be aligned.
                if not geneA.translation or not geneB.translation:
                    continue
                aln = aligner.align(geneA.translation, geneB.translation)
                identity, similarity = compute_identity(aln[0])
                if identity < cutoff:
                    continue
                alignment.add_link(geneA, geneB, identity, similarity)
        return alignment
Esempio n. 3
0
def align_sequences_match_residues(mobile_seq,
                                   target_seq,
                                   seq_align_mat='BLOSUM80',
                                   gap_penalty=-1.0,
                                   verbosity=0):
    """ Align two aminoacid sequences using Bio.pairwise2.align.globalds and the substitution matrix seq_align_mat,
    return a tuple with two lists of per-residue flags to be used in the 3D alignment (mobile, reference)

    Each list has one boolean per residue of the corresponding sequence: True when that residue is aligned to a
    residue of the other sequence, False when it is aligned to a gap.

    :param str mobile_seq: sequence of mobile protein
    :param str target_seq: sequence of target protein
    :param str seq_align_mat: name of a substitution matrix loadable by Bio.Align.substitution_matrices.load
    :param float gap_penalty: gap penalty to the alignment (used for both opening and extension)
    :param int verbosity: sets the verbosity level
    :raises ImportError: if Biopython cannot be imported
    :raises FileNotFoundError: if the substitution matrix cannot be loaded
    :rtype: tuple
    """
    try:
        from Bio.pairwise2 import align
        from Bio.Align import substitution_matrices
    except ImportError as error:
        os_util.local_print(
            'Failed to import Biopython with error: {}\nBiopython is necessary to sequence '
            'alignment. Sequences to be aligned:\nReference: {}\nMobile: {}'
            ''.format(error, target_seq, mobile_seq),
            msg_verbosity=os_util.verbosity_level.error,
            current_verbosity=verbosity)
        # Re-raise the original exception so its type and traceback are kept.
        raise

    try:
        matrix = substitution_matrices.load(seq_align_mat)
    except FileNotFoundError as error:
        available_matrices = substitution_matrices.load()
        os_util.local_print(
            'Failed to import substitution matrix {} with error: {}\nSubstitution matrix must be one '
            'of: {}'
            ''.format(seq_align_mat, error, available_matrices),
            msg_verbosity=os_util.verbosity_level.error,
            current_verbosity=verbosity)
        raise

    # The same penalty is used for opening and extending a gap.
    align_result = align.globalds(target_seq, mobile_seq, matrix,
                                  gap_penalty, gap_penalty)[0]
    os_util.local_print(
        'This is the alignment result to be used in protein alignment:\n{}'
        ''.format(align_result),
        msg_verbosity=os_util.verbosity_level.info,
        current_verbosity=verbosity)

    aligned_target, aligned_mobile = align_result[0], align_result[1]
    # One flag per target residue: is it paired with a mobile residue?
    ref_align_str = [
        res_j != '-'
        for res_i, res_j in zip(aligned_target, aligned_mobile)
        if res_i != '-'
    ]
    # One flag per mobile residue: is it paired with a target residue?
    mob_align_str = [
        res_i != '-'
        for res_i, res_j in zip(aligned_target, aligned_mobile)
        if res_j != '-'
    ]

    return mob_align_str, ref_align_str
Esempio n. 4
0
def needle_alignment(s1, s2):
    '''
DESCRIPTION

    Does a Needleman-Wunsch Alignment of sequence s1 and s2 and
    returns a Bio.Align.MultipleSeqAlignment object.

    Scoring uses BLOSUM62 with gap open -10 and gap extend -0.5. Letter
    pairs missing from the matrix score 1 when identical and -4 otherwise.
    '''
    from Bio import pairwise2
    from Bio.Align import MultipleSeqAlignment
    from Bio.SeqRecord import SeqRecord
    from Bio.Seq import Seq

    try:
        from Bio.Align import substitution_matrices
    except ImportError:
        # Older Biopython: the matrix is a plain dict keyed by letter pairs.
        from Bio.SubsMat.MatrixInfo import blosum62
    else:
        blosum62 = substitution_matrices.load("BLOSUM62")

    def match_callback(c1, c2):
        score = blosum62.get((c1, c2))
        if score is None:
            # The legacy dict stores only one orientation of each pair, so
            # try the reversed key before falling back to the default.
            score = blosum62.get((c2, c1), 1 if c1 == c2 else -4)
        return score

    alns = pairwise2.align.globalcs(s1, s2,
            match_callback, -10., -.5,
            one_alignment_only=True)

    return MultipleSeqAlignment([
        SeqRecord(Seq(alns[0][0]), "s1"),
        SeqRecord(Seq(alns[0][1]), "s2"),
    ])
Esempio n. 5
0
def check_for_qrdr_mutations(hits_dict, contigs, qrdr, min_ident, min_cov):
    """Record amino-acid changes at the watched GyrA/ParC positions.

    Hits returned by run_blastn(qrdr, contigs, None, min_ident) whose coverage
    exceeds min_cov are translated, globally aligned (BLOSUM62, gap open -10,
    gap extend -0.5) against the matching reference sequence, and inspected at
    the positions listed in `qrdr_loci`. Each position whose residue differs
    from the wild type (and is not '-' or '.') is appended to
    hits_dict['Flq_mutations'] as '<gene>-<pos><residue>'.

    Raises:
        AssertionError: if a hit's gene_id is neither 'GyrA' nor 'ParC'.
    """
    qrdr_loci = {
        'GyrA': [(83, 'S'), (87, 'D')],
        'ParC': [(80, 'S'), (84, 'E')]
    }

    references = {
        'GyrA': 'MSDLAREITPVNIEEELKNSYLDYAMSVIVGRALPDVRDGLKPVHRRVLYAMNVLGNDWN'
                'KAYKKSARVVGDVIGKYHPHGDSAVYDTIVRMAQPFSLRYMLVDGQGNFGSIDGDSAAAM',
        'ParC': 'MSDMAERLALHEFTENAYLNYSMYVIMDRALPFIGDGLKPVQRRIVYAMSELGLNASAKF'
                'KKSARTVGDVLGKYHPHGDSACYEAMVLMAQPFSYRYPLVDGQGNWGAPDDPKSFAAMRY',
    }

    blosum62 = substitution_matrices.load('BLOSUM62')

    snps = []
    hits = run_blastn(qrdr, contigs, None, min_ident)
    for hit in hits:
        _, coverage, translation = truncation_check(hit)
        if coverage <= min_cov:
            continue
        if hit.gene_id not in references:
            # Explicit raise instead of `assert False`, which is stripped
            # when Python runs with -O.
            raise AssertionError(
                'unexpected QRDR gene: {}'.format(hit.gene_id))
        alignments = pairwise2.align.globalds(references[hit.gene_id],
                                              translation, blosum62,
                                              -10, -0.5)
        bases_per_ref_pos = get_bases_per_ref_pos(alignments[0])
        for pos, wt_base in qrdr_loci[hit.gene_id]:
            # Check membership before indexing so an uncovered position is
            # skipped instead of raising KeyError.
            if pos not in bases_per_ref_pos:
                continue
            assembly_base = bases_per_ref_pos[pos]
            if assembly_base not in (wt_base, '-', '.'):
                snps.append(hit.gene_id + '-' + str(pos) + assembly_base)
    if snps:
        hits_dict['Flq_mutations'] += snps
Esempio n. 6
0
def biopython_align(qseq, tseq, param, table=False, strict=False):
    """Align a target record against a query record and print every result.

    The aligner is configured from `param` (mode, matrix, gap_open,
    gap_extend). `param.is_dna` is set here and `param.matrix` is filled in
    when empty. Each alignment is wrapped in an Alignment record and printed
    with print_tabular (table=True) or print_pairwise.
    """
    query_text = str(qseq.seq)
    target_text = str(tseq.seq)

    aligner = Align.PairwiseAligner()

    # Only local mode changes the algorithm; global and semiglobal differ
    # purely in how end gaps are scored.
    if param.mode == const.LOCAL_ALIGN:
        aligner.mode = 'local'

    # Guess DNA vs peptide from the first 100 query letters.
    param.is_dna = all(letter in "ATGC" for letter in query_text[:100])

    # Fall back to a matrix suited to the detected sequence type.
    if not param.matrix:
        param.matrix = 'NUC.4.4' if param.is_dna else 'BLOSUM62'
    aligner.substitution_matrix = substitution_matrices.load(param.matrix)

    # Internal gap scoring.
    aligner.open_gap_score = -param.gap_open
    aligner.extend_gap_score = -param.gap_extend

    # Strict mode penalizes end gaps like internal gaps.
    if strict:
        aligner.target_end_open_gap_score = -param.gap_open
        aligner.target_end_extend_gap_score = -param.gap_extend

        aligner.query_end_open_gap_score = -param.gap_open
        aligner.query_end_extend_gap_score = -param.gap_extend

    # End gaps are free when not strict; semiglobal overrides strict mode.
    if not strict or param.mode == const.SEMIGLOBAL_ALIGN:
        aligner.target_end_gap_score = 0.0
        aligner.query_end_gap_score = 0.0

    # Biopython aligns target to query.
    raw_alignments = aligner.align(target_text, query_text)

    print_func = print_tabular if table else print_pairwise

    for index, raw in enumerate(raw_alignments):
        # Wrap each raw alignment in the more detailed record class.
        record = Alignment(qseq=qseq, tseq=tseq, aln=raw, param=param)
        print_func(record, param=param, index=index)
Esempio n. 7
0
def map_seqs(obj, ref, segid_obj=None, segid_ref=None, matrix='BLOSUM62'):
    """
    given two sequences obj and ref
    return a mapping dict map_obj2ref_fullseq={(segid,0-based pos):(segid,0-based pos)}

    Only the first 101 alignments produced by the aligner are examined and
    the first one with the highest score is used. When segid_obj is None the
    mapping is returned as plain {obj_pos: ref_pos}. An empty dict is
    returned when the aligner yields no alignment at all.
    """
    aligner = Align.PairwiseAligner()
    aligner.substitution_matrix = substitution_matrices.load(matrix)

    # No placeholder string and no score floor of 0: a non-positive best
    # score must not leave us without an alignment object.
    best_aln = None
    for count, candidate in enumerate(aligner.align(str(obj), str(ref))):
        if best_aln is None or candidate.score > best_aln.score:
            best_aln = candidate
        if count >= 100:  # we analyze only the first 101 alignments
            break

    if best_aln is None:
        return {}

    t2q = {}
    for obj_block, ref_block in zip(best_aln.aligned[0], best_aln.aligned[1]):
        for x, y in zip(range(*obj_block), range(*ref_block)):
            t2q[x] = y
    if segid_obj is None:
        return t2q
    return {(segid_obj, k): (segid_ref, v) for k, v in t2q.items()}
Esempio n. 8
0
def load_matrix(name: str) -> substitution_matrices.Array:
    """
    Fetch one of the substitution matrices bundled with Biopython.

    :param name: Name of the matrix, as accepted by substitution_matrices.load.
    :return: The loaded substitution matrix.
    """
    matrix = substitution_matrices.load(name)
    return matrix
        def set_scores():
            """Configure `aligner` from the enclosing `object` settings mapping.

            Reads 'alignment_type', 'substitution_matrix', 'match_score',
            'mismatch_score', 'score_schema' and the gap score keys.

            Raises:
                ValueError: if 'alignment_type' is neither "local" nor "global".
                FileNotFoundError: if the requested substitution matrix
                    cannot be loaded.
            """
            # checks which alignment type
            if object['alignment_type'] == "local":
                aligner.mode = 'local'

            elif object['alignment_type'] != "global":
                raise ValueError(
                    f"Alignment type {object['alignment_type']} asked is not available or does not exist"
                )

            # Use the substitution matrix when one was chosen; otherwise fall
            # back to the plain match/mismatch scores.
            if object['substitution_matrix']:
                from Bio.Align import substitution_matrices
                try:
                    aligner.substitution_matrix = substitution_matrices.load(
                        object['substitution_matrix'])
                except Exception as err:
                    # Narrowed from a bare except so KeyboardInterrupt and
                    # SystemExit are no longer swallowed; the cause is kept.
                    raise FileNotFoundError(
                        f"There's No {object['substitution_matrix']} matrix"
                    ) from err
            else:
                aligner.match_score = object['match_score']
                aligner.mismatch_score = object['mismatch_score']

            # Any other 'score_schema' value leaves the gap scores untouched.
            if object['score_schema'] == 'simple':
                aligner.gap_score = object['gap_score']
            elif object['score_schema'] == 'complex':
                # Each setting key is named after the aligner attribute it
                # configures.
                for attr in (
                        'target_internal_open_gap_score',
                        'target_internal_extend_gap_score',
                        'target_left_open_gap_score',
                        'target_left_extend_gap_score',
                        'target_right_open_gap_score',
                        'target_right_extend_gap_score',
                        'query_internal_open_gap_score',
                        'query_internal_extend_gap_score',
                        'query_left_open_gap_score',
                        'query_left_extend_gap_score',
                        'query_right_open_gap_score',
                        'query_right_extend_gap_score',
                ):
                    setattr(aligner, attr, object[attr])
Esempio n. 10
0
def matrix_offer():
    """
    Provides a list of available substitution matrices.

    :return: None. Prints one "<number> <name>" line per matrix, numbered from 1.
    """
    for position, name in enumerate(substitution_matrices.load(), start=1):
        print(f"{position} {name}")
Esempio n. 11
0
def scores_pairwise(ref: str, seq: str):
    """
    Build the BLOSUM50 score table between two sequences.

    :return: array of shape (len(seq), len(ref)); entry [i, j] is the score for
        pairing reference residue `ref[j]` with residue `seq[i]`
    """
    blosum50 = substitution_matrices.load('BLOSUM50')
    table = np.empty((len(seq), len(ref)))
    for row, seq_residue in enumerate(seq):
        table[row] = [blosum50[(ref_residue, seq_residue)] for ref_residue in ref]
    return table
Esempio n. 12
0
def align(seqA, seqB, sigma=5):
    """Print the best PAM250 local alignment of seqA and seqB.

    `sigma` is the gap penalty magnitude; its negated absolute value is used
    for both opening and extending a gap.
    """
    penalty = -abs(sigma)
    pam250 = substitution_matrices.load("PAM250")
    candidates = pairwise2.align.localds(seqA,
                                         seqB,
                                         match_dict=pam250,
                                         open=penalty,
                                         extend=penalty)
    # Stable descending sort: the first of the top-scoring alignments wins.
    ranked = sorted(candidates, key=lambda aln: aln.score, reverse=True)
    best = ranked[0]
    print(pairwise2.format_alignment(*best))
Esempio n. 13
0
def BLOSUM45_score_dist(s1, s2):
    """Return 1 - score(s1, s2) / max(score(s1, s1), score(s2, s2)).

    Scores come from a global alignment with BLOSUM45 and an open gap score
    of -10.
    """
    scorer = Align.PairwiseAligner()
    scorer.open_gap_score = -10
    scorer.substitution_matrix = substitution_matrices.load("BLOSUM45")
    scorer.mode = "global"

    cross_score = scorer.score(s1, s2)
    self_score_1 = scorer.score(s1, s1)
    self_score_2 = scorer.score(s2, s2)

    # Normalize by the larger self-alignment score.
    normalizer = max(self_score_1, self_score_2)
    return 1 - cross_score / normalizer
Esempio n. 14
0
def matrix_choice(num: int) -> str:
    """
    Returns a name of the matrix based on its number in the list
    from the matrix_offer function.

    :param num: The 1-based position of the matrix in the list from the matrix_offer function.
    :return: Name of the matrix at a given position.
    :raises IndexError: If num is smaller than 1 or larger than the number of matrices.
    """
    # Without this guard, 0 or a negative number would wrap around through
    # negative indexing and silently return a matrix from the end of the list.
    if num < 1:
        raise IndexError(f"matrix number must be at least 1, got {num}")
    mxs = substitution_matrices.load()
    return mxs[num - 1]
 def _get_protein_similarity(self,
                             seq1,
                             seq2,
                             matrix="BLOSUM62",
                             gap_open=-10,
                             gap_extend=-0.5):
     """Return the top global alignment score divided by len(seq1)."""
     scoring = substitution_matrices.load(name=matrix)
     candidates = pairwise2.align.globalds(seq1, seq2, scoring, gap_open,
                                           gap_extend)
     # Each alignment is (seqA, seqB, score, begin, end); only the score of
     # the first one is needed.
     _, _, best_score, _, _ = candidates[0]
     return best_score / len(seq1)
Esempio n. 16
0
    def align_sequences(self, structA, structB):
        """
        Globally aligns the amino-acid sequences of two structures with
        BLOSUM62 (gap open -10.0, gap extend -0.5, end gaps not penalized)
        using Biopython's pairwise2, and returns the residue mapping.

        The returned dict maps the residue number (r.id[1]) of each aligned
        residue in structA to the residue number of its partner in structB.
        """
        def _get_pdb_sequence(structure):
            """
            Lists (residue number, one-letter code) for every amino acid;
            residue names missing from aa3to1 become "X".
            """
            return [(res.id[1], aa3to1.get(res.resname, "X"))
                    for res in structure.get_residues() if is_aa(res)]

        resseq_A = _get_pdb_sequence(structA)
        resseq_B = _get_pdb_sequence(structB)

        sequence_A = "".join(letter for _, letter in resseq_A)
        sequence_B = "".join(letter for _, letter in resseq_B)
        alns = pairwise2.align.globalds(
            sequence_A,
            sequence_B,
            substitution_matrices.load("BLOSUM62"),
            one_alignment_only=True,
            open=-10.0,
            extend=-0.5,
            penalize_end_gaps=(False, False),
        )

        aligned_A, aligned_B, _score, _begin, _end = alns[0]

        # Walk the alignment columns, tracking the position reached in each
        # ungapped sequence; only columns without a gap yield a mapping.
        mapping = {}
        pos_A, pos_B = 0, 0
        for letter_A, letter_B in zip(aligned_A, aligned_B):
            has_A = letter_A != "-"
            has_B = letter_B != "-"
            if has_A and has_B:
                assert resseq_A[pos_A][1] == letter_A
                assert resseq_B[pos_B][1] == letter_B
                mapping[resseq_A[pos_A][0]] = resseq_B[pos_B][0]
            if has_A:
                pos_A += 1
            if has_B:
                pos_B += 1

        return mapping
Esempio n. 17
0
 def __load_matrix(self, mname):
     """Load a matrix and cache it as a dict keyed by two-letter strings.

     `mname` is resolved through self.mnames. Every (a, b) entry is stored
     under both "ab" and "ba"; a KeyError is raised if the source matrix
     holds different values for the two orientations. The resulting dict is
     cached under the canonical name and its lower/upper-case forms.
     """
     canonical = self.mnames[mname]
     source = substitution_matrices.load(canonical)

     flat = {}
     for pair, score in source.items():
         mirrored = pair[::-1]
         if mirrored in source and source[mirrored] != score:
             raise KeyError((pair, score, mirrored, source[mirrored]))
         flat["".join(pair)] = score
         flat["".join(mirrored)] = score

     for alias in (canonical, canonical.lower(), canonical.upper()):
         self.__matrices[alias] = flat
Esempio n. 18
0
    def __init__(self):
        """Build the name lookup table and an empty matrix cache.

        self.mnames maps each available matrix name, plus its upper- and
        lower-case forms, to the canonical name.
        """
        available = substitution_matrices.load()

        self.mnames = {}
        for canonical in available:
            for alias in (canonical, canonical.upper(), canonical.lower()):
                self.mnames[alias] = canonical

        self.__matrices = {}
Esempio n. 19
0
def align(seqA, seqB, opening=11, extension=1):
    """Print the best BLOSUM62 global alignment of seqA and seqB.

    `opening` and `extension` are gap penalty magnitudes; their negated
    absolute values are handed to create_gap for both sequences.
    """
    open_penalty = -abs(opening)
    extend_penalty = -abs(extension)
    blosum62 = substitution_matrices.load("BLOSUM62")
    candidates = pairwise2.align.globaldc(
        seqA,
        seqB,
        match_dict=blosum62,
        gap_A_fn=create_gap(open_penalty, extend_penalty),
        gap_B_fn=create_gap(open_penalty, extend_penalty))
    # Stable descending sort: the first of the top-scoring alignments wins.
    ranked = sorted(candidates, key=lambda aln: aln.score, reverse=True)
    best = ranked[0]
    print(pairwise2.format_alignment(*best))
Esempio n. 20
0
def alignUsingLinearSpace(v,w,
                          replace_score = substitution_matrices.load("BLOSUM62"),
                          indel_cost    = 5):
    """Recursive divide-and-conquer driver for aligning v and w.

    v, w          -- the two sequences; they are sliced and measured with len()
    replace_score -- substitution scores forwarded to FindMiddleEdge; the
                     default matrix is loaded once, when this function is defined
    indel_cost    -- gap cost forwarded to FindMiddleEdge and used in the
                     base cases of the recursion

    NOTE(review): as written, nothing is returned. The result of the
    top-level LinearSpaceAlignment call is discarded and the middle edges are
    never emitted (see the "output midEdge" placeholder), so this looks
    unfinished -- confirm intent before relying on it.
    """
    
    # True when the edge advances along w (RIGHT or DOWNRIGHT).
    def isRightOrDownRight(midEdge):
        return midEdge==RIGHT or midEdge==DOWNRIGHT
    
    # True when the edge advances along v (DOWN or DOWNRIGHT).
    def isDownOrDownRight(midEdge):
        return midEdge==DOWN or midEdge==DOWNRIGHT
    
    # MiddleNodeAndEdge
    #
    # An adapter which replaces MiddleNode and MiddleEdge in the pseudocode, and calls FindMiddleEdge
    # (FindMiddleEdge is defined elsewhere). Returns j1 of the edge's first node
    # together with the edge direction.
    # NOTE(review): FindMiddleEdge receives slices of v and w, so its coordinates
    # are presumably relative to those slices; no offset is added back here -- verify.
    def MiddleNodeAndEdge(top, bottom, left, right):
        ((i1,j1),(i2,j2)) = FindMiddleEdge(v[top:bottom],w[left:right],replace_score=replace_score,indel_cost=indel_cost)
        direction         = RIGHT if i1==i2 else DOWN if j1==j2 else DOWNRIGHT
        return j1,direction
    
    # LinearSpaceAlignment
    #
    # Find longest path between a substring of v[top] v[bottom-1]
    # and w[left] and w[right-1]
    #
    # Inputs: top
    #         bottom
    #         left
    #         right
    #
    # The two base cases return indel_cost times the remaining length; the
    # recursive case falls off the end and returns None.
    
    def  LinearSpaceAlignment(top, bottom, left, right):
        if left==right:
            return indel_cost*(bottom - top)
        if top==bottom:
            return indel_cost*(right-left)
        middle           = (left + right)//2
        midNode,midEdge  = MiddleNodeAndEdge(top, bottom, left, right)
  
        LinearSpaceAlignment(top, midNode, left, middle)
        # output midEdge
        # Step past the middle edge before recursing into the second half.
        if isRightOrDownRight(midEdge):
            middle += 1
        if isDownOrDownRight(midEdge):
            midNode+= 1
        LinearSpaceAlignment(midNode, bottom, middle, right) 
        
    # Direction codes for the middle edge; assigned before the first call
    # below, so the closures above can read them.
    RIGHT     = 0
    DOWN      = 1
    DOWNRIGHT = 2
    LinearSpaceAlignment(0,len(v)+1,0,len(w)+1)
Esempio n. 21
0
def align_with_blosum62(aa_seq1, aa_seq2):
    """
    Globally aligns two amino acid sequences with BLOSUM62 and returns the
    first alignment produced by pairwise2.
    """
    # note: depending on the sequence homology it may make sense to use
    # another blosum matrix (or different gap_open / gap_extend values)
    blosum62 = substitution_matrices.load("BLOSUM62")
    open_cost = -12  # cost to open a gap
    extend_cost = -3  # cost to extend a gap

    candidates = pairwise2.align.globalds(aa_seq1, aa_seq2, blosum62,
                                          open_cost, extend_cost)
    first = candidates[0]
    return first
def getBLOSUMDistanceMatrix(alignment):
    """Build a pairwise BLOSUM62-based distance table for aligned records.

    For every pair of records the BLOSUM62 scores of the letters at each
    column are summed and negated; gap characters ('-') are scored as '*'.

    :param alignment: iterable of records exposing `id`, `seq` and integer
        indexing; it is iterated more than once.
    :return: numeric pandas DataFrame whose rows and columns are record ids.
    """
    blosumMatrix = substitution_matrices.load("BLOSUM62")

    ids = [r.id for r in alignment]
    df = pd.DataFrame(columns=ids, index=ids)

    for record1 in alignment:
        for record2 in alignment:
            score = 0
            for i in range(len(record1.seq)):
                aa1 = record1[i] if record1[i] != '-' else '*'
                aa2 = record2[i] if record2[i] != '-' else '*'
                score -= blosumMatrix[aa1][aa2]
            # Single .loc assignment (row = record2, column = record1):
            # chained df[col][row] assignment may write to a temporary copy
            # and stops working under pandas copy-on-write.
            df.loc[record2.id, record1.id] = score

    return df.apply(pd.to_numeric)
Esempio n. 23
0
def sequence_similarity(
    sequence1: str,
    sequence2: str,
    open_gap_penalty: int = -11,
    extend_gap_penalty: int = -1,
    substitution_matrix: str = "BLOSUM62",
) -> float:
    """
    Calculate the sequence similarity of two amino acid sequences.

    Parameters
    ----------
    sequence1: str
        The first sequence.
    sequence2: str
        The second sequence.
    open_gap_penalty: int
        The penalty to open a gap.
    extend_gap_penalty: int
        The penalty to extend a gap.
    substitution_matrix: str
        The substitution matrix to use during alignment.
        Available matrices can be found via:
        >>> from Bio.Align import substitution_matrices
        >>> substitution_matrices.load()

    Returns
    -------
    score: float
        Global alignment score of the two sequences.
    """
    from Bio import pairwise2
    from Bio.Align import substitution_matrices

    matrix = substitution_matrices.load(substitution_matrix)
    known_letters = matrix.alphabet

    def _mask_unknown(sequence):
        # Letters the matrix cannot score are replaced by "*".
        return "".join(
            [letter if letter in known_letters else "*" for letter in sequence]
        )

    return pairwise2.align.globalds(
        _mask_unknown(sequence1),
        _mask_unknown(sequence2),
        matrix,
        open_gap_penalty,
        extend_gap_penalty,
        score_only=True,
    )
Esempio n. 24
0
def main():
    """Enqueue a global alignment job, poll for its result and print it.

    The job runs global_align(aligner, x, y, matrix) with the BLOSUM62
    matrix passed as an argument rather than set on the aligner. The result
    is polled every 2 seconds.

    Raises:
        TimeoutError: if the job has produced no result when polling stops.
    """
    q = Queue(connection=conn)
    mat_name = "BLOSUM62"
    matrix = substitution_matrices.load(mat_name)
    aligner = Align.PairwiseAligner()
    job = q.enqueue(global_align, args=(aligner, x, y, matrix))

    count = 0
    # `is None` instead of `!= None`: an identity test cannot be affected by
    # a result type that overloads comparison operators.
    while job.result is None and count <= 100000:
        time.sleep(2)
        count += 1
        print(f'job.get_id(): {job.get_id()}, ' f'job.result:{job.result}')

    alignments = job.result
    if alignments is None:
        # Previously this fell through and failed with a TypeError on None[0].
        raise TimeoutError(
            f'job {job.get_id()} produced no result after {count} polls')
    print(f'alignments[0]:{alignments[0]}\n score: {alignments[0].score}')
Esempio n. 25
0
def tester():
    """Enqueue a local alignment job, wait for it and pretty-print the result.

    The job runs local_align(x, y, matrix) with BLOSUM62 and is polled once
    per second, at most 101 times. The result is split with
    get_protein_alignment into (seqA, connector, seqB), printed in full and
    then in 50-column blocks.
    """
    q = Queue(connection=conn2)
    mat_name = "BLOSUM62"
    matrix = substitution_matrices.load(mat_name)
    job = q.enqueue(local_align, args=(x, y, matrix))
    count = 0

    while True:
        if job.result is not None or count > 100:
            break
        time.sleep(1)
        count += 1
        print(f'job.get_id(): {job.get_id()}\n')

    # only returning one
    alignment = job.result
    seqA, connector, seqB = get_protein_alignment(alignment)
    # todo: this is just returning seqA
    seqA_adj = make_single_seq(seqA, connector)
    seqB_adj = make_single_seq(seqB, connector)

    print(f'strA:\n{str(seqA)}\n' f'strB:\n{str(seqB)}')

    print(f'adjA:\n {seqA_adj}\n, adjB:\n {seqB_adj}')

    # practice pretty printing
    width = 50
    count = 0
    while True:
        start = count * width
        stop = start + width
        print(f'line: {count}')
        print(seqA[start:stop])
        print(connector[start:stop])
        print(seqB[start:stop])
        # `>=` rather than `>`: when the length is an exact multiple of the
        # block width, the old test printed one extra, empty block.
        if stop >= len(connector):
            break
        count += 1
Esempio n. 26
0
    def __init__(self, model="identity", skip_letters=None):
        """Initialize with a distance model.

        `model` is either "identity" (no scoring matrix) or one of
        self.models; "blastn" maps to the NUC.4.4 matrix and every other
        model to the matrix named by its upper-cased form. A ValueError is
        raised for unsupported models.
        """
        uses_identity = model == "identity"

        # Shim for backward compatibility (#491)
        if skip_letters:
            self.skip_letters = skip_letters
        else:
            self.skip_letters = () if uses_identity else ("-", "*")

        if uses_identity:
            self.scoring_matrix = None
            return

        if model not in self.models:
            raise ValueError("Model not supported. Available models: " +
                             ", ".join(self.models))

        matrix_name = "NUC.4.4" if model == "blastn" else model.upper()
        self.scoring_matrix = substitution_matrices.load(matrix_name)
Esempio n. 27
0
def gen_aligner():
    """Create the global sequence aligner

    Gap open -10 and gap extend -0.5, the same parameters as EMBOSS Needle.
    BLOSUM62 is extended with the letter U.
    """
    # BLOSUM62 has no entry for amino acid U, so append it to the alphabet.
    # See https://github.com/biopython/biopython/issues/3205
    # (the alternative would be replacing U's with X's in the sequences)
    blosum62 = substitution_matrices.load("BLOSUM62")
    extended = blosum62.select(blosum62.alphabet + "U")
    # U is the last letter: score it -4 against every letter and 1 against
    # itself, i.e. make U score like X
    extended[:, -1] = -4
    extended[-1, :] = -4
    extended[-1, -1] = 1

    aligner = Align.PairwiseAligner()
    aligner.mode = "global"
    aligner.open_gap_score = -10
    aligner.extend_gap_score = -0.5
    aligner.substitution_matrix = extended
    return aligner
def max_alignment_score(seq1, seq2):
    """Print the first optimal global alignment of seq1 and seq2 and return
    the optimal alignment score (BLOSUM62, linear gap penalty of 5)."""
    linear_gap_penalty = -5

    scorer = Align.PairwiseAligner()
    scorer.mode = "global"
    scorer.substitution_matrix = substitution_matrices.load("BLOSUM62")

    # Identical open and extend scores give a linear gap penalty.
    scorer.open_gap_score = linear_gap_penalty
    scorer.extend_gap_score = linear_gap_penalty

    # Show the first optimal alignment.
    optimal = scorer.align(seq1, seq2)
    print(optimal[0])

    # The score is computed separately from the printed alignment.
    return scorer.score(seq1, seq2)
Esempio n. 29
0
 def _local_align(self, record_a: SeqRecord, record_b: SeqRecord,
                  open_gap_score: int):
     """Locally align two records with BLOSUM62 (extend gap score -1).

     Both sequences are ungapped and upper-cased first. The aligned rows are
     taken from the first and third lines of the alignment's text form, with
     spaces turned into '-'. The score, path and aligned blocks of the
     alignment are attached as annotations of the returned
     MultipleSeqAlignment.
     """
     aligner = Align.PairwiseAligner()
     aligner.mode = 'local'
     aligner.substitution_matrix = substitution_matrices.load('BLOSUM62')
     aligner.open_gap_score = open_gap_score
     aligner.extend_gap_score = -1

     plain_a = record_a.seq.ungap('-').upper()
     plain_b = record_b.seq.ungap('-').upper()
     aln = aligner.align(plain_a, plain_b)[0]

     # Line 0 is the first sequence row and line 2 the second one.
     rows = str(aln).splitlines()
     seq_a = Seq(rows[0].replace(' ', '-'), generic_protein)
     seq_b = Seq(rows[2].replace(' ', '-'), generic_protein)

     records = [
         SeqRecord(seq_a, id=record_a.id),
         SeqRecord(seq_b, id=record_b.id),
     ]
     details = {
         'score': aln.score,
         'path': aln.path,
         'aligned': aln.aligned,
     }
     return MultipleSeqAlignment(records, annotations=details)
Esempio n. 30
0
def get_sequence_alignment(sequence_1, sequence_2, mode='global', open_gap_score=-11, extend_gap_score=-2):
    """Perform a pairwise sequence alignment with BLOSUM62 (Needleman-Wunsch in the default global mode).

    :param sequence_1: First input sequence.
    :type sequence_1: str
    :param sequence_2: Second input sequence.
    :type sequence_2: str
    :param mode: Alignment mode, defaults to 'global'.
    :type mode: str, optional
    :param open_gap_score: Opening gap penalty, defaults to -11.
    :type open_gap_score: int, optional
    :param extend_gap_score: Extension gap penalty, defaults to -2.
    :type extend_gap_score: int, optional

    :return alignment_dict: Dictionary mapping each aligned index of sequence_2 to its partner index in
        sequence_1, or None when the alignment fails or yields no result.
    :rtype alignment_dict: dict [int, int]

    """
    aligner = Align.PairwiseAligner()
    aligner.mode = mode
    aligner.substitution_matrix = substitution_matrices.load("BLOSUM62")
    aligner.open_gap_score = open_gap_score
    aligner.extend_gap_score = extend_gap_score
    try:
        # All alignments returned by the aligner share the same optimal
        # score, so the first one is taken directly; materializing and
        # sorting every co-optimal alignment can exhaust memory.
        best_alignment = next(iter(aligner.align(sequence_1, sequence_2)), None)
    except ValueError as e:
        logging.warning('Needleman-Wunsch alignment failed due to wrong alphabet:\n{}'.format(e))
        return None
    if best_alignment is None:
        logging.warning('Sequence alignment produced no alignments.')
        return None

    alignment_dict = {}
    # aligned[0] holds the index blocks of sequence_1 and aligned[1] those of
    # sequence_2.
    for query_chunk, target_chunk in zip(*best_alignment.aligned):
        for query_index, target_index in zip(range(*query_chunk), range(*target_chunk)):
            alignment_dict[target_index] = query_index

    return alignment_dict