Esempio n. 1
0
def common_residues(chn_1, chn_2):
    """
    Align the sequences of two chains and trim both chains to the exactly-matching positions.

    The sequences of chn_1 and chn_2 are aligned with align_sequences_default, the
    alignment's exact-match selections are extracted (one per chain), and each chain
    is truncated to its selection with _truncate_by_idx.

    Returns a tuple (truncated chn_1, truncated chn_2).
    """
    seq_1 = chn_1.as_sequence()
    seq_2 = chn_2.as_sequence()
    # Quick alignment using the default settings
    ali = align_sequences_default(seq_a=seq_1, seq_b=seq_2)
    # One selection per chain marking the residues that match exactly
    sel_1, sel_2 = ali.exact_match_selections()
    # Matches are pairwise, so both selections must contain the same number of entries
    assert len(sel_1) == len(sel_2), 'Something has gone wrong: these should be the same length!'
    # Keep only the matched residues of each chain
    return _truncate_by_idx(chn_1, sel_1), _truncate_by_idx(chn_2, sel_2)
Esempio n. 2
0
 def test_align_sequences_default_mutation(self):
     """Align seq_a against seq_c and check the aligned strings, identity and match string."""
     alignment = align_sequences_default(self.seq_a, self.seq_c)
     # Neither aligned sequence is expected to differ from its input
     self.assertEqual(alignment.a, self.seq_a)
     self.assertEqual(alignment.b, self.seq_c)
     # Expected identity is 112 out of 115
     expected_identity = 112 / 115.
     self.assertEqual(alignment.calculate_sequence_identity(), expected_identity)
     # Expected match string: 37 '|', then 3 blanks, then 75 '|'
     expected_matches = '|' * 37 + ' ' * 3 + '|' * 75
     self.assertEqual(alignment.matches(), expected_matches)
Esempio n. 3
0
 def test_align_sequences_default_truncated(self):
     """Align seq_a against seq_b and check that seq_b is padded with five trailing gaps."""
     alignment = align_sequences_default(self.seq_a, self.seq_b)
     self.assertEqual(alignment.a, self.seq_a)
     # The second aligned sequence is expected to be seq_b followed by five '-' characters
     padded_b = self.seq_b + '-' * 5
     self.assertEqual(alignment.b, padded_b)
     # Expected identity is 110 out of 115
     expected_identity = 110 / 115.
     self.assertEqual(alignment.calculate_sequence_identity(), expected_identity)
     # Expected match string: one '|' per character of seq_b, then five blanks
     expected_matches = '|' * len(self.seq_b) + ' ' * 5
     self.assertEqual(alignment.matches(), expected_matches)
Esempio n. 4
0
 def test_align_sequences_default_identical(self):
     """Align seq_a against itself and check for an unchanged, fully-matching alignment."""
     sequence = self.seq_a
     alignment = align_sequences_default(sequence, sequence)
     # Both aligned sequences are expected to equal the input
     self.assertEqual(alignment.a, sequence)
     self.assertEqual(alignment.b, sequence)
     # Identity of 1.0 and one '|' per character of the input
     self.assertEqual(alignment.calculate_sequence_identity(), 1.0)
     expected_matches = '|' * len(sequence)
     self.assertEqual(alignment.matches(), expected_matches)
Esempio n. 5
0
def _chain_identity_table_lines(c_mov, c_ref, values):
    """Format a (mov chain x ref chain) array as report lines: a REF header, the ref chain ids, then one row per mov chain."""
    lines = ['     REF',
             'MOV  {}'.format(' '.join(['{:4}'.format(c.id) for c in c_ref]))]
    for i, chn in enumerate(c_mov):
        lines.append('{:3}  {}'.format(chn.id, ' '.join(['{:4}'.format(v) for v in values[i]])))
    return lines


def align_structures_flexible(mov_hierarchy, ref_hierarchy, altlocs=None, cutoff_radius=15, sequence_identity_threshold=0.95,
                              one_to_one_mapping=True, require_hierarchies_identical=True, verbose=False):
    """
    Perform a flexible alignment on two hierarchies. Alignments are performed on a chain-by-chain basis.
    Each protein chain of mov_hierarchy is aligned to the first chain of ref_hierarchy whose pairwise
    sequence identity is greater than sequence_identity_threshold.

    Parameters:
        mov_hierarchy, ref_hierarchy: the hierarchies to align. Both are first replaced by the
            result of backbone(..., copy=True), so the inputs themselves are not rebound.
        altlocs: passed through to align_chains_flexible. Defaults to ['', 'A'] when None.
        cutoff_radius: passed through to align_chains_flexible.
        sequence_identity_threshold: chains are considered alignable when their pairwise
            sequence identity is strictly greater than this value.
        one_to_one_mapping: when True, a ref chain that has been used is removed from the pool,
            so each ref chain is aligned to at most one mov chain.
        require_hierarchies_identical: when True, assert that mov_hierarchy.is_similar_hierarchy(ref_hierarchy).
        verbose: passed to the Report object that collects the progress messages.

    Returns:
        MultipleLocalAlignment built from the per-chain alignments. Each per-chain alignment is
        annotated with id, mov_id, ref_id and seq_ali.

    Raises:
        Exception: if only_model() fails on either hierarchy.
        AssertionError: if require_hierarchies_identical is True and the hierarchies are not similar.
        Failure: if a protein chain of mov_hierarchy has no alignable chain left in ref_hierarchy.
    """

    # Resolve the default here rather than in the signature to avoid a shared mutable default
    if altlocs is None:
        altlocs = ['', 'A']
    # List of the alignments for each chain
    local_alignments = []
    # Work on reduced copies of the input hierarchies
    mov_hierarchy = backbone(mov_hierarchy, copy=True)
    ref_hierarchy = backbone(ref_hierarchy, copy=True)
    # Check the structures only have one model
    try:
        mov_hierarchy.only_model()
        ref_hierarchy.only_model()
    except Exception:
        raise Exception('Structures for alignment can only have one model!')
    # Check the structures are identical
    if require_hierarchies_identical:
        assert mov_hierarchy.is_similar_hierarchy(ref_hierarchy), 'Structures for alignment must have the same atoms (although atomic parameters can vary)'
    # Extract the chains from the structures
    c_mov = list(mov_hierarchy.chains())
    c_ref = list(ref_hierarchy.chains())
    # Match chains in the two structures (c_mov is first so the array is first indexed by the chains in mov)
    chn_sim = pairwise_chain_sequence_identity(c_mov, c_ref, seq_identity_threshold=None)
    # Create strings for use in case of errors/verbose printing
    lines = ['Chain and sequences for aligment:']
    lines.append('{} chains in mov_hierarchy:'.format(len(c_mov)))
    lines.extend(['\t{}: {}'.format(c.id, ''.join(c.as_sequence())) for c in c_mov])
    lines.append('{} chains in ref_hierarchy:'.format(len(c_ref)))
    lines.extend(['\t{}: {}'.format(c.id, ''.join(c.as_sequence())) for c in c_ref])
    lines.append('Pairwise chain-by-chain sequence identities:')
    lines.extend(_chain_identity_table_lines(c_mov, c_ref, chn_sim))
    # Report to be returned in case of error
    report = Report('\n'.join(lines), verbose=verbose)
    # Make the array boolean at the threshold value
    chn_sim = (chn_sim>sequence_identity_threshold).astype(int)
    # Report
    lines = ['Pairwise chain-by-chain sequence identities (thresholded at {}%):'.format(100*sequence_identity_threshold)]
    lines.extend(_chain_identity_table_lines(c_mov, c_ref, chn_sim))
    report('\n'.join(lines))
    # Iterate through and align the chains
    for i, chn_mov in enumerate(c_mov):
        # Skip if not protein
        if not chn_mov.is_protein():
            continue
        # Find the first chain in the reference structure that's "alignable".
        # Only the lookup is guarded, so unrelated ValueErrors are not misreported as "no suitable chain".
        try:
            idx_ref = list(chn_sim[i]).index(1)
        except ValueError:
            raise Failure('Error raised during alignment.\n'
                          'Unable to align chain {} from mov_hierarchy: there is no suitable chain in ref_hierarchy.\n'\
                          'This might be fixed by setting one_to_one_mapping to False or decreasing sequence_identity_threshold.\n'.format(chn_mov.id)+
                          str(report))
        chn_ref = c_ref[idx_ref]
        report('Aligning chain {} of mov_hierarchy to chain {} in ref_hierarchy'.format(chn_mov.id, chn_ref.id))
        if one_to_one_mapping:
            report('Removing chain {} of ref_hierarchy from the pool of alignment chains (one_to_one_mapping is turned on)'.format(chn_ref.id))
            # Zero the whole column so no later mov chain can select this ref chain
            chn_sim[:,idx_ref] = 0
        # Align the selected chains
        l_ali = align_chains_flexible(chn_mov=chn_mov, chn_ref=chn_ref, altlocs=altlocs, cutoff_radius=cutoff_radius)
        # Add aligned chains as the ID of the LocalAlignment object
        l_ali.id = 'chain {} to chain {}'.format(chn_mov.id, chn_ref.id)
        l_ali.mov_id = chn_mov.id
        l_ali.ref_id = chn_ref.id
        l_ali.seq_ali = align_sequences_default(seq_a=chn_ref.as_sequence(), seq_b=chn_mov.as_sequence())
        # Append to the alignments
        local_alignments.append(l_ali)
    # Print which chains were aligned to which.
    # Use the stored chain ids: l_ali.id is a single descriptive string, not a (mov, ref) pair.
    report('\n'.join(['Alignment finished:']+['\t(mov) chain {} aligned to (ref) chain {}'.format(l_ali.mov_id, l_ali.ref_id) for l_ali in local_alignments]))
    # Combine all of the local alignments
    return MultipleLocalAlignment(local_alignments=local_alignments)