Beispiel #1
0
def simple_occupancy_groups(hierarchy,
                            include_single_conformer_groups=False,
                            verbose=False):
    """Build the default occupancy groups for a hierarchy.

    Every group yielded by ``hierarchy.occupancy_groups_simple()`` is converted
    into a list of selections, and every selection into a list holding one
    ``GenericSelection.to_dict`` entry per selected atom group.

    Groups consisting of one selection containing one index are always
    skipped.  Groups consisting of one selection are skipped unless
    ``include_single_conformer_groups`` is true.  With ``verbose`` set, each
    skipped group is reported on stdout.
    """

    def pick(indices):
        # Sub-hierarchy containing only the listed atom indices
        return hierarchy.select(flex.size_t(indices))

    collected = []
    for group in hierarchy.occupancy_groups_simple():
        if len(group) == 1:
            lone_selection = group[0]
            # A single atom on its own: never restrained
            if len(lone_selection) == 1:
                if verbose:
                    atom_labels = [
                        Labeller.format(atom)
                        for atom in pick(lone_selection).atoms()
                    ]
                    print('Not making simple restraints for single-atom groups:'
                          + ' ' + ','.join(atom_labels))
                continue
            # A single conformer: only restrained on request
            if not include_single_conformer_groups:
                if verbose:
                    group_labels = [
                        Labeller.format(ag)
                        for ag in pick(lone_selection).atom_groups()
                    ]
                    # No separator here: the Python 2 print statement emits no
                    # soft space after an item that ends with a tab.
                    print('Not making simple restraints for single-conformer groups:\n\t'
                          + '\n\t'.join(group_labels))
                continue
        collected.append([
            [GenericSelection.to_dict(ag) for ag in pick(indices).atom_groups()]
            for indices in group
        ])
    return collected
Beispiel #2
0
def expand_alternate_conformations(hierarchy, in_place=False, verbose=False):
    """Expand residue groups so that they carry the full set of altlocs.

    The target set is the sorted list of non-blank altlocs found in the
    hierarchy, or ``['A']`` when there are none.  Residue groups that mix blank
    and non-blank altlocs are first converted to pure alternate conformers.
    Missing conformers are then populated for residue groups that contain a
    protein amino acid or that have no conformers at all; other residue groups
    (e.g. waters already present as a subset of conformers) are left alone.

    Works on a deep copy unless ``in_place`` is true.  Returns the (possibly
    copied) hierarchy.
    """
    if not in_place:
        hierarchy = hierarchy.deep_copy()

    # All non-blank altlocs present anywhere in the structure
    full_altloc_set = sorted(
        [altloc for altloc in hierarchy.altloc_indices() if altloc])
    if not full_altloc_set:
        # Nothing to go on: fall back to a single conformer "A"
        if verbose:
            print('No altlocs in structure: expanding all residues to conformer "A"')
        full_altloc_set = ['A']
    if verbose:
        print('Expanding all (appropriate) residues to have altlocs {}'.format(
            full_altloc_set))
        print('------------------>')

    target_altlocs = set(full_altloc_set)

    for chain in hierarchy.chains():
        for rg in chain.residue_groups():
            # Mixed blank/non-blank altlocs: fold the blank atoms into each conformer
            if rg.have_conformers() and rg.move_blank_altloc_atom_groups_to_front():
                if verbose:
                    print('{} - expanding to pure conformer (altlocs {})'.format(
                        Labeller.format(rg),
                        [ag.altloc for ag in rg.atom_groups()]))
                create_pure_alt_conf_from_proper_alt_conf(residue_group=rg,
                                                          in_place=True)

            # Already complete -> nothing more to do for this residue group
            present_altlocs = {ag.altloc for ag in rg.atom_groups()}
            if present_altlocs == target_altlocs:
                continue

            # Expand protein residues and single-conformer residue groups only;
            # e.g. a water that exists only in conformer A must stay that way.
            is_protein = protein_amino_acid_set.intersection(
                rg.unique_resnames())
            if not is_protein and rg.have_conformers():
                continue

            if verbose:
                print('{} - populating missing conformers (current altlocs {}, target set {})'.format(
                    Labeller.format(rg), present_altlocs, full_altloc_set))
            populate_missing_conformers(residue_group=rg,
                                        full_altloc_set=full_altloc_set,
                                        in_place=True,
                                        verbose=verbose)
            updated_altlocs = [ag.altloc for ag in rg.atom_groups()]
            assert sorted(updated_altlocs) == full_altloc_set
            if verbose:
                print('{} - updated conformer list: (current altlocs {}, target set {})'.format(
                    Labeller.format(rg),
                    [ag.altloc for ag in rg.atom_groups()],
                    full_altloc_set))

    if verbose:
        print('------------------>')
    return hierarchy
Beispiel #3
0
def set_conformer_occupancy(hierarchy, altlocs, occupancy, in_place=False, verbose=False):
    """Set the occupancy of all atoms in the chosen conformers to a fixed value.

    No normalisation is performed: atoms in atom groups whose altloc is listed
    in ``altlocs`` simply have their occupancy overwritten with ``occupancy``;
    all other atom groups are left untouched.

    Parameters:
        hierarchy: the hierarchy to modify (deep-copied unless ``in_place``).
        altlocs: the altlocs to update.  Either a string, which is split into
            its individual characters (``'AB'`` -> ``['A', 'B']``), or a
            list/tuple/set of altloc strings.
        occupancy: the occupancy value to assign.
        in_place: modify ``hierarchy`` directly instead of a copy.
        verbose: report every modified atom group on stdout.

    Returns:
        The modified hierarchy (the copy when ``in_place`` is false).

    Raises:
        AssertionError: if ``altlocs`` is not a string, list, tuple or set.
    """
    if isinstance(altlocs, (str, tuple, set, frozenset)):
        altlocs = list(altlocs)
    else:
        assert isinstance(altlocs, list), 'altlocs must be either str or list'
    if not in_place:
        hierarchy = hierarchy.deep_copy()
    for ag in hierarchy.atom_groups():
        if ag.altloc not in altlocs:
            continue
        if verbose:
            print('{} - setting occupancy to {}'.format(Labeller.format(ag), occupancy))
        # Fetch the atoms once and assign the same occupancy to each of them
        atoms = ag.atoms()
        atoms.set_occ(flex.double(atoms.size(), occupancy))
    return hierarchy
Beispiel #4
0
def increment_altlocs(hierarchy, offset=1, in_place=False, verbose=False):
    """Shift every altloc in the structure by ``offset`` positions.

    Altlocs are shifted along the sequence returned by
    ``iotbx.pdb.systematic_chain_ids()``.  Every atom group must already have a
    non-blank altloc.

    Parameters:
        hierarchy: the hierarchy to modify (deep-copied unless ``in_place``).
        offset: number of positions to move each altloc along the id sequence.
        in_place: modify ``hierarchy`` directly instead of a copy.
        verbose: report the mapping and every updated atom group on stdout.

    Returns:
        The modified hierarchy (the copy when ``in_place`` is false).

    Raises:
        IndexError: if shifting an altloc would move it before the start or
            past the end of the available ids.
        AssertionError: if an atom group has a blank altloc.
    """
    if not in_place:
        hierarchy = hierarchy.deep_copy()
    cur_altlocs = [a for a in hierarchy.altloc_indices() if a]
    all_altlocs = iotbx.pdb.systematic_chain_ids()
    n_available = len(all_altlocs)
    new_altlocs = {}
    for altloc in cur_altlocs:
        new_index = all_altlocs.index(altloc) + offset
        # Reject out-of-range targets explicitly: a negative index would
        # otherwise silently wrap round to the end of the id list.
        if not (0 <= new_index < n_available):
            raise IndexError(
                'Cannot shift altloc {!r} by {}: no altloc available at that position'
                .format(altloc, offset))
        new_altlocs[altloc] = all_altlocs[new_index]
    if verbose:
        print('------------------>')
        print('Updating altlocs:')
        for altloc in cur_altlocs:
            print('{} -> {}'.format(altloc, new_altlocs[altloc]))
        print('------------------>')
    for atom_group in hierarchy.atom_groups():
        # Check before the verbose lookup so that a blank altloc fails with the
        # intended assertion rather than a KeyError from the mapping.
        assert atom_group.altloc != ''
        updated = new_altlocs[atom_group.altloc]
        if verbose:
            print('{} - updating altloc: {} -> {}'.format(
                Labeller.format(atom_group), atom_group.altloc, updated))
        atom_group.altloc = updated
    return hierarchy
Beispiel #5
0
def create_levels_tab(parameterisation):
    """Build the nested dictionary describing the 'levels' tab of the output.

    The returned dictionary has the keys ``id``, ``short_name``, ``long_name``,
    ``description`` and ``tabs``.  ``tabs`` is filled, in order, with:

    * an overview sub-tab with, for each chain, a divider panel, one panel per
      level and one panel for the residual;
    * one sub-tab per level, with one overview panel per chain followed by one
      panel per group that has non-zero TLS values (read from the level's
      TLS model csv file);
    * a residual sub-tab with one panel per chain and one image object per
      residue group.

    Image paths are obtained from ``parameterisation.file_manager`` templates
    and passed through ``png2base64src_maybe``.

    Parameters:
        parameterisation: object providing ``fitter``, ``file_manager``,
            ``blank_master_hierarchy()``, ``atom_mask``, ``levels`` and
            ``params.fitting.tls_models_per_tls_group``.

    Returns:
        The tab dictionary.
    """
    p = parameterisation
    f = parameterisation.fitter
    fm = parameterisation.file_manager

    # IDs of the chains in the master hierarchy after applying the atom mask
    chain_ids = [c.id for c in p.blank_master_hierarchy().select(flex.bool(p.atom_mask.tolist()),copy_atoms=True).chains()]

    tab = {'id'         : 'levels',
           'short_name' : 'ADP Summary',
           'long_name'  : 'Level-by-level TLS parameterisation',
           'description': 'Parameteristaion composed of {} levels.'.format(len(f.levels)),
           'tabs'       : [],
          }
    # -------------------------------->
    # Create overview sub-tab
    # -------------------------------->
    overview_tab = {'id'            : tab['id']+'overview',
                    'active'        : True,
                    'short_name'    : 'Overview',
                    'long_name'     : 'Overview of the parameterised hierarchical ADP model',
                    'description'   : '',
                    'panels'        : [],
                   }
    tab['tabs'].append(overview_tab)
    # Split the panels up by chain
    for c_id in chain_ids:
        # Split up the chains with divider panels
        prof_f = fm.get_file('png-combined-profile-template').format(c_id)
        resd_f = fm.get_file('png-residual-profile-template').format(c_id)
        panel = {'id'             : '<h4>Levels for Chain {}</h4>'.format(c_id),
                 'width'          : 12,
                 'show'           : True,
                 'table'          : None,
                 'objects'        : [{'width':6, 'text': 'TLS-level components', 'path':png2base64src_maybe(prof_f, print_on_missing=DEBUG)},
                                     {'width':6, 'text': 'Residual component',   'path':png2base64src_maybe(resd_f, print_on_missing=DEBUG)}],
                }
        overview_tab['panels'].append(panel)
        # Add images to the overview tab for each TLS level
        # (i_level and level are not used inside this loop)
        for i_level, (level_num, level_lab, level) in enumerate(f):
            chain_image = fm.get_file('pml-level-chain-template').format(level_num, c_id)
            stack_image = fm.get_file('png-tls-profile-template').format(level_num, c_id)
            aniso_image = fm.get_file('png-tls-anisotropy-template').format(level_num, c_id)
            # NOTE(review): the 'X' in '{} atoms.' looks like a placeholder for an atom count -- confirm
            panel = {'id'             : 'Level {} of {} ({})'.format(level_num, len(f.levels),level_lab),
                     'width'          : 4,
                     'show'           : True,
                     'table'          : None,
                     'objects'        : [{'width':12, 'text':'{} atoms.'.format('X')},
                                         {'width':12, 'path':png2base64src_maybe(chain_image, print_on_missing=DEBUG)},
                                         {'width':12, 'path':png2base64src_maybe(stack_image, print_on_missing=DEBUG)},
                                         {'width':12, 'path':png2base64src_maybe(aniso_image, print_on_missing=DEBUG)}],
                    }
            overview_tab['panels'].append(panel)
        # Format residual level
        chain_image = fm.get_file('pml-residual-chain-template').format(c_id)
        stack_image = fm.get_file('png-residual-profile-template').format(c_id)
        aniso_image = fm.get_file('png-residual-anisotropy-template').format(c_id)
        panel = {'id'             : 'Final Level (residual)',
                 'width'          : 4,
                 'show'           : True,
                 'table'          : None,
                 'objects'        : [{'width':12, 'text':'{} atoms.'.format('X')},
                                     {'width':12, 'path':png2base64src_maybe(chain_image, print_on_missing=DEBUG)},
                                     {'width':12, 'path':png2base64src_maybe(stack_image, print_on_missing=DEBUG)},
                                     {'width':12, 'path':png2base64src_maybe(aniso_image, print_on_missing=DEBUG)}],
                  }
        overview_tab['panels'].append(panel)
    # -------------------------------->
    # Create tab for each level
    # -------------------------------->
    for i_level, (level_num, level_lab, level) in enumerate(f):
        # Create dictionary for this tab and add to tab_list
        level_tab = {'id'         : tab['id']+'lvl{}'.format(level_num),
                     'short_name' : 'Level {}'.format(level_num),
                     'long_name'  : 'Level {} ({})'.format(level_num, level_lab),
                     'description': 'Level {} of {}. '.format(level_num, len(f.levels))+\
                                    'Composed of {} groups'.format(level.n_groups()),
                     'panels'       : [],
              }
        tab['tabs'].append(level_tab)
        # Add overview at the top of the tab
        for c_id in chain_ids:
            partn_image = fm.get_file('pml-level-partition-template').format(level_num, c_id)
            chain_image = fm.get_file('pml-level-chain-template').format(level_num, c_id)
            stack_image = fm.get_file('png-tls-profile-template').format(level_num, c_id)
            aniso_image = fm.get_file('png-tls-anisotropy-template').format(level_num, c_id)
            panel = {'id'             : 'Chain {}'.format(c_id),
                     'width'          : 12,
                     'show'           : True,
                     'table'          : None,
                     'objects'        : [{'width':12, 'text':'{} atoms.'.format('X')},
                                         {'width':6, 'path':png2base64src_maybe(partn_image, print_on_missing=DEBUG)},
                                         {'width':6, 'path':png2base64src_maybe(chain_image, print_on_missing=DEBUG)},
                                         {'width':6, 'path':png2base64src_maybe(stack_image, print_on_missing=DEBUG)},
                                         {'width':6, 'path':png2base64src_maybe(aniso_image, print_on_missing=DEBUG)}],
                    }
            level_tab['panels'].append(panel)
        # Read in the TLS models and amplitudes for this level
        # (tls_amplitudes is loaded but not referenced again in this function)
        tls_models     = pandas.read_csv(fm.get_file('csv-tls-mdl-template').format(level_num)).set_index(['group','model']).drop('Unnamed: 0', axis=1, errors='ignore')
        tls_amplitudes = pandas.read_csv(fm.get_file('csv-tls-amp-template').format(level_num)).set_index(['group','model','cpt']).drop('Unnamed: 0', axis=1, errors='ignore')
        # Extract groups for each level
        for i_group, (group_num, sel, group_fitter) in enumerate(level):
            # Extract TLS values for this group
            tls_vals = [tls_models.loc[(group_num, i_mode)] for i_mode in xrange(p.params.fitting.tls_models_per_tls_group)]
            # Skip if no TLS values
            if numpy.abs(tls_vals).sum() == 0.0:
                continue
            # Get images and format values
            scl_image = fm.get_file('pml-level-scaled-template').format(level_num, group_num)
            adp_image = fm.get_file('pml-level-group-template').format(level_num, group_num)
            amp_image = fm.get_file('png-tls-amp-dist-template').format(level_num, group_num)
            # One HTML string per mode: the T/L/S values rounded to 3 places, or a
            # short notice when all values of the mode are zero.  Spaces are
            # replaced by '&nbsp' (written without a trailing ';').
            tls_mdl_strs = [('Mode {}:<br>' + \
                             '<samp>\n' + \
                             'T: {T11:>9.3f}, {T22:>9.3f}, {T33:>9.3f}, {T12:>9.3f}, {T13:>9.3f}, {T23:>9.3f},<br>' + \
                             'L: {L11:>9.3f}, {L22:>9.3f}, {L33:>9.3f}, {L12:>9.3f}, {L13:>9.3f}, {L23:>9.3f},<br>' + \
                             'S: {S11:>9.3f}, {S12:>9.3f}, {S13:>9.3f}, {S21:>9.3f}, {S22:>9.3f}, {S23:>9.3f}, {S31:>9.3f}, {S32:>9.3f}, {S33:>9.3f}' + \
                             '\n</samp>'
                            ).format(i_mode+1, **mode_vals.round(3)).replace(' ','&nbsp') if mode_vals.any() else 'Zero-value TLS values for mode {}'.format(i_mode+1) for i_mode, mode_vals in enumerate(tls_vals)]
            # Create panel dictionary
            panel = {'id'    : 'Group {} - {}'.format(group_num, p.levels[i_level][i_group]),
                     'width' : 12, #max(4,12//level.n_groups()),
                     'table' : None,
                     'objects': [{'width':12, 'text':'<br>'.join(['Number of atoms: {}'.format(sum(sel))])},
                                 {'width':4,  'text':'Shape of disorder (arbitrary scale)',     'path': png2base64src_maybe(scl_image, print_on_missing=DEBUG)},
                                 {'width':4,  'text':'Average size over all datasets',          'path': png2base64src_maybe(adp_image, print_on_missing=DEBUG)},
                                 {'width':4,  'text':'Amplitude Distribution',                  'path': png2base64src_maybe(amp_image, print_on_missing=DEBUG)}] + \
                                [{'width':12,'text':s} for s in tls_mdl_strs],
                    }
            level_tab['panels'].append(panel)
        # Make the first panel open (group panels are created without a 'show' key)
        if len(level_tab['panels']) > 0:
            level_tab['panels'][0]['show'] = True
    # -------------------------------->
    # Create tab for residual level
    # -------------------------------->
    residual_tab = {'id'         : 'lvlres',
                    'short_name' : 'Residual',
                    'long_name'  : 'Final Level  (residual)',
                    'description': '',
                    'panels'     : [],
                   }
    tab['tabs'].append(residual_tab)
    # Get selection for fitted atoms
    atom_sel = flex.bool(p.atom_mask.tolist())
    # Create one panel per chain, with one image object per residue group
    for i_chain, c in enumerate(p.blank_master_hierarchy().select(atom_sel,copy_atoms=True).chains()):
        panel = {'id'    : 'Residual components for chain {}'.format(c.id),
                 'width' : 12,
                 'table' : None,
                 'objects': [],
                }
        residual_tab['panels'].append(panel)
        for i_rg, rg in enumerate(c.residue_groups()):
            short_label = ShortLabeller.format(rg)
            long_label  = Labeller.format(rg)
            adp_image = fm.get_file('pml-residual-group-template').format(short_label)
            panel['objects'].append({'width':4, 'text':long_label, 'path': png2base64src_maybe(adp_image, print_on_missing=DEBUG)})
        # Make the first panel open (runs once per chain, always targets panel 0)
        residual_tab['panels'][0]['show'] = True

    return tab
Beispiel #6
0
def transfer_residue_groups_from_other(acceptor_hierarchy,
                                       donor_hierarchy,
                                       in_place=False,
                                       verbose=False):
    """Copy the contents of ``donor_hierarchy`` into ``acceptor_hierarchy``.

    Donor chains whose id is absent from the acceptor are appended whole.
    Otherwise the atom groups of each donor residue group are appended to the
    acceptor residue group with the same chain id and resid.  Donor residue
    groups with no such partner are appended to the acceptor chain that their
    donor chain has been linked to (through any of its matched residues) or,
    failing that, to the only acceptor chain with the same id.

    Parameters:
        acceptor_hierarchy: hierarchy receiving the atoms (deep-copied unless
            ``in_place``).
        donor_hierarchy: hierarchy providing the atoms; it is not modified,
            only detached copies are transferred.
        in_place: modify ``acceptor_hierarchy`` directly instead of a copy.
        verbose: report every transfer on stdout.

    Returns:
        The modified acceptor hierarchy (the copy when ``in_place`` is false).

    Raises:
        Exception: if the acceptor holds several residue groups with the same
            chain id and resid.
        Failure: if an unmatched donor residue group cannot be assigned to a
            unique acceptor chain.
    """
    if not in_place:
        acceptor_hierarchy = acceptor_hierarchy.deep_copy()
    accept_model = acceptor_hierarchy.only_model()
    # Acceptor residue groups indexed as chain id -> resid -> [residue_groups]
    accept_dict = {c.id: {} for c in accept_model.chains()}
    for rg in accept_model.residue_groups():
        accept_dict[rg.parent().id].setdefault(rg.resid(), []).append(rg)
    # Links each donor chain to the acceptor chain it has been matched with.
    # Keyed by the position of the donor chain, so that several donor chains
    # sharing an id are kept apart and lookups use the same key as inserts.
    link_dict = {}
    # (donor chain position, residue group) pairs without a matching acceptor
    # residue group
    tricky_rgs = []
    for i_donor_ch, donor_ch in enumerate(donor_hierarchy.only_model().chains()):
        accept_rgs_by_resid = accept_dict.get(donor_ch.id)
        # If chain not in hierarchy, simply copy across
        if accept_rgs_by_resid is None:
            if verbose:
                print('Transferring whole chain:    {}'.format(
                    Labeller.format(donor_ch)))
            accept_model.append_chain(donor_ch.detached_copy())
            continue
        # Chain present, copy by residue_group
        for donor_rg in donor_ch.residue_groups():
            matches = accept_rgs_by_resid.get(donor_rg.resid(), [])
            if len(matches) > 1:
                # Should only be one...
                raise Exception(
                    'More than one residue group in hierarchy with the same residue_id and chain_id'
                )
            if len(matches) == 1:
                accept_rg = matches[0]
                # Record the first acceptor chain matched for this donor chain
                link_dict.setdefault(i_donor_ch, accept_rg.parent())
                if verbose:
                    print('Transferring atom groups:    {} > {}'.format(
                        Labeller.format(donor_rg), Labeller.format(accept_rg)))
                for donor_ag in donor_rg.atom_groups():
                    accept_rg.append_atom_group(donor_ag.detached_copy())
            else:
                # Several chains may share this id, so decide once all the
                # links are known
                tricky_rgs.append((i_donor_ch, donor_rg))
    # Transfer residues that have chain matches, but no residue matches
    for i_donor_ch, donor_rg in tricky_rgs:
        accept_ch = link_dict.get(i_donor_ch)
        if accept_ch is None:
            # Not linked: only unambiguous if a single chain carries this id
            donor_ch_id = donor_rg.parent().id
            possible_chains = [
                c for c in accept_model.chains() if c.id == donor_ch_id
            ]
            if len(possible_chains) != 1:
                raise Failure(
                    "Don't know how to transfer {} to the output model".format(
                        Labeller.format(donor_rg)))
            accept_ch = possible_chains[0]
        if verbose:
            print('Transferring residue group:  {} > {}'.format(
                Labeller.format(donor_rg), Labeller.format(accept_ch)))
        accept_ch.append_residue_group(donor_rg.detached_copy())

    return acceptor_hierarchy
Beispiel #7
0
def resolve_residue_id_clashes(fixed_hierarchy,
                               moving_hierarchy,
                               in_place=False,
                               verbose=False):
    """Move clashing residue groups of ``moving_hierarchy`` to new chains.

    A residue group clashes when ``fixed_hierarchy`` holds a residue group with
    the same chain id and resid but a different list of (stripped) residue
    names.  Clashing residue groups are removed from their chain and appended
    to newly created chains whose ids are unused in both hierarchies.  A new
    chain is started for the first clash, whenever the current new chain
    already holds the resid, and whenever the source chain id changes.

    Parameters:
        fixed_hierarchy: reference hierarchy; it is only read.
        moving_hierarchy: hierarchy to modify (deep-copied unless ``in_place``).
        in_place: modify ``moving_hierarchy`` directly instead of a copy.
        verbose: report every clash and every move on stdout.

    Returns:
        The modified moving hierarchy (the copy when ``in_place`` is false).

    Raises:
        Exception: if ``fixed_hierarchy`` holds several residue groups with the
            same chain id and resid.
    """
    if not in_place:
        moving_hierarchy = moving_hierarchy.deep_copy()
    # Fixed residue groups indexed as chain id -> resid -> [residue_groups]
    fixed_dict = {c.id: {} for c in fixed_hierarchy.only_model().chains()}
    for rg in fixed_hierarchy.residue_groups():
        fixed_dict[rg.parent().id].setdefault(rg.resid(), []).append(rg)
    # Find the residues with clashing resids
    residues_to_update = []
    for rg_mov in moving_hierarchy.only_model().residue_groups():
        candidates = fixed_dict.get(rg_mov.parent().id, {}).get(rg_mov.resid(), [])
        if len(candidates) == 0:
            continue
        if len(candidates) > 1:
            raise Exception(
                'More than one matching residue group in the output hierarchy?'
            )
        rg_ref = candidates[0]
        # Compare real lists: comparing the results of map() directly is only
        # meaningful where map() returns a list.
        ref_names = [name.strip() for name in rg_ref.unique_resnames()]
        mov_names = [name.strip() for name in rg_mov.unique_resnames()]
        if ref_names == mov_names:
            # Same residue -- that's fine
            # TODO allow this if the resnames match OR if it's a protein residue? (allows mutations/reactions?)
            continue
        # Will need to be added to different chain
        if verbose:
            print('Different residues with same id - changing chains: {} != {}'.format(
                list(rg_ref.unique_resnames()),
                list(rg_mov.unique_resnames())))
        residues_to_update.append(rg_mov)
    # Nothing to do -- return
    if not residues_to_update:
        return moving_hierarchy
    new_chain = None
    old_chain_prev = None
    new_chain_ids = find_unused_chain_ids(
        hierarchies=[fixed_hierarchy, moving_hierarchy])
    # Go through and transfer the residue groups to new chains
    for rg_mov in residues_to_update:
        old_chain = rg_mov.parent()
        # Test for None explicitly rather than relying on the truthiness of a
        # chain object; old_chain_prev is only read once new_chain exists.
        if (new_chain is None) or (
                rg_mov.resid() in new_chain.get_residue_ids()) or (
                    old_chain_prev.id != old_chain.id):
            new_chain = iotbx.pdb.hierarchy.chain(id=new_chain_ids.pop(0))
            old_chain.parent().append_chain(new_chain)
        if verbose:
            print('{} - moving to chain {}'.format(Labeller.format(rg_mov),
                                                   new_chain.id))
        # Remove from old chain and add to the new chain
        old_chain.remove_residue_group(rg_mov)
        new_chain.append_residue_group(rg_mov)
        # Keep track of old_chain of previous residue
        old_chain_prev = old_chain

    return moving_hierarchy
Beispiel #8
0
def sanitise_occupancies(hierarchy,
                         fixed_conformers=None,
                         min_occ=0.0,
                         max_occ=1.0,
                         in_place=False,
                         verbose=False):
    """Sanitise residue groups whose occupancy lies outside [min_occ, max_occ].

    Residue groups whose occupancy (as given by
    ``calculate_residue_group_occupancy``) is already within range are left
    alone.  For the others, the main-conformer atom group (if any) is sanitised
    on its own, and the alternate-conformer atom groups whose altloc is not in
    ``fixed_conformers`` are sanitised together, using the range that remains
    after subtracting the occupancy of the fixed conformers.

    Works on a deep copy unless ``in_place`` is true and returns the (possibly
    copied) hierarchy.  Raises ``Exception`` when the fixed conformers alone
    already exceed ``max_occ``.
    """
    assert (min_occ >= 0.0) and (max_occ <= 1.0)
    if fixed_conformers is None:
        fixed_conformers = []
    if not in_place:
        hierarchy = hierarchy.deep_copy()

    def peak_occ(atom_group):
        # Largest atomic occupancy within the atom group
        return max(atom_group.atoms().extract_occ())

    def joined(values):
        return ', '.join([str(v) for v in values])

    for rg in hierarchy.residue_groups():
        total_occ = calculate_residue_group_occupancy(residue_group=rg)
        if min_occ <= total_occ <= max_occ:
            continue
        if verbose:
            print('Occupancy of residue {} is {} -- sanitising'.format(
                Labeller.format(rg), total_occ))

        main_ag, alt_ags = split_main_and_alt_conf_atom_groups(rg)

        # --- main conformer ---
        if main_ag is not None:
            if verbose:
                print('------------------>')
                print('Sanitising main-conf atom group:\n\t{}'.format(
                    Labeller.format(main_ag)))
                print('Current occupancy: {}'.format(peak_occ(main_ag)))
            sanitise_atom_group_occupancies_in_place(main_ag,
                                                     min_occ=min_occ,
                                                     max_occ=max_occ)
            if verbose:
                print('New occupancy:     {}'.format(peak_occ(main_ag)))

        # --- alternate conformers ---
        if alt_ags is None:
            continue

        # Separate the atom groups that may change from those held constant
        movable, frozen = [], []
        for ag in alt_ags:
            if ag.altloc in fixed_conformers:
                frozen.append(ag)
            else:
                movable.append(ag)

        movable_occs = [peak_occ(ag) for ag in movable]
        frozen_occs = [peak_occ(ag) for ag in frozen]
        occ_movable = sum(movable_occs)
        occ_frozen = sum(frozen_occs)
        if occ_frozen > max_occ:
            raise Exception(
                'Occupancy of fixed atom groups ({}) is already greater than maximum ({})'
                .format(occ_frozen, max_occ))

        if verbose:
            print('------------------>')
            print('Sanitising alt-conf atom groups:')
            print('Fixed conformers:\n\t{}'.format('\n\t'.join(
                [Labeller.format(ag) for ag in frozen])))
            print('Other conformers:\n\t{}'.format('\n\t'.join(
                [Labeller.format(ag) for ag in movable])))
            print('Total occupancy (fixed): {}'.format(occ_frozen))
            print('Individual occupancies:  {}'.format(joined(frozen_occs)))
            print('Total occupancy (other): {}'.format(occ_movable))
            print('Individual occupancies:  {}'.format(joined(movable_occs)))

        # The movable groups share whatever range the fixed groups leave over
        sanitise_multiple_atom_group_occupancies_in_place(
            atom_groups=movable,
            min_occ=max(0.0, min_occ - occ_frozen),
            max_occ=min(1.0, max_occ - occ_frozen))

        if verbose:
            updated_occs = [peak_occ(ag) for ag in movable]
            print('New total occupancy (other): {}'.format(sum(updated_occs)))
            print('Individual occupancies:  {}'.format(joined(updated_occs)))

    return hierarchy
Beispiel #9
0
def _conformers_within_rmsd(residue_group, alt_ags, rmsd_cutoff, verbose):
    """Return True if every pair of atom groups in alt_ags is within rmsd_cutoff."""
    for i, ag_1 in enumerate(alt_ags):
        for j, ag_2 in enumerate(alt_ags):
            # Each unordered pair only needs to be compared once
            if j <= i:
                continue
            d = calculate_paired_atom_rmsd(atoms_1=ag_1.atoms(),
                                           atoms_2=ag_2.atoms(),
                                           sort=True,
                                           truncate_to_common_set=False)
            if verbose:
                print('Residue {}, alt {} - alt {}: rmsd {}'.format(
                    Labeller.format(residue_group), i, j, d))
            # A missing rmsd is treated the same as one above the cutoff
            if (d is None) or (d > rmsd_cutoff):
                return False
    return True


def prune_redundant_alternate_conformations(hierarchy,
                                            required_altlocs=None,
                                            rmsd_cutoff=0.1,
                                            in_place=False,
                                            verbose=False):
    """Collapse alternate conformers that are all (nearly) identical.

    A residue group is pruned when it has conformers, contains every altloc in
    ``required_altlocs``, and all pairs of its alternate-conformer atom groups
    have an rmsd no greater than ``rmsd_cutoff``.  Pruning leaves a single
    blank-altloc atom group: the first alternate atom group is either merged
    into the existing main-conformer atom group or becomes the main conformer
    itself, and the remaining alternate atom groups are removed.

    Parameters:
        hierarchy: the hierarchy to modify (deep-copied unless ``in_place``).
        required_altlocs: altlocs that must all be present for a residue group
            to be considered; ``None`` (the default) or empty means no
            requirement.
        rmsd_cutoff: largest pairwise rmsd for conformers to count as redundant.
        in_place: modify ``hierarchy`` directly instead of a copy.
        verbose: report rmsds and pruned residue groups on stdout.

    Returns:
        The modified hierarchy (the copy when ``in_place`` is false).
    """
    if not in_place:
        hierarchy = hierarchy.deep_copy()
    required_altlocs = set() if required_altlocs is None else set(required_altlocs)
    for residue_group in hierarchy.residue_groups():
        # Skip if no conformers
        if not residue_group.have_conformers():
            continue
        # Get the blank and non-blank altloc atom_groups
        if residue_group.move_blank_altloc_atom_groups_to_front() != 0:
            main_ag = residue_group.atom_groups()[0]
            alt_ags = residue_group.atom_groups()[1:]
            assert main_ag.altloc == ''
            assert alt_ags != []
        else:
            main_ag = None
            alt_ags = residue_group.atom_groups()
        alt_altlocs = [ag.altloc for ag in alt_ags]
        # Check no misplaced main conf
        assert '' not in alt_altlocs
        # Skip this residue group if any required altloc is missing
        if required_altlocs.difference(alt_altlocs):
            continue
        if not _conformers_within_rmsd(residue_group, alt_ags, rmsd_cutoff,
                                       verbose):
            continue
        # All rmsds below cutoff - prune!
        if verbose:
            print('Pruning {}: altlocs {} -> [""]'.format(
                Labeller.format(residue_group), alt_altlocs))
        # Test for None explicitly rather than relying on the truthiness of an
        # atom group object
        if main_ag is not None:
            # Merge a copy of one alt group into the main atom_group
            new_main_ag = alt_ags[0].detached_copy()
            new_main_ag.altloc = ''
            normalise_occupancies(atoms=new_main_ag.atoms(),
                                  max_occ=max(main_ag.atoms().extract_occ()))
            residue_group.merge_atom_groups(main_ag, new_main_ag)
        else:
            # Promote one alt group to main conformer; it takes on the summed
            # occupancy of all the conformers it replaces
            new_main_ag = alt_ags.pop(0)
            new_main_ag.altloc = ''
            normalise_occupancies(atoms=new_main_ag.atoms(),
                                  max_occ=sum([
                                      max(ag.atoms().extract_occ())
                                      for ag in [new_main_ag] + alt_ags
                                  ]))
        # Remove all remaining alternate groups
        for ag in alt_ags:
            residue_group.remove_atom_group(ag)
        assert len(residue_group.atom_groups()) == 1

    return hierarchy
Beispiel #10
0
def split_conformations(filename, params, log=None):
    """Split a multi-conformer PDB file into several files by conformer.

    The structure in `filename` is read once; for every requested set of
    conformers a sub-structure is selected (atoms with a blank altloc plus
    atoms whose altloc is in the set), optionally post-processed, and written
    to its own PDB file preceded by the header of the input file.

    Parameters
    ----------
    filename : str
        Path of the input PDB file. Output paths are derived from it as
        ``<root>.<params.output.suffix_prefix>.<suffix>.pdb``.
    params : object
        Parameter object. The attributes read here are:
        ``options.mode`` ('by_residue_name', 'by_conformer' or
        'by_conformer_group'),
        ``options.by_residue_name.{resname, selected_name, unselected_name}``,
        ``options.by_conformer_group.conformers`` (iterable of comma-separated
        altloc strings),
        ``options.pruning.{prune_duplicates, rmsd_cutoff}``,
        ``options.reset_altlocs``, ``options.reset_occupancies``,
        ``output.suffix_prefix`` and ``settings.verbose``.
    log : callable, optional
        Logging object; it is called directly and must also provide
        ``subheading`` and ``bar``. Defaults to ``Log(verbose=True)``.

    Returns
    -------
    list of str
        One output path per conformer set. NOTE: paths belonging to empty
        conformer sets are included even though no file is written for them.

    Raises
    ------
    Exception
        If ``params.options.mode`` is not one of the supported modes.
    AssertionError
        If the conformer sets and suffixes differ in length, or if the
        maximum residue-group occupancy after resetting lies outside [0, 1].
    """

    if log is None: log = Log(verbose=True)

    # Read the pdb header - for writing later...
    header_contents = get_pdb_header(filename)

    # Read in and validate the input file
    ens_obj = strip_pdb_to_input(filename, remove_ter=True)
    # NOTE(review): presumably fails when the file holds more than one model
    # - confirm against the iotbx hierarchy documentation
    ens_obj.hierarchy.only_model()

    # Create a new copy of the structures
    new_ens = ens_obj.hierarchy.deep_copy()

    # Extract conformers from the structure as set (blank altloc excluded)
    all_confs = set(ens_obj.hierarchy.altloc_indices())
    all_confs.discard('')

    if params.options.mode == 'by_residue_name':
        sel_resnames = params.options.by_residue_name.resname.split(',')
        sel_confs = [
            ag.altloc for ag in new_ens.atom_groups()
            if (ag.resname in sel_resnames)
        ]
        # List of conformers to output for each structure, and suffixes.
        # Built as a real list (not map()) so that len() and repeated
        # iteration below work under both Python 2 and Python 3.
        out_confs = [
            sorted(all_confs.intersection(sel_confs)),
            sorted(all_confs.difference(sel_confs)),
        ]
        out_suffs = [
            params.options.by_residue_name.selected_name,
            params.options.by_residue_name.unselected_name
        ]
    elif params.options.mode == 'by_conformer':
        # One structure for each conformer
        out_confs = [[c] for c in sorted(all_confs)]
        out_suffs = [''.join(c) for c in out_confs]
    elif params.options.mode == 'by_conformer_group':
        # One structure for each set of supplied conformer sets
        out_confs = [
            s.split(',') for s in params.options.by_conformer_group.conformers
        ]
        out_suffs = [''.join(c) for c in out_confs]
    else:
        raise Exception('Invalid selection for options.mode: {}'.format(
            params.options.mode))

    assert len(out_confs) == len(out_suffs), '{} not same length as {}'.format(
        str(out_confs), str(out_suffs))

    for confs, suffix in zip(out_confs, out_suffs):
        log('Conformers {} -> {}'.format(str(confs), suffix))

    # Create paths from the suffixes
    out_paths = [
        '.'.join([
            os.path.splitext(filename)[0], params.output.suffix_prefix, suff,
            'pdb'
        ]) for suff in out_suffs
    ]

    log.subheading('Processing {}'.format(filename[-70:]))

    for this_confs, this_path in zip(out_confs, out_paths):

        # Nothing to output for an empty conformer set (the path is still
        # part of the returned list)
        if not this_confs: continue

        # Select atoms to keep - no altloc, or altloc in selection
        sel_string = ' or '.join(
            ['altid " "'] + ['altid "{}"'.format(alt) for alt in this_confs])
        # Extract selection from the hierarchy
        sel_hiery = new_ens.select(
            new_ens.atom_selection_cache().selection(sel_string),
            copy_atoms=True)

        log.bar(True, False)
        log('Outputting conformer(s) {} to {}'.format(''.join(this_confs),
                                                      this_path))
        log.bar()
        log('Keeping ANY atom with conformer id: {}'.format(
            ' or '.join(['" "'] + this_confs)))
        log('Selection: \n\t' + sel_string)

        if params.options.pruning.prune_duplicates:
            log.bar()
            log('Pruning redundant conformers')
            # Remove any alternate conformers that are duplicated after
            # selection
            prune_redundant_alternate_conformations(
                hierarchy=sel_hiery,
                required_altlocs=[a for a in sel_hiery.altloc_indices() if a],
                rmsd_cutoff=params.options.pruning.rmsd_cutoff,
                in_place=True,
                verbose=params.settings.verbose)

        if params.options.reset_altlocs:
            log.bar()
            # Change the altlocs so that they start from "A"; a single
            # remaining conformer is mapped to the blank altloc instead
            if len(this_confs) == 1:
                conf_hash = {this_confs[0]: ' '}
            else:
                conf_hash = dict(
                    zip(this_confs, iotbx.pdb.systematic_chain_ids()))
            log('Resetting structure altlocs:')
            for k in sorted(conf_hash.keys()):
                log('\t{} -> "{}"'.format(k, conf_hash[k]))
            if params.settings.verbose: log.bar()
            for ag in sel_hiery.atom_groups():
                if ag.altloc in this_confs:
                    if params.settings.verbose:
                        log('{} -> alt {}'.format(Labeller.format(ag),
                                                  conf_hash[ag.altloc]))
                    ag.altloc = conf_hash[ag.altloc]

        if params.options.reset_occupancies:
            log.bar()
            log('Resetting output occupancies (maximum occupancy of 1.0, etc.)'
                )
            # Divide through by the smallest occupancy of any complete residues groups with occupancies of less than one
            rg_occs = [
                calculate_residue_group_occupancy(rg) for rg in
                residue_groups_with_complete_set_of_conformers(sel_hiery)
            ]
            non_uni = [v for v in numpy.unique(rg_occs) if 0.0 < v < 1.0]
            if non_uni:
                div_occ = min(non_uni)
                log('Dividing all occupancies by {}'.format(div_occ))
                sel_hiery.atoms().set_occ(sel_hiery.atoms().extract_occ() /
                                          div_occ)
            # Normalise the occupancies of any residue groups with more than unitary occupancy
            log('Fixing any residues that have greater than unitary occupancy')
            sanitise_occupancies(hierarchy=sel_hiery,
                                 min_occ=0.0,
                                 max_occ=1.0,
                                 in_place=True,
                                 verbose=params.settings.verbose)
            # Perform checks
            max_occ = max([
                calculate_residue_group_occupancy(rg)
                for rg in sel_hiery.residue_groups()
            ])
            log('Maximum occupancy of output structue: {}'.format(max_occ))
            assert max_occ >= 0.0, 'maximum occupancy is less than 0.0?!?!'
            assert max_occ <= 1.0, 'maximum occupancy is greater than 1.0?!?!'

        log.bar()
        log('Writing structure: {}'.format(this_path))
        log.bar(False, True)

        # Write header contents (truncates/creates the output file)
        with open(this_path, 'w') as fh:
            fh.write(header_contents)
        # Write output file - appended after the header written above
        sel_hiery.write_pdb_file(this_path, open_append=True)

    return out_paths