Exemple #1
0
def find_atoms_around_alternate_conformers(hierarchy,
                                           altlocs=None,
                                           dist_cutoff=4.2):
    """Pair every alternate-conformer atom with the atoms surrounding it.

    Hydrogens are removed first (via ``non_h``). For each requested altloc,
    every atom of that altloc is compared against the atoms of the same
    altloc plus the atoms carrying a blank altloc; atoms closer than
    ``dist_cutoff`` are reported.

    :param hierarchy: structure hierarchy to search.
    :param altlocs: altloc identifiers to process; when empty or None, all
        altlocs present in the structure are used. Blank altlocs and
        altlocs not present in the structure are skipped.
    :param dist_cutoff: distance below which two atoms count as neighbours.
    :returns: list of ``(labels_1, labels_2, distance)`` tuples, where the
        labels come from ``fetch_labels()`` and the distance is rounded to
        three decimal places.
    :raises ValueError: if the structure contains no blank altloc, because
        the blank altloc is looked up with ``list.index('')``.
    """

    # Strip hydrogens, then work on the flat atom array
    hierarchy = non_h(hierarchy)
    atoms = hierarchy.atoms()

    # Altlocs present in the structure; fall back to all of them
    known_altlocs = list(hierarchy.altloc_indices())
    if not altlocs:
        altlocs = known_altlocs

    # Per-atom conformer index, and the selection of blank-altloc atoms
    conformer_idx = hierarchy.get_conformer_indices()
    blank_sel = (conformer_idx == known_altlocs.index('')).iselection()

    # Results, and the cutoff squared so squared distances can be compared
    pairs = []
    cutoff_sq = dist_cutoff**2

    for altloc in altlocs:
        # Nothing to do for the blank altloc or for unknown altlocs
        if altloc == '' or altloc not in known_altlocs:
            continue

        # Atoms of this altloc, and the same set merged with the blank atoms
        this_sel = (conformer_idx == known_altlocs.index(altloc)).iselection()
        merged_sel = flex.size_t(sorted(this_sel.concatenate(blank_sel)))
        # Sanity check on the merged size.
        # NOTE(review): sorted() keeps duplicates, so this cannot detect
        # overlapping selections -- confirm what was intended.
        assert len(merged_sel) == len(this_sel) + len(blank_sel)
        this_atoms = atoms.select(this_sel)
        merged_atoms = atoms.select(merged_sel)

        for centre in this_atoms:
            # Squared distance from this atom to every candidate atom
            sq_dists = (merged_atoms.extract_xyz() - centre.xyz).dot()
            near_sel = (sq_dists < cutoff_sq).iselection()
            # Record one entry per atom inside the cutoff
            pairs.extend(
                (centre.fetch_labels(),
                 neighbour.fetch_labels(),
                 round(float(centre.distance(neighbour)), 3))
                for neighbour in merged_atoms.select(near_sel))

    return pairs
Exemple #2
0
 def calculate_residue_mean_normalised_b_factors(self):
     """Fill the per-residue mean normalised B-factor columns.

     For every structure in ``self.structures`` the B-factors are
     normalised with ``normalise_b_factors_to_z_scores(method='protein')``.
     The occupancy-weighted mean of the normalised values is then stored
     per residue conformer in ``self.tables.residue_observations`` under
     'mean-bz-all', 'mean-bz-backbone' and 'mean-bz-sidechain'.
     """
     # Reset the three output columns before filling them in
     for column in ('mean-bz-all', 'mean-bz-backbone', 'mean-bz-sidechain'):
         self.tables.residue_observations.loc[:, :, column] = numpy.nan

     print('------------------------------------>')
     for label, hierarchy in zip(self.structures.labels,
                                 self.structures.hierarchies):
         print('Calculating Local Normalised Mean B-Factors: {}'.format(
             label))
         # Normalise the b-factors of the structure
         z_hierarchy = normalise_b_factors_to_z_scores(
             pdb_hierarchy=hierarchy, method='protein')
         cache = z_hierarchy.atom_selection_cache()

         # Output column -> atom selection it is computed from, processed
         # in this order: non-hydrogens, backbone, sidechains
         column_selectors = (
             ('mean-bz-all', s_select.non_h),
             ('mean-bz-backbone', s_select.backbone),
             ('mean-bz-sidechain', s_select.sidechains),
         )
         for column, select in column_selectors:
             selected = select(hierarchy=z_hierarchy, cache=cache)
             for conformer in conformers_via_residue_groups(selected):
                 residue_label = make_label(conformer)
                 conformer_atoms = conformer.atoms()
                 # Mean of the normalised B-factors, weighted by occupancy
                 weighted_mean = flex.mean_weighted(
                     conformer_atoms.extract_b(),
                     conformer_atoms.extract_occ())
                 self.tables.residue_observations.set_value(
                     residue_label, label, column, weighted_mean)
Exemple #3
0
    def from_pdb(cls, pdb_input=None, pdb_hierarchy=None):
        """Calculate the b-factor statistics of a model.

        Exactly one of the two arguments must be provided.

        :param pdb_input: object exposing ``construct_hierarchy()``; when
            given, the hierarchy is built from it.
        :param pdb_hierarchy: hierarchy exposing ``atom_selection_cache()``.
        :returns: ``cls`` instance whose ``all``, ``protein``, ``backbone``
            and ``sidechain`` arguments are ``basic_statistics`` of the
            B-factors of the ``non_h``, ``protein``, ``backbone`` and
            ``sidechains`` selections respectively.
        :raises AssertionError: if neither or both arguments are provided.
        """

        assert [pdb_input, pdb_hierarchy
                ].count(None) == 1, 'Provide pdb_input OR pdb_hierarchy'
        # Test against None explicitly: a supplied-but-falsy pdb_input would
        # otherwise be skipped and leave pdb_hierarchy as None below.
        if pdb_input is not None:
            pdb_hierarchy = pdb_input.construct_hierarchy()

        # One selection cache shared by all four selections
        cache = pdb_hierarchy.atom_selection_cache()

        def _selected_b(select):
            """Return the B-factors of the atoms picked by ``select``."""
            return select(hierarchy=pdb_hierarchy, cache=cache,
                          copy=True).atoms().extract_b()

        all_b = _selected_b(non_h)
        protein_b = _selected_b(protein)
        backbone_b = _selected_b(backbone)
        sidechain_b = _selected_b(sidechains)

        return cls(all=basic_statistics(all_b),
                   protein=basic_statistics(protein_b),
                   backbone=basic_statistics(backbone_b),
                   sidechain=basic_statistics(sidechain_b))
Exemple #4
0
def score_model(params, pdb1, mtz1, pdb2=None, mtz2=None, label_prefix='', verbose=False):
    """
    Score residues against density, and generate other model quality indicators.
    Identified residues in pdb1 are scored against mtz1 (and mtz2, if provided) using edstats.
    Identified residues in pdb1 are compared to the equivalent residues in pdb2, if provided.
    B-factors ratios of identified residues to surrounding sidechains are calculated.
    """

    if label_prefix: label_prefix = label_prefix + '-'

    # Extract the residues to look for
    res_names = params.selection.res_names_list

    print 'Reading input structure:', pdb1

    # Extract Structure
    h1_all = non_h(strip_pdb_to_input(pdb1, remove_ter=True, remove_end=True).hierarchy)
    # Normalise hierarchy (standardise atomic naming, etc...)
    sanitise_hierarchy(h1_all)
    h1_pro = protein(h1_all)
    h1_bck = backbone(h1_all)
    h1_sch = sidechains(h1_all)

    # Pull out residues to analyse
    if res_names:
        rg_for_analysis = [rg for rg in h1_all.residue_groups() if [n for n in rg.unique_resnames() if n in res_names]]
        print 'Selecting residues named {}: {} residue(s)'.format(' or '.join(res_names), len(rg_for_analysis))
    else:
        rg_for_analysis = h1_all.residue_groups()
        print 'Analysing all residues ({} residues)'.format(len(rg_for_analysis))

    # Check residues to analyse or skip
    if not rg_for_analysis:
        raise Exception('There are no residues called {} in {}'.format(' or '.join(params.selection.res_names_list), pdb1))

    # Extract PDB2
    if pdb2 is not None:
        print 'Reading input structure:', pdb2
        h2_all = non_h(strip_pdb_to_input(pdb2, remove_ter=True, remove_end=True).hierarchy)
        sanitise_hierarchy(h2_all)

    # Score MTZ1
    if mtz1 is not None:
        print 'Scoring model against mtz file'
        print 'Scoring {} >>> {}'.format(pdb1, mtz1)
        mtz1_edstats_scores = Edstats(mtz_file=mtz1, pdb_file=pdb1, f_label=params.input.f_label)
    else:
        mtz1_edstats_scores = None
    # Score MTZ2
    if mtz2 is not None:
        print 'Scoring model against mtz file'
        print 'Scoring {} >>> {}'.format(pdb1, mtz2)
        mtz2_edstats_scores = Edstats(mtz_file=mtz2, pdb_file=pdb1, f_label=params.input.f_label)
    else:
        mtz2_edstats_scores = None

    # Prepare output table
    data_table = prepare_table()

    for rg_sel in rg_for_analysis:

        # Create label for the output table
        #rg_label = (label_prefix+rg_sel.unique_resnames()[0]+'-'+rg_sel.parent().id+'-'+rg_sel.resseq+rg_sel.icode).replace(' ','')
        #rg_label = (label_prefix+rg_sel.parent().id+'-'+rg_sel.resseq+rg_sel.icode).replace(' ','')
        rg_label = ShortLabeller.format(rg_sel).replace(' ','')
        tab_label = label_prefix + rg_label

        if len(rg_sel.unique_resnames()) != 1:
            raise Exception(tab_label+': More than one residue name associated with residue group -- cannot process')

        # Append empty row to output table
        data_table.loc[tab_label] = None

        data_table.set_value(index = tab_label,
                             col   = 'PDB',
                             value = pdb1 )
        data_table.set_value(index = tab_label,
                             col   = 'Occupancy',
                             value = calculate_residue_group_occupancy(residue_group=rg_sel) )

        data_table = calculate_residue_group_bfactor_ratio(residue_group = rg_sel,
                                                           hierarchy     = h1_sch,
                                                           data_table    = data_table,
                                                           rg_label      = tab_label)

        if pdb2 is not None:
            data_table.set_value(index = tab_label,
                                 col   = 'PDB-2',
                                 value = pdb2 )

            # Extract the equivalent residue in pdb2
            rg_sel_2 = [rg for rg in h2_all.residue_groups() if ShortLabeller.format(rg).replace(' ','') == rg_label]

            try:
                assert rg_sel_2, 'Residue is not present in pdb file: {} not in {}'.format(rg_label, pdb2)
                assert len(rg_sel_2) == 1, 'More than one residue has been selected for {} in {}'.format(rg_label, pdb2)
            except:
                raise

            # Extract occupancy
            data_table.set_value(index = tab_label,
                                 col   = 'Occupancy-2',
                                 value = calculate_residue_group_occupancy(residue_group=rg_sel_2[0]) )

            # Calculate the RMSD between the models
            try:
                confs1, confs2, rmsds = zip(*calculate_paired_conformer_rmsds(conformers_1=rg_sel.conformers(), conformers_2=rg_sel_2[0].conformers()))
                data_table.set_value(index=tab_label, col='Model RMSD', value=min(rmsds))
            except:
                raise
                print 'Could not calculate RMSD between pdb_1 and pdb_2 for residue {}'.format(rg_label)
                pass

        # Extract Density Scores - MTZ 1
        if mtz1 is not None:
            data_table.set_value(index=tab_label, col='MTZ', value=mtz1)
        if mtz1_edstats_scores is not None:
            data_table = mtz1_edstats_scores.extract_residue_group_scores(  residue_group  = rg_sel,
                                                                            data_table     = data_table,
                                                                            rg_label       = tab_label )
            # Normalise the RSZO by the Occupancy of the ligand
            data_table['RSZO/OCC'] = data_table['RSZO']/data_table['Occupancy']

        # Extract Density Scores - MTZ 2
        if mtz2 is not None:
            data_table.set_value(index=tab_label, col='MTZ-2', value=mtz2)
        if mtz2_edstats_scores is not None:
            data_table = mtz2_edstats_scores.extract_residue_group_scores(  residue_group  = rg_sel,
                                                                            data_table     = data_table,
                                                                            rg_label       = tab_label,
                                                                            column_suffix  = '-2' )
            # Normalise the RSZO by the Occupancy of the ligand
            data_table['RSZO/OCC-2'] = data_table['RSZO-2']/data_table['Occupancy-2']

    return data_table
Exemple #5
0
def overlapping_occupancy_groups(hierarchy,
                                 resnames,
                                 group_dist,
                                 overlap_dist,
                                 complete_groups=True,
                                 exclude_altlocs=[],
                                 verbose=False):

    if exclude_altlocs is None: exclude_altlocs = []
    if exclude_altlocs == ['']: exclude_altlocs = []

    # Remove hydrogens to prevent ridiculous amounts of restraints
    hierarchy = non_h(hierarchy)
    # Extract all altlocs and ags with altlocs
    sel_altlocs = [
        a for a in hierarchy.altloc_indices()
        if (a != '') and (a not in exclude_altlocs)
    ]
    sel_alt_ags = [
        ag for ag in hierarchy.atom_groups() if (ag.altloc in sel_altlocs)
    ]

    # Record for each altloc
    # - atom groups for each altloc
    # - assigment of each ag to a cluster of ags
    cluster_dict = {}

    if verbose:
        print '-------------------------------------->'
        print ''
        print 'Generating groups of nearby alternate conformers (cutoff {}A)'.format(
            group_dist)
        if exclude_altlocs:
            print 'Excluding conformer(s): {}'.format(
                ','.join(exclude_altlocs))
        print ''

    for altloc in sel_altlocs:
        # Select atom groups with this altloc
        altloc_ags = filter_by_altloc(sel_alt_ags, altloc)
        # Cluster the atom groups
        altloc_clusters = cluster_atom_groups(altloc_ags, cutoff=group_dist)
        # Dictionary mapping altlocs to ags to clusters
        cluster_dict[altloc] = (altloc_ags, altloc_clusters)
        if verbose:
            print '- altloc {}: {} residues clustered into {} groups'.format(
                altloc, len(altloc_ags), len(set(altloc_clusters)))
    if verbose:
        print ''

    # Find atom_groups with the selected resnames
    seed_ags = [ag for ag in sel_alt_ags if (ag.resname in resnames)]
    # List of 2-length tuples (containing constrained pairs)
    constrain_groups = []

    # Loop until all atom groups have been used
    while seed_ags:
        # Pick the first residue to focus on
        focus_ag = seed_ags.pop(0)

        # Find which cluster this ag is in
        altloc_ags, altloc_clusters = cluster_dict[focus_ag.altloc]
        focus_clust = altloc_clusters[altloc_ags.index(focus_ag)]
        # Extract all ags in this cluster
        group_ags = [
            ag for i, ag in enumerate(altloc_ags)
            if altloc_clusters[i] == focus_clust
        ]
        group_xyz = group_ags[0].atoms().extract_xyz()
        for ag in group_ags[1:]:
            group_xyz = group_xyz.concatenate(ag.atoms().extract_xyz())

        if verbose:
            print '-------------------------------------->'
            print ''
            print 'Creating occupancy group based on: {}'.format(
                GenericSelection.to_str(focus_ag))
            print '- this residue is part of alternate conformer {}'.format(
                focus_ag.altloc)
            print '- there are {} atom_groups in this group'.format(
                len(group_ags))
            print ''
            print 'Looking for overlapping groups of residues with different alternate conformers:'
            print ''

        tmp_constrain_groups = []
        for altloc in sel_altlocs:
            # Skip blank altloc or the selected altloc
            if altloc == '' or altloc == focus_ag.altloc:
                continue
            # Find all ags for this altloc that overlap with the selected cluster
            altloc_ags, altloc_clusters = cluster_dict[altloc]
            overlap_ags = filter_by_distance(atom_groups=altloc_ags,
                                             xyz=group_xyz,
                                             cutoff=overlap_dist)
            overlap_clusts = sorted(
                set([
                    altloc_clusters[altloc_ags.index(ag)] for ag in overlap_ags
                ]))

            if verbose:
                print '- altloc {}: overlaps with {} group(s) of residues'.format(
                    altloc, len(overlap_clusts))

            for cluster in overlap_clusts:
                tmp_constrain_groups.append(
                    ((focus_ag.altloc, focus_clust), (altloc, cluster)))

            # Remove any used seed groups in the overlapping group
            [seed_ags.remove(ag) for ag in overlap_ags if ag in seed_ags]
        # Remove any used seed groups in the seed group
        [seed_ags.remove(ag) for ag in group_ags if ag in seed_ags]

        if verbose:
            print ''
            print 'Occupancy groups for this residue'
            print '- {} overlapping group(s) found'.format(
                len(tmp_constrain_groups))

        # Add to the complete list
        if tmp_constrain_groups:
            if complete_groups:
                if verbose:
                    print '- complete_groups=={}: concatenating occupancy groups'.format(
                        complete_groups)
                tmp_constrain_groups = [
                    tuple([(focus_ag.altloc, focus_clust)] +
                          [t[1] for t in tmp_constrain_groups])
                ]
            print '- creating {} occupancy group constraint(s)'.format(
                len(tmp_constrain_groups))
            constrain_groups.extend(tmp_constrain_groups)
        else:
            if verbose:
                print '...no overlapping groups found.'
                print '- not creating any occupancy groups for this residue'
        if verbose:
            print ''

    # Filter duplicated restraint groups
    tmp = []
    for g in map(sorted, constrain_groups):
        if g not in tmp: tmp.append(g)
    constrain_groups = tmp

    # Format to generic residue selections
    occupancy_groups = []
    for g in constrain_groups:
        ag_groups = {}
        for altloc, cluster in g:
            ag_groups.setdefault(altloc, []).extend([
                GenericSelection.to_dict(ag)
                for ag, c in zip(*cluster_dict[altloc]) if c == cluster
            ])
        occupancy_groups.append(
            [ag_groups[a] for a in sorted(ag_groups.keys())])

    return occupancy_groups