Beispiel #1
0
 def calculate_residue_mean_normalised_b_factors(self):
     """Record per-residue mean normalised B-factors for every structure.

     For each (label, hierarchy) pair in ``self.structures`` the B-factors
     are normalised to Z-scores (``method='protein'``). For every residue
     conformer, the mean of the B-values weighted by the occupancies is then
     stored in ``self.tables.residue_observations`` under the
     'mean-bz-all', 'mean-bz-backbone' and 'mean-bz-sidechain' entries.
     """
     # Output entry paired with the atom selection it is computed from;
     # the order here is the order in which the entries are filled.
     targets = (
         ('mean-bz-all', s_select.non_h),
         ('mean-bz-backbone', s_select.backbone),
         ('mean-bz-sidechain', s_select.sidechains),
     )
     # Blank every output entry before it is refilled
     for entry, _ in targets:
         self.tables.residue_observations.loc[:, :, entry] = numpy.nan
     print('------------------------------------>')
     structures = zip(self.structures.labels, self.structures.hierarchies)
     for label, hierarchy in structures:
         message = 'Calculating Local Normalised Mean B-Factors: {}'
         print(message.format(label))
         # All means below are taken over the Z-score-normalised B-factors
         z_hierarchy = normalise_b_factors_to_z_scores(
             pdb_hierarchy=hierarchy, method='protein')
         sel_cache = z_hierarchy.atom_selection_cache()
         for entry, select_atoms in targets:
             selected = select_atoms(hierarchy=z_hierarchy, cache=sel_cache)
             for conformer in conformers_via_residue_groups(selected):
                 residue_label = make_label(conformer)
                 weighted_mean = flex.mean_weighted(
                     conformer.atoms().extract_b(),
                     conformer.atoms().extract_occ())
                 self.tables.residue_observations.set_value(
                     residue_label, label, entry, weighted_mean)
Beispiel #2
0
    def from_pdb(cls, pdb_input=None, pdb_hierarchy=None):
        """Calculate the b-factor statistics of a model.

        Exactly one of ``pdb_input`` or ``pdb_hierarchy`` must be supplied.
        When ``pdb_input`` is given, the hierarchy is built from it with
        ``pdb_input.construct_hierarchy()``.

        Returns an instance of ``cls`` built from ``basic_statistics`` of the
        B-factors of four atom selections: non-hydrogen (``all``),
        ``protein``, ``backbone`` and ``sidechain``.

        Raises AssertionError if neither or both arguments are provided.
        """

        # Explicit raise (rather than ``assert``) so the check is not
        # stripped under ``python -O``; the exception type is kept as
        # AssertionError for existing callers.
        if (pdb_input is None) == (pdb_hierarchy is None):
            raise AssertionError('Provide pdb_input OR pdb_hierarchy')
        # Test against None, not truthiness, to agree with the check above.
        if pdb_input is not None:
            pdb_hierarchy = pdb_input.construct_hierarchy()

        cache = pdb_hierarchy.atom_selection_cache()

        all_b = non_h(hierarchy=pdb_hierarchy, cache=cache,
                      copy=True).atoms().extract_b()
        protein_b = protein(hierarchy=pdb_hierarchy, cache=cache,
                            copy=True).atoms().extract_b()
        backbone_b = backbone(hierarchy=pdb_hierarchy, cache=cache,
                              copy=True).atoms().extract_b()
        sidechain_b = sidechains(hierarchy=pdb_hierarchy,
                                 cache=cache,
                                 copy=True).atoms().extract_b()

        return cls(all=basic_statistics(all_b),
                   protein=basic_statistics(protein_b),
                   backbone=basic_statistics(backbone_b),
                   sidechain=basic_statistics(sidechain_b))
Beispiel #3
0
def score_model(params, pdb1, mtz1, pdb2=None, mtz2=None, label_prefix='', verbose=False):
    """
    Score residues against density, and generate other model quality indicators.
    Identified residues in pdb1 are scored against mtz1 (and mtz2, if provided) using edstats.
    Identified residues in pdb1 are compared to the equivalent residues in pdb2, if provided.
    B-factors ratios of identified residues to surrounding sidechains are calculated.

    Arguments:
        params       -- configuration object; this function reads
                        params.selection.res_names_list and params.input.f_label
        pdb1         -- structure whose residues are analysed
        mtz1         -- reflection file scored against pdb1, or None to skip density scoring
        pdb2         -- optional second structure; the residue with the same label is
                        compared to each analysed residue of pdb1
        mtz2         -- optional second reflection file, also scored against pdb1
        label_prefix -- if non-empty, prepended (followed by '-') to every row label
        verbose      -- not used by this function

    Returns the table created by prepare_table(), with one row added per analysed residue.

    Raises:
        Exception      -- no residues were selected, or a residue group has more than
                          one residue name
        AssertionError -- a residue of pdb1 is missing from, or matched more than once in, pdb2
    """

    if label_prefix: label_prefix = label_prefix + '-'

    # Extract the residues to look for
    res_names = params.selection.res_names_list

    print('Reading input structure: {}'.format(pdb1))

    # Extract Structure
    h1_all = non_h(strip_pdb_to_input(pdb1, remove_ter=True, remove_end=True).hierarchy)
    # Normalise hierarchy (standardise atomic naming, etc...)
    sanitise_hierarchy(h1_all)
    h1_sch = sidechains(h1_all)

    # Pull out residues to analyse
    if res_names:
        rg_for_analysis = [rg for rg in h1_all.residue_groups() if any(n in res_names for n in rg.unique_resnames())]
        print('Selecting residues named {}: {} residue(s)'.format(' or '.join(res_names), len(rg_for_analysis)))
    else:
        # Materialise as a list so that len() and the emptiness check below
        # also work if residue_groups() hands back a lazy iterator.
        rg_for_analysis = list(h1_all.residue_groups())
        print('Analysing all residues ({} residues)'.format(len(rg_for_analysis)))

    # Check residues to analyse or skip
    if not rg_for_analysis:
        if res_names:
            raise Exception('There are no residues called {} in {}'.format(' or '.join(res_names), pdb1))
        raise Exception('There are no residues in {}'.format(pdb1))

    # Extract PDB2
    if pdb2 is not None:
        print('Reading input structure: {}'.format(pdb2))
        h2_all = non_h(strip_pdb_to_input(pdb2, remove_ter=True, remove_end=True).hierarchy)
        sanitise_hierarchy(h2_all)
        # Index the residue groups of pdb2 by label once, instead of
        # rescanning the whole hierarchy for every analysed residue.
        h2_groups_by_label = {}
        for rg in h2_all.residue_groups():
            h2_groups_by_label.setdefault(ShortLabeller.format(rg).replace(' ',''), []).append(rg)

    # Score MTZ1
    if mtz1 is not None:
        print('Scoring model against mtz file')
        print('Scoring {} >>> {}'.format(pdb1, mtz1))
        mtz1_edstats_scores = Edstats(mtz_file=mtz1, pdb_file=pdb1, f_label=params.input.f_label)
    else:
        mtz1_edstats_scores = None
    # Score MTZ2 (note: scored against pdb1, not pdb2)
    if mtz2 is not None:
        print('Scoring model against mtz file')
        print('Scoring {} >>> {}'.format(pdb1, mtz2))
        mtz2_edstats_scores = Edstats(mtz_file=mtz2, pdb_file=pdb1, f_label=params.input.f_label)
    else:
        mtz2_edstats_scores = None

    # Prepare output table
    data_table = prepare_table()

    for rg_sel in rg_for_analysis:

        # Create label for the output table
        rg_label = ShortLabeller.format(rg_sel).replace(' ','')
        tab_label = label_prefix + rg_label

        if len(rg_sel.unique_resnames()) != 1:
            raise Exception(tab_label+': More than one residue name associated with residue group -- cannot process')

        # Append empty row to output table
        data_table.loc[tab_label] = None

        data_table.set_value(index = tab_label,
                             col   = 'PDB',
                             value = pdb1 )
        data_table.set_value(index = tab_label,
                             col   = 'Occupancy',
                             value = calculate_residue_group_occupancy(residue_group=rg_sel) )

        data_table = calculate_residue_group_bfactor_ratio(residue_group = rg_sel,
                                                           hierarchy     = h1_sch,
                                                           data_table    = data_table,
                                                           rg_label      = tab_label)

        if pdb2 is not None:
            data_table.set_value(index = tab_label,
                                 col   = 'PDB-2',
                                 value = pdb2 )

            # Extract the equivalent residue in pdb2
            rg_sel_2 = h2_groups_by_label.get(rg_label, [])

            # Raised explicitly (rather than asserted) so the checks are not
            # stripped under ``python -O``; AssertionError is kept for callers.
            if not rg_sel_2:
                raise AssertionError('Residue is not present in pdb file: {} not in {}'.format(rg_label, pdb2))
            if len(rg_sel_2) != 1:
                raise AssertionError('More than one residue has been selected for {} in {}'.format(rg_label, pdb2))

            # Extract occupancy
            data_table.set_value(index = tab_label,
                                 col   = 'Occupancy-2',
                                 value = calculate_residue_group_occupancy(residue_group=rg_sel_2[0]) )

            # Calculate the RMSD between the models and record the smallest
            # value over the paired conformers. Any failure propagates to the caller.
            _, _, rmsds = zip(*calculate_paired_conformer_rmsds(conformers_1=rg_sel.conformers(), conformers_2=rg_sel_2[0].conformers()))
            data_table.set_value(index=tab_label, col='Model RMSD', value=min(rmsds))

        # Extract Density Scores - MTZ 1
        if mtz1 is not None:
            data_table.set_value(index=tab_label, col='MTZ', value=mtz1)
        if mtz1_edstats_scores is not None:
            data_table = mtz1_edstats_scores.extract_residue_group_scores(  residue_group  = rg_sel,
                                                                            data_table     = data_table,
                                                                            rg_label       = tab_label )
            # Normalise the RSZO by the Occupancy of the ligand.
            # This is a whole-column operation, so it is re-evaluated for
            # every row on each iteration.
            # NOTE(review): could probably be done once after the loop --
            # confirm nothing relies on the column existing mid-loop.
            data_table['RSZO/OCC'] = data_table['RSZO']/data_table['Occupancy']

        # Extract Density Scores - MTZ 2
        if mtz2 is not None:
            data_table.set_value(index=tab_label, col='MTZ-2', value=mtz2)
        if mtz2_edstats_scores is not None:
            data_table = mtz2_edstats_scores.extract_residue_group_scores(  residue_group  = rg_sel,
                                                                            data_table     = data_table,
                                                                            rg_label       = tab_label,
                                                                            column_suffix  = '-2' )
            # Normalise the RSZO by the Occupancy of the ligand
            # NOTE(review): 'Occupancy-2' is only written above when pdb2 is
            # given, while these scores come from pdb1 against mtz2 -- confirm
            # this is the intended denominator when mtz2 is supplied without pdb2.
            data_table['RSZO/OCC-2'] = data_table['RSZO-2']/data_table['Occupancy-2']

    return data_table