Exemple #1
0
def run(params):
    """
    Standardise the alternate conformers of each input structure and write the result.

    Every path in params.input.pdb is read with strip_pdb_to_input, passed
    through standardise_multiconformer_model (in place, using
    params.options.pruning_rmsd), atom-sorted, and written to
    '<input path without extension><params.output.suffix>.pdb' together with
    the crystal symmetry of the input. Progress and the final parameters are
    reported through a Log writing to params.output.log.

    Raises:
        AssertionError: if params.input.pdb is empty.
        Sorry: if an input file does not exist, or the single-model check on
            an input hierarchy fails.
    """

    # Create log file
    log = Log(log_file=params.output.log, verbose=True)

    # Report
    log.heading('Validating input parameters and input files')

    # All input files are checked up-front so that nothing is written if any is missing
    assert params.input.pdb, 'No pdb files have been provided'
    for pdb in params.input.pdb:
        if not os.path.exists(pdb):
            raise Sorry('pdb does not exist: {}'.format(pdb))

    for pdb in params.input.pdb:

        log.subheading('Reading pdb: {}'.format(pdb))
        obj = strip_pdb_to_input(pdb, remove_ter=True)
        # only_model() is called purely as a check (its return value is discarded).
        # Catch Exception rather than using a bare except, so that
        # KeyboardInterrupt/SystemExit are not reported as a model-count problem.
        try:
            obj.hierarchy.only_model()
        except Exception:
            raise Sorry('Input structures may only have one model')

        # Merge the hierarchies
        final = standardise_multiconformer_model(
            hierarchy=obj.hierarchy,
            pruning_rmsd=params.options.pruning_rmsd,
            in_place=True,
            verbose=params.settings.verbose)

        # Update the atoms numbering
        final.sort_atoms_in_place()

        # Write output file alongside the input file (input extension replaced by suffix + '.pdb')
        filename = os.path.splitext(pdb)[0] + params.output.suffix + '.pdb'
        log('Writing output structure to {}'.format(filename))
        final.write_pdb_file(file_name=filename,
                             crystal_symmetry=obj.crystal_symmetry())

    log.heading('FINISHED')
    log.heading('Final Parameters')
    log(master_phil.format(params).as_str().strip())

    return
Exemple #2
0
def score_model(params, pdb1, mtz1, pdb2=None, mtz2=None, label_prefix='', verbose=False):
    """
    Score residues against density, and generate other model quality indicators.
    Identified residues in pdb1 are scored against mtz1 (and mtz2, if provided) using edstats.
    Identified residues in pdb1 are compared to the equivalent residues in pdb2, if provided.
    B-factors ratios of identified residues to surrounding sidechains are calculated.

    Parameters:
        params: parameter object; params.selection.res_names_list and
            params.input.f_label are read.
        pdb1: path of the structure whose residues are scored.
        mtz1: path of the mtz file to score pdb1 against, or None to skip scoring.
        pdb2: optional path of a second structure to compare with pdb1.
        mtz2: optional path of a second mtz file; it is scored against pdb1 (not pdb2).
        label_prefix: optional prefix for the row labels (joined with '-').
        verbose: currently unused.

    Returns:
        The table created by prepare_table(), with one row per analysed residue group.

    Raises:
        Exception: if no residue groups are selected, or a residue group has
            more than one residue name.
        AssertionError: if a residue of pdb1 is missing from, or ambiguous in, pdb2.
    """

    if label_prefix: label_prefix = label_prefix + '-'

    # Extract the residues to look for
    res_names = params.selection.res_names_list

    # Single-argument print(...) behaves identically under Python 2 and Python 3
    print('Reading input structure: {}'.format(pdb1))

    # Extract Structure
    h1_all = non_h(strip_pdb_to_input(pdb1, remove_ter=True, remove_end=True).hierarchy)
    # Normalise hierarchy (standardise atomic naming, etc...)
    sanitise_hierarchy(h1_all)
    # NOTE(review): h1_pro and h1_bck are not used below; the calls are kept in
    # case the selection helpers are relied upon for validation -- confirm and remove.
    h1_pro = protein(h1_all)
    h1_bck = backbone(h1_all)
    h1_sch = sidechains(h1_all)

    # Pull out residues to analyse
    if res_names:
        rg_for_analysis = [rg for rg in h1_all.residue_groups()
                           if any(n in res_names for n in rg.unique_resnames())]
        print('Selecting residues named {}: {} residue(s)'.format(' or '.join(res_names), len(rg_for_analysis)))
    else:
        rg_for_analysis = h1_all.residue_groups()
        print('Analysing all residues ({} residues)'.format(len(rg_for_analysis)))

    # Check residues to analyse or skip
    if not rg_for_analysis:
        raise Exception('There are no residues called {} in {}'.format(' or '.join(params.selection.res_names_list), pdb1))

    # Extract PDB2 and index its residue groups by label once, rather than
    # re-scanning (and re-labelling) the whole hierarchy for every analysed residue
    h2_by_label = {}
    if pdb2 is not None:
        print('Reading input structure: {}'.format(pdb2))
        h2_all = non_h(strip_pdb_to_input(pdb2, remove_ter=True, remove_end=True).hierarchy)
        sanitise_hierarchy(h2_all)
        for rg in h2_all.residue_groups():
            h2_by_label.setdefault(ShortLabeller.format(rg).replace(' ',''), []).append(rg)

    # Score MTZ1
    if mtz1 is not None:
        print('Scoring model against mtz file')
        print('Scoring {} >>> {}'.format(pdb1, mtz1))
        mtz1_edstats_scores = Edstats(mtz_file=mtz1, pdb_file=pdb1, f_label=params.input.f_label)
    else:
        mtz1_edstats_scores = None
    # Score MTZ2
    if mtz2 is not None:
        print('Scoring model against mtz file')
        print('Scoring {} >>> {}'.format(pdb1, mtz2))
        mtz2_edstats_scores = Edstats(mtz_file=mtz2, pdb_file=pdb1, f_label=params.input.f_label)
    else:
        mtz2_edstats_scores = None

    # Prepare output table
    data_table = prepare_table()

    for rg_sel in rg_for_analysis:

        # Create label for the output table
        rg_label = ShortLabeller.format(rg_sel).replace(' ','')
        tab_label = label_prefix + rg_label

        if len(rg_sel.unique_resnames()) != 1:
            raise Exception(tab_label+': More than one residue name associated with residue group -- cannot process')

        # Append empty row to output table
        data_table.loc[tab_label] = None

        # DataFrame.at is used for scalar assignment (DataFrame.set_value no
        # longer exists in current pandas)
        data_table.at[tab_label, 'PDB'] = pdb1
        data_table.at[tab_label, 'Occupancy'] = calculate_residue_group_occupancy(residue_group=rg_sel)

        data_table = calculate_residue_group_bfactor_ratio(residue_group = rg_sel,
                                                           hierarchy     = h1_sch,
                                                           data_table    = data_table,
                                                           rg_label      = tab_label)

        if pdb2 is not None:
            data_table.at[tab_label, 'PDB-2'] = pdb2

            # Extract the equivalent residue in pdb2 (matched on the un-prefixed label)
            rg_sel_2 = h2_by_label.get(rg_label, [])

            assert rg_sel_2, 'Residue is not present in pdb file: {} not in {}'.format(rg_label, pdb2)
            assert len(rg_sel_2) == 1, 'More than one residue has been selected for {} in {}'.format(rg_label, pdb2)

            # Extract occupancy
            data_table.at[tab_label, 'Occupancy-2'] = calculate_residue_group_occupancy(residue_group=rg_sel_2[0])

            # Calculate the RMSD between the models: the smallest RMSD over the
            # paired conformers is recorded. Any failure propagates to the caller.
            _, _, rmsds = zip(*calculate_paired_conformer_rmsds(conformers_1=rg_sel.conformers(), conformers_2=rg_sel_2[0].conformers()))
            data_table.at[tab_label, 'Model RMSD'] = min(rmsds)

        # Extract Density Scores - MTZ 1
        if mtz1 is not None:
            data_table.at[tab_label, 'MTZ'] = mtz1
        if mtz1_edstats_scores is not None:
            data_table = mtz1_edstats_scores.extract_residue_group_scores(  residue_group  = rg_sel,
                                                                            data_table     = data_table,
                                                                            rg_label       = tab_label )
            # Normalise the RSZO by the Occupancy of the ligand
            # (recomputed over the whole table on every iteration)
            data_table['RSZO/OCC'] = data_table['RSZO']/data_table['Occupancy']

        # Extract Density Scores - MTZ 2
        if mtz2 is not None:
            data_table.at[tab_label, 'MTZ-2'] = mtz2
        if mtz2_edstats_scores is not None:
            data_table = mtz2_edstats_scores.extract_residue_group_scores(  residue_group  = rg_sel,
                                                                            data_table     = data_table,
                                                                            rg_label       = tab_label,
                                                                            column_suffix  = '-2' )
            # Normalise the RSZO by the Occupancy of the ligand
            data_table['RSZO/OCC-2'] = data_table['RSZO-2']/data_table['Occupancy-2']

    return data_table
def _beside(reference_path, filename):
    """Return the basename of `filename` placed in the directory of `reference_path`."""
    return os.path.join(os.path.dirname(reference_path),
                        os.path.basename(filename))


def run(params):
    """
    Merge a major and a minor conformation into one multi-conformer structure.

    The two inputs are taken either from params.input.major/params.input.minor
    or from exactly two entries of params.input.pdb. Their occupancies are
    scaled by params.options.major_occupancy/minor_occupancy, the hierarchies
    are merged with merge_complementary_hierarchies, occupancies are
    post-processed, and the result is written to params.output.pdb. If
    params.output.make_restraints is set, make_restraints.run is then invoked
    on the merged structure with its outputs placed next to params.output.pdb.

    Raises:
        Exception: on inconsistent input options, missing input files, an
            existing output file without overwrite, or occupancies above 1.0.
        Sorry: if the single-model check on either input hierarchy fails.
        AssertionError: if major/minor/output paths are empty after validation.
    """

    # Create log file
    log = Log(log_file=params.output.log, verbose=True)

    # Report
    log.heading('Validating input parameters and input files')

    # Check one or other have been provided
    if (params.input.major or params.input.minor
        ) and not (params.input.pdb == [None] or params.input.pdb == []):
        raise Exception(
            'Have provided input.major & input.minor, as well as files to input.pdb. Specify either input.major & input.minor, or two input.pdb.'
        )
    # Assign files to major and minor if necessary
    if not (params.input.major and params.input.minor):
        if len(params.input.pdb) != 2:
            raise Exception('Must provide zero or two pdb files to input.pdb')
        params.input.major = params.input.pdb[0]
        params.input.minor = params.input.pdb[1]
    # Check files exist
    if not os.path.exists(params.input.major):
        raise Exception('input.major does not exist: {}'.format(
            params.input.major))
    if not os.path.exists(params.input.minor):
        raise Exception('input.minor does not exist: {}'.format(
            params.input.minor))
    # Just check again...
    assert params.input.major
    assert params.input.minor
    assert params.output.pdb
    # Check existence of output pdb and delete as necessary
    if os.path.exists(params.output.pdb):
        if params.settings.overwrite:
            os.remove(params.output.pdb)
        else:
            raise Exception(
                'Output file already exists: {}. Run with overwrite=True to remove this file'
                .format(params.output.pdb))

    # Check that the input occupancies are valid (only the upper bound is checked)
    if (params.options.minor_occupancy >
            1.0) or (params.options.major_occupancy > 1.0):
        raise Exception(
            'minor_occupancy and major_occupancy cannot be greater than 1.0 (currently {} and {})'
            .format(params.options.minor_occupancy,
                    params.options.major_occupancy))

    # Report validated parameters (the nested restraints scope is reported later, if used)
    log.subheading('Processed merging parameters')
    for obj in master_phil.format(params).objects:
        if obj.name == 'restraints': continue
        log(obj.as_str().strip())

    # Read in the two input structures
    log.subheading('Reading input files')
    maj_obj = strip_pdb_to_input(params.input.major, remove_ter=True)
    min_obj = strip_pdb_to_input(params.input.minor, remove_ter=True)

    # only_model() is called purely as a check (return values are discarded).
    # Catch Exception rather than using a bare except, so that
    # KeyboardInterrupt/SystemExit are not reported as a model-count problem.
    try:
        maj_obj.hierarchy.only_model()
        min_obj.hierarchy.only_model()
    except Exception:
        raise Sorry('Input structures may only have one model')

    # Multiply the input hierarchies by occupancy multipliers
    log.subheading('Updating input occupancies prior to merging')
    log('Multiplying occupancies of input.major by {}'.format(
        params.options.major_occupancy))
    maj_obj.hierarchy.atoms().set_occ(maj_obj.hierarchy.atoms().extract_occ() *
                                      params.options.major_occupancy)
    log('Multiplying occupancies of input.minor by {}'.format(
        params.options.minor_occupancy))
    min_obj.hierarchy.atoms().set_occ(min_obj.hierarchy.atoms().extract_occ() *
                                      params.options.minor_occupancy)

    # Merge the hierarchies
    final_struct = merge_complementary_hierarchies(
        hierarchy_1=maj_obj.hierarchy,
        hierarchy_2=min_obj.hierarchy,
        prune_duplicates_rmsd=params.options.prune_duplicates_rmsd,
        in_place=True,
        verbose=params.settings.verbose)

    # Set output occupancies
    log.subheading('Post-processing occupancies')
    # Set all main-conf occupancies to 1.0
    log('Setting all main-conf occupancies to 1.0')
    set_conformer_occupancy(hierarchy=final_struct,
                            altlocs=[''],
                            occupancy=1.0,
                            in_place=True,
                            verbose=params.settings.verbose)
    # Reset occupancies if required
    if params.options.reset_all_occupancies:
        # Share unit occupancy equally between the non-blank altlocs
        altlocs = [a for a in final_struct.altloc_indices() if a]
        if altlocs:
            new_occ = 1.0 / len(altlocs)
            # Set the occupancies
            log('Setting all conformer ({}) occupancies to {}'.format(
                ','.join(altlocs), new_occ))
            set_conformer_occupancy(hierarchy=final_struct,
                                    altlocs=altlocs,
                                    occupancy=new_occ,
                                    in_place=True,
                                    verbose=params.settings.verbose)

    # Update the atoms numbering
    final_struct.sort_atoms_in_place()
    final_struct.atoms_reset_serial()
    # Write output file (crystal symmetry is taken from the major structure)
    log('Writing output structure to {}'.format(params.output.pdb))
    final_struct.write_pdb_file(file_name=params.output.pdb,
                                crystal_symmetry=maj_obj.crystal_symmetry())

    # Run the restraint generation for the merged structure if requested
    if params.output.make_restraints:

        # Transfer the other phil objects from the master phil: any public
        # make_restraints parameter missing from params.restraints is injected
        r_params = make_restraints.master_phil.extract()
        for name, obj in r_params.__dict__.items():
            if name.startswith('_'): continue
            if name not in params.restraints.__dict__:
                params.restraints.__inject__(name, obj)

        # Apply the output of merging to input of restraints
        params.restraints.input.pdb = params.output.pdb
        # Rename output files to be in same folder as output structure
        if params.restraints.output.phenix:
            params.restraints.output.phenix = _beside(
                params.output.pdb, params.restraints.output.phenix)
        if params.restraints.output.refmac:
            params.restraints.output.refmac = _beside(
                params.output.pdb, params.restraints.output.refmac)
        # Set log file name to this program if one given
        if params.output.log:
            params.restraints.output.log = params.output.log
        elif params.restraints.output.log:
            params.restraints.output.log = _beside(
                params.output.pdb, params.restraints.output.log)
        # Which alternate conformations to generate restraints for:
        # the non-blank altlocs of the minor structure
        params.restraints.local_restraints.altlocs = ','.join(
            [a for a in min_obj.hierarchy.altloc_indices() if a])
        # Update settings
        params.restraints.settings.verbose = params.settings.verbose
        params.restraints.settings.overwrite = params.settings.overwrite

        # Report
        log.heading('Parameters for generating restraints')
        log(master_phil.format(params).get('restraints').as_str().strip())
        log.heading('Generating restraints')
        # Run make_restraints
        make_restraints.run(params.restraints)

    log.heading('FINISHED')
    log.heading('Final Parameters')
    log(master_phil.format(params).as_str().strip())

    return
Exemple #4
0
 def from_file(cls, filename):
     """
     Alternate constructor: build an instance from a structure file.

     The file is read with strip_pdb_to_input (TER records removed) instead of
     calling iotbx.pdb.hierarchy.input directly; the resulting input and
     hierarchy objects are passed to the class constructor, and the source
     path is recorded on the new instance as `filename`.
     """
     parsed = strip_pdb_to_input(filename, remove_ter=True)
     instance = cls(input=parsed.input, hierarchy=parsed.hierarchy)
     instance.filename = filename
     return instance
Exemple #5
0
def split_conformations(filename, params, log=None):
    """
    Split a multi-conformer structure into several structures, one per group of conformers.

    The grouping is chosen by params.options.mode:
      - 'by_residue_name': two groups -- the conformers of atom groups whose
        residue name is listed in params.options.by_residue_name.resname, and
        all remaining conformers.
      - 'by_conformer': one group per conformer.
      - 'by_conformer_group': one group per comma-separated entry of
        params.options.by_conformer_group.conformers.

    For every non-empty group, the atoms with a blank altloc or an altloc in
    the group are selected, optionally pruned / relabelled / occupancy-reset
    (params.options.pruning.prune_duplicates, reset_altlocs,
    reset_occupancies), and written -- preceded by the header of the input
    file -- to '<input stem>.<params.output.suffix_prefix>.<suffix>.pdb'.

    Parameters:
        filename: path of the input structure.
        params: parameter object (options, output and settings scopes are read).
        log: optional log object; a verbose Log is created when None.

    Returns:
        The list of output paths, one per group. Paths are returned for empty
        groups as well, although no file is written for those.

    Raises:
        Exception: if params.options.mode is not one of the modes above.
    """

    if log is None: log = Log(verbose=True)

    # Read the pdb header - for writing later...
    header_contents = get_pdb_header(filename)

    # Read in and validate the input file (only_model is called as a check)
    ens_obj = strip_pdb_to_input(filename, remove_ter=True)
    ens_obj.hierarchy.only_model()

    # Create a new copy of the structures
    new_ens = ens_obj.hierarchy.deep_copy()

    # Extract conformers from the structure as set (blank altloc excluded)
    all_confs = set(ens_obj.hierarchy.altloc_indices())
    all_confs.discard('')

    if params.options.mode == 'by_residue_name':
        sel_resnames = params.options.by_residue_name.resname.split(',')
        sel_confs = [
            ag.altloc for ag in new_ens.atom_groups()
            if (ag.resname in sel_resnames)
        ]
        # List of conformers to output for each structure, and suffixes.
        # Built as a real list (not map(...)), since it is measured with len()
        # and iterated more than once below.
        out_confs = [
            sorted(all_confs.intersection(sel_confs)),
            sorted(all_confs.difference(sel_confs)),
        ]
        out_suffs = [
            params.options.by_residue_name.selected_name,
            params.options.by_residue_name.unselected_name
        ]
    elif params.options.mode == 'by_conformer':
        # One structure for each conformer
        out_confs = [[c] for c in sorted(all_confs)]
        out_suffs = [''.join(c) for c in out_confs]
    elif params.options.mode == 'by_conformer_group':
        # One structure for each set of supplied conformer sets
        out_confs = [
            s.split(',') for s in params.options.by_conformer_group.conformers
        ]
        out_suffs = [''.join(c) for c in out_confs]
    else:
        raise Exception('Invalid selection for options.mode: {}'.format(
            params.options.mode))

    assert len(out_confs) == len(out_suffs), '{} not same length as {}'.format(
        str(out_confs), str(out_suffs))

    for confs, suffix in zip(out_confs, out_suffs):
        log('Conformers {} -> {}'.format(str(confs), suffix))

    # Create paths from the suffixes
    out_paths = [
        '.'.join([
            os.path.splitext(filename)[0], params.output.suffix_prefix, suff,
            'pdb'
        ]) for suff in out_suffs
    ]

    log.subheading('Processing {}'.format(filename[-70:]))

    for this_confs, this_path in zip(out_confs, out_paths):

        # Nothing to write for an empty group (its path is still returned)
        if not this_confs: continue

        # Select atoms to keep - no altloc, or altloc in selection
        sel_string = ' or '.join(
            ['altid " "'] + ['altid "{}"'.format(alt) for alt in this_confs])
        # Extract selection from the hierarchy
        sel_hiery = new_ens.select(
            new_ens.atom_selection_cache().selection(sel_string),
            copy_atoms=True)

        log.bar(True, False)
        log('Outputting conformer(s) {} to {}'.format(''.join(this_confs),
                                                      this_path))
        log.bar()
        log('Keeping ANY atom with conformer id: {}'.format(
            ' or '.join(['" "'] + this_confs)))
        log('Selection: \n\t' + sel_string)

        if params.options.pruning.prune_duplicates:
            log.bar()
            log('Pruning redundant conformers')
            # Remove any alternate conformers that are duplicated after selection
            prune_redundant_alternate_conformations(
                hierarchy=sel_hiery,
                required_altlocs=[a for a in sel_hiery.altloc_indices() if a],
                rmsd_cutoff=params.options.pruning.rmsd_cutoff,
                in_place=True,
                verbose=params.settings.verbose)

        if params.options.reset_altlocs:
            log.bar()
            # A single conformer is given a blank altloc; several conformers
            # are relabelled in order using iotbx.pdb.systematic_chain_ids()
            if len(this_confs) == 1:
                conf_hash = {this_confs[0]: ' '}
            else:
                conf_hash = dict(
                    zip(this_confs, iotbx.pdb.systematic_chain_ids()))
            log('Resetting structure altlocs:')
            for k in sorted(conf_hash.keys()):
                log('\t{} -> "{}"'.format(k, conf_hash[k]))
            if params.settings.verbose: log.bar()
            for ag in sel_hiery.atom_groups():
                if ag.altloc in this_confs:
                    if params.settings.verbose:
                        log('{} -> alt {}'.format(Labeller.format(ag),
                                                  conf_hash[ag.altloc]))
                    ag.altloc = conf_hash[ag.altloc]

        if params.options.reset_occupancies:
            log.bar()
            log('Resetting output occupancies (maximum occupancy of 1.0, etc.)'
                )
            # Divide through by the smallest occupancy of any complete residues groups with occupancies of less than one
            rg_occs = [
                calculate_residue_group_occupancy(rg) for rg in
                residue_groups_with_complete_set_of_conformers(sel_hiery)
            ]
            non_uni = [v for v in numpy.unique(rg_occs) if 0.0 < v < 1.0]
            if non_uni:
                div_occ = min(non_uni)
                log('Dividing all occupancies by {}'.format(div_occ))
                sel_hiery.atoms().set_occ(sel_hiery.atoms().extract_occ() /
                                          div_occ)
            # Normalise the occupancies of any residue groups with more than unitary occupancy
            log('Fixing any residues that have greater than unitary occupancy')
            sanitise_occupancies(hierarchy=sel_hiery,
                                 min_occ=0.0,
                                 max_occ=1.0,
                                 in_place=True,
                                 verbose=params.settings.verbose)
            # Perform checks
            max_occ = max([
                calculate_residue_group_occupancy(rg)
                for rg in sel_hiery.residue_groups()
            ])
            log('Maximum occupancy of output structue: {}'.format(max_occ))
            assert max_occ >= 0.0, 'maximum occupancy is less than 0.0?!?!'
            assert max_occ <= 1.0, 'maximum occupancy is greater than 1.0?!?!'

        log.bar()
        log('Writing structure: {}'.format(this_path))
        log.bar(False, True)

        # Write header contents first; the structure is then appended to the same file
        with open(this_path, 'w') as fh:
            fh.write(header_contents)
        # Write output file
        sel_hiery.write_pdb_file(this_path, open_append=True)

    return out_paths