def run(params):
    """Standardise each input multi-conformer model and write it to a new file.

    Every path in ``params.input.pdb`` is checked for existence first. Each
    structure is then read with ``strip_pdb_to_input``, passed through
    ``standardise_multiconformer_model``, has its atoms sorted, and is written
    to ``<input path without extension> + params.output.suffix + '.pdb'``.

    Raises:
        AssertionError: if ``params.input.pdb`` is empty.
        Sorry: if an input path does not exist, or if ``only_model()`` fails
            for an input hierarchy.
    """

    # Create log file
    log = Log(log_file=params.output.log, verbose=True)

    # Report
    log.heading('Validating input parameters and input files')

    # Check that files have been provided, and that all of them exist,
    # before any structure is processed or written
    assert params.input.pdb, 'No pdb files have been provided'
    for pdb in params.input.pdb:
        if not os.path.exists(pdb):
            raise Sorry('pdb does not exist: {}'.format(pdb))

    for pdb in params.input.pdb:

        log.subheading('Reading pdb: {}'.format(pdb))
        obj = strip_pdb_to_input(pdb, remove_ter=True)

        # Catch Exception (not a bare except) so that KeyboardInterrupt and
        # SystemExit are not re-reported as a model-count problem
        try:
            obj.hierarchy.only_model()
        except Exception:
            raise Sorry('Input structures may only have one model')

        # Merge the hierarchies
        final = standardise_multiconformer_model(
            hierarchy=obj.hierarchy,
            pruning_rmsd=params.options.pruning_rmsd,
            in_place=True,
            verbose=params.settings.verbose)

        # Update the atoms numbering
        final.sort_atoms_in_place()

        # Write output file alongside the input file
        filename = os.path.splitext(pdb)[0] + params.output.suffix + '.pdb'
        log('Writing output structure to {}'.format(filename))
        final.write_pdb_file(file_name=filename,
                             crystal_symmetry=obj.crystal_symmetry())

    log.heading('FINISHED')
    log.heading('Final Parameters')
    log(master_phil.format(params).as_str().strip())

    return
def score_model(params, pdb1, mtz1, pdb2=None, mtz2=None, label_prefix='', verbose=False):
    """
    Score residues against density, and generate other model quality indicators.

    Identified residues in pdb1 are scored against mtz1 (and mtz2, if provided) using edstats.
    Identified residues in pdb1 are compared to the equivalent residues in pdb2, if provided.
    B-factors ratios of identified residues to surrounding sidechains are calculated.

    Args:
        params: parameter object; ``params.selection.res_names_list`` and
            ``params.input.f_label`` are read.
        pdb1: path of the structure whose residues are analysed.
        mtz1: path of the mtz file to score pdb1 against, or None to skip.
        pdb2: optional path of a second structure to compare pdb1 against.
        mtz2: optional path of a second mtz file to score pdb1 against.
        label_prefix: optional prefix for row labels (joined with '-').
        verbose: accepted for interface compatibility; not used here.

    Returns:
        The table created by ``prepare_table()``, with one row per analysed
        residue group, indexed by ``label_prefix`` + the residue's short label.

    Raises:
        Exception: if no residues are selected, or a residue group has more
            than one residue name.
        AssertionError: if a selected residue is missing from, or matches
            more than one residue in, pdb2.
    """

    if label_prefix:
        label_prefix = label_prefix + '-'

    # Extract the residues to look for
    res_names = params.selection.res_names_list

    print('Reading input structure: {}'.format(pdb1))

    # Extract Structure
    h1_all = non_h(strip_pdb_to_input(pdb1, remove_ter=True, remove_end=True).hierarchy)
    # Normalise hierarchy (standardise atomic naming, etc...)
    sanitise_hierarchy(h1_all)
    # NOTE(review): h1_pro and h1_bck are never read below; the calls are kept
    # in case the helpers have side effects -- confirm and remove if not.
    h1_pro = protein(h1_all)
    h1_bck = backbone(h1_all)
    h1_sch = sidechains(h1_all)

    # Pull out residues to analyse
    if res_names:
        rg_for_analysis = [rg for rg in h1_all.residue_groups()
                           if [n for n in rg.unique_resnames() if n in res_names]]
        print('Selecting residues named {}: {} residue(s)'.format(' or '.join(res_names), len(rg_for_analysis)))
    else:
        # Materialise the residue groups so they can be counted and
        # truth-tested below (harmless if residue_groups() already returns a list)
        rg_for_analysis = list(h1_all.residue_groups())
        print('Analysing all residues ({} residues)'.format(len(rg_for_analysis)))

    # Check residues to analyse or skip
    if not rg_for_analysis:
        if res_names:
            raise Exception('There are no residues called {} in {}'.format(' or '.join(res_names), pdb1))
        # No names were requested, so there is nothing to join into the message
        raise Exception('There are no residues in {}'.format(pdb1))

    # Extract PDB2 and index its residue groups by label, so that each
    # residue lookup below does not rescan the whole structure
    h2_by_label = {}
    if pdb2 is not None:
        print('Reading input structure: {}'.format(pdb2))
        h2_all = non_h(strip_pdb_to_input(pdb2, remove_ter=True, remove_end=True).hierarchy)
        sanitise_hierarchy(h2_all)
        for rg in h2_all.residue_groups():
            h2_by_label.setdefault(ShortLabeller.format(rg).replace(' ', ''), []).append(rg)

    # Score MTZ1
    if mtz1 is not None:
        print('Scoring model against mtz file')
        print('Scoring {} >>> {}'.format(pdb1, mtz1))
        mtz1_edstats_scores = Edstats(mtz_file=mtz1, pdb_file=pdb1, f_label=params.input.f_label)
    else:
        mtz1_edstats_scores = None

    # Score MTZ2 (note: pdb1 is the model scored against mtz2)
    if mtz2 is not None:
        print('Scoring model against mtz file')
        print('Scoring {} >>> {}'.format(pdb1, mtz2))
        mtz2_edstats_scores = Edstats(mtz_file=mtz2, pdb_file=pdb1, f_label=params.input.f_label)
    else:
        mtz2_edstats_scores = None

    # Prepare output table
    data_table = prepare_table()

    for rg_sel in rg_for_analysis:

        # Create label for the output table
        rg_label = ShortLabeller.format(rg_sel).replace(' ', '')
        tab_label = label_prefix + rg_label

        if len(rg_sel.unique_resnames()) != 1:
            raise Exception(tab_label + ': More than one residue name associated with residue group -- cannot process')

        # Append empty row to output table
        data_table.loc[tab_label] = None

        data_table.set_value(index=tab_label, col='PDB', value=pdb1)
        data_table.set_value(index=tab_label, col='Occupancy',
                             value=calculate_residue_group_occupancy(residue_group=rg_sel))

        data_table = calculate_residue_group_bfactor_ratio(residue_group=rg_sel,
                                                           hierarchy=h1_sch,
                                                           data_table=data_table,
                                                           rg_label=tab_label)

        if pdb2 is not None:
            data_table.set_value(index=tab_label, col='PDB-2', value=pdb2)

            # Extract the equivalent residue in pdb2
            rg_sel_2 = h2_by_label.get(rg_label, [])
            assert rg_sel_2, 'Residue is not present in pdb file: {} not in {}'.format(rg_label, pdb2)
            assert len(rg_sel_2) == 1, 'More than one residue has been selected for {} in {}'.format(rg_label, pdb2)

            # Extract occupancy
            data_table.set_value(index=tab_label, col='Occupancy-2',
                                 value=calculate_residue_group_occupancy(residue_group=rg_sel_2[0]))

            # Calculate the RMSD between the models; any failure propagates to the caller
            _confs1, _confs2, rmsds = zip(*calculate_paired_conformer_rmsds(
                conformers_1=rg_sel.conformers(),
                conformers_2=rg_sel_2[0].conformers()))
            data_table.set_value(index=tab_label, col='Model RMSD', value=min(rmsds))

        # Extract Density Scores - MTZ 1
        if mtz1 is not None:
            data_table.set_value(index=tab_label, col='MTZ', value=mtz1)
        if mtz1_edstats_scores is not None:
            data_table = mtz1_edstats_scores.extract_residue_group_scores(residue_group=rg_sel,
                                                                          data_table=data_table,
                                                                          rg_label=tab_label)
            # Normalise the RSZO by the Occupancy of the ligand
            data_table['RSZO/OCC'] = data_table['RSZO'] / data_table['Occupancy']

        # Extract Density Scores - MTZ 2
        if mtz2 is not None:
            data_table.set_value(index=tab_label, col='MTZ-2', value=mtz2)
        if mtz2_edstats_scores is not None:
            data_table = mtz2_edstats_scores.extract_residue_group_scores(residue_group=rg_sel,
                                                                          data_table=data_table,
                                                                          rg_label=tab_label,
                                                                          column_suffix='-2')
            # Normalise the RSZO by the Occupancy of the ligand
            # NOTE(review): RSZO-2 scores pdb1 against mtz2 but is divided by the
            # pdb2 occupancy column -- confirm this is the intended normalisation.
            data_table['RSZO/OCC-2'] = data_table['RSZO-2'] / data_table['Occupancy-2']

    return data_table
def run(params):
    """Merge a major and a minor structure into one multi-conformer model.

    The two inputs come either from ``params.input.major`` and
    ``params.input.minor``, or from exactly two entries in
    ``params.input.pdb``. Occupancies of each input are scaled by
    ``params.options.major_occupancy`` / ``minor_occupancy``, the hierarchies
    are merged with ``merge_complementary_hierarchies``, occupancies are
    post-processed, and the result is written to ``params.output.pdb``.
    If ``params.output.make_restraints`` is set, ``make_restraints.run`` is
    then called on the merged structure.

    Raises:
        Exception: on conflicting or missing inputs, an existing output file
            without ``settings.overwrite``, or an occupancy multiplier > 1.0.
        AssertionError: if major, minor or output pdb are still unset after
            input processing.
        Sorry: if ``only_model()`` fails for either input hierarchy.
    """

    # Create log file
    log = Log(log_file=params.output.log, verbose=True)

    # Report
    log.heading('Validating input parameters and input files')

    # Check one or other have been provided
    if (params.input.major or params.input.minor) and not (params.input.pdb == [None] or params.input.pdb == []):
        raise Exception(
            'Have provided input.major & input.minor, as well as files to input.pdb. Specify either input.major & input.minor, or two input.pdb.'
        )
    # Assign files to major and minor if necessary
    if not (params.input.major and params.input.minor):
        if len(params.input.pdb) != 2:
            raise Exception('Must provide zero or two pdb files to input.pdb')
        params.input.major = params.input.pdb[0]
        params.input.minor = params.input.pdb[1]
    # Check files exist
    if not os.path.exists(params.input.major):
        raise Exception('input.major does not exist: {}'.format(params.input.major))
    if not os.path.exists(params.input.minor):
        raise Exception('input.minor does not exist: {}'.format(params.input.minor))
    # Just check again...
    assert params.input.major
    assert params.input.minor
    assert params.output.pdb
    # Check existence of output pdb and delete as necessary
    if os.path.exists(params.output.pdb):
        if params.settings.overwrite:
            os.remove(params.output.pdb)
        else:
            raise Exception(
                'Output file already exists: {}. Run with overwrite=True to remove this file'
                .format(params.output.pdb))

    # Check that the input occupancies are valid
    if (params.options.minor_occupancy > 1.0) or (params.options.major_occupancy > 1.0):
        raise Exception(
            'minor_occupancy and major_occupancy cannot be greater than 1.0 (currently {} and {})'
            .format(params.options.minor_occupancy, params.options.major_occupancy))

    # Report validated parameters (the restraints scope is reported later, if used)
    log.subheading('Processed merging parameters')
    for obj in master_phil.format(params).objects:
        if obj.name == 'restraints':
            continue
        log(obj.as_str().strip())

    # Read in the two input structures
    log.subheading('Reading input files')
    maj_obj = strip_pdb_to_input(params.input.major, remove_ter=True)
    min_obj = strip_pdb_to_input(params.input.minor, remove_ter=True)

    # Check that each input contains a single model. Catch Exception (not a
    # bare except) so KeyboardInterrupt/SystemExit are not reported as Sorry.
    try:
        maj_obj.hierarchy.only_model()
        min_obj.hierarchy.only_model()
    except Exception:
        raise Sorry('Input structures may only have one model')

    # Multiply the input hierarchies by occupancy multipliers
    log.subheading('Updating input occupancies prior to merging')
    log('Multiplying occupancies of input.major by {}'.format(params.options.major_occupancy))
    maj_obj.hierarchy.atoms().set_occ(maj_obj.hierarchy.atoms().extract_occ() * params.options.major_occupancy)
    log('Multiplying occupancies of input.minor by {}'.format(params.options.minor_occupancy))
    min_obj.hierarchy.atoms().set_occ(min_obj.hierarchy.atoms().extract_occ() * params.options.minor_occupancy)

    # Merge the hierarchies
    final_struct = merge_complementary_hierarchies(
        hierarchy_1=maj_obj.hierarchy,
        hierarchy_2=min_obj.hierarchy,
        prune_duplicates_rmsd=params.options.prune_duplicates_rmsd,
        in_place=True,
        verbose=params.settings.verbose)

    # Set output occupancies
    log.subheading('Post-processing occupancies')
    # Set all main-conf occupancies to 1.0
    log('Setting all main-conf occupancies to 1.0')
    set_conformer_occupancy(hierarchy=final_struct,
                            altlocs=[''],
                            occupancy=1.0,
                            in_place=True,
                            verbose=params.settings.verbose)
    # Reset occupancies if required
    if params.options.reset_all_occupancies:
        # Calculate number of altlocs and associated occupancy
        altlocs = [a for a in final_struct.altloc_indices() if a]
        if altlocs:
            # Share unit occupancy equally between the alternate conformers
            new_occ = 1.0 / len(altlocs)
            # Set the occupancies
            log('Setting all conformer ({}) occupancies to {}'.format(','.join(altlocs), new_occ))
            set_conformer_occupancy(hierarchy=final_struct,
                                    altlocs=altlocs,
                                    occupancy=new_occ,
                                    in_place=True,
                                    verbose=params.settings.verbose)

    # Update the atoms numbering
    final_struct.sort_atoms_in_place()
    final_struct.atoms_reset_serial()
    # Write output file
    log('Writing output structure to {}'.format(params.output.pdb))
    final_struct.write_pdb_file(file_name=params.output.pdb,
                                crystal_symmetry=maj_obj.crystal_symmetry())

    # Run the restraint generation for the merged structure if requested
    if params.output.make_restraints:

        # Transfer the other phil objects from the master phil
        r_params = make_restraints.master_phil.extract()
        for name, obj in r_params.__dict__.items():
            if name.startswith('_'):
                continue
            if name not in params.restraints.__dict__:
                params.restraints.__inject__(name, obj)

        # Apply the output of merging to input of restraints
        params.restraints.input.pdb = params.output.pdb
        # Rename output files to be in same folder as output structure
        if params.restraints.output.phenix:
            params.restraints.output.phenix = os.path.join(
                os.path.dirname(params.output.pdb),
                os.path.basename(params.restraints.output.phenix))
        if params.restraints.output.refmac:
            params.restraints.output.refmac = os.path.join(
                os.path.dirname(params.output.pdb),
                os.path.basename(params.restraints.output.refmac))
        # Set log file name to this program if one given
        if params.output.log:
            params.restraints.output.log = params.output.log
        elif params.restraints.output.log:
            params.restraints.output.log = os.path.join(
                os.path.dirname(params.output.pdb),
                os.path.basename(params.restraints.output.log))
        # Which alternate conformations to generate restraints for
        params.restraints.local_restraints.altlocs = ','.join(
            [a for a in min_obj.hierarchy.altloc_indices() if a])
        # Update settings
        params.restraints.settings.verbose = params.settings.verbose
        params.restraints.settings.overwrite = params.settings.overwrite

        # Report
        log.heading('Parameters for generating restraints')
        log(master_phil.format(params).get('restraints').as_str().strip())
        log.heading('Generating restraints')
        # Run make_restraints
        make_restraints.run(params.restraints)

    log.heading('FINISHED')
    log.heading('Final Parameters')
    log(master_phil.format(params).as_str().strip())

    return
def from_file(cls, filename):
    """Build an instance of ``cls`` from the pdb file at ``filename``.

    The file is read with ``strip_pdb_to_input`` (``remove_ter=True``); the
    resulting ``input`` and ``hierarchy`` are passed to the constructor, and
    the source path is recorded on the instance as ``filename``.
    """
    parsed = strip_pdb_to_input(filename, remove_ter=True)
    instance = cls(hierarchy=parsed.hierarchy, input=parsed.input)
    instance.filename = filename
    return instance
def split_conformations(filename, params, log=None):
    """Split a multi-conformer pdb file into several files by conformer.

    Which conformers go to which output file is set by ``params.options.mode``:

    * ``by_residue_name``: two outputs -- conformers containing residues named
      in ``by_residue_name.resname``, and all remaining conformers.
    * ``by_conformer``: one output per alternate conformer.
    * ``by_conformer_group``: one output per comma-separated group in
      ``by_conformer_group.conformers``.

    Atoms without an altloc are kept in every output. Each output can
    optionally be pruned of duplicated conformers, have its altlocs reset,
    and have its occupancies reset, before being written after the original
    file's header.

    Args:
        filename: path of the input pdb file.
        params: parameter object (``options``, ``output``, ``settings`` scopes are read).
        log: optional logging object; a new verbose ``Log`` is created if None.

    Returns:
        List of output paths, one per conformer set (a path is listed even
        when its conformer set is empty and no file is written for it).

    Raises:
        Exception: if ``params.options.mode`` is not a recognised mode.
        AssertionError: if an occupancy check fails after resetting occupancies.
    """

    if log is None:
        log = Log(verbose=True)

    # Read the pdb header - for writing later...
    header_contents = get_pdb_header(filename)

    # Read in and validate the input file
    ens_obj = strip_pdb_to_input(filename, remove_ter=True)
    ens_obj.hierarchy.only_model()

    # Create a new copy of the structures
    new_ens = ens_obj.hierarchy.deep_copy()

    # Extract conformers from the structure as set (ignoring the blank altloc)
    all_confs = set(ens_obj.hierarchy.altloc_indices())
    all_confs.discard('')

    if params.options.mode == 'by_residue_name':
        sel_resnames = params.options.by_residue_name.resname.split(',')
        sel_confs = [ag.altloc for ag in new_ens.atom_groups() if (ag.resname in sel_resnames)]
        # List of conformers to output for each structure, and suffixes.
        # Built as a real list (not map()) so it can be measured and
        # iterated more than once on both Python 2 and Python 3.
        out_confs = [sorted(group) for group in (all_confs.intersection(sel_confs),
                                                 all_confs.difference(sel_confs))]
        out_suffs = [params.options.by_residue_name.selected_name,
                     params.options.by_residue_name.unselected_name]
    elif params.options.mode == 'by_conformer':
        # One structure for each conformer
        out_confs = [[c] for c in sorted(all_confs)]
        out_suffs = [''.join(c) for c in out_confs]
    elif params.options.mode == 'by_conformer_group':
        # One structure for each set of supplied conformer sets
        out_confs = [s.split(',') for s in params.options.by_conformer_group.conformers]
        out_suffs = [''.join(c) for c in out_confs]
    else:
        raise Exception('Invalid selection for options.mode: {}'.format(params.options.mode))

    assert len(out_confs) == len(out_suffs), '{} not same length as {}'.format(str(out_confs), str(out_suffs))

    for confs, suffix in zip(out_confs, out_suffs):
        log('Conformers {} -> {}'.format(str(confs), suffix))

    # Create paths from the suffixes
    out_paths = ['.'.join([os.path.splitext(filename)[0], params.output.suffix_prefix, suff, 'pdb'])
                 for suff in out_suffs]

    log.subheading('Processing {}'.format(filename[-70:]))

    for this_confs, this_path in zip(out_confs, out_paths):

        # Nothing to output for an empty conformer set
        if not this_confs:
            continue

        # Select atoms to keep - no altloc, or altloc in selection
        sel_string = ' or '.join(['altid " "'] + ['altid "{}"'.format(alt) for alt in this_confs])
        # Extract selection from the hierarchy
        sel_hiery = new_ens.select(new_ens.atom_selection_cache().selection(sel_string), copy_atoms=True)

        log.bar(True, False)
        log('Outputting conformer(s) {} to {}'.format(''.join(this_confs), this_path))
        log.bar()
        log('Keeping ANY atom with conformer id: {}'.format(' or '.join(['" "'] + this_confs)))
        log('Selection: \n\t' + sel_string)

        if params.options.pruning.prune_duplicates:
            log.bar()
            log('Pruning redundant conformers')
            # Remove any alternate conformers that are duplicated after selection
            prune_redundant_alternate_conformations(
                hierarchy=sel_hiery,
                required_altlocs=[a for a in sel_hiery.altloc_indices() if a],
                rmsd_cutoff=params.options.pruning.rmsd_cutoff,
                in_place=True,
                verbose=params.settings.verbose)

        if params.options.reset_altlocs:
            log.bar()
            # Change the altlocs so that they start from "A"
            # (a single remaining conformer is given a blank altloc instead)
            if len(this_confs) == 1:
                conf_hash = {this_confs[0]: ' '}
            else:
                conf_hash = dict(zip(this_confs, iotbx.pdb.systematic_chain_ids()))
            log('Resetting structure altlocs:')
            for k in sorted(conf_hash):
                log('\t{} -> "{}"'.format(k, conf_hash[k]))
            if params.settings.verbose:
                log.bar()
            for ag in sel_hiery.atom_groups():
                if ag.altloc in this_confs:
                    if params.settings.verbose:
                        log('{} -> alt {}'.format(Labeller.format(ag), conf_hash[ag.altloc]))
                    ag.altloc = conf_hash[ag.altloc]

        if params.options.reset_occupancies:
            log.bar()
            log('Resetting output occupancies (maximum occupancy of 1.0, etc.)')
            # Divide through by the smallest occupancy of any complete residues groups with occupancies of less than one
            rg_occs = [calculate_residue_group_occupancy(rg)
                       for rg in residue_groups_with_complete_set_of_conformers(sel_hiery)]
            non_uni = [v for v in numpy.unique(rg_occs) if 0.0 < v < 1.0]
            if non_uni:
                div_occ = min(non_uni)
                log('Dividing all occupancies by {}'.format(div_occ))
                sel_hiery.atoms().set_occ(sel_hiery.atoms().extract_occ() / div_occ)
            # Normalise the occupancies of any residue groups with more than unitary occupancy
            log('Fixing any residues that have greater than unitary occupancy')
            sanitise_occupancies(hierarchy=sel_hiery,
                                 min_occ=0.0,
                                 max_occ=1.0,
                                 in_place=True,
                                 verbose=params.settings.verbose)
            # Perform checks
            max_occ = max([calculate_residue_group_occupancy(rg) for rg in sel_hiery.residue_groups()])
            log('Maximum occupancy of output structue: {}'.format(max_occ))
            assert max_occ >= 0.0, 'maximum occupancy is less than 0.0?!?!'
            assert max_occ <= 1.0, 'maximum occupancy is greater than 1.0?!?!'

        log.bar()
        log('Writing structure: {}'.format(this_path))
        log.bar(False, True)

        # Write header contents first; the structure is then appended below
        with open(this_path, 'w') as fh:
            fh.write(header_contents)
        # Write output file
        sel_hiery.write_pdb_file(this_path, open_append=True)

    return out_paths