def standardise_multiconformer_model(hierarchy, pruning_rmsd=0.1, in_place=False, verbose=False, log=None):
    """Standardise hierarchies by expanding alternate model conformations, and then trimming alternate conformations where possible

    The hierarchy is first expanded so that every residue carries an
    explicit copy for each conformer present in the crystal, and is then
    pruned so that conformers within `pruning_rmsd` of each other are
    collapsed back to a single copy.

    Args:
        hierarchy: iotbx-style PDB hierarchy to standardise.
        pruning_rmsd: RMSD cutoff below which duplicated conformers are pruned.
        in_place: modify `hierarchy` directly instead of a deep copy.
        verbose: forwarded to the expansion/pruning helpers.
        log: Log object; a default verbose Log is created when None.

    Returns:
        The standardised hierarchy (the input object when in_place=True).
    """
    if log is None:
        log = Log(verbose=True)

    # Unless editing in place, leave the caller's hierarchy untouched
    if not in_place:
        hierarchy = hierarchy.deep_copy()

    # Canonicalise atom ordering before any expansion/pruning
    hierarchy.sort_atoms_in_place()

    log.heading('Preparing to standardise structure')

    log.subheading(
        'Explicitly expanding model to all conformations of the crystal')
    expand_alternate_conformations(hierarchy=hierarchy,
                                   in_place=True,
                                   verbose=verbose)

    log.subheading(
        'Pruning unneccessary multi-conformer residues in the expanded structure'
    )
    prune_redundant_alternate_conformations(
        hierarchy=hierarchy,
        required_altlocs=hierarchy.altloc_indices(),
        rmsd_cutoff=pruning_rmsd,
        in_place=True,
        verbose=verbose)

    return hierarchy
def run(params):
    """Export selected pandda processed-dataset folders and merge the
    fitted structures into the export directory.

    Side effects: chdir into the pandda directory (pandda objects use
    relative paths), creates the export directory, and processes each
    selected dataset folder via process_and_export_folder.
    """
    # Create log object
    log = Log(log_file=os.path.abspath(params.output.log_file), verbose=True)
    # Change paths to absolute paths
    params.input.pandda_dir = os.path.abspath(params.input.pandda_dir)
    params.output.export_dir = os.path.abspath(params.output.export_dir)
    # Must be in the pandda directory (pandda objects use relative paths)
    os.chdir(params.input.pandda_dir)
    # Report modified phil
    log.heading('Processed parameters')
    log(master_phil.format(params).as_str())
    ############################################################################
    log.heading('Identifying folders to export')
    # Find the dataset directories to be exported
    if params.input.select_datasets:
        # FIX: build the selection with a plain loop instead of a list
        # comprehension executed only for its .extend() side effect
        selected_datasets = []
        for s in params.input.select_datasets:
            selected_datasets.extend(s.split(','))
        export_dirs = sorted([
            os.path.join(params.input.pandda_dir, 'processed_datasets', p)
            for p in selected_datasets
        ])
        # Filter by existence of path
        export_dirs = [p for p in export_dirs if os.path.exists(p)]
    else:
        export_dirs = sorted(
            glob.glob(
                os.path.join(params.input.pandda_dir, 'processed_datasets',
                             '*')))
    assert export_dirs, 'No Export Directories Found'
    # Report
    log('Exporting:\n\t' + '\n\t'.join(export_dirs))
    # Create output directory
    if not os.path.exists(params.output.export_dir):
        os.mkdir(params.output.export_dir)
    # Merge the fitted structures
    # FIX: renamed loop variable so it no longer shadows the builtin `dir`
    # (the keyword argument name expected by process_and_export_folder is kept)
    for export_dir in export_dirs:
        process_and_export_folder(dir=export_dir, params=params, log=log)
    log.heading('FINISHED')
def run(params):
    """Split each input multi-state PDB file into its separate conformations."""
    # Create log file
    log = Log(log_file=params.output.log, verbose=True)
    log.heading('Validating input parameters')
    assert params.input.pdb, 'No PDB files given'
    log.heading('Splitting multi-state structures')
    # Process each input structure independently
    for pdb in params.input.pdb:
        split_conformations(filename=pdb, params=params, log=log)
    log.heading('FINISHED')
def run(params):
    """Standardise each input multi-conformer PDB and write the result
    alongside the input with params.output.suffix appended.

    Raises:
        Sorry: when an input file is missing or has more than one model.
    """
    # Create log file
    log = Log(log_file=params.output.log, verbose=True)
    # Report
    log.heading('Validating input parameters and input files')
    # Check one or other have been provided
    assert params.input.pdb, 'No pdb files have been provided'
    for pdb in params.input.pdb:
        if not os.path.exists(pdb):
            raise Sorry('pdb does not exist: {}'.format(pdb))
    for pdb in params.input.pdb:
        log.subheading('Reading pdb: {}'.format(pdb))
        obj = strip_pdb_to_input(pdb, remove_ter=True)
        try:
            obj.hierarchy.only_model()
        # FIX: narrowed from a bare `except:` which would also swallow
        # KeyboardInterrupt/SystemExit
        except Exception:
            raise Sorry('Input structures may only have one model')
        # Merge the hierarchies
        final = standardise_multiconformer_model(
            hierarchy=obj.hierarchy,
            pruning_rmsd=params.options.pruning_rmsd,
            in_place=True,
            verbose=params.settings.verbose)
        # Update the atoms numbering
        final.sort_atoms_in_place()
        # Write output file
        filename = os.path.splitext(pdb)[0] + params.output.suffix + '.pdb'
        log('Writing output structure to {}'.format(filename))
        final.write_pdb_file(file_name=filename,
                             crystal_symmetry=obj.crystal_symmetry())
    log.heading('FINISHED')
    log.heading('Final Parameters')
    log(master_phil.format(params).as_str().strip())
    return
def run(params):
    """Run a 'quick refine' job: create a numbered output directory, stage
    the input pdb/mtz, build and run the refinement command (phenix.refine,
    refmac5 or buster), then post-process and link the output files.
    """
    # Identify any existing output directories (numbered dir_prefixNNNN)
    current_dirs = sorted(glob.glob(params.output.dir_prefix + '*'))
    if not current_dirs:
        next_int = 1
    else:
        # Next run number = highest existing suffix + 1
        current_nums = [
            s.replace(params.output.dir_prefix, '') for s in current_dirs
        ]
        next_int = sorted(map(int, current_nums))[-1] + 1
    # Create output directory name from int
    out_dir = params.output.dir_prefix + '{:04}'.format(next_int)
    # Create output directory
    os.mkdir(out_dir)
    # Create log object
    log = Log(log_file=os.path.join(
        out_dir, params.output.out_prefix + '.quick-refine.log'),
              verbose=params.settings.verbose)
    # Report
    if current_dirs:
        log('Found existing refinement directories: \n\t{}'.format(
            '\n\t'.join(current_dirs)))
        log('')
    log('Creating new output directory: {}'.format(out_dir))
    # Validate input parameters
    log.subheading('Validating input parameters')
    assert params.input.pdb is not None, 'No PDB given for refinement'
    assert params.input.mtz is not None, 'No MTZ given for refinement'
    # Resolve symlinked mtz paths so the refinement sees the real file
    if os.path.islink(params.input.mtz):
        log('Converting mtz path to real path:')
        log('{} -> {}'.format(params.input.mtz,
                              os.path.realpath(params.input.mtz)))
        params.input.mtz = os.path.realpath(params.input.mtz)
    # Link input: pdb is copied, mtz is symlinked into the run folder
    log('Copying/linking files to refinement folder')
    shutil.copy(params.input.pdb,
                os.path.abspath(os.path.join(out_dir, 'input.pdb')))
    rel_symlink(params.input.mtz,
                os.path.abspath(os.path.join(out_dir, 'input.mtz')))
    # Copy parameter file to output folder
    if params.input.params:
        shutil.copy(params.input.params,
                    os.path.abspath(os.path.join(out_dir, 'input.params')))
    # Create output prefixes
    output_prefix = os.path.join(out_dir, params.output.out_prefix)
    log('Real output file path prefixes: {}'.format(output_prefix))
    log('Link output file path prefixes: {}'.format(params.output.link_prefix))
    # Create command objects
    log.subheading('Preparing command line input for refinement program')
    # PHENIX
    if params.options.program == 'phenix':
        cm = CommandManager('phenix.refine')
        # Command line args
        cm.add_command_line_arguments([params.input.pdb, params.input.mtz])
        cm.add_command_line_arguments(
            ['output.prefix={}'.format(output_prefix)])
        if params.input.cif:
            cm.add_command_line_arguments(params.input.cif)
        if params.input.params and os.path.exists(params.input.params):
            cm.add_command_line_arguments([params.input.params])
    # REFMAC
    elif params.options.program == 'refmac':
        cm = CommandManager('refmac5')
        # Command line args
        cm.add_command_line_arguments(
            ['xyzin', params.input.pdb, 'hklin', params.input.mtz])
        cm.add_command_line_arguments([
            'xyzout', output_prefix + '.pdb', 'hklout', output_prefix + '.mtz'
        ])
        if params.input.cif:
            for cif in params.input.cif:
                cm.add_command_line_arguments(['libin', cif])
        # Standard input: refmac keywords are fed on stdin, terminated by END
        if params.input.params:
            cm.add_standard_input(open(params.input.params).read().split('\n'))
        cm.add_standard_input(['END'])
    elif params.options.program == "buster":
        cm = CommandManager('refine')
        # Command line arguments
        # inputs
        cm.add_command_line_arguments(
            ['-p', params.input.pdb, '-m', params.input.mtz, '-d', out_dir])
        if params.input.cif:
            for cif in params.input.cif:
                cm.add_command_line_arguments(['-l', cif])
        if params.input.params:
            cm.add_command_line_arguments(['-Gelly', params.input.params])
    # Pass additional command line arguments?
    # NOTE(review): if params.options.program matches none of the branches
    # above, `cm` is unbound here and this raises NameError — presumably the
    # phil choice restricts program to these three values; verify.
    if params.input.args:
        cm.add_command_line_arguments(params.input.args)
    # Report
    log(str(cm))
    log.bar()
    log('running refinement... ({})'.format(cm.program[0]))
    out = cm.run()
    log.subheading('Refinement output')
    if not log.verbose:
        log('output written to log file ({} lines)'.format(
            cm.output.count('\n')))
    log('\n' + cm.output, show=False)
    # Non-zero return code: report the refinement program's stderr
    if out != 0:
        log.subheading('Refinement Errors')
        log(cm.error)
    log.subheading('Post-processing output files')
    # buster writes fixed names into out_dir; move them to the common prefix
    if params.options.program == "buster":
        log.subheading('Renaming buster output files')
        shutil.move(src=os.path.join(out_dir, 'refine.pdb'),
                    dst=output_prefix + '.pdb')
        shutil.move(src=os.path.join(out_dir, 'refine.mtz'),
                    dst=output_prefix + '.mtz')
    # Find output files
    try:
        real_pdb = glob.glob(output_prefix + '*.pdb')[0]
        real_mtz = glob.glob(output_prefix + '*.mtz')[0]
    # NOTE(review): bare except — intended to catch IndexError when no files
    # match; consider narrowing to IndexError
    except:
        log('Refinement has failed - output files do not exist')
        log('{}: {}'.format(output_prefix + '*.pdb',
                            glob.glob(output_prefix + '*.pdb')))
        log('{}: {}'.format(output_prefix + '*.mtz',
                            glob.glob(output_prefix + '*.mtz')))
        raise
    # List of links to make at the end of the run
    link_file_pairs = [(real_pdb, params.output.link_prefix + '.pdb'),
                       (real_mtz, params.output.link_prefix + '.mtz')]
    # Split conformations
    if params.options.split_conformations:
        params.split_conformations.settings.verbose = params.settings.verbose
        log.subheading('Splitting refined structure conformations')
        # Running split conformations
        out_files = split_conformations.split_conformations(
            filename=real_pdb, params=params.split_conformations, log=log)
        # Link output files to top
        for real_file in out_files:
            # Link name = link_prefix + the suffix the splitter appended
            link_file = params.output.link_prefix + os.path.basename(
                real_file.replace(os.path.splitext(real_pdb)[0], ''))
            link_file_pairs.append([real_file, link_file])
    # Link output files
    log.subheading('linking output files')
    for real_file, link_file in link_file_pairs:
        log('Linking {} -> {}'.format(link_file, real_file))
        if not os.path.exists(real_file):
            log('file does not exist: {}'.format(real_file))
            continue
        # Replace stale symlinks (but never clobber a real file)
        if os.path.exists(link_file) and os.path.islink(link_file):
            log('removing existing link: {}'.format(link_file))
            os.unlink(link_file)
        if not os.path.exists(link_file):
            rel_symlink(real_file, link_file)
    log.heading('finished - refinement')
def run(params):
    """Group input pdb/mtz files by space group, cluster each group by
    unit-cell variation, plot dendrograms, and copy/symlink the clustered
    datasets into per-cluster output folders.

    Raises:
        Sorry: on missing inputs, missing output directory, duplicate
            dataset labels, or an unsupported output.file_mode.
    """
    # Validate input files
    if not (params.input.pdb or params.input.mtz):
        raise Sorry(
            'No pdb/mtz files have been provided: specify with input.pdb or input.mtz'
        )
    # Check and create output directory
    if not params.output.out_dir:
        raise Sorry(
            'No output directory has been specified: specify with output.out_dir'
        )
    if not os.path.exists(params.output.out_dir):
        os.mkdir(params.output.out_dir)
    # Define and create image directory
    img_dir = os.path.join(params.output.out_dir, 'dendrograms')
    if not os.path.exists(img_dir):
        os.mkdir(img_dir)
    # Create log object
    log = Log(log_file=params.output.out_dir + '.clustering.log', verbose=True)
    # Define output_file_function to copy or symlink files as needed
    if params.output.file_mode == 'symlink':
        out_file_func = os.symlink
    elif params.output.file_mode == 'copy':
        out_file_func = shutil.copy
    else:
        # FIX: previously an unsupported file_mode left out_file_func unbound,
        # crashing much later with NameError; fail fast instead
        raise Sorry('Unsupported output.file_mode: {}'.format(
            params.output.file_mode))
    log.heading('Processing input pdb/mtz files')
    log('Making dataset labels for {} pdb(s) and {} mtz(s)'.format(
        len(params.input.pdb), len(params.input.mtz)))
    # Build one label per input file, from filename/foldername/regex/index
    try:
        if params.input.labels.pdb_label == 'filename':
            p_labels = [
                os.path.basename(os.path.splitext(f)[0])
                for f in params.input.pdb
            ]
        elif params.input.labels.pdb_label == 'foldername':
            p_labels = [
                os.path.basename(os.path.dirname(f)) for f in params.input.pdb
            ]
        elif params.input.labels.pdb_regex:
            p_labels = [
                re.findall(params.input.labels.pdb_regex, f)[0]
                for f in params.input.pdb
            ]
        else:
            p_labels = [
                'PDB-{:06d}'.format(i) for i in range(len(params.input.pdb))
            ]
        if params.input.labels.mtz_label == 'filename':
            m_labels = [
                os.path.basename(os.path.splitext(f)[0])
                for f in params.input.mtz
            ]
        elif params.input.labels.mtz_label == 'foldername':
            m_labels = [
                os.path.basename(os.path.dirname(f)) for f in params.input.mtz
            ]
        elif params.input.labels.mtz_regex:
            m_labels = [
                re.findall(params.input.labels.mtz_regex, f)[0]
                for f in params.input.mtz
            ]
        else:
            m_labels = [
                'MTZ-{:06d}'.format(i) for i in range(len(params.input.mtz))
            ]
    # FIX: narrowed from a bare except; print statement converted to the
    # function form (identical single-argument output under Python 2)
    except Exception:
        print('Error reading file: {}'.format(f))
        raise
    # Check labels are unique
    set_m_labels = set(m_labels)
    set_p_labels = set(p_labels)
    if len(set_m_labels) != len(m_labels):
        raise Sorry('MTZ labels are not unique. Repeated labels: {}'.format(
            ' '.join([
                '{}'.format(l) for l in set_m_labels if m_labels.count(l) != 1
            ])))
    if len(set_p_labels) != len(p_labels):
        raise Sorry('PDB labels are not unique. Repeated labels: {}'.format(
            ' '.join([l for l in set_p_labels if p_labels.count(l) != 1])))
    # Report labels
    if p_labels:
        log.subheading('PDB Labels')
        log(', '.join(p_labels))
    if m_labels:
        log.subheading('MTZ Labels')
        log(', '.join(m_labels))
    # Load crystal summaries
    log.bar(True, True)
    log('Reading data for {} pdb(s) and {} mtz(s)'.format(
        len(params.input.pdb), len(params.input.mtz)))
    if params.input.pdb:
        pdb_summaries = [
            CrystalSummary.from_pdb(pdb_file=f, id=lab)
            for f, lab in zip(params.input.pdb, p_labels)
        ]
    else:
        pdb_summaries = []
    if params.input.mtz:
        mtz_summaries = [
            CrystalSummary.from_mtz(mtz_file=f, id=lab)
            for f, lab in zip(params.input.mtz, m_labels)
        ]
    else:
        mtz_summaries = []
    # Group by SpaceGroup
    log.subheading('Grouping {} crystals by space group...'.format(
        len(pdb_summaries + mtz_summaries)))
    crystal_groups = CrystalGroup.by_space_group(crystals=pdb_summaries +
                                                 mtz_summaries)
    log('Grouped crystals into {} space groups'.format(len(crystal_groups)))
    log.heading('Analysing variation of unit cells for each space group')
    for cg in crystal_groups:
        # e.g. 'P 21 21 21 (no. 19)' -> 'sg-P_21_21_21'
        sg_name = 'sg-{}'.format(cg.space_groups[0].split(' (')[0].replace(
            ' ', '_'))
        log.subheading('Space Group {}: {} dataset(s)'.format(
            cg.space_groups[0], len(cg.crystals)))
        log('Unit Cell Variation:')
        log(numpy.round(cg.uc_stats.as_pandas_table().T, 2))
        log('')
        log('Making unit cell dendrogram for all crystals with this spacegroup'
            )
        if len(cg.crystals) > 1:
            cg.dendrogram(fname=os.path.join(img_dir,
                                             '{}-all.png'.format(sg_name)),
                          xlab='Crystal',
                          ylab='Linear Cell Variation',
                          annotate_y_min=params.clustering.label_nodes_above)
        log('')
        log('Clustering {} unit cells...'.format(len(cg.crystals)))
        sg_crystal_groups = cg.by_unit_cell(
            cg.crystals, cutoff=params.clustering.lcv_cutoff)
        log('Clustered crystals into {} groups'.format(len(sg_crystal_groups)))
        for i_cg2, cg2 in enumerate(sg_crystal_groups):
            cluster_name = '{}-cluster-{}'.format(sg_name, i_cg2 + 1)
            log.bar(True, False)
            log('Processing cluster: {}'.format(cluster_name))
            log.bar(False, True)
            log('Unit Cell Variation:')
            log(numpy.round(cg.uc_stats.as_pandas_table().T, 2))
            log('')
            log('Making unit cell dendrogram for this cluster of crystals')
            if len(cg2.crystals) > 1:
                cg2.dendrogram(
                    fname=os.path.join(img_dir,
                                       '{}.png'.format(cluster_name)),
                    xlab='Crystal',
                    ylab='Linear Cell Variation',
                    ylim=(0, params.clustering.lcv_cutoff),
                    annotate_y_min=params.clustering.label_nodes_above)
            log('Copying files to output directory')
            # Go through and link the datasets for each of the spacegroups into a separate folder
            sub_dir = os.path.join(params.output.out_dir, cluster_name)
            if not os.path.exists(sub_dir):
                os.mkdir(sub_dir)
            # Split the mtzs and pdbs into separate directories -- or not
            if params.output.split_pdbs_and_mtzs:
                mtz_dir = os.path.join(sub_dir, 'mtzs')
                if not os.path.exists(mtz_dir):
                    os.mkdir(mtz_dir)
                pdb_dir = os.path.join(sub_dir, 'pdbs')
                if not os.path.exists(pdb_dir):
                    os.mkdir(pdb_dir)
            else:
                mtz_dir = pdb_dir = sub_dir
            for c in cg2.crystals:
                # Set parameters based on pdb or mtz
                if c.mtz_file:
                    sub_sub_dir = os.path.join(mtz_dir, c.id)
                    def_file = os.path.abspath(c.mtz_file)
                    def_suff = '.mtz'
                    pos_suff = '.pdb'
                elif c.pdb_file:
                    sub_sub_dir = os.path.join(pdb_dir, c.id)
                    def_file = os.path.abspath(c.pdb_file)
                    def_suff = '.pdb'
                    pos_suff = '.mtz'
                # Create subdirectory
                if not os.path.exists(sub_sub_dir):
                    os.mkdir(sub_sub_dir)
                # Output file base template
                out_base = os.path.join(sub_sub_dir, c.id)
                # Export file
                out_file = out_base + def_suff
                if not os.path.exists(out_file):
                    out_file_func(def_file, out_file)
                # output other as well if filenames are the same
                pos_file = def_file.replace(def_suff, pos_suff)
                out_file = out_base + pos_suff
                if os.path.exists(pos_file) and not os.path.exists(out_file):
                    out_file_func(pos_file, out_file)
    log.heading('finished')
def run(params):
    """Quick-refine variant supporting phenix.refine and refmac5 only:
    create a numbered output directory, stage inputs, run the refinement,
    then link the output files.
    """
    # Identify any existing output directories (numbered dir_prefixNNNN)
    current_dirs = sorted(glob.glob(params.output.dir_prefix + "*"))
    if not current_dirs:
        next_int = 1
    else:
        # Next run number = highest existing suffix + 1
        current_nums = [
            s.replace(params.output.dir_prefix, "") for s in current_dirs
        ]
        next_int = sorted(map(int, current_nums))[-1] + 1
    # Create output directory name from int
    out_dir = params.output.dir_prefix + "{:04}".format(next_int)
    # Create output directory
    os.mkdir(out_dir)
    # Create log object
    log = Log(
        log_file=os.path.join(out_dir,
                              params.output.out_prefix + ".quick-refine.log"),
        verbose=params.settings.verbose,
    )
    # Report
    if current_dirs:
        log("Found existing refinement directories: \n\t{}".format(
            "\n\t".join(current_dirs)))
        log("")
    log("Creating new output directory: {}".format(out_dir))
    # Validate input parameters
    log.subheading("Validating input parameters")
    assert params.input.pdb is not None, "No PDB given for refinement"
    assert params.input.mtz is not None, "No MTZ given for refinement"
    # Resolve symlinked mtz paths so the refinement sees the real file
    if os.path.islink(params.input.mtz):
        log("Converting mtz path to real path:")
        log("{} -> {}".format(params.input.mtz,
                              os.path.realpath(params.input.mtz)))
        params.input.mtz = os.path.realpath(params.input.mtz)
    # Link input: pdb is copied, mtz is symlinked into the run folder
    log("Copying/linking files to refinement folder")
    shutil.copy(params.input.pdb,
                os.path.abspath(os.path.join(out_dir, "input.pdb")))
    rel_symlink(params.input.mtz,
                os.path.abspath(os.path.join(out_dir, "input.mtz")))
    # Copy parameter file to output folder
    if params.input.params:
        shutil.copy(params.input.params,
                    os.path.abspath(os.path.join(out_dir, "input.params")))
    # Create output prefixes
    # NOTE(review): here output_prefix is the directory itself (unlike the
    # sibling implementation which uses os.path.join(out_dir, out_prefix)).
    # The refmac branch below therefore writes out_dir + ".pdb"/".mtz",
    # which does NOT match the post-processing lookup of
    # os.path.join(out_dir, out_prefix + ".pdb") — verify intended paths.
    output_prefix = out_dir
    log("Real output file path prefixes: {}".format(output_prefix))
    log("Link output file path prefixes: {}".format(params.output.link_prefix))
    # Create command objects
    log.subheading("Preparing command line input for refinement program")
    # PHENIX
    if params.options.program == "phenix":
        cm = CommandManager("phenix.refine")
        # Command line args
        cm.add_command_line_arguments([params.input.pdb, params.input.mtz])
        cm.add_command_line_arguments(
            ["output.prefix={}".format(output_prefix)])
        if params.input.cif:
            cm.add_command_line_arguments(params.input.cif)
        if params.input.params and os.path.exists(params.input.params):
            cm.add_command_line_arguments([params.input.params])
    # REFMAC
    elif params.options.program == "refmac":
        cm = CommandManager("refmac5")
        # Command line args
        cm.add_command_line_arguments(
            ["xyzin", params.input.pdb, "hklin", params.input.mtz])
        cm.add_command_line_arguments([
            "xyzout", output_prefix + ".pdb", "hklout", output_prefix + ".mtz"
        ])
        if params.input.cif:
            for cif in params.input.cif:
                cm.add_command_line_arguments(["libin", cif])
        # Standard input: refmac keywords are fed on stdin, terminated by END
        if params.input.params:
            cm.add_standard_input(open(params.input.params).read().split("\n"))
        cm.add_standard_input(["END"])
    # Pass additional command line arguments?
    # NOTE(review): if program is neither phenix nor refmac, `cm` is unbound
    # here — presumably constrained by the phil choice; verify.
    if params.input.args:
        cm.add_command_line_arguments(params.input.args)
    # Report
    log(str(cm))
    log.bar()
    log("running refinement... ({})".format(cm.program[0]))
    out = cm.run()
    log.subheading("Refinement output")
    if not log.verbose:
        log("output written to log file ({} lines)".format(
            cm.output.count("\n")))
    log("\n" + cm.output, show=False)
    # Non-zero return code: report the refinement program's stderr
    if out != 0:
        log.subheading("Refinement Errors")
        log(cm.error)
    log.subheading("Post-processing output files")
    # Find output files
    # NOTE(review): os.path.join cannot raise here, so this except branch is
    # dead code; unlike the sibling implementation the existence of the
    # output files is never actually checked before linking.
    try:
        real_pdb = os.path.join(output_prefix,
                                params.output.out_prefix + ".pdb")
        real_mtz = os.path.join(output_prefix,
                                params.output.out_prefix + ".mtz")
        print(real_pdb, "\n", real_mtz)
    except:
        log("Refinement has failed - output files do not exist")
        log("{}: {}".format(output_prefix + "*.pdb",
                            glob.glob(output_prefix + "*.pdb")))
        log("{}: {}".format(output_prefix + "*.mtz",
                            glob.glob(output_prefix + "*.mtz")))
        raise
    # List of links to make at the end of the run
    link_file_pairs = [
        (real_pdb, params.output.link_prefix + ".pdb"),
        (real_mtz, params.output.link_prefix + ".mtz"),
    ]
    print(link_file_pairs)
    # Split conformations
    if params.options.split_conformations:
        params.split_conformations.settings.verbose = params.settings.verbose
        log.subheading("Splitting refined structure conformations")
        # Running split conformations
        out_files = split_conformations.split_conformations(
            filename=real_pdb, params=params.split_conformations, log=log)
        # Link output files to top
        for real_file in out_files:
            # Link name = link_prefix + the suffix the splitter appended
            link_file = params.output.link_prefix + os.path.basename(
                real_file.replace(os.path.splitext(real_pdb)[0], ""))
            link_file_pairs.append([real_file, link_file])
    # Link output files
    log.subheading("linking output files")
    for real_file, link_file in link_file_pairs:
        log("Linking {} -> {}".format(link_file, real_file))
        if not os.path.exists(real_file):
            log("file does not exist: {}".format(real_file))
            continue
        # Replace stale symlinks (but never clobber a real file)
        if os.path.exists(link_file) and os.path.islink(link_file):
            log("removing existing link: {}".format(link_file))
            os.unlink(link_file)
        if not os.path.exists(link_file):
            rel_symlink(real_file, link_file)
    log.heading("finished - refinement")
def merge_complementary_hierarchies(hierarchy_1,
                                    hierarchy_2,
                                    prune_duplicates_rmsd=0.1,
                                    in_place=False,
                                    verbose=False,
                                    log=None):
    """Merge hierarchies that are alternate models of the same crystal by
    expanding alternate model conformations, merging, and then trimming
    alternate conformations where possible

    Pipeline (strictly ordered, all steps mutate in place):
      1. expand both hierarchies to explicit alternate conformations;
      2. shift the altloc ids of hierarchy_2 past those of hierarchy_1;
      3. rename residues of hierarchy_2 that clash with hierarchy_1;
      4. transfer hierarchy_2's residue groups into hierarchy_1;
      5. prune conformers duplicated within prune_duplicates_rmsd.

    Returns hierarchy_1 (or its deep copy when in_place=False) containing
    the merged model; hierarchy_2 is consumed as the donor.
    """
    if log is None:
        log = Log(verbose=True)
    # Alter the original files?
    if not in_place:
        # Copy the hierarchies
        hierarchy_1 = hierarchy_1.deep_copy()
        hierarchy_2 = hierarchy_2.deep_copy()
    # Sort the atoms
    hierarchy_1.sort_atoms_in_place()
    hierarchy_2.sort_atoms_in_place()
    log.heading('Preparing to merge structures')
    log.subheading(
        'Explicitly expanding models to all conformations of the crystal')
    log('Expanding alternate conformations in structure 1')
    expand_alternate_conformations(hierarchy=hierarchy_1,
                                   in_place=True,
                                   verbose=verbose)
    log('Expanding alternate conformations in structure 2')
    expand_alternate_conformations(hierarchy=hierarchy_2,
                                   in_place=True,
                                   verbose=verbose)
    log.subheading(
        'Applying conformer shift to the second structure before merging')
    log('Identifying the altloc shift required from the number of alternate conformers in structure 1'
        )
    # Offset ensures structure 2's altlocs cannot collide with structure 1's
    conf_offset = find_next_conformer_idx(
        hierarchy=hierarchy_1, all_ids=iotbx.pdb.systematic_chain_ids())
    log('Incrementing all altlocs in structure 2 by {}'.format(conf_offset))
    increment_altlocs(hierarchy=hierarchy_2,
                      offset=conf_offset,
                      in_place=True,
                      verbose=verbose)
    log.subheading('Renaming residues that do not align between structures')
    resolve_residue_id_clashes(fixed_hierarchy=hierarchy_1,
                               moving_hierarchy=hierarchy_2,
                               in_place=True,
                               verbose=verbose)
    log.heading('Merging structures')
    log('Transferring residues from Structure 2 to Structure 1')
    transfer_residue_groups_from_other(acceptor_hierarchy=hierarchy_1,
                                       donor_hierarchy=hierarchy_2,
                                       in_place=True,
                                       verbose=verbose)
    log.heading('Post-processing structure')
    log('Pruning unneccessary multi-conformer residues in the merged structure'
        )
    prune_redundant_alternate_conformations(
        hierarchy=hierarchy_1,
        required_altlocs=hierarchy_1.altloc_indices(),
        rmsd_cutoff=prune_duplicates_rmsd,
        in_place=True,
        verbose=verbose)
    return hierarchy_1
def run(params):
    """Merge a major and a minor conformation model into one multi-conformer
    structure, rescale occupancies, write the merged pdb, and optionally
    generate refinement restraints for it.
    """
    # Create log file
    log = Log(log_file=params.output.log, verbose=True)
    # Report
    log.heading('Validating input parameters and input files')
    # Check one or other have been provided
    if (params.input.major or params.input.minor
        ) and not (params.input.pdb == [None] or params.input.pdb == []):
        raise Exception(
            'Have provided input.major & input.minor, as well as files to input.pdb. Specify either input.major & input.minor, or two input.pdb.'
        )
    # Assign files to major and minor if necessary
    if not (params.input.major and params.input.minor):
        if len(params.input.pdb) != 2:
            raise Exception('Must provide zero or two pdb files to input.pdb')
        params.input.major = params.input.pdb[0]
        params.input.minor = params.input.pdb[1]
    # Check files exist
    if not os.path.exists(params.input.major):
        raise Exception('input.major does not exist: {}'.format(
            params.input.major))
    if not os.path.exists(params.input.minor):
        raise Exception('input.minor does not exist: {}'.format(
            params.input.minor))
    # Just check again...
    assert params.input.major
    assert params.input.minor
    assert params.output.pdb
    # Check existence of output pdb and delete as necessary
    if os.path.exists(params.output.pdb):
        if params.settings.overwrite:
            os.remove(params.output.pdb)
        else:
            raise Exception(
                'Output file already exists: {}. Run with overwrite=True to remove this file'
                .format(params.output.pdb))
    # Check that the input occupancies are valid
    if (params.options.minor_occupancy >
            1.0) or (params.options.major_occupancy > 1.0):
        raise Exception(
            'minor_occupancy and major_occupancy cannot be greater than 1.0 (currently {} and {})'
            .format(params.options.minor_occupancy,
                    params.options.major_occupancy))
    # Report validated parameters (restraints scope reported later)
    log.subheading('Processed merging parameters')
    for obj in master_phil.format(params).objects:
        if obj.name == 'restraints':
            continue
        log(obj.as_str().strip())
    # Read in the ligand file and set each residue to the requested conformer
    log.subheading('Reading input files')
    maj_obj = strip_pdb_to_input(params.input.major, remove_ter=True)
    min_obj = strip_pdb_to_input(params.input.minor, remove_ter=True)
    # Each input must be a single-model structure
    # NOTE(review): bare except — consider narrowing to Exception
    try:
        maj_obj.hierarchy.only_model()
        min_obj.hierarchy.only_model()
    except:
        raise Sorry('Input structures may only have one model')
    # Multiply the input hierarchies by occupancy multipliers
    log.subheading('Updating input occupancies prior to merging')
    log('Multiplying occupancies of input.major by {}'.format(
        params.options.major_occupancy))
    maj_obj.hierarchy.atoms().set_occ(maj_obj.hierarchy.atoms().extract_occ() *
                                      params.options.major_occupancy)
    log('Multiplying occupancies of input.minor by {}'.format(
        params.options.minor_occupancy))
    min_obj.hierarchy.atoms().set_occ(min_obj.hierarchy.atoms().extract_occ() *
                                      params.options.minor_occupancy)
    # Merge the hierarchies (major is the acceptor, minor the donor)
    final_struct = merge_complementary_hierarchies(
        hierarchy_1=maj_obj.hierarchy,
        hierarchy_2=min_obj.hierarchy,
        prune_duplicates_rmsd=params.options.prune_duplicates_rmsd,
        in_place=True,
        verbose=params.settings.verbose)
    # Set output occupancies
    log.subheading('Post-processing occupancies')
    # Set all main-conf occupancies to 1.0
    log('Setting all main-conf occupancies to 1.0')
    set_conformer_occupancy(hierarchy=final_struct,
                            altlocs=[''],
                            occupancy=1.0,
                            in_place=True,
                            verbose=params.settings.verbose)
    # Reset occupancies if required
    if params.options.reset_all_occupancies:
        # Calculate number of altlocs and associated occupancy
        altlocs = [a for a in final_struct.altloc_indices() if a]
        if altlocs:
            # Equal share of unit occupancy per alternate conformer
            new_occ = 1.0 / len(altlocs)
            # Set the occupancies
            log('Setting all conformer ({}) occupancies to {}'.format(
                ','.join(altlocs), new_occ))
            set_conformer_occupancy(hierarchy=final_struct,
                                    altlocs=altlocs,
                                    occupancy=new_occ,
                                    in_place=True,
                                    verbose=params.settings.verbose)
    # Update the atoms numbering
    final_struct.sort_atoms_in_place()
    final_struct.atoms_reset_serial()
    # Write output file (crystal symmetry taken from the major input)
    log('Writing output structure to {}'.format(params.output.pdb))
    final_struct.write_pdb_file(file_name=params.output.pdb,
                                crystal_symmetry=maj_obj.crystal_symmetry())
    # Run the restraint generation for the merged structure if requested
    if params.output.make_restraints:
        # Transfer the other phil objects from the master phil
        # (inject defaults for any scope not already present)
        r_params = make_restraints.master_phil.extract()
        for name, obj in r_params.__dict__.items():
            if name.startswith('_'):
                continue
            if name not in params.restraints.__dict__:
                params.restraints.__inject__(name, obj)
        # Apply the output of merging to input of restraints
        params.restraints.input.pdb = params.output.pdb
        # Rename output files to be in same folder as output structure
        if params.restraints.output.phenix:
            params.restraints.output.phenix = os.path.join(
                os.path.dirname(params.output.pdb),
                os.path.basename(params.restraints.output.phenix))
        if params.restraints.output.refmac:
            params.restraints.output.refmac = os.path.join(
                os.path.dirname(params.output.pdb),
                os.path.basename(params.restraints.output.refmac))
        # Set log file name to this program if one given
        if params.output.log:
            params.restraints.output.log = params.output.log
        elif params.restraints.output.log:
            params.restraints.output.log = os.path.join(
                os.path.dirname(params.output.pdb),
                os.path.basename(params.restraints.output.log))
        # Which alternate conformations to generate restraints for:
        # only the altlocs contributed by the minor structure
        params.restraints.local_restraints.altlocs = ','.join(
            [a for a in min_obj.hierarchy.altloc_indices() if a])
        # Update settings
        params.restraints.settings.verbose = params.settings.verbose
        params.restraints.settings.overwrite = params.settings.overwrite
        # Report
        log.heading('Parameters for generating restraints')
        log(master_phil.format(params).get('restraints').as_str().strip())
        log.heading('Generating restraints')
        # Run make_restraints
        make_restraints.run(params.restraints)
    log.heading('FINISHED')
    log.heading('Final Parameters')
    log(master_phil.format(params).as_str().strip())
    return
def run(params):
    """Summarise input mtz/pdb files: group by space group, validate that
    required mtz columns exist, and report per-group statistics
    (wavelength, resolution, unit cell, R-factors).
    """
    log = Log(log_file=params.output.log_file, verbose=True)
    # Process MTZs
    if params.input.mtz:
        log.heading('Processing {} MTZ Files'.format(len(params.input.mtz)))
        # Label each mtz from its filename or containing folder name
        if params.input.file_label=='filename':
            labels = [os.path.basename(os.path.splitext(f)[0]) for f in params.input.mtz]
        elif params.input.file_label=='foldername':
            labels = [os.path.basename(os.path.dirname(f)) for f in params.input.mtz]
        else:
            raise Exception('MTZ labelling function not supported: {}'.format(params.input.file_label))
        log.bar()
        log('Grouping {} mtz files by space group'.format(len(params.input.mtz)))
        crystal_groups = CrystalGroup.by_space_group(crystals=[CrystalSummary.from_mtz(mtz_file=f, id=lab) for f,lab in zip(params.input.mtz, labels)])
        log('> Clustered into {} space group(s)'.format(len(crystal_groups)))
        log.bar()
        for cg in crystal_groups:
            log.subheading('Space group {} - {} datasets'.format(','.join(cg.space_groups), len(cg.crystals)))
            error = False
            for c in cg.crystals:
                # check_for columns are only reported when missing;
                # summary columns are mandatory and set the error flag
                for label in params.check_for.column_label:
                    if label is None: continue
                    if label not in c.column_labels:
                        log('Checking: column "{}" not in diffraction data of {}. columns present are {}'.format(label, c.mtz_file, c.column_labels))
                for label in params.summary.column_label:
                    if label is None: continue
                    if label not in c.column_labels:
                        log('Required: column "{}" not in diffraction data of {}. columns present are {}'.format(label, c.mtz_file, c.column_labels))
                        error = True
            if error is True:
                raise Sorry('There are datasets that do not contain the right columns.')
            # Per-group statistics tables (lambdas are consumed immediately
            # by crystal_statistics within each call)
            # NOTE(review): crystals()[1] assumes the first mtz crystal is the
            # base/HKL_base crystal and the second holds the data — confirm
            log(crystal_statistics('Wavelength', cg.crystals, value_func=lambda c: c.mtz_object().crystals()[1].datasets()[0].wavelength(), header=True))
            log(crystal_statistics('Resolution (high)', cg.crystals, value_func=lambda c: c.high_res, header=False))
            log(crystal_statistics('Resolution (low)', cg.crystals, value_func=lambda c: c.low_res, header=False))
            log(crystal_statistics('Unit cell - vol', cg.crystals, value_func=lambda c: c.unit_cell.volume(), header=False))
            log(crystal_statistics('Unit cell - a', cg.crystals, value_func=lambda c: c.unit_cell.parameters()[0], header=False))
            log(crystal_statistics('Unit cell - b', cg.crystals, value_func=lambda c: c.unit_cell.parameters()[1], header=False))
            log(crystal_statistics('Unit cell - c', cg.crystals, value_func=lambda c: c.unit_cell.parameters()[2], header=False))
            log(crystal_statistics('Unit cell - alpha', cg.crystals, value_func=lambda c: c.unit_cell.parameters()[3], header=False))
            log(crystal_statistics('Unit cell - beta', cg.crystals, value_func=lambda c: c.unit_cell.parameters()[4], header=False))
            log(crystal_statistics('Unit cell - gamma', cg.crystals, value_func=lambda c: c.unit_cell.parameters()[5], header=False, footer=True))
            # Count valid values for each requested summary column
            for label in params.summary.column_label:
                if label is None: continue
                log(crystal_statistics('Column: {}'.format(label), cg.crystals, value_func=lambda c: c.mtz_object().get_column(label).n_valid_values(), header=False, footer=True))
            log.bar(True, False)
            log('Smallest + Largest Values')
            log.bar()
            log(crystal_min_max('Resolution', cg.crystals, value_func=lambda c: c.high_res))
    # Process PDBs
    if params.input.pdb:
        log.heading('Processing {} PDB Files'.format(len(params.input.pdb)))
        # Label each pdb from its filename or containing folder name
        if params.input.file_label=='filename':
            labels = [os.path.basename(os.path.splitext(f)[0]) for f in params.input.pdb]
        elif params.input.file_label=='foldername':
            labels = [os.path.basename(os.path.dirname(f)) for f in params.input.pdb]
        else:
            raise Exception('PDB labelling function not supported: {}'.format(params.input.file_label))
        log.bar()
        log('Grouping {} pdb files by space group'.format(len(params.input.pdb)))
        crystal_groups = CrystalGroup.by_space_group(crystals=[CrystalSummary.from_pdb(pdb_file=f, id=lab) for f,lab in zip(params.input.pdb, labels)])
        log('> Clustered into {} space group(s)'.format(len(crystal_groups)))
        for cg in crystal_groups:
            log.subheading('Space group: {} - {} datasets'.format(','.join(cg.space_groups), len(cg.crystals)))
            # R-factor statistics per space group
            log(crystal_statistics('R-work', cg.crystals, value_func=lambda c: c.pdb_input().get_r_rfree_sigma().r_work, header=True))
            log(crystal_statistics('R-free', cg.crystals, value_func=lambda c: c.pdb_input().get_r_rfree_sigma().r_free, header=False, footer=True))
            log.bar(True, False)
            log('Smallest + Largest Values')
            log.bar()
            log(crystal_min_max('R-free', cg.crystals, value_func=lambda c: c.pdb_input().get_r_rfree_sigma().r_free))
    log.heading('finished')