def load_reference_dataset(self, ref_pdb_path, ref_mtz_path):
    """Set the reference dataset, to which all other datasets will be aligned and scaled.

    Loads the reference pdb/mtz as a PanddaReferenceDataset, finds a usable
    structure-factor column pair in the mtz, records the chosen column labels
    on the dataset metadata, and loads the corresponding miller array.

    Args:
        ref_pdb_path: path to the reference model (pdb) file.
        ref_mtz_path: path to the reference reflection data (mtz) file.

    Returns:
        The labelled PanddaReferenceDataset (num=-1, tag='reference').

    Raises:
        Sorry: if none of the candidate structure-factor column pairs are
            present in the reference mtz.
    """
    # ==============================>
    # Output links to reference files
    # ==============================>
    # NOTE(review): the "link" paths are currently assigned the *input* paths
    # themselves, so the unlink/symlink logic below can never fire — the
    # inequality test is always False and the link path always exists.
    # Presumably these were meant to point inside the output directory (see
    # the commented-out relpath variant further down) — confirm intent.
    link_ref_pdb = ref_pdb_path
    link_ref_mtz = ref_mtz_path
    # ==============================>
    # Remove any old links to dataset
    # ==============================>
    if os.path.abspath(ref_pdb_path) != os.path.abspath(link_ref_pdb):
        if os.path.exists(link_ref_pdb):
            os.unlink(link_ref_pdb)
        if os.path.exists(link_ref_mtz):
            os.unlink(link_ref_mtz)
    # ==============================>
    # Create links to dataset (only when the link location is free)
    # ==============================>
    if not os.path.exists(link_ref_pdb):
        rel_symlink(orig=ref_pdb_path, link=link_ref_pdb)
    if not os.path.exists(link_ref_mtz):
        rel_symlink(orig=ref_mtz_path, link=link_ref_mtz)
    # ==============================>
    # Create and set reference dataset
    # ==============================>
    # ref_dataset = PanddaReferenceDataset.from_file(
    #     model_filename=os.path.relpath(link_ref_pdb, start=self.out_dir),
    #     data_filename=os.path.relpath(link_ref_mtz, start=self.out_dir)).label(num=-1, tag='reference')
    ref_dataset = PanddaReferenceDataset.from_file(
        model_filename=str(link_ref_pdb),
        data_filename=str(link_ref_mtz)).label(num=-1, tag='reference')
    # ==============================>
    # Extract reference dataset SFs
    # ==============================>
    # Prefer an explicitly configured reference column pair; otherwise try
    # each of the generic structure-factor pairs in turn.
    sf_cols = [[str(x) for x in self.reference_structure_factors.split(',')]] \
        if self.reference_structure_factors \
        else [[str(x) for x in sf.split(',')] for sf in self.structure_factors]
    # Record when a pair is found
    dataset_sfs = None
    # Extract mtz object from the reference dataset
    mtz_obj = ref_dataset.data.mtz_object()
    # Iterate through possible structure factor pairs
    for sf_pair in sf_cols:
        # Check that the data contains the appropriate column
        if mtz_obj.has_column(sf_pair[0]) and mtz_obj.has_column(
                sf_pair[1]):
            # First matching pair wins
            dataset_sfs = ','.join(sf_pair)
            break
    # Raise error if no columns are identified
    if dataset_sfs is None:
        raise Sorry(
            'No matching structure factors were found in the reflection data for reference dataset. \n' + \
            'Looking for structure factors: \n\t{}\n'.format('\n\t'.join(map(' and '.join, sf_cols))) + \
            'Structure factors in this dataset: \n\t{}\n'.format('\n\t'.join(mtz_obj.column_labels())) + \
            'You may need to change the diffraction_data.structure_factors or the reference.structure_factors option.')
    # Store column labels for later
    ref_dataset.meta.column_labels = dataset_sfs
    # Load the diffraction data
    ref_dataset.data.miller_arrays[
        dataset_sfs] = ref_dataset.data.get_structure_factors(
            columns=dataset_sfs)
    return ref_dataset
def run(params):
    """Run one round of refinement with phenix.refine, refmac5 or buster.

    Creates the next numbered output directory from params.output.dir_prefix,
    copies/links the input files into it, builds and executes the refinement
    command for the selected program, then renames/links the output files and
    optionally splits conformations.

    Args:
        params: parameter object with .input (pdb, mtz, cif, params, args),
            .output (dir_prefix, out_prefix, link_prefix), .options (program,
            split_conformations) and .settings (verbose) attributes.

    Raises:
        AssertionError: if no input pdb or mtz is supplied.
        IndexError: if refinement produced no output pdb/mtz (re-raised after
            logging).
    """
    # Identify any existing output directories so the next index follows on
    current_dirs = sorted(glob.glob(params.output.dir_prefix + '*'))
    if not current_dirs:
        next_int = 1
    else:
        current_nums = [
            s.replace(params.output.dir_prefix, '') for s in current_dirs
        ]
        next_int = sorted(map(int, current_nums))[-1] + 1
    # Create output directory name from int (zero-padded to four digits)
    out_dir = params.output.dir_prefix + '{:04}'.format(next_int)
    # Create output directory
    os.mkdir(out_dir)
    # Create log object
    log = Log(log_file=os.path.join(
        out_dir, params.output.out_prefix + '.quick-refine.log'),
              verbose=params.settings.verbose)
    # Report
    if current_dirs:
        log('Found existing refinement directories: \n\t{}'.format(
            '\n\t'.join(current_dirs)))
        log('')
    log('Creating new output directory: {}'.format(out_dir))
    # Validate input parameters
    log.subheading('Validating input parameters')
    assert params.input.pdb is not None, 'No PDB given for refinement'
    assert params.input.mtz is not None, 'No MTZ given for refinement'
    # Resolve a symlinked mtz to its real path so the refinement program
    # sees the actual file
    if os.path.islink(params.input.mtz):
        log('Converting mtz path to real path:')
        log('{} -> {}'.format(params.input.mtz,
                              os.path.realpath(params.input.mtz)))
        params.input.mtz = os.path.realpath(params.input.mtz)
    # Link input
    log('Copying/linking files to refinement folder')
    shutil.copy(params.input.pdb,
                os.path.abspath(os.path.join(out_dir, 'input.pdb')))
    rel_symlink(params.input.mtz,
                os.path.abspath(os.path.join(out_dir, 'input.mtz')))
    # Copy parameter file to output folder
    if params.input.params:
        shutil.copy(params.input.params,
                    os.path.abspath(os.path.join(out_dir, 'input.params')))
    # Create output prefixes
    output_prefix = os.path.join(out_dir, params.output.out_prefix)
    log('Real output file path prefixes: {}'.format(output_prefix))
    log('Link output file path prefixes: {}'.format(
        params.output.link_prefix))
    # Create command objects
    log.subheading('Preparing command line input for refinement program')
    # PHENIX
    if params.options.program == 'phenix':
        cm = CommandManager('phenix.refine')
        # Command line args
        cm.add_command_line_arguments([params.input.pdb, params.input.mtz])
        cm.add_command_line_arguments(
            ['output.prefix={}'.format(output_prefix)])
        if params.input.cif:
            cm.add_command_line_arguments(params.input.cif)
        if params.input.params and os.path.exists(params.input.params):
            cm.add_command_line_arguments([params.input.params])
    # REFMAC
    elif params.options.program == 'refmac':
        cm = CommandManager('refmac5')
        # Command line args
        cm.add_command_line_arguments(
            ['xyzin', params.input.pdb, 'hklin', params.input.mtz])
        cm.add_command_line_arguments([
            'xyzout', output_prefix + '.pdb', 'hklout', output_prefix + '.mtz'
        ])
        if params.input.cif:
            for cif in params.input.cif:
                cm.add_command_line_arguments(['libin', cif])
        # Standard input: refmac reads its keywords from stdin
        if params.input.params:
            # FIX: use a context manager so the parameter file is closed
            # (previously open(...).read() leaked the file handle)
            with open(params.input.params) as param_file:
                cm.add_standard_input(param_file.read().split('\n'))
        cm.add_standard_input(['END'])
    elif params.options.program == "buster":
        cm = CommandManager('refine')
        # Command line arguments
        # inputs
        cm.add_command_line_arguments(
            ['-p', params.input.pdb, '-m', params.input.mtz, '-d', out_dir])
        if params.input.cif:
            for cif in params.input.cif:
                cm.add_command_line_arguments(['-l', cif])
        if params.input.params:
            cm.add_command_line_arguments(['-Gelly', params.input.params])
    # Pass additional command line arguments?
    if params.input.args:
        cm.add_command_line_arguments(params.input.args)
    # Report
    log(str(cm))
    log.bar()
    log('running refinement... ({})'.format(cm.program[0]))
    out = cm.run()
    log.subheading('Refinement output')
    if not log.verbose:
        log('output written to log file ({} lines)'.format(
            cm.output.count('\n')))
    log('\n' + cm.output, show=False)
    if out != 0:
        log.subheading('Refinement Errors')
        log(cm.error)
    log.subheading('Post-processing output files')
    if params.options.program == "buster":
        # buster writes fixed filenames; rename them to the common prefix
        log.subheading('Renaming buster output files')
        shutil.move(src=os.path.join(out_dir, 'refine.pdb'),
                    dst=output_prefix + '.pdb')
        shutil.move(src=os.path.join(out_dir, 'refine.mtz'),
                    dst=output_prefix + '.mtz')
    # Find output files
    try:
        real_pdb = glob.glob(output_prefix + '*.pdb')[0]
        real_mtz = glob.glob(output_prefix + '*.mtz')[0]
    except IndexError:
        # FIX: catch only the expected failure (no matching output files)
        # instead of a bare except, which would also intercept
        # KeyboardInterrupt/SystemExit
        log('Refinement has failed - output files do not exist')
        log('{}: {}'.format(output_prefix + '*.pdb',
                            glob.glob(output_prefix + '*.pdb')))
        log('{}: {}'.format(output_prefix + '*.mtz',
                            glob.glob(output_prefix + '*.mtz')))
        raise
    # List of links to make at the end of the run
    link_file_pairs = [(real_pdb, params.output.link_prefix + '.pdb'),
                       (real_mtz, params.output.link_prefix + '.mtz')]
    # Split conformations
    if params.options.split_conformations:
        params.split_conformations.settings.verbose = params.settings.verbose
        log.subheading('Splitting refined structure conformations')
        # Running split conformations
        out_files = split_conformations.split_conformations(
            filename=real_pdb, params=params.split_conformations, log=log)
        # Link output files to top
        for real_file in out_files:
            link_file = params.output.link_prefix + os.path.basename(
                real_file.replace(os.path.splitext(real_pdb)[0], ''))
            link_file_pairs.append([real_file, link_file])
    # Link output files
    log.subheading('linking output files')
    for real_file, link_file in link_file_pairs:
        log('Linking {} -> {}'.format(link_file, real_file))
        if not os.path.exists(real_file):
            log('file does not exist: {}'.format(real_file))
            continue
        if os.path.exists(link_file) and os.path.islink(link_file):
            log('removing existing link: {}'.format(link_file))
            os.unlink(link_file)
        if not os.path.exists(link_file):
            rel_symlink(real_file, link_file)
    log.heading('finished - refinement')
def __call__(self, mcd):
    """Initialise the pandda output directory tree for a multi-crystal dataset.

    Removes any previous output, recreates the directory skeleton, links each
    dataset's input pdb/mtz into its processed_datasets folder and copies any
    ligand files found next to the input model.

    Args:
        mcd: multi-crystal dataset object exposing a ``datasets`` mapping of
            dtag -> dataset (each with .model.filename and .data.filename).

    Returns:
        The Tree object describing the created output hierarchy (also stored
        on self.tree).
    """
    # ==============================>
    # Clean previous PanDDA (best effort: errors are printed, not raised)
    # ==============================>
    try:
        shutil.rmtree(self.out_dir, ignore_errors=True)
    except Exception as e:
        print(e)
    # ==============================>
    # Make output dir
    # ==============================>
    os.mkdir(str(self.out_dir))
    # ==============================>
    # Get path objects
    # ==============================>
    # FIX: build a *fresh* template dict per dtag — previously a single
    # mutable template object was shared by every dtag, so mutating the
    # entry for one dataset would silently affect all of them.
    processed_datasets = {
        dtag: {"event_map.ccp4": None,
               "ligand": {"dummy": None}}
        for dtag in mcd.datasets
    }
    analyses = {"pandda_analyse_events.csv": None}
    pandda = {"processed_datasets": processed_datasets,
              "analyses": analyses}
    # ==============================>
    # Initialise pandda output
    # ==============================>
    self.tree = Tree(str(self.out_dir), pandda)
    # ==============================>
    # Initialise dataset output
    # ==============================>
    for dtag, dataset in mcd.datasets.items():
        # ==============================>
        # Get path for this dataset's output folder
        # ==============================>
        dataset_path = p.Path(self.tree(("processed_datasets", dtag))[0])
        # ==============================>
        # Create links to input files
        # ==============================>
        # Links for the dataset input files
        link_pdb = str(dataset_path / "pandda_input.pdb")
        link_mtz = str(dataset_path / "pandda_input.mtz")
        # Link the input files to the output folder
        if not os.path.exists(link_pdb):
            rel_symlink(orig=dataset.model.filename, link=link_pdb)
        if not os.path.exists(link_mtz):
            rel_symlink(orig=dataset.data.filename, link=link_mtz)
        # ==============================>
        # Search for ligand files and copy them to the output ligands folder
        # ==============================>
        lig_files = glob.glob(os.path.join(
            os.path.dirname(dataset.model.filename), self.lig_style))
        for lig_file in lig_files:
            # Find all files with the same basename but allowing for
            # different extensions, then copy them to the output folder.
            lig_base = os.path.splitext(lig_file)[0] + '.*'
            lig_matches = glob.glob(lig_base)
            for lig in lig_matches:
                out_path = os.path.join(str(dataset_path / 'ligand'),
                                        os.path.basename(lig))
                if os.path.exists(lig) and (not os.path.exists(out_path)):
                    # Best-effort copy; FIX: catch only filesystem errors
                    # rather than a bare except that would also swallow
                    # KeyboardInterrupt etc.
                    try:
                        shutil.copy(lig, out_path)
                    except (IOError, OSError):
                        pass
    return self.tree
def run(params):
    """Run one round of refinement with phenix.refine or refmac5.

    Variant of the quick-refine driver: creates the next numbered output
    directory, copies/links the inputs into it, executes the refinement
    command, then verifies and links the output files.

    Args:
        params: parameter object with .input (pdb, mtz, cif, params, args),
            .output (dir_prefix, out_prefix, link_prefix), .options (program,
            split_conformations) and .settings (verbose) attributes.

    Raises:
        AssertionError: if no input pdb or mtz is supplied.
        IOError: if refinement produced no output pdb/mtz.
    """
    # Identify any existing output directories so the next index follows on
    current_dirs = sorted(glob.glob(params.output.dir_prefix + "*"))
    if not current_dirs:
        next_int = 1
    else:
        current_nums = [
            s.replace(params.output.dir_prefix, "") for s in current_dirs
        ]
        next_int = sorted(map(int, current_nums))[-1] + 1
    # Create output directory name from int (zero-padded to four digits)
    out_dir = params.output.dir_prefix + "{:04}".format(next_int)
    # Create output directory
    os.mkdir(out_dir)
    # Create log object
    log = Log(
        log_file=os.path.join(out_dir,
                              params.output.out_prefix + ".quick-refine.log"),
        verbose=params.settings.verbose,
    )
    # Report
    if current_dirs:
        log("Found existing refinement directories: \n\t{}".format(
            "\n\t".join(current_dirs)))
        log("")
    log("Creating new output directory: {}".format(out_dir))
    # Validate input parameters
    log.subheading("Validating input parameters")
    assert params.input.pdb is not None, "No PDB given for refinement"
    assert params.input.mtz is not None, "No MTZ given for refinement"
    # Resolve a symlinked mtz to its real path
    if os.path.islink(params.input.mtz):
        log("Converting mtz path to real path:")
        log("{} -> {}".format(params.input.mtz,
                              os.path.realpath(params.input.mtz)))
        params.input.mtz = os.path.realpath(params.input.mtz)
    # Link input
    log("Copying/linking files to refinement folder")
    shutil.copy(params.input.pdb,
                os.path.abspath(os.path.join(out_dir, "input.pdb")))
    rel_symlink(params.input.mtz,
                os.path.abspath(os.path.join(out_dir, "input.mtz")))
    # Copy parameter file to output folder
    if params.input.params:
        shutil.copy(params.input.params,
                    os.path.abspath(os.path.join(out_dir, "input.params")))
    # Create output prefixes
    # NOTE(review): unlike the sibling quick-refine driver, the prefix here
    # is the directory itself, so refmac's xyzout/hklout land *next to*
    # out_dir while real_pdb/real_mtz below are looked up *inside* it —
    # confirm this mismatch is intended.
    output_prefix = out_dir
    log("Real output file path prefixes: {}".format(output_prefix))
    log("Link output file path prefixes: {}".format(
        params.output.link_prefix))
    # Create command objects
    log.subheading("Preparing command line input for refinement program")
    # PHENIX
    if params.options.program == "phenix":
        cm = CommandManager("phenix.refine")
        # Command line args
        cm.add_command_line_arguments([params.input.pdb, params.input.mtz])
        cm.add_command_line_arguments(
            ["output.prefix={}".format(output_prefix)])
        if params.input.cif:
            cm.add_command_line_arguments(params.input.cif)
        if params.input.params and os.path.exists(params.input.params):
            cm.add_command_line_arguments([params.input.params])
    # REFMAC
    elif params.options.program == "refmac":
        cm = CommandManager("refmac5")
        # Command line args
        cm.add_command_line_arguments(
            ["xyzin", params.input.pdb, "hklin", params.input.mtz])
        cm.add_command_line_arguments([
            "xyzout", output_prefix + ".pdb", "hklout", output_prefix + ".mtz"
        ])
        if params.input.cif:
            for cif in params.input.cif:
                cm.add_command_line_arguments(["libin", cif])
        # Standard input: refmac reads its keywords from stdin
        if params.input.params:
            # FIX: use a context manager so the parameter file is closed
            # (previously open(...).read() leaked the file handle)
            with open(params.input.params) as param_file:
                cm.add_standard_input(param_file.read().split("\n"))
        cm.add_standard_input(["END"])
    # Pass additional command line arguments?
    if params.input.args:
        cm.add_command_line_arguments(params.input.args)
    # Report
    log(str(cm))
    log.bar()
    log("running refinement... ({})".format(cm.program[0]))
    out = cm.run()
    log.subheading("Refinement output")
    if not log.verbose:
        log("output written to log file ({} lines)".format(
            cm.output.count("\n")))
    log("\n" + cm.output, show=False)
    if out != 0:
        log.subheading("Refinement Errors")
        log(cm.error)
    log.subheading("Post-processing output files")
    # Find output files
    # FIX: os.path.join never raises, so the old try/except could never
    # trigger and a failed refinement was silently ignored; check file
    # existence explicitly instead.
    real_pdb = os.path.join(output_prefix, params.output.out_prefix + ".pdb")
    real_mtz = os.path.join(output_prefix, params.output.out_prefix + ".mtz")
    log("Expected output files: {} , {}".format(real_pdb, real_mtz))
    if not (os.path.exists(real_pdb) and os.path.exists(real_mtz)):
        log("Refinement has failed - output files do not exist")
        log("{}: {}".format(output_prefix + "*.pdb",
                            glob.glob(output_prefix + "*.pdb")))
        log("{}: {}".format(output_prefix + "*.mtz",
                            glob.glob(output_prefix + "*.mtz")))
        raise IOError("Refinement output files do not exist")
    # List of links to make at the end of the run
    link_file_pairs = [
        (real_pdb, params.output.link_prefix + ".pdb"),
        (real_mtz, params.output.link_prefix + ".mtz"),
    ]
    # FIX: report via the log instead of leftover debug print()
    log("Link file pairs: {}".format(link_file_pairs))
    # Split conformations
    if params.options.split_conformations:
        params.split_conformations.settings.verbose = params.settings.verbose
        log.subheading("Splitting refined structure conformations")
        # Running split conformations
        out_files = split_conformations.split_conformations(
            filename=real_pdb, params=params.split_conformations, log=log)
        # Link output files to top
        for real_file in out_files:
            link_file = params.output.link_prefix + os.path.basename(
                real_file.replace(os.path.splitext(real_pdb)[0], ""))
            link_file_pairs.append([real_file, link_file])
    # Link output files
    log.subheading("linking output files")
    for real_file, link_file in link_file_pairs:
        log("Linking {} -> {}".format(link_file, real_file))
        if not os.path.exists(real_file):
            log("file does not exist: {}".format(real_file))
            continue
        if os.path.exists(link_file) and os.path.islink(link_file):
            log("removing existing link: {}".format(link_file))
            os.unlink(link_file)
        if not os.path.exists(link_file):
            rel_symlink(real_file, link_file)
    log.heading("finished - refinement")
def symlink(source_path, target_path):
    """Create a relative symlink at *target_path* pointing at *source_path*.

    Thin convenience wrapper around rel_symlink that accepts any path-like
    arguments (e.g. pathlib.Path) and coerces them to strings.
    """
    source_str = str(source_path)
    target_str = str(target_path)
    rel_symlink(source_str, target_str)