예제 #1
0
    def load_reference_dataset(self, ref_pdb_path, ref_mtz_path):
        """Set the reference dataset, to which all other datasets will be aligned and scaled"""

        # ==============================>
        # Output links to reference files
        # ==============================>
        link_ref_pdb = ref_pdb_path
        link_ref_mtz = ref_mtz_path
        # ==============================>
        # Remove any old links to dataset
        # ==============================>
        if os.path.abspath(ref_pdb_path) != os.path.abspath(link_ref_pdb):
            if os.path.exists(link_ref_pdb): os.unlink(link_ref_pdb)
            if os.path.exists(link_ref_mtz): os.unlink(link_ref_mtz)
        # ==============================>
        # Create links to dataset
        # ==============================>
        if not os.path.exists(link_ref_pdb):
            rel_symlink(orig=ref_pdb_path, link=link_ref_pdb)
        if not os.path.exists(link_ref_mtz):
            rel_symlink(orig=ref_mtz_path, link=link_ref_mtz)
        # ==============================>
        # Create and set reference dataset
        # ==============================>

        # ref_dataset = PanddaReferenceDataset.from_file(
        #     model_filename=os.path.relpath(link_ref_pdb, start=self.out_dir),
        #     data_filename=os.path.relpath(link_ref_mtz, start=self.out_dir)).label(num=-1, tag='reference')
        ref_dataset = PanddaReferenceDataset.from_file(
            model_filename=str(link_ref_pdb),
            data_filename=str(link_ref_mtz)).label(num=-1, tag='reference')

        # ==============================>
        # Extract reference dataset SFs
        # ==============================>
        sf_cols = [[str(x) for x in self.reference_structure_factors.split(',')]] if self.reference_structure_factors \
            else [[str(x) for x in sf.split(',')] for sf in self.structure_factors]

        # Record when a pair is found
        dataset_sfs = None
        # Extract mtz object from the reference dataset
        mtz_obj = ref_dataset.data.mtz_object()
        # Iterate through possible structure factor pairs
        for sf_pair in sf_cols:
            # Check that the data contains the appropriate column
            if mtz_obj.has_column(sf_pair[0]) and mtz_obj.has_column(
                    sf_pair[1]):
                dataset_sfs = ','.join(sf_pair)
                break
        # Raise error if no columns are identified
        if dataset_sfs is None:
            raise Sorry(
                'No matching structure factors were found in the reflection data for reference dataset. \n' + \
                'Looking for structure factors: \n\t{}\n'.format('\n\t'.join(map(' and '.join, sf_cols))) + \
                'Structure factors in this dataset: \n\t{}\n'.format('\n\t'.join(mtz_obj.column_labels())) + \
                'You may need to change the diffraction_data.structure_factors or the reference.structure_factors option.')
        # Store column labels for later
        ref_dataset.meta.column_labels = dataset_sfs
        # Load the diffraction data
        ref_dataset.data.miller_arrays[
            dataset_sfs] = ref_dataset.data.get_structure_factors(
                columns=dataset_sfs)

        return ref_dataset
예제 #2
0
def run(params):

    # Identify any existing output directories
    current_dirs = sorted(glob.glob(params.output.dir_prefix + '*'))
    if not current_dirs:
        next_int = 1
    else:
        current_nums = [
            s.replace(params.output.dir_prefix, '') for s in current_dirs
        ]
        next_int = sorted(map(int, current_nums))[-1] + 1

    # Create output directory name from int
    out_dir = params.output.dir_prefix + '{:04}'.format(next_int)
    # Create output directory
    os.mkdir(out_dir)

    # Create log object
    log = Log(log_file=os.path.join(
        out_dir, params.output.out_prefix + '.quick-refine.log'),
              verbose=params.settings.verbose)

    # Report
    if current_dirs:
        log('Found existing refinement directories: \n\t{}'.format(
            '\n\t'.join(current_dirs)))
        log('')
    log('Creating new output directory: {}'.format(out_dir))

    # Validate input parameters
    log.subheading('Validating input parameters')
    assert params.input.pdb is not None, 'No PDB given for refinement'
    assert params.input.mtz is not None, 'No MTZ given for refinement'

    if os.path.islink(params.input.mtz):
        log('Converting mtz path to real path:')
        log('{} -> {}'.format(params.input.mtz,
                              os.path.realpath(params.input.mtz)))
        params.input.mtz = os.path.realpath(params.input.mtz)

    # Link input
    log('Copying/linking files to refinement folder')
    shutil.copy(params.input.pdb,
                os.path.abspath(os.path.join(out_dir, 'input.pdb')))
    rel_symlink(params.input.mtz,
                os.path.abspath(os.path.join(out_dir, 'input.mtz')))
    # Copy parameter file to output folder
    if params.input.params:
        shutil.copy(params.input.params,
                    os.path.abspath(os.path.join(out_dir, 'input.params')))

    # Create output prefixes
    output_prefix = os.path.join(out_dir, params.output.out_prefix)
    log('Real output file path prefixes: {}'.format(output_prefix))
    log('Link output file path prefixes: {}'.format(params.output.link_prefix))

    # Create command objects
    log.subheading('Preparing command line input for refinement program')

    # PHENIX
    if params.options.program == 'phenix':
        cm = CommandManager('phenix.refine')
        # Command line args
        cm.add_command_line_arguments([params.input.pdb, params.input.mtz])
        cm.add_command_line_arguments(
            ['output.prefix={}'.format(output_prefix)])
        if params.input.cif:
            cm.add_command_line_arguments(params.input.cif)
        if params.input.params and os.path.exists(params.input.params):
            cm.add_command_line_arguments([params.input.params])

    # REFMAC
    elif params.options.program == 'refmac':
        cm = CommandManager('refmac5')
        # Command line args
        cm.add_command_line_arguments(
            ['xyzin', params.input.pdb, 'hklin', params.input.mtz])

        cm.add_command_line_arguments([
            'xyzout', output_prefix + '.pdb', 'hklout', output_prefix + '.mtz'
        ])
        if params.input.cif:
            for cif in params.input.cif:
                cm.add_command_line_arguments(['libin', cif])
        # Standard input
        if params.input.params:
            cm.add_standard_input(open(params.input.params).read().split('\n'))

        cm.add_standard_input(['END'])

    elif params.options.program == "buster":
        cm = CommandManager('refine')
        # Command line arguments
        # inputs
        cm.add_command_line_arguments(
            ['-p', params.input.pdb, '-m', params.input.mtz, '-d', out_dir])

        if params.input.cif:
            for cif in params.input.cif:
                cm.add_command_line_arguments(['-l', cif])

        if params.input.params:
            cm.add_command_line_arguments(['-Gelly', params.input.params])

    # Pass additional command line arguments?
    if params.input.args:
        cm.add_command_line_arguments(params.input.args)

    # Report
    log(str(cm))

    log.bar()
    log('running refinement... ({})'.format(cm.program[0]))
    out = cm.run()

    log.subheading('Refinement output')
    if not log.verbose:
        log('output written to log file ({} lines)'.format(
            cm.output.count('\n')))

    log('\n' + cm.output, show=False)

    if out != 0:
        log.subheading('Refinement Errors')
        log(cm.error)

    log.subheading('Post-processing output files')

    if params.options.program == "buster":
        log.subheading('Renaming buster output files')

        shutil.move(src=os.path.join(out_dir, 'refine.pdb'),
                    dst=output_prefix + '.pdb')

        shutil.move(src=os.path.join(out_dir, 'refine.mtz'),
                    dst=output_prefix + '.mtz')

    # Find output files
    try:
        real_pdb = glob.glob(output_prefix + '*.pdb')[0]
        real_mtz = glob.glob(output_prefix + '*.mtz')[0]
    except:
        log('Refinement has failed - output files do not exist')
        log('{}: {}'.format(output_prefix + '*.pdb',
                            glob.glob(output_prefix + '*.pdb')))
        log('{}: {}'.format(output_prefix + '*.mtz',
                            glob.glob(output_prefix + '*.mtz')))
        raise

    # List of links to make at the end of the run
    link_file_pairs = [(real_pdb, params.output.link_prefix + '.pdb'),
                       (real_mtz, params.output.link_prefix + '.mtz')]

    # Split conformations
    if params.options.split_conformations:
        params.split_conformations.settings.verbose = params.settings.verbose
        log.subheading('Splitting refined structure conformations')
        # Running split conformations
        out_files = split_conformations.split_conformations(
            filename=real_pdb, params=params.split_conformations, log=log)
        # Link output files to top
        for real_file in out_files:
            link_file = params.output.link_prefix + os.path.basename(
                real_file.replace(os.path.splitext(real_pdb)[0], ''))
            link_file_pairs.append([real_file, link_file])

    # Link output files
    log.subheading('linking output files')
    for real_file, link_file in link_file_pairs:
        log('Linking {} -> {}'.format(link_file, real_file))
        if not os.path.exists(real_file):
            log('file does not exist: {}'.format(real_file))
            continue
        if os.path.exists(link_file) and os.path.islink(link_file):
            log('removing existing link: {}'.format(link_file))
            os.unlink(link_file)
        if not os.path.exists(link_file):
            rel_symlink(real_file, link_file)

    log.heading('finished - refinement')
예제 #3
0
    def __call__(self, mcd):

        # ==============================>
        # Clean previous PanDDA
        # ==============================>
        try:
            shutil.rmtree(self.out_dir,
                          ignore_errors=True)
        except Exception as e:
            print(e)

        # ==============================>
        # Make output dir
        # ==============================>
        os.mkdir(str(self.out_dir))

        # ==============================>
        # Get path objects
        # ==============================>
        dataset_template = {"event_map.ccp4": None,
                            "ligand": {"dummy": None}
                            }

        processed_datasets = {dtag: dataset_template for dtag, d in mcd.datasets.items()}

        analyses = {"pandda_analyse_events.csv": None}

        pandda = {"processed_datasets": processed_datasets,
                  "analyses": analyses}

        # ==============================>
        # Initialise pandda output
        # ==============================>
        self.tree = Tree(str(self.out_dir), pandda)

        # ==============================>
        # Initialise dataset output
        # ==============================>
        errors = []
        for dtag, dataset in mcd.datasets.items():

            # ==============================>
            # Get path
            # ==============================>
            dataset_path = p.Path(self.tree(("processed_datasets", dtag))[0])

            # ==============================>
            # Create links to input files
            # ==============================>
            # Links for the dataset input files
            # TODO: seems inelegant
            link_pdb = str(dataset_path / "pandda_input.pdb")
            link_mtz = str(dataset_path / "pandda_input.mtz")

            # Link the input files to the output folder
            if not os.path.exists(link_pdb): rel_symlink(orig=dataset.model.filename, link=link_pdb)
            if not os.path.exists(link_mtz): rel_symlink(orig=dataset.data.filename, link=link_mtz)
            # ==============================>
            # Search for ligand files and copy them to the output ligands folder
            # ==============================>
            lig_files = glob.glob(os.path.join(os.path.dirname(dataset.model.filename), self.lig_style))
            for lig_file in lig_files:
                # Find all files with the same basename but allowing for different extensions. Then link to output folder.
                lig_base = os.path.splitext(lig_file)[0] + '.*'
                lig_matches = glob.glob(lig_base)
                for lig in lig_matches:
                    out_path = os.path.join(str(dataset_path / 'ligand'), os.path.basename(lig))
                    if os.path.exists(lig) and (not os.path.exists(out_path)):
                        try:
                            shutil.copy(lig, out_path)
                        except:
                            pass
            # # ==============================>
            # # Lastly: Update the pointer to the new path (relative to the pandda directory)
            # # ==============================>
            # dataset.model.filename = os.path.relpath(link_pdb, start=self.out_dir)
            # dataset.data.filename = os.path.relpath(link_mtz, start=self.out_dir)

        return self.tree
예제 #4
0
def run(params):
    # Identify any existing output directories
    current_dirs = sorted(glob.glob(params.output.dir_prefix + "*"))
    if not current_dirs:
        next_int = 1
    else:
        current_nums = [
            s.replace(params.output.dir_prefix, "") for s in current_dirs
        ]
        next_int = sorted(map(int, current_nums))[-1] + 1

    # Create output directory name from int
    out_dir = params.output.dir_prefix + "{:04}".format(next_int)
    # Create output directory
    os.mkdir(out_dir)

    # Create log object
    log = Log(
        log_file=os.path.join(out_dir,
                              params.output.out_prefix + ".quick-refine.log"),
        verbose=params.settings.verbose,
    )

    # Report
    if current_dirs:
        log("Found existing refinement directories: \n\t{}".format(
            "\n\t".join(current_dirs)))
        log("")
    log("Creating new output directory: {}".format(out_dir))

    # Validate input parameters
    log.subheading("Validating input parameters")
    assert params.input.pdb is not None, "No PDB given for refinement"
    assert params.input.mtz is not None, "No MTZ given for refinement"

    if os.path.islink(params.input.mtz):
        log("Converting mtz path to real path:")
        log("{} -> {}".format(params.input.mtz,
                              os.path.realpath(params.input.mtz)))
        params.input.mtz = os.path.realpath(params.input.mtz)

    # Link input
    log("Copying/linking files to refinement folder")
    shutil.copy(params.input.pdb,
                os.path.abspath(os.path.join(out_dir, "input.pdb")))
    rel_symlink(params.input.mtz,
                os.path.abspath(os.path.join(out_dir, "input.mtz")))
    # Copy parameter file to output folder
    if params.input.params:
        shutil.copy(params.input.params,
                    os.path.abspath(os.path.join(out_dir, "input.params")))

    # Create output prefixes

    output_prefix = out_dir

    log("Real output file path prefixes: {}".format(output_prefix))
    log("Link output file path prefixes: {}".format(params.output.link_prefix))

    # Create command objects
    log.subheading("Preparing command line input for refinement program")

    # PHENIX
    if params.options.program == "phenix":
        cm = CommandManager("phenix.refine")
        # Command line args
        cm.add_command_line_arguments([params.input.pdb, params.input.mtz])
        cm.add_command_line_arguments(
            ["output.prefix={}".format(output_prefix)])
        if params.input.cif:
            cm.add_command_line_arguments(params.input.cif)
        if params.input.params and os.path.exists(params.input.params):
            cm.add_command_line_arguments([params.input.params])

    # REFMAC
    elif params.options.program == "refmac":
        cm = CommandManager("refmac5")
        # Command line args
        cm.add_command_line_arguments(
            ["xyzin", params.input.pdb, "hklin", params.input.mtz])
        cm.add_command_line_arguments([
            "xyzout", output_prefix + ".pdb", "hklout", output_prefix + ".mtz"
        ])
        if params.input.cif:
            for cif in params.input.cif:
                cm.add_command_line_arguments(["libin", cif])
        # Standard input
        if params.input.params:
            cm.add_standard_input(open(params.input.params).read().split("\n"))

        cm.add_standard_input(["END"])

    # Pass additional command line arguments?
    if params.input.args:
        cm.add_command_line_arguments(params.input.args)

    # Report
    log(str(cm))

    log.bar()
    log("running refinement... ({})".format(cm.program[0]))
    out = cm.run()

    log.subheading("Refinement output")
    if not log.verbose:
        log("output written to log file ({} lines)".format(
            cm.output.count("\n")))

    log("\n" + cm.output, show=False)

    if out != 0:
        log.subheading("Refinement Errors")
        log(cm.error)

    log.subheading("Post-processing output files")

    # Find output files
    try:
        real_pdb = os.path.join(output_prefix,
                                params.output.out_prefix + ".pdb")
        real_mtz = os.path.join(output_prefix,
                                params.output.out_prefix + ".mtz")

        print(real_pdb, "\n", real_mtz)

    except:
        log("Refinement has failed - output files do not exist")
        log("{}: {}".format(output_prefix + "*.pdb",
                            glob.glob(output_prefix + "*.pdb")))
        log("{}: {}".format(output_prefix + "*.mtz",
                            glob.glob(output_prefix + "*.mtz")))
        raise

    # List of links to make at the end of the run
    link_file_pairs = [
        (real_pdb, params.output.link_prefix + ".pdb"),
        (real_mtz, params.output.link_prefix + ".mtz"),
    ]

    print(link_file_pairs)

    # Split conformations
    if params.options.split_conformations:
        params.split_conformations.settings.verbose = params.settings.verbose
        log.subheading("Splitting refined structure conformations")
        # Running split conformations
        out_files = split_conformations.split_conformations(
            filename=real_pdb, params=params.split_conformations, log=log)
        # Link output files to top
        for real_file in out_files:
            link_file = params.output.link_prefix + os.path.basename(
                real_file.replace(os.path.splitext(real_pdb)[0], ""))
            link_file_pairs.append([real_file, link_file])

    # Link output files
    log.subheading("linking output files")
    for real_file, link_file in link_file_pairs:
        log("Linking {} -> {}".format(link_file, real_file))
        if not os.path.exists(real_file):
            log("file does not exist: {}".format(real_file))
            continue
        if os.path.exists(link_file) and os.path.islink(link_file):
            log("removing existing link: {}".format(link_file))
            os.unlink(link_file)
        if not os.path.exists(link_file):
            rel_symlink(real_file, link_file)

    log.heading("finished - refinement")
예제 #5
0
def symlink(source_path,
            target_path,
            ):
    rel_symlink(str(source_path),
                str(target_path),
                )