Ejemplo n.º 1
0
def standardise_multiconformer_model(hierarchy,
                                     pruning_rmsd=0.1,
                                     in_place=False,
                                     verbose=False,
                                     log=None):
    """Standardise hierarchies by expanding alternate model conformations, and then trimming alternate conformations where possible"""

    # Default to a verbose log when the caller does not supply one
    if log is None:
        log = Log(verbose=True)

    # Work on a copy unless the caller explicitly asked for in-place edits
    if not in_place:
        hierarchy = hierarchy.deep_copy()

    # Canonicalise atom ordering before any processing
    hierarchy.sort_atoms_in_place()

    log.heading('Preparing to standardise structure')

    # Step 1: make every conformation of the crystal explicit in the model
    log.subheading(
        'Explicitly expanding model to all conformations of the crystal')
    expand_alternate_conformations(hierarchy=hierarchy,
                                   in_place=True,
                                   verbose=verbose)

    # Step 2: collapse alternate conformers that are within pruning_rmsd
    log.subheading(
        'Pruning unneccessary multi-conformer residues in the expanded structure'
    )
    prune_redundant_alternate_conformations(
        hierarchy=hierarchy,
        required_altlocs=hierarchy.altloc_indices(),
        rmsd_cutoff=pruning_rmsd,
        in_place=True,
        verbose=verbose)

    return hierarchy
Ejemplo n.º 2
0
def run(params):
    """Identify pandda dataset folders and export each one.

    Resolves the pandda and export directories to absolute paths, selects
    dataset directories (optionally filtered by params.input.select_datasets),
    and calls process_and_export_folder on each existing directory.
    """

    # Create log object
    log = Log(log_file=os.path.abspath(params.output.log_file), verbose=True)

    # Change paths to absolute paths
    params.input.pandda_dir = os.path.abspath(params.input.pandda_dir)
    params.output.export_dir = os.path.abspath(params.output.export_dir)
    # Must be in the pandda directory (pandda objects use relative paths)
    os.chdir(params.input.pandda_dir)

    # Report modifed phil
    log.heading('Processed parameters')
    log(master_phil.format(params).as_str())

    ############################################################################

    log.heading('Identifying folders to export')

    # Find the dataset directories to be exported
    if params.input.select_datasets:
        # Each selection entry may itself be a comma-separated list of names
        # (was a list comprehension used only for its side effects)
        selected_datasets = []
        for s in params.input.select_datasets:
            selected_datasets.extend(s.split(','))
        export_dirs = sorted(
            os.path.join(params.input.pandda_dir, 'processed_datasets', p)
            for p in selected_datasets)
        # Filter by existence of path
        export_dirs = [p for p in export_dirs if os.path.exists(p)]
    else:
        export_dirs = sorted(
            glob.glob(
                os.path.join(params.input.pandda_dir, 'processed_datasets',
                             '*')))
    assert export_dirs, 'No Export Directories Found'

    # Report
    log('Exporting:\n\t' + '\n\t'.join(export_dirs))

    # Create output directory
    if not os.path.exists(params.output.export_dir):
        os.mkdir(params.output.export_dir)

    # Merge the fitted structures (renamed loop var: `dir` shadows the builtin)
    for export_dir in export_dirs:
        process_and_export_folder(dir=export_dir, params=params, log=log)

    log.heading('FINISHED')
Ejemplo n.º 3
0
def run(params):
    """Split every input multi-state structure into its conformations."""

    # Open the log file for this run
    log = Log(log_file=params.output.log, verbose=True)

    log.heading('Validating input parameters')

    assert params.input.pdb, 'No PDB files given'

    log.heading('Splitting multi-state structures')

    # Extract the conformations of each structure in turn
    for pdb in params.input.pdb:
        split_conformations(filename=pdb, params=params, log=log)

    log.heading('FINISHED')
Ejemplo n.º 4
0
def run(params):
    """Standardise the conformations of each input pdb file.

    Each structure is checked to contain exactly one model, standardised
    via standardise_multiconformer_model, re-sorted, and written next to
    the input file with params.output.suffix appended to the basename.
    """

    # Create log file
    log = Log(log_file=params.output.log, verbose=True)

    # Report
    log.heading('Validating input parameters and input files')

    # Check one or other have been provided
    assert params.input.pdb, 'No pdb files have been provided'
    for pdb in params.input.pdb:
        if not os.path.exists(pdb):
            raise Sorry('pdb does not exist: {}'.format(pdb))

    for pdb in params.input.pdb:

        log.subheading('Reading pdb: {}'.format(pdb))
        obj = strip_pdb_to_input(pdb, remove_ter=True)
        try:
            obj.hierarchy.only_model()
        except Exception:
            # was a bare `except:`, which would also have swallowed
            # KeyboardInterrupt/SystemExit
            raise Sorry('Input structures may only have one model')

        # Merge the hierarchies
        final = standardise_multiconformer_model(
            hierarchy=obj.hierarchy,
            pruning_rmsd=params.options.pruning_rmsd,
            in_place=True,
            verbose=params.settings.verbose)

        # Update the atoms numbering
        final.sort_atoms_in_place()

        # Write output file
        filename = os.path.splitext(pdb)[0] + params.output.suffix + '.pdb'
        log('Writing output structure to {}'.format(filename))
        final.write_pdb_file(file_name=filename,
                             crystal_symmetry=obj.crystal_symmetry())

    log.heading('FINISHED')
    log.heading('Final Parameters')
    log(master_phil.format(params).as_str().strip())

    return
Ejemplo n.º 5
0
def run(params):
    """Run one quick-refinement cycle in a new numbered output directory.

    Creates the next sequential output directory, copies/links the input
    files into it, builds and runs the refinement command for the chosen
    program (phenix / refmac / buster), then post-processes the output
    files, optionally splits conformations, and links results to the top.
    """

    # Identify any existing output directories
    current_dirs = sorted(glob.glob(params.output.dir_prefix + '*'))
    if not current_dirs:
        next_int = 1
    else:
        current_nums = [
            s.replace(params.output.dir_prefix, '') for s in current_dirs
        ]
        next_int = sorted(map(int, current_nums))[-1] + 1

    # Create output directory name from int
    out_dir = params.output.dir_prefix + '{:04}'.format(next_int)
    # Create output directory
    os.mkdir(out_dir)

    # Create log object
    log = Log(log_file=os.path.join(
        out_dir, params.output.out_prefix + '.quick-refine.log'),
              verbose=params.settings.verbose)

    # Report
    if current_dirs:
        log('Found existing refinement directories: \n\t{}'.format(
            '\n\t'.join(current_dirs)))
        log('')
    log('Creating new output directory: {}'.format(out_dir))

    # Validate input parameters
    log.subheading('Validating input parameters')
    assert params.input.pdb is not None, 'No PDB given for refinement'
    assert params.input.mtz is not None, 'No MTZ given for refinement'

    # Resolve a symlinked mtz so downstream tools see a stable real path
    if os.path.islink(params.input.mtz):
        log('Converting mtz path to real path:')
        log('{} -> {}'.format(params.input.mtz,
                              os.path.realpath(params.input.mtz)))
        params.input.mtz = os.path.realpath(params.input.mtz)

    # Link input
    log('Copying/linking files to refinement folder')
    shutil.copy(params.input.pdb,
                os.path.abspath(os.path.join(out_dir, 'input.pdb')))
    rel_symlink(params.input.mtz,
                os.path.abspath(os.path.join(out_dir, 'input.mtz')))
    # Copy parameter file to output folder
    if params.input.params:
        shutil.copy(params.input.params,
                    os.path.abspath(os.path.join(out_dir, 'input.params')))

    # Create output prefixes
    output_prefix = os.path.join(out_dir, params.output.out_prefix)
    log('Real output file path prefixes: {}'.format(output_prefix))
    log('Link output file path prefixes: {}'.format(params.output.link_prefix))

    # Create command objects
    log.subheading('Preparing command line input for refinement program')

    # PHENIX
    if params.options.program == 'phenix':
        cm = CommandManager('phenix.refine')
        # Command line args
        cm.add_command_line_arguments([params.input.pdb, params.input.mtz])
        cm.add_command_line_arguments(
            ['output.prefix={}'.format(output_prefix)])
        if params.input.cif:
            cm.add_command_line_arguments(params.input.cif)
        if params.input.params and os.path.exists(params.input.params):
            cm.add_command_line_arguments([params.input.params])

    # REFMAC
    elif params.options.program == 'refmac':
        cm = CommandManager('refmac5')
        # Command line args
        cm.add_command_line_arguments(
            ['xyzin', params.input.pdb, 'hklin', params.input.mtz])

        cm.add_command_line_arguments([
            'xyzout', output_prefix + '.pdb', 'hklout', output_prefix + '.mtz'
        ])
        if params.input.cif:
            for cif in params.input.cif:
                cm.add_command_line_arguments(['libin', cif])
        # Standard input (use a context manager so the handle is not leaked)
        if params.input.params:
            with open(params.input.params) as param_file:
                cm.add_standard_input(param_file.read().split('\n'))

        cm.add_standard_input(['END'])

    elif params.options.program == "buster":
        cm = CommandManager('refine')
        # Command line arguments
        # inputs
        cm.add_command_line_arguments(
            ['-p', params.input.pdb, '-m', params.input.mtz, '-d', out_dir])

        if params.input.cif:
            for cif in params.input.cif:
                cm.add_command_line_arguments(['-l', cif])

        if params.input.params:
            cm.add_command_line_arguments(['-Gelly', params.input.params])

    else:
        # Previously an unrecognised program fell through and caused a
        # NameError on `cm` below -- fail fast with a clear message instead
        raise ValueError(
            'Unknown refinement program: {}'.format(params.options.program))

    # Pass additional command line arguments?
    if params.input.args:
        cm.add_command_line_arguments(params.input.args)

    # Report
    log(str(cm))

    log.bar()
    log('running refinement... ({})'.format(cm.program[0]))
    out = cm.run()

    log.subheading('Refinement output')
    if not log.verbose:
        log('output written to log file ({} lines)'.format(
            cm.output.count('\n')))

    log('\n' + cm.output, show=False)

    if out != 0:
        log.subheading('Refinement Errors')
        log(cm.error)

    log.subheading('Post-processing output files')

    # Buster writes fixed filenames into out_dir -- rename to our prefix
    if params.options.program == "buster":
        log.subheading('Renaming buster output files')

        shutil.move(src=os.path.join(out_dir, 'refine.pdb'),
                    dst=output_prefix + '.pdb')

        shutil.move(src=os.path.join(out_dir, 'refine.mtz'),
                    dst=output_prefix + '.mtz')

    # Find output files (IndexError is what the empty-glob lookup raises;
    # was a bare `except:`)
    try:
        real_pdb = glob.glob(output_prefix + '*.pdb')[0]
        real_mtz = glob.glob(output_prefix + '*.mtz')[0]
    except IndexError:
        log('Refinement has failed - output files do not exist')
        log('{}: {}'.format(output_prefix + '*.pdb',
                            glob.glob(output_prefix + '*.pdb')))
        log('{}: {}'.format(output_prefix + '*.mtz',
                            glob.glob(output_prefix + '*.mtz')))
        raise

    # List of links to make at the end of the run
    link_file_pairs = [(real_pdb, params.output.link_prefix + '.pdb'),
                       (real_mtz, params.output.link_prefix + '.mtz')]

    # Split conformations
    if params.options.split_conformations:
        params.split_conformations.settings.verbose = params.settings.verbose
        log.subheading('Splitting refined structure conformations')
        # Running split conformations
        out_files = split_conformations.split_conformations(
            filename=real_pdb, params=params.split_conformations, log=log)
        # Link output files to top
        for real_file in out_files:
            link_file = params.output.link_prefix + os.path.basename(
                real_file.replace(os.path.splitext(real_pdb)[0], ''))
            link_file_pairs.append([real_file, link_file])

    # Link output files
    log.subheading('linking output files')
    for real_file, link_file in link_file_pairs:
        log('Linking {} -> {}'.format(link_file, real_file))
        if not os.path.exists(real_file):
            log('file does not exist: {}'.format(real_file))
            continue
        # Replace an existing link, but never clobber a real file
        if os.path.exists(link_file) and os.path.islink(link_file):
            log('removing existing link: {}'.format(link_file))
            os.unlink(link_file)
        if not os.path.exists(link_file):
            rel_symlink(real_file, link_file)

    log.heading('finished - refinement')
Ejemplo n.º 6
0
def run(params):
    """Cluster input pdb/mtz datasets by space group and unit cell.

    Labels the input files, builds crystal summaries, groups them by
    space group, clusters each group by unit-cell variation, writes
    dendrogram images to <out_dir>/dendrograms, and copies or symlinks
    the clustered files into per-cluster output directories.
    """

    # Validate input files
    if not (params.input.pdb or params.input.mtz):
        raise Sorry(
            'No pdb/mtz files have been provided: specify with input.pdb or input.mtz'
        )
    # Check and create output directory
    if not params.output.out_dir:
        raise Sorry(
            'No output directory has been specified: specify with output.out_dir'
        )
    if not os.path.exists(params.output.out_dir):
        os.mkdir(params.output.out_dir)
    # Define and create image directory
    img_dir = os.path.join(params.output.out_dir, 'dendrograms')
    if not os.path.exists(img_dir):
        os.mkdir(img_dir)

    # Create log object
    log = Log(log_file=params.output.out_dir + '.clustering.log', verbose=True)

    # Define output_file_function to copy or symlink files as needed
    if params.output.file_mode == 'symlink':
        out_file_func = os.symlink
    elif params.output.file_mode == 'copy':
        out_file_func = shutil.copy
    else:
        # Previously an unknown file_mode left out_file_func undefined and
        # only failed with a NameError much later, during file export
        raise Sorry('Unknown output.file_mode: {}'.format(
            params.output.file_mode))

    log.heading('Processing input pdb/mtz files')
    log('Making dataset labels for {} pdb(s) and {} mtz(s)'.format(
        len(params.input.pdb), len(params.input.mtz)))

    try:
        if params.input.labels.pdb_label == 'filename':
            p_labels = [
                os.path.basename(os.path.splitext(f)[0])
                for f in params.input.pdb
            ]
        elif params.input.labels.pdb_label == 'foldername':
            p_labels = [
                os.path.basename(os.path.dirname(f)) for f in params.input.pdb
            ]
        elif params.input.labels.pdb_regex:
            p_labels = [
                re.findall(params.input.labels.pdb_regex, f)[0]
                for f in params.input.pdb
            ]
        else:
            p_labels = [
                'PDB-{:06d}'.format(i) for i in range(len(params.input.pdb))
            ]
        if params.input.labels.mtz_label == 'filename':
            m_labels = [
                os.path.basename(os.path.splitext(f)[0])
                for f in params.input.mtz
            ]
        elif params.input.labels.mtz_label == 'foldername':
            m_labels = [
                os.path.basename(os.path.dirname(f)) for f in params.input.mtz
            ]
        elif params.input.labels.mtz_regex:
            m_labels = [
                re.findall(params.input.labels.mtz_regex, f)[0]
                for f in params.input.mtz
            ]
        else:
            m_labels = [
                'MTZ-{:06d}'.format(i) for i in range(len(params.input.mtz))
            ]
    except Exception:
        # Was a bare `except:` with a py2 `print` statement that referenced
        # `f`, which is unbound if the failure happened before any loop ran
        log('Error reading input files while generating dataset labels')
        raise

    # Check labels are unique
    set_m_labels = set(m_labels)
    set_p_labels = set(p_labels)
    if len(set_m_labels) != len(m_labels):
        raise Sorry('MTZ labels are not unique. Repeated labels: {}'.format(
            ' '.join([
                '{}'.format(l) for l in set_m_labels if m_labels.count(l) != 1
            ])))
    if len(set_p_labels) != len(p_labels):
        raise Sorry('PDB labels are not unique. Repeated labels: {}'.format(
            ' '.join([l for l in set_p_labels if p_labels.count(l) != 1])))

    # Report labels
    if p_labels:
        log.subheading('PDB Labels')
        log(', '.join(p_labels))
    if m_labels:
        log.subheading('MTZ Labels')
        log(', '.join(m_labels))

    # Load crystal summaries
    log.bar(True, True)
    log('Reading data for {} pdb(s) and {} mtz(s)'.format(
        len(params.input.pdb), len(params.input.mtz)))

    if params.input.pdb:
        pdb_summaries = [
            CrystalSummary.from_pdb(pdb_file=f, id=lab)
            for f, lab in zip(params.input.pdb, p_labels)
        ]
    else:
        pdb_summaries = []
    if params.input.mtz:
        mtz_summaries = [
            CrystalSummary.from_mtz(mtz_file=f, id=lab)
            for f, lab in zip(params.input.mtz, m_labels)
        ]
    else:
        mtz_summaries = []

    # Group by SpaceGroup
    log.subheading('Grouping {} crystals by space group...'.format(
        len(pdb_summaries + mtz_summaries)))
    crystal_groups = CrystalGroup.by_space_group(crystals=pdb_summaries +
                                                 mtz_summaries)
    log('Grouped crystals into {} space groups'.format(len(crystal_groups)))

    log.heading('Analysing variation of unit cells for each space group')

    for cg in crystal_groups:

        # e.g. "P 21 21 21 (No. 19)" -> "sg-P_21_21_21"
        sg_name = 'sg-{}'.format(cg.space_groups[0].split(' (')[0].replace(
            ' ', '_'))

        log.subheading('Space Group {}: {} dataset(s)'.format(
            cg.space_groups[0], len(cg.crystals)))

        log('Unit Cell Variation:')
        log(numpy.round(cg.uc_stats.as_pandas_table().T, 2))

        log('')
        log('Making unit cell dendrogram for all crystals with this spacegroup'
            )
        if len(cg.crystals) > 1:
            cg.dendrogram(fname=os.path.join(img_dir,
                                             '{}-all.png'.format(sg_name)),
                          xlab='Crystal',
                          ylab='Linear Cell Variation',
                          annotate_y_min=params.clustering.label_nodes_above)

        log('')
        log('Clustering {} unit cells...'.format(len(cg.crystals)))
        sg_crystal_groups = cg.by_unit_cell(
            cg.crystals, cutoff=params.clustering.lcv_cutoff)
        log('Clustered crystals into {} groups'.format(len(sg_crystal_groups)))

        for i_cg2, cg2 in enumerate(sg_crystal_groups):

            cluster_name = '{}-cluster-{}'.format(sg_name, i_cg2 + 1)

            log.bar(True, False)
            log('Processing cluster: {}'.format(cluster_name))
            log.bar(False, True)

            log('Unit Cell Variation:')
            log(numpy.round(cg.uc_stats.as_pandas_table().T, 2))

            log('')
            log('Making unit cell dendrogram for this cluster of crystals')
            if len(cg2.crystals) > 1:
                cg2.dendrogram(
                    fname=os.path.join(img_dir, '{}.png'.format(cluster_name)),
                    xlab='Crystal',
                    ylab='Linear Cell Variation',
                    ylim=(0, params.clustering.lcv_cutoff),
                    annotate_y_min=params.clustering.label_nodes_above)

            log('Copying files to output directory')

            # Go through and link the datasets for each of the spacegroups into a separate folder
            sub_dir = os.path.join(params.output.out_dir, cluster_name)
            if not os.path.exists(sub_dir): os.mkdir(sub_dir)

            # Split the mtzs and pdbs into separate directories -- or not
            if params.output.split_pdbs_and_mtzs:
                mtz_dir = os.path.join(sub_dir, 'mtzs')
                if not os.path.exists(mtz_dir): os.mkdir(mtz_dir)
                pdb_dir = os.path.join(sub_dir, 'pdbs')
                if not os.path.exists(pdb_dir): os.mkdir(pdb_dir)
            else:
                mtz_dir = pdb_dir = sub_dir

            for c in cg2.crystals:
                # Set parameters based on pdb or mtz
                if c.mtz_file:
                    sub_sub_dir = os.path.join(mtz_dir, c.id)
                    def_file = os.path.abspath(c.mtz_file)
                    def_suff = '.mtz'
                    pos_suff = '.pdb'
                elif c.pdb_file:
                    sub_sub_dir = os.path.join(pdb_dir, c.id)
                    def_file = os.path.abspath(c.pdb_file)
                    def_suff = '.pdb'
                    pos_suff = '.mtz'
                else:
                    # Neither file present: skip rather than silently reusing
                    # sub_sub_dir/def_file left over from the previous crystal
                    continue
                # Create subdirectory
                if not os.path.exists(sub_sub_dir): os.mkdir(sub_sub_dir)
                # Output file base template
                out_base = os.path.join(sub_sub_dir, c.id)
                # Export file
                out_file = out_base + def_suff
                if not os.path.exists(out_file):
                    out_file_func(def_file, out_file)
                # output other as well if filenames are the same
                pos_file = def_file.replace(def_suff, pos_suff)
                out_file = out_base + pos_suff
                if os.path.exists(pos_file) and not os.path.exists(out_file):
                    out_file_func(pos_file, out_file)

    log.heading('finished')
Ejemplo n.º 7
0
def run(params):
    """Run one quick-refinement cycle in a new numbered output directory.

    Creates the next sequential output directory, copies/links the input
    files into it, builds and runs the refinement command for the chosen
    program (phenix / refmac), verifies the output files exist, optionally
    splits conformations, and links results to the top level.
    """
    # Identify any existing output directories
    current_dirs = sorted(glob.glob(params.output.dir_prefix + "*"))
    if not current_dirs:
        next_int = 1
    else:
        current_nums = [
            s.replace(params.output.dir_prefix, "") for s in current_dirs
        ]
        next_int = sorted(map(int, current_nums))[-1] + 1

    # Create output directory name from int
    out_dir = params.output.dir_prefix + "{:04}".format(next_int)
    # Create output directory
    os.mkdir(out_dir)

    # Create log object
    log = Log(
        log_file=os.path.join(out_dir,
                              params.output.out_prefix + ".quick-refine.log"),
        verbose=params.settings.verbose,
    )

    # Report
    if current_dirs:
        log("Found existing refinement directories: \n\t{}".format(
            "\n\t".join(current_dirs)))
        log("")
    log("Creating new output directory: {}".format(out_dir))

    # Validate input parameters
    log.subheading("Validating input parameters")
    assert params.input.pdb is not None, "No PDB given for refinement"
    assert params.input.mtz is not None, "No MTZ given for refinement"

    # Resolve a symlinked mtz so downstream tools see a stable real path
    if os.path.islink(params.input.mtz):
        log("Converting mtz path to real path:")
        log("{} -> {}".format(params.input.mtz,
                              os.path.realpath(params.input.mtz)))
        params.input.mtz = os.path.realpath(params.input.mtz)

    # Link input
    log("Copying/linking files to refinement folder")
    shutil.copy(params.input.pdb,
                os.path.abspath(os.path.join(out_dir, "input.pdb")))
    rel_symlink(params.input.mtz,
                os.path.abspath(os.path.join(out_dir, "input.mtz")))
    # Copy parameter file to output folder
    if params.input.params:
        shutil.copy(params.input.params,
                    os.path.abspath(os.path.join(out_dir, "input.params")))

    # Create output prefixes

    output_prefix = out_dir

    log("Real output file path prefixes: {}".format(output_prefix))
    log("Link output file path prefixes: {}".format(params.output.link_prefix))

    # Create command objects
    log.subheading("Preparing command line input for refinement program")

    # PHENIX
    if params.options.program == "phenix":
        cm = CommandManager("phenix.refine")
        # Command line args
        cm.add_command_line_arguments([params.input.pdb, params.input.mtz])
        cm.add_command_line_arguments(
            ["output.prefix={}".format(output_prefix)])
        if params.input.cif:
            cm.add_command_line_arguments(params.input.cif)
        if params.input.params and os.path.exists(params.input.params):
            cm.add_command_line_arguments([params.input.params])

    # REFMAC
    elif params.options.program == "refmac":
        cm = CommandManager("refmac5")
        # Command line args
        cm.add_command_line_arguments(
            ["xyzin", params.input.pdb, "hklin", params.input.mtz])
        cm.add_command_line_arguments([
            "xyzout", output_prefix + ".pdb", "hklout", output_prefix + ".mtz"
        ])
        if params.input.cif:
            for cif in params.input.cif:
                cm.add_command_line_arguments(["libin", cif])
        # Standard input (use a context manager so the handle is not leaked)
        if params.input.params:
            with open(params.input.params) as param_file:
                cm.add_standard_input(param_file.read().split("\n"))

        cm.add_standard_input(["END"])

    else:
        # Previously an unrecognised program fell through and caused a
        # NameError on `cm` below -- fail fast with a clear message instead
        raise ValueError(
            "Unknown refinement program: {}".format(params.options.program))

    # Pass additional command line arguments?
    if params.input.args:
        cm.add_command_line_arguments(params.input.args)

    # Report
    log(str(cm))

    log.bar()
    log("running refinement... ({})".format(cm.program[0]))
    out = cm.run()

    log.subheading("Refinement output")
    if not log.verbose:
        log("output written to log file ({} lines)".format(
            cm.output.count("\n")))

    log("\n" + cm.output, show=False)

    if out != 0:
        log.subheading("Refinement Errors")
        log(cm.error)

    log.subheading("Post-processing output files")

    # Find output files. NOTE(review): os.path.join never raises, so the
    # previous try/except here was dead code and missing outputs were never
    # detected -- check existence explicitly instead.
    real_pdb = os.path.join(output_prefix, params.output.out_prefix + ".pdb")
    real_mtz = os.path.join(output_prefix, params.output.out_prefix + ".mtz")
    log("Expected output files: {} {}".format(real_pdb, real_mtz))

    if not (os.path.exists(real_pdb) and os.path.exists(real_mtz)):
        log("Refinement has failed - output files do not exist")
        log("{}: {}".format(output_prefix + "*.pdb",
                            glob.glob(output_prefix + "*.pdb")))
        log("{}: {}".format(output_prefix + "*.mtz",
                            glob.glob(output_prefix + "*.mtz")))
        raise IOError("Refinement output files do not exist")

    # List of links to make at the end of the run
    link_file_pairs = [
        (real_pdb, params.output.link_prefix + ".pdb"),
        (real_mtz, params.output.link_prefix + ".mtz"),
    ]

    # Split conformations
    if params.options.split_conformations:
        params.split_conformations.settings.verbose = params.settings.verbose
        log.subheading("Splitting refined structure conformations")
        # Running split conformations
        out_files = split_conformations.split_conformations(
            filename=real_pdb, params=params.split_conformations, log=log)
        # Link output files to top
        for real_file in out_files:
            link_file = params.output.link_prefix + os.path.basename(
                real_file.replace(os.path.splitext(real_pdb)[0], ""))
            link_file_pairs.append([real_file, link_file])

    # Link output files
    log.subheading("linking output files")
    for real_file, link_file in link_file_pairs:
        log("Linking {} -> {}".format(link_file, real_file))
        if not os.path.exists(real_file):
            log("file does not exist: {}".format(real_file))
            continue
        # Replace an existing link, but never clobber a real file
        if os.path.exists(link_file) and os.path.islink(link_file):
            log("removing existing link: {}".format(link_file))
            os.unlink(link_file)
        if not os.path.exists(link_file):
            rel_symlink(real_file, link_file)

    log.heading("finished - refinement")
Ejemplo n.º 8
0
def merge_complementary_hierarchies(hierarchy_1,
                                    hierarchy_2,
                                    prune_duplicates_rmsd=0.1,
                                    in_place=False,
                                    verbose=False,
                                    log=None):
    """Merge hierarchies that are alternate models of the same crystal by expanding alternate model conformations, merging, and then trimming alternate conformations where possible"""

    # Default to a verbose log when the caller does not supply one
    if log is None:
        log = Log(verbose=True)

    # Work on copies unless the caller explicitly asked for in-place edits
    if not in_place:
        hierarchy_1, hierarchy_2 = (hierarchy_1.deep_copy(),
                                    hierarchy_2.deep_copy())

    # Canonicalise atom ordering in both structures before any processing
    for h in (hierarchy_1, hierarchy_2):
        h.sort_atoms_in_place()

    log.heading('Preparing to merge structures')

    # Step 1: make every conformation explicit in both models
    log.subheading(
        'Explicitly expanding models to all conformations of the crystal')
    log('Expanding alternate conformations in structure 1')
    expand_alternate_conformations(hierarchy=hierarchy_1,
                                   in_place=True,
                                   verbose=verbose)
    log('Expanding alternate conformations in structure 2')
    expand_alternate_conformations(hierarchy=hierarchy_2,
                                   in_place=True,
                                   verbose=verbose)

    # Step 2: shift structure 2's altlocs past those used by structure 1
    log.subheading(
        'Applying conformer shift to the second structure before merging')
    log('Identifying the altloc shift required from the number of alternate conformers in structure 1'
        )
    conf_offset = find_next_conformer_idx(
        hierarchy=hierarchy_1, all_ids=iotbx.pdb.systematic_chain_ids())
    log('Incrementing all altlocs in structure 2 by {}'.format(conf_offset))
    increment_altlocs(hierarchy=hierarchy_2,
                      offset=conf_offset,
                      in_place=True,
                      verbose=verbose)

    # Step 3: resolve residue-id collisions between the two structures
    log.subheading('Renaming residues that do not align between structures')
    resolve_residue_id_clashes(fixed_hierarchy=hierarchy_1,
                               moving_hierarchy=hierarchy_2,
                               in_place=True,
                               verbose=verbose)

    log.heading('Merging structures')

    # Step 4: move structure 2's residues into structure 1
    log('Transferring residues from Structure 2 to Structure 1')
    transfer_residue_groups_from_other(acceptor_hierarchy=hierarchy_1,
                                       donor_hierarchy=hierarchy_2,
                                       in_place=True,
                                       verbose=verbose)

    log.heading('Post-processing structure')

    # Step 5: collapse duplicate conformers within prune_duplicates_rmsd
    log('Pruning unneccessary multi-conformer residues in the merged structure'
        )
    prune_redundant_alternate_conformations(
        hierarchy=hierarchy_1,
        required_altlocs=hierarchy_1.altloc_indices(),
        rmsd_cutoff=prune_duplicates_rmsd,
        in_place=True,
        verbose=verbose)

    return hierarchy_1
Ejemplo n.º 9
0
def run(params):
    """Merge a minor conformation structure into a major conformation structure.

    Workflow: validate the input parameters and files, scale the occupancies
    of the two input structures, merge them into one multi-conformer model,
    normalise the occupancies of the merged model, write the output pdb file,
    and optionally generate refinement restraints for the merged structure.

    Args:
        params: Extracted phil parameter object with input/output/options/
            settings scopes (plus an optional restraints scope).

    Raises:
        Exception: For invalid parameter combinations, missing input files,
            invalid occupancy multipliers, or a pre-existing output file
            (when settings.overwrite is False).
        Sorry: If either input structure contains more than one model.
    """

    # Create log file
    log = Log(log_file=params.output.log, verbose=True)

    # Report
    log.heading('Validating input parameters and input files')

    # Reject ambiguous input: provide either major+minor OR a two-element pdb list
    if (params.input.major or params.input.minor
        ) and not (params.input.pdb == [None] or params.input.pdb == []):
        raise Exception(
            'Have provided input.major & input.minor, as well as files to input.pdb. Specify either input.major & input.minor, or two input.pdb.'
        )
    # Assign files to major and minor if necessary (pdb[0] -> major, pdb[1] -> minor)
    if not (params.input.major and params.input.minor):
        if len(params.input.pdb) != 2:
            raise Exception('Must provide zero or two pdb files to input.pdb')
        params.input.major = params.input.pdb[0]
        params.input.minor = params.input.pdb[1]
    # Check files exist
    if not os.path.exists(params.input.major):
        raise Exception('input.major does not exist: {}'.format(
            params.input.major))
    if not os.path.exists(params.input.minor):
        raise Exception('input.minor does not exist: {}'.format(
            params.input.minor))
    # Sanity checks -- should be guaranteed by the assignment logic above
    assert params.input.major
    assert params.input.minor
    assert params.output.pdb
    # Check existence of output pdb and delete as necessary
    if os.path.exists(params.output.pdb):
        if params.settings.overwrite:
            os.remove(params.output.pdb)
        else:
            raise Exception(
                'Output file already exists: {}. Run with overwrite=True to remove this file'
                .format(params.output.pdb))

    # Check that the input occupancy multipliers are valid (must not exceed 1.0)
    if (params.options.minor_occupancy >
            1.0) or (params.options.major_occupancy > 1.0):
        raise Exception(
            'minor_occupancy and major_occupancy cannot be greater than 1.0 (currently {} and {})'
            .format(params.options.minor_occupancy,
                    params.options.major_occupancy))

    # Report validated parameters (the restraints scope is reported later if used)
    log.subheading('Processed merging parameters')
    for obj in master_phil.format(params).objects:
        if obj.name == 'restraints': continue
        log(obj.as_str().strip())

    # Read in the two structures (TER records removed for merging)
    log.subheading('Reading input files')
    maj_obj = strip_pdb_to_input(params.input.major, remove_ter=True)
    min_obj = strip_pdb_to_input(params.input.minor, remove_ter=True)

    # Check that each input structure contains exactly one model
    try:
        maj_obj.hierarchy.only_model()
        min_obj.hierarchy.only_model()
    except Exception:
        # FIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrowed to Exception.
        raise Sorry('Input structures may only have one model')

    # Multiply the input hierarchies by occupancy multipliers
    log.subheading('Updating input occupancies prior to merging')
    log('Multiplying occupancies of input.major by {}'.format(
        params.options.major_occupancy))
    maj_obj.hierarchy.atoms().set_occ(maj_obj.hierarchy.atoms().extract_occ() *
                                      params.options.major_occupancy)
    log('Multiplying occupancies of input.minor by {}'.format(
        params.options.minor_occupancy))
    min_obj.hierarchy.atoms().set_occ(min_obj.hierarchy.atoms().extract_occ() *
                                      params.options.minor_occupancy)

    # Merge the hierarchies into one multi-conformer structure
    final_struct = merge_complementary_hierarchies(
        hierarchy_1=maj_obj.hierarchy,
        hierarchy_2=min_obj.hierarchy,
        prune_duplicates_rmsd=params.options.prune_duplicates_rmsd,
        in_place=True,
        verbose=params.settings.verbose)

    # Set output occupancies
    log.subheading('Post-processing occupancies')
    # Set all main-conf (blank altloc) occupancies to 1.0
    log('Setting all main-conf occupancies to 1.0')
    set_conformer_occupancy(hierarchy=final_struct,
                            altlocs=[''],
                            occupancy=1.0,
                            in_place=True,
                            verbose=params.settings.verbose)
    # Optionally spread occupancy evenly across all alternate conformers
    if params.options.reset_all_occupancies:
        # Calculate number of altlocs and associated occupancy
        altlocs = [a for a in final_struct.altloc_indices() if a]
        if altlocs:
            new_occ = 1.0 / len(altlocs)
            # Set the occupancies
            log('Setting all conformer ({}) occupancies to {}'.format(
                ','.join(altlocs), new_occ))
            set_conformer_occupancy(hierarchy=final_struct,
                                    altlocs=altlocs,
                                    occupancy=new_occ,
                                    in_place=True,
                                    verbose=params.settings.verbose)

    # Update the atom ordering and serial numbers before writing
    final_struct.sort_atoms_in_place()
    final_struct.atoms_reset_serial()
    # Write output file (symmetry taken from the major structure)
    log('Writing output structure to {}'.format(params.output.pdb))
    final_struct.write_pdb_file(file_name=params.output.pdb,
                                crystal_symmetry=maj_obj.crystal_symmetry())

    # Run the restraint generation for the merged structure if requested
    if params.output.make_restraints:

        # Transfer the other phil objects from the restraint-generation master phil
        r_params = make_restraints.master_phil.extract()
        for name, obj in r_params.__dict__.items():
            if name.startswith('_'): continue
            if name not in params.restraints.__dict__:
                params.restraints.__inject__(name, obj)

        # Apply the output of merging to input of restraints
        params.restraints.input.pdb = params.output.pdb
        # Rename output files to be in same folder as output structure
        if params.restraints.output.phenix:
            params.restraints.output.phenix = os.path.join(
                os.path.dirname(params.output.pdb),
                os.path.basename(params.restraints.output.phenix))
        if params.restraints.output.refmac:
            params.restraints.output.refmac = os.path.join(
                os.path.dirname(params.output.pdb),
                os.path.basename(params.restraints.output.refmac))
        # Set log file name to this program if one given
        if params.output.log:
            params.restraints.output.log = params.output.log
        elif params.restraints.output.log:
            params.restraints.output.log = os.path.join(
                os.path.dirname(params.output.pdb),
                os.path.basename(params.restraints.output.log))
        # Generate restraints only for the conformers that came from input.minor
        params.restraints.local_restraints.altlocs = ','.join(
            [a for a in min_obj.hierarchy.altloc_indices() if a])
        # Update settings
        params.restraints.settings.verbose = params.settings.verbose
        params.restraints.settings.overwrite = params.settings.overwrite

        # Report
        log.heading('Parameters for generating restraints')
        log(master_phil.format(params).get('restraints').as_str().strip())
        log.heading('Generating restraints')
        # Run make_restraints
        make_restraints.run(params.restraints)

    log.heading('FINISHED')
    log.heading('Final Parameters')
    log(master_phil.format(params).as_str().strip())

    return
Ejemplo n.º 10
0
def run(params):
    """Summarise crystal information for a set of MTZ and/or PDB files.

    Groups the input files by space group and logs per-group statistics
    (wavelength, resolution, unit cell, column counts for MTZs; R-work/R-free
    for PDBs), plus the extreme values for resolution / R-free.

    Args:
        params: Extracted phil parameter object with input (mtz/pdb lists,
            file_label), check_for/summary column labels, and output.log_file.

    Raises:
        Exception: If input.file_label is not a supported labelling scheme.
        Sorry: If a required summary column is missing from any MTZ file.
    """

    log = Log(log_file=params.output.log_file, verbose=True)

    # Process MTZs
    if params.input.mtz:

        log.heading('Processing {} MTZ Files'.format(len(params.input.mtz)))

        # Derive dataset labels from the file name or the containing folder
        if   params.input.file_label=='filename':   labels = [os.path.basename(os.path.splitext(f)[0]) for f in params.input.mtz]
        elif params.input.file_label=='foldername': labels = [os.path.basename(os.path.dirname(f)) for f in params.input.mtz]
        else: raise Exception('MTZ labelling function not supported: {}'.format(params.input.file_label))

        log.bar()
        log('Grouping {} mtz files by space group'.format(len(params.input.mtz)))
        crystal_groups = CrystalGroup.by_space_group(crystals=[CrystalSummary.from_mtz(mtz_file=f, id=lab) for f,lab in zip(params.input.mtz, labels)])
        log('> Clustered into {} space group(s)'.format(len(crystal_groups)))
        log.bar()

        for cg in crystal_groups:

            log.subheading('Space group {} - {} datasets'.format(','.join(cg.space_groups), len(cg.crystals)))

            # check_for columns only warn; summary columns are required and
            # abort processing if missing from any dataset
            error = False
            for c in cg.crystals:
                for label in params.check_for.column_label:
                    if label is None: continue
                    if label not in c.column_labels:
                        log('Checking: column "{}" not in diffraction data of {}. columns present are {}'.format(label, c.mtz_file, c.column_labels))
                for label in params.summary.column_label:
                    if label is None: continue
                    if label not in c.column_labels:
                        log('Required: column "{}" not in diffraction data of {}. columns present are {}'.format(label, c.mtz_file, c.column_labels))
                        error = True
            if error is True: raise Sorry('There are datasets that do not contain the right columns.')

            log(crystal_statistics('Wavelength',         cg.crystals, value_func=lambda c: c.mtz_object().crystals()[1].datasets()[0].wavelength(), header=True))
            log(crystal_statistics('Resolution (high)',  cg.crystals, value_func=lambda c: c.high_res,                                              header=False))
            log(crystal_statistics('Resolution (low)',   cg.crystals, value_func=lambda c: c.low_res,                                               header=False))
            log(crystal_statistics('Unit cell - vol',    cg.crystals, value_func=lambda c: c.unit_cell.volume(),                                    header=False))
            log(crystal_statistics('Unit cell - a',      cg.crystals, value_func=lambda c: c.unit_cell.parameters()[0],                             header=False))
            log(crystal_statistics('Unit cell - b',      cg.crystals, value_func=lambda c: c.unit_cell.parameters()[1],                             header=False))
            log(crystal_statistics('Unit cell - c',      cg.crystals, value_func=lambda c: c.unit_cell.parameters()[2],                             header=False))
            log(crystal_statistics('Unit cell - alpha',  cg.crystals, value_func=lambda c: c.unit_cell.parameters()[3],                             header=False))
            log(crystal_statistics('Unit cell - beta',   cg.crystals, value_func=lambda c: c.unit_cell.parameters()[4],                             header=False))
            log(crystal_statistics('Unit cell - gamma',  cg.crystals, value_func=lambda c: c.unit_cell.parameters()[5],                             header=False, footer=True))

            for label in params.summary.column_label:
                if label is None: continue
                # FIX: bind `label` as a default argument -- the previous
                # closure captured the loop variable late, which is only
                # correct while crystal_statistics evaluates value_func
                # eagerly; binding makes it safe either way.
                log(crystal_statistics('Column: {}'.format(label), cg.crystals, value_func=lambda c, label=label: c.mtz_object().get_column(label).n_valid_values(),     header=False, footer=True))

            log.bar(True, False)
            log('Smallest + Largest Values')
            log.bar()

            log(crystal_min_max('Resolution', cg.crystals, value_func=lambda c: c.high_res))

    # Process PDBs
    if params.input.pdb:

        log.heading('Processing {} PDB Files'.format(len(params.input.pdb)))

        # Derive dataset labels from the file name or the containing folder
        if   params.input.file_label=='filename':   labels = [os.path.basename(os.path.splitext(f)[0]) for f in params.input.pdb]
        elif params.input.file_label=='foldername': labels = [os.path.basename(os.path.dirname(f)) for f in params.input.pdb]
        else: raise Exception('PDB labelling function not supported: {}'.format(params.input.file_label))

        log.bar()
        log('Grouping {} pdb files by space group'.format(len(params.input.pdb)))
        crystal_groups = CrystalGroup.by_space_group(crystals=[CrystalSummary.from_pdb(pdb_file=f, id=lab) for f,lab in zip(params.input.pdb, labels)])
        log('> Clustered into {} space group(s)'.format(len(crystal_groups)))

        for cg in crystal_groups:

            log.subheading('Space group: {} - {} datasets'.format(','.join(cg.space_groups), len(cg.crystals)))

            log(crystal_statistics('R-work', cg.crystals, value_func=lambda c: c.pdb_input().get_r_rfree_sigma().r_work, header=True))
            log(crystal_statistics('R-free', cg.crystals, value_func=lambda c: c.pdb_input().get_r_rfree_sigma().r_free, header=False, footer=True))

            log.bar(True, False)
            log('Smallest + Largest Values')
            log.bar()

            log(crystal_min_max('R-free',     cg.crystals, value_func=lambda c: c.pdb_input().get_r_rfree_sigma().r_free))

    log.heading('finished')