def show_summary(self, log=None):
    if log is None: log = Log()
    log.subheading('Available datasets')
    for d in self.datasets:
        log.bar()
        d.show_summary(log=log)
    log.bar()
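
Every example in this listing drives the same logger object. A minimal sketch of the interface they assume, reconstructed only from the calls shown here (the package's real Log class may differ):

class Log(object):
    """Sketch of the assumed logger interface - not the original implementation"""

    def __init__(self, log_file=None, verbose=False):
        self.log_file = log_file
        self.verbose = verbose

    def __call__(self, message='', show=True):
        # Append the message to the log file; echo to stdout unless show=False
        text = str(message)
        if self.log_file:
            with open(self.log_file, 'a') as fh:
                fh.write(text + '\n')
        if show:
            print(text)

    def bar(self, blank_before=False, blank_after=False):
        if blank_before: self('')
        self('-' * 70)
        if blank_after: self('')

    def heading(self, title):
        self.bar(blank_before=True)
        self('### {} ###'.format(title))
        self.bar(blank_after=True)

    def subheading(self, title):
        self('--- {} ---'.format(title))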
Example #2
def run(params):

    # Create log file
    log = Log(log_file=params.output.log, verbose=True)

    # Report
    log.heading('Validating input parameters and input files')

    # Check one or other have been provided
    assert params.input.pdb, 'No pdb files have been provided'
    for pdb in params.input.pdb:
        if not os.path.exists(pdb):
            raise Sorry('pdb does not exist: {}'.format(pdb))

    for pdb in params.input.pdb:

        log.subheading('Reading pdb: {}'.format(pdb))
        obj = strip_pdb_to_input(pdb, remove_ter=True)
        try:
            obj.hierarchy.only_model()
        except Exception:
            raise Sorry('Input structures may only have one model')

        # Merge the hierarchies
        final = standardise_multiconformer_model(
            hierarchy=obj.hierarchy,
            pruning_rmsd=params.options.pruning_rmsd,
            in_place=True,
            verbose=params.settings.verbose)

        # Update the atoms numbering
        final.sort_atoms_in_place()

        # Write output file
        filename = os.path.splitext(pdb)[0] + params.output.suffix + '.pdb'
        log('Writing output structure to {}'.format(filename))
        final.write_pdb_file(file_name=filename,
                             crystal_symmetry=obj.crystal_symmetry())

    log.heading('FINISHED')
    log.heading('Final Parameters')
    log(master_phil.format(params).as_str().strip())

    return
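
run() expects a PHIL parameter object extracted from this module's master_phil. A hedged sketch of how it might be invoked (libtbx.phil is part of cctbx; the parameter values are hypothetical):

from libtbx import phil

custom = phil.parse("""
input.pdb = model.pdb
output.log = standardise.log
""")
params = master_phil.fetch(sources=[custom]).extract()
run(params)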
Example #3
def make_local_restraints(params, input_hierarchy, log=None):
    """Create local restraints for a hierarchy"""

    if log is None: log = Log(verbose=True)

    log.subheading('Generating local structure restraints')

    atom_d_pairs = find_atoms_around_alternate_conformers(
        hierarchy=input_hierarchy.hierarchy,
        altlocs=params.local_restraints.altlocs.split(',')
        if params.local_restraints.altlocs else None,
        dist_cutoff=params.local_restraints.max_distance)
    # Filter out restraints at or below the minimum distance (removes 0-distance pairs)
    atom_d_pairs = [(a1, a2, d) for a1, a2, d in atom_d_pairs
                    if d > params.local_restraints.min_distance]

    log('Created {} local restraints for {} conformers with distance cutoff of {}-{}A'
        .format(
            len(atom_d_pairs), params.local_restraints.altlocs
            if params.local_restraints.altlocs else 'all',
            params.local_restraints.min_distance,
            params.local_restraints.max_distance))
    log('')

    if params.output.refmac:
        restraint_list = [
            RefmacFormatter.make_distance_restraint(
                atm_1=a1,
                atm_2=a2,
                value=d,
                sigma=params.local_restraints.sigma_xyz)
            for a1, a2, d in atom_d_pairs
        ]
        rest_block = RefmacFormatter.format_distance_restraints(
            restraint_list=restraint_list)
        with open(params.output.refmac, 'a') as fh:
            fh.write(rest_block + '\n')
        if params.settings.verbose:
            log.subheading('refmac local structural restraints')
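            # Preview at most the first 1000 characters; '...' is appended only when truncated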
            log(rest_block[:1000] + '...' * (len(rest_block) > 1000))
            log('')

    if params.output.phenix:
        restraint_list = [
            PhenixFormatter.make_distance_restraint(
                atm_1=a1,
                atm_2=a2,
                value=d,
                sigma=params.local_restraints.sigma_xyz)
            for a1, a2, d in atom_d_pairs
        ]
        rest_block = PhenixFormatter.format_distance_restraints(
            restraint_list=restraint_list)
        with open(params.output.phenix, 'a') as fh:
            fh.write(rest_block + '\n')
        if params.settings.verbose:
            log.subheading('phenix duplicate conformer restraints')
            log(rest_block[:1000] + '...' * (len(rest_block) > 1000))
            log('')
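
A hedged usage sketch for the function above (assumes master_phil and strip_pdb_to_input from this module; file names are hypothetical):

params = master_phil.extract()
params.output.refmac = 'local_restraints.params'
input_obj = strip_pdb_to_input('multi_conf.pdb', remove_ter=True)
make_local_restraints(params, input_hierarchy=input_obj)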
Example #4
def standardise_multiconformer_model(hierarchy,
                                     pruning_rmsd=0.1,
                                     in_place=False,
                                     verbose=False,
                                     log=None):
    """Standardise hierarchies by expanding alternate model conformations, and then trimming alternate conformations where possible"""

    if log is None: log = Log(verbose=True)

    # Alter the original files?
    if not in_place:
        # Copy the hierarchies
        hierarchy = hierarchy.deep_copy()

    # Sort the atoms
    hierarchy.sort_atoms_in_place()

    log.heading('Preparing to standardise structure')

    log.subheading(
        'Explicitly expanding model to all conformations of the crystal')
    expand_alternate_conformations(hierarchy=hierarchy,
                                   in_place=True,
                                   verbose=verbose)

    log.subheading(
        'Pruning unnecessary multi-conformer residues in the expanded structure'
    )
    prune_redundant_alternate_conformations(
        hierarchy=hierarchy,
        required_altlocs=hierarchy.altloc_indices(),
        rmsd_cutoff=pruning_rmsd,
        in_place=True,
        verbose=verbose)

    return hierarchy
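
A minimal usage sketch for the function above (iotbx.pdb is part of cctbx; the file names are hypothetical):

import iotbx.pdb

ih = iotbx.pdb.hierarchy.input(file_name='ensemble.pdb')
standardised = standardise_multiconformer_model(hierarchy=ih.hierarchy,
                                                pruning_rmsd=0.1)
standardised.write_pdb_file(file_name='ensemble-standardised.pdb',
                            crystal_symmetry=ih.input.crystal_symmetry())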
Example #5
class BFactorRefinementFactory(object):

    _refine = refine_phenix

    def __init__(self,
                 pdb_file,
                 mtz_file,
                 out_dir,
                 cif_files=None,
                 tag=None,
                 tls_selections=None,
                 prefix='refined'):

        self.pdb_file = pdb_file
        self.mtz_file = mtz_file
        # Avoid a shared mutable default argument
        self.cif_files = cif_files if cif_files is not None else []
        self.out_dir = easy_directory(out_dir)
        self.tag = tag
        self.tls_selections = []
        self.tls_matrices = None

        self.initial_pdb = os.path.join(self.out_dir, 'initial.pdb')
        self.out_template = os.path.join(self.out_dir, prefix)

        shutil.copy(self.pdb_file, self.initial_pdb)

        self.log = Log(verbose=True)

        if not tls_selections:
            tls_selections = self.determine_tls_groups(pdb_file=pdb_file)

        # Sanitise the tls selections
        for tls in tls_selections:
            if tls.startswith('"') and tls.endswith('"'):
                tls = tls[1:-1]
            assert '\"' not in tls, 'TLS selection cannot include \": {}'.format(
                tls)
            self.tls_selections.append(tls)

    def determine_tls_groups(self, pdb_file):

        self.log.subheading('Determining TLS groups for: {}'.format(pdb_file))

        tls_selections = phenix_find_tls_groups(pdb_file)

        self.log.subheading('Identified TLS Selections:')
        for s in tls_selections:
            self.log(s)

        return tls_selections

#    def initial_tls_parameters(self):
#        """Characterise TLS with phenix.tls - legacy function"""
#
#        self.log.subheading('Fitting TLS Matrices to selections')
#        self.log('writing to output file: {}'.format(self.tls_initial_pdb))
#
#        cmd = CommandManager('phenix.tls')
#        cmd.add_command_line_arguments(self.pdb_file)
#        cmd.add_command_line_arguments(self.cif_files)
#        cmd.add_command_line_arguments('extract_tls=True')
#        cmd.add_command_line_arguments([r'selection="{}"'.format(s) for s in self.tls_selections if s is not None])
#        cmd.add_command_line_arguments('output_file_name={}'.format(self.tls_initial_pdb))
#
#        cmd.print_settings()
#        ret_code = cmd.run()
#        cmd.write_output(self.tls_initial_pdb.replace('.pdb', '.log'))
#
#        if ret_code != 0:
#            self.log(cmd.output)
#            self.log(cmd.error)
#            raise Exception('Failed to determine TLS parameters: {}'.format(' '.join(cmd.program)))
#
#        return self.tls_initial_pdb, self.extract_tls_from_pdb(self.tls_initial_pdb)

    def refine_b_factors(self, mode='tls', suffix=None):
        """Refine the model with phenix.refine, including the TLS model"""

        assert mode in ['isotropic', 'tls', 'anisotropic']

        if suffix is None: suffix = mode

        strategy = "individual_sites+individual_adp+occupancies"

        if mode == 'isotropic':
            params = [r'convert_to_isotropic=True']
        elif mode == 'tls':
            strategy += '+tls'
            params = [
                r'refinement.refine.adp.tls="{}"'.format(t)
                for t in self.tls_selections
            ]
        else:
            params = [
                r'refinement.refine.adp.individual.anisotropic="{}"'.format(
                    ' or '.join(['(' + t + ')' for t in self.tls_selections]))
            ]

        self.log.subheading('Refining B-factor model with {}'.format(
            self._refine.program))
        obj = self._refine(pdb_file=self.pdb_file,
                           mtz_file=self.mtz_file,
                           cif_file=self.cif_files,
                           out_prefix=self.out_template + '-' + suffix,
                           strategy=strategy,
                           n_cycles=3,
                           manual_args=params)

        return obj.out_pdb_file, obj.out_mtz_file

    @staticmethod
    def extract_tls_from_pdb(pdb_file):
        ih = iotbx.pdb.hierarchy.input(pdb_file)
        tls_params = ih.input.extract_tls_params(ih.hierarchy)
        return tls_params

    def show_tls_params(self, tls_params=None, pdb_file=None):
        if pdb_file: tls_params = self.extract_tls_from_pdb(pdb_file=pdb_file)
        T = tls_params.tls_params[0].t
        L = tls_params.tls_params[0].l
        S = tls_params.tls_params[0].s

        o = ""
        for tls in tls_params.tls_params:
            o += '\n'
            o += 'selection: {}\n'.format(tls.selection_string)
            o += 'origin: {}\n'.format(tls.origin)
            o += 'T: ' + str(tls.t) + '\n'
            o += 'L: ' + str(tls.l) + '\n'
            o += 'S: ' + str(tls.s) + '\n'
        o += '\n'
        self.log(o)
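
A hedged usage sketch for the factory above (file names are hypothetical; TLS groups are determined automatically when tls_selections is not supplied):

factory = BFactorRefinementFactory(pdb_file='model.pdb',
                                   mtz_file='data.mtz',
                                   out_dir='b-factor-refinement',
                                   tag='xtal-1')
tls_pdb, tls_mtz = factory.refine_b_factors(mode='tls')
factory.show_tls_params(pdb_file=tls_pdb)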
Example #6
def run(params):

    # Identify any existing output directories
    current_dirs = sorted(glob.glob(params.output.dir_prefix + '*'))
    if not current_dirs:
        next_int = 1
    else:
        current_nums = [
            s.replace(params.output.dir_prefix, '') for s in current_dirs
        ]
        next_int = sorted(map(int, current_nums))[-1] + 1

    # Create output directory name from int
    out_dir = params.output.dir_prefix + '{:04}'.format(next_int)
    # Create output directory
    os.mkdir(out_dir)

    # Create log object
    log = Log(log_file=os.path.join(
        out_dir, params.output.out_prefix + '.quick-refine.log'),
              verbose=params.settings.verbose)

    # Report
    if current_dirs:
        log('Found existing refinement directories: \n\t{}'.format(
            '\n\t'.join(current_dirs)))
        log('')
    log('Creating new output directory: {}'.format(out_dir))

    # Validate input parameters
    log.subheading('Validating input parameters')
    assert params.input.pdb is not None, 'No PDB given for refinement'
    assert params.input.mtz is not None, 'No MTZ given for refinement'

    if os.path.islink(params.input.mtz):
        log('Converting mtz path to real path:')
        log('{} -> {}'.format(params.input.mtz,
                              os.path.realpath(params.input.mtz)))
        params.input.mtz = os.path.realpath(params.input.mtz)

    # Link input
    log('Copying/linking files to refinement folder')
    shutil.copy(params.input.pdb,
                os.path.abspath(os.path.join(out_dir, 'input.pdb')))
    rel_symlink(params.input.mtz,
                os.path.abspath(os.path.join(out_dir, 'input.mtz')))
    # Copy parameter file to output folder
    if params.input.params:
        shutil.copy(params.input.params,
                    os.path.abspath(os.path.join(out_dir, 'input.params')))

    # Create output prefixes
    output_prefix = os.path.join(out_dir, params.output.out_prefix)
    log('Real output file path prefixes: {}'.format(output_prefix))
    log('Link output file path prefixes: {}'.format(params.output.link_prefix))

    # Create command objects
    log.subheading('Preparing command line input for refinement program')

    # PHENIX
    if params.options.program == 'phenix':
        cm = CommandManager('phenix.refine')
        # Command line args
        cm.add_command_line_arguments([params.input.pdb, params.input.mtz])
        cm.add_command_line_arguments(
            ['output.prefix={}'.format(output_prefix)])
        if params.input.cif:
            cm.add_command_line_arguments(params.input.cif)
        if params.input.params and os.path.exists(params.input.params):
            cm.add_command_line_arguments([params.input.params])

    # REFMAC
    elif params.options.program == 'refmac':
        cm = CommandManager('refmac5')
        # Command line args
        cm.add_command_line_arguments(
            ['xyzin', params.input.pdb, 'hklin', params.input.mtz])

        cm.add_command_line_arguments([
            'xyzout', output_prefix + '.pdb', 'hklout', output_prefix + '.mtz'
        ])
        if params.input.cif:
            for cif in params.input.cif:
                cm.add_command_line_arguments(['libin', cif])
        # Standard input
        if params.input.params:
            cm.add_standard_input(open(params.input.params).read().split('\n'))

        cm.add_standard_input(['END'])

    elif params.options.program == "buster":
        cm = CommandManager('refine')
        # Command line arguments
        # inputs
        cm.add_command_line_arguments(
            ['-p', params.input.pdb, '-m', params.input.mtz, '-d', out_dir])

        if params.input.cif:
            for cif in params.input.cif:
                cm.add_command_line_arguments(['-l', cif])

        if params.input.params:
            cm.add_command_line_arguments(['-Gelly', params.input.params])

    # Pass additional command line arguments?
    if params.input.args:
        cm.add_command_line_arguments(params.input.args)

    # Report
    log(str(cm))

    log.bar()
    log('running refinement... ({})'.format(cm.program[0]))
    out = cm.run()

    log.subheading('Refinement output')
    if not log.verbose:
        log('output written to log file ({} lines)'.format(
            cm.output.count('\n')))

    log('\n' + cm.output, show=False)

    if out != 0:
        log.subheading('Refinement Errors')
        log(cm.error)

    log.subheading('Post-processing output files')

    if params.options.program == "buster":
        log.subheading('Renaming buster output files')

        shutil.move(src=os.path.join(out_dir, 'refine.pdb'),
                    dst=output_prefix + '.pdb')

        shutil.move(src=os.path.join(out_dir, 'refine.mtz'),
                    dst=output_prefix + '.mtz')

    # Find output files
    try:
        real_pdb = glob.glob(output_prefix + '*.pdb')[0]
        real_mtz = glob.glob(output_prefix + '*.mtz')[0]
    except IndexError:
        log('Refinement has failed - output files do not exist')
        log('{}: {}'.format(output_prefix + '*.pdb',
                            glob.glob(output_prefix + '*.pdb')))
        log('{}: {}'.format(output_prefix + '*.mtz',
                            glob.glob(output_prefix + '*.mtz')))
        raise

    # List of links to make at the end of the run
    link_file_pairs = [(real_pdb, params.output.link_prefix + '.pdb'),
                       (real_mtz, params.output.link_prefix + '.mtz')]

    # Split conformations
    if params.options.split_conformations:
        params.split_conformations.settings.verbose = params.settings.verbose
        log.subheading('Splitting refined structure conformations')
        # Running split conformations
        out_files = split_conformations.split_conformations(
            filename=real_pdb, params=params.split_conformations, log=log)
        # Link output files to top
        for real_file in out_files:
            link_file = params.output.link_prefix + os.path.basename(
                real_file.replace(os.path.splitext(real_pdb)[0], ''))
            link_file_pairs.append([real_file, link_file])

    # Link output files
    log.subheading('linking output files')
    for real_file, link_file in link_file_pairs:
        log('Linking {} -> {}'.format(link_file, real_file))
        if not os.path.exists(real_file):
            log('file does not exist: {}'.format(real_file))
            continue
        if os.path.exists(link_file) and os.path.islink(link_file):
            log('removing existing link: {}'.format(link_file))
            os.unlink(link_file)
        if not os.path.exists(link_file):
            rel_symlink(real_file, link_file)

    log.heading('finished - refinement')
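
The numbered-output-directory pattern at the top of run() can be isolated as below (a sketch, assuming existing directory names are exactly the prefix followed by an integer):

import glob

def next_numbered_dir(prefix):
    existing = sorted(glob.glob(prefix + '*'))
    nums = [int(s.replace(prefix, '')) for s in existing]
    next_int = (max(nums) + 1) if nums else 1
    return prefix + '{:04}'.format(next_int)

# e.g. with 'refine-0001' and 'refine-0002' present, returns 'refine-0003'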
Example #7
def run(params):

    # Validate input files
    if not (params.input.pdb or params.input.mtz):
        raise Sorry(
            'No pdb/mtz files have been provided: specify with input.pdb or input.mtz'
        )
    # Check and create output directory
    if not params.output.out_dir:
        raise Sorry(
            'No output directory has been specified: specify with output.out_dir'
        )
    if not os.path.exists(params.output.out_dir):
        os.mkdir(params.output.out_dir)
    # Define and create image directory
    img_dir = os.path.join(params.output.out_dir, 'dendrograms')
    if not os.path.exists(img_dir):
        os.mkdir(img_dir)

    # Create log object
    log = Log(log_file=params.output.out_dir + '.clustering.log', verbose=True)

    # Define output_file_function to copy or symlink files as needed
    if params.output.file_mode == 'symlink':
        out_file_func = os.symlink
    elif params.output.file_mode == 'copy':
        out_file_func = shutil.copy
    else:
        raise Sorry('Unrecognised output.file_mode: {}'.format(
            params.output.file_mode))

    log.heading('Processing input pdb/mtz files')
    log('Making dataset labels for {} pdb(s) and {} mtz(s)'.format(
        len(params.input.pdb), len(params.input.mtz)))

    try:
        if params.input.labels.pdb_label == 'filename':
            p_labels = [
                os.path.basename(os.path.splitext(f)[0])
                for f in params.input.pdb
            ]
        elif params.input.labels.pdb_label == 'foldername':
            p_labels = [
                os.path.basename(os.path.dirname(f)) for f in params.input.pdb
            ]
        elif params.input.labels.pdb_regex:
            p_labels = [
                re.findall(params.input.labels.pdb_regex, f)[0]
                for f in params.input.pdb
            ]
        else:
            p_labels = [
                'PDB-{:06d}'.format(i) for i in range(len(params.input.pdb))
            ]
        if params.input.labels.mtz_label == 'filename':
            m_labels = [
                os.path.basename(os.path.splitext(f)[0])
                for f in params.input.mtz
            ]
        elif params.input.labels.mtz_label == 'foldername':
            m_labels = [
                os.path.basename(os.path.dirname(f)) for f in params.input.mtz
            ]
        elif params.input.labels.mtz_regex:
            m_labels = [
                re.findall(params.input.labels.mtz_regex, f)[0]
                for f in params.input.mtz
            ]
        else:
            m_labels = [
                'MTZ-{:06d}'.format(i) for i in range(len(params.input.mtz))
            ]
    except Exception:
        print('Error reading file: {}'.format(f))
        raise

    # Check labels are unique
    set_m_labels = set(m_labels)
    set_p_labels = set(p_labels)
    if len(set_m_labels) != len(m_labels):
        raise Sorry('MTZ labels are not unique. Repeated labels: {}'.format(
            ' '.join([
                '{}'.format(l) for l in set_m_labels if m_labels.count(l) != 1
            ])))
    if len(set_p_labels) != len(p_labels):
        raise Sorry('PDB labels are not unique. Repeated labels: {}'.format(
            ' '.join([l for l in set_p_labels if p_labels.count(l) != 1])))

    # Report labels
    if p_labels:
        log.subheading('PDB Labels')
        log(', '.join(p_labels))
    if m_labels:
        log.subheading('MTZ Labels')
        log(', '.join(m_labels))

    # Load crystal summaries
    log.bar(True, True)
    log('Reading data for {} pdb(s) and {} mtz(s)'.format(
        len(params.input.pdb), len(params.input.mtz)))

    if params.input.pdb:
        pdb_summaries = [
            CrystalSummary.from_pdb(pdb_file=f, id=lab)
            for f, lab in zip(params.input.pdb, p_labels)
        ]
    else:
        pdb_summaries = []
    if params.input.mtz:
        mtz_summaries = [
            CrystalSummary.from_mtz(mtz_file=f, id=lab)
            for f, lab in zip(params.input.mtz, m_labels)
        ]
    else:
        mtz_summaries = []

    # Group by SpaceGroup
    log.subheading('Grouping {} crystals by space group...'.format(
        len(pdb_summaries + mtz_summaries)))
    crystal_groups = CrystalGroup.by_space_group(crystals=pdb_summaries +
                                                 mtz_summaries)
    log('Grouped crystals into {} space groups'.format(len(crystal_groups)))

    log.heading('Analysing variation of unit cells for each space group')

    for cg in crystal_groups:

        sg_name = 'sg-{}'.format(cg.space_groups[0].split(' (')[0].replace(
            ' ', '_'))

        log.subheading('Space Group {}: {} dataset(s)'.format(
            cg.space_groups[0], len(cg.crystals)))

        log('Unit Cell Variation:')
        log(numpy.round(cg.uc_stats.as_pandas_table().T, 2))

        log('')
        log('Making unit cell dendrogram for all crystals with this spacegroup'
            )
        if len(cg.crystals) > 1:
            cg.dendrogram(fname=os.path.join(img_dir,
                                             '{}-all.png'.format(sg_name)),
                          xlab='Crystal',
                          ylab='Linear Cell Variation',
                          annotate_y_min=params.clustering.label_nodes_above)

        log('')
        log('Clustering {} unit cells...'.format(len(cg.crystals)))
        sg_crystal_groups = cg.by_unit_cell(
            cg.crystals, cutoff=params.clustering.lcv_cutoff)
        log('Clustered crystals into {} groups'.format(len(sg_crystal_groups)))

        for i_cg2, cg2 in enumerate(sg_crystal_groups):

            cluster_name = '{}-cluster-{}'.format(sg_name, i_cg2 + 1)

            log.bar(True, False)
            log('Processing cluster: {}'.format(cluster_name))
            log.bar(False, True)

            log('Unit Cell Variation:')
            log(numpy.round(cg.uc_stats.as_pandas_table().T, 2))

            log('')
            log('Making unit cell dendrogram for this cluster of crystals')
            if len(cg2.crystals) > 1:
                cg2.dendrogram(
                    fname=os.path.join(img_dir, '{}.png'.format(cluster_name)),
                    xlab='Crystal',
                    ylab='Linear Cell Variation',
                    ylim=(0, params.clustering.lcv_cutoff),
                    annotate_y_min=params.clustering.label_nodes_above)

            log('Copying files to output directory')

            # Go through and link the datasets for each of the spacegroups into a separate folder
            sub_dir = os.path.join(params.output.out_dir, cluster_name)
            if not os.path.exists(sub_dir): os.mkdir(sub_dir)

            # Split the mtzs and pdbs into separate directories -- or not
            if params.output.split_pdbs_and_mtzs:
                mtz_dir = os.path.join(sub_dir, 'mtzs')
                if not os.path.exists(mtz_dir): os.mkdir(mtz_dir)
                pdb_dir = os.path.join(sub_dir, 'pdbs')
                if not os.path.exists(pdb_dir): os.mkdir(pdb_dir)
            else:
                mtz_dir = pdb_dir = sub_dir

            for c in cg2.crystals:
                # Set parameters based on pdb or mtz
                if c.mtz_file:
                    sub_sub_dir = os.path.join(mtz_dir, c.id)
                    def_file = os.path.abspath(c.mtz_file)
                    def_suff = '.mtz'
                    pos_suff = '.pdb'
                elif c.pdb_file:
                    sub_sub_dir = os.path.join(pdb_dir, c.id)
                    def_file = os.path.abspath(c.pdb_file)
                    def_suff = '.pdb'
                    pos_suff = '.mtz'
                # Create subdirectory
                if not os.path.exists(sub_sub_dir): os.mkdir(sub_sub_dir)
                # Output file base template
                out_base = os.path.join(sub_sub_dir, c.id)
                # Export file
                out_file = out_base + def_suff
                if not os.path.exists(out_file):
                    out_file_func(def_file, out_file)
                # output other as well if filenames are the same
                pos_file = def_file.replace(def_suff, pos_suff)
                out_file = out_base + pos_suff
                if os.path.exists(pos_file) and not os.path.exists(out_file):
                    out_file_func(pos_file, out_file)

    log.heading('finished')
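
The label-uniqueness check above can be expressed more directly with collections.Counter (an equivalent sketch):

from collections import Counter

repeated = [l for l, n in Counter(m_labels).items() if n > 1]
if repeated:
    raise Sorry('MTZ labels are not unique. Repeated labels: {}'.format(
        ' '.join(repeated)))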
Example #8
def run(params):
    # Identify any existing output directories
    current_dirs = sorted(glob.glob(params.output.dir_prefix + "*"))
    if not current_dirs:
        next_int = 1
    else:
        current_nums = [
            s.replace(params.output.dir_prefix, "") for s in current_dirs
        ]
        next_int = sorted(map(int, current_nums))[-1] + 1

    # Create output directory name from int
    out_dir = params.output.dir_prefix + "{:04}".format(next_int)
    # Create output directory
    os.mkdir(out_dir)

    # Create log object
    log = Log(
        log_file=os.path.join(out_dir,
                              params.output.out_prefix + ".quick-refine.log"),
        verbose=params.settings.verbose,
    )

    # Report
    if current_dirs:
        log("Found existing refinement directories: \n\t{}".format(
            "\n\t".join(current_dirs)))
        log("")
    log("Creating new output directory: {}".format(out_dir))

    # Validate input parameters
    log.subheading("Validating input parameters")
    assert params.input.pdb is not None, "No PDB given for refinement"
    assert params.input.mtz is not None, "No MTZ given for refinement"

    if os.path.islink(params.input.mtz):
        log("Converting mtz path to real path:")
        log("{} -> {}".format(params.input.mtz,
                              os.path.realpath(params.input.mtz)))
        params.input.mtz = os.path.realpath(params.input.mtz)

    # Link input
    log("Copying/linking files to refinement folder")
    shutil.copy(params.input.pdb,
                os.path.abspath(os.path.join(out_dir, "input.pdb")))
    rel_symlink(params.input.mtz,
                os.path.abspath(os.path.join(out_dir, "input.mtz")))
    # Copy parameter file to output folder
    if params.input.params:
        shutil.copy(params.input.params,
                    os.path.abspath(os.path.join(out_dir, "input.params")))

    # Create output prefixes

    output_prefix = out_dir

    log("Real output file path prefixes: {}".format(output_prefix))
    log("Link output file path prefixes: {}".format(params.output.link_prefix))

    # Create command objects
    log.subheading("Preparing command line input for refinement program")

    # PHENIX
    if params.options.program == "phenix":
        cm = CommandManager("phenix.refine")
        # Command line args
        cm.add_command_line_arguments([params.input.pdb, params.input.mtz])
        cm.add_command_line_arguments(
            ["output.prefix={}".format(output_prefix)])
        if params.input.cif:
            cm.add_command_line_arguments(params.input.cif)
        if params.input.params and os.path.exists(params.input.params):
            cm.add_command_line_arguments([params.input.params])

    # REFMAC
    elif params.options.program == "refmac":
        cm = CommandManager("refmac5")
        # Command line args
        cm.add_command_line_arguments(
            ["xyzin", params.input.pdb, "hklin", params.input.mtz])
        cm.add_command_line_arguments([
            "xyzout", output_prefix + ".pdb", "hklout", output_prefix + ".mtz"
        ])
        if params.input.cif:
            for cif in params.input.cif:
                cm.add_command_line_arguments(["libin", cif])
        # Standard input
        if params.input.params:
            cm.add_standard_input(open(params.input.params).read().split("\n"))

        cm.add_standard_input(["END"])

    # Pass additional command line arguments?
    if params.input.args:
        cm.add_command_line_arguments(params.input.args)

    # Report
    log(str(cm))

    log.bar()
    log("running refinement... ({})".format(cm.program[0]))
    out = cm.run()

    log.subheading("Refinement output")
    if not log.verbose:
        log("output written to log file ({} lines)".format(
            cm.output.count("\n")))

    log("\n" + cm.output, show=False)

    if out != 0:
        log.subheading("Refinement Errors")
        log(cm.error)

    log.subheading("Post-processing output files")

    # Find output files; os.path.join cannot fail, so check explicitly that
    # the refinement actually produced them
    real_pdb = os.path.join(output_prefix,
                            params.output.out_prefix + ".pdb")
    real_mtz = os.path.join(output_prefix,
                            params.output.out_prefix + ".mtz")

    if not (os.path.exists(real_pdb) and os.path.exists(real_mtz)):
        log("Refinement has failed - output files do not exist")
        log("{}: {}".format(output_prefix + "*.pdb",
                            glob.glob(output_prefix + "*.pdb")))
        log("{}: {}".format(output_prefix + "*.mtz",
                            glob.glob(output_prefix + "*.mtz")))
        raise Sorry("Refinement output files not found")

    # List of links to make at the end of the run
    link_file_pairs = [
        (real_pdb, params.output.link_prefix + ".pdb"),
        (real_mtz, params.output.link_prefix + ".mtz"),
    ]

    log("Link file pairs: {}".format(link_file_pairs))

    # Split conformations
    if params.options.split_conformations:
        params.split_conformations.settings.verbose = params.settings.verbose
        log.subheading("Splitting refined structure conformations")
        # Running split conformations
        out_files = split_conformations.split_conformations(
            filename=real_pdb, params=params.split_conformations, log=log)
        # Link output files to top
        for real_file in out_files:
            link_file = params.output.link_prefix + os.path.basename(
                real_file.replace(os.path.splitext(real_pdb)[0], ""))
            link_file_pairs.append([real_file, link_file])

    # Link output files
    log.subheading("linking output files")
    for real_file, link_file in link_file_pairs:
        log("Linking {} -> {}".format(link_file, real_file))
        if not os.path.exists(real_file):
            log("file does not exist: {}".format(real_file))
            continue
        if os.path.exists(link_file) and os.path.islink(link_file):
            log("removing existing link: {}".format(link_file))
            os.unlink(link_file)
        if not os.path.exists(link_file):
            rel_symlink(real_file, link_file)

    log.heading("finished - refinement")
Example #9
def merge_complementary_hierarchies(hierarchy_1,
                                    hierarchy_2,
                                    prune_duplicates_rmsd=0.1,
                                    in_place=False,
                                    verbose=False,
                                    log=None):
    """Merge hierarchies that are alternate models of the same crystal by expanding alternate model conformations, merging, and then trimming alternate conformations where possible"""

    if log is None: log = Log(verbose=True)

    # Alter the original files?
    if not in_place:
        # Copy the hierarchies
        hierarchy_1 = hierarchy_1.deep_copy()
        hierarchy_2 = hierarchy_2.deep_copy()

    # Sort the atoms
    hierarchy_1.sort_atoms_in_place()
    hierarchy_2.sort_atoms_in_place()

    log.heading('Preparing to merge structures')

    log.subheading(
        'Explicitly expanding models to all conformations of the crystal')
    log('Expanding alternate conformations in structure 1')
    expand_alternate_conformations(hierarchy=hierarchy_1,
                                   in_place=True,
                                   verbose=verbose)
    log('Expanding alternate conformations in structure 2')
    expand_alternate_conformations(hierarchy=hierarchy_2,
                                   in_place=True,
                                   verbose=verbose)
    log.subheading(
        'Applying conformer shift to the second structure before merging')
    log('Identifying the altloc shift required from the number of alternate conformers in structure 1'
        )
    conf_offset = find_next_conformer_idx(
        hierarchy=hierarchy_1, all_ids=iotbx.pdb.systematic_chain_ids())
    log('Incrementing all altlocs in structure 2 by {}'.format(conf_offset))
    increment_altlocs(hierarchy=hierarchy_2,
                      offset=conf_offset,
                      in_place=True,
                      verbose=verbose)
    log.subheading('Renaming residues that do not align between structures')
    resolve_residue_id_clashes(fixed_hierarchy=hierarchy_1,
                               moving_hierarchy=hierarchy_2,
                               in_place=True,
                               verbose=verbose)

    log.heading('Merging structures')

    log('Transferring residues from Structure 2 to Structure 1')
    transfer_residue_groups_from_other(acceptor_hierarchy=hierarchy_1,
                                       donor_hierarchy=hierarchy_2,
                                       in_place=True,
                                       verbose=verbose)

    log.heading('Post-processing structure')

    log('Pruning unnecessary multi-conformer residues in the merged structure'
        )
    prune_redundant_alternate_conformations(
        hierarchy=hierarchy_1,
        required_altlocs=hierarchy_1.altloc_indices(),
        rmsd_cutoff=prune_duplicates_rmsd,
        in_place=True,
        verbose=verbose)

    return hierarchy_1
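
A hedged usage sketch for the merge function above (strip_pdb_to_input as used elsewhere in this listing; file names are hypothetical):

maj_obj = strip_pdb_to_input('ground-state.pdb', remove_ter=True)
min_obj = strip_pdb_to_input('bound-state.pdb', remove_ter=True)
merged = merge_complementary_hierarchies(hierarchy_1=maj_obj.hierarchy,
                                         hierarchy_2=min_obj.hierarchy,
                                         prune_duplicates_rmsd=0.1)
merged.write_pdb_file(file_name='merged.pdb',
                      crystal_symmetry=maj_obj.crystal_symmetry())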
Example #10
def run(params):

    # Create log file
    log = Log(log_file=params.output.log, verbose=True)

    # Report
    log.heading('Validating input parameters and input files')

    # Check one or other have been provided
    if (params.input.major or params.input.minor
        ) and not (params.input.pdb == [None] or params.input.pdb == []):
        raise Exception(
            'Have provided input.major & input.minor, as well as files to input.pdb. Specify either input.major & input.minor, or two input.pdb.'
        )
    # Assign files to major and minor if necessary
    if not (params.input.major and params.input.minor):
        if len(params.input.pdb) != 2:
            raise Exception('Must provide zero or two pdb files to input.pdb')
        params.input.major = params.input.pdb[0]
        params.input.minor = params.input.pdb[1]
    # Check files exist
    if not os.path.exists(params.input.major):
        raise Exception('input.major does not exist: {}'.format(
            params.input.major))
    if not os.path.exists(params.input.minor):
        raise Exception('input.minor does not exist: {}'.format(
            params.input.minor))
    # Just check again...
    assert params.input.major
    assert params.input.minor
    assert params.output.pdb
    # Check existence of output pdb and delete as necessary
    if os.path.exists(params.output.pdb):
        if params.settings.overwrite:
            os.remove(params.output.pdb)
        else:
            raise Exception(
                'Output file already exists: {}. Run with overwrite=True to remove this file'
                .format(params.output.pdb))

    # Check that the input occupancies are valid
    if (params.options.minor_occupancy >
            1.0) or (params.options.major_occupancy > 1.0):
        raise Exception(
            'minor_occupancy and major_occupancy cannot be greater than 1.0 (currently {} and {})'
            .format(params.options.minor_occupancy,
                    params.options.major_occupancy))

    # Report validated parameters
    log.subheading('Processed merging parameters')
    for obj in master_phil.format(params).objects:
        if obj.name == 'restraints': continue
        log(obj.as_str().strip())

    # Read in the ligand file and set each residue to the requested conformer
    log.subheading('Reading input files')
    maj_obj = strip_pdb_to_input(params.input.major, remove_ter=True)
    min_obj = strip_pdb_to_input(params.input.minor, remove_ter=True)

    # Check that each input structure contains only a single model
    try:
        maj_obj.hierarchy.only_model()
        min_obj.hierarchy.only_model()
    except Exception:
        raise Sorry('Input structures may only have one model')

    # Multiply the input hierarchies by occupancy multipliers
    log.subheading('Updating input occupancies prior to merging')
    log('Multiplying occupancies of input.major by {}'.format(
        params.options.major_occupancy))
    maj_obj.hierarchy.atoms().set_occ(maj_obj.hierarchy.atoms().extract_occ() *
                                      params.options.major_occupancy)
    log('Multiplying occupancies of input.minor by {}'.format(
        params.options.minor_occupancy))
    min_obj.hierarchy.atoms().set_occ(min_obj.hierarchy.atoms().extract_occ() *
                                      params.options.minor_occupancy)

    # Merge the hierarchies
    final_struct = merge_complementary_hierarchies(
        hierarchy_1=maj_obj.hierarchy,
        hierarchy_2=min_obj.hierarchy,
        prune_duplicates_rmsd=params.options.prune_duplicates_rmsd,
        in_place=True,
        verbose=params.settings.verbose)

    # Set output occupancies
    log.subheading('Post-processing occupancies')
    # Set all main-conf occupancies to 1.0
    log('Setting all main-conf occupancies to 1.0')
    set_conformer_occupancy(hierarchy=final_struct,
                            altlocs=[''],
                            occupancy=1.0,
                            in_place=True,
                            verbose=params.settings.verbose)
    # Reset occupancies if required
    if params.options.reset_all_occupancies:
        # Calculate number of altlocs and associated occupancy
        altlocs = [a for a in final_struct.altloc_indices() if a]
        if altlocs:
            new_occ = 1.0 / len(altlocs)
            # Set the occupancies
            log('Setting all conformer ({}) occupancies to {}'.format(
                ','.join(altlocs), new_occ))
            set_conformer_occupancy(hierarchy=final_struct,
                                    altlocs=altlocs,
                                    occupancy=new_occ,
                                    in_place=True,
                                    verbose=params.settings.verbose)

    # Update the atoms numbering
    final_struct.sort_atoms_in_place()
    final_struct.atoms_reset_serial()
    # Write output file
    log('Writing output structure to {}'.format(params.output.pdb))
    final_struct.write_pdb_file(file_name=params.output.pdb,
                                crystal_symmetry=maj_obj.crystal_symmetry())

    # Run the restraint generation for the merged structure if requested
    if params.output.make_restraints:

        # Transfer the other phil objects from the master phil
        r_params = make_restraints.master_phil.extract()
        for name, obj in r_params.__dict__.items():
            if name.startswith('_'): continue
            if name not in params.restraints.__dict__:
                params.restraints.__inject__(name, obj)

        # Apply the output of merging to input of restraints
        params.restraints.input.pdb = params.output.pdb
        # Rename output files to be in same folder as output structure
        if params.restraints.output.phenix:
            params.restraints.output.phenix = os.path.join(
                os.path.dirname(params.output.pdb),
                os.path.basename(params.restraints.output.phenix))
        if params.restraints.output.refmac:
            params.restraints.output.refmac = os.path.join(
                os.path.dirname(params.output.pdb),
                os.path.basename(params.restraints.output.refmac))
        # Set log file name to this program if one given
        if params.output.log:
            params.restraints.output.log = params.output.log
        elif params.restraints.output.log:
            params.restraints.output.log = os.path.join(
                os.path.dirname(params.output.pdb),
                os.path.basename(params.restraints.output.log))
        # Which alternate conformations to generate restraints for
        params.restraints.local_restraints.altlocs = ','.join(
            [a for a in min_obj.hierarchy.altloc_indices() if a])
        # Update settings
        params.restraints.settings.verbose = params.settings.verbose
        params.restraints.settings.overwrite = params.settings.overwrite

        # Report
        log.heading('Parameters for generating restraints')
        log(master_phil.format(params).get('restraints').as_str().strip())
        log.heading('Generating restraints')
        # Run make_restraints
        make_restraints.run(params.restraints)

    log.heading('FINISHED')
    log.heading('Final Parameters')
    log(master_phil.format(params).as_str().strip())

    return
Example #11
def run(params):

    log = Log(log_file=params.output.log_file, verbose=True)

    # Process MTZs
    if params.input.mtz:

        log.heading('Processing {} MTZ Files'.format(len(params.input.mtz)))

        if   params.input.file_label=='filename':   labels = [os.path.basename(os.path.splitext(f)[0]) for f in params.input.mtz]
        elif params.input.file_label=='foldername': labels = [os.path.basename(os.path.dirname(f)) for f in params.input.mtz]
        else: raise Exception('MTZ labelling function not supported: {}'.format(params.input.file_label))

        log.bar()
        log('Grouping {} mtz files by space group'.format(len(params.input.mtz)))
        crystal_groups = CrystalGroup.by_space_group(crystals=[CrystalSummary.from_mtz(mtz_file=f, id=lab) for f,lab in zip(params.input.mtz, labels)])
        log('> Clustered into {} space group(s)'.format(len(crystal_groups)))
        log.bar()

        for cg in crystal_groups:

            log.subheading('Space group {} - {} datasets'.format(','.join(cg.space_groups), len(cg.crystals)))

            error = False
            for c in cg.crystals:
                for label in params.check_for.column_label:
                    if label is None: continue
                    if label not in c.column_labels:
                        log('Checking: column "{}" not in diffraction data of {}. columns present are {}'.format(label, c.mtz_file, c.column_labels))
                for label in params.summary.column_label:
                    if label is None: continue
                    if label not in c.column_labels:
                        log('Required: column "{}" not in diffraction data of {}. columns present are {}'.format(label, c.mtz_file, c.column_labels))
                        error = True
            if error is True: raise Sorry('There are datasets that do not contain the right columns.')

            log(crystal_statistics('Wavelength',         cg.crystals, value_func=lambda c: c.mtz_object().crystals()[1].datasets()[0].wavelength(), header=True))
            log(crystal_statistics('Resolution (high)',  cg.crystals, value_func=lambda c: c.high_res,                                              header=False))
            log(crystal_statistics('Resolution (low)',   cg.crystals, value_func=lambda c: c.low_res,                                               header=False))
            log(crystal_statistics('Unit cell - vol',    cg.crystals, value_func=lambda c: c.unit_cell.volume(),                                    header=False))
            log(crystal_statistics('Unit cell - a',      cg.crystals, value_func=lambda c: c.unit_cell.parameters()[0],                             header=False))
            log(crystal_statistics('Unit cell - b',      cg.crystals, value_func=lambda c: c.unit_cell.parameters()[1],                             header=False))
            log(crystal_statistics('Unit cell - c',      cg.crystals, value_func=lambda c: c.unit_cell.parameters()[2],                             header=False))
            log(crystal_statistics('Unit cell - alpha',  cg.crystals, value_func=lambda c: c.unit_cell.parameters()[3],                             header=False))
            log(crystal_statistics('Unit cell - beta',   cg.crystals, value_func=lambda c: c.unit_cell.parameters()[4],                             header=False))
            log(crystal_statistics('Unit cell - gamma',  cg.crystals, value_func=lambda c: c.unit_cell.parameters()[5],                             header=False, footer=True))

            for label in params.summary.column_label:
                if label is None: continue
                log(crystal_statistics('Column: {}'.format(label), cg.crystals, value_func=lambda c: c.mtz_object().get_column(label).n_valid_values(),     header=False, footer=True))

            log.bar(True, False)
            log('Smallest + Largest Values')
            log.bar()

            log(crystal_min_max('Resolution', cg.crystals, value_func=lambda c: c.high_res))

    # Process PDBs
    if params.input.pdb:

        log.heading('Processing {} PDB Files'.format(len(params.input.pdb)))

        if   params.input.file_label=='filename':   labels = [os.path.basename(os.path.splitext(f)[0]) for f in params.input.pdb]
        elif params.input.file_label=='foldername': labels = [os.path.basename(os.path.dirname(f)) for f in params.input.pdb]
        else: raise Exception('PDB labelling function not supported: {}'.format(params.input.file_label))

        log.bar()
        log('Grouping {} pdb files by space group'.format(len(params.input.pdb)))
        crystal_groups = CrystalGroup.by_space_group(crystals=[CrystalSummary.from_pdb(pdb_file=f, id=lab) for f,lab in zip(params.input.pdb, labels)])
        log('> Clustered into {} space group(s)'.format(len(crystal_groups)))

        for cg in crystal_groups:

            log.subheading('Space group: {} - {} datasets'.format(','.join(cg.space_groups), len(cg.crystals)))

            log(crystal_statistics('R-work', cg.crystals, value_func=lambda c: c.pdb_input().get_r_rfree_sigma().r_work, header=True))
            log(crystal_statistics('R-free', cg.crystals, value_func=lambda c: c.pdb_input().get_r_rfree_sigma().r_free, header=False, footer=True))

            log.bar(True, False)
            log('Smallest + Largest Values')
            log.bar()

            log(crystal_min_max('R-free',     cg.crystals, value_func=lambda c: c.pdb_input().get_r_rfree_sigma().r_free))

    log.heading('finished')
Example #12
def split_conformations(filename, params, log=None):

    if log is None: log = Log(verbose=True)

    # Read the pdb header - for writing later...
    header_contents = get_pdb_header(filename)

    # Read in and validate the input file
    ens_obj = strip_pdb_to_input(filename, remove_ter=True)
    ens_obj.hierarchy.only_model()

    # Create a new copy of the structures
    new_ens = ens_obj.hierarchy.deep_copy()

    # Extract conformers from the structure as set
    all_confs = set(ens_obj.hierarchy.altloc_indices())
    all_confs.discard('')

    if params.options.mode == 'by_residue_name':
        sel_resnames = params.options.by_residue_name.resname.split(',')
        sel_confs = [
            ag.altloc for ag in new_ens.atom_groups()
            if (ag.resname in sel_resnames)
        ]
        # List of conformers to output for each structure, and suffixes
        out_confs = [
            sorted(all_confs.intersection(sel_confs)),
            sorted(all_confs.difference(sel_confs)),
        ]
        out_suffs = [
            params.options.by_residue_name.selected_name,
            params.options.by_residue_name.unselected_name
        ]
    elif params.options.mode == 'by_conformer':
        sel_resnames = None
        sel_confs = None
        # One structure for each conformer
        out_confs = [[c] for c in sorted(all_confs)]
        out_suffs = [''.join(c) for c in out_confs]
    elif params.options.mode == 'by_conformer_group':
        sel_resnames = None
        sel_confs = None
        # One structure for each set of supplied conformer sets
        out_confs = [
            s.split(',') for s in params.options.by_conformer_group.conformers
        ]
        out_suffs = [''.join(c) for c in out_confs]
    else:
        raise Exception('Invalid selection for options.mode: {}'.format(
            params.options.mode))

    assert len(out_confs) == len(out_suffs), '{} not same length as {}'.format(
        str(out_confs), str(out_suffs))

    for confs, suffix in zip(out_confs, out_suffs):
        log('Conformers {} -> {}'.format(str(confs), suffix))

    # Create paths from the suffixes
    out_paths = [
        '.'.join([
            os.path.splitext(filename)[0], params.output.suffix_prefix, suff,
            'pdb'
        ]) for suff in out_suffs
    ]

    log.subheading('Processing {}'.format(filename[-70:]))

    for this_confs, this_path in zip(out_confs, out_paths):

        if not this_confs: continue

        # Select atoms to keep - no altloc, or altloc in selection
        sel_string = ' or '.join(
            ['altid " "'] + ['altid "{}"'.format(alt) for alt in this_confs])
        # Extract selection from the hierarchy
        sel_hiery = new_ens.select(
            new_ens.atom_selection_cache().selection(sel_string),
            copy_atoms=True)

        log.bar(True, False)
        log('Outputting conformer(s) {} to {}'.format(''.join(this_confs),
                                                      this_path))
        log.bar()
        log('Keeping ANY atom with conformer id: {}'.format(
            ' or '.join(['" "'] + this_confs)))
        log('Selection: \n\t' + sel_string)

        if params.options.pruning.prune_duplicates:
            log.bar()
            log('Pruning redundant conformers')
            # Remove any alternate conformers that are duplicated after selection
            prune_redundant_alternate_conformations(
                hierarchy=sel_hiery,
                required_altlocs=[a for a in sel_hiery.altloc_indices() if a],
                rmsd_cutoff=params.options.pruning.rmsd_cutoff,
                in_place=True,
                verbose=params.settings.verbose)

        if params.options.reset_altlocs:
            log.bar()
            # Change the altlocs so that they start from "A"
            if len(this_confs) == 1:
                conf_hash = {this_confs[0]: ' '}
            else:
                conf_hash = dict(
                    zip(this_confs, iotbx.pdb.systematic_chain_ids()))
            log('Resetting structure altlocs:')
            for k in sorted(conf_hash.keys()):
                log('\t{} -> "{}"'.format(k, conf_hash[k]))
            if params.settings.verbose: log.bar()
            for ag in sel_hiery.atom_groups():
                if ag.altloc in this_confs:
                    if params.settings.verbose:
                        log('{} -> alt {}'.format(Labeller.format(ag),
                                                  conf_hash[ag.altloc]))
                    ag.altloc = conf_hash[ag.altloc]

        if params.options.reset_occupancies:
            log.bar()
            log('Resetting output occupancies (maximum occupancy of 1.0, etc.)'
                )
            # Divide through by the smallest occupancy of any complete residue group with occupancy less than one
            rg_occs = [
                calculate_residue_group_occupancy(rg) for rg in
                residue_groups_with_complete_set_of_conformers(sel_hiery)
            ]
            non_uni = [v for v in numpy.unique(rg_occs) if 0.0 < v < 1.0]
            if non_uni:
                div_occ = min(non_uni)
                log('Dividing all occupancies by {}'.format(div_occ))
                sel_hiery.atoms().set_occ(sel_hiery.atoms().extract_occ() /
                                          div_occ)
            # Normalise the occupancies of any residue groups with more than unitary occupancy
            log('Fixing any residues that have greater than unitary occupancy')
            sanitise_occupancies(hierarchy=sel_hiery,
                                 min_occ=0.0,
                                 max_occ=1.0,
                                 in_place=True,
                                 verbose=params.settings.verbose)
            # Perform checks
            max_occ = max([
                calculate_residue_group_occupancy(rg)
                for rg in sel_hiery.residue_groups()
            ])
            log('Maximum occupancy of output structure: {}'.format(max_occ))
            assert max_occ >= 0.0, 'maximum occupancy is less than 0.0?!?!'
            assert max_occ <= 1.0, 'maximum occupancy is greater than 1.0?!?!'

        log.bar()
        log('Writing structure: {}'.format(this_path))
        log.bar(False, True)

        # Write header contents
        with open(this_path, 'w') as fh:
            fh.write(header_contents)
        # Write output file
        sel_hiery.write_pdb_file(this_path, open_append=True)

    return out_paths
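
A hedged usage sketch for split_conformations (assumes this module's master_phil provides the options/output scopes used above; the file name is hypothetical):

params = master_phil.extract()
params.options.mode = 'by_conformer'
out_paths = split_conformations('ensemble.pdb', params=params)
# one output file per conformer, named <input>.<suffix_prefix>.<altloc>.pdb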
Example #13
def make_occupancy_constraints(params, input_hierarchy, log=None):
    """Create occupancy groups for a hierarchy"""

    if log is None: log = Log(verbose=True)

    log.subheading('Generating occupancy-constrained groups')

    # Ligand resname identifiers
    resnames = params.occupancy.resname.split(',')
    if params.settings.verbose:
        log('Looking for ligands with resname {!s}'.format(
            ' or '.join(resnames)))
        log('')

    # Make occupancy groups
    occupancy_groups = overlapping_occupancy_groups(
        hierarchy=input_hierarchy.hierarchy,
        resnames=resnames,
        group_dist=params.occupancy.group_dist,
        overlap_dist=params.occupancy.overlap_dist,
        complete_groups=params.occupancy.complete_groups,
        exclude_altlocs=params.occupancy.exclude_altlocs.split(',')
        if params.occupancy.exclude_altlocs else [],
        verbose=params.settings.verbose)
    # Record whether the occupancy groups are complete (occs sum to 1)
    if params.occupancy.complete_groups:
        occupancy_complete = [True] * len(occupancy_groups)
    else:
        occupancy_complete = [False] * len(occupancy_groups)

    if not occupancy_groups:
        log('No matching residues were found (no occupancy constraints created)'
            )
        return

    log.bar()
    log('')
    log('Created {} occupancy groups for overlapping conformers'.format(
        len(occupancy_groups)))
    log('')

    # Ref-make the default occupancy groups?
    if params.occupancy.simple_groups:
        log('simple_groups=={}: Remaking default occupancy restraints for residues'
            .format(params.occupancy.simple_groups))
        if params.settings.verbose: log('')
        simple_groups = simple_occupancy_groups(
            hierarchy=input_hierarchy.hierarchy,
            verbose=params.settings.verbose)
        num_alts = len(
            [a for a in input_hierarchy.hierarchy.altloc_indices() if a != ''])
        occupancy_complete += [
            True if len(g) == num_alts else False for g in simple_groups
        ]
        occupancy_groups += simple_groups
        if params.settings.verbose: log('')
        log('Increased number of occupancy groups to {}'.format(
            len(occupancy_groups)))
        log('')

    if params.output.refmac:
        restraint_list = RefmacFormatter.make_occupancy_restraints(
            list_of_lists_of_groups=occupancy_groups,
            group_completeness=occupancy_complete)
        rest_block = RefmacFormatter.format_occupancy_restraints(
            restraint_list=restraint_list)
        with open(params.output.refmac, 'a') as fh:
            fh.write(rest_block + '\n')
        if params.settings.verbose:
            log.subheading('refmac occupancy restraints')
            log(rest_block[:1000] + '...' * (len(rest_block) > 1000))
            log('')

    if params.output.phenix:
        restraint_list = PhenixFormatter.make_occupancy_restraints(
            list_of_lists_of_groups=occupancy_groups,
            group_completeness=occupancy_complete)
        rest_block = PhenixFormatter.format_occupancy_restraints(
            restraint_list=restraint_list)
        with open(params.output.phenix, 'a') as fh:
            fh.write(rest_block + '\n')
        if params.settings.verbose:
            log.subheading('phenix occupancy restraints')
            log(rest_block[:1000] + '...' * (len(rest_block) > 1000))
            log('')
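
A hedged usage sketch for the occupancy-group generation above (master_phil and strip_pdb_to_input assumed from this module; the resname and file names are hypothetical):

params = master_phil.extract()
params.occupancy.resname = 'LIG'
params.output.refmac = 'restraints.refmac.params'
input_obj = strip_pdb_to_input('merged.pdb', remove_ter=True)
make_occupancy_constraints(params, input_hierarchy=input_obj)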
Example #14
def make_duplication_restraints(params, input_hierarchy, log=None):
    """Create coordinate and b-factor restraints for duplicated conformers"""

    if log is None: log = Log(verbose=True)

    log.subheading('Generating restraints for duplicated conformers')

    dup_groups = []

    for chn in input_hierarchy.hierarchy.chains():

        if (params.duplicates.make_for == 'protein') and not chn.is_protein():
            continue
        elif (params.duplicates.make_for == 'het') and chn.is_protein():
            continue

        for rg in chn.residue_groups():
            dup_groups += find_duplicated_conformers_and_generate_atom_pairs(
                residue_group=rg, rmsd_cutoff=params.duplicates.rmsd_cutoff)

    if not dup_groups:
        log('No duplicated conformers (no restraints created)')
        return

    # Concatenate the atom pairs into a single list
    atom_pairs = []
    for group in dup_groups:
        atom_pairs.extend(group)

    log('Found {} duplicated conformers consisting of {} atoms'.format(
        len(dup_groups), len(atom_pairs)))
    log('')

    if params.output.refmac:
        restraint_list = [
            RefmacFormatter.make_distance_restraint(
                atm_1=a1,
                atm_2=a2,
                value=0.0,
                sigma=params.duplicates.sigma_xyz) for a1, a2 in atom_pairs
        ]
        rest_block = RefmacFormatter.format_distance_restraints(
            restraint_list=restraint_list)
        with open(params.output.refmac, 'a') as fh:
            fh.write(rest_block + '\n')
        if params.settings.verbose:
            log.subheading('refmac duplicate conformer restraints')
            log(rest_block[:1000] + '...' * (len(rest_block) > 1000))
            log('')

    if params.output.phenix:
        restraint_list = [
            PhenixFormatter.make_distance_restraint(
                atm_1=a1,
                atm_2=a2,
                value=0.0,
                sigma=params.duplicates.sigma_xyz) for a1, a2 in atom_pairs
        ]
        rest_block = PhenixFormatter.format_distance_restraints(
            restraint_list=restraint_list)
        with open(params.output.phenix, 'a') as fh:
            fh.write(rest_block + '\n')
        if params.settings.verbose:
            log.subheading('phenix duplicate conformer restraints')
            log(rest_block[:1000] + '...' * (len(rest_block) > 1000))
            log('')
Example #15
def make_link_records(params, input_hierarchy, link_file, log=None):
    """Create link records to make a continuous peptide chain"""

    if log is None: log = Log(verbose=True)

    log.subheading('Checking the continuity of the protein backbone')

    links, warnings = generate_set_of_alternate_conformer_peptide_links(
        hierarchy=input_hierarchy.hierarchy)

    if warnings:
        log.bar()
        log('WARNINGS:')
        log.bar()
        for w in warnings:
            log(w)
        log.bar()
        log('')

    if (not links) and (not warnings):
        log('No breaks in the backbone - hooray! (nothing needs to be done here)'
            )
        return
    elif (not links):
        log("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
            )
        log("!!! >>> There are breaks in the backbone but I'm not able to do anything to fix them    <<< !!!"
            )
        log("!!! >>> You'll need to check them manually to see if these are going to be a problem... <<< !!!"
            )
        log("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
            )
        return

    link_block = '\n'.join([
        format_link_record(atom_1=a1,
                           atom_2=a2,
                           chain_id_1=c1,
                           chain_id_2=c2,
                           link_type=lt) for a1, a2, c1, c2, lt in links
    ])

    log('Need to apply {} links to make the backbone continuous:'.format(
        len(links)))
    log('')
    log(link_block)
    log('')

    log('Writing hierarchy with new link records to {}'.format(link_file))
    log('(This file can only be used for refinement with REFMAC)')
    log('')
    log('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
        )
    log('!!! ALTHOUGH THE FILE WITH BACKBONE LINKS HAS BEEN OUTPUT, IT SHOULD BE USED WITH CAUTION !!!'
        )
    log('!!!   THE CONNECTION OF ALTERNATE CONFORMATIONS OF THE BACKBONE IS GENERALLY "INCORRECT"  !!!'
        )
    log('!!!          THERE SHOULD BE A VERY GOOD REASON FOR THESE RESTRAINTS TO BE USED           !!!'
        )
    log('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
        )

    input_hierarchy.hierarchy.write_pdb_file(
        file_name=link_file,
        crystal_symmetry=input_hierarchy.crystal_symmetry(),
        link_records=link_block)
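
A hedged usage sketch for make_link_records (the output is only usable with REFMAC, as the warnings above state; file names are hypothetical):

params = master_phil.extract()
input_obj = strip_pdb_to_input('merged.pdb', remove_ter=True)
make_link_records(params, input_hierarchy=input_obj,
                  link_file='merged.linked.pdb')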