Example #1
    def show_summary(self, log=None):
        if log is None: log = Log()
        log.subheading('Available datasets')
        for d in self.datasets:
            log.bar()
            d.show_summary(log=log)
        log.bar()
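
The Log object used above is provided by the surrounding codebase; this snippet only assumes it exposes subheading() and bar() and can be constructed with no arguments. A hypothetical minimal stand-in for trying the pattern in isolation might look like this (illustrative only, not the project's Log class):

class StubLog(object):
    """Hypothetical stand-in for the Log interface used by show_summary."""

    def subheading(self, text):
        print('=== {} ==='.format(text))

    def bar(self):
        print('-' * 40)

# e.g. some_collection.show_summary(log=StubLog())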
Example #2
    def run(self):
        """Process the dataset"""

        dataset, dataset_map, grid, map_analyser, args, verbose = self.data

        # TODO: Hardcoded check - to be removed?
        assert dataset_map.is_sparse()

        # ============================================================================>
        # Prepare output objects
        # ============================================================================>
        log_strs = []
        log_file = dataset.file_manager.get_file('dataset_log')
        log = Log(log_file=log_file, verbose=False, silent=True)

        # ============================================================================>
        # Build new blob search object
        # ============================================================================>
        blob_finder = PanddaZMapAnalyser(params=args.params.z_map_analysis,
                                         grid=grid,
                                         log=log)
        print('Writing log for dataset {!s} to ...{}'.format(
            dataset.tag, log_file[log_file.index('processed'):]))

        # ============================================================================>
        # Extract the global mask object from the grid
        # ============================================================================>
        dset_total_temp = grid.global_mask().total_mask_binary().copy()

        # ============================================================================>
        # Generate symmetry masks for this dataset
        # ============================================================================>
        log.bar()
        log('Masking symmetry contacts from Z-map.')
        # Generate symmetry contacts for this dataset and align to reference frame
        dataset_sym_copies = dataset.model.crystal_contacts(
            distance_cutoff=args.params.masks.outer_mask + 5,
            combine_copies=True)
        dataset_sym_copies.atoms().set_xyz(
            dataset.model.alignment.nat2ref(
                dataset_sym_copies.atoms().extract_xyz()))
        # Only need to write if writing reference frame maps
        if args.output.developer.write_reference_frame_maps:
            dataset_sym_copies.write_pdb_file(
                dataset.file_manager.get_file('symmetry_copies'))
        # Extract protein atoms from the symmetry copies
        dataset_sym_sites_cart = non_water(
            dataset_sym_copies).atoms().extract_xyz()
        # Generate symmetry contacts grid mask
        dataset_mask = GridMask(parent=grid,
                                sites_cart=dataset_sym_sites_cart,
                                max_dist=args.params.masks.outer_mask,
                                min_dist=args.params.masks.inner_mask_symmetry)
        # Combine with the total mask to generate custom mask for this dataset
        dset_total_temp.put(dataset_mask.inner_mask_indices(), 0)
        dset_total_idxs = numpy.where(dset_total_temp)[0]
        log('After masking with symmetry contacts: {} points for Z-map analysis'
            .format(len(dset_total_idxs)))
        # Write map of grid + symmetry mask
        if args.output.developer.write_reference_frame_grid_masks:
            grid.write_indices_as_map(
                indices=dset_total_idxs,
                f_name=dataset.file_manager.get_file('grid_mask'),
                origin_shift=True)

        # ============================================================================>
        # Generate custom masks for this dataset
        # ============================================================================>
        if args.params.z_map_analysis.masks.selection_string is not None:
            log.bar()
            log('Applying custom mask to the Z-map: "{}"'.format(
                args.params.z_map_analysis.masks.selection_string))
            cache = dataset.model.hierarchy.atom_selection_cache()
            custom_mask_selection = cache.selection(
                args.params.z_map_analysis.masks.selection_string)
            custom_mask_sites = dataset.model.hierarchy.select(
                custom_mask_selection).atoms().extract_xyz()
            log('Masking with {} atoms'.format(len(custom_mask_sites)))
            # Generate custom grid mask
            dataset_mask = GridMask(
                parent=grid,
                sites_cart=custom_mask_sites,
                max_dist=args.params.z_map_analysis.masks.outer_mask,
                min_dist=args.params.z_map_analysis.masks.inner_mask)
            # Combine with the total mask to generate custom mask for this dataset
            dset_total_temp *= dataset_mask.total_mask_binary()
            dset_total_idxs = numpy.where(dset_total_temp)[0]
            log('After masking with custom mask: {} points for Z-map analysis'.
                format(len(dset_total_idxs)))
            # Write out mask
            grid.write_indices_as_map(
                indices=dset_total_idxs,
                f_name=dataset.file_manager.get_file('z_map_mask'),
                origin_shift=True)

        # ============================================================================>
        #####
        # CALCULATE Z-MAPS AND LOOK FOR LARGE BLOBS
        #####
        # ============================================================================>
        # Check that the map is loaded and that all maps are sparse
        # ============================================================================>
        assert dataset_map.data is not None, 'Something has gone wrong - this dataset has no loaded map'
        assert dataset_map.is_sparse() is map_analyser.statistical_maps.mean_map.is_sparse()
        assert dataset_map.is_sparse() is map_analyser.statistical_maps.medn_map.is_sparse()
        assert dataset_map.is_sparse() is map_analyser.statistical_maps.stds_map.is_sparse()
        assert dataset_map.is_sparse() is map_analyser.statistical_maps.sadj_map.is_sparse()
        # ============================================================================>
        # CALCULATE MEAN-DIFF MAPS
        # ============================================================================>
        mean_diff_map = map_analyser.calculate_z_map(map=dataset_map,
                                                     method='none')
        #        # ============================================================================>
        #        # NAIVE Z-MAP - NOT USING UNCERTAINTY ESTIMATION OR ADJUSTED STDS
        #        # ============================================================================>
        #        z_map_naive = map_analyser.calculate_z_map(map=dataset_map, method='naive')
        #        z_map_naive_normalised = z_map_naive.normalised_copy()
        # ============================================================================>
        # UNCERTAINTY Z-MAP - NOT USING ADJUSTED STDS
        # ============================================================================>
        z_map_uncty = map_analyser.calculate_z_map(
            map=dataset_map,
            uncertainty=dataset_map.meta.map_uncertainty,
            method='uncertainty')
        z_map_uncty_normalised = z_map_uncty.normalised_copy()
        # ============================================================================>
        # ADJUSTED+UNCERTAINTY Z-MAP
        # ============================================================================>
        z_map_compl = map_analyser.calculate_z_map(
            map=dataset_map,
            uncertainty=dataset_map.meta.map_uncertainty,
            method='adjusted+uncertainty')
        z_map_compl_normalised = z_map_compl.normalised_copy()
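        # Note (assumption, not taken from this codebase): a plausible form for
        # these Z-maps is Z = (rho - mu) / sigma_u for the 'uncertainty' method
        # and Z = (rho - mu) / sqrt(sigma_adj**2 + sigma_u**2) for the
        # 'adjusted+uncertainty' method, where mu is the mean map, sigma_u the
        # map uncertainty and sigma_adj the adjusted standard-deviation map.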

        # ============================================================================>
        # SELECT WHICH MAP TO DO THE BLOB SEARCHING ON
        # ============================================================================>
        #        if args.params.statistical_maps.z_map_type == 'naive':
        #            z_map = z_map_naive_normalised
        #            z_map_stats = basic_statistics(flex.double(z_map_naive.data))
        if args.params.statistical_maps.z_map_type == 'uncertainty':
            z_map = z_map_uncty_normalised
            z_map_stats = basic_statistics(flex.double(z_map_uncty.data))
        elif args.params.statistical_maps.z_map_type == 'adjusted+uncertainty':
            z_map = z_map_compl_normalised
            z_map_stats = basic_statistics(flex.double(z_map_compl.data))
        else:
            raise Exception('Invalid Z-map type: {}'.format(
                args.params.statistical_maps.z_map_type))

        # ============================================================================>
        # RECORD Z-MAP FOR STATISTICS
        # ============================================================================>
        # Calculate statistics of z-maps
        dataset_map.meta.z_mean = z_map_stats.mean
        dataset_map.meta.z_stdv = z_map_stats.bias_corrected_standard_deviation
        dataset_map.meta.z_skew = z_map_stats.skew
        dataset_map.meta.z_kurt = z_map_stats.kurtosis
        # ============================================================================>
        z_map.meta.type = 'z-map'
        # ============================================================================>

        # ============================================================================>
        #####
        # WRITE ALL MAP DISTRIBUTIONS (THESE DON'T USE MUCH SPACE)
        #####
        # ============================================================================>
        # Sampled Map
        analyse_graphs.map_value_distribution(
            f_name=dataset.file_manager.get_file('s_map_png'),
            plot_vals=dataset_map.get_map_data(sparse=True))
        # Mean-Difference
        analyse_graphs.map_value_distribution(
            f_name=dataset.file_manager.get_file('d_mean_map_png'),
            plot_vals=mean_diff_map.get_map_data(sparse=True))
        #        # Naive Z-Map
        #        analyse_graphs.map_value_distribution(f_name      = dataset.file_manager.get_file('z_map_naive_png'),
        #                                              plot_vals   = z_map_naive.get_map_data(sparse=True),
        #                                              plot_normal = True)
        #        # Normalised Naive Z-Map
        #        analyse_graphs.map_value_distribution(f_name      = dataset.file_manager.get_file('z_map_naive_normalised_png'),
        #                                              plot_vals   = z_map_naive_normalised.get_map_data(sparse=True),
        #                                              plot_normal = True)
        # Uncertainty Z-Map
        analyse_graphs.map_value_distribution(
            f_name=dataset.file_manager.get_file('z_map_uncertainty_png'),
            plot_vals=z_map_uncty.get_map_data(sparse=True),
            plot_normal=True)
        # Normalised Uncertainty Z-Map
        analyse_graphs.map_value_distribution(
            f_name=dataset.file_manager.get_file(
                'z_map_uncertainty_normalised_png'),
            plot_vals=z_map_uncty_normalised.get_map_data(sparse=True),
            plot_normal=True)
        # Corrected Z-Map
        analyse_graphs.map_value_distribution(
            f_name=dataset.file_manager.get_file('z_map_corrected_png'),
            plot_vals=z_map_compl.get_map_data(sparse=True),
            plot_normal=True)
        # Normalised Corrected Z-Map
        analyse_graphs.map_value_distribution(
            f_name=dataset.file_manager.get_file(
                'z_map_corrected_normalised_png'),
            plot_vals=z_map_compl_normalised.get_map_data(sparse=True),
            plot_normal=True)
        # Plot Q-Q Plot of Corrected Z-Map to see how normal it is
        analyse_graphs.qq_plot_against_normal(
            f_name=dataset.file_manager.get_file('z_map_qq_plot_png'),
            plot_vals=z_map_compl_normalised.get_map_data(sparse=True))

        # ============================================================================>
        #####
        # LOOK FOR CLUSTERS OF LARGE Z-SCORES
        #####
        # ============================================================================>
        # Contour the grid at a particular Z-Value
        # ============================================================================>
        num_clusters, z_clusters = blob_finder.cluster_high_z_values(
            z_map_data=z_map.get_map_data(sparse=False),
            point_mask_idx=dset_total_idxs)
        # ============================================================================>
        # Too many points to cluster -- probably a bad dataset
        # ============================================================================>
        if num_clusters == -1:
            # This dataset is too noisy to analyse - flag!
            log_strs.append(
                'Z-Map too noisy to analyse -- not sure what has gone wrong here...'
            )
            return dataset, dataset_map.meta, log_strs

        # ============================================================================>
        #####
        # FILTER/SELECT CLUSTERS OF Z-SCORES
        #####
        # ============================================================================>
        # Filter the clusters by size and peak height
        # ============================================================================>
        if num_clusters > 0:
            num_clusters, z_clusters = blob_finder.filter_z_clusters_1(
                z_clusters=z_clusters)
            blob_finder.validate_clusters(z_clusters)
            if num_clusters == 0:
                log_strs.append('===> Minimum cluster peak/size not reached.')
        # ============================================================================>
        # Filter the clusters by distance from protein
        # ============================================================================>
        if num_clusters > 0:
            num_clusters, z_clusters = blob_finder.filter_z_clusters_2(
                z_clusters=z_clusters, dataset=dataset)
            blob_finder.validate_clusters(z_clusters)
            if num_clusters == 0:
                log_strs.append('===> Clusters too far from protein.')
        # ============================================================================>
        # Group Nearby Clusters Together
        # ============================================================================>
        if num_clusters > 0:
            num_clusters, z_clusters = blob_finder.group_clusters(
                z_clusters=z_clusters)
            blob_finder.validate_clusters(z_clusters)
        # ============================================================================>
        # Filter the clusters by symmetry equivalence
        # ============================================================================>
        if num_clusters > 0:
            num_clusters, z_clusters = blob_finder.filter_z_clusters_3(
                z_clusters=z_clusters, dataset=dataset)
            blob_finder.validate_clusters(z_clusters)

        # ============================================================================>
        #####
        # WRITE MAPS
        #####
        # ============================================================================>
        # write dataset maps in the reference frame
        # ============================================================================>
        if args.output.developer.write_reference_frame_maps:
            dataset_map.to_file(
                filename=dataset.file_manager.get_file('sampled_map'),
                space_group=grid.space_group())
            mean_diff_map.to_file(
                filename=dataset.file_manager.get_file('mean_diff_map'),
                space_group=grid.space_group())
            z_map.to_file(filename=dataset.file_manager.get_file('z_map'),
                          space_group=grid.space_group())
        # ============================================================================>
        # Write out mask of the high z-values
        # ============================================================================>
        if args.output.developer.write_reference_frame_grid_masks:
            # Write map of where the blobs are (high-Z mask)
            highz_points = []
            for x in z_clusters:
                highz_points.extend(list(x[0]))
            highz_points = [map(int, v) for v in highz_points]
            highz_indices = map(grid.indexer(), list(highz_points))
            grid.write_indices_as_map(
                indices=highz_indices,
                f_name=dataset.file_manager.get_file('high_z_mask'),
                origin_shift=True)
        # ============================================================================>
        # Write different Z-Maps? (Probably only needed for testing)
        # ============================================================================>
        if args.output.developer.write_reference_frame_all_z_map_types:
            #            z_map_naive.to_file(filename=dataset.file_manager.get_file('z_map_naive'), space_group=grid.space_group())
            #            z_map_naive_normalised.to_file(filename=dataset.file_manager.get_file('z_map_naive_normalised'), space_group=grid.space_group())
            z_map_uncty.to_file(
                filename=dataset.file_manager.get_file('z_map_uncertainty'),
                space_group=grid.space_group())
            z_map_uncty_normalised.to_file(
                filename=dataset.file_manager.get_file(
                    'z_map_uncertainty_normalised'),
                space_group=grid.space_group())
            z_map_compl.to_file(
                filename=dataset.file_manager.get_file('z_map_corrected'),
                space_group=grid.space_group())
            z_map_compl_normalised.to_file(
                filename=dataset.file_manager.get_file(
                    'z_map_corrected_normalised'),
                space_group=grid.space_group())

        # ============================================================================>
        # Skip to next dataset if no clusters found
        # ============================================================================>
        if num_clusters > 0:
            log_strs.append('===> {!s} Cluster(s) found.'.format(num_clusters))
        else:
            log_strs.append('===> No Clusters found.')
            return (dataset, dataset_map.meta, log_strs)
        assert num_clusters > 0, 'NUMBER OF CLUSTERS AFTER FILTERING == 0!'

        # ============================================================================>
        # Extract the map data in non-sparse format
        # ============================================================================>
        dset_map_data = dataset_map.get_map_data(sparse=False)
        avrg_map_data = map_analyser.average_map().get_map_data(sparse=False)
        # ============================================================================>
        # Process the identified features
        # ============================================================================>
        for event_idx, (event_points, event_values) in enumerate(z_clusters):
            # Number events from 1
            event_num = event_idx + 1
            # Create a unique identifier for this event
            event_key = (dataset.tag, event_num)
            # ============================================================================>
            # Create a point cluster object
            # ============================================================================>
            point_cluster = PointCluster(id=event_key,
                                         points=event_points,
                                         values=event_values)
            # ============================================================================>
            # Estimate the background correction of the detected feature
            # ============================================================================>
            # Extract sites for this cluster and estimate the background correction for the event
            log_strs.append('----------------------------------->>>')
            log_strs.append(
                'Estimating Event {!s} Background Correction'.format(
                    event_num))
            # Generate custom grid mask for this dataset
            event_mask = GridMask(parent=grid,
                                  sites_cart=grid.grid2cart(
                                      point_cluster.points, origin_shift=True),
                                  max_dist=2.0,
                                  min_dist=0.0)
            log_strs.append(
                '=> Event sites ({!s} points) expanded to {!s} points'.format(
                    len(point_cluster.points),
                    len(event_mask.outer_mask_indices())))
            # Select masks to define regions for bdc calculation
            exp_event_idxs = flex.size_t(event_mask.outer_mask_indices())
            reference_idxs = flex.size_t(
                grid.global_mask().inner_mask_indices())
            # ============================================================================>
            # Generate BDC-estimation curve and estimate BDC
            # ============================================================================>
            event_remains, event_corrs, global_corrs = calculate_varying_bdc_correlations(
                ref_map_data=avrg_map_data,
                query_map_data=dset_map_data,
                feature_idxs=exp_event_idxs,
                reference_idxs=reference_idxs,
                min_remain=1.0 - args.params.background_correction.max_bdc,
                max_remain=1.0 - args.params.background_correction.min_bdc,
                bdc_increment=args.params.background_correction.increment,
                verbose=verbose)
            event_remain_est = calculate_maximum_series_discrepancy(
                labels=event_remains,
                series_1=global_corrs,
                series_2=event_corrs)
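            # Interpretation (assumption): event_remain_est is the fraction of
            # the dataset map attributed to the event, picked where the global
            # and event correlation series diverge most; the background
            # correction (BDC) reported below is 1 - event_remain_est.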
            analyse_graphs.write_occupancy_graph(
                f_name=dataset.file_manager.get_file('bdc_est_png').format(
                    event_num),
                x_values=event_remains,
                global_values=global_corrs,
                local_values=event_corrs)
            log_strs.append(
                '=> Event Background Correction estimated as {!s}'.format(
                    1 - event_remain_est))
            # Reporting (log is normally silenced)
            blob_finder.log('Min-Max: {} {}'.format(
                1.0 - args.params.background_correction.max_bdc,
                1.0 - args.params.background_correction.min_bdc))
            blob_finder.log('Event number: {}'.format(event_num))
            blob_finder.log('Event Remains: {}'.format(','.join(
                map(str, event_remains))))
            blob_finder.log('Event Corrs:  {}'.format(','.join(
                map(str, event_corrs))))
            blob_finder.log('Global Corrs: {}'.format(','.join(
                map(str, global_corrs))))
            # Apply multiplier if provided
            blob_finder.log('Applying multiplier to output 1-BDC: {}'.format(
                args.params.background_correction.output_multiplier))
            event_remain_est = min(
                event_remain_est *
                args.params.background_correction.output_multiplier,
                1.0 - args.params.background_correction.min_bdc)
            # ============================================================================>
            # Calculate the map correlations at the selected BDC
            # ============================================================================>
            event_map_data = calculate_bdc_subtracted_map(
                ref_map_data=avrg_map_data,
                query_map_data=dset_map_data,
                bdc=1.0 - event_remain_est)
            global_corr = numpy.corrcoef(
                event_map_data.select(reference_idxs),
                avrg_map_data.select(reference_idxs))[0, 1]
            local_corr = numpy.corrcoef(
                event_map_data.select(exp_event_idxs),
                avrg_map_data.select(exp_event_idxs))[0, 1]
            # ============================================================================>
            # Write out EVENT map (in the reference frame) and grid masks
            # ============================================================================>
            if args.output.developer.write_reference_frame_maps:
                event_map = dataset_map.new_from_template(event_map_data,
                                                          sparse=False)
                event_map.to_file(
                    filename=dataset.file_manager.get_file('event_map').format(
                        event_num, event_remain_est),
                    space_group=grid.space_group())
            if args.output.developer.write_reference_frame_grid_masks:
                grid.write_indices_as_map(
                    indices=event_mask.outer_mask_indices(),
                    f_name=dataset.file_manager.get_file('grid_mask').replace(
                        '.ccp4', '') + '-event-mask-{}.ccp4'.format(event_num))

            # ============================================================================>
            # Find the nearest atom to the event
            # ============================================================================>
            atm = find_nearest_atoms(atoms=list(
                protein(dataset.model.hierarchy).atoms_with_labels()),
                                     query=dataset.model.alignment.ref2nat(
                                         grid.grid2cart(sites_grid=[
                                             map(int, point_cluster.centroid)
                                         ],
                                                        origin_shift=True)))[0]
            log_strs.append(
                '=> Nearest Residue to event: Chain {}, Residue {} {}'.format(
                    atm.chain_id, atm.resname, atm.resid()))
            # ============================================================================>
            # Create an event object
            # ============================================================================>
            event_obj = Event(id=point_cluster.id, cluster=point_cluster)
            event_obj.info.estimated_pseudo_occupancy = event_remain_est
            event_obj.info.estimated_bdc = 1.0 - event_remain_est
            event_obj.info.global_correlation = global_corr
            event_obj.info.local_correlation = local_corr
            # ============================================================================>
            # Append to dataset handler
            # ============================================================================>
            dataset.events.append(event_obj)

        # ============================================================================>
        # Write out pymol script to load all of the maps easily
        # ============================================================================>
        pml = PythonScript()
        pml.set_normalise_maps(False)
        # Load Structures
        name = pml.load_pdb(
            f_name=dataset.file_manager.get_file('aligned_model'))
        pml.repr_as(obj=name, style='sticks')
        name = pml.load_pdb(
            f_name=dataset.file_manager.get_file('symmetry_copies'))
        pml.repr_hide(obj=name)
        # Load Sampled Map
        name = pml.load_map(
            f_name=dataset.file_manager.get_file('sampled_map'))
        mesh = pml.make_mesh(obj=name, contour_level=1.0, colour='blue')
        # Load Z-maps
        name = pml.load_map(f_name=dataset.file_manager.get_file('z_map'))
        mesh = pml.make_mesh(obj=name,
                             mesh_suffix='.plus',
                             contour_level=3.0,
                             colour='green')
        mesh = pml.make_mesh(obj=name,
                             mesh_suffix='.mins',
                             contour_level=-3.0,
                             colour='red')
        # Load Event maps
        for f in sorted(
                glob.glob(
                    dataset.file_manager.get_file('event_map').format(
                        '*', '*'))):
            name = pml.load_map(f_name=f)
            mesh = pml.make_mesh(obj=name,
                                 contour_level=float(f.split('_')[-2]),
                                 colour='hotpink')
        # Load Miscellaneous maps (e.g. masks)
        for f in sorted(
                glob.glob(
                    os.path.join(dataset.file_manager.get_dir('root'),
                                 '*mask*.ccp4'))):
            name = pml.load_map(f_name=f)
            mesh = pml.make_mesh(obj=name, contour_level=0.0, colour='grey')

        pml.write_script(f_name=dataset.file_manager.get_file('pymol_script'),
                         overwrite=True)

        return (dataset, dataset_map.meta, log_strs)
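
calculate_varying_bdc_correlations, calculate_maximum_series_discrepancy and calculate_bdc_subtracted_map are helpers from the surrounding codebase. As a rough, self-contained sketch of the same scan-and-pick-discrepancy idea (assuming the event map is formed as the dataset map minus (1 - remain) times the average map; all names below are illustrative, not the project's API):

import numpy

def estimate_event_remain(query_map, ref_map, event_idxs, reference_idxs,
                          min_remain=0.2, max_remain=1.0, increment=0.01):
    """Scan trial 'remain' fractions (1 - BDC) and return the one where the
    event-region and global correlation curves diverge the most."""
    remains, event_corrs, global_corrs = [], [], []
    for remain in numpy.arange(min_remain, max_remain + 1e-9, increment):
        # Trial event map; dividing by `remain` only rescales the values and
        # therefore does not change the Pearson correlations below
        event_map = (query_map - (1.0 - remain) * ref_map) / remain
        remains.append(remain)
        event_corrs.append(numpy.corrcoef(event_map[event_idxs],
                                          ref_map[event_idxs])[0, 1])
        global_corrs.append(numpy.corrcoef(event_map[reference_idxs],
                                           ref_map[reference_idxs])[0, 1])
    diffs = numpy.array(global_corrs) - numpy.array(event_corrs)
    return remains[int(numpy.argmax(diffs))]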
Example #3
def run(params):

    # Validate input files
    if not (params.input.pdb or params.input.mtz):
        raise Sorry(
            'No pdb/mtz files have been provided: specify with input.pdb or input.mtz'
        )
    # Check and create output directory
    if not params.output.out_dir:
        raise Sorry(
            'No output directory has been specified: specify with output.out_dir'
        )
    if not os.path.exists(params.output.out_dir):
        os.mkdir(params.output.out_dir)
    # Define and create image directory
    img_dir = os.path.join(params.output.out_dir, 'dendrograms')
    if not os.path.exists(img_dir):
        os.mkdir(img_dir)

    # Create log object
    log = Log(log_file=params.output.out_dir + '.clustering.log', verbose=True)

    # Define output_file_function to copy or symlink files as needed
    if params.output.file_mode == 'symlink':
        out_file_func = os.symlink
    elif params.output.file_mode == 'copy':
        out_file_func = shutil.copy
    else:
        raise Sorry('Invalid output.file_mode: {} (must be "symlink" or "copy")'.format(
            params.output.file_mode))

    log.heading('Processing input pdb/mtz files')
    log('Making dataset labels for {} pdb(s) and {} mtz(s)'.format(
        len(params.input.pdb), len(params.input.mtz)))

    try:
        if params.input.labels.pdb_label == 'filename':
            p_labels = [
                os.path.basename(os.path.splitext(f)[0])
                for f in params.input.pdb
            ]
        elif params.input.labels.pdb_label == 'foldername':
            p_labels = [
                os.path.basename(os.path.dirname(f)) for f in params.input.pdb
            ]
        elif params.input.labels.pdb_regex:
            p_labels = [
                re.findall(params.input.labels.pdb_regex, f)[0]
                for f in params.input.pdb
            ]
        else:
            p_labels = [
                'PDB-{:06d}'.format(i) for i in range(len(params.input.pdb))
            ]
        if params.input.labels.mtz_label == 'filename':
            m_labels = [
                os.path.basename(os.path.splitext(f)[0])
                for f in params.input.mtz
            ]
        elif params.input.labels.mtz_label == 'foldername':
            m_labels = [
                os.path.basename(os.path.dirname(f)) for f in params.input.mtz
            ]
        elif params.input.labels.mtz_regex:
            m_labels = [
                re.findall(params.input.labels.mtz_regex, f)[0]
                for f in params.input.mtz
            ]
        else:
            m_labels = [
                'MTZ-{:06d}'.format(i) for i in range(len(params.input.mtz))
            ]
    except Exception:
        print('Error reading file: {}'.format(f))
        raise

    # Check labels are unique
    set_m_labels = set(m_labels)
    set_p_labels = set(p_labels)
    if len(set_m_labels) != len(m_labels):
        raise Sorry('MTZ labels are not unique. Repeated labels: {}'.format(
            ' '.join([
                '{}'.format(l) for l in set_m_labels if m_labels.count(l) != 1
            ])))
    if len(set_p_labels) != len(p_labels):
        raise Sorry('PDB labels are not unique. Repeated labels: {}'.format(
            ' '.join([l for l in set_p_labels if p_labels.count(l) != 1])))
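    # (Alternative sketch) the duplicated labels above could also be collected
    # with collections.Counter, e.g.:
    #   from collections import Counter
    #   dups = sorted(l for l, n in Counter(m_labels).items() if n > 1)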

    # Report labels
    if p_labels:
        log.subheading('PDB Labels')
        log(', '.join(p_labels))
    if m_labels:
        log.subheading('MTZ Labels')
        log(', '.join(m_labels))

    # Load crystal summaries
    log.bar(True, True)
    log('Reading data for {} pdb(s) and {} mtz(s)'.format(
        len(params.input.pdb), len(params.input.mtz)))

    if params.input.pdb:
        pdb_summaries = [
            CrystalSummary.from_pdb(pdb_file=f, id=lab)
            for f, lab in zip(params.input.pdb, p_labels)
        ]
    else:
        pdb_summaries = []
    if params.input.mtz:
        mtz_summaries = [
            CrystalSummary.from_mtz(mtz_file=f, id=lab)
            for f, lab in zip(params.input.mtz, m_labels)
        ]
    else:
        mtz_summaries = []

    # Group by SpaceGroup
    log.subheading('Grouping {} crystals by space group...'.format(
        len(pdb_summaries + mtz_summaries)))
    crystal_groups = CrystalGroup.by_space_group(crystals=pdb_summaries +
                                                 mtz_summaries)
    log('Grouped crystals into {} space groups'.format(len(crystal_groups)))

    log.heading('Analysing variation of unit cells for each space group')

    for cg in crystal_groups:

        sg_name = 'sg-{}'.format(cg.space_groups[0].split(' (')[0].replace(
            ' ', '_'))

        log.subheading('Space Group {}: {} dataset(s)'.format(
            cg.space_groups[0], len(cg.crystals)))

        log('Unit Cell Variation:')
        log(numpy.round(cg.uc_stats.as_pandas_table().T, 2))

        log('')
        log('Making unit cell dendrogram for all crystals with this spacegroup'
            )
        if len(cg.crystals) > 1:
            cg.dendrogram(fname=os.path.join(img_dir,
                                             '{}-all.png'.format(sg_name)),
                          xlab='Crystal',
                          ylab='Linear Cell Variation',
                          annotate_y_min=params.clustering.label_nodes_above)

        log('')
        log('Clustering {} unit cells...'.format(len(cg.crystals)))
        sg_crystal_groups = cg.by_unit_cell(
            cg.crystals, cutoff=params.clustering.lcv_cutoff)
        log('Clustered crystals into {} groups'.format(len(sg_crystal_groups)))

        for i_cg2, cg2 in enumerate(sg_crystal_groups):

            cluster_name = '{}-cluster-{}'.format(sg_name, i_cg2 + 1)

            log.bar(True, False)
            log('Processing cluster: {}'.format(cluster_name))
            log.bar(False, True)

            log('Unit Cell Variation:')
            log(numpy.round(cg.uc_stats.as_pandas_table().T, 2))

            log('')
            log('Making unit cell dendrogram for this cluster of crystals')
            if len(cg2.crystals) > 1:
                cg2.dendrogram(
                    fname=os.path.join(img_dir, '{}.png'.format(cluster_name)),
                    xlab='Crystal',
                    ylab='Linear Cell Variation',
                    ylim=(0, params.clustering.lcv_cutoff),
                    annotate_y_min=params.clustering.label_nodes_above)

            log('Copying files to output directory')

            # Link or copy the datasets for each cluster into a separate folder
            sub_dir = os.path.join(params.output.out_dir, cluster_name)
            if not os.path.exists(sub_dir): os.mkdir(sub_dir)

            # Split the mtzs and pdbs into separate directories -- or not
            if params.output.split_pdbs_and_mtzs:
                mtz_dir = os.path.join(sub_dir, 'mtzs')
                if not os.path.exists(mtz_dir): os.mkdir(mtz_dir)
                pdb_dir = os.path.join(sub_dir, 'pdbs')
                if not os.path.exists(pdb_dir): os.mkdir(pdb_dir)
            else:
                mtz_dir = pdb_dir = sub_dir

            for c in cg2.crystals:
                # Set parameters based on pdb or mtz
                if c.mtz_file:
                    sub_sub_dir = os.path.join(mtz_dir, c.id)
                    def_file = os.path.abspath(c.mtz_file)
                    def_suff = '.mtz'
                    pos_suff = '.pdb'
                elif c.pdb_file:
                    sub_sub_dir = os.path.join(pdb_dir, c.id)
                    def_file = os.path.abspath(c.pdb_file)
                    def_suff = '.pdb'
                    pos_suff = '.mtz'
                # Create subdirectory
                if not os.path.exists(sub_sub_dir): os.mkdir(sub_sub_dir)
                # Output file base template
                out_base = os.path.join(sub_sub_dir, c.id)
                # Export file
                out_file = out_base + def_suff
                if not os.path.exists(out_file):
                    out_file_func(def_file, out_file)
                # output other as well if filenames are the same
                pos_file = def_file.replace(def_suff, pos_suff)
                out_file = out_base + pos_suff
                if os.path.exists(pos_file) and not os.path.exists(out_file):
                    out_file_func(pos_file, out_file)

    log.heading('finished')
Example #4
def run(params):

    # Identify any existing output directories
    current_dirs = sorted(glob.glob(params.output.dir_prefix + '*'))
    if not current_dirs:
        next_int = 1
    else:
        current_nums = [
            s.replace(params.output.dir_prefix, '') for s in current_dirs
        ]
        next_int = sorted(map(int, current_nums))[-1] + 1

    # Create output directory name from int
    out_dir = params.output.dir_prefix + '{:04}'.format(next_int)
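    # e.g. (illustration) with dir_prefix='refine_' and existing directories
    # 'refine_0001' and 'refine_0002', next_int is 3 and out_dir is 'refine_0003'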
    # Create output directory
    os.mkdir(out_dir)

    # Create log object
    log = Log(log_file=os.path.join(
        out_dir, params.output.out_prefix + '.quick-refine.log'),
              verbose=params.settings.verbose)

    # Report
    if current_dirs:
        log('Found existing refinement directories: \n\t{}'.format(
            '\n\t'.join(current_dirs)))
        log('')
    log('Creating new output directory: {}'.format(out_dir))

    # Validate input parameters
    log.subheading('Validating input parameters')
    assert params.input.pdb is not None, 'No PDB given for refinement'
    assert params.input.mtz is not None, 'No MTZ given for refinement'

    if os.path.islink(params.input.mtz):
        log('Converting mtz path to real path:')
        log('{} -> {}'.format(params.input.mtz,
                              os.path.realpath(params.input.mtz)))
        params.input.mtz = os.path.realpath(params.input.mtz)

    # Link input
    log('Copying/linking files to refinement folder')
    shutil.copy(params.input.pdb,
                os.path.abspath(os.path.join(out_dir, 'input.pdb')))
    rel_symlink(params.input.mtz,
                os.path.abspath(os.path.join(out_dir, 'input.mtz')))
    # Copy parameter file to output folder
    if params.input.params:
        shutil.copy(params.input.params,
                    os.path.abspath(os.path.join(out_dir, 'input.params')))

    # Create output prefixes
    output_prefix = os.path.join(out_dir, params.output.out_prefix)
    log('Real output file path prefixes: {}'.format(output_prefix))
    log('Link output file path prefixes: {}'.format(params.output.link_prefix))

    # Create command objects
    log.subheading('Preparing command line input for refinement program')

    # PHENIX
    if params.options.program == 'phenix':
        cm = CommandManager('phenix.refine')
        # Command line args
        cm.add_command_line_arguments([params.input.pdb, params.input.mtz])
        cm.add_command_line_arguments(
            ['output.prefix={}'.format(output_prefix)])
        if params.input.cif:
            cm.add_command_line_arguments(params.input.cif)
        if params.input.params and os.path.exists(params.input.params):
            cm.add_command_line_arguments([params.input.params])

    # REFMAC
    elif params.options.program == 'refmac':
        cm = CommandManager('refmac5')
        # Command line args
        cm.add_command_line_arguments(
            ['xyzin', params.input.pdb, 'hklin', params.input.mtz])

        cm.add_command_line_arguments([
            'xyzout', output_prefix + '.pdb', 'hklout', output_prefix + '.mtz'
        ])
        if params.input.cif:
            for cif in params.input.cif:
                cm.add_command_line_arguments(['libin', cif])
        # Standard input
        if params.input.params:
            cm.add_standard_input(open(params.input.params).read().split('\n'))

        cm.add_standard_input(['END'])

    elif params.options.program == 'buster':
        cm = CommandManager('refine')
        # Command line arguments
        # inputs
        cm.add_command_line_arguments(
            ['-p', params.input.pdb, '-m', params.input.mtz, '-d', out_dir])

        if params.input.cif:
            for cif in params.input.cif:
                cm.add_command_line_arguments(['-l', cif])

        if params.input.params:
            cm.add_command_line_arguments(['-Gelly', params.input.params])

    # Pass additional command line arguments?
    if params.input.args:
        cm.add_command_line_arguments(params.input.args)

    # Report
    log(str(cm))

    log.bar()
    log('running refinement... ({})'.format(cm.program[0]))
    out = cm.run()

    log.subheading('Refinement output')
    if not log.verbose:
        log('output written to log file ({} lines)'.format(
            cm.output.count('\n')))

    log('\n' + cm.output, show=False)

    if out != 0:
        log.subheading('Refinement Errors')
        log(cm.error)

    log.subheading('Post-processing output files')

    if params.options.program == 'buster':
        log.subheading('Renaming buster output files')

        shutil.move(src=os.path.join(out_dir, 'refine.pdb'),
                    dst=output_prefix + '.pdb')

        shutil.move(src=os.path.join(out_dir, 'refine.mtz'),
                    dst=output_prefix + '.mtz')

    # Find output files
    try:
        real_pdb = glob.glob(output_prefix + '*.pdb')[0]
        real_mtz = glob.glob(output_prefix + '*.mtz')[0]
    except IndexError:
        log('Refinement has failed - output files do not exist')
        log('{}: {}'.format(output_prefix + '*.pdb',
                            glob.glob(output_prefix + '*.pdb')))
        log('{}: {}'.format(output_prefix + '*.mtz',
                            glob.glob(output_prefix + '*.mtz')))
        raise

    # List of links to make at the end of the run
    link_file_pairs = [(real_pdb, params.output.link_prefix + '.pdb'),
                       (real_mtz, params.output.link_prefix + '.mtz')]

    # Split conformations
    if params.options.split_conformations:
        params.split_conformations.settings.verbose = params.settings.verbose
        log.subheading('Splitting refined structure conformations')
        # Running split conformations
        out_files = split_conformations.split_conformations(
            filename=real_pdb, params=params.split_conformations, log=log)
        # Link output files to top
        for real_file in out_files:
            link_file = params.output.link_prefix + os.path.basename(
                real_file.replace(os.path.splitext(real_pdb)[0], ''))
            link_file_pairs.append([real_file, link_file])

    # Link output files
    log.subheading('linking output files')
    for real_file, link_file in link_file_pairs:
        log('Linking {} -> {}'.format(link_file, real_file))
        if not os.path.exists(real_file):
            log('file does not exist: {}'.format(real_file))
            continue
        if os.path.exists(link_file) and os.path.islink(link_file):
            log('removing existing link: {}'.format(link_file))
            os.unlink(link_file)
        if not os.path.exists(link_file):
            rel_symlink(real_file, link_file)

    log.heading('finished - refinement')
Example #5
def run(params):
    # Identify any existing output directories
    current_dirs = sorted(glob.glob(params.output.dir_prefix + "*"))
    if not current_dirs:
        next_int = 1
    else:
        current_nums = [
            s.replace(params.output.dir_prefix, "") for s in current_dirs
        ]
        next_int = sorted(map(int, current_nums))[-1] + 1

    # Create output directory name from int
    out_dir = params.output.dir_prefix + "{:04}".format(next_int)
    # Create output directory
    os.mkdir(out_dir)

    # Create log object
    log = Log(
        log_file=os.path.join(out_dir,
                              params.output.out_prefix + ".quick-refine.log"),
        verbose=params.settings.verbose,
    )

    # Report
    if current_dirs:
        log("Found existing refinement directories: \n\t{}".format(
            "\n\t".join(current_dirs)))
        log("")
    log("Creating new output directory: {}".format(out_dir))

    # Validate input parameters
    log.subheading("Validating input parameters")
    assert params.input.pdb is not None, "No PDB given for refinement"
    assert params.input.mtz is not None, "No MTZ given for refinement"

    if os.path.islink(params.input.mtz):
        log("Converting mtz path to real path:")
        log("{} -> {}".format(params.input.mtz,
                              os.path.realpath(params.input.mtz)))
        params.input.mtz = os.path.realpath(params.input.mtz)

    # Link input
    log("Copying/linking files to refinement folder")
    shutil.copy(params.input.pdb,
                os.path.abspath(os.path.join(out_dir, "input.pdb")))
    rel_symlink(params.input.mtz,
                os.path.abspath(os.path.join(out_dir, "input.mtz")))
    # Copy parameter file to output folder
    if params.input.params:
        shutil.copy(params.input.params,
                    os.path.abspath(os.path.join(out_dir, "input.params")))

    # Create output prefixes
    output_prefix = out_dir
    log("Real output file path prefixes: {}".format(output_prefix))
    log("Link output file path prefixes: {}".format(params.output.link_prefix))

    # Create command objects
    log.subheading("Preparing command line input for refinement program")

    # PHENIX
    if params.options.program == "phenix":
        cm = CommandManager("phenix.refine")
        # Command line args
        cm.add_command_line_arguments([params.input.pdb, params.input.mtz])
        cm.add_command_line_arguments(
            ["output.prefix={}".format(output_prefix)])
        if params.input.cif:
            cm.add_command_line_arguments(params.input.cif)
        if params.input.params and os.path.exists(params.input.params):
            cm.add_command_line_arguments([params.input.params])

    # REFMAC
    elif params.options.program == "refmac":
        cm = CommandManager("refmac5")
        # Command line args
        cm.add_command_line_arguments(
            ["xyzin", params.input.pdb, "hklin", params.input.mtz])
        cm.add_command_line_arguments([
            "xyzout", output_prefix + ".pdb", "hklout", output_prefix + ".mtz"
        ])
        if params.input.cif:
            for cif in params.input.cif:
                cm.add_command_line_arguments(["libin", cif])
        # Standard input
        if params.input.params:
            cm.add_standard_input(open(params.input.params).read().split("\n"))

        cm.add_standard_input(["END"])

    # Pass additional command line arguments?
    if params.input.args:
        cm.add_command_line_arguments(params.input.args)

    # Report
    log(str(cm))

    log.bar()
    log("running refinement... ({})".format(cm.program[0]))
    out = cm.run()

    log.subheading("Refinement output")
    if not log.verbose:
        log("output written to log file ({} lines)".format(
            cm.output.count("\n")))

    log("\n" + cm.output, show=False)

    if out != 0:
        log.subheading("Refinement Errors")
        log(cm.error)

    log.subheading("Post-processing output files")

    # Find output files and check that they exist
    real_pdb = os.path.join(output_prefix,
                            params.output.out_prefix + ".pdb")
    real_mtz = os.path.join(output_prefix,
                            params.output.out_prefix + ".mtz")

    print(real_pdb, "\n", real_mtz)

    if not (os.path.exists(real_pdb) and os.path.exists(real_mtz)):
        log("Refinement has failed - output files do not exist")
        log("{}: {}".format(output_prefix + "*.pdb",
                            glob.glob(output_prefix + "*.pdb")))
        log("{}: {}".format(output_prefix + "*.mtz",
                            glob.glob(output_prefix + "*.mtz")))
        raise Exception("Refinement output files do not exist")

    # List of links to make at the end of the run
    link_file_pairs = [
        (real_pdb, params.output.link_prefix + ".pdb"),
        (real_mtz, params.output.link_prefix + ".mtz"),
    ]

    print(link_file_pairs)

    # Split conformations
    if params.options.split_conformations:
        params.split_conformations.settings.verbose = params.settings.verbose
        log.subheading("Splitting refined structure conformations")
        # Running split conformations
        out_files = split_conformations.split_conformations(
            filename=real_pdb, params=params.split_conformations, log=log)
        # Link output files to top
        for real_file in out_files:
            link_file = params.output.link_prefix + os.path.basename(
                real_file.replace(os.path.splitext(real_pdb)[0], ""))
            link_file_pairs.append([real_file, link_file])

    # Link output files
    log.subheading("linking output files")
    for real_file, link_file in link_file_pairs:
        log("Linking {} -> {}".format(link_file, real_file))
        if not os.path.exists(real_file):
            log("file does not exist: {}".format(real_file))
            continue
        if os.path.exists(link_file) and os.path.islink(link_file):
            log("removing existing link: {}".format(link_file))
            os.unlink(link_file)
        if not os.path.exists(link_file):
            rel_symlink(real_file, link_file)

    log.heading("finished - refinement")
Example #6
def run(params):

    log = Log(log_file=params.output.log_file, verbose=True)

    # Process MTZs
    if params.input.mtz:

        log.heading('Processing {} MTZ Files'.format(len(params.input.mtz)))

        if params.input.file_label == 'filename':
            labels = [os.path.basename(os.path.splitext(f)[0]) for f in params.input.mtz]
        elif params.input.file_label == 'foldername':
            labels = [os.path.basename(os.path.dirname(f)) for f in params.input.mtz]
        else:
            raise Exception('MTZ labelling function not supported: {}'.format(params.input.file_label))

        log.bar()
        log('Grouping {} mtz files by space group'.format(len(params.input.mtz)))
        crystal_groups = CrystalGroup.by_space_group(crystals=[CrystalSummary.from_mtz(mtz_file=f, id=lab) for f,lab in zip(params.input.mtz, labels)])
        log('> Clustered into {} space group(s)'.format(len(crystal_groups)))
        log.bar()

        for cg in crystal_groups:

            log.subheading('Space group {} - {} datasets'.format(','.join(cg.space_groups), len(cg.crystals)))

            error = False
            for c in cg.crystals:
                for label in params.check_for.column_label:
                    if label is None: continue
                    if label not in c.column_labels:
                        log('Checking: column "{}" not in diffraction data of {}. columns present are {}'.format(label, c.mtz_file, c.column_labels))
                for label in params.summary.column_label:
                    if label is None: continue
                    if label not in c.column_labels:
                        log('Required: column "{}" not in diffraction data of {}. columns present are {}'.format(label, c.mtz_file, c.column_labels))
                        error = True
            if error is True: raise Sorry('There are datasets that do not contain the right columns.')

            log(crystal_statistics('Wavelength',         cg.crystals, value_func=lambda c: c.mtz_object().crystals()[1].datasets()[0].wavelength(), header=True))
            log(crystal_statistics('Resolution (high)',  cg.crystals, value_func=lambda c: c.high_res,                                              header=False))
            log(crystal_statistics('Resolution (low)',   cg.crystals, value_func=lambda c: c.low_res,                                               header=False))
            log(crystal_statistics('Unit cell - vol',    cg.crystals, value_func=lambda c: c.unit_cell.volume(),                                    header=False))
            log(crystal_statistics('Unit cell - a',      cg.crystals, value_func=lambda c: c.unit_cell.parameters()[0],                             header=False))
            log(crystal_statistics('Unit cell - b',      cg.crystals, value_func=lambda c: c.unit_cell.parameters()[1],                             header=False))
            log(crystal_statistics('Unit cell - c',      cg.crystals, value_func=lambda c: c.unit_cell.parameters()[2],                             header=False))
            log(crystal_statistics('Unit cell - alpha',  cg.crystals, value_func=lambda c: c.unit_cell.parameters()[3],                             header=False))
            log(crystal_statistics('Unit cell - beta',   cg.crystals, value_func=lambda c: c.unit_cell.parameters()[4],                             header=False))
            log(crystal_statistics('Unit cell - gamma',  cg.crystals, value_func=lambda c: c.unit_cell.parameters()[5],                             header=False, footer=True))

            for label in params.summary.column_label:
                if label is None: continue
                log(crystal_statistics('Column: {}'.format(label), cg.crystals, value_func=lambda c: c.mtz_object().get_column(label).n_valid_values(),     header=False, footer=True))

            log.bar(True, False)
            log('Smallest + Largest Values')
            log.bar()

            log(crystal_min_max('Resolution', cg.crystals, value_func=lambda c: c.high_res))

    # Process PDBs
    if params.input.pdb:

        log.heading('Processing {} PDB Files'.format(len(params.input.pdb)))

        if params.input.file_label == 'filename':
            labels = [os.path.basename(os.path.splitext(f)[0]) for f in params.input.pdb]
        elif params.input.file_label == 'foldername':
            labels = [os.path.basename(os.path.dirname(f)) for f in params.input.pdb]
        else:
            raise Exception('PDB labelling function not supported: {}'.format(params.input.file_label))

        log.bar()
        log('Grouping {} pdb files by space group'.format(len(params.input.pdb)))
        crystal_groups = CrystalGroup.by_space_group(crystals=[CrystalSummary.from_pdb(pdb_file=f, id=lab) for f,lab in zip(params.input.pdb, labels)])
        log('> Clustered into {} space group(s)'.format(len(crystal_groups)))

        for cg in crystal_groups:

            log.subheading('Space group: {} - {} datasets'.format(','.join(cg.space_groups), len(cg.crystals)))

            log(crystal_statistics('R-work', cg.crystals, value_func=lambda c: c.pdb_input().get_r_rfree_sigma().r_work, header=True))
            log(crystal_statistics('R-free', cg.crystals, value_func=lambda c: c.pdb_input().get_r_rfree_sigma().r_free, header=False, footer=True))

            log.bar(True, False)
            log('Smallest + Largest Values')
            log.bar()

            log(crystal_min_max('R-free',     cg.crystals, value_func=lambda c: c.pdb_input().get_r_rfree_sigma().r_free))

    log.heading('finished')
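
crystal_statistics and crystal_min_max are helpers from the surrounding codebase. As a rough sketch only of the kind of per-quantity summary such a call produces (all names below are illustrative, not the project's API):

import numpy

def summarise(label, values):
    """Return a one-line min/mean/max summary for a list of numeric values."""
    values = numpy.array(values, dtype=float)
    return '{:<20s} min={:8.3f}  mean={:8.3f}  max={:8.3f}'.format(
        label, values.min(), values.mean(), values.max())

# e.g. summarise('Resolution (high)', [c.high_res for c in cg.crystals])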
Example #7
def split_conformations(filename, params, log=None):

    if log is None: log = Log(verbose=True)

    # Read the pdb header - for writing later...
    header_contents = get_pdb_header(filename)

    # Read in and validate the input file
    ens_obj = strip_pdb_to_input(filename, remove_ter=True)
    ens_obj.hierarchy.only_model()

    # Create a new copy of the structures
    new_ens = ens_obj.hierarchy.deep_copy()

    # Extract conformers from the structure as set
    all_confs = set(ens_obj.hierarchy.altloc_indices())
    all_confs.discard('')

    if params.options.mode == 'by_residue_name':
        sel_resnames = params.options.by_residue_name.resname.split(',')
        sel_confs = [
            ag.altloc for ag in new_ens.atom_groups()
            if (ag.resname in sel_resnames)
        ]
        # Conformers to output in each structure, and the corresponding filename suffixes
        out_confs = list(map(sorted, [
            all_confs.intersection(sel_confs),
            all_confs.difference(sel_confs)
        ]))
        out_suffs = [
            params.options.by_residue_name.selected_name,
            params.options.by_residue_name.unselected_name
        ]
    elif params.options.mode == 'by_conformer':
        sel_resnames = None
        sel_confs = None
        # One structure for each conformer
        out_confs = [[c] for c in sorted(all_confs)]
        out_suffs = [''.join(c) for c in out_confs]
    elif params.options.mode == 'by_conformer_group':
        sel_resnames = None
        sel_confs = None
        # One structure for each supplied group of conformers
        out_confs = [
            s.split(',') for s in params.options.by_conformer_group.conformers
        ]
        out_suffs = [''.join(c) for c in out_confs]
    else:
        raise Exception('Invalid selection for options.mode: {}'.format(
            params.options.mode))

    assert len(out_confs) == len(out_suffs), '{} not same length as {}'.format(
        str(out_confs), str(out_suffs))

    for confs, suffix in zip(out_confs, out_suffs):
        log('Conformers {} -> {}'.format(str(confs), suffix))

    # Create paths from the suffixes
    out_paths = [
        '.'.join([
            os.path.splitext(filename)[0], params.output.suffix_prefix, suff,
            'pdb'
        ]) for suff in out_suffs
    ]

    log.subheading('Processing {}'.format(filename[-70:]))

    for this_confs, this_path in zip(out_confs, out_paths):

        if not this_confs: continue

        # Select atoms to keep - no altloc, or altloc in selection
        sel_string = ' or '.join(
            ['altid " "'] + ['altid "{}"'.format(alt) for alt in this_confs])
        # Extract selection from the hierarchy
        sel_hiery = new_ens.select(
            new_ens.atom_selection_cache().selection(sel_string),
            copy_atoms=True)

        log.bar(True, False)
        log('Outputting conformer(s) {} to {}'.format(''.join(this_confs),
                                                      this_path))
        log.bar()
        log('Keeping ANY atom with conformer id: {}'.format(
            ' or '.join(['" "'] + this_confs)))
        log('Selection: \n\t' + sel_string)

        if params.options.pruning.prune_duplicates:
            log.bar()
            log('Pruning redundant conformers')
            # Remove any alternate conformers that are duplicated after selection
            prune_redundant_alternate_conformations(
                hierarchy=sel_hiery,
                required_altlocs=[a for a in sel_hiery.altloc_indices() if a],
                rmsd_cutoff=params.options.pruning.rmsd_cutoff,
                in_place=True,
                verbose=params.settings.verbose)

        if params.options.reset_altlocs:
            log.bar()
            # Relabel the altlocs to start from "A" (or blank them if only one conformer remains)
            if len(this_confs) == 1:
                conf_hash = {this_confs[0]: ' '}
            else:
                conf_hash = dict(
                    zip(this_confs, iotbx.pdb.systematic_chain_ids()))
            log('Resetting structure altlocs:')
            for k in sorted(conf_hash.keys()):
                log('\t{} -> "{}"'.format(k, conf_hash[k]))
            if params.settings.verbose: log.bar()
            for ag in sel_hiery.atom_groups():
                if ag.altloc in this_confs:
                    if params.settings.verbose:
                        log('{} -> alt {}'.format(Labeller.format(ag),
                                                  conf_hash[ag.altloc]))
                    ag.altloc = conf_hash[ag.altloc]

        if params.options.reset_occupancies:
            log.bar()
            log('Resetting output occupancies (maximum occupancy of 1.0, etc.)'
                )
            # Divide through by the smallest occupancy of any complete residue group with occupancy less than one
            rg_occs = [
                calculate_residue_group_occupancy(rg) for rg in
                residue_groups_with_complete_set_of_conformers(sel_hiery)
            ]
            non_uni = [v for v in numpy.unique(rg_occs) if 0.0 < v < 1.0]
            if non_uni:
                div_occ = min(non_uni)
                log('Dividing all occupancies by {}'.format(div_occ))
                sel_hiery.atoms().set_occ(sel_hiery.atoms().extract_occ() /
                                          div_occ)
            # Normalise the occupancies of any residue groups with greater-than-unit occupancy
            log('Fixing any residues that have greater than unitary occupancy')
            sanitise_occupancies(hierarchy=sel_hiery,
                                 min_occ=0.0,
                                 max_occ=1.0,
                                 in_place=True,
                                 verbose=params.settings.verbose)
            # Perform checks
            max_occ = max([
                calculate_residue_group_occupancy(rg)
                for rg in sel_hiery.residue_groups()
            ])
            log('Maximum occupancy of output structure: {}'.format(max_occ))
            assert max_occ >= 0.0, 'maximum occupancy is less than 0.0?!?!'
            assert max_occ <= 1.0, 'maximum occupancy is greater than 1.0?!?!'

        log.bar()
        log('Writing structure: {}'.format(this_path))
        log.bar(False, True)

        # Write header contents
        with open(this_path, 'w') as fh:
            fh.write(header_contents)
        # Write output file
        sel_hiery.write_pdb_file(this_path, open_append=True)

    return out_paths
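
As a small worked illustration (not part of the original code) of the two string-building steps above, using hypothetical inputs: the output path joins the input stem, the suffix prefix and the conformer suffix with dots, and the atom selection keeps blank-altloc atoms plus the chosen conformers.

import os

filename, suffix_prefix, confs = 'model.pdb', 'split', ['A', 'B']   # hypothetical values

out_path = '.'.join([os.path.splitext(filename)[0], suffix_prefix, ''.join(confs), 'pdb'])
# -> 'model.split.AB.pdb'

sel_string = ' or '.join(['altid " "'] + ['altid "{}"'.format(alt) for alt in confs])
# -> 'altid " " or altid "A" or altid "B"'
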
Example #8
0
def make_occupancy_constraints(params, input_hierarchy, log=None):
    """Create occupancy groups for a hierarchy"""

    if log is None: log = Log(verbose=True)

    log.subheading('Generating occupancy-constrained groups')

    # Ligand resname identifiers
    resnames = params.occupancy.resname.split(',')
    if params.settings.verbose:
        log('Looking for ligands with resname {!s}'.format(
            ' or '.join(resnames)))
        log('')

    # Make occupancy groups
    occupancy_groups = overlapping_occupancy_groups(
        hierarchy=input_hierarchy.hierarchy,
        resnames=resnames,
        group_dist=params.occupancy.group_dist,
        overlap_dist=params.occupancy.overlap_dist,
        complete_groups=params.occupancy.complete_groups,
        exclude_altlocs=params.occupancy.exclude_altlocs.split(',')
        if params.occupancy.exclude_altlocs else [],
        verbose=params.settings.verbose)
    # Record whether the occupancy groups are complete (occupancies sum to 1)
    if params.occupancy.complete_groups:
        occupancy_complete = [True] * len(occupancy_groups)
    else:
        occupancy_complete = [False] * len(occupancy_groups)

    if not occupancy_groups:
        log('No matching residues were found (no occupancy constraints created)'
            )
        return

    log.bar()
    log('')
    log('Created {} occupancy groups for overlapping conformers'.format(
        len(occupancy_groups)))
    log('')

    # Re-make the default occupancy groups?
    if params.occupancy.simple_groups:
        log('simple_groups=={}: Remaking default occupancy restraints for residues'
            .format(params.occupancy.simple_groups))
        if params.settings.verbose: log('')
        simple_groups = simple_occupancy_groups(
            hierarchy=input_hierarchy.hierarchy,
            verbose=params.settings.verbose)
        num_alts = len(
            [a for a in input_hierarchy.hierarchy.altloc_indices() if a != ''])
        occupancy_complete += [
            len(g) == num_alts for g in simple_groups
        ]
        occupancy_groups += simple_groups
        if params.settings.verbose: log('')
        log('Increased number of occupancy groups to {}'.format(
            len(occupancy_groups)))
        log('')

    if params.output.refmac:
        restraint_list = RefmacFormatter.make_occupancy_restraints(
            list_of_lists_of_groups=occupancy_groups,
            group_completeness=occupancy_complete)
        rest_block = RefmacFormatter.format_occupancy_restraints(
            restraint_list=restraint_list)
        with open(params.output.refmac, 'a') as fh:
            fh.write(rest_block + '\n')
        if params.settings.verbose:
            log.subheading('refmac occupancy restraints')
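            # Show at most 1000 characters of the block; multiplying '...' by the bool
            # appends an ellipsis only when the block was actually truncated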
            log(rest_block[:1000] + '...' * (len(rest_block) > 1000))
            log('')

    if params.output.phenix:
        restraint_list = PhenixFormatter.make_occupancy_restraints(
            list_of_lists_of_groups=occupancy_groups,
            group_completeness=occupancy_complete)
        rest_block = PhenixFormatter.format_occupancy_restraints(
            restraint_list=restraint_list)
        with open(params.output.phenix, 'a') as fh:
            fh.write(rest_block + '\n')
        if params.settings.verbose:
            log.subheading('phenix occupancy restraints')
            log(rest_block[:1000] + '...' * (len(rest_block) > 1000))
            log('')
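
The occupancy_complete flags above mark groups whose constrained occupancies should sum to one; for the simple groups this is decided by comparing the group size to the number of altlocs in the structure. A toy illustration of that check, with hypothetical values:

altlocs = ['A', 'B', 'C']                # hypothetical altloc ids
simple_groups = [
    [['A'], ['B'], ['C']],               # one sub-group per altloc -> complete
    [['A'], ['B']],                      # missing 'C'              -> incomplete
]
num_alts = len(altlocs)
occupancy_complete = [len(g) == num_alts for g in simple_groups]
# -> [True, False]
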
Example #9
0
def make_link_records(params, input_hierarchy, link_file, log=None):
    """Create link records to make a continuous peptide chain"""

    if log is None: log = Log(verbose=True)

    log.subheading('Checking the continuity of the protein backbone')

    links, warnings = generate_set_of_alternate_conformer_peptide_links(
        hierarchy=input_hierarchy.hierarchy)

    if warnings:
        log.bar()
        log('WARNINGS:')
        log.bar()
        for w in warnings:
            log(w)
        log.bar()
        log('')

    if (not links) and (not warnings):
        log('No breaks in the backbone - hooray! (nothing needs to be done here)'
            )
        return
    elif (not links):
        log("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
            )
        log("!!! >>> There are breaks in the backbone but I'm not able to do anything to fix them    <<< !!!"
            )
        log("!!! >>> You'll need to check them manually to see if these are going to be a problem... <<< !!!"
            )
        log("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
            )
        return

    link_block = '\n'.join([
        format_link_record(atom_1=a1,
                           atom_2=a2,
                           chain_id_1=c1,
                           chain_id_2=c2,
                           link_type=lt) for a1, a2, c1, c2, lt in links
    ])

    log('Need to apply {} links to make the backbone continuous:'.format(
        len(links)))
    log('')
    log(link_block)
    log('')

    log('Writing hierarchy with new link records to {}'.format(link_file))
    log('(This file can only be used for refinement with REFMAC)')
    log('')
    log('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
        )
    log('!!! ALTHOUGH THE FILE WITH BACKBONE LINKS HAS BEEN OUTPUT, IT SHOULD BE USED WITH CAUTION !!!'
        )
    log('!!!   THE CONNECTION OF ALTERNATE CONFORMATIONS OF THE BACKBONE IS GENERALLY "INCORRECT"  !!!'
        )
    log('!!!          THERE SHOULD BE A VERY GOOD REASON FOR THESE RESTRAINTS TO BE USED           !!!'
        )
    log('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
        )

    input_hierarchy.hierarchy.write_pdb_file(
        file_name=link_file,
        crystal_symmetry=input_hierarchy.crystal_symmetry(),
        link_records=link_block)
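
Each link returned by generate_set_of_alternate_conformer_peptide_links() is unpacked as (atom_1, atom_2, chain_id_1, chain_id_2, link_type) before formatting; a minimal illustration of that unpacking with a hypothetical link tuple (the real tuples come from the backbone analysis above):

example_links = [('C', 'N', 'A', 'A', 'gap')]   # hypothetical values
link_block = '\n'.join([
    format_link_record(atom_1=a1, atom_2=a2, chain_id_1=c1, chain_id_2=c2, link_type=lt)
    for a1, a2, c1, c2, lt in example_links
])
# The joined block is then handed to hierarchy.write_pdb_file(link_records=...) as above.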