예제 #1
0
 def get_program_name(self):
   """Return the name of the software classified as 'refinement'.

   Reads ``_software.name`` and ``_software.classification`` from
   ``self.cif_block``.  The classification may be a single string
   (non-looped item) or an array with one entry per software row.

   Returns:
     The matching software name, or None when no classification is
     present or none of the entries equals 'refinement'.
   """
   software_name = self.cif_block.get('_software.name')
   software_classification = self.cif_block.get('_software.classification')
   if software_classification is None:
     return None
   if isinstance(software_classification, string_types):
     # Single entry: name and classification are both plain values.
     if software_classification == 'refinement':
       return software_name
     return None
   # Looped entry: take the name on the row of the first 'refinement'.
   i = flex.first_index(software_classification, 'refinement')
   # first_index reports "not found" as None; comparing None with an int
   # raises TypeError on Python 3, so test for None explicitly.
   if i is not None and i >= 0:
     return software_name[i]
   return None
예제 #2
0
 def get_program_name(self):
   """Return the name of the software classified as 'refinement'.

   Reads ``_software.name`` and ``_software.classification`` from
   ``self.cif_block``.  The classification may be a single string
   (non-looped item) or an array with one entry per software row.

   Returns:
     The matching software name, or None when no classification is
     present or none of the entries equals 'refinement'.
   """
   software_name = self.cif_block.get('_software.name')
   software_classification = self.cif_block.get('_software.classification')
   if software_classification is None:
     return None
   if isinstance(software_classification, basestring):
     # Single entry.  A scalar that is not 'refinement' must stop here:
     # it is not an array and cannot be searched with flex.first_index.
     if software_classification == 'refinement':
       return software_name
     return None
   # Looped entry: take the name on the row of the first 'refinement'.
   i = flex.first_index(software_classification, 'refinement')
   # first_index reports "not found" as None; test for it explicitly
   # rather than relying on how None compares with an int.
   if i is not None and i >= 0:
     return software_name[i]
   return None
예제 #3
0
 def deposition_date(self):
     """Return the original deposition date from the first CIF block.

     The value comes from ``_database_PDB_rev.date_original`` (date
     format: yyyy-mm-dd).  When the revision items are looped, the date
     on the row whose ``_database_PDB_rev.num`` equals '1' is returned.

     Returns:
         The date value, or None when the revision number or the date
         item is absent, or when no revision '1' row exists.
     """
     # list() keeps this working when values() returns a view that
     # cannot be indexed (e.g. Python 3 mappings).
     cif_block = list(self.cif_model.values())[0]
     rev_num = cif_block.get('_database_PDB_rev.num')
     if rev_num is None:
         return None
     date_original = cif_block.get('_database_PDB_rev.date_original')
     if isinstance(rev_num, basestring):
         # Single revision: the date is a plain value.
         return date_original
     if date_original is None:
         return None
     i = flex.first_index(rev_num, '1')
     # first_index yields None when there is no revision '1' row.
     if i is None:
         return None
     return date_original[i]
예제 #4
0
 def deposition_date(self):
   """Return the original deposition date from the first CIF block.

   The value comes from ``_database_PDB_rev.date_original`` (date
   format: yyyy-mm-dd).  When the revision items are looped, the date on
   the row whose ``_database_PDB_rev.num`` equals '1' is returned.

   Returns:
     The date value, or None when the revision number or the date item
     is absent, or when no revision '1' row exists.
   """
   # list() keeps this working when values() returns a view that cannot
   # be indexed (e.g. Python 3 mappings).
   cif_block = list(self.cif_model.values())[0]
   rev_num = cif_block.get('_database_PDB_rev.num')
   if rev_num is None:
     return None
   date_original = cif_block.get('_database_PDB_rev.date_original')
   if isinstance(rev_num, basestring):
     # Single revision: the date is a plain value.
     return date_original
   if date_original is None:
     return None
   i = flex.first_index(rev_num, '1')
   # first_index yields None when there is no revision '1' row.
   if i is None:
     return None
   return date_original[i]
예제 #5
0
 def had_phase_transition(self):
   """Tell whether ``self.differences`` contains a transition-like peak.

   The peak is the maximum of ``self.differences``.  True is returned
   only when all of the following hold:

   * there are at least 5 differences;
   * the rise (from just after the last pre-peak sample below
     ``noise_level_before`` times the peak) spans at least 4 samples;
   * a negative difference occurs at or after the peak;
   * rise start to that negative sample spans at least 10 samples and at
     least 10 entries of ``self.values`` remain after it;
   * the last 5 differences stay within ``noise_level_after`` times the
     peak in absolute value.
   """
   diffs = self.differences
   if len(diffs) < 5:
     return False
   peak = flex.max_index(diffs)
   below_noise = diffs < self.noise_level_before*diffs[peak]
   last_quiet = flex.last_index(below_noise[:peak], True)
   # No quiet sample before the peak: the rise starts at index 0.
   start = 0 if last_quiet is None else last_quiet + 1
   if peak - start < 4:
     return False
   is_negative = diffs < 0
   first_negative = flex.first_index(is_negative[peak:], True)
   if first_negative is None:
     return False
   # first_negative is relative to the slice that starts at the peak.
   end = first_negative + peak
   if end - start < 10:
     return False
   if len(self.values) - end < 10:
     return False
   tail = scitbx.math.basic_statistics(diffs[-5:])
   tail_limit = self.noise_level_after*diffs[peak]
   if tail.max_absolute > tail_limit:
     return False
   return True
예제 #6
0
 def had_phase_transition(self):
     """Tell whether ``self.differences`` contains a transition-like peak.

     The peak is the maximum of ``self.differences``.  True is returned
     only when all of the following hold:

     * there are at least 5 differences;
     * the rise (from just after the last pre-peak sample below
       ``noise_level_before`` times the peak) spans at least 4 samples;
     * a negative difference occurs at or after the peak;
     * rise start to that negative sample spans at least 10 samples and
       at least 10 entries of ``self.values`` remain after it;
     * the last 5 differences stay within ``noise_level_after`` times
       the peak in absolute value.
     """
     diffs = self.differences
     if len(diffs) < 5:
         return False
     peak = flex.max_index(diffs)
     below_noise = diffs < self.noise_level_before * diffs[peak]
     last_quiet = flex.last_index(below_noise[:peak], True)
     # No quiet sample before the peak: the rise starts at index 0.
     start = 0 if last_quiet is None else last_quiet + 1
     if peak - start < 4:
         return False
     is_negative = diffs < 0
     first_negative = flex.first_index(is_negative[peak:], True)
     if first_negative is None:
         return False
     # first_negative is relative to the slice that starts at the peak.
     end = first_negative + peak
     if end - start < 10:
         return False
     if len(self.values) - end < 10:
         return False
     tail = scitbx.math.basic_statistics(diffs[-5:])
     tail_limit = self.noise_level_after * diffs[peak]
     if tail.max_absolute > tail_limit:
         return False
     return True
예제 #7
0
def run(args):
    """Tour the iotbx mmCIF API for every PDB id or mmCIF file in *args*.

    Each argument is either a path to an existing mmCIF file (whose base
    name must be a valid PDB id) or a PDB id, in which case the file is
    fetched from the "pdbe" mirror.  With no arguments, "1hbb" is used.

    For each entry the function prints the entry id, authors, entities,
    crystal symmetry, a few convenience values of the pdb_input object,
    hierarchy summaries, up to ten atom records and the polymer
    sequences.  Nothing is returned.
    """
    # Function-scope imports, done once instead of once per argument.
    import iotbx.cif
    import iotbx.cif.builders
    import iotbx.pdb
    import iotbx.pdb.fetch
    import iotbx.pdb.mmcif
    from cctbx.array_family import flex

    if len(args) == 0:
        args = ["1hbb"]

    for arg in args:
        if os.path.isfile(arg):
            mmcif_file = arg
            pdb_id = os.path.splitext(os.path.basename(mmcif_file))[0]
            iotbx.pdb.fetch.validate_pdb_id(pdb_id)
        else:
            # download pdbx/mmcif file from the PDB
            pdb_id = arg
            mirror = "pdbe"
            mmcif_file = iotbx.pdb.fetch.get_pdb(pdb_id,
                                                 data_type="pdb",
                                                 mirror=mirror,
                                                 log=sys.stdout,
                                                 format="cif")

        # read the cif file and get an iotbx.cif object
        cif_reader = iotbx.cif.reader(file_path=mmcif_file)
        cif_object = cif_reader.model()
        cif_block = cif_object[pdb_id]
        # get single items from cif_block
        print("PDB id:", cif_block["_entry.id"])
        # get a looped item from cif_block
        print("Authors:")
        for author in cif_block.get_looped_item("_citation_author.name"):
            print(author)
        print()
        print("Molecular Entities:")
        for pdbx_entity in cif_block.get_looped_item(
                "_entity.pdbx_description"):
            print(pdbx_entity)
        print()

        # extract crystal symmetry information
        builder = iotbx.cif.builders.crystal_symmetry_builder(cif_block)
        builder.crystal_symmetry.show_summary()

        # 1) this works also for .pdb files, but re-reads the file
        pdb_input = iotbx.pdb.input(file_name=mmcif_file)
        hierarchy = pdb_input.construct_hierarchy()

        # 2) This only works for mmcif files, but re-uses the cif_object
        #    from above (and replaces the objects built in step 1):
        pdb_input = iotbx.pdb.mmcif.cif_input(cif_object=cif_object)
        hierarchy = pdb_input.construct_hierarchy()

        # some convenience methods of pdb_input object
        print("Software:", pdb_input.get_program_name())
        print("Experiment type:", pdb_input.get_experiment_type())
        print("Solvent content:", pdb_input.get_solvent_content())
        print("Deposition date:", pdb_input.deposition_date())
        r_rfree_sigma = pdb_input.get_r_rfree_sigma(mmcif_file)
        print("R-work/R-free: %s/%s" %
              (r_rfree_sigma.r_work, r_rfree_sigma.r_free))
        # can also get crystal_symmetry from pdb_input object
        # (kept for demonstration; the value is not used further)
        crystal_symmetry = pdb_input.crystal_symmetry()

        print()
        hierarchy.overall_counts().show()
        # level_id can be "model", "chain", "residue_group", "atom_group" or "atom"
        hierarchy.show(level_id="chain")
        # for a more detailed example of interacting with a pdb.hierarchy object,
        # see iotbx/examples/pdb_hierarchy.py

        # extract atom sites
        atoms = hierarchy.atoms()
        # (kept for demonstration; the value is not used further)
        sites_cart = atoms.extract_xyz()
        print()
        # Show at most ten atoms; small models may have fewer than ten.
        for i in range(min(10, len(atoms))):
            print(atoms[i].id_str(), atoms[i].xyz)
        print()

        # read some sequence information
        entity_poly_entity_id = cif_block.get_looped_item(
            "_entity_poly.entity_id")
        entity_id = cif_block.get_looped_item("_entity.id")
        entity_pdbx_description = cif_block.get_looped_item(
            "_entity.pdbx_description")
        entity_poly_one_letter_code = cif_block.get_looped_item(
            "_entity_poly.pdbx_seq_one_letter_code")

        for i in range(len(entity_poly_one_letter_code)):
            # Row i belongs to the _entity_poly loop; idx is the matching
            # row of the _entity loop.  The description lives in the
            # _entity loop, so it must be looked up with idx, not i.
            idx = flex.first_index(entity_id, entity_poly_entity_id[i])
            print(entity_id[idx], entity_pdbx_description[idx], end=' ')
            print("".join(entity_poly_one_letter_code[i].split()))
예제 #8
0
파일: validation.py 프로젝트: dials/cctbx
 def validate_loop(self, loop, block):
     """Validate every data item of *loop* against its dictionary definition.

     Items without a definition (``get_definition`` raises KeyError) are
     skipped.  For the others the enumeration, dependent and related
     checks are run, followed by the loop-specific checks below; problems
     are reported through ``self.report_error``.

     Args:
         loop: mapping of data name -> values for one CIF loop.
         block: the CIF block containing the loop; used to look up
             items that are referenced from outside the loop.
     """
     list_category = None
     for key, value in six.iteritems(loop):
         try:
             definition = self.get_definition(key)
         except KeyError:
             continue
         self.validate_enumeration(key, value, definition)
         self.validate_dependent(key, block, definition)
         self.validate_related(key, block, definition)
         _list = definition.get("_list")
         if self.DDL_version == 1 and _list in ('no', None):
             self.report_error(2501, key=key)  # not allowed in list
         definition_category = definition.category
         if (definition_category is not None
                 and not isinstance(definition_category, string_types)):
             # The definition covers several names: pick the category
             # that sits at the same position as this key's name.
             definition_name = definition.name
             i = flex.first_index(definition_name, key)
             definition_category = definition_category[i]
         if list_category is None:
             list_category = definition_category
         elif (isinstance(list_category, string_types)
               and definition_category is not None
               and list_category != definition_category):
             # NOTE(review): looks like a leftover debug print; kept so
             # the output of this method is unchanged.
             print(list_category, list(definition_category))
             self.report_error(2502, key=key)  # multiple categories in loop
         references = definition.get('_list_reference')
         if references is not None:
             if isinstance(references, string_types):
                 references = [references]
             for reference in references:
                 try:
                     ref_data = self.get_definition(reference)
                 except KeyError:
                     # Fall back to this key's own definition.
                     ref_data = self.get_definition(key)
                 ref_names = ref_data['_name']
                 if isinstance(ref_names, string_types):
                     ref_names = [ref_names]
                 for name in ref_names:
                     if name not in loop:
                         self.report_error(
                             2505, key=key,
                             reference=name)  # missing _list_reference
         elif (self.DDL_version == 2
               and isinstance(definition.category, string_types)):
             category_def = self.get_definition(definition.category)
             if category_def.category_key is not None:
                 category_keys = category_def.category_key
                 if isinstance(category_keys, string_types):
                     category_keys = [category_keys]
                 for cat_key in category_keys:
                     cat_key_def = self.get_definition(cat_key)
                     # Check every category key.  With the check placed
                     # after the loop only the last key was examined, and
                     # an empty key list left cat_key_def undefined.
                     if (cat_key_def.mandatory == 'yes'
                             and isinstance(cat_key_def.mandatory,
                                            string_types)
                             and cat_key_def.name not in block):
                         # presumably "mandatory category key missing"
                         # -- confirm against the error-code table
                         self.report_error(2203,
                                           key=cat_key_def.name,
                                           category=definition.category)
         #
         link_parent = definition.get('_list_link_parent',
                                      self.child_parent_relations.get(key))
         if link_parent is not None:
             parent_values = loop.get(link_parent, block.get(link_parent))
             if parent_values is not None:
                 for v in loop[key]:
                     if v != '.' and v not in parent_values:
                         # missing parent value
                         self.report_error(2503,
                                           value=v,
                                           child=key,
                                           parent=link_parent)
             else:
                 self.report_error(2504, child=key,
                                   parent=link_parent)  # missing parent
예제 #9
0
    def __init__(self, **kwargs):
        """Choose cluster centres from rho/delta and assign all points.

        The keyword arguments are forwarded to ``group_args.__init__``.
        The body then reads ``self.Dij``, ``self.d_c``,
        ``self.max_percentile_rho``, ``self.Z_delta`` and, if present,
        ``self.strategy`` (presumably populated from those kwargs --
        confirm against group_args).

        The centre-selection scheme depends on ``self.strategy``:
        'one_cluster' and 'strategy_3' select their own branches below;
        any other value (including the 'default' set here when the
        attribute is missing) uses the z-score based branch.

        Attributes set: ``rho``, ``delta``, ``cluster_id_maxima`` (copy
        taken before ``R.cluster_assignment``), ``cluster_id_full`` and
        ``cluster_id_final`` (copies taken after it).
        """
        group_args.__init__(self, **kwargs)
        print('finished Dij, now calculating rho_i and density')
        from xfel.clustering import Rodriguez_Laio_clustering_2014 as RL
        R = RL(distance_matrix=self.Dij, d_c=self.d_c)
        #from clustering.plot_with_dimensional_embedding import plot_with_dimensional_embedding
        #plot_with_dimensional_embedding(1-self.Dij/flex.max(self.Dij), show_plot=True)
        if hasattr(self, 'strategy') is False:
            self.strategy = 'default'
        self.rho = rho = R.get_rho()
        # NOTE(review): ave_rho is computed but not used in this method.
        ave_rho = flex.mean(rho.as_double())
        # Number of points, taken from the first dimension of Dij.
        NN = self.Dij.focus()[0]
        i_max = flex.max_index(rho)
        # Largest distance from the highest-rho point to any point.
        delta_i_max = flex.max(
            flex.double([self.Dij[i_max, j] for j in range(NN)]))
        rho_order = flex.sort_permutation(rho, reverse=True)
        rho_order_list = list(rho_order)
        self.delta = delta = R.get_delta(rho_order=rho_order,
                                         delta_i_max=delta_i_max)
        cluster_id = flex.int(NN, -1)  # -1 means no cluster
        delta_order = flex.sort_permutation(delta, reverse=True)
        MAX_PERCENTILE_RHO = self.max_percentile_rho  # cluster centers have to be in the top percentile
        n_cluster = 0
        #
        #
        print('Z_DELTA = ', self.Z_delta)

        # Z-scores of rho and delta; when either spread is zero the
        # z-scores are undefined and a single top point is used instead.
        pick_top_solution = False
        rho_stdev = flex.mean_and_variance(
            rho.as_double()).unweighted_sample_standard_deviation()
        delta_stdev = flex.mean_and_variance(
            delta).unweighted_sample_standard_deviation()
        if rho_stdev != 0.0 and delta_stdev != 0:
            rho_z = (rho.as_double() -
                     flex.mean(rho.as_double())) / (rho_stdev)
            delta_z = (delta - flex.mean(delta)) / (delta_stdev)
        else:
            pick_top_solution = True
            if rho_stdev == 0.0:
                centroids = [flex.first_index(delta, flex.max(delta))]
            elif delta_stdev == 0.0:
                centroids = [flex.first_index(rho, flex.max(rho))]

        significant_delta = []
        significant_rho = []
        # Define strategy to decide cluster center here. Only one should be true
        # NOTE(review): strategy2/strategy3 are only bound inside the two
        # `if` statements below; strategy3 stays unbound for 'one_cluster'
        # and is only safe because `elif strategy2` is tested first.
        debug_fix_clustering = True
        if self.strategy == 'one_cluster':
            debug_fix_clustering = False
            strategy2 = True
        if self.strategy == 'strategy_3':
            debug_fix_clustering = False
            strategy3 = True
            strategy2 = False

        if debug_fix_clustering:
            if not pick_top_solution:
                delta_z_cutoff = min(1.0, max(delta_z))
                rho_z_cutoff = min(1.0, max(rho_z))
                for ic in range(NN):
                    # test the density & rho
                    if delta_z[ic] >= delta_z_cutoff or delta_z[
                            ic] <= -delta_z_cutoff:
                        significant_delta.append(ic)
                    if rho_z[ic] >= rho_z_cutoff or rho_z[ic] <= -rho_z_cutoff:
                        significant_rho.append(ic)
                if True:
                    # Use idea quoted in Rodriguez Laio 2014 paper
                    # " Thus, cluster centers are recognized as points for which the value of delta is anomalously large."
                    centroid_candidates = list(significant_delta)
                    candidate_delta_z = flex.double()
                    for ic in centroid_candidates:
                        # NOTE(review): delta_z_of_rho_order_0 is bound only
                        # if rho_order[0] is among the candidates; otherwise
                        # its use below would raise NameError -- confirm
                        # that this cannot happen.
                        if ic == rho_order[0]:
                            delta_z_of_rho_order_0 = delta_z[ic]
                        candidate_delta_z.append(delta_z[ic])
                    i_sorted = flex.sort_permutation(candidate_delta_z,
                                                     reverse=True)
                    # Check that once sorted the top one is not equal to the 2nd or 3rd position
                    # If there is a tie, assign centroid to the first one in rho order
                    centroids = []
                    # rho_order[0] has to be a centroid
                    centroids.append(rho_order[0])

                    #centroids.append(centroid_candidates[i_sorted[0]])
                    for i in range(0, len(i_sorted[:])):
                        if centroid_candidates[i_sorted[i]] == rho_order[0]:
                            continue
                        if delta_z_of_rho_order_0 - candidate_delta_z[
                                i_sorted[i]] > 1.0:
                            if i > 1:
                                if -candidate_delta_z[i_sorted[
                                        i - 1]] + candidate_delta_z[
                                            i_sorted[0]] > 1.0:
                                    centroids.append(
                                        centroid_candidates[i_sorted[i]])
                            else:
                                centroids.append(
                                    centroid_candidates[i_sorted[i]])
                        else:
                            break
                # Disabled alternative (never executed): candidates that
                # are significant in both delta and rho.
                if False:
                    centroid_candidates = list(
                        set(significant_delta).intersection(
                            set(significant_rho)))
                    # Now compare the relative orders of the max delta_z and max rho_z to make sure they are within 1 stdev
                    centroids = []
                    max_delta_z_candidates = -999.9
                    max_rho_z_candidates = -999.9
                    for ic in centroid_candidates:
                        if delta_z[ic] > max_delta_z_candidates:
                            max_delta_z_candidates = delta_z[ic]
                        if rho_z[ic] > max_rho_z_candidates:
                            max_rho_z_candidates = rho_z[ic]
                    for ic in centroid_candidates:
                        if max_delta_z_candidates - delta_z[
                                ic] < 1.0 and max_rho_z_candidates - rho_z[
                                    ic] < 1.0:
                            centroids.append(ic)

            #item_idxs = [delta_order[ic] for ic,centroid in enumerate(centroids)]
            # Each centroid becomes the seed of a new cluster id.
            item_idxs = centroids
            for item_idx in item_idxs:
                cluster_id[item_idx] = n_cluster
                print('CLUSTERING_STATS', item_idx, cluster_id[item_idx])
                n_cluster += 1
                ####
        elif strategy2:
            # Go through list of clusters, see which one has highest joint rank in both rho and delta lists
            # This will only assign one cluster center based on highest product of rho and delta ranks
            product_list_of_ranks = []
            for ic in range(NN):
                rho_tmp = self.rho[ic]
                delta_tmp = self.delta[ic]
                product_list_of_ranks.append(rho_tmp * delta_tmp)
            import numpy as np
            item_idx = np.argmax(product_list_of_ranks)
            cluster_id[item_idx] = n_cluster  # Only cluster assigned
            print('CLUSTERING_STATS', item_idx, cluster_id[item_idx])
            n_cluster += 1
        elif strategy3:
            # use product of delta and rho and pick out top candidates
            # have to use a significance z_score to filter out the very best
            product_list_of_ranks = flex.double()
            for ic in range(NN):
                rho_tmp = self.rho[ic]
                delta_tmp = self.delta[ic]
                product_list_of_ranks.append(rho_tmp * delta_tmp)
            import numpy as np
            iid_sorted = flex.sort_permutation(product_list_of_ranks,
                                               reverse=True)
            cluster_id[
                iid_sorted[0]] = n_cluster  # first point always a cluster
            n_cluster += 1
            print('CLUSTERING_STATS S3', iid_sorted[0],
                  cluster_id[iid_sorted[0]])
            #product_list_of_ranks[iid_sorted[0]]=0.0 # set this to 0.0 so that the mean/stdev does not get biased by one point
            stdev = np.std(product_list_of_ranks)
            mean = np.mean(product_list_of_ranks)
            # Only ranks 1 .. n_sorted-1 are tested as further centres.
            n_sorted = 3
            #if stdev == 0.0:
            #  n_sorted=1

            z_critical = 3.0  # 2 sigma significance ?
            # Only go through say 3-4 datapoints
            # basically there won't be more than 2-3 lattices on an image realistically
            for iid in iid_sorted[1:n_sorted]:
                # NOTE(review): stdev is not checked for zero before the
                # division -- confirm the intended behaviour in that case.
                z_score = (product_list_of_ranks[iid] - mean) / stdev
                if z_score > z_critical:
                    cluster_id[iid] = n_cluster
                    n_cluster += 1
                    print('CLUSTERING_STATS S3', iid, cluster_id[iid])
                else:
                    break  # No point going over all points once below threshold z_score

        else:
            # NOTE(review): appears unreachable -- debug_fix_clustering is
            # False only for 'one_cluster' (strategy2 True) or 'strategy_3'
            # (strategy3 True), so one of the branches above always runs.
            for ic in range(NN):
                item_idx = delta_order[ic]
                if ic != 0:
                    if delta[item_idx] <= 0.25 * delta[
                            delta_order[0]]:  # too low to be a medoid
                        continue
                item_rho_order = rho_order_list.index(item_idx)
                if (item_rho_order) / NN < MAX_PERCENTILE_RHO:
                    cluster_id[item_idx] = n_cluster
                    print('CLUSTERING_STATS', ic, item_idx, item_rho_order,
                          cluster_id[item_idx])
                    n_cluster += 1
        ###


#
        print('Found %d clusters' % n_cluster)
        for x in range(NN):
            if cluster_id[x] >= 0:
                print("XC", x, cluster_id[x], rho[x], delta[x])
        # Snapshot with only the cluster centres labelled.
        self.cluster_id_maxima = cluster_id.deep_copy()
        # presumably fills in cluster_id for the remaining points in
        # place -- confirm against Rodriguez_Laio_clustering_2014
        R.cluster_assignment(rho_order, cluster_id, rho)
        self.cluster_id_full = cluster_id.deep_copy()

        #halo = flex.bool(NN,False)
        #border = R.get_border( cluster_id = cluster_id )

        #for ic in range(n_cluster): #loop thru all border regions; find highest density
        #  this_border = (cluster_id == ic) & (border==True)
        #  if this_border.count(True)>0:
        #    highest_density = flex.max(rho.select(this_border))
        #    halo_selection = (rho < highest_density) & (this_border==True)
        #    if halo_selection.count(True)>0:
        #      cluster_id.set_selected(halo_selection,-1)
        #    core_selection = (cluster_id == ic) & ~halo_selection
        #    highest_density = flex.max(rho.select(core_selection))
        #    too_sparse = core_selection & (rho.as_double() < highest_density/10.) # another heuristic
        #    if too_sparse.count(True)>0:
        #      cluster_id.set_selected(too_sparse,-1)
        # With the halo pruning above commented out, no statement modifies
        # cluster_id between the previous copy and this one.
        self.cluster_id_final = cluster_id.deep_copy()
예제 #10
0
def get_uc_consensus(experiments_list,
                     show_plot=False,
                     save_plot=False,
                     return_only_first_indexed_model=False,
                     finalize_method='reindex_with_known_crystal_models',
                     clustering_params=None):
    '''
  Uses the Rodriguez Laio 2014 method to do a hierarchical clustering of the crystal models and
  then vote for the highest consensus crystal mode. Input needs to be a list of experiments object.
  Clustering code taken from github.com/cctbx-xfel/cluster_regression
  Clustering is first done first based on unit cell dimensions. Then for each of the clusters identified,
  a further clustering is done based on orientational matrix A
  '''
    if return_only_first_indexed_model:
        return [experiments_list[0].crystals()[0]], None
    cells = []

    from xfel.clustering.singleframe import CellOnlyFrame
    # Flag for testing Lysozyme data from NKS.Make sure cluster_regression repository is present and configured
    # Program will exit after plots are displayed if this flag is true
    test_nks = False
    if clustering_params is None:
        clustering_params = clustering_iota_scope

    if test_nks:
        from cctbx import crystal
        import libtbx.load_env
        cluster_regression = libtbx.env.find_in_repositories(
            relative_path="cluster_regression", test=os.path.isdir)
        file_name = os.path.join(cluster_regression, 'examples',
                                 'lysozyme1341.txt')
        for line in open(file_name, "r").xreadlines():
            tokens = line.strip().split()
            unit_cell = tuple(float(x) for x in tokens[0:6])
            space_group_symbol = tokens[6]
            crystal_symmetry = crystal.symmetry(
                unit_cell=unit_cell, space_group_symbol=space_group_symbol)
            cells.append(CellOnlyFrame(crystal_symmetry))
    else:
        clustered_experiments_list = flex.int()
        for experiment in experiments_list:
            if len(experiment.crystals()) > 1:
                print('IOTA:Should have only one crystal model')
            crystal_symmetry = experiment.crystals()[0].get_crystal_symmetry()
            cells.append(CellOnlyFrame(crystal_symmetry))
            # Maintain a list which is meaningless right now that will finally contain the
            # final clustering results
            clustered_experiments_list.append(-1)
    MM = [c.mm for c in cells]  # metrical matrices
    MM_double = flex.double()
    for i in range(len(MM)):
        Tup = MM[i]
        for j in range(6):
            MM_double.append(Tup[j])
    print('There are %d cells' % len(MM))
    coord_x = flex.double([c.uc[0] for c in cells])
    coord_y = flex.double([c.uc[1] for c in cells])
    if show_plot or save_plot:
        import matplotlib
        if not show_plot:
            matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        plt.plot([c.uc[0] for c in cells], [c.uc[1] for c in cells],
                 "k.",
                 markersize=3.)
        plt.axes().set_aspect("equal")
    if save_plot:
        plot_name = 'uc_cluster.png'
        plt.savefig(plot_name,
                    size_inches=(10, 10),
                    dpi=300,
                    bbox_inches='tight')
    if show_plot:
        plt.show()
    print('Now constructing a Dij matrix: Starting Unit Cell clustering')
    NN = len(MM)
    from cctbx.uctbx.determine_unit_cell import NCDist_flatten
    Dij = NCDist_flatten(MM_double)
    from scitbx.math import five_number_summary
    d_c = clustering_params.d_c  #five_number_summary(list(Dij))[1]
    d_c = estimate_d_c(Dij)
    #d_c = flex.mean_and_variance(Dij.as_1d()).unweighted_sample_standard_deviation()
    print('d_c = ', d_c)
    if len(cells) < 5:
        return [experiments_list[0].crystals()[0]], None
    CM = clustering_manager(
        Dij=Dij,
        d_c=d_c,
        max_percentile_rho=clustering_params.max_percentile_rho_uc,
        Z_delta=clustering_params.Z_delta,
        strategy='strategy_3')
    n_cluster = 1 + flex.max(CM.cluster_id_final)
    print(len(cells), ' datapoints have been analyzed')
    print('%d CLUSTERS' % n_cluster)
    for i in range(n_cluster):
        item = flex.first_index(CM.cluster_id_maxima, i)
        print('Cluster %d central Unit cell = %d' % (i, item))
        cells[item].crystal_symmetry.show_summary()

    # More plots for debugging
    appcolors = [
        'b', 'r', '#ff7f0e', '#2ca02c', '#9467bd', '#8c564b', '#e377c2',
        '#7f7f7f', '#bcbd22', '#17becf'
    ]
    if show_plot:
        # Decision graph
        import matplotlib.pyplot as plt
        plt.plot(CM.rho, CM.delta, "r.", markersize=3.)
        for x in range(NN):
            if CM.cluster_id_maxima[x] >= 0:
                plt.plot([CM.rho[x]], [CM.delta[x]], "ro")
        plt.show()

    if show_plot:
        import matplotlib.pyplot as plt
        colors = [appcolors[i % 10] for i in CM.cluster_id_full]
        plt.scatter(coord_x,
                    coord_y,
                    marker='o',
                    color=colors,
                    linewidth=0.4,
                    edgecolor='k')
        for i in range(n_cluster):
            item = flex.first_index(CM.cluster_id_maxima, i)
            plt.plot([cells[item].uc[0]], cells[item].uc[1], 'y.')
            plt.axes().set_aspect("equal")
            plt.show()
    if test_nks:
        exit()

    # Now look at each unit cell cluster for orientational clustering
    # idea is to cluster the orientational component in each of the unit cell clusters
    #
    do_orientational_clustering = not return_only_first_indexed_model  # temporary.
    dxtbx_crystal_models = []
    if do_orientational_clustering:
        print('IOTA: Starting orientational clustering')
        Dij_ori = {}  # dictionary to store Dij for each cluster
        uc_experiments_list = {
        }  # dictionary to store experiments_lists for each cluster
        from collections import Counter
        uc_cluster_count = Counter(list(CM.cluster_id_final))
        # instantiate the Dij_ori flat 1-d array
        # Put all experiments list from same uc cluster together
        if True:
            from scitbx.matrix import sqr
            from cctbx_orientation_ext import crystal_orientation
            crystal_orientation_list = []
            all_A = []
            for i in range(len(experiments_list)):
                crystal_orientation_list.append(
                    crystal_orientation(
                        experiments_list[i].crystals()[0].get_A(), True))
                #exit()
                A_direct = sqr(crystal_orientation_list[i].reciprocal_matrix()
                               ).transpose().inverse()
                all_A.append(A_direct[0])
                #print ("Direct A matrix 1st element = %12.6f %12.6f %12.6f"%(A_direct[0], A_direct[1], A_direct[2]))
        #  exit()
        CM_mapping = {}
        for i in range(len(experiments_list)):
            if CM.cluster_id_full[i] not in uc_experiments_list:
                uc_experiments_list[CM.cluster_id_full[i]] = []
                CM_mapping[CM.cluster_id_full[i]] = []
            uc_experiments_list[CM.cluster_id_full[i]].append(
                experiments_list[i])
            # Maintain mapping between original experiments_list and uc_exeriments_list
            # Mapping: key> index_in_experiments_list | value> cluster_id, index_in_uc_cluster
            CM_mapping[CM.cluster_id_full[i]].append(
                (i, len(uc_experiments_list[CM.cluster_id_full[i]]) - 1))
        for cluster in uc_cluster_count:
            # Make sure there are atleast a minimum number of samples in the cluster
            if uc_cluster_count[cluster] < clustering_params.min_datapts:
                continue
            Dij_ori[cluster] = flex.double(
                [[0.0] * uc_cluster_count[cluster]] *
                uc_cluster_count[cluster])
            # Now populate the Dij_ori array
            N_samples_in_cluster = len(uc_experiments_list[cluster])
            for i in range(N_samples_in_cluster - 1):
                for j in range(i + 1, N_samples_in_cluster):
                    dij_ori = get_dij_ori(
                        uc_experiments_list[cluster][i].crystals()[0],
                        uc_experiments_list[cluster][j].crystals()[0])
                    A_direct_i = sqr(
                        uc_experiments_list[cluster][i].crystals()
                        [0].get_A()).transpose().inverse()
                    A_direct_j = sqr(
                        uc_experiments_list[cluster][j].crystals()
                        [0].get_A()).transpose().inverse()
                    #print ("Direct A matrix 1st element = %12.6f %12.6f %12.6f %12.6f %12.6f %12.6f %12.6f"%(dij_ori, A_direct_i[0], A_direct_j[0], A_direct_i[1],A_direct_j[1], A_direct_i[2], A_direct_j[2] ))
                    Dij_ori[cluster][N_samples_in_cluster * i + j] = dij_ori
                    Dij_ori[cluster][N_samples_in_cluster * j + i] = dij_ori

        # Now do the orientational cluster analysis
        d_c_ori = clustering_params.d_c_ori  # 0.13
        from exafel_project.ADSE13_25.clustering.plot_with_dimensional_embedding import plot_with_dimensional_embedding
        #plot_with_dimensional_embedding(1-Dij_ori[1]/flex.max(Dij_ori[1]), show_plot=True)
        A_matrices = []
        for cluster in Dij_ori:
            #if cluster == 2:
            #  CM_ori = clustering_manager(Dij=Dij_ori[cluster], d_c=d_c_ori, max_percentile_rho=0.85, debug=True)
            d_c_ori = estimate_d_c(Dij_ori[cluster])
            #else:
            #d_c_ori=flex.mean_and_variance(Dij_ori[cluster].as_1d()).unweighted_sample_standard_deviation()
            print('d_c_ori=', d_c_ori)
            CM_ori = clustering_manager(
                Dij=Dij_ori[cluster],
                d_c=d_c_ori,
                max_percentile_rho=clustering_params.max_percentile_rho_ori,
                Z_delta=clustering_params.Z_delta,
                strategy='strategy_3')
            n_cluster_ori = 1 + flex.max(CM_ori.cluster_id_final)
            #from IPython import embed; embed(); exit()
            for i in range(n_cluster_ori):
                if len([zz for zz in CM_ori.cluster_id_final if zz == i
                        ]) < clustering_params.min_datapts:
                    continue
                item = flex.first_index(CM_ori.cluster_id_maxima, i)
                dxtbx_crystal_model = uc_experiments_list[cluster][
                    item].crystals()[0]
                dxtbx_crystal_models.append(dxtbx_crystal_model)
                # Map the orientational clusters to the original experiments_list indices
                # This should be the final list of clusters!
                for j, ori_cluster_id in enumerate(CM_ori.cluster_id_final):
                    if ori_cluster_id == i:
                        xx, yy = CM_mapping[cluster][j]
                        clustered_experiments_list[xx] = len(
                            dxtbx_crystal_models) - 1
                from scitbx.matrix import sqr
                from cctbx_orientation_ext import crystal_orientation
                crystal_orientation = crystal_orientation(
                    dxtbx_crystal_model.get_A(), True)
                A_direct = sqr(crystal_orientation.reciprocal_matrix()
                               ).transpose().inverse()
                A_matrices.append(A_direct)
                print(
                    "IOTA: Direct A matrix 1st element of orientational cluster %d  = %12.6f"
                    % (i, A_direct[0]))
                print(A_direct)
            if show_plot:
                # Decision graph
                stretch_plot_factor = 1.05  # (1+fraction of limits by which xlim,ylim should be set)
                import matplotlib.pyplot as plt
                plt.plot(CM_ori.rho, CM_ori.delta, "r.", markersize=3.)
                for x in range(len(list(CM_ori.cluster_id_final))):
                    if CM_ori.cluster_id_maxima[x] >= 0:
                        plt.plot([CM_ori.rho[x]], [CM_ori.delta[x]], "ro")
                #exit()
                plt.xlim([-10, stretch_plot_factor * flex.max(CM_ori.rho)])
                plt.ylim([-10, stretch_plot_factor * flex.max(CM_ori.delta)])
                plt.show()
    # FIXME Still to be worked out what exactly should be returned
    #if return_only_first_indexed_model:
    #  return [experiments_list[0].crystals()[0]], clustered_experiments_list
    # Make sure the crystal models are not too close to each other
    # FIXME should be a PHIL
    #from IPython import embed; embed(); exit()
    min_angle = 5.0  # taken from indexer.py
    close_models_list = []
    # Not used really; other fixes have been made to code to figure out outliers
    # Still keeping this in case it is useful later on.
    if len(dxtbx_crystal_models) > 10000:
        from dials.algorithms.indexing.compare_orientation_matrices import difference_rotation_matrix_axis_angle
        from cctbx_orientation_ext import crystal_orientation
        from dxtbx.model import Crystal
        for i_a in range(0, len(dxtbx_crystal_models) - 1):
            for i_b in range(i_a + 1, len(dxtbx_crystal_models)):
                cryst_a = dxtbx_crystal_models[i_a]
                cryst_b = dxtbx_crystal_models[i_b]
                cryst_a_ori = crystal_orientation(cryst_a.get_A(), True)
                cryst_b_ori = crystal_orientation(cryst_b.get_A(), True)
                try:
                    best_similarity_transform = cryst_b_ori.best_similarity_transformation(
                        other=cryst_a_ori,
                        fractional_length_tolerance=20.00,
                        unimodular_generator_range=1)
                    cryst_b_ori_best = cryst_b_ori.change_basis(
                        best_similarity_transform)
                except Exception as e:
                    cryst_b_ori_best = cryst_b_ori

                # FIXME hardcoded space group for myoglobin LS49
                cryst_b_best = Crystal(cryst_b_ori_best.direct_matrix()[0:3],
                                       cryst_b_ori_best.direct_matrix()[3:6],
                                       cryst_b_ori_best.direct_matrix()[6:9],
                                       'P 1 21 1')
                R_ab, axis, angle, cb_op_ab = difference_rotation_matrix_axis_angle(
                    cryst_a, cryst_b_best)
                # FIXME
                if abs(angle) < min_angle:  # degrees
                    close_models_list.append((i_a, i_b))

    # Now prune the dxtbx_crystal_models list
        unique_experiments_list = flex.int(range(len(dxtbx_crystal_models)))
        for close_models in close_models_list:
            i_a, i_b = close_models
            if dxtbx_crystal_models[i_a] is not None and dxtbx_crystal_models[
                    i_b] is not None:
                dxtbx_crystal_models[i_b] = None
                unique_experiments_list[i_b] = i_a
                clustered_experiments_list.set_selected(
                    clustered_experiments_list == i_b, i_a)

        counter = -1
        for ii, model in enumerate(dxtbx_crystal_models):
            if model is not None:
                counter += 1
                clustered_experiments_list.set_selected(
                    clustered_experiments_list == unique_experiments_list[ii],
                    counter)
        dxtbx_crystal_models = [
            x for x in dxtbx_crystal_models if x is not None
        ]

    #from IPython import embed; embed(); exit()
    if len(dxtbx_crystal_models) > 0:
        return dxtbx_crystal_models, list(clustered_experiments_list)
    else:
        # If nothing works, at least return the 1st crystal model that was found
        return [experiments_list[0].crystals()[0]], None
예제 #11
0
class pdb_hierarchy_builder(crystal_symmetry_builder):
    """Build a PDB hierarchy from the ``_atom_site`` loop of an mmCIF block.

    The constructor walks the atom records in file order and appends models,
    chains, residue groups, atom groups and atoms to ``self.hierarchy``.
    """

    # The recommended translation for ATOM records can be found at:
    #   http://mmcif.rcsb.org/dictionaries/pdb-correspondence/pdb2mmcif-2010.html#ATOM

    def __init__(self, cif_block):
        """Populate ``self.hierarchy`` from ``cif_block``.

        Args:
            cif_block: CIF block giving access to the ``_atom_site.*`` items
                (and optionally ``_atom_site_anisotrop.*``) through ``get()``.

        Raises:
            AssertionError: if one of the items this builder relies on is
                missing or the atom records are inconsistent.
            CifBuilderError: if the anisotropic displacement parameters
                cannot be converted to floating point numbers.
        """
        crystal_symmetry_builder.__init__(self, cif_block)

        self.hierarchy = hierarchy.root()

        # These items are mandatory for the _atom_site loop, all others are optional
        type_symbol = cif_block.get("_atom_site.type_symbol")
        atom_labels = cif_block.get("_atom_site.auth_atom_id")
        if atom_labels is None:
            # corresponds to chem comp atom name
            atom_labels = cif_block.get("_atom_site.label_atom_id")
        alt_id = cif_block.get(
            "_atom_site.label_alt_id")  # alternate conformer id
        label_asym_id = cif_block.get("_atom_site.label_asym_id")  # chain id
        auth_asym_id = cif_block.get("_atom_site.auth_asym_id")
        # Either flavour of the chain id may stand in for the other one.
        if label_asym_id is None: label_asym_id = auth_asym_id
        if auth_asym_id is None: auth_asym_id = label_asym_id
        comp_id = cif_block.get("_atom_site.auth_comp_id")
        if comp_id is None:
            comp_id = cif_block.get("_atom_site.label_comp_id")  # residue name
        entity_id = cif_block.get("_atom_site.label_entity_id")
        seq_id = cif_block.get("_atom_site.auth_seq_id")
        if seq_id is None:
            seq_id = cif_block.get("_atom_site.label_seq_id")  # residue number
        assert [atom_labels, alt_id, auth_asym_id, comp_id, entity_id,
                seq_id].count(None) == 0
        assert type_symbol is not None

        atom_site_fp = cif_block.get('_atom_site.phenix_scat_dispersion_real')
        atom_site_fdp = cif_block.get('_atom_site.phenix_scat_dispersion_imag')

        pdb_ins_code = cif_block.get(
            "_atom_site.pdbx_PDB_ins_code")  # insertion code
        model_ids = cif_block.get("_atom_site.pdbx_PDB_model_num")
        atom_site_id = cif_block.get("_atom_site.id")
        # only permitted values are ATOM or HETATM
        group_PDB = cif_block.get("_atom_site.group_PDB")

        # TODO: read esds
        B_iso_or_equiv = flex.double(
            cif_block.get("_atom_site.B_iso_or_equiv"))
        cart_x = flex.double(cif_block.get("_atom_site.Cartn_x"))
        cart_y = flex.double(cif_block.get("_atom_site.Cartn_y"))
        cart_z = flex.double(cif_block.get("_atom_site.Cartn_z"))
        occu = flex.double(cif_block.get("_atom_site.occupancy"))
        formal_charge = cif_block.get("_atom_site.pdbx_formal_charge")

        # anisotropic b-factors
        # TODO: read esds
        anisotrop_id = cif_block.get("_atom_site_anisotrop.id")
        adps = None
        if anisotrop_id is not None:
            u_ij = [
                cif_block.get("_atom_site_anisotrop.U[%s][%s]" %
                              (ij[0], ij[1]))
                for ij in ("11", "22", "33", "12", "13", "23")
            ]
            # The six tensor components come all together or not at all.
            assert u_ij.count(None) in (0, 6)
            if u_ij.count(None) == 0:
                adps = u_ij
            else:
                assert u_ij.count(None) == 6
                b_ij = [
                    cif_block.get("_atom_site_anisotrop.B[%s][%s]" %
                                  (ij[0], ij[1]))
                    for ij in ("11", "22", "33", "12", "13", "23")
                ]
                assert b_ij.count(None) in (0, 6)
                if b_ij.count(None) == 0:
                    adps = adptbx.b_as_u(b_ij)
                # U is absent in this branch, so B has to be present:
                # it is illegal for both tensors to be missing.
                assert not (u_ij.count(None) and b_ij.count(None))
            if adps is not None:
                try:
                    adps = [flex.double(adp) for adp in adps]
                except ValueError as e:
                    raise CifBuilderError("Error interpreting ADPs: " + str(e))
                adps = flex.sym_mat3_double(*adps)

        current_model_id = None
        current_label_asym_id = None
        current_auth_asym_id = None
        current_residue_id = None
        current_ins_code = None

        for i_atom in range(atom_labels.size()):
            # model(s)
            last_model_id = current_model_id
            current_model_id = model_ids[i_atom]
            assert current_model_id is not None
            if current_model_id != last_model_id:
                model = hierarchy.model(id=current_model_id)
                self.hierarchy.append_model(model)

            # chain(s)
            last_label_asym_id = current_label_asym_id
            current_label_asym_id = label_asym_id[i_atom]
            assert current_label_asym_id is not None
            last_auth_asym_id = current_auth_asym_id
            current_auth_asym_id = auth_asym_id[i_atom]
            if current_auth_asym_id == ".": current_auth_asym_id = " "
            assert current_auth_asym_id is not None
            if current_label_asym_id != last_label_asym_id:
                # A new label_asym_id starts a new chain, which is named
                # after the author-assigned id.
                chain = hierarchy.chain(id=current_auth_asym_id)
                model.append_chain(chain)
            else:
                assert current_auth_asym_id == last_auth_asym_id

            # residue_group(s)
            # defined by residue id and insertion code
            last_residue_id = current_residue_id
            current_residue_id = seq_id[i_atom]
            assert current_residue_id is not None
            last_ins_code = current_ins_code
            if pdb_ins_code is not None:
                current_ins_code = pdb_ins_code[i_atom]
                if current_ins_code in ("?", ".", None): current_ins_code = " "
            if (current_residue_id != last_residue_id
                    or current_ins_code != last_ins_code
                    or current_label_asym_id != last_label_asym_id):
                try:
                    resseq = hy36encode(width=4, value=int(current_residue_id))
                except ValueError:
                    # Not a plain integer: keep the string as it is, provided
                    # it already has the fixed width of a resseq field.
                    resseq = current_residue_id
                    assert len(resseq) == 4
                residue_group = hierarchy.residue_group(resseq=resseq,
                                                        icode=current_ins_code)
                chain.append_residue_group(residue_group)
                atom_groups = OrderedDict()  # reset atom_groups cache

            # atom_group(s)
            # defined by resname and altloc id
            current_altloc = alt_id[i_atom]
            if current_altloc == ".": current_altloc = ""  # Main chain atoms
            current_resname = comp_id[i_atom]
            if (current_altloc, current_resname) not in atom_groups:
                atom_group = hierarchy.atom_group(altloc=current_altloc,
                                                  resname=current_resname)
                atom_groups[(current_altloc, current_resname)] = atom_group
                if current_altloc == "":
                    # The group without altloc goes first in its residue group.
                    residue_group.insert_atom_group(0, atom_group)
                else:
                    residue_group.append_atom_group(atom_group)
            else:
                atom_group = atom_groups[(current_altloc, current_resname)]

            # atom(s)
            atom = hierarchy.atom()
            atom_group.append_atom(atom)
            atom.set_element(type_symbol[i_atom])
            atom.set_name(
                format_pdb_atom_name(atom_labels[i_atom], type_symbol[i_atom]))
            atom.set_xyz(new_xyz=(cart_x[i_atom], cart_y[i_atom],
                                  cart_z[i_atom]))
            atom.set_b(B_iso_or_equiv[i_atom])
            atom.set_occ(occu[i_atom])
            # hy36encode should go once the pdb.hierarchy has been
            # modified to no longer store fixed-width strings
            atom.set_serial(
                hy36encode(width=5, value=int(atom_site_id[i_atom])))
            # some code relies on an empty segid being 4 spaces
            atom.set_segid("    ")
            if group_PDB is not None and group_PDB[i_atom] == "HETATM":
                atom.hetero = True
            if formal_charge is not None:
                charge = formal_charge[i_atom]
                if charge not in ("?", "."):
                    # The sign may be written before or after the magnitude;
                    # it is always emitted after it.
                    if charge.endswith("-") or charge.startswith("-"):
                        sign = "-"
                    else:
                        sign = "+"
                    charge = charge.strip(" -+")
                    charge = int(charge)
                    if charge == 0: sign = ""
                    atom.set_charge("%i%s" % (charge, sign))
            if atom_site_fp is not None:
                fp = atom_site_fp[i_atom]
                if fp not in ("?", "."):
                    atom.set_fp(new_fp=float(fp))
            if atom_site_fdp is not None:
                fdp = atom_site_fdp[i_atom]
                if fdp not in ("?", "."):
                    atom.set_fdp(new_fdp=float(fdp))
            if anisotrop_id is not None and adps is not None:
                # Atoms without an entry in the anisotrop loop keep their
                # isotropic description only.
                u_ij_index = flex.first_index(anisotrop_id,
                                              atom.serial.strip())
                if u_ij_index is not None:
                    u_ij = adps[u_ij_index]
                    atom.set_uij(u_ij)
    def __init__(self, **kwargs):
        """Run a density-peak clustering on a distance matrix.

        The keyword arguments are handed to ``group_args.__init__``; the
        code below then reads ``self.Dij``, ``self.d_c`` and
        ``self.max_percentile_rho`` (presumably set from those keywords --
        confirm against ``group_args``).

        Results are stored on the instance:
          * ``rho`` and ``delta``: the values returned by the
            Rodriguez_Laio_clustering_2014 object,
          * ``cluster_id_maxima``: cluster ids of the chosen centres only,
            -1 for every other item,
          * ``cluster_id_full``: ids after ``cluster_assignment`` was applied,
          * ``cluster_id_final``: currently an identical copy of
            ``cluster_id_full`` because the halo pruning is commented out.
        """
        group_args.__init__(self, **kwargs)
        print('finished Dij, now calculating rho_i and density')
        from xfel.clustering import Rodriguez_Laio_clustering_2014 as RL
        R = RL(distance_matrix=self.Dij, d_c=self.d_c)
        #from IPython import embed; embed(); exit()
        #from clustering.plot_with_dimensional_embedding import plot_with_dimensional_embedding
        #plot_with_dimensional_embedding(1-self.Dij/flex.max(self.Dij), show_plot=True)
        self.rho = rho = R.get_rho()
        # NOTE(review): ave_rho is not used anywhere below.
        ave_rho = flex.mean(rho.as_double())
        # Number of items, taken from the first dimension of the matrix.
        NN = self.Dij.focus()[0]
        i_max = flex.max_index(rho)
        # Largest distance from the item with the highest rho to any item;
        # it is passed on to get_delta() below.
        delta_i_max = flex.max(
            flex.double([self.Dij[i_max, j] for j in range(NN)]))
        rho_order = flex.sort_permutation(rho, reverse=True)
        rho_order_list = list(rho_order)
        self.delta = delta = R.get_delta(rho_order=rho_order,
                                         delta_i_max=delta_i_max)
        cluster_id = flex.int(NN, -1)  # -1 means no cluster
        delta_order = flex.sort_permutation(delta, reverse=True)
        MAX_PERCENTILE_RHO = self.max_percentile_rho  # cluster centers have to be in the top percentile
        n_cluster = 0
        #
        # Z-scores of rho and delta are only defined when both spreads are
        # non-zero; otherwise a single top item is used as the centroid.
        pick_top_solution = False
        rho_stdev = flex.mean_and_variance(
            rho.as_double()).unweighted_sample_standard_deviation()
        delta_stdev = flex.mean_and_variance(
            delta).unweighted_sample_standard_deviation()
        if rho_stdev != 0.0 and delta_stdev != 0:
            rho_z = (rho.as_double() -
                     flex.mean(rho.as_double())) / (rho_stdev)
            delta_z = (delta - flex.mean(delta)) / (delta_stdev)
        else:
            pick_top_solution = True
            if rho_stdev == 0.0:
                centroids = [flex.first_index(delta, flex.max(delta))]
            elif delta_stdev == 0.0:
                centroids = [flex.first_index(rho, flex.max(rho))]

        significant_delta = []
        significant_rho = []
        # Hard-coded switch: while it is True the else branch further down
        # (the delta/percentile based selection) is never executed.
        debug_fix_clustering = True
        if debug_fix_clustering:
            if not pick_top_solution:
                # The cutoff is one standard deviation above the mean, or the
                # maximum z-score when that is smaller than 1.
                delta_z_cutoff = min(1.0, max(delta_z))
                rho_z_cutoff = min(1.0, max(rho_z))
                for ic in range(NN):
                    # test the density & rho
                    if delta_z[ic] >= delta_z_cutoff:
                        significant_delta.append(ic)
                    if rho_z[ic] >= rho_z_cutoff:
                        significant_rho.append(ic)
                # Candidates have to pass both cutoffs.
                centroid_candidates = list(
                    set(significant_delta).intersection(set(significant_rho)))
                # Now compare the relative orders of the max delta_z and max rho_z to make sure they are within 1 stdev
                centroids = []
                max_delta_z_candidates = -999.9
                max_rho_z_candidates = -999.9
                for ic in centroid_candidates:
                    if delta_z[ic] > max_delta_z_candidates:
                        max_delta_z_candidates = delta_z[ic]
                    if rho_z[ic] > max_rho_z_candidates:
                        max_rho_z_candidates = rho_z[ic]
                for ic in centroid_candidates:
                    if max_delta_z_candidates - delta_z[
                            ic] < 1.0 and max_rho_z_candidates - rho_z[
                                ic] < 1.0:
                        centroids.append(ic)

            # NOTE(review): only the *number* of centroids is used here. The
            # items picked are the first len(centroids) entries of
            # delta_order, not the indices stored in centroids -- confirm
            # that this is intended.
            item_idxs = [
                delta_order[ic] for ic, centroid in enumerate(centroids)
            ]
            for item_idx in item_idxs:
                cluster_id[item_idx] = n_cluster
                print('CLUSTERING_STATS', item_idx, cluster_id[item_idx])
                n_cluster += 1
                ####
        else:
            for ic in range(NN):
                item_idx = delta_order[ic]
                if ic != 0:
                    if delta[item_idx] <= 0.25 * delta[
                            delta_order[0]]:  # too low to be a medoid
                        continue
                item_rho_order = rho_order_list.index(item_idx)
                # NOTE(review): the result of "/" on two integers depends on
                # the Python version / __future__ imports -- confirm which
                # semantics are expected here.
                if (item_rho_order) / NN < MAX_PERCENTILE_RHO:
                    cluster_id[item_idx] = n_cluster
                    print('CLUSTERING_STATS', ic, item_idx, item_rho_order,
                          cluster_id[item_idx])
                    n_cluster += 1
        ###


#
#
        print('Found %d clusters' % n_cluster)
        for x in range(NN):
            if cluster_id[x] >= 0:
                print("XC", x, cluster_id[x], rho[x], delta[x])
        # Snapshot before cluster_assignment() is called with cluster_id:
        # at this point only the cluster centres carry an id.
        self.cluster_id_maxima = cluster_id.deep_copy()
        R.cluster_assignment(rho_order, cluster_id)
        self.cluster_id_full = cluster_id.deep_copy()

        #halo = flex.bool(NN,False)
        #border = R.get_border( cluster_id = cluster_id )

        #for ic in range(n_cluster): #loop thru all border regions; find highest density
        #  this_border = (cluster_id == ic) & (border==True)
        #  if this_border.count(True)>0:
        #    highest_density = flex.max(rho.select(this_border))
        #    halo_selection = (rho < highest_density) & (this_border==True)
        #    if halo_selection.count(True)>0:
        #      cluster_id.set_selected(halo_selection,-1)
        #    core_selection = (cluster_id == ic) & ~halo_selection
        #    highest_density = flex.max(rho.select(core_selection))
        #    too_sparse = core_selection & (rho.as_double() < highest_density/10.) # another heuristic
        #    if too_sparse.count(True)>0:
        #      cluster_id.set_selected(too_sparse,-1)
        self.cluster_id_final = cluster_id.deep_copy()
예제 #13
0
class crystal_structure_builder(crystal_symmetry_builder):
    """Build an ``xray.structure`` from the ``_atom_site_*`` items of a CIF block.

    The result is stored on ``self.structure``.
    """

    def __init__(self, cif_block):
        """Read sites, displacement parameters and occupancies from ``cif_block``.

        Fractional coordinates are preferred; Cartesian coordinates are
        fractionalized with the unit cell when no fractional ones are given.

        Args:
            cif_block: CIF block giving access to the ``_atom_site_*`` items
                through ``get()``.

        Raises:
            CifBuilderError: if no complete set of coordinates is found, if
                only part of the anisotropic ADP items is present, or if the
                ADP values cannot be converted to numbers.
        """
        # XXX To do: interpret _atom_site_refinement_flags
        crystal_symmetry_builder.__init__(self, cif_block, strict=True)
        axes = ('x', 'y', 'z')
        # Each column is looked up and converted together with its own name,
        # so that column_name always matches the data being converted.
        atom_sites_frac = [
            as_double_or_none_if_all_question_marks(
                cif_block.get('_atom_site_fract_%s' % axis),
                column_name='_atom_site_fract_%s' % axis) for axis in axes
        ]
        if atom_sites_frac.count(None) == 3:
            atom_sites_cart = [
                as_double_or_none_if_all_question_marks(
                    cif_block.get('_atom_site_Cartn_%s' % axis),
                    column_name='_atom_site_Cartn_%s' % axis) for axis in axes
            ]
            if atom_sites_cart.count(None) != 0:
                raise CifBuilderError("No atomic coordinates could be found")
            atom_sites_cart = flex.vec3_double(*atom_sites_cart)
            # XXX do we need to take account of _atom_sites_Cartn_tran_matrix_ ?
            atom_sites_frac = self.crystal_symmetry.unit_cell().fractionalize(
                atom_sites_cart)
        else:
            if atom_sites_frac.count(None) != 0:
                raise CifBuilderError("No atomic coordinates could be found")
            atom_sites_frac = flex.vec3_double(*atom_sites_frac)
        labels = cif_block.get('_atom_site_label')
        type_symbol = cif_block.get('_atom_site_type_symbol')
        U_iso_or_equiv = flex_double_else_none(
            cif_block.get('_atom_site_U_iso_or_equiv',
                          cif_block.get('_atom_site_U_equiv_geom_mean')))
        # B values are only consulted when no U values are available.
        B_iso_or_equiv = None
        if U_iso_or_equiv is None:
            B_iso_or_equiv = flex_double_else_none(
                cif_block.get('_atom_site_B_iso_or_equiv',
                              cif_block.get('_atom_site_B_equiv_geom_mean')))
        adp_type = cif_block.get('_atom_site_adp_type')
        occupancy = flex_double_else_none(
            cif_block.get('_atom_site_occupancy'))
        scatterers = flex.xray_scatterer()
        atom_site_aniso_label = flex_std_string_else_none(
            cif_block.get('_atom_site_aniso_label'))
        adps = None
        have_Bs = False
        if atom_site_aniso_label is not None:
            adps = [
                cif_block.get('_atom_site_aniso_U_%i' % i)
                for i in (11, 22, 33, 12, 13, 23)
            ]
            if adps.count(None) > 0:
                # No complete U tensor: fall back to the B tensor items.
                adps = [
                    cif_block.get('_atom_site_aniso_B_%i' % i)
                    for i in (11, 22, 33, 12, 13, 23)
                ]
                have_Bs = True
            if adps.count(None) == 6:
                adps = None
            elif adps.count(None) > 0:
                raise CifBuilderError("Some ADP items are missing")
            else:
                # Drop the rows in which all six components are unknown ("?").
                sel = None
                for adp in adps:
                    f = (adp == "?")
                    if (sel is None): sel = f
                    else: sel &= f
                sel = ~sel
                atom_site_aniso_label = atom_site_aniso_label.select(sel)
                try:
                    adps = [flex.double(adp.select(sel)) for adp in adps]
                except ValueError as e:
                    raise CifBuilderError("Error interpreting ADPs: " + str(e))
                adps = flex.sym_mat3_double(*adps)
        for i in range(len(atom_sites_frac)):
            kwds = {}
            if labels is not None:
                kwds.setdefault('label', str(labels[i]))
            if type_symbol is not None:
                kwds.setdefault('scattering_type', str(type_symbol[i]))
            # An anisotropic tensor takes precedence over the isotropic
            # U value, which in turn takes precedence over the B value.
            if (atom_site_aniso_label is not None and adps is not None
                    and labels is not None
                    and labels[i] in atom_site_aniso_label):
                adp = adps[flex.first_index(atom_site_aniso_label, labels[i])]
                if have_Bs: adp = adptbx.b_as_u(adp)
                kwds.setdefault(
                    'u',
                    adptbx.u_cif_as_u_star(self.crystal_symmetry.unit_cell(),
                                           adp))
            elif U_iso_or_equiv is not None:
                kwds.setdefault('u', float_from_string(U_iso_or_equiv[i]))
            elif B_iso_or_equiv is not None:
                kwds.setdefault('b', float_from_string(B_iso_or_equiv[i]))
            if occupancy is not None:
                kwds.setdefault('occupancy', float_from_string(occupancy[i]))
            scatterers.append(xray.scatterer(**kwds))
        scatterers.set_sites(atom_sites_frac)

        special_position_settings = crystal.special_position_settings(
            crystal_symmetry=self.crystal_symmetry,
            min_distance_sym_equiv=0.0001)

        self.structure = xray.structure(
            special_position_settings=special_position_settings,
            scatterers=scatterers)
예제 #14
0
def run(args):
  """Demonstrate reading PDBx/mmCIF entries with iotbx and print a summary.

  Args:
    args: list of mmCIF file paths and/or PDB ids. A PDB id is downloaded
      from the PDBe mirror. When the list is empty the entry "1hbb" is used.

  All output is written with single-argument ``print(...)`` calls so that the
  text is the same whether ``print`` is a statement or a function.
  """
  if len(args) == 0:
    args = ["1hbb"]

  for arg in args:
    import iotbx.pdb.fetch

    if os.path.isfile(arg):
      mmcif_file = arg
      pdb_id = os.path.splitext(os.path.basename(mmcif_file))[0]
      iotbx.pdb.fetch.validate_pdb_id(pdb_id)
    else:
      # download pdbx/mmcif file from the PDB
      pdb_id = arg
      mirror = "pdbe"
      mmcif_file = iotbx.pdb.fetch.get_pdb(
        pdb_id, data_type="pdb", mirror=mirror, log=sys.stdout, format="cif")

    # read the cif file and get an iotbx.cif object
    import iotbx.cif
    cif_reader = iotbx.cif.reader(file_path=mmcif_file)
    cif_object = cif_reader.model()
    cif_block = cif_object[pdb_id]
    # get single items from cif_block
    print("PDB id: %s" % (cif_block["_entry.id"],))
    # get a looped item from cif_block
    print("Authors:")
    for author in cif_block.get_looped_item("_citation_author.name"):
      print(author)
    print("")
    print("Molecular Entities:")
    for pdbx_entity in cif_block.get_looped_item("_entity.pdbx_description"):
      print(pdbx_entity)
    print("")

    # extract crystal symmetry information
    import iotbx.cif.builders
    builder = iotbx.cif.builders.crystal_symmetry_builder(cif_block)
    builder.crystal_symmetry.show_summary()

    # 1) this works also for .pdb files, but re-reads the file
    import iotbx.pdb
    pdb_input = iotbx.pdb.input(file_name=mmcif_file)
    hierarchy = pdb_input.construct_hierarchy()

    # 2) This only works for mmcif files, but re-uses the cif_object from above:
    import iotbx.pdb.mmcif
    pdb_input = iotbx.pdb.mmcif.cif_input(cif_object=cif_object)
    hierarchy = pdb_input.construct_hierarchy()

    # some convenience methods of pdb_input object
    print("Software: %s" % (pdb_input.get_program_name(),))
    print("Experiment type: %s" % (pdb_input.get_experiment_type(),))
    print("Solvent content: %s" % (pdb_input.get_solvent_content(),))
    print("Deposition date: %s" % (pdb_input.deposition_date(),))
    r_rfree_sigma = pdb_input.get_r_rfree_sigma(mmcif_file)
    print("R-work/R-free: %s/%s" % (r_rfree_sigma.r_work, r_rfree_sigma.r_free))
    # can also get crystal_symmetry from pdb_input object
    crystal_symmetry = pdb_input.crystal_symmetry()

    print("")
    hierarchy.overall_counts().show()
    # level_id can be "model", "chain", "residue_group", "atom_group" or "atom"
    hierarchy.show(level_id="chain")
    # for a more detailed example of interacting with a pdb.hierarchy object,
    # see iotbx/examples/pdb_hierarchy.py

    # extract atom sites
    atoms = hierarchy.atoms()
    sites_cart = atoms.extract_xyz()
    print("")
    # Show at most the first ten atoms; small entries may have fewer.
    for i in range(min(10, len(atoms))):
      print("%s %s" % (atoms[i].id_str(), atoms[i].xyz))
    print("")

    # read some sequence information
    entity_poly_entity_id = cif_block.get_looped_item("_entity_poly.entity_id")
    entity_id = cif_block.get_looped_item("_entity.id")
    entity_pdbx_description = cif_block.get_looped_item("_entity.pdbx_description")
    entity_poly_one_letter_code = cif_block.get_looped_item(
      "_entity_poly.pdbx_seq_one_letter_code")

    from cctbx.array_family import flex
    for i in range(len(entity_poly_one_letter_code)):
      sequence = "".join(entity_poly_one_letter_code[i].split())
      # _entity_poly rows refer to _entity rows through the entity id, so the
      # description has to be taken from the matching _entity row (idx), not
      # from the row number within _entity_poly (i).
      idx = flex.first_index(entity_id, entity_poly_entity_id[i])
      if idx is None:
        # The polymer refers to an entity that is not listed in _entity.
        print("%s %s %s" % (entity_poly_entity_id[i], "?", sequence))
        continue
      print("%s %s %s" % (entity_id[idx], entity_pdbx_description[idx], sequence))
예제 #15
0
def run_detail(show_plot, save_plot):
    """Cluster the unit cells listed in the file named by ``sys.argv[1]``.

    Every line of the input file is split on whitespace and the tokens are
    handed to ``CellOnlyFrame(args=tokens, path=None)``.  The metrical
    matrices of the resulting cells are flattened, turned into a distance
    matrix with ``NCDist_flatten`` and clustered with ``clustering_manager``.
    A per-cluster summary is printed and, optionally, diagnostic plots are
    shown and/or saved.

    Args:
        show_plot: when true, display the unit-cell scatter plot, the decision
            graph (rho vs. delta) and the two cluster-coloured scatter plots.
        save_plot: when true, save the initial unit-cell scatter plot.

    Returns:
        None.  All results are printed or plotted.
    """
    P = Profiler("0. Read data")
    import sys
    file_name = sys.argv[1]
    from xfel.clustering.singleframe import CellOnlyFrame
    cells = []
    # Iterate the file object directly inside a context manager: the original
    # open(...).xreadlines() is deprecated and never closed the handle.
    with open(file_name, "r") as handle:
        for line in handle:
            tokens = line.strip().split()
            cells.append(CellOnlyFrame(args=tokens, path=None))
    MM = [c.mm for c in cells]  # get all metrical matrices
    # Flatten the first six elements of every metrical matrix into one array.
    MM_double = flex.double()
    for metrical_matrix in MM:
        for j in range(6):
            MM_double.append(metrical_matrix[j])

    print("There are %d cells X" % (len(MM)))
    # Indices into the unit-cell tuple used as plot axes.
    CX = 0
    CY = 3
    coord_x = flex.double([c.uc[CX] for c in cells])
    coord_y = flex.double([c.uc[CY] for c in cells])
    if show_plot or save_plot:
        import matplotlib
        if not show_plot:
            # http://matplotlib.org/faq/howto_faq.html#generate-images-without-having-a-window-appear
            matplotlib.use('Agg')  # use a non-interactive backend
        from matplotlib import pyplot as plt
        plt.plot(coord_x, coord_y, "k.", markersize=3.)
        if save_plot:
            # NOTE(review): plot_name is not defined in this function; it is
            # presumably a module-level name -- confirm, otherwise this raises
            # NameError.
            plt.savefig(plot_name,
                        size_inches=(10, 10),
                        dpi=300,
                        bbox_inches='tight')
        if show_plot:
            plt.show()

    print("Now constructing a Dij matrix.")
    # Rebinding P drops the first Profiler and starts timing the next stage.
    P = Profiler("1. compute Dij matrix")
    NN = len(MM)

    from cctbx.uctbx.determine_unit_cell import NCDist_flatten
    Dij = NCDist_flatten(MM_double)
    del P

    d_c = 10000  # the distance cutoff, such that average item neighbors 1-2% of all items
    CM = clustering_manager(Dij=Dij, d_c=d_c)

    # Summarize the results here
    n_cluster = 1 + flex.max(CM.cluster_id_final)
    # Index of the item flagged as the centre (density maximum) of each cluster.
    center_items = [
        flex.first_index(CM.cluster_id_maxima, i) for i in range(n_cluster)
    ]
    print("%d have been analyzed" % len(cells))
    print("# ------------   %d CLUSTERS  ----------------" % (n_cluster))
    for i in range(n_cluster):
        item = center_items[i]
        print("Cluster %d.  Central unit cell: item %d" % (i, item))
        cells[item].crystal_symmetry.show_summary()
        print("Cluster has %d items, or %d after trimming borders" % (
            (CM.cluster_id_full == i).count(True),
            (CM.cluster_id_final == i).count(True)))
        print("")

    appcolors = [
        'b', 'r', '#ff7f0e', '#2ca02c', '#9467bd', '#8c564b', '#e377c2',
        '#7f7f7f', '#bcbd22', '#17becf'
    ]
    if show_plot:
        from matplotlib import pyplot as plt

        # Decision graph: every item as a dot, cluster centres as circles.
        plt.plot(CM.rho, CM.delta, "r.", markersize=3.)
        for x in range(NN):
            if CM.cluster_id_maxima[x] >= 0:
                plt.plot([CM.rho[x]], [CM.delta[x]], "ro")
        plt.show()

        # No-halo plot: every item coloured by its full cluster assignment.
        colors = [appcolors[i % 10] for i in CM.cluster_id_full]
        plt.scatter(coord_x,
                    coord_y,
                    marker='o',
                    color=colors,
                    linewidths=0.4,
                    edgecolor='k')
        for item in center_items:
            plt.plot([cells[item].uc[CX]], [cells[item].uc[CY]], 'y.')
        plt.show()

        # Final plot: items labelled -1 in cluster_id_final are drawn in
        # black, the rest are coloured by cluster.
        halo = (CM.cluster_id_final == -1)
        core = ~halo
        plt.plot(coord_x.select(halo), coord_y.select(halo), "k.")
        colors = [appcolors[i % 10] for i in CM.cluster_id_final.select(core)]
        plt.scatter(coord_x.select(core),
                    coord_y.select(core),
                    marker="o",
                    color=colors,
                    linewidths=0.4,
                    edgecolor='k')
        for item in center_items:
            plt.plot([cells[item].uc[CX]], [cells[item].uc[CY]], 'y.')
        plt.show()
예제 #16
0
 def validate_loop(self, loop, block):
   """Validate every data item of a loop against its dictionary definition.

   Items without a definition (``get_definition`` raises ``KeyError``) are
   skipped.  Problems are not raised; they are passed to
   ``self.report_error`` with these codes:

   * 2501 -- item is not allowed in a list (DDL1 only)
   * 2502 -- items from more than one category in the same loop
   * 2505 -- an item named by ``_list_reference`` is missing from the loop
   * 2203 -- a mandatory category key is missing from the block (DDL2 only)
   * 2503 -- a child value does not occur among the parent values
   * 2504 -- the linked parent item is missing altogether

   Args:
     loop: mapping-like loop object (data name -> column of values).
     block: enclosing block, used for lookups outside the loop.
   """
   list_category = None
   for key, value in loop.iteritems():
     try:
       definition = self.get_definition(key)
     except KeyError: continue
     self.validate_enumeration(key, value, definition)
     self.validate_dependent(key, block, definition)
     self.validate_related(key, block, definition)
     _list = definition.get("_list")
     if self.DDL_version == 1 and _list in ('no', None):
       self.report_error(2501, key=key) # not allowed in list
     definition_category = definition.category
     if (definition_category is not None and
         not isinstance(definition_category, basestring)):
       # The definition covers several names: pick the category that sits at
       # the same position as this key in the list of names.
       definition_name = definition.name
       i = flex.first_index(definition_name, key)
       definition_category = definition_category[i]
     if list_category is None:
       list_category = definition_category
     elif (isinstance(list_category, basestring)
           and definition_category is not None
           and list_category != definition_category):
       # NOTE(review): this looks like leftover debug output; kept so stdout
       # is unchanged.  Single-argument form prints the same text on
       # Python 2 and Python 3.
       print("%s %s" % (list_category, list(definition_category)))
       self.report_error(2502, key=key) # multiple categories in loop
     mandatory = definition.mandatory == 'yes'  # NOTE(review): currently unused
     references = definition.get('_list_reference')
     if references is not None:
       if isinstance(references, basestring):
         references = [references]
       for reference in references:
         ref_data = self.get_definition(reference)
         ref_names = ref_data['_name']
         if isinstance(ref_names, basestring):
           ref_names = [ref_names]
         for name in ref_names:
           if name not in loop:
             self.report_error(2505, key=key, reference=name) # missing _list_reference
     elif (self.DDL_version == 2
           and isinstance(definition.category, basestring)):
       category_def = self.get_definition(definition.category)
       if category_def.category_key is not None:
         category_keys = category_def.category_key
         if isinstance(category_keys, basestring):
           category_keys = [category_keys]
         for cat_key in category_keys:
           cat_key_def = self.get_definition(cat_key)
           # Check each category key.  Previously this test sat after the
           # loop, so only the last key was examined and an empty key list
           # left cat_key_def unbound (or stale from an earlier item).
           if (cat_key_def.mandatory == 'yes'
               and isinstance(cat_key_def.mandatory, basestring)
               and cat_key_def.name not in block):
             self.report_error(
               2203, key=cat_key_def.name, category=definition.category)
     #
     link_parent = definition.get(
       '_list_link_parent', self.child_parent_relations.get(key))
     if link_parent is not None:
       # Look for the parent item in this loop first, then in the block.
       parent_values = loop.get(link_parent, block.get(link_parent))
       if parent_values is not None:
         for v in loop[key]:
           # '.' values are exempt from the parent check.
           if v != '.' and v not in parent_values:
             # missing parent value
             self.report_error(2503, value=v, child=key, parent=link_parent)
       else:
         self.report_error(2504, child=key, parent=link_parent) # missing parent
def get_uc_consensus(experiments_list,
                     show_plot=False,
                     return_only_first_indexed_model=False,
                     finalize_method=None,
                     clustering_params=None):
    '''
    Cluster the unit cells of the given experiments and return consensus
    crystal models.

    Uses the Rodriguez Laio 2014 method to cluster the unit cells, then
    clusters the crystal orientations within every sufficiently populated
    unit-cell cluster and collects the crystal model at the centre of each
    orientational cluster. Models whose mutual rotation angle is below 5
    degrees are pruned so that only one of each close pair is kept.
    Clustering code taken from github.com/cctbx-xfel/cluster_regression

    Args:
        experiments_list: list of experiment objects; each must provide
            ``crystals()`` returning a non-empty sequence of crystal models.
        show_plot: when true, show diagnostic matplotlib plots.
        return_only_first_indexed_model: when true, skip all clustering and
            return the first crystal model of the first experiment.
        finalize_method: accepted for interface compatibility; not used here.
        clustering_params: accepted for interface compatibility; not used here.

    Returns:
        A tuple ``(crystal_models, None)`` where ``crystal_models`` is a list
        of crystal models. If fewer than 5 cells are available, or no
        orientational cluster qualifies, the list holds just the first
        crystal model of the first experiment.
    '''
    if return_only_first_indexed_model:
        return [experiments_list[0].crystals()[0]], None
    cells = []
    from xfel.clustering.singleframe import CellOnlyFrame
    save_plot = False
    # Flag for testing Lysozyme data from NKS. Make sure cluster_regression repository is present and configured
    # Program will exit after plots are displayed if this flag is true
    test_nks = False
    if test_nks:
        from cctbx import crystal
        import libtbx.load_env
        cluster_regression = libtbx.env.find_in_repositories(
            relative_path="cluster_regression", test=os.path.isdir)
        file_name = os.path.join(cluster_regression, 'examples',
                                 'lysozyme1341.txt')
        # Iterate the file directly inside a context manager: xreadlines()
        # does not exist on Python 3 and the handle was never closed.
        with open(file_name, "r") as handle:
            for line in handle:
                tokens = line.strip().split()
                unit_cell = tuple(float(x) for x in tokens[0:6])
                space_group_symbol = tokens[6]
                crystal_symmetry = crystal.symmetry(
                    unit_cell=unit_cell,
                    space_group_symbol=space_group_symbol)
                cells.append(CellOnlyFrame(crystal_symmetry))
    else:
        for experiment in experiments_list:
            if len(experiment.crystals()) > 1:
                print('IOTA:Should have only one crystal model')
            crystal_symmetry = experiment.crystals()[0].get_crystal_symmetry()
            cells.append(CellOnlyFrame(crystal_symmetry))
    MM = [c.mm for c in cells]  # metrical matrices
    # Flatten the first six elements of every metrical matrix into one array.
    MM_double = flex.double()
    for metrical_matrix in MM:
        for j in range(6):
            MM_double.append(metrical_matrix[j])
    print('There are %d cells' % len(MM))
    coord_x = flex.double([c.uc[0] for c in cells])
    coord_y = flex.double([c.uc[1] for c in cells])
    if show_plot or save_plot:
        import matplotlib
        if not show_plot:
            matplotlib.use('Agg')  # non-interactive backend when only saving
        import matplotlib.pyplot as plt
        plt.plot([c.uc[0] for c in cells], [c.uc[1] for c in cells],
                 "k.",
                 markersize=3.)
        plt.axes().set_aspect("equal")
    if save_plot:
        plot_name = 'uc_cluster.png'
        plt.savefig(plot_name,
                    size_inches=(10, 10),
                    dpi=300,
                    bbox_inches='tight')
    if show_plot:
        plt.show()
    print('Now constructing a Dij matrix: Starting Unit Cell clustering')
    NN = len(MM)
    # FIXME should be a PHIL param
    # Bail out before building the distance matrix: previously this guard came
    # after the matrix and its sample standard deviation were computed, which
    # was wasted work and is presumably ill-defined for a single cell.
    if len(cells) < 5:
        return [experiments_list[0].crystals()[0]], None
    from cctbx.uctbx.determine_unit_cell import NCDist_flatten
    Dij = NCDist_flatten(MM_double)
    d_c = flex.mean_and_variance(
        Dij.as_1d()).unweighted_sample_standard_deviation()  #6.13
    CM = clustering_manager(Dij=Dij, d_c=d_c, max_percentile_rho=0.95)
    n_cluster = 1 + flex.max(CM.cluster_id_final)
    print(len(cells), ' datapoints have been analyzed')
    print('%d CLUSTERS' % n_cluster)
    for i in range(n_cluster):
        item = flex.first_index(CM.cluster_id_maxima, i)
        print('Cluster %d central Unit cell = %d' % (i, item))
        cells[item].crystal_symmetry.show_summary()

    # More plots for debugging
    appcolors = [
        'b', 'r', '#ff7f0e', '#2ca02c', '#9467bd', '#8c564b', '#e377c2',
        '#7f7f7f', '#bcbd22', '#17becf'
    ]
    if show_plot:
        import matplotlib.pyplot as plt
        # Decision graph: every item as a dot, cluster centres as circles.
        plt.plot(CM.rho, CM.delta, "r.", markersize=3.)
        for x in range(NN):
            if CM.cluster_id_maxima[x] >= 0:
                plt.plot([CM.rho[x]], [CM.delta[x]], "ro")
        plt.show()

        # Scatter plot of all cells coloured by their full cluster assignment.
        colors = [appcolors[i % 10] for i in CM.cluster_id_full]
        plt.scatter(coord_x,
                    coord_y,
                    marker='o',
                    color=colors,
                    linewidth=0.4,
                    edgecolor='k')
        for i in range(n_cluster):
            item = flex.first_index(CM.cluster_id_maxima, i)
            plt.plot([cells[item].uc[0]], [cells[item].uc[1]], 'y.')
        # Show the figure once, after all cluster centres are marked; the
        # show() call used to sit inside the loop above.
        plt.axes().set_aspect("equal")
        plt.show()
    if test_nks:
        exit()

    # Now look at each unit cell cluster for orientational clustering
    # idea is to cluster the orientational component in each of the unit cell clusters
    #
    do_orientational_clustering = not return_only_first_indexed_model  # temporary.
    dxtbx_crystal_models = []
    if do_orientational_clustering:
        print('IOTA: Starting orientational clustering')
        from collections import Counter
        from scitbx.matrix import sqr
        from cctbx_orientation_ext import crystal_orientation
        Dij_ori = {}  # dictionary to store Dij for each cluster
        uc_experiments_list = {
        }  # dictionary to store experiments_lists for each cluster
        uc_cluster_count = Counter(list(CM.cluster_id_final))
        # Put all experiments from the same uc cluster together
        for i in range(len(experiments_list)):
            uc_experiments_list.setdefault(CM.cluster_id_full[i],
                                           []).append(experiments_list[i])
        for cluster in uc_cluster_count:
            # Make sure there are at least a minimum number of samples in the cluster
            if uc_cluster_count[cluster] < 5:
                continue
            # The counts come from cluster_id_final while the groups are built
            # from cluster_id_full, so a label (presumably the -1 halo label)
            # may have no group, and a group may be larger than its count.
            members = uc_experiments_list.get(cluster)
            if members is None:
                continue
            N_samples_in_cluster = len(members)
            # Size the matrix from the group that is actually indexed below so
            # the flat index N*i + j always stays in range.
            Dij_ori[cluster] = flex.double(
                [[0.0] * N_samples_in_cluster] * N_samples_in_cluster)
            # Now populate the (symmetric) Dij_ori array
            for i in range(N_samples_in_cluster - 1):
                for j in range(i + 1, N_samples_in_cluster):
                    dij_ori = get_dij_ori(members[i].crystals()[0],
                                          members[j].crystals()[0])
                    Dij_ori[cluster][N_samples_in_cluster * i + j] = dij_ori
                    Dij_ori[cluster][N_samples_in_cluster * j + i] = dij_ori

        # Now do the orientational cluster analysis
        for cluster in Dij_ori:
            d_c_ori = flex.mean_and_variance(Dij_ori[cluster].as_1d(
            )).unweighted_sample_standard_deviation()
            CM_ori = clustering_manager(Dij=Dij_ori[cluster],
                                        d_c=d_c_ori,
                                        max_percentile_rho=0.85)
            n_cluster_ori = 1 + flex.max(CM_ori.cluster_id_final)
            #FIXME should be a PHIL param
            for i in range(n_cluster_ori):
                n_members = sum(1 for zz in CM_ori.cluster_id_final if zz == i)
                if n_members < 5:
                    continue
                item = flex.first_index(CM_ori.cluster_id_maxima, i)
                dxtbx_crystal_model = uc_experiments_list[cluster][
                    item].crystals()[0]
                dxtbx_crystal_models.append(dxtbx_crystal_model)
                # Use a distinct local name so the imported crystal_orientation
                # class is not rebound to an instance.
                orientation = crystal_orientation(dxtbx_crystal_model.get_A(),
                                                  True)
                A_direct = sqr(
                    orientation.reciprocal_matrix()).transpose().inverse()
                print(
                    "IOTA: Direct A matrix 1st element of orientational cluster %d  = %12.6f"
                    % (i, A_direct[0]))
            if show_plot:
                # Decision graph
                stretch_plot_factor = 1.05  # (1+fraction of limits by which xlim,ylim should be set)
                import matplotlib.pyplot as plt
                plt.plot(CM_ori.rho, CM_ori.delta, "r.", markersize=3.)
                for x in range(len(list(CM_ori.cluster_id_final))):
                    if CM_ori.cluster_id_maxima[x] >= 0:
                        plt.plot([CM_ori.rho[x]], [CM_ori.delta[x]], "ro")
                plt.xlim([-10, stretch_plot_factor * flex.max(CM_ori.rho)])
                plt.ylim([-10, stretch_plot_factor * flex.max(CM_ori.delta)])
                plt.show()
    # Make sure the crystal models are not too close to each other
    # FIXME should be a PHIL
    min_angle = 5.0  # taken from indexer.py
    close_models_list = []
    n_models = len(dxtbx_crystal_models)
    if n_models > 1:
        from dials.algorithms.indexing.compare_orientation_matrices import difference_rotation_matrix_axis_angle
        for i_a in range(n_models - 1):
            # Start at i_a + 1: comparing a model with itself presumably
            # yields a zero angle and would mark every model but the last
            # for pruning.
            for i_b in range(i_a + 1, n_models):
                cryst_a = dxtbx_crystal_models[i_a]
                cryst_b = dxtbx_crystal_models[i_b]
                R_ab, axis, angle, cb_op_ab = difference_rotation_matrix_axis_angle(
                    cryst_a, cryst_b)
                # FIXME
                if abs(angle) < min_angle:  # degrees
                    close_models_list.append((i_a, i_b))

    # Now prune the dxtbx_crystal_models list: of every close pair that is
    # still intact, drop the first member.
    for i_a, i_b in close_models_list:
        if (dxtbx_crystal_models[i_a] is not None
                and dxtbx_crystal_models[i_b] is not None):
            dxtbx_crystal_models[i_a] = None

    dxtbx_crystal_models = [x for x in dxtbx_crystal_models if x is not None]
    if len(dxtbx_crystal_models) > 0:
        return dxtbx_crystal_models, None
    # If nothing works, at least return the 1st crystal model that was found
    return [experiments_list[0].crystals()[0]], None