Exemple #1
0
 def __init__(self,
              pair_asu_table,
              site_labels,
              sites_frac=None,
              sites_cart=None,
              covariance_matrix=None,
              cell_covariance_matrix=None,
              parameter_map=None,
              include_bonds_to_hydrogen=False,
              fixed_distances=None,
              eps=2e-16):
   assert [sites_frac, sites_cart].count(None) == 1
   fmt = "%.4f"
   asu_mappings = pair_asu_table.asu_mappings()
   space_group_info = sgtbx.space_group_info(group=asu_mappings.space_group())
   unit_cell = asu_mappings.unit_cell()
   if sites_cart is not None:
     sites_frac = unit_cell.fractionalize(sites_cart)
   self.loop = model.loop(header=(
     "_geom_bond_atom_site_label_1",
     "_geom_bond_atom_site_label_2",
     "_geom_bond_distance",
     "_geom_bond_site_symmetry_2"
   ))
   distances = crystal.calculate_distances(
     pair_asu_table, sites_frac,
     covariance_matrix=covariance_matrix,
     cell_covariance_matrix=cell_covariance_matrix,
     parameter_map=parameter_map)
   for d in distances:
     if (not include_bonds_to_hydrogen
         and (site_labels[d.i_seq].startswith('H') or
              site_labels[d.j_seq].startswith('H'))):
       continue
     if (d.variance is not None and d.variance > eps
         and not(fixed_distances is not None and
                 ((d.i_seq, d.j_seq) in fixed_distances or
                  (d.j_seq, d.i_seq) in fixed_distances))):
       distance = format_float_with_su(d.distance, math.sqrt(d.variance))
     else:
       distance = fmt % d.distance
     sym_code = space_group_info.cif_symmetry_code(d.rt_mx_ji)
     if sym_code == "1": sym_code = "."
     self.loop.add_row((site_labels[d.i_seq],
                        site_labels[d.j_seq],
                        distance,
                        sym_code))
   self.distances = distances.distances
   self.variances = distances.variances
   self.pair_counts = distances.pair_counts
Exemple #2
0
  def find_basis_vector_combinations_cluster_analysis(self):
    # hijack the xray.structure class to facilitate calculation of distances
    xs = xray.structure(crystal_symmetry=self.crystal_symmetry)
    for i, site in enumerate(self.sites):
      xs.add_scatterer(xray.scatterer("C%i" %i, site=site))

    xs = xs.sites_mod_short()
    xs = xs.select(xs.sites_frac().norms() < 0.45)
    cell_multiplier = 10
    xs1 = xs.customized_copy(
      unit_cell=uctbx.unit_cell([xs.unit_cell().parameters()[0]*cell_multiplier]*3))
    xs1.set_sites_cart(xs.sites_cart())
    xs = xs1
    sites_cart = xs.sites_cart()
    lengths = flex.double([matrix.col(sc).length() for sc in sites_cart])
    xs = xs.select(flex.sort_permutation(lengths))
    if self.params.debug:
      with open('peaks.pdb', 'wb') as f:
        print >> f, xs.as_pdb_file()

    vector_heights = flex.double()

    sites_frac = xs.sites_frac()
    pair_asu_table = xs.pair_asu_table(distance_cutoff=self.params.max_cell)
    asu_mappings = pair_asu_table.asu_mappings()
    distances = crystal.calculate_distances(pair_asu_table, sites_frac)
    vectors = []
    difference_vectors = []
    pairs = []
    for di in distances:
      if di.distance < self.params.min_cell: continue
      i_seq, j_seq = di.i_seq, di.j_seq
      if i_seq > j_seq: continue
      pairs.append((i_seq, j_seq))
      rt_mx_ji = di.rt_mx_ji
      site_frac_ji = rt_mx_ji * sites_frac[j_seq]
      site_cart_ji = xs.unit_cell().orthogonalize(site_frac_ji)
      site_cart_i = xs.unit_cell().orthogonalize(sites_frac[i_seq])
      vectors.append(matrix.col(site_cart_ji))
      diff_vec = matrix.col(site_cart_i) - matrix.col(site_cart_ji)
      if diff_vec[0] < 0:
        # only one hemisphere of difference vector space
        diff_vec = -diff_vec
      difference_vectors.append(diff_vec)

    params = self.params.multiple_lattice_search.cluster_analysis
    if params.method == 'dbscan':
      i_cluster = self.cluster_analysis_dbscan(difference_vectors)
      min_cluster_size = 1
    elif params.method == 'hcluster':
      i_cluster = self.cluster_analysis_hcluster(difference_vectors)
      i_cluster -= 1 # hcluster starts counting at 1
      min_cluster_size = params.min_cluster_size

    if self.params.debug_plots:
      self.debug_plot_clusters(
        difference_vectors, i_cluster, min_cluster_size=min_cluster_size)


    clusters = []
    min_cluster_size = params.min_cluster_size
    for i in range(max(i_cluster)+1):
      isel = (i_cluster == i).iselection()
      if len(isel) < min_cluster_size:
        continue
      clusters.append(isel)

    cluster_point_sets = []
    centroids = []
    cluster_sizes = flex.int()

    difference_vectors = flex.vec3_double(difference_vectors)

    from libtbx.utils import flat_list
    for cluster in clusters:
      points = flat_list([pairs[i] for i in cluster])
      cluster_point_sets.append(set(points))
      d_vectors = difference_vectors.select(cluster)
      cluster_sizes.append(len(d_vectors))
      centroids.append(d_vectors.mean())

    # build a graph where each node is a centroid from the difference vector
    # cluster analysis above, and an edge is defined when there is a
    # significant overlap between the sets of peaks in the FFT map that
    # contributed to the difference vectors in two clusters
    import networkx as nx
    G = nx.Graph()
    G.add_nodes_from(range(len(cluster_point_sets)))

    cutoff_frac = 0.25
    for i in range(len(cluster_point_sets)):
      for j in range(i+1, len(cluster_point_sets)):
        intersection_ij = cluster_point_sets[i].intersection(
            cluster_point_sets[j])
        union_ij = cluster_point_sets[i].union(cluster_point_sets[j])
        frac_connected = len(intersection_ij)/len(union_ij)
        if frac_connected > cutoff_frac:
          G.add_edge(i, j)

    # iteratively find the maximum cliques in the graph
    # break from the loop if there are no cliques remaining or there are
    # fewer than 3 vectors in the remaining maximum clique
    # Allow 1 basis vector to be shared between two cliques, to allow for
    # cases where two lattices share one basis vectors (e.g. two plate
    # crystals exactly aligned in one direction, but not in the other two)
    distinct_cliques = []
    cliques = list(nx.find_cliques(G))
    cliques = sorted(cliques, key=len, reverse=True)
    for i, clique in enumerate(cliques):
      clique = set(clique)
      if len(clique) < 3:
        break
      is_distinct = True
      for c in distinct_cliques:
        if len(c.intersection(clique)) > 1:
          is_distinct = False
          break
      if is_distinct:
        distinct_cliques.append(clique)
        this_set = set()
        for i_cluster in clique:
          this_set = this_set.union(cluster_point_sets[i_cluster])
        logger.info("Clique %i: %i lattice points" %(i+1, len(this_set)))

    assert len(distinct_cliques) > 0

    logger.info("Estimated number of lattices: %i" %len(distinct_cliques))

    self.candidate_basis_vectors = []
    self.candidate_crystal_models = []

    for clique in distinct_cliques:
      sel = flex.size_t(list(clique))
      vectors = flex.vec3_double(centroids).select(sel)
      perm = flex.sort_permutation(vectors.norms())
      vectors = [matrix.col(vectors[p]) for p in perm]

      # exclude vectors that are (approximately) integer multiples of a shorter
      # vector
      unique_vectors = []
      for v in vectors:
        is_unique = True
        for v_u in unique_vectors:
          if is_approximate_integer_multiple(v_u, v,
                                             relative_tolerance=0.01,
                                             angular_tolerance=0.5):
            is_unique = False
            break
        if is_unique:
          unique_vectors.append(v)
      vectors = unique_vectors

      self.candidate_basis_vectors.extend(vectors)
      candidate_orientation_matrices \
        = self.find_candidate_orientation_matrices(
          vectors,
          max_combinations=self.params.basis_vector_combinations.max_try)
      if len(candidate_orientation_matrices) == 0:
        continue
      crystal_model, n_indexed = self.choose_best_orientation_matrix(
        candidate_orientation_matrices)
      if crystal_model is None: continue
      # map to minimum reduced cell
      crystal_symmetry = crystal.symmetry(
        unit_cell=crystal_model.get_unit_cell(),
        space_group=crystal_model.get_space_group())
      cb_op = crystal_symmetry.change_of_basis_op_to_minimum_cell()
      crystal_model = crystal_model.change_basis(cb_op)
      self.candidate_crystal_models.append(crystal_model)

    if self.params.debug:
      file_name = "vectors.pdb"
      a = self.params.max_cell
      cs = crystal.symmetry(unit_cell=(a,a,a,90,90,90), space_group="P1")
      xs = xray.structure(crystal_symmetry=cs)
      for v in difference_vectors:
        v = matrix.col(v)
        xs.add_scatterer(xray.scatterer("C", site=v/(a/10)))
      xs.sites_mod_short()
      with open(file_name, 'wb') as f:
        print >> f, xs.as_pdb_file()

    for crystal_model in self.candidate_crystal_models:
      logger.debug(crystal_model)