def __init__(self, pair_asu_table, site_labels, sites_frac=None,
             sites_cart=None, covariance_matrix=None,
             cell_covariance_matrix=None, parameter_map=None,
             include_bonds_to_hydrogen=False, fixed_distances=None,
             eps=2e-16):
  assert [sites_frac, sites_cart].count(None) == 1
  fmt = "%.4f"
  asu_mappings = pair_asu_table.asu_mappings()
  space_group_info = sgtbx.space_group_info(group=asu_mappings.space_group())
  unit_cell = asu_mappings.unit_cell()
  if sites_cart is not None:
    sites_frac = unit_cell.fractionalize(sites_cart)
  self.loop = model.loop(header=(
    "_geom_bond_atom_site_label_1",
    "_geom_bond_atom_site_label_2",
    "_geom_bond_distance",
    "_geom_bond_site_symmetry_2"
  ))
  distances = crystal.calculate_distances(
    pair_asu_table, sites_frac,
    covariance_matrix=covariance_matrix,
    cell_covariance_matrix=cell_covariance_matrix,
    parameter_map=parameter_map)
  for d in distances:
    if (not include_bonds_to_hydrogen
        and (site_labels[d.i_seq].startswith('H')
             or site_labels[d.j_seq].startswith('H'))):
      continue
    if (d.variance is not None and d.variance > eps
        and not (fixed_distances is not None
                 and ((d.i_seq, d.j_seq) in fixed_distances
                      or (d.j_seq, d.i_seq) in fixed_distances))):
      distance = format_float_with_su(d.distance, math.sqrt(d.variance))
    else:
      distance = fmt % d.distance
    sym_code = space_group_info.cif_symmetry_code(d.rt_mx_ji)
    if sym_code == "1":
      sym_code = "."
    self.loop.add_row((site_labels[d.i_seq], site_labels[d.j_seq],
                       distance, sym_code))
  self.distances = distances.distances
  self.variances = distances.variances
  self.pair_counts = distances.pair_counts
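
# A minimal usage sketch (an assumption for illustration, not part of the
# original module) of how the loop builder above might be driven: build a
# small P1 structure, derive its pair_asu_table and pass it in.
# "geom_bond_cif_loop" is a hypothetical name for the enclosing class;
# the other calls follow those already used in this file.
def _example_geom_bond_loop():
  from cctbx import crystal, xray
  cs = crystal.symmetry(unit_cell=(10, 10, 10, 90, 90, 90), space_group="P1")
  xs = xray.structure(crystal_symmetry=cs)
  xs.add_scatterer(xray.scatterer("C1", site=(0.00, 0.0, 0.0)))
  xs.add_scatterer(xray.scatterer("C2", site=(0.15, 0.0, 0.0)))  # ~1.5 A bond
  pair_asu_table = xs.pair_asu_table(distance_cutoff=2.0)
  loop_builder = geom_bond_cif_loop(  # hypothetical class name
    pair_asu_table=pair_asu_table,
    site_labels=["C1", "C2"],
    sites_frac=xs.sites_frac())
  # the populated CIF loop is available as loop_builder.loop, the raw
  # distance/variance arrays as loop_builder.distances / .variances
  return loop_builder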
def find_basis_vector_combinations_cluster_analysis(self):
  # hijack the xray.structure class to facilitate calculation of distances
  xs = xray.structure(crystal_symmetry=self.crystal_symmetry)
  for i, site in enumerate(self.sites):
    xs.add_scatterer(xray.scatterer("C%i" % i, site=site))
  xs = xs.sites_mod_short()
  xs = xs.select(xs.sites_frac().norms() < 0.45)
  cell_multiplier = 10
  xs1 = xs.customized_copy(
    unit_cell=uctbx.unit_cell(
      [xs.unit_cell().parameters()[0] * cell_multiplier] * 3))
  xs1.set_sites_cart(xs.sites_cart())
  xs = xs1
  sites_cart = xs.sites_cart()
  lengths = flex.double([matrix.col(sc).length() for sc in sites_cart])
  xs = xs.select(flex.sort_permutation(lengths))
  if self.params.debug:
    with open('peaks.pdb', 'w') as f:
      print(xs.as_pdb_file(), file=f)

  vector_heights = flex.double()

  sites_frac = xs.sites_frac()
  pair_asu_table = xs.pair_asu_table(distance_cutoff=self.params.max_cell)
  asu_mappings = pair_asu_table.asu_mappings()
  distances = crystal.calculate_distances(pair_asu_table, sites_frac)
  vectors = []
  difference_vectors = []
  pairs = []
  for di in distances:
    if di.distance < self.params.min_cell:
      continue
    i_seq, j_seq = di.i_seq, di.j_seq
    if i_seq > j_seq:
      continue
    pairs.append((i_seq, j_seq))
    rt_mx_ji = di.rt_mx_ji
    site_frac_ji = rt_mx_ji * sites_frac[j_seq]
    site_cart_ji = xs.unit_cell().orthogonalize(site_frac_ji)
    site_cart_i = xs.unit_cell().orthogonalize(sites_frac[i_seq])
    vectors.append(matrix.col(site_cart_ji))
    diff_vec = matrix.col(site_cart_i) - matrix.col(site_cart_ji)
    if diff_vec[0] < 0:  # only one hemisphere of difference vector space
      diff_vec = -diff_vec
    difference_vectors.append(diff_vec)

  params = self.params.multiple_lattice_search.cluster_analysis
  if params.method == 'dbscan':
    i_cluster = self.cluster_analysis_dbscan(difference_vectors)
    min_cluster_size = 1
  elif params.method == 'hcluster':
    i_cluster = self.cluster_analysis_hcluster(difference_vectors)
    i_cluster -= 1  # hcluster starts counting at 1
    min_cluster_size = params.min_cluster_size

  if self.params.debug_plots:
    self.debug_plot_clusters(
      difference_vectors, i_cluster, min_cluster_size=min_cluster_size)

  clusters = []
  min_cluster_size = params.min_cluster_size
  for i in range(max(i_cluster) + 1):
    isel = (i_cluster == i).iselection()
    if len(isel) < min_cluster_size:
      continue
    clusters.append(isel)

  cluster_point_sets = []
  centroids = []
  cluster_sizes = flex.int()

  difference_vectors = flex.vec3_double(difference_vectors)

  from libtbx.utils import flat_list
  for cluster in clusters:
    points = flat_list([pairs[i] for i in cluster])
    cluster_point_sets.append(set(points))
    d_vectors = difference_vectors.select(cluster)
    cluster_sizes.append(len(d_vectors))
    centroids.append(d_vectors.mean())

  # build a graph where each node is a centroid from the difference vector
  # cluster analysis above, and an edge is defined when there is a
  # significant overlap between the sets of peaks in the FFT map that
  # contributed to the difference vectors in two clusters
  import networkx as nx
  G = nx.Graph()
  G.add_nodes_from(range(len(cluster_point_sets)))
  cutoff_frac = 0.25
  for i in range(len(cluster_point_sets)):
    for j in range(i + 1, len(cluster_point_sets)):
      intersection_ij = cluster_point_sets[i].intersection(
        cluster_point_sets[j])
      union_ij = cluster_point_sets[i].union(cluster_point_sets[j])
      frac_connected = len(intersection_ij) / len(union_ij)
      if frac_connected > cutoff_frac:
        G.add_edge(i, j)
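
  # Worked example (illustration only, not data from a real run): if one
  # cluster was built from FFT peaks {0, 2, 3, 5, 7} and another from peaks
  # {0, 2, 3, 9}, the intersection holds 3 peaks and the union 6, so
  # frac_connected = 3/6 = 0.5 > cutoff_frac and an edge joins the two nodes,
  # i.e. the two difference-vector clusters are treated as candidates for
  # belonging to the same lattice.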
  # iteratively find the maximum cliques in the graph
  # break from the loop if there are no cliques remaining or there are fewer
  # than 3 vectors in the remaining maximum clique
  # Allow 1 basis vector to be shared between two cliques, to allow for cases
  # where two lattices share one basis vector (e.g. two plate crystals
  # exactly aligned in one direction, but not in the other two)
  distinct_cliques = []
  cliques = list(nx.find_cliques(G))
  cliques = sorted(cliques, key=len, reverse=True)
  for i, clique in enumerate(cliques):
    clique = set(clique)
    if len(clique) < 3:
      break
    is_distinct = True
    for c in distinct_cliques:
      if len(c.intersection(clique)) > 1:
        is_distinct = False
        break
    if is_distinct:
      distinct_cliques.append(clique)
      this_set = set()
      for i_cluster in clique:
        this_set = this_set.union(cluster_point_sets[i_cluster])
      logger.info("Clique %i: %i lattice points" % (i + 1, len(this_set)))

  assert len(distinct_cliques) > 0

  logger.info("Estimated number of lattices: %i" % len(distinct_cliques))

  self.candidate_basis_vectors = []
  self.candidate_crystal_models = []

  for clique in distinct_cliques:
    sel = flex.size_t(list(clique))
    vectors = flex.vec3_double(centroids).select(sel)
    perm = flex.sort_permutation(vectors.norms())
    vectors = [matrix.col(vectors[p]) for p in perm]

    # exclude vectors that are (approximately) integer multiples of a
    # shorter vector
    unique_vectors = []
    for v in vectors:
      is_unique = True
      for v_u in unique_vectors:
        if is_approximate_integer_multiple(v_u, v,
                                           relative_tolerance=0.01,
                                           angular_tolerance=0.5):
          is_unique = False
          break
      if is_unique:
        unique_vectors.append(v)
    vectors = unique_vectors

    self.candidate_basis_vectors.extend(vectors)
    candidate_orientation_matrices = \
      self.find_candidate_orientation_matrices(
        vectors,
        max_combinations=self.params.basis_vector_combinations.max_try)
    if len(candidate_orientation_matrices) == 0:
      continue
    crystal_model, n_indexed = self.choose_best_orientation_matrix(
      candidate_orientation_matrices)
    if crystal_model is None:
      continue
    # map to minimum reduced cell
    crystal_symmetry = crystal.symmetry(
      unit_cell=crystal_model.get_unit_cell(),
      space_group=crystal_model.get_space_group())
    cb_op = crystal_symmetry.change_of_basis_op_to_minimum_cell()
    crystal_model = crystal_model.change_basis(cb_op)
    self.candidate_crystal_models.append(crystal_model)

  if self.params.debug:
    file_name = "vectors.pdb"
    a = self.params.max_cell
    cs = crystal.symmetry(unit_cell=(a, a, a, 90, 90, 90), space_group="P1")
    xs = xray.structure(crystal_symmetry=cs)
    for v in difference_vectors:
      v = matrix.col(v)
      xs.add_scatterer(xray.scatterer("C", site=v / (a / 10)))
    xs.sites_mod_short()
    with open(file_name, 'w') as f:
      print(xs.as_pdb_file(), file=f)

  for crystal_model in self.candidate_crystal_models:
    logger.debug(crystal_model)
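
# A minimal sketch (an assumption, not the library implementation) of the
# is_approximate_integer_multiple() helper called above: treat vec_b as
# redundant if it is nearly parallel (or anti-parallel) to the shorter,
# already-accepted vector vec_a and its length is close to an integer
# multiple of vec_a's.  The signature mirrors the call above; the body is
# illustrative only.
def _is_approximate_integer_multiple_sketch(
    vec_a, vec_b, relative_tolerance=0.01, angular_tolerance=0.5):
  import math
  length_a = vec_a.length()
  length_b = vec_b.length()
  if length_a == 0 or length_b == 0:
    return False
  # angle between the two vectors in degrees, clamped for acos
  cos_angle = max(-1.0, min(1.0, vec_a.dot(vec_b) / (length_a * length_b)))
  angle = math.degrees(math.acos(cos_angle))
  if angle < angular_tolerance or (180 - angle) < angular_tolerance:
    n = length_b / length_a
    if abs(round(n) - n) < relative_tolerance:
      return True
  return False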