def show_plot(widegrid, excursi):
    """Plot the wide-scope origin-offset search and return the best trial offset.

    ``excursi`` is reshaped in place to a ``widegrid`` x ``widegrid`` grid and
    drawn as a labelled contour plot that is saved to ``search_scope.png``.
    Three markers are overlaid: a green circle at (0, 0), a red star at
    ``new_offset`` and a black square at the grid maximum.

    The names ``plot_px_sz``, ``new_offset``, ``beamr1`` and ``beamr2`` are not
    parameters; they must be provided by the surrounding scope.

    Returns the position of the grid maximum expressed as
    ``x * beamr1 + y * beamr2``.
    """
    excursi.reshape(flex.grid(widegrid, widegrid))
    idx_max = flex.max_index(excursi)

    # Axis coordinates of the grid cells, centred on the middle cell.  The
    # same values serve as x and y coordinates of the square grid.
    half_width = widegrid // 2
    idxs = [(i - half_width) * plot_px_sz for i in range(widegrid)]
    # idx_max is a flat index: the remainder is used as the x position and
    # the quotient as the y position, exactly as in the scatter call below.
    best_x = idxs[idx_max % widegrid]
    best_y = idxs[idx_max // widegrid]

    from matplotlib import pyplot as plt
    plt.figure()
    CS = plt.contour(idxs, idxs, excursi.as_numpy_array())
    plt.clabel(CS, inline=1, fontsize=10, fmt="%6.3f")
    plt.title("Wide scope search for detector origin offset")
    plt.scatter([0.0], [0.0], color='g', marker='o')
    plt.scatter([new_offset[0]], [new_offset[1]], color='r', marker='*')
    plt.scatter([best_x], [best_y], color='k', marker='s')
    # gca() adjusts the axes that hold the plot; a bare plt.axes() call adds
    # a fresh, empty axes on current Matplotlib releases.
    plt.gca().set_aspect("equal")
    plt.xlabel("offset (mm) along beamr1 vector")
    plt.ylabel("offset (mm) along beamr2 vector")
    plt.savefig("search_scope.png")

    # changing value
    trial_origin_offset = best_x * beamr1 + best_y * beamr2
    return trial_origin_offset
def show_plot(widegrid, excursi):
    """Show the wide-scope origin-offset search and return the best trial offset.

    ``excursi`` is reshaped in place to a ``widegrid`` x ``widegrid`` grid and
    drawn as a labelled contour plot that is displayed with ``plt.show()``.
    Three markers are overlaid: a green circle at (0, 0), a red star at
    ``new_offset`` and a black square at the grid maximum.

    The names ``plot_px_sz``, ``new_offset``, ``beamr1`` and ``beamr2`` are not
    parameters; they must be provided by the surrounding scope.

    Returns the position of the grid maximum expressed as
    ``x * beamr1 + y * beamr2``.
    """
    excursi.reshape(flex.grid(widegrid, widegrid))
    idx_max = flex.max_index(excursi)

    # Axis coordinates of the grid cells, centred on the middle cell.  The
    # same values serve as x and y coordinates of the square grid.
    centre = widegrid // 2
    idxs = [(i - centre) * plot_px_sz for i in range(widegrid)]
    # idx_max is a flat index: the remainder is used as the x position and
    # the quotient as the y position, exactly as in the scatter call below.
    x_at_max = idxs[idx_max % widegrid]
    y_at_max = idxs[idx_max // widegrid]

    from matplotlib import pyplot as plt
    plt.figure()
    CS = plt.contour(idxs, idxs, excursi.as_numpy_array())
    plt.clabel(CS, inline=1, fontsize=10, fmt="%6.3f")
    plt.title("Wide scope search for detector origin offset")
    plt.scatter([0.0], [0.0], color='g', marker='o')
    plt.scatter([new_offset[0]], [new_offset[1]], color='r', marker='*')
    plt.scatter([x_at_max], [y_at_max], color='k', marker='s')
    # gca() adjusts the axes that hold the plot; a bare plt.axes() call adds
    # a fresh, empty axes on current Matplotlib releases.
    plt.gca().set_aspect("equal")
    plt.xlabel("offset (mm) along beamr1 vector")
    plt.ylabel("offset (mm) along beamr2 vector")
    plt.show()

    # changing value
    trial_origin_offset = x_at_max * beamr1 + y_at_max * beamr2
    return trial_origin_offset
def reduce_raw_data(raw_data, qmax, bandwidth, level=0.05, q_background=None, outfile=''):
    """Trim, background-correct and resample ``raw_data`` onto a uniform q grid.

    Steps performed:
      1. Everything before the intensity maximum of ``raw_data.i`` is dropped;
         the q value at that maximum becomes the start of the range.
      2. ``qmax`` is clipped to the last q value present in the data.
      3. If ``q_background`` is given, the mean intensity from the last index
         at which ``q > q_background`` is False onwards is subtracted and the
         absolute value of the result is kept.
      4. ``i`` and ``s`` are linearly interpolated onto a grid starting at the
         new q start with step ``bandwidth``.

    Parameters:
      raw_data     -- object with flex arrays ``q``, ``i`` and ``s``; it is
                      modified in place
      qmax         -- upper q limit requested by the caller
      bandwidth    -- step size of the output q grid
      level        -- only echoed in the log output; not used in the computation
      q_background -- optional q threshold for the background estimate
      outfile      -- log file that messages are appended to; when empty,
                      messages go to standard output only

    Returns ``raw_data`` with ``q``, ``i`` and ``s`` replaced.
    """
    console = sys.stdout

    def report(lines):
        # Every message goes to the log file (if any) first, then to the console.
        text = "".join(line + "\n" for line in lines)
        if outfile:
            with open(outfile, "a") as log:
                log.write(text)
        console.write(text)

    report([
        " ==== Data reduction ==== ",
        " Preprocessing of data increases efficiency of shape retrieval procedure.\n",
        " - Interpolation stepsize : %4.3e" % bandwidth,
        " - Uniform density criteria: level is set to : %4.3e" % level,
        " maximum q to consider : %4.3e" % qmax,
    ])

    qmin_indx = flex.max_index(raw_data.i)
    qmin = raw_data.q[qmin_indx]
    if qmax > raw_data.q[-1]:
        qmax = raw_data.q[-1]
    report([
        " Resulting q range to use in search: q start : %4.3e" % qmin,
        " q stop : %4.3e" % qmax,
    ])

    raw_q = raw_data.q[qmin_indx:]
    raw_i = raw_data.i[qmin_indx:]
    raw_s = raw_data.s[qmin_indx:]

    ### Take care of the background (set zero at very high q) ###
    if q_background is not None:
        cutoff = flex.bool(raw_q > q_background)
        q_bk_indx = flex.last_index(cutoff, False)
        if q_bk_indx < raw_q.size():
            bkgrd = flex.mean(raw_i[q_bk_indx:])
            # The original opened the undefined name ``f`` here; the message
            # belongs in the same log file as all the others.
            report([
                "Background correction: I=I-background, where background= %s" % bkgrd,
            ])
            raw_i = flex.abs(raw_i - bkgrd)

    q = flex.double(range(int((qmax - qmin) / bandwidth) + 1)) * bandwidth + qmin
    raw_data.i = flex.linear_interpolation(raw_q, raw_i, q)
    raw_data.s = flex.linear_interpolation(raw_q, raw_s, q)
    raw_data.q = q
    return raw_data
def find_largest(self, matches, used_flags=None):
    """Return ``(size, index)`` of the largest match that is not flagged as used.

    The size of a match is ``match[0].size()``.  Sizes of the entries selected
    by ``used_flags`` are multiplied by zero before the maximum is taken; with
    ``used_flags=None`` no entry is flagged.
    """
    n_matches = len(matches)
    if used_flags is None:
        used_flags = flex.bool(n_matches, False)

    sizes = flex.double()
    for entry in matches:
        sizes.append(entry[0].size())

    # Weight 1 keeps a size, weight 0 masks out an already used match.
    weights = flex.double(n_matches, 1).set_selected(used_flags.iselection(), 0)
    masked_sizes = sizes * weights
    return flex.max(masked_sizes), flex.max_index(masked_sizes)
def find_largest(self, matches, used_flags=None):
    """Find the biggest match among those not yet marked as used.

    ``matches`` is a sequence whose items expose ``item[0].size()``.
    ``used_flags`` is a boolean flex array; flagged positions count as size 0.
    When it is omitted, all matches are considered.

    Returns a tuple ``(max_size, max_loc)``: the largest remaining size and
    its position in ``matches``.
    """
    count = len(matches)
    flags = used_flags
    if flags is None:
        flags = flex.bool(count, False)

    raw_sizes = flex.double()
    for candidate in matches:
        raw_sizes.append(candidate[0].size())

    # Zero the multiplier wherever a match has already been used.
    keep = flex.double(count, 1)
    keep = keep.set_selected(flags.iselection(), 0)
    effective = raw_sizes * keep

    max_loc = flex.max_index(effective)
    max_size = flex.max(effective)
    return max_size, max_loc
def finite_difference_test(self):
    """Record an analytical gradient next to its central finite difference.

    Nothing happens unless ``self.fmodel.r_work()`` exceeds 1.e-3.  Otherwise
    the parameter with the largest absolute gradient in ``self.g`` is shifted
    by +eps and -eps (eps = 1.e-5) around ``self.par_min``, the target is
    evaluated at both points, and the parameters are restored to
    ``self.par_min``.  The analytical gradient is appended to
    ``self.buffer_ana`` and ``(t1 - t2) / (2 * eps)`` to ``self.buffer_fin``.
    """
    if not (self.fmodel.r_work() > 1.e-3):
        return

    i_g_max = flex.max_index(flex.abs(self.g))
    eps = 1.e-5
    shifted = list(self.par_min)

    def target_with(value):
        # Apply the modified parameter set and evaluate the target without gradients.
        shifted[i_g_max] = value
        self.apply_shifts(par=shifted)
        self.fmodel.update_xray_structure(update_f_calc=True)
        return self.get_tg(compute_gradients=False).target()

    t_plus = target_with(self.par_min[i_g_max] + eps)
    t_minus = target_with(self.par_min[i_g_max] - eps)

    # Put the model back at the unshifted parameters.
    self.apply_shifts(par=self.par_min)
    self.fmodel.update_xray_structure(update_f_calc=True)

    self.buffer_ana.append(self.g[i_g_max])
    self.buffer_fin.append((t_plus - t_minus) / (eps * 2))
def finite_difference_test(self):
    """Check the largest gradient component against a central finite difference.

    The check is skipped unless ``self.fmodel.r_work() > 1.e-3``.  For the
    parameter whose gradient in ``self.g`` has the largest absolute value the
    target is evaluated at ``par_min + eps`` and ``par_min - eps``
    (eps = 1.e-5); afterwards ``self.par_min`` is re-applied.  The analytical
    value goes to ``self.buffer_ana`` and the finite-difference estimate to
    ``self.buffer_fin``.
    """
    if not (self.fmodel.r_work() > 1.e-3):
        return

    worst = flex.max_index(flex.abs(self.g))
    eps = 1.e-5
    trial = list(self.par_min)

    targets = []
    for shift in (eps, -eps):
        trial[worst] = self.par_min[worst] + shift
        self.apply_shifts(par=trial)
        self.fmodel.update_xray_structure(update_f_calc=True)
        targets.append(self.get_tg(compute_gradients=False).target())
    del trial
    t1, t2 = targets

    # Restore the unshifted parameters before recording the results.
    self.apply_shifts(par=self.par_min)
    self.fmodel.update_xray_structure(update_f_calc=True)

    self.buffer_ana.append(self.g[worst])
    self.buffer_fin.append((t1 - t2) / (eps * 2))
def unit_cell_histograms(crystals):
    """Histogram each of the six unit cell parameters over ``crystals``.

    Every crystal contributes ``cryst.get_unit_cell().parameters()``.  One
    100-slot histogram is built per parameter.  The modal cell (centre of the
    fullest slot of each histogram) and the median cell are printed.

    Returns the list of six histograms.
    """
    n_params = 6
    columns = [flex.double() for _ in range(n_params)]
    for cryst in crystals:
        cell = cryst.get_unit_cell().parameters()
        for k, column in enumerate(columns):
            column.append(cell[k])

    histograms = [flex.histogram(column, n_slots=100) for column in columns]

    median_unit_cell = uctbx.unit_cell([flex.median(column) for column in columns])
    modal_values = []
    for hist in histograms:
        fullest_slot = flex.max_index(hist.slots())
        modal_values.append(hist.slot_centers()[fullest_slot])
    modal_unit_cell = uctbx.unit_cell(modal_values)

    print("Modal unit cell: %s" % str(modal_unit_cell))
    print("Median unit cell: %s" % str(median_unit_cell))
    return histograms
def had_phase_transition(self):
    """Return True if ``self.differences`` shows a peak shaped like a transition.

    All of the following must hold:
      * at least 5 differences are available;
      * at least 4 points lie between the last value before the peak that is
        below ``noise_level_before * peak`` and the peak itself;
      * a negative difference occurs at or after the peak, at least 10 points
        after the start of the rise and at least 10 points before the end of
        ``self.values``;
      * the largest absolute value of the last 5 differences does not exceed
        ``noise_level_after * peak``.
    """
    diffs = self.differences
    if len(diffs) < 5:
        return False

    peak = flex.max_index(diffs)
    peak_value = diffs[peak]

    # Start of the rise: one past the last sub-noise point before the peak.
    below_noise = diffs < self.noise_level_before * peak_value
    last_quiet = flex.last_index(below_noise[:peak], True)
    start = 0 if last_quiet is None else last_quiet + 1
    if peak - start < 4:
        return False

    # End of the transition: first negative difference from the peak onwards.
    first_negative = flex.first_index((diffs < 0)[peak:], True)
    if first_negative is None:
        return False
    end = first_negative + peak
    if end - start < 10 or len(self.values) - end < 10:
        return False

    tail = scitbx.math.basic_statistics(diffs[-5:])
    return not (tail.max_absolute > self.noise_level_after * peak_value)
def had_phase_transition(self):
    """Decide whether the recorded differences contain a transition-like peak.

    The answer is False when fewer than 5 differences exist, when the rise
    towards the maximum spans fewer than 4 points, when no negative difference
    follows the maximum, when the span from rise start to that negative value
    is shorter than 10 points, when fewer than 10 values remain after it, or
    when the last 5 differences still exceed ``noise_level_after`` times the
    maximum in absolute value.  Otherwise the answer is True.
    """
    series = self.differences
    if len(series) < 5:
        return False

    i_peak = flex.max_index(series)

    quiet = series < self.noise_level_before * series[i_peak]
    i_quiet = flex.last_index(quiet[:i_peak], True)
    if i_quiet is None:
        onset = 0
    else:
        onset = i_quiet + 1
    rise_length = i_peak - onset
    if rise_length < 4:
        return False

    negative = series < 0
    offset = flex.first_index(negative[i_peak:], True)
    if offset is None:
        return False
    settle = offset + i_peak

    if settle - onset < 10:
        return False
    remaining = len(self.values) - settle
    if remaining < 10:
        return False

    tail_stats = scitbx.math.basic_statistics(series[-5:])
    tail_is_noisy = tail_stats.max_absolute > self.noise_level_after * series[i_peak]
    return not tail_is_noisy
def __init__(self, **kwargs):
    """Run a density-peak clustering on a distance matrix and store the result.

    Keyword arguments are handed to ``group_args.__init__``.  The code below
    reads ``self.Dij`` (distance matrix) and ``self.d_c`` from the instance.

    Attributes written:
      rho, delta         -- results of ``R.get_rho()`` and ``R.get_delta()``
      cluster_id_maxima  -- cluster ids with only the cluster centres assigned
      cluster_id_full    -- ids after ``R.cluster_assignment``
      cluster_id_final   -- ids after border/sparse members were reset to -1
    """
    group_args.__init__(self, **kwargs)
    # require Dij, d_c
    # NOTE(review): P is rebound for every stage and never read; presumably
    # each Profiler reports its stage when it is replaced -- confirm.
    P = Profiler("2. calculate rho density")
    print("finished Dij, now calculating rho_i, the density")
    from xfel.clustering import Rodriguez_Laio_clustering_2014
    R = Rodriguez_Laio_clustering_2014(distance_matrix=self.Dij, d_c=self.d_c)
    self.rho = rho = R.get_rho()
    ave_rho = flex.mean(rho.as_double())
    NN = self.Dij.focus()[0]  # first dimension of the distance matrix
    print("The average rho_i is %5.2f, or %4.1f%%" % (ave_rho, 100 * ave_rho / NN))
    i_max = flex.max_index(rho)
    P = Profiler("3.transition")
    print("the index with the highest density is %d" % (i_max))
    # largest distance from the highest-density item to any item
    delta_i_max = flex.max(
        flex.double([self.Dij[i_max, j] for j in range(NN)]))
    print("delta_i_max", delta_i_max)
    rho_order = flex.sort_permutation(rho, reverse=True)
    rho_order_list = list(rho_order)
    P = Profiler("4. delta")
    self.delta = delta = R.get_delta(rho_order=rho_order, delta_i_max=delta_i_max)
    P = Profiler("5. find cluster maxima")
    #---- Now hunting for clusters
    cluster_id = flex.int(NN, -1)  # default -1 means no cluster
    delta_order = flex.sort_permutation(delta, reverse=True)
    N_CLUST = 10  # maximum of 10 points to be considered as possible clusters
    MAX_PERCENTILE_DELTA = 0.10  # cluster centers have to be in the top 10% percentile delta
    MAX_PERCENTILE_RHO = 0.75  # cluster centers have to be in the top 75% percentile rho
    n_cluster = 0
    max_n_delta = min(N_CLUST, int(MAX_PERCENTILE_DELTA * NN))
    for ic in range(max_n_delta):  # test the density, rho
        item_idx = delta_order[ic]
        if delta[item_idx] < 0.25 * delta[delta_order[0]]:  # too low (another heuristic!)
            continue
        # position of this item in the list sorted by decreasing rho
        item_rho_order = rho_order_list.index(item_idx)
        if item_rho_order / NN < MAX_PERCENTILE_RHO:
            cluster_id[item_idx] = n_cluster
            print(ic, item_idx, item_rho_order, cluster_id[item_idx])
            n_cluster += 1
    print("Found %d clusters" % n_cluster)
    for x in range(NN):
        if cluster_id[x] >= 0:
            print("XC", x, cluster_id[x], rho[x], delta[x])
    self.cluster_id_maxima = cluster_id.deep_copy()
    P = Profiler("6. assign all points")
    R.cluster_assignment(rho_order, cluster_id)
    self.cluster_id_full = cluster_id.deep_copy()
    # assign the halos
    P = Profiler("7. assign halos")
    halo = flex.bool(NN, False)  # not used below
    border = R.get_border(cluster_id=cluster_id)
    for ic in range(n_cluster):  # loop thru all border regions; find highest density
        print("cluster", ic, "in border", border.count(True))
        this_border = (cluster_id == ic) & (border == True)
        print(len(this_border), this_border.count(True))
        if this_border.count(True) > 0:
            highest_density = flex.max(rho.select(this_border))
            # members of the border whose rho is below the border maximum
            halo_selection = (rho < highest_density) & (this_border == True)
            if halo_selection.count(True) > 0:
                cluster_id.set_selected(halo_selection, -1)
            core_selection = (cluster_id == ic) & ~halo_selection
            highest_density = flex.max(rho.select(core_selection))
            # core members below a tenth of the core maximum are dropped too
            too_sparse = core_selection & (
                rho.as_double() < highest_density / 10.)  # another heuristic
            if too_sparse.count(True) > 0:
                cluster_id.set_selected(too_sparse, -1)
    self.cluster_id_final = cluster_id.deep_copy()
    print("%d in the excluded halo" % ((cluster_id == -1).count(True)))
def __init__(self, **kwargs):
    """Pick cluster centres from a distance matrix and assign all items.

    Keyword arguments are handed to ``group_args.__init__``.  The code below
    reads ``self.Dij``, ``self.d_c`` and ``self.max_percentile_rho`` and, if
    present, ``self.strategy`` (set to ``'default'`` when missing).

    Centre selection depends on ``self.strategy``:
      'one_cluster' -- the single item with the largest ``rho * delta``
      'strategy_3'  -- the largest ``rho * delta`` item plus up to two more
                       whose z-score of that product exceeds 3.0
      anything else -- z-score based selection on ``delta``

    Attributes written: ``strategy`` (if missing), ``rho``, ``delta``,
    ``cluster_id_maxima``, ``cluster_id_full`` and ``cluster_id_final``.
    """
    group_args.__init__(self, **kwargs)
    print('finished Dij, now calculating rho_i and density')
    from xfel.clustering import Rodriguez_Laio_clustering_2014 as RL
    R = RL(distance_matrix=self.Dij, d_c=self.d_c)
    #from clustering.plot_with_dimensional_embedding import plot_with_dimensional_embedding
    #plot_with_dimensional_embedding(1-self.Dij/flex.max(self.Dij), show_plot=True)
    if hasattr(self, 'strategy') is False:
        self.strategy = 'default'
    self.rho = rho = R.get_rho()
    ave_rho = flex.mean(rho.as_double())  # not used below
    NN = self.Dij.focus()[0]  # first dimension of the distance matrix
    i_max = flex.max_index(rho)
    # largest distance from the highest-density item to any item
    delta_i_max = flex.max(
        flex.double([self.Dij[i_max, j] for j in range(NN)]))
    rho_order = flex.sort_permutation(rho, reverse=True)
    rho_order_list = list(rho_order)
    self.delta = delta = R.get_delta(rho_order=rho_order, delta_i_max=delta_i_max)
    cluster_id = flex.int(NN, -1)  # -1 means no cluster
    delta_order = flex.sort_permutation(delta, reverse=True)
    MAX_PERCENTILE_RHO = self.max_percentile_rho  # cluster centers have to be in the top percentile
    n_cluster = 0
    #
    # print('Z_DELTA = ', self.Z_delta)
    pick_top_solution = False
    rho_stdev = flex.mean_and_variance(
        rho.as_double()).unweighted_sample_standard_deviation()
    delta_stdev = flex.mean_and_variance(
        delta).unweighted_sample_standard_deviation()
    if rho_stdev != 0.0 and delta_stdev != 0:
        # z-scores of rho and delta
        rho_z = (rho.as_double() - flex.mean(rho.as_double())) / (rho_stdev)
        delta_z = (delta - flex.mean(delta)) / (delta_stdev)
    else:
        # No spread in rho or delta: z-scores are undefined, so a single
        # centre is taken directly from the other quantity.
        pick_top_solution = True
        if rho_stdev == 0.0:
            centroids = [flex.first_index(delta, flex.max(delta))]
        elif delta_stdev == 0.0:
            centroids = [flex.first_index(rho, flex.max(rho))]
    significant_delta = []
    significant_rho = []
    # Define strategy to decide cluster center here. Only one should be true
    debug_fix_clustering = True
    if self.strategy == 'one_cluster':
        debug_fix_clustering = False
        strategy2 = True
    if self.strategy == 'strategy_3':
        debug_fix_clustering = False
        strategy3 = True
        strategy2 = False
    if debug_fix_clustering:
        if not pick_top_solution:
            delta_z_cutoff = min(1.0, max(delta_z))
            rho_z_cutoff = min(1.0, max(rho_z))
            for ic in range(NN):
                # test the density & rho
                # both tails count: z at or beyond +cutoff or -cutoff
                if delta_z[ic] >= delta_z_cutoff or delta_z[ic] <= -delta_z_cutoff:
                    significant_delta.append(ic)
                if rho_z[ic] >= rho_z_cutoff or rho_z[ic] <= -rho_z_cutoff:
                    significant_rho.append(ic)
            if True:
                # Use idea quoted in Rodriguez Laio 2014 paper
                # " Thus, cluster centers are recognized as points for which the value of delta is anomalously large."
                centroid_candidates = list(significant_delta)
                candidate_delta_z = flex.double()
                for ic in centroid_candidates:
                    if ic == rho_order[0]:
                        # NOTE(review): only bound when the highest-rho item is
                        # among the candidates -- confirm that always holds.
                        delta_z_of_rho_order_0 = delta_z[ic]
                    candidate_delta_z.append(delta_z[ic])
                i_sorted = flex.sort_permutation(candidate_delta_z, reverse=True)
                # Check that once sorted the top one is not equal to the 2nd or 3rd position
                # If there is a tie, assign centroid to the first one in rho order
                centroids = []
                # rho_order[0] has to be a centroid
                centroids.append(rho_order[0])
                #centroids.append(centroid_candidates[i_sorted[0]])
                for i in range(0, len(i_sorted[:])):
                    if centroid_candidates[i_sorted[i]] == rho_order[0]:
                        continue
                    if delta_z_of_rho_order_0 - candidate_delta_z[i_sorted[i]] > 1.0:
                        if i > 1:
                            if -candidate_delta_z[i_sorted[i - 1]] + candidate_delta_z[i_sorted[0]] > 1.0:
                                centroids.append(
                                    centroid_candidates[i_sorted[i]])
                        else:
                            centroids.append(
                                centroid_candidates[i_sorted[i]])
                    else:
                        break
            if False:
                # Disabled alternative: candidates must be significant in both
                # delta and rho.
                centroid_candidates = list(
                    set(significant_delta).intersection(
                        set(significant_rho)))
                # Now compare the relative orders of the max delta_z and max rho_z to make sure they are within 1 stdev
                centroids = []
                max_delta_z_candidates = -999.9
                max_rho_z_candidates = -999.9
                for ic in centroid_candidates:
                    if delta_z[ic] > max_delta_z_candidates:
                        max_delta_z_candidates = delta_z[ic]
                    if rho_z[ic] > max_rho_z_candidates:
                        max_rho_z_candidates = rho_z[ic]
                for ic in centroid_candidates:
                    if max_delta_z_candidates - delta_z[ic] < 1.0 and max_rho_z_candidates - rho_z[ic] < 1.0:
                        centroids.append(ic)
        #item_idxs = [delta_order[ic] for ic,centroid in enumerate(centroids)]
        item_idxs = centroids
        # every centroid starts its own cluster, numbered in list order
        for item_idx in item_idxs:
            cluster_id[item_idx] = n_cluster
            print('CLUSTERING_STATS', item_idx, cluster_id[item_idx])
            n_cluster += 1
        ####
    elif strategy2:
        # Go through list of clusters, see which one has highest joint rank in both rho and delta lists
        # This will only assign one cluster center based on highest product of rho and delta ranks
        product_list_of_ranks = []
        for ic in range(NN):
            rho_tmp = self.rho[ic]
            delta_tmp = self.delta[ic]
            product_list_of_ranks.append(rho_tmp * delta_tmp)
        import numpy as np
        item_idx = np.argmax(product_list_of_ranks)
        cluster_id[item_idx] = n_cluster  # Only cluster assigned
        print('CLUSTERING_STATS', item_idx, cluster_id[item_idx])
        n_cluster += 1
    elif strategy3:
        # use product of delta and rho and pick out top candidates
        # have to use a significance z_score to filter out the very best
        product_list_of_ranks = flex.double()
        for ic in range(NN):
            rho_tmp = self.rho[ic]
            delta_tmp = self.delta[ic]
            product_list_of_ranks.append(rho_tmp * delta_tmp)
        import numpy as np
        iid_sorted = flex.sort_permutation(product_list_of_ranks, reverse=True)
        cluster_id[iid_sorted[0]] = n_cluster  # first point always a cluster
        n_cluster += 1
        print('CLUSTERING_STATS S3', iid_sorted[0], cluster_id[iid_sorted[0]])
        #product_list_of_ranks[iid_sorted[0]]=0.0 # set this to 0.0 so that the mean/stdev does not get biased by one point
        stdev = np.std(product_list_of_ranks)
        mean = np.mean(product_list_of_ranks)
        n_sorted = 3
        #if stdev == 0.0:
        #  n_sorted=1
        z_critical = 3.0  # NOTE(review): the original remark here read "2 sigma significance ?"
        # Only go through say 3-4 datapoints
        # basically there won't be more than 2-3 lattices on an image realistically
        for iid in iid_sorted[1:n_sorted]:
            # NOTE(review): stdev is not checked for zero before dividing
            z_score = (product_list_of_ranks[iid] - mean) / stdev
            if z_score > z_critical:
                cluster_id[iid] = n_cluster
                n_cluster += 1
                print('CLUSTERING_STATS S3', iid, cluster_id[iid])
            else:
                break  # No point going over all points once below threshold z_score
    else:
        # Not reached with the flags as set above: debug_fix_clustering is only
        # cleared together with strategy2 or strategy3 being set to True.
        for ic in range(NN):
            item_idx = delta_order[ic]
            if ic != 0:
                if delta[item_idx] <= 0.25 * delta[delta_order[0]]:  # too low to be a medoid
                    continue
            item_rho_order = rho_order_list.index(item_idx)
            if (item_rho_order) / NN < MAX_PERCENTILE_RHO:
                cluster_id[item_idx] = n_cluster
                print('CLUSTERING_STATS', ic, item_idx, item_rho_order,
                      cluster_id[item_idx])
                n_cluster += 1
    ###
    # print('Found %d clusters' % n_cluster)
    for x in range(NN):
        if cluster_id[x] >= 0:
            print("XC", x, cluster_id[x], rho[x], delta[x])
    self.cluster_id_maxima = cluster_id.deep_copy()
    R.cluster_assignment(rho_order, cluster_id, rho)
    self.cluster_id_full = cluster_id.deep_copy()
    # Halo removal is disabled; cluster_id_final therefore equals cluster_id_full.
    #halo = flex.bool(NN,False)
    #border = R.get_border( cluster_id = cluster_id )
    #for ic in range(n_cluster): #loop thru all border regions; find highest density
    #  this_border = (cluster_id == ic) & (border==True)
    #  if this_border.count(True)>0:
    #    highest_density = flex.max(rho.select(this_border))
    #    halo_selection = (rho < highest_density) & (this_border==True)
    #    if halo_selection.count(True)>0:
    #      cluster_id.set_selected(halo_selection,-1)
    #    core_selection = (cluster_id == ic) & ~halo_selection
    #    highest_density = flex.max(rho.select(core_selection))
    #    too_sparse = core_selection & (rho.as_double() < highest_density/10.) # another heuristic
    #    if too_sparse.count(True)>0:
    #      cluster_id.set_selected(too_sparse,-1)
    self.cluster_id_final = cluster_id.deep_copy()
def rho_stats(xray_structure, d_min, resolution_factor, electron_sum_radius,
              zero_out_f000):
    """Compute density statistics for ``xray_structure`` at two resolutions.

    Structure factors are computed by direct summation on a full grid of
    Miller indices whose half-extent along each axis is
    ``ifloor(edge / (2 * d_min * resolution_factor))``.  An FFT map is then
    built twice: from all coefficients and from those kept by
    ``resolution_filter(d_min=d_min)``.  For each map the number of indices,
    the maximum density and the electron sum within ``electron_sum_radius`` of
    the first site are collected and printed on one summary line.

    If ``zero_out_f000`` is true the first coefficient (index (0, 0, 0)) is
    set to zero before the maps are computed.

    Returns a list with one ``(x, y)`` pair per map: positions along the first
    cell edge and the density at grid points ``(ix, 0, 0)``.
    """
    unit_cell = xray_structure.unit_cell()
    s2 = d_min * resolution_factor * 2
    n_half_plus = [ifloor(edge / s2) for edge in unit_cell.parameters()[:3]]
    n_half_minus = [-nh for nh in n_half_plus]
    n_real = tuple(2 * nh + 1 for nh in n_half_plus)
    n_real_product = matrix.col(n_real).product()
    crystal_gridding = maptbx.crystal_gridding(
        unit_cell=xray_structure.unit_cell(),
        space_group_info=xray_structure.space_group_info(),
        pre_determined_n_real=n_real)
    miller_indices = flex.miller_index()
    miller_indices.reserve(n_real_product)
    for hkl in flex.nested_loop(n_half_minus, n_half_plus, open_range=False):
        miller_indices.append(hkl)
    assert miller_indices.size() == n_real_product
    #
    miller_set = miller.set(
        crystal_symmetry=xray_structure,
        anomalous_flag=True,
        indices=miller_indices).sort(by_value="resolution")
    assert miller_set.indices()[0] == (0, 0, 0)
    f_calc = miller_set.structure_factors_from_scatterers(
        xray_structure=xray_structure,
        algorithm="direct",
        cos_sin_table=False).f_calc()
    if zero_out_f000:
        f_calc.data()[0] = 0j
    #
    unit_cell_volume = xray_structure.unit_cell().volume()
    voxel_volume = unit_cell_volume / n_real_product
    number_of_miller_indices = []
    rho_max = []
    electron_sums_around_atoms = []
    densities_along_x = []
    for coefficients in (f_calc, f_calc.resolution_filter(d_min=d_min)):
        assert coefficients.indices()[0] == (0, 0, 0)
        number_of_miller_indices.append(coefficients.indices().size())
        fft_map = miller.fft_map(
            crystal_gridding=crystal_gridding,
            fourier_coefficients=coefficients)
        assert fft_map.n_real() == n_real
        rho = fft_map.real_map_unpadded() / unit_cell_volume
        assert approx_equal(voxel_volume * flex.sum(rho), f_calc.data()[0])
        if xray_structure.scatterers().size() == 1:
            # with a single scatterer the maximum must sit at grid point 0
            assert flex.max_index(rho) == 0
            rho_max.append(rho[0])
        else:
            rho_max.append(flex.max(rho))
        site_cart = xray_structure.sites_cart()[0]
        gias = maptbx.grid_indices_around_sites(
            unit_cell=xray_structure.unit_cell(),
            fft_n_real=n_real,
            fft_m_real=n_real,
            sites_cart=flex.vec3_double([site_cart]),
            site_radii=flex.double([electron_sum_radius]))
        electron_sums_around_atoms.append(
            flex.sum(rho.as_1d().select(gias)) * voxel_volume)
        #
        a = xray_structure.unit_cell().parameters()[0]
        nx = n_real[0]
        nxh = nx // 2
        grid_steps = range(-nxh, nxh + 1)
        x = [a * ix / nx for ix in grid_steps]
        # negative steps wrap around to the far end of the grid
        y = [rho[(ix % nx, 0, 0)] for ix in grid_steps]
        densities_along_x.append((x, y))
    #
    summary = (
        d_min, resolution_factor, n_real,
        number_of_miller_indices[0], number_of_miller_indices[1],
        electron_sums_around_atoms[0], electron_sums_around_atoms[1],
        rho_max[0], rho_max[1],
        f_calc.data()[0].real,
        u_as_b(xray_structure.scatterers()[0].u_iso))
    print(
        "%3.1f %4.2f %-12s %5d %5d | %6.3f %6.3f | %6.3f %6.3f | %4.2f %5.1f"
        % summary)
    #
    return densities_along_x
def rho_stats(
        xray_structure,
        d_min,
        resolution_factor,
        electron_sum_radius,
        zero_out_f000):
    """Compute density statistics for ``xray_structure`` at two resolutions.

    Structure factors are computed by direct summation on a full grid of
    Miller indices whose half-extent along each axis is
    ``ifloor(edge / (2 * d_min * resolution_factor))``.  An FFT map is built
    from all coefficients and from those kept by
    ``resolution_filter(d_min=d_min)``.  For each map the number of indices,
    the maximum density and the electron sum within ``electron_sum_radius`` of
    the first site are collected; one summary line is printed at the end.

    If ``zero_out_f000`` is true the first coefficient (index (0, 0, 0)) is
    set to zero before the maps are computed.

    Returns a list with one ``(x, y)`` pair per map: positions along the first
    cell edge and the density at grid points ``(ix, 0, 0)``.

    The summary is printed with a single pre-formatted argument and the loop
    uses ``range``, so the function runs unchanged on Python 2 and Python 3.
    """
    n_real = []
    n_half_plus = []
    n_half_minus = []
    s2 = d_min * resolution_factor * 2
    for l in xray_structure.unit_cell().parameters()[:3]:
        nh = ifloor(l / s2)
        n_real.append(2 * nh + 1)
        n_half_plus.append(nh)
        n_half_minus.append(-nh)
    n_real = tuple(n_real)
    n_real_product = matrix.col(n_real).product()
    crystal_gridding = maptbx.crystal_gridding(
        unit_cell=xray_structure.unit_cell(),
        space_group_info=xray_structure.space_group_info(),
        pre_determined_n_real=n_real)
    miller_indices = flex.miller_index()
    miller_indices.reserve(n_real_product)
    for h in flex.nested_loop(n_half_minus, n_half_plus, open_range=False):
        miller_indices.append(h)
    assert miller_indices.size() == n_real_product
    #
    miller_set = miller.set(
        crystal_symmetry=xray_structure,
        anomalous_flag=True,
        indices=miller_indices).sort(by_value="resolution")
    assert miller_set.indices()[0] == (0, 0, 0)
    f_calc = miller_set.structure_factors_from_scatterers(
        xray_structure=xray_structure,
        algorithm="direct",
        cos_sin_table=False).f_calc()
    if zero_out_f000:
        f_calc.data()[0] = 0j
    #
    unit_cell_volume = xray_structure.unit_cell().volume()
    voxel_volume = unit_cell_volume / n_real_product
    number_of_miller_indices = []
    rho_max = []
    electron_sums_around_atoms = []
    densities_along_x = []
    for f in [f_calc, f_calc.resolution_filter(d_min=d_min)]:
        assert f.indices()[0] == (0, 0, 0)
        number_of_miller_indices.append(f.indices().size())
        fft_map = miller.fft_map(
            crystal_gridding=crystal_gridding,
            fourier_coefficients=f)
        assert fft_map.n_real() == n_real
        rho = fft_map.real_map_unpadded() / unit_cell_volume
        assert approx_equal(voxel_volume * flex.sum(rho), f_calc.data()[0])
        if xray_structure.scatterers().size() == 1:
            # with a single scatterer the maximum must sit at grid point 0
            assert flex.max_index(rho) == 0
            rho_max.append(rho[0])
        else:
            rho_max.append(flex.max(rho))
        site_cart = xray_structure.sites_cart()[0]
        gias = maptbx.grid_indices_around_sites(
            unit_cell=xray_structure.unit_cell(),
            fft_n_real=n_real,
            fft_m_real=n_real,
            sites_cart=flex.vec3_double([site_cart]),
            site_radii=flex.double([electron_sum_radius]))
        electron_sums_around_atoms.append(
            flex.sum(rho.as_1d().select(gias)) * voxel_volume)
        #
        a = xray_structure.unit_cell().parameters()[0]
        nx = n_real[0]
        nxh = nx // 2
        x = []
        y = []
        for ix in range(-nxh, nxh + 1):
            x.append(a * ix / nx)
            # negative steps wrap around to the far end of the grid
            y.append(rho[(ix % nx, 0, 0)])
        densities_along_x.append((x, y))
    #
    print(
        "%3.1f %4.2f %-12s %5d %5d | %6.3f %6.3f | %6.3f %6.3f | %4.2f %5.1f" % (
            d_min,
            resolution_factor,
            n_real,
            number_of_miller_indices[0],
            number_of_miller_indices[1],
            electron_sums_around_atoms[0],
            electron_sums_around_atoms[1],
            rho_max[0],
            rho_max[1],
            f_calc.data()[0].real,
            u_as_b(xray_structure.scatterers()[0].u_iso)))
    #
    return densities_along_x
def __init__(self, **kwargs):
    """Pick cluster centres from z-scores of rho and delta and assign all items.

    Keyword arguments are handed to ``group_args.__init__``.  The code below
    reads ``self.Dij``, ``self.d_c`` and ``self.max_percentile_rho``.

    Attributes written: ``rho``, ``delta``, ``cluster_id_maxima``,
    ``cluster_id_full`` and ``cluster_id_final``.
    """
    group_args.__init__(self, **kwargs)
    print('finished Dij, now calculating rho_i and density')
    from xfel.clustering import Rodriguez_Laio_clustering_2014 as RL
    R = RL(distance_matrix=self.Dij, d_c=self.d_c)
    #from IPython import embed; embed(); exit()
    #from clustering.plot_with_dimensional_embedding import plot_with_dimensional_embedding
    #plot_with_dimensional_embedding(1-self.Dij/flex.max(self.Dij), show_plot=True)
    self.rho = rho = R.get_rho()
    ave_rho = flex.mean(rho.as_double())  # not used below
    NN = self.Dij.focus()[0]  # first dimension of the distance matrix
    i_max = flex.max_index(rho)
    # largest distance from the highest-density item to any item
    delta_i_max = flex.max(
        flex.double([self.Dij[i_max, j] for j in range(NN)]))
    rho_order = flex.sort_permutation(rho, reverse=True)
    rho_order_list = list(rho_order)
    self.delta = delta = R.get_delta(rho_order=rho_order, delta_i_max=delta_i_max)
    cluster_id = flex.int(NN, -1)  # -1 means no cluster
    delta_order = flex.sort_permutation(delta, reverse=True)
    MAX_PERCENTILE_RHO = self.max_percentile_rho  # cluster centers have to be in the top percentile
    n_cluster = 0
    #
    pick_top_solution = False
    rho_stdev = flex.mean_and_variance(
        rho.as_double()).unweighted_sample_standard_deviation()
    delta_stdev = flex.mean_and_variance(
        delta).unweighted_sample_standard_deviation()
    if rho_stdev != 0.0 and delta_stdev != 0:
        # z-scores of rho and delta
        rho_z = (rho.as_double() - flex.mean(rho.as_double())) / (rho_stdev)
        delta_z = (delta - flex.mean(delta)) / (delta_stdev)
    else:
        # No spread in rho or delta: z-scores are undefined, so a single
        # centre is taken directly from the other quantity.
        pick_top_solution = True
        if rho_stdev == 0.0:
            centroids = [flex.first_index(delta, flex.max(delta))]
        elif delta_stdev == 0.0:
            centroids = [flex.first_index(rho, flex.max(rho))]
    significant_delta = []
    significant_rho = []
    # Constant switch: with True the else branch further down is never taken.
    debug_fix_clustering = True
    if debug_fix_clustering:
        if not pick_top_solution:
            delta_z_cutoff = min(1.0, max(delta_z))
            rho_z_cutoff = min(1.0, max(rho_z))
            for ic in range(NN):
                # test the density & rho
                if delta_z[ic] >= delta_z_cutoff:
                    significant_delta.append(ic)
                if rho_z[ic] >= rho_z_cutoff:
                    significant_rho.append(ic)
            # candidates must be significant in both delta and rho
            centroid_candidates = list(
                set(significant_delta).intersection(set(significant_rho)))
            # Now compare the relative orders of the max delta_z and max rho_z to make sure they are within 1 stdev
            centroids = []
            max_delta_z_candidates = -999.9  # sentinel below any expected z-score
            max_rho_z_candidates = -999.9
            for ic in centroid_candidates:
                if delta_z[ic] > max_delta_z_candidates:
                    max_delta_z_candidates = delta_z[ic]
                if rho_z[ic] > max_rho_z_candidates:
                    max_rho_z_candidates = rho_z[ic]
            for ic in centroid_candidates:
                if max_delta_z_candidates - delta_z[ic] < 1.0 and max_rho_z_candidates - rho_z[ic] < 1.0:
                    centroids.append(ic)
        # NOTE(review): only the number of centroids is used here; the items
        # taken are the first len(centroids) entries of delta_order, not the
        # centroids themselves -- confirm this is intended.
        item_idxs = [
            delta_order[ic] for ic, centroid in enumerate(centroids)
        ]
        for item_idx in item_idxs:
            cluster_id[item_idx] = n_cluster
            print('CLUSTERING_STATS', item_idx, cluster_id[item_idx])
            n_cluster += 1
        ####
    else:
        for ic in range(NN):
            item_idx = delta_order[ic]
            if ic != 0:
                if delta[item_idx] <= 0.25 * delta[delta_order[0]]:  # too low to be a medoid
                    continue
            # position of this item in the list sorted by decreasing rho
            item_rho_order = rho_order_list.index(item_idx)
            if (item_rho_order) / NN < MAX_PERCENTILE_RHO:
                cluster_id[item_idx] = n_cluster
                print('CLUSTERING_STATS', ic, item_idx, item_rho_order,
                      cluster_id[item_idx])
                n_cluster += 1
    ###
    #
    # print('Found %d clusters' % n_cluster)
    for x in range(NN):
        if cluster_id[x] >= 0:
            print("XC", x, cluster_id[x], rho[x], delta[x])
    self.cluster_id_maxima = cluster_id.deep_copy()
    R.cluster_assignment(rho_order, cluster_id)
    self.cluster_id_full = cluster_id.deep_copy()
    # Halo removal is disabled; cluster_id_final therefore equals cluster_id_full.
    #halo = flex.bool(NN,False)
    #border = R.get_border( cluster_id = cluster_id )
    #for ic in range(n_cluster): #loop thru all border regions; find highest density
    #  this_border = (cluster_id == ic) & (border==True)
    #  if this_border.count(True)>0:
    #    highest_density = flex.max(rho.select(this_border))
    #    halo_selection = (rho < highest_density) & (this_border==True)
    #    if halo_selection.count(True)>0:
    #      cluster_id.set_selected(halo_selection,-1)
    #    core_selection = (cluster_id == ic) & ~halo_selection
    #    highest_density = flex.max(rho.select(core_selection))
    #    too_sparse = core_selection & (rho.as_double() < highest_density/10.) # another heuristic
    #    if too_sparse.count(True)>0:
    #      cluster_id.set_selected(too_sparse,-1)
    self.cluster_id_final = cluster_id.deep_copy()
def __init__(self, **kwargs):
    """Run a density-peak clustering on a distance matrix and store the result.

    Keyword arguments are handed to ``group_args.__init__``.  The code below
    reads ``self.Dij`` (distance matrix) and ``self.d_c`` from the instance.

    Attributes written:
      rho, delta         -- results of ``R.get_rho()`` and ``R.get_delta()``
      cluster_id_maxima  -- cluster ids with only the cluster centres assigned
      cluster_id_full    -- ids after ``R.cluster_assignment``
      cluster_id_final   -- ids after border/sparse members were reset to -1
    """
    group_args.__init__(self, **kwargs)
    # require Dij, d_c
    # NOTE(review): P is rebound for every stage and never read; presumably
    # each Profiler reports its stage when it is replaced -- confirm.
    P = Profiler("2. calculate rho density")
    print "finished Dij, now calculating rho_i, the density"
    from xfel.clustering import Rodriguez_Laio_clustering_2014
    # alternative clustering algorithms: see http://scikit-learn.org/stable/modules/clustering.html
    # also see https://cran.r-project.org/web/packages/dbscan/vignettes/hdbscan.html
    # see also https://en.wikipedia.org/wiki/Hausdorff_dimension
    R = Rodriguez_Laio_clustering_2014(distance_matrix=self.Dij, d_c=self.d_c)
    self.rho = rho = R.get_rho()
    ave_rho = flex.mean(rho.as_double())
    NN = self.Dij.focus()[0]  # first dimension of the distance matrix
    print "The average rho_i is %5.2f, or %4.1f%%" % (ave_rho, 100 * ave_rho / NN)
    i_max = flex.max_index(rho)
    P = Profiler("3.transition")
    print "the index with the highest density is %d" % (i_max)
    # largest distance from the highest-density item to any item
    delta_i_max = flex.max(
        flex.double([self.Dij[i_max, j] for j in xrange(NN)]))
    print "delta_i_max", delta_i_max
    rho_order = flex.sort_permutation(rho, reverse=True)
    rho_order_list = list(rho_order)
    P = Profiler("4. delta")
    self.delta = delta = R.get_delta(rho_order=rho_order, delta_i_max=delta_i_max)
    P = Profiler("5. find cluster maxima")
    #---- Now hunting for clusters ---Lots of room for improvement (or simplification) here!!!
    cluster_id = flex.int(NN, -1)  # default -1 means no cluster
    delta_order = flex.sort_permutation(delta, reverse=True)
    N_CLUST = 10  # maximum of 10 points to be considered as possible clusters (not used below)
    #MAX_PERCENTILE_DELTA = 0.99 # cluster centers have to be in the top 10% percentile delta
    MAX_PERCENTILE_RHO = 0.99  # a centre's fractional rank in rho order must be below this value
    n_cluster = 0
    #max_n_delta = min(N_CLUST, int(MAX_PERCENTILE_DELTA*NN))
    for ic in xrange(NN):  # test the density, rho
        item_idx = delta_order[ic]
        if delta[item_idx] > 100:
            # debugging output for items with a large delta
            print "A: iteration", ic, "delta", delta[item_idx], delta[item_idx] < 0.25 * delta[delta_order[0]]
        if delta[item_idx] < 0.25 * delta[delta_order[0]]:  # too low (another heuristic!)
            continue
        # position of this item in the list sorted by decreasing rho
        item_rho_order = rho_order_list.index(item_idx)
        if delta[item_idx] > 100:
            print "B: iteration", ic, item_rho_order, item_rho_order / NN, MAX_PERCENTILE_RHO
        # NOTE(review): this comparison needs true division; confirm that the
        # module enables it, otherwise the quotient is always 0 on Python 2.
        if item_rho_order / NN < MAX_PERCENTILE_RHO:
            cluster_id[item_idx] = n_cluster
            print ic, item_idx, item_rho_order, cluster_id[item_idx]
            n_cluster += 1
    print "Found %d clusters" % n_cluster
    for x in xrange(NN):
        if cluster_id[x] >= 0:
            print "XC", x, cluster_id[x], rho[x], delta[x]
    self.cluster_id_maxima = cluster_id.deep_copy()
    P = Profiler("6. assign all points")
    R.cluster_assignment(rho_order, cluster_id)
    self.cluster_id_full = cluster_id.deep_copy()
    # assign the halos
    P = Profiler("7. assign halos")
    halo = flex.bool(NN, False)  # not used below
    border = R.get_border(cluster_id=cluster_id)
    for ic in range(n_cluster):  # loop thru all border regions; find highest density
        print "cluster", ic, "in border", border.count(True)
        this_border = (cluster_id == ic) & (border == True)
        print len(this_border), this_border.count(True)
        if this_border.count(True) > 0:
            highest_density = flex.max(rho.select(this_border))
            # members of the border whose rho is below the border maximum
            halo_selection = (rho < highest_density) & (this_border == True)
            if halo_selection.count(True) > 0:
                cluster_id.set_selected(halo_selection, -1)
            core_selection = (cluster_id == ic) & ~halo_selection
            highest_density = flex.max(rho.select(core_selection))
            # core members below a tenth of the core maximum are dropped too
            too_sparse = core_selection & (
                rho.as_double() < highest_density / 10.)  # another heuristic
            if too_sparse.count(True) > 0:
                cluster_id.set_selected(too_sparse, -1)
    self.cluster_id_final = cluster_id.deep_copy()
    print "%d in the excluded halo" % ((cluster_id == -1).count(True))