def truncate_with_roots(
      m, fmodel, c1, c2, cutoff, scale,
      zero_all_interblob_region=True, as_int=False,
      average_peak_volume=None, selection=None):
    """Noise-filter map m using two-cutoff connectivity analysis.

    Blobs at the lower contour level c2 survive only when their parent blob
    at the higher level c1 exceeds a volume threshold derived from the
    typical atomic peak volume. Returns the filtered map (int map when
    as_int=True, double otherwise), or None when no sensible volume
    threshold can be formed.
    """
    assert c1 >= c2
    # Estimate the typical peak volume from the model sites if not supplied.
    if average_peak_volume is None:
        sites = fmodel.xray_structure.sites_cart()
        if selection is not None:
            sites = sites.select(selection)
        average_peak_volume = maptbx.peak_volume_estimate(
            map_data=m,
            sites_cart=sites,
            crystal_symmetry=fmodel.xray_structure.crystal_symmetry(),
            cutoff=cutoff)
    # Bail out when the derived threshold would be degenerate.
    if average_peak_volume is None or int(average_peak_volume * scale) - 1 == 0:
        return None
    # XXX "/2" is ad hoc and I don't know why!
    average_peak_volume = int(average_peak_volume * scale / 2) - 1
    conn_high = maptbx.connectivity(map_data=m, threshold=c1)
    conn_low = maptbx.connectivity(map_data=m, threshold=c2)
    filtered = conn_low.noise_elimination_two_cutoffs(
        connectivity_object_at_t1=conn_high,
        elimination_volume_threshold_at_t1=average_peak_volume,
        zero_all_interblob_region=zero_all_interblob_region)
    return filtered if as_int else filtered.as_double()
def exercise_expand_mask():
    """Exercise connectivity.expand_mask: growth of a region by expand_size
    grid points per side, including wrap-around at the cell boundaries."""
    # case 1: standard — a 10x10x10 blob grows by one grid point per side
    m = flex.double(flex.grid(30, 30, 30))
    m.fill(1)
    for x in range(10, 20):
        for y in range(10, 20):
            for z in range(10, 20):
                m[x, y, z] = 10
    conn = maptbx.connectivity(map_data=m, threshold=5)
    expanded = conn.expand_mask(id_to_expand=1, expand_size=1)
    for x in range(30):
        for y in range(30):
            for z in range(30):
                inside = (9 <= x <= 20) and (9 <= y <= 20) and (9 <= z <= 20)
                assert expanded[x, y, z] == inside
    # case 2: expansion across periodic boundaries
    m = flex.double(flex.grid(30, 30, 30))
    m.fill(1)
    m[1, 1, 1] = 10
    conn = maptbx.connectivity(map_data=m, threshold=5)
    expanded = conn.expand_mask(id_to_expand=1, expand_size=2)
    hit = (29, 0, 1, 2, 3)
    for x in range(30):
        for y in range(30):
            for z in range(30):
                assert expanded[x, y, z] == (x in hit and y in hit and z in hit)
def exercise_expand_mask():
    """Check expand_mask both for an interior blob and for a blob sitting on
    the cell corner (expansion wraps across the edges)."""
    # case 1: standard
    grid_map = flex.double(flex.grid(30, 30, 30))
    grid_map.fill(1)
    for a in range(10, 20):
        for b in range(10, 20):
            for c in range(10, 20):
                grid_map[a, b, c] = 10
    conn = maptbx.connectivity(map_data=grid_map, threshold=5)
    mask = conn.expand_mask(id_to_expand=1, expand_size=1)
    grown = range(9, 21)
    for a in range(30):
        for b in range(30):
            for c in range(30):
                assert mask[a, b, c] == (a in grown and b in grown and c in grown)
    # case 2: over boundaries
    grid_map = flex.double(flex.grid(30, 30, 30))
    grid_map.fill(1)
    grid_map[1, 1, 1] = 10
    conn = maptbx.connectivity(map_data=grid_map, threshold=5)
    mask = conn.expand_mask(id_to_expand=1, expand_size=2)
    wrapped = [29, 0, 1, 2, 3]
    for a in range(30):
        for b in range(30):
            for c in range(30):
                assert mask[a, b, c] == (a in wrapped and b in wrapped and c in wrapped)
def __init__(self, model, map_data):
    """Run connectivity analysis on map_data (threshold 5) and print a
    per-blob summary: volume order, peak coordinate/value and boundaries.

    Demonstrates the full set of accessors on a maptbx.connectivity object.
    FIX: Python 2 print statements converted to print() calls (the originals
    are syntax errors under Python 3); dead commented-out exploration code
    removed.
    """
    adopt_init_args(self, locals())
    # Find blobs above the density threshold.
    co = maptbx.connectivity(map_data=self.map_data, threshold=5.)
    map_result = co.result()   # region-id map on the same grid as the input
    volumes = co.regions()     # grid-point count per region
    print(volumes)
    coors = co.maximum_coors()   # peak coordinate of each region
    vals = co.maximum_values()   # value at each region's peak
    minb, maxb = co.get_blobs_boundaries_tuples()
    # Region ids ordered by decreasing volume (region 0 is the
    # below-threshold background and typically comes first).
    i_sorted_by_volume = flex.sort_permutation(
        data=volumes, reverse=True)  # maybe co.regions() should go there
    for i in i_sorted_by_volume:
        print("blob #", i)
        print(coors[i])
        print(vals[i])
        # NOTE(review): prints max boundary before min — looks swapped, but
        # the original output order is preserved here.
        print(maxb[i], minb[i])
def exercise_sample_all_mask_regions():
    """Sample grid points from every mask region and verify the returned
    array sizes and the first (anchor) point of each region."""
    density = flex.double(flex.grid(30, 30, 30))
    density.fill(1)
    # Two separate cubic blobs of different heights.
    for (lo, hi), value in (((0, 10), 10), ((15, 25), 20)):
        for x in range(lo, hi):
            for y in range(lo, hi):
                for z in range(lo, hi):
                    density[x, y, z] = value
    conn = maptbx.connectivity(map_data=density, threshold=5, wrapping=False)
    cell = uctbx.unit_cell((10, 10, 10))
    sampled = maptbx.sample_all_mask_regions(
        mask=conn.result(),
        volumes=flex.int([0, 1000, 1000]),
        sampling_rates=flex.int([0, 10, 10]),
        unit_cell=cell)
    arr1 = sampled.get_array(1)
    arr2 = sampled.get_array(2)
    # 1000 points at sampling rate 10 plus the anchor point -> 101 each.
    assert arr1.size() == arr2.size() == 101
    assert approx_equal(arr1[0], (0, 0, 0))
    assert approx_equal(arr2[0], (5, 5, 5))
def prepare_maps(fofc, two_fofc, fem, fofc_cutoff=2, two_fofc_cutoff=0.5,
                 fem_cutoff=0.5, connectivity_cutoff=0.5, local_average=True):
    """
    - This takes 3 maps: mFo-DFc, 2mFo-DFc and FEM and combines them into one
      map that is most suitable for real-space refinement.
    - Maps are the boxes extracted around region of interest from the whole
      unit cell map.
    - All maps are expected to be normalized by standard deviation
      (sigma-scaled) BEFORE extracting the box. There is no way to assert it
      at this point.
    - Map gridding equivalence is asserted.
    """
    m1, m2, m3 = fofc, two_fofc, fem
    # All three boxes must share the gridding exactly.
    for first in (m1, m2, m3):
        for second in (m1, m2, m3):
            assert first.all() == second.all()
            assert first.focus() == second.focus()
            assert first.origin() == second.origin()
    # Binarize the residual (mFo-DFc) map: 1 above cutoff, 0 elsewhere.
    sel = m1 <= fofc_cutoff
    mask = m1.set_selected(sel, 0)
    mask = mask.set_selected(~sel, 1)
    del sel, m1
    assert approx_equal([flex.max(mask), flex.min(mask)], [1, 0])

    def truncate_and_filter(m, cutoff, mask):
        # Zero below cutoff, then keep only points allowed by the mask.
        return m.set_selected(m <= cutoff, 0) * mask

    # Truncate and filter the 2mFo-DFc and FEM maps.
    m2 = truncate_and_filter(m2, two_fofc_cutoff, mask)
    m3 = truncate_and_filter(m3, fem_cutoff, mask)
    del mask

    def scale(m):
        # Sigma-scale; leave flat maps untouched to avoid division by zero.
        sd = m.sample_standard_deviation()
        return m / sd if sd != 0 else m

    # Average the two truncated maps and rescale the combination.
    m2 = scale(m2)
    m3 = scale(m3)
    m = (m2 + m3) / 2.
    del m2, m3
    m = scale(m)
    # Connectivity analysis: keep only the largest above-threshold region.
    co = maptbx.connectivity(map_data=m, threshold=connectivity_cutoff)
    v_max, i_max = -1.e+9, None
    for i, v in enumerate(co.regions()):
        if i > 0 and v > v_max:
            v_max, i_max = v, i
    mask2 = co.result()
    selection = mask2 == i_max
    mask2 = mask2.set_selected(selection, 1)
    mask2 = mask2.set_selected(~selection, 0)
    assert mask2.count(1) == v_max
    # Final filter.
    m = m * mask2.as_double()
    if local_average:
        maptbx.map_box_average(map_data=m, cutoff=0.5, index_span=1)
    return m
def exercise_volume_cutoff():
    """Check volume_cutoff_mask: regions smaller than the cutoff are dropped
    from the returned binary mask."""
    density = flex.double(flex.grid(100, 100, 100))
    density.fill(0)
    # Two blobs: a 4**3 = 64-point one and a 9**3 = 729-point one.
    for x in range(6, 10):
        for y in range(6, 10):
            for z in range(6, 10):
                density[x, y, z] = 10
    for x in range(16, 25):
        for y in range(16, 25):
            for z in range(16, 25):
                density[x, y, z] = 20
    conn = maptbx.connectivity(map_data=density, threshold=5)
    labels = conn.result()
    region_volumes = list(conn.regions())  # [999207, 64, 729]
    # Cutoff below both blob volumes keeps both (64 + 729 = 793 points).
    mask = conn.volume_cutoff_mask(volume_cutoff=10)
    assert (mask == 1).count(True) == 793
    assert (mask == 0).count(True) == 999207
    # Cutoff between the two keeps only the larger blob.
    mask = conn.volume_cutoff_mask(volume_cutoff=100)
    assert (mask == 1).count(True) == 729
    assert (mask == 0).count(True) == 999271
    # Cutoff above both removes everything.
    mask = conn.volume_cutoff_mask(volume_cutoff=1000)
    assert (mask == 1).count(True) == 0
    assert (mask == 0).count(True) == 1000000
def exercise_volume_cutoff():
    """volume_cutoff_mask: only regions at least as large as the cutoff
    survive in the returned 0/1 mask."""
    cube = flex.double(flex.grid(100, 100, 100))
    cube.fill(0)
    small = range(6, 10)   # 4**3 = 64 grid points
    big = range(16, 25)    # 9**3 = 729 grid points
    for p in small:
        for q in small:
            for r in small:
                cube[p, q, r] = 10
    for p in big:
        for q in big:
            for r in big:
                cube[p, q, r] = 20
    conn = maptbx.connectivity(map_data=cube, threshold=5)
    labelled = conn.result()
    vols = list(conn.regions())  # [999207, 64, 729]
    # Rising cutoffs drop first the small blob, then both.
    for cutoff, n_kept in ((10, 793), (100, 729), (1000, 0)):
        keep = conn.volume_cutoff_mask(volume_cutoff=cutoff)
        assert (keep == 1).count(True) == n_kept
        assert (keep == 0).count(True) == 1000000 - n_kept
def filter_mask(mask_p1, volume_cutoff, crystal_symmetry,
                for_structure_factors=False):
    """Remove connected mask regions whose volume (A^3) falls below
    volume_cutoff; return the filtered mask as a 0/1 double map, optionally
    scaled by the space-group order for structure-factor calculation."""
    conn_obj = maptbx.connectivity(
        map_data=mask_p1,
        threshold=0.01,
        preprocess_against_shallow=True,
        wrapping=True)
    lowest, highest = flex.min(mask_p1), flex.max(mask_p1)
    print(mask_p1.size(), (mask_p1 < 0).count(True))
    # Input must be a strict 0/1 mask.
    assert lowest == 0, lowest
    assert highest == 1, highest
    # Mean grid step (A) used to convert grid-point counts to volumes.
    a, b, c = crystal_symmetry.unit_cell().parameters()[:3]
    na, nb, nc = mask_p1.accessor().all()
    step = flex.mean(flex.double([a / na, b / nb, c / nc]))
    if crystal_symmetry.space_group_number() != 1:
        conn_obj.merge_symmetry_related_regions(
            space_group=crystal_symmetry.space_group())
    conn = conn_obj.result().as_double()
    by_volume = sorted(
        zip(conn_obj.regions(), range(0, conn_obj.regions().size())),
        key=lambda x: x[0], reverse=True)
    for v, i in by_volume:
        if i == 0:
            continue  # skip macromolecule
        # erase regions below the volume cutoff
        if v * step**3 < volume_cutoff:
            conn = conn.set_selected(conn == i, 0)
    conn = conn.set_selected(conn > 0, 1)
    if for_structure_factors:
        conn = conn / crystal_symmetry.space_group().order_z()
    return conn
def exercise_get_blobs_boundaries():
    """Check blob bounding boxes from get_blobs_boundaries and the
    get_blobs_boundaries_tuples convenience wrapper.

    FIX: the z component of every boundary tuple previously re-read index 1
    (the y component) instead of index 2 — a copy-paste bug that went
    unnoticed because the test blobs are symmetric in y and z.
    """
    cmap = flex.double(flex.grid(100, 100, 100))
    cmap.fill(1)
    for i in range(10, 20):
        for j in range(10, 20):
            for k in range(10, 20):
                cmap[i, j, k] = 10
    co = maptbx.connectivity(map_data=cmap, threshold=5)
    # raw function:
    boundaries = co.get_blobs_boundaries()
    # how to use this:
    # boundaries[min/max, n_blob, x/y/z]
    blob_0_min_boundaries = \
        (boundaries[0, 0, 0], boundaries[0, 0, 1], boundaries[0, 0, 2])
    blob_0_max_boundaries = \
        (boundaries[1, 0, 0], boundaries[1, 0, 1], boundaries[1, 0, 2])
    # 0th blob - under the limit, covering almost whole cell
    assert blob_0_min_boundaries == (0, 0, 0)
    assert blob_0_max_boundaries == (99, 99, 99)
    # 1st blob - covers coordinates from 10 to 19 by construction
    blob_1_min_boundaries = \
        (boundaries[0, 1, 0], boundaries[0, 1, 1], boundaries[0, 1, 2])
    blob_1_max_boundaries = \
        (boundaries[1, 1, 0], boundaries[1, 1, 1], boundaries[1, 1, 2])
    assert blob_1_min_boundaries == (10, 10, 10)
    assert blob_1_max_boundaries == (19, 19, 19)
    # convenient get_blobs_boundaries_tuples
    minb, maxb = co.get_blobs_boundaries_tuples()
    assert minb == [(0, 0, 0), (10, 10, 10)]
    assert maxb == [(99, 99, 99), (19, 19, 19)]
    # ==============================
    # two blobs test
    # just add a blob to the previous cmap
    for i in range(50, 70):
        for j in range(50, 80):
            for k in range(50, 90):
                cmap[i, j, k] = 10
    co = maptbx.connectivity(map_data=cmap, threshold=5)
    minb, maxb = co.get_blobs_boundaries_tuples()
    assert minb == [(0, 0, 0), (10, 10, 10), (50, 50, 50)]
    assert maxb == [(99, 99, 99), (19, 19, 19), (69, 79, 89)]
def exercise_get_blobs_boundaries():
    """Exercise blob bounding boxes (raw array and tuple forms).

    FIX: the third (z) element of each boundary tuple read boundaries[.,.,1]
    twice instead of boundaries[.,.,2]; corrected here. The asserts are
    unchanged — they passed before only because the blobs are y/z-symmetric.
    """
    cmap = flex.double(flex.grid(100, 100, 100))
    cmap.fill(1)
    for i in range(10, 20):
        for j in range(10, 20):
            for k in range(10, 20):
                cmap[i, j, k] = 10
    co = maptbx.connectivity(map_data=cmap, threshold=5)
    # raw function; indexing is boundaries[min/max, n_blob, x/y/z]
    boundaries = co.get_blobs_boundaries()
    blob_0_min_boundaries = \
        (boundaries[0, 0, 0], boundaries[0, 0, 1], boundaries[0, 0, 2])
    blob_0_max_boundaries = \
        (boundaries[1, 0, 0], boundaries[1, 0, 1], boundaries[1, 0, 2])
    # 0th blob - under the limit, covering almost whole cell
    assert blob_0_min_boundaries == (0, 0, 0)
    assert blob_0_max_boundaries == (99, 99, 99)
    # 1st blob - covers coordinates from 10 to 19 by construction
    blob_1_min_boundaries = \
        (boundaries[0, 1, 0], boundaries[0, 1, 1], boundaries[0, 1, 2])
    blob_1_max_boundaries = \
        (boundaries[1, 1, 0], boundaries[1, 1, 1], boundaries[1, 1, 2])
    assert blob_1_min_boundaries == (10, 10, 10)
    assert blob_1_max_boundaries == (19, 19, 19)
    # convenient get_blobs_boundaries_tuples
    minb, maxb = co.get_blobs_boundaries_tuples()
    assert minb == [(0, 0, 0), (10, 10, 10)]
    assert maxb == [(99, 99, 99), (19, 19, 19)]
    # ==============================
    # two blobs test: add a second blob to the previous cmap
    for i in range(50, 70):
        for j in range(50, 80):
            for k in range(50, 90):
                cmap[i, j, k] = 10
    co = maptbx.connectivity(map_data=cmap, threshold=5)
    minb, maxb = co.get_blobs_boundaries_tuples()
    assert minb == [(0, 0, 0), (10, 10, 10), (50, 50, 50)]
    assert maxb == [(99, 99, 99), (19, 19, 19), (69, 79, 89)]
def getvs(cmap, threshold, wrap=True):
    """Run connectivity on cmap; return (brute-force grid-point counts for
    region ids 0..2, list of co.regions())."""
    conn = maptbx.connectivity(map_data=cmap, threshold=threshold, wrapping=wrap)
    labels = conn.result()
    regions = conn.regions()
    peaks = conn.maximum_coors()
    peak_values = conn.maximum_values()
    # One entry per region in every accessor.
    assert len(list(regions)) == len(list(peaks)) == len(list(peak_values))
    # Result map keeps the input gridding.
    assert cmap.all() == labels.all()
    counts = [(labels == region_id).count(True) for region_id in range(3)]
    return counts, list(conn.regions())
def getvs(cmap, threshold, wrap=True):
    """Brute-force cross-check of connectivity accessors: counts of grid
    points labelled 0, 1 and 2 plus the regions() volume list."""
    co = maptbx.connectivity(map_data=cmap, threshold=threshold, wrapping=wrap)
    labelled = co.result()
    n_regions = len(list(co.regions()))
    # peak coordinates and values are reported per region
    assert n_regions == len(list(co.maximum_coors()))
    assert n_regions == len(list(co.maximum_values()))
    # check dimensions
    assert cmap.all() == labelled.all()
    v = []
    for label in (0, 1, 2):
        v.append((labelled == label).count(True))
    return v, list(co.regions())
def truncate_with_roots(
      m, fmodel, c1, c2, cutoff, scale, zero_all_interblob_region=True,
      as_int=False, average_peak_volume=None, selection=None):
    """Keep only c2-level blobs of map m whose c1-level parent blob exceeds
    a volume threshold derived from the typical atomic peak volume; zero the
    rest. Returns None when no usable threshold can be derived."""
    assert c1 >= c2
    apv = average_peak_volume
    if apv is None:
        # Derive the typical peak volume from the model sites.
        sites_cart = fmodel.xray_structure.sites_cart()
        if selection is not None:
            sites_cart = sites_cart.select(selection)
        apv = maptbx.peak_volume_estimate(
            map_data=m,
            sites_cart=sites_cart,
            crystal_symmetry=fmodel.xray_structure.crystal_symmetry(),
            cutoff=cutoff)
    if apv is None or int(apv * scale) - 1 == 0:
        return None  # threshold would be degenerate
    # XXX "/2" is ad hoc and I don't know why!
    threshold_volume = int(apv * scale / 2) - 1
    conn_t1 = maptbx.connectivity(map_data=m, threshold=c1)
    conn_t2 = maptbx.connectivity(map_data=m, threshold=c2)
    result = conn_t2.noise_elimination_two_cutoffs(
        connectivity_object_at_t1=conn_t1,
        elimination_volume_threshold_at_t1=threshold_volume,
        zero_all_interblob_region=zero_all_interblob_region)
    if as_int:
        return result
    return result.as_double()
def exercise_max_values():
    """maximum_coors/maximum_values must report the planted peak grid point
    of each region (and (0,0,0) for the background region)."""
    density = flex.double(flex.grid(100, 100, 100))
    density.fill(0)
    for x in range(6, 10):
        for y in range(6, 10):
            for z in range(6, 10):
                density[x, y, z] = 10
    for x in range(16, 25):
        for y in range(16, 25):
            for z in range(16, 25):
                density[x, y, z] = 20
    # Plant a distinct maximum inside each blob.
    density[7, 7, 7] = 15
    density[20, 20, 20] = 25
    conn = maptbx.connectivity(map_data=density, threshold=5)
    peak_coords = list(conn.maximum_coors())
    peak_values = list(conn.maximum_values())
    region_volumes = list(conn.regions())
    assert len(peak_coords) == len(peak_values) == len(region_volumes)
    assert peak_coords == [(0, 0, 0), (7, 7, 7), (20, 20, 20)]
    assert peak_values == [0.0, 15.0, 25.0]
def exercise_max_values():
    """Each region's reported maximum must match the planted peak point."""
    m = flex.double(flex.grid(100, 100, 100))
    m.fill(0)
    for p in range(100):
        for q in range(100):
            for r in range(100):
                if 5 < p < 10 and 5 < q < 10 and 5 < r < 10:
                    m[p, q, r] = 10
                if 15 < p < 25 and 15 < q < 25 and 15 < r < 25:
                    m[p, q, r] = 20
    m[7, 7, 7] = 15      # peak of blob 1
    m[20, 20, 20] = 25   # peak of blob 2
    co = maptbx.connectivity(map_data=m, threshold=5)
    coords = list(co.maximum_coors())
    values = list(co.maximum_values())
    volumes = list(co.regions())
    assert len(coords) == len(values) == len(volumes)
    assert coords == [(0, 0, 0), (7, 7, 7), (20, 20, 20)]
    assert values == [0.0, 15.0, 25.0]
def exercise1():
    """End-to-end demo: build a map from a one-atom model, run connectivity
    and cross-check region volumes by brute force."""
    # NOTE(review): PDB record spacing reconstructed to the standard
    # fixed-column layout — confirm against the original test file.
    pdb_str = """
CRYST1   10.000   10.000   10.000  90.00  90.00  90.00 P 1
HETATM    1  C    C      1       2.000   2.000   2.000  1.00 20.00           C
END
"""
    pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_str)
    xrs = pdb_inp.xray_structure_simple()
    cg = maptbx.crystal_gridding(
        unit_cell=xrs.unit_cell(),
        pre_determined_n_real=(100, 100, 100),
        space_group_info=xrs.space_group_info())
    fc = xrs.structure_factors(d_min=1., algorithm="direct").f_calc()
    fft_map = miller.fft_map(crystal_gridding=cg, fourier_coefficients=fc)
    map_data = fft_map.real_map_unpadded()
    # pass map and threshold value
    co = maptbx.connectivity(map_data=map_data, threshold=100.)
    # 'map' of the same size with integers: 0 where below threshold,
    # 1,2,3... for connected regions
    map_result = co.result()
    # number of the connected region for a particular point:
    assert map_result[0, 0, 0] == 0      # under threshold
    assert map_result[20, 20, 20] == 1   # blob 1
    # 1d array of integer region volumes
    volumes = list(co.regions())
    # find max volume (except volume of 0-region which will be probably max)
    max_volume = max(volumes[1:])
    # find number of the region with max volume
    max_index = volumes.index(max_volume)
    counts = [0, 0, 0]
    for region_id in range(3):
        # !!! Do not do this because it's extremely slow! Test purposes only.
        counts[region_id] = (map_result == region_id).count(True)
    assert counts[2] == 0
    assert counts[1] < 15000
    assert counts[0] + counts[1] + counts[2] == 1000000
    assert volumes == counts[:2]
def exercise1():
    """Basic connectivity walk-through on a synthetic one-carbon map."""
    # NOTE(review): fixed-column PDB layout reconstructed — verify spacing
    # against the original source.
    pdb_str = """
CRYST1   10.000   10.000   10.000  90.00  90.00  90.00 P 1
HETATM    1  C    C      1       2.000   2.000   2.000  1.00 20.00           C
END
"""
    pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_str)
    xrs = pdb_inp.xray_structure_simple()
    cg = maptbx.crystal_gridding(
        unit_cell=xrs.unit_cell(),
        pre_determined_n_real=(100, 100, 100),
        space_group_info=xrs.space_group_info())
    fc = xrs.structure_factors(d_min=1., algorithm="direct").f_calc()
    fft_map = miller.fft_map(crystal_gridding=cg, fourier_coefficients=fc)
    map_data = fft_map.real_map_unpadded()
    # map + threshold in, labelled map out: 0 below threshold, 1,2,3... for
    # connected regions
    co = maptbx.connectivity(map_data=map_data, threshold=100.)
    labelled = co.result()
    assert labelled[0, 0, 0] == 0       # below threshold
    assert labelled[20, 20, 20] == 1    # atom sits here -> blob 1
    volumes = list(co.regions())
    # largest blob excluding the 0 (background) region, and its region id
    max_volume = max(volumes[1:])
    max_index = volumes.index(max_volume)
    # brute-force recount of each label (slow - test purposes only!)
    v = [0, 0, 0]
    for label in range(3):
        v[label] = (labelled == label).count(True)
    assert v[2] == 0
    assert v[1] < 15000
    assert v[0] + v[1] + v[2] == 1000000
    assert volumes == v[:2]
def exercise_wrapping():
    """With wrapping=True, six 5x5x5 blocks at the cell corners must merge
    into a single periodic region of 750 grid points."""
    m = flex.double(flex.grid(30, 30, 30))
    m.fill(1)
    lo, hi = range(0, 5), range(25, 30)
    # Six of the eight cell corners carry density.
    for rx, ry, rz in ((lo, lo, lo), (lo, hi, lo), (lo, lo, hi),
                       (hi, lo, lo), (hi, hi, lo), (hi, lo, hi)):
        for x in rx:
            for y in ry:
                for z in rz:
                    m[x, y, z] = 10
    n_in_blob = m.count(10)
    co = maptbx.connectivity(map_data=m, threshold=5, wrapping=True)
    dres = co.result().as_double()
    regs = list(co.regions())
    minb, maxb = co.get_blobs_boundaries_tuples()
    assert n_in_blob == 750
    # 6 corners x 125 points, all connected across the periodic boundaries.
    assert regs == [26250, 750]
def __init__(self,
      xray_structure,
      step,
      volume_cutoff=None,
      mean_diff_map_threshold=None,
      compute_whole=False,
      largest_only=False,
      wrapping=True,  # should be False if working with ASU
      f_obs=None,
      r_sol=1.1,
      r_shrink=0.9,
      f_calc=None,
      log=None,
      write_masks=False):
  """Decompose the bulk-solvent mask into connected regions and build
  per-region F_mask contributions.

  The main mask (large regions, >= 1% of the unit cell) accumulates into
  self.f_mask_0; every other retained region gets its own F_mask entry in
  self.FV keyed by the miller array, valued [volume A^3, % of unit cell].
  Small (< volume_cutoff but >= 10 A^3) and weak-density regions are each
  aggregated into one extra region via self._add_from_aggregated.

  NOTE(review): relies on helpers not visible here
  (_compute_mask_in_p1, _compute_f_mask_whole, _get_region_i_selection,
  compute_i_mask_asu, compute_f_mask_i, compute_diff_map,
  _add_from_aggregated) — semantics assumed from call sites.
  """
  adopt_init_args(self, locals())
  #
  # Work only with reflections at d >= 3 A for per-region Fmask.
  self.d_spacings = f_obs.d_spacings().data()
  self.sel_gte3 = self.d_spacings >= 3
  self.miller_array = f_obs.select(self.sel_gte3)
  #
  self.crystal_symmetry = self.xray_structure.crystal_symmetry()
  # Compute mask in p1 (via ASU)
  self.crystal_gridding = maptbx.crystal_gridding(
    unit_cell=xray_structure.unit_cell(),
    space_group_info=xray_structure.space_group_info(),
    symmetry_flags=maptbx.use_space_group_symmetry,
    step=step)
  self.n_real = self.crystal_gridding.n_real()
  # XXX Where do we want to deal with H and occ==0?
  self._mask_p1 = self._compute_mask_in_p1()
  # Percentage of non-zero (solvent) grid points.
  self.solvent_content = 100.*(self._mask_p1 != 0).count(True)/\
    self._mask_p1.size()
  # Optionally compute Fmask from original whole mask, zero-ed at dmin<3A.
  self.f_mask_whole = self._compute_f_mask_whole()
  # Connectivity analysis
  co = maptbx.connectivity(
    map_data=self._mask_p1,
    threshold=0.01,
    preprocess_against_shallow=False,
    wrapping=wrapping)
  if (xray_structure.space_group().type().number() != 1):  # not P1
    co.merge_symmetry_related_regions(
      space_group=xray_structure.space_group())
  #
  self.conn = co.result().as_double()
  # Pair each region volume with its region id, largest volume first.
  z = zip(co.regions(), range(0, co.regions().size()))
  sorted_by_volume = sorted(z, key=lambda x: x[0], reverse=True)
  #
  f_mask_data_0 = flex.complex_double(f_obs.data().size(), 0)
  self.f_mask_0 = None
  self.FV = OrderedDict()
  self.mFoDFc_0 = None
  diff_map = None  # mFo-DFc map computed using F_mask_0 (main mask)
  self.regions = OrderedDict()
  small_selection = None  # union of small-but-not-tiny regions
  weak_selection = None   # union of weak-density regions
  #
  if (log is not None):
    print(" # volume_p1 uc(%) mFo-DFc: min,max,mean,sd", file=log)
  #
  for i_seq, p in enumerate(sorted_by_volume):
    v, i = p
    self._region_i_selection = None  # must be here inside the loop!
    f_mask_i = None  # must be here inside the loop!
    # skip macromolecule
    if (i == 0): continue
    # skip small volume and accumulate small volumes
    volume = v * step**3
    uc_fraction = v * 100. / self.conn.size()
    if (volume_cutoff is not None and volume < volume_cutoff):
      if (volume >= 10):
        if (small_selection is None):
          small_selection = self._get_region_i_selection(i)
        else:
          small_selection |= self._get_region_i_selection(i)
      continue
    # Accumulate regions with volume greater than volume_cutoff (if
    # volume_cutoff is defined). Weak density regions are included.
    self.regions[i_seq] = group_args(
      id=i, i_seq=i_seq, volume=volume, uc_fraction=uc_fraction)
    # Compute i-th region mask
    mask_i_asu = self.compute_i_mask_asu(
      selection=self._get_region_i_selection(i), volume=volume)
    # Compute F_mask_0 (F_mask for main mask)
    if (uc_fraction >= 1):
      f_mask_i = self.compute_f_mask_i(mask_i_asu)
      f_mask_data_0 += f_mask_i.data()
    elif (largest_only):
      break
    # Compute mFo-DFc map using main mask (once done computing main mask!)
    if (uc_fraction < 1 and diff_map is None):
      diff_map = self.compute_diff_map(f_mask_data_0=f_mask_data_0)
    # Analyze mFo-DFc map in the i-th region
    mi, ma, me, sd = None, None, None, None
    if (diff_map is not None):
      iselection = self._get_region_i_selection(i).iselection()
      blob = diff_map.select(iselection)
      mean_diff_map = flex.mean(diff_map.select(iselection))
      mi, ma, me = flex.min(blob), flex.max(blob), flex.mean(blob)
      sd = blob.sample_standard_deviation()
      if (log is not None):
        print("%3d" % i_seq, "%12.3f" % volume,
              "%8.4f" % round(uc_fraction, 4),
              "%7.3f %7.3f %7.3f %7.3f" % (mi, ma, me, sd), file=log)
      # Accumulate regions with weak density into one region, then skip
      if (mean_diff_map_threshold is not None):
        if (mean_diff_map <= mean_diff_map_threshold):
          if (mean_diff_map > 0.1):
            if (weak_selection is None):
              weak_selection = self._get_region_i_selection(i)
            else:
              weak_selection |= self._get_region_i_selection(i)
          continue
    else:
      if (log is not None):
        print("%3d" % i_seq, "%12.3f" % volume,
              "%8.4f" % round(uc_fraction, 4),
              "%7s" % str(None), file=log)
    # Compute F_mask for i-th region
    if (f_mask_i is None):
      f_mask_i = self.compute_f_mask_i(mask_i_asu)
    # Compose result object
    self.FV[f_mask_i] = [round(volume, 3), round(uc_fraction, 1)]
  #
  # Determine number of secondary regions. Must happen here!
  # Preliminarily if need to do mosaic.
  self.n_regions = len(self.FV.values())
  self.do_mosaic = False
  # Mosaic treatment only worthwhile with several regions and low-res data.
  if (self.n_regions > 1 and flex.max(self.d_spacings) > 6):
    self.do_mosaic = True
  # Add aggregated small regions (if present)
  self._add_from_aggregated(selection=small_selection, diff_map=diff_map)
  # Add aggregated weak map regions (if present)
  self._add_from_aggregated(selection=weak_selection, diff_map=diff_map)
  # Finalize main Fmask
  self.f_mask_0 = f_obs.customized_copy(data=f_mask_data_0)
  # Delete bulk whole mask from memory
  del self._mask_p1
def __init__(self, fmodel, log=None):
  """Build per-region bulk-solvent F_mask arrays from connectivity analysis
  of the P1 mask (reduced to the ASU), then assemble a result fmodel via
  helper_3.

  NOTE(review): Python 2 code (`print >> log`); depends on helpers not
  visible here (get_fmodel_and_grid_step, get_mask_1, compute_map,
  helper_3) — behavior inferred from call sites only.
  """
  # Commonly used objects
  xrs = fmodel.xray_structure
  sgt = xrs.space_group().type()
  # Compute default fmodel and decide on grid step
  fmodel, self.grid_step_factor = get_fmodel_and_grid_step(
    f_obs        = fmodel.f_obs(),
    r_free_flags = fmodel.r_free_flags(),
    xrs          = xrs)
  #fmodel.show()
  #print fmodel.r_work(), fmodel.r_free()
  ###
  mask_data_p1, n_real, crystal_gridding = get_mask_1(fmodel=fmodel,
    grid_step_factor=self.grid_step_factor)
  #ccp4_map(cg=crystal_gridding, file_name="m1.ccp4", map_data=mask_data_p1_)
  #xxx1 = fmodel.f_obs().structure_factors_from_map(map=mask_data_p1,
  #  use_scale = True, anomalous_flag = False, use_sg = False)
  #mask_data_p1, n_real, crystal_gridding = get_mask_2(fmodel=fmodel,
  #  grid_step_factor=self.grid_step_factor)
  #print n_real
  #STOP()
  #xxx2 = fmodel.f_obs().structure_factors_from_map(map=mask_data_p1,
  #  use_scale = True, anomalous_flag = False, use_sg = False)
  #
  #assert approx_equal(xxx1.data(), xxx2.data())
  #print mask_data_p1.all(), mask_data_p1.focus(), mask_data_p1.origin()
  #print mask_data_p1_2.all(), mask_data_p1_2.focus(), mask_data_p1_2.origin()
  #print mask_data_p1_1.count(0), mask_data_p1_2.count(0)
  #assert approx_equal(mask_data_p1_1, mask_data_p1_2)
  #STOP()
  #####
  # Mask connectivity analysis
  co = maptbx.connectivity(map_data=mask_data_p1, threshold=0.01)
  conn = co.result().as_double()
  # Convert result of connectivity analysis from P1 to ASU (in-place)
  conn = asu_map_ext.asymmetric_map(sgt, conn).data()
  # Find unique indices and regions in reduced (P1->ASU) conn
  region_indices = flex.double()
  region_volumes = flex.double()  # each volume as % of ASU grid points
  for i in conn:
    if not i in region_indices: region_indices.append(i)
  for l in region_indices:
    szl = conn.count(l)*100./conn.size()
    region_volumes.append(szl)
  # Sort regions by decreasing volume.
  s = flex.sort_permutation(region_volumes, reverse=True)
  region_volumes = region_volumes.select(s)
  region_indices = region_indices.select(s)
  # Convert P1 mask into ASU
  mask_data_asu = asu_map_ext.asymmetric_map(sgt, mask_data_p1).data()
  conn.reshape(mask_data_asu.accessor())  #XXX still need it?
  f_masks = []
  all_zero_found = False
  if(log is not None):
    print >> log, "Number of regions:", len(region_indices)
  mi,ma,me,diff_map_asu = None,None,None,None
  for ii, i in enumerate(region_indices):
    s = conn==i
    si = s.iselection()
    # Skip the first region containing zero mask values (macromolecule).
    if(not all_zero_found and mask_data_asu.select(si).count(0.)>0):
      all_zero_found = True
      continue
    # DIFF MAP START
    # For small regions (<1% of ASU), compute an mFo-DFc map once, using
    # the accumulated main F_mask, to judge each region's density.
    if(region_volumes[ii]<1 and diff_map_asu is None):  #(ii == 2):
      fmodel_tmp = mmtbx.f_model.manager(
        f_obs        = fmodel.f_obs(),
        r_free_flags = fmodel.r_free_flags(),
        f_calc       = fmodel.f_calc(),
        f_mask       = f_masks[len(f_masks)-1])
      fmodel_tmp.update_all_scales(remove_outliers=False,
        update_f_part1=False)
      diff_map_p1 = compute_map(
        fmodel           = fmodel_tmp,
        crystal_gridding = crystal_gridding,
        map_type         = "mFo-DFc")
      diff_map_asu = asu_map_ext.asymmetric_map(sgt, diff_map_p1).data()
    if(diff_map_asu is not None):
      mi,ma,me = diff_map_asu.select(si).min_max_mean().as_tuple()
      # Reject regions with negative difference density.
      if(ma<0. or me<0.): continue
    # DIFF MAP END
    #XXX this is 4 loops, may be slow. move to C++ if slow.
    mask_data_asu_i = mask_data_asu.deep_copy()
    #mask_data_asu_i = mask_data_asu_i.set_selected(s, 1).set_selected(~s, 0)
    mask_data_asu_i = mask_data_asu_i.set_selected(~s, 0)
    #if(mi is None):
    #  print "region: %5d fraction: %8.4f"%(ii, region_volumes[ii]), len(region_volumes)
    #else:
    #  print "region: %5d fraction: %8.4f"%(ii, region_volumes[ii]), len(region_volumes), "%7.3f %7.3f %7.3f"%(mi,ma,me)
    if(log is not None):
      print >> log, "region: %5d fraction: %8.4f"%(ii, region_volumes[ii])
      log.flush()
    f_mask_i = fmodel.f_obs().structure_factors_from_asu_map(
      asu_map_data = mask_data_asu_i, n_real = n_real)
    # Large regions (>1%) are merged into the previous F_mask; small ones
    # become separate entries.
    if(len(f_masks)>0 and region_volumes[ii]>1):
      f_masks[len(f_masks)-1] = f_masks[len(f_masks)-1].array(
        data = f_masks[len(f_masks)-1].data()+ f_mask_i.data())
    else:
      f_masks.append(f_mask_i)
  #
  self.fmodel_result, self.method = helper_3(
    fmodel  = fmodel,
    f_masks = f_masks,
    log     = log)
  #self.fmodel_result.show()
  #
  self.n_regions = len(region_volumes[1:])
  self.region_volumes = " ".join(
    ["%8.4f"%(v) for v in region_volumes[1:][:10]])  # top 10
def exercise_noise_elimination_two_cutoffs():
  # Purpose: eliminate noise.
  # We want to delete small blobs from the map. On the particular contouring
  # (cutoff) level we can set a threshold for volume and say: all blobs that
  # have volume less than threshold value should be deleted.
  # One more point is that we want to delete them with their 'root', meaning
  # that we are lowering threshold level and put zeros on that bigger regions.
  # But we are zeroing only those which are not merged with big good blobs.
  # Everything under second contouring level also will be zero.
  # ======================
  # From another point of view.
  # We know some threshold value for volume of good blobs on t1 contouring
  # level. We want to keep only them and clear out everything else. But the
  # keeping and clearing should be done at lower t2 contouring level.
  #
  # The result (res_mask) is a 3d integer array sized as the original map.
  # res_mask contains 0 for noise, 1 for valuable information.
  # Mask corresponding to t2 contouring level.
  #
  # The option "zero_all_interblob_region" by default is True, and this means
  # that everything below threshold on t2 level will be 0. If
  # zero_all_interblob_region=False then everything below threshold on t2
  # level will be 1.
  #
  # map preparation for test: a 1d density profile along the first axis with
  # two peaks (heights 40 and 30) on a background of 10.
  cmap = flex.double(flex.grid(100,2,2))
  cmap.fill(10)
  for i in range(10,40):
    cmap[i,1,1] = i
  for i,v in zip(range(40,60), range(40,20,-1)):
    cmap[i,1,1] = v
  for i,v in zip(range(60,70), range(20,30)):
    cmap[i,1,1] = v
  for i,v in zip(range(70,90), range(30,10,-1)):
    cmap[i,1,1] = v
  #for i in range(100):
  #  print "%d : %d" % (i, cmap[i,1,1])
  # Connectivity at three contour levels: at t=25 and t=22 the two peaks are
  # separate blobs; at t=18 they merge into one.
  co1 = maptbx.connectivity(map_data=cmap, threshold=25)
  co2 = maptbx.connectivity(map_data=cmap, threshold=22)
  co3 = maptbx.connectivity(map_data=cmap, threshold=18)
  # Example 1. We have one good blob (volume>12) and one bad (volume < 12).
  # After lowering contour level they are still separate, so we want to keep
  # only big first blob, which has volume=35 on t2 contour level.
  # Here is actual call to get a mask.
  res_mask = co2.noise_elimination_two_cutoffs(
    connectivity_object_at_t1=co1,
    elimination_volume_threshold_at_t1=12,
    zero_all_interblob_region=True)
  assert (res_mask!=0).count(True) == 35
  # 2 good ===> 2 separate
  res_mask = co2.noise_elimination_two_cutoffs(
    connectivity_object_at_t1=co1,
    elimination_volume_threshold_at_t1=8)
  assert (res_mask!=0).count(True) == 50
  # 1 good, 1 bad ===> 1 big
  res_mask = co3.noise_elimination_two_cutoffs(
    connectivity_object_at_t1=co1,
    elimination_volume_threshold_at_t1=12)
  assert (res_mask!=0).count(True) == 63
  # 2 good ===> 1 big
  res_mask = co3.noise_elimination_two_cutoffs(
    connectivity_object_at_t1=co1,
    elimination_volume_threshold_at_t1=8)
  assert (res_mask!=0).count(True) == 63
  # 2 bad ===> 1 big
  res_mask = co3.noise_elimination_two_cutoffs(
    connectivity_object_at_t1=co1,
    elimination_volume_threshold_at_t1=30)
  assert (res_mask!=0).count(True) == 0
  # extreme case: nothing above t1 ==> result: everything is 0 on the mask
  co1 = maptbx.connectivity(map_data=cmap, threshold=40)
  co2 = maptbx.connectivity(map_data=cmap, threshold=22)
  res_mask = co2.noise_elimination_two_cutoffs(
    connectivity_object_at_t1=co1,
    elimination_volume_threshold_at_t1=10)
  assert (res_mask!=0).count(True) == 0
  # extreme case: everything above t1 ==> result is undefined.
  # =================================================================
  # same as above, but zero_all_interblob_region = False
  # In the first test we have 1 good blob and one bad blob. Bad one
  # will have volume=15 on t2 contouring level so we want to have 385
  # non-zeros on resulting mask
  co1 = maptbx.connectivity(map_data=cmap, threshold=25)
  co2 = maptbx.connectivity(map_data=cmap, threshold=22)
  co3 = maptbx.connectivity(map_data=cmap, threshold=18)
  res_mask = co2.noise_elimination_two_cutoffs(
    connectivity_object_at_t1=co1,
    elimination_volume_threshold_at_t1=12,
    zero_all_interblob_region=False)
  #for i in range(100):
  #  print "%d : %d | %d" % (i, cmap[i,1,1], res_mask[i,1,1])
  assert (res_mask!=0).count(True) == 385
  # 2 good ===> 2 separate
  res_mask = co2.noise_elimination_two_cutoffs(
    connectivity_object_at_t1=co1,
    elimination_volume_threshold_at_t1=8,
    zero_all_interblob_region=False)
  assert (res_mask==1).count(True) == 400
  # 1 good, 1 bad ===> 1 big
  res_mask = co3.noise_elimination_two_cutoffs(
    connectivity_object_at_t1=co1,
    elimination_volume_threshold_at_t1=12,
    zero_all_interblob_region=False)
  assert (res_mask!=0).count(True) == 400
  # 2 good ===> 1 big
  res_mask = co3.noise_elimination_two_cutoffs(
    connectivity_object_at_t1=co1,
    elimination_volume_threshold_at_t1=8,
    zero_all_interblob_region=False)
  assert (res_mask!=0).count(True) == 400
  # 2 bad ===> 1 big
  res_mask = co3.noise_elimination_two_cutoffs(
    connectivity_object_at_t1=co1,
    elimination_volume_threshold_at_t1=30,
    zero_all_interblob_region=False)
  assert (res_mask!=0).count(True) == 337
  # extreme case: nothing above t1, something above t2 ==> result:
  # everything between blobs on t2 will be 1.
  co1 = maptbx.connectivity(map_data=cmap, threshold=40)
  co2 = maptbx.connectivity(map_data=cmap, threshold=22)
  res_mask = co2.noise_elimination_two_cutoffs(
    connectivity_object_at_t1=co1,
    elimination_volume_threshold_at_t1=10,
    zero_all_interblob_region=False)
  assert (res_mask!=0).count(True) == 350
def exercise_noise_elimination_two_cutoffs():
  """Regression test for connectivity.noise_elimination_two_cutoffs().

  Builds a 100x2x2 map with two 1-D "peaks" along the first axis (one tall,
  one lower, both on a flat baseline of 10) and checks the returned mask
  voxel counts for every combination of t1/t2 contour levels, volume
  thresholds and the zero_all_interblob_region flag.
  """
  # Purpose: eliminate noise.
  # We want to delete small blobs from the map. On the particular contouring
  # (cutoff) level we can set a threshold for volume and say: all blobs that
  # have volume less than threshold value should be deleted.
  # One more point is that we want to delete them with their 'root', meaning
  # that we are lowering threshold level and put zeros on that bigger regions.
  # But we are zeroing only those which are not merged with big good blobs.
  # Everything under second contouring level also will be zero.
  # ======================
  # From another point of view.
  # We know some threshold value for volume of good blobs on t1 contouring
  # level. We want to keep only them and clear out everything else. But the
  # keeping and clearing should be done at lower t2 contouring level.
  #
  # The result (res_mask) is 3d integer array sized as original map.
  # res_mask contain 0 for noise, 1 for valuable information.
  # Mask corresponding to t2 contouring level.
  #
  # The option "zero_all_interblob_region" by default is True, and this means
  # that everything below threshold on t2 level will be 0. If
  # zero_all_interblob_region=False then everything below threshold on t2
  # level will be 1.
  #
  # Map preparation for test: peak 1 ramps 10..40..20 over [10,60),
  # peak 2 ramps 20..30..10 over [60,90), both along the i axis at (i,1,1).
  cmap = flex.double(flex.grid(100, 2, 2))
  cmap.fill(10)
  for i in range(10, 40):
    cmap[i, 1, 1] = i
  for i, v in zip(range(40, 60), range(40, 20, -1)):
    cmap[i, 1, 1] = v
  for i, v in zip(range(60, 70), range(20, 30)):
    cmap[i, 1, 1] = v
  for i, v in zip(range(70, 90), range(30, 10, -1)):
    cmap[i, 1, 1] = v
  #for i in range(100):
  #  print "%d : %d" % (i, cmap[i,1,1])
  co1 = maptbx.connectivity(map_data=cmap, threshold=25)
  co2 = maptbx.connectivity(map_data=cmap, threshold=22)
  co3 = maptbx.connectivity(map_data=cmap, threshold=18)
  # Example 1. We have one good blob (volume>12) and one bad (volume < 12).
  # After lowering contour level they are still separate, so we want to keep
  # only big first blob, which has volume=35 on t2 contour level.
  # Here is actual call to get a mask.
  res_mask = co2.noise_elimination_two_cutoffs(
      connectivity_object_at_t1=co1,
      elimination_volume_threshold_at_t1=12,
      zero_all_interblob_region=True)
  assert (res_mask != 0).count(True) == 35
  # 2 good ===> 2 separate
  res_mask = co2.noise_elimination_two_cutoffs(
      connectivity_object_at_t1=co1,
      elimination_volume_threshold_at_t1=8)
  assert (res_mask != 0).count(True) == 50
  # 1 good, 1 bad ===> 1 big
  res_mask = co3.noise_elimination_two_cutoffs(
      connectivity_object_at_t1=co1,
      elimination_volume_threshold_at_t1=12)
  assert (res_mask != 0).count(True) == 63
  # 2 good ===> 1 big
  res_mask = co3.noise_elimination_two_cutoffs(
      connectivity_object_at_t1=co1,
      elimination_volume_threshold_at_t1=8)
  assert (res_mask != 0).count(True) == 63
  # 2 bad ===> 1 big
  res_mask = co3.noise_elimination_two_cutoffs(
      connectivity_object_at_t1=co1,
      elimination_volume_threshold_at_t1=30)
  assert (res_mask != 0).count(True) == 0
  # extreme case: nothing above t1 ==> result: everything is 0 on the mask
  co1 = maptbx.connectivity(map_data=cmap, threshold=40)
  co2 = maptbx.connectivity(map_data=cmap, threshold=22)
  res_mask = co2.noise_elimination_two_cutoffs(
      connectivity_object_at_t1=co1,
      elimination_volume_threshold_at_t1=10)
  assert (res_mask != 0).count(True) == 0
  # extreme case: everything above t1 ==> result is undefined.
  # =================================================================
  # same as above, but zero_all_interblob_region = False
  # In the first test we have 1 good blob and one bad blob. Bad one
  # will have volume=15 on t2 contouring level so we want to have 385 non-zeros
  # on resulting mask
  co1 = maptbx.connectivity(map_data=cmap, threshold=25)
  co2 = maptbx.connectivity(map_data=cmap, threshold=22)
  co3 = maptbx.connectivity(map_data=cmap, threshold=18)
  res_mask = co2.noise_elimination_two_cutoffs(
      connectivity_object_at_t1=co1,
      elimination_volume_threshold_at_t1=12,
      zero_all_interblob_region=False)
  #for i in range(100):
  #  print "%d : %d | %d" % (i, cmap[i,1,1], res_mask[i,1,1])
  assert (res_mask != 0).count(True) == 385
  # 2 good ===> 2 separate
  res_mask = co2.noise_elimination_two_cutoffs(
      connectivity_object_at_t1=co1,
      elimination_volume_threshold_at_t1=8,
      zero_all_interblob_region=False)
  assert (res_mask == 1).count(True) == 400
  # 1 good, 1 bad ===> 1 big
  res_mask = co3.noise_elimination_two_cutoffs(
      connectivity_object_at_t1=co1,
      elimination_volume_threshold_at_t1=12,
      zero_all_interblob_region=False)
  assert (res_mask != 0).count(True) == 400
  # 2 good ===> 1 big
  res_mask = co3.noise_elimination_two_cutoffs(
      connectivity_object_at_t1=co1,
      elimination_volume_threshold_at_t1=8,
      zero_all_interblob_region=False)
  assert (res_mask != 0).count(True) == 400
  # 2 bad ===> 1 big
  res_mask = co3.noise_elimination_two_cutoffs(
      connectivity_object_at_t1=co1,
      elimination_volume_threshold_at_t1=30,
      zero_all_interblob_region=False)
  assert (res_mask != 0).count(True) == 337
  # extreme case: nothing above t1, something above t2 ==> result:
  # everything between blobs on t2 will be 1.
  co1 = maptbx.connectivity(map_data=cmap, threshold=40)
  co2 = maptbx.connectivity(map_data=cmap, threshold=22)
  res_mask = co2.noise_elimination_two_cutoffs(
      connectivity_object_at_t1=co1,
      elimination_volume_threshold_at_t1=10,
      zero_all_interblob_region=False)
  assert (res_mask != 0).count(True) == 350
def __init__(self, fmodel, log=None):
  """Decompose the bulk-solvent mask into connected regions and build
  per-region f_mask contributions.

  Workflow: compute a P1 solvent mask at an automatically chosen grid step,
  run connectivity analysis, reduce the labels to the ASU, then accumulate
  structure factors region by region (small regions are screened against an
  mFo-DFc difference map before being accepted).  Results are stored on
  self: fmodel_result, method, grid_step_factor, n_regions, region_volumes.

  :param fmodel: mmtbx f_model manager; source of f_obs/r_free_flags/xrs
  :param log: optional stream for progress output (None = silent)
  """
  # Commonly used objects
  xrs = fmodel.xray_structure
  sgt = xrs.space_group().type()
  # Compute default fmodel and decide on grid step
  fmodel, self.grid_step_factor = get_fmodel_and_grid_step(
      f_obs=fmodel.f_obs(), r_free_flags=fmodel.r_free_flags(), xrs=xrs)
  #fmodel.show()
  #print fmodel.r_work(), fmodel.r_free()
  ###
  mask_data_p1, n_real, crystal_gridding = get_mask_1(
      fmodel=fmodel, grid_step_factor=self.grid_step_factor)
  #ccp4_map(cg=crystal_gridding, file_name="m1.ccp4", map_data=mask_data_p1_)
  #xxx1 = fmodel.f_obs().structure_factors_from_map(map=mask_data_p1,
  #  use_scale = True, anomalous_flag = False, use_sg = False)
  #mask_data_p1, n_real, crystal_gridding = get_mask_2(fmodel=fmodel,
  #  grid_step_factor=self.grid_step_factor)
  #print n_real
  #STOP()
  #xxx2 = fmodel.f_obs().structure_factors_from_map(map=mask_data_p1,
  #  use_scale = True, anomalous_flag = False, use_sg = False)
  #
  #assert approx_equal(xxx1.data(), xxx2.data())
  #print mask_data_p1.all(), mask_data_p1.focus(), mask_data_p1.origin()
  #print mask_data_p1_2.all(), mask_data_p1_2.focus(), mask_data_p1_2.origin()
  #print mask_data_p1_1.count(0), mask_data_p1_2.count(0)
  #assert approx_equal(mask_data_p1_1, mask_data_p1_2)
  #STOP()
  #####
  # Mask connectivity analysis
  co = maptbx.connectivity(map_data=mask_data_p1, threshold=0.01)
  conn = co.result().as_double()
  # Convert result of connectivity analysis from P1 to ASU (in-place)
  conn = asu_map_ext.asymmetric_map(sgt, conn).data()
  # Find unique indices and regions in reduced (P1->ASU) conn.
  # NOTE(review): this scan is O(n_voxels * n_regions); fine for few regions.
  region_indices = flex.double()
  region_volumes = flex.double()
  for i in conn:
    if not i in region_indices: region_indices.append(i)
  for l in region_indices:
    # Region volume as percent of the ASU grid.
    szl = conn.count(l) * 100. / conn.size()
    region_volumes.append(szl)
  # Sort regions largest-first; keep indices and volumes aligned.
  s = flex.sort_permutation(region_volumes, reverse=True)
  region_volumes = region_volumes.select(s)
  region_indices = region_indices.select(s)
  # Convert P1 mask into ASU
  mask_data_asu = asu_map_ext.asymmetric_map(sgt, mask_data_p1).data()
  conn.reshape(mask_data_asu.accessor()) #XXX still need it?
  f_masks = []
  all_zero_found = False
  if (log is not None):
    print("Number of regions:", len(region_indices), file=log)
  mi, ma, me, diff_map_asu = None, None, None, None
  for ii, i in enumerate(region_indices):
    s = conn == i
    si = s.iselection()
    # Skip (once) the region that overlaps zero-valued mask voxels --
    # that is the macromolecule region, not solvent.
    if (not all_zero_found and mask_data_asu.select(si).count(0.) > 0):
      all_zero_found = True
      continue
    # DIFF MAP START
    # Once regions get small (<1% of ASU), compute an mFo-DFc map from the
    # f_mask accumulated so far and use it to screen remaining regions.
    if (region_volumes[ii] < 1 and diff_map_asu is None):  #(ii == 2):
      fmodel_tmp = mmtbx.f_model.manager(
          f_obs=fmodel.f_obs(),
          r_free_flags=fmodel.r_free_flags(),
          f_calc=fmodel.f_calc(),
          f_mask=f_masks[len(f_masks) - 1])
      fmodel_tmp.update_all_scales(remove_outliers=False,
          update_f_part1=False)
      diff_map_p1 = compute_map(fmodel=fmodel_tmp,
          crystal_gridding=crystal_gridding, map_type="mFo-DFc")
      diff_map_asu = asu_map_ext.asymmetric_map(sgt, diff_map_p1).data()
    if (diff_map_asu is not None):
      mi, ma, me = diff_map_asu.select(si).min_max_mean().as_tuple()
      # Reject regions with no positive difference density.
      if (ma < 0. or me < 0.): continue
    # DIFF MAP END
    #XXX this is 4 loops, may be slow. move to C++ if slow.
    mask_data_asu_i = mask_data_asu.deep_copy()
    #mask_data_asu_i = mask_data_asu_i.set_selected(s, 1).set_selected(~s, 0)
    mask_data_asu_i = mask_data_asu_i.set_selected(~s, 0)
    #if(mi is None):
    #  print "region: %5d fraction: %8.4f"%(ii, region_volumes[ii]), len(region_volumes)
    #else:
    #  print "region: %5d fraction: %8.4f"%(ii, region_volumes[ii]), len(region_volumes), "%7.3f %7.3f %7.3f"%(mi,ma,me)
    if (log is not None):
      print("region: %5d fraction: %8.4f" % (ii, region_volumes[ii]), file=log)
      log.flush()
    f_mask_i = fmodel.f_obs().structure_factors_from_asu_map(
        asu_map_data=mask_data_asu_i, n_real=n_real)
    # Large regions (>1%) are folded into the last f_mask; small ones are
    # kept as separate contributions.
    if (len(f_masks) > 0 and region_volumes[ii] > 1):
      f_masks[len(f_masks) - 1] = f_masks[len(f_masks) - 1].array(
          data=f_masks[len(f_masks) - 1].data() + f_mask_i.data())
    else:
      f_masks.append(f_mask_i)
  #
  self.fmodel_result, self.method = helper_3(
      fmodel=fmodel, f_masks=f_masks, log=log)
  #self.fmodel_result.show()
  #
  # Exclude the first (largest/solvent) region from the summary counts.
  self.n_regions = len(region_volumes[1:])
  self.region_volumes = " ".join(
      ["%8.4f" % (v) for v in region_volumes[1:][:10]])  # top 10
def remove_model_density(map_data, xrs, rad_inside=2):
  """Zero out map density belonging to the atomic model and keep the rest.

  Steps: shift/clip the map to non-negative values and sigma-scale it,
  probe map values at atomic sites, apply a model mask (0 inside the
  molecule), threshold out weak density, then keep only connected regions
  of what remains.

  NOTE(review): contains exploratory print()/dir() diagnostics; the
  while-loop over ``cut`` only prints statistics and does not affect the
  returned map.  ``rad_inside`` is currently unused (superseded by the
  commented-out cctbx_maptbx_ext.mask block) -- confirm before removing.

  :param map_data: 3d flex.double map (P1)
  :param xrs: xray_structure supplying sites and unit cell
  :param rad_inside: legacy mask radius, presently not used
  :return: map_data multiplied by the connected-region mask
  """
  #
  # Shift to zero mean, clip negatives, sigma-scale.
  map_data = map_data - flex.mean(map_data)
  map_data = map_data.set_selected(map_data < 0, 0)
  sd = map_data.sample_standard_deviation()
  assert sd != 0
  map_data = map_data / sd
  #
  # Map values interpolated at atomic centers.
  map_at_atoms = flex.double()
  for site_frac in xrs.sites_frac():
    mv = map_data.tricubic_interpolation(site_frac)
    map_at_atoms.append( mv )
  print (flex.mean(map_at_atoms), flex.max(map_at_atoms))
  mmax = flex.max(map_at_atoms)
  cut = 0
  print (dir(map_data))
  # Diagnostic scan: percent of atoms above each integer cutoff level.
  while cut<mmax:
    map_data_ = map_data.deep_copy()
    map_data_ = map_data_.set_selected(map_data<cut, 0)
    map_data_ = map_data_.set_selected(map_data>=cut, 1)
    cut+=1
    zz = flex.double()
    for site_frac in xrs.sites_frac():
      mv = map_data_.value_at_closest_grid_point(site_frac)
      zz.append( mv )
    print(cut, (zz==1).count(True)/zz.size()*100. )
  #
  #radii = flex.double(xrs.sites_frac().size(), rad_inside)
  #mask = cctbx_maptbx_ext.mask(
  #  sites_frac = xrs.sites_frac(),
  #  unit_cell = xrs.unit_cell(),
  #  n_real = map_data.all(),
  #  mask_value_inside_molecule = 0,
  #  mask_value_outside_molecule = 1,
  #  radii = radii)
  # Model mask: 0 inside the molecule, 1 in the solvent region.
  mask = mmtbx.masks.mask_from_xray_structure(
    xray_structure = xrs,
    p1 = True,
    for_structure_factors = True,
    solvent_radius = None,
    shrink_truncation_radius = None,
    n_real = map_data.accessor().all(),
    in_asu = False).mask_data
  maptbx.unpad_in_place(map=mask)
  map_data = map_data * mask
  # Drop weak leftover density (below mean atomic density / 6).
  map_data = map_data.set_selected(map_data < flex.mean(map_at_atoms)/6, 0)
  #
  n = map_data.accessor().all()
  abc = xrs.unit_cell().parameters()[:3]
  print(abc[0]/n[0], abc[1]/n[1], abc[2]/n[2])
  # Grid step along a (assumes near-cubic spacing for volume estimates).
  step = abc[0]/n[0]
  co = maptbx.connectivity(
    map_data                   = map_data.deep_copy(),
    threshold                  = 0.0,
    preprocess_against_shallow = True,
    wrapping                   = False)
  conn = co.result().as_double()
  z = zip(co.regions(),range(0,co.regions().size()))
  sorted_by_volume = sorted(z, key=lambda x: x[0], reverse=True)
  # Build the keep-mask from all regions except the largest (background).
  mask_ = flex.double(flex.grid(n), 0)
  for i_seq, p in enumerate(sorted_by_volume):
    v, i = p
    if i_seq==0: continue
    volume = v*step**3
    print(v, volume)
    if 1:#(volume<3):
      sel = conn==i
      mask_ = mask_.set_selected(sel, 1)
  #
  return map_data*mask_
def prepare_maps(fofc, two_fofc, fem, fofc_cutoff=2, two_fofc_cutoff=0.5,
                 fem_cutoff=0.5, connectivity_cutoff=0.5, local_average=True):
  """
  Combine an mFo-DFc, a 2mFo-DFc and a FEM map into one map that is most
  suitable for real-space refinement.

  The maps are boxes extracted around the region of interest from the whole
  unit-cell map and are expected to be sigma-scaled BEFORE extraction (this
  cannot be asserted here).  Identical gridding of all three boxes is
  asserted.

  Recipe: binarize the residual map at fofc_cutoff, use it to gate the
  truncated 2mFo-DFc and FEM maps, average the two sigma-scaled results,
  keep only the largest connected region above connectivity_cutoff, and
  optionally smooth with a local box average.
  """
  triple = [fofc, two_fofc, fem]
  # Every pair of boxes must share all/focus/origin.
  for ma in triple:
    for mb in triple:
      assert ma.all() == mb.all()
      assert ma.focus() == mb.focus()
      assert ma.origin() == mb.origin()
  # Binarize the residual (mFo-DFc) map: 1 above cutoff, 0 at/below.
  below = fofc <= fofc_cutoff
  gate = fofc.set_selected(below, 0)
  gate = gate.set_selected(~below, 1)
  assert approx_equal([flex.max(gate), flex.min(gate)], [1, 0])

  def _truncate_and_gate(m, cutoff):
    # Zero values at/below cutoff, then confine to the residual-map gate.
    return m.set_selected(m <= cutoff, 0) * gate

  def _sigma_scale(m):
    # Divide by sample standard deviation unless it is zero.
    sd = m.sample_standard_deviation()
    return m if sd == 0 else m / sd

  # Truncate, gate and sigma-scale the 2mFo-DFc and FEM maps, then average.
  two_fofc_part = _sigma_scale(_truncate_and_gate(two_fofc, two_fofc_cutoff))
  fem_part = _sigma_scale(_truncate_and_gate(fem, fem_cutoff))
  combined = _sigma_scale((two_fofc_part + fem_part) / 2.)
  # Connectivity analysis: locate the largest non-background region.
  co = maptbx.connectivity(map_data=combined, threshold=connectivity_cutoff)
  best_i, best_v = None, -1.e+9
  for region_id, region_volume in enumerate(co.regions()):
    if region_id > 0 and region_volume > best_v:
      best_i, best_v = region_id, region_volume
  region_mask = co.result()
  keep = region_mask == best_i
  region_mask = region_mask.set_selected(keep, 1)
  region_mask = region_mask.set_selected(~keep, 0)
  assert region_mask.count(1) == best_v
  # Final filter: keep only the largest region; optionally smooth.
  combined = combined * region_mask.as_double()
  if local_average:
    maptbx.map_box_average(map_data=combined, cutoff=0.5, index_span=1)
  return combined
def exercise_work_in_asu():
  """Exploratory exercise: connectivity analysis on an ASU map vs the full
  P1 map, followed by merging of symmetry-related regions.

  NOTE(review): this is a scratch/diagnostic routine (many prints, dir()
  calls and commented-out STOP()s); it makes no assertions.
  """
  pdb_str = """
CRYST1   10.000   10.000   10.000  90.00  90.00  90.00 P 4
HETATM    1  C    C      1       2.000   2.000   2.000  1.00 20.00           C
HETATM    2  C    C      2       4.000   4.000   4.000  1.00 20.00           C
END
"""
  from time import time
  pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_str)
  xrs = pdb_inp.xray_structure_simple()
  # xrs.show_summary()
  d_min = 1
  fc = xrs.structure_factors(d_min=d_min).f_calc()
  symmetry_flags = maptbx.use_space_group_symmetry
  fftmap = fc.fft_map(symmetry_flags=symmetry_flags)
  # rmup = fftmap.real_map_unpadded()
  rm = fftmap.real_map().deep_copy()
  maptbx.unpad_in_place(rm)
  mmm = rm.as_1d().min_max_mean()
  print(mmm.min, mmm.max, mmm.mean)
  # rmup = fftmap.real_map_unpadded()
  # print (dir(rm))
  print("full size:", fftmap.real_map().accessor().focus())
  print(rm[0, 0, 0])
  # print (type(rm))
  # print (dir(rm))
  # STOP()
  # print(rmup[0,0,0])
  # Reduce the full P1 map to the asymmetric unit.
  amap0 = asymmetric_map(xrs.space_group().type(), rm)
  # print(dir(amap0))
  mmm = amap0.data().as_1d().min_max_mean()
  print(mmm.min, mmm.max, mmm.mean)
  amap_data = amap0.data()
  write_ccp4_map('amap.ccp4', xrs.unit_cell(), xrs.space_group(), amap_data)
  write_ccp4_map('rm.ccp4', xrs.unit_cell(), xrs.space_group(), rm)
  # for i in range(50):
  #   print(i, amap_data[i,0,0])
  exp_map = amap0.symmetry_expanded_map()
  print(exp_map[0, 0, 0])
  # for i in range(32):
  #   for j in range(32):
  #     for k in range(32):
  #       assert approx_equal(rm[i,j,k], exp_map[i,j,k])
  # print(dir(amap0))
  # STOP()
  # This produces 2 separate blobs
  sg = xrs.space_group()
  print(dir(sg))
  print(sg.all_ops())
  print(sg.info())
  print("amap0 size:", amap0.data().accessor().focus())
  # STOP()
  print(type(amap0.data()))
  threshold = 0.
  preprocess_against_shallow = True
  print('threshold:', threshold)
  print('preprocess_against_shallow', preprocess_against_shallow)
  # Timed connectivity run on the ASU map (threshold argument left
  # commented out -- uses the default).
  t0 = time()
  co_amap = maptbx.connectivity(
      map_data=amap0.data(),
      # threshold=threshold,
      # space_group=xrs.space_group(),
      # uc_dimensions=exp_map.accessor().focus(),
      # wrapping=False,
      preprocess_against_shallow=preprocess_against_shallow)
  t1 = time()
  print('amap time:', t1 - t0)
  original_regions = list(co_amap.regions())
  print('start regions:', original_regions)
  print('max coords', list(co_amap.maximum_coors()))
  print('max vals', list(co_amap.maximum_values()))
  # print(dir(exp_map))
  print(type(exp_map))
  print("exp_map size:", exp_map.accessor().focus())
  # Timed connectivity run on the full (unpadded) P1 map for comparison.
  t0 = time()
  co_full = maptbx.connectivity(
      map_data=rm,
      threshold=threshold,
      wrapping=False,
      preprocess_against_shallow=preprocess_against_shallow)
  t1 = time()
  print('full time:', t1 - t0)
  original_regions = list(co_full.regions())
  print('start regions:', original_regions)
  print('max coords', list(co_full.maximum_coors()))
  print('max vals', list(co_full.maximum_values()))
  # STOP()
  # co.experiment_with_symmetry(
  #   space_group=xrs.space_group(),
  #   uc_dims=exp_map.accessor().focus())
  co_full.merge_symmetry_related_regions(
      space_group=xrs.space_group(),
      uc_dims=exp_map.accessor().focus())
  new_regions = list(co_full.regions())
  print('new regions:', new_regions)
  print('max coords', list(co_full.maximum_coors()))
  print('max vals', list(co_full.maximum_values()))
def __init__(self, xray_structure, step, volume_cutoff, f_obs, f_calc=None,
             log=sys.stdout, write_masks=False):
  """Mosaic bulk-solvent setup: decompose the P1 solvent mask into
  symmetry-merged connected regions and compute per-region f_mask arrays.

  The largest region (macromolecule, id 0) is skipped; large solvent
  regions (i_seq==1 or >5% of the cell) are accumulated into f_mask_0;
  smaller regions are screened against an mFo-DFc map and stored
  individually in self.FV.  Sets: conn, regions, FV, f_mask_0, anomaly,
  do_mosaic, solvent_content, crystal_gridding, n_real.

  :param xray_structure: model used to compute the solvent mask
  :param step: map grid step (Angstrom); also used for voxel volumes
  :param volume_cutoff: skip regions smaller than this volume (A^3); None
    disables the cutoff
  :param f_obs: observed amplitudes; defines the Miller set for f_mask
  :param f_calc: optional calculated structure factors (kept via
    adopt_init_args)
  :param log: output stream
  :param write_masks: if True, write the whole-cell mask to an MRC file
  """
  adopt_init_args(self, locals())
  #
  self.dsel = f_obs.d_spacings().data() >= 0
  self.miller_array = f_obs.select(self.dsel)
  #
  self.crystal_symmetry = self.xray_structure.crystal_symmetry()
  # compute mask in p1 (via ASU)
  self.crystal_gridding = maptbx.crystal_gridding(
      unit_cell=xray_structure.unit_cell(),
      space_group_info=xray_structure.space_group_info(),
      symmetry_flags=maptbx.use_space_group_symmetry,
      step=step)
  self.n_real = self.crystal_gridding.n_real()
  # XXX Where do we want to deal with H and occ==0?
  mask_p1 = mmtbx.masks.mask_from_xray_structure(
      xray_structure=xray_structure,
      p1=True,
      for_structure_factors=True,
      n_real=self.n_real,
      in_asu=False).mask_data
  maptbx.unpad_in_place(map=mask_p1)
  self.solvent_content = 100. * mask_p1.count(1) / mask_p1.size()
  if (write_masks):
    write_map_file(crystal_symmetry=xray_structure.crystal_symmetry(),
        map_data=mask_p1, file_name="mask_whole.mrc")
  # conn analysis
  co = maptbx.connectivity(map_data=mask_p1, threshold=0.01,
      preprocess_against_shallow=True, wrapping=True)
  co.merge_symmetry_related_regions(
      space_group=xray_structure.space_group())
  del mask_p1
  self.conn = co.result().as_double()
  z = zip(co.regions(), range(0, co.regions().size()))
  # Largest regions first; each entry is (voxel_count, region_id).
  sorted_by_volume = sorted(z, key=lambda x: x[0], reverse=True)
  f_mask_data_0 = flex.complex_double(f_obs.data().size(), 0)
  FM = OrderedDict()
  self.FV = OrderedDict()
  self.mc = None
  diff_map = None
  mean_diff_map = None
  self.regions = OrderedDict()
  print(
      " volume_p1 uc(%) volume_asu id mFo-DFc: min,max,mean,sd",
      file=log)
  # Check if self.anomaly: if the second-largest solvent region is more
  # than a quarter of the largest, treat the decomposition as anomalous.
  self.anomaly = False
  if (len(sorted_by_volume) > 2):
    uc_fractions = [
        round(p[0] * 100. / self.conn.size(), 0)
        for p in sorted_by_volume[1:]
    ]
    if (uc_fractions[0] / 4 < uc_fractions[1]):
      self.anomaly = True
  #
  for i_seq, p in enumerate(sorted_by_volume):
    v, i = p
    # skip macromolecule
    if (i == 0): continue
    # skip small volume
    volume = v * step**3
    uc_fraction = v * 100. / self.conn.size()
    if (volume_cutoff is not None):
      if volume < volume_cutoff: continue
    selection = self.conn == i
    mask_i_asu = self.compute_i_mask_asu(selection=selection, volume=volume)
    volume_asu = (mask_i_asu > 0).count(True) * step**3
    # Large regions are folded straight into f_mask_0.
    if (i_seq == 1 or uc_fraction > 5):
      f_mask_i = self.compute_f_mask_i(mask_i_asu)
      if (not self.anomaly):
        f_mask_data_0 += f_mask_i.data()
    # Once regions are small (<5%), build the screening mFo-DFc map once.
    if (uc_fraction < 5 and diff_map is None and not self.anomaly):
      diff_map = self.compute_diff_map(f_mask_data=f_mask_data_0)
    mi, ma, me, sd = None, None, None, None
    if (diff_map is not None):
      blob = diff_map.select(selection.iselection())
      mean_diff_map = flex.mean(
          diff_map.select(selection.iselection()))
      mi, ma, me = flex.min(blob), flex.max(blob), flex.mean(blob)
      sd = blob.sample_standard_deviation()
    print("%12.3f" % volume,
        "%8.4f" % round(uc_fraction, 4),
        "%12.3f" % volume_asu,
        "%3d" % i,
        "%7s" % str(None) if diff_map is None else
        "%7.3f %7.3f %7.3f %7.3f" % (mi, ma, me, sd),
        file=log)
    # Tiny regions (<1%) with non-positive mean difference density are noise.
    if (uc_fraction < 1 and mean_diff_map is not None
        and mean_diff_map <= 0): continue
    self.regions[i_seq] = group_args(id=i,
        i_seq=i_seq,
        volume=volume,
        uc_fraction=uc_fraction,
        diff_map=group_args(mi=mi, ma=ma, me=me, sd=sd))
    # Small regions are kept as individual f_mask contributions.
    if (not (i_seq == 1 or uc_fraction > 5)):
      f_mask_i = self.compute_f_mask_i(mask_i_asu)
      FM.setdefault(round(volume, 3), []).append(f_mask_i.data())
      self.FV[f_mask_i] = [round(volume, 3), round(uc_fraction, 1)]
  #
  # NOTE(review): f_mask_0 below is a local and appears unused afterwards
  # (self.f_mask_0 is assigned separately) -- confirm before removing.
  f_mask_0 = f_obs.customized_copy(data=f_mask_data_0)
  #
  self.f_mask_0 = None
  if (not self.anomaly):
    self.f_mask_0 = f_obs.customized_copy(data=f_mask_data_0)
  self.do_mosaic = False
  if (len(self.FV.keys()) > 1):
    self.do_mosaic = True
def __init__(self,
             xray_structure,
             step,
             volume_cutoff=None,
             mean_diff_map_threshold=None,
             compute_whole=False,
             largest_only=False,
             wrapping=True,
             f_obs=None,
             r_sol=1.1,
             r_shrink=0.9,
             f_calc=None,
             log=None,
             write_masks=False):
  """Mosaic bulk-solvent setup (variant): decompose the P1 solvent mask
  into symmetry-merged connected regions and compute f_mask arrays.

  Differences from the sibling constructor: explicit solvent/shrink radii,
  optional whole-mask structure factors (compute_whole), optional
  largest-region-only mode, configurable wrapping, and a mean-diff-map
  acceptance threshold instead of hard-coded screening.  Sets: conn,
  regions, FV, f_mask_0 (large regions), f_mask (accepted regions),
  f_mask_whole, solvent_content, do_mosaic, n_regions.

  :param xray_structure: model used to compute the solvent mask
  :param step: map grid step (Angstrom); also used for voxel volumes
  :param volume_cutoff: skip regions smaller than this volume (A^3)
  :param mean_diff_map_threshold: reject a region when its mean mFo-DFc
    value is at or below this threshold (None disables the check)
  :param compute_whole: also compute f_mask of the whole solvent mask
  :param largest_only: stop after the largest (>=1% uc) regions
  :param wrapping: passed to the connectivity analysis
  :param f_obs: observed amplitudes; defines the Miller set
  :param r_sol: solvent radius for the mask calculation
  :param r_shrink: shrink-truncation radius for the mask calculation
  :param f_calc: optional calculated structure factors (via adopt_init_args)
  :param log: optional output stream (None = silent)
  :param write_masks: if True, write the whole-cell mask to an MRC file
  """
  adopt_init_args(self, locals())
  #
  # Work on reflections with d >= 3 A only.
  self.d_spacings = f_obs.d_spacings().data()
  self.sel_3inf = self.d_spacings >= 3
  self.miller_array = f_obs.select(self.sel_3inf)
  #
  self.crystal_symmetry = self.xray_structure.crystal_symmetry()
  # compute mask in p1 (via ASU)
  self.crystal_gridding = maptbx.crystal_gridding(
      unit_cell=xray_structure.unit_cell(),
      space_group_info=xray_structure.space_group_info(),
      symmetry_flags=maptbx.use_space_group_symmetry,
      step=step)
  self.n_real = self.crystal_gridding.n_real()
  # XXX Where do we want to deal with H and occ==0?
  mask_p1 = mmtbx.masks.mask_from_xray_structure(
      xray_structure=xray_structure,
      p1=True,
      for_structure_factors=True,
      solvent_radius=r_sol,
      shrink_truncation_radius=r_shrink,
      n_real=self.n_real,
      in_asu=False).mask_data
  maptbx.unpad_in_place(map=mask_p1)
  self.f_mask_whole = None
  if (compute_whole):
    mask = asu_map_ext.asymmetric_map(
        xray_structure.crystal_symmetry().space_group().type(),
        mask_p1).data()
    self.f_mask_whole = self._inflate(
        self.miller_array.structure_factors_from_asu_map(
            asu_map_data=mask, n_real=self.n_real))
  self.solvent_content = 100. * mask_p1.count(1) / mask_p1.size()
  if (write_masks):
    write_map_file(crystal_symmetry=xray_structure.crystal_symmetry(),
        map_data=mask_p1, file_name="mask_whole.mrc")
  # conn analysis
  co = maptbx.connectivity(map_data=mask_p1, threshold=0.01,
      preprocess_against_shallow=False, wrapping=wrapping)
  co.merge_symmetry_related_regions(
      space_group=xray_structure.space_group())
  del mask_p1
  self.conn = co.result().as_double()
  z = zip(co.regions(), range(0, co.regions().size()))
  # Largest regions first; each entry is (voxel_count, region_id).
  sorted_by_volume = sorted(z, key=lambda x: x[0], reverse=True)
  #
  f_mask_data_0 = flex.complex_double(f_obs.data().size(), 0)
  f_mask_data = flex.complex_double(f_obs.data().size(), 0)
  self.FV = OrderedDict()
  self.mc = None
  diff_map = None
  mean_diff_map = None
  self.regions = OrderedDict()
  self.f_mask_0 = None
  self.f_mask = None
  #
  if (log is not None):
    print(" # volume_p1 uc(%) mFo-DFc: min,max,mean,sd", file=log)
  #
  for i_seq, p in enumerate(sorted_by_volume):
    v, i = p
    # skip macromolecule
    if (i == 0): continue
    # skip small volume
    volume = v * step**3
    uc_fraction = v * 100. / self.conn.size()
    if (volume_cutoff is not None):
      if volume < volume_cutoff: continue
    self.regions[i_seq] = group_args(id=i,
        i_seq=i_seq,
        volume=volume,
        uc_fraction=uc_fraction)
    selection = self.conn == i
    mask_i_asu = self.compute_i_mask_asu(selection=selection, volume=volume)
    # NOTE(review): volume_asu is computed but not used further here.
    volume_asu = (mask_i_asu > 0).count(True) * step**3
    # Large regions (>=1% of the cell) go straight into f_mask_0.
    if (uc_fraction >= 1):
      f_mask_i = self.compute_f_mask_i(mask_i_asu)
      f_mask_data_0 += f_mask_i.data()
    elif (largest_only):
      break
    # Once regions are small (<1%), build the screening mFo-DFc map once.
    if (uc_fraction < 1 and diff_map is None):
      diff_map = self.compute_diff_map(f_mask_data=f_mask_data_0)
    mi, ma, me, sd = None, None, None, None
    if (diff_map is not None):
      blob = diff_map.select(selection.iselection())
      mean_diff_map = flex.mean(
          diff_map.select(selection.iselection()))
      mi, ma, me = flex.min(blob), flex.max(blob), flex.mean(blob)
      sd = blob.sample_standard_deviation()
    if (log is not None):
      print("%3d" % i_seq,
          "%12.3f" % volume,
          "%8.4f" % round(uc_fraction, 4),
          "%7s" % str(None) if diff_map is None else
          "%7.3f %7.3f %7.3f %7.3f" % (mi, ma, me, sd),
          file=log)
    # Reject regions whose mean difference density is at/below threshold.
    if (mean_diff_map_threshold is not None
        and mean_diff_map is not None
        and mean_diff_map <= mean_diff_map_threshold):
      continue
    f_mask_i = self.compute_f_mask_i(mask_i_asu)
    f_mask_data += f_mask_i.data()
    self.FV[f_mask_i] = [round(volume, 3), round(uc_fraction, 1)]
  #
  self.f_mask_0 = f_obs.customized_copy(data=f_mask_data_0)
  self.f_mask = f_obs.customized_copy(data=f_mask_data)
  self.do_mosaic = False
  # Determine number of secondary regions
  self.n_regions = len(self.FV.values())
  if (self.n_regions > 1):
    self.do_mosaic = True
def exercise_symmetry_related_regions():
  """Regression test for connectivity.merge_symmetry_related_regions().

  Two carbons in P4 produce symmetry-related density blobs; after merging,
  all blobs belonging to the same atom must share one region id, region
  counts must shrink accordingly, and voxel totals must be preserved.
  """
  pdb_str = """
CRYST1   10.000   10.000   10.000  90.00  90.00  90.00 P 4
HETATM    1  C    C      1       2.000   2.000   2.000  1.00 20.00           C
HETATM    2  C    C      2       4.000   4.000   4.000  1.00 20.00           C
END
"""
  pdb_inp = iotbx.pdb.input(source_info=None, lines=pdb_str)
  xrs = pdb_inp.xray_structure_simple()
  # xrs.show_summary()
  d_min = 1.
  fc = xrs.structure_factors(d_min=d_min).f_calc()
  symmetry_flags = maptbx.use_space_group_symmetry
  fftmap = fc.fft_map(symmetry_flags=symmetry_flags)
  rmup = fftmap.real_map_unpadded()
  # print ('rmup size', rmup.accessor().focus())
  # This produces 4 separate blobs
  co = maptbx.connectivity(map_data=rmup, threshold=400, wrapping=False,
      preprocess_against_shallow=False)
  original_regions = list(co.regions())
  # print ('regions', original_regions)
  assert len(original_regions) == 5
  beg_mask = co.result()
  # dv_mask = co.volume_cutoff_mask(0).as_double() ???
  # write_ccp4_map('volume_mask_1000.ccp4', fc.unit_cell(), fc.space_group(), dv_mask)
  co.merge_symmetry_related_regions(space_group=xrs.space_group())
  new_mask = co.result()
  # Background unchanged; regions (1,3) and (2,4) are symmetry mates.
  assert beg_mask.count(0) == new_mask.count(0)
  assert beg_mask.count(1) + beg_mask.count(3) == new_mask.count(1)
  assert beg_mask.count(2) + beg_mask.count(4) == new_mask.count(2)
  new_regions = list(co.regions())
  # BUG FIX: the original assert compared sum(original_regions[1:]) with
  # itself (a tautology). Merging must preserve the total above-threshold
  # voxel count, so compare against the merged regions instead.
  assert sum(original_regions[1:]) == sum(new_regions[1:])
  assert len(new_regions) == 3
  # Maximum bookkeeping is invalidated by the merge.
  assert list(co.maximum_values()) == []
  assert list(co.maximum_coors()) == []
  # ======================================================================
  # At this threshold 2 carbons merge. But one of the blobs is cut,
  # therefore producing 3 separate regions in unit cell
  co = maptbx.connectivity(
      map_data=rmup,
      # threshold=1000,
      threshold=1.1,
      wrapping=False,
      preprocess_against_shallow=True)
  original_regions = list(co.regions())
  assert len(original_regions) == 4
  # print ('regions', original_regions)
  beg_mask = co.result()
  # Particular numbers here seem to be platform-dependent
  # These should work on Mac
  # assert beg_mask.count(0) == 29019
  # assert beg_mask.count(1) == 1885
  # assert beg_mask.count(2) == 1714
  # assert beg_mask.count(3) == 150
  # assert original_regions == [29019, 1885, 1714, 150]
  co.merge_symmetry_related_regions(space_group=xrs.space_group())
  new_mask = co.result()
  # assert new_mask.count(0) == 29019
  # assert new_mask.count(1) == 3749
  assert beg_mask.count(0) == new_mask.count(0)
  assert beg_mask.count(1) + beg_mask.count(2) + beg_mask.count(
      3) == new_mask.count(1)
  new_regions = list(co.regions())
  assert len(new_regions) == 2
  # print('new regs', new_regions)
  # assert new_regions == [29019, 3749]
  assert list(co.maximum_values()) == []
  assert list(co.maximum_coors()) == []
def exercise_preprocess_against_shallow():
  """Regression test for the preprocess_against_shallow option of
  maptbx.connectivity().

  Cases: (1) a thin 1-voxel line next to a solid cube is removed; (2)
  interaction with map wrapping across the cell boundary; (3) a spike
  attached to a blob is shaved off; (4) removal that requires multiple
  preprocessing passes.
  """
  # case 1: simple
  cmap = flex.double(flex.grid(30, 30, 30))
  cmap.fill(1)
  for i in range(10, 20):
    for j in range(10, 20):
      for k in range(10, 20):
        cmap[i, j, k] = 10
  # 1-voxel-thick line: a "shallow" feature.
  for i in range(10, 20):
    cmap[i, 5, 5] = 10
  co = maptbx.connectivity(map_data=cmap, threshold=5)
  minb, maxb = co.get_blobs_boundaries_tuples()
  assert minb == [(0, 0, 0), (10, 5, 5), (10, 10, 10)]
  assert maxb == [(29, 29, 29), (19, 5, 5), (19, 19, 19)]
  co = maptbx.connectivity(map_data=cmap, threshold=5,
      preprocess_against_shallow=True)
  minb, maxb = co.get_blobs_boundaries_tuples()
  assert minb == [(0, 0, 0), (10, 10, 10)]
  assert maxb == [(29, 29, 29), (19, 19, 19)]
  # note dissapearance of (10,5,5)(19,5,5)
  # check new map values: preprocessing lowers shallow voxels below the
  # threshold (to threshold - 1 = 4), mutating cmap in place.
  for i in range(10, 20):
    assert approx_equal(cmap[i, 5, 5], 4)
  # case 2: wrapping
  cmap = flex.double(flex.grid(30, 30, 30))
  cmap.fill(1)
  for i in range(10, 20):
    for j in range(10, 20):
      for k in range(10, 20):
        cmap[i, j, k] = 10
  # Two 1-voxel-thick slabs touching opposite faces of the cell.
  for i in range(10, 20):
    for j in range(10, 20):
      cmap[i, j, 0] = 10
      cmap[i, j, 29] = 10
  # standard, no wrap, 4 regions
  co = maptbx.connectivity(map_data=cmap, threshold=5, wrapping=False,
      preprocess_against_shallow=False)
  minb, maxb = co.get_blobs_boundaries_tuples()
  assert minb == [(0, 0, 0), (10, 10, 0), (10, 10, 10), (10, 10, 29)]
  assert maxb == [(29, 29, 29), (19, 19, 0), (19, 19, 19), (19, 19, 29)]
  # 2 small regions merged
  co = maptbx.connectivity(map_data=cmap, threshold=5, wrapping=True,
      preprocess_against_shallow=False)
  minb, maxb = co.get_blobs_boundaries_tuples()
  assert minb == [(0, 0, 0), (10, 10, 0), (10, 10, 10)]
  assert maxb == [(29, 29, 29), (19, 19, 29), (19, 19, 19)]
  # with wrapping the region preserved: together the slabs are 2 voxels
  # thick, so they are not shallow.
  co = maptbx.connectivity(map_data=cmap, threshold=5, wrapping=True,
      preprocess_against_shallow=True)
  minb, maxb = co.get_blobs_boundaries_tuples()
  assert minb == [(0, 0, 0), (10, 10, 0), (10, 10, 10)]
  assert maxb == [(29, 29, 29), (19, 19, 29), (19, 19, 19)]
  # without wrapping - no
  co = maptbx.connectivity(map_data=cmap, threshold=5, wrapping=False,
      preprocess_against_shallow=True)
  minb, maxb = co.get_blobs_boundaries_tuples()
  assert minb == [(0, 0, 0), (10, 10, 10)]
  assert maxb == [(29, 29, 29), (19, 19, 19)]
  # case 3: blob has a spike that needs to be 'shaved off'
  cmap = flex.double(flex.grid(30, 30, 30))
  cmap.fill(1)
  for i in range(10, 20):
    for j in range(10, 20):
      for k in range(10, 20):
        cmap[i, j, k] = 10
  for i in range(0, 10):
    cmap[i, 15, 15] = 10
  co = maptbx.connectivity(map_data=cmap, threshold=5,
      preprocess_against_shallow=False)
  volumes = list(co.regions())
  assert volumes == [25990, 1010]
  co = maptbx.connectivity(map_data=cmap, threshold=5,
      preprocess_against_shallow=True)
  volumes = list(co.regions())
  assert volumes == [26000, 1000]
  for i in range(0, 10):
    assert approx_equal(cmap[i, 15, 15], 4)
  # case 4: need at least two passes -- a plus-sign cluster whose arms
  # become shallow only after the previous pass removed their neighbors.
  cmap = flex.double(flex.grid(30, 30, 30))
  cmap.fill(1)
  cmap[10, 10, 10] = 10
  cmap[9, 10, 10] = 10
  cmap[11, 10, 10] = 10
  cmap[10, 9, 10] = 10
  cmap[10, 11, 10] = 10
  cmap[10, 10, 9] = 10
  cmap[10, 10, 11] = 10
  co = maptbx.connectivity(map_data=cmap, threshold=5,
      preprocess_against_shallow=False)
  volumes = list(co.regions())
  assert volumes == [26993, 7]
  co = maptbx.connectivity(map_data=cmap, threshold=5,
      preprocess_against_shallow=True)
  volumes = list(co.regions())
  assert volumes == [27000]
  assert co.preprocessing_changed_voxels == 7
  assert co.preprocessing_n_passes == 3
def run_group(symbol, preprocess_against_shallow):
  """Round-trip check of connectivity region merging under symmetry.

  Builds a random structure in the given space group, computes a P1 bulk
  mask, runs connectivity analysis, merges symmetry-related regions,
  collapses the merged label map to the ASU and expands it back, then
  verifies that re-running connectivity on the restored mask reproduces
  the original region volumes.
  """
  sg = space_group_info(symbol)
  print("\n== space group %d"%symbol)
  structure = random_structure.xray_structure(
    space_group_info       = sg,
    volume_per_atom        = 15.,
    general_positions_only = False,
    elements               = ('C', 'N', 'O', 'H')*31,
    min_distance           = 1.0)
  sg_type = structure.space_group().type()
  # grid fine enough for the mask
  gridding = maptbx.crystal_gridding(
    unit_cell        = structure.unit_cell(),
    space_group_info = structure.space_group_info(),
    symmetry_flags   = maptbx.use_space_group_symmetry,
    step             = 0.4)
  p1_mask = mmtbx.masks.mask_from_xray_structure(
    xray_structure        = structure,
    p1                    = True,
    for_structure_factors = True,
    n_real                = gridding.n_real(),
    in_asu                = False).mask_data
  maptbx.unpad_in_place(map=p1_mask)
  # the mask must be strictly binary with both values present
  assert flex.min(p1_mask) == 0
  assert flex.max(p1_mask) == 1
  conn = maptbx.connectivity(
    map_data                   = p1_mask,
    threshold                  = 0.01,
    preprocess_against_shallow = preprocess_against_shallow,
    wrapping                   = True)
  print("Regions in P1")
  regions_p1 = list(conn.regions())
  total_p1 = flex.sum(flex.int(regions_p1))
  print(regions_p1, total_p1)
  labels_p1 = conn.result().as_double()
  print(flex.min(labels_p1), flex.max(labels_p1))
  print("Merge symmetry related")
  conn.merge_symmetry_related_regions(space_group=structure.space_group())
  labels_merged = conn.result().as_double()
  regions_merged = list(conn.regions())
  total_merged = flex.sum(flex.int(regions_merged))
  print(list(regions_merged), total_merged)
  # collapse the merged label map to the ASU, then expand back to P1
  amap = asu_map_ext.asymmetric_map(sg_type, labels_merged)
  labels_asu = amap.data()
  labels_restored = amap.symmetry_expanded_map()
  print(flex.min(labels_asu), flex.max(labels_asu))
  # binarize the restored labels and analyze connectivity again
  restored_mask = labels_restored.set_selected(labels_restored > 0.01, 1)
  maptbx.unpad_in_place(map=restored_mask)
  conn = maptbx.connectivity(
    map_data                   = restored_mask,
    threshold                  = 0.01,
    preprocess_against_shallow = preprocess_against_shallow,
    wrapping                   = True)
  print("Restored")
  regions_restored = list(conn.regions())
  total_restored = flex.sum(flex.int(regions_restored))
  print(regions_restored, total_restored)
  labels_restored = conn.result().as_double()
  print(flex.min(labels_restored), flex.max(labels_restored))
  assert regions_p1 == regions_restored
  # assert total_p1 == total_merged  (kept disabled, as in the original)
  assert total_merged == total_restored
def __init__(self, miller_array, xray_structure, step, volume_cutoff,
      f_obs=None, r_free_flags=None, f_calc=None, write_masks=False):
  """Connectivity-based decomposition of the bulk-solvent mask.

  Computes a whole-unit-cell solvent mask in P1 on a grid of the given
  step, splits it into connected regions, and converts each region that
  passes the volume cutoff into structure factors on the miller_array
  indices.

  Stored on self (among the adopted __init__ args):
    self.conn    - connectivity label map (flex.double, region id per voxel)
    self.f_mask  - structure factors accumulated over all accepted regions
    self.f_masks - one miller array per group of equal-volume small regions
  """
  adopt_init_args(self, locals())
  # f_obs, f_calc and r_free_flags must be supplied together or not at all
  assert [f_obs, f_calc, r_free_flags].count(None) in [0, 3]
  self.crystal_symmetry = self.xray_structure.crystal_symmetry()
  # compute mask in p1 (via ASU)
  self.crystal_gridding = maptbx.crystal_gridding(
    unit_cell=xray_structure.unit_cell(),
    space_group_info=xray_structure.space_group_info(),
    symmetry_flags=maptbx.use_space_group_symmetry,
    step=step)
  self.n_real = self.crystal_gridding.n_real()
  mask_p1 = mmtbx.masks.mask_from_xray_structure(
    xray_structure=xray_structure,
    p1=True,
    for_structure_factors=True,
    n_real=self.n_real,
    in_asu=False).mask_data
  maptbx.unpad_in_place(map=mask_p1)
  # NOTE(review): solvent_content is computed but neither used nor stored
  # below - confirm whether it should be reported or removed.
  solvent_content = 100. * mask_p1.count(1) / mask_p1.size()
  if (write_masks):
    write_map_file(crystal_symmetry=xray_structure.crystal_symmetry(),
      map_data=mask_p1, file_name="mask_whole.mrc")
  # conn analysis: label the connected solvent regions
  co = maptbx.connectivity(map_data=mask_p1, threshold=0.01,
    preprocess_against_shallow=True, wrapping=True)
  del mask_p1  # free the (potentially large) P1 mask as early as possible
  self.conn = co.result().as_double()
  # pair each region's voxel count with its region id, largest first
  z = zip(co.regions(), range(0, co.regions().size()))
  sorted_by_volume = sorted(z, key=lambda x: x[0], reverse=True)
  f_mask_data = flex.complex_double(miller_array.data().size(), 0)
  f_mask_data_0 = flex.complex_double(miller_array.data().size(), 0)
  #f_masks = []
  FM = OrderedDict()
  diff_map = None
  mean_diff_map = None
  print(" volume_p1 uc(%) volume_asu id <mFo-DFc>")
  for p in sorted_by_volume:
    v, i = p
    volume = v * step**3  # region volume in A^3 (voxel count * voxel volume)
    uc_fraction = v * 100. / self.conn.size()  # region size as % of the cell
    if (volume_cutoff is not None):
      if volume < volume_cutoff: continue  # skip regions below the cutoff
    if (i == 0): continue  # region id 0 is the background (non-region) label
    selection = self.conn == i
    mask_i_asu = self.compute_i_mask_asu(selection=selection, volume=volume)
    volume_asu = (mask_i_asu > 0).count(True) * step**3
    # the region may have no voxels left after mapping to the ASU
    if (volume_asu < 1.e-6): continue
    # large regions (the first region, or >5% of the cell) always
    # contribute; they also define f_mask_data_0 used for the diff map
    if (i == 1 or uc_fraction > 5):
      f_mask_i = miller_array.structure_factors_from_asu_map(
        asu_map_data=mask_i_asu, n_real=self.n_real)
      f_mask_data_0 += f_mask_i.data()
      f_mask_data += f_mask_i.data()
    # compute the difference map once, when the first small region is seen
    if (uc_fraction < 5 and diff_map is None):
      diff_map = self.compute_diff_map(f_mask_data=f_mask_data_0)
    if (diff_map is not None):
      mean_diff_map = flex.mean(diff_map.select(selection.iselection()))
    print("%12.3f" % volume, "%8.4f" % round(uc_fraction, 4),
      "%12.3f" % volume_asu, "%3d" % i,
      "%7s" % str(None) if diff_map is None else "%7.3f" % mean_diff_map)
    #if(mean_diff_map is not None and mean_diff_map<=0): continue
    # small regions: add their contribution and remember it keyed by the
    # rounded volume
    if (not (i == 1 or uc_fraction > 5)):
      f_mask_i = miller_array.structure_factors_from_asu_map(
        asu_map_data=mask_i_asu, n_real=self.n_real)
      f_mask_data += f_mask_i.data()
      FM.setdefault(round(volume, 3), []).append(f_mask_i.data())
  # group asu pieces corresponding to the same region in P1
  # (presumably regions sharing the same rounded volume are symmetry
  # mates of one P1 region - TODO confirm this keying is robust)
  F_MASKS = []
  for k, v in zip(FM.keys(), FM.values()):
    tmp = flex.complex_double(miller_array.data().size(), 0)
    for v_ in v:
      tmp += v_
    F_MASKS.append(miller_array.customized_copy(data=tmp))
  #
  f_mask = miller_array.customized_copy(data=f_mask_data)
  #
  self.f_mask = f_mask
  self.f_masks = F_MASKS