def print_completeness_vs_dose(self):
    '''Print cumulative completeness vs. dose for every reflection set.

    For each (crystal_name, dataset_name) key in self._reflections the
    expected number of unique reflections is obtained from
    compute_unique_reflections() using the matching unit cell, space
    group and the configured resolution limits. The lowest value of the
    base column ("dose") at which each reflection was recorded is then
    binned in steps of self._range_width starting at self._range_min,
    the bins are summed cumulatively and each total is printed as a
    fraction of the expected count.

    Output, written to stdout between $TABLE / $GRAPHS / $$ markers
    (this looks like CCP4 loggraph markup - confirm with the consumer):

      native data:     dose, fraction I
      anomalous data:  dose, fraction I+, fraction I-,
                       fraction with I+ or I-, fraction with I+ and I-

    self._reflections[key] is expected to map a Miller index (h, k, l)
    to a list of (pm, base, i, sigi) tuples, where pm is the anomalous
    flag (1: I+, 0: I- or native), base the dose-like value and i, sigi
    the intensity and its error estimate.

    Returns None.'''

    for crystal_name, dataset_name in sorted(self._reflections):
        key = (crystal_name, dataset_name)
        reflections = self._reflections[key]
        uc = self._unit_cells[key]
        sg = self._space_groups[key]

        nref = len(compute_unique_reflections(
            uc, sg, self._anomalous,
            self._resolution_high, self._resolution_low))

        print('Cumulative completeness analysis for %s/%s' %
              (crystal_name, dataset_name))
        print('Expecting %d reflections' % nref)

        # The binning is the same for both table flavours, so work it
        # out once per dataset.
        nsteps = 1 + int(
            (self._range_max - self._range_min) / self._range_width)

        # Sentinel "dose" beyond the last bin: a reflection (or a Friedel
        # mate) which was never observed keeps this value and therefore
        # falls outside the table.
        unmeasured = self._range_max + self._range_width

        if self._anomalous:
            # Fractions in the anomalous table are all quoted relative
            # to the number of unique reflections ignoring Friedel
            # pairs.
            nref_n = len(compute_unique_reflections(
                uc, sg, False,
                self._resolution_high, self._resolution_low))

            print('$TABLE : Completeness vs. %s, %s/%s:' %
                  (self._base_column, crystal_name, dataset_name))
            print('$GRAPHS: Completeness:N:1,2,3,4,5: $$')
            print('%8s %5s %5s %5s %5s $$ $$' %
                  (self._base_column, 'I+', 'I-', 'I', 'dI'))

            iplus_count = [0] * nsteps
            iminus_count = [0] * nsteps
            ieither_count = [0] * nsteps
            iboth_count = [0] * nsteps

            for h, k, l in reflections:
                hkl = (h, k, l)

                # Centricity depends only on the index, so test it once
                # per reflection rather than once per observation.
                centric = sg.is_centric(hkl)

                base_min_iplus = unmeasured
                base_min_iminus = unmeasured

                for pm, base, i, sigi in reflections[hkl]:
                    # A centric reflection counts for both I+ and I-;
                    # an acentric one only for the mate flagged by pm.
                    if centric or pm:
                        base_min_iplus = min(base_min_iplus, base)
                    if centric or not pm:
                        base_min_iminus = min(base_min_iminus, base)

                start_iplus = int(
                    (base_min_iplus - self._range_min) /
                    self._range_width)
                start_iminus = int(
                    (base_min_iminus - self._range_min) /
                    self._range_width)

                # "either" is complete as soon as the first mate is
                # seen, "both" only once the second one is.
                start_either = min(start_iplus, start_iminus)
                start_both = max(start_iplus, start_iminus)

                if start_iplus < nsteps:
                    iplus_count[start_iplus] += 1
                if start_iminus < nsteps:
                    iminus_count[start_iminus] += 1
                if start_either < nsteps:
                    ieither_count[start_either] += 1
                if start_both < nsteps:
                    iboth_count[start_both] += 1

            # Turn the per-bin counts into cumulative counts.
            for j in range(1, nsteps):
                iplus_count[j] += iplus_count[j - 1]
                iminus_count[j] += iminus_count[j - 1]
                ieither_count[j] += ieither_count[j - 1]
                iboth_count[j] += iboth_count[j - 1]

            total = float(nref_n)
            for j in range(nsteps):
                print('%8.1f %5.3f %5.3f %5.3f %5.3f' %
                      (self._range_min + j * self._range_width,
                       iplus_count[j] / total,
                       iminus_count[j] / total,
                       ieither_count[j] / total,
                       iboth_count[j] / total))

            print('$$')

        else:
            print('$TABLE : Completeness vs. %s, %s/%s:' %
                  (self._base_column, crystal_name, dataset_name))
            print('$GRAPHS: Completeness:N:1, 2: $$')
            print('%8s %5s $$ $$' % (self._base_column, 'I'))

            i_count = [0] * nsteps

            for h, k, l in reflections:
                base_min = unmeasured

                for pm, base, i, sigi in reflections[(h, k, l)]:
                    base_min = min(base_min, base)

                start = int(
                    (base_min - self._range_min) / self._range_width)

                # Same guard as the anomalous table: a reflection with
                # no observations, or first seen beyond the last bin,
                # must not index past the end of the list.
                if start < nsteps:
                    i_count[start] += 1

            # Turn the per-bin counts into cumulative counts.
            for j in range(1, nsteps):
                i_count[j] += i_count[j - 1]

            total = float(nref)
            for j in range(nsteps):
                print('%8.1f %5.3f' %
                      (self._range_min + j * self._range_width,
                       i_count[j] / total))

            print('$$')
def parallel_calculate_completeness_vs_dose_anomalous(
        self, reflections, crystal_name=None, dataset_name=None):
    '''Compute cumulative completeness vs. dose for anomalous data.

    The Miller indices are split into roughly self._ncpu chunks and the
    per-chunk counts are accumulated into shared histograms. The chunks
    are currently processed one after another; the split only prepares
    the ground for real parallel execution.

    Parameters:

      reflections   mapping of Miller index -> list of
                    (pm, base, i, sigi) tuples, pm being the anomalous
                    flag (true: I+, false: I-) and base the dose-like
                    value; i and sigi are not used here.
      crystal_name,
      dataset_name  optional key selecting the unit cell and space
                    group. When either is omitted the key is found by
                    looking for the entry of self._reflections which is
                    this very reflections object.

    Returns a 4-tuple of lists (I+, I-, I+ or I-, I+ and I-), each with
    one cumulative fraction per dose bin, expressed relative to the
    number of unique reflections ignoring Friedel pairs.

    Raises ValueError if no key is given and reflections is not one of
    the sets held in self._reflections.'''

    # The cell and symmetry are stored per (crystal, dataset); work out
    # which one these reflections belong to if the caller did not say.
    if crystal_name is None or dataset_name is None:
        for key in self._reflections:
            if self._reflections[key] is reflections:
                crystal_name, dataset_name = key
                break
        else:
            raise ValueError(
                'cannot determine crystal / dataset for reflections')

    uc = self._unit_cells[(crystal_name, dataset_name)]
    sg = self._space_groups[(crystal_name, dataset_name)]

    nref_n = len(compute_unique_reflections(
        uc, sg, False, self._resolution_high, self._resolution_low))

    nsteps = 1 + int(
        (self._range_max - self._range_min) / self._range_width)

    iplus_count = [0] * nsteps
    iminus_count = [0] * nsteps
    ieither_count = [0] * nsteps
    iboth_count = [0] * nsteps

    # Ceiling division, clamped to at least one, so that a small (or
    # empty) reflection list never yields a zero step for range() and
    # the chunk size stays an integer under true division.
    hkls = list(reflections)
    ncpu = max(1, int(self._ncpu))
    chunk_size = max(1, (len(hkls) + ncpu - 1) // ncpu)
    chunks = [hkls[j: j + chunk_size]
              for j in range(0, len(hkls), chunk_size)]

    # Sentinel "dose" beyond the last bin for a Friedel mate which was
    # never observed.
    unmeasured = self._range_max + self._range_width

    # TODO: spawn parallel workers here, one per chunk, and sum their
    # histograms; for now the chunks are handled serially.
    for chunk in chunks:
        for hkl in chunk:
            # Centricity depends only on the index, so test it once per
            # reflection rather than once per observation.
            centric = sg.is_centric(hkl)

            base_min_iplus = unmeasured
            base_min_iminus = unmeasured

            for pm, base, i, sigi in reflections[hkl]:
                # A centric reflection counts for both I+ and I-; an
                # acentric one only for the mate flagged by pm.
                if centric or pm:
                    base_min_iplus = min(base_min_iplus, base)
                if centric or not pm:
                    base_min_iminus = min(base_min_iminus, base)

            start_iplus = int(
                (base_min_iplus - self._range_min) / self._range_width)
            start_iminus = int(
                (base_min_iminus - self._range_min) / self._range_width)

            # "either" is complete as soon as the first mate is seen,
            # "both" only once the second one is.
            start_either = min(start_iplus, start_iminus)
            start_both = max(start_iplus, start_iminus)

            if start_iplus < nsteps:
                iplus_count[start_iplus] += 1
            if start_iminus < nsteps:
                iminus_count[start_iminus] += 1
            if start_either < nsteps:
                ieither_count[start_either] += 1
            if start_both < nsteps:
                iboth_count[start_both] += 1

    # Turn the per-bin counts into cumulative counts.
    for j in range(1, nsteps):
        iplus_count[j] += iplus_count[j - 1]
        iminus_count[j] += iminus_count[j - 1]
        ieither_count[j] += ieither_count[j - 1]
        iboth_count[j] += iboth_count[j - 1]

    # Express every histogram as a fraction of the expected total; each
    # result is built from its own counter.
    total = float(nref_n)
    comp_iplus = [ip / total for ip in iplus_count]
    comp_iminus = [im / total for im in iminus_count]
    comp_ieither = [ie / total for ie in ieither_count]
    comp_iboth = [ib / total for ib in iboth_count]

    return comp_iplus, comp_iminus, comp_ieither, comp_iboth