def truncate(self): """Actually perform the truncation procedure.""" if not self._hklin: raise RuntimeError("hklin not defined") if not self._hklout: raise RuntimeError("hklout not defined") self.add_command_line("-hklin") self.add_command_line(self._hklin) self.add_command_line("-hklout") self.add_command_line(self._hklout) if self._nres: self.add_command_line("-nres") self.add_command_line("%d" % self._nres) if self._anomalous: self.add_command_line("-colano") self.add_command_line("/*/*/[I(+),SIGI(+),I(-),SIGI(-)]") self.add_command_line("-colin") self.add_command_line("/*/*/[IMEAN,SIGIMEAN]") self._xmlout = os.path.join(self.get_working_directory(), "%d_truncate.xml" % self.get_xpid()) self.add_command_line("-xmlout") self.add_command_line(self._xmlout) self.start() self.close_wait() try: self.check_for_errors() except RuntimeError as e: try: os.remove(self._hklout) except Exception: pass logger.debug(str(e)) raise RuntimeError("ctruncate failure") nref = 0 for record in self.get_all_output(): if "Number of reflections:" in record: nref = int(record.split()[-1]) if "Estimate of Wilson B factor:" in record: self._b_factor = float(record.split(":")[1].split()[0]) self._nref_in, self._nref_out = nref, nref self._nabsent = 0 moments = None results = self.parse_ccp4_loggraph() if "Acentric moments of E using Truncate method" in results: moments = transpose_loggraph( results["Acentric moments of E using Truncate method"]) elif "Acentric moments of I" in results: moments = transpose_loggraph(results["Acentric moments of I"]) elif "Acentric moments of E" in results: moments = transpose_loggraph(results["Acentric moments of E"]) else: logger.debug("Acentric moments of E/I not found") self._moments = moments
def _scale(self): "Perform all of the operations required to deliver the scaled data." epochs = self._sweep_handler.get_epochs() sc = self._updated_aimless() sc.set_hklin(self._prepared_reflections) sc.set_chef_unmerged(True) sc.set_new_scales_file("%s.scales" % self._scalr_xname) user_resolution_limits = {} for epoch in epochs: si = self._sweep_handler.get_sweep_information(epoch) pname, xname, dname = si.get_project_info() sname = si.get_sweep_name() intgr = si.get_integrater() if intgr.get_integrater_user_resolution(): dmin = intgr.get_integrater_high_resolution() if (dname, sname) not in user_resolution_limits: user_resolution_limits[(dname, sname)] = dmin elif dmin < user_resolution_limits[(dname, sname)]: user_resolution_limits[(dname, sname)] = dmin start, end = si.get_batch_range() if (dname, sname) in self._scalr_resolution_limits: resolution, _ = self._scalr_resolution_limits[(dname, sname)] sc.add_run(start, end, exclude=False, resolution=resolution, name=sname) else: sc.add_run(start, end, name=sname) sc.set_hklout( os.path.join( self.get_working_directory(), f"{self._scalr_pname}_{self._scalr_xname}_scaled_test.mtz", ) ) if self.get_scaler_anomalous(): sc.set_anomalous() # what follows, sucks failover = PhilIndex.params.xia2.settings.failover if failover: try: sc.scale() except RuntimeError as e: es = str(e) if ( "bad batch" in es or "negative scales run" in es or "no observations" in es ): # first ID the sweep from the batch no batch = int(es.split()[-1]) epoch = self._identify_sweep_epoch(batch) sweep = self._scalr_integraters[epoch].get_integrater_sweep() # then remove it from my parent xcrystal self.get_scaler_xcrystal().remove_sweep(sweep) # then remove it from the scaler list of intergraters # - this should really be a scaler interface method del self._scalr_integraters[epoch] # then tell the user what is happening logger.info( "Sweep %s gave negative scales - removing", sweep.get_name() ) # then reset the prepare, do, finish flags self.set_scaler_prepare_done(False) self.set_scaler_done(False) self.set_scaler_finish_done(False) # and return return else: raise e else: sc.scale() # then gather up all of the resulting reflection files # and convert them into the required formats (.sca, .mtz.) loggraph = sc.parse_ccp4_loggraph() resolution_info = {} reflection_files = sc.get_scaled_reflection_files() for dataset in reflection_files: FileHandler.record_temporary_file(reflection_files[dataset]) for key in loggraph: if "Analysis against resolution" in key: dataset = key.split(",")[-1].strip() resolution_info[dataset] = transpose_loggraph(loggraph[key]) # check in here that there is actually some data to scale..! 
    if not resolution_info:
        raise RuntimeError("no resolution info")

    highest_suggested_resolution = self.assess_resolution_limits(
        sc.get_unmerged_reflection_file(), user_resolution_limits
    )

    if not self.get_scaler_done():
        logger.debug("Returning as scaling not finished...")
        return

    batch_info = {}

    for key in loggraph:
        if "Analysis against Batch" in key:
            dataset = key.split(",")[-1].strip()
            batch_info[dataset] = transpose_loggraph(loggraph[key])

    sc = self._updated_aimless()

    FileHandler.record_log_file(
        f"{self._scalr_pname} {self._scalr_xname} aimless", sc.get_log_file()
    )

    sc.set_hklin(self._prepared_reflections)
    sc.set_new_scales_file("%s_final.scales" % self._scalr_xname)

    for epoch in epochs:
        si = self._sweep_handler.get_sweep_information(epoch)
        pname, xname, dname = si.get_project_info()
        sname = si.get_sweep_name()
        start, end = si.get_batch_range()
        resolution_limit, _ = self._scalr_resolution_limits[(dname, sname)]
        sc.add_run(
            start, end, exclude=False, resolution=resolution_limit, name=xname
        )

    sc.set_hklout(
        os.path.join(
            self.get_working_directory(),
            f"{self._scalr_pname}_{self._scalr_xname}_scaled.mtz",
        )
    )

    if self.get_scaler_anomalous():
        sc.set_anomalous()

    sc.scale()

    FileHandler.record_xml_file(
        f"{self._scalr_pname} {self._scalr_xname} aimless", sc.get_xmlout()
    )

    data = sc.get_summary()
    scales_file = sc.get_new_scales_file()
    loggraph = sc.parse_ccp4_loggraph()

    standard_deviation_info = {}
    for key in loggraph:
        if "standard deviation v. Intensity" in key:
            dataset = key.split(",")[-1].strip()
            standard_deviation_info[dataset] = transpose_loggraph(loggraph[key])

    resolution_info = {}
    for key in loggraph:
        if "Analysis against resolution" in key:
            dataset = key.split(",")[-1].strip()
            resolution_info[dataset] = transpose_loggraph(loggraph[key])

    batch_info = {}
    for key in loggraph:
        if "Analysis against Batch" in key:
            dataset = key.split(",")[-1].strip()
            batch_info[dataset] = transpose_loggraph(loggraph[key])

    # finally put all of the results "somewhere useful"
    self._scalr_statistics = data

    self._scalr_scaled_refl_files = copy.deepcopy(sc.get_scaled_reflection_files())

    sc = self._updated_aimless()
    sc.set_hklin(self._prepared_reflections)
    sc.set_scales_file(scales_file)

    self._wavelengths_in_order = []

    for epoch in epochs:
        si = self._sweep_handler.get_sweep_information(epoch)
        pname, xname, dname = si.get_project_info()
        sname = si.get_sweep_name()
        start, end = si.get_batch_range()
        resolution_limit, _ = self._scalr_resolution_limits[(dname, sname)]
        sc.add_run(
            start, end, exclude=False, resolution=resolution_limit, name=sname
        )
        if dname not in self._wavelengths_in_order:
            self._wavelengths_in_order.append(dname)

    sc.set_hklout(
        os.path.join(
            self.get_working_directory(),
            f"{self._scalr_pname}_{self._scalr_xname}_scaled.mtz",
        )
    )

    sc.set_scalepack()

    if self.get_scaler_anomalous():
        sc.set_anomalous()

    sc.scale()

    self._update_scaled_unit_cell()

    self._scalr_scaled_reflection_files = {}
    self._scalr_scaled_reflection_files["sca"] = {}
    self._scalr_scaled_reflection_files["sca_unmerged"] = {}
    self._scalr_scaled_reflection_files["mtz_unmerged"] = {}

    for key in self._scalr_scaled_refl_files:
        hklout = self._scalr_scaled_refl_files[key]

        scaout = "%s.sca" % hklout[:-4]
        self._scalr_scaled_reflection_files["sca"][key] = scaout
        FileHandler.record_data_file(scaout)

        scalepack = os.path.join(
            os.path.split(hklout)[0],
            os.path.split(hklout)[1]
            .replace("_scaled", "_scaled_unmerged")
            .replace(".mtz", ".sca"),
        )
        self._scalr_scaled_reflection_files["sca_unmerged"][key] = scalepack
        FileHandler.record_data_file(scalepack)
        mtz_unmerged = os.path.splitext(scalepack)[0] + ".mtz"
        self._scalr_scaled_reflection_files["mtz_unmerged"][key] = mtz_unmerged
        FileHandler.record_data_file(mtz_unmerged)

        if self._scalr_cell_esd is not None:
            # patch .mtz and overwrite unit cell information
            import xia2.Modules.Scaler.tools as tools

            override_cell = self._scalr_cell_dict.get(
                f"{self._scalr_pname}_{self._scalr_xname}_{key}"
            )[0]

            tools.patch_mtz_unit_cell(mtz_unmerged, override_cell)
            tools.patch_mtz_unit_cell(hklout, override_cell)

    if PhilIndex.params.xia2.settings.merging_statistics.source == "cctbx":
        for key in self._scalr_scaled_refl_files:
            stats = self._compute_scaler_statistics(
                self._scalr_scaled_reflection_files["mtz_unmerged"][key],
                selected_band=(highest_suggested_resolution, None),
                wave=key,
            )
            self._scalr_statistics[
                (self._scalr_pname, self._scalr_xname, key)
            ] = stats

    sc = self._updated_aimless()
    sc.set_hklin(self._prepared_reflections)
    sc.set_scales_file(scales_file)

    self._wavelengths_in_order = []

    for epoch in epochs:
        si = self._sweep_handler.get_sweep_information(epoch)
        pname, xname, dname = si.get_project_info()
        sname = si.get_sweep_name()
        start, end = si.get_batch_range()
        resolution_limit, _ = self._scalr_resolution_limits[(dname, sname)]
        sc.add_run(
            start, end, exclude=False, resolution=resolution_limit, name=sname
        )
        if dname not in self._wavelengths_in_order:
            self._wavelengths_in_order.append(dname)

    sc.set_hklout(
        os.path.join(
            self.get_working_directory(),
            f"{self._scalr_pname}_{self._scalr_xname}_chef.mtz",
        )
    )
    sc.set_chef_unmerged(True)

    if self.get_scaler_anomalous():
        sc.set_anomalous()

    sc.scale()

    if not PhilIndex.params.dials.fast_mode:
        try:
            self._generate_absorption_map(sc)
        except Exception as e:
            # map generation may fail for a number of reasons,
            # e.g. matplotlib broken
            logger.debug("Could not generate absorption map (%s)", e)
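
# Hedged sketch of the batch-to-epoch lookup relied on by the failover branch
# above (_identify_sweep_epoch): given the offending batch number recovered
# from the Aimless error message, find the sweep whose batch range contains
# it. The sweep-handler API shown is inferred from its usage above, not
# quoted from xia2.
def _identify_sweep_epoch_sketch(sweep_handler, batch):
    for epoch in sweep_handler.get_epochs():
        si = sweep_handler.get_sweep_information(epoch)
        start, end = si.get_batch_range()
        if start <= batch <= end:
            return epoch
    raise RuntimeError("batch %d not in any sweep" % batch)
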
def truncate(self): """Actually perform the truncation procedure.""" self.check_hklin() self.check_hklout() self.start() if self._anomalous: self.input("anomalous yes") else: self.input("anomalous no") if self._nres: self.input("nres %d" % self._nres) if not self._wilson: self.input("scale 1") self.close_wait() try: self.check_for_errors() self.check_ccp4_errors() except RuntimeError: try: os.remove(self.get_hklout()) except Exception: pass raise RuntimeError("truncate failure") # parse the output for interesting things, including the # numbers of reflections in and out (isn't that a standard CCP4 # report?) and the number of absent reflections. self._nref_in, self._nref_out = self.read_nref_hklin_hklout( self.get_all_output()) # FIXME guess I should be reading this properly... self._nabsent = self._nref_in - self._nref_out for line in self.get_all_output(): if "Least squares straight line gives" in line: list = line.replace("=", " ").split() if not "***" in list[6]: self._b_factor = float(list[6]) else: Debug.write("no B factor available") if "LSQ Line Gradient" in line: self._wilson_fit_grad = float(line.split()[-1]) resol_width = max(self._wilson_fit_range) - min( self._wilson_fit_range) if self._wilson_fit_grad > 0 and resol_width > 1.0 and False: raise RuntimeError( "wilson plot gradient positive: %.2f" % self._wilson_fit_grad) elif self._wilson_fit_grad > 0: Debug.write( "Positive gradient but not much wilson plot") if "Uncertainty in Gradient" in line: self._wilson_fit_grad_sd = float(line.split()[-1]) if "X Intercept" in line: self._wilson_fit_m = float(line.split()[-1]) if "Uncertainty in Intercept" in line: self._wilson_fit_m_sd = float(line.split()[-1]) if "Resolution range" in line: self._wilson_fit_range = map(float, line.split()[-2:]) results = self.parse_ccp4_loggraph() moments = transpose_loggraph( results["Acentric Moments of E for k = 1,3,4,6,8"]) # keys we want in this are "Resln_Range" "1/resol^2" and # MomentZ2. The last of these should be around two, but is # likely to be a little different to this. self._moments = moments
def parse(self):
    '''Parse the output of the chef run.'''

    results = self.parse_ccp4_loggraph()

    rd_keys = []
    comp_keys = []

    scp_data = None
    comp_data = {}
    rd_data = {}

    datasets_damaged = []

    for key in results:
        if 'Completeness vs. ' in key:
            comp_keys.append(key)
            comp_data[key.split()[-1]] = transpose_loggraph(results[key])
        elif 'R vs. ' in key:
            rd_keys.append(key)
            wavelength = key.split()[-1]
            rd_data[wavelength] = transpose_loggraph(results[key])
            values = [float(x) for x in rd_data[wavelength]['2_Rd']]
            digest = self.digest_rd(values)
            # stream.write('Rd score (%s): %.2f' % (wavelength, digest))
            if digest > 3:
                datasets_damaged.append((wavelength, digest))
        elif 'Normalised radiation' in key:
            scp_data = transpose_loggraph(results[key])
        elif 'Dose vs. BATCH' in key:
            self._dose_profile = transpose_loggraph(results[key])

    # right, so first work through these to define the limits from
    # where the first set is 50% complete to 90% complete, which
    # will establish the benchmark, then calculate a kinda
    # Z-score for the subsequent Scp values

    lowest_50 = None
    lowest_90 = None

    i_col = '2_I'
    dose_col = '1_DOSE'

    for dataset in comp_data:
        if '5_dI' in comp_data[dataset]:
            i_col = '4_I'
        if '1_BATCH' in comp_data[dataset]:
            dose_col = '1_BATCH'

        completeness = comp_data[dataset][i_col]

        local_50 = None
        local_90 = None

        max_comp = max(map(float, completeness))

        for j, dose in enumerate(comp_data[dataset][dose_col]):
            comp = float(completeness[j])
            if comp > (0.5 * max_comp) and not local_50:
                local_50 = float(dose)
            if comp > (0.9 * max_comp) and not local_90:
                local_90 = float(dose)

        # check if we have dose profile etc. available

        stop_doses = []
        groups = []

        if self._dose_profile:
            wedges = sorted(self.digest_dose_profile())

            # given these and the completeness curves, need to make a
            # choice as to when to stop... will be necessary here
            # to have an indication of the logical wavelength to
            # which the measurements belong

            # wedges is a list of:
            # FIRST_DOSE FIRST_BATCH SIZE EXPOSURE DATASET

            # digest this as follows: if sweeps switch between
            # A and B, or A, B and C then these are tied wedges: aim
            # for uniform total rotation / number of images. if
            # there is the same data set in subsequent chunks,
            # these are tied inverse beams

            # ok, logic. expect at most four wavelengths interleaved,
            # most likely two or three. also assume that the sizes of
            # the wedges should be the same. only want to "block" these.
            # if len(wedges) == 1: can consider any point at which
            # to cut off the data. N.B. don't forget EDNA strategies...

            # ok, easiest thing is encode a set of rules.
            stop_doses, groups = digest_wedges(wedges)

            for j, g in enumerate(groups):
                stream.write('Group %d: %s' % (j + 1, g))

        if not lowest_50:
            lowest_50 = local_50
        if local_50 < lowest_50:
            lowest_50 = local_50

        if not lowest_90:
            lowest_90 = local_90
        if local_90 < lowest_90:
            lowest_90 = local_90

    # now build up the reference population

    scp_reference = []

    scp_key = None
    for k in scp_data:
        if 'Scp(d)' in k:
            scp_key = k

    for j, d in enumerate(scp_data[dose_col]):
        dose = float(d)
        if dose >= lowest_50 and dose <= lowest_90:
            scp_reference.append(float(scp_data[scp_key][j]))

    m, s = mean_sd(scp_reference)

    dose = scp_data[dose_col][0]
    scp_max = 0.0

    if s == 0.0:
        stream.write('Insufficient measurements for analysis')
        return

    for j, d in enumerate(scp_data[dose_col]):
        dose = float(d)
        scp = float(scp_data[scp_key][j])
        z = (scp - m) / s

        if dose < lowest_90:
            scp_max = max(scp, scp_max)
            continue

        if z > 3 and scp > scp_max:
            break

        scp_max = max(scp, scp_max)

    if not datasets_damaged:
        stream.write('No significant radiation damage detected')
        return

    if not groups:
        stop_dose = dose
    elif groups == ['Single wedge']:
        stop_dose = dose
    else:
        for stop_dose in stop_doses:
            if stop_dose > dose:
                break

    stream.write('Significant radiation damage detected:')

    for wavelength, digest in datasets_damaged:
        stream.write('Rd analysis (%s): %.2f' % (wavelength, digest))

    if stop_dose == float(scp_data[dose_col][-1]):
        stream.write('Conclusion: use all data')
    else:
        stream.write('Conclusion: cut off after %s ~ %.1f' %
                     (dose_col.replace('1_', ''), stop_dose))

    return
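
# Minimal sketch of the mean_sd helper assumed above: the mean and
# (population) standard deviation of the reference Scp values, which turn
# later Scp values into the Z-scores tested against z > 3. The helper name
# here is hypothetical.
import math

def _mean_sd_sketch(values):
    m = sum(values) / len(values)
    var = sum((v - m) ** 2 for v in values) / len(values)
    return m, math.sqrt(var)
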
def _scale(self):
    '''Actually scale all of the data together.'''

    from xia2.Handlers.Environment import debug_memory_usage
    debug_memory_usage()

    Journal.block(
        'scaling', self.get_scaler_xcrystal().get_name(), 'XSCALE',
        {'scaling model': 'default (all)'})

    epochs = sorted(self._sweep_information)

    xscale = self.XScale()

    xscale.set_spacegroup_number(self._xds_spacegroup)
    xscale.set_cell(self._scalr_cell)

    Debug.write('Set CELL: %.2f %.2f %.2f %.2f %.2f %.2f' %
                tuple(self._scalr_cell))
    Debug.write('Set SPACEGROUP_NUMBER: %d' % self._xds_spacegroup)

    Debug.write('Gathering measurements for scaling')

    for epoch in epochs:

        # get the prepared reflections
        reflections = self._sweep_information[epoch]['prepared_reflections']

        # and the wavelength that this belongs to
        dname = self._sweep_information[epoch]['dname']
        sname = self._sweep_information[epoch]['sname']

        # and the resolution range for the reflections
        intgr = self._sweep_information[epoch]['integrater']
        Debug.write('Epoch: %d' % epoch)
        Debug.write('HKL: %s (%s/%s)' % (reflections, dname, sname))

        resolution_low = intgr.get_integrater_low_resolution()
        resolution_high, _ = self._scalr_resolution_limits.get(
            (dname, sname), (0.0, None))

        resolution = (resolution_high, resolution_low)

        xscale.add_reflection_file(reflections, dname, resolution)

    # set the global properties of the sample
    xscale.set_crystal(self._scalr_xname)
    xscale.set_anomalous(self._scalr_anomalous)

    debug_memory_usage()

    xscale.run()

    scale_factor = xscale.get_scale_factor()

    Debug.write('XSCALE scale factor found to be: %e' % scale_factor)

    # record the log file

    pname = self._scalr_pname
    xname = self._scalr_xname

    FileHandler.record_log_file(
        '%s %s XSCALE' % (pname, xname),
        os.path.join(self.get_working_directory(), 'XSCALE.LP'))

    # check for outlier reflections and if a number are found
    # then iterate (that is, rerun XSCALE, rejecting these outliers)

    if not PhilIndex.params.dials.fast_mode and \
            not PhilIndex.params.xds.keep_outliers:
        xscale_remove = xscale.get_remove()
        if xscale_remove:
            current_remove = []
            final_remove = []

            # first ensure that there are no duplicate entries...
            if os.path.exists(os.path.join(
                    self.get_working_directory(), 'REMOVE.HKL')):
                with open(os.path.join(
                        self.get_working_directory(), 'REMOVE.HKL')) as fh:
                    for line in fh.readlines():
                        h, k, l = map(int, line.split()[:3])
                        z = float(line.split()[3])

                        if (h, k, l, z) not in current_remove:
                            current_remove.append((h, k, l, z))

                for c in xscale_remove:
                    if c in current_remove:
                        continue
                    final_remove.append(c)

                Debug.write('%d alien reflections are already removed' %
                            (len(xscale_remove) - len(final_remove)))

            else:
                # we want to remove all of the new dodgy reflections
                final_remove = xscale_remove

            remove_hkl = open(os.path.join(
                self.get_working_directory(), 'REMOVE.HKL'), 'w')

            z_min = PhilIndex.params.xds.z_min
            rejected = 0

            # write in the old reflections
            for remove in current_remove:
                z = remove[3]
                if z >= z_min:
                    remove_hkl.write('%d %d %d %f\n' % remove)
                else:
                    rejected += 1
            Debug.write('Wrote %d old reflections to REMOVE.HKL' %
                        (len(current_remove) - rejected))
            Debug.write('Rejected %d as z < %f' % (rejected, z_min))

            # and the new reflections
            rejected = 0
            used = 0
            for remove in final_remove:
                z = remove[3]
                if z >= z_min:
                    used += 1
                    remove_hkl.write('%d %d %d %f\n' % remove)
                else:
                    rejected += 1
            Debug.write('Wrote %d new reflections to REMOVE.HKL' %
                        (len(final_remove) - rejected))
            Debug.write('Rejected %d as z < %f' % (rejected, z_min))

            remove_hkl.close()

            # we want to rerun the finishing step so...
            # unless we have added no new reflections
            if used:
                self.set_scaler_done(False)

    if not self.get_scaler_done():
        Chatter.write('Excluding outlier reflections Z > %.2f' %
                      PhilIndex.params.xds.z_min)
        return

    debug_memory_usage()

    # now get the reflection files out and merge them with aimless

    output_files = xscale.get_output_reflection_files()
    wavelength_names = output_files.keys()

    # these are per wavelength - also allow for user defined resolution
    # limits a la bug # 3183. No longer...

    for epoch in self._sweep_information.keys():
        input = self._sweep_information[epoch]
        intgr = input['integrater']
        rkey = input['dname'], input['sname']
        if intgr.get_integrater_user_resolution():
            dmin = intgr.get_integrater_high_resolution()
            if rkey not in self._user_resolution_limits:
                self._scalr_resolution_limits[rkey] = (dmin, None)
                self._user_resolution_limits[rkey] = dmin
            elif dmin < self._user_resolution_limits[rkey]:
                self._scalr_resolution_limits[rkey] = (dmin, None)
                self._user_resolution_limits[rkey] = dmin

    self._scalr_scaled_refl_files = {}
    self._scalr_statistics = {}

    max_batches = 0
    mtz_dict = {}

    project_info = {}
    for epoch in self._sweep_information.keys():
        pname = self._scalr_pname
        xname = self._scalr_xname
        dname = self._sweep_information[epoch]['dname']
        reflections = os.path.split(
            self._sweep_information[epoch]['prepared_reflections'])[-1]
        project_info[reflections] = (pname, xname, dname)

    for epoch in self._sweep_information.keys():
        self._sweep_information[epoch]['scaled_reflections'] = None

    debug_memory_usage()

    for wavelength in wavelength_names:
        hklin = output_files[wavelength]

        xsh = XDSScalerHelper()
        xsh.set_working_directory(self.get_working_directory())

        ref = xsh.split_and_convert_xscale_output(
            hklin, 'SCALED_', project_info, 1.0 / scale_factor)

        for hklout in ref.keys():
            for epoch in self._sweep_information.keys():
                if os.path.split(self._sweep_information[epoch][
                        'prepared_reflections'])[-1] == \
                        os.path.split(hklout)[-1]:
                    if self._sweep_information[epoch][
                            'scaled_reflections'] is not None:
                        raise RuntimeError('duplicate entries')
                    self._sweep_information[epoch][
                        'scaled_reflections'] = ref[hklout]

        del xsh

    debug_memory_usage()

    for epoch in self._sweep_information.keys():
        hklin = self._sweep_information[epoch]['scaled_reflections']
        dname = self._sweep_information[epoch]['dname']
        sname = self._sweep_information[epoch]['sname']

        hkl_copy = os.path.join(self.get_working_directory(),
                                'R_%s' % os.path.split(hklin)[-1])

        if not os.path.exists(hkl_copy):
            shutil.copyfile(hklin, hkl_copy)

        # let's properly listen to the user's resolution limit needs...
        # NB intgr was previously left stale from an earlier loop; bind it
        # to the current epoch before using it below
        intgr = self._sweep_information[epoch]['integrater']

        if self._user_resolution_limits.get((dname, sname), False):
            resolution = self._user_resolution_limits[(dname, sname)]
        elif PhilIndex.params.xia2.settings.resolution.keep_all_reflections:
            try:
                resolution = intgr.get_detector().get_max_resolution(
                    intgr.get_beam_obj().get_s0())
                Debug.write('keep_all_reflections set, using detector limits')
            except Exception:
                resolution = self._estimate_resolution_limit(hklin)
        else:
            resolution = self._estimate_resolution_limit(hklin)

        Chatter.write('Resolution for sweep %s/%s: %.2f' %
                      (dname, sname, resolution))

        if (dname, sname) not in self._scalr_resolution_limits:
            self._scalr_resolution_limits[(dname, sname)] = (resolution, None)
            self.set_scaler_done(False)
        else:
            if resolution < self._scalr_resolution_limits[(dname, sname)][0]:
                self._scalr_resolution_limits[(dname, sname)] = (resolution, None)
                self.set_scaler_done(False)

    debug_memory_usage()

    if not self.get_scaler_done():
        Debug.write('Returning as scaling not finished...')
        return

    self._sort_together_data_xds()

    highest_resolution = min(
        limit for limit, _ in self._scalr_resolution_limits.values())

    self._scalr_highest_resolution = highest_resolution

    Debug.write('Scaler highest resolution set to %5.2f' % highest_resolution)

    if not self.get_scaler_done():
        Debug.write('Returning as scaling not finished...')
        return

    sdadd_full = 0.0
    sdb_full = 0.0

    # ---------- FINAL MERGING ----------

    sc = self._factory.Aimless()

    FileHandler.record_log_file(
        '%s %s aimless' % (self._scalr_pname, self._scalr_xname),
        sc.get_log_file())

    sc.set_resolution(highest_resolution)
    sc.set_hklin(self._prepared_reflections)
    sc.set_new_scales_file('%s_final.scales' % self._scalr_xname)

    if sdadd_full == 0.0 and sdb_full == 0.0:
        pass
    else:
        sc.add_sd_correction('both', 1.0, sdadd_full, sdb_full)

    for epoch in epochs:
        input = self._sweep_information[epoch]
        start, end = (min(input['batches']), max(input['batches']))

        rkey = input['dname'], input['sname']
        run_resolution_limit, _ = self._scalr_resolution_limits[rkey]

        sc.add_run(start, end, exclude=False,
                   resolution=run_resolution_limit, name=input['sname'])

    sc.set_hklout(os.path.join(
        self.get_working_directory(),
        '%s_%s_scaled.mtz' % (self._scalr_pname, self._scalr_xname)))

    if self.get_scaler_anomalous():
        sc.set_anomalous()

    sc.multi_merge()

    FileHandler.record_xml_file(
        '%s %s aimless xml' % (self._scalr_pname, self._scalr_xname),
        sc.get_xmlout())

    data = sc.get_summary()

    loggraph = sc.parse_ccp4_loggraph()

    standard_deviation_info = {}
    for key in loggraph.keys():
        if 'standard deviation v. Intensity' in key:
            dataset = key.split(',')[-1].strip()
            standard_deviation_info[dataset] = transpose_loggraph(loggraph[key])

    resolution_info = {}
    for key in loggraph.keys():
        if 'Analysis against resolution' in key:
            dataset = key.split(',')[-1].strip()
            resolution_info[dataset] = transpose_loggraph(loggraph[key])

    # and also radiation damage stuff...

    batch_info = {}
    for key in loggraph.keys():
        if 'Analysis against Batch' in key:
            dataset = key.split(',')[-1].strip()
            batch_info[dataset] = transpose_loggraph(loggraph[key])

    # finally put all of the results "somewhere useful"
    self._scalr_statistics = data

    self._scalr_scaled_refl_files = copy.deepcopy(
        sc.get_scaled_reflection_files())

    self._scalr_scaled_reflection_files = {}

    # also output the unmerged scalepack format files...
    sc = self._factory.Aimless()
    sc.set_resolution(highest_resolution)
    sc.set_hklin(self._prepared_reflections)
    sc.set_scalepack()

    for epoch in epochs:
        input = self._sweep_information[epoch]
        start, end = (min(input['batches']), max(input['batches']))

        rkey = input['dname'], input['sname']
        run_resolution_limit, _ = self._scalr_resolution_limits[rkey]

        sc.add_run(start, end, exclude=False,
                   resolution=run_resolution_limit, name=input['sname'])

    sc.set_hklout(os.path.join(
        self.get_working_directory(),
        '%s_%s_scaled.mtz' % (self._scalr_pname, self._scalr_xname)))

    if self.get_scaler_anomalous():
        sc.set_anomalous()

    sc.multi_merge()

    self._scalr_scaled_reflection_files['sca_unmerged'] = {}
    self._scalr_scaled_reflection_files['mtz_unmerged'] = {}

    for dataset in sc.get_scaled_reflection_files().keys():
        hklout = sc.get_scaled_reflection_files()[dataset]

        # then mark the scalepack files for copying...

        scalepack = os.path.join(
            os.path.split(hklout)[0],
            os.path.split(hklout)[1].replace(
                '_scaled', '_scaled_unmerged').replace('.mtz', '.sca'))
        self._scalr_scaled_reflection_files['sca_unmerged'][dataset] = scalepack
        FileHandler.record_data_file(scalepack)

        mtz_unmerged = os.path.splitext(scalepack)[0] + '.mtz'
        self._scalr_scaled_reflection_files['mtz_unmerged'][dataset] = mtz_unmerged
        FileHandler.record_data_file(mtz_unmerged)

    if PhilIndex.params.xia2.settings.merging_statistics.source == 'cctbx':
        for key in self._scalr_scaled_refl_files:
            stats = self._compute_scaler_statistics(
                self._scalr_scaled_reflection_files['mtz_unmerged'][key],
                wave=key)
            self._scalr_statistics[
                (self._scalr_pname, self._scalr_xname, key)] = stats

    # convert reflection files to .sca format - use mtz2various for this

    self._scalr_scaled_reflection_files['sca'] = {}
    self._scalr_scaled_reflection_files['hkl'] = {}

    for key in self._scalr_scaled_refl_files:
        f = self._scalr_scaled_refl_files[key]
        scaout = '%s.sca' % f[:-4]

        m2v = self._factory.Mtz2various()
        m2v.set_hklin(f)
        m2v.set_hklout(scaout)
        m2v.convert()

        self._scalr_scaled_reflection_files['sca'][key] = scaout
        FileHandler.record_data_file(scaout)

        if PhilIndex.params.xia2.settings.small_molecule:
            hklout = '%s.hkl' % f[:-4]

            m2v = self._factory.Mtz2various()
            m2v.set_hklin(f)
            m2v.set_hklout(hklout)
            m2v.convert_shelx()

            self._scalr_scaled_reflection_files['hkl'][key] = hklout
            FileHandler.record_data_file(hklout)
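
# Hedged sketch of the REMOVE.HKL bookkeeping performed in _scale above:
# records are "h k l z" tuples; merging keeps existing records, adds only new
# outliers not already present, and drops anything with z below the
# configured z_min. The helper name is illustrative, not a xia2 function.
def _merge_remove_records_sketch(current_remove, xscale_remove, z_min):
    merged = [rec for rec in current_remove if rec[3] >= z_min]
    for rec in xscale_remove:
        if rec not in current_remove and rec[3] >= z_min:
            merged.append(rec)
    return merged
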
def truncate(self):
    '''Actually perform the truncation procedure.'''

    if not self._hklin:
        raise RuntimeError('hklin not defined')
    if not self._hklout:
        raise RuntimeError('hklout not defined')

    self.add_command_line('-hklin')
    self.add_command_line(self._hklin)
    self.add_command_line('-hklout')
    self.add_command_line(self._hklout)

    if self._nres:
        self.add_command_line('-nres')
        self.add_command_line('%d' % self._nres)

    if self._anomalous:
        self.add_command_line('-colano')
        self.add_command_line('/*/*/[I(+),SIGI(+),I(-),SIGI(-)]')

    self.add_command_line('-colin')
    self.add_command_line('/*/*/[IMEAN,SIGIMEAN]')

    self._xmlout = os.path.join(self.get_working_directory(),
                                '%d_truncate.xml' % self.get_xpid())
    self.add_command_line('-xmlout')
    self.add_command_line(self._xmlout)

    self.start()
    self.close_wait()

    try:
        self.check_for_errors()
    except RuntimeError as e:
        try:
            os.remove(self._hklout)
        except Exception:
            pass
        Debug.write(str(e))
        raise RuntimeError('ctruncate failure')

    nref = 0
    for record in self.get_all_output():
        if 'Number of reflections:' in record:
            nref = int(record.split()[-1])
        if 'Estimate of Wilson B factor:' in record:
            self._b_factor = float(record.split(':')[1].split()[0])

    self._nref_in, self._nref_out = nref, nref
    self._nabsent = 0

    moments = None
    results = self.parse_ccp4_loggraph()
    if 'Acentric moments of E using Truncate method' in results:
        moments = transpose_loggraph(
            results['Acentric moments of E using Truncate method'])
    elif 'Acentric moments of I' in results:
        moments = transpose_loggraph(results['Acentric moments of I'])
    elif 'Acentric moments of E' in results:
        moments = transpose_loggraph(results['Acentric moments of E'])
    else:
        Debug.write('Acentric moments of E/I not found')
    self._moments = moments
def parse(self): """Parse the output of the chef run.""" results = self.parse_ccp4_loggraph() rd_keys = [] comp_keys = [] scp_data = None comp_data = {} rd_data = {} datasets_damaged = [] for key in results: if "Completeness vs. " in key: comp_keys.append(key) comp_data[key.split()[-1]] = transpose_loggraph( results[key]) elif "R vs. " in key: rd_keys.append(key) wavelength = key.split()[-1] rd_data[wavelength] = transpose_loggraph(results[key]) values = [float(x) for x in rd_data[wavelength]["2_Rd"]] digest = self.digest_rd(values) # logger.info('Rd score (%s): %.2f' , \ # (wavelength, digest)) if digest > 3: datasets_damaged.append((wavelength, digest)) elif "Normalised radiation" in key: scp_data = transpose_loggraph(results[key]) elif "Dose vs. BATCH" in key: self._dose_profile = transpose_loggraph(results[key]) # right, so first work through these to define the limits from # where the first set is 50% complete to 90% complete, which # will establish the benchmark, then calculate a kinda # Z-score for the subsequent Scp values lowest_50 = None lowest_90 = None i_col = "2_I" dose_col = "1_DOSE" for dataset in comp_data: if "5_dI" in comp_data[dataset]: i_col = "4_I" if "1_BATCH" in comp_data[dataset]: dose_col = "1_BATCH" completeness = comp_data[dataset][i_col] local_50 = None local_90 = None max_comp = max(map(float, completeness)) for j, dose in enumerate(comp_data[dataset][dose_col]): comp = float(completeness[j]) if comp > (0.5 * max_comp) and not local_50: local_50 = float(dose) if comp > (0.9 * max_comp) and not local_90: local_90 = float(dose) # check if we have dose profile etc available stop_doses = [] groups = [] if self._dose_profile: wedges = sorted(self.digest_dose_profile()) # given these and the completeness curves, need to make a # choice as to when to stop... will be necessary here # to have an indication of the logical wavelength to # which the measurements belong # wedges is a list of: # FIRST_DOSE FIRST_BATCH SIZE EXPOSURE DATASET # digest this as follows: if sweeps switch between # A and B, or A, B and C then these are tied wedges: aim # for uniform total rotation / number of images. if # there is the same data set in subsequent chunks, # these are tied inverse beams # ok, logic. expect at most four wavelengths interleaved, # most likely two or three. also assume that the sizes of # the wedges should be the same. only want to "block" these. # if len(wedges) == 1: can consider any point at which # to cut off the data. N.B. don't forget EDNA strategies... # ok, easiest thing is encode a set of rules. 
            stop_doses, groups = digest_wedges(wedges)

            for j, g in enumerate(groups):
                logger.info("Group %d: %s", j + 1, g)

        if not lowest_50:
            lowest_50 = local_50
        if local_50 < lowest_50:
            lowest_50 = local_50

        if not lowest_90:
            lowest_90 = local_90
        if local_90 < lowest_90:
            lowest_90 = local_90

    # now build up the reference population

    scp_reference = []

    scp_key = None
    for k in scp_data:
        if "Scp(d)" in k:
            scp_key = k

    for j, d in enumerate(scp_data[dose_col]):
        dose = float(d)
        if dose >= lowest_50 and dose <= lowest_90:
            scp_reference.append(float(scp_data[scp_key][j]))

    m, s = mean_sd(scp_reference)

    dose = scp_data[dose_col][0]
    scp_max = 0.0

    if s == 0.0:
        logger.info("Insufficient measurements for analysis")
        return

    for j, d in enumerate(scp_data[dose_col]):
        dose = float(d)
        scp = float(scp_data[scp_key][j])
        z = (scp - m) / s

        if dose < lowest_90:
            scp_max = max(scp, scp_max)
            continue

        if z > 3 and scp > scp_max:
            break

        scp_max = max(scp, scp_max)

    if not datasets_damaged:
        logger.info("No significant radiation damage detected")
        return

    if not groups:
        stop_dose = dose
    elif groups == ["Single wedge"]:
        stop_dose = dose
    else:
        for stop_dose in stop_doses:
            if stop_dose > dose:
                break

    logger.info("Significant radiation damage detected:")

    for wavelength, digest in datasets_damaged:
        logger.info("Rd analysis (%s): %.2f", wavelength, digest)

    if stop_dose == float(scp_data[dose_col][-1]):
        logger.info("Conclusion: use all data")
    else:
        logger.info(
            "Conclusion: cut off after %s ~ %.1f",
            dose_col.replace("1_", ""),
            stop_dose,
        )
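
# Hedged restatement of the cut-off decision above as a standalone function:
# track the running Scp maximum through the reference region, then stop at
# the first dose past lowest_90 whose Z-score exceeds 3 while also beating
# that running maximum; otherwise keep everything. The function name is
# illustrative only.
def _find_stop_dose_sketch(doses, scp_values, m, s, lowest_90):
    scp_max = 0.0
    for dose, scp in zip(doses, scp_values):
        z = (scp - m) / s
        if dose < lowest_90:
            scp_max = max(scp, scp_max)
            continue
        if z > 3 and scp > scp_max:
            return dose
        scp_max = max(scp, scp_max)
    return doses[-1]
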
def _scale(self):
    '''Perform all of the operations required to deliver the scaled data.'''

    epochs = self._sweep_handler.get_epochs()

    if self._scalr_corrections:
        Journal.block(
            'scaling', self.get_scaler_xcrystal().get_name(), 'CCP4',
            {'scaling model': 'automatic',
             'absorption': self._scalr_correct_absorption,
             'decay': self._scalr_correct_decay})
    else:
        Journal.block(
            'scaling', self.get_scaler_xcrystal().get_name(), 'CCP4',
            {'scaling model': 'default'})

    sc = self._updated_aimless()
    sc.set_hklin(self._prepared_reflections)
    sc.set_chef_unmerged(True)
    sc.set_new_scales_file('%s.scales' % self._scalr_xname)

    user_resolution_limits = {}

    for epoch in epochs:
        si = self._sweep_handler.get_sweep_information(epoch)
        pname, xname, dname = si.get_project_info()
        sname = si.get_sweep_name()
        intgr = si.get_integrater()

        if intgr.get_integrater_user_resolution():
            dmin = intgr.get_integrater_high_resolution()
            if (dname, sname) not in user_resolution_limits:
                user_resolution_limits[(dname, sname)] = dmin
            elif dmin < user_resolution_limits[(dname, sname)]:
                user_resolution_limits[(dname, sname)] = dmin

        start, end = si.get_batch_range()

        if (dname, sname) in self._scalr_resolution_limits:
            resolution, _ = self._scalr_resolution_limits[(dname, sname)]
            sc.add_run(start, end, exclude=False, resolution=resolution,
                       name=sname)
        else:
            sc.add_run(start, end, name=sname)

    sc.set_hklout(os.path.join(
        self.get_working_directory(),
        '%s_%s_scaled_test.mtz' % (self._scalr_pname, self._scalr_xname)))

    if self.get_scaler_anomalous():
        sc.set_anomalous()

    # what follows sucks

    failover = PhilIndex.params.xia2.settings.failover

    if failover:
        try:
            sc.scale()
        except RuntimeError as e:
            es = str(e)
            if 'bad batch' in es or \
                    'negative scales run' in es or \
                    'no observations' in es:

                # first identify the sweep from the batch number
                batch = int(es.split()[-1])
                epoch = self._identify_sweep_epoch(batch)
                sweep = self._scalr_integraters[epoch].get_integrater_sweep()

                # then remove it from my parent xcrystal
                self.get_scaler_xcrystal().remove_sweep(sweep)

                # then remove it from the scaler list of integraters
                # - this should really be a scaler interface method
                del self._scalr_integraters[epoch]

                # then tell the user what is happening
                Chatter.write(
                    'Sweep %s gave negative scales - removing' %
                    sweep.get_name())

                # then reset the prepare, do, finish flags
                self.set_scaler_prepare_done(False)
                self.set_scaler_done(False)
                self.set_scaler_finish_done(False)

                # and return
                return
            else:
                raise
    else:
        sc.scale()

    # then gather up all of the resulting reflection files
    # and convert them into the required formats (.sca, .mtz)

    data = sc.get_summary()

    loggraph = sc.parse_ccp4_loggraph()

    resolution_info = {}

    reflection_files = sc.get_scaled_reflection_files()
    for dataset in reflection_files:
        FileHandler.record_temporary_file(reflection_files[dataset])

    for key in loggraph:
        if 'Analysis against resolution' in key:
            dataset = key.split(',')[-1].strip()
            resolution_info[dataset] = transpose_loggraph(loggraph[key])

    highest_resolution = 100.0
    highest_suggested_resolution = None

    # check in here that there is actually some data to scale..!
    if len(resolution_info) == 0:
        raise RuntimeError('no resolution info')

    for epoch in epochs:
        si = self._sweep_handler.get_sweep_information(epoch)
        pname, xname, dname = si.get_project_info()
        sname = si.get_sweep_name()
        intgr = si.get_integrater()
        start, end = si.get_batch_range()

        if (dname, sname) in self._scalr_resolution_limits:
            continue

        elif (dname, sname) in user_resolution_limits:
            limit = user_resolution_limits[(dname, sname)]
            self._scalr_resolution_limits[(dname, sname)] = (limit, None)
            if limit < highest_resolution:
                highest_resolution = limit
            Chatter.write('Resolution limit for %s: %5.2f (user provided)' %
                          (dname, limit))
            continue

        hklin = sc.get_unmerged_reflection_file()
        limit, reasoning = self._estimate_resolution_limit(
            hklin, batch_range=(start, end))

        if PhilIndex.params.xia2.settings.resolution.keep_all_reflections:
            suggested = limit
            if highest_suggested_resolution is None or \
                    limit < highest_suggested_resolution:
                highest_suggested_resolution = limit
            limit = intgr.get_detector().get_max_resolution(
                intgr.get_beam_obj().get_s0())
            self._scalr_resolution_limits[(dname, sname)] = (limit, suggested)
            Debug.write('keep_all_reflections set, using detector limits')
        Debug.write('Resolution for sweep %s: %.2f' % (sname, limit))

        if (dname, sname) not in self._scalr_resolution_limits:
            self._scalr_resolution_limits[(dname, sname)] = (limit, None)
            self.set_scaler_done(False)

        if limit < highest_resolution:
            highest_resolution = limit

        limit, suggested = self._scalr_resolution_limits[(dname, sname)]
        if suggested is None or limit == suggested:
            reasoning_str = ''
            if reasoning:
                reasoning_str = ' (%s)' % reasoning
            Chatter.write('Resolution for sweep %s/%s: %.2f%s' %
                          (dname, sname, limit, reasoning_str))
        else:
            Chatter.write(
                'Resolution limit for %s/%s: %5.2f (%5.2f suggested)' %
                (dname, sname, limit, suggested))

    if highest_suggested_resolution is not None and \
            highest_resolution >= (highest_suggested_resolution - 0.004):
        Debug.write('Dropping resolution cut-off suggestion since it is'
                    ' essentially identical to the actual resolution limit.')
        highest_suggested_resolution = None

    self._scalr_highest_resolution = highest_resolution
    self._scalr_highest_suggested_resolution = highest_suggested_resolution

    if highest_suggested_resolution is not None:
        Debug.write('Suggested highest resolution is %5.2f (%5.2f suggested)' %
                    (highest_resolution, highest_suggested_resolution))
    else:
        Debug.write('Scaler highest resolution set to %5.2f' %
                    highest_resolution)

    if not self.get_scaler_done():
        Debug.write('Returning as scaling not finished...')
        return

    batch_info = {}

    for key in loggraph:
        if 'Analysis against Batch' in key:
            dataset = key.split(',')[-1].strip()
            batch_info[dataset] = transpose_loggraph(loggraph[key])

    sc = self._updated_aimless()

    FileHandler.record_log_file(
        '%s %s aimless' % (self._scalr_pname, self._scalr_xname),
        sc.get_log_file())

    sc.set_hklin(self._prepared_reflections)
    sc.set_new_scales_file('%s_final.scales' % self._scalr_xname)

    for epoch in epochs:
        si = self._sweep_handler.get_sweep_information(epoch)
        pname, xname, dname = si.get_project_info()
        sname = si.get_sweep_name()
        start, end = si.get_batch_range()
        resolution_limit, _ = self._scalr_resolution_limits[(dname, sname)]
        sc.add_run(start, end, exclude=False, resolution=resolution_limit,
                   name=xname)

    sc.set_hklout(os.path.join(
        self.get_working_directory(),
        '%s_%s_scaled.mtz' % (self._scalr_pname, self._scalr_xname)))

    if self.get_scaler_anomalous():
        sc.set_anomalous()

    sc.scale()

    FileHandler.record_xml_file(
        '%s %s aimless xml' % (self._scalr_pname, self._scalr_xname),
        sc.get_xmlout())

    data = sc.get_summary()
    scales_file = sc.get_new_scales_file()
    loggraph = sc.parse_ccp4_loggraph()

    standard_deviation_info = {}

    for key in loggraph:
        if 'standard deviation v. Intensity' in key:
            dataset = key.split(',')[-1].strip()
            standard_deviation_info[dataset] = transpose_loggraph(loggraph[key])

    resolution_info = {}

    for key in loggraph:
        if 'Analysis against resolution' in key:
            dataset = key.split(',')[-1].strip()
            resolution_info[dataset] = transpose_loggraph(loggraph[key])

    batch_info = {}

    for key in loggraph:
        if 'Analysis against Batch' in key:
            dataset = key.split(',')[-1].strip()
            batch_info[dataset] = transpose_loggraph(loggraph[key])

    # finally put all of the results "somewhere useful"
    self._scalr_statistics = data

    self._scalr_scaled_refl_files = copy.deepcopy(
        sc.get_scaled_reflection_files())

    sc = self._updated_aimless()
    sc.set_hklin(self._prepared_reflections)
    sc.set_scales_file(scales_file)

    self._wavelengths_in_order = []

    for epoch in epochs:
        si = self._sweep_handler.get_sweep_information(epoch)
        pname, xname, dname = si.get_project_info()
        sname = si.get_sweep_name()
        start, end = si.get_batch_range()
        resolution_limit, _ = self._scalr_resolution_limits[(dname, sname)]
        sc.add_run(start, end, exclude=False, resolution=resolution_limit,
                   name=sname)
        if dname not in self._wavelengths_in_order:
            self._wavelengths_in_order.append(dname)

    sc.set_hklout(os.path.join(
        self.get_working_directory(),
        '%s_%s_scaled.mtz' % (self._scalr_pname, self._scalr_xname)))

    sc.set_scalepack()

    if self.get_scaler_anomalous():
        sc.set_anomalous()

    sc.scale()

    self._update_scaled_unit_cell()

    self._scalr_scaled_reflection_files = {}
    self._scalr_scaled_reflection_files['sca'] = {}
    self._scalr_scaled_reflection_files['sca_unmerged'] = {}
    self._scalr_scaled_reflection_files['mtz_unmerged'] = {}

    for key in self._scalr_scaled_refl_files:
        hklout = self._scalr_scaled_refl_files[key]

        scaout = '%s.sca' % hklout[:-4]
        self._scalr_scaled_reflection_files['sca'][key] = scaout
        FileHandler.record_data_file(scaout)

        scalepack = os.path.join(
            os.path.split(hklout)[0],
            os.path.split(hklout)[1].replace(
                '_scaled', '_scaled_unmerged').replace('.mtz', '.sca'))
        self._scalr_scaled_reflection_files['sca_unmerged'][key] = scalepack
        FileHandler.record_data_file(scalepack)

        mtz_unmerged = os.path.splitext(scalepack)[0] + '.mtz'
        self._scalr_scaled_reflection_files['mtz_unmerged'][key] = mtz_unmerged
        FileHandler.record_data_file(mtz_unmerged)

        if self._scalr_cell_esd is not None:
            # patch .mtz and overwrite unit cell information
            import xia2.Modules.Scaler.tools as tools
            override_cell = self._scalr_cell_dict.get(
                '%s_%s_%s' % (self._scalr_pname, self._scalr_xname, key))[0]
            tools.patch_mtz_unit_cell(mtz_unmerged, override_cell)
            tools.patch_mtz_unit_cell(hklout, override_cell)

    if PhilIndex.params.xia2.settings.merging_statistics.source == 'cctbx':
        for key in self._scalr_scaled_refl_files:
            stats = self._compute_scaler_statistics(
                self._scalr_scaled_reflection_files['mtz_unmerged'][key],
                selected_band=(highest_suggested_resolution, None),
                wave=key)
            self._scalr_statistics[
                (self._scalr_pname, self._scalr_xname, key)] = stats

    sc = self._updated_aimless()
    sc.set_hklin(self._prepared_reflections)
    sc.set_scales_file(scales_file)

    self._wavelengths_in_order = []

    for epoch in epochs:
        si = self._sweep_handler.get_sweep_information(epoch)
        pname, xname, dname = si.get_project_info()
        sname = si.get_sweep_name()
        start, end = si.get_batch_range()
        resolution_limit, _ = self._scalr_resolution_limits[(dname, sname)]
        sc.add_run(start, end, exclude=False, resolution=resolution_limit,
                   name=sname)
        if dname not in self._wavelengths_in_order:
            self._wavelengths_in_order.append(dname)

    sc.set_hklout(os.path.join(
        self.get_working_directory(),
        '%s_%s_chef.mtz' % (self._scalr_pname, self._scalr_xname)))

    sc.set_chef_unmerged(True)

    if self.get_scaler_anomalous():
        sc.set_anomalous()

    sc.scale()

    if not PhilIndex.params.dials.fast_mode:
        try:
            self._generate_absorption_map(sc)
        except Exception as e:
            # map generation may fail for a number of reasons,
            # e.g. matplotlib broken
            Debug.write("Could not generate absorption map (%s)" % e)
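
# Illustration of the file-naming convention used above when registering the
# unmerged outputs: the unmerged scalepack and MTZ names are derived from the
# merged MTZ path by string substitution. Shown here as a standalone helper
# (not a xia2 function) for clarity.
import os

def _unmerged_names_sketch(hklout):
    head, tail = os.path.split(hklout)
    sca = os.path.join(
        head,
        tail.replace('_scaled', '_scaled_unmerged').replace('.mtz', '.sca'))
    mtz = os.path.splitext(sca)[0] + '.mtz'
    return sca, mtz
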