def bravais_lattice_to_space_groups(chiral_only=True):
  from cctbx import sgtbx
  from cctbx.sgtbx import bravais_types
  from libtbx.containers import OrderedDict
  bravais_lattice_to_sg = OrderedDict()
  for sgn in range(230):
    sg = sgtbx.space_group_info(number=sgn+1).group()
    if (not chiral_only) or (sg.is_chiral()):
      bravais_lattice = bravais_types.bravais_lattice(group=sg)
      bravais_lattice_to_sg.setdefault(str(bravais_lattice), [])
      bravais_lattice_to_sg[str(bravais_lattice)].append(sg)
  return bravais_lattice_to_sg

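# A minimal usage sketch of bravais_lattice_to_space_groups(). With
# chiral_only=True the mapping should cover the chiral (Sohncke) space
# groups, keyed by Bravais lattice symbol in insertion order.
if __name__ == '__main__':
  mapping = bravais_lattice_to_space_groups(chiral_only=True)
  for lattice, space_groups in mapping.items():
    print lattice, len(space_groups)
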
def unique_beams(self):
  ''' Iterate through unique beams. '''
  from dxtbx.imageset import ImageSweep
  from libtbx.containers import OrderedDict
  # an OrderedDict with None values is used as an ordered set: insertion
  # order is preserved and duplicate beam models collapse onto one key
  obj = OrderedDict()
  for iset in self._imagesets:
    if isinstance(iset, ImageSweep):
      obj[iset.get_beam()] = None
    else:
      for i in range(len(iset)):
        obj[iset.get_beam(i)] = None
  return obj.keys()

class cctbx_data_structures_from_cif(object):

  def __init__(self,
               file_object=None,
               file_path=None,
               cif_model=None,
               data_structure_builder=None,
               data_block_name=None,
               base_array_info=None,
               **kwds):
    assert file_object is None or cif_model is None
    if data_structure_builder is None:
      data_structure_builders = (
        builders.miller_array_builder, builders.crystal_structure_builder)
    else:
      assert data_structure_builder in (
        builders.miller_array_builder, builders.crystal_structure_builder)
      data_structure_builders = (data_structure_builder,)

    self.xray_structures = OrderedDict()
    self.miller_arrays = OrderedDict()
    if cif_model is None:
      cif_model = reader(file_path=file_path, file_object=file_object).model()
    if not len(cif_model):
      raise Sorry("No data block found in CIF")
    if data_block_name is not None and data_block_name not in cif_model:
      if file_path is None:
        msg = 'Unknown CIF data block name: "%s"' % data_block_name
      else:
        msg = 'Unknown CIF data block name "%s" in file: "%s"' % (
          data_block_name, file_path)
      raise RuntimeError(msg)
    errors = []
    wavelengths = {}
    for key, block in cif_model.items():
      if data_block_name is not None and key != data_block_name:
        continue
      for builder in data_structure_builders:
        if builder == builders.crystal_structure_builder:
          if '_atom_site_fract_x' in block or '_atom_site_Cartn_x' in block:
            self.xray_structures.setdefault(key, builder(block).structure)
        elif builder == builders.miller_array_builder:
          block_wavelengths = builders.get_wavelengths(block)
          if block_wavelengths is not None:
            wavelengths = block_wavelengths
          if base_array_info is not None:
            base_array_info = base_array_info.customized_copy(labels=[key])
          if ('_refln_index_h' in block or '_refln.index_h' in block
              or '_diffrn_refln' in block):
            self.miller_arrays.setdefault(
              key, builder(block, base_array_info=base_array_info,
                           wavelengths=wavelengths).arrays())

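# A hedged usage sketch of cctbx_data_structures_from_cif; the file name is
# hypothetical. For each data block, .miller_arrays holds the label-to-array
# mapping returned by the miller array builder's arrays() method (shown later
# in this section).
structures = cctbx_data_structures_from_cif(file_path="example.cif")
for block_name, arrays in structures.miller_arrays.items():
  for label, array in arrays.items():
    print block_name, label, array.size()
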
class image_data_cache(object):

  def __init__(self, imageset, size=10):
    self.imageset = imageset
    self.size = size
    self._image_data = OrderedDict()

  def __getitem__(self, i):
    image_data = self._image_data.get(i)
    if image_data is None:
      image_data = self.imageset.get_raw_data(i)
      if len(self._image_data) >= self.size:
        # remove the oldest entry in the cache (FIFO eviction: the first
        # key inserted into the OrderedDict is discarded first)
        del self._image_data[self._image_data.keys()[0]]
      self._image_data[i] = image_data
    return image_data

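# A minimal sketch showing the FIFO behaviour of image_data_cache, using a
# hypothetical stand-in for an imageset; only get_raw_data(index) is needed.
class _FakeImageSet(object):
  def get_raw_data(self, i):
    print "loading image", i
    return "raw-%i" % i

cache = image_data_cache(_FakeImageSet(), size=2)
cache[0]; cache[1]   # both loaded and cached
cache[0]             # served from the cache, no "loading" message
cache[2]             # evicts image 0, the oldest entry
cache[0]             # reloaded, since it was evicted
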
def _unique_detectors_dict(self):
  ''' Returns an ordered dictionary of detector objects. '''
  from dxtbx.imageset import ImageSweep
  from libtbx.containers import OrderedDict
  obj = OrderedDict()
  for iset in self._imagesets:
    if isinstance(iset, ImageSweep):
      obj[iset.get_detector()] = None
    else:
      for i in range(len(iset)):
        obj[iset.get_detector(i)] = None
  detector_id = 0
  for detector in obj.keys():
    obj[detector] = detector_id
    detector_id = detector_id + 1
  return obj

def unique_scans(self):
  ''' Iterate through unique scans. '''
  from dxtbx.imageset import ImageSweep
  from libtbx.containers import OrderedDict
  obj = OrderedDict()
  for iset in self._imagesets:
    if isinstance(iset, ImageSweep):
      obj[iset.get_scan()] = None
    else:
      for i in range(len(iset)):
        try:
          model = iset.get_scan(i)
          if model is not None:
            obj[model] = None
        except Exception:
          pass
  return obj.keys()

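# The unique_* helpers above all rely on the same trick: an OrderedDict whose
# values are ignored acts as an insertion-ordered set. A minimal sketch of
# the pattern with plain placeholder values:
from libtbx.containers import OrderedDict
seen = OrderedDict()
for model in ['beam_a', 'beam_b', 'beam_a', 'beam_c']:
  seen[model] = None
assert seen.keys() == ['beam_a', 'beam_b', 'beam_c']
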
def exercise_odict():
  from libtbx.containers import OrderedDict as odict
  d = odict([('banana', 3), ('apple', 4), ('pear', 1)])
  d.setdefault('orange', 2)
  assert d.has_key('orange')
  assert d['orange'] == 2
  assert d.keys() == ['banana', 'apple', 'pear', 'orange']
  assert d.values() == [3, 4, 1, 2]
  d = odict.fromkeys(('b', 'c', 'a'))
  assert d.keys() == ['b', 'c', 'a']

def __init__(self, unmerged_intensities, batches_all, n_bins=20, d_min=None,
             id_to_batches=None):
  sel = unmerged_intensities.sigmas() > 0
  unmerged_intensities = unmerged_intensities.select(sel)
  batches_all = batches_all.select(sel)
  unmerged_intensities.setup_binner(n_bins=n_bins)
  unmerged_intensities.show_summary()
  self.unmerged_intensities = unmerged_intensities
  self.merged_intensities = unmerged_intensities.merge_equivalents().array()

  separate = separate_unmerged(
    unmerged_intensities, batches_all, id_to_batches=id_to_batches)
  self.intensities = separate.intensities
  self.batches = separate.batches
  run_id_to_batch_id = separate.run_id_to_batch_id
  self.individual_merged_intensities = OrderedDict()
  for k in self.intensities.keys():
    self.intensities[k] = self.intensities[k].resolution_filter(d_min=d_min)
    self.batches[k] = self.batches[k].resolution_filter(d_min=d_min)
    self.individual_merged_intensities[k] = \
      self.intensities[k].merge_equivalents().array()

  if run_id_to_batch_id is not None:
    labels = run_id_to_batch_id.values()
  else:
    labels = None
  racc = self.relative_anomalous_cc()
  if racc is not None:
    self.plot_relative_anomalous_cc(racc, labels=labels)
  correlation_matrix, linkage_matrix = \
    self.compute_correlation_coefficient_matrix()
  self._cluster_dict = self.to_dict(correlation_matrix, linkage_matrix)
  self.plot_cc_matrix(correlation_matrix, linkage_matrix, labels=labels)
  self.write_output()

def __init__(self, xinfo_file, sweep_ids=None, sweep_ranges=None):
  '''Initialise myself from an input .xinfo file.'''
  # first initialise all of the data structures which will hold the
  # information...
  self._project = None
  self._crystals = OrderedDict()
  if sweep_ids is not None:
    sweep_ids = [s.lower() for s in sweep_ids]
  if sweep_ranges is not None:
    assert sweep_ids is not None
    assert len(sweep_ids) == len(sweep_ranges)
  self._sweep_ids = sweep_ids
  self._sweep_ranges = sweep_ranges
  # read the contents of the xinfo file
  self._parse_project(xinfo_file)
  self._validate()

def run(args):
  from dials.util.options import OptionParser
  from dials.util.options import flatten_experiments
  from dials.util.options import flatten_datablocks
  from dials.util.options import flatten_reflections
  import libtbx.load_env

  usage = "%s [options] datablock.json | experiments.json | image_*.cbf" % (
    libtbx.env.dispatcher_name)

  parser = OptionParser(
    usage=usage,
    phil=phil_scope,
    read_experiments=True,
    read_datablocks=True,
    read_datablocks_from_images=True,
    read_reflections=True,
    check_format=False,
    epilog=help_message)

  params, options = parser.parse_args(show_diff_phil=True)
  experiments = flatten_experiments(params.input.experiments)
  datablocks = flatten_datablocks(params.input.datablock)
  reflections = flatten_reflections(params.input.reflections)

  if len(datablocks) == 0 and len(experiments) == 0 and len(reflections) == 0:
    parser.print_help()
    exit()

  for i_expt, expt in enumerate(experiments):
    print "Experiment %i:" % i_expt
    print str(expt.detector)
    print 'Max resolution (at corners): %f' % (
      expt.detector.get_max_resolution(expt.beam.get_s0()))
    print 'Max resolution (inscribed): %f' % (
      expt.detector.get_max_inscribed_resolution(expt.beam.get_s0()))
    if params.show_panel_distance:
      for ipanel, panel in enumerate(expt.detector):
        from scitbx import matrix
        fast = matrix.col(panel.get_fast_axis())
        slow = matrix.col(panel.get_slow_axis())
        normal = fast.cross(slow).normalize()
        origin = matrix.col(panel.get_origin())
        distance = origin.dot(normal)
        fast_origin = -(origin - distance * normal).dot(fast)
        slow_origin = -(origin - distance * normal).dot(slow)
        print 'Panel %d: distance %.2f origin %.2f %.2f' % (
          ipanel, distance, fast_origin, slow_origin)
      print ''
    print ''
    print show_beam(expt.detector, expt.beam)
    if expt.scan is not None:
      print expt.scan
    if expt.goniometer is not None:
      print expt.goniometer
    expt.crystal.show(show_scan_varying=params.show_scan_varying)
    if expt.crystal.num_scan_points:
      from scitbx.array_family import flex
      from cctbx import uctbx
      abc = flex.vec3_double()
      angles = flex.vec3_double()
      for n in range(expt.crystal.num_scan_points):
        a, b, c, alpha, beta, gamma = \
          expt.crystal.get_unit_cell_at_scan_point(n).parameters()
        abc.append((a, b, c))
        angles.append((alpha, beta, gamma))
      a, b, c = abc.mean()
      alpha, beta, gamma = angles.mean()
      mean_unit_cell = uctbx.unit_cell((a, b, c, alpha, beta, gamma))
      print " Average unit cell: %s" % mean_unit_cell
    print
    if expt.profile is not None:
      print expt.profile

  for datablock in datablocks:
    if datablock.format_class() is not None:
      print 'Format: %s' % datablock.format_class()
    imagesets = datablock.extract_imagesets()
    for imageset in imagesets:
      try:
        print imageset.get_template()
      except Exception:
        pass
      detector = imageset.get_detector()
      print str(detector)
      print 'Max resolution (at corners): %f' % (
        detector.get_max_resolution(imageset.get_beam().get_s0()))
      print 'Max resolution (inscribed): %f' % (
        detector.get_max_inscribed_resolution(imageset.get_beam().get_s0()))
      if params.show_panel_distance:
        for ipanel, panel in enumerate(detector):
          from scitbx import matrix
          fast = matrix.col(panel.get_fast_axis())
          slow = matrix.col(panel.get_slow_axis())
          normal = fast.cross(slow)
          origin = matrix.col(panel.get_origin())
          distance = origin.dot(normal)
          fast_origin = -(origin - distance * normal).dot(fast)
          slow_origin = -(origin - distance * normal).dot(slow)
          print 'Panel %d: distance %.2f origin %.2f %.2f' % (
            ipanel, distance, fast_origin, slow_origin)
        print ''
      print ''
      print show_beam(detector, imageset.get_beam())
      if imageset.get_scan() is not None:
        print imageset.get_scan()
      if imageset.get_goniometer() is not None:
        print imageset.get_goniometer()

  from libtbx.containers import OrderedDict, OrderedSet
  formats = OrderedDict([
    ('miller_index', '%i, %i, %i'),
    ('d', '%.2f'),
    ('dqe', '%.3f'),
    ('id', '%i'),
    ('imageset_id', '%i'),
    ('panel', '%i'),
    ('flags', '%i'),
    ('background.mean', '%.1f'),
    ('background.dispersion', '%.1f'),
    ('background.mse', '%.1f'),
    ('background.sum.value', '%.1f'),
    ('background.sum.variance', '%.1f'),
    ('intensity.prf.value', '%.1f'),
    ('intensity.prf.variance', '%.1f'),
    ('intensity.sum.value', '%.1f'),
    ('intensity.sum.variance', '%.1f'),
    ('intensity.cor.value', '%.1f'),
    ('intensity.cor.variance', '%.1f'),
    ('lp', '%.3f'),
    ('num_pixels.background', '%i'),
    ('num_pixels.background_used', '%i'),
    ('num_pixels.foreground', '%i'),
    ('num_pixels.valid', '%i'),
    ('partial_id', '%i'),
    ('partiality', '%.4f'),
    ('profile.correlation', '%.3f'),
    ('profile.rmsd', '%.3f'),
    ('xyzcal.mm', '%.2f, %.2f, %.2f'),
    ('xyzcal.px', '%.2f, %.2f, %.2f'),
    ('delpsical.rad', '%.3f'),
    ('delpsical2', '%.3f'),
    ('delpsical.weights', '%.3f'),
    ('xyzobs.mm.value', '%.2f, %.2f, %.2f'),
    ('xyzobs.mm.variance', '%.4e, %.4e, %.4e'),
    ('xyzobs.px.value', '%.2f, %.2f, %.2f'),
    ('xyzobs.px.variance', '%.4f, %.4f, %.4f'),
    ('s1', '%.4f, %.4f, %.4f'),
    ('rlp', '%.4f, %.4f, %.4f'),
    ('zeta', '%.3f'),
    ('x_resid', '%.3f'),
    ('x_resid2', '%.3f'),
    ('y_resid', '%.3f'),
    ('y_resid2', '%.3f'),
    ('kapton_absorption_correction', '%.3f'),
    ('kapton_absorption_correction_sigmas', '%.3f'),
  ])

  for rlist in reflections:
    from cctbx.array_family import flex
    print
    print "Reflection list contains %i reflections" % (len(rlist))
    if len(rlist) == 0:
      continue
    rows = [["Column", "min", "max", "mean"]]
    for k, col in rlist.cols():
      if type(col) in (flex.double, flex.int, flex.size_t):
        if type(col) in (flex.int, flex.size_t):
          col = col.as_double()
        rows.append([
          k, formats[k] % flex.min(col), formats[k] % flex.max(col),
          formats[k] % flex.mean(col)])
      elif type(col) in (flex.vec3_double, flex.miller_index):
        if type(col) == flex.miller_index:
          col = col.as_vec3_double()
        rows.append([
          k, formats[k] % col.min(), formats[k] % col.max(),
          formats[k] % col.mean()])
    from libtbx import table_utils
    print table_utils.format(rows, has_header=True, prefix="| ", postfix=" |")

    intensity_keys = (
      'miller_index', 'd', 'intensity.prf.value', 'intensity.prf.variance',
      'intensity.sum.value', 'intensity.sum.variance', 'background.mean',
      'profile.correlation', 'profile.rmsd')
    profile_fit_keys = ('miller_index', 'd',)
    centroid_keys = (
      'miller_index', 'd', 'xyzcal.mm', 'xyzcal.px', 'xyzobs.mm.value',
      'xyzobs.mm.variance', 'xyzobs.px.value', 'xyzobs.px.variance')

    keys_to_print = OrderedSet()
    if params.show_intensities:
      for k in intensity_keys:
        keys_to_print.add(k)
    if params.show_profile_fit:
      for k in profile_fit_keys:
        keys_to_print.add(k)
    if params.show_centroids:
      for k in centroid_keys:
        keys_to_print.add(k)
    if params.show_all_reflection_data:
      for k in formats:
        keys_to_print.add(k)

    def format_column(key, data, format_strings=None):
      if isinstance(data, flex.vec3_double):
        c_strings = [
          c.as_string(format_strings[i].strip())
          for i, c in enumerate(data.parts())]
      elif isinstance(data, flex.miller_index):
        c_strings = [
          c.as_string(format_strings[i].strip())
          for i, c in enumerate(data.as_vec3_double().parts())]
      elif isinstance(data, flex.size_t):
        c_strings = [data.as_int().as_string(format_strings[0].strip())]
      else:
        c_strings = [data.as_string(format_strings[0].strip())]
      column = flex.std_string()
      max_element_lengths = [c.max_element_length() for c in c_strings]
      for i in range(len(c_strings[0])):
        column.append(('%%%is' % len(key)) % ', '.join(
          ('%%%is' % max_element_lengths[j]) % c_strings[j][i]
          for j in range(len(c_strings))))
      return column

    if keys_to_print:
      keys = [k for k in keys_to_print if k in rlist]
      rows = [keys]
      max_reflections = len(rlist)
      if params.max_reflections is not None:
        max_reflections = min(len(rlist), params.max_reflections)
      columns = []
      for k in keys:
        columns.append(
          format_column(k, rlist[k], format_strings=formats[k].split(',')))
      print
      print "Printing %i of %i reflections:" % (max_reflections, len(rlist))
      for j in range(len(columns)):
        key = keys[j]
        width = max(len(key), columns[j].max_element_length())
        print ("%%%is" % width) % key,
      print
      for i in range(max_reflections):
        for j in range(len(columns)):
          print columns[j][i],
        print

  return

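# Example invocation, assuming this run() backs the dials.show command-line
# dispatcher (the usage string above takes the real name from
# libtbx.env.dispatcher_name); the file names here are illustrative:
#
#   dials.show experiments.json
#   dials.show datablock.json show_panel_distance=true
#
# show_panel_distance, show_intensities etc. are PHIL options referenced via
# params above and are assumed to be defined in this command's phil_scope.
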
class miller_array_builder(crystal_symmetry_builder):
  # Changes to this class should pass regression tests:
  # cctbx_project\mmtbx\regression\tst_cif_as_mtz_wavelengths.py
  # cctbx_project\iotbx\cif\tests\tst_lex_parse_build.py
  # phenix_regression\cif_as_mtz\tst_cif_as_mtz.py

  # known types of column data to be tagged as either amplitudes or
  # intensities as per
  # https://www.iucr.org/__data/iucr/cifdic_html/2/cif_mm.dic/index.html
  observation_types = {
    '_refln.F_squared': xray.intensity(),
    '_refln_F_squared': xray.intensity(),
    '_refln.intensity': xray.intensity(),
    '_refln.I(+)': xray.intensity(),
    '_refln.I(-)': xray.intensity(),
    '_refln.F_calc': xray.amplitude(),
    '_refln.F_meas': xray.amplitude(),
    '_refln.FP': xray.amplitude(),
    '_refln.F-obs': xray.amplitude(),
    '_refln.Fobs': xray.amplitude(),
    '_refln.F-calc': xray.amplitude(),
    '_refln.Fcalc': xray.amplitude(),
    '_refln.pdbx_F_': xray.amplitude(),
    '_refln.pdbx_I_': xray.intensity(),
    '_refln.pdbx_anom_difference': xray.amplitude(),
  }

  def guess_observationtype(self, labl):
    for okey in self.observation_types.keys():
      if labl.startswith(okey):
        return self.observation_types[okey]
    return None

  def __init__(self, cif_block, base_array_info=None, wavelengths=None):
    crystal_symmetry_builder.__init__(self, cif_block)
    self._arrays = OrderedDict()
    # used for presenting raw data tables in HKLviewer
    self._origarrays = OrderedDict()
    basearraylabels = []
    if base_array_info is not None:
      self.crystal_symmetry = self.crystal_symmetry.join_symmetry(
        other_symmetry=base_array_info.crystal_symmetry_from_file,
        force=True)
      if base_array_info.labels:
        basearraylabels = base_array_info.labels
    if wavelengths is None:
      wavelengths = {}
    if base_array_info is None:
      base_array_info = miller.array_info(source_type="cif")
    refln_containing_loops = self.get_miller_indices_containing_loops()
    for self.indices, refln_loop in refln_containing_loops:
      self.wavelength_id_array = None
      self.crystal_id_array = None
      self.scale_group_array = None
      wavelength_ids = [None]
      crystal_ids = [None]
      scale_groups = [None]
      for key, value in six.iteritems(refln_loop):
        # Get wavelength_id, crystal_id and scale_group_code columns for
        # selecting data of other columns in self.get_selection() used by
        # self.flex_std_string_as_miller_array()
        if (key.endswith('wavelength_id') or key.endswith('crystal_id')
            or key.endswith('scale_group_code')):
          data = as_int_or_none_if_all_question_marks(value, column_name=key)
          if data is None:
            continue
          counts = data.counts()
          if key.endswith('wavelength_id'):
            wavelength_ids = list(counts.keys())
          if len(counts) == 1:
            continue
          array = miller.array(
            miller.set(self.crystal_symmetry, self.indices).auto_anomalous(),
            data)
          if key.endswith('wavelength_id'):
            self.wavelength_id_array = array
            wavelength_ids = list(counts.keys())
          elif key.endswith('crystal_id'):
            self.crystal_id_array = array
            crystal_ids = list(counts.keys())
          elif key.endswith('scale_group_code'):
            self.scale_group_array = array
            scale_groups = list(counts.keys())
      labelsuffix = []
      wavelbl = []
      cryslbl = []
      scalegrplbl = []
      self._origarrays["HKLs"] = self.indices
      alllabels = list(sorted(refln_loop.keys()))
      remaininglabls = alllabels[:]  # deep copy the list
      # Parse labels matching cif column conventions
      # https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/refln.html
      # and extract groups of labels or just single columns.
      # Groups correspond to map coefficients, phases and amplitudes,
      # amplitudes or intensities with sigmas, and Hendrickson-Lattman
      # columns.
      phaseamplabls, remaininglabls = self.get_phase_amplitude_labels(
        remaininglabls)
      mapcoefflabls, remaininglabls = self.get_mapcoefficient_labels(
        remaininglabls)
      HLcoefflabls, remaininglabls = self.get_HL_labels(remaininglabls)
      data_sig_obstype_labls, remaininglabls = self.get_FSigF_ISigI_labels(
        remaininglabls)
      for w_id in wavelength_ids:
        for crys_id in crystal_ids:
          for scale_group in scale_groups:
            # If reflection data files contain more than one crystal,
            # wavelength or scalegroup then add their id(s) as a suffix to
            # data labels computed below. Needed for avoiding ambiguity, but
            # avoided when not needed to keep labels more human readable!
            if (len(wavelength_ids) > 1
                or len(wavelengths) > 1) and w_id is not None:
              wavelbl = ["wavelength_id=%i" % w_id]
            if len(crystal_ids) > 1 and crys_id is not None:
              cryslbl = ["crystal_id=%i" % crys_id]
            if len(scale_groups) > 1 and scale_group is not None:
              scalegrplbl = ["scale_group_code=%i" % scale_group]
            labelsuffix = scalegrplbl + cryslbl + wavelbl
            jlablsufx = ""
            if len(labelsuffix):
              jlablsufx = "," + ",".join(labelsuffix)

            for mapcoefflabl in mapcoefflabls:
              A_array = refln_loop[mapcoefflabl[0]]
              B_array = refln_loop[mapcoefflabl[1]]
              # deselect any ? marks in the two arrays, assuming both A and
              # B have the same ? marks
              selection = self.get_selection(
                A_array, wavelength_id=w_id, crystal_id=crys_id,
                scale_group_code=scale_group)
              A_array = A_array.select(selection)
              B_array = B_array.select(selection)
              # form the miller array with map coefficients
              data = flex.complex_double(flex.double(A_array),
                                         flex.double(B_array))
              millarr = miller.array(
                miller.set(self.crystal_symmetry,
                           self.indices.select(selection)).auto_anomalous(),
                data)
              # millarr will be None for column data not matching
              # w_id, crys_id, scale_group values
              if millarr is None:
                continue
              labl = basearraylabels + mapcoefflabl + labelsuffix
              millarr.set_info(
                base_array_info.customized_copy(
                  labels=labl, wavelength=wavelengths.get(w_id, None)))
              self._arrays[mapcoefflabl[0] + jlablsufx] = millarr

            for phaseamplabl in phaseamplabls:
              amplitudestrarray = refln_loop[phaseamplabl[0]]
              phasestrarray = refln_loop[phaseamplabl[1]]
              millarr = self.flex_std_string_as_miller_array(
                amplitudestrarray, wavelength_id=w_id, crystal_id=crys_id,
                scale_group_code=scale_group)
              phasesmillarr = self.flex_std_string_as_miller_array(
                phasestrarray, wavelength_id=w_id, crystal_id=crys_id,
                scale_group_code=scale_group)
              # millarr will be None for column data not matching
              # w_id, crys_id, scale_group values
              if millarr is None or phasesmillarr is None:
                continue
              phases = as_flex_double(phasesmillarr, phaseamplabl[1])
              millarr = millarr.phase_transfer(phases, deg=True)
              labl = basearraylabels + phaseamplabl + labelsuffix
              millarr.set_info(
                base_array_info.customized_copy(
                  labels=labl, wavelength=wavelengths.get(w_id, None)))
              self._arrays[phaseamplabl[0] + jlablsufx] = millarr

            for datlabl, siglabl, otype in data_sig_obstype_labls:
              datastrarray = refln_loop[datlabl]
              millarr = self.flex_std_string_as_miller_array(
                datastrarray, wavelength_id=w_id, crystal_id=crys_id,
                scale_group_code=scale_group)
              # millarr will be None for column data not matching
              # w_id, crys_id, scale_group values
              if millarr is None:
                continue
              millarr = as_flex_double(millarr, datlabl)
              datsiglabl = [datlabl]
              if siglabl:
                sigmasstrarray = refln_loop[siglabl]
                sigmas = self.flex_std_string_as_miller_array(
                  sigmasstrarray, wavelength_id=w_id, crystal_id=crys_id,
                  scale_group_code=scale_group)
                sigmas = as_flex_double(sigmas, siglabl)
                millarr.set_sigmas(sigmas.data())
                datsiglabl = [datlabl, siglabl]
              datsiglabl = basearraylabels + datsiglabl + labelsuffix
              millarr.set_info(
                base_array_info.customized_copy(
                  labels=datsiglabl, wavelength=wavelengths.get(w_id, None)))
              if otype is not None:
                millarr.set_observation_type(otype)
              self._arrays[datlabl + jlablsufx] = millarr

            for hl_labels in HLcoefflabls:
              hl_values = [cif_block.get(hl_key) for hl_key in hl_labels]
              if hl_values.count(None) == 0:
                selection = self.get_selection(
                  hl_values[0], wavelength_id=w_id, crystal_id=crys_id,
                  scale_group_code=scale_group)
                hl_values = [
                  as_double_or_none_if_all_question_marks(
                    hl.select(selection), column_name=lab)
                  for hl, lab in zip(hl_values, hl_labels)]
                # hl_values will be None for column data not matching
                # w_id, crys_id, scale_group values
                if hl_values == [None, None, None, None]:
                  continue
                millarr = miller.array(
                  miller.set(self.crystal_symmetry,
                             self.indices.select(selection)).auto_anomalous(),
                  flex.hendrickson_lattman(*hl_values))
                hlabels = basearraylabels + hl_labels + labelsuffix
                millarr.set_info(
                  base_array_info.customized_copy(
                    labels=hlabels, wavelength=wavelengths.get(w_id, None)))
                self._arrays[hl_labels[0] + jlablsufx] = millarr

            # pick up any remaining columns that weren't identified above
            for label in alllabels:
              if "index_" in label:
                continue
              datastrarray = refln_loop[label]
              if label in remaininglabls:
                labels = basearraylabels + [label] + labelsuffix
                lablsufx = jlablsufx
                millarr = self.flex_std_string_as_miller_array(
                  datastrarray, wavelength_id=w_id, crystal_id=crys_id,
                  scale_group_code=scale_group)
                # millarr will be None for column data not matching
                # w_id, crys_id, scale_group values
                if (label.endswith('wavelength_id')
                    or label.endswith('crystal_id')
                    # get full array if any of these labels, not just subsets
                    or label.endswith('scale_group_code')):
                  millarr = self.flex_std_string_as_miller_array(
                    datastrarray, wavelength_id=None, crystal_id=None,
                    scale_group_code=None)
                  lablsufx = ""
                  labels = basearraylabels + [label]
                if millarr is None:
                  continue
                otype = self.guess_observationtype(label)
                if otype is not None:
                  millarr.set_observation_type(otype)
                millarr.set_info(
                  base_array_info.customized_copy(
                    labels=labels, wavelength=wavelengths.get(w_id, None)))
                self._arrays[label + lablsufx] = millarr
              origarr = self.flex_std_string_as_miller_array(
                datastrarray, wavelength_id=w_id, crystal_id=crys_id,
                scale_group_code=scale_group)
              newlabel = label.replace("_refln.", "")
              newlabel2 = newlabel.replace("_refln_", "")
              if origarr:  # want only genuine miller arrays
                self._origarrays[newlabel2 + jlablsufx] = origarr.data()

    # Convert any groups of I+,I-,SigI+,SigI- (or amplitude) arrays into
    # anomalous arrays, i.e. both Friedel mates in the same array
    for key, array in six.iteritems(self._arrays.copy()):
      plus_key = ""
      if '_minus' in key:
        minus_key = key
        plus_key = key.replace('_minus', '_plus')
      elif '-' in key:
        minus_key = key
        plus_key = key.replace('-', '+')
      elif '_plus' in key:
        plus_key = key
        minus_key = key.replace('_plus', '_minus')
      elif '+' in key:
        plus_key = key
        minus_key = key.replace('+', '-')
      if plus_key in self._arrays and minus_key in self._arrays:
        plus_array = self._arrays.pop(plus_key)
        minus_array = self._arrays.pop(minus_key)
        minus_array = minus_array.customized_copy(
          indices=-minus_array.indices()).set_info(minus_array.info())
        array = plus_array.concatenate(
          minus_array, assert_is_similar_symmetry=False)
        array = array.customized_copy(anomalous_flag=True)
        array.set_info(minus_array.info().customized_copy(
          labels=list(OrderedSet(
            plus_array.info().labels + minus_array.info().labels))))
        array.set_observation_type(plus_array.observation_type())
        self._arrays.setdefault(key, array)

    if len(self._arrays) == 0:
      raise CifBuilderError("No reflection data present in cif block")
    # Sort the ordered dictionary to resemble the order of columns in the
    # cif file. This is to avoid any F_meas arrays accidentally being put
    # adjacent to pdbx_anom_difference arrays in the self._arrays
    # OrderedDict. Otherwise these arrays may unintentionally be combined
    # into a reconstructed anomalous amplitude array when saving as an mtz
    # file, due to a problem in the iotbx/mtz module. See
    # http://phenix-online.org/pipermail/cctbxbb/2021-March/002289.html
    arrlstord = []
    arrlst = list(self._arrays)
    for arr in arrlst:
      for i, k in enumerate(refln_loop.keys()):
        if arr.split(",")[0] == k:
          arrlstord.append((arr, i))
    # arrlstord must have the same keys as in the self._arrays dictionary
    assert sorted(arrlst) == sorted([e[0] for e in arrlstord])
    sortarrlst = sorted(arrlstord, key=lambda arrord: arrord[1])
    self._ordarrays = OrderedDict()
    for sortkey, i in sortarrlst:
      self._ordarrays.setdefault(sortkey, self._arrays[sortkey])
    self._arrays = self._ordarrays

  def get_HL_labels(self, keys):
    """ Hendrickson-Lattman labels could look like: 'HLAM', 'HLBM', 'HLCM',
    'HLDM' or like 'HLanomA', 'HLanomB', 'HLanomC', 'HLanomD'.
    Use a regular expression to group them accordingly.
    """
    lstkeys = list(keys)  # cast into list if not a list
    HLquads = []
    alllabels = " ".join(lstkeys)
    allmatches = re.findall(r"(\S*(HL(\S*)[abcdABCD](\S*)))", alllabels)
    HLtagslst = list(set([(e[2], e[3]) for e in allmatches]))
    usedkeys = []
    for m in HLtagslst:
      hllist = []
      for hm in allmatches:
        if m == (hm[2], hm[3]):
          hllist.append((hm[0], hm[1]))
      if len(hllist) == 4:
        HLquads.append([e[0] for e in hllist])
        for e in hllist:
          usedkeys.append(e[0])
    remainingkeys = []
    for e in lstkeys:
      if e not in usedkeys:
        remainingkeys.append(e)
    return HLquads, remainingkeys

  def get_mapcoefficient_labels(self, keys):
    # extract map coefficient labels from the list of cif column labels,
    # e.g. ( _refln.A_calc_au _refln.B_calc_au ),
    # ( _refln.A_calc _refln.B_calc )
    lstkeys = list(keys)  # cast into list if not a list
    remainingkeys = lstkeys[:]  # deep copy the list
    alllabels = " ".join(lstkeys)
    mapcoefflabels = []
    A_matches = re.findall(r"( (\s*_refln[\._]A_)(\S*) )", alllabels,
                           re.VERBOSE)
    for label in lstkeys:
      for m in A_matches:
        Blabel = m[1].replace("A_", "B_") + m[2]
        if Blabel == label:
          mapcoefflabels.append([m[0], label])
          remainingkeys.remove(m[0])
          remainingkeys.remove(label)
    return mapcoefflabels, remainingkeys

  def get_phase_amplitude_labels(self, keys):
    # extract phase and amplitude labels from the list of cif column labels,
    # e.g. ( _refln.F_calc _refln.phase_calc ),
    # ( _refln.FC_ALL _refln.PHIC_ALL ), ( _refln.FWT _refln.PHWT )
    lstkeys = list(keys)  # cast into list if not a list
    remainingkeys = lstkeys[:]  # deep copy the list
    alllabels = " ".join(lstkeys)
    phase_amplitudelabels = []
    PHmatches = re.findall(r"((\S*PH)([^I]\S*))", alllabels)
    # e.g. [('_refln.PHWT', '_refln.PH', 'WT'),
    #       ('_refln.PHDELWT', '_refln.PH', 'DELWT')]
    for label in lstkeys:
      for m in PHmatches:
        PFlabel = m[1].replace("PH", "F") + m[2]
        Flabel = m[1].replace("PH", "") + m[2]
        if Flabel == label or PFlabel == label:
          phase_amplitudelabels.append([label, m[0]])
          remainingkeys.remove(label)
          remainingkeys.remove(m[0])
    alllabels = " ".join(remainingkeys)
    PHImatches = re.findall(r"((\S*PHI)(\S*))", alllabels)
    # e.g. [('_refln.PHIC', '_refln.PHI', 'C'),
    #       ('_refln.PHIC_ALL', '_refln.PHI', 'C_ALL')]
    for label in lstkeys:
      for m in PHImatches:
        PFlabel = m[1].replace("PHI", "F") + m[2]
        Flabel = m[1].replace("PHI", "") + m[2]
        if Flabel == label or PFlabel == label:
          phase_amplitudelabels.append([label, m[0]])
          remainingkeys.remove(label)
          remainingkeys.remove(m[0])
    alllabels = " ".join(remainingkeys)
    PHDELmatches = re.findall(r"(((\S*)PH)([^I]\S*(WT)))", alllabels)
    # e.g. [('_refln.PHDELWT', '_refln.PH', '_refln.', 'DELWT', 'WT')]
    for label in lstkeys:
      for m in PHDELmatches:
        Flabel = m[2] + m[3].replace("WT", "FWT")
        if Flabel == label:
          phase_amplitudelabels.append([label, m[0]])
          remainingkeys.remove(label)
          remainingkeys.remove(m[0])
    alllabels = " ".join(remainingkeys)
    phase_matches = re.findall(r"((\S*[\._])phase(\S*))", alllabels)
    # e.g. [('_refln.phase_calc', '_refln.', '')]
    for label in lstkeys:
      for m in phase_matches:
        phaselabel = m[0]
        Flabl = m[1] + m[2]
        Flabel = m[1] + "F" + m[2]
        Faulabel = m[1] + "F" + m[2] + "_au"
        # in case of _refln.F_calc_au and _refln.phase_calc
        if Flabl in label or Flabel in label or Faulabel in label:
          if label in remainingkeys and m[0] in remainingkeys:
            if (Flabel + "_sigma_au") in remainingkeys or \
               (Flabel + "_sigma") in remainingkeys:
              # give priority to F_meas, F_meas_sigma or
              # F_meas_au, F_meas_sigma_au
              continue
            phase_amplitudelabels.append([label, m[0]])
            remainingkeys.remove(label)
            remainingkeys.remove(m[0])
    return phase_amplitudelabels, remainingkeys

  def get_FSigF_ISigI_labels(self, keys):
    # extract amplitude/sigma or intensity/sigma label pairs from the list
    # of cif column labels, e.g.
    # ( _refln.F_meas_sigma_au _refln.F_meas ),
    # ( _refln.intensity_sigma _refln.intensity ),
    # ( _refln.pdbx_I_plus_sigma _refln.pdbx_I_plus )
    lstkeys = list(keys)  # cast into list if not a list
    remainingkeys = lstkeys[:]  # deep copy the list
    alllabels = " ".join(lstkeys)
    labelpairs = []
    # catch label pairs like F(+),SIGF(+)
    sigma_matches = re.findall(r"((\S*[\._])SIG(\S*))", alllabels)
    for label in lstkeys:
      for m in sigma_matches:
        FIlabel = m[1] + m[2]
        if FIlabel == label:
          labelpairs.append([label, m[0], self.guess_observationtype(label)])
          remainingkeys.remove(label)
          remainingkeys.remove(m[0])
    alllabels = " ".join(remainingkeys)
    sigma_matches = re.findall(r"((\S*)_sigma(_*\S*))", alllabels)
    # e.g. [('_refln.F_meas_sigma_au', '_refln.F_meas', '_au'),
    #       ('_refln.intensity_sigma', '_refln.intensity', ''),
    #       ('_refln.pdbx_I_plus_sigma', '_refln.pdbx_I_plus', '')]
    for label in lstkeys:
      for m in sigma_matches:
        FIlabel = m[1] + m[2]
        if FIlabel == label:
          labelpairs.append([label, m[0], self.guess_observationtype(label)])
          remainingkeys.remove(label)
          remainingkeys.remove(m[0])
    alllabels = " ".join(remainingkeys)
    # catch generic meas and sigma labels
    anymeas_matches = re.findall(r"((\S*)_meas(\S*))", alllabels) + \
      re.findall(r"((\S*)_calc(\S*))", alllabels)
    anysigma_matches = re.findall(r"((\S*)_sigma(\S*))", alllabels)
    for mmatch in anymeas_matches:
      for smatch in anysigma_matches:
        if mmatch[1] == smatch[1] and mmatch[2] == smatch[2]:
          remainingkeys.remove(mmatch[0])
          if smatch[0] in remainingkeys:
            # in case of say F_squared_calc, F_squared_meas, F_squared_sigma
            # all being present
            remainingkeys.remove(smatch[0])
            labelpairs.append(
              [mmatch[0], smatch[0], self.guess_observationtype(mmatch[0])])
          else:
            labelpairs.append(
              [mmatch[0], None, self.guess_observationtype(mmatch[0])])
    return labelpairs, remainingkeys

  def get_miller_indices_containing_loops(self):
    loops = []
    for loop in self.cif_block.loops.values():
      for key in loop.keys():
        if 'index_h' not in key:
          continue
        hkl_str = [loop.get(key.replace('index_h', 'index_%s' % i))
                   for i in 'hkl']
        if hkl_str.count(None) > 0:
          raise CifBuilderError(
            "Miller indices missing from current CIF block (%s)"
            % key.replace('index_h',
                          'index_%s' % 'hkl'[hkl_str.index(None)]))
        hkl_int = []
        for i, h_str in enumerate(hkl_str):
          try:
            h_int = flex.int(h_str)
          except ValueError as e:
            raise CifBuilderError(
              "Invalid item for Miller index %s: %s" % ("HKL"[i], str(e)))
          hkl_int.append(h_int)
        indices = flex.miller_index(*hkl_int)
        loops.append((indices, loop))
        break
    return loops

  def get_selection(self, value,
                    wavelength_id=None, crystal_id=None,
                    scale_group_code=None):
    selection = ~((value == '.') | (value == '?'))
    if self.wavelength_id_array is not None and wavelength_id is not None:
      selection &= (self.wavelength_id_array.data() == wavelength_id)
    if self.crystal_id_array is not None and crystal_id is not None:
      selection &= (self.crystal_id_array.data() == crystal_id)
    if self.scale_group_array is not None and scale_group_code is not None:
      selection &= (self.scale_group_array.data() == scale_group_code)
    return selection

  def flex_std_string_as_miller_array(self, value,
                                      wavelength_id=None, crystal_id=None,
                                      scale_group_code=None):
    # Create a miller_array object of only the data and indices matching the
    # wavelength_id, crystal_id and scale_group_code submitted, or the full
    # array if these are None
    selection = self.get_selection(
      value, wavelength_id=wavelength_id, crystal_id=crystal_id,
      scale_group_code=scale_group_code)
    data = value.select(selection)
    #if not isinstance(data, flex.double):
    try:
      data = flex.int(data)
      indices = self.indices.select(selection)
    except ValueError:
      try:
        data = flex.double(data)
        indices = self.indices.select(selection)
      except ValueError:
        # if flex.std_string, return all values including '.' and '?'
        data = value
        indices = self.indices
    if data.size() == 0:
      return None
    return miller.array(
      miller.set(self.crystal_symmetry, indices).auto_anomalous(), data)

  def arrays(self):
    return self._arrays

  def origarrays(self):
    """ return dictionary of raw data found in cif file cast into
    flex.double arrays or just string arrays as a fall back.
    """
    return self._origarrays

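# The Friedel-mate pairing step in miller_array_builder.__init__ only
# inspects dictionary keys. A minimal, dependency-free sketch of that key
# logic (plain strings stand in for the miller arrays):
def pair_anomalous_keys(keys):
  pairs = []
  for key in keys:
    if '_plus' in key:
      minus_key = key.replace('_plus', '_minus')
      if minus_key in keys:
        pairs.append((key, minus_key))
  return pairs

assert pair_anomalous_keys(['_refln.pdbx_I_plus', '_refln.pdbx_I_minus']) == \
  [('_refln.pdbx_I_plus', '_refln.pdbx_I_minus')]
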
def get_raw_data(self):
  if self._raw_data is None:
    import numpy
    from scitbx.array_family import flex
    from libtbx.containers import OrderedDict
    self._raw_data = []
    cbf = self._get_cbf_handle()
    cbf.find_category('array_structure')
    cbf.find_column('encoding_type')
    cbf.select_row(0)
    types = []
    for i in xrange(cbf.count_rows()):
      types.append(cbf.get_value())
      cbf.next_row()
    assert len(types) == cbf.count_rows()

    # read the data
    data = OrderedDict()
    cbf.find_category("array_data")
    for i in xrange(cbf.count_rows()):
      cbf.find_column("array_id")
      name = cbf.get_value()
      cbf.find_column("data")
      assert cbf.get_typeofvalue().find('bnry') > -1
      if types[i] == 'signed 32-bit integer':
        array_string = cbf.get_integerarray_as_string()
        array = flex.int(numpy.fromstring(array_string, numpy.int32))
        parameters = cbf.get_integerarrayparameters_wdims_fs()
        array_size = (parameters[11], parameters[10], parameters[9])
      elif types[i] == 'signed 64-bit real IEEE':
        array_string = cbf.get_realarray_as_string()
        array = flex.double(numpy.fromstring(array_string, numpy.float))
        parameters = cbf.get_realarrayparameters_wdims_fs()
        array_size = (parameters[7], parameters[6], parameters[5])
      else:
        return None  # type not supported
      array.reshape(flex.grid(*array_size))
      data[name] = array
      cbf.next_row()

    # extract the data for each panel
    if cbf.has_sections():
      section_shapes = OrderedDict()
      for i in xrange(cbf.count_rows()):
        cbf.find_column("id")
        section_name = cbf.get_value()
        if not section_name in section_shapes:
          section_shapes[section_name] = {}
        cbf.find_column("array_id")
        if not "array_id" in section_shapes[section_name]:
          section_shapes[section_name]["array_id"] = cbf.get_value()
        else:
          assert section_shapes[section_name]["array_id"] == cbf.get_value()
        cbf.find_column("index")
        axis_index = int(cbf.get_value()) - 1
        cbf.find_column("start")
        axis_start = int(cbf.get_value()) - 1
        cbf.find_column("end")
        axis_end = int(cbf.get_value())
        section_shapes[section_name][axis_index] = slice(
          axis_start, axis_end)
        cbf.next_row()
      for section_name in section_shapes:
        section_shape = section_shapes[section_name]
        section = data[section_shape["array_id"]][
          section_shape[2], section_shape[1], section_shape[0]]
        section.reshape(
          flex.grid(section.focus()[-2], section.focus()[-1]))
        self._raw_data.append(section)
    else:
      for key in data:
        data[key].reshape(
          flex.grid(data[key].focus()[-2], data[key].focus()[-1]))
        self._raw_data.append(data[key])

    d = self.get_detector()
    assert len(d) == len(self._raw_data)
  return tuple(self._raw_data)

def __init__(self, pdb_hierarchy, sequences, alignment_params=None,
             crystal_symmetry=None, coordinate_precision=5,
             occupancy_precision=3, b_iso_precision=5, u_aniso_precision=5):
  pdb_hierarchy_as_cif_block.__init__(
    self, pdb_hierarchy, crystal_symmetry=crystal_symmetry,
    coordinate_precision=coordinate_precision,
    occupancy_precision=occupancy_precision,
    b_iso_precision=b_iso_precision,
    u_aniso_precision=u_aniso_precision)
  import mmtbx.validation.sequence
  validation = mmtbx.validation.sequence.validation(
    pdb_hierarchy=pdb_hierarchy,
    sequences=sequences,
    params=alignment_params,
    extract_residue_groups=True,
    log=null_out(),  # silence output
  )

  entity_loop = iotbx.cif.model.loop(header=(
    '_entity.id',
    '_entity.type',
    #'_entity.src_method',
    #'_entity.pdbx_description',
    '_entity.formula_weight',
    '_entity.pdbx_number_of_molecules',
    #'_entity.details',
    #'_entity.pdbx_mutation',
    #'_entity.pdbx_fragment',
    #'_entity.pdbx_ec'
  ))

  entity_poly_loop = iotbx.cif.model.loop(header=(
    '_entity_poly.entity_id',
    '_entity_poly.type',
    '_entity_poly.nstd_chirality',
    '_entity_poly.nstd_linkage',
    '_entity_poly.nstd_monomer',
    '_entity_poly.pdbx_seq_one_letter_code',
    '_entity_poly.pdbx_seq_one_letter_code_can',
    '_entity_poly.pdbx_strand_id',
    '_entity_poly.type_details'
  ))

  entity_poly_seq_loop = iotbx.cif.model.loop(header=(
    '_entity_poly_seq.entity_id',
    '_entity_poly_seq.num',
    '_entity_poly_seq.mon_id',
    '_entity_poly_seq.hetero',
  ))

  sequence_counts = OrderedDict()
  sequence_to_chain_ids = {}
  entity_id = 0
  sequence_to_entity_id = {}
  chain_id_to_entity_id = {}
  sequence_to_chains = {}
  residue_group_to_seq_num_mapping = {}
  aligned_pdb_chains = OrderedSet()
  non_polymer_counts = dict_with_default_0()
  non_polymer_resname_to_entity_id = OrderedDict()

  for chain in validation.chains:
    sequence = chain.alignment.b
    if sequence not in sequence_to_entity_id:
      entity_id += 1
      sequence_to_entity_id[sequence] = entity_id
    sequence_counts.setdefault(sequence, 0)
    sequence_counts[sequence] += 1
    sequence_to_chain_ids.setdefault(sequence, [])
    sequence_to_chain_ids[sequence].append(chain.chain_id)
    sequence_to_chains.setdefault(sequence, [])
    sequence_to_chains[sequence].append(chain)
    chain_id_to_entity_id[chain.chain_id] = sequence_to_entity_id[sequence]
    aligned_pdb_chains.add(chain.residue_groups[0].parent())
    unaligned_pdb_chains = \
      OrderedSet(pdb_hierarchy.chains()) - aligned_pdb_chains

    assert len(chain.residue_groups) + chain.n_missing_start + \
      chain.n_missing_end == len(sequence)
    residue_groups = [None] * chain.n_missing_start + \
      chain.residue_groups + [None] * chain.n_missing_end
    i = chain.n_missing_start
    seq_num = 0
    for i, residue_group in enumerate(residue_groups):
      if residue_group is None and chain.alignment.b[i] == '-':
        # a deletion
        continue
      seq_num += 1
      if residue_group is not None:
        residue_group_to_seq_num_mapping[residue_group] = seq_num

  for pdb_chain in unaligned_pdb_chains:
    for residue_group in pdb_chain.residue_groups():
      for resname in residue_group.unique_resnames():
        if resname not in non_polymer_resname_to_entity_id:
          entity_id += 1
          non_polymer_resname_to_entity_id[resname] = entity_id
        non_polymer_counts[resname] += 1

  for sequence, count in sequence_counts.iteritems():
    entity_poly_seq_num = 0
    entity_id = sequence_to_entity_id[sequence]
    entity_loop.add_row((
      entity_id,
      'polymer',  # polymer/non-polymer/macrolide/water
      #'?',  # src_method
      #'?',  # pdbx_description
      '?',  # formula_weight
      len(sequence_to_chains[sequence]),  # pdbx_number_of_molecules
      #'?',  # details
      #'?',  # pdbx_mutation
      #'?',  # pdbx_fragment
      #'?',  # pdbx_ec
    ))
    # The definition of the cif item _entity_poly.pdbx_seq_one_letter_code
    # says that modifications and non-standard amino acids should be encoded
    # as 'X', however in practice the PDB seem to encode them as the
    # three-letter code in parentheses.
    pdbx_seq_one_letter_code = []
    pdbx_seq_one_letter_code_can = []
    chains = sequence_to_chains[sequence]
    from iotbx.pdb import amino_acid_codes
    chain = chains[0]
    matches = chain.alignment.matches()
    for i, one_letter_code in enumerate(sequence):
      # Data items in the ENTITY_POLY_SEQ category specify the sequence of
      # monomers in a polymer. Allowance is made for the possibility of
      # microheterogeneity in a sample by allowing a given sequence number
      # to be correlated with more than one monomer ID. The corresponding
      # ATOM_SITE entries should reflect this heterogeneity.
      monomer_id = None
      if i >= chain.n_missing_start and \
         i < (len(sequence) - chain.n_missing_end):
        monomer_id = chain.resnames[i - chain.n_missing_start]
      if monomer_id is None and one_letter_code == '-':
        continue
      pdbx_seq_one_letter_code_can.append(one_letter_code)
      if monomer_id is None:
        if (sequence_to_chains[sequence][0].chain_type
            == mmtbx.validation.sequence.PROTEIN):
          monomer_id = amino_acid_codes.three_letter_given_one_letter.get(
            one_letter_code, "UNK")  # XXX
        else:
          monomer_id = one_letter_code
      else:
        if (sequence_to_chains[sequence][0].chain_type
            == mmtbx.validation.sequence.PROTEIN):
          one_letter_code = \
            amino_acid_codes.one_letter_given_three_letter.get(
              monomer_id, "(%s)" % monomer_id)
      pdbx_seq_one_letter_code.append(one_letter_code)
      entity_poly_seq_num += 1
      entity_poly_seq_loop.add_row((
        entity_id,
        entity_poly_seq_num,
        monomer_id,
        'no',  # XXX
      ))
    entity_poly_type = '?'
    entity_nstd_chirality = 'n'
    # we should probably determine the chirality more correctly by
    # examining the chirality of the backbone chain rather than relying on
    # the residue names to be correct
    # NOTE: the fractional comparisons below assume true division
    # (from __future__ import division at module level)
    if chain.chain_type == mmtbx.validation.sequence.PROTEIN:
      n_d_peptides = 0
      n_l_peptides = 0
      n_achiral_peptides = 0
      n_unknown = 0
      for resname in chain.resnames:
        if resname == "GLY":
          n_achiral_peptides += 1
        elif resname in iotbx.pdb.common_residue_names_amino_acid:
          n_l_peptides += 1
        elif resname in amino_acid_codes.three_letter_l_given_three_letter_d:
          n_d_peptides += 1
        else:
          n_unknown += 1
      n_total = sum(
        [n_d_peptides, n_l_peptides, n_achiral_peptides, n_unknown])
      if (n_l_peptides + n_achiral_peptides) / n_total > 0.5:
        entity_poly_type = 'polypeptide(L)'
        if n_d_peptides > 0:
          entity_nstd_chirality = 'y'
      elif (n_d_peptides + n_achiral_peptides) / n_total > 0.5:
        entity_poly_type = 'polypeptide(D)'
        if n_l_peptides > 0:
          entity_nstd_chirality = 'y'
    elif chain.chain_type == mmtbx.validation.sequence.NUCLEIC_ACID:
      n_dna = 0
      n_rna = 0
      n_unknown = 0
      for resname in chain.resnames:
        if resname is not None and resname.strip().upper() in (
            'AD', 'CD', 'GD', 'TD', 'DA', 'DC', 'DG', 'DT'):
          n_dna += 1
        elif resname is not None and resname.strip().upper() in (
            'A', 'C', 'G', 'T', '+A', '+C', '+G', '+T'):
          n_rna += 1
        else:
          n_unknown += 1
      n_total = sum([n_dna, n_rna, n_unknown])
      if n_dna / n_total > 0.5 and n_rna == 0:
        entity_poly_type = 'polydeoxyribonucleotide'
      elif n_rna / n_total > 0.5 and n_dna == 0:
        entity_poly_type = 'polyribonucleotide'
      elif (n_rna + n_dna) / n_total > 0.5:
        entity_poly_type = \
          'polydeoxyribonucleotide/polyribonucleotide hybrid'
    entity_poly_loop.add_row((
      entity_id,
      entity_poly_type,
      entity_nstd_chirality,
      'no',
      'no',
      wrap_always("".join(pdbx_seq_one_letter_code), width=80).strip(),
      wrap_always("".join(pdbx_seq_one_letter_code_can), width=80).strip(),
      ','.join(sequence_to_chain_ids[sequence]),
      '?'
    ))

  for resname, entity_id in non_polymer_resname_to_entity_id.iteritems():
    entity_type = "non-polymer"
    if resname == "HOH":
      entity_type = "water"  # XXX
    entity_loop.add_row((
      entity_id,
      entity_type,  # polymer/non-polymer/macrolide/water
      #'?',  # src_method
      #'?',  # pdbx_description
      '?',  # formula_weight
      non_polymer_counts[resname],  # pdbx_number_of_molecules
      #'?',  # details
      #'?',  # pdbx_mutation
      #'?',  # pdbx_fragment
      #'?',  # pdbx_ec
    ))

  self.cif_block.add_loop(entity_loop)
  self.cif_block.add_loop(entity_poly_loop)
  self.cif_block.add_loop(entity_poly_seq_loop)
  self.cif_block.update(pdb_hierarchy.as_cif_block())

  label_entity_id = self.cif_block['_atom_site.label_entity_id']
  auth_seq_id = self.cif_block['_atom_site.auth_seq_id']
  ins_code = self.cif_block['_atom_site.pdbx_PDB_ins_code']
  auth_asym_id = self.cif_block['_atom_site.auth_asym_id']
  label_seq_id = flex.std_string(auth_seq_id.size(), '.')
  ins_code = ins_code.deep_copy()
  ins_code.set_selected(ins_code == '?', '')
  for residue_group, seq_num in \
      residue_group_to_seq_num_mapping.iteritems():
    sel = ((auth_asym_id == residue_group.parent().id) &
           (ins_code == residue_group.icode.strip()) &
           (auth_seq_id == residue_group.resseq.strip()))
    label_seq_id.set_selected(sel, str(seq_num))
    label_entity_id.set_selected(
      sel, str(chain_id_to_entity_id[residue_group.parent().id]))
  for pdb_chain in unaligned_pdb_chains:
    for residue_group in pdb_chain.residue_groups():
      sel = ((auth_asym_id == residue_group.parent().id) &
             (ins_code == residue_group.icode.strip()) &
             (auth_seq_id == residue_group.resseq.strip()))
      label_entity_id.set_selected(
        sel,
        str(non_polymer_resname_to_entity_id[
          residue_group.unique_resnames()[0]]))
  self.cif_block['_atom_site.label_seq_id'] = label_seq_id

  # reorder the loops
  atom_site_loop = self.cif_block['_atom_site']
  atom_site_aniso_loop = self.cif_block.get('_atom_site_anisotrop')
  del self.cif_block['_atom_site']
  self.cif_block.add_loop(atom_site_loop)
  if atom_site_aniso_loop is not None:
    del self.cif_block['_atom_site_anisotrop']
    self.cif_block.add_loop(atom_site_aniso_loop)

def __init__(self, cif_block):
  crystal_symmetry_builder.__init__(self, cif_block)
  self.hierarchy = hierarchy.root()
  # These items are mandatory for the _atom_site loop; all others are
  # optional
  type_symbol = self._wrap_loop_if_needed(
    cif_block, "_atom_site.type_symbol")
  atom_labels = self._wrap_loop_if_needed(
    cif_block, "_atom_site.auth_atom_id")
  if atom_labels is None:
    # corresponds to chem comp atom name
    atom_labels = self._wrap_loop_if_needed(
      cif_block, "_atom_site.label_atom_id")
  # alternate conformer id
  alt_id = self._wrap_loop_if_needed(cif_block, "_atom_site.label_alt_id")
  # chain id
  label_asym_id = self._wrap_loop_if_needed(
    cif_block, "_atom_site.label_asym_id")
  auth_asym_id = self._wrap_loop_if_needed(
    cif_block, "_atom_site.auth_asym_id")
  if label_asym_id is None:
    label_asym_id = auth_asym_id
  if auth_asym_id is None:
    auth_asym_id = label_asym_id
  comp_id = self._wrap_loop_if_needed(cif_block, "_atom_site.auth_comp_id")
  if comp_id is None:
    # residue name
    comp_id = self._wrap_loop_if_needed(
      cif_block, "_atom_site.label_comp_id")
  entity_id = self._wrap_loop_if_needed(
    cif_block, "_atom_site.label_entity_id")
  seq_id = self._wrap_loop_if_needed(cif_block, "_atom_site.auth_seq_id")
  if seq_id is None:
    # residue number
    seq_id = self._wrap_loop_if_needed(
      cif_block, "_atom_site.label_seq_id")
  assert [atom_labels, alt_id, auth_asym_id, comp_id, entity_id,
          seq_id].count(None) == 0, "something is not present"
  assert type_symbol is not None

  atom_site_fp = cif_block.get('_atom_site.phenix_scat_dispersion_real')
  atom_site_fdp = cif_block.get('_atom_site.phenix_scat_dispersion_imag')
  # insertion code
  pdb_ins_code = cif_block.get("_atom_site.pdbx_PDB_ins_code")
  model_ids = cif_block.get("_atom_site.pdbx_PDB_model_num")
  atom_site_id = cif_block.get("_atom_site.id")
  # only permitted values are ATOM or HETATM
  group_PDB = cif_block.get("_atom_site.group_PDB")
  # TODO: read esds
  B_iso_or_equiv = flex.double(
    self._wrap_loop_if_needed(cif_block, "_atom_site.B_iso_or_equiv"))
  cart_x = flex.double(
    self._wrap_loop_if_needed(cif_block, "_atom_site.Cartn_x"))
  cart_y = flex.double(
    self._wrap_loop_if_needed(cif_block, "_atom_site.Cartn_y"))
  cart_z = flex.double(
    self._wrap_loop_if_needed(cif_block, "_atom_site.Cartn_z"))
  occu = flex.double(
    self._wrap_loop_if_needed(cif_block, "_atom_site.occupancy"))
  formal_charge = self._wrap_loop_if_needed(
    cif_block, "_atom_site.pdbx_formal_charge")

  # anisotropic b-factors
  # TODO: read esds
  anisotrop_id = self._wrap_loop_if_needed(
    cif_block, "_atom_site_anisotrop.id")
  adps = None
  if anisotrop_id is not None:
    u_ij = [
      self._wrap_loop_if_needed(
        cif_block, "_atom_site_anisotrop.U[%s][%s]" % (ij[0], ij[1]))
      for ij in ("11", "22", "33", "12", "13", "23")]
    assert u_ij.count(None) in (0, 6)
    if u_ij.count(None) == 0:
      adps = u_ij
    else:
      assert u_ij.count(None) == 6
      b_ij = [
        self._wrap_loop_if_needed(
          cif_block, "_atom_site_anisotrop.B[%s][%s]" % (ij[0], ij[1]))
        for ij in ("11", "22", "33", "12", "13", "23")]
      assert b_ij.count(None) in (0, 6)
      if b_ij.count(None) == 0:
        adps = adptbx.b_as_u(b_ij)
      # illegal for both U and B to be present
      assert not (u_ij.count(None) and b_ij.count(None))
    if adps is not None:
      try:
        adps = [flex.double(adp) for adp in adps]
      except ValueError as e:
        raise CifBuilderError("Error interpreting ADPs: " + str(e))
      adps = flex.sym_mat3_double(*adps)
  py_adps = {}
  if anisotrop_id is not None and adps is not None:
    for an_id, adp in zip(list(anisotrop_id), list(adps)):
      py_adps[an_id] = adp

  current_model_id = None
  current_label_asym_id = None
  current_auth_asym_id = None
  current_residue_id = None
  current_ins_code = None
  for i_atom in range(atom_labels.size()):
    # model(s)
    last_model_id = current_model_id
    current_model_id = model_ids[i_atom]
    assert current_model_id is not None
    if current_model_id != last_model_id:
      model = hierarchy.model(id=current_model_id)
      self.hierarchy.append_model(model)
    # chain(s)
    last_label_asym_id = current_label_asym_id
    current_label_asym_id = label_asym_id[i_atom]
    assert current_label_asym_id is not None
    last_auth_asym_id = current_auth_asym_id
    current_auth_asym_id = auth_asym_id[i_atom]
    assert current_auth_asym_id not in [".", "?", " "], "mmCIF file contains " + \
      "record with empty auth_asym_id, which is wrong."
    assert current_label_asym_id is not None
    if (current_auth_asym_id != last_auth_asym_id
        or current_model_id != last_model_id):
      chain = hierarchy.chain(id=current_auth_asym_id)
      model.append_chain(chain)
    else:
      assert current_auth_asym_id == last_auth_asym_id
    # residue_group(s), defined by residue id and insertion code
    last_residue_id = current_residue_id
    current_residue_id = seq_id[i_atom]
    assert current_residue_id is not None
    last_ins_code = current_ins_code
    if pdb_ins_code is not None:
      current_ins_code = pdb_ins_code[i_atom]
      if current_ins_code in ("?", ".", None):
        current_ins_code = " "
    if (current_residue_id != last_residue_id
        or current_ins_code != last_ins_code
        or current_auth_asym_id != last_auth_asym_id
        or current_model_id != last_model_id):
      try:
        resseq = hy36encode(width=4, value=int(current_residue_id))
      except ValueError as e:
        resseq = current_residue_id
        assert len(resseq) == 4
      residue_group = hierarchy.residue_group(
        resseq=resseq, icode=current_ins_code)
      chain.append_residue_group(residue_group)
      atom_groups = OrderedDict()  # reset atom_groups cache
    # atom_group(s), defined by resname and altloc id
    current_altloc = alt_id[i_atom]
    if current_altloc == "." or current_altloc == "?":
      current_altloc = ""  # main chain atoms
    current_resname = comp_id[i_atom]
    if (current_altloc, current_resname) not in atom_groups:
      atom_group = hierarchy.atom_group(
        altloc=current_altloc, resname=current_resname)
      atom_groups[(current_altloc, current_resname)] = atom_group
      if current_altloc == "":
        residue_group.insert_atom_group(0, atom_group)
      else:
        residue_group.append_atom_group(atom_group)
    else:
      atom_group = atom_groups[(current_altloc, current_resname)]
    # atom(s)
    atom = hierarchy.atom()
    atom_group.append_atom(atom)
    atom.set_element(type_symbol[i_atom])
    atom.set_name(
      format_pdb_atom_name(atom_labels[i_atom], type_symbol[i_atom]))
    atom.set_xyz(
      new_xyz=(cart_x[i_atom], cart_y[i_atom], cart_z[i_atom]))
    atom.set_b(B_iso_or_equiv[i_atom])
    atom.set_occ(occu[i_atom])
    # hy36encode should go once the pdb.hierarchy has been modified to no
    # longer store fixed-width strings
    atom.set_serial(hy36encode(width=5, value=int(atom_site_id[i_atom])))
    # some code relies on an empty segid being 4 spaces
    atom.set_segid("    ")
    if group_PDB is not None and group_PDB[i_atom] == "HETATM":
      atom.hetero = True
    if formal_charge is not None:
      charge = formal_charge[i_atom]
      if charge not in ("?", "."):
        if charge.endswith("-") or charge.startswith("-"):
          sign = "-"
        else:
          sign = "+"
        charge = charge.strip(" -+")
        charge = int(charge)
        if charge == 0:
          sign = ""
        atom.set_charge("%i%s" % (charge, sign))
    if atom_site_fp is not None:
      fp = atom_site_fp[i_atom]
      if fp not in ("?", "."):
        atom.set_fp(new_fp=float(fp))
    if atom_site_fdp is not None:
      fdp = atom_site_fdp[i_atom]
      if fdp not in ("?", "."):
        atom.set_fdp(new_fdp=float(fdp))
    if anisotrop_id is not None and adps is not None:
      py_u_ij = py_adps.get(atom.serial.strip(), None)
      if py_u_ij is not None:
        atom.set_uij(py_u_ij)
  if len(self.hierarchy.models()) == 1:
    # for compatibility with single-model PDB files
    self.hierarchy.models()[0].id = ""

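# A hedged usage sketch: the builder above is what ultimately runs when a
# model mmCIF file is read through iotbx.pdb. The file name is hypothetical;
# iotbx.pdb.input dispatches to the mmCIF reader for .cif input.
import iotbx.pdb
pdb_inp = iotbx.pdb.input(file_name="model.cif")
hierarchy = pdb_inp.construct_hierarchy()
print hierarchy.atoms().size()
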
def cache_restraint(self, cmd, cmd_residue, line, args):
  from libtbx.containers import OrderedDict
  if cmd not in self.cached_restraints:
    self.cached_restraints.setdefault(cmd, OrderedDict())
  self.cached_restraints[cmd].setdefault(line, (cmd_residue, args))

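# A minimal sketch of the two-level setdefault pattern used above: the outer
# dict maps a command to an OrderedDict of lines, and a repeated call with
# the same (cmd, line) pair leaves the first cached entry untouched. The
# command and line strings here are placeholders.
from libtbx.containers import OrderedDict
cache = {}
cache.setdefault('restrain', OrderedDict())
cache['restrain'].setdefault('line 1', ('residue A', ['arg1']))
cache['restrain'].setdefault('line 1', ('residue B', ['arg2']))  # ignored
assert cache['restrain']['line 1'] == ('residue A', ['arg1'])
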
class loop(DictMixin): def __init__(self, header=None, data=None): self._columns = OrderedDict() self.keys_lower = {} if header is not None: for key in header: self.setdefault(key, flex.std_string()) if data is not None: # the number of data items must be an exact multiple of the number of headers assert len(data) % len(header) == 0, "Wrong number of data items for loop" n_rows = len(data)//len(header) n_columns = len(header) for i in range(n_rows): self.add_row([data[i*n_columns+j] for j in range(n_columns)]) elif header is None and data is not None: assert isinstance(data, dict) or isinstance(data, OrderedDict) self.add_columns(data) self.keys_lower = dict( [(key.lower(), key) for key in self._columns.keys()]) def __setitem__(self, key, value): if not re.match(tag_re, key): raise Sorry("%s is not a valid data name" %key) if len(self) > 0: assert len(value) == self.size() if not isinstance(value, flex.std_string): for flex_numeric_type in (flex.int, flex.double): if isinstance(value, flex_numeric_type): value = value.as_string() else: try: value = flex_numeric_type(value).as_string() except TypeError: continue else: break if not isinstance(value, flex.std_string): value = flex.std_string(value) # value must be a mutable type assert hasattr(value, '__setitem__') self._columns[key] = value self.keys_lower[key.lower()] = key def __getitem__(self, key): return self._columns[self.keys_lower[key.lower()]] def __delitem__(self, key): del self._columns[self.keys_lower[key.lower()]] del self.keys_lower[key.lower()] def keys(self): return self._columns.keys() def __repr__(self): return repr(OrderedDict(self.iteritems())) def name(self): return common_substring(self.keys()).rstrip('_').rstrip('.') def size(self): size = 0 for column in self.values(): size = max(size, len(column)) return size def n_rows(self): size = 0 for column in self.values(): size = max(size, len(column)) return size def n_columns(self): return len(self.keys()) def add_row(self, row, default_value="?"): if isinstance(row, dict): for key in self: if key in row: self[key].append(str(row[key])) else: self[key].append(default_value) else: assert len(row) == len(self) for i, key in enumerate(self): self[key].append(str(row[i])) def add_column(self, key, values): if self.size() != 0: assert len(values) == self.size() self[key] = values self.keys_lower[key.lower()] = key def add_columns(self, columns): assert isinstance(columns, dict) or isinstance(columns, OrderedDict) for key, value in columns.iteritems(): self.add_column(key, value) def update_column(self, key, values): assert type(key)==type(""), "first argument is column key string" if self.size() != 0: assert len(values) == self.size(), "len(values) %d != self.size() %d" % ( len(values), self.size(), ) self[key] = values self.keys_lower[key.lower()] = key def delete_row(self, index): assert index < self.n_rows() for column in self._columns.values(): del column[index] def __copy__(self): new = loop() new._columns = self._columns.copy() new.keys_lower = self.keys_lower.copy() return new copy = __copy__ def __deepcopy__(self, memo): new = loop() new._columns = copy.deepcopy(self._columns, memo) new.keys_lower = copy.deepcopy(self.keys_lower, memo) return new def deepcopy(self): return copy.deepcopy(self) def show(self, out=None, indent=" ", indent_row=None, fmt_str=None, align_columns=True): assert self.n_rows() > 0 and self.n_columns() > 0, "keys: %s %d %d" % ( self.keys(), self.n_rows(), self.n_columns(), ) if out is None: out = sys.stdout if indent_row is None: indent_row = 
indent assert indent.strip() == "" assert indent_row.strip() == "" print >> out, "loop_" for k in self.keys(): print >> out, indent + k values = self._columns.values() if fmt_str is not None: # Pretty printing: # The user is responsible for providing a valid format string. # Values are not quoted - it is the user's responsibility to place # appropriate quotes in the format string if a particular value may # contain spaces. values = copy.deepcopy(values) for i, v in enumerate(values): for flex_numeric_type in (flex.int, flex.double): if not isinstance(v, flex_numeric_type): try: values[i] = flex_numeric_type(v) except ValueError: continue else: break if fmt_str is None: fmt_str = indent_row + ' '.join(["%s"]*len(values)) for i in range(self.size()): print >> out, fmt_str % tuple([values[j][i] for j in range(len(values))]) elif align_columns: fmt_str = [] for i, (k, v) in enumerate(self.iteritems()): for i_v in range(v.size()): v[i_v] = format_value(v[i_v]) # exclude semicolon text fields from column width calculation v_ = flex.std_string(item for item in v if "\n" not in item) width = v_.max_element_length() # See if column contains only numbers, '.' or '?' # right-align numerical columns, left-align everything else v = v.select(~( (v == ".") | (v == "?") )) try: flex.double(v) except ValueError: width *= -1 fmt_str.append("%%%is" %width) fmt_str = indent_row + " ".join(fmt_str) for i in range(self.size()): print >> out, (fmt_str % tuple([values[j][i] for j in range(len(values))])).rstrip() else: for i in range(self.size()): values_to_print = [format_value(values[j][i]) for j in range(len(values))] print >> out, ' '.join([indent] + values_to_print) def __str__(self): s = StringIO() self.show(out=s) return s.getvalue() def iterrows(self): keys = self.keys() for j in range(self.size()): yield OrderedDict(zip(keys, [self.values()[i][j] for i in range(len(self))])) def sort(self, key=None, reverse=False): self._columns = OrderedDict( sorted(self._columns.items(), key=key, reverse=reverse)) def order(self, order): def _cmp_key(k1, k2): for i, o in enumerate(order): if k1==o: break for j, o in enumerate(order): if k2==o: break if i<j: return -1 return 1 keys = self._columns.keys() keys.sort(_cmp_key) tmp = OrderedDict() for o in order: tmp[o]=self._columns[o] self._columns = tmp def __eq__(self, other): if (len(self) != len(other) or self.size() != other.size() or self.keys() != other.keys()): return False for value, other_value in zip(self.values(), other.values()): if (value == other_value).count(True) != len(value): return False return True
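A short usage sketch for the loop class above, assuming it is importable as iotbx.cif.model.loop (its usual home in cctbx); the tag names are invented. data is consumed row-major, so its length must be a multiple of the header length.

from iotbx.cif import model

demo = model.loop(header=('_demo.id', '_demo.value'),
                  data=('1', '0.5', '2', '0.7'))  # 4 items / 2 headers = 2 rows
demo.add_row(('3', '0.9'))
assert demo.n_rows() == 3 and demo.n_columns() == 2
demo.show()  # prints an aligned loop_ block to stdout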
def __init__(self, cif_block, base_array_info=None, wavelengths=None): crystal_symmetry_builder.__init__(self, cif_block) self._arrays = OrderedDict() self._origarrays = OrderedDict() # used for presenting raw data tables in HKLviewer basearraylabels = [] if base_array_info is not None: self.crystal_symmetry = self.crystal_symmetry.join_symmetry( other_symmetry=base_array_info.crystal_symmetry_from_file, force=True) if base_array_info.labels: basearraylabels = base_array_info.labels if (wavelengths is None): wavelengths = {} if base_array_info is None: base_array_info = miller.array_info(source_type="cif") refln_containing_loops = self.get_miller_indices_containing_loops() for self.indices, refln_loop in refln_containing_loops: self.wavelength_id_array = None self.crystal_id_array = None self.scale_group_array = None wavelength_ids = [None] crystal_ids = [None] scale_groups = [None] for key, value in six.iteritems(refln_loop): # Get wavelength_ids, crystal_id, scale_group_code columns for selecting data of other # columns in self.get_selection() used by self.flex_std_string_as_miller_array() if (key.endswith('wavelength_id') or key.endswith('crystal_id') or key.endswith('scale_group_code')): data = as_int_or_none_if_all_question_marks( value, column_name=key) if data is None: continue counts = data.counts() if key.endswith('wavelength_id'): wavelength_ids = list(counts.keys()) if len(counts) == 1: continue array = miller.array( miller.set(self.crystal_symmetry, self.indices).auto_anomalous(), data) if key.endswith('wavelength_id'): self.wavelength_id_array = array wavelength_ids = list(counts.keys()) elif key.endswith('crystal_id'): self.crystal_id_array = array crystal_ids = list(counts.keys()) elif key.endswith('scale_group_code'): self.scale_group_array = array scale_groups = list(counts.keys()) labelsuffix = [] wavelbl = [] cryslbl = [] scalegrplbl = [] self._origarrays["HKLs"] = self.indices alllabels = list(sorted(refln_loop.keys())) remaininglabls = alllabels[:] # copy the list # Parse labels matching cif column conventions # https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/refln.html # and extract groups of labels or just single columns. # Groups correspond to map coefficients, phases and amplitudes, # amplitudes or intensities with sigmas, and Hendrickson-Lattman columns. phaseamplabls, remaininglabls = self.get_phase_amplitude_labels( remaininglabls) mapcoefflabls, remaininglabls = self.get_mapcoefficient_labels( remaininglabls) HLcoefflabls, remaininglabls = self.get_HL_labels(remaininglabls) data_sig_obstype_labls, remaininglabls = self.get_FSigF_ISigI_labels( remaininglabls) for w_id in wavelength_ids: for crys_id in crystal_ids: for scale_group in scale_groups: # If reflection data files contain more than one crystal, wavelength or scalegroup # then add their id(s) as a suffix to the data labels computed below. Needed to avoid # ambiguity, but omitted when not needed so as to keep labels human readable. if (len(wavelength_ids) > 1 or len(wavelengths) > 1) and w_id is not None: wavelbl = ["wavelength_id=%i" % w_id] if len(crystal_ids) > 1 and crys_id is not None: cryslbl = ["crystal_id=%i" % crys_id] if len(scale_groups) > 1 and scale_group is not None: scalegrplbl = ["scale_group_code=%i" % scale_group] labelsuffix = scalegrplbl + cryslbl + wavelbl jlablsufx = "" if len(labelsuffix): jlablsufx = "," + ",".join(labelsuffix) for mapcoefflabl in mapcoefflabls: A_array = refln_loop[mapcoefflabl[0]] B_array = refln_loop[mapcoefflabl[1]] # deselect any ?
marks in the two arrays, assuming both A and B have the same ? marks selection = self.get_selection( A_array, wavelength_id=w_id, crystal_id=crys_id, scale_group_code=scale_group) A_array = A_array.select(selection) B_array = B_array.select(selection) # form the miller array with map coefficients data = flex.complex_double(flex.double(A_array), flex.double(B_array)) millarr = miller.array( miller.set(self.crystal_symmetry, self.indices.select( selection)).auto_anomalous(), data) # millarr will be None for column data not matching w_id,crys_id,scale_group values if millarr is None: continue labl = basearraylabels + mapcoefflabl + labelsuffix millarr.set_info( base_array_info.customized_copy( labels=labl, wavelength=wavelengths.get(w_id, None))) self._arrays[mapcoefflabl[0] + jlablsufx] = millarr for phaseamplabl in phaseamplabls: amplitudestrarray = refln_loop[phaseamplabl[0]] phasestrarray = refln_loop[phaseamplabl[1]] millarr = self.flex_std_string_as_miller_array( amplitudestrarray, wavelength_id=w_id, crystal_id=crys_id, scale_group_code=scale_group) phasesmillarr = self.flex_std_string_as_miller_array( phasestrarray, wavelength_id=w_id, crystal_id=crys_id, scale_group_code=scale_group) # millarr will be None for column data not matching w_id,crys_id,scale_group values if millarr is None or phasesmillarr is None: continue phases = as_flex_double(phasesmillarr, phaseamplabl[1]) millarr = millarr.phase_transfer(phases, deg=True) labl = basearraylabels + phaseamplabl + labelsuffix millarr.set_info( base_array_info.customized_copy( labels=labl, wavelength=wavelengths.get(w_id, None))) self._arrays[phaseamplabl[0] + jlablsufx] = millarr for datlabl, siglabl, otype in data_sig_obstype_labls: datastrarray = refln_loop[datlabl] millarr = self.flex_std_string_as_miller_array( datastrarray, wavelength_id=w_id, crystal_id=crys_id, scale_group_code=scale_group) # millarr will be None for column data not matching w_id,crys_id,scale_group values if millarr is None: continue millarr = as_flex_double(millarr, datlabl) datsiglabl = [datlabl] if siglabl: sigmasstrarray = refln_loop[siglabl] sigmas = self.flex_std_string_as_miller_array( sigmasstrarray, wavelength_id=w_id, crystal_id=crys_id, scale_group_code=scale_group) sigmas = as_flex_double(sigmas, siglabl) millarr.set_sigmas(sigmas.data()) datsiglabl = [datlabl, siglabl] datsiglabl = basearraylabels + datsiglabl + labelsuffix millarr.set_info( base_array_info.customized_copy( labels=datsiglabl, wavelength=wavelengths.get(w_id, None))) if otype is not None: millarr.set_observation_type(otype) self._arrays[datlabl + jlablsufx] = millarr for hl_labels in HLcoefflabls: hl_values = [ cif_block.get(hl_key) for hl_key in hl_labels ] if hl_values.count(None) == 0: selection = self.get_selection( hl_values[0], wavelength_id=w_id, crystal_id=crys_id, scale_group_code=scale_group) hl_values = [ as_double_or_none_if_all_question_marks( hl.select(selection), column_name=lab) for hl, lab in zip(hl_values, hl_labels) ] # hl_values will be None for column data not matching w_id,crys_id,scale_group values if hl_values == [None, None, None, None]: continue millarr = miller.array( miller.set( self.crystal_symmetry, self.indices.select( selection)).auto_anomalous(), flex.hendrickson_lattman(*hl_values)) hlabels = basearraylabels + hl_labels + labelsuffix millarr.set_info( base_array_info.customized_copy( labels=hlabels, wavelength=wavelengths.get(w_id, None))) self._arrays[hl_labels[0] + jlablsufx] = millarr # pick up remaining columns if any that weren't identified 
above for label in alllabels: if "index_" in label: continue datastrarray = refln_loop[label] if label in remaininglabls: labels = basearraylabels + [label ] + labelsuffix lablsufx = jlablsufx millarr = self.flex_std_string_as_miller_array( datastrarray, wavelength_id=w_id, crystal_id=crys_id, scale_group_code=scale_group) # millarr will be None for column data not matching w_id,crys_id,scale_group values if (label.endswith( 'wavelength_id' ) or label.endswith( 'crystal_id' ) or # get full array if any of these labels, not just subsets label.endswith('scale_group_code')): millarr = self.flex_std_string_as_miller_array( datastrarray, wavelength_id=None, crystal_id=None, scale_group_code=None) lablsufx = "" labels = basearraylabels + [label] if millarr is None: continue otype = self.guess_observationtype(label) if otype is not None: millarr.set_observation_type(otype) millarr.set_info( base_array_info.customized_copy( labels=labels, wavelength=wavelengths.get(w_id, None))) self._arrays[label + lablsufx] = millarr origarr = self.flex_std_string_as_miller_array( datastrarray, wavelength_id=w_id, crystal_id=crys_id, scale_group_code=scale_group) newlabel = label.replace("_refln.", "") newlabel2 = newlabel.replace("_refln_", "") if origarr: # want only genuine miller arrays self._origarrays[newlabel2 + jlablsufx] = origarr.data() # Convert any groups of I+,I-,SigI+,SigI- (or amplitudes) arrays into anomalous arrays # i.e. both friedel mates in the same array for key, array in six.iteritems(self._arrays.copy()): plus_key = "" if '_minus' in key: minus_key = key plus_key = key.replace('_minus', '_plus') elif '-' in key: minus_key = key plus_key = key.replace('-', '+') elif '_plus' in key: plus_key = key minus_key = key.replace('_plus', '_minus') elif '+' in key: plus_key = key minus_key = key.replace('+', '-') if plus_key in self._arrays and minus_key in self._arrays: plus_array = self._arrays.pop(plus_key) minus_array = self._arrays.pop(minus_key) minus_array = minus_array.customized_copy( indices=-minus_array.indices()).set_info( minus_array.info()) array = plus_array.concatenate( minus_array, assert_is_similar_symmetry=False) array = array.customized_copy(anomalous_flag=True) array.set_info(minus_array.info().customized_copy(labels=list( OrderedSet(plus_array.info().labels + minus_array.info().labels)))) array.set_observation_type(plus_array.observation_type()) self._arrays.setdefault(key, array) if len(self._arrays) == 0: raise CifBuilderError("No reflection data present in cif block") # Sort the ordered dictionary to resemble the order of columns in the cif file # This is to avoid any F_meas arrays accidentally being put adjacent to # pdbx_anom_difference arrays in the self._arrays OrderedDict. Otherwise these # arrays may unintentionally be combined into a reconstructed anomalous amplitude # array when saving as an mtz file due to a problem in the iotbx/mtz module. # See http://phenix-online.org/pipermail/cctbxbb/2021-March/002289.html arrlstord = [] arrlst = list(self._arrays) for arr in arrlst: for i, k in enumerate(refln_loop.keys()): if arr.split(",")[0] == k: arrlstord.append((arr, i)) # arrlstord must have the same keys as in the self._arrays dictionary assert sorted(arrlst) == sorted([e[0] for e in arrlstord]) sortarrlst = sorted(arrlstord, key=lambda arrord: arrord[1]) self._ordarrays = OrderedDict() for sortkey, i in sortarrlst: self._ordarrays.setdefault(sortkey, self._arrays[sortkey]) self._arrays = self._ordarrays
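The Friedel-mate merging just above pairs columns purely by label suffix; here is that convention isolated as a tiny pure-Python sketch (the helper name and test labels are invented):

def friedel_partner_keys(key):
    # returns (plus_key, minus_key) for a key carrying an anomalous marker,
    # or None otherwise; checked in the same order as the loop above
    if '_minus' in key:
        return key.replace('_minus', '_plus'), key
    if '-' in key:
        return key.replace('-', '+'), key
    if '_plus' in key:
        return key, key.replace('_plus', '_minus')
    if '+' in key:
        return key, key.replace('+', '-')
    return None

assert friedel_partner_keys('_refln.pdbx_I_plus') == (
    '_refln.pdbx_I_plus', '_refln.pdbx_I_minus')
assert friedel_partner_keys('_refln.F_meas_au') is None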
class cif(DictMixin): def __init__(self, blocks=None): self._errors = None if blocks is not None: self.blocks = OrderedDict(blocks) else: self.blocks = OrderedDict() self.keys_lower = dict([(key.lower(), key) for key in self.blocks.keys()]) def __setitem__(self, key, value): assert isinstance(value, block) if not re.match(tag_re, '_'+key): raise Sorry("%s is not a valid data block name" %key) self.blocks[key] = value self.keys_lower[key.lower()] = key def get(self, key, default=None): key_lower = self.keys_lower.get(key.lower()) if (key_lower is None): return default return self.blocks.get(key_lower, default) def __getitem__(self, key): result = self.get(key) if (result is None): raise KeyError('Unknown CIF data block name: "%s"' % key) return result def __delitem__(self, key): del self.blocks[self.keys_lower[key.lower()]] del self.keys_lower[key.lower()] def keys(self): return self.blocks.keys() def __repr__(self): return repr(OrderedDict(self.iteritems())) def __copy__(self): return cif(self.blocks.copy()) copy = __copy__ def __deepcopy__(self, memo): return cif(copy.deepcopy(self.blocks, memo)) def deepcopy(self): return copy.deepcopy(self) def show(self, out=None, indent=" ", indent_row=None, data_name_field_width=34, loop_format_strings=None, align_columns=True): if out is None: out = sys.stdout for name, block in self.items(): print >> out, "data_%s" %name block.show( out=out, indent=indent, indent_row=indent_row, data_name_field_width=data_name_field_width, loop_format_strings=loop_format_strings, align_columns=align_columns) def __str__(self): s = StringIO() self.show(out=s) return s.getvalue() def validate(self, dictionary, show_warnings=True, error_handler=None, out=None): if out is None: out = sys.stdout from iotbx.cif import validation self._errors = {} if error_handler is None: error_handler = validation.ErrorHandler() for key, block in self.blocks.iteritems(): error_handler = error_handler.__class__() dictionary.set_error_handler(error_handler) block.validate(dictionary) self._errors.setdefault(key, error_handler) if error_handler.error_count or error_handler.warning_count: error_handler.show(show_warnings=show_warnings, out=out) return error_handler def get_errors(self): return self._errors def sort(self, recursive=False, key=None, reverse=False): self.blocks = OrderedDict(sorted(self.blocks.items(), key=key, reverse=reverse)) if recursive: for b in self.blocks.values(): b.sort(recursive=recursive, reverse=reverse)
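Block lookup in the cif container above is case-insensitive but order-preserving, via the keys_lower side table; a minimal standalone sketch of that pattern (the class name is invented):

from collections import OrderedDict

class CaseInsensitiveStore(object):
    def __init__(self):
        self._blocks = OrderedDict()  # original spellings, insertion order kept
        self._keys_lower = {}         # lower-cased key -> original spelling

    def __setitem__(self, key, value):
        self._blocks[key] = value
        self._keys_lower[key.lower()] = key

    def __getitem__(self, key):
        return self._blocks[self._keys_lower[key.lower()]]

store = CaseInsensitiveStore()
store['MyBlock'] = 42
assert store['MYBLOCK'] == 42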
def __init__(self, cif_block, base_array_info=None, wavelengths=None): crystal_symmetry_builder.__init__(self, cif_block) if base_array_info is not None: self.crystal_symmetry = self.crystal_symmetry.join_symmetry( other_symmetry=base_array_info.crystal_symmetry_from_file, force=True) self._arrays = OrderedDict() if (wavelengths is None): wavelengths = {} if base_array_info is None: base_array_info = miller.array_info(source_type="cif") refln_containing_loops = self.get_miller_indices_containing_loops() for self.indices, refln_loop in refln_containing_loops: self.wavelength_id_array = None self.crystal_id_array = None self.scale_group_array = None wavelength_ids = [None] crystal_ids = [None] scale_groups = [None] for key, value in refln_loop.iteritems(): # need to get these arrays first if (key.endswith('wavelength_id') or key.endswith('crystal_id') or key.endswith('scale_group_code')): data = as_int_or_none_if_all_question_marks( value, column_name=key) if data is None: continue counts = data.counts() if key.endswith('wavelength_id'): wavelength_ids = counts.keys() if len(counts) == 1: continue array = miller.array( miller.set(self.crystal_symmetry, self.indices).auto_anomalous(), data) if key.endswith('wavelength_id'): self.wavelength_id_array = array wavelength_ids = counts.keys() elif key.endswith('crystal_id'): self.crystal_id_array = array crystal_ids = counts.keys() elif key.endswith('scale_group_code'): self.scale_group_array = array scale_groups = counts.keys() for label, value in sorted(refln_loop.items()): for w_id in wavelength_ids: for crys_id in crystal_ids: for scale_group in scale_groups: if 'index_' in label: continue key = label labels = [label] wavelength = None if (key.endswith('wavelength_id') or key.endswith('crystal_id') or key.endswith('scale_group_code')): w_id = None crys_id = None scale_group = None key_suffix = '' if w_id is not None: key_suffix += '_%i' % w_id labels.insert(0, "wavelength_id=%i" % w_id) wavelength = wavelengths.get(w_id, None) if crys_id is not None: key_suffix += '_%i' % crys_id labels.insert(0, "crystal_id=%i" % crys_id) if scale_group is not None: key_suffix += '_%i' % scale_group labels.insert( 0, "scale_group_code=%i" % scale_group) key += key_suffix sigmas = None if key in self._arrays: continue array = self.flex_std_string_as_miller_array( value, wavelength_id=w_id, crystal_id=crys_id, scale_group_code=scale_group) if array is None: continue if '_sigma' in key: sigmas_label = label key = None for suffix in ('', '_meas', '_calc'): if sigmas_label.replace( '_sigma', suffix) in refln_loop: key = sigmas_label.replace( '_sigma', suffix) + key_suffix break if key is None: key = sigmas_label + key_suffix elif key in self._arrays and self._arrays[ key].sigmas() is None: sigmas = array array = self._arrays[key] if (not check_array_sizes( array, sigmas, key, sigmas_label)): continue sigmas = as_flex_double( sigmas, sigmas_label) array.set_sigmas(sigmas.data()) info = array.info() array.set_info( info.customized_copy( labels=info.labels + [sigmas_label], wavelength=wavelength)) continue elif 'PHWT' in key: phwt_label = label fwt_label = label.replace('PHWT', 'FWT') if fwt_label not in refln_loop: continue phwt_array = array if fwt_label in self._arrays: array = self._arrays[fwt_label] if (not check_array_sizes( array, phwt_array, fwt_label, phwt_label)): continue phases = as_flex_double( phwt_array, phwt_label) info = array.info() array = array.phase_transfer(phases, deg=True) array.set_info( info.customized_copy( labels=info.labels + 
[phwt_label])) self._arrays[fwt_label] = array continue elif 'HL_' in key: hl_letter = key[key.find('HL_') + 3] hl_key = 'HL_' + hl_letter key = key.replace(hl_key, 'HL_A') if key in self._arrays: continue # this array is already dealt with hl_labels = [ label.replace(hl_key, 'HL_' + letter) for letter in 'ABCD' ] hl_keys = [ key.replace(hl_key, 'HL_' + letter) for letter in 'ABCD' ] hl_values = [ cif_block.get(hl_key) for hl_key in hl_labels ] if hl_values.count(None) == 0: selection = self.get_selection( hl_values[0], wavelength_id=w_id, crystal_id=crys_id, scale_group_code=scale_group) hl_values = [ as_double_or_none_if_all_question_marks( hl.select(selection), column_name=lab) for hl, lab in zip( hl_values, hl_labels) ] array = miller.array( miller.set( self.crystal_symmetry, self.indices.select( selection)).auto_anomalous(), flex.hendrickson_lattman(*hl_values)) labels = labels[:-1] + hl_labels elif '.B_' in key or '_B_' in key: if '.B_' in key: key, key_b = key.replace('.B_', '.A_'), key label, label_b = label.replace( '.B_', '.A_'), label elif '_B_' in key: key, key_b = key.replace('_B', '_A'), key label, label_b = label.replace('_B', '_A'), label if key in refln_loop and key_b in refln_loop: b_part = array.data() if key in self._arrays: info = self._arrays[key].info() a_part = self._arrays[key].data() self._arrays[key] = self._arrays[ key].array( data=flex.complex_double( a_part, b_part)) self._arrays[key].set_info( info.customized_copy( labels=info.labels + [key_b])) continue elif ('phase_' in key and not "_meas" in key and self.crystal_symmetry.space_group() is not None): alt_key1 = label.replace('phase_', 'F_') alt_key2 = alt_key1 + '_au' if alt_key1 in refln_loop: phase_key = label key = alt_key1 + key_suffix elif alt_key2 in refln_loop: phase_key = label key = alt_key2 + key_suffix else: phase_key = None if phase_key is not None: phases = array.data() if key in self._arrays: array = self._arrays[key] array = as_flex_double(array, key) if (not check_array_sizes( array, phases, key, phase_key)): continue info = self._arrays[key].info() self._arrays[ key] = array.phase_transfer( phases, deg=True) self._arrays[key].set_info( info.customized_copy( labels=info.labels + [phase_key])) else: array = self.flex_std_string_as_miller_array( refln_loop[label], wavelength_id=w_id, crystal_id=crys_id, scale_group_code=scale_group) if (not check_array_sizes( array, phases, key, phase_key)): continue array.phase_transfer(phases, deg=True) labels = labels + [label, phase_key] if base_array_info.labels is not None: labels = base_array_info.labels + labels def rstrip_substrings(string, substrings): for substr in substrings: if substr == '': continue if string.endswith(substr): string = string[:-len(substr)] return string # determine observation type stripped_key = rstrip_substrings( key, [ key_suffix, '_au', '_meas', '_calc', '_plus', '_minus' ]) if (stripped_key.endswith('F_squared') or stripped_key.endswith('intensity') or stripped_key.endswith('.I') or stripped_key.endswith('_I')) and ( array.is_real_array() or array.is_integer_array()): array.set_observation_type_xray_intensity() elif (stripped_key.endswith('F') and (array.is_real_array() or array.is_integer_array())): array.set_observation_type_xray_amplitude() if (array.is_xray_amplitude_array() or array.is_xray_intensity_array()): # e.g.
merge_equivalents treats integer arrays differently, so must # convert integer observation arrays here to be safe if isinstance(array.data(), flex.int): array = array.customized_copy( data=array.data().as_double()) array.set_info( base_array_info.customized_copy(labels=labels)) if (array.is_xray_amplitude_array() or array.is_xray_intensity_array()): info = array.info() array.set_info( info.customized_copy( wavelength=wavelength)) self._arrays.setdefault(key, array) for key, array in self._arrays.copy().iteritems(): if (key.endswith('_minus') or '_minus_' in key or key.endswith('_plus') or '_plus_' in key): if '_minus' in key: minus_key = key plus_key = key.replace('_minus', '_plus') elif '_plus' in key: plus_key = key minus_key = key.replace('_plus', '_minus') if plus_key in self._arrays and minus_key in self._arrays: plus_array = self._arrays.pop(plus_key) minus_array = self._arrays.pop(minus_key) minus_array = minus_array.customized_copy( indices=-minus_array.indices()).set_info( minus_array.info()) array = plus_array.concatenate( minus_array, assert_is_similar_symmetry=False) array = array.customized_copy(anomalous_flag=True) array.set_info( minus_array.info().customized_copy(labels=list( OrderedSet(plus_array.info().labels + minus_array.info().labels)))) array.set_observation_type(plus_array.observation_type()) self._arrays.setdefault(key, array) if len(self._arrays) == 0: raise CifBuilderError("No reflection data present in cif block")
class XInfo(object): '''A class to represent all of the input to the xia2dpa system, with enough information to allow structure solution, as parsed from a .xinfo file, an example of which is in the source code.''' def __init__(self, xinfo_file, sweep_ids=None, sweep_ranges=None): '''Initialise myself from an input .xinfo file.''' # first initialise all of the data structures which will hold the # information... self._project = None self._crystals = OrderedDict() if sweep_ids is not None: sweep_ids = [s.lower() for s in sweep_ids] if sweep_ranges is not None: assert sweep_ids is not None assert len(sweep_ids) == len(sweep_ranges) self._sweep_ids = sweep_ids self._sweep_ranges = sweep_ranges # read the contents of the xinfo file self._parse_project(xinfo_file) self._validate() return def get_output(self): '''Generate a string representation of the project.''' text = 'Project %s\n' % self._project for crystal in self._crystals.keys(): text += 'Crystal %s\n' % crystal text += '%s\n' % self._crystals[crystal].get_output() # remove a trailing newline... return text[:-1] def get_project(self): return self._project def get_crystals(self): return self._crystals def _validate(self): '''Validate the structure of this object, ensuring that everything looks right... raise exception if I find something wrong.''' return True def _parse_project(self, xinfo_file): '''Parse & validate the contents of the .xinfo file. This parses the project element (i.e. the whole thing..)''' project_records = [] for r in open(xinfo_file, 'r').readlines(): record = r.strip() if not record: pass elif record[0] == '!' or record[0] == '#': pass else : # then it may contain something useful... project_records.append(record) # so now we have loaded the whole file into memory stripping # out the crud... let's look for something useful for i in range(len(project_records)): record = project_records[i] if 'BEGIN PROJECT' in record: self._project = record.replace('BEGIN PROJECT', '').strip() if 'END PROJECT' in record: if not self._project == record.replace( 'END PROJECT', '').strip(): raise RuntimeError, 'error parsing END PROJECT record' # next look for crystals if 'BEGIN CRYSTAL ' in record: crystal_records = [record] while True: i += 1 record = project_records[i] crystal_records.append(record) if 'END CRYSTAL ' in record: break self._parse_crystal(crystal_records) # that's everything, because parse_crystal handles # the rest... return def _parse_crystal(self, crystal_records): '''Parse the interesting information out of the crystal description.''' crystal = '' for i in range(len(crystal_records)): record = crystal_records[i] if 'BEGIN CRYSTAL ' in record: # we should only ever have one of these records in # a call to this method if crystal != '': raise RuntimeError, 'error in BEGIN CRYSTAL record' crystal = record.replace('BEGIN CRYSTAL ', '').strip() if crystal in self._crystals: raise RuntimeError, 'crystal %s already exists' % \ crystal # cardinality: # # sequence - exactly one, a long string # wavelengths - a dictionary of data structures keyed by the # wavelength id # sweeps - a dictionary of data structures keyed by the # sweep id # ha_info - exactly one dictionary containing the heavy atom # information self._crystals[crystal] = { 'sequence':'', 'wavelengths': OrderedDict(), 'samples': OrderedDict(), 'sweeps': OrderedDict(), 'ha_info': OrderedDict(), 'crystal_data': OrderedDict() } # next look for interesting stuff in the data structure... 
# starting with the sequence if 'BEGIN AA_SEQUENCE' in record: sequence = '' i += 1 record = crystal_records[i] while record != 'END AA_SEQUENCE': if not ('#' in record or '!' in record): sequence += record.strip() i += 1 record = crystal_records[i] if self._crystals[crystal]['sequence'] != '': raise RuntimeError, 'error: two SEQUENCE records found' self._crystals[crystal]['sequence'] = sequence # look for heavy atom information if 'BEGIN HA_INFO' in record: i += 1 record = crystal_records[i] while record != 'END HA_INFO': key = record.split()[0].lower() value = record.split()[1] # things which are numbers are integers... if 'number' in key: value = int(value) self._crystals[crystal]['ha_info'][key] = value i += 1 record = crystal_records[i] if 'BEGIN SAMPLE' in record: sample = record.replace('BEGIN SAMPLE ', '').strip() i += 1 record = crystal_records[i] while not 'END SAMPLE' in record: i += 1 record = crystal_records[i] self._crystals[crystal]['samples'][sample] = {} # look for wavelength definitions # FIXME need to check that there are not two wavelength # definitions with the same numerical value for the wavelength - # unless this is some way of handling RIP? maybe a NOFIXME. # look for data blocks if 'BEGIN CRYSTAL_DATA' in record: i += 1 record = crystal_records[i] while not 'END CRYSTAL_DATA' in record: key = record.split()[0].lower() value = record.replace(record.split()[0], '').strip() self._crystals[crystal]['crystal_data'][key] = value i += 1 record = crystal_records[i] if 'BEGIN WAVELENGTH ' in record: wavelength = record.replace('BEGIN WAVELENGTH ', '').strip() # check that this is a new wavelength definition if wavelength in self._crystals[crystal]['wavelengths']: raise RuntimeError, \ 'wavelength %s already exists for crystal %s' % \ (wavelength, crystal) self._crystals[crystal]['wavelengths'][wavelength] = { } i += 1 record = crystal_records[i] # populate this with interesting things while not 'END WAVELENGTH' in record: # deal with a nested WAVELENGTH_STATISTICS block if 'BEGIN WAVELENGTH_STATISTICS' in record: self._crystals[crystal]['wavelengths'][ wavelength]['statistics'] = { } i += 1 record = crystal_records[i] while not 'END WAVELENGTH_STATISTICS' in record: key, value = tuple(record.split()) self._crystals[crystal]['wavelengths'][ wavelength]['statistics'][ key.lower()] = float(value) i += 1 record = crystal_records[i] # else deal with the usual tokens key = record.split()[0].lower() if key == 'resolution': lst = record.split() if len(lst) < 2 or len(lst) > 3: raise RuntimeError, 'resolution dmin [dmax]' if len(lst) == 2: dmin = float(lst[1]) self._crystals[crystal]['wavelengths'][ wavelength]['dmin'] = dmin else: dmin = min(map(float, lst[1:])) dmax = max(map(float, lst[1:])) self._crystals[crystal]['wavelengths'][ wavelength]['dmin'] = dmin self._crystals[crystal]['wavelengths'][ wavelength]['dmax'] = dmax i += 1 record = crystal_records[i] continue if len(record.split()) == 1: raise RuntimeError, 'missing value for token %s' % \ record.split()[0] try: value = float(record.split()[1]) except ValueError, e: value = record.replace(record.split()[0], '').strip() self._crystals[crystal]['wavelengths'][ wavelength][key] = value i += 1 record = crystal_records[i] # next look for sweeps, checking that the wavelength # definitions match up...
if 'BEGIN SWEEP' in record: sweep = record.replace('BEGIN SWEEP', '').strip() if self._sweep_ids is not None and sweep.lower() not in self._sweep_ids: continue elif self._sweep_ranges is not None: start_end = self._sweep_ranges[self._sweep_ids.index(sweep.lower())] else: start_end = None if sweep in self._crystals[crystal]['sweeps']: raise RuntimeError, \ 'sweep %s already exists for crystal %s' % \ (sweep, crystal) self._crystals[crystal]['sweeps'][sweep] = { } self._crystals[crystal]['sweeps'][sweep][ 'excluded_regions'] = [] if start_end is not None: self._crystals[crystal]['sweeps'][sweep][ 'start_end'] = start_end # in here I expect to find IMAGE, DIRECTORY, WAVELENGTH # and optionally BEAM # FIXME 30/OCT/06 this may not be the case, for instance # if an INTEGRATED_REFLECTION_FILE record is in there... # c/f XProject.py, XSweep.py i += 1 record = crystal_records[i] # populate this with interesting things while not 'END SWEEP' in record: # allow for WAVELENGTH_ID (bug # 2358) if 'WAVELENGTH_ID' == record.split()[0]: record = record.replace('WAVELENGTH_ID', 'WAVELENGTH') if 'WAVELENGTH' == record.split()[0]: wavelength = record.replace('WAVELENGTH', '').strip() if not wavelength in self._crystals[crystal]['wavelengths'].keys(): raise RuntimeError, \ 'wavelength %s unknown for crystal %s' % \ (wavelength, crystal) self._crystals[crystal]['sweeps'][sweep]['wavelength'] = wavelength elif 'SAMPLE' == record.split()[0]: sample = record.replace('SAMPLE ', '').strip() if not sample in self._crystals[crystal]['samples'].keys(): raise RuntimeError, \ 'sample %s unknown for crystal %s' % (sample, crystal) self._crystals[crystal]['sweeps'][sweep]['sample'] = sample elif 'BEAM' == record.split()[0]: beam = map(float, record.split()[1:]) self._crystals[crystal]['sweeps'][sweep]['beam'] = beam elif 'DISTANCE' == record.split()[0]: distance = float(record.split()[1]) self._crystals[crystal]['sweeps'][sweep]['distance'] = distance elif 'EPOCH' == record.split()[0]: epoch = int(record.split()[1]) self._crystals[crystal]['sweeps'][sweep]['epoch'] = epoch elif 'REVERSEPHI' == record.split()[0]: self._crystals[crystal]['sweeps'][sweep]['reversephi'] = True elif 'START_END' == record.split()[0]: if 'start_end' not in self._crystals[crystal]['sweeps'][sweep]: start_end = map(int, record.split()[1:]) if len(start_end) != 2: raise RuntimeError, \ 'START_END start end, not "%s"' % record self._crystals[crystal]['sweeps'][sweep]['start_end'] = start_end elif 'EXCLUDE' == record.split()[0]: if record.split()[1].upper() == 'ICE': self._crystals[crystal]['sweeps'][sweep]['ice'] = True else: excluded_region = map(float, record.split()[1:]) if len(excluded_region) != 2: raise RuntimeError, \ 'EXCLUDE upper lower, not "%s". \ eg. EXCLUDE 2.28 2.22' % record if excluded_region[0] <= excluded_region[1]: raise RuntimeError, \ 'EXCLUDE upper lower, where upper \ must be greater than lower (not "%s").\n\ eg. 
EXCLUDE 2.28 2.22' % record self._crystals[crystal]['sweeps'][sweep]['excluded_regions'].append( excluded_region) else: key = record.split()[0] value = record.replace(key, '').strip() self._crystals[crystal]['sweeps'][sweep][key] = value i += 1 record = crystal_records[i] # now look for one-record things if 'SCALED_MERGED_REFLECTION_FILE' in record: self._crystals[crystal][ 'scaled_merged_reflection_file'] = \ record.replace('SCALED_MERGED_REFLECTION_FILE', '').strip() if 'REFERENCE_REFLECTION_FILE' in record: self._crystals[crystal][ 'reference_reflection_file'] = \ record.replace('REFERENCE_REFLECTION_FILE', '').strip() if 'FREER_FILE' in record: # free file also needs to be used for indexing reference to # make any sense at all... self._crystals[crystal][ 'freer_file'] = record.replace('FREER_FILE', '').strip() self._crystals[crystal][ 'reference_reflection_file'] = \ record.replace('FREER_FILE', '').strip() # user assigned spacegroup and cell constants if 'USER_SPACEGROUP' in record: self._crystals[crystal][ 'user_spacegroup'] = record.replace( 'USER_SPACEGROUP', '').strip() if 'USER_CELL' in record: self._crystals[crystal][ 'user_cell'] = tuple(map(float, record.split()[1:]))
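For reference, a minimal .xinfo file accepted by the parser above, reconstructed purely from the tokens it recognises; every name and value here is invented:

demo_xinfo = '''\
BEGIN PROJECT DEMO
BEGIN CRYSTAL XTAL1
BEGIN HA_INFO
ATOM SE
NUMBER_PER_MONOMER 2
END HA_INFO
BEGIN WAVELENGTH PEAK
WAVELENGTH 0.9790
RESOLUTION 1.8
END WAVELENGTH
BEGIN SWEEP SWEEP1
WAVELENGTH PEAK
DIRECTORY /data/demo
IMAGE demo_0001.img
START_END 1 90
END SWEEP
END CRYSTAL XTAL1
END PROJECT DEMO
'''
open('demo.xinfo', 'w').write(demo_xinfo)
xi = XInfo('demo.xinfo')  # assumes the XInfo class above is in scope
assert xi.get_project() == 'DEMO'
print(xi.get_crystals()['XTAL1']['wavelengths'])  # e.g. {'PEAK': {'wavelength': 0.979, 'dmin': 1.8}}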
def as_json(self, filename=None, compact=False, split=False): ''' Dump experiment list as json ''' import json from os.path import splitext from libtbx.containers import OrderedDict # Get the dictionary and get the JSON string dictionary = self._experiment_list.to_dict() # Split into separate files if filename is not None and split: # Get lists of models by filename basepath = splitext(filename)[0] ilist = [('%s_imageset_%d.json' % (basepath, i), d) for i, d in enumerate(dictionary['imageset'])] blist = [('%s_beam_%d.json' % (basepath, i), d) for i, d in enumerate(dictionary['beam'])] dlist = [('%s_detector_%d.json' % (basepath, i), d) for i, d in enumerate(dictionary['detector'])] glist = [('%s_goniometer_%d.json' % (basepath, i), d) for i, d in enumerate(dictionary['goniometer'])] slist = [('%s_scan_%d.json' % (basepath, i), d) for i, d in enumerate(dictionary['scan'])] clist = [('%s_crystal_%d.json' % (basepath, i), d) for i, d in enumerate(dictionary['crystal'])] plist = [('%s_profile_%d.json' % (basepath, i), d) for i, d in enumerate(dictionary['profile'])] scalelist = [('%s_scaling_model_%d.json' % (basepath, i), d) for i, d in enumerate(dictionary['scaling_model'])] # Get the list of experiments edict = OrderedDict([('__id__', 'ExperimentList'), ('experiment', dictionary['experiment'])]) # Set paths rather than indices for e in edict['experiment']: if 'imageset' in e: e['imageset'] = ilist[e['imageset']][0] if 'beam' in e: e['beam'] = blist[e['beam']][0] if 'detector' in e: e['detector'] = dlist[e['detector']][0] if 'goniometer' in e: e['goniometer'] = glist[e['goniometer']][0] if 'scan' in e: e['scan'] = slist[e['scan']][0] if 'crystal' in e: e['crystal'] = clist[e['crystal']][0] if 'profile' in e: e['profile'] = plist[e['profile']][0] if 'scaling_model' in e: e['scaling_model'] = scalelist[e['scaling_model']][0] to_write = ilist + blist + dlist + glist + \ slist + clist + plist + scalelist + [(filename, edict)] else: to_write = [(filename, dictionary)] for fname, obj in to_write: if compact: text = json.dumps(obj, separators=(',', ':'), ensure_ascii=True) else: text = json.dumps(obj, indent=2, ensure_ascii=True) # If a filename is set then dump to file otherwise return string if fname is not None: with open(fname, 'w') as outfile: outfile.write(text) else: return text
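A hedged usage sketch, assuming dumper is an instance of the surrounding class (which wraps an ExperimentList, per self._experiment_list):

# everything in one file
dumper.as_json(filename='experiments.json')
# no filename: the JSON text is returned instead of written
text = dumper.as_json(compact=True)
# split=True fans the models out into sibling files such as
# experiments_beam_0.json, experiments_detector_0.json, ...
dumper.as_json(filename='experiments.json', split=True)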
class cif(DictMixin): def __init__(self, blocks=None): if blocks is not None: self.blocks = OrderedDict(blocks) else: self.blocks = OrderedDict() self.keys_lower = dict([(key.lower(), key) for key in self.blocks.keys()]) def __setitem__(self, key, value): assert isinstance(value, block) if not re.match(tag_re, '_'+key): raise Sorry("%s is not a valid data block name" %key) self.blocks[key] = value self.keys_lower[key.lower()] = key def get(self, key, default=None): key_lower = self.keys_lower.get(key.lower()) if (key_lower is None): return default return self.blocks.get(key_lower, default) def __getitem__(self, key): result = self.get(key) if (result is None): raise KeyError('Unknown CIF data block name: "%s"' % key) return result def __delitem__(self, key): del self.blocks[self.keys_lower[key.lower()]] del self.keys_lower[key.lower()] def keys(self): return self.blocks.keys() def __repr__(self): return repr(OrderedDict(self.iteritems())) def __copy__(self): return cif(self.blocks.copy()) copy = __copy__ def __deepcopy__(self, memo): return cif(copy.deepcopy(self.blocks, memo)) def deepcopy(self): return copy.deepcopy(self) def show(self, out=None, indent=" ", indent_row=None, data_name_field_width=34, loop_format_strings=None): if out is None: out = sys.stdout for name, block in self.items(): print >> out, "data_%s" %name block.show( out=out, indent=indent, indent_row=indent_row, data_name_field_width=data_name_field_width, loop_format_strings=loop_format_strings) def __str__(self): s = StringIO() self.show(out=s) return s.getvalue() def validate(self, dictionary, show_warnings=True, error_handler=None, out=None): if out is None: out = sys.stdout from iotbx.cif import validation errors = {} if error_handler is None: error_handler = validation.ErrorHandler() for key, block in self.blocks.iteritems(): error_handler = error_handler.__class__() dictionary.set_error_handler(error_handler) block.validate(dictionary) errors.setdefault(key, error_handler) if error_handler.error_count or error_handler.warning_count: error_handler.show(show_warnings=show_warnings, out=out) return error_handler def sort(self, recursive=False, key=None, reverse=False): self.blocks = OrderedDict(sorted(self.blocks.items(), key=key, reverse=reverse)) if recursive: for b in self.blocks.values(): b.sort(recursive=recursive, reverse=reverse)
def add_miller_array(self, array, array_type=None, column_name=None, column_names=None): """ Accepts a miller array, and one of array_type, column_name or column_names. """ assert [array_type, column_name, column_names].count(None) == 2 if array_type is not None: assert array_type in ('calc', 'meas') elif column_name is not None: column_names = [column_name] if array.is_complex_array(): if column_names is None: column_names = [ self.prefix + 'F_' + array_type, self.prefix + 'phase_' + array_type ] else: assert len(column_names) == 2 if (('_A_' in column_names[0] and '_B_' in column_names[1]) or ('.A_' in column_names[0] and '.B_' in column_names[1])): data = [ flex.real(array.data()).as_string(), flex.imag(array.data()).as_string() ] else: data = [ flex.abs(array.data()).as_string(), array.phases(deg=True).data().as_string() ] elif array.is_hendrickson_lattman_array(): if column_names is None: column_names = [ self.prefix + 'HL_%s_iso' % abcd for abcd in 'ABCD' ] else: assert len(column_names) == 4 data = [d.as_string() for d in array.data().as_abcd()] else: if array_type is not None: if array.is_xray_intensity_array(): obs_ext = 'squared_' else: obs_ext = '' column_names = [self.prefix + 'F_' + obs_ext + array_type] if array.sigmas() is not None: column_names.append(self.prefix + 'F_' + obs_ext + 'sigma') if isinstance(array.data(), flex.std_string): data = [array.data()] else: data = [array.data().as_string()] if array.anomalous_flag(): if ((array.sigmas() is not None and len(column_names) == 4) or (array.sigmas() is None and len(column_names) == 2)): data = [] asu, matches = array.match_bijvoet_mates() for anomalous_sign in ("+", "-"): sel = matches.pairs_hemisphere_selection( anomalous_sign) sel.extend( matches.singles_hemisphere_selection( anomalous_sign)) if (anomalous_sign == "+"): indices = asu.indices().select(sel) hemisphere_column_names = column_names[:len( column_names) // 2] else: indices = -asu.indices().select(sel) hemisphere_column_names = column_names[ len(column_names) // 2:] hemisphere_data = asu.data().select(sel) hemisphere_array = miller.array( miller.set(array.crystal_symmetry(), indices), hemisphere_data) if array.sigmas() is not None: hemisphere_array.set_sigmas( asu.sigmas().select(sel)) if self.refln_loop is None: # then this is the first array to be added to the loop, # hack so we don't have both hemispheres of indices self.indices = indices self.add_miller_array( hemisphere_array, column_names=hemisphere_column_names) return if array.sigmas() is not None and len(column_names) == 2: data.append(array.sigmas().as_string()) if not (self.indices.size() == array.indices().size() and self.indices.all_eq(array.indices())): from cctbx.miller import match_indices other_indices = array.indices().deep_copy() match = match_indices(self.indices, other_indices) if match.singles(0).size(): # array is missing some reflections indices that already appear in the loop # therefore pad the data with '?' values other_indices.extend( self.indices.select(match.single_selection(0))) for d in data: d.extend( flex.std_string(['?'] * (other_indices.size() - d.size()))) for d in data: assert d.size() == other_indices.size() match = match_indices(self.indices, other_indices) if match.singles(1).size(): # this array contains some reflections that are not already present in the # cif loop, therefore need to add rows of '?' 
values single_indices = other_indices.select( match.single_selection(1)) self.indices.extend(single_indices) n_data_columns = len(self.refln_loop) - 3 for hkl in single_indices: row = list(hkl) + ['?'] * n_data_columns self.refln_loop.add_row(row) match = match_indices(self.indices, other_indices) perm = match.permutation() data = [d.select(perm) for d in data] if self.refln_loop is None: self.refln_loop = miller_indices_as_cif_loop(self.indices, prefix=self.prefix) columns = OrderedDict(zip(column_names, data)) for key in columns: assert key not in self.refln_loop self.refln_loop.add_columns(columns)
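A sketch of how add_miller_array might be driven, assuming the surrounding class is a CIF reflection writer holding prefix, indices and refln_loop (as the code implies); f_obs, map_coeffs and status are hypothetical miller arrays:

# amplitudes with sigmas, tagged as measured data (exactly one selector is allowed)
writer.add_miller_array(f_obs, array_type='meas')
# a complex array lands in two columns: amplitudes and phases in degrees
writer.add_miller_array(map_coeffs, column_names=['_refln.FWT', '_refln.PHWT'])
# a single extra column under an explicit name
writer.add_miller_array(status, column_name='_refln.status')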
class loop(DictMixin): def __init__(self, header=None, data=None): self._columns = OrderedDict() self.keys_lower = {} if header is not None: for key in header: self.setdefault(key, flex.std_string()) if data is not None: # the number of data items must be an exact multiple of the number of headers assert len(data) % len(header) == 0, "Wrong number of data items for loop" n_rows = len(data)//len(header) n_columns = len(header) for i in range(n_rows): self.add_row([data[i*n_columns+j] for j in range(n_columns)]) elif header is None and data is not None: assert isinstance(data, dict) or isinstance(data, OrderedDict) self.add_columns(data) self.keys_lower = dict( [(key.lower(), key) for key in self._columns.keys()]) def __setitem__(self, key, value): if not re.match(tag_re, key): raise Sorry("%s is not a valid data name" %key) if len(self) > 0: assert len(value) == self.size() if not isinstance(value, flex.std_string): for flex_numeric_type in (flex.int, flex.double): if isinstance(value, flex_numeric_type): value = value.as_string() else: try: value = flex_numeric_type(value).as_string() except TypeError: continue else: break if not isinstance(value, flex.std_string): value = flex.std_string(value) # value must be a mutable type assert hasattr(value, '__setitem__') self._columns[key] = value self.keys_lower[key.lower()] = key def __getitem__(self, key): return self._columns[self.keys_lower[key.lower()]] def __delitem__(self, key): del self._columns[self.keys_lower[key.lower()]] del self.keys_lower[key.lower()] def keys(self): return self._columns.keys() def __repr__(self): return repr(OrderedDict(self.iteritems())) def name(self): return common_substring(self.keys()).rstrip('_').rstrip('.') def size(self): size = 0 for column in self.values(): size = max(size, len(column)) return size def n_rows(self): return self.size() def n_columns(self): return len(self.keys()) def add_row(self, row, default_value="?"): if isinstance(row, dict): for key in self: if key in row: self[key].append(str(row[key])) else: self[key].append(default_value) else: assert len(row) == len(self) for i, key in enumerate(self): self[key].append(str(row[i])) def add_column(self, key, values): if self.size() != 0: assert len(values) == self.size() self[key] = values self.keys_lower[key.lower()] = key def add_columns(self, columns): assert isinstance(columns, dict) or isinstance(columns, OrderedDict) for key, value in columns.iteritems(): self.add_column(key, value) def update_column(self, key, values): assert type(key)==type(""), "first argument is column key string" if self.size() != 0: assert len(values) == self.size(), "len(values) %d != self.size() %d" % ( len(values), self.size(), ) self[key] = values self.keys_lower[key.lower()] = key def delete_row(self, index): assert index < self.n_rows() for column in self._columns.values(): del column[index] def __copy__(self): new = loop() new._columns = self._columns.copy() new.keys_lower = self.keys_lower.copy() return new copy = __copy__ def __deepcopy__(self, memo): new = loop() new._columns = copy.deepcopy(self._columns, memo) new.keys_lower = copy.deepcopy(self.keys_lower, memo) return new def deepcopy(self): return copy.deepcopy(self) def show(self, out=None, indent=" ", indent_row=None, fmt_str=None, align_columns=True): assert self.n_rows() > 0 and self.n_columns() > 0, "keys: %s %d %d" % ( self.keys(), self.n_rows(), self.n_columns(), ) if out is None: out = sys.stdout if indent_row is None: indent_row = indent assert indent.strip() == "" assert indent_row.strip() == "" 
print >> out, "loop_" for k in self.keys(): print >> out, indent + k values = self._columns.values() range_len_values = range(len(values)) if fmt_str is not None: # Pretty printing: # The user is responsible for providing a valid format string. # Values are not quoted - it is the user's responsibility to place # appropriate quotes in the format string if a particular value may # contain spaces. values = copy.deepcopy(values) for i, v in enumerate(values): for flex_numeric_type in (flex.int, flex.double): if not isinstance(v, flex_numeric_type): try: values[i] = flex_numeric_type(v) except ValueError: continue else: break if fmt_str is None: fmt_str = indent_row + ' '.join(["%s"]*len(values)) for i in range(self.size()): print >> out, fmt_str % tuple([values[j][i] for j in range_len_values]) elif align_columns: fmt_str = [] for i, (k, v) in enumerate(self.iteritems()): for i_v in range(v.size()): v[i_v] = format_value(v[i_v]) # exclude semicolon text fields from column width calculation v_ = flex.std_string(item for item in v if "\n" not in item) width = v_.max_element_length() # See if column contains only numbers, '.' or '?' # right-align numerical columns, left-align everything else v = v.select(~( (v == ".") | (v == "?") )) try: flex.double(v) except ValueError: width *= -1 fmt_str.append("%%%is" %width) fmt_str = indent_row + " ".join(fmt_str) for i in range(self.size()): print >> out, (fmt_str % tuple([values[j][i] for j in range_len_values])).rstrip() else: for i in range(self.size()): values_to_print = [format_value(values[j][i]) for j in range_len_values] print >> out, ' '.join([indent] + values_to_print) def __str__(self): s = StringIO() self.show(out=s) return s.getvalue() def iterrows(self): """ Warning! Still super-slow! """ keys = self.keys() s_values = self.values() range_len_self = range(len(self)) # range is 1% faster than xrange in this particular place. # tuple (s_values...) is slightly faster than list for j in range(self.size()): yield OrderedDict(zip(keys, (s_values[i][j] for i in range_len_self))) def find_row(self, kv_dict): self_keys = self.keys() for k in kv_dict.keys(): assert k in self_keys result = [] s_values = self.values() range_len_self = range(len(self)) for i in range(self.size()): goodrow = True for k, v in kv_dict.iteritems(): if self[k][i] != v: goodrow = False break if goodrow: result.append(OrderedDict(zip(self_keys, [s_values[j][i] for j in range_len_self]))) return result def sort(self, key=None, reverse=False): self._columns = OrderedDict( sorted(self._columns.items(), key=key, reverse=reverse)) def order(self, order): def _cmp_key(k1, k2): for i, o in enumerate(order): if k1==o: break for j, o in enumerate(order): if k2==o: break if i<j: return -1 return 1 keys = self._columns.keys() keys.sort(_cmp_key) tmp = OrderedDict() for o in order: tmp[o]=self._columns[o] self._columns = tmp def __eq__(self, other): if (len(self) != len(other) or self.size() != other.size() or self.keys() != other.keys()): return False for value, other_value in zip(self.values(), other.values()): if (value == other_value).count(True) != len(value): return False return True
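The newer loop adds find_row and a leaner iterrows; a quick sketch, again assuming the class is importable as iotbx.cif.model.loop and with invented tags:

from iotbx.cif import model

atoms = model.loop(header=('_atom.id', '_atom.element'),
                   data=('1', 'C', '2', 'N', '3', 'C'))
for row in atoms.iterrows():
    print(row['_atom.id'] + ' ' + row['_atom.element'])
carbons = atoms.find_row({'_atom.element': 'C'})  # matching rows as OrderedDicts
assert len(carbons) == 2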
def __init__(self, unmerged_intensities, batches_all, n_bins=20, d_min=None, id_to_batches=None): intensities = OrderedDict() individual_merged_intensities = OrderedDict() batches = OrderedDict() #m_isym = OrderedDict() sel = unmerged_intensities.sigmas() > 0 unmerged_intensities = unmerged_intensities.select(sel) batches_all = batches_all.select(sel) if id_to_batches is None: run_id_to_batch_id = None run_id = 0 unique_batches = sorted(set(batches_all.data())) last_batch = None run_start = unique_batches[0] for i, batch in enumerate(unique_batches): if last_batch is not None and batch > (last_batch + 1): # a gap in the batch numbers closes the previous run batch_sel = (batches_all.data() >= run_start) & (batches_all.data() <= last_batch) batches[run_id] = batches_all.select(batch_sel).resolution_filter(d_min=d_min) intensities[run_id] = unmerged_intensities.select(batch_sel).resolution_filter(d_min=d_min) individual_merged_intensities[run_id] = intensities[run_id].merge_equivalents().array() Debug.write("run %i batch %i to %i" %(run_id+1, run_start, last_batch)) run_id += 1 run_start = batch if (i+1) == len(unique_batches): # flush the final run, making sure the current batch is included batch_sel = (batches_all.data() >= run_start) & (batches_all.data() <= batch) batches[run_id] = batches_all.select(batch_sel).resolution_filter(d_min=d_min) intensities[run_id] = unmerged_intensities.select(batch_sel).resolution_filter(d_min=d_min) individual_merged_intensities[run_id] = intensities[run_id].merge_equivalents().array() Debug.write("run %i batch %i to %i" %(run_id+1, run_start, batch)) run_id += 1 last_batch = batch else: run_id_to_batch_id = OrderedDict() run_id = 0 for batch_id, batch_range in id_to_batches.iteritems(): run_id_to_batch_id[run_id] = batch_id run_start, last_batch = batch_range batch_sel = (batches_all.data() >= run_start) & (batches_all.data() <= last_batch) batches[run_id] = batches_all.select(batch_sel).resolution_filter(d_min=d_min) intensities[run_id] = unmerged_intensities.select(batch_sel).resolution_filter(d_min=d_min) individual_merged_intensities[run_id] = intensities[run_id].merge_equivalents().array() Debug.write("run %i batch %i to %i" %(run_id+1, run_start, last_batch)) run_id += 1 unmerged_intensities.setup_binner(n_bins=n_bins) unmerged_intensities.show_summary() #result = unmerged_intensities.cc_one_half(use_binning=True) #result.show() self.unmerged_intensities = unmerged_intensities self.merged_intensities = unmerged_intensities.merge_equivalents().array() self.intensities = intensities self.individual_merged_intensities = individual_merged_intensities self.batches = batches if run_id_to_batch_id is not None: labels = run_id_to_batch_id.values() else: labels = None racc = self.relative_anomalous_cc() if racc is not None: self.plot_relative_anomalous_cc(racc, labels=labels) correlation_matrix, linkage_matrix = self.compute_correlation_coefficient_matrix() self._cluster_dict = self.to_dict(correlation_matrix, linkage_matrix) self.plot_cc_matrix(correlation_matrix, linkage_matrix, labels=labels) self.write_output()
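When id_to_batches is supplied, it must map a dataset label to an inclusive (first_batch, last_batch) pair, one entry per run, as the unpacking above shows; the labels and ranges below are invented:

from collections import OrderedDict

id_to_batches = OrderedDict([
    ('sweep1', (1, 90)),     # becomes run 0: batches 1..90
    ('sweep2', (101, 190)),  # becomes run 1: batches 101..190
])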
class multi_crystal_analysis(object):
  # Module-level dependencies assumed from the surrounding file (not shown):
  #   from cctbx.array_family import flex
  #   from libtbx.containers import OrderedDict
  #   from matplotlib import pyplot
  #   separate_unmerged and scipy_dendrogram_to_plotly_json defined elsewhere.

  def __init__(self, unmerged_intensities, batches_all, n_bins=20, d_min=None,
               id_to_batches=None):
    # Discard reflections without positive sigmas before any analysis.
    sel = unmerged_intensities.sigmas() > 0
    unmerged_intensities = unmerged_intensities.select(sel)
    batches_all = batches_all.select(sel)

    unmerged_intensities.setup_binner(n_bins=n_bins)
    unmerged_intensities.show_summary()
    self.unmerged_intensities = unmerged_intensities
    self.merged_intensities = unmerged_intensities.merge_equivalents().array()

    # Split the unmerged data into per-run (wedge) arrays.
    separate = separate_unmerged(
      unmerged_intensities, batches_all, id_to_batches=id_to_batches)
    self.intensities = separate.intensities
    self.batches = separate.batches
    run_id_to_batch_id = separate.run_id_to_batch_id

    self.individual_merged_intensities = OrderedDict()
    for k in self.intensities.keys():
      self.intensities[k] = self.intensities[k].resolution_filter(d_min=d_min)
      self.batches[k] = self.batches[k].resolution_filter(d_min=d_min)
      self.individual_merged_intensities[k] = (
        self.intensities[k].merge_equivalents().array())

    if run_id_to_batch_id is not None:
      labels = run_id_to_batch_id.values()
    else:
      labels = None

    racc = self.relative_anomalous_cc()
    if racc is not None:
      self.plot_relative_anomalous_cc(racc, labels=labels)
    correlation_matrix, linkage_matrix = \
      self.compute_correlation_coefficient_matrix()
    self._cluster_dict = self.to_dict(correlation_matrix, linkage_matrix)
    self.plot_cc_matrix(correlation_matrix, linkage_matrix, labels=labels)
    self.write_output()

  def to_dict(self, correlation_matrix, linkage_matrix):
    from scipy.cluster import hierarchy
    tree = hierarchy.to_tree(linkage_matrix, rd=False)

    d = {}

    # Convert the scipy dendrogram into a nested dict of clusters, following:
    # http://w3facility.org/question/scipy-dendrogram-to-json-for-d3-js-tree-visualisation/
    # https://gist.github.com/mdml/7537455

    def add_node(node):
      if node.is_leaf(): return
      cluster_id = node.get_id() - len(linkage_matrix) - 1
      row = linkage_matrix[cluster_id]
      d[cluster_id+1] = {
        'datasets': [i+1 for i in sorted(node.pre_order())],
        'height': row[2],
      }
      # Recursively add the current node's children.
      if node.left: add_node(node.left)
      if node.right: add_node(node.right)

    add_node(tree)
    return d

  def relative_anomalous_cc(self):
    # Only meaningful for anomalous data; implicitly returns None otherwise.
    if self.unmerged_intensities.anomalous_flag():
      d_min = min([ma.d_min() for ma in self.intensities.values()])
      racc = flex.double()
      full_set_anom_diffs = self.merged_intensities.anomalous_differences()
      for i_wedge in self.individual_merged_intensities.keys():
        ma_i = self.individual_merged_intensities[i_wedge].resolution_filter(
          d_min=d_min)
        anom_i = ma_i.anomalous_differences()
        anom_cc = anom_i.correlation(
          full_set_anom_diffs, assert_is_similar_symmetry=False).coefficient()
        racc.append(anom_cc)
      return racc

  def plot_relative_anomalous_cc(self, racc, labels=None):
    perm = flex.sort_permutation(racc)
    fig = pyplot.figure(dpi=1200, figsize=(16,12))
    pyplot.bar(range(len(racc)), list(racc.select(perm)))
    if labels is None:
      labels = ["%.0f" %(j+1) for j in perm]
    assert len(labels) == len(racc)
    pyplot.xticks([i+0.5 for i in range(len(racc))], labels)
    locs, labels = pyplot.xticks()
    pyplot.setp(labels, rotation=70)
    pyplot.xlabel("Dataset")
    pyplot.ylabel("Relative anomalous correlation coefficient")
    fig.savefig("racc.png")

  def compute_correlation_coefficient_matrix(self):
    from scipy.cluster import hierarchy
    import scipy.spatial.distance as ssd

    correlation_matrix = flex.double(
      flex.grid(len(self.intensities), len(self.intensities)))
    d_min = min([ma.d_min() for ma in self.intensities.values()])

    # Pairwise CC between the merged intensities of each pair of wedges;
    # the matrix is symmetric, so only the upper triangle is computed.
    for i_wedge in self.individual_merged_intensities.keys():
      for j_wedge in self.individual_merged_intensities.keys():
        if j_wedge < i_wedge: continue
        ma_i = self.individual_merged_intensities[i_wedge].resolution_filter(
          d_min=d_min)
        ma_j = self.individual_merged_intensities[j_wedge].resolution_filter(
          d_min=d_min)
        cc_ij = ma_i.correlation(ma_j).coefficient()
        correlation_matrix[(i_wedge, j_wedge)] = cc_ij
        correlation_matrix[(j_wedge, i_wedge)] = cc_ij

    diffraction_dissimilarity = 1 - correlation_matrix
    dist_mat = diffraction_dissimilarity.as_numpy_array()

    # Convert the redundant n*n square matrix form into a condensed nC2
    # array: distArray[{n choose 2} - {n-i choose 2} + (j-i-1)] is the
    # distance between points i and j.
    dist_mat = ssd.squareform(dist_mat)

    method = ['single', 'complete', 'average', 'weighted'][2]
    linkage_matrix = hierarchy.linkage(dist_mat, method=method)

    return correlation_matrix, linkage_matrix

  def plot_cc_matrix(self, correlation_matrix, linkage_matrix, labels=None):
    from scipy.cluster import hierarchy

    ind = hierarchy.fcluster(linkage_matrix, t=0.05, criterion='distance')

    # Compute and plot the dendrogram.
    fig = pyplot.figure(dpi=1200, figsize=(16,12))
    axdendro = fig.add_axes([0.09, 0.1, 0.2, 0.8])
    Y = linkage_matrix
    Z = hierarchy.dendrogram(Y, color_threshold=0.05, orientation='right')
    axdendro.set_xticks([])
    axdendro.set_yticks([])

    # Plot the correlation matrix, reordered to match the dendrogram leaves.
    axmatrix = fig.add_axes([0.3, 0.1, 0.6, 0.8])
    index = Z['leaves']
    D = correlation_matrix.as_numpy_array()
    D = D[index, :]
    D = D[:, index]
    im = axmatrix.matshow(D, aspect='auto', origin='lower')
    axmatrix.yaxis.tick_right()
    if labels is not None:
      axmatrix.xaxis.tick_bottom()
      axmatrix.set_xticks(list(range(len(labels))))
      axmatrix.set_xticklabels([labels[i] for i in index], rotation=70)
      axmatrix.yaxis.set_ticks([])

    # Plot the colorbar.
    axcolor = fig.add_axes([0.91, 0.1, 0.02, 0.8])
    pyplot.colorbar(im, cax=axcolor)

    # Save the heatmap figure, then reuse the canvas for the dendrogram.
    fig.savefig('correlation_matrix.png')
    fig.clear()

    fig = pyplot.figure(dpi=1200, figsize=(16,12))
    if labels is None:
      labels = ['%i' %(i+1) for i in range(len(self.intensities))]
    ddict = hierarchy.dendrogram(linkage_matrix,
                                 color_threshold=0.05,
                                 labels=labels,
                                 show_leaf_counts=False)
    locs, labels = pyplot.xticks()
    pyplot.setp(labels, rotation=70)
    fig.savefig('dendrogram.png')

    # Build a plotly-style JSON document combining the heatmap with the
    # dendrogram drawn above it (x2/y2) and, rotated, to its left (x3/y3).
    import copy
    y2_dict = scipy_dendrogram_to_plotly_json(ddict)  # above heatmap
    x2_dict = copy.deepcopy(y2_dict)  # left of heatmap, rotated
    for d in y2_dict['data']:
      d['yaxis'] = 'y2'
      d['xaxis'] = 'x2'
    for d in x2_dict['data']:
      x = d['x']
      y = d['y']
      d['x'] = y
      d['y'] = x
      d['yaxis'] = 'y3'
      d['xaxis'] = 'x3'

    ccdict = {
      'data': [{
        'name': 'correlation_matrix',
        'x': list(range(D.shape[0])),
        'y': list(range(D.shape[1])),
        'z': D.tolist(),
        'type': 'heatmap',
        'colorbar': {
          'title': 'Correlation coefficient',
          'titleside': 'right',
          'xpad': 0,
        },
        'colorscale': 'Jet',
        'xaxis': 'x',
        'yaxis': 'y',
      }],
      'layout': {
        'autosize': False,
        'bargap': 0,
        'height': 1000,
        'hovermode': 'closest',
        'margin': {'r': 20, 't': 50, 'autoexpand': True, 'l': 20},
        'showlegend': False,
        'title': 'Dendrogram Heatmap',
        'width': 1000,
        'xaxis': {
          'domain': [0.2, 0.9],
          'mirror': 'allticks',
          'showgrid': False,
          'showline': False,
          'showticklabels': True,
          'tickmode': 'array',
          'ticks': '',
          'ticktext': y2_dict['layout']['xaxis']['ticktext'],
          'tickvals': list(range(len(y2_dict['layout']['xaxis']['ticktext']))),
          'tickangle': 300,
          'title': '',
          'type': 'linear',
          'zeroline': False,
        },
        'yaxis': {
          'domain': [0, 0.78],
          'anchor': 'x',
          'mirror': 'allticks',
          'showgrid': False,
          'showline': False,
          'showticklabels': True,
          'tickmode': 'array',
          'ticks': '',
          'ticktext': y2_dict['layout']['xaxis']['ticktext'],
          'tickvals': list(range(len(y2_dict['layout']['xaxis']['ticktext']))),
          'title': '',
          'type': 'linear',
          'zeroline': False,
        },
        'xaxis2': {
          'domain': [0.2, 0.9],
          'anchor': 'y2',
          'showgrid': False,
          'showline': False,
          'showticklabels': False,
          'zeroline': False,
        },
        'yaxis2': {
          'domain': [0.8, 1],
          'anchor': 'x2',
          'showgrid': False,
          'showline': False,
          'zeroline': False,
        },
        'xaxis3': {
          'domain': [0.0, 0.1],
          'anchor': 'y3',
          'range': [max(max(d['x']) for d in x2_dict['data']), 0],
          'showgrid': False,
          'showline': False,
          'tickangle': 300,
          'zeroline': False,
        },
        'yaxis3': {
          'domain': [0, 0.78],
          'anchor': 'x3',
          'showgrid': False,
          'showline': False,
          'showticklabels': False,
          'zeroline': False,
        },
      }
    }
    d = ccdict
    d['data'].extend(y2_dict['data'])
    d['data'].extend(x2_dict['data'])
    d['clusters'] = self._cluster_dict

    import json
    with open('intensity_clusters.json', 'wb') as f:
      json.dump(d, f, indent=2)

  def write_output(self):
    from libtbx import table_utils
    rows = [["cluster_id", "# datasets", "height", "datasets"]]
    for cid in sorted(self._cluster_dict.keys()):
      cluster = self._cluster_dict[cid]
      datasets = cluster['datasets']
      rows.append([str(cid), str(len(datasets)), '%.2f' %cluster['height'],
                   ' '.join('%s' %d for d in datasets)])
    with open('intensity_clustering.txt', 'wb') as f:
      print >> f, table_utils.format(
        rows, has_header=True, prefix="|", postfix="|")
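# --- Example (illustration only, not part of the library code) -------------
# A self-contained sketch of just the clustering step performed by
# compute_correlation_coefficient_matrix() and plot_cc_matrix() above,
# using plain numpy/scipy. The 4x4 correlation matrix is invented: datasets
# 1-2 correlate strongly with each other, as do datasets 3-4.
import numpy as np
import scipy.spatial.distance as ssd
from scipy.cluster import hierarchy

cc = np.array([[1.0, 0.9, 0.2, 0.1],
               [0.9, 1.0, 0.3, 0.2],
               [0.2, 0.3, 1.0, 0.8],
               [0.1, 0.2, 0.8, 1.0]])
dist = 1.0 - cc                    # diffraction dissimilarity, zero diagonal
condensed = ssd.squareform(dist)   # condensed nC2 form expected by linkage()
linkage_matrix = hierarchy.linkage(condensed, method='average')
# Cut the tree at a dissimilarity of 0.5: datasets whose merge height is
# below the threshold end up in the same cluster.
clusters = hierarchy.fcluster(linkage_matrix, t=0.5, criterion='distance')
print(clusters)  # e.g. [1 1 2 2]: two clusters of two datasets each
# ---------------------------------------------------------------------------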
  def __init__(self, cif_block, base_array_info=None, wavelengths=None):
    # Module-level dependencies assumed from the surrounding file (not shown):
    #   from cctbx import miller
    #   from cctbx.array_family import flex
    #   from libtbx.containers import OrderedDict, OrderedSet
    #   plus the local helpers (as_flex_double, check_array_sizes, ...).
    crystal_symmetry_builder.__init__(self, cif_block)
    if base_array_info is not None:
      self.crystal_symmetry = self.crystal_symmetry.join_symmetry(
        other_symmetry=base_array_info.crystal_symmetry_from_file,
        force=True)
    self._arrays = OrderedDict()
    if (wavelengths is None):
      wavelengths = {}
    if base_array_info is None:
      base_array_info = miller.array_info(source_type="cif")
    refln_containing_loops = self.get_miller_indices_containing_loops()
    for self.indices, refln_loop in refln_containing_loops:
      self.wavelength_id_array = None
      self.crystal_id_array = None
      self.scale_group_array = None
      wavelength_ids = [None]
      crystal_ids = [None]
      scale_groups = [None]
      for key, value in refln_loop.iteritems():
        # Need to get these id arrays first.
        if (key.endswith('wavelength_id') or
            key.endswith('crystal_id') or
            key.endswith('scale_group_code')):
          data = as_int_or_none_if_all_question_marks(value, column_name=key)
          if data is None: continue
          counts = data.counts()
          if key.endswith('wavelength_id'):
            wavelength_ids = counts.keys()
          if len(counts) == 1: continue
          array = miller.array(
            miller.set(self.crystal_symmetry, self.indices).auto_anomalous(),
            data)
          if key.endswith('wavelength_id'):
            self.wavelength_id_array = array
            wavelength_ids = counts.keys()
          elif key.endswith('crystal_id'):
            self.crystal_id_array = array
            crystal_ids = counts.keys()
          elif key.endswith('scale_group_code'):
            self.scale_group_array = array
            scale_groups = counts.keys()
      for label, value in sorted(refln_loop.items()):
        for w_id in wavelength_ids:
          for crys_id in crystal_ids:
            for scale_group in scale_groups:
              if 'index_' in label: continue
              key = label
              labels = [label]
              wavelength = None
              if (key.endswith('wavelength_id') or
                  key.endswith('crystal_id') or
                  key.endswith('scale_group_code')):
                w_id = None
                crys_id = None
                scale_group = None
              key_suffix = ''
              if w_id is not None:
                key_suffix += '_%i' %w_id
                labels.insert(0, "wavelength_id=%i" %w_id)
                wavelength = wavelengths.get(w_id, None)
              if crys_id is not None:
                key_suffix += '_%i' %crys_id
                labels.insert(0, "crystal_id=%i" %crys_id)
              if scale_group is not None:
                key_suffix += '_%i' %scale_group
                labels.insert(0, "scale_group_code=%i" %scale_group)
              key += key_suffix
              sigmas = None
              if key in self._arrays: continue
              array = self.flex_std_string_as_miller_array(
                value, wavelength_id=w_id, crystal_id=crys_id,
                scale_group_code=scale_group)
              if array is None: continue
              if '_sigma' in key:
                # Attach sigmas to the matching data array if already built.
                sigmas_label = label
                key = None
                for suffix in ('', '_meas', '_calc'):
                  if sigmas_label.replace('_sigma', suffix) in refln_loop:
                    key = sigmas_label.replace('_sigma', suffix) + key_suffix
                    break
                if key is None:
                  key = sigmas_label + key_suffix
                elif key in self._arrays and self._arrays[key].sigmas() is None:
                  sigmas = array
                  array = self._arrays[key]
                  check_array_sizes(array, sigmas, key, sigmas_label)
                  sigmas = as_flex_double(sigmas, sigmas_label)
                  array.set_sigmas(sigmas.data())
                  info = array.info()
                  array.set_info(
                    info.customized_copy(labels=info.labels+[sigmas_label],
                                         wavelength=wavelength))
                  continue
              elif 'PHWT' in key:
                # Combine map coefficient amplitudes (FWT) with their phases.
                phwt_label = label
                fwt_label = label.replace('PHWT', 'FWT')
                if fwt_label not in refln_loop: continue
                phwt_array = array
                if fwt_label in self._arrays:
                  array = self._arrays[fwt_label]
                  check_array_sizes(array, phwt_array, fwt_label, phwt_label)
                  phases = as_flex_double(phwt_array, phwt_label)
                  info = array.info()
                  array = array.phase_transfer(phases, deg=True)
                  array.set_info(
                    info.customized_copy(labels=info.labels+[phwt_label]))
                  self._arrays[fwt_label] = array
                continue
              elif 'HL_' in key:
                # Gather all four Hendrickson-Lattman coefficient columns.
                hl_letter = key[key.find('HL_')+3]
                hl_key = 'HL_' + hl_letter
                key = key.replace(hl_key, 'HL_A')
                if key in self._arrays:
                  continue # this array is already dealt with
                hl_labels = [label.replace(hl_key, 'HL_'+letter)
                             for letter in 'ABCD']
                hl_keys = [key.replace(hl_key, 'HL_'+letter)
                           for letter in 'ABCD']
                hl_values = [cif_block.get(lab) for lab in hl_labels]
                if hl_values.count(None) == 0:
                  selection = self.get_selection(
                    hl_values[0], wavelength_id=w_id,
                    crystal_id=crys_id, scale_group_code=scale_group)
                  hl_values = [as_double_or_none_if_all_question_marks(
                    hl.select(selection), column_name=lab)
                    for hl, lab in zip(hl_values, hl_labels)]
                  array = miller.array(miller.set(
                    self.crystal_symmetry, self.indices.select(selection)
                    ).auto_anomalous(), flex.hendrickson_lattman(*hl_values))
                  labels = labels[:-1] + hl_labels
              elif '.B_' in key or '_B_' in key:
                # Combine real (A) and imaginary (B) parts into complex data.
                if '.B_' in key:
                  key, key_b = key.replace('.B_', '.A_'), key
                  label, label_b = label.replace('.B_', '.A_'), label
                elif '_B_' in key:
                  key, key_b = key.replace('_B', '_A'), key
                  label, label_b = label.replace('_B', '_A'), label
                if key in refln_loop and key_b in refln_loop:
                  b_part = array.data()
                  if key in self._arrays:
                    info = self._arrays[key].info()
                    a_part = self._arrays[key].data()
                    self._arrays[key] = self._arrays[key].array(
                      data=flex.complex_double(a_part, b_part))
                    self._arrays[key].set_info(
                      info.customized_copy(labels=info.labels+[key_b]))
                  continue
              elif ('phase_' in key and not "_meas" in key and
                    self.crystal_symmetry.space_group() is not None):
                # Transfer phases onto the corresponding amplitude array.
                alt_key1 = label.replace('phase_', 'F_')
                alt_key2 = alt_key1 + '_au'
                if alt_key1 in refln_loop:
                  phase_key = label
                  key = alt_key1+key_suffix
                elif alt_key2 in refln_loop:
                  phase_key = label
                  key = alt_key2+key_suffix
                else:
                  phase_key = None
                if phase_key is not None:
                  phases = array.data()
                  if key in self._arrays:
                    array = self._arrays[key]
                    array = as_flex_double(array, key)
                    check_array_sizes(array, phases, key, phase_key)
                    info = self._arrays[key].info()
                    self._arrays[key] = array.phase_transfer(phases, deg=True)
                    self._arrays[key].set_info(
                      info.customized_copy(labels=info.labels+[phase_key]))
                  else:
                    array = self.flex_std_string_as_miller_array(
                      refln_loop[label], wavelength_id=w_id,
                      crystal_id=crys_id, scale_group_code=scale_group)
                    check_array_sizes(array, phases, key, phase_key)
                    array = array.phase_transfer(phases, deg=True)
                    labels = labels+[label, phase_key]
              if base_array_info.labels is not None:
                labels = base_array_info.labels + labels

              def rstrip_substrings(string, substrings):
                for substr in substrings:
                  if substr == '': continue
                  if string.endswith(substr):
                    string = string[:-len(substr)]
                return string

              # Determine the observation type.
              stripped_key = rstrip_substrings(
                key, [key_suffix, '_au', '_meas', '_calc', '_plus', '_minus'])
              if (stripped_key.endswith('F_squared') or
                  stripped_key.endswith('intensity') or
                  stripped_key.endswith('.I') or
                  stripped_key.endswith('_I')) and (
                    array.is_real_array() or array.is_integer_array()):
                array.set_observation_type_xray_intensity()
              elif (stripped_key.endswith('F') and (
                array.is_real_array() or array.is_integer_array())):
                array.set_observation_type_xray_amplitude()
              if (array.is_xray_amplitude_array() or
                  array.is_xray_intensity_array()):
                # e.g. merge_equivalents treats integer arrays differently,
                # so must convert integer observation arrays here to be safe
                if isinstance(array.data(), flex.int):
                  array = array.customized_copy(data=array.data().as_double())
              array.set_info(base_array_info.customized_copy(labels=labels))
              if (array.is_xray_amplitude_array() or
                  array.is_xray_intensity_array()):
                info = array.info()
                array.set_info(info.customized_copy(wavelength=wavelength))
              self._arrays.setdefault(key, array)
      # Combine any separate plus/minus columns into anomalous arrays.
      for key, array in self._arrays.copy().iteritems():
        if (key.endswith('_minus') or '_minus_' in key or
            key.endswith('_plus') or '_plus_' in key):
          if '_minus' in key:
            minus_key = key
            plus_key = key.replace('_minus', '_plus')
          elif '_plus' in key:
            plus_key = key
            minus_key = key.replace('_plus', '_minus')
          if plus_key in self._arrays and minus_key in self._arrays:
            plus_array = self._arrays.pop(plus_key)
            minus_array = self._arrays.pop(minus_key)
            minus_array = minus_array.customized_copy(
              indices=-minus_array.indices()).set_info(minus_array.info())
            array = plus_array.concatenate(
              minus_array, assert_is_similar_symmetry=False)
            array = array.customized_copy(anomalous_flag=True)
            array.set_info(minus_array.info().customized_copy(
              labels=list(
                OrderedSet(plus_array.info().labels+minus_array.info().labels))))
            array.set_observation_type(plus_array.observation_type())
            self._arrays.setdefault(key, array)

    if len(self._arrays) == 0:
      raise CifBuilderError("No reflection data present in cif block")
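# --- Example (illustration only, not part of the library code) -------------
# A minimal sketch of the plus/minus combination performed at the end of the
# builder above, assuming a cctbx environment. The unit cell, indices and
# data values are invented: F(+) stays on +hkl, F(-) is mapped onto -hkl,
# and the two halves are concatenated into a single anomalous array.
from cctbx import crystal, miller
from cctbx.array_family import flex

cs = crystal.symmetry(unit_cell=(10, 10, 10, 90, 90, 90),
                      space_group_symbol='P1')
indices = flex.miller_index([(1, 0, 0), (0, 1, 0)])
plus = miller.array(miller.set(cs, indices, anomalous_flag=True),
                    data=flex.double([100.0, 200.0]))
minus = miller.array(miller.set(cs, indices, anomalous_flag=True),
                     data=flex.double([90.0, 210.0]))
# Flip the indices of the minus array so (h,k,l) becomes (-h,-k,-l).
minus = minus.customized_copy(indices=-minus.indices())
both = plus.concatenate(minus, assert_is_similar_symmetry=False)
both = both.customized_copy(anomalous_flag=True)
assert both.size() == 4  # two Friedel mates per original reflection
# ---------------------------------------------------------------------------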
  def sort(self, recursive=False, key=None, reverse=False):
    self.blocks = OrderedDict(
      sorted(self.blocks.items(), key=key, reverse=reverse))
    if recursive:
      for b in self.blocks.values():
        b.sort(recursive=recursive, reverse=reverse)
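# --- Example (illustration only, not part of the library code) -------------
# A usage sketch, assuming this sort() lives on an iotbx.cif model object.
# The key callable receives (block_name, block) tuples, exactly as sorted()
# sees the items of the underlying OrderedDict of blocks.
import iotbx.cif

cif_model = iotbx.cif.reader(input_string="""\
data_b
_cell_length_a 10
data_a
_cell_length_a 5
""").model()
cif_model.sort(key=lambda item: item[0])  # order data blocks by name
print(list(cif_model.keys()))             # ['a', 'b']
# ---------------------------------------------------------------------------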