class cif(DictMixin):

  def __init__(self, blocks=None):
    if blocks is not None:
      self.blocks = OrderedDict(blocks)
    else:
      self.blocks = OrderedDict()
    self.keys_lower = dict(
      [(key.lower(), key) for key in self.blocks.keys()])

  def __setitem__(self, key, value):
    assert isinstance(value, block)
    if not re.match(tag_re, '_' + key):
      raise Sorry("%s is not a valid data block name" % key)
    self.blocks[key] = value
    self.keys_lower[key.lower()] = key

  def get(self, key, default=None):
    key_lower = self.keys_lower.get(key.lower())
    if (key_lower is None):
      return default
    return self.blocks.get(key_lower, default)

  def __getitem__(self, key):
    result = self.get(key)
    if (result is None):
      raise KeyError('Unknown CIF data block name: "%s"' % key)
    return result

  def __delitem__(self, key):
    del self.blocks[self.keys_lower[key.lower()]]
    del self.keys_lower[key.lower()]

  def keys(self):
    return self.blocks.keys()

  def __repr__(self):
    return repr(OrderedDict(self.iteritems()))

  def __copy__(self):
    return cif(self.blocks.copy())
  copy = __copy__

  def __deepcopy__(self, memo):
    return cif(copy.deepcopy(self.blocks, memo))

  def deepcopy(self):
    return copy.deepcopy(self)

  def show(self, out=None, indent=" ", indent_row=None,
           data_name_field_width=34, loop_format_strings=None,
           align_columns=True):
    if out is None:
      out = sys.stdout
    for name, block in self.items():
      print >> out, "data_%s" % name
      block.show(
        out=out, indent=indent, indent_row=indent_row,
        data_name_field_width=data_name_field_width,
        loop_format_strings=loop_format_strings,
        align_columns=align_columns)

  def __str__(self):
    s = StringIO()
    self.show(out=s)
    return s.getvalue()

  def validate(self, dictionary, show_warnings=True, error_handler=None,
               out=None):
    if out is None:
      out = sys.stdout
    from iotbx.cif import validation
    errors = {}
    if error_handler is None:
      error_handler = validation.ErrorHandler()
    for key, block in self.blocks.iteritems():
      error_handler = error_handler.__class__()
      dictionary.set_error_handler(error_handler)
      block.validate(dictionary)
      errors.setdefault(key, error_handler)
      if error_handler.error_count or error_handler.warning_count:
        error_handler.show(show_warnings=show_warnings, out=out)
    return error_handler

  def sort(self, recursive=False, key=None, reverse=False):
    self.blocks = OrderedDict(
      sorted(self.blocks.items(), key=key, reverse=reverse))
    if recursive:
      for b in self.blocks.values():
        b.sort(recursive=recursive, reverse=reverse)
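
# --- Illustrative usage sketch (not part of the original code) --------------
# A minimal round trip through the cif container above.  It assumes the block
# class defined elsewhere in this module; the block and item names below are
# made up for the example.
def _example_cif_container():
  cif_model = cif()
  data_block = block()
  data_block["_cell.length_a"] = "78.90"
  cif_model["example"] = data_block          # __setitem__ validates the name
  assert cif_model["EXAMPLE"] is data_block  # lookups are case-insensitive
  return str(cif_model)                      # renders "data_example" + items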
class loop(DictMixin):

  def __init__(self, header=None, data=None):
    self._columns = OrderedDict()
    self.keys_lower = {}
    if header is not None:
      for key in header:
        self.setdefault(key, flex.std_string())
      if data is not None:
        # the number of data items must be an exact multiple of the number of headers
        assert len(data) % len(header) == 0, "Wrong number of data items for loop"
        n_rows = len(data) // len(header)
        n_columns = len(header)
        for i in range(n_rows):
          self.add_row(
            [data[i * n_columns + j] for j in range(n_columns)])
    elif header is None and data is not None:
      assert isinstance(data, dict) or isinstance(data, OrderedDict)
      self.add_columns(data)
      self.keys_lower = dict(
        [(key.lower(), key) for key in self._columns.keys()])

  def __setitem__(self, key, value):
    if not re.match(tag_re, key):
      raise Sorry("%s is not a valid data name" % key)
    if len(self) > 0:
      assert len(value) == self.size()
    if not isinstance(value, flex.std_string):
      for flex_numeric_type in (flex.int, flex.double):
        if isinstance(value, flex_numeric_type):
          value = value.as_string()
        else:
          try:
            value = flex_numeric_type(value).as_string()
          except TypeError:
            continue
          else:
            break
      if not isinstance(value, flex.std_string):
        value = flex.std_string(value)
    # value must be a mutable type
    assert hasattr(value, '__setitem__')
    self._columns[key] = value
    self.keys_lower[key.lower()] = key

  def __getitem__(self, key):
    return self._columns[self.keys_lower[key.lower()]]

  def __delitem__(self, key):
    del self._columns[self.keys_lower[key.lower()]]
    del self.keys_lower[key.lower()]

  def keys(self):
    return self._columns.keys()

  def __repr__(self):
    return repr(OrderedDict(self.iteritems()))

  def name(self):
    return common_substring(self.keys()).rstrip('_').rstrip('.')

  def size(self):
    size = 0
    for column in self.values():
      size = max(size, len(column))
    return size

  def n_rows(self):
    return self.size()

  def n_columns(self):
    return len(self.keys())

  def add_row(self, row, default_value="?"):
    if isinstance(row, dict):
      for key in self:
        if key in row:
          self[key].append(str(row[key]))
        else:
          self[key].append(default_value)
    else:
      assert len(row) == len(self)
      for i, key in enumerate(self):
        self[key].append(str(row[i]))

  def add_column(self, key, values):
    if self.size() != 0:
      assert len(values) == self.size()
    self[key] = values
    self.keys_lower[key.lower()] = key

  def add_columns(self, columns):
    assert isinstance(columns, dict) or isinstance(columns, OrderedDict)
    for key, value in columns.iteritems():
      self.add_column(key, value)

  def update_column(self, key, values):
    assert type(key) == type(""), "first argument is column key string"
    if self.size() != 0:
      assert len(values) == self.size(), "len(values) %d != self.size() %d" % (
        len(values), self.size())
    self[key] = values
    self.keys_lower[key.lower()] = key

  def delete_row(self, index):
    assert index < self.n_rows()
    for column in self._columns.values():
      del column[index]

  def __copy__(self):
    new = loop()
    new._columns = self._columns.copy()
    new.keys_lower = self.keys_lower.copy()
    return new
  copy = __copy__

  def __deepcopy__(self, memo):
    new = loop()
    new._columns = copy.deepcopy(self._columns, memo)
    new.keys_lower = copy.deepcopy(self.keys_lower, memo)
    return new

  def deepcopy(self):
    return copy.deepcopy(self)

  def show(self, out=None, indent=" ", indent_row=None, fmt_str=None,
           align_columns=True):
    assert self.n_rows() > 0 and self.n_columns() > 0, "keys: %s %d %d" % (
      self.keys(), self.n_rows(), self.n_columns())
    if out is None:
      out = sys.stdout
    if indent_row is None:
      indent_row = indent
    assert indent.strip() == ""
    assert indent_row.strip() == ""
    print >> out, "loop_"
    for k in self.keys():
      print >> out, indent + k
    values = self._columns.values()
    range_len_values = range(len(values))
    if fmt_str is not None:
      # Pretty printing:
      #   The user is responsible for providing a valid format string.
      #   Values are not quoted - it is the user's responsibility to place
      #   appropriate quotes in the format string if a particular value may
      #   contain spaces.
      values = copy.deepcopy(values)
      for i, v in enumerate(values):
        for flex_numeric_type in (flex.int, flex.double):
          if not isinstance(v, flex_numeric_type):
            try:
              values[i] = flex_numeric_type(v)
            except ValueError:
              continue
            else:
              break
      if fmt_str is None:
        fmt_str = indent_row + ' '.join(["%s"] * len(values))
      for i in range(self.size()):
        print >> out, fmt_str % tuple(
          [values[j][i] for j in range_len_values])
    elif align_columns:
      fmt_str = []
      for i, (k, v) in enumerate(self.iteritems()):
        for i_v in range(v.size()):
          v[i_v] = format_value(v[i_v])
        # exclude and semicolon text fields from column width calculation
        v_ = flex.std_string(item for item in v if "\n" not in item)
        width = v_.max_element_length()
        # See if column contains only number, '.' or '?'
        # right-align numerical columns, left-align everything else
        v = v.select(~((v == ".") | (v == "?")))
        try:
          flex.double(v)
        except ValueError:
          width *= -1
        fmt_str.append("%%%is" % width)
      fmt_str = indent_row + " ".join(fmt_str)
      for i in range(self.size()):
        print >> out, (fmt_str % tuple(
          [values[j][i] for j in range_len_values])).rstrip()
    else:
      for i in range(self.size()):
        values_to_print = [
          format_value(values[j][i]) for j in range_len_values]
        print >> out, ' '.join([indent] + values_to_print)

  def __str__(self):
    s = StringIO()
    self.show(out=s)
    return s.getvalue()

  def iterrows(self):
    """ Warning! Still super-slow! """
    keys = self.keys()
    s_values = self.values()
    range_len_self = range(len(self))
    # range is 1% faster than xrange in this particular place.
    # tuple (s_values...) is slightly faster than list
    for j in range(self.size()):
      yield OrderedDict(
        zip(keys, (s_values[i][j] for i in range_len_self)))

  def find_row(self, kv_dict):
    self_keys = self.keys()
    for k in kv_dict.keys():
      assert k in self_keys
    result = []
    s_values = self.values()
    range_len_self = range(len(self))
    for i in range(self.size()):
      goodrow = True
      for k, v in kv_dict.iteritems():
        if self[k][i] != v:
          goodrow = False
          break
      if goodrow:
        result.append(OrderedDict(
          zip(self_keys, [s_values[j][i] for j in range_len_self])))
    return result

  def sort(self, key=None, reverse=False):
    self._columns = OrderedDict(
      sorted(self._columns.items(), key=key, reverse=reverse))

  def order(self, order):
    def _cmp_key(k1, k2):
      for i, o in enumerate(order):
        if k1 == o:
          break
      for j, o in enumerate(order):
        if k2 == o:
          break
      if k1 < k2:
        return -1
      return 1
    keys = self._columns.keys()
    keys.sort(_cmp_key)
    tmp = OrderedDict()
    for o in order:
      tmp[o] = self._columns[o]
    self._columns = tmp

  def __eq__(self, other):
    if (len(self) != len(other)
        or self.size() != other.size()
        or self.keys() != other.keys()):
      return False
    for value, other_value in zip(self.values(), other.values()):
      if (value == other_value).count(True) != len(value):
        return False
    return True
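
# --- Illustrative usage sketch (not part of the original code) --------------
# Building a small loop from a header plus flat data, then extending it row by
# row; assumes the cctbx flex arrays imported by this module, and the tag names
# below are placeholders chosen only to satisfy tag_re.
def _example_loop_usage():
  tags = ("_atom_site.label", "_atom_site.occupancy")
  lp = loop(header=tags, data=("C1", "1.00", "N1", "0.50"))
  lp.add_row(("O1", "0.25"))               # values are stored as strings
  assert lp.n_rows() == 3 and lp.n_columns() == 2
  rows = list(lp.iterrows())               # OrderedDicts keyed by the tag names
  assert rows[0]["_atom_site.label"] == "C1"
  return str(lp)                           # renders an aligned CIF "loop_" block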
class miller_array_builder(crystal_symmetry_builder): # Changes to this class should pass regression tests: # cctbx_project\mmtbx\regression\tst_cif_as_mtz_wavelengths.py # cctbx_project\iotbx\cif\tests\tst_lex_parse_build.py # phenix_regression\cif_as_mtz\tst_cif_as_mtz.py observation_types = { # known types of column data to be tagged as either amplitudes or intensities as per # https://www.iucr.org/__data/iucr/cifdic_html/2/cif_mm.dic/index.html '_refln.F_squared': xray.intensity(), '_refln_F_squared': xray.intensity(), '_refln.intensity': xray.intensity(), '_refln.I(+)': xray.intensity(), '_refln.I(-)': xray.intensity(), '_refln.F_calc': xray.amplitude(), '_refln.F_meas': xray.amplitude(), '_refln.FP': xray.amplitude(), '_refln.F-obs': xray.amplitude(), '_refln.Fobs': xray.amplitude(), '_refln.F-calc': xray.amplitude(), '_refln.Fcalc': xray.amplitude(), '_refln.pdbx_F_': xray.amplitude(), '_refln.pdbx_I_': xray.intensity(), '_refln.pdbx_anom_difference': xray.amplitude(), } def guess_observationtype(self, labl): for okey in self.observation_types.keys(): if labl.startswith(okey): return self.observation_types[okey] return None def __init__(self, cif_block, base_array_info=None, wavelengths=None): crystal_symmetry_builder.__init__(self, cif_block) self._arrays = OrderedDict() self._origarrays = OrderedDict( ) # used for presenting raw data tables in HKLviewer basearraylabels = [] if base_array_info is not None: self.crystal_symmetry = self.crystal_symmetry.join_symmetry( other_symmetry=base_array_info.crystal_symmetry_from_file, force=True) if base_array_info.labels: basearraylabels = base_array_info.labels if (wavelengths is None): wavelengths = {} if base_array_info is None: base_array_info = miller.array_info(source_type="cif") refln_containing_loops = self.get_miller_indices_containing_loops() for self.indices, refln_loop in refln_containing_loops: self.wavelength_id_array = None self.crystal_id_array = None self.scale_group_array = None wavelength_ids = [None] crystal_ids = [None] scale_groups = [None] for key, value in six.iteritems(refln_loop): # Get wavelength_ids, crystal_id, scale_group_code columns for selecting data of other # columns in self.get_selection() used by self.flex_std_string_as_miller_array() if (key.endswith('wavelength_id') or key.endswith('crystal_id') or key.endswith('scale_group_code')): data = as_int_or_none_if_all_question_marks( value, column_name=key) if data is None: continue counts = data.counts() if key.endswith('wavelength_id'): wavelength_ids = list(counts.keys()) if len(counts) == 1: continue array = miller.array( miller.set(self.crystal_symmetry, self.indices).auto_anomalous(), data) if key.endswith('wavelength_id'): self.wavelength_id_array = array wavelength_ids = list(counts.keys()) elif key.endswith('crystal_id'): self.crystal_id_array = array crystal_ids = list(counts.keys()) elif key.endswith('scale_group_code'): self.scale_group_array = array scale_groups = list(counts.keys()) labelsuffix = [] wavelbl = [] cryslbl = [] scalegrplbl = [] self._origarrays["HKLs"] = self.indices alllabels = list(sorted(refln_loop.keys())) remaininglabls = alllabels[:] # deep copy the list # Parse labels matching cif column conventions # https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/refln.html # and extract groups of labels or just single columns. # Groups corresponds to the map coefficients, phase and amplitudes, # amplitudes or intensities with sigmas and hendrickson-lattman columns. 
phaseamplabls, remaininglabls = self.get_phase_amplitude_labels( remaininglabls) mapcoefflabls, remaininglabls = self.get_mapcoefficient_labels( remaininglabls) HLcoefflabls, remaininglabls = self.get_HL_labels(remaininglabls) data_sig_obstype_labls, remaininglabls = self.get_FSigF_ISigI_labels( remaininglabls) for w_id in wavelength_ids: for crys_id in crystal_ids: for scale_group in scale_groups: # If reflection data files contain more than one crystal, wavelength or scalegroup # then add their id(s) as a suffix to data labels computed below. Needed for avoiding # ambuguity but avoid when not needed to make labels more human readable! if (len(wavelength_ids) > 1 or len(wavelengths) > 1) and w_id is not None: wavelbl = ["wavelength_id=%i" % w_id] if len(crystal_ids) > 1 and crys_id is not None: cryslbl = ["crystal_id=%i" % crys_id] if len(scale_groups) > 1 and scale_group is not None: scalegrplbl = ["scale_group_code=%i" % scale_group] labelsuffix = scalegrplbl + cryslbl + wavelbl jlablsufx = "" if len(labelsuffix): jlablsufx = "," + ",".join(labelsuffix) for mapcoefflabl in mapcoefflabls: A_array = refln_loop[mapcoefflabl[0]] B_array = refln_loop[mapcoefflabl[1]] # deselect any ? marks in the two arrays, assuming both A and B have the same ? marks selection = self.get_selection( A_array, wavelength_id=w_id, crystal_id=crys_id, scale_group_code=scale_group) A_array = A_array.select(selection) B_array = B_array.select(selection) # form the miller array with map coefficients data = flex.complex_double(flex.double(A_array), flex.double(B_array)) millarr = miller.array( miller.set(self.crystal_symmetry, self.indices.select( selection)).auto_anomalous(), data) # millarr will be None for column data not matching w_id,crys_id,scale_group values if millarr is None: continue labl = basearraylabels + mapcoefflabl + labelsuffix millarr.set_info( base_array_info.customized_copy( labels=labl, wavelength=wavelengths.get(w_id, None))) self._arrays[mapcoefflabl[0] + jlablsufx] = millarr for phaseamplabl in phaseamplabls: amplitudestrarray = refln_loop[phaseamplabl[0]] phasestrarray = refln_loop[phaseamplabl[1]] millarr = self.flex_std_string_as_miller_array( amplitudestrarray, wavelength_id=w_id, crystal_id=crys_id, scale_group_code=scale_group) phasesmillarr = self.flex_std_string_as_miller_array( phasestrarray, wavelength_id=w_id, crystal_id=crys_id, scale_group_code=scale_group) # millarr will be None for column data not matching w_id,crys_id,scale_group values if millarr is None or phasesmillarr is None: continue phases = as_flex_double(phasesmillarr, phaseamplabl[1]) millarr = millarr.phase_transfer(phases, deg=True) labl = basearraylabels + phaseamplabl + labelsuffix millarr.set_info( base_array_info.customized_copy( labels=labl, wavelength=wavelengths.get(w_id, None))) self._arrays[phaseamplabl[0] + jlablsufx] = millarr for datlabl, siglabl, otype in data_sig_obstype_labls: datastrarray = refln_loop[datlabl] millarr = self.flex_std_string_as_miller_array( datastrarray, wavelength_id=w_id, crystal_id=crys_id, scale_group_code=scale_group) # millarr will be None for column data not matching w_id,crys_id,scale_group values if millarr is None: continue millarr = as_flex_double(millarr, datlabl) datsiglabl = [datlabl] if siglabl: sigmasstrarray = refln_loop[siglabl] sigmas = self.flex_std_string_as_miller_array( sigmasstrarray, wavelength_id=w_id, crystal_id=crys_id, scale_group_code=scale_group) sigmas = as_flex_double(sigmas, siglabl) millarr.set_sigmas(sigmas.data()) datsiglabl = [datlabl, siglabl] 
datsiglabl = basearraylabels + datsiglabl + labelsuffix millarr.set_info( base_array_info.customized_copy( labels=datsiglabl, wavelength=wavelengths.get(w_id, None))) if otype is not None: millarr.set_observation_type(otype) self._arrays[datlabl + jlablsufx] = millarr for hl_labels in HLcoefflabls: hl_values = [ cif_block.get(hl_key) for hl_key in hl_labels ] if hl_values.count(None) == 0: selection = self.get_selection( hl_values[0], wavelength_id=w_id, crystal_id=crys_id, scale_group_code=scale_group) hl_values = [ as_double_or_none_if_all_question_marks( hl.select(selection), column_name=lab) for hl, lab in zip(hl_values, hl_labels) ] # hl_values will be None for column data not matching w_id,crys_id,scale_group values if hl_values == [None, None, None, None]: continue millarr = miller.array( miller.set( self.crystal_symmetry, self.indices.select( selection)).auto_anomalous(), flex.hendrickson_lattman(*hl_values)) hlabels = basearraylabels + hl_labels + labelsuffix millarr.set_info( base_array_info.customized_copy( labels=hlabels, wavelength=wavelengths.get(w_id, None))) self._arrays[hl_labels[0] + jlablsufx] = millarr # pick up remaining columns if any that weren't identified above for label in alllabels: if "index_" in label: continue datastrarray = refln_loop[label] if label in remaininglabls: labels = basearraylabels + [label ] + labelsuffix lablsufx = jlablsufx millarr = self.flex_std_string_as_miller_array( datastrarray, wavelength_id=w_id, crystal_id=crys_id, scale_group_code=scale_group) # millarr will be None for column data not matching w_id,crys_id,scale_group values if (label.endswith( 'wavelength_id' ) or label.endswith( 'crystal_id' ) or # get full array if any of these labels, not just subsets label.endswith('scale_group_code')): millarr = self.flex_std_string_as_miller_array( datastrarray, wavelength_id=None, crystal_id=None, scale_group_code=None) lablsufx = "" labels = basearraylabels + [label] if millarr is None: continue otype = self.guess_observationtype(label) if otype is not None: millarr.set_observation_type(otype) millarr.set_info( base_array_info.customized_copy( labels=labels, wavelength=wavelengths.get(w_id, None))) self._arrays[label + lablsufx] = millarr origarr = self.flex_std_string_as_miller_array( datastrarray, wavelength_id=w_id, crystal_id=crys_id, scale_group_code=scale_group) newlabel = label.replace("_refln.", "") newlabel2 = newlabel.replace("_refln_", "") if origarr: # want only genuine miller arrays self._origarrays[newlabel2 + jlablsufx] = origarr.data() # Convert any groups of I+,I-,SigI+,SigI- (or amplitudes) arrays into anomalous arrays # i.e. 
both friedel mates in the same array for key, array in six.iteritems(self._arrays.copy()): plus_key = "" if '_minus' in key: minus_key = key plus_key = key.replace('_minus', '_plus') elif '-' in key: minus_key = key plus_key = key.replace('-', '+') elif '_plus' in key: plus_key = key minus_key = key.replace('_plus', '_minus') elif '+' in key: plus_key = key minus_key = key.replace('+', '-') if plus_key in self._arrays and minus_key in self._arrays: plus_array = self._arrays.pop(plus_key) minus_array = self._arrays.pop(minus_key) minus_array = minus_array.customized_copy( indices=-minus_array.indices()).set_info( minus_array.info()) array = plus_array.concatenate( minus_array, assert_is_similar_symmetry=False) array = array.customized_copy(anomalous_flag=True) array.set_info(minus_array.info().customized_copy(labels=list( OrderedSet(plus_array.info().labels + minus_array.info().labels)))) array.set_observation_type(plus_array.observation_type()) self._arrays.setdefault(key, array) if len(self._arrays) == 0: raise CifBuilderError("No reflection data present in cif block") # Sort the ordered dictionary to resemble the order of columns in the cif file # This is to avoid any F_meas arrays accidentally being put adjacent to # pdbx_anom_difference arrays in the self._arrays OrderedDict. Otherwise these # arrays may unintentionally be combined into a reconstructed anomalous amplitude # array when saving as an mtz file due to a problem in the iotbx/mtz module. # See http://phenix-online.org/pipermail/cctbxbb/2021-March/002289.html arrlstord = [] arrlst = list(self._arrays) for arr in arrlst: for i, k in enumerate(refln_loop.keys()): if arr.split(",")[0] == k: arrlstord.append((arr, i)) # arrlstord must have the same keys as in the self._arrays dictionary assert sorted(arrlst) == sorted([e[0] for e in arrlstord]) sortarrlst = sorted(arrlstord, key=lambda arrord: arrord[1]) self._ordarrays = OrderedDict() for sortkey, i in sortarrlst: self._ordarrays.setdefault(sortkey, self._arrays[sortkey]) self._arrays = self._ordarrays def get_HL_labels(self, keys): lstkeys = list(keys) # cast into list if not a list HLquads = [] alllabels = " ".join(lstkeys) """ Hendrickson-Lattmann labels could look like: 'HLAM', 'HLBM', 'HLCM', 'HLDM' or like 'HLanomA', 'HLanomB', 'HLanomC', 'HLanomD' Use a regular expression to group them accordingly """ allmatches = re.findall(r"(\S*(HL(\S*)[abcdABCD](\S*)))", alllabels) HLtagslst = list(set([(e[2], e[3]) for e in allmatches])) usedkeys = [] for m in HLtagslst: hllist = [] for hm in allmatches: if m == (hm[2], hm[3]): hllist.append((hm[0], hm[1])) if len(hllist) == 4: HLquads.append([e[0] for e in hllist]) for e in hllist: usedkeys.append(e[0]) remainingkeys = [] for e in lstkeys: if e not in usedkeys: remainingkeys.append(e) return HLquads, remainingkeys def get_mapcoefficient_labels(self, keys): # extract map coeffficients labels from list of cif column labels # e.g. 
( _refln.A_calc_au _refln.B_calc_au ) , ( _refln.A_calc _refln.B_calc ) lstkeys = list(keys) # cast into list if not a list remainingkeys = lstkeys[:] # deep copy the list alllabels = " ".join(lstkeys) mapcoefflabels = [] A_matches = re.findall( r"( (\s*_refln[\._]A_)(\S*) )", alllabels, re.VERBOSE ) # [('_refln.PHWT', '_refln.PH', 'WT'), ('_refln.PHDELWT', '_refln.PH', 'DELWT')] for label in lstkeys: for m in A_matches: Blabel = m[1].replace("A_", "B_") + m[2] if Blabel == label: mapcoefflabels.append([m[0], label]) remainingkeys.remove(m[0]) remainingkeys.remove(label) return mapcoefflabels, remainingkeys def get_phase_amplitude_labels(self, keys): # extract phase and amplitudes labels from list of cif column labels # e.g. ( _refln.F_calc _refln.phase_calc ) , ( _refln.FC_ALL _refln.PHIC_ALL ), ( _refln.FWT _refln.PHWT ) lstkeys = list(keys) # cast into list if not a list remainingkeys = lstkeys[:] # deep copy the list alllabels = " ".join(lstkeys) phase_amplitudelabels = [] PHmatches = re.findall( r"((\S*PH)([^I]\S*))", alllabels ) # [('_refln.PHWT', '_refln.PH', 'WT'), ('_refln.PHDELWT', '_refln.PH', 'DELWT')] for label in lstkeys: for m in PHmatches: PFlabel = m[1].replace("PH", "F") + m[2] Flabel = m[1].replace("PH", "") + m[2] if Flabel == label or PFlabel == label: phase_amplitudelabels.append([label, m[0]]) remainingkeys.remove(label) remainingkeys.remove(m[0]) alllabels = " ".join(remainingkeys) PHImatches = re.findall( r"((\S*PHI)(\S*))", alllabels ) # [('_refln.PHIC', '_refln.PHI', 'C'), ('_refln.PHIC_ALL', '_refln.PHI', 'C_ALL')] for label in lstkeys: for m in PHImatches: PFlabel = m[1].replace("PHI", "F") + m[2] Flabel = m[1].replace("PHI", "") + m[2] if Flabel == label or PFlabel == label: phase_amplitudelabels.append([label, m[0]]) remainingkeys.remove(label) remainingkeys.remove(m[0]) alllabels = " ".join(remainingkeys) PHDELmatches = re.findall( r"(((\S*)PH)([^I]\S*(WT)))", alllabels ) # [('_refln.PHDELWT', '_refln.PH', '_refln.', 'DELWT', 'WT')] for label in lstkeys: for m in PHDELmatches: Flabel = m[2] + m[3].replace("WT", "FWT") if Flabel == label: phase_amplitudelabels.append([label, m[0]]) remainingkeys.remove(label) remainingkeys.remove(m[0]) alllabels = " ".join(remainingkeys) phase_matches = re.findall( r"((\S*[\._])phase(\S*))", alllabels) # [('_refln.phase_calc', '_refln.', '')] for label in lstkeys: for m in phase_matches: phaselabel = m[0] Flabl = m[1] + m[2] Flabel = m[1] + "F" + m[2] Faulabel = m[1] + "F" + m[2] + "_au" if Flabl in label or Flabel in label or Faulabel in label: # in case of _refln.F_calc_au and _refln.phase_calc if label in remainingkeys and m[ 0] in remainingkeys: # in case if (Flabel + "_sigma_au") in remainingkeys or ( Flabel + "_sigma") in remainingkeys: continue # give priority to F_meas, F_meas_sigma or F_meas_au, F_meas_sigma_au phase_amplitudelabels.append([label, m[0]]) remainingkeys.remove(label) remainingkeys.remove(m[0]) return phase_amplitudelabels, remainingkeys def get_FSigF_ISigI_labels(self, keys): # extract amplitudea, sigmas or intensitiy, sigmas labels from list of cif column labels # e.g. 
( _refln.F_meas_sigma_au _refln.F_meas), ( _refln.intensity_sigma _refln.intensity ) , # ( _refln.pdbx_I_plus_sigma _refln.pdbx_I_plus ) lstkeys = list(keys) # cast into list if not a list remainingkeys = lstkeys[:] # deep copy the list alllabels = " ".join(lstkeys) labelpairs = [] sigma_matches = re.findall( r"((\S*[\._])SIG(\S*))", alllabels) # catch label pairs like F(+),SIGF(+) for label in lstkeys: for m in sigma_matches: FIlabel = m[1] + m[2] if FIlabel == label: labelpairs.append( [label, m[0], self.guess_observationtype(label)]) remainingkeys.remove(label) remainingkeys.remove(m[0]) alllabels = " ".join(remainingkeys) sigma_matches = re.findall( r"((\S*)_sigma(_*\S*))", alllabels ) # [('_refln.F_meas_sigma_au', '_refln.F_meas', '_au'), ('_refln.intensity_sigma', '_refln.intensity', ''), ('_refln.pdbx_I_plus_sigma', '_refln.pdbx_I_plus', '')] for label in lstkeys: for m in sigma_matches: FIlabel = m[1] + m[2] if FIlabel == label: labelpairs.append( [label, m[0], self.guess_observationtype(label)]) remainingkeys.remove(label) remainingkeys.remove(m[0]) alllabels = " ".join(remainingkeys) # catch generic meas and sigma labels anymeas_matches = re.findall(r"((\S*)_meas(\S*))", alllabels) + re.findall( r"((\S*)_calc(\S*))", alllabels) anysigma_matches = re.findall(r"((\S*)_sigma(\S*))", alllabels) for mmatch in anymeas_matches: for smatch in anysigma_matches: if mmatch[1] == smatch[1] and mmatch[2] == smatch[2]: remainingkeys.remove(mmatch[0]) if smatch[ 0] in remainingkeys: # in case of say F_squared_calc, F_squared_meas, F_squared_sigma all being present remainingkeys.remove(smatch[0]) labelpairs.append([ mmatch[0], smatch[0], self.guess_observationtype(mmatch[0]) ]) else: labelpairs.append([ mmatch[0], None, self.guess_observationtype(mmatch[0]) ]) return labelpairs, remainingkeys def get_miller_indices_containing_loops(self): loops = [] for loop in self.cif_block.loops.values(): for key in loop.keys(): if 'index_h' not in key: continue hkl_str = [ loop.get(key.replace('index_h', 'index_%s' % i)) for i in 'hkl' ] if hkl_str.count(None) > 0: raise CifBuilderError( "Miller indices missing from current CIF block (%s)" % key.replace('index_h', 'index_%s' % 'hkl'[hkl_str.index(None)])) hkl_int = [] for i, h_str in enumerate(hkl_str): try: h_int = flex.int(h_str) except ValueError as e: raise CifBuilderError( "Invalid item for Miller index %s: %s" % ("HKL"[i], str(e))) hkl_int.append(h_int) indices = flex.miller_index(*hkl_int) loops.append((indices, loop)) break return loops def get_selection(self, value, wavelength_id=None, crystal_id=None, scale_group_code=None): selection = ~((value == '.') | (value == '?')) if self.wavelength_id_array is not None and wavelength_id is not None: selection &= (self.wavelength_id_array.data() == wavelength_id) if self.crystal_id_array is not None and crystal_id is not None: selection &= (self.crystal_id_array.data() == crystal_id) if self.scale_group_array is not None and scale_group_code is not None: selection &= (self.scale_group_array.data() == scale_group_code) return selection def flex_std_string_as_miller_array(self, value, wavelength_id=None, crystal_id=None, scale_group_code=None): # Create a miller_array object of only the data and indices matching the # wavelength_id, crystal_id and scale_group_code submitted or full array if these are None selection = self.get_selection(value, wavelength_id=wavelength_id, crystal_id=crystal_id, scale_group_code=scale_group_code) data = value.select(selection) #if not isinstance(data, flex.double): try: data = 
flex.int(data) indices = self.indices.select(selection) except ValueError: try: data = flex.double(data) indices = self.indices.select(selection) except ValueError: # if flex.std_string return all values including '.' and '?' data = value indices = self.indices if data.size() == 0: return None return miller.array( miller.set(self.crystal_symmetry, indices).auto_anomalous(), data) def arrays(self): return self._arrays def origarrays(self): """ return dictionary of raw data found in cif file cast into flex.double arrays or just string arrays as a fall back. """ return self._origarrays
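
# --- Illustrative usage sketch (not part of the original code) --------------
# miller_array_builder is normally not instantiated directly.  The usual route
# (an assumption about the surrounding iotbx API, with a placeholder file name)
# is iotbx.cif.reader, which runs this builder over every reflection loop in
# the file and returns the resulting miller arrays.
def _example_extract_miller_arrays(file_name="reflections.cif"):
  import iotbx.cif
  miller_arrays = iotbx.cif.reader(file_path=file_name).as_miller_arrays()
  for ma in miller_arrays:
    print(ma.info())   # labels record the originating _refln columns
  return miller_arrays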