class loop(DictMixin):
    """Column-oriented model of a CIF ``loop_``.

    Columns are stored in ``self._columns``, an ``OrderedDict`` mapping each
    data name to a ``flex.std_string``.  ``self.keys_lower`` maps the
    lower-cased data name to the name as stored, which makes item access
    case-insensitive.
    """

    def __init__(self, header=None, data=None):
        """Create a loop from a header (plus optional flat data) or a dict.

        header -- iterable of data names; each becomes an empty column.
        data -- with ``header``: a flat, row-major sequence whose length is
            a multiple of ``len(header)``.  Without ``header``: a dict of
            data name -> column values.
        """
        self._columns = OrderedDict()
        self.keys_lower = {}
        if header is not None:
            for key in header:
                self.setdefault(key, flex.std_string())
            if data is not None:
                # the number of data items must be an exact multiple of the
                # number of headers
                n_columns = len(header)
                assert len(data) % n_columns == 0, \
                    "Wrong number of data items for loop"
                for i in range(len(data) // n_columns):
                    self.add_row(
                        [data[i * n_columns + j] for j in range(n_columns)])
        elif data is not None:
            assert isinstance(data, (dict, OrderedDict))
            self.add_columns(data)
            self.keys_lower = dict(
                (key.lower(), key) for key in self._columns.keys())

    def __setitem__(self, key, value):
        """Store ``value`` as column ``key``, converted to flex.std_string.

        Raises Sorry if ``key`` does not match ``tag_re``.  When the loop
        already has columns, ``value`` must have ``self.size()`` elements.
        """
        if not re.match(tag_re, key):
            raise Sorry("%s is not a valid data name" % key)
        if len(self) > 0:
            assert len(value) == self.size()
        if not isinstance(value, flex.std_string):
            # Try the numeric flex types first so that numbers are rendered
            # by as_string(); anything else falls through to std_string().
            for flex_numeric_type in (flex.int, flex.double):
                if isinstance(value, flex_numeric_type):
                    value = value.as_string()
                else:
                    try:
                        value = flex_numeric_type(value).as_string()
                    except TypeError:
                        continue
                    else:
                        break
            if not isinstance(value, flex.std_string):
                value = flex.std_string(value)
        # value must be a mutable type
        assert hasattr(value, '__setitem__')
        self._columns[key] = value
        self.keys_lower[key.lower()] = key

    def __getitem__(self, key):
        """Return the column for ``key`` (case-insensitive)."""
        return self._columns[self.keys_lower[key.lower()]]

    def __delitem__(self, key):
        """Remove the column for ``key`` (case-insensitive)."""
        lowered = key.lower()
        del self._columns[self.keys_lower[lowered]]
        del self.keys_lower[lowered]

    def keys(self):
        """Return the data names in column order."""
        return self._columns.keys()

    def __repr__(self):
        return repr(OrderedDict(self.iteritems()))

    def name(self):
        """Return the common substring of the data names, without a
        trailing '_' or '.'."""
        return common_substring(self.keys()).rstrip('_').rstrip('.')

    def size(self):
        """Return the length of the longest column (0 if no columns)."""
        lengths = [len(column) for column in self.values()]
        if not lengths:
            return 0
        return max(lengths)

    def n_rows(self):
        """Return the number of rows; same as size()."""
        return self.size()

    def n_columns(self):
        """Return the number of columns."""
        return len(self.keys())

    def add_row(self, row, default_value="?"):
        """Append one row.

        row -- a dict keyed by data name (missing keys get
            ``default_value``) or a sequence with one item per column.
        Every item is stored as ``str(item)``.
        """
        if isinstance(row, dict):
            for key in self:
                if key in row:
                    self[key].append(str(row[key]))
                else:
                    self[key].append(default_value)
        else:
            assert len(row) == len(self)
            for i, key in enumerate(self):
                self[key].append(str(row[i]))

    def add_column(self, key, values):
        """Add column ``key``; its length must match a non-empty loop."""
        if self.size() != 0:
            assert len(values) == self.size()
        self[key] = values
        self.keys_lower[key.lower()] = key

    def add_columns(self, columns):
        """Add every (key, values) pair of the dict ``columns``."""
        assert isinstance(columns, (dict, OrderedDict))
        for key, value in columns.iteritems():
            self.add_column(key, value)

    def update_column(self, key, values):
        """Replace (or add) column ``key`` with ``values``."""
        assert isinstance(key, str), "first argument is column key string"
        if self.size() != 0:
            assert len(values) == self.size(), \
                "len(values) %d != self.size() %d" % (
                    len(values), self.size(),
                )
        self[key] = values
        self.keys_lower[key.lower()] = key

    def delete_row(self, index):
        """Delete row ``index`` from every column."""
        assert index < self.n_rows()
        for column in self._columns.values():
            del column[index]

    def __copy__(self):
        """Return a shallow copy: new mappings, shared column arrays."""
        new = loop()
        new._columns = self._columns.copy()
        new.keys_lower = self.keys_lower.copy()
        return new

    copy = __copy__

    def __deepcopy__(self, memo):
        new = loop()
        new._columns = copy.deepcopy(self._columns, memo)
        new.keys_lower = copy.deepcopy(self.keys_lower, memo)
        return new

    def deepcopy(self):
        """Return a deep copy of this loop."""
        return copy.deepcopy(self)

    def show(self, out=None, indent=" ", indent_row=None, fmt_str=None,
             align_columns=True):
        """Write the loop to ``out`` (default sys.stdout).

        indent -- whitespace placed before each data name.
        indent_row -- whitespace placed before each row (default: indent).
        fmt_str -- optional %-format applied to each row; values are not
            quoted in this mode.
        align_columns -- pad values so that columns line up (only used
            when ``fmt_str`` is None).
        The loop must have at least one row and one column.
        """
        assert self.n_rows() > 0 and self.n_columns() > 0, \
            "keys: %s %d %d" % (
                self.keys(), self.n_rows(), self.n_columns(),
            )
        if out is None:
            out = sys.stdout
        if indent_row is None:
            indent_row = indent
        assert indent.strip() == ""
        assert indent_row.strip() == ""
        print >> out, "loop_"
        for k in self.keys():
            print >> out, indent + k
        values = self._columns.values()
        range_len_values = range(len(values))
        if fmt_str is not None:
            # Pretty printing:
            # The user is responsible for providing a valid format string.
            # Values are not quoted - it is the user's responsibility to
            # place appropriate quotes in the format string if a particular
            # value may contain spaces.
            values = copy.deepcopy(values)
            # Convert columns to numeric arrays where possible so that
            # numeric format codes in fmt_str can be applied to them.
            for i, v in enumerate(values):
                for flex_numeric_type in (flex.int, flex.double):
                    if not isinstance(v, flex_numeric_type):
                        try:
                            values[i] = flex_numeric_type(v)
                        except ValueError:
                            continue
                        else:
                            break
            for i in range(self.size()):
                print >> out, fmt_str % tuple(
                    [values[j][i] for j in range_len_values])
        elif align_columns:
            column_formats = []
            for column in self.values():
                # NOTE(review): this rewrites the stored column in place;
                # the printing loop below reads the formatted items back
                # through `values`.  Confirm that mutating the loop while
                # showing it is intended.
                for i_v in range(column.size()):
                    column[i_v] = format_value(column[i_v])
                # exclude multi-line text fields from the column width
                # calculation
                single_line = flex.std_string(
                    item for item in column if "\n" not in item)
                width = single_line.max_element_length()
                # See if column contains only number, '.' or '?'
                # right-align numerical columns, left-align everything else
                without_placeholders = column.select(
                    ~((column == ".") | (column == "?")))
                try:
                    flex.double(without_placeholders)
                except ValueError:
                    width *= -1
                column_formats.append("%%%is" % width)
            fmt_str = indent_row + " ".join(column_formats)
            for i in range(self.size()):
                print >> out, (fmt_str % tuple(
                    [values[j][i] for j in range_len_values])).rstrip()
        else:
            for i in range(self.size()):
                values_to_print = [
                    format_value(values[j][i]) for j in range_len_values
                ]
                print >> out, ' '.join([indent] + values_to_print)

    def __str__(self):
        s = StringIO()
        self.show(out=s)
        return s.getvalue()

    def iterrows(self):
        """Yield each row as an OrderedDict of data name -> value.

        Warning! Still super-slow!
        """
        keys = self.keys()
        s_values = self.values()
        # The column list and index range are built once, outside the row
        # loop.
        range_len_self = range(len(self))
        for j in range(self.size()):
            yield OrderedDict(
                zip(keys, (s_values[i][j] for i in range_len_self)))

    def find_row(self, kv_dict):
        """Return the rows whose values equal every entry of ``kv_dict``.

        kv_dict -- dict of data name -> required value; every key must be
            one of self.keys().
        Returns a list of OrderedDicts (data name -> value), one per
        matching row.
        """
        self_keys = self.keys()
        for k in kv_dict.keys():
            assert k in self_keys
        result = []
        s_values = self.values()
        range_len_self = range(len(self))
        for i in range(self.size()):
            goodrow = True
            for k, v in kv_dict.iteritems():
                if self[k][i] != v:
                    goodrow = False
                    break
            if goodrow:
                result.append(
                    OrderedDict(
                        zip(self_keys,
                            [s_values[j][i] for j in range_len_self])))
        return result

    def sort(self, key=None, reverse=False):
        """Reorder the columns with sorted() over the (name, column)
        items."""
        self._columns = OrderedDict(
            sorted(self._columns.items(), key=key, reverse=reverse))

    def order(self, order):
        """Rearrange the columns to follow the sequence ``order``.

        Only the data names listed in ``order`` are kept in
        ``self._columns``; ``self.keys_lower`` is left untouched.  Raises
        KeyError if a listed name is not a stored column.
        """
        reordered = OrderedDict()
        for name in order:
            reordered[name] = self._columns[name]
        self._columns = reordered

    def __eq__(self, other):
        """Return True if both loops have the same keys, in the same order,
        and element-wise equal columns."""
        if (len(self) != len(other) or
                self.size() != other.size() or
                self.keys() != other.keys()):
            return False
        for value, other_value in zip(self.values(), other.values()):
            if (value == other_value).count(True) != len(value):
                return False
        return True

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__; keep the two consistent.
        return not self.__eq__(other)
class cif(DictMixin):
    """Ordered mapping of data block names to ``block`` objects.

    Blocks are kept in ``self.blocks`` (an ``OrderedDict``);
    ``self.keys_lower`` maps each lower-cased block name to the name as
    stored, so lookups ignore case.
    """

    def __init__(self, blocks=None):
        """Start from ``blocks`` (anything OrderedDict accepts) or empty."""
        if blocks is None:
            self.blocks = OrderedDict()
        else:
            self.blocks = OrderedDict(blocks)
        self.keys_lower = dict(
            (name.lower(), name) for name in self.blocks.keys())

    def __setitem__(self, key, value):
        """Store ``value`` (a ``block``) under ``key``.

        Raises Sorry if '_' + key does not match ``tag_re``.
        """
        assert isinstance(value, block)
        name_is_valid = re.match(tag_re, '_' + key)
        if not name_is_valid:
            raise Sorry("%s is not a valid data block name" % key)
        self.blocks[key] = value
        self.keys_lower[key.lower()] = key

    def get(self, key, default=None):
        """Return the block for ``key`` (case-insensitive) or ``default``."""
        stored_name = self.keys_lower.get(key.lower())
        if stored_name is not None:
            return self.blocks.get(stored_name, default)
        return default

    def __getitem__(self, key):
        """Return the block for ``key``; raise KeyError if unknown."""
        found = self.get(key)
        if found is not None:
            return found
        raise KeyError('Unknown CIF data block name: "%s"' % key)

    def __delitem__(self, key):
        """Remove the block for ``key`` (case-insensitive)."""
        lowered = key.lower()
        stored_name = self.keys_lower[lowered]
        del self.blocks[stored_name]
        del self.keys_lower[lowered]

    def keys(self):
        """Return the block names in order."""
        return self.blocks.keys()

    def __repr__(self):
        as_ordered_dict = OrderedDict(self.iteritems())
        return repr(as_ordered_dict)

    def __copy__(self):
        """Return a new cif built from a shallow copy of the block map."""
        return cif(self.blocks.copy())

    copy = __copy__

    def __deepcopy__(self, memo):
        cloned_blocks = copy.deepcopy(self.blocks, memo)
        return cif(cloned_blocks)

    def deepcopy(self):
        """Return a deep copy of this object."""
        return copy.deepcopy(self)

    def show(self, out=None, indent=" ", indent_row=None,
             data_name_field_width=34, loop_format_strings=None,
             align_columns=True):
        """Write every block to ``out`` (default sys.stdout).

        Each block is preceded by a ``data_<name>`` line; all remaining
        arguments are passed on to the block's own show().
        """
        if out is None:
            out = sys.stdout
        show_options = dict(
            out=out,
            indent=indent,
            indent_row=indent_row,
            data_name_field_width=data_name_field_width,
            loop_format_strings=loop_format_strings,
            align_columns=align_columns)
        for name, data_block in self.items():
            print >> out, "data_%s" % name
            data_block.show(**show_options)

    def __str__(self):
        buf = StringIO()
        self.show(out=buf)
        return buf.getvalue()

    def validate(self, dictionary, show_warnings=True, error_handler=None,
                 out=None):
        """Validate every block against ``dictionary``.

        A fresh handler of the same class as ``error_handler`` (default
        ``validation.ErrorHandler``) is created for each block and shown
        on ``out`` when it counted errors or warnings.  Returns the
        handler of the last block, or the initial handler when there are
        no blocks.
        """
        if out is None:
            out = sys.stdout
        from iotbx.cif import validation
        errors = {}
        if error_handler is None:
            error_handler = validation.ErrorHandler()
        for block_name, data_block in self.blocks.iteritems():
            error_handler = error_handler.__class__()
            dictionary.set_error_handler(error_handler)
            data_block.validate(dictionary)
            errors.setdefault(block_name, error_handler)
            has_messages = (error_handler.error_count or
                            error_handler.warning_count)
            if has_messages:
                error_handler.show(show_warnings=show_warnings, out=out)
        return error_handler

    def sort(self, recursive=False, key=None, reverse=False):
        """Sort the blocks with sorted() over the (name, block) items.

        With ``recursive`` each block's own sort() is called too, with
        ``recursive`` and ``reverse`` forwarded (``key`` is not).
        """
        ordered_items = sorted(self.blocks.items(), key=key, reverse=reverse)
        self.blocks = OrderedDict(ordered_items)
        if not recursive:
            return
        for data_block in self.blocks.values():
            data_block.sort(recursive=recursive, reverse=reverse)
class cif(DictMixin):
    """Case-insensitive, ordered container of CIF data blocks.

    ``self.blocks`` is an ``OrderedDict`` of block name -> ``block``;
    ``self.keys_lower`` maps the lower-cased name to the stored name.
    """

    def __init__(self, blocks=None):
        """Initialise from ``blocks`` (OrderedDict-compatible) or empty."""
        self.blocks = OrderedDict() if blocks is None else OrderedDict(blocks)
        lower_to_stored = {}
        for stored in self.blocks.keys():
            lower_to_stored[stored.lower()] = stored
        self.keys_lower = lower_to_stored

    def __setitem__(self, key, value):
        """Add or replace the block called ``key``.

        ``value`` must be a ``block``; Sorry is raised when '_' + key does
        not match ``tag_re``.
        """
        assert isinstance(value, block)
        if re.match(tag_re, '_' + key) is None:
            raise Sorry("%s is not a valid data block name" % key)
        self.blocks[key] = value
        self.keys_lower[key.lower()] = key

    def get(self, key, default=None):
        """Look ``key`` up ignoring case; return ``default`` if absent."""
        actual_key = self.keys_lower.get(key.lower())
        if actual_key is None:
            return default
        return self.blocks.get(actual_key, default)

    def __getitem__(self, key):
        """Return the block called ``key`` or raise KeyError."""
        data_block = self.get(key)
        if data_block is None:
            raise KeyError('Unknown CIF data block name: "%s"' % key)
        return data_block

    def __delitem__(self, key):
        """Delete the block called ``key`` (case-insensitive)."""
        folded = key.lower()
        del self.blocks[self.keys_lower[folded]]
        del self.keys_lower[folded]

    def keys(self):
        """Return the stored block names, in order."""
        return self.blocks.keys()

    def __repr__(self):
        return repr(OrderedDict(self.iteritems()))

    def __copy__(self):
        """Return a cif wrapping a shallow copy of the block mapping."""
        shallow = self.blocks.copy()
        return cif(shallow)

    copy = __copy__

    def __deepcopy__(self, memo):
        return cif(copy.deepcopy(self.blocks, memo))

    def deepcopy(self):
        """Return a deep copy of this object."""
        return copy.deepcopy(self)

    def show(self, out=None, indent=" ", indent_row=None,
             data_name_field_width=34, loop_format_strings=None):
        """Print each block to ``out`` (default sys.stdout).

        A ``data_<name>`` line is written before each block, and the
        formatting arguments are handed to the block's show().
        """
        if out is None:
            out = sys.stdout
        for name, data_block in self.items():
            print >> out, "data_%s" % name
            data_block.show(
                out=out,
                indent=indent,
                indent_row=indent_row,
                data_name_field_width=data_name_field_width,
                loop_format_strings=loop_format_strings)

    def __str__(self):
        stream = StringIO()
        self.show(out=stream)
        return stream.getvalue()

    def validate(self, dictionary, show_warnings=True, error_handler=None,
                 out=None):
        """Run each block's validate() against ``dictionary``.

        For every block a new handler is instantiated from the class of
        the current one (initially ``error_handler`` or
        ``validation.ErrorHandler()``), installed on ``dictionary``, and
        shown on ``out`` if it recorded errors or warnings.  The handler
        in use when the loop ends is returned.
        """
        if out is None:
            out = sys.stdout
        from iotbx.cif import validation
        errors = {}
        if error_handler is None:
            error_handler = validation.ErrorHandler()
        for name, data_block in self.blocks.iteritems():
            handler_class = error_handler.__class__
            error_handler = handler_class()
            dictionary.set_error_handler(error_handler)
            data_block.validate(dictionary)
            errors.setdefault(name, error_handler)
            if error_handler.error_count or error_handler.warning_count:
                error_handler.show(show_warnings=show_warnings, out=out)
        return error_handler

    def sort(self, recursive=False, key=None, reverse=False):
        """Sort blocks via sorted() on the (name, block) items.

        If ``recursive`` is true, sort() is also called on every block
        with ``recursive`` and ``reverse`` (``key`` is not forwarded).
        """
        self.blocks = OrderedDict(
            sorted(self.blocks.items(), key=key, reverse=reverse))
        if recursive:
            for data_block in self.blocks.values():
                data_block.sort(recursive=recursive, reverse=reverse)
class loop(DictMixin):
    """Column-oriented model of a CIF ``loop_``.

    ``self._columns`` is an ``OrderedDict`` of data name ->
    ``flex.std_string``.  ``self.keys_lower`` maps each lower-cased data
    name to the name as stored, giving case-insensitive item access.
    """

    def __init__(self, header=None, data=None):
        """Create a loop from a header (plus optional flat data) or a dict.

        header -- iterable of data names; each becomes an empty column.
        data -- with ``header``: a flat, row-major sequence whose length is
            a multiple of ``len(header)``.  Without ``header``: a dict of
            data name -> column values.
        """
        self._columns = OrderedDict()
        self.keys_lower = {}
        if header is not None:
            for key in header:
                self.setdefault(key, flex.std_string())
            if data is not None:
                # the number of data items must be an exact multiple of the
                # number of headers
                n_columns = len(header)
                assert len(data) % n_columns == 0, \
                    "Wrong number of data items for loop"
                n_rows = len(data) // n_columns
                for i in range(n_rows):
                    self.add_row(
                        [data[i * n_columns + j] for j in range(n_columns)])
        elif data is not None:
            assert isinstance(data, (dict, OrderedDict))
            self.add_columns(data)
            self.keys_lower = dict(
                (key.lower(), key) for key in self._columns.keys())

    def __setitem__(self, key, value):
        """Store ``value`` as column ``key``, converted to flex.std_string.

        Raises Sorry if ``key`` does not match ``tag_re``.  When the loop
        already has columns, ``value`` must have ``self.size()`` elements.
        """
        if not re.match(tag_re, key):
            raise Sorry("%s is not a valid data name" % key)
        if len(self) > 0:
            assert len(value) == self.size()
        if not isinstance(value, flex.std_string):
            # Numeric flex types are tried first so numbers are rendered by
            # as_string(); everything else goes through std_string().
            for flex_numeric_type in (flex.int, flex.double):
                if isinstance(value, flex_numeric_type):
                    value = value.as_string()
                else:
                    try:
                        value = flex_numeric_type(value).as_string()
                    except TypeError:
                        continue
                    else:
                        break
            if not isinstance(value, flex.std_string):
                value = flex.std_string(value)
        # value must be a mutable type
        assert hasattr(value, '__setitem__')
        self._columns[key] = value
        self.keys_lower[key.lower()] = key

    def __getitem__(self, key):
        """Return the column for ``key`` (case-insensitive)."""
        return self._columns[self.keys_lower[key.lower()]]

    def __delitem__(self, key):
        """Remove the column for ``key`` (case-insensitive)."""
        lowered = key.lower()
        del self._columns[self.keys_lower[lowered]]
        del self.keys_lower[lowered]

    def keys(self):
        """Return the data names in column order."""
        return self._columns.keys()

    def __repr__(self):
        return repr(OrderedDict(self.iteritems()))

    def name(self):
        """Return the common substring of the data names, without a
        trailing '_' or '.'."""
        return common_substring(self.keys()).rstrip('_').rstrip('.')

    def size(self):
        """Return the length of the longest column (0 if no columns)."""
        size = 0
        for column in self.values():
            size = max(size, len(column))
        return size

    def n_rows(self):
        """Return the number of rows; same as size()."""
        return self.size()

    def n_columns(self):
        """Return the number of columns."""
        return len(self.keys())

    def add_row(self, row, default_value="?"):
        """Append one row.

        row -- a dict keyed by data name (missing keys get
            ``default_value``) or a sequence with one item per column.
        Every item is stored as ``str(item)``.
        """
        if isinstance(row, dict):
            for key in self:
                if key in row:
                    self[key].append(str(row[key]))
                else:
                    self[key].append(default_value)
        else:
            assert len(row) == len(self)
            for i, key in enumerate(self):
                self[key].append(str(row[i]))

    def add_column(self, key, values):
        """Add column ``key``; its length must match a non-empty loop."""
        if self.size() != 0:
            assert len(values) == self.size()
        self[key] = values
        self.keys_lower[key.lower()] = key

    def add_columns(self, columns):
        """Add every (key, values) pair of the dict ``columns``."""
        assert isinstance(columns, (dict, OrderedDict))
        for key, value in columns.iteritems():
            self.add_column(key, value)

    def update_column(self, key, values):
        """Replace (or add) column ``key`` with ``values``."""
        assert isinstance(key, str), "first argument is column key string"
        if self.size() != 0:
            assert len(values) == self.size(), \
                "len(values) %d != self.size() %d" % (
                    len(values), self.size(),
                )
        self[key] = values
        self.keys_lower[key.lower()] = key

    def delete_row(self, index):
        """Delete row ``index`` from every column."""
        assert index < self.n_rows()
        for column in self._columns.values():
            del column[index]

    def __copy__(self):
        """Return a shallow copy: new mappings, shared column arrays."""
        new = loop()
        new._columns = self._columns.copy()
        new.keys_lower = self.keys_lower.copy()
        return new

    copy = __copy__

    def __deepcopy__(self, memo):
        new = loop()
        new._columns = copy.deepcopy(self._columns, memo)
        new.keys_lower = copy.deepcopy(self.keys_lower, memo)
        return new

    def deepcopy(self):
        """Return a deep copy of this loop."""
        return copy.deepcopy(self)

    def show(self, out=None, indent=" ", indent_row=None, fmt_str=None,
             align_columns=True):
        """Write the loop to ``out`` (default sys.stdout).

        indent -- whitespace placed before each data name.
        indent_row -- whitespace placed before each row (default: indent).
        fmt_str -- optional %-format applied to each row; values are not
            quoted in this mode.
        align_columns -- pad values so that columns line up (only used
            when ``fmt_str`` is None).
        The loop must have at least one row and one column.
        """
        assert self.n_rows() > 0 and self.n_columns() > 0, \
            "keys: %s %d %d" % (
                self.keys(), self.n_rows(), self.n_columns(),
            )
        if out is None:
            out = sys.stdout
        if indent_row is None:
            indent_row = indent
        assert indent.strip() == ""
        assert indent_row.strip() == ""
        print >> out, "loop_"
        for k in self.keys():
            print >> out, indent + k
        values = self._columns.values()
        # Built once and reused for every printed row.
        column_indices = range(len(values))
        if fmt_str is not None:
            # Pretty printing:
            # The user is responsible for providing a valid format string.
            # Values are not quoted - it is the user's responsibility to
            # place appropriate quotes in the format string if a particular
            # value may contain spaces.
            values = copy.deepcopy(values)
            # Convert columns to numeric arrays where possible so that
            # numeric format codes in fmt_str can be applied to them.
            for i, v in enumerate(values):
                for flex_numeric_type in (flex.int, flex.double):
                    if not isinstance(v, flex_numeric_type):
                        try:
                            values[i] = flex_numeric_type(v)
                        except ValueError:
                            continue
                        else:
                            break
            for i in range(self.size()):
                print >> out, fmt_str % tuple(
                    [values[j][i] for j in column_indices])
        elif align_columns:
            column_formats = []
            for column in self.values():
                # NOTE(review): this rewrites the stored column in place;
                # the printing loop below reads the formatted items back
                # through `values`.  Confirm that mutating the loop while
                # showing it is intended.
                for i_v in range(column.size()):
                    column[i_v] = format_value(column[i_v])
                # exclude multi-line text fields from the column width
                # calculation
                single_line = flex.std_string(
                    item for item in column if "\n" not in item)
                width = single_line.max_element_length()
                # See if column contains only number, '.' or '?'
                # right-align numerical columns, left-align everything else
                without_placeholders = column.select(
                    ~((column == ".") | (column == "?")))
                try:
                    flex.double(without_placeholders)
                except ValueError:
                    width *= -1
                column_formats.append("%%%is" % width)
            fmt_str = indent_row + " ".join(column_formats)
            for i in range(self.size()):
                print >> out, (fmt_str % tuple(
                    [values[j][i] for j in column_indices])).rstrip()
        else:
            for i in range(self.size()):
                values_to_print = [
                    format_value(values[j][i]) for j in column_indices]
                print >> out, ' '.join([indent] + values_to_print)

    def __str__(self):
        s = StringIO()
        self.show(out=s)
        return s.getvalue()

    def iterrows(self):
        """Yield each row as an OrderedDict of data name -> value."""
        keys = self.keys()
        # Fetch the columns once; calling self.values() for every cell
        # rebuilt the whole column list each time.
        columns = self.values()
        column_indices = range(len(self))
        for j in range(self.size()):
            yield OrderedDict(
                zip(keys, [columns[i][j] for i in column_indices]))

    def sort(self, key=None, reverse=False):
        """Reorder the columns with sorted() over the (name, column)
        items."""
        self._columns = OrderedDict(
            sorted(self._columns.items(), key=key, reverse=reverse))

    def order(self, order):
        """Rearrange the columns to follow the sequence ``order``.

        Only the data names listed in ``order`` are kept in
        ``self._columns``; ``self.keys_lower`` is left untouched.  Raises
        KeyError if a listed name is not a stored column.
        """
        reordered = OrderedDict()
        for name in order:
            reordered[name] = self._columns[name]
        self._columns = reordered

    def __eq__(self, other):
        """Return True if both loops have the same keys, in the same order,
        and element-wise equal columns."""
        if (len(self) != len(other) or
                self.size() != other.size() or
                self.keys() != other.keys()):
            return False
        for value, other_value in zip(self.values(), other.values()):
            if (value == other_value).count(True) != len(value):
                return False
        return True

    def __ne__(self, other):
        # Python 2 does not derive != from __eq__; keep the two consistent.
        return not self.__eq__(other)
def __init__(self, unmerged_intensities, batches_all, n_bins=20, d_min=None,
             id_to_batches=None):
    """Split the data into runs, then compute, plot and write the results.

    unmerged_intensities -- unmerged intensity array; only observations
        with sigmas() > 0 are kept.
    batches_all -- array of batch numbers matching ``unmerged_intensities``
        (the same selection is applied to both).
    n_bins -- number of bins passed to setup_binner().
    d_min -- passed to resolution_filter() for every run.
    id_to_batches -- optional mapping of an identifier to an inclusive
        (first_batch, last_batch) pair, one entry per run.  When None, runs
        are the maximal ranges of consecutive batch numbers present in
        ``batches_all``.

    Every automatically detected run now extends through its own last
    batch, and a final run that follows a gap is included.
    """
    intensities = OrderedDict()
    individual_merged_intensities = OrderedDict()
    batches = OrderedDict()

    sel = unmerged_intensities.sigmas() > 0
    unmerged_intensities = unmerged_intensities.select(sel)
    batches_all = batches_all.select(sel)
    batch_numbers = batches_all.data()

    # Collect one inclusive (first_batch, last_batch) range per run.
    batch_ranges = []
    if id_to_batches is None:
        run_id_to_batch_id = None
        run_start = None
        last_batch = None
        for batch in sorted(set(batch_numbers)):
            if last_batch is None:
                run_start = batch
            elif batch > last_batch + 1:
                # A gap in the batch numbers closes the current run.
                batch_ranges.append((run_start, last_batch))
                run_start = batch
            last_batch = batch
        if last_batch is not None:
            # Close the final run, which ends at the last batch seen.
            batch_ranges.append((run_start, last_batch))
    else:
        run_id_to_batch_id = OrderedDict()
        for batch_id, batch_range in id_to_batches.iteritems():
            run_id_to_batch_id[len(batch_ranges)] = batch_id
            run_start, last_batch = batch_range
            batch_ranges.append((run_start, last_batch))

    for run_id, (run_start, last_batch) in enumerate(batch_ranges):
        batch_sel = (batch_numbers >= run_start) & (batch_numbers <= last_batch)
        batches[run_id] = batches_all.select(
            batch_sel).resolution_filter(d_min=d_min)
        intensities[run_id] = unmerged_intensities.select(
            batch_sel).resolution_filter(d_min=d_min)
        individual_merged_intensities[run_id] = \
            intensities[run_id].merge_equivalents().array()
        Debug.write(
            "run %i batch %i to %i" % (run_id + 1, run_start, last_batch))

    unmerged_intensities.setup_binner(n_bins=n_bins)
    unmerged_intensities.show_summary()

    self.unmerged_intensities = unmerged_intensities
    self.merged_intensities = unmerged_intensities.merge_equivalents().array()
    self.intensities = intensities
    self.individual_merged_intensities = individual_merged_intensities
    self.batches = batches

    if run_id_to_batch_id is not None:
        labels = run_id_to_batch_id.values()
    else:
        labels = None

    racc = self.relative_anomalous_cc()
    if racc is not None:
        self.plot_relative_anomalous_cc(racc, labels=labels)
    correlation_matrix, linkage_matrix = \
        self.compute_correlation_coefficient_matrix()
    self._cluster_dict = self.to_dict(correlation_matrix, linkage_matrix)
    self.plot_cc_matrix(correlation_matrix, linkage_matrix, labels=labels)
    self.write_output()