Exemple #1
0
class loop(DictMixin):
    """A CIF ``loop_``: an ordered, case-insensitively keyed set of columns.

    ``self._columns`` is an OrderedDict from data name (spelled as it was
    given) to its column.  ``self.keys_lower`` maps each lower-cased data
    name back to that spelling, which is what makes item lookup and deletion
    case-insensitive.  ``__setitem__`` converts every column it stores to
    ``flex.std_string``.
    """

    def __init__(self, header=None, data=None):
        """Create a loop, optionally populating it.

        header -- iterable of data names; an empty column is created for
            each one.
        data -- with ``header``: a flat, row-major sequence of items whose
            length must be a multiple of ``len(header)``.  Without
            ``header``: a dict mapping data name to column values.
        """
        self._columns = OrderedDict()
        self.keys_lower = {}
        if header is not None:
            for key in header:
                self.setdefault(key, flex.std_string())
            if data is not None:
                n_columns = len(header)
                # the number of data items must be an exact multiple of the
                # number of headers
                assert len(data) % n_columns == 0, (
                    "Wrong number of data items for loop")
                n_rows = len(data) // n_columns
                for i in range(n_rows):
                    self.add_row(
                        [data[i * n_columns + j] for j in range(n_columns)])
        elif data is not None:
            assert isinstance(data, (dict, OrderedDict))
            # __setitem__ (reached through add_columns) already maintains
            # keys_lower, so it does not need to be rebuilt afterwards.
            self.add_columns(data)

    def __setitem__(self, key, value):
        """Store ``value`` as column ``key``, converted to flex.std_string.

        Raises Sorry if ``key`` does not match ``tag_re``.  When the loop
        already has columns, ``value`` must have ``self.size()`` items.
        """
        if not re.match(tag_re, key):
            raise Sorry("%s is not a valid data name" % key)
        if len(self) > 0:
            assert len(value) == self.size()
        if not isinstance(value, flex.std_string):
            # Try the numeric flex types first so that their as_string()
            # formatting is used; fall back to a plain string array.
            for flex_numeric_type in (flex.int, flex.double):
                if isinstance(value, flex_numeric_type):
                    value = value.as_string()
                else:
                    try:
                        value = flex_numeric_type(value).as_string()
                    except TypeError:
                        continue
                    else:
                        break
            if not isinstance(value, flex.std_string):
                value = flex.std_string(value)
        # value must be a mutable type
        assert hasattr(value, '__setitem__')
        self._columns[key] = value
        self.keys_lower[key.lower()] = key

    def __getitem__(self, key):
        """Return the column for ``key`` (case-insensitive)."""
        return self._columns[self.keys_lower[key.lower()]]

    def __delitem__(self, key):
        """Remove the column for ``key`` (case-insensitive)."""
        lowered = key.lower()
        del self._columns[self.keys_lower[lowered]]
        del self.keys_lower[lowered]

    def keys(self):
        """Return the data names in column order, spelled as stored."""
        return self._columns.keys()

    def __repr__(self):
        return repr(OrderedDict(self.iteritems()))

    def name(self):
        """Return the common substring of the keys, minus trailing '_'/'.'."""
        return common_substring(self.keys()).rstrip('_').rstrip('.')

    def size(self):
        """Return the length of the longest column (0 for an empty loop)."""
        size = 0
        for column in self.values():
            size = max(size, len(column))
        return size

    def n_rows(self):
        """Return the number of rows; same as ``size()``."""
        return self.size()

    def n_columns(self):
        """Return the number of columns."""
        return len(self.keys())

    def add_row(self, row, default_value="?"):
        """Append one row; every item is stored as ``str(item)``.

        row -- either a dict keyed by data name (columns missing from the
            dict receive ``default_value``) or a sequence with exactly one
            item per column, in column order.
        """
        if isinstance(row, dict):
            for key in self:
                if key in row:
                    self[key].append(str(row[key]))
                else:
                    self[key].append(default_value)
        else:
            assert len(row) == len(self)
            for i, key in enumerate(self):
                self[key].append(str(row[i]))

    def add_column(self, key, values):
        """Add (or replace) column ``key``; length must match a non-empty loop."""
        if self.size() != 0:
            assert len(values) == self.size()
        # __setitem__ also records the key in keys_lower.
        self[key] = values

    def add_columns(self, columns):
        """Add every ``key -> values`` pair of the dict ``columns``."""
        assert isinstance(columns, (dict, OrderedDict))
        for key, value in columns.iteritems():
            self.add_column(key, value)

    def update_column(self, key, values):
        """Replace column ``key`` with ``values`` (same length as the loop)."""
        assert isinstance(key, str), "first argument is column key string"
        if self.size() != 0:
            assert len(
                values) == self.size(), "len(values) %d != self.size() %d" % (
                    len(values),
                    self.size(),
                )
        # __setitem__ also records the key in keys_lower.
        self[key] = values

    def delete_row(self, index):
        """Delete row ``index`` from every column."""
        assert index < self.n_rows()
        for column in self._columns.values():
            del column[index]

    def __copy__(self):
        """Return a shallow copy: new mappings, shared column objects."""
        new = loop()
        new._columns = self._columns.copy()
        new.keys_lower = self.keys_lower.copy()
        return new

    copy = __copy__

    def __deepcopy__(self, memo):
        """Return a copy whose columns are deep-copied as well."""
        new = loop()
        new._columns = copy.deepcopy(self._columns, memo)
        new.keys_lower = copy.deepcopy(self.keys_lower, memo)
        return new

    def deepcopy(self):
        return copy.deepcopy(self)

    def show(self,
             out=None,
             indent="  ",
             indent_row=None,
             fmt_str=None,
             align_columns=True):
        """Write the loop in CIF syntax to ``out`` (default ``sys.stdout``).

        indent -- whitespace placed before each data name.
        indent_row -- whitespace placed before each data row; defaults to
            ``indent``.
        fmt_str -- optional %-format string applied to each row.  Takes
            precedence over ``align_columns``.
        align_columns -- pad columns to a common width.  NOTE: this branch
            rewrites the stored column values in place with
            ``format_value``.

        The loop must have at least one row and one column.
        """
        assert self.n_rows() > 0 and self.n_columns() > 0, "keys: %s %d %d" % (
            self.keys(),
            self.n_rows(),
            self.n_columns(),
        )
        if out is None:
            out = sys.stdout
        if indent_row is None:
            indent_row = indent
        assert indent.strip() == ""
        assert indent_row.strip() == ""
        print >> out, "loop_"
        for k in self.keys():
            print >> out, indent + k
        values = self._columns.values()
        range_len_values = range(len(values))
        if fmt_str is not None:
            # Pretty printing:
            #   The user is responsible for providing a valid format string.
            #   Values are not quoted - it is the user's responsibility to
            #   place appropriate quotes in the format string if a particular
            #   value may contain spaces.
            # Work on a copy so the numeric conversion below does not touch
            # the stored columns.
            values = copy.deepcopy(values)
            for i, v in enumerate(values):
                for flex_numeric_type in (flex.int, flex.double):
                    if not isinstance(v, flex_numeric_type):
                        try:
                            values[i] = flex_numeric_type(v)
                        except ValueError:
                            continue
                        else:
                            break
            for i in range(self.size()):
                print >> out, fmt_str % tuple(
                    [values[j][i] for j in range_len_values])
        elif align_columns:
            column_formats = []
            for _, v in self.iteritems():
                for i_v in range(v.size()):
                    v[i_v] = format_value(v[i_v])
                # exclude multi-line (semicolon-delimited) text fields from
                # the column width calculation
                v_ = flex.std_string(item for item in v if "\n" not in item)
                width = v_.max_element_length()
                # See if column contains only number, '.' or '?'
                # right-align numerical columns, left-align everything else
                v = v.select(~((v == ".") | (v == "?")))
                try:
                    flex.double(v)
                except ValueError:
                    width *= -1
                column_formats.append("%%%is" % width)
            row_format = indent_row + "  ".join(column_formats)
            for i in range(self.size()):
                print >> out, (row_format %
                               tuple([values[j][i]
                                      for j in range_len_values])).rstrip()
        else:
            for i in range(self.size()):
                values_to_print = [
                    format_value(values[j][i]) for j in range_len_values
                ]
                print >> out, ' '.join([indent] + values_to_print)

    def __str__(self):
        s = StringIO()
        self.show(out=s)
        return s.getvalue()

    def iterrows(self):
        """Yield each row as an OrderedDict of data name -> value.

        Warning! Still super-slow!
        """
        keys = self.keys()
        s_values = self.values()
        for j in range(self.size()):
            yield OrderedDict(
                zip(keys, [column[j] for column in s_values]))

    def find_row(self, kv_dict):
        """Return the rows (as OrderedDicts) matching every pair in kv_dict.

        Every key of ``kv_dict`` must be one of ``self.keys()`` exactly as
        stored.
        """
        self_keys = self.keys()
        for k in kv_dict.keys():
            assert k in self_keys
        result = []
        s_values = self.values()
        for i in range(self.size()):
            if any(self[k][i] != v for k, v in kv_dict.iteritems()):
                continue
            result.append(
                OrderedDict(
                    zip(self_keys, [column[i] for column in s_values])))
        return result

    def sort(self, key=None, reverse=False):
        """Re-order the columns by sorting the (name, column) items."""
        self._columns = OrderedDict(
            sorted(self._columns.items(), key=key, reverse=reverse))

    def order(self, order):
        """Re-order the columns to follow the sequence of keys ``order``.

        Only the columns named in ``order`` are kept.  Each name must match
        a stored key exactly, otherwise KeyError is raised and the loop is
        left unchanged.
        """
        reordered = OrderedDict()
        for o in order:
            reordered[o] = self._columns[o]
        self._columns = reordered
        # Drop lookups for any column that is no longer present.
        self.keys_lower = dict((k.lower(), k) for k in reordered)

    def __eq__(self, other):
        """Return True if keys, sizes and all column values are equal."""
        if (len(self) != len(other) or self.size() != other.size()
                or self.keys() != other.keys()):
            return False
        for value, other_value in zip(self.values(), other.values()):
            if (value == other_value).count(True) != len(value):
                return False
        return True
Exemple #2
0
class cif(DictMixin):
    """Ordered collection of CIF data blocks keyed by block name.

    ``self.blocks`` is an OrderedDict from block name (spelled as given) to
    the block.  ``self.keys_lower`` maps each lower-cased name back to that
    spelling, so lookup and deletion are case-insensitive.
    """

    def __init__(self, blocks=None):
        """Create the container from ``blocks``.

        blocks -- anything accepted by ``OrderedDict(...)``, or None for an
            empty container.
        """
        if blocks is not None:
            self.blocks = OrderedDict(blocks)
        else:
            self.blocks = OrderedDict()
        self.keys_lower = dict((key.lower(), key) for key in self.blocks)

    def __setitem__(self, key, value):
        """Store data block ``value`` under ``key``.

        Raises Sorry if ``'_' + key`` does not match ``tag_re``.
        """
        assert isinstance(value, block)
        if not re.match(tag_re, '_' + key):
            raise Sorry("%s is not a valid data block name" % key)
        self.blocks[key] = value
        self.keys_lower[key.lower()] = key

    def get(self, key, default=None):
        """Return the block named ``key`` (case-insensitive) or ``default``."""
        stored_key = self.keys_lower.get(key.lower())
        if stored_key is None:
            return default
        return self.blocks.get(stored_key, default)

    def __getitem__(self, key):
        """Return the block named ``key``; raise KeyError if it is unknown."""
        result = self.get(key)
        if result is None:
            raise KeyError('Unknown CIF data block name: "%s"' % key)
        return result

    def __delitem__(self, key):
        """Remove the block named ``key`` (case-insensitive)."""
        lowered = key.lower()
        del self.blocks[self.keys_lower[lowered]]
        del self.keys_lower[lowered]

    def keys(self):
        """Return the block names in order, spelled as stored."""
        return self.blocks.keys()

    def __repr__(self):
        return repr(OrderedDict(self.iteritems()))

    def __copy__(self):
        """Return a shallow copy: a new container sharing the same blocks."""
        return cif(self.blocks.copy())

    copy = __copy__

    def __deepcopy__(self, memo):
        """Return a copy whose blocks are deep-copied as well."""
        return cif(copy.deepcopy(self.blocks, memo))

    def deepcopy(self):
        return copy.deepcopy(self)

    def show(self,
             out=None,
             indent="  ",
             indent_row=None,
             data_name_field_width=34,
             loop_format_strings=None,
             align_columns=True):
        """Write every block to ``out`` (default ``sys.stdout``).

        Each block is preceded by a ``data_<name>`` line; the remaining
        arguments are passed through to the block's own ``show``.
        """
        if out is None:
            out = sys.stdout
        for name, block in self.items():
            print >> out, "data_%s" % name
            block.show(out=out,
                       indent=indent,
                       indent_row=indent_row,
                       data_name_field_width=data_name_field_width,
                       loop_format_strings=loop_format_strings,
                       align_columns=align_columns)

    def __str__(self):
        s = StringIO()
        self.show(out=s)
        return s.getvalue()

    def validate(self,
                 dictionary,
                 show_warnings=True,
                 error_handler=None,
                 out=None):
        """Validate every block against ``dictionary``.

        A fresh handler of the same class as ``error_handler`` (default
        ``validation.ErrorHandler``) is created for each block and installed
        on ``dictionary``.  A handler that recorded errors or warnings is
        shown on ``out`` (default ``sys.stdout``).

        NOTE: only the handler of the last block is returned, or the initial
        handler when there are no blocks.
        """
        if out is None:
            out = sys.stdout
        from iotbx.cif import validation
        if error_handler is None:
            error_handler = validation.ErrorHandler()
        for block in self.blocks.values():
            # start each block with an empty handler of the same type
            error_handler = error_handler.__class__()
            dictionary.set_error_handler(error_handler)
            block.validate(dictionary)
            if error_handler.error_count or error_handler.warning_count:
                error_handler.show(show_warnings=show_warnings, out=out)
        return error_handler

    def sort(self, recursive=False, key=None, reverse=False):
        """Sort the blocks by their (name, block) items.

        With ``recursive`` each block's own ``sort`` is called too; ``key``
        is not forwarded to the blocks.
        """
        self.blocks = OrderedDict(
            sorted(self.blocks.items(), key=key, reverse=reverse))
        if recursive:
            for b in self.blocks.values():
                b.sort(recursive=recursive, reverse=reverse)
Exemple #3
0
class cif(DictMixin):
  """Ordered collection of CIF data blocks keyed by block name.

  ``self.blocks`` is an OrderedDict from block name (spelled as given) to
  the block.  ``self.keys_lower`` maps each lower-cased name back to that
  spelling, so lookup and deletion are case-insensitive.
  """

  def __init__(self, blocks=None):
    """Create the container from ``blocks``.

    blocks -- anything accepted by ``OrderedDict(...)``, or None for an
      empty container.
    """
    if blocks is not None:
      self.blocks = OrderedDict(blocks)
    else:
      self.blocks = OrderedDict()
    self.keys_lower = dict((key.lower(), key) for key in self.blocks)

  def __setitem__(self, key, value):
    """Store data block ``value`` under ``key``.

    Raises Sorry if ``'_' + key`` does not match ``tag_re``.
    """
    assert isinstance(value, block)
    if not re.match(tag_re, '_'+key):
      raise Sorry("%s is not a valid data block name" %key)
    self.blocks[key] = value
    self.keys_lower[key.lower()] = key

  def get(self, key, default=None):
    """Return the block named ``key`` (case-insensitive) or ``default``."""
    stored_key = self.keys_lower.get(key.lower())
    if stored_key is None:
      return default
    return self.blocks.get(stored_key, default)

  def __getitem__(self, key):
    """Return the block named ``key``; raise KeyError if it is unknown."""
    result = self.get(key)
    if result is None:
      raise KeyError('Unknown CIF data block name: "%s"' % key)
    return result

  def __delitem__(self, key):
    """Remove the block named ``key`` (case-insensitive)."""
    lowered = key.lower()
    del self.blocks[self.keys_lower[lowered]]
    del self.keys_lower[lowered]

  def keys(self):
    """Return the block names in order, spelled as stored."""
    return self.blocks.keys()

  def __repr__(self):
    return repr(OrderedDict(self.iteritems()))

  def __copy__(self):
    """Return a shallow copy: a new container sharing the same blocks."""
    return cif(self.blocks.copy())

  copy = __copy__

  def __deepcopy__(self, memo):
    """Return a copy whose blocks are deep-copied as well."""
    return cif(copy.deepcopy(self.blocks, memo))

  def deepcopy(self):
    return copy.deepcopy(self)

  def show(self, out=None, indent="  ", indent_row=None,
           data_name_field_width=34,
           loop_format_strings=None):
    """Write every block to ``out`` (default ``sys.stdout``).

    Each block is preceded by a ``data_<name>`` line; the remaining
    arguments are passed through to the block's own ``show``.
    """
    if out is None:
      out = sys.stdout
    for name, block in self.items():
      print >> out, "data_%s" %name
      block.show(
        out=out, indent=indent, indent_row=indent_row,
        data_name_field_width=data_name_field_width,
        loop_format_strings=loop_format_strings)

  def __str__(self):
    s = StringIO()
    self.show(out=s)
    return s.getvalue()

  def validate(self, dictionary, show_warnings=True, error_handler=None, out=None):
    """Validate every block against ``dictionary``.

    A fresh handler of the same class as ``error_handler`` (default
    ``validation.ErrorHandler``) is created for each block and installed
    on ``dictionary``.  A handler that recorded errors or warnings is shown
    on ``out`` (default ``sys.stdout``).

    NOTE: only the handler of the last block is returned, or the initial
    handler when there are no blocks.
    """
    if out is None:
      out = sys.stdout
    from iotbx.cif import validation
    if error_handler is None:
      error_handler = validation.ErrorHandler()
    for block in self.blocks.values():
      # start each block with an empty handler of the same type
      error_handler = error_handler.__class__()
      dictionary.set_error_handler(error_handler)
      block.validate(dictionary)
      if error_handler.error_count or error_handler.warning_count:
        error_handler.show(show_warnings=show_warnings, out=out)
    return error_handler

  def sort(self, recursive=False, key=None, reverse=False):
    """Sort the blocks by their (name, block) items.

    With ``recursive`` each block's own ``sort`` is called too; ``key`` is
    not forwarded to the blocks.
    """
    self.blocks = OrderedDict(sorted(self.blocks.items(), key=key, reverse=reverse))
    if recursive:
      for b in self.blocks.values():
        b.sort(recursive=recursive, reverse=reverse)
Exemple #4
0
class loop(DictMixin):
  """A CIF ``loop_``: an ordered, case-insensitively keyed set of columns.

  ``self._columns`` is an OrderedDict from data name (spelled as it was
  given) to its column.  ``self.keys_lower`` maps each lower-cased data name
  back to that spelling, which is what makes item lookup and deletion
  case-insensitive.  ``__setitem__`` converts every column it stores to
  ``flex.std_string``.
  """

  def __init__(self, header=None, data=None):
    """Create a loop, optionally populating it.

    header -- iterable of data names; an empty column is created for each.
    data -- with ``header``: a flat, row-major sequence of items whose
      length must be a multiple of ``len(header)``.  Without ``header``:
      a dict mapping data name to column values.
    """
    self._columns = OrderedDict()
    self.keys_lower = {}
    if header is not None:
      for key in header:
        self.setdefault(key, flex.std_string())
      if data is not None:
        n_columns = len(header)
        # the number of data items must be an exact multiple of the number of headers
        assert len(data) % n_columns == 0, "Wrong number of data items for loop"
        n_rows = len(data)//n_columns
        for i in range(n_rows):
          self.add_row([data[i*n_columns+j] for j in range(n_columns)])
    elif data is not None:
      assert isinstance(data, (dict, OrderedDict))
      # __setitem__ (reached through add_columns) already maintains
      # keys_lower, so it does not need to be rebuilt afterwards.
      self.add_columns(data)

  def __setitem__(self, key, value):
    """Store ``value`` as column ``key``, converted to flex.std_string.

    Raises Sorry if ``key`` does not match ``tag_re``.  When the loop
    already has columns, ``value`` must have ``self.size()`` items.
    """
    if not re.match(tag_re, key):
      raise Sorry("%s is not a valid data name" %key)
    if len(self) > 0:
      assert len(value) == self.size()
    if not isinstance(value, flex.std_string):
      # Try the numeric flex types first so that their as_string()
      # formatting is used; fall back to a plain string array.
      for flex_numeric_type in (flex.int, flex.double):
        if isinstance(value, flex_numeric_type):
          value = value.as_string()
        else:
          try:
            value = flex_numeric_type(value).as_string()
          except TypeError:
            continue
          else:
            break
      if not isinstance(value, flex.std_string):
        value = flex.std_string(value)
    # value must be a mutable type
    assert hasattr(value, '__setitem__')
    self._columns[key] = value
    self.keys_lower[key.lower()] = key

  def __getitem__(self, key):
    """Return the column for ``key`` (case-insensitive)."""
    return self._columns[self.keys_lower[key.lower()]]

  def __delitem__(self, key):
    """Remove the column for ``key`` (case-insensitive)."""
    lowered = key.lower()
    del self._columns[self.keys_lower[lowered]]
    del self.keys_lower[lowered]

  def keys(self):
    """Return the data names in column order, spelled as stored."""
    return self._columns.keys()

  def __repr__(self):
    return repr(OrderedDict(self.iteritems()))

  def name(self):
    """Return the common substring of the keys, minus trailing '_'/'.'."""
    return common_substring(self.keys()).rstrip('_').rstrip('.')

  def size(self):
    """Return the length of the longest column (0 for an empty loop)."""
    size = 0
    for column in self.values():
      size = max(size, len(column))
    return size

  def n_rows(self):
    """Return the number of rows; same as ``size()``."""
    return self.size()

  def n_columns(self):
    """Return the number of columns."""
    return len(self.keys())

  def add_row(self, row, default_value="?"):
    """Append one row; every item is stored as ``str(item)``.

    row -- either a dict keyed by data name (columns missing from the dict
      receive ``default_value``) or a sequence with exactly one item per
      column, in column order.
    """
    if isinstance(row, dict):
      for key in self:
        if key in row:
          self[key].append(str(row[key]))
        else:
          self[key].append(default_value)
    else:
      assert len(row) == len(self)
      for i, key in enumerate(self):
        self[key].append(str(row[i]))

  def add_column(self, key, values):
    """Add (or replace) column ``key``; length must match a non-empty loop."""
    if self.size() != 0:
      assert len(values) == self.size()
    # __setitem__ also records the key in keys_lower.
    self[key] = values

  def add_columns(self, columns):
    """Add every ``key -> values`` pair of the dict ``columns``."""
    assert isinstance(columns, (dict, OrderedDict))
    for key, value in columns.iteritems():
      self.add_column(key, value)

  def update_column(self, key, values):
    """Replace column ``key`` with ``values`` (same length as the loop)."""
    assert isinstance(key, str), "first argument is column key string"
    if self.size() != 0:
      assert len(values) == self.size(), "len(values) %d != self.size() %d" % (
        len(values),
        self.size(),
        )
    # __setitem__ also records the key in keys_lower.
    self[key] = values

  def delete_row(self, index):
    """Delete row ``index`` from every column."""
    assert index < self.n_rows()
    for column in self._columns.values():
      del column[index]

  def __copy__(self):
    """Return a shallow copy: new mappings, shared column objects."""
    new = loop()
    new._columns = self._columns.copy()
    new.keys_lower = self.keys_lower.copy()
    return new

  copy = __copy__

  def __deepcopy__(self, memo):
    """Return a copy whose columns are deep-copied as well."""
    new = loop()
    new._columns = copy.deepcopy(self._columns, memo)
    new.keys_lower = copy.deepcopy(self.keys_lower, memo)
    return new

  def deepcopy(self):
    return copy.deepcopy(self)

  def show(self, out=None, indent="  ", indent_row=None, fmt_str=None, align_columns=True):
    """Write the loop in CIF syntax to ``out`` (default ``sys.stdout``).

    indent -- whitespace placed before each data name.
    indent_row -- whitespace placed before each data row; defaults to
      ``indent``.
    fmt_str -- optional %-format string applied to each row.  Takes
      precedence over ``align_columns``.
    align_columns -- pad columns to a common width.  NOTE: this branch
      rewrites the stored column values in place with ``format_value``.

    The loop must have at least one row and one column.
    """
    assert self.n_rows() > 0 and self.n_columns() > 0, "keys: %s %d %d" % (
      self.keys(),
      self.n_rows(),
      self.n_columns(),
      )
    if out is None:
      out = sys.stdout
    if indent_row is None:
      indent_row = indent
    assert indent.strip() == ""
    assert indent_row.strip() == ""
    print >> out, "loop_"
    for k in self.keys():
      print >> out, indent + k
    values = self._columns.values()
    if fmt_str is not None:
      # Pretty printing:
      #   The user is responsible for providing a valid format string.
      #   Values are not quoted - it is the user's responsibility to place
      #   appropriate quotes in the format string if a particular value may
      #   contain spaces.
      # Work on a copy so the numeric conversion below does not touch the
      # stored columns.
      values = copy.deepcopy(values)
      for i, v in enumerate(values):
        for flex_numeric_type in (flex.int, flex.double):
          if not isinstance(v, flex_numeric_type):
            try:
              values[i] = flex_numeric_type(v)
            except ValueError:
              continue
            else:
              break
      for i in range(self.size()):
        print >> out, fmt_str % tuple([values[j][i] for j in range(len(values))])
    elif align_columns:
      column_formats = []
      for _, v in self.iteritems():
        for i_v in range(v.size()):
          v[i_v] = format_value(v[i_v])
        # exclude multi-line (semicolon-delimited) text fields from the
        # column width calculation
        v_ = flex.std_string(item for item in v if "\n" not in item)
        width = v_.max_element_length()
        # See if column contains only number, '.' or '?'
        # right-align numerical columns, left-align everything else
        v = v.select(~( (v == ".") | (v == "?") ))
        try:
          flex.double(v)
        except ValueError:
          width *= -1
        column_formats.append("%%%is" %width)
      row_format = indent_row + "  ".join(column_formats)
      for i in range(self.size()):
        print >> out, (row_format %
                       tuple([values[j][i]
                              for j in range(len(values))])).rstrip()
    else:
      for i in range(self.size()):
        values_to_print = [format_value(values[j][i]) for j in range(len(values))]
        print >> out, ' '.join([indent] + values_to_print)

  def __str__(self):
    s = StringIO()
    self.show(out=s)
    return s.getvalue()

  def iterrows(self):
    """Yield each row as an OrderedDict of data name -> value."""
    keys = self.keys()
    # Fetch the columns once instead of rebuilding the value list per cell.
    columns = self.values()
    for j in range(self.size()):
      yield OrderedDict(zip(keys, [column[j] for column in columns]))

  def sort(self, key=None, reverse=False):
    """Re-order the columns by sorting the (name, column) items."""
    self._columns = OrderedDict(
      sorted(self._columns.items(), key=key, reverse=reverse))

  def order(self, order):
    """Re-order the columns to follow the sequence of keys ``order``.

    Only the columns named in ``order`` are kept.  Each name must match a
    stored key exactly, otherwise KeyError is raised and the loop is left
    unchanged.
    """
    reordered = OrderedDict()
    for o in order:
      reordered[o] = self._columns[o]
    self._columns = reordered
    # Drop lookups for any column that is no longer present.
    self.keys_lower = dict((k.lower(), k) for k in reordered)

  def __eq__(self, other):
    """Return True if keys, sizes and all column values are equal."""
    if (len(self) != len(other) or
        self.size() != other.size() or
        self.keys() != other.keys()):
      return False
    for value, other_value in zip(self.values(), other.values()):
      if (value == other_value).count(True) != len(value):
        return False
    return True
Exemple #5
0
  def __init__(self, unmerged_intensities, batches_all, n_bins=20, d_min=None,
               id_to_batches=None):
    """Split the data into runs by batch number and run the analysis.

    unmerged_intensities -- intensity array; must provide sigmas(),
      select(), setup_binner(), show_summary() and merge_equivalents()
      (presumably a cctbx miller array -- confirm against the caller).
    batches_all -- batch numbers matching ``unmerged_intensities``; must
      provide data() and select().
    n_bins -- number of resolution bins passed to setup_binner().
    d_min -- resolution limit passed to resolution_filter() for each run.
    id_to_batches -- optional mapping of identifier to a
      (first_batch, last_batch) pair.  When given, one run is created per
      entry and the identifiers are used as plot labels.  Otherwise runs
      are detected as maximal stretches of consecutive batch numbers.

    Observations whose sigma is not positive are discarded first.
    """
    intensities = OrderedDict()
    individual_merged_intensities = OrderedDict()
    batches = OrderedDict()

    sel = unmerged_intensities.sigmas() > 0
    unmerged_intensities = unmerged_intensities.select(sel)
    batches_all = batches_all.select(sel)

    def _add_run(run_id, first_batch, final_batch):
      # Record the data of one run: batches first_batch..final_batch inclusive.
      batch_numbers = batches_all.data()
      batch_sel = (batch_numbers >= first_batch) & (batch_numbers <= final_batch)
      batches[run_id] = batches_all.select(batch_sel).resolution_filter(d_min=d_min)
      intensities[run_id] = unmerged_intensities.select(batch_sel).resolution_filter(d_min=d_min)
      individual_merged_intensities[run_id] = intensities[run_id].merge_equivalents().array()
      Debug.write("run %i batch %i to %i" %(run_id+1, first_batch, final_batch))

    if id_to_batches is None:
      run_id_to_batch_id = None
      run_id = 0
      unique_batches = sorted(set(batches_all.data()))
      run_start = unique_batches[0]
      last_batch = run_start
      for batch in unique_batches[1:]:
        if batch > (last_batch + 1):
          # a gap in the batch numbers ends the current run
          _add_run(run_id, run_start, last_batch)
          run_id += 1
          run_start = batch
        last_batch = batch
      # Close the final run after the loop so that it includes the very last
      # batch (and is emitted even when it consists of a single batch).
      _add_run(run_id, run_start, last_batch)

    else:
      run_id_to_batch_id = OrderedDict()
      for run_id, (batch_id, batch_range) in enumerate(id_to_batches.iteritems()):
        run_id_to_batch_id[run_id] = batch_id
        run_start, last_batch = batch_range
        _add_run(run_id, run_start, last_batch)

    unmerged_intensities.setup_binner(n_bins=n_bins)
    unmerged_intensities.show_summary()

    self.unmerged_intensities = unmerged_intensities
    self.merged_intensities = unmerged_intensities.merge_equivalents().array()
    self.intensities = intensities
    self.individual_merged_intensities = individual_merged_intensities
    self.batches = batches

    if run_id_to_batch_id is not None:
      labels = run_id_to_batch_id.values()
    else:
      labels = None
    racc = self.relative_anomalous_cc()
    if racc is not None:
      self.plot_relative_anomalous_cc(racc, labels=labels)
    correlation_matrix, linkage_matrix = self.compute_correlation_coefficient_matrix()

    self._cluster_dict = self.to_dict(correlation_matrix, linkage_matrix)

    self.plot_cc_matrix(correlation_matrix, linkage_matrix, labels=labels)

    self.write_output()