def bravais_lattice_to_space_groups(chiral_only=True):
  '''Group space groups 1 to 230 by the string form of their Bravais lattice.

  chiral_only: when true (the default) only space groups whose
    ``is_chiral()`` returns true are included.

  Returns an ordered dictionary mapping ``str(bravais_lattice)`` to the list
  of space group objects with that lattice, in order of increasing space
  group number.
  '''
  from cctbx import sgtbx
  from cctbx.sgtbx import bravais_types
  from libtbx.containers import OrderedDict
  groups_by_lattice = OrderedDict()
  for number in range(1, 231):
    group = sgtbx.space_group_info(number=number).group()
    if chiral_only and not group.is_chiral():
      continue
    lattice_name = str(bravais_types.bravais_lattice(group=group))
    groups_by_lattice.setdefault(lattice_name, []).append(group)
  return groups_by_lattice
Exemple #2
0
 def unique_beams(self):
   '''Return the distinct beam models of all imagesets, in first-seen order.

   An ImageSweep is asked for its beam once; any other imageset is asked for
   the beam of every image index.  Beams are de-duplicated by being used as
   keys of an ordered dictionary, whose keys are returned.
   '''
   from dxtbx.imageset import ImageSweep
   from libtbx.containers import OrderedDict
   seen = OrderedDict()
   for imageset in self._imagesets:
     if isinstance(imageset, ImageSweep):
       beams = [imageset.get_beam()]
     else:
       beams = [imageset.get_beam(index) for index in range(len(imageset))]
     for beam in beams:
       seen[beam] = None
   return seen.keys()
class cctbx_data_structures_from_cif(object):
  '''Build crystal structures and Miller arrays from the blocks of a CIF model.

  After construction ``xray_structures`` and ``miller_arrays`` are ordered
  dictionaries keyed by data block name.
  '''

  def __init__(self,
               file_object=None,
               file_path=None,
               cif_model=None,
               data_structure_builder=None,
               data_block_name=None,
               base_array_info=None,
               **kwds):
    '''Read the CIF (unless ``cif_model`` is given) and run the builders.

    file_object, file_path: passed to ``reader`` when ``cif_model`` is None;
      ``file_object`` and ``cif_model`` must not both be given.
    data_structure_builder: restrict processing to one of
      ``builders.miller_array_builder`` / ``builders.crystal_structure_builder``;
      both are run when None.
    data_block_name: if given, only the block with this name is processed.
    base_array_info: if given, a copy labelled with the block name is passed
      to the Miller array builder.
    kwds: accepted but not used here.

    Raises Sorry when the model contains no data block, and RuntimeError when
    ``data_block_name`` is not a block of the model.
    '''
    assert file_object is None or cif_model is None
    supported_builders = (
      builders.miller_array_builder, builders.crystal_structure_builder)
    if data_structure_builder is None:
      data_structure_builders = supported_builders
    else:
      assert data_structure_builder in supported_builders
      data_structure_builders = (data_structure_builder,)

    self.xray_structures = OrderedDict()
    self.miller_arrays = OrderedDict()
    if cif_model is None:
      cif_model = reader(file_path=file_path, file_object=file_object).model()
    if len(cif_model) == 0:
      raise Sorry("No data block found in CIF")
    if data_block_name is not None and data_block_name not in cif_model:
      if file_path is None:
        msg = 'Unknown CIF data block name: "%s"' % data_block_name
      else:
        msg = 'Unknown CIF data block name "%s" in file: "%s"' % (
          data_block_name, file_path)
      raise RuntimeError(msg)

    atom_site_tags = ('_atom_site_fract_x', '_atom_site_Cartn_x')
    reflection_tags = ('_refln_index_h', '_refln.index_h', '_diffrn_refln')
    # wavelengths found in one block stay in effect for later blocks until a
    # later block supplies its own
    wavelengths = {}
    for block_name, block in cif_model.items():
      if data_block_name is not None and block_name != data_block_name:
        continue
      for builder in data_structure_builders:
        if builder == builders.crystal_structure_builder:
          if any(tag in block for tag in atom_site_tags):
            self.xray_structures.setdefault(
              block_name, builder(block).structure)
        elif builder == builders.miller_array_builder:
          block_wavelengths = builders.get_wavelengths(block)
          if block_wavelengths is not None:
            wavelengths = block_wavelengths
          if base_array_info is not None:
            base_array_info = base_array_info.customized_copy(
              labels=[block_name])
          if any(tag in block for tag in reflection_tags):
            arrays = builder(
              block,
              base_array_info=base_array_info,
              wavelengths=wavelengths).arrays()
            self.miller_arrays.setdefault(block_name, arrays)
Exemple #4
0
    class image_data_cache(object):
      '''First-in-first-out cache of raw image data read from an imageset.

      Indexing with an image number returns ``imageset.get_raw_data(i)``,
      remembering up to ``size`` results so repeated requests do not hit the
      imageset again.  A cached value of None is treated as a miss.
      '''

      def __init__(self, imageset, size=10):
        '''Wrap ``imageset``, keeping at most ``size`` images in memory.

        A ``size`` below one behaves like a size of one.
        '''
        self.imageset = imageset
        self.size = size
        self._image_data = OrderedDict()

      def __getitem__(self, i):
        '''Return the raw data of image ``i``, reading it on a cache miss.'''
        image_data = self._image_data.get(i)
        if image_data is None:
          image_data = self.imageset.get_raw_data(i)
          if self._image_data and len(self._image_data) >= self.size:
            # remove the oldest entry in the cache; next(iter(...)) is used
            # because keys() cannot be indexed on Python 3
            del self._image_data[next(iter(self._image_data))]
          self._image_data[i] = image_data
        return image_data
Exemple #5
0
 def _unique_detectors_dict(self):
   '''Map every distinct detector model to a sequential integer id.

   Detectors are collected from all imagesets (once per ImageSweep, once per
   image index otherwise) and numbered from zero in first-seen order.
   Returns the ordered dictionary ``detector -> id``.
   '''
   from dxtbx.imageset import ImageSweep
   from libtbx.containers import OrderedDict
   detector_ids = OrderedDict()
   for imageset in self._imagesets:
     if isinstance(imageset, ImageSweep):
       detector_ids[imageset.get_detector()] = None
     else:
       for index in range(len(imageset)):
         detector_ids[imageset.get_detector(index)] = None
   # second pass: replace the placeholder values by running numbers
   for next_id, detector in enumerate(detector_ids.keys()):
     detector_ids[detector] = next_id
   return detector_ids
Exemple #6
0
 def unique_scans(self):
   '''Return the distinct scan models of all imagesets, in first-seen order.

   An ImageSweep contributes whatever ``get_scan()`` returns.  For any other
   imageset every image index is queried; indices whose lookup raises, or
   which have no scan (None), are skipped.
   '''
   from dxtbx.imageset import ImageSweep
   from libtbx.containers import OrderedDict
   seen = OrderedDict()
   for imageset in self._imagesets:
     if isinstance(imageset, ImageSweep):
       seen[imageset.get_scan()] = None
       continue
     for index in range(len(imageset)):
       # best effort: an image without a usable scan is simply ignored
       try:
         scan = imageset.get_scan(index)
         if scan is None:
           continue
         seen[scan] = None
       except Exception:
         continue
   return seen.keys()
def exercise_odict():
  '''Check that libtbx.containers.OrderedDict keeps insertion order.

  Covers construction from a list of pairs, ``setdefault`` appending a new
  key at the end, and ``fromkeys``.  Raises AssertionError on failure.
  '''
  from libtbx.containers import OrderedDict as odict
  d = odict([('banana',3), ('apple',4), ('pear',1)])
  d.setdefault('orange', 2)
  # "in" instead of has_key(), which dictionaries lack on Python 3
  assert 'orange' in d
  assert d['orange'] == 2
  # keys()/values() are views on Python 3 and never equal a list, so
  # compare list copies (a no-op copy on Python 2)
  assert list(d.keys()) == ['banana', 'apple', 'pear', 'orange']
  assert list(d.values()) == [3, 4, 1, 2]
  d = odict.fromkeys(('b','c','a'))
  assert list(d.keys()) == ['b', 'c', 'a']
Exemple #8
0
 def __init__(self, header=None, data=None):
   '''Set up the column store, optionally filling it from header and data.

   header: iterable of column keys; each gets an empty ``flex.std_string``
     column.  If ``data`` is also given it is a flat, row-major sequence
     whose length must be a multiple of ``len(header)``; it is added row by
     row.
   data (without header): a dict or OrderedDict handed to ``add_columns``,
     after which ``keys_lower`` maps each lower-cased key to the stored key.
   '''
   self._columns = OrderedDict()
   self.keys_lower = {}
   if header is not None:
     for key in header:
       self.setdefault(key, flex.std_string())
     if data is not None:
       n_columns = len(header)
       # the number of data items must be an exact multiple of the number of headers
       assert len(data) % n_columns == 0, "Wrong number of data items for loop"
       n_rows = len(data) // n_columns
       for row in range(n_rows):
         offset = row * n_columns
         self.add_row([data[offset + column] for column in range(n_columns)])
   elif data is not None:
     assert isinstance(data, (dict, OrderedDict))
     self.add_columns(data)
     self.keys_lower = dict(
       (key.lower(), key) for key in self._columns.keys())
0
  def __init__(self, unmerged_intensities, batches_all, n_bins=20, d_min=None,
               id_to_batches=None):
    '''Split unmerged intensities into data sets, correlate them and report.

    unmerged_intensities, batches_all: matching arrays; observations whose
      sigma is not greater than zero are removed from both.
    n_bins: number of bins passed to ``setup_binner``.
    d_min: resolution limit passed to ``resolution_filter`` for every
      separated data set.
    id_to_batches: forwarded to ``separate_unmerged``.

    The constructor also prints a summary, produces the plots and calls
    ``write_output``.
    '''
    # keep only observations with a positive sigma
    positive_sigma = unmerged_intensities.sigmas() > 0
    unmerged_intensities = unmerged_intensities.select(positive_sigma)
    batches_all = batches_all.select(positive_sigma)

    unmerged_intensities.setup_binner(n_bins=n_bins)
    unmerged_intensities.show_summary()
    self.unmerged_intensities = unmerged_intensities
    self.merged_intensities = unmerged_intensities.merge_equivalents().array()

    separated = separate_unmerged(
      unmerged_intensities, batches_all, id_to_batches=id_to_batches)
    self.intensities = separated.intensities
    self.batches = separated.batches
    run_id_to_batch_id = separated.run_id_to_batch_id

    # apply the resolution cut to every data set and merge each one
    self.individual_merged_intensities = OrderedDict()
    for key in self.intensities.keys():
      self.intensities[key] = self.intensities[key].resolution_filter(
        d_min=d_min)
      self.batches[key] = self.batches[key].resolution_filter(d_min=d_min)
      merged = self.intensities[key].merge_equivalents().array()
      self.individual_merged_intensities[key] = merged

    labels = None if run_id_to_batch_id is None else run_id_to_batch_id.values()

    anomalous_cc = self.relative_anomalous_cc()
    if anomalous_cc is not None:
      self.plot_relative_anomalous_cc(anomalous_cc, labels=labels)

    matrices = self.compute_correlation_coefficient_matrix()
    correlation_matrix, linkage_matrix = matrices
    self._cluster_dict = self.to_dict(correlation_matrix, linkage_matrix)
    self.plot_cc_matrix(correlation_matrix, linkage_matrix, labels=labels)

    self.write_output()
Exemple #10
0
  def __init__(self, xinfo_file, sweep_ids=None, sweep_ranges=None):
    '''Populate this object from the contents of an .xinfo file.

    sweep_ids: optional sweep identifiers; stored lower-cased.
    sweep_ranges: optional ranges, one per entry of ``sweep_ids`` (which
      must then be given as well).
    '''

    # containers that the parser fills in
    self._project = None
    self._crystals = OrderedDict()

    if sweep_ids is not None:
      sweep_ids = [sweep_id.lower() for sweep_id in sweep_ids]
    if sweep_ranges is not None:
      assert sweep_ids is not None
      assert len(sweep_ids) == len(sweep_ranges)
    self._sweep_ids = sweep_ids
    self._sweep_ranges = sweep_ranges

    # parse the xinfo file, then check the result
    self._parse_project(xinfo_file)
    self._validate()
Exemple #11
0
def run(args):

    from dials.util.options import OptionParser
    from dials.util.options import flatten_experiments
    from dials.util.options import flatten_datablocks
    from dials.util.options import flatten_reflections
    import libtbx.load_env

    usage = "%s [options] datablock.json | experiments.json | image_*.cbf" % (
        libtbx.env.dispatcher_name)

    parser = OptionParser(usage=usage,
                          phil=phil_scope,
                          read_experiments=True,
                          read_datablocks=True,
                          read_datablocks_from_images=True,
                          read_reflections=True,
                          check_format=False,
                          epilog=help_message)

    params, options = parser.parse_args(show_diff_phil=True)
    experiments = flatten_experiments(params.input.experiments)
    datablocks = flatten_datablocks(params.input.datablock)
    reflections = flatten_reflections(params.input.reflections)

    if len(datablocks) == 0 and len(experiments) == 0 and len(
            reflections) == 0:
        parser.print_help()
        exit()

    for i_expt, expt in enumerate(experiments):
        print "Experiment %i:" % i_expt
        print str(expt.detector)
        print 'Max resolution (at corners): %f' % (
            expt.detector.get_max_resolution(expt.beam.get_s0()))
        print 'Max resolution (inscribed):  %f' % (
            expt.detector.get_max_inscribed_resolution(expt.beam.get_s0()))
        if params.show_panel_distance:
            for ipanel, panel in enumerate(expt.detector):
                from scitbx import matrix
                fast = matrix.col(panel.get_fast_axis())
                slow = matrix.col(panel.get_slow_axis())
                normal = fast.cross(slow).normalize()
                origin = matrix.col(panel.get_origin())
                distance = origin.dot(normal)
                fast_origin = -(origin - distance * normal).dot(fast)
                slow_origin = -(origin - distance * normal).dot(slow)
                print 'Panel %d: distance %.2f origin %.2f %.2f' % \
                  (ipanel, distance, fast_origin, slow_origin)
            print ''
        print ''
        print show_beam(expt.detector, expt.beam)
        if expt.scan is not None:
            print expt.scan
        if expt.goniometer is not None:
            print expt.goniometer
        expt.crystal.show(show_scan_varying=params.show_scan_varying)
        if expt.crystal.num_scan_points:
            from scitbx.array_family import flex
            from cctbx import uctbx
            abc = flex.vec3_double()
            angles = flex.vec3_double()
            for n in range(expt.crystal.num_scan_points):
                a, b, c, alpha, beta, gamma = expt.crystal.get_unit_cell_at_scan_point(
                    n).parameters()
                abc.append((a, b, c))
                angles.append((alpha, beta, gamma))
            a, b, c = abc.mean()
            alpha, beta, gamma = angles.mean()
            mean_unit_cell = uctbx.unit_cell((a, b, c, alpha, beta, gamma))
            print "  Average unit cell: %s" % mean_unit_cell
        print
        if expt.profile is not None:
            print expt.profile

    for datablock in datablocks:
        if datablock.format_class() is not None:
            print 'Format: %s' % datablock.format_class()
        imagesets = datablock.extract_imagesets()
        for imageset in imagesets:
            try:
                print imageset.get_template()
            except Exception:
                pass
            detector = imageset.get_detector()
            print str(detector)
            print 'Max resolution (at corners): %f' % (
                detector.get_max_resolution(imageset.get_beam().get_s0()))
            print 'Max resolution (inscribed):  %f' % (
                detector.get_max_inscribed_resolution(
                    imageset.get_beam().get_s0()))
            if params.show_panel_distance:
                for ipanel, panel in enumerate(detector):
                    from scitbx import matrix
                    fast = matrix.col(panel.get_fast_axis())
                    slow = matrix.col(panel.get_slow_axis())
                    normal = fast.cross(slow)
                    origin = matrix.col(panel.get_origin())
                    distance = origin.dot(normal)
                    fast_origin = -(origin - distance * normal).dot(fast)
                    slow_origin = -(origin - distance * normal).dot(slow)
                    print 'Panel %d: distance %.2f origin %.2f %.2f' % \
                      (ipanel, distance, fast_origin, slow_origin)
                print ''
            print ''
            print show_beam(detector, imageset.get_beam())
            if imageset.get_scan() is not None:
                print imageset.get_scan()
            if imageset.get_goniometer() is not None:
                print imageset.get_goniometer()

    from libtbx.containers import OrderedDict, OrderedSet
    formats = OrderedDict([
        ('miller_index', '%i, %i, %i'),
        ('d', '%.2f'),
        ('dqe', '%.3f'),
        ('id', '%i'),
        ('imageset_id', '%i'),
        ('panel', '%i'),
        ('flags', '%i'),
        ('background.mean', '%.1f'),
        ('background.dispersion', '%.1f'),
        ('background.mse', '%.1f'),
        ('background.sum.value', '%.1f'),
        ('background.sum.variance', '%.1f'),
        ('intensity.prf.value', '%.1f'),
        ('intensity.prf.variance', '%.1f'),
        ('intensity.sum.value', '%.1f'),
        ('intensity.sum.variance', '%.1f'),
        ('intensity.cor.value', '%.1f'),
        ('intensity.cor.variance', '%.1f'),
        ('lp', '%.3f'),
        ('num_pixels.background', '%i'),
        ('num_pixels.background_used', '%i'),
        ('num_pixels.foreground', '%i'),
        ('num_pixels.valid', '%i'),
        ('partial_id', '%i'),
        ('partiality', '%.4f'),
        ('profile.correlation', '%.3f'),
        ('profile.rmsd', '%.3f'),
        ('xyzcal.mm', '%.2f, %.2f, %.2f'),
        ('xyzcal.px', '%.2f, %.2f, %.2f'),
        ('delpsical.rad', '%.3f'),
        ('delpsical2', '%.3f'),
        ('delpsical.weights', '%.3f'),
        ('xyzobs.mm.value', '%.2f, %.2f, %.2f'),
        ('xyzobs.mm.variance', '%.4e, %.4e, %.4e'),
        ('xyzobs.px.value', '%.2f, %.2f, %.2f'),
        ('xyzobs.px.variance', '%.4f, %.4f, %.4f'),
        ('s1', '%.4f, %.4f, %.4f'),
        ('rlp', '%.4f, %.4f, %.4f'),
        ('zeta', '%.3f'),
        ('x_resid', '%.3f'),
        ('x_resid2', '%.3f'),
        ('y_resid', '%.3f'),
        ('y_resid2', '%.3f'),
        ('kapton_absorption_correction', '%.3f'),
        ('kapton_absorption_correction_sigmas', '%.3f'),
    ])

    for rlist in reflections:
        from cctbx.array_family import flex
        print
        print "Reflection list contains %i reflections" % (len(rlist))

        if len(rlist) == 0:
            continue

        rows = [["Column", "min", "max", "mean"]]
        for k, col in rlist.cols():
            if type(col) in (flex.double, flex.int, flex.size_t):
                if type(col) in (flex.int, flex.size_t):
                    col = col.as_double()
                rows.append([
                    k, formats[k] % flex.min(col), formats[k] % flex.max(col),
                    formats[k] % flex.mean(col)
                ])
            elif type(col) in (flex.vec3_double, flex.miller_index):
                if type(col) == flex.miller_index:
                    col = col.as_vec3_double()
                rows.append([
                    k, formats[k] % col.min(), formats[k] % col.max(),
                    formats[k] % col.mean()
                ])

        from libtbx import table_utils
        print table_utils.format(rows,
                                 has_header=True,
                                 prefix="| ",
                                 postfix=" |")

    intensity_keys = ('miller_index', 'd', 'intensity.prf.value',
                      'intensity.prf.variance', 'intensity.sum.value',
                      'intensity.sum.variance', 'background.mean',
                      'profile.correlation', 'profile.rmsd')

    profile_fit_keys = (
        'miller_index',
        'd',
    )

    centroid_keys = ('miller_index', 'd', 'xyzcal.mm', 'xyzcal.px',
                     'xyzobs.mm.value', 'xyzobs.mm.variance',
                     'xyzobs.px.value', 'xyzobs.px.variance')

    keys_to_print = OrderedSet()

    if params.show_intensities:
        for k in intensity_keys:
            keys_to_print.add(k)
    if params.show_profile_fit:
        for k in profile_fit_keys:
            keys_to_print.add(k)
    if params.show_centroids:
        for k in centroid_keys:
            keys_to_print.add(k)
    if params.show_all_reflection_data:
        for k in formats:
            keys_to_print.add(k)

    def format_column(key, data, format_strings=None):
        if isinstance(data, flex.vec3_double):
            c_strings = [
                c.as_string(format_strings[i].strip())
                for i, c in enumerate(data.parts())
            ]
        elif isinstance(data, flex.miller_index):
            c_strings = [
                c.as_string(format_strings[i].strip())
                for i, c in enumerate(data.as_vec3_double().parts())
            ]
        elif isinstance(data, flex.size_t):
            c_strings = [data.as_int().as_string(format_strings[0].strip())]
        else:
            c_strings = [data.as_string(format_strings[0].strip())]

        column = flex.std_string()
        max_element_lengths = [c.max_element_length() for c in c_strings]
        for i in range(len(c_strings[0])):

            column.append(('%%%is' % len(key)) % ', '.join(
                ('%%%is' % max_element_lengths[j]) % c_strings[j][i]
                for j in range(len(c_strings))))
        return column

    if keys_to_print:
        keys = [k for k in keys_to_print if k in rlist]
        rows = [keys]
        max_reflections = len(rlist)
        if params.max_reflections is not None:
            max_reflections = min(len(rlist), params.max_reflections)

        columns = []

        for k in keys:
            columns.append(
                format_column(k,
                              rlist[k],
                              format_strings=formats[k].split(',')))

        print
        print "Printing %i of %i reflections:" % (max_reflections, len(rlist))
        for j in range(len(columns)):
            key = keys[j]
            width = max(len(key), columns[j].max_element_length())
            print("%%%is" % width) % key,
        print
        for i in range(max_reflections):
            for j in range(len(columns)):
                print columns[j][i],
            print

    return
Exemple #12
0
class miller_array_builder(crystal_symmetry_builder):
    # Changes to this class should pass regression tests:
    # cctbx_project\mmtbx\regression\tst_cif_as_mtz_wavelengths.py
    # cctbx_project\iotbx\cif\tests\tst_lex_parse_build.py
    # phenix_regression\cif_as_mtz\tst_cif_as_mtz.py

    observation_types = {
        # known types of column data to be tagged as either amplitudes or intensities as per
        # https://www.iucr.org/__data/iucr/cifdic_html/2/cif_mm.dic/index.html
        '_refln.F_squared': xray.intensity(),
        '_refln_F_squared': xray.intensity(),
        '_refln.intensity': xray.intensity(),
        '_refln.I(+)': xray.intensity(),
        '_refln.I(-)': xray.intensity(),
        '_refln.F_calc': xray.amplitude(),
        '_refln.F_meas': xray.amplitude(),
        '_refln.FP': xray.amplitude(),
        '_refln.F-obs': xray.amplitude(),
        '_refln.Fobs': xray.amplitude(),
        '_refln.F-calc': xray.amplitude(),
        '_refln.Fcalc': xray.amplitude(),
        '_refln.pdbx_F_': xray.amplitude(),
        '_refln.pdbx_I_': xray.intensity(),
        '_refln.pdbx_anom_difference': xray.amplitude(),
    }

    def guess_observationtype(self, labl):
        '''Return the observation type for a column label, or None.

        The label matches an entry of ``observation_types`` when it starts
        with that entry's key; the value of the first matching entry (in the
        dictionary's iteration order) is returned.
        '''
        matches = (
            obstype
            for prefix, obstype in self.observation_types.items()
            if labl.startswith(prefix))
        return next(matches, None)

    def __init__(self, cif_block, base_array_info=None, wavelengths=None):
        crystal_symmetry_builder.__init__(self, cif_block)
        self._arrays = OrderedDict()
        self._origarrays = OrderedDict(
        )  # used for presenting raw data tables in HKLviewer
        basearraylabels = []
        if base_array_info is not None:
            self.crystal_symmetry = self.crystal_symmetry.join_symmetry(
                other_symmetry=base_array_info.crystal_symmetry_from_file,
                force=True)
            if base_array_info.labels:
                basearraylabels = base_array_info.labels
        if (wavelengths is None):
            wavelengths = {}
        if base_array_info is None:
            base_array_info = miller.array_info(source_type="cif")
        refln_containing_loops = self.get_miller_indices_containing_loops()
        for self.indices, refln_loop in refln_containing_loops:
            self.wavelength_id_array = None
            self.crystal_id_array = None
            self.scale_group_array = None
            wavelength_ids = [None]
            crystal_ids = [None]
            scale_groups = [None]
            for key, value in six.iteritems(refln_loop):
                # Get wavelength_ids, crystal_id, scale_group_code columns for selecting data of other
                # columns in self.get_selection() used by self.flex_std_string_as_miller_array()
                if (key.endswith('wavelength_id') or key.endswith('crystal_id')
                        or key.endswith('scale_group_code')):
                    data = as_int_or_none_if_all_question_marks(
                        value, column_name=key)
                    if data is None:
                        continue
                    counts = data.counts()
                    if key.endswith('wavelength_id'):
                        wavelength_ids = list(counts.keys())
                    if len(counts) == 1: continue
                    array = miller.array(
                        miller.set(self.crystal_symmetry,
                                   self.indices).auto_anomalous(), data)
                    if key.endswith('wavelength_id'):
                        self.wavelength_id_array = array
                        wavelength_ids = list(counts.keys())
                    elif key.endswith('crystal_id'):
                        self.crystal_id_array = array
                        crystal_ids = list(counts.keys())
                    elif key.endswith('scale_group_code'):
                        self.scale_group_array = array
                        scale_groups = list(counts.keys())
            labelsuffix = []
            wavelbl = []
            cryslbl = []
            scalegrplbl = []
            self._origarrays["HKLs"] = self.indices
            alllabels = list(sorted(refln_loop.keys()))
            remaininglabls = alllabels[:]  # deep copy the list
            # Parse labels matching cif column conventions
            # https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/refln.html
            # and extract groups of labels or just single columns.
            # Groups corresponds to the map coefficients, phase and amplitudes,
            # amplitudes or intensities with sigmas and hendrickson-lattman columns.
            phaseamplabls, remaininglabls = self.get_phase_amplitude_labels(
                remaininglabls)
            mapcoefflabls, remaininglabls = self.get_mapcoefficient_labels(
                remaininglabls)
            HLcoefflabls, remaininglabls = self.get_HL_labels(remaininglabls)
            data_sig_obstype_labls, remaininglabls = self.get_FSigF_ISigI_labels(
                remaininglabls)
            for w_id in wavelength_ids:
                for crys_id in crystal_ids:
                    for scale_group in scale_groups:
                        # If reflection data files contain more than one crystal, wavelength or scalegroup
                        # then add their id(s) as a suffix to data labels computed below. Needed for avoiding
                        # ambuguity but avoid when not needed to make labels more human readable!
                        if (len(wavelength_ids) > 1
                                or len(wavelengths) > 1) and w_id is not None:
                            wavelbl = ["wavelength_id=%i" % w_id]
                        if len(crystal_ids) > 1 and crys_id is not None:
                            cryslbl = ["crystal_id=%i" % crys_id]
                        if len(scale_groups) > 1 and scale_group is not None:
                            scalegrplbl = ["scale_group_code=%i" % scale_group]
                        labelsuffix = scalegrplbl + cryslbl + wavelbl
                        jlablsufx = ""
                        if len(labelsuffix):
                            jlablsufx = "," + ",".join(labelsuffix)
                        for mapcoefflabl in mapcoefflabls:
                            A_array = refln_loop[mapcoefflabl[0]]
                            B_array = refln_loop[mapcoefflabl[1]]
                            # deselect any ? marks in the two arrays, assuming both A and B have the same ? marks
                            selection = self.get_selection(
                                A_array,
                                wavelength_id=w_id,
                                crystal_id=crys_id,
                                scale_group_code=scale_group)
                            A_array = A_array.select(selection)
                            B_array = B_array.select(selection)
                            # form the miller array with map coefficients
                            data = flex.complex_double(flex.double(A_array),
                                                       flex.double(B_array))
                            millarr = miller.array(
                                miller.set(self.crystal_symmetry,
                                           self.indices.select(
                                               selection)).auto_anomalous(),
                                data)
                            # millarr will be None for column data not matching w_id,crys_id,scale_group values
                            if millarr is None: continue
                            labl = basearraylabels + mapcoefflabl + labelsuffix
                            millarr.set_info(
                                base_array_info.customized_copy(
                                    labels=labl,
                                    wavelength=wavelengths.get(w_id, None)))
                            self._arrays[mapcoefflabl[0] + jlablsufx] = millarr
                        for phaseamplabl in phaseamplabls:
                            amplitudestrarray = refln_loop[phaseamplabl[0]]
                            phasestrarray = refln_loop[phaseamplabl[1]]
                            millarr = self.flex_std_string_as_miller_array(
                                amplitudestrarray,
                                wavelength_id=w_id,
                                crystal_id=crys_id,
                                scale_group_code=scale_group)
                            phasesmillarr = self.flex_std_string_as_miller_array(
                                phasestrarray,
                                wavelength_id=w_id,
                                crystal_id=crys_id,
                                scale_group_code=scale_group)
                            # millarr will be None for column data not matching w_id,crys_id,scale_group values
                            if millarr is None or phasesmillarr is None:
                                continue
                            phases = as_flex_double(phasesmillarr,
                                                    phaseamplabl[1])
                            millarr = millarr.phase_transfer(phases, deg=True)
                            labl = basearraylabels + phaseamplabl + labelsuffix
                            millarr.set_info(
                                base_array_info.customized_copy(
                                    labels=labl,
                                    wavelength=wavelengths.get(w_id, None)))
                            self._arrays[phaseamplabl[0] + jlablsufx] = millarr
                        for datlabl, siglabl, otype in data_sig_obstype_labls:
                            datastrarray = refln_loop[datlabl]
                            millarr = self.flex_std_string_as_miller_array(
                                datastrarray,
                                wavelength_id=w_id,
                                crystal_id=crys_id,
                                scale_group_code=scale_group)
                            # millarr will be None for column data not matching w_id,crys_id,scale_group values
                            if millarr is None: continue
                            millarr = as_flex_double(millarr, datlabl)
                            datsiglabl = [datlabl]
                            if siglabl:
                                sigmasstrarray = refln_loop[siglabl]
                                sigmas = self.flex_std_string_as_miller_array(
                                    sigmasstrarray,
                                    wavelength_id=w_id,
                                    crystal_id=crys_id,
                                    scale_group_code=scale_group)
                                sigmas = as_flex_double(sigmas, siglabl)
                                millarr.set_sigmas(sigmas.data())
                                datsiglabl = [datlabl, siglabl]
                            datsiglabl = basearraylabels + datsiglabl + labelsuffix
                            millarr.set_info(
                                base_array_info.customized_copy(
                                    labels=datsiglabl,
                                    wavelength=wavelengths.get(w_id, None)))
                            if otype is not None:
                                millarr.set_observation_type(otype)
                            self._arrays[datlabl + jlablsufx] = millarr
                        for hl_labels in HLcoefflabls:
                            hl_values = [
                                cif_block.get(hl_key) for hl_key in hl_labels
                            ]
                            if hl_values.count(None) == 0:
                                selection = self.get_selection(
                                    hl_values[0],
                                    wavelength_id=w_id,
                                    crystal_id=crys_id,
                                    scale_group_code=scale_group)
                                hl_values = [
                                    as_double_or_none_if_all_question_marks(
                                        hl.select(selection), column_name=lab)
                                    for hl, lab in zip(hl_values, hl_labels)
                                ]
                                # hl_values will be None for column data not matching w_id,crys_id,scale_group values
                                if hl_values == [None, None, None, None]:
                                    continue
                                millarr = miller.array(
                                    miller.set(
                                        self.crystal_symmetry,
                                        self.indices.select(
                                            selection)).auto_anomalous(),
                                    flex.hendrickson_lattman(*hl_values))
                                hlabels = basearraylabels + hl_labels + labelsuffix
                                millarr.set_info(
                                    base_array_info.customized_copy(
                                        labels=hlabels,
                                        wavelength=wavelengths.get(w_id,
                                                                   None)))
                                self._arrays[hl_labels[0] +
                                             jlablsufx] = millarr
                        # pick up remaining columns if any that weren't identified above
                        for label in alllabels:
                            if "index_" in label:
                                continue
                            datastrarray = refln_loop[label]
                            if label in remaininglabls:
                                labels = basearraylabels + [label
                                                            ] + labelsuffix
                                lablsufx = jlablsufx
                                millarr = self.flex_std_string_as_miller_array(
                                    datastrarray,
                                    wavelength_id=w_id,
                                    crystal_id=crys_id,
                                    scale_group_code=scale_group)
                                # millarr will be None for column data not matching w_id,crys_id,scale_group values
                                if (label.endswith(
                                        'wavelength_id'
                                ) or label.endswith(
                                        'crystal_id'
                                ) or  # get full array if any of these labels, not just subsets
                                        label.endswith('scale_group_code')):
                                    millarr = self.flex_std_string_as_miller_array(
                                        datastrarray,
                                        wavelength_id=None,
                                        crystal_id=None,
                                        scale_group_code=None)
                                    lablsufx = ""
                                    labels = basearraylabels + [label]
                                if millarr is None: continue
                                otype = self.guess_observationtype(label)
                                if otype is not None:
                                    millarr.set_observation_type(otype)
                                millarr.set_info(
                                    base_array_info.customized_copy(
                                        labels=labels,
                                        wavelength=wavelengths.get(w_id,
                                                                   None)))
                                self._arrays[label + lablsufx] = millarr
                            origarr = self.flex_std_string_as_miller_array(
                                datastrarray,
                                wavelength_id=w_id,
                                crystal_id=crys_id,
                                scale_group_code=scale_group)
                            newlabel = label.replace("_refln.", "")
                            newlabel2 = newlabel.replace("_refln_", "")
                            if origarr:  # want only genuine miller arrays
                                self._origarrays[newlabel2 +
                                                 jlablsufx] = origarr.data()
        # Convert any groups of I+,I-,SigI+,SigI- (or amplitudes) arrays into anomalous arrays
        # i.e. both friedel mates in the same array
        for key, array in six.iteritems(self._arrays.copy()):
            plus_key = ""
            if '_minus' in key:
                minus_key = key
                plus_key = key.replace('_minus', '_plus')
            elif '-' in key:
                minus_key = key
                plus_key = key.replace('-', '+')
            elif '_plus' in key:
                plus_key = key
                minus_key = key.replace('_plus', '_minus')
            elif '+' in key:
                plus_key = key
                minus_key = key.replace('+', '-')
            if plus_key in self._arrays and minus_key in self._arrays:
                plus_array = self._arrays.pop(plus_key)
                minus_array = self._arrays.pop(minus_key)
                minus_array = minus_array.customized_copy(
                    indices=-minus_array.indices()).set_info(
                        minus_array.info())
                array = plus_array.concatenate(
                    minus_array, assert_is_similar_symmetry=False)
                array = array.customized_copy(anomalous_flag=True)
                array.set_info(minus_array.info().customized_copy(labels=list(
                    OrderedSet(plus_array.info().labels +
                               minus_array.info().labels))))
                array.set_observation_type(plus_array.observation_type())
                self._arrays.setdefault(key, array)
        if len(self._arrays) == 0:
            raise CifBuilderError("No reflection data present in cif block")
        # Sort the ordered dictionary to resemble the order of columns in the cif file
        # This is to avoid any F_meas arrays accidentally being put adjacent to
        # pdbx_anom_difference arrays in the self._arrays OrderedDict. Otherwise these
        # arrays may unintentionally be combined into a reconstructed anomalous amplitude
        # array when saving as an mtz file due to a problem in the iotbx/mtz module.
        # See http://phenix-online.org/pipermail/cctbxbb/2021-March/002289.html
        arrlstord = []
        arrlst = list(self._arrays)
        for arr in arrlst:
            for i, k in enumerate(refln_loop.keys()):
                if arr.split(",")[0] == k:
                    arrlstord.append((arr, i))
        # arrlstord must have the same keys as in the self._arrays dictionary
        assert sorted(arrlst) == sorted([e[0] for e in arrlstord])
        sortarrlst = sorted(arrlstord, key=lambda arrord: arrord[1])
        self._ordarrays = OrderedDict()
        for sortkey, i in sortarrlst:
            self._ordarrays.setdefault(sortkey, self._arrays[sortkey])
        self._arrays = self._ordarrays

    def get_HL_labels(self, keys):
        """Group Hendrickson-Lattman coefficient labels into quadruples.

        Hendrickson-Lattman labels could look like 'HLAM', 'HLBM', 'HLCM',
        'HLDM' or like 'HLanomA', 'HLanomB', 'HLanomC', 'HLanomD'.  A regular
        expression splits every matching label into the text found before and
        after the A/B/C/D letter; labels sharing the same (before, after) tag
        are considered to belong together.

        Args:
            keys: iterable of column label strings.

        Returns:
            A tuple ``(HLquads, remainingkeys)``.  ``HLquads`` is a list of
            four-label lists, one per tag that occurs exactly four times,
            ordered by the first appearance of the tag in ``keys``.
            ``remainingkeys`` lists, in input order, every label that is not
            part of a quadruple.
        """
        lstkeys = list(keys)  # cast into list if not a list
        alllabels = " ".join(lstkeys)
        allmatches = re.findall(r"(\S*(HL(\S*)[abcdABCD](\S*)))", alllabels)
        # Collect the distinct tags in first-seen order.  Going through a set
        # would make the order of the returned quadruples depend on string
        # hashing and therefore vary from run to run.
        HLtags = []
        for match in allmatches:
            tag = (match[2], match[3])
            if tag not in HLtags:
                HLtags.append(tag)
        HLquads = []
        usedkeys = set()
        for tag in HLtags:
            hllist = [m[0] for m in allmatches if (m[2], m[3]) == tag]
            if len(hllist) == 4:  # only complete A, B, C, D sets are grouped
                HLquads.append(hllist)
                usedkeys.update(hllist)
        remainingkeys = [key for key in lstkeys if key not in usedkeys]
        return HLquads, remainingkeys

    def get_mapcoefficient_labels(self, keys):
        """Pair up the A and B parts of map coefficients among cif labels.

        An A label is a key of the form ``_refln.A_<rest>`` or
        ``_refln_A_<rest>``; its partner is the key with ``A_`` replaced by
        ``B_``, e.g. ( _refln.A_calc_au _refln.B_calc_au ) or
        ( _refln.A_calc _refln.B_calc ).

        Args:
            keys: iterable of column label strings.

        Returns:
            A tuple ``(mapcoefflabels, remainingkeys)``.  ``mapcoefflabels``
            is a list of ``[A_label, B_label]`` pairs ordered by the position
            of the B label in ``keys``; ``remainingkeys`` holds the labels
            that were not paired, in input order.
        """
        lstkeys = list(keys)  # cast into list if not a list
        remainingkeys = lstkeys[:]  # shallow copy; paired labels get removed
        # Each key is matched on its own.  Searching one space-joined string
        # with a leading \s* lets the match swallow the separator, so an A
        # label that is not the very first key would be captured as
        # " _refln.A_..." and could never equal (or be removed as) a real key.
        A_pattern = re.compile(r"(_refln[\._])A_(\S*)$")
        A_candidates = []  # (A label, expected B label)
        for key in lstkeys:
            found = A_pattern.match(key)
            if found is not None:
                A_candidates.append(
                    (key, found.group(1) + "B_" + found.group(2)))
        mapcoefflabels = []
        for label in lstkeys:
            for Alabel, Blabel in A_candidates:
                if Blabel != label:
                    continue
                # Guard the removals so duplicated labels cannot raise
                # ValueError from list.remove().
                if Alabel in remainingkeys and label in remainingkeys:
                    mapcoefflabels.append([Alabel, label])
                    remainingkeys.remove(Alabel)
                    remainingkeys.remove(label)
        return mapcoefflabels, remainingkeys

    def get_phase_amplitude_labels(self, keys):
        """Pair amplitude columns with their phase columns.

        Typical pairs are ( _refln.F_calc _refln.phase_calc ),
        ( _refln.FC_ALL _refln.PHIC_ALL ) and ( _refln.FWT _refln.PHWT ).
        Four passes are made, each searching the space-joined labels that are
        still unpaired at the start of that pass:

        1. ``PH`` not followed by ``I``, e.g. '_refln.PHWT' is split into
           ('_refln.PHWT', '_refln.PH', 'WT');
        2. ``PHI``, e.g. ('_refln.PHIC_ALL', '_refln.PHI', 'C_ALL');
        3. ``PH...WT``, e.g.
           ('_refln.PHDELWT', '_refln.PH', '_refln.', 'DELWT', 'WT');
        4. lower-case ``phase`` preceded by '.' or '_', e.g.
           ('_refln.phase_calc', '_refln.', '_calc').

        Returns a tuple ``(pairs, unpaired)`` where ``pairs`` is a list of
        ``[amplitude_label, phase_label]`` and ``unpaired`` lists the labels
        left over, in input order.
        """
        all_keys = list(keys)  # cast into list if not a list
        unpaired = all_keys[:]
        pairs = []

        def pair_by_name(pattern, amplitude_names):
            # Search the labels not yet paired, then walk every input label
            # and record it as the amplitude of a phase hit whenever it
            # equals one of the names derived from that hit.
            hits = re.findall(pattern, " ".join(unpaired))
            for label in all_keys:
                for hit in hits:
                    if label in amplitude_names(hit):
                        pairs.append([label, hit[0]])
                        unpaired.remove(label)
                        unpaired.remove(hit[0])

        pair_by_name(
            r"((\S*PH)([^I]\S*))",
            lambda hit: (hit[1].replace("PH", "") + hit[2],
                         hit[1].replace("PH", "F") + hit[2]))
        pair_by_name(
            r"((\S*PHI)(\S*))",
            lambda hit: (hit[1].replace("PHI", "") + hit[2],
                         hit[1].replace("PHI", "F") + hit[2]))
        pair_by_name(
            r"(((\S*)PH)([^I]\S*(WT)))",
            lambda hit: (hit[2] + hit[3].replace("WT", "FWT"),))

        phase_hits = re.findall(r"((\S*[\._])phase(\S*))", " ".join(unpaired))
        for label in all_keys:
            for hit in phase_hits:
                phase_label = hit[0]
                bare = hit[1] + hit[2]
                with_f = hit[1] + "F" + hit[2]
                with_f_au = with_f + "_au"
                # substring tests, to cope with e.g. _refln.F_calc_au going
                # together with _refln.phase_calc
                if not (bare in label or with_f in label
                        or with_f_au in label):
                    continue
                if label not in unpaired or phase_label not in unpaired:
                    continue
                if (with_f + "_sigma_au") in unpaired or (
                        with_f + "_sigma") in unpaired:
                    # F_meas/F_meas_sigma or F_meas_au/F_meas_sigma_au
                    # take priority over a phase pairing
                    continue
                pairs.append([label, phase_label])
                unpaired.remove(label)
                unpaired.remove(phase_label)
        return pairs, unpaired

    def get_FSigF_ISigI_labels(self, keys):
        """Pair amplitude or intensity columns with their sigma columns.

        Examples of pairs: ( _refln.F_meas_sigma_au _refln.F_meas_au ),
        ( _refln.intensity_sigma _refln.intensity ) and
        ( _refln.pdbx_I_plus_sigma _refln.pdbx_I_plus ).

        Three passes are made over the labels still unpaired:

        1. ``SIG`` preceded by '.' or '_', catching pairs like
           F(+), SIGF(+);
        2. ``_sigma``, e.g. '_refln.F_meas_sigma_au' is split into
           ('_refln.F_meas_sigma_au', '_refln.F_meas', '_au');
        3. generic ``_meas``/``_calc`` labels whose surrounding text equals
           that of a ``_sigma`` label.

        Returns a tuple ``(pairs, unpaired)``; each pair is
        ``[data_label, sigma_label_or_None, observation_type]`` with the
        observation type taken from ``self.guess_observationtype`` applied
        to the data label.
        """
        all_keys = list(keys)  # cast into list if not a list
        unpaired = all_keys[:]
        pairs = []

        def pair_with_sigma(pattern):
            # A sigma hit (full, prefix, suffix) belongs to the input label
            # that equals prefix + suffix.
            hits = re.findall(pattern, " ".join(unpaired))
            for label in all_keys:
                for hit in hits:
                    if hit[1] + hit[2] != label:
                        continue
                    pairs.append(
                        [label, hit[0], self.guess_observationtype(label)])
                    unpaired.remove(label)
                    unpaired.remove(hit[0])

        pair_with_sigma(r"((\S*[\._])SIG(\S*))")
        pair_with_sigma(r"((\S*)_sigma(_*\S*))")

        # catch generic meas and sigma labels
        leftover = " ".join(unpaired)
        value_hits = re.findall(r"((\S*)_meas(\S*))", leftover)
        value_hits = value_hits + re.findall(r"((\S*)_calc(\S*))", leftover)
        sigma_hits = re.findall(r"((\S*)_sigma(\S*))", leftover)
        for value_hit in value_hits:
            for sigma_hit in sigma_hits:
                if (value_hit[1], value_hit[2]) != (sigma_hit[1],
                                                    sigma_hit[2]):
                    continue
                unpaired.remove(value_hit[0])
                sigma_label = None
                # The sigma may already have been taken, e.g. when
                # F_squared_calc, F_squared_meas and F_squared_sigma are all
                # present; the later data column then gets no sigma.
                if sigma_hit[0] in unpaired:
                    unpaired.remove(sigma_hit[0])
                    sigma_label = sigma_hit[0]
                pairs.append([
                    value_hit[0], sigma_label,
                    self.guess_observationtype(value_hit[0])
                ])
        return pairs, unpaired

    def get_miller_indices_containing_loops(self):
        """Collect every loop of the cif block that carries Miller indices.

        For each loop in ``self.cif_block.loops`` the first key containing
        'index_h' is located; the k and l columns are looked up under the
        same key with 'index_h' replaced by 'index_k' / 'index_l'.

        Returns:
            A list of ``(indices, loop)`` tuples where ``indices`` is a
            ``flex.miller_index`` built from the three integer columns.

        Raises:
            CifBuilderError: if one of the h, k, l columns is absent from
                the loop, or a column cannot be converted by ``flex.int``.
        """
        found = []
        for loop in self.cif_block.loops.values():
            # only the first index_h key of a loop is considered
            h_key = next((k for k in loop.keys() if 'index_h' in k), None)
            if h_key is None:
                continue
            columns = [
                loop.get(h_key.replace('index_h', 'index_%s' % axis))
                for axis in 'hkl'
            ]
            if columns.count(None):
                missing_axis = 'hkl'[columns.index(None)]
                raise CifBuilderError(
                    "Miller indices missing from current CIF block (%s)" %
                    h_key.replace('index_h', 'index_%s' % missing_axis))
            as_ints = []
            for axis_name, column in zip("HKL", columns):
                try:
                    converted = flex.int(column)
                except ValueError as e:
                    raise CifBuilderError(
                        "Invalid item for Miller index %s: %s" %
                        (axis_name, str(e)))
                as_ints.append(converted)
            found.append((flex.miller_index(*as_ints), loop))
        return found

    def get_selection(self,
                      value,
                      wavelength_id=None,
                      crystal_id=None,
                      scale_group_code=None):
        """Build a boolean selection of the usable entries of ``value``.

        Entries equal to '.' or '?' are deselected.  For each of wavelength
        id, crystal id and scale group code, the selection is further
        restricted to the rows whose id matches, but only when both the
        corresponding id array on ``self`` and the requested id are not None.
        """
        is_dot = (value == '.')
        is_question_mark = (value == '?')
        mask = ~(is_dot | is_question_mark)
        id_filters = (
            (self.wavelength_id_array, wavelength_id),
            (self.crystal_id_array, crystal_id),
            (self.scale_group_array, scale_group_code),
        )
        for id_array, wanted in id_filters:
            if id_array is None or wanted is None:
                continue
            mask &= (id_array.data() == wanted)
        return mask

    def flex_std_string_as_miller_array(self,
                                        value,
                                        wavelength_id=None,
                                        crystal_id=None,
                                        scale_group_code=None):
        """Turn a column of strings into a miller array.

        Only the data and indices matching the wavelength_id, crystal_id and
        scale_group_code submitted are kept (the full selection of usable
        entries if these are None).  The selected strings are converted with
        ``flex.int``, then ``flex.double`` if that raises ValueError.  If
        neither conversion works the complete, unselected ``value`` is used
        together with all of ``self.indices``, i.e. including any '.' and
        '?' entries.

        Returns:
            A ``miller.array`` on ``self.crystal_symmetry``, or None when the
            resulting data array is empty.
        """
        keep = self.get_selection(value,
                                  wavelength_id=wavelength_id,
                                  crystal_id=crystal_id,
                                  scale_group_code=scale_group_code)
        data = value.select(keep)
        for convert in (flex.int, flex.double):
            try:
                data = convert(data)
                indices = self.indices.select(keep)
            except ValueError:
                continue
            break
        else:
            # not numeric: hand back all values including '.' and '?'
            data = value
            indices = self.indices
        if data.size() == 0:
            return None
        miller_set = miller.set(self.crystal_symmetry, indices)
        return miller.array(miller_set.auto_anomalous(), data)

    def arrays(self):
        """Return the mapping of array label to miller array (``self._arrays``)."""
        return self._arrays

    def origarrays(self):
        """
        Return the dictionary of raw column data found in the cif file.

        Each value is the ``data()`` of the array that
        ``flex_std_string_as_miller_array`` built for one column: numeric
        flex data when the column could be converted, or the string array as
        a fall back.  Keys are the column labels with their ``_refln.`` /
        ``_refln_`` prefix removed, followed by a label suffix.
        """
        return self._origarrays
    def get_raw_data(self):
        """Return the image data as a tuple of flex arrays.

        On first use the arrays are read through the handle returned by
        ``self._get_cbf_handle()`` and stored in ``self._raw_data``; later
        calls return the stored arrays.

        The encoding of every array is taken from the 'array_structure'
        category; the binary payloads come from 'array_data'.  Only
        'signed 32-bit integer' and 'signed 64-bit real IEEE' encodings are
        handled.  When the handle reports sections, each section is cut out
        of its parent array; otherwise every array is used whole.  In both
        cases the result is reshaped to its last two dimensions.

        Returns:
            A tuple of flex arrays, or None if an array uses an encoding that
            is not supported.  Nothing is cached in the latter case, so
            repeated calls keep returning None.

        Raises:
            AssertionError: if the 'data' column is not binary, if a section
                refers to more than one array id, or if the number of arrays
                differs from the number of panels of ``self.get_detector()``.
        """
        if self._raw_data is None:
            import numpy
            from scitbx.array_family import flex
            from libtbx.containers import OrderedDict

            cbf = self._get_cbf_handle()
            cbf.find_category('array_structure')
            cbf.find_column('encoding_type')
            cbf.select_row(0)
            types = []
            for _ in range(cbf.count_rows()):
                types.append(cbf.get_value())
                cbf.next_row()
            assert len(types) == cbf.count_rows()

            # read the data
            # NOTE(review): binary-mode numpy.fromstring is deprecated in
            # favour of numpy.frombuffer, which yields a read-only view;
            # confirm the flex constructors accept that before switching.
            data = OrderedDict()
            cbf.find_category("array_data")
            for i in range(cbf.count_rows()):
                cbf.find_column("array_id")
                name = cbf.get_value()

                cbf.find_column("data")
                assert cbf.get_typeofvalue().find('bnry') > -1

                if types[i] == 'signed 32-bit integer':
                    array_string = cbf.get_integerarray_as_string()
                    array = flex.int(
                        numpy.fromstring(array_string, numpy.int32))
                    parameters = cbf.get_integerarrayparameters_wdims_fs()
                    # presumably the three array dimensions, slowest
                    # first - TODO confirm against the pycbf documentation
                    array_size = (parameters[11], parameters[10],
                                  parameters[9])
                elif types[i] == 'signed 64-bit real IEEE':
                    array_string = cbf.get_realarray_as_string()
                    # numpy.float64 rather than the numpy.float alias, which
                    # no longer exists in current NumPy releases
                    array = flex.double(
                        numpy.fromstring(array_string, numpy.float64))
                    parameters = cbf.get_realarrayparameters_wdims_fs()
                    array_size = (parameters[7], parameters[6], parameters[5])
                else:
                    # type not supported; self._raw_data is still None so the
                    # next call reports the same result instead of ()
                    return None

                array.reshape(flex.grid(*array_size))
                data[name] = array
                cbf.next_row()

            # Results are gathered locally and only stored on self once
            # everything has been read and checked.
            raw_data = []

            # extract the data for each panel
            if cbf.has_sections():
                section_shapes = OrderedDict()
                for _ in range(cbf.count_rows()):
                    cbf.find_column("id")
                    section_name = cbf.get_value()
                    if section_name not in section_shapes:
                        section_shapes[section_name] = {}
                    cbf.find_column("array_id")
                    if "array_id" not in section_shapes[section_name]:
                        section_shapes[section_name][
                            "array_id"] = cbf.get_value()
                    else:
                        assert section_shapes[section_name][
                            "array_id"] == cbf.get_value()
                    # the file values are shifted by one to get a 0-based
                    # axis number and slice start; the end is used as the
                    # exclusive slice bound unchanged
                    cbf.find_column("index")
                    axis_index = int(cbf.get_value()) - 1
                    cbf.find_column("start")
                    axis_start = int(cbf.get_value()) - 1
                    cbf.find_column("end")
                    axis_end = int(cbf.get_value())

                    section_shapes[section_name][axis_index] = slice(
                        axis_start, axis_end)
                    cbf.next_row()

                for section_name in section_shapes:
                    section_shape = section_shapes[section_name]
                    section = data[section_shape["array_id"]][
                        section_shape[2], section_shape[1], section_shape[0]]
                    section.reshape(
                        flex.grid(section.focus()[-2],
                                  section.focus()[-1]))
                    raw_data.append(section)
            else:
                for key in data:
                    data[key].reshape(
                        flex.grid(data[key].focus()[-2],
                                  data[key].focus()[-1]))
                    raw_data.append(data[key])

            d = self.get_detector()
            assert len(d) == len(raw_data)
            self._raw_data = raw_data

        return tuple(self._raw_data)
Exemple #14
0
  def __init__(self, pdb_hierarchy,
               sequences,
               alignment_params=None,
               crystal_symmetry=None,
               coordinate_precision=5,
               occupancy_precision=3,
               b_iso_precision=5,
               u_aniso_precision=5):
    """Build the cif block of a hierarchy together with its entity loops.

    After the base class initialisation, the chains of ``pdb_hierarchy`` are
    aligned against ``sequences`` with ``mmtbx.validation.sequence`` and the
    ``_entity``, ``_entity_poly`` and ``_entity_poly_seq`` loops are added to
    ``self.cif_block``.  Chains sharing one aligned sequence share one
    polymer entity; every residue name found in the remaining chains becomes
    a non-polymer entity ("water" for HOH).  Finally
    ``_atom_site.label_entity_id`` and ``_atom_site.label_seq_id`` are filled
    in and the atom site loops are moved to the end of the block.

    Args:
      pdb_hierarchy: the hierarchy to write; its ``chains()`` and
        ``as_cif_block()`` methods are used.
      sequences: passed to ``mmtbx.validation.sequence.validation``.
      alignment_params: passed to the validation as ``params``.
      crystal_symmetry, coordinate_precision, occupancy_precision,
      b_iso_precision, u_aniso_precision: forwarded unchanged to
        ``pdb_hierarchy_as_cif_block.__init__``.
    """

    pdb_hierarchy_as_cif_block.__init__(
      self, pdb_hierarchy, crystal_symmetry=crystal_symmetry,
      coordinate_precision=coordinate_precision,
      occupancy_precision=occupancy_precision,
      b_iso_precision=b_iso_precision,
      u_aniso_precision=u_aniso_precision)

    import mmtbx.validation.sequence
    from iotbx.pdb import amino_acid_codes
    validation = mmtbx.validation.sequence.validation(
      pdb_hierarchy=pdb_hierarchy,
      sequences=sequences,
      params=alignment_params,
      extract_residue_groups=True,
      log=null_out(), # silence output
    )

    entity_loop = iotbx.cif.model.loop(header=(
      '_entity.id',
      '_entity.type',
      #'_entity.src_method',
      #'_entity.pdbx_description',
      '_entity.formula_weight',
      '_entity.pdbx_number_of_molecules',
      #'_entity.details',
      #'_entity.pdbx_mutation',
      #'_entity.pdbx_fragment',
      #'_entity.pdbx_ec'
    ))

    entity_poly_loop = iotbx.cif.model.loop(header=(
      '_entity_poly.entity_id',
      '_entity_poly.type',
      '_entity_poly.nstd_chirality',
      '_entity_poly.nstd_linkage',
      '_entity_poly.nstd_monomer',
      '_entity_poly.pdbx_seq_one_letter_code',
      '_entity_poly.pdbx_seq_one_letter_code_can',
      '_entity_poly.pdbx_strand_id',
      '_entity_poly.type_details'
    ))

    entity_poly_seq_loop = iotbx.cif.model.loop(header=(
      '_entity_poly_seq.entity_id',
      '_entity_poly_seq.num',
      '_entity_poly_seq.mon_id',
      '_entity_poly_seq.hetero',
    ))

    sequence_counts = OrderedDict()
    sequence_to_chain_ids = {}
    entity_id = 0
    sequence_to_entity_id = {}
    chain_id_to_entity_id = {}
    sequence_to_chains = {}
    residue_group_to_seq_num_mapping = {}
    aligned_pdb_chains = OrderedSet()
    non_polymer_counts = dict_with_default_0()
    non_polymer_resname_to_entity_id = OrderedDict()

    for chain in validation.chains:
      sequence = chain.alignment.b
      if sequence not in sequence_to_entity_id:
        entity_id += 1
        sequence_to_entity_id[sequence] = entity_id
      sequence_counts.setdefault(sequence, 0)
      sequence_counts[sequence] += 1
      sequence_to_chain_ids.setdefault(sequence, [])
      sequence_to_chain_ids[sequence].append(chain.chain_id)
      sequence_to_chains.setdefault(sequence, [])
      sequence_to_chains[sequence].append(chain)
      chain_id_to_entity_id[chain.chain_id] = sequence_to_entity_id[sequence]
      aligned_pdb_chains.add(chain.residue_groups[0].parent())

      assert len(chain.residue_groups) + chain.n_missing_start + chain.n_missing_end == len(sequence)
      # pad with None so that positions line up with the aligned sequence
      residue_groups = [None] * chain.n_missing_start + chain.residue_groups + [None] * chain.n_missing_end
      seq_num = 0
      for i, residue_group in enumerate(residue_groups):
        if residue_group is None and chain.alignment.b[i] == '-':
          # a deletion
          continue
        seq_num += 1
        if residue_group is not None:
          residue_group_to_seq_num_mapping[
            residue_group] = seq_num

    # Computed once after the loop so that the name is also defined when the
    # validation produced no chains at all.
    unaligned_pdb_chains = OrderedSet(pdb_hierarchy.chains()) - aligned_pdb_chains

    for pdb_chain in unaligned_pdb_chains:
      for residue_group in pdb_chain.residue_groups():
        for resname in residue_group.unique_resnames():
          if resname not in non_polymer_resname_to_entity_id:
            entity_id += 1
            non_polymer_resname_to_entity_id[resname] = entity_id
          non_polymer_counts[resname] += 1

    # sequence_counts is only used for its (insertion) order here
    for sequence in sequence_counts:
      entity_poly_seq_num = 0
      entity_id = sequence_to_entity_id[sequence]

      entity_loop.add_row((
        entity_id,
        'polymer', #polymer/non-polymer/macrolide/water
        #'?', #src_method
        #'?', # pdbx_description
        '?', # formula_weight
        len(sequence_to_chains[sequence]), # pdbx_number_of_molecules
        #'?', # details
        #'?', # pdbx_mutation
        #'?', # pdbx_fragment
        #'?' # pdbx_ec
      ))

      # The definition of the cif item _entity_poly.pdbx_seq_one_letter_code
      # says that modifications and non-standard amino acids should be encoded
      # as 'X', however in practice the PDB seem to encode them as the three-letter
      # code in parentheses.
      pdbx_seq_one_letter_code = []
      pdbx_seq_one_letter_code_can = []

      # the first chain with this sequence stands in for the whole entity
      chain = sequence_to_chains[sequence][0]
      is_protein = (chain.chain_type == mmtbx.validation.sequence.PROTEIN)

      for i, one_letter_code in enumerate(sequence):

        #Data items in the ENTITY_POLY_SEQ category specify the sequence
        #of monomers in a polymer. Allowance is made for the possibility
        #of microheterogeneity in a sample by allowing a given sequence
        #number to be correlated with more than one monomer ID. The
        #corresponding ATOM_SITE entries should reflect this
        #heterogeneity.

        monomer_id = None
        if i >= chain.n_missing_start and i < (len(sequence) - chain.n_missing_end):
          monomer_id = chain.resnames[i-chain.n_missing_start]

        if monomer_id is None and one_letter_code == '-': continue

        pdbx_seq_one_letter_code_can.append(one_letter_code)

        if monomer_id is None:
          # residue absent from the model: derive its name from the sequence
          if is_protein:
            monomer_id = amino_acid_codes.three_letter_given_one_letter.get(
              one_letter_code, "UNK") # XXX
          else:
            monomer_id = one_letter_code
        else:
          if is_protein:
            one_letter_code = amino_acid_codes.one_letter_given_three_letter.get(
              monomer_id, "(%s)" %monomer_id)

        pdbx_seq_one_letter_code.append(one_letter_code)

        entity_poly_seq_num += 1

        entity_poly_seq_loop.add_row((
          entity_id,
          entity_poly_seq_num,
          monomer_id,
          'no', #XXX
        ))

      entity_poly_type = '?'
      entity_nstd_chirality = 'n'
      # we should probably determine the chirality more correctly by examining
      # the chirality of the backbone chain rather than relying on the residue
      # names to be correct
      #
      # The "more than half" tests below are written as 2*n > n_total: this
      # is independent of the division mode in effect and simply evaluates to
      # False (type stays '?') for a chain without residue names.
      if is_protein:
        n_d_peptides = 0
        n_l_peptides = 0
        n_achiral_peptides = 0
        n_unknown = 0
        for resname in chain.resnames:
          if resname == "GLY":
            n_achiral_peptides += 1
          elif resname in iotbx.pdb.common_residue_names_amino_acid:
            n_l_peptides += 1
          elif resname in amino_acid_codes.three_letter_l_given_three_letter_d:
            n_d_peptides += 1
          else:
            n_unknown += 1
        n_total = n_d_peptides + n_l_peptides + n_achiral_peptides + n_unknown
        if 2 * (n_l_peptides + n_achiral_peptides) > n_total:
          entity_poly_type = 'polypeptide(L)'
          if n_d_peptides > 0:
            entity_nstd_chirality = 'y'
        elif 2 * (n_d_peptides + n_achiral_peptides) > n_total:
          entity_poly_type = 'polypeptide(D)'
          if n_l_peptides > 0:
            entity_nstd_chirality = 'y'
      elif chain.chain_type == mmtbx.validation.sequence.NUCLEIC_ACID:
        n_dna = 0
        n_rna = 0
        n_unknown = 0
        for resname in chain.resnames:
          if resname is not None and resname.strip().upper() in (
            'AD', 'CD', 'GD', 'TD', 'DA', 'DC', 'DG', 'DT'):
            n_dna += 1
          # NOTE(review): this list has 'T'/'+T' but no 'U'/'+U', so uridine
          # residues are counted as unknown - confirm whether that is intended
          elif resname is not None and resname.strip().upper() in (
            'A', 'C', 'G', 'T', '+A', '+C', '+G', '+T'):
            n_rna += 1
          else:
            n_unknown += 1
        n_total = n_dna + n_rna + n_unknown
        if 2 * n_dna > n_total and n_rna == 0:
          entity_poly_type = 'polydeoxyribonucleotide'
        elif 2 * n_rna > n_total and n_dna == 0:
          entity_poly_type = 'polyribonucleotide'
        elif 2 * (n_rna + n_dna) > n_total:
          entity_poly_type = 'polydeoxyribonucleotide/polyribonucleotide hybrid'

      entity_poly_loop.add_row((
        entity_id,
        entity_poly_type,
        entity_nstd_chirality,
        'no',
        'no',
        wrap_always("".join(pdbx_seq_one_letter_code), width=80).strip(),
        wrap_always("".join(pdbx_seq_one_letter_code_can), width=80).strip(),
        ','.join(sequence_to_chain_ids[sequence]),
        '?'
      ))

    for resname, entity_id in non_polymer_resname_to_entity_id.items():
      entity_type = "non-polymer"
      if resname == "HOH":
        entity_type = "water" # XXX
      entity_loop.add_row((
        entity_id,
        entity_type, #polymer/non-polymer/macrolide/water
        #'?', #src_method
        #'?', # pdbx_description
        '?', # formula_weight
        non_polymer_counts[resname], # pdbx_number_of_molecules
        #'?', # details
        #'?', # pdbx_mutation
        #'?', # pdbx_fragment
        #'?' # pdbx_ec
      ))

    self.cif_block.add_loop(entity_loop)
    self.cif_block.add_loop(entity_poly_loop)
    self.cif_block.add_loop(entity_poly_seq_loop)
    self.cif_block.update(pdb_hierarchy.as_cif_block())

    label_entity_id = self.cif_block['_atom_site.label_entity_id']
    auth_seq_id = self.cif_block['_atom_site.auth_seq_id']
    ins_code = self.cif_block['_atom_site.pdbx_PDB_ins_code']
    auth_asym_id = self.cif_block['_atom_site.auth_asym_id']
    label_seq_id = flex.std_string(auth_seq_id.size(), '.')
    # work on a copy in which '?' is blanked, so that it compares equal to
    # the stripped icode of a residue group without insertion code
    ins_code = ins_code.deep_copy()
    ins_code.set_selected(ins_code == '?', '')
    for residue_group, seq_num in residue_group_to_seq_num_mapping.items():
      sel = ((auth_asym_id == residue_group.parent().id) &
             (ins_code == residue_group.icode.strip()) &
             (auth_seq_id == residue_group.resseq.strip()))
      label_seq_id.set_selected(sel, str(seq_num))
      label_entity_id.set_selected(
        sel, str(chain_id_to_entity_id[residue_group.parent().id]))

    for pdb_chain in unaligned_pdb_chains:
      for residue_group in pdb_chain.residue_groups():
        sel = ((auth_asym_id == residue_group.parent().id) &
               (ins_code == residue_group.icode.strip()) &
               (auth_seq_id == residue_group.resseq.strip()))
        label_entity_id.set_selected(
          sel, str(non_polymer_resname_to_entity_id[residue_group.unique_resnames()[0]]))

    self.cif_block['_atom_site.label_seq_id'] = label_seq_id

    # reorder the loops
    atom_site_loop = self.cif_block['_atom_site']
    atom_site_aniso_loop = self.cif_block.get('_atom_site_anisotrop')
    del self.cif_block['_atom_site']
    self.cif_block.add_loop(atom_site_loop)
    if atom_site_aniso_loop is not None:
      del self.cif_block['_atom_site_anisotrop']
      self.cif_block.add_loop(atom_site_aniso_loop)
Exemple #15
0
    def __init__(self, cif_block):
        """Build ``self.hierarchy`` from the ``_atom_site`` items of a CIF block.

        The per-atom columns are read from ``cif_block`` first.  The atoms are
        then visited in file order; a new model, chain, residue group or atom
        group is started whenever the identifiers that define it change, and
        every atom is appended to the atom group that is current at that point.

        Parameters:
          cif_block: the CIF data block to read; it is also passed on to
            ``crystal_symmetry_builder.__init__``.

        Raises:
          AssertionError: if one of the required columns is missing, if an
            ``auth_asym_id`` is ".", "?" or " ", or if a non-integer residue
            id is not exactly four characters long.
          CifBuilderError: if the anisotropic displacement parameters cannot
            be converted to ``flex.double``.
        """
        crystal_symmetry_builder.__init__(self, cif_block)

        self.hierarchy = hierarchy.root()
        # These items are mandatory for the _atom_site loop, all others are optional
        type_symbol = self._wrap_loop_if_needed(cif_block,
                                                "_atom_site.type_symbol")
        # For atom name, chain id, residue name and residue number the auth_*
        # item is tried first and the label_* item is used as the fallback.
        atom_labels = self._wrap_loop_if_needed(cif_block,
                                                "_atom_site.auth_atom_id")
        if atom_labels is None:
            atom_labels = self._wrap_loop_if_needed(
                cif_block, "_atom_site.label_atom_id"
            )  # corresponds to chem comp atom name
        alt_id = self._wrap_loop_if_needed(
            cif_block, "_atom_site.label_alt_id")  # alternate conformer id
        label_asym_id = self._wrap_loop_if_needed(
            cif_block, "_atom_site.label_asym_id")  # chain id
        auth_asym_id = self._wrap_loop_if_needed(cif_block,
                                                 "_atom_site.auth_asym_id")
        # If only one of the two chain id columns exists it stands in for both.
        if label_asym_id is None: label_asym_id = auth_asym_id
        if auth_asym_id is None: auth_asym_id = label_asym_id
        comp_id = self._wrap_loop_if_needed(cif_block,
                                            "_atom_site.auth_comp_id")
        if comp_id is None:
            comp_id = self._wrap_loop_if_needed(
                cif_block, "_atom_site.label_comp_id")  # residue name
        entity_id = self._wrap_loop_if_needed(cif_block,
                                              "_atom_site.label_entity_id")
        seq_id = self._wrap_loop_if_needed(cif_block, "_atom_site.auth_seq_id")
        if seq_id is None:
            seq_id = self._wrap_loop_if_needed(
                cif_block, "_atom_site.label_seq_id")  # residue number
        assert [atom_labels, alt_id, auth_asym_id, comp_id, entity_id,
                seq_id].count(None) == 0, "something is not present"
        assert type_symbol is not None

        atom_site_fp = cif_block.get('_atom_site.phenix_scat_dispersion_real')
        atom_site_fdp = cif_block.get('_atom_site.phenix_scat_dispersion_imag')

        pdb_ins_code = cif_block.get(
            "_atom_site.pdbx_PDB_ins_code")  # insertion code
        # NOTE(review): model_ids and atom_site_id are taken from
        # cif_block.get() without a None check, yet both are indexed for every
        # atom in the loop below -- confirm these items are always present.
        model_ids = cif_block.get("_atom_site.pdbx_PDB_model_num")
        atom_site_id = cif_block.get("_atom_site.id")
        # only permitted values are ATOM or HETATM
        group_PDB = cif_block.get("_atom_site.group_PDB")
        # TODO: read esds
        B_iso_or_equiv = flex.double(
            self._wrap_loop_if_needed(cif_block, "_atom_site.B_iso_or_equiv"))
        cart_x = flex.double(
            self._wrap_loop_if_needed(cif_block, "_atom_site.Cartn_x"))
        cart_y = flex.double(
            self._wrap_loop_if_needed(cif_block, "_atom_site.Cartn_y"))
        cart_z = flex.double(
            self._wrap_loop_if_needed(cif_block, "_atom_site.Cartn_z"))
        occu = flex.double(
            self._wrap_loop_if_needed(cif_block, "_atom_site.occupancy"))
        formal_charge = self._wrap_loop_if_needed(
            cif_block, "_atom_site.pdbx_formal_charge")
        # anisotropic b-factors
        # TODO: read esds
        anisotrop_id = self._wrap_loop_if_needed(cif_block,
                                                 "_atom_site_anisotrop.id")
        adps = None
        if anisotrop_id is not None:
            # Look for the six U[i][j] columns first; B[i][j] is only
            # consulted when none of the U columns is present.
            u_ij = [
                self._wrap_loop_if_needed(
                    cif_block,
                    "_atom_site_anisotrop.U[%s][%s]" % (ij[0], ij[1]))
                for ij in ("11", "22", "33", "12", "13", "23")
            ]
            # a partial set of U columns is rejected
            assert u_ij.count(None) in (0, 6)
            if u_ij.count(None) == 0:
                adps = u_ij
            else:
                assert u_ij.count(None) == 6
                b_ij = [
                    self._wrap_loop_if_needed(
                        cif_block,
                        "_atom_site_anisotrop.B[%s][%s]" % (ij[0], ij[1]))
                    for ij in ("11", "22", "33", "12", "13", "23")
                ]
                assert b_ij.count(None) in (0, 6)
                if b_ij.count(None) == 0:
                    adps = adptbx.b_as_u(b_ij)
                assert not (u_ij.count(None) and b_ij.count(None)
                            )  # fails when neither the U nor the B columns are present
            if adps is not None:
                try:
                    adps = [flex.double(adp) for adp in adps]
                except ValueError as e:
                    raise CifBuilderError("Error interpreting ADPs: " + str(e))
                adps = flex.sym_mat3_double(*adps)
        # Map each _atom_site_anisotrop.id to its ADP so that atoms can look
        # up their own entry by serial further down.
        py_adps = {}
        if anisotrop_id is not None and adps is not None:
            for an_id, adp in zip(list(anisotrop_id), list(adps)):
                py_adps[an_id] = adp
        # Identifiers of the previously visited atom; a change in any of them
        # starts a new model / chain / residue group below.
        current_model_id = None
        current_label_asym_id = None
        current_auth_asym_id = None
        current_residue_id = None
        current_ins_code = None

        for i_atom in range(atom_labels.size()):
            # model(s)
            last_model_id = current_model_id
            current_model_id = model_ids[i_atom]
            assert current_model_id is not None
            if current_model_id != last_model_id:
                model = hierarchy.model(id=current_model_id)
                self.hierarchy.append_model(model)

            # chain(s)
            # NOTE(review): last_label_asym_id is assigned here but not read
            # again in this method.
            last_label_asym_id = current_label_asym_id
            current_label_asym_id = label_asym_id[i_atom]
            assert current_label_asym_id is not None
            last_auth_asym_id = current_auth_asym_id
            current_auth_asym_id = auth_asym_id[i_atom]
            assert current_auth_asym_id not in [".", "?", " "], "mmCIF file contains " + \
              "record with empty auth_asym_id, which is wrong."
            # NOTE(review): this repeats the check already made on
            # current_label_asym_id a few lines above.
            assert current_label_asym_id is not None
            # Chains are split on auth_asym_id (and on a model change), and
            # the chain id is the auth_asym_id.
            if (current_auth_asym_id != last_auth_asym_id
                    or current_model_id != last_model_id):
                chain = hierarchy.chain(id=current_auth_asym_id)
                model.append_chain(chain)
            else:
                assert current_auth_asym_id == last_auth_asym_id

            # residue_group(s)
            # defined by residue id and insertion code
            last_residue_id = current_residue_id
            current_residue_id = seq_id[i_atom]
            assert current_residue_id is not None
            last_ins_code = current_ins_code
            if pdb_ins_code is not None:
                current_ins_code = pdb_ins_code[i_atom]
                # a missing insertion code is stored as a single blank
                if current_ins_code in ("?", ".", None): current_ins_code = " "
            if (current_residue_id != last_residue_id
                    or current_ins_code != last_ins_code
                    or current_auth_asym_id != last_auth_asym_id
                    or current_model_id != last_model_id):
                try:
                    resseq = hy36encode(width=4, value=int(current_residue_id))
                except ValueError as e:
                    # int() (or the encoding) rejected the id: use the raw
                    # string, which must then already be four characters wide
                    resseq = current_residue_id
                    assert len(resseq) == 4
                residue_group = hierarchy.residue_group(resseq=resseq,
                                                        icode=current_ins_code)
                chain.append_residue_group(residue_group)
                atom_groups = OrderedDict()  # reset atom_groups cache
            # atom_group(s)
            # defined by resname and altloc id
            current_altloc = alt_id[i_atom]
            if current_altloc == "." or current_altloc == "?":
                current_altloc = ""  # no alternate conformer id given
            current_resname = comp_id[i_atom]
            if (current_altloc, current_resname) not in atom_groups:
                atom_group = hierarchy.atom_group(altloc=current_altloc,
                                                  resname=current_resname)
                atom_groups[(current_altloc, current_resname)] = atom_group
                # the atom group with the blank altloc goes first in its
                # residue group; all others are appended in file order
                if current_altloc == "":
                    residue_group.insert_atom_group(0, atom_group)
                else:
                    residue_group.append_atom_group(atom_group)
            else:
                atom_group = atom_groups[(current_altloc, current_resname)]

            # atom(s)
            atom = hierarchy.atom()
            atom_group.append_atom(atom)
            atom.set_element(type_symbol[i_atom])
            atom.set_name(
                format_pdb_atom_name(atom_labels[i_atom], type_symbol[i_atom]))
            atom.set_xyz(new_xyz=(cart_x[i_atom], cart_y[i_atom],
                                  cart_z[i_atom]))
            atom.set_b(B_iso_or_equiv[i_atom])
            atom.set_occ(occu[i_atom])
            # hy36encode should go once the pdb.hierarchy has been
            # modified to no longer store fixed-width strings
            atom.set_serial(
                hy36encode(width=5, value=int(atom_site_id[i_atom])))
            # some code relies on an empty segid being 4 spaces
            atom.set_segid("    ")
            if group_PDB is not None and group_PDB[i_atom] == "HETATM":
                atom.hetero = True
            if formal_charge is not None:
                charge = formal_charge[i_atom]
                if charge not in ("?", "."):
                    # The sign may lead or trail the digits in the file; it is
                    # written back as "<digits><sign>", with no sign for zero.
                    if charge.endswith("-") or charge.startswith("-"):
                        sign = "-"
                    else:
                        sign = "+"
                    charge = charge.strip(" -+")
                    charge = int(charge)
                    if charge == 0: sign = ""
                    atom.set_charge("%i%s" % (charge, sign))
            if atom_site_fp is not None:
                fp = atom_site_fp[i_atom]
                if fp not in ("?", "."):
                    atom.set_fp(new_fp=float(fp))
            if atom_site_fdp is not None:
                fdp = atom_site_fdp[i_atom]
                if fdp not in ("?", "."):
                    atom.set_fdp(new_fdp=float(fdp))
            if anisotrop_id is not None and adps is not None:
                # atoms without an entry in the anisotrop loop keep no uij
                py_u_ij = py_adps.get(atom.serial.strip(), None)
                if py_u_ij is not None:
                    atom.set_uij(py_u_ij)
        if len(self.hierarchy.models()) == 1:
            # for compatibility with single-model PDB files
            self.hierarchy.models()[0].id = ""
Exemple #16
0
 def cache_restraint(self, cmd, cmd_residue, line, args):
     """Remember a restraint line under its command, keeping insertion order.

     The first ``(cmd_residue, args)`` recorded for a given ``line`` of a
     given ``cmd`` is kept; recording the same line again changes nothing.
     """
     from libtbx.containers import OrderedDict
     cache = self.cached_restraints
     if cmd not in cache:
         cache[cmd] = OrderedDict()
     per_command = cache[cmd]
     if line not in per_command:
         per_command[line] = (cmd_residue, args)
Exemple #17
0
class loop(DictMixin):
  """A CIF loop: named columns of equal length, kept in insertion order.

  ``self._columns`` is an ``OrderedDict`` mapping each data name to its column
  (columns are stored as ``flex.std_string``).  ``self.keys_lower`` maps the
  lower-cased data name to the name as stored, so that item lookup and
  deletion are case-insensitive.
  """

  def __init__(self, header=None, data=None):
    """Create a loop.

    header -- optional iterable of data names; each gets an empty column.
    data -- with ``header``: a flat, row-by-row sequence of values whose
      length must be a multiple of ``len(header)``.  Without ``header``: a
      dict mapping data names to whole columns.
    """
    self._columns = OrderedDict()
    self.keys_lower = {}
    if header is not None:
      for key in header:
        self.setdefault(key, flex.std_string())
      if data is not None:
        # the number of data items must be an exact multiple of the number of headers
        assert len(data) % len(header) == 0, "Wrong number of data items for loop"
        n_rows = len(data)//len(header)
        n_columns = len(header)
        for i in range(n_rows):
          self.add_row([data[i*n_columns+j] for j in range(n_columns)])
    elif header is None and data is not None:
      assert isinstance(data, dict) or isinstance(data, OrderedDict)
      self.add_columns(data)
      self.keys_lower = dict(
        [(key.lower(), key) for key in self._columns.keys()])

  def __setitem__(self, key, value):
    """Store ``value`` as column ``key``, converting it to ``flex.std_string``.

    Raises ``Sorry`` if ``key`` does not match ``tag_re``; asserts that the
    new column has as many entries as the existing ones.
    """
    if not re.match(tag_re, key):
      raise Sorry("%s is not a valid data name" %key)
    if len(self) > 0:
      assert len(value) == self.size()
    if not isinstance(value, flex.std_string):
      # Try the numeric flex types in turn so that numbers are rendered by
      # the flex as_string() conversion.
      # NOTE(review): there is no break after the isinstance branch, so a
      # flex.int value is converted to strings and then offered to flex.double
      # on the next pass -- confirm this round trip is intended.
      for flex_numeric_type in (flex.int, flex.double):
        if isinstance(value, flex_numeric_type):
          value = value.as_string()
        else:
          try:
            value = flex_numeric_type(value).as_string()
          except TypeError:
            continue
          else:
            break
      if not isinstance(value, flex.std_string):
        value = flex.std_string(value)
    # value must be a mutable type
    assert hasattr(value, '__setitem__')
    self._columns[key] = value
    self.keys_lower[key.lower()] = key

  def __getitem__(self, key):
    """Return the column for ``key`` (case-insensitive)."""
    return self._columns[self.keys_lower[key.lower()]]

  def __delitem__(self, key):
    """Remove the column for ``key`` (case-insensitive)."""
    del self._columns[self.keys_lower[key.lower()]]
    del self.keys_lower[key.lower()]

  def keys(self):
    """Return the data names in column order."""
    return self._columns.keys()

  def __repr__(self):
    return repr(OrderedDict(self.iteritems()))

  def name(self):
    """Return the common substring of all data names, without trailing
    '_' and '.' characters."""
    return common_substring(self.keys()).rstrip('_').rstrip('.')

  def size(self):
    """Return the length of the longest column (0 for a loop without columns)."""
    size = 0
    for column in self.values():
      size = max(size, len(column))
    return size

  def n_rows(self):
    """Return the number of rows; same value as ``size()``."""
    return self.size()

  def n_columns(self):
    """Return the number of columns."""
    return len(self.keys())

  def add_row(self, row, default_value="?"):
    """Append one row.

    row -- either a dict keyed by data name (columns without an entry get
      ``default_value``) or a sequence with exactly one value per column, in
      column order.  Every value is stored as ``str(value)``.
    """
    if isinstance(row, dict):
      for key in self:
        if key in row:
          self[key].append(str(row[key]))
        else:
          self[key].append(default_value)
    else:
      assert len(row) == len(self)
      for i, key in enumerate(self):
        self[key].append(str(row[i]))

  def add_column(self, key, values):
    """Add column ``key``; it must match the current row count unless the
    loop is still empty."""
    if self.size() != 0:
      assert len(values) == self.size()
    self[key] = values
    self.keys_lower[key.lower()] = key

  def add_columns(self, columns):
    """Add every ``key -> values`` pair of the dict ``columns`` as a column."""
    assert isinstance(columns, dict) or isinstance(columns, OrderedDict)
    for key, value in columns.iteritems():
      self.add_column(key, value)

  def update_column(self, key, values):
    """Replace (or add) column ``key``; ``key`` must be a ``str`` and
    ``values`` must match the current row count unless the loop is empty."""
    assert type(key)==type(""), "first argument is column key string"
    if self.size() != 0:
      assert len(values) == self.size(), "len(values) %d != self.size() %d" % (
        len(values),
        self.size(),
        )
    self[key] = values
    self.keys_lower[key.lower()] = key

  def delete_row(self, index):
    """Delete the row at ``index`` from every column."""
    assert index < self.n_rows()
    for column in self._columns.values():
      del column[index]

  def __copy__(self):
    """Return a new loop with its own column mapping; the mapping is copied
    with ``.copy()``, the columns themselves are not duplicated here."""
    new = loop()
    new._columns = self._columns.copy()
    new.keys_lower = self.keys_lower.copy()
    return new

  copy = __copy__

  def __deepcopy__(self, memo):
    new = loop()
    new._columns = copy.deepcopy(self._columns, memo)
    new.keys_lower = copy.deepcopy(self.keys_lower, memo)
    return new

  def deepcopy(self):
    """Return a deep copy of this loop."""
    return copy.deepcopy(self)

  def show(self, out=None, indent="  ", indent_row=None, fmt_str=None, align_columns=True):
    """Write the loop in CIF syntax to ``out`` (default ``sys.stdout``).

    indent -- whitespace-only prefix for the data name lines.
    indent_row -- whitespace-only prefix for the data rows; defaults to
      ``indent``.
    fmt_str -- if given, every row is written as ``fmt_str % row`` after
      the columns that convert cleanly have been turned into numeric arrays;
      values are not quoted.
    align_columns -- when no ``fmt_str`` is given, pad the columns to a
      common width (numeric columns right-aligned, all others left-aligned).
      Note that this mode replaces each stored value by ``format_value(value)``
      in the loop's own columns.

    The loop must have at least one row and one column.
    """
    assert self.n_rows() > 0 and self.n_columns() > 0, "keys: %s %d %d" % (
      self.keys(),
      self.n_rows(),
      self.n_columns(),
      )
    if out is None:
      out = sys.stdout
    if indent_row is None:
      indent_row = indent
    assert indent.strip() == ""
    assert indent_row.strip() == ""
    print >> out, "loop_"
    for k in self.keys():
      print >> out, indent + k
    values = self._columns.values()
    if fmt_str is not None:
      # Pretty printing:
      #   The user is responsible for providing a valid format string.
      #   Values are not quoted - it is the user's responsibility to place
      #   appropriate quotes in the format string if a particular value may
      #   contain spaces.
      # Work on a copy so the numeric conversion leaves the stored columns alone.
      values = copy.deepcopy(values)
      for i, v in enumerate(values):
        for flex_numeric_type in (flex.int, flex.double):
          if not isinstance(v, flex_numeric_type):
            try:
              values[i] = flex_numeric_type(v)
            except ValueError:
              continue
            else:
              break
      for i in range(self.size()):
        print >> out, fmt_str % tuple([values[j][i] for j in range(len(values))])
    elif align_columns:
      fmt_str = []
      for _key, v in self.iteritems():
        for i_v in range(v.size()):
          v[i_v] = format_value(v[i_v])
        # exclude multi-line (semicolon) text fields from column width calculation
        v_ = flex.std_string(item for item in v if "\n" not in item)
        width = v_.max_element_length()
        # See if column contains only number, '.' or '?'
        # right-align numerical columns, left-align everything else
        v = v.select(~( (v == ".") | (v == "?") ))
        try:
          flex.double(v)
        except ValueError:
          # a negative width in "%<width>s" left-aligns the field
          width *= -1
        fmt_str.append("%%%is" %width)
      fmt_str = indent_row + "  ".join(fmt_str)
      for i in range(self.size()):
        print >> out, (fmt_str %
                       tuple([values[j][i]
                              for j in range(len(values))])).rstrip()
    else:
      for i in range(self.size()):
        values_to_print = [format_value(values[j][i]) for j in range(len(values))]
        print >> out, ' '.join([indent] + values_to_print)

  def __str__(self):
    s = StringIO()
    self.show(out=s)
    return s.getvalue()

  def iterrows(self):
    """Yield each row as an ``OrderedDict`` mapping data name to value."""
    keys = self.keys()
    # Fetch the columns once instead of rebuilding the list for every cell.
    columns = self.values()
    for j in range(self.size()):
      yield OrderedDict(zip(keys, [column[j] for column in columns]))

  def sort(self, key=None, reverse=False):
    """Reorder the columns by sorting the ``(data name, column)`` pairs with
    ``sorted(..., key=key, reverse=reverse)``."""
    self._columns = OrderedDict(
      sorted(self._columns.items(), key=key, reverse=reverse))

  def order(self, order):
    """Rearrange the columns to follow ``order``.

    order -- iterable of data names exactly as stored.  Only the listed
      columns are kept in the column mapping; a name that is not a stored
      column raises ``KeyError``.
    """
    tmp = OrderedDict()
    for o in order:
      tmp[o]=self._columns[o]
    self._columns = tmp

  def __eq__(self, other):
    """Two loops are equal when they have the same data names in the same
    order and every pair of corresponding columns matches element-wise."""
    if (len(self) != len(other) or
        self.size() != other.size() or
        self.keys() != other.keys()):
      return False
    for value, other_value in zip(self.values(), other.values()):
      if (value == other_value).count(True) != len(value):
        return False
    return True
Exemple #18
0
 def __init__(self, cif_block, base_array_info=None, wavelengths=None):
     crystal_symmetry_builder.__init__(self, cif_block)
     self._arrays = OrderedDict()
     self._origarrays = OrderedDict(
     )  # used for presenting raw data tables in HKLviewer
     basearraylabels = []
     if base_array_info is not None:
         self.crystal_symmetry = self.crystal_symmetry.join_symmetry(
             other_symmetry=base_array_info.crystal_symmetry_from_file,
             force=True)
         if base_array_info.labels:
             basearraylabels = base_array_info.labels
     if (wavelengths is None):
         wavelengths = {}
     if base_array_info is None:
         base_array_info = miller.array_info(source_type="cif")
     refln_containing_loops = self.get_miller_indices_containing_loops()
     for self.indices, refln_loop in refln_containing_loops:
         self.wavelength_id_array = None
         self.crystal_id_array = None
         self.scale_group_array = None
         wavelength_ids = [None]
         crystal_ids = [None]
         scale_groups = [None]
         for key, value in six.iteritems(refln_loop):
             # Get wavelength_ids, crystal_id, scale_group_code columns for selecting data of other
             # columns in self.get_selection() used by self.flex_std_string_as_miller_array()
             if (key.endswith('wavelength_id') or key.endswith('crystal_id')
                     or key.endswith('scale_group_code')):
                 data = as_int_or_none_if_all_question_marks(
                     value, column_name=key)
                 if data is None:
                     continue
                 counts = data.counts()
                 if key.endswith('wavelength_id'):
                     wavelength_ids = list(counts.keys())
                 if len(counts) == 1: continue
                 array = miller.array(
                     miller.set(self.crystal_symmetry,
                                self.indices).auto_anomalous(), data)
                 if key.endswith('wavelength_id'):
                     self.wavelength_id_array = array
                     wavelength_ids = list(counts.keys())
                 elif key.endswith('crystal_id'):
                     self.crystal_id_array = array
                     crystal_ids = list(counts.keys())
                 elif key.endswith('scale_group_code'):
                     self.scale_group_array = array
                     scale_groups = list(counts.keys())
         labelsuffix = []
         wavelbl = []
         cryslbl = []
         scalegrplbl = []
         self._origarrays["HKLs"] = self.indices
         alllabels = list(sorted(refln_loop.keys()))
         remaininglabls = alllabels[:]  # deep copy the list
         # Parse labels matching cif column conventions
         # https://mmcif.wwpdb.org/dictionaries/mmcif_pdbx_v50.dic/Categories/refln.html
         # and extract groups of labels or just single columns.
         # Groups corresponds to the map coefficients, phase and amplitudes,
         # amplitudes or intensities with sigmas and hendrickson-lattman columns.
         phaseamplabls, remaininglabls = self.get_phase_amplitude_labels(
             remaininglabls)
         mapcoefflabls, remaininglabls = self.get_mapcoefficient_labels(
             remaininglabls)
         HLcoefflabls, remaininglabls = self.get_HL_labels(remaininglabls)
         data_sig_obstype_labls, remaininglabls = self.get_FSigF_ISigI_labels(
             remaininglabls)
         for w_id in wavelength_ids:
             for crys_id in crystal_ids:
                 for scale_group in scale_groups:
                     # If reflection data files contain more than one crystal, wavelength or scalegroup
                     # then add their id(s) as a suffix to data labels computed below. Needed for avoiding
                     # ambuguity but avoid when not needed to make labels more human readable!
                     if (len(wavelength_ids) > 1
                             or len(wavelengths) > 1) and w_id is not None:
                         wavelbl = ["wavelength_id=%i" % w_id]
                     if len(crystal_ids) > 1 and crys_id is not None:
                         cryslbl = ["crystal_id=%i" % crys_id]
                     if len(scale_groups) > 1 and scale_group is not None:
                         scalegrplbl = ["scale_group_code=%i" % scale_group]
                     labelsuffix = scalegrplbl + cryslbl + wavelbl
                     jlablsufx = ""
                     if len(labelsuffix):
                         jlablsufx = "," + ",".join(labelsuffix)
                     for mapcoefflabl in mapcoefflabls:
                         A_array = refln_loop[mapcoefflabl[0]]
                         B_array = refln_loop[mapcoefflabl[1]]
                         # deselect any ? marks in the two arrays, assuming both A and B have the same ? marks
                         selection = self.get_selection(
                             A_array,
                             wavelength_id=w_id,
                             crystal_id=crys_id,
                             scale_group_code=scale_group)
                         A_array = A_array.select(selection)
                         B_array = B_array.select(selection)
                         # form the miller array with map coefficients
                         data = flex.complex_double(flex.double(A_array),
                                                    flex.double(B_array))
                         millarr = miller.array(
                             miller.set(self.crystal_symmetry,
                                        self.indices.select(
                                            selection)).auto_anomalous(),
                             data)
                         # millarr will be None for column data not matching w_id,crys_id,scale_group values
                         if millarr is None: continue
                         labl = basearraylabels + mapcoefflabl + labelsuffix
                         millarr.set_info(
                             base_array_info.customized_copy(
                                 labels=labl,
                                 wavelength=wavelengths.get(w_id, None)))
                         self._arrays[mapcoefflabl[0] + jlablsufx] = millarr
                     for phaseamplabl in phaseamplabls:
                         amplitudestrarray = refln_loop[phaseamplabl[0]]
                         phasestrarray = refln_loop[phaseamplabl[1]]
                         millarr = self.flex_std_string_as_miller_array(
                             amplitudestrarray,
                             wavelength_id=w_id,
                             crystal_id=crys_id,
                             scale_group_code=scale_group)
                         phasesmillarr = self.flex_std_string_as_miller_array(
                             phasestrarray,
                             wavelength_id=w_id,
                             crystal_id=crys_id,
                             scale_group_code=scale_group)
                         # millarr will be None for column data not matching w_id,crys_id,scale_group values
                         if millarr is None or phasesmillarr is None:
                             continue
                         phases = as_flex_double(phasesmillarr,
                                                 phaseamplabl[1])
                         millarr = millarr.phase_transfer(phases, deg=True)
                         labl = basearraylabels + phaseamplabl + labelsuffix
                         millarr.set_info(
                             base_array_info.customized_copy(
                                 labels=labl,
                                 wavelength=wavelengths.get(w_id, None)))
                         self._arrays[phaseamplabl[0] + jlablsufx] = millarr
                     for datlabl, siglabl, otype in data_sig_obstype_labls:
                         datastrarray = refln_loop[datlabl]
                         millarr = self.flex_std_string_as_miller_array(
                             datastrarray,
                             wavelength_id=w_id,
                             crystal_id=crys_id,
                             scale_group_code=scale_group)
                         # millarr will be None for column data not matching w_id,crys_id,scale_group values
                         if millarr is None: continue
                         millarr = as_flex_double(millarr, datlabl)
                         datsiglabl = [datlabl]
                         if siglabl:
                             sigmasstrarray = refln_loop[siglabl]
                             sigmas = self.flex_std_string_as_miller_array(
                                 sigmasstrarray,
                                 wavelength_id=w_id,
                                 crystal_id=crys_id,
                                 scale_group_code=scale_group)
                             sigmas = as_flex_double(sigmas, siglabl)
                             millarr.set_sigmas(sigmas.data())
                             datsiglabl = [datlabl, siglabl]
                         datsiglabl = basearraylabels + datsiglabl + labelsuffix
                         millarr.set_info(
                             base_array_info.customized_copy(
                                 labels=datsiglabl,
                                 wavelength=wavelengths.get(w_id, None)))
                         if otype is not None:
                             millarr.set_observation_type(otype)
                         self._arrays[datlabl + jlablsufx] = millarr
                     for hl_labels in HLcoefflabls:
                         hl_values = [
                             cif_block.get(hl_key) for hl_key in hl_labels
                         ]
                         if hl_values.count(None) == 0:
                             selection = self.get_selection(
                                 hl_values[0],
                                 wavelength_id=w_id,
                                 crystal_id=crys_id,
                                 scale_group_code=scale_group)
                             hl_values = [
                                 as_double_or_none_if_all_question_marks(
                                     hl.select(selection), column_name=lab)
                                 for hl, lab in zip(hl_values, hl_labels)
                             ]
                             # hl_values will be None for column data not matching w_id,crys_id,scale_group values
                             if hl_values == [None, None, None, None]:
                                 continue
                             millarr = miller.array(
                                 miller.set(
                                     self.crystal_symmetry,
                                     self.indices.select(
                                         selection)).auto_anomalous(),
                                 flex.hendrickson_lattman(*hl_values))
                             hlabels = basearraylabels + hl_labels + labelsuffix
                             millarr.set_info(
                                 base_array_info.customized_copy(
                                     labels=hlabels,
                                     wavelength=wavelengths.get(w_id,
                                                                None)))
                             self._arrays[hl_labels[0] +
                                          jlablsufx] = millarr
                     # pick up remaining columns if any that weren't identified above
                     for label in alllabels:
                         if "index_" in label:
                             continue
                         datastrarray = refln_loop[label]
                         if label in remaininglabls:
                             labels = basearraylabels + [label
                                                         ] + labelsuffix
                             lablsufx = jlablsufx
                             millarr = self.flex_std_string_as_miller_array(
                                 datastrarray,
                                 wavelength_id=w_id,
                                 crystal_id=crys_id,
                                 scale_group_code=scale_group)
                             # millarr will be None for column data not matching w_id,crys_id,scale_group values
                             if (label.endswith(
                                     'wavelength_id'
                             ) or label.endswith(
                                     'crystal_id'
                             ) or  # get full array if any of these labels, not just subsets
                                     label.endswith('scale_group_code')):
                                 millarr = self.flex_std_string_as_miller_array(
                                     datastrarray,
                                     wavelength_id=None,
                                     crystal_id=None,
                                     scale_group_code=None)
                                 lablsufx = ""
                                 labels = basearraylabels + [label]
                             if millarr is None: continue
                             otype = self.guess_observationtype(label)
                             if otype is not None:
                                 millarr.set_observation_type(otype)
                             millarr.set_info(
                                 base_array_info.customized_copy(
                                     labels=labels,
                                     wavelength=wavelengths.get(w_id,
                                                                None)))
                             self._arrays[label + lablsufx] = millarr
                         origarr = self.flex_std_string_as_miller_array(
                             datastrarray,
                             wavelength_id=w_id,
                             crystal_id=crys_id,
                             scale_group_code=scale_group)
                         newlabel = label.replace("_refln.", "")
                         newlabel2 = newlabel.replace("_refln_", "")
                         if origarr:  # want only genuine miller arrays
                             self._origarrays[newlabel2 +
                                              jlablsufx] = origarr.data()
     # Convert any groups of I+,I-,SigI+,SigI- (or amplitudes) arrays into anomalous arrays
     # i.e. both friedel mates in the same array
     for key, array in six.iteritems(self._arrays.copy()):
         plus_key = ""
         if '_minus' in key:
             minus_key = key
             plus_key = key.replace('_minus', '_plus')
         elif '-' in key:
             minus_key = key
             plus_key = key.replace('-', '+')
         elif '_plus' in key:
             plus_key = key
             minus_key = key.replace('_plus', '_minus')
         elif '+' in key:
             plus_key = key
             minus_key = key.replace('+', '-')
         if plus_key in self._arrays and minus_key in self._arrays:
             plus_array = self._arrays.pop(plus_key)
             minus_array = self._arrays.pop(minus_key)
             minus_array = minus_array.customized_copy(
                 indices=-minus_array.indices()).set_info(
                     minus_array.info())
             array = plus_array.concatenate(
                 minus_array, assert_is_similar_symmetry=False)
             array = array.customized_copy(anomalous_flag=True)
             array.set_info(minus_array.info().customized_copy(labels=list(
                 OrderedSet(plus_array.info().labels +
                            minus_array.info().labels))))
             array.set_observation_type(plus_array.observation_type())
             self._arrays.setdefault(key, array)
     if len(self._arrays) == 0:
         raise CifBuilderError("No reflection data present in cif block")
     # Sort the ordered dictionary to resemble the order of columns in the cif file
     # This is to avoid any F_meas arrays accidentally being put adjacent to
     # pdbx_anom_difference arrays in the self._arrays OrderedDict. Otherwise these
     # arrays may unintentionally be combined into a reconstructed anomalous amplitude
     # array when saving as an mtz file due to a problem in the iotbx/mtz module.
     # See http://phenix-online.org/pipermail/cctbxbb/2021-March/002289.html
     arrlstord = []
     arrlst = list(self._arrays)
     for arr in arrlst:
         for i, k in enumerate(refln_loop.keys()):
             if arr.split(",")[0] == k:
                 arrlstord.append((arr, i))
     # arrlstord must have the same keys as in the self._arrays dictionary
     assert sorted(arrlst) == sorted([e[0] for e in arrlstord])
     sortarrlst = sorted(arrlstord, key=lambda arrord: arrord[1])
     self._ordarrays = OrderedDict()
     for sortkey, i in sortarrlst:
         self._ordarrays.setdefault(sortkey, self._arrays[sortkey])
     self._arrays = self._ordarrays
Exemple #19
0
class cif(DictMixin):
  """Ordered container of CIF data blocks with case-insensitive name lookup.

  Blocks are stored in ``self.blocks`` under the name they were inserted
  with; ``self.keys_lower`` maps the lower-cased name back to that stored
  name so that lookups ignore case.  Mapping methods not defined here
  (e.g. ``items``/``iteritems``) are presumably supplied by DictMixin.
  """

  def __init__(self, blocks=None):
    """Start from an optional mapping (or iterable of pairs) of blocks."""
    self._errors = None
    self.blocks = OrderedDict() if blocks is None else OrderedDict(blocks)
    # lower-cased block name -> block name as stored in self.blocks
    self.keys_lower = dict(
      (name.lower(), name) for name in self.blocks.keys())

  def __setitem__(self, key, value):
    """Store a block under `key`; Sorry is raised for an invalid name."""
    assert isinstance(value, block)
    # the name is checked against the tag pattern with a '_' prepended
    if re.match(tag_re, '_'+key) is None:
      raise Sorry("%s is not a valid data block name" %key)
    self.blocks[key] = value
    self.keys_lower[key.lower()] = key

  def get(self, key, default=None):
    """Return the block named `key` (ignoring case), else `default`."""
    stored_name = self.keys_lower.get(key.lower())
    if stored_name is not None:
      return self.blocks.get(stored_name, default)
    return default

  def __getitem__(self, key):
    """Return the block named `key` (ignoring case) or raise KeyError."""
    found = self.get(key)
    if found is not None:
      return found
    raise KeyError('Unknown CIF data block name: "%s"' % key)

  def __delitem__(self, key):
    """Remove the block named `key` (ignoring case) and its lookup entry."""
    lowered = key.lower()
    del self.blocks[self.keys_lower[lowered]]
    del self.keys_lower[lowered]

  def keys(self):
    """Return the stored block names in insertion order."""
    return self.blocks.keys()

  def __repr__(self):
    return repr(OrderedDict(self.iteritems()))

  def __copy__(self):
    """Return a new cif built from a shallow copy of the block mapping."""
    shallow = self.blocks.copy()
    return cif(shallow)

  copy = __copy__

  def __deepcopy__(self, memo):
    """Return a new cif built from a deep copy of the block mapping."""
    cloned = copy.deepcopy(self.blocks, memo)
    return cif(cloned)

  def deepcopy(self):
    """Convenience wrapper around copy.deepcopy(self)."""
    return copy.deepcopy(self)

  def show(self, out=None, indent="  ", indent_row=None,
           data_name_field_width=34,
           loop_format_strings=None,
           align_columns=True):
    """Write every block to `out` (default sys.stdout).

    Each block is preceded by a "data_<name>" line; the formatting options
    are forwarded unchanged to the block's own show().
    """
    stream = sys.stdout if out is None else out
    for name, blk in self.items():
      print >> stream, "data_%s" %name
      blk.show(
        out=stream,
        indent=indent,
        indent_row=indent_row,
        data_name_field_width=data_name_field_width,
        loop_format_strings=loop_format_strings,
        align_columns=align_columns)

  def __str__(self):
    """Return the text that show() would write."""
    buf = StringIO()
    self.show(out=buf)
    return buf.getvalue()

  def validate(self, dictionary, show_warnings=True, error_handler=None, out=None):
    """Validate each block against `dictionary`.

    A fresh handler of the same class as `error_handler` (default
    validation.ErrorHandler) is created for every block and recorded in
    the mapping returned by get_errors().  Handlers that counted errors or
    warnings are shown on `out` (default sys.stdout).  The handler of the
    last block is returned, or the initial handler when there are no
    blocks.
    """
    from iotbx.cif import validation
    if out is None:
      out = sys.stdout
    self._errors = {}
    if error_handler is None:
      error_handler = validation.ErrorHandler()
    for name, blk in self.blocks.iteritems():
      # one independent handler per block, same class as the previous one
      error_handler = error_handler.__class__()
      dictionary.set_error_handler(error_handler)
      blk.validate(dictionary)
      self._errors.setdefault(name, error_handler)
      has_findings = (
        error_handler.error_count or error_handler.warning_count)
      if has_findings:
        error_handler.show(show_warnings=show_warnings, out=out)
    return error_handler

  def get_errors(self):
    """Return the per-block handlers from the last validate(), or None."""
    return self._errors

  def sort(self, recursive=False, key=None, reverse=False):
    """Re-order the blocks with sorted(); optionally sort each block too."""
    ordered_pairs = sorted(self.blocks.items(), key=key, reverse=reverse)
    self.blocks = OrderedDict(ordered_pairs)
    if recursive:
      for blk in self.blocks.values():
        blk.sort(recursive=recursive, reverse=reverse)
Exemple #20
0
    def __init__(self, cif_block, base_array_info=None, wavelengths=None):
        """Build miller arrays from the reflection loops of a CIF block.

        The resulting arrays are stored in the ordered mapping
        ``self._arrays``, keyed by column label plus an optional
        ``_<wavelength_id>_<crystal_id>_<scale_group_code>`` suffix.

        Parameters:
          cif_block: the block handed to crystal_symmetry_builder.__init__;
            it is also queried directly for Hendrickson-Lattman columns.
          base_array_info: optional array info.  When given, its
            crystal_symmetry_from_file is joined (force=True) into
            self.crystal_symmetry and its labels are prepended to the labels
            of every array.  Defaults to miller.array_info(source_type="cif").
          wavelengths: optional mapping of wavelength id to wavelength, used
            to annotate arrays; defaults to an empty dict.

        Raises:
          CifBuilderError: if no array could be built from the block.
        """

        def rstrip_substrings(string, substrings):
            # Strip each non-empty suffix in turn, in the order given.
            for substr in substrings:
                if substr == '': continue
                if string.endswith(substr):
                    string = string[:-len(substr)]
            return string

        crystal_symmetry_builder.__init__(self, cif_block)
        if base_array_info is not None:
            self.crystal_symmetry = self.crystal_symmetry.join_symmetry(
                other_symmetry=base_array_info.crystal_symmetry_from_file,
                force=True)
        self._arrays = OrderedDict()
        if (wavelengths is None):
            wavelengths = {}
        if base_array_info is None:
            base_array_info = miller.array_info(source_type="cif")
        refln_containing_loops = self.get_miller_indices_containing_loops()
        # NB: the loop target binds self.indices for each reflection loop
        for self.indices, refln_loop in refln_containing_loops:
            self.wavelength_id_array = None
            self.crystal_id_array = None
            self.scale_group_array = None
            wavelength_ids = [None]
            crystal_ids = [None]
            scale_groups = [None]
            for key, value in refln_loop.iteritems():
                # need to get these arrays first
                if (key.endswith('wavelength_id') or key.endswith('crystal_id')
                        or key.endswith('scale_group_code')):
                    data = as_int_or_none_if_all_question_marks(
                        value, column_name=key)
                    if data is None:
                        continue
                    counts = data.counts()
                    # a single wavelength id is still recorded so that the
                    # wavelength can be looked up for the arrays
                    if key.endswith('wavelength_id'):
                        wavelength_ids = counts.keys()
                    # only one distinct value: nothing to split the data on
                    if len(counts) == 1: continue
                    array = miller.array(
                        miller.set(self.crystal_symmetry,
                                   self.indices).auto_anomalous(), data)
                    if key.endswith('wavelength_id'):
                        self.wavelength_id_array = array
                        wavelength_ids = counts.keys()
                    elif key.endswith('crystal_id'):
                        self.crystal_id_array = array
                        crystal_ids = counts.keys()
                    elif key.endswith('scale_group_code'):
                        self.scale_group_array = array
                        scale_groups = counts.keys()
            # one candidate array per (label, wavelength, crystal, scale group)
            for label, value in sorted(refln_loop.items()):
                for w_id in wavelength_ids:
                    for crys_id in crystal_ids:
                        for scale_group in scale_groups:
                            if 'index_' in label: continue
                            key = label
                            labels = [label]
                            wavelength = None
                            # the id columns themselves are never split
                            if (key.endswith('wavelength_id')
                                    or key.endswith('crystal_id')
                                    or key.endswith('scale_group_code')):
                                w_id = None
                                crys_id = None
                                scale_group = None
                            key_suffix = ''
                            if w_id is not None:
                                key_suffix += '_%i' % w_id
                                labels.insert(0, "wavelength_id=%i" % w_id)
                                wavelength = wavelengths.get(w_id, None)
                            if crys_id is not None:
                                key_suffix += '_%i' % crys_id
                                labels.insert(0, "crystal_id=%i" % crys_id)
                            if scale_group is not None:
                                key_suffix += '_%i' % scale_group
                                labels.insert(
                                    0, "scale_group_code=%i" % scale_group)
                            key += key_suffix
                            sigmas = None
                            if key in self._arrays: continue
                            array = self.flex_std_string_as_miller_array(
                                value,
                                wavelength_id=w_id,
                                crystal_id=crys_id,
                                scale_group_code=scale_group)
                            if array is None: continue
                            if '_sigma' in key:
                                # attach sigmas to the matching data array if
                                # that array has already been stored
                                sigmas_label = label
                                key = None
                                for suffix in ('', '_meas', '_calc'):
                                    if sigmas_label.replace(
                                            '_sigma', suffix) in refln_loop:
                                        key = sigmas_label.replace(
                                            '_sigma', suffix) + key_suffix
                                        break
                                if key is None:
                                    key = sigmas_label + key_suffix
                                elif key in self._arrays and self._arrays[
                                        key].sigmas() is None:
                                    sigmas = array
                                    array = self._arrays[key]
                                    if (not check_array_sizes(
                                            array, sigmas, key, sigmas_label)):
                                        continue
                                    sigmas = as_flex_double(
                                        sigmas, sigmas_label)
                                    array.set_sigmas(sigmas.data())
                                    info = array.info()
                                    array.set_info(
                                        info.customized_copy(
                                            labels=info.labels +
                                            [sigmas_label],
                                            wavelength=wavelength))
                                    continue
                            elif 'PHWT' in key:
                                # combine PHWT phases with the FWT amplitudes
                                phwt_label = label
                                fwt_label = label.replace('PHWT', 'FWT')
                                if fwt_label not in refln_loop: continue
                                phwt_array = array
                                if fwt_label in self._arrays:
                                    array = self._arrays[fwt_label]
                                    if (not check_array_sizes(
                                            array, phwt_array, fwt_label,
                                            phwt_label)):
                                        continue
                                    phases = as_flex_double(
                                        phwt_array, phwt_label)
                                    info = array.info()
                                    array = array.phase_transfer(phases,
                                                                 deg=True)
                                    array.set_info(
                                        info.customized_copy(
                                            labels=info.labels + [phwt_label]))
                                    self._arrays[fwt_label] = array
                                    continue
                            elif 'HL_' in key:
                                # Hendrickson-Lattman coefficients: the four
                                # columns A-D are stored once under HL_A
                                hl_letter = key[key.find('HL_') + 3]
                                hl_key = 'HL_' + hl_letter
                                key = key.replace(hl_key, 'HL_A')
                                if key in self._arrays:
                                    continue  # this array is already dealt with
                                hl_labels = [
                                    label.replace(hl_key, 'HL_' + letter)
                                    for letter in 'ABCD'
                                ]
                                hl_values = [
                                    cif_block.get(hl_label)
                                    for hl_label in hl_labels
                                ]
                                if hl_values.count(None) == 0:
                                    selection = self.get_selection(
                                        hl_values[0],
                                        wavelength_id=w_id,
                                        crystal_id=crys_id,
                                        scale_group_code=scale_group)
                                    hl_values = [
                                        as_double_or_none_if_all_question_marks(
                                            hl.select(selection),
                                            column_name=lab)
                                        for hl, lab in zip(
                                            hl_values, hl_labels)
                                    ]
                                    array = miller.array(
                                        miller.set(
                                            self.crystal_symmetry,
                                            self.indices.select(
                                                selection)).auto_anomalous(),
                                        flex.hendrickson_lattman(*hl_values))
                                    labels = labels[:-1] + hl_labels
                            elif '.B_' in key or '_B_' in key:
                                # B column of an A/B pair: merge it into the
                                # stored A array as the imaginary part
                                if '.B_' in key:
                                    key, key_b = key.replace('.B_', '.A_'), key
                                    label, label_b = label.replace(
                                        '.B_', '.A_'), label
                                elif '_B_' in key:
                                    key, key_b = key.replace('_B', '_A'), key
                                    label, label_b = label.replace('_B',
                                                                   '_A'), label
                                if key in refln_loop and key_b in refln_loop:
                                    b_part = array.data()
                                    if key in self._arrays:
                                        info = self._arrays[key].info()
                                        a_part = self._arrays[key].data()
                                        self._arrays[key] = self._arrays[
                                            key].array(
                                                data=flex.complex_double(
                                                    a_part, b_part))
                                        self._arrays[key].set_info(
                                            info.customized_copy(
                                                labels=info.labels + [key_b]))
                                        continue
                            elif ('phase_' in key and not "_meas" in key
                                  and self.crystal_symmetry.space_group()
                                  is not None):
                                # phase column: pair it with the F_ (or
                                # F_..._au) column of the same loop if present
                                alt_key1 = label.replace('phase_', 'F_')
                                alt_key2 = alt_key1 + '_au'
                                if alt_key1 in refln_loop:
                                    phase_key = label
                                    key = alt_key1 + key_suffix
                                elif alt_key2 in refln_loop:
                                    phase_key = label
                                    key = alt_key2 + key_suffix
                                else:
                                    phase_key = None
                                if phase_key is not None:
                                    phases = array.data()
                                    if key in self._arrays:
                                        array = self._arrays[key]
                                        array = as_flex_double(array, key)
                                        if (not check_array_sizes(
                                                array, phases, key,
                                                phase_key)):
                                            continue
                                        info = self._arrays[key].info()
                                        self._arrays[
                                            key] = array.phase_transfer(
                                                phases, deg=True)
                                        self._arrays[key].set_info(
                                            info.customized_copy(
                                                labels=info.labels +
                                                [phase_key]))
                                    else:
                                        array = self.flex_std_string_as_miller_array(
                                            refln_loop[label],
                                            wavelength_id=w_id,
                                            crystal_id=crys_id,
                                            scale_group_code=scale_group)
                                        if (not check_array_sizes(
                                                array, phases, key,
                                                phase_key)):
                                            continue
                                        # NOTE(review): the result of this call
                                        # is discarded; left unchanged pending
                                        # confirmation of the intent
                                        array.phase_transfer(phases, deg=True)
                                        labels = labels + [label, phase_key]
                            if base_array_info.labels is not None:
                                labels = base_array_info.labels + labels

                            # determine observation type
                            stripped_key = rstrip_substrings(
                                key, [
                                    key_suffix, '_au', '_meas', '_calc',
                                    '_plus', '_minus'
                                ])
                            if (stripped_key.endswith('F_squared')
                                    or stripped_key.endswith('intensity')
                                    or stripped_key.endswith('.I')
                                    or stripped_key.endswith('_I')) and (
                                        array.is_real_array()
                                        or array.is_integer_array()):
                                array.set_observation_type_xray_intensity()
                            elif (stripped_key.endswith('F')
                                  and (array.is_real_array()
                                       or array.is_integer_array())):
                                array.set_observation_type_xray_amplitude()
                            # both observation kinds are handled here; the
                            # amplitude test used to be written twice, which
                            # left intensity arrays out
                            if (array.is_xray_intensity_array()
                                    or array.is_xray_amplitude_array()):
                                # e.g. merge_equivalents treats integer arrays differently, so must
                                # convert integer observation arrays here to be safe
                                if isinstance(array.data(), flex.int):
                                    array = array.customized_copy(
                                        data=array.data().as_double())
                            array.set_info(
                                base_array_info.customized_copy(labels=labels))
                            # observation arrays also carry the wavelength
                            if (array.is_xray_intensity_array()
                                    or array.is_xray_amplitude_array()):
                                info = array.info()
                                array.set_info(
                                    info.customized_copy(
                                        wavelength=wavelength))
                            self._arrays.setdefault(key, array)
        # Merge matching *_plus / *_minus arrays into one anomalous array
        # holding both Friedel mates; iterate over a copy because entries are
        # popped from self._arrays on the way.
        for key, array in self._arrays.copy().iteritems():
            if (key.endswith('_minus') or '_minus_' in key
                    or key.endswith('_plus') or '_plus_' in key):
                if '_minus' in key:
                    minus_key = key
                    plus_key = key.replace('_minus', '_plus')
                elif '_plus' in key:
                    plus_key = key
                    minus_key = key.replace('_plus', '_minus')
                if plus_key in self._arrays and minus_key in self._arrays:
                    plus_array = self._arrays.pop(plus_key)
                    minus_array = self._arrays.pop(minus_key)
                    # the minus array is stored with inverted indices
                    minus_array = minus_array.customized_copy(
                        indices=-minus_array.indices()).set_info(
                            minus_array.info())
                    array = plus_array.concatenate(
                        minus_array, assert_is_similar_symmetry=False)
                    array = array.customized_copy(anomalous_flag=True)
                    array.set_info(
                        minus_array.info().customized_copy(labels=list(
                            OrderedSet(plus_array.info().labels +
                                       minus_array.info().labels))))
                    array.set_observation_type(plus_array.observation_type())
                    self._arrays.setdefault(key, array)

        if len(self._arrays) == 0:
            raise CifBuilderError("No reflection data present in cif block")
Exemple #21
0
class XInfo(object):
  '''A class to represent all of the input to the xia2dpa system, with
  enough information to allow structure solution, as parsed from a
  .xinfo file, an example of which is in the source code.'''

  def __init__(self, xinfo_file, sweep_ids=None, sweep_ranges=None):
    '''Create empty project state, record the optional sweep selection,
    then parse and validate the given .xinfo file.'''

    # containers which the parser fills in
    self._project = None
    self._crystals = OrderedDict()

    # sweep names are matched in lower case while parsing, so the
    # requested identifiers are stored lower-cased too
    lowered_ids = sweep_ids
    if lowered_ids is not None:
      lowered_ids = [name.lower() for name in lowered_ids]

    # ranges are only accepted paired one-to-one with sweep identifiers
    if sweep_ranges is not None:
      assert lowered_ids is not None
      assert len(lowered_ids) == len(sweep_ranges)

    self._sweep_ids = lowered_ids
    self._sweep_ranges = sweep_ranges

    # load the file contents, then check the result
    self._parse_project(xinfo_file)
    self._validate()

  def get_output(self):
    '''Return a text summary: a "Project" line followed, for each crystal,
    by a "Crystal" line and that crystal's own get_output() text.'''

    pieces = ['Project %s\n' % self._project]
    for name, crystal in self._crystals.items():
      pieces.append('Crystal %s\n' % name)
      pieces.append('%s\n' % crystal.get_output())

    # every piece ends in a newline; drop the final character
    return ''.join(pieces)[:-1]

  def get_project(self):
    '''Return the project name taken from the BEGIN PROJECT record, or
    None if no such record has been parsed.'''
    return self._project

  def get_crystals(self):
    '''Return the internal mapping of crystal name to parsed crystal
    information (the object itself, not a copy).'''
    return self._crystals

  def _validate(self):
    '''Validate the structure of this object. This is intended to raise
    an exception if something is wrong, but no checks are implemented
    here: the method currently always returns True.'''

    # placeholder - nothing is inspected yet
    return True

  def _parse_project(self, xinfo_file):
    '''Parse & validate the contents of the .xinfo file. This parses the
    project element (i.e. the whole thing..)'''

    project_records = []

    for r in open(xinfo_file, 'r').readlines():
      record = r.strip()
      if not record:
        pass
      elif record[0] == '!' or record[0] == '#':
        pass
      else :
        # then it may contain something useful...
        project_records.append(record)

    # so now we have loaded the whole file into memory stripping
    # out the crud... let's look for something useful

    for i in range(len(project_records)):
      record = project_records[i]
      if 'BEGIN PROJECT' in record:
        self._project = record.replace('BEGIN PROJECT', '').strip()
      if 'END PROJECT' in record:
        if not self._project == record.replace(
            'END PROJECT', '').strip():
          raise RuntimeError, 'error parsing END PROJECT record'

      # next look for crystals
      if 'BEGIN CRYSTAL ' in record:
        crystal_records = [record]
        while True:
          i += 1
          record = project_records[i]
          crystal_records.append(record)
          if 'END CRYSTAL ' in record:
            break

        self._parse_crystal(crystal_records)

      # that's everything, because parse_crystal handles
      # the rest...

    return

  def _parse_crystal(self, crystal_records):
    '''Parse the interesting information out of the crystal
    description.'''

    crystal = ''

    for i in range(len(crystal_records)):
      record = crystal_records[i]
      if 'BEGIN CRYSTAL ' in record:

        # we should only ever have one of these records in
        # a call to this method

        if crystal != '':
          raise RuntimeError, 'error in BEGIN CRYSTAL record'

        crystal = record.replace('BEGIN CRYSTAL ', '').strip()
        if crystal in self._crystals:
          raise RuntimeError, 'crystal %s already exists' % \
                crystal

        # cardinality:
        #
        # sequence - exactly one, a long string
        # wavelengths - a dictionary of data structures keyed by the
        #               wavelength id
        # sweeps - a dictionary of data structures keyed by the
        #          sweep id
        # ha_info - exactly one dictionary containing the heavy atom
        #           information

        self._crystals[crystal] = {
          'sequence':'',
          'wavelengths': OrderedDict(),
          'samples': OrderedDict(),
          'sweeps': OrderedDict(),
          'ha_info': OrderedDict(),
          'crystal_data': OrderedDict()
        }

      # next look for interesting stuff in the data structure...
      # starting with the sequence

      if 'BEGIN AA_SEQUENCE' in record:
        sequence = ''
        i += 1
        record = crystal_records[i]
        while record != 'END AA_SEQUENCE':
          if not '#' in record or '!' in record:
            sequence += record.strip()

          i += 1
          record = crystal_records[i]

        if self._crystals[crystal]['sequence'] != '':
          raise RuntimeError, 'error two SEQUENCE records found'

        self._crystals[crystal]['sequence'] = sequence

      # look for heavy atom information

      if 'BEGIN HA_INFO' in record:
        i += 1
        record = crystal_records[i]
        while record != 'END HA_INFO':
          key = record.split()[0].lower()
          value = record.split()[1]
          # things which are numbers are integers...
          if 'number' in key:
            value = int(value)
          self._crystals[crystal]['ha_info'][key] = value
          i += 1
          record = crystal_records[i]

      if 'BEGIN SAMPLE' in record:
        sample = record.replace('BEGIN SAMPLE ', '').strip()
        i += 1
        record = crystal_records[i]
        while not 'END SAMPLE' in record:
          i += 1
          record = crystal_records[i]
        self._crystals[crystal]['samples'][sample] = {}

      # look for wavelength definitions
      # FIXME need to check that there are not two wavelength
      # definitions with the same numerical value for the wavelength -
      # unless this is some way of handling RIP? maybe a NOFIXME.

      # look for data blocks

      if 'BEGIN CRYSTAL_DATA' in record:
        i += 1
        record = crystal_records[i]
        while not 'END CRYSTAL_DATA' in record:
          key = record.split()[0].lower()
          value = record.replace(record.split()[0], '').strip()
          self._crystals[crystal]['crystal_data'][key] = value
          i += 1
          record = crystal_records[i]

      if 'BEGIN WAVELENGTH ' in record:
        wavelength = record.replace('BEGIN WAVELENGTH ', '').strip()

        # check that this is a new wavelength definition
        if wavelength in self._crystals[crystal]['wavelengths']:
          raise RuntimeError, \
                'wavelength %s already exists for crystal %s' % \
                (wavelength, crystal)

        self._crystals[crystal]['wavelengths'][wavelength] = { }
        i += 1
        record = crystal_records[i]

        # populate this with interesting things
        while not 'END WAVELENGTH' in record:

          # deal with a nested WAVELENGTH_STATISTICS block

          if 'BEGIN WAVELENGTH_STATISTICS' in record:
            self._crystals[crystal]['wavelengths'][
                wavelength]['statistics'] = { }
            i += 1
            record = crystal_records[i]
            while not 'END WAVELENGTH_STATISTICS' in record:
              key, value = tuple(record.split())
              self._crystals[crystal]['wavelengths'][
                  wavelength]['statistics'][
                  key.lower()] = float(value)
              i += 1
              record = crystal_records[i]

          # else deal with the usual tokens

          key = record.split()[0].lower()

          if key == 'resolution':

            lst = record.split()

            if len(lst) < 2 or len(lst) > 3:
              raise RuntimeError, 'resolution dmin [dmax]'

            if len(lst) == 2:
              dmin = float(lst[1])

              self._crystals[crystal]['wavelengths'][
                  wavelength]['dmin'] = dmin

            else:
              dmin = min(map(float, lst[1:]))
              dmax = max(map(float, lst[1:]))

              self._crystals[crystal]['wavelengths'][
                  wavelength]['dmin'] = dmin

              self._crystals[crystal]['wavelengths'][
                  wavelength]['dmax'] = dmax

            i += 1
            record = crystal_records[i]
            continue

          if len(record.split()) == 1:
            raise RuntimeError, 'missing value for token %s' % \
                  record.split()[0]

          try:
            value = float(record.split()[1])
          except ValueError, e:
            value = record.replace(record.split()[0], '').strip()

          self._crystals[crystal]['wavelengths'][
              wavelength][key] = value
          i += 1
          record = crystal_records[i]

      # next look for sweeps, checking that the wavelength
      # definitions match up...

      if 'BEGIN SWEEP' in record:
        sweep = record.replace('BEGIN SWEEP', '').strip()

        if self._sweep_ids is not None and sweep.lower() not in self._sweep_ids:
          continue

        elif self._sweep_ranges is not None:
          start_end = self._sweep_ranges[self._sweep_ids.index(sweep.lower())]
        else:
          start_end = None

        if sweep in self._crystals[crystal]['sweeps']:
          raise RuntimeError, \
                'sweep %s already exists for crystal %s' % \
                (sweep, crystal)

        self._crystals[crystal]['sweeps'][sweep] = { }
        self._crystals[crystal]['sweeps'][sweep][
            'excluded_regions'] = []

        if start_end is not None:
          self._crystals[crystal]['sweeps'][sweep][
            'start_end'] = start_end

        # in here I expect to find IMAGE, DIRECTORY, WAVELENGTH
        # and optionally BEAM

        # FIXME 30/OCT/06 this may not be the case, for instance
        # if an INTEGRATED_REFLECTION_FILE record is in there...
        # c/f XProject.py, XSweep.py

        i += 1
        record = crystal_records[i]

        # populate this with interesting things
        while not 'END SWEEP' in record:
          # allow for WAVELENGTH_ID (bug # 2358)
          if 'WAVELENGTH_ID' == record.split()[0]:
            record = record.replace('WAVELENGTH_ID',
                                    'WAVELENGTH')

          if 'WAVELENGTH' == record.split()[0]:
            wavelength = record.replace('WAVELENGTH', '').strip()
            if not wavelength in self._crystals[crystal]['wavelengths'].keys():
              raise RuntimeError, \
                    'wavelength %s unknown for crystal %s' % \
                    (wavelength, crystal)
            self._crystals[crystal]['sweeps'][sweep]['wavelength'] = wavelength

          elif 'SAMPLE' == record.split()[0]:
            sample = record.replace('SAMPLE ', '').strip()
            if not sample in self._crystals[crystal]['samples'].keys():
              raise RuntimeError, \
                  'sample %s unknown for crystal %s' % (sample, crystal)
            self._crystals[crystal]['sweeps'][sweep]['sample'] = sample

          elif 'BEAM' == record.split()[0]:
            beam = map(float, record.split()[1:])
            self._crystals[crystal]['sweeps'][sweep]['beam'] = beam

          elif 'DISTANCE' == record.split()[0]:
            distance = float(record.split()[1])
            self._crystals[crystal]['sweeps'][sweep]['distance'] = distance

          elif 'EPOCH' == record.split()[0]:
            epoch = int(record.split()[1])
            self._crystals[crystal]['sweeps'][sweep]['epoch'] = epoch

          elif 'REVERSEPHI' == record.split()[0]:
            self._crystals[crystal]['sweeps'][sweep]['reversephi'] = True

          elif 'START_END' == record.split()[0]:
            if 'start_end' not in self._crystals[crystal]['sweeps'][sweep]:
              start_end = map(int, record.split()[1:])
              if len(start_end) != 2:
                raise RuntimeError, \
                      'START_END start end, not "%s"' % record
              self._crystals[crystal]['sweeps'][sweep]['start_end'] = start_end

          elif 'EXCLUDE' == record.split()[0]:
            if record.split()[1].upper() == 'ICE':
              self._crystals[crystal]['sweeps'][sweep]['ice'] = True
            else:
              excluded_region = map(float, record.split()[1:])
              if len(excluded_region) != 2:
                raise RuntimeError, \
                      'EXCLUDE upper lower, not "%s". \
                       eg. EXCLUDE 2.28 2.22' % record
              if excluded_region[0] <= excluded_region[1]:
                raise RuntimeError, \
                      'EXCLUDE upper lower, where upper \
                       must be greater than lower (not "%s").\n\
                       eg. EXCLUDE 2.28 2.22' % record
              self._crystals[crystal]['sweeps'][sweep]['excluded_regions'].append(
                excluded_region)

          else:
            key = record.split()[0]
            value = record.replace(key, '').strip()
            self._crystals[crystal]['sweeps'][sweep][key] = value

          i += 1
          record = crystal_records[i]

      # now look for one-record things

      if 'SCALED_MERGED_REFLECTION_FILE' in record:
        self._crystals[crystal][
            'scaled_merged_reflection_file'] = \
            record.replace('SCALED_MERGED_REFLECTION_FILE',
                           '').strip()

      if 'REFERENCE_REFLECTION_FILE' in record:
        self._crystals[crystal][
            'reference_reflection_file'] = \
            record.replace('REFERENCE_REFLECTION_FILE',
                           '').strip()

      if 'FREER_FILE' in record:

        # free file also needs to be used for indexing reference to
        # make any sense at all...

        self._crystals[crystal][
            'freer_file'] = record.replace('FREER_FILE', '').strip()
        self._crystals[crystal][
            'reference_reflection_file'] = \
            record.replace('FREER_FILE', '').strip()

      # user assigned spacegroup and cell constants
      if 'USER_SPACEGROUP' in record:
        self._crystals[crystal][
            'user_spacegroup'] = record.replace(
            'USER_SPACEGROUP', '').strip()

      if 'USER_CELL' in record:
        self._crystals[crystal][
            'user_cell'] = tuple(map(float, record.split()[1:]))
Exemple #22
0
    def as_json(self, filename=None, compact=False, split=False):
        '''
        Dump the experiment list as JSON.

        :param filename: Path of the file to write. When None, nothing is
                         written and the JSON text is returned instead.
        :param compact: If True use the tightest separators (no whitespace),
                        otherwise indent the output by two spaces.
        :param split: If True (and a filename is given) every model is written
                      to its own "<base>_<kind>_<index>.json" file and the
                      experiments in the main file refer to those paths
                      instead of list indices.
        :return: The JSON text when filename is None, otherwise None.
        '''
        import json
        from os.path import splitext
        from libtbx.containers import OrderedDict

        # Model categories, listed in the order their files are written out
        model_kinds = ('imageset', 'beam', 'detector', 'goniometer',
                       'scan', 'crystal', 'profile', 'scaling_model')

        dictionary = self._experiment_list.to_dict()

        if filename is None or not split:
            # Everything goes into one document
            to_write = [(filename, dictionary)]
        else:
            basepath = splitext(filename)[0]

            # One (path, model dictionary) pair per model, grouped by kind
            files_by_kind = {}
            for kind in model_kinds:
                files_by_kind[kind] = [
                    ('%s_%s_%d.json' % (basepath, kind, index), model)
                    for index, model in enumerate(dictionary[kind])]

            # The main document only carries the experiments themselves
            edict = OrderedDict([('__id__', 'ExperimentList'),
                                 ('experiment', dictionary['experiment'])])

            # Point each experiment at the model files rather than at indices
            for experiment in edict['experiment']:
                for kind in model_kinds:
                    if kind in experiment:
                        model_index = experiment[kind]
                        experiment[kind] = files_by_kind[kind][model_index][0]

            to_write = []
            for kind in model_kinds:
                to_write.extend(files_by_kind[kind])
            to_write.append((filename, edict))

        for fname, obj in to_write:
            if compact:
                layout = {'separators': (',', ':')}
            else:
                layout = {'indent': 2}
            text = json.dumps(obj, ensure_ascii=True, **layout)

            # Without a filename the text is handed straight back to the caller
            if fname is None:
                return text
            with open(fname, 'w') as outfile:
                outfile.write(text)
Exemple #23
0
class cif(DictMixin):
  """
  Ordered collection of CIF data blocks, keyed by data block name.

  Blocks live in ``self.blocks`` under the name they were stored with, while
  ``self.keys_lower`` maps the lower-cased name back to that spelling so that
  lookups and deletions are case-insensitive.
  """

  def __init__(self, blocks=None):
    """Create the container, optionally pre-filled from ``blocks``
    (anything ``OrderedDict`` accepts)."""
    if blocks is None:
      self.blocks = OrderedDict()
    else:
      self.blocks = OrderedDict(blocks)
    # lower-cased name -> name as stored
    self.keys_lower = dict((name.lower(), name) for name in self.blocks)

  def __setitem__(self, key, value):
    """Store ``value`` (a ``block``) under ``key``; raises Sorry when ``key``
    is not a valid data block name."""
    assert isinstance(value, block)
    if re.match(tag_re, '_'+key) is None:
      raise Sorry("%s is not a valid data block name" %key)
    self.keys_lower[key.lower()] = key
    self.blocks[key] = value

  def get(self, key, default=None):
    """Case-insensitive lookup returning ``default`` for unknown names."""
    stored_name = self.keys_lower.get(key.lower())
    if stored_name is not None:
      return self.blocks.get(stored_name, default)
    return default

  def __getitem__(self, key):
    """Case-insensitive lookup raising KeyError for unknown names."""
    found = self.get(key)
    if found is not None:
      return found
    raise KeyError('Unknown CIF data block name: "%s"' % key)

  def __delitem__(self, key):
    """Remove the block matching ``key`` (case-insensitive)."""
    lowered = key.lower()
    del self.blocks[self.keys_lower[lowered]]
    del self.keys_lower[lowered]

  def keys(self):
    """Block names in their stored order and spelling."""
    return self.blocks.keys()

  def __repr__(self):
    as_ordered_dict = OrderedDict(self.iteritems())
    return repr(as_ordered_dict)

  def __copy__(self):
    """Shallow copy: a new container sharing the same block objects."""
    shallow_blocks = self.blocks.copy()
    return cif(shallow_blocks)

  copy = __copy__

  def __deepcopy__(self, memo):
    """Deep copy: a new container holding deep copies of every block."""
    cloned_blocks = copy.deepcopy(self.blocks, memo)
    return cif(cloned_blocks)

  def deepcopy(self):
    """Convenience wrapper around ``copy.deepcopy(self)``."""
    return copy.deepcopy(self)

  def show(self, out=None, indent="  ", indent_row=None,
           data_name_field_width=34,
           loop_format_strings=None):
    """Write every block to ``out`` (sys.stdout by default), each preceded by
    its ``data_<name>`` header; the formatting options are passed on to
    ``block.show``."""
    if out is None:
      out = sys.stdout
    show_options = dict(
      out=out,
      indent=indent,
      indent_row=indent_row,
      data_name_field_width=data_name_field_width,
      loop_format_strings=loop_format_strings)
    for name, data_block in self.items():
      print >> out, "data_%s" %name
      data_block.show(**show_options)

  def __str__(self):
    """The text produced by ``show``, as a string."""
    buffer = StringIO()
    self.show(out=buffer)
    return buffer.getvalue()

  def validate(self, dictionary, show_warnings=True, error_handler=None, out=None):
    """Validate each block against ``dictionary``.

    A fresh handler of the same class as ``error_handler`` (default
    ``validation.ErrorHandler``) is created for every block; handlers that
    recorded errors or warnings are shown on ``out``.  Only the handler of
    the last block is returned.
    """
    from iotbx.cif import validation
    if out is None:
      out = sys.stdout
    if error_handler is None:
      error_handler = validation.ErrorHandler()
    # per-block handlers are collected here but not handed back to the caller
    handlers_by_block = {}
    for name, data_block in self.blocks.iteritems():
      error_handler = error_handler.__class__()
      dictionary.set_error_handler(error_handler)
      data_block.validate(dictionary)
      handlers_by_block.setdefault(name, error_handler)
      has_messages = error_handler.error_count or error_handler.warning_count
      if has_messages:
        error_handler.show(show_warnings=show_warnings, out=out)
    return error_handler

  def sort(self, recursive=False, key=None, reverse=False):
    """Re-order the blocks by sorting their (name, block) items; with
    ``recursive`` each block is asked to sort itself as well (``key`` is not
    forwarded to the blocks)."""
    ordered_items = sorted(self.blocks.items(), key=key, reverse=reverse)
    self.blocks = OrderedDict(ordered_items)
    if not recursive:
      return
    for data_block in self.blocks.values():
      data_block.sort(recursive=recursive, reverse=reverse)
Exemple #24
0
 def sort(self, key=None, reverse=False):
   """Re-order the columns by sorting their (name, values) items.

   ``key`` and ``reverse`` have the same meaning as for ``sorted``.
   """
   ordered_items = list(self._columns.items())
   ordered_items.sort(key=key, reverse=reverse)
   self._columns = OrderedDict(ordered_items)
Exemple #25
0
 def __init__(self, blocks=None):
   """Set up the block store, optionally pre-filled from ``blocks``
   (anything ``OrderedDict`` accepts)."""
   if blocks is None:
     self.blocks = OrderedDict()
   else:
     self.blocks = OrderedDict(blocks)
   # lower-cased name -> name as stored, for case-insensitive lookups
   self.keys_lower = dict((name.lower(), name) for name in self.blocks)
Exemple #26
0
 def __init__(self, imageset, size=10):
     """Remember the imageset and the ``size`` setting, and start with an
     empty ordered store for image data."""
     self._image_data = OrderedDict()
     self.imageset, self.size = imageset, size
Exemple #27
0
 def __init__(self, imageset, size=10):
   """Remember the imageset and the ``size`` setting, and start with an
   empty ordered store for image data."""
   self._image_data = OrderedDict()
   self.imageset, self.size = imageset, size
Exemple #28
0
    def add_miller_array(self,
                         array,
                         array_type=None,
                         column_name=None,
                         column_names=None):
        """
        Add the data of a miller array as new columns of the reflection loop.

        Exactly one of array_type, column_name or column_names must be given:

        :param array: the miller array whose data (and, where applicable,
                      sigmas) are written
        :param array_type: 'calc' or 'meas'; the column names are then derived
                           from self.prefix
        :param column_name: name of the single column to create
        :param column_names: names of all the columns to create

        If the indices of the array differ from those already in the loop,
        both sides are padded with '?' so that every row has a value in every
        column.
        """

        # exactly one of the three naming arguments may be supplied
        assert [array_type, column_name, column_names].count(None) == 2
        if array_type is not None:
            assert array_type in ('calc', 'meas')
        elif column_name is not None:
            # a single name is handled as a one-element list from here on
            column_names = [column_name]
        if array.is_complex_array():
            # complex data always occupy two columns
            if column_names is None:
                column_names = [
                    self.prefix + 'F_' + array_type,
                    self.prefix + 'phase_' + array_type
                ]
            else:
                assert len(column_names) == 2
            # A/B style names get real and imaginary parts, anything else
            # gets amplitude and phase (in degrees)
            if (('_A_' in column_names[0] and '_B_' in column_names[1]) or
                ('.A_' in column_names[0] and '.B_' in column_names[1])):
                data = [
                    flex.real(array.data()).as_string(),
                    flex.imag(array.data()).as_string()
                ]
            else:
                data = [
                    flex.abs(array.data()).as_string(),
                    array.phases(deg=True).data().as_string()
                ]
        elif array.is_hendrickson_lattman_array():
            # Hendrickson-Lattman coefficients: one column per A, B, C, D
            if column_names is None:
                column_names = [
                    self.prefix + 'HL_%s_iso' % abcd for abcd in 'ABCD'
                ]
            else:
                assert len(column_names) == 4
            data = [d.as_string() for d in array.data().as_abcd()]
        else:
            if array_type is not None:
                # intensities are labelled F_squared_*, everything else F_*
                if array.is_xray_intensity_array():
                    obs_ext = 'squared_'
                else:
                    obs_ext = ''
                column_names = [self.prefix + 'F_' + obs_ext + array_type]
                if array.sigmas() is not None:
                    column_names.append(self.prefix + 'F_' + obs_ext + 'sigma')
            # string data are used as they are, other data are converted
            if isinstance(array.data(), flex.std_string):
                data = [array.data()]
            else:
                data = [array.data().as_string()]
            if array.anomalous_flag():
                # Twice the usual number of column names means the caller
                # wants the + and - hemispheres in separate columns
                if ((array.sigmas() is not None and len(column_names) == 4) or
                    (array.sigmas() is None and len(column_names) == 2)):
                    data = []
                    asu, matches = array.match_bijvoet_mates()
                    for anomalous_sign in ("+", "-"):
                        # paired and unpaired reflections of this hemisphere
                        sel = matches.pairs_hemisphere_selection(
                            anomalous_sign)
                        sel.extend(
                            matches.singles_hemisphere_selection(
                                anomalous_sign))
                        if (anomalous_sign == "+"):
                            # first half of the names, indices as they are
                            indices = asu.indices().select(sel)
                            hemisphere_column_names = column_names[:len(
                                column_names) // 2]
                        else:
                            # second half of the names, indices negated
                            indices = -asu.indices().select(sel)
                            hemisphere_column_names = column_names[
                                len(column_names) // 2:]
                        hemisphere_data = asu.data().select(sel)
                        hemisphere_array = miller.array(
                            miller.set(array.crystal_symmetry(), indices),
                            hemisphere_data)
                        if array.sigmas() is not None:
                            hemisphere_array.set_sigmas(
                                asu.sigmas().select(sel))
                        if self.refln_loop is None:
                            # then this is the first array to be added to the loop,
                            # hack so we don't have both hemispheres of indices
                            self.indices = indices
                        # each hemisphere is added through a recursive call
                        self.add_miller_array(
                            hemisphere_array,
                            column_names=hemisphere_column_names)
                    # both hemispheres have been added, nothing left to do
                    return
            if array.sigmas() is not None and len(column_names) == 2:
                data.append(array.sigmas().as_string())
        # Reconcile the indices of this array with those already in the loop
        if not (self.indices.size() == array.indices().size()
                and self.indices.all_eq(array.indices())):
            from cctbx.miller import match_indices
            # work on a copy so the indices of the array are left untouched
            other_indices = array.indices().deep_copy()
            match = match_indices(self.indices, other_indices)
            if match.singles(0).size():
                # array is missing some reflections indices that already appear in the loop
                # therefore pad the data with '?' values
                other_indices.extend(
                    self.indices.select(match.single_selection(0)))
                for d in data:
                    d.extend(
                        flex.std_string(['?'] *
                                        (other_indices.size() - d.size())))
                for d in data:
                    assert d.size() == other_indices.size()
                match = match_indices(self.indices, other_indices)
            if match.singles(1).size():
                # this array contains some reflections that are not already present in the
                # cif loop, therefore need to add rows of '?' values
                single_indices = other_indices.select(
                    match.single_selection(1))
                self.indices.extend(single_indices)
                # every loop column other than the first three is padded
                n_data_columns = len(self.refln_loop) - 3
                for hkl in single_indices:
                    row = list(hkl) + ['?'] * n_data_columns
                    self.refln_loop.add_row(row)
                match = match_indices(self.indices, other_indices)

            # reorder the new data with the permutation from the final match
            match = match_indices(self.indices, other_indices)
            perm = match.permutation()
            data = [d.select(perm) for d in data]

        if self.refln_loop is None:
            # first array: create the loop from the indices
            self.refln_loop = miller_indices_as_cif_loop(self.indices,
                                                         prefix=self.prefix)
        columns = OrderedDict(zip(column_names, data))
        # refuse to overwrite a column that already exists
        for key in columns:
            assert key not in self.refln_loop
        self.refln_loop.add_columns(columns)
Exemple #29
0
 def sort(self, recursive=False, key=None, reverse=False):
   """Re-order the blocks by sorting their (name, block) items.

   With ``recursive`` every block is asked to sort itself too; ``reverse``
   is passed on to the blocks, ``key`` is not.
   """
   ordered_items = list(self.blocks.items())
   ordered_items.sort(key=key, reverse=reverse)
   self.blocks = OrderedDict(ordered_items)
   if not recursive:
     return
   for child in self.blocks.values():
     child.sort(recursive=recursive, reverse=reverse)
Exemple #30
0
 def sort(self, key=None, reverse=False):
     """Re-order the columns by sorting their (name, values) items.

     ``key`` and ``reverse`` have the same meaning as for ``sorted``.
     """
     ordered_items = list(self._columns.items())
     ordered_items.sort(key=key, reverse=reverse)
     self._columns = OrderedDict(ordered_items)
Exemple #31
0
class loop(DictMixin):
  """
  A CIF ``loop_``: an ordered set of named columns.

  Columns are kept in ``self._columns`` (name -> flex.std_string) and
  ``self.keys_lower`` maps each lower-cased name to the name as stored, so
  that item access and deletion are case-insensitive.
  """

  def __init__(self, header=None, data=None):
    """
    Build a loop either from column names plus a flat, row-major sequence of
    values (``header`` and optionally ``data``), or from a mapping of column
    name to values (``data`` only).
    """
    self._columns = OrderedDict()
    self.keys_lower = {}
    if header is not None:
      for key in header:
        self.setdefault(key, flex.std_string())
      if data is not None:
        # the number of data items must be an exact multiple of the number of headers
        assert len(data) % len(header) == 0, "Wrong number of data items for loop"
        n_rows = len(data)//len(header)
        n_columns = len(header)
        for i in range(n_rows):
          self.add_row([data[i*n_columns+j] for j in range(n_columns)])
    elif data is not None:
      assert isinstance(data, (dict, OrderedDict))
      self.add_columns(data)
      self.keys_lower = dict(
        [(key.lower(), key) for key in self._columns.keys()])

  def __setitem__(self, key, value):
    """
    Store ``value`` as column ``key``, converting it to flex.std_string.

    Raises Sorry if ``key`` is not a valid data name; asserts that the length
    matches the existing columns when the loop is not empty.
    """
    if not re.match(tag_re, key):
      raise Sorry("%s is not a valid data name" %key)
    if len(self) > 0:
      assert len(value) == self.size()
    if not isinstance(value, flex.std_string):
      # NOTE(review): there is no break after the isinstance conversion, so a
      # flex.int value is passed to the flex.double attempt on the next pass;
      # confirm this is intended before restructuring.
      for flex_numeric_type in (flex.int, flex.double):
        if isinstance(value, flex_numeric_type):
          value = value.as_string()
        else:
          try:
            value = flex_numeric_type(value).as_string()
          except TypeError:
            continue
          else:
            break
      if not isinstance(value, flex.std_string):
        value = flex.std_string(value)
    # value must be a mutable type
    assert hasattr(value, '__setitem__')
    self._columns[key] = value
    self.keys_lower[key.lower()] = key

  def __getitem__(self, key):
    """Return the column matching ``key`` (case-insensitive)."""
    return self._columns[self.keys_lower[key.lower()]]

  def __delitem__(self, key):
    """Remove the column matching ``key`` (case-insensitive)."""
    del self._columns[self.keys_lower[key.lower()]]
    del self.keys_lower[key.lower()]

  def keys(self):
    """Column names in their stored order and spelling."""
    return self._columns.keys()

  def __repr__(self):
    return repr(OrderedDict(self.iteritems()))

  def name(self):
    """Common leading part of the column names, without trailing '_' or '.'."""
    return common_substring(self.keys()).rstrip('_').rstrip('.')

  def size(self):
    """Length of the longest column (0 for a loop without columns)."""
    size = 0
    for column in self.values():
      size = max(size, len(column))
    return size

  def n_rows(self):
    """Number of rows; same as ``size()``."""
    return self.size()

  def n_columns(self):
    """Number of columns."""
    return len(self.keys())

  def add_row(self, row, default_value="?"):
    """
    Append one row.

    ``row`` is either a dict keyed by column name (columns it does not
    mention receive ``default_value``) or a sequence with exactly one item per
    column, given in column order.  Values are stored as ``str(value)``.
    """
    if isinstance(row, dict):
      for key in self:
        if key in row:
          self[key].append(str(row[key]))
        else:
          self[key].append(default_value)
    else:
      assert len(row) == len(self)
      for i, key in enumerate(self):
        self[key].append(str(row[i]))

  def add_column(self, key, values):
    """Add (or replace) column ``key``; the length must match a non-empty loop."""
    if self.size() != 0:
      assert len(values) == self.size()
    # __setitem__ also records the case-insensitive alias
    self[key] = values

  def add_columns(self, columns):
    """Add every (name, values) pair of the mapping ``columns``."""
    assert isinstance(columns, (dict, OrderedDict))
    for key, value in columns.iteritems():
      self.add_column(key, value)

  def update_column(self, key, values):
    """Replace the contents of column ``key``; ``key`` must be a plain string
    and the length must match a non-empty loop."""
    assert type(key)==type(""), "first argument is column key string"
    if self.size() != 0:
      assert len(values) == self.size(), "len(values) %d != self.size() %d" % (
        len(values),
        self.size(),
        )
    # __setitem__ also records the case-insensitive alias
    self[key] = values

  def delete_row(self, index):
    """Delete row ``index`` from every column."""
    assert index < self.n_rows()
    for column in self._columns.values():
      del column[index]

  def __copy__(self):
    """Shallow copy: new containers that share the column objects."""
    new = loop()
    new._columns = self._columns.copy()
    new.keys_lower = self.keys_lower.copy()
    return new

  copy = __copy__

  def __deepcopy__(self, memo):
    """Deep copy: the columns are copied as well."""
    new = loop()
    new._columns = copy.deepcopy(self._columns, memo)
    new.keys_lower = copy.deepcopy(self.keys_lower, memo)
    return new

  def deepcopy(self):
    """Convenience wrapper around ``copy.deepcopy(self)``."""
    return copy.deepcopy(self)

  def show(self, out=None, indent="  ", indent_row=None, fmt_str=None, align_columns=True):
    """
    Write the loop to ``out`` (sys.stdout by default).

    The header is always ``loop_`` followed by one indented column name per
    line.  The rows are then written in one of three ways:

    * ``fmt_str`` given: each row is ``fmt_str %`` its values; columns are
      converted to flex.int / flex.double for this where possible.
    * ``align_columns`` true: values are passed through ``format_value`` and
      padded to a common width per column.  This rewrites the stored column
      values in place with their formatted form.
    * otherwise: formatted values separated by single spaces.

    The loop must have at least one row and one column, and both indents
    must consist of whitespace only.
    """
    assert self.n_rows() > 0 and self.n_columns() > 0, "keys: %s %d %d" % (
      self.keys(),
      self.n_rows(),
      self.n_columns(),
      )
    if out is None:
      out = sys.stdout
    if indent_row is None:
      indent_row = indent
    assert indent.strip() == ""
    assert indent_row.strip() == ""
    print >> out, "loop_"
    for k in self.keys():
      print >> out, indent + k
    values = self._columns.values()
    range_len_values = range(len(values))
    if fmt_str is not None:
      # Pretty printing:
      #   The user is responsible for providing a valid format string.
      #   Values are not quoted - it is the user's responsibility to place
      #   appropriate quotes in the format string if a particular value may
      #   contain spaces.
      # work on a copy so the numeric conversion does not touch the columns
      values = copy.deepcopy(values)
      for i, v in enumerate(values):
        for flex_numeric_type in (flex.int, flex.double):
          if not isinstance(v, flex_numeric_type):
            try:
              values[i] = flex_numeric_type(v)
            except ValueError:
              continue
            else:
              break
      for i in range(self.size()):
        print >> out, fmt_str % tuple([values[j][i] for j in range_len_values])
    elif align_columns:
      fmt_str = []
      for _, v in self.iteritems():
        # formats the stored values in place; the rows printed below read
        # these formatted values back through `values`
        for i_v in range(v.size()):
          v[i_v] = format_value(v[i_v])
        # exclude and semicolon text fields from column width calculation
        v_ = flex.std_string(item for item in v if "\n" not in item)
        width = v_.max_element_length()
        # See if column contains only number, '.' or '?'
        # right-align numerical columns, left-align everything else
        v = v.select(~( (v == ".") | (v == "?") ))
        try:
          flex.double(v)
        except ValueError:
          width *= -1
        fmt_str.append("%%%is" %width)
      fmt_str = indent_row + "  ".join(fmt_str)
      for i in range(self.size()):
        print >> out, (fmt_str %
                       tuple([values[j][i]
                              for j in range_len_values])).rstrip()
    else:
      for i in range(self.size()):
        values_to_print = [format_value(values[j][i]) for j in range_len_values]
        print >> out, ' '.join([indent] + values_to_print)

  def __str__(self):
    """The text produced by ``show``, as a string."""
    s = StringIO()
    self.show(out=s)
    return s.getvalue()

  def iterrows(self):
    """ Warning! Still super-slow!

    Yield each row as an OrderedDict of column name -> value.
    """
    keys = self.keys()
    s_values = self.values()
    range_len_self = range(len(self))
    # range is 1% faster than xrange in this particular place.
    # tuple (s_values...) is slightly faster than list
    for j in range(self.size()):
      yield OrderedDict(zip(keys, (s_values[i][j] for i in range_len_self)))

  def find_row(self, kv_dict):
    """
    Return every row (as an OrderedDict) whose values equal those given in
    ``kv_dict`` for all of its keys; each key must be an existing column name.
    """
    self_keys = self.keys()
    for k in kv_dict.keys():
      assert k in self_keys
    result = []
    s_values = self.values()
    range_len_self = range(len(self))
    for i in range(self.size()):
      goodrow = True
      for k, v in kv_dict.iteritems():
        if self[k][i] != v:
          goodrow = False
          break
      if goodrow:
        result.append(OrderedDict(zip(self_keys, [s_values[j][i] for j in range_len_self])))
    return result

  def sort(self, key=None, reverse=False):
    """Re-order the columns by sorting their (name, values) items."""
    self._columns = OrderedDict(
      sorted(self._columns.items(), key=key, reverse=reverse))

  def order(self, order):
    """
    Rearrange the columns into the sequence given by ``order``.

    Only the columns named in ``order`` are kept; every name must match a
    stored column name exactly, otherwise KeyError is raised.
    """
    tmp = OrderedDict()
    for o in order:
      tmp[o]=self._columns[o]
    self._columns = tmp
    # keep the case-insensitive aliases in step with the retained columns
    self.keys_lower = dict((k.lower(), k) for k in tmp)

  def __eq__(self, other):
    """Loops are equal when they have the same column names in the same
    order, the same size, and element-wise equal columns."""
    if (len(self) != len(other) or
        self.size() != other.size() or
        self.keys() != other.keys()):
      return False
    for value, other_value in zip(self.values(), other.values()):
      if (value == other_value).count(True) != len(value):
        return False
    return True
Exemple #32
0
  def __init__(self, unmerged_intensities, batches_all, n_bins=20, d_min=None,
               id_to_batches=None):
    """
    Split the unmerged data into runs, merge each run, and produce the
    correlation analysis and its output.

    :param unmerged_intensities: unmerged intensities; observations whose
                                 sigma is not positive are discarded
    :param batches_all: batch number of every observation, parallel to
                        unmerged_intensities
    :param n_bins: number of resolution bins set up on the intensities
    :param d_min: resolution limit passed to resolution_filter for each run
    :param id_to_batches: optional mapping of identifier ->
                          (first batch, last batch), both inclusive.  When
                          omitted, each run of consecutive batch numbers
                          becomes one run.
    """
    intensities = OrderedDict()
    individual_merged_intensities = OrderedDict()
    batches = OrderedDict()

    sel = unmerged_intensities.sigmas() > 0
    unmerged_intensities = unmerged_intensities.select(sel)
    batches_all = batches_all.select(sel)

    # Work out the inclusive (first batch, last batch) range of every run
    runs = []
    if id_to_batches is None:
      run_id_to_batch_id = None
      # Group consecutive batch numbers.  Each run extends up to and
      # including the last batch seen before a gap (or the end of the data),
      # so the final batch and a trailing isolated batch are kept too.
      for batch in sorted(set(batches_all.data())):
        if runs and batch == runs[-1][1] + 1:
          runs[-1][1] = batch
        else:
          runs.append([batch, batch])
    else:
      run_id_to_batch_id = OrderedDict()
      for run_id, (batch_id, batch_range) in enumerate(
          id_to_batches.iteritems()):
        run_id_to_batch_id[run_id] = batch_id
        runs.append(batch_range)

    for run_id, (run_start, last_batch) in enumerate(runs):
      batch_numbers = batches_all.data()
      batch_sel = (batch_numbers >= run_start) & (batch_numbers <= last_batch)
      batches[run_id] = batches_all.select(batch_sel).resolution_filter(d_min=d_min)
      intensities[run_id] = unmerged_intensities.select(batch_sel).resolution_filter(d_min=d_min)
      individual_merged_intensities[run_id] = intensities[run_id].merge_equivalents().array()
      Debug.write("run %i batch %i to %i" %(run_id+1, run_start, last_batch))

    unmerged_intensities.setup_binner(n_bins=n_bins)
    unmerged_intensities.show_summary()

    self.unmerged_intensities = unmerged_intensities
    self.merged_intensities = unmerged_intensities.merge_equivalents().array()
    self.intensities = intensities
    self.individual_merged_intensities = individual_merged_intensities
    self.batches = batches

    # plots are labelled with the caller's identifiers when they were given
    if run_id_to_batch_id is not None:
      labels = run_id_to_batch_id.values()
    else:
      labels = None
    racc = self.relative_anomalous_cc()
    if racc is not None:
      self.plot_relative_anomalous_cc(racc, labels=labels)
    correlation_matrix, linkage_matrix = self.compute_correlation_coefficient_matrix()

    self._cluster_dict = self.to_dict(correlation_matrix, linkage_matrix)

    self.plot_cc_matrix(correlation_matrix, linkage_matrix, labels=labels)

    self.write_output()
Exemple #33
0
 def __repr__(self):
   """Representation of the contents as an OrderedDict of the items."""
   ordered_view = OrderedDict(self.iteritems())
   return repr(ordered_view)
Exemple #34
0
class multi_crystal_analysis(object):

  def __init__(self, unmerged_intensities, batches_all, n_bins=20, d_min=None,
               id_to_batches=None):
    """Split the data into datasets, cluster them and write all outputs.

    Constructing the object performs the whole analysis: it prints a summary
    of the input, computes the relative anomalous correlation coefficients
    (when available), the correlation matrix and linkage matrix, and then
    calls the plotting and ``write_output`` methods, which write files to
    the current working directory.

    Parameters:
      unmerged_intensities: array of unmerged intensities; must provide
        ``sigmas()``, ``select()``, ``setup_binner()``, ``show_summary()``
        and ``merge_equivalents()`` (presumably a cctbx miller array --
        confirm against callers).
      batches_all: batch array parallel to ``unmerged_intensities``; the
        same selection is applied to both.
      n_bins: number of bins passed to ``setup_binner``.
      d_min: resolution limit passed to ``resolution_filter`` for every
        per-dataset array.
      id_to_batches: forwarded unchanged to ``separate_unmerged``.
    """

    # Drop reflections whose sigma is not positive, keeping the intensity
    # and batch arrays aligned.
    sel = unmerged_intensities.sigmas() > 0
    unmerged_intensities = unmerged_intensities.select(sel)
    batches_all = batches_all.select(sel)

    unmerged_intensities.setup_binner(n_bins=n_bins)
    unmerged_intensities.show_summary()
    self.unmerged_intensities = unmerged_intensities
    self.merged_intensities = unmerged_intensities.merge_equivalents().array()

    separate = separate_unmerged(
      unmerged_intensities, batches_all, id_to_batches=id_to_batches)
    self.intensities = separate.intensities
    self.batches = separate.batches
    run_id_to_batch_id = separate.run_id_to_batch_id
    self.individual_merged_intensities = OrderedDict()
    # Snapshot the keys so the values can be replaced while looping.
    for k in list(self.intensities.keys()):
      self.intensities[k] = self.intensities[k].resolution_filter(d_min=d_min)
      self.batches[k] = self.batches[k].resolution_filter(d_min=d_min)
      self.individual_merged_intensities[k] = self.intensities[k].merge_equivalents().array()

    if run_id_to_batch_id is not None:
      # The plotting methods index into the labels, so hand them a real
      # list rather than whatever values() returns (a view on Python 3).
      labels = list(run_id_to_batch_id.values())
    else:
      labels = None
    racc = self.relative_anomalous_cc()
    if racc is not None:
      self.plot_relative_anomalous_cc(racc, labels=labels)
    correlation_matrix, linkage_matrix = self.compute_correlation_coefficient_matrix()

    # plot_cc_matrix and write_output both read self._cluster_dict, so it
    # has to be set before they are called.
    self._cluster_dict = self.to_dict(correlation_matrix, linkage_matrix)

    self.plot_cc_matrix(correlation_matrix, linkage_matrix, labels=labels)

    self.write_output()

  def to_dict(self, correlation_matrix, linkage_matrix):

    from scipy.cluster import hierarchy
    tree = hierarchy.to_tree(linkage_matrix, rd=False)
    leaves_list = hierarchy.leaves_list(linkage_matrix)

    d = {}

    # http://w3facility.org/question/scipy-dendrogram-to-json-for-d3-js-tree-visualisation/
    # https://gist.github.com/mdml/7537455

    def add_node(node):
      if node.is_leaf(): return
      cluster_id = node.get_id() - len(linkage_matrix) - 1
      row = linkage_matrix[cluster_id]
      d[cluster_id+1] = {
        'datasets': [i+1 for i in sorted(node.pre_order())],
        'height': row[2],
      }

      # Recursively add the current node's children
      if node.left: add_node(node.left)
      if node.right: add_node(node.right)

    add_node(tree)

    return d

  def relative_anomalous_cc(self):
    if self.unmerged_intensities.anomalous_flag():
      d_min = min([ma.d_min() for ma in self.intensities.values()])
      racc = flex.double()
      full_set_anom_diffs = self.merged_intensities.anomalous_differences()
      for i_wedge in self.individual_merged_intensities.keys():
        ma_i = self.individual_merged_intensities[i_wedge].resolution_filter(d_min=d_min)
        anom_i = ma_i.anomalous_differences()
        anom_cc = anom_i.correlation(full_set_anom_diffs, assert_is_similar_symmetry=False).coefficient()
        racc.append(anom_cc)
      return racc

  def plot_relative_anomalous_cc(self, racc, labels=None):
    """Save a sorted bar chart of the coefficients in ``racc`` to racc.png.

    Parameters:
      racc: ``flex.double`` of coefficients, one per dataset.
      labels: optional sequence with one label per entry of ``racc``, in
        the same order as ``racc``. When omitted the 1-based dataset number
        is used.
    """
    # The bars are drawn in ascending order of coefficient, so the tick
    # labels must follow the same permutation.
    perm = flex.sort_permutation(racc)
    fig = pyplot.figure(dpi=1200, figsize=(16,12))
    pyplot.bar(range(len(racc)), list(racc.select(perm)))
    if labels is None:
      labels = ["%.0f" %(j+1) for j in perm]
    else:
      labels = list(labels)
      assert len(labels) == len(racc)
      # Caller-supplied labels arrive in dataset order; reorder them so
      # each bar keeps the label of the dataset it represents.
      labels = [labels[j] for j in perm]
    assert len(labels) == len(racc)
    pyplot.xticks([i+0.5 for i in range(len(racc))], labels)
    tick_locs, tick_labels = pyplot.xticks()
    pyplot.setp(tick_labels, rotation=70)
    pyplot.xlabel("Dataset")
    pyplot.ylabel("Relative anomalous correlation coefficient")
    fig.savefig("racc.png")
    # Release the figure once it is on disk; it is not used again.
    pyplot.close(fig)

  def compute_correlation_coefficient_matrix(self):
    """Compute pairwise dataset correlations and cluster on 1 - CC.

    Returns:
      ``(correlation_matrix, linkage_matrix)`` where ``correlation_matrix``
      is a square ``flex.double`` grid indexed by the keys of
      ``self.individual_merged_intensities`` and ``linkage_matrix`` is the
      result of ``scipy.cluster.hierarchy.linkage`` with
      ``method='average'`` applied to ``1 - correlation_matrix``.
    """
    from scipy.cluster import hierarchy
    import scipy.spatial.distance as ssd

    correlation_matrix = flex.double(
      flex.grid(len(self.intensities), len(self.intensities)))

    d_min = min([ma.d_min() for ma in self.intensities.values()])

    # Apply the common resolution cut once per dataset instead of once per
    # pair of datasets.
    wedges = list(self.individual_merged_intensities.keys())
    filtered = {}
    for wedge in wedges:
      filtered[wedge] = self.individual_merged_intensities[wedge].resolution_filter(
        d_min=d_min)

    for i_wedge in wedges:
      ma_i = filtered[i_wedge]
      for j_wedge in wedges:
        # The matrix is symmetric: compute each pair once and mirror it.
        if j_wedge < i_wedge: continue
        cc_ij = ma_i.correlation(filtered[j_wedge]).coefficient()
        correlation_matrix[(i_wedge,j_wedge)] = cc_ij
        correlation_matrix[(j_wedge,i_wedge)] = cc_ij

    diffraction_dissimilarity = 1-correlation_matrix

    dist_mat = diffraction_dissimilarity.as_numpy_array()

    # convert the redundant n*n square matrix form into a condensed nC2 array
    dist_mat = ssd.squareform(dist_mat) # distArray[{n choose 2}-{n-i choose 2} + (j-i-1)] is the distance between points i and j

    method = ['single', 'complete', 'average', 'weighted'][2]

    linkage_matrix = hierarchy.linkage(dist_mat, method=method)

    return correlation_matrix, linkage_matrix

  def plot_cc_matrix(self, correlation_matrix, linkage_matrix, labels=None):
    """Write the correlation-matrix and dendrogram plots plus a JSON summary.

    Three files are written to the current working directory:
    ``correlation_matrix.png`` (dendrogram next to the reordered correlation
    matrix), ``dendrogram.png`` and ``intensity_clusters.json`` (plotly-style
    description of the heatmap and dendrograms, with ``self._cluster_dict``
    stored under the ``'clusters'`` key).

    Parameters:
      correlation_matrix: object providing ``as_numpy_array()`` that yields
        the square correlation matrix.
      linkage_matrix: linkage matrix accepted by
        ``scipy.cluster.hierarchy.dendrogram``.
      labels: optional sequence with one label per dataset. When omitted
        the 1-based dataset number is used for the dendrogram.
    """
    import copy
    import json
    from scipy.cluster import hierarchy

    if labels is not None:
      # Labels are indexed below, so make sure they are a real sequence.
      labels = list(labels)

    # Compute and plot dendrogram.
    fig = pyplot.figure(dpi=1200, figsize=(16,12))
    axdendro = fig.add_axes([0.09,0.1,0.2,0.8])
    Z = hierarchy.dendrogram(linkage_matrix,
                             color_threshold=0.05,
                             orientation='right')
    axdendro.set_xticks([])
    axdendro.set_yticks([])

    # Plot distance matrix, with rows and columns reordered to follow the
    # leaf order of the dendrogram.
    axmatrix = fig.add_axes([0.3,0.1,0.6,0.8])
    index = Z['leaves']
    D = correlation_matrix.as_numpy_array()
    D = D[index,:]
    D = D[:,index]
    im = axmatrix.matshow(D, aspect='auto', origin='lower')
    axmatrix.yaxis.tick_right()
    if labels is not None:
      axmatrix.xaxis.tick_bottom()
      axmatrix.set_xticks(list(range(len(labels))))
      axmatrix.set_xticklabels([labels[i] for i in index], rotation=70)
      axmatrix.yaxis.set_ticks([])

    # Plot colorbar.
    axcolor = fig.add_axes([0.91,0.1,0.02,0.8])
    pyplot.colorbar(im, cax=axcolor)

    # Save the figure, then release it; it is not used again.
    fig.savefig('correlation_matrix.png')
    pyplot.close(fig)

    fig = pyplot.figure(dpi=1200, figsize=(16,12))

    if labels is None:
      labels = ['%i' %(i+1) for i in range(len(self.intensities))]

    ddict = hierarchy.dendrogram(linkage_matrix,
                                 color_threshold=0.05,
                                 labels=labels,
                                 show_leaf_counts=False)
    tick_locs, tick_labels = pyplot.xticks()
    pyplot.setp(tick_labels, rotation=70)
    fig.savefig('dendrogram.png')
    pyplot.close(fig)

    y2_dict = scipy_dendrogram_to_plotly_json(ddict) # above heatmap
    x2_dict = copy.deepcopy(y2_dict) # left of heatmap, rotated
    for trace in y2_dict['data']:
      trace['yaxis'] = 'y2'
      trace['xaxis'] = 'x2'

    # Swap the coordinates of the copy to rotate the dendrogram.
    for trace in x2_dict['data']:
      trace['x'], trace['y'] = trace['y'], trace['x']
      trace['yaxis'] = 'y3'
      trace['xaxis'] = 'x3'

    ticktext = y2_dict['layout']['xaxis']['ticktext']

    ccdict = {
      'data': [{
        'name': 'correlation_matrix',
        'x': list(range(D.shape[0])),
        'y': list(range(D.shape[1])),
        'z': D.tolist(),
        'type': 'heatmap',
        'colorbar': {
          'title': 'Correlation coefficient',
          'titleside': 'right',
          'xpad': 0,
        },
        'colorscale': 'Jet',
        'xaxis': 'x',
        'yaxis': 'y',
      }],

      'layout': {
        'autosize': False,
        'bargap': 0,
        'height': 1000,
        'hovermode': 'closest',
        'margin': {
          'r': 20,
          't': 50,
          'autoexpand': True,
          'l': 20
          },
        'showlegend': False,
        'title': 'Dendrogram Heatmap',
        'width': 1000,
        'xaxis': {
          'domain': [0.2, 0.9],
          'mirror': 'allticks',
          'showgrid': False,
          'showline': False,
          'showticklabels': True,
          'tickmode': 'array',
          'ticks': '',
          'ticktext': ticktext,
          'tickvals': list(range(len(ticktext))),
          'tickangle': 300,
          'title': '',
          'type': 'linear',
          'zeroline': False
        },
        'yaxis': {
          'domain': [0, 0.78],
          'anchor': 'x',
          'mirror': 'allticks',
          'showgrid': False,
          'showline': False,
          'showticklabels': True,
          'tickmode': 'array',
          'ticks': '',
          'ticktext': ticktext,
          'tickvals': list(range(len(ticktext))),
          'title': '',
          'type': 'linear',
          'zeroline': False
        },
        'xaxis2': {
          'domain': [0.2, 0.9],
          'anchor': 'y2',
          'showgrid': False,
          'showline': False,
          'showticklabels': False,
          'zeroline': False
        },
        'yaxis2': {
          'domain': [0.8, 1],
          'anchor': 'x2',
          'showgrid': False,
          'showline': False,
          'zeroline': False
        },
        'xaxis3': {
          'domain': [0.0, 0.1],
          'anchor': 'y3',
          # Reversed range: the largest x value of the rotated traces is
          # placed first and 0 last.
          'range': [max(max(trace['x']) for trace in x2_dict['data']), 0],
          'showgrid': False,
          'showline': False,
          'tickangle': 300,
          'zeroline': False
        },
        'yaxis3': {
          'domain': [0, 0.78],
          'anchor': 'x3',
          'showgrid': False,
          'showline': False,
          'showticklabels': False,
          'zeroline': False
        },
      }
    }
    ccdict['data'].extend(y2_dict['data'])
    ccdict['data'].extend(x2_dict['data'])

    ccdict['clusters'] = self._cluster_dict

    # json.dump emits text, so the file is opened in text mode.
    with open('intensity_clusters.json', 'w') as f:
      json.dump(ccdict, f, indent=2)



  def write_output(self):
    """Write a table of the clusters to ``intensity_clustering.txt``.

    One row is written per entry of ``self._cluster_dict`` (sorted by
    cluster id) giving the id, the number of datasets, the height formatted
    to two decimals and the space-separated dataset ids.
    """
    from libtbx import table_utils

    rows = [["cluster_id", "# datasets", "height", "datasets"]]
    for cid in sorted(self._cluster_dict.keys()):
      cluster = self._cluster_dict[cid]
      datasets = cluster['datasets']
      rows.append([str(cid), str(len(datasets)),
                   '%.2f' %cluster['height'],
                   ' '.join(str(i) for i in datasets)])

    # Format before opening the file so a formatting error cannot leave a
    # truncated file behind.
    table = table_utils.format(
      rows, has_header=True, prefix="|", postfix="|")

    # Text mode plus an explicit write() works on both Python 2 and 3; the
    # trailing newline reproduces what the print statement used to emit.
    with open('intensity_clustering.txt', 'w') as f:
      f.write(str(table) + '\n')
Exemple #35
0
  def __init__(self, cif_block, base_array_info=None, wavelengths=None):
    """Build miller arrays from the reflection loops of a CIF block.

    The resulting arrays are stored in ``self._arrays`` (an ordered mapping
    from a key derived from the CIF data name to the array). Columns that
    belong together are combined as they are encountered: sigmas are
    attached to their data column, ``PHWT`` phases to ``FWT``, ``HL_A..D``
    into Hendrickson-Lattman coefficients, ``A``/``B`` parts into complex
    data, phases onto amplitudes, and ``_plus``/``_minus`` pairs into a
    single anomalous array.

    Parameters:
      cif_block: the CIF data block; passed to
        ``crystal_symmetry_builder.__init__`` and queried with ``get`` for
        the Hendrickson-Lattman columns.
      base_array_info: optional array info. When given, its
        ``crystal_symmetry_from_file`` is joined into the symmetry and its
        ``labels`` are prepended to every array's labels. Defaults to
        ``miller.array_info(source_type="cif")``.
      wavelengths: optional mapping from wavelength id to wavelength,
        looked up with ``get``. Defaults to an empty dict.

    Raises:
      CifBuilderError: if no arrays could be built from the block.
    """
    crystal_symmetry_builder.__init__(self, cif_block)
    if base_array_info is not None:
      self.crystal_symmetry = self.crystal_symmetry.join_symmetry(
        other_symmetry=base_array_info.crystal_symmetry_from_file,
        force=True)
    self._arrays = OrderedDict()
    if (wavelengths is None) :
      wavelengths = {}
    if base_array_info is None:
      base_array_info = miller.array_info(source_type="cif")

    def rstrip_substrings(string, substrings):
      # Strip each non-empty suffix in turn, in the order given.
      for substr in substrings:
        if substr == '': continue
        if string.endswith(substr):
          string = string[:-len(substr)]
      return string

    refln_containing_loops = self.get_miller_indices_containing_loops()
    for self.indices, refln_loop in refln_containing_loops:
      self.wavelength_id_array = None
      self.crystal_id_array = None
      self.scale_group_array = None
      wavelength_ids = [None]
      crystal_ids = [None]
      scale_groups = [None]
      for key, value in refln_loop.iteritems():
        # need to get these arrays first
        if (key.endswith('wavelength_id') or
            key.endswith('crystal_id') or
            key.endswith('scale_group_code')):
          data = as_int_or_none_if_all_question_marks(value, column_name=key)
          if data is None:
            continue
          counts = data.counts()
          if key.endswith('wavelength_id'):
            # Recorded even when there is a single id, so that id is still
            # used for the wavelength lookup below.
            wavelength_ids = counts.keys()
          if len(counts) == 1: continue
          array = miller.array(
            miller.set(self.crystal_symmetry, self.indices).auto_anomalous(), data)
          if key.endswith('wavelength_id'):
            self.wavelength_id_array = array
            wavelength_ids = counts.keys()
          elif key.endswith('crystal_id'):
            self.crystal_id_array = array
            crystal_ids = counts.keys()
          elif key.endswith('scale_group_code'):
            self.scale_group_array = array
            scale_groups = counts.keys()
      for label, value in sorted(refln_loop.items()):
        for w_id in wavelength_ids:
          for crys_id in crystal_ids:
            for scale_group in scale_groups:
              if 'index_' in label: continue
              key = label
              labels = [label]
              wavelength = None
              # The id columns themselves are never split by id.
              if (key.endswith('wavelength_id') or
                    key.endswith('crystal_id') or
                    key.endswith('scale_group_code')):
                w_id = None
                crys_id = None
                scale_group = None
              key_suffix = ''
              if w_id is not None:
                key_suffix += '_%i' %w_id
                labels.insert(0, "wavelength_id=%i" %w_id)
                wavelength = wavelengths.get(w_id, None)
              if crys_id is not None:
                key_suffix += '_%i' %crys_id
                labels.insert(0, "crystal_id=%i" %crys_id)
              if scale_group is not None:
                key_suffix += '_%i' %scale_group
                labels.insert(0, "scale_group_code=%i" %scale_group)
              key += key_suffix
              sigmas = None
              if key in self._arrays: continue
              array = self.flex_std_string_as_miller_array(
                value, wavelength_id=w_id, crystal_id=crys_id,
                scale_group_code=scale_group)
              if array is None: continue
              if '_sigma' in key:
                # Attach sigmas to the matching data column when present.
                sigmas_label = label
                key = None
                for suffix in ('', '_meas', '_calc'):
                  if sigmas_label.replace('_sigma', suffix) in refln_loop:
                    key = sigmas_label.replace('_sigma', suffix) + key_suffix
                    break
                if key is None:
                  key = sigmas_label + key_suffix
                elif key in self._arrays and self._arrays[key].sigmas() is None:
                  sigmas = array
                  array = self._arrays[key]
                  check_array_sizes(array, sigmas, key, sigmas_label)
                  sigmas = as_flex_double(sigmas, sigmas_label)
                  array.set_sigmas(sigmas.data())
                  info = array.info()
                  array.set_info(
                    info.customized_copy(labels=info.labels+[sigmas_label],
                      wavelength=wavelength))
                  continue
              elif 'PHWT' in key:
                # Map-coefficient phases: transfer onto the FWT amplitudes.
                phwt_label = label
                fwt_label = label.replace('PHWT', 'FWT')
                if fwt_label not in refln_loop: continue
                phwt_array = array
                if fwt_label in self._arrays:
                  array = self._arrays[fwt_label]
                  check_array_sizes(array, phwt_array, fwt_label, phwt_label)
                  phases = as_flex_double(phwt_array, phwt_label)
                  info = array.info()
                  array = array.phase_transfer(phases, deg=True)
                  array.set_info(
                    info.customized_copy(labels=info.labels+[phwt_label]))
                  self._arrays[fwt_label] = array
                  continue
              elif 'HL_' in key:
                # Hendrickson-Lattman coefficients: all four columns are
                # combined and stored under the HL_A key.
                hl_letter = key[key.find('HL_')+3]
                hl_key = 'HL_' + hl_letter
                key = key.replace(hl_key, 'HL_A')
                if key in self._arrays:
                  continue # this array is already dealt with
                hl_labels = [label.replace(hl_key, 'HL_'+letter) for letter in 'ABCD']
                hl_values = [cif_block.get(hl_label) for hl_label in hl_labels]
                if hl_values.count(None) == 0:
                  selection = self.get_selection(
                    hl_values[0], wavelength_id=w_id,
                    crystal_id=crys_id, scale_group_code=scale_group)
                  hl_values = [as_double_or_none_if_all_question_marks(
                    hl.select(selection), column_name=lab)
                               for hl, lab in zip(hl_values, hl_labels)]
                  array = miller.array(miller.set(
                    self.crystal_symmetry, self.indices.select(selection)
                    ).auto_anomalous(), flex.hendrickson_lattman(*hl_values))
                  labels = labels[:-1]+hl_labels
              elif '.B_' in key or '_B_' in key:
                # B part of a complex quantity: merge with the A part.
                if '.B_' in key:
                  key, key_b = key.replace('.B_', '.A_'), key
                  label, label_b = label.replace('.B_', '.A_'), label
                elif '_B_' in key:
                  key, key_b = key.replace('_B', '_A'), key
                  label, label_b = label.replace('_B', '_A'), label
                if key in refln_loop and key_b in refln_loop:
                  b_part = array.data()
                  if key in self._arrays:
                    info = self._arrays[key].info()
                    a_part = self._arrays[key].data()
                    self._arrays[key] = self._arrays[key].array(
                      data=flex.complex_double(a_part, b_part))
                    self._arrays[key].set_info(
                      info.customized_copy(labels=info.labels+[key_b]))
                    continue
              elif ('phase_' in key and not "_meas" in key and
                    self.crystal_symmetry.space_group() is not None):
                # Phases: transfer onto the matching F or F_au column.
                alt_key1 = label.replace('phase_', 'F_')
                alt_key2 = alt_key1 + '_au'
                if alt_key1 in refln_loop:
                  phase_key = label
                  key = alt_key1+key_suffix
                elif alt_key2 in refln_loop:
                  phase_key = label
                  key = alt_key2+key_suffix
                else: phase_key = None
                if phase_key is not None:
                  phases = array.data()
                  if key in self._arrays:
                    array = self._arrays[key]
                    array = as_flex_double(array, key)
                    check_array_sizes(array, phases, key, phase_key)
                    info = self._arrays[key].info()
                    self._arrays[key] = array.phase_transfer(phases, deg=True)
                    self._arrays[key].set_info(
                      info.customized_copy(labels=info.labels+[phase_key]))
                  else:
                    array = self.flex_std_string_as_miller_array(
                      refln_loop[label], wavelength_id=w_id, crystal_id=crys_id,
                      scale_group_code=scale_group)
                    check_array_sizes(array, phases, key, phase_key)
                    # NOTE(review): the return value of phase_transfer is
                    # discarded here, so `array` is stored as read unless
                    # phase_transfer mutates in place -- confirm intent.
                    array.phase_transfer(phases, deg=True)
                    labels = labels+[label, phase_key]
              if base_array_info.labels is not None:
                labels = base_array_info.labels + labels
              # determine observation type
              stripped_key = rstrip_substrings(
                key, [key_suffix, '_au', '_meas', '_calc', '_plus', '_minus'])
              if (stripped_key.endswith('F_squared') or
                  stripped_key.endswith('intensity') or
                  stripped_key.endswith('.I') or
                  stripped_key.endswith('_I')) and (
                    array.is_real_array() or array.is_integer_array()):
                array.set_observation_type_xray_intensity()
              elif (stripped_key.endswith('F') and (
                array.is_real_array() or array.is_integer_array())):
                array.set_observation_type_xray_amplitude()
              # Both kinds of observation (intensities and amplitudes) are
              # handled below; the check used to test for amplitudes twice.
              if (array.is_xray_intensity_array() or
                  array.is_xray_amplitude_array()):
                # e.g. merge_equivalents treats integer arrays differently, so must
                # convert integer observation arrays here to be safe
                if isinstance(array.data(), flex.int):
                  array = array.customized_copy(data=array.data().as_double())
              array.set_info(base_array_info.customized_copy(labels=labels))
              if (array.is_xray_intensity_array() or
                  array.is_xray_amplitude_array()):
                info = array.info()
                array.set_info(info.customized_copy(wavelength=wavelength))
              self._arrays.setdefault(key, array)
    # Merge matching _plus/_minus arrays into a single anomalous array.
    # Iterate over a copy because entries are popped from self._arrays.
    for key, array in self._arrays.copy().iteritems():
      if (   key.endswith('_minus') or '_minus_' in key
          or key.endswith('_plus') or '_plus_' in key):
        if '_minus' in key:
          minus_key = key
          plus_key = key.replace('_minus', '_plus')
        elif '_plus' in key:
          plus_key = key
          minus_key = key.replace('_plus', '_minus')
        if plus_key in self._arrays and minus_key in self._arrays:
          plus_array = self._arrays.pop(plus_key)
          minus_array = self._arrays.pop(minus_key)
          # The minus array's indices are negated before concatenation.
          minus_array = minus_array.customized_copy(
            indices=-minus_array.indices()).set_info(minus_array.info())
          array = plus_array.concatenate(
            minus_array, assert_is_similar_symmetry=False)
          array = array.customized_copy(anomalous_flag=True)
          array.set_info(minus_array.info().customized_copy(
            labels=list(
              OrderedSet(plus_array.info().labels+minus_array.info().labels))))
          array.set_observation_type(plus_array.observation_type())
          self._arrays.setdefault(key, array)

    if len(self._arrays) == 0:
      raise CifBuilderError("No reflection data present in cif block")
Exemple #36
0
 def sort(self, recursive=False, key=None, reverse=False):
     """Rebuild ``self.blocks`` as an OrderedDict of its sorted items.

     ``key`` and ``reverse`` are applied to the ``(name, block)`` item
     pairs. With ``recursive`` set, every block's own ``sort`` is then
     called with the same ``recursive`` and ``reverse`` values (``key`` is
     not forwarded).
     """
     ordered_items = list(self.blocks.items())
     ordered_items.sort(key=key, reverse=reverse)
     self.blocks = OrderedDict(ordered_items)
     if not recursive:
         return
     for block in self.blocks.values():
         block.sort(recursive=recursive, reverse=reverse)